Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Code Block
languagejs
// PhantomJS script: loads the URL given as the first command-line argument and
// prints a JSON document describing the response (body, headers, status).
// On a failed load it prints 'FAILED loading the address' instead.
var page = require('webpage').create(),
    system = require('system'),
    address;

address = system.args[1]; // The URL that is submitted to the proxy service

var result = { // Standard response structure, see Response structure section in the documentation
    body: null,
    headers: null,
    statusCode: null,
    statusMessage: null,
    httpVersion: null // Never filled in by this script; kept so the structure is complete
};

// Returns true when a received resource URL is the requested address itself
// rather than one of the page's internal resources.
function isRequestedPage(url) {
    try {
        return decodeURIComponent(url) === address;
    } catch (e) {
        // decodeURIComponent throws URIError on malformed percent-escapes;
        // compare the raw URL instead of letting the callback abort.
        return url === address;
    }
}

// Used to obtain response headers and status code from the loaded page
page.onResourceReceived = function(response) {
    // Only the response for the requested address is of interest, not internal resources
    if (!isRequestedPage(response.url)) {
        return;
    }

    // Clone headers into the final response. An index loop is used because
    // response.headers is an array of { name, value } entries.
    var received = response.headers || [];
    var headers = {};
    for (var i = 0; i < received.length; i++) {
        headers[received[i].name] = received[i].value;
    }
    result.headers = headers;

    // Clone HTTP status code and text into the final response
    result.statusCode = response.status;
    result.statusMessage = response.statusText;
};

// Single completion point: the load status is checked before anything is
// written, so a failed load never produces a JSON result.
page.open(address, function (status) {
    if (status !== 'success') { // Handle failures
        console.log('FAILED loading the address');
    } else {
        // Clone page HTML source code (as manipulated by any internal JS scripts) into final response
        result.body = page.content;

        // Write out final response
        console.log(JSON.stringify(result));
    }
    phantom.exit();
});

Retrieve URLs from Google results

The following code navigates to a submitted Google results page (e.g. http://www.google.com/search?q=example) and returns a plain-text list of the page addresses found on that page, one per line.

Info

Note: Google may change its page structure at any time, which can stop this example from working as intended. However, it should be relatively easy to adapt the example to an updated page.

Code Block
languagejs
// PhantomJS script: loads the Google results page given as the first
// command-line argument and prints the href of every link matched by the
// 'h3.r a' selector, one per line.
var page = require('webpage').create(),
    system = require('system'),
    address;
address = system.args[1];
// Fake user agent
page.settings.userAgent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36';
page.open(address, function (status) {
    if (status !== 'success') {
        console.log('FAILED loading the address');
    } else {
        var urls = page.evaluate(function() { // Execute code in the scope of the page
            var links = document.querySelectorAll('h3.r a');
            var found = [];
            // Index loop: for...in over a NodeList would also visit
            // non-element members such as 'length' and 'item'.
            for (var i = 0; i < links.length; i++) {
                found.push(links[i].href);
            }
            return found;
        }) || []; // Treat a missing evaluate result as "no URLs found"

        for (var j = 0; j < urls.length; j++) { // Return URLs, one per line
            console.log(urls[j]);
        }
    }
    phantom.exit();
});