...
Code Block |
---|
|
var page = require('webpage').create(),
system = require('system'),
address;
address = system.args[1]; // The URL that is submitted to the proxy service
var result = { // Standard response structure, see Response structure section in the documentation
body: null,
headers: null,
statusCode: null,
statusMessage: null,
httpVersion: null,
};
/**
 * Percent-decodes a URL for comparison purposes without ever throwing.
 * decodeURIComponent raises URIError on malformed escapes (e.g. a stray "%"),
 * in which case the URL is returned unchanged.
 *
 * @param {string} url - URL to decode.
 * @returns {string} The decoded URL, or the input when it cannot be decoded.
 */
function decodeUrlSafely(url) {
    try {
        return decodeURIComponent(url);
    } catch (e) {
        return url;
    }
}

/**
 * Used to obtain response headers and status code from the loaded page.
 * Responses for internal resources (anything whose URL is not the requested
 * address) are ignored. A later matching response overwrites the values
 * stored by an earlier one.
 *
 * @param {Object} response - Resource response; this handler reads its url,
 *     headers (array of {name, value}), status and statusText properties.
 */
page.onResourceReceived = function (response) {
    var receivedUrl = decodeUrlSafely(response.url);
    // Compare against both the raw and the decoded address so that an address
    // which itself contains percent-escapes (e.g. "?q=a%20b") still matches.
    var isRequestedPage = receivedUrl === address ||
        receivedUrl === decodeUrlSafely(address);
    if (!isRequestedPage) {
        return;
    }

    // Clone headers into the final response
    var headers = response.headers || [];
    var cloned = {};
    for (var i = 0; i < headers.length; i += 1) {
        cloned[headers[i].name] = headers[i].value;
    }
    result.headers = cloned;

    // Clone HTTP status code and text into the final response
    result.statusCode = response.status;
    result.statusMessage = response.statusText;
};
/**
 * Page load including all internal assets has completed.
 * On success, stores the page HTML in the response, prints the response as
 * JSON and exits. On any other status nothing is written here, so a failed
 * load never produces a success-shaped response; the failure is reported by
 * the callback passed to page.open.
 *
 * @param {string} status - Load status; compared against 'success'.
 */
page.onLoadFinished = function (status) {
    if (status !== 'success') {
        return;
    }
    // Clone page HTML source code (as manipulated by any internal JS scripts) into final response
    result.body = page.content;
    // Write out final response and exit
    console.log(JSON.stringify(result));
    phantom.exit();
};
page.open(address, function (status) {
if (status !== 'success') { // Handle failures
console.log('FAILED loading the address');
phantom.exit();
}
}); |
Retrieve URLs from Google results
The following code navigates to the submitted Google results page (e.g. http://www.google.com/search?q=example) and prints the result URLs found on that page as plain text, one per line.
Info |
---|
Note: Google may change its page structure at any time, which can cause this example to stop working as intended. However, it should be relatively easy to adapt the example to an updated page. |
Code Block |
---|
|
var page = require('webpage').create(),
system = require('system'),
address;
address = system.args[1];
// Fake user agent
page.settings.userAgent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36';
page.open(address, function (status) {
if (status !== 'success') {
console.log('FAILED loading the address');
} else {
var urls = page.evaluate(function() { // Execute code in the scope of the page
var list = document.querySelectorAll('h3.r a');
var urls = [];
for (var i in list) {
if (list[i].href !== undefined) {
urls.push(list[i].href);
}
}
return urls;
});
for (var i in urls) { // Return URLs, one per line
console.log(urls[i]);
}
}
phantom.exit();
}); |