Using PhantomJS to request a web page

PhantomJS is a very nice headless browser which uses the WebKit browser engine. It can be used for many purposes. Here, I’m going to illustrate PhantomJS by performing a Bing search query and then executing JavaScript code on the resulting page. Libraries such as jQuery can be injected into the page to perform all kinds of operations.

The div tag with the identifier vm_c contains the search results.

bing.js

// bing.js -- PhantomJS script: load a Bing image-search results page, inject
// jQuery into it, and print the inner HTML of the #vm_c element.
//
// Run with: phantomjs bing.js
// Exit status: 0 once the HTML has been printed, 1 if the page failed to load.
//
// NOTE: this runs inside PhantomJS, so the syntax is deliberately kept to ES5.
var page = require('webpage').create();
var url = 'http://www.bing.com/images/search?q=example&go=&qs=n&form=QBLH&filt=all&pq=example&sc=8-1&sp=-1&sk=';

// console.log() calls made inside page.evaluate() run in the page's own
// context; forward them to this script's console so they become visible.
page.onConsoleMessage = function(msg) {
    console.log(msg);
};

// Send a desktop Chrome user-agent string with the request.
page.settings.userAgent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11';
page.open(url, function(status) {
    if ( status !== "success" ) {
        // Without an explicit exit here PhantomJS would keep running forever
        // after a failed load, so report the problem and stop with an error code.
        console.log("Unable to load " + url + " (status: " + status + ")");
        phantom.exit(1);
        return;
    }

    // Include jQuery on the page
    page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function() {
        page.evaluate(function() {
            // Execute jQuery inside the page; the output is relayed by onConsoleMessage
            console.log("$(\"#vm_c\").html() -> " + $("#vm_c").html());
        });
        phantom.exit();
    });
});

Output

$ /usr/bin/phantomjs bing.js
$("#vm_c").html() -> <script type="text/javascript">_G.KPT=new Date()</script><div id="dg_c" mu="1000" aria-label="Image Results" style="height: 1053px; width: 917px; "><div class="dg_b" dgw="917" dgh="1053" beg="0" end="29" noac="2" dgst="ir_d*rr_dc~1b1~2*ro_u1059*"><div class="border imgres"><div class="dg_u" style="width:93px;height:144px;left:120px;top:0px"><a href="/images/search?q=example&amp;view=detail&amp;id=FA5007AE21F151AF789BF26ADB6F4662034A45D9&amp;first=1" ihk="H.4904539927807127" m="{ns:&q
[...]
></div><script type="text/javascript">(_w.imageRichHover=new IRH(false)).attach(_ge('dg_c').firstChild, ['a']);</script>
This entry was posted in Bash, JavaScript and tagged , , , , , , , , , , . Bookmark the permalink. Trackbacks are closed, but you can post a comment.

Post a Comment

Your email is never published nor shared. Required fields are marked *

*
*

You may use these HTML tags and attributes: <a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <strike> <strong>

Why ask?