PhantomJS is a headless WebKit browser. I can open a URL with it and get the content of a page that updates every second.
But I need to get the content of many (100) pages at the same time.
All pages must be opened concurrently and refresh every second.
It's possible for one page, but I don't know how to retrieve from multiple pages at once.
This is the example code from the PhantomJS website:
// Minimal PhantomJS example: announce the load, open one page, then quit.
console.log('Loading a web page');
var targetUrl = 'http://www.phantomjs.org/';
var webPage = require('webpage').create();
webPage.open(targetUrl, function onPageLoaded(status) {
  // Runs once the load attempt has finished; `status` is not inspected here.
  phantom.exit();
});
Can I run many PhantomJS instances at the same time? That doesn't seem like the best way. Does anybody know how to open just one PhantomJS instance and get content from several pages?
PhantomJS uses WebKit, which gives it a browsing environment similar to that of well-known browsers such as Google Chrome, Mozilla Firefox, and Safari.
PhantomJS is a discontinued headless browser used for automating web page interaction. PhantomJS provides a JavaScript API enabling automated navigation, screenshots, user behavior and assertions making it a common tool used to run browser-based unit tests in a headless system like a continuous integration environment.
PhantomJS is a headless web browser scriptable with JavaScript. It runs on Windows, macOS, Linux, and FreeBSD. Using QtWebKit as the back-end, it offers fast and native support for various web standards: DOM handling, CSS selector, JSON, Canvas, and SVG.
Here is the code I used before to parse the items of an e-shop and save the HTML of each page of those items.
I hope that it will help you!
// Script-wide state, one declaration per line.
var RenderUrlsToFile;
var url_string_for_array;
// List of URLs to fetch; filled in further down.
var arrayOfUrls = [];
// PhantomJS "system" module; its `args` are read further down.
var system = require("system");
RenderUrlsToFile = function(urls, callbackPerUrl, callbackFinal) {
var getFilename, next, page, retrieve, urlIndex, webpage, link_name, sex;
var fs = {};
fs = require('fs');
urlIndex = 0;
webpage = require("webpage");
page = null;
// getFilename = function() {
//     return "parsed/" + urlIndex + ".png";
// };
next = function(status, url, file) {
    page.close();
    callbackPerUrl(status, url, file);
    return retrieve();
};
retrieve = function() {
    var url;
    if (urls.length > 0) {
        url = urls.shift();
        urlIndex++;
        page = webpage.create();
        page.viewportSize = {
            width: 800,
            height: 600
        };
        page.settings.userAgent = "Phantom.js bot";
        return page.open("http://" + url, function(status) {
            var file;
            // file = getFilename();
            if (status === "success") {
                return window.setTimeout((function() {
                    // page.render(file);
                    var js = page.evaluate(function () {
                            return document;
                        });
                    fs.write('your_file_path'.html', js.all[0].outerHTML, 'w');
                    return next(status, url, file);
                }), 100);
            } else {
                return next(status, url, file);
            }
        });
    } else {
        return callbackFinal();
    }
};
return retrieve();
};
// Decide which URLs to fetch: command-line arguments win, otherwise fall back
// to a generated list of paginated listing URLs.
if (system.args.length > 1) {
    // Skip the first entry of system.args (the script name in PhantomJS).
    arrayOfUrls = Array.prototype.slice.call(system.args, 1);
} else {
    // ------------ MAIN PART OF CODE FOR YOUR QUESTION ------------
    // For example: I need to parse the items of an e-shop, so I take the
    // first page and then use a "for" loop over the exact number of pages.
    url_string_for_array = "www.lamoda.ru/c/559/accs-muzhskieaksessuary/?genders=men&page=1";
    for (var k = 2; k < 20; k++) {
        url_string_for_array += ",www.lamoda.ru/c/559/accs-muzhskieaksessuary/?genders=men&page=" + k;
    }
    // The URLs contain no commas, so splitting on "," yields one entry per page.
    arrayOfUrls = url_string_for_array.split(',');
}
// Start processing: log the outcome of every URL, then quit PhantomJS once
// the whole list has been handled.
RenderUrlsToFile(arrayOfUrls, function(status, url, file) {
    var succeeded = status === "success";
    if (succeeded) {
        return console.log("Rendered '" + url + "'");
    }
    return console.log("Unable to render '" + url + "'");
}, function() {
    return phantom.exit();
});
If you like our work, you can donate to us via PayPal or buy us a coffee so we can maintain and grow the site. Thank you!
Donate Us With