Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How do I get the next page after logging in with PhantomJS?

Tags:

I have found many questions about this on here, but I am not sure why they have not been answered.

I am trying to crawl a web page after logging in with this code : source

// Shared state used by the step runner further down in this script.
var loadInProgress = false; // true while a page is still loading
var testindex = 0;          // index of the next step to execute
var steps = [];             // ordered list of step functions

/*********SETTINGS*********************/
var webPage = require('webpage');
var page = webPage.create();

// Per-page options, applied in one place.
(function applyPageSettings(settings) {
    settings.userAgent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36';
    settings.javascriptEnabled = true;
    settings.loadImages = false; // skipping images makes the script much faster
}(page.settings));

// Flags set on the global phantom object.
phantom.cookiesEnabled = true;
phantom.javascriptEnabled = true;
/*********SETTINGS END*****************/

console.log('All settings loaded, start with execution');

// Relay console output produced inside the page to the PhantomJS console.
page.onConsoleMessage = function relayPageConsole(text) {
    console.log(text);
};
/**********DEFINE STEPS THAT PHANTOM SHOULD DO***********************/
// Each entry is one step. The runner calls them in order, one per timer tick,
// and only while no page load is in progress.
steps = [

    //Step 1 - Open the login page
    function(){
        console.log('Step 1 - Abrindo página de login');
        page.open("http://parceriascury.housecrm.com.br", function(status){
            // Report a failed load instead of silently ignoring it.
            if (status !== 'success') {
                console.log('Failed to open the login page (status: ' + status + ')');
            }
        });
    },
    //Step 3 - Populate and submit the login form (there is no separate step 2)
    function(){
        console.log('Step 3 - Preenchendo o form');
        // The callback runs inside the page; it returns false when the form is
        // missing so the failure is reported here rather than as a TypeError
        // thrown in the page context.
        var submitted = page.evaluate(function(){
            var loginField = document.getElementById("login");
            var passwordField = document.getElementById("senha");
            var form = document.getElementById("frmlandingpage");
            if (!loginField || !passwordField || !form) {
                return false;
            }
            loginField.value = "xxxxx";
            passwordField.value = "xxxxx";
            form.submit();
            return true;
        });
        if (!submitted) {
            console.log('Login form not found; the login page may not have loaded.');
        }
    },
    //Step 4 - Save the HTML of whatever page is loaded once the form submit has finished.
    //Open the saved file in a browser to check whether the login succeeded.
    function(){
        var fs = require('fs');
        var outputPath = 'C:\\phantomjs\\logado_cury_10.html';
        console.log('Step 4 - Saving the current page HTML to ' + outputPath);
        var result = page.evaluate(function() {
            return document.documentElement.outerHTML;
        });
        fs.write(outputPath, result, 'w');
    },
];
/**********END STEPS THAT PHANTOM SHOULD DO***********************/

//Execute steps one by one
var interval = setInterval(executeRequestsStepByStep,5000);

/**
 * Timer callback that drives the scripted steps.
 *
 * On each tick it does nothing while a page load is in progress. Otherwise it
 * runs the step at `testindex` (if there is one) and advances the index. Once
 * no step is left and no load is pending, it stops the timer and exits PhantomJS.
 *
 * If a step throws, the error is logged, the timer is stopped and PhantomJS
 * exits with code 1, so the failing step is not retried on every tick.
 */
function executeRequestsStepByStep(){
    // Never act while a page is loading: neither run a step nor exit.
    if (loadInProgress) {
        return;
    }
    if (typeof steps[testindex] === "function") {
        try {
            steps[testindex]();
        } catch (err) {
            console.log("Step " + (testindex + 1) + " of " + steps.length + " failed: " + err);
            clearInterval(interval);
            phantom.exit(1);
            return;
        }
        testindex++;
    }
    // Re-check the flag: the step that just ran may have started a page load.
    if (!loadInProgress && typeof steps[testindex] !== "function") {
        console.log("test complete!");
        clearInterval(interval);
        phantom.exit();
    }
}

/**
 * These listeners are essential for the step runner to work properly. They maintain the
 * loadInProgress flag, which tells the runner whether a page is still loading.
 * Without them, a step could read the page content before the page has fully loaded.
 *
 * page.onConsoleMessage is already assigned right after the settings section,
 * so it is not assigned again here.
 */
page.onLoadStarted = function() {
    loadInProgress = true;
    console.log('Loading started');
};
page.onLoadFinished = function(status) {
    loadInProgress = false;
    console.log('Loading finished');
    // PhantomJS passes 'success' or 'fail'; surface failures so a broken navigation is visible.
    if (status !== 'success') {
        console.log('Page load did not succeed (status: ' + status + ')');
    }
};

But the response is only this:

<html><head></head><body>ok</body></html>

I need to get the content of next page with URL:

http://parceriascury.housecrm.com.br/parceiro_busca

I can access this page directly, but not with all of its content, because that requires being logged in.

There are no errors, and I don't know where I am making a mistake.

Edit: Other solutions are welcome. I think curl might work, but only after the page's JavaScript has loaded.

Sorry for my bad english.


Donate For Us

If you love us, you can donate via PayPal or buy us a coffee so we can maintain and grow the site. Thank you!