gpt4 book ai didi

javascript - 使用 PhantomJs 登录后如何进入下一页?

转载 作者:可可西里 更新时间:2023-11-01 12:18:18 25 4
gpt4 key购买 nike

我在这里发现了很多关于此的问题,但不确定为什么没有人回答。

我尝试用下面的代码(source)在登录后抓取网页:

// State shared by the step list, the timer callback and the load listeners.
var steps = [];
var testindex = 0; // index into `steps` of the next step to run
var loadInProgress = false; // set to true while a page is still loading

/*********SETTINGS*********************/
var webPage = require('webpage');
var page = webPage.create();
(function configurePage(settings) {
    // Per the original author, the script is much faster when images are not loaded.
    settings.loadImages = false;
    settings.javascriptEnabled = true;
    settings.userAgent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36';
}(page.settings));
// NOTE(review): `phantom.javascriptEnabled` does not look like a documented
// property of the `phantom` object - confirm; it is kept to preserve behavior.
phantom.javascriptEnabled = true;
phantom.cookiesEnabled = true;
/*********SETTINGS END*****************/

console.log('All settings loaded, start with execution');
// Relay console messages emitted inside the page to the PhantomJS console.
page.onConsoleMessage = function(text) {
    console.log(text);
};
/**********DEFINE STEPS THAT PHANTOM SHOULD DO***********************/
// Each entry is one step. The runner calls them in order, one per timer tick,
// and only while no page load is in progress. Step numbers in the log labels
// are not contiguous: the original labels "Step 1" and "Step 3" are kept.
steps = [

    //Step 1 - Open the login page
    function(){
        console.log('Step 1 - Abrindo página de login');
        page.open("http://parceriascury.housecrm.com.br", function(status){
            // Report a failed load instead of silently continuing.
            if (status !== 'success') {
                console.log('Step 1 - failed to open the login page, status: ' + status);
            }
        });
    },
    //Step 3 - Populate and submit the login form
    function(){
        console.log('Step 3 - Preenchendo o form');
        page.evaluate(function(){
            document.getElementById("login").value="xxxxx";
            document.getElementById("senha").value="xxxxx";
            document.getElementById("frmlandingpage").submit();
        });
    },
    //Step 4 - Open the page that needs the logged-in session.
    // Submitting the form only yields the login response (a bare "ok" document),
    // so the page we actually want has to be requested explicitly afterwards.
    function(){
        console.log('Step 4 - Opening the page that requires login');
        page.open("http://parceriascury.housecrm.com.br/parceiro_busca", function(status){
            if (status !== 'success') {
                console.log('Step 4 - failed to open the target page, status: ' + status);
            }
        });
    },
    //Step 5 - Save the HTML of the currently loaded page to disk so it can be
    // inspected in a browser.
    function(){
        var outputPath = 'C:\\phantomjs\\logado_cury_10.html';
        console.log('Step 5 - Saving the content of the current page to ' + outputPath);
        var fs = require('fs');
        var result = page.evaluate(function() {
            return document.documentElement.outerHTML;
        });
        fs.write(outputPath,result,'w');
    }
];
/**********END STEPS THAT PHANTOM SHOULD DO***********************/

//Execute steps one by one
// Declared with `var` so the timer handle is not an implicit global.
var interval = setInterval(executeRequestsStepByStep,5000);

/**
 * Timer callback that drives the script.
 *
 * On each tick:
 *  - does nothing while `loadInProgress` is truthy;
 *  - otherwise runs `steps[testindex]` and advances `testindex`;
 *  - once no step is left, logs completion, stops the timer and calls
 *    `phantom.exit()`.
 *
 * The exit is decided on a tick after the last step ran (never in the same
 * call), so a page load started by the last step is not cut off.
 */
function executeRequestsStepByStep(){
    // Never advance or quit while a page is still loading.
    if (loadInProgress) {
        return;
    }
    if (typeof steps[testindex] === "function") {
        //console.log("step " + (testindex + 1));
        steps[testindex]();
        testindex++;
        // Wait for the next tick before deciding whether to exit.
        return;
    }
    console.log("test complete!");
    clearInterval(interval);
    phantom.exit();
}

/**
 * Page listeners that keep the loadInProgress marker up to date, so the step
 * runner can tell whether a page is fully loaded. Without them the content
 * could be read while the page is still loading.
 */
(function installListeners(target) {
    // Builds a handler that records the loading state and logs a label.
    function track(isLoading, label) {
        return function() {
            loadInProgress = isLoading;
            console.log(label);
        };
    }

    target.onLoadStarted = track(true, 'Loading started');
    target.onLoadFinished = track(false, 'Loading finished');
    // Relay console messages emitted inside the page.
    target.onConsoleMessage = function(text) {
        console.log(text);
    };
}(page));

但响应只有这个:

<html><head></head><body>ok</body></html>

我需要使用 URL 获取下一页的内容:

http://parceriascury.housecrm.com.br/parceiro_busca

我可以直接访问这个页面,但看不到完整的内容,因为它需要登录。

脚本没有报任何错误,我不知道问题出在哪里。

编辑:也欢迎其他解决方案,我想也许可以用 curl……但需要在 JS 加载完成之后再获取内容……

抱歉我的英语不好。

最佳答案

这段代码可能会更好:

var loadInProgress = false;//This is set to true when a page is still loading

/*********SETTINGS*********************/
var page = require('webpage').create({
    viewportSize: {width: 1600, height: 900},
    settings: {
        userAgent: 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36',
        // Real booleans, not the strings 'true'/'false': in JavaScript the
        // string 'false' is truthy, so the string form only works if the host
        // happens to coerce it.
        javascriptEnabled: true,
        loadImages: false
    }
});
var fs = require('fs');
/*********SETTINGS END*****************/
console.log('All settings loaded, start with execution');

/**
 * Page listeners that keep the loadInProgress marker up to date, which records
 * whether a page is still loading. Without them the content could be read
 * while the page is not fully loaded.
 */
page.onLoadStarted = function() {
    loadInProgress = true;
    console.log('Loading started');
};
page.onLoadFinished = function() {
    loadInProgress = false;
    console.log('Loading finished');
};
// Relay console messages emitted inside the page to the PhantomJS console.
page.onConsoleMessage = function(msg) {
    console.log(msg);
};

// Cookie-based login: sign in once in a regular browser, copy the cookies the
// site gave you, and install them here so the site recognizes the session.
//
// The values below are the author's sample for freebitco.in auth.
// NOTE(review): replace name/value/domain with the cookies of the site you
// actually open; `secure: true` presumably limits them to HTTPS - confirm.
phantom.cookies = (function buildSampleCookies() {
    var FIVE_YEARS_MS = 1000 * 60 * 60 * 43800; // 43800 hours = 5 years

    // Both cookies share every attribute except name and value.
    function makeCookie(cookieName, cookieValue) {
        return {
            name: cookieName,
            value: cookieValue,
            domain: 'freebitco.in',
            path: '/',
            httponly: false,
            secure: true,
            expires: (new Date()).getTime() + FIVE_YEARS_MS
        };
    }

    return [
        makeCookie('btc_address', '1AuMxR6sPtB2Z6TkahSnpmm1H4KpYPBKqe'),
        makeCookie('password', 'f574ca68a8650d1264d38da4b7687ca3bf631e6dfc59a98c89dd2564c7601f84')
    ];
}());

//Open the target page, then save its HTML to disk
page.open("http://parceriascury.housecrm.com.br/parceiro_busca", function(status){
    // Stop with a non-zero exit code on a failed load instead of saving an
    // empty or partial document.
    if (status !== 'success') {
        console.log('Failed to load the target page, status: ' + status);
        phantom.exit(1);
        return;
    }
    console.log('Step 1 has been completed - we are on the target page!');
    setTimeout(step2,5000);// Maybe we don't need to wait here, we can execute step2 immediately.

    // Serializes the loaded document and writes it to disk, then quits.
    function step2(){
        var outputPath = 'C:\\phantomjs\\logado_cury_10.html';
        console.log("Step 2 - Saving the content of the page to " + outputPath + ". You can open this file using Chrome to ensure that you are logged in.");
        var result = page.evaluate(function(){ return document.documentElement.outerHTML; });
        fs.write(outputPath,result,'w');
        phantom.exit();
    }
});

关于javascript - 使用 PhantomJs 登录后如何进入下一页?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/40880785/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com