gpt4 book ai didi

javascript - CasperJS 循环或遍历多个网页?

转载 作者:可可西里 更新时间:2023-11-01 01:36:31 24 4
gpt4 key购买 nike

我有一个 CasperJS 脚本,可以从一个网页上抓取评级和日期。现在我想从同一网站下的多个页面中抓取相同的数据。给定此代码,我如何遍历不同的子页面:

// Results of the most recent scrape; filled in by the navigation steps and
// read again by the completion callback.
var ratings = [];
var dates = [];

// CasperJS instance: image and plugin loading are switched off in the page
// settings, and logging is set to verbose output at "debug" level.
var casper = require('casper').create({
    verbose: true,
    logLevel: "debug",
    pageSettings: {
        loadPlugins: false,
        loadImages: false
    }
});

var fs = require('fs');

/**
 * Collects the `title` attribute of every overall-rating image matched in the
 * current document.
 *
 * Relies on the global `document`; elsewhere in this script it is handed to
 * `casper.evaluate` rather than called directly.
 *
 * @returns {Array} One `title` attribute value per matched image, in match order.
 */
function getRatings() {
    var selector = '#BVRRRatingOverall_Review_Display > div.BVRRRatingNormalImage > img';
    var images = document.querySelectorAll(selector);
    var titles = [];

    for (var i = 0; i < images.length; i++) {
        titles.push(images[i].getAttribute('title'));
    }
    return titles;
}

/**
 * Collects the inner HTML of every review-date element matched in the current
 * document.
 *
 * Relies on the global `document`; elsewhere in this script it is handed to
 * `casper.evaluate` rather than called directly.
 *
 * @returns {Array} One `innerHTML` value per matched date element, in match order.
 */
function getDate() {
    var selector = '#BVSubmissionPopupContainer > div.BVRRReviewDisplayStyle5Header > div.BVRRReviewDateContainer > span.BVRRValue.BVRRReviewDate';
    var dateNodes = document.querySelectorAll(selector);
    var found = [];

    for (var i = 0; i < dateNodes.length; i++) {
        found.push(dateNodes[i].innerHTML);
    }
    return found;
}

// Review page the scrape starts from.
var reviewsUrl = 'http://www.t-mobile.com/cell-phones/samsung-galaxy-s-5.html?bvrrp=9060/reviews/product/1/598aea53-16d0-4c12-b53a-105157092c52.htm';

// Open the page and print a marker once the start step runs.
casper.start(reviewsUrl, function onStart() {
    this.echo('hi');
});

// Scrape ratings and dates into the shared variables, then print the ratings.
casper.then(function collectReviews() {
    ratings = this.evaluate(getRatings);
    dates = this.evaluate(getDate);

    this.echo(ratings);
});


// Runs the queued steps; the callback formats the scraped data, writes it to
// disk and then exits.
casper.run(function onComplete() {
    var outputPath = "C:/Users/Karan/Copy/tweesis/implementation/scraping/samsungratings.txt";

    this.echo(ratings.length + ' ratings found:');

    // Merge each date into the rating at the same index ("rating: date") and
    // blank out the date slot afterwards.
    ratings.forEach(function (rating, idx) {
        ratings[idx] = rating + ': ' + dates[idx];
        dates[idx] = '';
    });
    this.echo(ratings);

    // One "rating: date" entry per line, written with mode 'w'.
    fs.write(outputPath, ratings.join("\n"), 'w');

    this.echo(dates.length + ' dates found:').exit();
});

感谢任何帮助:)

最佳答案

由于存在下一页按钮,您可以使用它递归遍历所有页面:

/**
 * Scrapes ratings and dates from the page currently loaded in `casper`,
 * appends them to the output file as "rating: date" lines, and queues itself
 * again for the next page while a "next page" link is visible.
 *
 * Reads `casper`, `fs`, `getRatings` and `getDate` from the enclosing scope
 * and stores the latest results in the outer `ratings` / `dates` variables.
 */
function getRatingsAndWrite(){
    var outputPath = "C:/Users/Karan/Copy/tweesis/implementation/scraping/samsungratings.txt";

    // Guard against evaluate() handing back null/undefined, so a page that
    // fails to scrape is treated as empty instead of throwing on `.length`.
    ratings = casper.evaluate(getRatings) || [];
    dates = casper.evaluate(getDate) || [];

    casper.echo(ratings);
    casper.echo(ratings.length + ' ratings found:');

    for (var i = 0; i < ratings.length; i++) {
        ratings[i] = ratings[i] + ': ' + dates[i];
        dates[i] = '';
    }
    casper.echo(ratings);

    // Terminate every line, including the last one. The file is written in
    // append mode once per page, so without a trailing newline the first
    // entry of the next page would be glued onto the last entry of this one.
    var content = ratings.length > 0 ? ratings.join("\n") + "\n" : "";

    fs.write(outputPath, content, 'a');

    casper.echo(dates.length + ' dates found:');

    // Follow the pagination link while it is visible: queue the click, then
    // queue another scrape of the page it leads to.
    var nextLink = ".BVRRPageLink.BVRRNextPage > a";
    if (casper.visible(nextLink)) {
        casper.thenClick(nextLink);
        casper.then(getRatingsAndWrite);
    } else {
        casper.echo("END");
    }
}

// First review page; later pages are reached through the pagination link.
var startUrl = 'http://www.t-mobile.com/cell-phones/samsung-galaxy-s-5.html?bvrrp=9060/reviews/product/1/598aea53-16d0-4c12-b53a-105157092c52.htm';

// Open the first page, queue the first scrape, then execute the queued steps.
casper.start(startUrl);
casper.then(getRatingsAndWrite);
casper.run();

另一个相关的回答是:CasperJS parse next page after button click。

关于javascript - CasperJS 循环或遍历多个网页?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/23384963/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com