gpt4 book ai didi

javascript - 在 phantomjs 中使用策略模式

转载 作者:行者123 更新时间:2023-12-03 06:07:47 25 4
gpt4 key购买 nike

我正在尝试在我的爬虫中实现策略模式,我认为针对不同的网站使用不同的爬取策略会很巧妙。因此,我希望 page.evaluate 中执行的内容能够根据当前爬取的网站而变化。把下面被注释掉的代码直接写在 page.evaluate 中是可以工作的,但有没有办法把它提取到一个函数中?我尝试过调用 this.findJobs(),但没有成功。

"use strict";

var Crawler = function() {
this.page = require('webpage').create();
this.website = "";
this.jobs_list = [];

};

Crawler.prototype.setStrategy = function(company) {
this.website = company;
};

Crawler.prototype.findJobData = function() {
return this.website.findJobData();
};

// Opens 'URL', injects jQuery into the loaded page, evaluates the scraping
// code, logs the title of the first job returned and exits PhantomJS.
// This is the version the question asks about; the NOTE(review) comments
// below mark the lines that keep it from working.
Crawler.prototype.collectJobData = function() {
// NOTE(review): creates a second page object instead of reusing this.page,
// which the constructor already created.
var page = require('webpage').create();
// Print every message delivered to onConsoleMessage.
page.onConsoleMessage = function(msg) { console.log(msg) };

// NOTE(review): `status` is never inspected.
page.open('URL', function (status) {
page.includeJs("https://ajax.googleapis.com/ajax/libs/jquery/3.1.0/jquery.min.js", function() {
// NOTE(review): inside this callback `this` is not the Crawler instance.
// Crawler itself defines no findJobs - the strategy stored in `website`
// does. The trailing () also calls the function here instead of handing
// the function itself to page.evaluate.
var temp_jobs = page.evaluate(this.findJobs());

// Inline form of the scraping logic, kept commented out for reference.
/*
var jobs = [];
var job;
$('ul.job-list').each(function(){
$(this).find('li').each(function(){
var job_link = $(this).find('a');
var url = "URL" + job_link.attr("href");
var location = $(this).find('span').text();

job = {title: job_link.text(), url: url, location: location, description: ""}
jobs.push(job);
console.log(job.title, job.url, job.location);
})
});
return jobs;*/
// NOTE(review): throws if temp_jobs is empty or not an array.
console.log(temp_jobs[0].title)

phantom.exit(0);
});
});

};

/**
 * Scraping strategy for pages that list jobs as <li> entries inside
 * <ul class="job-list"> elements.
 *
 * Instances expose a single method, findJobs(), which relies on a global
 * jQuery `$` being available wherever it runs.
 */
var strategy_a = function() {

    /**
     * Collects one record per <li> of every ul.job-list, logging each
     * record's title, url and location as it goes.
     *
     * @returns {Array<{title: string, url: string, location: string, description: string}>}
     */
    this.findJobs = function() {
        var collected = [];

        $('ul.job-list').each(function() {
            var $entries = $(this).find('li');

            $entries.each(function() {
                var $entry = $(this);
                var $anchor = $entry.find('a');
                var posting = {
                    title: $anchor.text(),
                    url: "URL" + $anchor.attr("href"),
                    location: $entry.find('span').text(),
                    description: ""
                };

                collected.push(posting);
                console.log(posting.title, posting.url, posting.location);
            });
        });

        return collected;
    };
};


// Script entry point: build a strategy and a crawler, hand the strategy to
// the crawler, then start the crawl.
// NOTE: the first line rebinds `strategy_a` from the constructor function to
// an instance of it, so the constructor is no longer reachable by that name.
var strategy_a = new strategy_a();
var crawler = new Crawler();

crawler.setStrategy(strategy_a);
crawler.collectJobData();

最佳答案

您有两个问题:

  • 您应该使用 page.evaluate(this.findJobs); 而不是 page.evaluate(this.findJobs());——前者把函数本身传给 page.evaluate,后者传入的却是函数的调用结果。

  • page.includeJs 回调内部的 this 并不是对 Crawler 实例的引用。

这应该有效:

/**
 * Opens 'URL' in the crawler's page, injects jQuery, evaluates the current
 * strategy's findJobs function via page.evaluate and logs the title of the
 * first job it returns.
 *
 * Always ends by calling phantom.exit():
 *   0 - a first job was found and its title was logged
 *   1 - the page could not be opened, or no job came back
 */
Crawler.prototype.collectJobData = function() {
    var page = this.page;
    // The callbacks below run with a different `this`; keep the crawler reachable.
    var self = this;
    page.onConsoleMessage = function(msg) { console.log(msg); };

    page.open('URL', function (status) {
        // Do not try to scrape a page that failed to load.
        if (status !== 'success') {
            console.log('Unable to open page, status: ' + status);
            phantom.exit(1);
            return;
        }

        page.includeJs("https://ajax.googleapis.com/ajax/libs/jquery/3.1.0/jquery.min.js", function() {
            // Pass the function itself, not the result of calling it.
            var temp_jobs = page.evaluate(self.website.findJobs);
            var first_job = temp_jobs && temp_jobs[0];

            // Guard against a missing or empty result, which would
            // otherwise throw here and skip phantom.exit().
            if (!first_job) {
                console.log('No jobs found');
                phantom.exit(1);
                return;
            }

            console.log(first_job.title);
            phantom.exit(0);
        });
    });
};

请注意,您创建了多个页面对象,却没有全部用到,因此我删除了第二个 require('webpage').create()。

关于javascript - 在 phantomjs 中使用策略模式,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/39458036/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com