gpt4 book ai didi

javascript - 如何在 NodeWebkit 的 html 代码中访问从 Node 模块抓取的数据

转载 作者:太空宇宙 更新时间:2023-11-04 00:46:24 26 4
gpt4 key购买 nike

我正在尝试使用 NodeWebkit 创建一个应用程序,并使用 node-phantom-simple 模块抓取内容。借助该模块,我已经能够从网站上抓取内容,但我应该如何在 HTML 端访问这些数据?我认为在这种情况下无法创建 REST 服务。以下是代码示例:

// Module setup for the scraper: loads the NW.js GUI API, the HTTP/scraping
// libraries and the PhantomJS driver used by the code below.

// NOTE(review): `file` is not referenced anywhere in the visible code, and the
// specifier has no './' prefix — confirm it is needed and resolves as intended.
var file = require('file.js');
var gui = require('nw.gui');

// Builds a menubar-type menu object. The visible code never attaches it to a
// window, so on its own this line has no visible effect.
var menu = new gui.Menu({ type: 'menubar' });

var express = require('express');
// NOTE(review): `fs` is not used in the visible code.
var fs = require('fs');
// `request` fetches the landing page; `cheerio` parses the returned HTML.
var request = require('request');
var cheerio = require('cheerio');
// Express application instance; no routes are registered on it in this snippet.
var app = express();

// Driver used below to start PhantomJS instances and open the match pages.
var driver = require('node-phantom-simple');



// Scrapes the ESPNcricinfo landing page for the first score-line list, builds
// an absolute URL for every match in it, then renders each match page in its
// own PhantomJS instance and logs the HTML of its ".innings-information"
// elements.

// Landing page that lists the current matches.
var url = 'http://www.espncricinfo.com/';

// Prefix used to turn the relative match links into absolute URLs.
var MATCH_URL_PREFIX = 'http://www.espncricinfo.com';

// jQuery build injected into every match page so it can be queried.
var JQUERY_URL = 'http://ajax.googleapis.com/ajax/libs/jquery/1.7.2/jquery.min.js';

// Time given to the match page to finish loading its AJAX content.
var AJAX_WAIT_MS = 5000;

/**
 * Opens one match page in a dedicated PhantomJS instance, waits for its AJAX
 * content, logs the scraped innings information and shuts the instance down.
 *
 * Taking the URL as a parameter gives every asynchronous callback its own
 * binding; a shared loop variable would make all pages open the last URL.
 *
 * @param {string} matchUrl Absolute URL of the match page to scrape.
 */
function scrapeMatch(matchUrl) {
  driver.create({ path: require('phantomjs').path }, function (err, browser) {
    if (err) {
      console.error('Could not start PhantomJS for', matchUrl, err);
      return;
    }

    // Logs the failure and always releases the PhantomJS process.
    function abort(message, cause) {
      console.error(message, matchUrl, cause);
      browser.exit();
    }

    browser.createPage(function (err, page) {
      if (err) {
        abort('Could not create a page for', err);
        return;
      }

      page.open(matchUrl, function (err, status) {
        console.log("opened site? ", status);
        if (err || status !== 'success') {
          abort('Could not open', err || status);
          return;
        }

        page.includeJs(JQUERY_URL, function (err) {
          if (err) {
            abort('Could not inject jQuery into', err);
            return;
          }

          // jQuery is loaded; wait for the page's AJAX content before reading it.
          setTimeout(function () {
            page.evaluate(function () {
              // Runs inside the PhantomJS page, so it must be self-contained:
              // `$` here is the injected jQuery, not cheerio.
              var h2Arr = [];

              $('.innings-information').each(function () { h2Arr.push($(this).html()); });

              return {
                h2: h2Arr
              };
            }, function (err, result) {
              if (err) {
                console.error('Could not evaluate', matchUrl, err);
              } else {
                console.log(result);
              }
              browser.exit();
            });
          }, AJAX_WAIT_MS);
        });
      });
    });
  });
}

request(url, function (error, response, html) {
  if (error) {
    console.error('Could not fetch', url, error);
    return;
  }

  var $ = cheerio.load(html);

  var data = $('.scoreline-list').first();
  if (data.length === 0) {
    // No score-line list on the page: nothing to scrape.
    return;
  }

  var entries = data.children();
  console.log("Number of Matches: ", entries.length);

  // Collect the absolute URL of every match.
  var matches = [];
  entries.each(function (index, entry) {
    var href = $(entry).children().first().attr('href');
    if (!href) {
      // An entry without a link would otherwise yield a URL ending in "undefined".
      return;
    }

    var matchUrl = MATCH_URL_PREFIX + href;
    matches.push(matchUrl);

    // NOTE(review): `$` is the cheerio document of the fetched page, not the
    // application's own DOM, so this only touches an "#editor" element inside
    // the scraped HTML (if any). Updating the app's UI needs the window's
    // document instead — confirm the intended target.
    $('#editor').val(matchUrl);
    console.log(matchUrl);
  });

  // Scrape every match page; each call owns its URL and PhantomJS instance.
  matches.forEach(function (matchUrl) {
    scrapeMatch(matchUrl);
  });
});

感谢您的帮助!

最佳答案

为什么不能为此创建一个 REST 端点?只需缓存每次抓取操作的结果,然后通过 HTTP 端点返回缓存的内容即可。

// Sketch: keep the latest scrape result in memory and serve it over HTTP.
// The `...` lines are placeholders for omitted code (presumably the scraping
// code from the question), so this snippet is not runnable as written.

// Most recent scrape result; an empty object until a scrape has completed.
var cache={};

// Responds with the current cache contents serialized as JSON.
// NOTE(review): `app` is not defined in this snippet — presumably the
// express() instance created in the question's code.
app.get('/myendpoint', function(req, res) {
res.json(cache);
})

request(url, function(error, response, html){
...
setTimeout(function () {
...
console.log(result);
//set cache here
// The assignment replaces the whole cache, so when several pages are scraped
// only the result that finished last is kept.
cache=result;
browser.exit();
});
}, 5000);
});

// Listen for HTTP requests on port 1338.
app.listen(1338);

如果希望缓存定期更新,请将抓取逻辑封装成一个函数,并通过 setInterval 定时调用它。

// Sketch: same cached endpoint as before, with the scrape wrapped in a
// function that is re-run on a timer. The `...` lines are placeholders for
// omitted code, so this snippet is not runnable as written.

// Most recent scrape result; an empty object until a scrape has completed.
var cache={};

// Responds with the current cache contents serialized as JSON.
// NOTE(review): `app` is not defined in this snippet — presumably the
// express() instance created in the question's code.
app.get('/myendpoint', function(req, res) {
res.json(cache);
})

// Runs one scrape and stores its result in `cache`.
function updateCache() {
request(url, function(error, response, html){
...
setTimeout(function () {
...
console.log(result);
//set cache here
// The assignment replaces the whole cache, so when several pages are scraped
// only the result that finished last is kept.
cache=result;
browser.exit();
});
}, 5000);
});
}

//Update cache every 60 secs.
// updateCache is never called directly in this snippet, so the first scrape
// only starts once the first 60-second interval has elapsed; until a scrape
// completes the endpoint returns the empty object.
setInterval(updateCache, 60000);

// Listen for HTTP requests on port 1338.
app.listen(1338);

关于javascript - 如何在 NodeWebkit 的 html 代码中访问从 Node 模块抓取的数据,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/34563370/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com