gpt4 book ai didi

javascript - 将 Node request-promise 的响应 upsert(更新插入)到 MongoDB

转载 作者:行者123 更新时间:2023-11-30 20:57:08 25 4
gpt4 key购买 nike

我正在 Node.js 中使用 Cheerio。我有一个抓取文章列表的抓取器:它先抓取所有文章的 URL,然后访问每篇文章并抓取标题和 URL。一切正常,只是当我尝试将结果 upsert(更新插入)到 MongoDB 时,得到的值是 undefined。

我猜测它是在这些值被定义之前就尝试更新插入了……但即使使用 request-promise,我也无法让它正常工作。任何帮助将不胜感激!由于代码不是太长,我直接粘贴了完整代码,以便更容易看出我想做什么。再次说明,主要问题是让 upsertArticle 真正把这些变量更新插入到数据库中。

const request = require('request');
const cheerio = require('cheerio');
const rp = require('request-promise');
const mongoose = require('mongoose');
const Article = require('./models/article');

// Question's original (non-working) scraper, kept as posted.
// Flow: fetch the index page, collect article links into `urls`, then request
// each article and try to upsert its title and URL.
// Shared, module-level list filled by the first request's callback below.
var urls = [];
//get the list of articles to scrape
// NOTE: rp() is given a callback AND is chained with .then(), so the callback
// and the promise interface are both in use on the same request.
rp('https://www.somesite.com/', function(error, response, html) {
if (!error && response.statusCode == 200) {
var $ = cheerio.load(html);
$('.c-entry-box--compact__title').each(function(i, element) {
var a = $(this);
// attr('href') is pushed as-is, with no check that the child has an href.
urls.push(a.children().attr('href'));
}); } })
//scrape over each article individually
// `getStuff` (the value the first request resolved with) is never read.
.then(function(getStuff) {
var arrayLength = urls.length;
//get the list of articles to scrape and upsert each one
for (var i = 0; i < arrayLength; i++) {
// `result` is bound to the value of the whole rp(...).then(...).catch(...)
// expression, i.e. a promise, not an object holding the scraped fields.
const result = rp(urls[i], function(error, response, html) {
if (!error && response.statusCode == 200) {
var $ = cheerio.load(html);
// `parsedResults` is declared but never used.
var parsedResults = [];
$('.l-main-content').each(function(n, element) {
var a = $(this);
var title = a.find('.c-page-title').text();
var url = response.request.uri.href;
//I also tried upserting the variables right here, that didn't work
// This object is returned to .each(), not to the surrounding rp callback or
// to the promise chain, so nothing downstream ever receives title/url.
return { title, url };
});
} else {console.log(error);}
// `upsertStuff` is whatever the request resolved with (presumably the response
// body with request-promise defaults, verify); it is not used here.
}).then(function(upsertStuff) {
//also tried returning and upserting stuff here... but nothing gets upserted
// result.title / result.url are property reads on the promise held in
// `result`, which is why the upserted values come out as undefined.
upsertArticle({
title: result.title,
source: result.url,
dateCrawled: new Date()
});
console.log('Upserted ' + result.title);
}).catch(function(err) {console.log(err); }); }
}) .catch(function(err) {console.log(err); });

/**
 * Insert or update a single article document, matched by its title.
 *
 * Connects to the local `articles` database first when the mongoose
 * connection reports readyState 0. The write is fire-and-forget: nothing is
 * returned, and a driver error is rethrown from inside the callback.
 *
 * @param {Object} userObj - Fields to store; `userObj.title` is the lookup key.
 */
function upsertArticle(userObj) {
  const DB_URL = 'mongodb://localhost/articles';
  const needsConnection = mongoose.connection.readyState === 0;

  // Connect only on first use; later calls reuse the existing connection.
  if (needsConnection) {
    mongoose.connect(DB_URL, { useMongoClient: true });
  }

  const query = { title: userObj.title };
  const settings = { upsert: true, new: true, setDefaultsOnInsert: true };

  Article.findOneAndUpdate(query, userObj, settings, function (err) {
    if (err) {
      throw err;
    }
  });
}

最佳答案

我对提供的代码做了一些修改。也就是说,我使用 promises 而不是回调来让您的逻辑保持一致并确保一切都在应有的时间运行。

对于 for 循环,我将 upsertArticle({...}) 移回到 each 函数内部,这样在调用它时 title 和 url 都已经有了定义。

最后,我使用 Bluebird 的 Promise.all(request-promise 已经依赖于 Bluebird)在所有链接都已更新时发出信号。此更改是可选的,但我认为在完成所有操作后获得反馈会很有用:

试一试:

const request = require('request');
const cheerio = require('cheerio');
const rp = require('request-promise');
const mongoose = require('mongoose');
const Article = require('./models/article');
const Promise = require("bluebird");

// Scrape the index page for article links, then request every article and
// upsert its title/URL. The chain settles only after every per-article
// request (and every upsert that exposes a promise) has settled.
rp({ uri: 'https://www.somesite.com', resolveWithFullResponse: true })
  .then(function (response) {
    // Throw a real Error (not a string) so the final catch logs a stack trace.
    if (response.statusCode !== 200) {
      throw new Error(`Response: ${response.statusCode}`);
    }

    const $ = cheerio.load(response.body);
    const urls = [];

    $('.c-entry-box--compact__title').each(function (i, element) {
      const href = $(element).children().attr('href');
      // attr() yields undefined when the child carries no href; skip those
      // so rp() is never called without a URI.
      if (href) {
        urls.push(href);
      }
    });

    // Hand the list to the next step instead of sharing a module-level array.
    return urls;
  })
  .then(function (urls) {
    const pageJobs = urls.map(function (articleUrl) {
      return rp({ uri: articleUrl, resolveWithFullResponse: true })
        .then(function (response) {
          if (response.statusCode !== 200) {
            throw new Error(`Response: ${response.statusCode}`);
          }

          const $ = cheerio.load(response.body);
          const url = response.request.uri.href;
          const upserts = [];

          $('.l-main-content').each(function (n, element) {
            const title = $(element).find('.c-page-title').text();

            // Promise.resolve() waits for the upsert when upsertArticle
            // returns a promise and degrades to fire-and-forget when it
            // returns nothing.
            const pending = Promise.resolve(
              upsertArticle({
                title: title,
                source: url,
                dateCrawled: new Date(),
              })
            ).then(function () {
              console.log(`Upserted ${title}`);
            });

            upserts.push(pending);
          });

          return Promise.all(upserts);
        });
    });

    return Promise.all(pageJobs);
  })
  .then(function () {
    console.log("Done upserting!");
  })
  .catch(function (err) {
    console.log(err);
  });

/**
 * Insert or update a single article document, matched by its title.
 *
 * Connects to the local `articles` database first when the mongoose
 * connection reports readyState 0.
 *
 * @param {Object} userObj - Fields to store; `userObj.title` is the lookup key.
 * @returns {Promise<Object>} Resolves with the document passed to the
 *   findOneAndUpdate callback; rejects with the error reported by that call.
 */
function upsertArticle(userObj) {
  const DB_URL = 'mongodb://localhost/articles';

  // Connect only on first use; later calls reuse the existing connection.
  if (mongoose.connection.readyState === 0) {
    mongoose.connect(DB_URL, {
      useMongoClient: true,
    });
  }

  const conditions = {
    title: userObj.title,
  };
  const options = {
    upsert: true,
    new: true,
    setDefaultsOnInsert: true,
  };

  // Adapt the callback API to a promise. Throwing inside the callback would
  // surface as an uncaught exception that no caller can handle; rejecting
  // lets the caller's .catch() deal with it and lets callers wait for the
  // write to finish.
  return new Promise(function (resolve, reject) {
    Article.findOneAndUpdate(conditions, userObj, options, function (err, result) {
      if (err) {
        reject(err);
        return;
      }
      resolve(result);
    });
  });
}

我无法在不知道 https://www.somesite.com 的真实值的情况下测试代码,所以如果代码给您带来任何新错误,请告诉我。

关于 javascript - 将 Node request-promise 的响应 upsert(更新插入)到 MongoDB,我们在 Stack Overflow 上找到一个类似的问题: https://stackoverflow.com/questions/47536596/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com