gpt4 book ai didi

javascript - 使用MongoDb和Node.js来组织代码

转载 作者:行者123 更新时间:2023-11-30 11:22:28 25 4
gpt4 key购买 nike

我有一个Node.js应用程序,它可以进行网络抓取并将一些信息保存在json对象中。
我还将此对象保存到文件中。

现在,我想将这些信息保存到MongoDB数据库中。

这是我的代码的结构:

app.js文件:

var express = require('express');

// my files
var downloaderFirst = require('./routers/downloaderFirst.js');
var downloaderSecond = require('./routers/downloaderSecond.js');

// create app
const app = express();

downloaderFirst.download();
downloaderSecond.download();


./routers/downloaderFirst.js文件:

var cheerio = require('cheerio');
var express = require('express');
var fs = require('fs');
var request = require('request');
var textract = require('textract');

// object of methods
var methods = {};

var url = 'http://www....';

// json object containing data
var jsons = [];

methods.download = function(req, res) {
extractText();
};

function extractText() {
// get text and save it to jsons array and ./output/dataFirst.json file
var thisYear = ...;
var thisObject = ...;
var o = {year: thisYear, object: thisObject};
jsons.push(o);
printOnFile(jsons, './output/dataFirst.json');
}

module.exports = methods;


./routers/downloaderSecond.js文件:

var cheerio = require('cheerio');
var express = require('express');
var fs = require('fs');
var request = require('request');
var textract = require('textract');

// object of methods
var methods = {};

var url = 'http://www....';

// json object containing data
var jsons = [];

methods.download = function(req, res) {
extractText();
};

function extractText() {
// get text and save it to jsons array and ./output/dataSecond.json file
var thisYear = ...;
var thisColor = ...;
var o = {year: thisYear, color: thisColor};
jsons.push(o);
printOnFile(jsons, './output/dataFirst.json');
}

module.exports = methods;


所以现在我有两个文件(dataFirst.json 和 dataSecond.json),每个文件都包含一个对象数组,例如:

dataFirst.json文件:

[{
"year": "2006",
"object": "car"
},
{
"year": "2002",
"object": "car"
},
{
"year": "2006",
"object": "pen"
}, ...];


dataSecond.json文件:

[{
"year": "2006",
"color": "red"
},
{
"year": "2002",
"color": "blue"
},
{
"year": "2006",
"color": "yellow"
}, ...];


我想使用两个分别称为 first 和 second 的不同集合将这些信息保存到MongoDB。
我想我可以做两件事:


(1) 从文件中获取这些文档并一次性全部保存
(2) 每次保存一个文档。也就是说,不再创建 dataFirst.json 和 dataSecond.json,而是将文档直接保存到数据库的正确集合中。


显然,我使用 npm install mongodb --save命令安装了MongoDb驱动程序。

无论如何,我想要一个函数来连接(并创建,如果不存在)到db,并插入文档。

所以我创建了./data/db.js文件:

var fs = require('fs');
var MongoClient = require('mongodb').MongoClient;

var url = 'mongodb://localhost:27017/';

// object of methods
var methods = {};

methods.createDb = function(dbName) {
MongoClient.connect(url + dbName, function(err, db) {
if(err) {
throw err;
}
console.log('Database created!');
db.close();
});
}

methods.createCollection = function(dbName, collectionName) {
MongoClient.connect(url, function(err, db) {
if(err) {
throw err;
}
var dbo = db.db(dbName);
dbo.createCollection(collectionName, function(err, res) {
if(err) {
throw err;
}
console.log('Collection', collectionName, 'created!');
db.close();
});
});
}

methods.insertDoc = function(dbName, collectionName, doc) {
MongoClient.connect(url, function(err, db) {
if(err) {
throw err;
}
var dbo = db.db(dbName);
dbo.collection(collectionName).insertOne(doc, function(err, res) {
if(err) {
throw err;
}
console.log('1 document inserted');
db.close();
});
});
}

methods.insertManyDoc = function(dbName, collectionName, docs) {
MongoClient.connect(url, function(err, db) {
if(err) {
throw err;
}
var dbo = db.db(dbName);
dbo.collection(collectionName).insertMany(docs, function(err, res) {
if(err) {
throw err;
}
console.log('Number of documents inserted: ' + res.insertedCount);
db.close();
});
});
}

/**
 * Reads a JSON file and forwards its parsed content to `methods.insertManyDoc`
 * for insertion into the given collection.
 *
 * @param {string} dbName - Name of the target database.
 * @param {string} collectionName - Name of the target collection.
 * @param {string} filename - Path of a UTF-8 JSON file (expected to hold an array of documents).
 * @throws {Error} If the file cannot be read or does not contain valid JSON.
 */
methods.insertFromFile = function(dbName, collectionName, filename) {
  var objs = JSON.parse(fs.readFileSync(filename, 'utf8'));
  // Call through `methods` rather than `this`, so the function keeps working
  // when it is detached from the object (e.g. passed around as a callback).
  methods.insertManyDoc(dbName, collectionName, objs);
};

module.exports = methods;


对于解决方案(1),我修改了我的app.js文件:

var express = require('express');

// my files
var downloaderFirst = require('./routers/downloaderFirst.js');
var downloaderSecond = require('./routers/downloaderSecond.js');

var db = require('./data/db.js');
var MongoClient = require('mongodb').MongoClient;

// create app
const app = express();

downloaderFirst.download();
downloaderSecond.download();

var dbName = 'db';
var firstCollectionName = 'first';
var secondCollectionName = 'second';

// create database
db.createDb(dbName);
// create the collections (tables in SQL)
db.createCollection(dbName, firstCollectionName);
db.createCollection(dbName, secondCollectionName);

db.insertFromFile(dbName, firstCollectionName, './output/dataFirst.json');
db.findAll(dbName, firstCollectionName);


问题是,如果我运行该项目两次,则该集合中有重复的文档,因为insert函数不会测试该元素是否存在。

对于解决方案(2),我修改了app.js和./routers/downloaderFirst.js。

app.js文件:

var express = require('express');

// my files
var downloaderFirst = require('./routers/downloaderFirst.js');
var downloaderSecond = require('./routers/downloaderSecond.js');

var db = require('./data/db.js');
var MongoClient = require('mongodb').MongoClient;

// create app
const app = express();

downloaderFirst.download();
downloaderSecond.download();

var dbName = 'db';
var firstCollectionName = 'first';
var secondCollectionName = 'second';

// create database
db.createDb(dbName);
// create the collections (tables in SQL)
db.createCollection(dbName, firstCollectionName);
db.createCollection(dbName, secondCollectionName);


./routers/downloaderFirst.js文件:

var cheerio = require('cheerio');
var express = require('express');
var fs = require('fs');
var request = require('request');
var textract = require('textract');
var db = require('../data/db.js');

// object of methods
var methods = {};

var url = 'http://www....';

// json object containing data
var jsons = [];

methods.download = function(req, res) {
extractText();
};

function extractText() {
// get text and save it to jsons array and ./output/dataFirst.json file
var thisYear = ...;
var thisObject = ...;
var o = {year: thisYear, object: thisObject};
jsons.push(o);
printOnFile(jsons, './output/dataFirst.json');
db.insertDoc('db', 'first', obj);
}

module.exports = methods;


在这种情况下,我有相同的问题(重复的文档),然后出现此错误:


  Database created! Collection first created! Collection second
  created! Number of documents inserted: 10692
  
  C:\...\node_modules\mongodb\lib\mongo_client.js:792
            throw err;
            ^ MongoNetworkError: failed to connect to server [localhost:27017] on first connect [MongoNetworkError: connect
  ECONNREFUSED 127.0.0.1:27017]
      at Pool.<anonymous> (C:\...\node_modules\mongodb-core\lib\topologies\server.js:503:11)
      at emitOne (events.js:116:13)
      at Pool.emit (events.js:211:7)
      at Connection.<anonymous> (C:\...\node_modules\mongodb-core\lib\connection\pool.js:326:12)
      at Object.onceWrapper (events.js:317:30)
      at emitTwo (events.js:126:13)
      at Connection.emit (events.js:214:7)
      at Socket.<anonymous> (C:\...\node_modules\mongodb-core\lib\connection\connection.js:245:50)
      at Object.onceWrapper (events.js:315:30)
      at emitOne (events.js:116:13)


无论如何,我如何修改我的代码以消除重复问题?
每次操作都打开并关闭一次连接,这样做好吗?
如何改善我的代码?我的意思是组织更好的代码。

我阅读了很多教程,但无法解决自己的问题。
抱歉,如果这很愚蠢,但这是我第一次使用Node.js,MongoDb,也是我第一次进行Web抓取,所以现在我非常困惑。

谢谢!



编辑1

我以这种方式修改代码:

app.js:

var express = require('express');

// my files
var downloaderFirst = require('./routers/downloaderFirst.js');
var downloaderSecond = require('./routers/downloaderSecond.js');

var db = require('./data/db.js');
var MongoClient = require('mongodb').MongoClient;

// create app
const app = express();

var dbName = 'db';

// create database
db.createDb(dbName);

downloaderFirst.download();
downloaderSecond.download();

db.disconnectDb(dbName);


./routers/downloaderFirst.js文件:

var cheerio = require('cheerio');
var express = require('express');
var fs = require('fs');
var request = require('request');
var textract = require('textract');
var db = require('../data/db.js');

// object of methods
var methods = {};

var url = 'http://www....';

// json object containing data
var jsons = [];

methods.download = function(req, res) {
extractText();
};

function extractText() {
// get text and save it to jsons array and ./output/dataFirst.json file
var thisYear = ...;
var thisObject = ...;
var o = {year: thisYear, object: thisObject};
jsons.push(o);
printOnFile(jsons, './output/dataFirst.json');
db.insertDocFirst('db', 'first', obj);
}

module.exports = methods;


其中db.js文件为:

var fs = require('fs');
var MongoClient = require('mongodb').MongoClient;

var url = 'mongodb://localhost:27017/';

// object of methods
var methods = {};

methods.createDb = function(dbName) {
MongoClient.connect(url + dbName, function(err, db) {
if(err) {
console.log('createDb', err);
//throw err;
}
console.log('Database created!');
var dbo = db.db(dbName);
var first = dbo.createCollection('first', function(err, res) {
if(err) {
console.log('create collection first', err);
//throw err;
}
console.log('Collection first created!');
/*// because we are searching by name, we need an index! without an index, things can get slow
first.ensureIndex({year: true, object: true}, function(err) {
if(err) {
throw err;
}
});
console.log('Index of collection first created!');*/
});
var second = dbo.createCollection('second', function(err, res) {
if(err) {
console.log('create collection second', err);
//throw err;
}
console.log('Collection second created!');
/*// because we are searching by name, we need an index! without an index, things can get slow
dbo.collection('second').createIndex({year: true, color: true}, function(err) {
if(err) {
console.log('ensureIndex second', err);
//throw err;
}
});
console.log('Index of collection second created!');*/
});
});
}

methods.disconnectDb = function(dbName) {
MongoClient.connect(url, function(err, db) {
if(err) {
console.log('disconnectDb', err)
//throw err;
}
console.log('Disconnected. Bye :)');
db.close();
});
}

methods.insertDocFirst = function(dbName, collectionName, doc) {
MongoClient.connect(url, function(err, db) {
if(err) {
console.log('insertDoc', err); // ** ERROR HERE **
//throw err;
}
var dbo = db.db(dbName);
var selector = {
"year": doc.year,
"color": doc.color
};
dbo.collection(collectionName).update(selector, doc, {upsert: true});
});
}

methods.insertDocSecond = function(dbName, collectionName, doc) {
MongoClient.connect(url, function(err, db) {
if(err) {
console.log('insertDoc', err);
//throw err;
}
var dbo = db.db(dbName);
var selector = {
"year": doc.year,
"object": doc.color
};
dbo.collection(collectionName).update(selector, doc, {upsert: true});
});
}


运行代码时,我得到:

Disconnected. Bye :)
Database created!
insertDoc { MongoNetworkError: failed to connect to server [localhost:27017] on first conn
ect [MongoNetworkError: connect ECONNREFUSED 127.0.0.1:27017]
at Pool.<anonymous> (C:\...\node_modules\mongodb-core
\lib\topologies\server.js:503:11)
at emitOne (events.js:116:13)
at Pool.emit (events.js:211:7)
at Connection.<anonymous> (C:\...\node_modules\mongod
b-core\lib\connection\pool.js:326:12)
at Object.onceWrapper (events.js:317:30)
at emitTwo (events.js:126:13)
at Connection.emit (events.js:214:7)
at Socket.<anonymous> (C:\...\node_modules\mongodb-co
re\lib\connection\connection.js:245:50)
at Object.onceWrapper (events.js:315:30)
at emitOne (events.js:116:13)
name: 'MongoNetworkError',
message: 'failed to connect to server [localhost:27017] on first connect [MongoNetworkEr
ror: connect ECONNREFUSED 127.0.0.1:27017]' }
C:\...\node_modules\mongodb\lib\mongo_client.js:792
throw err;
^

TypeError: Cannot read property 'db' of null
at C:\...\data\db.js:105:16
at err (C:\...\node_modules\mongodb\lib\utils.js:414:
14)
at executeCallback (C:\...\node_modules\mongodb\lib\u
tils.js:403:25)
at C:\Users\...\node_modules\mongodb\lib\mongo_client.js:27
0:21
at connectCallback (C:\...\node_modules\mongodb\lib\m
ongo_client.js:940:5)
at C:\...\node_modules\mongodb\lib\mongo_client.js:78
9:11
at _combinedTickCallback (internal/process/next_tick.js:131:7)
at process._tickCallback (internal/process/next_tick.js:180:9)


显然,在此之前我已经用下面的方式启动了MongoDB:

mongod -dbpath C:\...\data


问题是什么?



编辑2

非常感谢你的帮助。
我正在尝试修改代码以添加集合的创建。

现在这是我的代码。

app.js:

var express = require('express');

// my files
var downloaderFirst = require('./routers/downloaderFirst.js');
var downloaderSecond = require('./routers/downloaderSecond.js');

var db = require('./data/db.js');

// create app
const app = express();

downloaderFirst.download();
downloaderSecond.download();


db.js:

var MongoClient = require('mongodb').MongoClient;
var url = 'mongodb://localhost:27017/';
let dbInstance;

// object of methods
var methods = {};

const connectDb = function(dbName, cb) {
if(dbInstance) {
return cb(dbInstance);
}
else {
MongoClient.connect(url + dbName, function(err, db) {
if(!err) {
dbInstance = db;
return cb(db);
}
});
}
}

/**
 * Upserts one record of the "first" data set ({year, object} documents)
 * through the shared connection provided by `connectDb`.
 *
 * @param {string} dbName - Name of the target database.
 * @param {string} collectionName - Name of the target collection.
 * @param {{year: *, object: *}} doc - Record to insert, or to replace if a
 *   document with the same year and object already exists.
 */
methods.insertFirst = function(dbName, collectionName, doc) {
  connectDb(dbName, function(db) {
    var dbo = db.db(dbName);
    // Match on the fields this data set really has (year + object). The
    // selector used to reference `color`, which these records do not carry,
    // so it could not tell two records of the same year apart.
    var selector = {
      year: doc.year,
      object: doc.object
    };
    // Pass a callback so a failed update is reported instead of surfacing
    // as an unhandled promise rejection.
    dbo.collection(collectionName).update(selector, doc, {upsert: true}, function(err) {
      if (err) {
        console.error('insertFirst', err);
      }
    });
  });
};

/**
 * Upserts one record of the "second" data set ({year, color} documents)
 * through the shared connection provided by `connectDb`.
 *
 * @param {string} dbName - Name of the target database.
 * @param {string} collectionName - Name of the target collection.
 * @param {{year: *, color: *}} doc - Record to insert, or to replace if a
 *   document with the same year and color already exists.
 */
methods.insertSecond = function(dbName, collectionName, doc) {
  connectDb(dbName, function(db) {
    var dbo = db.db(dbName);
    // Match on the fields this data set really has (year + color). The
    // selector used to reference `object`, which these records do not carry,
    // so it could not tell two records of the same year apart.
    var selector = {
      year: doc.year,
      color: doc.color
    };
    // Pass a callback so a failed update is reported instead of surfacing
    // as an unhandled promise rejection.
    dbo.collection(collectionName).update(selector, doc, {upsert: true}, function(err) {
      if (err) {
        console.error('insertSecond', err);
      }
    });
  });
};


./routers/downloaderFirst.js:

var cheerio = require('cheerio');
var express = require('express');
var fs = require('fs');
var request = require('request');
var textract = require('textract');
var db = require('../data/db.js');

// object of methods
var methods = {};

var url = 'http://www....';

// json object containing data
var jsons = [];

methods.download = function(req, res) {
extractText();
};

function extractText() {
// get text and save it to jsons array and ./output/dataFirst.json file
var thisYear = ...;
var thisObject = ...;
var o = {year: thisYear, object: thisObject};
jsons.push(o);
printOnFile(jsons, './output/dataFirst.json');
db.insertFirst('db', 'first', obj);
}

module.exports = methods;


当我运行时,我得到:

(node:5708) UnhandledPromiseRejectionWarning: Unhandled promise rejection (rejection id: 1
): MongoError: BSON field 'update.updates.q' is the wrong type 'array', expected type 'obj
ect'
(node:5708) [DEP0018] DeprecationWarning: Unhandled promise rejections are deprecated. In
the future, promise rejections that are not handled will terminate the Node.js process wit
h a non-zero exit code.
(node:5708) UnhandledPromiseRejectionWarning: Unhandled promise rejection (rejection id: 2
): MongoError: BSON field 'update.updates.q' is the wrong type 'array', expected type 'obj
ect'
(node:5708) UnhandledPromiseRejectionWarning: Unhandled promise rejection (rejection id: 3
): MongoError: BSON field 'update.updates.q' is the wrong type 'array', expected type 'obj
ect'
^C


然后,我可以使用Compass来分析我的数据库。
我看到创建了集合,但是并没有所有应该存在的文档。
“second”集合中应该有10692个文档,“first”集合中应该有16102个文档,但实际上“second”集合中只有2398个文档,“first”集合中只有933个文档。

然后,我考虑了如何(以及何时)调用db.close()。
问题是我的应用程序应该进行网络抓取,将信息保存在db上,并使用d3.js可视化数据。
因此,我想在应用程序完成向数据库的保存之后再调用db.close()。

如果节点是同步的,我会做类似的事情:

app.js:

var express = require('express');

// my files
var downloaderFirst = require('./routers/downloaderFirst.js');
var downloaderSecond = require('./routers/downloaderSecond.js');

var db = require('./data/db.js');

// create app
const app = express();

// web scraping and saving on db
downloaderFirst.download();
downloaderSecond.download();

// close connection
db.disconnectDb();


在db.js中的disconnectDb():

methods.disconnectDb = function(dbName) {
connectDb(dbName, function(db) {
var dbo = db.db(dbName);
db.close();
});
}


但这行不通。
那么,有什么建议吗?这是我第一次使用Node和Mongo,虽然我读了很多书,但是却找不到完整的(也不是非常简单的)示例来说明如何构建与“ Hello World!”不同的应用程序。

谢谢

最佳答案

Node.js具有异步特性。它采用单线程、事件驱动的体系结构。因此,如果在每个函数调用中都连接和断开数据库,就可能有其他资源或函数同时在尝试执行相同的操作。

示例-function A已创建mongodb连接,并且还有其他一些功能也在尝试执行相同操作。因此,发生此错误的方式与您面临的情况相同。

解决方案-维护一个单独的文件,在应用程序启动时用它来连接数据库。当整个应用程序即将停止,或者您确定应用程序将要终止时,再断开数据库连接。这样,open 和 close 之间就不会出现竞争(race-around)条件。

编辑-1

我将根据您编辑的代码再添加几行。


app.js文件包含以下几行

1. db.createDb(dbName); //this is an asynchronous operation

2. downloaderFirst.download(); //this is a route (asynchronous) as per your code base
3. downloaderSecond.download(); //this is a route (asynchronous) as per your code base

4. db.disconnectDb(dbName); // this is an asynchronous operation too



现在,由于Node.js事件驱动的体系结构,第4行不会等待第1、2、3行执行完毕。因此,当您运行此代码时,MongoDB的 connected 和 disconnected 会基于 event-loop 彼此独立地同时发生。


downloaderFirst.js具有此功能

function extractText() {
var thisYear = ...;
var thisObject = ...;
var o = {year: thisYear, object: thisObject};
jsons.push(o);
printOnFile(jsons, './output/dataFirst.json');
db.insertDocFirst('db', 'first', obj); /*********HERE*****/
}


请查看 HERE标记的行。这又是异步的,不等待 MongoDb connect。如果调用 extractText函数,它将开始执行。
db.js文件几乎没有问题。


在这里,您要在每个操作上连接数据库,这同时具有多个连接请求的缺点。因此,可以通过这种方式完成。

创建一个名为 connectDb的函数。它返回连接的 db实例。然后将其用于其他数据库操作,例如这种方式。

// Connected client shared by every database operation (undefined until the
// first successful connect).
let dbInstance;
// Callbacks waiting for the connection attempt that is currently in flight
// (null when no attempt is running).
let pendingCallbacks = null;

/**
 * Hands the shared MongoDB client to `cb`, connecting on first use.
 *
 * Calls that arrive while the first connection is still being established
 * are queued and served by that same connection, so only one connection is
 * ever opened. If connecting fails, the error is logged, the queued callbacks
 * are not invoked, and the next call starts a fresh attempt.
 *
 * @param {string} dbName - Database name appended to the module-level `url`.
 * @param {function(Object)} cb - Receives the connected client.
 */
const connectDb = function(dbName, cb) {
  if (dbInstance) {
    return cb(dbInstance);
  }
  if (pendingCallbacks) {
    // A connection attempt is already running: wait for it instead of
    // opening a second connection.
    pendingCallbacks.push(cb);
    return;
  }
  pendingCallbacks = [cb];
  MongoClient.connect(url + dbName, function(err, db) {
    const waiting = pendingCallbacks;
    pendingCallbacks = null;
    if (err) {
      // Report the failure instead of silently dropping it.
      console.error('connectDb: failed to connect to ' + url + dbName, err);
      return;
    }
    dbInstance = db;
    waiting.forEach(function(waitingCb) {
      waitingCb(db);
    });
  });
};


因此,使用此功能连接 MongoDB。并编写其他类似的操作。

/**
 * Upserts one record of the "second" data set ({year, color} documents)
 * using the shared connection provided by `connectDb`.
 *
 * @param {string} dbName - Name of the target database.
 * @param {string} collectionName - Name of the target collection.
 * @param {{year: *, color: *}} doc - Record to insert, or to replace if a
 *   document with the same year and color already exists.
 */
methods.insertDocSecond = function(dbName, collectionName, doc) {
  connectDb(dbName, function(db) {
    var dbo = db.db(dbName);
    // Match on the fields the record really has. The selector used to store
    // the color under the key "object", which "second" documents never carry.
    var selector = {
      "year": doc.year,
      "color": doc.color
    };
    // Pass a callback so a failed update is reported instead of surfacing
    // as an unhandled promise rejection.
    dbo.collection(collectionName).update(selector, doc, {upsert: true}, function(err) {
      if (err) {
        console.error('insertDocSecond', err);
      }
    });
  });
};


这样,我们可以确保数据库仅连接一次,其余时间仅通过引用进行连接。

最后但并非最不重要的一点是,请确保以正确的方式调用 close连接,以便在某些其他函数忙于执行某些数据库操作时不会异步调用该连接。

编辑2

Update函数同时返回promise和callback。对于您当前的情况,您的 update操作返回了一些未处理的错误。使用回调函数正确捕获错误并查看错误所在。

无论如何,我发现您的问题更多在于对如何使用 Node.js 和 MongoDB 的理解,而不是StackOverflow所针对的那类具体问题。因此,我建议您先仔细阅读一些在线资料和资源,了解如何将Node.js和MongoDB结合使用;之后如果遇到任何具体问题,社区将很乐意为您提供帮助。
谢谢

关于javascript - 使用MongoDb和Node.js来组织代码,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/49298010/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com