gpt4 book ai didi

node.js - 如何返回 mongoDB 数组中值的最大出现次数

转载 作者:行者123 更新时间:2023-12-03 01:07:42 25 4
gpt4 key购买 nike

我在 MongoDB 的文档中有一个数组字段 devDependenciesList:我想统计该数组中各个值在所有文档里的出现次数,并找出出现次数最多的值

[{
"_id" : 0,
"repoId" : 460078,
"devDependenciesList" : [
"value1",
"value2",
"value3",
"value4"
]
},{
"_id" : 1,
"repoId" : 1232,
"devDependenciesList" : [
"value1",
"value4",
"value7",
"value93"
]
},{
"_id" : 2,
"repoId" : 5423,
"devDependenciesList" : [
"value1",
"value23",
"value3",
"value4"
]
}]

输出应该是:

[value1:3,value4:3,value3:2]

最佳答案

基本上,您需要先用 $unwind 展开数组内容,然后用 $group 以每个值作为分组键,并用 $sum 进行计数:

// Count how many times each distinct array value occurs across all documents.
db.collection.aggregate([
  // Emit one document per element of devDependenciesList.
  { $unwind: "$devDependenciesList" },
  // Group by the element value and add 1 for every occurrence.
  { $group: { _id: "$devDependenciesList", count: { $sum: 1 } } }
])

这将返回:

{ "_id" : "value23", "count" : 1 }
{ "_id" : "value93", "count" : 1 }
{ "_id" : "value7", "count" : 1 }
{ "_id" : "value2", "count" : 1 }
{ "_id" : "value3", "count" : 2 }
{ "_id" : "value1", "count" : 3 }
{ "_id" : "value4", "count" : 3 }

这就是基本数据,但如果您确实想要“键/计数”形式,您可以这样做:

// Same count as before, but reshaped server-side into one { value: count } document.
db.collection.aggregate([
  // Emit one document per element of devDependenciesList.
  { $unwind: "$devDependenciesList" },
  // Tally the occurrences of each distinct value.
  { $group: { _id: "$devDependenciesList", count: { $sum: 1 } } },
  // Highest counts first.
  { $sort: { count: -1 } },
  // Collect every tally into a single array of { k, v } pairs.
  { $group: { _id: null, items: { $push: { k: "$_id", v: "$count" } } } },
  // Convert the pairs into an object and promote it to the output document.
  { $replaceRoot: { newRoot: { $arrayToObject: "$items" } } }
])

这将返回:

{
"value1" : 3,
"value4" : 3,
"value3" : 2,
"value23" : 1,
"value93" : 1,
"value7" : 1,
"value2" : 1
}

额外的 $group 和 $push 将所有结果收集到单个文档中,该文档包含一个数组,数组中的每个元素都带有 "k" 和 "v" 两个键。$arrayToObject 运算符要求输入采用这种形式;它随后在 $replaceRoot 阶段中使用,由该阶段返回最终输出。

这需要一个支持后面这些运算符的 MongoDB 版本,但实际上您并不需要这样做,因为这一步在客户端代码中完成其实最为高效。比如在 shell 中使用 JavaScript:

// Let the server count and sort, then fold the resulting rows into a single
// { value: count } object on the client.
db.collection.aggregate([
  { $unwind: "$devDependenciesList" },
  { $group: { _id: "$devDependenciesList", count: { $sum: 1 } } },
  { $sort: { count: -1 } }
]).toArray().reduce((acc, doc) => {
  // Insertion order follows the sorted cursor, so keys stay in count order.
  acc[doc._id] = doc.count;
  return acc;
}, {})

这会产生与上面相同的结果。

当然,如果您想排除所有只出现一次的结果(或做类似的筛选),只需在 $group 之后添加 $match:

// Count values on the server, keep only those seen more than once, then fold
// the rows into a single { value: count } object on the client.
db.collection.aggregate([
  { $unwind: "$devDependenciesList" },
  { $group: { _id: "$devDependenciesList", count: { $sum: 1 } } },
  // Drop values that occur only once.
  { $match: { count: { $gt: 1 } } },
  { $sort: { count: -1 } }
]).toArray().reduce((acc, doc) => {
  acc[doc._id] = doc.count;
  return acc;
}, {})

或者使用 Node native 驱动程序,如下所示:

// Node native driver version: await the materialised array, then fold it into
// a { value: count } object using destructuring and object spread.
let result = (
  await db.collection('collection').aggregate([
    { $unwind: "$devDependenciesList" },
    { $group: { _id: "$devDependenciesList", count: { $sum: 1 } } },
    // Drop values that occur only once.
    { $match: { count: { $gt: 1 } } },
    { $sort: { count: -1 } }
  ]).toArray()
).reduce((acc, { _id, count }) => ({ ...acc, [_id]: count }), {})

请注意,这里在取得实际数组时使用了 async/await,并使用了对象展开和解构等 ES6+ 特性。

其结果当然只是:

{ "value1" : 3, "value4" : 3, "value3" : 2 }

仅供参考,下面是一份完整的、可复现的代码清单:

const { MongoClient } = require('mongodb');

const uri = 'mongodb://localhost:27017';
// NOTE(review): `useNewUrlParser` is presumably only meaningful on 3.x drivers
// and ignored by newer ones — confirm against the installed driver version.
const opts = { useNewUrlParser: true };

// Sample documents from the question: each one carries a devDependenciesList array.
const data = [
  {
    "_id" : 0,
    "repoId" : 460078,
    "devDependenciesList" : [
      "value1",
      "value2",
      "value3",
      "value4"
    ]
  },{
    "_id" : 1,
    "repoId" : 1232,
    "devDependenciesList" : [
      "value1",
      "value4",
      "value7",
      "value93"
    ]
  },{
    "_id" : 2,
    "repoId" : 5423,
    "devDependenciesList" : [
      "value1",
      "value23",
      "value3",
      "value4"
    ]
  }
];

/**
 * Pretty-print a value as indented JSON on stdout.
 * @param {*} value - Any JSON-serialisable value.
 */
const log = value => console.log(JSON.stringify(value, undefined, 2));

/**
 * Reproduction script: resets the `test.collection` collection, loads the
 * sample data, prints the { value: count } object for values that occur more
 * than once, then prints the explain output of the `$sortByCount` pipeline.
 */
(async function() {

  let client;

  try {
    client = await MongoClient.connect(uri, opts);

    const db = client.db('test');

    // Clean data: an explicit empty filter removes every document.
    await db.collection('collection').deleteMany({});

    // Insert data
    await db.collection('collection').insertMany(data);

    // Count on the server, then fold the sorted rows into one object on the
    // client; insertion order keeps the keys in descending count order.
    const result = (await db.collection('collection').aggregate([
      { "$unwind": "$devDependenciesList" },
      { "$group": {
        "_id": "$devDependenciesList",
        "count": { "$sum": 1 }
      }},
      { "$match": { "count": { "$gt": 1 } } },
      { "$sort": { "count": -1 } }
    ]).toArray()).reduce((o, { _id, count }) => ({ ...o, [_id]: count }), {});

    log(result);

    // Ask the server to explain the $sortByCount pipeline instead of running it.
    const sample = await db.collection('collection').aggregate([
      { "$unwind": "$devDependenciesList" },
      { "$sortByCount": "$devDependenciesList" },
    ], { "explain": true }).toArray();

    log(sample);

  } catch (e) {
    console.error(e);
  } finally {
    // close() returns a promise; await it so the connection is fully released
    // before the function settles and a close failure is not left floating.
    if (client) {
      await client.close();
    }
  }

})();

输出如下,其中既有预期结果,也有 “explain” 输出;后者表明 $sortByCount 并不是“真正的”聚合阶段,只是早在 MongoDB 2.2 中就已存在的写法的一种简写:

{
"value1": 3,
"value4": 3,
"value3": 2
}
[
{
"stages": [
{
"$cursor": {
"query": {},
"fields": {
"devDependenciesList": 1,
"_id": 0
},
"queryPlanner": {
"plannerVersion": 1,
"namespace": "test.collection",
"indexFilterSet": false,
"parsedQuery": {},
"winningPlan": {
"stage": "COLLSCAN",
"direction": "forward"
},
"rejectedPlans": []
}
}
},
{
"$unwind": {
"path": "$devDependenciesList"
}
},
{
"$group": {
"_id": "$devDependenciesList",
"count": {
"$sum": {
"$const": 1
}
}
}
},
{
"$sort": {
"sortKey": {
"count": -1
}
}
}
],
"ok": 1,
"operationTime": "6674186995377373190",
"$clusterTime": {
"clusterTime": "6674186995377373190",
"signature": {
"hash": "AAAAAAAAAAAAAAAAAAAAAAAAAAA=",
"keyId": 0
}
}
}
]

关于node.js - 如何返回 mongoDB 数组中值的最大出现次数,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/55431199/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com