gpt4 book ai didi

javascript - 使用聚合框架或 MapReduce 将事件数据中的文档嵌入到 MongoDB 中

转载 作者:行者123 更新时间:2023-12-02 17:05:20 25 4
gpt4 key购买 nike

我有以下数据结构,希望将包含 loc 字段的文档嵌入到包含 duration(持续时间)字段的文档中,但前提是其时间戳落在父文档的时间戳 (ts) 减去父文档的持续时间(以秒为单位)到该时间戳 (ts) 之间。这可以通过聚合框架或 MapReduce 来实现吗?

{
"_id" : ObjectId("53df2a44e6583c76253c9869"),
"deviceId" : NumberLong(1377700226807),
"ts" : ISODate("2014-08-04T08:37:55.000Z"),
"duration" : NumberLong(1642),
}
{
"_id" : ObjectId("53df2a41e6583c4e243c9869"),
"deviceId" : NumberLong(1377700226807),
"ts" : ISODate("2014-08-04T08:37:53.000Z"),
"loc" : {
"lon" : 5.1101453,
"lat" : 52.0625047
}
}
{
"_id" : ObjectId("53df2a3fe6583c38203c986a"),
"deviceId" : NumberLong(1377700226807),
"ts" : ISODate("2014-08-04T08:37:50.000Z"),
"loc" : {
"lon" : 5.1101297,
"lat" : 52.0625031
}
}
{
"_id" : ObjectId("53df2a44e6583c76253c9869"),
"deviceId" : NumberLong(1377700226807),
"ts" : ISODate("2014-08-04T06:37:55.000Z"),
"duration" : NumberLong(3600),
}
{
"_id" : ObjectId("53df2a38e6583c03253c9869"),
"deviceId" : NumberLong(1377700226807),
"ts" : ISODate("2014-08-04T06:37:44.000Z"),
"loc" : {
"lon" : 5.1101176,
"lat" : 52.0625171
}
}
{
"_id" : ObjectId("53df2a33e6583c51243c9869"),
"deviceId" : NumberLong(1377700226807),
"ts" : ISODate("2014-08-04T06:37:38.000Z"),
"loc" : {
"lon" : 5.1101409,
"lat" : 52.0625818
}
}
{
"_id" : ObjectId("53df2a2de6583c38203c9869"),
"deviceId" : NumberLong(1377700226807),
"ts" : ISODate("2014-08-04T06:37:32.000Z"),
"loc" : {
"lon" : 5.1099513,
"lat" : 52.0624157
}
}

这是所需的格式

{
"_id" : ObjectId("53df2a44e6583c76253c9869"),
"deviceId" : NumberLong(1377700226807),
"ts" : ISODate("2014-08-04T08:37:55.000Z"),
"duration" : NumberLong(1642),
"data" : [
{
"ts" : ISODate("2014-08-04T08:37:53.000Z"),
"loc" : {
"lon" : 5.1101453,
"lat" : 52.0625047
}
},
{
"ts" : ISODate("2014-08-04T08:37:50.000Z"),
"loc" : {
"lon" : 5.1101297,
"lat" : 52.0625031
}
}
]
}

最佳答案

使用聚合框架不容易做到这一点,但可以使用 MapReduce 来完成。

假设数据收集正确(即,不存在带有持续时间、却缺少对应 “loc” 文档的 “ride” 文档),您可以这样做:

/**
 * MapReduce map step: emits one normalized event per input document,
 * keyed by the document's deviceId.
 *
 * Runs with `this` bound to the current document and relies on the
 * `emit(key, value)` function supplied by the MapReduce runtime.
 *
 * Emitted value: { startTs, endTs, loc, duration }
 *   - documents with a `duration` field: startTs = ts - duration seconds
 *   - all other documents: startTs = ts
 *   - `loc` / `duration` are copied through as-is (undefined when absent)
 */
var map = function () {
  var startTime;
  if (this.hasOwnProperty("duration")) {
    // duration is in seconds; Date arithmetic works in milliseconds.
    startTime = this.ts - this.duration * 1000;
  } else {
    startTime = this.ts;
  }
  emit(this.deviceId, {
    startTs: new Date(startTime),
    endTs: this.ts,
    loc: this.loc,
    duration: this.duration
  });
};

Map 以标准化格式输出内容,reduce 将它们全部分组到每个 deviceId 的单个数组中。

/**
 * MapReduce reduce step: collects every event emitted for one key into a
 * single flat array.
 *
 * MongoDB may call reduce more than once for the same key and feed the
 * result of an earlier call back in as one of `values`. Each value is
 * therefore either a bare event (as emitted by map) or an earlier partial
 * result of the form { vals: [...] }; partial results are flattened so the
 * output never contains nested wrappers.
 *
 * @param {*} key - the key being reduced (unused).
 * @param {Array} values - bare events and/or earlier { vals: [...] } results.
 * @returns {{vals: Array}} all events for the key in one flat array.
 */
var reduce = function (key, values) {
  var result = { vals: [] };
  values.forEach(function (v) {
    if (v != null && Array.isArray(v.vals)) {
      // Output of a previous reduce pass: merge its events.
      v.vals.forEach(function (inner) {
        result.vals.push(inner);
      });
    } else {
      result.vals.push(v);
    }
  });
  return result;
};

所有实际处理(对每个 deviceId 进行分组)都发生在 finalize 函数中:该函数获取每个 deviceId 对应的数组,对其进行排序,然后将其分组为您所期望的文档。

/**
 * MapReduce finalize step: turns the events collected for one key into a
 * list of rides, each carrying the location events that fall inside it.
 *
 * An event without `loc` is treated as a ride header (start/end/duration);
 * an event with `loc` is a location sample. Events are ordered by startTs,
 * so a ride header precedes the samples recorded during that ride.
 *
 * @param {*} key - the key being finalized (unused).
 * @param {Object} value - either { vals: [...] } as produced by reduce, or
 *   a single bare event when only one value was emitted for the key (reduce
 *   is not invoked in that case).
 * @returns {{rides: Array}} rides in chronological order; each ride is
 *   { start, end, duration, locations: [{ loc, ts }] }.
 */
var finalize = function (key, value) {
  var result = { rides: [] };
  var events;
  var ride = null;
  var event;
  var i;

  if (value == null) {
    events = [];
  } else if (Array.isArray(value.vals)) {
    // Copy so the caller's array is not reordered by the sort below.
    events = value.vals.slice();
  } else {
    // Single emitted value: reduce never ran, so there is no wrapper.
    events = [value];
  }

  events.sort(function (a, b) {
    return a.startTs.getTime() - b.startTs.getTime();
  });

  for (i = 0; i < events.length; i++) {
    event = events[i];
    if (event.loc == null) {
      // Ride header: close the ride in progress and open a new one.
      if (ride !== null) {
        result.rides.push(ride);
      }
      ride = {
        start: event.startTs,
        end: event.endTs,
        duration: event.duration,
        locations: []
      };
    } else if (ride !== null && event.endTs.getTime() <= ride.end.getTime()) {
      // Only samples inside [ride.start, ride.end] belong to the ride;
      // samples before the first ride or after a ride's end are skipped.
      ride.locations.push({ loc: event.loc, ts: event.endTs });
    }
  }

  if (ride !== null) {
    result.rides.push(ride);
  }
  return result;
};

我在您的测试数据中添加了几个 deviceId:

db.rides.find({},{_id:0})
{ "deviceId" : NumberLong("1377700226807"), "ts" : ISODate("2014-08-04T06:37:32Z"), "loc" : { "lon" : 5.1099513, "lat" : 52.0624157 } }
{ "deviceId" : NumberLong("1377700226910"), "ts" : ISODate("2014-08-04T06:37:32Z"), "loc" : { "lon" : 5.1099513, "lat" : 52.0624157 } }
{ "deviceId" : NumberLong("1377700226807"), "ts" : ISODate("2014-08-04T06:37:38Z"), "loc" : { "lon" : 5.1101409, "lat" : 52.0625818 } }
{ "deviceId" : NumberLong("1377700226910"), "ts" : ISODate("2014-08-04T06:37:38Z"), "loc" : { "lon" : 5.1101409, "lat" : 52.0625818 } }
{ "deviceId" : NumberLong("1377700226807"), "ts" : ISODate("2014-08-04T06:37:44Z"), "loc" : { "lon" : 5.1101176, "lat" : 52.0625171 } }
{ "deviceId" : NumberLong("1377700226910"), "ts" : ISODate("2014-08-04T06:37:44Z"), "loc" : { "lon" : 5.1101176, "lat" : 52.0625171 } }
{ "deviceId" : NumberLong("1377700226807"), "ts" : ISODate("2014-08-04T06:37:55Z"), "duration" : NumberLong(3600) }
{ "deviceId" : NumberLong("1377700226910"), "ts" : ISODate("2014-08-04T06:37:55Z"), "duration" : NumberLong(3600) }
{ "deviceId" : NumberLong("1377700226807"), "ts" : ISODate("2014-08-04T08:37:50Z"), "loc" : { "lon" : 5.1101297, "lat" : 52.0625031 } }
{ "deviceId" : NumberLong("1377700226908"), "ts" : ISODate("2014-08-04T08:37:50Z"), "loc" : { "lon" : 5.1101297, "lat" : 52.0625031 } }
{ "deviceId" : NumberLong("1377700226807"), "ts" : ISODate("2014-08-04T08:37:53Z"), "loc" : { "lon" : 5.1101453, "lat" : 52.0625047 } }
{ "deviceId" : NumberLong("1377700226908"), "ts" : ISODate("2014-08-04T08:37:53Z"), "loc" : { "lon" : 5.1101453, "lat" : 52.0625047 } }
{ "deviceId" : NumberLong("1377700226807"), "ts" : ISODate("2014-08-04T08:37:55Z"), "duration" : NumberLong(1642) }
{ "deviceId" : NumberLong("1377700226908"), "ts" : ISODate("2014-08-04T08:37:55Z"), "duration" : NumberLong(1642) }

并通过 MR 运行它

db.rides.mapReduce(map, reduce, {out:"newrides", finalize:finalize})
{
"result" : "newrides",
"timeMillis" : 47,
"counts" : {
"input" : 14,
"emit" : 14,
"reduce" : 3,
"output" : 3
},
"ok" : 1
}

结果是:

db.newrides.find().pretty()
{
"_id" : NumberLong("1377700226807"),
"value" : {
"rides" : [
{
"start" : ISODate("2014-08-04T05:37:55Z"),
"end" : ISODate("2014-08-04T06:37:55Z"),
"duration" : NumberLong(3600),
"locations" : [
{
"loc" : {
"lon" : 5.1099513,
"lat" : 52.0624157
},
"ts" : ISODate("2014-08-04T06:37:32Z")
},
{
"loc" : {
"lon" : 5.1101409,
"lat" : 52.0625818
},
"ts" : ISODate("2014-08-04T06:37:38Z")
},
{
"loc" : {
"lon" : 5.1101176,
"lat" : 52.0625171
},
"ts" : ISODate("2014-08-04T06:37:44Z")
}
]
},
{
"start" : ISODate("2014-08-04T08:10:33Z"),
"end" : ISODate("2014-08-04T08:37:55Z"),
"duration" : NumberLong(1642),
"locations" : [
{
"loc" : {
"lon" : 5.1101297,
"lat" : 52.0625031
},
"ts" : ISODate("2014-08-04T08:37:50Z")
},
{
"loc" : {
"lon" : 5.1101453,
"lat" : 52.0625047
},
"ts" : ISODate("2014-08-04T08:37:53Z")
}
]
}
]
}
}
{
"_id" : NumberLong("1377700226908"),
"value" : {
"rides" : [
{
"start" : ISODate("2014-08-04T08:10:33Z"),
"end" : ISODate("2014-08-04T08:37:55Z"),
"duration" : NumberLong(1642),
"locations" : [
{
"loc" : {
"lon" : 5.1101297,
"lat" : 52.0625031
},
"ts" : ISODate("2014-08-04T08:37:50Z")
},
{
"loc" : {
"lon" : 5.1101453,
"lat" : 52.0625047
},
"ts" : ISODate("2014-08-04T08:37:53Z")
}
]
}
]
}
}
{
"_id" : NumberLong("1377700226910"),
"value" : {
"rides" : [
{
"start" : ISODate("2014-08-04T05:37:55Z"),
"end" : ISODate("2014-08-04T06:37:55Z"),
"duration" : NumberLong(3600),
"locations" : [
{
"loc" : {
"lon" : 5.1099513,
"lat" : 52.0624157
},
"ts" : ISODate("2014-08-04T06:37:32Z")
},
{
"loc" : {
"lon" : 5.1101409,
"lat" : 52.0625818
},
"ts" : ISODate("2014-08-04T06:37:38Z")
},
{
"loc" : {
"lon" : 5.1101176,
"lat" : 52.0625171
},
"ts" : ISODate("2014-08-04T06:37:44Z")
}
]
}
]
}
}

关于javascript - 使用聚合框架或 MapReduce 将事件数据中的文档嵌入到 MongoDB 中,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/25294025/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com