gpt4 book ai didi

mongodb - 在具有索引的字段上使用 $exists 和 mongodb 的慢查询行为

转载 作者:可可西里 更新时间:2023-11-01 09:08:11 24 4
gpt4 key购买 nike

我一直在使用一个 mongo 3.2.9 实例对线上数据做一些调查。调查的重点是找出文档中缺失数据的那些记录的一些细节。但是我运行的查询在 robomongo 和 compass 中都会超时。

我有一个包含超过 300 万条记录的集合 (foo)。我正在搜索所有具有 barId 字段的记录(即 $exists:true),这是我在 mongo 上发起的查询:

db.foo.find({barId:{$exists:true}}).explain(true)

这是来自 mongo shell 的执行计划(它在 robomongo 或 compass 中超时)

MongoDB Enterprise > db.foo.find({barId:{$exists:true}}).explain(true)
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "myDatabase01.foo",
"indexFilterSet" : false,
"parsedQuery" : {
"barId" : {
"$exists" : true
}
},
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"barId" : {
"$exists" : true
}
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"barId" : 1
},
"indexName" : "barId_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"barId" : [
"[MinKey, MaxKey]"
]
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 2,
"executionTimeMillis" : 154716,
"totalKeysExamined" : 3361040,
"totalDocsExamined" : 3361040,
"executionStages" : {
"stage" : "FETCH",
"filter" : {
"barId" : {
"$exists" : true
}
},
"nReturned" : 2,
"executionTimeMillisEstimate" : 152060,
"works" : 3361041,
"advanced" : 2,
"needTime" : 3361038,
"needYield" : 0,
"saveState" : 27619,
"restoreState" : 27619,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 3361040,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 3361040,
"executionTimeMillisEstimate" : 1260,
"works" : 3361041,
"advanced" : 3361040,
"needTime" : 0,
"needYield" : 0,
"saveState" : 27619,
"restoreState" : 27619,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"barId" : 1
},
"indexName" : "barId_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"barId" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 3361040,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
},
"allPlansExecution" : [ ]
},
"serverInfo" : {
"host" : "myLinuxMachine",
"port" : 8080,
"version" : "3.2.9",
"gitVersion" : "22ec9e93b40c85fc7cae7d56e7d6a02fd811088c"
},
"ok" : 1
}

它看起来确实使用了我的 barId_1 索引,但同时却扫描了全部 300 多万条记录,而最终只返回 2 条。

我运行了一个类似的查询,但这次不是检查字段是否存在,而是查找大于 "0" 的 ID(也就是所有的 ID)

MongoDB Enterprise > db.foo.find({barId:{$gt:"0"}}).explain(true)
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "myDatabase01.foo",
"indexFilterSet" : false,
"parsedQuery" : {
"barId" : {
"$gt" : "0"
}
},
"winningPlan" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"barId" : 1
},
"indexName" : "barId_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"barId" : [
"(\"0\", {})"
]
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 2,
"executionTimeMillis" : 54,
"totalKeysExamined" : 2,
"totalDocsExamined" : 2,
"executionStages" : {
"stage" : "FETCH",
"nReturned" : 2,
"executionTimeMillisEstimate" : 10,
"works" : 3,
"advanced" : 2,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 2,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 2,
"executionTimeMillisEstimate" : 10,
"works" : 3,
"advanced" : 2,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"barId" : 1
},
"indexName" : "barId_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"barId" : [
"(\"1\", {})"
]
},
"keysExamined" : 2,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
},
"allPlansExecution" : [ ]
},
"serverInfo" : {
"host" : "myLinuxMachine",
"port" : 8080,
"version" : "3.2.9",
"gitVersion" : "22ec9e93b40c85fc7cae7d56e7d6a02fd811088c"
},
"ok" : 1
}

这再次对 barId_1 进行了索引扫描。它扫描了 2 条记录,返回 2。

为了完整起见,下面是仅有的这 2 条记录;其余 300 万条记录在大小和结构上都与它们非常相似。

MongoDB Enterprise > db.foo.find({barId:{$gt:"0"}})
{
"_id" : "00002f5d-ee4a-4996-bb27-b54ea84df777", "createdDate" : ISODate("2016-11-16T02:26:48.500Z"), "createdBy" : "Exporter", "lastModifiedDate" : ISODate("2016-11-16T02:26:48.500Z"), "lastModifiedBy" : "Exporter", "rolePlayed" : "LA", "roleType" : "T", "oId" : [ "d7316944-62ed-48dc-8ee4-e3bad8c58b10" ], "barId" : "e45b3160-bbb4-24e5-82b3-ad0c28329555", "cId" : "dcc29053-7a1f-439e-9536-fb4e44ff8a51", "timestamp" : "2017-02-20T16:23:15.795Z"
}
{
"_id" : "00002f5d-ee4a-4996-bb27-b54ea84df888", "createdDate" : ISODate("2016-11-16T02:26:48.500Z"), "createdBy" : "Exporter", "lastModifiedDate" : ISODate("2016-11-16T02:26:48.500Z"), "lastModifiedBy" : "Exporter", "rolePlayed" : "LA", "roleType" : "T", "oId" : [ "d7316944-62ed-48dc-8ee4-e3bad8c58b10" ], "barId" : "e45b3160-bbb4-24e5-82b3-ad0c28329555", "cId" : "dcc29053-7a1f-439e-9536-fb4e44ff8a51", "timestamp" : "2017-02-20T16:23:15.795Z"
}

当然,我已经在谷歌上搜索过,发现索引与 $exists 子句一起使用曾经存在问题,但在许多帖子中,我读到这个问题已经修复了。真的是这样吗?此外,我还发现了下面这个 hack:在检查字段是否存在时,可以用它代替 $exists 子句,从而强制“正确”地使用索引。

MongoDB Enterprise > db.foo.find({barId:{$ne:null}}).explain(true)
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "myDatabase01.foo",
"indexFilterSet" : false,
"parsedQuery" : {
"$not" : {
"barId" : {
"$eq" : null
}
}
},
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"$not" : {
"barId" : {
"$eq" : null
}
}
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"barId" : 1
},
"indexName" : "barId_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"barId" : [
"[MinKey, null)",
"(null, MaxKey]"
]
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 2,
"executionTimeMillis" : 57,
"totalKeysExamined" : 3,
"totalDocsExamined" : 2,
"executionStages" : {
"stage" : "FETCH",
"filter" : {
"$not" : {
"barId" : {
"$eq" : null
}
}
},
"nReturned" : 2,
"executionTimeMillisEstimate" : 10,
"works" : 4,
"advanced" : 2,
"needTime" : 1,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 2,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 2,
"executionTimeMillisEstimate" : 10,
"works" : 4,
"advanced" : 2,
"needTime" : 1,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"barId" : 1
},
"indexName" : "barId_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"barId" : [
"[MinKey, null)",
"(null, MaxKey]"
]
},
"keysExamined" : 3,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
},
"allPlansExecution" : [ ]
},
"serverInfo" : {
"host" : "myLinuxMachine",
"port" : 8080,
"version" : "3.2.9",
"gitVersion" : "22ec9e93b40c85fc7cae7d56e7d6a02fd811088c"
},
"ok" : 1
}

这有效,仅扫描了 2 个文档,仅返回了 2 个文档。

我的问题是:我应该在查询中使用 $exists 吗?它是否适合在线上生产环境的应用程序中使用?如果答案是否定的,那么 $exists 子句当初为什么会存在?

当然,也有可能是 mongo 的安装有问题,或者索引设计不当。如能提供任何线索,我将不胜感激;不过目前我会继续使用 $ne:null 这个 hack。

最佳答案

你应该为 barId 字段创建 partial index(部分索引,首选)或稀疏索引(sparse index):

db.foo.createIndex(
{ barId: 1 },
{ partialFilterExpression: { barId: { $exists: true } } }
)

关于mongodb - 在具有索引的字段上使用 $exists 和 mongodb 的慢查询行为,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/42378355/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com