gpt4 book ai didi

mongodb - 为什么 count 查询会读取(FETCH)文档?

转载 作者:可可西里 更新时间:2023-11-01 09:16:21 25 4
gpt4 key购买 nike

我有一个像这样的 MongoDB 文档:

  • 一个 ObjectId 数组
  • 一个 bool 用于草稿
  • 一个 bool 值表示已删除

示例:

"_id" : "55689be772ba931a30c87fd8",
"Draft" : false,
"Deleted" : false,
"productsId" : [
ObjectId("55688d7a72ba931bf430edf5"),
ObjectId("55688d7a72ba931bf430edf8"),
ObjectId("55688d7a72ba931bf430edf0"),
ObjectId("55688d7a72ba931bf430edee")
]

我有一个索引:

db.getCollection("mycolection").createIndex({ "Deleted": 1, "Draft": 1, "productsId": 1 }, { "name": "_deleted_draft_productsId" })

为什么当我执行这个查询时:

db.mycolection.explain("executionStats").count({productsId: ObjectId('55688d7a72ba931bf430edf4'),Draft: { $ne: true }, Deleted: { $ne: true } })

我得到了如下统计信息,其中 totalKeysExamined 为 687:

"executionStats" : {
"executionSuccess" : true,
"nReturned" : 685,
"executionTimeMillis" : 3,
"totalKeysExamined" : 687,
"totalDocsExamined" : 685,
"executionStages" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"$nor" : [
{
"Deleted" : {
"$eq" : true
}
}
]
},
{
"$nor" : [
{
"Draft" : {
"$eq" : true
}
}
]
}
]
},
"nReturned" : 685,
"executionTimeMillisEstimate" : 0,
"works" : 687,
"advanced" : 685,
"needTime" : 1,
"needYield" : 0,
"saveState" : 7,
"restoreState" : 7,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 685,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 685,
"executionTimeMillisEstimate" : 0,
"works" : 687,
"advanced" : 685,
"needTime" : 1,
"needYield" : 0,
"saveState" : 7,
"restoreState" : 7,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"productsId" : 1,
"Deleted" : 1,
"Draft" : 1
},
"indexName" : "_productsId_deleted_draft",
"isMultiKey" : true,
"multiKeyPaths" : {
"productsId" : [
"productsId"
],
"Deleted" : [ ],
"Draft" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"productsId" : [
"[ObjectId('55688d7a72ba931bf430edf4'), ObjectId('55688d7a72ba931bf430edf4')]"
],
"Deleted" : [
"[MinKey, true)",
"(true, MaxKey]"
],
"Draft" : [
"[MinKey, true)",
"(true, MaxKey]"
]
},
"keysExamined" : 687,
"seeks" : 2,
"dupsTested" : 685,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
}

但如果我像下面这样,从查询中去掉 Draft 和 Deleted 这两个 bool 字段的条件:

db.Redazionale.explain("executionStats").count({IdProdotti: ObjectId('55688d7a72ba931bf430edf4') })

就会得到下面的统计信息,其中 totalDocsExamined 为 0,这也正是我对前一个查询所期望的结果:

"executionStats" : {
"executionSuccess" : true,
"nReturned" : 0,
"executionTimeMillis" : 0,
"totalKeysExamined" : 690,
"totalDocsExamined" : 0,
"executionStages" : {
"stage" : "COUNT",
"nReturned" : 0,
"executionTimeMillisEstimate" : 0,
"works" : 690,
"advanced" : 0,
"needTime" : 689,
"needYield" : 0,
"saveState" : 5,
"restoreState" : 5,
"isEOF" : 1,
"invalidates" : 0,
"nCounted" : 689,
"nSkipped" : 0,
"inputStage" : {
"stage" : "COUNT_SCAN",
"nReturned" : 689,
"executionTimeMillisEstimate" : 0,
"works" : 690,
"advanced" : 689,
"needTime" : 0,
"needYield" : 0,
"saveState" : 5,
"restoreState" : 5,
"isEOF" : 1,
"invalidates" : 0,
"keysExamined" : 690,
"keyPattern" : {
"productsId" : 1,
"Deleted" : 1,
"Draft" : 1
},
"indexName" : "_productsId_deleted_draft",
"isMultiKey" : true,
"multiKeyPaths" : {
"productsId" : [
"productsId"
],
"Deleted" : [ ],
"Draft" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"indexBounds" : {
"startKey" : {
"productsId" : ObjectId("55688d7a72ba931bf430edf4"),
"Deleted" : MinKey,
"Draft" : MinKey
},
"startKeyInclusive" : true,
"endKey" : {
"productsId" : ObjectId("55688d7a72ba931bf430edf4"),
"Deleted" : MaxKey,
"Draft" : MaxKey
},
"endKeyInclusive" : true
}
}
}
}

更新

我上传了一个转储文件,可用来重建整个数据库的一小部分。点击此处下载(原文中的下载链接在转载时已丢失)

并尝试这些查询:

db.Redazionale.explain("executionStats").count({ IdProdotti: ObjectId('55688d7a72ba931bf430edf4'),Draft: { $ne: true }, Deleted: { $ne: true } })

db.Redazionale.explain("executionStats").count({ IdProdotti: ObjectId('55688d7a72ba931bf430edf4') })

最佳答案

问题是像 Draft: { $ne: true } 这样的查询实际上是具有以下边界的范围查询:

"Draft" : [
"[MinKey, true)",
"(true, MaxKey]"
]

所以你的查询最终会产生 3 组索引边界:

           "indexBounds" : {
"productsId" : [
"[ObjectId('55688d7a72ba931bf430edf4'), ObjectId('55688d7a72ba931bf430edf4')]"
],
"Deleted" : [
"[MinKey, true)",
"(true, MaxKey]"
],
"Draft" : [
"[MinKey, true)",
"(true, MaxKey]"
]
},

如果您有多个索引,则由优化器选择最具选择性的索引。

在您的情况下,它判定 productsId 是最有效的,这是一个合理的选择。它使用复合索引 _productsId_deleted_draft 中的 productsId 部分来获取相关文档,然后再按另外 2 个字段进行过滤。

删除 3 个范围中的 2 个允许使用 COUNT_SCAN。将 bool 范围转换为精确比较也是如此。像这样的查询

db.mycolection.explain("executionStats").count({
productsId: ObjectId('55688d7a72ba931bf430edf4'),
Draft: false,
Deleted: false
})

会如预期那样执行 COUNT_SCAN,其边界如下:

            "indexBounds" : {
"startKey" : {
"productsId" : ObjectId("55688d7a72ba931bf430edf4"),
"Deleted" : false,
"Draft" : false
},
"startKeyInclusive" : true,
"endKey" : {
"productsId" : ObjectId("55688d7a72ba931bf430edf4"),
"Deleted" : false,
"Draft" : false
},
"endKeyInclusive" : true
}

问题在于,只有当所有文档都包含 Draft 字段时,Draft: { $ne: true } 才等价于 Draft: false。因此,一种可行的做法是确保这些 bool 字段始终存在,甚至可以用 document validation 来强制保证这一点。是否值得这样做,实际上取决于这项优化能带来多大的收益。

关于 mongodb - 为什么 count 查询会读取(FETCH)文档?,我们在 Stack Overflow 上找到一个类似的问题: https://stackoverflow.com/questions/45877524/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com