gpt4 book ai didi

elasticsearch - 使用min_doc_count = 0的 Elasticsearch 聚合返回与查询结果或命中无关的所有存储桶

转载 作者:行者123 更新时间:2023-12-02 23:44:16 25 4
gpt4 key购买 nike

这是我的查询-

{
"from": 0,
"size": 100,
"query": {
"bool": {
"filter": [
{
"terms": {
"folderId.keyword": [
"ff98505e-cdff-43aa-8b05-197bc3f3265e"
],
"boost": 1
}
},
{
"terms": {
"objectType.keyword": [
"File"
],
"boost": 1
}
},
{
"term": {
"tenantId": {
"value": "34202",
"boost": 1
}
}
}
],
"adjust_pure_negative": true,
"boost": 1
}
},
"aggs":{
"_byformat":{
"terms":{
"field":"format.keyword",
"min_doc_count":0,
"size":200
}
}
}
}
min_doc_count = 0的结果-
{  "took" : 1,  "timed_out" : false,  "_shards" : {    "total" : 1,    "successful" : 1,    "skipped" : 0,    "failed" : 0  },  "hits" : {    "total" : 3,    "max_score" : 0.0,    "hits" : [      {        "_index" : "plnesdv1-34202-1",        "_type" : "_doc",        "_id" : "6adbda83-53ad-457f-a2ab-d5b04c643005",        "_score" : 0.0,        "_source" : {          "format" : "vnd.openxmlformats-officedocument.spreadsheetml.sheet",          "externalSharing" : "N",          "description" : null,          "dateModified" : null,          "type" : "application",          "folderId" : "ff98505e-cdff-43aa-8b05-197bc3f3265e",          "tags" : [ ],          "objectType" : "File",          "dateCreated" : null,          "name" : "New XLSX file",          "tenantId" : "34202",          "modifiedBy" : "rdt001",          "id" : "6adbda83-53ad-457f-a2ab-d5b04c643005",          "status" : "active",          "expirationDate" : null        }      },      {        "_index" : "plnesdv1-34202-1",        "_type" : "_doc",        "_id" : "b1000a15-2d80-41f4-a5df-ba5c27f8e9c6",        "_score" : 0.0,        "_source" : {          "format" : "vnd.ms-excel",          "externalSharing" : "N",          "description" : null,          "dateModified" : null,          "type" : "application",          "folderId" : "ff98505e-cdff-43aa-8b05-197bc3f3265e",          "tags" : [ ],          "objectType" : "File",          "dateCreated" : null,          "name" : "New XLS file",          "tenantId" : "34202",          "modifiedBy" : "rdt001",          "id" : "b1000a15-2d80-41f4-a5df-ba5c27f8e9c6",          "status" : "active",          "expirationDate" : null        }      },      {        "_index" : "plnesdv1-34202-1",        "_type" : "_doc",        "_id" : "630e9f49-3368-408d-a091-03f253127004",        "_score" : 0.0,        "_source" : {          "format" : "msword",          "externalSharing" : "N",          "description" : null,          "dateModified" : null,          "type" : "application",    
      "folderId" : "ff98505e-cdff-43aa-8b05-197bc3f3265e",          "tags" : [ ],          "objectType" : "File",          "dateCreated" : null,          "name" : "New DOC file",          "tenantId" : "34202",          "modifiedBy" : "rdt001",          "id" : "630e9f49-3368-408d-a091-03f253127004",          "status" : "active",          "expirationDate" : null        }      }    ]  },  "aggregations" : {    "_byformat" : {      "doc_count_error_upper_bound" : 0,      "sum_other_doc_count" : 0,      "buckets" : [        {          "key" : "msword",          "doc_count" : 1        },        {          "key" : "vnd.ms-excel",          "doc_count" : 1        },        {          "key" : "vnd.openxmlformats-officedocument.spreadsheetml.sheet",          "doc_count" : 1        },        {          "key" : "bmp",          "doc_count" : 0        },        {          "key" : "gif",          "doc_count" : 0        },        {          "key" : "html",          "doc_count" : 0        }             ]    }  }}

min_doc_count = 1的结果-

{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 3,
"max_score" : 0.0,
"hits" : [
{
"_index" : "plnesdv1-34202-1",
"_type" : "_doc",
"_id" : "6adbda83-53ad-457f-a2ab-d5b04c643005",
"_score" : 0.0,
"_source" : {
"format" : "vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"externalSharing" : "N",
"description" : null,
"dateModified" : null,
"type" : "application",
"folderId" : "ff98505e-cdff-43aa-8b05-197bc3f3265e",
"tags" : [ ],
"objectType" : "File",
"dateCreated" : null,
"name" : "New XLSX file",
"tenantId" : "34202",
"modifiedBy" : "rdt001",
"id" : "6adbda83-53ad-457f-a2ab-d5b04c643005",
"status" : "active",
"expirationDate" : null
}
},
{
"_index" : "plnesdv1-34202-1",
"_type" : "_doc",
"_id" : "b1000a15-2d80-41f4-a5df-ba5c27f8e9c6",
"_score" : 0.0,
"_source" : {
"format" : "vnd.ms-excel",
"externalSharing" : "N",
"description" : null,
"dateModified" : null,
"type" : "application",
"folderId" : "ff98505e-cdff-43aa-8b05-197bc3f3265e",
"tags" : [ ],
"objectType" : "File",
"dateCreated" : null,
"name" : "New XLS file",
"tenantId" : "34202",
"modifiedBy" : "rdt001",
"id" : "b1000a15-2d80-41f4-a5df-ba5c27f8e9c6",
"status" : "active",
"expirationDate" : null
}
},
{
"_index" : "plnesdv1-34202-1",
"_type" : "_doc",
"_id" : "630e9f49-3368-408d-a091-03f253127004",
"_score" : 0.0,
"_source" : {
"format" : "msword",
"externalSharing" : "N",
"description" : null,
"dateModified" : null,
"type" : "application",
"folderId" : "ff98505e-cdff-43aa-8b05-197bc3f3265e",
"tags" : [ ],
"objectType" : "File",
"dateCreated" : null,
"name" : "New DOC file",
"tenantId" : "34202",
"modifiedBy" : "rdt001",
"id" : "630e9f49-3368-408d-a091-03f253127004",
"status" : "active",
"expirationDate" : null
}
}
]
},
"aggregations" : {
"_byformat" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "msword",
"doc_count" : 1
},
{
"key" : "vnd.ms-excel",
"doc_count" : 1
},
{
"key" : "vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"doc_count" : 1
}
]
}
}
}
当min_doc_count = 1时,聚合是正确的,并且仅提取与匹配项相关的存储桶。
谁能告诉我,为什么设置 min_doc_count = 0 时聚合会提取所有存储桶?我已经查阅了 Elasticsearch 文档,其中指出此行为是设计使然。是否有其他方法,可以只获取与查询匹配结果相关的聚合存储桶(同时包含零计数)?

最佳答案

似乎您误解了聚合中 min_doc_count 的含义。
设置 min_doc_count=0 时,还会返回那些未匹配任何命中的词项所对应的存储桶。
使用 min_doc_count 选项,可以只返回匹配命中数不少于配置值的词项:

 "aggs":{
"_byformat":{
"terms":{
"field":"format.keyword",
"min_doc_count":0,
"size":200
}
}
}
上述聚合会返回在 0 个或更多命中中出现的关键字。0 表示无论是否有匹配的文档都会返回该关键字,1 表示至少要有 1 个文档包含该关键字。

关于elasticsearch - 使用min_doc_count = 0的 Elasticsearch 聚合返回与查询结果或命中无关的所有存储桶,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/62593947/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com