elasticsearch - 获取数组字段 terms 聚合中 sum_other_doc_count 对应的唯一文档数

elasticsearch - 获取数组字段 terms 聚合中 sum_other_doc_count 对应的唯一文档数

转载作者：行者123 更新时间：2023-12-03 02:28:41

我有大量包含关键字值数组的文档(数百万):

索引映射(mapping)如下:

{
    "my_index": {
        "mappings": {
            "properties": {
                "id": {
                    "type": "text",
                    "fields": {
                        "keyword": {
                            "type": "keyword",
                            "ignore_above": 256
                        }
                    }
                },
                "keywords": {
                    "type": "text",
                    "fields": {
                        "keyword": {
                            "type": "keyword",
                            "ignore_above": 256
                        }
                    }
                }
            }
        }
    }
}

示例文档:

{
  "id": "abc",
  "keywords": ["cat", "dog", "person"]
}
{
  "id": "def",
  "keywords": ["tree", "person"]
}
{
  "id": "ghi",
  "keywords": ["person", "human"]
}
...

假设我获得了前3个关键字存储桶，其余的显示在“other”中，如下所示:

/GET /my_index/_search
{
    "size": 0,
    "track_total_hits": true,
    "aggs": {
        "keyword_buckets": {
            "terms": {
                "field": "keywords.keyword",
                "size": 3
            }
        }
    }
}

索引中共有 2,232,121 个文档，但我得到的结果如下:

{
    "took": 256,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 2232121,
            "relation": "eq"
        },
        "max_score": null,
        "hits": []
    },
    "aggregations": {
        "keyword_buckets": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 6250132,
            "buckets": [
                {
                    "key": "person",
                    "doc_count": 326552
                },
                {
                    "key": "human",
                    "doc_count": 326529
                },
                {
                    "key": "photograph",
                    "doc_count": 222190
                }
            ]
        }
    }
}