elasticsearch - 如何在Elasticsearch中使用terms聚合对TOP N个文档求和？-6ren

elasticsearch - 如何在Elasticsearch中使用terms聚合对TOP N个文档求和？

转载作者：行者123 更新时间：2023-12-03 02:26:09

以下是elasticsearch的样本文档。

         {
            "_index": "social",
            "_type": "social",
            "_id": "1632560884596186633",
            "_score": 1,
            "_source": {
                "created_date": "2017-10-24",
                "reach": 1692,                    
                "social_id": 200
            }
        },
        {
            "_index": "social",
            "_type": "social",
            "_id": "1626693964184981799",
            "_score": 1,
            "_source": {
                "created_date": "2017-10-25",
                "reach": 1692,
                "social_id": 100
            }
        },
        {
            "_index": "social",
            "_type": "social",
            "_id": "162669396418498170",
            "_score": 1,
            "_source": {
                "created_date": "2017-10-25",
                "reach": 1692,
                "social_id": 50
            }
        },
        {
            "_index": "social",
            "_type": "social",
            "_id": "1626693964184981756",
            "_score": 1,
            "_source": {
                "created_date": "2017-10-25",
                "reach": 1692,
                "social_id": 25
            }
        }

问题：针对每个 social_id，按 created_date 取前 2 个文档，并对这些文档的 reach 求和。

我尝试过的

{
"size": 0,
"aggs": {
    "reach_bucket": {
        "terms": {
            "size": 200,
            "field": "social_id"
        },
        "aggs": {
            "media_reach_bucket": {
                "terms": {
                    "field": "created_date",
                    "size": 200
                },
                "aggs": {
                    "top_sales_hits": {
                        "top_hits": {
                            "sort": [
                                {
                                    "created_date": {
                                        "order": "desc"
                                    }
                                }
                            ],
                            "_source": {
                                "includes": [
                                    "created_date",
                                    "reach"
                                ]
                            },
                            "size": 2
                        }
                    }
                }
            }
        }
    }
}
}

问题:

top_hits 聚合不支持子聚合。

任何建议将不胜感激。

最佳答案

按天分桶时，您可能需要使用 date_histogram 而不是 terms(我认为)。但更重要的是，您应该按 reach 而不是 created_date 对 top_hits 进行排序，因为在每天的桶内 created_date 都是相同的。

{
  "size": 0,
  "aggs": {
    "reach_bucket": {
      "terms": {
        "size": 200,
        "field": "social_id"
      },
      "aggs": {
        "media_reach_bucket": {
          "date_histogram": {
            "field": "created_date",
            "calendar_interval": "day"
          },
          "aggs": {
            "top_sales_hits": {
              "top_hits": {
                "sort": [
                  {
                    "reach": {
                      "order": "desc"
                    }
                  }
                ],
                "_source": {
                  "includes": [
                    "reach"
                  ]
                },
                "size": 2
              }
            }
          }
        }
      }
    }
  }
}

这样得到的 top_hits 结果如下：

"aggregations" : {
    "reach_bucket" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : 100,
          "doc_count" : 4,
          "media_reach_bucket" : {
            "buckets" : [
              {
                "key_as_string" : "2017-10-24T00:00:00.000Z",
                "key" : 1508803200000,
                "doc_count" : 4,
                "top_sales_hits" : {
                  "hits" : {
                    "total" : {
                      "value" : 4,
                      "relation" : "eq"
                    },
                    "max_score" : null,
                    "hits" : [
                      {
                        "_index" : "kart",
                        "_type" : "_doc",
                        "_id" : "3iLJRnEBZbobBB0NiV8R",
                        "_score" : null,
                        "_source" : {
                          "reach" : 40
                        },
                        "sort" : [
                          40
                        ]
                      },
                      {
                        "_index" : "kart",
                        "_type" : "_doc",
                        "_id" : "3SLJRnEBZbobBB0Nhl-Y",
                        "_score" : null,
                        "_source" : {
                          "reach" : 30
                        },
                        "sort" : [
                          30
                        ]
                      }
                    ]
                  }
                }
              }
            ]
          }
        }
      ]
    }
  }