gpt4 book ai didi

search - Elasticsearch每个groupId的聚合唯一属性

转载 作者:行者123 更新时间:2023-12-02 23:33:15 24 4
gpt4 key购买 nike

我在 Elasticsearch 中为每个产品都保存了一个单独的文档。
每个产品都有一个唯一的 productId、一个非唯一的 groupId,以及其他属性(例如:类别 categories)。

我希望能够对不同的属性值进行聚合,并且按唯一 groupId 的数量来计数(而不是按文档数量计数)。

例:

文件1:

{
"productId": 123,
"groupId": "xyz",
"categories": [{"value": "shoes"}, {"value": "t-shirt"}]
}

文件2:
{
"productId": 345,
"groupId": "xyz",
"categories": [{"value": "shoes"}, {"value": "t-shirt"}]
}

文件3:
{
"productId": 456,
"groupId": "abc",
"categories": [{"value": "t-shirt"}]
}

文件4:
{
"productId": 567,
"groupId": "abc",
"categories": [{"value": "shoes"}, {"value": "makeup"}]
}

预期结果,例如:
  • shoes(鞋子):2
  • t-shirt(T恤):2
  • makeup(化妆品):1

  • 也就是说,如果同一个类别值在相同 groupId 的多个文档中出现,我只想把它计数一次。

    我的查询:
    {  
    "from":0,
    "size":0,
    "query":{
    "filtered":{
    "filter":{

    }
    }
    },
    "aggs": {
    "group": {
    "terms": {"field": "group"},
    "aggs": {
    "brand": {
    "terms": {"field": "productMeta.brand.value"}
    }
    }
    }
    }
    }

    响应:
    {
    "took": 6,
    "timed_out": false,
    "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
    },
    "hits": {
    "total": 25,
    "max_score": 0,
    "hits": []
    },
    "aggregations": {
    "group": {
    "doc_count_error_upper_bound": 0,
    "sum_other_doc_count": 3,
    "buckets": [
    {
    "key": "wlmr34210507",
    "doc_count": 8,
    "brand": {
    "doc_count_error_upper_bound": 0,
    "sum_other_doc_count": 0,
    "buckets": [
    {
    "key": "generic",
    "doc_count": 8
    }
    ]
    }
    },
    {
    "key": "wlmr19524441",
    "doc_count": 4,
    "brand": {
    "doc_count_error_upper_bound": 0,
    "sum_other_doc_count": 0,
    "buckets": [
    {
    "key": "maybelline",
    "doc_count": 4
    }
    ]
    }
    },
    {
    "key": "wlmr34121549",
    "doc_count": 2,
    "brand": {
    "doc_count_error_upper_bound": 0,
    "sum_other_doc_count": 0,
    "buckets": [
    {
    "key": "maybelline",
    "doc_count": 2
    }
    ]
    }
    },
    {
    "key": "wlmr34317301",
    "doc_count": 2,
    "brand": {
    "doc_count_error_upper_bound": 0,
    "sum_other_doc_count": 0,
    "buckets": [
    {
    "key": "dream on me",
    "doc_count": 2
    }
    ]
    }
    },
    {
    "key": "bbfs40549552",
    "doc_count": 1,
    "brand": {
    "doc_count_error_upper_bound": 0,
    "sum_other_doc_count": 0,
    "buckets": [
    {
    "key": "samsung",
    "doc_count": 1
    }
    ]
    }
    },
    {
    "key": "bobb7937347",
    "doc_count": 1,
    "brand": {
    "doc_count_error_upper_bound": 0,
    "sum_other_doc_count": 0,
    "buckets": [
    {
    "key": "chicco",
    "doc_count": 1
    }
    ]
    }
    },
    {
    "key": "wlmr24241413",
    "doc_count": 1,
    "brand": {
    "doc_count_error_upper_bound": 0,
    "sum_other_doc_count": 0,
    "buckets": [
    {
    "key": "maybelline",
    "doc_count": 1
    }
    ]
    }
    },
    {
    "key": "wlmr27504560",
    "doc_count": 1,
    "brand": {
    "doc_count_error_upper_bound": 0,
    "sum_other_doc_count": 0,
    "buckets": [
    {
    "key": "mr. beer",
    "doc_count": 1
    }
    ]
    }
    },
    {
    "key": "wlmr33986448",
    "doc_count": 1,
    "brand": {
    "doc_count_error_upper_bound": 0,
    "sum_other_doc_count": 0,
    "buckets": [
    {
    "key": "mr. beer",
    "doc_count": 1
    }
    ]
    }
    },
    {
    "key": "wlmr40806575",
    "doc_count": 1,
    "brand": {
    "doc_count_error_upper_bound": 0,
    "sum_other_doc_count": 0,
    "buckets": [
    {
    "key": "healthtex",
    "doc_count": 1
    }
    ]
    }
    }
    ]
    }
    }
    }

    最佳答案

    所以基本上我可以通过使用基数(cardinality)聚合来解决此问题,如下所示:

    {  
    "from":0,
    "size":0,
    "query":{
    "filtered":{
    "filter":{
    }
    }
    },
    "sort":{
    "ts":{
    "order":"desc",
    "mode":"max",
    "ignore_unmapped":true
    }
    },
    "aggs":{
    "categories":{
    "terms":{
    "field":"productMeta.brand.value",
    "size":0
    },
    "aggs": {
    "category" : {
    "cardinality" : {
    "field" : "group"
    }
    }
    }
    }
    }
    }

    结果是每个类别桶中唯一 group 的计数(见每个桶里 "category" 的 "value",与按文档计数的 "doc_count" 不同):
    {
    "took": 4,
    "timed_out": false,
    "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
    },
    "hits": {
    "total": 71,
    "max_score": 0,
    "hits": []
    },
    "aggregations": {
    "categories": {
    "doc_count_error_upper_bound": 0,
    "sum_other_doc_count": 0,
    "buckets": [
    {
    "key": "chocolate",
    "doc_count": 23,
    "category": {
    "value": 23
    }
    },
    {
    "key": "notebook",
    "doc_count": 9,
    "category": {
    "value": 1
    }
    },
    {
    "key": "olive_oil",
    "doc_count": 7,
    "category": {
    "value": 7
    }
    },
    {
    "key": "physical_training",
    "doc_count": 5,
    "category": {
    "value": 5
    }
    },
    {
    "key": "ski",
    "doc_count": 5,
    "category": {
    "value": 2
    }
    },
    {
    "key": "gym_membership",
    "doc_count": 4,
    "category": {
    "value": 4
    }
    },
    {
    "key": "ski_boots",
    "doc_count": 4,
    "category": {
    "value": 1
    }
    },
    {
    "key": "vinegar",
    "doc_count": 4,
    "category": {
    "value": 4
    }
    },
    {
    "key": "bracelet",
    "doc_count": 3,
    "category": {
    "value": 3
    }
    },
    {
    "key": "handbags",
    "doc_count": 2,
    "category": {
    "value": 2
    }
    },
    {
    "key": "cider",
    "doc_count": 1,
    "category": {
    "value": 1
    }
    },
    {
    "key": "ice_cider",
    "doc_count": 1,
    "category": {
    "value": 1
    }
    },
    {
    "key": "jewelry_1",
    "doc_count": 1,
    "category": {
    "value": 1
    }
    },
    {
    "key": "laces",
    "doc_count": 1,
    "category": {
    "value": 1
    }
    },
    {
    "key": "stationery",
    "doc_count": 1,
    "category": {
    "value": 1
    }
    }
    ]
    }
    }
    }

    关于search - Elasticsearch每个groupId的聚合唯一属性,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/34156757/

    24 4 0
    Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
    广告合作:1813099741@qq.com 6ren.com