gpt4 book ai didi

elasticsearch - Elasticsearch 上的存储桶计数

转载 作者:行者123 更新时间:2023-12-03 01:15:29 26 4
gpt4 key购买 nike

我正在尝试提取用户的可穿戴设备使用情况统计数据。忠实用户是指在最近 30 天内至少有 20 天使用了可穿戴设备、且平均每天使用时长超过 4 小时的用户。简而言之,忠实用户 =(至少 20 天使用 + 平均每日使用时长 > 4 小时)

在 Elasticsearch 中,每条使用记录作为一个文档被索引,其中包含用户 id、会话日期(sessionDate)和使用时长(usage_duration)。

{
"id":"AL-2930",
"usage_duration":4.5,
"sessionDate":"2020-05-01"
},
{
"id":"AL-2930",
"usage_duration":5.5,
"sessionDate":"2020-05-02"
},
{
"id":"AL-2931",
"usage_duration":3.5,
"sessionDate":"2020-05-01"
},
{
"id":"AL-2931",
"usage_duration":3.0,
"sessionDate":"2020-05-02"
}

我尝试运行的查询给出了正确的结果。

{

"aggs": {
"users": {
"terms": {
"field": "id",
"min_doc_count": 20,
"order" : { "_key" : "asc" }
},

"aggs": {
"avg_usage": {
"avg": {
"field": "usage_duration"
}

},
"usage_filter": {
"bucket_selector": {
"buckets_path": {
"avgUsage": "avg_usage"
},
"script": "params.avgUsage > 4.0"
}

}

}
}

}


}

我得到的结果是这样的(响应中的聚合名为 patients,对应上面查询中的 users 聚合):

{
"took": 15,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 2139,
"relation": "eq"
},
"max_score": null,
"hits": []
},
"aggregations": {
"patients": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 1926,
"buckets": [
{
"key": "BG-P-A100CR",
"doc_count": 24,
"avg_usage": {
"value": 4.5
}
},
{
"key": "BG-P-A102XF",
"doc_count": 24,
"avg_usage": {
"value": 5.5
}
},
{
"key": "BG-P-A103ZU",
"doc_count": 24,
"avg_usage": {
"value": 5.0
}
},
{
"key": "BG-P-A104IA",
"doc_count": 24,
"avg_usage": {
"value": 6.5
}
},
{
"key": "BG-P-A104ZL",
"doc_count": 24,
"avg_usage": {
"value": 4.5
}
},
{
"key": "BG-P-A106BT",
"doc_count": 24,
"avg_usage": {
"value": 5.0
}
},
{
"key": "BG-P-A110VY",
"doc_count": 24,
"avg_usage": {
"value": 5.5
}
}
]
}
}

我真正需要的是让查询返回所找到的存储桶的总数。我尝试过类似问题( Count buckets returned by sub aggregation )的答案,但没有帮助。

最佳答案

以下内容是否有帮助:

POST <your_index_name>/_search
{
"size": 0,
"aggs": {
"users": {
"terms": {
"field": "id",
"min_doc_count": 20,
"order" : { "_key" : "asc" },
"size": 100, <----- Added this
"show_term_doc_count_error": true <----- Added this
},
"aggs": {
"avg_usage": {
"avg": {
"field": "usage_duration"
}
},
"usage_filter": {
"bucket_selector": {
"buckets_path": {
"avgUsage": "avg_usage"
},
"script": "params.avgUsage > 4.0"
}
},
"bucket_count":{
"bucket_script": {
"buckets_path": {
"count": "_count"
},
"script": "return params.count"
}
}
}
},
"mybucketcount":{
"stats_bucket": {
"buckets_path":"users._count"
}
}
}
}

我将 "script": "params.avgUsage > 4.0" 替换为 "script": "params.avgUsage > 3.0",并将 min_doc_count 设为 2,然后针对您提到的示例文档运行了上述查询,得到以下响应:

{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"users" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "AL-2930",
"doc_count" : 2,
"avg_usage" : {
"value" : 5.0
},
"bucket_count" : {
"value" : 2.0
}
},
{
"key" : "AL-2931",
"doc_count" : 2,
"avg_usage" : {
"value" : 3.25
},
"bucket_count" : {
"value" : 2.0
}
}
]
},
"mybucketcount" : {
"count" : 2, <---- Note this.
"min" : 2.0,
"max" : 2.0,
"avg" : 2.0,
"sum" : 4.0
}
}
}

我假设您需要的是 Terms Aggregation(即 users 聚合)返回的存储桶总数,因此我只是在您已有的查询基础上添加了 Stats Bucket Aggregation(stats_bucket),其返回结果中的 count 即为存储桶总数。

如果有帮助请告诉我!

关于elasticsearch - Elasticsearch 上的存储桶计数,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/62729791/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com