gpt4 book ai didi

elasticsearch - `terms` 聚合的条件

转载 作者:行者123 更新时间:2023-12-03 01:31:42 35 4
gpt4 key购买 nike

我想根据聚合数据在换句话说过滤数据中放置一个条件。

目前,我有一个查询

GET sense/_search
{
"size": 0,
"aggs": {
"dates": {
"date_histogram": {
"field": "@timestamp",
"interval": "1d",
"format": "yyyy-MM-dd",
"offset": "+4h"
},
"aggs": {
"unique_sessions": {
"terms": {
"field": "sessionId"
}
}
}
}
}
}

返回这种数据
{
"aggregations" : {
"dates" : {
"buckets" : [
{
"key_as_string" : "2019-03-31",
"key" : 1554004800000,
"doc_count" : 14,
"unique_sessions" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "83e1c3a4-341c-4ac3-a81e-f00336ee1dfb",
"doc_count" : 3
},
{
"key" : "99c4d312-2477-4bf7-ad02-ef76f50443f9",
"doc_count" : 3
},
{
"key" : "425b840f-9604-4f1d-ab18-96a9a7ae44e0",
"doc_count" : 1
},
{
"key" : "580b1f6c-6256-4f38-9803-2cc79a0a63d7",
"doc_count" : 2
},
{
"key" : "8929d75d-153c-4b66-8dd7-2eacb7974b95",
"doc_count" : 1
},
{
"key" : "8da5d732-d1e7-4a63-8f02-2b84a8bdcb62",
"doc_count" : 2
}
]
}
},
{
"key_as_string" : "2019-04-01",
"key" : 1554091200000,
"doc_count" : 1,
"unique_sessions" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "513d4532-304d-44c7-bdc7-398795800383",
"doc_count" : 1
},
{
"key" : "8da5d732-d1e7-4a63-8f02-2791poc34gq1",
"doc_count" : 2
}
]
}
}
]
}
}
}


所以我想检索唯一的计数 sesssionId在哪里 doc_count等于 1。

这意味着我希望得到带有键 "2019-03-31" 的日期直方图的结果
将显示 2(因为名称为 unique_sessions 的聚合在存储桶中只有两个 session , doc_count 等于一个),因此 "2019-04-01"结果将显示 1。

不知道如何实现这种聚合。

最佳答案

您需要使用 Bucket Selector Aggregation根据您拥有的条款聚合。

以下是您的查询的显示方式:

示例查询

POST <your_index_name>/_search
{
"size":0,
"aggs":{
"dates":{
"date_histogram":{
"field":"@timestamp",
"interval":"1d",
"format":"yyyy-MM-dd",
"offset":"+4h"
},
"aggs":{
"unique_sessions":{
"terms":{
"field":"sessionId"
},
"aggs":{
"unique_buckets":{
"bucket_selector":{
"buckets_path":{
"count":"_count"
},
"script":"params.count==1"
}
}
}
}
}
}
}
}

请注意,在这种情况下,您最终会得到空桶,如下面的回复中所述。

样本响应
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 9,
"max_score": 0,
"hits": []
},
"aggregations": {
"dates": {
"buckets": [
{
"key_as_string": "2018-12-31",
"key": 1546228800000,
"doc_count": 3,
"unique_sessions": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "83e1c3a4-3AFA1c-4ac3-a81e-f00336ee1dfb",
"doc_count": 1
}
]
}
},
{
"key_as_string": "2019-01-01",
"key": 1546315200000,
"doc_count": 0,
"unique_sessions": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key_as_string": "2019-01-02",
"key": 1546401600000,
"doc_count": 3,
"unique_sessions": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key_as_string": "2019-01-03",
"key": 1546488000000,
"doc_count": 3,
"unique_sessions": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "83e1c3a4-3AFA1c-4ab3-a81e-f00336ee1dfb",
"doc_count": 1
}
]
}
}
]
}
}
}

在这种情况下,如果您想要过滤存储桶以仅显示与具有 count==1 的子存储桶匹配的父存储桶。只需使用下面的查询,我在其中添加了另一个存储桶选择器子句。

仔细注意查询的结构。

精细化查询解决方案:
POST <your_index_name>/_search
{
"size":0,
"aggs":{
"dates":{
"date_histogram":{
"field":"@timestamp",
"interval":"1d",
"format":"yyyy-MM-dd",
"offset":"+4h"
},
"aggs":{
"unique_sessions":{
"terms":{
"field":"sessionId"
},
"aggs":{
"unique_buckets":{
"bucket_selector":{
"buckets_path":{
"count":"_count"
},
"script":"params.count==1"
}
}
}
},
"terms_bucket_clause": {
"bucket_selector": {
"buckets_path": {
"count": "unique_sessions._bucket_count"
},
"script": "params.count>0"
}
}
}
}
}
}

精细化查询响应
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 9,
"max_score": 0,
"hits": []
},
"aggregations": {
"dates": {
"buckets": [
{
"key_as_string": "2018-12-31",
"key": 1546228800000,
"doc_count": 3,
"unique_sessions": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "83e1c3a4-3AFA1c-4ac3-a81e-f00336ee1dfb",
"doc_count": 1
}
]
}
},
{
"key_as_string": "2019-01-03",
"key": 1546488000000,
"doc_count": 3,
"unique_sessions": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "83e1c3a4-3AFA1c-4ab3-a81e-f00336ee1dfb",
"doc_count": 1
}
]
}
}
]
}
}
}

请注意两个查询中结果的差异。希望这可以帮助!

关于elasticsearch - `terms` 聚合的条件,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/55497303/

35 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com