gpt4 book ai didi

sorting - Elasticsearch 中的聚合和排序

转载 作者:行者123 更新时间:2023-12-03 02:12:04 24 4
gpt4 key购买 nike

我想在 Elasticsearch 中对查询的汇总结果进行排序
等效的SQL查询:-从表组中按col1选择col1,col2,sum(col3),col2按sum(col3)desc排序;
我尝试使用以下查询,它返回结果,但未按我期望的排序顺序

{
"from": 0,
"size": 0,
"_source": {
"includes": [
"col1",
"col2",
"SUM"
],
"excludes": []
},
"stored_fields": [
"col1",
"col2"
],
"aggregations": {
"col1": {
"terms": {
"field": "col1",
"size": 200,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false
},
"aggregations": {
"col2": {
"terms": {
"field": "col2",
"size": 10,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false
},
"aggregations": {
"SUM_0": {
"sum": {
"field": "col3"
}
},
"col3_bucket_sort": {
"bucket_sort": {
"sort": [
{ "SUM_0": { "order": "desc" } }
],
"size": 3
}
}

}
}
}
}
}
}
采样索引数据
{
"_index": "my_index",
"_type": "products",
"_id": "OJfBSXUB0GzAt2o_zVdS",
"_score": 1.0,
"_source": {
"product_name": "car",
"product_type": "retail",
"qty": 5
}
}
{
"_index": "my_index",
"_type": "report",
"_id": "OpfBSXUB0GzAt2o_zVfG1",
"_score": 1.0,
"_source": {
"product_name": "bike",
"product_type": "retail",
"qty": 5
}
},

{
"_index": "my_index",
"_type": "report",
"_id": "OpfBSXUB0GzAt2o_zVfG",
"_score": 1.0,
"_source": {
"product_name": "car",
"product_type": "retail",
"qty": 3
}
},
{
"_index": "my_index",
"_type": "report",
"_id": "OpfBSXUB0GzAt2o_zVfG2",
"_score": 1.0,
"_source": {
"product_name": "bike",
"product_type": "retail",
"qty": 1
}
}
预期的输出:-希望基于字段product_name和product_type汇总(分组)我的文档,并按sum(qty)排序
等效的SQl查询:-从product_table组中按product_name选择product_name,product_type,sum(qty),按sum(qty)desc选择product_type order;
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": null,
"hits": []
},
"aggregations": {
"product_name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "car",
"doc_count": 2,
"product_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": retail,
"doc_count": 2,
"SUM_0": {
"value":8
}
}
]
}
},
{
"key": "bike",
"doc_count": 2,
"product_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": retail,
"doc_count": 2,
"SUM_0": {
"value": 6
}
}
]
}
}

]
}
}
}
但我低于输出,即成功整合文档,但对sum(qty)不起作用
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": null,
"hits": []
},
"aggregations": {
"product_name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "bike",
"doc_count": 2,
"product_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": retail,
"doc_count": 2,
"SUM_0": {
"value": 6
}
}
]
}
},
{
"key": "car",
"doc_count": 2,
"product_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": retail,
"doc_count": 2,
"SUM_0": {
"value":8
}
}
]
}
}

]
}
}
}

最佳答案

由于您是按col1,col2对数据进行分组(即使用两项聚合),因此,当您尝试使用存储桶排序聚合基于总和对结果进行排序时,结果将不合适。
您需要使用max bucket aggregation,它是同级管道聚合,可在同级聚合中标识具有指定指标最大值的存储桶,并同时输出存储桶的值和键。
然后,您应该对汇总结果执行bucket sort aggregation
添加带有索引数据(与问题中的用法相同),搜索查询和搜索结果的工作示例。
搜索查询:

{
"size": 0,
"aggs": {
"agg1": {
"terms": {
"field": "product_name.keyword"
},
"aggs": {
"agg2": {
"terms": {
"field": "product_type.keyword"
},
"aggregations": {
"SUM_0": {
"sum": {
"field": "qty"
}
}
}
},
"sum_max_bucket": {
"max_bucket": {
"buckets_path": "agg2>SUM_0" <-- note this
}
},
"sum_bucket_sort": {
"bucket_sort": {
"sort": {
"sum_max_bucket": {
"order": "desc"
}
}
}
}
}
}
}
}
搜索结果:
"aggregations": {
"agg1": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "car",
"doc_count": 2,
"agg2": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "retail",
"doc_count": 2,
"SUM_0": {
"value": 8.0 <-- note this
}
}
]
},
"sum_max_bucket": {
"value": 8.0,
"keys": [
"retail"
]
}
},
{
"key": "bike",
"doc_count": 2,
"agg2": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "retail",
"doc_count": 2,
"SUM_0": {
"value": 6.0 <-- note this
}
}
]
},
"sum_max_bucket": {
"value": 6.0,
"keys": [
"retail"
]
}
}
]
}

关于sorting - Elasticsearch 中的聚合和排序,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/64463138/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com