gpt4 book ai didi

ElasticSearch 从每个类别中选择一个得分最高的产品

转载 作者:行者123 更新时间:2023-12-04 11:43:28 25 4
gpt4 key购买 nike

我正在尝试查询包含产品信息的 ES 索引,其中包含 product_id、category_id 和 variant_id 字段。每个产品都属于特定的类别和变体:

{
"product_id" : "PRODUCT_12345",
"category_id" : 1,
"variant_id" : 5
}
我还有一个 product_id 及其分数的列表: [{'product_id': 'PRODUCT_46831', 'score': 1}, {'product_id': 'PRODUCT_47139', 'score': 0.95}, {'product_id': 'PRODUCT_46833', 'score': 0.8999999999999999}, {'product_id': 'PRODUCT_46834', 'score': 0.8499999999999999}, {'product_id': 'PRODUCT_46835', 'score': 0.7999999999999998}] 。这些分数是使用算法计算的,每个 product_id 都存在于 ES 中。我想过滤这个列表,使每个类别和每个变体中只保留一个产品,并且保留的应是该类别和变体中得分最高的产品。以上面的列表为例,假设 PRODUCT_46831, PRODUCT_47139, PRODUCT_46833 属于 category 1, PRODUCT_46834, PRODUCT_46835 属于 category 2; PRODUCT_46831, PRODUCT_46834 属于 variant 1, PRODUCT_46833, PRODUCT_47139, PRODUCT_46835 属于 variant 2。那么按类别分组将得到列表 [PRODUCT_46831, PRODUCT_46834],因为 PRODUCT_46831 和 PRODUCT_46834 分别是各自类别中得分最高的产品。
再将 PRODUCT_46831, PRODUCT_46834 按 variant_id 分组,将得到最终结果: [PRODUCT_46831],因为 PRODUCT_46831 和 PRODUCT_46834 同属 variant 1,而 PRODUCT_46831 在两者中得分最高。
我尝试使用聚合为每个类别创建桶,再通过 function_score 将每个产品的权重(weight)设为其得分,按得分排序并取每个桶中排名第一的产品。这样我能够得到每个 category_id 中得分最高的产品列表,但我不知道如何在这个列表之上再按 variant_id 进行分组。以下是我目前的查询:
{
  "query": {
    "function_score": {
      "functions": [
        {
          "field_value_factor": {
            "field": "item_id",
            "factor": 0
          }
        },
        {
          "filter": { "term": { "id": "PRODUCT_46831" } },
          "weight": 1
        },
        {
          "filter": { "term": { "id": "PRODUCT_47139" } },
          "weight": 0.95
        },
        {
          "filter": { "term": { "id": "PRODUCT_46833" } },
          "weight": 0.9
        },
        {
          "filter": { "term": { "id": "PRODUCT_46834" } },
          "weight": 0.85
        },
        {
          "filter": { "term": { "id": "PRODUCT_46835" } },
          "weight": 0.8
        }
      ],
      "score_mode": "sum",
      "boost_mode": "sum",
      "query": {
        "bool": {
          "must": [
            {
              "terms": {
                "id": [
                  "PRODUCT_46831",
                  "PRODUCT_47139",
                  "PRODUCT_46833",
                  "PRODUCT_46834",
                  "PRODUCT_46835"
                ],
                "boost": 0
              }
            }
          ],
          "adjust_pure_negative": true,
          "boost": 0
        }
      }
    }
  },
  "aggs": {
    "category_id_max_product": {
      "terms": { "field": "category_id" },
      "aggs": {
        "max_score": {
          "top_hits": {
            "sort": [
              { "_score": { "order": "desc" } }
            ],
            "_source": {
              "includes": ["_id", "category_id", "variant_id", "_score"]
            },
            "size": 1
          }
        }
      }
    }
  },
  "_source": ["_id", "category_id", "variant_id", "_score"],
  "size": 0
}
文档:
{
"_index" : "search_entities",
"_type" : "_doc",
"_id" : "PRODUCT_46831",
"_score" : null,
"_source" : {
"category_id" : 2296,
"variant_id" : 564819,
"id" : "PRODUCT_46831"
}
},
{
"_index" : "search_entities",
"_type" : "_doc",
"_id" : "PRODUCT_47139",
"_score" : null,
"_source" : {
"category_id" : 2296,
"variant_id" : 723311,
"id" : "PRODUCT_47139"
}
},
{
"_index" : "search_entities",
"_type" : "_doc",
"_id" : "PRODUCT_46833",
"_score" : null,
"_source" : {
"category_id" : 2296,
"variant_id" : 723311,
"id" : "PRODUCT_46833"
}
},
{
"_index" : "search_entities",
"_type" : "_doc",
"_id" : "PRODUCT_46834",
"_score" : null,
"_source" : {
"category_id" : 3321,
"variant_id" : 564819,
"id" : "PRODUCT_46834"
}
},
{
"_index" : "search_entities",
"_type" : "_doc",
"_id" : "PRODUCT_46835",
"_score" : null,
"_source" : {
"category_id" : 3321,
"variant_id" : 723311,
"id" : "PRODUCT_46835"
}
},

最佳答案

我可以使用脚本化指标(scripted_metric)聚合来做到这一点,这不是最优的方案,但确实有效:

{
  "query": {
    "function_score": {
      "functions": [
        {
          "filter": { "term": { "id": "PRODUCT_229648" } },
          "weight": 0.9
        },
        {
          "filter": { "term": { "id": "PRODUCT_108882" } },
          "weight": 0.95
        },
        {
          "filter": { "term": { "id": "PRODUCT_108881" } },
          "weight": 0.8
        },
        {
          "filter": { "term": { "id": "PRODUCT_172062" } },
          "weight": 0.95
        },
        {
          "filter": { "term": { "id": "PRODUCT_172060" } },
          "weight": 0.9
        },
        {
          "filter": { "term": { "id": "PRODUCT_216303" } },
          "weight": 0.95
        },
        {
          "filter": { "term": { "id": "PRODUCT_220975" } },
          "weight": 0.96
        }
      ],
      "boost_mode": "sum",
      "query": {
        "bool": {
          "must": [
            {
              "terms": {
                "id": [
                  "PRODUCT_229648",
                  "PRODUCT_108882",
                  "PRODUCT_108881",
                  "PRODUCT_172062",
                  "PRODUCT_172060",
                  "PRODUCT_216303",
                  "PRODUCT_220975"
                ]
              }
            }
          ],
          "adjust_pure_negative": true
        }
      }
    }
  },
  "aggs": {
    "intentPathsCountAgg": {
      "scripted_metric": {
        "init_script": """
          // Per-shard state: the best-scoring candidate seen so far for each
          // variant group, keyed by the variant group id rendered as a string.
          state.variantMap = new HashMap();
        """,
        "map_script": """
          // Runs once per matching document on the shard.
          double score = _score;
          String key = String.valueOf(doc['variant_group_id'].value);
          Map map = [
            'score' : score,
            'category_id' : String.valueOf(doc['category_brand_id'].value),
            'product_id' : doc['item_id'].value
          ];

          // Keep only the highest-scoring product per variant group; on a tie
          // the candidate that was seen first stays.
          def shardBest = state.variantMap.get(key);
          if (shardBest == null || shardBest.score < score) {
            state.variantMap.put(key, map);
          }
        """,
        "combine_script": """
          // Hand this shard's per-variant winners to the reduce phase.
          return state.variantMap;
        """,
        "reduce_script": """
          // Step 1: merge the per-shard maps so that every variant group has
          // exactly one winner across the whole index. Without this merge a
          // variant whose documents live on several shards would contribute
          // one candidate per shard to the category pass below.
          Map variantWinners = new HashMap();
          for (shardState in states) {
            // A shard may report null instead of a map; skip it.
            if (shardState == null) {
              continue;
            }
            for (String key : shardState.keySet()) {
              def shardCandidate = shardState.get(key);
              def variantBest = variantWinners.get(key);
              if (variantBest == null || variantBest.score < shardCandidate.score) {
                variantWinners.put(key, shardCandidate);
              }
            }
          }

          // Step 2: among the variant winners keep the highest-scoring
          // product of each category.
          Map categoryMap = new HashMap();
          for (winner in variantWinners.values()) {
            String categoryKey = winner.category_id;
            def categoryBest = categoryMap.get(categoryKey);
            if (categoryBest == null || categoryBest.score < winner.score) {
              categoryMap.put(categoryKey, [
                'score' : winner.score,
                'product_id' : winner.product_id
              ]);
            }
          }

          // Result shape: category id -> { score, product_id }.
          return categoryMap;
        """
      }
    }
  },
  "_source": ["_id", "category_id", "variant_group_id", "item_id", "id"],
  "size": 0
}

关于ElasticSearch 从每个类别中选择一个得分最高的产品,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/69100152/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com