gpt4 book ai didi

elasticsearch - 匹配query_string文档的分数

转载 作者:行者123 更新时间:2023-12-02 23:29:24 25 4
gpt4 key购买 nike

我目前正在处理一个需要在 ES 上执行的、非常棘手的查询。
我的文档是嵌套文档,它们的映射(mapping)看起来像这样:

"mydocs" : {
"properties" : {
"doc" : {
"type" : "nested",
"properties" : {
"name" : {"type" : "string", "store" : "yes", "index" : "analyzed"},
"tagln" : {"type" : "string", "store" : "yes", "index" : "analyzed"},
"tags" : {"type" : "string", "store" : "yes", "index" : "analyzed"},
"featured" : {"type" : "integer", "store" : "yes", "index" : "not_analyzed"},
"blkd" : {"type" : "integer", "store" : "yes", "index" : "not_analyzed"},
... etc ...
}

我正在尝试通过一个特殊的评分算法来提升(boost)name、tagln 和 tags 字段的权重,该算法的得分为:featured * 10000 + [在 name 中匹配] * 1000 + [在 tagln 中匹配] * 10 + [在 tags 中匹配] * 10。我的查询如下:
{
"from" : 0,
"size" : 10,
"query" : {
"nested" : {
"query" : {
"filtered" : {
"query" : {
"bool" : {
"must" : [ {
"term" : {
"doc.blkd" : 0
}
} ],
"should" : [ {
"function_score" : {
"functions" : [ {
"field_value_factor" : {
"field" : "doc.featured",
"factor" : 10000.0
}
} ],
"score_mode" : "sum",
"boost_mode" : "sum"
}
}, {
"constant_score" : {
"filter" : {
"query_string" : {
"query" : "featured*",
"fields" : [ "doc.name^1000.0" ]
}
},
"boost" : 1000.0
}
}, {
"constant_score" : {
"filter" : {
"query_string" : {
"query" : "featured*",
"fields" : [ "doc.tags^10.0" ],
"boost" : 10.0
}
}
}
}, {
"constant_score" : {
"filter" : {
"query_string" : {
"query" : "featured*",
"fields" : [ "doc.tagln^10.0" ],
"boost" : 10.0
}
}
}
} ],
"minimum_should_match" : "0"
}
}
}
},
"path" : "doc",
"score_mode" : "sum"
}
},
"explain" : false,
"sort" : [ {
"_score" : { }
} ]
}

分数并未体现应有的提升(boost):featured 字段的得分按预期工作,但 query_string 中的 boost 不起作用,
名称中带有“aaa”的文档得分仅为5或0。而featured = 1的得分为4000/6000/7500,依此类推。

首先,分数不是10000+,这很奇怪(可能是由于分数的许多因素所致),但是名称中匹配的查询字符串对分数没有任何可见的影响。

我该如何解决该问题或至少对其进行更好的调试(以查看分数的构建方式)?
我尝试将 explain 改为 true,但得到的只是下面这个没用的(或者说对我而言不可读的)解释:
"_explanation": {
"value": 4000.0024,
"description": "sum of:",
"details": [
{
"value": 4000.0024,
"description": "Score based on child doc range from 387 to 387",
"details": []
},
{
"value": 0,
"description": "match on required clause, product of:",
"details": [
{
"value": 0,
"description": "# clause",
"details": []
},
{
"value": 0.0009999962,
"description": "-ConstantScore(_type:.percolator) #(+*:* -_type:__*), product of:",
"details": [
{
"value": 1,
"description": "boost",
"details": []
},
{
"value": 0.0009999962,
"description": "queryNorm",
"details": []
}
]
}
]
}
]
}

*已编辑*

感谢keety,我能够提供更多信息:
在添加 disable_coord: true 以及 inner_hits 的 explain: true 之后,
我已经尝试了各种方式来“提升”(boost)query_string。查询如下:
{
"from" : 0,
"size" : 10,
"query" : {
"nested" : {
"query" : {
"filtered" : {
"query" : {
"bool" : {
"must" : [ {
"term" : {
"doc.blkd" : 0
}
} ],
"should" : [ {
"function_score" : {
"functions" : [ {
"field_value_factor" : {
"field" : "doc.featured",
"factor" : 10000.0
}
} ],
"score_mode" : "sum",
"boost_mode" : "sum"
}
}, {
"constant_score" : {
"filter" : {
"query_string" : {
"query" : "*featured*",
"fields" : [ "doc.name^1000.0" ]
}
},
"boost" : 1000.0
}
}, {
"query_string" : {
"query" : "*featured*",
"fields" : [ "doc.tags^100.0" ],
"boost" : 100.0
}
}, {
"constant_score" : {
"filter" : {
"query_string" : {
"query" : "*featured*",
"fields" : [ "doc.tagln^10.0" ],
"boost" : 10.0
}
}
}
} ],
"disable_coord" : true,
"minimum_should_match" : "0"
}
},
"filter" : {
"bool" : {
"should" : [ {
"query_string" : {
"query" : "*featured*",
"fields" : [ "doc.name^1000000.0", "doc.tags^10.0", "doc.tagln^10.0" ],
"boost" : 1000.0
}
} ],
"minimum_should_match" : "0"
}
}
}
},
"path" : "doc",
"score_mode" : "sum",
"inner_hits" : {
"explain" : "true"
}
}
},
"explain" : false,
"sort" : [ {
"_score" : { }
} ]
}

如您所见,我已经将 query_string 添加到过滤器中,并将其中一个查询(doc.tags)从 constant_score 改为普通的 query_string。

该文档的 explain 输出现在看起来像这样:
"max_score": 10001,
"hits": [
{
"_index": "myindex",
"_type": "mydocs",
"_id": "1111",
"_score": 10001,
"_ttl": 86158563,
"_source": {
"meta": {
"id": "1111",
"rev": "35-14602ccf5c3d429e0000000002000000",
"expiration": 0,
"flags": 33554432
},
"doc": {
"featured": 1,
"tagln": "hello location 1",
"blkd": 0,
"tags": [
"UsLocTaglinefeat"
],
"name": "hello US location featured"
}
},
"inner_hits": {
"doc": {
"hits": {
"total": 1,
"max_score": 10001,
"hits": [
{
"_shard": 1,
"_node": "YIXx2rrKR2O5q9519FIr_Q",
"_index": "myindex",
"_type": "mydocs",
"_id": "1111",
"_nested": {
"field": "doc",
"offset": 0
},
"_score": 10001,
"_source": {
"featured": 1,
"tagln": "hello location 1",
"blkd": 0,
"tags": [
"UsLocTaglinefeat"
],
"name": "hello US location featured"
},
"_explanation": {
"value": 10001,
"description": "sum of:",
"details": [
{
"value": 10001,
"description": "sum of:",
"details": [
{
"value": 0.0041682906,
"description": "weight(doc.blkd:`\b\u0000\u0000\u0000\u0000 in 0) [PerFieldSimilarity], result of:",
"details": [
{
"value": 0.0041682906,
"description": "score(doc=0,freq=1.0), product of:",
"details": [
{
"value": 0.0020365636,
"description": "queryWeight, product of:",
"details": [
{
"value": 2.0467274,
"description": "idf(docFreq=177, maxDocs=507)",
"details": []
},
{
"value": 0.0009950341,
"description": "queryNorm",
"details": []
}
]
},
{
"value": 2.0467274,
"description": "fieldWeight in 0, product of:",
"details": [
{
"value": 1,
"description": "tf(freq=1.0), with freq of:",
"details": [
{
"value": 1,
"description": "termFreq=1.0",
"details": []
}
]
},
{
"value": 2.0467274,
"description": "idf(docFreq=177, maxDocs=507)",
"details": []
},
{
"value": 1,
"description": "fieldNorm(doc=0)",
"details": []
}
]
}
]
}
]
},
{
"value": 10000.001,
"description": "sum of",
"details": [
{
"value": 0.0009950341,
"description": "*:*, product of:",
"details": [
{
"value": 1,
"description": "boost",
"details": []
},
{
"value": 0.0009950341,
"description": "queryNorm",
"details": []
}
]
},
{
"value": 10000,
"description": "min of:",
"details": [
{
"value": 10000,
"description": "field value function: none(doc['doc.featured'].value * factor=10000.0)",
"details": []
},
{
"value": 3.4028235e+38,
"description": "maxBoost",
"details": []
}
]
}
]
},
{
"value": 0.9950341,
"description": "ConstantScore(doc.name:*featured*), product of:",
"details": [
{
"value": 1000,
"description": "boost",
"details": []
},
{
"value": 0.0009950341,
"description": "queryNorm",
"details": []
}
]
}
]
},
{
"value": 0,
"description": "match on required clause, product of:",
"details": [
{
"value": 0,
"description": "# clause",
"details": []
},
{
"value": 0.0009950341,
"description": "((doc.name:*featured*)^1000000.0 | (doc.tags:*featured*)^10.0 | (doc.tagln:*featured*)^10.0), product of:",
"details": [
{
"value": 1,
"description": "boost",
"details": []
},
{
"value": 0.0009950341,
"description": "queryNorm",
"details": []
}
]
}
]
}
]
}
}
]
}
}
}
},

似乎唯一对得分有任何影响的 query_string 是过滤器内部的那个,但我仍然无法提升它的得分……
欢迎任何提示 :) 谢谢

最佳答案

对于 OP 中的查询,您需要在 bool 查询中启用 disable_coord 才能获得所需的行为。

同时启用inner_hits并在其中设置explain:true将提供嵌套文档的评分详细信息。此功能在Elasticsearch 1.5及更高版本中可用。

例:

{
"query": {
"nested": {
"query": {
"filtered": {
"query": {
"bool": {
"disable_coord": "true",
"must": [
{
"term": {
"doc.blkd": 0
}
}
],
"should": [
{
"function_score": {
"functions": [
{
"field_value_factor": {
"field": "doc.featured",
"factor": 10000
}
}
],
"score_mode": "sum",
"boost_mode": "sum"
}
},
{
"constant_score": {
"filter": {
"query_string": {
"query": "featured*",
"fields": [
"doc.name^1000.0"
]
}
},
"boost": 1000
}
},
{
"constant_score": {
"filter": {
"query_string": {
"query": "featured*",
"fields": [
"doc.tags^10.0"
],
"boost": 10
}
}
}
},
{
"constant_score": {
"filter": {
"query_string": {
"query": "featured*",
"fields": [
"doc.tagln^10.0"
],
"boost": 10
}
}
}
}
],
"minimum_should_match": "0"
}
}
}
},
"path": "doc",
"score_mode": "sum",
"inner_hits" : {
"explain" : "true"
}
}
}

}

已编辑

另外,使用 function_score 重写查询可能更简单,如下例所示。
   {
"query": {
"nested": {
"query": {
"function_score": {
"functions": [
{
"field_value_factor": {
"field": "doc.featured",
"factor": 10000
}
},
{
"filter": {
"query_string": {
"query": "*featured*",
"fields": [
"doc.name^1000.0"
]
}
},
"weight": 1000
},
{
"filter": {
"query_string": {
"query": "*featured*",
"fields": [
"doc.tags^1000.0"
]
}
},
"weight": 100
},
{
"weight": 10,
"filter": {
"query_string": {
"query": "*featured*",
"fields": [
"doc.tagln^10.0"
]
}
}
}
],
"query": {
"term": {
"doc.blkd": 0
}
},
"score_mode": "sum",
"boost_mode": "sum"
}
},
"path": "doc",
"score_mode": "sum",
"inner_hits": {
"explain": "true"
}
}
}
}

关于elasticsearch - 匹配query_string文档的分数,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/38292157/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com