gpt4 book ai didi

sorting - 文档的重新评分和排序

转载 作者:行者123 更新时间:2023-11-29 02:52:50 25 4
gpt4 key购买 nike

我的目标是编写一个查询,该查询将根据文档中某个字段的值对文档进行重新评分。为此,我使用了重新评分查询,然后对结果进行排序。但是,对查询的解释告诉我,文档的排序是根据先前计算的分数而不是新分数完成的。

我看到以下说明我不能同时使用 rescore 和 sort。

“有时我们想显示结果,页面上第一个文档的顺序受到附加规则的影响。不幸的是,这不能通过重新评分功能来实现。第一个想法指向 window_size 参数,但这个参数在fact 与结果列表中的第一个文档无关,但与每个分片上返回的结果数有关。此外 window_size 不能小于页面大小。(如果小于,ElasticSearch 会自动使用页面大小)。另外,一个非常重要的事情 – 重新评分不能与排序相结合,因为排序是在重新评分引入的更改之后完成的。”

http://elasticsearchserverbook.com/elasticsearch-0-90-using-rescore/

我的查询是:

{
"query": {
"filtered": {
"query": {
"bool": {
"should": [
{
"constant_score": {
"query": {
"match": {
"question": {
"query": "diabetes"
}
}
},
"boost": 1
}
},
{
"dis_max": {
"queries": [
{
"constant_score": {
"query": {
"match": {
"question": {
"query": "diabetes"
}
}
},
"boost": 0.01
}
},
{
"constant_score": {
"query": {
"match": {
"answer_text": {
"query": "diabetes"
}
}
},
"boost": 0.0001
}
}
]
}
},
{
"dis_max": {
"queries": [
{
"constant_score": {
"query": {
"match_phrase": {
"question_phrase": {
"query": "what is diabetes",
"slop": 0
}
}
},
"boost": 100
}
},
{
"constant_score": {
"query": {
"match_phrase": {
"question_phrase": {
"query": "what is diabetes",
"slop": 1
}
}
},
"boost": 50
}
},
{
"constant_score": {
"query": {
"match_phrase": {
"question_phrase": {
"query": "what is diabetes",
"slop": 2
}
}
},
"boost": 33
}
},
{
"constant_score": {
"query": {
"match_phrase": {
"question_phrase": {
"query": "what is diabetes",
"slop": 3
}
}
},
"boost": 25
}
},
{
"constant_score": {
"query": {
"query_string": {
"default_field": "question_group_four",
"query": "what__is__diabetes"
}
},
"boost": 0.1
}
},
{
"constant_score": {
"query": {
"query_string": {
"default_field": "question_group_five",
"query": "what__is__diabetes"
}
},
"boost": 0.15
}
},
{
"constant_score": {
"query": {
"query_string": {
"default_field": "concept_words_no_synonyms_20",
"query": "what__is__diabetes"
}
},
"boost": 35
}
},
{
"constant_score": {
"query": {
"query_string": {
"default_field": "concept_words_no_synonyms_15",
"query": "what__is__diabetes"
}
},
"boost": 25
}
},
{
"constant_score": {
"query": {
"query_string": {
"default_field": "concept_words_no_synonyms_10",
"query": "what__is__diabetes"
}
},
"boost": 15
}
},
{
"constant_score": {
"query": {
"query_string": {
"default_field": "concept_words_20",
"query": "what__is__diabetes"
}
},
"boost": 28
}
},
{
"constant_score": {
"query": {
"query_string": {
"default_field": "concept_words_15",
"query": "what__is__diabetes"
}
},
"boost": 16
}
},
{
"constant_score": {
"query": {
"query_string": {
"default_field": "concept_words_10",
"query": "what__is__diabetes"
}
},
"boost": 13
}
},
{
"constant_score": {
"query": {
"query_string": {
"default_field": "concept_words_05",
"query": "what__is__diabetes"
}
},
"boost": 4
}
}
]
}
},
{
"dis_max": {
"queries": [
{
"constant_score": {
"query": {
"query_string": {
"default_field": "question_group_four",
"query": "diabetes"
}
},
"boost": 0.1
}
},
{
"constant_score": {
"query": {
"query_string": {
"default_field": "question_group_five",
"query": "diabetes"
}
},
"boost": 0.15
}
},
{
"constant_score": {
"query": {
"query_string": {
"default_field": "concept_words_no_synonyms_20",
"query": "diabetes"
}
},
"boost": 35
}
},
{
"constant_score": {
"query": {
"query_string": {
"default_field": "concept_words_no_synonyms_15",
"query": "diabetes"
}
},
"boost": 25
}
},
{
"constant_score": {
"query": {
"query_string": {
"default_field": "concept_words_no_synonyms_10",
"query": "diabetes"
}
},
"boost": 15
}
},
{
"constant_score": {
"query": {
"query_string": {
"default_field": "concept_words_20",
"query": "diabetes"
}
},
"boost": 28
}
},
{
"constant_score": {
"query": {
"query_string": {
"default_field": "concept_words_15",
"query": "diabetes"
}
},
"boost": 16
}
},
{
"constant_score": {
"query": {
"query_string": {
"default_field": "concept_words_10",
"query": "diabetes"
}
},
"boost": 13
}
},
{
"constant_score": {
"query": {
"query_string": {
"default_field": "concept_words_05",
"query": "diabetes"
}
},
"boost": 4
}
}
]
}
}
],
"disable_coord": true
}
},
"filter": {
"and": [
{
"term": {
"posted_by_expert": false
}
},
{
"term": {
"tip_question": false
}
},
{
"term": {
"show_in_work_queue": true
}
},
{
"range": {
"verified_answers_count": {
"gt": 0
}
}
}
]
}
}
},
"rescore": {
"window_size": 100,
"query": {
"rescore_query": {
"function_score": {
"functions": [
{
"script_score": {
"script": "_score * _source.concierge_boost"
}
}
]
}
}
}
},
"sort": [
"_score",
{
"count_words_with_high_concepts": {
"order": "asc"
}
},
{
"popularity": {
"order": "desc"
}
},
{
"length": {
"order": "asc"
}
}
],
"fields": [],
"size": 10,
"from": 0

非常感谢任何帮助!

最佳答案

这确实是不可能的。 But this has been discussed并决定目前不值得实现。不过,github 上的讨论揭示了这方面的困难——需要对文档进行排序,选择前 100 个(在你的情况下),然后应用重新评分,然后再次对它们进行排序。我建议阅读该 github 问题中的评论,尤其是来自 simonw 的评论。这个问题仍然悬而未决,但似乎不会很快实现,如果有的话。

关于您在另一级评分后的排序,我理解只需要对少数文档进行重新评分,但这似乎是不可能的。如果您将查询包装在另一个 function_score 中,您在其中定义了一个 script_score 函数来计算最终分数,会怎样?像这样:

{
"query": {
"function_score": {
"query": {
.......
},
"functions": [
{
"script_score": {
"script": "doc['concierge_boost'].value"
}
}
]
}
},
"sort": [
"_score",
{
"count_words_with_high_concepts": {
"order": "asc"
}
},
{
"popularity": {
"order": "desc"
}
},
{
"length": {
"order": "asc"
}
}
],
"fields": [],
"size": 10,
"from": 0
}

关于sorting - 文档的重新评分和排序,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/30634016/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com