gpt4 book ai didi

使用嵌套 function_scores 对 elasticsearch 进行评分

转载 作者:行者123 更新时间:2023-11-29 02:57:36 35 4
gpt4 key购买 nike

我正在尝试为嵌套评分函数找到正确的方法。

数据:

PUT test

PUT test/test/_mapping
{
  "properties": {
    "driver_id": { "type": "integer" },
    "driver_name": { "type": "string" },
    "cities": {
      "type": "nested",
      "properties": {
        "city_id": { "type": "integer" },
        "used": { "type": "float" }
      }
    },
    "cars": {
      "type": "nested",
      "properties": {
        "car_id": { "type": "integer" },
        "used": { "type": "float" }
      }
    }
  }
}


PUT test/test/1
{
  "id":1,
  "driver_name":"Lady Smith",
  "cars":[{"car_id":1,"brand":"Ford Focus","used":0.5},{"car_id":2,"brand":"Toyota Corola","used":0.5}],
  "cities":[{"city_id":3,"name":"Tel Aviv","used":0.8},{"city_id":4,"name":"New York","used":0.2}]
}
PUT test/test/2
{
  "id":2,
  "driver_name":"John Smith",
  "cars":[{"car_id":1,"brand":"Ford Focus","used":0.3},{"car_id":2,"brand":"Toyota Corola","used":0.3}],
  "cities":[{"city_id":3,"name":"Tel Aviv","used":0.8},{"city_id":4,"name":"New York","used":0.2}]
}
PUT test/test/3
{
  "id":3,
  "driver_name":"Will Smith",
  "cars":[{"car_id":1,"brand":"Ford Focus","used":0.1}],
  "cities":[{"city_id":4,"name":"New York","used":0.2}]
}
PUT test/test/4
{
  "id":4,
  "driver_name":"Ash Smith",
  "cars":[],
  "cities":[]
}

简单地说,基于这些数据,我想写一个查询,找出最适合在特拉维夫同时驾驶福特(Ford Focus)和卡罗拉(Toyota Corola)的司机。

或者,粗略地用 SQL 来表达:

-- Per-driver score: average usage of cars 1 and 2, multiplied by usage of city 3.
SELECT drv.driver_id,
       cr.cars_score * ct.city_score AS driver_score
FROM drivers drv
-- Each subquery must expose driver_id so the outer join condition can reference it.
LEFT JOIN (SELECT driver_id, sum(used) / 2 AS cars_score
           FROM car_usage
           WHERE car_id IN (1,2) GROUP BY driver_id) AS cr
  ON (cr.driver_id = drv.driver_id)
LEFT JOIN (SELECT driver_id, sum(used) / 1 AS city_score
           FROM city_usage
           WHERE city_id IN (3) GROUP BY driver_id) AS ct
  ON (ct.driver_id = drv.driver_id)

尝试了以下方法:

{
"query": {
"bool": {
"disable_coord": true,
"must": [
{
"query": {
"bool": {
"disable_coord": true,
"must": [{
"function_score": {
"query": {
"nested": {
"path": "cars",
"query": {
"function_score": {
"filter": {
"term": {
"cars.car_id": 1
}
},
"boost_mode": "replace",
"score_mode": "sum",
"functions": [
{
"field_value_factor": {
"field": "cars.used",
"factor":0.5,
"missing": 0
}
}
]
}
}
}
}
}
}, {
"function_score": {
"query": {
"nested": {
"path": "cars",
"query": {
"function_score": {
"filter": {
"term": {
"cars.car_id": 2
}
},
"boost_mode": "replace",
"score_mode": "sum",
"functions": [
{
"field_value_factor": {
"field": "cars.used",
"factor":0.5,
"missing": 0
}
}
]
}
}
}
}
}
}
]
}
}
},
{
"function_score": {
"query": {
"nested": {
"path": "cities",
"query": {
"function_score": {
"filter": {
"term": {
"cities.city_id": 3
}
},
"score_mode": "multiply",
"boost_mode": "replace",
"functions": [
{
"field_value_factor": {
"field": "cities.used",
"missing": 0
}
}
]
}
}
}
}
}
}
]
}
}
}

这给了我奇怪的结果。

然后尝试:

{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "cars",
"query": {
"function_score": {
"filter": {
"term": {
"cars.car_id": 1
}
},
"score_mode": "sum",
"boost_mode":"replace",
"functions": [
{
"field_value_factor": {
"field": "cars.used",
"factor":0.5,
"missing": 0
}
}
]
}
}
}
},
{
"nested": {
"path": "cars",
"query": {
"function_score": {
"filter": {
"term": {
"cars.car_id": 2
}
},
"score_mode": "sum",
"boost_mode":"replace",
"functions": [
{
"field_value_factor": {
"field": "cars.used",
"factor":0.5,
"missing": 0
}
}
]
}
}
}
},
{
"nested": {
"path": "cities",
"query": {
"function_score": {
"filter": {
"term": {
"cities.city_id": 3
}
},
"score_mode":"multiply",
"boost_mode":"replace",
"functions": [
{
"field_value_factor": {
"field": "cities.used",
"missing": 0
}
}
]
}
}
}
}
]
}
}
}

这个结果更接近预期,但似乎只是把所有分数简单相加。

我的一位朋友建议把整个 JSON 扁平化,去掉嵌套对象(把它们变成普通属性),但我不确定这样做是否会让查询数据变得更容易。

更新 1

又一次失败的尝试:

{
"query": {
"function_score": {
"query": {
"bool": {
"must": [
{
"nested": {
"path": "cars",
"query": {
"function_score": {
"filter": {
"term": {
"cars.car_id": 1
}
},
"score_mode": "sum",
"functions": [
{
"field_value_factor": {
"field": "cars.used",
"factor": 0.5,
"missing": 0
}
}
]
}
}
}
},
{
"nested": {
"path": "cars",
"query": {
"function_score": {
"filter": {
"term": {
"cars.car_id": 2
}
},
"score_mode": "sum",
"functions": [
{
"field_value_factor": {
"field": "cars.used",
"factor": 0.5,
"missing": 0
}
}
]
}
}
}
},
{
"nested": {
"path": "cities",
"query": {
"function_score": {
"filter": {
"term": {
"cities.city_id": 3
}
},
"score_mode": "multiply",
"functions": [
{
"field_value_factor": {
"field": "cities.used",
"missing": 0
}
}
]
}
}
}
}
]
}
},
"score_mode": "multiply"
}
}
}

更新 2

按照把字段展平、去掉嵌套过滤器的替代方案,我最终得到了以下内容:

PUT test2

PUT test2/test2/1
{
"id":1,
"driver_name":"Lady Smith",
"cars_1":{"brand":"Ford Focus","used":0.5},
"cars_2":{"brand":"Toyota Corola","used":0.5},
"cities_3":{"name":"Tel Aviv","used":0.8},
"cities_4":{"name":"New York","used":0.2}
}
PUT test2/test2/2
{
"id":2,
"driver_name":"John Smith",
"cars_1":{"brand":"Ford Focus","used":0.3},
"cars_2":{"brand":"Toyota Corola","used":0.3},
"cities_3":{"name":"Tel Aviv","used":0.8},
"cities_4":{"name":"New York","used":0.2}
}
PUT test2/test2/3
{
"id":3,
"driver_name":"Will Smith",
"cars_1":{"brand":"Ford Focus","used":0.1},
"cities_4":{"name":"New York","used":0.2}
}
PUT test2/test2/4
{
  "id":4,
  "driver_name":"Ash Smith"
}

post test2/_search
{
"query": {
"bool": {
"disable_coord": true,
"must": [
{
"match": {
"name": "red pepper"
}
}
],
"should": [
{
"nested": {
"path": "words",
"query": {
"function_score": {
"functions": [
{
"field_value_factor": {
"field" : "words.weight",
"missing": 0
}
}
],
"query": {
"match": {
"words.text": "red pepper"
}
},
"score_mode": "sum",
"boost_mode": "replace"
}
},
"score_mode": "total"
}
}
]
}
}
}

GET test2/_search
{
"query": {
"function_score": {
"query":{
"bool":{
"must":[{
"exists":{"field":"cars_1"}
},{
"exists":{"field":"cars_2"}
},{
"exists":{"field":"cities_3"}
}]
}
},
"score_mode": "multiply",
"boost_mode": "replace",
"functions": [{
"script_score": {
"script": {
"inline": "(doc['cars_1.used'].value + doc['cars_2.used'].value) / 2 * doc['cities_3.used'].value"
}
}
}]
}
}
}

但我不确定内联脚本的性能影响。感觉就像我错过了一个更简单的解决方案。

最佳答案

供以后访问此帖子的人参考:我最终更改了数据模型,并使用了 script_score(在 5.0.0 中使用 lang: "painless")。

警告:这种方法虽然符合我的需要,但对性能有影响,目测估计响应时间会慢 3-5 倍左右。

目前,这对我来说已经足够了。

PUT test2

PUT test2/test2/1
{
"id":1,
"driver_name":"Lady Smith",
"cars_1":{"brand":"Ford Focus","used":0.5},
"cars_2":{"brand":"Toyota Corola","used":0.5},
"cities_3":{"name":"Tel Aviv","used":0.8},
"cities_4":{"name":"New York","used":0.2}
}
PUT test2/test2/2
{
"id":2,
"driver_name":"John Smith",
"cars_1":{"brand":"Ford Focus","used":0.3},
"cars_2":{"brand":"Toyota Corola","used":0.3},
"cities_3":{"name":"Tel Aviv","used":0.8},
"cities_4":{"name":"New York","used":0.2}
}
PUT test2/test2/3
{
"id":3,
"driver_name":"Will Smith",
"cars_1":{"brand":"Ford Focus","used":0.1},
"cities_4":{"name":"New York","used":0.2}
}
PUT test2/test2/4
{
  "id":4,
  "driver_name":"Ash Smith"
}

post test2/_search
{
"query": {
"bool": {
"disable_coord": true,
"must": [
{
"match": {
"name": "red pepper"
}
}
],
"should": [
{
"nested": {
"path": "words",
"query": {
"function_score": {
"functions": [
{
"field_value_factor": {
"field" : "words.weight",
"missing": 0
}
}
],
"query": {
"match": {
"words.text": "red pepper"
}
},
"score_mode": "sum",
"boost_mode": "replace"
}
},
"score_mode": "total"
}
}
]
}
}
}

GET test2/_search
{
  "query": {
    "function_score": {
      "query": {
        "bool": {
          "must": [
            { "exists": { "field": "cars_1" } },
            { "exists": { "field": "cars_2" } },
            { "exists": { "field": "cities_3" } }
          ]
        }
      },
      "score_mode": "multiply",
      "boost_mode": "replace",
      "functions": [
        {
          "script_score": {
            "script": {
              "inline": "(doc['cars_1.used'].value + doc['cars_2.used'].value) / 2 * doc['cities_3.used'].value"
            }
          }
        }
      ]
    }
  }
}

关于使用嵌套 function_scores 对 elasticsearch 进行评分,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/40359203/

35 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com