gpt4 book ai didi

elasticsearch - 返回Elasticsearch查询中匹配的值和匹配的值的计数

转载 作者:行者123 更新时间:2023-12-02 23:11:01 24 4
gpt4 key购买 nike

假设我的elasticsearch索引中包含以下两个元素:

{
"name": "bob",
"likes": ["computer", "cat", "water"]
},
{
"name": "alice",
"likes": ["gaming", "gambling"]
}

我现在想查询 likes 中包含例如 computer、laptop、cat 的元素。(这会匹配 bob,请注意应当是字符串的精确匹配)

作为结果,我需要匹配项以及匹配项的数量,因此希望得到如下内容(因为找到了 computer 和 cat,但没有找到 laptop 或 water):
{
"name": "bob",
"likes": ["computer", "cat"],
"likes_count": 2
}

有没有一种方法可以通过单个Elasticsearch查询来实现? (请注意,我目前仍停留在ES 2.4,但希望很快就能升级)。

理想情况下,我还要按 likes_count对输出进行排序。

谢谢!

最佳答案

最好的方法是将 likes 字段创建为 nested 数据类型(nested data type)

映射(Mapping)

PUT index71
{
"mappings": {
"properties": {
"name":{
"type": "text"
},
"likes":{
"type": "nested",
"properties": {
"name":{
"type":"keyword"
}
}
}
}
}
}

查询:
GET index71/_search
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "likes",
"query": {
"bool": {
"must": [
{
"terms": {
"likes.name": [
"computer",
"cat",
"laptop"
]
}
}
]
}
},
"inner_hits": {} ---> It will return matched elements in nested type
}
}
]
}
},
"aggs": {
"likes": {
"nested": {
"path": "likes"
},
"aggs": {
"matcheLikes": {
"filter": {
"bool": {
"must": [
{
"terms": {
"likes.name": [
"computer",
"cat",
"laptop"
]
}
}
]
}
},
"aggs": {
"likeCount": {
"value_count": {
"field": "likes.name"
}
}
}
}
}
}
}
}

结果:
   "hits" : [
{
"_index" : "index71",
"_type" : "_doc",
"_id" : "u9qTo3ABH6obcmRRRhSA",
"_score" : 1.0,
"_source" : {
"name" : "bob",
"likes" : [
{
"name" : "computer"
},
{
"name" : "cat"
},
{
"name" : "water"
}
]
},
"inner_hits" : {
"likes" : {
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "index71",
"_type" : "_doc",
"_id" : "u9qTo3ABH6obcmRRRhSA",
"_nested" : {
"field" : "likes",
"offset" : 0
},
"_score" : 1.0,
"_source" : {
"name" : "computer"
}
},
{
"_index" : "index71",
"_type" : "_doc",
"_id" : "u9qTo3ABH6obcmRRRhSA",
"_nested" : {
"field" : "likes",
"offset" : 1
},
"_score" : 1.0,
"_source" : {
"name" : "cat"
}
}
]
}
}
}
}
]
},
"aggregations" : {
"likes" : {
"doc_count" : 3,
"matcheLikes" : {
"doc_count" : 2,
"likeCount" : {
"value" : 2
}
}
}
}

如果无法将 likes 字段更改为 nested 类型,则需要使用脚本,而这会影响性能

映射(Mapping)
{
"index72" : {
"mappings" : {
"properties" : {
"likes" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}

查询:
{
"script_fields": { ---> It will iterate through likes and get matched values
"matchedElements": {
"script": "def matchedLikes=[];def list_to_check = ['computer', 'laptop', 'cat']; def do_not_return = true; for(int i=0;i<doc['likes.keyword'].length;i++){ if(list_to_check.contains(doc['likes.keyword'][i])) {matchedLikes.add(doc['likes.keyword'][i])}} return matchedLikes;"
}
},
"query": {
"bool": {
"filter": {
"bool": {
"must": [
{
"terms": {
"likes": [
"computer",
"laptop",
"cat"
]
}
}
]
}
}
}
},
"aggs": {
"Name": {
"terms": {
"field": "name.keyword",
"size": 10
},
"aggs": {
"Count": {
"scripted_metric": { --> get count of matched values
"init_script": "state.matchedLikes=[]",
"map_script": " def list_to_check = ['computer', 'laptop', 'cat']; def do_not_return = true; for(int i=0;i<doc['likes.keyword'].length;i++){ if(list_to_check.contains(doc['likes.keyword'][i])) {state.matchedLikes.add(doc['likes.keyword'][i]);}}",
"combine_script": "int count = 0; for (int i=0;i<state.matchedLikes.length;i++) { count += 1 } return count;",
"reduce_script": "int count = 0; for (a in states) { count += a } return count"
}
}
}
}
}
}

结果:
  "hits" : [
{
"_index" : "index72",
"_type" : "_doc",
"_id" : "wtqso3ABH6obcmRR0hSV",
"_score" : 0.0,
"fields" : {
"matchedElements" : [
"cat",
"computer"
]
}
}
]
},
"aggregations" : {
"Name" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "bob",
"doc_count" : 1,
"Count" : {
"value" : 2
}
}
]
}
}

编辑1
要让匹配项更多的文档获得更高的分数,请将 terms 查询改为 should 子句。should 子句中的每个 term 都会对得分有所贡献
GET index71/_search
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "likes",
"query": {
"bool": {
"should": [
{
"term": {
"likes.name": "computer"
}
},
{
"term": {
"likes.name": "cat"
}
},
{
"term": {
"likes.name": "laptop"
}
}
]
}
},
"inner_hits": {}
}
}
]
}
},
"aggs": {
"likes": {
"nested": {
"path": "likes"
},
"aggs": {
"matcheLikes": {
"filter": {
"bool": {
"must": [
{
"terms": {
"likes.name": [
"computer",
"cat",
"laptop"
]
}
}
]
}
},
"aggs": {
"likeCount": {
"value_count": {
"field": "likes.name"
}
}
}
}
}
}
}
}

结果
  "hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.5363467,
"hits" : [
{
"_index" : "index71",
"_type" : "_doc",
"_id" : "u9qTo3ABH6obcmRRRhSA",
"_score" : 1.5363467,
"_source" : {
"name" : "bob",
"likes" : [
{
"name" : "computer"
},
{
"name" : "cat"
},
{
"name" : "water"
}
]
},
"inner_hits" : {
"likes" : {
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.7917595,
"hits" : [
{
"_index" : "index71",
"_type" : "_doc",
"_id" : "u9qTo3ABH6obcmRRRhSA",
"_nested" : {
"field" : "likes",
"offset" : 1
},
"_score" : 1.7917595,
"_source" : {
"name" : "cat"
}
},
{
"_index" : "index71",
"_type" : "_doc",
"_id" : "u9qTo3ABH6obcmRRRhSA",
"_nested" : {
"field" : "likes",
"offset" : 0
},
"_score" : 1.2809337,
"_source" : {
"name" : "computer"
}
}
]
}
}
}
},
{
"_index" : "index71",
"_type" : "_doc",
"_id" : "pr-lqHABcSMy6UhGAWtW",
"_score" : 1.2809337,
"_source" : {
"name" : "bob",
"likes" : [
{
"name" : "computer"
},
{
"name" : "gaming"
},
{
"name" : "gambling"
}
]
},
"inner_hits" : {
"likes" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.2809337,
"hits" : [
{
"_index" : "index71",
"_type" : "_doc",
"_id" : "pr-lqHABcSMy6UhGAWtW",
"_nested" : {
"field" : "likes",
"offset" : 0
},
"_score" : 1.2809337,
"_source" : {
"name" : "computer"
}
}
]
}
}
}
}
]
},
"aggregations" : {
"likes" : {
"doc_count" : 6,
"matcheLikes" : {
"doc_count" : 3,
"likeCount" : {
"value" : 3
}
}
}
}

关于elasticsearch - 返回Elasticsearch查询中匹配的值和匹配的值的计数,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/60514423/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com