gpt4 book ai didi

elasticsearch - Elasticsearch : find word parts in nested object

转载 作者:行者123 更新时间:2023-11-29 02:47:33 27 4
gpt4 key购买 nike

我无法在嵌套对象中搜索到单词的一部分,只能找到完整的单词。我的分析器配置如下:

{
"settings": {
"number_of_shards": 1,
"analysis": {
"filter": {
"word_part_filter": {
"type": "ngram",
"min_gram": 3,
"max_gram": 15
},
"word_part_front_filter": {
"type": "edgeNGram",
"min_gram": 2,
"max_gram": 15
},
"codeid_filter": {
"type": "pattern_replace",
"pattern": "[-/.:]",
"replacement": "",
"preserve_original": true
}
},
"char_filter": {
"umlaut_char_filter": {
"type": "mapping",
"mappings": [
"ö=>oe",
"ä=>ae",
"ü=>ue",
"ß=>ss",
"Ö=>Oe",
"Ä=>Ae",
"Ü=>Ue"
]
}
},
"analyzer": {
"description_analyser_query": {
"type": "custom",
"char_filter": [
"html_strip"
],
"tokenizer": "standard",
"filter": [
"lowercase",
"stop",
"asciifolding"
]
},
"description_analyser_idx": {
"type": "custom",
"char_filter": [
"html_strip"
],
"tokenizer": "standard",
"filter": [
"lowercase",
"stop",
"asciifolding",
"word_part_filter"
]
},
"name_analyser_query": {
"type": "custom",
"char_filter": [
"umlaut_char_filter"
],
"tokenizer": "standard",
"filter": [
"lowercase",
"asciifolding"
]
},
"name_analyser_idx": {
"type": "custom",
"char_filter": [
"umlaut_char_filter"
],
"tokenizer": "standard",
"filter": [
"lowercase",
"asciifolding",
"word_part_filter"
]
},
"codeid_analyser_query": {
"type": "custom",
"tokenizer": "keyword",
"filter": [
"lowercase",
"codeid_filter"
]
},
"codeid_analyser_idx_front": {
"type": "custom",
"tokenizer": "keyword",
"filter": [
"lowercase",
"codeid_filter",
"word_part_front_filter"
]
},
"codeid_analyser_idx_any": {
"type": "custom",
"tokenizer": "keyword",
"filter": [
"lowercase",
"codeid_filter",
"word_part_filter"
]
}
}
}
}
}

这是嵌套对象的映射(摘录):

{
"properties": {
"aid": {
"type": "nested",
"properties": {
"tpid": {
"type": "string",
"analyzer": "codeid_analyser_idx_any"
},
"aid": {
"type": "string",
"analyzer": "codeid_analyser_idx_any"
}
}
}
}
}

我正在使用此查询(摘录)进行搜索。这里只有“嵌套”部分是必不可少的:

{
"query": {
"bool": {
"must": [
{
"bool": {
"should": [
{
"nested": {
"path": "aid",
"query": {
"bool": {
"must": {
"match": {
"aid.aid": {
"query": "1200",
"analyzer": "codeid_analyser_query"
}
}
},
"filter": {
"or": [
{
"match": {
"aid.tpid": "buyer_specific"
}
},
{
"match": {
"aid.tpid": "mytpid"
}
}
]
}
}
}
}
}
],
"minimum_should_match": 1
}
}
]
}
}
}

索引中有一个元素,其 aid=120000008。

在字段上使用分析器时,什么也搜不到。当嵌套对象的映射和查询中都完全不使用分析器时,只能找到完整的单词(如“120000008”),而找不到“1200”。有什么想法吗?

最佳答案

实际上,我使用 Elasticsearch 5.2,在名为 test 的索引上,将该映射应用于名为“product”的类型(仅重写了查询中的 filter 部分,使其符合新版本的查询语法),得到了正确的结果。查询:

GET test/_search
{
"query": {
"bool": {
"must": [
{
"bool": {
"should": [
{
"nested": {
"path": "aid",
"query": {
"bool": {
"must": {
"match": {
"aid.aid": {
"query": "1200",
"analyzer": "codeid_analyser_query"
}
}
},
"filter": {
"terms": {
"aid.tpid": [
"mytpid",
"buyer_specific"
]
}
}
}
}
}
}
],
"minimum_should_match": 1
}
}
]
}
}
}

索引中的内容:

GET test/_search

{
"took": 8,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "test",
"_type": "product",
"_id": "AVrJ1CSd-NyeQ4r64kP6",
"_score": 1,
"_source": {
"aid": {
"aid": "120000008",
"tpid": "mytpid"
}
}
}
]
}
}

分析器(我删除了变音符字符过滤器 umlaut_char_filter,因为它在我的计算机上显示为乱码;测试中并未用到它,所以删除它不会改变结果):

PUT test
{
"settings": {
"analysis": {
"filter": {
"word_part_filter": {
"type": "ngram",
"min_gram": 3,
"max_gram": 15
},
"word_part_front_filter": {
"type": "edgeNGram",
"min_gram": 2,
"max_gram": 15
},
"codeid_filter": {
"type": "pattern_replace",
"pattern": "[-/.:]",
"replacement": "",
"preserve_original": true
}
},

"analyzer": {
"description_analyser_query": {
"type": "custom",
"char_filter": [
"html_strip"
],
"tokenizer": "standard",
"filter": [
"lowercase",
"stop",
"asciifolding"
]
},
"description_analyser_idx": {
"type": "custom",
"char_filter": [
"html_strip"
],
"tokenizer": "standard",
"filter": [
"lowercase",
"stop",
"asciifolding",
"word_part_filter"
]
},

"codeid_analyser_query": {
"type": "custom",
"tokenizer": "keyword",
"filter": [
"lowercase",
"codeid_filter"
]
},
"codeid_analyser_idx_front": {
"type": "custom",
"tokenizer": "keyword",
"filter": [
"lowercase",
"codeid_filter",
"word_part_front_filter"
]
},
"codeid_analyser_idx_any": {
"type": "custom",
"tokenizer": "keyword",
"filter": [
"lowercase",
"codeid_filter",
"word_part_filter"
]
}
}
}
}
}

产品映射:

PUT test/_mapping/product
{

"properties": {
"aid": {
"type": "nested",
"properties": {
"tpid": {
"type": "string",
"analyzer": "codeid_analyser_idx_any"
},
"aid": {
"type": "string",
"analyzer": "codeid_analyser_idx_any"
}
}
}
}
}

关于elasticsearch - Elasticsearch : find word parts in nested object,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/42577399/

27 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com