gpt4 book ai didi

elasticsearch - Elasticsearch 中的匹配短语查询中的单个单词是否有字符限制?

转载 作者:行者123 更新时间:2023-12-04 08:35:39 30 4
gpt4 key购买 nike

我对 Elasticsearch 还比较陌生,请多包涵。我遇到了一个问题:如果搜索词不超过 20 个字符,文档可以被检索到;但只要同一个单词再多出几个字符,就没有任何结果:

  • 使用 'phenoxymethylpenicillin' 查不到任何文档。
  • 使用 'phenoxymethylpenicil' 可以返回文档。

  • 这是我尝试使用的查询:
    {
    "match_phrase": {
    "genericNames.name": {
    "query": "phenoxymethylpenicillin",
    "slop": 15,
    "zero_terms_query": "NONE",
    "boost": 1.0
    }
    }
    }
    这是完整的查询: https://pastebin.com/DEJvP2uS
    就像我说的那样,我对此还很陌生,可能只是没有找对排查的方向。
    所以我的问题是:哪些地方可能导致这种情况,原因是什么?
    谢谢!
    编辑:
    提供了来自样本数据的文档之一的摘录。我不能展示很多,因为很多都是敏感的,幸运的是我可以分享样本数据中的名称。这是来自我试图搜索的数据:
    "genericNames":[
    {
    "nameType":1,
    "name":"Phenoxymethylpenicillin 250mg tablets",
    "nameChangeCode":"0000",
    "nameBasisCode":"0001",
    "nameTypeDescription":"Name",
    "startDate":"1948-01-01T00:00:00.000000+0000",
    "endDate":"3456-02-01T00:00:00.000000+0000"
    },
    {
    "nameType":5,
    "name":"Penicillin V 250mg tablets",
    "nameTypeDescription":"Alternative Name 3",
    "startDate":"1948-01-01T00:00:00.000000+0000",
    "endDate":"3456-02-01T00:00:00.000000+0000"
    }
    ],
    我还提供了索引映射,因为它可能会提供额外的信息:
    {
    "amp": {
    "mappings": {
    "properties": {
    "_class": {
    "type": "text",
    "fields": {
    "keyword": {
    "type": "keyword",
    "ignore_above": 256
    }
    }
    },
    "ampId": {
    "type": "long"
    },
    "amppId": {
    "type": "long"
    },
    "attributes": {
    "type": "nested",
    "properties": {
    "attributeQualifier": {
    "type": "keyword"
    },
    "attributeType": {
    "type": "integer"
    },
    "attributeTypeDescription": {
    "type": "keyword"
    },
    "attributeValue": {
    "type": "text",
    "fields": {
    "raw": {
    "type": "keyword"
    }
    }
    },
    "countryId": {
    "type": "long"
    },
    "decodedValue": {
    "type": "text",
    "fields": {
    "raw": {
    "type": "keyword"
    }
    }
    },
    "endDate": {
    "type": "date",
    "format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
    },
    "startDate": {
    "type": "date",
    "format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
    }
    }
    },
    "dictionaries": {
    "type": "nested",
    "properties": {
    "abbreviation": {
    "type": "text",
    "fields": {
    "raw": {
    "type": "keyword"
    }
    }
    },
    "description": {
    "type": "text",
    "fields": {
    "raw": {
    "type": "keyword"
    }
    }
    },
    "dictId": {
    "type": "integer"
    },
    "endDate": {
    "type": "date",
    "format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
    },
    "startDate": {
    "type": "date",
    "format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
    }
    }
    },
    "endDate": {
    "type": "date",
    "format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
    },
    "excipients": {
    "type": "nested",
    "properties": {
    "basisOfStrengthCode": {
    "type": "keyword"
    },
    "bossId": {
    "type": "long"
    },
    "endDate": {
    "type": "date",
    "format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
    },
    "id": {
    "type": "long"
    },
    "ingredientNames": {
    "properties": {
    "endDate": {
    "type": "date"
    },
    "name": {
    "type": "text",
    "fields": {
    "keyword": {
    "type": "keyword",
    "ignore_above": 256
    }
    }
    },
    "startDate": {
    "type": "date"
    }
    }
    },
    "startDate": {
    "type": "date",
    "format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
    },
    "strengthDenominatorUnitOfMeasureCode": {
    "type": "keyword"
    },
    "strengthDenominatorValue": {
    "type": "keyword"
    },
    "strengthNumeratorUnitOfMeasureCode": {
    "type": "keyword"
    },
    "strengthNumeratorValue": {
    "type": "keyword"
    },
    "strengthVal": {
    "type": "keyword"
    },
    "unitOfMeasure": {
    "type": "keyword"
    }
    }
    },
    "extractableEntry": {
    "type": "boolean"
    },
    "genericNames": {
    "type": "nested",
    "properties": {
    "endDate": {
    "type": "date",
    "format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
    },
    "name": {
    "type": "text",
    "ignore_above": 256,
    "fields": {
    "raw": {
    "type": "keyword"
    }
    },
    "analyzer": "autocomplete_index",
    "search_analyzer": "autocomplete_search"
    },
    "nameBasisCode": {
    "type": "keyword"
    },
    "nameChangeCode": {
    "type": "keyword"
    },
    "nameType": {
    "type": "integer"
    },
    "nameTypeDescription": {
    "type": "text",
    "fields": {
    "raw": {
    "type": "keyword"
    }
    }
    },
    "startDate": {
    "type": "date",
    "format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
    }
    }
    },
    "id": {
    "type": "keyword"
    },
    "ingredients": {
    "type": "nested",
    "properties": {
    "basisOfStrengthCode": {
    "type": "keyword"
    },
    "bossId": {
    "type": "long"
    },
    "endDate": {
    "type": "date",
    "format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
    },
    "id": {
    "type": "long"
    },
    "ingredientNames": {
    "properties": {
    "endDate": {
    "type": "date"
    },
    "name": {
    "type": "text",
    "fields": {
    "keyword": {
    "type": "keyword",
    "ignore_above": 256
    }
    }
    },
    "startDate": {
    "type": "date"
    }
    }
    },
    "startDate": {
    "type": "date",
    "format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
    },
    "strengthDenominatorUnitOfMeasureCode": {
    "type": "keyword"
    },
    "strengthDenominatorValue": {
    "type": "keyword"
    },
    "strengthNumeratorUnitOfMeasureCode": {
    "type": "keyword"
    },
    "strengthNumeratorValue": {
    "type": "keyword"
    },
    "strengthVal": {
    "type": "keyword"
    },
    "unitOfMeasure": {
    "type": "keyword"
    }
    }
    },
    "invalidEntry": {
    "type": "boolean"
    },
    "pitId": {
    "type": "integer"
    },
    "ppaCodes": {
    "type": "nested",
    "properties": {
    "code": {
    "type": "text",
    "fields": {
    "raw": {
    "type": "keyword"
    }
    }
    },
    "endDate": {
    "type": "date",
    "format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
    },
    "startDate": {
    "type": "date",
    "format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
    }
    }
    },
    "proprietaryNames": {
    "type": "nested",
    "properties": {
    "endDate": {
    "type": "date",
    "format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
    },
    "name": {
    "type": "text",
    "ignore_above": 256,
    "fields": {
    "raw": {
    "type": "keyword"
    }
    },
    "analyzer": "autocomplete_index",
    "search_analyzer": "autocomplete_search"
    },
    "nameBasisCode": {
    "type": "keyword"
    },
    "nameChangeCode": {
    "type": "keyword"
    },
    "nameType": {
    "type": "integer"
    },
    "nameTypeDescription": {
    "type": "text",
    "fields": {
    "raw": {
    "type": "keyword"
    }
    }
    },
    "startDate": {
    "type": "date",
    "format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
    }
    }
    },
    "qpuUomCde": {
    "type": "keyword"
    },
    "qpuVal": {
    "type": "keyword"
    },
    "qtyUomCde": {
    "type": "keyword"
    },
    "qtyVal": {
    "type": "keyword"
    },
    "snomedCodes": {
    "type": "nested",
    "properties": {
    "endDate": {
    "type": "date",
    "format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
    },
    "ppaNextNo": {
    "type": "text",
    "fields": {
    "raw": {
    "type": "keyword"
    }
    }
    },
    "snomed": {
    "type": "text",
    "fields": {
    "raw": {
    "type": "keyword"
    }
    }
    },
    "startDate": {
    "type": "date",
    "format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
    }
    }
    },
    "snomedDescriptions": {
    "type": "nested",
    "properties": {
    "endDate": {
    "type": "date",
    "format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
    },
    "ppaNextNo": {
    "type": "text",
    "fields": {
    "raw": {
    "type": "keyword"
    }
    }
    },
    "snomed": {
    "type": "text",
    "fields": {
    "raw": {
    "type": "keyword"
    }
    }
    },
    "startDate": {
    "type": "date",
    "format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
    }
    }
    },
    "startDate": {
    "type": "date",
    "format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
    },
    "suppliers": {
    "type": "nested",
    "properties": {
    "endDate": {
    "type": "date",
    "format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
    },
    "id": {
    "type": "long"
    },
    "names": {
    "type": "nested",
    "properties": {
    "endDate": {
    "type": "date",
    "format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
    },
    "name": {
    "type": "text",
    "fields": {
    "raw": {
    "type": "keyword"
    }
    },
    "analyzer": "autocomplete_index",
    "search_analyzer": "autocomplete_search"
    },
    "nameBasisCode": {
    "type": "keyword"
    },
    "nameChangeCode": {
    "type": "keyword"
    },
    "nameType": {
    "type": "integer"
    },
    "nameTypeDescription": {
    "type": "text",
    "fields": {
    "raw": {
    "type": "keyword"
    }
    }
    },
    "startDate": {
    "type": "date",
    "format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
    }
    }
    },
    "startDate": {
    "type": "date",
    "format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
    }
    }
    },
    "udfs": {
    "type": "nested",
    "properties": {
    "ddIndicator": {
    "type": "integer"
    },
    "endDate": {
    "type": "date",
    "format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
    },
    "startDate": {
    "type": "date",
    "format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
    },
    "udfsUomCode": {
    "type": "keyword"
    },
    "udfsValue": {
    "type": "keyword"
    },
    "vmpUomCode": {
    "type": "keyword"
    }
    }
    },
    "vmpId": {
    "type": "long"
    },
    "vmppId": {
    "type": "long"
    },
    "vtms": {
    "type": "nested",
    "properties": {
    "endDate": {
    "type": "date",
    "format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
    },
    "id": {
    "type": "long"
    },
    "startDate": {
    "type": "date",
    "format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
    }
    }
    }
    }
    }
    }
    }
    编辑:添加了完整查询的链接 - https://pastebin.com/DEJvP2uS
    编辑:索引设置:
    {
    "index": {
    "max_ngram_diff": "20",
    "analysis": {
    "filter": {
    "autocomplete_suffix_filter": {
    "type": "ngram",
    "min_gram": "1",
    "max_gram": "20"
    },
    "autocomplete_filter": {
    "type": "edge_ngram",
    "min_gram": "1",
    "max_gram": "20"
    }
    },
    "analyzer": {
    "autocomplete_index": {
    "filter": [
    "lowercase",
    "autocomplete_filter",
    "autocomplete_suffix_filter"
    ],
    "type": "custom",
    "tokenizer": "standard"
    },
    "autocomplete_search": {
    "filter": [
    "lowercase"
    ],
    "type": "custom",
    "tokenizer": "standard"
    }
    }
    },
    "number_of_replicas": "1"
    }
    }

    最佳答案

    这一定是由于您的 genericNames.name 字段上的自定义分析器造成的。您在索引时使用 autocomplete_index 分析器,在搜索时使用 autocomplete_search 分析器,但问题中没有提供这些分析器的定义,只提供了 mapping 部分。
    请提供该索引上 _settings API 的输出,请参阅 https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-get-settings.html 了解更多信息。
    您需要使用 analyze API,分别检查 autocomplete_index 和 autocomplete_search 两个分析器为 phenoxymethylpenicillin 生成的 token,您会注意到其中的差异。

    关于elasticsearch - Elasticsearch 中的匹配短语查询中的单个单词是否有字符限制?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/64817981/

    30 4 0
    Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
    广告合作:1813099741@qq.com 6ren.com