gpt4 book ai didi

Elasticsearch : search results on clicking on Hashtag

转载 作者:行者123 更新时间:2023-11-29 02:47:39 25 4
gpt4 key购买 nike

我有一个驼峰式的主题标签,例如 #teamIndia。现在,当单击此主题标签时,应该获取所有相关的结果:首先显示包含“#teamIndia”的结果,然后是包含“teamIndia”的结果,然后是“team India”,最后是“team”或“India”等。

我在做什么:

搜索文本:“#teamIndia”、“#NEWYORK”、“#profession”、“#2016”

POST /clip
{
"settings": {
"analysis": {
"char_filter" : {
"space_hashtags" : {
"type" : "mapping",
"mappings" : ["#=>|#"]
}
},
"filter": {
"substring": {
"max_gram": "20",
"type": "nGram",
"min_gram": "1",
"token_chars": [
"whitespace"
]
},
"camelcase": {
"type": "word_delimiter",
"type_table": ["# => ALPHANUM", "@ => ALPHANUM"]
},
"stopword": {
"type": "stop",
"stopwords": ["and", "is", "the"]
}
},
"analyzer": {
"substring_analyzer": {
"filter": [
"lowercase",
"substring"
],
"tokenizer": "standard"
},
"camelcase_analyzer": {
"type" : "custom",
"char_filter" : "space_hashtags",
"tokenizer" : "whitespace",
"filter": [
"camelcase",
"lowercase",
"stopword"
]
}
}
}
},
"mappings": {
"Clip": {
"properties": {
"description": {
"type": "multi_field",
"fields": {
"description": {
"type": "string",
"analyzer": "substring_analyzer",
"search_analyzer": "standard"
},
"raw": {
"type": "string",
"index": "not_analyzed"
},
"hashtag": {
"type": "string",
"index": "analyzed",
"analyzer": "camelcase_analyzer"
}
}
},
....
}
}
}
}

文档示例:-

POST /clip/Clip/2 {"id" : 1, "description" : "TheBestAndTheBeast"}

POST /clip/Clip/3 {"id" : 2, "description" : "bikes in DUBAI TheBestAndTheBeast profession"}

POST /clip/Clip/3 {"id" : 2, "description" : "Know how a software engineer surprised his wife! <a href=\"search/clips?q=%23theProvider&source=hashtag\" ng-click=\"handleModalClick()\"> #theProvider </a> rioOlympic <a href=\"search/clips?q=%23DUBAI&source=hashtag\" ng-click=\"handleModalClick()\"> #DUBAI </a> <a href=\"search/clips?q=%23TheBestAndTheBeast&source=hashtag\" ng-click=\"handleModalClick()\"> #TheBestAndTheBeast </a> <a href=\"search/clips?q=%23rioOlympic&source=hashtag\" ng-click=\"handleModalClick()\"> #rioOlympic </a>"}

** 搜索查询 **

GET clip/_search
{
"size": 100,
"query": {
"filtered": {
"query": {
"bool": {
"must":
{
"query_string": {
"fields": [
"description.hashtag"
],
"query": "teamIndia"
}
},
"should": {
"match":
{ "description.raw": "#teamIndia"}
}
}
}
}
}

预期结果:“#teamIndia”、“teamIndia”、“team India”、“team”、“India”,

和其他测试关键字类似。

最佳答案

原始帖子中的查询无法按预期工作的原因之一是 description.raw 被设置为 not_analyzed。因此,#teamIndia 永远不会匹配 description 为 "Animals and Pets and #teamIndia" 的文档,因为 description.raw 中保存的是未经分析的完整词项 Animals and Pets and #teamIndia,而不是 #teamIndia。

假设您拥有的文档类似于 OP 中的第二个示例。

示例:

{"id" : 2, "description" : "Animals and Pets and #teamIndia"}

{"id":7,"description":"This <a href=\"search/clips?q=%23teamIndia&source=hashtag\">#teamIndia</a>"}

然后您应该能够按以下顺序对文档进行排名:

1) 包含“#teamIndia”的描述,
2) 包含“teamIndia”的描述
3) 包含“team India”的描述
4) 包含“India”的描述

方法是在 word_delimiter 过滤器中启用 preserve_original 和 catenate_all,如下例所示:

示例:

索引文件

 PUT clip
{
"settings": {
"analysis": {
"char_filter": {
"zwsp_normalize": {
"type": "mapping",
"mappings": [
"\\u200B=>",
"\\u200C=>",
"\\u200D=>"
]
},
"html_decoder": {
"type": "mapping",
"mappings": [
"&lt;=> <",
"&gt;=> >"
]
}
},
"filter": {
"camelcase": {
"type": "word_delimiter",
"preserve_original": "true",
"catenate_all": "true"
},
"stopword": {
"type": "stop",
"stopwords": [
"and",
"is",
"the"
]
}
},
"analyzer": {
"camelcase_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"camelcase",
"lowercase",
"stopword"
],
"char_filter": [
"zwsp_normalize",
"html_decoder",
"html_strip"
]
}
}
}
},
"mappings": {
"Clip": {
"properties": {
"description": {
"type": "multi_field",
"fields": {
"hashtag": {
"type": "string",
"index": "analyzed",
"analyzer": "camelcase_analyzer",
"norms": {
"enabled": false
}
}
}
}
}
}
}
}



POST /clip/Clip/1
{
"id": 1,
"description": "Animals and Pets and #teamIndia"
}

POST /clip/Clip/2
{
"id": 2,
"description": "Animals and Pets and teamIndia"
}


POST /clip/Clip/3
{
"id": 3,
"description": "Animals and Pets and team India"
}


POST /clip/Clip/4
{
"id": 4,
"description": "Animals and Pets and India"
}



POST /clip/Clip/7
{
"id": 7,
"description": "This &lt;a href=&quot;search/clips?q=%23teamIndia&amp;source=hashtag&quot;&gt;#teamIndia&lt;/a&gt;"
}

查询结果:

POST clip/_search?search_type=dfs_query_then_fetch
{
"size": 100,
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"query_string": {
"fields": [
"description.hashtag"
],
"query": "#teamIndia"
}
}
]
}
}
}
}
}

结果:

      "hits": {
"total": 5,
"max_score": 1.4969246,
"hits": [
{
"_index": "clip",
"_type": "Clip",
"_id": "7",
"_score": 1.4969246,
"_source": {
"id": 7,
"description": "This &lt;a href=&quot;search/clips?q=%23teamIndia&amp;source=hashtag&quot;&gt;#teamIndia&lt;/a&gt;"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "1",
"_score": 1.4969246,
"_source": {
"id": 1,
"description": "Animals and Pets and #teamIndia"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "2",
"_score": 1.0952718,
"_source": {
"id": 2,
"description": "Animals and Pets and teamIndia"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "3",
"_score": 0.5207714,
"_source": {
"id": 3,
"description": "Animals and Pets and team India"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "4",
"_score": 0.11123338,
"_source": {
"id": 4,
"description": "Animals and Pets and India"
}
}
]
}

示例#dubai:

POST /clip/Clip/5
{
"id": 5,
"description": "#dubai is hot"
}

POST /clip/Clip/6
{
"id": 6,
"description": "dubai airport is huge"
}

POST clip/_search?search_type=dfs_query_then_fetch
{
"size": 100,
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"query_string": {
"fields": [
"description.hashtag"
],
"query": "#dubai"
}
}
]
}
}
}
}
}

"hits": {
"total": 2,
"max_score": 1.820827,
"hits": [
{
"_index": "clip",
"_type": "Clip",
"_id": "5",
"_score": 1.820827,
"_source": {
"id": 5,
"description": "#dubai is hot"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "6",
"_score": 0.5856731,
"_source": {
"id": 6,
"description": "dubai airport is huge"
}
}
]
}

示例#professionalAndPunctual:

POST /clip/Clip/7
{
"id": 7,
"description": "professionalAndPunctual"
}
POST clip/_search?search_type=dfs_query_then_fetch
{
"size": 100,
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"query_string": {
"fields": [
"description.hashtag"
],
"query": "#professionalAndPunctual"
}
}
]
}
}
}
}
}

"hits": [
{
"_index": "clip",
"_type": "Clip",
"_id": "7",
"_score": 2.2149992,
"_source": {
"id": 7,
"description": "professionalAndPunctual"
}
}
]

编辑示例

示例:#TheBestAndTheBeast

   POST /clip/Clip/10
{"id" : 10, "description" : "TheBestAndTheBeast"}

POST /clip/Clip/11
{"id" :11, "description" : "bikes in DUBAI TheBestAndTheBeast profession"}

POST /clip/Clip/12
{"id" : 12, "description" : "Know how a software engineer surprised his wife! <a href=\"search/clips?q=%23theProvider&source=hashtag\" ng-click=\"handleModalClick()\"> #theProvider </a> rioOlympic <a href=\"search/clips?q=%23DUBAI&source=hashtag\" ng-click=\"handleModalClick()\"> #DUBAI </a> <a href=\"search/clips?q=%23TheBestAndTheBeast&source=hashtag\" ng-click=\"handleModalClick()\"> #TheBestAndTheBeast </a> <a href=\"search/clips?q=%23rioOlympic&source=hashtag\" ng-click=\"handleModalClick()\"> #rioOlympic </a>"}

POST clip/_search?search_type=dfs_query_then_fetch
{
"size": 100,
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"query_string": {
"fields": [
"description.hashtag"
],
"query": "#TheBestAndTheBeast"
}
}
]
}
}
}
}
}

#结果

 "hits": [
{
"_index": "clip",
"_type": "Clip",
"_id": "12",
"_score": 1.8701664,
"_source": {
"id": 12,
"description": "Know how a software engineer surprised his wife! <a href=\"search/clips?q=%23theProvider&source=hashtag\" ng-click=\"handleModalClick()\"> #theProvider </a> rioOlympic <a href=\"search/clips?q=%23DUBAI&source=hashtag\" ng-click=\"handleModalClick()\"> #DUBAI </a> <a href=\"search/clips?q=%23TheBestAndTheBeast&source=hashtag\" ng-click=\"handleModalClick()\"> #TheBestAndTheBeast </a> <a href=\"search/clips?q=%23rioOlympic&source=hashtag\" ng-click=\"handleModalClick()\"> #rioOlympic </a>"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "10",
"_score": 0.9263139,
"_source": {
"id": 10,
"description": "TheBestAndTheBeast"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "11",
"_score": 0.9263139,
"_source": {
"id": 11,
"description": "bikes in DUBAI TheBestAndTheBeast profession"
}
}
]

分析器示例:

get clip/_analyze?analyzer=camelcase_analyzer&text=%23DUBAI

{
"tokens": [
{
"token": "#dubai",
"start_offset": 0,
"end_offset": 6,
"type": "word",
"position": 0
},
{
"token": "dubai",
"start_offset": 1,
"end_offset": 6,
"type": "word",
"position": 0
}
]
}

get clip/_analyze?analyzer=camelcase_analyzer&text=This%20%26lt%3Ba%20href%3D%26quot%3Bsearch%2Fclips%3Fq%3D%2523teamIndia%26amp%3Bsource%3Dhashtag%26quot%3B%26gt%3B%23teamIndia%26lt%3B%2Fa%26gt%3B

{
"tokens": [
{
"token": "this",
"start_offset": 0,
"end_offset": 4,
"type": "word",
"position": 0
},
{
"token": "#teamindia",
"start_offset": 78,
"end_offset": 98,
"type": "word",
"position": 1
},
{
"token": "india",
"start_offset": 78,
"end_offset": 98,
"type": "word",
"position": 2
},
{
"token": "team",
"start_offset": 78,
"end_offset": 98,
"type": "word",
"position": 2
},
{
"token": "teamindia",
"start_offset": 78,
"end_offset": 98,
"type": "word",
"position": 2
}
]
}

关于 Elasticsearch : search results on clicking on Hashtag,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/39345299/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com