gpt4 book ai didi

azure - 如何使用 Azure 认知搜索搜索字符串的一部分

转载 作者:行者123 更新时间:2023-12-03 05:36:00 27 4
gpt4 key购买 nike

我对 Azure 认知搜索还很陌生,并且已成功配置索引以实现自动完成(感谢 this article 使用部分搜索)。

但现在我有另一个用例,其中我将许多文件存储在带有元数据的 Azure Blob 容器中:

(每个文件的)元数据字段之一称为 partnumbers,其值是一串用逗号分隔的产品 SKU(例如“123456,78901,102938,09876”)。我构建了索引,以便将此信息存储为 Edm.String,如下所示:

{
  "name": "my-index",
  "fields": [
    {
      "name": "partnumbers",
      "type": "Edm.String",
      "facetable": true,
      "filterable": true,
      "key": false,
      "retrievable": true,
      "searchable": true,
      "sortable": true,
      "analyzer": null,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "synonymMaps": [],
      "fields": []
    },
    {
      "name": "metadata_storage_name",
      "type": "Edm.String",
      "facetable": true,
      "filterable": true,
      "key": false,
      "retrievable": false,
      "searchable": true,
      "sortable": true,
      "analyzer": null,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "synonymMaps": [],
      "fields": []
    },
    {
      "name": "metadata_storage_content_type",
      "type": "Edm.String",
      "facetable": true,
      "filterable": true,
      "key": false,
      "retrievable": false,
      "searchable": true,
      "sortable": true,
      "analyzer": null,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "synonymMaps": [],
      "fields": []
    },
    {
      "name": "metadata_storage_last_modified",
      "type": "Edm.String",
      "facetable": true,
      "filterable": true,
      "key": false,
      "retrievable": false,
      "searchable": true,
      "sortable": true,
      "analyzer": null,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "synonymMaps": [],
      "fields": []
    },
    {
      "name": "metadata_storage_path",
      "type": "Edm.String",
      "facetable": true,
      "filterable": true,
      "key": false,
      "retrievable": false,
      "searchable": true,
      "sortable": true,
      "analyzer": null,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "synonymMaps": [],
      "fields": []
    },
    {
      "name": "metadata_storage_size",
      "type": "Edm.Int64",
      "facetable": true,
      "filterable": true,
      "retrievable": false,
      "sortable": true,
      "analyzer": null,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "synonymMaps": [],
      "fields": []
    },
    {
      "name": "key",
      "type": "Edm.String",
      "facetable": true,
      "filterable": true,
      "key": true,
      "retrievable": true,
      "searchable": true,
      "sortable": true,
      "analyzer": null,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "synonymMaps": [],
      "fields": []
    },
    {
      "name": "partialPartnumbers",
      "type": "Edm.String",
      "facetable": false,
      "filterable": false,
      "key": false,
      "retrievable": false,
      "searchable": true,
      "sortable": false,
      "analyzer": null,
      "indexAnalyzer": "prefixCmAnalyzer",
      "searchAnalyzer": "standardCmAnalyzer",
      "synonymMaps": [],
      "fields": []
    }
  ],
  "suggesters": [
    {
      "name": "my-index_suggester",
      "searchMode": "analyzingInfixMatching",
      "sourceFields": [
        "partnumbers"
      ]
    }
  ],
  "scoringProfiles": [
    {
      "name": "exactFirst",
      "functions": [],
      "functionAggregation": null,
      "text": {
        "weights": {
          "partnumbers": 2,
          "partialPartnumbers": 1
        }
      }
    }
  ],
  "defaultScoringProfile": "exactFirst",
  "corsOptions": null,
  "analyzers": [
    {
      "@odata.type": "#Microsoft.Azure.Search.CustomAnalyzer",
      "name": "standardCmAnalyzer",
      "tokenizer": "standard_v2",
      "tokenFilters": [
        "lowercase",
        "asciifolding"
      ],
      "charFilters": []
    },
    {
      "@odata.type": "#Microsoft.Azure.Search.CustomAnalyzer",
      "name": "prefixCmAnalyzer",
      "tokenizer": "standard_v2",
      "tokenFilters": [
        "lowercase",
        "asciifolding",
        "edgeNGramCmTokenFilter"
      ],
      "charFilters": []
    }
  ],
  "charFilters": [],
  "tokenFilters": [
    {
      "@odata.type": "#Microsoft.Azure.Search.EdgeNGramTokenFilterV2",
      "name": "edgeNGramCmTokenFilter",
      "minGram": 2,
      "maxGram": 20,
      "side": "front"
    }
  ],
  "tokenizers": [],
  "@odata.etag": "\"0x8D8184F367A74XX\""
}

现在我正在努力寻找一种方法(通过特定语法?分析器?分词器?),来查找 partnumbers 元数据字段中包含某个 SKU 的所有文件(这样我就可以检索与某一产品相关的所有文档):我想将 SKU“102938”传递给 Azure 搜索,让它返回 partnumbers 元数据字段中包含此 SKU(可能同时还包含其他 SKU)的所有文件。

但是我很难在 Google 上找到示例,而且文档目前似乎有点超出我的理解范围(我不太确定自己是否正确理解了什么是分析器、分词器等,以及它们是如何工作的!这是我第一次深入“搜索”世界……)。

因此,我非常感谢社区可以在这方面为我提供帮助,我很乐意阅读为初学者提供的文章以了解所有内容、教程或任何可以帮助我继续前进的内容!

提前致谢。

最佳答案

好的,我刚刚尝试了一种有效的方法:我在 partnumbers 字段上定义了 pattern analyzer,当我使用 Analyze Text API 进行测试时,它确实将我的 SKU 字符串拆分成了多个 token。之后我搜索单个 SKU,它就返回了我想要的所有文件!这是我的索引 JSON 定义:

{
  "name": "my-index",
  "fields": [
    {
      "name": "partnumbers",
      "type": "Edm.String",
      "facetable": true,
      "filterable": true,
      "key": false,
      "retrievable": true,
      "searchable": true,
      "sortable": true,
      "analyzer": "pattern",
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "synonymMaps": [],
      "fields": []
    },
    {
      "name": "metadata_storage_name",
      "type": "Edm.String",
      "facetable": true,
      "filterable": true,
      "key": false,
      "retrievable": true,
      "searchable": true,
      "sortable": true,
      "analyzer": null,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "synonymMaps": [],
      "fields": []
    },
    {
      "name": "metadata_storage_content_type",
      "type": "Edm.String",
      "facetable": true,
      "filterable": true,
      "key": false,
      "retrievable": true,
      "searchable": true,
      "sortable": true,
      "analyzer": null,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "synonymMaps": [],
      "fields": []
    },
    {
      "name": "metadata_storage_last_modified",
      "type": "Edm.String",
      "facetable": true,
      "filterable": true,
      "key": false,
      "retrievable": true,
      "searchable": true,
      "sortable": true,
      "analyzer": null,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "synonymMaps": [],
      "fields": []
    },
    {
      "name": "metadata_storage_path",
      "type": "Edm.String",
      "facetable": true,
      "filterable": true,
      "key": false,
      "retrievable": true,
      "searchable": true,
      "sortable": true,
      "analyzer": null,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "synonymMaps": [],
      "fields": []
    },
    {
      "name": "metadata_storage_size",
      "type": "Edm.Int64",
      "facetable": true,
      "filterable": true,
      "retrievable": true,
      "sortable": true,
      "analyzer": null,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "synonymMaps": [],
      "fields": []
    },
    {
      "name": "key",
      "type": "Edm.String",
      "facetable": true,
      "filterable": true,
      "key": true,
      "retrievable": true,
      "searchable": true,
      "sortable": true,
      "analyzer": null,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "synonymMaps": [],
      "fields": []
    },
    {
      "name": "name",
      "type": "Edm.String",
      "facetable": true,
      "filterable": true,
      "key": false,
      "retrievable": true,
      "searchable": true,
      "sortable": true,
      "analyzer": null,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "synonymMaps": [],
      "fields": []
    },
    {
      "name": "partialPartnumbers",
      "type": "Edm.String",
      "facetable": false,
      "filterable": false,
      "key": false,
      "retrievable": false,
      "searchable": true,
      "sortable": false,
      "analyzer": null,
      "indexAnalyzer": "prefixCmAnalyzer",
      "searchAnalyzer": "standardCmAnalyzer",
      "synonymMaps": [],
      "fields": []
    },
    {
      "name": "partialName",
      "type": "Edm.String",
      "facetable": false,
      "filterable": false,
      "key": false,
      "retrievable": false,
      "searchable": true,
      "sortable": false,
      "analyzer": null,
      "indexAnalyzer": "prefixCmAnalyzer",
      "searchAnalyzer": "standardCmAnalyzer",
      "synonymMaps": [],
      "fields": []
    }
  ],
  "suggesters": [
    {
      "name": "conformity-certificates-index_suggester",
      "searchMode": "analyzingInfixMatching",
      "sourceFields": [
        "name"
      ]
    }
  ],
  "scoringProfiles": [
    {
      "name": "exactFirst",
      "functions": [],
      "functionAggregation": null,
      "text": {
        "weights": {
          "partnumbers": 4,
          "partialPartnumbers": 3,
          "name": 2,
          "partialName": 1
        }
      }
    }
  ],
  "defaultScoringProfile": "exactFirst",
  "corsOptions": null,
  "analyzers": [
    {
      "@odata.type": "#Microsoft.Azure.Search.CustomAnalyzer",
      "name": "standardCmAnalyzer",
      "tokenizer": "standard_v2",
      "tokenFilters": [
        "lowercase",
        "asciifolding"
      ],
      "charFilters": []
    },
    {
      "@odata.type": "#Microsoft.Azure.Search.CustomAnalyzer",
      "name": "prefixCmAnalyzer",
      "tokenizer": "standard_v2",
      "tokenFilters": [
        "lowercase",
        "asciifolding",
        "edgeNGramCmTokenFilter"
      ],
      "charFilters": []
    }
  ],
  "charFilters": [],
  "tokenFilters": [
    {
      "@odata.type": "#Microsoft.Azure.Search.EdgeNGramTokenFilterV2",
      "name": "edgeNGramCmTokenFilter",
      "minGram": 2,
      "maxGram": 20,
      "side": "front"
    }
  ],
  "tokenizers": [],
  "@odata.etag": "\"0x8D818EC80CXXXX\""
}

关于azure - 如何使用 Azure 认知搜索搜索字符串的一部分,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/62569938/

27 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com