gpt4 book ai didi

django - 尽管使用Ngram和Edgengram构建索引,但部分搜索在Elasticsearch + Haystack上不起作用

转载 作者:行者123 更新时间:2023-12-03 01:56:43 25 4
gpt4 key购买 nike

我正在按如下方式建立索引:

class BookIndex(indexes.SearchIndex, indexes.Indexable):
    """Haystack search index declaring the searchable fields of a book.

    NOTE(review): this snippet shows only the field declarations; the
    ``get_model()`` method and any ``prepare_<field>()`` methods for the
    fields declared without ``model_attr`` are not visible here.
    """

    # Main document field; its content is rendered from a template.
    text = indexes.EdgeNgramField(document=True, use_template=True)
    # Edge-n-gram copy of the title, intended for partial matching.
    content_auto = indexes.EdgeNgramField(model_attr='title')
    isbn_13 = indexes.CharField(model_attr='isbn_13')
    validate = indexes.IntegerField(model_attr='validate')
    price = indexes.IntegerField(model_attr='price')

    # The fields below have no ``model_attr``.
    authors = indexes.EdgeNgramField()
    reviews = indexes.CharField()
    publishers = indexes.EdgeNgramField()
    institutes = indexes.EdgeNgramField()
    sellers = indexes.CharField()
    category = indexes.CharField()
    sub_category = indexes.CharField()

我甚至尝试过使用Ngram,但部分搜索仍然无法正常工作。

我使用 SearchQuerySet().all().filter(content=query) 进行查询,但它不会显示部分匹配的结果;我也尝试过 SearchQuerySet().filter(content__contains=query)。

有人可以帮我吗?

最佳答案

Haystack对ElasticSearch的支持并不是很好:您无法为字段指定合适的索引设置,因此必须提供自定义的ElasticSearch后端(backend)才能启用这一功能:

#in a search_backends.py file
from django.conf import settings
from haystack.backends.elasticsearch_backend import (
ElasticsearchSearchBackend,
ElasticsearchSearchEngine
)
from haystack.fields import EdgeNgramField as BaseEdgeNgramField, NgramField as BaseNgramField
from haystack.indexes import CharField

# Just an example of the degree of configuration that is possible
# Mapping overrides keyed by a field's ``field_type``. Each value is a
# complete Elasticsearch field mapping used for fields of that type.
CUSTOM_FIELD_TYPE = {
    "completion": {
        "type": "completion",
        "payloads": True,
        # Name of an analyzer expected to exist in the index settings.
        "analyzer": "suggest_analyzer",
        "preserve_separators": True,
        "preserve_position_increments": False,
    },
}

# Custom Backend
class CustomElasticBackend(ElasticsearchSearchBackend):
    """Elasticsearch backend with configurable index settings and analyzers.

    Two Django settings are read when the backend is created:

    * ``ELASTICSEARCH_INDEX_SETTINGS`` -- if set and truthy, used as this
      backend's ``DEFAULT_SETTINGS``.
    * ``ELASTICSEARCH_DEFAULT_ANALYZER`` -- analyzer applied to analyzed
      string fields that do not name their own (defaults to ``"snowball"``).

    Fields may additionally expose ``analyzer``, ``index_analyzer`` and
    ``search_analyzer`` attributes, which ``build_schema`` copies into the
    generated mapping.
    """

    # Placeholder; the effective value is assigned per instance in __init__.
    DEFAULT_ANALYZER = None

    def __init__(self, connection_alias, **connection_options):
        """Initialise the parent backend, then apply the project settings.

        Args:
            connection_alias: Passed through to the parent backend.
            **connection_options: Passed through to the parent backend.
        """
        super(CustomElasticBackend, self).__init__(
            connection_alias, **connection_options)
        user_settings = getattr(settings, 'ELASTICSEARCH_INDEX_SETTINGS', None)
        self.DEFAULT_ANALYZER = getattr(
            settings, 'ELASTICSEARCH_DEFAULT_ANALYZER', "snowball")
        if user_settings:
            # The user's settings replace DEFAULT_SETTINGS on this instance
            # wholesale; nothing is merged.
            self.DEFAULT_SETTINGS = user_settings

    def build_schema(self, fields):
        """Build the mapping, then apply per-field analyzer customisation.

        Args:
            fields: Dict of field name -> field instance, as passed to the
                parent's ``build_schema``.

        Returns:
            Tuple ``(content_field_name, mapping)`` where ``mapping`` is the
            parent's mapping with the customised field entries.
        """
        content_field_name, mapping = super(
            CustomElasticBackend, self).build_schema(fields)

        for field_class in fields.values():
            index_name = field_class.index_fieldname
            field_mapping = mapping[index_name]

            index_analyzer = getattr(field_class, 'index_analyzer', None)
            search_analyzer = getattr(field_class, 'search_analyzer', None)
            # Fall back on any falsy value, not only on a missing attribute:
            # a field may define ``analyzer`` but leave it as None, and that
            # must not end up as ``analyzer: None`` in the mapping.
            field_analyzer = (
                getattr(field_class, 'analyzer', None) or self.DEFAULT_ANALYZER
            )

            if field_mapping['type'] == 'string' and field_class.indexed:
                field_mapping["term_vector"] = "with_positions_offsets"
                # Facet fields and n-gram fields keep the analyzer (or lack
                # of one) chosen by the parent backend.
                if (not hasattr(field_class, 'facet_for')
                        and field_class.field_type not in ('ngram', 'edge_ngram')):
                    field_mapping['analyzer'] = field_analyzer

            if field_class.field_type in CUSTOM_FIELD_TYPE:
                # Copy so the shared template is never mutated below.
                field_mapping = CUSTOM_FIELD_TYPE[field_class.field_type].copy()

            if index_analyzer and search_analyzer:
                # Separate index/search analyzers take the place of a single
                # combined ``analyzer`` entry.
                field_mapping['index_analyzer'] = index_analyzer
                field_mapping['search_analyzer'] = search_analyzer
                field_mapping.pop('analyzer', None)

            mapping[index_name] = field_mapping

        return (content_field_name, mapping)


class CustomElasticSearchEngine(ElasticsearchSearchEngine):
    """Search engine that uses ``CustomElasticBackend`` as its backend."""

    backend = CustomElasticBackend


# Custom fields, just use the ones you need or create yours
class CustomFieldMixin(object):
    """Mixin that lets a field accept analyzer-related keyword arguments.

    The ``analyzer``, ``index_analyzer`` and ``search_analyzer`` keyword
    arguments are removed from ``kwargs`` and stored on the instance (each
    defaults to ``None``); all remaining keyword arguments are forwarded to
    the next ``__init__`` in the MRO.
    """

    def __init__(self, **kwargs):
        # Pop before delegating so the next __init__ never sees these
        # extra keyword arguments.
        self.analyzer = kwargs.pop('analyzer', None)
        self.index_analyzer = kwargs.pop('index_analyzer', None)
        self.search_analyzer = kwargs.pop('search_analyzer', None)
        super(CustomFieldMixin, self).__init__(**kwargs)

class CustomCharField(CustomFieldMixin, CharField):
    """``CharField`` that also accepts the analyzer keyword arguments."""


class CustomCompletionField(CustomFieldMixin, CharField):
    """``CharField`` variant whose ``field_type`` is ``'completion'``."""

    # Matches the 'completion' key of CUSTOM_FIELD_TYPE.
    field_type = 'completion'


class CustomEdgeNgramField(CustomFieldMixin, BaseEdgeNgramField):
    """Edge-n-gram field that also accepts the analyzer keyword arguments."""


class CustomNgramField(CustomFieldMixin, BaseNgramField):
    """N-gram field that also accepts the analyzer keyword arguments."""




#settings.py
# Index settings: custom analyzers plus the tokenizer and token filters
# that those analyzers refer to by name.
ELASTICSEARCH_INDEX_SETTINGS = {
    'settings': {
        'analysis': {
            'analyzer': {
                'custom_analyzer': {
                    'type': 'custom',
                    'tokenizer': 'standard',
                    'filter': ['lowercase', 'asciifolding'],
                },
                # Index-time analyzer: n-gram tokenizer.
                'str_index_analyzer': {
                    'type': 'custom',
                    'tokenizer': 'haystack_ngram_tokenizer',
                    'filter': [
                        'stopwords',
                        'asciifolding',
                        'lowercase',
                        'snowball',
                        'elision',
                        'worddelimiter',
                    ],
                },
                # Search-time analyzer: same filters, standard tokenizer.
                'str_search_analyzer': {
                    'type': 'custom',
                    'tokenizer': 'standard',
                    'filter': [
                        'stopwords',
                        'asciifolding',
                        'lowercase',
                        'snowball',
                        'elision',
                        'worddelimiter',
                    ],
                },
                'suggest_analyzer': {
                    'type': 'custom',
                    'tokenizer': 'standard',
                    'filter': [
                        'stopwords',
                        'standard',
                        'lowercase',
                        'asciifolding',
                    ],
                },
            },
            'tokenizer': {
                'haystack_ngram_tokenizer': {
                    'type': 'nGram',
                    'min_gram': 2,
                    'max_gram': 20,
                },
            },
            'filter': {
                'elision': {
                    'type': 'elision',
                    'articles': ['l', 'm', 't', 'qu', 'n', 's', 'j', 'd'],
                },
                'stopwords': {
                    'type': 'stop',
                    'stopwords': ['_french_', '_english_'],
                    'ignore_case': True,
                },
                'worddelimiter': {
                    'type': 'word_delimiter',
                },
            },
        },
    },
}

#Haystack settings
# Point the connection at the custom engine. The original snippet used bare
# ``...`` placeholders inside the dict, which is not valid Python; they are
# kept here as comments so the example can be pasted and parsed.
HAYSTACK_CONNECTIONS = {
    'default': {
        # ... your other connection options (e.g. 'URL', 'INDEX_NAME') ...
        'ENGINE': 'path.to.search_backends.CustomElasticSearchEngine',
        # ... any further connection options ...
    },
}

关于django - 尽管使用Ngram和Edgengram构建索引,但部分搜索在Elasticsearch + Haystack上不起作用,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/35982784/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com