gpt4 book ai didi

c# - Lucene 字段未包含在其他工作搜索中

转载 作者:太空宇宙 更新时间:2023-11-03 15:21:12 25 4
gpt4 key购买 nike

在具有多个字段的 C# bool 查询 Lucene 搜索中,其中三个字段(Sku、VariantSkus 和 Mpc)未包含在搜索中,而其他字段工作正常。

使用 Luke,我可以看到值存储在索引中。在 Luke 中搜索时,我使用搜索器中包含的查询得到了正确的结果(取自 Visual Studio 中的调试器)。例子:使用以下查询:(在 Visual Studio 中调试时直接从查询值中获取)

(+Mpc:B118^5) (+Sku:B118^5) (+Brand:B118) (+VariantSkus:B118^4) (+DisplayName:B118^3) (+DisplayName:B118*) (+DisplayName:B118~0.5) (+MisspelledNames:B118) (+Description:B118^0.4)

运行代码时不起作用(搜索器上的 totalHits 为 0),但给出了将 Mpc 与 Luke 中的正确产品相匹配的预期结果。

老实说,我很困惑为什么相同的查询在 C# 代码中不起作用。如有任何帮助或建议,我们将不胜感激。

索引的创建:

        public static String CreateLuceneIndex(string basePath, HttpContext context)
{
var stopwatch = new Stopwatch();

/* get the absolute path to the directory where the indexes will be created (and if it doesn't exist, create it) */
var dirPath = context.Server.MapPath(basePath);
if (!Directory.Exists(dirPath)) Directory.CreateDirectory(dirPath);
var di = new DirectoryInfo(dirPath);
var directory = FSDirectory.Open(di);

stopwatch.Start();

/* Select the standard Lucene analyser */
var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
var count = 0;
var catalog = ProductCatalog.All().First();

/* Open the index writer using the selected analyser */
using (var writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
using(var mediaRepository = new ProductMediaRepository())
{
var urlService = ObjectFactory.Instance.Resolve<IUrlService>();

// Get all the visible products from uCommerce we wish to index
foreach (var product in Product.Find(p => p.DisplayOnSite && p.ParentProduct == null))
{
var url = urlService.GetUrl(catalog, product);

var doc = new Document();
doc.Add(new Field("id", product.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.YES));
doc.Add(new Field("Url", url ?? String.Empty, Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.YES));
doc.Add(new Field("Src", ImageService.GetProductMainImage(mediaRepository, product).Src ?? String.Empty
, Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.YES));
doc.Add(new Field("Sku", product.Sku ?? String.Empty, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));

var varianSkus = String.Join(" ", product.Variants.Select(variant => variant.VariantSku));
doc.Add(new Field("VariantSkus", varianSkus, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));

doc.Add(new Field("DisplayName", product.DisplayName() ?? product.Name ?? String.Empty, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
var brands = String.Join(" ", product.Variants.Select(variant => variant.GetPropertyValue<String>("Brand")).Where(w => !String.IsNullOrWhiteSpace(w)));
doc.Add(new Field("Brand", brands ?? String.Empty, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));

doc.Add(new Field("MisspelledNames", product.GetPropertyValue<String>("MisspelledNames") ?? String.Empty,
Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));

doc.Add(new Field("Description", product.ShortDescription()?.StripHtml() ?? String.Empty, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));

doc.Add(new Field("Mpc", product.GetPropertyValue<String>("MPC") ?? String.Empty, Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.YES));

writer.AddDocument(doc);
count++;
}

writer.Optimize();
writer.Close();
}

stopwatch.Stop();
return $"Indexed {count} products in {stopwatch.Elapsed}.\n\n";

搜索:

       public static ListItemsDtoModel ProductSearch(String searchTerm, String indexDirPath, Int32 maxResults = Int32.MaxValue)
{
searchTerm = searchTerm.Trim().ToLowerInvariant();
var searchWords = ParseSearchWords(searchTerm);

indexDirPath = HttpContext.Current.Server.MapPath(indexDirPath);
var di = new DirectoryInfo(indexDirPath);

using (var directory = FSDirectory.Open(di))
using (var searcher = new IndexSearcher(IndexReader.Open(directory, true)))
{
var query = new BooleanQuery();

query.Add(new BooleanClause(AddTermClauseGroup("Mpc", searchWords, 5), BooleanClause.Occur.SHOULD));
query.Add(new BooleanClause(AddTermClauseGroup("Sku", searchWords, 5), BooleanClause.Occur.SHOULD));
query.Add(new BooleanClause(AddTermClauseGroup("Brand", searchWords), BooleanClause.Occur.SHOULD));
query.Add(new BooleanClause(AddTermClauseGroup("VariantSkus", searchWords, 4), BooleanClause.Occur.SHOULD));
query.Add(new BooleanClause(AddTermClauseGroup("DisplayName", searchWords, 3), BooleanClause.Occur.SHOULD));
query.Add(new BooleanClause(AddWildcardClauseGroup("DisplayName", searchWords), BooleanClause.Occur.SHOULD));
query.Add(new BooleanClause(AddFuzzyTermClauseGroup("DisplayName", searchWords), BooleanClause.Occur.SHOULD));
query.Add(new BooleanClause(AddTermClauseGroup("MisspelledNames", searchWords), BooleanClause.Occur.SHOULD));
query.Add(new BooleanClause(AddTermClauseGroup("Description", searchWords, 0.4f), BooleanClause.Occur.SHOULD));

var searchResults = searcher.Search(query, maxResults);

return AsListItemsDtoModel(searchResults.ScoreDocs.Select(sd =>
{
var document = searcher.Doc(sd.doc);
return new ImageLinkDtoModel
{
Url = document.Get("Url"),
Text = document.Get("DisplayName"),
Alt = document.Get("DisplayName"),
Src = document.Get("Src"),
};
}).ToList());
}
}
private static String[] ParseSearchWords(string searchTerm)
{
return searchTerm.Split(' ', '-')
.Where(w => !String.IsNullOrWhiteSpace(w))
.Select(QueryParser.Escape)
.ToArray();
}


private static BooleanQuery AddTermClauseGroup(String field, IEnumerable<String> searchTerms, float boost = 1f)
{
var boostStr = Math.Abs(boost-1f) > 0.001 ? "^" + boost.ToString(CultureInfo.InvariantCulture) : String.Empty;

return AddClauseGroup(searchTerms, word => new TermQuery(new Term(field, word + boostStr)));
}

private static BooleanQuery AddFuzzyTermClauseGroup(String field, IEnumerable<String> searchTerms)
{
return AddClauseGroup(searchTerms, word => new FuzzyQuery(new Term(field, word), 0.5f));
}

private static BooleanQuery AddWildcardClauseGroup(String field, IEnumerable<String> searchTerms)
{
return AddClauseGroup(searchTerms, word => new WildcardQuery(new Term(field, word + "*")));
}

private static BooleanQuery AddClauseGroup(IEnumerable<String> searchTerms, Func<String, Query> createSubClause)
{
var query = new BooleanQuery();

foreach (var searchTerm in searchTerms)
{
query.Add(new BooleanClause(createSubClause(searchTerm), BooleanClause.Occur.MUST));
}
return query;
}

最佳答案

问题在于您应用提升的方式:

return AddClauseGroup(searchTerms, word => new TermQuery(new Term(field, word  + boostStr)));

您不能以这种方式将提升纳入术语本身。这里没有 QueryParser,所以像“term^4”这样的 QueryParser 语法是行不通的。它只会搜索字符串“term^4”,默认提升为 1.0。带有提升的 TermQuery 看起来像:

Query query = new TermQuery(new Term(field, word));
query.Boost = boost;

关于c# - Lucene 字段未包含在其他工作搜索中,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/37325143/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com