gpt4 book ai didi

c# - Lucene 返回分数为非正数的文档

转载 作者:可可西里 更新时间:2023-11-01 03:02:06 26 4
gpt4 key购买 nike

我们最近升级了所使用的 CMS,因此不得不将 Lucene.net 从 V2.3.1.301 升级到 V2.9.4.1。

我们在最初的解决方案中使用了 CustomScoreQuery,用它实现了内置查询无法完成的各种过滤(地理位置、多个日期范围等)。

自从由旧版本迁移到新版本的 Lucene 之后,搜索开始返回一些本不应返回的文档:检查结果时,这些文档的分数为 0,甚至为负数。

[图片:enter image description here]

下面是用于演示该问题的重构代码示例:

    /// <summary>
    /// Demo driver: opens the site-search index read-only, runs the event-date
    /// query for 2015-11-23 .. 2015-11-25, prints the total hit count and waits
    /// for a key press.
    /// </summary>
    /// <remarks>
    /// The searcher and the directory are closed in <c>finally</c> blocks so the
    /// index files are released even when the search throws. The analyzer,
    /// query parser and wrapping <c>BooleanQuery</c> that were previously built
    /// here were never used by the search and have been removed.
    /// </remarks>
    public LuceneTest()
    {
        Lucene.Net.Store.Directory luceneIndexDirectory = FSDirectory.Open(new System.IO.DirectoryInfo(@"C:\inetpub\wwwroot\Project\build\Data\indexes\all_site_search_en"));
        try
        {
            // 'true' opens the underlying reader in read-only mode.
            IndexSearcher searcher = new IndexSearcher(luceneIndexDirectory, true);
            try
            {
                Query dateQuery = ComposeEventDateQuery(new DateTime(2015, 11, 23), new DateTime(2015, 11, 25), searcher);

                // Static, process-wide setting; applied before the search runs,
                // exactly as in the original sequence of statements.
                BooleanQuery.SetMaxClauseCount(10000);

                TopDocs hitsFound = searcher.Search(dateQuery, 1000);
                System.Console.WriteLine(String.Format("Found {0} matches with the date filters", hitsFound.TotalHits));
                System.Console.ReadKey();
            }
            finally
            {
                searcher.Close();
            }
        }
        finally
        {
            luceneIndexDirectory.Close();
        }
    }



/// <summary>
/// Builds the query used by the demo: a term query on <c>_language = "en"</c>
/// wrapped in an <see cref="EventDateQuery1"/> for the given date window, which
/// is in turn added as the single MUST clause of a <c>BooleanQuery</c>.
/// </summary>
/// <param name="fromDate">First day of the date window.</param>
/// <param name="ToDate">Last day of the date window.</param>
/// <param name="MySearcher">Searcher handed through to <see cref="EventDateQuery1"/>.</param>
/// <returns>The wrapping boolean query.</returns>
public static Query ComposeEventDateQuery(DateTime fromDate, DateTime ToDate, IndexSearcher MySearcher)
{
    Lucene.Net.Index.Term languageTerm = new Lucene.Net.Index.Term("_language", "en");
    Query languageQuery = new TermQuery(languageTerm);

    // 'false' is passed as shouldMatchNonEvents.
    Query dateScoredQuery = new EventDateQuery1(languageQuery, MySearcher, fromDate, ToDate, false);

    BooleanQuery wrapper = new BooleanQuery();
    wrapper.Add(dateScoredQuery, BooleanClause.Occur.MUST);
    return wrapper;
}


/// <summary>
/// A <c>CustomScoreQuery</c> that wraps a sub-query and delegates scoring to
/// <see cref="EventDateQueryCustomScoreProvider"/>, configured with a date
/// window and a "match non-events" flag.
/// </summary>
public class EventDateQuery1 : CustomScoreQuery
{
    // Stored by the constructors; not read anywhere inside this class.
    private readonly Searcher _searcher;
    private readonly DateTime _fromDT;
    private readonly DateTime _toDT;
    private readonly string _dateFormat = "yyyyMMdd";

    private readonly bool _shouldMatchNonEvents;

    /// <summary>
    /// Creates a query whose window starts at <paramref name="fromDT"/> and
    /// ends <paramref name="dateRange"/> days later.
    /// </summary>
    /// <param name="subQuery">Query passed to the base <c>CustomScoreQuery</c>.</param>
    /// <param name="searcher">Searcher kept on the instance.</param>
    /// <param name="fromDT">Start of the window; only the date part is kept.</param>
    /// <param name="shouldMatchNonEvents">Flag forwarded to the score provider.</param>
    /// <param name="dateRange">Window length in days (default 14).</param>
    public EventDateQuery1(Query subQuery, Searcher searcher, DateTime fromDT, bool shouldMatchNonEvents, int dateRange = 14)
        : this(subQuery, searcher, fromDT, fromDT.AddDays(dateRange), shouldMatchNonEvents)
    {
    }

    /// <summary>
    /// Creates a query with an explicit start and end date.
    /// </summary>
    /// <param name="subQuery">Query passed to the base <c>CustomScoreQuery</c>.</param>
    /// <param name="searcher">Searcher kept on the instance.</param>
    /// <param name="fromDT">Start of the window; only the date part is kept.</param>
    /// <param name="toDT">End of the window; only the date part is kept.</param>
    /// <param name="shouldMatchNonEvents">Flag forwarded to the score provider.</param>
    public EventDateQuery1(Query subQuery, Searcher searcher, DateTime fromDT, DateTime toDT, bool shouldMatchNonEvents)
        : base(subQuery)
    {
        _searcher = searcher;
        _fromDT = fromDT.Date;
        _toDT = toDT.Date;
        _shouldMatchNonEvents = shouldMatchNonEvents;
    }

    /// <summary>Returns the key produced by <see cref="GenerateUniqueKey"/>.</summary>
    public override string ToString()
    {
        return GenerateUniqueKey();
    }

    /// <summary>Returns the key produced by <see cref="GenerateUniqueKey"/>; <paramref name="field"/> is ignored.</summary>
    public override string ToString(string field)
    {
        return GenerateUniqueKey();
    }

    /// <summary>Returns the key produced by <see cref="GenerateUniqueKey"/>.</summary>
    public override string Name()
    {
        return GenerateUniqueKey();
    }

    /// <summary>
    /// Builds a string of the form <c>EventDateQuery_yyyyMMdd_yyyyMMdd_True|False</c>
    /// from the window bounds and the flag.
    /// </summary>
    /// <remarks>
    /// Dates are formatted with the invariant culture so the key does not
    /// change with the thread's current culture (for example, cultures whose
    /// default calendar is not Gregorian would otherwise yield a different year).
    /// </remarks>
    public string GenerateUniqueKey()
    {
        System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
        return String.Format(
            invariant,
            "EventDateQuery_{0}_{1}_{2}",
            _fromDT.ToString(_dateFormat, invariant),
            _toDT.ToString(_dateFormat, invariant),
            _shouldMatchNonEvents.ToString());
    }

    /// <summary>
    /// Supplies the score provider for the given reader, configured with this
    /// query's date window and flag.
    /// </summary>
    protected override CustomScoreProvider GetCustomScoreProvider(IndexReader reader)
    {
        return new EventDateQueryCustomScoreProvider(reader, _fromDT, _toDT, _shouldMatchNonEvents);
    }
}

/// <summary>
/// Score provider used by <c>EventDateQuery1</c>. In this demo the score depends
/// only on the document number: documents numbered below 10 score 1, all
/// others score 0. The sub-query and value-source scores are ignored.
/// </summary>
public class EventDateQueryCustomScoreProvider : CustomScoreProvider
{
    // Date window and flag are captured by the constructor; the placeholder
    // scoring below does not read them.
    private DateTime _fromDT;
    private DateTime _toDT;
    private readonly string _dateFormat = "yyyyMMdd";
    private bool _shouldMatchNonEvents = true;

    // Scores handed back for non-matching / matching documents.
    private float NoMatchFloat = 0f;
    private float MatchFloat = 1f;

    /// <summary>
    /// Creates a provider for <paramref name="reader"/> and records the date
    /// parts of the window bounds plus the flag.
    /// </summary>
    public EventDateQueryCustomScoreProvider(IndexReader reader, DateTime fromDT, DateTime toDT, bool shouldMatchNonEvents)
        : base(reader)
    {
        _shouldMatchNonEvents = shouldMatchNonEvents;
        _toDT = toDT.Date;
        _fromDT = fromDT.Date;
    }

    /// <summary>Single value-source overload; delegates to <see cref="myScore"/>.</summary>
    public override float CustomScore(int doc, float subQueryScore, float valSrcScore)
    {
        float score = myScore(doc);
        return score;
    }

    /// <summary>Multiple value-source overload; delegates to <see cref="myScore"/>.</summary>
    public override float CustomScore(int doc, float subQueryScore, float[] valSrcScores)
    {
        float score = myScore(doc);
        return score;
    }

    /// <summary>
    /// Placeholder scoring used only to demonstrate the run: 1 for document
    /// numbers below 10, otherwise 0.
    /// </summary>
    public float myScore(int doc)
    {
        return doc < 10 ? MatchFloat : NoMatchFloat;
    }
}

关于如何让 Lucene 不返回这些文档的任何建议都很棒。提前致谢。

最佳答案

您可以编写一个自定义收集器(Collector),让它只收集得分大于 0 的文档,然后将该收集器的实例传递给 Search() 方法。这里(here,原文为链接)有一个这样的 Collector 实现。

然而,如果您并不需要全部结果,文档(documentation,原文为链接)并不建议采用这种方案。您的情况可能正是如此,因为您只取了前 1000 个文档。

关于 c# - Lucene 返回分数为非正数的文档,我们在 Stack Overflow 上找到一个类似的问题: https://stackoverflow.com/questions/33360962/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com