gpt4 book ai didi

c# - lucene 2.9.2.2 一个很奇怪的问题,不能搜索关键字 "a",其他的可以

转载 作者:太空宇宙 更新时间:2023-11-03 11:31:57 24 4
gpt4 key购买 nike

添加索引代码:

/// <summary>
/// Builds the Lucene index kept in the "IndexDirectory" folder.
/// </summary>
public class IndexManage
{
    /// <summary>
    /// Recreates the index and adds one document per item.
    /// </summary>
    /// <param name="itemList">Items to index.</param>
    /// <remarks>
    /// The analyzer is a StandardAnalyzer with its default stop-word list, so
    /// stop words such as "a" are dropped from the analyzed fields.
    /// </remarks>
    public static void AddIndex(List<QuestionItem> itemList)
    {
        Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
        Lucene.Net.Store.FSDirectory fs = Lucene.Net.Store.FSDirectory.Open(new DirectoryInfo("IndexDirectory"));
        // create == true: an existing index in the directory is replaced.
        IndexWriter writer = new IndexWriter(fs, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
        try
        {
            foreach (var item in itemList)
            {
                AddDocument(writer, item);
            }
            writer.Commit();
            writer.Optimize();
        }
        catch
        {
            // Discard uncommitted changes and close the writer so the index
            // write lock is released instead of leaking when indexing fails.
            writer.Rollback();
            throw;
        }
        writer.Close();
    }

    /// <summary>
    /// Converts one item into a Lucene document and adds it to the writer.
    /// </summary>
    /// <param name="writer">Open writer that receives the document.</param>
    /// <param name="item">Item whose QID, Title, Content and Supply are indexed.</param>
    private static void AddDocument(IndexWriter writer, QuestionItem item)
    {
        Document document = new Document();
        // Every field is both stored (so Search can rebuild the item) and analyzed.
        document.Add(new Field("qid", item.QID.ToString(), Field.Store.YES, Field.Index.ANALYZED));
        document.Add(new Field("title", item.Title, Field.Store.YES, Field.Index.ANALYZED));
        document.Add(new Field("content", item.Content, Field.Store.YES, Field.Index.ANALYZED));
        document.Add(new Field("supply", item.Supply, Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(document);
    }
}

搜索代码:

/// <summary>
/// Runs keyword searches against the Lucene index kept in the "IndexDirectory" folder.
/// </summary>
public class SearchManage
{
    /// <summary>
    /// Searches the "title", "content" and "supply" fields for the given keyword.
    /// </summary>
    /// <param name="keyword">Query text in Lucene query-parser syntax; terms are combined with OR.</param>
    /// <returns>The matching items (at most 2500), rebuilt from the stored fields.</returns>
    /// <remarks>
    /// The query is analyzed with a StandardAnalyzer using its default stop-word
    /// list, so a keyword made up only of stop words (for example "a") yields no hits.
    /// </remarks>
    public static List<QuestionItem> Search(string keyword)
    {
        Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
        Lucene.Net.Store.FSDirectory fs = Lucene.Net.Store.FSDirectory.Open(new DirectoryInfo("IndexDirectory"));
        // Second argument: open the underlying reader read-only.
        IndexSearcher searcher = new IndexSearcher(fs, true);
        try
        {
            MultiFieldQueryParser parser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_29, new string[] { "title", "content", "supply" }, analyzer);
            parser.SetDefaultOperator(QueryParser.Operator.OR);
            Query query = parser.Parse(keyword);

            var hits = searcher.Search(query, 2500);
            List<QuestionItem> itemList = new List<QuestionItem>();
            for (int i = 0; i < hits.scoreDocs.Length; i++)
            {
                var doc = searcher.Doc(hits.scoreDocs[i].doc);
                itemList.Add(new QuestionItem()
                {
                    QID = Int32.Parse(doc.Get("qid")),
                    Title = doc.Get("title"),
                    Content = doc.Get("content"),
                    Supply = doc.Get("supply")
                });
            }
            return itemList;
        }
        finally
        {
            // Close the searcher even when parsing or searching throws
            // (e.g. a malformed keyword), so it is not leaked.
            searcher.Close();
        }
    }
}

QuestionItem 模型是:

/// <summary>
/// A question record with an identifier and three text fields.
/// </summary>
public class QuestionItem
{
    /// <summary>Numeric identifier of the question.</summary>
    public int QID { get; set; }

    /// <summary>Title text.</summary>
    public string Title { get; set; }

    /// <summary>Content text.</summary>
    public string Content { get; set; }

    /// <summary>Supply text.</summary>
    public string Supply { get; set; }
}

测试代码为:

/// <summary>
/// Rebuilds the sample index, searches it for "a" and prints each hit to the console.
/// </summary>
public static void Show()
{
    AddIndex();
    List<QuestionItem> matches = SearchManage.Search("a");
    Console.WriteLine("search result:");
    foreach (QuestionItem match in matches)
    {
        // The four values are printed back to back, with no separator.
        Console.WriteLine(string.Concat(match.QID, match.Title, match.Content, match.Supply));
    }
}

/// <summary>
/// Indexes five sample questions through IndexManage.AddIndex.
/// </summary>
private static void AddIndex()
{
    List<QuestionItem> samples = new List<QuestionItem>();
    samples.Add(new QuestionItem { QID = 1, Title = "a", Content = "ab", Supply = "abc" });
    samples.Add(new QuestionItem { QID = 2, Title = "b", Content = "a", Supply = "fds a" });
    samples.Add(new QuestionItem { QID = 3, Title = "c", Content = "c defg", Supply = "as dfg hjk" });
    samples.Add(new QuestionItem { QID = 4, Title = "d", Content = "def a b", Supply = "kjhgf ds a" });
    samples.Add(new QuestionItem { QID = 5, Title = "e", Content = "ef ab c", Supply = "a sdf g hjkl" });
    IndexManage.AddIndex(samples);
}

现在的问题是：搜索 “a” 没有任何结果，而搜索 “ab”、“b”、“c” 都能得到结果。这个问题很奇怪，谁能帮帮我？

最佳答案

StandardAnalyzer 默认使用一个停用词列表，“a” 就是其中之一；由于索引和查询使用的是同一个分析器，“a” 在两处都会被过滤掉，所以搜索不到结果。如果不需要停用词，可以使用第二个参数为空集合的构造函数：

// An empty stop-word set as the second argument means no stop words are configured.
Analyzer ana = new StandardAnalyzer(LUCENE_30, Collections.emptySet());

或者在 .NET 中这样写：

// An empty Hashtable as the stop-word set means no stop words are configured.
Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29, new Hashtable());

关于c# - lucene 2.9.2.2 一个很奇怪的问题,不能搜索关键字 "a",其他的可以,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/7479542/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com