- android - 多次调用 OnPrimaryClipChangedListener
- android - 无法更新 RecyclerView 中的 TextView 字段
- android.database.CursorIndexOutOfBoundsException : Index 0 requested, 光标大小为 0
- android - 使用 AppCompat 时,我们是否需要明确指定其 UI 组件(Spinner、EditText)颜色
我正在尝试使用 Lucene.Net 搜索相当复杂的查询
"inject* needle*" OR "point* thingy"~2
所以基本上我需要在常规短语和邻近短语中都能使用通配符。然而,Lucene.Net 自带的基本 QueryParser 会丢弃这些通配符。
我知道 ComplexPhraseQueryParser 可以处理这种查询,不幸的是,Lucene.Net 中并不包含它。
有没有什么方法可以在 Lucene.Net 中构建这样的查询?
最佳答案
我最终将 ComplexPhraseQueryParser 从 Java 移植到 C#。这比预期的要容易得多,并且是一个很好的练习,有助于更好地学习 C#。
我已经提供了下面的代码,以防它对其他人有帮助。请注意,它仍然是非常像 Java 的代码,因为我对 Java 比对 C# 更熟悉 ;-)
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Ported to C# from Java source at http://grepcode.com/file/repo1.maven.org/maven2/org.apache.lucene/lucene-misc/3.0.3/org/apache/lucene/queryParser/complexPhrase/ComplexPhraseQueryParser.java
using Lucene.Net.Analysis;
using Lucene.Net.Index;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Lucene.Net.Search.Spans;
using System;
using System.Collections.Generic;
using Version = Lucene.Net.Util.Version;
/// <summary>
/// A <see cref="QueryParser"/> that accepts wildcard, fuzzy, range and boolean
/// syntax inside quoted phrases, e.g. <c>"inject* needle*" OR "point* thingy"~2</c>.
/// </summary>
/// <remarks>
/// Parsing happens in two passes. The first pass parses the top-level query and
/// records every quoted phrase as a <see cref="ComplexPhraseQuery"/> placeholder.
/// The second pass re-uses this parser to parse the text inside each recorded
/// phrase. The placeholders turn themselves into span queries when they are
/// rewritten against an index reader.
/// The parser keeps per-parse state in instance fields, so a single instance
/// must not be used for concurrent parses.
/// </remarks>
public class ComplexPhraseQueryParser : QueryParser
{
    // Phrases recorded during the first pass; re-created on every top-level Parse.
    private List<ComplexPhraseQuery> complexPhrases = null;

    // True while the contents of the recorded phrases are being parsed (second pass).
    private bool isPass2ResolvingPhrases;

    // The phrase whose contents are currently being parsed in the second pass.
    private ComplexPhraseQuery currentPhraseQuery = null;

    /// <summary>Creates a parser; all arguments are forwarded to the base <see cref="QueryParser"/>.</summary>
    /// <param name="matchVersion">Lucene compatibility version passed to the base parser.</param>
    /// <param name="f">Default field passed to the base parser.</param>
    /// <param name="a">Analyzer passed to the base parser.</param>
    public ComplexPhraseQueryParser(Version matchVersion, string f, Analyzer a) : base(matchVersion, f, a) { }

    /// <summary>
    /// Wraps the phrase text in a <see cref="ComplexPhraseQuery"/> placeholder and
    /// records it so that its contents can be parsed once the current pass is done.
    /// </summary>
    /// <param name="field">Field the phrase applies to.</param>
    /// <param name="queryText">Raw text of the phrase.</param>
    /// <param name="slop">Slop requested for the phrase.</param>
    /// <returns>The recorded placeholder query.</returns>
    protected override Query GetFieldQuery(string field, string queryText, int slop)
    {
        ComplexPhraseQuery cpq = new ComplexPhraseQuery(field, queryText, slop);
        complexPhrases.Add(cpq);
        return cpq;
    }

    /// <summary>
    /// Parses <paramref name="query"/>. A top-level call runs the first pass and then
    /// resolves every recorded phrase; a call made while phrases are being resolved
    /// parses phrase contents with the scoring boolean rewrite method forced on.
    /// </summary>
    /// <param name="query">Query string (or phrase contents during the second pass).</param>
    /// <returns>The parsed query.</returns>
    public override Query Parse(string query)
    {
        if (isPass2ResolvingPhrases)
        {
            RewriteMethod oldMethod = MultiTermRewriteMethod;
            try
            {
                // Temporarily force BooleanQuery rewrite so that the parser produces a
                // visible collection of terms that can be converted into SpanQueries.
                // Constant-score rewrite yields an opaque query that cannot be
                // interrogated for terms the way a BooleanQuery can. QueryParser is not
                // guaranteed thread-safe anyway, so this temporary state change should
                // not present an issue.
                MultiTermRewriteMethod = MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE;
                return base.Parse(query);
            }
            finally
            {
                MultiTermRewriteMethod = oldMethod;
            }
        }

        // First pass: parse the top-level query, recording every phrase that
        // still needs to be resolved.
        complexPhrases = new List<ComplexPhraseQuery>();
        Query q = base.Parse(query);

        // Second pass: parse the contents of each recorded phrase with this parser,
        // under a stricter set of rules (all clauses must target the phrase's field).
        isPass2ResolvingPhrases = true;
        try
        {
            foreach (ComplexPhraseQuery phrase in complexPhrases)
            {
                currentPhraseQuery = phrase;
                currentPhraseQuery.ParsePhraseElements(this);
            }
        }
        finally
        {
            isPass2ResolvingPhrases = false;
        }
        return q;
    }

    /// <summary>
    /// Creates a term query; while phrases are being resolved it first verifies that
    /// the term targets the same field as the enclosing phrase.
    /// </summary>
    /// <param name="term">Term to build the query for.</param>
    /// <returns>The term query produced by the base parser.</returns>
    /// <exception cref="SystemException">
    /// The term belongs to a different field than the enclosing phrase; the
    /// underlying <see cref="ParseException"/> is the inner exception.
    /// </exception>
    protected override Query NewTermQuery(Term term)
    {
        if (isPass2ResolvingPhrases)
        {
            try
            {
                CheckPhraseClauseIsForSameField(term.Field);
            }
            catch (ParseException pe)
            {
                // Exception type kept as-is (carried over from the Java original's
                // RuntimeException) so that existing callers keep catching it.
                throw new SystemException("Error parsing complex phrase", pe);
            }
        }
        return base.NewTermQuery(term);
    }

    /// <summary>
    /// Throws when <paramref name="field"/> differs from the field of the phrase
    /// currently being resolved.
    /// </summary>
    /// <param name="field">Field of the clause found inside the phrase.</param>
    /// <exception cref="ParseException">The clause targets a different field.</exception>
    private void CheckPhraseClauseIsForSameField(string field)
    {
        if (!field.Equals(currentPhraseQuery.Field))
        {
            throw new ParseException("Cannot have clause for field \"" + field
                + "\" nested in phrase " + " for field \"" + currentPhraseQuery.Field
                + "\"");
        }
    }

    /// <summary>Builds a wildcard query, enforcing the same-field rule inside phrases.</summary>
    /// <param name="field">Field of the wildcard clause.</param>
    /// <param name="termStr">Wildcard term text.</param>
    /// <returns>The wildcard query produced by the base parser.</returns>
    protected override Query GetWildcardQuery(string field, string termStr)
    {
        if (isPass2ResolvingPhrases)
        {
            CheckPhraseClauseIsForSameField(field);
        }
        return base.GetWildcardQuery(field, termStr);
    }

    /// <summary>Builds a range query, enforcing the same-field rule inside phrases.</summary>
    /// <param name="field">Field of the range clause.</param>
    /// <param name="part1">First bound of the range.</param>
    /// <param name="part2">Second bound of the range.</param>
    /// <param name="inclusive">Whether the bounds are inclusive.</param>
    /// <returns>The range query produced by the base parser.</returns>
    protected override Query GetRangeQuery(string field, string part1, string part2, bool inclusive)
    {
        if (isPass2ResolvingPhrases)
        {
            CheckPhraseClauseIsForSameField(field);
        }
        return base.GetRangeQuery(field, part1, part2, inclusive);
    }

    /// <summary>
    /// Creates the range query object. Inside a phrase a <see cref="TermRangeQuery"/>
    /// with the scoring boolean rewrite method is returned so that it rewrites to a
    /// BooleanQuery that can later be turned into a SpanOr clause.
    /// </summary>
    /// <param name="field">Field of the range clause.</param>
    /// <param name="part1">First bound of the range.</param>
    /// <param name="part2">Second bound of the range.</param>
    /// <param name="inclusive">Whether the bounds are inclusive (applied to both ends).</param>
    /// <returns>The range query.</returns>
    protected override Query NewRangeQuery(string field, string part1, string part2,
        bool inclusive)
    {
        if (isPass2ResolvingPhrases)
        {
            TermRangeQuery rangeQuery = new TermRangeQuery(field, part1, part2, inclusive, inclusive, RangeCollator);
            rangeQuery.RewriteMethod = MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE;
            return rangeQuery;
        }
        return base.NewRangeQuery(field, part1, part2, inclusive);
    }

    /// <summary>Builds a fuzzy query, enforcing the same-field rule inside phrases.</summary>
    /// <remarks>
    /// Declared with <c>override</c>: without it the method merely hides the base
    /// implementation, the base parser never calls it, and the same-field check is
    /// silently skipped for fuzzy terms.
    /// </remarks>
    /// <param name="field">Field of the fuzzy clause.</param>
    /// <param name="termStr">Fuzzy term text.</param>
    /// <param name="minSimilarity">Minimum similarity passed to the base parser.</param>
    /// <returns>The fuzzy query produced by the base parser.</returns>
    protected override Query GetFuzzyQuery(string field, string termStr, float minSimilarity)
    {
        if (isPass2ResolvingPhrases)
        {
            CheckPhraseClauseIsForSameField(field);
        }
        return base.GetFuzzyQuery(field, termStr, minSimilarity);
    }

    /// <summary>
    /// Placeholder for the text found between quotes. Once its contents have been
    /// parsed, rewriting it produces a span-based interpretation of the clauses.
    /// </summary>
    private class ComplexPhraseQuery : Query
    {
        /// <summary>Field the phrase applies to.</summary>
        public string Field { get; set; }

        /// <summary>Raw text that appeared between the quotes.</summary>
        public string PhrasedQueryStringContents { get; set; }

        /// <summary>Slop used when combining the phrase's clauses into span queries.</summary>
        public int SlopFactor { get; set; }

        // Parsed form of PhrasedQueryStringContents; set by ParsePhraseElements.
        private Query Contents;

        /// <summary>Creates a placeholder for an unparsed phrase.</summary>
        /// <param name="Field">Field the phrase applies to.</param>
        /// <param name="PhrasedQueryStringContents">Raw text of the phrase.</param>
        /// <param name="SlopFactor">Slop requested for the phrase.</param>
        public ComplexPhraseQuery(string Field, string PhrasedQueryStringContents, int SlopFactor)
            : base()
        {
            this.Field = Field;
            this.PhrasedQueryStringContents = PhrasedQueryStringContents;
            this.SlopFactor = SlopFactor;
        }

        /// <summary>
        /// Parses the phrase text with <paramref name="qp"/> and stores the result.
        /// Called by the owning parser for each phrase after the main parse is done.
        /// </summary>
        /// <param name="qp">Parser used to parse the phrase contents.</param>
        public void ParsePhraseElements(QueryParser qp)
        {
            // TODO ensure that field-sensitivity is preserved, i.e. the query string
            // below is parsed as field+":("+phrasedQueryStringContents+")" - but this
            // will need code in Rewrite to unwrap the first layer of boolean query.
            Contents = qp.Parse(PhrasedQueryStringContents);
        }

        /// <summary>
        /// Converts the parsed phrase contents into span queries.
        /// </summary>
        /// <param name="reader">Index reader used to rewrite each clause into concrete terms.</param>
        /// <returns>
        /// The contents unchanged when they are a single <see cref="TermQuery"/>; an
        /// in-order <see cref="SpanNearQuery"/> when no clause is prohibited; otherwise
        /// a <see cref="SpanNotQuery"/> that excludes the full sequence from the
        /// positive clauses.
        /// </returns>
        /// <exception cref="InvalidOperationException">The phrase contents have not been parsed yet.</exception>
        /// <exception cref="ArgumentException">A clause is of a query type that cannot be converted.</exception>
        public override Query Rewrite(IndexReader reader)
        {
            if (Contents == null)
            {
                // Fail with a clear message instead of a NullReferenceException below.
                throw new InvalidOperationException(
                    "Phrase contents have not been parsed; ParsePhraseElements must be called before Rewrite");
            }
            if (Contents is TermQuery)
            {
                return Contents;
            }
            if (!(Contents is BooleanQuery))
            {
                throw new ArgumentException("Unknown query type \""
                    + Contents.GetType()
                    + "\" found in phrase query string \"" + PhrasedQueryStringContents
                    + "\"");
            }

            // Build a sequence of span clauses to arrange in a SpanNear; child clauses
            // can be complex booleans (nots, ors, ...).
            BooleanQuery bq = (BooleanQuery)Contents;
            BooleanClause[] bclauses = bq.GetClauses();
            SpanQuery[] allSpanClauses = new SpanQuery[bclauses.Length];
            int numNegatives = 0;

            // For all clauses, e.g. one* two~
            for (int i = 0; i < bclauses.Length; i++)
            {
                // Rewrite this clause, e.g. one* becomes (one OR onerous).
                Query qc = bclauses[i].Query.Rewrite(reader);
                if (bclauses[i].Occur == Occur.MUST_NOT)
                {
                    numNegatives++;
                }

                if (qc is BooleanQuery)
                {
                    List<SpanQuery> sc = new List<SpanQuery>();
                    AddComplexPhraseClause(sc, (BooleanQuery)qc);
                    if (sc.Count > 0)
                    {
                        allSpanClauses[i] = sc[0];
                    }
                    else
                    {
                        // Insert a fake term: e.g. the phrase was "Fred Smithe*" and
                        // there were no "Smithe*" terms - a match on just "Fred" must
                        // be prevented.
                        allSpanClauses[i] = new SpanTermQuery(new Term(Field,
                            "Dummy clause because no terms found - must match nothing"));
                    }
                }
                else if (qc is TermQuery)
                {
                    TermQuery tq = (TermQuery)qc;
                    allSpanClauses[i] = new SpanTermQuery(tq.Term);
                }
                else
                {
                    throw new ArgumentException("Unknown query type \""
                        + qc.GetType()
                        + "\" found in phrase query string \""
                        + PhrasedQueryStringContents + "\"");
                }
            }

            if (numNegatives == 0)
            {
                // The simple case - no negative elements in the phrase.
                return new SpanNearQuery(allSpanClauses, SlopFactor, true);
            }

            // Complex case - positives and negatives are mixed in the sequence, so a
            // SpanNotQuery is needed.
            List<SpanQuery> positiveClauses = new List<SpanQuery>();
            for (int j = 0; j < allSpanClauses.Length; j++)
            {
                if (bclauses[j].Occur != Occur.MUST_NOT)
                {
                    positiveClauses.Add(allSpanClauses[j]);
                }
            }

            SpanQuery[] includeClauses = positiveClauses.ToArray();
            SpanQuery include;
            if (includeClauses.Length == 1)
            {
                include = includeClauses[0]; // only one positive clause
            }
            else
            {
                // The slop has to grow by the gaps the negative clauses introduce.
                include = new SpanNearQuery(includeClauses, SlopFactor + numNegatives,
                    true);
            }

            // The full sequence of positive and negative clauses is what gets excluded.
            SpanNearQuery exclude = new SpanNearQuery(allSpanClauses, SlopFactor,
                true);
            return new SpanNotQuery(include, exclude);
        }

        /// <summary>
        /// Flattens a boolean clause into span queries and appends at most one
        /// resulting span query to <paramref name="spanClauses"/>.
        /// </summary>
        /// <remarks>
        /// Non-prohibited children are OR-ed together. When prohibited children exist,
        /// the OR is wrapped in a <see cref="SpanNotQuery"/> excluding them. Nothing is
        /// appended when there are no non-prohibited children.
        /// </remarks>
        /// <param name="spanClauses">List that receives the resulting span query.</param>
        /// <param name="qc">Boolean query whose clauses are converted.</param>
        /// <exception cref="ArgumentException">A child is neither a TermQuery nor a BooleanQuery.</exception>
        private void AddComplexPhraseClause(List<SpanQuery> spanClauses, BooleanQuery qc)
        {
            List<SpanQuery> ors = new List<SpanQuery>();
            List<SpanQuery> nots = new List<SpanQuery>();
            BooleanClause[] bclauses = qc.GetClauses();

            // For all clauses, e.g. one* two~
            for (int i = 0; i < bclauses.Length; i++)
            {
                Query childQuery = bclauses[i].Query;

                // Select the list that receives this clause's options.
                List<SpanQuery> chosenList = ors;
                if (bclauses[i].Occur == Occur.MUST_NOT)
                {
                    chosenList = nots;
                }

                if (childQuery is TermQuery)
                {
                    TermQuery tq = (TermQuery)childQuery;
                    SpanTermQuery stq = new SpanTermQuery(tq.Term);
                    stq.Boost = tq.Boost;
                    chosenList.Add(stq);
                }
                else if (childQuery is BooleanQuery)
                {
                    BooleanQuery cbq = (BooleanQuery)childQuery;
                    AddComplexPhraseClause(chosenList, cbq);
                }
                else
                {
                    // TODO alternatively could call extract terms here?
                    throw new ArgumentException("Unknown query type:"
                        + childQuery.GetType());
                }
            }

            if (ors.Count == 0)
            {
                return;
            }

            SpanOrQuery soq = new SpanOrQuery(ors.ToArray());
            if (nots.Count == 0)
            {
                spanClauses.Add(soq);
            }
            else
            {
                SpanOrQuery snqs = new SpanOrQuery(nots.ToArray());
                SpanNotQuery snq = new SpanNotQuery(soq, snqs);
                spanClauses.Add(snq);
            }
        }

        /// <summary>Returns the raw phrase text wrapped in double quotes.</summary>
        /// <param name="field">Unused.</param>
        public override string ToString(string field)
        {
            return "\"" + PhrasedQueryStringContents + "\"";
        }

        /// <summary>Hash code derived from the field, the phrase text and the slop.</summary>
        public override int GetHashCode()
        {
            const int prime = 31;
            // unchecked: wrap-around is intended and must not throw in checked builds.
            unchecked
            {
                int result = 1;
                result = prime * result + ((Field == null) ? 0 : Field.GetHashCode());
                result = prime * result
                    + ((PhrasedQueryStringContents == null) ? 0 : PhrasedQueryStringContents.GetHashCode());
                result = prime * result + SlopFactor;
                return result;
            }
        }

        /// <summary>
        /// Two phrase queries are equal when they have the same runtime type, field,
        /// phrase text and slop.
        /// </summary>
        /// <param name="obj">Object to compare with.</param>
        public override bool Equals(object obj)
        {
            if (ReferenceEquals(this, obj))
            {
                return true;
            }
            if (obj == null || GetType() != obj.GetType())
            {
                return false;
            }
            ComplexPhraseQuery other = (ComplexPhraseQuery)obj;
            // string.Equals(a, b) is null-safe and ordinal, matching the original
            // null checks followed by instance Equals.
            return string.Equals(Field, other.Field)
                && string.Equals(PhrasedQueryStringContents, other.PhrasedQueryStringContents)
                && SlopFactor == other.SlopFactor;
        }
    }
}
关于c# - Lucene.NET 中的复杂短语和/或 ComplexPhraseQueryParser,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/28094280/
创建使用.NET框架的asp.net页面时,访问该页面的客户端是否需要在其计算机上安装.NET框架? IE。用户访问www.fakesite.com/default.aspx,如果他们没有安装框架,他
我阅读了很多不同的博客和 StackOverflow 问题,试图找到我的问题的答案,但最后我找不到任何东西,所以我想自己问这个问题。 我正在构建一个应用程序,其中有一个长时间运行的工作线程,它执行一些
已锁定。这个问题及其答案是locked因为这个问题是题外话,但却具有历史意义。目前不接受新的答案或互动。 我一直想知道为什么微软为这样一个伟大的平台选择了一个如此奇怪的、对搜索引擎不友好的名称。他们就
.Net Framework .Net .NET Standard的区别 1、.NET Framework 在未来.NET Framework或许成为过去时,目前还是有很多地方在使用的。这一套
如果有选择的话,您会走哪条路? ASP.NET Webforms + ASP.NET AJAX 或 ASP.NET MVC + JavaScript Framework of your Choice
我有一个 Web 服务,它通过专用连接通过 https 使用第三方 Web 服务,我应用了 ServicePointManager.ServerCertificateValidationCallbac
为什么我应该选择ASP.NET Web Application (.NET Framework)而不是ASP.NET Core Web Application (.NET Framework)? 我在
我在网络上没有找到任何关于包含 .NET Standard、.NET Core 和 .NET Framework 项目的 .NET 解决方案的公认命名约定。 就我而言,我们在 .NET 框架项目中有以
.NET Compact 是 .NET 的完美子集吗? 假设我考虑了屏幕大小和其他限制并避免了 .NET Compact 不支持的类和方法,或者 .NET Compact 是一个不同且不兼容的 GUI
我已经阅读了所有我能找到的关于 connectionManagement 中的 maxconnection 设置的文章:即 http://support.microsoft.com/kb/821268
我现在正在使用asp.net mvc,想知道使用内置的Json或 Json.Net哪个是更好的选择,但我不确定一个人是否比另一个人有优势。 另外,如果我确实选择沿用Json.Net的路线,那么我应该选
在 Visual Studio 中,您至少可以创建三种不同类型的类库: 类库(.NET Framework) 类库(.NET 标准) 类库(.NET Core) 虽然第一个是我们多年来一直使用的,但我
.NET 和 ASP.NET 之间有什么区别?它们有什么关系? 最佳答案 ASP.Net 基于 .Net 框架构建,提供有关 Web 开发的附加功能。 你可以去看看wikipedia article
在安装更高版本(3.0)之前,我需要安装.net框架1.1和2.0吗?或者单独安装 3.0 框架就足够了,并为在早期框架版本上编写的软件提供支持?谢谢 ,丽然 最佳答案 不,您不必安装以前的框架。 我
我正在开发一个项目,人们可以“更新”类别,例如更改类别的名称。我收到以下消息 This is called after clicking update 按钮 with the SQL statemen
.NET 类 System.Net.CookieContainer 线程安全吗? --更新:交 key 答复-- 是否有任何方法可以确保异步请求期间修改的变量(即 HttpWebRequest.Coo
我正在使用 JScript.NET 在我编写的 C# WinForms 应用程序中编写脚本。它工作得很好,但我只是尝试在脚本中放置一些异常处理,但我无法弄清楚如何判断我的 C# 代码抛出了哪种类型的异
我需要你的帮助, 比如我有一个小数类型的变量,我想这样取整。 例如 3.0 = 3 3.1 = 4 3.2 = 4 3.3 = 4 3.4 = 4 3.5 = 4 3.6 = 4 3.7 = 4 3.
我使用过这样的代码:http://msdn.microsoft.com/en-us/library/dw70f090.aspx在 ASP.NET 中工作之前访问数据库(2-3 年前)。我没有意识到我正
自 ConfigurationManager .NET Standard 中不存在,检索正在执行的程序集的应用程序设置的最佳方法是什么,无论是 web.config或 appSettings.{env
我是一名优秀的程序员,十分优秀!