gpt4 book ai didi

java - Solr 自定义分词器仅在第一次查询时被调用

转载 作者:搜寻专家 更新时间:2023-11-01 03:55:06 25 4
gpt4 key购买 nike

我为 Solr 创建了一个自定义分词器,通过 admin/analysis.jsp 和 System.out 日志检查,它似乎工作正常。但是,当我对使用此自定义分词器的字段执行查询时,我发现(通过 System.out 日志)自定义分词器只在第一个查询字符串时被调用。你能帮我指出我哪里做错了吗?以下是我的代码:

package com.fosp.searchengine;
import java.io.Reader;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.solr.analysis.WhitespaceTokenizerFactory;

/**
 * Tokenizer factory that pre-processes the incoming text through
 * {@link ProcessedStringReader} and then splits it with a
 * {@link WhitespaceTokenizer}.
 *
 * <p>NOTE(review): the discussion accompanying this code reports that the
 * tokenizer instances produced here are reused, so {@link #create(Reader)}
 * is not necessarily invoked once per query string - confirm against the
 * Solr/Lucene version in use.
 */
public class JvnTextProTokenizerFactory extends WhitespaceTokenizerFactory {

    /**
     * Builds a whitespace tokenizer over the processed form of {@code input}.
     *
     * @param input the raw character stream handed over by the caller
     * @return a tokenizer that reads from a {@link ProcessedStringReader}
     *         wrapping {@code input}
     */
    @Override
    public WhitespaceTokenizer create(Reader input) {
        // Trace output showing when the factory is actually asked for a tokenizer.
        System.out.println("WhitespaceTokenizer create(Reader input)");
        return new WhitespaceTokenizer(new ProcessedStringReader(input));
    }
}


package com.fosp.searchengine;
import java.io.IOException;
import java.io.Reader;

/**
 * A {@link java.io.Reader} that eagerly drains another reader, passes the
 * complete text through {@code TextProcess.processText}, and then serves the
 * processed characters from an in-memory buffer.
 *
 * <p>All work on the wrapped reader happens in the constructor; the wrapped
 * reader is not retained and is not closed by this class.
 */
public class ProcessedStringReader extends java.io.Reader {

    /** Size of the chunk used while draining the wrapped reader. */
    private static final int BUFFER_SIZE = 1024 * 8;

    /** Lazily created processor shared by every instance of this reader. */
    private static TextProcess m_textProcess = null;

    /** Processed characters; set to {@code null} once the reader is closed. */
    private char[] m_inputData = null;

    /** Index into {@code m_inputData} of the next character to hand out. */
    private int m_offset = 0;

    /** Number of valid characters in {@code m_inputData}. */
    private int m_length = 0;

    /**
     * Reads {@code input} to its end, processes the text and buffers the result.
     *
     * @param input the reader supplying the raw text
     * @throws IllegalStateException if the shared {@code TextProcess} cannot be
     *         created
     */
    public ProcessedStringReader(Reader input) {
        char[] chunk = new char[BUFFER_SIZE];
        StringBuilder raw = new StringBuilder();

        try {
            int numChars;
            // Stop only on end-of-stream (-1), not on a zero-length read.
            while ((numChars = input.read(chunk, 0, chunk.length)) != -1) {
                raw.append(chunk, 0, numChars);
            }
        } catch (IOException e) {
            // Best effort, as before: keep whatever was read prior to the failure.
            e.printStackTrace();
        }

        m_inputData = textProcess().processText(raw.toString()).toCharArray();
        m_offset = 0;
        m_length = m_inputData.length;
    }

    /**
     * Returns the shared {@code TextProcess}, creating it on first use.
     * Synchronized so that concurrent first calls cannot build two instances.
     */
    private static synchronized TextProcess textProcess() {
        if (m_textProcess == null) {
            try {
                m_textProcess = new TextProcess();
            } catch (IOException e) {
                // Fail with the real cause instead of a later NullPointerException.
                throw new IllegalStateException("Unable to initialise TextProcess", e);
            }
        }
        return m_textProcess;
    }

    /**
     * Copies up to {@code len} processed characters into {@code cbuf},
     * starting at index {@code off} of {@code cbuf}.
     *
     * @param cbuf destination buffer
     * @param off  index in {@code cbuf} at which to start storing characters
     * @param len  maximum number of characters to copy
     * @return the number of characters copied, {@code 0} if {@code len} is
     *         zero, or {@code -1} if no processed characters remain
     * @throws IndexOutOfBoundsException if {@code off} or {@code len} is
     *         negative, or {@code len} exceeds {@code cbuf.length - off}
     */
    @Override
    public int read(char[] cbuf, int off, int len) throws IOException {
        if (off < 0 || len < 0 || len > cbuf.length - off) {
            throw new IndexOutOfBoundsException(
                    "off=" + off + ", len=" + len + ", cbuf.length=" + cbuf.length);
        }
        if (len == 0) {
            return 0;
        }
        int remaining = m_length - m_offset;
        if (remaining <= 0) {
            return -1;
        }
        int count = Math.min(len, remaining);
        // 'off' addresses the destination only; the source position is m_offset.
        System.arraycopy(m_inputData, m_offset, cbuf, off, count);
        m_offset += count;
        return count;
    }

    /**
     * Releases the buffered characters. Subsequent reads report end-of-stream.
     */
    @Override
    public void close() throws IOException {
        m_inputData = null;
        m_offset = 0;
        m_length = 0;
    }
}

schema.xml

<!-- Text field type whose index and query analyzers both use the custom
     com.fosp.searchengine.JvnTextProTokenizerFactory followed by
     solr.LowerCaseFilterFactory. -->
<fieldType name="text_jvnTextPro" class="solr.TextField" positionIncrementGap="100">
<!-- Analyzer chain declared for type "index". -->
<analyzer type="index">
<tokenizer class="com.fosp.searchengine.JvnTextProTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<!-- Analyzer chain declared for type "query"; identical to the index chain. -->
<analyzer type="query">
<tokenizer class="com.fosp.searchengine.JvnTextProTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>

最佳答案

这里没有错。由工厂实例化的分词器对象会被重复使用,因此 create 方法不会在每次查询时都被调用;而在 admin/analysis 页面中则不是这样,这就是两者的区别。

关于java - 自定义标记器 solr 仅在第一次调用,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/10185076/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com