gpt4 book ai didi

java - 如何使用 Lucene、Highlighter 和 StandardAnalyzer 通过 Hibernate 搜索引擎获取文本片段

转载 作者:太空宇宙 更新时间:2023-11-04 14:40:39 26 4
gpt4 key购买 nike

我正在学习基于 Lucene 的 Hibernate Search,并且已经在 Article 类(文章保存在 MySQL 数据库中)上按如下方式完成了配置。我的目标是在浏览器中向用户展示他们搜索到的文章标题以及匹配的文本片段。以下是我目前的实现:

@Entity
@Indexed
@Analyzer(impl = StandardAnalyzer.class)
@Table(name = "ARTICLE", catalog = "kefet3")
public class Article implements java.io.Serializable {


private static final long serialVersionUID = 1L;
// Fields

private Integer id;
private Articlelanguage articlelanguage;
private Users users;
private Articlecategory articlecategory;
@Analyzer(impl = StandardAnalyzer.class)
@Field(index = Index.YES, analyze = Analyze.YES, store = Store.NO)
private String artTitle;
@Field(index = Index.YES, analyze = Analyze.YES, store = Store.NO)
private String artContent;

上面的类还包含各字段的 getter 和 setter,以及所有数据库映射相关的注解(此处省略)。

下面是搜索的方法。

   /**
    * Runs a keyword search over the {@code artTitle} and {@code artContent} fields of
    * {@link Article} and prints highlighted fragments of every hit to standard output.
    *
    * <p>Each token stream is created immediately before the highlighter consumes it.
    * Lucene 4.x analyzers may reuse their per-thread token stream components, so asking
    * for a second stream before the first one has been consumed can re-target the first
    * stream at the second reader. Creating and consuming the streams one at a time is
    * safe regardless of the Lucene version on the classpath.
    *
    * <p>Articles whose title or content is {@code null} are not highlighted for that
    * field ({@code null} is printed instead) rather than failing in
    * {@code new StringReader(null)}.
    *
    * @param word the keyword(s) matched against the indexed title and content
    * @return the matching articles, exactly as returned by the full-text query
    */
   @Override
   @SuppressWarnings("unchecked")
   public List<Article> search(String word) {

       // NOTE(review): `analyzer` is not declared in this method - presumably a field of
       // the enclosing class; confirm.
       analyzer = new StandardAnalyzer(Version.LUCENE_36);
       FullTextSession fullTextSession = Search.getFullTextSession(getCurrentSession());

       // get a query builder
       QueryBuilder queryBuilder = fullTextSession.getSearchFactory()
               .buildQueryBuilder().forEntity(Article.class).get();

       // build the query
       org.apache.lucene.search.Query query = queryBuilder.keyword()
               .onFields("artTitle", "artContent")
               .matching(word).createQuery();

       // wrap the Lucene query in a Hibernate Search full-text query restricted to Article
       FullTextQuery fullTextQuery = fullTextSession.createFullTextQuery(query, Article.class);

       List<Article> searchResultList = fullTextQuery.list();

       for (Article hit : searchResultList) {
           System.out.println("searchResultList###################" + hit.getArtTitle());
       }

       Highlighter highlighter = new Highlighter(new QueryScorer(query));
       highlighter.setTextFragmenter(new SimpleFragmenter(20));

       int maxNumFragmentsRequired = 3;

       for (Article art : searchResultList) {
           String artContent = art.getArtContent();
           String artTitle = art.getArtTitle();

           String result = null;
           String resul2 = null;
           try {
               // Create each stream right before it is consumed (see method comment).
               if (artContent != null) {
                   TokenStream contentStream =
                           analyzer.tokenStream("artContent", new StringReader(artContent));
                   result = highlighter.getBestFragments(
                           contentStream, artContent, maxNumFragmentsRequired, " ...");
               }

               if (artTitle != null) {
                   TokenStream titleStream =
                           analyzer.tokenStream("artTitle", new StringReader(artTitle));
                   resul2 = highlighter.getBestFragments(
                           titleStream, artTitle, maxNumFragmentsRequired, " ...");
               }
           } catch (IOException e) {
               System.out.println("((((((((((((((((((((IOException))))))))))))))))))))" + e);
               e.printStackTrace();
           } catch (InvalidTokenOffsetsException e) {
               System.out.println("((((((((((((((((((((InvalidTokenOffsetsException))))))))))))))))))))" + e);
               e.printStackTrace();
           }

           System.out.println(result);

           System.out.println(resul2);
       }

       return searchResultList;
   }

我得到的结果是:

org.springframework.web.util.NestedServletException: Request processing failed; nested exception is java.lang.IllegalStateException: No match found
org.springframework.web.servlet.FrameworkServlet.processRequest(FrameworkServlet.java:973)
org.springframework.web.servlet.FrameworkServlet.doGet(FrameworkServlet.java:852)
javax.servlet.http.HttpServlet.service(HttpServlet.java:620)
org.springframework.web.servlet.FrameworkServlet.service(FrameworkServlet.java:837)
javax.servlet.http.HttpServlet.service(HttpServlet.java:727)
org.apache.tomcat.websocket.server.WsFilter.doFilter(WsFilter.java:52)
com.github.dandelion.datatables.core.web.filter.DatatablesFilter.doFilter(DatatablesFilter.java:73)

root cause

java.lang.IllegalStateException: No match found
java.util.regex.Matcher.group(Matcher.java:485)
java.util.regex.Matcher.group(Matcher.java:445)
com.kefet.dao.impl.SearchDAOImpl.search(SearchDAOImpl.java:125)
com.kefet.service.impl.SearchServiceImpl.search(SearchServiceImpl.java:47)
sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
java.lang.reflect.Method.invoke(Method.java:606)
org.springframework.aop.support.AopUtils.invokeJoinpointUsingReflection(AopUtils.java:317)
org.springframework.aop.framework.ReflectiveMethodInvocation.invokeJoinpoint(ReflectiveMethodInvocation.java:190)
org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:157)
org.springframework.transaction.interceptor.TransactionInterceptor$1.proceedWithInvocation(TransactionInterceptor.java:98)
org.springframework.transaction.interceptor.TransactionAspectSupport.invokeWithinTransaction(TransactionAspectSupport.java:262)
org.springframework.transaction.interceptor.TransactionInterceptor.invoke(TransactionInterceptor.java:95)
org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:179)
org.springframework.aop.framework.JdkDynamicAopProxy.invoke(JdkDynamicAopProxy.java:207)
com.sun.proxy.$Proxy48.search(Unknown Source)
com.kefet.controller.SearchController.searchText(SearchController.java:30)
sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
java.lang.reflect.Method.invoke(Method.java:606)
org.springframework.web.method.support.InvocableHandlerMethod.invoke(InvocableHandlerMethod.java:215)
org.springframework.web.method.support.InvocableHandlerMethod.invokeForRequest(InvocableHandlerMethod.java:132)
org.springframework.web.servlet.mvc.method.annotation.ServletInvocableHandlerMethod.invokeAndHandle(ServletInvocableHandlerMethod.java:104)
org.springframework.web.servlet.mvc.method.annotation.RequestMappingHandlerAdapter.invokeHandleMethod(RequestMappingHandlerAdapter.java:749)
org.springframework.web.servlet.mvc.method.annotation.RequestMappingHandlerAdapter.handleInternal(RequestMappingHandlerAdapter.java:689)
org.springframework.web.servlet.mvc.method.AbstractHandlerMethodAdapter.handle(AbstractHandlerMethodAdapter.java:83)
org.springframework.web.servlet.DispatcherServlet.doDispatch(DispatcherServlet.java:938)
org.springframework.web.servlet.DispatcherServlet.doService(DispatcherServlet.java:870)
org.springframework.web.servlet.FrameworkServlet.processRequest(FrameworkServlet.java:961)
org.springframework.web.servlet.FrameworkServlet.doGet(FrameworkServlet.java:852)
javax.servlet.http.HttpServlet.service(HttpServlet.java:620)
org.springframework.web.servlet.FrameworkServlet.service(FrameworkServlet.java:837)
javax.servlet.http.HttpServlet.service(HttpServlet.java:727)
org.apache.tomcat.websocket.server.WsFilter.doFilter(WsFilter.java:52)
com.github.dandelion.datatables.core.web.filter.DatatablesFilter.doFilter(DatatablesFilter.java:73)

我找到的教程位于下面的链接中:

https://code.google.com/p/hibernatesearchinaction/source/browse/trunk/ch13/src/com/manning/hsia/dvdstore/TestHighlighter.java?r=86

我的 pom 文件包含以下内容:

<hibernate.version>4.3.5.Final</hibernate.version>
<hibernate-search-orm.version>4.5.1.Final</hibernate-search-orm.version>
<hibernate-search-analyzers.version>4.5.1.Final</hibernate-search-analyzers.version>
<hibernate-search-infinispan.version>4.5.1.Final</hibernate-search-infinispan.version>
<lucene-highlighter.version>4.9.0</lucene-highlighter.version>
<lucene-analyzers-common.version>4.9.0</lucene-analyzers-common.version>

<mysql.connector.version>5.1.30</mysql.connector.version>

<dependency>
<groupId>org.hibernate</groupId>
<artifactId>hibernate-search-orm</artifactId>
<version>${hibernate-search-orm.version}</version>
</dependency>

<dependency>
<groupId>org.hibernate</groupId>
<artifactId>hibernate-search-analyzers</artifactId>
<version>${hibernate-search-analyzers.version}</version>
</dependency>


<dependency>
<groupId>org.hibernate</groupId>
<artifactId>hibernate-search-infinispan</artifactId>
<version>${hibernate-search-infinispan.version}</version>
</dependency>

<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-highlighter</artifactId>
<version>${lucene-highlighter.version}</version>
</dependency>

<dependency>
<groupId>org.hibernate</groupId>
<artifactId>hibernate-core</artifactId>
<version>${hibernate.version}</version>
</dependency>
<dependency>
<groupId>org.hibernate</groupId>
<artifactId>hibernate-entitymanager</artifactId>
<version>${hibernate.version}</version>
</dependency>
<dependency>

提前谢谢

最佳答案

我不知道如何用 TokenStream 来实现,但下面是我花了 3 个小时折腾 Hibernate Search 之后得到的结果。完成这样一个简单的任务确实需要用到一些 hack 手段。该方案最终可以在 HS 4.5.1 和 Lucene 3.6.2 上正常工作(IBaseEntity 是某种带有 id 的对象):

/** Markup inserted before each highlighted term; passed as the pre-tag to the HTML formatter in getHighlighter. */
public static final String HIGHLIGHTER_PRE = "<span class='search-found'>";
/** Markup inserted after each highlighted term; passed as the post-tag to the HTML formatter in getHighlighter. */
public static final String HIGHLIGHTER_POST = "</span>";

/**
 * Looks up the document builder that Hibernate Search has bound to the given entity class.
 *
 * @param session the Hibernate session from which the full-text session is obtained
 * @param clazz   the indexed entity class whose index binding is requested
 * @return the document builder taken from that class's index binding
 */
protected static DocumentBuilderIndexedEntity getDocumentBuilder(Session session, Class clazz) {
    // The public SearchFactory is cast to its implementor type to reach the index bindings.
    SearchFactoryImplementor factory =
            (SearchFactoryImplementor) Search.getFullTextSession(session).getSearchFactory();
    return factory.getIndexBinding(clazz).getDocumentBuilder();
}

/**
 * Builds the Lucene document for the given entity.
 *
 * @param session the Hibernate session used to resolve the document builder and to
 *                create the loading initializer
 * @param o       the entity to convert; its id is passed to the document builder
 * @param clazz   the entity class whose document builder is used
 * @return the Lucene document produced by the document builder
 */
@SuppressWarnings("unchecked")
public static Document getDocument(Session session, IBaseEntity o, Class clazz) {
    DocumentBuilderIndexedEntity builder = getDocumentBuilder(session, clazz);
    return builder.getDocument(
            o,
            o.getId(),
            new HashMap<String, String>(),
            new HibernateSessionLoadingInitializer((SessionImplementor) session),
            new ContextualExceptionBridgeHelper());
}

/**
 * Returns the Lucene analyzer associated with the given entity class.
 *
 * @param session the Hibernate session used to resolve the document builder
 * @param clazz   the indexed entity class
 * @return the analyzer reported by that class's document builder
 */
public static Analyzer getAnalyzer(Session session, Class clazz) {
    DocumentBuilderIndexedEntity builder = getDocumentBuilder(session, clazz);
    return builder.getAnalyzer();
}

/**
 * Creates a highlighter that wraps matched terms in
 * {@link #HIGHLIGHTER_PRE} / {@link #HIGHLIGHTER_POST}.
 *
 * @param luceneQuery the Lucene query that is scored against the text; this is the query
 *                    object available before a {@link FullTextQuery} is created from the
 *                    {@link FullTextSession}
 * @return a highlighter using an HTML formatter, a query scorer and a default
 *         {@link SimpleFragmenter}
 */
public static Highlighter getHighlighter(Query luceneQuery) {
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(HIGHLIGHTER_PRE, HIGHLIGHTER_POST);
    QueryScorer scorer = new QueryScorer(luceneQuery);

    Highlighter result = new Highlighter(formatter, scorer);
    result.setTextFragmenter(new SimpleFragmenter());
    return result;
}

/**
 * Concatenates the string values of an entity's indexed Lucene fields into one text.
 *
 * <p>The field values are taken as-is from the Lucene document; no analyzer is applied
 * here, so any markup contained in the values is kept. Blank values are skipped,
 * duplicate values are included only once (first occurrence wins), and the remaining
 * values are joined with a single space.
 *
 * @param session the Hibernate session used to build the entity's Lucene document
 * @param o       the entity, may be {@code null}
 * @return the joined text, or an empty string when {@code o} is {@code null} or no
 *         non-blank value exists
 */
public static String getFullTextContent(Session session, IBaseEntity o) {
    if (o == null) {
        return "";
    }

    Document doc = getDocument(session, o, o.getClass());

    // Collect one field per indexed field name; a missing field shows up as null here.
    List<org.apache.lucene.document.Field> luceneFields =
            new ArrayList<org.apache.lucene.document.Field>();
    for (String name : listIndexedFields(o.getClass())) {
        luceneFields.add(doc.getField(name));
    }

    // Insertion-ordered set so that repeated values appear only once, in first-seen order.
    Set<String> uniqueValues = new LinkedHashSet<String>();
    for (org.apache.lucene.document.Field luceneField : luceneFields) {
        if (luceneField == null) {
            continue;
        }
        String value = luceneField.stringValue();
        if (StringUtils.isBlank(value)) {
            continue;
        }
        uniqueValues.add(value);
    }

    if (uniqueValues.isEmpty()) {
        return "";
    }

    StringBuilder joined = new StringBuilder();
    String separator = "";
    for (String value : uniqueValues) {
        joined.append(separator).append(value);
        separator = " ";
    }
    return joined.toString();
}

/**
 * Produces the highlighted search-result text for one entity.
 *
 * @param session the Hibernate session used to resolve the analyzer and the entity's text
 * @param o       the entity whose indexed text is highlighted
 * @param query   the Lucene query whose terms are highlighted
 * @return the highlighted text returned by
 *         {@link #getHighlightedText(Analyzer, Highlighter, String)}
 */
public static String getHighlighterText(Session session, IBaseEntity o, Query query) {
    final Analyzer entityAnalyzer = getAnalyzer(session, o.getClass());
    final Highlighter queryHighlighter = getHighlighter(query);
    final String rawText = getFullTextContent(session, o);
    return getHighlightedText(entityAnalyzer, queryHighlighter, rawText);
}

/**
 * Asks the highlighter for the best fragment of the given text.
 *
 * @param analyzer        the analyzer used to tokenize the text
 * @param highlighter     the configured highlighter
 * @param fullTextContent the text to highlight
 * @return whatever {@code Highlighter.getBestFragment} returns for the text
 *         (NOTE(review): presumably {@code null} when nothing matches - confirm against
 *         the Lucene Highlighter documentation)
 * @throws RuntimeException wrapping any exception raised while highlighting
 */
public static String getHighlightedText(Analyzer analyzer, Highlighter highlighter, String fullTextContent) {
    final String bestFragment;
    try {
        // No field name is supplied to the analyzer (null).
        bestFragment = highlighter.getBestFragment(analyzer, null, fullTextContent);
    } catch (Exception e) {
        throw new RuntimeException("Cannot highlight lucene results", e);
    }
    return bestFragment;
}

关于java - 如何使用 Lucene、Highlighter 和 StandardAnalyzer 通过 Hibernate 搜索引擎获取文本片段,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/24975599/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com