gpt4 book ai didi

java - solr中的串联字符串搜索

转载 作者:行者123 更新时间:2023-12-01 14:36:39 24 4
gpt4 key购买 nike

我正在搜索医学相关术语。

搜索字段为

description, title, keywords, category.

按用户搜索关键字

"brain surgery"

当我使用上述键搜索时,它会根据架构搜索所有四个提到的字段。问题是这样的:索引项可能包含许多在描述中带有关键字“surgery”的记录。我的搜索结果是与心脏病学、膝盖等相关的项目,因为它的描述中有关键字“手术”

我只想要一条在描述中包含“脑外科手术” 的记录。我怎样才能实现这个目标?

仅供引用:发送时,我将 key 发送为“brain+surgery”

感谢任何帮助

SOLRCONFIG.XML

<?xml version="1.0" encoding="UTF-8" ?>

<config>

<luceneMatchVersion>LUCENE_42</luceneMatchVersion>


<lib dir="../../../contrib/extraction/lib" regex=".*\.jar" />
<lib dir="../../../dist/" regex="solr-cell-\d.*\.jar" />

<lib dir="../../../contrib/clustering/lib/" regex=".*\.jar" />
<lib dir="../../../dist/" regex="solr-clustering-\d.*\.jar" />

<lib dir="../../../contrib/langid/lib/" regex=".*\.jar" />
<lib dir="../../../dist/" regex="solr-langid-\d.*\.jar" />

<lib dir="../../../contrib/velocity/lib" regex=".*\.jar" />
<lib dir="../../../dist/" regex="solr-velocity-\d.*\.jar" />

<lib dir="/total/crap/dir/ignored" />


<directoryFactory name="DirectoryFactory"
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>

<codecFactory class="solr.SchemaCodecFactory"/>


<indexConfig>

<lockType>${solr.lock.type:native}</lockType>


</indexConfig>



<jmx />

<updateHandler class="solr.DirectUpdateHandler2">


<updateLog>
<str name="dir">${solr.ulog.dir:}</str>
</updateLog>


<autoCommit>
<maxTime>15000</maxTime>
<openSearcher>false</openSearcher>
</autoCommit>



</updateHandler>


<query>

<maxBooleanClauses>1024</maxBooleanClauses>


<filterCache class="solr.FastLRUCache"
size="512"
initialSize="512"
autowarmCount="0"/>


<queryResultCache class="solr.LRUCache"
size="512"
initialSize="512"
autowarmCount="0"/>


<documentCache class="solr.LRUCache"
size="512"
initialSize="512"
autowarmCount="0"/>


<enableLazyFieldLoading>true</enableLazyFieldLoading>


<queryResultWindowSize>20</queryResultWindowSize>


<queryResultMaxDocsCached>200</queryResultMaxDocsCached>


<listener event="newSearcher" class="solr.QuerySenderListener">
<arr name="queries">

</arr>
</listener>
<listener event="firstSearcher" class="solr.QuerySenderListener">
<arr name="queries">
<lst>
<str name="q">static firstSearcher warming in solrconfig.xml</str>
</lst>
</arr>
</listener>


<useColdSearcher>false</useColdSearcher>


<maxWarmingSearchers>2</maxWarmingSearchers>

</query>


<requestDispatcher handleSelect="false" >

<requestParsers enableRemoteStreaming="true"
multipartUploadLimitInKB="2048000"
formdataUploadLimitInKB="2048"/>


<httpCaching never304="true" />

</requestDispatcher>


<requestHandler name="/select" class="solr.SearchHandler">

<lst name="defaults">
<str name="echoParams">explicit</str>
<int name="rows">10</int>
<str name="df">all_fields</str>
</lst>

</requestHandler>

<!-- A request handler that returns indented JSON by default -->
<requestHandler name="/query" class="solr.SearchHandler">
<lst name="defaults">
<str name="echoParams">explicit</str>
<str name="wt">json</str>
<str name="indent">true</str>
<str name="df">all_fields</str>
</lst>
</requestHandler>


<requestHandler name="/get" class="solr.RealTimeGetHandler">
<lst name="defaults">
<str name="omitHeader">true</str>
<str name="wt">json</str>
<str name="indent">true</str>
</lst>
</requestHandler>

<requestHandler name="/browse" class="solr.SearchHandler">
<lst name="defaults">
<str name="echoParams">explicit</str>

<!-- VelocityResponseWriter settings -->
<str name="wt">velocity</str>
<str name="v.template">browse</str>
<str name="v.layout">layout</str>
<str name="title">Solritas</str>

<!-- Query settings -->
<str name="defType">edismax</str>
<str name="qf">
text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
</str>
<str name="df">all_fields</str>
<str name="mm">100%</str>
<str name="q.alt">*:*</str>
<str name="rows">10</str>
<str name="fl">*,score</str>

<str name="mlt.qf">
text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
</str>
<str name="mlt.fl">text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename</str>
<int name="mlt.count">3</int>

<!-- Faceting defaults -->
<str name="facet">on</str>
<str name="facet.field">cat</str>
<str name="facet.field">manu_exact</str>
<str name="facet.field">content_type</str>
<str name="facet.field">author_s</str>
<str name="facet.query">ipod</str>
<str name="facet.query">GB</str>
<str name="facet.mincount">1</str>
<str name="facet.pivot">cat,inStock</str>
<str name="facet.range.other">after</str>
<str name="facet.range">price</str>
<int name="f.price.facet.range.start">0</int>
<int name="f.price.facet.range.end">600</int>
<int name="f.price.facet.range.gap">50</int>
<str name="facet.range">popularity</str>
<int name="f.popularity.facet.range.start">0</int>
<int name="f.popularity.facet.range.end">10</int>
<int name="f.popularity.facet.range.gap">3</int>
<str name="facet.range">manufacturedate_dt</str>
<str name="f.manufacturedate_dt.facet.range.start">NOW/YEAR-10YEARS</str>
<str name="f.manufacturedate_dt.facet.range.end">NOW</str>
<str name="f.manufacturedate_dt.facet.range.gap">+1YEAR</str>
<str name="f.manufacturedate_dt.facet.range.other">before</str>
<str name="f.manufacturedate_dt.facet.range.other">after</str>

<!-- Highlighting defaults -->
<str name="hl">on</str>
<str name="hl.fl">content features title name</str>
<str name="hl.encoder">html</str>
<str name="hl.simple.pre">&lt;b&gt;</str>
<str name="hl.simple.post">&lt;/b&gt;</str>
<str name="f.title.hl.fragsize">0</str>
<str name="f.title.hl.alternateField">title</str>
<str name="f.name.hl.fragsize">0</str>
<str name="f.name.hl.alternateField">name</str>
<str name="f.content.hl.snippets">3</str>
<str name="f.content.hl.fragsize">200</str>
<str name="f.content.hl.alternateField">content</str>
<str name="f.content.hl.maxAlternateFieldLength">750</str>

<!-- Spell checking defaults -->
<str name="spellcheck">on</str>
<str name="spellcheck.extendedResults">false</str>
<str name="spellcheck.count">5</str>
<str name="spellcheck.alternativeTermCount">2</str>
<str name="spellcheck.maxResultsForSuggest">5</str>
<str name="spellcheck.collate">true</str>
<str name="spellcheck.collateExtendedResults">true</str>
<str name="spellcheck.maxCollationTries">5</str>
<str name="spellcheck.maxCollations">3</str>
</lst>

<!-- append spellchecking to our list of components -->
<arr name="last-components">
<str>spellcheck</str>
</arr>
</requestHandler>

<requestHandler name="/update" class="solr.UpdateRequestHandler">

</requestHandler>

<!-- for back compat with clients using /update/json and /update/csv -->
<requestHandler name="/update/json" class="solr.JsonUpdateRequestHandler">
<lst name="defaults">
<str name="stream.contentType">application/json</str>
</lst>
</requestHandler>
<requestHandler name="/update/csv" class="solr.CSVRequestHandler">
<lst name="defaults">
<str name="stream.contentType">application/csv</str>
</lst>
</requestHandler>

<!-- Solr Cell Update Request Handler

http://wiki.apache.org/solr/ExtractingRequestHandler

-->
<requestHandler name="/update/extract"
startup="lazy"
class="solr.extraction.ExtractingRequestHandler" >
<lst name="defaults">
<str name="lowernames">true</str>
<str name="uprefix">ignored_</str>

<!-- capture link hrefs but ignore div attributes -->
<str name="captureAttr">true</str>
<str name="fmap.a">links</str>
<str name="fmap.div">ignored_</str>
</lst>
</requestHandler>


<requestHandler name="/analysis/field"
startup="lazy"
class="solr.FieldAnalysisRequestHandler" />


<requestHandler name="/analysis/document"
class="solr.DocumentAnalysisRequestHandler"
startup="lazy" />


<requestHandler name="/admin/"
class="solr.admin.AdminHandlers" />

<requestHandler name="/admin/ping" class="solr.PingRequestHandler">
<lst name="invariants">
<str name="q">solrpingquery</str>
</lst>
<lst name="defaults">
<str name="echoParams">all</str>
</lst>

</requestHandler>

<!-- Echo the request contents back to the client -->
<requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
<lst name="defaults">
<str name="echoParams">explicit</str>
<str name="echoHandler">true</str>
</lst>
</requestHandler>


<requestHandler name="/replication" class="solr.ReplicationHandler" >

</requestHandler>


<searchComponent name="spellcheck" class="solr.SpellCheckComponent">

<str name="queryAnalyzerFieldType">textSpell</str>


<lst name="spellchecker">
<str name="name">default</str>
<str name="field">name</str>
<str name="classname">solr.DirectSolrSpellChecker</str>
<!-- the spellcheck distance measure used, the default is the internal levenshtein -->
<str name="distanceMeasure">internal</str>
<!-- minimum accuracy needed to be considered a valid spellcheck suggestion -->
<float name="accuracy">0.5</float>
<!-- the maximum #edits we consider when enumerating terms: can be 1 or 2 -->
<int name="maxEdits">2</int>
<!-- the minimum shared prefix when enumerating terms -->
<int name="minPrefix">1</int>
<!-- maximum number of inspections per result. -->
<int name="maxInspections">5</int>
<!-- minimum length of a query term to be considered for correction -->
<int name="minQueryLength">4</int>
<!-- maximum threshold of documents a query term can appear to be considered for correction -->
<float name="maxQueryFrequency">0.01</float>
<!-- uncomment this to require suggestions to occur in 1% of the documents
<float name="thresholdTokenFrequency">.01</float>
-->
</lst>

<!-- a spellchecker that can break or combine words. See "/spell" handler below for usage -->
<lst name="spellchecker">
<str name="name">wordbreak</str>
<str name="classname">solr.WordBreakSolrSpellChecker</str>
<str name="field">name</str>
<str name="combineWords">true</str>
<str name="breakWords">true</str>
<int name="maxChanges">10</int>
</lst>


</searchComponent>


<requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
<lst name="defaults">
<str name="df">all_fields</str>
<!-- Solr will use suggestions from both the 'default' spellchecker
and from the 'wordbreak' spellchecker and combine them.
collations (re-written queries) can include a combination of
corrections from both spellcheckers -->
<str name="spellcheck.dictionary">default</str>
<str name="spellcheck.dictionary">wordbreak</str>
<str name="spellcheck">on</str>
<str name="spellcheck.extendedResults">true</str>
<str name="spellcheck.count">10</str>
<str name="spellcheck.alternativeTermCount">5</str>
<str name="spellcheck.maxResultsForSuggest">5</str>
<str name="spellcheck.collate">true</str>
<str name="spellcheck.collateExtendedResults">true</str>
<str name="spellcheck.maxCollationTries">10</str>
<str name="spellcheck.maxCollations">5</str>
</lst>
<arr name="last-components">
<str>spellcheck</str>
</arr>
</requestHandler>


<searchComponent name="tvComponent" class="solr.TermVectorComponent"/>


<requestHandler name="/tvrh" class="solr.SearchHandler" startup="lazy">
<lst name="defaults">
<str name="df">all_fields</str>
<bool name="tv">true</bool>
</lst>
<arr name="last-components">
<str>tvComponent</str>
</arr>
</requestHandler>


<searchComponent name="clustering"
enable="${solr.clustering.enabled:false}"
class="solr.clustering.ClusteringComponent" >
<!-- Declare an engine -->
<lst name="engine">
<!-- The name, only one can be named "default" -->
<str name="name">default</str>


<str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>


<str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str>


<str name="carrot.lexicalResourcesDir">clustering/carrot2</str>


<str name="MultilingualClustering.defaultLanguage">ENGLISH</str>
</lst>
<lst name="engine">
<str name="name">stc</str>
<str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
</lst>
</searchComponent>


<requestHandler name="/clustering"
startup="lazy"
enable="${solr.clustering.enabled:false}"
class="solr.SearchHandler">
<lst name="defaults">
<bool name="clustering">true</bool>
<str name="clustering.engine">default</str>
<bool name="clustering.results">true</bool>
<!-- The title field -->
<str name="carrot.title">name</str>
<str name="carrot.url">id</str>
<!-- The field to cluster on -->
<str name="carrot.snippet">features</str>
<!-- produce summaries -->
<bool name="carrot.produceSummary">true</bool>
<!-- the maximum number of labels per cluster -->
<!--<int name="carrot.numDescriptions">5</int>-->
<!-- produce sub clusters -->
<bool name="carrot.outputSubClusters">false</bool>

<str name="defType">edismax</str>
<str name="qf">
text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
</str>
<str name="q.alt">*:*</str>
<str name="rows">10</str>
<str name="fl">*,score</str>
</lst>
<arr name="last-components">
<str>clustering</str>
</arr>
</requestHandler>


<searchComponent name="terms" class="solr.TermsComponent"/>

<!-- A request handler for demonstrating the terms component -->
<requestHandler name="/terms" class="solr.SearchHandler" startup="lazy">
<lst name="defaults">
<bool name="terms">true</bool>
<bool name="distrib">false</bool>
</lst>
<arr name="components">
<str>terms</str>
</arr>
</requestHandler>


<searchComponent name="elevator" class="solr.QueryElevationComponent" >
<!-- pick a fieldType to analyze queries -->
<str name="queryFieldType">string</str>
<str name="config-file">elevate.xml</str>
</searchComponent>

<!-- A request handler for demonstrating the elevator component -->
<requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy">
<lst name="defaults">
<str name="echoParams">explicit</str>
<str name="df">all_fields</str>
</lst>
<arr name="last-components">
<str>elevator</str>
</arr>
</requestHandler>

<!-- Highlighting Component

http://wiki.apache.org/solr/HighlightingParameters
-->
<searchComponent class="solr.HighlightComponent" name="highlight">
<highlighting>
<!-- Configure the standard fragmenter -->
<!-- This could most likely be commented out in the "default" case -->
<fragmenter name="gap"
default="true"
class="solr.highlight.GapFragmenter">
<lst name="defaults">
<int name="hl.fragsize">100</int>
</lst>
</fragmenter>

<!-- A regular-expression-based fragmenter
(for sentence extraction)
-->
<fragmenter name="regex"
class="solr.highlight.RegexFragmenter">
<lst name="defaults">
<!-- slightly smaller fragsizes work better because of slop -->
<int name="hl.fragsize">70</int>
<!-- allow 50% slop on fragment sizes -->
<float name="hl.regex.slop">0.5</float>
<!-- a basic sentence pattern -->
<str name="hl.regex.pattern">[-\w ,/\n\&quot;&apos;]{20,200}</str>
</lst>
</fragmenter>

<!-- Configure the standard formatter -->
<formatter name="html"
default="true"
class="solr.highlight.HtmlFormatter">
<lst name="defaults">
<str name="hl.simple.pre"><![CDATA[<em>]]></str>
<str name="hl.simple.post"><![CDATA[</em>]]></str>
</lst>
</formatter>

<!-- Configure the standard encoder -->
<encoder name="html"
class="solr.highlight.HtmlEncoder" />

<!-- Configure the standard fragListBuilder -->
<fragListBuilder name="simple"
class="solr.highlight.SimpleFragListBuilder"/>

<!-- Configure the single fragListBuilder -->
<fragListBuilder name="single"
class="solr.highlight.SingleFragListBuilder"/>

<!-- Configure the weighted fragListBuilder -->
<fragListBuilder name="weighted"
default="true"
class="solr.highlight.WeightedFragListBuilder"/>

<!-- default tag FragmentsBuilder -->
<fragmentsBuilder name="default"
default="true"
class="solr.highlight.ScoreOrderFragmentsBuilder">

</fragmentsBuilder>
<!-- multi-colored tag FragmentsBuilder -->
<fragmentsBuilder name="colored"
class="solr.highlight.ScoreOrderFragmentsBuilder">
<lst name="defaults">
<str name="hl.tag.pre"><![CDATA[
<b style="background:yellow">,<b style="background:lawgreen">,
<b style="background:aquamarine">,<b style="background:magenta">,
<b style="background:palegreen">,<b style="background:coral">,
<b style="background:wheat">,<b style="background:khaki">,
<b style="background:lime">,<b style="background:deepskyblue">]]></str>
<str name="hl.tag.post"><![CDATA[</b>]]></str>
</lst>
</fragmentsBuilder>
<boundaryScanner name="default"
default="true"
class="solr.highlight.SimpleBoundaryScanner">
<lst name="defaults">
<str name="hl.bs.maxScan">10</str>
<str name="hl.bs.chars">.,!? &#9;&#10;&#13;</str>
</lst>
</boundaryScanner>

<boundaryScanner name="breakIterator"
class="solr.highlight.BreakIteratorBoundaryScanner">
<lst name="defaults">
<!-- type should be one of CHARACTER, WORD(default), LINE and SENTENCE -->
<str name="hl.bs.type">WORD</str>
<!-- language and country are used when constructing Locale object. -->
<!-- And the Locale object will be used when getting instance of BreakIterator -->
<str name="hl.bs.language">en</str>
<str name="hl.bs.country">US</str>
</lst>
</boundaryScanner>
</highlighting>
</searchComponent>
<queryResponseWriter name="json" class="solr.JSONResponseWriter">
<!-- For the purposes of the tutorial, JSON responses are written as
plain text so that they are easy to read in *any* browser.
If you expect a MIME type of "application/json" just remove this override.
-->
<str name="content-type">text/plain; charset=UTF-8</str>
</queryResponseWriter>

<!--
Custom response writers can be declared as needed...
-->
<queryResponseWriter name="velocity" class="solr.VelocityResponseWriter" startup="lazy"/>


<!-- XSLT response writer transforms the XML output by any xslt file found
in Solr's conf/xslt directory. Changes to xslt files are checked for
every xsltCacheLifetimeSeconds.
-->
<queryResponseWriter name="xslt" class="solr.XSLTResponseWriter">
<int name="xsltCacheLifetimeSeconds">5</int>
</queryResponseWriter>
<admin>
<defaultQuery>*:*</defaultQuery>
</admin>

</config>

最佳答案

您正在使用默认的搜索处理程序,该处理程序似乎将 OR 作为 default query operator .
您可以使用 q=all_fields:brain+surgery&q.op=AND 进行检查,这会将查询运算符设置为 AND,并使所有查询项成为必填项。

关于java - solr中的串联字符串搜索,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/16433687/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com