gpt4 book ai didi

solr - Solr4.1 上的 CopyField 问题

转载 作者:行者123 更新时间:2023-12-01 12:47:23 25 4
gpt4 key购买 nike

我一直在使用 Solr 3.6.1,非常满意。现在我想迁移到 Solr 4.1。所以我沿用了原来的“schema.xml”和“solrconfig.xml”(稍作改动),并将它们放在新的 Solr 4.1 配置下。索引编制成功 (DIH)。但是,我注意到一个问题。在“schema.xml”中,我有“copyField”指令,以便使用不同的“类型”索引相同的字段。当我在 Solr 4.1 上使用相同的配置进行索引时,索引大小只有 Solr 3.6.1 上索引大小的一半(并且查询时得到的结果也不同)。Solr 4.1 有什么变化吗?在这方面我需要一些帮助。

schema.xml:

<?xml version="1.0" encoding="UTF-8" ?>

<schema name="areios_pagos" version="1.5">
<types>
<!-- string: StrField values are indexed and stored verbatim, without analysis. -->
<fieldType name="string" class="solr.StrField" sortMissingLast="true"/>

<!-- boolean: accepts "true" or "false". -->
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>

<!-- binary: values are sent and retrieved as Base64-encoded strings.
     Declared with the camelCase fieldType element name, like its siblings here. -->
<fieldType name="binary" class="solr.BinaryField"/>

<!-- Default numeric types (precisionStep 0).
     For faster range queries, consider the tint/tfloat/tlong/tdouble types below. -->
<fieldType name="int"    class="solr.TrieIntField"    precisionStep="0" positionIncrementGap="0"/>
<fieldType name="float"  class="solr.TrieFloatField"  precisionStep="0" positionIncrementGap="0"/>
<fieldType name="long"   class="solr.TrieLongField"   precisionStep="0" positionIncrementGap="0"/>
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>

<!-- Trie numeric types with precisionStep 8. -->
<fieldType name="tint"    class="solr.TrieIntField"    precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tfloat"  class="solr.TrieFloatField"  precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tlong"   class="solr.TrieLongField"   precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>

<!-- Trie date types: precisionStep 0 (date) and 6 (tdate). -->
<fieldType name="date"  class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>

<!-- Plain (non-Trie) numeric and date types. -->
<fieldType name="pint"    class="solr.IntField"/>
<fieldType name="plong"   class="solr.LongField"/>
<fieldType name="pfloat"  class="solr.FloatField"/>
<fieldType name="pdouble" class="solr.DoubleField"/>
<fieldType name="pdate"   class="solr.DateField" sortMissingLast="true"/>

<!-- Sortable* numeric types; norms are omitted and missing values sort last. -->
<fieldType name="sint"    class="solr.SortableIntField"    sortMissingLast="true" omitNorms="true"/>
<fieldType name="slong"   class="solr.SortableLongField"   sortMissingLast="true" omitNorms="true"/>
<fieldType name="sfloat"  class="solr.SortableFloatField"  sortMissingLast="true" omitNorms="true"/>
<fieldType name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true" omitNorms="true"/>

<!-- random: backed by RandomSortField. -->
<fieldType name="random" class="solr.RandomSortField" indexed="true"/>

<!-- text_el: general-purpose Greek text analysis (one analyzer, no type attribute). -->
<fieldType class="solr.TextField" name="text_el" positionIncrementGap="1000">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <!-- Greek-aware lowercasing (handles sigma forms). -->
    <filter class="solr.GreekLowerCaseFilterFactory"/>
    <!-- Stop words come from lang/stopwords_el.txt and are matched case-sensitively here. -->
    <filter class="solr.StopFilterFactory"
            words="lang/stopwords_el.txt"
            ignoreCase="false"
            enablePositionIncrements="true"/>
    <filter class="solr.GreekStemFilterFactory"/>
  </analyzer>
</fieldType>

<!-- text: field type intended for Tika/PDF-extracted content.
     Index and query chains are declared separately but are configured identically:
     HTML stripping, standard tokenization, Greek lowercasing, stop words, Greek stemming. -->
<fieldType class="solr.TextField" name="text" positionIncrementGap="1000">
  <analyzer type="index">
    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.GreekLowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory"
            words="lang/stopwords_el.txt"
            ignoreCase="true"
            enablePositionIncrements="true"/>
    <filter class="solr.GreekStemFilterFactory"/>
    <!-- Disabled alternative stemmer, kept for reference: -->
    <!--<filter class="solr.HunspellStemFilterFactory" dictionary="dictionaries/el_GR.dic" affix="dictionaries/el_GR.aff" ignoreCase="true" />-->
  </analyzer>
  <analyzer type="query">
    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.GreekLowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory"
            words="lang/stopwords_el.txt"
            ignoreCase="true"
            enablePositionIncrements="true"/>
    <filter class="solr.GreekStemFilterFactory"/>
    <!-- Disabled alternative stemmer, kept for reference: -->
    <!--<filter class="solr.HunspellStemFilterFactory" dictionary="dictionaries/el_GR.dic" affix="dictionaries/el_GR.aff" ignoreCase="true" />-->
  </analyzer>
</fieldType>

<!-- text_areios_pagos_s: used together with the copyField definitions to help
     term relevancy. Tokens are split on whitespace, capped at 20 per value and
     lowercased with the Greek lowercase filter; no stop words, no stemming. -->
<fieldType class="solr.TextField" name="text_areios_pagos_s" positionIncrementGap="100">
  <analyzer type="index">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="20"/>
    <filter class="solr.GreekLowerCaseFilterFactory"/>
    <!-- Edge n-grams are currently disabled: -->
    <!-- <filter class="solr.EdgeNGramFilterFactory" minGramSize="3" maxGramSize="100"/> -->
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="20"/>
    <filter class="solr.GreekLowerCaseFilterFactory"/>
    <!-- Edge n-grams are currently disabled: -->
    <!-- <filter class="solr.EdgeNGramFilterFactory" minGramSize="3" maxGramSize="100"/> -->
  </analyzer>
</fieldType>
<!-- End of text_areios_pagos_s -->

<!-- text_areios_pagos: main analyzed type for the searchable fields.
     Both chains apply standard tokenization, Greek lowercasing,
     stop-word removal (lang/stopwords_el.txt) and Greek stemming. -->
<fieldType class="solr.TextField" name="text_areios_pagos" positionIncrementGap="100">
  <analyzer type="index">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.GreekLowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory"
            words="lang/stopwords_el.txt"
            ignoreCase="true"
            enablePositionIncrements="true"/>
    <filter class="solr.GreekStemFilterFactory"/>
    <!-- Disabled alternative stemmer, kept for reference: -->
    <!--<filter class="solr.HunspellStemFilterFactory" dictionary="dictionaries/el_GR.dic" affix="dictionaries/el_GR.aff" ignoreCase="true" />-->
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.GreekLowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory"
            words="lang/stopwords_el.txt"
            ignoreCase="true"
            enablePositionIncrements="true"/>
    <filter class="solr.GreekStemFilterFactory"/>
  </analyzer>
</fieldType>

<!-- ignored: neither indexed nor stored; any number of values is accepted.
     Declared with the camelCase fieldType element name, like its siblings here. -->
<fieldType name="ignored" class="solr.StrField" indexed="false" stored="false" multiValued="true"/>

<!-- point: two-dimensional PointType; sub-fields use the "_d" suffix. -->
<fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>

<!-- location: LatLonType; sub-fields use the "_coordinate" suffix. -->
<fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>

<!-- geohash: backed by GeoHashField. -->
<fieldType name="geohash" class="solr.GeoHashField"/>

<!-- currency: amounts default to USD; currency settings are read from currency.xml. -->
<fieldType name="currency"
           class="solr.CurrencyField"
           precisionStep="8"
           defaultCurrency="USD"
           currencyConfig="currency.xml"/>
</types>



<fields>
  <!-- Identifiers and other verbatim (string) fields. -->
  <field name="ida"           type="string" indexed="true" stored="true" multiValued="false"/>
  <field name="solr_id"       type="string" indexed="true" stored="true" multiValued="false"/>
  <field name="type"          type="string" indexed="true" stored="true"/>
  <field name="model"         type="string" indexed="true" stored="true" multiValued="false"/>
  <field name="url"           type="string" indexed="true" stored="true"/>
  <field name="last_modified" type="string" indexed="true" stored="true"/>

  <!-- Analyzed fields describing the decision ("apofasi"). -->
  <field name="apofasi_number"   type="text_areios_pagos" indexed="true" stored="true" multiValued="true"/>
  <field name="apofasi_date"     type="text_areios_pagos" indexed="true" stored="true"/>
  <field name="apofasi_tmima"    type="text_areios_pagos" indexed="true" stored="true"/>
  <field name="apofasi_taxonomy" type="text_areios_pagos" indexed="true" stored="true"/>
  <field name="search_tag"       type="text_areios_pagos" indexed="true" stored="true"/>
  <field name="title"            type="text_areios_pagos" indexed="true" stored="true" multiValued="true"/>
  <field name="grid_title"       type="text_areios_pagos" indexed="true" stored="true"/>
  <field name="contentbin"       type="text"              indexed="true" stored="true" multiValued="true"/>

  <!-- content is multiValued because several copyField directives feed it. -->
  <field name="content" type="text_areios_pagos" indexed="true" stored="true" multiValued="true"/>

  <!-- contentS holds the same text as content, analyzed with text_areios_pagos_s.
       It must be multiValued: it receives one value per copyField source below. -->
  <field name="contentS" type="text_areios_pagos_s" indexed="true" stored="true" multiValued="true"/>
</fields>

<uniqueKey>solr_id</uniqueKey>
<defaultSearchField>content</defaultSearchField>
<solrQueryParser defaultOperator="AND"/>

<!-- Aggregate every searchable field into content. -->
<copyField source="apofasi_number"   dest="content"/>
<copyField source="apofasi_date"     dest="content"/>
<copyField source="apofasi_tmima"    dest="content"/>
<copyField source="apofasi_taxonomy" dest="content"/>
<copyField source="title"            dest="content"/>
<copyField source="search_tag"       dest="content"/>
<copyField source="contentbin"       dest="content"/>

<!-- copyField works on the incoming document values and does not cascade:
     values copied INTO content are not forwarded by content -> contentS.
     Each original source is therefore copied to contentS directly as well.
     A full reindex is required after changing these directives. -->
<copyField source="content"          dest="contentS"/>
<copyField source="apofasi_number"   dest="contentS"/>
<copyField source="apofasi_date"     dest="contentS"/>
<copyField source="apofasi_tmima"    dest="contentS"/>
<copyField source="apofasi_taxonomy" dest="contentS"/>
<copyField source="title"            dest="contentS"/>
<copyField source="search_tag"       dest="contentS"/>
<copyField source="contentbin"       dest="contentS"/>


</schema>

solrconfig.xml

<?xml version="1.0" encoding="UTF-8" ?>

<config>

<!-- NOTE(review): abortOnConfigurationError is believed to be ignored from
     Solr 4.0 onwards (SOLR-1846); confirm before removing it. -->
<abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError>

<!-- Lucene compatibility version requested for this core. -->
<luceneMatchVersion>LUCENE_41</luceneMatchVersion>

<!-- Index data directory: taken from the solr.data.dir property, empty when unset. -->
<dataDir>${solr.data.dir:}</dataDir>

<!-- Directory implementation: solr.directoryFactory property,
     falling back to StandardDirectoryFactory. -->
<directoryFactory class="${solr.directoryFactory:solr.StandardDirectoryFactory}"
                  name="DirectoryFactory"/>

<!-- No index-level overrides are configured. -->
<indexConfig/>

<jmx/>

<!-- Update handler with no extra settings (no autoCommit or update log configured here). -->
<updateHandler class="solr.DirectUpdateHandler2"/>

<!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Query section - these settings control query time things like caches
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
<query>

<!-- Upper bound on the number of clauses in a boolean query. -->
<maxBooleanClauses>2048</maxBooleanClauses>

<!-- Filter cache. -->
<filterCache class="solr.FastLRUCache"
             size="2048"
             initialSize="1024"
             autowarmCount="512"
             cleanupThread="true"/>

<!-- Query result cache. -->
<queryResultCache class="solr.FastLRUCache"
                  size="2048"
                  initialSize="1024"
                  autowarmCount="512"
                  cleanupThread="true"/>

<!-- Document cache. Internal Lucene document ids are transient, so this cache
     is not autowarmed; autowarmCount is set to 0 to reflect what actually happens
     (the previous value of 512 had no effect). -->
<documentCache class="solr.FastLRUCache"
               size="2048"
               initialSize="2048"
               autowarmCount="0"/>

<!-- Field value cache. -->
<fieldValueCache class="solr.FastLRUCache"
                 size="2048"
                 initialSize="512"
                 autowarmCount="512"
                 cleanupThread="true"/>

<!-- Load stored fields that were not requested lazily. -->
<enableLazyFieldLoading>true</enableLazyFieldLoading>

<!-- 150 matches the default "rows" used by the /select handler and the warming queries. -->
<queryResultWindowSize>150</queryResultWindowSize>

<!-- Maximum number of documents cached per query result entry. -->
<queryResultMaxDocsCached>200</queryResultMaxDocsCached>

<!-- Warming queries sent whenever a new searcher is opened.
     NOTE(review): the sort uses apofasi_date and apofasi_tmima; confirm these
     fields are suitable for sorting (single, untokenized value per document). -->
<listener class="solr.QuerySenderListener" event="newSearcher">
  <arr name="queries">
    <!-- Two-term query filtered on the first taxonomy value. -->
    <lst>
      <str name="q">χρησικτησια νομη</str>
      <str name="fq">apofasi_taxonomy:ΠΟΛΙΤΙΚΕΣ</str>
      <str name="sort">apofasi_date asc,ida desc,apofasi_tmima desc</str>
      <str name="start">0</str>
      <str name="rows">150</str>
    </lst>
    <!-- Single-term query, same filter. -->
    <lst>
      <str name="q">νομη</str>
      <str name="fq">apofasi_taxonomy:ΠΟΛΙΤΙΚΕΣ</str>
      <str name="sort">apofasi_date asc,ida desc,apofasi_tmima desc</str>
      <str name="start">0</str>
      <str name="rows">150</str>
    </lst>
    <!-- Two-term query filtered on the second taxonomy value. -->
    <lst>
      <str name="q">χρησικτησια νομη</str>
      <str name="fq">apofasi_taxonomy:ΠΟΙΝΙΚΕΣ</str>
      <str name="sort">apofasi_date asc,ida desc,apofasi_tmima desc</str>
      <str name="start">0</str>
      <str name="rows">150</str>
    </lst>
  </arr>
</listener>

<!-- Warming queries sent when the first searcher is opened;
     the list is the same as the one used for the newSearcher event. -->
<listener class="solr.QuerySenderListener" event="firstSearcher">
  <arr name="queries">
    <!-- Two-term query filtered on the first taxonomy value. -->
    <lst>
      <str name="q">χρησικτησια νομη</str>
      <str name="fq">apofasi_taxonomy:ΠΟΛΙΤΙΚΕΣ</str>
      <str name="sort">apofasi_date asc,ida desc,apofasi_tmima desc</str>
      <str name="start">0</str>
      <str name="rows">150</str>
    </lst>
    <!-- Single-term query, same filter. -->
    <lst>
      <str name="q">νομη</str>
      <str name="fq">apofasi_taxonomy:ΠΟΛΙΤΙΚΕΣ</str>
      <str name="sort">apofasi_date asc,ida desc,apofasi_tmima desc</str>
      <str name="start">0</str>
      <str name="rows">150</str>
    </lst>
    <!-- Two-term query filtered on the second taxonomy value. -->
    <lst>
      <str name="q">χρησικτησια νομη</str>
      <str name="fq">apofasi_taxonomy:ΠΟΙΝΙΚΕΣ</str>
      <str name="sort">apofasi_date asc,ida desc,apofasi_tmima desc</str>
      <str name="start">0</str>
      <str name="rows">150</str>
    </lst>
  </arr>
</listener>

<!-- Do not serve requests from a searcher that has not finished warming. -->
<useColdSearcher>false</useColdSearcher>

<!-- At most two searchers may be warming at the same time. -->
<maxWarmingSearchers>2</maxWarmingSearchers>

</query>

<requestDispatcher>
  <!-- NOTE(review): remote streaming is enabled, which lets requests make Solr
       fetch remote or local content; confirm this endpoint is not reachable by
       untrusted clients. Multipart uploads are limited to 2048000 KB. -->
  <requestParsers multipartUploadLimitInKB="2048000"
                  enableRemoteStreaming="true"/>
  <!-- Never answer with HTTP 304 (Not Modified). -->
  <httpCaching never304="true"/>
</requestDispatcher>

<!-- DataImportHandler endpoint; the import definition is read from data-config.xml. -->
<requestHandler class="org.apache.solr.handler.dataimport.DataImportHandler"
                name="/dataimport">
  <lst name="defaults">
    <str name="config">data-config.xml</str>
  </lst>
</requestHandler>

<!-- Main search endpoint. Each default is declared exactly once; the original
     listed defType, qf, pf and ps twice with identical values. -->
<requestHandler name="/select" class="solr.SearchHandler">
  <lst name="defaults">
    <!-- Query parsing: edismax over content, with contentS boosted. -->
    <str name="defType">edismax</str>
    <str name="qf">content contentS^10</str>
    <!-- Phrase boosting on the same two fields, with a phrase slop of 100. -->
    <str name="pf">content^10 contentS^100</str>
    <str name="ps">100</str>

    <!-- Response shape. -->
    <str name="echoParams">explicit</str>
    <int name="rows">150</int>
    <str name="sort">score desc</str>
    <str name="wt">json</str>
    <!-- NOTE(review): keywordlist is requested here; confirm the schema declares it. -->
    <str name="fl">solr_id,ida,type,model,keywordlist,title,apofasi_taxonomy,apofasi_tmima,apofasi_date,grid_title</str>

    <!-- Highlighting on content and title; fragments and the alternate field
         are both limited to 800 characters. -->
    <str name="hl">true</str>
    <str name="hl.fl">content,title</str>
    <str name="f.content.hl.alternateField">content</str>
    <str name="hl.maxAlternateFieldLength">800</str>
    <str name="hl.fragsize">800</str>
  </lst>
</requestHandler>

<!-- Update endpoints, one per input format.
     NOTE(review): these format-specific handler classes are believed to be
     deprecated in Solr 4.x in favour of solr.UpdateRequestHandler; confirm
     client content types before switching. -->

<!-- XML updates. -->
<requestHandler class="solr.XmlUpdateRequestHandler" name="/update"/>

<!-- Javabin updates. -->
<requestHandler class="solr.BinaryUpdateRequestHandler" name="/update/javabin"/>

<!-- CSV updates (loaded lazily). -->
<requestHandler class="solr.CSVRequestHandler" name="/update/csv" startup="lazy"/>

<!-- JSON updates (loaded lazily). -->
<requestHandler class="solr.JsonUpdateRequestHandler" name="/update/json" startup="lazy"/>

<!-- Rich-document extraction endpoint (loaded lazily).
     NOTE(review): the mappings below target fields named text, links and
     ignored_*; verify that the schema in use declares them (or a matching
     dynamicField), otherwise extracted documents may be rejected. -->
<requestHandler class="solr.extraction.ExtractingRequestHandler"
                name="/update/extract"
                startup="lazy">
  <lst name="defaults">
    <!-- Extracted body goes to "text"; Last-Modified goes to "last_modified". -->
    <str name="fmap.content">text</str>
    <str name="lowernames">true</str>
    <!-- Fields not known to the schema get the "ignored_" prefix. -->
    <str name="uprefix">ignored_</str>
    <str name="fmap.Last-Modified">last_modified</str>
    <!-- Capture element attributes; anchors map to "links", divs to "ignored_". -->
    <str name="captureAttr">true</str>
    <str name="fmap.a">links</str>
    <str name="fmap.div">ignored_</str>
  </lst>
</requestHandler>

<!-- XSLT-transformed updates (loaded lazily). -->
<requestHandler class="solr.XsltUpdateRequestHandler" name="/update/xslt" startup="lazy"/>

<!-- Analysis endpoints for a single field or a whole document (both loaded lazily). -->
<requestHandler class="solr.FieldAnalysisRequestHandler" name="/analysis/field" startup="lazy"/>
<requestHandler class="solr.DocumentAnalysisRequestHandler" name="/analysis/document" startup="lazy"/>

<!-- Administrative handlers registered under /admin/. -->
<requestHandler class="solr.admin.AdminHandlers" name="/admin/"/>


<!-- Health-check endpoint: the query is fixed to "solrpingquery" via invariants,
     and all parameters are echoed back by default. -->
<requestHandler class="solr.PingRequestHandler" name="/admin/ping">
  <lst name="invariants">
    <str name="q">solrpingquery</str>
  </lst>
  <lst name="defaults">
    <str name="echoParams">all</str>
  </lst>
</requestHandler>

<!-- Debug endpoint that echoes the request contents back to the client,
     including the handler and the explicitly passed parameters. -->
<requestHandler class="solr.DumpRequestHandler" name="/debug/dump">
  <lst name="defaults">
    <str name="echoParams">explicit</str>
    <str name="echoHandler">true</str>
  </lst>
</requestHandler>

<!-- Spell-check component with a single dictionary named "default".
     NOTE(review): it refers to a field type "textSpell" and a field "name";
     verify that both exist in the schema in use. -->
<searchComponent class="solr.SpellCheckComponent" name="spellcheck">
  <str name="queryAnalyzerFieldType">textSpell</str>
  <lst name="spellchecker">
    <str name="name">default</str>
    <str name="field">name</str>
    <str name="spellcheckIndexDir">spellchecker</str>
  </lst>
</searchComponent>


<!-- Search endpoint that runs the spellcheck component after the standard ones
     (loaded lazily). One suggestion is returned, without extended results.
     NOTE(review): the default field is "text"; verify the schema declares it. -->
<requestHandler class="solr.SearchHandler" name="/spell" startup="lazy">
  <lst name="defaults">
    <str name="df">text</str>
    <str name="spellcheck.onlyMorePopular">false</str>
    <str name="spellcheck.extendedResults">false</str>
    <str name="spellcheck.count">1</str>
  </lst>
  <arr name="last-components">
    <str>spellcheck</str>
  </arr>
</requestHandler>

<!-- Terms component. -->
<searchComponent class="solr.TermsComponent" name="terms"/>

<!-- Endpoint demonstrating the terms component: it is the only component run
     and is switched on by default (loaded lazily). -->
<requestHandler class="solr.SearchHandler" name="/terms" startup="lazy">
  <lst name="defaults">
    <bool name="terms">true</bool>
  </lst>
  <arr name="components">
    <str>terms</str>
  </arr>
</requestHandler>

<!-- Query elevation component: queries are analyzed with the "string" field type
     and the elevation rules are read from elevate.xml. -->
<searchComponent class="solr.QueryElevationComponent" name="elevator">
  <str name="queryFieldType">string</str>
  <str name="config-file">elevate.xml</str>
</searchComponent>

<!-- Endpoint demonstrating the elevator component, which runs after the standard
     components (loaded lazily).
     NOTE(review): the default field is "text"; verify the schema declares it. -->
<requestHandler class="solr.SearchHandler" name="/elevate" startup="lazy">
  <lst name="defaults">
    <str name="echoParams">explicit</str>
    <str name="df">text</str>
  </lst>
  <arr name="last-components">
    <str>elevator</str>
  </arr>
</requestHandler>

<!-- Highlighting component: fragmenters, formatter, encoder, fragment list builders,
     fragments builders and boundary scanners. -->
<searchComponent name="highlight" class="solr.HighlightComponent">
  <highlighting>

    <!-- Default fragmenter (gap-based), with no overridden defaults. -->
    <fragmenter class="solr.highlight.GapFragmenter"
                name="gap"
                default="true">
      <lst name="defaults"/>
    </fragmenter>

    <!-- Alternative regex-based fragmenter. -->
    <fragmenter class="solr.highlight.RegexFragmenter"
                name="regex">
      <lst name="defaults">
        <!-- Slightly smaller fragment sizes work better because of the slop. -->
        <int name="hl.fragsize">70</int>
        <!-- Allow 50% slop on fragment sizes. -->
        <float name="hl.regex.slop">0.5</float>
        <!-- Basic sentence pattern. -->
        <str name="hl.regex.pattern">[-\w ,/\n\&quot;&apos;]{20,200}</str>
      </lst>
    </fragmenter>

    <!-- Standard formatter: matches are wrapped in shl tags. -->
    <formatter class="solr.highlight.HtmlFormatter"
               name="html"
               default="true">
      <lst name="defaults">
        <str name="hl.simple.pre">&lt;shl&gt;</str>
        <str name="hl.simple.post">&lt;/shl&gt;</str>
      </lst>
    </formatter>

    <!-- Standard HTML encoder. -->
    <encoder class="solr.highlight.HtmlEncoder"
             name="html"/>

    <!-- Fragment list builders: "simple" is the default, "single" is optional. -->
    <fragListBuilder class="solr.highlight.SimpleFragListBuilder"
                     name="simple"
                     default="true"/>
    <fragListBuilder class="solr.highlight.SingleFragListBuilder"
                     name="single"/>

    <!-- Default fragments builder (score ordered). -->
    <fragmentsBuilder class="solr.highlight.ScoreOrderFragmentsBuilder"
                      name="default"
                      default="true"/>

    <!-- Score-ordered fragments builder using a list of coloured tags.
         The CDATA content is kept flush-left so the tag values stay unchanged. -->
    <fragmentsBuilder class="solr.highlight.ScoreOrderFragmentsBuilder"
                      name="colored">
      <lst name="defaults">
        <str name="hl.tag.pre"><![CDATA[
<b style="background:yellow">,<b style="background:lawgreen">,
<b style="background:aquamarine">,<b style="background:magenta">,
<b style="background:palegreen">,<b style="background:coral">,
<b style="background:wheat">,<b style="background:khaki">,
<b style="background:lime">,<b style="background:deepskyblue">]]></str>
        <str name="hl.tag.post"><![CDATA[</b>]]></str>
      </lst>
    </fragmentsBuilder>

    <!-- Default boundary scanner: scans up to 10 characters for one of the
         listed boundary characters (punctuation, space, tab, LF, CR). -->
    <boundaryScanner class="solr.highlight.SimpleBoundaryScanner"
                     name="default"
                     default="true">
      <lst name="defaults">
        <str name="hl.bs.maxScan">10</str>
        <str name="hl.bs.chars">.,!? &#9;&#10;&#13;</str>
      </lst>
    </boundaryScanner>

    <!-- BreakIterator-based scanner configured for WORD boundaries, locale en/US. -->
    <boundaryScanner class="solr.highlight.BreakIteratorBoundaryScanner"
                     name="breakIterator">
      <lst name="defaults">
        <str name="hl.bs.type">WORD</str>
        <str name="hl.bs.language">en</str>
        <str name="hl.bs.country">US</str>
      </lst>
    </boundaryScanner>

  </highlighting>
</searchComponent>

<!-- JSON writer; responses are labelled text/plain with UTF-8 charset. -->
<queryResponseWriter class="solr.JSONResponseWriter" name="json">
  <str name="content-type">text/plain; charset=UTF-8</str>
</queryResponseWriter>

<!-- Velocity writer (loaded lazily). -->
<queryResponseWriter class="solr.VelocityResponseWriter" name="velocity" startup="lazy"/>

<!-- XSLT writer; xsltCacheLifetimeSeconds is set to 5. -->
<queryResponseWriter class="solr.XSLTResponseWriter" name="xslt">
  <int name="xsltCacheLifetimeSeconds">5</int>
</queryResponseWriter>
<!-- Admin settings: the default query is the match-all query. -->
<admin>
  <defaultQuery>*:*</defaultQuery>
</admin>

</config>

问候,

汤姆

最佳答案

Solr 4.1 以压缩方式维护存储字段,这可以解释索引大小的减少。

此外,

<copyField source="content" dest="contentS" />

文档@ http://wiki.apache.org/solr/SchemaXml#Copy_Fields

The copy is done at the stream source level and no copy feeds into another copy.

把一个 copyField 的目标字段(即本身由复制得到的字段)再用作另一个 copyField 的源是行不通的。
copyField 的源必须是文档中实际带有值的字段,复制不会级联传递。

您还可以检查http://lucene.472066.n3.nabble.com/does-copyField-recurse-td2450208.html

这对你有用吗??

关于solr - Solr4.1 上的 CopyField 问题,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/14605847/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com