gpt4 book ai didi

solr - Solr 3.3.0-增加搜索中的字段权重-提高字段优先级

转载 作者:行者123 更新时间:2023-12-02 04:05:47 25 4
gpt4 key购买 nike

我正在使用solr 3.3.0索引这些文档:

<!-- Sample documents: each book carries "abcde" or "abc" in exactly one field;
     every other field holds a dashes-only placeholder. -->
<book id="bk101">
  <keywords>----</keywords>
  <title>----</title>
  <owner>----</owner>
  <artist>abcde</artist>
</book>
<book id="bk102">
  <keywords>----</keywords>
  <title>----</title>
  <owner>abcde</owner>
  <artist>----</artist>
</book>
<book id="bk103">
  <keywords>----</keywords>
  <title>----</title>
  <owner>----</owner>
  <artist>abc</artist>
</book>
<book id="bk104">
  <keywords>----</keywords>
  <title>----</title>
  <owner>abc</owner>
  <artist>----</artist>
</book>
<book id="bk105">
  <keywords>abcde</keywords>
  <title>----</title>
  <owner>----</owner>
  <artist>----</artist>
</book>
<book id="bk106">
  <keywords>abc</keywords>
  <title>----</title>
  <owner>----</owner>
  <artist>----</artist>
</book>
<book id="bk107">
  <keywords>----</keywords>
  <title>abcde</title>
  <owner>----</owner>
  <artist>----</artist>
</book>
<book id="bk108">
  <keywords>----</keywords>
  <title>abc</title>
  <owner>----</owner>
  <artist>----</artist>
</book>

Schema.xml
<types>
  <!--
    Analysed text type used by the partial-match fields.
    The tokenizer/filter chain is declared inside an <analyzer> element, which
    is where Solr looks for a fieldType's analysis chain. No separate
    type="index" / type="query" analyzers are declared, so this one chain is
    used both when indexing and when querying.
  -->
  <fieldType name="text" class="solr.TextField" positionIncrementGap="100" omitNorms="false">
    <analyzer>
      <tokenizer class="solr.WhitespaceTokenizerFactory" />
      <filter class="solr.WordDelimiterFilterFactory"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="1"
              catenateNumbers="1"
              catenateAll="0"
              splitOnCaseChange="0"
              splitOnNumerics="0" />
      <filter class="solr.LowerCaseFilterFactory" />
      <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true" />
      <filter class="solr.ISOLatin1AccentFilterFactory" />
      <filter class="solr.RemoveDuplicatesTokenFilterFactory" />
      <!-- Leading n-grams of 1 to 15 characters for each token. -->
      <filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="15" side="front" />
      <!-- Trailing n-grams of 1 to 15 characters, applied to the output of the previous filter. -->
      <filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="15" side="back" />
    </analyzer>
  </fieldType>

  <!-- Whole-value type: the input is kept as a single token and lower-cased. -->
  <fieldType name="text_original" class="solr.TextField" positionIncrementGap="100" omitNorms="false">
    <analyzer>
      <tokenizer class="solr.KeywordTokenizerFactory" />
      <filter class="solr.LowerCaseFilterFactory" />
    </analyzer>
  </fieldType>

  <!-- Primitive (non-analysed) types. -->
  <fieldType name="uuid" class="solr.UUIDField" indexed="true" />
  <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true" />
  <fieldType name="float" class="solr.FloatField" omitNorms="true" />
  <fieldType name="date" class="solr.DateField" />
</types>

<fields>
  <!-- Identifiers: "id" is a UUID with default="NEW"; "book_id" holds the book identifier as a plain string. -->
  <field name="id" type="uuid" indexed="true" stored="true" default="NEW" />
  <field name="book_id" type="string" indexed="true" stored="true" />

  <!-- Searchable fields. Each base field uses the n-gram "text" type; its *_ex
       twin uses "text_original" and is filled by the copyField rules below. -->
  <field name="keywords" type="text" indexed="true" stored="true" />
  <field name="keywords_ex" type="text_original" indexed="true" stored="true" />
  <field name="title" type="text" indexed="true" stored="true" />
  <field name="title_ex" type="text_original" indexed="true" stored="true" />
  <field name="owner" type="text" indexed="true" stored="true" />
  <field name="owner_ex" type="text_original" indexed="true" stored="true" />
  <field name="artist" type="text" indexed="true" stored="true" />
  <field name="artist_ex" type="text_original" indexed="true" stored="true" />

  <!-- Copy each base field into its whole-value counterpart. -->
  <copyField source="title" dest="title_ex" />
  <copyField source="keywords" dest="keywords_ex" />
  <copyField source="owner" dest="owner_ex" />
  <copyField source="artist" dest="artist_ex" />
</fields>

如您所见,如果我搜索“abc”,Solr 会返回所有文档(因为我在索引时使用了 solr.EdgeNGramFilterFactory);我想要实现的是让结果按以下字段优先级(从高到低)排序:
  • title_ex
  • title
  • keywords_ex
  • keywords
  • owner_ex
  • artist_ex
  • owner
  • artist

  • 我尝试了 Lucene 的“^”加权语法和 dismax,但返回结果的排序并不符合我的预期。

    `http://localhost:8080/solr33a/core0/select?q=abc&defType=edismax&qf=title_ex^10%20title^8.0%20keywords_ex^6%20keywords^5.5%20owner_ex^1.2%20artist_ex^0.8%20owner^0.5%20artist^0.2&fl=*,score`

    结果:
    <!-- Hits returned for q=abc, listed from highest to lowest score. -->
    <doc>
      <float name="score">2.3862944</float>
      <str name="artist">----</str>
      <str name="artist_ex">----</str>
      <str name="book_id">bk108</str>
      <str name="id">2cc5d478-6901-4777-abc9-680fd959ef90</str>
      <str name="keywords">----</str>
      <str name="keywords_ex">----</str>
      <str name="owner">----</str>
      <str name="owner_ex">----</str>
      <str name="title">abc</str>
      <str name="title_ex">abc</str>
    </doc>
    <doc>
      <float name="score">1.4317766</float>
      <str name="artist">----</str>
      <str name="artist_ex">----</str>
      <str name="book_id">bk106</str>
      <str name="id">e12683a2-faff-4d86-8107-7406491f4f89</str>
      <str name="keywords">abc</str>
      <str name="keywords_ex">abc</str>
      <str name="owner">----</str>
      <str name="owner_ex">----</str>
      <str name="title">----</str>
      <str name="title_ex">----</str>
    </doc>
    <doc>
      <float name="score">0.3288517</float>
      <str name="artist">----</str>
      <str name="artist_ex">----</str>
      <str name="book_id">bk107</str>
      <str name="id">a6a4a014-ce94-4257-a215-c1a64aa41cf5</str>
      <str name="keywords">----</str>
      <str name="keywords_ex">----</str>
      <str name="owner">----</str>
      <str name="owner_ex">----</str>
      <str name="title">abcde</str>
      <str name="title_ex">abcde</str>
    </doc>
    <doc>
      <float name="score">0.28635535</float>
      <str name="artist">----</str>
      <str name="artist_ex">----</str>
      <str name="book_id">bk104</str>
      <str name="id">9b909c65-e56a-4407-a789-53a570a7ae40</str>
      <str name="keywords">----</str>
      <str name="keywords_ex">----</str>
      <str name="owner">abc</str>
      <str name="owner_ex">abc</str>
      <str name="title">----</str>
      <str name="title_ex">----</str>
    </doc>
    <doc>
      <float name="score">0.22608554</float>
      <str name="artist">----</str>
      <str name="artist_ex">----</str>
      <str name="book_id">bk105</str>
      <str name="id">dead87cc-f93b-4562-af32-4d9fb2613c7f</str>
      <str name="keywords">abcde</str>
      <str name="keywords_ex">abcde</str>
      <str name="owner">----</str>
      <str name="owner_ex">----</str>
      <str name="title">----</str>
      <str name="title_ex">----</str>
    </doc>
    <doc>
      <float name="score">0.19090356</float>
      <str name="artist">abc</str>
      <str name="artist_ex">abc</str>
      <str name="book_id">bk103</str>
      <str name="id">306a252c-a0b5-474d-b55d-a25740d063b4</str>
      <str name="keywords">----</str>
      <str name="keywords_ex">----</str>
      <str name="owner">----</str>
      <str name="owner_ex">----</str>
      <str name="title">----</str>
      <str name="title_ex">----</str>
    </doc>
    <doc>
      <float name="score">0.020553231</float>
      <str name="artist">----</str>
      <str name="artist_ex">----</str>
      <str name="book_id">bk102</str>
      <str name="id">a684de0c-b286-4d9e-bd68-d5305afeee76</str>
      <str name="keywords">----</str>
      <str name="keywords_ex">----</str>
      <str name="owner">abcde</str>
      <str name="owner_ex">abcde</str>
      <str name="title">----</str>
      <str name="title_ex">----</str>
    </doc>
    <doc>
      <float name="score">0.008221293</float>
      <str name="artist">abcde</str>
      <str name="artist_ex">abcde</str>
      <str name="book_id">bk101</str>
      <str name="id">30a0f9de-1224-49d2-90aa-41f57af4956c</str>
      <str name="keywords">----</str>
      <str name="keywords_ex">----</str>
      <str name="owner">----</str>
      <str name="owner_ex">----</str>
      <str name="title">----</str>
      <str name="title_ex">----</str>
    </doc>

    最佳答案

    当您要搜索具有可变权重的多个字段时,请将您的请求处理程序配置为使用edismax查询解析器。
    dismax允许您跨字段搜索并为每个字段添加权重。


    在下面的示例配置中,标题(title)匹配的权重为 1,而作者(author)匹配的权重为 0.8,因此标题匹配的文档会排在前面。请将 qf 中的字段名替换为您 schema 中实际定义的字段(例如用 artist 代替 author)。

    <!-- Search handler marked as the default; the values under "defaults" apply
         when a request does not supply its own. -->
    <requestHandler name="search" class="solr.SearchHandler" default="true">
      <lst name="defaults">
        <str name="echoParams">explicit</str>
        <!-- Parse the user query with the extended dismax parser. -->
        <str name="defType">edismax</str>
        <!-- Query fields with their boosts: title at 1, author at 0.8. -->
        <str name="qf">
          title^1 author^0.8
        </str>
        <!-- Alternate query used when no q parameter is given. -->
        <str name="q.alt">*:*</str>
        <str name="rows">10</str>
        <!-- Return every stored field plus the relevance score. -->
        <str name="fl">*,score</str>
      </lst>
    </requestHandler>

    关于solr - Solr 3.3.0-增加搜索中的字段权重-提高字段优先级,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/7737551/

    25 4 0
    Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
    广告合作:1813099741@qq.com 6ren.com