<?xml version="1.0" encoding="UTF-8"?>
-<schema name="dovecot" version="2.0">
+<schema name="dovecot" version="2.1">
+ <uniqueKey>id</uniqueKey>
+
<fieldType name="string" class="solr.StrField" omitNorms="true" sortMissingLast="true"/>
<fieldType name="long" class="solr.LongPointField" positionIncrementGap="0"/>
- <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
-
<fieldType name="text" class="solr.TextField" autoGeneratePhraseQueries="true" positionIncrementGap="100">
- <analyzer type="index">
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
- <filter class="solr.WordDelimiterGraphFilterFactory" catenateNumbers="1" generateNumberParts="1" splitOnCaseChange="1" generateWordParts="1" splitOnNumerics="1" catenateAll="1" catenateWords="1"/>
- <filter class="solr.FlattenGraphFilterFactory"/>
- <filter class="solr.LowerCaseFilterFactory"/>
+
+ <analyzer type="index">
+ <!-- Use a Unicode-aware tokenizer, see
+ https://solr.apache.org/guide/7_7/tokenizers.html#icu-tokenizer
+ -->
+ <tokenizer class="solr.ICUTokenizerFactory"/>
+ <!-- Unicode-aware case folding to normalize the input, see
+ https://solr.apache.org/guide/7_7/filter-descriptions.html#icu-folding-filter
+ -->
+ <filter class="solr.ICUFoldingFilterFactory"/>
+ <!-- Keep the words listed in protwords.txt from being modified by the
+ stemmer. Edit protwords.txt to customize. -->
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+ <!-- Split tokens at word delimiters -->
+ <filter class="solr.WordDelimiterGraphFilterFactory" catenateNumbers="1"
+ generateNumberParts="1" splitOnCaseChange="1" generateWordParts="1"
+ splitOnNumerics="1" catenateAll="1" catenateWords="1"/>
+ <!-- The Porter stemmer is optimized for English; for other languages use
+ SnowballPorterFilterFactory here instead, see https://solr.apache.org/guide/7_7/filter-descriptions.html#snowball-porter-stemmer-filter
+ <filter class="solr.SnowballPorterFilterFactory" language="French"/>
+ -->
<filter class="solr.PorterStemFilterFactory"/>
+ <!-- Map synonyms; edit synonyms.txt to customize -->
+ <filter class="solr.SynonymGraphFilterFactory" expand="true" ignoreCase="true"
+ synonyms="synonyms.txt"/>
+ <!-- Remove stop words; customize with stopwords.txt. Solr usually ships
+ stop word lists for multiple languages, so you can choose from those. -->
+ <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
+ <!-- Flatten the token graph so it can be consumed by the indexer -->
+ <filter class="solr.FlattenGraphFilterFactory"/>
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.SynonymGraphFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
- <filter class="solr.FlattenGraphFilterFactory"/>
- <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
- <filter class="solr.WordDelimiterGraphFilterFactory" catenateNumbers="1" generateNumberParts="1" splitOnCaseChange="1" generateWordParts="1" splitOnNumerics="1" catenateAll="1" catenateWords="1"/>
- <filter class="solr.LowerCaseFilterFactory"/>
+ <!-- Query-time chain: same tokenizer and filters as the index analyzer, except that SuggestStopFilter replaces StopFilter and there is no FlattenGraphFilter. -->
+ <tokenizer class="solr.ICUTokenizerFactory"/>
+ <filter class="solr.ICUFoldingFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+ <filter class="solr.WordDelimiterGraphFilterFactory" catenateNumbers="1"
+ generateNumberParts="1" splitOnCaseChange="1" generateWordParts="1"
+ splitOnNumerics="1" catenateAll="1" catenateWords="1"/>
+ <!-- See the index analyzer above for the comment on the Porter stemmer -->
<filter class="solr.PorterStemFilterFactory"/>
+ <filter class="solr.SynonymGraphFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
+ <!-- Differs slightly from the plain stop word filter,
+ see https://solr.apache.org/guide/7_7/filter-descriptions.html#suggest-stop-filter
+ -->
+ <filter class="solr.SuggestStopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
</analyzer>
</fieldType>
+++ /dev/null
-<?xml version="1.0" encoding="UTF-8" ?>
-
-<!--
-For fts-solr:
-
-This is the Solr schema file, place it into solr/conf/schema.xml. You may
-want to modify the tokenizers and filters.
--->
-<schema name="dovecot" version="1.5">
- <types>
- <!-- IMAP has 32bit unsigned ints but java ints are signed, so use longs -->
- <fieldType name="string" class="solr.StrField" />
- <fieldType name="long" class="solr.TrieLongField" />
- <fieldType name="boolean" class="solr.BoolField" />
-
- <fieldType name="text" class="solr.TextField" positionIncrementGap="100">
- <analyzer type="index">
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
- <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.EnglishPossessiveFilterFactory"/>
- <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
- <filter class="solr.EnglishMinimalStemFilterFactory"/>
- </analyzer>
- <analyzer type="query">
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
- <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.EnglishPossessiveFilterFactory"/>
- <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
- <filter class="solr.EnglishMinimalStemFilterFactory"/>
- </analyzer>
- </fieldType>
- </types>
-
-
- <fields>
- <field name="id" type="string" indexed="true" stored="true" required="true" />
- <field name="uid" type="long" indexed="true" stored="true" required="true" />
- <field name="box" type="string" indexed="true" stored="true" required="true" />
- <field name="user" type="string" indexed="true" stored="true" required="true" />
-
- <field name="hdr" type="text" indexed="true" stored="false" />
- <field name="body" type="text" indexed="true" stored="false" />
-
- <field name="from" type="text" indexed="true" stored="false" />
- <field name="to" type="text" indexed="true" stored="false" />
- <field name="cc" type="text" indexed="true" stored="false" />
- <field name="bcc" type="text" indexed="true" stored="false" />
- <field name="subject" type="text" indexed="true" stored="false" />
-
- <!-- Used by Solr internally: -->
- <field name="_version_" type="long" indexed="true" stored="true"/>
- </fields>
-
- <uniqueKey>id</uniqueKey>
-</schema>