git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
solr-schema.xml: More updates for Solr v4.x and hopefully with better filters.
author: Timo Sirainen <tss@iki.fi>
Thu, 13 Jun 2013 15:53:22 +0000 (18:53 +0300)
committer: Timo Sirainen <tss@iki.fi>
Thu, 13 Jun 2013 15:53:22 +0000 (18:53 +0300)
doc/solr-schema.xml

index 0736e3f78292931bcb802e8ed5331afa33344804..cea6a3bb401f8b906362fe00ba849ea670d9c2aa 100644 (file)
@@ -6,32 +6,32 @@ For fts-solr:
 This is the Solr schema file, place it into solr/conf/schema.xml. You may
 want to modify the tokenizers and filters.
 -->
-<schema name="dovecot" version="1.4">
+<schema name="dovecot" version="1.5">
   <types>
     <!-- IMAP has 32bit unsigned ints but java ints are signed, so use longs -->
-    <fieldType name="string" class="solr.StrField" omitNorms="true"/>
-    <fieldType name="long" class="solr.LongField" omitNorms="true"/>
-    <fieldType name="slong" class="solr.SortableLongField" omitNorms="true"/>
-    <fieldType name="float" class="solr.FloatField" omitNorms="true"/>
-    <fieldType name="boolean" class="solr.BoolField" omitNorms="true"/>
+    <fieldType name="string" class="solr.StrField" />
+    <fieldType name="long" class="solr.TrieLongField" />
+    <fieldType name="boolean" class="solr.BoolField" />
 
     <fieldType name="text" class="solr.TextField" positionIncrementGap="100">
       <analyzer type="index">
-        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
-        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
         <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.SnowballPorterFilterFactory" language="English"/>
-        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+        <filter class="solr.EnglishPossessiveFilterFactory"/>
+        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+        <filter class="solr.EnglishMinimalStemFilterFactory"/>
       </analyzer>
       <analyzer type="query">
-        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
         <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
-        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
         <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.SnowballPorterFilterFactory" language="English"/>
-        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+        <filter class="solr.EnglishPossessiveFilterFactory"/>
+        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+        <filter class="solr.EnglishMinimalStemFilterFactory"/>
       </analyzer>
     </fieldType>
  </types>
@@ -39,7 +39,7 @@ want to modify the tokenizers and filters.
 
  <fields>
    <field name="id" type="string" indexed="true" stored="true" required="true" />
-   <field name="uid" type="slong" indexed="true" stored="true" required="true" />
+   <field name="uid" type="long" indexed="true" stored="true" required="true" />
    <field name="box" type="string" indexed="true" stored="true" required="true" />
    <field name="user" type="string" indexed="true" stored="true" required="true" />
 
@@ -51,9 +51,10 @@ want to modify the tokenizers and filters.
    <field name="cc" type="text" indexed="true" stored="false" />
    <field name="bcc" type="text" indexed="true" stored="false" />
    <field name="subject" type="text" indexed="true" stored="false" />
+
+   <!-- Used by Solr internally: -->
+   <field name="_version_" type="long" indexed="true" stored="true"/>
  </fields>
 
  <uniqueKey>id</uniqueKey>
- <defaultSearchField>body</defaultSearchField>
- <solrQueryParser defaultOperator="AND" />
 </schema>