]> git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
Add another bunch of missed files.
authorVsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 31 Dec 2015 17:54:04 +0000 (17:54 +0000)
committerVsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 31 Dec 2015 17:54:04 +0000 (17:54 +0000)
contrib/snowball/libstemmer/modules.txt [new file with mode: 0644]
contrib/snowball/libstemmer/modules_utf8.txt [new file with mode: 0644]

diff --git a/contrib/snowball/libstemmer/modules.txt b/contrib/snowball/libstemmer/modules.txt
new file mode 100644 (file)
index 0000000..d237c1d
--- /dev/null
@@ -0,0 +1,50 @@
+# This file contains a list of stemmers to include in the distribution.
+# The format is one stemmer per line, with space-separated items on each line:
+#  First item is name of stemmer.
+#  Second item is comma separated list of character sets.
+#  Third item is comma separated list of names to refer to the stemmer by.
+#
+# Lines starting with a #, or blank lines, are ignored.
+
+# List all the main algorithms for each language, in UTF-8, and also with
+# the most commonly used encoding.
+
+danish          UTF_8,ISO_8859_1        danish,da,dan
+dutch           UTF_8,ISO_8859_1        dutch,nl,dut,nld
+english         UTF_8,ISO_8859_1        english,en,eng
+finnish         UTF_8,ISO_8859_1        finnish,fi,fin
+french          UTF_8,ISO_8859_1        french,fr,fre,fra
+german          UTF_8,ISO_8859_1        german,de,ger,deu
+hungarian       UTF_8,ISO_8859_2        hungarian,hu,hun
+italian         UTF_8,ISO_8859_1        italian,it,ita
+norwegian       UTF_8,ISO_8859_1        norwegian,no,nor
+portuguese      UTF_8,ISO_8859_1        portuguese,pt,por
+romanian        UTF_8,ISO_8859_2        romanian,ro,rum,ron
+russian         UTF_8,KOI8_R            russian,ru,rus
+spanish         UTF_8,ISO_8859_1        spanish,es,esl,spa
+swedish         UTF_8,ISO_8859_1        swedish,sv,swe
+turkish         UTF_8                   turkish,tr,tur
+
+# Also include the traditional porter algorithm for english.
+# The porter algorithm is included in the libstemmer distribution to assist
+# with backwards compatibility, but for new systems the english algorithm
+# should be used in preference.
+porter          UTF_8,ISO_8859_1        porter
+
+# Some other stemmers in the snowball project are not included in the standard
+# distribution. To compile a libstemmer with them in, add them to this list,
+# and regenerate the distribution. (You will need a full source checkout for
+# this.) They are included in the snowball website as curiosities, but are not
+# intended for general use, and use of them is not fully supported.  These
+# algorithms are:
+#
+# german2          - This is a slight modification of the german stemmer.
+#german2          UTF_8,ISO_8859_1        german2
+#
+# kraaij_pohlmann  - This is a different dutch stemmer.
+#kraaij_pohlmann  UTF_8,ISO_8859_1        kraaij_pohlmann
+#
+# lovins           - This is an english stemmer, but fairly outdated, and
+#                    only really applicable to a restricted type of input text
+#                    (keywords in academic publications).
+#lovins           UTF_8,ISO_8859_1        lovins
diff --git a/contrib/snowball/libstemmer/modules_utf8.txt b/contrib/snowball/libstemmer/modules_utf8.txt
new file mode 100644 (file)
index 0000000..60a0e1d
--- /dev/null
@@ -0,0 +1,49 @@
+# This file contains a list of stemmers to include in the distribution.
+# The format is one stemmer per line, with space-separated items on each line:
+#  First item is name of stemmer.
+#  Second item is comma separated list of character sets.
+#  Third item is comma separated list of names to refer to the stemmer by.
+#
+# Lines starting with a #, or blank lines, are ignored.
+
+# List all the main algorithms for each language, in UTF-8.
+
+danish          UTF_8                   danish,da,dan
+dutch           UTF_8                   dutch,nl,dut,nld
+english         UTF_8                   english,en,eng
+finnish         UTF_8                   finnish,fi,fin
+french          UTF_8                   french,fr,fre,fra
+german          UTF_8                   german,de,ger,deu
+hungarian       UTF_8                   hungarian,hu,hun
+italian         UTF_8                   italian,it,ita
+norwegian       UTF_8                   norwegian,no,nor
+portuguese      UTF_8                   portuguese,pt,por
+romanian        UTF_8                   romanian,ro,rum,ron
+russian         UTF_8                   russian,ru,rus
+spanish         UTF_8                   spanish,es,esl,spa
+swedish         UTF_8                   swedish,sv,swe
+turkish         UTF_8                   turkish,tr,tur
+
+# Also include the traditional porter algorithm for english.
+# The porter algorithm is included in the libstemmer distribution to assist
+# with backwards compatibility, but for new systems the english algorithm
+# should be used in preference.
+porter          UTF_8                   porter
+
+# Some other stemmers in the snowball project are not included in the standard
+# distribution. To compile a libstemmer with them in, add them to this list,
+# and regenerate the distribution. (You will need a full source checkout for
+# this.) They are included in the snowball website as curiosities, but are not
+# intended for general use, and use of them is not fully supported.  These
+# algorithms are:
+#
+# german2          - This is a slight modification of the german stemmer.
+#german2          UTF_8                   german2
+#
+# kraaij_pohlmann  - This is a different dutch stemmer.
+#kraaij_pohlmann  UTF_8                   kraaij_pohlmann
+#
+# lovins           - This is an english stemmer, but fairly outdated, and
+#                    only really applicable to a restricted type of input text
+#                    (keywords in academic publications).
+#lovins           UTF_8                   lovins