# Directory for the stopword lists: a "stopwords" subdirectory of
# $(datadir)/${PACKAGE_TARNAME}.
stopwordsdir = $(datadir)/${PACKAGE_TARNAME}/stopwords
# One stopword file per language, named stopwords_<code>.txt and listed in
# alphabetical order of the language code.
# NOTE(review): this looks like an Automake "dist_<dir>_DATA" variable, which
# would ship these files in the tarball and install them into $(stopwordsdir)
# -- confirm against the build setup.
# NOTE(review): the leading "+" on some entries appears to be a unified-diff
# addition marker carried over into this fragment, not make syntax.
dist_stopwords_DATA = \
+ stopwords/stopwords_da.txt \
+ stopwords/stopwords_de.txt \
stopwords/stopwords_en.txt \
+ stopwords/stopwords_es.txt \
stopwords/stopwords_fi.txt \
stopwords/stopwords_fr.txt \
+ stopwords/stopwords_it.txt \
+ stopwords/stopwords_nl.txt \
stopwords/stopwords_no.txt \
+ stopwords/stopwords_pt.txt \
+ stopwords/stopwords_ro.txt \
+ stopwords/stopwords_ru.txt \
stopwords/stopwords_sv.txt
# C sources named as built (generated) sources; the rules that produce them
# are not visible in this fragment.
BUILT_SOURCES = word-boundary-data.c word-break-data.c
--- /dev/null
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+ |
+
+ | A Danish stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+
+og | and
+i | in
+jeg | I
+det | that (dem. pronoun)/it (pers. pronoun)
+at | that (in front of a sentence)/to (with infinitive)
+en | a/an
+den | it (pers. pronoun)/that (dem. pronoun)
+til | to/at/for/until/against/by/of/into, more
+er | present tense of "to be"
+som | who, as
+på | on/upon/in/on/at/to/after/of/with/for, on
+de | they
+med | with/by/in, along
+han | he
+af | of/by/from/off/for/in/with/on, off
+for | at/for/to/from/by/of/ago, in front/before, because
+ikke | not
+der | who/which, there/those
+var | past tense of "to be"
+mig | me/myself
+sig | oneself/himself/herself/itself/themselves
+men | but
+et | a/an/one, one (number), someone/somebody/one
+har | present tense of "to have"
+om | round/about/for/in/a, about/around/down, if
+vi | we
+min | my
+havde | past tense of "to have"
+ham | him
+hun | she
+nu | now
+over | over/above/across/by/beyond/past/on/about, over/past
+da | then, when/as/since
+fra | from/off/since, off, since
+du | you
+ud | out
+sin | his/her/its/one's
+dem | them
+os | us/ourselves
+op | up
+man | you/one
+hans | his
+hvor | where
+eller | or
+hvad | what
+skal | must/shall etc.
+selv | myself/yourself/herself/ourselves etc., even
+her | here
+alle | all/everyone/everybody etc.
+vil | will (verb)
+blev | past tense of "to stay/to remain/to get/to become"
+kunne | could
+ind | in
+når | when
+være | infinitive of "to be"
+dog | however/yet/after all
+noget | something
+ville | would
+jo | you know/you see (adv), yes
+deres | their/theirs
+efter | after/behind/according to/for/by/from, later/afterwards
+ned | down
+skulle | should
+denne | this
+end | than
+dette | this
+mit | my/mine
+også | also
+under | under/beneath/below/during, below/underneath
+have | have
+dig | you
+anden | other
+hende | her
+mine | my
+alt | everything
+meget | much/very, plenty of
+sit | his, her, its, one's
+sine | his, her, its, one's
+vor | our
+mod | against
+disse | these
+hvis | if
+din | your/yours
+nogle | some
+hos | by/at
+blive | be/become
+mange | many
+ad | by/through
+bliver | present tense of "to be/to become"
+hendes | her/hers
+været | been
+thi | for (conj)
+jer | you
+sådan | such, like this/like that
--- /dev/null
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+ |
+
+ | A German stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | The number of forms in this list is reduced significantly by passing it
+ | through the German stemmer.
+
+
+aber | but
+
+alle | all
+allem
+allen
+aller
+alles
+
+als | than, as
+also | so
+am | an + dem
+an | at
+
+ander | other
+andere
+anderem
+anderen
+anderer
+anderes
+anderm
+andern
+anderr
+anders
+
+auch | also
+auf | on
+aus | out of
+bei | by
+bin | am
+bis | until
+bist | art
+da | there
+damit | with it
+dann | then
+
+der | the
+den
+des
+dem
+die
+das
+
+daß | that
+
+derselbe | the same
+derselben
+denselben
+desselben
+demselben
+dieselbe
+dieselben
+dasselbe
+
+dazu | to that
+
+dein | thy
+deine
+deinem
+deinen
+deiner
+deines
+
+denn | because
+
+derer | of those
+dessen | of him
+
+dich | thee
+dir | to thee
+du | thou
+
+dies | this
+diese
+diesem
+diesen
+dieser
+dieses
+
+
+doch | (several meanings)
+dort | (over) there
+
+
+durch | through
+
+ein | a
+eine
+einem
+einen
+einer
+eines
+
+einig | some
+einige
+einigem
+einigen
+einiger
+einiges
+
+einmal | once
+
+er | he
+ihn | him
+ihm | to him
+
+es | it
+etwas | something
+
+euer | your
+eure
+eurem
+euren
+eurer
+eures
+
+für | for
+gegen | towards
+gewesen | p.p. of sein
+hab | have
+habe | have
+haben | have
+hat | has
+hatte | had
+hatten | had
+hier | here
+hin | there
+hinter | behind
+
+ich | I
+mich | me
+mir | to me
+
+
+ihr | you, to her
+ihre
+ihrem
+ihren
+ihrer
+ihres
+euch | to you
+
+im | in + dem
+in | in
+indem | while
+ins | in + das
+ist | is
+
+jede | each, every
+jedem
+jeden
+jeder
+jedes
+
+jene | that
+jenem
+jenen
+jener
+jenes
+
+jetzt | now
+kann | can
+
+kein | no
+keine
+keinem
+keinen
+keiner
+keines
+
+können | can
+könnte | could
+machen | do
+man | one
+
+manche | some, many a
+manchem
+manchen
+mancher
+manches
+
+mein | my
+meine
+meinem
+meinen
+meiner
+meines
+
+mit | with
+muss | must
+musste | had to
+nach | to(wards)
+nicht | not
+nichts | nothing
+noch | still, yet
+nun | now
+nur | only
+ob | whether
+oder | or
+ohne | without
+sehr | very
+
+sein | his
+seine
+seinem
+seinen
+seiner
+seines
+
+selbst | self
+sich | herself
+
+sie | they, she
+ihnen | to them
+
+sind | are
+so | so
+
+solche | such
+solchem
+solchen
+solcher
+solches
+
+soll | shall
+sollte | should
+sondern | but
+sonst | else
+über | over
+um | about, around
+und | and
+
+uns | us
+unse
+unsem
+unsen
+unser
+unses
+
+unter | under
+viel | much
+vom | von + dem
+von | from
+vor | before
+während | while
+war | was
+waren | were
+warst | wast
+was | what
+weg | away, off
+weil | because
+weiter | further
+
+welche | which
+welchem
+welchen
+welcher
+welches
+
+wenn | when
+werde | will
+werden | will
+wie | how
+wieder | again
+will | want
+wir | we
+wird | will
+wirst | willst
+wo | where
+wollen | want
+wollte | wanted
+würde | would
+würden | would
+zu | to
+zum | zu + dem
+zur | zu + der
+zwar | indeed
+zwischen | between
+
--- /dev/null
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+ |
+
+ | A Spanish stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+
+ | The following is a ranked list (commonest to rarest) of stopwords
+ | deriving from a large sample of text.
+
+ | Extra words have been added at the end.
+
+de | from, of
+la | the, her
+que | who, that
+el | the
+en | in
+y | and
+a | to
+los | the, them
+del | de + el
+se | himself, from him etc
+las | the, them
+por | for, by, etc
+un | a
+para | for
+con | with
+no | no
+una | a
+su | his, her
+al | a + el
+ | es from SER
+lo | him
+como | how
+más | more
+pero | but
+sus | su plural
+le | to him, her
+ya | already
+o | or
+ | fue from SER
+este | this
+ | ha from HABER
+sí | himself etc
+porque | because
+esta | this
+ | son from SER
+entre | between
+ | está from ESTAR
+cuando | when
+muy | very
+sin | without
+sobre | on
+ | ser from SER
+ | tiene from TENER
+también | also
+me | me
+hasta | until
+hay | there is/are
+donde | where
+ | han from HABER
+quien | whom, that
+ | están from ESTAR
+ | estado from ESTAR
+desde | from
+todo | all
+nos | us
+durante | during
+ | estados from ESTAR
+todos | all
+uno | a
+les | to them
+ni | nor
+contra | against
+otros | other
+ | fueron from SER
+ese | that
+eso | that
+ | había from HABER
+ante | before
+ellos | they
+e | and (variant of y)
+esto | this
+mí | me
+antes | before
+algunos | some
+qué | what?
+unos | a
+yo | I
+otro | other
+otras | other
+otra | other
+él | he
+tanto | so much, many
+esa | that
+estos | these
+mucho | much, many
+quienes | who
+nada | nothing
+muchos | many
+cual | who
+ | sea from SER
+poco | few
+ella | she
+estar | to be
+ | haber from HABER
+estas | these
+ | estaba from ESTAR
+ | estamos from ESTAR
+algunas | some
+algo | something
+nosotros | we
+
+ | other forms
+
+mi | me
+mis | mi plural
+tú | thou
+te | thee
+ti | thee
+tu | thy
+tus | tu plural
+ellas | they
+nosotras | we
+vosotros | you
+vosotras | you
+os | you
+mío | mine
+mía |
+míos |
+mías |
+tuyo | thine
+tuya |
+tuyos |
+tuyas |
+suyo | his, hers, theirs
+suya |
+suyos |
+suyas |
+nuestro | ours
+nuestra |
+nuestros |
+nuestras |
+vuestro | yours
+vuestra |
+vuestros |
+vuestras |
+esos | those
+esas | those
+
+ | forms of estar, to be (not including the infinitive):
+estoy
+estás
+está
+estamos
+estáis
+están
+esté
+estés
+estemos
+estéis
+estén
+estaré
+estarás
+estará
+estaremos
+estaréis
+estarán
+estaría
+estarías
+estaríamos
+estaríais
+estarían
+estaba
+estabas
+estábamos
+estabais
+estaban
+estuve
+estuviste
+estuvo
+estuvimos
+estuvisteis
+estuvieron
+estuviera
+estuvieras
+estuviéramos
+estuvierais
+estuvieran
+estuviese
+estuvieses
+estuviésemos
+estuvieseis
+estuviesen
+estando
+estado
+estada
+estados
+estadas
+estad
+
+ | forms of haber, to have (not including the infinitive):
+he
+has
+ha
+hemos
+habéis
+han
+haya
+hayas
+hayamos
+hayáis
+hayan
+habré
+habrás
+habrá
+habremos
+habréis
+habrán
+habría
+habrías
+habríamos
+habríais
+habrían
+había
+habías
+habíamos
+habíais
+habían
+hube
+hubiste
+hubo
+hubimos
+hubisteis
+hubieron
+hubiera
+hubieras
+hubiéramos
+hubierais
+hubieran
+hubiese
+hubieses
+hubiésemos
+hubieseis
+hubiesen
+habiendo
+habido
+habida
+habidos
+habidas
+
+ | forms of ser, to be (not including the infinitive):
+soy
+eres
+es
+somos
+sois
+son
+sea
+seas
+seamos
+seáis
+sean
+seré
+serás
+será
+seremos
+seréis
+serán
+sería
+serías
+seríamos
+seríais
+serían
+era
+eras
+éramos
+erais
+eran
+fui
+fuiste
+fue
+fuimos
+fuisteis
+fueron
+fuera
+fueras
+fuéramos
+fuerais
+fueran
+fuese
+fueses
+fuésemos
+fueseis
+fuesen
+siendo
+sido
+ | sed also means 'thirst'
+
+ | forms of tener, to have (not including the infinitive):
+tengo
+tienes
+tiene
+tenemos
+tenéis
+tienen
+tenga
+tengas
+tengamos
+tengáis
+tengan
+tendré
+tendrás
+tendrá
+tendremos
+tendréis
+tendrán
+tendría
+tendrías
+tendríamos
+tendríais
+tendrían
+tenía
+tenías
+teníamos
+teníais
+tenían
+tuve
+tuviste
+tuvo
+tuvimos
+tuvisteis
+tuvieron
+tuviera
+tuvieras
+tuviéramos
+tuvierais
+tuvieran
+tuviese
+tuvieses
+tuviésemos
+tuvieseis
+tuviesen
+teniendo
+tenido
+tenida
+tenidos
+tenidas
+tened
+
--- /dev/null
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+ |
+
+ | An Italian stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ad | a (to) before vowel
+al | a + il
+allo | a + lo
+ai | a + i
+agli | a + gli
+all | a + l'
+agl | a + gl'
+alla | a + la
+alle | a + le
+con | with
+col | con + il
+coi | con + i (forms collo, cogli etc are now very rare)
+da | from
+dal | da + il
+dallo | da + lo
+dai | da + i
+dagli | da + gli
+dall | da + l'
+dagl | da + gl'
+dalla | da + la
+dalle | da + le
+di | of
+del | di + il
+dello | di + lo
+dei | di + i
+degli | di + gli
+dell | di + l'
+degl | di + gl'
+della | di + la
+delle | di + le
+in | in
+nel | in + il
+nello | in + lo
+nei | in + i
+negli | in + gli
+nell | in + l'
+negl | in + gl'
+nella | in + la
+nelle | in + le
+su | on
+sul | su + il
+sullo | su + lo
+sui | su + i
+sugli | su + gli
+sull | su + l'
+sugl | su + gl'
+sulla | su + la
+sulle | su + le
+per | through, by
+tra | among
+contro | against
+io | I
+tu | thou
+lui | he
+lei | she
+noi | we
+voi | you
+loro | they
+mio | my
+mia |
+miei |
+mie |
+tuo |
+tua |
+tuoi | thy
+tue |
+suo |
+sua |
+suoi | his, her
+sue |
+nostro | our
+nostra |
+nostri |
+nostre |
+vostro | your
+vostra |
+vostri |
+vostre |
+mi | me
+ti | thee
+ci | us, there
+vi | you, there
+lo | him, the
+la | her, the
+li | them
+le | them, the
+gli | to him, the
+ne | from there etc
+il | the
+un | a
+uno | a
+una | a
+ma | but
+ed | and
+se | if
+perché | why, because
+anche | also
+come | how
+dov | where (as dov')
+dove | where
+che | who, that
+chi | who
+cui | whom
+non | not
+più | more
+quale | who, that
+quanto | how much
+quanti |
+quanta |
+quante |
+quello | that
+quelli |
+quella |
+quelle |
+questo | this
+questi |
+questa |
+queste |
+si | yes
+tutto | all
+tutti | all
+
+ | single letter forms:
+
+a | at
+c | as c' for ce or ci
+e | and
+i | the
+l | as l'
+o | or
+
+ | forms of avere, to have (not including the infinitive):
+
+ho
+hai
+ha
+abbiamo
+avete
+hanno
+abbia
+abbiate
+abbiano
+avrò
+avrai
+avrà
+avremo
+avrete
+avranno
+avrei
+avresti
+avrebbe
+avremmo
+avreste
+avrebbero
+avevo
+avevi
+aveva
+avevamo
+avevate
+avevano
+ebbi
+avesti
+ebbe
+avemmo
+aveste
+ebbero
+avessi
+avesse
+avessimo
+avessero
+avendo
+avuto
+avuta
+avuti
+avute
+
+ | forms of essere, to be (not including the infinitive):
+sono
+sei
+è
+siamo
+siete
+sia
+siate
+siano
+sarò
+sarai
+sarà
+saremo
+sarete
+saranno
+sarei
+saresti
+sarebbe
+saremmo
+sareste
+sarebbero
+ero
+eri
+era
+eravamo
+eravate
+erano
+fui
+fosti
+fu
+fummo
+foste
+furono
+fossi
+fosse
+fossimo
+fossero
+essendo
+
+ | forms of fare, to do (not including the infinitive, fa, fat-):
+faccio
+fai
+facciamo
+fanno
+faccia
+facciate
+facciano
+farò
+farai
+farà
+faremo
+farete
+faranno
+farei
+faresti
+farebbe
+faremmo
+fareste
+farebbero
+facevo
+facevi
+faceva
+facevamo
+facevate
+facevano
+feci
+facesti
+fece
+facemmo
+faceste
+fecero
+facessi
+facesse
+facessimo
+facessero
+facendo
+
+ | forms of stare, to be (not including the infinitive):
+sto
+stai
+sta
+stiamo
+stanno
+stia
+stiate
+stiano
+starò
+starai
+starà
+staremo
+starete
+staranno
+starei
+staresti
+starebbe
+staremmo
+stareste
+starebbero
+stavo
+stavi
+stava
+stavamo
+stavate
+stavano
+stetti
+stesti
+stette
+stemmo
+steste
+stettero
+stessi
+stesse
+stessimo
+stessero
+stando
--- /dev/null
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+ |
+
+ | A Dutch stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large sample of Dutch text.
+
+ | Dutch stop words frequently exhibit homonym clashes. These are indicated
+ | clearly below.
+
+de | the
+en | and
+van | of, from
+ik | I, the ego
+te | (1) chez, at etc, (2) to, (3) too
+dat | that, which
+die | that, those, who, which
+in | in, inside
+een | a, an, one
+hij | he
+het | the, it
+niet | not, nothing, naught
+zijn | (1) to be, being, (2) his, one's, its
+is | is
+was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river
+op | on, upon, at, in, up, used up
+aan | on, upon, to (as dative)
+met | with, by
+als | like, such as, when
+voor | (1) before, in front of, (2) furrow
+had | had, past tense all persons sing. of 'hebben' (have)
+er | there
+maar | but, only
+om | round, about, for etc
+hem | him
+dan | then
+zou | should/would, past tense all persons sing. of 'zullen'
+of | or, whether, if
+wat | what, something, anything
+mijn | possessive and noun 'mine'
+men | people, 'one'
+dit | this
+zo | so, thus, in this way
+door | through, by
+over | over, across
+ze | she, her, they, them
+zich | oneself
+bij | (1) a bee, (2) by, near, at
+ook | also, too
+tot | till, until
+je | you
+mij | me
+uit | out of, from
+der | Old Dutch form of 'van der' still found in surnames
+daar | (1) there, (2) because
+haar | (1) her, their, them, (2) hair
+naar | (1) unpleasant, unwell etc, (2) towards, (3) as
+heb | present first person sing. of 'to have'
+hoe | how, why
+heeft | present third person sing. of 'to have'
+hebben | 'to have' and various parts thereof
+deze | this
+u | you
+want | (1) for, (2) mitten, (3) rigging
+nog | yet, still
+zal | 'shall', first and third person sing. of verb 'zullen' (will)
+me | me
+zij | she, they
+nu | now
+ge | 'thou', still used in Belgium and south Netherlands
+geen | none
+omdat | because
+iets | something, somewhat
+worden | to become, grow, get
+toch | yet, still
+al | all, every, each
+waren | (1) 'were', (2) to wander, (3) wares
+veel | much, many
+meer | (1) more, (2) lake
+doen | to do, to make
+toen | then, when
+moet | noun 'spot/mote' and present form of 'to must'
+ben | (1) am, (2) 'are' in interrogative second person singular of 'to be'
+zonder | without
+kan | noun 'can' and present form of 'to be able'
+hun | their, them
+dus | so, consequently
+alles | all, everything, anything
+onder | under, beneath
+ja | yes, of course
+eens | once, one day
+hier | here
+wie | who
+werd | imperfect third person sing. of 'become'
+altijd | always
+doch | yet, but etc
+wordt | present third person sing. of 'become'
+wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans
+kunnen | to be able
+ons | us/our
+zelf | self
+tegen | against, towards, at
+na | after, near
+reeds | already
+wil | (1) present tense of 'want', (2) 'will', noun, (3) fender
+kon | could; past tense of 'to be able'
+niets | nothing
+uw | your
+iemand | somebody
+geweest | been; past participle of 'be'
+andere | other
--- /dev/null
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+ |
+
+ | A Portuguese stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+
+ | The following is a ranked list (commonest to rarest) of stopwords
+ | deriving from a large sample of text.
+
+ | Extra words have been added at the end.
+
+de | of, from
+a | the; to, at; her
+o | the; him
+que | who, that
+e | and
+do | de + o
+da | de + a
+em | in
+um | a
+para | for
+ | é from SER
+com | with
+não | not, no
+uma | a
+os | the; them
+no | em + o
+se | himself etc
+na | em + a
+por | for
+mais | more
+as | the; them
+dos | de + os
+como | as, like
+mas | but
+ | foi from SER
+ao | a + o
+ele | he
+das | de + as
+ | tem from TER
+à | a + a
+seu | his
+sua | her
+ou | or
+ | ser from SER
+quando | when
+muito | much
+ | há from HAV
+nos | em + os; us
+já | already, now
+ | está from EST
+eu | I
+também | also
+só | only, just
+pelo | per + o
+pela | per + a
+até | up to
+isso | that
+ela | she
+entre | between
+ | era from SER
+depois | after
+sem | without
+mesmo | same
+aos | a + os
+ | ter from TER
+seus | his
+quem | whom
+nas | em + as
+me | me
+esse | that
+eles | they
+ | estão from EST
+você | you
+ | tinha from TER
+ | foram from SER
+essa | that
+num | em + um
+nem | nor
+suas | her
+meu | my
+às | a + as
+minha | my
+ | têm from TER
+numa | em + uma
+pelos | per + os
+elas | they
+ | havia from HAV
+ | seja from SER
+qual | which
+ | será from SER
+nós | we
+ | tenho from TER
+lhe | to him, her
+deles | of them
+essas | those
+esses | those
+pelas | per + as
+este | this
+ | fosse from SER
+dele | of him
+
+ | other words. There are many contractions such as naquele = em+aquele,
+ | mo = me+o, but they are rare.
+ | Indefinite article plural forms are also rare.
+
+tu | thou
+te | thee
+vocês | you (plural)
+vos | you
+lhes | to them
+meus | my
+minhas
+teu | thy
+tua
+teus
+tuas
+nosso | our
+nossa
+nossos
+nossas
+
+dela | of her
+delas | of them
+
+esta | this
+estes | these
+estas | these
+aquele | that
+aquela | that
+aqueles | those
+aquelas | those
+isto | this
+aquilo | that
+
+ | forms of estar, to be (not including the infinitive):
+estou
+está
+estamos
+estão
+estive
+esteve
+estivemos
+estiveram
+estava
+estávamos
+estavam
+estivera
+estivéramos
+esteja
+estejamos
+estejam
+estivesse
+estivéssemos
+estivessem
+estiver
+estivermos
+estiverem
+
+ | forms of haver, to have (not including the infinitive):
+hei
+há
+havemos
+hão
+houve
+houvemos
+houveram
+houvera
+houvéramos
+haja
+hajamos
+hajam
+houvesse
+houvéssemos
+houvessem
+houver
+houvermos
+houverem
+houverei
+houverá
+houveremos
+houverão
+houveria
+houveríamos
+houveriam
+
+ | forms of ser, to be (not including the infinitive):
+sou
+somos
+são
+era
+éramos
+eram
+fui
+foi
+fomos
+foram
+fora
+fôramos
+seja
+sejamos
+sejam
+fosse
+fôssemos
+fossem
+for
+formos
+forem
+serei
+será
+seremos
+serão
+seria
+seríamos
+seriam
+
+ | forms of ter, to have (not including the infinitive):
+tenho
+tem
+temos
+tém
+tinha
+tínhamos
+tinham
+tive
+teve
+tivemos
+tiveram
+tivera
+tivéramos
+tenha
+tenhamos
+tenham
+tivesse
+tivéssemos
+tivessem
+tiver
+tivermos
+tiverem
+terei
+terá
+teremos
+terão
+teria
+teríamos
+teriam
--- /dev/null
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+acea
+aceasta
+această
+aceea
+acei
+aceia
+acel
+acela
+acele
+acelea
+acest
+acesta
+aceste
+acestea
+aceşti
+aceştia
+acolo
+acum
+ai
+aia
+aibă
+aici
+al
+ăla
+ale
+alea
+ălea
+altceva
+altcineva
+am
+ar
+are
+aş
+aşadar
+asemenea
+asta
+ăsta
+astăzi
+astea
+ăstea
+ăştia
+asupra
+aţi
+au
+avea
+avem
+aveţi
+azi
+bine
+bucur
+bună
+ca
+că
+căci
+când
+care
+cărei
+căror
+cărui
+cât
+câte
+câţi
+către
+câtva
+ce
+cel
+ceva
+chiar
+cînd
+cine
+cineva
+cît
+cîte
+cîţi
+cîtva
+contra
+cu
+cum
+cumva
+curând
+curînd
+da
+dă
+dacă
+dar
+datorită
+de
+deci
+deja
+deoarece
+departe
+deşi
+din
+dinaintea
+dintr
+dintre
+drept
+după
+ea
+ei
+el
+ele
+eram
+este
+eşti
+eu
+face
+fără
+fi
+fie
+fiecare
+fii
+fim
+fiţi
+iar
+ieri
+îi
+îl
+îmi
+împotriva
+în
+înainte
+înaintea
+încât
+încît
+încotro
+între
+întrucât
+întrucît
+îţi
+la
+lângă
+le
+li
+lîngă
+lor
+lui
+mă
+mâine
+mea
+mei
+mele
+mereu
+meu
+mi
+mine
+mult
+multă
+mulţi
+ne
+nicăieri
+nici
+nimeni
+nişte
+noastră
+noastre
+noi
+noştri
+nostru
+nu
+ori
+oricând
+oricare
+oricât
+orice
+oricînd
+oricine
+oricît
+oricum
+oriunde
+până
+pe
+pentru
+peste
+pînă
+poate
+pot
+prea
+prima
+primul
+prin
+printr
+sa
+să
+săi
+sale
+sau
+său
+se
+şi
+sînt
+sîntem
+sînteţi
+spre
+sub
+sunt
+suntem
+sunteţi
+ta
+tăi
+tale
+tău
+te
+ţi
+ţie
+tine
+toată
+toate
+tot
+toţi
+totuşi
+tu
+un
+una
+unde
+undeva
+unei
+unele
+uneori
+unor
+vă
+vi
+voastră
+voastre
+voi
+voştri
+vostru
+vouă
+vreo
+vreun
--- /dev/null
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+ |
+
+ | A Russian stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+ | Letter `ё' is translated to `е'.
+
+и | and
+в | in/into
+во | alternative form
+не | not
+что | what/that
+он | he
+на | on/onto
+я | I
+с | from
+со | alternative form
+как | how
+а | milder form of `no' (but)
+то | conjunction and form of `that'
+все | all
+она | she
+так | so, thus
+его | him
+но | but
+да | yes/and
+ты | thou
+к | towards, by
+у | around, chez
+же | intensifier particle
+вы | you
+за | beyond, behind
+бы | conditional/subj. particle
+по | up to, along
+только | only
+ее | her
+мне | to me
+было | it was
+вот | here is/are, particle
+от | away from
+меня | me
+еще | still, yet, more
+нет | no, there isn't/aren't
+о | about
+из | out of
+ему | to him
+теперь | now
+когда | when
+даже | even
+ну | so, well
+вдруг | suddenly
+ли | interrogative particle
+если | if
+уже | already, but homonym of `narrower'
+или | or
+ни | neither
+быть | to be
+был | he was
+него | prepositional form of его
+до | up to
+вас | you accusative
+нибудь | indef. suffix preceded by hyphen
+опять | again
+уж | already, but homonym of `adder'
+вам | to you
+сказал | he said
+ведь | particle `after all'
+там | there
+потом | then
+себя | oneself
+ничего | nothing
+ей | to her
+может | usually with `быть' as `maybe'
+они | they
+тут | here
+где | where
+есть | there is/are
+надо | got to, must
+ней | prepositional form of ей
+для | for
+мы | we
+тебя | thee
+их | them, their
+чем | than
+была | she was
+сам | self
+чтоб | in order to
+без | without
+будто | as if
+человек | man, person, one
+чего | genitive form of `what'
+раз | once
+тоже | also
+себе | to oneself
+под | beneath
+жизнь | life
+будет | will be
+ж | short form of intensifier particle `же'
+тогда | then
+кто | who
+этот | this
+говорил | was saying
+того | genitive form of `that'
+потому | for that reason
+этого | genitive form of `this'
+какой | which
+совсем | altogether
+ним | prepositional form of `его', `они'
+здесь | here
+этом | prepositional form of `этот'
+один | one
+почти | almost
+мой | my
+тем | instrumental/dative plural of `тот', `то'
+чтобы | full form of `in order that'
+нее | her (acc.)
+кажется | it seems
+сейчас | now
+были | they were
+куда | where to
+зачем | why
+сказать | to say
+всех | all (acc., gen. preposn. plural)
+никогда | never
+сегодня | today
+можно | possible, one can
+при | by
+наконец | finally
+два | two
+об | alternative form of `о', about
+другой | another
+хоть | even
+после | after
+над | above
+больше | more
+тот | that one (masc.)
+через | across, in
+эти | these
+нас | us
+про | about
+всего | in all, only, of all
+них | prepositional form of `они' (they)
+какая | which, feminine
+много | lots
+разве | interrogative particle
+сказала | she said
+три | three
+эту | this, acc. fem. sing.
+моя | my, feminine
+впрочем | moreover, besides
+хорошо | good
+свою | one's own, acc. fem. sing.
+этой | oblique form of `эта', fem. `this'
+перед | in front of
+иногда | sometimes
+лучше | better
+чуть | a little
+том | preposn. form of `that one'
+нельзя | one must not
+такой | such a one
+им | to them
+более | more
+всегда | always
+конечно | of course
+всю | acc. fem. sing of `all'
+между | between
+
+
+ | b: some paradigms
+ |
+ | personal pronouns
+ |
+ | я меня мне мной [мною]
+ | ты тебя тебе тобой [тобою]
+ | он его ему им [него, нему, ним]
+ | она ее ей ею [нее, ней, нею]
+ | оно его ему им [него, нему, ним]
+ |
+ | мы нас нам нами
+ | вы вас вам вами
+ | они их им ими [них, ним, ними]
+ |
+ | себя себе собой [собою]
+ |
+ | demonstrative pronouns: этот (this), тот (that)
+ |
+ | этот эта это эти
+ | этого эту это эти
+ | этого этой этого этих
+ | этому этой этому этим
+ | этим этой этим [этою] этими
+ | этом этой этом этих
+ |
+ | тот та то те
+ | того ту то те
+ | того той того тех
+ | тому той тому тем
+ | тем той тем [тою] теми
+ | том той том тех
+ |
+ | determinative pronouns
+ |
+ | (a) весь (all)
+ |
+ | весь вся все все
+ | всего всю все все
+ | всего всей всего всех
+ | всему всей всему всем
+ | всем всей всем [всею] всеми
+ | всем всей всем всех
+ |
+ | (b) сам (himself etc)
+ |
+ | сам сама само сами
+ | самого саму само самих
+ | самого самой самого самих
+ | самому самой самому самим
+ | самим самой самим [самою] самими
+ | самом самой самом самих
+ |
+ | stems of verbs `to be', `to have', `to do' and modal
+ |
+ | быть бы буд быв есть суть
+ | име
+ | дел
+ | мог мож мочь
+ | уме
+ | хоч хот
+ | долж
+ | можн
+ | нужн
+ | нельзя
+