git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
Fuzzy check uses already normalized words.
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 23 Feb 2015 14:29:04 +0000 (14:29 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 23 Feb 2015 14:29:04 +0000 (14:29 +0000)
src/plugins/fuzzy_check.c

index 2dd05a109c8d810797b24876c8fbe48c4f16f520..b1c24c6b8a0ab2c8a038aa3428a320eba3513aca 100644 (file)
@@ -49,7 +49,6 @@
 #include "main.h"
 #include "blake2.h"
 #include "ottery.h"
-#include "libstemmer.h"
 
 #define DEFAULT_SYMBOL "R_FUZZY_HASH"
 #define DEFAULT_UPSTREAM_ERROR_TIME 10
@@ -534,50 +533,18 @@ fuzzy_io_fin (void *ud)
        close (session->fd);
 }
 
-static void
-fuzzy_g_array_destructor (gpointer a)
-{
-       GArray *ar = (GArray *)a;
-
-       g_array_free (ar, TRUE);
-}
-
 static GArray *
 fuzzy_preprocess_words (struct mime_text_part *part, rspamd_mempool_t *pool)
 {
        GArray *res;
-       struct sb_stemmer *stem;
-       rspamd_fstring_t *w, stw;
-       const guchar *r;
-       guint i;
 
        if (!part->is_utf || !part->language || part->language[0] == '\0') {
                res = part->words;
        }
        else {
-               /* Lemmatize words */
-               stem = sb_stemmer_new (part->language, "UTF_8");
-               if (stem == NULL) {
-                       msg_debug ("cannot lemmatize %s language", part->language);
-                       res = part->words;
-               }
-               else {
-                       res = g_array_sized_new (FALSE, FALSE, sizeof (rspamd_fstring_t),
-                                       part->words->len);
-                       for (i = 0; i < part->words->len; i ++) {
-                               w = &g_array_index (part->words, rspamd_fstring_t, i);
-                               r = sb_stemmer_stem (stem, w->begin, w->len);
-                               if (r != NULL) {
-                                       stw.begin = rspamd_mempool_strdup (pool, r);
-                                       stw.len = strlen (r);
-                                       rspamd_str_lc (stw.begin, stw.len);
-                                       g_array_append_val (res, stw);
-                               }
-                       }
-                       rspamd_mempool_add_destructor (pool, fuzzy_g_array_destructor, res);
-                       sb_stemmer_delete (stem);
-               }
+               res = part->normalized_words;
        }
+
        return res;
 }