git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
lib-language: Rename functions and macros
author Markus Valentin <markus.valentin@open-xchange.com>
Fri, 10 Nov 2023 17:09:38 +0000 (18:09 +0100)
committer Aki Tuomi <aki.tuomi@open-xchange.com>
Wed, 12 Feb 2025 10:34:10 +0000 (12:34 +0200)
This replaces all fts mentions with lang to complete the lib-fts to
lib-lang renaming.

39 files changed:
src/lib-language/lang-common.h
src/lib-language/lang-filter-common.c
src/lib-language/lang-filter-common.h
src/lib-language/lang-filter-contractions.c
src/lib-language/lang-filter-english-possessive.c
src/lib-language/lang-filter-lowercase.c
src/lib-language/lang-filter-normalizer-icu.c
src/lib-language/lang-filter-private.h
src/lib-language/lang-filter-stemmer-snowball.c
src/lib-language/lang-filter-stopwords.c
src/lib-language/lang-filter.c
src/lib-language/lang-filter.h
src/lib-language/lang-icu.c
src/lib-language/lang-icu.h
src/lib-language/lang-indexer-status.h
src/lib-language/lang-library.c
src/lib-language/lang-library.h
src/lib-language/lang-tokenizer-address.c
src/lib-language/lang-tokenizer-common.c
src/lib-language/lang-tokenizer-common.h
src/lib-language/lang-tokenizer-generic-private.h
src/lib-language/lang-tokenizer-generic.c
src/lib-language/lang-tokenizer-private.h
src/lib-language/lang-tokenizer.c
src/lib-language/lang-tokenizer.h
src/lib-language/language.c
src/lib-language/language.h
src/lib-language/test-lang-filter.c
src/lib-language/test-lang-icu.c
src/lib-language/test-lang-tokenizer.c
src/lib-language/test-language.c
src/plugins/fts/doveadm-fts.c
src/plugins/fts/fts-api-private.h
src/plugins/fts/fts-api.c
src/plugins/fts/fts-build-mail.c
src/plugins/fts/fts-plugin.c
src/plugins/fts/fts-search-args.c
src/plugins/fts/fts-user.c
src/plugins/fts/fts-user.h

index 1a1446390a6386f2e88bf759152def1d58387bfb..7a5d58c6801541b30e28104064ba01830ae22258 100644 (file)
@@ -1,5 +1,5 @@
-#ifndef FTS_COMMON_H
-#define FTS_COMMON_H
+#ifndef LANG_COMMON_H
+#define LANG_COMMON_H
 
 /* Some might consider 0x02BB an apostrophe also. */
 #define IS_NONASCII_APOSTROPHE(c) \
@@ -8,9 +8,9 @@
        ((c) == 0x0027 || IS_NONASCII_APOSTROPHE(c))
 #define IS_WB5A_APOSTROPHE(c) \
        ((c) == 0x0027 || (c) == 0x2019)
-#define FTS_PREFIX_SPLAT_CHAR 0x002A /* '*' */
+#define LANG_PREFIX_SPLAT_CHAR 0x002A /* '*' */
 #define IS_PREFIX_SPLAT(c) \
-       ((c) == FTS_PREFIX_SPLAT_CHAR)
+       ((c) == LANG_PREFIX_SPLAT_CHAR)
 /* The h letters are included because it is an exception in French.
    A, E, H, I, O, U, Y, a, e, h, i, o, u, y */
 #define IS_ASCII_VOWEL(c) \
index 75f006e0cf17ce7fb303f6709debb2ab9e1d84c4..ba14101ae13da7f1baad0e69311075575857ba47 100644 (file)
@@ -8,13 +8,13 @@
 #include "lang-filter-common.h"
 #include "lang-tokenizer-common.h"
 
-void fts_filter_truncate_token(string_t *token, size_t max_length)
+void lang_filter_truncate_token(string_t *token, size_t max_length)
 {
        if (str_len(token) <= max_length)
                return;
 
        size_t len = max_length;
-       fts_tokenizer_delete_trailing_partial_char(token->data, &len);
+       lang_tokenizer_delete_trailing_partial_char(token->data, &len);
        str_truncate(token, len);
        i_assert(len <= max_length);
 }
index 7b6552cf5d97525d87a0f33477972760fb5c8fc3..08ca137e189b79e8cfcdabccb10c083c2c2c1419 100644 (file)
@@ -1,6 +1,6 @@
-#ifndef FTS_FILTER_COMMON_H
-#define FTS_FILTER_COMMON_H
+#ifndef LANG_FILTER_COMMON_H
+#define LANG_FILTER_COMMON_H
 
-void fts_filter_truncate_token(string_t *token, size_t max_length);
+void lang_filter_truncate_token(string_t *token, size_t max_length);
 
 #endif
index 7531172998ba34f4c719ea0a982835418c981ad6..a2257183510f2cf0617feb063e251fa0a3242557 100644 (file)
@@ -8,12 +8,12 @@
 #include "unichar.h"
 
 static int
-fts_filter_contractions_create(const struct fts_language *lang,
+lang_filter_contractions_create(const struct language *lang,
                               const char *const *settings,
-                              struct fts_filter **filter_r,
+                              struct lang_filter **filter_r,
                               const char **error_r)
 {
-       struct fts_filter *filter;
+       struct lang_filter *filter;
 
        if (settings[0] != NULL) {
                *error_r = t_strdup_printf("Unknown setting: %s", settings[0]);
@@ -24,15 +24,15 @@ fts_filter_contractions_create(const struct fts_language *lang,
                return -1;
        }
 
-       filter = i_new(struct fts_filter, 1);
-       *filter = *fts_filter_contractions;
+       filter = i_new(struct lang_filter, 1);
+       *filter = *lang_filter_contractions;
        filter->token = str_new(default_pool, 64);
        *filter_r = filter;
        return 0;
 }
 
 static int
-fts_filter_contractions_filter(struct fts_filter *filter ATTR_UNUSED,
+lang_filter_contractions_filter(struct lang_filter *filter ATTR_UNUSED,
                            const char **_token,
                            const char **error_r ATTR_UNUSED)
 {
@@ -74,13 +74,13 @@ fts_filter_contractions_filter(struct fts_filter *filter ATTR_UNUSED,
        return 1;
 }
 
-static const struct fts_filter fts_filter_contractions_real = {
+static const struct lang_filter lang_filter_contractions_real = {
        .class_name = "contractions",
        .v = {
-               fts_filter_contractions_create,
-               fts_filter_contractions_filter,
+               lang_filter_contractions_create,
+               lang_filter_contractions_filter,
                NULL
        }
 };
 
-const struct fts_filter *fts_filter_contractions = &fts_filter_contractions_real;
+const struct lang_filter *lang_filter_contractions = &lang_filter_contractions_real;
index 33c809e729151352a3363310ab243e34caa3b8a7..192ef05c00c32f20c220ef15549ba930d5a02fde 100644 (file)
@@ -19,7 +19,7 @@ static unichar_t get_ending_utf8_char(const char *str, size_t *end_pos)
 }
 
 static int
-fts_filter_english_possessive_filter(struct fts_filter *filter ATTR_UNUSED,
+lang_filter_english_possessive_filter(struct lang_filter *filter ATTR_UNUSED,
                                     const char **token,
                                     const char **error_r ATTR_UNUSED)
 {
@@ -35,13 +35,13 @@ fts_filter_english_possessive_filter(struct fts_filter *filter ATTR_UNUSED,
        return 1;
 }
 
-static const struct fts_filter fts_filter_english_possessive_real = {
+static const struct lang_filter lang_filter_english_possessive_real = {
        .class_name = "english-possessive",
        .v = {
                NULL,
-               fts_filter_english_possessive_filter,
+               lang_filter_english_possessive_filter,
                NULL
        }
 };
 
-const struct fts_filter *fts_filter_english_possessive = &fts_filter_english_possessive_real;
+const struct lang_filter *lang_filter_english_possessive = &lang_filter_english_possessive_real;
index 96ab82bfeb720a4fd2832c8730816ebd778d230a..15775ff7251f76e75549c43587460a809e8fe319 100644 (file)
 #endif
 
 static int
-fts_filter_lowercase_create(const struct fts_language *lang ATTR_UNUSED,
-                            const char *const *settings,
-                            struct fts_filter **filter_r,
-                            const char **error_r)
+lang_filter_lowercase_create(const struct language *lang ATTR_UNUSED,
+                            const char *const *settings,
+                            struct lang_filter **filter_r,
+                            const char **error_r)
 {
-       struct fts_filter *filter;
+       struct lang_filter *filter;
        unsigned int i, max_length = 250;
 
        for (i = 0; settings[i] != NULL; i += 2) {
@@ -34,8 +34,8 @@ fts_filter_lowercase_create(const struct fts_language *lang ATTR_UNUSED,
                        return -1;
                }
        }
-       filter = i_new(struct fts_filter, 1);
-       *filter = *fts_filter_lowercase;
+       filter = i_new(struct lang_filter, 1);
+       *filter = *lang_filter_lowercase;
        filter->token = str_new(default_pool, 64);
        filter->max_length = max_length;
 
@@ -44,14 +44,14 @@ fts_filter_lowercase_create(const struct fts_language *lang ATTR_UNUSED,
 }
 
 static int
-fts_filter_lowercase_filter(struct fts_filter *filter ATTR_UNUSED,
-                            const char **token,
-                            const char **error_r ATTR_UNUSED)
+lang_filter_lowercase_filter(struct lang_filter *filter ATTR_UNUSED,
+                            const char **token,
+                            const char **error_r ATTR_UNUSED)
 {
 #ifdef HAVE_LIBICU
        str_truncate(filter->token, 0);
-       fts_icu_lcase(filter->token, *token);
-       fts_filter_truncate_token(filter->token, filter->max_length);
+       lang_icu_lcase(filter->token, *token);
+       lang_filter_truncate_token(filter->token, filter->max_length);
        *token = str_c(filter->token);
 #else
        *token = t_str_lcase(*token);
@@ -59,13 +59,13 @@ fts_filter_lowercase_filter(struct fts_filter *filter ATTR_UNUSED,
        return 1;
 }
 
-static const struct fts_filter fts_filter_lowercase_real = {
+static const struct lang_filter lang_filter_lowercase_real = {
        .class_name = "lowercase",
        .v = {
-               fts_filter_lowercase_create,
-               fts_filter_lowercase_filter,
+               lang_filter_lowercase_create,
+               lang_filter_lowercase_filter,
                NULL
        }
 };
 
-const struct fts_filter *fts_filter_lowercase = &fts_filter_lowercase_real;
+const struct lang_filter *lang_filter_lowercase = &lang_filter_lowercase_real;
index fb66423d96272e9f17d7a840961d15509340ca47..d6f8339e8c1287fd6a26957703ff39b8c19b9aa1 100644 (file)
@@ -11,8 +11,8 @@
 #ifdef HAVE_LIBICU
 #include "lang-icu.h"
 
-struct fts_filter_normalizer_icu {
-       struct fts_filter filter;
+struct lang_filter_normalizer_icu {
+       struct lang_filter filter;
        pool_t pool;
        const char *transliterator_id;
 
@@ -21,10 +21,10 @@ struct fts_filter_normalizer_icu {
        string_t *utf8_token;
 };
 
-static void fts_filter_normalizer_icu_destroy(struct fts_filter *filter)
+static void lang_filter_normalizer_icu_destroy(struct lang_filter *filter)
 {
-       struct fts_filter_normalizer_icu *np =
-               (struct fts_filter_normalizer_icu *)filter;
+       struct lang_filter_normalizer_icu *np =
+               (struct lang_filter_normalizer_icu *)filter;
 
        if (np->transliterator != NULL)
                utrans_close(np->transliterator);
@@ -32,12 +32,12 @@ static void fts_filter_normalizer_icu_destroy(struct fts_filter *filter)
 }
 
 static int
-fts_filter_normalizer_icu_create(const struct fts_language *lang ATTR_UNUSED,
-                                const char *const *settings,
-                                struct fts_filter **filter_r,
-                                const char **error_r)
+lang_filter_normalizer_icu_create(const struct language *lang ATTR_UNUSED,
+                                 const char *const *settings,
+                                 struct lang_filter **filter_r,
+                                 const char **error_r)
 {
-       struct fts_filter_normalizer_icu *np;
+       struct lang_filter_normalizer_icu *np;
        pool_t pp;
        unsigned int i, max_length = 250;
        const char *id = "Any-Lower; NFKD; [: Nonspacing Mark :] Remove; NFC; [\\x20] Remove";
@@ -59,11 +59,11 @@ fts_filter_normalizer_icu_create(const struct fts_language *lang ATTR_UNUSED,
                }
        }
 
-       pp = pool_alloconly_create(MEMPOOL_GROWING"fts_filter_normalizer_icu",
-                                  sizeof(struct fts_filter_normalizer_icu));
-       np = p_new(pp, struct fts_filter_normalizer_icu, 1);
+       pp = pool_alloconly_create(MEMPOOL_GROWING"lang_filter_normalizer_icu",
+                                  sizeof(struct lang_filter_normalizer_icu));
+       np = p_new(pp, struct lang_filter_normalizer_icu, 1);
        np->pool = pp;
-       np->filter = *fts_filter_normalizer_icu;
+       np->filter = *lang_filter_normalizer_icu;
        np->transliterator_id = p_strdup(pp, id);
        p_array_init(&np->utf16_token, pp, 64);
        p_array_init(&np->trans_token, pp, 64);
@@ -74,33 +74,33 @@ fts_filter_normalizer_icu_create(const struct fts_language *lang ATTR_UNUSED,
 }
 
 static int
-fts_filter_normalizer_icu_filter(struct fts_filter *filter, const char **token,
+lang_filter_normalizer_icu_filter(struct lang_filter *filter, const char **token,
                                 const char **error_r)
 {
-       struct fts_filter_normalizer_icu *np =
-               (struct fts_filter_normalizer_icu *)filter;
+       struct lang_filter_normalizer_icu *np =
+               (struct lang_filter_normalizer_icu *)filter;
 
        if (np->transliterator == NULL)
-               if (fts_icu_transliterator_create(np->transliterator_id,
-                                                 &np->transliterator,
-                                                 error_r) < 0)
+               if (lang_icu_transliterator_create(np->transliterator_id,
+                                                  &np->transliterator,
+                                                  error_r) < 0)
                        return -1;
 
-       fts_icu_utf8_to_utf16(&np->utf16_token, *token);
+       lang_icu_utf8_to_utf16(&np->utf16_token, *token);
        array_append_zero(&np->utf16_token);
        array_pop_back(&np->utf16_token);
        array_clear(&np->trans_token);
-       if (fts_icu_translate(&np->trans_token, array_front(&np->utf16_token),
-                             array_count(&np->utf16_token),
-                             np->transliterator, error_r) < 0)
+       if (lang_icu_translate(&np->trans_token, array_front(&np->utf16_token),
+                              array_count(&np->utf16_token),
+                              np->transliterator, error_r) < 0)
                return -1;
 
        if (array_count(&np->trans_token) == 0)
                return 0;
 
-       fts_icu_utf16_to_utf8(np->utf8_token, array_front(&np->trans_token),
+       lang_icu_utf16_to_utf8(np->utf8_token, array_front(&np->trans_token),
                              array_count(&np->trans_token));
-       fts_filter_truncate_token(np->utf8_token, np->filter.max_length);
+       lang_filter_truncate_token(np->utf8_token, np->filter.max_length);
        *token = str_c(np->utf8_token);
        return 1;
 }
@@ -108,38 +108,38 @@ fts_filter_normalizer_icu_filter(struct fts_filter *filter, const char **token,
 #else
 
 static int
-fts_filter_normalizer_icu_create(const struct fts_language *lang ATTR_UNUSED,
-                                const char *const *settings ATTR_UNUSED,
-                                struct fts_filter **filter_r ATTR_UNUSED,
-                                const char **error_r)
+lang_filter_normalizer_icu_create(const struct language *lang ATTR_UNUSED,
+                                 const char *const *settings ATTR_UNUSED,
+                                 struct lang_filter **filter_r ATTR_UNUSED,
+                                 const char **error_r)
 {
        *error_r = "libicu support not built in";
        return -1;
 }
 
 static int
-fts_filter_normalizer_icu_filter(struct fts_filter *filter ATTR_UNUSED,
-                                const char **token ATTR_UNUSED,
-                                const char **error_r ATTR_UNUSED)
+lang_filter_normalizer_icu_filter(struct lang_filter *filter ATTR_UNUSED,
+                                 const char **token ATTR_UNUSED,
+                                 const char **error_r ATTR_UNUSED)
 {
        return -1;
 }
 
 static void
-fts_filter_normalizer_icu_destroy(struct fts_filter *normalizer ATTR_UNUSED)
+lang_filter_normalizer_icu_destroy(struct lang_filter *normalizer ATTR_UNUSED)
 {
 }
 
 #endif
 
-static const struct fts_filter fts_filter_normalizer_icu_real = {
+static const struct lang_filter lang_filter_normalizer_icu_real = {
        .class_name = "normalizer-icu",
        .v = {
-               fts_filter_normalizer_icu_create,
-               fts_filter_normalizer_icu_filter,
-               fts_filter_normalizer_icu_destroy
+               lang_filter_normalizer_icu_create,
+               lang_filter_normalizer_icu_filter,
+               lang_filter_normalizer_icu_destroy
        }
 };
 
-const struct fts_filter *fts_filter_normalizer_icu =
-       &fts_filter_normalizer_icu_real;
+const struct lang_filter *lang_filter_normalizer_icu =
+       &lang_filter_normalizer_icu_real;
index a43422dc848e5963dcf90f77aec69d6834acd9a5..d2e755cfda20d2623de6dbf83240f8547978c748 100644 (file)
@@ -1,9 +1,9 @@
-#ifndef FTS_FILTER_PRIVATE_H
-#define FTS_FILTER_PRIVATE_H
+#ifndef LANG_FILTER_PRIVATE_H
+#define LANG_FILTER_PRIVATE_H
 
 #include "lang-filter.h"
 
-#define FTS_FILTER_CLASSES_NR 6
+#define LANG_FILTER_CLASSES_NR 6
 
 /*
  API that stemming providers (classes) must provide: The create()
  The destroy function is called to destroy an instance of a filter.
 
 */
-struct fts_filter_vfuncs {
-       int (*create)(const struct fts_language *lang,
+struct lang_filter_vfuncs {
+       int (*create)(const struct language *lang,
                      const char *const *settings,
-                     struct fts_filter **filter_r,
+                     struct lang_filter **filter_r,
                      const char **error_r);
-       int (*filter)(struct fts_filter *filter, const char **token,
+       int (*filter)(struct lang_filter *filter, const char **token,
                      const char **error_r);
 
-       void (*destroy)(struct fts_filter *filter);
+       void (*destroy)(struct lang_filter *filter);
 };
 
-struct fts_filter {
+struct lang_filter {
        const char *class_name; /* name of the class this is based on */
-       struct fts_filter_vfuncs v;
-       struct fts_filter *parent;
+       struct lang_filter_vfuncs v;
+       struct lang_filter *parent;
        string_t *token;
        size_t max_length;
        int refcount;
index 4c2047cec78642902c6b17083a5e22be5ec2c1ac..91fb16ef9ffb0bc0a6b2e0c254042b2483a9d812 100644 (file)
@@ -8,17 +8,17 @@
 
 #include <libstemmer.h>
 
-struct fts_filter_stemmer_snowball {
-       struct fts_filter filter;
+struct lang_filter_stemmer_snowball {
+       struct lang_filter filter;
        pool_t pool;
-       struct fts_language *lang;
+       struct language *lang;
        struct sb_stemmer *stemmer;
 };
 
-static void fts_filter_stemmer_snowball_destroy(struct fts_filter *filter)
+static void lang_filter_stemmer_snowball_destroy(struct lang_filter *filter)
 {
-       struct fts_filter_stemmer_snowball *sp =
-               (struct fts_filter_stemmer_snowball *)filter;
+       struct lang_filter_stemmer_snowball *sp =
+               (struct lang_filter_stemmer_snowball *)filter;
 
        if (sp->stemmer != NULL)
                sb_stemmer_delete(sp->stemmer);
@@ -26,12 +26,12 @@ static void fts_filter_stemmer_snowball_destroy(struct fts_filter *filter)
 }
 
 static int
-fts_filter_stemmer_snowball_create(const struct fts_language *lang,
-                                   const char *const *settings,
-                                   struct fts_filter **filter_r,
-                                   const char **error_r)
+lang_filter_stemmer_snowball_create(const struct language *lang,
+                                    const char *const *settings,
+                                    struct lang_filter **filter_r,
+                                    const char **error_r)
 {
-       struct fts_filter_stemmer_snowball *sp;
+       struct lang_filter_stemmer_snowball *sp;
        pool_t pp;
 
        *filter_r = NULL;
@@ -40,42 +40,42 @@ fts_filter_stemmer_snowball_create(const struct fts_language *lang,
                *error_r = t_strdup_printf("Unknown setting: %s", settings[0]);
                return -1;
        }
-       pp = pool_alloconly_create(MEMPOOL_GROWING"fts_filter_stemmer_snowball",
-                                  sizeof(struct fts_filter));
-       sp = p_new(pp, struct fts_filter_stemmer_snowball, 1);
+       pp = pool_alloconly_create(MEMPOOL_GROWING"lang_filter_stemmer_snowball",
+                                  sizeof(struct lang_filter));
+       sp = p_new(pp, struct lang_filter_stemmer_snowball, 1);
        sp->pool = pp;
-       sp->filter = *fts_filter_stemmer_snowball;
-       sp->lang = p_malloc(sp->pool, sizeof(struct fts_language));
+       sp->filter = *lang_filter_stemmer_snowball;
+       sp->lang = p_malloc(sp->pool, sizeof(struct language));
        sp->lang->name = p_strdup(sp->pool, lang->name);
        *filter_r = &sp->filter;
        return 0;
 }
 
 static int
-fts_filter_stemmer_snowball_create_stemmer(struct fts_filter_stemmer_snowball *sp,
-                                          const char **error_r)
+lang_filter_stemmer_snowball_create_stemmer(struct lang_filter_stemmer_snowball *sp,
+                                           const char **error_r)
 {
        sp->stemmer = sb_stemmer_new(sp->lang->name, "UTF_8");
        if (sp->stemmer == NULL) {
                *error_r = t_strdup_printf(
                        "Creating a Snowball stemmer for language '%s' failed.",
                        sp->lang->name);
-               fts_filter_stemmer_snowball_destroy(&sp->filter);
+               lang_filter_stemmer_snowball_destroy(&sp->filter);
                return -1;
        }
        return 0;
 }
 
 static int
-fts_filter_stemmer_snowball_filter(struct fts_filter *filter,
-                                   const char **token, const char **error_r)
+lang_filter_stemmer_snowball_filter(struct lang_filter *filter,
+                                    const char **token, const char **error_r)
 {
-       struct fts_filter_stemmer_snowball *sp =
-               (struct fts_filter_stemmer_snowball *) filter;
+       struct lang_filter_stemmer_snowball *sp =
+               (struct lang_filter_stemmer_snowball *) filter;
        const sb_symbol *base;
 
        if (sp->stemmer == NULL) {
-               if (fts_filter_stemmer_snowball_create_stemmer(sp, error_r) < 0)
+               if (lang_filter_stemmer_snowball_create_stemmer(sp, error_r) < 0)
                        return -1;
        }
 
@@ -93,7 +93,7 @@ fts_filter_stemmer_snowball_filter(struct fts_filter *filter,
        else {
                /* If the stemmer returns an empty token, the return value
                 * should be 0 instead of 1 (otherwise it causes an assertion
-                * fault in fts_filter_filter() ).
+                * fault in lang_filter() ).
                 * However, removing tokens may bring the same kind of issues
                 * and inconsistencies that stopwords cause when used with
                 * multiple languages and negations.
@@ -106,36 +106,36 @@ fts_filter_stemmer_snowball_filter(struct fts_filter *filter,
 #else
 
 static int
-fts_filter_stemmer_snowball_create(const struct fts_language *lang ATTR_UNUSED,
-                                   const char *const *settings ATTR_UNUSED,
-                                   struct fts_filter **filter_r ATTR_UNUSED,
-                                   const char **error_r)
+lang_filter_stemmer_snowball_create(const struct language *lang ATTR_UNUSED,
+                                    const char *const *settings ATTR_UNUSED,
+                                    struct lang_filter **filter_r ATTR_UNUSED,
+                                    const char **error_r)
 {
        *error_r = "Snowball support not built in";
        return -1;
 }
 static void
-fts_filter_stemmer_snowball_destroy(struct fts_filter *stemmer ATTR_UNUSED)
+lang_filter_stemmer_snowball_destroy(struct lang_filter *stemmer ATTR_UNUSED)
 {
 }
 
 static int
-fts_filter_stemmer_snowball_filter(struct fts_filter *filter ATTR_UNUSED,
-                                  const char **token ATTR_UNUSED,
-                                  const char **error_r ATTR_UNUSED)
+lang_filter_stemmer_snowball_filter(struct lang_filter *filter ATTR_UNUSED,
+                                   const char **token ATTR_UNUSED,
+                                   const char **error_r ATTR_UNUSED)
 {
        return -1;
 }
 
 #endif
 
-static const struct fts_filter fts_filter_stemmer_snowball_real = {
+static const struct lang_filter lang_filter_stemmer_snowball_real = {
        .class_name = "snowball",
        .v = {
-               fts_filter_stemmer_snowball_create,
-               fts_filter_stemmer_snowball_filter,
-               fts_filter_stemmer_snowball_destroy
+               lang_filter_stemmer_snowball_create,
+               lang_filter_stemmer_snowball_filter,
+               lang_filter_stemmer_snowball_destroy
        }
 };
 
-const struct fts_filter *fts_filter_stemmer_snowball = &fts_filter_stemmer_snowball_real;
+const struct lang_filter *lang_filter_stemmer_snowball = &lang_filter_stemmer_snowball_real;
index 32067f38b2d55d5c3994b84531a9736754e8d617..8f7ce00631514e18c86d3e8b88a7133dead63296 100644 (file)
 #define STOPWORDS_CUTCHARS "|#\t "
 #define STOPWORDS_DISALLOWED_CHARS "/\\<>.,\":()\t\n\r"
 
-struct fts_filter_stopwords {
-       struct fts_filter filter;
-       struct fts_language *lang;
+struct lang_filter_stopwords {
+       struct lang_filter filter;
+       struct language *lang;
        pool_t pool;
        HASH_TABLE(const char *, const char *) stopwords;
        const char *stopwords_dir;
 };
 
-static int fts_filter_stopwords_read_list(struct fts_filter_stopwords *filter,
-                                         const char **error_r)
+static int lang_filter_stopwords_read_list(struct lang_filter_stopwords *filter,
+                                          const char **error_r)
 {
        struct istream *input;
        const char *line, *word, *path;
@@ -61,21 +61,21 @@ static int fts_filter_stopwords_read_list(struct fts_filter_stopwords *filter,
        return ret;
 }
 
-static void fts_filter_stopwords_destroy(struct fts_filter *filter)
+static void lang_filter_stopwords_destroy(struct lang_filter *filter)
 {
-       struct fts_filter_stopwords *sp = (struct fts_filter_stopwords *)filter;
+       struct lang_filter_stopwords *sp = (struct lang_filter_stopwords *)filter;
 
        hash_table_destroy(&sp->stopwords);
        pool_unref(&sp->pool);
 }
 
 static int
-fts_filter_stopwords_create(const struct fts_language *lang,
-                            const char *const *settings,
-                            struct fts_filter **filter_r,
-                            const char **error_r)
+lang_filter_stopwords_create(const struct language *lang,
+                             const char *const *settings,
+                             struct lang_filter **filter_r,
+                             const char **error_r)
 {
-       struct fts_filter_stopwords *sp;
+       struct lang_filter_stopwords *sp;
        pool_t pp;
        const char *dir = NULL;
        unsigned int i;
@@ -90,12 +90,12 @@ fts_filter_stopwords_create(const struct fts_language *lang,
                        return -1;
                }
        }
-       pp = pool_alloconly_create(MEMPOOL_GROWING"fts_filter_stopwords",
-                                  sizeof(struct fts_filter));
-       sp = p_new(pp, struct fts_filter_stopwords, 1);
-       sp->filter = *fts_filter_stopwords;
+       pp = pool_alloconly_create(MEMPOOL_GROWING"lang_filter_stopwords",
+                                  sizeof(struct lang_filter));
+       sp = p_new(pp, struct lang_filter_stopwords, 1);
+       sp->filter = *lang_filter_stopwords;
        sp->pool = pp;
-       sp->lang = p_malloc(sp->pool, sizeof(struct fts_language));
+       sp->lang = p_malloc(sp->pool, sizeof(struct language));
        sp->lang->name = p_strdup(sp->pool, lang->name);
        if (dir != NULL)
                sp->stopwords_dir = p_strdup(pp, dir);
@@ -106,26 +106,26 @@ fts_filter_stopwords_create(const struct fts_language *lang,
 }
 
 static int
-fts_filter_stopwords_filter(struct fts_filter *filter, const char **token,
-                           const char **error_r)
+lang_filter_stopwords_filter(struct lang_filter *filter, const char **token,
+                            const char **error_r)
 {
-       struct fts_filter_stopwords *sp =
-               (struct fts_filter_stopwords *) filter;
+       struct lang_filter_stopwords *sp =
+               (struct lang_filter_stopwords *) filter;
 
        if (!hash_table_is_created(sp->stopwords)) {
                hash_table_create(&sp->stopwords, sp->pool, 0, str_hash, strcmp);
-               if (fts_filter_stopwords_read_list(sp, error_r) < 0)
+               if (lang_filter_stopwords_read_list(sp, error_r) < 0)
                        return -1;
        }
        return hash_table_lookup(sp->stopwords, *token) == NULL ? 1 : 0;
 }
 
-const struct fts_filter fts_filter_stopwords_real = {
+const struct lang_filter lang_filter_stopwords_real = {
        .class_name = "stopwords",
        .v = {
-               fts_filter_stopwords_create,
-               fts_filter_stopwords_filter,
-               fts_filter_stopwords_destroy
+               lang_filter_stopwords_create,
+               lang_filter_stopwords_filter,
+               lang_filter_stopwords_destroy
        }
 };
-const struct fts_filter *fts_filter_stopwords = &fts_filter_stopwords_real;
+const struct lang_filter *lang_filter_stopwords = &lang_filter_stopwords_real;
index 1e1a972c3bbec9c1482428d6455cf06d8b1e8fe0..b4021ba627d7320202fb39b3ea35ec1df406bd01 100644 (file)
 #  include "lang-icu.h"
 #endif
 
-static ARRAY(const struct fts_filter *) fts_filter_classes;
+static ARRAY(const struct lang_filter *) lang_filter_classes;
 
-void fts_filters_init(void)
+void lang_filters_init(void)
 {
-       i_array_init(&fts_filter_classes, FTS_FILTER_CLASSES_NR);
-
-       fts_filter_register(fts_filter_stopwords);
-       fts_filter_register(fts_filter_stemmer_snowball);
-       fts_filter_register(fts_filter_normalizer_icu);
-       fts_filter_register(fts_filter_lowercase);
-       fts_filter_register(fts_filter_english_possessive);
-       fts_filter_register(fts_filter_contractions);
+       i_array_init(&lang_filter_classes, LANG_FILTER_CLASSES_NR);
+
+       lang_filter_register(lang_filter_stopwords);
+       lang_filter_register(lang_filter_stemmer_snowball);
+       lang_filter_register(lang_filter_normalizer_icu);
+       lang_filter_register(lang_filter_lowercase);
+       lang_filter_register(lang_filter_english_possessive);
+       lang_filter_register(lang_filter_contractions);
 }
 
-void fts_filters_deinit(void)
+void lang_filters_deinit(void)
 {
 #ifdef HAVE_LIBICU
-       fts_icu_deinit();
+       lang_icu_deinit();
 #endif
-       array_free(&fts_filter_classes);
+       array_free(&lang_filter_classes);
 }
 
-void fts_filter_register(const struct fts_filter *filter_class)
+void lang_filter_register(const struct lang_filter *filter_class)
 {
-       i_assert(fts_filter_find(filter_class->class_name) == NULL);
+       i_assert(lang_filter_find(filter_class->class_name) == NULL);
 
-       array_push_back(&fts_filter_classes, &filter_class);
+       array_push_back(&lang_filter_classes, &filter_class);
 }
 
-const struct fts_filter *fts_filter_find(const char *name)
+const struct lang_filter *lang_filter_find(const char *name)
 {
-       const struct fts_filter *filter;
+       const struct lang_filter *filter;
 
-       array_foreach_elem(&fts_filter_classes, filter) {
+       array_foreach_elem(&lang_filter_classes, filter) {
                if (strcmp(filter->class_name, name) == 0)
                        return filter;
        }
        return NULL;
 }
 
-int fts_filter_create(const struct fts_filter *filter_class,
-                      struct fts_filter *parent,
-                      const struct fts_language *lang,
-                      const char *const *settings,
-                      struct fts_filter **filter_r,
-                      const char **error_r)
+int lang_filter_create(const struct lang_filter *filter_class,
+                       struct lang_filter *parent,
+                       const struct language *lang,
+                       const char *const *settings,
+                       struct lang_filter **filter_r,
+                       const char **error_r)
 {
-       struct fts_filter *fp;
+       struct lang_filter *fp;
        const char *empty_settings = NULL;
 
        i_assert(settings == NULL || str_array_length(settings) % 2 == 0);
@@ -76,27 +76,27 @@ int fts_filter_create(const struct fts_filter *filter_class,
                        *error_r = t_strdup_printf("Unknown setting: %s", settings[0]);
                        return -1;
                }
-               fp = i_new(struct fts_filter, 1);
+               fp = i_new(struct lang_filter, 1);
                *fp = *filter_class;
        }
        fp->refcount = 1;
        fp->parent = parent;
        if (parent != NULL) {
-               fts_filter_ref(parent);
+               lang_filter_ref(parent);
        }
        *filter_r = fp;
        return 0;
 }
-void fts_filter_ref(struct fts_filter *fp)
+void lang_filter_ref(struct lang_filter *fp)
 {
        i_assert(fp->refcount > 0);
 
        fp->refcount++;
 }
 
-void fts_filter_unref(struct fts_filter **_fpp)
+void lang_filter_unref(struct lang_filter **_fpp)
 {
-       struct fts_filter *fp = *_fpp;
+       struct lang_filter *fp = *_fpp;
 
        i_assert(fp->refcount > 0);
        *_fpp = NULL;
@@ -105,7 +105,7 @@ void fts_filter_unref(struct fts_filter **_fpp)
                return;
 
        if (fp->parent != NULL)
-               fts_filter_unref(&fp->parent);
+               lang_filter_unref(&fp->parent);
        if (fp->v.destroy != NULL)
                fp->v.destroy(fp);
        else {
@@ -115,8 +115,8 @@ void fts_filter_unref(struct fts_filter **_fpp)
        }
 }
 
-int fts_filter_filter(struct fts_filter *filter, const char **token,
-                     const char **error_r)
+int lang_filter(struct lang_filter *filter, const char **token,
+               const char **error_r)
 {
        int ret = 0;
 
@@ -124,7 +124,7 @@ int fts_filter_filter(struct fts_filter *filter, const char **token,
 
        /* Recurse to parent. */
        if (filter->parent != NULL)
-               ret = fts_filter_filter(filter->parent, token, error_r);
+               ret = lang_filter(filter->parent, token, error_r);
 
        /* Parent returned token or no parent. */
        if (ret > 0 || filter->parent == NULL)
index 89060b79716f8d720722b63d33b0876d47cf4ec7..6c2532a24e7e2292350dc57eab5d25343e1995f7 100644 (file)
@@ -1,8 +1,8 @@
-#ifndef FTS_FILTER_H
-#define FTS_FILTER_H
+#ifndef LANG_FILTER_H
+#define LANG_FILTER_H
 
-struct fts_language;
-struct fts_filter;
+struct language;
+struct lang_filter;
 /*
  Settings are given in the form of a const char * const *settings =
  {"key", "value", "key2", "value2", NULL} array of string pairs.
@@ -13,12 +13,12 @@ struct fts_filter;
   Stopword files are looked up in "<path>"/stopwords_<lang>.txt
 
  */
-extern const struct fts_filter *fts_filter_stopwords;
+extern const struct lang_filter *lang_filter_stopwords;
 
 /*
  Settings: "lang", language of the stemmed language.
  */
-extern const struct fts_filter *fts_filter_stemmer_snowball;
+extern const struct lang_filter *lang_filter_stemmer_snowball;
 
 /*
  Settings: "id", description of the normalizing/transliterating rules
@@ -29,43 +29,43 @@ extern const struct fts_filter *fts_filter_stemmer_snowball;
  "maxlen", maximum length of tokens that ICU normalizer will output.
   Defaults to 250.
  */
-extern const struct fts_filter *fts_filter_normalizer_icu;
+extern const struct lang_filter *lang_filter_normalizer_icu;
 
 /* Lowercases the input. Supports UTF8, if libicu is available. */
-extern const struct fts_filter *fts_filter_lowercase;
+extern const struct lang_filter *lang_filter_lowercase;
 
 /* Removes <'s> suffix from words. */
-extern const struct fts_filter *fts_filter_english_possessive;
+extern const struct lang_filter *lang_filter_english_possessive;
 
 /* Removes prefixing contractions from words. */
-extern const struct fts_filter *fts_filter_contractions;
+extern const struct lang_filter *lang_filter_contractions;
 
 /* Register all built-in filters. */
-void fts_filters_init(void);
-void fts_filters_deinit(void);
+void lang_filters_init(void);
+void lang_filters_deinit(void);
 
 /* Register a new class explicitly. Built-in classes are automatically
    registered. */
-void fts_filter_register(const struct fts_filter *filter_class);
+void lang_filter_register(const struct lang_filter *filter_class);
 
 /*
  Filtering workflow, find --> create --> filter --> destroy.
  */
-const struct fts_filter *fts_filter_find(const char *name);
-int fts_filter_create(const struct fts_filter *filter_class,
-                      struct fts_filter *parent,
-                      const struct fts_language *lang,
-                      const char *const *settings,
-                      struct fts_filter **filter_r,
-                      const char **error_r);
-void fts_filter_ref(struct fts_filter *filter);
-void fts_filter_unref(struct fts_filter **filter);
+const struct lang_filter *lang_filter_find(const char *name);
+int lang_filter_create(const struct lang_filter *filter_class,
+                       struct lang_filter *parent,
+                       const struct language *lang,
+                       const char *const *settings,
+                       struct lang_filter **filter_r,
+                       const char **error_r);
+void lang_filter_ref(struct lang_filter *filter);
+void lang_filter_unref(struct lang_filter **filter);
 
 /* Returns 1 if token is returned in *token, 0 if token was filtered
    out (*token is also set to NULL) and -1 on error.
    Input is also given via *token.
 */
-int fts_filter_filter(struct fts_filter *filter, const char **token,
-                     const char **error_r);
+int lang_filter(struct lang_filter *filter, const char **token,
+               const char **error_r);
 
 #endif
index 3ba86fce5e0f82c9ee0e0a86681547729e8a2367..6bfd6cf870ce21fc9bca8a617278ae4297a40ff9 100644 (file)
@@ -13,7 +13,7 @@
 
 static struct UCaseMap *icu_csm = NULL;
 
-static struct UCaseMap *fts_icu_csm(void)
+static struct UCaseMap *lang_icu_csm(void)
 {
        UErrorCode err = U_ZERO_ERROR;
 
@@ -27,8 +27,8 @@ static struct UCaseMap *fts_icu_csm(void)
        return icu_csm;
 }
 
-void fts_icu_utf8_to_utf16(ARRAY_TYPE(icu_utf16) *dest_utf16,
-                          const char *src_utf8)
+void lang_icu_utf8_to_utf16(ARRAY_TYPE(icu_utf16) *dest_utf16,
+                           const char *src_utf8)
 {
        buffer_t *dest_buf = dest_utf16->arr.buffer;
        UErrorCode err = U_ZERO_ERROR;
@@ -60,8 +60,8 @@ void fts_icu_utf8_to_utf16(ARRAY_TYPE(icu_utf16) *dest_utf16,
        i_assert(retp == dest_data);
 }
 
-void fts_icu_utf16_to_utf8(string_t *dest_utf8, const UChar *src_utf16,
-                          unsigned int src_len)
+void lang_icu_utf16_to_utf8(string_t *dest_utf8, const UChar *src_utf16,
+                           unsigned int src_len)
 {
        int32_t dest_len = 0;
        int32_t sub_num = 0;
@@ -91,9 +91,9 @@ void fts_icu_utf16_to_utf8(string_t *dest_utf8, const UChar *src_utf16,
        i_assert(retp == dest_data);
 }
 
-int fts_icu_translate(ARRAY_TYPE(icu_utf16) *dest_utf16, const UChar *src_utf16,
-                     unsigned int src_len, UTransliterator *transliterator,
-                     const char **error_r)
+int lang_icu_translate(ARRAY_TYPE(icu_utf16) *dest_utf16, const UChar *src_utf16,
+                      unsigned int src_len, UTransliterator *transliterator,
+                      const char **error_r)
 {
        buffer_t *dest_buf = dest_utf16->arr.buffer;
        UErrorCode err = U_ZERO_ERROR;
@@ -134,9 +134,9 @@ int fts_icu_translate(ARRAY_TYPE(icu_utf16) *dest_utf16, const UChar *src_utf16,
        return 0;
 }
 
-void fts_icu_lcase(string_t *dest_utf8, const char *src_utf8)
+void lang_icu_lcase(string_t *dest_utf8, const char *src_utf8)
 {
-       struct UCaseMap *csm = fts_icu_csm();
+       struct UCaseMap *csm = lang_icu_csm();
        size_t avail_bytes, dest_pos = dest_utf8->used;
        char *dest_data;
        int dest_full_len;
@@ -164,7 +164,7 @@ void fts_icu_lcase(string_t *dest_utf8, const char *src_utf8)
        buffer_set_used_size(dest_utf8, dest_full_len);
 }
 
-void fts_icu_deinit(void)
+void lang_icu_deinit(void)
 {
        if (icu_csm != NULL) {
                ucasemap_close(icu_csm);
@@ -173,9 +173,9 @@ void fts_icu_deinit(void)
        u_cleanup();
 }
 
-int fts_icu_transliterator_create(const char *id,
-                                  UTransliterator **transliterator_r,
-                                  const char **error_r)
+int lang_icu_transliterator_create(const char *id,
+                                   UTransliterator **transliterator_r,
+                                   const char **error_r)
 {
        UErrorCode err = U_ZERO_ERROR;
        UParseError perr;
@@ -183,7 +183,7 @@ int fts_icu_transliterator_create(const char *id,
        i_zero(&perr);
 
        t_array_init(&id_utf16, strlen(id));
-       fts_icu_utf8_to_utf16(&id_utf16, id);
+       lang_icu_utf8_to_utf16(&id_utf16, id);
        *transliterator_r = utrans_openU(array_front(&id_utf16),
                                         array_count(&id_utf16),
                                         UTRANS_FORWARD, NULL, 0, &perr, &err);
index 5b0f3dcce6df3335ad73ca94cec50a4aae340c07..2168477af29eabdfdbf40c7ecc1885c158025268 100644 (file)
@@ -1,5 +1,5 @@
-#ifndef HAVE_FTS_ICU_H
-#define HAVE_FTS_ICU_H
+#ifndef HAVE_LANG_ICU_H
+#define HAVE_LANG_ICU_H
 
 #include <unicode/ustring.h>
 #include <unicode/utrans.h>
@@ -7,22 +7,22 @@
 ARRAY_DEFINE_TYPE(icu_utf16, UChar);
 
 /* Convert UTF-8 input to UTF-16 output. */
-void fts_icu_utf8_to_utf16(ARRAY_TYPE(icu_utf16) *dest_utf16,
-                          const char *src_utf8);
+void lang_icu_utf8_to_utf16(ARRAY_TYPE(icu_utf16) *dest_utf16,
+                           const char *src_utf8);
 /* Convert UTF-16 input to UTF-8 output. */
-void fts_icu_utf16_to_utf8(string_t *dest_utf8, const UChar *src_utf16,
-                          unsigned int src_len);
+void lang_icu_utf16_to_utf8(string_t *dest_utf8, const UChar *src_utf16,
+                           unsigned int src_len);
 /* Run ICU translation for the string. Returns 0 on success, -1 on error. */
-int fts_icu_translate(ARRAY_TYPE(icu_utf16) *dest_utf16, const UChar *src_utf16,
-                     unsigned int src_len, UTransliterator *transliterator,
-                     const char **error_r);
+int lang_icu_translate(ARRAY_TYPE(icu_utf16) *dest_utf16, const UChar *src_utf16,
+                      unsigned int src_len, UTransliterator *transliterator,
+                      const char **error_r);
 /* Lowercase the given UTF-8 string. */
-void fts_icu_lcase(string_t *dest_utf8, const char *src_utf8);
+void lang_icu_lcase(string_t *dest_utf8, const char *src_utf8);
 
 /* Free all the memory used by ICU functions. */
-void fts_icu_deinit(void);
+void lang_icu_deinit(void);
 
-int fts_icu_transliterator_create(const char *id,
-                                  UTransliterator **transliterator_r,
-                                  const char **error_r) ;
+int lang_icu_transliterator_create(const char *id,
+                                   UTransliterator **transliterator_r,
+                                   const char **error_r) ;
 #endif
index 5c4d6c7ad70071787b8dceeea86c3af676e2581b..c56d1a957833da3013225f681787fcba5b8e51cf 100644 (file)
@@ -1,5 +1,5 @@
-#ifndef FTS_INDEXER_STATUS_H
-#define FTS_INDEXER_STATUS_H
+#ifndef LANG_INDEXER_STATUS_H
+#define LANG_INDEXER_STATUS_H
 
 enum indexer_state {
        INDEXER_STATE_PROCESSING =  0,
index 7dc5c2c60387f5ed7545871915f38b7231b02876..7a8b6c7b3d75ee294cd3ea2bb99bb796b1b36169 100644 (file)
@@ -6,16 +6,16 @@
 #include "lang-filter.h"
 #include "lang-library.h"
 
-void fts_library_init(void)
+void lang_library_init(void)
 {
-       fts_languages_init();
-       fts_tokenizers_init();
-       fts_filters_init();
+       languages_init();
+       lang_tokenizers_init();
+       lang_filters_init();
 }
 
-void fts_library_deinit(void)
+void lang_library_deinit(void)
 {
-       fts_languages_deinit();
-       fts_tokenizers_deinit();
-       fts_filters_deinit();
+       languages_deinit();
+       lang_tokenizers_deinit();
+       lang_filters_deinit();
 }
index 8799b10f28b630f63d5e745a01b5251dbc05ec3f..55290cf1650486fa3758d1db4e71ba667171f857 100644 (file)
@@ -1,7 +1,7 @@
-#ifndef FTS_LIBRARY_H
-#define FTS_LIBRARY_H
+#ifndef LANG_LIBRARY_H
+#define LANG_LIBRARY_H
 
-void fts_library_init(void);
-void fts_library_deinit(void);
+void lang_library_init(void);
+void lang_library_deinit(void);
 
 #endif
index 7ceb1d00a993fa9e3b3f29217f2fcdfd9f72dba3..57b0cc24941f4238e9bd8368cca4f7008a3478f1 100644 (file)
@@ -10,7 +10,7 @@
 #define IS_DTEXT(c) \
        (rfc822_atext_chars[(int)(unsigned char)(c)] == 2)
 
-#define FTS_DEFAULT_ADDRESS_MAX_LENGTH 254
+#define LANG_DEFAULT_ADDRESS_MAX_LENGTH 254
 
 enum email_address_parser_state {
        EMAIL_ADDRESS_PARSER_STATE_NONE = 0,
@@ -20,8 +20,8 @@ enum email_address_parser_state {
        EMAIL_ADDRESS_PARSER_STATE_SKIP,
 };
 
-struct email_address_fts_tokenizer {
-       struct fts_tokenizer tokenizer;
+struct email_address_lang_tokenizer {
+       struct lang_tokenizer tokenizer;
        enum email_address_parser_state state;
        string_t *last_word;
        string_t *parent_data; /* Copy of input data between tokens. */
@@ -30,13 +30,13 @@ struct email_address_fts_tokenizer {
 };
 
 static int
-fts_tokenizer_email_address_create(const char *const *settings,
-                                  struct fts_tokenizer **tokenizer_r,
-                                  const char **error_r)
+lang_tokenizer_email_address_create(const char *const *settings,
+                                   struct lang_tokenizer **tokenizer_r,
+                                   const char **error_r)
 {
-       struct email_address_fts_tokenizer *tok;
+       struct email_address_lang_tokenizer *tok;
        bool search = FALSE;
-       unsigned int max_length = FTS_DEFAULT_ADDRESS_MAX_LENGTH;
+       unsigned int max_length = LANG_DEFAULT_ADDRESS_MAX_LENGTH;
        unsigned int i;
 
        for (i = 0; settings[i] != NULL; i += 2) {
@@ -56,8 +56,8 @@ fts_tokenizer_email_address_create(const char *const *settings,
                }
        }
 
-       tok = i_new(struct email_address_fts_tokenizer, 1);
-       tok->tokenizer = *fts_tokenizer_email_address;
+       tok = i_new(struct email_address_lang_tokenizer, 1);
+       tok->tokenizer = *lang_tokenizer_email_address;
        tok->last_word = str_new(default_pool, 128);
        tok->parent_data = str_new(default_pool, 128);
        tok->max_length = max_length;
@@ -66,10 +66,10 @@ fts_tokenizer_email_address_create(const char *const *settings,
        return 0;
 }
 
-static void fts_tokenizer_email_address_destroy(struct fts_tokenizer *_tok)
+static void lang_tokenizer_email_address_destroy(struct lang_tokenizer *_tok)
 {
-       struct email_address_fts_tokenizer *tok =
-               (struct email_address_fts_tokenizer *)_tok;
+       struct email_address_lang_tokenizer *tok =
+               (struct email_address_lang_tokenizer *)_tok;
 
        str_free(&tok->last_word);
        str_free(&tok->parent_data);
@@ -77,8 +77,8 @@ static void fts_tokenizer_email_address_destroy(struct fts_tokenizer *_tok)
 }
 
 static bool
-fts_tokenizer_address_current_token(struct email_address_fts_tokenizer *tok,
-                                    const char **token_r)
+lang_tokenizer_address_current_token(struct email_address_lang_tokenizer *tok,
+                                     const char **token_r)
 {
        const unsigned char *data = tok->last_word->data;
        size_t len = tok->last_word->used;
@@ -91,20 +91,20 @@ fts_tokenizer_address_current_token(struct email_address_fts_tokenizer *tok,
                   IS_DTEXT() does not actually allow utf8 addresses
                   yet though. */
                len = tok->last_word->used;
-               fts_tokenizer_delete_trailing_partial_char(data, &len);
+               lang_tokenizer_delete_trailing_partial_char(data, &len);
                i_assert(len <= tok->max_length);
        }
 
        if (len > 0)
-               fts_tokenizer_delete_trailing_invalid_char(data, &len);
+               lang_tokenizer_delete_trailing_invalid_char(data, &len);
        *token_r = len == 0 ? "" :
                t_strndup(data, len);
        return len > 0;
 }
 
 static bool
-fts_tokenizer_address_parent_data(struct email_address_fts_tokenizer *tok,
-                                  const char **token_r)
+lang_tokenizer_address_parent_data(struct email_address_lang_tokenizer *tok,
+                                   const char **token_r)
 {
        if (tok->tokenizer.parent == NULL || str_len(tok->parent_data) == 0)
                return FALSE;
@@ -141,8 +141,8 @@ static size_t skip_nonlocal_part(const unsigned char *data, size_t size)
 }
 
 static bool
-fts_tokenizer_email_address_too_large(struct email_address_fts_tokenizer *tok,
-                                     size_t pos)
+lang_tokenizer_email_address_too_large(struct email_address_lang_tokenizer *tok,
+                                      size_t pos)
 {
        if (str_len(tok->last_word) + pos <= tok->max_length)
                return FALSE;
@@ -151,15 +151,15 @@ fts_tokenizer_email_address_too_large(struct email_address_fts_tokenizer *tok,
 
           Truncate the input that was added so far to the token, so all of it
           gets sent to the parent tokenizer in
-          fts_tokenizer_address_parent_data(). */
+          lang_tokenizer_address_parent_data(). */
        str_truncate(tok->last_word, 0);
        return TRUE;
 }
 
 static enum email_address_parser_state
-fts_tokenizer_email_address_parse_local(struct email_address_fts_tokenizer *tok,
-                                        const unsigned char *data, size_t size,
-                                        size_t *skip_r)
+lang_tokenizer_email_address_parse_local(struct email_address_lang_tokenizer *tok,
+                                         const unsigned char *data, size_t size,
+                                         size_t *skip_r)
 {
        size_t pos = 0;
        bool seen_at = FALSE;
@@ -175,7 +175,7 @@ fts_tokenizer_email_address_parse_local(struct email_address_fts_tokenizer *tok,
                        break;
        }
 
-       if (fts_tokenizer_email_address_too_large(tok, pos)) {
+       if (lang_tokenizer_email_address_too_large(tok, pos)) {
                *skip_r = 0;
                return EMAIL_ADDRESS_PARSER_STATE_SKIP;
        }
@@ -199,7 +199,7 @@ fts_tokenizer_email_address_parse_local(struct email_address_fts_tokenizer *tok,
        return EMAIL_ADDRESS_PARSER_STATE_NONE;
 }
 
-static bool domain_is_empty(struct email_address_fts_tokenizer *tok)
+static bool domain_is_empty(struct email_address_lang_tokenizer *tok)
 {
        const char *p, *str = str_c(tok->last_word);
 
@@ -209,16 +209,16 @@ static bool domain_is_empty(struct email_address_fts_tokenizer *tok)
 }
 
 static enum email_address_parser_state
-fts_tokenizer_email_address_parse_domain(struct email_address_fts_tokenizer *tok,
-                                         const unsigned char *data, size_t size,
-                                         size_t *skip_r)
+lang_tokenizer_email_address_parse_domain(struct email_address_lang_tokenizer *tok,
+                                          const unsigned char *data, size_t size,
+                                          size_t *skip_r)
 {
        size_t pos = 0;
 
        while (pos < size && (IS_DTEXT(data[pos]) || data[pos] == '.' || data[pos] == '-'))
                pos++;
 
-       if (fts_tokenizer_email_address_too_large(tok, pos)) {
+       if (lang_tokenizer_email_address_too_large(tok, pos)) {
                *skip_r = 0;
                return EMAIL_ADDRESS_PARSER_STATE_SKIP;
        }
@@ -242,8 +242,8 @@ fts_tokenizer_email_address_parse_domain(struct email_address_fts_tokenizer *tok
 }
 
 static bool
-fts_tokenizer_address_skip(const unsigned char *data, size_t size,
-                          size_t *skip_r)
+lang_tokenizer_address_skip(const unsigned char *data, size_t size,
+                           size_t *skip_r)
 {
        for (size_t pos = 0; pos < size; pos++) {
                if (!(IS_ATEXT(data[pos]) || data[pos] == '.' ||
@@ -258,17 +258,17 @@ fts_tokenizer_address_skip(const unsigned char *data, size_t size,
 
 /* Buffer raw data for parent. */
 static void
-fts_tokenizer_address_update_parent(struct email_address_fts_tokenizer *tok,
-                                    const unsigned char *data, size_t size)
+lang_tokenizer_address_update_parent(struct email_address_lang_tokenizer *tok,
+                                     const unsigned char *data, size_t size)
 {
        if (tok->tokenizer.parent != NULL)
                str_append_data(tok->parent_data, data, size);
 }
 
-static void fts_tokenizer_email_address_reset(struct fts_tokenizer *_tok)
+static void lang_tokenizer_email_address_reset(struct lang_tokenizer *_tok)
 {
-       struct email_address_fts_tokenizer *tok =
-               (struct email_address_fts_tokenizer *)_tok;
+       struct email_address_lang_tokenizer *tok =
+               (struct email_address_lang_tokenizer *)_tok;
 
        tok->state = EMAIL_ADDRESS_PARSER_STATE_NONE;
        str_truncate(tok->last_word, 0);
@@ -276,13 +276,13 @@ static void fts_tokenizer_email_address_reset(struct fts_tokenizer *_tok)
 }
 
 static int
-fts_tokenizer_email_address_next(struct fts_tokenizer *_tok,
-                                 const unsigned char *data, size_t size,
-                                size_t *skip_r, const char **token_r,
-                                const char **error_r ATTR_UNUSED)
+lang_tokenizer_email_address_next(struct lang_tokenizer *_tok,
+                                  const unsigned char *data, size_t size,
+                                 size_t *skip_r, const char **token_r,
+                                 const char **error_r ATTR_UNUSED)
 {
-       struct email_address_fts_tokenizer *tok =
-               (struct email_address_fts_tokenizer *)_tok;
+       struct email_address_lang_tokenizer *tok =
+               (struct email_address_lang_tokenizer *)_tok;
        size_t pos = 0, local_skip;
        bool finished;
 
@@ -291,7 +291,7 @@ fts_tokenizer_email_address_next(struct fts_tokenizer *_tok,
 
        if (tok->state == EMAIL_ADDRESS_PARSER_STATE_COMPLETE) {
                *skip_r = pos;
-               if (fts_tokenizer_address_current_token(tok, token_r))
+               if (lang_tokenizer_address_current_token(tok, token_r))
                        return 1;
        }
 
@@ -305,11 +305,11 @@ fts_tokenizer_email_address_next(struct fts_tokenizer *_tok,
                        tok->state = EMAIL_ADDRESS_PARSER_STATE_NONE;
                }
 
-               if (fts_tokenizer_address_parent_data(tok, token_r))
+               if (lang_tokenizer_address_parent_data(tok, token_r))
                        return 1;
 
                if (tok->state == EMAIL_ADDRESS_PARSER_STATE_DOMAIN) {
-                       if (fts_tokenizer_address_current_token(tok, token_r))
+                       if (lang_tokenizer_address_current_token(tok, token_r))
                                return 1;
                }
                tok->state = EMAIL_ADDRESS_PARSER_STATE_NONE;
@@ -324,7 +324,7 @@ fts_tokenizer_email_address_next(struct fts_tokenizer *_tok,
                        /* no part of address found yet. remove possible
                           earlier data */
                        str_truncate(tok->last_word, 0);
-                        if (fts_tokenizer_address_parent_data(tok, token_r)) {
+                        if (lang_tokenizer_address_parent_data(tok, token_r)) {
                                *skip_r = pos;
                                return 1;
                         }
@@ -336,11 +336,11 @@ fts_tokenizer_email_address_next(struct fts_tokenizer *_tok,
                           the beginning of data to see if it contains a full
                           local-part@ */
                        tok->state =
-                               fts_tokenizer_email_address_parse_local(tok,
+                               lang_tokenizer_email_address_parse_local(tok,
                                                                        data + pos,
                                                                        size - pos,
                                                                        &local_skip);
-                       fts_tokenizer_address_update_parent(tok, data+pos,
+                       lang_tokenizer_address_update_parent(tok, data+pos,
                                                            local_skip);
                        pos += local_skip;
 
@@ -351,20 +351,20 @@ fts_tokenizer_email_address_next(struct fts_tokenizer *_tok,
                           to see if it contains a valid domain. */
 
                        tok->state =
-                               fts_tokenizer_email_address_parse_domain(tok,
+                               lang_tokenizer_email_address_parse_domain(tok,
                                                                        data + pos,
                                                                        size - pos,
                                                                        &local_skip);
-                       fts_tokenizer_address_update_parent(tok, data+pos,
+                       lang_tokenizer_address_update_parent(tok, data+pos,
                                                            local_skip);
                        pos += local_skip;
 
                        break;
                case EMAIL_ADDRESS_PARSER_STATE_COMPLETE:
                        *skip_r = pos;
-                       if (fts_tokenizer_address_parent_data(tok, token_r))
+                       if (lang_tokenizer_address_parent_data(tok, token_r))
                                return 1;
-                       if (fts_tokenizer_address_current_token(tok, token_r))
+                       if (lang_tokenizer_address_current_token(tok, token_r))
                                return 1;
                        break;
                case EMAIL_ADDRESS_PARSER_STATE_SKIP:
@@ -373,18 +373,18 @@ fts_tokenizer_email_address_next(struct fts_tokenizer *_tok,
                           simply skipping over it, but the input is being
                           passed to the parent tokenizer. */
                        *skip_r = pos;
-                       if (fts_tokenizer_address_parent_data(tok, token_r))
+                       if (lang_tokenizer_address_parent_data(tok, token_r))
                                return 1;
 
-                       finished = fts_tokenizer_address_skip(data + pos,
+                       finished = lang_tokenizer_address_skip(data + pos,
                                                              size - pos,
                                                              &local_skip);
-                       fts_tokenizer_address_update_parent(tok, data+pos,
+                       lang_tokenizer_address_update_parent(tok, data+pos,
                                                            local_skip);
                        pos += local_skip;
                        if (finished) {
                                *skip_r = pos;
-                               if (fts_tokenizer_address_parent_data(tok, token_r)) {
+                               if (lang_tokenizer_address_parent_data(tok, token_r)) {
                                        tok->state = EMAIL_ADDRESS_PARSER_STATE_NONE;
                                        return 1;
                                }
@@ -400,17 +400,17 @@ fts_tokenizer_email_address_next(struct fts_tokenizer *_tok,
        return 0;
 }
 
-static const struct fts_tokenizer_vfuncs email_address_tokenizer_vfuncs = {
-       fts_tokenizer_email_address_create,
-       fts_tokenizer_email_address_destroy,
-       fts_tokenizer_email_address_reset,
-       fts_tokenizer_email_address_next
+static const struct lang_tokenizer_vfuncs email_address_tokenizer_vfuncs = {
+       lang_tokenizer_email_address_create,
+       lang_tokenizer_email_address_destroy,
+       lang_tokenizer_email_address_reset,
+       lang_tokenizer_email_address_next
 };
 
-static const struct fts_tokenizer fts_tokenizer_email_address_real = {
+static const struct lang_tokenizer lang_tokenizer_email_address_real = {
        .name = "email-address",
        .v = &email_address_tokenizer_vfuncs,
        .stream_to_parents = TRUE,
 };
-const struct fts_tokenizer *fts_tokenizer_email_address =
-       &fts_tokenizer_email_address_real;
+const struct lang_tokenizer *lang_tokenizer_email_address =
+       &lang_tokenizer_email_address_real;
index 9007b588b7ce556c71f385d3f53e5e1a4d4fa998..4c9863354b02825a6e47bff9675f3c745cbcae74 100644 (file)
@@ -4,8 +4,8 @@
 #include "unichar.h"
 #include "lang-tokenizer-common.h"
 void
-fts_tokenizer_delete_trailing_partial_char(const unsigned char *data,
-                                          size_t *len)
+lang_tokenizer_delete_trailing_partial_char(const unsigned char *data,
+                                           size_t *len)
 {
        size_t pos;
        unsigned int char_bytes;
@@ -22,8 +22,8 @@ fts_tokenizer_delete_trailing_partial_char(const unsigned char *data,
                *len = pos;
        }
 }
-void fts_tokenizer_delete_trailing_invalid_char(const unsigned char *data,
-                  size_t *len)
+void lang_tokenizer_delete_trailing_invalid_char(const unsigned char *data,
+                                                size_t *len)
 {
        size_t pos = *len;
 
index b90e54353e98cfda45cc86072c5d338b98b00008..abb72528d5f0b772ecb40bca6b75b0b45dac6e5b 100644 (file)
@@ -1,9 +1,9 @@
-#ifndef FTS_TOKENIZER_COMMON_H
-#define FTS_TOKENIZER_COMMON_H
+#ifndef LANG_TOKENIZER_COMMON_H
+#define LANG_TOKENIZER_COMMON_H
 void
-fts_tokenizer_delete_trailing_partial_char(const unsigned char *data,
+lang_tokenizer_delete_trailing_partial_char(const unsigned char *data,
                                            size_t *len);
 void
-fts_tokenizer_delete_trailing_invalid_char(const unsigned char *data,
+lang_tokenizer_delete_trailing_invalid_char(const unsigned char *data,
                   size_t *len);
 #endif
index 87f4d48fa16dc5016f38c7c5e786906f90f68968..02f937afd44c0f1cc319fa50d331c704da2e0761 100644 (file)
@@ -1,8 +1,8 @@
-#ifndef FTS_TOKENIZER_GENERIC_PRIVATE_H
-#define FTS_TOKENIZER_GENERIC_PRIVATE_H
+#ifndef LANG_TOKENIZER_GENERIC_PRIVATE_H
+#define LANG_TOKENIZER_GENERIC_PRIVATE_H
 
-extern const struct fts_tokenizer_vfuncs generic_tokenizer_vfuncs_simple;
-extern const struct fts_tokenizer_vfuncs generic_tokenizer_vfuncs_tr29;
+extern const struct lang_tokenizer_vfuncs generic_tokenizer_vfuncs_simple;
+extern const struct lang_tokenizer_vfuncs generic_tokenizer_vfuncs_tr29;
 
 /* Word boundary letter type */
 enum letter_type {
@@ -38,8 +38,8 @@ enum boundary_algorithm {
 #define ALGORITHM_TR29_NAME "tr29"
 };
 
-struct generic_fts_tokenizer {
-       struct fts_tokenizer tokenizer;
+struct generic_lang_tokenizer {
+       struct lang_tokenizer tokenizer;
        unsigned int max_length;
        bool prefixsplat; /* for search strings, accept a trailing '*' for explicit prefix */
        bool wb5a; /* TR29 rule for prefix separation
index b3af8d586ccb70949bd182cb5fbddf0af9348a08..91b3f8283d5482a6fda62f35198fed30575a8687 100644 (file)
 #include "word-break-data.c"
 
 /* see comments below between is_base64() and skip_base64() */
-#define FTS_SKIP_BASE64_MIN_SEQUENCES 1
-#define FTS_SKIP_BASE64_MIN_CHARS 50
+#define LANG_SKIP_BASE64_MIN_SEQUENCES 1
+#define LANG_SKIP_BASE64_MIN_CHARS 50
 
-#define FTS_DEFAULT_TOKEN_MAX_LENGTH 30
-#define FTS_WB5A_PREFIX_MAX_LENGTH 3 /* Including apostrophe */
+#define LANG_DEFAULT_TOKEN_MAX_LENGTH 30
+#define LANG_WB5A_PREFIX_MAX_LENGTH 3 /* Including apostrophe */
 
-static unsigned char fts_ascii_word_breaks[128] = {
+static unsigned char lang_ascii_word_breaks[128] = {
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0-15 */
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 16-31 */
 
@@ -33,12 +33,12 @@ static unsigned char fts_ascii_word_breaks[128] = {
 };
 
 static int
-fts_tokenizer_generic_create(const char *const *settings,
-                            struct fts_tokenizer **tokenizer_r,
-                            const char **error_r)
+lang_tokenizer_generic_create(const char *const *settings,
+                             struct lang_tokenizer **tokenizer_r,
+                             const char **error_r)
 {
-       struct generic_fts_tokenizer *tok;
-       unsigned int max_length = FTS_DEFAULT_TOKEN_MAX_LENGTH;
+       struct generic_lang_tokenizer *tok;
+       unsigned int max_length = LANG_DEFAULT_TOKEN_MAX_LENGTH;
        enum boundary_algorithm algo = BOUNDARY_ALGORITHM_SIMPLE;
        bool wb5a = FALSE;
        bool search = FALSE;
@@ -91,7 +91,7 @@ fts_tokenizer_generic_create(const char *const *settings,
                return -1;
        }
 
-       tok = i_new(struct generic_fts_tokenizer, 1);
+       tok = i_new(struct generic_lang_tokenizer, 1);
        if (algo == BOUNDARY_ALGORITHM_TR29)
                tok->tokenizer.v = &generic_tokenizer_vfuncs_tr29;
        else
@@ -107,24 +107,24 @@ fts_tokenizer_generic_create(const char *const *settings,
 }
 
 static void
-fts_tokenizer_generic_destroy(struct fts_tokenizer *_tok)
+lang_tokenizer_generic_destroy(struct lang_tokenizer *_tok)
 {
-       struct generic_fts_tokenizer *tok =
-               container_of(_tok, struct generic_fts_tokenizer, tokenizer);
+       struct generic_lang_tokenizer *tok =
+               container_of(_tok, struct generic_lang_tokenizer, tokenizer);
 
        buffer_free(&tok->token);
        i_free(tok);
 }
 
 static inline void
-shift_prev_type(struct generic_fts_tokenizer *tok, enum letter_type lt)
+shift_prev_type(struct generic_lang_tokenizer *tok, enum letter_type lt)
 {
        tok->prev_prev_type = tok->prev_type;
        tok->prev_type = lt;
 }
 
 static inline void
-add_prev_type(struct generic_fts_tokenizer *tok, enum letter_type lt)
+add_prev_type(struct generic_lang_tokenizer *tok, enum letter_type lt)
 {
        if(tok->prev_type != LETTER_TYPE_NONE)
                tok->prev_prev_type = tok->prev_type;
@@ -132,7 +132,7 @@ add_prev_type(struct generic_fts_tokenizer *tok, enum letter_type lt)
 }
 
 static inline void
-add_letter(struct generic_fts_tokenizer *tok, unichar_t c)
+add_letter(struct generic_lang_tokenizer *tok, unichar_t c)
 {
        if(tok->letter != 0)
                tok->prev_letter = tok->letter;
@@ -140,8 +140,8 @@ add_letter(struct generic_fts_tokenizer *tok, unichar_t c)
 }
 
 static bool
-fts_tokenizer_generic_simple_current_token(struct generic_fts_tokenizer *tok,
-                                           const char **token_r)
+lang_tokenizer_generic_simple_current_token(struct generic_lang_tokenizer *tok,
+                                            const char **token_r)
 {
        const unsigned char *data = tok->token->data;
        size_t len = tok->token->used;
@@ -162,7 +162,7 @@ fts_tokenizer_generic_simple_current_token(struct generic_fts_tokenizer *tok,
                        i_assert(len > 0 && data[len-1] != '*');
                }
        } else {
-               fts_tokenizer_delete_trailing_partial_char(data, &len);
+               lang_tokenizer_delete_trailing_partial_char(data, &len);
        }
        i_assert(len <= tok->max_length);
 
@@ -179,7 +179,7 @@ static bool uint32_find(const uint32_t *data, unsigned int count,
        BINARY_NUMBER_SEARCH(data, count, value, idx_r);
 }
 
-static bool fts_uni_word_break(unichar_t c)
+static bool lang_uni_word_break(unichar_t c)
 {
        unsigned int idx;
 
@@ -202,38 +202,38 @@ static bool fts_uni_word_break(unichar_t c)
        return FALSE;
 }
 
-enum fts_break_type {
-       FTS_FROM_STOP = 0,
-       FTS_FROM_WORD = 2,
-       FTS_TO_STOP= 0,
-       FTS_TO_WORD = 1,
-#define FROM_TO(f,t) FTS_##f##_TO_##t = FTS_FROM_##f | FTS_TO_##t
+enum lang_break_type {
+       LANG_FROM_STOP = 0,
+       LANG_FROM_WORD = 2,
+       LANG_TO_STOP= 0,
+       LANG_TO_WORD = 1,
+#define FROM_TO(f,t) LANG_##f##_TO_##t = LANG_FROM_##f | LANG_TO_##t
        FROM_TO(STOP,STOP),
        FROM_TO(STOP,WORD),
        FROM_TO(WORD,STOP),
        FROM_TO(WORD,WORD),
 };
-static inline enum fts_break_type
-fts_simple_is_word_break(const struct generic_fts_tokenizer *tok,
+static inline enum lang_break_type
+lang_simple_is_word_break(const struct generic_lang_tokenizer *tok,
                         unichar_t c, bool apostrophe)
 {
        /* Until we know better, a letter followed by an apostrophe is continuation of the word.
           However, if we see non-word letters afterwards, we'll reverse that decision. */
        if (apostrophe)
-               return tok->prev_type == LETTER_TYPE_ALETTER ? FTS_WORD_TO_WORD : FTS_STOP_TO_STOP;
+               return tok->prev_type == LETTER_TYPE_ALETTER ? LANG_WORD_TO_WORD : LANG_STOP_TO_STOP;
 
-       bool new_breakiness = (c < 0x80) ? (fts_ascii_word_breaks[c] != 0) : fts_uni_word_break(c);
+       bool new_breakiness = (c < 0x80) ? (lang_ascii_word_breaks[c] != 0) : lang_uni_word_break(c);
 
-       return (new_breakiness ? FTS_TO_STOP : FTS_TO_WORD)
+       return (new_breakiness ? LANG_TO_STOP : LANG_TO_WORD)
                + (tok->prev_type == LETTER_TYPE_ALETTER ||
                   tok->prev_type == LETTER_TYPE_SINGLE_QUOTE
-                  ? FTS_FROM_WORD : FTS_FROM_STOP);
+                  ? LANG_FROM_WORD : LANG_FROM_STOP);
 }
 
-static void fts_tokenizer_generic_reset(struct fts_tokenizer *_tok)
+static void lang_tokenizer_generic_reset(struct lang_tokenizer *_tok)
 {
-       struct generic_fts_tokenizer *tok =
-               container_of(_tok, struct generic_fts_tokenizer, tokenizer);
+       struct generic_lang_tokenizer *tok =
+               container_of(_tok, struct generic_lang_tokenizer, tokenizer);
 
        tok->prev_type = LETTER_TYPE_NONE;
        tok->prev_prev_type = LETTER_TYPE_NONE;
@@ -241,7 +241,7 @@ static void fts_tokenizer_generic_reset(struct fts_tokenizer *_tok)
        buffer_set_used_size(tok->token, 0);
 }
 
-static void tok_append_truncated(struct generic_fts_tokenizer *tok,
+static void tok_append_truncated(struct generic_lang_tokenizer *tok,
                                 const unsigned char *data, size_t size)
 {
        buffer_append(tok->token, data,
@@ -285,7 +285,7 @@ static unsigned char allowed_base64_leaders[] = {
    criteria on its own to be discarded. What we pay is we will fail to reject
    small base64 chunks segments instead of rejecting the whole sequence.
 
-   When skip_base64() is invoked in fts_tokenizer_generic_XX_next(), we know
+   When skip_base64() is invoked in lang_tokenizer_generic_XX_next(), we know
    that we are not halfway through the collection of a token.
 
    As (after the previous token) the buffer will contain non-token characters
@@ -328,7 +328,7 @@ skip_base64(const unsigned char *data, size_t size)
 
                const unsigned char *past;
                for (past = first; past < end && is_base64(*past); past++);
-               if (past - first < FTS_SKIP_BASE64_MIN_CHARS)
+               if (past - first < LANG_SKIP_BASE64_MIN_CHARS)
                        break;
                if (past < end && memchr(allowed_base64_trailers, *past,
                                         N_ELEMENTS(allowed_base64_trailers)) == NULL)
@@ -336,22 +336,22 @@ skip_base64(const unsigned char *data, size_t size)
                start = past;
                matches++;
        }
-       return matches < FTS_SKIP_BASE64_MIN_SEQUENCES ? 0 : start - data;
+       return matches < LANG_SKIP_BASE64_MIN_SEQUENCES ? 0 : start - data;
 }
 
 static int
-fts_tokenizer_generic_simple_next(struct fts_tokenizer *_tok,
-                                  const unsigned char *data, size_t size,
-                                 size_t *skip_r, const char **token_r,
-                                 const char **error_r ATTR_UNUSED)
+lang_tokenizer_generic_simple_next(struct lang_tokenizer *_tok,
+                                   const unsigned char *data, size_t size,
+                                  size_t *skip_r, const char **token_r,
+                                  const char **error_r ATTR_UNUSED)
 {
-       struct generic_fts_tokenizer *tok =
-               container_of(_tok, struct generic_fts_tokenizer, tokenizer);
+       struct generic_lang_tokenizer *tok =
+               container_of(_tok, struct generic_lang_tokenizer, tokenizer);
        size_t i, start;
        int char_size;
        unichar_t c;
        bool apostrophe;
-       enum fts_break_type break_type;
+       enum lang_break_type break_type;
 
        start = tok->token->used > 0 ? 0 : skip_base64(data, size);
        for (i = start; i < size; i += char_size) {
@@ -363,18 +363,18 @@ fts_tokenizer_generic_simple_next(struct fts_tokenizer *_tok,
                    (tok->prev_type == LETTER_TYPE_ALETTER)) {
                        /* this might be a prefix-matching query */
                        shift_prev_type(tok, LETTER_TYPE_PREFIXSPLAT);
-               } else if ((break_type = fts_simple_is_word_break(tok, c, apostrophe))
-                          != FTS_WORD_TO_WORD) {
+               } else if ((break_type = lang_simple_is_word_break(tok, c, apostrophe))
+                          != LANG_WORD_TO_WORD) {
                        tok_append_truncated(tok, data + start, i - start);
-                       shift_prev_type(tok, (break_type & FTS_TO_WORD) != 0
+                       shift_prev_type(tok, (break_type & LANG_TO_WORD) != 0
                                        ? LETTER_TYPE_ALETTER : LETTER_TYPE_NONE);
-                       if (fts_tokenizer_generic_simple_current_token(tok, token_r)) {
+                       if (lang_tokenizer_generic_simple_current_token(tok, token_r)) {
                                *skip_r = i;
-                               if (break_type != FTS_STOP_TO_WORD) /* therefore *_TO_STOP */
+                               if (break_type != LANG_STOP_TO_WORD) /* therefore *_TO_STOP */
                                        *skip_r += char_size;
                                return 1;
                        }
-                       if ((break_type & FTS_TO_WORD) == 0)
+                       if ((break_type & LANG_TO_WORD) == 0)
                                start = i + char_size;
                } else if (apostrophe) {
                        /* all apostrophes require special handling */
@@ -400,7 +400,7 @@ fts_tokenizer_generic_simple_next(struct fts_tokenizer *_tok,
        /* return the last token */
        if (size == 0) {
                shift_prev_type(tok, LETTER_TYPE_NONE);
-               if (fts_tokenizer_generic_simple_current_token(tok, token_r))
+               if (lang_tokenizer_generic_simple_current_token(tok, token_r))
                        return 1;
        }
 
@@ -458,25 +458,25 @@ static enum letter_type letter_type(unichar_t c)
        return LETTER_TYPE_OTHER;
 }
 
-static bool letter_panic(struct generic_fts_tokenizer *tok ATTR_UNUSED)
+static bool letter_panic(struct generic_lang_tokenizer *tok ATTR_UNUSED)
 {
        i_panic("Letter type should not be used.");
 }
 
 /* WB3, WB3a and WB3b, but really different since we try to eat
    whitespace between words. */
-static bool letter_cr_lf_newline(struct generic_fts_tokenizer *tok ATTR_UNUSED)
+static bool letter_cr_lf_newline(struct generic_lang_tokenizer *tok ATTR_UNUSED)
 {
        return TRUE;
 }
 
-static bool letter_extend_format(struct generic_fts_tokenizer *tok ATTR_UNUSED)
+static bool letter_extend_format(struct generic_lang_tokenizer *tok ATTR_UNUSED)
 {
        /* WB4 */
        return FALSE;
 }
 
-static bool letter_regional_indicator(struct generic_fts_tokenizer *tok)
+static bool letter_regional_indicator(struct generic_lang_tokenizer *tok)
 {
        /* WB13c */
        if (tok->prev_type == LETTER_TYPE_REGIONAL_INDICATOR)
@@ -485,7 +485,7 @@ static bool letter_regional_indicator(struct generic_fts_tokenizer *tok)
        return TRUE; /* Any / Any */
 }
 
-static bool letter_katakana(struct generic_fts_tokenizer *tok)
+static bool letter_katakana(struct generic_lang_tokenizer *tok)
 {
        /* WB13 */
        if (tok->prev_type == LETTER_TYPE_KATAKANA)
@@ -498,7 +498,7 @@ static bool letter_katakana(struct generic_fts_tokenizer *tok)
        return TRUE; /* Any / Any */
 }
 
-static bool letter_hebrew(struct generic_fts_tokenizer *tok)
+static bool letter_hebrew(struct generic_lang_tokenizer *tok)
 {
        /* WB5 */
        if (tok->prev_type == LETTER_TYPE_HEBREW_LETTER)
@@ -523,11 +523,11 @@ static bool letter_hebrew(struct generic_fts_tokenizer *tok)
        return TRUE; /* Any / Any */
 }
 
-static bool letter_aletter(struct generic_fts_tokenizer *tok)
+static bool letter_aletter(struct generic_lang_tokenizer *tok)
 {
 
        /* WB5a */
-       if (tok->wb5a && tok->token->used <= FTS_WB5A_PREFIX_MAX_LENGTH)
+       if (tok->wb5a && tok->token->used <= LANG_WB5A_PREFIX_MAX_LENGTH)
                if (IS_WB5A_APOSTROPHE(tok->prev_letter) && IS_VOWEL(tok->letter)) {
                        tok->seen_wb5a = TRUE;
                        return TRUE;
@@ -556,7 +556,7 @@ static bool letter_aletter(struct generic_fts_tokenizer *tok)
        return TRUE; /* Any / Any */
 }
 
-static bool letter_single_quote(struct generic_fts_tokenizer *tok)
+static bool letter_single_quote(struct generic_lang_tokenizer *tok)
 {
        /* WB6 */
        if (tok->prev_type == LETTER_TYPE_ALETTER ||
@@ -570,7 +570,7 @@ static bool letter_single_quote(struct generic_fts_tokenizer *tok)
        return TRUE; /* Any / Any */
 }
 
-static bool letter_double_quote(struct generic_fts_tokenizer *tok)
+static bool letter_double_quote(struct generic_lang_tokenizer *tok)
 {
 
        if (tok->prev_type == LETTER_TYPE_DOUBLE_QUOTE)
@@ -579,14 +579,14 @@ static bool letter_double_quote(struct generic_fts_tokenizer *tok)
        return TRUE; /* Any / Any */
 }
 
-static bool letter_midnumlet(struct generic_fts_tokenizer *tok ATTR_UNUSED)
+static bool letter_midnumlet(struct generic_lang_tokenizer *tok ATTR_UNUSED)
 {
 
        /* Break at MidNumLet, non-conformant with WB6/WB7 */
        return TRUE;
 }
 
-static bool letter_midletter(struct generic_fts_tokenizer *tok)
+static bool letter_midletter(struct generic_lang_tokenizer *tok)
 {
        /* WB6 */
        if (tok->prev_type == LETTER_TYPE_ALETTER ||
@@ -596,7 +596,7 @@ static bool letter_midletter(struct generic_fts_tokenizer *tok)
        return TRUE; /* Any / Any */
 }
 
-static bool letter_midnum(struct generic_fts_tokenizer *tok)
+static bool letter_midnum(struct generic_lang_tokenizer *tok)
 {
        /* WB12 */
        if (tok->prev_type == LETTER_TYPE_NUMERIC)
@@ -605,7 +605,7 @@ static bool letter_midnum(struct generic_fts_tokenizer *tok)
        return TRUE; /* Any / Any */
 }
 
-static bool letter_numeric(struct generic_fts_tokenizer *tok)
+static bool letter_numeric(struct generic_lang_tokenizer *tok)
 {
        /* WB8 */
        if (tok->prev_type == LETTER_TYPE_NUMERIC)
@@ -630,7 +630,7 @@ static bool letter_numeric(struct generic_fts_tokenizer *tok)
        return TRUE; /* Any / Any */
 }
 
-static bool letter_extendnumlet(struct generic_fts_tokenizer *tok)
+static bool letter_extendnumlet(struct generic_lang_tokenizer *tok)
 {
 
        /* WB13a */
@@ -644,7 +644,7 @@ static bool letter_extendnumlet(struct generic_fts_tokenizer *tok)
        return TRUE; /* Any / Any */
 }
 
-static bool letter_apostrophe(struct generic_fts_tokenizer *tok)
+static bool letter_apostrophe(struct generic_lang_tokenizer *tok)
 {
 
        if (tok->prev_type == LETTER_TYPE_ALETTER ||
@@ -653,12 +653,12 @@ static bool letter_apostrophe(struct generic_fts_tokenizer *tok)
 
        return TRUE; /* Any / Any */
 }
-static bool letter_prefixsplat(struct generic_fts_tokenizer *tok ATTR_UNUSED)
+static bool letter_prefixsplat(struct generic_lang_tokenizer *tok ATTR_UNUSED)
 {
        /* Dovecot explicit-prefix specific */
        return TRUE; /* Always induces a word break - but with special handling */
 }
-static bool letter_other(struct generic_fts_tokenizer *tok ATTR_UNUSED)
+static bool letter_other(struct generic_lang_tokenizer *tok ATTR_UNUSED)
 {
        return TRUE; /* Any / Any */
 }
@@ -684,7 +684,7 @@ static bool is_nontoken(enum letter_type lt)
    very kludgy and should be coded into the rules themselves
    somehow.
 */
-static bool is_one_past_end(struct generic_fts_tokenizer *tok)
+static bool is_one_past_end(struct generic_lang_tokenizer *tok)
 {
        /* WB6/7 false positive detected at one past end. */
        if (tok->prev_type == LETTER_TYPE_MIDLETTER ||
@@ -704,8 +704,8 @@ static bool is_one_past_end(struct generic_fts_tokenizer *tok)
 }
 
 static void
-fts_tokenizer_generic_tr29_current_token(struct generic_fts_tokenizer *tok,
-                                         const char **token_r)
+lang_tokenizer_generic_tr29_current_token(struct generic_lang_tokenizer *tok,
+                                          const char **token_r)
 {
        const unsigned char *data = tok->token->data;
        size_t len = tok->token->used;
@@ -718,7 +718,7 @@ fts_tokenizer_generic_tr29_current_token(struct generic_fts_tokenizer *tok,
                i_assert(len > 0);
                len--;
        } else if (tok->untruncated_length > tok->max_length) {
-               fts_tokenizer_delete_trailing_partial_char(data, &len);
+               lang_tokenizer_delete_trailing_partial_char(data, &len);
        }
        /* we're skipping all non-token chars at the beginning of the word,
           so by this point we must have something here - even if we just
@@ -733,7 +733,7 @@ fts_tokenizer_generic_tr29_current_token(struct generic_fts_tokenizer *tok,
        tok->untruncated_length = 0;
 }
 
-static void wb5a_reinsert(struct generic_fts_tokenizer *tok)
+static void wb5a_reinsert(struct generic_lang_tokenizer *tok)
 {
        string_t *utf8_str = t_str_new(6);
 
@@ -746,7 +746,7 @@ static void wb5a_reinsert(struct generic_fts_tokenizer *tok)
 }
 
 struct letter_fn {
-       bool (*fn)(struct generic_fts_tokenizer *tok);
+       bool (*fn)(struct generic_lang_tokenizer *tok);
 };
 static struct letter_fn letter_fns[] = {
        {letter_panic}, {letter_cr_lf_newline}, {letter_cr_lf_newline},
@@ -762,7 +762,7 @@ static struct letter_fn letter_fns[] = {
 
 /*
   Find word boundaries in input text. Based on Unicode standard annex
-  #29, but tailored for FTS purposes.
+  #29, but tailored for language purposes.
   http://www.unicode.org/reports/tr29/
 
   Note: The text of tr29 is a living standard, so it keeps
@@ -771,7 +771,7 @@ static struct letter_fn letter_fns[] = {
 
   Adaptions:
   * Added optional WB5a as a configurable option. The cut of prefix is
-   max FTS_WB5A_PREFIX chars.
+   max LANG_WB5A_PREFIX_MAX_LENGTH chars.
   * No word boundary at Start-Of-Text or End-of-Text (WB1 and WB2).
   * Break just once, not before and after.
   * Break at MidNumLet, except apostrophes (diverging from WB6/WB7).
@@ -779,7 +779,7 @@ static struct letter_fn letter_fns[] = {
   to assist in finding individual words.
 */
 static bool
-uni_found_word_boundary(struct generic_fts_tokenizer *tok, enum letter_type lt)
+uni_found_word_boundary(struct generic_lang_tokenizer *tok, enum letter_type lt)
 {
        /* No rule knows what to do with just one char, except the linebreaks
           we eat away (above) anyway. */
@@ -797,13 +797,13 @@ uni_found_word_boundary(struct generic_fts_tokenizer *tok, enum letter_type lt)
 }
 
 static int
-fts_tokenizer_generic_tr29_next(struct fts_tokenizer *_tok,
-                               const unsigned char *data, size_t size,
-                               size_t *skip_r, const char **token_r,
-                               const char **error_r ATTR_UNUSED)
+lang_tokenizer_generic_tr29_next(struct lang_tokenizer *_tok,
+                                const unsigned char *data, size_t size,
+                                size_t *skip_r, const char **token_r,
+                                const char **error_r ATTR_UNUSED)
 {
-       struct generic_fts_tokenizer *tok =
-               container_of(_tok, struct generic_fts_tokenizer, tokenizer);
+       struct generic_lang_tokenizer *tok =
+               container_of(_tok, struct generic_lang_tokenizer, tokenizer);
        unichar_t c;
        size_t i, char_start_i, start_pos;
        enum letter_type lt;
@@ -830,7 +830,7 @@ fts_tokenizer_generic_tr29_next(struct fts_tokenizer *_tok,
                        continue;
                }
 
-               if (tok->wb5a &&  tok->token->used <= FTS_WB5A_PREFIX_MAX_LENGTH)
+               if (tok->wb5a &&  tok->token->used <= LANG_WB5A_PREFIX_MAX_LENGTH)
                        add_letter(tok, c);
 
                if (uni_found_word_boundary(tok, lt)) {
@@ -838,11 +838,11 @@ fts_tokenizer_generic_tr29_next(struct fts_tokenizer *_tok,
                        tok_append_truncated(tok, data + start_pos,
                                             char_start_i - start_pos);
                        if (lt == LETTER_TYPE_PREFIXSPLAT && tok->prefixsplat) {
-                               const unsigned char prefix_char = FTS_PREFIX_SPLAT_CHAR;
+                               const unsigned char prefix_char = LANG_PREFIX_SPLAT_CHAR;
                                tok_append_truncated(tok, &prefix_char, 1);
                        }
                        *skip_r = i;
-                       fts_tokenizer_generic_tr29_current_token(tok, token_r);
+                       lang_tokenizer_generic_tr29_current_token(tok, token_r);
                        return 1;
                } else if (lt == LETTER_TYPE_APOSTROPHE ||
                           lt == LETTER_TYPE_SINGLE_QUOTE) {
@@ -862,45 +862,45 @@ fts_tokenizer_generic_tr29_next(struct fts_tokenizer *_tok,
        if (size == 0 && tok->token->used > 0) {
                /* return the last token */
                *skip_r = 0;
-               fts_tokenizer_generic_tr29_current_token(tok, token_r);
+               lang_tokenizer_generic_tr29_current_token(tok, token_r);
                return 1;
        }
        return 0;
 }
 
 static int
-fts_tokenizer_generic_next(struct fts_tokenizer *_tok ATTR_UNUSED,
-                          const unsigned char *data ATTR_UNUSED,
-                           size_t size ATTR_UNUSED,
-                           size_t *skip_r ATTR_UNUSED,
-                          const char **token_r ATTR_UNUSED,
-                          const char **error_r ATTR_UNUSED)
+lang_tokenizer_generic_next(struct lang_tokenizer *_tok ATTR_UNUSED,
+                           const unsigned char *data ATTR_UNUSED,
+                            size_t size ATTR_UNUSED,
+                            size_t *skip_r ATTR_UNUSED,
+                           const char **token_r ATTR_UNUSED,
+                           const char **error_r ATTR_UNUSED)
 {
        i_unreached();
 }
 
-static const struct fts_tokenizer_vfuncs generic_tokenizer_vfuncs = {
-       fts_tokenizer_generic_create,
-       fts_tokenizer_generic_destroy,
-       fts_tokenizer_generic_reset,
-       fts_tokenizer_generic_next
+static const struct lang_tokenizer_vfuncs generic_tokenizer_vfuncs = {
+       lang_tokenizer_generic_create,
+       lang_tokenizer_generic_destroy,
+       lang_tokenizer_generic_reset,
+       lang_tokenizer_generic_next
 };
 
-static const struct fts_tokenizer fts_tokenizer_generic_real = {
+static const struct lang_tokenizer lang_tokenizer_generic_real = {
        .name = "generic",
        .v = &generic_tokenizer_vfuncs
 };
-const struct fts_tokenizer *fts_tokenizer_generic = &fts_tokenizer_generic_real;
+const struct lang_tokenizer *lang_tokenizer_generic = &lang_tokenizer_generic_real;
 
-const struct fts_tokenizer_vfuncs generic_tokenizer_vfuncs_simple = {
-       fts_tokenizer_generic_create,
-       fts_tokenizer_generic_destroy,
-       fts_tokenizer_generic_reset,
-       fts_tokenizer_generic_simple_next
+const struct lang_tokenizer_vfuncs generic_tokenizer_vfuncs_simple = {
+       lang_tokenizer_generic_create,
+       lang_tokenizer_generic_destroy,
+       lang_tokenizer_generic_reset,
+       lang_tokenizer_generic_simple_next
 };
-const struct fts_tokenizer_vfuncs generic_tokenizer_vfuncs_tr29 = {
-       fts_tokenizer_generic_create,
-       fts_tokenizer_generic_destroy,
-       fts_tokenizer_generic_reset,
-       fts_tokenizer_generic_tr29_next
+const struct lang_tokenizer_vfuncs generic_tokenizer_vfuncs_tr29 = {
+       lang_tokenizer_generic_create,
+       lang_tokenizer_generic_destroy,
+       lang_tokenizer_generic_reset,
+       lang_tokenizer_generic_tr29_next
 };
index 8d9be34bd2ad1bfbf7ccb1e667da358cbe6ef2be..6ba11ee1c276771e24519bc552a8a964e8d3f5e6 100644 (file)
@@ -1,35 +1,35 @@
-#ifndef FTS_TOKENIZER_PRIVATE_H
-#define FTS_TOKENIZER_PRIVATE_H
+#ifndef LANG_TOKENIZER_PRIVATE_H
+#define LANG_TOKENIZER_PRIVATE_H
 
 #include "lang-tokenizer.h"
 
-#define FTS_TOKENIZER_CLASSES_NR 2
+#define LANG_TOKENIZER_CLASSES_NR 2
 
-struct fts_tokenizer_vfuncs {
+struct lang_tokenizer_vfuncs {
        int (*create)(const char *const *settings,
-                     struct fts_tokenizer **tokenizer_r, const char **error_r);
-       void (*destroy)(struct fts_tokenizer *tok);
+                     struct lang_tokenizer **tokenizer_r, const char **error_r);
+       void (*destroy)(struct lang_tokenizer *tok);
 
-       void (*reset)(struct fts_tokenizer *tok);
-       int (*next)(struct fts_tokenizer *tok, const unsigned char *data,
+       void (*reset)(struct lang_tokenizer *tok);
+       int (*next)(struct lang_tokenizer *tok, const unsigned char *data,
                    size_t size, size_t *skip_r, const char **token_r,
                    const char **error_r);
 };
 
-enum fts_tokenizer_parent_state {
-       FTS_TOKENIZER_PARENT_STATE_ADD_DATA = 0,
-       FTS_TOKENIZER_PARENT_STATE_NEXT_OUTPUT,
-       FTS_TOKENIZER_PARENT_STATE_FINALIZE
+enum lang_tokenizer_parent_state {
+       LANG_TOKENIZER_PARENT_STATE_ADD_DATA = 0,
+       LANG_TOKENIZER_PARENT_STATE_NEXT_OUTPUT,
+       LANG_TOKENIZER_PARENT_STATE_FINALIZE
 };
 
-struct fts_tokenizer {
+struct lang_tokenizer {
        const char *name;
-       const struct fts_tokenizer_vfuncs *v;
+       const struct lang_tokenizer_vfuncs *v;
        int refcount;
 
-       struct fts_tokenizer *parent;
+       struct lang_tokenizer *parent;
        buffer_t *parent_input;
-       enum fts_tokenizer_parent_state parent_state;
+       enum lang_tokenizer_parent_state parent_state;
 
        const unsigned char *prev_data;
        size_t prev_size;
@@ -46,7 +46,7 @@ struct fts_tokenizer {
        bool finalize_parent_pending;
 };
 
-void fts_tokenizer_register(const struct fts_tokenizer *tok_class);
-void fts_tokenizer_unregister(const struct fts_tokenizer *tok_class);
+void lang_tokenizer_register(const struct lang_tokenizer *tok_class);
+void lang_tokenizer_unregister(const struct lang_tokenizer *tok_class);
 
 #endif
index ac6b8bf48af90a8ebefd600d3f992a4449b31017..b732765413cdf352b6bf3c4d1aa8abe3773ab6c8 100644 (file)
@@ -8,65 +8,65 @@
 #include "lang-tokenizer.h"
 #include "lang-tokenizer-private.h"
 
-static ARRAY(const struct fts_tokenizer *) fts_tokenizer_classes;
+static ARRAY(const struct lang_tokenizer *) lang_tokenizer_classes;
 
-void fts_tokenizers_init(void)
+void lang_tokenizers_init(void)
 {
-       if (!array_is_created(&fts_tokenizer_classes)) {
-               fts_tokenizer_register(fts_tokenizer_generic);
-               fts_tokenizer_register(fts_tokenizer_email_address);
+       if (!array_is_created(&lang_tokenizer_classes)) {
+               lang_tokenizer_register(lang_tokenizer_generic);
+               lang_tokenizer_register(lang_tokenizer_email_address);
        }
 }
 
-void fts_tokenizers_deinit(void)
+void lang_tokenizers_deinit(void)
 {
-       if (array_is_created(&fts_tokenizer_classes))
-               array_free(&fts_tokenizer_classes);
+       if (array_is_created(&lang_tokenizer_classes))
+               array_free(&lang_tokenizer_classes);
 }
 
 /* private */
-void fts_tokenizer_register(const struct fts_tokenizer *tok_class)
+void lang_tokenizer_register(const struct lang_tokenizer *tok_class)
 {
-       if (!array_is_created(&fts_tokenizer_classes))
-               i_array_init(&fts_tokenizer_classes, FTS_TOKENIZER_CLASSES_NR);
-       array_push_back(&fts_tokenizer_classes, &tok_class);
+       if (!array_is_created(&lang_tokenizer_classes))
+               i_array_init(&lang_tokenizer_classes, LANG_TOKENIZER_CLASSES_NR);
+       array_push_back(&lang_tokenizer_classes, &tok_class);
 }
 
 /* private */
-void fts_tokenizer_unregister(const struct fts_tokenizer *tok_class)
+void lang_tokenizer_unregister(const struct lang_tokenizer *tok_class)
 {
-       const struct fts_tokenizer *const *tp;
+       const struct lang_tokenizer *const *tp;
        unsigned int idx;
 
-       array_foreach(&fts_tokenizer_classes, tp) {
+       array_foreach(&lang_tokenizer_classes, tp) {
                if (strcmp((*tp)->name, tok_class->name) == 0) {
-                       idx = array_foreach_idx(&fts_tokenizer_classes, tp);
-                       array_delete(&fts_tokenizer_classes, idx, 1);
-                       if (array_count(&fts_tokenizer_classes) == 0)
-                               array_free(&fts_tokenizer_classes);
+                       idx = array_foreach_idx(&lang_tokenizer_classes, tp);
+                       array_delete(&lang_tokenizer_classes, idx, 1);
+                       if (array_count(&lang_tokenizer_classes) == 0)
+                               array_free(&lang_tokenizer_classes);
                        return;
                }
        }
        i_unreached();
 }
 
-const struct fts_tokenizer *fts_tokenizer_find(const char *name)
+const struct lang_tokenizer *lang_tokenizer_find(const char *name)
 {
-       const struct fts_tokenizer *tok;
+       const struct lang_tokenizer *tok;
 
-       array_foreach_elem(&fts_tokenizer_classes, tok) {
+       array_foreach_elem(&lang_tokenizer_classes, tok) {
                if (strcmp(tok->name, name) == 0)
                        return tok;
        }
        return NULL;
 }
 
-const char *fts_tokenizer_name(const struct fts_tokenizer *tok)
+const char *lang_tokenizer_name(const struct lang_tokenizer *tok)
 {
        return tok->name;
 }
 
-static void fts_tokenizer_self_reset(struct fts_tokenizer *tok)
+static void lang_tokenizer_self_reset(struct lang_tokenizer *tok)
 {
        tok->prev_data = NULL;
        tok->prev_size = 0;
@@ -74,13 +74,13 @@ static void fts_tokenizer_self_reset(struct fts_tokenizer *tok)
        tok->prev_reply_finished = TRUE;
 }
 
-int fts_tokenizer_create(const struct fts_tokenizer *tok_class,
-                        struct fts_tokenizer *parent,
-                        const char *const *settings,
-                        struct fts_tokenizer **tokenizer_r,
-                        const char **error_r)
+int lang_tokenizer_create(const struct lang_tokenizer *tok_class,
+                         struct lang_tokenizer *parent,
+                         const char *const *settings,
+                         struct lang_tokenizer **tokenizer_r,
+                         const char **error_r)
 {
-       struct fts_tokenizer *tok;
+       struct lang_tokenizer *tok;
        const char *empty_settings = NULL;
 
        i_assert(settings == NULL || str_array_length(settings) % 2 == 0);
@@ -93,9 +93,9 @@ int fts_tokenizer_create(const struct fts_tokenizer *tok_class,
                return -1;
        }
        tok->refcount = 1;
-       fts_tokenizer_self_reset(tok);
+       lang_tokenizer_self_reset(tok);
        if (parent != NULL) {
-               fts_tokenizer_ref(parent);
+               lang_tokenizer_ref(parent);
                tok->parent = parent;
                tok->parent_input = buffer_create_dynamic(default_pool, 128);
        }
@@ -104,16 +104,16 @@ int fts_tokenizer_create(const struct fts_tokenizer *tok_class,
        return 0;
 }
 
-void fts_tokenizer_ref(struct fts_tokenizer *tok)
+void lang_tokenizer_ref(struct lang_tokenizer *tok)
 {
        i_assert(tok->refcount > 0);
 
        tok->refcount++;
 }
 
-void fts_tokenizer_unref(struct fts_tokenizer **_tok)
+void lang_tokenizer_unref(struct lang_tokenizer **_tok)
 {
-       struct fts_tokenizer *tok = *_tok;
+       struct lang_tokenizer *tok = *_tok;
 
        i_assert(tok->refcount > 0);
        *_tok = NULL;
@@ -123,14 +123,14 @@ void fts_tokenizer_unref(struct fts_tokenizer **_tok)
 
        buffer_free(&tok->parent_input);
        if (tok->parent != NULL)
-               fts_tokenizer_unref(&tok->parent);
+               lang_tokenizer_unref(&tok->parent);
        tok->v->destroy(tok);
 }
 
 static int
-fts_tokenizer_next_self(struct fts_tokenizer *tok,
-                        const unsigned char *data, size_t size,
-                        const char **token_r, const char **error_r)
+lang_tokenizer_next_self(struct lang_tokenizer *tok,
+                         const unsigned char *data, size_t size,
+                         const char **token_r, const char **error_r)
 {
        int ret = 0;
        size_t skip = 0;
@@ -170,27 +170,27 @@ fts_tokenizer_next_self(struct fts_tokenizer *tok,
        } else if (ret == 0) {
                /* Need more data to get the next token. The next call will
                   provide a whole new data block, so reset the prev_* state. */
-               fts_tokenizer_self_reset(tok);
+               lang_tokenizer_self_reset(tok);
        }
        return ret;
 }
 
-void fts_tokenizer_reset(struct fts_tokenizer *tok)
+void lang_tokenizer_reset(struct lang_tokenizer *tok)
 {
        tok->v->reset(tok);
-       fts_tokenizer_self_reset(tok);
+       lang_tokenizer_self_reset(tok);
 }
 
-int fts_tokenizer_next(struct fts_tokenizer *tok,
-                      const unsigned char *data, size_t size,
-                      const char **token_r, const char **error_r)
+int lang_tokenizer_next(struct lang_tokenizer *tok,
+                       const unsigned char *data, size_t size,
+                       const char **token_r, const char **error_r)
 {
        int ret;
 
        switch (tok->parent_state) {
-       case FTS_TOKENIZER_PARENT_STATE_ADD_DATA:
+       case LANG_TOKENIZER_PARENT_STATE_ADD_DATA:
                /* Try to get the next token using this tokenizer */
-               ret = fts_tokenizer_next_self(tok, data, size, token_r, error_r);
+               ret = lang_tokenizer_next_self(tok, data, size, token_r, error_r);
                if (ret <= 0) {
                        /* error / more data needed */
                        if (ret == 0 && size == 0 &&
@@ -200,8 +200,8 @@ int fts_tokenizer_next(struct fts_tokenizer *tok,
                                   tokenizer still needs to be finalized. */
                                tok->finalize_parent_pending = FALSE;
                                tok->parent_state =
-                                       FTS_TOKENIZER_PARENT_STATE_FINALIZE;
-                               return fts_tokenizer_next(tok, NULL, 0, token_r, error_r);
+                                       LANG_TOKENIZER_PARENT_STATE_FINALIZE;
+                               return lang_tokenizer_next(tok, NULL, 0, token_r, error_r);
                        }
                        break;
                }
@@ -221,19 +221,19 @@ int fts_tokenizer_next(struct fts_tokenizer *tok,
                buffer_append(tok->parent_input, *token_r, strlen(*token_r));
                tok->parent_state++;
                /* fall through */
-       case FTS_TOKENIZER_PARENT_STATE_NEXT_OUTPUT:
+       case LANG_TOKENIZER_PARENT_STATE_NEXT_OUTPUT:
                /* Return the next token from parent tokenizer */
-               ret = fts_tokenizer_next(tok->parent, tok->parent_input->data,
+               ret = lang_tokenizer_next(tok->parent, tok->parent_input->data,
                                         tok->parent_input->used, token_r, error_r);
                if (ret != 0)
                        break;
                tok->parent_state++;
                /* fall through */
-       case FTS_TOKENIZER_PARENT_STATE_FINALIZE:
+       case LANG_TOKENIZER_PARENT_STATE_FINALIZE:
                /* No more input is coming from the child tokenizer. Return the
                   final token(s) from the parent tokenizer. */
                if (!tok->stream_to_parents || size == 0) {
-                       ret = fts_tokenizer_next(tok->parent, NULL, 0,
+                       ret = lang_tokenizer_next(tok->parent, NULL, 0,
                                                 token_r, error_r);
                        if (ret != 0)
                                break;
@@ -243,8 +243,8 @@ int fts_tokenizer_next(struct fts_tokenizer *tok,
                /* We're finished handling the previous child token. See if
                   there are more child tokens available with this same data
                   input. */
-               tok->parent_state = FTS_TOKENIZER_PARENT_STATE_ADD_DATA;
-               return fts_tokenizer_next(tok, data, size, token_r, error_r);
+               tok->parent_state = LANG_TOKENIZER_PARENT_STATE_ADD_DATA;
+               return lang_tokenizer_next(tok, data, size, token_r, error_r);
        default:
                i_unreached();
        }
@@ -253,8 +253,8 @@ int fts_tokenizer_next(struct fts_tokenizer *tok,
        return ret;
 }
 
-int fts_tokenizer_final(struct fts_tokenizer *tok, const char **token_r,
-                       const char **error_r)
+int lang_tokenizer_final(struct lang_tokenizer *tok, const char **token_r,
+                        const char **error_r)
 {
-       return fts_tokenizer_next(tok, NULL, 0, token_r, error_r);
+       return lang_tokenizer_next(tok, NULL, 0, token_r, error_r);
 }
index 59ccf0703c2c88065bbcc494f700d37f02e183a4..b9572cb5a0d31a7ffc87fc2c14099bd0e40d06ad 100644 (file)
@@ -1,5 +1,5 @@
-#ifndef FTS_TOKENIZER_H
-#define FTS_TOKENIZER_H
+#ifndef LANG_TOKENIZER_H
+#define LANG_TOKENIZER_H
 
 /*
  Settings are given in the form of a const char * const *settings =
    "search" Remove addresses from parent data stream, so they are not processed
    further. Defaults to disabled. Enable by defining the keyword (and any
    value). */
-extern const struct fts_tokenizer *fts_tokenizer_email_address;
+extern const struct lang_tokenizer *lang_tokenizer_email_address;
 
 /* Generic email content tokenizer. Cuts text into tokens. */
 /* Settings:
    "maxlen" Maximum length of token, before an arbitrary cut off is made.
-   Defaults to FTS_DEFAULT_TOKEN_MAX_LENGTH.
+   Defaults to LANG_DEFAULT_TOKEN_MAX_LENGTH.
 
    "algorithm", accepted values are "simple" or "tr29". Defines the
    method for looking for word boundaries. Simple is faster and will
@@ -39,7 +39,7 @@ extern const struct fts_tokenizer *fts_tokenizer_email_address;
    is also significantly slower than simple. The algorithms also
    differ in some details, e.g. simple will cut "a.b" and tr29 will
    not. The default is "simple" */
-extern const struct fts_tokenizer *fts_tokenizer_generic;
+extern const struct lang_tokenizer *lang_tokenizer_generic;
 
 /*
  Tokenizing workflow, find --> create --> filter --> destroy.
@@ -47,41 +47,41 @@ extern const struct fts_tokenizer *fts_tokenizer_generic;
  */
 
 /* Register all built-in tokenizers. */
-void fts_tokenizers_init(void);
-void fts_tokenizers_deinit(void);
+void lang_tokenizers_init(void);
+void lang_tokenizers_deinit(void);
 
-const struct fts_tokenizer *fts_tokenizer_find(const char *name);
+const struct lang_tokenizer *lang_tokenizer_find(const char *name);
 
 /* Create a new tokenizer. The settings are described above. */
-int fts_tokenizer_create(const struct fts_tokenizer *tok_class,
-                        struct fts_tokenizer *parent,
-                        const char *const *settings,
-                        struct fts_tokenizer **tokenizer_r,
-                        const char **error_r);
-void fts_tokenizer_ref(struct fts_tokenizer *tok);
-void fts_tokenizer_unref(struct fts_tokenizer **tok);
+int lang_tokenizer_create(const struct lang_tokenizer *tok_class,
+                         struct lang_tokenizer *parent,
+                         const char *const *settings,
+                         struct lang_tokenizer **tokenizer_r,
+                         const char **error_r);
+void lang_tokenizer_ref(struct lang_tokenizer *tok);
+void lang_tokenizer_unref(struct lang_tokenizer **tok);
 
-/* Reset FTS tokenizer state */
-void fts_tokenizer_reset(struct fts_tokenizer *tok);
+/* Reset lang tokenizer state */
+void lang_tokenizer_reset(struct lang_tokenizer *tok);
 
 /*
    Returns 1 if *token_r was returned, 0 if more data is needed, -1 on error.
 
    This function should be called with the same data+size until it
-   returns 0. After that fts_tokenizer_final() should be called until it
+   returns 0. After that lang_tokenizer_final() should be called until it
    returns 0 to flush out the final token(s).
 
    data must contain only valid complete UTF-8 sequences, but otherwise it
    may be broken into however small pieces. (Input to this function typically
    comes from message-decoder, which returns only complete UTF-8 sequences.) */
 
-int fts_tokenizer_next(struct fts_tokenizer *tok,
-                      const unsigned char *data, size_t size,
-                      const char **token_r, const char **error_r);
-/* Returns same as fts_tokenizer_next(). */
-int fts_tokenizer_final(struct fts_tokenizer *tok, const char **token_r,
-                       const char **error_r);
+int lang_tokenizer_next(struct lang_tokenizer *tok,
+                       const unsigned char *data, size_t size,
+                       const char **token_r, const char **error_r);
+/* Returns same as lang_tokenizer_next(). */
+int lang_tokenizer_final(struct lang_tokenizer *tok, const char **token_r,
+                        const char **error_r);
 
-const char *fts_tokenizer_name(const struct fts_tokenizer *tok);
+const char *lang_tokenizer_name(const struct lang_tokenizer *tok);
 
 #endif
index b186c802f945d2f817ad21b568e8b1cc67b7214f..26d393021568e19a14e2901631f37fd3c820c4df 100644 (file)
 
 #define DETECT_STR_MAX_LEN 200
 
-struct fts_textcat {
+struct textcat {
        int refcount;
        void *handle;
        char *config_path, *data_dir, *failed;
 };
 
-struct fts_language_list {
+struct language_list {
        pool_t pool;
-       ARRAY_TYPE(fts_language) languages;
-       struct fts_textcat *textcat;
+       ARRAY_TYPE(language) languages;
+       struct textcat *textcat;
        const char *textcat_config;
        const char *textcat_datadir;
 };
 
-pool_t fts_languages_pool;
-ARRAY_TYPE(fts_language) fts_languages;
+pool_t languages_pool;
+ARRAY_TYPE(language) languages;
 #ifdef HAVE_FTS_EXTTEXTCAT
-static struct fts_textcat *fts_textcat_cache = NULL;
+static struct textcat *textcat_cache = NULL;
 #endif
 
 /*  ISO 639-1 alpha 2 codes for languages */
-const struct fts_language fts_languages_builtin [] = {
+const struct language languages_builtin [] = {
        { "da" }, /* Danish */
        { "de" }, /* German */
        { "en" }, /* English */
@@ -58,19 +58,19 @@ const struct fts_language fts_languages_builtin [] = {
        { "tr" }, /* Turkish */
 };
 
-const struct fts_language fts_language_data = {
+const struct language language_data = {
        "data"
 };
 
 #ifdef HAVE_FTS_EXTTEXTCAT
-static void fts_textcat_unref(struct fts_textcat *textcat)
+static void textcat_unref(struct textcat *textcat)
 {
        i_assert(textcat->refcount > 0);
        if (--textcat->refcount > 0)
                return;
 
-       if (textcat == fts_textcat_cache)
-               fts_textcat_cache = NULL;
+       if (textcat == textcat_cache)
+               textcat_cache = NULL;
 
        i_free(textcat->config_path);
        i_free(textcat->data_dir);
@@ -81,58 +81,57 @@ static void fts_textcat_unref(struct fts_textcat *textcat)
 }
 #endif
 
-void fts_languages_init(void)
+void languages_init(void)
 {
        unsigned int i;
-       const struct fts_language *lp;
-
-       fts_languages_pool = pool_alloconly_create("fts_language",
-                                                  sizeof(fts_languages_builtin));
-       p_array_init(&fts_languages, fts_languages_pool,
-                    N_ELEMENTS(fts_languages_builtin));
-       for (i = 0; i < N_ELEMENTS(fts_languages_builtin); i++){
-               lp = &fts_languages_builtin[i];
-               array_push_back(&fts_languages, &lp);
+       const struct language *lp;
+
+       languages_pool = pool_alloconly_create("language",
+                                              sizeof(languages_builtin));
+       p_array_init(&languages, languages_pool, N_ELEMENTS(languages_builtin));
+       for (i = 0; i < N_ELEMENTS(languages_builtin); i++){
+               lp = &languages_builtin[i];
+               array_push_back(&languages, &lp);
        }
 }
 
-void fts_languages_deinit(void)
+void languages_deinit(void)
 {
 #ifdef HAVE_FTS_EXTTEXTCAT
-       if (fts_textcat_cache != NULL)
-               fts_textcat_unref(fts_textcat_cache);
+       if (textcat_cache != NULL)
+               textcat_unref(textcat_cache);
 #endif
-       pool_unref(&fts_languages_pool);
+       pool_unref(&languages_pool);
 }
 
-void fts_language_register(const char *name)
+void language_register(const char *name)
 {
-       struct fts_language *lang;
+       struct language *lang;
 
-       if (fts_language_find(name) != NULL)
+       if (language_find(name) != NULL)
                return;
 
-       lang = p_new(fts_languages_pool, struct fts_language, 1);
-       lang->name = p_strdup(fts_languages_pool, name);
-       array_push_back(&fts_languages, (const struct fts_language **)&lang);
+       lang = p_new(languages_pool, struct language, 1);
+       lang->name = p_strdup(languages_pool, name);
+       array_push_back(&languages, (const struct language **)&lang);
 }
 
-const struct fts_language *fts_language_find(const char *name)
+const struct language *language_find(const char *name)
 {
-       const struct fts_language *lang;
+       const struct language *lang;
 
-       array_foreach_elem(&fts_languages, lang) {
+       array_foreach_elem(&languages, lang) {
                if (strcmp(lang->name, name) == 0)
                        return lang;
        }
        return NULL;
 }
 
-int fts_language_list_init(const char *const *settings,
-                          struct fts_language_list **list_r,
-                          const char **error_r)
+int language_list_init(const char *const *settings,
+                      struct language_list **list_r,
+                      const char **error_r)
 {
-       struct fts_language_list *lp;
+       struct language_list *lp;
        pool_t pool;
        unsigned int i;
        const char *conf = NULL, *data = NULL;
@@ -150,8 +149,8 @@ int fts_language_list_init(const char *const *settings,
                }
        }
 
-       pool = pool_alloconly_create("fts_language_list", 128);
-       lp = p_new(pool, struct fts_language_list, 1);
+       pool = pool_alloconly_create("language_list", 128);
+       lp = p_new(pool, struct language_list, 1);
        lp->pool = pool;
        if (conf != NULL)
                lp->textcat_config = p_strdup(pool, conf);
@@ -166,22 +165,22 @@ int fts_language_list_init(const char *const *settings,
        return 0;
 }
 
-void fts_language_list_deinit(struct fts_language_list **list)
+void language_list_deinit(struct language_list **list)
 {
-       struct fts_language_list *lp = *list;
+       struct language_list *lp = *list;
 
        *list = NULL;
 #ifdef HAVE_FTS_EXTTEXTCAT
        if (lp->textcat != NULL)
-               fts_textcat_unref(lp->textcat);
+               textcat_unref(lp->textcat);
 #endif
        pool_unref(&lp->pool);
 }
 
-static const struct fts_language *
-fts_language_list_find(struct fts_language_list *list, const char *name)
+static const struct language *
+language_list_find(struct language_list *list, const char *name)
 {
-       const struct fts_language *lang;
+       const struct language *lang;
 
        array_foreach_elem(&list->languages, lang) {
                if (strcmp(lang->name, name) == 0)
@@ -190,52 +189,52 @@ fts_language_list_find(struct fts_language_list *list, const char *name)
        return NULL;
 }
 
-void fts_language_list_add(struct fts_language_list *list,
-                          const struct fts_language *lang)
+void language_list_add(struct language_list *list,
+                      const struct language *lang)
 {
-       i_assert(fts_language_list_find(list, lang->name) == NULL);
+       i_assert(language_list_find(list, lang->name) == NULL);
        array_push_back(&list->languages, &lang);
 }
 
-bool fts_language_list_add_names(struct fts_language_list *list,
-                                const char *names,
-                                const char **unknown_name_r)
+bool language_list_add_names(struct language_list *list,
+                            const char *names,
+                            const char **unknown_name_r)
 {
        const char *const *langs;
-       const struct fts_language *lang;
+       const struct language *lang;
 
        for (langs = t_strsplit_spaces(names, ", "); *langs != NULL; langs++) {
-               lang = fts_language_find(*langs);
+               lang = language_find(*langs);
                if (lang == NULL) {
                        /* unknown language */
                        *unknown_name_r = *langs;
                        return FALSE;
                }
-               if (fts_language_list_find(list, lang->name) == NULL)
-                       fts_language_list_add(list, lang);
+               if (language_list_find(list, lang->name) == NULL)
+                       language_list_add(list, lang);
        }
        return TRUE;
 }
 
-const ARRAY_TYPE(fts_language) *
-fts_language_list_get_all(struct fts_language_list *list)
+const ARRAY_TYPE(language) *
+language_list_get_all(struct language_list *list)
 {
        return &list->languages;
 }
 
-const struct fts_language *
-fts_language_list_get_first(struct fts_language_list *list)
+const struct language *
+language_list_get_first(struct language_list *list)
 {
-       const struct fts_language *const *langp;
+       const struct language *const *langp;
 
        langp = array_front(&list->languages);
        return *langp;
 }
 
 #ifdef HAVE_FTS_EXTTEXTCAT
-static bool fts_language_match_lists(struct fts_language_list *list,
-                                     candidate_t *candp, int candp_len,
-                                     const struct fts_language **lang_r)
+static bool language_match_lists(struct language_list *list,
+                                 candidate_t *candp, int candp_len,
+                                 const struct language **lang_r)
 {
        const char *name;
 
@@ -247,7 +246,7 @@ static bool fts_language_match_lists(struct fts_language_list *list,
                /* For Norwegian we treat both bokmal and nynorsk as "no". */
                if (strcmp(name, "nb") == 0 || strcmp(name, "nn") == 0)
                        name = "no";
-               if ((*lang_r = fts_language_list_find(list, name)) != NULL)
+               if ((*lang_r = language_list_find(list, name)) != NULL)
                        return TRUE;
        }
        return FALSE;
@@ -255,8 +254,8 @@ static bool fts_language_match_lists(struct fts_language_list *list,
 #endif
 
 #ifdef HAVE_FTS_EXTTEXTCAT
-static int fts_language_textcat_init(struct fts_language_list *list,
-                                    const char **error_r)
+static int language_textcat_init(struct language_list *list,
+                                const char **error_r)
 {
        const char *config_path;
        const char *data_dir;
@@ -274,26 +273,26 @@ static int fts_language_textcat_init(struct fts_language_list *list,
                TEXTCAT_DATADIR"/fpdb.conf";
        data_dir = list->textcat_datadir != NULL ? list->textcat_datadir :
                TEXTCAT_DATADIR"/";
-       if (fts_textcat_cache != NULL) {
-               if (strcmp(fts_textcat_cache->config_path, config_path) == 0 &&
-                   strcmp(fts_textcat_cache->data_dir, data_dir) == 0) {
-                       list->textcat = fts_textcat_cache;
+       if (textcat_cache != NULL) {
+               if (strcmp(textcat_cache->config_path, config_path) == 0 &&
+                   strcmp(textcat_cache->data_dir, data_dir) == 0) {
+                       list->textcat = textcat_cache;
                        list->textcat->refcount++;
                        return 0;
                }
-               fts_textcat_unref(fts_textcat_cache);
+               textcat_unref(textcat_cache);
        }
 
-       fts_textcat_cache = list->textcat = i_new(struct fts_textcat, 1);
-       fts_textcat_cache->refcount = 2;
-       fts_textcat_cache->config_path = i_strdup(config_path);
-       fts_textcat_cache->data_dir = i_strdup(data_dir);
-       fts_textcat_cache->handle = special_textcat_Init(config_path, data_dir);
-       if (fts_textcat_cache->handle == NULL) {
-               fts_textcat_cache->failed = i_strdup_printf(
+       textcat_cache = list->textcat = i_new(struct textcat, 1);
+       textcat_cache->refcount = 2;
+       textcat_cache->config_path = i_strdup(config_path);
+       textcat_cache->data_dir = i_strdup(data_dir);
+       textcat_cache->handle = special_textcat_Init(config_path, data_dir);
+       if (textcat_cache->handle == NULL) {
+               textcat_cache->failed = i_strdup_printf(
                        "special_textcat_Init(%s, %s) failed",
                        config_path, data_dir);
-               *error_r = fts_textcat_cache->failed;
+               *error_r = textcat_cache->failed;
                return -1;
        }
        /* The textcat minimum document size could be set here. It
@@ -302,20 +301,20 @@ static int fts_language_textcat_init(struct fts_language_list *list,
 }
 #endif
 
-static enum fts_language_result
-fts_language_detect_textcat(struct fts_language_list *list ATTR_UNUSED,
-                           const unsigned char *text ATTR_UNUSED,
-                           size_t size ATTR_UNUSED,
-                           const struct fts_language **lang_r ATTR_UNUSED,
-                           const char **error_r ATTR_UNUSED)
+static enum language_result
+language_detect_textcat(struct language_list *list ATTR_UNUSED,
+                       const unsigned char *text ATTR_UNUSED,
+                       size_t size ATTR_UNUSED,
+                       const struct language **lang_r ATTR_UNUSED,
+                       const char **error_r ATTR_UNUSED)
 {
 #ifdef HAVE_FTS_EXTTEXTCAT
        candidate_t *candp; /* textcat candidate result array pointer */
        int cnt;
        bool match = FALSE;
 
-       if (fts_language_textcat_init(list, error_r) < 0)
-               return FTS_LANGUAGE_RESULT_ERROR;
+       if (language_textcat_init(list, error_r) < 0)
+               return LANGUAGE_RESULT_ERROR;
 
        candp = textcat_GetClassifyFullOutput(list->textcat->handle);
        if (candp == NULL)
@@ -324,45 +323,45 @@ fts_language_detect_textcat(struct fts_language_list *list ATTR_UNUSED,
                                   I_MIN(size, DETECT_STR_MAX_LEN), candp);
        if (cnt > 0) {
                T_BEGIN {
-                       match = fts_language_match_lists(list, candp, cnt, lang_r);
+                       match = language_match_lists(list, candp, cnt, lang_r);
                } T_END;
                textcat_ReleaseClassifyFullOutput(list->textcat->handle, candp);
                if (match)
-                       return FTS_LANGUAGE_RESULT_OK;
+                       return LANGUAGE_RESULT_OK;
                else
-                       return FTS_LANGUAGE_RESULT_UNKNOWN;
+                       return LANGUAGE_RESULT_UNKNOWN;
        } else {
                textcat_ReleaseClassifyFullOutput(list->textcat->handle, candp);
                switch (cnt) {
                case TEXTCAT_RESULT_SHORT:
                        i_assert(size < DETECT_STR_MAX_LEN);
-                       return FTS_LANGUAGE_RESULT_SHORT;
+                       return LANGUAGE_RESULT_SHORT;
                case TEXTCAT_RESULT_UNKNOWN:
-                       return FTS_LANGUAGE_RESULT_UNKNOWN;
+                       return LANGUAGE_RESULT_UNKNOWN;
                default:
                        i_unreached();
                }
        }
 #else
-       return FTS_LANGUAGE_RESULT_UNKNOWN;
+       return LANGUAGE_RESULT_UNKNOWN;
 #endif
 }
 
-enum fts_language_result
-fts_language_detect(struct fts_language_list *list,
-                   const unsigned char *text ATTR_UNUSED,
-                   size_t size ATTR_UNUSED,
-                   const struct fts_language **lang_r,
-                   const char **error_r)
+enum language_result
+language_detect(struct language_list *list,
+               const unsigned char *text ATTR_UNUSED,
+               size_t size ATTR_UNUSED,
+               const struct language **lang_r,
+               const char **error_r)
 {
        i_assert(array_count(&list->languages) > 0);
 
        /* if there's only a single wanted language, return it always. */
        if (array_count(&list->languages) == 1) {
-               const struct fts_language *const *langp =
+               const struct language *const *langp =
                        array_front(&list->languages);
                *lang_r = *langp;
-               return FTS_LANGUAGE_RESULT_OK;
+               return LANGUAGE_RESULT_OK;
        }
-       return fts_language_detect_textcat(list, text, size, lang_r, error_r);
+       return language_detect_textcat(list, text, size, lang_r, error_r);
 }
index 884998f07f43ed006faa8573e3c8b88d80bad3ed..91c3665cd16a550fa2e4f8e92204444fb4a32ae2 100644 (file)
@@ -1,72 +1,71 @@
-#ifndef FTS_LANGUAGE_H
-#define FTS_LANGUAGE_H
+#ifndef LANGUAGE_H
+#define LANGUAGE_H
 
-struct fts_language_list;
+struct language_list;
 
-enum fts_language_result {
+enum language_result {
        /* Provided sample is too short. */
-       FTS_LANGUAGE_RESULT_SHORT,
+       LANGUAGE_RESULT_SHORT,
        /* Language is unknown or not in the provided list. */
-       FTS_LANGUAGE_RESULT_UNKNOWN,
+       LANGUAGE_RESULT_UNKNOWN,
 
-       FTS_LANGUAGE_RESULT_OK,
+       LANGUAGE_RESULT_OK,
        /* textcat library initialization failed. */
-       FTS_LANGUAGE_RESULT_ERROR
+       LANGUAGE_RESULT_ERROR
 };
 
-struct fts_language {
+struct language {
        /* Two-letter language name lowercased, e.g. "en" */
        const char *name;
 };
-ARRAY_DEFINE_TYPE(fts_language, const struct fts_language *);
+ARRAY_DEFINE_TYPE(language, const struct language *);
 
 /* Used for raw data that is indexed. This data shouldn't go through any
    language-specific filters. */
-extern const struct fts_language fts_language_data;
+extern const struct language language_data;
 
 /*
   Language module API.
 */
-void fts_languages_init(void);
-void fts_languages_deinit(void);
+void languages_init(void);
+void languages_deinit(void);
 /* Add a language to the list of supported languages. */
-void fts_language_register(const char *name);
+void language_register(const char *name);
 /* Find a specified language by name. This finds from the internal list of
    supported languages. */
-const struct fts_language *fts_language_find(const char *name);
+const struct language *language_find(const char *name);
 
 /*
   Language list API
 */
-int fts_language_list_init(const char *const *settings,
-                          struct fts_language_list **list_r,
-                          const char **error_r);
-void fts_language_list_deinit(struct fts_language_list **list);
+int language_list_init(const char *const *settings,
+                      struct language_list **list_r,
+                      const char **error_r);
+void language_list_deinit(struct language_list **list);
 
 /* Add a language to the list of wanted languages. */
-void fts_language_list_add(struct fts_language_list *list,
-                          const struct fts_language *lang);
+void language_list_add(struct language_list *list,
+                      const struct language *lang);
 /* Add wanted languages from a space- or comma-separated list of language
    names. Duplicates are ignored. Returns TRUE if ok. Returns FALSE and sets
    unknown_name_r if an unknown language was found in the list. */
-bool fts_language_list_add_names(struct fts_language_list *list,
-                                const char *names,
-                                const char **unknown_name_r);
+bool language_list_add_names(struct language_list *list,
+                            const char *names,
+                            const char **unknown_name_r);
 
 /* Return an array of all wanted languages. */
-const ARRAY_TYPE(fts_language) *
-fts_language_list_get_all(struct fts_language_list *list);
+const ARRAY_TYPE(language) * language_list_get_all(struct language_list *list);
 /* Returns the first wanted language (default language). */
-const struct fts_language *
-fts_language_list_get_first(struct fts_language_list *list);
+const struct language *
+language_list_get_first(struct language_list *list);
 
 /* If text was detected to be one of the languages in the list,
-   returns FTS_LANGUAGE_RESULT_OK and (a pointer to) the language (in
-   the list). error_r is set for FTS_LANGUAGE_RESULT_ERROR. */
-enum fts_language_result
-fts_language_detect(struct fts_language_list *list,
-                   const unsigned char *text, size_t size,
-                   const struct fts_language **lang_r,
-                   const char **error_r);
+   returns LANGUAGE_RESULT_OK and (a pointer to) the language (in
+   the list). error_r is set for LANGUAGE_RESULT_ERROR. */
+enum language_result
+language_detect(struct language_list *list,
+               const unsigned char *text, size_t size,
+               const struct language **lang_r,
+               const char **error_r);
 
 #endif
index 00014a2f2e9efcc8c326115a14e4195e998a1f96..7d240c6e4121fa76d58fb7d0ec6df2a777a3c337 100644 (file)
 #include <stdio.h>
 
 static const char *const stopword_settings[] = {"stopwords_dir", TEST_STOPWORDS_DIR, NULL};
-static struct fts_language english_language = { .name = "en" };
-static struct fts_language french_language = { .name = "fr" };
-static struct fts_language norwegian_language = { .name = "no" };
+static struct language english_language = { .name = "en" };
+static struct language french_language = { .name = "fr" };
+static struct language norwegian_language = { .name = "no" };
 #if defined(HAVE_LIBICU) && defined(HAVE_FTS_STEMMER)
-static struct fts_language swedish_language = { .name = "sv" };
+static struct language swedish_language = { .name = "sv" };
 #endif
 
-static void test_fts_filter_find(void)
+static void test_lang_filter_find(void)
 {
-       test_begin("fts filter find");
-       test_assert(fts_filter_find("stopwords") == fts_filter_stopwords);
-       test_assert(fts_filter_find("snowball") == fts_filter_stemmer_snowball);
-       test_assert(fts_filter_find("normalizer-icu") == fts_filter_normalizer_icu);
-       test_assert(fts_filter_find("lowercase") == fts_filter_lowercase);
-       test_assert(fts_filter_find("contractions") == fts_filter_contractions);
+       test_begin("lang filter find");
+       test_assert(lang_filter_find("stopwords") == lang_filter_stopwords);
+       test_assert(lang_filter_find("snowball") == lang_filter_stemmer_snowball);
+       test_assert(lang_filter_find("normalizer-icu") == lang_filter_normalizer_icu);
+       test_assert(lang_filter_find("lowercase") == lang_filter_lowercase);
+       test_assert(lang_filter_find("contractions") == lang_filter_contractions);
        test_end();
 }
 
 
-static void test_fts_filter_contractions_fail(void)
+static void test_lang_filter_contractions_fail(void)
 {
 
-       struct fts_filter *filter;
+       struct lang_filter *filter;
        const char *error;
 
-       test_begin("fts filter contractions, unsupported language");
-       test_assert(fts_filter_create(fts_filter_contractions, NULL, &english_language, NULL, &filter, &error) != 0);
+       test_begin("lang filter contractions, unsupported language");
+       test_assert(lang_filter_create(lang_filter_contractions, NULL, &english_language, NULL, &filter, &error) != 0);
        test_assert(error != NULL);
        test_end();
 }
 
-static void test_fts_filter_contractions_fr(void)
+static void test_lang_filter_contractions_fr(void)
 {
        static const struct {
                const char *input;
@@ -68,29 +68,29 @@ static void test_fts_filter_contractions_fr(void)
                { "quelqu'un", "quelqu'un" },
                { "l'esprit", "esprit" }
        };
-       struct fts_filter *filter;
+       struct lang_filter *filter;
        const char *error;
        const char *token;
        unsigned int i;
        int ret;
 
-       test_begin("fts filter contractions, French");
-       test_assert(fts_filter_create(fts_filter_contractions, NULL, &french_language, NULL, &filter, &error) == 0);
+       test_begin("lang filter contractions, French");
+       test_assert(lang_filter_create(lang_filter_contractions, NULL, &french_language, NULL, &filter, &error) == 0);
 
        for (i = 0; i < N_ELEMENTS(tests); i++) {
                token = tests[i].input;
-               ret = fts_filter_filter(filter, &token, &error);
+               ret = lang_filter(filter, &token, &error);
                test_assert(ret >= 0);
                if (ret > 0)
                        test_assert_idx(strcmp(token, tests[i].output) == 0, i);
                else if (ret == 0)
                        test_assert_idx(token == NULL && tests[i].output == NULL, i);
        }
-       fts_filter_unref(&filter);
+       lang_filter_unref(&filter);
        test_end();
 }
 
-static void test_fts_filter_lowercase(void)
+static void test_lang_filter_lowercase(void)
 {
        static const struct {
                const char *input;
@@ -100,25 +100,25 @@ static void test_fts_filter_lowercase(void)
                { "FOO", "foo" },
                { "fOo", "foo" }
        };
-       struct fts_filter *filter;
+       struct lang_filter *filter;
        const char *error;
        const char *token;
        unsigned int i;
 
-       test_begin("fts filter lowercase");
-       test_assert(fts_filter_create(fts_filter_lowercase, NULL, &english_language, NULL, &filter, &error) == 0);
+       test_begin("lang filter lowercase");
+       test_assert(lang_filter_create(lang_filter_lowercase, NULL, &english_language, NULL, &filter, &error) == 0);
 
        for (i = 0; i < N_ELEMENTS(tests); i++) {
                token = tests[i].input;
-               test_assert_idx(fts_filter_filter(filter, &token, &error) > 0 &&
+               test_assert_idx(lang_filter(filter, &token, &error) > 0 &&
                                strcmp(token, tests[i].output) == 0, 0);
        }
-       fts_filter_unref(&filter);
+       lang_filter_unref(&filter);
        test_end();
 }
 
 #ifdef HAVE_LIBICU
-static void test_fts_filter_lowercase_utf8(void)
+static void test_lang_filter_lowercase_utf8(void)
 {
        static const struct {
                const char *input;
@@ -128,24 +128,24 @@ static void test_fts_filter_lowercase_utf8(void)
                { "F\xC3\x85\xC3\x85", "f\xC3\xA5\xC3\xA5" },
                { "F\xC3\x85\xC3\xA5", "f\xC3\xA5\xC3\xA5" }
        };
-       struct fts_filter *filter;
+       struct lang_filter *filter;
        const char *error;
        const char *token;
        unsigned int i;
 
-       test_begin("fts filter lowercase, UTF8");
-       test_assert(fts_filter_create(fts_filter_lowercase, NULL, &english_language, NULL, &filter, &error) == 0);
+       test_begin("lang filter lowercase, UTF8");
+       test_assert(lang_filter_create(lang_filter_lowercase, NULL, &english_language, NULL, &filter, &error) == 0);
 
        for (i = 0; i < N_ELEMENTS(tests); i++) {
                token = tests[i].input;
-               test_assert_idx(fts_filter_filter(filter, &token, &error) > 0 &&
+               test_assert_idx(lang_filter(filter, &token, &error) > 0 &&
                                strcmp(token, tests[i].output) == 0, 0);
        }
-       fts_filter_unref(&filter);
+       lang_filter_unref(&filter);
        test_end();
 }
 
-static void test_fts_filter_lowercase_too_long_utf8(void)
+static void test_lang_filter_lowercase_too_long_utf8(void)
 {
        static const struct {
                const char *input;
@@ -156,28 +156,28 @@ static void test_fts_filter_lowercase_too_long_utf8(void)
                { "abc\xC3\x85""defghijklmnopqrstuvwxyz", "abc\xC3\xA5""defghijklmnopqrstuvw" },
                { "abcdefghijklmnopqrstuvwx\xC3\x85", "abcdefghijklmnopqrstuvwx" }
        };
-       struct fts_filter *filter;
+       struct lang_filter *filter;
        const char *error;
        const char *token;
        const char * const settings[] = {"maxlen", "25", NULL};
        unsigned int i;
 
-       test_begin("fts filter lowercase, too long UTF8");
-       test_assert(fts_filter_create(fts_filter_lowercase, NULL, &english_language, settings, &filter, &error) == 0);
+       test_begin("lang filter lowercase, too long UTF8");
+       test_assert(lang_filter_create(lang_filter_lowercase, NULL, &english_language, settings, &filter, &error) == 0);
 
        for (i = 0; i < N_ELEMENTS(tests); i++) {
                token = tests[i].input;
-               test_assert_idx(fts_filter_filter(filter, &token, &error) > 0 &&
+               test_assert_idx(lang_filter(filter, &token, &error) > 0 &&
                                strcmp(token, tests[i].output) == 0, 0);
        }
-       fts_filter_unref(&filter);
+       lang_filter_unref(&filter);
        test_end();
 }
 #endif
 
-static void test_fts_filter_stopwords_eng(void)
+static void test_lang_filter_stopwords_eng(void)
 {
-       struct fts_filter *filter;
+       struct lang_filter *filter;
        const char *error;
        int ret;
        const char *input[] = {"an", "elephant", "and", "a", "bear",
@@ -189,14 +189,14 @@ static void test_fts_filter_stopwords_eng(void)
        const char **ip, **op;
        const char *token;
 
-       test_begin("fts filter stopwords, English");
-       test_assert(fts_filter_create(fts_filter_stopwords, NULL, &english_language, stopword_settings, &filter, &error) == 0);
+       test_begin("lang filter stopwords, English");
+       test_assert(lang_filter_create(lang_filter_stopwords, NULL, &english_language, stopword_settings, &filter, &error) == 0);
 
        ip = input;
        op = output;
        while (*ip != NULL) {
                token = *ip;
-               ret = fts_filter_filter(filter, &token, &error);
+               ret = lang_filter(filter, &token, &error);
                if (ret <= 0) {
                        test_assert(ret == 0);
                        test_assert(*op == NULL);
@@ -208,15 +208,15 @@ static void test_fts_filter_stopwords_eng(void)
                ip++;
        }
 
-       fts_filter_unref(&filter);
+       lang_filter_unref(&filter);
        test_assert(filter == NULL);
        test_end();
 }
 
-static void test_fts_filter_stopwords_fin(void)
+static void test_lang_filter_stopwords_fin(void)
 {
-       const struct fts_language finnish = { .name = "fi" };
-       struct fts_filter *filter;
+       const struct language finnish = { .name = "fi" };
+       struct lang_filter *filter;
        const char *error;
        int ret;
        const char *input[] = {"olla", "vaiko", "eik\xC3\xB6", "olla",
@@ -230,14 +230,14 @@ static void test_fts_filter_stopwords_fin(void)
        const char **ip, **op;
        const char *token;
 
-       test_begin("fts filter stopwords, Finnish");
-       test_assert(fts_filter_create(fts_filter_stopwords, NULL, &finnish, stopword_settings, &filter, &error) == 0);
+       test_begin("lang filter stopwords, Finnish");
+       test_assert(lang_filter_create(lang_filter_stopwords, NULL, &finnish, stopword_settings, &filter, &error) == 0);
 
        ip = input;
        op = output;
        while (*ip != NULL) {
                token = *ip;
-               ret = fts_filter_filter(filter, &token, &error);
+               ret = lang_filter(filter, &token, &error);
                if (ret <= 0) {
                        test_assert(ret == 0);
                        test_assert(*op == NULL);
@@ -249,15 +249,15 @@ static void test_fts_filter_stopwords_fin(void)
                ip++;
        }
 
-       fts_filter_unref(&filter);
+       lang_filter_unref(&filter);
        test_assert(filter == NULL);
 
-       test_assert(fts_filter_create(fts_filter_stopwords, NULL, &finnish, stopword_settings, &filter, &error) == 0);
+       test_assert(lang_filter_create(lang_filter_stopwords, NULL, &finnish, stopword_settings, &filter, &error) == 0);
        ip = input2;
        op = output2;
        while (*ip != NULL) {
                token = *ip;
-               ret = fts_filter_filter(filter, &token, &error);
+               ret = lang_filter(filter, &token, &error);
                if (ret <= 0) {
                        test_assert(ret == 0);
                        test_assert(*op == NULL);
@@ -269,14 +269,14 @@ static void test_fts_filter_stopwords_fin(void)
                ip++;
        }
 
-       fts_filter_unref(&filter);
+       lang_filter_unref(&filter);
        test_assert(filter == NULL);
        test_end();
 }
 
-static void test_fts_filter_stopwords_fra(void)
+static void test_lang_filter_stopwords_fra(void)
 {
-       struct fts_filter *filter;
+       struct lang_filter *filter;
        const char *error;
        int ret;
 
@@ -290,14 +290,14 @@ static void test_fts_filter_stopwords_fra(void)
        const char **ip, **op;
        const char *token;
 
-       test_begin("fts filter stopwords, French");
-       test_assert(fts_filter_create(fts_filter_stopwords, NULL, &french_language, stopword_settings, &filter, &error) == 0);
+       test_begin("lang filter stopwords, French");
+       test_assert(lang_filter_create(lang_filter_stopwords, NULL, &french_language, stopword_settings, &filter, &error) == 0);
 
        ip = input;
        op = output;
        while (*ip != NULL) {
                token = *ip;
-               ret = fts_filter_filter(filter, &token, &error);
+               ret = lang_filter(filter, &token, &error);
                if (ret <= 0) {
                        test_assert(ret == 0);
                        test_assert(*op == NULL);
@@ -309,14 +309,14 @@ static void test_fts_filter_stopwords_fra(void)
                ip++;
        }
 
-       fts_filter_unref(&filter);
+       lang_filter_unref(&filter);
        test_assert(filter == NULL);
        test_end();
 }
 
-static void test_fts_filter_stopwords_no(void)
+static void test_lang_filter_stopwords_no(void)
 {
-       struct fts_filter *filter;
+       struct lang_filter *filter;
        const char *error;
        int ret;
 
@@ -338,14 +338,14 @@ static void test_fts_filter_stopwords_no(void)
        const char **ip, **op;
        const char *token;
 
-       test_begin("fts filter stopwords, Norwegian");
-       test_assert(fts_filter_create(fts_filter_stopwords, NULL, &norwegian_language, stopword_settings, &filter, &error) == 0);
+       test_begin("lang filter stopwords, Norwegian");
+       test_assert(lang_filter_create(lang_filter_stopwords, NULL, &norwegian_language, stopword_settings, &filter, &error) == 0);
 
        ip = input;
        op = output;
        while (*ip != NULL) {
                token = *ip;
-               ret = fts_filter_filter(filter, &token, &error);
+               ret = lang_filter(filter, &token, &error);
                if (ret <= 0) {
                        test_assert(ret == 0);
                        test_assert(*op == NULL);
@@ -357,46 +357,46 @@ static void test_fts_filter_stopwords_no(void)
                ip++;
        }
 
-       fts_filter_unref(&filter);
+       lang_filter_unref(&filter);
        test_assert(filter == NULL);
        test_end();
 }
 
-static void test_fts_filter_stopwords_fail_lazy_init(void)
+static void test_lang_filter_stopwords_fail_lazy_init(void)
 {
-       const struct fts_language unknown = { .name = "bebobidoop" };
-       struct fts_filter *filter = NULL;
+       const struct language unknown = { .name = "bebobidoop" };
+       struct lang_filter *filter = NULL;
        const char *error = NULL, *token = "foobar";
 
-       test_begin("fts filter stopwords, fail filter() (lazy init)");
-       test_assert(fts_filter_create(fts_filter_stopwords, NULL, &unknown, stopword_settings, &filter, &error) == 0);
+       test_begin("lang filter stopwords, fail filter() (lazy init)");
+       test_assert(lang_filter_create(lang_filter_stopwords, NULL, &unknown, stopword_settings, &filter, &error) == 0);
        test_assert(filter != NULL && error == NULL);
-       test_assert(fts_filter_filter(filter, &token, &error) < 0 && error != NULL);
-       fts_filter_unref(&filter);
+       test_assert(lang_filter(filter, &token, &error) < 0 && error != NULL);
+       lang_filter_unref(&filter);
        test_end();
 
 }
 
-static void test_fts_filter_stopwords_malformed(void)
+static void test_lang_filter_stopwords_malformed(void)
 {
-       const struct fts_language malformed = { .name = "malformed" };
-       struct fts_filter *filter = NULL;
+       const struct language malformed = { .name = "malformed" };
+       struct lang_filter *filter = NULL;
        const char *error = NULL, *token = "foobar";
 
-       test_begin("fts filter stopwords, malformed list");
-       test_assert(fts_filter_create(fts_filter_stopwords, NULL, &malformed, stopword_settings, &filter, &error) == 0);
-       test_assert(fts_filter_filter(filter, &token, &error) < 0);
+       test_begin("lang filter stopwords, malformed list");
+       test_assert(lang_filter_create(lang_filter_stopwords, NULL, &malformed, stopword_settings, &filter, &error) == 0);
+       test_assert(lang_filter(filter, &token, &error) < 0);
        test_assert(strstr(error, "seems empty. Is the file correctly formatted?") != NULL);
        test_expect_no_more_errors();
-       fts_filter_unref(&filter);
+       lang_filter_unref(&filter);
        test_end();
 
 }
 
 #ifdef HAVE_FTS_STEMMER
-static void test_fts_filter_stemmer_snowball_stem_english(void)
+static void test_lang_filter_stemmer_snowball_stem_english(void)
 {
-       struct fts_filter *stemmer;
+       struct lang_filter *stemmer;
        const char *error;
        const char *token = NULL;
        const char * const tokens[] = {
@@ -414,24 +414,24 @@ static void test_fts_filter_stemmer_snowball_stem_english(void)
        const char * const *tpp;
        const char * const *bpp;
 
-       test_begin("fts filter stem English");
-       test_assert(fts_filter_create(fts_filter_stemmer_snowball, NULL, &english_language, NULL, &stemmer, &error) == 0);
+       test_begin("lang filter stem English");
+       test_assert(lang_filter_create(lang_filter_stemmer_snowball, NULL, &english_language, NULL, &stemmer, &error) == 0);
        bpp = bases;
        for (tpp=tokens; *tpp != NULL; tpp++) {
                token = *tpp;
-               test_assert(fts_filter_filter(stemmer, &token, &error) > 0);
+               test_assert(lang_filter(stemmer, &token, &error) > 0);
                test_assert(token != NULL);
                test_assert(null_strcmp(token, *bpp) == 0);
                bpp++;
        }
-       fts_filter_unref(&stemmer);
+       lang_filter_unref(&stemmer);
        test_assert(stemmer == NULL);
        test_end();
 }
 
-static void test_fts_filter_stemmer_snowball_stem_french(void)
+static void test_lang_filter_stemmer_snowball_stem_french(void)
 {
-       struct fts_filter *stemmer;
+       struct lang_filter *stemmer;
        const char *error;
        const char *token = NULL;
        const char * const tokens[] = {
@@ -444,26 +444,26 @@ static void test_fts_filter_stemmer_snowball_stem_french(void)
        const char * const *tpp;
        const char * const *bpp;
 
-       test_begin("fts filter stem French");
-       test_assert(fts_filter_create(fts_filter_stemmer_snowball, NULL, &french_language, NULL, &stemmer, &error) == 0);
+       test_begin("lang filter stem French");
+       test_assert(lang_filter_create(lang_filter_stemmer_snowball, NULL, &french_language, NULL, &stemmer, &error) == 0);
        bpp = bases;
        for (tpp=tokens; *tpp != NULL; tpp++) {
                token = *tpp;
-               test_assert(fts_filter_filter(stemmer, &token, &error) > 0);
+               test_assert(lang_filter(stemmer, &token, &error) > 0);
                test_assert(token != NULL);
                test_assert(null_strcmp(token, *bpp) == 0);
                bpp++;
        }
-       fts_filter_unref(&stemmer);
+       lang_filter_unref(&stemmer);
        test_assert(stemmer == NULL);
        test_end();
 }
 
-static void test_fts_filter_stopwords_stemmer_eng(void)
+static void test_lang_filter_stopwords_stemmer_eng(void)
 {
        int ret;
-       struct fts_filter *stemmer;
-       struct fts_filter *filter;
+       struct lang_filter *stemmer;
+       struct lang_filter *filter;
        const char *error;
        const char *token = NULL;
        const char * const tokens[] = {
@@ -481,15 +481,15 @@ static void test_fts_filter_stopwords_stemmer_eng(void)
        const char * const *tpp;
        const char * const *bpp;
 
-       test_begin("fts filters stopwords and stemming chained, English");
+       test_begin("lang filters stopwords and stemming chained, English");
 
-       test_assert(fts_filter_create(fts_filter_stopwords, NULL, &english_language, stopword_settings, &filter, &error) == 0);
-       test_assert(fts_filter_create(fts_filter_stemmer_snowball, filter, &english_language, NULL, &stemmer, &error) == 0);
+       test_assert(lang_filter_create(lang_filter_stopwords, NULL, &english_language, stopword_settings, &filter, &error) == 0);
+       test_assert(lang_filter_create(lang_filter_stemmer_snowball, filter, &english_language, NULL, &stemmer, &error) == 0);
 
        bpp = bases;
        for (tpp=tokens; *tpp != NULL; tpp++) {
                token = *tpp;
-               ret = fts_filter_filter(stemmer, &token, &error);
+               ret = lang_filter(stemmer, &token, &error);
                test_assert(ret >= 0);
                if (ret == 0)
                        test_assert(*bpp == NULL);
@@ -499,8 +499,8 @@ static void test_fts_filter_stopwords_stemmer_eng(void)
                }
                bpp++;
        }
-       fts_filter_unref(&stemmer);
-       fts_filter_unref(&filter);
+       lang_filter_unref(&stemmer);
+       lang_filter_unref(&filter);
        test_assert(stemmer == NULL);
        test_assert(filter == NULL);
        test_end();
@@ -508,9 +508,9 @@ static void test_fts_filter_stopwords_stemmer_eng(void)
 #endif
 
 #ifdef HAVE_LIBICU
-static void test_fts_filter_normalizer_swedish_short(void)
+static void test_lang_filter_normalizer_swedish_short(void)
 {
-       struct fts_filter *norm = NULL;
+       struct lang_filter *norm = NULL;
        const char *input[] = {
                "Vem",
                "\xC3\x85",
@@ -530,22 +530,22 @@ static void test_fts_filter_normalizer_swedish_short(void)
        const char *token = NULL;
        unsigned int i;
 
-       test_begin("fts filter normalizer Swedish short text");
+       test_begin("lang filter normalizer Swedish short text");
 
-       test_assert(fts_filter_create(fts_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0);
+       test_assert(lang_filter_create(lang_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0);
        for (i = 0; i < N_ELEMENTS(input); i++) {
                token = input[i];
-               test_assert_idx(fts_filter_filter(norm, &token, &error) == 1, i);
+               test_assert_idx(lang_filter(norm, &token, &error) == 1, i);
                test_assert_idx(null_strcmp(token, expected_output[i]) == 0, i);
        }
-       fts_filter_unref(&norm);
+       lang_filter_unref(&norm);
        test_assert(norm == NULL);
        test_end();
 }
 
-static void test_fts_filter_normalizer_swedish_short_default_id(void)
+static void test_lang_filter_normalizer_swedish_short_default_id(void)
 {
-       struct fts_filter *norm = NULL;
+       struct lang_filter *norm = NULL;
        const char *input[] = {
                "Vem",
                "\xC3\x85",
@@ -563,24 +563,24 @@ static void test_fts_filter_normalizer_swedish_short_default_id(void)
        const char *token = NULL;
        unsigned int i;
 
-       test_begin("fts filter normalizer Swedish short text using default ID");
+       test_begin("lang filter normalizer Swedish short text using default ID");
 
-       test_assert(fts_filter_create(fts_filter_normalizer_icu, NULL, NULL, NULL, &norm, &error) == 0);
+       test_assert(lang_filter_create(lang_filter_normalizer_icu, NULL, NULL, NULL, &norm, &error) == 0);
        for (i = 0; i < N_ELEMENTS(input); i++) {
                token = input[i];
-               test_assert_idx(fts_filter_filter(norm, &token, &error) == 1, i);
+               test_assert_idx(lang_filter(norm, &token, &error) == 1, i);
                test_assert_idx(null_strcmp(token, expected_output[i]) == 0, i);
        }
-       fts_filter_unref(&norm);
+       lang_filter_unref(&norm);
        test_assert(norm == NULL);
        test_end();
 }
 
 /* UDHRDIR comes from Automake AM_CPPFLAGS */
 #define UDHR_FRA_NAME "/udhr_fra.txt"
-static void test_fts_filter_normalizer_french(void)
+static void test_lang_filter_normalizer_french(void)
 {
-       struct fts_filter *norm = NULL;
+       struct lang_filter *norm = NULL;
        FILE *input;
        const char * const settings[] =
                {"id", "Any-Lower; NFKD; [: Nonspacing Mark :] Remove", NULL};
@@ -600,16 +600,16 @@ static void test_fts_filter_normalizer_french(void)
                0x8c, 0xd6, 0x7a, 0xb7, 0xc5, 0xc6, 0x85, 0x00};
        const char *udhr_path;
 
-       test_begin("fts filter normalizer French UDHR");
+       test_begin("lang filter normalizer French UDHR");
 
        udhr_path = t_strconcat(UDHRDIR, UDHR_FRA_NAME, NULL);
-       test_assert(fts_filter_create(fts_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0);
+       test_assert(lang_filter_create(lang_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0);
        input = fopen(udhr_path, "r");
        test_assert(input != NULL);
        sha512_init(&ctx);
        while (NULL != fgets(buf, sizeof(buf), input)) {
                tokens = buf;
-               if (fts_filter_filter(norm, &tokens, &error) != 1){
+               if (lang_filter(norm, &tokens, &error) != 1){
                        break;
                }
                sha512_loop(&ctx, tokens, strlen(tokens));
@@ -618,12 +618,12 @@ static void test_fts_filter_normalizer_french(void)
        sha512_result(&ctx, sha512_digest);
        test_assert(memcmp(sha512_digest, correct_digest,
                           sizeof(sha512_digest)) == 0);
-       fts_filter_unref(&norm);
+       lang_filter_unref(&norm);
        test_assert(norm == NULL);
        test_end();
 }
 
-static void test_fts_filter_normalizer_empty(void)
+static void test_lang_filter_normalizer_empty(void)
 {
        /* test just a couple of these */
        static const char *empty_tokens[] = {
@@ -634,32 +634,32 @@ static void test_fts_filter_normalizer_empty(void)
        };
        const char * const settings[] =
                {"id", "Any-Lower; NFKD; [: Nonspacing Mark :] Remove; [\\x20] Remove", NULL};
-       struct fts_filter *norm;
+       struct lang_filter *norm;
        const char *error;
        unsigned int i;
 
-       test_begin("fts filter normalizer empty tokens");
-       test_assert(fts_filter_create(fts_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0);
+       test_begin("lang filter normalizer empty tokens");
+       test_assert(lang_filter_create(lang_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0);
        for (i = 0; i < N_ELEMENTS(empty_tokens); i++) {
                const char *token = empty_tokens[i];
-               test_assert_idx(fts_filter_filter(norm, &token, &error) == 0, i);
+               test_assert_idx(lang_filter(norm, &token, &error) == 0, i);
        }
-       fts_filter_unref(&norm);
+       lang_filter_unref(&norm);
        test_end();
 }
 
-static void test_fts_filter_normalizer_baddata(void)
+static void test_lang_filter_normalizer_baddata(void)
 {
        const char * const settings[] =
                {"id", "Any-Lower; NFKD; [: Nonspacing Mark :] Remove", NULL};
-       struct fts_filter *norm;
+       struct lang_filter *norm;
        const char *token, *error;
        string_t *str;
        unichar_t i;
 
-       test_begin("fts filter normalizer bad data");
+       test_begin("lang filter normalizer bad data");
 
-       test_assert(fts_filter_create(fts_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0);
+       test_assert(lang_filter_create(lang_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0);
        str = t_str_new(128);
        for (i = 1; i < 0x1ffff; i++) {
                if (!uni_is_valid_ucs4(i)) continue;
@@ -667,38 +667,38 @@ static void test_fts_filter_normalizer_baddata(void)
                uni_ucs4_to_utf8_c(i, str);
                token = str_c(str);
                T_BEGIN {
-                       test_assert_idx(fts_filter_filter(norm, &token, &error) >= 0, i);
+                       test_assert_idx(lang_filter(norm, &token, &error) >= 0, i);
                } T_END;
        }
 
        str_truncate(str, 0);
        uni_ucs4_to_utf8_c(UNICHAR_T_MAX, str);
        token = str_c(str);
-       test_assert(fts_filter_filter(norm, &token, &error) >= 0);
+       test_assert(lang_filter(norm, &token, &error) >= 0);
 
-       fts_filter_unref(&norm);
+       lang_filter_unref(&norm);
        test_end();
 }
 
-static void test_fts_filter_normalizer_invalid_id(void)
+static void test_lang_filter_normalizer_invalid_id(void)
 {
-       struct fts_filter *norm = NULL;
+       struct lang_filter *norm = NULL;
        const char *settings[] =
                {"id", "Any-One-Out-There; DKFN; [: Nonspacing Mark :] Remove",
                 NULL};
        const char *error = NULL, *token = "foo";
 
-       test_begin("fts filter normalizer invalid id");
-       test_assert(fts_filter_create(fts_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0);
+       test_begin("lang filter normalizer invalid id");
+       test_assert(lang_filter_create(lang_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0);
        test_assert(error == NULL);
-       test_assert(fts_filter_filter(norm, &token, &error) < 0 && error != NULL);
-       fts_filter_unref(&norm);
+       test_assert(lang_filter(norm, &token, &error) < 0 && error != NULL);
+       lang_filter_unref(&norm);
        test_end();
 }
 
-static void test_fts_filter_normalizer_oversized(void)
+static void test_lang_filter_normalizer_oversized(void)
 {
-       struct fts_filter *norm = NULL;
+       struct lang_filter *norm = NULL;
        const char *settings[] =
                {"id", "Any-Lower; NFKD; [: Nonspacing Mark :] Remove", "maxlen", "250",
                 NULL};
@@ -720,41 +720,41 @@ static void test_fts_filter_normalizer_oversized(void)
                                                "\xe6\xae\xb4\xe9\x8a\x85\xc4\xb9\xe4\x90\xb2\xe9\x96\xad\xef\x90"
                                                "\x9c\xe5\xa6\xae\xe9\x93\x91\xe8\x87\xa1";
 
-       test_begin("fts filter normalizer over-sized token");
-       test_assert(fts_filter_create(fts_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0);
+       test_begin("lang filter normalizer over-sized token");
+       test_assert(lang_filter_create(lang_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0);
        test_assert(error == NULL);
-       test_assert(fts_filter_filter(norm, &token, &error) >= 0);
+       test_assert(lang_filter(norm, &token, &error) >= 0);
        test_assert(strlen(token) <= 250);
-       fts_filter_unref(&norm);
+       lang_filter_unref(&norm);
        test_end();
 }
 
-static void test_fts_filter_normalizer_truncation(void)
+static void test_lang_filter_normalizer_truncation(void)
 {
-       struct fts_filter *norm = NULL;
+       struct lang_filter *norm = NULL;
        const char *settings[] =
                {"id", "Any-Lower;", "maxlen", "10",
                 NULL};
        const char *error = NULL;
        const char *token = "abcdefghi\xC3\x85";
 
-       test_begin("fts filter normalizer token truncated mid letter");
-       test_assert(fts_filter_create(fts_filter_normalizer_icu, NULL, NULL,
+       test_begin("lang filter normalizer token truncated mid letter");
+       test_assert(lang_filter_create(lang_filter_normalizer_icu, NULL, NULL,
                                      settings, &norm, &error) == 0);
        test_assert(error == NULL);
-       test_assert(fts_filter_filter(norm, &token, &error) >= 0);
+       test_assert(lang_filter(norm, &token, &error) >= 0);
        test_assert(strcmp(token, "abcdefghi") == 0);
-       fts_filter_unref(&norm);
+       lang_filter_unref(&norm);
        test_end();
 }
 
 #ifdef HAVE_FTS_STEMMER
-static void test_fts_filter_normalizer_stopwords_stemmer_eng(void)
+static void test_lang_filter_normalizer_stopwords_stemmer_eng(void)
 {
        int ret;
-       struct fts_filter *normalizer;
-       struct fts_filter *stemmer;
-       struct fts_filter *filter;
+       struct lang_filter *normalizer;
+       struct lang_filter *stemmer;
+       struct lang_filter *filter;
        const char *error;
        const char * const id_settings[] =
                //{"id", "Any-Lower; NFKD; [: Nonspacing Mark :] Remove; NFC", NULL};
@@ -776,16 +776,16 @@ static void test_fts_filter_normalizer_stopwords_stemmer_eng(void)
        const char * const *tpp;
        const char * const *bpp;
 
-       test_begin("fts filters normalizer, stopwords and stemming chained, English");
+       test_begin("lang filters normalizer, stopwords and stemming chained, English");
 
-       test_assert(fts_filter_create(fts_filter_normalizer_icu, NULL, NULL, id_settings, &normalizer, &error) == 0);
-       test_assert(fts_filter_create(fts_filter_stopwords, normalizer, &english_language, stopword_settings, &filter, &error) == 0);
-       test_assert(fts_filter_create(fts_filter_stemmer_snowball, filter, &english_language, NULL, &stemmer, &error) == 0);
+       test_assert(lang_filter_create(lang_filter_normalizer_icu, NULL, NULL, id_settings, &normalizer, &error) == 0);
+       test_assert(lang_filter_create(lang_filter_stopwords, normalizer, &english_language, stopword_settings, &filter, &error) == 0);
+       test_assert(lang_filter_create(lang_filter_stemmer_snowball, filter, &english_language, NULL, &stemmer, &error) == 0);
 
        bpp = bases;
        for (tpp = tokens; *tpp != NULL; tpp++) {
                token = *tpp;
-               ret = fts_filter_filter(stemmer, &token, &error);
+               ret = lang_filter(stemmer, &token, &error);
                if (ret <= 0) {
                        test_assert(ret == 0);
                        test_assert(*bpp == NULL);
@@ -795,21 +795,21 @@ static void test_fts_filter_normalizer_stopwords_stemmer_eng(void)
                }
                bpp++;
        }
-       fts_filter_unref(&stemmer);
-       fts_filter_unref(&filter);
-       fts_filter_unref(&normalizer);
+       lang_filter_unref(&stemmer);
+       lang_filter_unref(&filter);
+       lang_filter_unref(&normalizer);
        test_assert(stemmer == NULL);
        test_assert(filter == NULL);
        test_assert(normalizer == NULL);
        test_end();
 }
 
-static void test_fts_filter_stopwords_normalizer_stemmer_no(void)
+static void test_lang_filter_stopwords_normalizer_stemmer_no(void)
 {
        int ret;
-       struct fts_filter *normalizer;
-       struct fts_filter *stemmer;
-       struct fts_filter *filter;
+       struct lang_filter *normalizer;
+       struct lang_filter *stemmer;
+       struct lang_filter *filter;
        const char *error;
        const char *token = NULL;
        const char * const tokens[] = {
@@ -838,16 +838,16 @@ static void test_fts_filter_stopwords_normalizer_stemmer_no(void)
        const char * const *tpp;
        const char * const *bpp;
 
-       test_begin("fts filters with stopwords, default normalizer and stemming chained, Norwegian");
+       test_begin("lang filters with stopwords, default normalizer and stemming chained, Norwegian");
 
-       test_assert(fts_filter_create(fts_filter_stopwords, NULL, &norwegian_language, stopword_settings, &filter, &error) == 0);
-       test_assert(fts_filter_create(fts_filter_normalizer_icu, filter, NULL, NULL, &normalizer, &error) == 0);
-       test_assert(fts_filter_create(fts_filter_stemmer_snowball, normalizer, &norwegian_language, NULL, &stemmer, &error) == 0);
+       test_assert(lang_filter_create(lang_filter_stopwords, NULL, &norwegian_language, stopword_settings, &filter, &error) == 0);
+       test_assert(lang_filter_create(lang_filter_normalizer_icu, filter, NULL, NULL, &normalizer, &error) == 0);
+       test_assert(lang_filter_create(lang_filter_stemmer_snowball, normalizer, &norwegian_language, NULL, &stemmer, &error) == 0);
 
        bpp = bases;
        for (tpp = tokens; *tpp != NULL; tpp++) {
                token = *tpp;
-               ret = fts_filter_filter(stemmer, &token, &error);
+               ret = lang_filter(stemmer, &token, &error);
                if (ret <= 0) {
                        test_assert(ret == 0);
                        test_assert(*bpp == NULL);
@@ -857,21 +857,21 @@ static void test_fts_filter_stopwords_normalizer_stemmer_no(void)
                }
                bpp++;
        }
-       fts_filter_unref(&stemmer);
-       fts_filter_unref(&normalizer);
-       fts_filter_unref(&filter);
+       lang_filter_unref(&stemmer);
+       lang_filter_unref(&normalizer);
+       lang_filter_unref(&filter);
        test_assert(stemmer == NULL);
        test_assert(filter == NULL);
        test_assert(normalizer == NULL);
        test_end();
 }
 
-static void test_fts_filter_stopwords_normalizer_stemmer_sv(void)
+static void test_lang_filter_stopwords_normalizer_stemmer_sv(void)
 {
        int ret;
-       struct fts_filter *normalizer;
-       struct fts_filter *stemmer;
-       struct fts_filter *filter;
+       struct lang_filter *normalizer;
+       struct lang_filter *stemmer;
+       struct lang_filter *filter;
        const char *error;
        const char *token = NULL;
        const char * const tokens[] = {
@@ -888,17 +888,17 @@ static void test_fts_filter_stopwords_normalizer_stemmer_sv(void)
        const char * const *tpp;
        const char * const *bpp;
 
-       test_begin("fts filters with stopwords, default normalizer and stemming chained, Swedish");
+       test_begin("lang filters with stopwords, default normalizer and stemming chained, Swedish");
 
 
-       test_assert(fts_filter_create(fts_filter_stopwords, NULL, &swedish_language, stopword_settings, &filter, &error) == 0);
-       test_assert(fts_filter_create(fts_filter_normalizer_icu, filter, NULL, NULL, &normalizer, &error) == 0);
-       test_assert(fts_filter_create(fts_filter_stemmer_snowball, normalizer, &swedish_language, NULL, &stemmer, &error) == 0);
+       test_assert(lang_filter_create(lang_filter_stopwords, NULL, &swedish_language, stopword_settings, &filter, &error) == 0);
+       test_assert(lang_filter_create(lang_filter_normalizer_icu, filter, NULL, NULL, &normalizer, &error) == 0);
+       test_assert(lang_filter_create(lang_filter_stemmer_snowball, normalizer, &swedish_language, NULL, &stemmer, &error) == 0);
 
        bpp = bases;
        for (tpp = tokens; *tpp != NULL; tpp++) {
                token = *tpp;
-               ret = fts_filter_filter(stemmer, &token, &error);
+               ret = lang_filter(stemmer, &token, &error);
                if (ret <= 0) {
                        test_assert(ret == 0);
                        test_assert(*bpp == NULL);
@@ -908,9 +908,9 @@ static void test_fts_filter_stopwords_normalizer_stemmer_sv(void)
                }
                bpp++;
        }
-       fts_filter_unref(&stemmer);
-       fts_filter_unref(&normalizer);
-       fts_filter_unref(&filter);
+       lang_filter_unref(&stemmer);
+       lang_filter_unref(&normalizer);
+       lang_filter_unref(&filter);
        test_assert(stemmer == NULL);
        test_assert(filter == NULL);
        test_assert(normalizer == NULL);
@@ -919,9 +919,9 @@ static void test_fts_filter_stopwords_normalizer_stemmer_sv(void)
 #endif
 #endif
 
-static void test_fts_filter_english_possessive(void)
+static void test_lang_filter_english_possessive(void)
 {
-       struct fts_filter *norm = NULL;
+       struct lang_filter *norm = NULL;
        const char *input[] = {
                "foo'",
 
@@ -960,15 +960,15 @@ static void test_fts_filter_english_possessive(void)
        const char *token = NULL;
        unsigned int i;
 
-       test_begin("fts filter english possessive");
+       test_begin("lang filter english possessive");
 
-       test_assert(fts_filter_create(fts_filter_english_possessive, NULL, NULL, NULL, &norm, &error) == 0);
+       test_assert(lang_filter_create(lang_filter_english_possessive, NULL, NULL, NULL, &norm, &error) == 0);
        for (i = 0; i < N_ELEMENTS(input); i++) {
                token = input[i];
-               test_assert_idx(fts_filter_filter(norm, &token, &error) == 1, i);
+               test_assert_idx(lang_filter(norm, &token, &error) == 1, i);
                test_assert_idx(null_strcmp(token, expected_output[i]) == 0, i);
        }
-       fts_filter_unref(&norm);
+       lang_filter_unref(&norm);
        test_assert(norm == NULL);
        test_end();
 }
@@ -979,47 +979,47 @@ static void test_fts_filter_english_possessive(void)
 int main(void)
 {
        static void (*const test_functions[])(void) = {
-               test_fts_filter_find,
-               test_fts_filter_contractions_fail,
-               test_fts_filter_contractions_fr,
-               test_fts_filter_lowercase,
+               test_lang_filter_find,
+               test_lang_filter_contractions_fail,
+               test_lang_filter_contractions_fr,
+               test_lang_filter_lowercase,
 #ifdef HAVE_LIBICU
-               test_fts_filter_lowercase_utf8,
-               test_fts_filter_lowercase_too_long_utf8,
+               test_lang_filter_lowercase_utf8,
+               test_lang_filter_lowercase_too_long_utf8,
 #endif
-               test_fts_filter_stopwords_eng,
-               test_fts_filter_stopwords_fin,
-               test_fts_filter_stopwords_fra,
-               test_fts_filter_stopwords_no,
-               test_fts_filter_stopwords_fail_lazy_init,
-               test_fts_filter_stopwords_malformed,
+               test_lang_filter_stopwords_eng,
+               test_lang_filter_stopwords_fin,
+               test_lang_filter_stopwords_fra,
+               test_lang_filter_stopwords_no,
+               test_lang_filter_stopwords_fail_lazy_init,
+               test_lang_filter_stopwords_malformed,
 #ifdef HAVE_FTS_STEMMER
-               test_fts_filter_stemmer_snowball_stem_english,
-               test_fts_filter_stemmer_snowball_stem_french,
-               test_fts_filter_stopwords_stemmer_eng,
+               test_lang_filter_stemmer_snowball_stem_english,
+               test_lang_filter_stemmer_snowball_stem_french,
+               test_lang_filter_stopwords_stemmer_eng,
 #endif
 #ifdef HAVE_LIBICU
-               test_fts_filter_normalizer_swedish_short,
-               test_fts_filter_normalizer_swedish_short_default_id,
-               test_fts_filter_normalizer_french,
-               test_fts_filter_normalizer_empty,
-               test_fts_filter_normalizer_baddata,
-               test_fts_filter_normalizer_invalid_id,
-               test_fts_filter_normalizer_oversized,
-               test_fts_filter_normalizer_truncation,
+               test_lang_filter_normalizer_swedish_short,
+               test_lang_filter_normalizer_swedish_short_default_id,
+               test_lang_filter_normalizer_french,
+               test_lang_filter_normalizer_empty,
+               test_lang_filter_normalizer_baddata,
+               test_lang_filter_normalizer_invalid_id,
+               test_lang_filter_normalizer_oversized,
+               test_lang_filter_normalizer_truncation,
 #ifdef HAVE_FTS_STEMMER
-               test_fts_filter_normalizer_stopwords_stemmer_eng,
-               test_fts_filter_stopwords_normalizer_stemmer_no,
-               test_fts_filter_stopwords_normalizer_stemmer_sv,
+               test_lang_filter_normalizer_stopwords_stemmer_eng,
+               test_lang_filter_stopwords_normalizer_stemmer_no,
+               test_lang_filter_stopwords_normalizer_stemmer_sv,
 #endif
 #endif
-               test_fts_filter_english_possessive,
+               test_lang_filter_english_possessive,
                NULL
        };
        int ret;
 
-       fts_filters_init();
+       lang_filters_init();
        ret = test_run(test_functions);
-       fts_filters_deinit();
+       lang_filters_deinit();
        return ret;
 }
index 79b95dfafb9e6aa4304611fb57268d42704d9abc..994dc8981df89bc5929675487e5a12a0dc70e0de 100644 (file)
@@ -9,68 +9,68 @@
 
 #include <unicode/uclean.h>
 
-static void test_fts_icu_utf8_to_utf16_ascii_resize(void)
+static void test_lang_icu_utf8_to_utf16_ascii_resize(void)
 {
        ARRAY_TYPE(icu_utf16) dest;
 
-       test_begin("fts_icu_utf8_to_utf16 ascii resize");
+       test_begin("lang_icu_utf8_to_utf16 ascii resize");
        t_array_init(&dest, 2);
        test_assert(buffer_get_writable_size(dest.arr.buffer) == 4);
-       fts_icu_utf8_to_utf16(&dest, "12");
+       lang_icu_utf8_to_utf16(&dest, "12");
        test_assert(array_count(&dest) == 2);
        test_assert(buffer_get_writable_size(dest.arr.buffer) == 4);
 
-       fts_icu_utf8_to_utf16(&dest, "123");
+       lang_icu_utf8_to_utf16(&dest, "123");
        test_assert(array_count(&dest) == 3);
        test_assert(buffer_get_writable_size(dest.arr.buffer) == 7);
 
-       fts_icu_utf8_to_utf16(&dest, "12345");
+       lang_icu_utf8_to_utf16(&dest, "12345");
        test_assert(array_count(&dest) == 5);
 
        test_end();
 }
 
-static void test_fts_icu_utf8_to_utf16_32bit_resize(void)
+static void test_lang_icu_utf8_to_utf16_32bit_resize(void)
 {
        ARRAY_TYPE(icu_utf16) dest;
        unsigned int i;
 
-       test_begin("fts_icu_utf8_to_utf16 32bit resize");
+       test_begin("lang_icu_utf8_to_utf16 32bit resize");
        for (i = 1; i <= 2; i++) {
                t_array_init(&dest, i);
                test_assert(buffer_get_writable_size(dest.arr.buffer) == i*2);
-               fts_icu_utf8_to_utf16(&dest, "\xF0\x90\x90\x80"); /* 0x10400 */
+               lang_icu_utf8_to_utf16(&dest, "\xF0\x90\x90\x80"); /* 0x10400 */
                test_assert(array_count(&dest) == 2);
        }
 
        test_end();
 }
 
-static void test_fts_icu_utf16_to_utf8(void)
+static void test_lang_icu_utf16_to_utf8(void)
 {
        string_t *dest = t_str_new(64);
        const UChar src[] = { 0xbd, 'b', 'c' };
        unsigned int i;
 
-       test_begin("fts_icu_utf16_to_utf8");
+       test_begin("lang_icu_utf16_to_utf8");
        for (i = N_ELEMENTS(src); i > 0; i--) {
-               fts_icu_utf16_to_utf8(dest, src, i);
+               lang_icu_utf16_to_utf8(dest, src, i);
                test_assert(dest->used == i+1);
        }
        test_end();
 }
 
-static void test_fts_icu_utf16_to_utf8_resize(void)
+static void test_lang_icu_utf16_to_utf8_resize(void)
 {
        string_t *dest;
        const UChar src = UNICODE_REPLACEMENT_CHAR;
        unsigned int i;
 
-       test_begin("fts_icu_utf16_to_utf8 resize");
+       test_begin("lang_icu_utf16_to_utf8 resize");
        for (i = 2; i <= 6; i++) {
                dest = t_str_new(i);
                test_assert(buffer_get_writable_size(dest) == i);
-               fts_icu_utf16_to_utf8(dest, &src, 1);
+               lang_icu_utf16_to_utf8(dest, &src, 1);
                test_assert(dest->used == 3);
                test_assert(strcmp(str_c(dest), UNICODE_REPLACEMENT_CHAR_UTF8) == 0);
        }
@@ -86,7 +86,7 @@ static UTransliterator *get_translit(const char *id)
        UParseError perr;
 
        t_array_init(&id_utf16, 8);
-       fts_icu_utf8_to_utf16(&id_utf16, id);
+       lang_icu_utf8_to_utf16(&id_utf16, id);
        translit = utrans_openU(array_front(&id_utf16),
                                array_count(&id_utf16),
                                UTRANS_FORWARD, NULL, 0, &perr, &err);
@@ -94,7 +94,7 @@ static UTransliterator *get_translit(const char *id)
        return translit;
 }
 
-static void test_fts_icu_translate(void)
+static void test_lang_icu_translate(void)
 {
        const char *translit_id = "Any-Lower";
        UTransliterator *translit;
@@ -103,12 +103,12 @@ static void test_fts_icu_translate(void)
        const char *error;
        unsigned int i;
 
-       test_begin("fts_icu_translate");
+       test_begin("lang_icu_translate");
        t_array_init(&dest, 32);
        translit = get_translit(translit_id);
        for (i = N_ELEMENTS(src); i > 0; i--) {
                array_clear(&dest);
-               test_assert(fts_icu_translate(&dest, src, i,
+               test_assert(lang_icu_translate(&dest, src, i,
                                              translit, &error) == 0);
                test_assert(array_count(&dest) == i);
        }
@@ -116,7 +116,7 @@ static void test_fts_icu_translate(void)
        test_end();
 }
 
-static void test_fts_icu_translate_resize(void)
+static void test_lang_icu_translate_resize(void)
 {
        const char *translit_id = "Any-Hex";
        const char *src_utf8 = "FOO";
@@ -125,16 +125,16 @@ static void test_fts_icu_translate_resize(void)
        const char *error;
        unsigned int i;
 
-       test_begin("fts_icu_translate_resize resize");
+       test_begin("lang_icu_translate_resize resize");
 
        t_array_init(&src_utf16, 8);
        translit = get_translit(translit_id);
        for (i = 1; i <= 10; i++) {
                array_clear(&src_utf16);
-               fts_icu_utf8_to_utf16(&src_utf16, src_utf8);
+               lang_icu_utf8_to_utf16(&src_utf16, src_utf8);
                t_array_init(&dest, i);
                test_assert(buffer_get_writable_size(dest.arr.buffer) == i*2);
-               test_assert(fts_icu_translate(&dest, array_front(&src_utf16),
+               test_assert(lang_icu_translate(&dest, array_front(&src_utf16),
                                              array_count(&src_utf16),
                                              translit, &error) == 0);
        }
@@ -143,28 +143,28 @@ static void test_fts_icu_translate_resize(void)
        test_end();
 }
 
-static void test_fts_icu_lcase(void)
+static void test_lang_icu_lcase(void)
 {
        const char *src = "aBcD\xC3\x84\xC3\xA4";
        string_t *dest = t_str_new(64);
 
-       test_begin("fts_icu_lcase");
-       fts_icu_lcase(dest, src);
+       test_begin("lang_icu_lcase");
+       lang_icu_lcase(dest, src);
        test_assert(strcmp(str_c(dest), "abcd\xC3\xA4\xC3\xA4") == 0);
        test_end();
 }
 
-static void test_fts_icu_lcase_resize(void)
+static void test_lang_icu_lcase_resize(void)
 {
        const char *src = "a\xC3\x84";
        string_t *dest;
        unsigned int i;
 
-       test_begin("fts_icu_lcase resize");
+       test_begin("lang_icu_lcase resize");
        for (i = 1; i <= 3; i++) {
                dest = t_str_new(i);
                test_assert(buffer_get_writable_size(dest) == i);
-               fts_icu_lcase(dest, src);
+               lang_icu_lcase(dest, src);
                test_assert(strcmp(str_c(dest), "a\xC3\xA4") == 0);
                test_assert(buffer_get_writable_size(dest) == 3);
        }
@@ -172,31 +172,31 @@ static void test_fts_icu_lcase_resize(void)
        test_end();
 }
 
-static void test_fts_icu_lcase_resize_invalid_utf8(void)
+static void test_lang_icu_lcase_resize_invalid_utf8(void)
 {
        string_t *dest;
 
-       test_begin("fts_icu_lcase resize invalid utf8");
+       test_begin("lang_icu_lcase resize invalid utf8");
        dest = t_str_new(1);
-       fts_icu_lcase(dest, ".\x80.");
+       lang_icu_lcase(dest, ".\x80.");
        test_end();
 }
 
 int main(void)
 {
        static void (*const test_functions[])(void) = {
-               test_fts_icu_utf8_to_utf16_ascii_resize,
-               test_fts_icu_utf8_to_utf16_32bit_resize,
-               test_fts_icu_utf16_to_utf8,
-               test_fts_icu_utf16_to_utf8_resize,
-               test_fts_icu_translate,
-               test_fts_icu_translate_resize,
-               test_fts_icu_lcase,
-               test_fts_icu_lcase_resize,
-               test_fts_icu_lcase_resize_invalid_utf8,
+               test_lang_icu_utf8_to_utf16_ascii_resize,
+               test_lang_icu_utf8_to_utf16_32bit_resize,
+               test_lang_icu_utf16_to_utf8,
+               test_lang_icu_utf16_to_utf8_resize,
+               test_lang_icu_translate,
+               test_lang_icu_translate_resize,
+               test_lang_icu_lcase,
+               test_lang_icu_lcase_resize,
+               test_lang_icu_lcase_resize_invalid_utf8,
                NULL
        };
        int ret = test_run(test_functions);
-       fts_icu_deinit();
+       lang_icu_deinit();
        return ret;
 }
index b69633486be327942429761c793ff2ee181a1746..678d477350f174e4fc02c48c39f11e2c329fbc8c 100644 (file)
@@ -66,16 +66,16 @@ static const char *test_inputs[] = {
        "l" SQ "homme l" SQ "humanit\xC3\xA9 d" SQ "immixtions qu" SQ "il aujourd'hui que'euq"
 };
 
-static void test_fts_tokenizer_find(void)
+static void test_lang_tokenizer_find(void)
 {
-       test_begin("fts tokenizer find");
-       test_assert(fts_tokenizer_find("email-address") == fts_tokenizer_email_address);
-       test_assert(fts_tokenizer_find("generic") == fts_tokenizer_generic);
+       test_begin("lang tokenizer find");
+       test_assert(lang_tokenizer_find("email-address") == lang_tokenizer_email_address);
+       test_assert(lang_tokenizer_find("generic") == lang_tokenizer_generic);
        test_end();
 }
 
 static unsigned int
-test_tokenizer_inputoutput(struct fts_tokenizer *tok, const char *_input,
+test_tokenizer_inputoutput(struct lang_tokenizer *tok, const char *_input,
                           const char *const *expected_output,
                           unsigned int first_outi)
 {
@@ -86,11 +86,11 @@ test_tokenizer_inputoutput(struct fts_tokenizer *tok, const char *_input,
 
        /* test all input at once */
        outi = first_outi;
-       while (fts_tokenizer_next(tok, input, input_len, &token, &error) > 0) {
+       while (lang_tokenizer_next(tok, input, input_len, &token, &error) > 0) {
                test_assert_strcmp(token, expected_output[outi]);
                outi++;
        }
-       while (fts_tokenizer_next(tok, NULL, 0, &token, &error) > 0) {
+       while (lang_tokenizer_next(tok, NULL, 0, &token, &error) > 0) {
                test_assert_strcmp(token, expected_output[outi]);
                outi++;
        }
@@ -100,12 +100,12 @@ test_tokenizer_inputoutput(struct fts_tokenizer *tok, const char *_input,
        outi = first_outi;
        for (i = 0; i < input_len; i += char_len) {
                char_len = uni_utf8_char_bytes(input[i]);
-               while (fts_tokenizer_next(tok, input+i, char_len, &token, &error) > 0) {
+               while (lang_tokenizer_next(tok, input+i, char_len, &token, &error) > 0) {
                        test_assert_strcmp(token, expected_output[outi]);
                        outi++;
                }
        }
-       while (fts_tokenizer_final(tok, &token, &error) > 0) {
+       while (lang_tokenizer_final(tok, &token, &error) > 0) {
                test_assert_strcmp(token, expected_output[outi]);
                outi++;
        }
@@ -117,12 +117,12 @@ test_tokenizer_inputoutput(struct fts_tokenizer *tok, const char *_input,
                max = i_rand_minmax(1, input_len - i);
                for (char_len = 0; char_len < max; )
                        char_len += uni_utf8_char_bytes(input[i+char_len]);
-               while (fts_tokenizer_next(tok, input+i, char_len, &token, &error) > 0) {
+               while (lang_tokenizer_next(tok, input+i, char_len, &token, &error) > 0) {
                        test_assert_strcmp(token, expected_output[outi]);
                        outi++;
                }
        }
-       while (fts_tokenizer_final(tok, &token, &error) > 0) {
+       while (lang_tokenizer_final(tok, &token, &error) > 0) {
                test_assert_strcmp(token, expected_output[outi]);
                outi++;
        }
@@ -132,7 +132,7 @@ test_tokenizer_inputoutput(struct fts_tokenizer *tok, const char *_input,
 }
 
 static void
-test_tokenizer_inputs(struct fts_tokenizer *tok,
+test_tokenizer_inputs(struct lang_tokenizer *tok,
                      const char *const *inputs, unsigned int count,
                      const char *const *expected_output)
 {
@@ -145,7 +145,7 @@ test_tokenizer_inputs(struct fts_tokenizer *tok,
        test_assert_idx(expected_output[outi] == NULL, outi);
 }
 
-static void test_fts_tokenizer_generic_only(void)
+static void test_lang_tokenizer_generic_only(void)
 {
        static const char *const expected_output[] = {
                "hello", "world", "And",
@@ -188,15 +188,15 @@ static void test_fts_tokenizer_generic_only(void)
 
                NULL
        };
-       struct fts_tokenizer *tok;
+       struct lang_tokenizer *tok;
        const char *error;
 
-       test_begin("fts tokenizer generic simple");
-       test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, NULL, &tok, &error) == 0);
-       test_assert(((struct generic_fts_tokenizer *) tok)->algorithm == BOUNDARY_ALGORITHM_SIMPLE);
+       test_begin("lang tokenizer generic simple");
+       test_assert(lang_tokenizer_create(lang_tokenizer_generic, NULL, NULL, &tok, &error) == 0);
+       test_assert(((struct generic_lang_tokenizer *) tok)->algorithm == BOUNDARY_ALGORITHM_SIMPLE);
 
        test_tokenizer_inputs(tok, test_inputs, N_ELEMENTS(test_inputs), expected_output);
-       fts_tokenizer_unref(&tok);
+       lang_tokenizer_unref(&tok);
        test_end();
 }
 
@@ -204,7 +204,7 @@ const char *const tr29_settings[] = {"algorithm", "tr29", NULL};
 
 /* TODO: U+206F is in "Format" and therefore currently not word break.
    This definitely needs to be remapped. */
-static void test_fts_tokenizer_generic_tr29_only(void)
+static void test_lang_tokenizer_generic_tr29_only(void)
 {
        static const char *const expected_output[] = {
                "hello", "world", "And",
@@ -246,13 +246,13 @@ static void test_fts_tokenizer_generic_tr29_only(void)
                "l'homme", "l'humanit\xC3\xA9", "d'immixtions", "qu'il", "aujourd'hui", "que'euq", NULL,
                NULL
        };
-       struct fts_tokenizer *tok;
+       struct lang_tokenizer *tok;
        const char *error;
 
-       test_begin("fts tokenizer generic TR29");
-       test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, tr29_settings, &tok, &error) == 0);
+       test_begin("lang tokenizer generic TR29");
+       test_assert(lang_tokenizer_create(lang_tokenizer_generic, NULL, tr29_settings, &tok, &error) == 0);
        test_tokenizer_inputs(tok, test_inputs, N_ELEMENTS(test_inputs), expected_output);
-       fts_tokenizer_unref(&tok);
+       lang_tokenizer_unref(&tok);
        test_end();
 }
 
@@ -260,7 +260,7 @@ const char *const tr29_settings_wb5a[] = {"algorithm", "tr29", "wb5a", "yes", NU
 
 /* TODO: U+206F is in "Format" and therefore currently not word break.
    This definitely needs to be remapped. */
-static void test_fts_tokenizer_generic_tr29_wb5a(void)
+static void test_lang_tokenizer_generic_tr29_wb5a(void)
 {
        static const char *const expected_output[] = {
                "hello", "world", "And",
@@ -303,17 +303,17 @@ static void test_fts_tokenizer_generic_tr29_wb5a(void)
 
                NULL
        };
-       struct fts_tokenizer *tok;
+       struct lang_tokenizer *tok;
        const char *error;
 
-       test_begin("fts tokenizer generic TR29 with WB5a");
-       test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, tr29_settings_wb5a, &tok, &error) == 0);
+       test_begin("lang tokenizer generic TR29 with WB5a");
+       test_assert(lang_tokenizer_create(lang_tokenizer_generic, NULL, tr29_settings_wb5a, &tok, &error) == 0);
        test_tokenizer_inputs(tok, test_inputs, N_ELEMENTS(test_inputs), expected_output);
-       fts_tokenizer_unref(&tok);
+       lang_tokenizer_unref(&tok);
        test_end();
 }
 
-static void test_fts_tokenizer_address_only(void)
+static void test_lang_tokenizer_address_only(void)
 {
        static const char input[] = TEST_INPUT_ADDRESS;
        static const char *const expected_output[] = {
@@ -326,17 +326,17 @@ static void test_fts_tokenizer_address_only(void)
                "hypen@hypen-hypen-sick.com",
                NULL
        };
-       struct fts_tokenizer *tok;
+       struct lang_tokenizer *tok;
        const char *error;
 
-       test_begin("fts tokenizer email address only");
-       test_assert(fts_tokenizer_create(fts_tokenizer_email_address, NULL, NULL, &tok, &error) == 0);
+       test_begin("lang tokenizer email address only");
+       test_assert(lang_tokenizer_create(lang_tokenizer_email_address, NULL, NULL, &tok, &error) == 0);
        test_tokenizer_inputoutput(tok, input, expected_output, 0);
-       fts_tokenizer_unref(&tok);
+       lang_tokenizer_unref(&tok);
        test_end();
 }
 
-static void test_fts_tokenizer_address_parent(const char *name, const char * const *settings)
+static void test_lang_tokenizer_address_parent(const char *name, const char * const *settings)
 {
        static const char input[] = TEST_INPUT_ADDRESS;
        static const char *const expected_output[] = {
@@ -362,30 +362,30 @@ static void test_fts_tokenizer_address_parent(const char *name, const char * con
                "hypen", "hypen", "hypen", "sick", "com", "hypen@hypen-hypen-sick.com",
                NULL
        };
-       struct fts_tokenizer *tok, *gen_tok;
+       struct lang_tokenizer *tok, *gen_tok;
        const char *error;
 
-       test_begin(t_strdup_printf("fts tokenizer email address + parent %s", name));
-       test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, settings, &gen_tok, &error) == 0);
-       test_assert(fts_tokenizer_create(fts_tokenizer_email_address, gen_tok, NULL, &tok, &error) == 0);
+       test_begin(t_strdup_printf("lang tokenizer email address + parent %s", name));
+       test_assert(lang_tokenizer_create(lang_tokenizer_generic, NULL, settings, &gen_tok, &error) == 0);
+       test_assert(lang_tokenizer_create(lang_tokenizer_email_address, gen_tok, NULL, &tok, &error) == 0);
        test_tokenizer_inputoutput(tok, input, expected_output, 0);
-       fts_tokenizer_unref(&tok);
-       fts_tokenizer_unref(&gen_tok);
+       lang_tokenizer_unref(&tok);
+       lang_tokenizer_unref(&gen_tok);
        test_end();
 }
 
 const char *const simple_settings[] = {"algorithm", "simple", NULL};
-static void test_fts_tokenizer_address_parent_simple(void)
+static void test_lang_tokenizer_address_parent_simple(void)
 {
-       test_fts_tokenizer_address_parent("simple", simple_settings);
+       test_lang_tokenizer_address_parent("simple", simple_settings);
 }
 
-static void test_fts_tokenizer_address_parent_tr29(void)
+static void test_lang_tokenizer_address_parent_tr29(void)
 {
-       test_fts_tokenizer_address_parent("tr29", tr29_settings);
+       test_lang_tokenizer_address_parent("tr29", tr29_settings);
 }
 
-static void test_fts_tokenizer_address_search(void)
+static void test_lang_tokenizer_address_search(void)
 {
        static const char input[] = TEST_INPUT_ADDRESS;
        static const char *const expected_output[] = {
@@ -412,44 +412,44 @@ static void test_fts_tokenizer_address_search(void)
                NULL
        };
        static const char *const settings[] = { "search", "", NULL };
-       struct fts_tokenizer *tok, *gen_tok;
+       struct lang_tokenizer *tok, *gen_tok;
        const char *token, *error;
 
-       test_begin("fts tokenizer search email address + parent");
-       test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, NULL, &gen_tok, &error) == 0);
-       test_assert(fts_tokenizer_create(fts_tokenizer_email_address, gen_tok, settings, &tok, &error) == 0);
+       test_begin("lang tokenizer search email address + parent");
+       test_assert(lang_tokenizer_create(lang_tokenizer_generic, NULL, NULL, &gen_tok, &error) == 0);
+       test_assert(lang_tokenizer_create(lang_tokenizer_email_address, gen_tok, settings, &tok, &error) == 0);
        test_tokenizer_inputoutput(tok, input, expected_output, 0);
 
        /* make sure state is forgotten at EOF */
-       test_assert(fts_tokenizer_next(tok, (const void *)"foo", 3, &token, &error) == 0);
-       test_assert(fts_tokenizer_final(tok, &token, &error) > 0 &&
+       test_assert(lang_tokenizer_next(tok, (const void *)"foo", 3, &token, &error) == 0);
+       test_assert(lang_tokenizer_final(tok, &token, &error) > 0 &&
                    strcmp(token, "foo") == 0);
-       test_assert(fts_tokenizer_final(tok, &token, &error) == 0);
+       test_assert(lang_tokenizer_final(tok, &token, &error) == 0);
 
-       test_assert(fts_tokenizer_next(tok, (const void *)"bar@baz", 7, &token, &error) == 0);
-       test_assert(fts_tokenizer_final(tok, &token, &error) > 0 &&
+       test_assert(lang_tokenizer_next(tok, (const void *)"bar@baz", 7, &token, &error) == 0);
+       test_assert(lang_tokenizer_final(tok, &token, &error) > 0 &&
                    strcmp(token, "bar@baz") == 0);
-       test_assert(fts_tokenizer_final(tok, &token, &error) == 0);
+       test_assert(lang_tokenizer_final(tok, &token, &error) == 0);
 
-       test_assert(fts_tokenizer_next(tok, (const void *)"foo@", 4, &token, &error) == 0);
-       test_assert(fts_tokenizer_final(tok, &token, &error) > 0 &&
+       test_assert(lang_tokenizer_next(tok, (const void *)"foo@", 4, &token, &error) == 0);
+       test_assert(lang_tokenizer_final(tok, &token, &error) > 0 &&
                    strcmp(token, "foo") == 0);
-       test_assert(fts_tokenizer_final(tok, &token, &error) == 0);
+       test_assert(lang_tokenizer_final(tok, &token, &error) == 0);
 
        /* test reset explicitly */
-       test_assert(fts_tokenizer_next(tok, (const void *)"a", 1, &token, &error) == 0);
-       fts_tokenizer_reset(tok);
-       test_assert(fts_tokenizer_next(tok, (const void *)"b@c", 3, &token, &error) == 0);
-       test_assert(fts_tokenizer_final(tok, &token, &error) > 0 &&
+       test_assert(lang_tokenizer_next(tok, (const void *)"a", 1, &token, &error) == 0);
+       lang_tokenizer_reset(tok);
+       test_assert(lang_tokenizer_next(tok, (const void *)"b@c", 3, &token, &error) == 0);
+       test_assert(lang_tokenizer_final(tok, &token, &error) > 0 &&
                    strcmp(token, "b@c") == 0);
-       test_assert(fts_tokenizer_final(tok, &token, &error) == 0);
+       test_assert(lang_tokenizer_final(tok, &token, &error) == 0);
 
-       fts_tokenizer_unref(&tok);
-       fts_tokenizer_unref(&gen_tok);
+       lang_tokenizer_unref(&tok);
+       lang_tokenizer_unref(&gen_tok);
        test_end();
 }
 
-static void test_fts_tokenizer_delete_trailing_partial_char(void)
+static void test_lang_tokenizer_delete_trailing_partial_char(void)
 {
        static const struct {
                const char *str;
@@ -468,33 +468,33 @@ static void test_fts_tokenizer_delete_trailing_partial_char(void)
        unsigned int i;
        size_t size;
 
-       test_begin("fts tokenizer delete trailing partial char");
+       test_begin("lang tokenizer delete trailing partial char");
        for (i = 0; i < N_ELEMENTS(tests); i++) {
                size = strlen(tests[i].str);
-               fts_tokenizer_delete_trailing_partial_char((const unsigned char *)tests[i].str, &size);
+               lang_tokenizer_delete_trailing_partial_char((const unsigned char *)tests[i].str, &size);
                test_assert(size == tests[i].truncated_len);
        }
        test_end();
 }
 
-static void test_fts_tokenizer_address_maxlen(void)
+static void test_lang_tokenizer_address_maxlen(void)
 {
        const char *const settings[] = {"maxlen", "5", NULL};
        const char *input = "...\357\277\275@a";
-       struct fts_tokenizer *tok;
+       struct lang_tokenizer *tok;
        const char *token, *error;
 
-       test_begin("fts tokenizer address maxlen");
-       test_assert(fts_tokenizer_create(fts_tokenizer_email_address, NULL, settings, &tok, &error) == 0);
+       test_begin("lang tokenizer address maxlen");
+       test_assert(lang_tokenizer_create(lang_tokenizer_email_address, NULL, settings, &tok, &error) == 0);
 
-       while (fts_tokenizer_next(tok, (const unsigned char *)input,
+       while (lang_tokenizer_next(tok, (const unsigned char *)input,
                                  strlen(input), &token, &error) > 0) ;
-       while (fts_tokenizer_final(tok, &token, &error) > 0) ;
-       fts_tokenizer_unref(&tok);
+       while (lang_tokenizer_final(tok, &token, &error) > 0) ;
+       lang_tokenizer_unref(&tok);
        test_end();
 }
 
-static void test_fts_tokenizer_random(void)
+static void test_lang_tokenizer_random(void)
 {
        const unsigned char test_chars[] = { 0, ' ', '.', 'a', 'b', 'c', '-', '@', '\xC3', '\xA4' };
        const char *const settings[] = {"algorithm", "simple", NULL};
@@ -502,12 +502,12 @@ static void test_fts_tokenizer_random(void)
        unsigned int i;
        unsigned char addr[10] = { 0 };
        string_t *str = t_str_new(20);
-       struct fts_tokenizer *tok, *gen_tok;
+       struct lang_tokenizer *tok, *gen_tok;
        const char *token, *error;
 
-       test_begin("fts tokenizer random");
-       test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, settings, &gen_tok, &error) == 0);
-       test_assert(fts_tokenizer_create(fts_tokenizer_email_address, gen_tok, email_settings, &tok, &error) == 0);
+       test_begin("lang tokenizer random");
+       test_assert(lang_tokenizer_create(lang_tokenizer_generic, NULL, settings, &gen_tok, &error) == 0);
+       test_assert(lang_tokenizer_create(lang_tokenizer_email_address, gen_tok, email_settings, &tok, &error) == 0);
 
        for (i = 0; i < 10000; i++) T_BEGIN {
                for (unsigned int j = 0; j < sizeof(addr); j++)
@@ -515,18 +515,18 @@ static void test_fts_tokenizer_random(void)
                str_truncate(str, 0);
                if (uni_utf8_get_valid_data(addr, sizeof(addr), str))
                        str_append_data(str, addr, sizeof(addr));
-               while (fts_tokenizer_next(tok, str_data(str), str_len(str),
+               while (lang_tokenizer_next(tok, str_data(str), str_len(str),
                                          &token, &error) > 0) ;
-               while (fts_tokenizer_final(tok, &token, &error) > 0) ;
+               while (lang_tokenizer_final(tok, &token, &error) > 0) ;
        } T_END;
-       fts_tokenizer_unref(&tok);
-       fts_tokenizer_unref(&gen_tok);
+       lang_tokenizer_unref(&tok);
+       lang_tokenizer_unref(&gen_tok);
        test_end();
 }
 
 
 static void
-test_fts_tokenizer_explicit_prefix(void)
+test_lang_tokenizer_explicit_prefix(void)
 {
        const char *input = "* ** "
                "*pre *both* post* "
@@ -567,24 +567,24 @@ test_fts_tokenizer_explicit_prefix(void)
                                                           algos[algo],
                                                           searches[search],
                                                           prefixes[explicitprefix]));
-                               struct fts_tokenizer *tok;
+                               struct lang_tokenizer *tok;
                                const char *error;
 
-                               test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, settings,
+                               test_assert(lang_tokenizer_create(lang_tokenizer_generic, NULL, settings,
                                                                 &tok, &error) == 0);
                                test_tokenizer_inputs(
                                        tok, &input, 1,
                                        (search!=0) && (explicitprefix!=0)
                                        ? expected_star : expected_nostar);
 
-                               fts_tokenizer_unref(&tok);
+                               lang_tokenizer_unref(&tok);
                                test_end();
                        }
                }
        }
 }
 
-static void test_fts_tokenizer_skip_base64(void)
+static void test_lang_tokenizer_skip_base64(void)
 {
        /* The skip_base64 works on the data already available in the buffer
           of the tokenizer, it does not pull more data to see if a base64
@@ -592,7 +592,7 @@ static void test_fts_tokenizer_skip_base64(void)
           use test_tokenizer_inputoutput that also tests with one-byte-at-once
           or random chunking, as those are known to fail with the current
           implementation */
-       struct fts_tokenizer *tok;
+       struct lang_tokenizer *tok;
        const char *error;
        const char *token;
 
@@ -641,16 +641,16 @@ static void test_fts_tokenizer_skip_base64(void)
                NULL
        };
 
-       test_begin("fts tokenizer skip base64");
-       test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, tr29_settings, &tok, &error) == 0);
+       test_begin("lang tokenizer skip base64");
+       test_assert(lang_tokenizer_create(lang_tokenizer_generic, NULL, tr29_settings, &tok, &error) == 0);
 
        size_t index = 0;
-       while (fts_tokenizer_next(tok, (const unsigned char *) input, strlen(input), &token, &error) > 0) {
+       while (lang_tokenizer_next(tok, (const unsigned char *) input, strlen(input), &token, &error) > 0) {
                i_assert(index < N_ELEMENTS(expected_output));
                test_assert_strcmp(token, expected_output[index]);
                ++index;
        }
-       while (fts_tokenizer_next(tok, NULL, 0, &token, &error) > 0) {
+       while (lang_tokenizer_next(tok, NULL, 0, &token, &error) > 0) {
                i_assert(index < N_ELEMENTS(expected_output));
                test_assert_strcmp(token, expected_output[index]);
                ++index;
@@ -658,33 +658,33 @@ static void test_fts_tokenizer_skip_base64(void)
        i_assert(index < N_ELEMENTS(expected_output));
        test_assert_idx(expected_output[index] == NULL, index);
 
-       fts_tokenizer_unref(&tok);
+       lang_tokenizer_unref(&tok);
        test_end();
 }
 
 int main(void)
 {
        static void (*const test_functions[])(void) = {
-               test_fts_tokenizer_skip_base64,
-               test_fts_tokenizer_find,
-               test_fts_tokenizer_generic_only,
-               test_fts_tokenizer_generic_tr29_only,
-               test_fts_tokenizer_generic_tr29_wb5a,
-               test_fts_tokenizer_address_only,
-               test_fts_tokenizer_address_parent_simple,
-               test_fts_tokenizer_address_parent_tr29,
-               test_fts_tokenizer_address_maxlen,
-               test_fts_tokenizer_address_search,
-               test_fts_tokenizer_delete_trailing_partial_char,
-               test_fts_tokenizer_random,
-               test_fts_tokenizer_explicit_prefix,
+               test_lang_tokenizer_skip_base64,
+               test_lang_tokenizer_find,
+               test_lang_tokenizer_generic_only,
+               test_lang_tokenizer_generic_tr29_only,
+               test_lang_tokenizer_generic_tr29_wb5a,
+               test_lang_tokenizer_address_only,
+               test_lang_tokenizer_address_parent_simple,
+               test_lang_tokenizer_address_parent_tr29,
+               test_lang_tokenizer_address_maxlen,
+               test_lang_tokenizer_address_search,
+               test_lang_tokenizer_delete_trailing_partial_char,
+               test_lang_tokenizer_random,
+               test_lang_tokenizer_explicit_prefix,
                NULL
        };
        int ret;
 
-       fts_tokenizers_init();
+       lang_tokenizers_init();
        ret = test_run(test_functions);
-       fts_tokenizers_deinit();
+       lang_tokenizers_deinit();
 
        return ret;
 }
index 661f9021b2267bdc4e5ab1e557c0d1f562ca4192..85032f534915c2cdee1aad726db3443bed8e33c9 100644 (file)
@@ -11,10 +11,10 @@ const char *const settings[] =
         "fts_language_data", TEXTCAT_DATADIR"/", NULL};
 
 /* Detect Finnish. fi--utf8 */
-static void test_fts_language_detect_finnish(void)
+static void test_language_detect_finnish(void)
 {
-       struct fts_language_list *lp = NULL;
-       const struct fts_language *lang_r = NULL;
+       struct language_list *lp = NULL;
+       const struct language *lang_r = NULL;
        const unsigned char finnish[] =
                "Yhdistyneiden kansakuntien kolmas yleiskokous hyv\xC3\xA4ksyi "\
                "ja julkisti ihmisoikeuksien yleismaailmallisen julistuksen "\
@@ -25,21 +25,21 @@ static void test_fts_language_detect_finnish(void)
                "\xC3\xA4\xC3\xA4nest\xC3\xA4m\xC3\xA4st\xC3\xA4.";
        const char names[] = "de, fi, en";
        const char *unknown, *error;
-       test_begin("fts language detect Finnish");
-       test_assert(fts_language_list_init(settings, &lp, &error) == 0);
-       test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE);
-       test_assert(fts_language_detect(lp, finnish, sizeof(finnish)-1, &lang_r, &error)
-                   == FTS_LANGUAGE_RESULT_OK);
+       test_begin("language detect Finnish");
+       test_assert(language_list_init(settings, &lp, &error) == 0);
+       test_assert(language_list_add_names(lp, names, &unknown) == TRUE);
+       test_assert(language_detect(lp, finnish, sizeof(finnish)-1, &lang_r, &error)
+                   == LANGUAGE_RESULT_OK);
        test_assert(strcmp(lang_r->name, "fi") == 0);
-       fts_language_list_deinit(&lp);
+       language_list_deinit(&lp);
        test_end();
 }
 
 /* Detect English */
-static void test_fts_language_detect_english(void)
+static void test_language_detect_english(void)
 {
-       struct fts_language_list *lp = NULL;
-       const struct fts_language *lang_r = NULL;
+       struct language_list *lp = NULL;
+       const struct language *lang_r = NULL;
        const unsigned char english[]  = "Whereas recognition of the inherent dignity and"\
                " of the equal and inalienable rights of all members of the human"\
                "family is the foundation of freedom, justice and peace in the "\
@@ -52,21 +52,21 @@ static void test_fts_language_detect_english(void)
 
        const char names[] = "fi, de, fr, en";
        const char *unknown, *error;
-       test_begin("fts language detect English");
-       test_assert(fts_language_list_init(settings, &lp, &error) == 0);
-       test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE);
-       test_assert(fts_language_detect(lp, english, sizeof(english)-1, &lang_r, &error)
-                   == FTS_LANGUAGE_RESULT_OK);
+       test_begin("language detect English");
+       test_assert(language_list_init(settings, &lp, &error) == 0);
+       test_assert(language_list_add_names(lp, names, &unknown) == TRUE);
+       test_assert(language_detect(lp, english, sizeof(english)-1, &lang_r, &error)
+                   == LANGUAGE_RESULT_OK);
        test_assert(strcmp(lang_r->name, "en") == 0);
-       fts_language_list_deinit(&lp);
+       language_list_deinit(&lp);
        test_end();
 }
 
 /* Detect French */
-static void test_fts_language_detect_french(void)
+static void test_language_detect_french(void)
 {
-       struct fts_language_list *lp = NULL;
-       const struct fts_language *lang_r = NULL;
+       struct language_list *lp = NULL;
+       const struct language *lang_r = NULL;
        const unsigned char french[] =
                "D\xC3\xA9""claration universelle des droits de l\xE2\x80\x99"
                "homme Pr\xC3\xA9""ambule Consid\xC3\xA9rant que la "\
@@ -87,20 +87,20 @@ static void test_fts_language_detect_french(void)
 
        const char names[] = "de, fi, fr, en";
        const char *unknown, *error;
-       test_begin("fts language detect French");
-       test_assert(fts_language_list_init(settings, &lp, &error) == 0);
-       test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE);
-       test_assert(fts_language_detect(lp, french, sizeof(french)-1, &lang_r, &error)
-                   == FTS_LANGUAGE_RESULT_OK);
+       test_begin("language detect French");
+       test_assert(language_list_init(settings, &lp, &error) == 0);
+       test_assert(language_list_add_names(lp, names, &unknown) == TRUE);
+       test_assert(language_detect(lp, french, sizeof(french)-1, &lang_r, &error)
+                   == LANGUAGE_RESULT_OK);
        test_assert(strcmp(lang_r->name, "fr") == 0);
-       fts_language_list_deinit(&lp);
+       language_list_deinit(&lp);
        test_end();
 }
 /* Detect German */
-static void test_fts_language_detect_german(void)
+static void test_language_detect_german(void)
 {
-       struct fts_language_list *lp = NULL;
-       const struct fts_language *lang_r = NULL;
+       struct language_list *lp = NULL;
+       const struct language *lang_r = NULL;
        const unsigned char german[]  =
                "Artikel 1"\
                "Alle Menschen sind frei und gleich an W\xC3\xBCrde und Rechten "\
@@ -124,21 +124,21 @@ static void test_fts_language_detect_german(void)
 
        const char names[] = "fi, de, fr, en";
        const char *unknown, *error;
-       test_begin("fts language detect German");
-       test_assert(fts_language_list_init(settings, &lp, &error) == 0);
-       test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE);
-       test_assert(fts_language_detect(lp, german, sizeof(german)-1, &lang_r, &error)
-                   == FTS_LANGUAGE_RESULT_OK);
+       test_begin("language detect German");
+       test_assert(language_list_init(settings, &lp, &error) == 0);
+       test_assert(language_list_add_names(lp, names, &unknown) == TRUE);
+       test_assert(language_detect(lp, german, sizeof(german)-1, &lang_r, &error)
+                   == LANGUAGE_RESULT_OK);
        test_assert(strcmp(lang_r->name, "de") == 0);
-       fts_language_list_deinit(&lp);
+       language_list_deinit(&lp);
        test_end();
 }
 
 /* Detect Swedish */
-static void test_fts_language_detect_swedish(void)
+static void test_language_detect_swedish(void)
 {
-       struct fts_language_list *lp = NULL;
-       const struct fts_language *lang_r = NULL;
+       struct language_list *lp = NULL;
+       const struct language *lang_r = NULL;
        const unsigned char swedish[]  =
                "Artikel 1."\
                "Alla m\xC3\xA4nniskor \xC3\xA4ro f\xC3\xB6""dda fria och lika"\
@@ -150,21 +150,21 @@ static void test_fts_language_detect_swedish(void)
 
        const char names[] = "fi, de, sv, fr, en";
        const char *unknown, *error;
-       test_begin("fts language detect Swedish");
-       test_assert(fts_language_list_init(settings, &lp, &error) == 0);
-       test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE);
-       test_assert(fts_language_detect(lp, swedish, sizeof(swedish)-1, &lang_r, &error)
-                   == FTS_LANGUAGE_RESULT_OK);
+       test_begin("language detect Swedish");
+       test_assert(language_list_init(settings, &lp, &error) == 0);
+       test_assert(language_list_add_names(lp, names, &unknown) == TRUE);
+       test_assert(language_detect(lp, swedish, sizeof(swedish)-1, &lang_r, &error)
+                   == LANGUAGE_RESULT_OK);
        test_assert(strcmp(lang_r->name, "sv") == 0);
-       fts_language_list_deinit(&lp);
+       language_list_deinit(&lp);
        test_end();
 }
 
 /* Detect Bokmal */
-static void test_fts_language_detect_bokmal(void)
+static void test_language_detect_bokmal(void)
 {
-       struct fts_language_list *lp = NULL;
-       const struct fts_language *lang_r = NULL;
+       struct language_list *lp = NULL;
+       const struct language *lang_r = NULL;
        const unsigned char bokmal[]  =
                "Artikkel 1.\n"\
                "Alle mennesker er f\xC3\xB8""dt frie og med samme menneskeverd"\
@@ -174,21 +174,21 @@ static void test_fts_language_detect_bokmal(void)
 
        const char names[] = "fi, de, sv, no, fr, en";
        const char *unknown, *error;
-       test_begin("fts language detect Bokmal as Norwegian");
-       test_assert(fts_language_list_init(settings, &lp, &error) == 0);
-       test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE);
-       test_assert(fts_language_detect(lp, bokmal, sizeof(bokmal)-1, &lang_r, &error)
-                   == FTS_LANGUAGE_RESULT_OK);
+       test_begin("language detect Bokmal as Norwegian");
+       test_assert(language_list_init(settings, &lp, &error) == 0);
+       test_assert(language_list_add_names(lp, names, &unknown) == TRUE);
+       test_assert(language_detect(lp, bokmal, sizeof(bokmal)-1, &lang_r, &error)
+                   == LANGUAGE_RESULT_OK);
        test_assert(strcmp(lang_r->name, "no") == 0);
-       fts_language_list_deinit(&lp);
+       language_list_deinit(&lp);
        test_end();
 }
 
 /* Detect Nynorsk */
-static void test_fts_language_detect_nynorsk(void)
+static void test_language_detect_nynorsk(void)
 {
-       struct fts_language_list *lp = NULL;
-       const struct fts_language *lang_r = NULL;
+       struct language_list *lp = NULL;
+       const struct language *lang_r = NULL;
        const unsigned char nynorsk[]  =
                "Artikkel 1.\n"\
                "Alle menneske er f\xC3\xB8""dde til fridom og med same "\
@@ -198,21 +198,21 @@ static void test_fts_language_detect_nynorsk(void)
 
        const char names[] = "fi, de, sv, no, fr, en";
        const char *unknown, *error;
-       test_begin("fts language detect Nynorsk as Norwegian");
-       test_assert(fts_language_list_init(settings, &lp, &error) == 0);
-       test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE);
-       test_assert(fts_language_detect(lp, nynorsk, sizeof(nynorsk)-1, &lang_r, &error)
-                   == FTS_LANGUAGE_RESULT_OK);
+       test_begin("language detect Nynorsk as Norwegian");
+       test_assert(language_list_init(settings, &lp, &error) == 0);
+       test_assert(language_list_add_names(lp, names, &unknown) == TRUE);
+       test_assert(language_detect(lp, nynorsk, sizeof(nynorsk)-1, &lang_r, &error)
+                   == LANGUAGE_RESULT_OK);
        test_assert(strcmp(lang_r->name, "no") == 0);
-       fts_language_list_deinit(&lp);
+       language_list_deinit(&lp);
        test_end();
 }
 
 /* Detect Finnish as English */
-static void test_fts_language_detect_finnish_as_english(void)
+static void test_language_detect_finnish_as_english(void)
 {
-       struct fts_language_list *lp = NULL;
-       const struct fts_language *lang_r = NULL;
+       struct language_list *lp = NULL;
+       const struct language *lang_r = NULL;
        const unsigned char finnish[] =
                "Yhdistyneiden kansakuntien kolmas yleiskokous hyv\xC3\xA4ksyi "\
                "ja julkisti ihmisoikeuksien yleismaailmallisen julistuksen "\
@@ -223,21 +223,21 @@ static void test_fts_language_detect_finnish_as_english(void)
                "\xC3\xA4\xC3\xA4nest\xC3\xA4m\xC3\xA4st\xC3\xA4.";
        const char names[] = "en";
        const char *unknown, *error;
-       test_begin("fts language detect Finnish as English");
-       test_assert(fts_language_list_init(settings, &lp, &error) == 0);
-       test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE);
-       test_assert(fts_language_detect(lp, finnish, sizeof(finnish)-1, &lang_r, &error)
-                   == FTS_LANGUAGE_RESULT_OK);
+       test_begin("language detect Finnish as English");
+       test_assert(language_list_init(settings, &lp, &error) == 0);
+       test_assert(language_list_add_names(lp, names, &unknown) == TRUE);
+       test_assert(language_detect(lp, finnish, sizeof(finnish)-1, &lang_r, &error)
+                   == LANGUAGE_RESULT_OK);
        test_assert(strcmp(lang_r->name, "en") == 0);
-       fts_language_list_deinit(&lp);
+       language_list_deinit(&lp);
        test_end();
 }
 
 /* Successfully avoid detecting English, when en is not in language list. */
-static void test_fts_language_detect_na(void)
+static void test_language_detect_na(void)
 {
-       struct fts_language_list *lp = NULL;
-       const struct fts_language *lang_r = NULL;
+       struct language_list *lp = NULL;
+       const struct language *lang_r = NULL;
        const unsigned char english[]  = "Whereas recognition of the inherent dignity and"\
                " of the equal and inalienable rights of all members of the human"\
                "family is the foundation of freedom, justice and peace in the "\
@@ -250,49 +250,49 @@ static void test_fts_language_detect_na(void)
 
        const char names[] = "fi, de, fr";
        const char *unknown, *error;
-       test_begin("fts language detect not available");
-       test_assert(fts_language_list_init(settings, &lp, &error) == 0);
-       test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE);
-       test_assert(fts_language_detect(lp, english, sizeof(english)-1, &lang_r, &error)
-                   == FTS_LANGUAGE_RESULT_UNKNOWN);
-       fts_language_list_deinit(&lp);
+       test_begin("language detect not available");
+       test_assert(language_list_init(settings, &lp, &error) == 0);
+       test_assert(language_list_add_names(lp, names, &unknown) == TRUE);
+       test_assert(language_detect(lp, english, sizeof(english)-1, &lang_r, &error)
+                   == LANGUAGE_RESULT_UNKNOWN);
+       language_list_deinit(&lp);
        test_end();
 }
 
 /* Successfully detect, that Klingon is unknown. */
-static void test_fts_language_detect_unknown(void)
+static void test_language_detect_unknown(void)
 {
-       struct fts_language_list *lp = NULL;
-       const struct fts_language *lang_r = NULL;
+       struct language_list *lp = NULL;
+       const struct language *lang_r = NULL;
        const unsigned char klingon[]  = "nobwI''a'pu'qoqvam'e' "\
                "nuHegh'eghrupqa'moHlaHbe'law'lI'neS "\
                "SeH'eghtaHghach'a'na'chajmo'.";
 
        const char names[] = "fi, de, fr";
        const char *unknown, *error;
-       test_begin("fts language detect unknown");
-       test_assert(fts_language_list_init(settings, &lp, &error) == 0);
-       test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE);
-       test_assert(fts_language_detect(lp, klingon, sizeof(klingon), &lang_r, &error)
-                   == FTS_LANGUAGE_RESULT_UNKNOWN);
-       fts_language_list_deinit(&lp);
+       test_begin("language detect unknown");
+       test_assert(language_list_init(settings, &lp, &error) == 0);
+       test_assert(language_list_add_names(lp, names, &unknown) == TRUE);
+       test_assert(language_detect(lp, klingon, sizeof(klingon), &lang_r, &error)
+                   == LANGUAGE_RESULT_UNKNOWN);
+       language_list_deinit(&lp);
        test_end();
 }
-static void test_fts_language_find_builtin(void)
+static void test_language_find_builtin(void)
 {
-       const struct fts_language *lp;
-       test_begin("fts language find built-in");
-       lp = fts_language_find("en");
+       const struct language *lp;
+       test_begin("language find built-in");
+       lp = language_find("en");
        i_assert(lp != NULL);
        test_assert(strcmp(lp->name, "en") == 0);
        test_end();
 }
-static void test_fts_language_register(void)
+static void test_language_register(void)
 {
-       const struct fts_language *lp;
-       test_begin("fts language register");
-       fts_language_register("jp");
-       lp = fts_language_find("jp");
+       const struct language *lp;
+       test_begin("language register");
+       language_register("jp");
+       lp = language_find("jp");
        i_assert(lp != NULL);
        test_assert(strcmp(lp->name, "jp") == 0);
        test_end();
@@ -302,22 +302,22 @@ int main(void)
 {
        int ret;
        static void (*const test_functions[])(void) = {
-               test_fts_language_detect_finnish,
-               test_fts_language_detect_english,
-               test_fts_language_detect_french,
-               test_fts_language_detect_german,
-               test_fts_language_detect_swedish,
-               test_fts_language_detect_bokmal,
-               test_fts_language_detect_nynorsk,
-               test_fts_language_detect_finnish_as_english,
-               test_fts_language_detect_na,
-               test_fts_language_detect_unknown,
-               test_fts_language_find_builtin,
-               test_fts_language_register,
+               test_language_detect_finnish,
+               test_language_detect_english,
+               test_language_detect_french,
+               test_language_detect_german,
+               test_language_detect_swedish,
+               test_language_detect_bokmal,
+               test_language_detect_nynorsk,
+               test_language_detect_finnish_as_english,
+               test_language_detect_na,
+               test_language_detect_unknown,
+               test_language_find_builtin,
+               test_language_register,
                NULL
        };
-       fts_languages_init();
+       languages_init();
        ret = test_run(test_functions);
-       fts_languages_deinit();
+       languages_deinit();
        return ret;
 }
index a02775c19e259be972fb1bf7d242fb57b61e0d44..2951f8309c15190e83e45fba06f142c0ddffd888 100644 (file)
@@ -187,7 +187,7 @@ cmd_fts_tokenize_run(struct doveadm_mail_cmd_context *_ctx,
        struct mail_namespace *ns = mail_namespace_find_inbox(user->namespaces);
        struct fts_backend *backend;
        struct fts_user_language *user_lang;
-       const struct fts_language *lang = NULL;
+       const struct language *lang = NULL;
        int ret, ret2;
        bool final = FALSE;
 
@@ -199,30 +199,30 @@ cmd_fts_tokenize_run(struct doveadm_mail_cmd_context *_ctx,
        }
 
        if (ctx->language == NULL) {
-               struct fts_language_list *lang_list =
+               struct language_list *lang_list =
                        fts_user_get_language_list(user);
-               enum fts_language_result result;
+               enum language_result result;
                const char *error;
 
-               result = fts_language_detect(lang_list,
+               result = language_detect(lang_list,
                    (const unsigned char *)ctx->tokens, strlen(ctx->tokens),
                     &lang, &error);
                if (lang == NULL)
-                       lang = fts_language_list_get_first(lang_list);
+                       lang = language_list_get_first(lang_list);
                switch (result) {
-               case FTS_LANGUAGE_RESULT_SHORT:
+               case LANGUAGE_RESULT_SHORT:
                        e_warning(user->event,
                                  "Text too short, can't detect its language - assuming %s",
                                  lang->name);
                        break;
-               case FTS_LANGUAGE_RESULT_UNKNOWN:
+               case LANGUAGE_RESULT_UNKNOWN:
                        e_warning(user->event,
                                  "Can't detect its language - assuming %s",
                                  lang->name);
                        break;
-               case FTS_LANGUAGE_RESULT_OK:
+               case LANGUAGE_RESULT_OK:
                        break;
-               case FTS_LANGUAGE_RESULT_ERROR:
+               case LANGUAGE_RESULT_ERROR:
                        e_error(user->event,
                                "Language detection library initialization failed: %s",
                                error);
@@ -232,7 +232,7 @@ cmd_fts_tokenize_run(struct doveadm_mail_cmd_context *_ctx,
                        i_unreached();
                }
        } else {
-               lang = fts_language_find(ctx->language);
+               lang = language_find(ctx->language);
                if (lang == NULL) {
                        e_error(user->event,
                                "Unknown language: %s", ctx->language);
@@ -248,22 +248,22 @@ cmd_fts_tokenize_run(struct doveadm_mail_cmd_context *_ctx,
                return -1;
        }
 
-       fts_tokenizer_reset(user_lang->index_tokenizer);
+       lang_tokenizer_reset(user_lang->index_tokenizer);
        for (;;) {
                const char *token, *error;
 
                if (!final) {
-                       ret = fts_tokenizer_next(user_lang->index_tokenizer,
+                       ret = lang_tokenizer_next(user_lang->index_tokenizer,
                                (const unsigned char *)ctx->tokens, strlen(ctx->tokens),
                                &token, &error);
                } else {
-                       ret = fts_tokenizer_final(user_lang->index_tokenizer,
+                       ret = lang_tokenizer_final(user_lang->index_tokenizer,
                                                  &token, &error);
                }
                if (ret < 0)
                        break;
                if (ret > 0 && user_lang->filter != NULL) {
-                       ret2 = fts_filter_filter(user_lang->filter, &token, &error);
+                       ret2 = lang_filter(user_lang->filter, &token, &error);
                        if (ret2 > 0)
                                doveadm_print(token);
                        else if (ret2 < 0)
index 55466cfbd1fed745e50084a33dcae86dd72e10dd..e9eb5d8804bd6dea1ffecc1a7e9a1d43da70df3e 100644 (file)
@@ -117,7 +117,7 @@ void fts_backend_unregister(const char *name);
 bool fts_backend_default_can_lookup(struct fts_backend *backend,
                                    const struct mail_search_arg *args);
 
-void fts_filter_uids(ARRAY_TYPE(seq_range) *definite_dest,
+void lang_filter_uids(ARRAY_TYPE(seq_range) *definite_dest,
                     const ARRAY_TYPE(seq_range) *definite_filter,
                     ARRAY_TYPE(seq_range) *maybe_dest,
                     const ARRAY_TYPE(seq_range) *maybe_filter);
index 9793db79c10d37c3bd50e4e0c4f1589aebed8340..6db3891ae42cd4f0cb432b649f4396eb0fc7f28b 100644 (file)
@@ -415,7 +415,7 @@ fts_merge_maybies(ARRAY_TYPE(seq_range) *dest_maybe,
        }
 }
 
-void fts_filter_uids(ARRAY_TYPE(seq_range) *definite_dest,
+void lang_filter_uids(ARRAY_TYPE(seq_range) *definite_dest,
                     const ARRAY_TYPE(seq_range) *definite_filter,
                     ARRAY_TYPE(seq_range) *maybe_dest,
                     const ARRAY_TYPE(seq_range) *maybe_filter)
index 83599addbba8ade43015c77d471cccdba70219c6..75ecae989172fdf3099bb98be1d6582cb26f0691 100644 (file)
@@ -114,7 +114,7 @@ static void fts_mail_build_ctx_set_lang(struct fts_mail_build_context *ctx,
        /* reset tokenizer between fields - just to be sure no state
           leaks between fields (especially if previous indexing had
           failed) */
-       fts_tokenizer_reset(user_lang->index_tokenizer);
+       lang_tokenizer_reset(user_lang->index_tokenizer);
 }
 
 static void
@@ -267,15 +267,15 @@ static int
 fts_build_add_tokens_with_filter(struct fts_mail_build_context *ctx,
                                 const unsigned char *data, size_t size)
 {
-       struct fts_tokenizer *tokenizer = ctx->cur_user_lang->index_tokenizer;
-       struct fts_filter *filter = ctx->cur_user_lang->filter;
+       struct lang_tokenizer *tokenizer = ctx->cur_user_lang->index_tokenizer;
+       struct lang_filter *filter = ctx->cur_user_lang->filter;
        const char *token, *error;
        int ret = 1, ret2;
 
        while (ret > 0) T_BEGIN {
-               ret = ret2 = fts_tokenizer_next(tokenizer, data, size, &token, &error);
+               ret = ret2 = lang_tokenizer_next(tokenizer, data, size, &token, &error);
                if (ret2 > 0 && filter != NULL)
-                       ret2 = fts_filter_filter(filter, &token, &error);
+                       ret2 = lang_filter(filter, &token, &error);
                if (ret2 < 0) {
                        mail_set_critical(ctx->mail,
                                "fts: Couldn't create indexable tokens: %s",
@@ -296,31 +296,31 @@ fts_build_add_tokens_with_filter(struct fts_mail_build_context *ctx,
 static int
 fts_detect_language(struct fts_mail_build_context *ctx,
                    const unsigned char *data, size_t size, bool last,
-                   const struct fts_language **lang_r)
+                   const struct language **lang_r)
 {
        struct mail_user *user = ctx->update_ctx->backend->ns->user;
-       struct fts_language_list *lang_list = fts_user_get_language_list(user);
-       const struct fts_language *lang;
+       struct language_list *lang_list = fts_user_get_language_list(user);
+       const struct language *lang;
        const char *error;
 
-       switch (fts_language_detect(lang_list, data, size, &lang, &error)) {
-       case FTS_LANGUAGE_RESULT_SHORT:
+       switch (language_detect(lang_list, data, size, &lang, &error)) {
+       case LANGUAGE_RESULT_SHORT:
                /* save the input so far and try again later */
                buffer_append(ctx->pending_input, data, size);
                if (last) {
                        /* we've run out of data. use the default language. */
-                       *lang_r = fts_language_list_get_first(lang_list);
+                       *lang_r = language_list_get_first(lang_list);
                        return 1;
                }
                return 0;
-       case FTS_LANGUAGE_RESULT_UNKNOWN:
+       case LANGUAGE_RESULT_UNKNOWN:
                /* use the default language */
-               *lang_r = fts_language_list_get_first(lang_list);
+               *lang_r = language_list_get_first(lang_list);
                return 1;
-       case FTS_LANGUAGE_RESULT_OK:
+       case LANGUAGE_RESULT_OK:
                *lang_r = lang;
                return 1;
-       case FTS_LANGUAGE_RESULT_ERROR:
+       case LANGUAGE_RESULT_ERROR:
                /* internal language detection library failure
                   (e.g. invalid config). don't index anything. */
                mail_set_critical(ctx->mail,
@@ -337,7 +337,7 @@ fts_build_tokenized(struct fts_mail_build_context *ctx,
                    const unsigned char *data, size_t size, bool last)
 {
        struct mail_user *user = ctx->update_ctx->backend->ns->user;
-       const struct fts_language *lang;
+       const struct language *lang;
        int ret;
 
        if (ctx->cur_user_lang != NULL) {
index a6011c9439dcc1a72483f477c98a4ea348d2ccd1..1a1b97209a602a9be73dc5212b60b361263b156e 100644 (file)
@@ -21,13 +21,13 @@ static struct mail_storage_hooks fts_mail_storage_hooks = {
 
 void fts_plugin_init(struct module *module)
 {
-       fts_library_init();
+       lang_library_init();
        mail_storage_hooks_add(module, &fts_mail_storage_hooks);
 }
 
 void fts_plugin_deinit(void)
 {
-       fts_library_deinit();
+       lang_library_deinit();
        fts_parsers_unload();
        mail_storage_hooks_remove(&fts_mail_storage_hooks);
 }
index 5c7be41a3121c94934598bedec7ee45005c7d99c..85f0fa3862623d1790d11ae53f9f4fed1493bc26 100644 (file)
@@ -53,7 +53,7 @@ fts_search_arg_create_or(const struct mail_search_arg *orig_arg, pool_t pool,
 }
 
 static int
-fts_backend_dovecot_expand_tokens(struct fts_filter *filter,
+fts_backend_dovecot_expand_tokens(struct lang_filter *filter,
                                  pool_t pool,
                                  struct mail_search_arg *parent_arg,
                                  const struct mail_search_arg *orig_arg,
@@ -74,7 +74,7 @@ fts_backend_dovecot_expand_tokens(struct fts_filter *filter,
        /* add the word filtered */
        if (filter != NULL) {
                token2 = t_strdup(token);
-               ret = fts_filter_filter(filter, &token2, &error);
+               ret = lang_filter(filter, &token2, &error);
                if (ret > 0) {
                        token2 = t_strdup(token2);
                        array_push_back(&tokens, &token2);
@@ -118,8 +118,8 @@ fts_backend_dovecot_tokenize_lang(struct fts_user_language *user_lang,
 
        /* reset tokenizer between search args in case there's any state left
           from some previous failure */
-       fts_tokenizer_reset(user_lang->search_tokenizer);
-       while ((ret = fts_tokenizer_next(user_lang->search_tokenizer,
+       lang_tokenizer_reset(user_lang->search_tokenizer);
+       while ((ret = lang_tokenizer_next(user_lang->search_tokenizer,
                                         (const void *)orig_token,
                                         orig_token_len, &token, &error)) > 0) {
                if (fts_backend_dovecot_expand_tokens(user_lang->filter, pool,
@@ -128,7 +128,7 @@ fts_backend_dovecot_tokenize_lang(struct fts_user_language *user_lang,
                        return -1;
        }
        while (ret >= 0 &&
-              (ret = fts_tokenizer_final(user_lang->search_tokenizer, &token, &error)) > 0) {
+              (ret = lang_tokenizer_final(user_lang->search_tokenizer, &token, &error)) > 0) {
                if (fts_backend_dovecot_expand_tokens(user_lang->filter, pool,
                                                      and_arg, orig_arg, orig_token,
                                                      token, error_r) < 0)
index 09ada01ff7aff362cffcfb09efcfde87839cf693..a160efed0e07092df524b9c23cff15c0e84d01f2 100644 (file)
@@ -20,7 +20,7 @@ struct fts_user {
        union mail_user_module_context module_ctx;
        int refcount;
 
-       struct fts_language_list *lang_list;
+       struct language_list *lang_list;
        struct fts_user_language *data_lang;
        ARRAY_TYPE(fts_user_language) languages, data_languages;
 
@@ -74,15 +74,15 @@ fts_user_init_languages(struct mail_user *user, struct fts_user *fuser,
        lang_config[1] = mail_user_plugin_getenv(user, "fts_language_config");
        if (lang_config[1] != NULL)
                lang_config[0] = "fts_language_config";
-       if (fts_language_list_init(lang_config, &fuser->lang_list, error_r) < 0)
+       if (language_list_init(lang_config, &fuser->lang_list, error_r) < 0)
                return -1;
 
-       if (!fts_language_list_add_names(fuser->lang_list, languages, &unknown)) {
+       if (!language_list_add_names(fuser->lang_list, languages, &unknown)) {
                *error_r = t_strdup_printf(
                        "fts_languages: Unknown language '%s'", unknown);
                return -1;
        }
-       if (array_count(fts_language_list_get_all(fuser->lang_list)) == 0) {
+       if (array_count(language_list_get_all(fuser->lang_list)) == 0) {
                *error_r = "fts_languages setting is empty";
                return -1;
        }
@@ -90,11 +90,11 @@ fts_user_init_languages(struct mail_user *user, struct fts_user *fuser,
 }
 
 static int
-fts_user_create_filters(struct mail_user *user, const struct fts_language *lang,
-                       struct fts_filter **filter_r, const char **error_r)
+fts_user_create_filters(struct mail_user *user, const struct language *lang,
+                       struct lang_filter **filter_r, const char **error_r)
 {
-       const struct fts_filter *filter_class;
-       struct fts_filter *filter = NULL, *parent = NULL;
+       const struct lang_filter *filter_class;
+       struct lang_filter *filter = NULL, *parent = NULL;
        const char *filters_key, *const *filters, *filter_set_name;
        const char *str, *error, *set_key;
        unsigned int i;
@@ -116,7 +116,7 @@ fts_user_create_filters(struct mail_user *user, const struct fts_language *lang,
 
        filters = t_strsplit_spaces(str, " ");
        for (i = 0; filters[i] != NULL; i++) {
-               filter_class = fts_filter_find(filters[i]);
+               filter_class = lang_filter_find(filters[i]);
                if (filter_class == NULL) {
                        *error_r = t_strdup_printf("%s: Unknown filter '%s'",
                                                   filters_key, filters[i]);
@@ -134,20 +134,20 @@ fts_user_create_filters(struct mail_user *user, const struct fts_language *lang,
                        str = mail_user_plugin_getenv(user, set_key);
                }
 
-               if (fts_filter_create(filter_class, parent, lang,
-                                     str_keyvalues_to_array(str),
-                                     &filter, &error) < 0) {
+               if (lang_filter_create(filter_class, parent, lang,
+                                      str_keyvalues_to_array(str),
+                                      &filter, &error) < 0) {
                        *error_r = t_strdup_printf("%s: %s", set_key, error);
                        ret = -1;
                        break;
                }
                if (parent != NULL)
-                       fts_filter_unref(&parent);
+                       lang_filter_unref(&parent);
                parent = filter;
        }
        if (ret < 0) {
                if (parent != NULL)
-                       fts_filter_unref(&parent);
+                       lang_filter_unref(&parent);
                return -1;
        }
        *filter_r = filter;
@@ -156,12 +156,12 @@ fts_user_create_filters(struct mail_user *user, const struct fts_language *lang,
 
 static int
 fts_user_create_tokenizer(struct mail_user *user,
-                         const struct fts_language *lang,
-                         struct fts_tokenizer **tokenizer_r, bool search,
+                         const struct language *lang,
+                         struct lang_tokenizer **tokenizer_r, bool search,
                          const char **error_r)
 {
-       const struct fts_tokenizer *tokenizer_class;
-       struct fts_tokenizer *tokenizer = NULL, *parent = NULL;
+       const struct lang_tokenizer *tokenizer_class;
+       struct lang_tokenizer *tokenizer = NULL, *parent = NULL;
        const char *tokenizers_key, *const *tokenizers, *tokenizer_set_name;
        const char *str, *error, *set_key;
        unsigned int i;
@@ -181,7 +181,7 @@ fts_user_create_tokenizer(struct mail_user *user,
        tokenizers = t_strsplit_spaces(str, " ");
 
        for (i = 0; tokenizers[i] != NULL; i++) {
-               tokenizer_class = fts_tokenizer_find(tokenizers[i]);
+               tokenizer_class = lang_tokenizer_find(tokenizers[i]);
                if (tokenizer_class == NULL) {
                        *error_r = t_strdup_printf("%s: Unknown tokenizer '%s'",
                                                   tokenizers_key, tokenizers[i]);
@@ -202,20 +202,20 @@ fts_user_create_tokenizer(struct mail_user *user,
                if (search)
                        str = t_strconcat("search=yes ", str, NULL);
 
-               if (fts_tokenizer_create(tokenizer_class, parent,
-                                        str_keyvalues_to_array(str),
-                                        &tokenizer, &error) < 0) {
+               if (lang_tokenizer_create(tokenizer_class, parent,
+                                         str_keyvalues_to_array(str),
+                                         &tokenizer, &error) < 0) {
                        *error_r = t_strdup_printf("%s: %s", set_key, error);
                        ret = -1;
                        break;
                }
                if (parent != NULL)
-                       fts_tokenizer_unref(&parent);
+                       lang_tokenizer_unref(&parent);
                parent = tokenizer;
        }
        if (ret < 0) {
                if (parent != NULL)
-                       fts_tokenizer_unref(&parent);
+                       lang_tokenizer_unref(&parent);
                return -1;
        }
        *tokenizer_r = tokenizer;
@@ -246,7 +246,7 @@ fts_user_language_init_tokenizers(struct mail_user *user,
 
 struct fts_user_language *
 fts_user_language_find(struct mail_user *user,
-                      const struct fts_language *lang)
+                      const struct language *lang)
 {
        struct fts_user_language *user_lang;
        struct fts_user *fuser = FTS_USER_CONTEXT_REQUIRE(user);
@@ -260,7 +260,7 @@ fts_user_language_find(struct mail_user *user,
 
 static int fts_user_language_create(struct mail_user *user,
                                     struct fts_user *fuser,
-                                   const struct fts_language *lang,
+                                   const struct language *lang,
                                    const char **error_r)
 {
        struct fts_user_language *user_lang;
@@ -280,9 +280,9 @@ static int fts_user_languages_fill_all(struct mail_user *user,
                                        struct fts_user *fuser,
                                        const char **error_r)
 {
-       const struct fts_language *lang;
+       const struct language *lang;
 
-       array_foreach_elem(fts_language_list_get_all(fuser->lang_list), lang) {
+       array_foreach_elem(language_list_get_all(fuser->lang_list), lang) {
                if (fts_user_language_create(user, fuser, lang, error_r) < 0)
                        return -1;
        }
@@ -297,13 +297,13 @@ fts_user_init_data_language(struct mail_user *user, struct fts_user *fuser,
        const char *error;
 
        user_lang = p_new(user->pool, struct fts_user_language, 1);
-       user_lang->lang = &fts_language_data;
+       user_lang->lang = &language_data;
 
        if (fts_user_language_init_tokenizers(user, user_lang, error_r) < 0)
                return -1;
 
-       if (fts_filter_create(fts_filter_lowercase, NULL, user_lang->lang, NULL,
-                             &user_lang->filter, &error) < 0)
+       if (lang_filter_create(lang_filter_lowercase, NULL, user_lang->lang, NULL,
+                              &user_lang->filter, &error) < 0)
                i_unreached();
        i_assert(user_lang->filter != NULL);
 
@@ -315,7 +315,7 @@ fts_user_init_data_language(struct mail_user *user, struct fts_user *fuser,
        return 0;
 }
 
-struct fts_language_list *fts_user_get_language_list(struct mail_user *user)
+struct language_list *fts_user_get_language_list(struct mail_user *user)
 {
        struct fts_user *fuser = FTS_USER_CONTEXT_REQUIRE(user);
 
@@ -355,11 +355,11 @@ bool fts_user_autoindex_exclude(struct mailbox *box)
 static void fts_user_language_free(struct fts_user_language *user_lang)
 {
        if (user_lang->filter != NULL)
-               fts_filter_unref(&user_lang->filter);
+               lang_filter_unref(&user_lang->filter);
        if (user_lang->index_tokenizer != NULL)
-               fts_tokenizer_unref(&user_lang->index_tokenizer);
+               lang_tokenizer_unref(&user_lang->index_tokenizer);
        if (user_lang->search_tokenizer != NULL)
-               fts_tokenizer_unref(&user_lang->search_tokenizer);
+               lang_tokenizer_unref(&user_lang->search_tokenizer);
 }
 
 static void fts_user_free(struct fts_user *fuser)
@@ -367,7 +367,7 @@ static void fts_user_free(struct fts_user *fuser)
        struct fts_user_language *user_lang;
 
        if (fuser->lang_list != NULL)
-               fts_language_list_deinit(&fuser->lang_list);
+               language_list_deinit(&fuser->lang_list);
 
        if (array_is_created(&fuser->languages)) {
                array_foreach_elem(&fuser->languages, user_lang)
index c4d3d23029dc81d5ef30545926615041bb2d0ce4..29dee8ab6ebac69d8b4f5b4e28c5959ca130701c 100644 (file)
@@ -2,16 +2,16 @@
 #define FTS_USER_H
 
 struct fts_user_language {
-       const struct fts_language *lang;
-       struct fts_filter *filter;
-       struct fts_tokenizer *index_tokenizer, *search_tokenizer;
+       const struct language *lang;
+       struct lang_filter *filter;
+       struct lang_tokenizer *index_tokenizer, *search_tokenizer;
 };
 ARRAY_DEFINE_TYPE(fts_user_language, struct fts_user_language *);
 
 struct fts_user_language *
 fts_user_language_find(struct mail_user *user,
-                       const struct fts_language *lang);
-struct fts_language_list *fts_user_get_language_list(struct mail_user *user);
+                       const struct language *lang);
+struct language_list *fts_user_get_language_list(struct mail_user *user);
 const ARRAY_TYPE(fts_user_language) *
 fts_user_get_all_languages(struct mail_user *user);
 struct fts_user_language *fts_user_get_data_lang(struct mail_user *user);