From: Markus Valentin Date: Fri, 10 Nov 2023 17:09:38 +0000 (+0100) Subject: lib-language: Rename functions and macros X-Git-Tag: 2.4.1~1340 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=76ce4ff37a2464060c7de8e8d3bea68cdcfb2638;p=thirdparty%2Fdovecot%2Fcore.git lib-language: Rename functions and macros This replaces all fts mentions with lang to complete the lib-fts to lib-lang renaming. --- diff --git a/src/lib-language/lang-common.h b/src/lib-language/lang-common.h index 1a1446390a..7a5d58c680 100644 --- a/src/lib-language/lang-common.h +++ b/src/lib-language/lang-common.h @@ -1,5 +1,5 @@ -#ifndef FTS_COMMON_H -#define FTS_COMMON_H +#ifndef LANG_COMMON_H +#define LANG_COMMON_H /* Some might consider 0x02BB an apostrophe also. */ #define IS_NONASCII_APOSTROPHE(c) \ @@ -8,9 +8,9 @@ ((c) == 0x0027 || IS_NONASCII_APOSTROPHE(c)) #define IS_WB5A_APOSTROPHE(c) \ ((c) == 0x0027 || (c) == 0x2019) -#define FTS_PREFIX_SPLAT_CHAR 0x002A /* '*' */ +#define LANG_PREFIX_SPLAT_CHAR 0x002A /* '*' */ #define IS_PREFIX_SPLAT(c) \ - ((c) == FTS_PREFIX_SPLAT_CHAR) + ((c) == LANG_PREFIX_SPLAT_CHAR) /* The h letters are included because it is an exception in French. A, E, H, I, O, U, Y, a, e, h, i, o, u, y */ #define IS_ASCII_VOWEL(c) \ diff --git a/src/lib-language/lang-filter-common.c b/src/lib-language/lang-filter-common.c index 75f006e0cf..ba14101ae1 100644 --- a/src/lib-language/lang-filter-common.c +++ b/src/lib-language/lang-filter-common.c @@ -8,13 +8,13 @@ #include "lang-filter-common.h" #include "lang-tokenizer-common.h" -void fts_filter_truncate_token(string_t *token, size_t max_length) +void lang_filter_truncate_token(string_t *token, size_t max_length) { if (str_len(token) <= max_length) return; size_t len = max_length; - fts_tokenizer_delete_trailing_partial_char(token->data, &len); + lang_tokenizer_delete_trailing_partial_char(token->data, &len); str_truncate(token, len); i_assert(len <= max_length); } diff --git a/src/lib-language/lang-filter-common.h b/src/lib-language/lang-filter-common.h index 7b6552cf5d..08ca137e18 100644 --- a/src/lib-language/lang-filter-common.h +++ b/src/lib-language/lang-filter-common.h @@ -1,6 +1,6 @@ -#ifndef FTS_FILTER_COMMON_H -#define FTS_FILTER_COMMON_H +#ifndef LANG_FILTER_COMMON_H +#define LANG_FILTER_COMMON_H -void fts_filter_truncate_token(string_t *token, size_t max_length); +void lang_filter_truncate_token(string_t *token, size_t max_length); #endif diff --git a/src/lib-language/lang-filter-contractions.c b/src/lib-language/lang-filter-contractions.c index 7531172998..a225718351 100644 --- a/src/lib-language/lang-filter-contractions.c +++ b/src/lib-language/lang-filter-contractions.c @@ -8,12 +8,12 @@ #include "unichar.h" static int -fts_filter_contractions_create(const struct fts_language *lang, +lang_filter_contractions_create(const struct language *lang, const char *const *settings, - struct fts_filter **filter_r, + struct lang_filter **filter_r, const char **error_r) { - struct fts_filter *filter; + struct lang_filter *filter; if (settings[0] != NULL) { *error_r = t_strdup_printf("Unknown setting: %s", settings[0]); @@ -24,15 +24,15 @@ fts_filter_contractions_create(const struct fts_language *lang, return -1; } - filter = i_new(struct fts_filter, 1); - *filter = *fts_filter_contractions; + filter = i_new(struct lang_filter, 1); + *filter = *lang_filter_contractions; filter->token = str_new(default_pool, 64); *filter_r = filter; return 0; } static int -fts_filter_contractions_filter(struct fts_filter *filter ATTR_UNUSED, +lang_filter_contractions_filter(struct lang_filter *filter ATTR_UNUSED, const char **_token, const char **error_r ATTR_UNUSED) { @@ -74,13 +74,13 @@ fts_filter_contractions_filter(struct fts_filter *filter ATTR_UNUSED, return 1; } -static const struct fts_filter fts_filter_contractions_real = { +static const struct lang_filter lang_filter_contractions_real = { .class_name = "contractions", .v = { - fts_filter_contractions_create, - fts_filter_contractions_filter, + lang_filter_contractions_create, + lang_filter_contractions_filter, NULL } }; -const struct fts_filter *fts_filter_contractions = &fts_filter_contractions_real; +const struct lang_filter *lang_filter_contractions = &lang_filter_contractions_real; diff --git a/src/lib-language/lang-filter-english-possessive.c b/src/lib-language/lang-filter-english-possessive.c index 33c809e729..192ef05c00 100644 --- a/src/lib-language/lang-filter-english-possessive.c +++ b/src/lib-language/lang-filter-english-possessive.c @@ -19,7 +19,7 @@ static unichar_t get_ending_utf8_char(const char *str, size_t *end_pos) } static int -fts_filter_english_possessive_filter(struct fts_filter *filter ATTR_UNUSED, +lang_filter_english_possessive_filter(struct lang_filter *filter ATTR_UNUSED, const char **token, const char **error_r ATTR_UNUSED) { @@ -35,13 +35,13 @@ fts_filter_english_possessive_filter(struct fts_filter *filter ATTR_UNUSED, return 1; } -static const struct fts_filter fts_filter_english_possessive_real = { +static const struct lang_filter lang_filter_english_possessive_real = { .class_name = "english-possessive", .v = { NULL, - fts_filter_english_possessive_filter, + lang_filter_english_possessive_filter, NULL } }; -const struct fts_filter *fts_filter_english_possessive = &fts_filter_english_possessive_real; +const struct lang_filter *lang_filter_english_possessive = &lang_filter_english_possessive_real; diff --git a/src/lib-language/lang-filter-lowercase.c b/src/lib-language/lang-filter-lowercase.c index 96ab82bfeb..15775ff725 100644 --- a/src/lib-language/lang-filter-lowercase.c +++ b/src/lib-language/lang-filter-lowercase.c @@ -11,12 +11,12 @@ #endif static int -fts_filter_lowercase_create(const struct fts_language *lang ATTR_UNUSED, - const char *const *settings, - struct fts_filter **filter_r, - const char **error_r) +lang_filter_lowercase_create(const struct language *lang ATTR_UNUSED, + const char *const *settings, + struct lang_filter **filter_r, + const char **error_r) { - struct fts_filter *filter; + struct lang_filter *filter; unsigned int i, max_length = 250; for (i = 0; settings[i] != NULL; i += 2) { @@ -34,8 +34,8 @@ fts_filter_lowercase_create(const struct fts_language *lang ATTR_UNUSED, return -1; } } - filter = i_new(struct fts_filter, 1); - *filter = *fts_filter_lowercase; + filter = i_new(struct lang_filter, 1); + *filter = *lang_filter_lowercase; filter->token = str_new(default_pool, 64); filter->max_length = max_length; @@ -44,14 +44,14 @@ fts_filter_lowercase_create(const struct fts_language *lang ATTR_UNUSED, } static int -fts_filter_lowercase_filter(struct fts_filter *filter ATTR_UNUSED, - const char **token, - const char **error_r ATTR_UNUSED) +lang_filter_lowercase_filter(struct lang_filter *filter ATTR_UNUSED, + const char **token, + const char **error_r ATTR_UNUSED) { #ifdef HAVE_LIBICU str_truncate(filter->token, 0); - fts_icu_lcase(filter->token, *token); - fts_filter_truncate_token(filter->token, filter->max_length); + lang_icu_lcase(filter->token, *token); + lang_filter_truncate_token(filter->token, filter->max_length); *token = str_c(filter->token); #else *token = t_str_lcase(*token); @@ -59,13 +59,13 @@ fts_filter_lowercase_filter(struct fts_filter *filter ATTR_UNUSED, return 1; } -static const struct fts_filter fts_filter_lowercase_real = { +static const struct lang_filter lang_filter_lowercase_real = { .class_name = "lowercase", .v = { - fts_filter_lowercase_create, - fts_filter_lowercase_filter, + lang_filter_lowercase_create, + lang_filter_lowercase_filter, NULL } }; -const struct fts_filter *fts_filter_lowercase = &fts_filter_lowercase_real; +const struct lang_filter *lang_filter_lowercase = &lang_filter_lowercase_real; diff --git a/src/lib-language/lang-filter-normalizer-icu.c b/src/lib-language/lang-filter-normalizer-icu.c index fb66423d96..d6f8339e8c 100644 --- a/src/lib-language/lang-filter-normalizer-icu.c +++ b/src/lib-language/lang-filter-normalizer-icu.c @@ -11,8 +11,8 @@ #ifdef HAVE_LIBICU #include "lang-icu.h" -struct fts_filter_normalizer_icu { - struct fts_filter filter; +struct lang_filter_normalizer_icu { + struct lang_filter filter; pool_t pool; const char *transliterator_id; @@ -21,10 +21,10 @@ struct fts_filter_normalizer_icu { string_t *utf8_token; }; -static void fts_filter_normalizer_icu_destroy(struct fts_filter *filter) +static void lang_filter_normalizer_icu_destroy(struct lang_filter *filter) { - struct fts_filter_normalizer_icu *np = - (struct fts_filter_normalizer_icu *)filter; + struct lang_filter_normalizer_icu *np = + (struct lang_filter_normalizer_icu *)filter; if (np->transliterator != NULL) utrans_close(np->transliterator); @@ -32,12 +32,12 @@ static void fts_filter_normalizer_icu_destroy(struct fts_filter *filter) } static int -fts_filter_normalizer_icu_create(const struct fts_language *lang ATTR_UNUSED, - const char *const *settings, - struct fts_filter **filter_r, - const char **error_r) +lang_filter_normalizer_icu_create(const struct language *lang ATTR_UNUSED, + const char *const *settings, + struct lang_filter **filter_r, + const char **error_r) { - struct fts_filter_normalizer_icu *np; + struct lang_filter_normalizer_icu *np; pool_t pp; unsigned int i, max_length = 250; const char *id = "Any-Lower; NFKD; [: Nonspacing Mark :] Remove; NFC; [\\x20] Remove"; @@ -59,11 +59,11 @@ fts_filter_normalizer_icu_create(const struct fts_language *lang ATTR_UNUSED, } } - pp = pool_alloconly_create(MEMPOOL_GROWING"fts_filter_normalizer_icu", - sizeof(struct fts_filter_normalizer_icu)); - np = p_new(pp, struct fts_filter_normalizer_icu, 1); + pp = pool_alloconly_create(MEMPOOL_GROWING"lang_filter_normalizer_icu", + sizeof(struct lang_filter_normalizer_icu)); + np = p_new(pp, struct lang_filter_normalizer_icu, 1); np->pool = pp; - np->filter = *fts_filter_normalizer_icu; + np->filter = *lang_filter_normalizer_icu; np->transliterator_id = p_strdup(pp, id); p_array_init(&np->utf16_token, pp, 64); p_array_init(&np->trans_token, pp, 64); @@ -74,33 +74,33 @@ fts_filter_normalizer_icu_create(const struct fts_language *lang ATTR_UNUSED, } static int -fts_filter_normalizer_icu_filter(struct fts_filter *filter, const char **token, +lang_filter_normalizer_icu_filter(struct lang_filter *filter, const char **token, const char **error_r) { - struct fts_filter_normalizer_icu *np = - (struct fts_filter_normalizer_icu *)filter; + struct lang_filter_normalizer_icu *np = + (struct lang_filter_normalizer_icu *)filter; if (np->transliterator == NULL) - if (fts_icu_transliterator_create(np->transliterator_id, - &np->transliterator, - error_r) < 0) + if (lang_icu_transliterator_create(np->transliterator_id, + &np->transliterator, + error_r) < 0) return -1; - fts_icu_utf8_to_utf16(&np->utf16_token, *token); + lang_icu_utf8_to_utf16(&np->utf16_token, *token); array_append_zero(&np->utf16_token); array_pop_back(&np->utf16_token); array_clear(&np->trans_token); - if (fts_icu_translate(&np->trans_token, array_front(&np->utf16_token), - array_count(&np->utf16_token), - np->transliterator, error_r) < 0) + if (lang_icu_translate(&np->trans_token, array_front(&np->utf16_token), + array_count(&np->utf16_token), + np->transliterator, error_r) < 0) return -1; if (array_count(&np->trans_token) == 0) return 0; - fts_icu_utf16_to_utf8(np->utf8_token, array_front(&np->trans_token), + lang_icu_utf16_to_utf8(np->utf8_token, array_front(&np->trans_token), array_count(&np->trans_token)); - fts_filter_truncate_token(np->utf8_token, np->filter.max_length); + lang_filter_truncate_token(np->utf8_token, np->filter.max_length); *token = str_c(np->utf8_token); return 1; } @@ -108,38 +108,38 @@ fts_filter_normalizer_icu_filter(struct fts_filter *filter, const char **token, #else static int -fts_filter_normalizer_icu_create(const struct fts_language *lang ATTR_UNUSED, - const char *const *settings ATTR_UNUSED, - struct fts_filter **filter_r ATTR_UNUSED, - const char **error_r) +lang_filter_normalizer_icu_create(const struct language *lang ATTR_UNUSED, + const char *const *settings ATTR_UNUSED, + struct lang_filter **filter_r ATTR_UNUSED, + const char **error_r) { *error_r = "libicu support not built in"; return -1; } static int -fts_filter_normalizer_icu_filter(struct fts_filter *filter ATTR_UNUSED, - const char **token ATTR_UNUSED, - const char **error_r ATTR_UNUSED) +lang_filter_normalizer_icu_filter(struct lang_filter *filter ATTR_UNUSED, + const char **token ATTR_UNUSED, + const char **error_r ATTR_UNUSED) { return -1; } static void -fts_filter_normalizer_icu_destroy(struct fts_filter *normalizer ATTR_UNUSED) +lang_filter_normalizer_icu_destroy(struct lang_filter *normalizer ATTR_UNUSED) { } #endif -static const struct fts_filter fts_filter_normalizer_icu_real = { +static const struct lang_filter lang_filter_normalizer_icu_real = { .class_name = "normalizer-icu", .v = { - fts_filter_normalizer_icu_create, - fts_filter_normalizer_icu_filter, - fts_filter_normalizer_icu_destroy + lang_filter_normalizer_icu_create, + lang_filter_normalizer_icu_filter, + lang_filter_normalizer_icu_destroy } }; -const struct fts_filter *fts_filter_normalizer_icu = - &fts_filter_normalizer_icu_real; +const struct lang_filter *lang_filter_normalizer_icu = + &lang_filter_normalizer_icu_real; diff --git a/src/lib-language/lang-filter-private.h b/src/lib-language/lang-filter-private.h index a43422dc84..d2e755cfda 100644 --- a/src/lib-language/lang-filter-private.h +++ b/src/lib-language/lang-filter-private.h @@ -1,9 +1,9 @@ -#ifndef FTS_FILTER_PRIVATE_H -#define FTS_FILTER_PRIVATE_H +#ifndef LANG_FILTER_PRIVATE_H +#define LANG_FILTER_PRIVATE_H #include "lang-filter.h" -#define FTS_FILTER_CLASSES_NR 6 +#define LANG_FILTER_CLASSES_NR 6 /* API that stemming providers (classes) must provide: The create() @@ -12,21 +12,21 @@ The destroy function is called to destroy an instance of a filter. */ -struct fts_filter_vfuncs { - int (*create)(const struct fts_language *lang, +struct lang_filter_vfuncs { + int (*create)(const struct language *lang, const char *const *settings, - struct fts_filter **filter_r, + struct lang_filter **filter_r, const char **error_r); - int (*filter)(struct fts_filter *filter, const char **token, + int (*filter)(struct lang_filter *filter, const char **token, const char **error_r); - void (*destroy)(struct fts_filter *filter); + void (*destroy)(struct lang_filter *filter); }; -struct fts_filter { +struct lang_filter { const char *class_name; /* name of the class this is based on */ - struct fts_filter_vfuncs v; - struct fts_filter *parent; + struct lang_filter_vfuncs v; + struct lang_filter *parent; string_t *token; size_t max_length; int refcount; diff --git a/src/lib-language/lang-filter-stemmer-snowball.c b/src/lib-language/lang-filter-stemmer-snowball.c index 4c2047cec7..91fb16ef9f 100644 --- a/src/lib-language/lang-filter-stemmer-snowball.c +++ b/src/lib-language/lang-filter-stemmer-snowball.c @@ -8,17 +8,17 @@ #include -struct fts_filter_stemmer_snowball { - struct fts_filter filter; +struct lang_filter_stemmer_snowball { + struct lang_filter filter; pool_t pool; - struct fts_language *lang; + struct language *lang; struct sb_stemmer *stemmer; }; -static void fts_filter_stemmer_snowball_destroy(struct fts_filter *filter) +static void lang_filter_stemmer_snowball_destroy(struct lang_filter *filter) { - struct fts_filter_stemmer_snowball *sp = - (struct fts_filter_stemmer_snowball *)filter; + struct lang_filter_stemmer_snowball *sp = + (struct lang_filter_stemmer_snowball *)filter; if (sp->stemmer != NULL) sb_stemmer_delete(sp->stemmer); @@ -26,12 +26,12 @@ static void fts_filter_stemmer_snowball_destroy(struct fts_filter *filter) } static int -fts_filter_stemmer_snowball_create(const struct fts_language *lang, - const char *const *settings, - struct fts_filter **filter_r, - const char **error_r) +lang_filter_stemmer_snowball_create(const struct language *lang, + const char *const *settings, + struct lang_filter **filter_r, + const char **error_r) { - struct fts_filter_stemmer_snowball *sp; + struct lang_filter_stemmer_snowball *sp; pool_t pp; *filter_r = NULL; @@ -40,42 +40,42 @@ fts_filter_stemmer_snowball_create(const struct fts_language *lang, *error_r = t_strdup_printf("Unknown setting: %s", settings[0]); return -1; } - pp = pool_alloconly_create(MEMPOOL_GROWING"fts_filter_stemmer_snowball", - sizeof(struct fts_filter)); - sp = p_new(pp, struct fts_filter_stemmer_snowball, 1); + pp = pool_alloconly_create(MEMPOOL_GROWING"lang_filter_stemmer_snowball", + sizeof(struct lang_filter)); + sp = p_new(pp, struct lang_filter_stemmer_snowball, 1); sp->pool = pp; - sp->filter = *fts_filter_stemmer_snowball; - sp->lang = p_malloc(sp->pool, sizeof(struct fts_language)); + sp->filter = *lang_filter_stemmer_snowball; + sp->lang = p_malloc(sp->pool, sizeof(struct language)); sp->lang->name = p_strdup(sp->pool, lang->name); *filter_r = &sp->filter; return 0; } static int -fts_filter_stemmer_snowball_create_stemmer(struct fts_filter_stemmer_snowball *sp, - const char **error_r) +lang_filter_stemmer_snowball_create_stemmer(struct lang_filter_stemmer_snowball *sp, + const char **error_r) { sp->stemmer = sb_stemmer_new(sp->lang->name, "UTF_8"); if (sp->stemmer == NULL) { *error_r = t_strdup_printf( "Creating a Snowball stemmer for language '%s' failed.", sp->lang->name); - fts_filter_stemmer_snowball_destroy(&sp->filter); + lang_filter_stemmer_snowball_destroy(&sp->filter); return -1; } return 0; } static int -fts_filter_stemmer_snowball_filter(struct fts_filter *filter, - const char **token, const char **error_r) +lang_filter_stemmer_snowball_filter(struct lang_filter *filter, + const char **token, const char **error_r) { - struct fts_filter_stemmer_snowball *sp = - (struct fts_filter_stemmer_snowball *) filter; + struct lang_filter_stemmer_snowball *sp = + (struct lang_filter_stemmer_snowball *) filter; const sb_symbol *base; if (sp->stemmer == NULL) { - if (fts_filter_stemmer_snowball_create_stemmer(sp, error_r) < 0) + if (lang_filter_stemmer_snowball_create_stemmer(sp, error_r) < 0) return -1; } @@ -93,7 +93,7 @@ fts_filter_stemmer_snowball_filter(struct fts_filter *filter, else { /* If the stemmer returns an empty token, the return value * should be 0 instead of 1 (otherwise it causes an assertion - * fault in fts_filter_filter() ). + * fault in lang_filter() ). * However, removing tokens may bring the same kind of issues * and inconsistencies that stopwords cause when used with * multiple languages and negations. @@ -106,36 +106,36 @@ fts_filter_stemmer_snowball_filter(struct fts_filter *filter, #else static int -fts_filter_stemmer_snowball_create(const struct fts_language *lang ATTR_UNUSED, - const char *const *settings ATTR_UNUSED, - struct fts_filter **filter_r ATTR_UNUSED, - const char **error_r) +lang_filter_stemmer_snowball_create(const struct language *lang ATTR_UNUSED, + const char *const *settings ATTR_UNUSED, + struct lang_filter **filter_r ATTR_UNUSED, + const char **error_r) { *error_r = "Snowball support not built in"; return -1; } static void -fts_filter_stemmer_snowball_destroy(struct fts_filter *stemmer ATTR_UNUSED) +lang_filter_stemmer_snowball_destroy(struct lang_filter *stemmer ATTR_UNUSED) { } static int -fts_filter_stemmer_snowball_filter(struct fts_filter *filter ATTR_UNUSED, - const char **token ATTR_UNUSED, - const char **error_r ATTR_UNUSED) +lang_filter_stemmer_snowball_filter(struct lang_filter *filter ATTR_UNUSED, + const char **token ATTR_UNUSED, + const char **error_r ATTR_UNUSED) { return -1; } #endif -static const struct fts_filter fts_filter_stemmer_snowball_real = { +static const struct lang_filter lang_filter_stemmer_snowball_real = { .class_name = "snowball", .v = { - fts_filter_stemmer_snowball_create, - fts_filter_stemmer_snowball_filter, - fts_filter_stemmer_snowball_destroy + lang_filter_stemmer_snowball_create, + lang_filter_stemmer_snowball_filter, + lang_filter_stemmer_snowball_destroy } }; -const struct fts_filter *fts_filter_stemmer_snowball = &fts_filter_stemmer_snowball_real; +const struct lang_filter *lang_filter_stemmer_snowball = &lang_filter_stemmer_snowball_real; diff --git a/src/lib-language/lang-filter-stopwords.c b/src/lib-language/lang-filter-stopwords.c index 32067f38b2..8f7ce00631 100644 --- a/src/lib-language/lang-filter-stopwords.c +++ b/src/lib-language/lang-filter-stopwords.c @@ -14,16 +14,16 @@ #define STOPWORDS_CUTCHARS "|#\t " #define STOPWORDS_DISALLOWED_CHARS "/\\<>.,\":()\t\n\r" -struct fts_filter_stopwords { - struct fts_filter filter; - struct fts_language *lang; +struct lang_filter_stopwords { + struct lang_filter filter; + struct language *lang; pool_t pool; HASH_TABLE(const char *, const char *) stopwords; const char *stopwords_dir; }; -static int fts_filter_stopwords_read_list(struct fts_filter_stopwords *filter, - const char **error_r) +static int lang_filter_stopwords_read_list(struct lang_filter_stopwords *filter, + const char **error_r) { struct istream *input; const char *line, *word, *path; @@ -61,21 +61,21 @@ static int fts_filter_stopwords_read_list(struct fts_filter_stopwords *filter, return ret; } -static void fts_filter_stopwords_destroy(struct fts_filter *filter) +static void lang_filter_stopwords_destroy(struct lang_filter *filter) { - struct fts_filter_stopwords *sp = (struct fts_filter_stopwords *)filter; + struct lang_filter_stopwords *sp = (struct lang_filter_stopwords *)filter; hash_table_destroy(&sp->stopwords); pool_unref(&sp->pool); } static int -fts_filter_stopwords_create(const struct fts_language *lang, - const char *const *settings, - struct fts_filter **filter_r, - const char **error_r) +lang_filter_stopwords_create(const struct language *lang, + const char *const *settings, + struct lang_filter **filter_r, + const char **error_r) { - struct fts_filter_stopwords *sp; + struct lang_filter_stopwords *sp; pool_t pp; const char *dir = NULL; unsigned int i; @@ -90,12 +90,12 @@ fts_filter_stopwords_create(const struct fts_language *lang, return -1; } } - pp = pool_alloconly_create(MEMPOOL_GROWING"fts_filter_stopwords", - sizeof(struct fts_filter)); - sp = p_new(pp, struct fts_filter_stopwords, 1); - sp->filter = *fts_filter_stopwords; + pp = pool_alloconly_create(MEMPOOL_GROWING"lang_filter_stopwords", + sizeof(struct lang_filter)); + sp = p_new(pp, struct lang_filter_stopwords, 1); + sp->filter = *lang_filter_stopwords; sp->pool = pp; - sp->lang = p_malloc(sp->pool, sizeof(struct fts_language)); + sp->lang = p_malloc(sp->pool, sizeof(struct language)); sp->lang->name = p_strdup(sp->pool, lang->name); if (dir != NULL) sp->stopwords_dir = p_strdup(pp, dir); @@ -106,26 +106,26 @@ fts_filter_stopwords_create(const struct fts_language *lang, } static int -fts_filter_stopwords_filter(struct fts_filter *filter, const char **token, - const char **error_r) +lang_filter_stopwords_filter(struct lang_filter *filter, const char **token, + const char **error_r) { - struct fts_filter_stopwords *sp = - (struct fts_filter_stopwords *) filter; + struct lang_filter_stopwords *sp = + (struct lang_filter_stopwords *) filter; if (!hash_table_is_created(sp->stopwords)) { hash_table_create(&sp->stopwords, sp->pool, 0, str_hash, strcmp); - if (fts_filter_stopwords_read_list(sp, error_r) < 0) + if (lang_filter_stopwords_read_list(sp, error_r) < 0) return -1; } return hash_table_lookup(sp->stopwords, *token) == NULL ? 1 : 0; } -const struct fts_filter fts_filter_stopwords_real = { +const struct lang_filter lang_filter_stopwords_real = { .class_name = "stopwords", .v = { - fts_filter_stopwords_create, - fts_filter_stopwords_filter, - fts_filter_stopwords_destroy + lang_filter_stopwords_create, + lang_filter_stopwords_filter, + lang_filter_stopwords_destroy } }; -const struct fts_filter *fts_filter_stopwords = &fts_filter_stopwords_real; +const struct lang_filter *lang_filter_stopwords = &lang_filter_stopwords_real; diff --git a/src/lib-language/lang-filter.c b/src/lib-language/lang-filter.c index 1e1a972c3b..b4021ba627 100644 --- a/src/lib-language/lang-filter.c +++ b/src/lib-language/lang-filter.c @@ -10,54 +10,54 @@ # include "lang-icu.h" #endif -static ARRAY(const struct fts_filter *) fts_filter_classes; +static ARRAY(const struct lang_filter *) lang_filter_classes; -void fts_filters_init(void) +void lang_filters_init(void) { - i_array_init(&fts_filter_classes, FTS_FILTER_CLASSES_NR); - - fts_filter_register(fts_filter_stopwords); - fts_filter_register(fts_filter_stemmer_snowball); - fts_filter_register(fts_filter_normalizer_icu); - fts_filter_register(fts_filter_lowercase); - fts_filter_register(fts_filter_english_possessive); - fts_filter_register(fts_filter_contractions); + i_array_init(&lang_filter_classes, LANG_FILTER_CLASSES_NR); + + lang_filter_register(lang_filter_stopwords); + lang_filter_register(lang_filter_stemmer_snowball); + lang_filter_register(lang_filter_normalizer_icu); + lang_filter_register(lang_filter_lowercase); + lang_filter_register(lang_filter_english_possessive); + lang_filter_register(lang_filter_contractions); } -void fts_filters_deinit(void) +void lang_filters_deinit(void) { #ifdef HAVE_LIBICU - fts_icu_deinit(); + lang_icu_deinit(); #endif - array_free(&fts_filter_classes); + array_free(&lang_filter_classes); } -void fts_filter_register(const struct fts_filter *filter_class) +void lang_filter_register(const struct lang_filter *filter_class) { - i_assert(fts_filter_find(filter_class->class_name) == NULL); + i_assert(lang_filter_find(filter_class->class_name) == NULL); - array_push_back(&fts_filter_classes, &filter_class); + array_push_back(&lang_filter_classes, &filter_class); } -const struct fts_filter *fts_filter_find(const char *name) +const struct lang_filter *lang_filter_find(const char *name) { - const struct fts_filter *filter; + const struct lang_filter *filter; - array_foreach_elem(&fts_filter_classes, filter) { + array_foreach_elem(&lang_filter_classes, filter) { if (strcmp(filter->class_name, name) == 0) return filter; } return NULL; } -int fts_filter_create(const struct fts_filter *filter_class, - struct fts_filter *parent, - const struct fts_language *lang, - const char *const *settings, - struct fts_filter **filter_r, - const char **error_r) +int lang_filter_create(const struct lang_filter *filter_class, + struct lang_filter *parent, + const struct language *lang, + const char *const *settings, + struct lang_filter **filter_r, + const char **error_r) { - struct fts_filter *fp; + struct lang_filter *fp; const char *empty_settings = NULL; i_assert(settings == NULL || str_array_length(settings) % 2 == 0); @@ -76,27 +76,27 @@ int fts_filter_create(const struct fts_filter *filter_class, *error_r = t_strdup_printf("Unknown setting: %s", settings[0]); return -1; } - fp = i_new(struct fts_filter, 1); + fp = i_new(struct lang_filter, 1); *fp = *filter_class; } fp->refcount = 1; fp->parent = parent; if (parent != NULL) { - fts_filter_ref(parent); + lang_filter_ref(parent); } *filter_r = fp; return 0; } -void fts_filter_ref(struct fts_filter *fp) +void lang_filter_ref(struct lang_filter *fp) { i_assert(fp->refcount > 0); fp->refcount++; } -void fts_filter_unref(struct fts_filter **_fpp) +void lang_filter_unref(struct lang_filter **_fpp) { - struct fts_filter *fp = *_fpp; + struct lang_filter *fp = *_fpp; i_assert(fp->refcount > 0); *_fpp = NULL; @@ -105,7 +105,7 @@ void fts_filter_unref(struct fts_filter **_fpp) return; if (fp->parent != NULL) - fts_filter_unref(&fp->parent); + lang_filter_unref(&fp->parent); if (fp->v.destroy != NULL) fp->v.destroy(fp); else { @@ -115,8 +115,8 @@ void fts_filter_unref(struct fts_filter **_fpp) } } -int fts_filter_filter(struct fts_filter *filter, const char **token, - const char **error_r) +int lang_filter(struct lang_filter *filter, const char **token, + const char **error_r) { int ret = 0; @@ -124,7 +124,7 @@ int fts_filter_filter(struct fts_filter *filter, const char **token, /* Recurse to parent. */ if (filter->parent != NULL) - ret = fts_filter_filter(filter->parent, token, error_r); + ret = lang_filter(filter->parent, token, error_r); /* Parent returned token or no parent. */ if (ret > 0 || filter->parent == NULL) diff --git a/src/lib-language/lang-filter.h b/src/lib-language/lang-filter.h index 89060b7971..6c2532a24e 100644 --- a/src/lib-language/lang-filter.h +++ b/src/lib-language/lang-filter.h @@ -1,8 +1,8 @@ -#ifndef FTS_FILTER_H -#define FTS_FILTER_H +#ifndef LANG_FILTER_H +#define LANG_FILTER_H -struct fts_language; -struct fts_filter; +struct language; +struct lang_filter; /* Settings are given in the form of a const char * const *settings = {"key, "value", "key2", "value2", NULL} array of string pairs. @@ -13,12 +13,12 @@ struct fts_filter; Stopword files are looked up in ""/stopwords_.txt */ -extern const struct fts_filter *fts_filter_stopwords; +extern const struct lang_filter *lang_filter_stopwords; /* Settings: "lang", language of the stemmed language. */ -extern const struct fts_filter *fts_filter_stemmer_snowball; +extern const struct lang_filter *lang_filter_stemmer_snowball; /* Settings: "id", description of the normalizing/translitterating rules @@ -29,43 +29,43 @@ extern const struct fts_filter *fts_filter_stemmer_snowball; "maxlen", maximum length of tokens that ICU normalizer will output. Defaults to 250. */ -extern const struct fts_filter *fts_filter_normalizer_icu; +extern const struct lang_filter *lang_filter_normalizer_icu; /* Lowercases the input. Supports UTF8, if libicu is available. */ -extern const struct fts_filter *fts_filter_lowercase; +extern const struct lang_filter *lang_filter_lowercase; /* Removes <'s> suffix from words. */ -extern const struct fts_filter *fts_filter_english_possessive; +extern const struct lang_filter *lang_filter_english_possessive; /* Removes prefixing contractions from words. */ -extern const struct fts_filter *fts_filter_contractions; +extern const struct lang_filter *lang_filter_contractions; /* Register all built-in filters. */ -void fts_filters_init(void); -void fts_filters_deinit(void); +void lang_filters_init(void); +void lang_filters_deinit(void); /* Register a new class explicitly. Built-in classes are automatically registered. */ -void fts_filter_register(const struct fts_filter *filter_class); +void lang_filter_register(const struct lang_filter *filter_class); /* Filtering workflow, find --> create --> filter --> destroy. */ -const struct fts_filter *fts_filter_find(const char *name); -int fts_filter_create(const struct fts_filter *filter_class, - struct fts_filter *parent, - const struct fts_language *lang, - const char *const *settings, - struct fts_filter **filter_r, - const char **error_r); -void fts_filter_ref(struct fts_filter *filter); -void fts_filter_unref(struct fts_filter **filter); +const struct lang_filter *lang_filter_find(const char *name); +int lang_filter_create(const struct lang_filter *filter_class, + struct lang_filter *parent, + const struct language *lang, + const char *const *settings, + struct lang_filter **filter_r, + const char **error_r); +void lang_filter_ref(struct lang_filter *filter); +void lang_filter_unref(struct lang_filter **filter); /* Returns 1 if token is returned in *token, 0 if token was filtered out (*token is also set to NULL) and -1 on error. Input is also given via *token. */ -int fts_filter_filter(struct fts_filter *filter, const char **token, - const char **error_r); +int lang_filter(struct lang_filter *filter, const char **token, + const char **error_r); #endif diff --git a/src/lib-language/lang-icu.c b/src/lib-language/lang-icu.c index 3ba86fce5e..6bfd6cf870 100644 --- a/src/lib-language/lang-icu.c +++ b/src/lib-language/lang-icu.c @@ -13,7 +13,7 @@ static struct UCaseMap *icu_csm = NULL; -static struct UCaseMap *fts_icu_csm(void) +static struct UCaseMap *lang_icu_csm(void) { UErrorCode err = U_ZERO_ERROR; @@ -27,8 +27,8 @@ static struct UCaseMap *fts_icu_csm(void) return icu_csm; } -void fts_icu_utf8_to_utf16(ARRAY_TYPE(icu_utf16) *dest_utf16, - const char *src_utf8) +void lang_icu_utf8_to_utf16(ARRAY_TYPE(icu_utf16) *dest_utf16, + const char *src_utf8) { buffer_t *dest_buf = dest_utf16->arr.buffer; UErrorCode err = U_ZERO_ERROR; @@ -60,8 +60,8 @@ void fts_icu_utf8_to_utf16(ARRAY_TYPE(icu_utf16) *dest_utf16, i_assert(retp == dest_data); } -void fts_icu_utf16_to_utf8(string_t *dest_utf8, const UChar *src_utf16, - unsigned int src_len) +void lang_icu_utf16_to_utf8(string_t *dest_utf8, const UChar *src_utf16, + unsigned int src_len) { int32_t dest_len = 0; int32_t sub_num = 0; @@ -91,9 +91,9 @@ void fts_icu_utf16_to_utf8(string_t *dest_utf8, const UChar *src_utf16, i_assert(retp == dest_data); } -int fts_icu_translate(ARRAY_TYPE(icu_utf16) *dest_utf16, const UChar *src_utf16, - unsigned int src_len, UTransliterator *transliterator, - const char **error_r) +int lang_icu_translate(ARRAY_TYPE(icu_utf16) *dest_utf16, const UChar *src_utf16, + unsigned int src_len, UTransliterator *transliterator, + const char **error_r) { buffer_t *dest_buf = dest_utf16->arr.buffer; UErrorCode err = U_ZERO_ERROR; @@ -134,9 +134,9 @@ int fts_icu_translate(ARRAY_TYPE(icu_utf16) *dest_utf16, const UChar *src_utf16, return 0; } -void fts_icu_lcase(string_t *dest_utf8, const char *src_utf8) +void lang_icu_lcase(string_t *dest_utf8, const char *src_utf8) { - struct UCaseMap *csm = fts_icu_csm(); + struct UCaseMap *csm = lang_icu_csm(); size_t avail_bytes, dest_pos = dest_utf8->used; char *dest_data; int dest_full_len; @@ -164,7 +164,7 @@ void fts_icu_lcase(string_t *dest_utf8, const char *src_utf8) buffer_set_used_size(dest_utf8, dest_full_len); } -void fts_icu_deinit(void) +void lang_icu_deinit(void) { if (icu_csm != NULL) { ucasemap_close(icu_csm); @@ -173,9 +173,9 @@ void fts_icu_deinit(void) u_cleanup(); } -int fts_icu_transliterator_create(const char *id, - UTransliterator **transliterator_r, - const char **error_r) +int lang_icu_transliterator_create(const char *id, + UTransliterator **transliterator_r, + const char **error_r) { UErrorCode err = U_ZERO_ERROR; UParseError perr; @@ -183,7 +183,7 @@ int fts_icu_transliterator_create(const char *id, i_zero(&perr); t_array_init(&id_utf16, strlen(id)); - fts_icu_utf8_to_utf16(&id_utf16, id); + lang_icu_utf8_to_utf16(&id_utf16, id); *transliterator_r = utrans_openU(array_front(&id_utf16), array_count(&id_utf16), UTRANS_FORWARD, NULL, 0, &perr, &err); diff --git a/src/lib-language/lang-icu.h b/src/lib-language/lang-icu.h index 5b0f3dcce6..2168477af2 100644 --- a/src/lib-language/lang-icu.h +++ b/src/lib-language/lang-icu.h @@ -1,5 +1,5 @@ -#ifndef HAVE_FTS_ICU_H -#define HAVE_FTS_ICU_H +#ifndef HAVE_LANG_ICU_H +#define HAVE_LANG_ICU_H #include #include @@ -7,22 +7,22 @@ ARRAY_DEFINE_TYPE(icu_utf16, UChar); /* Convert UTF-8 input to UTF-16 output. */ -void fts_icu_utf8_to_utf16(ARRAY_TYPE(icu_utf16) *dest_utf16, - const char *src_utf8); +void lang_icu_utf8_to_utf16(ARRAY_TYPE(icu_utf16) *dest_utf16, + const char *src_utf8); /* Convert UTF-16 input to UTF-8 output. */ -void fts_icu_utf16_to_utf8(string_t *dest_utf8, const UChar *src_utf16, - unsigned int src_len); +void lang_icu_utf16_to_utf8(string_t *dest_utf8, const UChar *src_utf16, + unsigned int src_len); /* Run ICU translation for the string. Returns 0 on success, -1 on error. */ -int fts_icu_translate(ARRAY_TYPE(icu_utf16) *dest_utf16, const UChar *src_utf16, - unsigned int src_len, UTransliterator *transliterator, - const char **error_r); +int lang_icu_translate(ARRAY_TYPE(icu_utf16) *dest_utf16, const UChar *src_utf16, + unsigned int src_len, UTransliterator *transliterator, + const char **error_r); /* Lowercase the given UTF-8 string. */ -void fts_icu_lcase(string_t *dest_utf8, const char *src_utf8); +void lang_icu_lcase(string_t *dest_utf8, const char *src_utf8); /* Free all the memory used by ICU functions. */ -void fts_icu_deinit(void); +void lang_icu_deinit(void); -int fts_icu_transliterator_create(const char *id, - UTransliterator **transliterator_r, - const char **error_r) ; +int lang_icu_transliterator_create(const char *id, + UTransliterator **transliterator_r, + const char **error_r) ; #endif diff --git a/src/lib-language/lang-indexer-status.h b/src/lib-language/lang-indexer-status.h index 5c4d6c7ad7..c56d1a9578 100644 --- a/src/lib-language/lang-indexer-status.h +++ b/src/lib-language/lang-indexer-status.h @@ -1,5 +1,5 @@ -#ifndef FTS_INDEXER_STATUS_H -#define FTS_INDEXER_STATUS_H +#ifndef LANG_INDEXER_STATUS_H +#define LANG_INDEXER_STATUS_H enum indexer_state { INDEXER_STATE_PROCESSING = 0, diff --git a/src/lib-language/lang-library.c b/src/lib-language/lang-library.c index 7dc5c2c603..7a8b6c7b3d 100644 --- a/src/lib-language/lang-library.c +++ b/src/lib-language/lang-library.c @@ -6,16 +6,16 @@ #include "lang-filter.h" #include "lang-library.h" -void fts_library_init(void) +void lang_library_init(void) { - fts_languages_init(); - fts_tokenizers_init(); - fts_filters_init(); + languages_init(); + lang_tokenizers_init(); + lang_filters_init(); } -void fts_library_deinit(void) +void lang_library_deinit(void) { - fts_languages_deinit(); - fts_tokenizers_deinit(); - fts_filters_deinit(); + languages_deinit(); + lang_tokenizers_deinit(); + lang_filters_deinit(); } diff --git a/src/lib-language/lang-library.h b/src/lib-language/lang-library.h index 8799b10f28..55290cf165 100644 --- a/src/lib-language/lang-library.h +++ b/src/lib-language/lang-library.h @@ -1,7 +1,7 @@ -#ifndef FTS_LIBRARY_H -#define FTS_LIBRARY_H +#ifndef LANG_LIBRARY_H +#define LANG_LIBRARY_H -void fts_library_init(void); -void fts_library_deinit(void); +void lang_library_init(void); +void lang_library_deinit(void); #endif diff --git a/src/lib-language/lang-tokenizer-address.c b/src/lib-language/lang-tokenizer-address.c index 7ceb1d00a9..57b0cc2494 100644 --- a/src/lib-language/lang-tokenizer-address.c +++ b/src/lib-language/lang-tokenizer-address.c @@ -10,7 +10,7 @@ #define IS_DTEXT(c) \ (rfc822_atext_chars[(int)(unsigned char)(c)] == 2) -#define FTS_DEFAULT_ADDRESS_MAX_LENGTH 254 +#define LANG_DEFAULT_ADDRESS_MAX_LENGTH 254 enum email_address_parser_state { EMAIL_ADDRESS_PARSER_STATE_NONE = 0, @@ -20,8 +20,8 @@ enum email_address_parser_state { EMAIL_ADDRESS_PARSER_STATE_SKIP, }; -struct email_address_fts_tokenizer { - struct fts_tokenizer tokenizer; +struct email_address_lang_tokenizer { + struct lang_tokenizer tokenizer; enum email_address_parser_state state; string_t *last_word; string_t *parent_data; /* Copy of input data between tokens. */ @@ -30,13 +30,13 @@ struct email_address_fts_tokenizer { }; static int -fts_tokenizer_email_address_create(const char *const *settings, - struct fts_tokenizer **tokenizer_r, - const char **error_r) +lang_tokenizer_email_address_create(const char *const *settings, + struct lang_tokenizer **tokenizer_r, + const char **error_r) { - struct email_address_fts_tokenizer *tok; + struct email_address_lang_tokenizer *tok; bool search = FALSE; - unsigned int max_length = FTS_DEFAULT_ADDRESS_MAX_LENGTH; + unsigned int max_length = LANG_DEFAULT_ADDRESS_MAX_LENGTH; unsigned int i; for (i = 0; settings[i] != NULL; i += 2) { @@ -56,8 +56,8 @@ fts_tokenizer_email_address_create(const char *const *settings, } } - tok = i_new(struct email_address_fts_tokenizer, 1); - tok->tokenizer = *fts_tokenizer_email_address; + tok = i_new(struct email_address_lang_tokenizer, 1); + tok->tokenizer = *lang_tokenizer_email_address; tok->last_word = str_new(default_pool, 128); tok->parent_data = str_new(default_pool, 128); tok->max_length = max_length; @@ -66,10 +66,10 @@ fts_tokenizer_email_address_create(const char *const *settings, return 0; } -static void fts_tokenizer_email_address_destroy(struct fts_tokenizer *_tok) +static void lang_tokenizer_email_address_destroy(struct lang_tokenizer *_tok) { - struct email_address_fts_tokenizer *tok = - (struct email_address_fts_tokenizer *)_tok; + struct email_address_lang_tokenizer *tok = + (struct email_address_lang_tokenizer *)_tok; str_free(&tok->last_word); str_free(&tok->parent_data); @@ -77,8 +77,8 @@ static void fts_tokenizer_email_address_destroy(struct fts_tokenizer *_tok) } static bool -fts_tokenizer_address_current_token(struct email_address_fts_tokenizer *tok, - const char **token_r) +lang_tokenizer_address_current_token(struct email_address_lang_tokenizer *tok, + const char **token_r) { const unsigned char *data = tok->last_word->data; size_t len = tok->last_word->used; @@ -91,20 +91,20 @@ fts_tokenizer_address_current_token(struct email_address_fts_tokenizer *tok, IS_DTEXT() does not actually allow utf8 addresses yet though. */ len = tok->last_word->used; - fts_tokenizer_delete_trailing_partial_char(data, &len); + lang_tokenizer_delete_trailing_partial_char(data, &len); i_assert(len <= tok->max_length); } if (len > 0) - fts_tokenizer_delete_trailing_invalid_char(data, &len); + lang_tokenizer_delete_trailing_invalid_char(data, &len); *token_r = len == 0 ? "" : t_strndup(data, len); return len > 0; } static bool -fts_tokenizer_address_parent_data(struct email_address_fts_tokenizer *tok, - const char **token_r) +lang_tokenizer_address_parent_data(struct email_address_lang_tokenizer *tok, + const char **token_r) { if (tok->tokenizer.parent == NULL || str_len(tok->parent_data) == 0) return FALSE; @@ -141,8 +141,8 @@ static size_t skip_nonlocal_part(const unsigned char *data, size_t size) } static bool -fts_tokenizer_email_address_too_large(struct email_address_fts_tokenizer *tok, - size_t pos) +lang_tokenizer_email_address_too_large(struct email_address_lang_tokenizer *tok, + size_t pos) { if (str_len(tok->last_word) + pos <= tok->max_length) return FALSE; @@ -151,15 +151,15 @@ fts_tokenizer_email_address_too_large(struct email_address_fts_tokenizer *tok, Truncate the input that was added so far to the token, so all of it gets sent to the parent tokenizer in - fts_tokenizer_address_parent_data(). */ + lang_tokenizer_address_parent_data(). */ str_truncate(tok->last_word, 0); return TRUE; } static enum email_address_parser_state -fts_tokenizer_email_address_parse_local(struct email_address_fts_tokenizer *tok, - const unsigned char *data, size_t size, - size_t *skip_r) +lang_tokenizer_email_address_parse_local(struct email_address_lang_tokenizer *tok, + const unsigned char *data, size_t size, + size_t *skip_r) { size_t pos = 0; bool seen_at = FALSE; @@ -175,7 +175,7 @@ fts_tokenizer_email_address_parse_local(struct email_address_fts_tokenizer *tok, break; } - if (fts_tokenizer_email_address_too_large(tok, pos)) { + if (lang_tokenizer_email_address_too_large(tok, pos)) { *skip_r = 0; return EMAIL_ADDRESS_PARSER_STATE_SKIP; } @@ -199,7 +199,7 @@ fts_tokenizer_email_address_parse_local(struct email_address_fts_tokenizer *tok, return EMAIL_ADDRESS_PARSER_STATE_NONE; } -static bool domain_is_empty(struct email_address_fts_tokenizer *tok) +static bool domain_is_empty(struct email_address_lang_tokenizer *tok) { const char *p, *str = str_c(tok->last_word); @@ -209,16 +209,16 @@ static bool domain_is_empty(struct email_address_fts_tokenizer *tok) } static enum email_address_parser_state -fts_tokenizer_email_address_parse_domain(struct email_address_fts_tokenizer *tok, - const unsigned char *data, size_t size, - size_t *skip_r) +lang_tokenizer_email_address_parse_domain(struct email_address_lang_tokenizer *tok, + const unsigned char *data, size_t size, + size_t *skip_r) { size_t pos = 0; while (pos < size && (IS_DTEXT(data[pos]) || data[pos] == '.' || data[pos] == '-')) pos++; - if (fts_tokenizer_email_address_too_large(tok, pos)) { + if (lang_tokenizer_email_address_too_large(tok, pos)) { *skip_r = 0; return EMAIL_ADDRESS_PARSER_STATE_SKIP; } @@ -242,8 +242,8 @@ fts_tokenizer_email_address_parse_domain(struct email_address_fts_tokenizer *tok } static bool -fts_tokenizer_address_skip(const unsigned char *data, size_t size, - size_t *skip_r) +lang_tokenizer_address_skip(const unsigned char *data, size_t size, + size_t *skip_r) { for (size_t pos = 0; pos < size; pos++) { if (!(IS_ATEXT(data[pos]) || data[pos] == '.' || @@ -258,17 +258,17 @@ fts_tokenizer_address_skip(const unsigned char *data, size_t size, /* Buffer raw data for parent. */ static void -fts_tokenizer_address_update_parent(struct email_address_fts_tokenizer *tok, - const unsigned char *data, size_t size) +lang_tokenizer_address_update_parent(struct email_address_lang_tokenizer *tok, + const unsigned char *data, size_t size) { if (tok->tokenizer.parent != NULL) str_append_data(tok->parent_data, data, size); } -static void fts_tokenizer_email_address_reset(struct fts_tokenizer *_tok) +static void lang_tokenizer_email_address_reset(struct lang_tokenizer *_tok) { - struct email_address_fts_tokenizer *tok = - (struct email_address_fts_tokenizer *)_tok; + struct email_address_lang_tokenizer *tok = + (struct email_address_lang_tokenizer *)_tok; tok->state = EMAIL_ADDRESS_PARSER_STATE_NONE; str_truncate(tok->last_word, 0); @@ -276,13 +276,13 @@ static void fts_tokenizer_email_address_reset(struct fts_tokenizer *_tok) } static int -fts_tokenizer_email_address_next(struct fts_tokenizer *_tok, - const unsigned char *data, size_t size, - size_t *skip_r, const char **token_r, - const char **error_r ATTR_UNUSED) +lang_tokenizer_email_address_next(struct lang_tokenizer *_tok, + const unsigned char *data, size_t size, + size_t *skip_r, const char **token_r, + const char **error_r ATTR_UNUSED) { - struct email_address_fts_tokenizer *tok = - (struct email_address_fts_tokenizer *)_tok; + struct email_address_lang_tokenizer *tok = + (struct email_address_lang_tokenizer *)_tok; size_t pos = 0, local_skip; bool finished; @@ -291,7 +291,7 @@ fts_tokenizer_email_address_next(struct fts_tokenizer *_tok, if (tok->state == EMAIL_ADDRESS_PARSER_STATE_COMPLETE) { *skip_r = pos; - if (fts_tokenizer_address_current_token(tok, token_r)) + if (lang_tokenizer_address_current_token(tok, token_r)) return 1; } @@ -305,11 +305,11 @@ fts_tokenizer_email_address_next(struct fts_tokenizer *_tok, tok->state = EMAIL_ADDRESS_PARSER_STATE_NONE; } - if (fts_tokenizer_address_parent_data(tok, token_r)) + if (lang_tokenizer_address_parent_data(tok, token_r)) return 1; if (tok->state == EMAIL_ADDRESS_PARSER_STATE_DOMAIN) { - if (fts_tokenizer_address_current_token(tok, token_r)) + if (lang_tokenizer_address_current_token(tok, token_r)) return 1; } tok->state = EMAIL_ADDRESS_PARSER_STATE_NONE; @@ -324,7 +324,7 @@ fts_tokenizer_email_address_next(struct fts_tokenizer *_tok, /* no part of address found yet. remove possible earlier data */ str_truncate(tok->last_word, 0); - if (fts_tokenizer_address_parent_data(tok, token_r)) { + if (lang_tokenizer_address_parent_data(tok, token_r)) { *skip_r = pos; return 1; } @@ -336,11 +336,11 @@ fts_tokenizer_email_address_next(struct fts_tokenizer *_tok, the beginning of data to see if it contains a full local-part@ */ tok->state = - fts_tokenizer_email_address_parse_local(tok, + lang_tokenizer_email_address_parse_local(tok, data + pos, size - pos, &local_skip); - fts_tokenizer_address_update_parent(tok, data+pos, + lang_tokenizer_address_update_parent(tok, data+pos, local_skip); pos += local_skip; @@ -351,20 +351,20 @@ fts_tokenizer_email_address_next(struct fts_tokenizer *_tok, to see if it contains a valid domain. */ tok->state = - fts_tokenizer_email_address_parse_domain(tok, + lang_tokenizer_email_address_parse_domain(tok, data + pos, size - pos, &local_skip); - fts_tokenizer_address_update_parent(tok, data+pos, + lang_tokenizer_address_update_parent(tok, data+pos, local_skip); pos += local_skip; break; case EMAIL_ADDRESS_PARSER_STATE_COMPLETE: *skip_r = pos; - if (fts_tokenizer_address_parent_data(tok, token_r)) + if (lang_tokenizer_address_parent_data(tok, token_r)) return 1; - if (fts_tokenizer_address_current_token(tok, token_r)) + if (lang_tokenizer_address_current_token(tok, token_r)) return 1; break; case EMAIL_ADDRESS_PARSER_STATE_SKIP: @@ -373,18 +373,18 @@ fts_tokenizer_email_address_next(struct fts_tokenizer *_tok, simply skipping over it, but the input is being passed to the parent tokenizer. */ *skip_r = pos; - if (fts_tokenizer_address_parent_data(tok, token_r)) + if (lang_tokenizer_address_parent_data(tok, token_r)) return 1; - finished = fts_tokenizer_address_skip(data + pos, + finished = lang_tokenizer_address_skip(data + pos, size - pos, &local_skip); - fts_tokenizer_address_update_parent(tok, data+pos, + lang_tokenizer_address_update_parent(tok, data+pos, local_skip); pos += local_skip; if (finished) { *skip_r = pos; - if (fts_tokenizer_address_parent_data(tok, token_r)) { + if (lang_tokenizer_address_parent_data(tok, token_r)) { tok->state = EMAIL_ADDRESS_PARSER_STATE_NONE; return 1; } @@ -400,17 +400,17 @@ fts_tokenizer_email_address_next(struct fts_tokenizer *_tok, return 0; } -static const struct fts_tokenizer_vfuncs email_address_tokenizer_vfuncs = { - fts_tokenizer_email_address_create, - fts_tokenizer_email_address_destroy, - fts_tokenizer_email_address_reset, - fts_tokenizer_email_address_next +static const struct lang_tokenizer_vfuncs email_address_tokenizer_vfuncs = { + lang_tokenizer_email_address_create, + lang_tokenizer_email_address_destroy, + lang_tokenizer_email_address_reset, + lang_tokenizer_email_address_next }; -static const struct fts_tokenizer fts_tokenizer_email_address_real = { +static const struct lang_tokenizer lang_tokenizer_email_address_real = { .name = "email-address", .v = &email_address_tokenizer_vfuncs, .stream_to_parents = TRUE, }; -const struct fts_tokenizer *fts_tokenizer_email_address = - &fts_tokenizer_email_address_real; +const struct lang_tokenizer *lang_tokenizer_email_address = + &lang_tokenizer_email_address_real; diff --git a/src/lib-language/lang-tokenizer-common.c b/src/lib-language/lang-tokenizer-common.c index 9007b588b7..4c9863354b 100644 --- a/src/lib-language/lang-tokenizer-common.c +++ b/src/lib-language/lang-tokenizer-common.c @@ -4,8 +4,8 @@ #include "unichar.h" #include "lang-tokenizer-common.h" void -fts_tokenizer_delete_trailing_partial_char(const unsigned char *data, - size_t *len) +lang_tokenizer_delete_trailing_partial_char(const unsigned char *data, + size_t *len) { size_t pos; unsigned int char_bytes; @@ -22,8 +22,8 @@ fts_tokenizer_delete_trailing_partial_char(const unsigned char *data, *len = pos; } } -void fts_tokenizer_delete_trailing_invalid_char(const unsigned char *data, - size_t *len) +void lang_tokenizer_delete_trailing_invalid_char(const unsigned char *data, + size_t *len) { size_t pos = *len; diff --git a/src/lib-language/lang-tokenizer-common.h b/src/lib-language/lang-tokenizer-common.h index b90e54353e..abb72528d5 100644 --- a/src/lib-language/lang-tokenizer-common.h +++ b/src/lib-language/lang-tokenizer-common.h @@ -1,9 +1,9 @@ -#ifndef FTS_TOKENIZER_COMMON_H -#define FTS_TOKENIZER_COMMON_H +#ifndef LANG_TOKENIZER_COMMON_H +#define LANG_TOKENIZER_COMMON_H void -fts_tokenizer_delete_trailing_partial_char(const unsigned char *data, +lang_tokenizer_delete_trailing_partial_char(const unsigned char *data, size_t *len); void -fts_tokenizer_delete_trailing_invalid_char(const unsigned char *data, +lang_tokenizer_delete_trailing_invalid_char(const unsigned char *data, size_t *len); #endif diff --git a/src/lib-language/lang-tokenizer-generic-private.h b/src/lib-language/lang-tokenizer-generic-private.h index 87f4d48fa1..02f937afd4 100644 --- a/src/lib-language/lang-tokenizer-generic-private.h +++ b/src/lib-language/lang-tokenizer-generic-private.h @@ -1,8 +1,8 @@ -#ifndef FTS_TOKENIZER_GENERIC_PRIVATE_H -#define FTS_TOKENIZER_GENERIC_PRIVATE_H +#ifndef LANG_TOKENIZER_GENERIC_PRIVATE_H +#define LANG_TOKENIZER_GENERIC_PRIVATE_H -extern const struct fts_tokenizer_vfuncs generic_tokenizer_vfuncs_simple; -extern const struct fts_tokenizer_vfuncs generic_tokenizer_vfuncs_tr29; +extern const struct lang_tokenizer_vfuncs generic_tokenizer_vfuncs_simple; +extern const struct lang_tokenizer_vfuncs generic_tokenizer_vfuncs_tr29; /* Word boundary letter type */ enum letter_type { @@ -38,8 +38,8 @@ enum boundary_algorithm { #define ALGORITHM_TR29_NAME "tr29" }; -struct generic_fts_tokenizer { - struct fts_tokenizer tokenizer; +struct generic_lang_tokenizer { + struct lang_tokenizer tokenizer; unsigned int max_length; bool prefixsplat; /* for search strings, accept a trailing '*' for explicit prefix */ bool wb5a; /* TR29 rule for prefix separation diff --git a/src/lib-language/lang-tokenizer-generic.c b/src/lib-language/lang-tokenizer-generic.c index b3af8d586c..91b3f8283d 100644 --- a/src/lib-language/lang-tokenizer-generic.c +++ b/src/lib-language/lang-tokenizer-generic.c @@ -14,13 +14,13 @@ #include "word-break-data.c" /* see comments below between is_base64() and skip_base64() */ -#define FTS_SKIP_BASE64_MIN_SEQUENCES 1 -#define FTS_SKIP_BASE64_MIN_CHARS 50 +#define LANG_SKIP_BASE64_MIN_SEQUENCES 1 +#define LANG_SKIP_BASE64_MIN_CHARS 50 -#define FTS_DEFAULT_TOKEN_MAX_LENGTH 30 -#define FTS_WB5A_PREFIX_MAX_LENGTH 3 /* Including apostrophe */ +#define LANG_DEFAULT_TOKEN_MAX_LENGTH 30 +#define LANG_WB5A_PREFIX_MAX_LENGTH 3 /* Including apostrophe */ -static unsigned char fts_ascii_word_breaks[128] = { +static unsigned char lang_ascii_word_breaks[128] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0-15 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 16-31 */ @@ -33,12 +33,12 @@ static unsigned char fts_ascii_word_breaks[128] = { }; static int -fts_tokenizer_generic_create(const char *const *settings, - struct fts_tokenizer **tokenizer_r, - const char **error_r) +lang_tokenizer_generic_create(const char *const *settings, + struct lang_tokenizer **tokenizer_r, + const char **error_r) { - struct generic_fts_tokenizer *tok; - unsigned int max_length = FTS_DEFAULT_TOKEN_MAX_LENGTH; + struct generic_lang_tokenizer *tok; + unsigned int max_length = LANG_DEFAULT_TOKEN_MAX_LENGTH; enum boundary_algorithm algo = BOUNDARY_ALGORITHM_SIMPLE; bool wb5a = FALSE; bool search = FALSE; @@ -91,7 +91,7 @@ fts_tokenizer_generic_create(const char *const *settings, return -1; } - tok = i_new(struct generic_fts_tokenizer, 1); + tok = i_new(struct generic_lang_tokenizer, 1); if (algo == BOUNDARY_ALGORITHM_TR29) tok->tokenizer.v = &generic_tokenizer_vfuncs_tr29; else @@ -107,24 +107,24 @@ fts_tokenizer_generic_create(const char *const *settings, } static void -fts_tokenizer_generic_destroy(struct fts_tokenizer *_tok) +lang_tokenizer_generic_destroy(struct lang_tokenizer *_tok) { - struct generic_fts_tokenizer *tok = - container_of(_tok, struct generic_fts_tokenizer, tokenizer); + struct generic_lang_tokenizer *tok = + container_of(_tok, struct generic_lang_tokenizer, tokenizer); buffer_free(&tok->token); i_free(tok); } static inline void -shift_prev_type(struct generic_fts_tokenizer *tok, enum letter_type lt) +shift_prev_type(struct generic_lang_tokenizer *tok, enum letter_type lt) { tok->prev_prev_type = tok->prev_type; tok->prev_type = lt; } static inline void -add_prev_type(struct generic_fts_tokenizer *tok, enum letter_type lt) +add_prev_type(struct generic_lang_tokenizer *tok, enum letter_type lt) { if(tok->prev_type != LETTER_TYPE_NONE) tok->prev_prev_type = tok->prev_type; @@ -132,7 +132,7 @@ add_prev_type(struct generic_fts_tokenizer *tok, enum letter_type lt) } static inline void -add_letter(struct generic_fts_tokenizer *tok, unichar_t c) +add_letter(struct generic_lang_tokenizer *tok, unichar_t c) { if(tok->letter != 0) tok->prev_letter = tok->letter; @@ -140,8 +140,8 @@ add_letter(struct generic_fts_tokenizer *tok, unichar_t c) } static bool -fts_tokenizer_generic_simple_current_token(struct generic_fts_tokenizer *tok, - const char **token_r) +lang_tokenizer_generic_simple_current_token(struct generic_lang_tokenizer *tok, + const char **token_r) { const unsigned char *data = tok->token->data; size_t len = tok->token->used; @@ -162,7 +162,7 @@ fts_tokenizer_generic_simple_current_token(struct generic_fts_tokenizer *tok, i_assert(len > 0 && data[len-1] != '*'); } } else { - fts_tokenizer_delete_trailing_partial_char(data, &len); + lang_tokenizer_delete_trailing_partial_char(data, &len); } i_assert(len <= tok->max_length); @@ -179,7 +179,7 @@ static bool uint32_find(const uint32_t *data, unsigned int count, BINARY_NUMBER_SEARCH(data, count, value, idx_r); } -static bool fts_uni_word_break(unichar_t c) +static bool lang_uni_word_break(unichar_t c) { unsigned int idx; @@ -202,38 +202,38 @@ static bool fts_uni_word_break(unichar_t c) return FALSE; } -enum fts_break_type { - FTS_FROM_STOP = 0, - FTS_FROM_WORD = 2, - FTS_TO_STOP= 0, - FTS_TO_WORD = 1, -#define FROM_TO(f,t) FTS_##f##_TO_##t = FTS_FROM_##f | FTS_TO_##t +enum lang_break_type { + LANG_FROM_STOP = 0, + LANG_FROM_WORD = 2, + LANG_TO_STOP= 0, + LANG_TO_WORD = 1, +#define FROM_TO(f,t) LANG_##f##_TO_##t = LANG_FROM_##f | LANG_TO_##t FROM_TO(STOP,STOP), FROM_TO(STOP,WORD), FROM_TO(WORD,STOP), FROM_TO(WORD,WORD), }; -static inline enum fts_break_type -fts_simple_is_word_break(const struct generic_fts_tokenizer *tok, +static inline enum lang_break_type +lang_simple_is_word_break(const struct generic_lang_tokenizer *tok, unichar_t c, bool apostrophe) { /* Until we know better, a letter followed by an apostrophe is continuation of the word. However, if we see non-word letters afterwards, we'll reverse that decision. */ if (apostrophe) - return tok->prev_type == LETTER_TYPE_ALETTER ? FTS_WORD_TO_WORD : FTS_STOP_TO_STOP; + return tok->prev_type == LETTER_TYPE_ALETTER ? LANG_WORD_TO_WORD : LANG_STOP_TO_STOP; - bool new_breakiness = (c < 0x80) ? (fts_ascii_word_breaks[c] != 0) : fts_uni_word_break(c); + bool new_breakiness = (c < 0x80) ? (lang_ascii_word_breaks[c] != 0) : lang_uni_word_break(c); - return (new_breakiness ? FTS_TO_STOP : FTS_TO_WORD) + return (new_breakiness ? LANG_TO_STOP : LANG_TO_WORD) + (tok->prev_type == LETTER_TYPE_ALETTER || tok->prev_type == LETTER_TYPE_SINGLE_QUOTE - ? FTS_FROM_WORD : FTS_FROM_STOP); + ? LANG_FROM_WORD : LANG_FROM_STOP); } -static void fts_tokenizer_generic_reset(struct fts_tokenizer *_tok) +static void lang_tokenizer_generic_reset(struct lang_tokenizer *_tok) { - struct generic_fts_tokenizer *tok = - container_of(_tok, struct generic_fts_tokenizer, tokenizer); + struct generic_lang_tokenizer *tok = + container_of(_tok, struct generic_lang_tokenizer, tokenizer); tok->prev_type = LETTER_TYPE_NONE; tok->prev_prev_type = LETTER_TYPE_NONE; @@ -241,7 +241,7 @@ static void fts_tokenizer_generic_reset(struct fts_tokenizer *_tok) buffer_set_used_size(tok->token, 0); } -static void tok_append_truncated(struct generic_fts_tokenizer *tok, +static void tok_append_truncated(struct generic_lang_tokenizer *tok, const unsigned char *data, size_t size) { buffer_append(tok->token, data, @@ -285,7 +285,7 @@ static unsigned char allowed_base64_leaders[] = { criteria on its own to be discarded. What we pay is we will fail to reject small base64 chunks segments instead of rejecting the whole sequence. - When skip_base64() is invoked in fts_tokenizer_generic_XX_next(), we know + When skip_base64() is invoked in lang_tokenizer_generic_XX_next(), we know that we are not halfway the collection of a token. As (after the previous token) the buffer will contain non-token characters @@ -328,7 +328,7 @@ skip_base64(const unsigned char *data, size_t size) const unsigned char *past; for (past = first; past < end && is_base64(*past); past++); - if (past - first < FTS_SKIP_BASE64_MIN_CHARS) + if (past - first < LANG_SKIP_BASE64_MIN_CHARS) break; if (past < end && memchr(allowed_base64_trailers, *past, N_ELEMENTS(allowed_base64_trailers)) == NULL) @@ -336,22 +336,22 @@ skip_base64(const unsigned char *data, size_t size) start = past; matches++; } - return matches < FTS_SKIP_BASE64_MIN_SEQUENCES ? 0 : start - data; + return matches < LANG_SKIP_BASE64_MIN_SEQUENCES ? 0 : start - data; } static int -fts_tokenizer_generic_simple_next(struct fts_tokenizer *_tok, - const unsigned char *data, size_t size, - size_t *skip_r, const char **token_r, - const char **error_r ATTR_UNUSED) +lang_tokenizer_generic_simple_next(struct lang_tokenizer *_tok, + const unsigned char *data, size_t size, + size_t *skip_r, const char **token_r, + const char **error_r ATTR_UNUSED) { - struct generic_fts_tokenizer *tok = - container_of(_tok, struct generic_fts_tokenizer, tokenizer); + struct generic_lang_tokenizer *tok = + container_of(_tok, struct generic_lang_tokenizer, tokenizer); size_t i, start; int char_size; unichar_t c; bool apostrophe; - enum fts_break_type break_type; + enum lang_break_type break_type; start = tok->token->used > 0 ? 0 : skip_base64(data, size); for (i = start; i < size; i += char_size) { @@ -363,18 +363,18 @@ fts_tokenizer_generic_simple_next(struct fts_tokenizer *_tok, (tok->prev_type == LETTER_TYPE_ALETTER)) { /* this might be a prefix-mathing query */ shift_prev_type(tok, LETTER_TYPE_PREFIXSPLAT); - } else if ((break_type = fts_simple_is_word_break(tok, c, apostrophe)) - != FTS_WORD_TO_WORD) { + } else if ((break_type = lang_simple_is_word_break(tok, c, apostrophe)) + != LANG_WORD_TO_WORD) { tok_append_truncated(tok, data + start, i - start); - shift_prev_type(tok, (break_type & FTS_TO_WORD) != 0 + shift_prev_type(tok, (break_type & LANG_TO_WORD) != 0 ? LETTER_TYPE_ALETTER : LETTER_TYPE_NONE); - if (fts_tokenizer_generic_simple_current_token(tok, token_r)) { + if (lang_tokenizer_generic_simple_current_token(tok, token_r)) { *skip_r = i; - if (break_type != FTS_STOP_TO_WORD) /* therefore *_TO_STOP */ + if (break_type != LANG_STOP_TO_WORD) /* therefore *_TO_STOP */ *skip_r += char_size; return 1; } - if ((break_type & FTS_TO_WORD) == 0) + if ((break_type & LANG_TO_WORD) == 0) start = i + char_size; } else if (apostrophe) { /* all apostrophes require special handling */ @@ -400,7 +400,7 @@ fts_tokenizer_generic_simple_next(struct fts_tokenizer *_tok, /* return the last token */ if (size == 0) { shift_prev_type(tok, LETTER_TYPE_NONE); - if (fts_tokenizer_generic_simple_current_token(tok, token_r)) + if (lang_tokenizer_generic_simple_current_token(tok, token_r)) return 1; } @@ -458,25 +458,25 @@ static enum letter_type letter_type(unichar_t c) return LETTER_TYPE_OTHER; } -static bool letter_panic(struct generic_fts_tokenizer *tok ATTR_UNUSED) +static bool letter_panic(struct generic_lang_tokenizer *tok ATTR_UNUSED) { i_panic("Letter type should not be used."); } /* WB3, WB3a and WB3b, but really different since we try to eat whitespace between words. */ -static bool letter_cr_lf_newline(struct generic_fts_tokenizer *tok ATTR_UNUSED) +static bool letter_cr_lf_newline(struct generic_lang_tokenizer *tok ATTR_UNUSED) { return TRUE; } -static bool letter_extend_format(struct generic_fts_tokenizer *tok ATTR_UNUSED) +static bool letter_extend_format(struct generic_lang_tokenizer *tok ATTR_UNUSED) { /* WB4 */ return FALSE; } -static bool letter_regional_indicator(struct generic_fts_tokenizer *tok) +static bool letter_regional_indicator(struct generic_lang_tokenizer *tok) { /* WB13c */ if (tok->prev_type == LETTER_TYPE_REGIONAL_INDICATOR) @@ -485,7 +485,7 @@ static bool letter_regional_indicator(struct generic_fts_tokenizer *tok) return TRUE; /* Any / Any */ } -static bool letter_katakana(struct generic_fts_tokenizer *tok) +static bool letter_katakana(struct generic_lang_tokenizer *tok) { /* WB13 */ if (tok->prev_type == LETTER_TYPE_KATAKANA) @@ -498,7 +498,7 @@ static bool letter_katakana(struct generic_fts_tokenizer *tok) return TRUE; /* Any / Any */ } -static bool letter_hebrew(struct generic_fts_tokenizer *tok) +static bool letter_hebrew(struct generic_lang_tokenizer *tok) { /* WB5 */ if (tok->prev_type == LETTER_TYPE_HEBREW_LETTER) @@ -523,11 +523,11 @@ static bool letter_hebrew(struct generic_fts_tokenizer *tok) return TRUE; /* Any / Any */ } -static bool letter_aletter(struct generic_fts_tokenizer *tok) +static bool letter_aletter(struct generic_lang_tokenizer *tok) { /* WB5a */ - if (tok->wb5a && tok->token->used <= FTS_WB5A_PREFIX_MAX_LENGTH) + if (tok->wb5a && tok->token->used <= LANG_WB5A_PREFIX_MAX_LENGTH) if (IS_WB5A_APOSTROPHE(tok->prev_letter) && IS_VOWEL(tok->letter)) { tok->seen_wb5a = TRUE; return TRUE; @@ -556,7 +556,7 @@ static bool letter_aletter(struct generic_fts_tokenizer *tok) return TRUE; /* Any / Any */ } -static bool letter_single_quote(struct generic_fts_tokenizer *tok) +static bool letter_single_quote(struct generic_lang_tokenizer *tok) { /* WB6 */ if (tok->prev_type == LETTER_TYPE_ALETTER || @@ -570,7 +570,7 @@ static bool letter_single_quote(struct generic_fts_tokenizer *tok) return TRUE; /* Any / Any */ } -static bool letter_double_quote(struct generic_fts_tokenizer *tok) +static bool letter_double_quote(struct generic_lang_tokenizer *tok) { if (tok->prev_type == LETTER_TYPE_DOUBLE_QUOTE) @@ -579,14 +579,14 @@ static bool letter_double_quote(struct generic_fts_tokenizer *tok) return TRUE; /* Any / Any */ } -static bool letter_midnumlet(struct generic_fts_tokenizer *tok ATTR_UNUSED) +static bool letter_midnumlet(struct generic_lang_tokenizer *tok ATTR_UNUSED) { /* Break at MidNumLet, non-conformant with WB6/WB7 */ return TRUE; } -static bool letter_midletter(struct generic_fts_tokenizer *tok) +static bool letter_midletter(struct generic_lang_tokenizer *tok) { /* WB6 */ if (tok->prev_type == LETTER_TYPE_ALETTER || @@ -596,7 +596,7 @@ static bool letter_midletter(struct generic_fts_tokenizer *tok) return TRUE; /* Any / Any */ } -static bool letter_midnum(struct generic_fts_tokenizer *tok) +static bool letter_midnum(struct generic_lang_tokenizer *tok) { /* WB12 */ if (tok->prev_type == LETTER_TYPE_NUMERIC) @@ -605,7 +605,7 @@ static bool letter_midnum(struct generic_fts_tokenizer *tok) return TRUE; /* Any / Any */ } -static bool letter_numeric(struct generic_fts_tokenizer *tok) +static bool letter_numeric(struct generic_lang_tokenizer *tok) { /* WB8 */ if (tok->prev_type == LETTER_TYPE_NUMERIC) @@ -630,7 +630,7 @@ static bool letter_numeric(struct generic_fts_tokenizer *tok) return TRUE; /* Any / Any */ } -static bool letter_extendnumlet(struct generic_fts_tokenizer *tok) +static bool letter_extendnumlet(struct generic_lang_tokenizer *tok) { /* WB13a */ @@ -644,7 +644,7 @@ static bool letter_extendnumlet(struct generic_fts_tokenizer *tok) return TRUE; /* Any / Any */ } -static bool letter_apostrophe(struct generic_fts_tokenizer *tok) +static bool letter_apostrophe(struct generic_lang_tokenizer *tok) { if (tok->prev_type == LETTER_TYPE_ALETTER || @@ -653,12 +653,12 @@ static bool letter_apostrophe(struct generic_fts_tokenizer *tok) return TRUE; /* Any / Any */ } -static bool letter_prefixsplat(struct generic_fts_tokenizer *tok ATTR_UNUSED) +static bool letter_prefixsplat(struct generic_lang_tokenizer *tok ATTR_UNUSED) { /* Dovecot explicit-prefix specific */ return TRUE; /* Always induces a word break - but with special handling */ } -static bool letter_other(struct generic_fts_tokenizer *tok ATTR_UNUSED) +static bool letter_other(struct generic_lang_tokenizer *tok ATTR_UNUSED) { return TRUE; /* Any / Any */ } @@ -684,7 +684,7 @@ static bool is_nontoken(enum letter_type lt) very kludgy and should be coded into the rules themselves somehow. */ -static bool is_one_past_end(struct generic_fts_tokenizer *tok) +static bool is_one_past_end(struct generic_lang_tokenizer *tok) { /* WB6/7 false positive detected at one past end. */ if (tok->prev_type == LETTER_TYPE_MIDLETTER || @@ -704,8 +704,8 @@ static bool is_one_past_end(struct generic_fts_tokenizer *tok) } static void -fts_tokenizer_generic_tr29_current_token(struct generic_fts_tokenizer *tok, - const char **token_r) +lang_tokenizer_generic_tr29_current_token(struct generic_lang_tokenizer *tok, + const char **token_r) { const unsigned char *data = tok->token->data; size_t len = tok->token->used; @@ -718,7 +718,7 @@ fts_tokenizer_generic_tr29_current_token(struct generic_fts_tokenizer *tok, i_assert(len > 0); len--; } else if (tok->untruncated_length > tok->max_length) { - fts_tokenizer_delete_trailing_partial_char(data, &len); + lang_tokenizer_delete_trailing_partial_char(data, &len); } /* we're skipping all non-token chars at the beginning of the word, so by this point we must have something here - even if we just @@ -733,7 +733,7 @@ fts_tokenizer_generic_tr29_current_token(struct generic_fts_tokenizer *tok, tok->untruncated_length = 0; } -static void wb5a_reinsert(struct generic_fts_tokenizer *tok) +static void wb5a_reinsert(struct generic_lang_tokenizer *tok) { string_t *utf8_str = t_str_new(6); @@ -746,7 +746,7 @@ static void wb5a_reinsert(struct generic_fts_tokenizer *tok) } struct letter_fn { - bool (*fn)(struct generic_fts_tokenizer *tok); + bool (*fn)(struct generic_lang_tokenizer *tok); }; static struct letter_fn letter_fns[] = { {letter_panic}, {letter_cr_lf_newline}, {letter_cr_lf_newline}, @@ -762,7 +762,7 @@ static struct letter_fn letter_fns[] = { /* Find word boundaries in input text. Based on Unicode standard annex - #29, but tailored for FTS purposes. + #29, but tailored for language purposes. http://www.unicode.org/reports/tr29/ Note: The text of tr29 is a living standard, so it keeps @@ -771,7 +771,7 @@ static struct letter_fn letter_fns[] = { Adaptions: * Added optional WB5a as a configurable option. The cut of prefix is - max FTS_WB5A_PREFIX chars. + max LANG_WB5A_PREFIX chars. * No word boundary at Start-Of-Text or End-of-Text (Wb1 and WB2). * Break just once, not before and after. * Break at MidNumLet, except apostrophes (diverging from WB6/WB7). @@ -779,7 +779,7 @@ static struct letter_fn letter_fns[] = { to assist in finding individual words. */ static bool -uni_found_word_boundary(struct generic_fts_tokenizer *tok, enum letter_type lt) +uni_found_word_boundary(struct generic_lang_tokenizer *tok, enum letter_type lt) { /* No rule knows what to do with just one char, except the linebreaks we eat away (above) anyway. */ @@ -797,13 +797,13 @@ uni_found_word_boundary(struct generic_fts_tokenizer *tok, enum letter_type lt) } static int -fts_tokenizer_generic_tr29_next(struct fts_tokenizer *_tok, - const unsigned char *data, size_t size, - size_t *skip_r, const char **token_r, - const char **error_r ATTR_UNUSED) +lang_tokenizer_generic_tr29_next(struct lang_tokenizer *_tok, + const unsigned char *data, size_t size, + size_t *skip_r, const char **token_r, + const char **error_r ATTR_UNUSED) { - struct generic_fts_tokenizer *tok = - container_of(_tok, struct generic_fts_tokenizer, tokenizer); + struct generic_lang_tokenizer *tok = + container_of(_tok, struct generic_lang_tokenizer, tokenizer); unichar_t c; size_t i, char_start_i, start_pos; enum letter_type lt; @@ -830,7 +830,7 @@ fts_tokenizer_generic_tr29_next(struct fts_tokenizer *_tok, continue; } - if (tok->wb5a && tok->token->used <= FTS_WB5A_PREFIX_MAX_LENGTH) + if (tok->wb5a && tok->token->used <= LANG_WB5A_PREFIX_MAX_LENGTH) add_letter(tok, c); if (uni_found_word_boundary(tok, lt)) { @@ -838,11 +838,11 @@ fts_tokenizer_generic_tr29_next(struct fts_tokenizer *_tok, tok_append_truncated(tok, data + start_pos, char_start_i - start_pos); if (lt == LETTER_TYPE_PREFIXSPLAT && tok->prefixsplat) { - const unsigned char prefix_char = FTS_PREFIX_SPLAT_CHAR; + const unsigned char prefix_char = LANG_PREFIX_SPLAT_CHAR; tok_append_truncated(tok, &prefix_char, 1); } *skip_r = i; - fts_tokenizer_generic_tr29_current_token(tok, token_r); + lang_tokenizer_generic_tr29_current_token(tok, token_r); return 1; } else if (lt == LETTER_TYPE_APOSTROPHE || lt == LETTER_TYPE_SINGLE_QUOTE) { @@ -862,45 +862,45 @@ fts_tokenizer_generic_tr29_next(struct fts_tokenizer *_tok, if (size == 0 && tok->token->used > 0) { /* return the last token */ *skip_r = 0; - fts_tokenizer_generic_tr29_current_token(tok, token_r); + lang_tokenizer_generic_tr29_current_token(tok, token_r); return 1; } return 0; } static int -fts_tokenizer_generic_next(struct fts_tokenizer *_tok ATTR_UNUSED, - const unsigned char *data ATTR_UNUSED, - size_t size ATTR_UNUSED, - size_t *skip_r ATTR_UNUSED, - const char **token_r ATTR_UNUSED, - const char **error_r ATTR_UNUSED) +lang_tokenizer_generic_next(struct lang_tokenizer *_tok ATTR_UNUSED, + const unsigned char *data ATTR_UNUSED, + size_t size ATTR_UNUSED, + size_t *skip_r ATTR_UNUSED, + const char **token_r ATTR_UNUSED, + const char **error_r ATTR_UNUSED) { i_unreached(); } -static const struct fts_tokenizer_vfuncs generic_tokenizer_vfuncs = { - fts_tokenizer_generic_create, - fts_tokenizer_generic_destroy, - fts_tokenizer_generic_reset, - fts_tokenizer_generic_next +static const struct lang_tokenizer_vfuncs generic_tokenizer_vfuncs = { + lang_tokenizer_generic_create, + lang_tokenizer_generic_destroy, + lang_tokenizer_generic_reset, + lang_tokenizer_generic_next }; -static const struct fts_tokenizer fts_tokenizer_generic_real = { +static const struct lang_tokenizer lang_tokenizer_generic_real = { .name = "generic", .v = &generic_tokenizer_vfuncs }; -const struct fts_tokenizer *fts_tokenizer_generic = &fts_tokenizer_generic_real; +const struct lang_tokenizer *lang_tokenizer_generic = &lang_tokenizer_generic_real; -const struct fts_tokenizer_vfuncs generic_tokenizer_vfuncs_simple = { - fts_tokenizer_generic_create, - fts_tokenizer_generic_destroy, - fts_tokenizer_generic_reset, - fts_tokenizer_generic_simple_next +const struct lang_tokenizer_vfuncs generic_tokenizer_vfuncs_simple = { + lang_tokenizer_generic_create, + lang_tokenizer_generic_destroy, + lang_tokenizer_generic_reset, + lang_tokenizer_generic_simple_next }; -const struct fts_tokenizer_vfuncs generic_tokenizer_vfuncs_tr29 = { - fts_tokenizer_generic_create, - fts_tokenizer_generic_destroy, - fts_tokenizer_generic_reset, - fts_tokenizer_generic_tr29_next +const struct lang_tokenizer_vfuncs generic_tokenizer_vfuncs_tr29 = { + lang_tokenizer_generic_create, + lang_tokenizer_generic_destroy, + lang_tokenizer_generic_reset, + lang_tokenizer_generic_tr29_next }; diff --git a/src/lib-language/lang-tokenizer-private.h b/src/lib-language/lang-tokenizer-private.h index 8d9be34bd2..6ba11ee1c2 100644 --- a/src/lib-language/lang-tokenizer-private.h +++ b/src/lib-language/lang-tokenizer-private.h @@ -1,35 +1,35 @@ -#ifndef FTS_TOKENIZER_PRIVATE_H -#define FTS_TOKENIZER_PRIVATE_H +#ifndef LANG_TOKENIZER_PRIVATE_H +#define LANG_TOKENIZER_PRIVATE_H #include "lang-tokenizer.h" -#define FTS_TOKENIZER_CLASSES_NR 2 +#define LANG_TOKENIZER_CLASSES_NR 2 -struct fts_tokenizer_vfuncs { +struct lang_tokenizer_vfuncs { int (*create)(const char *const *settings, - struct fts_tokenizer **tokenizer_r, const char **error_r); - void (*destroy)(struct fts_tokenizer *tok); + struct lang_tokenizer **tokenizer_r, const char **error_r); + void (*destroy)(struct lang_tokenizer *tok); - void (*reset)(struct fts_tokenizer *tok); - int (*next)(struct fts_tokenizer *tok, const unsigned char *data, + void (*reset)(struct lang_tokenizer *tok); + int (*next)(struct lang_tokenizer *tok, const unsigned char *data, size_t size, size_t *skip_r, const char **token_r, const char **error_r); }; -enum fts_tokenizer_parent_state { - FTS_TOKENIZER_PARENT_STATE_ADD_DATA = 0, - FTS_TOKENIZER_PARENT_STATE_NEXT_OUTPUT, - FTS_TOKENIZER_PARENT_STATE_FINALIZE +enum lang_tokenizer_parent_state { + LANG_TOKENIZER_PARENT_STATE_ADD_DATA = 0, + LANG_TOKENIZER_PARENT_STATE_NEXT_OUTPUT, + LANG_TOKENIZER_PARENT_STATE_FINALIZE }; -struct fts_tokenizer { +struct lang_tokenizer { const char *name; - const struct fts_tokenizer_vfuncs *v; + const struct lang_tokenizer_vfuncs *v; int refcount; - struct fts_tokenizer *parent; + struct lang_tokenizer *parent; buffer_t *parent_input; - enum fts_tokenizer_parent_state parent_state; + enum lang_tokenizer_parent_state parent_state; const unsigned char *prev_data; size_t prev_size; @@ -46,7 +46,7 @@ struct fts_tokenizer { bool finalize_parent_pending; }; -void fts_tokenizer_register(const struct fts_tokenizer *tok_class); -void fts_tokenizer_unregister(const struct fts_tokenizer *tok_class); +void lang_tokenizer_register(const struct lang_tokenizer *tok_class); +void lang_tokenizer_unregister(const struct lang_tokenizer *tok_class); #endif diff --git a/src/lib-language/lang-tokenizer.c b/src/lib-language/lang-tokenizer.c index ac6b8bf48a..b732765413 100644 --- a/src/lib-language/lang-tokenizer.c +++ b/src/lib-language/lang-tokenizer.c @@ -8,65 +8,65 @@ #include "lang-tokenizer.h" #include "lang-tokenizer-private.h" -static ARRAY(const struct fts_tokenizer *) fts_tokenizer_classes; +static ARRAY(const struct lang_tokenizer *) lang_tokenizer_classes; -void fts_tokenizers_init(void) +void lang_tokenizers_init(void) { - if (!array_is_created(&fts_tokenizer_classes)) { - fts_tokenizer_register(fts_tokenizer_generic); - fts_tokenizer_register(fts_tokenizer_email_address); + if (!array_is_created(&lang_tokenizer_classes)) { + lang_tokenizer_register(lang_tokenizer_generic); + lang_tokenizer_register(lang_tokenizer_email_address); } } -void fts_tokenizers_deinit(void) +void lang_tokenizers_deinit(void) { - if (array_is_created(&fts_tokenizer_classes)) - array_free(&fts_tokenizer_classes); + if (array_is_created(&lang_tokenizer_classes)) + array_free(&lang_tokenizer_classes); } /* private */ -void fts_tokenizer_register(const struct fts_tokenizer *tok_class) +void lang_tokenizer_register(const struct lang_tokenizer *tok_class) { - if (!array_is_created(&fts_tokenizer_classes)) - i_array_init(&fts_tokenizer_classes, FTS_TOKENIZER_CLASSES_NR); - array_push_back(&fts_tokenizer_classes, &tok_class); + if (!array_is_created(&lang_tokenizer_classes)) + i_array_init(&lang_tokenizer_classes, LANG_TOKENIZER_CLASSES_NR); + array_push_back(&lang_tokenizer_classes, &tok_class); } /* private */ -void fts_tokenizer_unregister(const struct fts_tokenizer *tok_class) +void lang_tokenizer_unregister(const struct lang_tokenizer *tok_class) { - const struct fts_tokenizer *const *tp; + const struct lang_tokenizer *const *tp; unsigned int idx; - array_foreach(&fts_tokenizer_classes, tp) { + array_foreach(&lang_tokenizer_classes, tp) { if (strcmp((*tp)->name, tok_class->name) == 0) { - idx = array_foreach_idx(&fts_tokenizer_classes, tp); - array_delete(&fts_tokenizer_classes, idx, 1); - if (array_count(&fts_tokenizer_classes) == 0) - array_free(&fts_tokenizer_classes); + idx = array_foreach_idx(&lang_tokenizer_classes, tp); + array_delete(&lang_tokenizer_classes, idx, 1); + if (array_count(&lang_tokenizer_classes) == 0) + array_free(&lang_tokenizer_classes); return; } } i_unreached(); } -const struct fts_tokenizer *fts_tokenizer_find(const char *name) +const struct lang_tokenizer *lang_tokenizer_find(const char *name) { - const struct fts_tokenizer *tok; + const struct lang_tokenizer *tok; - array_foreach_elem(&fts_tokenizer_classes, tok) { + array_foreach_elem(&lang_tokenizer_classes, tok) { if (strcmp(tok->name, name) == 0) return tok; } return NULL; } -const char *fts_tokenizer_name(const struct fts_tokenizer *tok) +const char *lang_tokenizer_name(const struct lang_tokenizer *tok) { return tok->name; } -static void fts_tokenizer_self_reset(struct fts_tokenizer *tok) +static void lang_tokenizer_self_reset(struct lang_tokenizer *tok) { tok->prev_data = NULL; tok->prev_size = 0; @@ -74,13 +74,13 @@ static void fts_tokenizer_self_reset(struct fts_tokenizer *tok) tok->prev_reply_finished = TRUE; } -int fts_tokenizer_create(const struct fts_tokenizer *tok_class, - struct fts_tokenizer *parent, - const char *const *settings, - struct fts_tokenizer **tokenizer_r, - const char **error_r) +int lang_tokenizer_create(const struct lang_tokenizer *tok_class, + struct lang_tokenizer *parent, + const char *const *settings, + struct lang_tokenizer **tokenizer_r, + const char **error_r) { - struct fts_tokenizer *tok; + struct lang_tokenizer *tok; const char *empty_settings = NULL; i_assert(settings == NULL || str_array_length(settings) % 2 == 0); @@ -93,9 +93,9 @@ int fts_tokenizer_create(const struct fts_tokenizer *tok_class, return -1; } tok->refcount = 1; - fts_tokenizer_self_reset(tok); + lang_tokenizer_self_reset(tok); if (parent != NULL) { - fts_tokenizer_ref(parent); + lang_tokenizer_ref(parent); tok->parent = parent; tok->parent_input = buffer_create_dynamic(default_pool, 128); } @@ -104,16 +104,16 @@ int fts_tokenizer_create(const struct fts_tokenizer *tok_class, return 0; } -void fts_tokenizer_ref(struct fts_tokenizer *tok) +void lang_tokenizer_ref(struct lang_tokenizer *tok) { i_assert(tok->refcount > 0); tok->refcount++; } -void fts_tokenizer_unref(struct fts_tokenizer **_tok) +void lang_tokenizer_unref(struct lang_tokenizer **_tok) { - struct fts_tokenizer *tok = *_tok; + struct lang_tokenizer *tok = *_tok; i_assert(tok->refcount > 0); *_tok = NULL; @@ -123,14 +123,14 @@ void fts_tokenizer_unref(struct fts_tokenizer **_tok) buffer_free(&tok->parent_input); if (tok->parent != NULL) - fts_tokenizer_unref(&tok->parent); + lang_tokenizer_unref(&tok->parent); tok->v->destroy(tok); } static int -fts_tokenizer_next_self(struct fts_tokenizer *tok, - const unsigned char *data, size_t size, - const char **token_r, const char **error_r) +lang_tokenizer_next_self(struct lang_tokenizer *tok, + const unsigned char *data, size_t size, + const char **token_r, const char **error_r) { int ret = 0; size_t skip = 0; @@ -170,27 +170,27 @@ fts_tokenizer_next_self(struct fts_tokenizer *tok, } else if (ret == 0) { /* Need more data to get the next token. The next call will provide a whole new data block, so reset the prev_* state. */ - fts_tokenizer_self_reset(tok); + lang_tokenizer_self_reset(tok); } return ret; } -void fts_tokenizer_reset(struct fts_tokenizer *tok) +void lang_tokenizer_reset(struct lang_tokenizer *tok) { tok->v->reset(tok); - fts_tokenizer_self_reset(tok); + lang_tokenizer_self_reset(tok); } -int fts_tokenizer_next(struct fts_tokenizer *tok, - const unsigned char *data, size_t size, - const char **token_r, const char **error_r) +int lang_tokenizer_next(struct lang_tokenizer *tok, + const unsigned char *data, size_t size, + const char **token_r, const char **error_r) { int ret; switch (tok->parent_state) { - case FTS_TOKENIZER_PARENT_STATE_ADD_DATA: + case LANG_TOKENIZER_PARENT_STATE_ADD_DATA: /* Try to get the next token using this tokenizer */ - ret = fts_tokenizer_next_self(tok, data, size, token_r, error_r); + ret = lang_tokenizer_next_self(tok, data, size, token_r, error_r); if (ret <= 0) { /* error / more data needed */ if (ret == 0 && size == 0 && @@ -200,8 +200,8 @@ int fts_tokenizer_next(struct fts_tokenizer *tok, tokenizer still needs to be finalized. */ tok->finalize_parent_pending = FALSE; tok->parent_state = - FTS_TOKENIZER_PARENT_STATE_FINALIZE; - return fts_tokenizer_next(tok, NULL, 0, token_r, error_r); + LANG_TOKENIZER_PARENT_STATE_FINALIZE; + return lang_tokenizer_next(tok, NULL, 0, token_r, error_r); } break; } @@ -221,19 +221,19 @@ int fts_tokenizer_next(struct fts_tokenizer *tok, buffer_append(tok->parent_input, *token_r, strlen(*token_r)); tok->parent_state++; /* fall through */ - case FTS_TOKENIZER_PARENT_STATE_NEXT_OUTPUT: + case LANG_TOKENIZER_PARENT_STATE_NEXT_OUTPUT: /* Return the next token from parent tokenizer */ - ret = fts_tokenizer_next(tok->parent, tok->parent_input->data, + ret = lang_tokenizer_next(tok->parent, tok->parent_input->data, tok->parent_input->used, token_r, error_r); if (ret != 0) break; tok->parent_state++; /* fall through */ - case FTS_TOKENIZER_PARENT_STATE_FINALIZE: + case LANG_TOKENIZER_PARENT_STATE_FINALIZE: /* No more input is coming from the child tokenizer. Return the final token(s) from the parent tokenizer. */ if (!tok->stream_to_parents || size == 0) { - ret = fts_tokenizer_next(tok->parent, NULL, 0, + ret = lang_tokenizer_next(tok->parent, NULL, 0, token_r, error_r); if (ret != 0) break; @@ -243,8 +243,8 @@ int fts_tokenizer_next(struct fts_tokenizer *tok, /* We're finished handling the previous child token. See if there are more child tokens available with this same data input. */ - tok->parent_state = FTS_TOKENIZER_PARENT_STATE_ADD_DATA; - return fts_tokenizer_next(tok, data, size, token_r, error_r); + tok->parent_state = LANG_TOKENIZER_PARENT_STATE_ADD_DATA; + return lang_tokenizer_next(tok, data, size, token_r, error_r); default: i_unreached(); } @@ -253,8 +253,8 @@ int fts_tokenizer_next(struct fts_tokenizer *tok, return ret; } -int fts_tokenizer_final(struct fts_tokenizer *tok, const char **token_r, - const char **error_r) +int lang_tokenizer_final(struct lang_tokenizer *tok, const char **token_r, + const char **error_r) { - return fts_tokenizer_next(tok, NULL, 0, token_r, error_r); + return lang_tokenizer_next(tok, NULL, 0, token_r, error_r); } diff --git a/src/lib-language/lang-tokenizer.h b/src/lib-language/lang-tokenizer.h index 59ccf0703c..b9572cb5a0 100644 --- a/src/lib-language/lang-tokenizer.h +++ b/src/lib-language/lang-tokenizer.h @@ -1,5 +1,5 @@ -#ifndef FTS_TOKENIZER_H -#define FTS_TOKENIZER_H +#ifndef LANG_TOKENIZER_H +#define LANG_TOKENIZER_H /* Settings are given in the form of a const char * const *settings = @@ -22,12 +22,12 @@ "search" Remove addresses from parent data stream, so they are not processed further. Defaults to disabled. Enable by defining the keyword (and any value). */ -extern const struct fts_tokenizer *fts_tokenizer_email_address; +extern const struct lang_tokenizer *lang_tokenizer_email_address; /* Generic email content tokenizer. Cuts text into tokens. */ /* Settings: "maxlen" Maximum length of token, before an arbitrary cut off is made. - Defaults to FTS_DEFAULT_TOKEN_MAX_LENGTH. + Defaults to LANG_DEFAULT_TOKEN_MAX_LENGTH. "algorithm", accepted values are "simple" or "tr29". Defines the method for looking for word boundaries. Simple is faster and will @@ -39,7 +39,7 @@ extern const struct fts_tokenizer *fts_tokenizer_email_address; is also significantly slower than simple. The algorithms also differ in some details, e.g. simple will cut "a.b" and tr29 will not. The default is "simple" */ -extern const struct fts_tokenizer *fts_tokenizer_generic; +extern const struct lang_tokenizer *lang_tokenizer_generic; /* Tokenizing workflow, find --> create --> filter --> destroy. @@ -47,41 +47,41 @@ extern const struct fts_tokenizer *fts_tokenizer_generic; */ /* Register all built-in tokenizers. */ -void fts_tokenizers_init(void); -void fts_tokenizers_deinit(void); +void lang_tokenizers_init(void); +void lang_tokenizers_deinit(void); -const struct fts_tokenizer *fts_tokenizer_find(const char *name); +const struct lang_tokenizer *lang_tokenizer_find(const char *name); /* Create a new tokenizer. The settings are described above. */ -int fts_tokenizer_create(const struct fts_tokenizer *tok_class, - struct fts_tokenizer *parent, - const char *const *settings, - struct fts_tokenizer **tokenizer_r, - const char **error_r); -void fts_tokenizer_ref(struct fts_tokenizer *tok); -void fts_tokenizer_unref(struct fts_tokenizer **tok); +int lang_tokenizer_create(const struct lang_tokenizer *tok_class, + struct lang_tokenizer *parent, + const char *const *settings, + struct lang_tokenizer **tokenizer_r, + const char **error_r); +void lang_tokenizer_ref(struct lang_tokenizer *tok); +void lang_tokenizer_unref(struct lang_tokenizer **tok); -/* Reset FTS tokenizer state */ -void fts_tokenizer_reset(struct fts_tokenizer *tok); +/* Reset lang tokenizer state */ +void lang_tokenizer_reset(struct lang_tokenizer *tok); /* Returns 1 if *token_r was returned, 0 if more data is needed, -1 on error. This function should be called with the same data+size until it - returns 0. After that fts_tokenizer_final() should be called until it + returns 0. After that lang_tokenizer_final() should be called until it returns 0 to flush out the final token(s). data must contain only valid complete UTF-8 sequences, but otherwise it may be broken into however small pieces. (Input to this function typically comes from message-decoder, which returns only complete UTF-8 sequences.) */ -int fts_tokenizer_next(struct fts_tokenizer *tok, - const unsigned char *data, size_t size, - const char **token_r, const char **error_r); -/* Returns same as fts_tokenizer_next(). */ -int fts_tokenizer_final(struct fts_tokenizer *tok, const char **token_r, - const char **error_r); +int lang_tokenizer_next(struct lang_tokenizer *tok, + const unsigned char *data, size_t size, + const char **token_r, const char **error_r); +/* Returns same as lang_tokenizer_next(). */ +int lang_tokenizer_final(struct lang_tokenizer *tok, const char **token_r, + const char **error_r); -const char *fts_tokenizer_name(const struct fts_tokenizer *tok); +const char *lang_tokenizer_name(const struct lang_tokenizer *tok); #endif diff --git a/src/lib-language/language.c b/src/lib-language/language.c index b186c802f9..26d3930215 100644 --- a/src/lib-language/language.c +++ b/src/lib-language/language.c @@ -20,28 +20,28 @@ #define DETECT_STR_MAX_LEN 200 -struct fts_textcat { +struct textcat { int refcount; void *handle; char *config_path, *data_dir, *failed; }; -struct fts_language_list { +struct language_list { pool_t pool; - ARRAY_TYPE(fts_language) languages; - struct fts_textcat *textcat; + ARRAY_TYPE(language) languages; + struct textcat *textcat; const char *textcat_config; const char *textcat_datadir; }; -pool_t fts_languages_pool; -ARRAY_TYPE(fts_language) fts_languages; +pool_t languages_pool; +ARRAY_TYPE(language) languages; #ifdef HAVE_FTS_EXTTEXTCAT -static struct fts_textcat *fts_textcat_cache = NULL; +static struct textcat *textcat_cache = NULL; #endif /* ISO 639-1 alpha 2 codes for languages */ -const struct fts_language fts_languages_builtin [] = { +const struct language languages_builtin [] = { { "da" }, /* Danish */ { "de" }, /* German */ { "en" }, /* English */ @@ -58,19 +58,19 @@ const struct fts_language fts_languages_builtin [] = { { "tr" }, /* Turkish */ }; -const struct fts_language fts_language_data = { +const struct language language_data = { "data" }; #ifdef HAVE_FTS_EXTTEXTCAT -static void fts_textcat_unref(struct fts_textcat *textcat) +static void textcat_unref(struct textcat *textcat) { i_assert(textcat->refcount > 0); if (--textcat->refcount > 0) return; - if (textcat == fts_textcat_cache) - fts_textcat_cache = NULL; + if (textcat == textcat_cache) + textcat_cache = NULL; i_free(textcat->config_path); i_free(textcat->data_dir); @@ -81,58 +81,57 @@ static void fts_textcat_unref(struct fts_textcat *textcat) } #endif -void fts_languages_init(void) +void languages_init(void) { unsigned int i; - const struct fts_language *lp; - - fts_languages_pool = pool_alloconly_create("fts_language", - sizeof(fts_languages_builtin)); - p_array_init(&fts_languages, fts_languages_pool, - N_ELEMENTS(fts_languages_builtin)); - for (i = 0; i < N_ELEMENTS(fts_languages_builtin); i++){ - lp = &fts_languages_builtin[i]; - array_push_back(&fts_languages, &lp); + const struct language *lp; + + languages_pool = pool_alloconly_create("language", + sizeof(languages_builtin)); + p_array_init(&languages, languages_pool, N_ELEMENTS(languages_builtin)); + for (i = 0; i < N_ELEMENTS(languages_builtin); i++){ + lp = &languages_builtin[i]; + array_push_back(&languages, &lp); } } -void fts_languages_deinit(void) +void languages_deinit(void) { #ifdef HAVE_FTS_EXTTEXTCAT - if (fts_textcat_cache != NULL) - fts_textcat_unref(fts_textcat_cache); + if (textcat_cache != NULL) + textcat_unref(textcat_cache); #endif - pool_unref(&fts_languages_pool); + pool_unref(&languages_pool); } -void fts_language_register(const char *name) +void language_register(const char *name) { - struct fts_language *lang; + struct language *lang; - if (fts_language_find(name) != NULL) + if (language_find(name) != NULL) return; - lang = p_new(fts_languages_pool, struct fts_language, 1); - lang->name = p_strdup(fts_languages_pool, name); - array_push_back(&fts_languages, (const struct fts_language **)&lang); + lang = p_new(languages_pool, struct language, 1); + lang->name = p_strdup(languages_pool, name); + array_push_back(&languages, (const struct language **)&lang); } -const struct fts_language *fts_language_find(const char *name) +const struct language *language_find(const char *name) { - const struct fts_language *lang; + const struct language *lang; - array_foreach_elem(&fts_languages, lang) { + array_foreach_elem(&languages, lang) { if (strcmp(lang->name, name) == 0) return lang; } return NULL; } -int fts_language_list_init(const char *const *settings, - struct fts_language_list **list_r, - const char **error_r) +int language_list_init(const char *const *settings, + struct language_list **list_r, + const char **error_r) { - struct fts_language_list *lp; + struct language_list *lp; pool_t pool; unsigned int i; const char *conf = NULL, *data = NULL; @@ -150,8 +149,8 @@ int fts_language_list_init(const char *const *settings, } } - pool = pool_alloconly_create("fts_language_list", 128); - lp = p_new(pool, struct fts_language_list, 1); + pool = pool_alloconly_create("language_list", 128); + lp = p_new(pool, struct language_list, 1); lp->pool = pool; if (conf != NULL) lp->textcat_config = p_strdup(pool, conf); @@ -166,22 +165,22 @@ int fts_language_list_init(const char *const *settings, return 0; } -void fts_language_list_deinit(struct fts_language_list **list) +void language_list_deinit(struct language_list **list) { - struct fts_language_list *lp = *list; + struct language_list *lp = *list; *list = NULL; #ifdef HAVE_FTS_EXTTEXTCAT if (lp->textcat != NULL) - fts_textcat_unref(lp->textcat); + textcat_unref(lp->textcat); #endif pool_unref(&lp->pool); } -static const struct fts_language * -fts_language_list_find(struct fts_language_list *list, const char *name) +static const struct language * +language_list_find(struct language_list *list, const char *name) { - const struct fts_language *lang; + const struct language *lang; array_foreach_elem(&list->languages, lang) { if (strcmp(lang->name, name) == 0) @@ -190,52 +189,52 @@ fts_language_list_find(struct fts_language_list *list, const char *name) return NULL; } -void fts_language_list_add(struct fts_language_list *list, - const struct fts_language *lang) +void language_list_add(struct language_list *list, + const struct language *lang) { - i_assert(fts_language_list_find(list, lang->name) == NULL); + i_assert(language_list_find(list, lang->name) == NULL); array_push_back(&list->languages, &lang); } -bool fts_language_list_add_names(struct fts_language_list *list, - const char *names, - const char **unknown_name_r) +bool language_list_add_names(struct language_list *list, + const char *names, + const char **unknown_name_r) { const char *const *langs; - const struct fts_language *lang; + const struct language *lang; for (langs = t_strsplit_spaces(names, ", "); *langs != NULL; langs++) { - lang = fts_language_find(*langs); + lang = language_find(*langs); if (lang == NULL) { /* unknown language */ *unknown_name_r = *langs; return FALSE; } - if (fts_language_list_find(list, lang->name) == NULL) - fts_language_list_add(list, lang); + if (language_list_find(list, lang->name) == NULL) + language_list_add(list, lang); } return TRUE; } -const ARRAY_TYPE(fts_language) * -fts_language_list_get_all(struct fts_language_list *list) +const ARRAY_TYPE(language) * +language_list_get_all(struct language_list *list) { return &list->languages; } -const struct fts_language * -fts_language_list_get_first(struct fts_language_list *list) +const struct language * +language_list_get_first(struct language_list *list) { - const struct fts_language *const *langp; + const struct language *const *langp; langp = array_front(&list->languages); return *langp; } #ifdef HAVE_FTS_EXTTEXTCAT -static bool fts_language_match_lists(struct fts_language_list *list, - candidate_t *candp, int candp_len, - const struct fts_language **lang_r) +static bool language_match_lists(struct language_list *list, + candidate_t *candp, int candp_len, + const struct language **lang_r) { const char *name; @@ -247,7 +246,7 @@ static bool fts_language_match_lists(struct fts_language_list *list, /* For Norwegian we treat both bokmal and nynorsk as "no". */ if (strcmp(name, "nb") == 0 || strcmp(name, "nn") == 0) name = "no"; - if ((*lang_r = fts_language_list_find(list, name)) != NULL) + if ((*lang_r = language_list_find(list, name)) != NULL) return TRUE; } return FALSE; @@ -255,8 +254,8 @@ static bool fts_language_match_lists(struct fts_language_list *list, #endif #ifdef HAVE_FTS_EXTTEXTCAT -static int fts_language_textcat_init(struct fts_language_list *list, - const char **error_r) +static int language_textcat_init(struct language_list *list, + const char **error_r) { const char *config_path; const char *data_dir; @@ -274,26 +273,26 @@ static int fts_language_textcat_init(struct fts_language_list *list, TEXTCAT_DATADIR"/fpdb.conf"; data_dir = list->textcat_datadir != NULL ? list->textcat_datadir : TEXTCAT_DATADIR"/"; - if (fts_textcat_cache != NULL) { - if (strcmp(fts_textcat_cache->config_path, config_path) == 0 && - strcmp(fts_textcat_cache->data_dir, data_dir) == 0) { - list->textcat = fts_textcat_cache; + if (textcat_cache != NULL) { + if (strcmp(textcat_cache->config_path, config_path) == 0 && + strcmp(textcat_cache->data_dir, data_dir) == 0) { + list->textcat = textcat_cache; list->textcat->refcount++; return 0; } - fts_textcat_unref(fts_textcat_cache); + textcat_unref(textcat_cache); } - fts_textcat_cache = list->textcat = i_new(struct fts_textcat, 1); - fts_textcat_cache->refcount = 2; - fts_textcat_cache->config_path = i_strdup(config_path); - fts_textcat_cache->data_dir = i_strdup(data_dir); - fts_textcat_cache->handle = special_textcat_Init(config_path, data_dir); - if (fts_textcat_cache->handle == NULL) { - fts_textcat_cache->failed = i_strdup_printf( + textcat_cache = list->textcat = i_new(struct textcat, 1); + textcat_cache->refcount = 2; + textcat_cache->config_path = i_strdup(config_path); + textcat_cache->data_dir = i_strdup(data_dir); + textcat_cache->handle = special_textcat_Init(config_path, data_dir); + if (textcat_cache->handle == NULL) { + textcat_cache->failed = i_strdup_printf( "special_textcat_Init(%s, %s) failed", config_path, data_dir); - *error_r = fts_textcat_cache->failed; + *error_r = textcat_cache->failed; return -1; } /* The textcat minimum document size could be set here. It @@ -302,20 +301,20 @@ static int fts_language_textcat_init(struct fts_language_list *list, } #endif -static enum fts_language_result -fts_language_detect_textcat(struct fts_language_list *list ATTR_UNUSED, - const unsigned char *text ATTR_UNUSED, - size_t size ATTR_UNUSED, - const struct fts_language **lang_r ATTR_UNUSED, - const char **error_r ATTR_UNUSED) +static enum language_result +language_detect_textcat(struct language_list *list ATTR_UNUSED, + const unsigned char *text ATTR_UNUSED, + size_t size ATTR_UNUSED, + const struct language **lang_r ATTR_UNUSED, + const char **error_r ATTR_UNUSED) { #ifdef HAVE_FTS_EXTTEXTCAT candidate_t *candp; /* textcat candidate result array pointer */ int cnt; bool match = FALSE; - if (fts_language_textcat_init(list, error_r) < 0) - return FTS_LANGUAGE_RESULT_ERROR; + if (language_textcat_init(list, error_r) < 0) + return LANGUAGE_RESULT_ERROR; candp = textcat_GetClassifyFullOutput(list->textcat->handle); if (candp == NULL) @@ -324,45 +323,45 @@ fts_language_detect_textcat(struct fts_language_list *list ATTR_UNUSED, I_MIN(size, DETECT_STR_MAX_LEN), candp); if (cnt > 0) { T_BEGIN { - match = fts_language_match_lists(list, candp, cnt, lang_r); + match = language_match_lists(list, candp, cnt, lang_r); } T_END; textcat_ReleaseClassifyFullOutput(list->textcat->handle, candp); if (match) - return FTS_LANGUAGE_RESULT_OK; + return LANGUAGE_RESULT_OK; else - return FTS_LANGUAGE_RESULT_UNKNOWN; + return LANGUAGE_RESULT_UNKNOWN; } else { textcat_ReleaseClassifyFullOutput(list->textcat->handle, candp); switch (cnt) { case TEXTCAT_RESULT_SHORT: i_assert(size < DETECT_STR_MAX_LEN); - return FTS_LANGUAGE_RESULT_SHORT; + return LANGUAGE_RESULT_SHORT; case TEXTCAT_RESULT_UNKNOWN: - return FTS_LANGUAGE_RESULT_UNKNOWN; + return LANGUAGE_RESULT_UNKNOWN; default: i_unreached(); } } #else - return FTS_LANGUAGE_RESULT_UNKNOWN; + return LANGUAGE_RESULT_UNKNOWN; #endif } -enum fts_language_result -fts_language_detect(struct fts_language_list *list, - const unsigned char *text ATTR_UNUSED, - size_t size ATTR_UNUSED, - const struct fts_language **lang_r, - const char **error_r) +enum language_result +language_detect(struct language_list *list, + const unsigned char *text ATTR_UNUSED, + size_t size ATTR_UNUSED, + const struct language **lang_r, + const char **error_r) { i_assert(array_count(&list->languages) > 0); /* if there's only a single wanted language, return it always. */ if (array_count(&list->languages) == 1) { - const struct fts_language *const *langp = + const struct language *const *langp = array_front(&list->languages); *lang_r = *langp; - return FTS_LANGUAGE_RESULT_OK; + return LANGUAGE_RESULT_OK; } - return fts_language_detect_textcat(list, text, size, lang_r, error_r); + return language_detect_textcat(list, text, size, lang_r, error_r); } diff --git a/src/lib-language/language.h b/src/lib-language/language.h index 884998f07f..91c3665cd1 100644 --- a/src/lib-language/language.h +++ b/src/lib-language/language.h @@ -1,72 +1,71 @@ -#ifndef FTS_LANGUAGE_H -#define FTS_LANGUAGE_H +#ifndef LANGUAGE_H +#define LANGUAGE_H -struct fts_language_list; +struct language_list; -enum fts_language_result { +enum language_result { /* Provided sample is too short. */ - FTS_LANGUAGE_RESULT_SHORT, + LANGUAGE_RESULT_SHORT, /* Language is unknown or not in the provided list . */ - FTS_LANGUAGE_RESULT_UNKNOWN, + LANGUAGE_RESULT_UNKNOWN, - FTS_LANGUAGE_RESULT_OK, + LANGUAGE_RESULT_OK, /* textcat library initialization failed. */ - FTS_LANGUAGE_RESULT_ERROR + LANGUAGE_RESULT_ERROR }; -struct fts_language { +struct language { /* Two-letter language name lowercased, e.g. "en" */ const char *name; }; -ARRAY_DEFINE_TYPE(fts_language, const struct fts_language *); +ARRAY_DEFINE_TYPE(language, const struct language *); /* Used for raw data that is indexed. This data shouldn't go through any language-specific filters. */ -extern const struct fts_language fts_language_data; +extern const struct language language_data; /* Language module API. */ -void fts_languages_init(void); -void fts_languages_deinit(void); +void languages_init(void); +void languages_deinit(void); /* Add a language to the list of supported languages. */ -void fts_language_register(const char *name); +void language_register(const char *name); /* Find a specified language by name. This finds from the internal list of supported languages. */ -const struct fts_language *fts_language_find(const char *name); +const struct language *language_find(const char *name); /* Language list API */ -int fts_language_list_init(const char *const *settings, - struct fts_language_list **list_r, - const char **error_r); -void fts_language_list_deinit(struct fts_language_list **list); +int language_list_init(const char *const *settings, + struct language_list **list_r, + const char **error_r); +void language_list_deinit(struct language_list **list); /* Add a language to the list of wanted languages. */ -void fts_language_list_add(struct fts_language_list *list, - const struct fts_language *lang); +void language_list_add(struct language_list *list, + const struct language *lang); /* Add wanted languages from a space-separated list of language names. Duplicates are ignored. Returns TRUE if ok, FALSE and unknown_name if an unknown language was found from the list. */ -bool fts_language_list_add_names(struct fts_language_list *list, - const char *names, - const char **unknown_name_r); +bool language_list_add_names(struct language_list *list, + const char *names, + const char **unknown_name_r); /* Return an array of all wanted languages. */ -const ARRAY_TYPE(fts_language) * -fts_language_list_get_all(struct fts_language_list *list); +const ARRAY_TYPE(language) * language_list_get_all(struct language_list *list); /* Returns the first wanted language (default language). */ -const struct fts_language * -fts_language_list_get_first(struct fts_language_list *list); +const struct language * +language_list_get_first(struct language_list *list); /* If text was detected to be one of the languages in the list, - returns FTS_LANGUAGE_RESULT_OK and (a pointer to) the language (in - the list). error_r is set for FTS_LANGUAGE_RESULT_ERROR. */ -enum fts_language_result -fts_language_detect(struct fts_language_list *list, - const unsigned char *text, size_t size, - const struct fts_language **lang_r, - const char **error_r); + returns LANGUAGE_RESULT_OK and (a pointer to) the language (in + the list). error_r is set for LANGUAGE_RESULT_ERROR. */ +enum language_result +language_detect(struct language_list *list, + const unsigned char *text, size_t size, + const struct language **lang_r, + const char **error_r); #endif diff --git a/src/lib-language/test-lang-filter.c b/src/lib-language/test-lang-filter.c index 00014a2f2e..7d240c6e41 100644 --- a/src/lib-language/test-lang-filter.c +++ b/src/lib-language/test-lang-filter.c @@ -11,38 +11,38 @@ #include static const char *const stopword_settings[] = {"stopwords_dir", TEST_STOPWORDS_DIR, NULL}; -static struct fts_language english_language = { .name = "en" }; -static struct fts_language french_language = { .name = "fr" }; -static struct fts_language norwegian_language = { .name = "no" }; +static struct language english_language = { .name = "en" }; +static struct language french_language = { .name = "fr" }; +static struct language norwegian_language = { .name = "no" }; #if defined(HAVE_LIBICU) && defined(HAVE_FTS_STEMMER) -static struct fts_language swedish_language = { .name = "sv" }; +static struct language swedish_language = { .name = "sv" }; #endif -static void test_fts_filter_find(void) +static void test_lang_filter_find(void) { - test_begin("fts filter find"); - test_assert(fts_filter_find("stopwords") == fts_filter_stopwords); - test_assert(fts_filter_find("snowball") == fts_filter_stemmer_snowball); - test_assert(fts_filter_find("normalizer-icu") == fts_filter_normalizer_icu); - test_assert(fts_filter_find("lowercase") == fts_filter_lowercase); - test_assert(fts_filter_find("contractions") == fts_filter_contractions); + test_begin("lang filter find"); + test_assert(lang_filter_find("stopwords") == lang_filter_stopwords); + test_assert(lang_filter_find("snowball") == lang_filter_stemmer_snowball); + test_assert(lang_filter_find("normalizer-icu") == lang_filter_normalizer_icu); + test_assert(lang_filter_find("lowercase") == lang_filter_lowercase); + test_assert(lang_filter_find("contractions") == lang_filter_contractions); test_end(); } -static void test_fts_filter_contractions_fail(void) +static void test_lang_filter_contractions_fail(void) { - struct fts_filter *filter; + struct lang_filter *filter; const char *error; - test_begin("fts filter contractions, unsupported language"); - test_assert(fts_filter_create(fts_filter_contractions, NULL, &english_language, NULL, &filter, &error) != 0); + test_begin("lang filter contractions, unsupported language"); + test_assert(lang_filter_create(lang_filter_contractions, NULL, &english_language, NULL, &filter, &error) != 0); test_assert(error != NULL); test_end(); } -static void test_fts_filter_contractions_fr(void) +static void test_lang_filter_contractions_fr(void) { static const struct { const char *input; @@ -68,29 +68,29 @@ static void test_fts_filter_contractions_fr(void) { "quelqu'un", "quelqu'un" }, { "l'esprit", "esprit" } }; - struct fts_filter *filter; + struct lang_filter *filter; const char *error; const char *token; unsigned int i; int ret; - test_begin("fts filter contractions, French"); - test_assert(fts_filter_create(fts_filter_contractions, NULL, &french_language, NULL, &filter, &error) == 0); + test_begin("lang filter contractions, French"); + test_assert(lang_filter_create(lang_filter_contractions, NULL, &french_language, NULL, &filter, &error) == 0); for (i = 0; i < N_ELEMENTS(tests); i++) { token = tests[i].input; - ret = fts_filter_filter(filter, &token, &error); + ret = lang_filter(filter, &token, &error); test_assert(ret >= 0); if (ret > 0) test_assert_idx(strcmp(token, tests[i].output) == 0, i); else if (ret == 0) test_assert_idx(token == NULL && tests[i].output == NULL, i); } - fts_filter_unref(&filter); + lang_filter_unref(&filter); test_end(); } -static void test_fts_filter_lowercase(void) +static void test_lang_filter_lowercase(void) { static const struct { const char *input; @@ -100,25 +100,25 @@ static void test_fts_filter_lowercase(void) { "FOO", "foo" }, { "fOo", "foo" } }; - struct fts_filter *filter; + struct lang_filter *filter; const char *error; const char *token; unsigned int i; - test_begin("fts filter lowercase"); - test_assert(fts_filter_create(fts_filter_lowercase, NULL, &english_language, NULL, &filter, &error) == 0); + test_begin("lang filter lowercase"); + test_assert(lang_filter_create(lang_filter_lowercase, NULL, &english_language, NULL, &filter, &error) == 0); for (i = 0; i < N_ELEMENTS(tests); i++) { token = tests[i].input; - test_assert_idx(fts_filter_filter(filter, &token, &error) > 0 && + test_assert_idx(lang_filter(filter, &token, &error) > 0 && strcmp(token, tests[i].output) == 0, 0); } - fts_filter_unref(&filter); + lang_filter_unref(&filter); test_end(); } #ifdef HAVE_LIBICU -static void test_fts_filter_lowercase_utf8(void) +static void test_lang_filter_lowercase_utf8(void) { static const struct { const char *input; @@ -128,24 +128,24 @@ static void test_fts_filter_lowercase_utf8(void) { "F\xC3\x85\xC3\x85", "f\xC3\xA5\xC3\xA5" }, { "F\xC3\x85\xC3\xA5", "f\xC3\xA5\xC3\xA5" } }; - struct fts_filter *filter; + struct lang_filter *filter; const char *error; const char *token; unsigned int i; - test_begin("fts filter lowercase, UTF8"); - test_assert(fts_filter_create(fts_filter_lowercase, NULL, &english_language, NULL, &filter, &error) == 0); + test_begin("lang filter lowercase, UTF8"); + test_assert(lang_filter_create(lang_filter_lowercase, NULL, &english_language, NULL, &filter, &error) == 0); for (i = 0; i < N_ELEMENTS(tests); i++) { token = tests[i].input; - test_assert_idx(fts_filter_filter(filter, &token, &error) > 0 && + test_assert_idx(lang_filter(filter, &token, &error) > 0 && strcmp(token, tests[i].output) == 0, 0); } - fts_filter_unref(&filter); + lang_filter_unref(&filter); test_end(); } -static void test_fts_filter_lowercase_too_long_utf8(void) +static void test_lang_filter_lowercase_too_long_utf8(void) { static const struct { const char *input; @@ -156,28 +156,28 @@ static void test_fts_filter_lowercase_too_long_utf8(void) { "abc\xC3\x85""defghijklmnopqrstuvwxyz", "abc\xC3\xA5""defghijklmnopqrstuvw" }, { "abcdefghijklmnopqrstuvwx\xC3\x85", "abcdefghijklmnopqrstuvwx" } }; - struct fts_filter *filter; + struct lang_filter *filter; const char *error; const char *token; const char * const settings[] = {"maxlen", "25", NULL}; unsigned int i; - test_begin("fts filter lowercase, too long UTF8"); - test_assert(fts_filter_create(fts_filter_lowercase, NULL, &english_language, settings, &filter, &error) == 0); + test_begin("lang filter lowercase, too long UTF8"); + test_assert(lang_filter_create(lang_filter_lowercase, NULL, &english_language, settings, &filter, &error) == 0); for (i = 0; i < N_ELEMENTS(tests); i++) { token = tests[i].input; - test_assert_idx(fts_filter_filter(filter, &token, &error) > 0 && + test_assert_idx(lang_filter(filter, &token, &error) > 0 && strcmp(token, tests[i].output) == 0, 0); } - fts_filter_unref(&filter); + lang_filter_unref(&filter); test_end(); } #endif -static void test_fts_filter_stopwords_eng(void) +static void test_lang_filter_stopwords_eng(void) { - struct fts_filter *filter; + struct lang_filter *filter; const char *error; int ret; const char *input[] = {"an", "elephant", "and", "a", "bear", @@ -189,14 +189,14 @@ static void test_fts_filter_stopwords_eng(void) const char **ip, **op; const char *token; - test_begin("fts filter stopwords, English"); - test_assert(fts_filter_create(fts_filter_stopwords, NULL, &english_language, stopword_settings, &filter, &error) == 0); + test_begin("lang filter stopwords, English"); + test_assert(lang_filter_create(lang_filter_stopwords, NULL, &english_language, stopword_settings, &filter, &error) == 0); ip = input; op = output; while (*ip != NULL) { token = *ip; - ret = fts_filter_filter(filter, &token, &error); + ret = lang_filter(filter, &token, &error); if (ret <= 0) { test_assert(ret == 0); test_assert(*op == NULL); @@ -208,15 +208,15 @@ static void test_fts_filter_stopwords_eng(void) ip++; } - fts_filter_unref(&filter); + lang_filter_unref(&filter); test_assert(filter == NULL); test_end(); } -static void test_fts_filter_stopwords_fin(void) +static void test_lang_filter_stopwords_fin(void) { - const struct fts_language finnish = { .name = "fi" }; - struct fts_filter *filter; + const struct language finnish = { .name = "fi" }; + struct lang_filter *filter; const char *error; int ret; const char *input[] = {"olla", "vaiko", "eik\xC3\xB6", "olla", @@ -230,14 +230,14 @@ static void test_fts_filter_stopwords_fin(void) const char **ip, **op; const char *token; - test_begin("fts filter stopwords, Finnish"); - test_assert(fts_filter_create(fts_filter_stopwords, NULL, &finnish, stopword_settings, &filter, &error) == 0); + test_begin("lang filter stopwords, Finnish"); + test_assert(lang_filter_create(lang_filter_stopwords, NULL, &finnish, stopword_settings, &filter, &error) == 0); ip = input; op = output; while (*ip != NULL) { token = *ip; - ret = fts_filter_filter(filter, &token, &error); + ret = lang_filter(filter, &token, &error); if (ret <= 0) { test_assert(ret == 0); test_assert(*op == NULL); @@ -249,15 +249,15 @@ static void test_fts_filter_stopwords_fin(void) ip++; } - fts_filter_unref(&filter); + lang_filter_unref(&filter); test_assert(filter == NULL); - test_assert(fts_filter_create(fts_filter_stopwords, NULL, &finnish, stopword_settings, &filter, &error) == 0); + test_assert(lang_filter_create(lang_filter_stopwords, NULL, &finnish, stopword_settings, &filter, &error) == 0); ip = input2; op = output2; while (*ip != NULL) { token = *ip; - ret = fts_filter_filter(filter, &token, &error); + ret = lang_filter(filter, &token, &error); if (ret <= 0) { test_assert(ret == 0); test_assert(*op == NULL); @@ -269,14 +269,14 @@ static void test_fts_filter_stopwords_fin(void) ip++; } - fts_filter_unref(&filter); + lang_filter_unref(&filter); test_assert(filter == NULL); test_end(); } -static void test_fts_filter_stopwords_fra(void) +static void test_lang_filter_stopwords_fra(void) { - struct fts_filter *filter; + struct lang_filter *filter; const char *error; int ret; @@ -290,14 +290,14 @@ static void test_fts_filter_stopwords_fra(void) const char **ip, **op; const char *token; - test_begin("fts filter stopwords, French"); - test_assert(fts_filter_create(fts_filter_stopwords, NULL, &french_language, stopword_settings, &filter, &error) == 0); + test_begin("lang filter stopwords, French"); + test_assert(lang_filter_create(lang_filter_stopwords, NULL, &french_language, stopword_settings, &filter, &error) == 0); ip = input; op = output; while (*ip != NULL) { token = *ip; - ret = fts_filter_filter(filter, &token, &error); + ret = lang_filter(filter, &token, &error); if (ret <= 0) { test_assert(ret == 0); test_assert(*op == NULL); @@ -309,14 +309,14 @@ static void test_fts_filter_stopwords_fra(void) ip++; } - fts_filter_unref(&filter); + lang_filter_unref(&filter); test_assert(filter == NULL); test_end(); } -static void test_fts_filter_stopwords_no(void) +static void test_lang_filter_stopwords_no(void) { - struct fts_filter *filter; + struct lang_filter *filter; const char *error; int ret; @@ -338,14 +338,14 @@ static void test_fts_filter_stopwords_no(void) const char **ip, **op; const char *token; - test_begin("fts filter stopwords, Norwegian"); - test_assert(fts_filter_create(fts_filter_stopwords, NULL, &norwegian_language, stopword_settings, &filter, &error) == 0); + test_begin("lang filter stopwords, Norwegian"); + test_assert(lang_filter_create(lang_filter_stopwords, NULL, &norwegian_language, stopword_settings, &filter, &error) == 0); ip = input; op = output; while (*ip != NULL) { token = *ip; - ret = fts_filter_filter(filter, &token, &error); + ret = lang_filter(filter, &token, &error); if (ret <= 0) { test_assert(ret == 0); test_assert(*op == NULL); @@ -357,46 +357,46 @@ static void test_fts_filter_stopwords_no(void) ip++; } - fts_filter_unref(&filter); + lang_filter_unref(&filter); test_assert(filter == NULL); test_end(); } -static void test_fts_filter_stopwords_fail_lazy_init(void) +static void test_lang_filter_stopwords_fail_lazy_init(void) { - const struct fts_language unknown = { .name = "bebobidoop" }; - struct fts_filter *filter = NULL; + const struct language unknown = { .name = "bebobidoop" }; + struct lang_filter *filter = NULL; const char *error = NULL, *token = "foobar"; - test_begin("fts filter stopwords, fail filter() (lazy init)"); - test_assert(fts_filter_create(fts_filter_stopwords, NULL, &unknown, stopword_settings, &filter, &error) == 0); + test_begin("lang filter stopwords, fail filter() (lazy init)"); + test_assert(lang_filter_create(lang_filter_stopwords, NULL, &unknown, stopword_settings, &filter, &error) == 0); test_assert(filter != NULL && error == NULL); - test_assert(fts_filter_filter(filter, &token, &error) < 0 && error != NULL); - fts_filter_unref(&filter); + test_assert(lang_filter(filter, &token, &error) < 0 && error != NULL); + lang_filter_unref(&filter); test_end(); } -static void test_fts_filter_stopwords_malformed(void) +static void test_lang_filter_stopwords_malformed(void) { - const struct fts_language malformed = { .name = "malformed" }; - struct fts_filter *filter = NULL; + const struct language malformed = { .name = "malformed" }; + struct lang_filter *filter = NULL; const char *error = NULL, *token = "foobar"; - test_begin("fts filter stopwords, malformed list"); - test_assert(fts_filter_create(fts_filter_stopwords, NULL, &malformed, stopword_settings, &filter, &error) == 0); - test_assert(fts_filter_filter(filter, &token, &error) < 0); + test_begin("lang filter stopwords, malformed list"); + test_assert(lang_filter_create(lang_filter_stopwords, NULL, &malformed, stopword_settings, &filter, &error) == 0); + test_assert(lang_filter(filter, &token, &error) < 0); test_assert(strstr(error, "seems empty. Is the file correctly formatted?") != NULL); test_expect_no_more_errors(); - fts_filter_unref(&filter); + lang_filter_unref(&filter); test_end(); } #ifdef HAVE_FTS_STEMMER -static void test_fts_filter_stemmer_snowball_stem_english(void) +static void test_lang_filter_stemmer_snowball_stem_english(void) { - struct fts_filter *stemmer; + struct lang_filter *stemmer; const char *error; const char *token = NULL; const char * const tokens[] = { @@ -414,24 +414,24 @@ static void test_fts_filter_stemmer_snowball_stem_english(void) const char * const *tpp; const char * const *bpp; - test_begin("fts filter stem English"); - test_assert(fts_filter_create(fts_filter_stemmer_snowball, NULL, &english_language, NULL, &stemmer, &error) == 0); + test_begin("lang filter stem English"); + test_assert(lang_filter_create(lang_filter_stemmer_snowball, NULL, &english_language, NULL, &stemmer, &error) == 0); bpp = bases; for (tpp=tokens; *tpp != NULL; tpp++) { token = *tpp; - test_assert(fts_filter_filter(stemmer, &token, &error) > 0); + test_assert(lang_filter(stemmer, &token, &error) > 0); test_assert(token != NULL); test_assert(null_strcmp(token, *bpp) == 0); bpp++; } - fts_filter_unref(&stemmer); + lang_filter_unref(&stemmer); test_assert(stemmer == NULL); test_end(); } -static void test_fts_filter_stemmer_snowball_stem_french(void) +static void test_lang_filter_stemmer_snowball_stem_french(void) { - struct fts_filter *stemmer; + struct lang_filter *stemmer; const char *error; const char *token = NULL; const char * const tokens[] = { @@ -444,26 +444,26 @@ static void test_fts_filter_stemmer_snowball_stem_french(void) const char * const *tpp; const char * const *bpp; - test_begin("fts filter stem French"); - test_assert(fts_filter_create(fts_filter_stemmer_snowball, NULL, &french_language, NULL, &stemmer, &error) == 0); + test_begin("lang filter stem French"); + test_assert(lang_filter_create(lang_filter_stemmer_snowball, NULL, &french_language, NULL, &stemmer, &error) == 0); bpp = bases; for (tpp=tokens; *tpp != NULL; tpp++) { token = *tpp; - test_assert(fts_filter_filter(stemmer, &token, &error) > 0); + test_assert(lang_filter(stemmer, &token, &error) > 0); test_assert(token != NULL); test_assert(null_strcmp(token, *bpp) == 0); bpp++; } - fts_filter_unref(&stemmer); + lang_filter_unref(&stemmer); test_assert(stemmer == NULL); test_end(); } -static void test_fts_filter_stopwords_stemmer_eng(void) +static void test_lang_filter_stopwords_stemmer_eng(void) { int ret; - struct fts_filter *stemmer; - struct fts_filter *filter; + struct lang_filter *stemmer; + struct lang_filter *filter; const char *error; const char *token = NULL; const char * const tokens[] = { @@ -481,15 +481,15 @@ static void test_fts_filter_stopwords_stemmer_eng(void) const char * const *tpp; const char * const *bpp; - test_begin("fts filters stopwords and stemming chained, English"); + test_begin("lang filters stopwords and stemming chained, English"); - test_assert(fts_filter_create(fts_filter_stopwords, NULL, &english_language, stopword_settings, &filter, &error) == 0); - test_assert(fts_filter_create(fts_filter_stemmer_snowball, filter, &english_language, NULL, &stemmer, &error) == 0); + test_assert(lang_filter_create(lang_filter_stopwords, NULL, &english_language, stopword_settings, &filter, &error) == 0); + test_assert(lang_filter_create(lang_filter_stemmer_snowball, filter, &english_language, NULL, &stemmer, &error) == 0); bpp = bases; for (tpp=tokens; *tpp != NULL; tpp++) { token = *tpp; - ret = fts_filter_filter(stemmer, &token, &error); + ret = lang_filter(stemmer, &token, &error); test_assert(ret >= 0); if (ret == 0) test_assert(*bpp == NULL); @@ -499,8 +499,8 @@ static void test_fts_filter_stopwords_stemmer_eng(void) } bpp++; } - fts_filter_unref(&stemmer); - fts_filter_unref(&filter); + lang_filter_unref(&stemmer); + lang_filter_unref(&filter); test_assert(stemmer == NULL); test_assert(filter == NULL); test_end(); @@ -508,9 +508,9 @@ static void test_fts_filter_stopwords_stemmer_eng(void) #endif #ifdef HAVE_LIBICU -static void test_fts_filter_normalizer_swedish_short(void) +static void test_lang_filter_normalizer_swedish_short(void) { - struct fts_filter *norm = NULL; + struct lang_filter *norm = NULL; const char *input[] = { "Vem", "\xC3\x85", @@ -530,22 +530,22 @@ static void test_fts_filter_normalizer_swedish_short(void) const char *token = NULL; unsigned int i; - test_begin("fts filter normalizer Swedish short text"); + test_begin("lang filter normalizer Swedish short text"); - test_assert(fts_filter_create(fts_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0); + test_assert(lang_filter_create(lang_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0); for (i = 0; i < N_ELEMENTS(input); i++) { token = input[i]; - test_assert_idx(fts_filter_filter(norm, &token, &error) == 1, i); + test_assert_idx(lang_filter(norm, &token, &error) == 1, i); test_assert_idx(null_strcmp(token, expected_output[i]) == 0, i); } - fts_filter_unref(&norm); + lang_filter_unref(&norm); test_assert(norm == NULL); test_end(); } -static void test_fts_filter_normalizer_swedish_short_default_id(void) +static void test_lang_filter_normalizer_swedish_short_default_id(void) { - struct fts_filter *norm = NULL; + struct lang_filter *norm = NULL; const char *input[] = { "Vem", "\xC3\x85", @@ -563,24 +563,24 @@ static void test_fts_filter_normalizer_swedish_short_default_id(void) const char *token = NULL; unsigned int i; - test_begin("fts filter normalizer Swedish short text using default ID"); + test_begin("lang filter normalizer Swedish short text using default ID"); - test_assert(fts_filter_create(fts_filter_normalizer_icu, NULL, NULL, NULL, &norm, &error) == 0); + test_assert(lang_filter_create(lang_filter_normalizer_icu, NULL, NULL, NULL, &norm, &error) == 0); for (i = 0; i < N_ELEMENTS(input); i++) { token = input[i]; - test_assert_idx(fts_filter_filter(norm, &token, &error) == 1, i); + test_assert_idx(lang_filter(norm, &token, &error) == 1, i); test_assert_idx(null_strcmp(token, expected_output[i]) == 0, i); } - fts_filter_unref(&norm); + lang_filter_unref(&norm); test_assert(norm == NULL); test_end(); } /* UDHRDIR comes from Automake AM_CPPFLAGS */ #define UDHR_FRA_NAME "/udhr_fra.txt" -static void test_fts_filter_normalizer_french(void) +static void test_lang_filter_normalizer_french(void) { - struct fts_filter *norm = NULL; + struct lang_filter *norm = NULL; FILE *input; const char * const settings[] = {"id", "Any-Lower; NFKD; [: Nonspacing Mark :] Remove", NULL}; @@ -600,16 +600,16 @@ static void test_fts_filter_normalizer_french(void) 0x8c, 0xd6, 0x7a, 0xb7, 0xc5, 0xc6, 0x85, 0x00}; const char *udhr_path; - test_begin("fts filter normalizer French UDHR"); + test_begin("lang filter normalizer French UDHR"); udhr_path = t_strconcat(UDHRDIR, UDHR_FRA_NAME, NULL); - test_assert(fts_filter_create(fts_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0); + test_assert(lang_filter_create(lang_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0); input = fopen(udhr_path, "r"); test_assert(input != NULL); sha512_init(&ctx); while (NULL != fgets(buf, sizeof(buf), input)) { tokens = buf; - if (fts_filter_filter(norm, &tokens, &error) != 1){ + if (lang_filter(norm, &tokens, &error) != 1){ break; } sha512_loop(&ctx, tokens, strlen(tokens)); @@ -618,12 +618,12 @@ static void test_fts_filter_normalizer_french(void) sha512_result(&ctx, sha512_digest); test_assert(memcmp(sha512_digest, correct_digest, sizeof(sha512_digest)) == 0); - fts_filter_unref(&norm); + lang_filter_unref(&norm); test_assert(norm == NULL); test_end(); } -static void test_fts_filter_normalizer_empty(void) +static void test_lang_filter_normalizer_empty(void) { /* test just a couple of these */ static const char *empty_tokens[] = { @@ -634,32 +634,32 @@ static void test_fts_filter_normalizer_empty(void) }; const char * const settings[] = {"id", "Any-Lower; NFKD; [: Nonspacing Mark :] Remove; [\\x20] Remove", NULL}; - struct fts_filter *norm; + struct lang_filter *norm; const char *error; unsigned int i; - test_begin("fts filter normalizer empty tokens"); - test_assert(fts_filter_create(fts_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0); + test_begin("lang filter normalizer empty tokens"); + test_assert(lang_filter_create(lang_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0); for (i = 0; i < N_ELEMENTS(empty_tokens); i++) { const char *token = empty_tokens[i]; - test_assert_idx(fts_filter_filter(norm, &token, &error) == 0, i); + test_assert_idx(lang_filter(norm, &token, &error) == 0, i); } - fts_filter_unref(&norm); + lang_filter_unref(&norm); test_end(); } -static void test_fts_filter_normalizer_baddata(void) +static void test_lang_filter_normalizer_baddata(void) { const char * const settings[] = {"id", "Any-Lower; NFKD; [: Nonspacing Mark :] Remove", NULL}; - struct fts_filter *norm; + struct lang_filter *norm; const char *token, *error; string_t *str; unichar_t i; - test_begin("fts filter normalizer bad data"); + test_begin("lang filter normalizer bad data"); - test_assert(fts_filter_create(fts_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0); + test_assert(lang_filter_create(lang_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0); str = t_str_new(128); for (i = 1; i < 0x1ffff; i++) { if (!uni_is_valid_ucs4(i)) continue; @@ -667,38 +667,38 @@ static void test_fts_filter_normalizer_baddata(void) uni_ucs4_to_utf8_c(i, str); token = str_c(str); T_BEGIN { - test_assert_idx(fts_filter_filter(norm, &token, &error) >= 0, i); + test_assert_idx(lang_filter(norm, &token, &error) >= 0, i); } T_END; } str_truncate(str, 0); uni_ucs4_to_utf8_c(UNICHAR_T_MAX, str); token = str_c(str); - test_assert(fts_filter_filter(norm, &token, &error) >= 0); + test_assert(lang_filter(norm, &token, &error) >= 0); - fts_filter_unref(&norm); + lang_filter_unref(&norm); test_end(); } -static void test_fts_filter_normalizer_invalid_id(void) +static void test_lang_filter_normalizer_invalid_id(void) { - struct fts_filter *norm = NULL; + struct lang_filter *norm = NULL; const char *settings[] = {"id", "Any-One-Out-There; DKFN; [: Nonspacing Mark :] Remove", NULL}; const char *error = NULL, *token = "foo"; - test_begin("fts filter normalizer invalid id"); - test_assert(fts_filter_create(fts_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0); + test_begin("lang filter normalizer invalid id"); + test_assert(lang_filter_create(lang_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0); test_assert(error == NULL); - test_assert(fts_filter_filter(norm, &token, &error) < 0 && error != NULL); - fts_filter_unref(&norm); + test_assert(lang_filter(norm, &token, &error) < 0 && error != NULL); + lang_filter_unref(&norm); test_end(); } -static void test_fts_filter_normalizer_oversized(void) +static void test_lang_filter_normalizer_oversized(void) { - struct fts_filter *norm = NULL; + struct lang_filter *norm = NULL; const char *settings[] = {"id", "Any-Lower; NFKD; [: Nonspacing Mark :] Remove", "maxlen", "250", NULL}; @@ -720,41 +720,41 @@ static void test_fts_filter_normalizer_oversized(void) "\xe6\xae\xb4\xe9\x8a\x85\xc4\xb9\xe4\x90\xb2\xe9\x96\xad\xef\x90" "\x9c\xe5\xa6\xae\xe9\x93\x91\xe8\x87\xa1"; - test_begin("fts filter normalizer over-sized token"); - test_assert(fts_filter_create(fts_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0); + test_begin("lang filter normalizer over-sized token"); + test_assert(lang_filter_create(lang_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0); test_assert(error == NULL); - test_assert(fts_filter_filter(norm, &token, &error) >= 0); + test_assert(lang_filter(norm, &token, &error) >= 0); test_assert(strlen(token) <= 250); - fts_filter_unref(&norm); + lang_filter_unref(&norm); test_end(); } -static void test_fts_filter_normalizer_truncation(void) +static void test_lang_filter_normalizer_truncation(void) { - struct fts_filter *norm = NULL; + struct lang_filter *norm = NULL; const char *settings[] = {"id", "Any-Lower;", "maxlen", "10", NULL}; const char *error = NULL; const char *token = "abcdefghi\xC3\x85"; - test_begin("fts filter normalizer token truncated mid letter"); - test_assert(fts_filter_create(fts_filter_normalizer_icu, NULL, NULL, + test_begin("lang filter normalizer token truncated mid letter"); + test_assert(lang_filter_create(lang_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0); test_assert(error == NULL); - test_assert(fts_filter_filter(norm, &token, &error) >= 0); + test_assert(lang_filter(norm, &token, &error) >= 0); test_assert(strcmp(token, "abcdefghi") == 0); - fts_filter_unref(&norm); + lang_filter_unref(&norm); test_end(); } #ifdef HAVE_FTS_STEMMER -static void test_fts_filter_normalizer_stopwords_stemmer_eng(void) +static void test_lang_filter_normalizer_stopwords_stemmer_eng(void) { int ret; - struct fts_filter *normalizer; - struct fts_filter *stemmer; - struct fts_filter *filter; + struct lang_filter *normalizer; + struct lang_filter *stemmer; + struct lang_filter *filter; const char *error; const char * const id_settings[] = //{"id", "Any-Lower; NFKD; [: Nonspacing Mark :] Remove; NFC", NULL}; @@ -776,16 +776,16 @@ static void test_fts_filter_normalizer_stopwords_stemmer_eng(void) const char * const *tpp; const char * const *bpp; - test_begin("fts filters normalizer, stopwords and stemming chained, English"); + test_begin("lang filters normalizer, stopwords and stemming chained, English"); - test_assert(fts_filter_create(fts_filter_normalizer_icu, NULL, NULL, id_settings, &normalizer, &error) == 0); - test_assert(fts_filter_create(fts_filter_stopwords, normalizer, &english_language, stopword_settings, &filter, &error) == 0); - test_assert(fts_filter_create(fts_filter_stemmer_snowball, filter, &english_language, NULL, &stemmer, &error) == 0); + test_assert(lang_filter_create(lang_filter_normalizer_icu, NULL, NULL, id_settings, &normalizer, &error) == 0); + test_assert(lang_filter_create(lang_filter_stopwords, normalizer, &english_language, stopword_settings, &filter, &error) == 0); + test_assert(lang_filter_create(lang_filter_stemmer_snowball, filter, &english_language, NULL, &stemmer, &error) == 0); bpp = bases; for (tpp = tokens; *tpp != NULL; tpp++) { token = *tpp; - ret = fts_filter_filter(stemmer, &token, &error); + ret = lang_filter(stemmer, &token, &error); if (ret <= 0) { test_assert(ret == 0); test_assert(*bpp == NULL); @@ -795,21 +795,21 @@ static void test_fts_filter_normalizer_stopwords_stemmer_eng(void) } bpp++; } - fts_filter_unref(&stemmer); - fts_filter_unref(&filter); - fts_filter_unref(&normalizer); + lang_filter_unref(&stemmer); + lang_filter_unref(&filter); + lang_filter_unref(&normalizer); test_assert(stemmer == NULL); test_assert(filter == NULL); test_assert(normalizer == NULL); test_end(); } -static void test_fts_filter_stopwords_normalizer_stemmer_no(void) +static void test_lang_filter_stopwords_normalizer_stemmer_no(void) { int ret; - struct fts_filter *normalizer; - struct fts_filter *stemmer; - struct fts_filter *filter; + struct lang_filter *normalizer; + struct lang_filter *stemmer; + struct lang_filter *filter; const char *error; const char *token = NULL; const char * const tokens[] = { @@ -838,16 +838,16 @@ static void test_fts_filter_stopwords_normalizer_stemmer_no(void) const char * const *tpp; const char * const *bpp; - test_begin("fts filters with stopwords, default normalizer and stemming chained, Norwegian"); + test_begin("lang filters with stopwords, default normalizer and stemming chained, Norwegian"); - test_assert(fts_filter_create(fts_filter_stopwords, NULL, &norwegian_language, stopword_settings, &filter, &error) == 0); - test_assert(fts_filter_create(fts_filter_normalizer_icu, filter, NULL, NULL, &normalizer, &error) == 0); - test_assert(fts_filter_create(fts_filter_stemmer_snowball, normalizer, &norwegian_language, NULL, &stemmer, &error) == 0); + test_assert(lang_filter_create(lang_filter_stopwords, NULL, &norwegian_language, stopword_settings, &filter, &error) == 0); + test_assert(lang_filter_create(lang_filter_normalizer_icu, filter, NULL, NULL, &normalizer, &error) == 0); + test_assert(lang_filter_create(lang_filter_stemmer_snowball, normalizer, &norwegian_language, NULL, &stemmer, &error) == 0); bpp = bases; for (tpp = tokens; *tpp != NULL; tpp++) { token = *tpp; - ret = fts_filter_filter(stemmer, &token, &error); + ret = lang_filter(stemmer, &token, &error); if (ret <= 0) { test_assert(ret == 0); test_assert(*bpp == NULL); @@ -857,21 +857,21 @@ static void test_fts_filter_stopwords_normalizer_stemmer_no(void) } bpp++; } - fts_filter_unref(&stemmer); - fts_filter_unref(&normalizer); - fts_filter_unref(&filter); + lang_filter_unref(&stemmer); + lang_filter_unref(&normalizer); + lang_filter_unref(&filter); test_assert(stemmer == NULL); test_assert(filter == NULL); test_assert(normalizer == NULL); test_end(); } -static void test_fts_filter_stopwords_normalizer_stemmer_sv(void) +static void test_lang_filter_stopwords_normalizer_stemmer_sv(void) { int ret; - struct fts_filter *normalizer; - struct fts_filter *stemmer; - struct fts_filter *filter; + struct lang_filter *normalizer; + struct lang_filter *stemmer; + struct lang_filter *filter; const char *error; const char *token = NULL; const char * const tokens[] = { @@ -888,17 +888,17 @@ static void test_fts_filter_stopwords_normalizer_stemmer_sv(void) const char * const *tpp; const char * const *bpp; - test_begin("fts filters with stopwords, default normalizer and stemming chained, Swedish"); + test_begin("lang filters with stopwords, default normalizer and stemming chained, Swedish"); - test_assert(fts_filter_create(fts_filter_stopwords, NULL, &swedish_language, stopword_settings, &filter, &error) == 0); - test_assert(fts_filter_create(fts_filter_normalizer_icu, filter, NULL, NULL, &normalizer, &error) == 0); - test_assert(fts_filter_create(fts_filter_stemmer_snowball, normalizer, &swedish_language, NULL, &stemmer, &error) == 0); + test_assert(lang_filter_create(lang_filter_stopwords, NULL, &swedish_language, stopword_settings, &filter, &error) == 0); + test_assert(lang_filter_create(lang_filter_normalizer_icu, filter, NULL, NULL, &normalizer, &error) == 0); + test_assert(lang_filter_create(lang_filter_stemmer_snowball, normalizer, &swedish_language, NULL, &stemmer, &error) == 0); bpp = bases; for (tpp = tokens; *tpp != NULL; tpp++) { token = *tpp; - ret = fts_filter_filter(stemmer, &token, &error); + ret = lang_filter(stemmer, &token, &error); if (ret <= 0) { test_assert(ret == 0); test_assert(*bpp == NULL); @@ -908,9 +908,9 @@ static void test_fts_filter_stopwords_normalizer_stemmer_sv(void) } bpp++; } - fts_filter_unref(&stemmer); - fts_filter_unref(&normalizer); - fts_filter_unref(&filter); + lang_filter_unref(&stemmer); + lang_filter_unref(&normalizer); + lang_filter_unref(&filter); test_assert(stemmer == NULL); test_assert(filter == NULL); test_assert(normalizer == NULL); @@ -919,9 +919,9 @@ static void test_fts_filter_stopwords_normalizer_stemmer_sv(void) #endif #endif -static void test_fts_filter_english_possessive(void) +static void test_lang_filter_english_possessive(void) { - struct fts_filter *norm = NULL; + struct lang_filter *norm = NULL; const char *input[] = { "foo'", @@ -960,15 +960,15 @@ static void test_fts_filter_english_possessive(void) const char *token = NULL; unsigned int i; - test_begin("fts filter english possessive"); + test_begin("lang filter english possessive"); - test_assert(fts_filter_create(fts_filter_english_possessive, NULL, NULL, NULL, &norm, &error) == 0); + test_assert(lang_filter_create(lang_filter_english_possessive, NULL, NULL, NULL, &norm, &error) == 0); for (i = 0; i < N_ELEMENTS(input); i++) { token = input[i]; - test_assert_idx(fts_filter_filter(norm, &token, &error) == 1, i); + test_assert_idx(lang_filter(norm, &token, &error) == 1, i); test_assert_idx(null_strcmp(token, expected_output[i]) == 0, i); } - fts_filter_unref(&norm); + lang_filter_unref(&norm); test_assert(norm == NULL); test_end(); } @@ -979,47 +979,47 @@ static void test_fts_filter_english_possessive(void) int main(void) { static void (*const test_functions[])(void) = { - test_fts_filter_find, - test_fts_filter_contractions_fail, - test_fts_filter_contractions_fr, - test_fts_filter_lowercase, + test_lang_filter_find, + test_lang_filter_contractions_fail, + test_lang_filter_contractions_fr, + test_lang_filter_lowercase, #ifdef HAVE_LIBICU - test_fts_filter_lowercase_utf8, - test_fts_filter_lowercase_too_long_utf8, + test_lang_filter_lowercase_utf8, + test_lang_filter_lowercase_too_long_utf8, #endif - test_fts_filter_stopwords_eng, - test_fts_filter_stopwords_fin, - test_fts_filter_stopwords_fra, - test_fts_filter_stopwords_no, - test_fts_filter_stopwords_fail_lazy_init, - test_fts_filter_stopwords_malformed, + test_lang_filter_stopwords_eng, + test_lang_filter_stopwords_fin, + test_lang_filter_stopwords_fra, + test_lang_filter_stopwords_no, + test_lang_filter_stopwords_fail_lazy_init, + test_lang_filter_stopwords_malformed, #ifdef HAVE_FTS_STEMMER - test_fts_filter_stemmer_snowball_stem_english, - test_fts_filter_stemmer_snowball_stem_french, - test_fts_filter_stopwords_stemmer_eng, + test_lang_filter_stemmer_snowball_stem_english, + test_lang_filter_stemmer_snowball_stem_french, + test_lang_filter_stopwords_stemmer_eng, #endif #ifdef HAVE_LIBICU - test_fts_filter_normalizer_swedish_short, - test_fts_filter_normalizer_swedish_short_default_id, - test_fts_filter_normalizer_french, - test_fts_filter_normalizer_empty, - test_fts_filter_normalizer_baddata, - test_fts_filter_normalizer_invalid_id, - test_fts_filter_normalizer_oversized, - test_fts_filter_normalizer_truncation, + test_lang_filter_normalizer_swedish_short, + test_lang_filter_normalizer_swedish_short_default_id, + test_lang_filter_normalizer_french, + test_lang_filter_normalizer_empty, + test_lang_filter_normalizer_baddata, + test_lang_filter_normalizer_invalid_id, + test_lang_filter_normalizer_oversized, + test_lang_filter_normalizer_truncation, #ifdef HAVE_FTS_STEMMER - test_fts_filter_normalizer_stopwords_stemmer_eng, - test_fts_filter_stopwords_normalizer_stemmer_no, - test_fts_filter_stopwords_normalizer_stemmer_sv, + test_lang_filter_normalizer_stopwords_stemmer_eng, + test_lang_filter_stopwords_normalizer_stemmer_no, + test_lang_filter_stopwords_normalizer_stemmer_sv, #endif #endif - test_fts_filter_english_possessive, + test_lang_filter_english_possessive, NULL }; int ret; - fts_filters_init(); + lang_filters_init(); ret = test_run(test_functions); - fts_filters_deinit(); + lang_filters_deinit(); return ret; } diff --git a/src/lib-language/test-lang-icu.c b/src/lib-language/test-lang-icu.c index 79b95dfafb..994dc8981d 100644 --- a/src/lib-language/test-lang-icu.c +++ b/src/lib-language/test-lang-icu.c @@ -9,68 +9,68 @@ #include -static void test_fts_icu_utf8_to_utf16_ascii_resize(void) +static void test_lang_icu_utf8_to_utf16_ascii_resize(void) { ARRAY_TYPE(icu_utf16) dest; - test_begin("fts_icu_utf8_to_utf16 ascii resize"); + test_begin("lang_icu_utf8_to_utf16 ascii resize"); t_array_init(&dest, 2); test_assert(buffer_get_writable_size(dest.arr.buffer) == 4); - fts_icu_utf8_to_utf16(&dest, "12"); + lang_icu_utf8_to_utf16(&dest, "12"); test_assert(array_count(&dest) == 2); test_assert(buffer_get_writable_size(dest.arr.buffer) == 4); - fts_icu_utf8_to_utf16(&dest, "123"); + lang_icu_utf8_to_utf16(&dest, "123"); test_assert(array_count(&dest) == 3); test_assert(buffer_get_writable_size(dest.arr.buffer) == 7); - fts_icu_utf8_to_utf16(&dest, "12345"); + lang_icu_utf8_to_utf16(&dest, "12345"); test_assert(array_count(&dest) == 5); test_end(); } -static void test_fts_icu_utf8_to_utf16_32bit_resize(void) +static void test_lang_icu_utf8_to_utf16_32bit_resize(void) { ARRAY_TYPE(icu_utf16) dest; unsigned int i; - test_begin("fts_icu_utf8_to_utf16 32bit resize"); + test_begin("lang_icu_utf8_to_utf16 32bit resize"); for (i = 1; i <= 2; i++) { t_array_init(&dest, i); test_assert(buffer_get_writable_size(dest.arr.buffer) == i*2); - fts_icu_utf8_to_utf16(&dest, "\xF0\x90\x90\x80"); /* 0x10400 */ + lang_icu_utf8_to_utf16(&dest, "\xF0\x90\x90\x80"); /* 0x10400 */ test_assert(array_count(&dest) == 2); } test_end(); } -static void test_fts_icu_utf16_to_utf8(void) +static void test_lang_icu_utf16_to_utf8(void) { string_t *dest = t_str_new(64); const UChar src[] = { 0xbd, 'b', 'c' }; unsigned int i; - test_begin("fts_icu_utf16_to_utf8"); + test_begin("lang_icu_utf16_to_utf8"); for (i = N_ELEMENTS(src); i > 0; i--) { - fts_icu_utf16_to_utf8(dest, src, i); + lang_icu_utf16_to_utf8(dest, src, i); test_assert(dest->used == i+1); } test_end(); } -static void test_fts_icu_utf16_to_utf8_resize(void) +static void test_lang_icu_utf16_to_utf8_resize(void) { string_t *dest; const UChar src = UNICODE_REPLACEMENT_CHAR; unsigned int i; - test_begin("fts_icu_utf16_to_utf8 resize"); + test_begin("lang_icu_utf16_to_utf8 resize"); for (i = 2; i <= 6; i++) { dest = t_str_new(i); test_assert(buffer_get_writable_size(dest) == i); - fts_icu_utf16_to_utf8(dest, &src, 1); + lang_icu_utf16_to_utf8(dest, &src, 1); test_assert(dest->used == 3); test_assert(strcmp(str_c(dest), UNICODE_REPLACEMENT_CHAR_UTF8) == 0); } @@ -86,7 +86,7 @@ static UTransliterator *get_translit(const char *id) UParseError perr; t_array_init(&id_utf16, 8); - fts_icu_utf8_to_utf16(&id_utf16, id); + lang_icu_utf8_to_utf16(&id_utf16, id); translit = utrans_openU(array_front(&id_utf16), array_count(&id_utf16), UTRANS_FORWARD, NULL, 0, &perr, &err); @@ -94,7 +94,7 @@ static UTransliterator *get_translit(const char *id) return translit; } -static void test_fts_icu_translate(void) +static void test_lang_icu_translate(void) { const char *translit_id = "Any-Lower"; UTransliterator *translit; @@ -103,12 +103,12 @@ static void test_fts_icu_translate(void) const char *error; unsigned int i; - test_begin("fts_icu_translate"); + test_begin("lang_icu_translate"); t_array_init(&dest, 32); translit = get_translit(translit_id); for (i = N_ELEMENTS(src); i > 0; i--) { array_clear(&dest); - test_assert(fts_icu_translate(&dest, src, i, + test_assert(lang_icu_translate(&dest, src, i, translit, &error) == 0); test_assert(array_count(&dest) == i); } @@ -116,7 +116,7 @@ static void test_fts_icu_translate(void) test_end(); } -static void test_fts_icu_translate_resize(void) +static void test_lang_icu_translate_resize(void) { const char *translit_id = "Any-Hex"; const char *src_utf8 = "FOO"; @@ -125,16 +125,16 @@ static void test_fts_icu_translate_resize(void) const char *error; unsigned int i; - test_begin("fts_icu_translate_resize resize"); + test_begin("lang_icu_translate_resize resize"); t_array_init(&src_utf16, 8); translit = get_translit(translit_id); for (i = 1; i <= 10; i++) { array_clear(&src_utf16); - fts_icu_utf8_to_utf16(&src_utf16, src_utf8); + lang_icu_utf8_to_utf16(&src_utf16, src_utf8); t_array_init(&dest, i); test_assert(buffer_get_writable_size(dest.arr.buffer) == i*2); - test_assert(fts_icu_translate(&dest, array_front(&src_utf16), + test_assert(lang_icu_translate(&dest, array_front(&src_utf16), array_count(&src_utf16), translit, &error) == 0); } @@ -143,28 +143,28 @@ static void test_fts_icu_translate_resize(void) test_end(); } -static void test_fts_icu_lcase(void) +static void test_lang_icu_lcase(void) { const char *src = "aBcD\xC3\x84\xC3\xA4"; string_t *dest = t_str_new(64); - test_begin("fts_icu_lcase"); - fts_icu_lcase(dest, src); + test_begin("lang_icu_lcase"); + lang_icu_lcase(dest, src); test_assert(strcmp(str_c(dest), "abcd\xC3\xA4\xC3\xA4") == 0); test_end(); } -static void test_fts_icu_lcase_resize(void) +static void test_lang_icu_lcase_resize(void) { const char *src = "a\xC3\x84"; string_t *dest; unsigned int i; - test_begin("fts_icu_lcase resize"); + test_begin("lang_icu_lcase resize"); for (i = 1; i <= 3; i++) { dest = t_str_new(i); test_assert(buffer_get_writable_size(dest) == i); - fts_icu_lcase(dest, src); + lang_icu_lcase(dest, src); test_assert(strcmp(str_c(dest), "a\xC3\xA4") == 0); test_assert(buffer_get_writable_size(dest) == 3); } @@ -172,31 +172,31 @@ static void test_fts_icu_lcase_resize(void) test_end(); } -static void test_fts_icu_lcase_resize_invalid_utf8(void) +static void test_lang_icu_lcase_resize_invalid_utf8(void) { string_t *dest; - test_begin("fts_icu_lcase resize invalid utf8"); + test_begin("lang_icu_lcase resize invalid utf8"); dest = t_str_new(1); - fts_icu_lcase(dest, ".\x80."); + lang_icu_lcase(dest, ".\x80."); test_end(); } int main(void) { static void (*const test_functions[])(void) = { - test_fts_icu_utf8_to_utf16_ascii_resize, - test_fts_icu_utf8_to_utf16_32bit_resize, - test_fts_icu_utf16_to_utf8, - test_fts_icu_utf16_to_utf8_resize, - test_fts_icu_translate, - test_fts_icu_translate_resize, - test_fts_icu_lcase, - test_fts_icu_lcase_resize, - test_fts_icu_lcase_resize_invalid_utf8, + test_lang_icu_utf8_to_utf16_ascii_resize, + test_lang_icu_utf8_to_utf16_32bit_resize, + test_lang_icu_utf16_to_utf8, + test_lang_icu_utf16_to_utf8_resize, + test_lang_icu_translate, + test_lang_icu_translate_resize, + test_lang_icu_lcase, + test_lang_icu_lcase_resize, + test_lang_icu_lcase_resize_invalid_utf8, NULL }; int ret = test_run(test_functions); - fts_icu_deinit(); + lang_icu_deinit(); return ret; } diff --git a/src/lib-language/test-lang-tokenizer.c b/src/lib-language/test-lang-tokenizer.c index b69633486b..678d477350 100644 --- a/src/lib-language/test-lang-tokenizer.c +++ b/src/lib-language/test-lang-tokenizer.c @@ -66,16 +66,16 @@ static const char *test_inputs[] = { "l" SQ "homme l" SQ "humanit\xC3\xA9 d" SQ "immixtions qu" SQ "il aujourd'hui que'euq" }; -static void test_fts_tokenizer_find(void) +static void test_lang_tokenizer_find(void) { - test_begin("fts tokenizer find"); - test_assert(fts_tokenizer_find("email-address") == fts_tokenizer_email_address); - test_assert(fts_tokenizer_find("generic") == fts_tokenizer_generic); + test_begin("lang tokenizer find"); + test_assert(lang_tokenizer_find("email-address") == lang_tokenizer_email_address); + test_assert(lang_tokenizer_find("generic") == lang_tokenizer_generic); test_end(); } static unsigned int -test_tokenizer_inputoutput(struct fts_tokenizer *tok, const char *_input, +test_tokenizer_inputoutput(struct lang_tokenizer *tok, const char *_input, const char *const *expected_output, unsigned int first_outi) { @@ -86,11 +86,11 @@ test_tokenizer_inputoutput(struct fts_tokenizer *tok, const char *_input, /* test all input at once */ outi = first_outi; - while (fts_tokenizer_next(tok, input, input_len, &token, &error) > 0) { + while (lang_tokenizer_next(tok, input, input_len, &token, &error) > 0) { test_assert_strcmp(token, expected_output[outi]); outi++; } - while (fts_tokenizer_next(tok, NULL, 0, &token, &error) > 0) { + while (lang_tokenizer_next(tok, NULL, 0, &token, &error) > 0) { test_assert_strcmp(token, expected_output[outi]); outi++; } @@ -100,12 +100,12 @@ test_tokenizer_inputoutput(struct fts_tokenizer *tok, const char *_input, outi = first_outi; for (i = 0; i < input_len; i += char_len) { char_len = uni_utf8_char_bytes(input[i]); - while (fts_tokenizer_next(tok, input+i, char_len, &token, &error) > 0) { + while (lang_tokenizer_next(tok, input+i, char_len, &token, &error) > 0) { test_assert_strcmp(token, expected_output[outi]); outi++; } } - while (fts_tokenizer_final(tok, &token, &error) > 0) { + while (lang_tokenizer_final(tok, &token, &error) > 0) { test_assert_strcmp(token, expected_output[outi]); outi++; } @@ -117,12 +117,12 @@ test_tokenizer_inputoutput(struct fts_tokenizer *tok, const char *_input, max = i_rand_minmax(1, input_len - i); for (char_len = 0; char_len < max; ) char_len += uni_utf8_char_bytes(input[i+char_len]); - while (fts_tokenizer_next(tok, input+i, char_len, &token, &error) > 0) { + while (lang_tokenizer_next(tok, input+i, char_len, &token, &error) > 0) { test_assert_strcmp(token, expected_output[outi]); outi++; } } - while (fts_tokenizer_final(tok, &token, &error) > 0) { + while (lang_tokenizer_final(tok, &token, &error) > 0) { test_assert_strcmp(token, expected_output[outi]); outi++; } @@ -132,7 +132,7 @@ test_tokenizer_inputoutput(struct fts_tokenizer *tok, const char *_input, } static void -test_tokenizer_inputs(struct fts_tokenizer *tok, +test_tokenizer_inputs(struct lang_tokenizer *tok, const char *const *inputs, unsigned int count, const char *const *expected_output) { @@ -145,7 +145,7 @@ test_tokenizer_inputs(struct fts_tokenizer *tok, test_assert_idx(expected_output[outi] == NULL, outi); } -static void test_fts_tokenizer_generic_only(void) +static void test_lang_tokenizer_generic_only(void) { static const char *const expected_output[] = { "hello", "world", "And", @@ -188,15 +188,15 @@ static void test_fts_tokenizer_generic_only(void) NULL }; - struct fts_tokenizer *tok; + struct lang_tokenizer *tok; const char *error; - test_begin("fts tokenizer generic simple"); - test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, NULL, &tok, &error) == 0); - test_assert(((struct generic_fts_tokenizer *) tok)->algorithm == BOUNDARY_ALGORITHM_SIMPLE); + test_begin("lang tokenizer generic simple"); + test_assert(lang_tokenizer_create(lang_tokenizer_generic, NULL, NULL, &tok, &error) == 0); + test_assert(((struct generic_lang_tokenizer *) tok)->algorithm == BOUNDARY_ALGORITHM_SIMPLE); test_tokenizer_inputs(tok, test_inputs, N_ELEMENTS(test_inputs), expected_output); - fts_tokenizer_unref(&tok); + lang_tokenizer_unref(&tok); test_end(); } @@ -204,7 +204,7 @@ const char *const tr29_settings[] = {"algorithm", "tr29", NULL}; /* TODO: U+206F is in "Format" and therefore currently not word break. This definitely needs to be remapped. */ -static void test_fts_tokenizer_generic_tr29_only(void) +static void test_lang_tokenizer_generic_tr29_only(void) { static const char *const expected_output[] = { "hello", "world", "And", @@ -246,13 +246,13 @@ static void test_fts_tokenizer_generic_tr29_only(void) "l'homme", "l'humanit\xC3\xA9", "d'immixtions", "qu'il", "aujourd'hui", "que'euq", NULL, NULL }; - struct fts_tokenizer *tok; + struct lang_tokenizer *tok; const char *error; - test_begin("fts tokenizer generic TR29"); - test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, tr29_settings, &tok, &error) == 0); + test_begin("lang tokenizer generic TR29"); + test_assert(lang_tokenizer_create(lang_tokenizer_generic, NULL, tr29_settings, &tok, &error) == 0); test_tokenizer_inputs(tok, test_inputs, N_ELEMENTS(test_inputs), expected_output); - fts_tokenizer_unref(&tok); + lang_tokenizer_unref(&tok); test_end(); } @@ -260,7 +260,7 @@ const char *const tr29_settings_wb5a[] = {"algorithm", "tr29", "wb5a", "yes", NU /* TODO: U+206F is in "Format" and therefore currently not word break. This definitely needs to be remapped. */ -static void test_fts_tokenizer_generic_tr29_wb5a(void) +static void test_lang_tokenizer_generic_tr29_wb5a(void) { static const char *const expected_output[] = { "hello", "world", "And", @@ -303,17 +303,17 @@ static void test_fts_tokenizer_generic_tr29_wb5a(void) NULL }; - struct fts_tokenizer *tok; + struct lang_tokenizer *tok; const char *error; - test_begin("fts tokenizer generic TR29 with WB5a"); - test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, tr29_settings_wb5a, &tok, &error) == 0); + test_begin("lang tokenizer generic TR29 with WB5a"); + test_assert(lang_tokenizer_create(lang_tokenizer_generic, NULL, tr29_settings_wb5a, &tok, &error) == 0); test_tokenizer_inputs(tok, test_inputs, N_ELEMENTS(test_inputs), expected_output); - fts_tokenizer_unref(&tok); + lang_tokenizer_unref(&tok); test_end(); } -static void test_fts_tokenizer_address_only(void) +static void test_lang_tokenizer_address_only(void) { static const char input[] = TEST_INPUT_ADDRESS; static const char *const expected_output[] = { @@ -326,17 +326,17 @@ static void test_fts_tokenizer_address_only(void) "hypen@hypen-hypen-sick.com", NULL }; - struct fts_tokenizer *tok; + struct lang_tokenizer *tok; const char *error; - test_begin("fts tokenizer email address only"); - test_assert(fts_tokenizer_create(fts_tokenizer_email_address, NULL, NULL, &tok, &error) == 0); + test_begin("lang tokenizer email address only"); + test_assert(lang_tokenizer_create(lang_tokenizer_email_address, NULL, NULL, &tok, &error) == 0); test_tokenizer_inputoutput(tok, input, expected_output, 0); - fts_tokenizer_unref(&tok); + lang_tokenizer_unref(&tok); test_end(); } -static void test_fts_tokenizer_address_parent(const char *name, const char * const *settings) +static void test_lang_tokenizer_address_parent(const char *name, const char * const *settings) { static const char input[] = TEST_INPUT_ADDRESS; static const char *const expected_output[] = { @@ -362,30 +362,30 @@ static void test_fts_tokenizer_address_parent(const char *name, const char * con "hypen", "hypen", "hypen", "sick", "com", "hypen@hypen-hypen-sick.com", NULL }; - struct fts_tokenizer *tok, *gen_tok; + struct lang_tokenizer *tok, *gen_tok; const char *error; - test_begin(t_strdup_printf("fts tokenizer email address + parent %s", name)); - test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, settings, &gen_tok, &error) == 0); - test_assert(fts_tokenizer_create(fts_tokenizer_email_address, gen_tok, NULL, &tok, &error) == 0); + test_begin(t_strdup_printf("lang tokenizer email address + parent %s", name)); + test_assert(lang_tokenizer_create(lang_tokenizer_generic, NULL, settings, &gen_tok, &error) == 0); + test_assert(lang_tokenizer_create(lang_tokenizer_email_address, gen_tok, NULL, &tok, &error) == 0); test_tokenizer_inputoutput(tok, input, expected_output, 0); - fts_tokenizer_unref(&tok); - fts_tokenizer_unref(&gen_tok); + lang_tokenizer_unref(&tok); + lang_tokenizer_unref(&gen_tok); test_end(); } const char *const simple_settings[] = {"algorithm", "simple", NULL}; -static void test_fts_tokenizer_address_parent_simple(void) +static void test_lang_tokenizer_address_parent_simple(void) { - test_fts_tokenizer_address_parent("simple", simple_settings); + test_lang_tokenizer_address_parent("simple", simple_settings); } -static void test_fts_tokenizer_address_parent_tr29(void) +static void test_lang_tokenizer_address_parent_tr29(void) { - test_fts_tokenizer_address_parent("tr29", tr29_settings); + test_lang_tokenizer_address_parent("tr29", tr29_settings); } -static void test_fts_tokenizer_address_search(void) +static void test_lang_tokenizer_address_search(void) { static const char input[] = TEST_INPUT_ADDRESS; static const char *const expected_output[] = { @@ -412,44 +412,44 @@ static void test_fts_tokenizer_address_search(void) NULL }; static const char *const settings[] = { "search", "", NULL }; - struct fts_tokenizer *tok, *gen_tok; + struct lang_tokenizer *tok, *gen_tok; const char *token, *error; - test_begin("fts tokenizer search email address + parent"); - test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, NULL, &gen_tok, &error) == 0); - test_assert(fts_tokenizer_create(fts_tokenizer_email_address, gen_tok, settings, &tok, &error) == 0); + test_begin("lang tokenizer search email address + parent"); + test_assert(lang_tokenizer_create(lang_tokenizer_generic, NULL, NULL, &gen_tok, &error) == 0); + test_assert(lang_tokenizer_create(lang_tokenizer_email_address, gen_tok, settings, &tok, &error) == 0); test_tokenizer_inputoutput(tok, input, expected_output, 0); /* make sure state is forgotten at EOF */ - test_assert(fts_tokenizer_next(tok, (const void *)"foo", 3, &token, &error) == 0); - test_assert(fts_tokenizer_final(tok, &token, &error) > 0 && + test_assert(lang_tokenizer_next(tok, (const void *)"foo", 3, &token, &error) == 0); + test_assert(lang_tokenizer_final(tok, &token, &error) > 0 && strcmp(token, "foo") == 0); - test_assert(fts_tokenizer_final(tok, &token, &error) == 0); + test_assert(lang_tokenizer_final(tok, &token, &error) == 0); - test_assert(fts_tokenizer_next(tok, (const void *)"bar@baz", 7, &token, &error) == 0); - test_assert(fts_tokenizer_final(tok, &token, &error) > 0 && + test_assert(lang_tokenizer_next(tok, (const void *)"bar@baz", 7, &token, &error) == 0); + test_assert(lang_tokenizer_final(tok, &token, &error) > 0 && strcmp(token, "bar@baz") == 0); - test_assert(fts_tokenizer_final(tok, &token, &error) == 0); + test_assert(lang_tokenizer_final(tok, &token, &error) == 0); - test_assert(fts_tokenizer_next(tok, (const void *)"foo@", 4, &token, &error) == 0); - test_assert(fts_tokenizer_final(tok, &token, &error) > 0 && + test_assert(lang_tokenizer_next(tok, (const void *)"foo@", 4, &token, &error) == 0); + test_assert(lang_tokenizer_final(tok, &token, &error) > 0 && strcmp(token, "foo") == 0); - test_assert(fts_tokenizer_final(tok, &token, &error) == 0); + test_assert(lang_tokenizer_final(tok, &token, &error) == 0); /* test reset explicitly */ - test_assert(fts_tokenizer_next(tok, (const void *)"a", 1, &token, &error) == 0); - fts_tokenizer_reset(tok); - test_assert(fts_tokenizer_next(tok, (const void *)"b@c", 3, &token, &error) == 0); - test_assert(fts_tokenizer_final(tok, &token, &error) > 0 && + test_assert(lang_tokenizer_next(tok, (const void *)"a", 1, &token, &error) == 0); + lang_tokenizer_reset(tok); + test_assert(lang_tokenizer_next(tok, (const void *)"b@c", 3, &token, &error) == 0); + test_assert(lang_tokenizer_final(tok, &token, &error) > 0 && strcmp(token, "b@c") == 0); - test_assert(fts_tokenizer_final(tok, &token, &error) == 0); + test_assert(lang_tokenizer_final(tok, &token, &error) == 0); - fts_tokenizer_unref(&tok); - fts_tokenizer_unref(&gen_tok); + lang_tokenizer_unref(&tok); + lang_tokenizer_unref(&gen_tok); test_end(); } -static void test_fts_tokenizer_delete_trailing_partial_char(void) +static void test_lang_tokenizer_delete_trailing_partial_char(void) { static const struct { const char *str; @@ -468,33 +468,33 @@ static void test_fts_tokenizer_delete_trailing_partial_char(void) unsigned int i; size_t size; - test_begin("fts tokenizer delete trailing partial char"); + test_begin("lang tokenizer delete trailing partial char"); for (i = 0; i < N_ELEMENTS(tests); i++) { size = strlen(tests[i].str); - fts_tokenizer_delete_trailing_partial_char((const unsigned char *)tests[i].str, &size); + lang_tokenizer_delete_trailing_partial_char((const unsigned char *)tests[i].str, &size); test_assert(size == tests[i].truncated_len); } test_end(); } -static void test_fts_tokenizer_address_maxlen(void) +static void test_lang_tokenizer_address_maxlen(void) { const char *const settings[] = {"maxlen", "5", NULL}; const char *input = "...\357\277\275@a"; - struct fts_tokenizer *tok; + struct lang_tokenizer *tok; const char *token, *error; - test_begin("fts tokenizer address maxlen"); - test_assert(fts_tokenizer_create(fts_tokenizer_email_address, NULL, settings, &tok, &error) == 0); + test_begin("lang tokenizer address maxlen"); + test_assert(lang_tokenizer_create(lang_tokenizer_email_address, NULL, settings, &tok, &error) == 0); - while (fts_tokenizer_next(tok, (const unsigned char *)input, + while (lang_tokenizer_next(tok, (const unsigned char *)input, strlen(input), &token, &error) > 0) ; - while (fts_tokenizer_final(tok, &token, &error) > 0) ; - fts_tokenizer_unref(&tok); + while (lang_tokenizer_final(tok, &token, &error) > 0) ; + lang_tokenizer_unref(&tok); test_end(); } -static void test_fts_tokenizer_random(void) +static void test_lang_tokenizer_random(void) { const unsigned char test_chars[] = { 0, ' ', '.', 'a', 'b', 'c', '-', '@', '\xC3', '\xA4' }; const char *const settings[] = {"algorithm", "simple", NULL}; @@ -502,12 +502,12 @@ static void test_fts_tokenizer_random(void) unsigned int i; unsigned char addr[10] = { 0 }; string_t *str = t_str_new(20); - struct fts_tokenizer *tok, *gen_tok; + struct lang_tokenizer *tok, *gen_tok; const char *token, *error; - test_begin("fts tokenizer random"); - test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, settings, &gen_tok, &error) == 0); - test_assert(fts_tokenizer_create(fts_tokenizer_email_address, gen_tok, email_settings, &tok, &error) == 0); + test_begin("lang tokenizer random"); + test_assert(lang_tokenizer_create(lang_tokenizer_generic, NULL, settings, &gen_tok, &error) == 0); + test_assert(lang_tokenizer_create(lang_tokenizer_email_address, gen_tok, email_settings, &tok, &error) == 0); for (i = 0; i < 10000; i++) T_BEGIN { for (unsigned int j = 0; j < sizeof(addr); j++) @@ -515,18 +515,18 @@ static void test_fts_tokenizer_random(void) str_truncate(str, 0); if (uni_utf8_get_valid_data(addr, sizeof(addr), str)) str_append_data(str, addr, sizeof(addr)); - while (fts_tokenizer_next(tok, str_data(str), str_len(str), + while (lang_tokenizer_next(tok, str_data(str), str_len(str), &token, &error) > 0) ; - while (fts_tokenizer_final(tok, &token, &error) > 0) ; + while (lang_tokenizer_final(tok, &token, &error) > 0) ; } T_END; - fts_tokenizer_unref(&tok); - fts_tokenizer_unref(&gen_tok); + lang_tokenizer_unref(&tok); + lang_tokenizer_unref(&gen_tok); test_end(); } static void -test_fts_tokenizer_explicit_prefix(void) +test_lang_tokenizer_explicit_prefix(void) { const char *input = "* ** " "*pre *both* post* " @@ -567,24 +567,24 @@ test_fts_tokenizer_explicit_prefix(void) algos[algo], searches[search], prefixes[explicitprefix])); - struct fts_tokenizer *tok; + struct lang_tokenizer *tok; const char *error; - test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, settings, + test_assert(lang_tokenizer_create(lang_tokenizer_generic, NULL, settings, &tok, &error) == 0); test_tokenizer_inputs( tok, &input, 1, (search!=0) && (explicitprefix!=0) ? expected_star : expected_nostar); - fts_tokenizer_unref(&tok); + lang_tokenizer_unref(&tok); test_end(); } } } } -static void test_fts_tokenizer_skip_base64(void) +static void test_lang_tokenizer_skip_base64(void) { /* The skip_base64 works on the data already available in the buffer of the tokenizer, it does not pull more data to see if a base64 @@ -592,7 +592,7 @@ static void test_fts_tokenizer_skip_base64(void) use test_tokenizer_inputoutput that also tests with one-byte-at-once or random chunking, as those are known to fail with the current implementation */ - struct fts_tokenizer *tok; + struct lang_tokenizer *tok; const char *error; const char *token; @@ -641,16 +641,16 @@ static void test_fts_tokenizer_skip_base64(void) NULL }; - test_begin("fts tokenizer skip base64"); - test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, tr29_settings, &tok, &error) == 0); + test_begin("lang tokenizer skip base64"); + test_assert(lang_tokenizer_create(lang_tokenizer_generic, NULL, tr29_settings, &tok, &error) == 0); size_t index = 0; - while (fts_tokenizer_next(tok, (const unsigned char *) input, strlen(input), &token, &error) > 0) { + while (lang_tokenizer_next(tok, (const unsigned char *) input, strlen(input), &token, &error) > 0) { i_assert(index < N_ELEMENTS(expected_output)); test_assert_strcmp(token, expected_output[index]); ++index; } - while (fts_tokenizer_next(tok, NULL, 0, &token, &error) > 0) { + while (lang_tokenizer_next(tok, NULL, 0, &token, &error) > 0) { i_assert(index < N_ELEMENTS(expected_output)); test_assert_strcmp(token, expected_output[index]); ++index; @@ -658,33 +658,33 @@ static void test_fts_tokenizer_skip_base64(void) i_assert(index < N_ELEMENTS(expected_output)); test_assert_idx(expected_output[index] == NULL, index); - fts_tokenizer_unref(&tok); + lang_tokenizer_unref(&tok); test_end(); } int main(void) { static void (*const test_functions[])(void) = { - test_fts_tokenizer_skip_base64, - test_fts_tokenizer_find, - test_fts_tokenizer_generic_only, - test_fts_tokenizer_generic_tr29_only, - test_fts_tokenizer_generic_tr29_wb5a, - test_fts_tokenizer_address_only, - test_fts_tokenizer_address_parent_simple, - test_fts_tokenizer_address_parent_tr29, - test_fts_tokenizer_address_maxlen, - test_fts_tokenizer_address_search, - test_fts_tokenizer_delete_trailing_partial_char, - test_fts_tokenizer_random, - test_fts_tokenizer_explicit_prefix, + test_lang_tokenizer_skip_base64, + test_lang_tokenizer_find, + test_lang_tokenizer_generic_only, + test_lang_tokenizer_generic_tr29_only, + test_lang_tokenizer_generic_tr29_wb5a, + test_lang_tokenizer_address_only, + test_lang_tokenizer_address_parent_simple, + test_lang_tokenizer_address_parent_tr29, + test_lang_tokenizer_address_maxlen, + test_lang_tokenizer_address_search, + test_lang_tokenizer_delete_trailing_partial_char, + test_lang_tokenizer_random, + test_lang_tokenizer_explicit_prefix, NULL }; int ret; - fts_tokenizers_init(); + lang_tokenizers_init(); ret = test_run(test_functions); - fts_tokenizers_deinit(); + lang_tokenizers_deinit(); return ret; } diff --git a/src/lib-language/test-language.c b/src/lib-language/test-language.c index 661f9021b2..85032f5349 100644 --- a/src/lib-language/test-language.c +++ b/src/lib-language/test-language.c @@ -11,10 +11,10 @@ const char *const settings[] = "fts_language_data", TEXTCAT_DATADIR"/", NULL}; /* Detect Finnish. fi--utf8 */ -static void test_fts_language_detect_finnish(void) +static void test_language_detect_finnish(void) { - struct fts_language_list *lp = NULL; - const struct fts_language *lang_r = NULL; + struct language_list *lp = NULL; + const struct language *lang_r = NULL; const unsigned char finnish[] = "Yhdistyneiden kansakuntien kolmas yleiskokous hyv\xC3\xA4ksyi "\ "ja julkisti ihmisoikeuksien yleismaailmallisen julistuksen "\ @@ -25,21 +25,21 @@ static void test_fts_language_detect_finnish(void) "\xC3\xA4\xC3\xA4nest\xC3\xA4m\xC3\xA4st\xC3\xA4."; const char names[] = "de, fi, en"; const char *unknown, *error; - test_begin("fts language detect Finnish"); - test_assert(fts_language_list_init(settings, &lp, &error) == 0); - test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE); - test_assert(fts_language_detect(lp, finnish, sizeof(finnish)-1, &lang_r, &error) - == FTS_LANGUAGE_RESULT_OK); + test_begin("language detect Finnish"); + test_assert(language_list_init(settings, &lp, &error) == 0); + test_assert(language_list_add_names(lp, names, &unknown) == TRUE); + test_assert(language_detect(lp, finnish, sizeof(finnish)-1, &lang_r, &error) + == LANGUAGE_RESULT_OK); test_assert(strcmp(lang_r->name, "fi") == 0); - fts_language_list_deinit(&lp); + language_list_deinit(&lp); test_end(); } /* Detect English */ -static void test_fts_language_detect_english(void) +static void test_language_detect_english(void) { - struct fts_language_list *lp = NULL; - const struct fts_language *lang_r = NULL; + struct language_list *lp = NULL; + const struct language *lang_r = NULL; const unsigned char english[] = "Whereas recognition of the inherent dignity and"\ " of the equal and inalienable rights of all members of the human"\ "family is the foundation of freedom, justice and peace in the "\ @@ -52,21 +52,21 @@ static void test_fts_language_detect_english(void) const char names[] = "fi, de, fr, en"; const char *unknown, *error; - test_begin("fts language detect English"); - test_assert(fts_language_list_init(settings, &lp, &error) == 0); - test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE); - test_assert(fts_language_detect(lp, english, sizeof(english)-1, &lang_r, &error) - == FTS_LANGUAGE_RESULT_OK); + test_begin("language detect English"); + test_assert(language_list_init(settings, &lp, &error) == 0); + test_assert(language_list_add_names(lp, names, &unknown) == TRUE); + test_assert(language_detect(lp, english, sizeof(english)-1, &lang_r, &error) + == LANGUAGE_RESULT_OK); test_assert(strcmp(lang_r->name, "en") == 0); - fts_language_list_deinit(&lp); + language_list_deinit(&lp); test_end(); } /* Detect French */ -static void test_fts_language_detect_french(void) +static void test_language_detect_french(void) { - struct fts_language_list *lp = NULL; - const struct fts_language *lang_r = NULL; + struct language_list *lp = NULL; + const struct language *lang_r = NULL; const unsigned char french[] = "D\xC3\xA9""claration universelle des droits de l\xE2\x80\x99" "homme Pr\xC3\xA9""ambule Consid\xC3\xA9rant que la "\ @@ -87,20 +87,20 @@ static void test_fts_language_detect_french(void) const char names[] = "de, fi, fr, en"; const char *unknown, *error; - test_begin("fts language detect French"); - test_assert(fts_language_list_init(settings, &lp, &error) == 0); - test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE); - test_assert(fts_language_detect(lp, french, sizeof(french)-1, &lang_r, &error) - == FTS_LANGUAGE_RESULT_OK); + test_begin("language detect French"); + test_assert(language_list_init(settings, &lp, &error) == 0); + test_assert(language_list_add_names(lp, names, &unknown) == TRUE); + test_assert(language_detect(lp, french, sizeof(french)-1, &lang_r, &error) + == LANGUAGE_RESULT_OK); test_assert(strcmp(lang_r->name, "fr") == 0); - fts_language_list_deinit(&lp); + language_list_deinit(&lp); test_end(); } /* Detect German */ -static void test_fts_language_detect_german(void) +static void test_language_detect_german(void) { - struct fts_language_list *lp = NULL; - const struct fts_language *lang_r = NULL; + struct language_list *lp = NULL; + const struct language *lang_r = NULL; const unsigned char german[] = "Artikel 1"\ "Alle Menschen sind frei und gleich an W\xC3\xBCrde und Rechten "\ @@ -124,21 +124,21 @@ static void test_fts_language_detect_german(void) const char names[] = "fi, de, fr, en"; const char *unknown, *error; - test_begin("fts language detect German"); - test_assert(fts_language_list_init(settings, &lp, &error) == 0); - test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE); - test_assert(fts_language_detect(lp, german, sizeof(german)-1, &lang_r, &error) - == FTS_LANGUAGE_RESULT_OK); + test_begin("language detect German"); + test_assert(language_list_init(settings, &lp, &error) == 0); + test_assert(language_list_add_names(lp, names, &unknown) == TRUE); + test_assert(language_detect(lp, german, sizeof(german)-1, &lang_r, &error) + == LANGUAGE_RESULT_OK); test_assert(strcmp(lang_r->name, "de") == 0); - fts_language_list_deinit(&lp); + language_list_deinit(&lp); test_end(); } /* Detect Swedish */ -static void test_fts_language_detect_swedish(void) +static void test_language_detect_swedish(void) { - struct fts_language_list *lp = NULL; - const struct fts_language *lang_r = NULL; + struct language_list *lp = NULL; + const struct language *lang_r = NULL; const unsigned char swedish[] = "Artikel 1."\ "Alla m\xC3\xA4nniskor \xC3\xA4ro f\xC3\xB6""dda fria och lika"\ @@ -150,21 +150,21 @@ static void test_fts_language_detect_swedish(void) const char names[] = "fi, de, sv, fr, en"; const char *unknown, *error; - test_begin("fts language detect Swedish"); - test_assert(fts_language_list_init(settings, &lp, &error) == 0); - test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE); - test_assert(fts_language_detect(lp, swedish, sizeof(swedish)-1, &lang_r, &error) - == FTS_LANGUAGE_RESULT_OK); + test_begin("language detect Swedish"); + test_assert(language_list_init(settings, &lp, &error) == 0); + test_assert(language_list_add_names(lp, names, &unknown) == TRUE); + test_assert(language_detect(lp, swedish, sizeof(swedish)-1, &lang_r, &error) + == LANGUAGE_RESULT_OK); test_assert(strcmp(lang_r->name, "sv") == 0); - fts_language_list_deinit(&lp); + language_list_deinit(&lp); test_end(); } /* Detect Bokmal */ -static void test_fts_language_detect_bokmal(void) +static void test_language_detect_bokmal(void) { - struct fts_language_list *lp = NULL; - const struct fts_language *lang_r = NULL; + struct language_list *lp = NULL; + const struct language *lang_r = NULL; const unsigned char bokmal[] = "Artikkel 1.\n"\ "Alle mennesker er f\xC3\xB8""dt frie og med samme menneskeverd"\ @@ -174,21 +174,21 @@ static void test_fts_language_detect_bokmal(void) const char names[] = "fi, de, sv, no, fr, en"; const char *unknown, *error; - test_begin("fts language detect Bokmal as Norwegian"); - test_assert(fts_language_list_init(settings, &lp, &error) == 0); - test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE); - test_assert(fts_language_detect(lp, bokmal, sizeof(bokmal)-1, &lang_r, &error) - == FTS_LANGUAGE_RESULT_OK); + test_begin("language detect Bokmal as Norwegian"); + test_assert(language_list_init(settings, &lp, &error) == 0); + test_assert(language_list_add_names(lp, names, &unknown) == TRUE); + test_assert(language_detect(lp, bokmal, sizeof(bokmal)-1, &lang_r, &error) + == LANGUAGE_RESULT_OK); test_assert(strcmp(lang_r->name, "no") == 0); - fts_language_list_deinit(&lp); + language_list_deinit(&lp); test_end(); } /* Detect Nynorsk */ -static void test_fts_language_detect_nynorsk(void) +static void test_language_detect_nynorsk(void) { - struct fts_language_list *lp = NULL; - const struct fts_language *lang_r = NULL; + struct language_list *lp = NULL; + const struct language *lang_r = NULL; const unsigned char nynorsk[] = "Artikkel 1.\n"\ "Alle menneske er f\xC3\xB8""dde til fridom og med same "\ @@ -198,21 +198,21 @@ static void test_fts_language_detect_nynorsk(void) const char names[] = "fi, de, sv, no, fr, en"; const char *unknown, *error; - test_begin("fts language detect Nynorsk as Norwegian"); - test_assert(fts_language_list_init(settings, &lp, &error) == 0); - test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE); - test_assert(fts_language_detect(lp, nynorsk, sizeof(nynorsk)-1, &lang_r, &error) - == FTS_LANGUAGE_RESULT_OK); + test_begin("language detect Nynorsk as Norwegian"); + test_assert(language_list_init(settings, &lp, &error) == 0); + test_assert(language_list_add_names(lp, names, &unknown) == TRUE); + test_assert(language_detect(lp, nynorsk, sizeof(nynorsk)-1, &lang_r, &error) + == LANGUAGE_RESULT_OK); test_assert(strcmp(lang_r->name, "no") == 0); - fts_language_list_deinit(&lp); + language_list_deinit(&lp); test_end(); } /* Detect Finnish as English */ -static void test_fts_language_detect_finnish_as_english(void) +static void test_language_detect_finnish_as_english(void) { - struct fts_language_list *lp = NULL; - const struct fts_language *lang_r = NULL; + struct language_list *lp = NULL; + const struct language *lang_r = NULL; const unsigned char finnish[] = "Yhdistyneiden kansakuntien kolmas yleiskokous hyv\xC3\xA4ksyi "\ "ja julkisti ihmisoikeuksien yleismaailmallisen julistuksen "\ @@ -223,21 +223,21 @@ static void test_fts_language_detect_finnish_as_english(void) "\xC3\xA4\xC3\xA4nest\xC3\xA4m\xC3\xA4st\xC3\xA4."; const char names[] = "en"; const char *unknown, *error; - test_begin("fts language detect Finnish as English"); - test_assert(fts_language_list_init(settings, &lp, &error) == 0); - test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE); - test_assert(fts_language_detect(lp, finnish, sizeof(finnish)-1, &lang_r, &error) - == FTS_LANGUAGE_RESULT_OK); + test_begin("language detect Finnish as English"); + test_assert(language_list_init(settings, &lp, &error) == 0); + test_assert(language_list_add_names(lp, names, &unknown) == TRUE); + test_assert(language_detect(lp, finnish, sizeof(finnish)-1, &lang_r, &error) + == LANGUAGE_RESULT_OK); test_assert(strcmp(lang_r->name, "en") == 0); - fts_language_list_deinit(&lp); + language_list_deinit(&lp); test_end(); } /* Successfully avoid detecting English, when en is not in language list. */ -static void test_fts_language_detect_na(void) +static void test_language_detect_na(void) { - struct fts_language_list *lp = NULL; - const struct fts_language *lang_r = NULL; + struct language_list *lp = NULL; + const struct language *lang_r = NULL; const unsigned char english[] = "Whereas recognition of the inherent dignity and"\ " of the equal and inalienable rights of all members of the human"\ "family is the foundation of freedom, justice and peace in the "\ @@ -250,49 +250,49 @@ static void test_fts_language_detect_na(void) const char names[] = "fi, de, fr"; const char *unknown, *error; - test_begin("fts language detect not available"); - test_assert(fts_language_list_init(settings, &lp, &error) == 0); - test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE); - test_assert(fts_language_detect(lp, english, sizeof(english)-1, &lang_r, &error) - == FTS_LANGUAGE_RESULT_UNKNOWN); - fts_language_list_deinit(&lp); + test_begin("language detect not available"); + test_assert(language_list_init(settings, &lp, &error) == 0); + test_assert(language_list_add_names(lp, names, &unknown) == TRUE); + test_assert(language_detect(lp, english, sizeof(english)-1, &lang_r, &error) + == LANGUAGE_RESULT_UNKNOWN); + language_list_deinit(&lp); test_end(); } /* Successfully detect, that Klingon is unknown. */ -static void test_fts_language_detect_unknown(void) +static void test_language_detect_unknown(void) { - struct fts_language_list *lp = NULL; - const struct fts_language *lang_r = NULL; + struct language_list *lp = NULL; + const struct language *lang_r = NULL; const unsigned char klingon[] = "nobwI''a'pu'qoqvam'e' "\ "nuHegh'eghrupqa'moHlaHbe'law'lI'neS "\ "SeH'eghtaHghach'a'na'chajmo'."; const char names[] = "fi, de, fr"; const char *unknown, *error; - test_begin("fts language detect unknown"); - test_assert(fts_language_list_init(settings, &lp, &error) == 0); - test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE); - test_assert(fts_language_detect(lp, klingon, sizeof(klingon), &lang_r, &error) - == FTS_LANGUAGE_RESULT_UNKNOWN); - fts_language_list_deinit(&lp); + test_begin("language detect unknown"); + test_assert(language_list_init(settings, &lp, &error) == 0); + test_assert(language_list_add_names(lp, names, &unknown) == TRUE); + test_assert(language_detect(lp, klingon, sizeof(klingon), &lang_r, &error) + == LANGUAGE_RESULT_UNKNOWN); + language_list_deinit(&lp); test_end(); } -static void test_fts_language_find_builtin(void) +static void test_language_find_builtin(void) { - const struct fts_language *lp; - test_begin("fts language find built-in"); - lp = fts_language_find("en"); + const struct language *lp; + test_begin("language find built-in"); + lp = language_find("en"); i_assert(lp != NULL); test_assert(strcmp(lp->name, "en") == 0); test_end(); } -static void test_fts_language_register(void) +static void test_language_register(void) { - const struct fts_language *lp; - test_begin("fts language register"); - fts_language_register("jp"); - lp = fts_language_find("jp"); + const struct language *lp; + test_begin("language register"); + language_register("jp"); + lp = language_find("jp"); i_assert(lp != NULL); test_assert(strcmp(lp->name, "jp") == 0); test_end(); @@ -302,22 +302,22 @@ int main(void) { int ret; static void (*const test_functions[])(void) = { - test_fts_language_detect_finnish, - test_fts_language_detect_english, - test_fts_language_detect_french, - test_fts_language_detect_german, - test_fts_language_detect_swedish, - test_fts_language_detect_bokmal, - test_fts_language_detect_nynorsk, - test_fts_language_detect_finnish_as_english, - test_fts_language_detect_na, - test_fts_language_detect_unknown, - test_fts_language_find_builtin, - test_fts_language_register, + test_language_detect_finnish, + test_language_detect_english, + test_language_detect_french, + test_language_detect_german, + test_language_detect_swedish, + test_language_detect_bokmal, + test_language_detect_nynorsk, + test_language_detect_finnish_as_english, + test_language_detect_na, + test_language_detect_unknown, + test_language_find_builtin, + test_language_register, NULL }; - fts_languages_init(); + languages_init(); ret = test_run(test_functions); - fts_languages_deinit(); + languages_deinit(); return ret; } diff --git a/src/plugins/fts/doveadm-fts.c b/src/plugins/fts/doveadm-fts.c index a02775c19e..2951f8309c 100644 --- a/src/plugins/fts/doveadm-fts.c +++ b/src/plugins/fts/doveadm-fts.c @@ -187,7 +187,7 @@ cmd_fts_tokenize_run(struct doveadm_mail_cmd_context *_ctx, struct mail_namespace *ns = mail_namespace_find_inbox(user->namespaces); struct fts_backend *backend; struct fts_user_language *user_lang; - const struct fts_language *lang = NULL; + const struct language *lang = NULL; int ret, ret2; bool final = FALSE; @@ -199,30 +199,30 @@ cmd_fts_tokenize_run(struct doveadm_mail_cmd_context *_ctx, } if (ctx->language == NULL) { - struct fts_language_list *lang_list = + struct language_list *lang_list = fts_user_get_language_list(user); - enum fts_language_result result; + enum language_result result; const char *error; - result = fts_language_detect(lang_list, + result = language_detect(lang_list, (const unsigned char *)ctx->tokens, strlen(ctx->tokens), &lang, &error); if (lang == NULL) - lang = fts_language_list_get_first(lang_list); + lang = language_list_get_first(lang_list); switch (result) { - case FTS_LANGUAGE_RESULT_SHORT: + case LANGUAGE_RESULT_SHORT: e_warning(user->event, "Text too short, can't detect its language - assuming %s", lang->name); break; - case FTS_LANGUAGE_RESULT_UNKNOWN: + case LANGUAGE_RESULT_UNKNOWN: e_warning(user->event, "Can't detect its language - assuming %s", lang->name); break; - case FTS_LANGUAGE_RESULT_OK: + case LANGUAGE_RESULT_OK: break; - case FTS_LANGUAGE_RESULT_ERROR: + case LANGUAGE_RESULT_ERROR: e_error(user->event, "Language detection library initialization failed: %s", error); @@ -232,7 +232,7 @@ cmd_fts_tokenize_run(struct doveadm_mail_cmd_context *_ctx, i_unreached(); } } else { - lang = fts_language_find(ctx->language); + lang = language_find(ctx->language); if (lang == NULL) { e_error(user->event, "Unknown language: %s", ctx->language); @@ -248,22 +248,22 @@ cmd_fts_tokenize_run(struct doveadm_mail_cmd_context *_ctx, return -1; } - fts_tokenizer_reset(user_lang->index_tokenizer); + lang_tokenizer_reset(user_lang->index_tokenizer); for (;;) { const char *token, *error; if (!final) { - ret = fts_tokenizer_next(user_lang->index_tokenizer, + ret = lang_tokenizer_next(user_lang->index_tokenizer, (const unsigned char *)ctx->tokens, strlen(ctx->tokens), &token, &error); } else { - ret = fts_tokenizer_final(user_lang->index_tokenizer, + ret = lang_tokenizer_final(user_lang->index_tokenizer, &token, &error); } if (ret < 0) break; if (ret > 0 && user_lang->filter != NULL) { - ret2 = fts_filter_filter(user_lang->filter, &token, &error); + ret2 = lang_filter(user_lang->filter, &token, &error); if (ret2 > 0) doveadm_print(token); else if (ret2 < 0) diff --git a/src/plugins/fts/fts-api-private.h b/src/plugins/fts/fts-api-private.h index 55466cfbd1..e9eb5d8804 100644 --- a/src/plugins/fts/fts-api-private.h +++ b/src/plugins/fts/fts-api-private.h @@ -117,7 +117,7 @@ void fts_backend_unregister(const char *name); bool fts_backend_default_can_lookup(struct fts_backend *backend, const struct mail_search_arg *args); -void fts_filter_uids(ARRAY_TYPE(seq_range) *definite_dest, +void lang_filter_uids(ARRAY_TYPE(seq_range) *definite_dest, const ARRAY_TYPE(seq_range) *definite_filter, ARRAY_TYPE(seq_range) *maybe_dest, const ARRAY_TYPE(seq_range) *maybe_filter); diff --git a/src/plugins/fts/fts-api.c b/src/plugins/fts/fts-api.c index 9793db79c1..6db3891ae4 100644 --- a/src/plugins/fts/fts-api.c +++ b/src/plugins/fts/fts-api.c @@ -415,7 +415,7 @@ fts_merge_maybies(ARRAY_TYPE(seq_range) *dest_maybe, } } -void fts_filter_uids(ARRAY_TYPE(seq_range) *definite_dest, +void lang_filter_uids(ARRAY_TYPE(seq_range) *definite_dest, const ARRAY_TYPE(seq_range) *definite_filter, ARRAY_TYPE(seq_range) *maybe_dest, const ARRAY_TYPE(seq_range) *maybe_filter) diff --git a/src/plugins/fts/fts-build-mail.c b/src/plugins/fts/fts-build-mail.c index 83599addbb..75ecae9891 100644 --- a/src/plugins/fts/fts-build-mail.c +++ b/src/plugins/fts/fts-build-mail.c @@ -114,7 +114,7 @@ static void fts_mail_build_ctx_set_lang(struct fts_mail_build_context *ctx, /* reset tokenizer between fields - just to be sure no state leaks between fields (especially if previous indexing had failed) */ - fts_tokenizer_reset(user_lang->index_tokenizer); + lang_tokenizer_reset(user_lang->index_tokenizer); } static void @@ -267,15 +267,15 @@ static int fts_build_add_tokens_with_filter(struct fts_mail_build_context *ctx, const unsigned char *data, size_t size) { - struct fts_tokenizer *tokenizer = ctx->cur_user_lang->index_tokenizer; - struct fts_filter *filter = ctx->cur_user_lang->filter; + struct lang_tokenizer *tokenizer = ctx->cur_user_lang->index_tokenizer; + struct lang_filter *filter = ctx->cur_user_lang->filter; const char *token, *error; int ret = 1, ret2; while (ret > 0) T_BEGIN { - ret = ret2 = fts_tokenizer_next(tokenizer, data, size, &token, &error); + ret = ret2 = lang_tokenizer_next(tokenizer, data, size, &token, &error); if (ret2 > 0 && filter != NULL) - ret2 = fts_filter_filter(filter, &token, &error); + ret2 = lang_filter(filter, &token, &error); if (ret2 < 0) { mail_set_critical(ctx->mail, "fts: Couldn't create indexable tokens: %s", @@ -296,31 +296,31 @@ fts_build_add_tokens_with_filter(struct fts_mail_build_context *ctx, static int fts_detect_language(struct fts_mail_build_context *ctx, const unsigned char *data, size_t size, bool last, - const struct fts_language **lang_r) + const struct language **lang_r) { struct mail_user *user = ctx->update_ctx->backend->ns->user; - struct fts_language_list *lang_list = fts_user_get_language_list(user); - const struct fts_language *lang; + struct language_list *lang_list = fts_user_get_language_list(user); + const struct language *lang; const char *error; - switch (fts_language_detect(lang_list, data, size, &lang, &error)) { - case FTS_LANGUAGE_RESULT_SHORT: + switch (language_detect(lang_list, data, size, &lang, &error)) { + case LANGUAGE_RESULT_SHORT: /* save the input so far and try again later */ buffer_append(ctx->pending_input, data, size); if (last) { /* we've run out of data. use the default language. */ - *lang_r = fts_language_list_get_first(lang_list); + *lang_r = language_list_get_first(lang_list); return 1; } return 0; - case FTS_LANGUAGE_RESULT_UNKNOWN: + case LANGUAGE_RESULT_UNKNOWN: /* use the default language */ - *lang_r = fts_language_list_get_first(lang_list); + *lang_r = language_list_get_first(lang_list); return 1; - case FTS_LANGUAGE_RESULT_OK: + case LANGUAGE_RESULT_OK: *lang_r = lang; return 1; - case FTS_LANGUAGE_RESULT_ERROR: + case LANGUAGE_RESULT_ERROR: /* internal language detection library failure (e.g. invalid config). don't index anything. */ mail_set_critical(ctx->mail, @@ -337,7 +337,7 @@ fts_build_tokenized(struct fts_mail_build_context *ctx, const unsigned char *data, size_t size, bool last) { struct mail_user *user = ctx->update_ctx->backend->ns->user; - const struct fts_language *lang; + const struct language *lang; int ret; if (ctx->cur_user_lang != NULL) { diff --git a/src/plugins/fts/fts-plugin.c b/src/plugins/fts/fts-plugin.c index a6011c9439..1a1b97209a 100644 --- a/src/plugins/fts/fts-plugin.c +++ b/src/plugins/fts/fts-plugin.c @@ -21,13 +21,13 @@ static struct mail_storage_hooks fts_mail_storage_hooks = { void fts_plugin_init(struct module *module) { - fts_library_init(); + lang_library_init(); mail_storage_hooks_add(module, &fts_mail_storage_hooks); } void fts_plugin_deinit(void) { - fts_library_deinit(); + lang_library_deinit(); fts_parsers_unload(); mail_storage_hooks_remove(&fts_mail_storage_hooks); } diff --git a/src/plugins/fts/fts-search-args.c b/src/plugins/fts/fts-search-args.c index 5c7be41a31..85f0fa3862 100644 --- a/src/plugins/fts/fts-search-args.c +++ b/src/plugins/fts/fts-search-args.c @@ -53,7 +53,7 @@ fts_search_arg_create_or(const struct mail_search_arg *orig_arg, pool_t pool, } static int -fts_backend_dovecot_expand_tokens(struct fts_filter *filter, +fts_backend_dovecot_expand_tokens(struct lang_filter *filter, pool_t pool, struct mail_search_arg *parent_arg, const struct mail_search_arg *orig_arg, @@ -74,7 +74,7 @@ fts_backend_dovecot_expand_tokens(struct fts_filter *filter, /* add the word filtered */ if (filter != NULL) { token2 = t_strdup(token); - ret = fts_filter_filter(filter, &token2, &error); + ret = lang_filter(filter, &token2, &error); if (ret > 0) { token2 = t_strdup(token2); array_push_back(&tokens, &token2); @@ -118,8 +118,8 @@ fts_backend_dovecot_tokenize_lang(struct fts_user_language *user_lang, /* reset tokenizer between search args in case there's any state left from some previous failure */ - fts_tokenizer_reset(user_lang->search_tokenizer); - while ((ret = fts_tokenizer_next(user_lang->search_tokenizer, + lang_tokenizer_reset(user_lang->search_tokenizer); + while ((ret = lang_tokenizer_next(user_lang->search_tokenizer, (const void *)orig_token, orig_token_len, &token, &error)) > 0) { if (fts_backend_dovecot_expand_tokens(user_lang->filter, pool, @@ -128,7 +128,7 @@ fts_backend_dovecot_tokenize_lang(struct fts_user_language *user_lang, return -1; } while (ret >= 0 && - (ret = fts_tokenizer_final(user_lang->search_tokenizer, &token, &error)) > 0) { + (ret = lang_tokenizer_final(user_lang->search_tokenizer, &token, &error)) > 0) { if (fts_backend_dovecot_expand_tokens(user_lang->filter, pool, and_arg, orig_arg, orig_token, token, error_r) < 0) diff --git a/src/plugins/fts/fts-user.c b/src/plugins/fts/fts-user.c index 09ada01ff7..a160efed0e 100644 --- a/src/plugins/fts/fts-user.c +++ b/src/plugins/fts/fts-user.c @@ -20,7 +20,7 @@ struct fts_user { union mail_user_module_context module_ctx; int refcount; - struct fts_language_list *lang_list; + struct language_list *lang_list; struct fts_user_language *data_lang; ARRAY_TYPE(fts_user_language) languages, data_languages; @@ -74,15 +74,15 @@ fts_user_init_languages(struct mail_user *user, struct fts_user *fuser, lang_config[1] = mail_user_plugin_getenv(user, "fts_language_config"); if (lang_config[1] != NULL) lang_config[0] = "fts_language_config"; - if (fts_language_list_init(lang_config, &fuser->lang_list, error_r) < 0) + if (language_list_init(lang_config, &fuser->lang_list, error_r) < 0) return -1; - if (!fts_language_list_add_names(fuser->lang_list, languages, &unknown)) { + if (!language_list_add_names(fuser->lang_list, languages, &unknown)) { *error_r = t_strdup_printf( "fts_languages: Unknown language '%s'", unknown); return -1; } - if (array_count(fts_language_list_get_all(fuser->lang_list)) == 0) { + if (array_count(language_list_get_all(fuser->lang_list)) == 0) { *error_r = "fts_languages setting is empty"; return -1; } @@ -90,11 +90,11 @@ fts_user_init_languages(struct mail_user *user, struct fts_user *fuser, } static int -fts_user_create_filters(struct mail_user *user, const struct fts_language *lang, - struct fts_filter **filter_r, const char **error_r) +fts_user_create_filters(struct mail_user *user, const struct language *lang, + struct lang_filter **filter_r, const char **error_r) { - const struct fts_filter *filter_class; - struct fts_filter *filter = NULL, *parent = NULL; + const struct lang_filter *filter_class; + struct lang_filter *filter = NULL, *parent = NULL; const char *filters_key, *const *filters, *filter_set_name; const char *str, *error, *set_key; unsigned int i; @@ -116,7 +116,7 @@ fts_user_create_filters(struct mail_user *user, const struct fts_language *lang, filters = t_strsplit_spaces(str, " "); for (i = 0; filters[i] != NULL; i++) { - filter_class = fts_filter_find(filters[i]); + filter_class = lang_filter_find(filters[i]); if (filter_class == NULL) { *error_r = t_strdup_printf("%s: Unknown filter '%s'", filters_key, filters[i]); @@ -134,20 +134,20 @@ fts_user_create_filters(struct mail_user *user, const struct fts_language *lang, str = mail_user_plugin_getenv(user, set_key); } - if (fts_filter_create(filter_class, parent, lang, - str_keyvalues_to_array(str), - &filter, &error) < 0) { + if (lang_filter_create(filter_class, parent, lang, + str_keyvalues_to_array(str), + &filter, &error) < 0) { *error_r = t_strdup_printf("%s: %s", set_key, error); ret = -1; break; } if (parent != NULL) - fts_filter_unref(&parent); + lang_filter_unref(&parent); parent = filter; } if (ret < 0) { if (parent != NULL) - fts_filter_unref(&parent); + lang_filter_unref(&parent); return -1; } *filter_r = filter; @@ -156,12 +156,12 @@ fts_user_create_filters(struct mail_user *user, const struct fts_language *lang, static int fts_user_create_tokenizer(struct mail_user *user, - const struct fts_language *lang, - struct fts_tokenizer **tokenizer_r, bool search, + const struct language *lang, + struct lang_tokenizer **tokenizer_r, bool search, const char **error_r) { - const struct fts_tokenizer *tokenizer_class; - struct fts_tokenizer *tokenizer = NULL, *parent = NULL; + const struct lang_tokenizer *tokenizer_class; + struct lang_tokenizer *tokenizer = NULL, *parent = NULL; const char *tokenizers_key, *const *tokenizers, *tokenizer_set_name; const char *str, *error, *set_key; unsigned int i; @@ -181,7 +181,7 @@ fts_user_create_tokenizer(struct mail_user *user, tokenizers = t_strsplit_spaces(str, " "); for (i = 0; tokenizers[i] != NULL; i++) { - tokenizer_class = fts_tokenizer_find(tokenizers[i]); + tokenizer_class = lang_tokenizer_find(tokenizers[i]); if (tokenizer_class == NULL) { *error_r = t_strdup_printf("%s: Unknown tokenizer '%s'", tokenizers_key, tokenizers[i]); @@ -202,20 +202,20 @@ fts_user_create_tokenizer(struct mail_user *user, if (search) str = t_strconcat("search=yes ", str, NULL); - if (fts_tokenizer_create(tokenizer_class, parent, - str_keyvalues_to_array(str), - &tokenizer, &error) < 0) { + if (lang_tokenizer_create(tokenizer_class, parent, + str_keyvalues_to_array(str), + &tokenizer, &error) < 0) { *error_r = t_strdup_printf("%s: %s", set_key, error); ret = -1; break; } if (parent != NULL) - fts_tokenizer_unref(&parent); + lang_tokenizer_unref(&parent); parent = tokenizer; } if (ret < 0) { if (parent != NULL) - fts_tokenizer_unref(&parent); + lang_tokenizer_unref(&parent); return -1; } *tokenizer_r = tokenizer; @@ -246,7 +246,7 @@ fts_user_language_init_tokenizers(struct mail_user *user, struct fts_user_language * fts_user_language_find(struct mail_user *user, - const struct fts_language *lang) + const struct language *lang) { struct fts_user_language *user_lang; struct fts_user *fuser = FTS_USER_CONTEXT_REQUIRE(user); @@ -260,7 +260,7 @@ fts_user_language_find(struct mail_user *user, static int fts_user_language_create(struct mail_user *user, struct fts_user *fuser, - const struct fts_language *lang, + const struct language *lang, const char **error_r) { struct fts_user_language *user_lang; @@ -280,9 +280,9 @@ static int fts_user_languages_fill_all(struct mail_user *user, struct fts_user *fuser, const char **error_r) { - const struct fts_language *lang; + const struct language *lang; - array_foreach_elem(fts_language_list_get_all(fuser->lang_list), lang) { + array_foreach_elem(language_list_get_all(fuser->lang_list), lang) { if (fts_user_language_create(user, fuser, lang, error_r) < 0) return -1; } @@ -297,13 +297,13 @@ fts_user_init_data_language(struct mail_user *user, struct fts_user *fuser, const char *error; user_lang = p_new(user->pool, struct fts_user_language, 1); - user_lang->lang = &fts_language_data; + user_lang->lang = &language_data; if (fts_user_language_init_tokenizers(user, user_lang, error_r) < 0) return -1; - if (fts_filter_create(fts_filter_lowercase, NULL, user_lang->lang, NULL, - &user_lang->filter, &error) < 0) + if (lang_filter_create(lang_filter_lowercase, NULL, user_lang->lang, NULL, + &user_lang->filter, &error) < 0) i_unreached(); i_assert(user_lang->filter != NULL); @@ -315,7 +315,7 @@ fts_user_init_data_language(struct mail_user *user, struct fts_user *fuser, return 0; } -struct fts_language_list *fts_user_get_language_list(struct mail_user *user) +struct language_list *fts_user_get_language_list(struct mail_user *user) { struct fts_user *fuser = FTS_USER_CONTEXT_REQUIRE(user); @@ -355,11 +355,11 @@ bool fts_user_autoindex_exclude(struct mailbox *box) static void fts_user_language_free(struct fts_user_language *user_lang) { if (user_lang->filter != NULL) - fts_filter_unref(&user_lang->filter); + lang_filter_unref(&user_lang->filter); if (user_lang->index_tokenizer != NULL) - fts_tokenizer_unref(&user_lang->index_tokenizer); + lang_tokenizer_unref(&user_lang->index_tokenizer); if (user_lang->search_tokenizer != NULL) - fts_tokenizer_unref(&user_lang->search_tokenizer); + lang_tokenizer_unref(&user_lang->search_tokenizer); } static void fts_user_free(struct fts_user *fuser) @@ -367,7 +367,7 @@ static void fts_user_free(struct fts_user *fuser) struct fts_user_language *user_lang; if (fuser->lang_list != NULL) - fts_language_list_deinit(&fuser->lang_list); + language_list_deinit(&fuser->lang_list); if (array_is_created(&fuser->languages)) { array_foreach_elem(&fuser->languages, user_lang) diff --git a/src/plugins/fts/fts-user.h b/src/plugins/fts/fts-user.h index c4d3d23029..29dee8ab6e 100644 --- a/src/plugins/fts/fts-user.h +++ b/src/plugins/fts/fts-user.h @@ -2,16 +2,16 @@ #define FTS_USER_H struct fts_user_language { - const struct fts_language *lang; - struct fts_filter *filter; - struct fts_tokenizer *index_tokenizer, *search_tokenizer; + const struct language *lang; + struct lang_filter *filter; + struct lang_tokenizer *index_tokenizer, *search_tokenizer; }; ARRAY_DEFINE_TYPE(fts_user_language, struct fts_user_language *); struct fts_user_language * fts_user_language_find(struct mail_user *user, - const struct fts_language *lang); -struct fts_language_list *fts_user_get_language_list(struct mail_user *user); + const struct language *lang); +struct language_list *fts_user_get_language_list(struct mail_user *user); const ARRAY_TYPE(fts_user_language) * fts_user_get_all_languages(struct mail_user *user); struct fts_user_language *fts_user_get_data_lang(struct mail_user *user);