From: Marco Bettini Date: Fri, 23 Feb 2024 15:12:15 +0000 (+0000) Subject: lib-language: Pass event with language filter to filter/tokenizer_create X-Git-Tag: 2.4.1~995 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f0fd6535c85ec9cbbdd56a78cd3796ce631bc26b;p=thirdparty%2Fdovecot%2Fcore.git lib-language: Pass event with language filter to filter/tokenizer_create This is required for non-core plugins to be able to load their language nested configurations. --- diff --git a/src/lib-language/lang-filter-contractions.c b/src/lib-language/lang-filter-contractions.c index c33940989e..1d63884a49 100644 --- a/src/lib-language/lang-filter-contractions.c +++ b/src/lib-language/lang-filter-contractions.c @@ -10,6 +10,7 @@ static int lang_filter_contractions_create(const struct lang_settings *set, + struct event *event ATTR_UNUSED, struct lang_filter **filter_r, const char **error_r) { diff --git a/src/lib-language/lang-filter-lowercase.c b/src/lib-language/lang-filter-lowercase.c index 1278d2fe0e..369c543abc 100644 --- a/src/lib-language/lang-filter-lowercase.c +++ b/src/lib-language/lang-filter-lowercase.c @@ -13,6 +13,7 @@ static int lang_filter_lowercase_create(const struct lang_settings *set, + struct event *event ATTR_UNUSED, struct lang_filter **filter_r, const char **error_r ATTR_UNUSED) { diff --git a/src/lib-language/lang-filter-normalizer-icu.c b/src/lib-language/lang-filter-normalizer-icu.c index fde2a0ac4a..3c1d140380 100644 --- a/src/lib-language/lang-filter-normalizer-icu.c +++ b/src/lib-language/lang-filter-normalizer-icu.c @@ -34,6 +34,7 @@ static void lang_filter_normalizer_icu_destroy(struct lang_filter *filter) static int lang_filter_normalizer_icu_create(const struct lang_settings *set, + struct event *event ATTR_UNUSED, struct lang_filter **filter_r, const char **error_r ATTR_UNUSED) { @@ -89,8 +90,8 @@ lang_filter_normalizer_icu_filter(struct lang_filter *filter, const char **token #else static int 
-lang_filter_normalizer_icu_create(const struct language *lang ATTR_UNUSED, - const char *const *settings ATTR_UNUSED, +lang_filter_normalizer_icu_create(const struct lang_settings *set ATTR_UNUSED, + struct event *event ATTR_UNUSED, struct lang_filter **filter_r ATTR_UNUSED, const char **error_r) { diff --git a/src/lib-language/lang-filter-private.h b/src/lib-language/lang-filter-private.h index dce8ae076d..880a6840f8 100644 --- a/src/lib-language/lang-filter-private.h +++ b/src/lib-language/lang-filter-private.h @@ -16,6 +16,7 @@ struct lang_settings; */ struct lang_filter_vfuncs { int (*create)(const struct lang_settings *set, + struct event *event, struct lang_filter **filter_r, const char **error_r); int (*filter)(struct lang_filter *filter, const char **token, diff --git a/src/lib-language/lang-filter-stemmer-snowball.c b/src/lib-language/lang-filter-stemmer-snowball.c index 5449496d09..8b533261de 100644 --- a/src/lib-language/lang-filter-stemmer-snowball.c +++ b/src/lib-language/lang-filter-stemmer-snowball.c @@ -28,6 +28,7 @@ static void lang_filter_stemmer_snowball_destroy(struct lang_filter *filter) static int lang_filter_stemmer_snowball_create(const struct lang_settings *set, + struct event *event ATTR_UNUSED, struct lang_filter **filter_r, const char **error_r ATTR_UNUSED) { @@ -101,6 +102,7 @@ lang_filter_stemmer_snowball_filter(struct lang_filter *filter, static int lang_filter_stemmer_snowball_create(const struct lang_settings *set ATTR_UNUSED, + struct event *event ATTR_UNUSED, struct lang_filter **filter_r ATTR_UNUSED, const char **error_r) { diff --git a/src/lib-language/lang-filter-stopwords.c b/src/lib-language/lang-filter-stopwords.c index 852f3c13da..8810014063 100644 --- a/src/lib-language/lang-filter-stopwords.c +++ b/src/lib-language/lang-filter-stopwords.c @@ -72,6 +72,7 @@ static void lang_filter_stopwords_destroy(struct lang_filter *filter) static int lang_filter_stopwords_create(const struct lang_settings *set, + struct event *event 
ATTR_UNUSED, struct lang_filter **filter_r, const char **error_r ATTR_UNUSED) { diff --git a/src/lib-language/lang-filter.c b/src/lib-language/lang-filter.c index 9b33a8fba3..bdb53f7a16 100644 --- a/src/lib-language/lang-filter.c +++ b/src/lib-language/lang-filter.c @@ -53,12 +53,13 @@ const struct lang_filter *lang_filter_find(const char *name) int lang_filter_create(const struct lang_filter *filter_class, struct lang_filter *parent, const struct lang_settings *set, + struct event *event, struct lang_filter **filter_r, const char **error_r) { struct lang_filter *fp; if (filter_class->v.create != NULL) { - if (filter_class->v.create(set, &fp, error_r) < 0) { + if (filter_class->v.create(set, event, &fp, error_r) < 0) { *filter_r = NULL; return -1; } diff --git a/src/lib-language/lang-filter.h b/src/lib-language/lang-filter.h index d6f2fbc94c..7c04df6ee5 100644 --- a/src/lib-language/lang-filter.h +++ b/src/lib-language/lang-filter.h @@ -57,6 +57,7 @@ const struct lang_filter *lang_filter_find(const char *name); int lang_filter_create(const struct lang_filter *filter_class, struct lang_filter *parent, const struct lang_settings *set, + struct event *event, struct lang_filter **filter_r, const char **error_r); void lang_filter_ref(struct lang_filter *filter); diff --git a/src/lib-language/lang-tokenizer-address.c b/src/lib-language/lang-tokenizer-address.c index fa191d3af3..acb6e9990c 100644 --- a/src/lib-language/lang-tokenizer-address.c +++ b/src/lib-language/lang-tokenizer-address.c @@ -30,6 +30,7 @@ struct email_address_lang_tokenizer { static int lang_tokenizer_email_address_create(const struct lang_settings *set, + struct event *event ATTR_UNUSED, enum lang_tokenizer_flags flags, struct lang_tokenizer **tokenizer_r, const char **error_r ATTR_UNUSED) diff --git a/src/lib-language/lang-tokenizer-generic.c b/src/lib-language/lang-tokenizer-generic.c index 2a7cf6d7ff..2514be5cfb 100644 --- a/src/lib-language/lang-tokenizer-generic.c +++ 
b/src/lib-language/lang-tokenizer-generic.c @@ -53,6 +53,7 @@ static const struct algorithm *parse_algorithm(const char *name) static int lang_tokenizer_generic_create(const struct lang_settings *set, + struct event *event ATTR_UNUSED, enum lang_tokenizer_flags flags, struct lang_tokenizer **tokenizer_r, const char **error_r) diff --git a/src/lib-language/lang-tokenizer-private.h b/src/lib-language/lang-tokenizer-private.h index 1154613439..c38b1e424a 100644 --- a/src/lib-language/lang-tokenizer-private.h +++ b/src/lib-language/lang-tokenizer-private.h @@ -7,6 +7,7 @@ struct lang_tokenizer_vfuncs { int (*create)(const struct lang_settings *set, + struct event *event, enum lang_tokenizer_flags flags, struct lang_tokenizer **tokenizer_r, const char **error_r); diff --git a/src/lib-language/lang-tokenizer.c b/src/lib-language/lang-tokenizer.c index e2df851ff3..98b017e446 100644 --- a/src/lib-language/lang-tokenizer.c +++ b/src/lib-language/lang-tokenizer.c @@ -77,12 +77,13 @@ static void lang_tokenizer_self_reset(struct lang_tokenizer *tok) int lang_tokenizer_create(const struct lang_tokenizer *tok_class, struct lang_tokenizer *parent, const struct lang_settings *set, + struct event *event, enum lang_tokenizer_flags flags, struct lang_tokenizer **tokenizer_r, const char **error_r) { struct lang_tokenizer *tok; - if (tok_class->v->create(set, flags, &tok, error_r) < 0) { + if (tok_class->v->create(set, event, flags, &tok, error_r) < 0) { *tokenizer_r = NULL; return -1; } diff --git a/src/lib-language/lang-tokenizer.h b/src/lib-language/lang-tokenizer.h index bae02c9eb6..92e1f8a730 100644 --- a/src/lib-language/lang-tokenizer.h +++ b/src/lib-language/lang-tokenizer.h @@ -56,6 +56,7 @@ const struct lang_tokenizer *lang_tokenizer_find(const char *name); int lang_tokenizer_create(const struct lang_tokenizer *tok_class, struct lang_tokenizer *parent, const struct lang_settings *set, + struct event *event, enum lang_tokenizer_flags flags, struct lang_tokenizer **tokenizer_r, 
const char **error_r); diff --git a/src/lib-language/test-lang-filter.c b/src/lib-language/test-lang-filter.c index a9daf45eba..9fc9de8169 100644 --- a/src/lib-language/test-lang-filter.c +++ b/src/lib-language/test-lang-filter.c @@ -20,6 +20,9 @@ #define LANG_NO "no" #define LANG_SV "sv" +/* core filters don't use the event in lang_filter_create() */ +static struct event *const event = NULL; + static struct lang_settings stopword_settings; static void init_lang_settings(void) { @@ -54,7 +57,7 @@ static void test_lang_filter_contractions_fail(void) const char *error; test_begin("lang filter contractions, unsupported language"); - test_assert(lang_filter_create(lang_filter_contractions, NULL, make_settings(LANG_EN, NULL), &filter, &error) != 0); + test_assert(lang_filter_create(lang_filter_contractions, NULL, make_settings(LANG_EN, NULL), event, &filter, &error) != 0); test_assert(error != NULL); test_end(); } @@ -92,7 +95,7 @@ static void test_lang_filter_contractions_fr(void) int ret; test_begin("lang filter contractions, French"); - test_assert(lang_filter_create(lang_filter_contractions, NULL, make_settings(LANG_FR, NULL), &filter, &error) == 0); + test_assert(lang_filter_create(lang_filter_contractions, NULL, make_settings(LANG_FR, NULL), event, &filter, &error) == 0); for (i = 0; i < N_ELEMENTS(tests); i++) { token = tests[i].input; @@ -123,7 +126,7 @@ static void test_lang_filter_lowercase(void) unsigned int i; test_begin("lang filter lowercase"); - test_assert(lang_filter_create(lang_filter_lowercase, NULL, make_settings(LANG_EN, NULL), &filter, &error) == 0); + test_assert(lang_filter_create(lang_filter_lowercase, NULL, make_settings(LANG_EN, NULL), event, &filter, &error) == 0); for (i = 0; i < N_ELEMENTS(tests); i++) { token = tests[i].input; @@ -151,7 +154,7 @@ static void test_lang_filter_lowercase_utf8(void) unsigned int i; test_begin("lang filter lowercase, UTF8"); - test_assert(lang_filter_create(lang_filter_lowercase, NULL, make_settings(LANG_EN, 
NULL), &filter, &error) == 0); + test_assert(lang_filter_create(lang_filter_lowercase, NULL, make_settings(LANG_EN, NULL), event, &filter, &error) == 0); for (i = 0; i < N_ELEMENTS(tests); i++) { token = tests[i].input; @@ -181,7 +184,7 @@ static void test_lang_filter_lowercase_too_long_utf8(void) unsigned int i; test_begin("lang filter lowercase, too long UTF8"); - test_assert(lang_filter_create(lang_filter_lowercase, NULL, make_settings(LANG_EN, &set), &filter, &error) == 0); + test_assert(lang_filter_create(lang_filter_lowercase, NULL, make_settings(LANG_EN, &set), event, &filter, &error) == 0); for (i = 0; i < N_ELEMENTS(tests); i++) { token = tests[i].input; @@ -208,7 +211,7 @@ static void test_lang_filter_stopwords_eng(void) const char *token; test_begin("lang filter stopwords, English"); - test_assert(lang_filter_create(lang_filter_stopwords, NULL, make_settings(LANG_EN, &stopword_settings), &filter, &error) == 0); + test_assert(lang_filter_create(lang_filter_stopwords, NULL, make_settings(LANG_EN, &stopword_settings), event, &filter, &error) == 0); ip = input; op = output; @@ -248,7 +251,7 @@ static void test_lang_filter_stopwords_fin(void) const char *token; test_begin("lang filter stopwords, Finnish"); - test_assert(lang_filter_create(lang_filter_stopwords, NULL, make_settings(LANG_FI, &stopword_settings), &filter, &error) == 0); + test_assert(lang_filter_create(lang_filter_stopwords, NULL, make_settings(LANG_FI, &stopword_settings), event, &filter, &error) == 0); ip = input; op = output; @@ -269,7 +272,7 @@ static void test_lang_filter_stopwords_fin(void) lang_filter_unref(&filter); test_assert(filter == NULL); - test_assert(lang_filter_create(lang_filter_stopwords, NULL, make_settings(LANG_FI, &stopword_settings), &filter, &error) == 0); + test_assert(lang_filter_create(lang_filter_stopwords, NULL, make_settings(LANG_FI, &stopword_settings), event, &filter, &error) == 0); ip = input2; op = output2; while (*ip != NULL) { @@ -308,7 +311,7 @@ static void 
test_lang_filter_stopwords_fra(void) const char *token; test_begin("lang filter stopwords, French"); - test_assert(lang_filter_create(lang_filter_stopwords, NULL, make_settings(LANG_FR, &stopword_settings), &filter, &error) == 0); + test_assert(lang_filter_create(lang_filter_stopwords, NULL, make_settings(LANG_FR, &stopword_settings), event, &filter, &error) == 0); ip = input; op = output; @@ -356,7 +359,7 @@ static void test_lang_filter_stopwords_no(void) const char *token; test_begin("lang filter stopwords, Norwegian"); - test_assert(lang_filter_create(lang_filter_stopwords, NULL, make_settings(LANG_NO, &stopword_settings), &filter, &error) == 0); + test_assert(lang_filter_create(lang_filter_stopwords, NULL, make_settings(LANG_NO, &stopword_settings), event, &filter, &error) == 0); ip = input; op = output; @@ -385,7 +388,7 @@ static void test_lang_filter_stopwords_fail_lazy_init(void) const char *error = NULL, *token = "foobar"; test_begin("lang filter stopwords, fail filter() (lazy init)"); - test_assert(lang_filter_create(lang_filter_stopwords, NULL, make_settings(UNKNOWN, &stopword_settings), &filter, &error) == 0); + test_assert(lang_filter_create(lang_filter_stopwords, NULL, make_settings(UNKNOWN, &stopword_settings), event, &filter, &error) == 0); test_assert(filter != NULL && error == NULL); test_assert(lang_filter(filter, &token, &error) < 0 && error != NULL); lang_filter_unref(&filter); @@ -399,7 +402,7 @@ static void test_lang_filter_stopwords_malformed(void) const char *error = NULL, *token = "foobar"; test_begin("lang filter stopwords, malformed list"); - test_assert(lang_filter_create(lang_filter_stopwords, NULL, make_settings(MALFORMED, &stopword_settings), &filter, &error) == 0); + test_assert(lang_filter_create(lang_filter_stopwords, NULL, make_settings(MALFORMED, &stopword_settings), event, &filter, &error) == 0); test_assert(lang_filter(filter, &token, &error) < 0); test_assert(strstr(error, "seems empty. 
Is the file correctly formatted?") != NULL); test_expect_no_more_errors(); @@ -430,7 +433,7 @@ static void test_lang_filter_stemmer_snowball_stem_english(void) const char * const *bpp; test_begin("lang filter stem English"); - test_assert(lang_filter_create(lang_filter_stemmer_snowball, NULL, make_settings(LANG_EN, NULL), &stemmer, &error) == 0); + test_assert(lang_filter_create(lang_filter_stemmer_snowball, NULL, make_settings(LANG_EN, NULL), event, &stemmer, &error) == 0); bpp = bases; for (tpp=tokens; *tpp != NULL; tpp++) { token = *tpp; @@ -460,7 +463,7 @@ static void test_lang_filter_stemmer_snowball_stem_french(void) const char * const *bpp; test_begin("lang filter stem French"); - test_assert(lang_filter_create(lang_filter_stemmer_snowball, NULL, make_settings(LANG_FR, NULL), &stemmer, &error) == 0); + test_assert(lang_filter_create(lang_filter_stemmer_snowball, NULL, make_settings(LANG_FR, NULL), event, &stemmer, &error) == 0); bpp = bases; for (tpp=tokens; *tpp != NULL; tpp++) { token = *tpp; @@ -498,8 +501,8 @@ static void test_lang_filter_stopwords_stemmer_eng(void) test_begin("lang filters stopwords and stemming chained, English"); - test_assert(lang_filter_create(lang_filter_stopwords, NULL, make_settings(LANG_EN, &stopword_settings), &filter, &error) == 0); - test_assert(lang_filter_create(lang_filter_stemmer_snowball, filter, make_settings(LANG_EN, NULL), &stemmer, &error) == 0); + test_assert(lang_filter_create(lang_filter_stopwords, NULL, make_settings(LANG_EN, &stopword_settings), event, &filter, &error) == 0); + test_assert(lang_filter_create(lang_filter_stemmer_snowball, filter, make_settings(LANG_EN, NULL), event, &stemmer, &error) == 0); bpp = bases; for (tpp=tokens; *tpp != NULL; tpp++) { @@ -547,7 +550,7 @@ static void test_lang_filter_normalizer_swedish_short(void) test_begin("lang filter normalizer Swedish short text"); - test_assert(lang_filter_create(lang_filter_normalizer_icu, NULL, make_settings(NULL, &set), &norm, &error) == 0); + 
test_assert(lang_filter_create(lang_filter_normalizer_icu, NULL, make_settings(NULL, &set), event, &norm, &error) == 0); for (i = 0; i < N_ELEMENTS(input); i++) { token = input[i]; test_assert_idx(lang_filter(norm, &token, &error) == 1, i); @@ -580,7 +583,7 @@ static void test_lang_filter_normalizer_swedish_short_default_id(void) test_begin("lang filter normalizer Swedish short text using default ID"); - test_assert(lang_filter_create(lang_filter_normalizer_icu, NULL, make_settings(NULL, NULL), &norm, &error) == 0); + test_assert(lang_filter_create(lang_filter_normalizer_icu, NULL, make_settings(NULL, NULL), event, &norm, &error) == 0); for (i = 0; i < N_ELEMENTS(input); i++) { token = input[i]; test_assert_idx(lang_filter(norm, &token, &error) == 1, i); @@ -618,7 +621,7 @@ static void test_lang_filter_normalizer_french(void) test_begin("lang filter normalizer French UDHR"); udhr_path = t_strconcat(UDHRDIR, UDHR_FRA_NAME, NULL); - test_assert(lang_filter_create(lang_filter_normalizer_icu, NULL, make_settings(NULL, &set), &norm, &error) == 0); + test_assert(lang_filter_create(lang_filter_normalizer_icu, NULL, make_settings(NULL, &set), event, &norm, &error) == 0); input = fopen(udhr_path, "r"); test_assert(input != NULL); sha512_init(&ctx); @@ -654,7 +657,7 @@ static void test_lang_filter_normalizer_empty(void) unsigned int i; test_begin("lang filter normalizer empty tokens"); - test_assert(lang_filter_create(lang_filter_normalizer_icu, NULL, make_settings(NULL, &set), &norm, &error) == 0); + test_assert(lang_filter_create(lang_filter_normalizer_icu, NULL, make_settings(NULL, &set), event, &norm, &error) == 0); for (i = 0; i < N_ELEMENTS(empty_tokens); i++) { const char *token = empty_tokens[i]; test_assert_idx(lang_filter(norm, &token, &error) == 0, i); @@ -674,7 +677,7 @@ static void test_lang_filter_normalizer_baddata(void) test_begin("lang filter normalizer bad data"); - test_assert(lang_filter_create(lang_filter_normalizer_icu, NULL, make_settings(NULL, &set), 
&norm, &error) == 0); + test_assert(lang_filter_create(lang_filter_normalizer_icu, NULL, make_settings(NULL, &set), event, &norm, &error) == 0); str = t_str_new(128); for (i = 1; i < 0x1ffff; i++) { if (!uni_is_valid_ucs4(i)) continue; @@ -703,7 +706,7 @@ static void test_lang_filter_normalizer_invalid_id(void) const char *error = NULL, *token = "foo"; test_begin("lang filter normalizer invalid id"); - test_assert(lang_filter_create(lang_filter_normalizer_icu, NULL, make_settings(NULL, &set), &norm, &error) == 0); + test_assert(lang_filter_create(lang_filter_normalizer_icu, NULL, make_settings(NULL, &set), event, &norm, &error) == 0); test_assert(error == NULL); test_assert(lang_filter(norm, &token, &error) < 0 && error != NULL); lang_filter_unref(&norm); @@ -735,7 +738,7 @@ static void test_lang_filter_normalizer_oversized(void) "\x9c\xe5\xa6\xae\xe9\x93\x91\xe8\x87\xa1"; test_begin("lang filter normalizer over-sized token"); - test_assert(lang_filter_create(lang_filter_normalizer_icu, NULL, make_settings(NULL, &set), &norm, &error) == 0); + test_assert(lang_filter_create(lang_filter_normalizer_icu, NULL, make_settings(NULL, &set), event, &norm, &error) == 0); test_assert(error == NULL); test_assert(lang_filter(norm, &token, &error) >= 0); test_assert(strlen(token) <= 250); @@ -753,7 +756,7 @@ static void test_lang_filter_normalizer_truncation(void) const char *token = "abcdefghi\xC3\x85"; test_begin("lang filter normalizer token truncated mid letter"); - test_assert(lang_filter_create(lang_filter_normalizer_icu, NULL, make_settings(NULL, &set), &norm, &error) == 0); + test_assert(lang_filter_create(lang_filter_normalizer_icu, NULL, make_settings(NULL, &set), event, &norm, &error) == 0); test_assert(error == NULL); test_assert(lang_filter(norm, &token, &error) >= 0); test_assert(strcmp(token, "abcdefghi") == 0); @@ -791,9 +794,9 @@ static void test_lang_filter_normalizer_stopwords_stemmer_eng(void) test_begin("lang filters normalizer, stopwords and stemming 
chained, English"); - test_assert(lang_filter_create(lang_filter_normalizer_icu, NULL, make_settings(NULL, &set), &normalizer, &error) == 0); - test_assert(lang_filter_create(lang_filter_stopwords, normalizer, make_settings(LANG_EN, &stopword_settings), &filter, &error) == 0); - test_assert(lang_filter_create(lang_filter_stemmer_snowball, filter, make_settings(LANG_EN, NULL), &stemmer, &error) == 0); + test_assert(lang_filter_create(lang_filter_normalizer_icu, NULL, make_settings(NULL, &set), event, &normalizer, &error) == 0); + test_assert(lang_filter_create(lang_filter_stopwords, normalizer, make_settings(LANG_EN, &stopword_settings), event, &filter, &error) == 0); + test_assert(lang_filter_create(lang_filter_stemmer_snowball, filter, make_settings(LANG_EN, NULL), event, &stemmer, &error) == 0); bpp = bases; for (tpp = tokens; *tpp != NULL; tpp++) { @@ -853,9 +856,9 @@ static void test_lang_filter_stopwords_normalizer_stemmer_no(void) test_begin("lang filters with stopwords, default normalizer and stemming chained, Norwegian"); - test_assert(lang_filter_create(lang_filter_stopwords, NULL, make_settings(LANG_NO, &stopword_settings), &filter, &error) == 0); - test_assert(lang_filter_create(lang_filter_normalizer_icu, filter, make_settings(NULL, NULL), &normalizer, &error) == 0); - test_assert(lang_filter_create(lang_filter_stemmer_snowball, normalizer, make_settings(LANG_NO, NULL), &stemmer, &error) == 0); + test_assert(lang_filter_create(lang_filter_stopwords, NULL, make_settings(LANG_NO, &stopword_settings), event, &filter, &error) == 0); + test_assert(lang_filter_create(lang_filter_normalizer_icu, filter, make_settings(NULL, NULL), event, &normalizer, &error) == 0); + test_assert(lang_filter_create(lang_filter_stemmer_snowball, normalizer, make_settings(LANG_NO, NULL), event, &stemmer, &error) == 0); bpp = bases; for (tpp = tokens; *tpp != NULL; tpp++) { @@ -904,9 +907,9 @@ static void test_lang_filter_stopwords_normalizer_stemmer_sv(void) test_begin("lang 
filters with stopwords, default normalizer and stemming chained, Swedish"); - test_assert(lang_filter_create(lang_filter_stopwords, NULL, make_settings(LANG_SV, &stopword_settings), &filter, &error) == 0); - test_assert(lang_filter_create(lang_filter_normalizer_icu, filter, make_settings(NULL, NULL), &normalizer, &error) == 0); - test_assert(lang_filter_create(lang_filter_stemmer_snowball, normalizer, make_settings(LANG_SV, NULL), &stemmer, &error) == 0); + test_assert(lang_filter_create(lang_filter_stopwords, NULL, make_settings(LANG_SV, &stopword_settings), event, &filter, &error) == 0); + test_assert(lang_filter_create(lang_filter_normalizer_icu, filter, make_settings(NULL, NULL), event, &normalizer, &error) == 0); + test_assert(lang_filter_create(lang_filter_stemmer_snowball, normalizer, make_settings(LANG_SV, NULL), event, &stemmer, &error) == 0); bpp = bases; for (tpp = tokens; *tpp != NULL; tpp++) { @@ -975,7 +978,7 @@ static void test_lang_filter_english_possessive(void) test_begin("lang filter english possessive"); - test_assert(lang_filter_create(lang_filter_english_possessive, NULL, make_settings(NULL, NULL), &norm, &error) == 0); + test_assert(lang_filter_create(lang_filter_english_possessive, NULL, make_settings(NULL, NULL), event, &norm, &error) == 0); for (i = 0; i < N_ELEMENTS(input); i++) { token = input[i]; test_assert_idx(lang_filter(norm, &token, &error) == 1, i); diff --git a/src/lib-language/test-lang-tokenizer.c b/src/lib-language/test-lang-tokenizer.c index 690f90cb42..da9d61b5c8 100644 --- a/src/lib-language/test-lang-tokenizer.c +++ b/src/lib-language/test-lang-tokenizer.c @@ -10,6 +10,9 @@ #include "lang-tokenizer-generic-private.h" #include "lang-settings.h" +/* core tokenizers don't use the event in lang_tokenizer_create() */ +static struct event *const event = NULL; + static struct lang_settings simple_settings; static struct lang_settings tr29_settings; static struct lang_settings tr29_wb5a_settings; @@ -210,7 +213,7 @@ static void 
test_lang_tokenizer_generic_only(void) const char *error; test_begin("lang tokenizer generic simple"); - test_assert(lang_tokenizer_create(lang_tokenizer_generic, NULL, &lang_default_settings, 0, &tok, &error) == 0); + test_assert(lang_tokenizer_create(lang_tokenizer_generic, NULL, &lang_default_settings, event, 0, &tok, &error) == 0); test_assert(((struct generic_lang_tokenizer *) tok)->algorithm == BOUNDARY_ALGORITHM_SIMPLE); test_tokenizer_inputs(tok, test_inputs, N_ELEMENTS(test_inputs), expected_output); @@ -266,7 +269,7 @@ static void test_lang_tokenizer_generic_tr29_only(void) const char *error; test_begin("lang tokenizer generic TR29"); - test_assert(lang_tokenizer_create(lang_tokenizer_generic, NULL, &tr29_settings, 0, &tok, &error) == 0); + test_assert(lang_tokenizer_create(lang_tokenizer_generic, NULL, &tr29_settings, event, 0, &tok, &error) == 0); test_tokenizer_inputs(tok, test_inputs, N_ELEMENTS(test_inputs), expected_output); lang_tokenizer_unref(&tok); test_end(); @@ -321,7 +324,7 @@ static void test_lang_tokenizer_generic_tr29_wb5a(void) const char *error; test_begin("lang tokenizer generic TR29 with WB5a"); - test_assert(lang_tokenizer_create(lang_tokenizer_generic, NULL, &tr29_wb5a_settings, 0, &tok, &error) == 0); + test_assert(lang_tokenizer_create(lang_tokenizer_generic, NULL, &tr29_wb5a_settings, event, 0, &tok, &error) == 0); test_tokenizer_inputs(tok, test_inputs, N_ELEMENTS(test_inputs), expected_output); lang_tokenizer_unref(&tok); test_end(); @@ -344,7 +347,7 @@ static void test_lang_tokenizer_address_only(void) const char *error; test_begin("lang tokenizer email address only"); - test_assert(lang_tokenizer_create(lang_tokenizer_email_address, NULL, &lang_default_settings, 0, &tok, &error) == 0); + test_assert(lang_tokenizer_create(lang_tokenizer_email_address, NULL, &lang_default_settings, event, 0, &tok, &error) == 0); test_tokenizer_inputoutput(tok, input, expected_output, 0); lang_tokenizer_unref(&tok); test_end(); @@ -380,8 +383,8 
@@ static void test_lang_tokenizer_address_parent(const char *name, struct lang_set const char *error; test_begin(t_strdup_printf("lang tokenizer email address + parent %s", name)); - test_assert(lang_tokenizer_create(lang_tokenizer_generic, NULL, set, flags, &gen_tok, &error) == 0); - test_assert(lang_tokenizer_create(lang_tokenizer_email_address, gen_tok, &lang_default_settings, 0, &tok, &error) == 0); + test_assert(lang_tokenizer_create(lang_tokenizer_generic, NULL, set, event, flags, &gen_tok, &error) == 0); + test_assert(lang_tokenizer_create(lang_tokenizer_email_address, gen_tok, &lang_default_settings, event, 0, &tok, &error) == 0); test_tokenizer_inputoutput(tok, input, expected_output, 0); lang_tokenizer_unref(&tok); lang_tokenizer_unref(&gen_tok); @@ -428,8 +431,8 @@ static void test_lang_tokenizer_address_search(void) const char *token, *error; test_begin("lang tokenizer search email address + parent"); - test_assert(lang_tokenizer_create(lang_tokenizer_generic, NULL, &lang_default_settings, 0, &gen_tok, &error) == 0); - test_assert(lang_tokenizer_create(lang_tokenizer_email_address, gen_tok, &lang_default_settings, LANG_TOKENIZER_FLAG_SEARCH, &tok, &error) == 0); + test_assert(lang_tokenizer_create(lang_tokenizer_generic, NULL, &lang_default_settings, event, 0, &gen_tok, &error) == 0); + test_assert(lang_tokenizer_create(lang_tokenizer_email_address, gen_tok, &lang_default_settings, event, LANG_TOKENIZER_FLAG_SEARCH, &tok, &error) == 0); test_tokenizer_inputoutput(tok, input, expected_output, 0); /* make sure state is forgotten at EOF */ @@ -499,7 +502,7 @@ static void test_lang_tokenizer_address_maxlen(void) const char *token, *error; test_begin("lang tokenizer address maxlen"); - test_assert(lang_tokenizer_create(lang_tokenizer_email_address, NULL, &set, 0, &tok, &error) == 0); + test_assert(lang_tokenizer_create(lang_tokenizer_email_address, NULL, &set, event, 0, &tok, &error) == 0); while (lang_tokenizer_next(tok, (const unsigned char *)input, 
strlen(input), &token, &error) > 0) ; @@ -525,8 +528,8 @@ static void test_lang_tokenizer_random(void) const char *token, *error; test_begin("lang tokenizer random"); - test_assert(lang_tokenizer_create(lang_tokenizer_generic, NULL, &set, 0, &gen_tok, &error) == 0); - test_assert(lang_tokenizer_create(lang_tokenizer_email_address, gen_tok, &email_set, 0, &tok, &error) == 0); + test_assert(lang_tokenizer_create(lang_tokenizer_generic, NULL, &set, event, 0, &gen_tok, &error) == 0); + test_assert(lang_tokenizer_create(lang_tokenizer_email_address, gen_tok, &email_set, event, 0, &tok, &error) == 0); for (i = 0; i < 10000; i++) T_BEGIN { for (unsigned int j = 0; j < sizeof(addr); j++) @@ -589,7 +592,7 @@ test_lang_tokenizer_explicit_prefix(void) const char *error; test_assert(lang_tokenizer_create(lang_tokenizer_generic, NULL, - &set, flags, &tok, &error) == 0); + &set, event, flags, &tok, &error) == 0); test_tokenizer_inputs( tok, &input, 1, (search!=0) && (explicitprefix!=0) @@ -660,7 +663,7 @@ static void test_lang_tokenizer_skip_base64(void) }; test_begin("lang tokenizer skip base64"); - test_assert(lang_tokenizer_create(lang_tokenizer_generic, NULL, &tr29_settings, 0, &tok, &error) == 0); + test_assert(lang_tokenizer_create(lang_tokenizer_generic, NULL, &tr29_settings, event, 0, &tok, &error) == 0); size_t index = 0; while (lang_tokenizer_next(tok, (const unsigned char *) input, strlen(input), &token, &error) > 0) { diff --git a/src/lib-storage/lang-user.c b/src/lib-storage/lang-user.c index 507fb8bea7..b37f8866bc 100644 --- a/src/lib-storage/lang-user.c +++ b/src/lib-storage/lang-user.c @@ -98,8 +98,12 @@ lang_user_create_filters(struct mail_user *user, const struct language *lang, } const char *error; - if (lang_filter_create(entry_class, parent, set, - &filter, &error) < 0) { + struct event *event = event_create(user->event); + event_add_str(event, "language", lang->name); + ret = lang_filter_create(entry_class, parent, set, event, + &filter, &error); + 
event_unref(&event); + if (ret < 0) { *error_r = t_strdup_printf( "%s:%s %s", set->name, entry_name, error); ret = -1; @@ -119,8 +123,7 @@ lang_user_create_filters(struct mail_user *user, const struct language *lang, } static int -lang_user_create_tokenizer(struct mail_user *user, - const struct language *lang, +lang_user_create_tokenizer(struct mail_user *user, const struct language *lang, struct lang_tokenizer **tokenizer_r, bool search, const char **error_r) { @@ -147,9 +150,13 @@ lang_user_create_tokenizer(struct mail_user *user, } const char *error; - if (lang_tokenizer_create(entry_class, parent, set, - search ? LANG_TOKENIZER_FLAG_SEARCH : 0, - &tokenizer, &error) < 0) { + struct event *event = event_create(user->event); + event_add_str(event, "language", set->name); + ret = lang_tokenizer_create(entry_class, parent, set, event, + search ? LANG_TOKENIZER_FLAG_SEARCH : 0, + &tokenizer, &error); + event_unref(&event); + if (ret < 0) { *error_r = t_strdup_printf( "%s:%s %s", set->name, entry_name, error); ret = -1; @@ -248,10 +255,12 @@ lang_user_init_data_language(struct mail_user *user, struct lang_user *luser, if (lang_user_language_init_tokenizers(user, user_lang, error_r) < 0) return -1; - if (lang_filter_create(lang_filter_lowercase, NULL, set, + struct event *event = event_create(user->event); + event_add_str(event, "language", language_data.name); + if (lang_filter_create(lang_filter_lowercase, NULL, set, event, &user_lang->filter, &error) < 0) i_unreached(); - i_assert(user_lang->filter != NULL); + event_unref(&event); p_array_init(&luser->data_languages, user->pool, 1); array_push_back(&luser->data_languages, &user_lang);