From: Marco Bettini Date: Tue, 23 Jan 2024 08:56:03 +0000 (+0000) Subject: fts: Switch to lang-user X-Git-Tag: 2.4.1~1002 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=6202af27c9db798cd96734e27fad900d24b69ae9;p=thirdparty%2Fdovecot%2Fcore.git fts: Switch to lang-user --- diff --git a/src/plugins/fts/doveadm-fts.c b/src/plugins/fts/doveadm-fts.c index 14c9358c31..8d93b87114 100644 --- a/src/plugins/fts/doveadm-fts.c +++ b/src/plugins/fts/doveadm-fts.c @@ -8,6 +8,7 @@ #include "mailbox-list-iter.h" #include "lang-tokenizer.h" #include "lang-filter.h" +#include "lang-user.h" #include "language.h" #include "fts-storage.h" #include "fts-search-args.h" @@ -186,7 +187,7 @@ cmd_fts_tokenize_run(struct doveadm_mail_cmd_context *_ctx, struct mail_namespace *ns = mail_namespace_find_inbox(user->namespaces); struct fts_backend *backend; - struct fts_user_language *user_lang; + struct language_user *user_lang; const struct language *lang = NULL; int ret, ret2; bool final = FALSE; @@ -200,7 +201,7 @@ cmd_fts_tokenize_run(struct doveadm_mail_cmd_context *_ctx, if (ctx->language == NULL) { struct language_list *lang_list = - fts_user_get_language_list(user); + lang_user_get_language_list(user); enum language_detect_result result; const char *error; @@ -240,7 +241,7 @@ cmd_fts_tokenize_run(struct doveadm_mail_cmd_context *_ctx, return -1; } } - user_lang = fts_user_language_find(user, lang); + user_lang = lang_user_language_find(user, lang); if (user_lang == NULL) { e_error(user->event, "Language not enabled for user: %s", ctx->language); diff --git a/src/plugins/fts/fts-build-mail.c b/src/plugins/fts/fts-build-mail.c index a86dca3554..ad133a5323 100644 --- a/src/plugins/fts/fts-build-mail.c +++ b/src/plugins/fts/fts-build-mail.c @@ -15,6 +15,7 @@ #include "language.h" #include "lang-tokenizer.h" #include "lang-filter.h" +#include "lang-user.h" #include "fts-api-private.h" #include "fts-build-mail.h" @@ -33,7 +34,7 @@ struct fts_mail_build_context { struct fts_parser *body_parser; buffer_t *word_buf, *pending_input; - struct fts_user_language *cur_user_lang; + struct language_user *cur_user_lang; }; static int fts_build_data(struct fts_mail_build_context *ctx, @@ -106,7 +107,7 @@ fts_build_unstructured_header(struct fts_mail_build_context *ctx, } static void fts_mail_build_ctx_set_lang(struct fts_mail_build_context *ctx, - struct fts_user_language *user_lang) + struct language_user *user_lang) { i_assert(user_lang != NULL); @@ -130,7 +131,7 @@ fts_build_tokenized_hdr_update_lang(struct fts_mail_build_context *ctx, ctx->cur_user_lang = NULL; else { fts_mail_build_ctx_set_lang(ctx, - fts_user_get_data_lang(ctx->update_ctx->backend->ns->user)); + lang_user_get_data_lang(ctx->update_ctx->backend->ns->user)); } } @@ -182,10 +183,10 @@ static int fts_build_mail_header(struct fts_mail_build_context *ctx, if ((ctx->update_ctx->backend->flags & FTS_BACKEND_FLAG_TOKENIZED_INPUT) != 0) { /* index the header name itself using data-language. */ - struct fts_user_language *prev_lang = ctx->cur_user_lang; + struct language_user *prev_lang = ctx->cur_user_lang; fts_mail_build_ctx_set_lang(ctx, - fts_user_get_data_lang(ctx->update_ctx->backend->ns->user)); + lang_user_get_data_lang(ctx->update_ctx->backend->ns->user)); key.hdr_name = ""; if (fts_backend_update_set_build_key(ctx->update_ctx, &key)) { if (fts_build_data(ctx, (const void *)hdr->name, @@ -299,7 +300,7 @@ fts_detect_language(struct fts_mail_build_context *ctx, const struct language **lang_r) { struct mail_user *user = ctx->update_ctx->backend->ns->user; - struct language_list *lang_list = fts_user_get_language_list(user); + struct language_list *lang_list = lang_user_get_language_list(user); const struct language *lang; const char *error; @@ -348,7 +349,7 @@ fts_build_tokenized(struct fts_mail_build_context *ctx, /* wait for more data */ return 0; } else { - fts_mail_build_ctx_set_lang(ctx, fts_user_language_find(user, lang)); + fts_mail_build_ctx_set_lang(ctx, lang_user_language_find(user, lang)); if (ctx->pending_input->used > 0) { if (fts_build_add_tokens_with_filter(ctx, diff --git a/src/plugins/fts/fts-search-args.c b/src/plugins/fts/fts-search-args.c index 85f0fa3862..fd1f8c65a5 100644 --- a/src/plugins/fts/fts-search-args.c +++ b/src/plugins/fts/fts-search-args.c @@ -7,6 +7,7 @@ #include "fts-api-private.h" #include "lang-tokenizer.h" #include "lang-filter.h" +#include "lang-user.h" #include "fts-user.h" #include "fts-search-args.h" @@ -98,7 +99,7 @@ fts_backend_dovecot_expand_tokens(struct lang_filter *filter, } static int -fts_backend_dovecot_tokenize_lang(struct fts_user_language *user_lang, +fts_backend_dovecot_tokenize_lang(struct language_user *user_lang, pool_t pool, struct mail_search_arg *or_arg, struct mail_search_arg *orig_arg, const char *orig_token, const char **error_r) @@ -149,8 +150,8 @@ static int fts_search_arg_expand(struct fts_backend *backend, pool_t pool, struct mail_search_arg **argp) { struct event *event = backend->event; - const ARRAY_TYPE(fts_user_language) *languages; - struct fts_user_language *lang; + const ARRAY_TYPE(language_user) *languages; + struct language_user *lang; struct mail_search_arg *or_arg, *orig_arg = *argp; const char *error, *orig_token = orig_arg->value.str; @@ -159,9 +160,9 @@ static int fts_search_arg_expand(struct fts_backend *backend, pool_t pool, (*argp)->type == SEARCH_HEADER_COMPRESS_LWSP) && !fts_header_has_language((*argp)->hdr_field_name)) { /* use only the data-language */ - languages = fts_user_get_data_languages(backend->ns->user); + languages = lang_user_get_data_languages(backend->ns->user); } else { - languages = fts_user_get_all_languages(backend->ns->user); + languages = lang_user_get_all_languages(backend->ns->user); } /* OR together all the different expansions for different languages. diff --git a/src/plugins/fts/fts-user.c b/src/plugins/fts/fts-user.c index cf8bc6e52e..e50ba5055a 100644 --- a/src/plugins/fts/fts-user.c +++ b/src/plugins/fts/fts-user.c @@ -8,6 +8,7 @@ #include "language.h" #include "lang-filter.h" #include "lang-tokenizer.h" +#include "lang-user.h" #include "fts-user.h" #include "settings.h" #include "fts-settings.h" @@ -21,322 +22,11 @@ struct fts_user { union mail_user_module_context module_ctx; const struct fts_settings *set; int refcount; - - struct language_list *lang_list; - struct fts_user_language *data_lang; - ARRAY_TYPE(fts_user_language) languages, data_languages; }; static MODULE_CONTEXT_DEFINE_INIT(fts_user_module, &mail_user_module_register); -static const char *const *str_keyvalues_to_array(const char *str) -{ - const char *key, *value, *const *keyvalues; - ARRAY_TYPE(const_string) arr; - unsigned int i; - - if (str == NULL) - return NULL; - - t_array_init(&arr, 8); - keyvalues = t_strsplit_spaces(str, " "); - for (i = 0; keyvalues[i] != NULL; i++) { - value = strchr(keyvalues[i], '='); - if (value != NULL) - key = t_strdup_until(keyvalues[i], value++); - else { - key = keyvalues[i]; - value = ""; - } - array_push_back(&arr, &key); - array_push_back(&arr, &value); - } - array_append_zero(&arr); - return array_front(&arr); -} - -static int -fts_user_init_languages(struct mail_user *user, struct fts_user *fuser, - const char **error_r) -{ - const char *languages, *unknown; - - languages = mail_user_plugin_getenv(user, "fts_languages"); - if (languages == NULL) { - *error_r = "fts_languages setting is missing"; - return -1; - } - - struct language_settings lang_settings = { - .textcat_config_path = mail_user_plugin_getenv(user, "fts_language_config") - }; - fuser->lang_list = language_list_init(&lang_settings); - - if (!language_list_add_names(fuser->lang_list, languages, &unknown)) { - *error_r = t_strdup_printf( - "fts_languages: Unknown language '%s'", unknown); - return -1; - } - if (array_count(language_list_get_all(fuser->lang_list)) == 0) { - *error_r = "fts_languages setting is empty"; - return -1; - } - return 0; -} - -static int -fts_user_create_filters(struct mail_user *user, const struct language *lang, - struct lang_filter **filter_r, const char **error_r) -{ - const struct lang_filter *filter_class; - struct lang_filter *filter = NULL, *parent = NULL; - const char *filters_key, *const *filters, *filter_set_name; - const char *str, *error, *set_key; - unsigned int i; - int ret = 0; - - /* try to get the language-specific filters first */ - filters_key = t_strconcat("fts_filters_", lang->name, NULL); - str = mail_user_plugin_getenv(user, filters_key); - if (str == NULL) { - /* fallback to global filters */ - filters_key = "fts_filters"; - str = mail_user_plugin_getenv(user, filters_key); - if (str == NULL) { - /* No filters */ - *filter_r = NULL; - return 0; - } - } - - filters = t_strsplit_spaces(str, " "); - for (i = 0; filters[i] != NULL; i++) { - filter_class = lang_filter_find(filters[i]); - if (filter_class == NULL) { - *error_r = t_strdup_printf("%s: Unknown filter '%s'", - filters_key, filters[i]); - ret = -1; - break; - } - - /* try the language-specific setting first */ - filter_set_name = t_str_replace(filters[i], '-', '_'); - set_key = t_strdup_printf("fts_filter_%s_%s", - lang->name, filter_set_name); - str = mail_user_plugin_getenv(user, set_key); - if (str == NULL) { - set_key = t_strdup_printf("fts_filter_%s", filter_set_name); - str = mail_user_plugin_getenv(user, set_key); - } - - if (lang_filter_create(filter_class, parent, lang, - str_keyvalues_to_array(str), - &filter, &error) < 0) { - *error_r = t_strdup_printf("%s: %s", set_key, error); - ret = -1; - break; - } - if (parent != NULL) - lang_filter_unref(&parent); - parent = filter; - } - if (ret < 0) { - if (parent != NULL) - lang_filter_unref(&parent); - return -1; - } - *filter_r = filter; - return 0; -} - -static int -fts_user_create_tokenizer(struct mail_user *user, - const struct language *lang, - struct lang_tokenizer **tokenizer_r, bool search, - const char **error_r) -{ - const struct lang_tokenizer *tokenizer_class; - struct lang_tokenizer *tokenizer = NULL, *parent = NULL; - const char *tokenizers_key, *const *tokenizers, *tokenizer_set_name; - const char *str, *error, *set_key; - unsigned int i; - int ret = 0; - - tokenizers_key = t_strconcat("fts_tokenizers_", lang->name, NULL); - str = mail_user_plugin_getenv(user, tokenizers_key); - if (str == NULL) { - str = mail_user_plugin_getenv(user, "fts_tokenizers"); - if (str == NULL) { - *error_r = t_strdup_printf("%s or fts_tokenizers setting must exist", tokenizers_key); - return -1; - } - tokenizers_key = "fts_tokenizers"; - } - - tokenizers = t_strsplit_spaces(str, " "); - - for (i = 0; tokenizers[i] != NULL; i++) { - tokenizer_class = lang_tokenizer_find(tokenizers[i]); - if (tokenizer_class == NULL) { - *error_r = t_strdup_printf("%s: Unknown tokenizer '%s'", - tokenizers_key, tokenizers[i]); - ret = -1; - break; - } - - tokenizer_set_name = t_str_replace(tokenizers[i], '-', '_'); - set_key = t_strdup_printf("fts_tokenizer_%s_%s", tokenizer_set_name, lang->name); - str = mail_user_plugin_getenv(user, set_key); - if (str == NULL) { - set_key = t_strdup_printf("fts_tokenizer_%s", tokenizer_set_name); - str = mail_user_plugin_getenv(user, set_key); - } - - if (lang_tokenizer_create(tokenizer_class, parent, - str_keyvalues_to_array(str), - search ? LANG_TOKENIZER_FLAG_SEARCH : 0, - &tokenizer, &error) < 0) { - *error_r = t_strdup_printf("%s: %s", set_key, error); - ret = -1; - break; - } - if (parent != NULL) - lang_tokenizer_unref(&parent); - parent = tokenizer; - } - if (ret < 0) { - if (parent != NULL) - lang_tokenizer_unref(&parent); - return -1; - } - *tokenizer_r = tokenizer; - return 0; -} - -static int -fts_user_language_init_tokenizers(struct mail_user *user, - struct fts_user_language *user_lang, - const char **error_r) -{ - int ret; - T_BEGIN { - ret = fts_user_create_tokenizer(user, user_lang->lang, - &user_lang->index_tokenizer, - FALSE, error_r); - } T_END_PASS_STR_IF(ret < 0, error_r); - if (ret < 0) - return -1; - - T_BEGIN { - ret = fts_user_create_tokenizer(user, user_lang->lang, - &user_lang->search_tokenizer, - TRUE, error_r); - } T_END_PASS_STR_IF(ret < 0, error_r); - return ret; -} - -struct fts_user_language * -fts_user_language_find(struct mail_user *user, - const struct language *lang) -{ - struct fts_user_language *user_lang; - struct fts_user *fuser = FTS_USER_CONTEXT_REQUIRE(user); - - array_foreach_elem(&fuser->languages, user_lang) { - if (strcmp(user_lang->lang->name, lang->name) == 0) - return user_lang; - } - return NULL; -} - -static int fts_user_language_create(struct mail_user *user, - struct fts_user *fuser, - const struct language *lang, - const char **error_r) -{ - struct fts_user_language *user_lang; - - user_lang = p_new(user->pool, struct fts_user_language, 1); - user_lang->lang = lang; - array_push_back(&fuser->languages, &user_lang); - - if (fts_user_language_init_tokenizers(user, user_lang, error_r) < 0) - return -1; - if (fts_user_create_filters(user, lang, &user_lang->filter, error_r) < 0) - return -1; - return 0; -} - -static int fts_user_languages_fill_all(struct mail_user *user, - struct fts_user *fuser, - const char **error_r) -{ - const struct language *lang; - - array_foreach_elem(language_list_get_all(fuser->lang_list), lang) { - if (fts_user_language_create(user, fuser, lang, error_r) < 0) - return -1; - } - return 0; -} - -static int -fts_user_init_data_language(struct mail_user *user, struct fts_user *fuser, - const char **error_r) -{ - struct fts_user_language *user_lang; - const char *error; - - user_lang = p_new(user->pool, struct fts_user_language, 1); - user_lang->lang = &language_data; - - if (fts_user_language_init_tokenizers(user, user_lang, error_r) < 0) - return -1; - - if (lang_filter_create(lang_filter_lowercase, NULL, user_lang->lang, NULL, - &user_lang->filter, &error) < 0) - i_unreached(); - i_assert(user_lang->filter != NULL); - - p_array_init(&fuser->data_languages, user->pool, 1); - array_push_back(&fuser->data_languages, &user_lang); - array_push_back(&fuser->languages, &user_lang); - - fuser->data_lang = user_lang; - return 0; -} - -struct language_list *fts_user_get_language_list(struct mail_user *user) -{ - struct fts_user *fuser = FTS_USER_CONTEXT_REQUIRE(user); - - return fuser->lang_list; -} - -const ARRAY_TYPE(fts_user_language) * -fts_user_get_all_languages(struct mail_user *user) -{ - struct fts_user *fuser = FTS_USER_CONTEXT_REQUIRE(user); - - return &fuser->languages; -} - -const ARRAY_TYPE(fts_user_language) * -fts_user_get_data_languages(struct mail_user *user) -{ - struct fts_user *fuser = FTS_USER_CONTEXT_REQUIRE(user); - - return &fuser->data_languages; -} - -struct fts_user_language *fts_user_get_data_lang(struct mail_user *user) -{ - struct fts_user *fuser = FTS_USER_CONTEXT_REQUIRE(user); - - return fuser->data_lang; -} - const struct fts_settings *fts_user_get_settings(struct mail_user *user) { struct fts_user *fuser = FTS_USER_CONTEXT_REQUIRE(user); @@ -352,45 +42,6 @@ int fts_user_try_get_settings(struct mail_user *user, return 0; } -static void fts_user_language_free(struct fts_user_language *user_lang) -{ - if (user_lang->filter != NULL) - lang_filter_unref(&user_lang->filter); - if (user_lang->index_tokenizer != NULL) - lang_tokenizer_unref(&user_lang->index_tokenizer); - if (user_lang->search_tokenizer != NULL) - lang_tokenizer_unref(&user_lang->search_tokenizer); -} - -static void fts_user_free(struct fts_user *fuser) -{ - struct fts_user_language *user_lang; - - if (fuser->lang_list != NULL) - language_list_deinit(&fuser->lang_list); - - if (array_is_created(&fuser->languages)) { - array_foreach_elem(&fuser->languages, user_lang) - fts_user_language_free(user_lang); - } - - settings_free(fuser->set); -} - -static int -fts_mail_user_init_libfts(struct mail_user *user, struct fts_user *fuser, - const char **error_r) -{ - p_array_init(&fuser->languages, user->pool, 4); - - if (fts_user_init_languages(user, fuser, error_r) < 0 || - fts_user_init_data_language(user, fuser, error_r) < 0) - return -1; - if (fts_user_languages_fill_all(user, fuser, error_r) < 0) - return -1; - return 0; -} - size_t fts_mail_user_message_max_size(struct mail_user *user) { struct fts_user *fuser = FTS_USER_CONTEXT_REQUIRE(user); @@ -412,15 +63,14 @@ int fts_mail_user_init(struct mail_user *user, bool initialize_libfts, if (settings_get(user->event, &fts_setting_parser_info, 0, &set, error_r) < 0) return -1; + if (lang_user_init(user, initialize_libfts, error_r) < 0) { + settings_free(set); + return -1; + } + fuser = p_new(user->pool, struct fts_user, 1); fuser->set = set; fuser->refcount = 1; - if (initialize_libfts) { - if (fts_mail_user_init_libfts(user, fuser, error_r) < 0) { - fts_user_free(fuser); - return -1; - } - } MODULE_CONTEXT_SET(user, fts_user_module, fuser); return 0; @@ -432,7 +82,9 @@ void fts_mail_user_deinit(struct mail_user *user) if (fuser != NULL) { i_assert(fuser->refcount > 0); - if (--fuser->refcount == 0) - fts_user_free(fuser); + if (--fuser->refcount == 0) { + settings_free(fuser->set); + lang_user_deinit(user); + } } } diff --git a/src/plugins/fts/fts-user.h b/src/plugins/fts/fts-user.h index 6fa4ea0c49..98cf897239 100644 --- a/src/plugins/fts/fts-user.h +++ b/src/plugins/fts/fts-user.h @@ -3,23 +3,6 @@ #include "fts-settings.h" -struct fts_user_language { - const struct language *lang; - struct lang_filter *filter; - struct lang_tokenizer *index_tokenizer, *search_tokenizer; -}; -ARRAY_DEFINE_TYPE(fts_user_language, struct fts_user_language *); - -struct fts_user_language * -fts_user_language_find(struct mail_user *user, - const struct language *lang); -struct language_list *fts_user_get_language_list(struct mail_user *user); -const ARRAY_TYPE(fts_user_language) * -fts_user_get_all_languages(struct mail_user *user); -struct fts_user_language *fts_user_get_data_lang(struct mail_user *user); -const ARRAY_TYPE(fts_user_language) * -fts_user_get_data_languages(struct mail_user *user); - const struct fts_settings *fts_user_get_settings(struct mail_user *user); int fts_user_try_get_settings(struct mail_user *user, const struct fts_settings **set_r);