git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
fts: Switch to lang-user
author Marco Bettini <marco.bettini@open-xchange.com>
Tue, 23 Jan 2024 08:56:03 +0000 (08:56 +0000)
committer Aki Tuomi <aki.tuomi@open-xchange.com>
Fri, 17 Jan 2025 08:39:58 +0000 (10:39 +0200)
src/plugins/fts/doveadm-fts.c
src/plugins/fts/fts-build-mail.c
src/plugins/fts/fts-search-args.c
src/plugins/fts/fts-user.c
src/plugins/fts/fts-user.h

diff --git a/src/plugins/fts/doveadm-fts.c b/src/plugins/fts/doveadm-fts.c
index 14c9358c31daa8f50b0125b5393a47e40d83da43..8d93b8711418bf8c273872776edd5b23ae3ae721 100644 (file)
@@ -8,6 +8,7 @@
 #include "mailbox-list-iter.h"
 #include "lang-tokenizer.h"
 #include "lang-filter.h"
+#include "lang-user.h"
 #include "language.h"
 #include "fts-storage.h"
 #include "fts-search-args.h"
@@ -186,7 +187,7 @@ cmd_fts_tokenize_run(struct doveadm_mail_cmd_context *_ctx,
 
        struct mail_namespace *ns = mail_namespace_find_inbox(user->namespaces);
        struct fts_backend *backend;
-       struct fts_user_language *user_lang;
+       struct language_user *user_lang;
        const struct language *lang = NULL;
        int ret, ret2;
        bool final = FALSE;
@@ -200,7 +201,7 @@ cmd_fts_tokenize_run(struct doveadm_mail_cmd_context *_ctx,
 
        if (ctx->language == NULL) {
                struct language_list *lang_list =
-                       fts_user_get_language_list(user);
+                       lang_user_get_language_list(user);
                enum language_detect_result result;
                const char *error;
 
@@ -240,7 +241,7 @@ cmd_fts_tokenize_run(struct doveadm_mail_cmd_context *_ctx,
                        return -1;
                }
        }
-       user_lang = fts_user_language_find(user, lang);
+       user_lang = lang_user_language_find(user, lang);
        if (user_lang == NULL) {
                e_error(user->event,
                        "Language not enabled for user: %s", ctx->language);
diff --git a/src/plugins/fts/fts-build-mail.c b/src/plugins/fts/fts-build-mail.c
index a86dca35544aafee3ebd869788a18891c5a46905..ad133a5323eff0faa3da6185b07af72b659b9bef 100644 (file)
@@ -15,6 +15,7 @@
 #include "language.h"
 #include "lang-tokenizer.h"
 #include "lang-filter.h"
+#include "lang-user.h"
 #include "fts-api-private.h"
 #include "fts-build-mail.h"
 
@@ -33,7 +34,7 @@ struct fts_mail_build_context {
        struct fts_parser *body_parser;
 
        buffer_t *word_buf, *pending_input;
-       struct fts_user_language *cur_user_lang;
+       struct language_user *cur_user_lang;
 };
 
 static int fts_build_data(struct fts_mail_build_context *ctx,
@@ -106,7 +107,7 @@ fts_build_unstructured_header(struct fts_mail_build_context *ctx,
 }
 
 static void fts_mail_build_ctx_set_lang(struct fts_mail_build_context *ctx,
-                                       struct fts_user_language *user_lang)
+                                       struct language_user *user_lang)
 {
        i_assert(user_lang != NULL);
 
@@ -130,7 +131,7 @@ fts_build_tokenized_hdr_update_lang(struct fts_mail_build_context *ctx,
                ctx->cur_user_lang = NULL;
        else {
                fts_mail_build_ctx_set_lang(ctx,
-                       fts_user_get_data_lang(ctx->update_ctx->backend->ns->user));
+                       lang_user_get_data_lang(ctx->update_ctx->backend->ns->user));
        }
 }
 
@@ -182,10 +183,10 @@ static int fts_build_mail_header(struct fts_mail_build_context *ctx,
        if ((ctx->update_ctx->backend->flags &
             FTS_BACKEND_FLAG_TOKENIZED_INPUT) != 0) {
                /* index the header name itself using data-language. */
-               struct fts_user_language *prev_lang = ctx->cur_user_lang;
+               struct language_user *prev_lang = ctx->cur_user_lang;
 
                fts_mail_build_ctx_set_lang(ctx,
-                       fts_user_get_data_lang(ctx->update_ctx->backend->ns->user));
+                       lang_user_get_data_lang(ctx->update_ctx->backend->ns->user));
                key.hdr_name = "";
                if (fts_backend_update_set_build_key(ctx->update_ctx, &key)) {
                        if (fts_build_data(ctx, (const void *)hdr->name,
@@ -299,7 +300,7 @@ fts_detect_language(struct fts_mail_build_context *ctx,
                    const struct language **lang_r)
 {
        struct mail_user *user = ctx->update_ctx->backend->ns->user;
-       struct language_list *lang_list = fts_user_get_language_list(user);
+       struct language_list *lang_list = lang_user_get_language_list(user);
        const struct language *lang;
        const char *error;
 
@@ -348,7 +349,7 @@ fts_build_tokenized(struct fts_mail_build_context *ctx,
                /* wait for more data */
                return 0;
        } else {
-               fts_mail_build_ctx_set_lang(ctx, fts_user_language_find(user, lang));
+               fts_mail_build_ctx_set_lang(ctx, lang_user_language_find(user, lang));
 
                if (ctx->pending_input->used > 0) {
                        if (fts_build_add_tokens_with_filter(ctx,
diff --git a/src/plugins/fts/fts-search-args.c b/src/plugins/fts/fts-search-args.c
index 85f0fa3862623d1790d11ae53f9f4fed1493bc26..fd1f8c65a54f7c78330b7d2638f41db03cdc0df2 100644 (file)
@@ -7,6 +7,7 @@
 #include "fts-api-private.h"
 #include "lang-tokenizer.h"
 #include "lang-filter.h"
+#include "lang-user.h"
 #include "fts-user.h"
 #include "fts-search-args.h"
 
@@ -98,7 +99,7 @@ fts_backend_dovecot_expand_tokens(struct lang_filter *filter,
 }
 
 static int
-fts_backend_dovecot_tokenize_lang(struct fts_user_language *user_lang,
+fts_backend_dovecot_tokenize_lang(struct language_user *user_lang,
                                  pool_t pool, struct mail_search_arg *or_arg,
                                  struct mail_search_arg *orig_arg,
                                  const char *orig_token, const char **error_r)
@@ -149,8 +150,8 @@ static int fts_search_arg_expand(struct fts_backend *backend, pool_t pool,
                                 struct mail_search_arg **argp)
 {
        struct event *event = backend->event;
-       const ARRAY_TYPE(fts_user_language) *languages;
-       struct fts_user_language *lang;
+       const ARRAY_TYPE(language_user) *languages;
+       struct language_user *lang;
        struct mail_search_arg *or_arg, *orig_arg = *argp;
        const char *error, *orig_token = orig_arg->value.str;
 
@@ -159,9 +160,9 @@ static int fts_search_arg_expand(struct fts_backend *backend, pool_t pool,
             (*argp)->type == SEARCH_HEADER_COMPRESS_LWSP) &&
            !fts_header_has_language((*argp)->hdr_field_name)) {
                /* use only the data-language */
-               languages = fts_user_get_data_languages(backend->ns->user);
+               languages = lang_user_get_data_languages(backend->ns->user);
        } else {
-               languages = fts_user_get_all_languages(backend->ns->user);
+               languages = lang_user_get_all_languages(backend->ns->user);
        }
 
        /* OR together all the different expansions for different languages.
diff --git a/src/plugins/fts/fts-user.c b/src/plugins/fts/fts-user.c
index cf8bc6e52ea986fcc8f9474a537fa2a7bf6b95ac..e50ba5055a82d419ac0e483480f9c1f5747be082 100644 (file)
@@ -8,6 +8,7 @@
 #include "language.h"
 #include "lang-filter.h"
 #include "lang-tokenizer.h"
+#include "lang-user.h"
 #include "fts-user.h"
 #include "settings.h"
 #include "fts-settings.h"
@@ -21,322 +22,11 @@ struct fts_user {
        union mail_user_module_context module_ctx;
        const struct fts_settings *set;
        int refcount;
-
-       struct language_list *lang_list;
-       struct fts_user_language *data_lang;
-       ARRAY_TYPE(fts_user_language) languages, data_languages;
 };
 
 static MODULE_CONTEXT_DEFINE_INIT(fts_user_module,
                                  &mail_user_module_register);
 
-static const char *const *str_keyvalues_to_array(const char *str)
-{
-       const char *key, *value, *const *keyvalues;
-       ARRAY_TYPE(const_string) arr;
-       unsigned int i;
-
-       if (str == NULL)
-               return NULL;
-
-       t_array_init(&arr, 8);
-       keyvalues = t_strsplit_spaces(str, " ");
-       for (i = 0; keyvalues[i] != NULL; i++) {
-               value = strchr(keyvalues[i], '=');
-               if (value != NULL)
-                       key = t_strdup_until(keyvalues[i], value++);
-               else {
-                       key = keyvalues[i];
-                       value = "";
-               }
-               array_push_back(&arr, &key);
-               array_push_back(&arr, &value);
-       }
-       array_append_zero(&arr);
-       return array_front(&arr);
-}
-
-static int
-fts_user_init_languages(struct mail_user *user, struct fts_user *fuser,
-                       const char **error_r)
-{
-       const char *languages, *unknown;
-
-       languages = mail_user_plugin_getenv(user, "fts_languages");
-       if (languages == NULL) {
-               *error_r = "fts_languages setting is missing";
-               return -1;
-       }
-
-       struct language_settings lang_settings = {
-               .textcat_config_path = mail_user_plugin_getenv(user, "fts_language_config")
-       };
-       fuser->lang_list = language_list_init(&lang_settings);
-
-       if (!language_list_add_names(fuser->lang_list, languages, &unknown)) {
-               *error_r = t_strdup_printf(
-                       "fts_languages: Unknown language '%s'", unknown);
-               return -1;
-       }
-       if (array_count(language_list_get_all(fuser->lang_list)) == 0) {
-               *error_r = "fts_languages setting is empty";
-               return -1;
-       }
-       return 0;
-}
-
-static int
-fts_user_create_filters(struct mail_user *user, const struct language *lang,
-                       struct lang_filter **filter_r, const char **error_r)
-{
-       const struct lang_filter *filter_class;
-       struct lang_filter *filter = NULL, *parent = NULL;
-       const char *filters_key, *const *filters, *filter_set_name;
-       const char *str, *error, *set_key;
-       unsigned int i;
-       int ret = 0;
-
-       /* try to get the language-specific filters first */
-       filters_key = t_strconcat("fts_filters_", lang->name, NULL);
-       str = mail_user_plugin_getenv(user, filters_key);
-       if (str == NULL) {
-               /* fallback to global filters */
-               filters_key = "fts_filters";
-               str = mail_user_plugin_getenv(user, filters_key);
-               if (str == NULL) {
-                       /* No filters */
-                       *filter_r = NULL;
-                       return 0;
-               }
-       }
-
-       filters = t_strsplit_spaces(str, " ");
-       for (i = 0; filters[i] != NULL; i++) {
-               filter_class = lang_filter_find(filters[i]);
-               if (filter_class == NULL) {
-                       *error_r = t_strdup_printf("%s: Unknown filter '%s'",
-                                                  filters_key, filters[i]);
-                       ret = -1;
-                       break;
-               }
-
-               /* try the language-specific setting first */
-               filter_set_name = t_str_replace(filters[i], '-', '_');
-               set_key = t_strdup_printf("fts_filter_%s_%s",
-                                         lang->name, filter_set_name);
-               str = mail_user_plugin_getenv(user, set_key);
-               if (str == NULL) {
-                       set_key = t_strdup_printf("fts_filter_%s", filter_set_name);
-                       str = mail_user_plugin_getenv(user, set_key);
-               }
-
-               if (lang_filter_create(filter_class, parent, lang,
-                                      str_keyvalues_to_array(str),
-                                      &filter, &error) < 0) {
-                       *error_r = t_strdup_printf("%s: %s", set_key, error);
-                       ret = -1;
-                       break;
-               }
-               if (parent != NULL)
-                       lang_filter_unref(&parent);
-               parent = filter;
-       }
-       if (ret < 0) {
-               if (parent != NULL)
-                       lang_filter_unref(&parent);
-               return -1;
-       }
-       *filter_r = filter;
-       return 0;
-}
-
-static int
-fts_user_create_tokenizer(struct mail_user *user,
-                         const struct language *lang,
-                         struct lang_tokenizer **tokenizer_r, bool search,
-                         const char **error_r)
-{
-       const struct lang_tokenizer *tokenizer_class;
-       struct lang_tokenizer *tokenizer = NULL, *parent = NULL;
-       const char *tokenizers_key, *const *tokenizers, *tokenizer_set_name;
-       const char *str, *error, *set_key;
-       unsigned int i;
-       int ret = 0;
-
-       tokenizers_key = t_strconcat("fts_tokenizers_", lang->name, NULL);
-       str = mail_user_plugin_getenv(user, tokenizers_key);
-       if (str == NULL) {
-               str = mail_user_plugin_getenv(user, "fts_tokenizers");
-               if (str == NULL) {
-                       *error_r = t_strdup_printf("%s or fts_tokenizers setting must exist", tokenizers_key);
-                       return -1;
-               }
-               tokenizers_key = "fts_tokenizers";
-       }
-
-       tokenizers = t_strsplit_spaces(str, " ");
-
-       for (i = 0; tokenizers[i] != NULL; i++) {
-               tokenizer_class = lang_tokenizer_find(tokenizers[i]);
-               if (tokenizer_class == NULL) {
-                       *error_r = t_strdup_printf("%s: Unknown tokenizer '%s'",
-                                                  tokenizers_key, tokenizers[i]);
-                       ret = -1;
-                       break;
-               }
-
-               tokenizer_set_name = t_str_replace(tokenizers[i], '-', '_');
-               set_key = t_strdup_printf("fts_tokenizer_%s_%s", tokenizer_set_name, lang->name);
-               str = mail_user_plugin_getenv(user, set_key);
-               if (str == NULL) {
-                       set_key = t_strdup_printf("fts_tokenizer_%s", tokenizer_set_name);
-                       str = mail_user_plugin_getenv(user, set_key);
-               }
-
-               if (lang_tokenizer_create(tokenizer_class, parent,
-                                         str_keyvalues_to_array(str),
-                                         search ? LANG_TOKENIZER_FLAG_SEARCH : 0,
-                                         &tokenizer, &error) < 0) {
-                       *error_r = t_strdup_printf("%s: %s", set_key, error);
-                       ret = -1;
-                       break;
-               }
-               if (parent != NULL)
-                       lang_tokenizer_unref(&parent);
-               parent = tokenizer;
-       }
-       if (ret < 0) {
-               if (parent != NULL)
-                       lang_tokenizer_unref(&parent);
-               return -1;
-       }
-       *tokenizer_r = tokenizer;
-       return 0;
-}
-
-static int
-fts_user_language_init_tokenizers(struct mail_user *user,
-                                 struct fts_user_language *user_lang,
-                                 const char **error_r)
-{
-       int ret;
-       T_BEGIN {
-               ret = fts_user_create_tokenizer(user, user_lang->lang,
-                                               &user_lang->index_tokenizer,
-                                               FALSE, error_r);
-       } T_END_PASS_STR_IF(ret < 0, error_r);
-       if (ret < 0)
-               return -1;
-
-       T_BEGIN {
-               ret = fts_user_create_tokenizer(user, user_lang->lang,
-                                               &user_lang->search_tokenizer,
-                                               TRUE, error_r);
-       } T_END_PASS_STR_IF(ret < 0, error_r);
-       return ret;
-}
-
-struct fts_user_language *
-fts_user_language_find(struct mail_user *user,
-                      const struct language *lang)
-{
-       struct fts_user_language *user_lang;
-       struct fts_user *fuser = FTS_USER_CONTEXT_REQUIRE(user);
-
-       array_foreach_elem(&fuser->languages, user_lang) {
-               if (strcmp(user_lang->lang->name, lang->name) == 0)
-                       return user_lang;
-       }
-       return NULL;
-}
-
-static int fts_user_language_create(struct mail_user *user,
-                                    struct fts_user *fuser,
-                                   const struct language *lang,
-                                   const char **error_r)
-{
-       struct fts_user_language *user_lang;
-
-       user_lang = p_new(user->pool, struct fts_user_language, 1);
-       user_lang->lang = lang;
-       array_push_back(&fuser->languages, &user_lang);
-
-       if (fts_user_language_init_tokenizers(user, user_lang, error_r) < 0)
-               return -1;
-       if (fts_user_create_filters(user, lang, &user_lang->filter, error_r) < 0)
-               return -1;
-       return 0;
-}
-
-static int fts_user_languages_fill_all(struct mail_user *user,
-                                       struct fts_user *fuser,
-                                       const char **error_r)
-{
-       const struct language *lang;
-
-       array_foreach_elem(language_list_get_all(fuser->lang_list), lang) {
-               if (fts_user_language_create(user, fuser, lang, error_r) < 0)
-                       return -1;
-       }
-       return 0;
-}
-
-static int
-fts_user_init_data_language(struct mail_user *user, struct fts_user *fuser,
-                           const char **error_r)
-{
-       struct fts_user_language *user_lang;
-       const char *error;
-
-       user_lang = p_new(user->pool, struct fts_user_language, 1);
-       user_lang->lang = &language_data;
-
-       if (fts_user_language_init_tokenizers(user, user_lang, error_r) < 0)
-               return -1;
-
-       if (lang_filter_create(lang_filter_lowercase, NULL, user_lang->lang, NULL,
-                              &user_lang->filter, &error) < 0)
-               i_unreached();
-       i_assert(user_lang->filter != NULL);
-
-       p_array_init(&fuser->data_languages, user->pool, 1);
-       array_push_back(&fuser->data_languages, &user_lang);
-       array_push_back(&fuser->languages, &user_lang);
-
-       fuser->data_lang = user_lang;
-       return 0;
-}
-
-struct language_list *fts_user_get_language_list(struct mail_user *user)
-{
-       struct fts_user *fuser = FTS_USER_CONTEXT_REQUIRE(user);
-
-       return fuser->lang_list;
-}
-
-const ARRAY_TYPE(fts_user_language) *
-fts_user_get_all_languages(struct mail_user *user)
-{
-       struct fts_user *fuser = FTS_USER_CONTEXT_REQUIRE(user);
-
-       return &fuser->languages;
-}
-
-const ARRAY_TYPE(fts_user_language) *
-fts_user_get_data_languages(struct mail_user *user)
-{
-       struct fts_user *fuser = FTS_USER_CONTEXT_REQUIRE(user);
-
-       return &fuser->data_languages;
-}
-
-struct fts_user_language *fts_user_get_data_lang(struct mail_user *user)
-{
-       struct fts_user *fuser = FTS_USER_CONTEXT_REQUIRE(user);
-
-       return fuser->data_lang;
-}
-
 const struct fts_settings *fts_user_get_settings(struct mail_user *user)
 {
        struct fts_user *fuser = FTS_USER_CONTEXT_REQUIRE(user);
@@ -352,45 +42,6 @@ int fts_user_try_get_settings(struct mail_user *user,
        return 0;
 }
 
-static void fts_user_language_free(struct fts_user_language *user_lang)
-{
-       if (user_lang->filter != NULL)
-               lang_filter_unref(&user_lang->filter);
-       if (user_lang->index_tokenizer != NULL)
-               lang_tokenizer_unref(&user_lang->index_tokenizer);
-       if (user_lang->search_tokenizer != NULL)
-               lang_tokenizer_unref(&user_lang->search_tokenizer);
-}
-
-static void fts_user_free(struct fts_user *fuser)
-{
-       struct fts_user_language *user_lang;
-
-       if (fuser->lang_list != NULL)
-               language_list_deinit(&fuser->lang_list);
-
-       if (array_is_created(&fuser->languages)) {
-               array_foreach_elem(&fuser->languages, user_lang)
-                       fts_user_language_free(user_lang);
-       }
-
-       settings_free(fuser->set);
-}
-
-static int
-fts_mail_user_init_libfts(struct mail_user *user, struct fts_user *fuser,
-                         const char **error_r)
-{
-       p_array_init(&fuser->languages, user->pool, 4);
-
-       if (fts_user_init_languages(user, fuser, error_r) < 0 ||
-           fts_user_init_data_language(user, fuser, error_r) < 0)
-               return -1;
-       if (fts_user_languages_fill_all(user, fuser, error_r) < 0)
-               return -1;
-       return 0;
-}
-
 size_t fts_mail_user_message_max_size(struct mail_user *user)
 {
        struct fts_user *fuser = FTS_USER_CONTEXT_REQUIRE(user);
@@ -412,15 +63,14 @@ int fts_mail_user_init(struct mail_user *user, bool initialize_libfts,
        if (settings_get(user->event, &fts_setting_parser_info, 0, &set, error_r) < 0)
                return -1;
 
+       if (lang_user_init(user, initialize_libfts, error_r) < 0) {
+               settings_free(set);
+               return -1;
+       }
+
        fuser = p_new(user->pool, struct fts_user, 1);
        fuser->set = set;
        fuser->refcount = 1;
-       if (initialize_libfts) {
-               if (fts_mail_user_init_libfts(user, fuser, error_r) < 0) {
-                       fts_user_free(fuser);
-                       return -1;
-               }
-       }
 
        MODULE_CONTEXT_SET(user, fts_user_module, fuser);
        return 0;
@@ -432,7 +82,9 @@ void fts_mail_user_deinit(struct mail_user *user)
 
        if (fuser != NULL) {
                i_assert(fuser->refcount > 0);
-               if (--fuser->refcount == 0)
-                       fts_user_free(fuser);
+               if (--fuser->refcount == 0) {
+                       settings_free(fuser->set);
+                       lang_user_deinit(user);
+               }
        }
 }
diff --git a/src/plugins/fts/fts-user.h b/src/plugins/fts/fts-user.h
index 6fa4ea0c49fee1e18dcb21f1a44f216f658e1ae3..98cf8972395d5d20cb3cf09fd836d53e5f42be58 100644 (file)
@@ -3,23 +3,6 @@
 
 #include "fts-settings.h"
 
-struct fts_user_language {
-       const struct language *lang;
-       struct lang_filter *filter;
-       struct lang_tokenizer *index_tokenizer, *search_tokenizer;
-};
-ARRAY_DEFINE_TYPE(fts_user_language, struct fts_user_language *);
-
-struct fts_user_language *
-fts_user_language_find(struct mail_user *user,
-                       const struct language *lang);
-struct language_list *fts_user_get_language_list(struct mail_user *user);
-const ARRAY_TYPE(fts_user_language) *
-fts_user_get_all_languages(struct mail_user *user);
-struct fts_user_language *fts_user_get_data_lang(struct mail_user *user);
-const ARRAY_TYPE(fts_user_language) *
-fts_user_get_data_languages(struct mail_user *user);
-
 const struct fts_settings *fts_user_get_settings(struct mail_user *user);
 int fts_user_try_get_settings(struct mail_user *user,
                              const struct fts_settings **set_r);