git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
lib-language: Add language config-rewrite settings
author Marco Bettini <marco.bettini@open-xchange.com>
Wed, 24 Jan 2024 08:21:12 +0000 (08:21 +0000)
committer Aki Tuomi <aki.tuomi@open-xchange.com>
Fri, 17 Jan 2025 08:39:58 +0000 (10:39 +0200)
src/lib-language/lang-settings.c
src/lib-language/lang-settings.h
src/lib-storage/lang-user.c

index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..7e21d69efe8f45d71c6edb4877a3c23226985746 100644 (file)
@@ -0,0 +1,136 @@
+/* Copyright (c) 2023 Dovecot Oy, see the included COPYING file */
+
+#include "lib.h"
+#include "array.h"
+#include "settings.h"
+#include "settings-parser.h"
+#include "lang-settings.h"
+
+/* <settings checks> */
+static bool langs_settings_ext_check(struct event *event, void *_set,
+                                    pool_t pool, const char **error_r); /* forward declaration: langs_setting_parser_info.ext_check_func refers to it before the definition further down */
+/* </settings checks> */
+
+#undef DEF
+#define DEF(_type, name) SETTING_DEFINE_STRUCT_##_type( \
+       "language_"#name, name, struct lang_settings)
+/* DEF(T, x) expands to SETTING_DEFINE_STRUCT_T("language_x", x, struct lang_settings). */
+static const struct setting_define lang_setting_defines[] = { /* table used as lang_setting_parser_info.defines */
+       DEF(STR, name),
+       DEF(BOOLLIST, filters),
+       DEF(UINT, filter_lowercase_token_maxlen),
+       DEF(STR,  filter_normalizer_icu_id),
+       DEF(UINT, filter_normalizer_token_maxlen),
+       DEF(STR,  filter_stopwords_dir),
+       DEF(BOOLLIST, tokenizers),
+       DEF(UINT, tokenizer_address_token_maxlen),
+       DEF(STR,  tokenizer_generic_algorithm),
+       DEF(BOOL, tokenizer_generic_explicit_prefix),
+       DEF(UINT, tokenizer_generic_token_maxlen),
+       DEF(BOOL, tokenizer_generic_wb5a),
+       SETTING_DEFINE_LIST_END
+};
+
+const struct lang_settings lang_default_settings = { /* used as lang_setting_parser_info.defaults; non-static and declared extern in lang-settings.h */
+       .name = "",
+       .filters = ARRAY_INIT,
+       .filter_lowercase_token_maxlen = 250,
+       .filter_normalizer_token_maxlen = 250,
+       .filter_normalizer_icu_id = "Any-Lower; NFKD; [: Nonspacing Mark :] Remove; NFC; [\\x20] Remove",
+       .filter_stopwords_dir = DATADIR"/stopwords", /* DATADIR is not defined in this file; presumably supplied by the build - confirm */
+       .tokenizers = ARRAY_INIT,
+       .tokenizer_address_token_maxlen = 250,
+       .tokenizer_generic_algorithm = "simple",
+       .tokenizer_generic_explicit_prefix = FALSE,
+       .tokenizer_generic_token_maxlen = 30, /* note: smaller than the 250 used by the other *_token_maxlen defaults */
+       .tokenizer_generic_wb5a = FALSE,
+};
+
+const struct setting_parser_info lang_setting_parser_info = { /* describes one language block; passed to settings_get_filter() in langs_settings_ext_check() */
+       .name = "language",
+
+       .defines = lang_setting_defines,
+       .defaults = &lang_default_settings,
+
+       .struct_size = sizeof(struct lang_settings),
+       .pool_offset1 = 1 + offsetof(struct lang_settings, pool), /* offset of the pool member plus one; NOTE(review): the "1" suffix suggests a 1-based offset - confirm */
+};
+
+#undef DEF
+#define DEF(_type, name) SETTING_DEFINE_STRUCT_##_type( \
+       #name, name, struct langs_settings)
+/* DEF is redefined here: the key is the bare member name of struct langs_settings, with no prefix. */
+static const struct setting_define langs_setting_defines[] = { /* table used as langs_setting_parser_info.defines */
+       { .type = SET_FILTER_ARRAY, .key = "language",
+         .offset = offsetof(struct langs_settings, languages),
+         .filter_array_field_name = "name", }, /* "language" filter array stored in the languages member; NOTE(review): "name" presumably selects the lang_settings field that identifies each filter - confirm */
+       DEF(STR, textcat_config_path),
+       SETTING_DEFINE_LIST_END
+};
+
+static const struct langs_settings langs_default_settings = { /* used as langs_setting_parser_info.defaults; members not listed here are zero-initialized */
+       .textcat_config_path = "",
+};
+
+const struct setting_parser_info langs_setting_parser_info = { /* parser description for struct langs_settings; declared extern in lang-settings.h */
+       .name = "languages",
+
+       .defines = langs_setting_defines,
+       .defaults = &langs_default_settings,
+       .ext_check_func = langs_settings_ext_check, /* validation callback defined later in this file */
+
+       .struct_size = sizeof(struct langs_settings),
+       .pool_offset1 = 1 + offsetof(struct langs_settings, pool), /* offset of the pool member plus one */
+};
+
+/* <settings checks> */
+
+static bool langs_settings_ext_check(struct event *event, void *_set,
+                                    pool_t pool, const char **error_r)
+{ /* Checks a struct langs_settings (_set) and fills set->parsed_languages; on failure returns FALSE with *error_r set. */
+       struct langs_settings *set = _set;
+       if (array_is_empty(&set->languages)) {
+#ifdef CONFIG_BINARY
+               return TRUE; /* builds with CONFIG_BINARY defined accept a configuration without any language */
+#else
+               *error_r = "No language { .. } defined";
+               return FALSE;
+#endif
+       }
+       /* One parsed_languages entry is appended per name in set->languages. */
+       const char *filter_name;
+       unsigned int nondata_languages = 0;
+       p_array_init(&set->parsed_languages, pool, array_count(&set->languages));
+       array_foreach_elem(&set->languages, filter_name) {
+               const struct lang_settings *lang_set;
+               const char *error;
+
+               if (settings_get_filter(event, "language", filter_name,
+                                       &lang_setting_parser_info, 0,
+                                       &lang_set, &error) < 0) {
+                       *error_r = t_strdup_printf(
+                               "Failed to get language %s: %s",
+                               filter_name, error);
+                       return FALSE;
+               }
+               /* Entries named LANGUAGE_DATA are stored like any other but do not count towards the check after the loop. */
+               bool is_data = strcmp(lang_set->name, LANGUAGE_DATA) == 0;
+               if (!is_data)
+                       nondata_languages++;
+               /* Store a struct copy allocated from the caller's pool. NOTE(review): the external ref presumably keeps lang_set->pool (and whatever the copy points into) alive after settings_free() - confirm. */
+               struct lang_settings *lang_set_dup =
+                       p_memdup(pool, lang_set, sizeof(*lang_set));
+               pool_add_external_ref(pool, lang_set->pool);
+               array_push_back(&set->parsed_languages, &lang_set_dup);
+               settings_free(lang_set);
+       }
+
+       if (nondata_languages == 0) { /* every configured language was named LANGUAGE_DATA */
+               *error_r = "No valid languages";
+               return FALSE;
+       }
+
+       return TRUE;
+}
+
+/* </settings checks> */
index 41c56f5875e241ec40366c07f3a2bd39d01d46b8..968631c6f39c1c8ac0704a6c3ef33ef5859ee21f 100644 (file)
@@ -3,15 +3,36 @@
 
 #include "array.h"
 
+/* <settings checks> */
+#define LANGUAGE_DATA "data" /* language name that langs_settings_ext_check() does not count as a valid language on its own */
+/* </settings checks> */
+
 ARRAY_DEFINE_TYPE(lang_settings, struct lang_settings *);
 struct lang_settings { /* settings of a single language block; members map to the "language_<member>" keys in lang-settings.c */
        pool_t pool; /* located through lang_setting_parser_info.pool_offset1 */
+       const char *name; /* compared against LANGUAGE_DATA in langs_settings_ext_check() */
+       const char *filter_normalizer_icu_id;
+       const char *filter_stopwords_dir;
+       const char *tokenizer_generic_algorithm;
+       ARRAY_TYPE(const_string) filters; /* defined as BOOLLIST in lang_setting_defines */
+       ARRAY_TYPE(const_string) tokenizers; /* defined as BOOLLIST in lang_setting_defines */
+       unsigned int filter_lowercase_token_maxlen;
+       unsigned int filter_normalizer_token_maxlen;
+       unsigned int tokenizer_address_token_maxlen;
+       unsigned int tokenizer_generic_token_maxlen;
+       bool tokenizer_generic_explicit_prefix;
+       bool tokenizer_generic_wb5a;
 };
 
 struct langs_settings { /* top-level settings described by langs_setting_parser_info */
        pool_t pool; /* located through langs_setting_parser_info.pool_offset1 */
+       ARRAY_TYPE(const_string) languages; /* target of the "language" SET_FILTER_ARRAY define; iterated as filter names in langs_settings_ext_check() */
+       const char *textcat_config_path; /* defaults to "" */
+       /* Not listed in langs_setting_defines: filled by langs_settings_ext_check(). */
+       ARRAY_TYPE(lang_settings) parsed_languages;
 };
 
+extern const struct lang_settings lang_default_settings;
 extern const struct setting_parser_info langs_setting_parser_info;
 
 #endif
index 39be7963600d179448bfcf38077a191f84503759..3ed4e9be2bdf9c4a9c154e178cdc1ec12342159c 100644 (file)
@@ -385,7 +385,7 @@ int lang_user_init(struct mail_user *user, bool initialize_libfts,
        struct lang_user *luser = LANG_USER_CONTEXT(user);
 
        if (luser != NULL) {
-               /* multiple fts plugins are loaded */
+               /* language user confs loaded multiple times */
                luser->refcount++;
                return 0;
        }
@@ -397,14 +397,15 @@ int lang_user_init(struct mail_user *user, bool initialize_libfts,
        luser = p_new(user->pool, struct lang_user, 1);
        luser->set = set;
        luser->refcount = 1;
+
+       MODULE_CONTEXT_SET(user, lang_user_module, luser);
        if (initialize_libfts) {
                if (lang_user_init_libfts(user, luser, error_r) < 0) {
+                       MODULE_CONTEXT_UNSET(user, lang_user_module);
                        lang_user_free(luser);
                        return -1;
                }
        }
-
-       MODULE_CONTEXT_SET(user, lang_user_module, luser);
        return 0;
 }