From: Marco Bettini
Date: Wed, 24 Jan 2024 08:21:12 +0000 (+0000)
Subject: lib-language: Add language config-rewrite settings
X-Git-Tag: 2.4.1~1001
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=bb394a677929a0b515f809b5aa53d880c7635f0a;p=thirdparty%2Fdovecot%2Fcore.git

lib-language: Add language config-rewrite settings
---

Review notes (free-form text; not part of any hunk below):

 * src/lib-language/lang-settings.c (new file)
    . lang_setting_defines / lang_default_settings describe one language.
      The DEF() macro in effect there prefixes every key with "language_"
      and maps it onto the same-named field of struct lang_settings.
    . langs_setting_defines describes the container: a SET_FILTER_ARRAY
      with key "language" whose entries are identified by their "name"
      field, plus textcat_config_path.
    . langs_settings_ext_check() is installed as .ext_check_func of
      langs_setting_parser_info. It
        1. returns TRUE for an empty language list when CONFIG_BINARY is
           defined, otherwise fails with "No language { .. } defined";
        2. resolves every listed name with settings_get_filter(), and
           fails with "Failed to get language <name>: <error>" if that
           call returns < 0;
        3. stores a p_memdup() copy of each resolved struct in
           set->parsed_languages and calls pool_add_external_ref() on the
           resolved struct's pool before settings_free()ing it;
        4. fails with "No valid languages" unless at least one resolved
           language has a name different from LANGUAGE_DATA ("data").
    . NOTE(review): the four marker comments are written here as
      "<settings checks>" / "</settings checks>". The copy this patch was
      recovered from showed them as empty comments, which looks like
      angle-bracket text lost in extraction - confirm against the
      upstream commit.

 * src/lib-language/lang-settings.h
    . Adds LANGUAGE_DATA, the fields backing the defines above, the
      parsed_languages array, and an extern for lang_default_settings.

 * src/lib-storage/lang-user.c
    . lang_user_init() now calls MODULE_CONTEXT_SET() before
      lang_user_init_libfts() and MODULE_CONTEXT_UNSET() if that call
      fails, instead of setting the context only after it succeeded.

diff --git a/src/lib-language/lang-settings.c b/src/lib-language/lang-settings.c
index e69de29bb2..7e21d69efe 100644
--- a/src/lib-language/lang-settings.c
+++ b/src/lib-language/lang-settings.c
@@ -0,0 +1,136 @@
+/* Copyright (c) 2023 Dovecot Oy, see the included COPYING file */
+
+#include "lib.h"
+#include "array.h"
+#include "settings.h"
+#include "settings-parser.h"
+#include "lang-settings.h"
+
+/* <settings checks> */
+static bool langs_settings_ext_check(struct event *event, void *_set,
+				     pool_t pool, const char **error_r);
+/* </settings checks> */
+
+#undef DEF
+#define DEF(_type, name) SETTING_DEFINE_STRUCT_##_type( \
+	"language_"#name, name, struct lang_settings)
+
+static const struct setting_define lang_setting_defines[] = {
+	DEF(STR, name),
+	DEF(BOOLLIST, filters),
+	DEF(UINT, filter_lowercase_token_maxlen),
+	DEF(STR, filter_normalizer_icu_id),
+	DEF(UINT, filter_normalizer_token_maxlen),
+	DEF(STR, filter_stopwords_dir),
+	DEF(BOOLLIST, tokenizers),
+	DEF(UINT, tokenizer_address_token_maxlen),
+	DEF(STR, tokenizer_generic_algorithm),
+	DEF(BOOL, tokenizer_generic_explicit_prefix),
+	DEF(UINT, tokenizer_generic_token_maxlen),
+	DEF(BOOL, tokenizer_generic_wb5a),
+	SETTING_DEFINE_LIST_END
+};
+
+const struct lang_settings lang_default_settings = {
+	.name = "",
+	.filters = ARRAY_INIT,
+	.filter_lowercase_token_maxlen = 250,
+	.filter_normalizer_token_maxlen = 250,
+	.filter_normalizer_icu_id = "Any-Lower; NFKD; [: Nonspacing Mark :] Remove; NFC; [\\x20] Remove",
+	.filter_stopwords_dir = DATADIR"/stopwords",
+	.tokenizers = ARRAY_INIT,
+	.tokenizer_address_token_maxlen = 250,
+	.tokenizer_generic_algorithm = "simple",
+	.tokenizer_generic_explicit_prefix = FALSE,
+	.tokenizer_generic_token_maxlen = 30,
+	.tokenizer_generic_wb5a = FALSE,
+};
+
+const struct setting_parser_info lang_setting_parser_info = {
+	.name = "language",
+
+	.defines = lang_setting_defines,
+	.defaults = &lang_default_settings,
+
+	.struct_size = sizeof(struct lang_settings),
+	.pool_offset1 = 1 + offsetof(struct lang_settings, pool),
+};
+
+#undef DEF
+#define DEF(_type, name) SETTING_DEFINE_STRUCT_##_type( \
+	#name, name, struct langs_settings)
+
+static const struct setting_define langs_setting_defines[] = {
+	{ .type = SET_FILTER_ARRAY, .key = "language",
+	  .offset = offsetof(struct langs_settings, languages),
+	  .filter_array_field_name = "name", },
+	DEF(STR, textcat_config_path),
+	SETTING_DEFINE_LIST_END
+};
+
+static const struct langs_settings langs_default_settings = {
+	.textcat_config_path = "",
+};
+
+const struct setting_parser_info langs_setting_parser_info = {
+	.name = "languages",
+
+	.defines = langs_setting_defines,
+	.defaults = &langs_default_settings,
+	.ext_check_func = langs_settings_ext_check,
+
+	.struct_size = sizeof(struct langs_settings),
+	.pool_offset1 = 1 + offsetof(struct langs_settings, pool),
+};
+
+/* <settings checks> */
+
+static bool langs_settings_ext_check(struct event *event, void *_set,
+				     pool_t pool, const char **error_r)
+{
+	struct langs_settings *set = _set;
+	if (array_is_empty(&set->languages)) {
+#ifdef CONFIG_BINARY
+		return TRUE;
+#else
+		*error_r = "No language { .. } defined";
+		return FALSE;
+#endif
+	}
+
+	const char *filter_name;
+	unsigned int nondata_languages = 0;
+	p_array_init(&set->parsed_languages, pool, array_count(&set->languages));
+	array_foreach_elem(&set->languages, filter_name) {
+		const struct lang_settings *lang_set;
+		const char *error;
+
+		if (settings_get_filter(event, "language", filter_name,
+					&lang_setting_parser_info, 0,
+					&lang_set, &error) < 0) {
+			*error_r = t_strdup_printf(
+				"Failed to get language %s: %s",
+				filter_name, error);
+			return FALSE;
+		}
+
+		bool is_data = strcmp(lang_set->name, LANGUAGE_DATA) == 0;
+		if (!is_data)
+			nondata_languages++;
+
+		struct lang_settings *lang_set_dup =
+			p_memdup(pool, lang_set, sizeof(*lang_set));
+		pool_add_external_ref(pool, lang_set->pool);
+		array_push_back(&set->parsed_languages, &lang_set_dup);
+		settings_free(lang_set);
+	}
+
+	if (nondata_languages == 0) {
+		*error_r = "No valid languages";
+		return FALSE;
+	}
+
+	return TRUE;
+}
+
+/* </settings checks> */
diff --git a/src/lib-language/lang-settings.h b/src/lib-language/lang-settings.h
index 41c56f5875..968631c6f3 100644
--- a/src/lib-language/lang-settings.h
+++ b/src/lib-language/lang-settings.h
@@ -3,15 +3,36 @@
 
 #include "array.h"
 
+/* <settings checks> */
+#define LANGUAGE_DATA "data"
+/* </settings checks> */
+
 ARRAY_DEFINE_TYPE(lang_settings, struct lang_settings *);
 struct lang_settings {
 	pool_t pool;
+	const char *name;
+	const char *filter_normalizer_icu_id;
+	const char *filter_stopwords_dir;
+	const char *tokenizer_generic_algorithm;
+	ARRAY_TYPE(const_string) filters;
+	ARRAY_TYPE(const_string) tokenizers;
+	unsigned int filter_lowercase_token_maxlen;
+	unsigned int filter_normalizer_token_maxlen;
+	unsigned int tokenizer_address_token_maxlen;
+	unsigned int tokenizer_generic_token_maxlen;
+	bool tokenizer_generic_explicit_prefix;
+	bool tokenizer_generic_wb5a;
 };
 
 struct langs_settings {
 	pool_t pool;
+	ARRAY_TYPE(const_string) languages;
+	const char *textcat_config_path;
+
+	ARRAY_TYPE(lang_settings) parsed_languages;
 };
 
+extern const struct lang_settings lang_default_settings;
 extern const struct setting_parser_info langs_setting_parser_info;
 
 #endif
diff --git a/src/lib-storage/lang-user.c b/src/lib-storage/lang-user.c
index 39be796360..3ed4e9be2b 100644
--- a/src/lib-storage/lang-user.c
+++ b/src/lib-storage/lang-user.c
@@ -385,7 +385,7 @@ int lang_user_init(struct mail_user *user, bool initialize_libfts,
 	struct lang_user *luser = LANG_USER_CONTEXT(user);
 
 	if (luser != NULL) {
-		/* multiple fts plugins are loaded */
+		/* language user confs loaded multiple times */
 		luser->refcount++;
 		return 0;
 	}
@@ -397,14 +397,15 @@ int lang_user_init(struct mail_user *user, bool initialize_libfts,
 	luser = p_new(user->pool, struct lang_user, 1);
 	luser->set = set;
 	luser->refcount = 1;
+
+	MODULE_CONTEXT_SET(user, lang_user_module, luser);
 	if (initialize_libfts) {
 		if (lang_user_init_libfts(user, luser, error_r) < 0) {
+			MODULE_CONTEXT_UNSET(user, lang_user_module);
 			lang_user_free(luser);
 			return -1;
 		}
 	}
-
-	MODULE_CONTEXT_SET(user, lang_user_module, luser);
 	return 0;
 }
 