From: Timo Sirainen Date: Thu, 9 Dec 2021 17:06:11 +0000 (+0100) Subject: lib-fts: Reuse textcat handle between sessions X-Git-Tag: 2.3.18~45 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=ac7bcc31c9ad1cc746494c1d655482a3df5fcedf;p=thirdparty%2Fdovecot%2Fcore.git lib-fts: Reuse textcat handle between sessions textcat initialization is rather CPU intensive. Its configuration is normally always the same between sessions, so we can keep the latest textcat handle cached. --- diff --git a/src/lib-fts/fts-language.c b/src/lib-fts/fts-language.c index 2efb6e4d65..7874d3a713 100644 --- a/src/lib-fts/fts-language.c +++ b/src/lib-fts/fts-language.c @@ -20,17 +20,25 @@ #define DETECT_STR_MAX_LEN 200 +struct fts_textcat { + int refcount; + void *handle; + char *config_path, *data_dir, *failed; +}; + struct fts_language_list { pool_t pool; ARRAY_TYPE(fts_language) languages; + struct fts_textcat *textcat; const char *textcat_config; const char *textcat_datadir; - void *textcat_handle; - const char *textcat_failed; }; pool_t fts_languages_pool; ARRAY_TYPE(fts_language) fts_languages; +#ifdef HAVE_FTS_EXTTEXTCAT +static struct fts_textcat *fts_textcat_cache = NULL; +#endif /* ISO 639-1 alpha 2 codes for languages */ const struct fts_language fts_languages_builtin [] = { @@ -54,6 +62,25 @@ const struct fts_language fts_language_data = { "data" }; +#ifdef HAVE_FTS_EXTTEXTCAT +static void fts_textcat_unref(struct fts_textcat *textcat) +{ + i_assert(textcat->refcount > 0); + if (--textcat->refcount > 0) + return; + + if (textcat == fts_textcat_cache) + fts_textcat_cache = NULL; + + i_free(textcat->config_path); + i_free(textcat->data_dir); + i_free(textcat->failed); + if (textcat->handle != NULL) + textcat_Done(textcat->handle); + i_free(textcat); +} +#endif + void fts_languages_init(void) { unsigned int i; @@ -71,6 +98,10 @@ void fts_languages_init(void) void fts_languages_deinit(void) { +#ifdef HAVE_FTS_EXTTEXTCAT + if (fts_textcat_cache != NULL) + fts_textcat_unref(fts_textcat_cache); +#endif pool_unref(&fts_languages_pool); } @@ -141,8 +172,8 @@ void fts_language_list_deinit(struct fts_language_list **list) *list = NULL; #ifdef HAVE_FTS_EXTTEXTCAT - if (lp->textcat_handle != NULL) - textcat_Done(lp->textcat_handle); + if (lp->textcat != NULL) + fts_textcat_unref(lp->textcat); #endif pool_unref(&lp->pool); } @@ -230,23 +261,39 @@ static int fts_language_textcat_init(struct fts_language_list *list, const char *config_path; const char *data_dir; - if (list->textcat_handle != NULL) + if (list->textcat != NULL) { + if (list->textcat->failed != NULL) { + *error_r = list->textcat->failed; + return -1; + } + i_assert(list->textcat->handle != NULL); return 0; - - if (list->textcat_failed != NULL) { - *error_r = list->textcat_failed; - return -1; } - + config_path = list->textcat_config != NULL ? list->textcat_config : TEXTCAT_DATADIR"/fpdb.conf"; data_dir = list->textcat_datadir != NULL ? list->textcat_datadir : TEXTCAT_DATADIR"/"; - list->textcat_handle = special_textcat_Init(config_path, data_dir); - if (list->textcat_handle == NULL) { - *error_r = list->textcat_failed = p_strdup_printf(list->pool, + if (fts_textcat_cache != NULL) { + if (strcmp(fts_textcat_cache->config_path, config_path) == 0 && + strcmp(fts_textcat_cache->data_dir, data_dir) == 0) { + list->textcat = fts_textcat_cache; + list->textcat->refcount++; + return 0; + } + fts_textcat_unref(fts_textcat_cache); + } + + fts_textcat_cache = list->textcat = i_new(struct fts_textcat, 1); + fts_textcat_cache->refcount = 2; + fts_textcat_cache->config_path = i_strdup(config_path); + fts_textcat_cache->data_dir = i_strdup(data_dir); + fts_textcat_cache->handle = special_textcat_Init(config_path, data_dir); + if (fts_textcat_cache->handle == NULL) { + fts_textcat_cache->failed = i_strdup_printf( "special_textcat_Init(%s, %s) failed", config_path, data_dir); + *error_r = fts_textcat_cache->failed; return -1; } /* The textcat minimum document size could be set here. It @@ -270,22 +317,22 @@ fts_language_detect_textcat(struct fts_language_list *list ATTR_UNUSED, if (fts_language_textcat_init(list, error_r) < 0) return FTS_LANGUAGE_RESULT_ERROR; - candp = textcat_GetClassifyFullOutput(list->textcat_handle); + candp = textcat_GetClassifyFullOutput(list->textcat->handle); if (candp == NULL) i_fatal_status(FATAL_OUTOFMEM, "textcat_GetCLassifyFullOutput failed: malloc() returned NULL"); - cnt = textcat_ClassifyFull(list->textcat_handle, (const void *)text, + cnt = textcat_ClassifyFull(list->textcat->handle, (const void *)text, I_MIN(size, DETECT_STR_MAX_LEN), candp); if (cnt > 0) { T_BEGIN { match = fts_language_match_lists(list, candp, cnt, lang_r); } T_END; - textcat_ReleaseClassifyFullOutput(list->textcat_handle, candp); + textcat_ReleaseClassifyFullOutput(list->textcat->handle, candp); if (match) return FTS_LANGUAGE_RESULT_OK; else return FTS_LANGUAGE_RESULT_UNKNOWN; } else { - textcat_ReleaseClassifyFullOutput(list->textcat_handle, candp); + textcat_ReleaseClassifyFullOutput(list->textcat->handle, candp); switch (cnt) { case TEXTCAT_RESULT_SHORT: i_assert(size < DETECT_STR_MAX_LEN);