From: Timo Sirainen Date: Wed, 3 Feb 2021 17:27:08 +0000 (+0200) Subject: lib-fts, fts: fts_language_detect() - Return error string instead of logging it X-Git-Tag: 2.3.15~335 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=dc78357452fae426795010785b1c3ac32a5808c6;p=thirdparty%2Fdovecot%2Fcore.git lib-fts, fts: fts_language_detect() - Return error string instead of logging it --- diff --git a/src/lib-fts/fts-language.c b/src/lib-fts/fts-language.c index fb5c0ea9a8..2f1d11228f 100644 --- a/src/lib-fts/fts-language.c +++ b/src/lib-fts/fts-language.c @@ -26,7 +26,7 @@ struct fts_language_list { const char *textcat_config; const char *textcat_datadir; void *textcat_handle; - bool textcat_failed; + const char *textcat_failed; }; pool_t fts_languages_pool; @@ -224,7 +224,8 @@ static bool fts_language_match_lists(struct fts_language_list *list, #endif #ifdef HAVE_FTS_EXTTEXTCAT -static int fts_language_textcat_init(struct fts_language_list *list) +static int fts_language_textcat_init(struct fts_language_list *list, + const char **error_r) { const char *config_path; const char *data_dir; @@ -232,8 +233,10 @@ static int fts_language_textcat_init(struct fts_language_list *list) if (list->textcat_handle != NULL) return 0; - if (list->textcat_failed) + if (list->textcat_failed != NULL) { + *error_r = list->textcat_failed; return -1; + } config_path = list->textcat_config != NULL ? list->textcat_config : TEXTCAT_DATADIR"/fpdb.conf"; @@ -241,9 +244,9 @@ static int fts_language_textcat_init(struct fts_language_list *list) TEXTCAT_DATADIR"/"; list->textcat_handle = special_textcat_Init(config_path, data_dir); if (list->textcat_handle == NULL) { - i_error("special_textcat_Init(%s, %s) failed", + *error_r = list->textcat_failed = p_strdup_printf(list->pool, + "special_textcat_Init(%s, %s) failed", config_path, data_dir); - list->textcat_failed = TRUE; return -1; } /* The textcat minimum document size could be set here. It @@ -256,14 +259,15 @@ static enum fts_language_result fts_language_detect_textcat(struct fts_language_list *list ATTR_UNUSED, const unsigned char *text ATTR_UNUSED, size_t size ATTR_UNUSED, - const struct fts_language **lang_r ATTR_UNUSED) + const struct fts_language **lang_r ATTR_UNUSED, + const char **error_r ATTR_UNUSED) { #ifdef HAVE_FTS_EXTTEXTCAT candidate_t *candp; /* textcat candidate result array pointer */ int cnt; bool match = FALSE; - if (fts_language_textcat_init(list) < 0) + if (fts_language_textcat_init(list, error_r) < 0) return FTS_LANGUAGE_RESULT_ERROR; candp = textcat_GetClassifyFullOutput(list->textcat_handle); @@ -301,7 +305,8 @@ enum fts_language_result fts_language_detect(struct fts_language_list *list, const unsigned char *text ATTR_UNUSED, size_t size ATTR_UNUSED, - const struct fts_language **lang_r) + const struct fts_language **lang_r, + const char **error_r) { i_assert(array_count(&list->languages) > 0); @@ -312,5 +317,5 @@ fts_language_detect(struct fts_language_list *list, *lang_r = *langp; return FTS_LANGUAGE_RESULT_OK; } - return fts_language_detect_textcat(list, text, size, lang_r); + return fts_language_detect_textcat(list, text, size, lang_r, error_r); } diff --git a/src/lib-fts/fts-language.h b/src/lib-fts/fts-language.h index 85b2bc22df..884998f07f 100644 --- a/src/lib-fts/fts-language.h +++ b/src/lib-fts/fts-language.h @@ -62,10 +62,11 @@ fts_language_list_get_first(struct fts_language_list *list); /* If text was detected to be one of the languages in the list, returns FTS_LANGUAGE_RESULT_OK and (a pointer to) the language (in - the list). */ + the list). error_r is set for FTS_LANGUAGE_RESULT_ERROR. */ enum fts_language_result fts_language_detect(struct fts_language_list *list, const unsigned char *text, size_t size, - const struct fts_language **lang_r); + const struct fts_language **lang_r, + const char **error_r); #endif diff --git a/src/lib-fts/test-fts-language.c b/src/lib-fts/test-fts-language.c index d7afc6b928..e5968e9bdc 100644 --- a/src/lib-fts/test-fts-language.c +++ b/src/lib-fts/test-fts-language.c @@ -28,7 +28,7 @@ static void test_fts_language_detect_finnish(void) test_begin("fts language detect Finnish"); test_assert(fts_language_list_init(settings, &lp, &error) == 0); test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE); - test_assert(fts_language_detect(lp, finnish, sizeof(finnish)-1, &lang_r) + test_assert(fts_language_detect(lp, finnish, sizeof(finnish)-1, &lang_r, &error) == FTS_LANGUAGE_RESULT_OK); test_assert(strcmp(lang_r->name, "fi") == 0); fts_language_list_deinit(&lp); @@ -55,7 +55,7 @@ static void test_fts_language_detect_english(void) test_begin("fts language detect English"); test_assert(fts_language_list_init(settings, &lp, &error) == 0); test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE); - test_assert(fts_language_detect(lp, english, sizeof(english)-1, &lang_r) + test_assert(fts_language_detect(lp, english, sizeof(english)-1, &lang_r, &error) == FTS_LANGUAGE_RESULT_OK); test_assert(strcmp(lang_r->name, "en") == 0); fts_language_list_deinit(&lp); @@ -90,7 +90,7 @@ static void test_fts_language_detect_french(void) test_begin("fts language detect French"); test_assert(fts_language_list_init(settings, &lp, &error) == 0); test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE); - test_assert(fts_language_detect(lp, french, sizeof(french)-1, &lang_r) + test_assert(fts_language_detect(lp, french, sizeof(french)-1, &lang_r, &error) == FTS_LANGUAGE_RESULT_OK); test_assert(strcmp(lang_r->name, "fr") == 0); fts_language_list_deinit(&lp); @@ -127,7 +127,7 @@ static void test_fts_language_detect_german(void) test_begin("fts language detect German"); test_assert(fts_language_list_init(settings, &lp, &error) == 0); test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE); - test_assert(fts_language_detect(lp, german, sizeof(german)-1, &lang_r) + test_assert(fts_language_detect(lp, german, sizeof(german)-1, &lang_r, &error) == FTS_LANGUAGE_RESULT_OK); test_assert(strcmp(lang_r->name, "de") == 0); fts_language_list_deinit(&lp); @@ -153,7 +153,7 @@ static void test_fts_language_detect_swedish(void) test_begin("fts language detect Swedish"); test_assert(fts_language_list_init(settings, &lp, &error) == 0); test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE); - test_assert(fts_language_detect(lp, swedish, sizeof(swedish)-1, &lang_r) + test_assert(fts_language_detect(lp, swedish, sizeof(swedish)-1, &lang_r, &error) == FTS_LANGUAGE_RESULT_OK); test_assert(strcmp(lang_r->name, "sv") == 0); fts_language_list_deinit(&lp); @@ -177,7 +177,7 @@ static void test_fts_language_detect_bokmal(void) test_begin("fts language detect Bokmal as Norwegian"); test_assert(fts_language_list_init(settings, &lp, &error) == 0); test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE); - test_assert(fts_language_detect(lp, bokmal, sizeof(bokmal)-1, &lang_r) + test_assert(fts_language_detect(lp, bokmal, sizeof(bokmal)-1, &lang_r, &error) == FTS_LANGUAGE_RESULT_OK); test_assert(strcmp(lang_r->name, "no") == 0); fts_language_list_deinit(&lp); @@ -201,7 +201,7 @@ static void test_fts_language_detect_nynorsk(void) test_begin("fts language detect Nynorsk as Norwegian"); test_assert(fts_language_list_init(settings, &lp, &error) == 0); test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE); - test_assert(fts_language_detect(lp, nynorsk, sizeof(nynorsk)-1, &lang_r) + test_assert(fts_language_detect(lp, nynorsk, sizeof(nynorsk)-1, &lang_r, &error) == FTS_LANGUAGE_RESULT_OK); test_assert(strcmp(lang_r->name, "no") == 0); fts_language_list_deinit(&lp); @@ -226,7 +226,7 @@ static void test_fts_language_detect_finnish_as_english(void) test_begin("fts language detect Finnish as English"); test_assert(fts_language_list_init(settings, &lp, &error) == 0); test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE); - test_assert(fts_language_detect(lp, finnish, sizeof(finnish)-1, &lang_r) + test_assert(fts_language_detect(lp, finnish, sizeof(finnish)-1, &lang_r, &error) == FTS_LANGUAGE_RESULT_OK); test_assert(strcmp(lang_r->name, "en") == 0); fts_language_list_deinit(&lp); @@ -253,7 +253,7 @@ static void test_fts_language_detect_na(void) test_begin("fts language detect not available"); test_assert(fts_language_list_init(settings, &lp, &error) == 0); test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE); - test_assert(fts_language_detect(lp, english, sizeof(english)-1, &lang_r) + test_assert(fts_language_detect(lp, english, sizeof(english)-1, &lang_r, &error) == FTS_LANGUAGE_RESULT_UNKNOWN); fts_language_list_deinit(&lp); test_end(); @@ -273,7 +273,7 @@ static void test_fts_language_detect_unknown(void) test_begin("fts language detect unknown"); test_assert(fts_language_list_init(settings, &lp, &error) == 0); test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE); - test_assert(fts_language_detect(lp, klingon, sizeof(klingon), &lang_r) + test_assert(fts_language_detect(lp, klingon, sizeof(klingon), &lang_r, &error) == FTS_LANGUAGE_RESULT_UNKNOWN); fts_language_list_deinit(&lp); test_end(); diff --git a/src/plugins/fts/doveadm-fts.c b/src/plugins/fts/doveadm-fts.c index 697e3d07b7..1b902a1ee6 100644 --- a/src/plugins/fts/doveadm-fts.c +++ b/src/plugins/fts/doveadm-fts.c @@ -190,10 +190,11 @@ cmd_fts_tokenize_run(struct doveadm_mail_cmd_context *_ctx, struct fts_language_list *lang_list = fts_user_get_language_list(user); enum fts_language_result result; + const char *error; result = fts_language_detect(lang_list, (const unsigned char *)ctx->tokens, strlen(ctx->tokens), - &lang); + &lang, &error); if (lang == NULL) lang = fts_language_list_get_first(lang_list); switch (result) { @@ -206,7 +207,7 @@ cmd_fts_tokenize_run(struct doveadm_mail_cmd_context *_ctx, case FTS_LANGUAGE_RESULT_OK: break; case FTS_LANGUAGE_RESULT_ERROR: - i_error("Language detection library initialization failed"); + i_error("Language detection library initialization failed: %s", error); _ctx->exit_code = EX_CONFIG; return -1; default: diff --git a/src/plugins/fts/fts-build-mail.c b/src/plugins/fts/fts-build-mail.c index e088e7397b..ec353380a5 100644 --- a/src/plugins/fts/fts-build-mail.c +++ b/src/plugins/fts/fts-build-mail.c @@ -298,8 +298,9 @@ fts_detect_language(struct fts_mail_build_context *ctx, struct mail_user *user = ctx->update_ctx->backend->ns->user; struct fts_language_list *lang_list = fts_user_get_language_list(user); const struct fts_language *lang; + const char *error; - switch (fts_language_detect(lang_list, data, size, &lang)) { + switch (fts_language_detect(lang_list, data, size, &lang, &error)) { case FTS_LANGUAGE_RESULT_SHORT: /* save the input so far and try again later */ buffer_append(ctx->pending_input, data, size); @@ -319,6 +320,7 @@ fts_detect_language(struct fts_mail_build_context *ctx, case FTS_LANGUAGE_RESULT_ERROR: /* internal language detection library failure (e.g. invalid config). don't index anything. */ + i_error("Language detection library initialization failed: %s", error); return -1; default: i_unreached();