From: Stephan Bosch Date: Tue, 1 Apr 2025 02:43:33 +0000 (+0200) Subject: lib-language: Substitute libicu lowercase mapping implementation with our own X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=f4eed047d36e10e0278b0da3e27513ce0e84355b;p=thirdparty%2Fdovecot%2Fcore.git lib-language: Substitute libicu lowercase mapping implementation with our own --- diff --git a/src/lib-language/lang-filter-lowercase.c b/src/lib-language/lang-filter-lowercase.c index 2d7535b4c6..75fe5dc0f3 100644 --- a/src/lib-language/lang-filter-lowercase.c +++ b/src/lib-language/lang-filter-lowercase.c @@ -2,14 +2,11 @@ #include "lib.h" #include "str.h" +#include "unichar.h" #include "language.h" #include "lang-settings.h" #include "lang-filter-private.h" -#ifdef HAVE_LIBICU -# include "lang-icu.h" -#endif - static int lang_filter_lowercase_create(const struct lang_settings *set ATTR_UNUSED, struct event *event ATTR_UNUSED, @@ -30,13 +27,7 @@ lang_filter_lowercase_filter(struct lang_filter *filter ATTR_UNUSED, const char **token, const char **error_r ATTR_UNUSED) { -#ifdef HAVE_LIBICU - str_truncate(filter->token, 0); - lang_icu_lcase(filter->token, *token); - *token = str_c(filter->token); -#else - *token = t_str_lcase(*token); -#endif + (void)uni_utf8_to_lowercase(*token, strlen(*token), token); return 1; } diff --git a/src/lib-language/lang-icu.c b/src/lib-language/lang-icu.c index 6bfd6cf870..6e68fc4362 100644 --- a/src/lib-language/lang-icu.c +++ b/src/lib-language/lang-icu.c @@ -13,20 +13,6 @@ static struct UCaseMap *icu_csm = NULL; -static struct UCaseMap *lang_icu_csm(void) -{ - UErrorCode err = U_ZERO_ERROR; - - if (icu_csm != NULL) - return icu_csm; - icu_csm = ucasemap_open(NULL, U_FOLD_CASE_DEFAULT, &err); - if (U_FAILURE(err)) { - i_fatal("LibICU ucasemap_open() failed: %s", - u_errorName(err)); - } - return icu_csm; -} - void lang_icu_utf8_to_utf16(ARRAY_TYPE(icu_utf16) *dest_utf16, const char *src_utf8) { @@ -134,36 +120,6 @@ int 
lang_icu_translate(ARRAY_TYPE(icu_utf16) *dest_utf16, const UChar *src_utf16 return 0; } -void lang_icu_lcase(string_t *dest_utf8, const char *src_utf8) -{ - struct UCaseMap *csm = lang_icu_csm(); - size_t avail_bytes, dest_pos = dest_utf8->used; - char *dest_data; - int dest_full_len; - UErrorCode err = U_ZERO_ERROR; - - avail_bytes = buffer_get_writable_size(dest_utf8) - dest_pos; - dest_data = buffer_get_space_unsafe(dest_utf8, dest_pos, avail_bytes); - - /* ucasemap_utf8ToLower() may need to be called multiple times, because - the first return value may not be large enough. */ - for (unsigned int i = 0;; i++) { - dest_full_len = ucasemap_utf8ToLower(csm, dest_data, avail_bytes, - src_utf8, -1, &err); - if (err != U_BUFFER_OVERFLOW_ERROR || i == 2) - break; - - err = U_ZERO_ERROR; - dest_data = buffer_get_space_unsafe(dest_utf8, dest_pos, dest_full_len); - avail_bytes = dest_full_len; - } - if (U_FAILURE(err)) { - i_fatal("LibICU ucasemap_utf8ToLower() failed: %s", - u_errorName(err)); - } - buffer_set_used_size(dest_utf8, dest_full_len); -} - void lang_icu_deinit(void) { if (icu_csm != NULL) { diff --git a/src/lib-language/lang-icu.h b/src/lib-language/lang-icu.h index 2168477af2..29b8e12e9d 100644 --- a/src/lib-language/lang-icu.h +++ b/src/lib-language/lang-icu.h @@ -16,8 +16,6 @@ void lang_icu_utf16_to_utf8(string_t *dest_utf8, const UChar *src_utf16, int lang_icu_translate(ARRAY_TYPE(icu_utf16) *dest_utf16, const UChar *src_utf16, unsigned int src_len, UTransliterator *transliterator, const char **error_r); -/* Lowercase the given UTF-8 string. */ -void lang_icu_lcase(string_t *dest_utf8, const char *src_utf8); /* Free all the memory used by ICU functions. 
*/ void lang_icu_deinit(void); diff --git a/src/lib-language/test-lang-filter.c b/src/lib-language/test-lang-filter.c index cc6f15254a..b7842b67a6 100644 --- a/src/lib-language/test-lang-filter.c +++ b/src/lib-language/test-lang-filter.c @@ -137,7 +137,6 @@ static void test_lang_filter_lowercase(void) test_end(); } -#ifdef HAVE_LIBICU static void test_lang_filter_lowercase_utf8(void) { static const struct { @@ -165,8 +164,6 @@ static void test_lang_filter_lowercase_utf8(void) test_end(); } -#endif - static void test_lang_filter_stopwords_eng(void) { struct lang_filter *filter; diff --git a/src/lib-language/test-lang-icu.c b/src/lib-language/test-lang-icu.c index 994dc8981d..e2a5b56bf8 100644 --- a/src/lib-language/test-lang-icu.c +++ b/src/lib-language/test-lang-icu.c @@ -143,45 +143,6 @@ static void test_lang_icu_translate_resize(void) test_end(); } -static void test_lang_icu_lcase(void) -{ - const char *src = "aBcD\xC3\x84\xC3\xA4"; - string_t *dest = t_str_new(64); - - test_begin("lang_icu_lcase"); - lang_icu_lcase(dest, src); - test_assert(strcmp(str_c(dest), "abcd\xC3\xA4\xC3\xA4") == 0); - test_end(); -} - -static void test_lang_icu_lcase_resize(void) -{ - const char *src = "a\xC3\x84"; - string_t *dest; - unsigned int i; - - test_begin("lang_icu_lcase resize"); - for (i = 1; i <= 3; i++) { - dest = t_str_new(i); - test_assert(buffer_get_writable_size(dest) == i); - lang_icu_lcase(dest, src); - test_assert(strcmp(str_c(dest), "a\xC3\xA4") == 0); - test_assert(buffer_get_writable_size(dest) == 3); - } - - test_end(); -} - -static void test_lang_icu_lcase_resize_invalid_utf8(void) -{ - string_t *dest; - - test_begin("lang_icu_lcase resize invalid utf8"); - dest = t_str_new(1); - lang_icu_lcase(dest, ".\x80."); - test_end(); -} - int main(void) { static void (*const test_functions[])(void) = { @@ -191,9 +152,6 @@ int main(void) test_lang_icu_utf16_to_utf8_resize, test_lang_icu_translate, test_lang_icu_translate_resize, - test_lang_icu_lcase, - 
test_lang_icu_lcase_resize, - test_lang_icu_lcase_resize_invalid_utf8, NULL }; int ret = test_run(test_functions); diff --git a/src/lib/test-unicode-casemap.c b/src/lib/test-unicode-casemap.c index 49fe8ec24d..2b083632c1 100644 --- a/src/lib/test-unicode-casemap.c +++ b/src/lib/test-unicode-casemap.c @@ -23,6 +23,14 @@ static const struct casemap_test { /* weisskopfseeadler */ .casefold = "weisskopfseeadler", }, + { + /* aBcDÄä */ + .input = "aBcD\xC3\x84\xC3\xA4", + /* ABCDÄÄ */ + .uppercase = "ABCD\xC3\x84\xC3\x84", + /* abcdää */ + .lowercase = "abcd\xC3\xA4\xC3\xA4", + } }; static const unsigned int tests_count = N_ELEMENTS(tests);