git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
lib-language: Substitute libicu lowercase mapping implementation with our own
author: Stephan Bosch <stephan.bosch@open-xchange.com>
Tue, 1 Apr 2025 02:43:33 +0000 (04:43 +0200)
committer: Stephan Bosch <stephan.bosch@open-xchange.com>
Fri, 1 Aug 2025 01:27:35 +0000 (03:27 +0200)
src/lib-language/lang-filter-lowercase.c
src/lib-language/lang-icu.c
src/lib-language/lang-icu.h
src/lib-language/test-lang-filter.c
src/lib-language/test-lang-icu.c
src/lib/test-unicode-casemap.c

diff --git a/src/lib-language/lang-filter-lowercase.c b/src/lib-language/lang-filter-lowercase.c
index 2d7535b4c6c97cf9afe5370ca97e1417ff1c33ce..75fe5dc0f3b1cc4fa0b4cdf7128955ee5285bc09 100644
@@ -2,14 +2,11 @@
 
 #include "lib.h"
 #include "str.h"
+#include "unichar.h"
 #include "language.h"
 #include "lang-settings.h"
 #include "lang-filter-private.h"
 
-#ifdef HAVE_LIBICU
-#  include "lang-icu.h"
-#endif
-
 static int
 lang_filter_lowercase_create(const struct lang_settings *set ATTR_UNUSED,
                             struct event *event ATTR_UNUSED,
@@ -30,13 +27,7 @@ lang_filter_lowercase_filter(struct lang_filter *filter ATTR_UNUSED,
                             const char **token,
                             const char **error_r ATTR_UNUSED)
 {
-#ifdef HAVE_LIBICU
-       str_truncate(filter->token, 0);
-       lang_icu_lcase(filter->token, *token);
-       *token = str_c(filter->token);
-#else
-       *token = t_str_lcase(*token);
-#endif
+       (void)uni_utf8_to_lowercase(*token, strlen(*token), token);
        return 1;
 }
 
diff --git a/src/lib-language/lang-icu.c b/src/lib-language/lang-icu.c
index 6bfd6cf870ce21fc9bca8a617278ae4297a40ff9..6e68fc4362bac8f20a6a707314d564ee61b7c9f5 100644
 
 static struct UCaseMap *icu_csm = NULL;
 
-static struct UCaseMap *lang_icu_csm(void)
-{
-       UErrorCode err = U_ZERO_ERROR;
-
-       if (icu_csm != NULL)
-               return icu_csm;
-       icu_csm = ucasemap_open(NULL, U_FOLD_CASE_DEFAULT, &err);
-       if (U_FAILURE(err)) {
-               i_fatal("LibICU ucasemap_open() failed: %s",
-                       u_errorName(err));
-       }
-       return icu_csm;
-}
-
 void lang_icu_utf8_to_utf16(ARRAY_TYPE(icu_utf16) *dest_utf16,
                            const char *src_utf8)
 {
@@ -134,36 +120,6 @@ int lang_icu_translate(ARRAY_TYPE(icu_utf16) *dest_utf16, const UChar *src_utf16
        return 0;
 }
 
-void lang_icu_lcase(string_t *dest_utf8, const char *src_utf8)
-{
-       struct UCaseMap *csm = lang_icu_csm();
-       size_t avail_bytes, dest_pos = dest_utf8->used;
-       char *dest_data;
-       int dest_full_len;
-       UErrorCode err = U_ZERO_ERROR;
-
-       avail_bytes = buffer_get_writable_size(dest_utf8) - dest_pos;
-       dest_data = buffer_get_space_unsafe(dest_utf8, dest_pos, avail_bytes);
-
-       /* ucasemap_utf8ToLower() may need to be called multiple times, because
-          the first return value may not be large enough. */
-       for (unsigned int i = 0;; i++) {
-               dest_full_len = ucasemap_utf8ToLower(csm, dest_data, avail_bytes,
-                                                    src_utf8, -1, &err);
-               if (err != U_BUFFER_OVERFLOW_ERROR || i == 2)
-                       break;
-
-               err = U_ZERO_ERROR;
-               dest_data = buffer_get_space_unsafe(dest_utf8, dest_pos, dest_full_len);
-               avail_bytes = dest_full_len;
-       }
-       if (U_FAILURE(err)) {
-               i_fatal("LibICU ucasemap_utf8ToLower() failed: %s",
-                       u_errorName(err));
-       }
-       buffer_set_used_size(dest_utf8, dest_full_len);
-}
-
 void lang_icu_deinit(void)
 {
        if (icu_csm != NULL) {
diff --git a/src/lib-language/lang-icu.h b/src/lib-language/lang-icu.h
index 2168477af29eabdfdbf40c7ecc1885c158025268..29b8e12e9dd7a4ea1f93d7d3eb3950be1fff48f6 100644
@@ -16,8 +16,6 @@ void lang_icu_utf16_to_utf8(string_t *dest_utf8, const UChar *src_utf16,
 int lang_icu_translate(ARRAY_TYPE(icu_utf16) *dest_utf16, const UChar *src_utf16,
                       unsigned int src_len, UTransliterator *transliterator,
                       const char **error_r);
-/* Lowercase the given UTF-8 string. */
-void lang_icu_lcase(string_t *dest_utf8, const char *src_utf8);
 
 /* Free all the memory used by ICU functions. */
 void lang_icu_deinit(void);
diff --git a/src/lib-language/test-lang-filter.c b/src/lib-language/test-lang-filter.c
index cc6f15254a7787b6cd630f7680c99a69573117f1..b7842b67a6f7c452f56a99bc317343a1ea5d24de 100644
@@ -137,7 +137,6 @@ static void test_lang_filter_lowercase(void)
        test_end();
 }
 
-#ifdef HAVE_LIBICU
 static void test_lang_filter_lowercase_utf8(void)
 {
        static const struct {
@@ -165,8 +164,6 @@ static void test_lang_filter_lowercase_utf8(void)
        test_end();
 }
 
-#endif
-
 static void test_lang_filter_stopwords_eng(void)
 {
        struct lang_filter *filter;
diff --git a/src/lib-language/test-lang-icu.c b/src/lib-language/test-lang-icu.c
index 994dc8981df89bc5929675487e5a12a0dc70e0de..e2a5b56bf81e58269b02d5680172efa67dde9a27 100644
@@ -143,45 +143,6 @@ static void test_lang_icu_translate_resize(void)
        test_end();
 }
 
-static void test_lang_icu_lcase(void)
-{
-       const char *src = "aBcD\xC3\x84\xC3\xA4";
-       string_t *dest = t_str_new(64);
-
-       test_begin("lang_icu_lcase");
-       lang_icu_lcase(dest, src);
-       test_assert(strcmp(str_c(dest), "abcd\xC3\xA4\xC3\xA4") == 0);
-       test_end();
-}
-
-static void test_lang_icu_lcase_resize(void)
-{
-       const char *src = "a\xC3\x84";
-       string_t *dest;
-       unsigned int i;
-
-       test_begin("lang_icu_lcase resize");
-       for (i = 1; i <= 3; i++) {
-               dest = t_str_new(i);
-               test_assert(buffer_get_writable_size(dest) == i);
-               lang_icu_lcase(dest, src);
-               test_assert(strcmp(str_c(dest), "a\xC3\xA4") == 0);
-               test_assert(buffer_get_writable_size(dest) == 3);
-       }
-
-       test_end();
-}
-
-static void test_lang_icu_lcase_resize_invalid_utf8(void)
-{
-       string_t *dest;
-
-       test_begin("lang_icu_lcase resize invalid utf8");
-       dest = t_str_new(1);
-       lang_icu_lcase(dest, ".\x80.");
-       test_end();
-}
-
 int main(void)
 {
        static void (*const test_functions[])(void) = {
@@ -191,9 +152,6 @@ int main(void)
                test_lang_icu_utf16_to_utf8_resize,
                test_lang_icu_translate,
                test_lang_icu_translate_resize,
-               test_lang_icu_lcase,
-               test_lang_icu_lcase_resize,
-               test_lang_icu_lcase_resize_invalid_utf8,
                NULL
        };
        int ret = test_run(test_functions);
diff --git a/src/lib/test-unicode-casemap.c b/src/lib/test-unicode-casemap.c
index 49fe8ec24d67f8b732bb922d83a247c3c111c5ce..2b083632c1bb12466c58e389f5e583736c4bd69c 100644
@@ -23,6 +23,14 @@ static const struct casemap_test {
                /* weisskopfseeadler */
                .casefold = "weisskopfseeadler",
        },
+       {
+               /* aBcD<U+00C4><U+00E4> */
+               .input = "aBcD\xC3\x84\xC3\xA4",
+               /* ABCD<U+00C4><U+00C4> */
+               .uppercase = "ABCD\xC3\x84\xC3\x84",
+               /* abcd<U+00E4><U+00E4> */
+               .lowercase = "abcd\xC3\xA4\xC3\xA4",
+       }
 };
 
 static const unsigned int tests_count = N_ELEMENTS(tests);