#include "lib.h"
#include "str.h"
+#include "unichar.h"
#include "language.h"
#include "lang-settings.h"
#include "lang-filter-private.h"
-#ifdef HAVE_LIBICU
-# include "lang-icu.h"
-#endif
-
/* Lowercase filter step: hands *token to uni_utf8_to_lowercase(), which
   receives `token` as its output argument, and then returns 1.
   NOTE(review): this span looks like two diff hunks fused together - the
   removed lines use `filter`, which is not among the visible parameters.
   Confirm the real signature against the full file. */
static int
lang_filter_lowercase_create(const struct lang_settings *set ATTR_UNUSED,
struct event *event ATTR_UNUSED,
const char **token,
const char **error_r ATTR_UNUSED)
{
-#ifdef HAVE_LIBICU
- str_truncate(filter->token, 0);
- lang_icu_lcase(filter->token, *token);
- *token = str_c(filter->token);
-#else
- *token = t_str_lcase(*token);
-#endif
/* Single code path replacing both removed preprocessor branches. The
   result code of uni_utf8_to_lowercase() is explicitly discarded, so 1 is
   returned regardless of it. */
+ (void)uni_utf8_to_lowercase(*token, strlen(*token), token);
return 1;
}
/* Cached ICU case map; stays NULL until lang_icu_csm() opens it. */
static struct UCaseMap *icu_csm = NULL;
/* Removed by this change: returned the cached icu_csm, opening it on first
   use via ucasemap_open(NULL, U_FOLD_CASE_DEFAULT, &err). A failure reported
   by ucasemap_open() was treated as fatal (i_fatal). */
-static struct UCaseMap *lang_icu_csm(void)
-{
- UErrorCode err = U_ZERO_ERROR;
-
- if (icu_csm != NULL)
- return icu_csm;
- icu_csm = ucasemap_open(NULL, U_FOLD_CASE_DEFAULT, &err);
- if (U_FAILURE(err)) {
- i_fatal("LibICU ucasemap_open() failed: %s",
- u_errorName(err));
- }
- return icu_csm;
-}
-
void lang_icu_utf8_to_utf16(ARRAY_TYPE(icu_utf16) *dest_utf16,
const char *src_utf8)
{
return 0;
}
/* Removed by this change: lowercased the NUL-terminated src_utf8 (length
   passed as -1) with ucasemap_utf8ToLower(), writing into dest_utf8's buffer
   starting at its current used size. On U_BUFFER_OVERFLOW_ERROR the buffer
   space is re-requested with the length ICU reported and the call is retried;
   at most three calls are made (i = 0, 1, 2). Any remaining ICU failure is
   fatal (i_fatal).
   NOTE(review): the final used size is set to dest_full_len alone, without
   adding dest_pos - that only matches when dest_utf8 was empty on entry. */
-void lang_icu_lcase(string_t *dest_utf8, const char *src_utf8)
-{
- struct UCaseMap *csm = lang_icu_csm();
- size_t avail_bytes, dest_pos = dest_utf8->used;
- char *dest_data;
- int dest_full_len;
- UErrorCode err = U_ZERO_ERROR;
-
- avail_bytes = buffer_get_writable_size(dest_utf8) - dest_pos;
- dest_data = buffer_get_space_unsafe(dest_utf8, dest_pos, avail_bytes);
-
- /* ucasemap_utf8ToLower() may need to be called multiple times, because
- the first return value may not be large enough. */
- for (unsigned int i = 0;; i++) {
- dest_full_len = ucasemap_utf8ToLower(csm, dest_data, avail_bytes,
- src_utf8, -1, &err);
- if (err != U_BUFFER_OVERFLOW_ERROR || i == 2)
- break;
-
- err = U_ZERO_ERROR;
- dest_data = buffer_get_space_unsafe(dest_utf8, dest_pos, dest_full_len);
- avail_bytes = dest_full_len;
- }
- if (U_FAILURE(err)) {
- i_fatal("LibICU ucasemap_utf8ToLower() failed: %s",
- u_errorName(err));
- }
- buffer_set_used_size(dest_utf8, dest_full_len);
-}
-
void lang_icu_deinit(void)
{
if (icu_csm != NULL) {
int lang_icu_translate(ARRAY_TYPE(icu_utf16) *dest_utf16, const UChar *src_utf16,
unsigned int src_len, UTransliterator *transliterator,
const char **error_r);
-/* Lowercase the given UTF-8 string. */
-void lang_icu_lcase(string_t *dest_utf8, const char *src_utf8);
/* Free all the memory used by ICU functions. */
void lang_icu_deinit(void);
test_end();
}
-#ifdef HAVE_LIBICU
static void test_lang_filter_lowercase_utf8(void)
{
static const struct {
test_end();
}
-#endif
-
static void test_lang_filter_stopwords_eng(void)
{
struct lang_filter *filter;
test_end();
}
/* Removed by this change: asserted that lang_icu_lcase() turns
   "aBcD<U+00C4><U+00E4>" into "abcd<U+00E4><U+00E4>" (UTF-8 encoded), using
   a 64-byte initial destination string. */
-static void test_lang_icu_lcase(void)
-{
- const char *src = "aBcD\xC3\x84\xC3\xA4";
- string_t *dest = t_str_new(64);
-
- test_begin("lang_icu_lcase");
- lang_icu_lcase(dest, src);
- test_assert(strcmp(str_c(dest), "abcd\xC3\xA4\xC3\xA4") == 0);
- test_end();
-}
-
/* Removed by this change: ran lang_icu_lcase() on "a<U+00C4>" with
   destination strings created with initial sizes 1, 2 and 3. Each round
   asserted that the writable size equals the requested size beforehand, that
   the result is "a<U+00E4>", and that the writable size is 3 afterwards. */
-static void test_lang_icu_lcase_resize(void)
-{
- const char *src = "a\xC3\x84";
- string_t *dest;
- unsigned int i;
-
- test_begin("lang_icu_lcase resize");
- for (i = 1; i <= 3; i++) {
- dest = t_str_new(i);
- test_assert(buffer_get_writable_size(dest) == i);
- lang_icu_lcase(dest, src);
- test_assert(strcmp(str_c(dest), "a\xC3\xA4") == 0);
- test_assert(buffer_get_writable_size(dest) == 3);
- }
-
- test_end();
-}
-
/* Removed by this change: called lang_icu_lcase() on ".\x80." with a
   destination string created with initial size 1. There is no assertion on
   the output; presumably the test only verified that the call returns
   without aborting - confirm against the project history. */
-static void test_lang_icu_lcase_resize_invalid_utf8(void)
-{
- string_t *dest;
-
- test_begin("lang_icu_lcase resize invalid utf8");
- dest = t_str_new(1);
- lang_icu_lcase(dest, ".\x80.");
- test_end();
-}
-
int main(void)
{
static void (*const test_functions[])(void) = {
test_lang_icu_utf16_to_utf8_resize,
test_lang_icu_translate,
test_lang_icu_translate_resize,
- test_lang_icu_lcase,
- test_lang_icu_lcase_resize,
- test_lang_icu_lcase_resize_invalid_utf8,
NULL
};
int ret = test_run(test_functions);
/* weisskopfseeadler */
.casefold = "weisskopfseeadler",
},
+ {
+ /* aBcD<U+00C4><U+00E4> */
+ .input = "aBcD\xC3\x84\xC3\xA4",
+ /* ABCD<U+00C4><U+00C4> */
+ .uppercase = "ABCD\xC3\x84\xC3\x84",
+ /* abcd<U+00E4><U+00E4> */
+ .lowercase = "abcd\xC3\xA4\xC3\xA4",
+ }
};
static const unsigned int tests_count = N_ELEMENTS(tests);