From: Timo Sirainen Date: Wed, 21 Dec 2016 18:39:37 +0000 (+0200) Subject: lib-fts: Change fts_icu_*() to use ARRAY_TYPE(icu_utf16) for UTF16 input. X-Git-Tag: 2.3.0.rc1~2387 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=dcb783533ad1d6944db7c227cde46414a575d81c;p=thirdparty%2Fdovecot%2Fcore.git lib-fts: Change fts_icu_*() to use ARRAY_TYPE(icu_utf16) for UTF16 input. This makes it clearer how the API is intended to be used. --- diff --git a/src/lib-fts/fts-filter-normalizer-icu.c b/src/lib-fts/fts-filter-normalizer-icu.c index d49852dbfa..f5318847df 100644 --- a/src/lib-fts/fts-filter-normalizer-icu.c +++ b/src/lib-fts/fts-filter-normalizer-icu.c @@ -1,7 +1,7 @@ /* Copyright (c) 2014-2016 Dovecot authors, see the included COPYING file */ #include "lib.h" -#include "buffer.h" +#include "array.h" #include "str.h" #include "unichar.h" /* unicode replacement char */ #include "fts-filter-common.h" @@ -17,7 +17,7 @@ struct fts_filter_normalizer_icu { const char *transliterator_id; UTransliterator *transliterator; - buffer_t *utf16_token, *trans_token; + ARRAY_TYPE(icu_utf16) utf16_token, trans_token; string_t *utf8_token; }; @@ -65,8 +65,8 @@ fts_filter_normalizer_icu_create(const struct fts_language *lang ATTR_UNUSED, np->pool = pp; np->filter = *fts_filter_normalizer_icu; np->transliterator_id = p_strdup(pp, id); - np->utf16_token = buffer_create_dynamic(pp, 128); - np->trans_token = buffer_create_dynamic(pp, 128); + p_array_init(&np->utf16_token, pp, 64); + p_array_init(&np->trans_token, pp, 64); np->utf8_token = buffer_create_dynamic(pp, 128); np->filter.max_length = max_length; *filter_r = &np->filter; @@ -86,20 +86,20 @@ fts_filter_normalizer_icu_filter(struct fts_filter *filter, const char **token, error_r) < 0) return -1; - fts_icu_utf8_to_utf16(np->utf16_token, *token); - buffer_append_zero(np->utf16_token, 2); - buffer_set_used_size(np->utf16_token, np->utf16_token->used-2); - buffer_set_used_size(np->trans_token, 0); - if (fts_icu_translate(np->trans_token, np->utf16_token->data, - np->utf16_token->used / sizeof(UChar), + fts_icu_utf8_to_utf16(&np->utf16_token, *token); + array_append_zero(&np->utf16_token); + array_delete(&np->utf16_token, array_count(&np->utf16_token)-1, 1); + array_clear(&np->trans_token); + if (fts_icu_translate(&np->trans_token, array_idx(&np->utf16_token, 0), + array_count(&np->utf16_token), np->transliterator, error_r) < 0) return -1; - if (np->trans_token->used == 0) + if (array_count(&np->trans_token) == 0) return 0; - fts_icu_utf16_to_utf8(np->utf8_token, np->trans_token->data, - np->trans_token->used / sizeof(UChar)); + fts_icu_utf16_to_utf8(np->utf8_token, array_idx(&np->trans_token, 0), + array_count(&np->trans_token)); fts_filter_truncate_token(np->utf8_token, np->filter.max_length); *token = str_c(np->utf8_token); return 1; diff --git a/src/lib-fts/fts-icu.c b/src/lib-fts/fts-icu.c index 327e9e2993..a1038193a8 100644 --- a/src/lib-fts/fts-icu.c +++ b/src/lib-fts/fts-icu.c @@ -2,7 +2,7 @@ #include "lib.h" #include "mempool.h" -#include "buffer.h" +#include "array.h" #include "str.h" #include "unichar.h" #include "fts-icu.h" @@ -27,8 +27,10 @@ static struct UCaseMap *fts_icu_csm(void) return icu_csm; } -void fts_icu_utf8_to_utf16(buffer_t *dest_utf16, const char *src_utf8) +void fts_icu_utf8_to_utf16(ARRAY_TYPE(icu_utf16) *dest_utf16, + const char *src_utf8) { + buffer_t *dest_buf = dest_utf16->arr.buffer; UErrorCode err = U_ZERO_ERROR; size_t src_bytes = strlen(src_utf8); int32_t utf16_len; @@ -36,14 +38,14 @@ void fts_icu_utf8_to_utf16(buffer_t *dest_utf16, const char *src_utf8) int32_t avail_uchars = 0; /* try to encode with the current buffer size */ - avail_uchars = buffer_get_writable_size(dest_utf16) / sizeof(UChar); - dest_data = buffer_get_space_unsafe(dest_utf16, 0, - buffer_get_writable_size(dest_utf16)); + avail_uchars = buffer_get_writable_size(dest_buf) / sizeof(UChar); + dest_data = buffer_get_space_unsafe(dest_buf, 0, + buffer_get_writable_size(dest_buf)); retp = u_strFromUTF8Lenient(dest_data, avail_uchars, &utf16_len, src_utf8, src_bytes, &err); if (err == U_BUFFER_OVERFLOW_ERROR) { /* try again with a larger buffer */ - dest_data = buffer_get_space_unsafe(dest_utf16, 0, + dest_data = buffer_get_space_unsafe(dest_buf, 0, utf16_len * sizeof(UChar)); err = U_ZERO_ERROR; retp = u_strFromUTF8Lenient(dest_data, utf16_len, @@ -54,7 +56,7 @@ void fts_icu_utf8_to_utf16(buffer_t *dest_utf16, const char *src_utf8) i_panic("LibICU u_strFromUTF8Lenient() failed: %s", u_errorName(err)); } - buffer_set_used_size(dest_utf16, utf16_len * sizeof(UChar)); + buffer_set_used_size(dest_buf, utf16_len * sizeof(UChar)); i_assert(retp == dest_data); } @@ -89,23 +91,24 @@ void fts_icu_utf16_to_utf8(string_t *dest_utf8, const UChar *src_utf16, i_assert(retp == dest_data); } -int fts_icu_translate(buffer_t *dest_utf16, const UChar *src_utf16, +int fts_icu_translate(ARRAY_TYPE(icu_utf16) *dest_utf16, const UChar *src_utf16, unsigned int src_len, UTransliterator *transliterator, const char **error_r) { + buffer_t *dest_buf = dest_utf16->arr.buffer; UErrorCode err = U_ZERO_ERROR; int32_t utf16_len = src_len; UChar *dest_data; int32_t avail_uchars, limit = src_len; - size_t dest_pos = dest_utf16->used; + size_t dest_pos = dest_buf->used; /* translation is done in-place in the buffer. try first with the current buffer size. */ - buffer_append(dest_utf16, src_utf16, src_len*sizeof(UChar)); + array_append(dest_utf16, src_utf16, src_len); - avail_uchars = (buffer_get_writable_size(dest_utf16)-dest_pos) / sizeof(UChar); - dest_data = buffer_get_space_unsafe(dest_utf16, dest_pos, - buffer_get_writable_size(dest_utf16)-dest_pos); + avail_uchars = (buffer_get_writable_size(dest_buf)-dest_pos) / sizeof(UChar); + dest_data = buffer_get_space_unsafe(dest_buf, dest_pos, + buffer_get_writable_size(dest_buf) - dest_pos); utrans_transUChars(transliterator, dest_data, &utf16_len, avail_uchars, 0, &limit, &err); if (err == U_BUFFER_OVERFLOW_ERROR) { @@ -113,9 +116,9 @@ int fts_icu_translate(buffer_t *dest_utf16, const UChar *src_utf16, err = U_ZERO_ERROR; avail_uchars = utf16_len; limit = utf16_len = src_len; - buffer_write(dest_utf16, dest_pos, + buffer_write(dest_buf, dest_pos, src_utf16, src_len*sizeof(UChar)); - dest_data = buffer_get_space_unsafe(dest_utf16, dest_pos, + dest_data = buffer_get_space_unsafe(dest_buf, dest_pos, avail_uchars * sizeof(UChar)); utrans_transUChars(transliterator, dest_data, &utf16_len, avail_uchars, 0, &limit, &err); @@ -124,10 +127,10 @@ int fts_icu_translate(buffer_t *dest_utf16, const UChar *src_utf16, if (U_FAILURE(err)) { *error_r = t_strdup_printf("LibICU utrans_transUChars() failed: %s", u_errorName(err)); - buffer_set_used_size(dest_utf16, dest_pos); + buffer_set_used_size(dest_buf, dest_pos); return -1; } - buffer_set_used_size(dest_utf16, utf16_len * sizeof(UChar)); + buffer_set_used_size(dest_buf, utf16_len * sizeof(UChar)); return 0; } @@ -173,14 +176,13 @@ int fts_icu_transliterator_create(const char *id, { UErrorCode err = U_ZERO_ERROR; UParseError perr; - buffer_t *id_utf16_buf = buffer_create_dynamic(pool_datastack_create(), 2 * strlen(id)); - UChar *id_utf16; + ARRAY_TYPE(icu_utf16) id_utf16; memset(&perr, 0, sizeof(perr)); - fts_icu_utf8_to_utf16(id_utf16_buf, id); - id_utf16 = (UChar *)str_c(id_utf16_buf); - *transliterator_r = utrans_openU(id_utf16, - id_utf16_buf->used / sizeof(UChar), + t_array_init(&id_utf16, strlen(id)); + fts_icu_utf8_to_utf16(&id_utf16, id); + *transliterator_r = utrans_openU(array_idx(&id_utf16, 0), + array_count(&id_utf16), UTRANS_FORWARD, NULL, 0, &perr, &err); if (U_FAILURE(err)) { string_t *str = t_str_new(128); diff --git a/src/lib-fts/fts-icu.h b/src/lib-fts/fts-icu.h index 09fbcb551e..5b0f3dcce6 100644 --- a/src/lib-fts/fts-icu.h +++ b/src/lib-fts/fts-icu.h @@ -4,13 +4,16 @@ #include #include -/* Convert UTF-8 input to UTF-16 output. The dest_utf16 contains UChars. */ -void fts_icu_utf8_to_utf16(buffer_t *dest_utf16, const char *src_utf8); +ARRAY_DEFINE_TYPE(icu_utf16, UChar); + +/* Convert UTF-8 input to UTF-16 output. */ +void fts_icu_utf8_to_utf16(ARRAY_TYPE(icu_utf16) *dest_utf16, + const char *src_utf8); /* Convert UTF-16 input to UTF-8 output. */ void fts_icu_utf16_to_utf8(string_t *dest_utf8, const UChar *src_utf16, unsigned int src_len); /* Run ICU translation for the string. Returns 0 on success, -1 on error. */ -int fts_icu_translate(buffer_t *dest_utf16, const UChar *src_utf16, +int fts_icu_translate(ARRAY_TYPE(icu_utf16) *dest_utf16, const UChar *src_utf16, unsigned int src_len, UTransliterator *transliterator, const char **error_r); /* Lowercase the given UTF-8 string. */ diff --git a/src/lib-fts/test-fts-icu.c b/src/lib-fts/test-fts-icu.c index f1748bd63c..3a030904bf 100644 --- a/src/lib-fts/test-fts-icu.c +++ b/src/lib-fts/test-fts-icu.c @@ -1,7 +1,7 @@ /* Copyright (c) 2015-2016 Dovecot authors, see the included COPYING file */ #include "lib.h" -#include "buffer.h" +#include "array.h" #include "str.h" #include "unichar.h" #include "test-common.h" @@ -11,35 +11,36 @@ static void test_fts_icu_utf8_to_utf16_ascii_resize(void) { - buffer_t *dest = buffer_create_dynamic(pool_datastack_create(), 4); + ARRAY_TYPE(icu_utf16) dest; test_begin("fts_icu_utf8_to_utf16 ascii resize"); - test_assert(buffer_get_writable_size(dest) == 4); - fts_icu_utf8_to_utf16(dest, "12"); - test_assert(dest->used == 4); - test_assert(buffer_get_writable_size(dest) == 4); + t_array_init(&dest, 2); + test_assert(buffer_get_writable_size(dest.arr.buffer) == 4); + fts_icu_utf8_to_utf16(&dest, "12"); + test_assert(array_count(&dest) == 2); + test_assert(buffer_get_writable_size(dest.arr.buffer) == 4); - fts_icu_utf8_to_utf16(dest, "123"); - test_assert(dest->used == 6); - test_assert(buffer_get_writable_size(dest) == 7); + fts_icu_utf8_to_utf16(&dest, "123"); + test_assert(array_count(&dest) == 3); + test_assert(buffer_get_writable_size(dest.arr.buffer) == 7); - fts_icu_utf8_to_utf16(dest, "12345"); - test_assert(dest->used == 10); + fts_icu_utf8_to_utf16(&dest, "12345"); + test_assert(array_count(&dest) == 5); test_end(); } static void test_fts_icu_utf8_to_utf16_32bit_resize(void) { - buffer_t *dest; + ARRAY_TYPE(icu_utf16) dest; unsigned int i; test_begin("fts_icu_utf8_to_utf16 32bit resize"); - for (i = 2; i <= 5; i++) { - dest = buffer_create_dynamic(pool_datastack_create(), i); - test_assert(buffer_get_writable_size(dest) == i); - fts_icu_utf8_to_utf16(dest, "\xF0\x90\x90\x80"); /* 0x10400 */ - test_assert(dest->used == 4); + for (i = 1; i <= 2; i++) { + t_array_init(&dest, i); + test_assert(buffer_get_writable_size(dest.arr.buffer) == i*2); + fts_icu_utf8_to_utf16(&dest, "\xF0\x90\x90\x80"); /* 0x10400 */ + test_assert(array_count(&dest) == 2); } test_end(); @@ -80,13 +81,13 @@ static void test_fts_icu_utf16_to_utf8_resize(void) static UTransliterator *get_translit(const char *id) { UTransliterator *translit; - buffer_t *id_utf16; + ARRAY_TYPE(icu_utf16) id_utf16; UErrorCode err = U_ZERO_ERROR; UParseError perr; - id_utf16 = buffer_create_dynamic(pool_datastack_create(), 16); - fts_icu_utf8_to_utf16(id_utf16, id); - translit = utrans_openU(id_utf16->data, id_utf16->used/sizeof(UChar), + t_array_init(&id_utf16, 8); + fts_icu_utf8_to_utf16(&id_utf16, id); + translit = utrans_openU(array_idx(&id_utf16, 0), array_count(&id_utf16), UTRANS_FORWARD, NULL, 0, &perr, &err); test_assert(!U_FAILURE(err)); return translit; @@ -96,18 +97,19 @@ static void test_fts_icu_translate(void) { const char *translit_id = "Any-Lower"; UTransliterator *translit; - buffer_t *dest = buffer_create_dynamic(pool_datastack_create(), 64); + ARRAY_TYPE(icu_utf16) dest; const UChar src[] = { 0xbd, 'B', 'C' }; const char *error; unsigned int i; test_begin("fts_icu_translate"); + t_array_init(&dest, 32); translit = get_translit(translit_id); for (i = N_ELEMENTS(src); i > 0; i--) { - buffer_set_used_size(dest, 0); - test_assert(fts_icu_translate(dest, src, i, + array_clear(&dest); + test_assert(fts_icu_translate(&dest, src, i, translit, &error) == 0); - test_assert(dest->used == i * sizeof(UChar)); + test_assert(array_count(&dest) == i); } utrans_close(translit); test_end(); @@ -117,22 +119,22 @@ static void test_fts_icu_translate_resize(void) { const char *translit_id = "Any-Hex"; const char *src_utf8 = "FOO"; - buffer_t *dest, *src_utf16; + ARRAY_TYPE(icu_utf16) src_utf16, dest; UTransliterator *translit; const char *error; unsigned int i; test_begin("fts_icu_translate_resize resize"); - src_utf16 = buffer_create_dynamic(pool_datastack_create(), 16); + t_array_init(&src_utf16, 8); translit = get_translit(translit_id); - for (i = 2; i <= 20; i++) { - buffer_set_used_size(src_utf16, 0); - fts_icu_utf8_to_utf16(src_utf16, src_utf8); - dest = buffer_create_dynamic(pool_datastack_create(), i); - test_assert(buffer_get_writable_size(dest) == i); - test_assert(fts_icu_translate(dest, src_utf16->data, - src_utf16->used/sizeof(UChar), + for (i = 1; i <= 10; i++) { + array_clear(&src_utf16); + fts_icu_utf8_to_utf16(&src_utf16, src_utf8); + t_array_init(&dest, i); + test_assert(buffer_get_writable_size(dest.arr.buffer) == i*2); + test_assert(fts_icu_translate(&dest, array_idx(&src_utf16, 0), + array_count(&src_utf16), translit, &error) == 0); }