git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
lib-fts: Change fts_icu_*() to use ARRAY_TYPE(icu_utf16) for UTF16 input.
author Timo Sirainen <timo.sirainen@dovecot.fi>
Wed, 21 Dec 2016 18:39:37 +0000 (20:39 +0200)
committer GitLab <gitlab@git.dovecot.net>
Fri, 23 Dec 2016 13:22:05 +0000 (15:22 +0200)
This makes it clearer how the API is intended to be used.

src/lib-fts/fts-filter-normalizer-icu.c
src/lib-fts/fts-icu.c
src/lib-fts/fts-icu.h
src/lib-fts/test-fts-icu.c

index d49852dbfa55d177b2d1123ce700e9bab5469dcd..f5318847dff166fe5795b0de7c6339e97d8b69a6 100644 (file)
@@ -1,7 +1,7 @@
 /* Copyright (c) 2014-2016 Dovecot authors, see the included COPYING file */
 
 #include "lib.h"
-#include "buffer.h"
+#include "array.h"
 #include "str.h"
 #include "unichar.h" /* unicode replacement char */
 #include "fts-filter-common.h"
@@ -17,7 +17,7 @@ struct fts_filter_normalizer_icu {
        const char *transliterator_id;
 
        UTransliterator *transliterator;
-       buffer_t *utf16_token, *trans_token;
+       ARRAY_TYPE(icu_utf16) utf16_token, trans_token;
        string_t *utf8_token;
 };
 
@@ -65,8 +65,8 @@ fts_filter_normalizer_icu_create(const struct fts_language *lang ATTR_UNUSED,
        np->pool = pp;
        np->filter = *fts_filter_normalizer_icu;
        np->transliterator_id = p_strdup(pp, id);
-       np->utf16_token = buffer_create_dynamic(pp, 128);
-       np->trans_token = buffer_create_dynamic(pp, 128);
+       p_array_init(&np->utf16_token, pp, 64);
+       p_array_init(&np->trans_token, pp, 64);
        np->utf8_token = buffer_create_dynamic(pp, 128);
        np->filter.max_length = max_length;
        *filter_r = &np->filter;
@@ -86,20 +86,20 @@ fts_filter_normalizer_icu_filter(struct fts_filter *filter, const char **token,
                                                  error_r) < 0)
                        return -1;
 
-       fts_icu_utf8_to_utf16(np->utf16_token, *token);
-       buffer_append_zero(np->utf16_token, 2);
-       buffer_set_used_size(np->utf16_token, np->utf16_token->used-2);
-       buffer_set_used_size(np->trans_token, 0);
-       if (fts_icu_translate(np->trans_token, np->utf16_token->data,
-                             np->utf16_token->used / sizeof(UChar),
+       fts_icu_utf8_to_utf16(&np->utf16_token, *token);
+       array_append_zero(&np->utf16_token);
+       array_delete(&np->utf16_token, array_count(&np->utf16_token)-1, 1);
+       array_clear(&np->trans_token);
+       if (fts_icu_translate(&np->trans_token, array_idx(&np->utf16_token, 0),
+                             array_count(&np->utf16_token),
                              np->transliterator, error_r) < 0)
                return -1;
 
-       if (np->trans_token->used == 0)
+       if (array_count(&np->trans_token) == 0)
                return 0;
 
-       fts_icu_utf16_to_utf8(np->utf8_token, np->trans_token->data,
-                             np->trans_token->used / sizeof(UChar));
+       fts_icu_utf16_to_utf8(np->utf8_token, array_idx(&np->trans_token, 0),
+                             array_count(&np->trans_token));
        fts_filter_truncate_token(np->utf8_token, np->filter.max_length);
        *token = str_c(np->utf8_token);
        return 1;
index 327e9e2993cde2bf6b48c0a1b2d8210b806d067b..a1038193a8cff8f3169dd157c253e2e81867f8dc 100644 (file)
@@ -2,7 +2,7 @@
 
 #include "lib.h"
 #include "mempool.h"
-#include "buffer.h"
+#include "array.h"
 #include "str.h"
 #include "unichar.h"
 #include "fts-icu.h"
@@ -27,8 +27,10 @@ static struct UCaseMap *fts_icu_csm(void)
        return icu_csm;
 }
 
-void fts_icu_utf8_to_utf16(buffer_t *dest_utf16, const char *src_utf8)
+void fts_icu_utf8_to_utf16(ARRAY_TYPE(icu_utf16) *dest_utf16,
+                          const char *src_utf8)
 {
+       buffer_t *dest_buf = dest_utf16->arr.buffer;
        UErrorCode err = U_ZERO_ERROR;
        size_t src_bytes = strlen(src_utf8);
        int32_t utf16_len;
@@ -36,14 +38,14 @@ void fts_icu_utf8_to_utf16(buffer_t *dest_utf16, const char *src_utf8)
        int32_t avail_uchars = 0;
 
        /* try to encode with the current buffer size */
-       avail_uchars = buffer_get_writable_size(dest_utf16) / sizeof(UChar);
-       dest_data = buffer_get_space_unsafe(dest_utf16, 0,
-                               buffer_get_writable_size(dest_utf16));
+       avail_uchars = buffer_get_writable_size(dest_buf) / sizeof(UChar);
+       dest_data = buffer_get_space_unsafe(dest_buf, 0,
+                               buffer_get_writable_size(dest_buf));
        retp = u_strFromUTF8Lenient(dest_data, avail_uchars,
                                    &utf16_len, src_utf8, src_bytes, &err);
        if (err == U_BUFFER_OVERFLOW_ERROR) {
                /* try again with a larger buffer */
-               dest_data = buffer_get_space_unsafe(dest_utf16, 0,
+               dest_data = buffer_get_space_unsafe(dest_buf, 0,
                                                    utf16_len * sizeof(UChar));
                err = U_ZERO_ERROR;
                retp = u_strFromUTF8Lenient(dest_data, utf16_len,
@@ -54,7 +56,7 @@ void fts_icu_utf8_to_utf16(buffer_t *dest_utf16, const char *src_utf8)
                i_panic("LibICU u_strFromUTF8Lenient() failed: %s",
                        u_errorName(err));
        }
-       buffer_set_used_size(dest_utf16, utf16_len * sizeof(UChar));
+       buffer_set_used_size(dest_buf, utf16_len * sizeof(UChar));
        i_assert(retp == dest_data);
 }
 
@@ -89,23 +91,24 @@ void fts_icu_utf16_to_utf8(string_t *dest_utf8, const UChar *src_utf16,
        i_assert(retp == dest_data);
 }
 
-int fts_icu_translate(buffer_t *dest_utf16, const UChar *src_utf16,
+int fts_icu_translate(ARRAY_TYPE(icu_utf16) *dest_utf16, const UChar *src_utf16,
                      unsigned int src_len, UTransliterator *transliterator,
                      const char **error_r)
 {
+       buffer_t *dest_buf = dest_utf16->arr.buffer;
        UErrorCode err = U_ZERO_ERROR;
        int32_t utf16_len = src_len;
        UChar *dest_data;
        int32_t avail_uchars, limit = src_len;
-       size_t dest_pos = dest_utf16->used;
+       size_t dest_pos = dest_buf->used;
 
        /* translation is done in-place in the buffer. try first with the
           current buffer size. */
-       buffer_append(dest_utf16, src_utf16, src_len*sizeof(UChar));
+       array_append(dest_utf16, src_utf16, src_len);
 
-       avail_uchars = (buffer_get_writable_size(dest_utf16)-dest_pos) / sizeof(UChar);
-       dest_data = buffer_get_space_unsafe(dest_utf16, dest_pos,
-                               buffer_get_writable_size(dest_utf16)-dest_pos);
+       avail_uchars = (buffer_get_writable_size(dest_buf)-dest_pos) / sizeof(UChar);
+       dest_data = buffer_get_space_unsafe(dest_buf, dest_pos,
+                       buffer_get_writable_size(dest_buf) - dest_pos);
        utrans_transUChars(transliterator, dest_data, &utf16_len,
                           avail_uchars, 0, &limit, &err);
        if (err == U_BUFFER_OVERFLOW_ERROR) {
@@ -113,9 +116,9 @@ int fts_icu_translate(buffer_t *dest_utf16, const UChar *src_utf16,
                err = U_ZERO_ERROR;
                avail_uchars = utf16_len;
                limit = utf16_len = src_len;
-               buffer_write(dest_utf16, dest_pos,
+               buffer_write(dest_buf, dest_pos,
                             src_utf16, src_len*sizeof(UChar));
-               dest_data = buffer_get_space_unsafe(dest_utf16, dest_pos,
+               dest_data = buffer_get_space_unsafe(dest_buf, dest_pos,
                                                    avail_uchars * sizeof(UChar));
                utrans_transUChars(transliterator, dest_data, &utf16_len,
                                   avail_uchars, 0, &limit, &err);
@@ -124,10 +127,10 @@ int fts_icu_translate(buffer_t *dest_utf16, const UChar *src_utf16,
        if (U_FAILURE(err)) {
                *error_r = t_strdup_printf("LibICU utrans_transUChars() failed: %s",
                                           u_errorName(err));
-               buffer_set_used_size(dest_utf16, dest_pos);
+               buffer_set_used_size(dest_buf, dest_pos);
                return -1;
        }
-       buffer_set_used_size(dest_utf16, utf16_len * sizeof(UChar));
+       buffer_set_used_size(dest_buf, utf16_len * sizeof(UChar));
        return 0;
 }
 
@@ -173,14 +176,13 @@ int fts_icu_transliterator_create(const char *id,
 {
        UErrorCode err = U_ZERO_ERROR;
        UParseError perr;
-       buffer_t *id_utf16_buf = buffer_create_dynamic(pool_datastack_create(), 2 * strlen(id));
-       UChar *id_utf16;
+       ARRAY_TYPE(icu_utf16) id_utf16;
        memset(&perr, 0, sizeof(perr));
 
-       fts_icu_utf8_to_utf16(id_utf16_buf, id);
-       id_utf16 = (UChar *)str_c(id_utf16_buf);
-       *transliterator_r = utrans_openU(id_utf16,
-                                       id_utf16_buf->used / sizeof(UChar),
+       t_array_init(&id_utf16, strlen(id));
+       fts_icu_utf8_to_utf16(&id_utf16, id);
+       *transliterator_r = utrans_openU(array_idx(&id_utf16, 0),
+                                       array_count(&id_utf16),
                                        UTRANS_FORWARD, NULL, 0, &perr, &err);
        if (U_FAILURE(err)) {
                string_t *str = t_str_new(128);
index 09fbcb551e77675efccfd9e6fa6944b874e91661..5b0f3dcce6df3335ad73ca94cec50a4aae340c07 100644 (file)
@@ -4,13 +4,16 @@
 #include <unicode/ustring.h>
 #include <unicode/utrans.h>
 
-/* Convert UTF-8 input to UTF-16 output. The dest_utf16 contains UChars. */
-void fts_icu_utf8_to_utf16(buffer_t *dest_utf16, const char *src_utf8);
+ARRAY_DEFINE_TYPE(icu_utf16, UChar);
+
+/* Convert UTF-8 input to UTF-16 output. */
+void fts_icu_utf8_to_utf16(ARRAY_TYPE(icu_utf16) *dest_utf16,
+                          const char *src_utf8);
 /* Convert UTF-16 input to UTF-8 output. */
 void fts_icu_utf16_to_utf8(string_t *dest_utf8, const UChar *src_utf16,
                           unsigned int src_len);
 /* Run ICU translation for the string. Returns 0 on success, -1 on error. */
-int fts_icu_translate(buffer_t *dest_utf16, const UChar *src_utf16,
+int fts_icu_translate(ARRAY_TYPE(icu_utf16) *dest_utf16, const UChar *src_utf16,
                      unsigned int src_len, UTransliterator *transliterator,
                      const char **error_r);
 /* Lowercase the given UTF-8 string. */
index f1748bd63cb08d7890b670c21fe13f3412c765a0..3a030904bf903596fe493bdb2dc9d8134e21eb1b 100644 (file)
@@ -1,7 +1,7 @@
 /* Copyright (c) 2015-2016 Dovecot authors, see the included COPYING file */
 
 #include "lib.h"
-#include "buffer.h"
+#include "array.h"
 #include "str.h"
 #include "unichar.h"
 #include "test-common.h"
 
 static void test_fts_icu_utf8_to_utf16_ascii_resize(void)
 {
-       buffer_t *dest = buffer_create_dynamic(pool_datastack_create(), 4);
+       ARRAY_TYPE(icu_utf16) dest;
 
        test_begin("fts_icu_utf8_to_utf16 ascii resize");
-       test_assert(buffer_get_writable_size(dest) == 4);
-       fts_icu_utf8_to_utf16(dest, "12");
-       test_assert(dest->used == 4);
-       test_assert(buffer_get_writable_size(dest) == 4);
+       t_array_init(&dest, 2);
+       test_assert(buffer_get_writable_size(dest.arr.buffer) == 4);
+       fts_icu_utf8_to_utf16(&dest, "12");
+       test_assert(array_count(&dest) == 2);
+       test_assert(buffer_get_writable_size(dest.arr.buffer) == 4);
 
-       fts_icu_utf8_to_utf16(dest, "123");
-       test_assert(dest->used == 6);
-       test_assert(buffer_get_writable_size(dest) == 7);
+       fts_icu_utf8_to_utf16(&dest, "123");
+       test_assert(array_count(&dest) == 3);
+       test_assert(buffer_get_writable_size(dest.arr.buffer) == 7);
 
-       fts_icu_utf8_to_utf16(dest, "12345");
-       test_assert(dest->used == 10);
+       fts_icu_utf8_to_utf16(&dest, "12345");
+       test_assert(array_count(&dest) == 5);
 
        test_end();
 }
 
 static void test_fts_icu_utf8_to_utf16_32bit_resize(void)
 {
-       buffer_t *dest;
+       ARRAY_TYPE(icu_utf16) dest;
        unsigned int i;
 
        test_begin("fts_icu_utf8_to_utf16 32bit resize");
-       for (i = 2; i <= 5; i++) {
-               dest = buffer_create_dynamic(pool_datastack_create(), i);
-               test_assert(buffer_get_writable_size(dest) == i);
-               fts_icu_utf8_to_utf16(dest, "\xF0\x90\x90\x80"); /* 0x10400 */
-               test_assert(dest->used == 4);
+       for (i = 1; i <= 2; i++) {
+               t_array_init(&dest, i);
+               test_assert(buffer_get_writable_size(dest.arr.buffer) == i*2);
+               fts_icu_utf8_to_utf16(&dest, "\xF0\x90\x90\x80"); /* 0x10400 */
+               test_assert(array_count(&dest) == 2);
        }
 
        test_end();
@@ -80,13 +81,13 @@ static void test_fts_icu_utf16_to_utf8_resize(void)
 static UTransliterator *get_translit(const char *id)
 {
        UTransliterator *translit;
-       buffer_t *id_utf16;
+       ARRAY_TYPE(icu_utf16) id_utf16;
        UErrorCode err = U_ZERO_ERROR;
        UParseError perr;
 
-       id_utf16 = buffer_create_dynamic(pool_datastack_create(), 16);
-       fts_icu_utf8_to_utf16(id_utf16, id);
-       translit = utrans_openU(id_utf16->data, id_utf16->used/sizeof(UChar),
+       t_array_init(&id_utf16, 8);
+       fts_icu_utf8_to_utf16(&id_utf16, id);
+       translit = utrans_openU(array_idx(&id_utf16, 0), array_count(&id_utf16),
                                UTRANS_FORWARD, NULL, 0, &perr, &err);
        test_assert(!U_FAILURE(err));
        return translit;
@@ -96,18 +97,19 @@ static void test_fts_icu_translate(void)
 {
        const char *translit_id = "Any-Lower";
        UTransliterator *translit;
-       buffer_t *dest = buffer_create_dynamic(pool_datastack_create(), 64);
+       ARRAY_TYPE(icu_utf16) dest;
        const UChar src[] = { 0xbd, 'B', 'C' };
        const char *error;
        unsigned int i;
 
        test_begin("fts_icu_translate");
+       t_array_init(&dest, 32);
        translit = get_translit(translit_id);
        for (i = N_ELEMENTS(src); i > 0; i--) {
-               buffer_set_used_size(dest, 0);
-               test_assert(fts_icu_translate(dest, src, i,
+               array_clear(&dest);
+               test_assert(fts_icu_translate(&dest, src, i,
                                              translit, &error) == 0);
-               test_assert(dest->used == i * sizeof(UChar));
+               test_assert(array_count(&dest) == i);
        }
        utrans_close(translit);
        test_end();
@@ -117,22 +119,22 @@ static void test_fts_icu_translate_resize(void)
 {
        const char *translit_id = "Any-Hex";
        const char *src_utf8 = "FOO";
-       buffer_t *dest, *src_utf16;
+       ARRAY_TYPE(icu_utf16) src_utf16, dest;
        UTransliterator *translit;
        const char *error;
        unsigned int i;
 
        test_begin("fts_icu_translate_resize resize");
 
-       src_utf16 = buffer_create_dynamic(pool_datastack_create(), 16);
+       t_array_init(&src_utf16, 8);
        translit = get_translit(translit_id);
-       for (i = 2; i <= 20; i++) {
-               buffer_set_used_size(src_utf16, 0);
-               fts_icu_utf8_to_utf16(src_utf16, src_utf8);
-               dest = buffer_create_dynamic(pool_datastack_create(), i);
-               test_assert(buffer_get_writable_size(dest) == i);
-               test_assert(fts_icu_translate(dest, src_utf16->data,
-                                             src_utf16->used/sizeof(UChar),
+       for (i = 1; i <= 10; i++) {
+               array_clear(&src_utf16);
+               fts_icu_utf8_to_utf16(&src_utf16, src_utf8);
+               t_array_init(&dest, i);
+               test_assert(buffer_get_writable_size(dest.arr.buffer) == i*2);
+               test_assert(fts_icu_translate(&dest, array_idx(&src_utf16, 0),
+                                             array_count(&src_utf16),
                                              translit, &error) == 0);
        }