git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
lib-fts: Rewrite ICU handling functions.
author Timo Sirainen <tss@iki.fi>
Sat, 16 May 2015 15:47:20 +0000 (18:47 +0300)
committer Timo Sirainen <tss@iki.fi>
Sat, 16 May 2015 15:47:20 +0000 (18:47 +0300)
Some of the changes:
 - Use buffers instead of allocating everything from the data stack.
 - Optimistically attempt to write the data directly to the buffers without
first calculating their size. Grow the buffer and retry if the data doesn't
fit on the first attempt.
 - Use u_strFromUTF8Lenient() instead of u_strFromUTF8(). Our input is
already supposed to be valid UTF-8, although we don't check whether all code
points are valid, whereas u_strFromUTF8() does check them and returns failures.
We don't really care whether code points are valid or not, and
u_strFromUTF8Lenient() passes everything through.

Added unit tests to make sure all the functions work as intended and all the
UTF-8 input passes through them successfully.

src/lib-fts/Makefile.am
src/lib-fts/fts-filter-normalizer-icu.c
src/lib-fts/fts-icu.c [new file with mode: 0644]
src/lib-fts/fts-icu.h [new file with mode: 0644]
src/lib-fts/test-fts-filter.c
src/lib-fts/test-fts-icu.c [new file with mode: 0644]

index e0639528793a593c77c523352cf4bf0534b8c5dd..67f528282c2d3ba5de3b62b1e7aa449e3edc3a45 100644 (file)
@@ -50,7 +50,9 @@ endif
 endif
 
 if BUILD_LIBICU
+ICU_SOURCES = fts-icu.c
 NORMALIZER_LIBS = $(LIBICU_LIBS)
+ICU_TESTS = test-fts-icu
 endif
 
 libfts_la_LIBADD = \
@@ -67,19 +69,22 @@ libfts_la_SOURCES = \
        fts-language.c \
        fts-tokenizer.c \
        fts-tokenizer-address.c \
-       fts-tokenizer-generic.c
+       fts-tokenizer-generic.c \
+       $(ICU_SOURCES)
 
 noinst_HEADERS = \
        fts-filter.h \
        fts-filter-private.h \
+       fts-icu.h \
        fts-language.h \
        fts-tokenizer.h \
        fts-tokenizer-private.h \
        fts-tokenizer-generic-private.h
 
 test_programs = \
-       test-fts-filter \
+       $(ICU_TESTS) \
        $(TEST_FTS_LANGUAGE) \
+       test-fts-filter \
        test-fts-tokenizer
 
 noinst_PROGRAMS = $(test_programs)
@@ -89,6 +94,10 @@ test_libs = \
        ../lib/liblib.la
 test_deps = $(noinst_LTLIBRARIES) $(test_libs)
 
+test_fts_icu_SOURCES = test-fts-icu.c
+test_fts_icu_LDADD = fts-icu.lo $(LIBICU_LIBS) $(test_libs)
+test_fts_icu_DEPENDENCIES = fts-icu.lo $(test_deps)
+
 test_fts_filter_SOURCES = test-fts-filter.c
 test_fts_filter_LDADD = libfts.la $(test_libs)
 test_fts_filter_DEPENDENCIES = libfts.la $(test_deps)
index 026123bb9fdf4ffd67a65cc9934824588a346880..e1704ad928813ddf1f342dbbb4cc5573625207ea 100644 (file)
 #include "fts-language.h"
 
 #ifdef HAVE_LIBICU
-
-#include <unicode/utrans.h>
-#include <unicode/uenum.h>
-#include <unicode/ustring.h>
-#include <unicode/ucnv.h>
-#include <stdlib.h>
+#include "fts-icu.h"
 
 struct fts_filter_normalizer_icu {
        struct fts_filter filter;
        pool_t pool;
        const char *transliterator_id;
+       const UChar *transliterator_id_utf16;
+       unsigned int transliterator_id_utf16_len;
+
        UTransliterator *transliterator;
+       buffer_t *utf16_token, *trans_token;
+       string_t *utf8_token;
 };
 
-/* Helper to create UTF16, which libicu wants as input.
-
- On input,  if *dst_uchars_r  > 0,  it indicates  the number  of UChar
- sized  units that  should be  allocated  for the  text. However,  the
- function will not  use the number, if  the text will not  fit in that
- amount.
-
- On return *dst_uchars_r will contain the number of UChar sized units
- allocated for the dst. NOT the number of bytes nor the length of the
- text. */
-static void make_uchar(const char *src, UChar **dst, int32_t *dst_uchars_r)
-{
-       UErrorCode err = U_ZERO_ERROR;
-       int32_t len = strlen(src);
-       int32_t ustr_len = 0;
-       int32_t ustr_len_actual = 0;
-       UChar *retp = NULL;
-       int32_t alloc_uchars = 0;
-
-       i_assert(dst_uchars_r != NULL);
-
-       /* Check length required for encoded dst. */
-       retp = u_strFromUTF8(NULL, 0, &ustr_len, src, len, &err);
-
-       /* When preflighting a successful call returns a buffer overflow
-          error. */
-       if (U_BUFFER_OVERFLOW_ERROR != err && U_FAILURE(err)) {
-               i_panic("Failed to estimate allocation size with lib ICU"
-                       " u_strFromUTF8(): %s",u_errorName(err));
-       }
-       i_assert(NULL == retp);
-
-       err = U_ZERO_ERROR;
-       if (*dst_uchars_r > 0 && *dst_uchars_r > ustr_len)
-               alloc_uchars =  *dst_uchars_r;
-       else
-               alloc_uchars = ustr_len;
-       alloc_uchars++; /* room for null bytes(2) */
-       *dst = t_malloc(alloc_uchars * sizeof(UChar));
-       *dst_uchars_r = alloc_uchars;
-       retp = u_strFromUTF8(*dst, alloc_uchars, &ustr_len_actual,
-                            src, len, &err);
-
-       if (U_FAILURE(err))
-               i_panic("Lib ICU u_strFromUTF8 failed: %s", u_errorName(err));
-       i_assert(retp == *dst);
-       i_assert(ustr_len == ustr_len_actual);
-}
-
-static void make_utf8(const UChar *src, const char **_dst)
-{
-       char *dst;
-       char *retp = NULL;
-       int32_t dsize = 0;
-       int32_t dsize_actual = 0;
-       int32_t sub_num = 0;
-       UErrorCode err = U_ZERO_ERROR;
-       int32_t usrc_len = u_strlen(src); /* libicu selects different codepaths
-                                            depending if srclen -1 or not */
-
-       retp = u_strToUTF8WithSub(NULL, 0, &dsize, src, usrc_len,
-                                 UNICODE_REPLACEMENT_CHAR, &sub_num, &err);
-
-       /* Preflighting can cause buffer overflow to be reported */
-       if (U_BUFFER_OVERFLOW_ERROR != err && U_FAILURE(err)) {
-               i_panic("Failed to estimate allocation size with lib ICU"
-                       " u_strToUTF8(): %s",u_errorName(err));
-       }
-       i_assert(0 == sub_num);
-       i_assert(NULL == retp);
-
-       dsize++; /* room for '\0' byte */
-       dst = t_malloc(dsize);
-       err = U_ZERO_ERROR;
-       retp = u_strToUTF8WithSub(dst, dsize, &dsize_actual, src, usrc_len,
-                                UNICODE_REPLACEMENT_CHAR, &sub_num, &err);
-       if (U_FAILURE(err))
-               i_panic("Lib ICU u_strToUTF8WithSub() failed: %s",
-                       u_errorName(err));
-       if (dsize_actual >= dsize) {
-               i_panic("Produced UTF8 string length (%d) does not fit in "
-                       "preflighted(%d). Buffer overflow?",
-                       dsize_actual, dsize);
-       }
-       if (0 != sub_num) {
-               i_panic("UTF8 string not well formed. "
-                       "Substitutions (%d) were made.", sub_num);
-       }
-       i_assert(retp == dst);
-       *_dst = dst;
-}
-
 static void fts_filter_normalizer_icu_destroy(struct fts_filter *filter)
 {
        struct fts_filter_normalizer_icu *np =
@@ -152,6 +60,13 @@ fts_filter_normalizer_icu_create(const struct fts_language *lang ATTR_UNUSED,
        np->pool = pp;
        np->filter = *fts_filter_normalizer_icu;
        np->transliterator_id = p_strdup(pp, id);
+       np->utf16_token = buffer_create_dynamic(pp, 128);
+       np->trans_token = buffer_create_dynamic(pp, 128);
+       np->utf8_token = buffer_create_dynamic(pp, 128);
+       fts_icu_utf8_to_utf16(np->utf16_token, id);
+       np->transliterator_id_utf16 =
+               p_memdup(pp, np->utf16_token->data, np->utf16_token->used);
+       np->transliterator_id_utf16_len = np->utf16_token->used / sizeof(UChar);
        *filter_r = &np->filter;
        return 0;
 }
@@ -162,14 +77,11 @@ fts_filter_normalizer_icu_create_trans(struct fts_filter_normalizer_icu *np,
 {
        UErrorCode err = U_ZERO_ERROR;
        UParseError perr;
-       UChar *id_uchar = NULL;
-       int32_t id_len_uchar = 0;
 
        memset(&perr, 0, sizeof(perr));
 
-       make_uchar(np->transliterator_id, &id_uchar, &id_len_uchar);
-
-       np->transliterator = utrans_openU(id_uchar, u_strlen(id_uchar),
+       np->transliterator = utrans_openU(np->transliterator_id_utf16,
+                                         np->transliterator_id_utf16_len,
                                          UTRANS_FORWARD, NULL, 0, &perr, &err);
        if (U_FAILURE(err)) {
                string_t *str = t_str_new(128);
@@ -193,48 +105,27 @@ fts_filter_normalizer_icu_filter(struct fts_filter *filter, const char **token,
 {
        struct fts_filter_normalizer_icu *np =
                (struct fts_filter_normalizer_icu *)filter;
-       UErrorCode err = U_ZERO_ERROR;
-       UChar *utext = NULL;
-       int32_t utext_cap = 0;
-       int32_t utext_len = -1;
-       int32_t utext_limit;
 
        if (np->transliterator == NULL) {
                if (fts_filter_normalizer_icu_create_trans(np, error_r) < 0)
                        return -1;
        }
 
-       make_uchar(*token, &utext, &utext_cap);
-       utext_limit = u_strlen(utext);
-       utrans_transUChars(np->transliterator, utext, &utext_len,
-                          utext_cap, 0, &utext_limit, &err);
-
-       /* Data did not fit into utext. */
-       if (utext_len > utext_cap || err == U_BUFFER_OVERFLOW_ERROR) {
-               /* This is a crude retry fix... Make a new utext of the
-                  size utrans_transUChars indicated */
-               utext_len++; /* room for '\0' bytes(2) */
-               utext_cap = utext_len;
-               make_uchar(*token, &utext, &utext_cap);
-               i_assert(utext_cap ==  utext_len);
-               utext_limit = u_strlen(utext);
-               utext_len = -1;
-               err = U_ZERO_ERROR;
-               utrans_transUChars(np->transliterator, utext,
-                                  &utext_len, utext_cap, 0,
-                                  &utext_limit, &err);
-       }
-
-       if (U_FAILURE(err)) {
-               *error_r = t_strdup_printf("utrans_transUChars() failed: %s\n",
-                                           u_errorName(err));
+       fts_icu_utf8_to_utf16(np->utf16_token, *token);
+       buffer_append_zero(np->utf16_token, 2);
+       buffer_set_used_size(np->utf16_token, np->utf16_token->used-2);
+       buffer_set_used_size(np->trans_token, 0);
+       if (fts_icu_translate(np->trans_token, np->utf16_token->data,
+                             np->utf16_token->used / sizeof(UChar),
+                             np->transliterator, error_r) < 0)
                return -1;
-       }
 
-       if (utext_len == 0)
+       if (np->trans_token->used == 0)
                return 0;
 
-       make_utf8(utext, token);
+       fts_icu_utf16_to_utf8(np->utf8_token, np->trans_token->data,
+                             np->trans_token->used / sizeof(UChar));
+       *token = str_c(np->utf8_token);
        return 1;
 }
 
diff --git a/src/lib-fts/fts-icu.c b/src/lib-fts/fts-icu.c
new file mode 100644 (file)
index 0000000..e10f157
--- /dev/null
@@ -0,0 +1,110 @@
+/* Copyright (c) 2014-2015 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "buffer.h"
+#include "unichar.h"
+#include "fts-icu.h"
+
+/* Convert the NUL-terminated UTF-8 string src_utf8 to UTF-16. The UChars are
+   written starting at offset 0 of dest_utf16, so earlier contents are
+   overwritten, and the buffer's used size is set to the converted length in
+   bytes. The lenient ICU converter is used. Any ICU failure other than the
+   buffer overflow handled below panics. */
+void fts_icu_utf8_to_utf16(buffer_t *dest_utf16, const char *src_utf8)
+{
+       unsigned int utf8_size = strlen(src_utf8);
+       size_t writable = buffer_get_writable_size(dest_utf16);
+       int32_t capacity = writable / sizeof(UChar);
+       int32_t uchar_count;
+       UErrorCode status = U_ZERO_ERROR;
+       UChar *dest_data, *retp;
+
+       /* optimistic first pass into whatever space the buffer already has */
+       dest_data = buffer_get_space_unsafe(dest_utf16, 0, writable);
+       retp = u_strFromUTF8Lenient(dest_data, capacity, &uchar_count,
+                                   src_utf8, utf8_size, &status);
+       if (status == U_BUFFER_OVERFLOW_ERROR) {
+               /* it didn't fit: uchar_count is used as the needed length,
+                  so make room for exactly that much and convert again */
+               capacity = uchar_count;
+               dest_data = buffer_get_space_unsafe(dest_utf16, 0,
+                                                   capacity * sizeof(UChar));
+               status = U_ZERO_ERROR;
+               retp = u_strFromUTF8Lenient(dest_data, capacity, &uchar_count,
+                                           src_utf8, utf8_size, &status);
+       }
+       if (U_FAILURE(status)) {
+               i_panic("LibICU u_strFromUTF8Lenient() failed: %s",
+                       u_errorName(status));
+       }
+       buffer_set_used_size(dest_utf16, uchar_count * sizeof(UChar));
+       i_assert(retp == dest_data);
+}
+
+/* Convert src_len UChars of UTF-16 from src_utf16 to UTF-8. The output is
+   written starting at offset 0 of dest_utf8, so earlier contents are
+   overwritten, and the used size is set to the UTF-8 length in bytes.
+   UNICODE_REPLACEMENT_CHAR is passed to ICU as the substitution character.
+   Any ICU failure other than the buffer overflow handled below panics. */
+void fts_icu_utf16_to_utf8(string_t *dest_utf8, const UChar *src_utf16,
+                          unsigned int src_len)
+{
+       UErrorCode status = U_ZERO_ERROR;
+       int32_t utf8_len = 0;
+       int32_t substitutions = 0;
+       size_t capacity;
+       char *dest_data, *retp;
+
+       /* optimistic first pass into whatever space the buffer already has */
+       dest_data = buffer_get_space_unsafe(dest_utf8, 0,
+                                           buffer_get_writable_size(dest_utf8));
+       capacity = buffer_get_writable_size(dest_utf8);
+       retp = u_strToUTF8WithSub(dest_data, capacity, &utf8_len,
+                                 src_utf16, src_len,
+                                 UNICODE_REPLACEMENT_CHAR,
+                                 &substitutions, &status);
+       if (status == U_BUFFER_OVERFLOW_ERROR) {
+               /* it didn't fit: utf8_len is used as the needed length, so
+                  make room for that much and convert again */
+               dest_data = buffer_get_space_unsafe(dest_utf8, 0, utf8_len);
+               capacity = buffer_get_writable_size(dest_utf8);
+               status = U_ZERO_ERROR;
+               retp = u_strToUTF8WithSub(dest_data, capacity, &utf8_len,
+                                         src_utf16, src_len,
+                                         UNICODE_REPLACEMENT_CHAR,
+                                         &substitutions, &status);
+       }
+       if (U_FAILURE(status)) {
+               i_panic("LibICU u_strToUTF8WithSub() failed: %s",
+                       u_errorName(status));
+       }
+       buffer_set_used_size(dest_utf8, utf8_len);
+       i_assert(retp == dest_data);
+}
+
+/* Run src_len UChars from src_utf16 through the transliterator and append the
+   result to dest_utf16; data already in dest_utf16 is left untouched.
+
+   Returns 0 on success. If ICU fails, returns -1, stores an error message in
+   *error_r and restores dest_utf16 to the used size it had on entry. */
+int fts_icu_translate(buffer_t *dest_utf16, const UChar *src_utf16,
+                     unsigned int src_len, UTransliterator *transliterator,
+                     const char **error_r)
+{
+       UErrorCode err = U_ZERO_ERROR;
+       int32_t utf16_len = src_len;
+       UChar *dest_data;
+       int32_t avail_uchars, limit = src_len;
+       /* byte offset where this call's output begins */
+       size_t dest_pos = dest_utf16->used;
+
+       /* translation is done in-place in the buffer. try first with the
+          current buffer size. */
+       buffer_append(dest_utf16, src_utf16, src_len*sizeof(UChar));
+
+       avail_uchars = (buffer_get_writable_size(dest_utf16)-dest_pos) / sizeof(UChar);
+       dest_data = buffer_get_space_unsafe(dest_utf16, dest_pos,
+                               buffer_get_writable_size(dest_utf16)-dest_pos);
+       utrans_transUChars(transliterator, dest_data, &utf16_len,
+                          avail_uchars, 0, &limit, &err);
+       if (err == U_BUFFER_OVERFLOW_ERROR) {
+               /* try again with a larger buffer. utf16_len is used as the
+                  required capacity. the source text is written again
+                  because the translation works in-place on it. */
+               err = U_ZERO_ERROR;
+               avail_uchars = utf16_len;
+               limit = utf16_len = src_len;
+               buffer_write(dest_utf16, dest_pos,
+                            src_utf16, src_len*sizeof(UChar));
+               dest_data = buffer_get_space_unsafe(dest_utf16, dest_pos,
+                                                   avail_uchars * sizeof(UChar));
+               utrans_transUChars(transliterator, dest_data, &utf16_len,
+                                  avail_uchars, 0, &limit, &err);
+               i_assert(err != U_BUFFER_OVERFLOW_ERROR);
+       }
+       if (U_FAILURE(err)) {
+               *error_r = t_strdup_printf("LibICU utrans_transUChars() failed: %s",
+                                          u_errorName(err));
+               buffer_set_used_size(dest_utf16, dest_pos);
+               return -1;
+       }
+       /* the translated text starts at dest_pos, so the final size has to
+          include whatever was in the buffer before this call */
+       buffer_set_used_size(dest_utf16,
+                            dest_pos + utf16_len * sizeof(UChar));
+       return 0;
+}
diff --git a/src/lib-fts/fts-icu.h b/src/lib-fts/fts-icu.h
new file mode 100644 (file)
index 0000000..928b4b0
--- /dev/null
@@ -0,0 +1,17 @@
+#ifndef FTS_ICU_H
+#define FTS_ICU_H
+
+#include <unicode/ustring.h>
+#include <unicode/utrans.h>
+
+/* Convert NUL-terminated UTF-8 input to UTF-16 output. The dest_utf16
+   contains UChars. The output is written from the beginning of dest_utf16,
+   replacing its earlier contents. Panics if the ICU conversion fails. */
+void fts_icu_utf8_to_utf16(buffer_t *dest_utf16, const char *src_utf8);
+/* Convert UTF-16 input to UTF-8 output. src_len is the number of UChars in
+   src_utf16. The output is written from the beginning of dest_utf8,
+   replacing its earlier contents. Panics if the ICU conversion fails. */
+void fts_icu_utf16_to_utf8(string_t *dest_utf8, const UChar *src_utf16,
+                          unsigned int src_len);
+/* Run ICU translation for the string. src_len is the number of UChars in
+   src_utf16. The translated UChars are appended to dest_utf16. Returns 0 on
+   success, -1 on error, in which case *error_r is set and dest_utf16 is
+   restored to its original used size. */
+int fts_icu_translate(buffer_t *dest_utf16, const UChar *src_utf16,
+                     unsigned int src_len, UTransliterator *transliterator,
+                     const char **error_r);
+
+#endif
index 9c5fec1cfb8f1ff27b257bf80bf115ab5716f28f..cb80a5116e0a67e079f7fa75173d0c9618547614 100644 (file)
@@ -2,6 +2,8 @@
 
 #include "lib.h"
 #include "sha2.h"
+#include "str.h"
+#include "unichar.h"
 #include "test-common.h"
 #include "fts-language.h"
 #include "fts-filter.h"
@@ -463,6 +465,37 @@ static void test_fts_filter_normalizer_empty(void)
        test_end();
 }
 
+/* Feed the ICU normalizer every value from 1 up to (but not including)
+   0x1ffff, and finally 0x7fffffff, each encoded with uni_ucs4_to_utf8_c(),
+   and check that filtering never reports an error. */
+static void test_fts_filter_normalizer_baddata(void)
+{
+       const char * const settings[] = {
+               "id", "Any-Lower; NFKD; [: Nonspacing Mark :] Remove",
+               NULL
+       };
+       struct fts_filter *norm;
+       const char *token, *error;
+       string_t *str;
+       unsigned int i = 1;
+
+       test_begin("fts filter normalizer bad data");
+
+       test_assert(fts_filter_create(fts_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0);
+       str = t_str_new(128);
+       while (i < 0x1ffff) {
+               /* reuse the same string for every input */
+               str_truncate(str, 0);
+               uni_ucs4_to_utf8_c(i, str);
+               token = str_c(str);
+               T_BEGIN {
+                       test_assert_idx(fts_filter_filter(norm, &token, &error) >= 0, i);
+               } T_END;
+               i++;
+       }
+
+       /* one more value, far above the range looped over */
+       str_truncate(str, 0);
+       uni_ucs4_to_utf8_c(0x7fffffff, str);
+       token = str_c(str);
+       test_assert(fts_filter_filter(norm, &token, &error) >= 0);
+
+       fts_filter_unref(&norm);
+       test_end();
+}
+
 static void test_fts_filter_normalizer_invalid_id(void)
 {
        struct fts_filter *norm = NULL;
@@ -558,6 +591,7 @@ int main(void)
                test_fts_filter_normalizer_swedish_short_default_id,
                test_fts_filter_normalizer_french,
                test_fts_filter_normalizer_empty,
+               test_fts_filter_normalizer_baddata,
                test_fts_filter_normalizer_invalid_id,
 #ifdef HAVE_FTS_STEMMER
                test_fts_filter_normalizer_stopwords_stemmer_eng,
diff --git a/src/lib-fts/test-fts-icu.c b/src/lib-fts/test-fts-icu.c
new file mode 100644 (file)
index 0000000..3f1367e
--- /dev/null
@@ -0,0 +1,152 @@
+/* Copyright (c) 2015 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "buffer.h"
+#include "str.h"
+#include "unichar.h"
+#include "test-common.h"
+#include "fts-icu.h"
+
+/* Convert ASCII strings of growing length into one buffer created with an
+   initial size of 5, checking the resulting used size and, for the first two
+   inputs, the buffer size afterwards. */
+static void test_fts_icu_utf8_to_utf16_ascii_resize(void)
+{
+       buffer_t *dest;
+
+       dest = buffer_create_dynamic(pool_datastack_create(), 5);
+       test_begin("fts_icu_utf8_to_utf16 ascii resize");
+
+       /* dynamic buffers reserve +1 for str_c()'s NUL, so 5 -> 4 */
+       test_assert(buffer_get_size(dest) == 5);
+
+       /* 2 UChars = 4 bytes: the buffer size is expected to stay as it is */
+       fts_icu_utf8_to_utf16(dest, "12");
+       test_assert(dest->used == 4);
+       test_assert(buffer_get_size(dest) == 5);
+
+       /* 3 UChars = 6 bytes: the buffer is expected to have grown */
+       fts_icu_utf8_to_utf16(dest, "123");
+       test_assert(dest->used == 6);
+       test_assert(buffer_get_size(dest) == 8);
+
+       /* 5 UChars = 10 bytes */
+       fts_icu_utf8_to_utf16(dest, "12345");
+       test_assert(dest->used == 10);
+
+       test_end();
+}
+
+/* Convert one 4-byte UTF-8 sequence into buffers created with initial sizes
+   2..5 and check that the UTF-16 result is always 4 bytes long. */
+static void test_fts_icu_utf8_to_utf16_32bit_resize(void)
+{
+       buffer_t *dest;
+       unsigned int i = 2;
+
+       test_begin("fts_icu_utf8_to_utf16 32bit resize");
+       while (i <= 5) {
+               /* a fresh buffer for every initial size */
+               dest = buffer_create_dynamic(pool_datastack_create(), i);
+               test_assert(buffer_get_size(dest) == i);
+               fts_icu_utf8_to_utf16(dest, "\xF0\x90\x90\x80"); /* 0x10400 */
+               test_assert(dest->used == 4);
+               i++;
+       }
+
+       test_end();
+}
+
+/* Convert the first 3, 2 and 1 UChars of a fixed input to UTF-8, reusing the
+   same destination string, and check that the output is one byte longer than
+   the number of UChars converted. */
+static void test_fts_icu_utf16_to_utf8(void)
+{
+       const UChar src[] = { 0xbd, 'b', 'c' };
+       string_t *dest = t_str_new(64);
+       unsigned int i = N_ELEMENTS(src);
+
+       test_begin("fts_icu_utf16_to_utf8");
+       while (i > 0) {
+               fts_icu_utf16_to_utf8(dest, src, i);
+               test_assert(dest->used == i+1);
+               i--;
+       }
+       test_end();
+}
+
+/* Convert a single UNICODE_REPLACEMENT_CHAR into strings created with
+   initial sizes 2..6 and check that the result is always the 3-byte
+   UNICODE_REPLACEMENT_CHAR_UTF8. */
+static void test_fts_icu_utf16_to_utf8_resize(void)
+{
+       const UChar src = UNICODE_REPLACEMENT_CHAR;
+       string_t *dest;
+       unsigned int i = 2;
+
+       test_begin("fts_icu_utf16_to_utf8 resize");
+       while (i <= 6) {
+               /* a fresh string for every initial size */
+               dest = t_str_new(i);
+               test_assert(buffer_get_size(dest) == i);
+               fts_icu_utf16_to_utf8(dest, &src, 1);
+               test_assert(dest->used == 3);
+               test_assert(strcmp(str_c(dest), UNICODE_REPLACEMENT_CHAR_UTF8) == 0);
+               i++;
+       }
+
+       test_end();
+}
+
+/* Open a forward ICU transliterator for the given UTF-8 ID. The ID is first
+   converted to UTF-16 with fts_icu_utf8_to_utf16(). A failure to open is
+   recorded with test_assert(); the utrans_openU() result is returned to the
+   caller either way. */
+static UTransliterator *get_translit(const char *id)
+{
+       UParseError perr;
+       UErrorCode err = U_ZERO_ERROR;
+       buffer_t *id_buf;
+       size_t id_uchars;
+       UTransliterator *translit;
+
+       id_buf = buffer_create_dynamic(pool_datastack_create(), 16);
+       fts_icu_utf8_to_utf16(id_buf, id);
+       /* utrans_openU() is given the ID length in UChars, not bytes */
+       id_uchars = id_buf->used/sizeof(UChar);
+       translit = utrans_openU(id_buf->data, id_uchars,
+                               UTRANS_FORWARD, NULL, 0, &perr, &err);
+       test_assert(!U_FAILURE(err));
+       return translit;
+}
+
+/* Run the first 3, 2 and 1 UChars of a fixed input through the "Any-Lower"
+   transliterator, starting from an empty destination buffer each time, and
+   check that the output has the same size as the input. */
+static void test_fts_icu_translate(void)
+{
+       const char *translit_id = "Any-Lower";
+       UTransliterator *translit;
+       buffer_t *dest = buffer_create_dynamic(pool_datastack_create(), 64);
+       const UChar src[] = { 0xbd, 'B', 'C' };
+       const char *error;
+       unsigned int i;
+
+       test_begin("fts_icu_translate");
+       translit = get_translit(translit_id);
+       for (i = N_ELEMENTS(src); i > 0; i--) {
+               buffer_set_used_size(dest, 0);
+               test_assert(fts_icu_translate(dest, src, i,
+                                             translit, &error) == 0);
+               test_assert(dest->used == i * sizeof(UChar));
+       }
+       /* the transliterator was opened by get_translit(); close it here so
+          the test doesn't leak it */
+       utrans_close(translit);
+       test_end();
+}
+
+/* Run "FOO" through the "Any-Hex" transliterator into destination buffers
+   created with initial sizes 2..20 and check that the translation succeeds
+   for every size. */
+static void test_fts_icu_translate_resize(void)
+{
+       const char *translit_id = "Any-Hex";
+       const char *src_utf8 = "FOO";
+       buffer_t *dest, *src_utf16;
+       UTransliterator *translit;
+       const char *error;
+       unsigned int i;
+
+       test_begin("fts_icu_translate_resize resize");
+
+       src_utf16 = buffer_create_dynamic(pool_datastack_create(), 16);
+       translit = get_translit(translit_id);
+       for (i = 2; i <= 20; i++) {
+               buffer_set_used_size(src_utf16, 0);
+               fts_icu_utf8_to_utf16(src_utf16, src_utf8);
+               /* a fresh destination for every initial size */
+               dest = buffer_create_dynamic(pool_datastack_create(), i);
+               test_assert(buffer_get_size(dest) == i);
+               test_assert(fts_icu_translate(dest, src_utf16->data,
+                                             src_utf16->used/sizeof(UChar),
+                                             translit, &error) == 0);
+       }
+
+       /* the transliterator was opened by get_translit(); close it here so
+          the test doesn't leak it */
+       utrans_close(translit);
+       test_end();
+}
+
+/* Test program entry point: hands the NULL-terminated list of test functions
+   to test_run() and returns its result. */
+int main(void)
+{
+       static void (*tests[])(void) = {
+               /* UTF-8 -> UTF-16 */
+               test_fts_icu_utf8_to_utf16_ascii_resize,
+               test_fts_icu_utf8_to_utf16_32bit_resize,
+               /* UTF-16 -> UTF-8 */
+               test_fts_icu_utf16_to_utf8,
+               test_fts_icu_utf16_to_utf8_resize,
+               /* transliteration */
+               test_fts_icu_translate,
+               test_fts_icu_translate_resize,
+               NULL
+       };
+
+       return test_run(tests);
+}