This makes it clearer how the API is intended to be used.
/* Copyright (c) 2014-2016 Dovecot authors, see the included COPYING file */
#include "lib.h"
-#include "buffer.h"
+#include "array.h"
#include "str.h"
#include "unichar.h" /* unicode replacement char */
#include "fts-filter-common.h"
const char *transliterator_id;
UTransliterator *transliterator;
- buffer_t *utf16_token, *trans_token;
+ ARRAY_TYPE(icu_utf16) utf16_token, trans_token;
string_t *utf8_token;
};
np->pool = pp;
np->filter = *fts_filter_normalizer_icu;
np->transliterator_id = p_strdup(pp, id);
- np->utf16_token = buffer_create_dynamic(pp, 128);
- np->trans_token = buffer_create_dynamic(pp, 128);
+ p_array_init(&np->utf16_token, pp, 64);
+ p_array_init(&np->trans_token, pp, 64);
np->utf8_token = buffer_create_dynamic(pp, 128);
np->filter.max_length = max_length;
*filter_r = &np->filter;
error_r) < 0)
return -1;
- fts_icu_utf8_to_utf16(np->utf16_token, *token);
- buffer_append_zero(np->utf16_token, 2);
- buffer_set_used_size(np->utf16_token, np->utf16_token->used-2);
- buffer_set_used_size(np->trans_token, 0);
- if (fts_icu_translate(np->trans_token, np->utf16_token->data,
- np->utf16_token->used / sizeof(UChar),
+ fts_icu_utf8_to_utf16(&np->utf16_token, *token);
+ array_append_zero(&np->utf16_token);
+ array_delete(&np->utf16_token, array_count(&np->utf16_token)-1, 1);
+ array_clear(&np->trans_token);
+ if (fts_icu_translate(&np->trans_token, array_idx(&np->utf16_token, 0),
+ array_count(&np->utf16_token),
np->transliterator, error_r) < 0)
return -1;
- if (np->trans_token->used == 0)
+ if (array_count(&np->trans_token) == 0)
return 0;
- fts_icu_utf16_to_utf8(np->utf8_token, np->trans_token->data,
- np->trans_token->used / sizeof(UChar));
+ fts_icu_utf16_to_utf8(np->utf8_token, array_idx(&np->trans_token, 0),
+ array_count(&np->trans_token));
fts_filter_truncate_token(np->utf8_token, np->filter.max_length);
*token = str_c(np->utf8_token);
return 1;
#include "lib.h"
#include "mempool.h"
-#include "buffer.h"
+#include "array.h"
#include "str.h"
#include "unichar.h"
#include "fts-icu.h"
return icu_csm;
}
/* Convert the NUL-terminated UTF-8 string src_utf8 to UTF-16 and store the
   result in dest_utf16. The output is written starting at offset 0 of the
   array's underlying buffer, so earlier contents of dest_utf16 are
   overwritten rather than appended to. */
-void fts_icu_utf8_to_utf16(buffer_t *dest_utf16, const char *src_utf8)
+void fts_icu_utf8_to_utf16(ARRAY_TYPE(icu_utf16) *dest_utf16,
+ const char *src_utf8)
{
/* ICU writes straight into the buffer behind the array, so the sizes
   below are in bytes unless they are divided by sizeof(UChar). */
+ buffer_t *dest_buf = dest_utf16->arr.buffer;
UErrorCode err = U_ZERO_ERROR;
size_t src_bytes = strlen(src_utf8);
int32_t utf16_len;
int32_t avail_uchars = 0;
/* try to encode with the current buffer size */
- avail_uchars = buffer_get_writable_size(dest_utf16) / sizeof(UChar);
- dest_data = buffer_get_space_unsafe(dest_utf16, 0,
- buffer_get_writable_size(dest_utf16));
+ avail_uchars = buffer_get_writable_size(dest_buf) / sizeof(UChar);
+ dest_data = buffer_get_space_unsafe(dest_buf, 0,
+ buffer_get_writable_size(dest_buf));
retp = u_strFromUTF8Lenient(dest_data, avail_uchars,
&utf16_len, src_utf8, src_bytes, &err);
if (err == U_BUFFER_OVERFLOW_ERROR) {
/* try again with a larger buffer */
/* utf16_len from the first attempt is used as the size to request
   for the retry. */
- dest_data = buffer_get_space_unsafe(dest_utf16, 0,
+ dest_data = buffer_get_space_unsafe(dest_buf, 0,
utf16_len * sizeof(UChar));
err = U_ZERO_ERROR;
retp = u_strFromUTF8Lenient(dest_data, utf16_len,
i_panic("LibICU u_strFromUTF8Lenient() failed: %s",
u_errorName(err));
}
/* Publish the converted length: used size is in bytes, utf16_len is
   in UChars. */
- buffer_set_used_size(dest_utf16, utf16_len * sizeof(UChar));
+ buffer_set_used_size(dest_buf, utf16_len * sizeof(UChar));
i_assert(retp == dest_data);
}
i_assert(retp == dest_data);
}
/* Transliterate src_len UChars from src_utf16 with the given transliterator
   and store the result in dest_utf16. The input is first appended to
   dest_utf16 and then translated in place inside its underlying buffer.
   Returns 0 on success. On ICU failure sets *error_r, restores the buffer's
   used size to its value on entry and returns -1. */
-int fts_icu_translate(buffer_t *dest_utf16, const UChar *src_utf16,
+int fts_icu_translate(ARRAY_TYPE(icu_utf16) *dest_utf16, const UChar *src_utf16,
unsigned int src_len, UTransliterator *transliterator,
const char **error_r)
{
+ buffer_t *dest_buf = dest_utf16->arr.buffer;
UErrorCode err = U_ZERO_ERROR;
int32_t utf16_len = src_len;
UChar *dest_data;
int32_t avail_uchars, limit = src_len;
/* dest_pos is a byte offset: where the used part of the buffer ended
   on entry, and where the appended input starts. */
- size_t dest_pos = dest_utf16->used;
+ size_t dest_pos = dest_buf->used;
/* translation is done in-place in the buffer. try first with the
current buffer size. */
- buffer_append(dest_utf16, src_utf16, src_len*sizeof(UChar));
+ array_append(dest_utf16, src_utf16, src_len);
- avail_uchars = (buffer_get_writable_size(dest_utf16)-dest_pos) / sizeof(UChar);
- dest_data = buffer_get_space_unsafe(dest_utf16, dest_pos,
- buffer_get_writable_size(dest_utf16)-dest_pos);
+ avail_uchars = (buffer_get_writable_size(dest_buf)-dest_pos) / sizeof(UChar);
+ dest_data = buffer_get_space_unsafe(dest_buf, dest_pos,
+ buffer_get_writable_size(dest_buf) - dest_pos);
utrans_transUChars(transliterator, dest_data, &utf16_len,
avail_uchars, 0, &limit, &err);
if (err == U_BUFFER_OVERFLOW_ERROR) {
/* Retry with utf16_len from the failed attempt as the new capacity.
   The source is written back at dest_pos first, presumably because
   the in-place attempt may have altered it -- TODO confirm. */
err = U_ZERO_ERROR;
avail_uchars = utf16_len;
limit = utf16_len = src_len;
- buffer_write(dest_utf16, dest_pos,
+ buffer_write(dest_buf, dest_pos,
src_utf16, src_len*sizeof(UChar));
- dest_data = buffer_get_space_unsafe(dest_utf16, dest_pos,
+ dest_data = buffer_get_space_unsafe(dest_buf, dest_pos,
avail_uchars * sizeof(UChar));
utrans_transUChars(transliterator, dest_data, &utf16_len,
avail_uchars, 0, &limit, &err);
if (U_FAILURE(err)) {
*error_r = t_strdup_printf("LibICU utrans_transUChars() failed: %s",
u_errorName(err));
- buffer_set_used_size(dest_utf16, dest_pos);
+ buffer_set_used_size(dest_buf, dest_pos);
return -1;
}
/* NOTE(review): the output was written starting at dest_pos, but the
   used size set here does not include dest_pos. The callers visible
   in this change pass an empty dest, so dest_pos is 0 for them --
   confirm whether a non-empty dest is meant to be supported. */
- buffer_set_used_size(dest_utf16, utf16_len * sizeof(UChar));
+ buffer_set_used_size(dest_buf, utf16_len * sizeof(UChar));
return 0;
}
{
UErrorCode err = U_ZERO_ERROR;
UParseError perr;
- buffer_t *id_utf16_buf = buffer_create_dynamic(pool_datastack_create(), 2 * strlen(id));
- UChar *id_utf16;
+ ARRAY_TYPE(icu_utf16) id_utf16;
memset(&perr, 0, sizeof(perr));
- fts_icu_utf8_to_utf16(id_utf16_buf, id);
- id_utf16 = (UChar *)str_c(id_utf16_buf);
- *transliterator_r = utrans_openU(id_utf16,
- id_utf16_buf->used / sizeof(UChar),
+ t_array_init(&id_utf16, strlen(id));
+ fts_icu_utf8_to_utf16(&id_utf16, id);
+ *transliterator_r = utrans_openU(array_idx(&id_utf16, 0),
+ array_count(&id_utf16),
UTRANS_FORWARD, NULL, 0, &perr, &err);
if (U_FAILURE(err)) {
string_t *str = t_str_new(128);
#include <unicode/ustring.h>
#include <unicode/utrans.h>
-/* Convert UTF-8 input to UTF-16 output. The dest_utf16 contains UChars. */
-void fts_icu_utf8_to_utf16(buffer_t *dest_utf16, const char *src_utf8);
+ARRAY_DEFINE_TYPE(icu_utf16, UChar);
+
+/* Convert UTF-8 input to UTF-16 output. */
+void fts_icu_utf8_to_utf16(ARRAY_TYPE(icu_utf16) *dest_utf16,
+ const char *src_utf8);
/* Convert UTF-16 input to UTF-8 output. */
void fts_icu_utf16_to_utf8(string_t *dest_utf8, const UChar *src_utf16,
unsigned int src_len);
/* Run ICU translation for the string. Returns 0 on success, -1 on error. */
-int fts_icu_translate(buffer_t *dest_utf16, const UChar *src_utf16,
+int fts_icu_translate(ARRAY_TYPE(icu_utf16) *dest_utf16, const UChar *src_utf16,
unsigned int src_len, UTransliterator *transliterator,
const char **error_r);
/* Lowercase the given UTF-8 string. */
/* Copyright (c) 2015-2016 Dovecot authors, see the included COPYING file */
#include "lib.h"
-#include "buffer.h"
+#include "array.h"
#include "str.h"
#include "unichar.h"
#include "test-common.h"
/* Checks fts_icu_utf8_to_utf16() with ASCII inputs of growing length written
   into the same destination: first an input that exactly fits the initial
   allocation, then longer ones that do not. The destination is not cleared
   between calls, and each call is expected to leave only its own output. */
static void test_fts_icu_utf8_to_utf16_ascii_resize(void)
{
- buffer_t *dest = buffer_create_dynamic(pool_datastack_create(), 4);
+ ARRAY_TYPE(icu_utf16) dest;
test_begin("fts_icu_utf8_to_utf16 ascii resize");
- test_assert(buffer_get_writable_size(dest) == 4);
- fts_icu_utf8_to_utf16(dest, "12");
- test_assert(dest->used == 4);
- test_assert(buffer_get_writable_size(dest) == 4);
/* Room for 2 elements; the test expects that to be 4 writable bytes,
   and expects converting "12" to leave the writable size unchanged. */
+ t_array_init(&dest, 2);
+ test_assert(buffer_get_writable_size(dest.arr.buffer) == 4);
+ fts_icu_utf8_to_utf16(&dest, "12");
+ test_assert(array_count(&dest) == 2);
+ test_assert(buffer_get_writable_size(dest.arr.buffer) == 4);
- fts_icu_utf8_to_utf16(dest, "123");
- test_assert(dest->used == 6);
- test_assert(buffer_get_writable_size(dest) == 7);
/* Three characters no longer fit. The expected writable size of 7
   presumably reflects how the buffer grows -- TODO confirm against the
   buffer implementation. */
+ fts_icu_utf8_to_utf16(&dest, "123");
+ test_assert(array_count(&dest) == 3);
+ test_assert(buffer_get_writable_size(dest.arr.buffer) == 7);
- fts_icu_utf8_to_utf16(dest, "12345");
- test_assert(dest->used == 10);
/* Five characters; only the element count is checked here. */
+ fts_icu_utf8_to_utf16(&dest, "12345");
+ test_assert(array_count(&dest) == 5);
test_end();
}
static void test_fts_icu_utf8_to_utf16_32bit_resize(void)
{
- buffer_t *dest;
+ ARRAY_TYPE(icu_utf16) dest;
unsigned int i;
test_begin("fts_icu_utf8_to_utf16 32bit resize");
- for (i = 2; i <= 5; i++) {
- dest = buffer_create_dynamic(pool_datastack_create(), i);
- test_assert(buffer_get_writable_size(dest) == i);
- fts_icu_utf8_to_utf16(dest, "\xF0\x90\x90\x80"); /* 0x10400 */
- test_assert(dest->used == 4);
+ for (i = 1; i <= 2; i++) {
+ t_array_init(&dest, i);
+ test_assert(buffer_get_writable_size(dest.arr.buffer) == i*2);
+ fts_icu_utf8_to_utf16(&dest, "\xF0\x90\x90\x80"); /* 0x10400 */
+ test_assert(array_count(&dest) == 2);
}
test_end();
static UTransliterator *get_translit(const char *id)
{
UTransliterator *translit;
- buffer_t *id_utf16;
+ ARRAY_TYPE(icu_utf16) id_utf16;
UErrorCode err = U_ZERO_ERROR;
UParseError perr;
- id_utf16 = buffer_create_dynamic(pool_datastack_create(), 16);
- fts_icu_utf8_to_utf16(id_utf16, id);
- translit = utrans_openU(id_utf16->data, id_utf16->used/sizeof(UChar),
+ t_array_init(&id_utf16, 8);
+ fts_icu_utf8_to_utf16(&id_utf16, id);
+ translit = utrans_openU(array_idx(&id_utf16, 0), array_count(&id_utf16),
UTRANS_FORWARD, NULL, 0, &perr, &err);
test_assert(!U_FAILURE(err));
return translit;
{
const char *translit_id = "Any-Lower";
UTransliterator *translit;
- buffer_t *dest = buffer_create_dynamic(pool_datastack_create(), 64);
+ ARRAY_TYPE(icu_utf16) dest;
const UChar src[] = { 0xbd, 'B', 'C' };
const char *error;
unsigned int i;
test_begin("fts_icu_translate");
+ t_array_init(&dest, 32);
translit = get_translit(translit_id);
for (i = N_ELEMENTS(src); i > 0; i--) {
- buffer_set_used_size(dest, 0);
- test_assert(fts_icu_translate(dest, src, i,
+ array_clear(&dest);
+ test_assert(fts_icu_translate(&dest, src, i,
translit, &error) == 0);
- test_assert(dest->used == i * sizeof(UChar));
+ test_assert(array_count(&dest) == i);
}
utrans_close(translit);
test_end();
{
const char *translit_id = "Any-Hex";
const char *src_utf8 = "FOO";
- buffer_t *dest, *src_utf16;
+ ARRAY_TYPE(icu_utf16) src_utf16, dest;
UTransliterator *translit;
const char *error;
unsigned int i;
test_begin("fts_icu_translate_resize resize");
- src_utf16 = buffer_create_dynamic(pool_datastack_create(), 16);
+ t_array_init(&src_utf16, 8);
translit = get_translit(translit_id);
- for (i = 2; i <= 20; i++) {
- buffer_set_used_size(src_utf16, 0);
- fts_icu_utf8_to_utf16(src_utf16, src_utf8);
- dest = buffer_create_dynamic(pool_datastack_create(), i);
- test_assert(buffer_get_writable_size(dest) == i);
- test_assert(fts_icu_translate(dest, src_utf16->data,
- src_utf16->used/sizeof(UChar),
+ for (i = 1; i <= 10; i++) {
+ array_clear(&src_utf16);
+ fts_icu_utf8_to_utf16(&src_utf16, src_utf8);
+ t_array_init(&dest, i);
+ test_assert(buffer_get_writable_size(dest.arr.buffer) == i*2);
+ test_assert(fts_icu_translate(&dest, array_idx(&src_utf16, 0),
+ array_count(&src_utf16),
translit, &error) == 0);
}