From: Teemu Huovila
Date: Mon, 22 Aug 2016 20:41:05 +0000 (+0300)
Subject: lib-fts: Change normalizer filter to use new truncate.
X-Git-Tag: 2.2.26~325
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=d5e93292e0eb2a2da53a78da9a373f717f2c685d;p=thirdparty%2Fdovecot%2Fcore.git

lib-fts: Change normalizer filter to use new truncate.
---

diff --git a/src/lib-fts/fts-filter-normalizer-icu.c b/src/lib-fts/fts-filter-normalizer-icu.c
index 58b2a9b475..d49852dbfa 100644
--- a/src/lib-fts/fts-filter-normalizer-icu.c
+++ b/src/lib-fts/fts-filter-normalizer-icu.c
@@ -4,7 +4,7 @@
 #include "buffer.h"
 #include "str.h"
 #include "unichar.h" /* unicode replacement char */
-#include "fts-tokenizer-common.h"
+#include "fts-filter-common.h"
 #include "fts-filter-private.h"
 #include "fts-language.h"
 
@@ -19,7 +19,6 @@ struct fts_filter_normalizer_icu {
 	UTransliterator *transliterator;
 	buffer_t *utf16_token, *trans_token;
 	string_t *utf8_token;
-	unsigned int maxlen;
 };
 
 static void fts_filter_normalizer_icu_destroy(struct fts_filter *filter)
@@ -69,7 +68,7 @@ fts_filter_normalizer_icu_create(const struct fts_language *lang ATTR_UNUSED,
 	np->utf16_token = buffer_create_dynamic(pp, 128);
 	np->trans_token = buffer_create_dynamic(pp, 128);
 	np->utf8_token = buffer_create_dynamic(pp, 128);
-	np->maxlen = max_length;
+	np->filter.max_length = max_length;
 	*filter_r = &np->filter;
 	return 0;
 }
@@ -101,11 +100,7 @@ fts_filter_normalizer_icu_filter(struct fts_filter *filter, const char **token,
 
 	fts_icu_utf16_to_utf8(np->utf8_token, np->trans_token->data,
 			      np->trans_token->used / sizeof(UChar));
-	if (str_len(np->utf8_token) > np->maxlen) {
-		size_t len = np->maxlen;
-		fts_tokenizer_delete_trailing_partial_char(np->utf8_token->data, &len);
-		str_truncate(np->utf8_token, len);
-	}
+	fts_filter_truncate_token(np->utf8_token, np->filter.max_length);
 	*token = str_c(np->utf8_token);
 	return 1;
 }
diff --git a/src/lib-fts/test-fts-filter.c b/src/lib-fts/test-fts-filter.c
index 8f8ff48a69..dc001f2160 100644
--- a/src/lib-fts/test-fts-filter.c
+++ b/src/lib-fts/test-fts-filter.c
@@ -728,6 +728,25 @@ static void test_fts_filter_normalizer_oversized(void)
 	test_end();
 }
 
+static void test_fts_filter_normalizer_truncation(void)
+{
+	struct fts_filter *norm = NULL;
+	const char *settings[] =
+		{"id", "Any-Lower;", "maxlen", "10",
+		 NULL};
+	const char *error = NULL;
+	const char *token = "abcdefghi\xC3\x85";
+
+	test_begin("fts filter normalizer token truncated mid letter");
+	test_assert(fts_filter_create(fts_filter_normalizer_icu, NULL, NULL,
+				      settings, &norm, &error) == 0);
+	test_assert(error == NULL);
+	test_assert(fts_filter_filter(norm, &token, &error) >= 0);
+	test_assert(strcmp(token, "abcdefghi") == 0);
+	fts_filter_unref(&norm);
+	test_end();
+}
+
 #ifdef HAVE_FTS_STEMMER
 static void test_fts_filter_normalizer_stopwords_stemmer_eng(void)
 {
@@ -986,6 +1005,7 @@ int main(void)
 		test_fts_filter_normalizer_baddata,
 		test_fts_filter_normalizer_invalid_id,
 		test_fts_filter_normalizer_oversized,
+		test_fts_filter_normalizer_truncation,
 #ifdef HAVE_FTS_STEMMER
 		test_fts_filter_normalizer_stopwords_stemmer_eng,
 		test_fts_filter_stopwords_normalizer_stemmer_no,