git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
lib-fts: Change normalizer filter to use new truncate.
author Teemu Huovila <teemu.huovila@dovecot.fi>
Mon, 22 Aug 2016 20:41:05 +0000 (23:41 +0300)
committer Timo Sirainen <timo.sirainen@dovecot.fi>
Tue, 23 Aug 2016 10:18:42 +0000 (13:18 +0300)
src/lib-fts/fts-filter-normalizer-icu.c
src/lib-fts/test-fts-filter.c

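diff --git a/src/lib-fts/fts-filter-normalizer-icu.c b/src/lib-fts/fts-filter-normalizer-icu.c
index 58b2a9b475fc7f0c6ea204c74c4ef0c09e19edc8..d49852dbfa55d177b2d1123ce700e9bab5469dcd 100644
--- a/src/lib-fts/fts-filter-normalizer-icu.c
+++ b/src/lib-fts/fts-filter-normalizer-icu.c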
@@ -4,7 +4,7 @@
 #include "buffer.h"
 #include "str.h"
 #include "unichar.h" /* unicode replacement char */
-#include "fts-tokenizer-common.h"
+#include "fts-filter-common.h"
 #include "fts-filter-private.h"
 #include "fts-language.h"
 
@@ -19,7 +19,6 @@ struct fts_filter_normalizer_icu {
        UTransliterator *transliterator;
        buffer_t *utf16_token, *trans_token;
        string_t *utf8_token;
-       unsigned int maxlen;
 };
 
 static void fts_filter_normalizer_icu_destroy(struct fts_filter *filter)
@@ -69,7 +68,7 @@ fts_filter_normalizer_icu_create(const struct fts_language *lang ATTR_UNUSED,
        np->utf16_token = buffer_create_dynamic(pp, 128);
        np->trans_token = buffer_create_dynamic(pp, 128);
        np->utf8_token = buffer_create_dynamic(pp, 128);
-       np->maxlen = max_length;
+       np->filter.max_length = max_length;
        *filter_r = &np->filter;
        return 0;
 }
@@ -101,11 +100,7 @@ fts_filter_normalizer_icu_filter(struct fts_filter *filter, const char **token,
 
        fts_icu_utf16_to_utf8(np->utf8_token, np->trans_token->data,
                              np->trans_token->used / sizeof(UChar));
-       if (str_len(np->utf8_token) > np->maxlen) {
-               size_t len = np->maxlen;
-               fts_tokenizer_delete_trailing_partial_char(np->utf8_token->data, &len);
-               str_truncate(np->utf8_token, len);
-       }
+       fts_filter_truncate_token(np->utf8_token, np->filter.max_length);
        *token = str_c(np->utf8_token);
        return 1;
 }
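diff --git a/src/lib-fts/test-fts-filter.c b/src/lib-fts/test-fts-filter.c
index 8f8ff48a69693a46b8bc6ab7e8aac2666ae08bc9..dc001f2160ebb9ea25c516e966d82d97a644f07b 100644
--- a/src/lib-fts/test-fts-filter.c
+++ b/src/lib-fts/test-fts-filter.c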
@@ -728,6 +728,25 @@ static void test_fts_filter_normalizer_oversized(void)
        test_end();
 }
 
+static void test_fts_filter_normalizer_truncation(void)
+{
+       struct fts_filter *norm = NULL;
+       const char *settings[] =
+               {"id", "Any-Lower;", "maxlen", "10",
+                NULL};
+       const char *error = NULL;
+       const char *token = "abcdefghi\xC3\x85";
+
+       test_begin("fts filter normalizer token truncated mid letter");
+       test_assert(fts_filter_create(fts_filter_normalizer_icu, NULL, NULL,
+                                     settings, &norm, &error) == 0);
+       test_assert(error == NULL);
+       test_assert(fts_filter_filter(norm, &token, &error) >= 0);
+       test_assert(strcmp(token, "abcdefghi") == 0);
+       fts_filter_unref(&norm);
+       test_end();
+}
+
 #ifdef HAVE_FTS_STEMMER
 static void test_fts_filter_normalizer_stopwords_stemmer_eng(void)
 {
@@ -986,6 +1005,7 @@ int main(void)
                test_fts_filter_normalizer_baddata,
                test_fts_filter_normalizer_invalid_id,
                test_fts_filter_normalizer_oversized,
+               test_fts_filter_normalizer_truncation,
 #ifdef HAVE_FTS_STEMMER
                test_fts_filter_normalizer_stopwords_stemmer_eng,
                test_fts_filter_stopwords_normalizer_stemmer_no,