From: Teemu Huovila
Date: Tue, 15 Mar 2016 08:47:20 +0000 (+0200)
Subject: lib-fts: Lift helper function out of generic tokenizer.
X-Git-Tag: 2.2.22~7
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=11100790aa456929b3786bde2dbe27b9c76402b8;p=thirdparty%2Fdovecot%2Fcore.git

lib-fts: Lift helper function out of generic tokenizer.
---

diff --git a/src/lib-fts/Makefile.am b/src/lib-fts/Makefile.am
index 41959e0c8b..fa04b2bad0 100644
--- a/src/lib-fts/Makefile.am
+++ b/src/lib-fts/Makefile.am
@@ -78,6 +78,7 @@ libfts_la_SOURCES = \
 	fts-library.c \
 	fts-tokenizer.c \
 	fts-tokenizer-address.c \
+	fts-tokenizer-common.c \
 	fts-tokenizer-generic.c \
 	$(ICU_SOURCES)
 
@@ -89,6 +90,7 @@ headers = \
 	fts-language.h \
 	fts-library.h \
 	fts-tokenizer.h \
+	fts-tokenizer-common.h \
 	fts-tokenizer-private.h \
 	fts-tokenizer-generic-private.h
 
@@ -132,7 +134,7 @@ test_fts_language_DEPENDENCIES = $(test_deps)
 endif
 
 test_fts_tokenizer_SOURCES = test-fts-tokenizer.c
-test_fts_tokenizer_LDADD = fts-tokenizer.lo fts-tokenizer-generic.lo fts-tokenizer-address.lo ../lib-mail/libmail.la $(test_libs)
+test_fts_tokenizer_LDADD = fts-tokenizer.lo fts-tokenizer-generic.lo fts-tokenizer-address.lo fts-tokenizer-common.lo ../lib-mail/libmail.la $(test_libs)
 test_fts_tokenizer_DEPENDENCIES = ../lib-mail/libmail.la $(test_deps)
 
 check: check-am check-test
diff --git a/src/lib-fts/fts-tokenizer-common.c b/src/lib-fts/fts-tokenizer-common.c
new file mode 100644
index 0000000000..f71113d036
--- /dev/null
+++ b/src/lib-fts/fts-tokenizer-common.c
@@ -0,0 +1,22 @@
+#include "lib.h"
+#include "unichar.h"
+#include "fts-tokenizer-common.h"
+void
+fts_tokenizer_delete_trailing_partial_char(const unsigned char *data,
+					   size_t *len)
+{
+	size_t pos;
+	unsigned int char_bytes;
+
+	/* the token is truncated - make sure the last character
+	   exists entirely in the token */
+	for (pos = *len-1; pos > 0; pos--) {
+		if (UTF8_IS_START_SEQ(data[pos]))
+			break;
+	}
+	char_bytes = uni_utf8_char_bytes(data[pos]);
+	if (char_bytes != *len-pos) {
+		i_assert(char_bytes > *len-pos);
+		*len = pos;
+	}
+}
diff --git a/src/lib-fts/fts-tokenizer-common.h b/src/lib-fts/fts-tokenizer-common.h
new file mode 100644
index 0000000000..fdd3b16313
--- /dev/null
+++ b/src/lib-fts/fts-tokenizer-common.h
@@ -0,0 +1,6 @@
+#ifndef FTS_TOKENIZER_COMMON_H
+#define FTS_TOKENIZER_COMMON_H
+void
+fts_tokenizer_delete_trailing_partial_char(const unsigned char *data,
+					   size_t *len);
+#endif
diff --git a/src/lib-fts/fts-tokenizer-generic.c b/src/lib-fts/fts-tokenizer-generic.c
index dbc3398558..2ae352e2bb 100644
--- a/src/lib-fts/fts-tokenizer-generic.c
+++ b/src/lib-fts/fts-tokenizer-generic.c
@@ -8,6 +8,7 @@
 #include "fts-common.h"
 #include "fts-tokenizer-private.h"
 #include "fts-tokenizer-generic-private.h"
+#include "fts-tokenizer-common.h"
 
 #include "word-boundary-data.c"
 #include "word-break-data.c"
@@ -100,26 +101,6 @@ fts_tokenizer_generic_destroy(struct fts_tokenizer *_tok)
 	i_free(tok);
 }
 
-static void
-fts_tokenizer_delete_trailing_partial_char(const unsigned char *data,
-					   size_t *len)
-{
-	size_t pos;
-	unsigned int char_bytes;
-
-	/* the token is truncated - make sure the last character
-	   exists entirely in the token */
-	for (pos = *len-1; pos > 0; pos--) {
-		if (UTF8_IS_START_SEQ(data[pos]))
-			break;
-	}
-	char_bytes = uni_utf8_char_bytes(data[pos]);
-	if (char_bytes != *len-pos) {
-		i_assert(char_bytes > *len-pos);
-		*len = pos;
-	}
-}
-
 static bool
 fts_tokenizer_generic_simple_current_token(struct generic_fts_tokenizer *tok,
 					   const char **token_r)