git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
lib-fts: Lift helper function out of generic tokenizer.
author Teemu Huovila <teemu.huovila@dovecot.fi>
Tue, 15 Mar 2016 08:47:20 +0000 (10:47 +0200)
committer Timo Sirainen <timo.sirainen@dovecot.fi>
Wed, 16 Mar 2016 00:07:03 +0000 (11:07 +1100)
src/lib-fts/Makefile.am
src/lib-fts/fts-tokenizer-common.c [new file with mode: 0644]
src/lib-fts/fts-tokenizer-common.h [new file with mode: 0644]
src/lib-fts/fts-tokenizer-generic.c

index 41959e0c8b8c939e971e98817b0c5de2f68cde0f..fa04b2bad056ec832414bc211a2842287fec049a 100644 (file)
@@ -78,6 +78,7 @@ libfts_la_SOURCES = \
        fts-library.c \
        fts-tokenizer.c \
        fts-tokenizer-address.c \
+       fts-tokenizer-common.c \
        fts-tokenizer-generic.c \
        $(ICU_SOURCES)
 
@@ -89,6 +90,7 @@ headers = \
        fts-language.h \
        fts-library.h \
        fts-tokenizer.h \
+       fts-tokenizer-common.h \
        fts-tokenizer-private.h \
        fts-tokenizer-generic-private.h
 
@@ -132,7 +134,7 @@ test_fts_language_DEPENDENCIES = $(test_deps)
 endif
 
 test_fts_tokenizer_SOURCES = test-fts-tokenizer.c
-test_fts_tokenizer_LDADD = fts-tokenizer.lo fts-tokenizer-generic.lo fts-tokenizer-address.lo ../lib-mail/libmail.la $(test_libs)
+test_fts_tokenizer_LDADD = fts-tokenizer.lo fts-tokenizer-generic.lo fts-tokenizer-address.lo fts-tokenizer-common.lo ../lib-mail/libmail.la $(test_libs)
 test_fts_tokenizer_DEPENDENCIES = ../lib-mail/libmail.la $(test_deps)
 
 check: check-am check-test
diff --git a/src/lib-fts/fts-tokenizer-common.c b/src/lib-fts/fts-tokenizer-common.c
new file mode 100644 (file)
index 0000000..f71113d
--- /dev/null
@@ -0,0 +1,34 @@
+#include "lib.h"
+#include "unichar.h"
+#include "fts-tokenizer-common.h"
+/* Shrink *len so that data[0..*len) does not end in the middle of a UTF-8
+   character. The buffer is scanned backwards for the last byte accepted by
+   UTF8_IS_START_SEQ() (stopping at offset 0 if none is found); if the
+   length uni_utf8_char_bytes() reports for that byte exceeds the bytes
+   actually remaining, *len is cut back to that byte's offset. Otherwise
+   *len is left untouched. data is only read. An empty buffer is a no-op. */
+void
+fts_tokenizer_delete_trailing_partial_char(const unsigned char *data,
+                                          size_t *len)
+{
+       size_t pos;
+       unsigned int char_bytes;
+
+       /* nothing to trim; also keeps *len-1 below from wrapping around */
+       if (*len == 0)
+               return;
+
+       /* the token is truncated - make sure the last character
+          exists entirely in the token */
+       for (pos = *len-1; pos > 0; pos--) {
+               if (UTF8_IS_START_SEQ(data[pos]))
+                       break;
+       }
+       char_bytes = uni_utf8_char_bytes(data[pos]);
+       if (char_bytes != *len-pos) {
+               /* the tail may only be shorter than its start byte
+                  declares, never longer */
+               i_assert(char_bytes > *len-pos);
+               *len = pos;
+       }
+}
diff --git a/src/lib-fts/fts-tokenizer-common.h b/src/lib-fts/fts-tokenizer-common.h
new file mode 100644 (file)
index 0000000..fdd3b16
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef FTS_TOKENIZER_COMMON_H
+#define FTS_TOKENIZER_COMMON_H
+/* Trim an incomplete trailing UTF-8 character from a truncated token.
+   data is only read; *len holds the token's byte length on entry and is
+   lowered when the final character doesn't fit completely. */
+void
+fts_tokenizer_delete_trailing_partial_char(const unsigned char *data,
+                                           size_t *len);
+#endif
index dbc3398558e8c4dc6c3518ad1688426c2ea190d1..2ae352e2bb8b4eb75f1750ec361f068fde91bc3c 100644 (file)
@@ -8,6 +8,7 @@
 #include "fts-common.h"
 #include "fts-tokenizer-private.h"
 #include "fts-tokenizer-generic-private.h"
+#include "fts-tokenizer-common.h"
 #include "word-boundary-data.c"
 #include "word-break-data.c"
 
@@ -100,26 +101,6 @@ fts_tokenizer_generic_destroy(struct fts_tokenizer *_tok)
        i_free(tok);
 }
 
-static void
-fts_tokenizer_delete_trailing_partial_char(const unsigned char *data,
-                                          size_t *len)
-{
-       size_t pos;
-       unsigned int char_bytes;
-
-       /* the token is truncated - make sure the last character
-          exists entirely in the token */
-       for (pos = *len-1; pos > 0; pos--) {
-               if (UTF8_IS_START_SEQ(data[pos]))
-                       break;
-       }
-       char_bytes = uni_utf8_char_bytes(data[pos]);
-       if (char_bytes != *len-pos) {
-               i_assert(char_bytes > *len-pos);
-               *len = pos;
-       }
-}
-
 static bool
 fts_tokenizer_generic_simple_current_token(struct generic_fts_tokenizer *tok,
                                            const char **token_r)