fts-library.c \
fts-tokenizer.c \
fts-tokenizer-address.c \
+ fts-tokenizer-common.c \
fts-tokenizer-generic.c \
$(ICU_SOURCES)
fts-language.h \
fts-library.h \
fts-tokenizer.h \
+ fts-tokenizer-common.h \
fts-tokenizer-private.h \
fts-tokenizer-generic-private.h
endif
test_fts_tokenizer_SOURCES = test-fts-tokenizer.c
-test_fts_tokenizer_LDADD = fts-tokenizer.lo fts-tokenizer-generic.lo fts-tokenizer-address.lo ../lib-mail/libmail.la $(test_libs)
+test_fts_tokenizer_LDADD = fts-tokenizer.lo fts-tokenizer-generic.lo fts-tokenizer-address.lo fts-tokenizer-common.lo ../lib-mail/libmail.la $(test_libs)
test_fts_tokenizer_DEPENDENCIES = ../lib-mail/libmail.la $(test_deps)
check: check-am check-test
--- /dev/null
+#include "lib.h"
+#include "unichar.h"
+#include "fts-tokenizer-common.h"
+/* Trim an incomplete trailing multi-byte character from a truncated token.
+
+   data points to the token bytes and *len is the token length in bytes.
+   The function looks at the last byte accepted by UTF8_IS_START_SEQ() and,
+   if uni_utf8_char_bytes() for that byte differs from the number of bytes
+   between it and the end of the token, shortens *len so the token ends
+   just before that byte. Otherwise *len is left unchanged. The bytes in
+   data are only read, never written.
+
+   NOTE(review): *len-1 wraps around when *len is 0, so this presumably
+   relies on callers passing a non-empty token - confirm at the call sites. */
+void
+fts_tokenizer_delete_trailing_partial_char(const unsigned char *data,
+ size_t *len)
+{
+ size_t pos;
+ unsigned int char_bytes;
+
+ /* the token is truncated - make sure the last character
+ exists entirely in the token */
+ /* walk backwards to the last byte that starts a sequence; the loop
+ also stops at index 0 without testing that byte */
+ for (pos = *len-1; pos > 0; pos--) {
+ if (UTF8_IS_START_SEQ(data[pos]))
+ break;
+ }
+ /* presumably the full encoded length implied by this lead byte -
+ confirm against unichar.h */
+ char_bytes = uni_utf8_char_bytes(data[pos]);
+ if (char_bytes != *len-pos) {
+ /* only the "too few bytes present" case is expected here; having
+ more trailing bytes than the lead byte calls for trips the
+ assert (input is presumably already valid UTF-8 - confirm) */
+ i_assert(char_bytes > *len-pos);
+ /* cut the token right before the partial character */
+ *len = pos;
+ }
+}
--- /dev/null
+#ifndef FTS_TOKENIZER_COMMON_H
+#define FTS_TOKENIZER_COMMON_H
+/* Shorten *len so that the token in data does not end in the middle of a
+   multi-byte character. data is read-only; *len is updated in place and
+   is only ever left as-is or reduced.
+   NOTE(review): intended for tokens that were cut at a byte limit;
+   presumably expects *len > 0 and valid UTF-8 input - confirm with the
+   implementation and its callers. */
+void
+fts_tokenizer_delete_trailing_partial_char(const unsigned char *data,
+ size_t *len);
+#endif
#include "fts-common.h"
#include "fts-tokenizer-private.h"
#include "fts-tokenizer-generic-private.h"
+#include "fts-tokenizer-common.h"
#include "word-boundary-data.c"
#include "word-break-data.c"
i_free(tok);
}
-static void
-fts_tokenizer_delete_trailing_partial_char(const unsigned char *data,
- size_t *len)
-{
- size_t pos;
- unsigned int char_bytes;
-
- /* the token is truncated - make sure the last character
- exists entirely in the token */
- for (pos = *len-1; pos > 0; pos--) {
- if (UTF8_IS_START_SEQ(data[pos]))
- break;
- }
- char_bytes = uni_utf8_char_bytes(data[pos]);
- if (char_bytes != *len-pos) {
- i_assert(char_bytes > *len-pos);
- *len = pos;
- }
-}
-
static bool
fts_tokenizer_generic_simple_current_token(struct generic_fts_tokenizer *tok,
const char **token_r)