From 72c4ef3b44c50c662b37bba93b463b0caeb63a4f Mon Sep 17 00:00:00 2001 From: Timo Sirainen Date: Thu, 21 May 2015 06:35:59 -0400 Subject: [PATCH] lib-fts: Fixed handling tokens that contain only apostrophes --- src/lib-fts/fts-tokenizer-generic.c | 9 +++------ src/lib-fts/test-fts-tokenizer.c | 2 +- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/lib-fts/fts-tokenizer-generic.c b/src/lib-fts/fts-tokenizer-generic.c index d58484d77e..7730d19c9c 100644 --- a/src/lib-fts/fts-tokenizer-generic.c +++ b/src/lib-fts/fts-tokenizer-generic.c @@ -234,15 +234,12 @@ fts_tokenizer_generic_next_simple(struct fts_tokenizer *_tok, fts_apostrophe_word_break(tok, c)) { len = char_start_i - start; tok_append_truncated(tok, data + start, len); - if (tok->token->used == 0) { - start = i + char_size; - continue; - } - - if (fts_tokenizer_generic_simple_current_token(tok, token_r)) { + if (tok->token->used > 0 && + fts_tokenizer_generic_simple_current_token(tok, token_r)) { *skip_r = i + char_size; return 1; } + start = i + char_size; } } /* word boundary not found yet */ diff --git a/src/lib-fts/test-fts-tokenizer.c b/src/lib-fts/test-fts-tokenizer.c index 94e6166d1b..9668b7a86a 100644 --- a/src/lib-fts/test-fts-tokenizer.c +++ b/src/lib-fts/test-fts-tokenizer.c @@ -29,7 +29,7 @@ static const char *test_inputs[] = { "1.", - "'quoted text' 'word' 'hlo words' you're bad'''word '''pre post'''", + "' ' '' ''' 'quoted text' 'word' 'hlo words' you're bad'''word '''pre post'''", /* whitespace: with Unicode(utf8) U+FF01(ef bc 81)(U+2000(e2 80 80) and U+205A(e2 81 9a) and U+205F(e2 81 9f) */ -- 2.47.3