git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
lib-fts: Fixed handling tokens that contain only apostrophes
author Timo Sirainen <tss@iki.fi>
Thu, 21 May 2015 10:35:59 +0000 (06:35 -0400)
committer Timo Sirainen <tss@iki.fi>
Thu, 21 May 2015 10:35:59 +0000 (06:35 -0400)
src/lib-fts/fts-tokenizer-generic.c
src/lib-fts/test-fts-tokenizer.c

index d58484d77e24ee70262182220b36a2824e03a4a9..7730d19c9c94267a49a0a66dbb5a86274110a88c 100644 (file)
@@ -234,15 +234,12 @@ fts_tokenizer_generic_next_simple(struct fts_tokenizer *_tok,
                    fts_apostrophe_word_break(tok, c)) {
                        len = char_start_i - start;
                        tok_append_truncated(tok, data + start, len);
-                       if (tok->token->used == 0) {
-                               start = i + char_size;
-                               continue;
-                       }
-
-                       if (fts_tokenizer_generic_simple_current_token(tok, token_r)) {
+                       if (tok->token->used > 0 &&
+                           fts_tokenizer_generic_simple_current_token(tok, token_r)) {
                                *skip_r = i + char_size;
                                return 1;
                        }
+                       start = i + char_size;
                }
        }
        /* word boundary not found yet */
index 94e6166d1b0f8ee916db5b18d9b8eab7cf84561a..9668b7a86ab6fbc2317f9778c6e911b0763ab8bf 100644 (file)
@@ -29,7 +29,7 @@ static const char *test_inputs[] = {
 
        "1.",
 
-       "'quoted text' 'word' 'hlo words' you're bad'''word '''pre post'''",
+       "' ' '' ''' 'quoted text' 'word' 'hlo words' you're bad'''word '''pre post'''",
 
        /* whitespace: with Unicode(utf8) U+FF01(ef bc 81)(U+2000(e2 80 80) and
           U+205A(e2 81 9a) and U+205F(e2 81 9f) */