git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
lib-fts: generic simple tokeniser - distinguish "letters" from non-"letters"
author: Phil Carmody <phil@dovecot.fi>
Fri, 11 May 2018 10:34:40 +0000 (13:34 +0300)
committer: Ville Savolainen <ville.savolainen@dovecot.fi>
Tue, 12 Feb 2019 13:40:45 +0000 (15:40 +0200)
prev_type is only compared against LETTER_TYPE_SINGLE_QUOTE, so there will be no
behavioural differences. However, maintaining the state that we've just
seen something we are prepared to search for (very loosely, a "letter")
rather than something that we threw away (word breaks) will be important
when it comes to explicit prefix query parsing.

Signed-off-by: Phil Carmody <phil@dovecot.fi>
src/lib-fts/fts-tokenizer-generic.c

index 0ac61dea38303a9ff824f156006d0357599731d2..1bcc8db4eb12ced622d25d43b2ceceff41bdd8ad 100644 (file)
@@ -255,7 +255,10 @@ fts_tokenizer_generic_simple_next(struct fts_tokenizer *_tok,
                        start = i + char_size;
                        shift_prev_type(tok, LETTER_TYPE_SINGLE_QUOTE);
                } else {
-                       shift_prev_type(tok, LETTER_TYPE_NONE);
+                       /* Lie slightly about the type. This is anything that
+                          we're not skipping or cutting on and are prepared to
+                          search for - it's "as good as" a letter. */
+                       shift_prev_type(tok, LETTER_TYPE_ALETTER);
                }
        }
        /* word boundary not found yet */