lib-fts: tokenizer-generic - recognise request for explicit prefix searching

author Phil Carmody <phil@dovecot.fi>

Wed, 16 May 2018 13:32:35 +0000 (16:32 +0300)

committer Ville Savolainen <ville.savolainen@dovecot.fi>

Tue, 12 Feb 2019 13:40:45 +0000 (15:40 +0200)
author Phil Carmody <phil@dovecot.fi>
Wed, 16 May 2018 13:32:35 +0000 (16:32 +0300)
committer Ville Savolainen <ville.savolainen@dovecot.fi>
Tue, 12 Feb 2019 13:40:45 +0000 (15:40 +0200)
diff --git a/src/lib-fts/fts-tokenizer-generic-private.h b/src/lib-fts/fts-tokenizer-generic-private.h

index e9b8f597afddb30541876e0f33a8060f05f7f806..2669023bc5a665d92a7ae6d6396c5aa8bc620067 100644 (file)
--- a/src/lib-fts/fts-tokenizer-generic-private.h
+++ b/src/lib-fts/fts-tokenizer-generic-private.h
@@ -40,6 +40,7 @@ enum boundary_algorithm {
  struct generic_fts_tokenizer {
         struct fts_tokenizer tokenizer;
         unsigned int max_length;
+       bool prefixsplat; /* for search strings, accept a trailing '*' for explicit prefix */
         bool wb5a; /* TR29 rule for prefix separation
                       in e.g. French or Italian. */
         bool seen_wb5a;
diff --git a/src/lib-fts/fts-tokenizer-generic.c b/src/lib-fts/fts-tokenizer-generic.c

index 1bcc8db4eb12ced622d25d43b2ceceff41bdd8ad..797e4316166798bf1bab5cbccbe0ae4d11ccccf9 100644 (file)
--- a/src/lib-fts/fts-tokenizer-generic.c
+++ b/src/lib-fts/fts-tokenizer-generic.c
@@ -36,6 +36,8 @@ fts_tokenizer_generic_create(const char *const *settings,
         unsigned int max_length = FTS_DEFAULT_TOKEN_MAX_LENGTH;
         enum boundary_algorithm algo = BOUNDARY_ALGORITHM_SIMPLE;
         bool wb5a = FALSE;
+       bool search = FALSE;
+       bool explicitprefix = FALSE;
         unsigned int i;
  
         for (i = 0; settings[i] != NULL; i += 2) {
@@ -61,17 +63,24 @@ fts_tokenizer_generic_create(const char *const *settings,
                 } else if (strcmp(key, "search") == 0) {
                         /* tokenizing a search string -
                            only then is explicitprefix honoured */
+                       search = TRUE;
                 } else if (strcasecmp(key, "wb5a") == 0) {
                         if (strcasecmp(value, "no") == 0)
                                 wb5a = FALSE;
                         else
                                 wb5a = TRUE;
+               } else if (strcasecmp(key, "explicitprefix") == 0) {
+                       explicitprefix = TRUE;
                 } else {
                         *error_r = t_strdup_printf("Unknown setting: %s", key);
                         return -1;
                 }
         }
  
+       /* Tokenise normally unless tokenising an explicit prefix query */
+       if (!search)
+               explicitprefix = FALSE;
+
         if (wb5a && algo != BOUNDARY_ALGORITHM_TR29) {
                 *error_r = "Can not use WB5a for algorithms other than TR29.";
                 return -1;
@@ -85,6 +94,7 @@ fts_tokenizer_generic_create(const char *const *settings,
         tok->max_length = max_length;
         tok->algorithm = algo;
         tok->wb5a = wb5a;
+       tok->prefixsplat = explicitprefix;
         tok->token = buffer_create_dynamic(default_pool, 64);
  
         *tokenizer_r = &tok->tokenizer;
author	Phil Carmody <phil@dovecot.fi>
	Wed, 16 May 2018 13:32:35 +0000 (16:32 +0300)
committer	Ville Savolainen <ville.savolainen@dovecot.fi>
	Tue, 12 Feb 2019 13:40:45 +0000 (15:40 +0200)
src/lib-fts/fts-tokenizer-generic-private.h		patch \| blob \| blame \| history
src/lib-fts/fts-tokenizer-generic.c		patch \| blob \| blame \| history