git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
fts: Added a new lookup API where the backend can look up all the fields
author Timo Sirainen <tss@iki.fi>
Sun, 13 Jul 2008 15:04:19 +0000 (18:04 +0300)
committer Timo Sirainen <tss@iki.fi>
Sun, 13 Jul 2008 15:04:19 +0000 (18:04 +0300)
using a single query. Implemented it for fts-solr.

--HG--
branch : HEAD

src/plugins/fts-lucene/fts-backend-lucene.c
src/plugins/fts-solr/fts-backend-solr.c
src/plugins/fts-squat/fts-backend-squat.c
src/plugins/fts/fts-api-private.h
src/plugins/fts/fts-api.c
src/plugins/fts/fts-api.h
src/plugins/fts/fts-search.c
src/plugins/fts/fts-storage.h

index b98f7cf13bc6561f568b561943bd882d06f9d62b..99c5a3408a94af6cc32757952df5538d0249bb87 100644 (file)
@@ -216,6 +216,7 @@ struct fts_backend fts_backend_lucene = {
                fts_backend_lucene_lock,
                fts_backend_lucene_unlock,
                fts_backend_lucene_lookup,
+               NULL,
                NULL
        }
 };
index d3d8e28f6fcacb02a2a4587dbef5c8e5208cc2f9..a14bae54f4fe062ae3908640f1da9d7b53df23d4 100644 (file)
@@ -255,37 +255,50 @@ static void fts_backend_solr_unlock(struct fts_backend *backend ATTR_UNUSED)
 {
 }
 
-static int
-fts_backend_solr_lookup(struct fts_backend *backend, const char *key,
-                       enum fts_lookup_flags flags,
-                       ARRAY_TYPE(seq_range) *definite_uids,
-                       ARRAY_TYPE(seq_range) *maybe_uids)
+static int fts_backend_solr_lookup(struct fts_backend_lookup_context *ctx,
+                                  ARRAY_TYPE(seq_range) *definite_uids,
+                                  ARRAY_TYPE(seq_range) *maybe_uids)
 {
+       struct mailbox *box = ctx->backend->box;
+       const struct fts_backend_lookup_field *fields;
+       unsigned int i, count;
        struct mailbox_status status;
        string_t *str;
 
-       i_assert((flags & FTS_LOOKUP_FLAG_INVERT) == 0);
+       mailbox_get_status(box, STATUS_UIDVALIDITY, &status);
 
        str = t_str_new(256);
-       str_append(str, "fl=uid&q=");
-       if ((flags & FTS_LOOKUP_FLAG_HEADER) == 0) {
-               /* body only */
-               i_assert((flags & FTS_LOOKUP_FLAG_BODY) != 0);
-               str_append(str, "body:");
-       } else if ((flags & FTS_LOOKUP_FLAG_BODY) == 0) {
-               /* header only */
-               str_append(str, "hdr:");
-       } else {
-               /* both */
-               str_append(str, "any:");
+       str_printfa(str, "fl=uid&rows=%u&q=", status.uidnext);
+
+       /* build a lucene search query from the fields */
+       fields = array_get(&ctx->fields, &count);
+       for (i = 0; i < count; i++) {
+               if (i > 0)
+                       str_append(str, "%20");
+
+               if ((fields[i].flags & FTS_LOOKUP_FLAG_INVERT) != 0)
+                       str_append_c(str, '-');
+
+               if ((fields[i].flags & FTS_LOOKUP_FLAG_HEADER) == 0) {
+                       /* body only */
+                       i_assert((fields[i].flags & FTS_LOOKUP_FLAG_BODY) != 0);
+                       str_append(str, "body:");
+               } else if ((fields[i].flags & FTS_LOOKUP_FLAG_BODY) == 0) {
+                       /* header only */
+                       str_append(str, "hdr:");
+               } else {
+                       /* both */
+                       str_append(str, "any:");
+               }
+               solr_quote_str(str, fields[i].key);
        }
-       solr_quote_str(str, key);
 
-       mailbox_get_status(backend->box, STATUS_UIDVALIDITY, &status);
-       str_printfa(str, "%%20uidv:%u%%20box:", status.uidvalidity);
-       solr_quote_str(str, backend->box->name);
+       /* use a separate filter query for selecting the mailbox. it shouldn't
+          affect the score and there could be some caching benefits too. */
+       str_printfa(str, "&fq=uidv:%u%%20box:", status.uidvalidity);
+       solr_quote_str(str, box->name);
        str_append(str, "%20user:");
-       solr_quote_str(str, backend->box->storage->user);
+       solr_quote_str(str, box->storage->user);
 
        array_clear(maybe_uids);
        return solr_connection_select(solr_conn, str_c(str), definite_uids);
@@ -306,7 +319,8 @@ struct fts_backend fts_backend_solr = {
                fts_backend_solr_expunge_finish,
                fts_backend_solr_lock,
                fts_backend_solr_unlock,
-               fts_backend_solr_lookup,
-               NULL
+               NULL,
+               NULL,
+               fts_backend_solr_lookup
        }
 };
index 06d22b7c319c44dd29948fef435a46b9a941b9f4..78f6a954920b77f2f725a9f5018caa9cc6867c93 100644 (file)
@@ -252,6 +252,7 @@ struct fts_backend fts_backend_squat = {
                fts_backend_squat_lock,
                fts_backend_squat_unlock,
                fts_backend_squat_lookup,
+               NULL,
                NULL
        }
 };
index 43e6f8934687248f12b9a4fd451955623e5a3678..3b82ab4472b27e1bfcb8d0f049d33b175607772c 100644 (file)
@@ -30,6 +30,10 @@ struct fts_backend_vfuncs {
                      enum fts_lookup_flags flags,
                      ARRAY_TYPE(seq_range) *definite_uids,
                      ARRAY_TYPE(seq_range) *maybe_uids);
+
+       int (*lookup2)(struct fts_backend_lookup_context *ctx,
+                      ARRAY_TYPE(seq_range) *definite_uids,
+                      ARRAY_TYPE(seq_range) *maybe_uids);
 };
 
 enum fts_backend_flags {
@@ -56,7 +60,24 @@ struct fts_backend_build_context {
        unsigned int failed:1;
 };
 
+struct fts_backend_lookup_field {
+       const char *key;
+       enum fts_lookup_flags flags;
+};
+
+struct fts_backend_lookup_context {
+       struct fts_backend *backend;
+       pool_t pool;
+
+       ARRAY_DEFINE(fields, struct fts_backend_lookup_field);
+};
+
 void fts_backend_register(const struct fts_backend *backend);
 void fts_backend_unregister(const char *name);
 
+void fts_filter_uids(ARRAY_TYPE(seq_range) *definite_dest,
+                    const ARRAY_TYPE(seq_range) *definite_filter,
+                    ARRAY_TYPE(seq_range) *maybe_dest,
+                    const ARRAY_TYPE(seq_range) *maybe_filter);
+
 #endif
index 49bd11d67427f8068f5d2ee7b1eb81a5ef693dac..91a052f0939e33ede364d5ba522ea070bb553846 100644 (file)
@@ -144,33 +144,6 @@ void fts_backend_unlock(struct fts_backend *backend)
        backend->v.unlock(backend);
 }
 
-static void fts_lookup_invert(ARRAY_TYPE(seq_range) *definite_uids,
-                             const ARRAY_TYPE(seq_range) *maybe_uids)
-{
-       /* we'll begin by inverting definite UIDs */
-       seq_range_array_invert(definite_uids, 1, (uint32_t)-1);
-
-       /* from that list remove UIDs in the maybe list.
-          the maybe list itself isn't touched. */
-       (void)seq_range_array_remove_seq_range(definite_uids, maybe_uids);
-}
-
-int fts_backend_lookup(struct fts_backend *backend, const char *key,
-                      enum fts_lookup_flags flags,
-                      ARRAY_TYPE(seq_range) *definite_uids,
-                      ARRAY_TYPE(seq_range) *maybe_uids)
-{
-       int ret;
-
-       ret = backend->v.lookup(backend, key, flags & ~FTS_LOOKUP_FLAG_INVERT,
-                               definite_uids, maybe_uids);
-       if (unlikely(ret < 0))
-               return -1;
-       if ((flags & FTS_LOOKUP_FLAG_INVERT) != 0)
-               fts_lookup_invert(definite_uids, maybe_uids);
-       return 0;
-}
-
 static void
 fts_merge_maybies(ARRAY_TYPE(seq_range) *dest_maybe,
                  const ARRAY_TYPE(seq_range) *dest_definite,
@@ -206,10 +179,51 @@ fts_merge_maybies(ARRAY_TYPE(seq_range) *dest_maybe,
        }
 }
 
-int fts_backend_filter(struct fts_backend *backend, const char *key,
-                      enum fts_lookup_flags flags,
-                      ARRAY_TYPE(seq_range) *definite_uids,
-                      ARRAY_TYPE(seq_range) *maybe_uids)
+void fts_filter_uids(ARRAY_TYPE(seq_range) *definite_dest,
+                    const ARRAY_TYPE(seq_range) *definite_filter,
+                    ARRAY_TYPE(seq_range) *maybe_dest,
+                    const ARRAY_TYPE(seq_range) *maybe_filter)
+{
+       T_BEGIN {
+               fts_merge_maybies(maybe_dest, definite_dest,
+                                 maybe_filter, definite_filter);
+       } T_END;
+       /* keep only what exists in both lists. the rest is in
+          maybies or not wanted */
+       seq_range_array_intersect(definite_dest, definite_filter);
+}
+
+static void fts_lookup_invert(ARRAY_TYPE(seq_range) *definite_uids,
+                             const ARRAY_TYPE(seq_range) *maybe_uids)
+{
+       /* we'll begin by inverting definite UIDs */
+       seq_range_array_invert(definite_uids, 1, (uint32_t)-1);
+
+       /* from that list remove UIDs in the maybe list.
+          the maybe list itself isn't touched. */
+       (void)seq_range_array_remove_seq_range(definite_uids, maybe_uids);
+}
+
+static int fts_backend_lookup(struct fts_backend *backend, const char *key,
+                             enum fts_lookup_flags flags,
+                             ARRAY_TYPE(seq_range) *definite_uids,
+                             ARRAY_TYPE(seq_range) *maybe_uids)
+{
+       int ret;
+
+       ret = backend->v.lookup(backend, key, flags & ~FTS_LOOKUP_FLAG_INVERT,
+                               definite_uids, maybe_uids);
+       if (unlikely(ret < 0))
+               return -1;
+       if ((flags & FTS_LOOKUP_FLAG_INVERT) != 0)
+               fts_lookup_invert(definite_uids, maybe_uids);
+       return 0;
+}
+
+static int fts_backend_filter(struct fts_backend *backend, const char *key,
+                             enum fts_lookup_flags flags,
+                             ARRAY_TYPE(seq_range) *definite_uids,
+                             ARRAY_TYPE(seq_range) *maybe_uids)
 {
        ARRAY_TYPE(seq_range) tmp_definite, tmp_maybe;
        int ret;
@@ -228,15 +242,72 @@ int fts_backend_filter(struct fts_backend *backend, const char *key,
                array_clear(definite_uids);
                array_clear(maybe_uids);
        } else {
-               T_BEGIN {
-                       fts_merge_maybies(maybe_uids, definite_uids,
-                                         &tmp_maybe, &tmp_definite);
-               } T_END;
-               /* keep only what exists in both lists. the rest is in
-                  maybies or not wanted */
-               seq_range_array_intersect(definite_uids, &tmp_definite);
+               fts_filter_uids(definite_uids, &tmp_definite,
+                               maybe_uids, &tmp_maybe);
        }
        array_free(&tmp_maybe);
        array_free(&tmp_definite);
        return ret;
 }
+
+struct fts_backend_lookup_context *
+fts_backend_lookup_init(struct fts_backend *backend)
+{
+       struct fts_backend_lookup_context *ctx;
+       pool_t pool;
+
+       pool = pool_alloconly_create("fts backend lookup", 256);
+       ctx = p_new(pool, struct fts_backend_lookup_context, 1);
+       ctx->pool = pool;
+       ctx->backend = backend;
+       p_array_init(&ctx->fields, pool, 8);
+       return ctx;
+}
+
+void fts_backend_lookup_add(struct fts_backend_lookup_context *ctx,
+                           const char *key, enum fts_lookup_flags flags)
+{
+       struct fts_backend_lookup_field *field;
+
+       field = array_append_space(&ctx->fields);
+       field->key = p_strdup(ctx->pool, key);
+       field->flags = flags;
+}
+
+static int fts_backend_lookup_old(struct fts_backend_lookup_context *ctx,
+                                 ARRAY_TYPE(seq_range) *definite_uids,
+                                 ARRAY_TYPE(seq_range) *maybe_uids)
+{
+       const struct fts_backend_lookup_field *fields;
+       unsigned int i, count;
+
+       fields = array_get(&ctx->fields, &count);
+       i_assert(count > 0);
+
+       if (fts_backend_lookup(ctx->backend, fields[0].key, fields[0].flags,
+                              definite_uids, maybe_uids) < 0)
+               return -1;
+       for (i = 1; i < count; i++) {
+               if (fts_backend_filter(ctx->backend,
+                                      fields[i].key, fields[i].flags,
+                                      definite_uids, maybe_uids) < 0)
+                       return -1;
+       }
+       return 0;
+}
+
+int fts_backend_lookup_deinit(struct fts_backend_lookup_context **_ctx,
+                             ARRAY_TYPE(seq_range) *definite_uids,
+                             ARRAY_TYPE(seq_range) *maybe_uids)
+{
+       struct fts_backend_lookup_context *ctx = *_ctx;
+       int ret;
+
+       *_ctx = NULL;
+       if (ctx->backend->v.lookup2 != NULL)
+               ret = ctx->backend->v.lookup2(ctx, definite_uids, maybe_uids);
+       else
+               ret = fts_backend_lookup_old(ctx, definite_uids, maybe_uids);
+       pool_unref(&ctx->pool);
+       return ret;
+}
index 49fa1aaa0f7869e98b92933fb0ae5b4569101244..196c467dd6cc83fe31bd97b7f7f8459ea1db6f7b 100644 (file)
@@ -52,17 +52,15 @@ void fts_backend_expunge_finish(struct fts_backend *backend,
 int fts_backend_lock(struct fts_backend *backend);
 void fts_backend_unlock(struct fts_backend *backend);
 
-/* Lookup key from the index and return the found UIDs in result. */
-int fts_backend_lookup(struct fts_backend *backend, const char *key,
-                      enum fts_lookup_flags flags,
-                      ARRAY_TYPE(seq_range) *definite_uids,
-                      ARRAY_TYPE(seq_range) *maybe_uids);
-/* Drop UIDs from the result list for which the key doesn't exist. The idea
-   is that with multiple search keywords you first lookup one and then filter
-   the rest. */
-int fts_backend_filter(struct fts_backend *backend, const char *key,
-                      enum fts_lookup_flags flags,
-                      ARRAY_TYPE(seq_range) *definite_uids,
-                      ARRAY_TYPE(seq_range) *maybe_uids);
+/* Start building a FTS lookup. */
+struct fts_backend_lookup_context *
+fts_backend_lookup_init(struct fts_backend *backend);
+/* Add a new search key to the lookup. */
+void fts_backend_lookup_add(struct fts_backend_lookup_context *ctx,
+                           const char *key, enum fts_lookup_flags flags);
+/* Finish the lookup and return found UIDs. */
+int fts_backend_lookup_deinit(struct fts_backend_lookup_context **ctx,
+                             ARRAY_TYPE(seq_range) *definite_uids,
+                             ARRAY_TYPE(seq_range) *maybe_uids);
 
 #endif
index 7b347953336264751974f4c1c725698cdaf7db7e..00c3f6eb8205e2127a11513c055374771a3cfbe2 100644 (file)
@@ -42,9 +42,10 @@ static void fts_uid_results_to_seq(struct fts_search_context *fctx)
 }
 
 static int fts_search_lookup_arg(struct fts_search_context *fctx,
-                                struct mail_search_arg *arg, bool filter)
+                                struct mail_search_arg *arg)
 {
        struct fts_backend *backend;
+       struct fts_backend_lookup_context **lookup_ctx_p;
        enum fts_lookup_flags flags = 0;
        const char *key;
        string_t *key_utf8;
@@ -78,7 +79,6 @@ static int fts_search_lookup_arg(struct fts_search_context *fctx,
                break;
        default:
                /* can't filter this */
-               i_assert(filter);
                return 0;
        }
        if (arg->not)
@@ -96,14 +96,16 @@ static int fts_search_lookup_arg(struct fts_search_context *fctx,
                ret = 0;
        } else if (!backend->locked && fts_backend_lock(backend) <= 0)
                ret = -1;
-       else if (!filter) {
-               ret = fts_backend_lookup(backend, str_c(key_utf8), flags,
-                                        &fctx->definite_seqs,
-                                        &fctx->maybe_seqs);
-       } else {
-               ret = fts_backend_filter(backend, str_c(key_utf8), flags,
-                                        &fctx->definite_seqs,
-                                        &fctx->maybe_seqs);
+       else {
+               ret = 0;
+               if (backend == fctx->fbox->backend_substr)
+                       lookup_ctx_p = &fctx->lookup_ctx_substr;
+               else
+                       lookup_ctx_p = &fctx->lookup_ctx_fast;
+
+               if (*lookup_ctx_p == NULL)
+                       *lookup_ctx_p = fts_backend_lookup_init(backend);
+               fts_backend_lookup_add(*lookup_ctx_p, str_c(key_utf8), flags);
        }
        return ret;
 }
@@ -111,6 +113,7 @@ static int fts_search_lookup_arg(struct fts_search_context *fctx,
 void fts_search_lookup(struct fts_search_context *fctx)
 {
        struct mail_search_arg *arg;
+       bool have_seqs;
        int ret;
 
        if (fctx->best_arg == NULL)
@@ -119,25 +122,53 @@ void fts_search_lookup(struct fts_search_context *fctx)
        i_array_init(&fctx->definite_seqs, 64);
        i_array_init(&fctx->maybe_seqs, 64);
 
-       /* start filtering with the best arg */
+       /* start lookup with the best arg */
        T_BEGIN {
-               ret = fts_search_lookup_arg(fctx, fctx->best_arg, FALSE);
+               ret = fts_search_lookup_arg(fctx, fctx->best_arg);
        } T_END;
        /* filter the rest */
        for (arg = fctx->args->args; arg != NULL && ret == 0; arg = arg->next) {
                if (arg != fctx->best_arg) {
                        T_BEGIN {
-                               ret = fts_search_lookup_arg(fctx, arg, TRUE);
+                               ret = fts_search_lookup_arg(fctx, arg);
                        } T_END;
                }
        }
 
-       if (fctx->fbox->backend_fast != NULL &&
-           fctx->fbox->backend_fast->locked)
-               fts_backend_unlock(fctx->fbox->backend_fast);
-       if (fctx->fbox->backend_substr != NULL &&
-           fctx->fbox->backend_substr->locked)
-               fts_backend_unlock(fctx->fbox->backend_substr);
+       have_seqs = FALSE;
+       if (fctx->fbox->backend_fast != NULL) {
+               if (fctx->lookup_ctx_fast != NULL) {
+                       have_seqs = TRUE;
+                       fts_backend_lookup_deinit(&fctx->lookup_ctx_fast,
+                                                 &fctx->definite_seqs,
+                                                 &fctx->maybe_seqs);
+               }
+               if (fctx->fbox->backend_fast->locked)
+                       fts_backend_unlock(fctx->fbox->backend_fast);
+       }
+       if (fctx->fbox->backend_substr != NULL) {
+               if (fctx->lookup_ctx_substr == NULL) {
+                       /* no substr lookups */
+               } else if (!have_seqs) {
+                       fts_backend_lookup_deinit(&fctx->lookup_ctx_substr,
+                                                 &fctx->definite_seqs,
+                                                 &fctx->maybe_seqs);
+               } else {
+                       /* have to merge the results */
+                       ARRAY_TYPE(seq_range) tmp_def, tmp_maybe;
+
+                       i_array_init(&tmp_def, 64);
+                       i_array_init(&tmp_maybe, 64);
+                       fts_backend_lookup_deinit(&fctx->lookup_ctx_substr,
+                                                 &tmp_def, &tmp_maybe);
+                       fts_filter_uids(&fctx->definite_seqs, &tmp_def,
+                                       &fctx->maybe_seqs, &tmp_maybe);
+                       array_free(&tmp_def);
+                       array_free(&tmp_maybe);
+               }
+               if (fctx->fbox->backend_substr->locked)
+                       fts_backend_unlock(fctx->fbox->backend_substr);
+       }
 
        if (ret == 0) {
                fctx->seqs_set = TRUE;
index f127247201479f9f7876d28edd616da62923cc8b..5dec96fd73577ad64ff577db4811043f6c953b12 100644 (file)
@@ -18,6 +18,7 @@ struct fts_search_context {
        struct mail_search_args *args;
        struct mail_search_arg *best_arg;
 
+       struct fts_backend_lookup_context *lookup_ctx_substr, *lookup_ctx_fast;
        ARRAY_TYPE(seq_range) definite_seqs, maybe_seqs;
        unsigned int definite_idx, maybe_idx;