git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
fts-flatcurve: Fix maybe queries
author Marco Bettini <marco.bettini@open-xchange.com>
Fri, 24 Jan 2025 16:06:17 +0000 (16:06 +0000)
committer aki.tuomi <aki.tuomi@open-xchange.com>
Thu, 20 Feb 2025 13:57:08 +0000 (13:57 +0000)
src/plugins/fts-flatcurve/fts-backend-flatcurve-xapian.cc
src/plugins/fts-flatcurve/fts-backend-flatcurve-xapian.h
src/plugins/fts-flatcurve/fts-backend-flatcurve.c
src/plugins/fts-flatcurve/fts-backend-flatcurve.h

index e24a81821d8dc162236cbb78e0769a5cbab972b4..0873b3515dc234aea88f2e35090c61a5962da499 100644 (file)
@@ -3,6 +3,7 @@
 
 extern "C" {
 #include "lib.h"
+#include "array.h"
 #include "file-create-locked.h"
 #include "hash.h"
 #include "message-header-parser.h"
@@ -159,10 +160,19 @@ struct flatcurve_xapian {
        bool deinit:1;
 };
 
-struct flatcurve_fts_query_xapian {
+struct flatcurve_fts_query_xapian_maybe {
        Xapian::Query *query;
 };
 
+ struct flatcurve_fts_query_xapian {
+       Xapian::Query *query;
+       ARRAY(struct flatcurve_fts_query_xapian_maybe) maybe_queries;
+
+       bool and_search:1;
+       bool maybe:1;
+       bool start:1;
+ };
+
 struct flatcurve_xapian_db_iter {
        struct flatcurve_fts_backend *backend;
        DIR *dirp;
@@ -200,6 +210,8 @@ struct fts_flatcurve_xapian_query_iter {
        Xapian::Database *db;
        Xapian::Enquire *enquire;
        Xapian::MSetIterator mset_iter;
+       int curr_query;
+       bool next_query:1;
 };
 
 static int
@@ -1964,12 +1976,14 @@ fts_flatcurve_build_query_arg_term(struct flatcurve_fts_query *query,
                                   const char *term)
 {
        const char *hdr;
+       bool maybe_or = FALSE;
+       struct flatcurve_fts_query_xapian_maybe *mquery;
        Xapian::Query::op op = Xapian::Query::OP_INVALID;
        Xapian::Query *oldq, q;
        struct flatcurve_fts_query_xapian *x = query->xapian;
 
-       if (x->query != NULL) {
-               if ((query->flags & FTS_LOOKUP_FLAG_AND_ARGS) != 0) {
+       if (x->start) {
+               if (x->and_search) {
                        op = Xapian::Query::OP_AND;
                        str_append(query->qtext, " AND ");
                } else {
@@ -1977,6 +1991,7 @@ fts_flatcurve_build_query_arg_term(struct flatcurve_fts_query *query,
                        str_append(query->qtext, " OR ");
                }
        }
+       x->start = TRUE;
 
        if (arg->match_not)
                str_append(query->qtext, "NOT ");
@@ -2028,7 +2043,10 @@ fts_flatcurve_build_query_arg_term(struct flatcurve_fts_query *query,
                                 * appears in the general pool of header
                                 * terms for the message, not to a specific
                                 * header, so this is only a maybe match. */
-                               query->maybe = TRUE;
+                               if (x->and_search)
+                                       x->maybe = TRUE;
+                               else
+                                       maybe_or = TRUE;
                        }
                } else {
                        hdr = t_str_lcase(arg->hdr_field_name);
@@ -2046,9 +2064,17 @@ fts_flatcurve_build_query_arg_term(struct flatcurve_fts_query *query,
                q = Xapian::Query(Xapian::Query::OP_AND_NOT,
                                  Xapian::Query::MatchAll, q);
 
-       if (x->query == NULL)
+       if (maybe_or) {
+               /* Maybe searches are not added to the "master search" query if this
+                * is an OR search; they will be run independently. Matches will be
+                * placed in the maybe results array. */
+               if (!array_is_created(&x->maybe_queries))
+                       p_array_init(&x->maybe_queries, query->pool, 4);
+               mquery = array_append_space(&x->maybe_queries);
+               mquery->query = new Xapian::Query(std_move(q));
+       } else if (x->query == NULL) {
                x->query = new Xapian::Query(std_move(q));
-       else {
+       } else {
                oldq = x->query;
                x->query = new Xapian::Query(op, *(x->query), q);
                delete(oldq);
@@ -2125,6 +2151,7 @@ void fts_flatcurve_xapian_build_query(struct flatcurve_fts_query *query)
        struct mail_search_arg *args;
 
        query->xapian = p_new(query->pool, struct flatcurve_fts_query_xapian, 1);
+       query->xapian->and_search = ((query->flags & FTS_LOOKUP_FLAG_AND_ARGS) != 0);
        for (args = query->args; args != NULL ; args = args->next)
                fts_flatcurve_build_query_arg(query, args);
 }
@@ -2134,6 +2161,8 @@ fts_flatcurve_xapian_query_iter_init(struct flatcurve_fts_query *query)
 {
        struct fts_flatcurve_xapian_query_iter *iter;
        iter = new fts_flatcurve_xapian_query_iter();
+       iter->curr_query = -1;
+       iter->next_query = TRUE;
        iter->query = query;
        iter->result = p_new(query->pool,
                             struct fts_flatcurve_xapian_query_result, 1);
@@ -2151,22 +2180,45 @@ fts_flatcurve_xapian_query_iter_next(struct fts_flatcurve_xapian_query_iter *ite
                return FALSE;
 
        Xapian::MSet m;
-       if (iter->enquire == NULL) {
-               if (iter->query->xapian->query == NULL)
-                       return FALSE;
+       Xapian::Query *q = NULL;
+       if (iter->next_query) {
+               iter->next_query = FALSE;
 
-               const char *error;
-               int ret = fts_flatcurve_xapian_read_db(
-                       iter->query->backend, opts, &iter->db, &error);
-               if (ret < 0)
-                       iter->error = i_strdup(error);
-               if (ret <= 0)
+               if (iter->curr_query == -1) {
+                       if (iter->query->xapian->query == NULL)
+                               ++iter->curr_query;
+                       else
+                               q = iter->query->xapian->query;
+               }
+
+               /* Maybe queries. */
+               if ((iter->curr_query >= 0) &&
+                   (array_not_empty(&iter->query->xapian->maybe_queries)) &&
+                   (array_count(&iter->query->xapian->maybe_queries) > iter->curr_query)) {
+                       const struct flatcurve_fts_query_xapian_maybe *mquery;
+                       mquery = array_idx(&iter->query->xapian->maybe_queries,
+                                                          iter->curr_query);
+                       q = mquery->query;
+               }
+
+               if (q == NULL)
                        return FALSE;
 
-               iter->enquire = new Xapian::Enquire(*iter->db);
-               iter->enquire->set_docid_order(
-                               Xapian::Enquire::DONT_CARE);
-               iter->enquire->set_query(*iter->query->xapian->query);
+               if (iter->db == NULL) {
+                       const char *error;
+                       int ret = fts_flatcurve_xapian_read_db(
+                               iter->query->backend, opts, &iter->db, &error);
+                       if (ret < 0)
+                               iter->error = i_strdup(error);
+                       if (ret <= 0)
+                               return FALSE;
+               }
+
+               if (iter->enquire == NULL) {
+                       iter->enquire = new Xapian::Enquire(*iter->db);
+                       iter->enquire->set_docid_order(Xapian::Enquire::DONT_CARE);
+               }
+               iter->enquire->set_query(*q);
 
                try {
                        m = iter->enquire->get_mset(0, iter->db->get_doccount());
@@ -2186,9 +2238,13 @@ fts_flatcurve_xapian_query_iter_next(struct fts_flatcurve_xapian_query_iter *ite
                iter->mset_iter = m.begin();
        }
 
-       if (iter->mset_iter == m.end())
-               return FALSE;
+       if (iter->mset_iter == m.end()) {
+               ++iter->curr_query;
+               iter->next_query = TRUE;
+               return fts_flatcurve_xapian_query_iter_next(iter, result_r);
+       }
 
+       iter->result->maybe = (iter->curr_query >= 0);
        iter->result->score = iter->mset_iter.get_weight();
        /* MSet docid can be an "interleaved" docid generated by
         * Xapian::Database when handling multiple DBs at once. Instead, we
@@ -2234,7 +2290,10 @@ int fts_flatcurve_xapian_run_query(struct flatcurve_fts_query *query,
 
        iter = fts_flatcurve_xapian_query_iter_init(query);
        while (fts_flatcurve_xapian_query_iter_next(iter, &result)) {
-               seq_range_array_add(&r->uids, result->uid);
+               if (result->maybe || query->xapian->maybe)
+                       seq_range_array_add(&r->maybe_uids, result->uid);
+               else
+                       seq_range_array_add(&r->uids, result->uid);
                score = array_append_space(&r->scores);
                score->score = (float)result->score;
                score->uid = result->uid;
@@ -2245,6 +2304,14 @@ int fts_flatcurve_xapian_run_query(struct flatcurve_fts_query *query,
 void fts_flatcurve_xapian_destroy_query(struct flatcurve_fts_query *query)
 {
        delete(query->xapian->query);
+
+       if (array_is_created(&query->xapian->maybe_queries)) {
+               struct flatcurve_fts_query_xapian_maybe *mquery;
+               array_foreach_modifiable(&query->xapian->maybe_queries, mquery) {
+                       delete(mquery->query);
+               }
+               array_free(&query->xapian->maybe_queries);
+       }
 }
 
 const char *fts_flatcurve_xapian_library_version()
index 0e1d5d08fdd8530e7eda9cefe55da4103ee7fb86..aa6551acd8a5c59f0eb05965db23d9c21841542c 100644 (file)
@@ -7,6 +7,8 @@
 struct fts_flatcurve_xapian_query_result {
        double score;
        uint32_t uid;
+
+       bool maybe:1;
 };
 
 struct fts_flatcurve_xapian_db_check {
index 9be6f2505d8d7f61f2bfc0404aa91ef9bd377a7b..7e663e5ee5f7fcf37c01496d7faf28913d3808a0 100644 (file)
@@ -590,6 +590,7 @@ fts_backend_flatcurve_lookup_multi(struct fts_backend *_backend,
                r->box = boxes[i];
 
                fresult = p_new(result->pool, struct flatcurve_fts_result, 1);
+               p_array_init(&fresult->maybe_uids, result->pool, 32);
                p_array_init(&fresult->scores, result->pool, 32);
                p_array_init(&fresult->uids, result->pool, 32);
 
@@ -603,10 +604,8 @@ fts_backend_flatcurve_lookup_multi(struct fts_backend *_backend,
                        break;
                }
 
-               if (query->maybe)
-                       r->maybe_uids = fresult->uids;
-               else
-                       r->definite_uids = fresult->uids;
+               r->definite_uids = fresult->uids;
+               r->maybe_uids = fresult->maybe_uids;
                r->scores = fresult->scores;
 
                if (str_len(query->qtext) == 0) {
@@ -615,17 +614,25 @@ fts_backend_flatcurve_lookup_multi(struct fts_backend *_backend,
                }
 
                T_BEGIN {
-                       const char *u = fts_backend_flatcurve_seq_range_string(&fresult->uids);
+                       const char *m_debug = "", *u_debug = "";
+
+                       if (array_not_empty(&fresult->maybe_uids))
+                               m_debug = fts_backend_flatcurve_seq_range_string(
+                                                               &fresult->maybe_uids);
+                       if (array_not_empty(&fresult->uids))
+                               u_debug = fts_backend_flatcurve_seq_range_string(
+                                                               &fresult->uids);
+
                        e_debug(event_create_passthrough(backend->event)->
                                set_name("fts_flatcurve_query")->
-                               add_int("count", array_count(&fresult->uids))->
+                               add_int("count", seq_range_count(&fresult->uids))->
                                add_str("mailbox", r->box->vname)->
-                               add_str("maybe", query->maybe ? "yes" : "no")->
+                               add_str("maybe_uids", m_debug)->
                                add_str("query", str_c(query->qtext))->
-                               add_str("uids", u)->event(), "Query (%s) "
-                               "%smatches=%d uids=%s", str_c(query->qtext),
-                               query->maybe ? "maybe_" : "",
-                               array_count(&fresult->uids), u);
+                               add_str("uids", u_debug)->event(), "Query (%s) "
+                               "matches=%d uids=%s maybe_matches=%d maybe_uids=%s",
+                               str_c(query->qtext), seq_range_count(&fresult->uids),
+                               u_debug, seq_range_count(&fresult->maybe_uids), m_debug);
                } T_END;
        }
 
index deb96975315f5d449303b5b0f3bd32546a153719..c2e1c6360e0ebcde925e96bfd92cf8e8dc9d0583 100644 (file)
@@ -46,12 +46,11 @@ struct flatcurve_fts_query {
        struct flatcurve_fts_query_xapian *xapian;
 
        pool_t pool;
-
-       bool maybe:1;
 };
 
 struct flatcurve_fts_result {
        ARRAY_TYPE(fts_score_map) scores;
+       ARRAY_TYPE(seq_range) maybe_uids;
        ARRAY_TYPE(seq_range) uids;
 };