]> git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
fts-lucene: Use fts expunge log instead of scanning the entire index for expunged...
authorTimo Sirainen <tss@iki.fi>
Fri, 12 Aug 2011 14:39:19 +0000 (17:39 +0300)
committerTimo Sirainen <tss@iki.fi>
Fri, 12 Aug 2011 14:39:19 +0000 (17:39 +0300)
src/plugins/fts-lucene/fts-backend-lucene.c
src/plugins/fts-lucene/lucene-wrapper.cc
src/plugins/fts-lucene/lucene-wrapper.h

index bb88c875ada81d6588347f9ad327dcfd766e0ddf..24f1758051c43da2a3ce5abb40dcb86c5d629fc2 100644 (file)
@@ -4,16 +4,16 @@
 #include "array.h"
 #include "hash.h"
 #include "hex-binary.h"
-#include "file-dotlock.h"
 #include "mail-namespace.h"
 #include "mail-storage-private.h"
+#include "fts-expunge-log.h"
 #include "lucene-wrapper.h"
 #include "fts-lucene-plugin.h"
 
 #include <wchar.h>
 
 #define LUCENE_INDEX_DIR_NAME "lucene-indexes"
-#define LUCENE_EXPUNGE_FILENAME "pending-expunges"
+#define LUCENE_EXPUNGE_LOG_NAME "dovecot-expunges.log"
 #define LUCENE_OPTIMIZE_BATCH_MSGS_COUNT 100
 
 struct lucene_fts_backend {
@@ -23,6 +23,9 @@ struct lucene_fts_backend {
        struct lucene_index *index;
        struct mailbox *selected_box;
        unsigned int selected_box_generation;
+       mail_guid_128_t selected_box_guid;
+
+       struct fts_expunge_log *expunge_log;
 
        unsigned int dir_created:1;
        unsigned int updating:1;
@@ -37,8 +40,10 @@ struct lucene_fts_backend_update_context {
        uint32_t uid;
        char *hdr_name;
 
-       unsigned int added_msgs, expunges;
+       unsigned int added_msgs;
        bool lucene_opened;
+
+       struct fts_expunge_log_append_ctx *expunge_ctx;
 };
 
 static int fts_backend_lucene_mkdir(struct lucene_fts_backend *backend)
@@ -94,8 +99,11 @@ fts_backend_select(struct lucene_fts_backend *backend, struct mailbox *box)
                lucene_index_select_mailbox(backend->index, wguid_hex);
        } else {
                lucene_index_unselect_mailbox(backend->index);
+               memset(&guid, 0, sizeof(guid));
        }
        backend->selected_box = box;
+       memcpy(backend->selected_box_guid, guid,
+              sizeof(backend->selected_box_guid));
        backend->selected_box_generation =
                box == NULL ? 0 : box->generation_sequence;
        return 0;
@@ -133,6 +141,9 @@ fts_backend_lucene_init(struct fts_backend *_backend,
                backend->index = lucene_index_init(backend->dir_path,
                                                   NULL, NULL);
        }
+
+       path = t_strconcat(backend->dir_path, "/"LUCENE_EXPUNGE_LOG_NAME, NULL);
+       backend->expunge_log = fts_expunge_log_init(path);
        return 0;
 }
 
@@ -142,6 +153,7 @@ static void fts_backend_lucene_deinit(struct fts_backend *_backend)
                (struct lucene_fts_backend *)_backend;
 
        lucene_index_deinit(backend->index);
+       fts_expunge_log_deinit(&backend->expunge_log);
        i_free(backend->dir_path);
        i_free(backend);
 }
@@ -190,62 +202,23 @@ fts_backend_lucene_need_optimize(struct lucene_fts_backend_update_context *ctx)
 {
        struct lucene_fts_backend *backend =
                (struct lucene_fts_backend *)ctx->ctx.backend;
-       const struct dotlock_settings dotlock_set = {
-               .timeout = 1,
-               .stale_timeout = 30,
-               .use_excl_lock = TRUE
-       };
-       struct dotlock *dotlock;
-       const char *path;
-       char buf[MAX_INT_STRLEN+1];
-       unsigned int expunges = 0;
+       unsigned int expunges;
        uint32_t numdocs;
-       int fdw, fdr, ret;
-
-       if (ctx->added_msgs >= LUCENE_OPTIMIZE_BATCH_MSGS_COUNT)
-               return TRUE;
-
-       if (ctx->expunges == 0)
-               return FALSE;
-
-       /* update pending expunges count */
-       path = t_strconcat(backend->dir_path, "/"LUCENE_EXPUNGE_FILENAME, NULL);
-       fdw = file_dotlock_open(&dotlock_set, path, 0, &dotlock);
-       if (fdw == -1)
-               return FALSE;
-
-       fdr = open(path, O_RDONLY);
-       if (fdr == -1) {
-               if (errno != ENOENT)
-                       i_error("open(%s) failed: %m", path);
-       } else {
-               ret = read(fdr, buf, sizeof(buf)-1);
-               if (ret < 0)
-                       i_error("read(%s) failed: %m", path);
-               else {
-                       buf[ret] = '\0';
-                       if (str_to_uint(buf, &expunges) < 0)
-                               i_error("%s is corrupted: '%s'", path, buf);
-               }
-               if (close(fdr) < 0)
-                       i_error("close(%s) failed: %m", path);
-       }
-       expunges += ctx->expunges;
-
-       i_snprintf(buf, sizeof(buf), "%u", expunges);
-       if (write(fdw, buf, strlen(buf)) < 0)
-               i_error("write(%s) failed: %m", path);
-       (void)file_dotlock_replace(&dotlock, 0);
 
        if (!ctx->ctx.backend->syncing) {
                /* only indexer process can actually do anything
                   about optimizing */
                return FALSE;
        }
+       if (ctx->added_msgs >= LUCENE_OPTIMIZE_BATCH_MSGS_COUNT)
+               return TRUE;
        if (lucene_index_get_doc_count(backend->index, &numdocs) < 0)
                return FALSE;
 
-       return numdocs / expunges <= 50; /* >2% of index has been expunged */
+       if (fts_expunge_log_uid_count(backend->expunge_log, &expunges) < 0)
+               return FALSE;
+       return expunges > 0 &&
+               numdocs / expunges <= 50; /* >2% of index has been expunged */
 }
 
 static int
@@ -263,6 +236,11 @@ fts_backend_lucene_update_deinit(struct fts_backend_update_context *_ctx)
        if (ctx->lucene_opened)
                lucene_index_build_deinit(backend->index);
 
+       if (ctx->expunge_ctx != NULL) {
+               if (fts_expunge_log_append_commit(&ctx->expunge_ctx) < 0)
+                       ret = -1;
+       }
+
        if (fts_backend_lucene_need_optimize(ctx))
                (void)fts_backend_optimize(_ctx->backend);
 
@@ -286,12 +264,23 @@ fts_backend_lucene_update_set_mailbox(struct fts_backend_update_context *_ctx,
 
 static void
 fts_backend_lucene_update_expunge(struct fts_backend_update_context *_ctx,
-                                 uint32_t uid ATTR_UNUSED)
+                                 uint32_t uid)
 {
        struct lucene_fts_backend_update_context *ctx =
                (struct lucene_fts_backend_update_context *)_ctx;
+       struct lucene_fts_backend *backend =
+               (struct lucene_fts_backend *)_ctx->backend;
 
-       ctx->expunges++;
+       if (ctx->expunge_ctx == NULL) {
+               ctx->expunge_ctx =
+                       fts_expunge_log_append_begin(backend->expunge_log);
+       }
+
+       if (fts_backend_select(backend, ctx->box) < 0)
+               _ctx->failed = TRUE;
+
+       fts_expunge_log_append_next(ctx->expunge_ctx,
+                                   backend->selected_box_guid, uid);
 }
 
 static bool
@@ -379,15 +368,28 @@ fts_backend_lucene_refresh(struct fts_backend *_backend)
        return 0;
 }
 
+static int fts_backend_lucene_rescan(struct fts_backend *_backend)
+{
+       struct lucene_fts_backend *backend =
+               (struct lucene_fts_backend *)_backend;
+
+       return lucene_index_rescan(backend->index, _backend->ns->list);
+}
+
 static int fts_backend_lucene_optimize(struct fts_backend *_backend)
 {
        struct lucene_fts_backend *backend =
                (struct lucene_fts_backend *)_backend;
        int ret;
 
-       ret = lucene_index_rescan(backend->index, _backend->ns->list);
-       if (lucene_index_optimize(backend->index) < 0)
-               ret = -1;
+       ret = lucene_index_expunge_from_log(backend->index,
+                                           backend->expunge_log);
+       if (ret == 0) {
+               /* log was corrupted, need to rescan */
+               ret = lucene_index_rescan(backend->index, _backend->ns->list);
+       }
+       if (ret >= 0)
+               ret = lucene_index_optimize(backend->index);
        return ret;
 }
 
@@ -504,6 +506,7 @@ struct fts_backend fts_backend_lucene = {
                fts_backend_lucene_update_unset_build_key,
                fts_backend_lucene_update_build_more,
                fts_backend_lucene_refresh,
+               fts_backend_lucene_rescan,
                fts_backend_lucene_optimize,
                fts_backend_default_can_lookup,
                fts_backend_lucene_lookup,
index 418dc8ca09618738ebc5e2aef29acd37c50163e2..de643fb3640d7eb2c3a572955d589fdd5f5f930d 100644 (file)
@@ -10,6 +10,7 @@ extern "C" {
 #include "mail-search.h"
 #include "mail-namespace.h"
 #include "mail-storage.h"
+#include "fts-expunge-log.h"
 #include "lucene-wrapper.h"
 
 #include <sys/stat.h>
@@ -669,6 +670,88 @@ int lucene_index_rescan(struct lucene_index *index,
        return failed ? -1 : 0;
 }
 
+static void guid128_to_wguid(const mail_guid_128_t guid,
+                            wchar_t wguid_hex[MAILBOX_GUID_HEX_LENGTH + 1])
+{
+       buffer_t buf = { 0, 0, { 0, 0, 0, 0, 0 } };
+       unsigned char guid_hex[MAILBOX_GUID_HEX_LENGTH];
+       unsigned int i;
+
+       buffer_create_data(&buf, guid_hex, MAILBOX_GUID_HEX_LENGTH);
+       binary_to_hex_append(&buf, guid, MAIL_GUID_128_SIZE);
+       for (i = 0; i < MAILBOX_GUID_HEX_LENGTH; i++)
+               wguid_hex[i] = guid_hex[i];
+       wguid_hex[i] = '\0';
+}
+
+static int
+lucene_index_expunge_record(struct lucene_index *index,
+                           const struct fts_expunge_log_read_record *rec)
+{
+       const struct seq_range *range;
+       unsigned int count;
+       int ret;
+
+       if ((ret = lucene_index_open_search(index)) <= 0)
+               return ret;
+       range = array_get(&rec->uids, &count);
+
+       BooleanQuery query;
+
+       /* search for UIDs between lowest and highest expunged UID */
+       wchar_t wuid1[MAX_INT_STRLEN], wuid2[MAX_INT_STRLEN];
+       swprintf(wuid1, N_ELEMENTS(wuid1), L"%u", range[0].seq1);
+       swprintf(wuid2, N_ELEMENTS(wuid2), L"%u", range[count-1].seq2);
+       Term wuid1_term(_T("uid"), wuid1);
+       Term wuid2_term(_T("uid"), wuid2);
+       RangeQuery rq(&wuid1_term, &wuid2_term, true);
+       query.add(&rq, BooleanClause::MUST);
+
+       wchar_t wguid[MAILBOX_GUID_HEX_LENGTH + 1];
+       guid128_to_wguid(rec->mailbox_guid, wguid);
+       Term term(_T("box"), wguid);
+       TermQuery mailbox_query(&term);
+       query.add(&mailbox_query, BooleanClause::MUST);
+
+       try {
+               Hits *hits = index->searcher->search(&query);
+
+               for (size_t i = 0; i < hits->length(); i++) {
+                       uint32_t uid;
+
+                       if (lucene_doc_get_uid(index, &hits->doc(i),
+                                              &uid) < 0 ||
+                           seq_range_exists(&rec->uids, uid))
+                               index->reader->deleteDocument(hits->id(i));
+               }
+               _CLDELETE(hits);
+       } catch (CLuceneError &err) {
+               lucene_handle_error(index, err, "expunge search");
+               ret = -1;
+       }
+       return ret < 0 ? -1 : 0;
+}
+
+int lucene_index_expunge_from_log(struct lucene_index *index,
+                                 struct fts_expunge_log *log)
+{
+       struct fts_expunge_log_read_ctx *ctx;
+       const struct fts_expunge_log_read_record *rec;
+       int ret = 0, ret2;
+
+       ctx = fts_expunge_log_read_begin(log);
+       while ((rec = fts_expunge_log_read_next(ctx)) != NULL) {
+               if (lucene_index_expunge_record(index, rec) < 0) {
+                       ret = -1;
+                       break;
+               }
+       }
+       ret2 = fts_expunge_log_read_end(&ctx);
+       if (ret < 0 || ret2 < 0)
+               return -1;
+       return ret2;
+}
+
 int lucene_index_optimize(struct lucene_index *index)
 {
        int ret = 0;
index 22ea0b0047d30e3c5ad58313ebb323d35555e2be..a4108e87b80a738ab962bd90bf92ba5248bce280 100644 (file)
@@ -35,6 +35,8 @@ int lucene_index_build_deinit(struct lucene_index *index);
 void lucene_index_close(struct lucene_index *index);
 int lucene_index_rescan(struct lucene_index *index,
                        struct mailbox_list *list);
+int lucene_index_expunge_from_log(struct lucene_index *index,
+                                 struct fts_expunge_log *log);
 int lucene_index_optimize(struct lucene_index *index);
 
 int lucene_index_lookup(struct lucene_index *index,