]> git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
fts-flatcurve: Add xapian-based FTS plugin
authorMichael M Slusarz <slusarz@users.noreply.github.com>
Mon, 1 Nov 2021 07:32:50 +0000 (09:32 +0200)
committerMarco Bettini <marco.bettini@open-xchange.com>
Tue, 26 Apr 2022 07:50:20 +0000 (09:50 +0200)
12 files changed:
AUTHORS
configure.ac
m4/dovecot.m4
m4/want_flatcurve.m4 [new file with mode: 0644]
src/plugins/Makefile.am
src/plugins/fts-flatcurve/Makefile.am [new file with mode: 0644]
src/plugins/fts-flatcurve/fts-backend-flatcurve-xapian.cc [new file with mode: 0644]
src/plugins/fts-flatcurve/fts-backend-flatcurve-xapian.h [new file with mode: 0644]
src/plugins/fts-flatcurve/fts-backend-flatcurve.c [new file with mode: 0644]
src/plugins/fts-flatcurve/fts-backend-flatcurve.h [new file with mode: 0644]
src/plugins/fts-flatcurve/fts-flatcurve-plugin.c [new file with mode: 0644]
src/plugins/fts-flatcurve/fts-flatcurve-plugin.h [new file with mode: 0644]

diff --git a/AUTHORS b/AUTHORS
index 26fa1cdf8a59c4f9de9b72bdb63c3ffd229eb95e..1d2873794ab3aab297e7776443ac29f2960c2e46 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -21,4 +21,6 @@ Blowfish code by Solar Designer <solar at openwall.com>
 
 SMTP fuzzer code by Catena cyber <p.antoine at catenacyber.fr>
 
+FTS flatcurve by Michael Slusarz <slusarz@curecanti.org>
+
 Grepping 'Patch by' from ChangeLog shows up more people.
index dbe3e3fadeb8024770525712c9d7614731f46aab..b4b30dc042b0ec27da6b5c482d003730e852b1f4 100644 (file)
@@ -154,6 +154,11 @@ AS_HELP_STRING([--with-solr], [Build with Solr full text search support]),
   TEST_WITH(solr, $withval),
   want_solr=no)
 
+AC_ARG_WITH(flatcurve,
+AS_HELP_STRING([--with-flatcurve], [Build with Flatcurve (Xapian) full text search support]),
+  TEST_WITH(flatcurve, $withval),
+  want_flatcurve=no)
+
 AC_ARG_WITH(sodium,
 AS_HELP_STRING([--with-sodium], [Build with libsodium support (enables argon2, default: auto)]),
   TEST_WITH(sodium, $withval),
@@ -741,6 +746,7 @@ fts=""
 not_fts=""
 
 DOVECOT_WANT_SOLR
+DOVECOT_WANT_FLATCURVE
 DOVECOT_WANT_STEMMER
 DOVECOT_WANT_TEXTCAT
 
@@ -890,6 +896,7 @@ src/plugins/acl/Makefile
 src/plugins/imap-acl/Makefile
 src/plugins/fs-compress/Makefile
 src/plugins/fts/Makefile
+src/plugins/fts-flatcurve/Makefile
 src/plugins/fts-solr/Makefile
 src/plugins/last-login/Makefile
 src/plugins/lazy-expunge/Makefile
index 17295bffc7388e3b3222ee58a3b5924701175b1f..3f1febbc2b971631b9056d97799e4fd4c125f796 100644 (file)
@@ -531,7 +531,7 @@ AC_DEFUN([DOVECOT_WANT_UBSAN], [
   AS_IF([test x$want_ubsan = xyes], [
      san_flags=""
      gl_COMPILER_OPTION_IF([-fsanitize=undefined], [
-             san_flags="$san_flags -fsanitize=undefined"
+             san_flags="$san_flags -fsanitize=undefined -fno-sanitize=function,vptr"
              AC_DEFINE([HAVE_FSANITIZE_UNDEFINED], [1], [Define if your compiler has -fsanitize=undefined])
      ])
      gl_COMPILER_OPTION_IF([-fno-sanitize=nonnull-attribute], [
diff --git a/m4/want_flatcurve.m4 b/m4/want_flatcurve.m4
new file mode 100644 (file)
index 0000000..aff59b0
--- /dev/null
@@ -0,0 +1,19 @@
+AC_DEFUN([DOVECOT_WANT_FLATCURVE], [
+  have_flatcurve=no
+  AS_IF([test "$want_flatcurve" != "no"], [
+    PKG_CHECK_MODULES(XAPIAN, xapian-core >= 1.2, [
+      AC_DEFINE([HAVE_XAPIAN], 1, [Xapian is available])
+      PKGCONFIG_REQUIRES="$PKGCONFIG_REQUIRES xapian-core"
+      have_flatcurve=yes
+      fts="$fts flatcurve"
+      PKG_CHECK_MODULES(XAPIAN, xapian-core >= 1.4, [
+        AC_DEFINE([XAPIAN_HAS_COMPACT],1,[Xapian compaction support (1.4+)])
+      ])
+    ],[
+      AS_IF([test $want_flatcurve = yes], [
+        AC_ERROR([Can't build with Flatcurve FTS: $XAPIAN_PKG_ERRORS])
+      ])
+    ])
+  ])
+  AM_CONDITIONAL(BUILD_FLATCURVE, test "$have_flatcurve" = "yes")
+])
index 1f30f16526b05886e1acf7d3210332b9c24a5b72..6b858ed8454a5d8e6689a8c540a1ad7f77c35f09 100644 (file)
@@ -6,6 +6,10 @@ if BUILD_SOLR
 FTS_SOLR = fts-solr
 endif
 
+if BUILD_FLATCURVE
+FTS_FLATCURVE = fts-flatcurve
+endif
+
 if HAVE_APPARMOR
 APPARMOR = apparmor
 endif
@@ -38,6 +42,7 @@ SUBDIRS = \
        welcome \
        $(ZLIB) \
        $(FTS_SOLR) \
+       $(FTS_FLATCURVE) \
        $(DICT_LDAP) \
        $(APPARMOR) \
        fs-compress \
diff --git a/src/plugins/fts-flatcurve/Makefile.am b/src/plugins/fts-flatcurve/Makefile.am
new file mode 100644 (file)
index 0000000..3899cc9
--- /dev/null
@@ -0,0 +1,36 @@
+AM_CPPFLAGS = \
+       -I$(top_srcdir)/src/lib \
+       -I$(top_srcdir)/src/lib-ssl-iostream \
+       -I$(top_srcdir)/src/lib-http \
+       -I$(top_srcdir)/src/lib-mail \
+       -I$(top_srcdir)/src/lib-imap \
+       -I$(top_srcdir)/src/lib-index \
+       -I$(top_srcdir)/src/lib-settings \
+       -I$(top_srcdir)/src/lib-storage \
+       -I$(top_srcdir)/src/plugins/fts
+
+AM_CXXFLAGS = \
+       $(XAPIAN_CXXFLAGS)
+
+lib21_fts_flatcurve_plugin_la_LDFLAGS = -module -avoid-version
+
+module_LTLIBRARIES = \
+       lib21_fts_flatcurve_plugin.la
+
+if DOVECOT_PLUGIN_DEPS
+fts_plugin_dep = ../fts/lib20_fts_plugin.la
+endif
+
+lib21_fts_flatcurve_plugin_la_LIBADD = \
+       $(fts_plugin_dep) \
+       $(XAPIAN_LIBS)
+
+lib21_fts_flatcurve_plugin_la_SOURCES = \
+       fts-flatcurve-plugin.c \
+       fts-backend-flatcurve.c \
+       fts-backend-flatcurve-xapian.cc
+
+noinst_HEADERS = \
+       fts-flatcurve-plugin.h \
+       fts-backend-flatcurve.h \
+       fts-backend-flatcurve-xapian.h
diff --git a/src/plugins/fts-flatcurve/fts-backend-flatcurve-xapian.cc b/src/plugins/fts-flatcurve/fts-backend-flatcurve-xapian.cc
new file mode 100644 (file)
index 0000000..319bc47
--- /dev/null
@@ -0,0 +1,2300 @@
+/* Copyright (c) the Dovecot authors, based on code by Michael Slusarz.
+ * See the included COPYING file */
+
+extern "C" {
+#include "lib.h"
+#include "file-create-locked.h"
+#include "hash.h"
+#include "message-header-parser.h"
+#include "path-util.h"
+#include "mail-storage-private.h"
+#include "mail-search.h"
+#include "sleep.h"
+#include "str.h"
+#include "unichar.h"
+#include "time-util.h"
+#include "fts-backend-flatcurve.h"
+#include "fts-backend-flatcurve-xapian.h"
+#include <dirent.h>
+#include <stdio.h>
+};
+
+#pragma GCC diagnostic push
+#  ifdef __clang__ // for building xapian's libs from gcc built debian package
+#    pragma GCC diagnostic ignored "-W#warnings"
+#    pragma GCC diagnostic ignored "-Wunused-command-line-argument"
+#  endif
+#  include <xapian.h>
+#pragma GCC diagnostic pop
+
+#include <algorithm>
+#include <sstream>
+#include <string>
+
+/* How Xapian DBs work in fts-flatcurve: all data lives in under one
+ * per-mailbox directory (FTS_FLATCURVE_LABEL) stored at the root of the
+ * mailbox indexes directory.
+ *
+ * There are two different permanent data types within that library:
+ * - "index.###": The actual Xapian DB shards. Combined, this comprises the
+ *   FTS data for the mailbox. These shards may be directly written to, but
+ *   only when deleting messages - new messages are never stored directly to
+ *   this DB. Additionally, these DBs are never directly queried; a dummy
+ *   object is used to indirectly query them. These indexes may occasionally
+ *   be combined into a single index via optimization processes.
+ * - "current.###": Xapian DB that contains the index shard where new messages
+ *   are stored. Once this index reaches certain (configurable) limits, a new
+ *   shard is created and rotated in as the new current index by creating
+ *   a shard with a suffix higher than the previous current DB.
+ *
+ * Within a session, we create a dummy Xapian::Database object, scan the data
+ * directory for all indexes, and add each of them to the dummy object. For
+ * queries, we then just need to query the dummy object and Xapian handles
+ * everything for us. Writes need to be handled separately, as a
+ * WritableDatabase object only supports a single on-disk DB at a time; a DB
+ * shard, whether "index" or "current", must be directly written to in order
+ * to modify.
+ *
+ * Data storage: Xapian does not support substring searches by default, so
+ * (if substring searching is enabled) we instead need to explicitly store all
+ * substrings of the string, up to the point where the substring becomes
+ * smaller than min_term_size. Requires libicu in order to correctly handle
+ * multi-byte characters. */
+#define FLATCURVE_XAPIAN_DB_PREFIX "index."
+#define FLATCURVE_XAPIAN_DB_CURRENT_PREFIX "current."
+
+/* These are temporary data types that may appear in the fts directory. They
+ * are not intended to perservere between sessions. */
+#define FLATCURVE_XAPIAN_DB_OPTIMIZE "optimize"
+
+/* Xapian "recommendations" are that you begin your local prefix identifier
+ * with "X" for data that doesn't match with a data type listed as a Xapian
+ * "convention". However, this recommendation is for maintaining
+ * compatability with the search front-end (Omega) that they provide. We don't
+ * care about compatability, so save storage space by using single letter
+ * prefixes. Bodytext is stored without prefixes, as it is expected to be the
+ * single largest storage pool. */
+
+/* Caution: the code below expects these prefix to be 1-char long */
+#define FLATCURVE_XAPIAN_ALL_HEADERS_PREFIX   "A"
+#define FLATCURVE_XAPIAN_BOOLEAN_FIELD_PREFIX "B"
+#define FLATCURVE_XAPIAN_HEADER_PREFIX        "H"
+
+#define FLATCURVE_XAPIAN_ALL_HEADERS_QP "allhdrs"
+#define FLATCURVE_XAPIAN_HEADER_BOOL_QP "hdr_bool"
+#define FLATCURVE_XAPIAN_HEADER_QP      "hdr_"
+#define FLATCURVE_XAPIAN_BODY_QP        "body"
+
+/* Version database, so that any schema changes can be caught. */
+#define FLATCURVE_XAPIAN_DB_KEY_PREFIX "dovecot."
+#define FLATCURVE_XAPIAN_DB_VERSION_KEY \
+               FLATCURVE_XAPIAN_DB_KEY_PREFIX FTS_FLATCURVE_LABEL
+#define FLATCURVE_XAPIAN_DB_VERSION 1
+
+#define FLATCURVE_DBW_LOCK_RETRY_SECS 1
+#define FLATCURVE_DBW_LOCK_RETRY_MAX 60
+#define FLATCURVE_MANUAL_OPTIMIZE_COMMIT_LIMIT 500
+
+/* Lock: needed to ensure we don't run into race conditions when
+ * manipulating current directory. */
+#define FLATCURVE_XAPIAN_LOCK_FNAME "flatcurve-lock"
+#define FLATCURVE_XAPIAN_LOCK_TIMEOUT_SECS 5
+
+#define ENUM_EMPTY(x) ((enum x) 0)
+
+
+struct flatcurve_xapian_db_path {
+       const char *fname;
+       const char *path;
+};
+
+enum flatcurve_xapian_db_type {
+       FLATCURVE_XAPIAN_DB_TYPE_INDEX,
+       FLATCURVE_XAPIAN_DB_TYPE_CURRENT,
+       FLATCURVE_XAPIAN_DB_TYPE_OPTIMIZE,
+       FLATCURVE_XAPIAN_DB_TYPE_LOCK,
+       FLATCURVE_XAPIAN_DB_TYPE_UNKNOWN
+};
+
+struct flatcurve_xapian_db {
+       Xapian::Database *db;
+       Xapian::WritableDatabase *dbw;
+       struct flatcurve_xapian_db_path *dbpath;
+       unsigned int changes;
+       enum flatcurve_xapian_db_type type;
+};
+HASH_TABLE_DEFINE_TYPE(xapian_db, char *, struct flatcurve_xapian_db *);
+
+struct flatcurve_xapian {
+       /* Current database objects. */
+       struct flatcurve_xapian_db *dbw_current;
+       Xapian::Database *db_read;
+       HASH_TABLE_TYPE(xapian_db) dbs;
+       unsigned int shards;
+
+       /* Locking for current shard manipulation. */
+       struct file_lock *lock;
+       const char *lock_path;
+
+       /* Xapian pool: used for per mailbox DB info, so it can be easily
+        * cleared when switching mailboxes. Not for use with long
+        * lived data (e.g. optimize). */
+       pool_t pool;
+
+       /* Current document. */
+       Xapian::Document *doc;
+       uint32_t doc_uid;
+       unsigned int doc_updates;
+       bool doc_created:1;
+
+       /* List of mailboxes to optimize at shutdown. */
+       HASH_TABLE(char *, char *) optimize;
+
+       bool deinit:1;
+};
+
+ARRAY_DEFINE_TYPE(flatcurve_fts_query_arg, struct flatcurve_fts_query_arg);
+struct flatcurve_fts_query_arg {
+       string_t *value;
+
+       bool is_and:1;
+       bool is_not:1;
+};
+
+struct flatcurve_fts_query_xapian {
+       Xapian::Query *query;
+       Xapian::QueryParser *qp;
+       ARRAY_TYPE(flatcurve_fts_query_arg) args;
+};
+
+struct flatcurve_xapian_db_iter {
+       struct flatcurve_fts_backend *backend;
+       DIR *dirp;
+       char *error;
+
+       /* These are set every time next() is run. */
+       struct flatcurve_xapian_db_path *path;
+       enum flatcurve_xapian_db_type type;
+};
+
+enum flatcurve_xapian_db_opts {
+       FLATCURVE_XAPIAN_DB_NOCREATE_CURRENT = BIT(0),
+       FLATCURVE_XAPIAN_DB_IGNORE_EMPTY     = BIT(1),
+       FLATCURVE_XAPIAN_DB_NOCLOSE_CURRENT  = BIT(2)
+};
+
+enum flatcurve_xapian_wdb {
+       FLATCURVE_XAPIAN_WDB_CREATE = BIT(0)
+};
+
+enum flatcurve_xapian_db_close {
+       FLATCURVE_XAPIAN_DB_CLOSE_WDB_COMMIT = BIT(0),
+       FLATCURVE_XAPIAN_DB_CLOSE_WDB        = BIT(1),
+       FLATCURVE_XAPIAN_DB_CLOSE_DB         = BIT(2),
+       FLATCURVE_XAPIAN_DB_CLOSE_ROTATE     = BIT(3),
+       FLATCURVE_XAPIAN_DB_CLOSE_MBOX       = BIT(4)
+};
+
+/* Externally accessible struct. */
+struct fts_flatcurve_xapian_query_iter {
+       struct flatcurve_fts_backend *backend;
+       struct flatcurve_fts_query *query;
+       struct fts_flatcurve_xapian_query_result *result;
+       char *error;
+       Xapian::Database *db;
+       Xapian::Enquire *enquire;
+       Xapian::MSetIterator mset_iter;
+};
+
+static int
+fts_flatcurve_xapian_check_db_version(struct flatcurve_fts_backend *backend,
+                                     struct flatcurve_xapian_db *xdb,
+                                     const char **error_r);
+static int
+fts_flatcurve_xapian_close_db(struct flatcurve_fts_backend *backend,
+                             struct flatcurve_xapian_db *xdb,
+                             enum flatcurve_xapian_db_close opts,
+                             const char **error_r);
+static int
+fts_flatcurve_xapian_close_dbs(struct flatcurve_fts_backend *backend,
+                              enum flatcurve_xapian_db_close opts,
+                              const char **error_r);
+static int
+fts_flatcurve_xapian_db_populate(struct flatcurve_fts_backend *backend,
+                                enum flatcurve_xapian_db_opts opts,
+                                const char **error_r);
+
+void fts_flatcurve_xapian_init(struct flatcurve_fts_backend *backend)
+{
+       backend->xapian = p_new(backend->pool, struct flatcurve_xapian, 1);
+       backend->xapian->pool =
+               pool_alloconly_create(FTS_FLATCURVE_LABEL " xapian", 2048);
+       hash_table_create(&backend->xapian->dbs, backend->xapian->pool,
+                         4, str_hash, strcmp);
+}
+
+void fts_flatcurve_xapian_deinit(struct flatcurve_fts_backend *backend)
+{
+       struct flatcurve_xapian *x = backend->xapian;
+
+       x->deinit = TRUE;
+       if (hash_table_is_created(x->optimize)) {
+               struct hash_iterate_context *iter =
+                       hash_table_iterate_init(x->optimize);
+
+               void *key, *val;
+               while (hash_table_iterate(iter, x->optimize, &key, &val)) {
+                       str_append(backend->boxname, (const char *)key);
+                       str_append(backend->db_path, (const char *)val);
+
+                       const char *error;
+                       if (fts_flatcurve_xapian_optimize_box(
+                               backend, &error) < 0)
+                               e_error(backend->event, "%s", error);
+               }
+
+               hash_table_iterate_deinit(&iter);
+               hash_table_destroy(&x->optimize);
+       }
+       hash_table_destroy(&x->dbs);
+       pool_unref(&x->pool);
+       x->deinit = FALSE;
+}
+
+static struct flatcurve_xapian_db_path *
+fts_flatcurve_xapian_create_db_path(struct flatcurve_fts_backend *backend,
+                                   const char *fname)
+{
+       struct flatcurve_xapian_db_path *dbpath;
+
+       dbpath = p_new(backend->xapian->pool,
+                      struct flatcurve_xapian_db_path, 1);
+       dbpath->fname = p_strdup(backend->xapian->pool, fname);
+       dbpath->path = p_strdup_printf(backend->xapian->pool, "%s%s",
+                                      str_c(backend->db_path), fname);
+
+       return dbpath;
+}
+
+
+/* If dbpath = NULL, delete the entire flatcurve index
+ * Returns: 0 if FTS directory doesn't exist, 1 on deletion, -1 on error */
+static int
+fts_flatcurve_xapian_delete(struct flatcurve_fts_backend *backend,
+                           struct flatcurve_xapian_db_path *dbpath,
+                           const char **error_r)
+{
+       const char *path = dbpath == NULL ?
+               str_c(backend->db_path) : dbpath->path;
+       return fts_backend_flatcurve_delete_dir(path, error_r);
+}
+
+static flatcurve_xapian_db_iter *
+fts_flatcurve_xapian_db_iter_init(struct flatcurve_fts_backend *backend,
+                                 enum flatcurve_xapian_db_opts opts)
+{
+       flatcurve_xapian_db_iter *iter =
+               p_new(backend->xapian->pool, struct flatcurve_xapian_db_iter, 1);
+       iter->backend = backend;
+       iter->dirp = opendir(str_c(backend->db_path));
+
+       if (iter->dirp == NULL &&
+           HAS_NO_BITS(opts, FLATCURVE_XAPIAN_DB_NOCREATE_CURRENT)) {
+               iter->error = i_strdup_printf(
+                       "Cannot open DB (RO); opendir(%s) failed: %m",
+                       str_c(backend->db_path));
+       }
+       return iter;
+}
+
+static bool
+fts_flatcurve_xapian_db_iter_next(struct flatcurve_xapian_db_iter *iter)
+{
+       if (iter->error != NULL || iter->dirp == NULL)
+               return FALSE;
+
+       errno = 0;
+       struct dirent *dir = readdir(iter->dirp);
+       if (errno < 0) {
+               iter->error = i_strdup_printf(
+                       "readdir(%s) failed: %m",
+                       str_c(iter->backend->db_path));
+               return FALSE;
+       }
+
+       if (dir == NULL)
+               return FALSE;
+
+       if (strcmp(dir->d_name, ".") == 0 ||
+           strcmp(dir->d_name, "..") == 0)
+               return fts_flatcurve_xapian_db_iter_next(iter);
+
+       iter->type = FLATCURVE_XAPIAN_DB_TYPE_UNKNOWN;
+       iter->path = fts_flatcurve_xapian_create_db_path(
+                       iter->backend, dir->d_name);
+
+       struct stat st;
+       if (stat(iter->path->path, &st) < 0) {
+               iter->error = i_strdup_printf(
+                       "stat(%s) failed: %m",
+                       str_c(iter->backend->db_path));
+               return FALSE;
+       }
+
+       if (str_begins_with(dir->d_name, FLATCURVE_XAPIAN_LOCK_FNAME)) {
+               iter->type = FLATCURVE_XAPIAN_DB_TYPE_LOCK;
+               return TRUE;
+       }
+
+       if (!S_ISDIR(st.st_mode))
+               return TRUE;
+
+       if (str_begins_with(dir->d_name, FLATCURVE_XAPIAN_DB_PREFIX))
+               iter->type = FLATCURVE_XAPIAN_DB_TYPE_INDEX;
+       else if (str_begins_with(dir->d_name, FLATCURVE_XAPIAN_DB_CURRENT_PREFIX))
+               iter->type = FLATCURVE_XAPIAN_DB_TYPE_CURRENT;
+       else if (strcmp(dir->d_name, FLATCURVE_XAPIAN_DB_OPTIMIZE) == 0)
+               iter->type = FLATCURVE_XAPIAN_DB_TYPE_OPTIMIZE;
+
+       return TRUE;
+}
+
+/* Returns: 0 on success, -1 on error */
+static int
+fts_flatcurve_xapian_db_iter_deinit(struct flatcurve_xapian_db_iter **_iter,
+                                   const char **error_r)
+{
+       struct flatcurve_xapian_db_iter *iter = *_iter;
+       *_iter = NULL;
+
+       if (iter->dirp != NULL && closedir(iter->dirp) < 0) {
+               if (iter->error == NULL)
+                       iter->error = i_strdup_printf(
+                               "closedir(%s) failed: %m",
+                               str_c(iter->backend->db_path));
+       }
+
+       int ret = 0;
+       if (iter->error != NULL) {
+               *error_r = t_strdup(iter->error);
+               i_free(iter->error);
+               ret = -1;
+       }
+
+       p_free(iter->backend->xapian->pool, iter);
+       return ret;
+}
+
+/* Returns: 0 on success, -1 on error */
+static int
+fts_flatcurve_xapian_write_db_get_do(struct flatcurve_fts_backend *backend,
+                                    struct flatcurve_xapian_db *xdb,
+                                    int db_flags, const char **error_r)
+{
+       if (xdb->dbw != NULL)
+               return 0;
+
+       for (unsigned int elapsed = 0;
+            elapsed <= FLATCURVE_DBW_LOCK_RETRY_MAX;
+            elapsed += FLATCURVE_DBW_LOCK_RETRY_SECS) {
+               try {
+                       xdb->dbw = new Xapian::WritableDatabase(
+                                       xdb->dbpath->path, db_flags);
+                       return 0;
+               } catch (Xapian::DatabaseLockError &e) {
+                       e_debug(backend->event,
+                               "Waiting for DB (RW, %s) lock",
+                               xdb->dbpath->fname);
+                       if (!i_sleep_intr_secs(FLATCURVE_DBW_LOCK_RETRY_SECS)) {
+                               *error_r = t_strdup_printf(
+                                       "Cannot open DB (RW, %s): "
+                                       "sleep() interrupted by signal",
+                                       xdb->dbpath->fname);
+                               return -1;
+                       }
+                       continue;
+               } catch (Xapian::Error &e) {
+                       *error_r = t_strdup_printf(
+                               "Cannot open DB (RW, %s): %s",
+                               xdb->dbpath->fname,
+                               e.get_description().c_str());
+                       return -1;
+               }
+       }
+       *error_r = t_strdup_printf(
+               "DB (RW, %s) was locked for over %d seconds ",
+               xdb->dbpath->fname, FLATCURVE_DBW_LOCK_RETRY_MAX);
+       return -1;
+}
+
+/* Returns: 0 on success, -1 on error */
+static int
+fts_flatcurve_xapian_write_db_get(struct flatcurve_fts_backend *backend,
+                                 struct flatcurve_xapian_db *xdb,
+                                 enum flatcurve_xapian_wdb wopts,
+                                 const char **error_r)
+{
+       if (xdb->dbw != NULL)
+               return 0;
+
+       int db_flags = (HAS_ALL_BITS(wopts, FLATCURVE_XAPIAN_WDB_CREATE)
+               ? Xapian::DB_CREATE_OR_OPEN : Xapian::DB_OPEN) |
+               Xapian::DB_NO_SYNC;
+
+       if (fts_flatcurve_xapian_write_db_get_do(
+               backend, xdb, db_flags, error_r) < 0)
+               return -1;
+
+       if (xdb->type == FLATCURVE_XAPIAN_DB_TYPE_CURRENT &&
+           fts_flatcurve_xapian_check_db_version(backend, xdb, error_r) < 0)
+               return -1;
+
+       e_debug(backend->event, "Opened DB (RW, %s) messages=%u version=%u",
+               xdb->dbpath->fname, xdb->dbw->get_doccount(),
+               FLATCURVE_XAPIAN_DB_VERSION);
+
+       return 0;
+}
+
+/* Returns: 0 on success, -1 on error */
+static int
+fts_flatcurve_xapian_rename_db(struct flatcurve_fts_backend *backend,
+                              struct flatcurve_xapian_db_path *path,
+                              struct flatcurve_xapian_db_path **newpath_r,
+                              const char **error_r)
+{
+       unsigned int i;
+       std::string new_fname;
+       struct flatcurve_xapian_db_path *newpath;
+       bool retry = FALSE;
+       std::ostringstream ss;
+
+       for (i = 0; i < 5; ++i) {
+               new_fname.clear();
+               new_fname = FLATCURVE_XAPIAN_DB_PREFIX;
+               ss << i_rand_limit(8192);
+               new_fname += ss.str();
+
+               newpath = fts_flatcurve_xapian_create_db_path(
+                               backend, new_fname.c_str());
+
+               if (rename(path->path, newpath->path) < 0) {
+                       if (retry ||
+                           (errno != ENOTEMPTY) && (errno != EEXIST)) {
+                               *error_r = t_strdup_printf(
+                                       "rename(%s, %s) failed: %m",
+                                       path->path, newpath->path);
+                               p_free(backend->xapian->pool, newpath);
+                               return -1;
+                       }
+
+                       /* Looks like a naming conflict; try once again with
+                        * a different filename. ss will have additional
+                        * randomness added to the original suffix, so it
+                        * will almost certainly work the second time. */
+                       retry = TRUE;
+               } else {
+                       if (newpath_r != NULL) *newpath_r = newpath;
+                       return 0;
+               }
+       }
+
+       /* If we still haven't found a valid filename, something is very
+        * wrong. Exit before we enter an infinite loop and consume all the
+        * memory. */
+       i_unreached();
+}
+
+static bool
+fts_flatcurve_xapian_need_optimize(struct flatcurve_fts_backend *backend)
+{
+       if (backend->fuser == NULL) return FALSE;
+       if (backend->fuser->set.optimize_limit == 0) return FALSE;
+       return backend->xapian->shards >= backend->fuser->set.optimize_limit;
+}
+
+static void
+fts_flatcurve_xapian_optimize_mailbox(struct flatcurve_fts_backend *backend)
+{
+       struct flatcurve_xapian *x = backend->xapian;
+
+       if (x->deinit || !fts_flatcurve_xapian_need_optimize(backend))
+               return;
+
+       if (!hash_table_is_created(x->optimize))
+               hash_table_create(&x->optimize, backend->pool, 0, str_hash,
+                                 strcmp);
+       if (hash_table_lookup(x->optimize, str_c(backend->boxname)) == NULL)
+               hash_table_insert(x->optimize,
+                                 p_strdup(backend->pool, str_c(backend->boxname)),
+                                 p_strdup(backend->pool, str_c(backend->db_path)));
+}
+
+/* Returns: 0 on success, -1 on error */
+static int
+fts_flatcurve_xapian_db_add(struct flatcurve_fts_backend *backend,
+                           struct flatcurve_xapian_db_path *dbpath,
+                           enum flatcurve_xapian_db_type type,
+                           bool open_wdb,
+                           struct flatcurve_xapian_db **xdb_r,
+                           const char **error_r)
+{
+       struct flatcurve_xapian *x = backend->xapian;
+
+       if (type != FLATCURVE_XAPIAN_DB_TYPE_INDEX &&
+           type != FLATCURVE_XAPIAN_DB_TYPE_CURRENT) {
+               if (xdb_r != NULL) *xdb_r = NULL;
+               return 0;
+       }
+
+       struct flatcurve_xapian_db *o, *xdb;
+       xdb = p_new(x->pool, struct flatcurve_xapian_db, 1);
+       xdb->dbpath = dbpath;
+       xdb->type = type;
+
+       if (open_wdb && fts_flatcurve_xapian_write_db_get(
+               backend, xdb, FLATCURVE_XAPIAN_WDB_CREATE, error_r) < 0)
+               return -1;
+
+       hash_table_insert(x->dbs, dbpath->fname, xdb);
+
+       bool failed = FALSE;
+       /* If multiple current DBs exist, rename the oldest. */
+       if (type == FLATCURVE_XAPIAN_DB_TYPE_CURRENT &&
+           x->dbw_current != NULL) {
+               struct flatcurve_xapian_db *db =
+                       strcmp(dbpath->fname,
+                              x->dbw_current->dbpath->fname) > 0 ?
+                       x->dbw_current : xdb;
+
+               struct flatcurve_xapian_db_path *newpath;
+               if (fts_flatcurve_xapian_rename_db(
+                       backend, db->dbpath, &newpath, error_r) < 0)
+                       failed = TRUE;
+               if (fts_flatcurve_xapian_close_db(
+                       backend, db, FLATCURVE_XAPIAN_DB_CLOSE_WDB, error_r) < 0)
+                       failed = TRUE;
+               hash_table_remove(x->dbs, db->dbpath->fname);
+               hash_table_insert(x->dbs, newpath->fname, db);
+
+               db->dbpath = newpath;
+               db->type = FLATCURVE_XAPIAN_DB_TYPE_INDEX;
+       }
+
+       if (xdb->type == FLATCURVE_XAPIAN_DB_TYPE_CURRENT)
+               x->dbw_current = xdb;
+
+       if (xdb_r != NULL) *xdb_r = xdb;
+       return failed ? -1 : 0;
+}
+
+/* Returns: lock fd >=0 on success, -1 on error */
+static int fts_flatcurve_xapian_lock(struct flatcurve_fts_backend *backend,
+                                    const char **error_r)
+{
+       struct flatcurve_xapian *x = backend->xapian;
+
+       if (x->lock_path == NULL)
+               x->lock_path = p_strdup_printf(
+                       x->pool, "%s" FLATCURVE_XAPIAN_LOCK_FNAME,
+                       str_c(backend->db_path));
+
+       struct file_create_settings set;
+       i_zero(&set);
+       set.lock_timeout_secs = FLATCURVE_XAPIAN_LOCK_TIMEOUT_SECS;
+       set.lock_settings.close_on_free = TRUE;
+       set.lock_settings.unlink_on_free = TRUE;
+       set.lock_settings.lock_method = backend->parsed_lock_method;
+
+       bool created;
+       return file_create_locked(x->lock_path, &set, &x->lock, &created, error_r);
+}
+
+static void fts_flatcurve_xapian_unlock(struct flatcurve_fts_backend *backend)
+{
+       file_lock_free(&backend->xapian->lock);
+}
+
+/* Returns: 0 if read DB is null, 1 if database has been addeds, -1 on error */
+static int
+fts_flatcurve_xapian_db_read_add(struct flatcurve_fts_backend *backend,
+                                struct flatcurve_xapian_db *xdb,
+                                const char **error_r)
+{
+       struct flatcurve_xapian *x = backend->xapian;
+
+       if (x->db_read == NULL)
+               return 0;
+
+       try {
+               xdb->db = new Xapian::Database(xdb->dbpath->path);
+       } catch (Xapian::Error &e) {
+               *error_r = t_strdup_printf("Cannot open DB (RO; %s); %s",
+                       xdb->dbpath->fname, e.get_description().c_str());
+               return -1;
+       }
+
+       if (fts_flatcurve_xapian_check_db_version(backend, xdb, error_r) < 0)
+               return -1;
+
+       ++x->shards;
+       x->db_read->add_database(*(xdb->db));
+
+       fts_flatcurve_xapian_optimize_mailbox(backend);
+
+       return 1;
+}
+
+/* Returns: 0 on success, -1 on error */
+static int
+fts_flatcurve_xapian_create_current(struct flatcurve_fts_backend *backend,
+                                   enum flatcurve_xapian_db_close copts,
+                                   const char **error_r)
+{
+       /* The current shard has filename of the format PREFIX.timestamp. This
+        * ensures that we will catch any current DB renaming done by another
+        * process (reopen() on the DB will fail, causing the entire DB to be
+        * closed/reopened). */
+
+       int ret;
+       struct flatcurve_xapian_db *xdb;
+       T_BEGIN {
+               const char *fname = t_strdup_printf(
+                       FLATCURVE_XAPIAN_DB_CURRENT_PREFIX "%lu",
+                       i_microseconds());
+               ret = fts_flatcurve_xapian_db_add(backend,
+                       fts_flatcurve_xapian_create_db_path(backend, fname),
+                       FLATCURVE_XAPIAN_DB_TYPE_CURRENT, TRUE, &xdb, error_r);
+       } T_END;
+
+       if (ret < 0)
+               return -1;
+
+       if (xdb == NULL) {
+               *error_r = "Could not add db";
+               return -1;
+       }
+
+       if (fts_flatcurve_xapian_db_read_add(backend, xdb, error_r) < 0)
+               return -1;
+
+       if (copts == 0)
+               return 0;
+
+       return fts_flatcurve_xapian_close_db(backend, xdb, copts, error_r);
+}
+
+/* Returns: 0 on success, -1 on error */
+static int
+fts_flatcurve_xapian_db_populate(struct flatcurve_fts_backend *backend,
+                                enum flatcurve_xapian_db_opts opts,
+                                const char **error_r)
+{
+       struct flatcurve_xapian_db_iter *iter;
+       struct flatcurve_xapian *x = backend->xapian;
+
+       bool dbs_exist = hash_table_count(backend->xapian->dbs) > 0;
+       bool no_create = HAS_ALL_BITS(opts, FLATCURVE_XAPIAN_DB_NOCREATE_CURRENT);
+
+       if (dbs_exist && (no_create || x->dbw_current != NULL))
+               return 0;
+
+       bool lock;
+       if (no_create) {
+               struct stat st;
+               if (stat(str_c(backend->db_path), &st) == 0)
+                       lock = S_ISDIR(st.st_mode);
+               else if (errno == ENOENT)
+                       lock = FALSE;
+               else {
+                       e_error(backend->event, "stat(%s) failed: %m",
+                               str_c(backend->db_path));
+                       return -1;
+               }
+       } else {
+               if (mailbox_list_mkdir_root(
+                               backend->backend.ns->list,
+                               str_c(backend->db_path),
+                               MAILBOX_LIST_PATH_TYPE_INDEX) < 0) {
+                       e_error(backend->event, "Cannot create DB (RW); %s",
+                               str_c(backend->db_path));
+                       return -1;
+               }
+               lock = TRUE;
+       }
+
+       if (lock && fts_flatcurve_xapian_lock(backend, error_r) < 0)
+               return -1;
+
+       if (!dbs_exist) {
+               const char *error, *last_error = NULL;
+               iter = fts_flatcurve_xapian_db_iter_init(backend, opts);
+               while (fts_flatcurve_xapian_db_iter_next(iter)) {
+                       if (fts_flatcurve_xapian_db_add(
+                               backend, iter->path, iter->type,
+                               FALSE, NULL, &last_error) < 0)
+                               break;
+               }
+               if (fts_flatcurve_xapian_db_iter_deinit(&iter, &error) < 0) {
+                       if (last_error != NULL)
+                               e_error(backend->event, "%s", error);
+                       else
+                               last_error = error;
+               }
+               if (last_error != NULL) {
+                       fts_flatcurve_xapian_unlock(backend);
+                       *error_r = last_error;
+                       return -1;
+               }
+       }
+
+       int ret = 0;
+       if (!no_create && x->dbw_current == NULL) {
+               enum flatcurve_xapian_db_close flags =
+                       HAS_ALL_BITS(opts, FLATCURVE_XAPIAN_DB_NOCLOSE_CURRENT) ?
+                               (enum flatcurve_xapian_db_close) 0 :
+                               FLATCURVE_XAPIAN_DB_CLOSE_WDB;
+
+               ret = fts_flatcurve_xapian_create_current(
+                               backend, flags, error_r);
+       }
+
+       fts_flatcurve_xapian_unlock(backend);
+       return ret;
+}
+
+/* Returns: 0 if dbw_current == NULL, 1 dbw_current != NULL, -1 on error */
+static int
+fts_flatcurve_xapian_write_db_current(struct flatcurve_fts_backend *backend,
+                                     enum flatcurve_xapian_db_opts opts,
+                                     struct flatcurve_xapian_db **dbw_current_r,
+                                     const char **error_r)
+{
+       static const enum flatcurve_xapian_wdb wopts =
+               ENUM_EMPTY(flatcurve_xapian_wdb);
+
+       struct flatcurve_xapian *x = backend->xapian;
+       struct flatcurve_xapian_db *xdb;
+
+       if (x->dbw_current != NULL && x->dbw_current->dbw != NULL) {
+               if (dbw_current_r != NULL) *dbw_current_r = x->dbw_current;
+               return 1;
+       }
+
+       opts = (enum flatcurve_xapian_db_opts)
+               (opts | FLATCURVE_XAPIAN_DB_NOCLOSE_CURRENT);
+       /* dbw_current can be NULL if FLATCURVE_XAPIAN_DB_NOCREATE_CURRENT
+        * is set in opts. */
+       if (fts_flatcurve_xapian_db_populate(backend, opts, error_r) < 0)
+               return -1;
+
+       if (x->dbw_current == NULL)
+               return 0;
+
+       if (fts_flatcurve_xapian_write_db_get(
+               backend, x->dbw_current, wopts, error_r) < 0)
+               return -1;
+
+       if (dbw_current_r != NULL) *dbw_current_r = x->dbw_current;
+       return 1;
+}
+
+/* Returns: 0 if DBs table is empty, 1 otherwise, -1 on error */
+static int
+fts_flatcurve_xapian_read_db(struct flatcurve_fts_backend *backend,
+                            enum flatcurve_xapian_db_opts opts,
+                            Xapian::Database **db_read_r,
+                            const char **error_r)
+{
+       struct hash_iterate_context *iter;
+       void *key, *val;
+       struct fts_flatcurve_xapian_db_stats stats;
+       struct flatcurve_xapian *x = backend->xapian;
+       struct flatcurve_xapian_db *xdb;
+
+       if (x->db_read != NULL) {
+               try {
+                       (void)x->db_read->reopen();
+                       if (db_read_r != NULL) *db_read_r = x->db_read;
+                       return 1;
+               } catch (Xapian::DatabaseNotFoundError &e) {
+                       /* This means that the underlying databases have
+                        * changed (i.e. DB rotation by another process).
+                        * Close all DBs and reopen. */
+                       if (fts_flatcurve_xapian_close(backend, error_r) < 0)
+                               return -1;
+                       return fts_flatcurve_xapian_read_db(
+                              backend, opts, db_read_r, error_r);
+               }
+       }
+
+       if (fts_flatcurve_xapian_db_populate(backend, opts, error_r) < 0)
+               return -1;
+
+       if (HAS_ALL_BITS(opts, FLATCURVE_XAPIAN_DB_IGNORE_EMPTY) &&
+           (hash_table_count(x->dbs) == 0))
+               return 0;
+
+       x->db_read = new Xapian::Database();
+
+       iter = hash_table_iterate_init(x->dbs);
+       while (hash_table_iterate(iter, x->dbs, &key, &val)) {
+               xdb = (struct flatcurve_xapian_db *)val;
+               if (fts_flatcurve_xapian_db_read_add(
+                       backend, xdb, error_r) < 0)
+                       e_error(backend->event, "%s", *error_r);
+       }
+       hash_table_iterate_deinit(&iter);
+
+       if (fts_flatcurve_xapian_mailbox_stats(backend, &stats, error_r) < 0)
+               return -1;
+
+       e_debug(backend->event, "Opened DB (RO) messages=%u version=%u "
+               "shards=%u", stats.messages, stats.version, stats.shards);
+
+       if (db_read_r != NULL) *db_read_r = x->db_read;
+       return 1;
+}
+
+/* Returns: 0 on success, -1 on error */
+int
+fts_flatcurve_xapian_mailbox_check(struct flatcurve_fts_backend *backend,
+                                  struct fts_flatcurve_xapian_db_check *check,
+                                  const char **error_r)
+{
+       static const enum flatcurve_xapian_db_opts opts =
+               (enum flatcurve_xapian_db_opts)
+                       (FLATCURVE_XAPIAN_DB_NOCREATE_CURRENT |
+                        FLATCURVE_XAPIAN_DB_IGNORE_EMPTY);
+       struct flatcurve_xapian *x = backend->xapian;
+
+       i_zero(check);
+
+       int ret = fts_flatcurve_xapian_read_db(backend, opts, NULL, error_r);
+       if (ret <= 0)
+               return ret;
+
+       bool failed = FALSE;
+       void *key, *val;
+       struct hash_iterate_context *iter = hash_table_iterate_init(x->dbs);
+       while (hash_table_iterate(iter, x->dbs, &key, &val)) {
+               try {
+                       struct flatcurve_xapian_db *xdb =
+                               (struct flatcurve_xapian_db *)val;
+                       check->errors += Xapian::Database::check(
+                               xdb->dbpath->path, Xapian::DBCHECK_FIX, NULL);
+               } catch (const Xapian::Error &e) {
+                       const char *error = t_strdup_printf(
+                               "Check failed; %s",
+                               e.get_description().c_str());
+                       if (!failed)
+                               *error_r = error;
+                       else
+                               e_error(backend->event, "%s", error);
+                       failed = TRUE;
+               }
+               ++check->shards;
+       }
+       hash_table_iterate_deinit(&iter);
+       return failed ? -1 : 0;
+}
+
+/* Returns: 0 on success, -1 on error */
+int fts_flatcurve_xapian_mailbox_rotate(struct flatcurve_fts_backend *backend,
+                                       const char **error_r)
+{
+       static const enum flatcurve_xapian_db_opts opts =
+               (enum flatcurve_xapian_db_opts)
+                       (FLATCURVE_XAPIAN_DB_NOCREATE_CURRENT |
+                        FLATCURVE_XAPIAN_DB_IGNORE_EMPTY);
+       struct flatcurve_xapian *x = backend->xapian;
+       struct flatcurve_xapian_db *xdb;
+
+       int ret = fts_flatcurve_xapian_write_db_current(
+               backend, opts, &xdb, error_r);
+       if (ret <= 0)
+               return ret;
+
+       return fts_flatcurve_xapian_close_db(backend, xdb,
+               FLATCURVE_XAPIAN_DB_CLOSE_ROTATE, error_r);
+}
+
+/* Returns: 0 if DBs table is empty, 1 otherwise, -1 on error */
+int
+fts_flatcurve_xapian_mailbox_stats(struct flatcurve_fts_backend *backend,
+                                  struct fts_flatcurve_xapian_db_stats *stats,
+                                  const char **error_r)
+{
+       static const enum flatcurve_xapian_db_opts opts =
+               (enum flatcurve_xapian_db_opts)
+                       (FLATCURVE_XAPIAN_DB_NOCREATE_CURRENT |
+                        FLATCURVE_XAPIAN_DB_IGNORE_EMPTY);
+       struct flatcurve_xapian *x = backend->xapian;
+
+       if (x->db_read == NULL) {
+               int ret = fts_flatcurve_xapian_read_db(backend, opts, NULL, error_r);
+               if (ret <= 0) {
+                       i_zero(stats);
+                       return ret;
+               }
+       }
+       stats->messages = x->db_read->get_doccount();
+       stats->shards = x->shards;
+       stats->version = FLATCURVE_XAPIAN_DB_VERSION;
+       return 1;
+}
+
+/* The input of the doveadm dump command can be any file or dir inside the
+   flatcurve index tree. Climb the path tree until finding a directory with
+   the expected name
+
+   Returns: 0 on success, -1 on failure */
+static int fts_flatcurve_database_find_dir(const char *path, const char **dir_r,
+                                          const char **error_r)
+{
+       /* These don't depend on inputs and are not going to change during
+          execution. No need to recalculate them each time either */
+       static const char *const wanted = FTS_FLATCURVE_LABEL;
+       static const size_t wanted_len = strlen(wanted);
+
+       /* Resolve symlinks, . and .. , and repeated path separators
+          what remains is a cleaned path, either relative or absolute
+          that is good to parse */
+       const char *normalized, *error;
+       if (t_realpath(path, &normalized, error_r) < 0)
+               return -1;
+
+       /* Scan into the path and match as many times as possible
+          What we are interested in is the most nested match,
+          i.e. the (last) rightmost one.
+
+          On each successive iteration we simply start from the
+          end of the preceding match, which in worst case exists
+          as the string nul termination.
+       */
+       const char *hit_start, *match_start = NULL;
+       for (const char *ptr = normalized;; ptr = hit_start + wanted_len) {
+               hit_start = strstr(ptr, wanted);
+               if (hit_start == NULL) break;
+
+               /* Safe as wanted_len and wanted are de facto compile
+                  time constants */
+               const char *hit_end = hit_start + wanted_len;
+               if (*hit_end != '\0' && *hit_end != '/') continue;
+
+               /* The first condition protects from underruns */
+               if (hit_start > normalized && *(hit_start - 1) != '/') continue;
+
+               match_start = hit_start;
+       }
+       if (match_start == NULL) {
+               *error_r = "could not find a valid xapian database directory";
+               return -1;
+       };
+
+       /* Safe as wanted_len and wanted are de facto compile time constants
+          match_end derives from normalized by increments of wanted_len size */
+       const char *match_end = match_start + wanted_len;
+       size_t match_size = match_end - normalized;
+       const char *index_dir = t_strndup(normalized, match_size);
+
+       /* Caller expects a trailing slash to be in place */
+       *dir_r = t_strdup_printf("%s/", index_dir);
+       return 0;
+}
+
+/* The input of the doveadm dump command can be any file or dir inside the
+   flatcurve index tree. Climb the path tree until we find a directory with
+   the expected name.
+
+   Once a dir with the expected name has been located, check if there is at
+   least one subdir in it whose name starts either with the db-current-prefix
+   or with the db-prefix.
+
+   If none of those is found, consider this an invalid path and fail
+   Returns: 0 on success, -1 on failure */
+int fts_flatcurve_database_locate_dir(const char *arg_path,
+                                     const char **index_path_r,
+                                     const char **error_r)
+{
+       const char* path;
+
+       if (fts_flatcurve_database_find_dir(arg_path, &path, error_r) < 0)
+               return -1;
+
+       DIR *dir = opendir(path);
+       if (dir == NULL) {
+               *error_r = t_strdup_printf("opendir(%s) failed: %m", path);
+               return -1;
+       }
+
+       bool valid = FALSE;
+       do {
+               errno = 0;
+               struct dirent *entry = readdir(dir);
+               if (errno < 0) {
+                       *error_r = t_strdup_printf("readdir(%s) failed: %m", path);
+                       return -1;
+               }
+
+               if (entry == NULL)
+                       break;
+
+               valid = (entry->d_type & DT_DIR) != 0 &&
+                       (str_begins_with(entry->d_name, FLATCURVE_XAPIAN_DB_CURRENT_PREFIX) ||
+                        str_begins_with(entry->d_name, FLATCURVE_XAPIAN_DB_PREFIX));
+       }
+       while (!valid);
+
+       if (closedir(dir) < 0) {
+               *error_r = t_strdup_printf("closedir(%s) failed: %m", path);
+               return -1;
+       }
+
+       if (!valid) {
+               *error_r = t_strdup_printf("No xapian databases in %s", path);
+               return -1;
+       }
+
+       *index_path_r = path;
+       return 0;
+}
+
+/* Returns: 0 if no DB, 1 if DB as accessed, -1 on error */
+static int
+fts_flatcurve_database_terms_fetch(bool headers,
+                                  struct flatcurve_fts_backend *backend,
+                                  HASH_TABLE_TYPE(term_counter) *terms,
+                                  const char **error_r)
+{
+       static const enum flatcurve_xapian_db_opts opts =
+               (enum flatcurve_xapian_db_opts)
+                       (FLATCURVE_XAPIAN_DB_NOCREATE_CURRENT |
+                        FLATCURVE_XAPIAN_DB_IGNORE_EMPTY);
+
+       Xapian::Database *db;
+       Xapian::TermIterator iter, end;
+
+       const char *prefix = headers ? FLATCURVE_XAPIAN_BOOLEAN_FIELD_PREFIX : "";
+
+       int ret = fts_flatcurve_xapian_read_db(backend, opts, &db, error_r);
+       if (ret <= 0)
+               return ret;
+
+       for (iter = db->allterms_begin(prefix), end = db->allterms_end(prefix);
+               iter != end; ++iter) {
+
+               const std::string &term = *iter;
+               const char *key = term.data();
+
+               if (headers) {
+                       if (*key == *FLATCURVE_XAPIAN_BOOLEAN_FIELD_PREFIX)
+                               ++key;
+                       else
+                               continue;
+               } else {
+                       if (*key == *FLATCURVE_XAPIAN_ALL_HEADERS_PREFIX)
+                               ++key;
+                       else if (*key == *FLATCURVE_XAPIAN_BOOLEAN_FIELD_PREFIX ||
+                                *key == *FLATCURVE_XAPIAN_HEADER_PREFIX)
+                               continue;
+               }
+
+               void *k, *v;
+               const char *pkey;
+               unsigned int counter = iter.get_termfreq();
+               if (hash_table_lookup_full(*terms, key, &k, &v)) {
+                       counter += POINTER_CAST_TO(v, unsigned int);
+                       pkey = (const char *)k;
+               } else {
+                       pkey = t_strdup(key);
+               }
+               hash_table_update(*terms, pkey, POINTER_CAST(counter));
+       }
+       return 1;
+}
+
+/* Returns: 0 if no DB, 1 if DB as accessed, -1 on error */
+int fts_flatcurve_database_terms(bool headers, const char *path,
+                                HASH_TABLE_TYPE(term_counter) *terms,
+                                const char **error_r)
+{
+       struct flatcurve_fts_backend backend;
+
+       i_zero(&backend);
+       backend.pool = pool_alloconly_create("doveadm-" FTS_FLATCURVE_LABEL, 1024);
+       backend.db_path = str_new_const(backend.pool, path, strlen(path));
+       backend.event = event_create(NULL);
+       fts_flatcurve_xapian_init(&backend);
+
+       int ret = fts_flatcurve_database_terms_fetch(
+               headers, &backend, terms, error_r);
+
+       fts_flatcurve_xapian_deinit(&backend);
+       event_unref(&backend.event);
+       pool_unref(&backend.pool);
+
+       return ret;
+}
+
+void fts_flatcurve_xapian_set_mailbox(struct flatcurve_fts_backend *backend)
+{
+       event_set_append_log_prefix(backend->event, p_strdup_printf(
+               backend->xapian->pool, FTS_FLATCURVE_LABEL "(%s): ",
+               str_c(backend->boxname)));
+}
+
+/* Returns: 0 on success, -1 on error */
+static int
+fts_flatcurve_xapian_check_db_version(struct flatcurve_fts_backend *backend,
+                                     struct flatcurve_xapian_db *xdb,
+                                     const char **error_r)
+{
+       static const enum flatcurve_xapian_wdb wopts =
+               ENUM_EMPTY(flatcurve_xapian_wdb);
+
+       Xapian::Database *db = (xdb->dbw == NULL) ? xdb->db : xdb->dbw;
+
+       std::string str = db->get_metadata(FLATCURVE_XAPIAN_DB_VERSION_KEY);
+       const char* value = str.c_str();
+       unsigned int ver = 0;
+       if (*value != '\0' && str_to_uint(value, &ver) < 0)
+               e_error(backend->event,
+                       "unexpected Xapian db version '%s' in %s",
+                       value, str_c(backend->db_path));
+
+       if (ver == FLATCURVE_XAPIAN_DB_VERSION)
+               return 0;
+
+       /* If we need to upgrade DB, and this is NOT the write DB, open the
+        * write DB, do the changes there, and reopen the read DB. */
+       if (xdb->dbw == NULL) {
+               if (fts_flatcurve_xapian_write_db_get(
+                       backend, xdb, wopts, error_r) < 0)
+                       return -1;
+               if (fts_flatcurve_xapian_close_db(
+                       backend, xdb, FLATCURVE_XAPIAN_DB_CLOSE_WDB, error_r) < 0)
+                       return -1;
+               (void)xdb->db->reopen();
+               return 0;
+        }
+
+       /* 0->1: Added DB version. Always implicity update version when we
+        * upgrade (done at end of this function). */
+       if (ver == 0) ++ver;
+       T_BEGIN {
+               xdb->dbw->set_metadata(FLATCURVE_XAPIAN_DB_VERSION_KEY,
+                                      dec2str(ver));
+       } T_END;
+
+       /* Commit the changes now. */
+       try {
+               xdb->dbw->commit();
+               return 0;
+       }
+       catch(Xapian::Error &e) {
+               e_error(backend->event,
+                       "Xapian::Error on '%s': %s",
+                       str_c(backend->db_path),
+                       e.get_description().c_str());
+               return -1;
+       }
+}
+
+/* Requires read DB to have been opened
+ * Returns: 0 not found, 1 if found, -1 on error */
+static int
+fts_flatcurve_xapian_uid_exists_db(struct flatcurve_fts_backend *backend,
+                                  uint32_t uid,
+                                  struct flatcurve_xapian_db **xdb_r,
+                                  const char **error_r)
+{
+       void *key, *val;
+       int ret = 0;
+        struct hash_iterate_context *iter =
+               hash_table_iterate_init(backend->xapian->dbs);
+
+        while (hash_table_iterate(iter, backend->xapian->dbs, &key, &val)) {
+               try {
+                       struct flatcurve_xapian_db *xdb =
+                               (struct flatcurve_xapian_db *)val;
+                       (void)xdb->db->get_document(uid);
+                       if (xdb_r != NULL) *xdb_r = xdb;
+                       ret = 1;
+                       break;
+               }
+               catch (Xapian::DocNotFoundError &e) {
+                       continue;
+               }
+               catch (Xapian::Error &e) {
+                       *error_r = t_strdup(e.get_description().c_str());
+                       ret = -1;
+                       break;
+               }
+       }
+       hash_table_iterate_deinit(&iter);
+       return ret;
+}
+
+/* Returns: 0 if no DBs, 1 if DB exists, -1 on error */
+static int
+fts_flatcurve_xapian_write_db_by_uid(struct flatcurve_fts_backend *backend,
+                                    uint32_t uid,
+                                    struct flatcurve_xapian_db **xdb_r,
+                                    const char **error_r)
+{
+       static const enum flatcurve_xapian_db_opts opts =
+               ENUM_EMPTY(flatcurve_xapian_db_opts);
+       static const enum flatcurve_xapian_wdb wopts =
+               ENUM_EMPTY(flatcurve_xapian_wdb);
+
+       if (fts_flatcurve_xapian_read_db(backend, opts, NULL, error_r) < 0)
+               return -1;
+
+       struct flatcurve_xapian_db *xdb;
+       int ret = fts_flatcurve_xapian_uid_exists_db(backend, uid, &xdb, error_r);
+       if (ret <= 0)
+               return ret;
+
+       if (fts_flatcurve_xapian_write_db_get(
+               backend, xdb, wopts, error_r) < 0)
+               return -1;
+
+       *xdb_r = xdb;
+       return 1;
+}
+
+/* Returns: 0 on success, -1 on error */
+static int
+fts_flatcurve_xapian_check_commit_limit(struct flatcurve_fts_backend *backend,
+                                       struct flatcurve_xapian_db *xdb,
+                                       const char **error_r)
+{
+       struct fts_flatcurve_user *fuser = backend->fuser;
+       struct flatcurve_xapian *x = backend->xapian;
+
+       ++x->doc_updates;
+       ++xdb->changes;
+
+       if (xdb->type == FLATCURVE_XAPIAN_DB_TYPE_CURRENT &&
+           fuser->set.rotate_count > 0 &&
+           xdb->dbw->get_doccount() >= fuser->set.rotate_count) {
+               return fts_flatcurve_xapian_close_db(
+                       backend, xdb, FLATCURVE_XAPIAN_DB_CLOSE_ROTATE, error_r);
+       }
+
+       if (fuser->set.commit_limit > 0 &&
+           x->doc_updates >= fuser->set.commit_limit) {
+               e_debug(backend->event,
+                       "Committing DB as update limit was reached; limit=%d",
+                       fuser->set.commit_limit);
+               return fts_flatcurve_xapian_close_dbs(
+                       backend, FLATCURVE_XAPIAN_DB_CLOSE_WDB_COMMIT, error_r);
+       }
+
+       return 0;
+}
+
+/* Returns: 0 on success, -1 on error */
+static int
+fts_flatcurve_xapian_clear_document(struct flatcurve_fts_backend *backend,
+                                   const char **error_r)
+{
+       static const enum flatcurve_xapian_db_opts opts =
+               ENUM_EMPTY(flatcurve_xapian_db_opts);
+
+       struct flatcurve_xapian *x = backend->xapian;
+
+       if (x->doc == NULL)
+               return 0;
+
+       struct flatcurve_xapian_db *xdb;
+       int ret = fts_flatcurve_xapian_write_db_current(
+                       backend, opts, &xdb, error_r);
+       if (ret <= 0)
+               return ret;
+
+       ret = 0;
+       try {
+               xdb->dbw->replace_document(x->doc_uid, *x->doc);
+       } catch (std::bad_alloc &b) {
+               i_fatal_status(FATAL_OUTOFMEM,
+                       "Out of memory when indexing mail (%s); UID=%d "
+                       "(Hint: increase indexing process vsz_limit or "
+                       "define smaller commit limit value in "
+                       "plugin { fts_flatcurve_commit_limit = ...})",
+                       b.what(), x->doc_uid);
+       } catch (Xapian::Error &e) {
+               *error_r = t_strdup_printf(
+                       "Could not write message data: uid=%u; %s",
+                       x->doc_uid,
+                       e.get_description().c_str());
+               ret = -1;
+       }
+
+       if (x->doc_created)
+               delete(x->doc);
+       x->doc = NULL;
+       x->doc_created = FALSE;
+       x->doc_uid = 0;
+
+       if (ret < 0)
+               return -1;
+
+       return fts_flatcurve_xapian_check_commit_limit(backend, xdb, error_r);
+}
+
+/* Returns: 0 on success, -1 on error */
+static int
+fts_flatcurve_xapian_close_db(struct flatcurve_fts_backend *backend,
+                             struct flatcurve_xapian_db *xdb,
+                             enum flatcurve_xapian_db_close opts,
+                             const char **error_r)
+{
+       struct flatcurve_xapian *x = backend->xapian;
+
+       if (fts_flatcurve_xapian_clear_document(backend, error_r) < 0)
+               return -1;
+
+       struct timeval start;
+       i_gettimeofday(&start);
+
+       bool commit = FALSE;
+       if (xdb->dbw != NULL) {
+               if (HAS_ANY_BITS(opts, FLATCURVE_XAPIAN_DB_CLOSE_WDB |
+                                      FLATCURVE_XAPIAN_DB_CLOSE_MBOX)) {
+                       int ret = 0;
+                       try {
+                               /* even if xapian documetation states that close
+                               auto-commits, GlassWritableDatabase::close() can
+                               fail to actually close the db if commit fails.
+                               We explicitly commit before invoking close to
+                               have a better chance to properly clean up */
+                               xdb->dbw->commit();
+                       }
+                       catch (Xapian::Error &e) {
+                               *error_r = t_strdup(e.get_description().c_str());
+                               ret = -1;
+                       }
+                       xdb->dbw->close();
+                       delete(xdb->dbw);
+                       xdb->dbw = NULL;
+                       commit = TRUE; // mark anyway as committed
+                       if (ret < 0)
+                               return -1;
+               } else if (HAS_ANY_BITS(opts, FLATCURVE_XAPIAN_DB_CLOSE_WDB_COMMIT |
+                                             FLATCURVE_XAPIAN_DB_CLOSE_ROTATE)) {
+                       try {
+                               xdb->dbw->commit();
+                               commit = TRUE;
+                       }
+                       catch (Xapian::Error &e) {
+                               *error_r = t_strdup(e.get_description().c_str());
+                               return -1;
+                       }
+               }
+       }
+
+       bool rotate = FALSE;
+       if (commit) {
+               struct timeval now;
+               i_gettimeofday(&now);
+               int elapsed = timeval_diff_msecs(&now, &start);
+               if (xdb->changes > 0)
+                       e_debug(backend->event, "Committed %u changes to DB "
+                               "(RW, %s) in %u.%03u secs", xdb->changes,
+                               xdb->dbpath->fname, elapsed / 1000, elapsed % 1000);
+
+               xdb->changes = 0;
+               x->doc_updates = 0;
+
+               if (xdb->type == FLATCURVE_XAPIAN_DB_TYPE_CURRENT) {
+                       if (HAS_ALL_BITS(opts, FLATCURVE_XAPIAN_DB_CLOSE_ROTATE) ||
+                               (backend->fuser->set.rotate_time > 0 &&
+                                elapsed > backend->fuser->set.rotate_time))
+                               rotate = TRUE;
+               }
+       }
+
+
+       if (rotate) {
+               const char *error;
+               if (fts_flatcurve_xapian_lock(backend, &error) < 0)
+                       e_error(backend->event, "%s", error);
+               else {
+                       const char *fname = p_strdup(x->pool, xdb->dbpath->fname);
+                       enum flatcurve_xapian_db_close flags =
+                               (enum flatcurve_xapian_db_close)
+                               (opts & FLATCURVE_XAPIAN_DB_CLOSE_MBOX);
+                       if (fts_flatcurve_xapian_create_current(
+                               backend, flags, &error) < 0)
+                               e_error(backend->event, "Error rotating DB (%s)",
+                                       xdb->dbpath->fname);
+                       else
+                               e_debug(event_create_passthrough(backend->event)->
+                                       set_name("fts_flatcurve_rotate")->
+                                       add_str("mailbox", str_c(backend->boxname))->
+                                       event(),
+                                       "Rotating index (from: %s, to: %s)", fname,
+                                       xdb->dbpath->fname);
+
+                       fts_flatcurve_xapian_unlock(backend);
+               }
+       }
+
+       if (xdb->db != NULL &&
+           HAS_ANY_BITS(opts, FLATCURVE_XAPIAN_DB_CLOSE_DB |
+                              FLATCURVE_XAPIAN_DB_CLOSE_MBOX)) {
+               delete(xdb->db);
+               xdb->db = NULL;
+       }
+
+       return 0;
+}
+
+/* Returns: 0 on success, -1 on error */
+static int
+fts_flatcurve_xapian_close_dbs(struct flatcurve_fts_backend *backend,
+                              enum flatcurve_xapian_db_close opts,
+                              const char **error_r)
+{
+       struct hash_iterate_context *iter;
+       void *key, *val;
+       struct flatcurve_xapian *x = backend->xapian;
+
+       const char *error, *last_error = NULL;
+       iter = hash_table_iterate_init(x->dbs);
+       while (hash_table_iterate(iter, x->dbs, &key, &val)) {
+               struct flatcurve_xapian_db *db =
+                       (struct flatcurve_xapian_db *)val;
+               if (fts_flatcurve_xapian_close_db(backend, db, opts, &error) < 0) {
+                       if (last_error != NULL)
+                               e_error(backend->event, "%s", last_error);
+                       last_error = error;
+               }
+       }
+       hash_table_iterate_deinit(&iter);
+       if (last_error != NULL) {
+               *error_r = last_error;
+               return -1;
+       }
+       return 0;
+}
+
+/* Returns: 0 on success, -1 on error */
+int fts_flatcurve_xapian_refresh(struct flatcurve_fts_backend *backend,
+                                const char **error_r)
+{
+       return fts_flatcurve_xapian_close_dbs(
+               backend, FLATCURVE_XAPIAN_DB_CLOSE_WDB, error_r);
+}
+
+/* Returns: 0 on success, -1 on error */
+int fts_flatcurve_xapian_close(struct flatcurve_fts_backend *backend,
+                              const char **error_r)
+{
+       struct flatcurve_xapian *x = backend->xapian;
+       int ret = fts_flatcurve_xapian_close_dbs(
+               backend, FLATCURVE_XAPIAN_DB_CLOSE_MBOX, error_r);
+
+       hash_table_clear(x->dbs, TRUE);
+
+       x->lock_path = NULL;
+       x->dbw_current = NULL;
+       x->shards = 0;
+
+       if (x->db_read != NULL) {
+               x->db_read->close();
+               delete(x->db_read);
+               x->db_read = NULL;
+       }
+
+       p_clear(x->pool);
+       return ret;
+}
+
+static uint32_t
+fts_flatcurve_xapian_get_last_uid_query(struct flatcurve_fts_backend *backend,
+                                       Xapian::Database *db)
+{
+       Xapian::Enquire enquire(*db);
+       Xapian::MSet m;
+
+       enquire.set_docid_order(Xapian::Enquire::DESCENDING);
+       enquire.set_query(Xapian::Query::MatchAll);
+
+       m = enquire.get_mset(0, 1);
+       return (m.empty())
+               ? 0 : m.begin().get_document().get_docid();
+}
+
+/* Returns: 0 on success, -1 on error */
+int fts_flatcurve_xapian_get_last_uid(struct flatcurve_fts_backend *backend,
+                                     uint32_t *last_uid_r, const char **error_r)
+{
+       static const enum flatcurve_xapian_db_opts opts =
+               (enum flatcurve_xapian_db_opts)
+                       (FLATCURVE_XAPIAN_DB_NOCREATE_CURRENT |
+                        FLATCURVE_XAPIAN_DB_IGNORE_EMPTY);
+
+       Xapian::Database *db;
+       int ret = fts_flatcurve_xapian_read_db(backend, opts, &db, error_r);
+       if (ret < 0)
+               return ret;
+
+       if (ret == 0) {
+               *last_uid_r = 0;
+               return 0;
+       }
+
+       try {
+               /* Optimization: if last used ID still exists in  mailbox,
+                * this is a cheap call. */
+               *last_uid_r = db->get_document(db->get_lastdocid()).get_docid();
+               return 0;
+       } catch (Xapian::DocNotFoundError &e) {
+               /* Last used Xapian ID is no longer in the DB. Need
+                       * to do a manual search for the last existing ID. */
+               *last_uid_r = fts_flatcurve_xapian_get_last_uid_query(backend, db);
+               return 0;
+       } catch (Xapian::InvalidArgumentError &e) {
+               *last_uid_r = 0;
+               return 0;
+       }
+}
+
+/* Returns: 0 not found, 1 if found, -1 on error */
+int fts_flatcurve_xapian_uid_exists(struct flatcurve_fts_backend *backend,
+                                   uint32_t uid, const char **error_r)
+{
+       static const enum flatcurve_xapian_db_opts opts =
+               (enum flatcurve_xapian_db_opts)
+                       (FLATCURVE_XAPIAN_DB_NOCREATE_CURRENT |
+                        FLATCURVE_XAPIAN_DB_IGNORE_EMPTY);
+
+       if (fts_flatcurve_xapian_read_db(backend, opts, NULL, error_r) <= 0)
+               return -1;
+       return fts_flatcurve_xapian_uid_exists_db(backend, uid, NULL, error_r);
+}
+
+/* Returns: 0 not found, 1 deleted, -1 on error */
+int fts_flatcurve_xapian_expunge(struct flatcurve_fts_backend *backend,
+                                uint32_t uid, const char **error_r)
+{
+       struct flatcurve_xapian_db *xdb;
+
+       if (fts_flatcurve_xapian_write_db_by_uid(
+               backend, uid, &xdb, error_r) <= 0) {
+               e_debug(backend->event, "Expunge failed uid=%u; UID not found",
+                       uid);
+               return 0;
+       }
+
+       try {
+               xdb->dbw->delete_document(uid);
+               if (fts_flatcurve_xapian_check_commit_limit(
+                       backend, xdb, error_r) < 0)
+                       return -1;
+               return 1;
+       } catch (Xapian::Error &e) {
+               *error_r = t_strdup_printf(
+                       "Failed to expunge uid=%u: %s",
+                       uid, e.get_description().c_str());
+               return -1;
+       }
+}
+
+/* Returns: TBD 0 not found, 1 deleted, -1 on error */
+int
+fts_flatcurve_xapian_init_msg(struct flatcurve_fts_backend_update_context *ctx,
+                             const char **error_r)
+{
+       static const enum flatcurve_xapian_db_opts opts =
+               ENUM_EMPTY(flatcurve_xapian_db_opts);
+
+       struct flatcurve_xapian *x = ctx->backend->xapian;
+       struct flatcurve_xapian_db *xdb;
+
+       if (ctx->uid == x->doc_uid)
+               /* already indexed, nothing else to do */
+               return 1;
+
+       if (fts_flatcurve_xapian_clear_document(ctx->backend, error_r) < 0)
+               return -1;
+
+       int ret = fts_flatcurve_xapian_write_db_current(
+               ctx->backend, opts, &xdb, error_r);
+       if (ret <= 0)
+               /* error or x->dbw_current == NULL */
+               return ret;
+       try {
+               (void)xdb->dbw->get_document(ctx->uid);
+               /* document already existed */
+               return 0;
+       } catch (Xapian::DocNotFoundError &e) {
+               x->doc = new Xapian::Document();
+               x->doc_created = TRUE;
+               x->doc_uid = ctx->uid;
+               /* document did not exist */
+               return 1;
+       } catch (Xapian::Error &e) {
+               ctx->ctx.failed = TRUE;
+               *error_r = t_strdup(e.get_description().c_str());
+               return -1;
+       }
+}
+
+int
+fts_flatcurve_xapian_index_header(struct flatcurve_fts_backend_update_context *ctx,
+                                 const unsigned char *data, size_t size,
+                                 const char **error_r)
+{
+       struct fts_flatcurve_user *fuser = ctx->backend->fuser;
+       struct flatcurve_xapian *x = ctx->backend->xapian;
+
+       int ret = fts_flatcurve_xapian_init_msg(ctx, error_r);
+       if (ret <= 0)
+               return ret;
+
+       T_BEGIN {
+               char *hdr_name =
+                       str_lcase(t_strdup_noconst(str_c(ctx->hdr_name)));
+
+               if (*hdr_name != '\0')
+                       x->doc->add_boolean_term(t_strdup_printf(
+                               FLATCURVE_XAPIAN_BOOLEAN_FIELD_PREFIX
+                               "%s", hdr_name));
+
+               if (ctx->indexed_hdr)
+                       hdr_name = str_ucase(hdr_name);
+
+               i_assert(uni_utf8_data_is_valid(data, size));
+               const unsigned char *end = data + size;
+               for(; end > data; data += uni_utf8_char_bytes((unsigned char) *data)) {
+                       size_t len = end - data;
+                       if (len == 0 || len < fuser->set.min_term_size)
+                               break;
+
+                       T_BEGIN {
+                               x->doc->add_term(t_strdup_printf(
+                                       FLATCURVE_XAPIAN_ALL_HEADERS_PREFIX
+                                       "%s", data));
+                               if (ctx->indexed_hdr)
+                                       x->doc->add_term(t_strdup_printf(
+                                               FLATCURVE_XAPIAN_HEADER_PREFIX
+                                               "%s%s", hdr_name,
+                                               (const char*) data));
+                       } T_END;
+
+                       if (!fuser->set.substring_search)
+                               break;
+               }
+       } T_END;
+       return 1;
+}
+
+int
+fts_flatcurve_xapian_index_body(struct flatcurve_fts_backend_update_context *ctx,
+                               const unsigned char *data, size_t size,
+                               const char **error_r)
+{
+       struct fts_flatcurve_user *fuser = ctx->backend->fuser;
+       struct flatcurve_xapian *x = ctx->backend->xapian;
+
+       int ret = fts_flatcurve_xapian_init_msg(ctx, error_r);
+       if (ret <= 0)
+               return ret;
+
+       i_assert(uni_utf8_data_is_valid(data, size));
+       const unsigned char *end = data + size;
+       for(; end > data; data += uni_utf8_char_bytes((unsigned char) *data)) {
+               size_t len = end - data;
+               if (len == 0 || len < fuser->set.min_term_size)
+                       break;
+
+               x->doc->add_term((const char*) data);
+               if (!fuser->set.substring_search)
+                       break;
+       }
+       return 1;
+}
+
+/* Returns: 0 if index doesn't exist, 1 on deletion, -1 on error */
+int fts_flatcurve_xapian_delete_index(struct flatcurve_fts_backend *backend,
+                                     const char **error_r)
+{
+       const char *error;
+       int ret = fts_flatcurve_xapian_close(backend, error_r);
+       if (fts_flatcurve_xapian_delete(backend, NULL, &error) < 0) {
+               if (ret < 0)
+                       e_error(backend->event, "%s", error);
+               else
+                       *error_r = error;
+               ret = -1;
+       }
+       return ret;
+}
+
+#ifdef XAPIAN_HAS_COMPACT
+/* Returns: 0 on success, -1 on error */
+static int
+fts_flatcurve_xapian_optimize_rebuild(struct flatcurve_fts_backend *backend,
+                                     Xapian::Database *db,
+                                     struct flatcurve_xapian_db_path *path,
+                                     const char **error_r)
+{
+       struct flatcurve_xapian *x = backend->xapian;
+
+       /* Create the optimize shard. */
+       struct flatcurve_xapian_db *xdb =
+               p_new(x->pool, struct flatcurve_xapian_db, 1);
+       xdb->dbpath = path;
+       xdb->type = FLATCURVE_XAPIAN_DB_TYPE_OPTIMIZE;
+
+       if (fts_flatcurve_xapian_write_db_get(
+               backend, xdb, FLATCURVE_XAPIAN_WDB_CREATE, error_r) < 0)
+               return -1;
+
+       Xapian::Enquire enquire(*db);
+       enquire.set_docid_order(Xapian::Enquire::ASCENDING);
+       enquire.set_query(Xapian::Query::MatchAll);
+
+       Xapian::MSet mset = enquire.get_mset(0, db->get_doccount());
+       Xapian::MSetIterator iter = mset.begin();
+
+       unsigned int updates = 0;
+       for (iter = mset.begin(); iter != mset.end(); ++iter) {
+               Xapian::Document doc = iter.get_document();
+               try {
+                       xdb->dbw->replace_document(doc.get_docid(), doc);
+                       if (++updates > FLATCURVE_MANUAL_OPTIMIZE_COMMIT_LIMIT) {
+                               xdb->dbw->commit();
+                               updates = 0;
+                       }
+               } catch (Xapian::Error &e) {
+                       *error_r = t_strdup(e.get_description().c_str());
+                       return -1;
+               }
+       }
+
+       return fts_flatcurve_xapian_close_db(
+                       backend, xdb, FLATCURVE_XAPIAN_DB_CLOSE_WDB, error_r);
+}
+
+/* Returns: 0 on success, -1 on error */
+static int
+fts_flatcurve_xapian_optimize_box_do(struct flatcurve_fts_backend *backend,
+                                    Xapian::Database *db, const char **error_r)
+{
+       static const enum flatcurve_xapian_db_opts opts =
+               ENUM_EMPTY(flatcurve_xapian_db_opts);
+       static const enum flatcurve_xapian_wdb wopts =
+               ENUM_EMPTY(flatcurve_xapian_wdb);
+
+       struct flatcurve_xapian *x = backend->xapian;
+
+       /* We need to lock all of the mailboxes so nothing changes while we
+        * are optimizing. */
+
+       void *key, *val;
+       struct hash_iterate_context *hiter = hash_table_iterate_init(x->dbs);
+       while (hash_table_iterate(hiter, x->dbs, &key, &val)) {
+               struct flatcurve_xapian_db *db = (struct flatcurve_xapian_db *)val;
+               if (fts_flatcurve_xapian_write_db_get(
+                       backend, db, wopts, error_r) < 0)
+                       return -1;
+       }
+       hash_table_iterate_deinit(&hiter);
+
+       /* Create the optimize target. */
+       struct flatcurve_xapian_db_path *oldpath =
+               fts_flatcurve_xapian_create_db_path(
+                       backend, FLATCURVE_XAPIAN_DB_OPTIMIZE);
+       if (fts_flatcurve_xapian_delete(backend, oldpath, error_r) < 0)
+               return -1;
+
+       struct timeval start;
+       i_gettimeofday(&start);
+
+       bool failed = FALSE;
+       try {
+               (void)db->reopen();
+               db->compact(oldpath->path, Xapian::DBCOMPACT_NO_RENUMBER |
+                                       Xapian::DBCOMPACT_MULTIPASS |
+                                       Xapian::Compactor::FULLER);
+       } catch (Xapian::InvalidOperationError &e) {
+               /* This exception is not as specific as it could be...
+                * but the likely reason it happens is due to
+                * Xapian::DBCOMPACT_NO_RENUMBER and shards having disjoint
+                * ranges of UIDs (e.g. shard 1 = 1..2, shard 2 = 2..3).
+                * Xapian, as of 1.4.18, cannot handle this situation.
+                * Since we will never be able to compact this data unless
+                * we do something about it, the options are either:
+                *   1) delete the index totally and start fresh (not great
+                *      for large mailboxes), or
+                *   2) to incrementally build the optimized DB by walking
+                *      through all DBs and copying, ignoring duplicate
+                *      documents.
+                * Let's try to be awesome and do the latter. */
+               failed = fts_flatcurve_xapian_optimize_rebuild(
+                               backend, db, oldpath, error_r) < 0;
+               if (!failed)
+                       e_debug(backend->event, "Native optimize failed, "
+                               "falling back to manual optimization; %s",
+                               e.get_description().c_str());
+       } catch (Xapian::Error &e) {
+               *error_r = t_strdup(e.get_description().c_str());
+               failed = TRUE;
+       }
+       if (failed) {
+               e_error(backend->event, "Optimize failed: %s", *error_r);
+               return 0;
+       }
+
+       struct flatcurve_xapian_db_path *newpath =
+               p_new(x->pool, struct flatcurve_xapian_db_path, 1);
+       newpath->fname = p_strdup(x->pool, oldpath->fname);
+       newpath->path = p_strdup(x->pool, oldpath->path);
+
+       /* Delete old indexes. */
+       struct flatcurve_xapian_db_iter *iter =
+               fts_flatcurve_xapian_db_iter_init(backend, opts);
+
+       int ret = 0;
+       while (fts_flatcurve_xapian_db_iter_next(iter)) {
+               if (iter->type != FLATCURVE_XAPIAN_DB_TYPE_OPTIMIZE &&
+                   iter->type != FLATCURVE_XAPIAN_DB_TYPE_LOCK) {
+                       if (fts_flatcurve_xapian_delete(
+                               backend, iter->path, error_r) < 0) {
+                               ret = -1;
+                               break;
+                       }
+               }
+       }
+       const char *error;
+       if (fts_flatcurve_xapian_db_iter_deinit(&iter, &error) < 0) {
+               if (ret < 0)
+                       e_error(backend->event, "%s", error);
+               else
+                       *error_r = error;
+               ret = -1;
+       }
+       if (ret < 0)
+               return -1;
+
+       /* Rename optimize index to an active index. */
+       if (fts_flatcurve_xapian_rename_db(backend, newpath, NULL, error_r) < 0 ||
+           fts_flatcurve_xapian_delete(backend, oldpath, error_r) < 0)
+               return -1;
+
+       struct timeval now;
+       i_gettimeofday(&now);
+       int elapsed = timeval_diff_msecs(&now, &start);
+       e_debug(backend->event, "Optimized DB in %u.%03u secs",
+                               elapsed / 1000, elapsed % 1000);
+
+       return 0;
+}
+#endif
+
+/* Returns: 0 on success, -1 on error */
+int fts_flatcurve_xapian_optimize_box(struct flatcurve_fts_backend *backend,
+                                     const char **error_r)
+{
+#ifdef XAPIAN_HAS_COMPACT
+       static const enum flatcurve_xapian_db_opts opts =
+               (enum flatcurve_xapian_db_opts)
+                       (FLATCURVE_XAPIAN_DB_NOCREATE_CURRENT |
+                        FLATCURVE_XAPIAN_DB_IGNORE_EMPTY);
+
+       Xapian::Database *db;
+       int ret;
+       if ((ret = fts_flatcurve_xapian_read_db(
+               backend, opts, &db, error_r)) <= 0)
+               return ret;
+
+       if (backend->xapian->deinit &&
+           !fts_flatcurve_xapian_need_optimize(backend)) {
+               return fts_flatcurve_xapian_close(backend, error_r);
+       }
+
+       e_debug(event_create_passthrough(backend->event)->
+               set_name("fts_flatcurve_optimize")->
+               add_str("mailbox", str_c(backend->boxname))->event(),
+               "Optimizing");
+
+       ret = 0;
+       if (fts_flatcurve_xapian_lock(backend, error_r) < 0 ||
+           fts_flatcurve_xapian_optimize_box_do(backend, db, error_r) < 0)
+               ret = -1;
+
+       const char *error;
+       if (fts_flatcurve_xapian_close(backend, &error) < 0) {
+               if (ret < 0)
+                       e_error(backend->event, "%s", error);
+               else
+                       *error_r = error;
+               ret = -1;
+       }
+       fts_flatcurve_xapian_unlock(backend);
+       return ret;
+#else
+       return 0;
+#endif
+}
+
+static void
+fts_flatcurve_build_query_arg_term(struct flatcurve_fts_query *query,
+                                  struct mail_search_arg *arg,
+                                  const char *term)
+{
+       struct flatcurve_fts_query_xapian *x = query->xapian;
+
+       struct flatcurve_fts_query_arg *qarg = array_append_space(&x->args);
+       qarg->value = str_new(query->pool, 64);
+
+       /* Absence of NOT or AND flags means an OR search. */
+       if (arg->match_not)
+               qarg->is_not = TRUE;
+       if ((query->flags & FTS_LOOKUP_FLAG_AND_ARGS) != 0)
+               qarg->is_and = TRUE;
+
+       switch (arg->type) {
+       case SEARCH_TEXT:
+               x->qp->add_prefix(FLATCURVE_XAPIAN_ALL_HEADERS_QP,
+                                 FLATCURVE_XAPIAN_ALL_HEADERS_PREFIX);
+               str_printfa(qarg->value, "(%s:%s OR %s:%s)",
+                           FLATCURVE_XAPIAN_ALL_HEADERS_QP, term,
+                           FLATCURVE_XAPIAN_BODY_QP, term);
+               break;
+
+       case SEARCH_BODY:
+               str_printfa(qarg->value, "%s:%s",
+                           FLATCURVE_XAPIAN_BODY_QP, term);
+               break;
+
+       case SEARCH_HEADER:
+       case SEARCH_HEADER_ADDRESS:
+       case SEARCH_HEADER_COMPRESS_LWSP:
+               if (*term != '\0') {
+                       if (fts_header_want_indexed(arg->hdr_field_name)) {
+                               string_t *hdr = str_new(query->pool, 32);
+                               str_printfa(hdr, "%s%s",
+                                           FLATCURVE_XAPIAN_HEADER_QP,
+                                           t_str_lcase(arg->hdr_field_name));
+                               string_t *hdr2 = str_new(query->pool, 32);
+                               str_printfa(hdr2, "%s%s",
+                                           FLATCURVE_XAPIAN_HEADER_PREFIX,
+                                           t_str_ucase(arg->hdr_field_name));
+                               x->qp->add_prefix(str_c(hdr), str_c(hdr2));
+                               str_printfa(qarg->value, "%s:%s", str_c(hdr),
+                                           term);
+                       } else {
+                               x->qp->add_prefix(
+                                       FLATCURVE_XAPIAN_ALL_HEADERS_QP,
+                                       FLATCURVE_XAPIAN_ALL_HEADERS_PREFIX);
+                               str_printfa(qarg->value, "%s:%s",
+                                           FLATCURVE_XAPIAN_ALL_HEADERS_QP,
+                                           term);
+                               /* Non-indexed headers only match if it
+                                * appears in the general pool of header
+                                * terms for the message, not to a specific
+                                * header, so this is only a maybe match. */
+                               query->maybe = TRUE;
+                       }
+               } else {
+                       x->qp->add_boolean_prefix(
+                               FLATCURVE_XAPIAN_HEADER_BOOL_QP,
+                               FLATCURVE_XAPIAN_BOOLEAN_FIELD_PREFIX);
+                       str_printfa(qarg->value, "%s:%s",
+                                   FLATCURVE_XAPIAN_HEADER_BOOL_QP,
+                                   t_str_lcase(arg->hdr_field_name));
+               }
+               break;
+       default:
+               i_unreached();
+       }
+}
+
+static void
+fts_flatcurve_build_query_arg(struct flatcurve_fts_query *query,
+                             struct mail_search_arg *arg)
+{
+       struct flatcurve_fts_query_xapian *x = query->xapian;
+
+       if (arg->no_fts)
+               return;
+
+       switch (arg->type) {
+       case SEARCH_TEXT:
+       case SEARCH_BODY:
+       case SEARCH_HEADER:
+       case SEARCH_HEADER_ADDRESS:
+       case SEARCH_HEADER_COMPRESS_LWSP:
+               /* Valid search term. Set match_always, as required by FTS
+                * API, to avoid this argument being looked up later via
+                * regular search code. */
+               arg->match_always = TRUE;
+               break;
+
+       case SEARCH_MAILBOX:
+               /* doveadm will pass this through in 'doveadm search'
+                * commands with a 'mailbox' search argument. The code has
+                * already handled setting the proper mailbox by this point
+                * so just ignore this. */
+               return;
+
+       case SEARCH_OR:
+       case SEARCH_SUB:
+               /* FTS API says to ignore these. */
+               return;
+
+       default:
+               /* We should never get here - this is a search argument that
+                * we don't understand how to handle that has leaked to this
+                * point. For performance reasons, we will ignore this
+                * argument and err on the side of returning too many
+                * results (rather than falling back to slow, manual
+                * search). */
+               return;
+       }
+
+       if (*arg->value.str == '\0') {
+               /* This is an existence search. */
+               fts_flatcurve_build_query_arg_term(query, arg, "");
+               return;
+       }
+
+       /* Prepare search term. Phrase searching is not supported
+               * natively (FTS core provides index terms without positional
+               * context) so we can only do single term searching with
+               * Xapian. Therefore, if we do see a multi-term search, break
+               * it apart and do a maybe query. */
+       const char *const *parts = t_strsplit_spaces(arg->value.str, " ");
+       unsigned int count = str_array_length(parts);
+       if (count > 1)
+               query->maybe = TRUE;
+
+       for (unsigned int index = 0; index < count; index++, parts++) {
+               /* For phrase searches, we only add wildcard to the
+                       * last term. */
+               const char *term = (index + 1) == count ?
+                       t_strconcat(*parts, "*", NULL) : *parts;
+
+               fts_flatcurve_build_query_arg_term(query, arg, term);
+
+               /* We need to AND search all phrase terms. */
+               if (count > 1) {
+                       struct flatcurve_fts_query_arg *qarg =
+                               array_back_modifiable(&x->args);
+                       qarg->is_and = TRUE;
+               }
+       }
+}
+
+static void
+fts_flatcurve_xapian_build_query_deinit(struct flatcurve_fts_query *query)
+{
+       array_free(&query->xapian->args);
+       delete(query->xapian->qp);
+}
+
+void
+fts_flatcurve_xapian_build_query_match_all(struct flatcurve_fts_query *query)
+{
+       struct flatcurve_fts_query_xapian *x = query->xapian =
+               p_new(query->pool, struct flatcurve_fts_query_xapian, 1);
+       query->qtext = str_new_const(query->pool, "[Match All]", 11);
+       x->query = new Xapian::Query(Xapian::Query::MatchAll);
+}
+
+/* Returns: 0 on success, -1 on error */
+int fts_flatcurve_xapian_build_query(struct flatcurve_fts_query *query,
+                                    const char **error_r)
+{
+       struct flatcurve_fts_query_xapian *x = query->xapian =
+               p_new(query->pool, struct flatcurve_fts_query_xapian, 1);
+       p_array_init(&x->args, query->pool, 4);
+
+       x->qp = new Xapian::QueryParser();
+       x->qp->add_prefix(FLATCURVE_XAPIAN_BODY_QP, "");
+       x->qp->set_stemming_strategy(Xapian::QueryParser::STEM_NONE);
+
+       struct mail_search_arg *args;
+       for (args = query->args; args != NULL ; args = args->next)
+               fts_flatcurve_build_query_arg(query, args);
+
+       /* Empty Query. Optimize by not creating a query and returning no
+        * results when we go through the iteration later. */
+       if (array_is_empty(&x->args)) {
+               fts_flatcurve_xapian_build_query_deinit(query);
+               return 0;
+       }
+
+       std::string str;
+       const struct flatcurve_fts_query_arg *arg, *prev;
+       /* Generate the query. */
+       prev = NULL;
+       array_foreach(&x->args, arg) {
+               if (arg->is_not) {
+                       if (prev != NULL)
+                               str += " ";
+                       str += "NOT ";
+               }
+               if (arg->is_not || (prev == NULL)) {
+                       str += str_c(arg->value);
+               } else if (!str_equals(arg->value, prev->value)) {
+                       if (arg->is_and)
+                               str += " AND ";
+                       else
+                               str += " OR ";
+                       str += str_c(arg->value);
+               }
+               prev = arg;
+       }
+
+       query->qtext = str_new(query->pool, 64);
+       str_append(query->qtext, str.c_str());
+
+       int ret = 0;
+       try {
+               x->query = new Xapian::Query(x->qp->parse_query(
+                       str,
+                       Xapian::QueryParser::FLAG_BOOLEAN |
+                       Xapian::QueryParser::FLAG_PHRASE |
+                       Xapian::QueryParser::FLAG_PURE_NOT |
+                       Xapian::QueryParser::FLAG_WILDCARD
+               ));
+       } catch (Xapian::QueryParserError &e) {
+               *error_r = t_strdup_printf(
+                       "Parsing query failed (query: %s); %s",
+                       str.c_str(), e.get_description().c_str());
+               ret = -1;
+       }
+
+       fts_flatcurve_xapian_build_query_deinit(query);
+       return ret;
+}
+
+struct fts_flatcurve_xapian_query_iter *
+fts_flatcurve_xapian_query_iter_init(struct flatcurve_fts_query *query)
+{
+       struct fts_flatcurve_xapian_query_iter *iter;
+       iter = new fts_flatcurve_xapian_query_iter();
+       iter->query = query;
+       iter->result = p_new(query->pool,
+                            struct fts_flatcurve_xapian_query_result, 1);
+       return iter;
+}
+
+bool
+fts_flatcurve_xapian_query_iter_next(struct fts_flatcurve_xapian_query_iter *iter,
+                                    struct fts_flatcurve_xapian_query_result **result_r)
+{
+       static const enum flatcurve_xapian_db_opts opts =
+               ENUM_EMPTY(flatcurve_xapian_db_opts);
+
+       if (iter->error != NULL)
+               return FALSE;
+
+       Xapian::MSet m;
+       if (iter->enquire == NULL) {
+               if (iter->query->xapian->query == NULL)
+                       return FALSE;
+
+               const char *error;
+               int ret = fts_flatcurve_xapian_read_db(
+                       iter->query->backend, opts, &iter->db, &error);
+               if (ret < 0)
+                       iter->error = i_strdup(error);
+               if (ret <= 0)
+                       return FALSE;
+
+               iter->enquire = new Xapian::Enquire(*iter->db);
+               iter->enquire->set_docid_order(
+                               Xapian::Enquire::DONT_CARE);
+               iter->enquire->set_query(*iter->query->xapian->query);
+
+               try {
+                       m = iter->enquire->get_mset(0, iter->db->get_doccount());
+               } catch (Xapian::DatabaseModifiedError &e) {
+                       /* Per documentation, this is only thrown if more than
+                        * one change has been made to the database. To
+                        * resolve you need to reopen the DB (Xapian can
+                        * handle a single snapshot of a modified DB natively,
+                        * so this only occurs if there have been multiple
+                        * writes). However, we ALWAYS want to use the
+                        * most up-to-date version, so we have already
+                        * explicitly called reopen() above. Thus, we should
+                        * never see this exception. */
+                       i_unreached();
+               }
+
+               iter->mset_iter = m.begin();
+       }
+
+       if (iter->mset_iter == m.end())
+               return FALSE;
+
+       iter->result->score = iter->mset_iter.get_weight();
+       /* MSet docid can be an "interleaved" docid generated by
+        * Xapian::Database when handling multiple DBs at once. Instead, we
+        * want the "unique docid", which is obtained by looking at the
+        * doc id from the Document object itself. */
+       iter->result->uid = iter->mset_iter.get_document().get_docid();
+       ++iter->mset_iter;
+
+       *result_r = iter->result;
+       return TRUE;
+}
+
+/* Returns: 0 on success, -1 on error */
+int
+fts_flatcurve_xapian_query_iter_deinit(struct fts_flatcurve_xapian_query_iter **_iter,
+                                      const char **error_r)
+{
+       struct fts_flatcurve_xapian_query_iter *iter = *_iter;
+       *_iter = NULL;
+
+       p_free(iter->query->pool, iter->result);
+       if (iter->enquire != NULL)
+               delete(iter->enquire);
+
+       int ret = 0;
+       if (iter->error != NULL) {
+               *error_r = t_strdup(iter->error);
+               i_free(iter->error);
+               ret = -1;
+       }
+       delete(iter);
+       return ret;
+}
+
+/* Returns: 0 on success, -1 on error */
+int fts_flatcurve_xapian_run_query(struct flatcurve_fts_query *query,
+                                  struct flatcurve_fts_result *r,
+                                  const char **error_r)
+{
+       struct fts_flatcurve_xapian_query_iter *iter;
+       struct fts_flatcurve_xapian_query_result *result;
+       struct fts_score_map *score;
+
+       iter = fts_flatcurve_xapian_query_iter_init(query);
+       while (fts_flatcurve_xapian_query_iter_next(iter, &result)) {
+               seq_range_array_add(&r->uids, result->uid);
+               score = array_append_space(&r->scores);
+               score->score = (float)result->score;
+               score->uid = result->uid;
+       }
+       return fts_flatcurve_xapian_query_iter_deinit(&iter, error_r);
+}
+
+void fts_flatcurve_xapian_destroy_query(struct flatcurve_fts_query *query)
+{
+       delete(query->xapian->query);
+}
+
+const char *fts_flatcurve_xapian_library_version()
+{
+       return Xapian::version_string();
+}
diff --git a/src/plugins/fts-flatcurve/fts-backend-flatcurve-xapian.h b/src/plugins/fts-flatcurve/fts-backend-flatcurve-xapian.h
new file mode 100644 (file)
index 0000000..6ada484
--- /dev/null
@@ -0,0 +1,95 @@
+/* Copyright (c) the Dovecot authors, based on code by Michael Slusarz.
+ * See the included COPYING file */
+
+#ifndef FTS_BACKEND_FLATCURVE_XAPIAN_H
+#define FTS_BACKEND_FLATCURVE_XAPIAN_H
+
+struct fts_flatcurve_xapian_query_result {
+       double score;
+       uint32_t uid;
+};
+
+struct fts_flatcurve_xapian_db_check {
+       int errors;
+       unsigned int shards;
+};
+
+struct fts_flatcurve_xapian_db_stats {
+       int messages;
+       unsigned int shards;
+       unsigned int version;
+};
+
+HASH_TABLE_DEFINE_TYPE(term_counter, char *, void *);
+
+struct fts_flatcurve_xapian_query_iter;
+
+void fts_flatcurve_xapian_init(struct flatcurve_fts_backend *backend);
+int fts_flatcurve_xapian_refresh(struct flatcurve_fts_backend *backend,
+                                const char **error_r);
+int fts_flatcurve_xapian_close(struct flatcurve_fts_backend *backend,
+                              const char **error_r);
+void fts_flatcurve_xapian_deinit(struct flatcurve_fts_backend *backend);
+
+int fts_flatcurve_xapian_get_last_uid(struct flatcurve_fts_backend *backend,
+                                     uint32_t *last_uid_r, const char **error_r);
+/* Return -1 if DB doesn't exist, 0 if UID doesn't exist, 1 if UID exists */
+int fts_flatcurve_xapian_uid_exists(struct flatcurve_fts_backend *backend,
+                                   uint32_t uid, const char **error_r);
+int fts_flatcurve_xapian_expunge(struct flatcurve_fts_backend *backend,
+                                uint32_t uid, const char **error_r);
+int
+fts_flatcurve_xapian_init_msg(struct flatcurve_fts_backend_update_context *ctx,
+                             const char **error_r);
+int
+fts_flatcurve_xapian_index_header(struct flatcurve_fts_backend_update_context *ctx,
+                                 const unsigned char *data, size_t size,
+                                 const char **error_r);
+int
+fts_flatcurve_xapian_index_body(struct flatcurve_fts_backend_update_context *ctx,
+                               const unsigned char *data, size_t size,
+                               const char **error_r);
+int fts_flatcurve_xapian_optimize_box(struct flatcurve_fts_backend *backend,
+                                     const char **error_r);
+void
+fts_flatcurve_xapian_build_query_match_all(struct flatcurve_fts_query *query);
+int fts_flatcurve_xapian_build_query(struct flatcurve_fts_query *query,
+                                    const char **error_r);
+int fts_flatcurve_xapian_run_query(struct flatcurve_fts_query *query,
+                                  struct flatcurve_fts_result *r,
+                                  const char **error_r);
+void fts_flatcurve_xapian_destroy_query(struct flatcurve_fts_query *query);
+int fts_flatcurve_xapian_delete_index(struct flatcurve_fts_backend *backend,
+                                     const char **error_r);
+
+struct fts_flatcurve_xapian_query_iter *
+fts_flatcurve_xapian_query_iter_init(struct flatcurve_fts_query *query);
+bool
+fts_flatcurve_xapian_query_iter_next(struct fts_flatcurve_xapian_query_iter *iter,
+                                    struct fts_flatcurve_xapian_query_result **result_r);
+int
+fts_flatcurve_xapian_query_iter_deinit(struct fts_flatcurve_xapian_query_iter **_iter,
+                                      const char **error_r);
+
+int
+fts_flatcurve_xapian_mailbox_check(struct flatcurve_fts_backend *backend,
+                                  struct fts_flatcurve_xapian_db_check *check,
+                                  const char **error_r);
+int
+fts_flatcurve_xapian_mailbox_rotate(struct flatcurve_fts_backend *backend,
+                                   const char **error_r);
+int
+fts_flatcurve_xapian_mailbox_stats(struct flatcurve_fts_backend *backend,
+                                   struct fts_flatcurve_xapian_db_stats *stats,
+                                  const char **error_r);
+
+int fts_flatcurve_database_locate_dir(const char *path, const char **index_path_r,
+                                     const char **error_r);
+int fts_flatcurve_database_terms(bool headers, const char *path,
+                                HASH_TABLE_TYPE(term_counter) *terms,
+                                const char **error_r);
+
+void fts_flatcurve_xapian_set_mailbox(struct flatcurve_fts_backend *backend);
+
+const char *fts_flatcurve_xapian_library_version();
+#endif
diff --git a/src/plugins/fts-flatcurve/fts-backend-flatcurve.c b/src/plugins/fts-flatcurve/fts-backend-flatcurve.c
new file mode 100644 (file)
index 0000000..316d194
--- /dev/null
@@ -0,0 +1,720 @@
+/* Copyright (c) the Dovecot authors, based on code by Michael Slusarz.
+ * See the included COPYING file */
+
+#include "lib.h"
+#include "array.h"
+#include "imap-util.h"
+#include "mail-storage-private.h"
+#include "mail-search-build.h"
+#include "mailbox-list-iter.h"
+#include "str.h"
+#include "time-util.h"
+#include "unlink-directory.h"
+#include "fts-backend-flatcurve.h"
+#include "fts-backend-flatcurve-xapian.h"
+
+enum fts_backend_flatcurve_action {
+       FTS_BACKEND_FLATCURVE_ACTION_OPTIMIZE,
+       FTS_BACKEND_FLATCURVE_ACTION_RESCAN
+};
+
+struct event_category event_category_fts_flatcurve = {
+       .name = FTS_FLATCURVE_LABEL,
+       .parent = &event_category_fts
+};
+
+static struct fts_backend *fts_backend_flatcurve_alloc(void)
+{
+       struct flatcurve_fts_backend *backend;
+       pool_t pool;
+
+       pool = pool_alloconly_create(FTS_FLATCURVE_LABEL " pool", 4096);
+
+       backend = i_new(struct flatcurve_fts_backend, 1);
+       backend->backend = fts_backend_flatcurve;
+       backend->pool = pool;
+
+       return &backend->backend;
+}
+
+static int
+fts_backend_flatcurve_init(struct fts_backend *_backend, const char **error_r)
+{
+       struct flatcurve_fts_backend *backend =
+               container_of(_backend, struct flatcurve_fts_backend, backend);
+       struct fts_flatcurve_user *fuser =
+               FTS_FLATCURVE_USER_CONTEXT(_backend->ns->user);
+
+       if (fuser == NULL) {
+               *error_r = "Invalid fts-flatcurve settings";
+               return -1;
+       }
+
+       backend->boxname = str_new(backend->pool, 128);
+       backend->db_path = str_new(backend->pool, 256);
+       backend->fuser = fuser;
+
+       fuser->backend = backend;
+
+       fts_flatcurve_xapian_init(backend);
+
+       backend->event = event_create(_backend->ns->user->event);
+       event_add_category(backend->event, &event_category_fts_flatcurve);
+
+       return fts_backend_flatcurve_close_mailbox(backend, error_r);
+}
+
+int
+fts_backend_flatcurve_close_mailbox(struct flatcurve_fts_backend *backend,
+                                   const char **error_r)
+{
+       int ret = 0;
+       if (str_len(backend->boxname) > 0) {
+               ret = fts_flatcurve_xapian_close(backend, error_r);
+
+               str_truncate(backend->boxname, 0);
+               str_truncate(backend->db_path, 0);
+       }
+
+       event_set_append_log_prefix(backend->event, FTS_FLATCURVE_DEBUG_PREFIX);
+       return ret;
+}
+
+static int fts_backend_flatcurve_refresh(struct fts_backend * _backend)
+{
+       const char *error;
+       struct flatcurve_fts_backend *backend =
+               (struct flatcurve_fts_backend *)_backend;
+
+       int ret = fts_flatcurve_xapian_refresh(backend, &error);
+       if (ret < 0)
+               e_error(backend->event, "%s", error);
+       return ret;
+}
+
+static void fts_backend_flatcurve_deinit(struct fts_backend *_backend)
+{
+       const char *error;
+       struct flatcurve_fts_backend *backend =
+               (struct flatcurve_fts_backend *)_backend;
+
+       int ret = fts_backend_flatcurve_close_mailbox(backend, &error);
+       fts_flatcurve_xapian_deinit(backend);
+       if (ret < 0)
+               e_error(backend->event, "%s", error);
+
+       event_unref(&backend->event);
+       pool_unref(&backend->pool);
+       i_free(backend);
+}
+
+int
+fts_backend_flatcurve_set_mailbox(struct flatcurve_fts_backend *backend,
+                                  struct mailbox *box, const char **error_r)
+{
+       const char *path;
+       struct mail_storage *storage;
+
+       if (str_len(backend->boxname) > 0 &&
+           strcasecmp(box->vname, str_c(backend->boxname)) == 0)
+               return 0;
+
+       if (fts_backend_flatcurve_close_mailbox(backend, error_r) < 0) {
+               *error_r = t_strdup_printf("Could not open mailbox: %s: %s",
+                                          box->vname, *error_r);
+               return -1;
+       }
+
+       if (mailbox_open(box) < 0 ||
+           mailbox_get_path_to(box, MAILBOX_LIST_PATH_TYPE_INDEX, &path) <= 0) {
+               *error_r = t_strdup_printf("Could not open mailbox: %s: %s",
+                                          box->vname,
+                                          mailbox_get_last_internal_error(box, NULL));
+               return -1;
+       }
+
+       str_append(backend->boxname, box->vname);
+       str_printfa(backend->db_path, "%s/%s/", path, FTS_FLATCURVE_LABEL);
+
+       storage = mailbox_get_storage(box);
+       backend->parsed_lock_method = storage->set->parsed_lock_method;
+
+       fts_flatcurve_xapian_set_mailbox(backend);
+       return 0;
+}
+
+static int
+fts_backend_flatcurve_get_last_uid(struct fts_backend *_backend,
+                                  struct mailbox *box, uint32_t *last_uid_r)
+{
+       const char *error;
+       struct flatcurve_fts_backend *backend =
+               (struct flatcurve_fts_backend *)_backend;
+
+       if (fts_backend_flatcurve_set_mailbox(backend, box, &error) < 0 ||
+           fts_flatcurve_xapian_get_last_uid(backend, last_uid_r, &error) < 0) {
+               e_error(backend->event, "%s", error);
+               return -1;
+       }
+       return 0;
+}
+
+static struct fts_backend_update_context
+*fts_backend_flatcurve_update_init(struct fts_backend *_backend)
+{
+       struct flatcurve_fts_backend *backend =
+               (struct flatcurve_fts_backend *)_backend;
+       struct flatcurve_fts_backend_update_context *ctx;
+
+       ctx = p_new(backend->pool,
+                   struct flatcurve_fts_backend_update_context, 1);
+       ctx->ctx.backend = _backend;
+       ctx->backend = backend;
+       ctx->hdr_name = str_new(backend->pool, 128);
+       i_gettimeofday(&ctx->start);
+
+       return &ctx->ctx;
+}
+
+static int
+fts_backend_flatcurve_update_deinit(struct fts_backend_update_context *_ctx)
+{
+       struct flatcurve_fts_backend_update_context *ctx =
+               (struct flatcurve_fts_backend_update_context *)_ctx;
+       int diff, ret = _ctx->failed ? -1 : 0;
+       struct timeval now;
+
+       if (ret == 0) {
+               i_gettimeofday(&now);
+               diff = timeval_diff_msecs(&now, &ctx->start);
+
+               e_debug(ctx->backend->event, "Update transaction completed in "
+                       "%u.%03u secs", diff/1000, diff%1000);
+       }
+
+       str_free(&ctx->hdr_name);
+       p_free(ctx->backend->pool, ctx);
+
+       return ret;
+}
+
+static void
+fts_backend_flatcurve_update_set_mailbox(struct fts_backend_update_context *_ctx,
+                                        struct mailbox *box)
+{
+       const char *error;
+       struct flatcurve_fts_backend_update_context *ctx =
+               (struct flatcurve_fts_backend_update_context *)_ctx;
+
+       int ret = box == NULL ?
+               fts_backend_flatcurve_close_mailbox(ctx->backend, &error) :
+               fts_backend_flatcurve_set_mailbox(ctx->backend, box, &error);
+       if (ret < 0)
+               e_error(ctx->backend->event, "%s", error);
+}
+
+static void
+fts_backend_flatcurve_update_expunge(struct fts_backend_update_context *_ctx,
+                                    uint32_t uid)
+{
+       const char *error;
+       struct flatcurve_fts_backend_update_context *ctx =
+               (struct flatcurve_fts_backend_update_context *)_ctx;
+
+       e_debug(event_create_passthrough(ctx->backend->event)->
+               set_name("fts_flatcurve_expunge")->
+               add_str("mailbox", str_c(ctx->backend->boxname))->
+               add_int("uid", uid)->event(),
+               "Expunge uid=%d", uid);
+
+       if (fts_flatcurve_xapian_expunge(ctx->backend, uid, &error) < 0)
+               e_error(ctx->backend->event, "%s", error);
+}
+
+static bool
+fts_backend_flatcurve_update_set_build_key(struct fts_backend_update_context *_ctx,
+                                          const struct fts_backend_build_key *key)
+{
+       struct flatcurve_fts_backend_update_context *ctx =
+               (struct flatcurve_fts_backend_update_context *)_ctx;
+
+       i_assert(str_len(ctx->backend->boxname) > 0);
+
+       if (_ctx->failed || ctx->skip_uid)
+               return FALSE;
+
+       bool changed = FALSE;
+       if (ctx->uid != key->uid) {
+               changed = TRUE;
+               ctx->skip_uid = FALSE;
+               ctx->uid = key->uid;
+       }
+       ctx->type = key->type;
+
+       /* Specifically init message, as there is a chance that there
+        * is no valid search info in a message so the message will
+        * not be saved to DB after processing. */
+       if (changed) {
+               const char *error;
+               int ret = fts_flatcurve_xapian_init_msg(ctx, &error);
+               if (ret < 0) {
+                       e_error(ctx->backend->event, "%s", error);
+                       return FALSE;
+               }
+               if (ret == 0) {
+                       /* This UID has already been indexed, so skip all
+                        * future update calls. */
+                       ctx->skip_uid = TRUE;
+                       return FALSE;
+               }
+
+               e_debug(event_create_passthrough(ctx->backend->event)->
+                       set_name("fts_flatcurve_index")->
+                       add_str("mailbox", str_c(ctx->backend->boxname))->
+                       add_int("uid", key->uid)->event(),
+                       "Indexing uid=%d", key->uid);
+       }
+
+       switch (key->type) {
+       case FTS_BACKEND_BUILD_KEY_HDR:
+               i_assert(key->hdr_name != NULL);
+               str_append(ctx->hdr_name, key->hdr_name);
+               ctx->indexed_hdr = fts_header_want_indexed(key->hdr_name);
+               break;
+       case FTS_BACKEND_BUILD_KEY_MIME_HDR:
+       case FTS_BACKEND_BUILD_KEY_BODY_PART:
+               /* noop */
+               break;
+       case FTS_BACKEND_BUILD_KEY_BODY_PART_BINARY:
+               i_unreached();
+       }
+       return TRUE;
+}
+
+static void
+fts_backend_flatcurve_update_unset_build_key(struct fts_backend_update_context *_ctx)
+{
+       struct flatcurve_fts_backend_update_context *ctx =
+               (struct flatcurve_fts_backend_update_context *)_ctx;
+
+       str_truncate(ctx->hdr_name, 0);
+}
+
+static int
+fts_backend_flatcurve_update_build_more(struct fts_backend_update_context *_ctx,
+                                       const unsigned char *data, size_t size)
+{
+       struct flatcurve_fts_backend_update_context *ctx =
+               (struct flatcurve_fts_backend_update_context *)_ctx;
+
+       i_assert(ctx->uid != 0);
+
+       if (_ctx->failed || ctx->skip_uid)
+               return -1;
+
+       if (size < ctx->backend->fuser->set.min_term_size)
+               return 0;
+
+       /* Xapian has a hard limit of "245 bytes", at least with the glass
+        * and chert backends.  However, it is highly doubtful that people
+        * are realistically going to search with more than 10s of
+        * characters. Therefore, limit term size (via a configurable
+        * value). */
+       size = I_MIN(size, ctx->backend->fuser->set.max_term_size);
+
+       const char *error;
+       int ret;
+       switch (ctx->type) {
+       case FTS_BACKEND_BUILD_KEY_HDR:
+       case FTS_BACKEND_BUILD_KEY_MIME_HDR:
+               ret = fts_flatcurve_xapian_index_header(ctx, data, size, &error);
+               break;
+       case FTS_BACKEND_BUILD_KEY_BODY_PART:
+               ret = fts_flatcurve_xapian_index_body(ctx, data, size, &error);
+               break;
+       default:
+               i_unreached();
+       }
+
+       if (ret < 0)
+               e_error(ctx->backend->event, "%s", error);
+       return ret < 0 || _ctx->failed ? -1 : 0;
+}
+
+static const char *
+fts_backend_flatcurve_seq_range_string(ARRAY_TYPE(seq_range) *uids)
+{
+       string_t *dest = t_str_new(256);
+       imap_write_seq_range(dest, uids);
+       return str_c(dest);
+}
+
+static int
+fts_backend_flatcurve_rescan_box(struct flatcurve_fts_backend *backend,
+                                struct mailbox *box, pool_t pool,
+                                const char **error_r)
+{
+       bool dbexist = FALSE;
+       struct event_passthrough *e;
+       struct fts_flatcurve_xapian_query_iter *iter;
+       struct seq_range_iter iter2;
+       uint32_t low_uid = 0;
+       struct mail *mail;
+       ARRAY_TYPE(seq_range) expunged, missing, uids;
+       struct flatcurve_fts_query *query;
+       struct fts_flatcurve_xapian_query_result *result;
+       struct mail_search_args *search_args;
+       struct mail_search_context *search_ctx;
+       struct mailbox_transaction_context *trans;
+
+       /* Check for non-indexed mails. */
+       if (mailbox_sync(box, MAILBOX_SYNC_FLAG_FULL_READ) < 0)
+               return -1;
+
+       trans = mailbox_transaction_begin(box, 0, __func__);
+       search_args = mail_search_build_init();
+       mail_search_build_add_all(search_args);
+
+       p_array_init(&missing, pool, 32);
+       p_array_init(&uids, pool, 256);
+
+       int ret = 0;
+       search_ctx = mailbox_search_init(trans, search_args, NULL, 0, NULL);
+       while (mailbox_search_next(search_ctx, &mail)) {
+               seq_range_array_add(&uids, mail->uid);
+               ret = fts_flatcurve_xapian_uid_exists(backend, mail->uid, error_r);
+               if (ret < 0)
+                       break;
+               if (ret == 0)
+                       seq_range_array_add(&missing, mail->uid);
+               dbexist = TRUE;
+       }
+
+       if (mailbox_search_deinit(&search_ctx) < 0)
+               e_error(backend->event, "Could not deinit %s: %s",
+                       box->vname, mailbox_get_last_internal_error(box, NULL));
+
+       mail_search_args_unref(&search_args);
+       if (mailbox_transaction_commit(&trans) < 0)
+               e_error(backend->event, "Could not commit %s: %s",
+                       box->vname, mailbox_get_last_internal_error(box, NULL));
+
+       if (ret < 0 || !dbexist)
+               return ret;
+
+       e = event_create_passthrough(backend->event)->
+                                    set_name("fts_flatcurve_rescan")->
+                                    add_str("mailbox", box->name);
+
+       if (!array_is_empty(&missing)) {
+               /* There does not seem to be an easy way via FTS API (as of
+                * 2.3.15) to indicate what specific uids need to be indexed.
+                * Instead, delete all messages above the lowest, non-indexed
+                * UID and recreate the index the next time the mailbox
+                * is accessed. */
+               seq_range_array_iter_init(&iter2, &missing);
+               bool ret1 = seq_range_array_iter_nth(&iter2, 0, &low_uid);
+               i_assert(ret1);
+       }
+
+       query = p_new(pool, struct flatcurve_fts_query, 1);
+       query->backend = backend;
+       query->pool = pool;
+       fts_flatcurve_xapian_build_query_match_all(query);
+
+       p_array_init(&expunged, pool, 256);
+
+       iter = fts_flatcurve_xapian_query_iter_init(query);
+       while (fts_flatcurve_xapian_query_iter_next(iter, &result)) {
+               if ((low_uid > 0  && result->uid >= low_uid) ||
+                   (low_uid == 0 && !seq_range_exists(&uids, result->uid))) {
+                       if (fts_flatcurve_xapian_expunge(
+                               backend, result->uid, error_r) < 0)
+                               e_error(backend->event, "%s", *error_r);
+                       else
+                               seq_range_array_add(&expunged, result->uid);
+               }
+       }
+
+       ret = fts_flatcurve_xapian_query_iter_deinit(&iter, error_r);
+       fts_flatcurve_xapian_destroy_query(query);
+       if (ret < 0)
+               return -1;
+
+       if (array_is_empty(&expunged)) {
+               e_debug(e->add_str("status", "ok")->event(),
+                       "Rescan: no issues found");
+       } else T_BEGIN {
+               const char *u = fts_backend_flatcurve_seq_range_string(&expunged);
+               e->add_str("expunged", u);
+
+               if (low_uid > 0) {
+                       const char *u2 = fts_backend_flatcurve_seq_range_string(&missing);
+                       e_debug(e->add_str("status", "missing_msgs")->
+                               add_str("uids", u2)->event(),
+                               "Rescan: missing messages uids=%s expunged=%s",
+                               u2, u);
+               } else {
+                       e_debug(e->add_str("status", "expunge_msgs")->event(),
+                               "Rescan: expunge non-existent messages "
+                               "expunged=%s", u);
+               }
+       } T_END;
+       return ret;
+}
+
+static int
+fts_backend_flatcurve_iterate_ns(struct fts_backend *_backend,
+                                enum fts_backend_flatcurve_action act)
+{
+       const char *error;
+       struct flatcurve_fts_backend *backend =
+               (struct flatcurve_fts_backend *)_backend;
+       struct mailbox *box;
+       const struct mailbox_info *info;
+       struct mailbox_list_iterate_context *iter;
+       const enum mailbox_list_iter_flags iter_flags =
+               MAILBOX_LIST_ITER_NO_AUTO_BOXES |
+               MAILBOX_LIST_ITER_RETURN_NO_FLAGS;
+       enum mailbox_flags mbox_flags = 0;
+       pool_t pool = NULL;
+
+       bool failed = FALSE;
+       iter = mailbox_list_iter_init(_backend->ns->list, "*", iter_flags);
+       while ((info = mailbox_list_iter_next(iter)) != NULL) {
+               box = mailbox_alloc(backend->backend.ns->list, info->vname,
+                                   mbox_flags);
+
+               if (fts_backend_flatcurve_set_mailbox(
+                       backend, box, &error) < 0) {
+                       e_error(backend->event, "%s", error);
+                       failed = TRUE;
+                       continue;
+               }
+
+               switch (act) {
+               case FTS_BACKEND_FLATCURVE_ACTION_OPTIMIZE:
+                       if (fts_flatcurve_xapian_optimize_box(
+                               backend, &error) < 0) {
+                               e_error(backend->event, "%s", error);
+                               failed = TRUE;
+                       }
+                       break;
+               case FTS_BACKEND_FLATCURVE_ACTION_RESCAN:
+                       if (pool == NULL)
+                               pool = pool_alloconly_create(
+                                       FTS_FLATCURVE_LABEL " rescan pool",
+                                       4096);
+                       if (fts_backend_flatcurve_rescan_box(
+                               backend, box, pool, &error) < 0) {
+                               e_error(backend->event, "%s", error);
+                               failed = TRUE;
+                       }
+                       p_clear(pool);
+                       break;
+               }
+
+               mailbox_free(&box);
+       }
+       if (mailbox_list_iter_deinit(&iter) < 0)
+               e_error(backend->event, "%s",
+                       mailbox_list_get_last_internal_error(
+                               _backend->ns->list, NULL));
+
+       pool_unref(&pool);
+
+       return failed ? -1 : 0;
+}
+
+static int fts_backend_flatcurve_optimize(struct fts_backend *backend)
+{
+       return fts_backend_flatcurve_iterate_ns(backend,
+                       FTS_BACKEND_FLATCURVE_ACTION_OPTIMIZE);
+}
+
+static int fts_backend_flatcurve_rescan(struct fts_backend *backend)
+{
+       return fts_backend_flatcurve_iterate_ns(backend,
+                       FTS_BACKEND_FLATCURVE_ACTION_RESCAN);
+}
+
+static int
+fts_backend_flatcurve_lookup_multi(struct fts_backend *_backend,
+                                  struct mailbox *const boxes[],
+                                  struct mail_search_arg *args,
+                                  enum fts_lookup_flags flags,
+                                  struct fts_multi_result *result)
+{
+       const char *error;
+       struct flatcurve_fts_backend *backend =
+               (struct flatcurve_fts_backend *)_backend;
+       ARRAY(struct fts_result) box_results;
+       struct flatcurve_fts_result *fresult;
+       unsigned int i;
+       struct flatcurve_fts_query *query;
+       struct fts_result *r;
+       int ret = 0;
+
+       /* Create query */
+       query = p_new(result->pool, struct flatcurve_fts_query, 1);
+       query->args = args;
+       query->backend = backend;
+       query->flags = flags;
+       query->pool = result->pool;
+       if (fts_flatcurve_xapian_build_query(query, &error) < 0) {
+               fts_flatcurve_xapian_destroy_query(query);
+               e_error(backend->event, "%s", error);
+               return -1;
+       }
+
+       p_array_init(&box_results, result->pool, 8);
+       for (i = 0; boxes[i] != NULL; i++) {
+               r = array_append_space(&box_results);
+               r->box = boxes[i];
+
+               fresult = p_new(result->pool, struct flatcurve_fts_result, 1);
+               p_array_init(&fresult->scores, result->pool, 32);
+               p_array_init(&fresult->uids, result->pool, 32);
+
+               if (fts_backend_flatcurve_set_mailbox(backend, r->box, &error) < 0) {
+                       ret = -1;
+                       break;
+               }
+
+               if (fts_flatcurve_xapian_run_query(query, fresult, &error) < 0) {
+                       ret = -1;
+                       break;
+               }
+
+               if ((query->maybe) ||
+                   ((flags & FTS_LOOKUP_FLAG_NO_AUTO_FUZZY) != 0))
+                       r->maybe_uids = fresult->uids;
+               else
+                       r->definite_uids = fresult->uids;
+               r->scores = fresult->scores;
+
+               /* This was an empty query - skip output of debug info. */
+               if (query->qtext == NULL)
+                       continue;
+
+               T_BEGIN {
+                       const char *u = fts_backend_flatcurve_seq_range_string(&fresult->uids);
+                       e_debug(event_create_passthrough(backend->event)->
+                               set_name("fts_flatcurve_query")->
+                               add_int("count", array_count(&fresult->uids))->
+                               add_str("mailbox", r->box->vname)->
+                               add_str("maybe", query->maybe ? "yes" : "no")->
+                               add_str("query", str_c(query->qtext))->
+                               add_str("uids", u)->event(), "Query (%s) "
+                               "%smatches=%d uids=%s", str_c(query->qtext),
+                               query->maybe ? "maybe_" : "",
+                               array_count(&fresult->uids), u);
+               } T_END;
+       }
+
+       if (ret == 0) {
+               array_append_zero(&box_results);
+               result->box_results = array_idx_modifiable(&box_results, 0);
+       } else {
+               e_error(backend->event, "%s", error);
+       }
+
+       fts_flatcurve_xapian_destroy_query(query);
+       return ret;
+}
+
+static int
+fts_backend_flatcurve_lookup(struct fts_backend *_backend, struct mailbox *box,
+                            struct mail_search_arg *args,
+                            enum fts_lookup_flags flags,
+                            struct fts_result *result)
+{
+       struct mailbox *boxes[2];
+       struct fts_multi_result multi_result;
+       const struct fts_result *br;
+       int ret;
+
+       boxes[0] = box;
+       boxes[1] = NULL;
+
+       i_zero(&multi_result);
+       multi_result.pool = pool_alloconly_create(FTS_FLATCURVE_LABEL
+                                                 " results pool", 4096);
+       ret = fts_backend_flatcurve_lookup_multi(_backend, boxes, args,
+                                                flags, &multi_result);
+
+       if ((ret == 0) &&
+           (multi_result.box_results != NULL) &&
+           (multi_result.box_results[0].box != NULL)) {
+               br = &multi_result.box_results[0];
+               result->box = br->box;
+               if (array_is_created(&br->definite_uids))
+                       array_append_array(&result->definite_uids,
+                                          &br->definite_uids);
+               if (array_is_created(&br->maybe_uids))
+                       array_append_array(&result->maybe_uids,
+                                          &br->maybe_uids);
+               array_append_array(&result->scores, &br->scores);
+               result->scores_sorted = TRUE;
+       }
+       pool_unref(&multi_result.pool);
+
+       return ret;
+}
+
+/* Returns: 0 if FTS directory doesn't exist, 1 on deletion, -1 on error */
+int fts_backend_flatcurve_delete_dir(const char *path, const char **error_r)
+{
+       struct stat st;
+       enum unlink_directory_flags unlink_flags = UNLINK_DIRECTORY_FLAG_RMDIR;
+
+       if (stat(path, &st) < 0) {
+               if (errno == ENOENT)
+                       return 0;
+               else {
+                       *error_r = t_strdup_printf("Deleting fts data failed: "
+                               "stat(%s) failed: %m", path);
+                       return -1;
+               }
+       }
+
+       if (S_ISDIR(st.st_mode)) {
+               if (unlink_directory(path, unlink_flags, error_r) < 0) {
+                       *error_r = t_strdup_printf("Deleting fts data failed: "
+                               "unlink_directory(%s) failed: %s", path, *error_r);
+                       return -1;
+               }
+       } else if (unlink(path) < 0) {
+               *error_r = t_strdup_printf("Deleting fts data failed: "
+                       "unlink(%s) failed: %m", path);
+               return -1;
+       }
+
+       return 1;
+}
+
+
+struct fts_backend fts_backend_flatcurve = {
+       .name = "flatcurve",
+       .flags = FTS_BACKEND_FLAG_TOKENIZED_INPUT,
+       .v = {
+               .alloc = fts_backend_flatcurve_alloc,
+               .init = fts_backend_flatcurve_init,
+               .deinit = fts_backend_flatcurve_deinit,
+               .get_last_uid = fts_backend_flatcurve_get_last_uid,
+               .update_init = fts_backend_flatcurve_update_init,
+               .update_deinit = fts_backend_flatcurve_update_deinit,
+               .update_set_mailbox = fts_backend_flatcurve_update_set_mailbox,
+               .update_expunge = fts_backend_flatcurve_update_expunge,
+               .update_set_build_key = fts_backend_flatcurve_update_set_build_key,
+               .update_unset_build_key = fts_backend_flatcurve_update_unset_build_key,
+               .update_build_more = fts_backend_flatcurve_update_build_more,
+               .refresh = fts_backend_flatcurve_refresh,
+               .rescan = fts_backend_flatcurve_rescan,
+               .optimize = fts_backend_flatcurve_optimize,
+               .can_lookup = fts_backend_default_can_lookup,
+               .lookup = fts_backend_flatcurve_lookup,
+               .lookup_multi = fts_backend_flatcurve_lookup_multi,
+               .lookup_done = NULL,
+       }
+};
diff --git a/src/plugins/fts-flatcurve/fts-backend-flatcurve.h b/src/plugins/fts-flatcurve/fts-backend-flatcurve.h
new file mode 100644 (file)
index 0000000..deb9697
--- /dev/null
@@ -0,0 +1,69 @@
+/* Copyright (c) the Dovecot authors, based on code by Michael Slusarz.
+ * See the included COPYING file */
+
+#ifndef FTS_FLATCURVE_BACKEND_H
+#define FTS_FLATCURVE_BACKEND_H
+
+#include "file-lock.h"
+#include "fts-flatcurve-plugin.h"
+
+#define FTS_FLATCURVE_LABEL "fts-flatcurve"
+#define FTS_FLATCURVE_DEBUG_PREFIX FTS_FLATCURVE_LABEL ": "
+
+struct flatcurve_fts_backend {
+       struct fts_backend backend;
+       string_t *boxname, *db_path;
+
+       struct event *event;
+
+       struct fts_flatcurve_user *fuser;
+       struct flatcurve_xapian *xapian;
+
+       enum file_lock_method parsed_lock_method;
+
+       pool_t pool;
+};
+
+struct flatcurve_fts_backend_update_context {
+       struct fts_backend_update_context ctx;
+
+       struct flatcurve_fts_backend *backend;
+       enum fts_backend_build_key_type type;
+       string_t *hdr_name;
+       uint32_t uid;
+       struct timeval start;
+
+       bool indexed_hdr:1;
+       bool skip_uid:1;
+};
+
+struct flatcurve_fts_query {
+       struct mail_search_arg *args;
+       enum fts_lookup_flags flags;
+       string_t *qtext;
+
+       struct flatcurve_fts_backend *backend;
+       struct flatcurve_fts_query_xapian *xapian;
+
+       pool_t pool;
+
+       bool maybe:1;
+};
+
+struct flatcurve_fts_result {
+       ARRAY_TYPE(fts_score_map) scores;
+       ARRAY_TYPE(seq_range) uids;
+};
+
+
+int
+fts_backend_flatcurve_set_mailbox(struct flatcurve_fts_backend *backend,
+                                  struct mailbox *box, const char **error_r);
+int
+fts_backend_flatcurve_close_mailbox(struct flatcurve_fts_backend *backend,
+                                   const char **error_r);
+
+/* Returns: 0 if FTS directory doesn't exist, 1 on deletion, -1 on error */
+int fts_backend_flatcurve_delete_dir(const char *path, const char **error_r);
+
+#endif
diff --git a/src/plugins/fts-flatcurve/fts-flatcurve-plugin.c b/src/plugins/fts-flatcurve/fts-flatcurve-plugin.c
new file mode 100644 (file)
index 0000000..bf19eb9
--- /dev/null
@@ -0,0 +1,164 @@
+/* Copyright (c) the Dovecot authors, based on code by Michael Slusarz.
+ * See the included COPYING file */
+
+#include "lib.h"
+#include "mail-storage-hooks.h"
+#include "settings-parser.h"
+#include "fts-user.h"
+#include "fts-backend-flatcurve.h"
+#include "fts-backend-flatcurve-xapian.h"
+#include "fts-flatcurve-plugin.h"
+
+#define FTS_FLATCURVE_PLUGIN_COMMIT_LIMIT "fts_flatcurve_commit_limit"
+#define FTS_FLATCURVE_COMMIT_LIMIT_DEFAULT 500
+
+#define FTS_FLATCURVE_PLUGIN_MAX_TERM_SIZE "fts_flatcurve_max_term_size"
+#define FTS_FLATCURVE_MAX_TERM_SIZE_DEFAULT 30
+#define FTS_FLATCURVE_MAX_TERM_SIZE_MAX 200
+
+#define FTS_FLATCURVE_PLUGIN_MIN_TERM_SIZE "fts_flatcurve_min_term_size"
+#define FTS_FLATCURVE_MIN_TERM_SIZE_DEFAULT 2
+
+#define FTS_FLATCURVE_PLUGIN_OPTIMIZE_LIMIT "fts_flatcurve_optimize_limit"
+#define FTS_FLATCURVE_OPTIMIZE_LIMIT_DEFAULT 10
+
+#define FTS_FLATCURVE_PLUGIN_ROTATE_COUNT "fts_flatcurve_rotate_count"
+#define FTS_FLATCURVE_ROTATE_SIZE_DEFAULT 5000
+
+#define FTS_FLATCURVE_PLUGIN_ROTATE_TIME "fts_flatcurve_rotate_time"
+#define FTS_FLATCURVE_ROTATE_TIME_DEFAULT 5000
+
+#define FTS_FLATCURVE_PLUGIN_SUBSTRING_SEARCH "fts_flatcurve_substring_search"
+
+const char *fts_flatcurve_plugin_version = DOVECOT_ABI_VERSION;
+
+struct fts_flatcurve_user_module fts_flatcurve_user_module =
+       MODULE_CONTEXT_INIT(&mail_user_module_register);
+
+static void fts_flatcurve_mail_user_deinit(struct mail_user *user)
+{
+       struct fts_flatcurve_user *fuser =
+               FTS_FLATCURVE_USER_CONTEXT_REQUIRE(user);
+
+       fts_mail_user_deinit(user);
+       fuser->module_ctx.super.deinit(user);
+}
+
+static int
+fts_flatcurve_plugin_init_settings(struct mail_user *user,
+                                  struct fts_flatcurve_settings *set,
+                                  const char **error_r)
+{
+       const char *pset;
+       unsigned int val;
+
+       set->commit_limit = FTS_FLATCURVE_COMMIT_LIMIT_DEFAULT;
+       pset = mail_user_plugin_getenv(user, FTS_FLATCURVE_PLUGIN_COMMIT_LIMIT);
+       if (pset != NULL) {
+               if (str_to_uint(pset, &val) < 0) {
+                       *error_r = t_strdup_printf("Invalid %s: %s",
+                               FTS_FLATCURVE_PLUGIN_COMMIT_LIMIT, pset);
+                       return -1;
+               }
+               set->commit_limit = val;
+       }
+
+       set->max_term_size = FTS_FLATCURVE_MAX_TERM_SIZE_DEFAULT;
+       pset = mail_user_plugin_getenv(user, FTS_FLATCURVE_PLUGIN_MAX_TERM_SIZE);
+       if (pset != NULL) {
+               if (str_to_uint(pset, &val) < 0) {
+                       *error_r = t_strdup_printf("Invalid %s: %s",
+                               FTS_FLATCURVE_PLUGIN_MAX_TERM_SIZE, pset);
+                       return -1;
+               }
+               set->max_term_size = I_MIN(val, FTS_FLATCURVE_MAX_TERM_SIZE_MAX);
+       }
+
+       set->min_term_size = FTS_FLATCURVE_MIN_TERM_SIZE_DEFAULT;
+       pset = mail_user_plugin_getenv(user, FTS_FLATCURVE_PLUGIN_MIN_TERM_SIZE);
+       if (pset != NULL) {
+               if (str_to_uint(pset, &val) < 0) {
+                       *error_r = t_strdup_printf("Invalid %s: %s",
+                               FTS_FLATCURVE_PLUGIN_MIN_TERM_SIZE, pset);
+                       return -1;
+               }
+               set->min_term_size = val;
+       }
+
+       set->optimize_limit = FTS_FLATCURVE_OPTIMIZE_LIMIT_DEFAULT;
+       pset = mail_user_plugin_getenv(user, FTS_FLATCURVE_PLUGIN_OPTIMIZE_LIMIT);
+       if (pset != NULL) {
+               if (str_to_uint(pset, &val) < 0) {
+                       *error_r = t_strdup_printf("Invalid %s: %s",
+                               FTS_FLATCURVE_PLUGIN_OPTIMIZE_LIMIT, pset);
+                       return -1;
+               }
+               set->optimize_limit = val;
+       }
+
+       set->rotate_count = FTS_FLATCURVE_ROTATE_SIZE_DEFAULT;
+       pset = mail_user_plugin_getenv(user, FTS_FLATCURVE_PLUGIN_ROTATE_COUNT);
+       if (pset != NULL) {
+               if (str_to_uint(pset, &val) < 0) {
+                       *error_r = t_strdup_printf("Invalid %s: %s",
+                               FTS_FLATCURVE_PLUGIN_ROTATE_COUNT, pset);
+                       return -1;
+               }
+               set->rotate_count = val;
+       }
+
+       set->rotate_time = FTS_FLATCURVE_ROTATE_TIME_DEFAULT;
+       pset = mail_user_plugin_getenv(user, FTS_FLATCURVE_PLUGIN_ROTATE_TIME);
+       if (pset != NULL) {
+               const char *error;
+               if (settings_get_time_msecs(pset, &val, &error) < 0) {
+                       *error_r = t_strdup_printf("Invalid %s: %s",
+                               FTS_FLATCURVE_PLUGIN_ROTATE_TIME, error);
+                       return -1;
+               }
+               set->rotate_time = val;
+       }
+
+       set->substring_search = mail_user_plugin_getenv_bool(
+               user, FTS_FLATCURVE_PLUGIN_SUBSTRING_SEARCH);
+
+       return 0;
+}
+
+static void fts_flatcurve_mail_user_created(struct mail_user *user)
+{
+       struct mail_user_vfuncs *v = user->vlast;
+       struct fts_flatcurve_user *fuser;
+       const char *error;
+
+       fuser = p_new(user->pool, struct fts_flatcurve_user, 1);
+
+       if (fts_flatcurve_plugin_init_settings(user, &fuser->set, &error) < 0 ||
+           fts_mail_user_init(user, TRUE, &error) < 0) {
+               e_error(user->event, FTS_FLATCURVE_DEBUG_PREFIX "%s", error);
+               return;
+       }
+
+       fuser->module_ctx.super = *v;
+       user->vlast = &fuser->module_ctx.super;
+       v->deinit = fts_flatcurve_mail_user_deinit;
+       MODULE_CONTEXT_SET(user, fts_flatcurve_user_module, fuser);
+}
+
+static struct mail_storage_hooks fts_backend_mail_storage_hooks = {
+       .mail_user_created = fts_flatcurve_mail_user_created
+};
+
+void fts_flatcurve_plugin_init(struct module *module)
+{
+       fts_backend_register(&fts_backend_flatcurve);
+       mail_storage_hooks_add(module, &fts_backend_mail_storage_hooks);
+}
+
+void fts_flatcurve_plugin_deinit(void)
+{
+       fts_backend_unregister(fts_backend_flatcurve.name);
+       mail_storage_hooks_remove(&fts_backend_mail_storage_hooks);
+}
+
+const char *fts_flatcurve_plugin_dependencies[] = { "fts", NULL };
diff --git a/src/plugins/fts-flatcurve/fts-flatcurve-plugin.h b/src/plugins/fts-flatcurve/fts-flatcurve-plugin.h
new file mode 100644 (file)
index 0000000..42519be
--- /dev/null
@@ -0,0 +1,40 @@
+/* Copyright (c) the Dovecot authors, based on code by Michael Slusarz.
+ * See the included COPYING file */
+
+#ifndef FTS_FLATCURVE_PLUGIN_H
+#define FTS_FLATCURVE_PLUGIN_H
+
+#include "module-context.h"
+#include "mail-user.h"
+#include "lib.h"
+#include "fts-api-private.h"
+
+#define FTS_FLATCURVE_USER_CONTEXT(obj) \
+       MODULE_CONTEXT(obj, fts_flatcurve_user_module)
+#define FTS_FLATCURVE_USER_CONTEXT_REQUIRE(obj) \
+       MODULE_CONTEXT_REQUIRE(obj, fts_flatcurve_user_module)
+
+struct fts_flatcurve_settings {
+       unsigned int commit_limit;
+       unsigned int max_term_size;
+       unsigned int min_term_size;
+       unsigned int optimize_limit;
+       unsigned int rotate_count;
+       unsigned int rotate_time;
+       bool substring_search;
+};
+
+struct fts_flatcurve_user {
+       union mail_user_module_context module_ctx;
+       struct flatcurve_fts_backend *backend;
+       struct fts_flatcurve_settings set;
+};
+
+extern struct fts_backend fts_backend_flatcurve;
+extern MODULE_CONTEXT_DEFINE(fts_flatcurve_user_module, &mail_user_module_register);
+
+void fts_flatcurve_plugin_init(struct module *module);
+void fts_flatcurve_plugin_deinit(void);
+
+#endif
+