--- /dev/null
+/* Copyright (c) the Dovecot authors, based on code by Michael Slusarz.
+ * See the included COPYING file */
+
+extern "C" {
+#include "lib.h"
+#include "file-create-locked.h"
+#include "hash.h"
+#include "message-header-parser.h"
+#include "path-util.h"
+#include "mail-storage-private.h"
+#include "mail-search.h"
+#include "sleep.h"
+#include "str.h"
+#include "unichar.h"
+#include "time-util.h"
+#include "fts-backend-flatcurve.h"
+#include "fts-backend-flatcurve-xapian.h"
+#include <dirent.h>
+#include <stdio.h>
+};
+
+#pragma GCC diagnostic push
+# ifdef __clang__ // for building xapian's libs from gcc built debian package
+# pragma GCC diagnostic ignored "-W#warnings"
+# pragma GCC diagnostic ignored "-Wunused-command-line-argument"
+# endif
+# include <xapian.h>
+#pragma GCC diagnostic pop
+
+#include <algorithm>
+#include <sstream>
+#include <string>
+
+/* How Xapian DBs work in fts-flatcurve: all data lives in under one
+ * per-mailbox directory (FTS_FLATCURVE_LABEL) stored at the root of the
+ * mailbox indexes directory.
+ *
+ * There are two different permanent data types within that library:
+ * - "index.###": The actual Xapian DB shards. Combined, this comprises the
+ * FTS data for the mailbox. These shards may be directly written to, but
+ * only when deleting messages - new messages are never stored directly to
+ * this DB. Additionally, these DBs are never directly queried; a dummy
+ * object is used to indirectly query them. These indexes may occasionally
+ * be combined into a single index via optimization processes.
+ * - "current.###": Xapian DB that contains the index shard where new messages
+ * are stored. Once this index reaches certain (configurable) limits, a new
+ * shard is created and rotated in as the new current index by creating
+ * a shard with a suffix higher than the previous current DB.
+ *
+ * Within a session, we create a dummy Xapian::Database object, scan the data
+ * directory for all indexes, and add each of them to the dummy object. For
+ * queries, we then just need to query the dummy object and Xapian handles
+ * everything for us. Writes need to be handled separately, as a
+ * WritableDatabase object only supports a single on-disk DB at a time; a DB
+ * shard, whether "index" or "current", must be directly written to in order
+ * to modify.
+ *
+ * Data storage: Xapian does not support substring searches by default, so
+ * (if substring searching is enabled) we instead need to explicitly store all
+ * substrings of the string, up to the point where the substring becomes
+ * smaller than min_term_size. Requires libicu in order to correctly handle
+ * multi-byte characters. */
+#define FLATCURVE_XAPIAN_DB_PREFIX "index."
+#define FLATCURVE_XAPIAN_DB_CURRENT_PREFIX "current."
+
+/* These are temporary data types that may appear in the fts directory. They
+ * are not intended to perservere between sessions. */
+#define FLATCURVE_XAPIAN_DB_OPTIMIZE "optimize"
+
+/* Xapian "recommendations" are that you begin your local prefix identifier
+ * with "X" for data that doesn't match with a data type listed as a Xapian
+ * "convention". However, this recommendation is for maintaining
+ * compatability with the search front-end (Omega) that they provide. We don't
+ * care about compatability, so save storage space by using single letter
+ * prefixes. Bodytext is stored without prefixes, as it is expected to be the
+ * single largest storage pool. */
+
+/* Caution: the code below expects these prefix to be 1-char long */
+#define FLATCURVE_XAPIAN_ALL_HEADERS_PREFIX "A"
+#define FLATCURVE_XAPIAN_BOOLEAN_FIELD_PREFIX "B"
+#define FLATCURVE_XAPIAN_HEADER_PREFIX "H"
+
+#define FLATCURVE_XAPIAN_ALL_HEADERS_QP "allhdrs"
+#define FLATCURVE_XAPIAN_HEADER_BOOL_QP "hdr_bool"
+#define FLATCURVE_XAPIAN_HEADER_QP "hdr_"
+#define FLATCURVE_XAPIAN_BODY_QP "body"
+
+/* Version database, so that any schema changes can be caught. */
+#define FLATCURVE_XAPIAN_DB_KEY_PREFIX "dovecot."
+#define FLATCURVE_XAPIAN_DB_VERSION_KEY \
+ FLATCURVE_XAPIAN_DB_KEY_PREFIX FTS_FLATCURVE_LABEL
+#define FLATCURVE_XAPIAN_DB_VERSION 1
+
+#define FLATCURVE_DBW_LOCK_RETRY_SECS 1
+#define FLATCURVE_DBW_LOCK_RETRY_MAX 60
+#define FLATCURVE_MANUAL_OPTIMIZE_COMMIT_LIMIT 500
+
+/* Lock: needed to ensure we don't run into race conditions when
+ * manipulating current directory. */
+#define FLATCURVE_XAPIAN_LOCK_FNAME "flatcurve-lock"
+#define FLATCURVE_XAPIAN_LOCK_TIMEOUT_SECS 5
+
+#define ENUM_EMPTY(x) ((enum x) 0)
+
+
+struct flatcurve_xapian_db_path {
+ const char *fname;
+ const char *path;
+};
+
+enum flatcurve_xapian_db_type {
+ FLATCURVE_XAPIAN_DB_TYPE_INDEX,
+ FLATCURVE_XAPIAN_DB_TYPE_CURRENT,
+ FLATCURVE_XAPIAN_DB_TYPE_OPTIMIZE,
+ FLATCURVE_XAPIAN_DB_TYPE_LOCK,
+ FLATCURVE_XAPIAN_DB_TYPE_UNKNOWN
+};
+
+struct flatcurve_xapian_db {
+ Xapian::Database *db;
+ Xapian::WritableDatabase *dbw;
+ struct flatcurve_xapian_db_path *dbpath;
+ unsigned int changes;
+ enum flatcurve_xapian_db_type type;
+};
+HASH_TABLE_DEFINE_TYPE(xapian_db, char *, struct flatcurve_xapian_db *);
+
+struct flatcurve_xapian {
+ /* Current database objects. */
+ struct flatcurve_xapian_db *dbw_current;
+ Xapian::Database *db_read;
+ HASH_TABLE_TYPE(xapian_db) dbs;
+ unsigned int shards;
+
+ /* Locking for current shard manipulation. */
+ struct file_lock *lock;
+ const char *lock_path;
+
+ /* Xapian pool: used for per mailbox DB info, so it can be easily
+ * cleared when switching mailboxes. Not for use with long
+ * lived data (e.g. optimize). */
+ pool_t pool;
+
+ /* Current document. */
+ Xapian::Document *doc;
+ uint32_t doc_uid;
+ unsigned int doc_updates;
+ bool doc_created:1;
+
+ /* List of mailboxes to optimize at shutdown. */
+ HASH_TABLE(char *, char *) optimize;
+
+ bool deinit:1;
+};
+
+ARRAY_DEFINE_TYPE(flatcurve_fts_query_arg, struct flatcurve_fts_query_arg);
+struct flatcurve_fts_query_arg {
+ string_t *value;
+
+ bool is_and:1;
+ bool is_not:1;
+};
+
+struct flatcurve_fts_query_xapian {
+ Xapian::Query *query;
+ Xapian::QueryParser *qp;
+ ARRAY_TYPE(flatcurve_fts_query_arg) args;
+};
+
+struct flatcurve_xapian_db_iter {
+ struct flatcurve_fts_backend *backend;
+ DIR *dirp;
+ char *error;
+
+ /* These are set every time next() is run. */
+ struct flatcurve_xapian_db_path *path;
+ enum flatcurve_xapian_db_type type;
+};
+
+enum flatcurve_xapian_db_opts {
+ FLATCURVE_XAPIAN_DB_NOCREATE_CURRENT = BIT(0),
+ FLATCURVE_XAPIAN_DB_IGNORE_EMPTY = BIT(1),
+ FLATCURVE_XAPIAN_DB_NOCLOSE_CURRENT = BIT(2)
+};
+
+enum flatcurve_xapian_wdb {
+ FLATCURVE_XAPIAN_WDB_CREATE = BIT(0)
+};
+
+enum flatcurve_xapian_db_close {
+ FLATCURVE_XAPIAN_DB_CLOSE_WDB_COMMIT = BIT(0),
+ FLATCURVE_XAPIAN_DB_CLOSE_WDB = BIT(1),
+ FLATCURVE_XAPIAN_DB_CLOSE_DB = BIT(2),
+ FLATCURVE_XAPIAN_DB_CLOSE_ROTATE = BIT(3),
+ FLATCURVE_XAPIAN_DB_CLOSE_MBOX = BIT(4)
+};
+
+/* Externally accessible struct. */
+struct fts_flatcurve_xapian_query_iter {
+ struct flatcurve_fts_backend *backend;
+ struct flatcurve_fts_query *query;
+ struct fts_flatcurve_xapian_query_result *result;
+ char *error;
+ Xapian::Database *db;
+ Xapian::Enquire *enquire;
+ Xapian::MSetIterator mset_iter;
+};
+
+static int
+fts_flatcurve_xapian_check_db_version(struct flatcurve_fts_backend *backend,
+ struct flatcurve_xapian_db *xdb,
+ const char **error_r);
+static int
+fts_flatcurve_xapian_close_db(struct flatcurve_fts_backend *backend,
+ struct flatcurve_xapian_db *xdb,
+ enum flatcurve_xapian_db_close opts,
+ const char **error_r);
+static int
+fts_flatcurve_xapian_close_dbs(struct flatcurve_fts_backend *backend,
+ enum flatcurve_xapian_db_close opts,
+ const char **error_r);
+static int
+fts_flatcurve_xapian_db_populate(struct flatcurve_fts_backend *backend,
+ enum flatcurve_xapian_db_opts opts,
+ const char **error_r);
+
+void fts_flatcurve_xapian_init(struct flatcurve_fts_backend *backend)
+{
+ backend->xapian = p_new(backend->pool, struct flatcurve_xapian, 1);
+ backend->xapian->pool =
+ pool_alloconly_create(FTS_FLATCURVE_LABEL " xapian", 2048);
+ hash_table_create(&backend->xapian->dbs, backend->xapian->pool,
+ 4, str_hash, strcmp);
+}
+
+void fts_flatcurve_xapian_deinit(struct flatcurve_fts_backend *backend)
+{
+ struct flatcurve_xapian *x = backend->xapian;
+
+ x->deinit = TRUE;
+ if (hash_table_is_created(x->optimize)) {
+ struct hash_iterate_context *iter =
+ hash_table_iterate_init(x->optimize);
+
+ void *key, *val;
+ while (hash_table_iterate(iter, x->optimize, &key, &val)) {
+ str_append(backend->boxname, (const char *)key);
+ str_append(backend->db_path, (const char *)val);
+
+ const char *error;
+ if (fts_flatcurve_xapian_optimize_box(
+ backend, &error) < 0)
+ e_error(backend->event, "%s", error);
+ }
+
+ hash_table_iterate_deinit(&iter);
+ hash_table_destroy(&x->optimize);
+ }
+ hash_table_destroy(&x->dbs);
+ pool_unref(&x->pool);
+ x->deinit = FALSE;
+}
+
+static struct flatcurve_xapian_db_path *
+fts_flatcurve_xapian_create_db_path(struct flatcurve_fts_backend *backend,
+ const char *fname)
+{
+ struct flatcurve_xapian_db_path *dbpath;
+
+ dbpath = p_new(backend->xapian->pool,
+ struct flatcurve_xapian_db_path, 1);
+ dbpath->fname = p_strdup(backend->xapian->pool, fname);
+ dbpath->path = p_strdup_printf(backend->xapian->pool, "%s%s",
+ str_c(backend->db_path), fname);
+
+ return dbpath;
+}
+
+
+/* If dbpath = NULL, delete the entire flatcurve index
+ * Returns: 0 if FTS directory doesn't exist, 1 on deletion, -1 on error */
+static int
+fts_flatcurve_xapian_delete(struct flatcurve_fts_backend *backend,
+ struct flatcurve_xapian_db_path *dbpath,
+ const char **error_r)
+{
+ const char *path = dbpath == NULL ?
+ str_c(backend->db_path) : dbpath->path;
+ return fts_backend_flatcurve_delete_dir(path, error_r);
+}
+
+static flatcurve_xapian_db_iter *
+fts_flatcurve_xapian_db_iter_init(struct flatcurve_fts_backend *backend,
+ enum flatcurve_xapian_db_opts opts)
+{
+ flatcurve_xapian_db_iter *iter =
+ p_new(backend->xapian->pool, struct flatcurve_xapian_db_iter, 1);
+ iter->backend = backend;
+ iter->dirp = opendir(str_c(backend->db_path));
+
+ if (iter->dirp == NULL &&
+ HAS_NO_BITS(opts, FLATCURVE_XAPIAN_DB_NOCREATE_CURRENT)) {
+ iter->error = i_strdup_printf(
+ "Cannot open DB (RO); opendir(%s) failed: %m",
+ str_c(backend->db_path));
+ }
+ return iter;
+}
+
+static bool
+fts_flatcurve_xapian_db_iter_next(struct flatcurve_xapian_db_iter *iter)
+{
+ if (iter->error != NULL || iter->dirp == NULL)
+ return FALSE;
+
+ errno = 0;
+ struct dirent *dir = readdir(iter->dirp);
+ if (errno < 0) {
+ iter->error = i_strdup_printf(
+ "readdir(%s) failed: %m",
+ str_c(iter->backend->db_path));
+ return FALSE;
+ }
+
+ if (dir == NULL)
+ return FALSE;
+
+ if (strcmp(dir->d_name, ".") == 0 ||
+ strcmp(dir->d_name, "..") == 0)
+ return fts_flatcurve_xapian_db_iter_next(iter);
+
+ iter->type = FLATCURVE_XAPIAN_DB_TYPE_UNKNOWN;
+ iter->path = fts_flatcurve_xapian_create_db_path(
+ iter->backend, dir->d_name);
+
+ struct stat st;
+ if (stat(iter->path->path, &st) < 0) {
+ iter->error = i_strdup_printf(
+ "stat(%s) failed: %m",
+ str_c(iter->backend->db_path));
+ return FALSE;
+ }
+
+ if (str_begins_with(dir->d_name, FLATCURVE_XAPIAN_LOCK_FNAME)) {
+ iter->type = FLATCURVE_XAPIAN_DB_TYPE_LOCK;
+ return TRUE;
+ }
+
+ if (!S_ISDIR(st.st_mode))
+ return TRUE;
+
+ if (str_begins_with(dir->d_name, FLATCURVE_XAPIAN_DB_PREFIX))
+ iter->type = FLATCURVE_XAPIAN_DB_TYPE_INDEX;
+ else if (str_begins_with(dir->d_name, FLATCURVE_XAPIAN_DB_CURRENT_PREFIX))
+ iter->type = FLATCURVE_XAPIAN_DB_TYPE_CURRENT;
+ else if (strcmp(dir->d_name, FLATCURVE_XAPIAN_DB_OPTIMIZE) == 0)
+ iter->type = FLATCURVE_XAPIAN_DB_TYPE_OPTIMIZE;
+
+ return TRUE;
+}
+
+/* Returns: 0 on success, -1 on error */
+static int
+fts_flatcurve_xapian_db_iter_deinit(struct flatcurve_xapian_db_iter **_iter,
+ const char **error_r)
+{
+ struct flatcurve_xapian_db_iter *iter = *_iter;
+ *_iter = NULL;
+
+ if (iter->dirp != NULL && closedir(iter->dirp) < 0) {
+ if (iter->error == NULL)
+ iter->error = i_strdup_printf(
+ "closedir(%s) failed: %m",
+ str_c(iter->backend->db_path));
+ }
+
+ int ret = 0;
+ if (iter->error != NULL) {
+ *error_r = t_strdup(iter->error);
+ i_free(iter->error);
+ ret = -1;
+ }
+
+ p_free(iter->backend->xapian->pool, iter);
+ return ret;
+}
+
+/* Returns: 0 on success, -1 on error */
+static int
+fts_flatcurve_xapian_write_db_get_do(struct flatcurve_fts_backend *backend,
+ struct flatcurve_xapian_db *xdb,
+ int db_flags, const char **error_r)
+{
+ if (xdb->dbw != NULL)
+ return 0;
+
+ for (unsigned int elapsed = 0;
+ elapsed <= FLATCURVE_DBW_LOCK_RETRY_MAX;
+ elapsed += FLATCURVE_DBW_LOCK_RETRY_SECS) {
+ try {
+ xdb->dbw = new Xapian::WritableDatabase(
+ xdb->dbpath->path, db_flags);
+ return 0;
+ } catch (Xapian::DatabaseLockError &e) {
+ e_debug(backend->event,
+ "Waiting for DB (RW, %s) lock",
+ xdb->dbpath->fname);
+ if (!i_sleep_intr_secs(FLATCURVE_DBW_LOCK_RETRY_SECS)) {
+ *error_r = t_strdup_printf(
+ "Cannot open DB (RW, %s): "
+ "sleep() interrupted by signal",
+ xdb->dbpath->fname);
+ return -1;
+ }
+ continue;
+ } catch (Xapian::Error &e) {
+ *error_r = t_strdup_printf(
+ "Cannot open DB (RW, %s): %s",
+ xdb->dbpath->fname,
+ e.get_description().c_str());
+ return -1;
+ }
+ }
+ *error_r = t_strdup_printf(
+ "DB (RW, %s) was locked for over %d seconds ",
+ xdb->dbpath->fname, FLATCURVE_DBW_LOCK_RETRY_MAX);
+ return -1;
+}
+
+/* Returns: 0 on success, -1 on error */
+static int
+fts_flatcurve_xapian_write_db_get(struct flatcurve_fts_backend *backend,
+ struct flatcurve_xapian_db *xdb,
+ enum flatcurve_xapian_wdb wopts,
+ const char **error_r)
+{
+ if (xdb->dbw != NULL)
+ return 0;
+
+ int db_flags = (HAS_ALL_BITS(wopts, FLATCURVE_XAPIAN_WDB_CREATE)
+ ? Xapian::DB_CREATE_OR_OPEN : Xapian::DB_OPEN) |
+ Xapian::DB_NO_SYNC;
+
+ if (fts_flatcurve_xapian_write_db_get_do(
+ backend, xdb, db_flags, error_r) < 0)
+ return -1;
+
+ if (xdb->type == FLATCURVE_XAPIAN_DB_TYPE_CURRENT &&
+ fts_flatcurve_xapian_check_db_version(backend, xdb, error_r) < 0)
+ return -1;
+
+ e_debug(backend->event, "Opened DB (RW, %s) messages=%u version=%u",
+ xdb->dbpath->fname, xdb->dbw->get_doccount(),
+ FLATCURVE_XAPIAN_DB_VERSION);
+
+ return 0;
+}
+
+/* Returns: 0 on success, -1 on error */
+static int
+fts_flatcurve_xapian_rename_db(struct flatcurve_fts_backend *backend,
+ struct flatcurve_xapian_db_path *path,
+ struct flatcurve_xapian_db_path **newpath_r,
+ const char **error_r)
+{
+ unsigned int i;
+ std::string new_fname;
+ struct flatcurve_xapian_db_path *newpath;
+ bool retry = FALSE;
+ std::ostringstream ss;
+
+ for (i = 0; i < 5; ++i) {
+ new_fname.clear();
+ new_fname = FLATCURVE_XAPIAN_DB_PREFIX;
+ ss << i_rand_limit(8192);
+ new_fname += ss.str();
+
+ newpath = fts_flatcurve_xapian_create_db_path(
+ backend, new_fname.c_str());
+
+ if (rename(path->path, newpath->path) < 0) {
+ if (retry ||
+ (errno != ENOTEMPTY) && (errno != EEXIST)) {
+ *error_r = t_strdup_printf(
+ "rename(%s, %s) failed: %m",
+ path->path, newpath->path);
+ p_free(backend->xapian->pool, newpath);
+ return -1;
+ }
+
+ /* Looks like a naming conflict; try once again with
+ * a different filename. ss will have additional
+ * randomness added to the original suffix, so it
+ * will almost certainly work the second time. */
+ retry = TRUE;
+ } else {
+ if (newpath_r != NULL) *newpath_r = newpath;
+ return 0;
+ }
+ }
+
+ /* If we still haven't found a valid filename, something is very
+ * wrong. Exit before we enter an infinite loop and consume all the
+ * memory. */
+ i_unreached();
+}
+
+static bool
+fts_flatcurve_xapian_need_optimize(struct flatcurve_fts_backend *backend)
+{
+ if (backend->fuser == NULL) return FALSE;
+ if (backend->fuser->set.optimize_limit == 0) return FALSE;
+ return backend->xapian->shards >= backend->fuser->set.optimize_limit;
+}
+
+static void
+fts_flatcurve_xapian_optimize_mailbox(struct flatcurve_fts_backend *backend)
+{
+ struct flatcurve_xapian *x = backend->xapian;
+
+ if (x->deinit || !fts_flatcurve_xapian_need_optimize(backend))
+ return;
+
+ if (!hash_table_is_created(x->optimize))
+ hash_table_create(&x->optimize, backend->pool, 0, str_hash,
+ strcmp);
+ if (hash_table_lookup(x->optimize, str_c(backend->boxname)) == NULL)
+ hash_table_insert(x->optimize,
+ p_strdup(backend->pool, str_c(backend->boxname)),
+ p_strdup(backend->pool, str_c(backend->db_path)));
+}
+
+/* Returns: 0 on success, -1 on error */
+static int
+fts_flatcurve_xapian_db_add(struct flatcurve_fts_backend *backend,
+ struct flatcurve_xapian_db_path *dbpath,
+ enum flatcurve_xapian_db_type type,
+ bool open_wdb,
+ struct flatcurve_xapian_db **xdb_r,
+ const char **error_r)
+{
+ struct flatcurve_xapian *x = backend->xapian;
+
+ if (type != FLATCURVE_XAPIAN_DB_TYPE_INDEX &&
+ type != FLATCURVE_XAPIAN_DB_TYPE_CURRENT) {
+ if (xdb_r != NULL) *xdb_r = NULL;
+ return 0;
+ }
+
+ struct flatcurve_xapian_db *o, *xdb;
+ xdb = p_new(x->pool, struct flatcurve_xapian_db, 1);
+ xdb->dbpath = dbpath;
+ xdb->type = type;
+
+ if (open_wdb && fts_flatcurve_xapian_write_db_get(
+ backend, xdb, FLATCURVE_XAPIAN_WDB_CREATE, error_r) < 0)
+ return -1;
+
+ hash_table_insert(x->dbs, dbpath->fname, xdb);
+
+ bool failed = FALSE;
+ /* If multiple current DBs exist, rename the oldest. */
+ if (type == FLATCURVE_XAPIAN_DB_TYPE_CURRENT &&
+ x->dbw_current != NULL) {
+ struct flatcurve_xapian_db *db =
+ strcmp(dbpath->fname,
+ x->dbw_current->dbpath->fname) > 0 ?
+ x->dbw_current : xdb;
+
+ struct flatcurve_xapian_db_path *newpath;
+ if (fts_flatcurve_xapian_rename_db(
+ backend, db->dbpath, &newpath, error_r) < 0)
+ failed = TRUE;
+ if (fts_flatcurve_xapian_close_db(
+ backend, db, FLATCURVE_XAPIAN_DB_CLOSE_WDB, error_r) < 0)
+ failed = TRUE;
+ hash_table_remove(x->dbs, db->dbpath->fname);
+ hash_table_insert(x->dbs, newpath->fname, db);
+
+ db->dbpath = newpath;
+ db->type = FLATCURVE_XAPIAN_DB_TYPE_INDEX;
+ }
+
+ if (xdb->type == FLATCURVE_XAPIAN_DB_TYPE_CURRENT)
+ x->dbw_current = xdb;
+
+ if (xdb_r != NULL) *xdb_r = xdb;
+ return failed ? -1 : 0;
+}
+
+/* Returns: lock fd >=0 on success, -1 on error */
+static int fts_flatcurve_xapian_lock(struct flatcurve_fts_backend *backend,
+ const char **error_r)
+{
+ struct flatcurve_xapian *x = backend->xapian;
+
+ if (x->lock_path == NULL)
+ x->lock_path = p_strdup_printf(
+ x->pool, "%s" FLATCURVE_XAPIAN_LOCK_FNAME,
+ str_c(backend->db_path));
+
+ struct file_create_settings set;
+ i_zero(&set);
+ set.lock_timeout_secs = FLATCURVE_XAPIAN_LOCK_TIMEOUT_SECS;
+ set.lock_settings.close_on_free = TRUE;
+ set.lock_settings.unlink_on_free = TRUE;
+ set.lock_settings.lock_method = backend->parsed_lock_method;
+
+ bool created;
+ return file_create_locked(x->lock_path, &set, &x->lock, &created, error_r);
+}
+
+static void fts_flatcurve_xapian_unlock(struct flatcurve_fts_backend *backend)
+{
+ file_lock_free(&backend->xapian->lock);
+}
+
+/* Returns: 0 if read DB is null, 1 if database has been addeds, -1 on error */
+static int
+fts_flatcurve_xapian_db_read_add(struct flatcurve_fts_backend *backend,
+ struct flatcurve_xapian_db *xdb,
+ const char **error_r)
+{
+ struct flatcurve_xapian *x = backend->xapian;
+
+ if (x->db_read == NULL)
+ return 0;
+
+ try {
+ xdb->db = new Xapian::Database(xdb->dbpath->path);
+ } catch (Xapian::Error &e) {
+ *error_r = t_strdup_printf("Cannot open DB (RO; %s); %s",
+ xdb->dbpath->fname, e.get_description().c_str());
+ return -1;
+ }
+
+ if (fts_flatcurve_xapian_check_db_version(backend, xdb, error_r) < 0)
+ return -1;
+
+ ++x->shards;
+ x->db_read->add_database(*(xdb->db));
+
+ fts_flatcurve_xapian_optimize_mailbox(backend);
+
+ return 1;
+}
+
+/* Returns: 0 on success, -1 on error */
+static int
+fts_flatcurve_xapian_create_current(struct flatcurve_fts_backend *backend,
+ enum flatcurve_xapian_db_close copts,
+ const char **error_r)
+{
+ /* The current shard has filename of the format PREFIX.timestamp. This
+ * ensures that we will catch any current DB renaming done by another
+ * process (reopen() on the DB will fail, causing the entire DB to be
+ * closed/reopened). */
+
+ int ret;
+ struct flatcurve_xapian_db *xdb;
+ T_BEGIN {
+ const char *fname = t_strdup_printf(
+ FLATCURVE_XAPIAN_DB_CURRENT_PREFIX "%lu",
+ i_microseconds());
+ ret = fts_flatcurve_xapian_db_add(backend,
+ fts_flatcurve_xapian_create_db_path(backend, fname),
+ FLATCURVE_XAPIAN_DB_TYPE_CURRENT, TRUE, &xdb, error_r);
+ } T_END;
+
+ if (ret < 0)
+ return -1;
+
+ if (xdb == NULL) {
+ *error_r = "Could not add db";
+ return -1;
+ }
+
+ if (fts_flatcurve_xapian_db_read_add(backend, xdb, error_r) < 0)
+ return -1;
+
+ if (copts == 0)
+ return 0;
+
+ return fts_flatcurve_xapian_close_db(backend, xdb, copts, error_r);
+}
+
+/* Returns: 0 on success, -1 on error */
+static int
+fts_flatcurve_xapian_db_populate(struct flatcurve_fts_backend *backend,
+ enum flatcurve_xapian_db_opts opts,
+ const char **error_r)
+{
+ struct flatcurve_xapian_db_iter *iter;
+ struct flatcurve_xapian *x = backend->xapian;
+
+ bool dbs_exist = hash_table_count(backend->xapian->dbs) > 0;
+ bool no_create = HAS_ALL_BITS(opts, FLATCURVE_XAPIAN_DB_NOCREATE_CURRENT);
+
+ if (dbs_exist && (no_create || x->dbw_current != NULL))
+ return 0;
+
+ bool lock;
+ if (no_create) {
+ struct stat st;
+ if (stat(str_c(backend->db_path), &st) == 0)
+ lock = S_ISDIR(st.st_mode);
+ else if (errno == ENOENT)
+ lock = FALSE;
+ else {
+ e_error(backend->event, "stat(%s) failed: %m",
+ str_c(backend->db_path));
+ return -1;
+ }
+ } else {
+ if (mailbox_list_mkdir_root(
+ backend->backend.ns->list,
+ str_c(backend->db_path),
+ MAILBOX_LIST_PATH_TYPE_INDEX) < 0) {
+ e_error(backend->event, "Cannot create DB (RW); %s",
+ str_c(backend->db_path));
+ return -1;
+ }
+ lock = TRUE;
+ }
+
+ if (lock && fts_flatcurve_xapian_lock(backend, error_r) < 0)
+ return -1;
+
+ if (!dbs_exist) {
+ const char *error, *last_error = NULL;
+ iter = fts_flatcurve_xapian_db_iter_init(backend, opts);
+ while (fts_flatcurve_xapian_db_iter_next(iter)) {
+ if (fts_flatcurve_xapian_db_add(
+ backend, iter->path, iter->type,
+ FALSE, NULL, &last_error) < 0)
+ break;
+ }
+ if (fts_flatcurve_xapian_db_iter_deinit(&iter, &error) < 0) {
+ if (last_error != NULL)
+ e_error(backend->event, "%s", error);
+ else
+ last_error = error;
+ }
+ if (last_error != NULL) {
+ fts_flatcurve_xapian_unlock(backend);
+ *error_r = last_error;
+ return -1;
+ }
+ }
+
+ int ret = 0;
+ if (!no_create && x->dbw_current == NULL) {
+ enum flatcurve_xapian_db_close flags =
+ HAS_ALL_BITS(opts, FLATCURVE_XAPIAN_DB_NOCLOSE_CURRENT) ?
+ (enum flatcurve_xapian_db_close) 0 :
+ FLATCURVE_XAPIAN_DB_CLOSE_WDB;
+
+ ret = fts_flatcurve_xapian_create_current(
+ backend, flags, error_r);
+ }
+
+ fts_flatcurve_xapian_unlock(backend);
+ return ret;
+}
+
+/* Returns: 0 if dbw_current == NULL, 1 dbw_current != NULL, -1 on error */
+static int
+fts_flatcurve_xapian_write_db_current(struct flatcurve_fts_backend *backend,
+ enum flatcurve_xapian_db_opts opts,
+ struct flatcurve_xapian_db **dbw_current_r,
+ const char **error_r)
+{
+ static const enum flatcurve_xapian_wdb wopts =
+ ENUM_EMPTY(flatcurve_xapian_wdb);
+
+ struct flatcurve_xapian *x = backend->xapian;
+ struct flatcurve_xapian_db *xdb;
+
+ if (x->dbw_current != NULL && x->dbw_current->dbw != NULL) {
+ if (dbw_current_r != NULL) *dbw_current_r = x->dbw_current;
+ return 1;
+ }
+
+ opts = (enum flatcurve_xapian_db_opts)
+ (opts | FLATCURVE_XAPIAN_DB_NOCLOSE_CURRENT);
+ /* dbw_current can be NULL if FLATCURVE_XAPIAN_DB_NOCREATE_CURRENT
+ * is set in opts. */
+ if (fts_flatcurve_xapian_db_populate(backend, opts, error_r) < 0)
+ return -1;
+
+ if (x->dbw_current == NULL)
+ return 0;
+
+ if (fts_flatcurve_xapian_write_db_get(
+ backend, x->dbw_current, wopts, error_r) < 0)
+ return -1;
+
+ if (dbw_current_r != NULL) *dbw_current_r = x->dbw_current;
+ return 1;
+}
+
+/* Returns: 0 if DBs table is empty, 1 otherwise, -1 on error */
+static int
+fts_flatcurve_xapian_read_db(struct flatcurve_fts_backend *backend,
+ enum flatcurve_xapian_db_opts opts,
+ Xapian::Database **db_read_r,
+ const char **error_r)
+{
+ struct hash_iterate_context *iter;
+ void *key, *val;
+ struct fts_flatcurve_xapian_db_stats stats;
+ struct flatcurve_xapian *x = backend->xapian;
+ struct flatcurve_xapian_db *xdb;
+
+ if (x->db_read != NULL) {
+ try {
+ (void)x->db_read->reopen();
+ if (db_read_r != NULL) *db_read_r = x->db_read;
+ return 1;
+ } catch (Xapian::DatabaseNotFoundError &e) {
+ /* This means that the underlying databases have
+ * changed (i.e. DB rotation by another process).
+ * Close all DBs and reopen. */
+ if (fts_flatcurve_xapian_close(backend, error_r) < 0)
+ return -1;
+ return fts_flatcurve_xapian_read_db(
+ backend, opts, db_read_r, error_r);
+ }
+ }
+
+ if (fts_flatcurve_xapian_db_populate(backend, opts, error_r) < 0)
+ return -1;
+
+ if (HAS_ALL_BITS(opts, FLATCURVE_XAPIAN_DB_IGNORE_EMPTY) &&
+ (hash_table_count(x->dbs) == 0))
+ return 0;
+
+ x->db_read = new Xapian::Database();
+
+ iter = hash_table_iterate_init(x->dbs);
+ while (hash_table_iterate(iter, x->dbs, &key, &val)) {
+ xdb = (struct flatcurve_xapian_db *)val;
+ if (fts_flatcurve_xapian_db_read_add(
+ backend, xdb, error_r) < 0)
+ e_error(backend->event, "%s", *error_r);
+ }
+ hash_table_iterate_deinit(&iter);
+
+ if (fts_flatcurve_xapian_mailbox_stats(backend, &stats, error_r) < 0)
+ return -1;
+
+ e_debug(backend->event, "Opened DB (RO) messages=%u version=%u "
+ "shards=%u", stats.messages, stats.version, stats.shards);
+
+ if (db_read_r != NULL) *db_read_r = x->db_read;
+ return 1;
+}
+
+/* Returns: 0 on success, -1 on error */
+int
+fts_flatcurve_xapian_mailbox_check(struct flatcurve_fts_backend *backend,
+ struct fts_flatcurve_xapian_db_check *check,
+ const char **error_r)
+{
+ static const enum flatcurve_xapian_db_opts opts =
+ (enum flatcurve_xapian_db_opts)
+ (FLATCURVE_XAPIAN_DB_NOCREATE_CURRENT |
+ FLATCURVE_XAPIAN_DB_IGNORE_EMPTY);
+ struct flatcurve_xapian *x = backend->xapian;
+
+ i_zero(check);
+
+ int ret = fts_flatcurve_xapian_read_db(backend, opts, NULL, error_r);
+ if (ret <= 0)
+ return ret;
+
+ bool failed = FALSE;
+ void *key, *val;
+ struct hash_iterate_context *iter = hash_table_iterate_init(x->dbs);
+ while (hash_table_iterate(iter, x->dbs, &key, &val)) {
+ try {
+ struct flatcurve_xapian_db *xdb =
+ (struct flatcurve_xapian_db *)val;
+ check->errors += Xapian::Database::check(
+ xdb->dbpath->path, Xapian::DBCHECK_FIX, NULL);
+ } catch (const Xapian::Error &e) {
+ const char *error = t_strdup_printf(
+ "Check failed; %s",
+ e.get_description().c_str());
+ if (!failed)
+ *error_r = error;
+ else
+ e_error(backend->event, "%s", error);
+ failed = TRUE;
+ }
+ ++check->shards;
+ }
+ hash_table_iterate_deinit(&iter);
+ return failed ? -1 : 0;
+}
+
+/* Returns: 0 on success, -1 on error */
+int fts_flatcurve_xapian_mailbox_rotate(struct flatcurve_fts_backend *backend,
+ const char **error_r)
+{
+ static const enum flatcurve_xapian_db_opts opts =
+ (enum flatcurve_xapian_db_opts)
+ (FLATCURVE_XAPIAN_DB_NOCREATE_CURRENT |
+ FLATCURVE_XAPIAN_DB_IGNORE_EMPTY);
+ struct flatcurve_xapian *x = backend->xapian;
+ struct flatcurve_xapian_db *xdb;
+
+ int ret = fts_flatcurve_xapian_write_db_current(
+ backend, opts, &xdb, error_r);
+ if (ret <= 0)
+ return ret;
+
+ return fts_flatcurve_xapian_close_db(backend, xdb,
+ FLATCURVE_XAPIAN_DB_CLOSE_ROTATE, error_r);
+}
+
+/* Returns: 0 if DBs table is empty, 1 otherwise, -1 on error */
+int
+fts_flatcurve_xapian_mailbox_stats(struct flatcurve_fts_backend *backend,
+ struct fts_flatcurve_xapian_db_stats *stats,
+ const char **error_r)
+{
+ static const enum flatcurve_xapian_db_opts opts =
+ (enum flatcurve_xapian_db_opts)
+ (FLATCURVE_XAPIAN_DB_NOCREATE_CURRENT |
+ FLATCURVE_XAPIAN_DB_IGNORE_EMPTY);
+ struct flatcurve_xapian *x = backend->xapian;
+
+ if (x->db_read == NULL) {
+ int ret = fts_flatcurve_xapian_read_db(backend, opts, NULL, error_r);
+ if (ret <= 0) {
+ i_zero(stats);
+ return ret;
+ }
+ }
+ stats->messages = x->db_read->get_doccount();
+ stats->shards = x->shards;
+ stats->version = FLATCURVE_XAPIAN_DB_VERSION;
+ return 1;
+}
+
+/* The input of the doveadm dump command can be any file or dir inside the
+ flatcurve index tree. Climb the path tree until finding a directory with
+ the expected name
+
+ Returns: 0 on success, -1 on failure */
+static int fts_flatcurve_database_find_dir(const char *path, const char **dir_r,
+ const char **error_r)
+{
+ /* These don't depend on inputs and are not going to change during
+ execution. No need to recalculate them each time either */
+ static const char *const wanted = FTS_FLATCURVE_LABEL;
+ static const size_t wanted_len = strlen(wanted);
+
+ /* Resolve symlinks, . and .. , and repeated path separators
+ what remains is a cleaned path, either relative or absolute
+ that is good to parse */
+ const char *normalized, *error;
+ if (t_realpath(path, &normalized, error_r) < 0)
+ return -1;
+
+ /* Scan into the path and match as many times as possible
+ What we are interested in is the most nested match,
+ i.e. the (last) rightmost one.
+
+ On each successive iteration we simply start from the
+ end of the preceding match, which in worst case exists
+ as the string nul termination.
+ */
+ const char *hit_start, *match_start = NULL;
+ for (const char *ptr = normalized;; ptr = hit_start + wanted_len) {
+ hit_start = strstr(ptr, wanted);
+ if (hit_start == NULL) break;
+
+ /* Safe as wanted_len and wanted are de facto compile
+ time constants */
+ const char *hit_end = hit_start + wanted_len;
+ if (*hit_end != '\0' && *hit_end != '/') continue;
+
+ /* The first condition protects from underruns */
+ if (hit_start > normalized && *(hit_start - 1) != '/') continue;
+
+ match_start = hit_start;
+ }
+ if (match_start == NULL) {
+ *error_r = "could not find a valid xapian database directory";
+ return -1;
+ };
+
+ /* Safe as wanted_len and wanted are de facto compile time constants
+ match_end derives from normalized by increments of wanted_len size */
+ const char *match_end = match_start + wanted_len;
+ size_t match_size = match_end - normalized;
+ const char *index_dir = t_strndup(normalized, match_size);
+
+ /* Caller expects a trailing slash to be in place */
+ *dir_r = t_strdup_printf("%s/", index_dir);
+ return 0;
+}
+
+/* The input of the doveadm dump command can be any file or dir inside the
+ flatcurve index tree. Climb the path tree until we find a directory with
+ the expected name.
+
+ Once a dir with the expected name has been located, check if there is at
+ least one subdir in it whose name starts either with the db-current-prefix
+ or with the db-prefix.
+
+ If none of those is found, consider this an invalid path and fail
+ Returns: 0 on success, -1 on failure */
+int fts_flatcurve_database_locate_dir(const char *arg_path,
+ const char **index_path_r,
+ const char **error_r)
+{
+ const char* path;
+
+ if (fts_flatcurve_database_find_dir(arg_path, &path, error_r) < 0)
+ return -1;
+
+ DIR *dir = opendir(path);
+ if (dir == NULL) {
+ *error_r = t_strdup_printf("opendir(%s) failed: %m", path);
+ return -1;
+ }
+
+ bool valid = FALSE;
+ do {
+ errno = 0;
+ struct dirent *entry = readdir(dir);
+ if (errno < 0) {
+ *error_r = t_strdup_printf("readdir(%s) failed: %m", path);
+ return -1;
+ }
+
+ if (entry == NULL)
+ break;
+
+ valid = (entry->d_type & DT_DIR) != 0 &&
+ (str_begins_with(entry->d_name, FLATCURVE_XAPIAN_DB_CURRENT_PREFIX) ||
+ str_begins_with(entry->d_name, FLATCURVE_XAPIAN_DB_PREFIX));
+ }
+ while (!valid);
+
+ if (closedir(dir) < 0) {
+ *error_r = t_strdup_printf("closedir(%s) failed: %m", path);
+ return -1;
+ }
+
+ if (!valid) {
+ *error_r = t_strdup_printf("No xapian databases in %s", path);
+ return -1;
+ }
+
+ *index_path_r = path;
+ return 0;
+}
+
+/* Returns: 0 if no DB, 1 if DB as accessed, -1 on error */
+static int
+fts_flatcurve_database_terms_fetch(bool headers,
+ struct flatcurve_fts_backend *backend,
+ HASH_TABLE_TYPE(term_counter) *terms,
+ const char **error_r)
+{
+ static const enum flatcurve_xapian_db_opts opts =
+ (enum flatcurve_xapian_db_opts)
+ (FLATCURVE_XAPIAN_DB_NOCREATE_CURRENT |
+ FLATCURVE_XAPIAN_DB_IGNORE_EMPTY);
+
+ Xapian::Database *db;
+ Xapian::TermIterator iter, end;
+
+ const char *prefix = headers ? FLATCURVE_XAPIAN_BOOLEAN_FIELD_PREFIX : "";
+
+ int ret = fts_flatcurve_xapian_read_db(backend, opts, &db, error_r);
+ if (ret <= 0)
+ return ret;
+
+ for (iter = db->allterms_begin(prefix), end = db->allterms_end(prefix);
+ iter != end; ++iter) {
+
+ const std::string &term = *iter;
+ const char *key = term.data();
+
+ if (headers) {
+ if (*key == *FLATCURVE_XAPIAN_BOOLEAN_FIELD_PREFIX)
+ ++key;
+ else
+ continue;
+ } else {
+ if (*key == *FLATCURVE_XAPIAN_ALL_HEADERS_PREFIX)
+ ++key;
+ else if (*key == *FLATCURVE_XAPIAN_BOOLEAN_FIELD_PREFIX ||
+ *key == *FLATCURVE_XAPIAN_HEADER_PREFIX)
+ continue;
+ }
+
+ void *k, *v;
+ const char *pkey;
+ unsigned int counter = iter.get_termfreq();
+ if (hash_table_lookup_full(*terms, key, &k, &v)) {
+ counter += POINTER_CAST_TO(v, unsigned int);
+ pkey = (const char *)k;
+ } else {
+ pkey = t_strdup(key);
+ }
+ hash_table_update(*terms, pkey, POINTER_CAST(counter));
+ }
+ return 1;
+}
+
+/* Returns: 0 if no DB, 1 if DB as accessed, -1 on error */
+int fts_flatcurve_database_terms(bool headers, const char *path,
+ HASH_TABLE_TYPE(term_counter) *terms,
+ const char **error_r)
+{
+ struct flatcurve_fts_backend backend;
+
+ i_zero(&backend);
+ backend.pool = pool_alloconly_create("doveadm-" FTS_FLATCURVE_LABEL, 1024);
+ backend.db_path = str_new_const(backend.pool, path, strlen(path));
+ backend.event = event_create(NULL);
+ fts_flatcurve_xapian_init(&backend);
+
+ int ret = fts_flatcurve_database_terms_fetch(
+ headers, &backend, terms, error_r);
+
+ fts_flatcurve_xapian_deinit(&backend);
+ event_unref(&backend.event);
+ pool_unref(&backend.pool);
+
+ return ret;
+}
+
+void fts_flatcurve_xapian_set_mailbox(struct flatcurve_fts_backend *backend)
+{
+ event_set_append_log_prefix(backend->event, p_strdup_printf(
+ backend->xapian->pool, FTS_FLATCURVE_LABEL "(%s): ",
+ str_c(backend->boxname)));
+}
+
+/* Returns: 0 on success, -1 on error */
+static int
+fts_flatcurve_xapian_check_db_version(struct flatcurve_fts_backend *backend,
+ struct flatcurve_xapian_db *xdb,
+ const char **error_r)
+{
+ static const enum flatcurve_xapian_wdb wopts =
+ ENUM_EMPTY(flatcurve_xapian_wdb);
+
+ Xapian::Database *db = (xdb->dbw == NULL) ? xdb->db : xdb->dbw;
+
+ std::string str = db->get_metadata(FLATCURVE_XAPIAN_DB_VERSION_KEY);
+ const char* value = str.c_str();
+ unsigned int ver = 0;
+ if (*value != '\0' && str_to_uint(value, &ver) < 0)
+ e_error(backend->event,
+ "unexpected Xapian db version '%s' in %s",
+ value, str_c(backend->db_path));
+
+ if (ver == FLATCURVE_XAPIAN_DB_VERSION)
+ return 0;
+
+ /* If we need to upgrade DB, and this is NOT the write DB, open the
+ * write DB, do the changes there, and reopen the read DB. */
+ if (xdb->dbw == NULL) {
+ if (fts_flatcurve_xapian_write_db_get(
+ backend, xdb, wopts, error_r) < 0)
+ return -1;
+ if (fts_flatcurve_xapian_close_db(
+ backend, xdb, FLATCURVE_XAPIAN_DB_CLOSE_WDB, error_r) < 0)
+ return -1;
+ (void)xdb->db->reopen();
+ return 0;
+ }
+
+ /* 0->1: Added DB version. Always implicity update version when we
+ * upgrade (done at end of this function). */
+ if (ver == 0) ++ver;
+ T_BEGIN {
+ xdb->dbw->set_metadata(FLATCURVE_XAPIAN_DB_VERSION_KEY,
+ dec2str(ver));
+ } T_END;
+
+ /* Commit the changes now. */
+ try {
+ xdb->dbw->commit();
+ return 0;
+ }
+ catch(Xapian::Error &e) {
+ e_error(backend->event,
+ "Xapian::Error on '%s': %s",
+ str_c(backend->db_path),
+ e.get_description().c_str());
+ return -1;
+ }
+}
+
+/* Requires read DB to have been opened
+ * Returns: 0 not found, 1 if found, -1 on error */
+static int
+fts_flatcurve_xapian_uid_exists_db(struct flatcurve_fts_backend *backend,
+ uint32_t uid,
+ struct flatcurve_xapian_db **xdb_r,
+ const char **error_r)
+{
+ void *key, *val;
+ int ret = 0;
+ struct hash_iterate_context *iter =
+ hash_table_iterate_init(backend->xapian->dbs);
+
+ while (hash_table_iterate(iter, backend->xapian->dbs, &key, &val)) {
+ try {
+ struct flatcurve_xapian_db *xdb =
+ (struct flatcurve_xapian_db *)val;
+ (void)xdb->db->get_document(uid);
+ if (xdb_r != NULL) *xdb_r = xdb;
+ ret = 1;
+ break;
+ }
+ catch (Xapian::DocNotFoundError &e) {
+ continue;
+ }
+ catch (Xapian::Error &e) {
+ *error_r = t_strdup(e.get_description().c_str());
+ ret = -1;
+ break;
+ }
+ }
+ hash_table_iterate_deinit(&iter);
+ return ret;
+}
+
+/* Returns: 0 if no DBs, 1 if DB exists, -1 on error */
+static int
+fts_flatcurve_xapian_write_db_by_uid(struct flatcurve_fts_backend *backend,
+ uint32_t uid,
+ struct flatcurve_xapian_db **xdb_r,
+ const char **error_r)
+{
+ static const enum flatcurve_xapian_db_opts opts =
+ ENUM_EMPTY(flatcurve_xapian_db_opts);
+ static const enum flatcurve_xapian_wdb wopts =
+ ENUM_EMPTY(flatcurve_xapian_wdb);
+
+ if (fts_flatcurve_xapian_read_db(backend, opts, NULL, error_r) < 0)
+ return -1;
+
+ struct flatcurve_xapian_db *xdb;
+ int ret = fts_flatcurve_xapian_uid_exists_db(backend, uid, &xdb, error_r);
+ if (ret <= 0)
+ return ret;
+
+ if (fts_flatcurve_xapian_write_db_get(
+ backend, xdb, wopts, error_r) < 0)
+ return -1;
+
+ *xdb_r = xdb;
+ return 1;
+}
+
+/* Returns: 0 on success, -1 on error */
+static int
+fts_flatcurve_xapian_check_commit_limit(struct flatcurve_fts_backend *backend,
+ struct flatcurve_xapian_db *xdb,
+ const char **error_r)
+{
+ struct fts_flatcurve_user *fuser = backend->fuser;
+ struct flatcurve_xapian *x = backend->xapian;
+
+ ++x->doc_updates;
+ ++xdb->changes;
+
+ if (xdb->type == FLATCURVE_XAPIAN_DB_TYPE_CURRENT &&
+ fuser->set.rotate_count > 0 &&
+ xdb->dbw->get_doccount() >= fuser->set.rotate_count) {
+ return fts_flatcurve_xapian_close_db(
+ backend, xdb, FLATCURVE_XAPIAN_DB_CLOSE_ROTATE, error_r);
+ }
+
+ if (fuser->set.commit_limit > 0 &&
+ x->doc_updates >= fuser->set.commit_limit) {
+ e_debug(backend->event,
+ "Committing DB as update limit was reached; limit=%d",
+ fuser->set.commit_limit);
+ return fts_flatcurve_xapian_close_dbs(
+ backend, FLATCURVE_XAPIAN_DB_CLOSE_WDB_COMMIT, error_r);
+ }
+
+ return 0;
+}
+
+/* Returns: 0 on success, -1 on error */
+static int
+fts_flatcurve_xapian_clear_document(struct flatcurve_fts_backend *backend,
+ const char **error_r)
+{
+ static const enum flatcurve_xapian_db_opts opts =
+ ENUM_EMPTY(flatcurve_xapian_db_opts);
+
+ struct flatcurve_xapian *x = backend->xapian;
+
+ if (x->doc == NULL)
+ return 0;
+
+ struct flatcurve_xapian_db *xdb;
+ int ret = fts_flatcurve_xapian_write_db_current(
+ backend, opts, &xdb, error_r);
+ if (ret <= 0)
+ return ret;
+
+ ret = 0;
+ try {
+ xdb->dbw->replace_document(x->doc_uid, *x->doc);
+ } catch (std::bad_alloc &b) {
+ i_fatal_status(FATAL_OUTOFMEM,
+ "Out of memory when indexing mail (%s); UID=%d "
+ "(Hint: increase indexing process vsz_limit or "
+ "define smaller commit limit value in "
+ "plugin { fts_flatcurve_commit_limit = ...})",
+ b.what(), x->doc_uid);
+ } catch (Xapian::Error &e) {
+ *error_r = t_strdup_printf(
+ "Could not write message data: uid=%u; %s",
+ x->doc_uid,
+ e.get_description().c_str());
+ ret = -1;
+ }
+
+ if (x->doc_created)
+ delete(x->doc);
+ x->doc = NULL;
+ x->doc_created = FALSE;
+ x->doc_uid = 0;
+
+ if (ret < 0)
+ return -1;
+
+ return fts_flatcurve_xapian_check_commit_limit(backend, xdb, error_r);
+}
+
+/* Returns: 0 on success, -1 on error */
+static int
+fts_flatcurve_xapian_close_db(struct flatcurve_fts_backend *backend,
+ struct flatcurve_xapian_db *xdb,
+ enum flatcurve_xapian_db_close opts,
+ const char **error_r)
+{
+ struct flatcurve_xapian *x = backend->xapian;
+
+ if (fts_flatcurve_xapian_clear_document(backend, error_r) < 0)
+ return -1;
+
+ struct timeval start;
+ i_gettimeofday(&start);
+
+ bool commit = FALSE;
+ if (xdb->dbw != NULL) {
+ if (HAS_ANY_BITS(opts, FLATCURVE_XAPIAN_DB_CLOSE_WDB |
+ FLATCURVE_XAPIAN_DB_CLOSE_MBOX)) {
+ int ret = 0;
+ try {
+ /* even if xapian documetation states that close
+ auto-commits, GlassWritableDatabase::close() can
+ fail to actually close the db if commit fails.
+ We explicitly commit before invoking close to
+ have a better chance to properly clean up */
+ xdb->dbw->commit();
+ }
+ catch (Xapian::Error &e) {
+ *error_r = t_strdup(e.get_description().c_str());
+ ret = -1;
+ }
+ xdb->dbw->close();
+ delete(xdb->dbw);
+ xdb->dbw = NULL;
+ commit = TRUE; // mark anyway as committed
+ if (ret < 0)
+ return -1;
+ } else if (HAS_ANY_BITS(opts, FLATCURVE_XAPIAN_DB_CLOSE_WDB_COMMIT |
+ FLATCURVE_XAPIAN_DB_CLOSE_ROTATE)) {
+ try {
+ xdb->dbw->commit();
+ commit = TRUE;
+ }
+ catch (Xapian::Error &e) {
+ *error_r = t_strdup(e.get_description().c_str());
+ return -1;
+ }
+ }
+ }
+
+ bool rotate = FALSE;
+ if (commit) {
+ struct timeval now;
+ i_gettimeofday(&now);
+ int elapsed = timeval_diff_msecs(&now, &start);
+ if (xdb->changes > 0)
+ e_debug(backend->event, "Committed %u changes to DB "
+ "(RW, %s) in %u.%03u secs", xdb->changes,
+ xdb->dbpath->fname, elapsed / 1000, elapsed % 1000);
+
+ xdb->changes = 0;
+ x->doc_updates = 0;
+
+ if (xdb->type == FLATCURVE_XAPIAN_DB_TYPE_CURRENT) {
+ if (HAS_ALL_BITS(opts, FLATCURVE_XAPIAN_DB_CLOSE_ROTATE) ||
+ (backend->fuser->set.rotate_time > 0 &&
+ elapsed > backend->fuser->set.rotate_time))
+ rotate = TRUE;
+ }
+ }
+
+
+ if (rotate) {
+ const char *error;
+ if (fts_flatcurve_xapian_lock(backend, &error) < 0)
+ e_error(backend->event, "%s", error);
+ else {
+ const char *fname = p_strdup(x->pool, xdb->dbpath->fname);
+ enum flatcurve_xapian_db_close flags =
+ (enum flatcurve_xapian_db_close)
+ (opts & FLATCURVE_XAPIAN_DB_CLOSE_MBOX);
+ if (fts_flatcurve_xapian_create_current(
+ backend, flags, &error) < 0)
+ e_error(backend->event, "Error rotating DB (%s)",
+ xdb->dbpath->fname);
+ else
+ e_debug(event_create_passthrough(backend->event)->
+ set_name("fts_flatcurve_rotate")->
+ add_str("mailbox", str_c(backend->boxname))->
+ event(),
+ "Rotating index (from: %s, to: %s)", fname,
+ xdb->dbpath->fname);
+
+ fts_flatcurve_xapian_unlock(backend);
+ }
+ }
+
+ if (xdb->db != NULL &&
+ HAS_ANY_BITS(opts, FLATCURVE_XAPIAN_DB_CLOSE_DB |
+ FLATCURVE_XAPIAN_DB_CLOSE_MBOX)) {
+ delete(xdb->db);
+ xdb->db = NULL;
+ }
+
+ return 0;
+}
+
+/* Returns: 0 on success, -1 on error */
+static int
+fts_flatcurve_xapian_close_dbs(struct flatcurve_fts_backend *backend,
+ enum flatcurve_xapian_db_close opts,
+ const char **error_r)
+{
+ struct hash_iterate_context *iter;
+ void *key, *val;
+ struct flatcurve_xapian *x = backend->xapian;
+
+ const char *error, *last_error = NULL;
+ iter = hash_table_iterate_init(x->dbs);
+ while (hash_table_iterate(iter, x->dbs, &key, &val)) {
+ struct flatcurve_xapian_db *db =
+ (struct flatcurve_xapian_db *)val;
+ if (fts_flatcurve_xapian_close_db(backend, db, opts, &error) < 0) {
+ if (last_error != NULL)
+ e_error(backend->event, "%s", last_error);
+ last_error = error;
+ }
+ }
+ hash_table_iterate_deinit(&iter);
+ if (last_error != NULL) {
+ *error_r = last_error;
+ return -1;
+ }
+ return 0;
+}
+
+/* Returns: 0 on success, -1 on error */
+int fts_flatcurve_xapian_refresh(struct flatcurve_fts_backend *backend,
+ const char **error_r)
+{
+ return fts_flatcurve_xapian_close_dbs(
+ backend, FLATCURVE_XAPIAN_DB_CLOSE_WDB, error_r);
+}
+
+/* Returns: 0 on success, -1 on error */
+int fts_flatcurve_xapian_close(struct flatcurve_fts_backend *backend,
+ const char **error_r)
+{
+ struct flatcurve_xapian *x = backend->xapian;
+ int ret = fts_flatcurve_xapian_close_dbs(
+ backend, FLATCURVE_XAPIAN_DB_CLOSE_MBOX, error_r);
+
+ hash_table_clear(x->dbs, TRUE);
+
+ x->lock_path = NULL;
+ x->dbw_current = NULL;
+ x->shards = 0;
+
+ if (x->db_read != NULL) {
+ x->db_read->close();
+ delete(x->db_read);
+ x->db_read = NULL;
+ }
+
+ p_clear(x->pool);
+ return ret;
+}
+
+static uint32_t
+fts_flatcurve_xapian_get_last_uid_query(struct flatcurve_fts_backend *backend,
+ Xapian::Database *db)
+{
+ Xapian::Enquire enquire(*db);
+ Xapian::MSet m;
+
+ enquire.set_docid_order(Xapian::Enquire::DESCENDING);
+ enquire.set_query(Xapian::Query::MatchAll);
+
+ m = enquire.get_mset(0, 1);
+ return (m.empty())
+ ? 0 : m.begin().get_document().get_docid();
+}
+
+/* Returns: 0 on success, -1 on error */
+int fts_flatcurve_xapian_get_last_uid(struct flatcurve_fts_backend *backend,
+ uint32_t *last_uid_r, const char **error_r)
+{
+ static const enum flatcurve_xapian_db_opts opts =
+ (enum flatcurve_xapian_db_opts)
+ (FLATCURVE_XAPIAN_DB_NOCREATE_CURRENT |
+ FLATCURVE_XAPIAN_DB_IGNORE_EMPTY);
+
+ Xapian::Database *db;
+ int ret = fts_flatcurve_xapian_read_db(backend, opts, &db, error_r);
+ if (ret < 0)
+ return ret;
+
+ if (ret == 0) {
+ *last_uid_r = 0;
+ return 0;
+ }
+
+ try {
+ /* Optimization: if last used ID still exists in mailbox,
+ * this is a cheap call. */
+ *last_uid_r = db->get_document(db->get_lastdocid()).get_docid();
+ return 0;
+ } catch (Xapian::DocNotFoundError &e) {
+ /* Last used Xapian ID is no longer in the DB. Need
+ * to do a manual search for the last existing ID. */
+ *last_uid_r = fts_flatcurve_xapian_get_last_uid_query(backend, db);
+ return 0;
+ } catch (Xapian::InvalidArgumentError &e) {
+ *last_uid_r = 0;
+ return 0;
+ }
+}
+
+/* Returns: 0 not found, 1 if found, -1 on error */
+int fts_flatcurve_xapian_uid_exists(struct flatcurve_fts_backend *backend,
+ uint32_t uid, const char **error_r)
+{
+ static const enum flatcurve_xapian_db_opts opts =
+ (enum flatcurve_xapian_db_opts)
+ (FLATCURVE_XAPIAN_DB_NOCREATE_CURRENT |
+ FLATCURVE_XAPIAN_DB_IGNORE_EMPTY);
+
+ if (fts_flatcurve_xapian_read_db(backend, opts, NULL, error_r) <= 0)
+ return -1;
+ return fts_flatcurve_xapian_uid_exists_db(backend, uid, NULL, error_r);
+}
+
+/* Returns: 0 not found, 1 deleted, -1 on error */
+int fts_flatcurve_xapian_expunge(struct flatcurve_fts_backend *backend,
+ uint32_t uid, const char **error_r)
+{
+ struct flatcurve_xapian_db *xdb;
+
+ if (fts_flatcurve_xapian_write_db_by_uid(
+ backend, uid, &xdb, error_r) <= 0) {
+ e_debug(backend->event, "Expunge failed uid=%u; UID not found",
+ uid);
+ return 0;
+ }
+
+ try {
+ xdb->dbw->delete_document(uid);
+ if (fts_flatcurve_xapian_check_commit_limit(
+ backend, xdb, error_r) < 0)
+ return -1;
+ return 1;
+ } catch (Xapian::Error &e) {
+ *error_r = t_strdup_printf(
+ "Failed to expunge uid=%u: %s",
+ uid, e.get_description().c_str());
+ return -1;
+ }
+}
+
+/* Returns: TBD 0 not found, 1 deleted, -1 on error */
+int
+fts_flatcurve_xapian_init_msg(struct flatcurve_fts_backend_update_context *ctx,
+ const char **error_r)
+{
+ static const enum flatcurve_xapian_db_opts opts =
+ ENUM_EMPTY(flatcurve_xapian_db_opts);
+
+ struct flatcurve_xapian *x = ctx->backend->xapian;
+ struct flatcurve_xapian_db *xdb;
+
+ if (ctx->uid == x->doc_uid)
+ /* already indexed, nothing else to do */
+ return 1;
+
+ if (fts_flatcurve_xapian_clear_document(ctx->backend, error_r) < 0)
+ return -1;
+
+ int ret = fts_flatcurve_xapian_write_db_current(
+ ctx->backend, opts, &xdb, error_r);
+ if (ret <= 0)
+ /* error or x->dbw_current == NULL */
+ return ret;
+ try {
+ (void)xdb->dbw->get_document(ctx->uid);
+ /* document already existed */
+ return 0;
+ } catch (Xapian::DocNotFoundError &e) {
+ x->doc = new Xapian::Document();
+ x->doc_created = TRUE;
+ x->doc_uid = ctx->uid;
+ /* document did not exist */
+ return 1;
+ } catch (Xapian::Error &e) {
+ ctx->ctx.failed = TRUE;
+ *error_r = t_strdup(e.get_description().c_str());
+ return -1;
+ }
+}
+
+int
+fts_flatcurve_xapian_index_header(struct flatcurve_fts_backend_update_context *ctx,
+ const unsigned char *data, size_t size,
+ const char **error_r)
+{
+ struct fts_flatcurve_user *fuser = ctx->backend->fuser;
+ struct flatcurve_xapian *x = ctx->backend->xapian;
+
+ int ret = fts_flatcurve_xapian_init_msg(ctx, error_r);
+ if (ret <= 0)
+ return ret;
+
+ T_BEGIN {
+ char *hdr_name =
+ str_lcase(t_strdup_noconst(str_c(ctx->hdr_name)));
+
+ if (*hdr_name != '\0')
+ x->doc->add_boolean_term(t_strdup_printf(
+ FLATCURVE_XAPIAN_BOOLEAN_FIELD_PREFIX
+ "%s", hdr_name));
+
+ if (ctx->indexed_hdr)
+ hdr_name = str_ucase(hdr_name);
+
+ i_assert(uni_utf8_data_is_valid(data, size));
+ const unsigned char *end = data + size;
+ for(; end > data; data += uni_utf8_char_bytes((unsigned char) *data)) {
+ size_t len = end - data;
+ if (len == 0 || len < fuser->set.min_term_size)
+ break;
+
+ T_BEGIN {
+ x->doc->add_term(t_strdup_printf(
+ FLATCURVE_XAPIAN_ALL_HEADERS_PREFIX
+ "%s", data));
+ if (ctx->indexed_hdr)
+ x->doc->add_term(t_strdup_printf(
+ FLATCURVE_XAPIAN_HEADER_PREFIX
+ "%s%s", hdr_name,
+ (const char*) data));
+ } T_END;
+
+ if (!fuser->set.substring_search)
+ break;
+ }
+ } T_END;
+ return 1;
+}
+
+int
+fts_flatcurve_xapian_index_body(struct flatcurve_fts_backend_update_context *ctx,
+ const unsigned char *data, size_t size,
+ const char **error_r)
+{
+ struct fts_flatcurve_user *fuser = ctx->backend->fuser;
+ struct flatcurve_xapian *x = ctx->backend->xapian;
+
+ int ret = fts_flatcurve_xapian_init_msg(ctx, error_r);
+ if (ret <= 0)
+ return ret;
+
+ i_assert(uni_utf8_data_is_valid(data, size));
+ const unsigned char *end = data + size;
+ for(; end > data; data += uni_utf8_char_bytes((unsigned char) *data)) {
+ size_t len = end - data;
+ if (len == 0 || len < fuser->set.min_term_size)
+ break;
+
+ x->doc->add_term((const char*) data);
+ if (!fuser->set.substring_search)
+ break;
+ }
+ return 1;
+}
+
+/* Returns: 0 if index doesn't exist, 1 on deletion, -1 on error */
+int fts_flatcurve_xapian_delete_index(struct flatcurve_fts_backend *backend,
+ const char **error_r)
+{
+ const char *error;
+ int ret = fts_flatcurve_xapian_close(backend, error_r);
+ if (fts_flatcurve_xapian_delete(backend, NULL, &error) < 0) {
+ if (ret < 0)
+ e_error(backend->event, "%s", error);
+ else
+ *error_r = error;
+ ret = -1;
+ }
+ return ret;
+}
+
+#ifdef XAPIAN_HAS_COMPACT
+/* Returns: 0 on success, -1 on error */
+static int
+fts_flatcurve_xapian_optimize_rebuild(struct flatcurve_fts_backend *backend,
+ Xapian::Database *db,
+ struct flatcurve_xapian_db_path *path,
+ const char **error_r)
+{
+ struct flatcurve_xapian *x = backend->xapian;
+
+ /* Create the optimize shard. */
+ struct flatcurve_xapian_db *xdb =
+ p_new(x->pool, struct flatcurve_xapian_db, 1);
+ xdb->dbpath = path;
+ xdb->type = FLATCURVE_XAPIAN_DB_TYPE_OPTIMIZE;
+
+ if (fts_flatcurve_xapian_write_db_get(
+ backend, xdb, FLATCURVE_XAPIAN_WDB_CREATE, error_r) < 0)
+ return -1;
+
+ Xapian::Enquire enquire(*db);
+ enquire.set_docid_order(Xapian::Enquire::ASCENDING);
+ enquire.set_query(Xapian::Query::MatchAll);
+
+ Xapian::MSet mset = enquire.get_mset(0, db->get_doccount());
+ Xapian::MSetIterator iter = mset.begin();
+
+ unsigned int updates = 0;
+ for (iter = mset.begin(); iter != mset.end(); ++iter) {
+ Xapian::Document doc = iter.get_document();
+ try {
+ xdb->dbw->replace_document(doc.get_docid(), doc);
+ if (++updates > FLATCURVE_MANUAL_OPTIMIZE_COMMIT_LIMIT) {
+ xdb->dbw->commit();
+ updates = 0;
+ }
+ } catch (Xapian::Error &e) {
+ *error_r = t_strdup(e.get_description().c_str());
+ return -1;
+ }
+ }
+
+ return fts_flatcurve_xapian_close_db(
+ backend, xdb, FLATCURVE_XAPIAN_DB_CLOSE_WDB, error_r);
+}
+
+/* Returns: 0 on success, -1 on error */
+static int
+fts_flatcurve_xapian_optimize_box_do(struct flatcurve_fts_backend *backend,
+ Xapian::Database *db, const char **error_r)
+{
+ static const enum flatcurve_xapian_db_opts opts =
+ ENUM_EMPTY(flatcurve_xapian_db_opts);
+ static const enum flatcurve_xapian_wdb wopts =
+ ENUM_EMPTY(flatcurve_xapian_wdb);
+
+ struct flatcurve_xapian *x = backend->xapian;
+
+ /* We need to lock all of the mailboxes so nothing changes while we
+ * are optimizing. */
+
+ void *key, *val;
+ struct hash_iterate_context *hiter = hash_table_iterate_init(x->dbs);
+ while (hash_table_iterate(hiter, x->dbs, &key, &val)) {
+ struct flatcurve_xapian_db *db = (struct flatcurve_xapian_db *)val;
+ if (fts_flatcurve_xapian_write_db_get(
+ backend, db, wopts, error_r) < 0)
+ return -1;
+ }
+ hash_table_iterate_deinit(&hiter);
+
+ /* Create the optimize target. */
+ struct flatcurve_xapian_db_path *oldpath =
+ fts_flatcurve_xapian_create_db_path(
+ backend, FLATCURVE_XAPIAN_DB_OPTIMIZE);
+ if (fts_flatcurve_xapian_delete(backend, oldpath, error_r) < 0)
+ return -1;
+
+ struct timeval start;
+ i_gettimeofday(&start);
+
+ bool failed = FALSE;
+ try {
+ (void)db->reopen();
+ db->compact(oldpath->path, Xapian::DBCOMPACT_NO_RENUMBER |
+ Xapian::DBCOMPACT_MULTIPASS |
+ Xapian::Compactor::FULLER);
+ } catch (Xapian::InvalidOperationError &e) {
+ /* This exception is not as specific as it could be...
+ * but the likely reason it happens is due to
+ * Xapian::DBCOMPACT_NO_RENUMBER and shards having disjoint
+ * ranges of UIDs (e.g. shard 1 = 1..2, shard 2 = 2..3).
+ * Xapian, as of 1.4.18, cannot handle this situation.
+ * Since we will never be able to compact this data unless
+ * we do something about it, the options are either:
+ * 1) delete the index totally and start fresh (not great
+ * for large mailboxes), or
+ * 2) to incrementally build the optimized DB by walking
+ * through all DBs and copying, ignoring duplicate
+ * documents.
+ * Let's try to be awesome and do the latter. */
+ failed = fts_flatcurve_xapian_optimize_rebuild(
+ backend, db, oldpath, error_r) < 0;
+ if (!failed)
+ e_debug(backend->event, "Native optimize failed, "
+ "falling back to manual optimization; %s",
+ e.get_description().c_str());
+ } catch (Xapian::Error &e) {
+ *error_r = t_strdup(e.get_description().c_str());
+ failed = TRUE;
+ }
+ if (failed) {
+ e_error(backend->event, "Optimize failed: %s", *error_r);
+ return 0;
+ }
+
+ struct flatcurve_xapian_db_path *newpath =
+ p_new(x->pool, struct flatcurve_xapian_db_path, 1);
+ newpath->fname = p_strdup(x->pool, oldpath->fname);
+ newpath->path = p_strdup(x->pool, oldpath->path);
+
+ /* Delete old indexes. */
+ struct flatcurve_xapian_db_iter *iter =
+ fts_flatcurve_xapian_db_iter_init(backend, opts);
+
+ int ret = 0;
+ while (fts_flatcurve_xapian_db_iter_next(iter)) {
+ if (iter->type != FLATCURVE_XAPIAN_DB_TYPE_OPTIMIZE &&
+ iter->type != FLATCURVE_XAPIAN_DB_TYPE_LOCK) {
+ if (fts_flatcurve_xapian_delete(
+ backend, iter->path, error_r) < 0) {
+ ret = -1;
+ break;
+ }
+ }
+ }
+ const char *error;
+ if (fts_flatcurve_xapian_db_iter_deinit(&iter, &error) < 0) {
+ if (ret < 0)
+ e_error(backend->event, "%s", error);
+ else
+ *error_r = error;
+ ret = -1;
+ }
+ if (ret < 0)
+ return -1;
+
+ /* Rename optimize index to an active index. */
+ if (fts_flatcurve_xapian_rename_db(backend, newpath, NULL, error_r) < 0 ||
+ fts_flatcurve_xapian_delete(backend, oldpath, error_r) < 0)
+ return -1;
+
+ struct timeval now;
+ i_gettimeofday(&now);
+ int elapsed = timeval_diff_msecs(&now, &start);
+ e_debug(backend->event, "Optimized DB in %u.%03u secs",
+ elapsed / 1000, elapsed % 1000);
+
+ return 0;
+}
+#endif
+
+/* Returns: 0 on success, -1 on error */
+int fts_flatcurve_xapian_optimize_box(struct flatcurve_fts_backend *backend,
+ const char **error_r)
+{
+#ifdef XAPIAN_HAS_COMPACT
+ static const enum flatcurve_xapian_db_opts opts =
+ (enum flatcurve_xapian_db_opts)
+ (FLATCURVE_XAPIAN_DB_NOCREATE_CURRENT |
+ FLATCURVE_XAPIAN_DB_IGNORE_EMPTY);
+
+ Xapian::Database *db;
+ int ret;
+ if ((ret = fts_flatcurve_xapian_read_db(
+ backend, opts, &db, error_r)) <= 0)
+ return ret;
+
+ if (backend->xapian->deinit &&
+ !fts_flatcurve_xapian_need_optimize(backend)) {
+ return fts_flatcurve_xapian_close(backend, error_r);
+ }
+
+ e_debug(event_create_passthrough(backend->event)->
+ set_name("fts_flatcurve_optimize")->
+ add_str("mailbox", str_c(backend->boxname))->event(),
+ "Optimizing");
+
+ ret = 0;
+ if (fts_flatcurve_xapian_lock(backend, error_r) < 0 ||
+ fts_flatcurve_xapian_optimize_box_do(backend, db, error_r) < 0)
+ ret = -1;
+
+ const char *error;
+ if (fts_flatcurve_xapian_close(backend, &error) < 0) {
+ if (ret < 0)
+ e_error(backend->event, "%s", error);
+ else
+ *error_r = error;
+ ret = -1;
+ }
+ fts_flatcurve_xapian_unlock(backend);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+static void
+fts_flatcurve_build_query_arg_term(struct flatcurve_fts_query *query,
+ struct mail_search_arg *arg,
+ const char *term)
+{
+ struct flatcurve_fts_query_xapian *x = query->xapian;
+
+ struct flatcurve_fts_query_arg *qarg = array_append_space(&x->args);
+ qarg->value = str_new(query->pool, 64);
+
+ /* Absence of NOT or AND flags means an OR search. */
+ if (arg->match_not)
+ qarg->is_not = TRUE;
+ if ((query->flags & FTS_LOOKUP_FLAG_AND_ARGS) != 0)
+ qarg->is_and = TRUE;
+
+ switch (arg->type) {
+ case SEARCH_TEXT:
+ x->qp->add_prefix(FLATCURVE_XAPIAN_ALL_HEADERS_QP,
+ FLATCURVE_XAPIAN_ALL_HEADERS_PREFIX);
+ str_printfa(qarg->value, "(%s:%s OR %s:%s)",
+ FLATCURVE_XAPIAN_ALL_HEADERS_QP, term,
+ FLATCURVE_XAPIAN_BODY_QP, term);
+ break;
+
+ case SEARCH_BODY:
+ str_printfa(qarg->value, "%s:%s",
+ FLATCURVE_XAPIAN_BODY_QP, term);
+ break;
+
+ case SEARCH_HEADER:
+ case SEARCH_HEADER_ADDRESS:
+ case SEARCH_HEADER_COMPRESS_LWSP:
+ if (*term != '\0') {
+ if (fts_header_want_indexed(arg->hdr_field_name)) {
+ string_t *hdr = str_new(query->pool, 32);
+ str_printfa(hdr, "%s%s",
+ FLATCURVE_XAPIAN_HEADER_QP,
+ t_str_lcase(arg->hdr_field_name));
+ string_t *hdr2 = str_new(query->pool, 32);
+ str_printfa(hdr2, "%s%s",
+ FLATCURVE_XAPIAN_HEADER_PREFIX,
+ t_str_ucase(arg->hdr_field_name));
+ x->qp->add_prefix(str_c(hdr), str_c(hdr2));
+ str_printfa(qarg->value, "%s:%s", str_c(hdr),
+ term);
+ } else {
+ x->qp->add_prefix(
+ FLATCURVE_XAPIAN_ALL_HEADERS_QP,
+ FLATCURVE_XAPIAN_ALL_HEADERS_PREFIX);
+ str_printfa(qarg->value, "%s:%s",
+ FLATCURVE_XAPIAN_ALL_HEADERS_QP,
+ term);
+ /* Non-indexed headers only match if it
+ * appears in the general pool of header
+ * terms for the message, not to a specific
+ * header, so this is only a maybe match. */
+ query->maybe = TRUE;
+ }
+ } else {
+ x->qp->add_boolean_prefix(
+ FLATCURVE_XAPIAN_HEADER_BOOL_QP,
+ FLATCURVE_XAPIAN_BOOLEAN_FIELD_PREFIX);
+ str_printfa(qarg->value, "%s:%s",
+ FLATCURVE_XAPIAN_HEADER_BOOL_QP,
+ t_str_lcase(arg->hdr_field_name));
+ }
+ break;
+ default:
+ i_unreached();
+ }
+}
+
+static void
+fts_flatcurve_build_query_arg(struct flatcurve_fts_query *query,
+ struct mail_search_arg *arg)
+{
+ struct flatcurve_fts_query_xapian *x = query->xapian;
+
+ if (arg->no_fts)
+ return;
+
+ switch (arg->type) {
+ case SEARCH_TEXT:
+ case SEARCH_BODY:
+ case SEARCH_HEADER:
+ case SEARCH_HEADER_ADDRESS:
+ case SEARCH_HEADER_COMPRESS_LWSP:
+ /* Valid search term. Set match_always, as required by FTS
+ * API, to avoid this argument being looked up later via
+ * regular search code. */
+ arg->match_always = TRUE;
+ break;
+
+ case SEARCH_MAILBOX:
+ /* doveadm will pass this through in 'doveadm search'
+ * commands with a 'mailbox' search argument. The code has
+ * already handled setting the proper mailbox by this point
+ * so just ignore this. */
+ return;
+
+ case SEARCH_OR:
+ case SEARCH_SUB:
+ /* FTS API says to ignore these. */
+ return;
+
+ default:
+ /* We should never get here - this is a search argument that
+ * we don't understand how to handle that has leaked to this
+ * point. For performance reasons, we will ignore this
+ * argument and err on the side of returning too many
+ * results (rather than falling back to slow, manual
+ * search). */
+ return;
+ }
+
+ if (*arg->value.str == '\0') {
+ /* This is an existence search. */
+ fts_flatcurve_build_query_arg_term(query, arg, "");
+ return;
+ }
+
+ /* Prepare search term. Phrase searching is not supported
+ * natively (FTS core provides index terms without positional
+ * context) so we can only do single term searching with
+ * Xapian. Therefore, if we do see a multi-term search, break
+ * it apart and do a maybe query. */
+ const char *const *parts = t_strsplit_spaces(arg->value.str, " ");
+ unsigned int count = str_array_length(parts);
+ if (count > 1)
+ query->maybe = TRUE;
+
+ for (unsigned int index = 0; index < count; index++, parts++) {
+ /* For phrase searches, we only add wildcard to the
+ * last term. */
+ const char *term = (index + 1) == count ?
+ t_strconcat(*parts, "*", NULL) : *parts;
+
+ fts_flatcurve_build_query_arg_term(query, arg, term);
+
+ /* We need to AND search all phrase terms. */
+ if (count > 1) {
+ struct flatcurve_fts_query_arg *qarg =
+ array_back_modifiable(&x->args);
+ qarg->is_and = TRUE;
+ }
+ }
+}
+
+static void
+fts_flatcurve_xapian_build_query_deinit(struct flatcurve_fts_query *query)
+{
+ array_free(&query->xapian->args);
+ delete(query->xapian->qp);
+}
+
+void
+fts_flatcurve_xapian_build_query_match_all(struct flatcurve_fts_query *query)
+{
+ struct flatcurve_fts_query_xapian *x = query->xapian =
+ p_new(query->pool, struct flatcurve_fts_query_xapian, 1);
+ query->qtext = str_new_const(query->pool, "[Match All]", 11);
+ x->query = new Xapian::Query(Xapian::Query::MatchAll);
+}
+
+/* Returns: 0 on success, -1 on error */
+int fts_flatcurve_xapian_build_query(struct flatcurve_fts_query *query,
+ const char **error_r)
+{
+ struct flatcurve_fts_query_xapian *x = query->xapian =
+ p_new(query->pool, struct flatcurve_fts_query_xapian, 1);
+ p_array_init(&x->args, query->pool, 4);
+
+ x->qp = new Xapian::QueryParser();
+ x->qp->add_prefix(FLATCURVE_XAPIAN_BODY_QP, "");
+ x->qp->set_stemming_strategy(Xapian::QueryParser::STEM_NONE);
+
+ struct mail_search_arg *args;
+ for (args = query->args; args != NULL ; args = args->next)
+ fts_flatcurve_build_query_arg(query, args);
+
+ /* Empty Query. Optimize by not creating a query and returning no
+ * results when we go through the iteration later. */
+ if (array_is_empty(&x->args)) {
+ fts_flatcurve_xapian_build_query_deinit(query);
+ return 0;
+ }
+
+ std::string str;
+ const struct flatcurve_fts_query_arg *arg, *prev;
+ /* Generate the query. */
+ prev = NULL;
+ array_foreach(&x->args, arg) {
+ if (arg->is_not) {
+ if (prev != NULL)
+ str += " ";
+ str += "NOT ";
+ }
+ if (arg->is_not || (prev == NULL)) {
+ str += str_c(arg->value);
+ } else if (!str_equals(arg->value, prev->value)) {
+ if (arg->is_and)
+ str += " AND ";
+ else
+ str += " OR ";
+ str += str_c(arg->value);
+ }
+ prev = arg;
+ }
+
+ query->qtext = str_new(query->pool, 64);
+ str_append(query->qtext, str.c_str());
+
+ int ret = 0;
+ try {
+ x->query = new Xapian::Query(x->qp->parse_query(
+ str,
+ Xapian::QueryParser::FLAG_BOOLEAN |
+ Xapian::QueryParser::FLAG_PHRASE |
+ Xapian::QueryParser::FLAG_PURE_NOT |
+ Xapian::QueryParser::FLAG_WILDCARD
+ ));
+ } catch (Xapian::QueryParserError &e) {
+ *error_r = t_strdup_printf(
+ "Parsing query failed (query: %s); %s",
+ str.c_str(), e.get_description().c_str());
+ ret = -1;
+ }
+
+ fts_flatcurve_xapian_build_query_deinit(query);
+ return ret;
+}
+
+struct fts_flatcurve_xapian_query_iter *
+fts_flatcurve_xapian_query_iter_init(struct flatcurve_fts_query *query)
+{
+ struct fts_flatcurve_xapian_query_iter *iter;
+ iter = new fts_flatcurve_xapian_query_iter();
+ iter->query = query;
+ iter->result = p_new(query->pool,
+ struct fts_flatcurve_xapian_query_result, 1);
+ return iter;
+}
+
+bool
+fts_flatcurve_xapian_query_iter_next(struct fts_flatcurve_xapian_query_iter *iter,
+ struct fts_flatcurve_xapian_query_result **result_r)
+{
+ static const enum flatcurve_xapian_db_opts opts =
+ ENUM_EMPTY(flatcurve_xapian_db_opts);
+
+ if (iter->error != NULL)
+ return FALSE;
+
+ Xapian::MSet m;
+ if (iter->enquire == NULL) {
+ if (iter->query->xapian->query == NULL)
+ return FALSE;
+
+ const char *error;
+ int ret = fts_flatcurve_xapian_read_db(
+ iter->query->backend, opts, &iter->db, &error);
+ if (ret < 0)
+ iter->error = i_strdup(error);
+ if (ret <= 0)
+ return FALSE;
+
+ iter->enquire = new Xapian::Enquire(*iter->db);
+ iter->enquire->set_docid_order(
+ Xapian::Enquire::DONT_CARE);
+ iter->enquire->set_query(*iter->query->xapian->query);
+
+ try {
+ m = iter->enquire->get_mset(0, iter->db->get_doccount());
+ } catch (Xapian::DatabaseModifiedError &e) {
+ /* Per documentation, this is only thrown if more than
+ * one change has been made to the database. To
+ * resolve you need to reopen the DB (Xapian can
+ * handle a single snapshot of a modified DB natively,
+ * so this only occurs if there have been multiple
+ * writes). However, we ALWAYS want to use the
+ * most up-to-date version, so we have already
+ * explicitly called reopen() above. Thus, we should
+ * never see this exception. */
+ i_unreached();
+ }
+
+ iter->mset_iter = m.begin();
+ }
+
+ if (iter->mset_iter == m.end())
+ return FALSE;
+
+ iter->result->score = iter->mset_iter.get_weight();
+ /* MSet docid can be an "interleaved" docid generated by
+ * Xapian::Database when handling multiple DBs at once. Instead, we
+ * want the "unique docid", which is obtained by looking at the
+ * doc id from the Document object itself. */
+ iter->result->uid = iter->mset_iter.get_document().get_docid();
+ ++iter->mset_iter;
+
+ *result_r = iter->result;
+ return TRUE;
+}
+
+/* Returns: 0 on success, -1 on error */
+int
+fts_flatcurve_xapian_query_iter_deinit(struct fts_flatcurve_xapian_query_iter **_iter,
+ const char **error_r)
+{
+ struct fts_flatcurve_xapian_query_iter *iter = *_iter;
+ *_iter = NULL;
+
+ p_free(iter->query->pool, iter->result);
+ if (iter->enquire != NULL)
+ delete(iter->enquire);
+
+ int ret = 0;
+ if (iter->error != NULL) {
+ *error_r = t_strdup(iter->error);
+ i_free(iter->error);
+ ret = -1;
+ }
+ delete(iter);
+ return ret;
+}
+
+/* Returns: 0 on success, -1 on error */
+int fts_flatcurve_xapian_run_query(struct flatcurve_fts_query *query,
+ struct flatcurve_fts_result *r,
+ const char **error_r)
+{
+ struct fts_flatcurve_xapian_query_iter *iter;
+ struct fts_flatcurve_xapian_query_result *result;
+ struct fts_score_map *score;
+
+ iter = fts_flatcurve_xapian_query_iter_init(query);
+ while (fts_flatcurve_xapian_query_iter_next(iter, &result)) {
+ seq_range_array_add(&r->uids, result->uid);
+ score = array_append_space(&r->scores);
+ score->score = (float)result->score;
+ score->uid = result->uid;
+ }
+ return fts_flatcurve_xapian_query_iter_deinit(&iter, error_r);
+}
+
+void fts_flatcurve_xapian_destroy_query(struct flatcurve_fts_query *query)
+{
+ delete(query->xapian->query);
+}
+
+const char *fts_flatcurve_xapian_library_version()
+{
+ return Xapian::version_string();
+}
--- /dev/null
+/* Copyright (c) the Dovecot authors, based on code by Michael Slusarz.
+ * See the included COPYING file */
+
+#include "lib.h"
+#include "array.h"
+#include "imap-util.h"
+#include "mail-storage-private.h"
+#include "mail-search-build.h"
+#include "mailbox-list-iter.h"
+#include "str.h"
+#include "time-util.h"
+#include "unlink-directory.h"
+#include "fts-backend-flatcurve.h"
+#include "fts-backend-flatcurve-xapian.h"
+
+enum fts_backend_flatcurve_action {
+ FTS_BACKEND_FLATCURVE_ACTION_OPTIMIZE,
+ FTS_BACKEND_FLATCURVE_ACTION_RESCAN
+};
+
+struct event_category event_category_fts_flatcurve = {
+ .name = FTS_FLATCURVE_LABEL,
+ .parent = &event_category_fts
+};
+
+static struct fts_backend *fts_backend_flatcurve_alloc(void)
+{
+ struct flatcurve_fts_backend *backend;
+ pool_t pool;
+
+ pool = pool_alloconly_create(FTS_FLATCURVE_LABEL " pool", 4096);
+
+ backend = i_new(struct flatcurve_fts_backend, 1);
+ backend->backend = fts_backend_flatcurve;
+ backend->pool = pool;
+
+ return &backend->backend;
+}
+
+static int
+fts_backend_flatcurve_init(struct fts_backend *_backend, const char **error_r)
+{
+ struct flatcurve_fts_backend *backend =
+ container_of(_backend, struct flatcurve_fts_backend, backend);
+ struct fts_flatcurve_user *fuser =
+ FTS_FLATCURVE_USER_CONTEXT(_backend->ns->user);
+
+ if (fuser == NULL) {
+ *error_r = "Invalid fts-flatcurve settings";
+ return -1;
+ }
+
+ backend->boxname = str_new(backend->pool, 128);
+ backend->db_path = str_new(backend->pool, 256);
+ backend->fuser = fuser;
+
+ fuser->backend = backend;
+
+ fts_flatcurve_xapian_init(backend);
+
+ backend->event = event_create(_backend->ns->user->event);
+ event_add_category(backend->event, &event_category_fts_flatcurve);
+
+ return fts_backend_flatcurve_close_mailbox(backend, error_r);
+}
+
+int
+fts_backend_flatcurve_close_mailbox(struct flatcurve_fts_backend *backend,
+ const char **error_r)
+{
+ int ret = 0;
+ if (str_len(backend->boxname) > 0) {
+ ret = fts_flatcurve_xapian_close(backend, error_r);
+
+ str_truncate(backend->boxname, 0);
+ str_truncate(backend->db_path, 0);
+ }
+
+ event_set_append_log_prefix(backend->event, FTS_FLATCURVE_DEBUG_PREFIX);
+ return ret;
+}
+
+static int fts_backend_flatcurve_refresh(struct fts_backend * _backend)
+{
+ const char *error;
+ struct flatcurve_fts_backend *backend =
+ (struct flatcurve_fts_backend *)_backend;
+
+ int ret = fts_flatcurve_xapian_refresh(backend, &error);
+ if (ret < 0)
+ e_error(backend->event, "%s", error);
+ return ret;
+}
+
+static void fts_backend_flatcurve_deinit(struct fts_backend *_backend)
+{
+ const char *error;
+ struct flatcurve_fts_backend *backend =
+ (struct flatcurve_fts_backend *)_backend;
+
+ int ret = fts_backend_flatcurve_close_mailbox(backend, &error);
+ fts_flatcurve_xapian_deinit(backend);
+ if (ret < 0)
+ e_error(backend->event, "%s", error);
+
+ event_unref(&backend->event);
+ pool_unref(&backend->pool);
+ i_free(backend);
+}
+
+int
+fts_backend_flatcurve_set_mailbox(struct flatcurve_fts_backend *backend,
+ struct mailbox *box, const char **error_r)
+{
+ const char *path;
+ struct mail_storage *storage;
+
+ if (str_len(backend->boxname) > 0 &&
+ strcasecmp(box->vname, str_c(backend->boxname)) == 0)
+ return 0;
+
+ if (fts_backend_flatcurve_close_mailbox(backend, error_r) < 0) {
+ *error_r = t_strdup_printf("Could not open mailbox: %s: %s",
+ box->vname, *error_r);
+ return -1;
+ }
+
+ if (mailbox_open(box) < 0 ||
+ mailbox_get_path_to(box, MAILBOX_LIST_PATH_TYPE_INDEX, &path) <= 0) {
+ *error_r = t_strdup_printf("Could not open mailbox: %s: %s",
+ box->vname,
+ mailbox_get_last_internal_error(box, NULL));
+ return -1;
+ }
+
+ str_append(backend->boxname, box->vname);
+ str_printfa(backend->db_path, "%s/%s/", path, FTS_FLATCURVE_LABEL);
+
+ storage = mailbox_get_storage(box);
+ backend->parsed_lock_method = storage->set->parsed_lock_method;
+
+ fts_flatcurve_xapian_set_mailbox(backend);
+ return 0;
+}
+
+static int
+fts_backend_flatcurve_get_last_uid(struct fts_backend *_backend,
+ struct mailbox *box, uint32_t *last_uid_r)
+{
+ const char *error;
+ struct flatcurve_fts_backend *backend =
+ (struct flatcurve_fts_backend *)_backend;
+
+ if (fts_backend_flatcurve_set_mailbox(backend, box, &error) < 0 ||
+ fts_flatcurve_xapian_get_last_uid(backend, last_uid_r, &error) < 0) {
+ e_error(backend->event, "%s", error);
+ return -1;
+ }
+ return 0;
+}
+
+static struct fts_backend_update_context
+*fts_backend_flatcurve_update_init(struct fts_backend *_backend)
+{
+ struct flatcurve_fts_backend *backend =
+ (struct flatcurve_fts_backend *)_backend;
+ struct flatcurve_fts_backend_update_context *ctx;
+
+ ctx = p_new(backend->pool,
+ struct flatcurve_fts_backend_update_context, 1);
+ ctx->ctx.backend = _backend;
+ ctx->backend = backend;
+ ctx->hdr_name = str_new(backend->pool, 128);
+ i_gettimeofday(&ctx->start);
+
+ return &ctx->ctx;
+}
+
+static int
+fts_backend_flatcurve_update_deinit(struct fts_backend_update_context *_ctx)
+{
+ struct flatcurve_fts_backend_update_context *ctx =
+ (struct flatcurve_fts_backend_update_context *)_ctx;
+ int diff, ret = _ctx->failed ? -1 : 0;
+ struct timeval now;
+
+ if (ret == 0) {
+ i_gettimeofday(&now);
+ diff = timeval_diff_msecs(&now, &ctx->start);
+
+ e_debug(ctx->backend->event, "Update transaction completed in "
+ "%u.%03u secs", diff/1000, diff%1000);
+ }
+
+ str_free(&ctx->hdr_name);
+ p_free(ctx->backend->pool, ctx);
+
+ return ret;
+}
+
+static void
+fts_backend_flatcurve_update_set_mailbox(struct fts_backend_update_context *_ctx,
+ struct mailbox *box)
+{
+ const char *error;
+ struct flatcurve_fts_backend_update_context *ctx =
+ (struct flatcurve_fts_backend_update_context *)_ctx;
+
+ int ret = box == NULL ?
+ fts_backend_flatcurve_close_mailbox(ctx->backend, &error) :
+ fts_backend_flatcurve_set_mailbox(ctx->backend, box, &error);
+ if (ret < 0)
+ e_error(ctx->backend->event, "%s", error);
+}
+
+static void
+fts_backend_flatcurve_update_expunge(struct fts_backend_update_context *_ctx,
+ uint32_t uid)
+{
+ const char *error;
+ struct flatcurve_fts_backend_update_context *ctx =
+ (struct flatcurve_fts_backend_update_context *)_ctx;
+
+ e_debug(event_create_passthrough(ctx->backend->event)->
+ set_name("fts_flatcurve_expunge")->
+ add_str("mailbox", str_c(ctx->backend->boxname))->
+ add_int("uid", uid)->event(),
+ "Expunge uid=%d", uid);
+
+ if (fts_flatcurve_xapian_expunge(ctx->backend, uid, &error) < 0)
+ e_error(ctx->backend->event, "%s", error);
+}
+
+static bool
+fts_backend_flatcurve_update_set_build_key(struct fts_backend_update_context *_ctx,
+ const struct fts_backend_build_key *key)
+{
+ struct flatcurve_fts_backend_update_context *ctx =
+ (struct flatcurve_fts_backend_update_context *)_ctx;
+
+ i_assert(str_len(ctx->backend->boxname) > 0);
+
+ if (_ctx->failed || ctx->skip_uid)
+ return FALSE;
+
+ bool changed = FALSE;
+ if (ctx->uid != key->uid) {
+ changed = TRUE;
+ ctx->skip_uid = FALSE;
+ ctx->uid = key->uid;
+ }
+ ctx->type = key->type;
+
+ /* Specifically init message, as there is a chance that there
+ * is no valid search info in a message so the message will
+ * not be saved to DB after processing. */
+ if (changed) {
+ const char *error;
+ int ret = fts_flatcurve_xapian_init_msg(ctx, &error);
+ if (ret < 0) {
+ e_error(ctx->backend->event, "%s", error);
+ return FALSE;
+ }
+ if (ret == 0) {
+ /* This UID has already been indexed, so skip all
+ * future update calls. */
+ ctx->skip_uid = TRUE;
+ return FALSE;
+ }
+
+ e_debug(event_create_passthrough(ctx->backend->event)->
+ set_name("fts_flatcurve_index")->
+ add_str("mailbox", str_c(ctx->backend->boxname))->
+ add_int("uid", key->uid)->event(),
+ "Indexing uid=%d", key->uid);
+ }
+
+ switch (key->type) {
+ case FTS_BACKEND_BUILD_KEY_HDR:
+ i_assert(key->hdr_name != NULL);
+ str_append(ctx->hdr_name, key->hdr_name);
+ ctx->indexed_hdr = fts_header_want_indexed(key->hdr_name);
+ break;
+ case FTS_BACKEND_BUILD_KEY_MIME_HDR:
+ case FTS_BACKEND_BUILD_KEY_BODY_PART:
+ /* noop */
+ break;
+ case FTS_BACKEND_BUILD_KEY_BODY_PART_BINARY:
+ i_unreached();
+ }
+ return TRUE;
+}
+
+static void
+fts_backend_flatcurve_update_unset_build_key(struct fts_backend_update_context *_ctx)
+{
+ struct flatcurve_fts_backend_update_context *ctx =
+ (struct flatcurve_fts_backend_update_context *)_ctx;
+
+ str_truncate(ctx->hdr_name, 0);
+}
+
+static int
+fts_backend_flatcurve_update_build_more(struct fts_backend_update_context *_ctx,
+ const unsigned char *data, size_t size)
+{
+ struct flatcurve_fts_backend_update_context *ctx =
+ (struct flatcurve_fts_backend_update_context *)_ctx;
+
+ i_assert(ctx->uid != 0);
+
+ if (_ctx->failed || ctx->skip_uid)
+ return -1;
+
+ if (size < ctx->backend->fuser->set.min_term_size)
+ return 0;
+
+ /* Xapian has a hard limit of "245 bytes", at least with the glass
+ * and chert backends. However, it is highly doubtful that people
+ * are realistically going to search with more than 10s of
+ * characters. Therefore, limit term size (via a configurable
+ * value). */
+ size = I_MIN(size, ctx->backend->fuser->set.max_term_size);
+
+ const char *error;
+ int ret;
+ switch (ctx->type) {
+ case FTS_BACKEND_BUILD_KEY_HDR:
+ case FTS_BACKEND_BUILD_KEY_MIME_HDR:
+ ret = fts_flatcurve_xapian_index_header(ctx, data, size, &error);
+ break;
+ case FTS_BACKEND_BUILD_KEY_BODY_PART:
+ ret = fts_flatcurve_xapian_index_body(ctx, data, size, &error);
+ break;
+ default:
+ i_unreached();
+ }
+
+ if (ret < 0)
+ e_error(ctx->backend->event, "%s", error);
+ return ret < 0 || _ctx->failed ? -1 : 0;
+}
+
+static const char *
+fts_backend_flatcurve_seq_range_string(ARRAY_TYPE(seq_range) *uids)
+{
+ string_t *dest = t_str_new(256);
+ imap_write_seq_range(dest, uids);
+ return str_c(dest);
+}
+
+static int
+fts_backend_flatcurve_rescan_box(struct flatcurve_fts_backend *backend,
+ struct mailbox *box, pool_t pool,
+ const char **error_r)
+{
+ bool dbexist = FALSE;
+ struct event_passthrough *e;
+ struct fts_flatcurve_xapian_query_iter *iter;
+ struct seq_range_iter iter2;
+ uint32_t low_uid = 0;
+ struct mail *mail;
+ ARRAY_TYPE(seq_range) expunged, missing, uids;
+ struct flatcurve_fts_query *query;
+ struct fts_flatcurve_xapian_query_result *result;
+ struct mail_search_args *search_args;
+ struct mail_search_context *search_ctx;
+ struct mailbox_transaction_context *trans;
+
+ /* Check for non-indexed mails. */
+ if (mailbox_sync(box, MAILBOX_SYNC_FLAG_FULL_READ) < 0)
+ return -1;
+
+ trans = mailbox_transaction_begin(box, 0, __func__);
+ search_args = mail_search_build_init();
+ mail_search_build_add_all(search_args);
+
+ p_array_init(&missing, pool, 32);
+ p_array_init(&uids, pool, 256);
+
+ int ret = 0;
+ search_ctx = mailbox_search_init(trans, search_args, NULL, 0, NULL);
+ while (mailbox_search_next(search_ctx, &mail)) {
+ seq_range_array_add(&uids, mail->uid);
+ ret = fts_flatcurve_xapian_uid_exists(backend, mail->uid, error_r);
+ if (ret < 0)
+ break;
+ if (ret == 0)
+ seq_range_array_add(&missing, mail->uid);
+ dbexist = TRUE;
+ }
+
+ if (mailbox_search_deinit(&search_ctx) < 0)
+ e_error(backend->event, "Could not deinit %s: %s",
+ box->vname, mailbox_get_last_internal_error(box, NULL));
+
+ mail_search_args_unref(&search_args);
+ if (mailbox_transaction_commit(&trans) < 0)
+ e_error(backend->event, "Could not commit %s: %s",
+ box->vname, mailbox_get_last_internal_error(box, NULL));
+
+ if (ret < 0 || !dbexist)
+ return ret;
+
+ e = event_create_passthrough(backend->event)->
+ set_name("fts_flatcurve_rescan")->
+ add_str("mailbox", box->name);
+
+ if (!array_is_empty(&missing)) {
+ /* There does not seem to be an easy way via FTS API (as of
+ * 2.3.15) to indicate what specific uids need to be indexed.
+ * Instead, delete all messages above the lowest, non-indexed
+ * UID and recreate the index the next time the mailbox
+ * is accessed. */
+ seq_range_array_iter_init(&iter2, &missing);
+ bool ret1 = seq_range_array_iter_nth(&iter2, 0, &low_uid);
+ i_assert(ret1);
+ }
+
+ query = p_new(pool, struct flatcurve_fts_query, 1);
+ query->backend = backend;
+ query->pool = pool;
+ fts_flatcurve_xapian_build_query_match_all(query);
+
+ p_array_init(&expunged, pool, 256);
+
+ iter = fts_flatcurve_xapian_query_iter_init(query);
+ while (fts_flatcurve_xapian_query_iter_next(iter, &result)) {
+ if ((low_uid > 0 && result->uid >= low_uid) ||
+ (low_uid == 0 && !seq_range_exists(&uids, result->uid))) {
+ if (fts_flatcurve_xapian_expunge(
+ backend, result->uid, error_r) < 0)
+ e_error(backend->event, "%s", *error_r);
+ else
+ seq_range_array_add(&expunged, result->uid);
+ }
+ }
+
+ ret = fts_flatcurve_xapian_query_iter_deinit(&iter, error_r);
+ fts_flatcurve_xapian_destroy_query(query);
+ if (ret < 0)
+ return -1;
+
+ if (array_is_empty(&expunged)) {
+ e_debug(e->add_str("status", "ok")->event(),
+ "Rescan: no issues found");
+ } else T_BEGIN {
+ const char *u = fts_backend_flatcurve_seq_range_string(&expunged);
+ e->add_str("expunged", u);
+
+ if (low_uid > 0) {
+ const char *u2 = fts_backend_flatcurve_seq_range_string(&missing);
+ e_debug(e->add_str("status", "missing_msgs")->
+ add_str("uids", u2)->event(),
+ "Rescan: missing messages uids=%s expunged=%s",
+ u2, u);
+ } else {
+ e_debug(e->add_str("status", "expunge_msgs")->event(),
+ "Rescan: expunge non-existent messages "
+ "expunged=%s", u);
+ }
+ } T_END;
+ return ret;
+}
+
+static int
+fts_backend_flatcurve_iterate_ns(struct fts_backend *_backend,
+ enum fts_backend_flatcurve_action act)
+{
+ const char *error;
+ struct flatcurve_fts_backend *backend =
+ (struct flatcurve_fts_backend *)_backend;
+ struct mailbox *box;
+ const struct mailbox_info *info;
+ struct mailbox_list_iterate_context *iter;
+ const enum mailbox_list_iter_flags iter_flags =
+ MAILBOX_LIST_ITER_NO_AUTO_BOXES |
+ MAILBOX_LIST_ITER_RETURN_NO_FLAGS;
+ enum mailbox_flags mbox_flags = 0;
+ pool_t pool = NULL;
+
+ bool failed = FALSE;
+ iter = mailbox_list_iter_init(_backend->ns->list, "*", iter_flags);
+ while ((info = mailbox_list_iter_next(iter)) != NULL) {
+ box = mailbox_alloc(backend->backend.ns->list, info->vname,
+ mbox_flags);
+
+ if (fts_backend_flatcurve_set_mailbox(
+ backend, box, &error) < 0) {
+ e_error(backend->event, "%s", error);
+ failed = TRUE;
+ continue;
+ }
+
+ switch (act) {
+ case FTS_BACKEND_FLATCURVE_ACTION_OPTIMIZE:
+ if (fts_flatcurve_xapian_optimize_box(
+ backend, &error) < 0) {
+ e_error(backend->event, "%s", error);
+ failed = TRUE;
+ }
+ break;
+ case FTS_BACKEND_FLATCURVE_ACTION_RESCAN:
+ if (pool == NULL)
+ pool = pool_alloconly_create(
+ FTS_FLATCURVE_LABEL " rescan pool",
+ 4096);
+ if (fts_backend_flatcurve_rescan_box(
+ backend, box, pool, &error) < 0) {
+ e_error(backend->event, "%s", error);
+ failed = TRUE;
+ }
+ p_clear(pool);
+ break;
+ }
+
+ mailbox_free(&box);
+ }
+ if (mailbox_list_iter_deinit(&iter) < 0)
+ e_error(backend->event, "%s",
+ mailbox_list_get_last_internal_error(
+ _backend->ns->list, NULL));
+
+ pool_unref(&pool);
+
+ return failed ? -1 : 0;
+}
+
+static int fts_backend_flatcurve_optimize(struct fts_backend *backend)
+{
+ return fts_backend_flatcurve_iterate_ns(backend,
+ FTS_BACKEND_FLATCURVE_ACTION_OPTIMIZE);
+}
+
+static int fts_backend_flatcurve_rescan(struct fts_backend *backend)
+{
+ return fts_backend_flatcurve_iterate_ns(backend,
+ FTS_BACKEND_FLATCURVE_ACTION_RESCAN);
+}
+
+static int
+fts_backend_flatcurve_lookup_multi(struct fts_backend *_backend,
+ struct mailbox *const boxes[],
+ struct mail_search_arg *args,
+ enum fts_lookup_flags flags,
+ struct fts_multi_result *result)
+{
+ const char *error;
+ struct flatcurve_fts_backend *backend =
+ (struct flatcurve_fts_backend *)_backend;
+ ARRAY(struct fts_result) box_results;
+ struct flatcurve_fts_result *fresult;
+ unsigned int i;
+ struct flatcurve_fts_query *query;
+ struct fts_result *r;
+ int ret = 0;
+
+ /* Create query */
+ query = p_new(result->pool, struct flatcurve_fts_query, 1);
+ query->args = args;
+ query->backend = backend;
+ query->flags = flags;
+ query->pool = result->pool;
+ if (fts_flatcurve_xapian_build_query(query, &error) < 0) {
+ fts_flatcurve_xapian_destroy_query(query);
+ e_error(backend->event, "%s", error);
+ return -1;
+ }
+
+ p_array_init(&box_results, result->pool, 8);
+ for (i = 0; boxes[i] != NULL; i++) {
+ r = array_append_space(&box_results);
+ r->box = boxes[i];
+
+ fresult = p_new(result->pool, struct flatcurve_fts_result, 1);
+ p_array_init(&fresult->scores, result->pool, 32);
+ p_array_init(&fresult->uids, result->pool, 32);
+
+ if (fts_backend_flatcurve_set_mailbox(backend, r->box, &error) < 0) {
+ ret = -1;
+ break;
+ }
+
+ if (fts_flatcurve_xapian_run_query(query, fresult, &error) < 0) {
+ ret = -1;
+ break;
+ }
+
+ if ((query->maybe) ||
+ ((flags & FTS_LOOKUP_FLAG_NO_AUTO_FUZZY) != 0))
+ r->maybe_uids = fresult->uids;
+ else
+ r->definite_uids = fresult->uids;
+ r->scores = fresult->scores;
+
+ /* This was an empty query - skip output of debug info. */
+ if (query->qtext == NULL)
+ continue;
+
+ T_BEGIN {
+ const char *u = fts_backend_flatcurve_seq_range_string(&fresult->uids);
+ e_debug(event_create_passthrough(backend->event)->
+ set_name("fts_flatcurve_query")->
+ add_int("count", array_count(&fresult->uids))->
+ add_str("mailbox", r->box->vname)->
+ add_str("maybe", query->maybe ? "yes" : "no")->
+ add_str("query", str_c(query->qtext))->
+ add_str("uids", u)->event(), "Query (%s) "
+ "%smatches=%d uids=%s", str_c(query->qtext),
+ query->maybe ? "maybe_" : "",
+ array_count(&fresult->uids), u);
+ } T_END;
+ }
+
+ if (ret == 0) {
+ array_append_zero(&box_results);
+ result->box_results = array_idx_modifiable(&box_results, 0);
+ } else {
+ e_error(backend->event, "%s", error);
+ }
+
+ fts_flatcurve_xapian_destroy_query(query);
+ return ret;
+}
+
+static int
+fts_backend_flatcurve_lookup(struct fts_backend *_backend, struct mailbox *box,
+ struct mail_search_arg *args,
+ enum fts_lookup_flags flags,
+ struct fts_result *result)
+{
+ struct mailbox *boxes[2];
+ struct fts_multi_result multi_result;
+ const struct fts_result *br;
+ int ret;
+
+ boxes[0] = box;
+ boxes[1] = NULL;
+
+ i_zero(&multi_result);
+ multi_result.pool = pool_alloconly_create(FTS_FLATCURVE_LABEL
+ " results pool", 4096);
+ ret = fts_backend_flatcurve_lookup_multi(_backend, boxes, args,
+ flags, &multi_result);
+
+ if ((ret == 0) &&
+ (multi_result.box_results != NULL) &&
+ (multi_result.box_results[0].box != NULL)) {
+ br = &multi_result.box_results[0];
+ result->box = br->box;
+ if (array_is_created(&br->definite_uids))
+ array_append_array(&result->definite_uids,
+ &br->definite_uids);
+ if (array_is_created(&br->maybe_uids))
+ array_append_array(&result->maybe_uids,
+ &br->maybe_uids);
+ array_append_array(&result->scores, &br->scores);
+ result->scores_sorted = TRUE;
+ }
+ pool_unref(&multi_result.pool);
+
+ return ret;
+}
+
+/* Returns: 0 if FTS directory doesn't exist, 1 on deletion, -1 on error */
+int fts_backend_flatcurve_delete_dir(const char *path, const char **error_r)
+{
+ struct stat st;
+ enum unlink_directory_flags unlink_flags = UNLINK_DIRECTORY_FLAG_RMDIR;
+
+ if (stat(path, &st) < 0) {
+ if (errno == ENOENT)
+ return 0;
+ else {
+ *error_r = t_strdup_printf("Deleting fts data failed: "
+ "stat(%s) failed: %m", path);
+ return -1;
+ }
+ }
+
+ if (S_ISDIR(st.st_mode)) {
+ if (unlink_directory(path, unlink_flags, error_r) < 0) {
+ *error_r = t_strdup_printf("Deleting fts data failed: "
+ "unlink_directory(%s) failed: %s", path, *error_r);
+ return -1;
+ }
+ } else if (unlink(path) < 0) {
+ *error_r = t_strdup_printf("Deleting fts data failed: "
+ "unlink(%s) failed: %m", path);
+ return -1;
+ }
+
+ return 1;
+}
+
+
+struct fts_backend fts_backend_flatcurve = {
+ .name = "flatcurve",
+ .flags = FTS_BACKEND_FLAG_TOKENIZED_INPUT,
+ .v = {
+ .alloc = fts_backend_flatcurve_alloc,
+ .init = fts_backend_flatcurve_init,
+ .deinit = fts_backend_flatcurve_deinit,
+ .get_last_uid = fts_backend_flatcurve_get_last_uid,
+ .update_init = fts_backend_flatcurve_update_init,
+ .update_deinit = fts_backend_flatcurve_update_deinit,
+ .update_set_mailbox = fts_backend_flatcurve_update_set_mailbox,
+ .update_expunge = fts_backend_flatcurve_update_expunge,
+ .update_set_build_key = fts_backend_flatcurve_update_set_build_key,
+ .update_unset_build_key = fts_backend_flatcurve_update_unset_build_key,
+ .update_build_more = fts_backend_flatcurve_update_build_more,
+ .refresh = fts_backend_flatcurve_refresh,
+ .rescan = fts_backend_flatcurve_rescan,
+ .optimize = fts_backend_flatcurve_optimize,
+ .can_lookup = fts_backend_default_can_lookup,
+ .lookup = fts_backend_flatcurve_lookup,
+ .lookup_multi = fts_backend_flatcurve_lookup_multi,
+ .lookup_done = NULL,
+ }
+};