]> git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
Rewrote thread indexing code. It's a lot simpler and takes less disk space.
authorTimo Sirainen <tss@iki.fi>
Mon, 1 Sep 2008 12:17:00 +0000 (15:17 +0300)
committerTimo Sirainen <tss@iki.fi>
Mon, 1 Sep 2008 12:17:00 +0000 (15:17 +0300)
We no longer try to keep a hash table and the entire thread tree stored on
disk. Instead we keep a simple Message-ID string (actually just "uid, ref#"
pointer) -> unique index number mapping on disk, read it to memory and use
it to build the thread tree. After the initial build the thread tree is
still updated incrementally.

--HG--
branch : HEAD

18 files changed:
.hgignore
src/imap/cmd-thread.c
src/lib-index/Makefile.am
src/lib-index/mail-hash.c [deleted file]
src/lib-index/mail-hash.h [deleted file]
src/lib-index/mail-index-private.h
src/lib-index/mail-index-strmap.c [new file with mode: 0644]
src/lib-index/mail-index-strmap.h [new file with mode: 0644]
src/lib-index/mail-index.c
src/lib-storage/index/Makefile.am
src/lib-storage/index/index-search.c
src/lib-storage/index/index-thread-finish.c
src/lib-storage/index/index-thread-links.c
src/lib-storage/index/index-thread-private.h
src/lib-storage/index/index-thread.c
src/lib-storage/mail-thread.h
src/util/Makefile.am
src/util/threadview.c [new file with mode: 0644]

index 83732ac61306fa015958680feb497eb98ccb5ab3..3dd73572e77e3c00996a89bfdbed0258504fd786 100644 (file)
--- a/.hgignore
+++ b/.hgignore
@@ -78,5 +78,6 @@ src/util/listview
 src/util/logview
 src/util/maildirlock
 src/util/rawlog
+src/util/threadview
 src/plugins/quota/rquota_xdr.c
 src/plugins/quota/rquota.h
index 6e57c20f5e87dac139cfec8373315bcf1dae299d..c48bb283781ffe1ace15c9cb54264597692ecc3f 100644 (file)
@@ -80,27 +80,18 @@ static int imap_thread(struct client_command_context *cmd,
 {
        struct mail_thread_context *ctx;
        string_t *str;
-       bool reset = FALSE;
        int ret;
 
        i_assert(thread_type == MAIL_THREAD_REFERENCES ||
                 thread_type == MAIL_THREAD_REFERENCES2);
 
        str = str_new(default_pool, 1024);
-       for (;;) {
-               ret = mail_thread_init(cmd->client->mailbox, reset,
-                                      search_args, &ctx);
-               if (ret == 0) {
-                       ret = imap_thread_write_reply(ctx, str, thread_type,
-                                                     !cmd->uid);
-                       mail_thread_deinit(&ctx);
-               }
-
-               if (ret == 0 || reset)
-                       break;
-               /* try again with in-memory hash */
-               reset = TRUE;
-               str_truncate(str, 0);
+       ret = mail_thread_init(cmd->client->mailbox,
+                              search_args, &ctx);
+       if (ret == 0) {
+               ret = imap_thread_write_reply(ctx, str, thread_type,
+                                             !cmd->uid);
+               mail_thread_deinit(&ctx);
        }
 
        if (ret == 0) {
index e6c68f08104514018683e7ebdf3e1c5e0af792bc..1384a1cf2afb00539363493c1b4cd5eb60989e40 100644 (file)
@@ -12,7 +12,6 @@ libindex_a_SOURCES = \
        mail-cache-lookup.c \
        mail-cache-transaction.c \
        mail-cache-sync-update.c \
-        mail-hash.c \
         mail-index.c \
         mail-index-dummy-view.c \
         mail-index-fsck.c \
@@ -21,6 +20,7 @@ libindex_a_SOURCES = \
         mail-index-modseq.c \
         mail-index-transaction.c \
         mail-index-transaction-view.c \
+        mail-index-strmap.c \
         mail-index-sync.c \
         mail-index-sync-ext.c \
         mail-index-sync-keywords.c \
@@ -38,10 +38,10 @@ libindex_a_SOURCES = \
 headers = \
        mail-cache.h \
        mail-cache-private.h \
-        mail-hash.h \
        mail-index.h \
         mail-index-modseq.h \
        mail-index-private.h \
+        mail-index-strmap.h \
        mail-index-sync-private.h \
        mail-index-transaction-private.h \
        mail-index-view-private.h \
diff --git a/src/lib-index/mail-hash.c b/src/lib-index/mail-hash.c
deleted file mode 100644 (file)
index 0f50701..0000000
+++ /dev/null
@@ -1,1136 +0,0 @@
-/* Copyright (c) 2006-2008 Dovecot authors, see the included COPYING file */
-
-#include "lib.h"
-#include "ioloop.h"
-#include "array.h"
-#include "primes.h"
-#include "nfs-workarounds.h"
-#include "file-dotlock.h"
-#include "file-set-size.h"
-#include "read-full.h"
-#include "write-full.h"
-#include "mmap-util.h"
-#include "nfs-workarounds.h"
-#include "mail-index-private.h"
-#include "mail-hash.h"
-
-#include <stdio.h>
-#include <stddef.h>
-#include <utime.h>
-#include <sys/stat.h>
-
-/* How large to create the file initially */
-#define FILE_SIZE_INIT_PERCENTAGE 120
-/* How much larger to grow the file when it needs to be done */
-#define MAIL_HASH_GROW_PERCENTAGE 20
-/* Minimum hash size to use */
-#define MAIL_HASH_MIN_SIZE 109
-
-#define MAIL_HASH_SHRINK_PRESSURE 0.3
-#define MAIL_HASH_GROW_PRESSURE 2
-
-#define MAIL_HASH_TIMEOUT_SECS 60
-
-struct mail_hash {
-       struct mail_index *index;
-
-       hash_callback_t *key_hash_cb;
-       mail_hash_ctx_cmp_callback_t *key_compare_cb;
-       mail_hash_remap_callback_t *remap_callback;
-       hash_callback_t *rec_hash_cb;
-       void *cb_context;
-       unsigned int transaction_count;
-
-       char *filepath;
-       char *suffix;
-       int fd;
-       unsigned int record_size;
-
-       dev_t dev;
-       ino_t ino;
-
-       void *mmap_base;
-       size_t mmap_size;
-
-       time_t mtime, mapped_mtime;
-       size_t change_offset_start, change_offset_end;
-
-       int lock_type;
-       struct file_lock *file_lock;
-       struct dotlock *dotlock;
-       struct dotlock_settings dotlock_settings;
-
-       const struct mail_hash_header *hdr;
-
-       unsigned int in_memory:1;
-       unsigned int recreate:1;
-       unsigned int recreated:1;
-};
-
-#define HASH_RECORD_IDX(trans, idx) \
-       PTR_OFFSET((trans)->records_base, (idx) * (trans)->hdr.record_size)
-
-struct mail_hash_transaction {
-       struct mail_hash *hash;
-
-       struct mail_hash_header hdr;
-       /* hash size is [hdr.hash_size] */
-       uint32_t *hash_base;
-       /* record [0] is always unused */
-       void *records_base;
-       /* number of records in records_base.
-          base_count + inserts.count == hdr.record_count */
-       unsigned int base_count;
-
-       /* bit array of modified data. each bit represents 1024 bytes of the
-          hash file. used only for data read into memory from hash (not
-          for mmaped data) */
-       ARRAY_TYPE(uint32_t) updates;
-       /* Records inserted within this transaction */
-       ARRAY_TYPE(mail_hash_record) inserts;
-       unsigned int next_grow_hashed_count;
-
-       uint32_t *hash_buf;
-       uint32_t records_base_1; /* used as records_base if base_count=1 */
-
-       unsigned int failed:1;
-       unsigned int mapped:1;
-};
-
-struct mail_hash_iterate_context {
-       struct mail_hash_transaction *trans;
-       uint32_t next_idx;
-       unsigned int iter_count;
-};
-
-const struct dotlock_settings default_dotlock_settings = {
-       MEMBER(temp_prefix) NULL,
-       MEMBER(lock_suffix) NULL,
-
-       MEMBER(timeout) 10,
-       MEMBER(stale_timeout) 30
-};
-
-static void mail_hash_set_syscall_error(struct mail_hash *hash,
-                                       const char *function)
-{
-       if (ENOSPACE(errno)) {
-               hash->index->nodiskspace = TRUE;
-               return;
-       }
-
-       mail_index_set_error(hash->index,
-                            "%s failed with index hash file %s: %m",
-                            function, hash->filepath);
-}
-
-void mail_hash_set_corrupted(struct mail_hash *hash, const char *error)
-{
-       mail_index_set_error(hash->index, "Corrupted index hash file %s: %s",
-                            hash->filepath, error);
-       if (unlink(hash->filepath) < 0 && errno != ENOENT)
-               mail_hash_set_syscall_error(hash, "unlink()");
-}
-
-static inline struct mail_hash_record *
-mail_hash_idx(struct mail_hash_transaction *trans, uint32_t idx)
-{
-       if (idx < trans->base_count)
-               return HASH_RECORD_IDX(trans, idx);
-
-       i_assert(idx < trans->hdr.record_count);
-       return array_idx_modifiable(&trans->inserts, idx - trans->base_count);
-}
-
-static void mail_hash_file_close(struct mail_hash *hash)
-{
-       i_assert(hash->transaction_count == 0);
-
-       if (hash->file_lock != NULL)
-               file_lock_free(&hash->file_lock);
-
-       if (hash->mmap_base != NULL) {
-               if (munmap(hash->mmap_base, hash->mmap_size) < 0)
-                       mail_hash_set_syscall_error(hash, "munmap()");
-               hash->mmap_base = NULL;
-               hash->mmap_size = 0;
-       }
-       hash->ino = 0;
-       hash->mapped_mtime = 0;
-
-       if (hash->fd != -1) {
-               if (close(hash->fd) < 0)
-                       mail_hash_set_syscall_error(hash, "close()");
-               hash->fd = -1;
-       }
-
-       hash->hdr = NULL;
-       hash->recreate = FALSE;
-       hash->recreated = FALSE;
-}
-
-struct mail_hash *
-mail_hash_alloc(struct mail_index *index, const char *suffix,
-               unsigned int record_size,
-               hash_callback_t *key_hash_cb,
-               hash_callback_t *rec_hash_cb,
-               mail_hash_ctx_cmp_callback_t *key_compare_cb,
-               mail_hash_remap_callback_t *remap_callback,
-               void *context)
-{
-       struct mail_hash *hash;
-
-       i_assert(record_size >= sizeof(struct mail_hash_record));
-
-       hash = i_new(struct mail_hash, 1);
-       hash->index = index;
-       hash->in_memory = MAIL_INDEX_IS_IN_MEMORY(index) || suffix == NULL;
-       hash->filepath = hash->in_memory ? i_strdup("(in-memory hash)") :
-               i_strconcat(index->filepath, suffix, NULL);
-       i_assert(hash->filepath != NULL);
-
-       hash->suffix = i_strdup(suffix);
-       hash->record_size = record_size;
-       hash->fd = -1;
-       hash->lock_type = F_UNLCK;
-       hash->dotlock_settings = default_dotlock_settings;
-       hash->dotlock_settings.use_excl_lock = index->use_excl_dotlocks;
-       hash->dotlock_settings.nfs_flush = index->nfs_flush;
-
-       hash->key_hash_cb = key_hash_cb;
-       hash->rec_hash_cb = rec_hash_cb;
-       hash->key_compare_cb = key_compare_cb;
-       hash->remap_callback = remap_callback,
-       hash->cb_context = context;
-       return hash;
-}
-
-void mail_hash_free(struct mail_hash **_hash)
-{
-       struct mail_hash *hash = *_hash;
-
-       *_hash = NULL;
-
-       mail_hash_file_close(hash);
-       i_free(hash->filepath);
-       i_free(hash->suffix);
-       i_free(hash);
-}
-
-static void
-mail_hash_header_init(struct mail_hash *hash, unsigned int hash_size,
-                     struct mail_hash_header *hdr)
-{
-       memset(hdr, 0, sizeof(*hdr));
-       hdr->version = MAIL_HASH_VERSION;
-       hdr->base_header_size = sizeof(*hdr);
-       hdr->header_size = hdr->base_header_size;
-       hdr->record_size = hash->record_size;
-       /* note that since the index may not have been synced yet, the
-          uid_validity may be 0 */
-       hdr->uid_validity = hash->index->map->hdr.uid_validity;
-
-       hdr->hash_size = I_MAX(primes_closest(hash_size), MAIL_HASH_MIN_SIZE);
-       hdr->record_count = 1; /* [0] always exists */
-       hdr->reset_counter = hash->hdr == NULL ? ioloop_time :
-               hash->hdr->reset_counter + 1;
-}
-
-static bool mail_hash_check_header(struct mail_hash *hash,
-                                  const struct mail_hash_header *hdr)
-{
-       uoff_t file_size;
-
-       if (hdr->version != MAIL_HASH_VERSION ||
-           (hdr->last_uid != 0 &&
-            hdr->uid_validity != hash->index->map->hdr.uid_validity) ||
-           (hdr->corrupted && hash->change_offset_end == 0)) {
-               /* silent rebuild */
-               if (unlink(hash->filepath) < 0 && errno != ENOENT)
-                       mail_hash_set_syscall_error(hash, "unlink()");
-               return FALSE;
-       }
-
-       if (hdr->record_size != hash->record_size) {
-               mail_hash_set_corrupted(hash, "record_size mismatch");
-               return FALSE;
-       }
-       if (hdr->base_header_size != sizeof(*hdr)) {
-               mail_hash_set_corrupted(hash, "base_header_size mismatch");
-               return FALSE;
-       }
-       if (hdr->header_size < hdr->base_header_size) {
-               mail_hash_set_corrupted(hash, "Invalid header_size");
-               return FALSE;
-       }
-       if (hdr->record_count == 0) {
-               mail_hash_set_corrupted(hash, "Invalid record_count");
-               return FALSE;
-       }
-       if (hdr->hashed_count > hdr->record_count) {
-               mail_hash_set_corrupted(hash, "Invalid hashed_count");
-               return FALSE;
-       }
-       if (hdr->message_count > hdr->record_count - 1) {
-               mail_hash_set_corrupted(hash, "Invalid message_count");
-               return FALSE;
-       }
-       if (hdr->last_uid < hdr->message_count) {
-               mail_hash_set_corrupted(hash, "Invalid last_uid");
-               return FALSE;
-       }
-       if (hdr->uid_validity == 0 && hdr->message_count > 0) {
-               mail_hash_set_corrupted(hash, "Zero uidvalidity");
-               return FALSE;
-       }
-
-       if (hdr->hash_size < primes_closest(1)) {
-               mail_hash_set_corrupted(hash, "Invalid hash_size");
-               return FALSE;
-       }
-
-       file_size = hdr->header_size +
-               hdr->hash_size * sizeof(uint32_t) +
-               hdr->record_count * hdr->record_size;
-       if (hash->mmap_size < file_size) {
-               mail_hash_set_corrupted(hash, "File too small");
-               return FALSE;
-       }
-       return TRUE;
-}
-
-static int mail_hash_file_fstat(struct mail_hash *hash, struct stat *st_r)
-{
-       if (fstat(hash->fd, st_r) < 0) {
-               mail_hash_set_syscall_error(hash, "fstat()");
-               return -1;
-       }
-       hash->dev = st_r->st_dev;
-       hash->ino = st_r->st_ino;
-       return 0;
-}
-
-static int mail_hash_file_map(struct mail_hash *hash, bool full)
-{
-       struct stat st;
-
-       i_assert(hash->transaction_count == 0);
-       i_assert(hash->lock_type != F_UNLCK);
-
-       if (mail_hash_file_fstat(hash, &st) < 0)
-               return -1;
-
-       if (st.st_size < (off_t)sizeof(*hash->hdr)) {
-               mail_hash_set_corrupted(hash, "File too small");
-               return 0;
-       }
-
-       if (!hash->index->mmap_disable) {
-               if (hash->mmap_base != NULL) {
-                       if (munmap(hash->mmap_base, hash->mmap_size) < 0)
-                               mail_hash_set_syscall_error(hash, "munmap()");
-               }
-               hash->mmap_size = st.st_size;
-               hash->mmap_base = mmap(NULL, hash->mmap_size,
-                                      PROT_READ | PROT_WRITE,
-                                      MAP_PRIVATE, hash->fd, 0);
-               if (hash->mmap_base == MAP_FAILED) {
-                       hash->mmap_size = 0;
-                       hash->mmap_base = NULL;
-                       mail_hash_set_syscall_error(hash, "mmap()");
-                       return -1;
-               }
-               hash->mapped_mtime = st.st_mtime;
-       } else {
-               //FIXME
-       }
-       hash->mtime = st.st_mtime;
-       hash->hdr = hash->mmap_base;
-       return 1;
-}
-
-static int mail_hash_file_map_header(struct mail_hash *hash)
-{
-       int ret;
-
-       if (hash->fd == -1) {
-               hash->hdr = NULL;
-               return 1;
-       }
-
-       if ((ret = mail_hash_file_map(hash, FALSE)) <= 0)
-               return ret;
-
-       return mail_hash_check_header(hash, hash->hdr) ? 1 : 0;
-}
-
-static int mail_hash_open_fd(struct mail_hash *hash)
-{
-       hash->fd = nfs_safe_open(hash->filepath, O_RDWR);
-       if (hash->fd == -1) {
-               if (errno == ENOENT)
-                       return 0;
-               mail_hash_set_syscall_error(hash, "open()");
-               return -1;
-       }
-       return 1;
-}
-
-static int mail_hash_reopen_if_needed(struct mail_hash *hash)
-{
-       struct stat st;
-
-       if (hash->fd == -1)
-               return mail_hash_open_fd(hash);
-
-       if (hash->index->nfs_flush)
-               nfs_flush_file_handle_cache(hash->filepath);
-
-       if (hash->ino == 0) {
-               if (mail_hash_file_fstat(hash, &st) < 0)
-                       return -1;
-       }
-
-       if (nfs_safe_stat(hash->filepath, &st) < 0) {
-               if (errno != ENOENT) {
-                       mail_hash_set_syscall_error(hash, "stat()");
-                       return -1;
-               }
-       } else if (st.st_ino == hash->ino && CMP_DEV_T(st.st_dev, hash->dev)) {
-               /* the file looks the same */
-               if (!hash->index->nfs_flush || fstat(hash->fd, &st) == 0) {
-                       /* it is the same */
-                       return 0;
-               }
-               if (errno != ESTALE) {
-                       mail_hash_set_syscall_error(hash, "fstat()");
-                       return -1;
-               }
-               /* ESTALE - another file renamed over */
-       }
-       mail_hash_file_close(hash);
-       return mail_hash_open_fd(hash);
-}
-
-static int
-mail_hash_file_lock(struct mail_hash *hash, int lock_type, bool try_lock)
-{
-       enum dotlock_create_flags flags;
-       unsigned int timeout;
-       int ret;
-
-       i_assert(hash->fd != -1);
-
-       if (hash->index->lock_method != FILE_LOCK_METHOD_DOTLOCK) {
-               i_assert(hash->file_lock == NULL);
-
-               timeout = !try_lock ? MAIL_HASH_TIMEOUT_SECS : 0;
-               ret = file_wait_lock(hash->fd, hash->filepath, lock_type,
-                                    hash->index->lock_method,
-                                    timeout, &hash->file_lock);
-               if (ret < 0 || (ret == 0 && !try_lock)) {
-                       mail_hash_set_syscall_error(hash,
-                                                   "file_wait_lock()");
-               }
-       } else {
-               i_assert(hash->dotlock == NULL);
-
-               flags = try_lock ? DOTLOCK_CREATE_FLAG_NONBLOCK : 0;
-               ret = file_dotlock_create(&hash->dotlock_settings,
-                                         hash->filepath, flags,
-                                         &hash->dotlock);
-               if (ret < 0 || (ret == 0 && !try_lock)) {
-                       mail_hash_set_syscall_error(hash,
-                                                   "file_dotlock_create()");
-               }
-       }
-       return ret;
-}
-
-int mail_hash_lock_shared(struct mail_hash *hash)
-{
-       int ret;
-
-       i_assert(hash->lock_type == F_UNLCK);
-
-       if (hash->in_memory) {
-               if (hash->hdr == NULL)
-                       return 0;
-               hash->lock_type = F_RDLCK;
-               return 1;
-       }
-
-       if (hash->fd == -1) {
-               if ((ret = mail_hash_open_fd(hash)) <= 0)
-                       return ret;
-       }
-
-       do {
-               if ((ret = mail_hash_file_lock(hash, F_RDLCK, FALSE)) <= 0)
-                       return -1;
-       } while ((ret = mail_hash_reopen_if_needed(hash)) > 0);
-       if (ret < 0 || hash->fd == -1)
-               return ret;
-
-       hash->lock_type = F_RDLCK;
-       mail_index_flush_read_cache(hash->index, hash->filepath,
-                                   hash->fd, TRUE);
-       if ((ret = mail_hash_file_map_header(hash)) <= 0) {
-               mail_hash_unlock(hash);
-               return ret;
-       }
-       return 1;
-}
-
-static int
-mail_hash_lock_exclusive_fd(struct mail_hash *hash,
-                           enum mail_hash_lock_flags flags, bool *created_r)
-{
-       bool exists = TRUE;
-       int ret;
-
-       i_assert(hash->file_lock == NULL);
-       i_assert(hash->dotlock == NULL);
-
-       *created_r = FALSE;
-
-       if (hash->index->lock_method == FILE_LOCK_METHOD_DOTLOCK) {
-               /* use dotlocking */
-       } else if (hash->fd == -1 && (ret = mail_hash_open_fd(hash)) <= 0) {
-               if (ret < 0 ||
-                   (flags & MAIL_HASH_LOCK_FLAG_CREATE_MISSING) == 0)
-                       return ret;
-               /* the file doesn't exist - we need to use dotlocking */
-               exists = FALSE;
-       } else {
-               /* first try to lock the file descriptor */
-               ret = mail_hash_file_lock(hash, F_WRLCK, TRUE);
-               if (ret != 0) {
-                       /* success / error */
-                       return ret;
-               }
-               if ((flags & MAIL_HASH_LOCK_FLAG_TRY) != 0)
-                       return 0;
-
-               /* already locked. if it's only read-locked, we can
-                  overwrite the file. first wait for a shared lock. */
-               if (mail_hash_lock_shared(hash) <= 0)
-                       return -1;
-               /* try once again if we can upgrade our shared lock to
-                  an exclusive lock */
-               ret = file_lock_try_update(hash->file_lock, F_WRLCK);
-               if (ret != 0)
-                       return ret;
-               /* still no luck - fallback to dotlocking */
-       }
-       if (file_dotlock_create(&hash->dotlock_settings, hash->filepath,
-                               0, &hash->dotlock) <= 0) {
-               mail_hash_set_syscall_error(hash, "file_dotlock_create()");
-               return -1;
-       }
-       if (!exists) {
-               /* file didn't exist - see if someone just created it */
-               i_assert(hash->fd == -1);
-               ret = mail_hash_open_fd(hash);
-               if (ret != 0) {
-                       (void)file_dotlock_delete(&hash->dotlock);
-                       if (ret < 0)
-                               return -1;
-
-                       /* the file was created - we need to have it locked,
-                          so retry this operation */
-                       return mail_hash_lock_exclusive_fd(hash, flags,
-                                                          created_r);
-               }
-               *created_r = TRUE;
-       }
-       /* other sessions are reading the file, we must not overwrite */
-       hash->recreate = TRUE;
-       return 1;
-}
-
-static int mail_hash_lock_excl_full(struct mail_hash *hash,
-                                   enum mail_hash_lock_flags flags,
-                                   bool *created_r)
-{
-       bool create_missing = (flags & MAIL_HASH_LOCK_FLAG_CREATE_MISSING) != 0;
-       int ret;
-
-       i_assert(hash->lock_type == F_UNLCK);
-
-       if (hash->in_memory) {
-               if (hash->hdr == NULL && !create_missing)
-                       return 0;
-               hash->lock_type = F_WRLCK;
-               return 1;
-       }
-
-       if ((ret = mail_hash_lock_exclusive_fd(hash, flags, created_r)) <= 0) {
-               mail_hash_unlock(hash);
-               return ret;
-       }
-       hash->lock_type = F_WRLCK;
-
-       mail_index_flush_read_cache(hash->index, hash->filepath,
-                                   hash->fd, TRUE);
-       if ((ret = mail_hash_file_map_header(hash)) <= 0) {
-               mail_hash_unlock(hash);
-               if (ret == 0 && create_missing) {
-                       /* the broken file was unlinked - try again */
-                       mail_hash_file_close(hash);
-                       return mail_hash_lock_excl_full(hash, flags, created_r);
-               }
-               return ret;
-       }
-       return 1;
-}
-
-int mail_hash_lock_exclusive(struct mail_hash *hash,
-                            enum mail_hash_lock_flags flags)
-{
-       bool created;
-
-       return mail_hash_lock_excl_full(hash, flags, &created);
-}
-
-void mail_hash_unlock(struct mail_hash *hash)
-{
-       if (hash->recreated)
-               mail_hash_file_close(hash);
-
-       if (hash->file_lock != NULL)
-               file_unlock(&hash->file_lock);
-       if (hash->dotlock != NULL)
-               (void)file_dotlock_delete(&hash->dotlock);
-       hash->lock_type = F_UNLCK;
-}
-
-int mail_hash_get_lock_type(struct mail_hash *hash)
-{
-       return hash->lock_type;
-}
-
-int mail_hash_create_excl_locked(struct mail_hash *hash)
-{
-       bool created;
-
-       if (mail_hash_lock_excl_full(hash, MAIL_HASH_LOCK_FLAG_CREATE_MISSING,
-                                    &created) <= 0)
-               return -1;
-
-       if (!created) {
-               mail_hash_unlock(hash);
-               return 0;
-       }
-       return 1;
-}
-
-static void mail_hash_resize(struct mail_hash_transaction *trans)
-{
-       struct mail_hash_record *rec;
-       unsigned int idx, new_size, hash_idx, hash_key;
-
-       new_size = I_MAX(primes_closest(trans->hdr.hashed_count),
-                        MAIL_HASH_MIN_SIZE);
-       i_assert(new_size != trans->hdr.hash_size);
-       trans->hdr.hash_size = new_size;
-
-       i_free(trans->hash_buf);
-       trans->hash_buf = i_new(uint32_t, trans->hdr.hash_size);
-       trans->hash_base = trans->hash_buf;
-
-       for (idx = 1; idx < trans->hdr.record_count; idx++) {
-               rec = mail_hash_idx(trans, idx);
-               if (MAIL_HASH_RECORD_IS_DELETED(rec))
-                       continue;
-
-               hash_key = trans->hash->rec_hash_cb(rec);
-               if (hash_key == 0)
-                       continue;
-
-               /* found a hashed record, move it to its new position */
-               hash_idx = hash_key % trans->hdr.hash_size;
-               rec->next_idx = trans->hash_buf[hash_idx];
-               trans->hash_buf[hash_idx] = idx;
-       }
-
-       trans->next_grow_hashed_count =
-               trans->hdr.hash_size * MAIL_HASH_GROW_PRESSURE;
-       i_assert(trans->hdr.hashed_count < trans->next_grow_hashed_count);
-}
-
-struct mail_hash_transaction *
-mail_hash_transaction_begin(struct mail_hash *hash, unsigned int min_hash_size)
-{
-       struct mail_hash_transaction *trans;
-
-       i_assert(hash->lock_type != F_UNLCK);
-
-       trans = i_new(struct mail_hash_transaction, 1);
-       trans->hash = hash;
-       if (hash->hdr != NULL)
-               trans->hdr = *hash->hdr;
-       else {
-               mail_hash_header_init(hash, min_hash_size, &trans->hdr);
-               trans->mapped = TRUE;
-       }
-       trans->base_count = trans->hdr.record_count;
-       if (trans->base_count <= 1) {
-               /* empty hash */
-               trans->hash_buf = i_new(uint32_t, trans->hdr.hash_size);
-               trans->hash_base = trans->hash_buf;
-               trans->records_base = &trans->records_base_1;
-       } else {
-               trans->hash_base =
-                       PTR_OFFSET(hash->mmap_base, hash->hdr->header_size);
-               trans->records_base = &trans->hash_base[hash->hdr->hash_size];
-       }
-
-       trans->next_grow_hashed_count =
-               trans->hdr.hash_size * MAIL_HASH_GROW_PRESSURE;
-       hash->transaction_count++;
-       return trans;
-}
-
-int mail_hash_transaction_write(struct mail_hash_transaction *trans)
-{
-       struct mail_hash *hash = trans->hash;
-       const struct mail_hash_record *inserts;
-       unsigned int size, count, existing_size;
-       const char *temp_path = NULL;
-       uoff_t offset;
-       float nodes_per_list;
-       int fd = hash->fd;
-
-       i_assert(hash->lock_type == F_WRLCK);
-       i_assert(trans->hdr.hashed_count <= trans->hdr.record_count);
-
-       if (trans->failed)
-               return -1;
-       if (!array_is_created(&trans->inserts) &&
-           !array_is_created(&trans->updates)) {
-               /* nothing changed */
-               return 0;
-       }
-
-       /* see if hash needs resizing */
-       nodes_per_list = (float)trans->hdr.hashed_count /
-               (float)trans->hdr.hash_size;
-       if ((nodes_per_list < MAIL_HASH_SHRINK_PRESSURE &&
-            trans->hdr.hash_size > MAIL_HASH_MIN_SIZE) ||
-           nodes_per_list > MAIL_HASH_GROW_PRESSURE)
-               mail_hash_resize(trans);
-
-       if (trans->hash->in_memory)
-               return 0;
-
-       if (hash->recreate || hash->hdr->hash_size != trans->hdr.hash_size) {
-               /* recreate the file instead of overwriting */
-               fd = -1;
-       }
-
-       existing_size = sizeof(trans->hdr) +
-               trans->hdr.hash_size * sizeof(uint32_t) +
-               trans->base_count * trans->hdr.record_size;
-       if (fd == -1) {
-               temp_path = t_strconcat(hash->filepath, ".tmp", NULL);
-               fd = open(temp_path, O_RDWR | O_CREAT | O_TRUNC,
-                         hash->index->mode);
-               if (fd == -1) {
-                       if (ENOSPACE(errno)) {
-                               hash->index->nodiskspace = TRUE;
-                               return -1;
-                       }
-                       mail_index_set_error(hash->index,
-                               "creat(%s) failed: %m", temp_path);
-                       return -1;
-               }
-       }
-
-       if (pwrite_full(fd, &trans->hdr, sizeof(trans->hdr), 0) < 0) {
-               mail_hash_set_syscall_error(hash, "pwrite()");
-               return -1;
-       }
-       /* FIXME: use updates array */
-       offset = sizeof(trans->hdr);
-       if (pwrite_full(fd, trans->hash_base,
-                       trans->hdr.hash_size * sizeof(uint32_t), offset) < 0) {
-               mail_hash_set_syscall_error(hash, "pwrite()");
-               return -1;
-       }
-       offset += trans->hdr.hash_size * sizeof(uint32_t);
-       /* if there's only the first null record, don't bother writing it.
-          especially because then records_base may point to sizeof(uint32_t)
-          instead of hdr.record_size */
-       if (trans->base_count > 1) {
-               if (pwrite_full(fd, trans->records_base,
-                               trans->base_count * trans->hdr.record_size,
-                               offset) < 0) {
-                       mail_hash_set_syscall_error(hash, "pwrite()");
-                       return -1;
-               }
-       }
-
-       /* write the new data */
-       if (array_is_created(&trans->inserts)) {
-               inserts = array_get(&trans->inserts, &count);
-               size = count * trans->hdr.record_size;
-               if (pwrite_full(fd, inserts, size, existing_size) < 0) {
-                       mail_hash_set_syscall_error(hash, "pwrite()");
-                       return -1;
-               }
-       }
-       if (temp_path != NULL) {
-               if (rename(temp_path, hash->filepath) < 0) {
-                       mail_index_set_error(hash->index,
-                               "rename(%s, %s) failed: %m",
-                               temp_path, hash->filepath);
-                       return -1;
-               }
-               if (close(fd) < 0)
-                       mail_hash_set_syscall_error(hash, "close()");
-               /* we must not overwrite before reopening the file */
-               hash->recreate = TRUE;
-               hash->recreated = TRUE;
-       }
-       return 0;
-}
-
-void mail_hash_transaction_end(struct mail_hash_transaction **_trans)
-{
-       struct mail_hash_transaction *trans = *_trans;
-
-       *_trans = NULL;
-
-       trans->hash->transaction_count--;
-       if (array_is_created(&trans->inserts))
-               array_free(&trans->inserts);
-       if (array_is_created(&trans->updates))
-               array_free(&trans->updates);
-       i_free(trans->hash_buf);
-       i_free(trans);
-}
-
-bool mail_hash_transaction_is_broken(struct mail_hash_transaction *trans)
-{
-       return trans->failed;
-}
-
-bool mail_hash_transaction_is_in_memory(struct mail_hash_transaction *trans)
-{
-       return trans->hash->in_memory;
-}
-
-struct mail_hash *
-mail_hash_transaction_get_hash(struct mail_hash_transaction *trans)
-{
-       return trans->hash;
-}
-
-void mail_hash_transaction_set_corrupted(struct mail_hash_transaction *trans,
-                                        const char *error)
-{
-       trans->failed = TRUE;
-       mail_hash_set_corrupted(trans->hash, error);
-}
-
-void mail_hash_reset(struct mail_hash_transaction *trans)
-{
-       mail_hash_header_init(trans->hash, trans->hdr.hash_size / 2,
-                             &trans->hdr);
-       trans->mapped = TRUE;
-       trans->base_count = trans->hdr.record_count;
-       i_assert(trans->base_count == 1);
-
-       i_free(trans->hash_buf);
-       trans->hash_buf = i_new(uint32_t, trans->hdr.hash_size);
-       trans->hash_base = trans->hash_buf;
-       trans->records_base = &trans->records_base_1;
-}
-
-int mail_hash_map_file(struct mail_hash_transaction *trans)
-{
-       if (!trans->mapped) {
-               if (mail_hash_file_map(trans->hash, TRUE) <= 0) {
-                       trans->failed = TRUE;
-                       return -1;
-               }
-               trans->mapped = TRUE;
-       }
-       return 0;
-}
-
-struct mail_hash_header *
-mail_hash_get_header(struct mail_hash_transaction *trans)
-{
-       return &trans->hdr;
-}
-
-static void mail_hash_iterate_init(struct mail_hash_iterate_context *iter,
-                                  struct mail_hash_transaction *trans,
-                                  uint32_t start_idx)
-{
-       memset(iter, 0, sizeof(*iter));
-       iter->trans = trans;
-       iter->next_idx = start_idx;
-}
-
-static int mail_hash_iterate_next(struct mail_hash_iterate_context *iter,
-                                 uint32_t *idx_r)
-{
-       struct mail_hash_record *rec;
-       uint32_t idx = iter->next_idx;
-
-       if (idx == 0)
-               return 0;
-       if (unlikely(idx >= iter->trans->hdr.record_count)) {
-               mail_hash_transaction_set_corrupted(iter->trans,
-                       "Index points outside file");
-               return -1;
-       }
-       rec = mail_hash_idx(iter->trans, idx);
-       iter->next_idx = rec->next_idx;
-
-       if (++iter->iter_count > iter->trans->hdr.record_count) {
-               /* we've iterated through more indexes than there exist.
-                  we must be looping. */
-               mail_hash_transaction_set_corrupted(iter->trans,
-                       "next_iter loops");
-               return -1;
-       }
-       *idx_r = idx;
-       return 1;
-}
-
-void *mail_hash_lookup(struct mail_hash_transaction *trans, const void *key,
-                      uint32_t *idx_r)
-{
-       struct mail_hash *hash = trans->hash;
-       struct mail_hash_iterate_context iter;
-       unsigned int hash_idx;
-       uint32_t idx;
-       int ret;
-
-       hash_idx = hash->key_hash_cb(key) % trans->hdr.hash_size;
-       mail_hash_iterate_init(&iter, trans, trans->hash_base[hash_idx]);
-
-       while ((ret = mail_hash_iterate_next(&iter, &idx)) > 0) {
-               if (hash->key_compare_cb(trans, key, idx, hash->cb_context))
-                       break;
-       }
-
-       if (ret <= 0) {
-               *idx_r = 0;
-               return NULL;
-       } else {
-               *idx_r = idx;
-               return mail_hash_idx(trans, idx);
-       }
-}
-
-void *mail_hash_lookup_idx(struct mail_hash_transaction *trans, uint32_t idx)
-{
-       i_assert(idx > 0);
-
-       if (idx >= trans->hdr.record_count) {
-               mail_hash_transaction_set_corrupted(trans,
-                       "Index points outside file");
-               /* return pointer to the first dummy record */
-               idx = 0;
-       }
-
-       return mail_hash_idx(trans, idx);
-}
-
-static uoff_t
-mail_hash_idx_to_offset(struct mail_hash_transaction *trans, uint32_t idx)
-{
-       return trans->hdr.header_size +
-               trans->hdr.hash_size * sizeof(uint32_t) +
-               trans->hdr.record_size * idx;
-}
-
-static void
-mail_hash_update_offset(struct mail_hash_transaction *trans,
-                       uoff_t offset, unsigned int size)
-{
-       uint32_t *p;
-       unsigned int pos = offset / 1024;
-       unsigned int pos2 = (offset + size - 1) / 1024;
-
-       if (!array_is_created(&trans->updates))
-               i_array_init(&trans->updates, I_MAX(pos, 256));
-
-       while (pos <= pos2) {
-               p = array_idx_modifiable(&trans->updates, pos / 32);
-               *p |= 1 << (pos % 32);
-               pos++;
-       }
-}
-
-static void mail_hash_update_hash_idx(struct mail_hash_transaction *trans,
-                                     uint32_t hash_idx)
-{
-       size_t offset;
-
-       offset = trans->hdr.header_size + hash_idx * sizeof(uint32_t);
-       mail_hash_update_offset(trans, offset, sizeof(uint32_t));
-}
-
-static void mail_hash_insert_idx(struct mail_hash_transaction *trans,
-                                const void *value, uint32_t *idx_r)
-{
-       struct mail_hash_record *rec;
-       uint32_t idx;
-
-       if (trans->hdr.first_hole_idx != 0) {
-               /* allocate the record from the first hole */
-               idx = trans->hdr.first_hole_idx;
-               if (idx >= trans->hdr.record_count) {
-                       mail_hash_transaction_set_corrupted(trans,
-                               "Corrupted first_hole_idx");
-                       idx = 0;
-               }
-               rec = mail_hash_idx(trans, idx);
-
-               memcpy(&trans->hdr.first_hole_idx, rec + 1,
-                      sizeof(trans->hdr.first_hole_idx));
-               mail_hash_update(trans, idx);
-       } else {
-               idx = trans->hdr.record_count++;
-               if (!array_is_created(&trans->inserts)) {
-                       array_create(&trans->inserts, default_pool,
-                                    trans->hdr.record_size, 128);
-               }
-               rec = array_append_space(&trans->inserts);
-       }
-
-       memcpy(rec, value, trans->hdr.record_size);
-       rec->next_idx = 0;
-
-       *idx_r = idx;
-}
-
-static void mail_hash_insert_hash(struct mail_hash_transaction *trans,
-                                 uint32_t hash_key, uint32_t idx)
-{
-       struct mail_hash_record *rec;
-       uint32_t hash_idx;
-
-       if (trans->hdr.hashed_count >= trans->next_grow_hashed_count)
-               mail_hash_resize(trans);
-
-       hash_idx = hash_key % trans->hdr.hash_size;
-
-       rec = mail_hash_idx(trans, idx);
-       rec->next_idx = trans->hash_base[hash_idx];
-       trans->hash_base[hash_idx] = idx;
-
-       mail_hash_update_hash_idx(trans, hash_idx);
-       trans->hdr.hashed_count++;
-}
-
-void mail_hash_insert(struct mail_hash_transaction *trans, const void *key,
-                     const void *value, uint32_t *idx_r)
-{
-       uint32_t hash_key;
-
-       mail_hash_insert_idx(trans, value, idx_r);
-       if (key == NULL)
-               hash_key = 0;
-       else {
-               hash_key = trans->hash->key_hash_cb(key);
-               mail_hash_insert_hash(trans, hash_key, *idx_r);
-       }
-       i_assert(trans->hash->rec_hash_cb(value) == hash_key);
-}
-
-void mail_hash_update(struct mail_hash_transaction *trans, uint32_t idx)
-{
-       uoff_t offset;
-
-       i_assert(idx > 0 && idx < trans->hdr.record_count);
-
-       offset = mail_hash_idx_to_offset(trans, idx);
-       mail_hash_update_offset(trans, offset,
-                               trans->hdr.record_size);
-}
-
-static void
-mail_hash_remove_idx(struct mail_hash_transaction *trans, uint32_t idx)
-{
-       struct mail_hash_record *rec;
-
-       if (idx+1 == trans->hdr.record_count) {
-               /* removing last record */
-               trans->hdr.record_count--;
-       } else {
-               /* mark the record expunged */
-               rec = mail_hash_idx(trans, idx);
-               rec->next_idx = (uint32_t)-1;
-               /* update the linked list of holes */
-               i_assert(trans->hdr.record_size >=
-                        sizeof(*rec) + sizeof(trans->hdr.first_hole_idx));
-               memcpy(rec+1, &trans->hdr.first_hole_idx,
-                      sizeof(trans->hdr.first_hole_idx));
-               trans->hdr.first_hole_idx = idx;
-
-               mail_hash_update(trans, idx);
-       }
-}
-
-void mail_hash_remove(struct mail_hash_transaction *trans,
-                     uint32_t idx, uint32_t key_hash)
-{
-       struct mail_hash_record *rec, *rec2;
-       unsigned int hash_idx;
-       uint32_t idx2;
-       int ret;
-
-       i_assert(idx > 0 && idx < trans->hdr.record_count);
-
-       if (key_hash == 0) {
-               /* key not in hash table */
-               mail_hash_remove_idx(trans, idx);
-               return;
-       }
-
-       rec = mail_hash_idx(trans, idx);
-
-       hash_idx = key_hash % trans->hdr.hash_size;
-       idx2 = trans->hash_base[hash_idx];
-       if (idx2 == idx) {
-               /* first in the hash table */
-               trans->hash_base[hash_idx] = rec->next_idx;
-               mail_hash_update_hash_idx(trans, hash_idx);
-       } else {
-               /* find the previous record */
-               struct mail_hash_iterate_context iter;
-
-               mail_hash_iterate_init(&iter, trans, idx2);
-               while ((ret = mail_hash_iterate_next(&iter, &idx2)) > 0) {
-                       rec2 = mail_hash_idx(trans, idx2);
-                       if (rec2->next_idx == idx)
-                               break;
-               }
-               if (ret <= 0) {
-                       if (ret == 0) {
-                               mail_hash_set_corrupted(trans->hash,
-                                       "Tried to remove non-existing key");
-                       }
-                       return;
-               }
-
-               rec2->next_idx = rec->next_idx;
-               mail_hash_update_offset(trans, idx2, trans->hdr.record_size);
-
-               if (trans->hdr.hashed_count == 0) {
-                       mail_hash_set_corrupted(trans->hash,
-                                               "Too low hashed_count");
-                       return;
-               }
-       }
-
-       trans->hdr.hashed_count--;
-       mail_hash_remove_idx(trans, idx);
-}
diff --git a/src/lib-index/mail-hash.h b/src/lib-index/mail-hash.h
deleted file mode 100644 (file)
index 5b2a4ac..0000000
+++ /dev/null
@@ -1,151 +0,0 @@
-#ifndef MAIL_HASH_H
-#define MAIL_HASH_H
-
-#include "hash.h"
-
-#define MAIL_HASH_VERSION 1
-
-struct mail_index;
-struct mail_hash;
-struct mail_hash_transaction;
-
-/* File format:
-
-   [header]
-   [hash_size * (uint32_t idx)]
-   [record_count * hdr.record_size]
-
-   The indexes start from 1, so 0 means "doesn't exist".
-*/
-
-struct mail_hash_header {
-       /* File format version */
-       uint8_t version;
-       /* Corrupted flag is set while file is being modified. */
-       uint8_t corrupted;
-       uint8_t unused[3];
-
-       uint16_t base_header_size;
-       /* Full size of each mail_hash_record */
-       uint16_t record_size;
-       /* Full header size. Currently always the same as base_header_size. */
-       uint32_t header_size;
-       /* Number of records after the hash table, includes [0] and holes */
-       uint32_t record_count;
-       /* Number of messages with non-zero hash */
-       uint32_t hashed_count;
-
-       /* Holes is a linked list of unused records. 0 = no holes. */
-       uint32_t first_hole_idx;
-       /* Hash size as number of elements */
-       uint32_t hash_size;
-
-       /* UID validity. */
-       uint32_t uid_validity;
-       /* The last UID which has been added to this file (but may have been
-          expunged already) */
-       uint32_t last_uid;
-       /* Number of message records (records with non-zero UID) */
-       uint32_t message_count;
-       /* Increased every time the hash is reset */
-       uint32_t reset_counter;
-};
-
-struct mail_hash_record {
-       /* Linked list of records for hash collisions.
-          (uint32_t)-1 means this record is deleted */
-       uint32_t next_idx;
-       /* user_data[] */
-};
-ARRAY_DEFINE_TYPE(mail_hash_record, struct mail_hash_record);
-#define MAIL_HASH_RECORD_IS_DELETED(rec) \
-       ((rec)->next_idx == (uint32_t)-1)
-
-enum mail_hash_lock_flags {
-       MAIL_HASH_LOCK_FLAG_TRY                 = 0x01,
-       MAIL_HASH_LOCK_FLAG_CREATE_MISSING      = 0x02
-};
-
-/* Returns 0 if the pointers are equal. */
-typedef bool mail_hash_ctx_cmp_callback_t(struct mail_hash_transaction *trans,
-                                         const void *key, uint32_t idx,
-                                         void *context);
-/* map[] contains old -> new index mapping. */
-typedef int mail_hash_remap_callback_t(struct mail_hash_transaction *trans,
-                                      const uint32_t *map,
-                                      unsigned int map_size, void *context);
-
-/* suffix=NULL keeps the has only in memory */
-struct mail_hash *
-mail_hash_alloc(struct mail_index *index, const char *suffix,
-               unsigned int record_size,
-               hash_callback_t *key_hash_cb,
-               hash_callback_t *rec_hash_cb,
-               mail_hash_ctx_cmp_callback_t *key_compare_cb,
-               mail_hash_remap_callback_t *remap_callback,
-               void *context);
-void mail_hash_free(struct mail_hash **hash);
-
-/* Returns 1 if created and locked, 0 if already exists, -1 if error. */
-int mail_hash_create_excl_locked(struct mail_hash *hash);
-
-/* Lock the file. Returns 1 if locking was successful, 0 if file doesn't exist,
-   -1 if error. */
-int mail_hash_lock_shared(struct mail_hash *hash);
-/* If FLAG_TRY_LOCK is set and file is already locked, return 0.
-   Otherwise return values are identical with mail_hash_lock_shared() */
-int mail_hash_lock_exclusive(struct mail_hash *hash,
-                            enum mail_hash_lock_flags flags);
-void mail_hash_unlock(struct mail_hash *hash);
-/* Returns the current locking state (F_UNLCK, F_RDLCK, F_WRLCK) */
-int mail_hash_get_lock_type(struct mail_hash *hash);
-
-struct mail_hash_transaction *
-mail_hash_transaction_begin(struct mail_hash *hash, unsigned int min_hash_size);
-int mail_hash_transaction_write(struct mail_hash_transaction *trans);
-void mail_hash_transaction_end(struct mail_hash_transaction **trans);
-/* Returns TRUE if transaction is in broken state because of an earlier
-   I/O error or detected file corruption. */
-bool mail_hash_transaction_is_broken(struct mail_hash_transaction *trans);
-/* Returns TRUE if hash is currently being updated in memory. */
-bool mail_hash_transaction_is_in_memory(struct mail_hash_transaction *trans);
-
-/* Returns the hash structure of the transaction. */
-struct mail_hash *
-mail_hash_transaction_get_hash(struct mail_hash_transaction *trans);
-
-/* Clear the entire hash file's contents. */
-void mail_hash_reset(struct mail_hash_transaction *trans);
-/* Read the entire file to memory. */
-int mail_hash_map_file(struct mail_hash_transaction *trans);
-
-/* Returns a modifiable hash header. */
-struct mail_hash_header *
-mail_hash_get_header(struct mail_hash_transaction *trans);
-
-/* Look up key from hash and return its value or NULL if key isn't in the hash.
-   NULL is also returned if the lookup fails because of I/O error or file
-   corruption. */
-void *mail_hash_lookup(struct mail_hash_transaction *trans, const void *key,
-                      uint32_t *idx_r);
-/* Never returns NULL. If the lookup fails the transaction is marked broken
-   and a pointer to a dummy record is returned. */
-void *mail_hash_lookup_idx(struct mail_hash_transaction *trans, uint32_t idx);
-
-/* If key=NULL, it's only added as a record after the hash table and not
-   added to the actual hash table. Note that hash=0 equals to key=NULL insert,
-   so a valid hash value must never be 0. */
-void mail_hash_insert(struct mail_hash_transaction *trans, const void *key,
-                     const void *value, uint32_t *idx_r);
-/* Mark the record at given index as modified. */
-void mail_hash_update(struct mail_hash_transaction *trans, uint32_t idx);
-/* idx must be provided. key_hash must be provided if the record was added
-   with non-NULL key. */
-void mail_hash_remove(struct mail_hash_transaction *trans,
-                     uint32_t idx, uint32_t key_hash);
-
-void mail_hash_set_corrupted(struct mail_hash *hash, const char *error);
-void mail_hash_transaction_set_corrupted(struct mail_hash_transaction *trans,
-                                        const char *error);
-
-#endif
index 402beca05def234605d89eb2aa234c19de3a6d5d..aa247a99e1ede64571a0bfc5138a49280346267f 100644 (file)
@@ -332,4 +332,9 @@ int mail_index_file_set_syscall_error(struct mail_index *index,
 uint32_t mail_index_uint32_to_offset(uint32_t offset);
 uint32_t mail_index_offset_to_uint32(uint32_t offset);
 
+#define MAIL_INDEX_PACK_MAX_SIZE ((sizeof(uint32_t) * 8 + 7) / 7)
+void mail_index_pack_num(uint8_t **p, uint32_t num);
+int mail_index_unpack_num(const uint8_t **p, const uint8_t *end,
+                         uint32_t *num_r);
+
 #endif
diff --git a/src/lib-index/mail-index-strmap.c b/src/lib-index/mail-index-strmap.c
new file mode 100644 (file)
index 0000000..bf7f4ea
--- /dev/null
@@ -0,0 +1,1224 @@
+/* Copyright (c) 2008 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "array.h"
+#include "bsearch-insert-pos.h"
+#include "istream.h"
+#include "ostream.h"
+#include "file-lock.h"
+#include "file-dotlock.h"
+#include "crc32.h"
+#include "safe-mkstemp.h"
+#include "str.h"
+#include "mail-index-private.h"
+#include "mail-index-strmap.h"
+
+#include <stdio.h>
+
+struct mail_index_strmap {
+       struct mail_index *index;
+       char *path;
+       int fd;
+       struct istream *input;
+
+       struct file_lock *file_lock;
+       struct dotlock *dotlock;
+       struct dotlock_settings dotlock_settings;
+};
+
+struct mail_index_strmap_view {
+       struct mail_index_strmap *strmap;
+       struct mail_index_view *view;
+
+       ARRAY_TYPE(mail_index_strmap_rec) recs;
+       ARRAY_DEFINE(recs_crc32, uint32_t);
+       struct hash2_table *hash;
+
+       mail_index_strmap_key_cmp_t *key_compare;
+       mail_index_strmap_rec_cmp_t *rec_compare;
+       mail_index_strmap_remap_t *remap_cb;
+       void *cb_context;
+
+       uoff_t last_read_block_offset;
+       uint32_t last_read_uid;
+       uint32_t last_added_uid;
+       uint32_t total_ref_count;
+
+       uint32_t last_ref_index;
+       uint32_t next_str_idx;
+       uint32_t lost_expunged_uid;
+
+       unsigned int desynced:1;
+};
+
+struct mail_index_strmap_read_context {
+       struct mail_index_strmap_view *view;
+
+       struct istream *input;
+       uoff_t end_offset;
+       uint32_t highest_str_idx;
+       uint32_t uid_lookup_idx;
+       uint32_t lost_expunged_uid;
+
+       const unsigned char *data, *end, *str_idx_base;
+       struct mail_index_strmap_rec rec;
+       uint32_t next_ref_index;
+       unsigned int rec_size;
+
+       unsigned int too_large_uids:1;
+};
+
+struct mail_index_strmap_view_sync {
+       struct mail_index_strmap_view *view;
+};
+
+struct mail_index_strmap_hash_key {
+       const char *str;
+       uint32_t crc32;
+};
+
+/* number of bytes required to store one string idx */
+#define STRMAP_FILE_STRIDX_SIZE (sizeof(uint32_t)*2)
+
+/* renumber the string indexes when highest string idx becomes larger than
+   <number of indexes>*STRMAP_FILE_MAX_STRIDX_MULTIPLIER */
+#define STRMAP_FILE_MAX_STRIDX_MULTIPLIER 2
+
+#define STRIDX_MUST_RENUMBER(highest_idx, n_unique_indexes) \
+       (highest_idx > n_unique_indexes * STRMAP_FILE_MAX_STRIDX_MULTIPLIER)
+
+#define MAIL_INDEX_STRMAP_TIMEOUT_SECS 10
+
+const struct dotlock_settings default_dotlock_settings = {
+       MEMBER(temp_prefix) NULL,
+       MEMBER(lock_suffix) NULL,
+
+       MEMBER(timeout) MAIL_INDEX_STRMAP_TIMEOUT_SECS,
+       MEMBER(stale_timeout) 30
+};
+
+struct mail_index_strmap *
+mail_index_strmap_init(struct mail_index *index, const char *suffix)
+{
+       struct mail_index_strmap *strmap;
+
+       strmap = i_new(struct mail_index_strmap, 1);
+       strmap->index = index;
+       strmap->path = i_strconcat(index->filepath, suffix, NULL);
+       strmap->fd = -1;
+
+       strmap->dotlock_settings = default_dotlock_settings;
+       strmap->dotlock_settings.use_excl_lock = index->use_excl_dotlocks;
+       strmap->dotlock_settings.nfs_flush = index->nfs_flush;
+       return strmap;
+}
+
+static bool
+mail_index_strmap_read_rec_next(struct mail_index_strmap_read_context *ctx,
+                               uint32_t *crc32_r);
+
+static void
+mail_index_strmap_set_syscall_error(struct mail_index_strmap *strmap,
+                                   const char *function)
+{
+       i_assert(function != NULL);
+
+       if (ENOSPACE(errno)) {
+               strmap->index->nodiskspace = TRUE;
+               return;
+       }
+
+       mail_index_set_error(strmap->index,
+                            "%s failed with strmap index file %s: %m",
+                            function, strmap->path);
+}
+
+static void mail_index_strmap_close(struct mail_index_strmap *strmap)
+{
+       if (strmap->file_lock != NULL)
+               file_lock_free(&strmap->file_lock);
+       else if (strmap->dotlock != NULL)
+               file_dotlock_delete(&strmap->dotlock);
+
+       if (strmap->fd != -1) {
+               if (close(strmap->fd) < 0)
+                       mail_index_strmap_set_syscall_error(strmap, "close()");
+               strmap->fd = -1;
+       }
+       if (strmap->input != NULL)
+               i_stream_unref(&strmap->input);
+}
+
+void mail_index_strmap_deinit(struct mail_index_strmap **_strmap)
+{
+       struct mail_index_strmap *strmap = *_strmap;
+
+       *_strmap = NULL;
+       mail_index_strmap_close(strmap);
+       i_free(strmap->path);
+       i_free(strmap);
+}
+
+static unsigned int mail_index_strmap_hash_key(const void *_key)
+{
+       const struct mail_index_strmap_hash_key *key = _key;
+
+       return key->crc32;
+}
+
+static bool
+mail_index_strmap_hash_cmp(const void *_key, const void *_value, void *context)
+{
+       const struct mail_index_strmap_hash_key *key = _key;
+       const struct mail_index_strmap_rec *rec = _value;
+       struct mail_index_strmap_view *view = context;
+
+       return view->key_compare(key->str, rec, view->cb_context);
+}
+
+struct mail_index_strmap_view *
+mail_index_strmap_view_open(struct mail_index_strmap *strmap,
+                           struct mail_index_view *idx_view,
+                           mail_index_strmap_key_cmp_t *key_compare_cb,
+                           mail_index_strmap_rec_cmp_t *rec_compare_cb,
+                           mail_index_strmap_remap_t *remap_cb,
+                           void *context,
+                           const ARRAY_TYPE(mail_index_strmap_rec) **recs_r,
+                           const struct hash2_table **hash_r)
+{
+       struct mail_index_strmap_view *view;
+
+       view = i_new(struct mail_index_strmap_view, 1);
+       view->strmap = strmap;
+       view->view = idx_view;
+       view->key_compare = key_compare_cb;
+       view->rec_compare = rec_compare_cb;
+       view->remap_cb = remap_cb;
+       view->cb_context = context;
+       view->next_str_idx = 1;
+
+       i_array_init(&view->recs, 64);
+       i_array_init(&view->recs_crc32, 64);
+       view->hash = hash2_create(0, sizeof(struct mail_index_strmap_rec),
+                                 mail_index_strmap_hash_key,
+                                 mail_index_strmap_hash_cmp, view);
+       *recs_r = &view->recs;
+       *hash_r = view->hash;
+       return view;
+}
+
+void mail_index_strmap_view_close(struct mail_index_strmap_view **_view)
+{
+       struct mail_index_strmap_view *view = *_view;
+
+       *_view = NULL;
+       array_free(&view->recs);
+       array_free(&view->recs_crc32);
+       hash2_destroy(&view->hash);
+       i_free(view);
+}
+
+uint32_t mail_index_strmap_view_get_highest_idx(struct mail_index_strmap_view *view)
+{
+       return view->next_str_idx-1;
+}
+
+static void mail_index_strmap_view_reset(struct mail_index_strmap_view *view)
+{
+       view->remap_cb(NULL, 0, 0, view->cb_context);
+       array_clear(&view->recs);
+       array_clear(&view->recs_crc32);
+       hash2_clear(view->hash);
+
+       view->last_added_uid = 0;
+       view->lost_expunged_uid = 0;
+       view->desynced = FALSE;
+}
+
+static void
+mail_index_strmap_view_set_corrupted(struct mail_index_strmap_view *view)
+{
+       mail_index_set_error(view->strmap->index,
+                            "Corrupted strmap index file: %s",
+                            view->strmap->path);
+       (void)unlink(view->strmap->path);
+       mail_index_strmap_close(view->strmap);
+       mail_index_strmap_view_reset(view);
+}
+
+static int mail_index_strmap_open(struct mail_index_strmap_view *view)
+{
+       struct mail_index_strmap *strmap = view->strmap;
+       const struct mail_index_header *idx_hdr;
+       struct mail_index_strmap_header hdr;
+       const unsigned char *data;
+       size_t size;
+       int ret;
+
+       i_assert(strmap->fd == -1);
+
+       strmap->fd = open(strmap->path, O_RDWR);
+       if (strmap->fd == -1) {
+               if (errno == ENOENT)
+                       return 0;
+               mail_index_strmap_set_syscall_error(strmap, "open()");
+               return -1;
+       }
+       strmap->input = i_stream_create_fd(strmap->fd, (size_t)-1, FALSE);
+       ret = i_stream_read_data(strmap->input, &data, &size, sizeof(hdr)-1);
+       if (ret <= 0) {
+               if (ret < 0) {
+                       mail_index_strmap_set_syscall_error(strmap, "read()");
+                       mail_index_strmap_close(strmap);
+               } else {
+                       i_assert(ret == 0);
+                       mail_index_strmap_view_set_corrupted(view);
+               }
+               return ret;
+       }
+       memcpy(&hdr, data, sizeof(hdr));
+
+       idx_hdr = mail_index_get_header(view->view);
+       if (hdr.version != MAIL_INDEX_STRMAP_VERSION ||
+           hdr.uid_validity != idx_hdr->uid_validity) {
+               /* need to rebuild. if we already had something in the strmap,
+                  we can keep it. */
+               (void)unlink(strmap->path);
+               mail_index_strmap_close(strmap);
+               return 0;
+       }
+
+       /* we'll read the entire file from the beginning */
+       view->last_added_uid = 0;
+       view->last_read_uid = 0;
+       view->total_ref_count = 0;
+       view->last_read_block_offset = sizeof(struct mail_index_strmap_header);
+       view->next_str_idx = 1;
+
+       mail_index_strmap_view_reset(view);
+       return 0;
+}
+
+static bool mail_index_strmap_need_reopen(struct mail_index_strmap *strmap)
+{
+       struct stat st1, st2;
+
+       /* FIXME: nfs flush */
+       if (fstat(strmap->fd, &st1) < 0) {
+               if (errno != ESTALE)
+                       mail_index_strmap_set_syscall_error(strmap, "fstat()");
+               return TRUE;
+       }
+       if (stat(strmap->path, &st2) < 0) {
+               mail_index_strmap_set_syscall_error(strmap, "stat()");
+               return TRUE;
+       }
+       return st1.st_ino != st2.st_ino || !CMP_DEV_T(st1.st_dev, st2.st_dev);
+}
+
+static int mail_index_strmap_refresh(struct mail_index_strmap_view *view)
+{
+       uint32_t seq;
+
+       if (MAIL_INDEX_IS_IN_MEMORY(view->strmap->index))
+               return -1;
+
+       if (view->strmap->fd != -1) {
+               if (!mail_index_strmap_need_reopen(view->strmap)) {
+                       if (view->lost_expunged_uid != 0) {
+                               /* last read failed because view had a message
+                                  that didn't exist in the strmap (because it
+                                  was expunged by another session). if the
+                                  message still isn't expunged in this view,
+                                  just continue using the current strmap. */
+                               if (mail_index_lookup_seq(view->view,
+                                               view->lost_expunged_uid, &seq))
+                                       return -1;
+                       } else if (view->desynced) {
+                               /* our view isn't synced with the disk, we
+                                  can't read strmap without first resetting
+                                  the view */
+                       } else {
+                               i_stream_sync(view->strmap->input);
+                               return 0;
+                       }
+               }
+               mail_index_strmap_close(view->strmap);
+       }
+
+       return mail_index_strmap_open(view);
+}
+
+static int
+mail_index_strmap_read_packed(struct mail_index_strmap_read_context *ctx,
+                             uint32_t *num_r)
+{
+       const unsigned char *data;
+       const uint8_t *bytes, *p, *end;
+       size_t size;
+       int ret;
+
+       ret = i_stream_read_data(ctx->input, &data, &size, sizeof(*num_r) - 1);
+       if (ret <= 0)
+               return ret;
+
+       if (ctx->input->v_offset + size > ctx->end_offset)
+               size = ctx->end_offset - ctx->input->v_offset;
+       bytes = p = (const uint8_t *)data;
+       end = bytes + size;
+
+       if (mail_index_unpack_num(&p, end, num_r) <  0)
+               return -1;
+       i_stream_skip(ctx->input, p - bytes);
+       return 1;
+}
+
+static int
+mail_index_strmap_uid_exists(struct mail_index_strmap_read_context *ctx,
+                            uint32_t uid)
+{
+       const struct mail_index_record *rec;
+
+       if (ctx->uid_lookup_idx >= ctx->view->view->map->hdr.messages_count) {
+               if (uid >= ctx->view->view->map->hdr.next_uid) {
+                       /* thread index has larger UIDs than what we've seen
+                          in our view. we'll have to read them again later
+                          when we know about them */
+                       ctx->too_large_uids = TRUE;
+               }
+               return 0;
+       }
+
+       rec = MAIL_INDEX_MAP_IDX(ctx->view->view->map, ctx->uid_lookup_idx);
+       if (rec->uid == uid) {
+               ctx->uid_lookup_idx++;
+               return 1;
+       } else if (rec->uid > uid) {
+               return 0;
+       } else {
+               /* record that exists in index is missing from strmap.
+                  see if it's because the strmap is corrupted or because
+                  our current view is a bit stale and the message has already
+                  been expunged. */
+               (void)mail_index_refresh(ctx->view->view->index);
+               if (mail_index_is_expunged(ctx->view->view,
+                                          ctx->uid_lookup_idx + 1))
+                       ctx->lost_expunged_uid = rec->uid;
+               return -1;
+       }
+}
+
+static int
+mail_index_strmap_read_rec_first(struct mail_index_strmap_read_context *ctx,
+                                uint32_t *crc32_r)
+{
+       size_t size;
+       uint32_t n, i, count, str_idx;
+       int ret;
+
+       /* <uid> <n> <crc32>*count <str_idx>*count
+          where
+            n = 0 -> count=1 (only Message-ID:)
+            n = 1 -> count=2 (Message-ID: + In-Reply-To:)
+            n = 2+ -> count=n (Message-ID: + References:)
+       */
+       if (mail_index_strmap_read_packed(ctx, &n) <= 0)
+               return -1;
+       count = n < 2 ? n + 1 : n;
+       ctx->view->total_ref_count += count;
+
+       ctx->rec_size = count * (sizeof(ctx->rec.str_idx) + sizeof(*crc32_r));
+       ret = mail_index_strmap_uid_exists(ctx, ctx->rec.uid);
+       if (ret < 0)
+               return -1;
+       if (i_stream_read_data(ctx->view->strmap->input, &ctx->data, &size,
+                              ctx->rec_size - 1) <= 0)
+               return -1;
+       ctx->str_idx_base = ctx->data + count * sizeof(uint32_t);
+
+       if (ret == 0) {
+               /* this message has already been expunged, ignore it.
+                  update highest string indexes anyway. */
+               for (i = 0; i < count; i++) {
+                       memcpy(&str_idx, ctx->str_idx_base, sizeof(str_idx));
+                       if (ctx->highest_str_idx < str_idx)
+                               ctx->highest_str_idx = str_idx;
+                       ctx->str_idx_base += sizeof(str_idx);
+               }
+               i_stream_skip(ctx->view->strmap->input, ctx->rec_size);
+               return 0;
+       }
+
+       /* everything exists. save it. FIXME: these ref_index values
+          are thread index specific, perhaps something more generic
+          should be used some day */
+       ctx->end = ctx->data + count * sizeof(*crc32_r);
+
+       ctx->next_ref_index = 0;
+       if (!mail_index_strmap_read_rec_next(ctx, crc32_r))
+               i_unreached();
+       ctx->next_ref_index = n == 1 ? 1 : 2;
+       return 1;
+}
+
+static bool
+mail_index_strmap_read_rec_next(struct mail_index_strmap_read_context *ctx,
+                               uint32_t *crc32_r)
+{
+       if (ctx->data == ctx->end) {
+               i_stream_skip(ctx->view->strmap->input, ctx->rec_size);
+               return FALSE;
+       }
+
+       /* FIXME: str_idx could be stored as packed relative values
+          (first relative to highest_idx, the rest relative to the
+          previous str_idx) */
+
+       /* read the record contents */
+       memcpy(&ctx->rec.str_idx, ctx->str_idx_base, sizeof(ctx->rec.str_idx));
+       memcpy(crc32_r, ctx->data, sizeof(*crc32_r));
+
+       ctx->rec.ref_index = ctx->next_ref_index++;
+
+       if (ctx->highest_str_idx < ctx->rec.str_idx)
+               ctx->highest_str_idx = ctx->rec.str_idx;
+
+       /* get to the next record */
+       ctx->data += sizeof(*crc32_r);
+       ctx->str_idx_base += sizeof(ctx->rec.str_idx);
+       return TRUE;
+}
+
+static int
+strmap_read_block_init(struct mail_index_strmap_view *view,
+                      struct mail_index_strmap_read_context *ctx)
+{
+       struct mail_index_strmap *strmap = view->strmap;
+       const unsigned char *data;
+       size_t size;
+       uint32_t block_size, seq1, seq2;
+       int ret;
+
+       if (view->last_read_uid + 1 >= view->view->map->hdr.next_uid) {
+               /* come back later when we know about the new UIDs */
+               return 0;
+       }
+
+       memset(ctx, 0, sizeof(*ctx));
+       ret = i_stream_read_data(strmap->input, &data, &size,
+                                sizeof(block_size)-1);
+       if (ret <= 0) {
+               if (strmap->input->stream_errno == 0) {
+                       /* no new data */
+                       return 0;
+               }
+               mail_index_strmap_set_syscall_error(strmap, "read()");
+               return -1;
+       }
+       memcpy(&block_size, data, sizeof(block_size));
+       block_size = mail_index_offset_to_uint32(block_size) >> 2;
+       if (block_size == 0) {
+               /* the rest of the file is either not written, or the previous
+                  write didn't finish */
+               return 0;
+       }
+       i_stream_skip(strmap->input, sizeof(block_size));
+
+       ctx->view = view;
+       ctx->input = strmap->input;
+       ctx->end_offset = strmap->input->v_offset + block_size;
+       if (ctx->end_offset < strmap->input->v_offset) {
+               /* block size too large */
+               mail_index_strmap_view_set_corrupted(view);
+               return -1;
+       }
+       ctx->rec.uid = view->last_read_uid + 1;
+
+       /* FIXME: when reading multiple blocks we shouldn't have to calculate
+          this every time */
+       if (!mail_index_lookup_seq_range(view->view, ctx->rec.uid, (uint32_t)-1,
+                                        &seq1, &seq2))
+               seq1 = mail_index_view_get_messages_count(view->view) + 1;
+       ctx->uid_lookup_idx = seq1 - 1;
+       return 1;
+}
+
+static int
+strmap_read_block_next(struct mail_index_strmap_read_context *ctx,
+                      uint32_t *crc32_r)
+{
+       uint32_t uid_diff;
+       int ret;
+
+       if (mail_index_strmap_read_rec_next(ctx, crc32_r))
+               return 1;
+
+       /* get next UID */
+       do {
+               if (ctx->input->v_offset == ctx->end_offset) {
+                       /* this block is done */
+                       return 0;
+               }
+               if (mail_index_strmap_read_packed(ctx, &uid_diff) < 0)
+                       return -1;
+
+               ctx->rec.uid += uid_diff;
+               ret = mail_index_strmap_read_rec_first(ctx, crc32_r);
+       } while (ret == 0);
+       return ret;
+}
+
+static int
+strmap_read_block_deinit(struct mail_index_strmap_read_context *ctx, int ret,
+                        bool update_block_offset)
+{
+       struct mail_index_strmap_view *view = ctx->view;
+       struct mail_index_strmap *strmap = view->strmap;
+
+       if (ctx->highest_str_idx > view->total_ref_count) {
+               /* if all string indexes are unique, highest_str_index equals
+                  total_ref_count. otherwise it's always lower. */
+               mail_index_set_error(strmap->index,
+                                    "Corrupted strmap index file %s: "
+                                    "String indexes too high "
+                                    "(highest=%u max=%u)",
+                                    strmap->path, ctx->highest_str_idx,
+                                    view->total_ref_count);
+               mail_index_strmap_view_set_corrupted(view);
+               return -1;
+       }
+       if (ctx->lost_expunged_uid != 0) {
+               /* our view contained a message that had since been expunged. */
+               i_assert(ret < 0);
+               view->lost_expunged_uid = ctx->lost_expunged_uid;
+       } else if (ret < 0) {
+               if (strmap->input->stream_errno != 0)
+                       mail_index_strmap_set_syscall_error(strmap, "read()");
+               else
+                       mail_index_strmap_view_set_corrupted(view);
+               return -1;
+       } else if (update_block_offset && !ctx->too_large_uids) {
+               view->last_read_block_offset = strmap->input->v_offset;
+               view->last_read_uid = ctx->rec.uid;
+       }
+       if (view->next_str_idx <= ctx->highest_str_idx)
+               view->next_str_idx = ctx->highest_str_idx + 1;
+       return ret;
+}
+
+static bool
+strmap_view_sync_handle_conflict(struct mail_index_strmap_read_context *ctx,
+                                const struct mail_index_strmap_rec *hash_rec,
+                                struct hash2_iter *iter)
+{
+       uint32_t seq;
+
+       /* hopefully it's a message that has since been expunged */
+       if (!mail_index_lookup_seq(ctx->view->view, hash_rec->uid, &seq)) {
+               /* message is no longer in our view. remove it completely. */
+               hash2_remove_iter(ctx->view->hash, iter);
+               return TRUE;
+       }
+       if (mail_index_is_expunged(ctx->view->view, seq)) {
+               /* it's quite likely a conflict. we may not be able to verify
+                  it, so just assume it is. nothing breaks even if we guess
+                  wrong, the performance just suffers a bit. */
+               return FALSE;
+       }
+
+       /* 0 means "doesn't match", which is the only acceptable case */
+       return ctx->view->rec_compare(&ctx->rec, hash_rec,
+                                     ctx->view->cb_context) == 0;
+}
+
+static int
+mail_index_strmap_view_sync_block(struct mail_index_strmap_read_context *ctx)
+{
+       struct mail_index_strmap_rec *hash_rec;
+       struct hash2_iter iter;
+       uint32_t crc32, prev_uid = 0;
+       int ret;
+
+       while ((ret = strmap_read_block_next(ctx, &crc32)) > 0) {
+               if (ctx->rec.uid <= ctx->view->last_added_uid) {
+                       if (ctx->rec.uid < ctx->view->last_added_uid ||
+                           prev_uid != ctx->rec.uid) {
+                               /* we've already added this */
+                               continue;
+                       }
+               }
+               prev_uid = ctx->rec.uid;
+
+               /* check for conflicting string indexes. they may happen if
+
+                  1) msgid exists only for a message X that has been expunged
+                  2) another process doesn't see X, but sees msgid for another
+                     message and writes it using a new string index
+                  3) if we still see X, we now see the same msgid with two
+                     string indexes.
+
+                  if we detect such a conflict, we can't continue using the
+                  strmap index until X has been expunged. */
+               memset(&iter, 0, sizeof(iter));
+               while ((hash_rec = hash2_iterate(ctx->view->hash,
+                                                crc32, &iter)) != NULL &&
+                      hash_rec->str_idx != ctx->rec.str_idx) {
+                       /* CRC32 matches, but string index doesn't */
+                       if (!strmap_view_sync_handle_conflict(ctx, hash_rec,
+                                                             &iter)) {
+                               ctx->lost_expunged_uid = hash_rec->uid;
+                               ret = -1;
+                               goto error;
+                       }
+               }
+
+               ctx->view->last_added_uid = ctx->rec.uid;
+
+               /* add the record to records array */
+               array_append(&ctx->view->recs, &ctx->rec, 1);
+               array_append(&ctx->view->recs_crc32, &crc32, 1);
+
+               /* add a separate copy of the record to hash */
+               hash_rec = hash2_insert_hash(ctx->view->hash, crc32);
+               memcpy(hash_rec, &ctx->rec, sizeof(*hash_rec));
+       }
+error:
+       return strmap_read_block_deinit(ctx, ret, TRUE);
+}
+
+struct mail_index_strmap_view_sync *
+mail_index_strmap_view_sync_init(struct mail_index_strmap_view *view,
+                                uint32_t *last_uid_r)
+{
+       struct mail_index_strmap_view_sync *sync;
+       struct mail_index_strmap_read_context ctx;
+       int ret;
+
+       sync = i_new(struct mail_index_strmap_view_sync, 1);
+       sync->view = view;
+
+       if (mail_index_strmap_refresh(view) < 0) {
+               /* reading the strmap failed - just ignore and do
+                  this in-memory based on whatever we knew last */
+       } else if (view->strmap->input != NULL) {
+               i_stream_seek(view->strmap->input,
+                             view->last_read_block_offset);
+               while ((ret = strmap_read_block_init(view, &ctx)) > 0) {
+                       if (mail_index_strmap_view_sync_block(&ctx) < 0) {
+                               ret = -1;
+                               break;
+                       }
+                       if (ctx.too_large_uids)
+                               break;
+               }
+
+               if (ret < 0) {
+                       /* something failed - we can still use the strmap as far
+                          as we managed to read it, but our view is now out
+                          of sync */
+                       view->desynced = TRUE;
+               } else {
+                       i_assert(view->lost_expunged_uid == 0);
+               }
+       }
+       *last_uid_r = view->last_added_uid;
+       return sync;
+}
+
+static inline uint32_t crc32_str_nonzero(const char *str)
+{
+       uint32_t value = crc32_str(str);
+       return value == 0 ? 1 : value;
+}
+
+void mail_index_strmap_view_sync_add(struct mail_index_strmap_view_sync *sync,
+                                    uint32_t uid, uint32_t ref_index,
+                                    const char *key)
+{
+       struct mail_index_strmap_view *view = sync->view;
+       struct mail_index_strmap_rec *rec, *old_rec;
+       struct mail_index_strmap_hash_key hash_key;
+       uint32_t str_idx;
+
+       i_assert(uid > view->last_added_uid ||
+                (uid == view->last_added_uid &&
+                 ref_index > view->last_ref_index));
+
+       hash_key.str = key;
+       hash_key.crc32 = crc32_str_nonzero(key);
+
+       old_rec = hash2_lookup(view->hash, &hash_key);
+       if (old_rec != NULL) {
+               /* The string already exists, use the same unique idx */
+               str_idx = old_rec->str_idx;
+       } else {
+               /* Newly seen string, assign a new unique idx to it */
+               str_idx = view->next_str_idx++;
+       }
+       i_assert(str_idx != 0);
+
+       rec = hash2_insert(view->hash, &hash_key);
+       rec->uid = uid;
+       rec->ref_index = ref_index;
+       rec->str_idx = str_idx;
+       array_append(&view->recs, rec, 1);
+       array_append(&view->recs_crc32, &hash_key.crc32, 1);
+
+       view->last_added_uid = uid;
+       view->last_ref_index = ref_index;
+}
+
+void mail_index_strmap_view_sync_add_unique(struct mail_index_strmap_view_sync *sync,
+                                           uint32_t uid, uint32_t ref_index)
+{
+       struct mail_index_strmap_view *view = sync->view;
+       struct mail_index_strmap_rec rec;
+
+       i_assert(uid > view->last_added_uid ||
+                (uid == view->last_added_uid &&
+                 ref_index > view->last_ref_index));
+
+       memset(&rec, 0, sizeof(rec));
+       rec.uid = uid;
+       rec.ref_index = ref_index;
+       rec.str_idx = view->next_str_idx++;
+       array_append(&view->recs, &rec, 1);
+       (void)array_append_space(&view->recs_crc32);
+
+       view->last_added_uid = uid;
+       view->last_ref_index = ref_index;
+}
+
+static void mail_index_strmap_view_renumber(struct mail_index_strmap_view *view)
+{
+       struct mail_index_strmap_read_context ctx;
+       struct mail_index_strmap_rec *recs, *hash_rec;
+       uint32_t prev_uid, str_idx, *recs_crc32, *renumber_map;
+       unsigned int i, dest, count, count2;
+       int ret;
+
+       memset(&ctx, 0, sizeof(ctx));
+       ctx.view = view;
+
+       /* create a map of old -> new index and remove records of
+          expunged messages */
+       renumber_map = i_new(uint32_t, view->next_str_idx);
+       str_idx = 0; prev_uid = 0;
+       recs = array_get_modifiable(&view->recs, &count);
+       recs_crc32 = array_get_modifiable(&view->recs_crc32, &count2);
+       i_assert(count == count2);
+
+       for (i = dest = 0; i < count; ) {
+               if (prev_uid != recs[i].uid) {
+                       /* see if this record should be removed */
+                       prev_uid = recs[i].uid;
+                       ret = mail_index_strmap_uid_exists(&ctx, prev_uid);
+                       i_assert(ret >= 0);
+                       if (ret == 0) {
+                               /* message expunged */
+                               do {
+                                       i++;
+                               } while (i < count && recs[i].uid == prev_uid);
+                               continue;
+                       }
+               }
+
+               i_assert(recs[i].str_idx < view->next_str_idx);
+               if (renumber_map[recs[i].str_idx] == 0)
+                       renumber_map[recs[i].str_idx] = ++str_idx;
+               if (i != dest) {
+                       recs[dest] = recs[i];
+                       recs_crc32[dest] = recs_crc32[i];
+               }
+               i++; dest++;
+       }
+       i_assert(renumber_map[0] == 0);
+       array_delete(&view->recs, dest, i-dest);
+       array_delete(&view->recs_crc32, dest, i-dest);
+
+       /* notify caller of the renumbering */
+       i_assert(str_idx <= view->next_str_idx);
+       view->remap_cb(renumber_map, view->next_str_idx, str_idx + 1,
+                      view->cb_context);
+
+       /* renumber the indexes in-place and recreate the hash */
+       recs = array_get_modifiable(&view->recs, &count);
+       hash2_clear(view->hash);
+       for (i = 0; i < count; i++) {
+               recs[i].str_idx = renumber_map[recs[i].str_idx];
+               hash_rec = hash2_insert_hash(view->hash, recs_crc32[i]);
+               memcpy(hash_rec, &recs[i], sizeof(*hash_rec));
+       }
+
+       /* update the new next_str_idx only after remapping */
+       view->next_str_idx = str_idx + 1;
+       i_free(renumber_map);
+}
+
+static int mail_index_strmap_write_block(struct mail_index_strmap_view *view,
+                                        struct ostream *output,
+                                        unsigned int i, uint32_t base_uid)
+{
+       const struct mail_index_strmap_rec *recs;
+       const uint32_t *crc32;
+       unsigned int j, n, count, count2, uid_rec_count;
+       uint32_t block_size;
+       uint8_t *p, packed[MAIL_INDEX_PACK_MAX_SIZE*2];
+       uoff_t block_offset, end_offset;
+
+       /* skip over the block size for now, we don't know it yet */
+       block_offset = output->offset;
+       block_size = 0;
+       o_stream_send(output, &block_size, sizeof(block_size));
+
+       /* write records */
+       recs = array_get(&view->recs, &count);
+       crc32 = array_get(&view->recs_crc32, &count2);
+       i_assert(count == count2);
+       while (i < count) {
+               /* @UNSAFE: <uid diff> */
+               p = packed;
+               mail_index_pack_num(&p, recs[i].uid - base_uid);
+               base_uid = recs[i].uid;
+
+               /* find how many records belong to this UID */
+               uid_rec_count = 1;
+               for (j = i + 1; j < count; j++) {
+                       if (recs[j].uid != base_uid)
+                               break;
+                       uid_rec_count++;
+               }
+               view->total_ref_count += uid_rec_count;
+
+               /* <n> <crc32>*count <str_idx>*count -
+                  FIXME: thread index specific code */
+               i_assert(recs[i].ref_index == 0);
+               if (uid_rec_count == 1) {
+                       /* Only Message-ID: header */
+                       n = 0;
+               } else if (recs[i+1].ref_index == 1) {
+                       /* In-Reply-To: header */
+                       n = 1;
+                       i_assert(uid_rec_count == 2);
+               } else {
+                       /* References: header */
+                       n = uid_rec_count;
+                       i_assert(recs[i+1].ref_index == 2);
+               }
+
+               mail_index_pack_num(&p, n);
+               o_stream_send(output, packed, p-packed);
+               for (j = 0; j < uid_rec_count; j++)
+                       o_stream_send(output, &crc32[i+j], sizeof(crc32[i+j]));
+               for (j = 0; j < uid_rec_count; j++) {
+                       i_assert(j < 2 || recs[i+j].ref_index == j+1);
+                       o_stream_send(output, &recs[i+j].str_idx,
+                                     sizeof(recs[i+j].str_idx));
+               }
+               i += uid_rec_count;
+       }
+
+       /* we know the block size now - write it */
+       block_size = output->offset - (block_offset + sizeof(block_size));
+       block_size = mail_index_uint32_to_offset(block_size << 2);
+       i_assert(block_size != 0);
+
+       end_offset = output->offset;
+       o_stream_seek(output, block_offset);
+       o_stream_send(output, &block_size, sizeof(block_size));
+       o_stream_seek(output, end_offset);
+
+       if (output->last_failed_errno != 0)
+               return -1;
+       else {
+               i_assert(view->last_added_uid == recs[count-1].uid);
+               view->last_read_uid = recs[count-1].uid;
+               view->last_read_block_offset = output->offset;
+               return 0;
+       }
+}
+
+static void
+mail_index_strmap_recreate_write(struct mail_index_strmap_view *view,
+                                struct ostream *output)
+{
+       const struct mail_index_header *idx_hdr;
+       struct mail_index_strmap_header hdr;
+
+       idx_hdr = mail_index_get_header(view->view);
+
+       /* write header */
+       memset(&hdr, 0, sizeof(hdr));
+       hdr.version = MAIL_INDEX_STRMAP_VERSION;
+       hdr.uid_validity = idx_hdr->uid_validity;
+       o_stream_send(output, &hdr, sizeof(hdr));
+
+       view->total_ref_count = 0;
+       (void)mail_index_strmap_write_block(view, output, 0, 1);
+}
+
+static int mail_index_strmap_recreate(struct mail_index_strmap_view *view)
+{
+       struct mail_index_strmap *strmap = view->strmap;
+       string_t *str;
+       struct ostream *output;
+       const char *temp_path;
+       int fd, ret = 0;
+
+       if (array_count(&view->recs) == 0) {
+               /* everything expunged - just unlink the existing index */
+               if (unlink(strmap->path) < 0)
+                       mail_index_strmap_set_syscall_error(strmap, "unlink()");
+               return 0;
+       }
+
+       str = t_str_new(256);
+       str_append(str, strmap->path);
+       fd = safe_mkstemp_hostpid(str, view->view->index->mode,
+                                 (uid_t)-1, view->view->index->gid);
+       temp_path = str_c(str);
+
+       if (fd == -1) {
+               mail_index_set_error(strmap->index,
+                                    "safe_mkstemp_hostpid(%s) failed: %m",
+                                    temp_path);
+               return -1;
+       }
+       output = o_stream_create_fd(fd, 0, FALSE);
+       o_stream_cork(output);
+       mail_index_strmap_recreate_write(view, output);
+       if (output->last_failed_errno != 0) {
+               errno = output->last_failed_errno;
+               mail_index_set_error(strmap->index,
+                                    "write(%s) failed: %m", temp_path);
+               ret = -1;
+       }
+       o_stream_destroy(&output);
+       if (close(fd) < 0) {
+               mail_index_set_error(strmap->index,
+                                    "close(%s) failed: %m", temp_path);
+               ret = -1;
+       } else if (ret == 0 && rename(temp_path, strmap->path) < 0) {
+               mail_index_set_error(strmap->index,
+                                    "rename(%s, %s) failed: %m",
+                                    temp_path, strmap->path);
+               ret = -1;
+       }
+       if (ret < 0)
+               (void)unlink(temp_path);
+       return ret;
+}
+
+static int mail_index_strmap_lock(struct mail_index_strmap *strmap)
+{
+       int ret;
+
+       i_assert(strmap->fd != -1);
+
+       if (strmap->index->lock_method != FILE_LOCK_METHOD_DOTLOCK) {
+               i_assert(strmap->file_lock == NULL);
+
+               ret = file_wait_lock(strmap->fd, strmap->path, F_WRLCK,
+                                    strmap->index->lock_method,
+                                    MAIL_INDEX_STRMAP_TIMEOUT_SECS,
+                                    &strmap->file_lock);
+               if (ret <= 0) {
+                       mail_index_strmap_set_syscall_error(strmap,
+                                                           "file_wait_lock()");
+               }
+       } else {
+               i_assert(strmap->dotlock == NULL);
+
+               ret = file_dotlock_create(&strmap->dotlock_settings,
+                                         strmap->path, 0, &strmap->dotlock);
+               if (ret <= 0) {
+                       mail_index_strmap_set_syscall_error(strmap,
+                               "file_dotlock_create()");
+               }
+       }
+       return ret;
+}
+
+static void mail_index_strmap_unlock(struct mail_index_strmap *strmap)
+{
+       if (strmap->file_lock != NULL)
+               file_unlock(&strmap->file_lock);
+       else if (strmap->dotlock != NULL)
+               file_dotlock_delete(&strmap->dotlock);
+}
+
+static int strmap_rec_cmp(const void *key, const void *value)
+{
+       const uint32_t *uid = key;
+       const struct mail_index_strmap_rec *rec = value;
+
+       return *uid < rec->uid ? -1 :
+               (*uid > rec->uid ? 1 : 0);
+}
+
+static int
+mail_index_strmap_write_append(struct mail_index_strmap_view *view)
+{
+       struct mail_index_strmap_read_context ctx;
+       const struct mail_index_strmap_rec *old_recs;
+       unsigned int i, old_count;
+       struct ostream *output;
+       uint32_t crc32, next_uid;
+       bool full_block;
+       int ret;
+
+       /* Check first if another process had written new records to the file.
+          If there are any, hopefully they're the same as what we would be
+          writing. There are two problematic cases when messages have been
+          expunged recently:
+
+          1) The file contains UIDs that we don't have. This means the string
+          indexes won't be compatible anymore, so we'll have to renumber ours
+          to match the ones in the strmap file.
+
+          Currently we don't bother handling 1) case. If indexes don't match
+          what we have, we just don't write anything.
+
+          2) We have UIDs that don't exist in the file. We can't simply skip
+          those records, because other records may have pointers to them using
+          different string indexes than we have. Even if we renumbered those,
+          future appends by other processes might cause the same problem (they
+          see the string for the first time and assign it a new index, but we
+          already have internally given it another index). So the only
+          sensible choice is to write nothing and hope that the message goes
+          away soon. */
+       old_recs = array_get(&view->recs, &old_count);
+       next_uid = view->last_read_uid + 1;
+       (void)bsearch_insert_pos(&next_uid, old_recs, old_count,
+                                sizeof(*old_recs), strmap_rec_cmp, &i);
+       if (i < old_count) {
+               while (i > 0 && old_recs[i-1].uid == old_recs[i].uid)
+                       i--;
+       }
+
+       i_stream_sync(view->strmap->input);
+       i_stream_seek(view->strmap->input, view->last_read_block_offset);
+       full_block = TRUE;
+       while (i < old_count &&
+              (ret = strmap_read_block_init(view, &ctx)) > 0) {
+               while ((ret = strmap_read_block_next(&ctx, &crc32)) > 0) {
+                       if (ctx.rec.uid != old_recs[i].uid ||
+                           ctx.rec.str_idx != old_recs[i].str_idx) {
+                               /* mismatch */
+                               if (ctx.rec.uid > old_recs[i].uid) {
+                                       /* 1) case */
+                                       ctx.lost_expunged_uid = ctx.rec.uid;
+                               } else if (ctx.rec.uid < old_recs[i].uid) {
+                                       /* 2) case */
+                                       ctx.lost_expunged_uid = old_recs[i].uid;
+                               } else {
+                                       /* string index mismatch,
+                                          shouldn't happen */
+                               }
+                               ret = -1;
+                               break;
+                       }
+                       if (++i == old_count) {
+                               full_block = FALSE;
+                               break;
+                       }
+               }
+               if (strmap_read_block_deinit(&ctx, ret, full_block) < 0) {
+                       ret = -1;
+                       break;
+               }
+       }
+       if (ret < 0)
+               return -1;
+       if (i == old_count) {
+               /* nothing new to write */
+               return 0;
+       }
+       i_assert(full_block);
+       i_assert(old_recs[i].uid > view->last_read_uid);
+
+       /* write the new records */
+       output = o_stream_create_fd(view->strmap->fd, 0, FALSE);
+       o_stream_seek(output, view->last_read_block_offset);
+       o_stream_cork(output);
+       if (mail_index_strmap_write_block(view, output, i,
+                                         view->last_read_uid + 1) < 0) {
+               errno = output->last_failed_errno;
+               mail_index_strmap_set_syscall_error(view->strmap, "write()");
+               ret = -1;
+       }
+       o_stream_destroy(&output);
+       return ret;
+}
+
+static int mail_index_strmap_write(struct mail_index_strmap_view *view)
+{
+       int ret;
+
+       /* FIXME: this renumbering doesn't work well when running for a long
+          time since records aren't removed from hash often enough */
+       if (STRIDX_MUST_RENUMBER(view->next_str_idx - 1,
+                                hash2_count(view->hash))) {
+               mail_index_strmap_view_renumber(view);
+               if (!MAIL_INDEX_IS_IN_MEMORY(view->strmap->index)) {
+                       if (mail_index_strmap_recreate(view) < 0) {
+                               view->desynced = TRUE;
+                               return -1;
+                       }
+               }
+               return 0;
+       }
+
+       if (MAIL_INDEX_IS_IN_MEMORY(view->strmap->index) || view->desynced)
+               return 0;
+
+       if (view->strmap->fd == -1) {
+               /* initial file creation */
+               if (mail_index_strmap_recreate(view) < 0) {
+                       view->desynced = TRUE;
+                       return -1;
+               }
+               return 0;
+       }
+
+       /* append the new records to the strmap file */
+       if (mail_index_strmap_lock(view->strmap) <= 0) {
+               /* timeout / error */
+               ret = -1;
+       } else if (mail_index_strmap_need_reopen(view->strmap)) {
+               /* the file was already recreated - leave the syncing as it is
+                  for now and let the next sync re-read the file. */
+               ret = 0;
+       } else {
+               ret = mail_index_strmap_write_append(view);
+       }
+       mail_index_strmap_unlock(view->strmap);
+       if (ret < 0)
+               view->desynced = TRUE;
+       return ret;
+}
+
+void mail_index_strmap_view_sync_commit(struct mail_index_strmap_view_sync **_sync)
+{
+       struct mail_index_strmap_view_sync *sync = *_sync;
+       struct mail_index_strmap_view *view = sync->view;
+
+       *_sync = NULL;
+       i_free(sync);
+
+       (void)mail_index_strmap_write(view);
+
+       /* zero-terminate the records array */
+       (void)array_append_space(&view->recs);
+       array_delete(&view->recs, array_count(&view->recs)-1, 1);
+}
+
+void mail_index_strmap_view_sync_rollback(struct mail_index_strmap_view_sync **_sync)
+{
+       struct mail_index_strmap_view_sync *sync = *_sync;
+
+       *_sync = NULL;
+
+       mail_index_strmap_view_reset(sync->view);
+       i_free(sync);
+}
diff --git a/src/lib-index/mail-index-strmap.h b/src/lib-index/mail-index-strmap.h
new file mode 100644 (file)
index 0000000..69e1cbd
--- /dev/null
@@ -0,0 +1,79 @@
+#ifndef MAIL_INDEX_STRMAP_H
+#define MAIL_INDEX_STRMAP_H
+
+#include "hash2.h"
+
+struct mail_index;
+struct mail_index_view;
+
+struct mail_index_strmap_header {
+#define MAIL_INDEX_STRMAP_VERSION 1
+       uint8_t version;
+       uint8_t unused[3];
+
+       uint32_t uid_validity;
+};
+
+struct mail_index_strmap_rec {
+       uint32_t uid;
+       uint32_t ref_index;
+       /* unique index number for the string */
+       uint32_t str_idx;
+};
+ARRAY_DEFINE_TYPE(mail_index_strmap_rec, struct mail_index_strmap_rec);
+
+typedef bool
+mail_index_strmap_key_cmp_t(const char *key,
+                           const struct mail_index_strmap_rec *rec,
+                           void *context);
+/* Returns 1 if matches, 0 if not, -1 if one of the records is expunged and
+   the result can't be determined */
+typedef int
+mail_index_strmap_rec_cmp_t(const struct mail_index_strmap_rec *rec1,
+                           const struct mail_index_strmap_rec *rec2,
+                           void *context);
+/* called when string indexes are renumbered. idx_map[old_idx] = new_idx.
+   if new_idx is 0, the record was expunged. As a special case if count=0,
+   the strmap was reset. */
+typedef void mail_index_strmap_remap_t(const uint32_t *idx_map,
+                                      unsigned int old_count,
+                                      unsigned int new_count, void *context);
+
+struct mail_index_strmap *
+mail_index_strmap_init(struct mail_index *index, const char *suffix);
+void mail_index_strmap_deinit(struct mail_index_strmap **strmap);
+
+/* Returns strmap records and hash that can be used for read-only access.
+   The records array always teminates with a record containing zeros (but it's
+   not counted in the array count). */
+struct mail_index_strmap_view *
+mail_index_strmap_view_open(struct mail_index_strmap *strmap,
+                           struct mail_index_view *idx_view,
+                           mail_index_strmap_key_cmp_t *key_compare_cb,
+                           mail_index_strmap_rec_cmp_t *rec_compare_cb,
+                           mail_index_strmap_remap_t *remap_cb,
+                           void *context,
+                           const ARRAY_TYPE(mail_index_strmap_rec) **recs_r,
+                           const struct hash2_table **hash_r);
+void mail_index_strmap_view_close(struct mail_index_strmap_view **view);
+
+/* Return the highest used string index. */
+uint32_t mail_index_strmap_view_get_highest_idx(struct mail_index_strmap_view *view);
+
+/* Synchronize strmap: Caller adds missing entries, expunged messages may be
+   removed internally and the changes are written to disk. Note that the strmap
+   recs/hash shouldn't be used until _sync_commit() is called, because the
+   string indexes may be renumbered if another process had already written the
+   same changes as us. */
+struct mail_index_strmap_view_sync *
+mail_index_strmap_view_sync_init(struct mail_index_strmap_view *view,
+                                uint32_t *last_uid_r);
+void mail_index_strmap_view_sync_add(struct mail_index_strmap_view_sync *sync,
+                                    uint32_t uid, uint32_t ref_index,
+                                    const char *key);
+void mail_index_strmap_view_sync_add_unique(struct mail_index_strmap_view_sync *sync,
+                                           uint32_t uid, uint32_t ref_index);
+void mail_index_strmap_view_sync_commit(struct mail_index_strmap_view_sync **sync);
+void mail_index_strmap_view_sync_rollback(struct mail_index_strmap_view_sync **sync);
+
+#endif
index a808ece73d1b9fe49ae5fa56ee60ad0f9922f734..178dd2f91d3757ab47666d7c4e5dee636c956340 100644 (file)
@@ -690,3 +690,50 @@ uint32_t mail_index_offset_to_uint32(uint32_t offset)
                 ((offset & 0x7f000000) >> 24)) << 2;
 }
 #endif
+
+void mail_index_pack_num(uint8_t **p, uint32_t num)
+{
+       /* number continues as long as the highest bit is set */
+       while (num >= 0x80) {
+               **p = (num & 0x7f) | 0x80;
+               *p += 1;
+               num >>= 7;
+       }
+
+       **p = num;
+       *p += 1;
+}
+
+int mail_index_unpack_num(const uint8_t **p, const uint8_t *end,
+                         uint32_t *num_r)
+{
+       const uint8_t *c = *p;
+       uint32_t value = 0;
+       unsigned int bits = 0;
+
+       for (;;) {
+               if (unlikely(c == end)) {
+                       /* we should never see EOF */
+                       *num_r = 0;
+                       return -1;
+               }
+
+               value |= (*c & 0x7f) << bits;
+               if (*c < 0x80)
+                       break;
+
+               bits += 7;
+               c++;
+       }
+
+       if (unlikely(bits >= 32)) {
+               /* broken input */
+               *p = end;
+               *num_r = 0;
+               return -1;
+       }
+
+       *p = c + 1;
+       *num_r = value;
+       return 0;
+}
index e3d9141045627597efc8032808588378da205a83..06e85554a7476f1459f24880e0ad3bdeb8823349 100644 (file)
@@ -26,7 +26,6 @@ libstorage_index_a_SOURCES = \
        index-thread.c \
        index-thread-finish.c \
        index-thread-links.c \
-       index-thread-list.c \
        index-transaction.c
 
 headers = \
index 41531b7a1bbeb77a9939f90e2b563866ca28a848..249790683e17c929157d043f81bb254c6342eff6 100644 (file)
@@ -986,8 +986,7 @@ index_storage_search_init(struct mailbox_transaction_context *_t,
 
        mail_search_args_reset(ctx->mail_ctx.args->args, TRUE);
        if (args->have_inthreads) {
-               if (mail_thread_init(_t->box, FALSE, NULL,
-                                    &ctx->thread_ctx) < 0)
+               if (mail_thread_init(_t->box, NULL, &ctx->thread_ctx) < 0)
                        ctx->failed = TRUE;
                if (search_build_inthreads(ctx, args->args) < 0)
                        ctx->failed = TRUE;
index ad34a43fb9ccf5ad000ef0a3fcd613ab22348fa2..03b1ad74f2decde093e4927774a30f9a2ae02636 100644 (file)
@@ -2,6 +2,7 @@
 
 #include "lib.h"
 #include "array.h"
+#include "hash.h"
 #include "imap-base-subject.h"
 #include "mail-storage-private.h"
 #include "index-thread-private.h"
@@ -36,8 +37,7 @@ struct thread_finish_context {
        unsigned int refcount;
 
        struct mail *tmp_mail;
-       struct mail_hash_transaction *hash_trans;
-       struct mail_thread_list_context *hash_list_ctx;
+       struct mail_thread_cache *cache;
 
        ARRAY_DEFINE(roots, struct mail_thread_root_node);
        ARRAY_DEFINE(shadow_nodes, struct mail_thread_shadow_node);
@@ -133,13 +133,13 @@ thread_lookup_existing(struct thread_finish_context *ctx, uint32_t idx)
 {
        const struct mail_thread_node *node;
 
-       node = mail_hash_lookup_idx(ctx->hash_trans, idx);
-       i_assert(MAIL_INDEX_NODE_EXISTS(node));
-       i_assert(node->uid_or_id != 0);
-       return node->uid_or_id;
+       node = array_idx(&ctx->cache->thread_nodes, idx);
+       i_assert(MAIL_THREAD_NODE_EXISTS(node));
+       i_assert(node->uid != 0);
+       return node->uid;
 }
 
-static int
+static void
 thread_child_node_fill(struct thread_finish_context *ctx,
                       struct mail_thread_child_node *child)
 {
@@ -150,9 +150,7 @@ thread_child_node_fill(struct thread_finish_context *ctx,
        if (!mail_set_uid(ctx->tmp_mail, child->uid)) {
                /* the UID should have existed. we would have rebuild
                   the thread tree otherwise. */
-               mail_hash_transaction_set_corrupted(ctx->hash_trans,
-                       t_strdup_printf("Found expunged UID %u", child->uid));
-               return -1;
+               i_unreached();
        }
 
        /* get sent date if we want to use it and if it's valid */
@@ -165,10 +163,9 @@ thread_child_node_fill(struct thread_finish_context *ctx,
                /* fallback to received date */
                (void)mail_get_received_date(ctx->tmp_mail, &child->sort_date);
        }
-       return 0;
 }
 
-static int
+static void
 thread_sort_children(struct thread_finish_context *ctx, uint32_t parent_idx,
                     ARRAY_TYPE(mail_thread_child_node) *sorted_children)
 {
@@ -188,11 +185,10 @@ thread_sort_children(struct thread_finish_context *ctx, uint32_t parent_idx,
                child.uid = thread_lookup_existing(ctx, child.idx);
 
                array_append(sorted_children, &child, 1);
-               return 0;
+               return;
        }
        while (child.idx != 0) {
-               if (thread_child_node_fill(ctx, &child) < 0)
-                       return -1;
+               thread_child_node_fill(ctx, &child);
 
                array_append(sorted_children, &child, 1);
                child.idx = shadows[child.idx].next_sibling_idx;
@@ -201,10 +197,9 @@ thread_sort_children(struct thread_finish_context *ctx, uint32_t parent_idx,
        /* sort the children */
        children = array_get_modifiable(sorted_children, &count);
        qsort(children, count, sizeof(*children), mail_thread_child_node_cmp);
-       return 0;
 }
 
-static int gather_base_subjects(struct thread_finish_context *ctx)
+static void gather_base_subjects(struct thread_finish_context *ctx)
 {
        struct subject_gather_context gather_ctx;
        struct mail_thread_root_node *roots;
@@ -213,12 +208,13 @@ static int gather_base_subjects(struct thread_finish_context *ctx)
        ARRAY_TYPE(mail_thread_child_node) sorted_children;
        const struct mail_thread_child_node *children;
        uint32_t idx, uid;
-       int ret = 0;
 
        memset(&gather_ctx, 0, sizeof(gather_ctx));
        gather_ctx.ctx = ctx;
 
        roots = array_get_modifiable(&ctx->roots, &count);
+       if (count == 0)
+               return;
        gather_ctx.subject_pool =
                pool_alloconly_create(MEMPOOL_GROWING"base subjects",
                                      nearest_power(count * 20));
@@ -233,11 +229,8 @@ static int gather_base_subjects(struct thread_finish_context *ctx)
                        idx = roots[i].node.idx;
                else if (!roots[i].ignore) {
                        /* find the oldest child */
-                       if (thread_sort_children(ctx, roots[i].node.idx,
-                                                &sorted_children) < 0) {
-                               ret = -1;
-                               break;
-                       }
+                       thread_sort_children(ctx, roots[i].node.idx,
+                                            &sorted_children);
                        children = array_idx(&sorted_children, 0);
                        idx = children[0].idx;
                } else {
@@ -249,10 +242,7 @@ static int gather_base_subjects(struct thread_finish_context *ctx)
                if (!mail_set_uid(ctx->tmp_mail, uid)) {
                        /* the UID should have existed. we would have rebuild
                           the thread tree otherwise. */
-                       mail_hash_transaction_set_corrupted(ctx->hash_trans,
-                               "Found expunged UID");
-                       ret = -1;
-                       break;
+                       i_unreached();
                }
                if (mail_get_first_header(ctx->tmp_mail, HDR_SUBJECT,
                                          &subject) > 0) T_BEGIN {
@@ -263,8 +253,6 @@ static int gather_base_subjects(struct thread_finish_context *ctx)
        array_free(&sorted_children);
        hash_destroy(&gather_ctx.subject_hash);
        pool_unref(&gather_ctx.subject_pool);
-
-       return ret;
 }
 
 static void thread_add_shadow_child(struct thread_finish_context *ctx,
@@ -374,19 +362,18 @@ static bool merge_subject_threads(struct thread_finish_context *ctx)
        return changed;
 }
 
-static int sort_root_nodes(struct thread_finish_context *ctx)
+static void sort_root_nodes(struct thread_finish_context *ctx)
 {
        ARRAY_TYPE(mail_thread_child_node) sorted_children;
        const struct mail_thread_child_node *children;
        const struct mail_thread_shadow_node *shadows;
        struct mail_thread_root_node *roots;
        unsigned int i, count, child_count;
-       int ret = 0;
 
        i_array_init(&sorted_children, 64);
        shadows = array_idx(&ctx->shadow_nodes, 0);
        roots = array_get_modifiable(&ctx->roots, &count);
-       for (i = 0; i < count && ret == 0; i++) {
+       for (i = 0; i < count; i++) {
                if (roots[i].ignore)
                        continue;
                if (roots[i].dummy) {
@@ -396,33 +383,25 @@ static int sort_root_nodes(struct thread_finish_context *ctx)
                                roots[i].ignore = TRUE;
                                continue;
                        }
-                       if (thread_sort_children(ctx, roots[i].node.idx,
-                                                &sorted_children) < 0) {
-                               ret = -1;
-                               break;
-                       }
+                       thread_sort_children(ctx, roots[i].node.idx,
+                                            &sorted_children);
                        children = array_get(&sorted_children, &child_count);
                        if (child_count == 1) {
                                /* only one child - deferred step (3).
                                   promote the child to the root. */
                                roots[i].node = children[0];
-                               ret = thread_child_node_fill(ctx,
-                                                            &roots[i].node);
+                               thread_child_node_fill(ctx, &roots[i].node);
                                roots[i].dummy = FALSE;
                        } else {
                                roots[i].node.uid = children[0].uid;
                                roots[i].node.sort_date = children[0].sort_date;
                        }
                } else {
-                       ret = thread_child_node_fill(ctx, &roots[i].node);
+                       thread_child_node_fill(ctx, &roots[i].node);
                }
        }
        array_free(&sorted_children);
-       if (ret < 0)
-               return -1;
-
        qsort(roots, count, sizeof(*roots), mail_thread_child_node_cmp);
-       return 0;
 }
 
 static int mail_thread_root_node_idx_cmp(const void *key, const void *value)
@@ -434,10 +413,10 @@ static int mail_thread_root_node_idx_cmp(const void *key, const void *value)
                *idx > root->node.idx ? 1 : 0;
 }
 
-static int sort_root_nodes_ref2(struct thread_finish_context *ctx,
-                               uint32_t record_count)
+static void sort_root_nodes_ref2(struct thread_finish_context *ctx,
+                                uint32_t record_count)
 {
-       struct mail_thread_node *node;
+       const struct mail_thread_node *node;
        struct mail_thread_root_node *roots, *root;
        struct mail_thread_child_node child;
        unsigned int root_count;
@@ -445,20 +424,18 @@ static int sort_root_nodes_ref2(struct thread_finish_context *ctx,
 
        roots = array_get_modifiable(&ctx->roots, &root_count);
        for (idx = 1; idx < record_count; idx++) {
-               node = mail_hash_lookup_idx(ctx->hash_trans, idx);
-               if (MAIL_HASH_RECORD_IS_DELETED(&node->rec) ||
-                   !MAIL_INDEX_NODE_EXISTS(node))
+               node = array_idx(&ctx->cache->thread_nodes, idx);
+               if (!MAIL_THREAD_NODE_EXISTS(node))
                        continue;
 
                child.idx = idx;
-               if (thread_child_node_fill(ctx, &child) < 0)
-                       return -1;
+               thread_child_node_fill(ctx, &child);
 
                parent_idx = idx;
                while (node->parent_idx != 0) {
                        parent_idx = node->parent_idx;
-                       node = mail_hash_lookup_idx(ctx->hash_trans,
-                                                   node->parent_idx);
+                       node = array_idx(&ctx->cache->thread_nodes,
+                                        node->parent_idx);
                }
                root = bsearch(&parent_idx, roots, root_count, sizeof(*roots),
                               mail_thread_root_node_idx_cmp);
@@ -468,66 +445,45 @@ static int sort_root_nodes_ref2(struct thread_finish_context *ctx,
                        root->node.sort_date = child.sort_date;
        }
        qsort(roots, root_count, sizeof(*roots), mail_thread_child_node_cmp);
-       return 0;
 }
 
-static int mail_thread_create_shadows(struct thread_finish_context *ctx,
-                                     uint32_t record_count)
+static void mail_thread_create_shadows(struct thread_finish_context *ctx,
+                                      uint32_t record_count)
 {
-       struct mail_thread_list_update_context *thread_list_ctx;
-       struct mail_thread_node *node, *parent;
+       const struct mail_thread_node *node, *parent;
        struct mail_thread_root_node root;
        struct mail_thread_child_node child;
-       struct mail_hash *hash;
-       uint8_t *referenced;
-       uint32_t idx, i, j, parent_idx, alloc_size, max;
-       int ret = 0;
+       uint32_t idx, parent_idx;
 
        ctx->use_sent_date = FALSE;
 
        memset(&root, 0, sizeof(root));
        memset(&child, 0, sizeof(child));
 
-       alloc_size = record_count/8 + 1;
-       referenced = i_new(uint8_t, alloc_size);
+       /* We may see dummy messages without parents or children. We can't
+          free them since the nodes are in an array, but they may get reused
+          later so just leave them be. With the current algorithm when this
+          happens all the struct fields are always zero at that point, so
+          we don't even have to try to zero them. */
        for (idx = 1; idx < record_count; idx++) {
-               node = mail_hash_lookup_idx(ctx->hash_trans, idx);
-               if (MAIL_HASH_RECORD_IS_DELETED(&node->rec)) {
-                       /* @UNSAFE: mark deleted records as referenced, so we
-                          don't waste time with them */
-                       referenced[idx / 8] |= 1 << (idx % 8);
-                       continue;
-               }
-
-               if (MAIL_INDEX_NODE_EXISTS(node)) {
-                       /* @UNSAFE: don't remove existing nodes */
-                       referenced[idx / 8] |= 1 << (idx % 8);
-               }
+               node = array_idx(&ctx->cache->thread_nodes, idx);
 
                if (node->parent_idx == 0) {
                        /* root node - add to roots list */
                        root.node.idx = idx;
-                       if (!MAIL_INDEX_NODE_EXISTS(node)) {
+                       if (!MAIL_THREAD_NODE_EXISTS(node)) {
                                root.dummy = TRUE;
                                root.node.uid = 0;
                        } else {
                                root.dummy = FALSE;
-                               root.node.uid = node->uid_or_id;
+                               root.node.uid = node->uid;
                        }
                        array_append(&ctx->roots, &root, 1);
                        continue;
                }
-               if (node->parent_idx >= record_count) {
-                       mail_hash_transaction_set_corrupted(ctx->hash_trans,
-                               "parent_idx too large");
-                       ret = -1;
-                       break;
-               }
+               i_assert(node->parent_idx < record_count);
 
-               /* @UNSAFE: keep track of referenced nodes */
-               referenced[node->parent_idx / 8] |= 1 << (node->parent_idx % 8);
-
-               if (!MAIL_INDEX_NODE_EXISTS(node)) {
+               if (!MAIL_THREAD_NODE_EXISTS(node)) {
                        /* dummy node */
                        continue;
                }
@@ -536,96 +492,52 @@ static int mail_thread_create_shadows(struct thread_finish_context *ctx,
                   node as its child. If there are no non-dummy
                   parents, add it as the highest dummy's child. */
                parent_idx = node->parent_idx;
-               parent = mail_hash_lookup_idx(ctx->hash_trans, parent_idx);
-               while (!MAIL_INDEX_NODE_EXISTS(parent) &&
+               parent = array_idx(&ctx->cache->thread_nodes, parent_idx);
+               while (!MAIL_THREAD_NODE_EXISTS(parent) &&
                       parent->parent_idx != 0) {
                        parent_idx = parent->parent_idx;
-                       parent = mail_hash_lookup_idx(ctx->hash_trans,
-                                                     parent_idx);
+                       parent = array_idx(&ctx->cache->thread_nodes,
+                                          parent_idx);
                }
                thread_add_shadow_child(ctx, parent_idx, idx);
        }
-
-       /* remove unneeded records from hash */
-       hash = mail_hash_transaction_get_hash(ctx->hash_trans);
-       if (ret < 0 || mail_hash_get_lock_type(hash) != F_WRLCK) {
-               /* thread index isn't locked, can't delete anything */
-               i_free(referenced);
-               return ret;
-       }
-
-       /* FIXME: we could remove everything from thread list by pointing to
-          existing messages' References: headers */
-       thread_list_ctx = mail_thread_list_update_begin(ctx->hash_list_ctx,
-                                                       ctx->hash_trans);
-       for (i = 0; i < alloc_size; i++) {
-               if (referenced[i] == 0xff)
-                       continue;
-
-               j = i == 0 ? 1 : 0;
-               max = i+1 < alloc_size ? 8 : (record_count % 8);
-               for (; j < max; j++) {
-                       if ((referenced[i] & (1 << j)) != 0)
-                               continue;
-
-                       idx = i*8 + j;
-                       node = mail_hash_lookup_idx(ctx->hash_trans, idx);
-
-                       if (node->ref_index == MAIL_INDEX_NODE_REF_EXT) {
-                               mail_thread_list_remove(thread_list_ctx,
-                                                       node->uid_or_id);
-                       }
-                       mail_hash_remove(ctx->hash_trans, idx,
-                                        node->msgid_crc32);
-               }
-       }
-       i_free(referenced);
-       return mail_thread_list_commit(&thread_list_ctx);
 }
 
-static int mail_thread_finish(struct thread_finish_context *ctx,
-                             enum mail_thread_type thread_type)
+static void mail_thread_finish(struct thread_finish_context *ctx,
+                              enum mail_thread_type thread_type)
 {
-       const struct mail_hash_header *hdr;
+       unsigned int record_count = array_count(&ctx->cache->thread_nodes);
 
-       hdr = mail_hash_get_header(ctx->hash_trans);
-       i_assert(hdr->record_count > 0);
-       ctx->next_new_root_idx = hdr->record_count + 1;
+       ctx->next_new_root_idx = record_count + 1;
 
        /* (2) save root nodes and (3) remove dummy messages */
-       i_array_init(&ctx->roots, I_MIN(128, hdr->record_count));
-       i_array_init(&ctx->shadow_nodes, hdr->record_count);
+       i_array_init(&ctx->roots, I_MIN(128, record_count));
+       i_array_init(&ctx->shadow_nodes, record_count);
        /* make sure all shadow indexes are accessible directly. */
-       (void)array_idx_modifiable(&ctx->shadow_nodes, hdr->record_count);
+       (void)array_idx_modifiable(&ctx->shadow_nodes, record_count);
 
-       if (mail_thread_create_shadows(ctx, hdr->record_count) < 0)
-               return -1;
+       mail_thread_create_shadows(ctx, record_count);
 
        /* (4) */
        ctx->use_sent_date = TRUE;
        switch (thread_type) {
        case MAIL_THREAD_REFERENCES:
-               if (sort_root_nodes(ctx) < 0)
-                       return -1;
+               sort_root_nodes(ctx);
                /* (5) Gather together messages under the root that have
                   the same base subject text. */
-               if (gather_base_subjects(ctx) < 0)
-                       return -1;
+               gather_base_subjects(ctx);
                /* (5.C) Merge threads with the same thread subject. */
                if (merge_subject_threads(ctx)) {
                        /* root ordering may have changed, sort them again. */
-                       if (sort_root_nodes(ctx) < 0)
-                               return -1;
+                       sort_root_nodes(ctx);
                }
                break;
        case MAIL_THREAD_REFERENCES2:
-               if (sort_root_nodes_ref2(ctx, hdr->record_count) < 0)
-                       return -1;
+               sort_root_nodes_ref2(ctx, record_count);
                break;
        default:
                i_unreached();
        }
-       return 0;
 }
 
 static void
@@ -643,15 +555,10 @@ nodes_change_uids_to_seqs(struct mail_thread_iterate_context *iter, bool root)
                        /* dummy root */
                        if (root)
                                continue;
+                       i_unreached();
                } else {
                        mailbox_get_seq_range(box, uid, uid, &seq, &seq);
-               }
-               if (uid == 0) {
-                       mail_hash_transaction_set_corrupted(
-                               iter->ctx->hash_trans,
-                               t_strdup_printf("Found expunged UID %u", uid));
-                       iter->failed = TRUE;
-                       seq = 0;
+                       i_assert(seq != 0);
                }
                children[i].uid = seq;
        }
@@ -685,45 +592,33 @@ mail_thread_iterate_children(struct mail_thread_iterate_context *parent_iter,
        child_iter->ctx->refcount++;
 
        i_array_init(&child_iter->children, 8);
-       if (thread_sort_children(child_iter->ctx, parent_idx,
-                                &child_iter->children) < 0) {
-               child_iter->failed = TRUE;
-               array_clear(&child_iter->children);
-       } else {
-               if (child_iter->ctx->return_seqs)
-                       nodes_change_uids_to_seqs(child_iter, FALSE);
-       }
+       thread_sort_children(child_iter->ctx, parent_idx,
+                            &child_iter->children);
+       if (child_iter->ctx->return_seqs)
+               nodes_change_uids_to_seqs(child_iter, FALSE);
        return child_iter;
 }
 
 struct mail_thread_iterate_context *
-mail_thread_iterate_init_full(struct mail *tmp_mail,
-                             struct mail_hash_transaction *hash_trans,
-                             struct mail_thread_list_context *hash_list_ctx,
+mail_thread_iterate_init_full(struct mail_thread_cache *cache,
+                             struct mail *tmp_mail,
                              enum mail_thread_type thread_type,
                              bool return_seqs)
 {
        struct mail_thread_iterate_context *iter;
        struct thread_finish_context *ctx;
-       struct mail_hash *hash;
 
        iter = i_new(struct mail_thread_iterate_context, 1);
        ctx = iter->ctx = i_new(struct thread_finish_context, 1);
        ctx->refcount = 1;
+       ctx->cache = cache;
        ctx->tmp_mail = tmp_mail;
-       ctx->hash_trans = hash_trans;
-       ctx->hash_list_ctx = hash_list_ctx;
        ctx->return_seqs = return_seqs;
-       if (mail_thread_finish(ctx, thread_type) < 0)
-               iter->failed = TRUE;
-       else {
-               hash = mail_hash_transaction_get_hash(hash_trans);
-               if (mail_hash_get_lock_type(hash) == F_WRLCK)
-                       (void)mail_hash_transaction_write(hash_trans);
-               mail_thread_iterate_fill_root(iter);
-               if (return_seqs)
-                       nodes_change_uids_to_seqs(iter, TRUE);
-       }
+       mail_thread_finish(ctx, thread_type);
+
+       mail_thread_iterate_fill_root(iter);
+       if (return_seqs)
+               nodes_change_uids_to_seqs(iter, TRUE);
        return iter;
 }
 
@@ -756,14 +651,9 @@ unsigned int mail_thread_iterate_count(struct mail_thread_iterate_context *iter)
 int mail_thread_iterate_deinit(struct mail_thread_iterate_context **_iter)
 {
        struct mail_thread_iterate_context *iter = *_iter;
-       int ret = iter->failed ? -1 : 0;
 
        *_iter = NULL;
 
-       if (ret < 0) {
-               struct mail *mail = iter->ctx->tmp_mail;
-               mail_storage_set_internal_error(mail->box->storage);
-       }
        if (--iter->ctx->refcount == 0) {
                array_free(&iter->ctx->roots);
                array_free(&iter->ctx->shadow_nodes);
@@ -771,5 +661,5 @@ int mail_thread_iterate_deinit(struct mail_thread_iterate_context **_iter)
        }
        array_free(&iter->children);
        i_free(iter);
-       return ret;
+       return 0;
 }
index e4f8fe8f0444fc4bfb048c24b508c909fe3be446..2e0a5cdd9210fdbfc26ec5c52cc40c49beecd350 100644 (file)
@@ -6,85 +6,30 @@
 #include "mail-storage.h"
 #include "index-thread-private.h"
 
-static struct mail_thread_node *
-thread_msgid_get(struct mail_thread_update_context *ctx, uint32_t ref_uid,
-                unsigned int ref_index, const char *msgid, uint32_t *idx_r)
+static uint32_t thread_msg_add(struct mail_thread_cache *cache,
+                              uint32_t uid, uint32_t msgid_idx)
 {
-       struct mail_thread_node *node, new_node;
-       struct msgid_search_key key;
-       const char **msgidp;
-       uint32_t idx;
-
-       i_assert(ref_index != MAIL_INDEX_NODE_REF_MSGID);
-
-       key.msgid = msgid;
-       key.msgid_crc32 = crc32_str_nonzero(msgid);
-
-       node = mail_hash_lookup(ctx->hash_trans, &key, &idx);
-       if (node == NULL) {
-               /* not found, create */
-               memset(&new_node, 0, sizeof(new_node));
-               new_node.msgid_crc32 = key.msgid_crc32;
-               if (ref_index <= MAIL_INDEX_NODE_REF_MAX_VALUE) {
-                       new_node.uid_or_id = ref_uid;
-                       new_node.ref_index = ref_index;
-               } else {
-                       new_node.uid_or_id =
-                               mail_thread_list_add(ctx->thread_list_ctx,
-                                                    msgid);
-                       new_node.ref_index = MAIL_INDEX_NODE_REF_EXT;
-               }
-
-               mail_hash_insert(ctx->hash_trans, &key, &new_node, &idx);
-               node = mail_hash_lookup_idx(ctx->hash_trans, idx);
-       }
-
-       /* keep message-ids cached */
-       msgidp = array_idx_modifiable(&ctx->msgid_cache, idx);
-       if (*msgidp == NULL)
-               *msgidp = p_strdup(ctx->msgid_pool, msgid);
+       struct mail_thread_node *node;
 
-       *idx_r = idx;
-       return node;
-}
+       i_assert(msgid_idx != 0);
+       i_assert(msgid_idx < cache->first_invalid_msgid_str_idx);
 
-static void thread_msg_add(struct mail_thread_update_context *ctx, uint32_t uid,
-                          const char *msgid, uint32_t *idx_r)
-{
-       struct mail_thread_node *node;
-       struct mail_thread_node unode;
-
-       if (msgid != NULL) {
-               node = thread_msgid_get(ctx, 0, 0, msgid, idx_r);
-               if (!MAIL_INDEX_NODE_EXISTS(node)) {
-                       /* add UID to node */
-                       if (node->ref_index == MAIL_INDEX_NODE_REF_EXT &&
-                           node->uid_or_id != 0) {
-                               mail_thread_list_remove(ctx->thread_list_ctx,
-                                                       node->uid_or_id);
-                       }
-
-                       node->uid_or_id = uid;
-                       node->ref_index = MAIL_INDEX_NODE_REF_MSGID;
-               } else {
-                       /* duplicate, keep the original. if the original ever
-                          gets expunged, rebuild. */
-                       node->expunge_rebuilds = TRUE;
-                       msgid = NULL;
-               }
-               mail_hash_update(ctx->hash_trans, *idx_r);
-       }
+       node = array_idx_modifiable(&cache->thread_nodes, msgid_idx);
+       if (node->uid == 0)
+               node->uid = uid;
+       else {
+               /* duplicate message-id, keep the original.
+                  if the original ever gets expunged, rebuild. */
+               node->expunge_rebuilds = TRUE;
 
-       if (msgid == NULL) {
-               /* no valid message-id */
-               memset(&unode, 0, sizeof(unode));
-               unode.uid_or_id = uid;
-               unode.ref_index = MAIL_INDEX_NODE_REF_MSGID;
-               mail_hash_insert(ctx->hash_trans, NULL, &unode, idx_r);
+               msgid_idx = cache->next_invalid_msgid_str_idx++;
+               node = array_idx_modifiable(&cache->thread_nodes, msgid_idx);
+               node->uid = uid;
        }
+       return msgid_idx;
 }
 
-static bool thread_node_has_ancestor(struct mail_thread_update_context *ctx,
+static bool thread_node_has_ancestor(struct mail_thread_cache *cache,
                                     const struct mail_thread_node *node,
                                     const struct mail_thread_node *ancestor)
 {
@@ -92,24 +37,24 @@ static bool thread_node_has_ancestor(struct mail_thread_update_context *ctx,
                if (node->parent_idx == 0)
                        return FALSE;
 
-               node = mail_hash_lookup_idx(ctx->hash_trans, node->parent_idx);
+               node = array_idx(&cache->thread_nodes, node->parent_idx);
        }
        return TRUE;
 }
 
-static void thread_link_reference(struct mail_thread_update_context *ctx,
+static void thread_link_reference(struct mail_thread_cache *cache,
                                  uint32_t parent_idx, uint32_t child_idx)
 {
        struct mail_thread_node *node, *parent, *child;
        uint32_t idx;
 
-       parent = mail_hash_lookup_idx(ctx->hash_trans, parent_idx);
-       child = mail_hash_lookup_idx(ctx->hash_trans, child_idx);
+       i_assert(parent_idx < cache->first_invalid_msgid_str_idx);
 
-       child->parent_link_refcount++;
-       mail_hash_update(ctx->hash_trans, child_idx);
+       parent = array_idx_modifiable(&cache->thread_nodes, parent_idx);
+       child = array_idx_modifiable(&cache->thread_nodes, child_idx);
 
-       if (thread_node_has_ancestor(ctx, parent, child)) {
+       child->parent_link_refcount++;
+       if (thread_node_has_ancestor(cache, parent, child)) {
                if (parent == child) {
                        /* loops to itself - ignore */
                        return;
@@ -130,14 +75,9 @@ static void thread_link_reference(struct mail_thread_update_context *ctx,
                node = parent;
                do {
                        idx = node->parent_idx;
-                       if (idx == 0) {
-                               /* earlier lookup_idx() failed */
-                               ctx->failed = TRUE;
-                               break;
-                       }
-                       node = mail_hash_lookup_idx(ctx->hash_trans, idx);
-                       node->parent_unref_rebuilds = TRUE;
-                       mail_hash_update(ctx->hash_trans, idx);
+                       i_assert(idx != 0);
+                       node = array_idx_modifiable(&cache->thread_nodes, idx);
+                       node->child_unref_rebuilds = TRUE;
                } while (node != child);
                return;
        } else if (child->parent_idx == parent_idx) {
@@ -146,11 +86,11 @@ static void thread_link_reference(struct mail_thread_update_context *ctx,
        }
 
        /* Set parent_node as child_node's parent */
-       if (child->parent_idx == 0)
+       if (child->parent_idx == 0) {
                child->parent_idx = parent_idx;
-       else {
+       else {
                /* Conflicting parent already exists, keep the original */
-               if (MAIL_INDEX_NODE_EXISTS(child)) {
+               if (MAIL_THREAD_NODE_EXISTS(child)) {
                        /* If this message gets expunged,
                           the parent is changed. */
                        child->expunge_rebuilds = TRUE;
@@ -166,304 +106,126 @@ static void thread_link_reference(struct mail_thread_update_context *ctx,
 
                           b) is probably the least likely to happen,
                           so use it */
-                       child->parent_unref_rebuilds = TRUE;
+                       child->child_unref_rebuilds = TRUE;
                }
        }
-       mail_hash_update(ctx->hash_trans, child_idx);
 }
 
-struct thread_message_id {
-       const char *str;
-       uint32_t crc32;
-       unsigned int collisions_after;
-};
-
-static const char *
-thread_link_references(struct mail_thread_update_context *ctx, uint32_t ref_uid,
-                      const char *references)
+static uint32_t
+thread_link_references(struct mail_thread_cache *cache, uint32_t uid,
+                      const struct mail_index_strmap_rec *msgid_map,
+                      unsigned int *msgid_map_idx)
 {
-       ARRAY_DEFINE(id_arr, struct thread_message_id);
-       struct thread_message_id *ids;
-       struct thread_message_id id;
-       uint32_t parent_idx, child_idx;
-       unsigned int i, j, count, ref_index;
-
-       /* put all message IDs to an array */
-       memset(&id, 0, sizeof(id));
-       t_array_init(&id_arr, 32);
-       while ((id.str = message_id_get_next(&references)) != NULL) {
-               id.crc32 = crc32_str_nonzero(id.str);
-               array_append(&id_arr, &id, 1);
-       }
-
-       ids = array_get_modifiable(&id_arr, &count);
-       if (count <= 1)
-               return count == 0 ? NULL : ids[0].str;
+       uint32_t parent_idx;
 
-       /* count collisions */
-       for (i = 0; i < count; i++) {
-               for (j = i + 1; j < count; j++) {
-                       if (ids[i].crc32 == ids[j].crc32)
-                               ids[i].collisions_after++;
-               }
-       }
-
-       ref_index = MAIL_INDEX_NODE_REF_REFERENCES_LAST +
-               ids[0].collisions_after;
-       thread_msgid_get(ctx, ref_uid, ref_index, ids[0].str, &parent_idx);
-       for (i = 1; i < count; i++) {
-               ref_index = MAIL_INDEX_NODE_REF_REFERENCES_LAST +
-                       ids[i].collisions_after;
-               thread_msgid_get(ctx, ref_uid, ref_index,
-                                ids[i].str, &child_idx);
-               thread_link_reference(ctx, parent_idx, child_idx);
-               parent_idx = child_idx;
-       }
-
-       /* link the last ID to us */
-       return ids[count-1].str;
-}
+       if (msgid_map->uid != uid)
+               return 0;
 
-static int thread_get_mail_header(struct mail *mail, const char *name,
-                                 const char **value_r)
-{
-       if (mail_get_first_header(mail, name, value_r) < 0) {
-               if (!mail->expunged)
-                       return -1;
+       parent_idx = msgid_map->str_idx;
+       msgid_map++;
+       *msgid_map_idx += 1;
 
-               /* Message is expunged. Instead of failing the entire THREAD
-                  command, just treat the header as non-existing. */
-               *value_r = NULL;
+       for (; msgid_map->uid == uid; msgid_map++) {
+               thread_link_reference(cache, parent_idx, msgid_map->str_idx);
+               parent_idx = msgid_map->str_idx;
+               *msgid_map_idx += 1;
        }
-       return 0;
+       i_assert(parent_idx < cache->first_invalid_msgid_str_idx);
+       return parent_idx;
 }
 
-int mail_thread_add(struct mail_thread_update_context *ctx, struct mail *mail)
+void mail_thread_add(struct mail_thread_cache *cache,
+                    const struct mail_index_strmap_rec *msgid_map,
+                    unsigned int *msgid_map_idx)
 {
-       const char *message_id, *in_reply_to, *references, *parent_msgid;
-       const struct mail_thread_node *parent, *old_parent;
-       struct mail_hash_header *hdr;
        struct mail_thread_node *node;
        uint32_t idx, parent_idx;
-       unsigned int ref_index;
-
-       hdr = mail_hash_get_header(ctx->hash_trans);
-       i_assert(mail->uid > hdr->last_uid);
-       hdr->last_uid = mail->uid;
-       hdr->message_count++;
 
-       if (thread_get_mail_header(mail, HDR_MESSAGE_ID, &message_id) < 0 ||
-           thread_get_mail_header(mail, HDR_REFERENCES, &references) < 0)
-               return -1;
+       i_assert(msgid_map->ref_index == MAIL_THREAD_NODE_REF_MSGID);
+       i_assert(cache->last_uid <= msgid_map->uid);
 
-       thread_msg_add(ctx, mail->uid, message_id_get_next(&message_id), &idx);
-       parent_msgid = thread_link_references(ctx, mail->uid, references);
-
-       if (parent_msgid != NULL)
-               ref_index = MAIL_INDEX_NODE_REF_REFERENCES_LAST;
-       else {
-               /* no valid IDs in References:, use In-Reply-To: instead */
-               if (thread_get_mail_header(mail, HDR_IN_REPLY_TO,
-                                          &in_reply_to) < 0)
-                       return -1;
-               parent_msgid = message_id_get_next(&in_reply_to);
-               ref_index = MAIL_INDEX_NODE_REF_INREPLYTO;
-       }
-       parent = parent_msgid == NULL ? NULL :
-               thread_msgid_get(ctx, mail->uid, ref_index,
-                                parent_msgid, &parent_idx);
+       cache->last_uid = msgid_map->uid;
 
-       node = mail_hash_lookup_idx(ctx->hash_trans, idx);
-       old_parent = node->parent_idx == 0 ? NULL :
-               mail_hash_lookup_idx(ctx->hash_trans, node->parent_idx);
+       idx = thread_msg_add(cache, msgid_map->uid, msgid_map->str_idx);
+       parent_idx = thread_link_references(cache, msgid_map->uid,
+                                           msgid_map + 1, msgid_map_idx);
 
-       if (old_parent != NULL &&
-           (parent == NULL || old_parent->parent_idx != parent_idx)) {
+       node = array_idx_modifiable(&cache->thread_nodes, idx);
+       if (node->parent_idx != parent_idx && node->parent_idx != 0) {
                /* conflicting parent, remove it. */
                node->parent_idx = 0;
                /* If this message gets expunged, we have to revert back to
                   the original parent. */
                node->expunge_rebuilds = TRUE;
-               mail_hash_update(ctx->hash_trans, idx);
        }
-       if (parent != NULL)
-               thread_link_reference(ctx, parent_idx, idx);
-       return 0;
+       if (parent_idx != 0)
+               thread_link_reference(cache, parent_idx, idx);
+       *msgid_map_idx += 1;
 }
 
 static bool
-mail_thread_node_lookup(struct mail_thread_update_context *ctx, uint32_t uid,
-                       uint32_t *idx_r, const char **msgid_r,
-                       struct mail_thread_node **node_r)
-{
-       struct mail_thread_node *node;
-       struct msgid_search_key key;
-       const char *msgids;
-       int ret;
-
-       if (!mail_set_uid(ctx->tmp_mail, uid))
-               return FALSE;
-
-       ret = mail_get_first_header(ctx->tmp_mail, HDR_MESSAGE_ID, &msgids);
-       if (ret <= 0)
-               return FALSE;
-
-       key.msgid = message_id_get_next(&msgids);
-       if (key.msgid == NULL)
-               return FALSE;
-       key.msgid_crc32 = crc32_str_nonzero(key.msgid);
-
-       node = mail_hash_lookup(ctx->hash_trans, &key, idx_r);
-       if (node == NULL)
-               return FALSE;
-
-       if (node->ref_index != MAIL_INDEX_NODE_REF_MSGID ||
-           node->uid_or_id != uid) {
-               /* duplicate Message-ID probably */
-               return FALSE;
-       }
-       *msgid_r = key.msgid;
-       *node_r = node;
-       return TRUE;
-}
-
-static bool
-thread_msgid_lookup(struct mail_thread_update_context *ctx, const char *msgid,
-                   uint32_t *idx_r)
-{
-       struct msgid_search_key key;
-
-       key.msgid = msgid;
-       key.msgid_crc32 = crc32_str_nonzero(msgid);
-
-       ctx->cmp_match_count = 0;
-       ctx->cmp_last_idx = 0;
-
-       if (mail_hash_lookup(ctx->hash_trans, &key, idx_r) == NULL) {
-               if (ctx->cmp_match_count != 1 || ctx->failed) {
-                       /* couldn't find the message-id */
-                       return FALSE;
-               }
-
-               /* there's only one key with this crc32 value, so it
-                  must be what we're looking for */
-               *idx_r = ctx->cmp_last_idx;
-       }
-       return TRUE;
-}
-
-static bool
-thread_unref_msgid(struct mail_thread_update_context *ctx,
-                  uint32_t ref_uid, uint32_t parent_idx,
-                  const char *child_msgid, uint32_t child_idx)
+mail_thread_unref_link(struct mail_thread_cache *cache,
+                      uint32_t parent_idx, uint32_t child_idx)
 {
        struct mail_thread_node *parent, *child;
 
-       parent = mail_hash_lookup_idx(ctx->hash_trans, parent_idx);
-       if (parent->parent_unref_rebuilds)
+       parent = array_idx_modifiable(&cache->thread_nodes, parent_idx);
+       if (parent->child_unref_rebuilds)
                return FALSE;
 
-       child = mail_hash_lookup_idx(ctx->hash_trans, child_idx);
-       if (child->parent_link_refcount == 0) {
-               mail_hash_transaction_set_corrupted(ctx->hash_trans,
-                                                   "unexpected refcount=0");
-               return FALSE;
-       }
+       child = array_idx_modifiable(&cache->thread_nodes, child_idx);
+       i_assert(child->parent_link_refcount > 0);
+
        child->parent_link_refcount--;
        if (child->parent_link_refcount == 0) {
                /* we don't have a root anymore */
                child->parent_idx = 0;
        }
-
-       if (child->uid_or_id == ref_uid &&
-           child->ref_index != MAIL_INDEX_NODE_REF_EXT) {
-               child->uid_or_id =
-                       mail_thread_list_add(ctx->thread_list_ctx, child_msgid);
-               child->ref_index = MAIL_INDEX_NODE_REF_EXT;
-       }
-       mail_hash_update(ctx->hash_trans, child_idx);
-
-       if (parent->uid_or_id == ref_uid &&
-           parent->ref_index != MAIL_INDEX_NODE_REF_EXT) {
-               parent->uid_or_id =
-                       mail_thread_list_add(ctx->thread_list_ctx, child_msgid);
-               parent->ref_index = MAIL_INDEX_NODE_REF_EXT;
-               mail_hash_update(ctx->hash_trans, parent_idx);
-       }
        return TRUE;
 }
 
-static bool
-thread_unref_links(struct mail_thread_update_context *ctx, uint32_t ref_uid,
-                  const char *last_child_msgid, uint32_t last_child_idx,
-                  const char *references, bool *valid_r)
-{
-       const char *msgid;
-       uint32_t parent_idx, child_idx;
-
-       /* tmp_mail may be changed below, so we have to duplicate the
-          references string */
-       references = t_strdup(references);
-       *valid_r = FALSE;
-
-       msgid = message_id_get_next(&references);
-       if (msgid == NULL)
-               return TRUE;
-       if (!thread_msgid_lookup(ctx, msgid, &parent_idx))
-               return FALSE;
-       *valid_r = TRUE;
-
-       while ((msgid = message_id_get_next(&references)) != NULL) {
-               if (!thread_msgid_lookup(ctx, msgid, &child_idx) ||
-                   !thread_unref_msgid(ctx, ref_uid, parent_idx,
-                                       msgid, child_idx))
-                       return FALSE;
-               parent_idx = child_idx;
-       }
-       return thread_unref_msgid(ctx, ref_uid, parent_idx,
-                                 last_child_msgid, last_child_idx);
-}
-
-int mail_thread_remove(struct mail_thread_update_context *ctx, uint32_t uid)
+bool mail_thread_remove(struct mail_thread_cache *cache,
+                       const struct mail_index_strmap_rec *msgid_map,
+                       unsigned int *msgid_map_idx)
 {
-       struct mail_hash_header *hdr;
        struct mail_thread_node *node;
-       const char *msgid, *references, *in_reply_to;
        uint32_t idx, parent_idx;
-       bool have_refs;
+       unsigned int count = 1;
 
-       if (!mail_thread_node_lookup(ctx, uid, &idx, &msgid, &node))
-               return 0;
-       if (node->expunge_rebuilds)
-               return 0;
-
-       if (mail_get_first_header(ctx->tmp_mail, HDR_REFERENCES,
-                                 &references) < 0)
-               return -1;
+       idx = msgid_map->str_idx;
+       i_assert(idx != 0);
 
-       if (!thread_unref_links(ctx, uid, msgid, idx, references, &have_refs))
-               return 0;
-       if (!have_refs) {
-               /* no valid IDs in References:, use In-Reply-To: instead */
-               if (mail_get_first_header(ctx->tmp_mail, HDR_IN_REPLY_TO,
-                                         &in_reply_to) < 0)
-                       return -1;
-               in_reply_to = message_id_get_next(&in_reply_to);
-               if (in_reply_to != NULL &&
-                   (!thread_msgid_lookup(ctx, in_reply_to, &parent_idx) ||
-                    !thread_unref_msgid(ctx, uid, parent_idx,
-                                        in_reply_to, idx)))
-                       return 0;
+       if (msgid_map->uid > cache->last_uid) {
+               /* this message was never added to the cache, skip */
+               return TRUE;
        }
 
-       /* get the node again, the pointer may have changed */
-       node = mail_hash_lookup_idx(ctx->hash_trans, idx);
-
-       node->uid_or_id = mail_thread_list_add(ctx->thread_list_ctx, msgid);
-       node->ref_index = MAIL_INDEX_NODE_REF_EXT;
-       mail_hash_update(ctx->hash_trans, idx);
+       node = array_idx_modifiable(&cache->thread_nodes, idx);
+       if (node->expunge_rebuilds) {
+               /* this catches the duplicate message-id case */
+               return FALSE;
+       }
+       i_assert(node->uid == msgid_map->uid);
+
+       /* update link refcounts */
+       if (msgid_map[count].uid == node->uid) {
+               parent_idx = msgid_map[count].str_idx;
+               count++;
+               while (msgid_map[count].uid == node->uid) {
+                       if (!mail_thread_unref_link(cache, parent_idx,
+                                                   msgid_map[count].str_idx))
+                               return FALSE;
+                       parent_idx = msgid_map[count].str_idx;
+                       count++;
+               }
+               if (!mail_thread_unref_link(cache, parent_idx, idx))
+                       return FALSE;
+       }
+       /* mark this message as expunged */
+       node->uid = 0;
 
-       hdr = mail_hash_get_header(ctx->hash_trans);
-       hdr->message_count--;
-       return 1;
+       /* we don't know (and don't want to waste time figuring out) if other
+          messages point to this removed message, so don't delete the node */
+       *msgid_map_idx += count;
+       return TRUE;
 }
index 42296e44cca80dbf7e3196f90e9f654bb178113f..4b49d6e0ba964cad7839397eea6c132448bfb2e6 100644 (file)
@@ -2,50 +2,34 @@
 #define INDEX_THREAD_PRIVATE_H
 
 struct index_mailbox;
-struct mail_thread_list_context;
 
 #include "crc32.h"
-#include "mail-hash.h"
 #include "mail-thread.h"
+#include "mail-index-strmap.h"
 
 #define MAIL_THREAD_INDEX_SUFFIX ".thread"
 
+/* After initially building the index, assign first_invalid_msgid_idx to
+   the next unused index + SKIP_COUNT. When more messages are added and
+   the next valid msgid conflicts with the first invalid msgid, the invalid
+   msgids will be moved forward again this many indexes. */
+#define THREAD_INVALID_MSGID_STR_IDX_SKIP_COUNT \
+       (4096 / sizeof(struct mail_thread_node))
+
 #define HDR_MESSAGE_ID "message-id"
 #define HDR_IN_REPLY_TO "in-reply-to"
 #define HDR_REFERENCES "references"
 #define HDR_SUBJECT "subject"
 
-struct msgid_search_key {
-       const char *msgid;
-       uint32_t msgid_crc32;
-};
+#define MAIL_THREAD_NODE_REF_MSGID 0
+#define MAIL_THREAD_NODE_REF_INREPLYTO 1
+#define MAIL_THREAD_NODE_REF_REFERENCES1 2
 
 struct mail_thread_node {
-       struct mail_hash_record rec;
-
-       /* CRC32 checksum of this node's message ID (except CRC32 0 -> 1).
-          If there are more nodes with the same checksum, use uid and
-          ref_index fields to find the original string. */
-       uint32_t msgid_crc32;
-       /* ref_index=0: ID in external thread IDs list (beginning from 1)
-          ref_index=1: UID of the mail whose Message-ID: header points to
-            this node.
-          ref_index>1: UID of some message which has references to this
-            node. ref_index spcifies how the reference string is found. */
-       uint32_t uid_or_id;
-       /* Index for this node's parent node */
-       unsigned int parent_idx:27;
-       /* 0 = uid contains the index to the wanted Message ID in external
-              thread IDs list
-          1 = Message-ID: header's first valid ID
-          2 = In-Reply-To: header's first valid ID
-          3 = References: header's last ID with the same CRC32
-          4 = References: header's second last ID with the same CRC32
-            (i.e. one non-matching ID already had a CRC32 collision)
-          ..
-          31 = References: header's 29th last ID with the same CRC32
-       */
-       unsigned int ref_index:5;
+       /* UID of the message, or 0 for dummy nodes */
+       uint32_t uid;
+       /* Index for this node's parent node, 0 = this is root */
+       uint32_t parent_idx;
        /* Number of messages containing "this message" -> "parent message"
           link, i.e. "number of links to parent node". However since parents
           can change, not all of these references might be from our current
@@ -56,34 +40,24 @@ struct mail_thread_node {
        unsigned int expunge_rebuilds:1;
        /* If a link between this node and its child gets unreferenced,
           rebuild the thread tree. */
-       unsigned int parent_unref_rebuilds:1;
+       unsigned int child_unref_rebuilds:1;
 };
-#define MAIL_INDEX_NODE_REF_EXT 0
-#define MAIL_INDEX_NODE_REF_MSGID 1
-#define MAIL_INDEX_NODE_REF_INREPLYTO 2
-#define MAIL_INDEX_NODE_REF_REFERENCES_LAST 3
-#define MAIL_INDEX_NODE_REF_MAX_VALUE 31
-
-#define MAIL_INDEX_NODE_EXISTS(node) \
-       ((node)->ref_index == MAIL_INDEX_NODE_REF_MSGID)
-
-struct mail_thread_update_context {
-       struct mail *tmp_mail;
-
-       struct mail_hash *hash;
-       struct mail_hash_transaction *hash_trans;
-       struct mail_thread_list_update_context *thread_list_ctx;
-
-       /* Hash record idx -> Message-ID */
-       ARRAY_DEFINE(msgid_cache, const char *);
-       pool_t msgid_pool;
-
-       unsigned int cmp_match_count;
-       uint32_t cmp_last_idx;
-
-       unsigned int failed:1;
-       unsigned int rebuild:1;
-       unsigned int syncing:1;
+ARRAY_DEFINE_TYPE(mail_thread_node, struct mail_thread_node);
+#define MAIL_THREAD_NODE_EXISTS(node) \
+       ((node)->uid != 0)
+
+struct mail_thread_cache {
+       uint32_t last_uid;
+       /* indexes used for invalid Message-IDs. that means no other messages
+          point to them and they can safely be moved around whenever
+          necessary. */
+       uint32_t first_invalid_msgid_str_idx;
+       uint32_t next_invalid_msgid_str_idx;
+
+       struct mail_search_result *search_result;
+
+       /* indexed by mail_index_strmap_rec.str_idx */
+       ARRAY_TYPE(mail_thread_node) thread_nodes;
 };
 
 static inline uint32_t crc32_str_nonzero(const char *str)
@@ -92,32 +66,19 @@ static inline uint32_t crc32_str_nonzero(const char *str)
        return value == 0 ? 1 : value;
 }
 
-int mail_thread_add(struct mail_thread_update_context *ctx, struct mail *mail);
-int mail_thread_remove(struct mail_thread_update_context *ctx, uint32_t seq);
+void mail_thread_add(struct mail_thread_cache *cache,
+                    const struct mail_index_strmap_rec *msgid_map,
+                    unsigned int *msgid_map_idx);
+bool mail_thread_remove(struct mail_thread_cache *cache,
+                       const struct mail_index_strmap_rec *msgid_map,
+                       unsigned int *msgid_map_idx);
 
 struct mail_thread_iterate_context *
-mail_thread_iterate_init_full(struct mail *tmp_mail,
-                             struct mail_hash_transaction *hash_trans,
-                             struct mail_thread_list_context *hash_list_ctx,
+mail_thread_iterate_init_full(struct mail_thread_cache *cache,
+                             struct mail *tmp_mail,
                              enum mail_thread_type thread_type,
                              bool return_seqs);
 
 void index_thread_mailbox_index_opened(struct index_mailbox *ibox);
 
-struct mail_thread_list_context *
-mail_thread_list_init(struct mailbox *box);
-void mail_thread_list_deinit(struct mail_thread_list_context **ctx);
-
-struct mail_thread_list_update_context *
-mail_thread_list_update_begin(struct mail_thread_list_context *ctx,
-                             struct mail_hash_transaction *hash_trans);
-int mail_thread_list_lookup(struct mail_thread_list_update_context *ctx,
-                           uint32_t id, const char **msgid_r);
-uint32_t mail_thread_list_add(struct mail_thread_list_update_context *ctx,
-                             const char *msgid);
-void mail_thread_list_remove(struct mail_thread_list_update_context *ctx,
-                            uint32_t id);
-int mail_thread_list_commit(struct mail_thread_list_update_context **ctx);
-void mail_thread_list_rollback(struct mail_thread_list_update_context **ctx);
-
 #endif
index e608c1cb5933deee869dbbd4b8a5a948a55c50ab..ca1368e672e71f0780d982bb6b1b71d1e784980c 100644 (file)
@@ -4,41 +4,48 @@
 
 #include "lib.h"
 #include "array.h"
+#include "bsearch-insert-pos.h"
+#include "hash2.h"
 #include "message-id.h"
 #include "mail-index-private.h"
 #include "mail-index-sync-private.h"
 #include "mail-search.h"
 #include "mail-search-build.h"
+#include "mailbox-search-result-private.h"
 #include "index-storage.h"
 #include "index-thread-private.h"
 
+#include <stdlib.h>
+
 #define MAIL_THREAD_CONTEXT(obj) \
        MODULE_CONTEXT(obj, mail_thread_storage_module)
 
-/* how much memory to allocate initially. these are very rough
-   approximations. */
-#define APPROX_MSG_EXTRA_COUNT 10
-#define APPROX_MSGID_SIZE 45
-
 struct mail_thread_context {
-       struct mail_thread_update_context thread_ctx;
-
        struct mailbox *box;
        struct mailbox_transaction_context *t;
+       struct mail_index_strmap_view_sync *strmap_sync;
 
-       struct mail_search_context *search;
+       struct mail *tmp_mail;
        struct mail_search_args *search_args;
-       struct mail_search_arg tmp_search_arg;
+       ARRAY_TYPE(seq_range) added_uids;
+
+       unsigned int failed:1;
 };
 
 struct mail_thread_mailbox {
        union mailbox_module_context module_ctx;
-       struct mail_hash *hash;
 
-       struct mail_thread_list_context *list_ctx;
+       unsigned int next_msgid_idx;
+       struct mail_thread_cache *cache;
+
+       struct mail_index_strmap *strmap;
+       struct mail_index_strmap_view *strmap_view;
+       /* sorted by UID, ref_index */
+       const ARRAY_TYPE(mail_index_strmap_rec) *msgid_map;
+       const struct hash2_table *msgid_hash;
 
        /* set only temporarily while needed */
-       struct mail_thread_update_context *ctx;
+       struct mail_thread_context *ctx;
 };
 
 static MODULE_CONTEXT_DEFINE_INIT(mail_thread_storage_module,
@@ -57,402 +64,481 @@ bool mail_thread_type_parse(const char *str, enum mail_thread_type *type_r)
        return TRUE;
 }
 
-static unsigned int mail_thread_hash_key(const void *key)
-{
-       const struct msgid_search_key *key_rec = key;
-
-       return key_rec->msgid_crc32;
-}
-
-static const char *
-mail_thread_nth_last_refid_crc32(const char *msgids, unsigned int ref_index,
-                                uint32_t msgid_crc32)
-{
-       const unsigned int idx_from_end =
-               ref_index - MAIL_INDEX_NODE_REF_REFERENCES_LAST + 1;
-       const char **last_msgids, *msgid;
-       unsigned int idx = 0;
-
-       last_msgids = t_new(const char *, idx_from_end);
-       while ((msgid = message_id_get_next(&msgids)) != NULL) {
-               if (crc32_str_nonzero(msgid) == msgid_crc32) {
-                       last_msgids[idx % idx_from_end] = msgid;
-                       idx++;
-               }
-       }
-       return last_msgids[idx % idx_from_end];
-}
-
-static const char *
-mail_thread_node_get_msgid(struct mail_hash_transaction *trans,
-                          struct mail_thread_update_context *ctx,
-                          const struct mail_thread_node *node, uint32_t idx)
+static int
+mail_strmap_rec_get_msgid(struct mail_thread_context *ctx,
+                         const struct mail_index_strmap_rec *rec,
+                         const char **msgid_r)
 {
-       const char *msgids, *msgid = NULL, **p;
+       const char *orig_msgids;
+       const char *msgids = NULL, *msgid;
+       unsigned int n = 0;
        int ret;
 
-       if (node->ref_index == MAIL_INDEX_NODE_REF_EXT) {
-               if (mail_thread_list_lookup(ctx->thread_list_ctx,
-                                           node->uid_or_id, &msgid) < 0)
-                       return NULL;
-               if (msgid == NULL) {
-                       mail_hash_transaction_set_corrupted(trans,
-                               "Referenced list msgid lost");
-               }
-               return msgid;
-       }
+       ret = mail_set_uid(ctx->tmp_mail, rec->uid);
+       if (ret <= 0)
+               return ret;
 
-       p = array_idx_modifiable(&ctx->msgid_cache, idx);
-       if (*p != NULL)
-               return *p;
-
-       ret = mail_set_uid(ctx->tmp_mail, node->uid_or_id);
-       if (ret <= 0) {
-               if (ret == 0) {
-                       mail_hash_transaction_set_corrupted(trans,
-                               t_strdup_printf("Referenced UID %u lost",
-                                               node->uid_or_id));
-               }
-               return NULL;
-       }
-       switch (node->ref_index) {
-       case MAIL_INDEX_NODE_REF_MSGID:
+       switch (rec->ref_index) {
+       case MAIL_THREAD_NODE_REF_MSGID:
                /* Message-ID: header */
-               if (mail_get_first_header(ctx->tmp_mail, HDR_MESSAGE_ID,
-                                         &msgids) < 0)
-                       return NULL;
-               msgid = message_id_get_next(&msgids);
+               ret = mail_get_first_header(ctx->tmp_mail, HDR_MESSAGE_ID,
+                                           &msgids);
                break;
-       case MAIL_INDEX_NODE_REF_INREPLYTO:
+       case MAIL_THREAD_NODE_REF_INREPLYTO:
                /* In-Reply-To: header */
-               if (mail_get_first_header(ctx->tmp_mail, HDR_IN_REPLY_TO,
-                                         &msgids) < 0)
-                       return NULL;
-               msgid = message_id_get_next(&msgids);
+               ret = mail_get_first_header(ctx->tmp_mail, HDR_IN_REPLY_TO,
+                                           &msgids);
                break;
        default:
                /* References: header */
-               if (mail_get_first_header(ctx->tmp_mail, HDR_REFERENCES,
-                                         &msgids) < 0)
-                       return NULL;
-               msgid = mail_thread_nth_last_refid_crc32(msgids,
-                                                        node->ref_index,
-                                                        node->msgid_crc32);
+               ret = mail_get_first_header(ctx->tmp_mail, HDR_REFERENCES,
+                                           &msgids);
+               n = rec->ref_index - MAIL_THREAD_NODE_REF_REFERENCES1;
                break;
        }
 
+       if (ret < 0) {
+               if (ctx->tmp_mail->expunged) {
+                       /* treat it as if it didn't exist. trying to add it
+                          again will result in failure. */
+                       return 0;
+               }
+               return -1;
+       }
+
+       /* get the nth message-id */
+       orig_msgids = msgids;
+       msgid = message_id_get_next(&msgids);
+       if (msgid != NULL) {
+               for (; n > 0 && *msgids != '\0'; n--)
+                       msgid = message_id_get_next(&msgids);
+       }
+
        if (msgid == NULL) {
                /* shouldn't have happened */
-               mail_hash_transaction_set_corrupted(trans, "Message ID lost");
-               return NULL;
+               mail_storage_set_critical(ctx->box->storage,
+                                         "Threading lost Message ID");
+               return -1;
        }
-
-       *p = p_strdup(ctx->msgid_pool, msgid);
-       return *p;
+       *msgid_r = msgid;
+       return 1;
 }
 
-static bool mail_thread_hash_cmp(struct mail_hash_transaction *trans,
-                                const void *key, uint32_t idx, void *context)
+static bool
+mail_thread_hash_key_cmp(const char *key,
+                        const struct mail_index_strmap_rec *rec,
+                        void *context)
 {
-       const struct msgid_search_key *key_rec = key;
        struct mail_thread_mailbox *tbox = context;
-       struct mail_thread_update_context *ctx = tbox->ctx;
-       const struct mail_thread_node *node;
+       struct mail_thread_context *ctx = tbox->ctx;
        const char *msgid;
-       bool ret;
-
-       node = mail_hash_lookup_idx(trans, idx);
-       if (key_rec->msgid_crc32 != node->msgid_crc32)
-               return FALSE;
-
-       ctx->cmp_match_count++;
-       ctx->cmp_last_idx = idx;
+       bool cmp_ret;
+       int ret;
 
        /* either a match or a collision, need to look closer */
        T_BEGIN {
-               msgid = mail_thread_node_get_msgid(trans, ctx, node, idx);
-               if (msgid != NULL)
-                       ret = strcmp(msgid, key_rec->msgid) == 0;
-               else {
-                       /* we couldn't figure out the Message-ID for whatever
-                          reason. we'll need to fallback to rebuilding the
-                          whole thread. */
-                       ctx->rebuild = TRUE;
-                       ret = FALSE;
+               ret = mail_strmap_rec_get_msgid(ctx, rec, &msgid);
+               if (ret <= 0) {
+                       if (ret < 0)
+                               ctx->failed = TRUE;
+                       cmp_ret = FALSE;
+               } else {
+                       cmp_ret = strcmp(msgid, key) == 0;
                }
        } T_END;
-       return ret;
+       return cmp_ret;
 }
 
-static unsigned int mail_thread_hash_rec(const void *p)
+static int
+mail_thread_hash_rec_cmp(const struct mail_index_strmap_rec *rec1,
+                        const struct mail_index_strmap_rec *rec2,
+                        void *context)
 {
-       const struct mail_thread_node *rec = p;
+       struct mail_thread_mailbox *tbox = context;
+       struct mail_thread_context *ctx = tbox->ctx;
+       const char *msgid1, *msgid2;
+       int ret;
 
-       return rec->msgid_crc32;
+       T_BEGIN {
+               ret = mail_strmap_rec_get_msgid(ctx, rec1, &msgid1);
+               if (ret > 0) {
+                       msgid1 = t_strdup(msgid1);
+                       ret = mail_strmap_rec_get_msgid(ctx, rec2, &msgid2);
+               }
+               ret = ret <= 0 ? -1 :
+                       strcmp(msgid1, msgid2) == 0;
+       } T_END;
+       return ret;
 }
 
-static int
-mail_thread_hash_remap(struct mail_hash_transaction *trans,
-                      const uint32_t *map, unsigned int map_size,
-                      void *context ATTR_UNUSED)
+static void mail_thread_strmap_remap(const uint32_t *idx_map,
+                                    unsigned int old_count,
+                                    unsigned int new_count, void *context)
 {
-       const struct mail_hash_header *hdr;
+       struct mail_thread_mailbox *tbox = context;
+       struct mail_thread_cache *cache = tbox->cache;
+       ARRAY_TYPE(mail_thread_node) new_nodes;
+       const struct mail_thread_node *old_nodes;
        struct mail_thread_node *node;
-       uint32_t idx;
+       unsigned int i, nodes_count, max, new_first_invalid, invalid_count;
 
-       hdr = mail_hash_get_header(trans);
-       for (idx = 1; idx < hdr->record_count; idx++) {
-               node = mail_hash_lookup_idx(trans, idx);
-               i_assert(!MAIL_HASH_RECORD_IS_DELETED(&node->rec));
+       if (cache->search_result == NULL)
+               return;
 
-               if (node->parent_idx >= map_size) {
-                       mail_hash_transaction_set_corrupted(trans,
-                               "invalid parent_idx");
-                       return -1;
+       if (new_count == 0) {
+               /* strmap was reset, we'll need to rebuild thread */
+               mailbox_search_result_free(&cache->search_result);
+               return;
+       }
+
+       invalid_count = cache->next_invalid_msgid_str_idx -
+               cache->first_invalid_msgid_str_idx;
+
+       old_nodes = array_get(&cache->thread_nodes, &nodes_count);
+       i_array_init(&new_nodes, new_count + invalid_count + 32);
+
+       /* renumber existing valid nodes. all existing records in old_nodes
+          should also exist in idx_map since we've removed expunged messages
+          from the cache before committing the sync. */
+       max = I_MIN(I_MIN(old_count, nodes_count),
+                   cache->first_invalid_msgid_str_idx);
+       for (i = 0; i < max; i++) {
+               if (idx_map[i] == 0) {
+                       /* expunged record. */
+                       i_assert(old_nodes[i].uid == 0);
+               } else {
+                       node = array_idx_modifiable(&new_nodes, idx_map[i]);
+                       *node = old_nodes[i];
+                       if (node->parent_idx != 0) {
+                               node->parent_idx = idx_map[node->parent_idx];
+                               i_assert(node->parent_idx != 0);
+                       }
                }
+       }
 
-               node->parent_idx = map[node->parent_idx];
+       /* copy invalid nodes, if any. no other messages point to them,
+          so this is safe. */
+       new_first_invalid = new_count + 1 +
+               THREAD_INVALID_MSGID_STR_IDX_SKIP_COUNT;
+       array_copy(&new_nodes.arr, new_first_invalid, &cache->thread_nodes.arr,
+                  cache->first_invalid_msgid_str_idx, invalid_count);
+       cache->first_invalid_msgid_str_idx = new_first_invalid;
+       cache->next_invalid_msgid_str_idx = new_first_invalid + invalid_count;
+
+       /* replace the old nodes with the renumbered ones */
+       array_free(&cache->thread_nodes);
+       cache->thread_nodes = new_nodes;
+}
+
+static int thread_get_mail_header(struct mail *mail, const char *name,
+                                 const char **value_r)
+{
+       if (mail_get_first_header(mail, name, value_r) < 0) {
+               if (!mail->expunged)
+                       return -1;
+
+               /* Message is expunged. Instead of failing the entire THREAD
+                  command, just treat the header as non-existing. */
+               *value_r = NULL;
        }
        return 0;
 }
 
-static bool
-mail_thread_try_use_hash(struct mail_thread_context *ctx,
-                        struct mail_hash *hash,
-                        const struct mailbox_status *status, bool reset)
+static int
+mail_thread_map_add_mail(struct mail_thread_context *ctx, struct mail *mail)
 {
-       struct mail_search_args *search_args = ctx->search_args;
-       struct mail_search_arg *limit_arg = NULL;
-       const struct mail_hash_header *hdr;
-       struct mail_hash_transaction *hash_trans;
-       uint32_t last_seq, last_uid, seq1, seq2;
-       bool can_use = TRUE, shared_lock = FALSE;
-       int try, ret;
-
-       last_seq = status->messages;
-       last_uid = status->uidnext - 1;
-
-       /* Each search condition requires their own separate thread index.
-          Pretty much all the clients use only "search all" threading, so
-          we don't need to worry about anything else. */
-       if (search_args->args->next != NULL) {
-               /* too difficult to figure out if we could optimize this.
-                  we most likely couldn't. */
-               return FALSE;
-       } else if (search_args->args->type == SEARCH_ALL) {
-               /* optimize */
-       } else if (search_args->args->type == SEARCH_SEQSET) {
-               const struct seq_range *range;
-               unsigned int count;
-
-               range = array_get(&search_args->args->value.seqset, &count);
-               if (count == 1 && range[0].seq1 == 1) {
-                       /* If we're searching 1..n, we might be able to
-                          optimize this. This is at least useful for testing
-                          incremental index updates if nothing else. :) */
-                       last_seq = range[0].seq2;
-                       last_uid = 0;
-               } else {
-                       return FALSE;
-               }
+       const char *message_id, *in_reply_to, *references, *msgid;
+       uint32_t ref_index;
+
+       if (thread_get_mail_header(mail, HDR_MESSAGE_ID, &message_id) < 0 ||
+           thread_get_mail_header(mail, HDR_REFERENCES, &references) < 0)
+               return -1;
+
+       /* add Message-ID: */
+       msgid = message_id_get_next(&message_id);
+       if (msgid != NULL) {
+               mail_index_strmap_view_sync_add(ctx->strmap_sync, mail->uid,
+                                               MAIL_THREAD_NODE_REF_MSGID,
+                                               msgid);
        } else {
-               return FALSE;
+               mail_index_strmap_view_sync_add_unique(ctx->strmap_sync,
+                                       mail->uid, MAIL_THREAD_NODE_REF_MSGID);
        }
 
-       for (try = 0;; try++) {
-               if ((ret = mail_hash_lock_shared(hash)) < 0)
-                       return FALSE;
-               if (ret > 0)
-                       break;
-               if (try == 5) {
-                       /* enough tries */
-                       return FALSE;
-               }
+       /* add References: if there are any valid ones */
+       msgid = message_id_get_next(&references);
+       if (msgid != NULL) {
+               ref_index = MAIL_THREAD_NODE_REF_REFERENCES1;
+               do {
+                       mail_index_strmap_view_sync_add(ctx->strmap_sync,
+                                                       mail->uid,
+                                                       ref_index, msgid);
+                       ref_index++;
+                       msgid = message_id_get_next(&references);
+               } while (msgid != NULL);
+       } else {
+               /* no References:, use In-Reply-To: */
+               if (thread_get_mail_header(mail, HDR_IN_REPLY_TO,
+                                          &in_reply_to) < 0)
+                       return -1;
 
-               /* doesn't exist, create a new hash */
-               if ((ret = mail_hash_create_excl_locked(hash)) < 0)
-                       return FALSE;
-               if (ret > 0) {
-                       ctx->thread_ctx.hash_trans =
-                               mail_hash_transaction_begin(hash,
-                                                           status->messages);
-                       return TRUE;
+               msgid = message_id_get_next(&in_reply_to);
+               if (msgid != NULL) {
+                       mail_index_strmap_view_sync_add(ctx->strmap_sync,
+                               mail->uid, MAIL_THREAD_NODE_REF_INREPLYTO,
+                               msgid);
                }
        }
+       if (ctx->failed) {
+               /* message-id lookup failed in hash compare */
+               return -1;
+       }
+       return 0;
+}
 
-again:
-       hash_trans = mail_hash_transaction_begin(hash, status->messages);
-       hdr = mail_hash_get_header(hash_trans);
-       if (reset)
-               mail_hash_reset(hash_trans);
-       else if (hdr->last_uid > last_uid) {
-               /* thread index is newer than our current mailbox view,
-                  can't optimize */
-               can_use = FALSE;
-       } else if (hdr->message_count > last_seq) {
-               /* messages have been expunged, but not removed from
-                  the thread index. we don't know their Message-IDs
-                  anymore, so we have to rebuild the index. */
-               mail_hash_reset(hash_trans);
-       } else if (hdr->message_count > 0) {
-               /* non-empty hash. add only the new messages in there. */
-               mailbox_get_seq_range(ctx->box, 1, hdr->last_uid, &seq1, &seq2);
-
-               if (seq2 != hdr->message_count ||
-                   hdr->uid_validity != status->uidvalidity) {
-                       /* some messages have been expunged. have to rebuild. */
-                       mail_hash_reset(hash_trans);
-               } else {
-                       /* after all these checks, this is the only case we
-                          can actually optimize. */
-                       struct mail_search_arg *arg = &ctx->tmp_search_arg;
-
-                       arg->type = SEARCH_SEQSET;
-                       p_array_init(&arg->value.seqset, search_args->pool, 1);
-                       if (seq2 == last_seq) {
-                               /* no need to update the index,
-                                  search nothing */
-                               shared_lock = TRUE;
-                       } else {
-                               /* search next+1..n */
-                               seq_range_array_add_range(&arg->value.seqset,
-                                                         seq2 + 1, last_seq);
-                       }
-                       limit_arg = &ctx->tmp_search_arg;
-               }
-       } else {
-               /* empty hash - make sure anyway that it gets reset */
-               mail_hash_reset(hash_trans);
+static int mail_thread_index_map_build(struct mail_thread_context *ctx)
+{
+       static const char *wanted_headers[] = {
+               HDR_MESSAGE_ID, HDR_IN_REPLY_TO, HDR_REFERENCES,
+               NULL
+       };
+       struct mail_thread_mailbox *tbox = MAIL_THREAD_CONTEXT(ctx->box);
+       struct mailbox_header_lookup_ctx *headers_ctx;
+       struct mail_search_args *search_args;
+       struct mail_search_context *search_ctx;
+       struct mail *mail;
+       uint32_t last_uid, seq1, seq2;
+       int ret = 0;
+
+       if (tbox->strmap_view == NULL) {
+               /* first time we're threading this mailbox */
+               struct index_mailbox *ibox = (struct index_mailbox *)ctx->box;
+
+               tbox->strmap_view =
+                       mail_index_strmap_view_open(tbox->strmap, ibox->view,
+                                                   mail_thread_hash_key_cmp,
+                                                   mail_thread_hash_rec_cmp,
+                                                   mail_thread_strmap_remap,
+                                                   tbox, &tbox->msgid_map,
+                                                   &tbox->msgid_hash);
+       }
+
+       ctx->t = mailbox_transaction_begin(ctx->box, 0);
+       headers_ctx = mailbox_header_lookup_init(ctx->box, wanted_headers);
+       ctx->tmp_mail = mail_alloc(ctx->t, 0, headers_ctx);
+
+       /* add all missing UIDs */
+       ctx->strmap_sync = mail_index_strmap_view_sync_init(tbox->strmap_view,
+                                                           &last_uid);
+       mailbox_get_seq_range(ctx->box, last_uid + 1, (uint32_t)-1,
+                             &seq1, &seq2);
+       if (seq1 == 0) {
+               /* nothing is missing */
+               mailbox_header_lookup_unref(&headers_ctx);
+               return 0;
        }
 
-       if (can_use && !shared_lock) {
-               mail_hash_transaction_end(&hash_trans);
-               mail_hash_unlock(hash);
-               if (mail_hash_lock_exclusive(hash,
-                               MAIL_HASH_LOCK_FLAG_CREATE_MISSING) <= 0)
-                       return FALSE;
-               shared_lock = TRUE;
-               limit_arg = NULL;
-               goto again;
+       search_args = mail_search_build_init();
+       mail_search_build_add_seqset(search_args, seq1, seq2);
+       search_ctx = mailbox_search_init(ctx->t, search_args, NULL);
+
+       mail = mail_alloc(ctx->t, 0, headers_ctx);
+       mailbox_header_lookup_unref(&headers_ctx);
+
+       while (mailbox_search_next(search_ctx, mail) > 0) {
+               if (mail_thread_map_add_mail(ctx, mail) < 0) {
+                       ret = -1;
+                       break;
+               }
        }
-       if (!can_use) {
-               mail_hash_transaction_end(&hash_trans);
-               mail_hash_unlock(hash);
+       mail_free(&mail);
+       if (mailbox_search_deinit(&search_ctx) < 0)
+               ret = -1;
+
+       if (ret < 0)
+               mail_index_strmap_view_sync_rollback(&ctx->strmap_sync);
+       else
+               mail_index_strmap_view_sync_commit(&ctx->strmap_sync);
+       return ret;
+}
+
+static int msgid_map_cmp(const void *key, const void *value)
+{
+       const uint32_t *uid = key;
+       const struct mail_index_strmap_rec *rec = value;
+
+       return *uid < rec->uid ? -1 :
+               (*uid > rec->uid ? 1 : 0);
+}
+
+static bool mail_thread_cache_update_removes(struct mail_thread_mailbox *tbox,
+                                            ARRAY_TYPE(seq_range) *added_uids)
+{
+       struct mail_thread_cache *cache = tbox->cache;
+       ARRAY_TYPE(seq_range) removed_uids;
+       const struct seq_range *uids;
+       const struct mail_index_strmap_rec *msgid_map;
+       unsigned int i, j, idx, map_count, uid_count;
+       uint32_t uid;
+
+       t_array_init(&removed_uids, 64);
+       mailbox_search_result_sync(cache->search_result,
+                                  &removed_uids, added_uids);
+
+       /* first check that we're not inserting any messages in the middle */
+       uids = array_get(added_uids, &uid_count);
+       if (uid_count > 0 && uids[0].seq1 <= cache->last_uid)
                return FALSE;
-       } else {
-               ctx->thread_ctx.hash_trans = hash_trans;
-               if (limit_arg != NULL) {
-                       limit_arg->next = search_args->args;
-                       search_args->args = limit_arg;
+
+       /* next remove messages so we'll see early if we have to rebuild.
+          we expect to find all removed UIDs from msgid_map that are <= max
+          UID in msgid_map */
+       msgid_map = array_get(tbox->msgid_map, &map_count);
+       uids = array_get(&removed_uids, &uid_count);
+       for (i = j = 0; i < uid_count; i++) {
+               /* find and remove from the map */
+               bsearch_insert_pos(&uids[i].seq1, &msgid_map[j], map_count - j,
+                                  sizeof(*msgid_map), msgid_map_cmp, &idx);
+               j += idx;
+               if (j == map_count) {
+                       /* all removals after this are about messages we never
+                          even added to the cache */
+                       i_assert(uids[i].seq1 > cache->last_uid);
+                       break;
+               }
+               while (j > 0 && msgid_map[j-1].uid == msgid_map[j].uid)
+                       j--;
+
+               /* remove the messages from cache */
+               for (uid = uids[i].seq1; uid <= uids[i].seq2; uid++) {
+                       i_assert(msgid_map[j].uid == uid);
+                       if (!mail_thread_remove(cache, msgid_map + j, &j))
+                               return FALSE;
                }
-               return TRUE;
        }
+       return TRUE;
 }
 
-static void
-mail_thread_update_init(struct mail_thread_context *ctx, bool reset)
+static void mail_thread_cache_update_adds(struct mail_thread_mailbox *tbox,
+                                         ARRAY_TYPE(seq_range) *added_uids)
 {
-       struct mail_thread_mailbox *tbox = MAIL_THREAD_CONTEXT(ctx->box);
-       struct mail_hash *hash = NULL;
-       struct mailbox_status status;
-       const struct mail_hash_header *hdr;
-       unsigned int count;
-
-       mailbox_get_status(ctx->box, STATUS_MESSAGES | STATUS_UIDNEXT, &status);
-       if (mail_thread_try_use_hash(ctx, tbox->hash, &status, reset))
-               hash = tbox->hash;
-       else {
-               /* fallback to using in-memory hash */
-               struct index_mailbox *ibox = (struct index_mailbox *)ctx->box;
+       struct mail_thread_cache *cache = tbox->cache;
+       const struct seq_range *uids;
+       const struct mail_index_strmap_rec *msgid_map;
+       unsigned int i, j, map_count, uid_count;
+       uint32_t uid;
 
-               hash = mail_hash_alloc(ibox->index, NULL,
-                                      sizeof(struct mail_thread_node),
-                                      mail_thread_hash_key,
-                                      mail_thread_hash_rec,
-                                      mail_thread_hash_cmp,
-                                      mail_thread_hash_remap,
-                                      tbox);
-               if (mail_hash_lock_exclusive(hash,
-                               MAIL_HASH_LOCK_FLAG_CREATE_MISSING) <= 0)
-                       i_unreached();
-               ctx->thread_ctx.hash_trans =
-                       mail_hash_transaction_begin(hash, 0);
+       /* everything removed successfully, add the new messages. all of them
+          should already be in msgid_map. */
+       msgid_map = array_get(tbox->msgid_map, &map_count);
+       uids = array_get(added_uids, &uid_count);
+       i_assert(uid_count == 0 || msgid_map[j].uid <= uids[0].seq1);
+       for (i = 0; i < uid_count; i++) {
+               for (uid = uids[i].seq1; uid <= uids[i].seq2; uid++) {
+                       while (j < map_count && msgid_map[j].uid < uid)
+                               j++;
+                       i_assert(j < map_count && msgid_map[j].uid == uid);
+                       mail_thread_add(cache, msgid_map+j, &j);
+               }
        }
-       ctx->thread_ctx.hash = hash;
+}
 
-       /* initialize searching */
-       ctx->t = mailbox_transaction_begin(ctx->box, 0);
-       ctx->search = mailbox_search_init(ctx->t, ctx->search_args, NULL);
-       ctx->thread_ctx.tmp_mail = mail_alloc(ctx->t, 0, NULL);
-       ctx->thread_ctx.thread_list_ctx =
-               mail_thread_list_update_begin(tbox->list_ctx,
-                                             ctx->thread_ctx.hash_trans);
-
-       hdr = mail_hash_get_header(ctx->thread_ctx.hash_trans);
-       count = status.messages < hdr->record_count ? 0 :
-               status.messages - hdr->record_count;
-       count += APPROX_MSG_EXTRA_COUNT;
-       ctx->thread_ctx.msgid_pool =
-               pool_alloconly_create(MEMPOOL_GROWING"msgids",
-                                     count * APPROX_MSGID_SIZE);
-       i_array_init(&ctx->thread_ctx.msgid_cache,
-                    I_MAX(hdr->record_count, status.messages));
+static void
+mail_thread_cache_fix_invalid_indexes(struct mail_thread_mailbox *tbox)
+{
+       struct mail_thread_cache *cache = tbox->cache;
+       uint32_t highest_idx, new_first_idx, count;
+
+       highest_idx = mail_index_strmap_view_get_highest_idx(tbox->strmap_view);
+       new_first_idx = highest_idx + 1 +
+               THREAD_INVALID_MSGID_STR_IDX_SKIP_COUNT;
+       count = cache->next_invalid_msgid_str_idx -
+               cache->first_invalid_msgid_str_idx;
+
+       if (count == 0) {
+               /* there are no invalid indexes yet, we can update the first
+                  invalid index position to delay conflicts. */
+               cache->first_invalid_msgid_str_idx =
+                       cache->next_invalid_msgid_str_idx = new_first_idx;
+       } else if (highest_idx >= cache->first_invalid_msgid_str_idx) {
+               /* conflict - move the invalid indexes forward */
+               array_copy(&cache->thread_nodes.arr, new_first_idx,
+                          &cache->thread_nodes.arr,
+                          cache->first_invalid_msgid_str_idx, count);
+               cache->first_invalid_msgid_str_idx = new_first_idx;
+               cache->next_invalid_msgid_str_idx = new_first_idx + count;
+       }
 }
 
-static int
-mail_thread_update(struct mail_thread_context *ctx, bool reset)
+static void mail_thread_cache_sync_remove(struct mail_thread_mailbox *tbox,
+                                         struct mail_thread_context *ctx)
 {
-       static const char *wanted_headers[] = {
-               HDR_MESSAGE_ID, HDR_IN_REPLY_TO, HDR_REFERENCES, HDR_SUBJECT,
-               NULL
-       };
-       struct mailbox *box;
-       struct mailbox_header_lookup_ctx *headers_ctx;
-       struct mail_hash_header *hdr;
-       struct mail *mail;
-       bool changed = FALSE;
-       uint32_t prev_uid;
-       int ret = 0;
+       struct mail_thread_cache *cache = tbox->cache;
 
-       mail_thread_update_init(ctx, reset);
-       box = mailbox_transaction_get_mailbox(ctx->t);
+       if (cache->search_result == NULL)
+               return;
 
-       hdr = mail_hash_get_header(ctx->thread_ctx.hash_trans);
-       prev_uid = hdr->last_uid;
+       if (mail_search_args_equal(ctx->search_args,
+                                  cache->search_result->search_args)) {
+               t_array_init(&ctx->added_uids, 64);
+               if (mail_thread_cache_update_removes(tbox, &ctx->added_uids)) {
+                       /* successfully updated the cache */
+                       return;
+               }
+       }
+       /* failed to use the cache, rebuild */
+       mailbox_search_result_free(&cache->search_result);
+}
 
-       headers_ctx = mailbox_header_lookup_init(box, wanted_headers);
-       mail = mail_alloc(ctx->t, MAIL_FETCH_DATE, headers_ctx);
-       while (ret == 0 &&
-              mailbox_search_next(ctx->search, mail) > 0) {
-               i_assert(mail->uid > prev_uid);
-               prev_uid = mail->uid;
-               changed = TRUE;
+static int mail_thread_cache_sync_add(struct mail_thread_mailbox *tbox,
+                                     struct mail_thread_context *ctx)
+{
+       struct mail_thread_cache *cache = tbox->cache;
+       struct mail_search_context *search_ctx;
+       struct mail *mail;
+       const struct mail_index_strmap_rec *msgid_map;
+       unsigned int i, count;
 
-               T_BEGIN {
-                       ret = mail_thread_add(&ctx->thread_ctx, mail);
-               } T_END;
-       }
-       mail_free(&mail);
-       mailbox_header_lookup_unref(&headers_ctx);
+       mail_thread_cache_fix_invalid_indexes(tbox);
 
-       if (ret < 0 || ctx->thread_ctx.failed || ctx->thread_ctx.rebuild)
-               return -1;
+       if (cache->search_result != NULL) {
+               /* we already checked at sync_remove that we can use this
+                  search result. */
+               mail_thread_cache_update_adds(tbox, &ctx->added_uids);
+               return 0;
+       }
 
-       if (changed) {
-               /* even if write failed, we can still finish the thread
-                  building */
-               (void)mail_hash_transaction_write(ctx->thread_ctx.hash_trans);
+       cache->last_uid = 0;
+       cache->first_invalid_msgid_str_idx = cache->next_invalid_msgid_str_idx =
+               mail_index_strmap_view_get_highest_idx(tbox->strmap_view) + 1 +
+               THREAD_INVALID_MSGID_STR_IDX_SKIP_COUNT;
+       array_clear(&cache->thread_nodes);
+
+       search_ctx = mailbox_search_init(ctx->t, ctx->search_args, NULL);
+       mail = mail_alloc(ctx->t, 0, NULL);
+
+       cache->search_result =
+               mailbox_search_result_save(search_ctx,
+                       MAILBOX_SEARCH_RESULT_FLAG_UPDATE |
+                       MAILBOX_SEARCH_RESULT_FLAG_QUEUE_SYNC);
+
+       msgid_map = array_get(tbox->msgid_map, &count);
+       i = 0;
+       while (i < count && mailbox_search_next(search_ctx, mail) > 0) {
+               while (msgid_map[i].uid < mail->uid)
+                       i++;
+               i_assert(i < count);
+               mail_thread_add(cache, msgid_map+i, &i);
        }
-       return 0;
+       mail_free(&mail);
+       return mailbox_search_deinit(&search_ctx);
 }
 
-int mail_thread_init(struct mailbox *box, bool reset,
-                    struct mail_search_args *args,
+int mail_thread_init(struct mailbox *box, struct mail_search_args *args,
                     struct mail_thread_context **ctx_r)
 {
        struct mail_thread_mailbox *tbox = MAIL_THREAD_CONTEXT(box);
        struct mail_thread_context *ctx;
-       int ret;
 
        i_assert(tbox->ctx == NULL);
 
@@ -464,27 +550,17 @@ int mail_thread_init(struct mailbox *box, bool reset,
        }
 
        ctx = i_new(struct mail_thread_context, 1);
-       tbox->ctx = &ctx->thread_ctx;
+       tbox->ctx = ctx;
        ctx->box = box;
        ctx->search_args = args;
 
-       while ((ret = mail_thread_update(ctx, reset)) < 0) {
-               if (ctx->thread_ctx.hash != tbox->hash) {
-                       /* failed with in-memory hash */
-                       mail_storage_set_critical(box->storage,
-                               "Threading mailbox %s failed unexpectedly",
-                               box->name);
-                       mail_thread_deinit(&ctx);
-                       return -1;
-               }
-
-               /* try again with in-memory hash */
-               mail_thread_clear(ctx);
-               reset = TRUE;
-               memset(ctx, 0, sizeof(*ctx));
-               ctx->box = box;
-               ctx->search_args = args;
+       mail_thread_cache_sync_remove(tbox, ctx);
+       if (mail_thread_index_map_build(ctx) < 0 ||
+           mail_thread_cache_sync_add(tbox, ctx) < 0) {
+               mail_thread_deinit(&ctx);
+               return -1;
        }
+       memset(&ctx->added_uids, 0, sizeof(ctx->added_uids));
 
        *ctx_r = ctx;
        return 0;
@@ -492,26 +568,8 @@ int mail_thread_init(struct mailbox *box, bool reset,
 
 static void mail_thread_clear(struct mail_thread_context *ctx)
 {
-       struct mail_thread_mailbox *tbox = MAIL_THREAD_CONTEXT(ctx->box);
-
-       if (ctx->thread_ctx.thread_list_ctx != NULL)
-               mail_thread_list_rollback(&ctx->thread_ctx.thread_list_ctx);
-
-       mail_hash_transaction_end(&ctx->thread_ctx.hash_trans);
-
-       (void)mailbox_search_deinit(&ctx->search);
-       mail_free(&ctx->thread_ctx.tmp_mail);
+       mail_free(&ctx->tmp_mail);
        (void)mailbox_transaction_commit(&ctx->t);
-
-       mail_hash_unlock(ctx->thread_ctx.hash);
-       if (ctx->thread_ctx.hash != tbox->hash)
-               mail_hash_free(&ctx->thread_ctx.hash);
-
-       if (ctx->search_args->args == &ctx->tmp_search_arg)
-               ctx->search_args->args = ctx->tmp_search_arg.next;
-
-       array_free(&ctx->thread_ctx.msgid_cache);
-       pool_unref(&ctx->thread_ctx.msgid_pool);
 }
 
 void mail_thread_deinit(struct mail_thread_context **_ctx)
@@ -523,7 +581,6 @@ void mail_thread_deinit(struct mail_thread_context **_ctx)
 
        mail_thread_clear(ctx);
        mail_search_args_unref(&ctx->search_args);
-       i_assert(!tbox->ctx->syncing);
        tbox->ctx = NULL;
        i_free(ctx);
 }
@@ -534,127 +591,10 @@ mail_thread_iterate_init(struct mail_thread_context *ctx,
 {
        struct mail_thread_mailbox *tbox = MAIL_THREAD_CONTEXT(ctx->box);
 
-       return mail_thread_iterate_init_full(ctx->thread_ctx.tmp_mail,
-                                            ctx->thread_ctx.hash_trans,
-                                            tbox->list_ctx,
+       return mail_thread_iterate_init_full(tbox->cache, ctx->tmp_mail,
                                             thread_type, write_seqs);
 }
 
-static bool
-mail_thread_index_is_up_to_date(struct mail_thread_update_context *ctx,
-                               struct mail_index_view *view)
-{
-       const struct mail_index_header *hdr;
-       struct mail_hash_header *hash_hdr;
-       uint32_t seq1, seq2;
-
-       hdr = mail_index_get_header(view);
-       hash_hdr = mail_hash_get_header(ctx->hash_trans);
-       if (hash_hdr->last_uid + 1 == hdr->next_uid) {
-               /* all messages have been added to hash */
-               return hash_hdr->message_count == hdr->messages_count;
-       }
-
-       if (!mail_index_lookup_seq_range(view, 1, hash_hdr->last_uid,
-                                        &seq1, &seq2))
-               seq2 = 0;
-       return seq2 == hash_hdr->message_count;
-}
-
-static void
-mail_thread_expunge_handler_deinit(struct mail_thread_mailbox *tbox,
-                                  struct mail_thread_update_context *ctx)
-{
-       struct mailbox_transaction_context *t;
-
-       t = ctx->tmp_mail->transaction;
-
-       if (ctx->failed)
-               mail_thread_list_rollback(&ctx->thread_list_ctx);
-       else {
-               if (mail_thread_list_commit(&ctx->thread_list_ctx) < 0)
-                       ctx->failed = TRUE;
-       }
-       if (!ctx->failed)
-               (void)mail_hash_transaction_write(ctx->hash_trans);
-       mail_hash_transaction_end(&ctx->hash_trans);
-       mail_hash_unlock(tbox->hash);
-
-       mail_free(&ctx->tmp_mail);
-       /* don't commit. we're in the middle of syncing and
-          this transaction isn't marked as external. */
-       (void)mailbox_transaction_rollback(&t);
-       array_free(&ctx->msgid_cache);
-       pool_unref(&ctx->msgid_pool);
-}
-
-static int
-mail_thread_expunge_handler(struct mail_index_sync_map_ctx *sync_ctx,
-                           uint32_t seq, const void *data,
-                           void **sync_context ATTR_UNUSED, void *context)
-{
-       struct mailbox *box = context;
-       struct mail_thread_mailbox *tbox = MAIL_THREAD_CONTEXT(box);
-       struct mail_thread_update_context *ctx = tbox->ctx;
-       struct mailbox_transaction_context *t;
-       uint32_t uid;
-
-       if (data == NULL) {
-               /* deinit */
-               if (!ctx->syncing)
-                       return 0;
-               if (ctx->hash != NULL)
-                       mail_thread_expunge_handler_deinit(tbox, ctx);
-               i_free_and_null(tbox->ctx);
-               return 0;
-       }
-       if (ctx == NULL) {
-               /* init */
-               if (tbox->ctx != NULL) {
-                       /* we're already in the middle of threading */
-                       return 0;
-               }
-               tbox->ctx = ctx = i_new(struct mail_thread_update_context, 1);
-               ctx->syncing = TRUE;
-
-               /* we can't wait the lock in here or we could deadlock. */
-               if (mail_hash_lock_exclusive(tbox->hash,
-                                            MAIL_HASH_LOCK_FLAG_TRY) <= 0)
-                       return 0;
-
-               ctx->hash = tbox->hash;
-               ctx->hash_trans = mail_hash_transaction_begin(ctx->hash, 0);
-               ctx->thread_list_ctx =
-                       mail_thread_list_update_begin(tbox->list_ctx,
-                                                     ctx->hash_trans);
-               ctx->msgid_pool = pool_alloconly_create(MEMPOOL_GROWING"msgids",
-                                                       20 * APPROX_MSGID_SIZE);
-               i_array_init(&ctx->msgid_cache, 20);
-
-               t = mailbox_transaction_begin(box, 0);
-               ctx->tmp_mail = mail_alloc(t, 0, NULL);
-
-               if (!mail_thread_index_is_up_to_date(ctx, sync_ctx->view)) {
-                       ctx->failed = TRUE;
-                       return 0;
-               }
-       } else {
-               if (ctx->hash == NULL) {
-                       /* locking had failed */
-                       return 0;
-               }
-               if (!ctx->syncing || ctx->failed)
-                       return 0;
-       }
-
-       T_BEGIN {
-               mail_index_lookup_uid(sync_ctx->view, seq, &uid);
-               if (mail_thread_remove(ctx, uid) <= 0)
-                       ctx->failed = TRUE;
-       } T_END;
-       return 0;
-}
-
 static int mail_thread_mailbox_close(struct mailbox *box)
 {
        struct mail_thread_mailbox *tbox = MAIL_THREAD_CONTEXT(box);
@@ -662,8 +602,14 @@ static int mail_thread_mailbox_close(struct mailbox *box)
 
        i_assert(tbox->ctx == NULL);
 
-       mail_thread_list_deinit(&tbox->list_ctx);
-       mail_hash_free(&tbox->hash);
+       if (tbox->strmap_view != NULL)
+               mail_index_strmap_view_close(&tbox->strmap_view);
+       mail_index_strmap_deinit(&tbox->strmap);
+       if (tbox->cache->search_result != NULL)
+               mailbox_search_result_free(&tbox->cache->search_result);
+       array_free(&tbox->cache->thread_nodes);
+       i_free(tbox->cache);
+
        ret = tbox->module_ctx.super.close(box);
        i_free(tbox);
        return ret;
@@ -673,24 +619,17 @@ void index_thread_mailbox_index_opened(struct index_mailbox *ibox)
 {
        struct mailbox *box = &ibox->box;
        struct mail_thread_mailbox *tbox;
-       uint32_t ext_id;
 
        tbox = i_new(struct mail_thread_mailbox, 1);
        tbox->module_ctx.super = box->v;
        box->v.close = mail_thread_mailbox_close;
 
-       tbox->list_ctx = mail_thread_list_init(box);
-       tbox->hash = mail_hash_alloc(ibox->index, MAIL_THREAD_INDEX_SUFFIX,
-                                    sizeof(struct mail_thread_node),
-                                    mail_thread_hash_key,
-                                    mail_thread_hash_rec,
-                                    mail_thread_hash_cmp,
-                                    mail_thread_hash_remap,
-                                    tbox);
-
-       ext_id = mail_index_ext_register(ibox->index, "thread", 0, 0, 0);
-       mail_index_register_expunge_handler(ibox->index, ext_id, TRUE,
-                                           mail_thread_expunge_handler, box);
+       tbox->strmap = mail_index_strmap_init(ibox->index,
+                                             MAIL_THREAD_INDEX_SUFFIX);
+       tbox->next_msgid_idx = 1;
+
+       tbox->cache = i_new(struct mail_thread_cache, 1);
+       i_array_init(&tbox->cache->thread_nodes, 128);
 
        MODULE_CONTEXT_SET(box, mail_thread_storage_module, tbox);
 }
index c5ebe998ca5bc975a43adeb35e46fa1427faf381..3ed23a0e8f78064e4408c14997360d194a5babc9 100644 (file)
@@ -26,10 +26,8 @@ ARRAY_DEFINE_TYPE(mail_thread_child_node, struct mail_thread_child_node);
    unknown. */
 bool mail_thread_type_parse(const char *str, enum mail_thread_type *type_r);
 
-/* Build thread from given search arguments. If reset=TRUE, build a new thread
-   tree to memory even if thread index exists. args=NULL searches everything. */
-int mail_thread_init(struct mailbox *box, bool reset,
-                    struct mail_search_args *args,
+/* Build thread from given search arguments. args=NULL searches everything. */
+int mail_thread_init(struct mailbox *box, struct mail_search_args *args,
                     struct mail_thread_context **ctx_r);
 void mail_thread_deinit(struct mail_thread_context **ctx);
 
index 7a8069e1e17f98f7ae452a97b656d95f7fb09b75..eecd991f2d6826c44cd5ba105aa39caddee303ee 100644 (file)
@@ -1,6 +1,14 @@
 pkglibexecdir = $(libexecdir)/dovecot
 
-pkglibexec_PROGRAMS = rawlog gdbhelper idxview listview logview maildirlock
+pkglibexec_PROGRAMS = \
+       rawlog \
+       gdbhelper \
+       idxview \
+       listview \
+       logview \
+       maildirlock \
+       threadview
+
 sbin_PROGRAMS = dovecotpw
 
 AM_CPPFLAGS = \
@@ -41,6 +49,11 @@ maildirlock_LDADD = \
 maildirlock_SOURCES = \
        maildirlock.c
 
+threadview_LDADD = \
+       ../lib/liblib.a
+threadview_SOURCES = \
+       threadview.c
+
 dovecotpw_LDADD = \
        ../auth/libpassword.a \
        ../lib-ntlm/libntlm.a \
diff --git a/src/util/threadview.c b/src/util/threadview.c
new file mode 100644 (file)
index 0000000..1be01df
--- /dev/null
@@ -0,0 +1,177 @@
+/* Copyright (c) 2007-2008 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "mmap-util.h"
+#include "mail-index-private.h"
+#include "mail-index-strmap.h"
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+static uint32_t max_likely_index;
+
+uint32_t mail_index_offset_to_uint32(uint32_t offset)
+{
+       const unsigned char *buf = (const unsigned char *) &offset;
+
+       if ((offset & 0x80808080) != 0x80808080)
+               return 0;
+
+       return (((uint32_t)buf[3] & 0x7f) << 2) |
+               (((uint32_t)buf[2] & 0x7f) << 9) |
+               (((uint32_t)buf[1] & 0x7f) << 16) |
+               (((uint32_t)buf[0] & 0x7f) << 23);
+}
+
+int mail_index_unpack_num(const uint8_t **p, const uint8_t *end,
+                         uint32_t *num_r)
+{
+       const uint8_t *c = *p;
+       uint32_t value = 0;
+       unsigned int bits = 0;
+
+       for (;;) {
+               if (unlikely(c == end)) {
+                       /* we should never see EOF */
+                       *num_r = 0;
+                       return -1;
+               }
+
+               value |= (*c & 0x7f) << bits;
+               if (*c < 0x80)
+                       break;
+
+               bits += 7;
+               c++;
+       }
+
+       if (unlikely(bits >= 32)) {
+               /* broken input */
+               *p = end;
+               *num_r = 0;
+               return -1;
+       }
+
+       *p = c + 1;
+       *num_r = value;
+       return 0;
+}
+
+static size_t dump_hdr(const struct mail_index_strmap_header *hdr)
+{
+       printf("version = %u\n", hdr->version);
+       printf("uid validity = %u\n", hdr->uid_validity);
+       return sizeof(*hdr);
+}
+
+static int dump_record(const uint8_t **p, const uint8_t *end, uint32_t *uid)
+{
+       uint32_t uid_diff, n, i, count, crc32, idx;
+       size_t size;
+
+       /* <uid diff> <n> <crc32>*count <str_idx>*count */
+       if (mail_index_unpack_num(p, end, &uid_diff) <  0)
+               return -1;
+       *uid += uid_diff;
+
+       if (mail_index_unpack_num(p, end, &n) <  0)
+               return -1;
+       printf(" - uid %u: n=%u\n", *uid, n);
+
+       count = n < 2 ? n + 1 : n;
+       size = sizeof(crc32)*count + sizeof(idx)*count;
+       if (*p + size > end)
+               return -1;
+       for (i = 0; i < count; i++) {
+               if (i == 0)
+                       printf("   - message-id: ");
+               else if (i == 1) {
+                       if (n == 1)
+                               printf("   - in-reply-to: ");
+                       else
+                               printf("   - references[1]: ");
+               } else {
+                       printf("   - references[%u]: ", i);
+               }
+               memcpy(&crc32, *p + sizeof(crc32)*i, sizeof(crc32));
+               memcpy(&idx, *p + sizeof(crc32)*count + sizeof(idx)*i, sizeof(idx));
+               printf("crc32=%08x index=%u\n", crc32, idx);
+               if (idx > max_likely_index)
+                       printf(" - index probably broken\n");
+       }
+       *p += size;
+       return 0;
+}
+
+static int dump_block(const uint8_t *data, const uint8_t *end, uint32_t *uid)
+{
+       const uint8_t *p;
+       uint32_t block_size;
+
+       if (data + 4 >= end)
+               return -1;
+
+       memcpy(&block_size, data, sizeof(block_size));
+       block_size = mail_index_offset_to_uint32(block_size) >> 2;
+       printf(" - block_size=%u\n", block_size);
+       if (block_size == 0) {
+               /* finished */
+               return -1;
+       }
+       if (data + sizeof(block_size) + block_size > end) {
+               printf("   - broken!\n");
+               return -1;
+       }
+       p = data + sizeof(block_size);
+       end = p + block_size;
+
+       *uid += 1;
+       while (p != end) {
+               if (dump_record(&p, end, uid) < 0) {
+                       printf(" - broken\n");
+                       return -1;
+               }
+       }
+       return p - data;
+}
+
+int main(int argc, const char *argv[])
+{
+       unsigned int pos;
+       const void *map, *end;
+       struct stat st;
+       uint32_t uid;
+       int fd, ret;
+
+       lib_init();
+
+       if (argc < 2)
+               i_fatal("Usage: threadview dovecot.index.thread");
+
+       fd = open(argv[1], O_RDONLY);
+       if (fd < 0) {
+               i_error("open(%s) failed: %m", argv[1]);
+               return 1;
+       }
+
+       if (fstat(fd, &st) < 0) {
+               i_error("fstat(%s) failed: %m", argv[1]);
+               return 1;
+       }
+       max_likely_index = (st.st_size / 8) * 2;
+
+       map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+       end = CONST_PTR_OFFSET(map, st.st_size);
+       pos = dump_hdr(map);
+       uid = 0;
+       do {
+               printf("block at offset %u:\n", pos);
+               T_BEGIN {
+                       ret = dump_block(CONST_PTR_OFFSET(map, pos), end, &uid);
+                       pos += ret;
+               } T_END;
+       } while (ret > 0);
+       return 0;
+}