From: Timo Sirainen Date: Thu, 19 Mar 2009 00:01:34 +0000 (-0400) Subject: dbox: Initial commit for rebuilding multi-file dbox. Also added support for using... X-Git-Tag: 2.0.alpha1~1038^2~39 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=e1a2c7eaad433c690e8456f2f4e8ba46746afdee;p=thirdparty%2Fdovecot%2Fcore.git dbox: Initial commit for rebuilding multi-file dbox. Also added support for using backup index. These changes are fully untested. --HG-- branch : HEAD --- diff --git a/src/lib-storage/index/dbox/Makefile.am b/src/lib-storage/index/dbox/Makefile.am index e695532cd9..9e129c4eac 100644 --- a/src/lib-storage/index/dbox/Makefile.am +++ b/src/lib-storage/index/dbox/Makefile.am @@ -18,6 +18,7 @@ libstorage_dbox_a_SOURCES = \ dbox-sync-file.c \ dbox-sync-rebuild.c \ dbox-storage.c \ + dbox-storage-rebuild.c \ dbox-transaction.c headers = \ @@ -25,6 +26,7 @@ headers = \ dbox-file-maildir.h \ dbox-map.h \ dbox-storage.h \ + dbox-storage-rebuild.h \ dbox-sync.h if INSTALL_HEADERS diff --git a/src/lib-storage/index/dbox/dbox-file.c b/src/lib-storage/index/dbox/dbox-file.c index 47adb556f4..dab9049c66 100644 --- a/src/lib-storage/index/dbox/dbox-file.c +++ b/src/lib-storage/index/dbox/dbox-file.c @@ -166,6 +166,7 @@ dbox_file_init_single(struct dbox_mailbox *mbox, uint32_t uid) file->storage = mbox->storage; file->single_mbox = mbox; file->fd = -1; + file->cur_offset = (uoff_t)-1; if (uid != 0) { file->uid = uid; file->fname = dbox_file_uid_get_fname(mbox, uid, &maildir); @@ -199,6 +200,7 @@ dbox_file_init_multi(struct dbox_storage *storage, uint32_t file_id) file->storage = storage; file->file_id = file_id; file->fd = -1; + file->cur_offset = (uoff_t)-1; file->fname = file_id == 0 ? 
dbox_generate_tmp_filename() : i_strdup_printf(DBOX_MAIL_FILE_MULTI_FORMAT, file_id); file->current_path = @@ -588,7 +590,7 @@ int dbox_file_get_mail_stream(struct dbox_file *file, uoff_t offset, if (offset == 0) offset = file->file_header_size; - if (offset != file->cur_offset || file->cur_physical_size == 0) { + if (offset != file->cur_offset) { file->cur_offset = offset; i_stream_seek(file->input, offset); ret = dbox_file_read_mail_header(file, &file->cur_physical_size); @@ -608,7 +610,6 @@ static int dbox_file_seek_next_at_metadata(struct dbox_file *file, uoff_t *offset) { const char *line; - uoff_t physical_size; int ret; if ((ret = dbox_file_metadata_skip_header(file)) <= 0) @@ -616,31 +617,33 @@ dbox_file_seek_next_at_metadata(struct dbox_file *file, uoff_t *offset) /* skip over the actual metadata */ while ((line = i_stream_read_next_line(file->input)) != NULL) { - if (*line == DBOX_METADATA_OLDV1_SPACE) { + if (*line == DBOX_METADATA_OLDV1_SPACE || *line == '\0') { /* end of metadata */ break; } } *offset = file->input->v_offset; - - (void)i_stream_read(file->input); - if (!i_stream_have_bytes_left(file->input)) - return 1; - - return dbox_file_read_mail_header(file, &physical_size); + return 1; } -int dbox_file_seek_next(struct dbox_file *file, uoff_t *offset, bool *last_r) +int dbox_file_seek_next(struct dbox_file *file, uoff_t *offset_r, bool *last_r) { - uoff_t size; - bool first = *offset == 0; + uoff_t offset, size; bool expunged; int ret; - /* FIXME: see if we can get rid of this function. only rebuild needs it. 
*/ + if (file->cur_offset == (uoff_t)-1) { + /* first mail */ + offset = file->file_header_size; + } else { + offset = file->cur_offset + file->cur_physical_size; + if ((ret = dbox_file_seek_next_at_metadata(file, &offset)) <= 0) + return ret; + } + *offset_r = offset; *last_r = FALSE; - ret = dbox_file_get_mail_stream(file, *offset, &size, NULL, &expunged); + ret = dbox_file_get_mail_stream(file, offset, &size, NULL, &expunged); if (ret <= 0) return ret; @@ -648,20 +651,6 @@ int dbox_file_seek_next(struct dbox_file *file, uoff_t *offset, bool *last_r) *last_r = TRUE; return 0; } - if (first) - return 1; - - i_stream_skip(file->input, size); - if ((ret = dbox_file_seek_next_at_metadata(file, offset)) <= 0) - return ret; - - ret = dbox_file_get_mail_stream(file, *offset, &size, NULL, &expunged); - if (ret <= 0) - return ret; - if (expunged) { - *last_r = TRUE; - return 0; - } return 1; } @@ -842,7 +831,7 @@ int dbox_file_metadata_read(struct dbox_file *file) uoff_t metadata_offset; int ret; - i_assert(file->cur_offset != 0 || file->maildir_file); + i_assert(file->cur_offset != (uoff_t)-1); if (file->metadata_read_offset == file->cur_offset || file->maildir_file) diff --git a/src/lib-storage/index/dbox/dbox-file.h b/src/lib-storage/index/dbox/dbox-file.h index 1ff684dd8a..aa49ee0747 100644 --- a/src/lib-storage/index/dbox/dbox-file.h +++ b/src/lib-storage/index/dbox/dbox-file.h @@ -150,7 +150,7 @@ int dbox_file_get_mail_stream(struct dbox_file *file, uoff_t offset, uoff_t *physical_size_r, struct istream **stream_r, bool *expunged_r); /* Seek to next message after current one. If there are no more messages, - returns 0 and last_r is set to TRUE. Returns 1 if ok, 0 if file/offset is + returns 0 and last_r is set to TRUE. Returns 1 if ok, 0 if file is corrupted, -1 if I/O error. 
*/ int dbox_file_seek_next(struct dbox_file *file, uoff_t *offset_r, bool *last_r); diff --git a/src/lib-storage/index/dbox/dbox-storage-rebuild.c b/src/lib-storage/index/dbox/dbox-storage-rebuild.c new file mode 100644 index 0000000000..8cf1262fbe --- /dev/null +++ b/src/lib-storage/index/dbox/dbox-storage-rebuild.c @@ -0,0 +1,704 @@ +/* Copyright (c) 2009 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "array.h" +#include "hash.h" +#include "hex-binary.h" +#include "str.h" +#include "dbox-storage.h" +#include "dbox-file.h" +#include "dbox-map-private.h" +#include "dbox-sync.h" +#include "dbox-storage-rebuild.h" + +#include +#include +#include + +struct dbox_rebuild_msg { + uint8_t guid_128[DBOX_GUID_BIN_LEN]; + uint32_t file_id; + uint32_t offset; + uint32_t size; + uint32_t map_uid; + + uint16_t refcount; + unsigned int seen_zero_ref_in_map:1; +}; + +struct rebuild_msg_mailbox { + struct mailbox *box; + struct mail_index_sync_ctx *sync_ctx; + struct mail_index_view *view; + struct mail_index_transaction *trans; + uint32_t next_uid; +}; + +struct dbox_storage_rebuild_context { + struct dbox_storage *storage; + pool_t pool; + + struct hash_table *guid_hash; + ARRAY_DEFINE(msgs, struct dbox_rebuild_msg *); + + uint32_t prev_file_id; + uint32_t highest_seen_map_uid; + + struct mail_index_sync_ctx *sync_ctx; + struct mail_index_view *sync_view; + struct mail_index_transaction *trans; + + struct rebuild_msg_mailbox prev_msg; + + unsigned int msgs_unsorted:1; +}; + +static unsigned int guid_hash(const void *p) +{ + const uint8_t *s = p; + unsigned int i, g, h = 0; + + for (i = 0; i < DBOX_GUID_BIN_LEN; i++) { + h = (h << 4) + s[i]; + if ((g = h & 0xf0000000UL)) { + h = h ^ (g >> 24); + h = h ^ g; + } + } + return h; +} + +static int guid_cmp(const void *p1, const void *p2) +{ + return memcmp(p1, p2, DBOX_GUID_BIN_LEN); +} + +static struct dbox_storage_rebuild_context * +dbox_storage_rebuild_init(struct dbox_storage *storage) +{ + struct 
dbox_storage_rebuild_context *ctx; + + ctx = i_new(struct dbox_storage_rebuild_context, 1); + ctx->storage = storage; + ctx->pool = pool_alloconly_create("dbox map rebuild", 1024*256); + ctx->guid_hash = hash_table_create(default_pool, ctx->pool, 0, + guid_hash, guid_cmp); + i_array_init(&ctx->msgs, 512); + return ctx; +} + +static void dbox_storage_rebuild_deinit(struct dbox_storage_rebuild_context *ctx) +{ + if (ctx->sync_ctx != NULL) + mail_index_sync_rollback(&ctx->sync_ctx); + + hash_table_destroy(&ctx->guid_hash); + pool_unref(&ctx->pool); + array_free(&ctx->msgs); + i_free(ctx); +} + +static int dbox_rebuild_msg_offset_cmp(const void *p1, const void *p2) +{ + const struct dbox_rebuild_msg *const *m1 = p1, *const *m2 = p2; + + if ((*m1)->file_id < (*m2)->file_id) + return -1; + if ((*m1)->file_id > (*m2)->file_id) + return 1; + + if ((*m1)->offset < (*m2)->offset) + return -1; + if ((*m1)->offset > (*m2)->offset) + return 1; + return 0; +} + +static int dbox_rebuild_msg_uid_cmp(const void *p1, const void *p2) +{ + const struct dbox_rebuild_msg *const *m1 = p1, *const *m2 = p2; + + if ((*m1)->map_uid < (*m2)->map_uid) + return -1; + if ((*m1)->map_uid > (*m2)->map_uid) + return 1; + return 0; +} + +static int rebuild_add_file(struct dbox_storage_rebuild_context *ctx, + const char *path) +{ + struct dbox_file *file; + const char *fname, *guid; + struct dbox_rebuild_msg *rec; + uint32_t file_id; + buffer_t *guid_buf; + uoff_t offset; + bool last; + int ret = 0; + + fname = strrchr(path, '/'); + i_assert(fname != NULL); + fname += strlen(DBOX_MAIL_FILE_MULTI_PREFIX) + 1; + + file_id = strtoul(fname, NULL, 10); + if (!is_numeric(fname, '\0') || file_id == 0) { + i_warning("dbox rebuild: File name is missing ID: %s", path); + return 0; + } + + /* small optimization: typically files are returned sorted. in that + case we don't need to sort them ourself. 
*/ + if (file_id < ctx->prev_file_id) + ctx->msgs_unsorted = TRUE; + ctx->prev_file_id = file_id; + + guid_buf = buffer_create_dynamic(pool_datastack_create(), + DBOX_GUID_BIN_LEN); + + file = dbox_file_init_multi(ctx->storage, file_id); + while ((ret = dbox_file_seek_next(file, &offset, &last)) > 0) { + if ((ret = dbox_file_metadata_read(file)) <= 0) + break; + + guid = dbox_file_metadata_get(file, DBOX_METADATA_GUID); + if (guid == NULL) { + dbox_file_set_corrupted(file, + "Message is missing GUID"); + ret = 0; + break; + } + buffer_set_used_size(guid_buf, 0); + if (hex_to_binary(guid, guid_buf) < 0 || + guid_buf->used != sizeof(rec->guid_128)) { + dbox_file_set_corrupted(file, + "Message GUID is not 128 bit hex: %s", guid); + ret = 0; + break; + } + + rec = p_new(ctx->pool, struct dbox_rebuild_msg, 1); + rec->file_id = file_id; + rec->offset = offset; + rec->size = file->cur_physical_size; + memcpy(rec->guid_128, guid_buf->data, sizeof(rec->guid_128)); + array_append(&ctx->msgs, &rec, 1); + + if (hash_table_lookup(ctx->guid_hash, guid_buf->data) != NULL) { + /* duplicate. save this as a refcount=0 to map, + so it will eventually be deleted. */ + } else { + hash_table_insert(ctx->guid_hash, rec->guid_128, rec); + } + } + if (ret == 0) { + /* FIXME: file is corrupted. should we try to fix it? */ + } + dbox_file_unref(&file); + return ret < 0 ? 
-1 : 0; +} + +static void +rebuild_add_missing_map_uids(struct dbox_storage_rebuild_context *ctx, + uint32_t next_uid) +{ + struct dbox_rebuild_msg **msgs; + struct dbox_mail_index_map_record rec; + unsigned int i, count; + uint32_t seq; + + memset(&rec, 0, sizeof(rec)); + msgs = array_get_modifiable(&ctx->msgs, &count); + for (i = 0; i < count; i++) { + if (msgs[i]->map_uid != 0) + continue; + + rec.file_id = msgs[i]->file_id; + rec.offset = msgs[i]->offset; + rec.size = msgs[i]->size; + + msgs[i]->map_uid = next_uid++; + mail_index_append(ctx->trans, msgs[i]->map_uid, &seq); + mail_index_update_ext(ctx->trans, seq, + ctx->storage->map->map_ext_id, + &rec, NULL); + } +} + +static int rebuild_apply_map(struct dbox_storage_rebuild_context *ctx) +{ + struct dbox_map *map = ctx->storage->map; + const struct mail_index_header *hdr; + struct dbox_rebuild_msg **msgs, *pos; + struct dbox_rebuild_msg search_msg, *search_msgp = &search_msg; + struct dbox_mail_lookup_rec rec; + uint32_t seq; + unsigned int count; + + msgs = array_get_modifiable(&ctx->msgs, &count); + if (ctx->msgs_unsorted) + qsort(msgs, count, sizeof(*msgs), dbox_rebuild_msg_offset_cmp); + + hdr = mail_index_get_header(ctx->sync_view); + for (seq = 1; seq <= hdr->messages_count; seq++) { + if (dbox_map_view_lookup_rec(map, ctx->sync_view, + seq, &rec) < 0) + return -1; + + /* look up the rebuild msg record for this message */ + search_msg.file_id = rec.rec.file_id; + search_msg.offset = rec.rec.offset; + pos = bsearch(&search_msgp, msgs, count, sizeof(*msgs), + dbox_rebuild_msg_offset_cmp); + if (pos == NULL) { + /* map record points to non-existing message. */ + mail_index_expunge(ctx->trans, seq); + } else { + pos->map_uid = rec.map_uid; + pos->seen_zero_ref_in_map = rec.refcount == 0; + } + } + rebuild_add_missing_map_uids(ctx, hdr->next_uid); + + /* afterwards we're interested in looking up map_uids. + re-sort the messages to make it easier. 
*/ + qsort(msgs, count, sizeof(*msgs), dbox_rebuild_msg_uid_cmp); + return 0; +} + +static struct dbox_rebuild_msg * +rebuild_lookup_map_uid(struct dbox_storage_rebuild_context *ctx, + uint32_t map_uid) +{ + struct dbox_rebuild_msg search_msg, *search_msgp = &search_msg; + struct dbox_rebuild_msg *const *msgs; + unsigned int count; + + search_msg.map_uid = map_uid; + msgs = array_get(&ctx->msgs, &count); + return bsearch(&search_msgp, msgs, count, sizeof(*msgs), + dbox_rebuild_msg_uid_cmp); +} + +static void +rebuild_mailbox_multi(struct dbox_storage_rebuild_context *ctx, + struct dbox_sync_rebuild_context *rebuild_ctx, + struct dbox_mailbox *mbox, + struct mail_index_view *view, + struct mail_index_transaction *trans) +{ + const struct dbox_mail_index_record *dbox_rec; + struct dbox_mail_index_record new_dbox_rec; + const struct mail_index_header *hdr; + struct dbox_rebuild_msg *rec; + const void *data; + bool expunged; + uint32_t seq, uid, new_seq, map_uid; + + memset(&new_dbox_rec, 0, sizeof(new_dbox_rec)); + hdr = mail_index_get_header(view); + for (seq = 1; seq <= hdr->messages_count; seq++) { + mail_index_lookup_ext(view, seq, mbox->dbox_ext_id, + &data, &expunged); + dbox_rec = data; + map_uid = dbox_rec == NULL ? 0 : dbox_rec->map_uid; + + mail_index_lookup_ext(view, seq, mbox->guid_ext_id, + &data, &expunged); + + /* see if we can find this message based on + 1) GUID, 2) map_uid */ + rec = data == NULL ? NULL : + hash_table_lookup(ctx->guid_hash, data); + if (rec == NULL) { + if (map_uid == 0) { + /* not a multi-dbox message, ignore. */ + continue; + } + /* multi-dbox message that wasn't found with GUID. + either it's lost or GUID has been corrupted. we can + still try to look it up using map_uid. 
*/ + rec = rebuild_lookup_map_uid(ctx, map_uid); + if (rec != NULL) { + mail_index_update_ext(trans, seq, + mbox->guid_ext_id, + rec->guid_128, NULL); + } + } else if (map_uid != rec->map_uid) { + /* map_uid is wrong, update it */ + i_assert(rec->map_uid != 0); + new_dbox_rec.map_uid = rec->map_uid; + mail_index_update_ext(trans, seq, mbox->dbox_ext_id, + &new_dbox_rec, NULL); + } else { + /* everything was ok */ + } + + if (rec != NULL) { + /* keep this message */ + rec->refcount++; + + mail_index_lookup_uid(view, seq, &uid); + mail_index_append(trans, uid, &new_seq); + dbox_sync_rebuild_index_metadata(rebuild_ctx, + NULL, new_seq, uid); + + new_dbox_rec.map_uid = rec->map_uid; + mail_index_update_ext(ctx->trans, new_seq, + mbox->dbox_ext_id, + &new_dbox_rec, NULL); + } + } +} + +static int +rebuild_mailbox(struct dbox_storage_rebuild_context *ctx, const char *name) +{ + struct mailbox *box; + struct dbox_mailbox *mbox; + struct mail_index_sync_ctx *sync_ctx; + struct mail_index_view *view; + struct mail_index_transaction *trans; + struct dbox_sync_rebuild_context *rebuild_ctx; + enum mail_error error; + int ret; + + box = dbox_mailbox_open(&ctx->storage->storage, name, NULL, + MAILBOX_OPEN_READONLY | + MAILBOX_OPEN_KEEP_RECENT | + MAILBOX_OPEN_IGNORE_ACLS); + if (box == NULL) { + mail_storage_get_last_error(&ctx->storage->storage, &error); + if (error == MAIL_ERROR_TEMP) + return -1; + /* non-temporary error, ignore */ + return 0; + } + mbox = (struct dbox_mailbox *)box; + + ret = mail_index_sync_begin(mbox->ibox.index, &sync_ctx, &view, &trans, + MAIL_INDEX_SYNC_FLAG_AVOID_FLAG_UPDATES); + if (ret <= 0) { + i_assert(ret != 0); + mail_storage_set_index_error(&mbox->ibox); + (void)mailbox_close(&box); + return -1; + } + + rebuild_ctx = dbox_sync_index_rebuild_init(mbox, view, trans); + ret = dbox_sync_index_rebuild_singles(rebuild_ctx); + if (ret == 0) + rebuild_mailbox_multi(ctx, rebuild_ctx, mbox, view, trans); + dbox_sync_index_rebuild_deinit(&rebuild_ctx); + + 
if (mail_index_sync_commit(&sync_ctx) < 0) { + mail_storage_set_index_error(&mbox->ibox); + ret = -1; + } + + (void)mailbox_close(&box); + return ret < 0 ? -1 : 0; +} + +static int rebuild_mailboxes(struct dbox_storage_rebuild_context *ctx) +{ + struct mailbox_list_iterate_context *iter; + const struct mailbox_info *info; + int ret = 0; + + iter = mailbox_list_iter_init(ctx->storage->storage.list, "*", + MAILBOX_LIST_ITER_RETURN_NO_FLAGS); + while ((info = mailbox_list_iter_next(iter)) != NULL) { + if ((info->flags & (MAILBOX_NONEXISTENT | + MAILBOX_NOSELECT)) == 0) { + if (rebuild_mailbox(ctx, info->name) < 0) { + ret = -1; + break; + } + } + } + if (mailbox_list_iter_deinit(&iter) < 0) + ret = -1; + return ret; +} + +static int rebuild_msg_mailbox_commit(struct rebuild_msg_mailbox *msg) +{ + if (mail_index_sync_commit(&msg->sync_ctx) < 0) + return -1; + (void)mailbox_close(&msg->box); + memset(msg, 0, sizeof(*msg)); + return 0; +} + +static int rebuild_restore_msg(struct dbox_storage_rebuild_context *ctx, + struct dbox_rebuild_msg *msg) +{ + struct mail_storage *storage = &ctx->storage->storage; + struct dbox_file *file; + const struct mail_index_header *hdr; + struct dbox_mail_index_record dbox_rec; + const char *mailbox = NULL; + struct mailbox *box; + struct dbox_mailbox *mbox; + enum mail_error error; + bool expunged, created; + uoff_t size; + int ret; + uint32_t seq; + + /* first see if message contains the mailbox it was originally + saved to */ + file = dbox_file_init_multi(ctx->storage, msg->file_id); + ret = dbox_file_get_mail_stream(file, msg->offset, &size, NULL, + &expunged); + if (ret > 0 && !expunged && dbox_file_metadata_read(file) == 0) { + mailbox = dbox_file_metadata_get(file, + DBOX_METADATA_ORIG_MAILBOX); + } + dbox_file_unref(&file); + if (ret <= 0 || expunged) { + if (ret < 0) + return -1; + /* we shouldn't get here, so apparently we couldn't fix + something. just ignore the mail.. 
*/ + return 0; + } + + if (mailbox == NULL) + mailbox = "INBOX"; + + /* we have the destination mailbox. now open it and add the message + there. */ + created = FALSE; + box = ctx->prev_msg.box != NULL && + strcmp(mailbox, ctx->prev_msg.box->name) == 0 ? + ctx->prev_msg.box : NULL; + while (box == NULL) { + box = dbox_mailbox_open(storage, mailbox, NULL, + MAILBOX_OPEN_READONLY | + MAILBOX_OPEN_KEEP_RECENT | + MAILBOX_OPEN_IGNORE_ACLS); + if (box != NULL) + break; + + mail_storage_get_last_error(storage, &error); + if (error == MAIL_ERROR_TEMP) + return -1; + + if (error == MAIL_ERROR_NOTFOUND && !created) { + /* mailbox doesn't exist currently? see if creating + it helps. */ + created = TRUE; + (void)mail_storage_mailbox_create(storage, mailbox, + FALSE); + } else if (strcmp(mailbox, "INBOX") != 0) { + /* see if we can save to INBOX instead. */ + mailbox = "INBOX"; + } else { + /* this shouldn't happen */ + return -1; + } + } + mbox = (struct dbox_mailbox *)box; + + /* switch the mailbox cache if necessary */ + if (box != ctx->prev_msg.box && ctx->prev_msg.box != NULL) { + if (rebuild_msg_mailbox_commit(&ctx->prev_msg) < 0) + return -1; + } + if (ctx->prev_msg.box == NULL) { + ret = mail_index_sync_begin(mbox->ibox.index, + &ctx->prev_msg.sync_ctx, + &ctx->prev_msg.view, + &ctx->prev_msg.trans, 0); + if (ret <= 0) { + i_assert(ret != 0); + mail_storage_set_index_error(&mbox->ibox); + (void)mailbox_close(&box); + return -1; + } + ctx->prev_msg.box = box; + hdr = mail_index_get_header(ctx->prev_msg.view); + ctx->prev_msg.next_uid = hdr->next_uid; + } + + /* add the new message */ + memset(&dbox_rec, 0, sizeof(dbox_rec)); + dbox_rec.map_uid = msg->map_uid; + mail_index_append(ctx->prev_msg.trans, ctx->prev_msg.next_uid++, &seq); + mail_index_update_ext(ctx->prev_msg.trans, seq, mbox->dbox_ext_id, + &dbox_rec, NULL); + + msg->refcount++; + return 0; +} + +static int rebuild_handle_zero_refs(struct dbox_storage_rebuild_context *ctx) +{ + struct dbox_rebuild_msg **msgs; 
+ unsigned int i, count; + + /* if we have messages at this point which have refcount=0, they're + either already expunged or they were somehow lost for some reason. + we'll need to figure out what to do about them. */ + msgs = array_get_modifiable(&ctx->msgs, &count); + for (i = 0; i < count; i++) { + if (msgs[i]->refcount != 0) + continue; + + if (msgs[i]->seen_zero_ref_in_map) { + /* we've seen the map record, trust it. */ + continue; + } + /* either map record was lost for this message or the message + was lost from its mailbox. safest way to handle this is to + restore the message. */ + if (rebuild_restore_msg(ctx, msgs[i]) < 0) + return -1; + } + if (ctx->prev_msg.box != NULL) { + if (rebuild_msg_mailbox_commit(&ctx->prev_msg) < 0) + return -1; + } + return 0; +} + +static void rebuild_update_refcounts(struct dbox_storage_rebuild_context *ctx) +{ + const struct mail_index_header *hdr; + const void *data; + struct dbox_rebuild_msg **msgs; + const uint16_t *ref16_p; + bool expunged; + uint32_t seq, map_uid; + unsigned int i, count; + + /* update refcounts for existing map records */ + msgs = array_get_modifiable(&ctx->msgs, &count); + hdr = mail_index_get_header(ctx->sync_view); + for (seq = 1, i = 0; seq <= hdr->messages_count && i < count; seq++) { + mail_index_lookup_uid(ctx->sync_view, seq, &map_uid); + if (map_uid != msgs[i]->map_uid) { + /* we've already expunged this map record */ + i_assert(map_uid < msgs[i]->map_uid); + continue; + } + + mail_index_lookup_ext(ctx->sync_view, seq, + ctx->storage->map->ref_ext_id, + &data, &expunged); + ref16_p = data; + if (ref16_p == NULL || *ref16_p != msgs[i]->refcount) { + mail_index_update_ext(ctx->trans, seq, + ctx->storage->map->ref_ext_id, + &msgs[i]->refcount, NULL); + } + i++; + } + + /* update refcounts for newly created map records */ + for (; i < count; i++, seq++) { + mail_index_update_ext(ctx->trans, seq, + ctx->storage->map->ref_ext_id, + &msgs[i]->refcount, NULL); + } +} + +static int 
rebuild_finish(struct dbox_storage_rebuild_context *ctx) +{ + if (rebuild_handle_zero_refs(ctx) < 0) + return -1; + rebuild_update_refcounts(ctx); + return 0; +} + +static int dbox_storage_rebuild_scan(struct dbox_storage_rebuild_context *ctx) +{ + DIR *dir; + struct dirent *d; + string_t *path; + unsigned int dir_len; + int ret = 0; + + /* begin by locking the map, so that other processes can't try to + rebuild at the same time. */ + ret = mail_index_sync_begin(ctx->storage->map->index, &ctx->sync_ctx, + &ctx->sync_view, &ctx->trans, 0); + if (ret <= 0) { + i_assert(ret != 0); + mail_storage_set_internal_error(&ctx->storage->storage); + mail_index_reset_error(ctx->storage->map->index); + return -1; + } + + dir = opendir(ctx->storage->storage_dir); + if (dir == NULL) { + mail_storage_set_critical(&ctx->storage->storage, + "opendir(%s) failed: %m", ctx->storage->storage_dir); + return -1; + } + path = t_str_new(256); + str_append(path, ctx->storage->storage_dir); + str_append_c(path, '/'); + dir_len = str_len(path); + + for (errno = 0; (d = readdir(dir)) != NULL; errno = 0) { + if (strncmp(d->d_name, DBOX_MAIL_FILE_MULTI_PREFIX, + strlen(DBOX_MAIL_FILE_MULTI_PREFIX)) == 0) { + str_truncate(path, dir_len); + str_append(path, d->d_name); + T_BEGIN { + ret = rebuild_add_file(ctx, d->d_name); + } T_END; + if (ret < 0) { + ret = -1; + break; + } + } + } + if (ret == 0 && errno != 0) { + mail_storage_set_critical(&ctx->storage->storage, + "readdir(%s) failed: %m", ctx->storage->storage_dir); + ret = -1; + } + if (closedir(dir) < 0) { + mail_storage_set_critical(&ctx->storage->storage, + "closedir(%s) failed: %m", ctx->storage->storage_dir); + ret = -1; + } + + if (ret < 0 || + rebuild_apply_map(ctx) < 0 || + rebuild_mailboxes(ctx) < 0 || + rebuild_finish(ctx) < 0 || + mail_index_sync_commit(&ctx->sync_ctx) < 0) + return -1; + return 0; +} + +int dbox_storage_rebuild(struct dbox_storage *storage) +{ + struct dbox_storage_rebuild_context *ctx; + struct stat st; + int ret; + 
+ if (stat(storage->storage_dir, &st) < 0) { + if (errno == ENOENT) { + /* no multi-dbox files */ + return 0; + } + + mail_storage_set_critical(&storage->storage, + "stat(%s) failed: %m", storage->storage_dir); + return -1; + } + + ctx = dbox_storage_rebuild_init(storage); + ret = dbox_storage_rebuild_scan(ctx); + dbox_storage_rebuild_deinit(ctx); + + if (ret == 0) + storage->sync_rebuild = FALSE; + return ret; +} diff --git a/src/lib-storage/index/dbox/dbox-storage-rebuild.h b/src/lib-storage/index/dbox/dbox-storage-rebuild.h new file mode 100644 index 0000000000..7ace7c70f1 --- /dev/null +++ b/src/lib-storage/index/dbox/dbox-storage-rebuild.h @@ -0,0 +1,6 @@ +#ifndef DBOX_STORAGE_REBUILD_H +#define DBOX_STORAGE_REBUILD_H + +int dbox_storage_rebuild(struct dbox_storage *storage); + +#endif diff --git a/src/lib-storage/index/dbox/dbox-sync-rebuild.c b/src/lib-storage/index/dbox/dbox-sync-rebuild.c index 91c113feaa..bf3925c638 100644 --- a/src/lib-storage/index/dbox/dbox-sync-rebuild.c +++ b/src/lib-storage/index/dbox/dbox-sync-rebuild.c @@ -21,6 +21,9 @@ struct dbox_sync_rebuild_context { uint32_t cache_ext_id; uint32_t cache_reset_id; + struct mail_index *backup_index; + struct mail_index_view *backup_view; + struct maildir_uidlist_sync_ctx *maildir_sync_ctx; struct maildir_keywords *mk; struct maildir_keywords_sync_ctx *maildir_sync_keywords; @@ -57,6 +60,7 @@ static void dbox_sync_set_uidvalidity(struct dbox_sync_rebuild_context *ctx) static void dbox_sync_index_copy_cache(struct dbox_sync_rebuild_context *ctx, + struct mail_index_view *view, uint32_t old_seq, uint32_t new_seq) { struct mail_index_map *map; @@ -67,12 +71,12 @@ dbox_sync_index_copy_cache(struct dbox_sync_rebuild_context *ctx, if (ctx->cache_ext_id == (uint32_t)-1) return; - mail_index_lookup_ext_full(ctx->view, old_seq, ctx->cache_ext_id, + mail_index_lookup_ext_full(view, old_seq, ctx->cache_ext_id, &map, &data, &expunged); if (expunged) return; - if (!mail_index_ext_get_reset_id(ctx->view, map, 
ctx->cache_ext_id, + if (!mail_index_ext_get_reset_id(view, map, ctx->cache_ext_id, &reset_id) || reset_id == 0) return; @@ -91,26 +95,27 @@ dbox_sync_index_copy_cache(struct dbox_sync_rebuild_context *ctx, static void dbox_sync_index_copy_from_old(struct dbox_sync_rebuild_context *ctx, + struct mail_index_view *view, uint32_t old_seq, uint32_t new_seq) { - struct mail_index *index = mail_index_view_get_index(ctx->view); + struct mail_index *index = mail_index_view_get_index(view); const struct mail_index_record *rec; ARRAY_TYPE(keyword_indexes) old_keywords; struct mail_keywords *kw; /* copy flags */ - rec = mail_index_lookup(ctx->view, old_seq); + rec = mail_index_lookup(view, old_seq); mail_index_update_flags(ctx->trans, new_seq, MODIFY_REPLACE, rec->flags); /* copy keywords */ t_array_init(&old_keywords, 32); - mail_index_lookup_keywords(ctx->view, old_seq, &old_keywords); + mail_index_lookup_keywords(view, old_seq, &old_keywords); kw = mail_index_keywords_create_from_indexes(index, &old_keywords); mail_index_update_keywords(ctx->trans, new_seq, MODIFY_REPLACE, kw); mail_index_keywords_free(&kw); - dbox_sync_index_copy_cache(ctx, old_seq, new_seq); + dbox_sync_index_copy_cache(ctx, view, old_seq, new_seq); } static void @@ -132,66 +137,64 @@ dbox_sync_index_copy_from_maildir(struct dbox_sync_rebuild_context *ctx, mail_index_keywords_free(&keywords); } -static void -dbox_sync_index_metadata(struct dbox_sync_rebuild_context *ctx, - struct dbox_file *file, uint32_t seq, uint32_t uid) +void dbox_sync_rebuild_index_metadata(struct dbox_sync_rebuild_context *ctx, + struct dbox_file *file, + uint32_t new_seq, uint32_t uid) { uint32_t old_seq; if (mail_index_lookup_seq(ctx->view, uid, &old_seq)) { /* the message exists in the old index. copy the metadata from it. 
*/ - dbox_sync_index_copy_from_old(ctx, old_seq, seq); - } else if (file->maildir_file) { + dbox_sync_index_copy_from_old(ctx, ctx->view, old_seq, new_seq); + } else if (ctx->backup_view != NULL && + mail_index_lookup_seq(ctx->backup_view, uid, &old_seq)) { + /* copy the metadata from backup index. */ + dbox_sync_index_copy_from_old(ctx, ctx->backup_view, + old_seq, new_seq); + } else if (file != NULL && file->maildir_file) { /* we're probably doing initial sync after migration from maildir. preserve the old flags. */ - dbox_sync_index_copy_from_maildir(ctx, file, seq); + dbox_sync_index_copy_from_maildir(ctx, file, new_seq); } } -static int dbox_sync_index_file_next(struct dbox_sync_rebuild_context *ctx, - struct dbox_file *file, uoff_t *offset) +static int dbox_sync_add_file_index(struct dbox_sync_rebuild_context *ctx, + struct dbox_file *file) { uint32_t seq; - bool last; + uoff_t size; + bool expunged; int ret; - ret = dbox_file_seek_next(file, offset, &last); + ret = dbox_file_get_mail_stream(file, 0, &size, NULL, &expunged); if (ret <= 0) { if (ret < 0) return -1; - i_warning("%s: Ignoring broken file (header)", - file->current_path); + i_warning("dbox: Ignoring broken file: %s", file->current_path); return 0; } - - ret = dbox_file_metadata_read(file); - if (ret <= 0) { - if (ret < 0) - return -1; - i_warning("%s: Ignoring broken file (metadata)", - file->current_path); + if (expunged) { + /* the file just got deleted? 
*/ return 0; } - /* FIXME: file->uid doesn't work for multi files */ mail_index_append(ctx->trans, file->uid, &seq); - dbox_sync_index_metadata(ctx, file, seq, file->uid); - return 1; + dbox_sync_rebuild_index_metadata(ctx, file, seq, file->uid); + return 0; } static int -dbox_sync_index_uid_file(struct dbox_sync_rebuild_context *ctx, - const char *dir, const char *fname) +dbox_sync_add_uid_file(struct dbox_sync_rebuild_context *ctx, + const char *dir, const char *fname) { struct dbox_file *file; unsigned long uid; char *p; - uoff_t offset = 0; int ret; - fname += sizeof(DBOX_MAIL_FILE_MULTI_PREFIX)-1; + fname += sizeof(DBOX_MAIL_FILE_UID_PREFIX)-1; uid = strtoul(fname, &p, 10); if (*p != '\0' || uid == 0 || uid >= (uint32_t)-1) { i_warning("dbox %s: Ignoring invalid filename %s", @@ -205,22 +208,14 @@ dbox_sync_index_uid_file(struct dbox_sync_rebuild_context *ctx, file = dbox_file_init_single(ctx->mbox, uid); file->current_path = i_strdup_printf("%s/%s", dir, fname); - ret = dbox_sync_index_file_next(ctx, file, &offset) < 0 ? 
-1 : 0; + ret = dbox_sync_add_file_index(ctx, file); dbox_file_unref(&file); return ret; } static int -dbox_sync_index_multi_file(struct dbox_sync_rebuild_context *ctx, - const char *dir, const char *fname) -{ - /* FIXME */ - return 0; -} - -static int -dbox_sync_index_maildir_file(struct dbox_sync_rebuild_context *ctx, - const char *fname) +dbox_sync_add_maildir_file(struct dbox_sync_rebuild_context *ctx, + const char *fname) { int ret; @@ -252,19 +247,15 @@ dbox_sync_index_maildir_file(struct dbox_sync_rebuild_context *ctx, } static int -dbox_sync_index_file(struct dbox_sync_rebuild_context *ctx, - const char *path, const char *fname, bool primary) +dbox_sync_add_file(struct dbox_sync_rebuild_context *ctx, + const char *path, const char *fname, bool primary) { if (strncmp(fname, DBOX_MAIL_FILE_UID_PREFIX, sizeof(DBOX_MAIL_FILE_UID_PREFIX)-1) == 0) - return dbox_sync_index_uid_file(ctx, path, fname); - - if (strncmp(fname, DBOX_MAIL_FILE_MULTI_PREFIX, - sizeof(DBOX_MAIL_FILE_MULTI_PREFIX)-1) == 0) - return dbox_sync_index_multi_file(ctx, path, fname); + return dbox_sync_add_uid_file(ctx, path, fname); if (primary && strstr(fname, ":2,") != NULL) - return dbox_sync_index_maildir_file(ctx, fname); + return dbox_sync_add_maildir_file(ctx, fname); return 0; } @@ -296,8 +287,7 @@ static int dbox_sync_index_rebuild_dir(struct dbox_sync_rebuild_context *ctx, break; T_BEGIN { - ret = dbox_sync_index_file(ctx, path, d->d_name, - primary); + ret = dbox_sync_add_file(ctx, path, d->d_name, primary); } T_END; } while (ret >= 0); if (errno != 0) { @@ -323,7 +313,6 @@ static int dbox_sync_maildir_finish(struct dbox_sync_rebuild_context *ctx) const char *fname; enum maildir_uidlist_rec_flag flags; uint32_t uid, next_uid; - uoff_t offset; int ret = 0; if (ctx->maildir_sync_ctx == NULL) @@ -354,8 +343,7 @@ static int dbox_sync_maildir_finish(struct dbox_sync_rebuild_context *ctx) file->current_path = i_strdup_printf("%s/%s", ctx->mbox->path, fname); - offset = 0; - ret = 
dbox_sync_index_file_next(ctx, file, &offset); + ret = dbox_sync_add_file_index(ctx, file); dbox_file_unref(&file); if (ret < 0) break; @@ -389,53 +377,106 @@ static void dbox_sync_update_header(struct dbox_sync_rebuild_context *ctx) &new_hdr, sizeof(new_hdr)); } -static int dbox_sync_index_rebuild_ctx(struct dbox_sync_rebuild_context *ctx) +struct dbox_sync_rebuild_context * +dbox_sync_index_rebuild_init(struct dbox_mailbox *mbox, + struct mail_index_view *view, + struct mail_index_transaction *trans) { + struct mailbox *box = &mbox->ibox.box; + struct dbox_sync_rebuild_context *ctx; + const char *index_dir; + enum mail_index_open_flags open_flags = MAIL_INDEX_OPEN_FLAG_READONLY; + + ctx = i_new(struct dbox_sync_rebuild_context, 1); + ctx->mbox = mbox; + ctx->view = view; + ctx->trans = trans; + mail_index_reset(ctx->trans); + index_mailbox_reset_uidvalidity(&mbox->ibox); + mail_index_ext_lookup(mbox->ibox.index, "cache", &ctx->cache_ext_id); + + /* use the backup index if it exists (open returns 0 when missing) */ + index_dir = mailbox_list_get_path(box->storage->list, box->name, + MAILBOX_LIST_PATH_TYPE_INDEX); + ctx->backup_index = + mail_index_alloc(index_dir, DBOX_INDEX_PREFIX".backup"); + +#ifndef MMAP_CONFLICTS_WRITE + if ((box->storage->flags & MAIL_STORAGE_FLAG_MMAP_DISABLE) != 0) +#endif + open_flags |= MAIL_INDEX_OPEN_FLAG_MMAP_DISABLE; + if (mail_index_open(ctx->backup_index, open_flags, + box->storage->lock_method) <= 0) + mail_index_free(&ctx->backup_index); + else + ctx->backup_view = mail_index_view_open(ctx->backup_index); + return ctx; +} + +int dbox_sync_index_rebuild_singles(struct dbox_sync_rebuild_context *ctx) +{ + int ret = 0; + dbox_sync_set_uidvalidity(ctx); if (dbox_sync_index_rebuild_dir(ctx, ctx->mbox->path, TRUE) < 0) - return -1; - - if (ctx->mbox->alt_path != NULL) { + ret = -1; + else if (ctx->mbox->alt_path != NULL) { if (dbox_sync_index_rebuild_dir(ctx, ctx->mbox->alt_path, FALSE) < 0) - return -1; + ret = -1; } - if (dbox_sync_maildir_finish(ctx) <
0) - return -1; + if (ret == 0) { + if (dbox_sync_maildir_finish(ctx) < 0) + ret = -1; + } + + if (ctx->maildir_sync_ctx != NULL) { + if (maildir_uidlist_sync_deinit(&ctx->maildir_sync_ctx) < 0) + ret = -1; + } + if (ctx->maildir_sync_keywords != NULL) + maildir_keywords_sync_deinit(&ctx->maildir_sync_keywords); + if (ctx->mk != NULL) + maildir_keywords_deinit(&ctx->mk); + return ret; +} + +void dbox_sync_index_rebuild_deinit(struct dbox_sync_rebuild_context **_ctx) +{ + struct dbox_sync_rebuild_context *ctx = *_ctx; + + *_ctx = NULL; + if (ctx->backup_index != NULL) { + mail_index_view_close(&ctx->backup_view); + mail_index_free(&ctx->backup_index); + } dbox_sync_update_header(ctx); - return 0; + i_free(ctx); } int dbox_sync_index_rebuild(struct dbox_mailbox *mbox) { - struct dbox_sync_rebuild_context ctx; + struct dbox_sync_rebuild_context *ctx; + struct mail_index_view *view; + struct mail_index_transaction *trans; int ret; - memset(&ctx, 0, sizeof(ctx)); - ctx.mbox = mbox; - ctx.view = mail_index_view_open(mbox->ibox.index); - ctx.trans = mail_index_transaction_begin(ctx.view, + view = mail_index_view_open(mbox->ibox.index); + trans = mail_index_transaction_begin(view, MAIL_INDEX_TRANSACTION_FLAG_EXTERNAL); - mail_index_reset(ctx.trans); - index_mailbox_reset_uidvalidity(&mbox->ibox); - mail_index_ext_lookup(mbox->ibox.index, "cache", &ctx.cache_ext_id); - if ((ret = dbox_sync_index_rebuild_ctx(&ctx)) < 0) - mail_index_transaction_rollback(&ctx.trans); + ctx = dbox_sync_index_rebuild_init(mbox, view, trans); + ret = dbox_sync_index_rebuild_singles(ctx); + dbox_sync_index_rebuild_deinit(&ctx); + + if (ret < 0) + mail_index_transaction_rollback(&trans); else - ret = mail_index_transaction_commit(&ctx.trans); - mail_index_view_close(&ctx.view); + ret = mail_index_transaction_commit(&trans); + mail_index_view_close(&view); - if (ctx.maildir_sync_ctx != NULL) { - if (maildir_uidlist_sync_deinit(&ctx.maildir_sync_ctx) < 0) - ret = -1; - } - if
(ctx.maildir_sync_keywords != NULL) - maildir_keywords_sync_deinit(&ctx.maildir_sync_keywords); - if (ctx.mk != NULL) - maildir_keywords_deinit(&ctx.mk); if (ret == 0) - mbox->storage->sync_rebuild = FALSE; + mbox->storage->sync_rebuild = FALSE; /* ctx is freed here */ return ret; } diff --git a/src/lib-storage/index/dbox/dbox-sync.c b/src/lib-storage/index/dbox/dbox-sync.c index bbb9942da3..b86c6e4c76 100644 --- a/src/lib-storage/index/dbox/dbox-sync.c +++ b/src/lib-storage/index/dbox/dbox-sync.c @@ -6,6 +6,7 @@ #include "str.h" #include "hash.h" #include "dbox-storage.h" +#include "dbox-storage-rebuild.h" #include "dbox-map.h" #include "dbox-file.h" #include "dbox-sync.h" @@ -211,10 +212,16 @@ int dbox_sync_begin(struct dbox_mailbox *mbox, bool force, enum mail_index_sync_flags sync_flags = 0; unsigned int i; int ret; - bool rebuild; + bool rebuild, storage_rebuilt = FALSE; rebuild = dbox_refresh_header(mbox) < 0; + if (mbox->storage->sync_rebuild) { + if (dbox_storage_rebuild(mbox->storage) < 0) + return -1; + storage_rebuilt = TRUE; + } + ctx = i_new(struct dbox_sync_context, 1); ctx->mbox = mbox; @@ -238,13 +245,16 @@ int dbox_sync_begin(struct dbox_mailbox *mbox, bool force, return ret; } - if (rebuild && dbox_refresh_header(mbox) == 0) { - /* another process rebuilt it already */ - rebuild = FALSE; - } - if (rebuild) { + /* now that we're locked, check again if we want to rebuild */ + if (dbox_refresh_header(mbox) < 0) { + if (!storage_rebuilt) { + /* we'll need to rebuild storage too. + try again from the beginning.
*/ + mail_index_sync_rollback(&ctx->index_sync_ctx); + i_free(ctx); + return dbox_sync_begin(mbox, force, ctx_r); + } ret = 0; - rebuild = FALSE; } else { if ((ret = dbox_sync_index(ctx)) > 0) break; diff --git a/src/lib-storage/index/dbox/dbox-sync.h b/src/lib-storage/index/dbox/dbox-sync.h index b8e40a14b3..811ad03df5 100644 --- a/src/lib-storage/index/dbox/dbox-sync.h +++ b/src/lib-storage/index/dbox/dbox-sync.h @@ -35,6 +35,17 @@ void dbox_sync_cleanup(struct dbox_storage *storage); int dbox_sync_file(struct dbox_sync_context *ctx, const struct dbox_sync_file_entry *entry); int dbox_sync_file_cleanup(struct dbox_file *file); + +struct dbox_sync_rebuild_context * +dbox_sync_index_rebuild_init(struct dbox_mailbox *mbox, + struct mail_index_view *view, + struct mail_index_transaction *trans); +int dbox_sync_index_rebuild_singles(struct dbox_sync_rebuild_context *ctx); +void dbox_sync_rebuild_index_metadata(struct dbox_sync_rebuild_context *ctx, + struct dbox_file *file, + uint32_t new_seq, uint32_t uid); +void dbox_sync_index_rebuild_deinit(struct dbox_sync_rebuild_context **ctx); + int dbox_sync_index_rebuild(struct dbox_mailbox *mbox); struct mailbox_sync_context *