From: Timo Sirainen Date: Tue, 31 Mar 2009 23:35:11 +0000 (-0400) Subject: dbox: If we find a broken dbox file, do the best we can to save the mails in there. X-Git-Tag: 2.0.alpha1~1038^2~3 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4a9055eff4c0d5b0f98bc1789951155ffe10b265;p=thirdparty%2Fdovecot%2Fcore.git dbox: If we find a broken dbox file, do the best we can to save the mails in there. --HG-- branch : HEAD --- diff --git a/src/lib-storage/index/dbox/Makefile.am b/src/lib-storage/index/dbox/Makefile.am index 9e129c4eac..1225bda5bd 100644 --- a/src/lib-storage/index/dbox/Makefile.am +++ b/src/lib-storage/index/dbox/Makefile.am @@ -10,6 +10,7 @@ AM_CPPFLAGS = \ libstorage_dbox_a_SOURCES = \ dbox-file.c \ + dbox-file-fix.c \ dbox-file-maildir.c \ dbox-mail.c \ dbox-map.c \ diff --git a/src/lib-storage/index/dbox/dbox-file-fix.c b/src/lib-storage/index/dbox/dbox-file-fix.c new file mode 100644 index 0000000000..864a3ce24b --- /dev/null +++ b/src/lib-storage/index/dbox/dbox-file-fix.c @@ -0,0 +1,368 @@ +/* Copyright (c) 2009 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "istream.h" +#include "ostream.h" +#include "str-find.h" +#include "hex-binary.h" +#include "message-size.h" +#include "dbox-storage.h" +#include "dbox-file.h" + +#include + +/* Scan forward from the current input offset for the next DBOX_MAGIC_PRE + that directly follows a LF, or the next DBOX_MAGIC_POST, whichever comes + first. If one is found, returns > 0 with its offset in offset_r and pre_r + telling whether it was a pre-magic. At EOF returns 0 with offset_r set to + the EOF offset. On a read error returns -1. The input is seeked back to its + original offset before returning. */ +static int +dbox_file_find_next_magic(struct dbox_file *file, uoff_t *offset_r, bool *pre_r) +{ + struct istream *input = file->input; + struct str_find_context *pre_ctx, *post_ctx; + uoff_t orig_offset, pre_offset, post_offset; + const unsigned char *data; + size_t size; + int ret; + + *pre_r = FALSE; + + pre_ctx = str_find_init(default_pool, "\n"DBOX_MAGIC_PRE); + post_ctx = str_find_init(default_pool, DBOX_MAGIC_POST); + + /* \n isn't part of the DBOX_MAGIC_PRE, but it always precedes it. + assume that at this point we've already just read the \n. when + scanning for it later we'll need to find the \n though. 
*/ + str_find_more(pre_ctx, (const unsigned char *)"\n", 1); + + orig_offset = input->v_offset; + while ((ret = i_stream_read_data(input, &data, &size, 0)) > 0) { + pre_offset = (uoff_t)-1; + post_offset = (uoff_t)-1; + if (str_find_more(pre_ctx, data, size)) { + pre_offset = input->v_offset + + str_find_get_match_end_pos(pre_ctx) - + (strlen(DBOX_MAGIC_PRE) + 1); + *pre_r = TRUE; + } + if (str_find_more(post_ctx, data, size)) { + post_offset = input->v_offset + + str_find_get_match_end_pos(post_ctx) - + strlen(DBOX_MAGIC_POST); + if (pre_offset == (uoff_t)-1 || + post_offset < pre_offset) { + pre_offset = post_offset; + *pre_r = FALSE; + } + } + + if (pre_offset != (uoff_t)-1) { + if (*pre_r) { + /* LF isn't part of the magic */ + pre_offset++; + } + *offset_r = pre_offset; + break; + } + i_stream_skip(input, size); + } + if (ret <= 0) { + i_assert(ret == -1); + if (input->stream_errno != 0) + dbox_file_set_syscall_error(file, "read()"); + else { + ret = 0; + *offset_r = input->v_offset; + } + } + i_stream_seek(input, orig_offset); + str_find_deinit(&pre_ctx); + str_find_deinit(&post_ctx); + return ret; +} + +/* Copy exactly count bytes from file->input to output. path names the + output file and is used only in error messages. Returns 0 on success, -1 + with the storage error set if the copy fails or comes up short. */ +static int +stream_copy(struct dbox_file *file, struct ostream *output, + const char *path, uoff_t count) +{ + struct istream *input; + off_t bytes; + + input = i_stream_create_limit(file->input, count); + bytes = o_stream_send_istream(output, input); + i_stream_unref(&input); + + if (bytes < 0) { + mail_storage_set_critical(&file->storage->storage, + "o_stream_send_istream(%s, %s) failed: %m", + file->current_path, path); + return -1; + } + if ((uoff_t)bytes != count) { + mail_storage_set_critical(&file->storage->storage, + "o_stream_send_istream(%s) copied only %" + PRIuUOFF_T" of %"PRIuUOFF_T" bytes", + path, (uoff_t)bytes, count); + return -1; + } + return 0; +} + +/* Best-effort skip over a message header that failed to parse: skip up to + the first LF found within the first msg_header_size + 16 buffered bytes, + otherwise skip DBOX_MAGIC_PRE if the buffered data starts with it. */ +static void dbox_file_skip_broken_header(struct dbox_file *file) +{ + const unsigned int magic_len = strlen(DBOX_MAGIC_PRE); + const unsigned char *data; + size_t i, size; + + /* if there's LF close to 
our position, assume that the header ends + there. */ + data = i_stream_get_data(file->input, &size); + if (size > file->msg_header_size + 16) + size = file->msg_header_size + 16; + for (i = 0; i < size; i++) { + if (data[i] == '\n') { + i_stream_skip(file->input, i); + return; + } + } + + /* skip at least the magic bytes if possible */ + if (size > magic_len && memcmp(data, DBOX_MAGIC_PRE, magic_len) == 0) + i_stream_skip(file->input, magic_len); +} + +/* Copy metadata lines from file->input to output until the end-of-metadata + line, EOF, or a line whose first byte is below 32. The virtual size line is + not copied because the caller writes a recomputed one. have_guid_r is set + to TRUE if a GUID line was copied. */ +static void +dbox_file_copy_metadata(struct dbox_file *file, struct ostream *output, + bool *have_guid_r) +{ + const char *line; + uoff_t prev_offset = file->input->v_offset; + + *have_guid_r = FALSE; + while ((line = i_stream_read_next_line(file->input)) != NULL) { + if (*line == DBOX_METADATA_OLDV1_SPACE || *line == '\0') { + /* end of metadata */ + return; + } + if (*line < 32) { + /* broken - possibly a new pre-magic block */ + i_stream_seek(file->input, prev_offset); + return; + } + /* this line is sane. remember where the next one starts so that + a broken line is handed back to the caller from its beginning */ + prev_offset = file->input->v_offset; + if (*line == DBOX_METADATA_VIRTUAL_SIZE) { + /* it may be wrong - recreate it */ + continue; + } + if (*line == DBOX_METADATA_GUID) + *have_guid_r = TRUE; + o_stream_send_str(output, line); + o_stream_send_str(output, "\n"); + } +} + +/* Write a repaired copy of file to output. The first start_offset bytes are + copied as-is, or a new file header is written if start_offset is 0. The rest + of the input is then scanned magic by magic and each message found is + written with a valid header, its copied metadata, a generated GUID if it + had none, and a recomputed virtual size. temp_path is used only in error + messages. Returns 0 on success, -1 on error. */ +static int +dbox_file_fix_write_stream(struct dbox_file *file, uoff_t start_offset, + const char *temp_path, struct ostream *output) +{ + struct dbox_message_header msg_hdr; + uoff_t offset, msg_size, hdr_offset, body_offset; + bool pre, write_header, have_guid; + struct message_size body; + struct istream *body_input; + uint8_t guid_128[16]; + int ret; + + i_stream_seek(file->input, 0); + if (start_offset > 0) { + /* copy the valid data */ + if (stream_copy(file, output, temp_path, start_offset) < 0) + return -1; + } else { + /* the file header is broken. 
recreate it */ + if (dbox_file_header_write(file, output) < 0) { + dbox_file_set_syscall_error(file, "write()"); + return -1; + } + } + + while ((ret = dbox_file_find_next_magic(file, &offset, &pre)) > 0) { + msg_size = offset - file->input->v_offset; + if (msg_size < 256 && pre) { + /* probably some garbage or some broken headers. + we most likely don't miss anything by skipping + over this data. */ + i_stream_skip(file->input, msg_size); + hdr_offset = file->input->v_offset; + ret = dbox_file_read_mail_header(file, &msg_size); + if (ret <= 0) { + if (ret < 0) + return -1; + dbox_file_skip_broken_header(file); + body_offset = file->input->v_offset; + msg_size = (uoff_t)-1; + } else { + i_stream_skip(file->input, + file->msg_header_size); + body_offset = file->input->v_offset; + i_stream_skip(file->input, msg_size); + } + + ret = dbox_file_find_next_magic(file, &offset, &pre); + if (ret <= 0) + break; + + if (!pre && msg_size == offset - body_offset) { + /* msg header ok, copy it */ + i_stream_seek(file->input, hdr_offset); + if (stream_copy(file, output, temp_path, + file->msg_header_size) < 0) + return -1; + write_header = FALSE; + } else { + /* msg header is broken. write our own. */ + i_stream_seek(file->input, body_offset); + if (msg_size != (uoff_t)-1) { + /* previous magic find might have + skipped too much. seek back and + make sure */ + ret = dbox_file_find_next_magic(file, &offset, &pre); + if (ret <= 0) + break; + } + + write_header = TRUE; + msg_size = offset - body_offset; + } + } else { + /* treat this data as a separate message. 
*/ + write_header = TRUE; + body_offset = file->input->v_offset; + } + /* write msg header */ + if (write_header) { + dbox_msg_header_fill(&msg_hdr, msg_size); + (void)o_stream_send(output, &msg_hdr, sizeof(msg_hdr)); + } + /* write msg body */ + i_assert(file->input->v_offset == body_offset); + if (stream_copy(file, output, temp_path, msg_size) < 0) + return -1; + i_assert(file->input->v_offset == offset); + + /* get message body size */ + i_stream_seek(file->input, body_offset); + body_input = i_stream_create_limit(file->input, msg_size); + ret = message_get_body_size(body_input, &body, NULL); + i_stream_unref(&body_input); + if (ret < 0) { + errno = file->input->stream_errno; + mail_storage_set_critical(&file->storage->storage, + "read(%s) failed: %m", file->current_path); + return -1; + } + + /* write msg metadata. */ + i_assert(file->input->v_offset == offset); + ret = dbox_file_metadata_skip_header(file); + if (ret < 0) + return -1; + o_stream_send_str(output, DBOX_MAGIC_POST); + if (ret == 0) + have_guid = FALSE; + else + dbox_file_copy_metadata(file, output, &have_guid); + if (!have_guid) { + mail_generate_guid_128(guid_128); + o_stream_send_str(output, + t_strdup_printf("%c%s\n", DBOX_METADATA_GUID, + binary_to_hex(guid_128, sizeof(guid_128)))); + } + o_stream_send_str(output, + t_strdup_printf("%c%llx\n", DBOX_METADATA_VIRTUAL_SIZE, + (unsigned long long)body.virtual_size)); + o_stream_send_str(output, "\n"); + if (output->stream_errno != 0) { + errno = output->stream_errno; + mail_storage_set_critical(&file->storage->storage, + "write(%s) failed: %m", temp_path); + return -1; + } + } + /* the loop also ends when the magic scan fails. report that as an error */ + return ret < 0 ? -1 : 0; +} + +/* Write the fixed file under a temporary name in the storage directory, + rename() it over file->current_path and reopen the file. */ +int dbox_file_fix(struct dbox_file *file, uoff_t start_offset) +{ + struct ostream *output; + const char *temp_path; + char *temp_fname; + bool deleted; + int fd, ret; + + i_assert(file->input != NULL); + + temp_fname = dbox_generate_tmp_filename(); + temp_path = t_strdup_printf("%s/%s", file->storage->storage_dir, + temp_fname); + i_free(temp_fname); + + fd = 
dbox_create_fd(file->storage, temp_path); + if (fd == -1) + return -1; + + output = o_stream_create_fd_file(fd, 0, FALSE); + ret = dbox_file_fix_write_stream(file, start_offset, temp_path, output); + o_stream_unref(&output); + if (close(fd) < 0) { + mail_storage_set_critical(&file->storage->storage, + "close(%s) failed: %m", temp_path); + ret = -1; + } + if (ret < 0) { + if (unlink(temp_path) < 0) { + mail_storage_set_critical(&file->storage->storage, + "unlink(%s) failed: %m", temp_path); + } + return -1; + } + if (rename(temp_path, file->current_path) < 0) { + mail_storage_set_critical(&file->storage->storage, + "rename(%s, %s) failed: %m", + temp_path, file->current_path); + return -1; + } + + /* file was successfully recreated - reopen it */ + dbox_file_close(file); + if (dbox_file_open(file, &deleted) <= 0) { + mail_storage_set_critical(&file->storage->storage, + "dbox_file_fix(%s): reopening file failed", + file->current_path); + return -1; + } + return 0; +} diff --git a/src/lib-storage/index/dbox/dbox-file.c b/src/lib-storage/index/dbox/dbox-file.c index 4650b430a8..7e93895531 100644 --- a/src/lib-storage/index/dbox/dbox-file.c +++ b/src/lib-storage/index/dbox/dbox-file.c @@ -21,9 +21,7 @@ #include #include -static int dbox_file_metadata_skip_header(struct dbox_file *file); - -static char *dbox_generate_tmp_filename(void) +char *dbox_generate_tmp_filename(void) { static unsigned int create_count = 0; @@ -337,6 +335,7 @@ static int dbox_file_parse_header(struct dbox_file *file, const char *line) static int dbox_file_read_header(struct dbox_file *file) { const char *line; + unsigned int hdr_size; int ret; i_stream_seek(file->input, 0); @@ -351,10 +350,12 @@ static int dbox_file_read_header(struct dbox_file *file) dbox_file_set_syscall_error(file, "read()"); return -1; } - file->file_header_size = file->input->v_offset; + hdr_size = file->input->v_offset; T_BEGIN { ret = dbox_file_parse_header(file, line) < 0 ? 
0 : 1; } T_END; + if (ret > 0) + file->file_header_size = hdr_size; return ret; } @@ -412,7 +413,7 @@ int dbox_file_open(struct dbox_file *file, bool *deleted_r) dbox_file_read_header(file); } -static int dbox_create_fd(struct dbox_storage *storage, const char *path) +int dbox_create_fd(struct dbox_storage *storage, const char *path) { mode_t old_mask; int fd; @@ -434,17 +435,10 @@ static int dbox_create_fd(struct dbox_storage *storage, const char *path) return fd; } -static int dbox_file_create(struct dbox_file *file) +int dbox_file_header_write(struct dbox_file *file, struct ostream *output) { string_t *hdr; - i_assert(file->fd == -1); - - file->fd = dbox_create_fd(file->storage, file->current_path); - if (file->fd == -1) - return -1; - file->output = o_stream_create_fd_file(file->fd, 0, FALSE); - hdr = t_str_new(128); str_printfa(hdr, "%u %c%x %c%x\n", DBOX_VERSION, DBOX_HEADER_MSG_HEADER_SIZE, @@ -453,8 +447,18 @@ static int dbox_file_create(struct dbox_file *file) file->file_header_size = str_len(hdr); file->msg_header_size = sizeof(struct dbox_message_header); + return o_stream_send(output, str_data(hdr), str_len(hdr)); +} - if (o_stream_send(file->output, str_data(hdr), str_len(hdr)) < 0) { +static int dbox_file_create(struct dbox_file *file) +{ + i_assert(file->fd == -1); + + file->fd = dbox_create_fd(file->storage, file->current_path); + if (file->fd == -1) + return -1; + file->output = o_stream_create_fd_file(file->fd, 0, FALSE); + if (dbox_file_header_write(file, file->output) < 0) { dbox_file_set_syscall_error(file, "write()"); return -1; } @@ -490,6 +494,7 @@ void dbox_file_close(struct dbox_file *file) dbox_file_set_syscall_error(file, "close()"); file->fd = -1; } + file->cur_offset = (uoff_t)-1; } int dbox_file_try_lock(struct dbox_file *file) @@ -525,8 +530,7 @@ void dbox_file_unlock(struct dbox_file *file) i_stream_sync(file->input); } -static int -dbox_file_read_mail_header(struct dbox_file *file, uoff_t *physical_size_r) +int 
dbox_file_read_mail_header(struct dbox_file *file, uoff_t *physical_size_r) { struct dbox_message_header hdr; struct stat st; @@ -556,11 +560,6 @@ dbox_file_read_mail_header(struct dbox_file *file, uoff_t *physical_size_r) dbox_file_set_syscall_error(file, "read()"); return -1; } - if (data[file->msg_header_size-1] != '\n') { - dbox_file_set_corrupted(file, "msg header doesn't end with LF"); - return 0; - } - memcpy(&hdr, data, I_MIN(sizeof(hdr), file->msg_header_size)); if (memcmp(hdr.magic_pre, DBOX_MAGIC_PRE, sizeof(hdr.magic_pre)) != 0) { /* probably broken offset */ @@ -568,6 +567,11 @@ dbox_file_read_mail_header(struct dbox_file *file, uoff_t *physical_size_r) return 0; } + if (data[file->msg_header_size-1] != '\n') { + dbox_file_set_corrupted(file, "msg header doesn't end with LF"); + return 0; + } + *physical_size_r = hex2dec(hdr.message_size_hex, sizeof(hdr.message_size_hex)); return 1; @@ -577,6 +581,7 @@ int dbox_file_get_mail_stream(struct dbox_file *file, uoff_t offset, uoff_t *physical_size_r, struct istream **stream_r, bool *expunged_r) { + uoff_t size; int ret; *expunged_r = FALSE; @@ -591,11 +596,12 @@ int dbox_file_get_mail_stream(struct dbox_file *file, uoff_t offset, offset = file->file_header_size; if (offset != file->cur_offset) { - file->cur_offset = offset; i_stream_seek(file->input, offset); - ret = dbox_file_read_mail_header(file, &file->cur_physical_size); + ret = dbox_file_read_mail_header(file, &size); if (ret <= 0) return ret; + file->cur_offset = offset; + file->cur_physical_size = size; } i_stream_seek(file->input, offset + file->msg_header_size); if (stream_r != NULL) { @@ -627,6 +633,11 @@ dbox_file_seek_next_at_metadata(struct dbox_file *file, uoff_t *offset) return 1; } +void dbox_file_seek_rewind(struct dbox_file *file) +{ + file->cur_offset = (uoff_t)-1; +} + int dbox_file_seek_next(struct dbox_file *file, uoff_t *offset_r, bool *last_r) { uoff_t offset, size; @@ -640,8 +651,10 @@ int dbox_file_seek_next(struct dbox_file *file, 
uoff_t *offset_r, bool *last_r) } else { offset = file->cur_offset + file->msg_header_size + file->cur_physical_size; - if ((ret = dbox_file_seek_next_at_metadata(file, &offset)) <= 0) + if ((ret = dbox_file_seek_next_at_metadata(file, &offset)) <= 0) { + *offset_r = file->cur_offset; return ret; + } } *offset_r = offset; @@ -652,12 +665,9 @@ int dbox_file_seek_next(struct dbox_file *file, uoff_t *offset_r, bool *last_r) *last_r = FALSE; ret = dbox_file_get_mail_stream(file, offset, &size, NULL, &expunged); - if (ret <= 0) - return ret; - if (*offset_r == 0) *offset_r = file->file_header_size; - return 1; + return ret; } static int @@ -761,7 +771,7 @@ int dbox_file_flush_append(struct dbox_file *file) return 0; } -static int dbox_file_metadata_skip_header(struct dbox_file *file) +int dbox_file_metadata_skip_header(struct dbox_file *file) { struct dbox_metadata_header metadata_hdr; const unsigned char *data; diff --git a/src/lib-storage/index/dbox/dbox-file.h b/src/lib-storage/index/dbox/dbox-file.h index bc3475c45f..b36dc65b62 100644 --- a/src/lib-storage/index/dbox/dbox-file.h +++ b/src/lib-storage/index/dbox/dbox-file.h @@ -116,6 +116,7 @@ struct dbox_file { unsigned int alt_path:1; unsigned int maildir_file:1; unsigned int deleted:1; + unsigned int corrupted:1; }; struct dbox_file * @@ -150,6 +151,8 @@ void dbox_file_unlock(struct dbox_file *file); int dbox_file_get_mail_stream(struct dbox_file *file, uoff_t offset, uoff_t *physical_size_r, struct istream **stream_r, bool *expunged_r); +/* Start seeking at the beginning of the file. */ +void dbox_file_seek_rewind(struct dbox_file *file); /* Seek to next message after current one. If there are no more messages, returns 0 and last_r is set to TRUE. Returns 1 if ok, 0 if file is corrupted, -1 if I/O error. */ @@ -178,6 +181,10 @@ const char *dbox_file_metadata_get(struct dbox_file *file, /* Move the file to alt path or back. 
*/ int dbox_file_move(struct dbox_file *file, bool alt_path); +/* Fix a broken dbox file by rename()ing over it with a fixed file. Everything + before start_offset is assumed to be valid and is simply copied. The file + is reopened afterwards. Returns 0 if ok, -1 if I/O error. */ +int dbox_file_fix(struct dbox_file *file, uoff_t start_offset); /* Fill dbox_message_header with given size. */ void dbox_msg_header_fill(struct dbox_message_header *dbox_msg_hdr, @@ -189,4 +196,11 @@ void dbox_file_set_syscall_error(struct dbox_file *file, const char *function); void dbox_file_set_corrupted(struct dbox_file *file, const char *reason, ...) ATTR_FORMAT(2, 3); +/* private: */ +char *dbox_generate_tmp_filename(void); +int dbox_create_fd(struct dbox_storage *storage, const char *path); +int dbox_file_header_write(struct dbox_file *file, struct ostream *output); +int dbox_file_read_mail_header(struct dbox_file *file, uoff_t *physical_size_r); +int dbox_file_metadata_skip_header(struct dbox_file *file); + #endif diff --git a/src/lib-storage/index/dbox/dbox-storage-rebuild.c b/src/lib-storage/index/dbox/dbox-storage-rebuild.c index b0abcba2ef..a64d78b8e8 100644 --- a/src/lib-storage/index/dbox/dbox-storage-rebuild.c +++ b/src/lib-storage/index/dbox/dbox-storage-rebuild.c @@ -134,8 +134,8 @@ static int rebuild_add_file(struct dbox_storage_rebuild_context *ctx, struct dbox_rebuild_msg *rec; uint32_t file_id; buffer_t *guid_buf; - uoff_t offset; - bool last; + uoff_t offset, prev_offset, size; + bool last, expunged, first, fixed = FALSE; int ret = 0; fname = strrchr(path, '/'); @@ -158,9 +158,38 @@ static int rebuild_add_file(struct dbox_storage_rebuild_context *ctx, DBOX_GUID_BIN_LEN); file = dbox_file_init_multi(ctx->storage, file_id); - while ((ret = dbox_file_seek_next(file, &offset, &last)) > 0) { - if ((ret = dbox_file_metadata_read(file)) <= 0) - break; + prev_offset = 0; + dbox_file_seek_rewind(file); + while ((ret = dbox_file_seek_next(file, &offset, &last)) >= 0) { + if 
(ret > 0) { + if ((ret = dbox_file_metadata_read(file)) < 0) + break; + } + + if (ret == 0) { + /* file is corrupted. fix it and retry. */ + if (fixed || last) + break; + first = prev_offset == 0; + if (prev_offset == 0) { + /* use existing file header if it was ok */ + prev_offset = offset; + } + if (dbox_file_fix(file, prev_offset) < 0) { + ret = -1; + break; + } + fixed = TRUE; + if (!first) { + /* seek to the offset where we last left off */ + ret = dbox_file_get_mail_stream(file, + prev_offset, &size, NULL, &expunged); + if (ret <= 0) + break; + } + continue; + } + prev_offset = offset; guid = dbox_file_metadata_get(file, DBOX_METADATA_GUID); if (guid == NULL) { @@ -193,9 +222,8 @@ static int rebuild_add_file(struct dbox_storage_rebuild_context *ctx, hash_table_insert(ctx->guid_hash, rec->guid_128, rec); } } - if (ret == 0) { - /* FIXME: file is corrupted. should we try to fix it? */ - } + if (ret == 0 && !last) + i_error("dbox rebuild: Failed to fix file %s", path); dbox_file_unref(&file); return ret < 0 ? -1 : 0; }