]> git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
Complain if file isn't in mbox format. Complain if From-line wasn't found
author Timo Sirainen <tss@iki.fi>
Wed, 16 Jun 2004 05:38:23 +0000 (08:38 +0300)
committer Timo Sirainen <tss@iki.fi>
Wed, 16 Jun 2004 05:38:23 +0000 (08:38 +0300)
from expected location. Parser should now correctly handle any kind of mbox,
no matter how corrupted.

--HG--
branch : HEAD

src/lib-storage/index/mbox/istream-raw-mbox.c
src/lib-storage/index/mbox/istream-raw-mbox.h
src/lib-storage/index/mbox/mbox-mail.c
src/lib-storage/index/mbox/mbox-sync-rewrite.c
src/lib-storage/index/mbox/mbox-sync-update.c
src/lib-storage/index/mbox/mbox-sync.c

index ed6837009d71b732b446aa35ee2172d43e427a16..d072997383960905f1c6b0575d8bd053942a30ff 100644 (file)
@@ -14,6 +14,11 @@ struct raw_mbox_istream {
 
        uoff_t from_offset, hdr_offset, body_offset, mail_size;
        struct istream *input;
+       uoff_t input_peak_offset;
+
+       unsigned int corrupted:1;
+       unsigned int eom:1;
+       unsigned int next_eof:1;
 };
 
 static void _close(struct _iostream *stream __attr_unused__)
@@ -93,6 +98,7 @@ static int mbox_read_from_line(struct raw_mbox_istream *rstream)
 
        /* we'll skip over From-line */
        rstream->istream.istream.v_offset += line_pos+1;
+       i_stream_skip(rstream->input, line_pos+1);
        rstream->hdr_offset = rstream->istream.istream.v_offset;
        return 0;
 }
@@ -105,8 +111,19 @@ static ssize_t _read(struct _istream *stream)
        const char *fromp;
        char *sender, eoh_char;
        time_t received_time;
-       size_t i, pos, new_pos;
-       ssize_t ret;
+       size_t i, pos, new_pos, from_start_pos;
+       ssize_t ret = 0;
+
+       if (rstream->eom) {
+               if (rstream->body_offset == (uoff_t)-1) {
+                       /* missing \n from headers */
+                       rstream->body_offset =
+                               stream->istream.v_offset +
+                               (stream->pos - stream->skip);
+               }
+               stream->istream.eof = rstream->next_eof;
+               return -1;
+       }
 
        i_stream_seek(rstream->input, stream->istream.v_offset);
 
@@ -117,90 +134,90 @@ static ssize_t _read(struct _istream *stream)
        do {
                ret = i_stream_read(rstream->input);
                buf = i_stream_get_data(rstream->input, &pos);
-       } while (ret > 0 && pos <= 6);
+       } while (ret > 0 &&
+                stream->istream.v_offset + pos <= rstream->input_peak_offset);
 
-       if (pos == 0 || (pos == 1 && buf[0] == '\n')) {
-               /* EOF */
-               stream->pos = 0;
-               stream->istream.eof = TRUE;
-               rstream->mail_size = stream->istream.v_offset -
+       if (ret < 0) {
+               if (ret == -2)
+                       return -2;
+
+               /* we've read the whole file, final byte should be
+                  the \n trailer */
+               if (pos > 0 && buf[pos-1] == '\n')
+                       pos--;
+
+               i_assert(pos >= stream->pos);
+               ret = pos == stream->pos ? -1 :
+                       (ssize_t)(pos - stream->pos);
+
+               stream->buffer = buf;
+               stream->pos = pos;
+
+               rstream->eom = TRUE;
+               rstream->next_eof = TRUE;
+               rstream->mail_size = stream->istream.v_offset + pos -
                        rstream->hdr_offset;
-               return -1;
+               return ret < 0 ? _read(stream) : ret;
        }
 
        if (stream->istream.v_offset == rstream->from_offset) {
+               /* beginning of message, we haven't yet read our From-line */
                if (mbox_read_from_line(rstream) < 0) {
                        stream->pos = 0;
                        stream->istream.eof = TRUE;
+                       rstream->corrupted = TRUE;
                        return -1;
                }
-               return _read(stream);
-       }
-
-       i = 0;
-
-       if (pos >= 31) {
-               if (memcmp(buf, "\nFrom ", 6) == 0) {
-                       if (mbox_from_parse(buf+6, pos-6,
-                                           &received_time, &sender) == 0) {
-                               rstream->next_received_time = received_time;
-                               rstream->mail_size = stream->istream.v_offset -
-                                       rstream->hdr_offset;
-
-                               i_free(rstream->next_sender);
-                               rstream->next_sender = sender;
-                               i_assert(stream->pos == 0);
-                               return -1;
-                       }
-
-                       /* we don't want to get stuck at invalid From-line */
-                       i += 6;
-               }
-       } else if (ret == -1) {
-               /* last few bytes, can't contain From-line */
-               if (buf[pos-1] == '\n') {
-                       /* last LF doesn't belong to last message */
-                       pos--;
-               }
-
-               if (rstream->body_offset == (uoff_t)-1) {
-                       /* find body_offset */
-                       for (; i < pos; i++) {
-                               if (buf[i] == '\n' && i > 0 &&
-                                   buf[i-1] == '\n') {
-                                       rstream->body_offset =
-                                               stream->istream.v_offset +
-                                               i + 1;
-                                       break;
-                               }
-                       }
-               }
-
-               ret = pos <= stream->pos ? -1 :
-                       (ssize_t) (pos - stream->pos);
-
-               rstream->mail_size = stream->istream.v_offset + pos -
-                       rstream->hdr_offset;
 
-               stream->buffer = buf;
-               stream->pos = pos;
-               stream->istream.eof = ret == -1;
-               return ret;
+               /* got it. we don't want to return it however,
+                  so start again from headers */
+               buf = i_stream_get_data(rstream->input, &pos);
+               if (pos == 0)
+                       return _read(stream);
        }
 
        /* See if we have From-line here - note that it works right only
           because all characters are different in mbox_from. */
+        fromp = mbox_from; from_start_pos = 0;
        eoh_char = rstream->body_offset == (uoff_t)-1 ? '\n' : '\0';
-       for (fromp = mbox_from; i < pos; i++) {
+       for (i = 0; i < pos; i++) {
                if (buf[i] == eoh_char && i > 0 && buf[i-1] == '\n') {
                        rstream->body_offset = stream->istream.v_offset + i + 1;
                        eoh_char = '\0';
                }
                if (buf[i] == *fromp) {
                        if (*++fromp == '\0') {
-                               /* potential From-line - stop here */
+                               /* potential From-line, see if we have the
+                                  rest of the line buffered.
+                                  FIXME: if From-line is longer than input
+                                  buffer, we break. probably irrelevant.. */
                                i++;
-                               break;
+                               from_start_pos = i;
+                               fromp = mbox_from;
+                       } else if (from_start_pos != 0) {
+                               /* we have the whole From-line here now.
+                                  See if it's a valid one. */
+                               if (mbox_from_parse(buf + from_start_pos,
+                                                   pos - from_start_pos,
+                                                   &received_time,
+                                                   &sender) == 0) {
+                                       /* yep, we stop here. */
+                                       rstream->next_received_time =
+                                               received_time;
+                                       i_free(rstream->next_sender);
+                                       rstream->next_sender = sender;
+                                       rstream->eom = TRUE;
+
+                                        /* rewind "\nFrom " */
+                                       from_start_pos -= 6;
+
+                                       rstream->mail_size =
+                                               stream->istream.v_offset +
+                                               from_start_pos -
+                                               rstream->hdr_offset;
+                                       break;
+                               }
+                               from_start_pos = 0;
                        }
                } else {
                        fromp = mbox_from;
@@ -208,27 +225,36 @@ static ssize_t _read(struct _istream *stream)
                                fromp++;
                }
        }
-       new_pos = i - (fromp - mbox_from);
 
-       ret = new_pos <= stream->pos ? -1 :
-               (ssize_t) (pos - stream->pos);
-       stream->buffer = buf;
-       stream->pos = new_pos;
+       /* we want to go at least one byte further next time */
+       rstream->input_peak_offset = stream->istream.v_offset + i;
 
-       if (i < pos) {
-               /* beginning from From-line, try again
-                  FIXME: loops forever if we don't skip forward */
-               ret = 0;
+       if (from_start_pos != 0) {
+               /* we're waiting for the \n at the end of From-line */
+               new_pos = from_start_pos;
+       } else {
+               /* leave out the beginnings of potential From-line */
+               new_pos = i - (fromp - mbox_from);
        }
+       i_assert(new_pos > stream->pos);
+       ret = new_pos - stream->pos;
 
+       stream->buffer = buf;
+       stream->pos = new_pos;
        return ret;
 }
 
 static void _seek(struct _istream *stream, uoff_t v_offset)
 {
+       struct raw_mbox_istream *rstream = (struct raw_mbox_istream *)stream;
+
        stream->istream.v_offset = v_offset;
        stream->skip = stream->pos = 0;
        stream->buffer = NULL;
+
+        rstream->input_peak_offset = 0;
+       rstream->eom = FALSE;
+       rstream->next_eof = FALSE;
 }
 
 struct istream *i_stream_create_raw_mbox(pool_t pool, struct istream *input)
@@ -322,7 +348,7 @@ uoff_t istream_raw_mbox_get_body_offset(struct istream *stream)
        offset = stream->v_offset;
        i_stream_seek(stream, rstream->hdr_offset);
        while (rstream->body_offset == (uoff_t)-1) {
-               i_stream_get_data(rstream->input, &pos);
+               i_stream_get_data(stream, &pos);
                i_stream_skip(stream, pos);
 
                if (_read(&rstream->istream) < 0)
@@ -409,22 +435,33 @@ void istream_raw_mbox_next(struct istream *stream, uoff_t body_size)
        if (stream->v_offset != rstream->from_offset)
                i_stream_seek(stream, rstream->from_offset);
        i_stream_seek(rstream->input, rstream->from_offset);
+
+        rstream->input_peak_offset = 0;
+       rstream->eom = FALSE;
+       rstream->next_eof = FALSE;
 }
 
-void istream_raw_mbox_seek(struct istream *stream, uoff_t offset)
+int istream_raw_mbox_seek(struct istream *stream, uoff_t offset)
 {
        struct raw_mbox_istream *rstream =
                (struct raw_mbox_istream *)stream->real_stream;
+       int check;
+
+       rstream->corrupted = FALSE;
+       rstream->eom = FALSE;
+       rstream->next_eof = FALSE;
+        rstream->input_peak_offset = 0;
 
        if (rstream->mail_size != (uoff_t)-1 &&
            rstream->hdr_offset + rstream->mail_size == offset) {
                istream_raw_mbox_next(stream, (uoff_t)-1);
-               return;
+               return 0;
        }
 
        if (offset == rstream->from_offset) {
                /* back to beginning of current message */
                offset = rstream->hdr_offset;
+               check = offset == 0;
        } else {
                rstream->body_offset = (uoff_t)-1;
                rstream->mail_size = (uoff_t)-1;
@@ -438,10 +475,15 @@ void istream_raw_mbox_seek(struct istream *stream, uoff_t offset)
 
                 rstream->from_offset = offset;
                rstream->hdr_offset = offset;
+               check = TRUE;
        }
 
        i_stream_seek(stream, offset);
        i_stream_seek(rstream->input, offset);
+
+       if (check)
+               (void)_read(&rstream->istream);
+       return rstream->corrupted ? -1 : 0;
 }
 
 void istream_raw_mbox_flush(struct istream *stream)
index a1b89019c6bf7e28f223bd5c2777c213d9162716..e428d6bb801d65f7f9a005c6f50eeaf82a896cb7 100644 (file)
@@ -29,8 +29,9 @@ const char *istream_raw_mbox_get_sender(struct istream *stream);
 void istream_raw_mbox_next(struct istream *stream, uoff_t body_size);
 
 /* Seek to message at given offset. offset must point to beginning of
-   "\nFrom ", or 0 for beginning of file. */
-void istream_raw_mbox_seek(struct istream *stream, uoff_t offset);
+   "\nFrom ", or 0 for beginning of file. Returns -1 if the offset doesn't
+   contain a valid From-line. */
+int istream_raw_mbox_seek(struct istream *stream, uoff_t offset);
 
 /* Flush all buffering. Call if you modify the mbox. */
 void istream_raw_mbox_flush(struct istream *stream);
index 84de015e12358e2441d0469ecb8700e3243ce54b..0d1180dfe651934d8424c87587a2b085461ea90b 100644 (file)
@@ -16,6 +16,7 @@ static int mbox_mail_seek(struct index_mail *mail)
 {
        struct index_mailbox *ibox = mail->ibox;
        const void *data;
+       uint64_t offset;
 
        if (ibox->mbox_lock_type == F_UNLCK) {
                if (mbox_sync(ibox, FALSE, TRUE) < 0)
@@ -34,7 +35,14 @@ static int mbox_mail_seek(struct index_mail *mail)
                return -1;
        }
 
-       istream_raw_mbox_seek(ibox->mbox_stream, *((const uint64_t *)data));
+       offset = *((const uint64_t *)data);
+       if (istream_raw_mbox_seek(ibox->mbox_stream, offset) < 0) {
+               mail_storage_set_critical(ibox->box.storage,
+                       "Cached message offset %s is invalid for mbox file %s",
+                       dec2str(offset), ibox->path);
+               mail_index_mark_corrupted(ibox->index);
+               return -1;
+       }
        return 0;
 }
 
index a6b4c64e8b09e1e40ecf977e63b5755277aa4012..adb383cc236da2738cffc38d84d7f2a1e76a3f8a 100644 (file)
@@ -54,6 +54,8 @@ static void mbox_sync_headers_add_space(struct mbox_sync_mail_context *ctx,
        const unsigned char *data;
        void *p;
 
+       i_assert(size < SSIZE_T_MAX);
+
        /* Append at the end of X-Keywords header,
           or X-UID if it doesn't exist */
        pos = ctx->hdr_pos[MBOX_HDR_X_KEYWORDS] != (size_t)-1 ?
@@ -216,7 +218,7 @@ static int mbox_sync_read_and_move(struct mbox_sync_context *sync_ctx,
        uint32_t old_prev_msg_uid;
        uoff_t offset;
 
-       i_stream_seek(sync_ctx->file_input, mails[idx].offset);
+       i_stream_seek(sync_ctx->input, mails[idx].offset);
 
        memset(&mail_ctx, 0, sizeof(mail_ctx));
        mail_ctx.sync_ctx = sync_ctx;
@@ -228,10 +230,10 @@ static int mbox_sync_read_and_move(struct mbox_sync_context *sync_ctx,
 
        /* mbox_sync_parse_next_mail() checks that UIDs are growing,
           so we have to fool it. */
-        old_prev_msg_uid = sync_ctx->prev_msg_uid;
-        sync_ctx->prev_msg_uid = mails[idx].uid-1;
+       old_prev_msg_uid = sync_ctx->prev_msg_uid;
+       sync_ctx->prev_msg_uid = mails[idx].uid-1;
 
-       mbox_sync_parse_next_mail(sync_ctx->file_input, &mail_ctx, TRUE);
+       mbox_sync_parse_next_mail(sync_ctx->input, &mail_ctx, TRUE);
        if (mails[idx].space != 0)
                mbox_sync_update_header_from(&mail_ctx, &mails[idx]);
        else {
@@ -242,7 +244,7 @@ static int mbox_sync_read_and_move(struct mbox_sync_context *sync_ctx,
        }
 
        i_assert(mail_ctx.mail.space == mails[idx].space);
-        sync_ctx->prev_msg_uid = old_prev_msg_uid;
+       sync_ctx->prev_msg_uid = old_prev_msg_uid;
 
        if (mail_ctx.mail.space <= 0)
                mbox_sync_headers_add_space(&mail_ctx, extra_per_mail);
@@ -257,7 +259,7 @@ static int mbox_sync_read_and_move(struct mbox_sync_context *sync_ctx,
        /* now we have to move it. first move the body of the message,
           then write the header and leave the extra space to beginning of
           headers. */
-       offset = sync_ctx->file_input->v_offset;
+       offset = sync_ctx->input->v_offset;
        if (mbox_move(sync_ctx, offset + mails[idx+1].space, offset,
                      *end_offset - offset - mails[idx+1].space) < 0)
                return -1;
@@ -284,7 +286,8 @@ static int mbox_sync_fill_leftover(struct mbox_sync_context *sync_ctx,
        struct mbox_sync_mail_context mail_ctx;
        uint32_t old_prev_msg_uid;
 
-       i_stream_seek(sync_ctx->file_input, mails[idx].offset);
+       i_assert(start_offset < end_offset);
+       i_stream_seek(sync_ctx->input, mails[idx].offset);
 
        memset(&mail_ctx, 0, sizeof(mail_ctx));
        mail_ctx.sync_ctx = sync_ctx;
@@ -299,12 +302,12 @@ static int mbox_sync_fill_leftover(struct mbox_sync_context *sync_ctx,
         old_prev_msg_uid = sync_ctx->prev_msg_uid;
         sync_ctx->prev_msg_uid = mails[idx].uid-1;
 
-       mbox_sync_parse_next_mail(sync_ctx->file_input, &mail_ctx, TRUE);
+       mbox_sync_parse_next_mail(sync_ctx->input, &mail_ctx, TRUE);
        mbox_sync_update_header_from(&mail_ctx, &mails[idx]);
 
         sync_ctx->prev_msg_uid = old_prev_msg_uid;
 
-       mbox_sync_headers_add_space(&mail_ctx, end_offset - start_offset);
+       mbox_sync_headers_add_space(&mail_ctx,end_offset - start_offset);
 
        if (pwrite_full(sync_ctx->fd, str_data(mail_ctx.header),
                        str_len(mail_ctx.header), start_offset) < 0) {
index f398d8a630dc497a5cc1f2c14433fa465ee50291..348a012ced7daef2550d73247f9e4c480028e570 100644 (file)
@@ -105,6 +105,12 @@ static void mbox_sync_add_missing_headers(struct mbox_sync_mail_context *ctx)
        old_hdr_size = ctx->body_offset - ctx->hdr_offset;
        new_hdr_size = str_len(ctx->header);
 
+       if (new_hdr_size > 0 &&
+           str_data(ctx->header)[new_hdr_size-1] != '\n') {
+               /* broken header - doesn't end with \n. fix it. */
+               str_append_c(ctx->header, '\n');
+       }
+
        if (ctx->mail.uid == ctx->sync_ctx->first_uid &&
            ctx->hdr_pos[MBOX_HDR_X_IMAPBASE] == (size_t)-1) {
                if (ctx->sync_ctx->base_uid_validity == 0) {
index 51f8ce1a9cb3c006e11493c2a4a100d4a82645f3..54f4d0198427cfce770aff38f6e817fda75b23f3 100644 (file)
@@ -112,8 +112,13 @@ static int mbox_sync_lock(struct mbox_sync_context *sync_ctx, int lock_type)
        /* same as before. we'll have to fix mbox stream to contain
           correct from_offset, hdr_offset and body_offset. so, seek
           to from_offset and read through the header. */
-       istream_raw_mbox_seek(sync_ctx->input, old_from_offset);
-        (void)istream_raw_mbox_get_body_offset(sync_ctx->input);
+       if (istream_raw_mbox_seek(sync_ctx->input, old_from_offset) < 0) {
+               mail_storage_set_critical(ibox->box.storage,
+                       "Message offset %s changed unexpectedly for mbox file "
+                       "%s", dec2str(old_from_offset), sync_ctx->ibox->path);
+               return 0;
+       }
+       (void)istream_raw_mbox_get_body_offset(sync_ctx->input);
        i_stream_seek(sync_ctx->input, old_offset);
        return 1;
 }
@@ -299,8 +304,8 @@ mbox_sync_read_index_rec(struct mbox_sync_context *sync_ctx,
        if (rec != NULL && rec->uid != uid) {
                /* new UID in the middle of the mailbox - shouldn't happen */
                mail_storage_set_critical(sync_ctx->ibox->box.storage,
-                       "mbox sync: UID inserted in the middle of mailbox "
-                       "(%u > %u)", rec->uid, uid);
+                       "mbox sync: UID inserted in the middle of mailbox %s "
+                       "(%u > %u)", sync_ctx->ibox->path, rec->uid, uid);
                mail_index_mark_corrupted(sync_ctx->ibox->index);
                return -1;
        }
@@ -620,7 +625,13 @@ mbox_sync_seek_to_uid(struct mbox_sync_context *sync_ctx, uint32_t uid)
 
         /* set to -1, since they're always increased later */
        sync_ctx->seq = sync_ctx->idx_seq = seq-1;
-       istream_raw_mbox_seek(sync_ctx->input, offset);
+       if (istream_raw_mbox_seek(sync_ctx->input, offset) < 0) {
+               mail_storage_set_critical(sync_ctx->ibox->box.storage,
+                       "Cached message offset %s is invalid for mbox file %s",
+                       dec2str(offset), sync_ctx->ibox->path);
+               mail_index_mark_corrupted(sync_ctx->ibox->index);
+               return -1;
+       }
         (void)istream_raw_mbox_get_body_offset(sync_ctx->input);
        return 0;
 }
@@ -634,9 +645,14 @@ static int mbox_sync_loop(struct mbox_sync_context *sync_ctx,
        uoff_t offset;
        int ret, expunged;
 
-       if (min_message_count != 0)
-               istream_raw_mbox_seek(sync_ctx->input, 0);
-       else {
+       if (min_message_count != 0) {
+               if (istream_raw_mbox_seek(sync_ctx->input, 0) < 0) {
+                       /* doesn't begin with a From-line */
+                       mail_storage_set_error(sync_ctx->ibox->box.storage,
+                               "Mailbox isn't a valid mbox file");
+                       return -1;
+               }
+       } else {
                /* we sync only what we need to. jump to first record that
                   needs updating */
                if (sync_ctx->sync_rec.uid1 == 0) {