git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
mbox: Don't stop at From_-lines in the message bodies. Use Content-Length:
author Timo Sirainen <tss@iki.fi>
Sun, 4 May 2008 13:57:58 +0000 (16:57 +0300)
committer Timo Sirainen <tss@iki.fi>
Sun, 4 May 2008 13:57:58 +0000 (16:57 +0300)
header to figure out if it belongs to a message body or not.

--HG--
branch : HEAD

src/lib-storage/index/mbox/istream-raw-mbox.c
src/lib-storage/index/mbox/istream-raw-mbox.h
src/lib-storage/index/mbox/mbox-mail.c

index 37c2d7811076bade3e54edafb4b6148c71c585a7..784f754815c37e98cd3a9127c6132f137ca6756c 100644 (file)
@@ -238,15 +238,19 @@ static ssize_t i_stream_raw_mbox_read(struct istream_private *stream)
                                   FIXME: if From-line is longer than input
                                   buffer, we break. probably irrelevant.. */
                                i++;
-                               from_after_pos = i;
-                               from_start_pos = i - 6;
-                               if (from_start_pos > 0 &&
-                                   buf[from_start_pos-1] == '\r') {
-                                       /* CR also belongs to it. */
-                                       crlf_ending = TRUE;
-                                       from_start_pos--;
-                               } else {
-                                       crlf_ending = FALSE;
+                               if (rstream->hdr_offset + rstream->mail_size ==
+                                   stream->istream.v_offset + i - 6 ||
+                                   rstream->mail_size == (uoff_t)-1) {
+                                       from_after_pos = i;
+                                       from_start_pos = i - 6;
+                                       if (from_start_pos > 0 &&
+                                           buf[from_start_pos-1] == '\r') {
+                                               /* CR also belongs to it. */
+                                               crlf_ending = TRUE;
+                                               from_start_pos--;
+                                       } else {
+                                               crlf_ending = FALSE;
+                                       }
                                }
                                fromp = mbox_from;
                        } else if (from_start_pos != (size_t)-1) {
@@ -290,6 +294,17 @@ static ssize_t i_stream_raw_mbox_read(struct istream_private *stream)
                        new_pos--;
        }
 
+       if (stream->istream.v_offset -
+           rstream->hdr_offset + new_pos > rstream->mail_size) {
+               /* istream_raw_mbox_set_next_offset() used invalid
+                  cached next_offset? */
+               i_error("Unexpectedly lost From-line at %"PRIuUOFF_T,
+                       rstream->hdr_offset + rstream->mail_size);
+               rstream->eof = TRUE;
+               rstream->corrupted = TRUE;
+               return -1;
+       }
+
        stream->buffer = buf;
        if (new_pos == stream->pos) {
                if (stream->istream.eof || ret > 0)
@@ -379,8 +394,7 @@ static int istream_raw_mbox_is_valid_from(struct raw_mbox_istream *rstream)
        char *sender;
 
        /* minimal: "From x Thu Nov 29 22:33:52 2001" = 31 chars */
-       if (i_stream_read_data(rstream->istream.parent, &data, &size, 30) == -1)
-               return -1;
+       (void)i_stream_read_data(rstream->istream.parent, &data, &size, 30);
 
        if ((size == 1 && data[0] == '\n') ||
            (size == 2 && data[0] == '\r' && data[1] == '\n')) {
@@ -469,33 +483,41 @@ uoff_t istream_raw_mbox_get_body_offset(struct istream *stream)
        return rstream->body_offset;
 }
 
-uoff_t istream_raw_mbox_get_body_size(struct istream *stream, uoff_t body_size)
+uoff_t istream_raw_mbox_get_body_size(struct istream *stream,
+                                     uoff_t expected_body_size)
 {
        struct raw_mbox_istream *rstream =
                (struct raw_mbox_istream *)stream->real_stream;
        const unsigned char *data;
        size_t size;
-       uoff_t old_offset;
+       uoff_t old_offset, body_size;
 
        i_assert(rstream->hdr_offset != (uoff_t)-1);
        i_assert(rstream->body_offset != (uoff_t)-1);
 
-       if (rstream->mail_size != (uoff_t)-1) {
-               return rstream->mail_size -
-                       (rstream->body_offset - rstream->hdr_offset);
-       }
-
+       body_size = rstream->mail_size == (uoff_t)-1 ? (uoff_t)-1 :
+               rstream->mail_size - (rstream->body_offset -
+                                     rstream->hdr_offset);
        old_offset = stream->v_offset;
-       if (body_size != (uoff_t)-1) {
+       if (expected_body_size != (uoff_t)-1) {
+               /* if we already have the existing body size, use it as long as
+                  it's >= expected body_size. otherwise the previous parsing
+                  may have stopped at a From_-line that belongs to the body. */
+               if (body_size != (uoff_t)-1 && body_size >= expected_body_size)
+                       return body_size;
+
                i_stream_seek(rstream->istream.parent,
-                             rstream->body_offset + body_size);
+                             rstream->body_offset + expected_body_size);
                if (istream_raw_mbox_is_valid_from(rstream) > 0) {
-                       rstream->mail_size = body_size +
+                       rstream->mail_size = expected_body_size +
                                (rstream->body_offset - rstream->hdr_offset);
                        i_stream_seek(stream, old_offset);
-                       return body_size;
+                       return expected_body_size;
                }
+               /* invalid expected_body_size */
        }
+       if (body_size != (uoff_t)-1)
+               return body_size;
 
        /* have to read through the message body */
        while (i_stream_read_data(stream, &data, &size, 0) > 0)
@@ -535,12 +557,13 @@ bool istream_raw_mbox_has_crlf_ending(struct istream *stream)
        return rstream->crlf_ending;
 }
 
-void istream_raw_mbox_next(struct istream *stream, uoff_t body_size)
+void istream_raw_mbox_next(struct istream *stream, uoff_t expected_body_size)
 {
        struct raw_mbox_istream *rstream =
                (struct raw_mbox_istream *)stream->real_stream;
+       uoff_t body_size;
 
-       body_size = istream_raw_mbox_get_body_size(stream, body_size);
+       body_size = istream_raw_mbox_get_body_size(stream, expected_body_size);
        rstream->mail_size = (uoff_t)-1;
 
        rstream->received_time = rstream->next_received_time;
@@ -606,6 +629,14 @@ int istream_raw_mbox_seek(struct istream *stream, uoff_t offset)
        return rstream->corrupted ? -1 : 0;
 }
 
+void istream_raw_mbox_set_next_offset(struct istream *stream, uoff_t offset)
+{
+       struct raw_mbox_istream *rstream =
+               (struct raw_mbox_istream *)stream->real_stream;
+       /* Record the current mail's size as the distance from its header
+          offset to the given offset of the next message.
+          NOTE(review): nothing here checks that hdr_offset is set or that
+          offset >= hdr_offset -- confirm that callers guarantee both. */
+       rstream->mail_size = offset - rstream->hdr_offset;
+}
+
 bool istream_raw_mbox_is_eof(struct istream *stream)
 {
        struct raw_mbox_istream *rstream =
index b6a48b4857bc23e82f3a492d44ffca75a104b492..d6319d0e0473d1e3511d4508c04e9e895413c079 100644 (file)
@@ -12,10 +12,11 @@ uoff_t istream_raw_mbox_get_header_offset(struct istream *stream);
 /* Return offset to beginning of the body. */
 uoff_t istream_raw_mbox_get_body_offset(struct istream *stream);
 
-/* Return the number of bytes in the body of this message. If body_size isn't
-   (uoff_t)-1, we'll use it as potentially valid body size to avoid actually
-   reading through the whole message. */
-uoff_t istream_raw_mbox_get_body_size(struct istream *stream, uoff_t body_size);
+/* Return the number of bytes in the body of this message. If
+   expected_body_size isn't (uoff_t)-1, we'll use it as potentially valid body
+   size to avoid actually reading through the whole message. */
+uoff_t istream_raw_mbox_get_body_size(struct istream *stream,
+                                     uoff_t expected_body_size);
 
 /* Return received time of current message, or (time_t)-1 if the timestamp is
    broken. */
@@ -26,14 +27,18 @@ const char *istream_raw_mbox_get_sender(struct istream *stream);
 /* Return TRUE if the empty line between this and the next mail contains CR. */
 bool istream_raw_mbox_has_crlf_ending(struct istream *stream);
 
-/* Jump to next message. If body_size isn't (uoff_t)-1, we'll use it as
-   potentially valid body size. */
-void istream_raw_mbox_next(struct istream *stream, uoff_t body_size);
+/* Jump to next message. If expected_body_size isn't (uoff_t)-1, we'll use it
+   as potentially valid body size. */
+void istream_raw_mbox_next(struct istream *stream, uoff_t expected_body_size);
 
 /* Seek to message at given offset. offset must point to beginning of
    "\nFrom ", or 0 for beginning of file. Returns -1 if the offset doesn't
    contain a valid From-line. */
 int istream_raw_mbox_seek(struct istream *stream, uoff_t offset);
+/* Set next message's start offset. If this isn't set, read stops at the next
+   valid From_-line, even if it belongs to the current message's body
+   (Content-Length: header can be used to determine that). */
+void istream_raw_mbox_set_next_offset(struct istream *stream, uoff_t offset);
 
 /* Returns TRUE if we've read the whole mbox. */
 bool istream_raw_mbox_is_eof(struct istream *stream);
index 5dc4d9159256856aea4484f0857cc0a7b59839cc..8a7b3fea44c610b30ccc3869ae21b3e7a4b4357e 100644 (file)
@@ -166,12 +166,37 @@ mbox_mail_get_special(struct mail *_mail, enum mail_fetch_field field,
        return index_mail_get_special(_mail, field, value_r);
 }
 
+/* Find the mbox file offset where the message following this mail begins.
+
+   Returns TRUE and stores the offset in *next_offset_r when it is known:
+   for the last indexed message it is derived from the synced mbox size,
+   otherwise it is looked up for sequence seq + 1.
+
+   Returns FALSE when the mail is being appended (its sequence is past the
+   index's message count) or when the lookup doesn't return a positive
+   result. *next_offset_r must not be used in that case. */
+static bool
+mbox_mail_get_next_offset(struct index_mail *mail, uoff_t *next_offset_r)
+{
+       struct mbox_mailbox *mbox = (struct mbox_mailbox *)mail->ibox;
+       struct mail *_mail = &mail->mail.mail;
+       const struct mail_index_header *hdr;
+       int trailer_size;
+
+       hdr = mail_index_get_header(mail->trans->trans_view);
+       if (_mail->seq > hdr->messages_count) {
+               /* we're appending a new message */
+               return FALSE;
+       }
+
+       if (_mail->seq == hdr->messages_count) {
+               /* last message, use the synced mbox size. the trailer is
+                  2 bytes when mails are saved with CRLF, otherwise 1
+                  (presumably the line ending after the last message). */
+               trailer_size = (mbox->storage->storage.flags &
+                               MAIL_STORAGE_FLAG_SAVE_CRLF) != 0 ? 2 : 1;
+               *next_offset_r = hdr->sync_size - trailer_size;
+               return TRUE;
+       }
+
+       /* the lookup returns an int and only a positive value means the
+          offset was found. compare explicitly, so that a negative (error)
+          result isn't converted to TRUE. */
+       return mbox_file_lookup_offset(mbox, mail->trans->trans_view,
+                                      _mail->seq + 1, next_offset_r) > 0;
+}
+
 static int mbox_mail_get_physical_size(struct mail *_mail, uoff_t *size_r)
 {
        struct index_mail *mail = (struct index_mail *)_mail;
        struct index_mail_data *data = &mail->data;
        struct mbox_mailbox *mbox = (struct mbox_mailbox *)mail->ibox;
-       const struct mail_index_header *hdr;
        struct istream *input;
        struct message_size hdr_size;
        uoff_t old_offset, body_offset, body_size, next_offset;
@@ -194,26 +219,10 @@ static int mbox_mail_get_physical_size(struct mail *_mail, uoff_t *size_r)
 
        /* use the next message's offset to avoid reading through the entire
           message body to find out its size */
-       hdr = mail_index_get_header(mail->trans->trans_view);
-       if (_mail->seq >= hdr->messages_count) {
-               if (_mail->seq == hdr->messages_count) {
-                       /* last message, use the synced mbox size */
-                       int trailer_size;
-
-                       trailer_size = (mbox->storage->storage.flags &
-                                       MAIL_STORAGE_FLAG_SAVE_CRLF) != 0 ?
-                               2 : 1;
-                       body_size = hdr->sync_size - body_offset - trailer_size;
-               } else {
-                       /* we're appending a new message */
-                       body_size = (uoff_t)-1;
-               }
-       } else if (mbox_file_lookup_offset(mbox, mail->trans->trans_view,
-                                          _mail->seq + 1, &next_offset) > 0) {
+       if (mbox_mail_get_next_offset(mail, &next_offset))
                body_size = next_offset - body_offset;
-       } else {
+       else
                body_size = (uoff_t)-1;
-       }
 
        /* verify that the calculated body size is correct */
        body_size = istream_raw_mbox_get_body_size(mbox->mbox_stream,
@@ -226,31 +235,52 @@ static int mbox_mail_get_physical_size(struct mail *_mail, uoff_t *size_r)
        return 0;
 }
 
-static int mbox_mail_get_stream(struct mail *_mail,
-                               struct message_size *hdr_size,
-                               struct message_size *body_size,
-                               struct istream **stream_r)
+static int mbox_mail_init_stream(struct index_mail *mail)
 {
-       struct index_mail *mail = (struct index_mail *)_mail;
-       struct index_mail_data *data = &mail->data;
        struct mbox_mailbox *mbox = (struct mbox_mailbox *)mail->ibox;
        struct istream *raw_stream;
-       uoff_t offset;
+       uoff_t hdr_offset, next_offset;
+
+       if (mbox_mail_seek(mail) < 0)
+               return -1;
 
-       if (data->stream == NULL) {
+       if (!mbox_mail_get_next_offset(mail, &next_offset)) {
                if (mbox_mail_seek(mail) < 0)
                        return -1;
+               if (!mbox_mail_get_next_offset(mail, &next_offset)) {
+                       i_warning("mbox %s: Can't find next message offset",
+                                 mbox->path);
+                       next_offset = (uoff_t)-1;
+               }
+       }
+
+       raw_stream = mbox->mbox_stream;
+       hdr_offset = istream_raw_mbox_get_header_offset(raw_stream);
+       i_stream_seek(raw_stream, hdr_offset);
 
-               raw_stream = mbox->mbox_stream;
-               offset = istream_raw_mbox_get_header_offset(raw_stream);
-               i_stream_seek(raw_stream, offset);
-               raw_stream = i_stream_create_limit(raw_stream, (uoff_t)-1);
-               data->stream =
-                       i_stream_create_header_filter(raw_stream,
+       if (next_offset != (uoff_t)-1)
+               istream_raw_mbox_set_next_offset(raw_stream, next_offset);
+
+       raw_stream = i_stream_create_limit(raw_stream, (uoff_t)-1);
+       mail->data.stream =
+               i_stream_create_header_filter(raw_stream,
                                HEADER_FILTER_EXCLUDE | HEADER_FILTER_NO_CR,
                                mbox_hide_headers, mbox_hide_headers_count,
                                null_header_filter_callback, NULL);
-               i_stream_unref(&raw_stream);
+       i_stream_unref(&raw_stream);
+       return 0;
+}
+
+static int mbox_mail_get_stream(struct mail *_mail,
+                               struct message_size *hdr_size,
+                               struct message_size *body_size,
+                               struct istream **stream_r)
+{
+       struct index_mail *mail = (struct index_mail *)_mail;
+
+       if (mail->data.stream == NULL) {
+               if (mbox_mail_init_stream(mail) < 0)
+                       return -1;
        }
 
        return index_mail_init_stream(mail, hdr_size, body_size, stream_r);