git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
dsync: When comparing headers' hashes to match messages, try to normalize the input.
author Timo Sirainen <timo.sirainen@dovecot.fi>
Tue, 26 Jan 2016 17:56:43 +0000 (19:56 +0200)
committer Timo Sirainen <timo.sirainen@dovecot.fi>
Tue, 26 Jan 2016 18:02:24 +0000 (20:02 +0200)
This is especially useful because some IMAP servers return different data
depending on whether we're fetching only specific header fields, all headers
or entire body. For now we're assuming that any non-ASCII is going to be
replaced with '?', which helps at least with Zimbra and Yahoo. The header
hashing algorithm is now versionable, so it can be modified more easily in
future.

This change should make imapc_features=zimbra-workarounds setting obsolete.

12 files changed:
src/doveadm/dsync/dsync-brain-mailbox.c
src/doveadm/dsync/dsync-brain-private.h
src/doveadm/dsync/dsync-brain.c
src/doveadm/dsync/dsync-ibc-stream.c
src/doveadm/dsync/dsync-ibc.h
src/doveadm/dsync/dsync-mail.c
src/doveadm/dsync/dsync-mail.h
src/doveadm/dsync/dsync-mailbox-export.c
src/doveadm/dsync/dsync-mailbox-export.h
src/doveadm/dsync/dsync-mailbox-import.c
src/doveadm/dsync/dsync-mailbox-import.h
src/plugins/pop3-migration/pop3-migration-plugin.c

index a6c6ee78557f52db1c80795329ecf57dd6baed8a..5dadf978c819b9a9ccaf73fe188a8183bfbd7aa9 100644 (file)
@@ -218,6 +218,8 @@ dsync_brain_sync_mailbox_init_remote(struct dsync_brain *brain,
                import_flags |= DSYNC_MAILBOX_IMPORT_FLAG_MAILS_USE_GUID128;
        if (brain->no_notify)
                import_flags |= DSYNC_MAILBOX_IMPORT_FLAG_NO_NOTIFY;
+       if (brain->hdr_hash_v2)
+               import_flags |= DSYNC_MAILBOX_IMPORT_FLAG_HDR_HASH_V2;
 
        brain->box_importer = brain->backup_send ? NULL :
                dsync_mailbox_import_init(brain->box, brain->virtual_all_box,
@@ -318,6 +320,8 @@ int dsync_brain_sync_mailbox_open(struct dsync_brain *brain,
                exporter_flags |= DSYNC_MAILBOX_EXPORTER_FLAG_MINIMAL_DMAIL_FILL;
        if (brain->sync_since_timestamp > 0)
                exporter_flags |= DSYNC_MAILBOX_EXPORTER_FLAG_TIMESTAMPS;
+       if (brain->hdr_hash_v2)
+               exporter_flags |= DSYNC_MAILBOX_EXPORTER_FLAG_HDR_HASH_V2;
 
        brain->box_exporter = brain->backup_recv ? NULL :
                dsync_mailbox_export_init(brain->box, brain->log_scan,
index a6be2222dd23c8b52d95ec28883fe07c6d0893d8..bba71841867a1520c9c9d8e42cb41838b5a29889 100644 (file)
@@ -112,6 +112,7 @@ struct dsync_brain {
        unsigned int require_full_resync:1;
        unsigned int verbose_proctitle:1;
        unsigned int no_notify:1;
+       unsigned int hdr_hash_v2:1;
        unsigned int failed:1;
 };
 
index 5f6a50ebf60693aef2f415b6178540189b7f4cfc..915d4f23f77955bf03cef13d1342889748827a60 100644 (file)
@@ -233,6 +233,7 @@ dsync_brain_master_init(struct mail_user *user, struct dsync_ibc *ibc,
        memcpy(ibc_set.sync_box_guid, set->sync_box_guid,
               sizeof(ibc_set.sync_box_guid));
        ibc_set.sync_type = sync_type;
+       ibc_set.hdr_hash_v2 = TRUE;
        ibc_set.lock_timeout = set->lock_timeout_secs;
        /* reverse the backup direction for the slave */
        ibc_set.brain_flags = flags & ~(DSYNC_BRAIN_FLAG_BACKUP_SEND |
@@ -267,6 +268,7 @@ dsync_brain_slave_init(struct mail_user *user, struct dsync_ibc *ibc,
        }
 
        memset(&ibc_set, 0, sizeof(ibc_set));
+       ibc_set.hdr_hash_v2 = TRUE;
        ibc_set.hostname = my_hostdomain();
        dsync_ibc_send_handshake(ibc, &ibc_set);
 
@@ -430,6 +432,7 @@ static bool dsync_brain_master_recv_handshake(struct dsync_brain *brain)
                        return FALSE;
                }
        }
+       brain->hdr_hash_v2 = ibc_set->hdr_hash_v2;
 
        brain->state = brain->sync_type == DSYNC_BRAIN_SYNC_TYPE_STATE ?
                DSYNC_STATE_MASTER_SEND_LAST_COMMON :
@@ -447,6 +450,7 @@ static bool dsync_brain_slave_recv_handshake(struct dsync_brain *brain)
 
        if (dsync_ibc_recv_handshake(brain->ibc, &ibc_set) == 0)
                return FALSE;
+       brain->hdr_hash_v2 = ibc_set->hdr_hash_v2;
 
        if (ibc_set->lock_timeout > 0) {
                brain->lock_timeout = ibc_set->lock_timeout;
index d6557e2ceb6217c94882fd22d6640fa35c40c7dd..526f9d5bf5031f089891e5bc601a75f21001d3d6 100644 (file)
 #define DSYNC_IBC_STREAM_OUTBUF_THROTTLE_SIZE (1024*128)
 
 #define DSYNC_PROTOCOL_VERSION_MAJOR 3
-#define DSYNC_PROTOCOL_VERSION_MINOR 3
-#define DSYNC_HANDSHAKE_VERSION "VERSION\tdsync\t3\t3\n"
+#define DSYNC_PROTOCOL_VERSION_MINOR 4
+#define DSYNC_HANDSHAKE_VERSION "VERSION\tdsync\t3\t4\n"
 
 #define DSYNC_PROTOCOL_MINOR_HAVE_ATTRIBUTES 1
 #define DSYNC_PROTOCOL_MINOR_HAVE_SAVE_GUID 2
 #define DSYNC_PROTOCOL_MINOR_HAVE_FINISH 3
+#define DSYNC_PROTOCOL_MINOR_HAVE_HDR_HASH_V2 4
 
 enum item_type {
        ITEM_NONE,
@@ -826,6 +827,7 @@ dsync_ibc_stream_recv_handshake(struct dsync_ibc *_ibc,
                set->brain_flags |= DSYNC_BRAIN_FLAG_PURGE_REMOTE;
        if (dsync_deserializer_decode_try(decoder, "no_notify", &value))
                set->brain_flags |= DSYNC_BRAIN_FLAG_NO_NOTIFY;
+       set->hdr_hash_v2 = ibc->minor_version >= DSYNC_PROTOCOL_MINOR_HAVE_HDR_HASH_V2;
 
        *set_r = set;
        return DSYNC_IBC_RECV_RET_OK;
index 18a577457a5e3086bf1a707e986e5962ea8deedc..d35d9211eccc56e36324922382a48d2e7b894f78 100644 (file)
@@ -63,6 +63,7 @@ struct dsync_ibc_settings {
 
        enum dsync_brain_sync_type sync_type;
        enum dsync_brain_flags brain_flags;
+       bool hdr_hash_v2;
        unsigned int lock_timeout;
 };
 
index dc64833e8fa3c7f477e2b89be85343d3426960fd..fd3cde0db5ca6a8b8e517c21581769721596ab52 100644 (file)
@@ -24,7 +24,44 @@ dsync_mail_get_hash_headers(struct mailbox *box)
        return mailbox_header_lookup_init(box, hashed_headers);
 }
 
-int dsync_mail_get_hdr_hash(struct mail *mail, const char **hdr_hash_r)
+static void
+dsync_mail_hash_more(struct md5_context *md5_ctx, unsigned int version,
+                    const unsigned char *data, size_t size)
+{ /* Feeds size bytes of data into md5_ctx using header hash scheme 'version'. */
+       size_t i, start; /* start = first byte of the run not yet hashed */
+
+       i_assert(version == 1 || version == 2); /* only versions 1 and 2 are accepted */
+
+       if (version == 1) { /* v1: hash the input bytes unchanged */
+               md5_update(md5_ctx, data, size);
+               return;
+       }
+       /* Version 2: normalize the bytes before hashing, because servers
+          return different data for the same header:
+          - Dovecot IMAP, and Dovecot POP3 with the outlook-no-nuls
+          workaround, replace NULs with 0x80 character.
+          - Zimbra replaces 8bit chars with '?' in header fetches,
+          but not body fetches.
+          - Yahoo replaces 8bit chars with '?' in partial header
+          fetches, but not POP3 TOP.
+
+          So every byte below 0x20 (except TAB and LF) or at/above 0x80
+          is hashed as '?'; note that this includes CR.
+
+          (Keep this code in sync with pop3-migration plugin.) */
+       for (i = start = 0; i < size; i++) {
+               if ((data[i] < 0x20 || data[i] >= 0x80) &&
+                   (data[i] != '\t' && data[i] != '\n')) {
+                       md5_update(md5_ctx, data + start, i-start); /* unmodified run before data[i] */
+                       md5_update(md5_ctx, "?", 1); /* stand-in for data[i] */
+                       start = i+1;
+               }
+       }
+       md5_update(md5_ctx, data + start, i-start); /* bytes after the last replacement */
+}
+
+int dsync_mail_get_hdr_hash(struct mail *mail, unsigned int version,
+                           const char **hdr_hash_r)
 {
        struct istream *hdr_input, *input;
        struct mailbox_header_lookup_ctx *hdr_ctx;
@@ -48,7 +85,7 @@ int dsync_mail_get_hdr_hash(struct mail *mail, const char **hdr_hash_r)
                        break;
                if (size == 0)
                        break;
-               md5_update(&md5_ctx, data, size);
+               dsync_mail_hash_more(&md5_ctx, version, data, size);
                i_stream_skip(input, size);
        }
        if (input->stream_errno != 0)
index cdd4167c5c718b1f8ba03fa799d5ecf405548a17..8dfbb3075ce219c33ce07e4d77eb4154790c8fc1 100644 (file)
@@ -85,7 +85,8 @@ struct dsync_mail_change {
 struct mailbox_header_lookup_ctx *
 dsync_mail_get_hash_headers(struct mailbox *box);
 
-int dsync_mail_get_hdr_hash(struct mail *mail, const char **hdr_hash_r);
+int dsync_mail_get_hdr_hash(struct mail *mail, unsigned int version,
+                           const char **hdr_hash_r);
 int dsync_mail_fill(struct mail *mail, bool minimal_fill,
                    struct dsync_mail *dmail_r, const char **error_field_r);
 int dsync_mail_fill_nonminimal(struct mail *mail, struct dsync_mail *dmail_r,
index 50288cbfb7e3c535c43096f9057a2b1c5ad84636..d08e0e1fced1ceef26e4c2fcc258f0943a298af0 100644 (file)
@@ -28,6 +28,7 @@ struct dsync_mailbox_exporter {
        struct mailbox_transaction_context *trans;
        struct mail_search_context *search_ctx;
        unsigned int search_pos, search_count;
+       unsigned int hdr_hash_version;
 
        /* GUID => instances */
        HASH_TABLE(char *, struct dsync_mail_guid_instances *) export_guids;
@@ -162,7 +163,7 @@ exporter_get_guids(struct dsync_mailbox_exporter *exporter,
 
        if (!exporter->mails_have_guids) {
                /* get header hash also */
-               if (dsync_mail_get_hdr_hash(mail, hdr_hash_r) < 0)
+               if (dsync_mail_get_hdr_hash(mail, exporter->hdr_hash_version, hdr_hash_r) < 0)
                        return dsync_mail_error(exporter, mail, "hdr-stream");
                return 1;
        } else if (**guid_r == '\0') {
@@ -502,6 +503,8 @@ dsync_mailbox_export_init(struct mailbox *box,
                (flags & DSYNC_MAILBOX_EXPORTER_FLAG_MINIMAL_DMAIL_FILL) != 0;
        exporter->export_received_timestamps =
                (flags & DSYNC_MAILBOX_EXPORTER_FLAG_TIMESTAMPS) != 0;
+       exporter->hdr_hash_version =
+               (flags & DSYNC_MAILBOX_EXPORTER_FLAG_HDR_HASH_V2) ? 2 : 1;
        p_array_init(&exporter->requested_uids, pool, 16);
        p_array_init(&exporter->search_uids, pool, 16);
        hash_table_create(&exporter->export_guids, pool, 0, str_hash, strcmp);
index ed7cd307338c5564ad1f283b8694bfb3e0a565b8..fffeedbbb25a7398c9de3e0f475290dfd085fb46 100644 (file)
@@ -5,7 +5,8 @@ enum dsync_mailbox_exporter_flags {
        DSYNC_MAILBOX_EXPORTER_FLAG_AUTO_EXPORT_MAILS   = 0x01,
        DSYNC_MAILBOX_EXPORTER_FLAG_MAILS_HAVE_GUIDS    = 0x02,
        DSYNC_MAILBOX_EXPORTER_FLAG_MINIMAL_DMAIL_FILL  = 0x04,
-       DSYNC_MAILBOX_EXPORTER_FLAG_TIMESTAMPS          = 0x08
+       DSYNC_MAILBOX_EXPORTER_FLAG_TIMESTAMPS          = 0x08,
+       DSYNC_MAILBOX_EXPORTER_FLAG_HDR_HASH_V2         = 0x10
 };
 
 struct dsync_mailbox_exporter *
index bc2abe383c3224a46b24c9daa6579a0495916387..98de1356d4d318a128632abd88290f22dbafc5a2 100644 (file)
@@ -63,6 +63,7 @@ struct dsync_mailbox_importer {
        uint64_t remote_highest_modseq, remote_highest_pvt_modseq;
        time_t sync_since_timestamp;
        enum mailbox_transaction_flags transaction_flags;
+       unsigned int hdr_hash_version;
 
        enum mail_flags sync_flag;
        const char *sync_keyword;
@@ -255,6 +256,8 @@ dsync_mailbox_import_init(struct mailbox *box,
                (flags & DSYNC_MAILBOX_IMPORT_FLAG_MAILS_HAVE_GUIDS) != 0;
        importer->mails_use_guid128 =
                (flags & DSYNC_MAILBOX_IMPORT_FLAG_MAILS_USE_GUID128) != 0;
+       importer->hdr_hash_version =
+               (flags & DSYNC_MAILBOX_IMPORT_FLAG_HDR_HASH_V2) != 0 ? 2 : 1;
 
        mailbox_get_open_status(importer->box, STATUS_UIDNEXT |
                                STATUS_HIGHESTMODSEQ | STATUS_HIGHESTPVTMODSEQ,
@@ -601,6 +604,7 @@ importer_try_next_mail(struct dsync_mailbox_importer *importer,
                }
        } else {
                if (dsync_mail_get_hdr_hash(importer->cur_mail,
+                                           importer->hdr_hash_version,
                                            &hdr_hash) < 0) {
                        dsync_mail_error(importer, importer->cur_mail,
                                         "header hash");
@@ -1483,7 +1487,8 @@ dsync_mailbox_import_match_msg(struct dsync_mailbox_importer *importer,
                return -1;
        }
 
-       if (dsync_mail_get_hdr_hash(importer->cur_mail, &hdr_hash) < 0) {
+       if (dsync_mail_get_hdr_hash(importer->cur_mail,
+                                   importer->hdr_hash_version, &hdr_hash) < 0) {
                dsync_mail_error(importer, importer->cur_mail, "hdr-stream");
                *result_r = "Error fetching header stream";
                return -1;
index c4a473720c1b6a67c0b97b6a906db25574556e98..aaa302bdfc4a26d3aeb89286c5a90173e34231f9 100644 (file)
@@ -10,7 +10,8 @@ enum dsync_mailbox_import_flags {
        DSYNC_MAILBOX_IMPORT_FLAG_DEBUG                 = 0x08,
        DSYNC_MAILBOX_IMPORT_FLAG_MAILS_HAVE_GUIDS      = 0x10,
        DSYNC_MAILBOX_IMPORT_FLAG_MAILS_USE_GUID128     = 0x20,
-       DSYNC_MAILBOX_IMPORT_FLAG_NO_NOTIFY             = 0x40
+       DSYNC_MAILBOX_IMPORT_FLAG_NO_NOTIFY             = 0x40,
+       DSYNC_MAILBOX_IMPORT_FLAG_HDR_HASH_V2           = 0x80
 };
 
 struct mailbox;
index 3a31c4b214de421d825bbfa7c6d074795d5173ab..791423d7114ff630a3165f12934088f09e4a3e2f 100644 (file)
@@ -207,6 +207,8 @@ int pop3_migration_get_hdr_sha1(uint32_t mail_seq, struct istream *input,
 
                   So we'll just replace all control and 8bit chars with '?',
                   which hopefully will satisfy everybody.
+
+                  (Keep this code in sync with dsync.)
                */
                for (i = start = 0; i < size; i++) {
                        if ((data[i] < 0x20 || data[i] >= 0x80) &&