From: Timo Sirainen Date: Tue, 26 Jan 2016 17:56:43 +0000 (+0200) Subject: dsync: When comparing headers' hashes to match messages, try to normalize the input. X-Git-Tag: 2.2.22.rc1~274 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=45af47783693b3ba2768c5ad34eeff68132382d0;p=thirdparty%2Fdovecot%2Fcore.git dsync: When comparing headers' hashes to match messages, try to normalize the input. This is especially useful because some IMAP servers return different data depending on whether we're fetching only specific header fields, all headers, or the entire body. For now we're assuming that any non-ASCII character is going to be replaced with '?', which helps at least with Zimbra and Yahoo. The header hashing algorithm is now versionable, so it can be modified more easily in the future. This change should make the imapc_features=zimbra-workarounds setting obsolete. --- diff --git a/src/doveadm/dsync/dsync-brain-mailbox.c b/src/doveadm/dsync/dsync-brain-mailbox.c index a6c6ee7855..5dadf978c8 100644 --- a/src/doveadm/dsync/dsync-brain-mailbox.c +++ b/src/doveadm/dsync/dsync-brain-mailbox.c @@ -218,6 +218,8 @@ dsync_brain_sync_mailbox_init_remote(struct dsync_brain *brain, import_flags |= DSYNC_MAILBOX_IMPORT_FLAG_MAILS_USE_GUID128; if (brain->no_notify) import_flags |= DSYNC_MAILBOX_IMPORT_FLAG_NO_NOTIFY; + if (brain->hdr_hash_v2) + import_flags |= DSYNC_MAILBOX_IMPORT_FLAG_HDR_HASH_V2; brain->box_importer = brain->backup_send ? NULL : dsync_mailbox_import_init(brain->box, brain->virtual_all_box, @@ -318,6 +320,8 @@ int dsync_brain_sync_mailbox_open(struct dsync_brain *brain, exporter_flags |= DSYNC_MAILBOX_EXPORTER_FLAG_MINIMAL_DMAIL_FILL; if (brain->sync_since_timestamp > 0) exporter_flags |= DSYNC_MAILBOX_EXPORTER_FLAG_TIMESTAMPS; + if (brain->hdr_hash_v2) + exporter_flags |= DSYNC_MAILBOX_EXPORTER_FLAG_HDR_HASH_V2; brain->box_exporter = brain->backup_recv ? 
NULL : dsync_mailbox_export_init(brain->box, brain->log_scan, diff --git a/src/doveadm/dsync/dsync-brain-private.h b/src/doveadm/dsync/dsync-brain-private.h index a6be2222dd..bba7184186 100644 --- a/src/doveadm/dsync/dsync-brain-private.h +++ b/src/doveadm/dsync/dsync-brain-private.h @@ -112,6 +112,7 @@ struct dsync_brain { unsigned int require_full_resync:1; unsigned int verbose_proctitle:1; unsigned int no_notify:1; + unsigned int hdr_hash_v2:1; unsigned int failed:1; }; diff --git a/src/doveadm/dsync/dsync-brain.c b/src/doveadm/dsync/dsync-brain.c index 5f6a50ebf6..915d4f23f7 100644 --- a/src/doveadm/dsync/dsync-brain.c +++ b/src/doveadm/dsync/dsync-brain.c @@ -233,6 +233,7 @@ dsync_brain_master_init(struct mail_user *user, struct dsync_ibc *ibc, memcpy(ibc_set.sync_box_guid, set->sync_box_guid, sizeof(ibc_set.sync_box_guid)); ibc_set.sync_type = sync_type; + ibc_set.hdr_hash_v2 = TRUE; ibc_set.lock_timeout = set->lock_timeout_secs; /* reverse the backup direction for the slave */ ibc_set.brain_flags = flags & ~(DSYNC_BRAIN_FLAG_BACKUP_SEND | @@ -267,6 +268,7 @@ dsync_brain_slave_init(struct mail_user *user, struct dsync_ibc *ibc, } memset(&ibc_set, 0, sizeof(ibc_set)); + ibc_set.hdr_hash_v2 = TRUE; ibc_set.hostname = my_hostdomain(); dsync_ibc_send_handshake(ibc, &ibc_set); @@ -430,6 +432,7 @@ static bool dsync_brain_master_recv_handshake(struct dsync_brain *brain) return FALSE; } } + brain->hdr_hash_v2 = ibc_set->hdr_hash_v2; brain->state = brain->sync_type == DSYNC_BRAIN_SYNC_TYPE_STATE ? 
DSYNC_STATE_MASTER_SEND_LAST_COMMON : @@ -447,6 +450,7 @@ static bool dsync_brain_slave_recv_handshake(struct dsync_brain *brain) if (dsync_ibc_recv_handshake(brain->ibc, &ibc_set) == 0) return FALSE; + brain->hdr_hash_v2 = ibc_set->hdr_hash_v2; if (ibc_set->lock_timeout > 0) { brain->lock_timeout = ibc_set->lock_timeout; diff --git a/src/doveadm/dsync/dsync-ibc-stream.c b/src/doveadm/dsync/dsync-ibc-stream.c index d6557e2ceb..526f9d5bf5 100644 --- a/src/doveadm/dsync/dsync-ibc-stream.c +++ b/src/doveadm/dsync/dsync-ibc-stream.c @@ -26,12 +26,13 @@ #define DSYNC_IBC_STREAM_OUTBUF_THROTTLE_SIZE (1024*128) #define DSYNC_PROTOCOL_VERSION_MAJOR 3 -#define DSYNC_PROTOCOL_VERSION_MINOR 3 -#define DSYNC_HANDSHAKE_VERSION "VERSION\tdsync\t3\t3\n" +#define DSYNC_PROTOCOL_VERSION_MINOR 4 +#define DSYNC_HANDSHAKE_VERSION "VERSION\tdsync\t3\t4\n" #define DSYNC_PROTOCOL_MINOR_HAVE_ATTRIBUTES 1 #define DSYNC_PROTOCOL_MINOR_HAVE_SAVE_GUID 2 #define DSYNC_PROTOCOL_MINOR_HAVE_FINISH 3 +#define DSYNC_PROTOCOL_MINOR_HAVE_HDR_HASH_V2 4 enum item_type { ITEM_NONE, @@ -826,6 +827,7 @@ dsync_ibc_stream_recv_handshake(struct dsync_ibc *_ibc, set->brain_flags |= DSYNC_BRAIN_FLAG_PURGE_REMOTE; if (dsync_deserializer_decode_try(decoder, "no_notify", &value)) set->brain_flags |= DSYNC_BRAIN_FLAG_NO_NOTIFY; + set->hdr_hash_v2 = ibc->minor_version >= DSYNC_PROTOCOL_MINOR_HAVE_HDR_HASH_V2; *set_r = set; return DSYNC_IBC_RECV_RET_OK; diff --git a/src/doveadm/dsync/dsync-ibc.h b/src/doveadm/dsync/dsync-ibc.h index 18a577457a..d35d9211ec 100644 --- a/src/doveadm/dsync/dsync-ibc.h +++ b/src/doveadm/dsync/dsync-ibc.h @@ -63,6 +63,7 @@ struct dsync_ibc_settings { enum dsync_brain_sync_type sync_type; enum dsync_brain_flags brain_flags; + bool hdr_hash_v2; unsigned int lock_timeout; }; diff --git a/src/doveadm/dsync/dsync-mail.c b/src/doveadm/dsync/dsync-mail.c index dc64833e8f..fd3cde0db5 100644 --- a/src/doveadm/dsync/dsync-mail.c +++ b/src/doveadm/dsync/dsync-mail.c @@ -24,7 +24,44 @@ 
dsync_mail_get_hash_headers(struct mailbox *box) return mailbox_header_lookup_init(box, hashed_headers); } -int dsync_mail_get_hdr_hash(struct mail *mail, const char **hdr_hash_r) +static void +dsync_mail_hash_more(struct md5_context *md5_ctx, unsigned int version, + const unsigned char *data, size_t size) +{ + size_t i, start; + + i_assert(version == 1 || version == 2); + + if (version == 1) { + md5_update(md5_ctx, data, size); + return; + } + /* - Dovecot IMAP replaces NULs with 0x80 character. + - Dovecot POP3 with outlook-no-nuls workaround replaces NULs + with 0x80 character. + - Zimbra replaces 8bit chars with '?' in header fetches, + but not body fetches. + - Yahoo replaces 8bit chars with '?' in partial header + fetches, but not POP3 TOP. + + So we'll just replace all control and 8bit chars with '?', + which hopefully will satisfy everybody. + + (Keep this code in sync with pop3-migration plugin.) + */ + for (i = start = 0; i < size; i++) { + if ((data[i] < 0x20 || data[i] >= 0x80) && + (data[i] != '\t' && data[i] != '\n')) { + md5_update(md5_ctx, data + start, i-start); + md5_update(md5_ctx, "?", 1); + start = i+1; + } + } + md5_update(md5_ctx, data + start, i-start); +} + +int dsync_mail_get_hdr_hash(struct mail *mail, unsigned int version, + const char **hdr_hash_r) { struct istream *hdr_input, *input; struct mailbox_header_lookup_ctx *hdr_ctx; @@ -48,7 +85,7 @@ int dsync_mail_get_hdr_hash(struct mail *mail, const char **hdr_hash_r) break; if (size == 0) break; - md5_update(&md5_ctx, data, size); + dsync_mail_hash_more(&md5_ctx, version, data, size); i_stream_skip(input, size); } if (input->stream_errno != 0) diff --git a/src/doveadm/dsync/dsync-mail.h b/src/doveadm/dsync/dsync-mail.h index cdd4167c5c..8dfbb3075c 100644 --- a/src/doveadm/dsync/dsync-mail.h +++ b/src/doveadm/dsync/dsync-mail.h @@ -85,7 +85,8 @@ struct dsync_mail_change { struct mailbox_header_lookup_ctx * dsync_mail_get_hash_headers(struct mailbox *box); -int 
dsync_mail_get_hdr_hash(struct mail *mail, const char **hdr_hash_r); +int dsync_mail_get_hdr_hash(struct mail *mail, unsigned int version, + const char **hdr_hash_r); int dsync_mail_fill(struct mail *mail, bool minimal_fill, struct dsync_mail *dmail_r, const char **error_field_r); int dsync_mail_fill_nonminimal(struct mail *mail, struct dsync_mail *dmail_r, diff --git a/src/doveadm/dsync/dsync-mailbox-export.c b/src/doveadm/dsync/dsync-mailbox-export.c index 50288cbfb7..d08e0e1fce 100644 --- a/src/doveadm/dsync/dsync-mailbox-export.c +++ b/src/doveadm/dsync/dsync-mailbox-export.c @@ -28,6 +28,7 @@ struct dsync_mailbox_exporter { struct mailbox_transaction_context *trans; struct mail_search_context *search_ctx; unsigned int search_pos, search_count; + unsigned int hdr_hash_version; /* GUID => instances */ HASH_TABLE(char *, struct dsync_mail_guid_instances *) export_guids; @@ -162,7 +163,7 @@ exporter_get_guids(struct dsync_mailbox_exporter *exporter, if (!exporter->mails_have_guids) { /* get header hash also */ - if (dsync_mail_get_hdr_hash(mail, hdr_hash_r) < 0) + if (dsync_mail_get_hdr_hash(mail, exporter->hdr_hash_version, hdr_hash_r) < 0) return dsync_mail_error(exporter, mail, "hdr-stream"); return 1; } else if (**guid_r == '\0') { @@ -502,6 +503,8 @@ dsync_mailbox_export_init(struct mailbox *box, (flags & DSYNC_MAILBOX_EXPORTER_FLAG_MINIMAL_DMAIL_FILL) != 0; exporter->export_received_timestamps = (flags & DSYNC_MAILBOX_EXPORTER_FLAG_TIMESTAMPS) != 0; + exporter->hdr_hash_version = + (flags & DSYNC_MAILBOX_EXPORTER_FLAG_HDR_HASH_V2) ? 
2 : 1; p_array_init(&exporter->requested_uids, pool, 16); p_array_init(&exporter->search_uids, pool, 16); hash_table_create(&exporter->export_guids, pool, 0, str_hash, strcmp); diff --git a/src/doveadm/dsync/dsync-mailbox-export.h b/src/doveadm/dsync/dsync-mailbox-export.h index ed7cd30733..fffeedbbb2 100644 --- a/src/doveadm/dsync/dsync-mailbox-export.h +++ b/src/doveadm/dsync/dsync-mailbox-export.h @@ -5,7 +5,8 @@ enum dsync_mailbox_exporter_flags { DSYNC_MAILBOX_EXPORTER_FLAG_AUTO_EXPORT_MAILS = 0x01, DSYNC_MAILBOX_EXPORTER_FLAG_MAILS_HAVE_GUIDS = 0x02, DSYNC_MAILBOX_EXPORTER_FLAG_MINIMAL_DMAIL_FILL = 0x04, - DSYNC_MAILBOX_EXPORTER_FLAG_TIMESTAMPS = 0x08 + DSYNC_MAILBOX_EXPORTER_FLAG_TIMESTAMPS = 0x08, + DSYNC_MAILBOX_EXPORTER_FLAG_HDR_HASH_V2 = 0x10 }; struct dsync_mailbox_exporter * diff --git a/src/doveadm/dsync/dsync-mailbox-import.c b/src/doveadm/dsync/dsync-mailbox-import.c index bc2abe383c..98de1356d4 100644 --- a/src/doveadm/dsync/dsync-mailbox-import.c +++ b/src/doveadm/dsync/dsync-mailbox-import.c @@ -63,6 +63,7 @@ struct dsync_mailbox_importer { uint64_t remote_highest_modseq, remote_highest_pvt_modseq; time_t sync_since_timestamp; enum mailbox_transaction_flags transaction_flags; + unsigned int hdr_hash_version; enum mail_flags sync_flag; const char *sync_keyword; @@ -255,6 +256,8 @@ dsync_mailbox_import_init(struct mailbox *box, (flags & DSYNC_MAILBOX_IMPORT_FLAG_MAILS_HAVE_GUIDS) != 0; importer->mails_use_guid128 = (flags & DSYNC_MAILBOX_IMPORT_FLAG_MAILS_USE_GUID128) != 0; + importer->hdr_hash_version = + (flags & DSYNC_MAILBOX_IMPORT_FLAG_HDR_HASH_V2) != 0 ? 
2 : 1; mailbox_get_open_status(importer->box, STATUS_UIDNEXT | STATUS_HIGHESTMODSEQ | STATUS_HIGHESTPVTMODSEQ, @@ -601,6 +604,7 @@ importer_try_next_mail(struct dsync_mailbox_importer *importer, } } else { if (dsync_mail_get_hdr_hash(importer->cur_mail, + importer->hdr_hash_version, &hdr_hash) < 0) { dsync_mail_error(importer, importer->cur_mail, "header hash"); @@ -1483,7 +1487,8 @@ dsync_mailbox_import_match_msg(struct dsync_mailbox_importer *importer, return -1; } - if (dsync_mail_get_hdr_hash(importer->cur_mail, &hdr_hash) < 0) { + if (dsync_mail_get_hdr_hash(importer->cur_mail, + importer->hdr_hash_version, &hdr_hash) < 0) { dsync_mail_error(importer, importer->cur_mail, "hdr-stream"); *result_r = "Error fetching header stream"; return -1; diff --git a/src/doveadm/dsync/dsync-mailbox-import.h b/src/doveadm/dsync/dsync-mailbox-import.h index c4a473720c..aaa302bdfc 100644 --- a/src/doveadm/dsync/dsync-mailbox-import.h +++ b/src/doveadm/dsync/dsync-mailbox-import.h @@ -10,7 +10,8 @@ enum dsync_mailbox_import_flags { DSYNC_MAILBOX_IMPORT_FLAG_DEBUG = 0x08, DSYNC_MAILBOX_IMPORT_FLAG_MAILS_HAVE_GUIDS = 0x10, DSYNC_MAILBOX_IMPORT_FLAG_MAILS_USE_GUID128 = 0x20, - DSYNC_MAILBOX_IMPORT_FLAG_NO_NOTIFY = 0x40 + DSYNC_MAILBOX_IMPORT_FLAG_NO_NOTIFY = 0x40, + DSYNC_MAILBOX_IMPORT_FLAG_HDR_HASH_V2 = 0x80 }; struct mailbox; diff --git a/src/plugins/pop3-migration/pop3-migration-plugin.c b/src/plugins/pop3-migration/pop3-migration-plugin.c index 3a31c4b214..791423d711 100644 --- a/src/plugins/pop3-migration/pop3-migration-plugin.c +++ b/src/plugins/pop3-migration/pop3-migration-plugin.c @@ -207,6 +207,8 @@ int pop3_migration_get_hdr_sha1(uint32_t mail_seq, struct istream *input, So we'll just replace all control and 8bit chars with '?', which hopefully will satisfy everybody. + + (Keep this code in sync with dsync.) */ for (i = start = 0; i < size; i++) { if ((data[i] < 0x20 || data[i] >= 0x80) &&