git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
dsync: Improved header hash v2 algorithm to remove repeated '?' chars.
author Timo Sirainen <timo.sirainen@dovecot.fi>
Thu, 28 Jan 2016 18:47:02 +0000 (20:47 +0200)
committer Timo Sirainen <timo.sirainen@dovecot.fi>
Thu, 28 Jan 2016 18:48:33 +0000 (20:48 +0200)
This helps with Yahoo, which replaces each UTF-8 character in headers with a
single '?' (instead of one '?' per 8bit byte).

src/doveadm/dsync/Makefile.am
src/doveadm/dsync/dsync-mail.c
src/doveadm/dsync/dsync-mail.h
src/doveadm/dsync/test-dsync-mail.c [new file with mode: 0644]

index 16d88bf962a999212e3cf266fdd77fb62b6f1c2e..99349175b259151efe8edd344648cc8fa0f6a417 100644 (file)
@@ -58,6 +58,7 @@ noinst_HEADERS = \
        dsync-transaction-log-scan.h
 
 test_programs = \
+       test-dsync-mail \
        test-dsync-mailbox-tree-sync
 
 noinst_PROGRAMS = $(test_programs)
@@ -66,6 +67,10 @@ test_libs = \
        ../../lib-test/libtest.la \
        ../../lib/liblib.la
 
+test_dsync_mail_SOURCES = test-dsync-mail.c
+test_dsync_mail_LDADD = $(pkglib_LTLIBRARIES) $(test_libs)
+test_dsync_mail_DEPENDENCIES = $(pkglib_LTLIBRARIES) $(test_libs)
+
 test_dsync_mailbox_tree_sync_SOURCES = test-dsync-mailbox-tree-sync.c
 test_dsync_mailbox_tree_sync_LDADD = dsync-mailbox-tree-sync.lo dsync-mailbox-tree.lo $(test_libs)
 test_dsync_mailbox_tree_sync_DEPENDENCIES = $(pkglib_LTLIBRARIES) $(test_libs)
index fd3cde0db5ca6a8b8e517c21581769721596ab52..7550a57a85d2dfebee0a25647161024098911036 100644 (file)
@@ -24,9 +24,8 @@ dsync_mail_get_hash_headers(struct mailbox *box)
        return mailbox_header_lookup_init(box, hashed_headers);
 }
 
-static void
-dsync_mail_hash_more(struct md5_context *md5_ctx, unsigned int version,
-                    const unsigned char *data, size_t size)
+void dsync_mail_hash_more(struct md5_context *md5_ctx, unsigned int version,
+                         const unsigned char *data, size_t size)
 {
        size_t i, start;
 
@@ -42,18 +41,22 @@ dsync_mail_hash_more(struct md5_context *md5_ctx, unsigned int version,
           - Zimbra replaces 8bit chars with '?' in header fetches,
           but not body fetches.
           - Yahoo replaces 8bit chars with '?' in partial header
-          fetches, but not POP3 TOP.
+          fetches, but not POP3 TOP. For a multi-byte UTF-8 character it
+          writes only a single '?'.
 
-          So we'll just replace all control and 8bit chars with '?',
-          which hopefully will satisfy everybody.
+          So we'll just replace all control and 8bit chars with '?' and
+          remove any repeated '?', which hopefully will satisfy everybody.
 
           (Keep this code in sync with pop3-migration plugin.)
           */
        for (i = start = 0; i < size; i++) {
-               if ((data[i] < 0x20 || data[i] >= 0x80) &&
+               if ((data[i] < 0x20 || data[i] >= 0x7f || data[i] == '?') &&
                    (data[i] != '\t' && data[i] != '\n')) {
-                       md5_update(md5_ctx, data + start, i-start);
-                       md5_update(md5_ctx, "?", 1);
+                       /* remove repeated '?' */
+                       if (start < i || i == 0) {
+                               md5_update(md5_ctx, data + start, i-start);
+                               md5_update(md5_ctx, "?", 1);
+                       }
                        start = i+1;
                }
        }
index 8dfbb3075ce219c33ce07e4d77eb4154790c8fc1..0b1ce9f1f99b12c4ebdb55e0bd5facd34fa04372 100644 (file)
@@ -3,6 +3,7 @@
 
 #include "mail-types.h"
 
+struct md5_context;
 struct mail;
 struct mailbox;
 
@@ -95,4 +96,8 @@ int dsync_mail_fill_nonminimal(struct mail *mail, struct dsync_mail *dmail_r,
 void dsync_mail_change_dup(pool_t pool, const struct dsync_mail_change *src,
                           struct dsync_mail_change *dest_r);
 
+/* private: */
+void dsync_mail_hash_more(struct md5_context *md5_ctx, unsigned int version,
+                         const unsigned char *data, size_t size);
+
 #endif
diff --git a/src/doveadm/dsync/test-dsync-mail.c b/src/doveadm/dsync/test-dsync-mail.c
new file mode 100644 (file)
index 0000000..a35ee64
--- /dev/null
@@ -0,0 +1,40 @@
+/* Copyright (c) 2016 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "md5.h"
+#include "dsync-mail.h"
+#include "test-common.h"
+
+static const unsigned char test_input[] =
+       "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+       "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+       "\x20!?x??yz\x7f\x80\x90\xff-plop\xff";
+static const unsigned char test_output[] =
+       "?\t\n? !?x?yz?-plop?";
+
+static void test_dsync_mail_hash_more(void)
+{
+       struct md5_context md5_ctx;
+       unsigned char md5_input[MD5_RESULTLEN], md5_output[MD5_RESULTLEN];
+
+       test_begin("dsync_mail_hash_more v2");
+       md5_init(&md5_ctx);
+       dsync_mail_hash_more(&md5_ctx, 2, test_input, sizeof(test_input)-1);
+       md5_final(&md5_ctx, md5_input);
+
+       md5_init(&md5_ctx);
+       md5_update(&md5_ctx, test_output, sizeof(test_output)-1);
+       md5_final(&md5_ctx, md5_output);
+
+       test_assert(memcmp(md5_input, md5_output, MD5_RESULTLEN) == 0);
+       test_end();
+}
+
+int main(void)
+{
+       static void (*test_functions[])(void) = {
+               test_dsync_mail_hash_more,
+               NULL
+       };
+       return test_run(test_functions);
+}