git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
doveadm: Added deduplicate command.
author Timo Sirainen <tss@iki.fi>
Tue, 18 Jun 2013 14:05:20 +0000 (17:05 +0300)
committer Timo Sirainen <tss@iki.fi>
Tue, 18 Jun 2013 14:05:20 +0000 (17:05 +0300)
By default it deduplicates only by GUID. With the -m parameter it deduplicates
by the Message-ID: header instead.

src/doveadm/Makefile.am
src/doveadm/doveadm-mail-deduplicate.c [new file with mode: 0644]
src/doveadm/doveadm-mail.c
src/doveadm/doveadm-mail.h

index 30f48d4274900c1b8c71754037be49f919f6b23d..99e297b8ced7a862b2435879f2c265ba27fdb473 100644 (file)
@@ -62,6 +62,7 @@ common = \
        doveadm-mail.c \
        doveadm-mail-altmove.c \
        doveadm-mail-batch.c \
+       doveadm-mail-deduplicate.c \
        doveadm-mail-expunge.c \
        doveadm-mail-fetch.c \
        doveadm-mail-flags.c \
diff --git a/src/doveadm/doveadm-mail-deduplicate.c b/src/doveadm/doveadm-mail-deduplicate.c
new file mode 100644 (file)
index 0000000..daa76d1
--- /dev/null
@@ -0,0 +1,203 @@
+/* Copyright (c) 2013 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "hash.h"
+#include "mail-storage.h"
+#include "mail-search-build.h"
+#include "doveadm-mailbox-list-iter.h"
+#include "doveadm-mail-iter.h"
+#include "doveadm-mail.h"
+
+struct uidlist { /* singly linked list of mail UIDs that share one deduplication key */
+       struct uidlist *next; /* following entry; NULL marks the end of the list */
+       uint32_t uid; /* copied from mail->uid when the entry is created */
+};
+
+struct deduplicate_cmd_context { /* state of one "deduplicate" command */
+       struct doveadm_mail_cmd_context ctx; /* must stay first: the generic context pointer is cast to this struct */
+       bool by_msgid; /* set by -m: group by Message-ID header instead of GUID */
+};
+
+/* Expunge every mail of one duplicate group except the last list entry.
+   The uidlist is reversed with the oldest mail at the end, so the oldest
+   mail is the one that is kept.
+   Returns 0 on success, -1 if deinitializing the search or committing the
+   transaction fails. */
+static int cmd_deduplicate_uidlist(struct mailbox *box, struct uidlist *uidlist)
+{
+       struct mailbox_transaction_context *trans;
+       struct mail_search_context *search_ctx;
+       struct mail_search_args *search_args;
+       struct mail_search_arg *uid_arg;
+       struct uidlist *node;
+       struct mail *mail;
+       ARRAY_TYPE(seq_range) expunge_uids;
+       int ret = 0;
+
+       if (uidlist->next == NULL) {
+               /* a single entry: nothing is duplicated */
+               return 0;
+       }
+
+       /* gather the UIDs of all entries except the final one */
+       t_array_init(&expunge_uids, 8);
+       node = uidlist;
+       while (node->next != NULL) {
+               seq_range_array_add(&expunge_uids, node->uid);
+               node = node->next;
+       }
+
+       /* search for exactly those UIDs */
+       search_args = mail_search_build_init();
+       uid_arg = mail_search_build_add(search_args, SEARCH_UIDSET);
+       uid_arg->value.seqset = expunge_uids;
+
+       trans = mailbox_transaction_begin(box, 0);
+       search_ctx = mailbox_search_init(trans, search_args, NULL, 0, NULL);
+       mail_search_args_unref(&search_args);
+
+       while (mailbox_search_next(search_ctx, &mail))
+               mail_expunge(mail);
+
+       /* the commit is attempted even when the search deinit failed */
+       if (mailbox_search_deinit(&search_ctx) < 0)
+               ret = -1;
+       if (mailbox_transaction_commit(&trans) < 0)
+               ret = -1;
+       return ret;
+}
+
+/* Deduplicate one mailbox.
+
+   Every mail returned by the doveadm mail iterator for search_args is
+   grouped by a key: the first Message-ID header when by_msgid is set,
+   otherwise the mail's GUID. Mails whose key is missing or empty are left
+   alone. For each key a uidlist is built by prepending, and
+   cmd_deduplicate_uidlist() then expunges all but the last entry of each
+   list. The mailbox is synced and freed before returning.
+
+   Returns 0 on success, -1 if the iterator, a key lookup, an expunge
+   transaction or the final sync failed. */
+static int
+cmd_deduplicate_box(struct doveadm_mail_cmd_context *_ctx,
+                   const struct mailbox_info *info,
+                   struct mail_search_args *search_args)
+{
+       struct deduplicate_cmd_context *ctx =
+               (struct deduplicate_cmd_context *)_ctx;
+       struct doveadm_mail_iter *iter;
+       struct hash_iterate_context *hash_iter;
+       struct mailbox *box;
+       struct mail *mail;
+       enum mail_error error;
+       pool_t pool;
+       HASH_TABLE(const char *, struct uidlist *) hash;
+       const char *key, *errstr;
+       struct uidlist *value;
+       int ret = 0;
+
+       if (doveadm_mail_iter_init(_ctx, info, search_args, 0, NULL,
+                                  &iter) < 0)
+               return -1;
+
+       pool = pool_alloconly_create("deduplicate", 10240);
+       hash_table_create(&hash, pool, 0, str_hash, strcmp);
+       while (doveadm_mail_iter_next(iter, &mail)) {
+               /* NOTE: "box" is not assigned until the iterator is
+                  deinitialized below, so the error lookups must go through
+                  mail->box instead. */
+               if (ctx->by_msgid) {
+                       if (mail_get_first_header(mail, "Message-ID", &key) < 0) {
+                               errstr = mailbox_get_last_error(mail->box, &error);
+                               if (error == MAIL_ERROR_NOTFOUND)
+                                       continue;
+                               i_error("Couldn't lookup Message-ID: for UID=%u: %s",
+                                       mail->uid, errstr);
+                               ret = -1;
+                               break;
+                       }
+               } else {
+                       if (mail_get_special(mail, MAIL_FETCH_GUID, &key) < 0) {
+                               errstr = mailbox_get_last_error(mail->box, &error);
+                               if (error == MAIL_ERROR_NOTFOUND)
+                                       continue;
+                               i_error("Couldn't lookup GUID: for UID=%u: %s",
+                                       mail->uid, errstr);
+                               ret = -1;
+                               break;
+                       }
+               }
+               if (key != NULL && *key != '\0') {
+                       /* prepend this mail to the list stored for the key */
+                       value = p_new(pool, struct uidlist, 1);
+                       value->uid = mail->uid;
+                       value->next = hash_table_lookup(hash, key);
+
+                       if (value->next == NULL) {
+                               /* first mail with this key: the hash table
+                                  needs its own copy of the key string */
+                               key = p_strdup(pool, key);
+                               hash_table_insert(hash, key, value);
+                       } else {
+                               hash_table_update(hash, key, value);
+                       }
+               }
+       }
+
+       /* from here on "box" is valid */
+       if (doveadm_mail_iter_deinit_keep_box(&iter, &box) < 0)
+               ret = -1;
+
+       if (ret == 0) {
+               /* expunge the duplicates only if every mail was scanned
+                  without errors */
+               hash_iter = hash_table_iterate_init(hash);
+               while (hash_table_iterate(hash_iter, hash, &key, &value)) {
+                       T_BEGIN {
+                               if (cmd_deduplicate_uidlist(box, value) < 0)
+                                       ret = -1;
+                       } T_END;
+               }
+               hash_table_iterate_deinit(&hash_iter);
+       }
+
+       hash_table_destroy(&hash);
+       pool_unref(&pool);
+
+       if (mailbox_sync(box, 0) < 0) {
+               doveadm_mail_failed_mailbox(_ctx, box);
+               ret = -1;
+       }
+       mailbox_free(&box);
+       return ret;
+}
+
+/* Run the deduplication on every mailbox that the doveadm mailbox list
+   iterator returns for ctx->search_args.
+   Returns 0 if everything succeeded, -1 if any mailbox or the list
+   iterator itself failed; remaining mailboxes are still processed. */
+static int
+cmd_deduplicate_run(struct doveadm_mail_cmd_context *ctx, struct mail_user *user)
+{
+       const enum mailbox_list_iter_flags list_flags =
+               MAILBOX_LIST_ITER_NO_AUTO_BOXES |
+               MAILBOX_LIST_ITER_RETURN_NO_FLAGS;
+       struct doveadm_mailbox_list_iter *list_iter;
+       const struct mailbox_info *box_info;
+       int result = 0;
+
+       list_iter = doveadm_mailbox_list_iter_init(ctx, user, ctx->search_args,
+                                                  list_flags);
+       for (;;) {
+               box_info = doveadm_mailbox_list_iter_next(list_iter);
+               if (box_info == NULL)
+                       break;
+
+               T_BEGIN {
+                       if (cmd_deduplicate_box(ctx, box_info,
+                                               ctx->search_args) < 0)
+                               result = -1;
+               } T_END;
+       }
+       if (doveadm_mailbox_list_iter_deinit(&list_iter) < 0)
+               result = -1;
+       return result;
+}
+
+/* Init callback: turn the remaining command line arguments into the search
+   query. When no argument was given, doveadm_mail_help_name() is called
+   for this command first. */
+static void cmd_deduplicate_init(struct doveadm_mail_cmd_context *ctx,
+                                const char *const args[])
+{
+       const char *const first_arg = args[0];
+
+       if (first_arg == NULL)
+               doveadm_mail_help_name("deduplicate");
+
+       ctx->search_args = doveadm_mail_build_search_args(args);
+}
+
+/* Option callback: 'm' switches to Message-ID based deduplication.
+   Returns TRUE when the option was recognized, FALSE otherwise. */
+static bool
+cmd_deduplicate_parse_arg(struct doveadm_mail_cmd_context *_ctx, int c)
+{
+       struct deduplicate_cmd_context *dedup_ctx =
+               (struct deduplicate_cmd_context *)_ctx;
+
+       if (c != 'm') {
+               /* not one of our options */
+               return FALSE;
+       }
+
+       dedup_ctx->by_msgid = TRUE;
+       return TRUE;
+}
+
+/* Allocate the command context and hook up the option string ("m") and
+   the parse_arg/init/run callbacks. Returns the embedded generic context. */
+static struct doveadm_mail_cmd_context *cmd_deduplicate_alloc(void)
+{
+       struct deduplicate_cmd_context *dedup_ctx =
+               doveadm_mail_cmd_alloc(struct deduplicate_cmd_context);
+
+       dedup_ctx->ctx.v.run = cmd_deduplicate_run;
+       dedup_ctx->ctx.v.init = cmd_deduplicate_init;
+       dedup_ctx->ctx.v.parse_arg = cmd_deduplicate_parse_arg;
+       dedup_ctx->ctx.getopt_args = "m";
+       return &dedup_ctx->ctx;
+}
+
+struct doveadm_mail_cmd cmd_deduplicate = { /* command descriptor; this commit also adds it to mail_commands[] */
+       cmd_deduplicate_alloc, "deduplicate", "[-m] <search query>" /* presumably: alloc callback, command name, usage text - confirm against struct doveadm_mail_cmd */
+};
index 5f5fc2e73b519f9ec319a31b1052301da786292d..c955eaa504e3bf6c8fe428e08cab2e6d6bcd41c5 100644 (file)
@@ -699,6 +699,7 @@ static struct doveadm_mail_cmd *mail_commands[] = {
        &cmd_index,
        &cmd_altmove,
        &cmd_copy,
+       &cmd_deduplicate,
        &cmd_move,
        &cmd_mailbox_list,
        &cmd_mailbox_create,
index 415a614407de833e5531e8433117c2ba961b37d3..ce07bdf1598ba041894b6f94012cba38aec3b72e 100644 (file)
@@ -145,6 +145,7 @@ extern struct doveadm_mail_cmd cmd_import;
 extern struct doveadm_mail_cmd cmd_index;
 extern struct doveadm_mail_cmd cmd_altmove;
 extern struct doveadm_mail_cmd cmd_copy;
+extern struct doveadm_mail_cmd cmd_deduplicate;
 extern struct doveadm_mail_cmd cmd_move;
 extern struct doveadm_mail_cmd cmd_mailbox_list;
 extern struct doveadm_mail_cmd cmd_mailbox_create;