From: Stephen Finucane Date: Thu, 17 Dec 2015 17:29:32 +0000 (+0000) Subject: parsearchive: Handle duplicates X-Git-Tag: v1.1.0~82 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8001be4ac2b92569942919299ff61447ca04478c;p=thirdparty%2Fpatchwork.git parsearchive: Handle duplicates The parsearchive tool can be used to load missing messages sourced from mailman or another source. In this use case, there's a good possibility that at least some of the messages found in the archive are already stored in patchwork. Handle this case by ignoring these duplicates. Signed-off-by: Stephen Finucane --- diff --git a/patchwork/bin/parsearchive.py b/patchwork/bin/parsearchive.py index f8792155..33cb5cb5 100755 --- a/patchwork/bin/parsearchive.py +++ b/patchwork/bin/parsearchive.py @@ -31,6 +31,8 @@ import django from patchwork.bin import parsemail +LOGGER = logging.getLogger(__name__) + VERBOSITY_LEVELS = { 'debug': logging.DEBUG, 'info': logging.INFO, @@ -42,8 +44,14 @@ VERBOSITY_LEVELS = { def parse_mbox(path, list_id): mbox = mailbox.mbox(path) + duplicates = 0 for msg in mbox: - parsemail.parse_mail(msg, list_id) + try: + parsemail.parse_mail(msg, list_id) + except django.db.utils.IntegrityError: + duplicates += 1 + LOGGER.info('Processed %d messages, %d duplicates', + len(mbox), duplicates) def main():