git.ipfire.org Git - thirdparty/patchwork.git/commitdiff
parsearchive: Handle duplicates
author Stephen Finucane <stephen.finucane@intel.com>
Thu, 17 Dec 2015 17:29:32 +0000 (17:29 +0000)
committer Stephen Finucane <stephen.finucane@intel.com>
Thu, 17 Dec 2015 17:30:27 +0000 (17:30 +0000)
The parsearchive tool can be used to load missing messages sourced
from mailman or another source. In this use case, there's a good
possibility that at least some of the messages found in the archive
are already stored in patchwork. Handle this case by ignoring these
duplicates.

Signed-off-by: Stephen Finucane <stephen.finucane@intel.com>
patchwork/bin/parsearchive.py

index f8792155cfe07479cebbaf7fd326737876d593cc..33cb5cb5fb148794e86ec9c2c9498617734783cd 100755 (executable)
@@ -31,6 +31,8 @@ import django
 
 from patchwork.bin import parsemail
 
+LOGGER = logging.getLogger(__name__)
+
 VERBOSITY_LEVELS = {
     'debug': logging.DEBUG,
     'info': logging.INFO,
@@ -42,8 +44,14 @@ VERBOSITY_LEVELS = {
 
 def parse_mbox(path, list_id):
     mbox = mailbox.mbox(path)
+    duplicates = 0
     for msg in mbox:
-        parsemail.parse_mail(msg, list_id)
+        try:
+            parsemail.parse_mail(msg, list_id)
+        except django.db.utils.IntegrityError:
+            duplicates += 1
+    LOGGER.info('Processed %d messages, %d duplicates',
+                len(mbox), duplicates)
 
 
 def main():