git.ipfire.org Git - thirdparty/patchwork.git/commitdiff
parse(mail|archive): handle early fail within email module
author Daniel Axtens <dja@axtens.net>
Sat, 1 Jul 2017 04:28:42 +0000 (14:28 +1000)
committer Stephen Finucane <stephen@that.guru>
Mon, 3 Jul 2017 19:42:00 +0000 (20:42 +0100)
Certain badly malformed email messages can cause a failure (an
AttributeError) within the email module (at least on py3). Catch this
in both commands, log a warning, and stop instead of crashing.

Signed-off-by: Daniel Axtens <dja@axtens.net>
Signed-off-by: Stephen Finucane <stephen@that.guru>
patchwork/management/commands/parsearchive.py
patchwork/management/commands/parsemail.py

index 4e102a988e76908c9a0325a678e3d73b2cb7fcb2..3eee8382e389bf503264345c7325207e29d09e0b 100644 (file)
@@ -77,6 +77,23 @@ class Command(BaseCommand):
 
         count = len(mbox)
 
+        # Iterate through the mbox. This will pick up exceptions that are only
+        # thrown when a broken email is found part way through. Without this
+        # block, we'd get the exception thrown in enumerate(mbox) below, which
+        # is harder to catch. This is due to a bug in the Python 'email'
+        # library, as described here:
+        #
+        #   https://lists.ozlabs.org/pipermail/patchwork/2017-July/004486.html
+        #
+        # The alternative is converting the mbox to a list of messages, but
+        # that requires holding the entire thing in memory, which is wasteful.
+        try:
+            for m in mbox:
+                pass
+        except AttributeError:
+            logger.warning('Broken mbox/Maildir, aborting')
+            return
+
         logger.info('Parsing %d mails', count)
         for i, msg in enumerate(mbox):
             try:
index 9adfb25b09e3ba14cfd669c63d8a4f67e243fc88..52ec8bc568999ba4d3e421ac3476cf1eeade820d 100644 (file)
@@ -58,20 +58,25 @@ class Command(base.BaseCommand):
     def handle(self, *args, **options):
         infile = args[0] if args else options['infile']
 
-        if infile:
-            logger.info('Parsing mail loaded by filename')
-            if six.PY3:
-                with open(infile, 'rb') as file_:
-                    mail = email.message_from_binary_file(file_)
-            else:
-                with open(infile) as file_:
-                    mail = email.message_from_file(file_)
-        else:
-            logger.info('Parsing mail loaded from stdin')
-            if six.PY3:
-                mail = email.message_from_binary_file(sys.stdin.buffer)
+        try:
+            if infile:
+                logger.info('Parsing mail loaded by filename')
+                if six.PY3:
+                    with open(infile, 'rb') as file_:
+                        mail = email.message_from_binary_file(file_)
+                else:
+                    with open(infile) as file_:
+                        mail = email.message_from_file(file_)
             else:
-                mail = email.message_from_file(sys.stdin)
+                logger.info('Parsing mail loaded from stdin')
+                if six.PY3:
+                    mail = email.message_from_binary_file(sys.stdin.buffer)
+                else:
+                    mail = email.message_from_file(sys.stdin)
+        except AttributeError:
+            logger.warning("Broken email ignored")
+            return
+
         try:
             result = parse_mail(mail, options['list_id'])
             if result: