From: Daniel Axtens Date: Sat, 1 Jul 2017 04:28:42 +0000 (+1000) Subject: parse(mail|archive): handle early fail within email module X-Git-Tag: v2.0.0~11 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=c2cfced9c8351661e943e89a26f8453a815d323a;p=thirdparty%2Fpatchwork.git parse(mail|archive): handle early fail within email module Certain really messed up email messages can cause a failure within the email module (at least on py3). Catch this. Signed-off-by: Daniel Axtens Signed-off-by: Stephen Finucane --- diff --git a/patchwork/management/commands/parsearchive.py b/patchwork/management/commands/parsearchive.py index 4e102a98..3eee8382 100644 --- a/patchwork/management/commands/parsearchive.py +++ b/patchwork/management/commands/parsearchive.py @@ -77,6 +77,23 @@ class Command(BaseCommand): count = len(mbox) + # Iterate through the mbox. This will pick up exceptions that are only + # thrown when a broken email is found part way through. Without this + # block, we'd get the exception thrown in enumerate(mbox) below, which + # is harder to catch. This is due to a bug in the Python 'email' + # library, as described here: + # + # https://lists.ozlabs.org/pipermail/patchwork/2017-July/004486.html + # + # The alternative is converting the mbox to a list of messages, but + # that requires holding the entire thing in memory, which is wasteful. 
+ try: + for m in mbox: + pass + except AttributeError: + logger.warning('Broken mbox/Maildir, aborting') + return + logger.info('Parsing %d mails', count) for i, msg in enumerate(mbox): try: diff --git a/patchwork/management/commands/parsemail.py b/patchwork/management/commands/parsemail.py index 9adfb25b..52ec8bc5 100644 --- a/patchwork/management/commands/parsemail.py +++ b/patchwork/management/commands/parsemail.py @@ -58,20 +58,25 @@ class Command(base.BaseCommand): def handle(self, *args, **options): infile = args[0] if args else options['infile'] - if infile: - logger.info('Parsing mail loaded by filename') - if six.PY3: - with open(infile, 'rb') as file_: - mail = email.message_from_binary_file(file_) - else: - with open(infile) as file_: - mail = email.message_from_file(file_) - else: - logger.info('Parsing mail loaded from stdin') - if six.PY3: - mail = email.message_from_binary_file(sys.stdin.buffer) + try: + if infile: + logger.info('Parsing mail loaded by filename') + if six.PY3: + with open(infile, 'rb') as file_: + mail = email.message_from_binary_file(file_) + else: + with open(infile) as file_: + mail = email.message_from_file(file_) else: - mail = email.message_from_file(sys.stdin) + logger.info('Parsing mail loaded from stdin') + if six.PY3: + mail = email.message_from_binary_file(sys.stdin.buffer) + else: + mail = email.message_from_file(sys.stdin) + except AttributeError: + logger.warning("Broken email ignored") + return + try: result = parse_mail(mail, options['list_id']) if result: