From c2cfced9c8351661e943e89a26f8453a815d323a Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Sat, 1 Jul 2017 14:28:42 +1000 Subject: [PATCH] parse(mail|archive): handle early fail within email module Certain really messed up email messages can cause a failure within the email module (at least on py3). Catch this. Signed-off-by: Daniel Axtens Signed-off-by: Stephen Finucane --- patchwork/management/commands/parsearchive.py | 17 ++++++++++ patchwork/management/commands/parsemail.py | 31 +++++++++++-------- 2 files changed, 35 insertions(+), 13 deletions(-) diff --git a/patchwork/management/commands/parsearchive.py b/patchwork/management/commands/parsearchive.py index 4e102a98..3eee8382 100644 --- a/patchwork/management/commands/parsearchive.py +++ b/patchwork/management/commands/parsearchive.py @@ -77,6 +77,23 @@ class Command(BaseCommand): count = len(mbox) + # Iterate through the mbox. This will pick up exceptions that are only + # thrown when a broken email is found part way through. Without this + # block, we'd get the exception thrown in enumerate(mbox) below, which + # is harder to catch. This is due to a bug in the Python 'email' + # library, as described here: + # + # https://lists.ozlabs.org/pipermail/patchwork/2017-July/004486.html + # + # The alternative is converting the mbox to a list of messages, but + # that requires holding the entire thing in memory, which is wasteful. 
+ try: + for m in mbox: + pass + except AttributeError: + logger.warning('Broken mbox/Maildir, aborting') + return + logger.info('Parsing %d mails', count) for i, msg in enumerate(mbox): try: diff --git a/patchwork/management/commands/parsemail.py b/patchwork/management/commands/parsemail.py index 9adfb25b..52ec8bc5 100644 --- a/patchwork/management/commands/parsemail.py +++ b/patchwork/management/commands/parsemail.py @@ -58,20 +58,25 @@ class Command(base.BaseCommand): def handle(self, *args, **options): infile = args[0] if args else options['infile'] - if infile: - logger.info('Parsing mail loaded by filename') - if six.PY3: - with open(infile, 'rb') as file_: - mail = email.message_from_binary_file(file_) - else: - with open(infile) as file_: - mail = email.message_from_file(file_) - else: - logger.info('Parsing mail loaded from stdin') - if six.PY3: - mail = email.message_from_binary_file(sys.stdin.buffer) + try: + if infile: + logger.info('Parsing mail loaded by filename') + if six.PY3: + with open(infile, 'rb') as file_: + mail = email.message_from_binary_file(file_) + else: + with open(infile) as file_: + mail = email.message_from_file(file_) else: - mail = email.message_from_file(sys.stdin) + logger.info('Parsing mail loaded from stdin') + if six.PY3: + mail = email.message_from_binary_file(sys.stdin.buffer) + else: + mail = email.message_from_file(sys.stdin) + except AttributeError: + logger.warning("Broken email ignored") + return + try: result = parse_mail(mail, options['list_id']) if result: -- 2.47.3