count = len(mbox)
+ # Iterate through the mbox. This will pick up exceptions that are only
+ # thrown when a broken email is found part way through. Without this
+ # block, we'd get the exception thrown in enumerate(mbox) below, which
+ # is harder to catch. This is due to a bug in the Python 'email'
+ # library, as described here:
+ #
+ # https://lists.ozlabs.org/pipermail/patchwork/2017-July/004486.html
+ #
+ # The alternative is converting the mbox to a list of messages, but
+ # that requires holding the entire thing in memory, which is wasteful.
+ try:
+ for m in mbox:
+ pass
+ except AttributeError:
+ logger.warning('Broken mbox/Maildir, aborting')
+ return
+
logger.info('Parsing %d mails', count)
for i, msg in enumerate(mbox):
try:
def handle(self, *args, **options):
infile = args[0] if args else options['infile']
- if infile:
- logger.info('Parsing mail loaded by filename')
- if six.PY3:
- with open(infile, 'rb') as file_:
- mail = email.message_from_binary_file(file_)
- else:
- with open(infile) as file_:
- mail = email.message_from_file(file_)
- else:
- logger.info('Parsing mail loaded from stdin')
- if six.PY3:
- mail = email.message_from_binary_file(sys.stdin.buffer)
+ try:
+ if infile:
+ logger.info('Parsing mail loaded by filename')
+ if six.PY3:
+ with open(infile, 'rb') as file_:
+ mail = email.message_from_binary_file(file_)
+ else:
+ with open(infile) as file_:
+ mail = email.message_from_file(file_)
else:
- mail = email.message_from_file(sys.stdin)
+ logger.info('Parsing mail loaded from stdin')
+ if six.PY3:
+ mail = email.message_from_binary_file(sys.stdin.buffer)
+ else:
+ mail = email.message_from_file(sys.stdin)
+ except AttributeError:
+ logger.warning("Broken email ignored")
+ return
+
try:
result = parse_mail(mail, options['list_id'])
if result: