From 19aad6096a13603c088461dcc7cf534289b8479a Mon Sep 17 00:00:00 2001 From: Stephen Finucane Date: Wed, 19 Sep 2018 21:03:33 +0100 Subject: [PATCH] parser: Handle IntegrityError for cover letters, comments This was already done for patches but cover letters and comments were not handled correctly, resulting in errors while parsing archives. While we're here, we slightly modify how these exceptions are handle. Rather than simply ignoring them, as we were doing, we raise a custom exception. This allows us to specifically identify these types of exceptions, print a log and still skip them (which we want, as seen in commit d2eb1f6d2). While we're here, we change from separate create-save calls to a combined create-save call for both created CoverLetter and Comment objects. We were already doing this for patches. Signed-off-by: Stephen Finucane (cherry picked from commit 300ba3eb283ab1af3dc5924e5ed540159c3ee0bc) --- patchwork/management/commands/parsearchive.py | 15 ++++-- patchwork/management/commands/parsemail.py | 8 ++- patchwork/parser.py | 50 ++++++++++++------- 3 files changed, 48 insertions(+), 25 deletions(-) diff --git a/patchwork/management/commands/parsearchive.py b/patchwork/management/commands/parsearchive.py index 5468d35e..bffb4647 100644 --- a/patchwork/management/commands/parsearchive.py +++ b/patchwork/management/commands/parsearchive.py @@ -26,6 +26,7 @@ from django.core.management.base import BaseCommand from patchwork import models from patchwork.parser import parse_mail +from patchwork.parser import DuplicateMailError logger = logging.getLogger(__name__) @@ -48,6 +49,7 @@ class Command(BaseCommand): models.CoverLetter: 0, models.Comment: 0, } + duplicates = 0 dropped = 0 errors = 0 @@ -90,10 +92,13 @@ class Command(BaseCommand): results[type(obj)] += 1 else: dropped += 1 - except ValueError: - # TODO(stephenfin): Perhaps we should store the broken patch - # somewhere for future reference? + except DuplicateMailError as exc: + duplicates += 1 + logger.warning('Duplicate mail for message ID %s', exc.msgid) + except (ValueError, Exception) as exc: errors += 1 + logger.warning('Invalid mail: %s', exc.message, + extra={'mail': mail.as_string()}) if (i % 10) == 0: self.stdout.write('%06d/%06d\r' % (i, count), ending='') @@ -104,6 +109,7 @@ class Command(BaseCommand): ' %(covers)4d cover letters\n' ' %(patches)4d patches\n' ' %(comments)4d comments\n' + ' %(duplicates)4d duplicates\n' ' %(dropped)4d dropped\n' ' %(errors)4d errors\n' 'Total: %(new)s new entries' % { @@ -111,8 +117,9 @@ class Command(BaseCommand): 'covers': results[models.CoverLetter], 'patches': results[models.Patch], 'comments': results[models.Comment], + 'duplicates': duplicates, 'dropped': dropped, 'errors': errors, - 'new': count - dropped - errors, + 'new': count - duplicates - dropped - errors, }) mbox.close() diff --git a/patchwork/management/commands/parsemail.py b/patchwork/management/commands/parsemail.py index f62fb4f5..9098047d 100644 --- a/patchwork/management/commands/parsemail.py +++ b/patchwork/management/commands/parsemail.py @@ -25,6 +25,7 @@ from django.core.management import base from django.utils import six from patchwork.parser import parse_mail +from patchwork.parser import DuplicateMailError logger = logging.getLogger(__name__) @@ -79,7 +80,10 @@ class Command(base.BaseCommand): result = parse_mail(mail, options['list_id']) if result is None: logger.warning('Nothing added to database') - except Exception: - logger.exception('Error when parsing incoming email', + except DuplicateMailError as exc: + logger.warning('Duplicate mail for message ID %s', exc.msgid) + except (ValueError, Exception) as exc: + logger.exception('Error when parsing incoming email: %s', + exc.message, extra={'mail': mail.as_string()}) sys.exit(1) diff --git a/patchwork/parser.py b/patchwork/parser.py index 01bdd9f9..1304e95e 100644 --- a/patchwork/parser.py +++ b/patchwork/parser.py @@ -64,6 +64,12 @@ EXTENDED_HEADER_LINES = ( logger = logging.getLogger(__name__) +class DuplicateMailError(Exception): + + def __init__(self, msgid): + self.msgid = msgid + + def normalise_space(value): whitespace_re = re.compile(r'\s+') return whitespace_re.sub(' ', value).strip() @@ -1036,8 +1042,7 @@ def parse_mail(mail, list_id=None): state=find_state(mail)) logger.debug('Patch saved') except IntegrityError: - logger.error("Duplicate mail for message ID %s" % msgid) - return None + raise DuplicateMailError(msgid=msgid) # if we don't have a series marker, we will never have an existing # series to match against. @@ -1143,15 +1148,18 @@ def parse_mail(mail, list_id=None): logger.error("Multiple SeriesReferences for %s" " in project %s!" % (msgid, project.name)) - cover_letter = CoverLetter( - msgid=msgid, - project=project, - name=name[:255], - date=date, - headers=headers, - submitter=author, - content=message) - cover_letter.save() + try: + cover_letter = CoverLetter.objects.create( + msgid=msgid, + project=project, + name=name[:255], + date=date, + headers=headers, + submitter=author, + content=message) + except IntegrityError: + raise DuplicateMailError(msgid=msgid) + logger.debug('Cover letter saved') series.add_cover_letter(cover_letter) @@ -1167,14 +1175,18 @@ def parse_mail(mail, list_id=None): author = get_or_create_author(mail) - comment = Comment( - submission=submission, - msgid=msgid, - date=date, - headers=headers, - submitter=author, - content=message) - comment.save() + + try: + comment = Comment.objects.create( + submission=submission, + msgid=msgid, + date=date, + headers=headers, + submitter=author, + content=message) + except IntegrityError: + raise DuplicateMailError(msgid=msgid) + logger.debug('Comment saved') return comment -- 2.47.3