From: Stephen Finucane Date: Sat, 29 Oct 2016 13:13:35 +0000 (+0100) Subject: parser: Add series parsing X-Git-Tag: v2.0.0-rc1~179 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=0cd464569b267e3459da7a143cf38c6024c512b4;p=thirdparty%2Fpatchwork.git parser: Add series parsing It is now possible to parse and store series, so do just that. The parsing at the moment is based on both RFC822 headers and subject lines. Signed-off-by: Stephen Finucane Reviewed-by: Andy Doan Reviewed-by: Daniel Axtens Reviewed-by: Andrew Donnellan Tested-by: Russell Currey --- diff --git a/patchwork/parser.py b/patchwork/parser.py index 15476b12..a4036ff1 100644 --- a/patchwork/parser.py +++ b/patchwork/parser.py @@ -21,8 +21,10 @@ import codecs import datetime -from email.header import decode_header, make_header -from email.utils import parsedate_tz, mktime_tz +from email.header import decode_header +from email.header import make_header +from email.utils import mktime_tz +from email.utils import parsedate_tz from fnmatch import fnmatch import logging import re @@ -30,9 +32,17 @@ import re from django.contrib.auth.models import User from django.utils import six -from patchwork.models import (Patch, Project, Person, Comment, State, - DelegationRule, Submission, CoverLetter, - get_default_initial_patch_state) +from patchwork.models import Comment +from patchwork.models import CoverLetter +from patchwork.models import DelegationRule +from patchwork.models import get_default_initial_patch_state +from patchwork.models import Patch +from patchwork.models import Person +from patchwork.models import Project +from patchwork.models import Series +from patchwork.models import SeriesReference +from patchwork.models import State +from patchwork.models import Submission _hunk_re = re.compile(r'^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@') @@ -162,6 +172,42 @@ def find_project_by_header(mail): return project +def find_series(mail): + """Find a patch's series. + + Traverse RFC822 headers, starting with most recent first, to find + ancestors and the related series. Headers are traversed in reverse + to handle series sent in reply to previous series, like so: + + [PATCH 0/3] A cover letter + [PATCH 1/3] The first patch + ... + [PATCH v2 0/3] A cover letter + [PATCH v2 1/3] The first patch + ... + + This means we evaluate headers like so: + + - first, check for a Series that directly matches this message's + Message-ID + - then, check for a series that matches the In-Reply-To + - then, check for a series that matches the References, from most + recent (the patch's closest ancestor) to least recent + + Args: + mail (email.message.Message): The mail to extract series from + + Returns: + The matching ``Series`` instance, if any + """ + for ref in [mail.get('Message-Id')] + find_references(mail): + # try parsing by RFC5322 fields first + try: + return SeriesReference.objects.get(msgid=ref).series + except SeriesReference.DoesNotExist: + pass + + def find_author(mail): from_header = clean_header(mail.get('From')) name, email = (None, None) @@ -243,6 +289,13 @@ def find_references(mail): return refs +def _find_matching_prefix(subject_prefixes, regex): + for prefix in subject_prefixes: + m = regex.match(prefix) + if m: + return m + + def parse_series_marker(subject_prefixes): """Extract series markers from subject. @@ -258,14 +311,36 @@ def parse_series_marker(subject_prefixes): """ regex = re.compile('^([0-9]+)/([0-9]+)$') - for prefix in subject_prefixes: - m = regex.match(prefix) - if not m: - continue + m = _find_matching_prefix(subject_prefixes, regex) + if m: return (int(m.group(1)), int(m.group(2))) + return (None, None) +def parse_version(subject, subject_prefixes): + """Extract patch version. + + Args: + subject: Main body of subject line + subject_prefixes: List of subject prefixes to extract version + from + + Returns: + version if found, else 1 + """ + regex = re.compile('^[vV](\d+)$') + m = _find_matching_prefix(subject_prefixes, regex) + if m: + return int(m.group(1)) + + m = re.search(r'\([vV](\d+)\)', subject) + if m: + return int(m.group(1)) + + return 1 + + def find_content(project, mail): """Extract a comment and potential diff from a mail.""" patchbuf = None @@ -696,6 +771,7 @@ def parse_mail(mail, list_id=None): name, prefixes = clean_subject(subject, [project.linkname]) is_comment = subject_check(subject) x, n = parse_series_marker(prefixes) + version = parse_version(name, prefixes) refs = find_references(mail) date = find_date(mail) headers = find_headers(mail) @@ -712,6 +788,23 @@ def parse_mail(mail, list_id=None): filenames = find_filenames(diff) delegate = auto_delegate(project, filenames) + series = find_series(mail) + if not series and n: # the series markers indicates a series + series = Series(date=date, + submitter=author, + version=version, + total=n) + series.save() + + # NOTE(stephenfin) We must save references for series. We + # do this to handle the case where a later patch is + # received first. Without storing references, it would not + # be possible to identify the relationship between patches + # as the earlier patch does not reference the later one. + for ref in refs + [msgid]: + # we don't want duplicates + SeriesReference.objects.get_or_create(series=series, msgid=ref) + patch = Patch( msgid=msgid, project=project, @@ -727,6 +820,9 @@ def parse_mail(mail, list_id=None): patch.save() logger.debug('Patch saved') + if series: + series.add_patch(patch, x) + return patch elif x == 0: # (potential) cover letters # if refs are empty, it's implicitly a cover letter. If not, @@ -749,6 +845,28 @@ def parse_mail(mail, list_id=None): if is_cover_letter: author.save() + # we don't use 'find_series' here as a cover letter will + # always be the first item in a thread, thus the references + # could only point to a different series or unrelated + # message + try: + series = SeriesReference.objects.get(msgid=msgid).series + except SeriesReference.DoesNotExist: + series = None + + if not series: + series = Series(date=date, + submitter=author, + version=version, + total=n) + series.save() + + # we don't save the in-reply-to or references fields + # for a cover letter, as they can't refer to the same + # series + SeriesReference.objects.get_or_create(series=series, + msgid=msgid) + cover_letter = CoverLetter( msgid=msgid, project=project, @@ -760,6 +878,8 @@ def parse_mail(mail, list_id=None): cover_letter.save() logger.debug('Cover letter saved') + series.add_cover_letter(cover_letter) + return cover_letter # comments diff --git a/patchwork/tests/test_parser.py b/patchwork/tests/test_parser.py index e16ffbc6..96166ad2 100644 --- a/patchwork/tests/test_parser.py +++ b/patchwork/tests/test_parser.py @@ -35,13 +35,16 @@ from patchwork.parser import clean_subject from patchwork.parser import find_author from patchwork.parser import find_content from patchwork.parser import find_project_by_header +from patchwork.parser import find_series from patchwork.parser import parse_mail as _parse_mail from patchwork.parser import parse_pull_request from patchwork.parser import parse_series_marker +from patchwork.parser import parse_version from patchwork.parser import split_prefixes from patchwork.parser import subject_check from patchwork.tests import TEST_MAIL_DIR from patchwork.tests.utils import create_project +from patchwork.tests.utils import create_series_reference from patchwork.tests.utils import create_state from patchwork.tests.utils import create_user from patchwork.tests.utils import read_patch @@ -328,6 +331,72 @@ class SenderCorrelationTest(TestCase): self.assertEqual(person_b.id, person_a.id) +class SeriesCorrelationTest(TestCase): + """Validate correct behavior of find_series.""" + + @staticmethod + def _create_email(msgid, references=None): + """Create a sample mail. + + Arguments: + msgid (str): The message's msgid + references (list): A list of preceding messages' msgids, + oldest first + """ + mail = 'Message-Id: %s\n' % msgid + \ + 'From: example user \n' + \ + 'Subject: Tests\n' + + if references: + mail += 'In-Reply-To: %s\n' % references[-1] + mail += 'References: %s\n' % '\n\t'.join(references) + + mail += 'test\n\n' + SAMPLE_DIFF + return message_from_string(mail) + + def test_new_series(self): + msgid = make_msgid() + email = self._create_email(msgid) + + self.assertIsNone(find_series(email)) + + def test_first_reply(self): + msgid_a = make_msgid() + msgid_b = make_msgid() + email = self._create_email(msgid_b, [msgid_a]) + + # assume msgid_a was already handled + ref = create_series_reference(msgid=msgid_a) + + series = find_series(email) + self.assertEqual(series, ref.series) + + def test_nested_series(self): + """Handle a series sent in-reply-to an existing series.""" + # create an old series with a "cover letter" + msgids = [make_msgid()] + ref_v1 = create_series_reference(msgid=msgids[0]) + + # ...and three patches + for i in range(3): + msgids.append(make_msgid()) + create_series_reference(msgid=msgids[-1], + series=ref_v1.series) + + # now create a new series with "cover letter" + msgids.append(make_msgid()) + ref_v2 = create_series_reference(msgid=msgids[-1]) + + # ...and the "first patch" of this new series + msgid = make_msgid() + email = self._create_email(msgid, msgids) + series = find_series(email) + + # this should link to the second series - not the first + self.assertEqual(len(msgids), 4 + 1) # old series + new cover + self.assertEqual(series, ref_v2.series) + + class SubjectEncodingTest(TestCase): """Validate correct handling of encoded subjects.""" @@ -692,24 +761,6 @@ class ParseInitialTagsTest(PatchTest): tag__name='Tested-by').count, 1) -class PrefixTest(TestCase): - - def test_split_prefixes(self): - self.assertEqual(split_prefixes('PATCH'), ['PATCH']) - self.assertEqual(split_prefixes('PATCH,RFC'), ['PATCH', 'RFC']) - self.assertEqual(split_prefixes(''), []) - self.assertEqual(split_prefixes('PATCH,'), ['PATCH']) - self.assertEqual(split_prefixes('PATCH '), ['PATCH']) - self.assertEqual(split_prefixes('PATCH,RFC'), ['PATCH', 'RFC']) - self.assertEqual(split_prefixes('PATCH 1/2'), ['PATCH', '1/2']) - - def test_series_markers(self): - self.assertEqual(parse_series_marker([]), (None, None)) - self.assertEqual(parse_series_marker(['bar']), (None, None)) - self.assertEqual(parse_series_marker(['bar', '1/2']), (1, 2)) - self.assertEqual(parse_series_marker(['bar', '0/12']), (0, 12)) - - class SubjectTest(TestCase): def test_clean_subject(self): @@ -748,3 +799,28 @@ class SubjectTest(TestCase): self.assertIsNotNone(subject_check('RE meep')) self.assertIsNotNone(subject_check('Re meep')) self.assertIsNotNone(subject_check('re meep')) + + def test_split_prefixes(self): + self.assertEqual(split_prefixes('PATCH'), ['PATCH']) + self.assertEqual(split_prefixes('PATCH,RFC'), ['PATCH', 'RFC']) + self.assertEqual(split_prefixes(''), []) + self.assertEqual(split_prefixes('PATCH,'), ['PATCH']) + self.assertEqual(split_prefixes('PATCH '), ['PATCH']) + self.assertEqual(split_prefixes('PATCH,RFC'), ['PATCH', 'RFC']) + self.assertEqual(split_prefixes('PATCH 1/2'), ['PATCH', '1/2']) + + def test_series_markers(self): + self.assertEqual(parse_series_marker([]), (None, None)) + self.assertEqual(parse_series_marker(['bar']), (None, None)) + self.assertEqual(parse_series_marker(['bar', '1/2']), (1, 2)) + self.assertEqual(parse_series_marker(['bar', '0/12']), (0, 12)) + + def test_version(self): + self.assertEqual(parse_version('', []), 1) + self.assertEqual(parse_version('Hello, world', []), 1) + self.assertEqual(parse_version('Hello, world', ['version']), 1) + self.assertEqual(parse_version('Hello, world', ['v2']), 2) + self.assertEqual(parse_version('Hello, world', ['V6']), 6) + self.assertEqual(parse_version('Hello, world', ['v10']), 10) + self.assertEqual(parse_version('Hello, world (v2)', []), 2) + self.assertEqual(parse_version('Hello, world (V6)', []), 6) diff --git a/patchwork/tests/utils.py b/patchwork/tests/utils.py index 23b0c87c..52cd0f9b 100644 --- a/patchwork/tests/utils.py +++ b/patchwork/tests/utils.py @@ -31,6 +31,8 @@ from patchwork.models import CoverLetter from patchwork.models import Patch from patchwork.models import Person from patchwork.models import Project +from patchwork.models import Series +from patchwork.models import SeriesReference from patchwork.models import State from patchwork.tests import TEST_PATCH_DIR @@ -217,6 +219,29 @@ def create_check(**kwargs): return Check.objects.create(**values) +def create_series(**kwargs): + """Create 'Series' object.""" + values = { + 'date': dt.now(), + 'submitter': create_person() if 'submitter' not in kwargs else None, + 'total': 1, + } + values.update(**kwargs) + + return Series.objects.create(**values) + + +def create_series_reference(**kwargs): + """Create 'SeriesReference' object.""" + values = { + 'series': create_series() if 'series' not in kwargs else None, + 'msgid': make_msgid(), + } + values.update(**kwargs) + + return SeriesReference.objects.create(**values) + + def _create_submissions(create_func, count=1, **kwargs): """Create 'count' Submission-based objects.