From cd8f14fb03adf57f2c4c53de2e318af67be94047 Mon Sep 17 00:00:00 2001 From: Stephen Finucane Date: Fri, 6 May 2022 17:53:13 +0100 Subject: [PATCH] parser: Ignore CFWS in In-Reply-To, References headers RFC2822 states that [1] a comment or folding white space is permitted to be inserted before or after a msg-id in in the Message-ID, In-Reply-To or References fields. Allow for this. [1] https://tools.ietf.org/html/rfc2822#section-3.6.4 Conflicts: patchwork/tests/test_parser.py NOTE(stephenfin): Conflicts are due to the absence of commits f5cd52144c ("parser: Add 'X-Patchwork-Action-Required' header") and 94d75a1c78 ("Blackify code"), neither of which we want to backport. Signed-off-by: Stephen Finucane Reviewed-by: DJ Delorie Closes: #399 --- patchwork/parser.py | 13 +-- patchwork/tests/test_parser.py | 98 +++++++++++++++++-- .../notes/issue-399-09d6f17aa54b14b2.yaml | 11 +++ 3 files changed, 105 insertions(+), 17 deletions(-) create mode 100644 releasenotes/notes/issue-399-09d6f17aa54b14b2.yaml diff --git a/patchwork/parser.py b/patchwork/parser.py index 61a81246..11aa47e9 100644 --- a/patchwork/parser.py +++ b/patchwork/parser.py @@ -31,6 +31,7 @@ from patchwork.models import SeriesReference from patchwork.models import State +_msgid_re = re.compile(r'<[^>]+>') _hunk_re = re.compile(r'^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@') _filename_re = re.compile(r'^(---|\+\+\+) (\S+)') list_id_headers = ['List-ID', 'X-Mailing-List', 'X-list'] @@ -483,19 +484,15 @@ def find_references(mail): if 'In-Reply-To' in mail: for in_reply_to in mail.get_all('In-Reply-To'): - r = clean_header(in_reply_to) - if r: - refs.append(r) + ref = _msgid_re.search(clean_header(in_reply_to)) + if ref: + refs.append(ref.group(0)) if 'References' in mail: for references_header in mail.get_all('References'): - h = clean_header(references_header) - if not h: - continue - references = h.split() + references = _msgid_re.findall(clean_header(references_header)) references.reverse() for ref in references: - ref = ref.strip() if ref not in refs: refs.append(ref) diff --git a/patchwork/tests/test_parser.py b/patchwork/tests/test_parser.py index eaf6599c..0c594f66 100644 --- a/patchwork/tests/test_parser.py +++ b/patchwork/tests/test_parser.py @@ -68,7 +68,15 @@ def read_mail(filename, project=None): return mail -def _create_email(msg, msgid=None, sender=None, listid=None, in_reply_to=None): +def _create_email( + msg, + msgid=None, + subject=None, + sender=None, + listid=None, + in_reply_to=None, + references=None, +): msg['Message-Id'] = msgid or make_msgid() msg['Subject'] = 'Test subject' msg['From'] = sender or 'Test Author ' @@ -76,14 +84,32 @@ def _create_email(msg, msgid=None, sender=None, listid=None, in_reply_to=None): if in_reply_to: msg['In-Reply-To'] = in_reply_to + if references: + msg['References'] = references + return msg -def create_email(content, msgid=None, sender=None, listid=None, - in_reply_to=None): +def create_email( + content, + msgid=None, + subject=None, + sender=None, + listid=None, + in_reply_to=None, + references=None, +): msg = MIMEText(content, _charset='us-ascii') - return _create_email(msg, msgid, sender, listid, in_reply_to) + return _create_email( + msg, + msgid, + subject, + sender, + listid, + in_reply_to, + references, + ) def parse_mail(*args, **kwargs): @@ -1146,7 +1172,7 @@ class DuplicateMailTest(TestCase): def test_duplicate_patch(self): diff = read_patch('0001-add-line.patch') - m = create_email(diff, listid=self.listid, msgid='1@example.com') + m = create_email(diff, listid=self.listid, msgid='<1@example.com>') self._test_duplicate_mail(m) @@ -1154,18 +1180,26 @@ class DuplicateMailTest(TestCase): def test_duplicate_comment(self): diff = read_patch('0001-add-line.patch') - m1 = create_email(diff, listid=self.listid, msgid='1@example.com') + m1 = create_email( + diff, + listid=self.listid, + msgid='<1@example.com>', + ) _parse_mail(m1) - m2 = create_email('test', listid=self.listid, msgid='2@example.com', - in_reply_to='1@example.com') + m2 = create_email( + 'test', + listid=self.listid, + msgid='<2@example.com>', + in_reply_to='<1@example.com>', + ) self._test_duplicate_mail(m2) self.assertEqual(Patch.objects.count(), 1) self.assertEqual(PatchComment.objects.count(), 1) def test_duplicate_coverletter(self): - m = create_email('test', listid=self.listid, msgid='1@example.com') + m = create_email('test', listid=self.listid, msgid='<1@example.com>') del m['Subject'] m['Subject'] = '[PATCH 0/1] test cover letter' @@ -1174,6 +1208,52 @@ class DuplicateMailTest(TestCase): self.assertEqual(Cover.objects.count(), 1) +class TestFindReferences(TestCase): + def test_find_references__header_with_comments(self): + """Test that we strip comments from References, In-Reply-To fields.""" + in_reply_to = ( + '<4574b99b-edac-d8dc-9141-79c3109d2fcc@huawei.com> (message from\n' + ' liqingqing on Thu, 1 Apr 2021 16:51:45 +0800)' + ) + email = create_email('test', in_reply_to=in_reply_to) + + expected = ['<4574b99b-edac-d8dc-9141-79c3109d2fcc@huawei.com>'] + actual = parser.find_references(email) + + self.assertEqual(expected, actual) + + def test_find_references__duplicate_references(self): + """Test that we ignore duplicate message IDs in 'References'.""" + message_id = '<20130510114450.7104c5d2@nehalam.linuxnetplumber.net>' + in_reply_to = ( + '<525534677.5312512.1368202896189.JavaMail.root@vmware.com>' + ) + references = ( + '\n' # noqa: E501 + ' \n' # noqa: E501 + ' <1676591087.5291867.1368201908283.JavaMail.root@vmware.com>\n' + ' <20130510091549.3c064df6@nehalam.linuxnetplumber.net>\n' + ' <525534677.5312512.1368202896189.JavaMail.root@vmware.com>' + ) + email = create_email( + 'test', + msgid=message_id, + in_reply_to=in_reply_to, + references=references, + ) + + expected = [ + '<525534677.5312512.1368202896189.JavaMail.root@vmware.com>', + '<20130510091549.3c064df6@nehalam.linuxnetplumber.net>', + '<1676591087.5291867.1368201908283.JavaMail.root@vmware.com>', + '', # noqa: E501 + '', + ] + actual = parser.find_references(email) + + self.assertEqual(expected, actual) + + class TestCommentCorrelation(TestCase): def test_find_patch_for_comment__no_reply(self): diff --git a/releasenotes/notes/issue-399-09d6f17aa54b14b2.yaml b/releasenotes/notes/issue-399-09d6f17aa54b14b2.yaml new file mode 100644 index 00000000..e925a606 --- /dev/null +++ b/releasenotes/notes/issue-399-09d6f17aa54b14b2.yaml @@ -0,0 +1,11 @@ +--- +fixes: + - | + Comments and whitespace are now correctly stripped from the ``Message-ID``, + ``In-Reply-To``, and ``References`` headers. One side effect of this change + is that the parser is now stricter with regards to the format of the + ``msg-id`` component of these headers: all identifiers must now be + surrounded by angle brackets, e.g. ````. This is + mandated in the spec and a review of mailing lists archives suggest it is + broadly adhered to. Without these markers, there is no way to delimit + ``msg-id`` from any surrounding comments and whitespace. -- 2.47.3