From 7546f7e92120dd1c668cdb64bf441f28cdf9fd42 Mon Sep 17 00:00:00 2001 From: Stephen Finucane Date: Fri, 6 May 2022 17:53:13 +0100 Subject: [PATCH] parser: Ignore CFWS in In-Reply-To, References headers RFC2822 states that [1] a comment or folding white space is permitted to be inserted before or after a msg-id in in the Message-ID, In-Reply-To or References fields. Allow for this. [1] https://tools.ietf.org/html/rfc2822#section-3.6.4 Signed-off-by: Stephen Finucane Reviewed-by: DJ Delorie Closes: #399 --- patchwork/parser.py | 13 ++-- patchwork/tests/test_parser.py | 74 +++++++++++++++++-- .../notes/issue-399-09d6f17aa54b14b2.yaml | 11 +++ 3 files changed, 84 insertions(+), 14 deletions(-) create mode 100644 releasenotes/notes/issue-399-09d6f17aa54b14b2.yaml diff --git a/patchwork/parser.py b/patchwork/parser.py index 026da4ec..779a519f 100644 --- a/patchwork/parser.py +++ b/patchwork/parser.py @@ -31,6 +31,7 @@ from patchwork.models import SeriesReference from patchwork.models import State +_msgid_re = re.compile(r'<[^>]+>') _hunk_re = re.compile(r'^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@') _filename_re = re.compile(r'^(---|\+\+\+) (\S+)') list_id_headers = ['List-ID', 'X-Mailing-List', 'X-list'] @@ -502,19 +503,15 @@ def find_references(mail): if 'In-Reply-To' in mail: for in_reply_to in mail.get_all('In-Reply-To'): - r = clean_header(in_reply_to) - if r: - refs.append(r) + ref = _msgid_re.search(clean_header(in_reply_to)) + if ref: + refs.append(ref.group(0)) if 'References' in mail: for references_header in mail.get_all('References'): - h = clean_header(references_header) - if not h: - continue - references = h.split() + references = _msgid_re.findall(clean_header(references_header)) references.reverse() for ref in references: - ref = ref.strip() if ref not in refs: refs.append(ref) diff --git a/patchwork/tests/test_parser.py b/patchwork/tests/test_parser.py index c22974a4..b03e49d4 100644 --- a/patchwork/tests/test_parser.py +++ b/patchwork/tests/test_parser.py @@ -74,6 +74,7 @@ def _create_email( sender=None, listid=None, in_reply_to=None, + references=None, headers=None, ): msg['Message-Id'] = msgid or make_msgid() @@ -84,6 +85,9 @@ def _create_email( if in_reply_to: msg['In-Reply-To'] = in_reply_to + if references: + msg['References'] = references + for header in headers or {}: msg[header] = headers[header] @@ -97,12 +101,20 @@ def create_email( sender=None, listid=None, in_reply_to=None, + references=None, headers=None, ): msg = MIMEText(content, _charset='us-ascii') return _create_email( - msg, msgid, subject, sender, listid, in_reply_to, headers + msg, + msgid, + subject, + sender, + listid, + in_reply_to, + references, + headers, ) @@ -1290,7 +1302,7 @@ class DuplicateMailTest(TestCase): def test_duplicate_patch(self): diff = read_patch('0001-add-line.patch') - m = create_email(diff, listid=self.listid, msgid='1@example.com') + m = create_email(diff, listid=self.listid, msgid='<1@example.com>') self._test_duplicate_mail(m) @@ -1298,14 +1310,18 @@ class DuplicateMailTest(TestCase): def test_duplicate_comment(self): diff = read_patch('0001-add-line.patch') - m1 = create_email(diff, listid=self.listid, msgid='1@example.com') + m1 = create_email( + diff, + listid=self.listid, + msgid='<1@example.com>', + ) _parse_mail(m1) m2 = create_email( 'test', listid=self.listid, - msgid='2@example.com', - in_reply_to='1@example.com', + msgid='<2@example.com>', + in_reply_to='<1@example.com>', ) self._test_duplicate_mail(m2) @@ -1313,7 +1329,7 @@ class DuplicateMailTest(TestCase): self.assertEqual(PatchComment.objects.count(), 1) def test_duplicate_coverletter(self): - m = create_email('test', listid=self.listid, msgid='1@example.com') + m = create_email('test', listid=self.listid, msgid='<1@example.com>') del m['Subject'] m['Subject'] = '[PATCH 0/1] test cover letter' @@ -1322,6 +1338,52 @@ class DuplicateMailTest(TestCase): self.assertEqual(Cover.objects.count(), 1) +class TestFindReferences(TestCase): + def test_find_references__header_with_comments(self): + """Test that we strip comments from References, In-Reply-To fields.""" + in_reply_to = ( + '<4574b99b-edac-d8dc-9141-79c3109d2fcc@huawei.com> (message from\n' + ' liqingqing on Thu, 1 Apr 2021 16:51:45 +0800)' + ) + email = create_email('test', in_reply_to=in_reply_to) + + expected = ['<4574b99b-edac-d8dc-9141-79c3109d2fcc@huawei.com>'] + actual = parser.find_references(email) + + self.assertEqual(expected, actual) + + def test_find_references__duplicate_references(self): + """Test that we ignore duplicate message IDs in 'References'.""" + message_id = '<20130510114450.7104c5d2@nehalam.linuxnetplumber.net>' + in_reply_to = ( + '<525534677.5312512.1368202896189.JavaMail.root@vmware.com>' + ) + references = ( + '\n' # noqa: E501 + ' \n' # noqa: E501 + ' <1676591087.5291867.1368201908283.JavaMail.root@vmware.com>\n' + ' <20130510091549.3c064df6@nehalam.linuxnetplumber.net>\n' + ' <525534677.5312512.1368202896189.JavaMail.root@vmware.com>' + ) + email = create_email( + 'test', + msgid=message_id, + in_reply_to=in_reply_to, + references=references, + ) + + expected = [ + '<525534677.5312512.1368202896189.JavaMail.root@vmware.com>', + '<20130510091549.3c064df6@nehalam.linuxnetplumber.net>', + '<1676591087.5291867.1368201908283.JavaMail.root@vmware.com>', + '', # noqa: E501 + '', + ] + actual = parser.find_references(email) + + self.assertEqual(expected, actual) + + class TestCommentCorrelation(TestCase): def test_find_patch_for_comment__no_reply(self): """Test behavior for mails that don't match anything we have.""" diff --git a/releasenotes/notes/issue-399-09d6f17aa54b14b2.yaml b/releasenotes/notes/issue-399-09d6f17aa54b14b2.yaml new file mode 100644 index 00000000..e925a606 --- /dev/null +++ b/releasenotes/notes/issue-399-09d6f17aa54b14b2.yaml @@ -0,0 +1,11 @@ +--- +fixes: + - | + Comments and whitespace are now correctly stripped from the ``Message-ID``, + ``In-Reply-To``, and ``References`` headers. One side effect of this change + is that the parser is now stricter with regards to the format of the + ``msg-id`` component of these headers: all identifiers must now be + surrounded by angle brackets, e.g. ````. This is + mandated in the spec and a review of mailing lists archives suggest it is + broadly adhered to. Without these markers, there is no way to delimit + ``msg-id`` from any surrounding comments and whitespace. -- 2.47.3