]> git.ipfire.org Git - thirdparty/patchwork.git/commitdiff
parser: Ignore CFWS in In-Reply-To, References headers
authorStephen Finucane <stephen@that.guru>
Fri, 6 May 2022 16:53:13 +0000 (17:53 +0100)
committerStephen Finucane <stephen@that.guru>
Mon, 9 May 2022 09:45:39 +0000 (10:45 +0100)
RFC2822 states that [1] a comment or folding white space is permitted to
be inserted before or after a msg-id in in the Message-ID, In-Reply-To
or References fields. Allow for this.

[1] https://tools.ietf.org/html/rfc2822#section-3.6.4

Signed-off-by: Stephen Finucane <stephen@that.guru>
Reviewed-by: DJ Delorie <dj@redhat.com>
Closes: #399
patchwork/parser.py
patchwork/tests/test_parser.py
releasenotes/notes/issue-399-09d6f17aa54b14b2.yaml [new file with mode: 0644]

index 026da4ecde9019a863413beb12e9a0340b272743..779a519f2d205b20570311b7b21cf22bd1dd607b 100644 (file)
@@ -31,6 +31,7 @@ from patchwork.models import SeriesReference
 from patchwork.models import State
 
 
+_msgid_re = re.compile(r'<[^>]+>')
 _hunk_re = re.compile(r'^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@')
 _filename_re = re.compile(r'^(---|\+\+\+) (\S+)')
 list_id_headers = ['List-ID', 'X-Mailing-List', 'X-list']
@@ -502,19 +503,15 @@ def find_references(mail):
 
     if 'In-Reply-To' in mail:
         for in_reply_to in mail.get_all('In-Reply-To'):
-            r = clean_header(in_reply_to)
-            if r:
-                refs.append(r)
+            ref = _msgid_re.search(clean_header(in_reply_to))
+            if ref:
+                refs.append(ref.group(0))
 
     if 'References' in mail:
         for references_header in mail.get_all('References'):
-            h = clean_header(references_header)
-            if not h:
-                continue
-            references = h.split()
+            references = _msgid_re.findall(clean_header(references_header))
             references.reverse()
             for ref in references:
-                ref = ref.strip()
                 if ref not in refs:
                     refs.append(ref)
 
index c22974a4934728efd086911cd05fe465c0bdc4b9..b03e49d43772a0ef68eb6767b463393a0c1da9fd 100644 (file)
@@ -74,6 +74,7 @@ def _create_email(
     sender=None,
     listid=None,
     in_reply_to=None,
+    references=None,
     headers=None,
 ):
     msg['Message-Id'] = msgid or make_msgid()
@@ -84,6 +85,9 @@ def _create_email(
     if in_reply_to:
         msg['In-Reply-To'] = in_reply_to
 
+    if references:
+        msg['References'] = references
+
     for header in headers or {}:
         msg[header] = headers[header]
 
@@ -97,12 +101,20 @@ def create_email(
     sender=None,
     listid=None,
     in_reply_to=None,
+    references=None,
     headers=None,
 ):
     msg = MIMEText(content, _charset='us-ascii')
 
     return _create_email(
-        msg, msgid, subject, sender, listid, in_reply_to, headers
+        msg,
+        msgid,
+        subject,
+        sender,
+        listid,
+        in_reply_to,
+        references,
+        headers,
     )
 
 
@@ -1290,7 +1302,7 @@ class DuplicateMailTest(TestCase):
 
     def test_duplicate_patch(self):
         diff = read_patch('0001-add-line.patch')
-        m = create_email(diff, listid=self.listid, msgid='1@example.com')
+        m = create_email(diff, listid=self.listid, msgid='<1@example.com>')
 
         self._test_duplicate_mail(m)
 
@@ -1298,14 +1310,18 @@ class DuplicateMailTest(TestCase):
 
     def test_duplicate_comment(self):
         diff = read_patch('0001-add-line.patch')
-        m1 = create_email(diff, listid=self.listid, msgid='1@example.com')
+        m1 = create_email(
+            diff,
+            listid=self.listid,
+            msgid='<1@example.com>',
+        )
         _parse_mail(m1)
 
         m2 = create_email(
             'test',
             listid=self.listid,
-            msgid='2@example.com',
-            in_reply_to='1@example.com',
+            msgid='<2@example.com>',
+            in_reply_to='<1@example.com>',
         )
         self._test_duplicate_mail(m2)
 
@@ -1313,7 +1329,7 @@ class DuplicateMailTest(TestCase):
         self.assertEqual(PatchComment.objects.count(), 1)
 
     def test_duplicate_coverletter(self):
-        m = create_email('test', listid=self.listid, msgid='1@example.com')
+        m = create_email('test', listid=self.listid, msgid='<1@example.com>')
         del m['Subject']
         m['Subject'] = '[PATCH 0/1] test cover letter'
 
@@ -1322,6 +1338,52 @@ class DuplicateMailTest(TestCase):
         self.assertEqual(Cover.objects.count(), 1)
 
 
+class TestFindReferences(TestCase):
+    def test_find_references__header_with_comments(self):
+        """Test that we strip comments from References, In-Reply-To fields."""
+        in_reply_to = (
+            '<4574b99b-edac-d8dc-9141-79c3109d2fcc@huawei.com> (message from\n'
+            ' liqingqing on Thu, 1 Apr 2021 16:51:45 +0800)'
+        )
+        email = create_email('test', in_reply_to=in_reply_to)
+
+        expected = ['<4574b99b-edac-d8dc-9141-79c3109d2fcc@huawei.com>']
+        actual = parser.find_references(email)
+
+        self.assertEqual(expected, actual)
+
+    def test_find_references__duplicate_references(self):
+        """Test that we ignore duplicate message IDs in 'References'."""
+        message_id = '<20130510114450.7104c5d2@nehalam.linuxnetplumber.net>'
+        in_reply_to = (
+            '<525534677.5312512.1368202896189.JavaMail.root@vmware.com>'
+        )
+        references = (
+            '<AFCFCEB8EB0E24448E4EB95988BA1E531FA2EA0D@xmb-aln-x05.cisco.com>\n'  # noqa: E501
+            ' <CAE68AUOr7B5a2QvduJhH0kEHPi+sR9X3qfrtumgLxT1BK4VS+Q@mail.gmail.com>\n'  # noqa: E501
+            ' <1676591087.5291867.1368201908283.JavaMail.root@vmware.com>\n'
+            ' <20130510091549.3c064df6@nehalam.linuxnetplumber.net>\n'
+            ' <525534677.5312512.1368202896189.JavaMail.root@vmware.com>'
+        )
+        email = create_email(
+            'test',
+            msgid=message_id,
+            in_reply_to=in_reply_to,
+            references=references,
+        )
+
+        expected = [
+            '<525534677.5312512.1368202896189.JavaMail.root@vmware.com>',
+            '<20130510091549.3c064df6@nehalam.linuxnetplumber.net>',
+            '<1676591087.5291867.1368201908283.JavaMail.root@vmware.com>',
+            '<CAE68AUOr7B5a2QvduJhH0kEHPi+sR9X3qfrtumgLxT1BK4VS+Q@mail.gmail.com>',  # noqa: E501
+            '<AFCFCEB8EB0E24448E4EB95988BA1E531FA2EA0D@xmb-aln-x05.cisco.com>',
+        ]
+        actual = parser.find_references(email)
+
+        self.assertEqual(expected, actual)
+
+
 class TestCommentCorrelation(TestCase):
     def test_find_patch_for_comment__no_reply(self):
         """Test behavior for mails that don't match anything we have."""
diff --git a/releasenotes/notes/issue-399-09d6f17aa54b14b2.yaml b/releasenotes/notes/issue-399-09d6f17aa54b14b2.yaml
new file mode 100644 (file)
index 0000000..e925a60
--- /dev/null
@@ -0,0 +1,11 @@
+---
+fixes:
+  - |
+    Comments and whitespace are now correctly stripped from the ``Message-ID``,
+    ``In-Reply-To``, and ``References`` headers. One side effect of this change
+    is that the parser is now stricter with regards to the format of the
+    ``msg-id`` component of these headers: all identifiers must now be
+    surrounded by angle brackets, e.g. ``<abcdef@example.com>``. This is
+    mandated in the spec and a review of mailing lists archives suggest it is
+    broadly adhered to. Without these markers, there is no way to delimit
+    ``msg-id`` from any surrounding comments and whitespace.