Ensure HTML is dropped as expected.
Signed-off-by: Stephen Finucane <stephen@that.guru>
--- /dev/null
+From yuri.volchkov@gmail.com Wed Jun 20 12:22:05 2018
+From: Yuri Volchkov <yuri.volchkov@gmail.com>
+To: patchwork@lists.ozlabs.org
+Cc: stephen@that.guru
+Subject: [PATCH] parsemail: ignore html part of multi-part comments
+Date: Wed, 20 Jun 2018 14:21:42 +0200
+Message-Id: <20180620122142.9917-1-yuri.volchkov@gmail.com>
+Content-Type: multipart/alternative; boundary="000000000000f93f23056f12c80c"
+
+
+--000000000000f93f23056f12c80c
+Content-Type: text/plain; charset="UTF-8"
+Content-Transfer-Encoding: 8bit
+
+Currently an html-protection present only for patch-emails. If a
+multi-part comment-email arrives, it messes up patchwork. In my case,
+the symptom was a non intended 'Signed-off-by' in the downloaded
+patches, with html-like junk.
+
+This patch makes parsemail skip all parts of comment which are not
+text/plain.
+
+Of course, this will drop html-only emails completely. But they can
+not be parsed anyways.
+
+Signed-off-by: Yuri Volchkov <yuri.volchkov@gmail.com>
+---
+ patchwork/parser.py | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/patchwork/parser.py b/patchwork/parser.py
+index 8f9af811..b1fb7b9c 100644
+--- a/patchwork/parser.py
++++ b/patchwork/parser.py
+@@ -576,9 +576,11 @@ def find_comment_content(mail):
+ """Extract content from a mail."""
+ commentbuf = ''
+
+- for payload, _ in _find_content(mail):
++ for payload, subtype in _find_content(mail):
+ if not payload:
+ continue
++ if subtype != 'plain':
++ continue
+
+ commentbuf += payload.strip() + '\n'
+
+--000000000000f93f23056f12c80c
+Content-Type: text/html; charset="UTF-8"
+Content-Transfer-Encoding: 8bit
+
+<div dir="ltr">Currently an html-protection present only for patch-emails. If a<br>multi-part comment-email arrives, it messes up patchwork. In my case,<br>the symptom was a non intended 'Signed-off-by' in the downloaded<br>patches, with html-like junk.<br><br>This patch makes parsemail skip all parts of comment which are not<br>text/plain.<br><br>Of course, this will drop html-only emails completely. But they can<br>not be parsed anyways.<br><br>Signed-off-by: Yuri Volchkov <<a href="mailto:yuri.volchkov@gmail.com">yuri.volchkov@gmail.com</a>><br>---<br> patchwork/parser.py | 4 +++-<br> 1 file changed, 3 insertions(+), 1 deletion(-)<br><br>diff --git a/patchwork/parser.py b/patchwork/parser.py<br>index 8f9af811..b1fb7b9c 100644<br>--- a/patchwork/parser.py<br>+++ b/patchwork/parser.py<br>@@ -576,9 +576,11 @@ def find_comment_content(mail):<br> """Extract content from a mail."""<br> commentbuf = ''<br> <br>- for payload, _ in _find_content(mail):<br>+ for payload, subtype in _find_content(mail):<br> if not payload:<br> continue<br>+ if subtype != 'plain':<br>+ continue<br> <br> commentbuf += payload.strip() + '\n'<br> <br>-- <br>2.17.1<br></div>
+
+--000000000000f93f23056f12c80c--
+
--- /dev/null
+From stephenfinucane@hotmail.com Wed Jun 20 13:35:48 2018
+From: Stephen Finucane <stephenfinucane@hotmail.com>
+To: "stephen@that.guru" <stephen@that.guru>
+Subject: Re: [PATCH] parsemail: ignore html part of multi-part comments
+Date: Wed, 20 Jun 2018 13:35:37 +0000
+Message-ID: <DB5PR03MB18774049A0E62D211988EC8CA3770@DB5PR03MB1877.eurprd03.prod.outlook.com>
+References: <20180620122142.9917-1-yuri.volchkov@gmail.com>
+In-Reply-To: <20180620122142.9917-1-yuri.volchkov@gmail.com>
+Content-Type: multipart/alternative;
+ boundary="_000_DB5PR03MB18774049A0E62D211988EC8CA3770DB5PR03MB1877eurp_"
+MIME-Version: 1.0
+
+
+--_000_DB5PR03MB18774049A0E62D211988EC8CA3770DB5PR03MB1877eurp_
+Content-Type: text/plain; charset="iso-8859-1"
+Content-Transfer-Encoding: 8bit
+
+Yup, this looks sensible to me. Replying from Outlook's awful HTML editor to get
+a sample comment to test with.
+
+Stephen
+
+
+--_000_DB5PR03MB18774049A0E62D211988EC8CA3770DB5PR03MB1877eurp_
+Content-Type: text/html; charset="iso-8859-1"
+Content-Transfer-Encoding: 8bit
+
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<style type="text/css" style="display:none;"> P {margin-top:0;margin-bottom:0;} </style>
+</head>
+<body dir="ltr">
+<div style="font-family: Calibri, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
+Yup, this looks sensible to me. Replying from Outlook's awful HTML editor to get a sample comment to test with.</div>
+<div style="font-family: Calibri, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
+<br>
+</div>
+<div style="font-family: Calibri, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
+Stephen<br>
+</div>
+<div style="font-family: Calibri, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
+<br>
+</div>
+</body>
+</html>
+
+--_000_DB5PR03MB18774049A0E62D211988EC8CA3770DB5PR03MB1877eurp_--
+
from patchwork.parser import clean_subject
from patchwork.parser import get_or_create_author
from patchwork.parser import find_patch_content as find_content
+from patchwork.parser import find_comment_content
from patchwork.parser import find_project
from patchwork.parser import find_series
from patchwork.parser import parse_mail as _parse_mail
self.assertTrue(diff is not None)
self.assertTrue(message is not None)
+    def test_html_multipart(self):
+        """Validate parsing a patch mail with multiple parts.
+
+        Both the extracted diff and the extracted message must be present,
+        and neither may contain the ``<div`` markup used by the fixture's
+        HTML part.
+        """
+        diff, message = self._find_content('0019-multipart-patch.mbox')
+        self.assertIsNotNone(diff)
+        self.assertIsNotNone(message)
+        # assertNotIn shows the offending text on failure, whereas
+        # assertFalse('<div' in ...) only reports "True is not false".
+        self.assertNotIn('<div', diff)
+        self.assertNotIn('<div', message)
+
class EncodingParseTest(TestCase):
"""Test parsing of patches with different encoding issues."""
self._test_encoded_patch_parse('0015-with-invalid-utf8-headers.mbox')
+class CommentParseTest(TestCase):
+    """Test parsing of different comment formats."""
+
+    @staticmethod
+    def _find_content(mbox_filename):
+        """Return the comment text extracted from the named mbox fixture.
+
+        The mail is loaded with ``read_mail`` and handed to
+        ``find_comment_content``; only the second element of the returned
+        pair (the message) is passed back to the caller.
+        """
+        mail = read_mail(mbox_filename)
+        _, message = find_comment_content(mail)
+
+        return message
+
+    def test_html_multipart(self):
+        """Validate parsing a comment mail with multiple parts.
+
+        The extracted message must be present and must not contain the
+        ``<div`` markup used by the fixture's HTML part.
+        """
+        message = self._find_content('0020-multipart-comment.mbox')
+        self.assertIsNotNone(message)
+        # assertNotIn shows the offending text on failure, whereas
+        # assertFalse('<div' in ...) only reports "True is not false".
+        self.assertNotIn('<div', message)
+
+
class DelegateRequestTest(TestCase):
patch_filename = '0001-add-line.patch'