From d6491b59d97ba7fd6a498490f4cb3b6159dd53ee Mon Sep 17 00:00:00 2001 From: Stephen Finucane Date: Wed, 20 Jun 2018 15:25:33 +0100 Subject: [PATCH] tests: Add tests for multipart emails Ensure HTML is dropped as expected. Signed-off-by: Stephen Finucane --- .../tests/mail/0019-multipart-patch.mbox | 55 +++++++++++++++++++ .../tests/mail/0020-multipart-comment.mbox | 49 +++++++++++++++++ patchwork/tests/test_parser.py | 26 +++++++++ 3 files changed, 130 insertions(+) create mode 100644 patchwork/tests/mail/0019-multipart-patch.mbox create mode 100644 patchwork/tests/mail/0020-multipart-comment.mbox diff --git a/patchwork/tests/mail/0019-multipart-patch.mbox b/patchwork/tests/mail/0019-multipart-patch.mbox new file mode 100644 index 00000000..99d23a83 --- /dev/null +++ b/patchwork/tests/mail/0019-multipart-patch.mbox @@ -0,0 +1,55 @@ +From yuri.volchkov@gmail.com Wed Jun 20 12:22:05 2018 +From: Yuri Volchkov +To: patchwork@lists.ozlabs.org +Cc: stephen@that.guru +Subject: [PATCH] parsemail: ignore html part of multi-part comments +Date: Wed, 20 Jun 2018 14:21:42 +0200 +Message-Id: <20180620122142.9917-1-yuri.volchkov@gmail.com> +Content-Type: multipart/alternative; boundary="000000000000f93f23056f12c80c" + + +--000000000000f93f23056f12c80c +Content-Type: text/plain; charset="UTF-8" +Content-Transfer-Encoding: 8bit + +Currently an html-protection present only for patch-emails. If a +multi-part comment-email arrives, it messes up patchwork. In my case, +the symptom was a non intended 'Signed-off-by' in the downloaded +patches, with html-like junk. + +This patch makes parsemail skip all parts of comment which are not +text/plain. + +Of course, this will drop html-only emails completely. But they can +not be parsed anyways. + +Signed-off-by: Yuri Volchkov +--- + patchwork/parser.py | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/patchwork/parser.py b/patchwork/parser.py +index 8f9af811..b1fb7b9c 100644 +--- a/patchwork/parser.py ++++ b/patchwork/parser.py +@@ -576,9 +576,11 @@ def find_comment_content(mail): + """Extract content from a mail.""" + commentbuf = '' + +- for payload, _ in _find_content(mail): ++ for payload, subtype in _find_content(mail): + if not payload: + continue ++ if subtype != 'plain': ++ continue + + commentbuf += payload.strip() + '\n' + +--000000000000f93f23056f12c80c +Content-Type: text/html; charset="UTF-8" +Content-Transfer-Encoding: 8bit + +
Currently an html-protection present only for patch-emails. If a
multi-part comment-email arrives, it messes up patchwork. In my case,
the symptom was a non intended 'Signed-off-by' in the downloaded
patches, with html-like junk.

This patch makes parsemail skip all parts of comment which are not
text/plain.

Of course, this will drop html-only emails completely. But they can
not be parsed anyways.

Signed-off-by: Yuri Volchkov <yuri.volchkov@gmail.com>
---
 patchwork/parser.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/patchwork/parser.py b/patchwork/parser.py
index 8f9af811..b1fb7b9c 100644
--- a/patchwork/parser.py
+++ b/patchwork/parser.py
@@ -576,9 +576,11 @@ def find_comment_content(mail):
     """Extract content from a mail."""
     commentbuf = ''
 
-    for payload, _ in _find_content(mail):
+    for payload, subtype in _find_content(mail):
         if not payload:
             continue
+        if subtype != 'plain':
+            continue
 
         commentbuf += payload.strip() + '\n'
 
--
2.17.1
+ +--000000000000f93f23056f12c80c-- + diff --git a/patchwork/tests/mail/0020-multipart-comment.mbox b/patchwork/tests/mail/0020-multipart-comment.mbox new file mode 100644 index 00000000..7a696a55 --- /dev/null +++ b/patchwork/tests/mail/0020-multipart-comment.mbox @@ -0,0 +1,49 @@ +From stephenfinucane@hotmail.com Wed Jun 20 13:35:48 2018 +From: Stephen Finucane +To: "stephen@that.guru" +Subject: Re: [PATCH] parsemail: ignore html part of multi-part comments +Date: Wed, 20 Jun 2018 13:35:37 +0000 +Message-ID: +References: <20180620122142.9917-1-yuri.volchkov@gmail.com> +In-Reply-To: <20180620122142.9917-1-yuri.volchkov@gmail.com> +Content-Type: multipart/alternative; + boundary="_000_DB5PR03MB18774049A0E62D211988EC8CA3770DB5PR03MB1877eurp_" +MIME-Version: 1.0 + + +--_000_DB5PR03MB18774049A0E62D211988EC8CA3770DB5PR03MB1877eurp_ +Content-Type: text/plain; charset="iso-8859-1" +Content-Transfer-Encoding: 8bit + +Yup, this looks sensible to me. Replying from Outlook's awful HTML editor to get +a sample comment to test with. + +Stephen + + +--_000_DB5PR03MB18774049A0E62D211988EC8CA3770DB5PR03MB1877eurp_ +Content-Type: text/html; charset="iso-8859-1" +Content-Transfer-Encoding: 8bit + + + + + + + +
+Yup, this looks sensible to me. Replying from Outlook's awful HTML editor to get a sample comment to test with.
+
+
+
+
+Stephen
+
+
+
+
+ + + +--_000_DB5PR03MB18774049A0E62D211988EC8CA3770DB5PR03MB1877eurp_-- + diff --git a/patchwork/tests/test_parser.py b/patchwork/tests/test_parser.py index 5ba06c0f..e99cf214 100644 --- a/patchwork/tests/test_parser.py +++ b/patchwork/tests/test_parser.py @@ -36,6 +36,7 @@ from patchwork.models import State from patchwork.parser import clean_subject from patchwork.parser import get_or_create_author from patchwork.parser import find_patch_content as find_content +from patchwork.parser import find_comment_content from patchwork.parser import find_project from patchwork.parser import find_series from patchwork.parser import parse_mail as _parse_mail @@ -632,6 +633,14 @@ class PatchParseTest(PatchTest): self.assertTrue(diff is not None) self.assertTrue(message is not None) + def test_html_multipart(self): + """Validate parsing a mail with multiple parts.""" + diff, message = self._find_content('0019-multipart-patch.mbox') + self.assertTrue(diff is not None) + self.assertTrue(message is not None) + self.assertFalse('