From: R David Murray Date: Tue, 5 Feb 2013 15:49:49 +0000 (-0500) Subject: #16948: Fix quopri encoding of non-latin1 character sets. X-Git-Tag: v3.2.4rc1~141 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=f581b372003de0ae604c14a1f1dc2e8c36ea277b;p=thirdparty%2FPython%2Fcpython.git #16948: Fix quopri encoding of non-latin1 character sets. --- diff --git a/Lib/email/charset.py b/Lib/email/charset.py index f22be2c52c1d..c106649a55c1 100644 --- a/Lib/email/charset.py +++ b/Lib/email/charset.py @@ -392,6 +392,19 @@ class Charset: string = string.encode(self.output_charset) return email.base64mime.body_encode(string) elif self.body_encoding is QP: + # quoprimime.body_encode takes a string, but operates on it as if + # it were a list of byte codes. For a (minimal) history on why + # this is so, see changeset 0cf700464177. To correctly encode a + # character set, then, we must turn it into pseudo bytes via the + # latin1 charset, which will encode any byte as a single code point + # between 0 and 255, which is what body_encode is expecting. + # + # Note that this clause doesn't handle the case of a _payload that + # is already bytes. It never did, and the semantics of _payload + # being bytes has never been nailed down, so fixing that is a + # longer term TODO. 
+ if isinstance(string, str): + string = string.encode(self.output_charset).decode('latin1') return email.quoprimime.body_encode(string) else: if isinstance(string, str): diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py index 352b9b1d9a0b..2fa4aa8b6a93 100644 --- a/Lib/email/test/test_email.py +++ b/Lib/email/test/test_email.py @@ -670,6 +670,27 @@ class TestEncoders(unittest.TestCase): msg = MIMEText('文', _charset='euc-jp') eq(msg['content-transfer-encoding'], '7bit') + def test_qp_encode_latin1(self): + msg = MIMEText('\xe1\xf6\n', 'text', 'ISO-8859-1') + self.assertEqual(str(msg), textwrap.dedent("""\ + MIME-Version: 1.0 + Content-Type: text/text; charset="iso-8859-1" + Content-Transfer-Encoding: quoted-printable + + =E1=F6 + """)) + + def test_qp_encode_non_latin1(self): + # Issue 16948 + msg = MIMEText('\u017c\n', 'text', 'ISO-8859-2') + self.assertEqual(str(msg), textwrap.dedent("""\ + MIME-Version: 1.0 + Content-Type: text/text; charset="iso-8859-2" + Content-Transfer-Encoding: quoted-printable + + =BF + """)) + # Test long header wrapping class TestLongHeaders(TestEmailBase): diff --git a/Misc/NEWS b/Misc/NEWS index 07a33cee7106..b7eabd73a26e 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -212,6 +212,10 @@ Core and Builtins Library ------- + +- Issue #16948: Fix quoted printable body encoding for non-latin1 character + sets in the email package. + - Issue #17089: Expat parser now correctly works with string input not only when an internal XML encoding is UTF-8 or US-ASCII. It now accepts bytes and strings larger than 2 GiB.