From: Victor Stinner
Date: Wed, 8 Dec 2010 22:25:45 +0000 (+0000)
Subject: Issue #10546: UTF-16-LE and UTF-16-BE *do* support non-BMP characters
X-Git-Tag: v3.2b2~170
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=53a9dd776e62f6bc3b1884f3aa82e49a78bd83a8;p=thirdparty%2FPython%2Fcpython.git

Issue #10546: UTF-16-LE and UTF-16-BE *do* support non-BMP characters

Fix the doc and add tests.
---

diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst
index 5416d3b2593f..26e31a42862b 100644
--- a/Doc/library/codecs.rst
+++ b/Doc/library/codecs.rst
@@ -1114,9 +1114,9 @@ particular, the following variants typically exist:
 +-----------------+--------------------------------+--------------------------------+
 | utf_16          | U16, utf16                     | all languages                  |
 +-----------------+--------------------------------+--------------------------------+
-| utf_16_be       | UTF-16BE                       | all languages (BMP only)       |
+| utf_16_be       | UTF-16BE                       | all languages                  |
 +-----------------+--------------------------------+--------------------------------+
-| utf_16_le       | UTF-16LE                       | all languages (BMP only)       |
+| utf_16_le       | UTF-16LE                       | all languages                  |
 +-----------------+--------------------------------+--------------------------------+
 | utf_7           | U7, unicode-1-1-utf-7          | all languages                  |
 +-----------------+--------------------------------+--------------------------------+
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index bc29e06c4f50..8287a5b4ec53 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -544,6 +544,12 @@ class UTF16LETest(ReadTest):
         self.assertRaises(UnicodeDecodeError, codecs.utf_16_le_decode,
                           b"\xff", "strict", True)
 
+    def test_nonbmp(self):
+        self.assertEqual("\U00010203".encode(self.encoding),
+                         b'\x00\xd8\x03\xde')
+        self.assertEqual(b'\x00\xd8\x03\xde'.decode(self.encoding),
+                         "\U00010203")
+
 class UTF16BETest(ReadTest):
     encoding = "utf-16-be"
 
@@ -566,6 +572,12 @@ class UTF16BETest(ReadTest):
         self.assertRaises(UnicodeDecodeError, codecs.utf_16_be_decode,
                           b"\xff", "strict", True)
 
+    def test_nonbmp(self):
+        self.assertEqual("\U00010203".encode(self.encoding),
+                         b'\xd8\x00\xde\x03')
+        self.assertEqual(b'\xd8\x00\xde\x03'.decode(self.encoding),
+                         "\U00010203")
+
 class UTF8Test(ReadTest):
     encoding = "utf-8"
 