From: Victor Stinner <victor.stinner@haypocalc.com>
Date: Wed, 8 Dec 2010 22:25:45 +0000 (+0000)
Subject: Issue #10546: UTF-16-LE and UTF-16-BE *do* support non-BMP characters
X-Git-Tag: v3.2b2~170
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=53a9dd776e62f6bc3b1884f3aa82e49a78bd83a8;p=thirdparty%2FPython%2Fcpython.git

Issue #10546: UTF-16-LE and UTF-16-BE *do* support non-BMP characters

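Fix the documentation: remove the incorrect "(BMP only)" note from the
utf_16_be and utf_16_le rows of the standard encodings table in
Doc/library/codecs.rst. Add tests to Lib/test/test_codecs.py that encode and
decode a non-BMP character (U+10203) with both codecs.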
---

diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst
index 5416d3b2593f..26e31a42862b 100644
--- a/Doc/library/codecs.rst
+++ b/Doc/library/codecs.rst
@@ -1114,9 +1114,9 @@ particular, the following variants typically exist:
 +-----------------+--------------------------------+--------------------------------+
 | utf_16          | U16, utf16                     | all languages                  |
 +-----------------+--------------------------------+--------------------------------+
-| utf_16_be       | UTF-16BE                       | all languages (BMP only)       |
+| utf_16_be       | UTF-16BE                       | all languages                  |
 +-----------------+--------------------------------+--------------------------------+
-| utf_16_le       | UTF-16LE                       | all languages (BMP only)       |
+| utf_16_le       | UTF-16LE                       | all languages                  |
 +-----------------+--------------------------------+--------------------------------+
 | utf_7           | U7, unicode-1-1-utf-7          | all languages                  |
 +-----------------+--------------------------------+--------------------------------+
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index bc29e06c4f50..8287a5b4ec53 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -544,6 +544,12 @@ class UTF16LETest(ReadTest):
         self.assertRaises(UnicodeDecodeError, codecs.utf_16_le_decode,
                           b"\xff", "strict", True)
 
+    def test_nonbmp(self):
+        self.assertEqual("\U00010203".encode(self.encoding),
+                         b'\x00\xd8\x03\xde')
+        self.assertEqual(b'\x00\xd8\x03\xde'.decode(self.encoding),
+                         "\U00010203")
+
 class UTF16BETest(ReadTest):
     encoding = "utf-16-be"
 
@@ -566,6 +572,12 @@ class UTF16BETest(ReadTest):
         self.assertRaises(UnicodeDecodeError, codecs.utf_16_be_decode,
                           b"\xff", "strict", True)
 
+    def test_nonbmp(self):
+        self.assertEqual("\U00010203".encode(self.encoding),
+                         b'\xd8\x00\xde\x03')
+        self.assertEqual(b'\xd8\x00\xde\x03'.decode(self.encoding),
+                         "\U00010203")
+
 class UTF8Test(ReadTest):
     encoding = "utf-8"