git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
support encoding error handlers that return bytes (closes #16585)
author Benjamin Peterson <benjamin@python.org>
Sun, 2 Dec 2012 16:20:28 +0000 (11:20 -0500)
committer Benjamin Peterson <benjamin@python.org>
Sun, 2 Dec 2012 16:20:28 +0000 (11:20 -0500)
Lib/test/test_multibytecodec.py
Misc/NEWS
Modules/cjkcodecs/multibytecodec.c

index feb7bd595a26566c07a80d6510f884b1b08798b0..7b47cb50e82853181236e3a4a200da0a3586f25b 100644 (file)
@@ -45,6 +45,10 @@ class Test_MultibyteCodec(unittest.TestCase):
         self.assertRaises(IndexError, dec,
                           b'apple\x92ham\x93spam', 'test.cjktest')
 
+    def test_errorhandler_returns_bytes(self):
+        enc = "\u30fb\udc80".encode('gb18030', 'surrogateescape')
+        self.assertEqual(enc, b'\x819\xa79\x80')
+
     def test_codingspec(self):
         try:
             for enc in ALL_CJKENCODINGS:
index bdfe16158bc681b5cb3c49e27d8480c26d54e2ae..6eff12c84ecc8b9cfe9790f98ca16252e129f551 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -98,6 +98,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #16585: Make CJK encoders support error handlers that return bytes per
+  PEP 383.
+
 - Issue #10182: The re module doesn't truncate indices to 32 bits anymore.
   Patch by Serhiy Storchaka.
 
index 40717d8ba93927dd6981d0e28ffac45992fed67a..c032cdb0ed319dc982ece91aabc8164b671c7dd4 100644 (file)
@@ -316,7 +316,7 @@ multibytecodec_encerror(MultibyteCodec *codec,
         goto errorexit;
 
     if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
-        !PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) ||
+        (!PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) && !PyBytes_Check(tobj)) ||
         !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
         PyErr_SetString(PyExc_TypeError,
                         "encoding error handler must return "
@@ -324,7 +324,7 @@ multibytecodec_encerror(MultibyteCodec *codec,
         goto errorexit;
     }
 
-    {
+    if (PyUnicode_Check(tobj)) {
         const Py_UNICODE *uraw = PyUnicode_AS_UNICODE(tobj);
 
         retstr = multibytecodec_encode(codec, state, &uraw,
@@ -333,6 +333,10 @@ multibytecodec_encerror(MultibyteCodec *codec,
         if (retstr == NULL)
             goto errorexit;
     }
+    else {
+        Py_INCREF(tobj);
+        retstr = tobj;
+    }
 
     assert(PyBytes_Check(retstr));
     retstrsize = PyBytes_GET_SIZE(retstr);