-import unittest
+import codecs
+import contextlib
+import io
+import re
import sys
+import unittest
+import unittest.mock as mock
+import _testcapi
from test.support import import_helper
_testlimitedcapi = import_helper.import_module('_testlimitedcapi')
# Module-level helpers for the tests: NULL is the Python-side spelling of a
# C NULL argument, BAD_ARGUMENT is the regex-escaped TypeError message that
# the tests match with assertRaisesRegex().
NULL = None
+BAD_ARGUMENT = re.escape('bad argument type for built-in operation')
-class CAPITest(unittest.TestCase):
+class CAPIUnicodeTest(unittest.TestCase):
# TODO: Test the following functions:
#
# PyUnicode_BuildEncodingMap
# CRASHES asrawunicodeescapestring(NULL)
+class CAPICodecs(unittest.TestCase):
+
+ def setUp(self):
+ # Encoding names are normalized internally by converting them
+ # to lowercase and their hyphens are replaced by underscores.
+ self.encoding_name = 'test.test_capi.test_codecs.codec_reversed'
+ # Make sure that our custom codec is not already registered (that
+ # way we know whether we correctly unregistered the custom codec
+ # after a test or not).
+ self.assertRaises(LookupError, codecs.lookup, self.encoding_name)
+ # create the search function without registering yet
+ self._create_custom_codec()
+
+ def _create_custom_codec(self):
+ def codec_encoder(m, errors='strict'):
+ return (type(m)().join(reversed(m)), len(m))
+
+ def codec_decoder(c, errors='strict'):
+ return (type(c)().join(reversed(c)), len(c))
+
+ class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codec_encoder(input)
+
+ class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codec_decoder(input)
+
+ class StreamReader(codecs.StreamReader):
+ def encode(self, input, errors='strict'):
+ return codec_encoder(input, errors=errors)
+
+ def decode(self, input, errors='strict'):
+ return codec_decoder(input, errors=errors)
+
+ class StreamWriter(codecs.StreamWriter):
+ def encode(self, input, errors='strict'):
+ return codec_encoder(input, errors=errors)
+
+ def decode(self, input, errors='strict'):
+ return codec_decoder(input, errors=errors)
+
+ info = codecs.CodecInfo(
+ encode=codec_encoder,
+ decode=codec_decoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ name=self.encoding_name
+ )
+
+ def search_function(encoding):
+ if encoding == self.encoding_name:
+ return info
+ return None
+
+ self.codec_info = info
+ self.search_function = search_function
+
+ @contextlib.contextmanager
+ def use_custom_encoder(self):
+ self.assertRaises(LookupError, codecs.lookup, self.encoding_name)
+ codecs.register(self.search_function)
+ yield
+ codecs.unregister(self.search_function)
+ self.assertRaises(LookupError, codecs.lookup, self.encoding_name)
+
+ def test_codec_register(self):
+ search_function, encoding = self.search_function, self.encoding_name
+ # register the search function using the C API
+ self.assertIsNone(_testcapi.codec_register(search_function))
+ # in case the test failed before cleaning up
+ self.addCleanup(codecs.unregister, self.search_function)
+ self.assertIs(codecs.lookup(encoding), search_function(encoding))
+ self.assertEqual(codecs.encode('123', encoding=encoding), '321')
+ # unregister the search function using the regular API
+ codecs.unregister(search_function)
+ self.assertRaises(LookupError, codecs.lookup, encoding)
+
+ def test_codec_unregister(self):
+ search_function, encoding = self.search_function, self.encoding_name
+ self.assertRaises(LookupError, codecs.lookup, encoding)
+ # register the search function using the regular API
+ codecs.register(search_function)
+ # in case the test failed before cleaning up
+ self.addCleanup(codecs.unregister, self.search_function)
+ self.assertIsNotNone(codecs.lookup(encoding))
+ # unregister the search function using the C API
+ self.assertIsNone(_testcapi.codec_unregister(search_function))
+ self.assertRaises(LookupError, codecs.lookup, encoding)
+
+ def test_codec_known_encoding(self):
+ self.assertRaises(LookupError, codecs.lookup, 'unknown-codec')
+ self.assertFalse(_testcapi.codec_known_encoding('unknown-codec'))
+ self.assertFalse(_testcapi.codec_known_encoding('unknown_codec'))
+ self.assertFalse(_testcapi.codec_known_encoding('UNKNOWN-codec'))
+
+ encoding_name = self.encoding_name
+ self.assertRaises(LookupError, codecs.lookup, encoding_name)
+
+ codecs.register(self.search_function)
+ self.addCleanup(codecs.unregister, self.search_function)
+
+ for name in [
+ encoding_name,
+ encoding_name.upper(),
+ encoding_name.replace('_', '-'),
+ ]:
+ with self.subTest(name):
+ self.assertTrue(_testcapi.codec_known_encoding(name))
+
+ def test_codec_encode(self):
+ encode = _testcapi.codec_encode
+ self.assertEqual(encode('a', 'utf-8', NULL), b'a')
+ self.assertEqual(encode('a', 'utf-8', 'strict'), b'a')
+ self.assertEqual(encode('[é]', 'ascii', 'ignore'), b'[]')
+
+ self.assertRaises(TypeError, encode, NULL, 'ascii', 'strict')
+ with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+ encode('a', NULL, 'strict')
+
+ def test_codec_decode(self):
+ decode = _testcapi.codec_decode
+
+ s = 'a\xa1\u4f60\U0001f600'
+ b = s.encode()
+
+ self.assertEqual(decode(b, 'utf-8', 'strict'), s)
+ self.assertEqual(decode(b, 'utf-8', NULL), s)
+ self.assertEqual(decode(b, 'latin1', 'strict'), b.decode('latin1'))
+ self.assertRaises(UnicodeDecodeError, decode, b, 'ascii', 'strict')
+ self.assertRaises(UnicodeDecodeError, decode, b, 'ascii', NULL)
+ self.assertEqual(decode(b, 'ascii', 'replace'), 'a' + '\ufffd'*9)
+
+ # _codecs.decode() only reports an unknown error handling name when
+ # the corresponding error handling function is used; this difers
+ # from PyUnicode_Decode() which checks that both the encoding and
+ # the error handling name are recognized before even attempting to
+ # call the decoder.
+ self.assertEqual(decode(b'', 'utf-8', 'unknown-error-handler'), '')
+ self.assertEqual(decode(b'a', 'utf-8', 'unknown-error-handler'), 'a')
+
+ self.assertRaises(TypeError, decode, NULL, 'ascii', 'strict')
+ with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+ decode(b, NULL, 'strict')
+
+ def test_codec_encoder(self):
+ codec_encoder = _testcapi.codec_encoder
+
+ with self.use_custom_encoder():
+ encoder = codec_encoder(self.encoding_name)
+ self.assertIs(encoder, self.codec_info.encode)
+
+ with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+ codec_encoder(NULL)
+
+ def test_codec_decoder(self):
+ codec_decoder = _testcapi.codec_decoder
+
+ with self.use_custom_encoder():
+ decoder = codec_decoder(self.encoding_name)
+ self.assertIs(decoder, self.codec_info.decode)
+
+ with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+ codec_decoder(NULL)
+
+ def test_codec_incremental_encoder(self):
+ codec_incremental_encoder = _testcapi.codec_incremental_encoder
+
+ with self.use_custom_encoder():
+ encoding = self.encoding_name
+
+ for errors in ['strict', NULL]:
+ with self.subTest(errors):
+ encoder = codec_incremental_encoder(encoding, errors)
+ self.assertIsInstance(encoder, self.codec_info.incrementalencoder)
+
+ with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+ codec_incremental_encoder(NULL, 'strict')
+
+ def test_codec_incremental_decoder(self):
+ codec_incremental_decoder = _testcapi.codec_incremental_decoder
+
+ with self.use_custom_encoder():
+ encoding = self.encoding_name
+
+ for errors in ['strict', NULL]:
+ with self.subTest(errors):
+ decoder = codec_incremental_decoder(encoding, errors)
+ self.assertIsInstance(decoder, self.codec_info.incrementaldecoder)
+
+ with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+ codec_incremental_decoder(NULL, 'strict')
+
+ def test_codec_stream_reader(self):
+ codec_stream_reader = _testcapi.codec_stream_reader
+
+ with self.use_custom_encoder():
+ encoding, stream = self.encoding_name, io.StringIO()
+ for errors in ['strict', NULL]:
+ with self.subTest(errors):
+ writer = codec_stream_reader(encoding, stream, errors)
+ self.assertIsInstance(writer, self.codec_info.streamreader)
+
+ with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+ codec_stream_reader(NULL, stream, 'strict')
+
+ def test_codec_stream_writer(self):
+ codec_stream_writer = _testcapi.codec_stream_writer
+
+ with self.use_custom_encoder():
+ encoding, stream = self.encoding_name, io.StringIO()
+ for errors in ['strict', NULL]:
+ with self.subTest(errors):
+ writer = codec_stream_writer(encoding, stream, errors)
+ self.assertIsInstance(writer, self.codec_info.streamwriter)
+
+ with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+ codec_stream_writer(NULL, stream, 'strict')
+
+
+class CAPICodecErrors(unittest.TestCase):
+
+ def test_codec_register_error(self):
+ # for cleaning up between tests
+ from _codecs import _unregister_error as _codecs_unregister_error
+
+ self.assertRaises(LookupError, _testcapi.codec_lookup_error, 'custom')
+
+ def custom_error_handler(exc):
+ raise exc
+
+ error_handler = mock.Mock(wraps=custom_error_handler)
+ _testcapi.codec_register_error('custom', error_handler)
+ self.addCleanup(_codecs_unregister_error, 'custom')
+
+ self.assertRaises(UnicodeEncodeError, codecs.encode,
+ '\xff', 'ascii', errors='custom')
+ error_handler.assert_called_once()
+ error_handler.reset_mock()
+
+ self.assertRaises(UnicodeDecodeError, codecs.decode,
+ b'\xff', 'ascii', errors='custom')
+ error_handler.assert_called_once()
+
+ # _codecs._unregister_error directly delegates to the internal C
+ # function so a Python-level function test is sufficient (it is
+ # tested in test_codeccallbacks).
+
+ def test_codec_lookup_error(self):
+ codec_lookup_error = _testcapi.codec_lookup_error
+ self.assertIs(codec_lookup_error(NULL), codecs.strict_errors)
+ self.assertIs(codec_lookup_error('strict'), codecs.strict_errors)
+ self.assertIs(codec_lookup_error('ignore'), codecs.ignore_errors)
+ self.assertIs(codec_lookup_error('replace'), codecs.replace_errors)
+ self.assertIs(codec_lookup_error('xmlcharrefreplace'), codecs.xmlcharrefreplace_errors)
+ self.assertIs(codec_lookup_error('namereplace'), codecs.namereplace_errors)
+ self.assertRaises(LookupError, codec_lookup_error, 'unknown')
+
+ def test_codec_error_handlers(self):
+ exceptions = [
+ # A UnicodeError with an empty message currently crashes:
+ # See: https://github.com/python/cpython/issues/123378
+ # UnicodeEncodeError('bad', '', 0, 1, 'reason'),
+ UnicodeEncodeError('bad', 'x', 0, 1, 'reason'),
+ UnicodeEncodeError('bad', 'xyz123', 0, 1, 'reason'),
+ UnicodeEncodeError('bad', 'xyz123', 1, 4, 'reason'),
+ ]
+
+ strict_handler = _testcapi.codec_strict_errors
+ for exc in exceptions:
+ with self.subTest(handler=strict_handler, exc=exc):
+ self.assertRaises(UnicodeEncodeError, strict_handler, exc)
+
+ for handler in [
+ _testcapi.codec_ignore_errors,
+ _testcapi.codec_replace_errors,
+ _testcapi.codec_xmlcharrefreplace_errors,
+ _testlimitedcapi.codec_namereplace_errors,
+ ]:
+ for exc in exceptions:
+ with self.subTest(handler=handler, exc=exc):
+ self.assertIsInstance(handler(exc), tuple)
+
+
# Allow running this test module directly as a script.
if __name__ == "__main__":
unittest.main()
#include "parts.h"
#include "util.h"
+// === Codecs registration and un-registration ================================
+
+static PyObject *
+codec_register(PyObject *Py_UNUSED(module), PyObject *search_function)
+{
+    // Pass the object straight to PyCodec_Register(); a negative status is
+    // reported to the caller as NULL, anything else as None.
+    int status = PyCodec_Register(search_function);
+    if (status < 0) {
+        return NULL;
+    }
+    Py_RETURN_NONE;
+}
+
+static PyObject *
+codec_unregister(PyObject *Py_UNUSED(module), PyObject *search_function)
+{
+    // Pass the object straight to PyCodec_Unregister(); a negative status
+    // is reported to the caller as NULL, anything else as None.
+    int status = PyCodec_Unregister(search_function);
+    if (status < 0) {
+        return NULL;
+    }
+    Py_RETURN_NONE;
+}
+
+static PyObject *
+codec_known_encoding(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    // Expose PyCodec_KnownEncoding(): args is (encoding,), where None is
+    // converted to a NULL pointer by the "z" format.
+    const char *NULL_WOULD_RAISE(encoding);     // NULL case will be tested
+    if (!PyArg_ParseTuple(args, "z", &encoding)) {
+        return NULL;
+    }
+    // The caller receives ownership of the returned object, so hand out a
+    // new reference to the bool singleton instead of a borrowed one.
+    return PyBool_FromLong(PyCodec_KnownEncoding(encoding));
+}
+
+// === Codecs encoding and decoding interfaces ================================
+
+static PyObject *
+codec_encode(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    // Expose PyCodec_Encode(): args is (input[, encoding[, errors]]), where
+    // None for either string is converted to NULL by the "z" format.
+    PyObject *input;
+    const char *NULL_WOULD_RAISE(encoding);     // NULL case will be tested
+    const char *errors = NULL;                  // can be NULL
+    // The "|" in the format makes 'encoding' and 'errors' optional, and
+    // PyArg_ParseTuple() leaves omitted optional outputs untouched, so they
+    // must start as NULL rather than as indeterminate pointers.
+    encoding = NULL;
+    if (!PyArg_ParseTuple(args, "O|zz", &input, &encoding, &errors)) {
+        return NULL;
+    }
+    return PyCodec_Encode(input, encoding, errors);
+}
+
+static PyObject *
+codec_decode(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    // Expose PyCodec_Decode(): args is (input[, encoding[, errors]]), where
+    // None for either string is converted to NULL by the "z" format.
+    PyObject *input;
+    const char *NULL_WOULD_RAISE(encoding);     // NULL case will be tested
+    const char *errors = NULL;                  // can be NULL
+    // The "|" in the format makes 'encoding' and 'errors' optional, and
+    // PyArg_ParseTuple() leaves omitted optional outputs untouched, so they
+    // must start as NULL rather than as indeterminate pointers.
+    encoding = NULL;
+    if (!PyArg_ParseTuple(args, "O|zz", &input, &encoding, &errors)) {
+        return NULL;
+    }
+    return PyCodec_Decode(input, encoding, errors);
+}
+
+static PyObject *
+codec_encoder(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    // Expose PyCodec_Encoder(): args is (encoding,); None becomes NULL.
+    const char *NULL_WOULD_RAISE(encoding);     // NULL case will be tested
+    if (PyArg_ParseTuple(args, "z", &encoding)) {
+        return PyCodec_Encoder(encoding);
+    }
+    return NULL;    // argument parsing failed
+}
+
+static PyObject *
+codec_decoder(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    // Expose PyCodec_Decoder(): args is (encoding,); None becomes NULL.
+    const char *NULL_WOULD_RAISE(encoding);     // NULL case will be tested
+    if (PyArg_ParseTuple(args, "z", &encoding)) {
+        return PyCodec_Decoder(encoding);
+    }
+    return NULL;    // argument parsing failed
+}
+
+static PyObject *
+codec_incremental_encoder(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    // Expose PyCodec_IncrementalEncoder(): args is (encoding, errors) and
+    // None for either one is converted to NULL.
+    const char *errors;                         // can be NULL
+    const char *NULL_WOULD_RAISE(encoding);     // NULL case will be tested
+    if (PyArg_ParseTuple(args, "zz", &encoding, &errors)) {
+        return PyCodec_IncrementalEncoder(encoding, errors);
+    }
+    return NULL;    // argument parsing failed
+}
+
+static PyObject *
+codec_incremental_decoder(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    // Expose PyCodec_IncrementalDecoder(): args is (encoding, errors) and
+    // None for either one is converted to NULL.
+    const char *errors;                         // can be NULL
+    const char *NULL_WOULD_RAISE(encoding);     // NULL case will be tested
+    if (PyArg_ParseTuple(args, "zz", &encoding, &errors)) {
+        return PyCodec_IncrementalDecoder(encoding, errors);
+    }
+    return NULL;    // argument parsing failed
+}
+
+static PyObject *
+codec_stream_reader(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    // Expose PyCodec_StreamReader(): args is (encoding, stream, errors);
+    // None for encoding or errors is converted to NULL.
+    PyObject *stream;
+    const char *errors;                         // can be NULL
+    const char *NULL_WOULD_RAISE(encoding);     // NULL case will be tested
+    if (PyArg_ParseTuple(args, "zOz", &encoding, &stream, &errors)) {
+        return PyCodec_StreamReader(encoding, stream, errors);
+    }
+    return NULL;    // argument parsing failed
+}
+
+static PyObject *
+codec_stream_writer(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    // Expose PyCodec_StreamWriter(): args is (encoding, stream, errors);
+    // None for encoding or errors is converted to NULL.
+    PyObject *stream;
+    const char *errors;                         // can be NULL
+    const char *NULL_WOULD_RAISE(encoding);     // NULL case will be tested
+    if (PyArg_ParseTuple(args, "zOz", &encoding, &stream, &errors)) {
+        return PyCodec_StreamWriter(encoding, stream, errors);
+    }
+    return NULL;    // argument parsing failed
+}
+
+// === Codecs errors handlers =================================================
+
+static PyObject *
+codec_register_error(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    // Expose PyCodec_RegisterError(): args is (name, handler).
+    PyObject *handler;
+    const char *name;   // must not be NULL
+    if (!PyArg_ParseTuple(args, "sO", &name, &handler)) {
+        return NULL;
+    }
+    // A negative status is reported as NULL, anything else as None.
+    int status = PyCodec_RegisterError(name, handler);
+    if (status < 0) {
+        return NULL;
+    }
+    Py_RETURN_NONE;
+}
+
+static PyObject *
+codec_lookup_error(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    // Expose PyCodec_LookupError(): args is (name,); None becomes NULL.
+    const char *NULL_WOULD_RAISE(name);     // NULL case will be tested
+    if (PyArg_ParseTuple(args, "z", &name)) {
+        return PyCodec_LookupError(name);
+    }
+    return NULL;    // argument parsing failed
+}
+
+static PyObject *
+codec_strict_errors(PyObject *Py_UNUSED(module), PyObject *exc)
+{
+    // METH_O wrapper: forward the single argument to PyCodec_StrictErrors().
+    assert(exc != NULL);
+    PyObject *result = PyCodec_StrictErrors(exc);
+    return result;
+}
+
+static PyObject *
+codec_ignore_errors(PyObject *Py_UNUSED(module), PyObject *exc)
+{
+    // METH_O wrapper: forward the single argument to PyCodec_IgnoreErrors().
+    assert(exc != NULL);
+    PyObject *result = PyCodec_IgnoreErrors(exc);
+    return result;
+}
+
+static PyObject *
+codec_replace_errors(PyObject *Py_UNUSED(module), PyObject *exc)
+{
+    // METH_O wrapper: forward the single argument to PyCodec_ReplaceErrors().
+    assert(exc != NULL);
+    PyObject *result = PyCodec_ReplaceErrors(exc);
+    return result;
+}
+
+static PyObject *
+codec_xmlcharrefreplace_errors(PyObject *Py_UNUSED(module), PyObject *exc)
+{
+    // METH_O wrapper: forward the single argument to
+    // PyCodec_XMLCharRefReplaceErrors().
+    assert(exc != NULL);
+    PyObject *result = PyCodec_XMLCharRefReplaceErrors(exc);
+    return result;
+}
+
+static PyObject *
+codec_backslashreplace_errors(PyObject *Py_UNUSED(module), PyObject *exc)
+{
+    // METH_O wrapper: forward the single argument to
+    // PyCodec_BackslashReplaceErrors().
+    assert(exc != NULL);
+    PyObject *result = PyCodec_BackslashReplaceErrors(exc);
+    return result;
+}
/* Method table for the codec test helpers defined above; each entry maps the
   Python-visible name to its C wrapper and calling convention. The table is
   terminated by an all-NULL sentinel entry. */
static PyMethodDef test_methods[] = {
- {NULL},
+ /* codecs registration */
+ {"codec_register", codec_register, METH_O},
+ {"codec_unregister", codec_unregister, METH_O},
+ {"codec_known_encoding", codec_known_encoding, METH_VARARGS},
+ /* encoding and decoding interface */
+ {"codec_encode", codec_encode, METH_VARARGS},
+ {"codec_decode", codec_decode, METH_VARARGS},
+ {"codec_encoder", codec_encoder, METH_VARARGS},
+ {"codec_decoder", codec_decoder, METH_VARARGS},
+ {"codec_incremental_encoder", codec_incremental_encoder, METH_VARARGS},
+ {"codec_incremental_decoder", codec_incremental_decoder, METH_VARARGS},
+ {"codec_stream_reader", codec_stream_reader, METH_VARARGS},
+ {"codec_stream_writer", codec_stream_writer, METH_VARARGS},
+ /* error handling */
+ {"codec_register_error", codec_register_error, METH_VARARGS},
+ {"codec_lookup_error", codec_lookup_error, METH_VARARGS},
+ {"codec_strict_errors", codec_strict_errors, METH_O},
+ {"codec_ignore_errors", codec_ignore_errors, METH_O},
+ {"codec_replace_errors", codec_replace_errors, METH_O},
+ {"codec_xmlcharrefreplace_errors", codec_xmlcharrefreplace_errors, METH_O},
+ {"codec_backslashreplace_errors", codec_backslashreplace_errors, METH_O},
+ // PyCodec_NameReplaceErrors() is tested in _testlimitedcapi/codec.c
+ {NULL, NULL, 0, NULL},
};
int
_PyTestCapi_Init_Codec(PyObject *m)
{
- if (PyModule_AddFunctions(m, test_methods) < 0){
+ if (PyModule_AddFunctions(m, test_methods) < 0) {
return -1;
}