git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-124665: Add `_PyCodec_UnregisterError` and `_codecs._unregister_error` (#124677)
author: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
Sun, 29 Sep 2024 00:25:23 +0000 (02:25 +0200)
committer: GitHub <noreply@github.com>
Sun, 29 Sep 2024 00:25:23 +0000 (02:25 +0200)
Include/internal/pycore_codecs.h
Lib/test/test_codeccallbacks.py
Modules/_codecsmodule.c
Modules/clinic/_codecsmodule.c.h
Python/codecs.c
Tools/c-analyzer/cpython/ignored.tsv

index 5e2d5c5ce9d868a2b87824e458028e2b65ad922b..4400be8b33dee7e2f66546da8f34f9a7f9ea98f6 100644 (file)
@@ -21,6 +21,17 @@ extern void _PyCodec_Fini(PyInterpreterState *interp);
 
 extern PyObject* _PyCodec_Lookup(const char *encoding);
 
+/*
+ * Un-register the error handling callback function registered under
+ * the given 'name'. Only custom error handlers can be un-registered.
+ *
+ * - Return -1 and set an exception if 'name' refers to a built-in
+ *   error handling name (e.g., 'strict'), or if an error occurred.
+ * - Return 0 if no custom error handler can be found for 'name'.
+ * - Return 1 if the custom error handler was successfully removed.
+ */
+extern int _PyCodec_UnregisterError(const char *name);
+
 /* Text codec specific encoding and decoding API.
 
    Checks the encoding against a list of codecs which do not
index 4991330489d139dee68126d53665f2e39760e3e6..86e5e5c14746741fee31d766e9c018cad16d5424 100644 (file)
@@ -1,3 +1,4 @@
+from _codecs import _unregister_error as _codecs_unregister_error
 import codecs
 import html.entities
 import itertools
@@ -1210,7 +1211,6 @@ class CodecCallbackTest(unittest.TestCase):
             '\ufffd\x00\x00'
         )
 
-
     def test_fake_error_class(self):
         handlers = [
             codecs.strict_errors,
@@ -1235,6 +1235,31 @@ class CodecCallbackTest(unittest.TestCase):
                     with self.assertRaises((TypeError, FakeUnicodeError)):
                         handler(FakeUnicodeError())
 
+    def test_reject_unregister_builtin_error_handler(self):
+        for name in [
+            'strict', 'ignore', 'replace', 'backslashreplace', 'namereplace',
+            'xmlcharrefreplace', 'surrogateescape', 'surrogatepass',
+        ]:
+            with self.subTest(name):
+                self.assertRaises(ValueError, _codecs_unregister_error, name)
+
+    def test_unregister_custom_error_handler(self):
+        def custom_handler(exc):
+            raise exc
+
+        custom_name = 'test.test_unregister_custom_error_handler'
+        self.assertRaises(LookupError, codecs.lookup_error, custom_name)
+        codecs.register_error(custom_name, custom_handler)
+        self.assertIs(codecs.lookup_error(custom_name), custom_handler)
+        self.assertTrue(_codecs_unregister_error(custom_name))
+        self.assertRaises(LookupError, codecs.lookup_error, custom_name)
+
+    def test_unregister_custom_unknown_error_handler(self):
+        unknown_name = 'test.test_unregister_custom_unknown_error_handler'
+        self.assertRaises(LookupError, codecs.lookup_error, unknown_name)
+        self.assertFalse(_codecs_unregister_error(unknown_name))
+        self.assertRaises(LookupError, codecs.lookup_error, unknown_name)
+
 
 if __name__ == "__main__":
     unittest.main()
index 32373f0799bfeb8bdd258e8f6907e2502c269bda..471b42badc8e8c639d40bdd0a21fb5beffaa78e3 100644 (file)
@@ -979,6 +979,30 @@ _codecs_register_error_impl(PyObject *module, const char *errors,
     Py_RETURN_NONE;
 }
 
+/*[clinic input]
+_codecs._unregister_error -> bool
+    errors: str
+    /
+
+Un-register the specified error handler for the error handling `errors'.
+
+Only custom error handlers can be un-registered. An exception is raised
+if the error handling is a built-in one (e.g., 'strict'), or if an error
+occurs.
+
+Otherwise, this returns True if a custom handler has been successfully
+un-registered, and False if no custom handler for the specified error
+handling exists.
+
+[clinic start generated code]*/
+
+static int
+_codecs__unregister_error_impl(PyObject *module, const char *errors)
+/*[clinic end generated code: output=28c22be667465503 input=a63ab9e9ce1686d4]*/
+{
+    return _PyCodec_UnregisterError(errors);
+}
+
 /*[clinic input]
 _codecs.lookup_error
     name: str
@@ -1044,6 +1068,7 @@ static PyMethodDef _codecs_functions[] = {
     _CODECS_CODE_PAGE_ENCODE_METHODDEF
     _CODECS_CODE_PAGE_DECODE_METHODDEF
     _CODECS_REGISTER_ERROR_METHODDEF
+    _CODECS__UNREGISTER_ERROR_METHODDEF
     _CODECS_LOOKUP_ERROR_METHODDEF
     {NULL, NULL}                /* sentinel */
 };
index 1c0f37442ab3509a479455a9932d3a592a3308b5..01855aec5e123e69e5513b1d478a716146936a3f 100644 (file)
@@ -2683,6 +2683,56 @@ exit:
     return return_value;
 }
 
+PyDoc_STRVAR(_codecs__unregister_error__doc__,
+"_unregister_error($module, errors, /)\n"
+"--\n"
+"\n"
+"Un-register the specified error handler for the error handling `errors\'.\n"
+"\n"
+"Only custom error handlers can be un-registered. An exception is raised\n"
+"if the error handling is a built-in one (e.g., \'strict\'), or if an error\n"
+"occurs.\n"
+"\n"
+"Otherwise, this returns True if a custom handler has been successfully\n"
+"un-registered, and False if no custom handler for the specified error\n"
+"handling exists.");
+
+#define _CODECS__UNREGISTER_ERROR_METHODDEF    \
+    {"_unregister_error", (PyCFunction)_codecs__unregister_error, METH_O, _codecs__unregister_error__doc__},
+
+static int
+_codecs__unregister_error_impl(PyObject *module, const char *errors);
+
+static PyObject *
+_codecs__unregister_error(PyObject *module, PyObject *arg)
+{
+    PyObject *return_value = NULL;
+    const char *errors;
+    int _return_value;
+
+    if (!PyUnicode_Check(arg)) {
+        _PyArg_BadArgument("_unregister_error", "argument", "str", arg);
+        goto exit;
+    }
+    Py_ssize_t errors_length;
+    errors = PyUnicode_AsUTF8AndSize(arg, &errors_length);
+    if (errors == NULL) {
+        goto exit;
+    }
+    if (strlen(errors) != (size_t)errors_length) {
+        PyErr_SetString(PyExc_ValueError, "embedded null character");
+        goto exit;
+    }
+    _return_value = _codecs__unregister_error_impl(module, errors);
+    if ((_return_value == -1) && PyErr_Occurred()) {
+        goto exit;
+    }
+    return_value = PyBool_FromLong((long)_return_value);
+
+exit:
+    return return_value;
+}
+
 PyDoc_STRVAR(_codecs_lookup_error__doc__,
 "lookup_error($module, name, /)\n"
 "--\n"
@@ -2746,4 +2796,4 @@ exit:
 #ifndef _CODECS_CODE_PAGE_ENCODE_METHODDEF
     #define _CODECS_CODE_PAGE_ENCODE_METHODDEF
 #endif /* !defined(_CODECS_CODE_PAGE_ENCODE_METHODDEF) */
-/*[clinic end generated code: output=e50d5fdf65bd45fa input=a9049054013a1b77]*/
+/*[clinic end generated code: output=b3013d4709d96ffe input=a9049054013a1b77]*/
index 9c0a3fad314cb5cd9ff32cce1f2286e1f5f64663..68dc232bb86163cb5cd9e8cab5cc2bd6be1fc2ad 100644 (file)
@@ -16,6 +16,12 @@ Copyright (c) Corporation for National Research Initiatives.
 #include "pycore_pystate.h"       // _PyInterpreterState_GET()
 #include "pycore_ucnhash.h"       // _PyUnicode_Name_CAPI
 
+static const char *codecs_builtin_error_handlers[] = {
+    "strict", "ignore", "replace",
+    "xmlcharrefreplace", "backslashreplace", "namereplace",
+    "surrogatepass", "surrogateescape",
+};
+
 const char *Py_hexdigits = "0123456789abcdef";
 
 /* --- Codec Registry ----------------------------------------------------- */
@@ -618,6 +624,20 @@ int PyCodec_RegisterError(const char *name, PyObject *error)
                                 name, error);
 }
 
+int _PyCodec_UnregisterError(const char *name)
+{
+    for (size_t i = 0; i < Py_ARRAY_LENGTH(codecs_builtin_error_handlers); ++i) {
+        if (strcmp(name, codecs_builtin_error_handlers[i]) == 0) {
+            PyErr_Format(PyExc_ValueError,
+                         "cannot un-register built-in error handler '%s'", name);
+            return -1;
+        }
+    }
+    PyInterpreterState *interp = _PyInterpreterState_GET();
+    assert(interp->codecs.initialized);
+    return PyDict_PopString(interp->codecs.error_registry, name, NULL);
+}
+
 /* Lookup the error handling callback function registered under the
    name error. As a special case NULL can be passed, in which case
    the error handling callback for strict encoding will be returned. */
@@ -1470,6 +1490,8 @@ _PyCodec_InitRegistry(PyInterpreterState *interp)
             }
         }
     };
+    // ensure that the built-in error handlers' names are kept in sync
+    assert(Py_ARRAY_LENGTH(methods) == Py_ARRAY_LENGTH(codecs_builtin_error_handlers));
 
     assert(interp->codecs.initialized == 0);
     interp->codecs.search_path = PyList_New(0);
index f4dc807198a8ef8e226b7248884ffc185e374122..e6c599a2ac4a464a2011bd363c71f8133c82c37b 100644 (file)
@@ -345,6 +345,7 @@ Python/ast_opt.c    fold_unaryop    ops     -
 Python/ceval.c -       _PyEval_BinaryOps       -
 Python/ceval.c -       _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS -
 Python/codecs.c        -       Py_hexdigits    -
+Python/codecs.c        -       codecs_builtin_error_handlers   -
 Python/codecs.c        -       ucnhash_capi    -
 Python/codecs.c        _PyCodec_InitRegistry   methods -
 Python/compile.c       -       NO_LOCATION     -