]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.12] gh-126742: Add _PyErr_SetLocaleString, use it for gdbm & dlerror messages...
author Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
Tue, 17 Dec 2024 12:53:16 +0000 (13:53 +0100)
committer GitHub <noreply@github.com>
Tue, 17 Dec 2024 12:53:16 +0000 (14:53 +0200)
- Add a helper to set an error from locale-encoded `char*`
- Use the helper for gdbm & dlerror messages

Co-authored-by: Victor Stinner <vstinner@python.org>
12 files changed:
Include/internal/pycore_pyerrors.h
Lib/test/test_ctypes/test_dlerror.py
Lib/test/test_dbm_gnu.py
Modules/_ctypes/_ctypes.c
Modules/_ctypes/callproc.c
Modules/_gdbmmodule.c
Modules/_hashopenssl.c
Modules/_sqlite/util.c
Modules/_testcapi/exceptions.c
Modules/main.c
Modules/pyexpat.c
Python/errors.c

index 4620a2696449178500796a721b0b20e341075add..dab41405b92281a146b19c9ab0aec4e20af07b75 100644 (file)
@@ -75,6 +75,18 @@ PyAPI_FUNC(void) _PyErr_SetString(
     PyObject *exception,
     const char *string);
 
+/*
+ * Set an exception with the error message decoded from the current locale
+ * encoding (LC_CTYPE).
+ *
+ * Exceptions occurring in decoding take priority over the desired exception.
+ *
+ * Exported for '_ctypes' shared extensions.
+ */
+PyAPI_FUNC(void) _PyErr_SetLocaleString(
+    PyObject *exception,
+    const char *string);
+
 PyAPI_FUNC(PyObject *) _PyErr_Format(
     PyThreadState *tstate,
     PyObject *exception,
index 4441e30cd7a2a7f4f042717e874e8a72228ce077..c3c34d43481d3677e3a838b7812e2c932eb25dcc 100644 (file)
@@ -1,7 +1,12 @@
+import _ctypes
 import os
+import platform
 import sys
+import test.support
 import unittest
-import platform
+from ctypes import CDLL, c_int
+from ctypes.util import find_library
+
 
 FOO_C = r"""
 #include <unistd.h>
@@ -26,7 +31,7 @@ void *foo(void)
 
 
 @unittest.skipUnless(sys.platform.startswith('linux'),
-                     'Test only valid for Linux')
+                     'test requires GNU IFUNC support')
 class TestNullDlsym(unittest.TestCase):
     """GH-126554: Ensure that we catch NULL dlsym return values
 
@@ -53,14 +58,6 @@ class TestNullDlsym(unittest.TestCase):
         import subprocess
         import tempfile
 
-        # To avoid ImportErrors on Windows, where _ctypes does not have
-        # dlopen and dlsym,
-        # import here, i.e., inside the test function.
-        # The skipUnless('linux') decorator ensures that we're on linux
-        # if we're executing these statements.
-        from ctypes import CDLL, c_int
-        from _ctypes import dlopen, dlsym
-
         retcode = subprocess.call(["gcc", "--version"],
                                   stdout=subprocess.DEVNULL,
                                   stderr=subprocess.DEVNULL)
@@ -111,6 +108,8 @@ class TestNullDlsym(unittest.TestCase):
             self.assertEqual(os.read(pipe_r, 2), b'OK')
 
             # Case #3: Test 'py_dl_sym' from Modules/_ctypes/callproc.c
+            dlopen = test.support.get_attribute(_ctypes, 'dlopen')
+            dlsym = test.support.get_attribute(_ctypes, 'dlsym')
             L = dlopen(dstname)
             with self.assertRaisesRegex(OSError, "symbol 'foo' not found"):
                 dlsym(L, "foo")
@@ -119,5 +118,66 @@ class TestNullDlsym(unittest.TestCase):
             self.assertEqual(os.read(pipe_r, 2), b'OK')
 
 
+@unittest.skipUnless(os.name != 'nt', 'test requires dlerror() calls')
+class TestLocalization(unittest.TestCase):
+
+    @staticmethod
+    def configure_locales(func):
+        return test.support.run_with_locale(
+            'LC_ALL',
+            'fr_FR.iso88591', 'ja_JP.sjis', 'zh_CN.gbk',
+            'fr_FR.utf8', 'en_US.utf8',
+            '',
+        )(func)
+
+    @classmethod
+    def setUpClass(cls):
+        cls.libc_filename = find_library("c")
+
+    @configure_locales
+    def test_localized_error_from_dll(self):
+        dll = CDLL(self.libc_filename)
+        with self.assertRaises(AttributeError) as cm:
+            dll.this_name_does_not_exist
+        if sys.platform.startswith('linux'):
+            # On macOS, the filename is not reported by dlerror().
+            self.assertIn(self.libc_filename, str(cm.exception))
+
+    @configure_locales
+    def test_localized_error_in_dll(self):
+        dll = CDLL(self.libc_filename)
+        with self.assertRaises(ValueError) as cm:
+            c_int.in_dll(dll, 'this_name_does_not_exist')
+        if sys.platform.startswith('linux'):
+            # On macOS, the filename is not reported by dlerror().
+            self.assertIn(self.libc_filename, str(cm.exception))
+
+    @unittest.skipUnless(hasattr(_ctypes, 'dlopen'),
+                         'test requires _ctypes.dlopen()')
+    @configure_locales
+    def test_localized_error_dlopen(self):
+        missing_filename = b'missing\xff.so'
+        # Depending on the locale, we may encode '\xff' differently
+        # but we are only interested in avoiding a UnicodeDecodeError
+        # when reporting the dlerror() error message which contains
+        # the localized filename.
+        filename_pattern = r'missing.*?\.so'
+        with self.assertRaisesRegex(OSError, filename_pattern):
+            _ctypes.dlopen(missing_filename, 2)
+
+    @unittest.skipUnless(hasattr(_ctypes, 'dlopen'),
+                         'test requires _ctypes.dlopen()')
+    @unittest.skipUnless(hasattr(_ctypes, 'dlsym'),
+                         'test requires _ctypes.dlsym()')
+    @configure_locales
+    def test_localized_error_dlsym(self):
+        dll = _ctypes.dlopen(self.libc_filename)
+        with self.assertRaises(OSError) as cm:
+            _ctypes.dlsym(dll, 'this_name_does_not_exist')
+        if sys.platform.startswith('linux'):
+            # On macOS, the filename is not reported by dlerror().
+            self.assertIn(self.libc_filename, str(cm.exception))
+
+
 if __name__ == "__main__":
     unittest.main()
index 73602cab5180fc7fc0dc35b79a8383cda25ed42e..f1ad4a3a9cbc25bd1aa2654f41270d83e2e996be 100644 (file)
@@ -1,10 +1,11 @@
-from test import support
-from test.support import import_helper, cpython_only
-gdbm = import_helper.import_module("dbm.gnu") #skip if not supported
-import unittest
 import os
-from test.support.os_helper import TESTFN, TESTFN_NONASCII, unlink, FakePath
+import unittest
+from test import support
+from test.support import cpython_only, import_helper
+from test.support.os_helper import (TESTFN, TESTFN_NONASCII, FakePath,
+                                    create_empty_file, temp_dir, unlink)
 
+gdbm = import_helper.import_module("dbm.gnu")  # skip if not supported
 
 filename = TESTFN
 
@@ -192,6 +193,17 @@ class TestGdbm(unittest.TestCase):
     def test_open_with_pathlib_bytes_path(self):
         gdbm.open(FakePath(os.fsencode(filename)), "c").close()
 
+    @support.run_with_locale(
+        'LC_ALL',
+        'fr_FR.iso88591', 'ja_JP.sjis', 'zh_CN.gbk',
+        'fr_FR.utf8', 'en_US.utf8',
+        '',
+    )
+    def test_localized_error(self):
+        with temp_dir() as d:
+            create_empty_file(os.path.join(d, 'test'))
+            self.assertRaises(gdbm.error, gdbm.open, filename, 'r')
+
 
 if __name__ == '__main__':
     unittest.main()
index 74e586af4a3210ed2a755be79e03a5003272a5cd..96a057554960e2de75bc703ddfb17fd0efbd3850 100644 (file)
@@ -125,6 +125,7 @@ bytes(cdata)
 #include "ctypes.h"
 
 #include "pycore_long.h"          // _PyLong_GetZero()
+#include "pycore_pyerrors.h"      // _PyErr_SetLocaleString()
 
 ctypes_state global_state;
 
@@ -803,15 +804,8 @@ CDataType_in_dll(PyObject *type, PyObject *args)
     #ifdef USE_DLERROR
     const char *dlerr = dlerror();
     if (dlerr) {
-        PyObject *message = PyUnicode_DecodeLocale(dlerr, "surrogateescape");
-        if (message) {
-            PyErr_SetObject(PyExc_ValueError, message);
-            Py_DECREF(message);
-            return NULL;
-        }
-        // Ignore errors from PyUnicode_DecodeLocale,
-        // fall back to the generic error below.
-        PyErr_Clear();
+        _PyErr_SetLocaleString(PyExc_ValueError, dlerr);
+        return NULL;
     }
     #endif
 #undef USE_DLERROR
@@ -3646,21 +3640,14 @@ PyCFuncPtr_FromDll(PyTypeObject *type, PyObject *args, PyObject *kwds)
     #endif
     address = (PPROC)dlsym(handle, name);
     if (!address) {
-       #ifdef USE_DLERROR
+    #ifdef USE_DLERROR
         const char *dlerr = dlerror();
         if (dlerr) {
-            PyObject *message = PyUnicode_DecodeLocale(dlerr, "surrogateescape");
-            if (message) {
-                PyErr_SetObject(PyExc_AttributeError, message);
-                Py_DECREF(ftuple);
-                Py_DECREF(message);
-                return NULL;
-            }
-            // Ignore errors from PyUnicode_DecodeLocale,
-            // fall back to the generic error below.
-            PyErr_Clear();
+            _PyErr_SetLocaleString(PyExc_AttributeError, dlerr);
+            Py_DECREF(ftuple);
+            return NULL;
         }
-       #endif
+    #endif
         PyErr_Format(PyExc_AttributeError, "function '%s' not found", name);
         Py_DECREF(ftuple);
         return NULL;
index d121526af88415ec3aae54f91f8b3b0189199d3b..f98479ee85f270a89915030ce05369cdeeecaadb 100644 (file)
@@ -96,6 +96,7 @@
 #define DONT_USE_SEH
 #endif
 
+#include "pycore_pyerrors.h"        // _PyErr_SetLocaleString()
 #include "pycore_runtime.h"         // _PyRuntime
 #include "pycore_global_objects.h"  // _Py_ID()
 
@@ -1550,10 +1551,11 @@ static PyObject *py_dl_open(PyObject *self, PyObject *args)
     Py_XDECREF(name2);
     if (!handle) {
         const char *errmsg = dlerror();
-        if (!errmsg)
-            errmsg = "dlopen() error";
-        PyErr_SetString(PyExc_OSError,
-                               errmsg);
+        if (errmsg) {
+            _PyErr_SetLocaleString(PyExc_OSError, errmsg);
+            return NULL;
+        }
+        PyErr_SetString(PyExc_OSError, "dlopen() error");
         return NULL;
     }
     return PyLong_FromVoidPtr(handle);
@@ -1566,8 +1568,12 @@ static PyObject *py_dl_close(PyObject *self, PyObject *args)
     if (!PyArg_ParseTuple(args, "O&:dlclose", &_parse_voidp, &handle))
         return NULL;
     if (dlclose(handle)) {
-        PyErr_SetString(PyExc_OSError,
-                               dlerror());
+        const char *errmsg = dlerror();
+        if (errmsg) {
+            _PyErr_SetLocaleString(PyExc_OSError, errmsg);
+            return NULL;
+        }
+        PyErr_SetString(PyExc_OSError, "dlclose() error");
         return NULL;
     }
     Py_RETURN_NONE;
@@ -1601,21 +1607,14 @@ static PyObject *py_dl_sym(PyObject *self, PyObject *args)
     if (ptr) {
         return PyLong_FromVoidPtr(ptr);
     }
-       #ifdef USE_DLERROR
-    const char *dlerr = dlerror();
-    if (dlerr) {
-        PyObject *message = PyUnicode_DecodeLocale(dlerr, "surrogateescape");
-        if (message) {
-            PyErr_SetObject(PyExc_OSError, message);
-            Py_DECREF(message);
-            return NULL;
-        }
-        // Ignore errors from PyUnicode_DecodeLocale,
-        // fall back to the generic error below.
-        PyErr_Clear();
+    #ifdef USE_DLERROR
+    const char *errmsg = dlerror();
+    if (errmsg) {
+        _PyErr_SetLocaleString(PyExc_OSError, errmsg);
+        return NULL;
     }
-       #endif
-       #undef USE_DLERROR
+    #endif
+    #undef USE_DLERROR
     PyErr_Format(PyExc_OSError, "symbol '%s' not found", name);
     return NULL;
 }
index 4dbb5741b2ede82385354977e21851cca2ccd042..ffd7b6420894b2f1f655cd4b5ffe0243c2dc3761 100644 (file)
@@ -3,12 +3,18 @@
 /* Author: Anthony Baxter, after dbmmodule.c */
 /* Doc strings: Mitch Chapman */
 
+// required for pycore_pyerrors.h
+#ifndef Py_BUILD_CORE_BUILTIN
+#  define Py_BUILD_CORE_MODULE 1
+#endif
+
 #define PY_SSIZE_T_CLEAN
 #include "Python.h"
+#include "pycore_pyerrors.h"        // _PyErr_SetLocaleString()
 #include "gdbm.h"
 
 #include <fcntl.h>
-#include <stdlib.h>               // free()
+#include <stdlib.h>                 // free()
 #include <sys/stat.h>
 #include <sys/types.h>
 
@@ -30,6 +36,24 @@ get_gdbm_state(PyObject *module)
     return (_gdbm_state *)state;
 }
 
+/*
+ * Set the gdbm error obtained by gdbm_strerror(gdbm_errno).
+ *
+ * If no error message exists, a generic (UTF-8) error message
+ * is used instead.
+ */
+static void
+set_gdbm_error(_gdbm_state *state, const char *generic_error)
+{
+    const char *gdbm_errmsg = gdbm_strerror(gdbm_errno);
+    if (gdbm_errmsg) {
+        _PyErr_SetLocaleString(state->gdbm_error, gdbm_errmsg);
+    }
+    else {
+        PyErr_SetString(state->gdbm_error, generic_error);
+    }
+}
+
 /*[clinic input]
 module _gdbm
 class _gdbm.gdbm "gdbmobject *" "&Gdbmtype"
@@ -88,7 +112,7 @@ newgdbmobject(_gdbm_state *state, const char *file, int flags, int mode)
             PyErr_SetFromErrnoWithFilename(state->gdbm_error, file);
         }
         else {
-            PyErr_SetString(state->gdbm_error, gdbm_strerror(gdbm_errno));
+            set_gdbm_error(state, "gdbm_open() error");
         }
         Py_DECREF(dp);
         return NULL;
@@ -133,7 +157,7 @@ gdbm_length(gdbmobject *dp)
                 PyErr_SetFromErrno(state->gdbm_error);
             }
             else {
-                PyErr_SetString(state->gdbm_error, gdbm_strerror(gdbm_errno));
+                set_gdbm_error(state, "gdbm_count() error");
             }
             return -1;
         }
@@ -283,7 +307,7 @@ gdbm_ass_sub(gdbmobject *dp, PyObject *v, PyObject *w)
                 PyErr_SetObject(PyExc_KeyError, v);
             }
             else {
-                PyErr_SetString(state->gdbm_error, gdbm_strerror(gdbm_errno));
+                set_gdbm_error(state, "gdbm_delete() error");
             }
             return -1;
         }
@@ -294,11 +318,12 @@ gdbm_ass_sub(gdbmobject *dp, PyObject *v, PyObject *w)
         }
         errno = 0;
         if (gdbm_store(dp->di_dbm, krec, drec, GDBM_REPLACE) < 0) {
-            if (errno != 0)
+            if (errno != 0) {
                 PyErr_SetFromErrno(state->gdbm_error);
-            else
-                PyErr_SetString(state->gdbm_error,
-                                gdbm_strerror(gdbm_errno));
+            }
+            else {
+                set_gdbm_error(state, "gdbm_store() error");
+            }
             return -1;
         }
     }
@@ -531,10 +556,12 @@ _gdbm_gdbm_reorganize_impl(gdbmobject *self, PyTypeObject *cls)
     check_gdbmobject_open(self, state->gdbm_error);
     errno = 0;
     if (gdbm_reorganize(self->di_dbm) < 0) {
-        if (errno != 0)
+        if (errno != 0) {
             PyErr_SetFromErrno(state->gdbm_error);
-        else
-            PyErr_SetString(state->gdbm_error, gdbm_strerror(gdbm_errno));
+        }
+        else {
+            set_gdbm_error(state, "gdbm_reorganize() error");
+        }
         return NULL;
     }
     Py_RETURN_NONE;
index 2998820953bda9e8df5cad0d889ed44817c47044..3cc7d6f50e896ff31d696e54e3e3c9560b77a1bc 100644 (file)
@@ -320,6 +320,7 @@ _setException(PyObject *exc, const char* altmsg, ...)
     va_end(vargs);
     ERR_clear_error();
 
+    /* ERR_ERROR_STRING(3) ensures that the messages below are ASCII */
     lib = ERR_lib_error_string(errcode);
     func = ERR_func_error_string(errcode);
     reason = ERR_reason_error_string(errcode);
index 2b3bbfefa3cf5fc0410b91b74c2fa64d1529b603..c521fc5ad6bcda6003f73f4b4fbb090c15f63723 100644 (file)
@@ -129,6 +129,7 @@ _pysqlite_seterror(pysqlite_state *state, sqlite3 *db)
 
     /* Create and set the exception. */
     int extended_errcode = sqlite3_extended_errcode(db);
+    // sqlite3_errmsg() always returns a UTF-8 encoded message
     const char *errmsg = sqlite3_errmsg(db);
     raise_exception(exc_class, extended_errcode, errmsg);
     return extended_errcode;
index 4e02f5bec1a942c43fc9ce4003e30cadb286cd9e..3e7d14f3b88e321210a069a0cd1ed8c63d5a09a3 100644 (file)
@@ -1,6 +1,7 @@
 #define PY_SSIZE_T_CLEAN
 #include "parts.h"
 #include "util.h"
+
 #include "clinic/exceptions.c.h"
 
 
index b602272b78befd721401c576d9b1d26d2ec867a0..f4e4018212f59e6ffb81e959a02c3dc7f32c95e9 100644 (file)
@@ -325,6 +325,7 @@ pymain_run_file_obj(PyObject *program_name, PyObject *filename,
     if (fp == NULL) {
         // Ignore the OSError
         PyErr_Clear();
+        // TODO(picnixz): strerror() is locale-dependent, but PySys_FormatStderr() is not.
         PySys_FormatStderr("%S: can't open file %R: [Errno %d] %s\n",
                            program_name, filename, errno, strerror(errno));
         return 2;
index be31c637fcc8d5c3eed813e368c4265e81ef6525..b354a86e7f9fee7586dbb36806a46f2e06fa9bc1 100644 (file)
@@ -1784,7 +1784,12 @@ add_error(PyObject *errors_module, PyObject *codes_dict,
      *       with the other uses of the XML_ErrorString function
      *       elsewhere within this file.  pyexpat's copy of the messages
      *       only acts as a fallback in case of outdated runtime libexpat,
-     *       where it returns NULL. */
+     *       where it returns NULL.
+     *
+     *       In addition, XML_ErrorString is assumed to return UTF-8 encoded
+     *       strings (in conv_string_to_unicode, we decode them using 'strict'
+     *       error handling).
+     */
     const char *error_string = XML_ErrorString(error_code);
     if (error_string == NULL) {
         error_string = error_info_of[error_index].description;
index cbfc2faf8bb3bcc1d014df4027810d68017fbfd0..bfc37f3dbb6ff0f01cf9201620058b07b72c145f 100644 (file)
@@ -305,6 +305,15 @@ PyErr_SetString(PyObject *exception, const char *string)
     _PyErr_SetString(tstate, exception, string);
 }
 
+void
+_PyErr_SetLocaleString(PyObject *exception, const char *string)
+{
+    PyObject *value = PyUnicode_DecodeLocale(string, "surrogateescape");
+    if (value != NULL) {
+        PyErr_SetObject(exception, value);
+        Py_DECREF(value);
+    }
+}
 
 PyObject* _Py_HOT_FUNCTION
 PyErr_Occurred(void)