]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-126742: Add _PyErr_SetLocaleString, use it for gdbm & dlerror messages (GH-126746)
authorBénédikt Tran <10796600+picnixz@users.noreply.github.com>
Tue, 17 Dec 2024 11:12:45 +0000 (12:12 +0100)
committerGitHub <noreply@github.com>
Tue, 17 Dec 2024 11:12:45 +0000 (12:12 +0100)
- Add a helper to set an error from locale-encoded `char*`
- Use the helper for gdbm & dlerror messages

Co-authored-by: Victor Stinner <vstinner@python.org>
12 files changed:
Include/internal/pycore_pyerrors.h
Lib/test/test_ctypes/test_dlerror.py
Lib/test/test_dbm_gnu.py
Modules/_ctypes/_ctypes.c
Modules/_ctypes/callproc.c
Modules/_gdbmmodule.c
Modules/_hashopenssl.c
Modules/_sqlite/util.c
Modules/_testcapi/exceptions.c
Modules/main.c
Modules/pyexpat.c
Python/errors.c

index 02945f0e71a1451ff78b78b160077047ebc7aaf2..6f2fdda9a9f12f5e204bd483c53d1a737e73eb02 100644 (file)
@@ -130,6 +130,18 @@ PyAPI_FUNC(void) _PyErr_SetString(
     PyObject *exception,
     const char *string);
 
+/*
+ * Set an exception with the error message decoded from the current locale
+ * encoding (LC_CTYPE).
+ *
+ * Exceptions occurring in decoding take priority over the desired exception.
+ *
+ * Exported for '_ctypes' shared extensions.
+ */
+PyAPI_FUNC(void) _PyErr_SetLocaleString(
+    PyObject *exception,
+    const char *string);
+
 PyAPI_FUNC(PyObject*) _PyErr_Format(
     PyThreadState *tstate,
     PyObject *exception,
index 4441e30cd7a2a7f4f042717e874e8a72228ce077..c3c34d43481d3677e3a838b7812e2c932eb25dcc 100644 (file)
@@ -1,7 +1,12 @@
+import _ctypes
 import os
+import platform
 import sys
+import test.support
 import unittest
-import platform
+from ctypes import CDLL, c_int
+from ctypes.util import find_library
+
 
 FOO_C = r"""
 #include <unistd.h>
@@ -26,7 +31,7 @@ void *foo(void)
 
 
 @unittest.skipUnless(sys.platform.startswith('linux'),
-                     'Test only valid for Linux')
+                     'test requires GNU IFUNC support')
 class TestNullDlsym(unittest.TestCase):
     """GH-126554: Ensure that we catch NULL dlsym return values
 
@@ -53,14 +58,6 @@ class TestNullDlsym(unittest.TestCase):
         import subprocess
         import tempfile
 
-        # To avoid ImportErrors on Windows, where _ctypes does not have
-        # dlopen and dlsym,
-        # import here, i.e., inside the test function.
-        # The skipUnless('linux') decorator ensures that we're on linux
-        # if we're executing these statements.
-        from ctypes import CDLL, c_int
-        from _ctypes import dlopen, dlsym
-
         retcode = subprocess.call(["gcc", "--version"],
                                   stdout=subprocess.DEVNULL,
                                   stderr=subprocess.DEVNULL)
@@ -111,6 +108,8 @@ class TestNullDlsym(unittest.TestCase):
             self.assertEqual(os.read(pipe_r, 2), b'OK')
 
             # Case #3: Test 'py_dl_sym' from Modules/_ctypes/callproc.c
+            dlopen = test.support.get_attribute(_ctypes, 'dlopen')
+            dlsym = test.support.get_attribute(_ctypes, 'dlsym')
             L = dlopen(dstname)
             with self.assertRaisesRegex(OSError, "symbol 'foo' not found"):
                 dlsym(L, "foo")
@@ -119,5 +118,66 @@ class TestNullDlsym(unittest.TestCase):
             self.assertEqual(os.read(pipe_r, 2), b'OK')
 
 
+@unittest.skipUnless(os.name != 'nt', 'test requires dlerror() calls')
+class TestLocalization(unittest.TestCase):
+
+    @staticmethod
+    def configure_locales(func):
+        return test.support.run_with_locale(
+            'LC_ALL',
+            'fr_FR.iso88591', 'ja_JP.sjis', 'zh_CN.gbk',
+            'fr_FR.utf8', 'en_US.utf8',
+            '',
+        )(func)
+
+    @classmethod
+    def setUpClass(cls):
+        cls.libc_filename = find_library("c")
+
+    @configure_locales
+    def test_localized_error_from_dll(self):
+        dll = CDLL(self.libc_filename)
+        with self.assertRaises(AttributeError) as cm:
+            dll.this_name_does_not_exist
+        if sys.platform.startswith('linux'):
+            # On macOS, the filename is not reported by dlerror().
+            self.assertIn(self.libc_filename, str(cm.exception))
+
+    @configure_locales
+    def test_localized_error_in_dll(self):
+        dll = CDLL(self.libc_filename)
+        with self.assertRaises(ValueError) as cm:
+            c_int.in_dll(dll, 'this_name_does_not_exist')
+        if sys.platform.startswith('linux'):
+            # On macOS, the filename is not reported by dlerror().
+            self.assertIn(self.libc_filename, str(cm.exception))
+
+    @unittest.skipUnless(hasattr(_ctypes, 'dlopen'),
+                         'test requires _ctypes.dlopen()')
+    @configure_locales
+    def test_localized_error_dlopen(self):
+        missing_filename = b'missing\xff.so'
+        # Depending whether the locale, we may encode '\xff' differently
+        # but we are only interested in avoiding a UnicodeDecodeError
+        # when reporting the dlerror() error message which contains
+        # the localized filename.
+        filename_pattern = r'missing.*?\.so'
+        with self.assertRaisesRegex(OSError, filename_pattern):
+            _ctypes.dlopen(missing_filename, 2)
+
+    @unittest.skipUnless(hasattr(_ctypes, 'dlopen'),
+                         'test requires _ctypes.dlopen()')
+    @unittest.skipUnless(hasattr(_ctypes, 'dlsym'),
+                         'test requires _ctypes.dlsym()')
+    @configure_locales
+    def test_localized_error_dlsym(self):
+        dll = _ctypes.dlopen(self.libc_filename)
+        with self.assertRaises(OSError) as cm:
+            _ctypes.dlsym(dll, 'this_name_does_not_exist')
+        if sys.platform.startswith('linux'):
+            # On macOS, the filename is not reported by dlerror().
+            self.assertIn(self.libc_filename, str(cm.exception))
+
+
 if __name__ == "__main__":
     unittest.main()
index e20addf1f04f1b082a07917f77443f922af522c9..66268c42a300b5e0e65db4bbaf9369509415810a 100644 (file)
@@ -1,10 +1,11 @@
-from test import support
-from test.support import import_helper, cpython_only
-gdbm = import_helper.import_module("dbm.gnu") #skip if not supported
-import unittest
 import os
-from test.support.os_helper import TESTFN, TESTFN_NONASCII, unlink, FakePath
+import unittest
+from test import support
+from test.support import cpython_only, import_helper
+from test.support.os_helper import (TESTFN, TESTFN_NONASCII, FakePath,
+                                    create_empty_file, temp_dir, unlink)
 
+gdbm = import_helper.import_module("dbm.gnu")  # skip if not supported
 
 filename = TESTFN
 
@@ -205,6 +206,16 @@ class TestGdbm(unittest.TestCase):
                 self.assertNotIn(k, db)
             self.assertEqual(len(db), 0)
 
+    @support.run_with_locale(
+        'LC_ALL',
+        'fr_FR.iso88591', 'ja_JP.sjis', 'zh_CN.gbk',
+        'fr_FR.utf8', 'en_US.utf8',
+        '',
+    )
+    def test_localized_error(self):
+        with temp_dir() as d:
+            create_empty_file(os.path.join(d, 'test'))
+            self.assertRaises(gdbm.error, gdbm.open, filename, 'r')
 
 
 if __name__ == '__main__':
index bb4699884057ba1a185ea823ef1e6e4bbfa0e992..3a3b1da5084a673b4ff33a9512b8df1c4f454d4f 100644 (file)
@@ -984,15 +984,8 @@ CDataType_in_dll_impl(PyObject *type, PyTypeObject *cls, PyObject *dll,
     #ifdef USE_DLERROR
     const char *dlerr = dlerror();
     if (dlerr) {
-        PyObject *message = PyUnicode_DecodeLocale(dlerr, "surrogateescape");
-        if (message) {
-            PyErr_SetObject(PyExc_ValueError, message);
-            Py_DECREF(message);
-            return NULL;
-        }
-        // Ignore errors from PyUnicode_DecodeLocale,
-        // fall back to the generic error below.
-        PyErr_Clear();
+        _PyErr_SetLocaleString(PyExc_ValueError, dlerr);
+        return NULL;
     }
     #endif
 #undef USE_DLERROR
@@ -3825,21 +3818,14 @@ PyCFuncPtr_FromDll(PyTypeObject *type, PyObject *args, PyObject *kwds)
     address = (PPROC)dlsym(handle, name);
 
     if (!address) {
-       #ifdef USE_DLERROR
+    #ifdef USE_DLERROR
         const char *dlerr = dlerror();
         if (dlerr) {
-            PyObject *message = PyUnicode_DecodeLocale(dlerr, "surrogateescape");
-            if (message) {
-                PyErr_SetObject(PyExc_AttributeError, message);
-                Py_DECREF(ftuple);
-                Py_DECREF(message);
-                return NULL;
-            }
-            // Ignore errors from PyUnicode_DecodeLocale,
-            // fall back to the generic error below.
-            PyErr_Clear();
+            _PyErr_SetLocaleString(PyExc_AttributeError, dlerr);
+            Py_DECREF(ftuple);
+            return NULL;
         }
-       #endif
+    #endif
         PyErr_Format(PyExc_AttributeError, "function '%s' not found", name);
         Py_DECREF(ftuple);
         return NULL;
index 218c3a9c81e05f45cac721f8e607127f5a146ded..92eedff5ec94f189027f097d68c95e8b8253c8b3 100644 (file)
@@ -1588,10 +1588,11 @@ static PyObject *py_dl_open(PyObject *self, PyObject *args)
     Py_XDECREF(name2);
     if (!handle) {
         const char *errmsg = dlerror();
-        if (!errmsg)
-            errmsg = "dlopen() error";
-        PyErr_SetString(PyExc_OSError,
-                               errmsg);
+        if (errmsg) {
+            _PyErr_SetLocaleString(PyExc_OSError, errmsg);
+            return NULL;
+        }
+        PyErr_SetString(PyExc_OSError, "dlopen() error");
         return NULL;
     }
     return PyLong_FromVoidPtr(handle);
@@ -1604,8 +1605,12 @@ static PyObject *py_dl_close(PyObject *self, PyObject *args)
     if (!PyArg_ParseTuple(args, "O&:dlclose", &_parse_voidp, &handle))
         return NULL;
     if (dlclose(handle)) {
-        PyErr_SetString(PyExc_OSError,
-                               dlerror());
+        const char *errmsg = dlerror();
+        if (errmsg) {
+            _PyErr_SetLocaleString(PyExc_OSError, errmsg);
+            return NULL;
+        }
+        PyErr_SetString(PyExc_OSError, "dlclose() error");
         return NULL;
     }
     Py_RETURN_NONE;
@@ -1639,21 +1644,14 @@ static PyObject *py_dl_sym(PyObject *self, PyObject *args)
     if (ptr) {
         return PyLong_FromVoidPtr(ptr);
     }
-       #ifdef USE_DLERROR
-    const char *dlerr = dlerror();
-    if (dlerr) {
-        PyObject *message = PyUnicode_DecodeLocale(dlerr, "surrogateescape");
-        if (message) {
-            PyErr_SetObject(PyExc_OSError, message);
-            Py_DECREF(message);
-            return NULL;
-        }
-        // Ignore errors from PyUnicode_DecodeLocale,
-        // fall back to the generic error below.
-        PyErr_Clear();
+    #ifdef USE_DLERROR
+    const char *errmsg = dlerror();
+    if (errmsg) {
+        _PyErr_SetLocaleString(PyExc_OSError, errmsg);
+        return NULL;
     }
-       #endif
-       #undef USE_DLERROR
+    #endif
+    #undef USE_DLERROR
     PyErr_Format(PyExc_OSError, "symbol '%s' not found", name);
     return NULL;
 }
index df7fba67810ed04c84bce8134d0553bf117d4af7..ea4fe247987e9dc47fc7349f5da9a660d72e45bd 100644 (file)
@@ -8,10 +8,11 @@
 #endif
 
 #include "Python.h"
+#include "pycore_pyerrors.h"        // _PyErr_SetLocaleString()
 #include "gdbm.h"
 
 #include <fcntl.h>
-#include <stdlib.h>               // free()
+#include <stdlib.h>                 // free()
 #include <sys/stat.h>
 #include <sys/types.h>
 
@@ -33,6 +34,24 @@ get_gdbm_state(PyObject *module)
     return (_gdbm_state *)state;
 }
 
+/*
+ * Set the gdbm error obtained by gdbm_strerror(gdbm_errno).
+ *
+ * If no error message exists, a generic (UTF-8) error message
+ * is used instead.
+ */
+static void
+set_gdbm_error(_gdbm_state *state, const char *generic_error)
+{
+    const char *gdbm_errmsg = gdbm_strerror(gdbm_errno);
+    if (gdbm_errmsg) {
+        _PyErr_SetLocaleString(state->gdbm_error, gdbm_errmsg);
+    }
+    else {
+        PyErr_SetString(state->gdbm_error, generic_error);
+    }
+}
+
 /*[clinic input]
 module _gdbm
 class _gdbm.gdbm "gdbmobject *" "&Gdbmtype"
@@ -91,7 +110,7 @@ newgdbmobject(_gdbm_state *state, const char *file, int flags, int mode)
             PyErr_SetFromErrnoWithFilename(state->gdbm_error, file);
         }
         else {
-            PyErr_SetString(state->gdbm_error, gdbm_strerror(gdbm_errno));
+            set_gdbm_error(state, "gdbm_open() error");
         }
         Py_DECREF(dp);
         return NULL;
@@ -136,7 +155,7 @@ gdbm_length(gdbmobject *dp)
                 PyErr_SetFromErrno(state->gdbm_error);
             }
             else {
-                PyErr_SetString(state->gdbm_error, gdbm_strerror(gdbm_errno));
+                set_gdbm_error(state, "gdbm_count() error");
             }
             return -1;
         }
@@ -286,7 +305,7 @@ gdbm_ass_sub(gdbmobject *dp, PyObject *v, PyObject *w)
                 PyErr_SetObject(PyExc_KeyError, v);
             }
             else {
-                PyErr_SetString(state->gdbm_error, gdbm_strerror(gdbm_errno));
+                set_gdbm_error(state, "gdbm_delete() error");
             }
             return -1;
         }
@@ -297,11 +316,12 @@ gdbm_ass_sub(gdbmobject *dp, PyObject *v, PyObject *w)
         }
         errno = 0;
         if (gdbm_store(dp->di_dbm, krec, drec, GDBM_REPLACE) < 0) {
-            if (errno != 0)
+            if (errno != 0) {
                 PyErr_SetFromErrno(state->gdbm_error);
-            else
-                PyErr_SetString(state->gdbm_error,
-                                gdbm_strerror(gdbm_errno));
+            }
+            else {
+                set_gdbm_error(state, "gdbm_store() error");
+            }
             return -1;
         }
     }
@@ -534,10 +554,12 @@ _gdbm_gdbm_reorganize_impl(gdbmobject *self, PyTypeObject *cls)
     check_gdbmobject_open(self, state->gdbm_error);
     errno = 0;
     if (gdbm_reorganize(self->di_dbm) < 0) {
-        if (errno != 0)
+        if (errno != 0) {
             PyErr_SetFromErrno(state->gdbm_error);
-        else
-            PyErr_SetString(state->gdbm_error, gdbm_strerror(gdbm_errno));
+        }
+        else {
+            set_gdbm_error(state, "gdbm_reorganize() error");
+        }
         return NULL;
     }
     Py_RETURN_NONE;
index 2c9a9feecc79f0b4b4dac360bb0dbb97bf47d087..082929be3c77b7346c82a91639e69b3733e9d4df 100644 (file)
@@ -319,6 +319,7 @@ _setException(PyObject *exc, const char* altmsg, ...)
     va_end(vargs);
     ERR_clear_error();
 
+    /* ERR_ERROR_STRING(3) ensures that the messages below are ASCII */
     lib = ERR_lib_error_string(errcode);
     func = ERR_func_error_string(errcode);
     reason = ERR_reason_error_string(errcode);
index 9e8613ef67916e9662e930767a17393702e87537..b0622e66928f47e5d891ff8d5466a97c3c86f5e2 100644 (file)
@@ -134,6 +134,7 @@ _pysqlite_seterror(pysqlite_state *state, sqlite3 *db)
 
     /* Create and set the exception. */
     int extended_errcode = sqlite3_extended_errcode(db);
+    // sqlite3_errmsg() always returns an UTF-8 encoded message
     const char *errmsg = sqlite3_errmsg(db);
     raise_exception(exc_class, extended_errcode, errmsg);
     return extended_errcode;
index e92d9670e7c7926691368f9e3d4b10d8be53521a..b647bfc71eae2445a46d56cb863265d79a7e563e 100644 (file)
@@ -3,6 +3,7 @@
 
 #include "parts.h"
 #include "util.h"
+
 #include "clinic/exceptions.c.h"
 
 
index 15ea49a1bad19e9b93d7273de86f1854aee9d75e..3bf2241f2837a3341b25573304e93c14d6b28a9b 100644 (file)
@@ -374,6 +374,7 @@ pymain_run_file_obj(PyObject *program_name, PyObject *filename,
     if (fp == NULL) {
         // Ignore the OSError
         PyErr_Clear();
+        // TODO(picnixz): strerror() is locale dependent but not PySys_FormatStderr().
         PySys_FormatStderr("%S: can't open file %R: [Errno %d] %s\n",
                            program_name, filename, errno, strerror(errno));
         return 2;
index 9733bc34f7c80a1c6533e1d7bd224c19374e9e5b..cf7714e7656205e2302ab231c930d6cc41aabb25 100644 (file)
@@ -1782,7 +1782,12 @@ add_error(PyObject *errors_module, PyObject *codes_dict,
      *       with the other uses of the XML_ErrorString function
      *       elsewhere within this file.  pyexpat's copy of the messages
      *       only acts as a fallback in case of outdated runtime libexpat,
-     *       where it returns NULL. */
+     *       where it returns NULL.
+     *
+     *       In addition, XML_ErrorString is assumed to return UTF-8 encoded
+     *       strings (in conv_string_to_unicode, we decode them using 'strict'
+     *       error handling).
+     */
     const char *error_string = XML_ErrorString(error_code);
     if (error_string == NULL) {
         error_string = error_info_of[error_index].description;
index 7f3b4aabc432d71cba50ac0dd1da5511a1970c99..2d362c1864ffff6416e32fd95a32868b755cd8b2 100644 (file)
@@ -301,6 +301,15 @@ PyErr_SetString(PyObject *exception, const char *string)
     _PyErr_SetString(tstate, exception, string);
 }
 
+void
+_PyErr_SetLocaleString(PyObject *exception, const char *string)
+{
+    PyObject *value = PyUnicode_DecodeLocale(string, "surrogateescape");
+    if (value != NULL) {
+        PyErr_SetObject(exception, value);
+        Py_DECREF(value);
+    }
+}
 
 PyObject* _Py_HOT_FUNCTION
 PyErr_Occurred(void)