]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-117953: Work Relative to Specific Extension Kinds in the Import Machinery (gh...
authorEric Snow <ericsnowcurrently@gmail.com>
Wed, 1 May 2024 23:40:28 +0000 (17:40 -0600)
committerGitHub <noreply@github.com>
Wed, 1 May 2024 23:40:28 +0000 (17:40 -0600)
This change will make some later changes simpler.

Include/internal/pycore_importdl.h
Python/import.c
Python/importdl.c

index adb89167d124eb4be3e385e30f16a85b11d60ca7..b0af28da34e08719a879cdfc1443b1de824e3a12 100644 (file)
@@ -15,8 +15,15 @@ extern "C" {
 extern const char *_PyImport_DynLoadFiletab[];
 
 
-typedef PyObject *(*PyModInitFunction)(void);
+typedef enum ext_module_kind {
+    _Py_ext_module_kind_UNKNOWN = 0,
+    _Py_ext_module_kind_SINGLEPHASE = 1,
+    _Py_ext_module_kind_MULTIPHASE = 2,
+    _Py_ext_module_kind_INVALID = 3,
+} _Py_ext_module_kind;
+
 
+/* Input for loading an extension module. */
 struct _Py_ext_module_loader_info {
     PyObject *filename;
 #ifndef MS_WINDOWS
@@ -43,10 +50,33 @@ extern int _Py_ext_module_loader_info_init_from_spec(
     struct _Py_ext_module_loader_info *info,
     PyObject *spec);
 
+/* The result from running an extension module's init function. */
 struct _Py_ext_module_loader_result {
     PyModuleDef *def;
     PyObject *module;
+    _Py_ext_module_kind kind;
+    struct _Py_ext_module_loader_result_error *err;
+    struct _Py_ext_module_loader_result_error {
+        enum _Py_ext_module_loader_result_error_kind {
+            _Py_ext_module_loader_result_EXCEPTION = 0,
+            _Py_ext_module_loader_result_ERR_MISSING = 1,
+            _Py_ext_module_loader_result_ERR_UNREPORTED_EXC = 2,
+            _Py_ext_module_loader_result_ERR_UNINITIALIZED = 3,
+            _Py_ext_module_loader_result_ERR_NONASCII_NOT_MULTIPHASE = 4,
+            _Py_ext_module_loader_result_ERR_NOT_MODULE = 5,
+            _Py_ext_module_loader_result_ERR_MISSING_DEF = 6,
+        } kind;
+        PyObject *exc;
+    } _err;
 };
+extern void _Py_ext_module_loader_result_clear(
+    struct _Py_ext_module_loader_result *res);
+extern void _Py_ext_module_loader_result_apply_error(
+    struct _Py_ext_module_loader_result *res,
+    const char *name);
+
+/* The module init function. */
+typedef PyObject *(*PyModInitFunction)(void);
 extern PyModInitFunction _PyImport_GetModInitFunc(
     struct _Py_ext_module_loader_info *info,
     FILE *fp);
index 447114ab115bc7daecca533e1a873bf1249edb7d..0c51ffc6285a9c618150b7006f50f97c422a3061 100644 (file)
@@ -1193,26 +1193,63 @@ is_core_module(PyInterpreterState *interp, PyObject *name, PyObject *path)
 }
 
 #ifndef NDEBUG
-static bool
-is_singlephase(PyModuleDef *def)
+static _Py_ext_module_kind
+_get_extension_kind(PyModuleDef *def, bool check_size)
 {
+    _Py_ext_module_kind kind;
     if (def == NULL) {
         /* It must be a module created by reload_singlephase_extension()
          * from m_copy.  Ideally we'd do away with this case. */
-        return true;
+        kind = _Py_ext_module_kind_SINGLEPHASE;
     }
-    else if (def->m_slots == NULL) {
-        return true;
+    else if (def->m_slots != NULL) {
+        kind = _Py_ext_module_kind_MULTIPHASE;
     }
-    else {
-        return false;
+    else if (check_size && def->m_size == -1) {
+        kind = _Py_ext_module_kind_SINGLEPHASE;
     }
-}
+    else if (def->m_base.m_init != NULL) {
+        kind = _Py_ext_module_kind_SINGLEPHASE;
+    }
+    else {
+        // This is probably single-phase init, but a multi-phase
+        // module *can* have NULL m_slots.
+        kind = _Py_ext_module_kind_UNKNOWN;
+    }
+    return kind;
+}
+
+/* The module might not be fully initialized yet
+ * and PyModule_FromDefAndSpec() checks m_size
+ * so we skip m_size. */
+#define assert_multiphase_def(def)                                  \
+    do {                                                            \
+        _Py_ext_module_kind kind = _get_extension_kind(def, false); \
+        assert(kind == _Py_ext_module_kind_MULTIPHASE               \
+                /* m_slots can be NULL. */                          \
+                || kind == _Py_ext_module_kind_UNKNOWN);            \
+    } while (0)
+
+#define assert_singlephase_def(def)                                 \
+    do {                                                            \
+        _Py_ext_module_kind kind = _get_extension_kind(def, true);  \
+        assert(kind == _Py_ext_module_kind_SINGLEPHASE              \
+                || kind == _Py_ext_module_kind_UNKNOWN);            \
+    } while (0)
+
+#define assert_singlephase(def) \
+    assert_singlephase_def(def)
+
+#else  /* defined(NDEBUG) */
+#define assert_multiphase_def(def)
+#define assert_singlephase_def(def)
+#define assert_singlephase(def)
 #endif
 
 
 struct singlephase_global_update {
     PyObject *m_dict;
+    PyModInitFunction m_init;
 };
 
 static int
@@ -1226,10 +1263,24 @@ update_global_state_for_extension(PyThreadState *tstate,
         assert(def->m_base.m_copy == NULL);
     }
     else {
-        assert(def->m_base.m_init != NULL
-               || is_core_module(tstate->interp, name, path));
-        if (singlephase->m_dict == NULL) {
+        if (singlephase->m_init != NULL) {
+            assert(singlephase->m_dict == NULL);
+            assert(def->m_base.m_copy == NULL);
+            assert(def->m_size >= 0);
+            /* Remember pointer to module init function. */
+            // XXX If two modules share a def then def->m_base will
+            // reflect the last one added (here) to the global cache.
+            // We should prevent this somehow.  The simplest solution
+            // is probably to store m_copy/m_init in the cache along
+            // with the def, rather than within the def.
+            def->m_base.m_init = singlephase->m_init;
+        }
+        else if (singlephase->m_dict == NULL) {
+            /* It must be a core builtin module. */
+            assert(is_core_module(tstate->interp, name, path));
+            assert(def->m_size == -1);
             assert(def->m_base.m_copy == NULL);
+            assert(def->m_base.m_init == NULL);
         }
         else {
             assert(PyDict_Check(singlephase->m_dict));
@@ -1316,7 +1367,7 @@ import_find_extension(PyThreadState *tstate,
     if (def == NULL) {
         return NULL;
     }
-    assert(is_singlephase(def));
+    assert_singlephase(def);
 
     /* It may have been successfully imported previously
        in an interpreter that allows legacy modules
@@ -1369,11 +1420,26 @@ import_find_extension(PyThreadState *tstate,
         }
         struct _Py_ext_module_loader_result res;
         if (_PyImport_RunModInitFunc(def->m_base.m_init, info, &res) < 0) {
+            _Py_ext_module_loader_result_apply_error(&res, name_buf);
             return NULL;
         }
         assert(!PyErr_Occurred());
+        assert(res.err == NULL);
+        assert(res.kind == _Py_ext_module_kind_SINGLEPHASE);
         mod = res.module;
-        // XXX __file__ doesn't get set!
+        /* Tchnically, the init function could return a different module def.
+         * Then we would probably need to update the global cache.
+         * However, we don't expect anyone to change the def. */
+        assert(res.def == def);
+        _Py_ext_module_loader_result_clear(&res);
+
+        /* Remember the filename as the __file__ attribute */
+        if (info->filename != NULL) {
+            if (PyModule_AddObjectRef(mod, "__file__", info->filename) < 0) {
+                PyErr_Clear(); /* Not important enough to report */
+            }
+        }
+
         if (PyObject_SetItem(modules, info->name, mod) == -1) {
             Py_DECREF(mod);
             return NULL;
@@ -1398,78 +1464,89 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0,
                      struct _Py_ext_module_loader_info *info,
                      PyObject *spec, PyObject *modules)
 {
+    /* Core modules go through _PyImport_FixupBuiltin(). */
+    assert(!is_core_module(tstate->interp, info->name, info->path));
+
     PyObject *mod = NULL;
     PyModuleDef *def = NULL;
+    const char *name_buf = PyBytes_AS_STRING(info->name_encoded);
 
     struct _Py_ext_module_loader_result res;
     if (_PyImport_RunModInitFunc(p0, info, &res) < 0) {
         /* We discard res.def. */
         assert(res.module == NULL);
-        assert(PyErr_Occurred());
-        goto finally;
+        _Py_ext_module_loader_result_apply_error(&res, name_buf);
+        return NULL;
     }
     assert(!PyErr_Occurred());
+    assert(res.err == NULL);
 
     mod = res.module;
     res.module = NULL;
     def = res.def;
     assert(def != NULL);
 
-    if (mod == NULL) {
-        //assert(!is_singlephase(def));
+    if (res.kind == _Py_ext_module_kind_MULTIPHASE) {
+        assert_multiphase_def(def);
         assert(mod == NULL);
         mod = PyModule_FromDefAndSpec(def, spec);
         if (mod == NULL) {
-            goto finally;
+            goto error;
         }
     }
     else {
-        assert(is_singlephase(def));
+        assert(res.kind == _Py_ext_module_kind_SINGLEPHASE);
+        assert_singlephase_def(def);
         assert(PyModule_Check(mod));
 
-        const char *name_buf = PyBytes_AS_STRING(info->name_encoded);
         if (_PyImport_CheckSubinterpIncompatibleExtensionAllowed(name_buf) < 0) {
-            Py_CLEAR(mod);
-            goto finally;
+            goto error;
         }
 
-        /* Remember pointer to module init function. */
-        def->m_base.m_init = p0;
-
+        /* Remember the filename as the __file__ attribute */
         if (info->filename != NULL) {
-            /* Remember the filename as the __file__ attribute */
             if (PyModule_AddObjectRef(mod, "__file__", info->filename) < 0) {
                 PyErr_Clear(); /* Not important enough to report */
             }
         }
 
+        /* Update global import state. */
         struct singlephase_global_update singlephase = {0};
         // gh-88216: Extensions and def->m_base.m_copy can be updated
         // when the extension module doesn't support sub-interpreters.
-        if (def->m_size == -1
-                && !is_core_module(tstate->interp, info->name, info->path))
-        {
+        if (def->m_size == -1) {
+            /* We will reload from m_copy. */
+            assert(def->m_base.m_init == NULL);
             singlephase.m_dict = PyModule_GetDict(mod);
             assert(singlephase.m_dict != NULL);
         }
+        else {
+            /* We will reload via the init function. */
+            assert(def->m_size >= 0);
+            singlephase.m_init = p0;
+        }
         if (update_global_state_for_extension(
                 tstate, info->path, info->name, def, &singlephase) < 0)
         {
-            Py_CLEAR(mod);
-            goto finally;
+            goto error;
         }
 
+        /* Update per-interpreter import state. */
         PyObject *modules = get_modules_dict(tstate, true);
         if (finish_singlephase_extension(
                 tstate, mod, def, info->name, modules) < 0)
         {
-            Py_CLEAR(mod);
-            goto finally;
+            goto error;
         }
     }
 
-finally:
+    _Py_ext_module_loader_result_clear(&res);
     return mod;
+
+error:
+    Py_XDECREF(mod);
+    _Py_ext_module_loader_result_clear(&res);
+    return NULL;
 }
 
 
@@ -1532,7 +1609,7 @@ _PyImport_FixupBuiltin(PyThreadState *tstate, PyObject *mod, const char *name,
      * module state, but we also don't populate def->m_base.m_copy
      * for them. */
     assert(is_core_module(tstate->interp, nameobj, nameobj));
-    assert(is_singlephase(def));
+    assert_singlephase_def(def);
     assert(def->m_size == -1);
     assert(def->m_base.m_copy == NULL);
 
@@ -1586,7 +1663,7 @@ create_builtin(PyThreadState *tstate, PyObject *name, PyObject *spec)
     PyObject *mod = import_find_extension(tstate, &info);
     if (mod != NULL) {
         assert(!_PyErr_Occurred(tstate));
-        assert(is_singlephase(_PyModule_GetDef(mod)));
+        assert_singlephase(_PyModule_GetDef(mod));
         goto finally;
     }
     else if (_PyErr_Occurred(tstate)) {
@@ -3940,7 +4017,7 @@ _imp_create_dynamic_impl(PyObject *module, PyObject *spec, PyObject *file)
     mod = import_find_extension(tstate, &info);
     if (mod != NULL) {
         assert(!_PyErr_Occurred(tstate));
-        assert(is_singlephase(_PyModule_GetDef(mod)));
+        assert_singlephase(_PyModule_GetDef(mod));
         goto finally;
     }
     else if (_PyErr_Occurred(tstate)) {
index 3e4ea175c0cfbc9b417efdc118ada8f329bbfbdd..a3091946bc94bfef4a8106a24afe05370fa5c9a2 100644 (file)
@@ -28,6 +28,11 @@ extern dl_funcptr _PyImport_FindSharedFuncptr(const char *prefix,
                                               const char *pathname, FILE *fp);
 #endif
 
+
+/***********************************/
+/* module info to use when loading */
+/***********************************/
+
 static const char * const ascii_only_prefix = "PyInit";
 static const char * const nonascii_prefix = "PyInitU";
 
@@ -205,6 +210,150 @@ _Py_ext_module_loader_info_init_from_spec(
 }
 
 
+/********************************/
+/* module init function results */
+/********************************/
+
+void
+_Py_ext_module_loader_result_clear(struct _Py_ext_module_loader_result *res)
+{
+    /* Instead, the caller should have called
+     * _Py_ext_module_loader_result_apply_error(). */
+    assert(res->err == NULL);
+    *res = (struct _Py_ext_module_loader_result){0};
+}
+
+static void
+_Py_ext_module_loader_result_set_error(
+                            struct _Py_ext_module_loader_result *res,
+                            enum _Py_ext_module_loader_result_error_kind kind)
+{
+#ifndef NDEBUG
+    switch (kind) {
+    case _Py_ext_module_loader_result_EXCEPTION:  /* fall through */
+    case _Py_ext_module_loader_result_ERR_UNREPORTED_EXC:
+        assert(PyErr_Occurred());
+        break;
+    case _Py_ext_module_loader_result_ERR_MISSING:  /* fall through */
+    case _Py_ext_module_loader_result_ERR_UNINITIALIZED:  /* fall through */
+    case _Py_ext_module_loader_result_ERR_NONASCII_NOT_MULTIPHASE:  /* fall through */
+    case _Py_ext_module_loader_result_ERR_NOT_MODULE:  /* fall through */
+    case _Py_ext_module_loader_result_ERR_MISSING_DEF:
+        assert(!PyErr_Occurred());
+        break;
+    default:
+        /* We added a new error kind but forgot to add it to this switch. */
+        assert(0);
+    }
+#endif
+
+    assert(res->err == NULL && res->_err.exc == NULL);
+    res->err = &res->_err;
+    *res->err = (struct _Py_ext_module_loader_result_error){
+        .kind=kind,
+        .exc=PyErr_GetRaisedException(),
+    };
+
+    /* For some kinds, we also set/check res->kind. */
+    switch (kind) {
+    case _Py_ext_module_loader_result_ERR_UNINITIALIZED:
+        assert(res->kind == _Py_ext_module_kind_UNKNOWN);
+        res->kind = _Py_ext_module_kind_INVALID;
+        break;
+    /* None of the rest affect the result kind. */
+    case _Py_ext_module_loader_result_EXCEPTION:  /* fall through */
+    case _Py_ext_module_loader_result_ERR_MISSING:  /* fall through */
+    case _Py_ext_module_loader_result_ERR_UNREPORTED_EXC:  /* fall through */
+    case _Py_ext_module_loader_result_ERR_NONASCII_NOT_MULTIPHASE:  /* fall through */
+    case _Py_ext_module_loader_result_ERR_NOT_MODULE:  /* fall through */
+    case _Py_ext_module_loader_result_ERR_MISSING_DEF:
+        break;
+    default:
+        /* We added a new error kind but forgot to add it to this switch. */
+        assert(0);
+    }
+}
+
+void
+_Py_ext_module_loader_result_apply_error(
+                            struct _Py_ext_module_loader_result *res,
+                            const char *name)
+{
+    assert(!PyErr_Occurred());
+    assert(res->err != NULL && res->err == &res->_err);
+    struct _Py_ext_module_loader_result_error err = *res->err;
+    res->err = NULL;
+
+    /* We're otherwise done with the result at this point. */
+    _Py_ext_module_loader_result_clear(res);
+
+#ifndef NDEBUG
+    switch (err.kind) {
+    case _Py_ext_module_loader_result_EXCEPTION:  /* fall through */
+    case _Py_ext_module_loader_result_ERR_UNREPORTED_EXC:
+        assert(err.exc != NULL);
+        break;
+    case _Py_ext_module_loader_result_ERR_MISSING:  /* fall through */
+    case _Py_ext_module_loader_result_ERR_UNINITIALIZED:  /* fall through */
+    case _Py_ext_module_loader_result_ERR_NONASCII_NOT_MULTIPHASE:  /* fall through */
+    case _Py_ext_module_loader_result_ERR_NOT_MODULE:  /* fall through */
+    case _Py_ext_module_loader_result_ERR_MISSING_DEF:
+        assert(err.exc == NULL);
+        break;
+    default:
+        /* We added a new error kind but forgot to add it to this switch. */
+        assert(0);
+    }
+#endif
+
+    const char *msg = NULL;
+    switch (err.kind) {
+    case _Py_ext_module_loader_result_EXCEPTION:
+        break;
+    case _Py_ext_module_loader_result_ERR_MISSING:
+        msg = "initialization of %s failed without raising an exception";
+        break;
+    case _Py_ext_module_loader_result_ERR_UNREPORTED_EXC:
+        msg = "initialization of %s raised unreported exception";
+        break;
+    case _Py_ext_module_loader_result_ERR_UNINITIALIZED:
+        msg = "init function of %s returned uninitialized object";
+        break;
+    case _Py_ext_module_loader_result_ERR_NONASCII_NOT_MULTIPHASE:
+        msg = "initialization of %s did not return PyModuleDef";
+        break;
+    case _Py_ext_module_loader_result_ERR_NOT_MODULE:
+        msg = "initialization of %s did not return an extension module";
+        break;
+    case _Py_ext_module_loader_result_ERR_MISSING_DEF:
+        msg = "initialization of %s did not return a valid extension module";
+        break;
+    default:
+        /* We added a new error kind but forgot to add it to this switch. */
+        assert(0);
+        PyErr_Format(PyExc_SystemError,
+                     "loading %s failed due to init function", name);
+        return;
+    }
+
+    if (err.exc != NULL) {
+        PyErr_SetRaisedException(err.exc);
+        err.exc = NULL;  /* PyErr_SetRaisedException() stole our reference. */
+        if (msg != NULL) {
+            _PyErr_FormatFromCause(PyExc_SystemError, msg, name);
+        }
+    }
+    else {
+        assert(msg != NULL);
+        PyErr_Format(PyExc_SystemError, msg, name);
+    }
+}
+
+
+/********************************************/
+/* getting/running the module init function */
+/********************************************/
+
 PyModInitFunction
 _PyImport_GetModInitFunc(struct _Py_ext_module_loader_info *info,
                          FILE *fp)
@@ -245,8 +394,9 @@ _PyImport_RunModInitFunc(PyModInitFunction p0,
                          struct _Py_ext_module_loader_info *info,
                          struct _Py_ext_module_loader_result *p_res)
 {
-    struct _Py_ext_module_loader_result res = {0};
-    const char *name_buf = PyBytes_AS_STRING(info->name_encoded);
+    struct _Py_ext_module_loader_result res = {
+        .kind=_Py_ext_module_kind_UNKNOWN,
+    };
 
     /* Call the module init function. */
 
@@ -258,18 +408,18 @@ _PyImport_RunModInitFunc(PyModInitFunction p0,
     /* Validate the result (and populate "res". */
 
     if (m == NULL) {
-        if (!PyErr_Occurred()) {
-            PyErr_Format(
-                PyExc_SystemError,
-                "initialization of %s failed without raising an exception",
-                name_buf);
+        if (PyErr_Occurred()) {
+            _Py_ext_module_loader_result_set_error(
+                        &res, _Py_ext_module_loader_result_EXCEPTION);
+        }
+        else {
+            _Py_ext_module_loader_result_set_error(
+                        &res, _Py_ext_module_loader_result_ERR_MISSING);
         }
         goto error;
     } else if (PyErr_Occurred()) {
-        _PyErr_FormatFromCause(
-            PyExc_SystemError,
-            "initialization of %s raised unreported exception",
-            name_buf);
+        _Py_ext_module_loader_result_set_error(
+                &res, _Py_ext_module_loader_result_ERR_UNREPORTED_EXC);
         /* We would probably be correct to decref m here,
          * but we weren't doing so before,
          * so we stick with doing nothing. */
@@ -281,9 +431,8 @@ _PyImport_RunModInitFunc(PyModInitFunction p0,
         /* This can happen when a PyModuleDef is returned without calling
          * PyModuleDef_Init on it
          */
-        PyErr_Format(PyExc_SystemError,
-                     "init function of %s returned uninitialized object",
-                     name_buf);
+        _Py_ext_module_loader_result_set_error(
+                &res, _Py_ext_module_loader_result_ERR_UNINITIALIZED);
         /* Likewise, decref'ing here makes sense.  However, the original
          * code has a note about "prevent segfault in DECREF",
          * so we play it safe and leave it alone. */
@@ -293,48 +442,50 @@ _PyImport_RunModInitFunc(PyModInitFunction p0,
 
     if (PyObject_TypeCheck(m, &PyModuleDef_Type)) {
         /* multi-phase init */
+        res.kind = _Py_ext_module_kind_MULTIPHASE;
         res.def = (PyModuleDef *)m;
         /* Run PyModule_FromDefAndSpec() to finish loading the module. */
     }
     else if (info->hook_prefix == nonascii_prefix) {
-        /* It should have been multi-phase init? */
+        /* Non-ASCII is only supported for multi-phase init. */
+        res.kind = _Py_ext_module_kind_MULTIPHASE;
         /* Don't allow legacy init for non-ASCII module names. */
-        PyErr_Format(
-            PyExc_SystemError,
-            "initialization of %s did not return PyModuleDef",
-            name_buf);
-        Py_DECREF(m);
-        return -1;
+        _Py_ext_module_loader_result_set_error(
+                &res, _Py_ext_module_loader_result_ERR_NONASCII_NOT_MULTIPHASE);
+        goto error;
     }
     else {
         /* single-phase init (legacy) */
+        res.kind = _Py_ext_module_kind_SINGLEPHASE;
         res.module = m;
 
         if (!PyModule_Check(m)) {
-            PyErr_Format(PyExc_SystemError,
-                         "initialization of %s did not return an extension "
-                         "module", name_buf);
+            _Py_ext_module_loader_result_set_error(
+                    &res, _Py_ext_module_loader_result_ERR_NOT_MODULE);
             goto error;
         }
 
         res.def = _PyModule_GetDef(m);
         if (res.def == NULL) {
-            PyErr_Format(PyExc_SystemError,
-                         "initialization of %s did not return a valid extension "
-                         "module", name_buf);
+            PyErr_Clear();
+            _Py_ext_module_loader_result_set_error(
+                    &res, _Py_ext_module_loader_result_ERR_MISSING_DEF);
             goto error;
         }
     }
 
     assert(!PyErr_Occurred());
+    assert(res.err == NULL);
     *p_res = res;
     return 0;
 
 error:
-    assert(PyErr_Occurred());
+    assert(!PyErr_Occurred());
+    assert(res.err != NULL);
     Py_CLEAR(res.module);
     res.def = NULL;
     *p_res = res;
+    p_res->err = &p_res->_err;
     return -1;
 }