]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-109587: Allow "precompiled" perf-trampolines to largely mitigate the cost of enabl...
author gsallam <123525874+gsallam@users.noreply.github.com>
Fri, 27 Oct 2023 03:57:29 +0000 (20:57 -0700)
committer GitHub <noreply@github.com>
Fri, 27 Oct 2023 03:57:29 +0000 (03:57 +0000)
Include/cpython/sysmodule.h
Include/internal/pycore_ceval_state.h
Include/sysmodule.h
Lib/test/test_perf_profiler.py
Misc/NEWS.d/next/C API/2023-10-02-23-08-53.gh-issue-109587.UqqnDY.rst [new file with mode: 0644]
Modules/_testinternalcapi.c
Python/perf_trampoline.c
Python/sysmodule.c

index df12ae440f024be10afdcb62ae300bce9077d856..9fd7cc0cb43931150e197a1929e6f3591854031b 100644 (file)
@@ -21,3 +21,6 @@ PyAPI_FUNC(int) PyUnstable_WritePerfMapEntry(
     unsigned int code_size,
     const char *entry_name);
 PyAPI_FUNC(void) PyUnstable_PerfMapState_Fini(void);
+PyAPI_FUNC(int) PyUnstable_CopyPerfMapFile(const char* parent_filename);
+PyAPI_FUNC(int) PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *);
+PyAPI_FUNC(int) PyUnstable_PerfTrampoline_SetPersistAfterFork(int enable);
index 1717ec4f41c36bba5014acfd56cabcc2570ae473..072bbcda0c3c828ebef0bc301acc3381a8b9e104 100644 (file)
@@ -55,6 +55,7 @@ struct _ceval_runtime_state {
         struct code_arena_st *code_arena;
         struct trampoline_api_st trampoline_api;
         FILE *map_file;
+        Py_ssize_t persist_after_fork;
 #else
         int _not_used;
 #endif
@@ -68,6 +69,7 @@ struct _ceval_runtime_state {
     { \
         .status = PERF_STATUS_NO_INIT, \
         .extra_code_index = -1, \
+        .persist_after_fork = 0, \
     }
 #else
 # define _PyEval_RUNTIME_PERF_INIT {0}
index 7406513ec1439ab3912bd71946711bde7d6857c7..7b14f72ee2e494576e1f8bed96285e39d0f28ab0 100644 (file)
@@ -1,6 +1,3 @@
-
-/* System module interface */
-
 #ifndef Py_SYSMODULE_H
 #define Py_SYSMODULE_H
 #ifdef __cplusplus
index fe8707a156e9dc1df6bc59be4f1ea4b822ddbe66..040be63da11447729f05c2141e9d284b3c3da526 100644 (file)
@@ -353,6 +353,82 @@ class TestPerfProfiler(unittest.TestCase):
             self.assertNotIn(f"py::bar:{script}", stdout)
             self.assertNotIn(f"py::baz:{script}", stdout)
 
+    def test_pre_fork_compile(self):
+        code = """if 1:
+                import sys
+                import os
+                import sysconfig
+                from _testinternalcapi import (
+                    compile_perf_trampoline_entry,
+                    perf_trampoline_set_persist_after_fork,
+                )
+
+                def foo_fork():
+                    pass
+
+                def bar_fork():
+                    foo_fork()
+
+                def foo():
+                    pass
+
+                def bar():
+                    foo()
+
+                def compile_trampolines_for_all_functions():
+                    perf_trampoline_set_persist_after_fork(1)
+                    for _, obj in globals().items():
+                        if callable(obj) and hasattr(obj, '__code__'):
+                            compile_perf_trampoline_entry(obj.__code__)
+
+                if __name__ == "__main__":
+                    compile_trampolines_for_all_functions()
+                    pid = os.fork()
+                    if pid == 0:
+                        print(os.getpid())
+                        bar_fork()
+                    else:
+                        bar()
+                """
+
+        with temp_dir() as script_dir:
+            script = make_script(script_dir, "perftest", code)
+            with subprocess.Popen(
+                [sys.executable, "-Xperf", script],
+                universal_newlines=True,
+                stderr=subprocess.PIPE,
+                stdout=subprocess.PIPE,
+            ) as process:
+                stdout, stderr = process.communicate()
+
+        self.assertEqual(process.returncode, 0)
+        self.assertNotIn("Error:", stderr)
+        child_pid = int(stdout.strip())
+        perf_file = pathlib.Path(f"/tmp/perf-{process.pid}.map")
+        perf_child_file = pathlib.Path(f"/tmp/perf-{child_pid}.map")
+        self.assertTrue(perf_file.exists())
+        self.assertTrue(perf_child_file.exists())
+
+        perf_file_contents = perf_file.read_text()
+        self.assertIn(f"py::foo:{script}", perf_file_contents)
+        self.assertIn(f"py::bar:{script}", perf_file_contents)
+        self.assertIn(f"py::foo_fork:{script}", perf_file_contents)
+        self.assertIn(f"py::bar_fork:{script}", perf_file_contents)
+
+        child_perf_file_contents = perf_child_file.read_text()
+        self.assertIn(f"py::foo_fork:{script}", child_perf_file_contents)
+        self.assertIn(f"py::bar_fork:{script}", child_perf_file_contents)
+
+        # Pre-compiled perf-map entries of a forked process must be
+        # identical in both the parent and child perf-map files.
+        perf_file_lines = perf_file_contents.split("\n")
+        for line in perf_file_lines:
+            if (
+                f"py::foo_fork:{script}" in line
+                or f"py::bar_fork:{script}" in line
+            ):
+                self.assertIn(line, child_perf_file_contents)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS.d/next/C API/2023-10-02-23-08-53.gh-issue-109587.UqqnDY.rst b/Misc/NEWS.d/next/C API/2023-10-02-23-08-53.gh-issue-109587.UqqnDY.rst
new file mode 100644 (file)
index 0000000..c6fa24f
--- /dev/null
@@ -0,0 +1,2 @@
+Introduced :c:func:`PyUnstable_PerfTrampoline_CompileCode`, :c:func:`PyUnstable_PerfTrampoline_SetPersistAfterFork` and
+:c:func:`PyUnstable_CopyPerfMapFile`. These functions allow extension modules to initialize trampolines eagerly, after the application is "warmed up". This makes it possible to have perf-trampolines running in an always-enabled fashion.
index 4ead1b6bea7fae3f424f8dfa84c94b0428e246fd..1869f48c2b1fbfedea72f63ddca404c9dab1fcbf 100644 (file)
@@ -1556,6 +1556,36 @@ _testinternalcapi_test_long_numbits_impl(PyObject *module)
     Py_RETURN_NONE;
 }
 
+/* Test helper: compile a perf trampoline for the given code object.
+ *
+ * Expects a single argument that must be a code object.  Returns the
+ * integer status reported by PyUnstable_PerfTrampoline_CompileCode() on
+ * success, or raises AssertionError when that status is non-zero.
+ */
+static PyObject *
+compile_perf_trampoline_entry(PyObject *self, PyObject *args)
+{
+    PyObject *code_obj;
+    if (!PyArg_ParseTuple(args, "O!", &PyCode_Type, &code_obj)) {
+        return NULL;
+    }
+
+    int status = PyUnstable_PerfTrampoline_CompileCode((PyCodeObject *)code_obj);
+    if (status == 0) {
+        return PyLong_FromLong(status);
+    }
+
+    PyErr_SetString(PyExc_AssertionError, "Failed to compile trampoline");
+    return NULL;
+}
+
+/* Test helper: set the perf-trampoline "persist after fork" flag.
+ *
+ * Expects a single C-int argument.  Returns the value reported by
+ * PyUnstable_PerfTrampoline_SetPersistAfterFork() as a Python int, or
+ * raises AssertionError when the flag could not be set.
+ */
+static PyObject *
+perf_trampoline_set_persist_after_fork(PyObject *self, PyObject *args)
+{
+    int enable;
+    if (!PyArg_ParseTuple(args, "i", &enable)) {
+        return NULL;
+    }
+    int ret = PyUnstable_PerfTrampoline_SetPersistAfterFork(enable);
+    /* The setter echoes back the value it stored (and returns 0 when perf
+     * trampolines are unavailable), so success means "got back what was
+     * requested".  Testing for ret == 0 would wrongly reject enable == 0. */
+    if (ret != enable) {
+        PyErr_SetString(PyExc_AssertionError, "Failed to set persist_after_fork");
+        return NULL;
+    }
+    return PyLong_FromLong(ret);
+}
+
 
 static PyMethodDef module_functions[] = {
     {"get_configs", get_configs, METH_NOARGS},
@@ -1613,6 +1643,8 @@ static PyMethodDef module_functions[] = {
     {"run_in_subinterp_with_config",
      _PyCFunction_CAST(run_in_subinterp_with_config),
      METH_VARARGS | METH_KEYWORDS},
+    {"compile_perf_trampoline_entry", compile_perf_trampoline_entry, METH_VARARGS},
+    {"perf_trampoline_set_persist_after_fork", perf_trampoline_set_persist_after_fork, METH_VARARGS},
     _TESTINTERNALCAPI_WRITE_UNRAISABLE_EXC_METHODDEF
     _TESTINTERNALCAPI_TEST_LONG_NUMBITS_METHODDEF
     {NULL, NULL} /* sentinel */
index 209a23b6c1cbc7b1ec0b3efeeefd05623f2c3b54..491223924ed7f2f8c9a8b5995701179a60f2c1d2 100644 (file)
@@ -193,7 +193,7 @@ typedef struct trampoline_api_st trampoline_api_t;
 #define perf_code_arena _PyRuntime.ceval.perf.code_arena
 #define trampoline_api _PyRuntime.ceval.perf.trampoline_api
 #define perf_map_file _PyRuntime.ceval.perf.map_file
-
+#define persist_after_fork _PyRuntime.ceval.perf.persist_after_fork
 
 static void
 perf_map_write_entry(void *state, const void *code_addr,
@@ -361,6 +361,26 @@ default_eval:
 }
 #endif  // PY_HAVE_PERF_TRAMPOLINE
 
+/* Eagerly attach a perf trampoline to the code object `co`.
+ *
+ * If the code object's extra slot already holds a trampoline nothing is
+ * done.  Otherwise a new trampoline is compiled, reported through the
+ * trampoline API's write_state callback, and stored in the extra slot.
+ *
+ * Returns 0 when there is nothing to do, when compiling the trampoline
+ * fails, or when perf trampolines are not compiled in; otherwise returns
+ * the result of _PyCode_SetExtra().
+ */
+int PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *co)
+{
+#ifdef PY_HAVE_PERF_TRAMPOLINE
+    py_trampoline existing = NULL;
+    assert(extra_code_index != -1);
+    int lookup = _PyCode_GetExtra((PyObject *)co, extra_code_index,
+                                  (void **)&existing);
+    if (lookup == 0 && existing != NULL) {
+        /* A trampoline is already attached to this code object. */
+        return 0;
+    }
+
+    py_trampoline fresh = compile_trampoline();
+    if (fresh == NULL) {
+        return 0;
+    }
+    trampoline_api.write_state(trampoline_api.state, fresh,
+                               perf_code_arena->code_size, co);
+    return _PyCode_SetExtra((PyObject *)co, extra_code_index, (void *)fresh);
+#else
+    return 0;
+#endif // PY_HAVE_PERF_TRAMPOLINE
+}
+
+
 int
 _PyIsPerfTrampolineActive(void)
 {
@@ -448,16 +468,34 @@ _PyPerfTrampoline_Fini(void)
     return 0;
 }
 
+/* Record whether perf trampolines should persist in a forked child.
+ *
+ * Stores `enable` in the runtime's perf state and returns the stored value.
+ * Returns 0 when perf trampolines are not compiled in.
+ */
+int
+PyUnstable_PerfTrampoline_SetPersistAfterFork(int enable)
+{
+#ifdef PY_HAVE_PERF_TRAMPOLINE
+    persist_after_fork = enable;
+    return persist_after_fork;
+#else
+    return 0;
+#endif
+}
+
 PyStatus
 _PyPerfTrampoline_AfterFork_Child(void)
 {
 #ifdef PY_HAVE_PERF_TRAMPOLINE
-    // Restart trampoline in file in child.
-    int was_active = _PyIsPerfTrampolineActive();
-    _PyPerfTrampoline_Fini();
     PyUnstable_PerfMapState_Fini();
-    if (was_active) {
-        _PyPerfTrampoline_Init(1);
+    if (persist_after_fork) {
+        char filename[256];
+        pid_t parent_pid = getppid();
+        snprintf(filename, sizeof(filename), "/tmp/perf-%d.map", parent_pid);
+        if (PyUnstable_CopyPerfMapFile(filename) != 0) {
+            return PyStatus_Error("Failed to copy perf map file.");
+        }
+    } else {
+        // Restart trampoline in file in child.
+        int was_active = _PyIsPerfTrampolineActive();
+        _PyPerfTrampoline_Fini();
+        if (was_active) {
+            _PyPerfTrampoline_Init(1);
+        }
     }
 #endif
     return PyStatus_Ok();
index 3debe7f7c139c610f6f46cc574cfc13f25410e36..4008a28ad7bd8a954044563300c39a06390142b5 100644 (file)
@@ -2361,7 +2361,7 @@ PyAPI_FUNC(int) PyUnstable_WritePerfMapEntry(
 #ifndef MS_WINDOWS
     if (perf_map_state.perf_map == NULL) {
         int ret = PyUnstable_PerfMapState_Init();
-        if(ret != 0){
+        if (ret != 0){
             return ret;
         }
     }
@@ -2388,6 +2388,45 @@ PyAPI_FUNC(void) PyUnstable_PerfMapState_Fini(void) {
 #endif
 }
 
+/* Append the contents of the perf map file `parent_filename` to the
+ * current process's perf map file, initialising the latter if needed.
+ *
+ * Returns 0 on success (including when the parent file is empty), the
+ * error code from PyUnstable_PerfMapState_Init() if initialisation fails,
+ * and -1 if the parent file cannot be opened or the copy fails.
+ * Always returns 0 on Windows, where nothing is copied.
+ */
+PyAPI_FUNC(int) PyUnstable_CopyPerfMapFile(const char* parent_filename) {
+#ifndef MS_WINDOWS
+    /* Initialise our own map before opening the parent file so that an
+     * initialisation failure cannot leak the parent FILE handle. */
+    if (perf_map_state.perf_map == NULL) {
+        int ret = PyUnstable_PerfMapState_Init();
+        if (ret != 0) {
+            return ret;
+        }
+    }
+    FILE* from = fopen(parent_filename, "r");
+    if (!from) {
+        return -1;
+    }
+    char buf[4096];
+    int result = 0;
+    PyThread_acquire_lock(perf_map_state.map_lock, 1);
+    for (;;) {
+        size_t bytes_read = fread(buf, 1, sizeof(buf), from);
+        if (bytes_read > 0) {
+            size_t bytes_written = fwrite(buf, 1, bytes_read,
+                                          perf_map_state.perf_map);
+            if (bytes_written < bytes_read) {
+                result = -1;
+                break;
+            }
+        }
+        if (bytes_read < sizeof(buf)) {
+            /* A short (possibly empty) read means end-of-file or a read
+             * error; only the latter is a failure.  This also covers files
+             * whose size is an exact multiple of the buffer size, where
+             * the final read legitimately returns 0 bytes. */
+            if (ferror(from)) {
+                result = -1;
+            }
+            break;
+        }
+    }
+    if (fflush(perf_map_state.perf_map) != 0) {
+        result = -1;
+    }
+    fclose(from);
+    PyThread_release_lock(perf_map_state.map_lock);
+    return result;
+#else
+    (void)parent_filename;
+    return 0;
+#endif
+}
+
+#ifdef __cplusplus
+}
+#endif
+
 
 static PyMethodDef sys_methods[] = {
     /* Might as well keep this in alphabetic order */