]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-131253: free-threaded build support for pystats (gh-137189)
author Neil Schemenauer <nas-github@arctrix.com>
Mon, 3 Nov 2025 19:36:37 +0000 (11:36 -0800)
committer GitHub <noreply@github.com>
Mon, 3 Nov 2025 19:36:37 +0000 (11:36 -0800)
Allow the --enable-pystats build option to be used with free-threading.  The
stats are now stored on a per-interpreter basis, rather than process global.
For free-threaded builds, the stats structure is allocated per-thread and
then periodically merged into the per-interpreter stats structure (on thread
exit or when the reporting function is called). Most of the pystats related
code has been moved into the file Python/pystats.c.

24 files changed:
Include/cpython/pystate.h
Include/cpython/pystats.h
Include/internal/pycore_interp_structs.h
Include/internal/pycore_pystats.h
Include/internal/pycore_stats.h
Include/internal/pycore_tstate.h
Lib/test/test_pystats.py [new file with mode: 0644]
Makefile.pre.in
Misc/NEWS.d/next/Core_and_Builtins/2025-07-29-17-51-14.gh-issue-131253.GpRjWy.rst [new file with mode: 0644]
Modules/_xxtestfuzz/fuzzer.c
PCbuild/_freeze_module.vcxproj
PCbuild/_freeze_module.vcxproj.filters
PCbuild/pythoncore.vcxproj
Python/ceval_macros.h
Python/gc.c
Python/gc_free_threading.c
Python/initconfig.c
Python/lock.c
Python/pylifecycle.c
Python/pystate.c
Python/pystats.c [new file with mode: 0644]
Python/qsbr.c
Python/specialize.c
Python/sysmodule.c

index ac8798ff6129a0dbaa51378a823b12d2c468ba0d..dd2ea1202b379537901224cc71a89e96bcbc09b2 100644 (file)
@@ -217,6 +217,15 @@ struct _ts {
     */
     PyObject *threading_local_sentinel;
     _PyRemoteDebuggerSupport remote_debugger_support;
+
+#ifdef Py_STATS
+    // Pointer to PyStats structure, NULL if recording is off.  For the
+    // free-threaded build, the structure is per-thread (stored as a pointer
+    // in _PyThreadStateImpl).  For the default build, the structure is stored
+    // in the PyInterpreterState structure (threads do not have their own
+    // structure and all share the same per-interpreter structure).
+    PyStats *pystats;
+#endif
 };
 
 /* other API */
@@ -239,6 +248,21 @@ PyAPI_FUNC(void) PyThreadState_EnterTracing(PyThreadState *tstate);
 // function is set, otherwise disable them.
 PyAPI_FUNC(void) PyThreadState_LeaveTracing(PyThreadState *tstate);
 
+#ifdef Py_STATS
+#if defined(HAVE_THREAD_LOCAL) && !defined(Py_BUILD_CORE_MODULE)
+extern _Py_thread_local PyThreadState *_Py_tss_tstate;
+
+static inline PyStats*
+_PyThreadState_GetStatsFast(void)
+{
+    if (_Py_tss_tstate == NULL) {
+        return NULL; // no attached thread state
+    }
+    return _Py_tss_tstate->pystats;
+}
+#endif
+#endif // Py_STATS
+
 /* PyGILState */
 
 /* Helper/diagnostic function - return 1 if the current thread
index cf830b6066f4abac07e395fb8a0c67f64ebbcf98..d0a925a305548535268af411472240a517e99204 100644 (file)
@@ -4,7 +4,7 @@
 //
 // - _Py_INCREF_STAT_INC() and _Py_DECREF_STAT_INC() used by Py_INCREF()
 //   and Py_DECREF().
-// - _Py_stats variable
+// - _PyStats_GET()
 //
 // Functions of the sys module:
 //
@@ -14,7 +14,7 @@
 // - sys._stats_dump()
 //
 // Python must be built with ./configure --enable-pystats to define the
-// Py_STATS macro.
+// _PyStats_GET() macro.
 //
 // Define _PY_INTERPRETER macro to increment interpreter_increfs and
 // interpreter_decrefs. Otherwise, increment increfs and decrefs.
@@ -109,6 +109,18 @@ typedef struct _gc_stats {
     uint64_t objects_not_transitively_reachable;
 } GCStats;
 
+#ifdef Py_GIL_DISABLED
+// stats specific to free-threaded build
+typedef struct _ft_stats {
+    // number of times interpreter had to spin or park when trying to acquire a mutex
+    uint64_t mutex_sleeps;
+    // number of times that the QSBR mechanism polled (compute read sequence value)
+    uint64_t qsbr_polls;
+    // number of times stop-the-world mechanism was used
+    uint64_t world_stops;
+} FTStats;
+#endif
+
 typedef struct _uop_stats {
     uint64_t execution_count;
     uint64_t miss;
@@ -173,22 +185,48 @@ typedef struct _stats {
     CallStats call_stats;
     ObjectStats object_stats;
     OptimizationStats optimization_stats;
+#ifdef Py_GIL_DISABLED
+    FTStats ft_stats;
+#endif
     RareEventStats rare_event_stats;
-    GCStats *gc_stats;
+    GCStats gc_stats[3]; // must match NUM_GENERATIONS
 } PyStats;
 
+// Export for most shared extensions
+PyAPI_FUNC(PyStats *) _PyStats_GetLocal(void);
+
+#if defined(HAVE_THREAD_LOCAL) && !defined(Py_BUILD_CORE_MODULE)
+// use inline function version defined in cpython/pystate.h
+static inline PyStats *_PyThreadState_GetStatsFast(void);
+#define _PyStats_GET _PyThreadState_GetStatsFast
+#else
+#define _PyStats_GET _PyStats_GetLocal
+#endif
 
-// Export for shared extensions like 'math'
-PyAPI_DATA(PyStats*) _Py_stats;
+#define _Py_STATS_EXPR(expr) \
+    do { \
+        PyStats *s = _PyStats_GET(); \
+        if (s != NULL) { \
+            s->expr; \
+        } \
+    } while (0)
+
+#define _Py_STATS_COND_EXPR(cond, expr) \
+    do { \
+        PyStats *s = _PyStats_GET(); \
+        if (s != NULL && (cond)) { \
+            s->expr; \
+        } \
+    } while (0)
 
 #ifdef _PY_INTERPRETER
-#  define _Py_INCREF_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.interpreter_increfs++; } while (0)
-#  define _Py_DECREF_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.interpreter_decrefs++; } while (0)
-#  define _Py_INCREF_IMMORTAL_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.interpreter_immortal_increfs++; } while (0)
-#  define _Py_DECREF_IMMORTAL_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.interpreter_immortal_decrefs++; } while (0)
+#  define _Py_INCREF_STAT_INC() _Py_STATS_EXPR(object_stats.interpreter_increfs++)
+#  define _Py_DECREF_STAT_INC() _Py_STATS_EXPR(object_stats.interpreter_decrefs++)
+#  define _Py_INCREF_IMMORTAL_STAT_INC() _Py_STATS_EXPR(object_stats.interpreter_immortal_increfs++)
+#  define _Py_DECREF_IMMORTAL_STAT_INC() _Py_STATS_EXPR(object_stats.interpreter_immortal_decrefs++)
 #else
-#  define _Py_INCREF_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.increfs++; } while (0)
-#  define _Py_DECREF_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.decrefs++; } while (0)
-#  define _Py_INCREF_IMMORTAL_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.immortal_increfs++; } while (0)
-#  define _Py_DECREF_IMMORTAL_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.immortal_decrefs++; } while (0)
+#  define _Py_INCREF_STAT_INC() _Py_STATS_EXPR(object_stats.increfs++)
+#  define _Py_DECREF_STAT_INC() _Py_STATS_EXPR(object_stats.decrefs++)
+#  define _Py_INCREF_IMMORTAL_STAT_INC() _Py_STATS_EXPR(object_stats.immortal_increfs++)
+#  define _Py_DECREF_IMMORTAL_STAT_INC() _Py_STATS_EXPR(object_stats.immortal_decrefs++)
 #endif
index 9cdaa950e3479a8bf62f0632ccc2186616cb2485..e8cbe9d894e1c76afe2e6029231ee96667c65485 100644 (file)
@@ -199,7 +199,7 @@ enum _GCPhase {
 };
 
 /* If we change this, we need to change the default value in the
-   signature of gc.collect. */
+   signature of gc.collect and change the size of PyStats.gc_stats */
 #define NUM_GENERATIONS 3
 
 struct _gc_runtime_state {
@@ -963,6 +963,18 @@ struct _is {
 #  ifdef Py_STACKREF_CLOSE_DEBUG
     _Py_hashtable_t *closed_stackrefs_table;
 #  endif
+#endif
+
+#ifdef Py_STATS
+    // true if recording of pystats is on, this is used when new threads
+    // are created to decide if recording should be on for them
+    int pystats_enabled;
+    // allocated when (and if) stats are first enabled
+    PyStats *pystats_struct;
+#ifdef Py_GIL_DISABLED
+    // held when pystats related interpreter state is being updated
+    PyMutex pystats_mutex;
+#endif
 #endif
 
     /* the initial PyInterpreterState.threads.head */
index f8af398a56058619c35842f2a4f5c6f9a9dada97..50ab21aa0f1902cfc3dc528d432033c71485c8bb 100644 (file)
@@ -9,7 +9,7 @@ extern "C" {
 #endif
 
 #ifdef Py_STATS
-extern void _Py_StatsOn(void);
+extern int _Py_StatsOn(void);
 extern void _Py_StatsOff(void);
 extern void _Py_StatsClear(void);
 extern int _Py_PrintSpecializationStats(int to_file);
index 24f239a2135b93dc3b4ee60d6661bc1d029170eb..850e6ea455227c0345d239f0e6112879dd854dd8 100644 (file)
@@ -15,39 +15,56 @@ extern "C" {
 
 #include "pycore_bitutils.h"  // _Py_bit_length
 
-#define STAT_INC(opname, name) do { if (_Py_stats) _Py_stats->opcode_stats[opname].specialization.name++; } while (0)
-#define STAT_DEC(opname, name) do { if (_Py_stats) _Py_stats->opcode_stats[opname].specialization.name--; } while (0)
-#define OPCODE_EXE_INC(opname) do { if (_Py_stats) _Py_stats->opcode_stats[opname].execution_count++; } while (0)
-#define CALL_STAT_INC(name) do { if (_Py_stats) _Py_stats->call_stats.name++; } while (0)
-#define OBJECT_STAT_INC(name) do { if (_Py_stats) _Py_stats->object_stats.name++; } while (0)
-#define OBJECT_STAT_INC_COND(name, cond) \
-    do { if (_Py_stats && cond) _Py_stats->object_stats.name++; } while (0)
-#define EVAL_CALL_STAT_INC(name) do { if (_Py_stats) _Py_stats->call_stats.eval_calls[name]++; } while (0)
-#define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) \
-    do { if (_Py_stats && PyFunction_Check(callable)) _Py_stats->call_stats.eval_calls[name]++; } while (0)
-#define GC_STAT_ADD(gen, name, n) do { if (_Py_stats) _Py_stats->gc_stats[(gen)].name += (n); } while (0)
-#define OPT_STAT_INC(name) do { if (_Py_stats) _Py_stats->optimization_stats.name++; } while (0)
-#define OPT_STAT_ADD(name, n) do { if (_Py_stats) _Py_stats->optimization_stats.name += (n); } while (0)
-#define UOP_STAT_INC(opname, name) do { if (_Py_stats) { assert(opname < 512); _Py_stats->optimization_stats.opcode[opname].name++; } } while (0)
-#define UOP_PAIR_INC(uopcode, lastuop)                                              \
-    do {                                                                            \
-        if (lastuop && _Py_stats) {                                                 \
-            _Py_stats->optimization_stats.opcode[lastuop].pair_count[uopcode]++;    \
-        }                                                                           \
-        lastuop = uopcode;                                                          \
+#define STAT_INC(opname, name) _Py_STATS_EXPR(opcode_stats[opname].specialization.name++)
+#define STAT_DEC(opname, name) _Py_STATS_EXPR(opcode_stats[opname].specialization.name--)
+#define OPCODE_EXE_INC(opname) _Py_STATS_EXPR(opcode_stats[opname].execution_count++)
+#define CALL_STAT_INC(name) _Py_STATS_EXPR(call_stats.name++)
+#define OBJECT_STAT_INC(name) _Py_STATS_EXPR(object_stats.name++)
+#define OBJECT_STAT_INC_COND(name, cond) _Py_STATS_COND_EXPR(cond, object_stats.name++)
+#define EVAL_CALL_STAT_INC(name) _Py_STATS_EXPR(call_stats.eval_calls[name]++)
+#define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) _Py_STATS_COND_EXPR(PyFunction_Check(callable), call_stats.eval_calls[name]++)
+#define GC_STAT_ADD(gen, name, n) _Py_STATS_EXPR(gc_stats[(gen)].name += (n))
+#define OPT_STAT_INC(name) _Py_STATS_EXPR(optimization_stats.name++)
+#define OPT_STAT_ADD(name, n) _Py_STATS_EXPR(optimization_stats.name += (n))
+#define UOP_STAT_INC(opname, name) \
+    do { \
+        PyStats *s = _PyStats_GET(); \
+        if (s) { \
+            assert(opname < 512); \
+            s->optimization_stats.opcode[opname].name++; \
+        } \
+    } while (0)
+#define UOP_PAIR_INC(uopcode, lastuop) \
+    do { \
+        PyStats *s = _PyStats_GET(); \
+        if (lastuop && s) { \
+            s->optimization_stats.opcode[lastuop].pair_count[uopcode]++; \
+        } \
+        lastuop = uopcode; \
     } while (0)
-#define OPT_UNSUPPORTED_OPCODE(opname) do { if (_Py_stats) _Py_stats->optimization_stats.unsupported_opcode[opname]++; } while (0)
-#define OPT_ERROR_IN_OPCODE(opname) do { if (_Py_stats) _Py_stats->optimization_stats.error_in_opcode[opname]++; } while (0)
+#define OPT_UNSUPPORTED_OPCODE(opname) _Py_STATS_EXPR(optimization_stats.unsupported_opcode[opname]++)
+#define OPT_ERROR_IN_OPCODE(opname) _Py_STATS_EXPR(optimization_stats.error_in_opcode[opname]++)
 #define OPT_HIST(length, name) \
     do { \
-        if (_Py_stats) { \
+        PyStats *s = _PyStats_GET(); \
+        if (s) { \
             int bucket = _Py_bit_length(length >= 1 ? length - 1 : 0); \
             bucket = (bucket >= _Py_UOP_HIST_SIZE) ? _Py_UOP_HIST_SIZE - 1 : bucket; \
-            _Py_stats->optimization_stats.name[bucket]++; \
+            s->optimization_stats.name[bucket]++; \
         } \
     } while (0)
-#define RARE_EVENT_STAT_INC(name) do { if (_Py_stats) _Py_stats->rare_event_stats.name++; } while (0)
-#define OPCODE_DEFERRED_INC(opname) do { if (_Py_stats && opcode == opname) _Py_stats->opcode_stats[opname].specialization.deferred++; } while (0)
+#define RARE_EVENT_STAT_INC(name) _Py_STATS_EXPR(rare_event_stats.name++)
+#define OPCODE_DEFERRED_INC(opname) _Py_STATS_COND_EXPR(opcode==opname, opcode_stats[opname].specialization.deferred++)
+
+#ifdef Py_GIL_DISABLED
+#define FT_STAT_MUTEX_SLEEP_INC() _Py_STATS_EXPR(ft_stats.mutex_sleeps++)
+#define FT_STAT_QSBR_POLL_INC() _Py_STATS_EXPR(ft_stats.qsbr_polls++)
+#define FT_STAT_WORLD_STOP_INC() _Py_STATS_EXPR(ft_stats.world_stops++)
+#else
+#define FT_STAT_MUTEX_SLEEP_INC()
+#define FT_STAT_QSBR_POLL_INC()
+#define FT_STAT_WORLD_STOP_INC()
+#endif
 
 // Export for '_opcode' shared extension
 PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
@@ -71,6 +88,9 @@ PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
 #define OPT_HIST(length, name) ((void)0)
 #define RARE_EVENT_STAT_INC(name) ((void)0)
 #define OPCODE_DEFERRED_INC(opname) ((void)0)
+#define FT_STAT_MUTEX_SLEEP_INC()
+#define FT_STAT_QSBR_POLL_INC()
+#define FT_STAT_WORLD_STOP_INC()
 #endif  // !Py_STATS
 
 
@@ -90,6 +110,11 @@ PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
         RARE_EVENT_INTERP_INC(interp, name); \
     } while (0); \
 
+PyStatus _PyStats_InterpInit(PyInterpreterState *);
+bool _PyStats_ThreadInit(PyInterpreterState *, _PyThreadStateImpl *);
+void _PyStats_ThreadFini(_PyThreadStateImpl *);
+void _PyStats_Attach(_PyThreadStateImpl *);
+void _PyStats_Detach(_PyThreadStateImpl *);
 
 #ifdef __cplusplus
 }
index bad968428c73a1aac3910464d8a9896d6f0f4c80..29ebdfd7e01613e72ee1161e26249535d738cbf3 100644 (file)
@@ -70,8 +70,14 @@ typedef struct _PyThreadStateImpl {
 
     // When >1, code objects do not immortalize their non-string constants.
     int suppress_co_const_immortalization;
+
+#ifdef Py_STATS
+     // per-thread stats, will be merged into interp->pystats_struct
+     PyStats *pystats_struct; // allocated by _PyStats_ThreadInit()
 #endif
 
+#endif // Py_GIL_DISABLED
+
 #if defined(Py_REF_DEBUG) && defined(Py_GIL_DISABLED)
     Py_ssize_t reftotal;  // this thread's total refcount operations
 #endif
diff --git a/Lib/test/test_pystats.py b/Lib/test/test_pystats.py
new file mode 100644 (file)
index 0000000..c50cecf
--- /dev/null
@@ -0,0 +1,215 @@
+import sys
+import textwrap
+import unittest
+from test.support import script_helper
+
+# This function is available for the --enable-pystats config.
+HAVE_PYSTATS = hasattr(sys, '_stats_on')
+
+TEST_TEMPLATE = """
+    import sys
+    import threading
+    import time
+
+    THREADS = 2
+
+    class A:
+        pass
+
+    class B:
+        pass
+
+    def modify_class():
+        # This is used as a rare event we can assume doesn't happen unless we do it.
+        # It increments the "Rare event (set_class)" count.
+        a = A()
+        a.__class__ = B
+
+    TURNED_ON = False
+    def stats_on():
+        global TURNED_ON
+        sys._stats_on()
+        TURNED_ON = True
+
+    TURNED_OFF = False
+    def stats_off():
+        global TURNED_OFF
+        sys._stats_off()
+        TURNED_OFF = True
+
+    CLEARED = False
+    def stats_clear():
+        global CLEARED
+        sys._stats_clear()
+        CLEARED = True
+
+    def func_start():
+        pass
+
+    def func_end():
+        pass
+
+    def func_test(thread_id):
+        pass
+
+    _TEST_CODE_
+
+    func_start()
+    threads = []
+    for i in range(THREADS):
+        t = threading.Thread(target=func_test, args=(i,))
+        threads.append(t)
+        t.start()
+    for t in threads:
+        t.join()
+    func_end()
+    """
+
+
+def run_test_code(
+    test_code,
+    args=[],
+    env_vars=None,
+):
+    """Run test code and return the value of the "set_class" stats counter.
+    """
+    code = textwrap.dedent(TEST_TEMPLATE)
+    code = code.replace('_TEST_CODE_', textwrap.dedent(test_code))
+    script_args = args + ['-c', code]
+    env_vars = env_vars or {}
+    res, _ = script_helper.run_python_until_end(*script_args, **env_vars)
+    stderr = res.err.decode("ascii", "backslashreplace")
+    for line in stderr.split('\n'):
+        if 'Rare event (set_class)' in line:
+            label, _, value = line.partition(':')
+            return value.strip()
+    return ''
+
+
+@unittest.skipUnless(HAVE_PYSTATS, "requires pystats build option")
+class TestPyStats(unittest.TestCase):
+    """Tests for pystats functionality (requires --enable-pystats build
+    option).
+    """
+
+    def test_stats_toggle_on(self):
+        """Check the toggle on functionality.
+        """
+        code = """
+        def func_start():
+            modify_class()
+        """
+
+        # If turned on with command line flag, should get one count.
+        stat_count = run_test_code(code, args=['-X', 'pystats'])
+        self.assertEqual(stat_count, '1')
+
+        # If turned on with env var, should get one count.
+        stat_count = run_test_code(code, env_vars={'PYTHONSTATS': '1'})
+        self.assertEqual(stat_count, '1')
+
+        # If not turned on, should be no counts.
+        stat_count = run_test_code(code)
+        self.assertEqual(stat_count, '')
+
+        code = """
+        def func_start():
+            modify_class()
+            sys._stats_on()
+            modify_class()
+        """
+        # Not initially turned on but enabled by sys._stats_on(), should get
+        # one count.
+        stat_count = run_test_code(code)
+        self.assertEqual(stat_count, '1')
+
+    def test_stats_toggle_on_thread(self):
+        """Check the toggle on functionality when threads are used.
+        """
+        code = """
+        def func_test(thread_id):
+            if thread_id == 0:
+                modify_class()
+                stats_on()
+                modify_class()
+            else:
+                while not TURNED_ON:
+                    pass
+                modify_class()
+        """
+        # Turning on in one thread will count in other thread.
+        stat_count = run_test_code(code)
+        self.assertEqual(stat_count, '2')
+
+        code = """
+        def func_test(thread_id):
+            if thread_id == 0:
+                modify_class()
+                stats_off()
+                modify_class()
+            else:
+                while not TURNED_OFF:
+                    pass
+                modify_class()
+        """
+        # Turning off in one thread will not count in other threads.
+        stat_count = run_test_code(code, args=['-X', 'pystats'])
+        self.assertEqual(stat_count, '1')
+
+    def test_thread_exit_merge(self):
+        """Check that per-thread stats (when free-threading enabled) are merged.
+        """
+        code = """
+        def func_test(thread_id):
+            modify_class()
+            if thread_id == 0:
+                raise SystemExit
+        """
+        # Stats from a thread exiting early should still be counted.
+        stat_count = run_test_code(code, args=['-X', 'pystats'])
+        self.assertEqual(stat_count, '2')
+
+    def test_stats_dump(self):
+        """Check that sys._stats_dump() works.
+        """
+        code = """
+        def func_test(thread_id):
+            if thread_id == 0:
+                stats_on()
+            else:
+                while not TURNED_ON:
+                    pass
+                modify_class()
+                sys._stats_dump()
+                stats_off()
+        """
+        # Stats recorded before sys._stats_dump() is called should be reported.
+        stat_count = run_test_code(code)
+        self.assertEqual(stat_count, '1')
+
+    def test_stats_clear(self):
+        """Check that sys._stats_clear() works.
+        """
+        code = """
+        ready = False
+        def func_test(thread_id):
+            global ready
+            if thread_id == 0:
+                stats_on()
+                modify_class()
+                while not ready:
+                    pass  # wait until other thread has called modify_class()
+                stats_clear()  # clears stats for all threads
+            else:
+                while not TURNED_ON:
+                    pass
+                modify_class()
+                ready = True
+        """
+        # Clearing stats will clear for all threads
+        stat_count = run_test_code(code)
+        self.assertEqual(stat_count, '0')
+
+
+if __name__ == "__main__":
+    unittest.main()
index 656d9dacd962e3c2dbf2711fbe5241be44229d53..dd28ff5d2a3ed1d8cb17f7d0121d2497d6d8605b 100644 (file)
@@ -483,6 +483,7 @@ PYTHON_OBJS=        \
                Python/pylifecycle.o \
                Python/pymath.o \
                Python/pystate.o \
+               Python/pystats.o \
                Python/pythonrun.o \
                Python/pytime.o \
                Python/qsbr.o \
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-07-29-17-51-14.gh-issue-131253.GpRjWy.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-29-17-51-14.gh-issue-131253.GpRjWy.rst
new file mode 100644 (file)
index 0000000..2826fad
--- /dev/null
@@ -0,0 +1 @@
+Support the ``--enable-pystats`` build option for the free-threaded build.
index a04f1412eefda1d5a9e5394e2574a15692384d9e..0cbe10c79ab4a6c840f051924104f56b4c4096be 100644 (file)
@@ -10,8 +10,8 @@
 
   See the source code for LLVMFuzzerTestOneInput for details. */
 
-#ifndef Py_BUILD_CORE
-#  define Py_BUILD_CORE 1
+#ifndef Py_BUILD_CORE_MODULE
+#  define Py_BUILD_CORE_MODULE 1
 #endif
 
 #include <Python.h>
index e65f201623fbbe871a03b7e91ca0797a656f2fe7..605861ad3fd06c10b791653d3273c5a4005c23a6 100644 (file)
     <ClCompile Include="..\Python\pylifecycle.c" />
     <ClCompile Include="..\Python\pymath.c" />
     <ClCompile Include="..\Python\pystate.c" />
+    <ClCompile Include="..\Python\pystats.c" />
     <ClCompile Include="..\Python\pystrcmp.c" />
     <ClCompile Include="..\Python\pystrhex.c" />
     <ClCompile Include="..\Python\pystrtod.c" />
index a9fb6f2328ad9562964ab20860f87db0652349d8..c67fe53363ee847daf686cd36167714e864fbced 100644 (file)
     <ClCompile Include="..\Python\pystate.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\Python\pystats.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
     <ClCompile Include="..\Python\pystrcmp.c">
       <Filter>Source Files</Filter>
     </ClCompile>
index a50ffb120bc013c3857192c6d1513a1b439edefe..359a47fbfc4fe2ed902636fffc9a8bbd3c2ed299 100644 (file)
     <ClCompile Include="..\Python\pymath.c" />
     <ClCompile Include="..\Python\pytime.c" />
     <ClCompile Include="..\Python\pystate.c" />
+    <ClCompile Include="..\Python\pystats.c" />
     <ClCompile Include="..\Python\pystrcmp.c" />
     <ClCompile Include="..\Python\pystrhex.c" />
     <ClCompile Include="..\Python\pystrtod.c" />
index 868ab6f755874f3c2e983e4a045186c7c2783bb9..afdcbc563b2c601da1d2b510790a48056dc31e5b 100644 (file)
@@ -62,8 +62,9 @@
 #ifdef Py_STATS
 #define INSTRUCTION_STATS(op) \
     do { \
+        PyStats *s = _PyStats_GET(); \
         OPCODE_EXE_INC(op); \
-        if (_Py_stats) _Py_stats->opcode_stats[lastopcode].pair_count[op]++; \
+        if (s) s->opcode_stats[lastopcode].pair_count[op]++; \
         lastopcode = op; \
     } while (0)
 #else
index a1f3d86d91036b5f178cf2d6af0bdc24a087e480..03a5d7366ea6c964879ec1f6cc2c2625a227db07 100644 (file)
@@ -2111,10 +2111,11 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason)
     _PyErr_SetRaisedException(tstate, exc);
     GC_STAT_ADD(generation, objects_collected, stats.collected);
 #ifdef Py_STATS
-    if (_Py_stats) {
+    PyStats *s = _PyStats_GET();
+    if (s) {
         GC_STAT_ADD(generation, object_visits,
-            _Py_stats->object_stats.object_visits);
-        _Py_stats->object_stats.object_visits = 0;
+            s->object_stats.object_visits);
+        s->object_stats.object_visits = 0;
     }
 #endif
     validate_spaces(gcstate);
index 842aa3401548c92486314c15a6016c48cf4e298b..f39793c3eeb532a67762e6d40f5ea8223c64bd35 100644 (file)
@@ -2362,8 +2362,9 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
     assert(generation >= 0 && generation < NUM_GENERATIONS);
 
 #ifdef Py_STATS
-    if (_Py_stats) {
-        _Py_stats->object_stats.object_visits = 0;
+    PyStats *s = _PyStats_GET();
+    if (s) {
+        s->object_stats.object_visits = 0;
     }
 #endif
     GC_STAT_ADD(generation, collections, 1);
@@ -2426,10 +2427,13 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
 
     GC_STAT_ADD(generation, objects_collected, m);
 #ifdef Py_STATS
-    if (_Py_stats) {
-        GC_STAT_ADD(generation, object_visits,
-            _Py_stats->object_stats.object_visits);
-        _Py_stats->object_stats.object_visits = 0;
+    {
+        PyStats *s = _PyStats_GET();
+        if (s) {
+            GC_STAT_ADD(generation, object_visits,
+                s->object_stats.object_visits);
+            s->object_stats.object_visits = 0;
+        }
     }
 #endif
 
index 5dc68eb4ec2cca90633fdaff1a52081d87295685..7176670c110d692eeb08bc7e769de296ce917899 100644 (file)
@@ -2810,12 +2810,6 @@ _PyConfig_Write(const PyConfig *config, _PyRuntimeState *runtime)
         return _PyStatus_NO_MEMORY();
     }
 
-#ifdef Py_STATS
-    if (config->_pystats) {
-        _Py_StatsOn();
-    }
-#endif
-
     return _PyStatus_OK();
 }
 
index 98f3f89c201fef220c370483417c336a154cfb2a..789065d816279266df578c7ab1aca0972bde435d 100644 (file)
@@ -6,6 +6,7 @@
 #include "pycore_parking_lot.h"
 #include "pycore_semaphore.h"
 #include "pycore_time.h"          // _PyTime_Add()
+#include "pycore_stats.h"         // FT_STAT_MUTEX_SLEEP_INC()
 
 #ifdef MS_WINDOWS
 #  ifndef WIN32_LEAN_AND_MEAN
@@ -62,6 +63,8 @@ _PyMutex_LockTimed(PyMutex *m, PyTime_t timeout, _PyLockFlags flags)
         return PY_LOCK_FAILURE;
     }
 
+    FT_STAT_MUTEX_SLEEP_INC();
+
     PyTime_t now;
     // silently ignore error: cannot report error to the caller
     (void)PyTime_MonotonicRaw(&now);
index 8fcb31cfd1229957967b5c45c8ad32766867544d..805805ef188e833008a1e7522ff6a0cbc3ce795b 100644 (file)
@@ -26,6 +26,7 @@
 #include "pycore_runtime.h"       // _Py_ID()
 #include "pycore_runtime_init.h"  // _PyRuntimeState_INIT
 #include "pycore_setobject.h"     // _PySet_NextEntry()
+#include "pycore_stats.h"         // _PyStats_InterpInit()
 #include "pycore_sysmodule.h"     // _PySys_ClearAttrString()
 #include "pycore_traceback.h"     // _Py_DumpTracebackThreads()
 #include "pycore_typeobject.h"    // _PyTypes_InitTypes()
@@ -656,6 +657,14 @@ pycore_create_interpreter(_PyRuntimeState *runtime,
         return status;
     }
 
+#ifdef Py_STATS
+    // initialize pystats.  This must be done after the settings are loaded.
+    status = _PyStats_InterpInit(interp);
+    if (_PyStatus_EXCEPTION(status)) {
+        return status;
+    }
+#endif
+
     // initialize the interp->obmalloc state.  This must be done after
     // the settings are loaded (so that feature_flags are set) but before
     // any calls are made to obmalloc functions.
@@ -2469,6 +2478,14 @@ new_interpreter(PyThreadState **tstate_p,
         return status;
     }
 
+#ifdef Py_STATS
+    // initialize pystats.  This must be done after the settings are loaded.
+    status = _PyStats_InterpInit(interp);
+    if (_PyStatus_EXCEPTION(status)) {
+        return status;
+    }
+#endif
+
     // initialize the interp->obmalloc state.  This must be done after
     // the settings are loaded (so that feature_flags are set) but before
     // any calls are made to obmalloc functions.
index 24681536797f94edbddc8793333f3025cd91de45..cf251c120d75afe6f1bc2335ecdefb727d7ab929 100644 (file)
@@ -21,6 +21,7 @@
 #include "pycore_runtime.h"       // _PyRuntime
 #include "pycore_runtime_init.h"  // _PyRuntimeState_INIT
 #include "pycore_stackref.h"      // Py_STACKREF_DEBUG
+#include "pycore_stats.h"         // FT_STAT_WORLD_STOP_INC()
 #include "pycore_time.h"          // _PyTime_Init()
 #include "pycore_uop.h"           // UOP_BUFFER_SIZE
 #include "pycore_uniqueid.h"      // _PyObject_FinalizePerThreadRefcounts()
@@ -465,6 +466,12 @@ alloc_interpreter(void)
 static void
 free_interpreter(PyInterpreterState *interp)
 {
+#ifdef Py_STATS
+    if (interp->pystats_struct) {
+        PyMem_RawFree(interp->pystats_struct);
+        interp->pystats_struct = NULL;
+    }
+#endif
     // The main interpreter is statically allocated so
     // should not be freed.
     if (interp != &_PyRuntime._main_interpreter) {
@@ -1407,6 +1414,9 @@ static void
 free_threadstate(_PyThreadStateImpl *tstate)
 {
     PyInterpreterState *interp = tstate->base.interp;
+#ifdef Py_STATS
+    _PyStats_ThreadFini(tstate);
+#endif
     // The initial thread state of the interpreter is allocated
     // as part of the interpreter state so should not be freed.
     if (tstate == &interp->_initial_thread) {
@@ -1535,6 +1545,13 @@ new_threadstate(PyInterpreterState *interp, int whence)
         return NULL;
     }
 #endif
+#ifdef Py_STATS
+    // The PyStats structure is quite large and is allocated separately from tstate.
+    if (!_PyStats_ThreadInit(interp, tstate)) {
+        free_threadstate(tstate);
+        return NULL;
+    }
+#endif
 
     /* We serialize concurrent creation to protect global state. */
     HEAD_LOCK(interp->runtime);
@@ -1846,6 +1863,9 @@ _PyThreadState_DeleteCurrent(PyThreadState *tstate)
     _Py_EnsureTstateNotNULL(tstate);
 #ifdef Py_GIL_DISABLED
     _Py_qsbr_detach(((_PyThreadStateImpl *)tstate)->qsbr);
+#endif
+#ifdef Py_STATS
+    _PyStats_Detach((_PyThreadStateImpl *)tstate);
 #endif
     current_fast_clear(tstate->interp->runtime);
     tstate_delete_common(tstate, 1);  // release GIL as part of call
@@ -2020,6 +2040,10 @@ tstate_deactivate(PyThreadState *tstate)
     assert(tstate_is_bound(tstate));
     assert(tstate->_status.active);
 
+#ifdef Py_STATS  // same guard form as every other pystats hook in this file
+    _PyStats_Detach((_PyThreadStateImpl *)tstate);
+#endif
+
     tstate->_status.active = 0;
 
     // We do not unbind the gilstate tstate here.
@@ -2123,6 +2147,10 @@ _PyThreadState_Attach(PyThreadState *tstate)
         _PyCriticalSection_Resume(tstate);
     }
 
+#ifdef Py_STATS
+    _PyStats_Attach((_PyThreadStateImpl *)tstate);
+#endif
+
 #if defined(Py_DEBUG)
     errno = err;
 #endif
@@ -2272,6 +2300,7 @@ stop_the_world(struct _stoptheworld_state *stw)
     stw->thread_countdown = 0;
     stw->stop_event = (PyEvent){0};  // zero-initialize (unset)
     stw->requester = _PyThreadState_GET();  // may be NULL
+    FT_STAT_WORLD_STOP_INC();
 
     _Py_FOR_EACH_STW_INTERP(stw, i) {
         _Py_FOR_EACH_TSTATE_UNLOCKED(i, t) {
diff --git a/Python/pystats.c b/Python/pystats.c
new file mode 100644 (file)
index 0000000..2e377b8
--- /dev/null
@@ -0,0 +1,819 @@
+#include "Python.h"
+
+#include "pycore_opcode_metadata.h" // _PyOpcode_Caches
+#include "pycore_pyatomic_ft_wrappers.h"
+#include "pycore_pylifecycle.h"     // _PyOS_URandomNonblock()
+#include "pycore_tstate.h"
+#include "pycore_initconfig.h"      // _PyStatus_OK()
+#include "pycore_uop_metadata.h"    // _PyOpcode_uop_name
+#include "pycore_uop_ids.h"         // MAX_UOP_ID
+#include "pycore_pystate.h"         // _PyThreadState_GET()
+#include "pycore_runtime.h"         // NUM_GENERATIONS
+
+#include <stdlib.h> // rand()
+
+#ifdef Py_STATS
+
+// Return the PyStats pointer stored on the current thread state, or NULL
+// when there is no current thread state.
+PyStats *
+_PyStats_GetLocal(void)
+{
+    PyThreadState *ts = _PyThreadState_GET();
+    return (ts != NULL) ? ts->pystats : NULL;
+}
+
+// Lock/unlock the per-interpreter pystats mutex.  Only the free-threaded
+// build (Py_GIL_DISABLED) takes a real lock; in the default build both
+// macros expand to nothing.
+#ifdef Py_GIL_DISABLED
+#define STATS_LOCK(interp) PyMutex_Lock(&interp->pystats_mutex)
+#define STATS_UNLOCK(interp) PyMutex_Unlock(&interp->pystats_mutex)
+#else
+#define STATS_LOCK(interp)
+#define STATS_UNLOCK(interp)
+#endif
+
+
+#if PYSTATS_MAX_UOP_ID < MAX_UOP_ID
+#error "Not enough space allocated for pystats. Increase PYSTATS_MAX_UOP_ID to at least MAX_UOP_ID"
+#endif
+
+// Store 'value' in 'dict' under 'key' as a Python int.
+// Returns 0 on success and -1 on failure.
+static int
+set_stat_item(PyObject *dict, const char *key, uint64_t value)
+{
+    PyObject *num = PyLong_FromUnsignedLongLong(value);
+    if (num == NULL) {
+        return -1;
+    }
+    int rc = PyDict_SetItemString(dict, key, num);
+    Py_DECREF(num);
+    return (rc == -1) ? -1 : 0;
+}
+
+// Build a new dict describing one SpecializationStats record: one int per
+// scalar counter plus a "failure_kinds" tuple.  Returns a new reference, or
+// NULL if any object could not be created or stored.
+static PyObject*
+stats_to_dict(SpecializationStats *stats)
+{
+    PyObject *kinds = NULL;
+    PyObject *dict = PyDict_New();
+    if (dict == NULL) {
+        return NULL;
+    }
+    if (set_stat_item(dict, "success", stats->success) < 0
+        || set_stat_item(dict, "failure", stats->failure) < 0
+        || set_stat_item(dict, "hit", stats->hit) < 0
+        || set_stat_item(dict, "deferred", stats->deferred) < 0
+        || set_stat_item(dict, "miss", stats->miss) < 0
+        || set_stat_item(dict, "deopt", stats->deopt) < 0)
+    {
+        goto error;
+    }
+    kinds = PyTuple_New(SPECIALIZATION_FAILURE_KINDS);
+    if (kinds == NULL) {
+        goto error;
+    }
+    for (int k = 0; k < SPECIALIZATION_FAILURE_KINDS; k++) {
+        PyObject *count = PyLong_FromUnsignedLongLong(stats->failure_kinds[k]);
+        if (count == NULL) {
+            goto error;
+        }
+        // The tuple takes ownership of 'count'.
+        PyTuple_SET_ITEM(kinds, k, count);
+    }
+    if (PyDict_SetItemString(dict, "failure_kinds", kinds) != 0) {
+        goto error;
+    }
+    Py_DECREF(kinds);
+    return dict;
+
+error:
+    Py_XDECREF(kinds);
+    Py_DECREF(dict);
+    return NULL;
+}
+
+// Convert the specialization stats of 'opcode' in 'src' to a dict and store
+// it in 'res' under 'name'.  Returns -1 if the dict could not be built,
+// otherwise the result of PyDict_SetItemString().
+static int
+add_stat_dict(PyStats *src, PyObject *res, int opcode, const char *name)
+{
+    PyObject *spec_dict = stats_to_dict(&src->opcode_stats[opcode].specialization);
+    if (spec_dict == NULL) {
+        return -1;
+    }
+    int rc = PyDict_SetItemString(res, name, spec_dict);
+    Py_DECREF(spec_dict);
+    return rc;
+}
+
+// Return a new dict mapping instruction family names to their specialization
+// stats dicts, read from the current interpreter's PyStats structure.
+// Returns None if the interpreter has no stats structure, NULL on error.
+PyObject*
+_Py_GetSpecializationStats(void) {
+    static const struct {
+        int opcode;
+        const char *key;
+    } families[] = {
+        {CONTAINS_OP, "contains_op"},
+        {LOAD_SUPER_ATTR, "load_super_attr"},
+        {LOAD_ATTR, "load_attr"},
+        {LOAD_GLOBAL, "load_global"},
+        {STORE_SUBSCR, "store_subscr"},
+        {STORE_ATTR, "store_attr"},
+        {JUMP_BACKWARD, "jump_backward"},
+        {CALL, "call"},
+        {CALL_KW, "call_kw"},
+        {BINARY_OP, "binary_op"},
+        {COMPARE_OP, "compare_op"},
+        {UNPACK_SEQUENCE, "unpack_sequence"},
+        {FOR_ITER, "for_iter"},
+        {TO_BOOL, "to_bool"},
+        {SEND, "send"},
+    };
+    PyThreadState *tstate = _PyThreadState_GET();
+    PyStats *src = FT_ATOMIC_LOAD_PTR_RELAXED(tstate->interp->pystats_struct);
+    if (src == NULL) {
+        Py_RETURN_NONE;
+    }
+    PyObject *result = PyDict_New();
+    if (result == NULL) {
+        return NULL;
+    }
+    // Every family is attempted; failures are summed and checked afterwards.
+    int status = 0;
+    for (size_t k = 0; k < sizeof(families) / sizeof(families[0]); k++) {
+        status += add_stat_dict(src, result, families[k].opcode, families[k].key);
+    }
+    if (status < 0) {
+        Py_DECREF(result);
+        return NULL;
+    }
+    return result;
+}
+
+
+// Print "    opcode[<opname>].<label> : <value>" unless the value is zero.
+static void
+print_opcode_counter(FILE *out, const char *opname, const char *label, uint64_t value)
+{
+    if (value == 0) {
+        return;
+    }
+    fprintf(out, "    opcode[%s].%s : %" PRIu64 "\n", opname, label, value);
+}
+
+// Print the per-opcode stats (specializable marker, specialization counters,
+// execution count, failure kinds and opcode pair counts) for all 256 opcodes.
+static void
+print_spec_stats(FILE *out, OpcodeStats *stats)
+{
+    /* Mark some opcodes as specializable for stats,
+     * even though we don't specialize them yet. */
+    fprintf(out, "opcode[BINARY_SLICE].specializable : 1\n");
+    fprintf(out, "opcode[STORE_SLICE].specializable : 1\n");
+    fprintf(out, "opcode[GET_ITER].specializable : 1\n");
+    for (int op = 0; op < 256; op++) {
+        const char *opname = _PyOpcode_OpName[op];
+        const OpcodeStats *entry = &stats[op];
+        const SpecializationStats *spec = &entry->specialization;
+
+        /* Ignore jumps as they cannot be specialized */
+        bool is_jump = (op == POP_JUMP_IF_FALSE
+                        || op == POP_JUMP_IF_TRUE
+                        || op == POP_JUMP_IF_NONE
+                        || op == POP_JUMP_IF_NOT_NONE
+                        || op == JUMP_BACKWARD);
+        if (_PyOpcode_Caches[op] && !is_jump) {
+            fprintf(out, "opcode[%s].specializable : 1\n", opname);
+        }
+
+        print_opcode_counter(out, opname, "specialization.success", spec->success);
+        print_opcode_counter(out, opname, "specialization.failure", spec->failure);
+        print_opcode_counter(out, opname, "specialization.hit", spec->hit);
+        print_opcode_counter(out, opname, "specialization.deferred", spec->deferred);
+        print_opcode_counter(out, opname, "specialization.miss", spec->miss);
+        print_opcode_counter(out, opname, "specialization.deopt", spec->deopt);
+        print_opcode_counter(out, opname, "execution_count", entry->execution_count);
+
+        for (int kind = 0; kind < SPECIALIZATION_FAILURE_KINDS; kind++) {
+            uint64_t count = spec->failure_kinds[kind];
+            if (count) {
+                fprintf(out, "    opcode[%s].specialization.failure_kinds[%d] : %"
+                    PRIu64 "\n", opname, kind, count);
+            }
+        }
+        for (int next = 0; next < 256; next++) {
+            uint64_t pairs = entry->pair_count[next];
+            if (pairs) {
+                fprintf(out, "opcode[%s].pair_count[%s] : %" PRIu64 "\n",
+                        opname, _PyOpcode_OpName[next], pairs);
+            }
+        }
+    }
+}
+
+
+// Print the call counters, followed by one line per eval call kind.
+static void
+print_call_stats(FILE *out, CallStats *stats)
+{
+    const struct {
+        const char *label;
+        uint64_t value;
+    } totals[] = {
+        {"Calls to PyEval_EvalDefault", stats->pyeval_calls},
+        {"Calls to Python functions inlined", stats->inlined_py_calls},
+        {"Frames pushed", stats->frames_pushed},
+        {"Frame objects created", stats->frame_objects_created},
+    };
+    for (size_t k = 0; k < sizeof(totals) / sizeof(totals[0]); k++) {
+        fprintf(out, "%s: %" PRIu64 "\n", totals[k].label, totals[k].value);
+    }
+    for (int kind = 0; kind < EVAL_CALL_KINDS; kind++) {
+        fprintf(out, "Calls via PyEval_EvalFrame[%d] : %" PRIu64 "\n", kind, stats->eval_calls[kind]);
+    }
+}
+
+// Print every object counter as "<label>: <value>", one per line.
+static void
+print_object_stats(FILE *out, ObjectStats *stats)
+{
+    const struct {
+        const char *label;
+        uint64_t value;
+    } rows[] = {
+        {"Object allocations from freelist", stats->from_freelist},
+        {"Object frees to freelist", stats->to_freelist},
+        {"Object allocations", stats->allocations},
+        {"Object allocations to 512 bytes", stats->allocations512},
+        {"Object allocations to 4 kbytes", stats->allocations4k},
+        {"Object allocations over 4 kbytes", stats->allocations_big},
+        {"Object frees", stats->frees},
+        {"Object inline values", stats->inline_values},
+        {"Object interpreter mortal increfs", stats->interpreter_increfs},
+        {"Object interpreter mortal decrefs", stats->interpreter_decrefs},
+        {"Object mortal increfs", stats->increfs},
+        {"Object mortal decrefs", stats->decrefs},
+        {"Object interpreter immortal increfs", stats->interpreter_immortal_increfs},
+        {"Object interpreter immortal decrefs", stats->interpreter_immortal_decrefs},
+        {"Object immortal increfs", stats->immortal_increfs},
+        {"Object immortal decrefs", stats->immortal_decrefs},
+        {"Object materialize dict (on request)", stats->dict_materialized_on_request},
+        {"Object materialize dict (new key)", stats->dict_materialized_new_key},
+        {"Object materialize dict (too big)", stats->dict_materialized_too_big},
+        {"Object materialize dict (str subclass)", stats->dict_materialized_str_subclass},
+        {"Object method cache hits", stats->type_cache_hits},
+        {"Object method cache misses", stats->type_cache_misses},
+        {"Object method cache collisions", stats->type_cache_collisions},
+        {"Object method cache dunder hits", stats->type_cache_dunder_hits},
+        {"Object method cache dunder misses", stats->type_cache_dunder_misses},
+    };
+    for (size_t k = 0; k < sizeof(rows) / sizeof(rows[0]); k++) {
+        fprintf(out, "%s: %" PRIu64 "\n", rows[k].label, rows[k].value);
+    }
+}
+
+// Print the GC counters of each of the NUM_GENERATIONS entries in 'stats'.
+static void
+print_gc_stats(FILE *out, GCStats *stats)
+{
+    for (int gen = 0; gen < NUM_GENERATIONS; gen++) {
+        const GCStats *g = &stats[gen];
+        fprintf(out, "GC[%d] collections: %" PRIu64 "\n", gen, g->collections);
+        fprintf(out, "GC[%d] object visits: %" PRIu64 "\n", gen, g->object_visits);
+        fprintf(out, "GC[%d] objects collected: %" PRIu64 "\n", gen, g->objects_collected);
+        fprintf(out, "GC[%d] objects reachable from roots: %" PRIu64 "\n",
+                gen, g->objects_transitively_reachable);
+        fprintf(out, "GC[%d] objects not reachable from roots: %" PRIu64 "\n",
+                gen, g->objects_not_transitively_reachable);
+    }
+}
+
+#ifdef _Py_TIER2
+// Print one "<name>[<2**i>]: <count>" line for each of the
+// _Py_UOP_HIST_SIZE buckets of 'hist'.
+static void
+print_histogram(FILE *out, const char *name, uint64_t hist[_Py_UOP_HIST_SIZE])
+{
+    for (int bucket = 0; bucket < _Py_UOP_HIST_SIZE; bucket++) {
+        uint64_t bound = (uint64_t)1 << bucket;
+        fprintf(out, "%s[%" PRIu64"]: %" PRIu64 "\n", name, bound, hist[bucket]);
+    }
+}
+
+extern const char *_PyUOpName(int index);
+
+// Print the tier 2 optimizer stats: scalar counters, histograms, per-uop
+// execution/miss counts, unsupported opcodes, uop pair counts, per-uop error
+// counts and JIT memory sizes.
+static void
+print_optimization_stats(FILE *out, OptimizationStats *stats)
+{
+    fprintf(out, "Optimization attempts: %" PRIu64 "\n", stats->attempts);
+    fprintf(out, "Optimization traces created: %" PRIu64 "\n", stats->traces_created);
+    fprintf(out, "Optimization traces executed: %" PRIu64 "\n", stats->traces_executed);
+    fprintf(out, "Optimization uops executed: %" PRIu64 "\n", stats->uops_executed);
+    fprintf(out, "Optimization trace stack overflow: %" PRIu64 "\n", stats->trace_stack_overflow);
+    fprintf(out, "Optimization trace stack underflow: %" PRIu64 "\n", stats->trace_stack_underflow);
+    fprintf(out, "Optimization trace too long: %" PRIu64 "\n", stats->trace_too_long);
+    fprintf(out, "Optimization trace too short: %" PRIu64 "\n", stats->trace_too_short);
+    fprintf(out, "Optimization inner loop: %" PRIu64 "\n", stats->inner_loop);
+    fprintf(out, "Optimization recursive call: %" PRIu64 "\n", stats->recursive_call);
+    fprintf(out, "Optimization low confidence: %" PRIu64 "\n", stats->low_confidence);
+    fprintf(out, "Optimization unknown callee: %" PRIu64 "\n", stats->unknown_callee);
+    fprintf(out, "Executors invalidated: %" PRIu64 "\n", stats->executors_invalidated);
+
+    print_histogram(out, "Trace length", stats->trace_length_hist);
+    print_histogram(out, "Trace run length", stats->trace_run_length_hist);
+    print_histogram(out, "Optimized trace length", stats->optimized_trace_length_hist);
+
+    fprintf(out, "Optimization optimizer attempts: %" PRIu64 "\n", stats->optimizer_attempts);
+    fprintf(out, "Optimization optimizer successes: %" PRIu64 "\n", stats->optimizer_successes);
+    fprintf(out, "Optimization optimizer failure no memory: %" PRIu64 "\n",
+            stats->optimizer_failure_reason_no_memory);
+    fprintf(out, "Optimizer remove globals builtins changed: %" PRIu64 "\n", stats->remove_globals_builtins_changed);
+    fprintf(out, "Optimizer remove globals incorrect keys: %" PRIu64 "\n", stats->remove_globals_incorrect_keys);
+    // Per-uop counters; only non-zero entries are printed.
+    for (int i = 0; i <= MAX_UOP_ID; i++) {
+        if (stats->opcode[i].execution_count) {
+            fprintf(out, "uops[%s].execution_count : %" PRIu64 "\n", _PyUOpName(i), stats->opcode[i].execution_count);
+        }
+        if (stats->opcode[i].miss) {
+            fprintf(out, "uops[%s].specialization.miss : %" PRIu64 "\n", _PyUOpName(i), stats->opcode[i].miss);
+        }
+    }
+    for (int i = 0; i < 256; i++) {
+        if (stats->unsupported_opcode[i]) {
+            fprintf(
+                out,
+                "unsupported_opcode[%s].count : %" PRIu64 "\n",
+                _PyOpcode_OpName[i],
+                stats->unsupported_opcode[i]
+            );
+        }
+    }
+
+    for (int i = 1; i <= MAX_UOP_ID; i++){
+        for (int j = 1; j <= MAX_UOP_ID; j++) {
+            if (stats->opcode[i].pair_count[j]) {
+                fprintf(out, "uop[%s].pair_count[%s] : %" PRIu64 "\n",
+                        _PyOpcode_uop_name[i], _PyOpcode_uop_name[j], stats->opcode[i].pair_count[j]);
+            }
+        }
+    }
+    // Inclusive upper bound, matching the other per-uop loops in this
+    // function: with "<" the error count of uop MAX_UOP_ID was never printed.
+    for (int i = 0; i <= MAX_UOP_ID; i++) {
+        if (stats->error_in_opcode[i]) {
+            fprintf(
+                out,
+                "error_in_opcode[%s].count : %" PRIu64 "\n",
+                _PyUOpName(i),
+                stats->error_in_opcode[i]
+            );
+        }
+    }
+    fprintf(out, "JIT total memory size: %" PRIu64 "\n", stats->jit_total_memory_size);
+    fprintf(out, "JIT code size: %" PRIu64 "\n", stats->jit_code_size);
+    fprintf(out, "JIT trampoline size: %" PRIu64 "\n", stats->jit_trampoline_size);
+    fprintf(out, "JIT data size: %" PRIu64 "\n", stats->jit_data_size);
+    fprintf(out, "JIT padding size: %" PRIu64 "\n", stats->jit_padding_size);
+    fprintf(out, "JIT freed memory size: %" PRIu64 "\n", stats->jit_freed_memory_size);
+
+    print_histogram(out, "Trace total memory size", stats->trace_total_memory_hist);
+}
+#endif
+
+#ifdef Py_GIL_DISABLED
+// Print the free-threading counters as "<label>: <value>", one per line.
+static void
+print_ft_stats(FILE *out, FTStats *stats)
+{
+    const struct {
+        const char *label;
+        uint64_t value;
+    } rows[] = {
+        {"Mutex sleeps (mutex_sleeps)", stats->mutex_sleeps},
+        {"QSBR polls (qsbr_polls)", stats->qsbr_polls},
+        {"World stops (world_stops)", stats->world_stops},
+    };
+    for (size_t k = 0; k < sizeof(rows) / sizeof(rows[0]); k++) {
+        fprintf(out, "%s: %" PRIu64 "\n", rows[k].label, rows[k].value);
+    }
+}
+#endif
+
+// Print each rare-event counter as "Rare event (<name>): <value>".
+static void
+print_rare_event_stats(FILE *out, RareEventStats *stats)
+{
+    const struct {
+        const char *event;
+        uint64_t count;
+    } rows[] = {
+        {"set_class", stats->set_class},
+        {"set_bases", stats->set_bases},
+        {"set_eval_frame_func", stats->set_eval_frame_func},
+        {"builtin_dict", stats->builtin_dict},
+        {"func_modification", stats->func_modification},
+        {"watched_dict_modification", stats->watched_dict_modification},
+        {"watched_globals_modification", stats->watched_globals_modification},
+    };
+    for (size_t k = 0; k < sizeof(rows) / sizeof(rows[0]); k++) {
+        fprintf(out, "Rare event (%s): %" PRIu64 "\n", rows[k].event, rows[k].count);
+    }
+}
+
+// Print every category of 'stats' to 'out'.  The optimizer section is only
+// compiled with _Py_TIER2 and the free-threading section only with
+// Py_GIL_DISABLED.
+static void
+print_stats(FILE *out, PyStats *stats)
+{
+    OpcodeStats *opcodes = stats->opcode_stats;
+    GCStats *generations = stats->gc_stats;
+
+    print_spec_stats(out, opcodes);
+    print_call_stats(out, &stats->call_stats);
+    print_object_stats(out, &stats->object_stats);
+    print_gc_stats(out, generations);
+#ifdef _Py_TIER2
+    print_optimization_stats(out, &stats->optimization_stats);
+#endif
+#ifdef Py_GIL_DISABLED
+    print_ft_stats(out, &stats->ft_stats);
+#endif
+    print_rare_event_stats(out, &stats->rare_event_stats);
+}
+
+#ifdef Py_GIL_DISABLED
+
+// Add every counter of 'src' to the matching counter of 'dest'.
+static void
+merge_specialization_stats(SpecializationStats *dest, const SpecializationStats *src)
+{
+#define ACCUMULATE(FIELD) (dest->FIELD += src->FIELD)
+    ACCUMULATE(success);
+    ACCUMULATE(failure);
+    ACCUMULATE(hit);
+    ACCUMULATE(deferred);
+    ACCUMULATE(miss);
+    ACCUMULATE(deopt);
+    for (int kind = 0; kind < SPECIALIZATION_FAILURE_KINDS; kind++) {
+        ACCUMULATE(failure_kinds[kind]);
+    }
+#undef ACCUMULATE
+}
+
+// Add the 256 per-opcode records of 'src' (specialization counters,
+// execution count and pair counts) to the matching records of 'dest'.
+static void
+merge_opcode_stats_array(OpcodeStats *dest, const OpcodeStats *src)
+{
+    for (int op = 0; op < 256; op++) {
+        OpcodeStats *to = &dest[op];
+        const OpcodeStats *from = &src[op];
+        merge_specialization_stats(&to->specialization, &from->specialization);
+        to->execution_count += from->execution_count;
+        for (int next = 0; next < 256; next++) {
+            to->pair_count[next] += from->pair_count[next];
+        }
+    }
+}
+
+// Add the call counters of 'src' to 'dest'.
+static void
+merge_call_stats(CallStats *dest, const CallStats *src)
+{
+#define ACCUMULATE(FIELD) (dest->FIELD += src->FIELD)
+    ACCUMULATE(inlined_py_calls);
+    ACCUMULATE(pyeval_calls);
+    ACCUMULATE(frames_pushed);
+    ACCUMULATE(frame_objects_created);
+    for (int kind = 0; kind < EVAL_CALL_KINDS; kind++) {
+        ACCUMULATE(eval_calls[kind]);
+    }
+#undef ACCUMULATE
+}
+
+// Add the object counters of 'src' to 'dest'.
+static void
+merge_object_stats(ObjectStats *dest, const ObjectStats *src)
+{
+#define ACCUMULATE(FIELD) (dest->FIELD += src->FIELD)
+    // reference counting
+    ACCUMULATE(increfs);
+    ACCUMULATE(decrefs);
+    ACCUMULATE(interpreter_increfs);
+    ACCUMULATE(interpreter_decrefs);
+    ACCUMULATE(immortal_increfs);
+    ACCUMULATE(immortal_decrefs);
+    ACCUMULATE(interpreter_immortal_increfs);
+    ACCUMULATE(interpreter_immortal_decrefs);
+    // allocation
+    ACCUMULATE(allocations);
+    ACCUMULATE(allocations512);
+    ACCUMULATE(allocations4k);
+    ACCUMULATE(allocations_big);
+    ACCUMULATE(frees);
+    ACCUMULATE(to_freelist);
+    ACCUMULATE(from_freelist);
+    ACCUMULATE(inline_values);
+    // dict materialization
+    ACCUMULATE(dict_materialized_on_request);
+    ACCUMULATE(dict_materialized_new_key);
+    ACCUMULATE(dict_materialized_too_big);
+    ACCUMULATE(dict_materialized_str_subclass);
+    // type cache
+    ACCUMULATE(type_cache_hits);
+    ACCUMULATE(type_cache_misses);
+    ACCUMULATE(type_cache_dunder_hits);
+    ACCUMULATE(type_cache_dunder_misses);
+    ACCUMULATE(type_cache_collisions);
+    ACCUMULATE(object_visits);
+#undef ACCUMULATE
+}
+
+// Add the per-uop records of 'src' (execution count, misses, pair counts)
+// to 'dest', for indices 0..PYSTATS_MAX_UOP_ID inclusive.
+static void
+merge_uop_stats_array(UOpStats *dest, const UOpStats *src)
+{
+    for (int uop = 0; uop <= PYSTATS_MAX_UOP_ID; uop++) {
+        UOpStats *to = &dest[uop];
+        const UOpStats *from = &src[uop];
+        to->execution_count += from->execution_count;
+        to->miss += from->miss;
+        for (int next = 0; next <= PYSTATS_MAX_UOP_ID; next++) {
+            to->pair_count[next] += from->pair_count[next];
+        }
+    }
+}
+
+// Add the optimizer counters, per-uop records, unsupported-opcode counts,
+// histograms and per-uop error counts of 'src' to 'dest'.
+static void
+merge_optimization_stats(OptimizationStats *dest, const OptimizationStats *src)
+{
+#define ACCUMULATE(FIELD) (dest->FIELD += src->FIELD)
+    ACCUMULATE(attempts);
+    ACCUMULATE(traces_created);
+    ACCUMULATE(traces_executed);
+    ACCUMULATE(uops_executed);
+    ACCUMULATE(trace_stack_overflow);
+    ACCUMULATE(trace_stack_underflow);
+    ACCUMULATE(trace_too_long);
+    ACCUMULATE(trace_too_short);
+    ACCUMULATE(inner_loop);
+    ACCUMULATE(recursive_call);
+    ACCUMULATE(low_confidence);
+    ACCUMULATE(unknown_callee);
+    ACCUMULATE(executors_invalidated);
+    ACCUMULATE(optimizer_attempts);
+    ACCUMULATE(optimizer_successes);
+    ACCUMULATE(optimizer_failure_reason_no_memory);
+    ACCUMULATE(remove_globals_builtins_changed);
+    ACCUMULATE(remove_globals_incorrect_keys);
+    ACCUMULATE(jit_total_memory_size);
+    ACCUMULATE(jit_code_size);
+    ACCUMULATE(jit_trampoline_size);
+    ACCUMULATE(jit_data_size);
+    ACCUMULATE(jit_padding_size);
+    ACCUMULATE(jit_freed_memory_size);
+
+    merge_uop_stats_array(dest->opcode, src->opcode);
+
+    for (int op = 0; op < 256; op++) {
+        ACCUMULATE(unsupported_opcode[op]);
+    }
+    for (int bucket = 0; bucket < _Py_UOP_HIST_SIZE; bucket++) {
+        ACCUMULATE(trace_length_hist[bucket]);
+        ACCUMULATE(trace_run_length_hist[bucket]);
+        ACCUMULATE(optimized_trace_length_hist[bucket]);
+        ACCUMULATE(trace_total_memory_hist[bucket]);
+    }
+    for (int uop = 0; uop <= PYSTATS_MAX_UOP_ID; uop++) {
+        ACCUMULATE(error_in_opcode[uop]);
+    }
+#undef ACCUMULATE
+}
+
+// Add the free-threading counters of 'src' to 'dest'.
+//
+// These must accumulate like every other merge_*() helper: plain assignment
+// would replace the interpreter totals with the counters of whichever
+// thread was merged last.
+static void
+merge_ft_stats(FTStats *dest, const FTStats *src)
+{
+    dest->mutex_sleeps += src->mutex_sleeps;
+    dest->qsbr_polls += src->qsbr_polls;
+    dest->world_stops += src->world_stops;
+}
+
+// Add the rare-event counters of 'src' to 'dest'.
+static void
+merge_rare_event_stats(RareEventStats *dest, const RareEventStats *src)
+{
+#define ACCUMULATE(FIELD) (dest->FIELD += src->FIELD)
+    ACCUMULATE(set_class);
+    ACCUMULATE(set_bases);
+    ACCUMULATE(set_eval_frame_func);
+    ACCUMULATE(builtin_dict);
+    ACCUMULATE(func_modification);
+    ACCUMULATE(watched_dict_modification);
+    ACCUMULATE(watched_globals_modification);
+#undef ACCUMULATE
+}
+
+// Add the GC counters of each of the NUM_GENERATIONS entries of 'src' to
+// the matching entry of 'dest'.
+static void
+merge_gc_stats_array(GCStats *dest, const GCStats *src)
+{
+    for (int gen = 0; gen < NUM_GENERATIONS; gen++) {
+        GCStats *to = &dest[gen];
+        const GCStats *from = &src[gen];
+        to->collections += from->collections;
+        to->object_visits += from->object_visits;
+        to->objects_collected += from->objects_collected;
+        to->objects_transitively_reachable += from->objects_transitively_reachable;
+        to->objects_not_transitively_reachable += from->objects_not_transitively_reachable;
+    }
+}
+
+// Zero the thread-local stat counters of 'tstate', if it has a structure.
+void
+stats_zero_thread(_PyThreadStateImpl *tstate)
+{
+    PyStats *local = tstate->pystats_struct;
+    if (local == NULL) {
+        return;
+    }
+    memset(local, 0, sizeof(*local));
+}
+
+// Merge the stats of a single thread into its interpreter's structure.
+// Does nothing when either structure has not been allocated.
+void
+stats_merge_thread(_PyThreadStateImpl *tstate)
+{
+    PyInterpreterState *interp = ((PyThreadState *)tstate)->interp;
+    PyStats *local = tstate->pystats_struct;
+    PyStats *shared = interp->pystats_struct;
+
+    if (local != NULL && shared != NULL) {
+        // One helper per category of stats.
+        merge_opcode_stats_array(shared->opcode_stats, local->opcode_stats);
+        merge_call_stats(&shared->call_stats, &local->call_stats);
+        merge_object_stats(&shared->object_stats, &local->object_stats);
+        merge_optimization_stats(&shared->optimization_stats, &local->optimization_stats);
+        merge_ft_stats(&shared->ft_stats, &local->ft_stats);
+        merge_rare_event_stats(&shared->rare_event_stats, &local->rare_event_stats);
+        merge_gc_stats_array(shared->gc_stats, local->gc_stats);
+    }
+}
+#endif // Py_GIL_DISABLED
+
+// toggle stats collection on or off for all threads
+//
+// Allocates the per-interpreter PyStats structure on first activation and
+// updates interp->pystats_enabled under the stats lock.  If the enabled flag
+// changed, stops the world and points every thread state's 'pystats' at its
+// stats structure (or at NULL when turning off).
+//
+// Returns -1 if the per-interpreter structure could not be allocated,
+// otherwise 0.
+static int
+stats_toggle_on_off(PyThreadState *tstate, int on)
+{
+    bool changed = false;
+    PyInterpreterState *interp = tstate->interp;
+    STATS_LOCK(interp);
+    if (on && interp->pystats_struct == NULL) {
+        PyStats *s = PyMem_RawCalloc(1, sizeof(PyStats));
+        if (s == NULL) {
+            STATS_UNLOCK(interp);
+            return -1;
+        }
+        FT_ATOMIC_STORE_PTR_RELAXED(interp->pystats_struct, s);
+    }
+    if (tstate->interp->pystats_enabled != on) {
+        FT_ATOMIC_STORE_INT_RELAXED(interp->pystats_enabled, on);
+        changed = true;
+    }
+    STATS_UNLOCK(interp);
+    if (!changed) {
+        // flag already had the requested value, thread pointers are left as is
+        return 0;
+    }
+    _PyEval_StopTheWorld(interp);
+    _Py_FOR_EACH_TSTATE_UNLOCKED(interp, ts) {
+        PyStats *s = NULL;
+        if (interp->pystats_enabled) {
+#ifdef Py_GIL_DISABLED
+            _PyThreadStateImpl *ts_impl = (_PyThreadStateImpl *)ts;
+            if (ts_impl->pystats_struct == NULL) {
+                // first activation for this thread, allocate structure
+                // NOTE(review): the allocation result is not checked; on
+                // failure 's' stays NULL and this thread's pystats is NULL.
+                ts_impl->pystats_struct = PyMem_RawCalloc(1, sizeof(PyStats));
+            }
+            s = ts_impl->pystats_struct;
+#else
+            // default build: every thread uses the per-interpreter structure
+            s = ts->interp->pystats_struct;
+#endif
+        }
+        ts->pystats = s;
+    }
+    _PyEval_StartTheWorld(interp);
+    return 0;
+}
+
+// Zero the stats of every thread and of the interpreter itself.
+// Does nothing without a current thread state or without a per-interpreter
+// stats structure.  The counters are cleared while the world is stopped.
+static void
+stats_zero_all(void)
+{
+    PyThreadState *tstate = _PyThreadState_GET();
+    if (tstate == NULL) {
+        return;
+    }
+    PyInterpreterState *interp = tstate->interp;
+    if (FT_ATOMIC_LOAD_PTR_RELAXED(interp->pystats_struct) == NULL) {
+        return;
+    }
+    _PyEval_StopTheWorld(interp);
+#ifdef Py_GIL_DISABLED
+    _Py_FOR_EACH_TSTATE_UNLOCKED(interp, ts) {
+        stats_zero_thread((_PyThreadStateImpl *)ts);
+    }
+#endif
+    PyStats *shared = interp->pystats_struct;
+    if (shared != NULL) {
+        memset(shared, 0, sizeof(PyStats));
+    }
+    _PyEval_StartTheWorld(interp);
+}
+
+// Merge the stats of every thread into the per-interpreter structure and
+// then zero the per-thread counters.  Does nothing without a current thread
+// state or without a per-interpreter stats structure.  The per-thread work
+// is only compiled in the free-threaded build.
+static void
+stats_merge_all(void)
+{
+    PyThreadState *tstate = _PyThreadState_GET();
+    if (tstate == NULL) {
+        return;
+    }
+    PyInterpreterState *interp = tstate->interp;
+    if (FT_ATOMIC_LOAD_PTR_RELAXED(interp->pystats_struct) == NULL) {
+        return;
+    }
+    _PyEval_StopTheWorld(interp);
+#ifdef Py_GIL_DISABLED
+    _Py_FOR_EACH_TSTATE_UNLOCKED(interp, ts) {
+        _PyThreadStateImpl *impl = (_PyThreadStateImpl *)ts;
+        stats_merge_thread(impl);
+        stats_zero_thread(impl);
+    }
+#endif
+    _PyEval_StartTheWorld(interp);
+}
+
+// Turn stats collection on for the current interpreter.
+// Returns the result of stats_toggle_on_off() (0, or -1 on allocation failure).
+int
+_Py_StatsOn(void)
+{
+    return stats_toggle_on_off(_PyThreadState_GET(), 1);
+}
+
+// Turn stats collection off for the current interpreter.
+// The result of stats_toggle_on_off() is ignored.
+void
+_Py_StatsOff(void)
+{
+    (void)stats_toggle_on_off(_PyThreadState_GET(), 0);
+}
+
+// Zero all stat counters (per-thread and per-interpreter) by delegating to
+// stats_zero_all().
+void
+_Py_StatsClear(void)
+{
+    stats_zero_all();
+}
+
+// Return 1 if the 'size' bytes starting at 'ptr' are all zero (including
+// when 'size' is 0), otherwise 0.
+static int
+mem_is_zero(unsigned char *ptr, size_t size)
+{
+    while (size > 0) {
+        if (*ptr != 0) {
+            return 0;
+        }
+        ptr++;
+        size--;
+    }
+    return 1;
+}
+
+// Merge the stats of all threads and write them out.
+//
+// With 'to_file' set (asserted), output goes to a randomly named ".txt" file
+// in the py_stats directory; if that file cannot be opened, output falls
+// back to stderr.
+//
+// Returns 1 if stats were printed.  Returns 0 if there is no current thread
+// state, no stats structure, or the GC, opcode, call and object stats are
+// all zero.
+int
+_Py_PrintSpecializationStats(int to_file)
+{
+    assert(to_file);
+    PyThreadState *tstate = _PyThreadState_GET();
+    if (tstate == NULL) {
+        // stats_merge_all() tolerates a missing thread state, so do the
+        // same here instead of dereferencing NULL below.
+        return 0;
+    }
+    stats_merge_all();
+    PyInterpreterState *interp = tstate->interp;
+    STATS_LOCK(interp);
+    PyStats *stats = interp->pystats_struct;
+    if (stats == NULL) {
+        STATS_UNLOCK(interp);
+        return 0;
+    }
+#define MEM_IS_ZERO(DATA) mem_is_zero((unsigned char*)DATA, sizeof(*(DATA)))
+    int is_zero = (
+        // gc_stats is indexed as an array embedded in PyStats: take its
+        // address so that every generation is checked, not just the first.
+        MEM_IS_ZERO(&stats->gc_stats)
+        && MEM_IS_ZERO(&stats->opcode_stats)
+        && MEM_IS_ZERO(&stats->call_stats)
+        && MEM_IS_ZERO(&stats->object_stats)
+    );
+#undef MEM_IS_ZERO
+    STATS_UNLOCK(interp);
+    if (is_zero) {
+        // gh-108753: -X pystats command line was used, but then _stats_off()
+        // and _stats_clear() have been called: in this case, avoid printing
+        // useless "all zeros" statistics.
+        return 0;
+    }
+
+    FILE *out = stderr;
+    if (to_file) {
+        /* Write to a file instead of stderr. */
+# ifdef MS_WINDOWS
+        const char *dirname = "c:\\temp\\py_stats\\";
+# else
+        const char *dirname = "/tmp/py_stats/";
+# endif
+        /* Use random 160 bit number as file name,
+        * to avoid both accidental collisions and
+        * symlink attacks. */
+        // Zero-initialized so the name is well defined even if the random
+        // source fails; not named "rand" to avoid shadowing rand().
+        unsigned char random_bytes[20] = {0};
+        char hex_name[41];
+        (void)_PyOS_URandomNonblock(random_bytes, sizeof(random_bytes));
+        for (int i = 0; i < 20; i++) {
+            hex_name[2*i] = Py_hexdigits[random_bytes[i]&15];
+            hex_name[2*i+1] = Py_hexdigits[(random_bytes[i]>>4)&15];
+        }
+        hex_name[40] = '\0';
+        char buf[64];
+        // Bounded formatting: never write past 'buf', and only open the
+        // file if the whole path fit.
+        int len = snprintf(buf, sizeof(buf), "%s%s.txt", dirname, hex_name);
+        if (len > 0 && (size_t)len < sizeof(buf)) {
+            FILE *fout = fopen(buf, "w");
+            if (fout) {
+                out = fout;
+            }
+        }
+    }
+    else {
+        fprintf(out, "Specialization stats:\n");
+    }
+    STATS_LOCK(interp);
+    print_stats(out, stats);
+    STATS_UNLOCK(interp);
+    if (out != stderr) {
+        fclose(out);
+    }
+    return 1;
+}
+
+// Set up pystats for a new interpreter.  When interp->config._pystats is
+// set, stats start enabled and the per-interpreter PyStats structure is
+// allocated; an error status is returned if that allocation fails.
+PyStatus
+_PyStats_InterpInit(PyInterpreterState *interp)
+{
+    if (!interp->config._pystats) {
+        return _PyStatus_OK();
+    }
+    // start with pystats enabled, can be disabled via sys._stats_off()
+    // this needs to be set before the first tstate is created
+    interp->pystats_enabled = 1;
+    PyStats *shared = PyMem_RawCalloc(1, sizeof(PyStats));
+    interp->pystats_struct = shared;
+    if (shared == NULL) {
+        return _PyStatus_ERR("out-of-memory while initializing interpreter");
+    }
+    return _PyStatus_OK();
+}
+
+// Set up pystats for a new thread state.  In the free-threaded build, when
+// stats are enabled for 'interp', allocate the per-thread PyStats structure.
+// Returns false only if that allocation fails; the default build always
+// returns true.
+bool
+_PyStats_ThreadInit(PyInterpreterState *interp, _PyThreadStateImpl *tstate)
+{
+#ifdef Py_GIL_DISABLED
+    if (!FT_ATOMIC_LOAD_INT_RELAXED(interp->pystats_enabled)) {
+        return true;
+    }
+    assert(interp->pystats_struct != NULL);
+    PyStats *local = PyMem_RawCalloc(1, sizeof(PyStats));
+    tstate->pystats_struct = local;
+    return local != NULL;
+#else
+    return true;
+#endif
+}
+
+// Tear down pystats for a thread state.  In the free-threaded build, merge
+// the thread's counters into the per-interpreter structure under the stats
+// lock, then free the per-thread structure.  The default build does nothing.
+void
+_PyStats_ThreadFini(_PyThreadStateImpl *tstate)
+{
+#ifdef Py_GIL_DISABLED
+    PyInterpreterState *interp = ((PyThreadState *)tstate)->interp;
+    STATS_LOCK(interp);
+    stats_merge_thread(tstate);
+    STATS_UNLOCK(interp);
+    PyMem_RawFree(tstate->pystats_struct);
+    // Do not leave a dangling pointer behind: stats_merge_thread() and
+    // stats_zero_thread() dereference pystats_struct whenever it is non-NULL.
+    tstate->pystats_struct = NULL;
+#endif
+}
+
+// Set tstate->pystats for a thread state: the per-thread structure
+// (free-threaded build) or the per-interpreter structure (default build)
+// when stats are enabled, NULL otherwise.
+void
+_PyStats_Attach(_PyThreadStateImpl *tstate_impl)
+{
+    PyThreadState *tstate = (PyThreadState *)tstate_impl;
+    PyStats *active = NULL;
+    if (FT_ATOMIC_LOAD_INT_RELAXED(tstate->interp->pystats_enabled)) {
+#ifdef Py_GIL_DISABLED
+        active = tstate_impl->pystats_struct;
+#else
+        active = tstate->interp->pystats_struct;
+#endif
+    }
+    tstate->pystats = active;
+}
+
+// Clear the thread state's 'pystats' pointer.
+void
+_PyStats_Detach(_PyThreadStateImpl *tstate_impl)
+{
+    PyThreadState *tstate = (PyThreadState *)tstate_impl;
+    tstate->pystats = NULL;
+}
+
+#endif // Py_STATS
index c992c285cb13e4b6f9c75041fd2edd3f2d554d37..b2153bf9d67230cb8ede561a258736edbbee5acf 100644 (file)
@@ -36,6 +36,7 @@
 #include "pycore_pystate.h"         // _PyThreadState_GET()
 #include "pycore_qsbr.h"
 #include "pycore_tstate.h"          // _PyThreadStateImpl
+#include "pycore_stats.h"           // FT_STAT_QSBR_POLL_INC()
 
 
 // Starting size of the array of qsbr thread states
@@ -158,7 +159,7 @@ _Py_qsbr_poll(struct _qsbr_thread_state *qsbr, uint64_t goal)
     if (_Py_qbsr_goal_reached(qsbr, goal)) {
         return true;
     }
-
+    FT_STAT_QSBR_POLL_INC();
     uint64_t rd_seq = qsbr_poll_scan(qsbr->shared);
     return QSBR_LEQ(goal, rd_seq);
 }
index a1c5dedd61563bea2a046897ea4a3085f2c82dc6..2193596a331d3cfc39d1a2a8de770b03e9f3f0f4 100644 (file)
 
 #include <stdlib.h> // rand()
 
-extern const char *_PyUOpName(int index);
-
 /* For guidance on adding or extending families of instructions see
  * InternalDocs/interpreter.md `Specialization` section.
  */
 
-#ifdef Py_STATS
-GCStats _py_gc_stats[NUM_GENERATIONS] = { 0 };
-static PyStats _Py_stats_struct = { .gc_stats = _py_gc_stats };
-PyStats *_Py_stats = NULL;
-
-#if PYSTATS_MAX_UOP_ID < MAX_UOP_ID
-#error "Not enough space allocated for pystats. Increase PYSTATS_MAX_UOP_ID to at least MAX_UOP_ID"
-#endif
-
-#define ADD_STAT_TO_DICT(res, field) \
-    do { \
-        PyObject *val = PyLong_FromUnsignedLongLong(stats->field); \
-        if (val == NULL) { \
-            Py_DECREF(res); \
-            return NULL; \
-        } \
-        if (PyDict_SetItemString(res, #field, val) == -1) { \
-            Py_DECREF(res); \
-            Py_DECREF(val); \
-            return NULL; \
-        } \
-        Py_DECREF(val); \
-    } while(0);
-
-static PyObject*
-stats_to_dict(SpecializationStats *stats)
-{
-    PyObject *res = PyDict_New();
-    if (res == NULL) {
-        return NULL;
-    }
-    ADD_STAT_TO_DICT(res, success);
-    ADD_STAT_TO_DICT(res, failure);
-    ADD_STAT_TO_DICT(res, hit);
-    ADD_STAT_TO_DICT(res, deferred);
-    ADD_STAT_TO_DICT(res, miss);
-    ADD_STAT_TO_DICT(res, deopt);
-    PyObject *failure_kinds = PyTuple_New(SPECIALIZATION_FAILURE_KINDS);
-    if (failure_kinds == NULL) {
-        Py_DECREF(res);
-        return NULL;
-    }
-    for (int i = 0; i < SPECIALIZATION_FAILURE_KINDS; i++) {
-        PyObject *stat = PyLong_FromUnsignedLongLong(stats->failure_kinds[i]);
-        if (stat == NULL) {
-            Py_DECREF(res);
-            Py_DECREF(failure_kinds);
-            return NULL;
-        }
-        PyTuple_SET_ITEM(failure_kinds, i, stat);
-    }
-    if (PyDict_SetItemString(res, "failure_kinds", failure_kinds)) {
-        Py_DECREF(res);
-        Py_DECREF(failure_kinds);
-        return NULL;
-    }
-    Py_DECREF(failure_kinds);
-    return res;
-}
-#undef ADD_STAT_TO_DICT
-
-static int
-add_stat_dict(
-    PyObject *res,
-    int opcode,
-    const char *name) {
-
-    SpecializationStats *stats = &_Py_stats_struct.opcode_stats[opcode].specialization;
-    PyObject *d = stats_to_dict(stats);
-    if (d == NULL) {
-        return -1;
-    }
-    int err = PyDict_SetItemString(res, name, d);
-    Py_DECREF(d);
-    return err;
-}
-
-PyObject*
-_Py_GetSpecializationStats(void) {
-    PyObject *stats = PyDict_New();
-    if (stats == NULL) {
-        return NULL;
-    }
-    int err = 0;
-    err += add_stat_dict(stats, CONTAINS_OP, "contains_op");
-    err += add_stat_dict(stats, LOAD_SUPER_ATTR, "load_super_attr");
-    err += add_stat_dict(stats, LOAD_ATTR, "load_attr");
-    err += add_stat_dict(stats, LOAD_GLOBAL, "load_global");
-    err += add_stat_dict(stats, STORE_SUBSCR, "store_subscr");
-    err += add_stat_dict(stats, STORE_ATTR, "store_attr");
-    err += add_stat_dict(stats, JUMP_BACKWARD, "jump_backward");
-    err += add_stat_dict(stats, CALL, "call");
-    err += add_stat_dict(stats, CALL_KW, "call_kw");
-    err += add_stat_dict(stats, BINARY_OP, "binary_op");
-    err += add_stat_dict(stats, COMPARE_OP, "compare_op");
-    err += add_stat_dict(stats, UNPACK_SEQUENCE, "unpack_sequence");
-    err += add_stat_dict(stats, FOR_ITER, "for_iter");
-    err += add_stat_dict(stats, TO_BOOL, "to_bool");
-    err += add_stat_dict(stats, SEND, "send");
-    if (err < 0) {
-        Py_DECREF(stats);
-        return NULL;
-    }
-    return stats;
-}
-
-
-#define PRINT_STAT(i, field) \
-    if (stats[i].field) { \
-        fprintf(out, "    opcode[%s]." #field " : %" PRIu64 "\n", _PyOpcode_OpName[i], stats[i].field); \
-    }
-
-static void
-print_spec_stats(FILE *out, OpcodeStats *stats)
-{
-    /* Mark some opcodes as specializable for stats,
-     * even though we don't specialize them yet. */
-    fprintf(out, "opcode[BINARY_SLICE].specializable : 1\n");
-    fprintf(out, "opcode[STORE_SLICE].specializable : 1\n");
-    fprintf(out, "opcode[GET_ITER].specializable : 1\n");
-    for (int i = 0; i < 256; i++) {
-        if (_PyOpcode_Caches[i]) {
-            /* Ignore jumps as they cannot be specialized */
-            switch (i) {
-                case POP_JUMP_IF_FALSE:
-                case POP_JUMP_IF_TRUE:
-                case POP_JUMP_IF_NONE:
-                case POP_JUMP_IF_NOT_NONE:
-                case JUMP_BACKWARD:
-                    break;
-                default:
-                    fprintf(out, "opcode[%s].specializable : 1\n", _PyOpcode_OpName[i]);
-            }
-        }
-        PRINT_STAT(i, specialization.success);
-        PRINT_STAT(i, specialization.failure);
-        PRINT_STAT(i, specialization.hit);
-        PRINT_STAT(i, specialization.deferred);
-        PRINT_STAT(i, specialization.miss);
-        PRINT_STAT(i, specialization.deopt);
-        PRINT_STAT(i, execution_count);
-        for (int j = 0; j < SPECIALIZATION_FAILURE_KINDS; j++) {
-            uint64_t val = stats[i].specialization.failure_kinds[j];
-            if (val) {
-                fprintf(out, "    opcode[%s].specialization.failure_kinds[%d] : %"
-                    PRIu64 "\n", _PyOpcode_OpName[i], j, val);
-            }
-        }
-        for (int j = 0; j < 256; j++) {
-            if (stats[i].pair_count[j]) {
-                fprintf(out, "opcode[%s].pair_count[%s] : %" PRIu64 "\n",
-                        _PyOpcode_OpName[i], _PyOpcode_OpName[j], stats[i].pair_count[j]);
-            }
-        }
-    }
-}
-#undef PRINT_STAT
-
-
-static void
-print_call_stats(FILE *out, CallStats *stats)
-{
-    fprintf(out, "Calls to PyEval_EvalDefault: %" PRIu64 "\n", stats->pyeval_calls);
-    fprintf(out, "Calls to Python functions inlined: %" PRIu64 "\n", stats->inlined_py_calls);
-    fprintf(out, "Frames pushed: %" PRIu64 "\n", stats->frames_pushed);
-    fprintf(out, "Frame objects created: %" PRIu64 "\n", stats->frame_objects_created);
-    for (int i = 0; i < EVAL_CALL_KINDS; i++) {
-        fprintf(out, "Calls via PyEval_EvalFrame[%d] : %" PRIu64 "\n", i, stats->eval_calls[i]);
-    }
-}
-
-static void
-print_object_stats(FILE *out, ObjectStats *stats)
-{
-    fprintf(out, "Object allocations from freelist: %" PRIu64 "\n", stats->from_freelist);
-    fprintf(out, "Object frees to freelist: %" PRIu64 "\n", stats->to_freelist);
-    fprintf(out, "Object allocations: %" PRIu64 "\n", stats->allocations);
-    fprintf(out, "Object allocations to 512 bytes: %" PRIu64 "\n", stats->allocations512);
-    fprintf(out, "Object allocations to 4 kbytes: %" PRIu64 "\n", stats->allocations4k);
-    fprintf(out, "Object allocations over 4 kbytes: %" PRIu64 "\n", stats->allocations_big);
-    fprintf(out, "Object frees: %" PRIu64 "\n", stats->frees);
-    fprintf(out, "Object inline values: %" PRIu64 "\n", stats->inline_values);
-    fprintf(out, "Object interpreter mortal increfs: %" PRIu64 "\n", stats->interpreter_increfs);
-    fprintf(out, "Object interpreter mortal decrefs: %" PRIu64 "\n", stats->interpreter_decrefs);
-    fprintf(out, "Object mortal increfs: %" PRIu64 "\n", stats->increfs);
-    fprintf(out, "Object mortal decrefs: %" PRIu64 "\n", stats->decrefs);
-    fprintf(out, "Object interpreter immortal increfs: %" PRIu64 "\n", stats->interpreter_immortal_increfs);
-    fprintf(out, "Object interpreter immortal decrefs: %" PRIu64 "\n", stats->interpreter_immortal_decrefs);
-    fprintf(out, "Object immortal increfs: %" PRIu64 "\n", stats->immortal_increfs);
-    fprintf(out, "Object immortal decrefs: %" PRIu64 "\n", stats->immortal_decrefs);
-    fprintf(out, "Object materialize dict (on request): %" PRIu64 "\n", stats->dict_materialized_on_request);
-    fprintf(out, "Object materialize dict (new key): %" PRIu64 "\n", stats->dict_materialized_new_key);
-    fprintf(out, "Object materialize dict (too big): %" PRIu64 "\n", stats->dict_materialized_too_big);
-    fprintf(out, "Object materialize dict (str subclass): %" PRIu64 "\n", stats->dict_materialized_str_subclass);
-    fprintf(out, "Object method cache hits: %" PRIu64 "\n", stats->type_cache_hits);
-    fprintf(out, "Object method cache misses: %" PRIu64 "\n", stats->type_cache_misses);
-    fprintf(out, "Object method cache collisions: %" PRIu64 "\n", stats->type_cache_collisions);
-    fprintf(out, "Object method cache dunder hits: %" PRIu64 "\n", stats->type_cache_dunder_hits);
-    fprintf(out, "Object method cache dunder misses: %" PRIu64 "\n", stats->type_cache_dunder_misses);
-}
-
-static void
-print_gc_stats(FILE *out, GCStats *stats)
-{
-    for (int i = 0; i < NUM_GENERATIONS; i++) {
-        fprintf(out, "GC[%d] collections: %" PRIu64 "\n", i, stats[i].collections);
-        fprintf(out, "GC[%d] object visits: %" PRIu64 "\n", i, stats[i].object_visits);
-        fprintf(out, "GC[%d] objects collected: %" PRIu64 "\n", i, stats[i].objects_collected);
-        fprintf(out, "GC[%d] objects reachable from roots: %" PRIu64 "\n", i, stats[i].objects_transitively_reachable);
-        fprintf(out, "GC[%d] objects not reachable from roots: %" PRIu64 "\n", i, stats[i].objects_not_transitively_reachable);
-    }
-}
-
-#ifdef _Py_TIER2
-static void
-print_histogram(FILE *out, const char *name, uint64_t hist[_Py_UOP_HIST_SIZE])
-{
-    for (int i = 0; i < _Py_UOP_HIST_SIZE; i++) {
-        fprintf(out, "%s[%" PRIu64"]: %" PRIu64 "\n", name, (uint64_t)1 << i, hist[i]);
-    }
-}
-
-static void
-print_optimization_stats(FILE *out, OptimizationStats *stats)
-{
-    fprintf(out, "Optimization attempts: %" PRIu64 "\n", stats->attempts);
-    fprintf(out, "Optimization traces created: %" PRIu64 "\n", stats->traces_created);
-    fprintf(out, "Optimization traces executed: %" PRIu64 "\n", stats->traces_executed);
-    fprintf(out, "Optimization uops executed: %" PRIu64 "\n", stats->uops_executed);
-    fprintf(out, "Optimization trace stack overflow: %" PRIu64 "\n", stats->trace_stack_overflow);
-    fprintf(out, "Optimization trace stack underflow: %" PRIu64 "\n", stats->trace_stack_underflow);
-    fprintf(out, "Optimization trace too long: %" PRIu64 "\n", stats->trace_too_long);
-    fprintf(out, "Optimization trace too short: %" PRIu64 "\n", stats->trace_too_short);
-    fprintf(out, "Optimization inner loop: %" PRIu64 "\n", stats->inner_loop);
-    fprintf(out, "Optimization recursive call: %" PRIu64 "\n", stats->recursive_call);
-    fprintf(out, "Optimization low confidence: %" PRIu64 "\n", stats->low_confidence);
-    fprintf(out, "Optimization unknown callee: %" PRIu64 "\n", stats->unknown_callee);
-    fprintf(out, "Executors invalidated: %" PRIu64 "\n", stats->executors_invalidated);
-
-    print_histogram(out, "Trace length", stats->trace_length_hist);
-    print_histogram(out, "Trace run length", stats->trace_run_length_hist);
-    print_histogram(out, "Optimized trace length", stats->optimized_trace_length_hist);
-
-    fprintf(out, "Optimization optimizer attempts: %" PRIu64 "\n", stats->optimizer_attempts);
-    fprintf(out, "Optimization optimizer successes: %" PRIu64 "\n", stats->optimizer_successes);
-    fprintf(out, "Optimization optimizer failure no memory: %" PRIu64 "\n",
-            stats->optimizer_failure_reason_no_memory);
-    fprintf(out, "Optimizer remove globals builtins changed: %" PRIu64 "\n", stats->remove_globals_builtins_changed);
-    fprintf(out, "Optimizer remove globals incorrect keys: %" PRIu64 "\n", stats->remove_globals_incorrect_keys);
-    for (int i = 0; i <= MAX_UOP_ID; i++) {
-        if (stats->opcode[i].execution_count) {
-            fprintf(out, "uops[%s].execution_count : %" PRIu64 "\n", _PyUOpName(i), stats->opcode[i].execution_count);
-        }
-        if (stats->opcode[i].miss) {
-            fprintf(out, "uops[%s].specialization.miss : %" PRIu64 "\n", _PyUOpName(i), stats->opcode[i].miss);
-        }
-    }
-    for (int i = 0; i < 256; i++) {
-        if (stats->unsupported_opcode[i]) {
-            fprintf(
-                out,
-                "unsupported_opcode[%s].count : %" PRIu64 "\n",
-                _PyOpcode_OpName[i],
-                stats->unsupported_opcode[i]
-            );
-        }
-    }
-
-    for (int i = 1; i <= MAX_UOP_ID; i++){
-        for (int j = 1; j <= MAX_UOP_ID; j++) {
-            if (stats->opcode[i].pair_count[j]) {
-                fprintf(out, "uop[%s].pair_count[%s] : %" PRIu64 "\n",
-                        _PyOpcode_uop_name[i], _PyOpcode_uop_name[j], stats->opcode[i].pair_count[j]);
-            }
-        }
-    }
-    for (int i = 0; i < MAX_UOP_ID; i++) {
-        if (stats->error_in_opcode[i]) {
-            fprintf(
-                out,
-                "error_in_opcode[%s].count : %" PRIu64 "\n",
-                _PyUOpName(i),
-                stats->error_in_opcode[i]
-            );
-        }
-    }
-    fprintf(out, "JIT total memory size: %" PRIu64 "\n", stats->jit_total_memory_size);
-    fprintf(out, "JIT code size: %" PRIu64 "\n", stats->jit_code_size);
-    fprintf(out, "JIT trampoline size: %" PRIu64 "\n", stats->jit_trampoline_size);
-    fprintf(out, "JIT data size: %" PRIu64 "\n", stats->jit_data_size);
-    fprintf(out, "JIT padding size: %" PRIu64 "\n", stats->jit_padding_size);
-    fprintf(out, "JIT freed memory size: %" PRIu64 "\n", stats->jit_freed_memory_size);
-
-    print_histogram(out, "Trace total memory size", stats->trace_total_memory_hist);
-}
-#endif
-
-static void
-print_rare_event_stats(FILE *out, RareEventStats *stats)
-{
-    fprintf(out, "Rare event (set_class): %" PRIu64 "\n", stats->set_class);
-    fprintf(out, "Rare event (set_bases): %" PRIu64 "\n", stats->set_bases);
-    fprintf(out, "Rare event (set_eval_frame_func): %" PRIu64 "\n", stats->set_eval_frame_func);
-    fprintf(out, "Rare event (builtin_dict): %" PRIu64 "\n", stats->builtin_dict);
-    fprintf(out, "Rare event (func_modification): %" PRIu64 "\n", stats->func_modification);
-    fprintf(out, "Rare event (watched_dict_modification): %" PRIu64 "\n", stats->watched_dict_modification);
-    fprintf(out, "Rare event (watched_globals_modification): %" PRIu64 "\n", stats->watched_globals_modification);
-}
-
-static void
-print_stats(FILE *out, PyStats *stats)
-{
-    print_spec_stats(out, stats->opcode_stats);
-    print_call_stats(out, &stats->call_stats);
-    print_object_stats(out, &stats->object_stats);
-    print_gc_stats(out, stats->gc_stats);
-#ifdef _Py_TIER2
-    print_optimization_stats(out, &stats->optimization_stats);
-#endif
-    print_rare_event_stats(out, &stats->rare_event_stats);
-}
-
-void
-_Py_StatsOn(void)
-{
-    _Py_stats = &_Py_stats_struct;
-}
-
-void
-_Py_StatsOff(void)
-{
-    _Py_stats = NULL;
-}
-
-void
-_Py_StatsClear(void)
-{
-    memset(&_py_gc_stats, 0, sizeof(_py_gc_stats));
-    memset(&_Py_stats_struct, 0, sizeof(_Py_stats_struct));
-    _Py_stats_struct.gc_stats = _py_gc_stats;
-}
-
-static int
-mem_is_zero(unsigned char *ptr, size_t size)
-{
-    for (size_t i=0; i < size; i++) {
-        if (*ptr != 0) {
-            return 0;
-        }
-        ptr++;
-    }
-    return 1;
-}
-
-int
-_Py_PrintSpecializationStats(int to_file)
-{
-    PyStats *stats = &_Py_stats_struct;
-#define MEM_IS_ZERO(DATA) mem_is_zero((unsigned char*)DATA, sizeof(*(DATA)))
-    int is_zero = (
-        MEM_IS_ZERO(stats->gc_stats)  // is a pointer
-        && MEM_IS_ZERO(&stats->opcode_stats)
-        && MEM_IS_ZERO(&stats->call_stats)
-        && MEM_IS_ZERO(&stats->object_stats)
-    );
-#undef MEM_IS_ZERO
-    if (is_zero) {
-        // gh-108753: -X pystats command line was used, but then _stats_off()
-        // and _stats_clear() have been called: in this case, avoid printing
-        // useless "all zeros" statistics.
-        return 0;
-    }
-
-    FILE *out = stderr;
-    if (to_file) {
-        /* Write to a file instead of stderr. */
-# ifdef MS_WINDOWS
-        const char *dirname = "c:\\temp\\py_stats\\";
-# else
-        const char *dirname = "/tmp/py_stats/";
-# endif
-        /* Use random 160 bit number as file name,
-        * to avoid both accidental collisions and
-        * symlink attacks. */
-        unsigned char rand[20];
-        char hex_name[41];
-        _PyOS_URandomNonblock(rand, 20);
-        for (int i = 0; i < 20; i++) {
-            hex_name[2*i] = Py_hexdigits[rand[i]&15];
-            hex_name[2*i+1] = Py_hexdigits[(rand[i]>>4)&15];
-        }
-        hex_name[40] = '\0';
-        char buf[64];
-        assert(strlen(dirname) + 40 + strlen(".txt") < 64);
-        sprintf(buf, "%s%s.txt", dirname, hex_name);
-        FILE *fout = fopen(buf, "w");
-        if (fout) {
-            out = fout;
-        }
-    }
-    else {
-        fprintf(out, "Specialization stats:\n");
-    }
-    print_stats(out, stats);
-    if (out != stderr) {
-        fclose(out);
-    }
-    return 1;
-}
-
+// SPECIALIZATION_FAIL(opcode, kind): bump the failure_kinds[kind] counter of
+// `opcode` when _PyStats_GET() yields a non-NULL PyStats; expands to a no-op
+// when Py_STATS is not defined.  Macro-local names carry a leading
+// underscore so they cannot capture identifiers used in the arguments.
+#ifdef Py_STATS
 #define SPECIALIZATION_FAIL(opcode, kind) \
 do { \
-    if (_Py_stats) { \
+    PyStats *_stats = _PyStats_GET(); \
+    if (_stats) { \
         int _kind = (kind); \
         assert(_kind < SPECIALIZATION_FAILURE_KINDS); \
-        _Py_stats->opcode_stats[opcode].specialization.failure_kinds[_kind]++; \
+        _stats->opcode_stats[opcode].specialization.failure_kinds[_kind]++; \
     } \
 } while (0)
-
-#endif  // Py_STATS
-
-
-#ifndef SPECIALIZATION_FAIL
+#else
 #  define SPECIALIZATION_FAIL(opcode, kind) ((void)0)
-#endif
+#endif  // Py_STATS
 
 // Initialize warmup counters and optimize instructions. This cannot fail.
 void
index 59baca26793f6c226e32a48bf70ff0d91c848fc6..86dd1395cae4a8e385f7844eb730102d718f1ca2 100644 (file)
@@ -2281,7 +2281,9 @@ static PyObject *
 sys__stats_on_impl(PyObject *module)
 /*[clinic end generated code: output=aca53eafcbb4d9fe input=43b5bfe145299e55]*/
 {
-    _Py_StatsOn();
+    if (_Py_StatsOn() < 0) {
+        return NULL;
+    }
     Py_RETURN_NONE;
 }