]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-144888: Replace bloom filter linked lists with continuous arrays to optimize...
authorHai Zhu <haiizhu@outlook.com>
Mon, 16 Mar 2026 15:58:18 +0000 (23:58 +0800)
committerGitHub <noreply@github.com>
Mon, 16 Mar 2026 15:58:18 +0000 (15:58 +0000)
Include/internal/pycore_interp_structs.h
Include/internal/pycore_optimizer.h
InternalDocs/jit.md
Modules/_testinternalcapi.c
Objects/funcobject.c
Python/jit.c
Python/optimizer.c
Python/pylifecycle.c
Python/pystate.c

index 776fb9575c2365b028c8dbca47159f9fe9235fd5..e2008f8303e21be0acee8090c132e554f88c3253 100644 (file)
@@ -14,6 +14,7 @@ extern "C" {
 #include "pycore_structs.h"       // PyHamtObject
 #include "pycore_tstate.h"        // _PyThreadStateImpl
 #include "pycore_typedefs.h"      // _PyRuntimeState
+#include "pycore_uop.h"           // _PyBloomFilter
 
 #define CODE_MAX_WATCHERS 8
 #define CONTEXT_MAX_WATCHERS 8
@@ -972,7 +973,10 @@ struct _is {
 
     // Optimization configuration (thresholds and flags for JIT and interpreter)
     _PyOptimizationConfig opt_config;
-    struct _PyExecutorObject *executor_list_head;
+    _PyBloomFilter *executor_blooms;             // Contiguous bloom filter array
+    struct _PyExecutorObject **executor_ptrs;    // Corresponding executor pointer array
+    size_t executor_count;                       // Number of valid executors
+    size_t executor_capacity;                    // Array capacity
     struct _PyExecutorObject *executor_deletion_list_head;
     struct _PyExecutorObject *cold_executor;
     struct _PyExecutorObject *cold_dynamic_executor;
index c63f0167a0f64ab52c331ff2f68860d336d6174a..cea9fcce237301a24d6dd276928578e812fa1e68 100644 (file)
@@ -128,8 +128,8 @@ typedef struct {
     bool cold;
     uint8_t pending_deletion;
     int32_t index;           // Index of ENTER_EXECUTOR (if code isn't NULL, below).
-    _PyBloomFilter bloom;
-    _PyExecutorLinkListNode links;
+    int32_t bloom_array_idx;        // Index in interp->executor_blooms/executor_ptrs.
+    _PyExecutorLinkListNode links;  // Used by deletion list.
     PyCodeObject *code;  // Weak (NULL if no corresponding ENTER_EXECUTOR).
 } _PyVMData;
 
@@ -157,7 +157,7 @@ typedef struct _PyExecutorObject {
 // Export for '_opcode' shared extension (JIT compiler).
 PyAPI_FUNC(_PyExecutorObject*) _Py_GetExecutor(PyCodeObject *code, int offset);
 
-void _Py_ExecutorInit(_PyExecutorObject *, const _PyBloomFilter *);
+int _Py_ExecutorInit(_PyExecutorObject *, const _PyBloomFilter *);
 void _Py_ExecutorDetach(_PyExecutorObject *);
 void _Py_BloomFilter_Init(_PyBloomFilter *);
 void _Py_BloomFilter_Add(_PyBloomFilter *bloom, void *obj);
index 1740b22b85f77b9c69bba26a05f075c5d848cc14..decfccad2d8d37581aebe9f33287cf6598018b07 100644 (file)
@@ -78,8 +78,8 @@ and execution returns to the adaptive interpreter.
 ## Invalidating Executors
 
 In addition to being stored on the code object, each executor is also
-inserted into a list of all executors, which is stored in the interpreter
-state's `executor_list_head` field. This list is used when it is necessary
+inserted into contiguous arrays (`executor_blooms` and `executor_ptrs`)
+stored in the interpreter state. These arrays are used when it is necessary
 to invalidate executors because values they used in their construction may
 have changed.
 
index aa5911ef2fb449ecd9b4d4eb7c6a5803a74e8c9e..8c316b7c8ddda0492ce0307049ed4740a9aa6029 100644 (file)
@@ -278,10 +278,8 @@ get_jit_code_ranges(PyObject *self, PyObject *Py_UNUSED(args))
     if (interp == NULL) {
         return ranges;
     }
-    for (_PyExecutorObject *exec = interp->executor_list_head;
-         exec != NULL;
-         exec = exec->vm_data.links.next)
-    {
+    for (size_t i = 0; i < interp->executor_count; i++) {
+        _PyExecutorObject *exec = interp->executor_ptrs[i];
         if (exec->jit_code == NULL || exec->jit_size == 0) {
             continue;
         }
index fc32826fb3a861701a14bf07533375cdaf4d0aa3..a7a3d9c78ef3d0bbf94319904d6a29bd60030128 100644 (file)
@@ -12,7 +12,6 @@
 #include "pycore_setobject.h"     // _PySet_NextEntry()
 #include "pycore_stats.h"
 #include "pycore_weakref.h"       // FT_CLEAR_WEAKREFS()
-#include "pycore_optimizer.h"     // _Py_Executors_InvalidateDependency
 
 static const char *
 func_event_name(PyFunction_WatchEvent event) {
index 3e0a0aa8bfcc81f9a0bde36dad85c7231b287691..4990c743224d3ca5f75de5bc2f327c813f34004e 100644 (file)
@@ -62,6 +62,23 @@ jit_error(const char *message)
 
 static size_t _Py_jit_shim_size = 0;
 
+static int
+address_in_executor_array(_PyExecutorObject **ptrs, size_t count, uintptr_t addr)
+{
+    for (size_t i = 0; i < count; i++) {
+        _PyExecutorObject *exec = ptrs[i];
+        if (exec->jit_code == NULL || exec->jit_size == 0) {
+            continue;
+        }
+        uintptr_t start = (uintptr_t)exec->jit_code;
+        uintptr_t end = start + exec->jit_size;
+        if (addr >= start && addr < end) {
+            return 1;
+        }
+    }
+    return 0;
+}
+
 static int
 address_in_executor_list(_PyExecutorObject *head, uintptr_t addr)
 {
@@ -94,7 +111,7 @@ _PyJIT_AddressInJitCode(PyInterpreterState *interp, uintptr_t addr)
             return 1;
         }
     }
-    if (address_in_executor_list(interp->executor_list_head, addr)) {
+    if (address_in_executor_array(interp->executor_ptrs, interp->executor_count, addr)) {
         return 1;
     }
     if (address_in_executor_list(interp->executor_deletion_list_head, addr)) {
index 7315bb6b9f603d2ffdb4bb3061303fcd12302f09..a9e788d0dcb1d5fe139ac6b1866d326646730f3e 100644 (file)
@@ -1379,7 +1379,10 @@ make_executor_from_uops(_PyThreadStateImpl *tstate, _PyUOpInstruction *buffer, i
     // linking of executor. Otherwise, the GC tries to untrack a
     // still untracked object during dealloc.
     _PyObject_GC_TRACK(executor);
-    _Py_ExecutorInit(executor, dependencies);
+    if (_Py_ExecutorInit(executor, dependencies) < 0) {
+        Py_DECREF(executor);
+        return NULL;
+    }
 #ifdef Py_DEBUG
     char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
     int lltrace = 0;
@@ -1646,59 +1649,63 @@ bloom_filter_may_contain(_PyBloomFilter *bloom, _PyBloomFilter *hashes)
     return true;
 }
 
-static void
-link_executor(_PyExecutorObject *executor)
+static int
+link_executor(_PyExecutorObject *executor, const _PyBloomFilter *bloom)
 {
     PyInterpreterState *interp = _PyInterpreterState_GET();
-    _PyExecutorLinkListNode *links = &executor->vm_data.links;
-    _PyExecutorObject *head = interp->executor_list_head;
-    if (head == NULL) {
-        interp->executor_list_head = executor;
-        links->previous = NULL;
-        links->next = NULL;
-    }
-    else {
-        assert(head->vm_data.links.previous == NULL);
-        links->previous = NULL;
-        links->next = head;
-        head->vm_data.links.previous = executor;
-        interp->executor_list_head = executor;
-    }
-    /* executor_list_head must be first in list */
-    assert(interp->executor_list_head->vm_data.links.previous == NULL);
+    if (interp->executor_count == interp->executor_capacity) {
+        size_t new_cap = interp->executor_capacity ? interp->executor_capacity * 2 : 64;
+        _PyBloomFilter *new_blooms = PyMem_Realloc(
+            interp->executor_blooms, new_cap * sizeof(_PyBloomFilter));
+        if (new_blooms == NULL) {
+            return -1;
+        }
+        _PyExecutorObject **new_ptrs = PyMem_Realloc(
+            interp->executor_ptrs, new_cap * sizeof(_PyExecutorObject *));
+        if (new_ptrs == NULL) {
+            /* Revert blooms realloc — the old pointer may have been freed by
+             * a successful realloc, but new_blooms is the valid pointer. */
+            interp->executor_blooms = new_blooms;
+            return -1;
+        }
+        interp->executor_blooms = new_blooms;
+        interp->executor_ptrs = new_ptrs;
+        interp->executor_capacity = new_cap;
+    }
+    size_t idx = interp->executor_count++;
+    interp->executor_blooms[idx] = *bloom;
+    interp->executor_ptrs[idx] = executor;
+    executor->vm_data.bloom_array_idx = (int32_t)idx;
+    return 0;
 }
 
 static void
 unlink_executor(_PyExecutorObject *executor)
 {
-    _PyExecutorLinkListNode *links = &executor->vm_data.links;
-    _PyExecutorObject *next = links->next;
-    _PyExecutorObject *prev = links->previous;
-    if (next != NULL) {
-        next->vm_data.links.previous = prev;
-    }
-    if (prev != NULL) {
-        prev->vm_data.links.next = next;
-    }
-    else {
-        // prev == NULL implies that executor is the list head
-        PyInterpreterState *interp = PyInterpreterState_Get();
-        assert(interp->executor_list_head == executor);
-        interp->executor_list_head = next;
+    PyInterpreterState *interp = PyInterpreterState_Get();
+    int32_t idx = executor->vm_data.bloom_array_idx;
+    assert(idx >= 0 && (size_t)idx < interp->executor_count);
+    size_t last = --interp->executor_count;
+    if ((size_t)idx != last) {
+        /* Swap-remove: move the last element into the vacated slot */
+        interp->executor_blooms[idx] = interp->executor_blooms[last];
+        interp->executor_ptrs[idx] = interp->executor_ptrs[last];
+        interp->executor_ptrs[idx]->vm_data.bloom_array_idx = idx;
     }
+    executor->vm_data.bloom_array_idx = -1;
 }
 
 /* This must be called by optimizers before using the executor */
-void
+int
 _Py_ExecutorInit(_PyExecutorObject *executor, const _PyBloomFilter *dependency_set)
 {
     executor->vm_data.valid = true;
     executor->vm_data.pending_deletion = 0;
     executor->vm_data.code = NULL;
-    for (int i = 0; i < _Py_BLOOM_FILTER_WORDS; i++) {
-        executor->vm_data.bloom.bits[i] = dependency_set->bits[i];
+    if (link_executor(executor, dependency_set) < 0) {
+        return -1;
     }
-    link_executor(executor);
+    return 0;
 }
 
 static _PyExecutorObject *
@@ -1809,11 +1816,15 @@ void
 _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj)
 {
     assert(executor->vm_data.valid);
-    _Py_BloomFilter_Add(&executor->vm_data.bloom, obj);
+    PyInterpreterState *interp = _PyInterpreterState_GET();
+    int32_t idx = executor->vm_data.bloom_array_idx;
+    assert(idx >= 0 && (size_t)idx < interp->executor_count);
+    _Py_BloomFilter_Add(&interp->executor_blooms[idx], obj);
 }
 
 /* Invalidate all executors that depend on `obj`
- * May cause other executors to be invalidated as well
+ * May cause other executors to be invalidated as well.
+ * Uses contiguous bloom filter array for cache-friendly scanning.
  */
 void
 _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is_invalidation)
@@ -1821,23 +1832,20 @@ _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is
     _PyBloomFilter obj_filter;
     _Py_BloomFilter_Init(&obj_filter);
     _Py_BloomFilter_Add(&obj_filter, obj);
-    /* Walk the list of executors */
-    /* TO DO -- Use a tree to avoid traversing as many objects */
+    /* Scan contiguous bloom filter array */
     PyObject *invalidate = PyList_New(0);
     if (invalidate == NULL) {
         goto error;
     }
     /* Clearing an executor can clear others, so we need to make a list of
      * executors to invalidate first */
-    for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) {
-        assert(exec->vm_data.valid);
-        _PyExecutorObject *next = exec->vm_data.links.next;
-        if (bloom_filter_may_contain(&exec->vm_data.bloom, &obj_filter) &&
-            PyList_Append(invalidate, (PyObject *)exec))
+    for (size_t i = 0; i < interp->executor_count; i++) {
+        assert(interp->executor_ptrs[i]->vm_data.valid);
+        if (bloom_filter_may_contain(&interp->executor_blooms[i], &obj_filter) &&
+            PyList_Append(invalidate, (PyObject *)interp->executor_ptrs[i]))
         {
             goto error;
         }
-        exec = next;
     }
     for (Py_ssize_t i = 0; i < PyList_GET_SIZE(invalidate); i++) {
         PyObject *exec = PyList_GET_ITEM(invalidate, i);
@@ -1859,8 +1867,9 @@ error:
 void
 _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation)
 {
-    while (interp->executor_list_head) {
-        _PyExecutorObject *executor = interp->executor_list_head;
+    while (interp->executor_count > 0) {
+        /* Invalidate from the end to avoid repeated swap-remove shifts */
+        _PyExecutorObject *executor = interp->executor_ptrs[interp->executor_count - 1];
         assert(executor->vm_data.valid);
         if (executor->vm_data.code) {
             // Clear the entire code object so its co_executors array be freed:
@@ -1878,8 +1887,7 @@ _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation)
 void
 _Py_Executors_InvalidateCold(PyInterpreterState *interp)
 {
-    /* Walk the list of executors */
-    /* TO DO -- Use a tree to avoid traversing as many objects */
+    /* Scan contiguous executor array */
     PyObject *invalidate = PyList_New(0);
     if (invalidate == NULL) {
         goto error;
@@ -1887,9 +1895,9 @@ _Py_Executors_InvalidateCold(PyInterpreterState *interp)
 
     /* Clearing an executor can deallocate others, so we need to make a list of
      * executors to invalidate first */
-    for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) {
+    for (size_t i = 0; i < interp->executor_count; i++) {
+        _PyExecutorObject *exec = interp->executor_ptrs[i];
         assert(exec->vm_data.valid);
-        _PyExecutorObject *next = exec->vm_data.links.next;
 
         if (exec->vm_data.cold && PyList_Append(invalidate, (PyObject *)exec) < 0) {
             goto error;
@@ -1897,8 +1905,6 @@ _Py_Executors_InvalidateCold(PyInterpreterState *interp)
         else {
             exec->vm_data.cold = true;
         }
-
-        exec = next;
     }
     for (Py_ssize_t i = 0; i < PyList_GET_SIZE(invalidate); i++) {
         PyObject *exec = PyList_GET_ITEM(invalidate, i);
@@ -2142,9 +2148,8 @@ _PyDumpExecutors(FILE *out)
     fprintf(out, "    rankdir = \"LR\"\n\n");
     fprintf(out, "    node [colorscheme=greys9]\n");
     PyInterpreterState *interp = PyInterpreterState_Get();
-    for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) {
-        executor_to_gv(exec, out);
-        exec = exec->vm_data.links.next;
+    for (size_t i = 0; i < interp->executor_count; i++) {
+        executor_to_gv(interp->executor_ptrs[i], out);
     }
     fprintf(out, "}\n\n");
     return 0;
index 2b8e9a02cb6184ad32fb55c9b25bfed20a24aa39..68052a91d6a1f152905fc01d0570bd66073411c0 100644 (file)
@@ -1761,6 +1761,12 @@ finalize_modules(PyThreadState *tstate)
     interp->compiling = false;
 #ifdef _Py_TIER2
     _Py_Executors_InvalidateAll(interp, 0);
+    PyMem_Free(interp->executor_blooms);
+    PyMem_Free(interp->executor_ptrs);
+    interp->executor_blooms = NULL;
+    interp->executor_ptrs = NULL;
+    interp->executor_count = 0;
+    interp->executor_capacity = 0;
 #endif
 
     // Stop watching __builtin__ modifications
index 17b8430b19c188ba10c1502aa0dad6ac28a84dc4..fcb73b4dcefe1d0f58b31df50a55269f3ff10582 100644 (file)
@@ -597,7 +597,10 @@ init_interpreter(PyInterpreterState *interp,
     interp->_code_object_generation = 0;
     interp->jit = false;
     interp->compiling = false;
-    interp->executor_list_head = NULL;
+    interp->executor_blooms = NULL;
+    interp->executor_ptrs = NULL;
+    interp->executor_count = 0;
+    interp->executor_capacity = 0;
     interp->executor_deletion_list_head = NULL;
     interp->executor_creation_counter = JIT_CLEANUP_THRESHOLD;