From e4058d7cb115286c3c53e0a0be2a7b60af46e073 Mon Sep 17 00:00:00 2001
From: Mark Shannon
Date: Thu, 18 Dec 2025 16:43:44 +0000
Subject: [PATCH] GH-142513: Reimplement executor management (GH-142931)

* Invalidating an executor does not cause arbitrary code to run
* Executors are only freed at safe points
---
 Include/internal/pycore_interp_structs.h |   1 -
 Include/internal/pycore_optimizer.h      |   7 -
 Python/ceval_gil.c                       |   6 +
 Python/optimizer.c                       | 181 +++++++++++------------
 Python/pystate.c                         |   1 -
 5 files changed, 90 insertions(+), 106 deletions(-)

diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h
index 6b3d5711b929..818c4f159591 100644
--- a/Include/internal/pycore_interp_structs.h
+++ b/Include/internal/pycore_interp_structs.h
@@ -947,7 +947,6 @@ struct _is {
     struct _PyExecutorObject *executor_deletion_list_head;
     struct _PyExecutorObject *cold_executor;
     struct _PyExecutorObject *cold_dynamic_executor;
-    int executor_deletion_list_remaining_capacity;
     size_t executor_creation_counter;
     _rare_events rare_events;
     PyDict_WatchCallback builtins_dict_watcher;
diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h
index 295d4909e147..3ee62f172832 100644
--- a/Include/internal/pycore_optimizer.h
+++ b/Include/internal/pycore_optimizer.h
@@ -25,7 +25,6 @@ typedef struct {
    uint8_t opcode;
    uint8_t oparg;
    uint8_t valid;
-   uint8_t linked;
    uint8_t chain_depth;  // Must be big enough for MAX_CHAIN_DEPTH - 1.
    bool warm;
    int32_t index;  // Index of ENTER_EXECUTOR (if code isn't NULL, below).
@@ -55,11 +54,6 @@ typedef struct _PyExecutorObject {
     _PyExitData exits[1];
 } _PyExecutorObject;
 
-/* If pending deletion list gets large enough, then scan,
- * and free any executors that aren't executing
- * i.e. any that aren't a thread's current_executor. */
-#define EXECUTOR_DELETE_LIST_MAX 100
-
 // Export for '_opcode' shared extension (JIT compiler).
 PyAPI_FUNC(_PyExecutorObject*) _Py_GetExecutor(PyCodeObject *code, int offset);
 
@@ -80,7 +74,6 @@ PyAPI_FUNC(void) _Py_Executors_InvalidateCold(PyInterpreterState *interp);
 #else
 # define _Py_Executors_InvalidateDependency(A, B, C) ((void)0)
 # define _Py_Executors_InvalidateAll(A, B) ((void)0)
-# define _Py_Executors_InvalidateCold(A) ((void)0)
 #endif
 
 
diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c
index f6ada3892f80..88cc66e97f34 100644
--- a/Python/ceval_gil.c
+++ b/Python/ceval_gil.c
@@ -1397,13 +1397,19 @@ _Py_HandlePending(PyThreadState *tstate)
     if ((breaker & _PY_GC_SCHEDULED_BIT) != 0) {
         _Py_unset_eval_breaker_bit(tstate, _PY_GC_SCHEDULED_BIT);
         _Py_RunGC(tstate);
+#ifdef _Py_TIER2
+        _Py_ClearExecutorDeletionList(tstate->interp);
+#endif
     }
 
+#ifdef _Py_TIER2
     if ((breaker & _PY_EVAL_JIT_INVALIDATE_COLD_BIT) != 0) {
         _Py_unset_eval_breaker_bit(tstate, _PY_EVAL_JIT_INVALIDATE_COLD_BIT);
         _Py_Executors_InvalidateCold(tstate->interp);
         tstate->interp->executor_creation_counter = JIT_CLEANUP_THRESHOLD;
+        _Py_ClearExecutorDeletionList(tstate->interp);
     }
+#endif
 
     /* GIL drop request */
     if ((breaker & _PY_GIL_DROP_REQUEST_BIT) != 0) {
diff --git a/Python/optimizer.c b/Python/optimizer.c
index fc984a5374a5..1e905850e3de 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -246,8 +246,6 @@ get_oparg(PyObject *self, PyObject *Py_UNUSED(ignored))
 ///////////////////// Experimental UOp Optimizer /////////////////////
 
 static int executor_clear(PyObject *executor);
-static void unlink_executor(_PyExecutorObject *executor);
-
 
 void
 _PyExecutor_Free(_PyExecutorObject *self)
@@ -258,63 +256,76 @@ _PyExecutor_Free(_PyExecutorObject *self)
     PyObject_GC_Del(self);
 }
 
+static void executor_invalidate(PyObject *op);
+
+static void
+executor_clear_exits(_PyExecutorObject *executor)
+{
+    _PyExecutorObject *cold = _PyExecutor_GetColdExecutor();
+    _PyExecutorObject *cold_dynamic = _PyExecutor_GetColdDynamicExecutor();
+    for (uint32_t i = 0; i < executor->exit_count; i++) {
+        _PyExitData *exit = &executor->exits[i];
+        exit->temperature = initial_unreachable_backoff_counter();
+        _PyExecutorObject *old = executor->exits[i].executor;
+        exit->executor = exit->is_dynamic ? cold_dynamic : cold;
+        Py_DECREF(old);
+    }
+}
+
+
 void
 _Py_ClearExecutorDeletionList(PyInterpreterState *interp)
 {
+    if (interp->executor_deletion_list_head == NULL) {
+        return;
+    }
     _PyRuntimeState *runtime = &_PyRuntime;
     HEAD_LOCK(runtime);
     PyThreadState* ts = PyInterpreterState_ThreadHead(interp);
+    while (ts) {
+        _PyExecutorObject *current = (_PyExecutorObject *)ts->current_executor;
+        Py_XINCREF(current);
+        ts = ts->next;
+    }
     HEAD_UNLOCK(runtime);
+    _PyExecutorObject *keep_list = NULL;
+    do {
+        _PyExecutorObject *exec = interp->executor_deletion_list_head;
+        interp->executor_deletion_list_head = exec->vm_data.links.next;
+        if (Py_REFCNT(exec) == 0) {
+            _PyExecutor_Free(exec);
+        } else {
+            exec->vm_data.links.next = keep_list;
+            keep_list = exec;
+        }
+    } while (interp->executor_deletion_list_head != NULL);
+    interp->executor_deletion_list_head = keep_list;
+    HEAD_LOCK(runtime);
+    ts = PyInterpreterState_ThreadHead(interp);
     while (ts) {
         _PyExecutorObject *current = (_PyExecutorObject *)ts->current_executor;
         if (current != NULL) {
-            /* Anything in this list will be unlinked, so we can reuse the
-             * linked field as a reachability marker. */
-            current->vm_data.linked = 1;
+            _Py_DECREF_NO_DEALLOC((PyObject *)current);
         }
-        HEAD_LOCK(runtime);
-        ts = PyThreadState_Next(ts);
-        HEAD_UNLOCK(runtime);
-    }
-    _PyExecutorObject **prev_to_next_ptr = &interp->executor_deletion_list_head;
-    _PyExecutorObject *exec = *prev_to_next_ptr;
-    while (exec != NULL) {
-        if (exec->vm_data.linked) {
-            // This executor is currently executing
-            exec->vm_data.linked = 0;
-            prev_to_next_ptr = &exec->vm_data.links.next;
-        }
-        else {
-            *prev_to_next_ptr = exec->vm_data.links.next;
-            _PyExecutor_Free(exec);
-        }
-        exec = *prev_to_next_ptr;
+        ts = ts->next;
     }
-    interp->executor_deletion_list_remaining_capacity = EXECUTOR_DELETE_LIST_MAX;
+    HEAD_UNLOCK(runtime);
 }
 
 static void
 add_to_pending_deletion_list(_PyExecutorObject *self)
 {
     PyInterpreterState *interp = PyInterpreterState_Get();
+    self->vm_data.links.previous = NULL;
     self->vm_data.links.next = interp->executor_deletion_list_head;
     interp->executor_deletion_list_head = self;
-    if (interp->executor_deletion_list_remaining_capacity > 0) {
-        interp->executor_deletion_list_remaining_capacity--;
-    }
-    else {
-        _Py_ClearExecutorDeletionList(interp);
-    }
 }
 
 static void
 uop_dealloc(PyObject *op) {
     _PyExecutorObject *self = _PyExecutorObject_CAST(op);
-    _PyObject_GC_UNTRACK(self);
+    executor_invalidate(op);
     assert(self->vm_data.code == NULL);
-    unlink_executor(self);
-    // Once unlinked it becomes impossible to invalidate an executor, so do it here.
-    self->vm_data.valid = 0;
     add_to_pending_deletion_list(self);
 }
@@ -1621,7 +1632,6 @@ link_executor(_PyExecutorObject *executor)
         head->vm_data.links.previous = executor;
         interp->executor_list_head = executor;
     }
-    executor->vm_data.linked = true;
     /* executor_list_head must be first in list */
     assert(interp->executor_list_head->vm_data.links.previous == NULL);
 }
@@ -1629,11 +1639,7 @@ link_executor(_PyExecutorObject *executor)
 static void
 unlink_executor(_PyExecutorObject *executor)
 {
-    if (!executor->vm_data.linked) {
-        return;
-    }
     _PyExecutorLinkListNode *links = &executor->vm_data.links;
-    assert(executor->vm_data.valid);
     _PyExecutorObject *next = links->next;
     _PyExecutorObject *prev = links->previous;
     if (next != NULL) {
@@ -1648,7 +1654,6 @@ unlink_executor(_PyExecutorObject *executor)
         assert(interp->executor_list_head == executor);
         interp->executor_list_head = next;
     }
-    executor->vm_data.linked = false;
 }
 
 /* This must be called by optimizers before using the executor */
@@ -1662,61 +1667,47 @@ _Py_ExecutorInit(_PyExecutorObject *executor, const _PyBloomFilter *dependency_s
     link_executor(executor);
 }
 
-_PyExecutorObject *
-_PyExecutor_GetColdExecutor(void)
+static _PyExecutorObject *
+make_cold_executor(uint16_t opcode)
 {
-    PyInterpreterState *interp = _PyInterpreterState_GET();
-    if (interp->cold_executor != NULL) {
-        return interp->cold_executor;
-    }
     _PyExecutorObject *cold = allocate_executor(0, 1);
     if (cold == NULL) {
         Py_FatalError("Cannot allocate core JIT code");
     }
-    ((_PyUOpInstruction *)cold->trace)->opcode = _COLD_EXIT_r00;
-#ifdef _Py_JIT
-    cold->jit_code = NULL;
-    cold->jit_size = 0;
+    ((_PyUOpInstruction *)cold->trace)->opcode = opcode;
     // This is initialized to true so we can prevent the executor
     // from being immediately detected as cold and invalidated.
     cold->vm_data.warm = true;
+#ifdef _Py_JIT
+    cold->jit_code = NULL;
+    cold->jit_size = 0;
     if (_PyJIT_Compile(cold, cold->trace, 1)) {
         Py_DECREF(cold);
         Py_FatalError("Cannot allocate core JIT code");
     }
 #endif
     _Py_SetImmortal((PyObject *)cold);
-    interp->cold_executor = cold;
     return cold;
 }
 
 _PyExecutorObject *
-_PyExecutor_GetColdDynamicExecutor(void)
+_PyExecutor_GetColdExecutor(void)
 {
     PyInterpreterState *interp = _PyInterpreterState_GET();
-    if (interp->cold_dynamic_executor != NULL) {
-        assert(interp->cold_dynamic_executor->trace[0].opcode == _COLD_DYNAMIC_EXIT_r00);
-        return interp->cold_dynamic_executor;
-    }
-    _PyExecutorObject *cold = allocate_executor(0, 1);
-    if (cold == NULL) {
-        Py_FatalError("Cannot allocate core JIT code");
+    if (interp->cold_executor == NULL) {
+        return interp->cold_executor = make_cold_executor(_COLD_EXIT_r00);
     }
-    ((_PyUOpInstruction *)cold->trace)->opcode = _COLD_DYNAMIC_EXIT_r00;
-#ifdef _Py_JIT
-    cold->jit_code = NULL;
-    cold->jit_size = 0;
-    // This is initialized to true so we can prevent the executor
-    // from being immediately detected as cold and invalidated.
-    cold->vm_data.warm = true;
-    if (_PyJIT_Compile(cold, cold->trace, 1)) {
-        Py_DECREF(cold);
-        Py_FatalError("Cannot allocate core JIT code");
+    return interp->cold_executor;
+}
+
+_PyExecutorObject *
+_PyExecutor_GetColdDynamicExecutor(void)
+{
+    PyInterpreterState *interp = _PyInterpreterState_GET();
+    if (interp->cold_dynamic_executor == NULL) {
+        interp->cold_dynamic_executor = make_cold_executor(_COLD_DYNAMIC_EXIT_r00);
     }
-#endif
-    _Py_SetImmortal((PyObject *)cold);
-    interp->cold_dynamic_executor = cold;
-    return cold;
+    return interp->cold_dynamic_executor;
 }
 
 void
@@ -1755,32 +1746,29 @@ _Py_ExecutorDetach(_PyExecutorObject *executor)
     Py_DECREF(executor);
 }
 
-static int
-executor_clear(PyObject *op)
+/* Executors can be invalidated at any time,
+   even with a stop-the-world lock held.
+   Consequently, invalidation must not run arbitrary code,
+   including Py_DECREF of anything other than an executor. */
+static void
+executor_invalidate(PyObject *op)
 {
     _PyExecutorObject *executor = _PyExecutorObject_CAST(op);
     if (!executor->vm_data.valid) {
-        return 0;
+        return;
     }
-    assert(executor->vm_data.valid == 1);
-    unlink_executor(executor);
     executor->vm_data.valid = 0;
-
-    /* It is possible for an executor to form a reference
-     * cycle with itself, so decref'ing a side exit could
-     * free the executor unless we hold a strong reference to it
-     */
-    _PyExecutorObject *cold = _PyExecutor_GetColdExecutor();
-    Py_INCREF(executor);
-    for (uint32_t i = 0; i < executor->exit_count; i++) {
-        executor->exits[i].temperature = initial_unreachable_backoff_counter();
-        _PyExecutorObject *e = executor->exits[i].executor;
-        executor->exits[i].executor = cold;
-        Py_DECREF(e);
-    }
+    unlink_executor(executor);
+    executor_clear_exits(executor);
     _Py_ExecutorDetach(executor);
-    Py_DECREF(executor);
-    return 0;
+    _PyObject_GC_UNTRACK(op);
+}
+
+static int
+executor_clear(PyObject *op)
+{
+    executor_invalidate(op);
+    return 0;
 }
 
 void
@@ -1805,7 +1793,7 @@ _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is
     if (invalidate == NULL) {
         goto error;
     }
-    /* Clearing an executor can deallocate others, so we need to make a list of
+    /* Clearing an executor can clear others, so we need to make a list of
      * executors to invalidate first */
     for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) {
         assert(exec->vm_data.valid);
@@ -1819,7 +1807,7 @@
     }
     for (Py_ssize_t i = 0; i < PyList_GET_SIZE(invalidate); i++) {
         PyObject *exec = PyList_GET_ITEM(invalidate, i);
-        executor_clear(exec);
+        executor_invalidate(exec);
         if (is_invalidation) {
             OPT_STAT_INC(executors_invalidated);
         }
     }
@@ -1851,13 +1839,13 @@ _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation)
 {
     while (interp->executor_list_head) {
         _PyExecutorObject *executor = interp->executor_list_head;
-        assert(executor->vm_data.valid == 1 && executor->vm_data.linked == 1);
+        assert(executor->vm_data.valid);
         if (executor->vm_data.code) {
            // Clear the entire code object so its co_executors array be freed:
            _PyCode_Clear_Executors(executor->vm_data.code);
         }
         else {
-            executor_clear((PyObject *)executor);
+            executor_invalidate((PyObject *)executor);
         }
         if (is_invalidation) {
             OPT_STAT_INC(executors_invalidated);
@@ -1892,7 +1880,7 @@ _Py_Executors_InvalidateCold(PyInterpreterState *interp)
     }
     for (Py_ssize_t i = 0; i < PyList_GET_SIZE(invalidate); i++) {
         PyObject *exec = PyList_GET_ITEM(invalidate, i);
-        executor_clear(exec);
+        executor_invalidate(exec);
     }
     Py_DECREF(invalidate);
     return;
diff --git a/Python/pystate.c b/Python/pystate.c
index 4bf89a234266..7ea8ef91f107 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -570,7 +570,6 @@ init_interpreter(PyInterpreterState *interp,
     interp->compiling = false;
     interp->executor_list_head = NULL;
     interp->executor_deletion_list_head = NULL;
-    interp->executor_deletion_list_remaining_capacity = 0;
     interp->executor_creation_counter = JIT_CLEANUP_THRESHOLD;
     if (interp != &runtime->_main_interpreter) {
         /* Fix the self-referential, statically initialized fields. */

-- 
2.47.3
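
Note: the heart of this change is that executors are no longer freed eagerly when their last reference goes away. uop_dealloc only invalidates the executor and pushes it onto a per-interpreter pending-deletion list; _Py_ClearExecutorDeletionList then sweeps that list at safe points (after a GC run, or after cold executors are invalidated), freeing only executors that no thread is currently running. Below is a minimal standalone sketch of that sweep pattern; Executor, Thread and clear_deletion_list are simplified stand-ins for illustration, not the real CPython structures or API.

    #include <stdlib.h>

    /* Stand-in types (illustrative only). */
    typedef struct Executor {
        struct Executor *next;   /* link in the pending-deletion list */
        long refcnt;             /* non-zero while something still references it */
    } Executor;

    typedef struct Thread {
        struct Thread *next;
        Executor *current_executor;   /* may be NULL */
    } Thread;

    /* Sweep the pending-deletion list: free executors that no thread is
     * currently executing, keep the rest for a later sweep.
     * Returns the new head of the pending-deletion list. */
    Executor *
    clear_deletion_list(Executor *pending, Thread *threads)
    {
        /* Pin every thread's current executor so it cannot be freed. */
        for (Thread *t = threads; t != NULL; t = t->next) {
            if (t->current_executor != NULL) {
                t->current_executor->refcnt++;
            }
        }
        /* Free what is unreferenced; relink what is still in use. */
        Executor *keep = NULL;
        while (pending != NULL) {
            Executor *exec = pending;
            pending = exec->next;
            if (exec->refcnt == 0) {
                free(exec);
            }
            else {
                exec->next = keep;
                keep = exec;
            }
        }
        /* Drop the temporary pins. */
        for (Thread *t = threads; t != NULL; t = t->next) {
            if (t->current_executor != NULL) {
                t->current_executor->refcnt--;
            }
        }
        return keep;
    }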