* Track the current executor, not the previous one, on the thread-state.
* Batch dead executors on a per-interpreter pending-deletion list and free them later, so that executors no longer need to be incref'ed every time one is entered; this is an ad-hoc form of deferred reference counting.
/* The thread's exception stack entry. (Always the last entry.) */
_PyErr_StackItem exc_state;
- PyObject *previous_executor;
+ PyObject *current_executor;
uint64_t dict_global_version;
PyObject *common_consts[NUM_COMMON_CONSTANTS];
bool jit;
struct _PyExecutorObject *executor_list_head;
+ struct _PyExecutorObject *executor_deletion_list_head;
+ int executor_deletion_list_remaining_capacity;
size_t trace_run_counter;
_rare_events rare_events;
PyDict_WatchCallback builtins_dict_watcher;
[INSTRUMENTED_RESUME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG },
[INSTRUMENTED_RETURN_VALUE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[INSTRUMENTED_YIELD_VALUE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG },
- [INTERPRETER_EXIT] = { true, INSTR_FMT_IX, 0 },
+ [INTERPRETER_EXIT] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG },
[IS_OP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG },
[JUMP_BACKWARD] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[JUMP_BACKWARD_JIT] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
typedef struct {
uint32_t target;
_Py_BackoffCounter temperature;
- const struct _PyExecutorObject *executor;
+ struct _PyExecutorObject *executor; /* Executor entered when this exit is taken; may be NULL, in which case the exit-handling code installs one first. */
} _PyExitData;
typedef struct _PyExecutorObject {
_PyExitData exits[1];
} _PyExecutorObject;
+/* Capacity of the per-interpreter pending deletion list.
+ * Once this many executors have been queued since the last scan,
+ * queuing another one triggers a scan of the list that frees any
+ * executors that aren't executing,
+ * i.e. any that aren't a thread's current_executor. */
+#define EXECUTOR_DELETE_LIST_MAX 100
// Export for '_opcode' shared extension (JIT compiler).
PyAPI_FUNC(_PyExecutorObject*) _Py_GetExecutor(PyCodeObject *code, int offset);
}
PyAPI_FUNC(int) _PyDumpExecutors(FILE *out);
+#ifdef _Py_TIER2
+extern void _Py_ClearExecutorDeletionList(PyInterpreterState *interp);
+#endif
#ifdef __cplusplus
}
[_POP_TOP_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG,
[_POP_TWO_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG,
[_CHECK_FUNCTION] = HAS_DEOPT_FLAG,
- [_START_EXECUTOR] = HAS_ESCAPES_FLAG,
+ [_START_EXECUTOR] = 0,
[_MAKE_WARM] = 0,
[_FATAL_ERROR] = 0,
[_DEOPT] = 0,
tstate->current_frame = frame->previous;
assert(!_PyErr_Occurred(tstate));
PyObject *result = PyStackRef_AsPyObjectSteal(retval);
+#if !Py_TAIL_CALL_INTERP
+ assert(frame == &entry.frame);
+#endif
+#ifdef _Py_TIER2
+ _PyStackRef executor = frame->localsplus[0];
+ assert(tstate->current_executor == NULL);
+ if (!PyStackRef_IsNull(executor)) {
+ tstate->current_executor = PyStackRef_AsPyObjectBorrow(executor);
+ PyStackRef_CLOSE(executor);
+ }
+#endif
LLTRACE_RESUME_FRAME();
return result;
}
}
else {
this_instr[1].counter = initial_jump_backoff_counter();
- assert(tstate->previous_executor == NULL);
- tstate->previous_executor = Py_None;
+ assert(tstate->current_executor == NULL);
GOTO_TIER_TWO(executor);
}
}
assert(executor->vm_data.index == INSTR_OFFSET() - 1);
assert(executor->vm_data.code == code);
assert(executor->vm_data.valid);
- assert(tstate->previous_executor == NULL);
+ assert(tstate->current_executor == NULL);
/* If the eval breaker is set then stay in tier 1.
* This avoids any potentially infinite loops
* involving _RESUME_CHECK */
}
DISPATCH_GOTO();
}
- tstate->previous_executor = Py_None;
- Py_INCREF(executor);
GOTO_TIER_TWO(executor);
#else
Py_FatalError("ENTER_EXECUTOR is not supported in this build");
exit->temperature = initial_temperature_backoff_counter();
Py_CLEAR(exit->executor);
}
- tstate->previous_executor = (PyObject *)current_executor;
if (exit->executor == NULL) {
_Py_BackoffCounter temperature = exit->temperature;
if (!backoff_counter_triggers(temperature)) {
}
exit->executor = executor;
}
- Py_INCREF(exit->executor);
GOTO_TIER_TWO(exit->executor);
}
}
tier2 op(_START_EXECUTOR, (executor/4 --)) {
- Py_CLEAR(tstate->previous_executor);
#ifndef _Py_JIT
current_executor = (_PyExecutorObject*)executor;
#endif
}
tier2 op(_DEOPT, (--)) {
- tstate->previous_executor = (PyObject *)current_executor;
GOTO_TIER_ONE(_PyFrame_GetBytecode(frame) + CURRENT_TARGET());
}
tier2 op(_ERROR_POP_N, (target/2 --)) {
- tstate->previous_executor = (PyObject *)current_executor;
assert(oparg == 0);
frame->instr_ptr = _PyFrame_GetBytecode(frame) + target;
SYNC_SP();
if (frame->owner == FRAME_OWNED_BY_INTERPRETER) {
/* Restore previous frame and exit */
tstate->current_frame = frame->previous;
+#if !Py_TAIL_CALL_INTERP
+ assert(frame == &entry.frame);
+#endif
+#ifdef _Py_TIER2
+ _PyStackRef executor = frame->localsplus[0];
+ assert(tstate->current_executor == NULL);
+ if (!PyStackRef_IsNull(executor)) {
+ tstate->current_executor = PyStackRef_AsPyObjectBorrow(executor);
+ PyStackRef_CLOSE(executor);
+ }
+#endif
return NULL;
}
next_instr = frame->instr_ptr;
#define DONT_SLP_VECTORIZE
#endif
+typedef struct { /* Entry frame bundled with storage for a single stack entry. */
+ _PyInterpreterFrame frame; /* Set up by _PyEval_EvalFrameDefault with owner FRAME_OWNED_BY_INTERPRETER. */
+ _PyStackRef stack[1]; /* One-slot stack; frame.stackpointer is initialised to point at it. */
+} _PyEntryFrame;
+
PyObject* _Py_HOT_FUNCTION DONT_SLP_VECTORIZE
_PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int throwflag)
{
int oparg; /* Current opcode argument, if any */
assert(tstate->current_frame == NULL || tstate->current_frame->stackpointer != NULL);
#endif
- _PyInterpreterFrame entry_frame;
+ _PyEntryFrame entry;
if (_Py_EnterRecursiveCallTstate(tstate, "")) {
assert(frame->owner != FRAME_OWNED_BY_INTERPRETER);
* These are cached values from the frame and code object. */
_Py_CODEUNIT *next_instr;
_PyStackRef *stack_pointer;
- entry_frame.localsplus[0] = PyStackRef_NULL;
+ entry.stack[0] = PyStackRef_NULL;
#ifdef Py_STACKREF_DEBUG
- entry_frame.f_funcobj = PyStackRef_None;
+ entry.frame.f_funcobj = PyStackRef_None;
#elif defined(Py_DEBUG)
/* Set these to invalid but identifiable values for debugging. */
- entry_frame.f_funcobj = (_PyStackRef){.bits = 0xaaa0};
- entry_frame.f_locals = (PyObject*)0xaaa1;
- entry_frame.frame_obj = (PyFrameObject*)0xaaa2;
- entry_frame.f_globals = (PyObject*)0xaaa3;
- entry_frame.f_builtins = (PyObject*)0xaaa4;
+ entry.frame.f_funcobj = (_PyStackRef){.bits = 0xaaa0};
+ entry.frame.f_locals = (PyObject*)0xaaa1;
+ entry.frame.frame_obj = (PyFrameObject*)0xaaa2;
+ entry.frame.f_globals = (PyObject*)0xaaa3;
+ entry.frame.f_builtins = (PyObject*)0xaaa4;
#endif
- entry_frame.f_executable = PyStackRef_None;
- entry_frame.instr_ptr = (_Py_CODEUNIT *)_Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS + 1;
- entry_frame.stackpointer = entry_frame.localsplus;
- entry_frame.owner = FRAME_OWNED_BY_INTERPRETER;
- entry_frame.visited = 0;
- entry_frame.return_offset = 0;
+ entry.frame.f_executable = PyStackRef_None;
+ entry.frame.instr_ptr = (_Py_CODEUNIT *)_Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS + 1;
+ entry.frame.stackpointer = entry.stack;
+ entry.frame.owner = FRAME_OWNED_BY_INTERPRETER;
+ entry.frame.visited = 0;
+ entry.frame.return_offset = 0;
#ifdef Py_DEBUG
- entry_frame.lltrace = 0;
+ entry.frame.lltrace = 0;
#endif
/* Push frame */
- entry_frame.previous = tstate->current_frame;
- frame->previous = &entry_frame;
+ entry.frame.previous = tstate->current_frame;
+ frame->previous = &entry.frame;
tstate->current_frame = frame;
+ entry.frame.localsplus[0] = PyStackRef_NULL;
+#ifdef _Py_TIER2
+ if (tstate->current_executor != NULL) {
+ entry.frame.localsplus[0] = PyStackRef_FromPyObjectNew(tstate->current_executor);
+ tstate->current_executor = NULL;
+ }
+#endif
/* support for generator.throw() */
if (throwflag) {
stack_pointer = _PyFrame_GetStackPointer(frame);
#if Py_TAIL_CALL_INTERP
# if Py_STATS
- return _TAIL_CALL_error(frame, stack_pointer, tstate, next_instr, 0, lastopcode);
+ return _TAIL_CALL_error(frame, stack_pointer, tstate, next_instr, 0, lastopcode);
# else
- return _TAIL_CALL_error(frame, stack_pointer, tstate, next_instr, 0);
+ return _TAIL_CALL_error(frame, stack_pointer, tstate, next_instr, 0);
# endif
#else
goto error;
do { \
OPT_STAT_INC(traces_executed); \
_PyExecutorObject *_executor = (EXECUTOR); \
+ tstate->current_executor = (PyObject *)_executor; \
jit_func jitted = _executor->jit_code; \
/* Keep the shim frame alive via the executor: */ \
Py_INCREF(_executor); \
next_instr = jitted(frame, stack_pointer, tstate); \
Py_DECREF(_executor); \
- Py_CLEAR(tstate->previous_executor); \
frame = tstate->current_frame; \
stack_pointer = _PyFrame_GetStackPointer(frame); \
if (next_instr == NULL) { \
#define GOTO_TIER_TWO(EXECUTOR) \
do { \
OPT_STAT_INC(traces_executed); \
- next_uop = (EXECUTOR)->trace; \
+ _PyExecutorObject *_executor = (EXECUTOR); \
+ tstate->current_executor = (PyObject *)_executor; \
+ next_uop = _executor->trace; \
assert(next_uop->opcode == _START_EXECUTOR); \
goto enter_tier_two; \
} while (0)
#define GOTO_TIER_ONE(TARGET) \
do \
{ \
+ tstate->current_executor = NULL; \
next_instr = (TARGET); \
+ assert(tstate->current_executor == NULL); \
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); \
_PyFrame_SetStackPointer(frame, stack_pointer); \
- Py_CLEAR(tstate->previous_executor); \
stack_pointer = _PyFrame_GetStackPointer(frame); \
if (next_instr == NULL) \
{ \
Py_CLEAR(exit->executor);
stack_pointer = _PyFrame_GetStackPointer(frame);
}
- tstate->previous_executor = (PyObject *)current_executor;
if (exit->executor == NULL) {
_Py_BackoffCounter temperature = exit->temperature;
if (!backoff_counter_triggers(temperature)) {
}
exit->executor = executor;
}
- Py_INCREF(exit->executor);
GOTO_TIER_TWO(exit->executor);
break;
}
case _START_EXECUTOR: {
PyObject *executor = (PyObject *)CURRENT_OPERAND0();
- _PyFrame_SetStackPointer(frame, stack_pointer);
- Py_CLEAR(tstate->previous_executor);
- stack_pointer = _PyFrame_GetStackPointer(frame);
#ifndef _Py_JIT
current_executor = (_PyExecutorObject*)executor;
#endif
}
case _DEOPT: {
- tstate->previous_executor = (PyObject *)current_executor;
GOTO_TIER_ONE(_PyFrame_GetBytecode(frame) + CURRENT_TARGET());
break;
}
case _ERROR_POP_N: {
oparg = CURRENT_OPARG();
uint32_t target = (uint32_t)CURRENT_OPERAND0();
- tstate->previous_executor = (PyObject *)current_executor;
assert(oparg == 0);
frame->instr_ptr = _PyFrame_GetBytecode(frame) + target;
GOTO_TIER_ONE(NULL);
assert(executor->vm_data.index == INSTR_OFFSET() - 1);
assert(executor->vm_data.code == code);
assert(executor->vm_data.valid);
- assert(tstate->previous_executor == NULL);
+ assert(tstate->current_executor == NULL);
if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & _PY_EVAL_EVENTS_MASK) {
opcode = executor->vm_data.opcode;
oparg = (oparg & ~255) | executor->vm_data.oparg;
}
DISPATCH_GOTO();
}
- tstate->previous_executor = Py_None;
- Py_INCREF(executor);
GOTO_TIER_TWO(executor);
#else
Py_FatalError("ENTER_EXECUTOR is not supported in this build");
tstate->current_frame = frame->previous;
assert(!_PyErr_Occurred(tstate));
PyObject *result = PyStackRef_AsPyObjectSteal(retval);
+ #if !Py_TAIL_CALL_INTERP
+ assert(frame == &entry.frame);
+ #endif
+ #ifdef _Py_TIER2
+ _PyStackRef executor = frame->localsplus[0];
+ assert(tstate->current_executor == NULL);
+ if (!PyStackRef_IsNull(executor)) {
+ tstate->current_executor = PyStackRef_AsPyObjectBorrow(executor);
+ stack_pointer += -1;
+ assert(WITHIN_STACK_BOUNDS());
+ _PyFrame_SetStackPointer(frame, stack_pointer);
+ PyStackRef_CLOSE(executor);
+ stack_pointer = _PyFrame_GetStackPointer(frame);
+ stack_pointer += 1;
+ }
+ #endif
LLTRACE_RESUME_FRAME();
return result;
}
_PyFrame_SetStackPointer(frame, stack_pointer);
this_instr[1].counter = initial_jump_backoff_counter();
stack_pointer = _PyFrame_GetStackPointer(frame);
- assert(tstate->previous_executor == NULL);
- tstate->previous_executor = Py_None;
+ assert(tstate->current_executor == NULL);
GOTO_TIER_TWO(executor);
}
}
frame->return_offset = 0;
if (frame->owner == FRAME_OWNED_BY_INTERPRETER) {
tstate->current_frame = frame->previous;
+ #if !Py_TAIL_CALL_INTERP
+ assert(frame == &entry.frame);
+ #endif
+ #ifdef _Py_TIER2
+ _PyStackRef executor = frame->localsplus[0];
+ assert(tstate->current_executor == NULL);
+ if (!PyStackRef_IsNull(executor)) {
+ tstate->current_executor = PyStackRef_AsPyObjectBorrow(executor);
+ PyStackRef_CLOSE(executor);
+ }
+ #endif
return NULL;
}
next_instr = frame->instr_ptr;
static int executor_clear(PyObject *executor);
static void unlink_executor(_PyExecutorObject *executor);
+/* Release an executor's memory immediately.
+ *
+ * When _Py_JIT is defined, _PyJIT_Free() is called on the executor first
+ * (presumably to release its JIT-compiled code -- confirm in the JIT
+ * sources); the object itself is then handed to PyObject_GC_Del().
+ * Nothing here unlinks or untracks the executor.
+ */
+static void
+free_executor(_PyExecutorObject *self)
+{
+#ifdef _Py_JIT
+ _PyJIT_Free(self);
+#endif
+ PyObject_GC_Del(self);
+}
+/* Free every executor on interp's pending deletion list that is not some
+ * thread's current_executor, then reset the list's remaining capacity.
+ *
+ * Works in two passes:
+ *   1. Walk all thread states of interp and set vm_data.linked on each
+ *      thread's current_executor, using the flag as an "in use" mark.
+ *      The runtime HEAD lock is held only while fetching the list head and
+ *      each next pointer, not while the thread state itself is inspected.
+ *   2. Walk interp->executor_deletion_list_head (chained through
+ *      vm_data.links.next). Marked entries have the mark cleared and stay
+ *      on the list; unmarked entries are spliced out and passed to
+ *      free_executor().
+ *
+ * NOTE(review): pass 1 sets linked on every current_executor, including
+ * ones that are not on the deletion list, and only list members get it
+ * cleared in pass 2 -- confirm this is harmless for an executor that was
+ * already unlinked but has not yet been deallocated.
+ */
+void
+_Py_ClearExecutorDeletionList(PyInterpreterState *interp)
+{
+ _PyRuntimeState *runtime = &_PyRuntime;
+ HEAD_LOCK(runtime);
+ PyThreadState* ts = PyInterpreterState_ThreadHead(interp);
+ HEAD_UNLOCK(runtime);
+ while (ts) {
+ _PyExecutorObject *current = (_PyExecutorObject *)ts->current_executor;
+ if (current != NULL) {
+ /* Anything in this list will be unlinked, so we can reuse the
+ * linked field as a reachability marker. */
+ current->vm_data.linked = 1;
+ }
+ HEAD_LOCK(runtime);
+ ts = PyThreadState_Next(ts);
+ HEAD_UNLOCK(runtime);
+ }
+ _PyExecutorObject **prev_to_next_ptr = &interp->executor_deletion_list_head; /* Address of the pointer that refers to exec, so entries can be spliced out in place. */
+ _PyExecutorObject *exec = *prev_to_next_ptr;
+ while (exec != NULL) {
+ if (exec->vm_data.linked) {
+ // This executor is currently executing
+ exec->vm_data.linked = 0;
+ prev_to_next_ptr = &exec->vm_data.links.next; /* Keep it on the list and step past it. */
+ }
+ else {
+ *prev_to_next_ptr = exec->vm_data.links.next; /* Splice out before freeing; exec is not touched afterwards. */
+ free_executor(exec);
+ }
+ exec = *prev_to_next_ptr;
+ }
+ interp->executor_deletion_list_remaining_capacity = EXECUTOR_DELETE_LIST_MAX;
+}
+/* Queue an executor for deferred freeing instead of freeing it at once.
+ *
+ * The executor is pushed onto the head of the current interpreter's
+ * pending deletion list, reusing vm_data.links.next as the chain pointer.
+ * While the list has remaining capacity the counter is just decremented;
+ * once it has run out, _Py_ClearExecutorDeletionList() is called to scan
+ * the list. Because self is pushed before that scan, it can be freed by
+ * the same call if no thread has it as its current_executor.
+ */
+static void
+add_to_pending_deletion_list(_PyExecutorObject *self)
+{
+ PyInterpreterState *interp = PyInterpreterState_Get();
+ self->vm_data.links.next = interp->executor_deletion_list_head;
+ interp->executor_deletion_list_head = self;
+ if (interp->executor_deletion_list_remaining_capacity > 0) {
+ interp->executor_deletion_list_remaining_capacity--;
+ }
+ else {
+ _Py_ClearExecutorDeletionList(interp);
+ }
+}
+
static void
uop_dealloc(PyObject *op) {
_PyExecutorObject *self = _PyExecutorObject_CAST(op);
_PyObject_GC_UNTRACK(self);
assert(self->vm_data.code == NULL);
unlink_executor(self);
-#ifdef _Py_JIT
- _PyJIT_Free(self);
-#endif
- PyObject_GC_Del(self);
+ // Once unlinked it becomes impossible to invalidate an executor, so do it here.
+ self->vm_data.valid = 0;
+ add_to_pending_deletion_list(self); // Not freed here: the memory is released later by _Py_ClearExecutorDeletionList().
}
const char *
interp->sys_trace_initialized = false;
interp->jit = false;
interp->executor_list_head = NULL;
+ interp->executor_deletion_list_head = NULL;
+ interp->executor_deletion_list_remaining_capacity = 0;
interp->trace_run_counter = JIT_CLEANUP_THRESHOLD;
if (interp != &runtime->_main_interpreter) {
/* Fix the self-referential, statically initialized fields. */
Py_CLEAR(interp->after_forkers_child);
#endif
+
+#ifdef _Py_TIER2
+ _Py_ClearExecutorDeletionList(interp);
+#endif
_PyAST_Fini(interp);
_PyWarnings_Fini(interp);
_PyAtExit_Fini(interp);
tstate->datastack_top = NULL;
tstate->datastack_limit = NULL;
tstate->what_event = -1;
- tstate->previous_executor = NULL;
+ tstate->current_executor = NULL;
tstate->dict_global_version = 0;
_tstate->c_stack_soft_limit = UINTPTR_MAX;
#define GOTO_TIER_TWO(EXECUTOR) \
do { \
OPT_STAT_INC(traces_executed); \
- jit_func_preserve_none jitted = (EXECUTOR)->jit_side_entry; \
+ _PyExecutorObject *_executor = (EXECUTOR); \
+ tstate->current_executor = (PyObject *)_executor; \
+ jit_func_preserve_none jitted = _executor->jit_side_entry; \
__attribute__((musttail)) return jitted(frame, stack_pointer, tstate); \
} while (0)
#undef GOTO_TIER_ONE
#define GOTO_TIER_ONE(TARGET) \
do { \
+ tstate->current_executor = NULL; \
_PyFrame_SetStackPointer(frame, stack_pointer); \
return TARGET; \
} while (0)