return tstate->interp->eval_frame(tstate, frame, throwflag);
}
+#ifdef _Py_TIER2
+#ifdef _Py_JIT
+_Py_CODEUNIT *_Py_LazyJitTrampoline(
+ struct _PyExecutorObject *current_executor, _PyInterpreterFrame *frame,
+ _PyStackRef *stack_pointer, PyThreadState *tstate
+);
+#else
+_Py_CODEUNIT *_PyTier2Interpreter(
+ struct _PyExecutorObject *current_executor, _PyInterpreterFrame *frame,
+ _PyStackRef *stack_pointer, PyThreadState *tstate
+);
+#endif
+#endif
+
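+/* Entry point used to execute an executor; see _PyJitEntryFuncPtr. */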
+extern _PyJitEntryFuncPtr _Py_jit_entry;
+
extern PyObject*
_PyEval_Vector(PyThreadState *tstate,
PyFunctionObject *func, PyObject *locals,
#endif
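+/* Signature of _Py_jit_entry: the lazy JIT trampoline, the compiled trampoline, or the tier 2 interpreter. */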
+typedef _Py_CODEUNIT *(*_PyJitEntryFuncPtr)(struct _PyExecutorObject *exec, _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate);
/* PyInterpreterState holds the global state for one of the runtime's
interpreters. Typically the initial (main) interpreter is the only one.
uint32_t code_size;
size_t jit_size;
void *jit_code;
- void *jit_side_entry;
_PyExitData exits[1];
} _PyExecutorObject;
--- /dev/null
+Replace the shim code that was compiled into every piece of jitted code with a
+single trampoline function, compiled lazily the first time jitted code is executed.
assert(tstate->current_executor == NULL);
assert(executor != tstate->interp->cold_executor);
tstate->jit_exit = NULL;
- GOTO_TIER_TWO(executor);
+ TIER1_TO_TIER2(executor);
}
}
else {
}
assert(executor != tstate->interp->cold_executor);
tstate->jit_exit = NULL;
- GOTO_TIER_TWO(executor);
+ TIER1_TO_TIER2(executor);
#else
Py_FatalError("ENTER_EXECUTOR is not supported in this build");
#endif /* _Py_TIER2 */
}
#endif
tstate->jit_exit = exit;
- GOTO_TIER_TWO(exit->executor);
+ TIER2_TO_TIER2(exit->executor);
}
tier2 op(_CHECK_VALIDITY, (--)) {
tier2 op(_START_EXECUTOR, (executor/4 --)) {
#ifndef _Py_JIT
- current_executor = (_PyExecutorObject*)executor;
+ assert(current_executor == (_PyExecutorObject*)executor);
#endif
assert(tstate->jit_exit == NULL || tstate->jit_exit->executor == current_executor);
tstate->current_executor = (PyObject *)executor;
}
assert(tstate->jit_exit == exit);
exit->executor = executor;
- GOTO_TIER_TWO(exit->executor);
+ TIER2_TO_TIER2(exit->executor);
}
label(pop_2_error) {
}
int r = PyDict_Contains(globals, &_Py_ID(__lltrace__));
if (r < 0) {
- return -1;
+ PyErr_Clear();
+ return 0;
}
int lltrace = r * 5; // Levels 1-4 only trace uops
if (!lltrace) {
#endif
}
-#if defined(_Py_TIER2) && !defined(_Py_JIT)
- /* Tier 2 interpreter state */
- _PyExecutorObject *current_executor = NULL;
- const _PyUOpInstruction *next_uop = NULL;
-#endif
#if Py_TAIL_CALL_INTERP
# if Py_STATS
return _TAIL_CALL_start_frame(frame, NULL, tstate, NULL, 0, lastopcode);
#endif
+early_exit:
+ assert(_PyErr_Occurred(tstate));
+ _Py_LeaveRecursiveCallPy(tstate);
+ assert(frame->owner != FRAME_OWNED_BY_INTERPRETER);
+ // GH-99729: We need to unlink the frame *before* clearing it:
+ _PyInterpreterFrame *dying = frame;
+ frame = tstate->current_frame = dying->previous;
+ _PyEval_FrameClearAndPop(tstate, dying);
+ frame->return_offset = 0;
+ assert(frame->owner == FRAME_OWNED_BY_INTERPRETER);
+ /* Restore previous frame and exit */
+ tstate->current_frame = frame->previous;
+ return NULL;
+}
#ifdef _Py_TIER2
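+/* Entry point for executing an executor. JIT builds start with the lazy
+ * trampoline, which compiles the real trampoline on first use;
+ * interpreter-only tier 2 builds call the tier 2 interpreter directly. */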
-
-// Tier 2 is also here!
-enter_tier_two:
-
#ifdef _Py_JIT
- assert(0);
+_PyJitEntryFuncPtr _Py_jit_entry = _Py_LazyJitTrampoline;
#else
+_PyJitEntryFuncPtr _Py_jit_entry = _PyTier2Interpreter;
+#endif
+#endif
+
+#if defined(_Py_TIER2) && !defined(_Py_JIT)
+
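+/* The tier 2 (micro-op) interpreter, now a separate function with the same
+ * signature as the JIT entry point, so ENTER_EXECUTOR can reach either
+ * through _Py_jit_entry. */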
+_Py_CODEUNIT *
+_PyTier2Interpreter(
+ _PyExecutorObject *current_executor, _PyInterpreterFrame *frame,
+ _PyStackRef *stack_pointer, PyThreadState *tstate
+) {
+ const _PyUOpInstruction *next_uop;
+ int oparg;
+tier2_start:
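+ /* TIER2_TO_TIER2 jumps back here when control transfers to another executor. */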
+
+ next_uop = current_executor->trace;
+ assert(next_uop->opcode == _START_EXECUTOR || next_uop->opcode == _COLD_EXIT);
#undef LOAD_IP
#define LOAD_IP(UNUSED) (void)0
#undef ENABLE_SPECIALIZATION_FT
#define ENABLE_SPECIALIZATION_FT 0
- ; // dummy statement after a label, before a declaration
uint16_t uopcode;
#ifdef Py_STATS
int lastuop = 0;
next_uop = current_executor->trace + target;
goto tier2_dispatch;
-#endif // _Py_JIT
-
+}
#endif // _Py_TIER2
-early_exit:
- assert(_PyErr_Occurred(tstate));
- _Py_LeaveRecursiveCallPy(tstate);
- assert(frame->owner != FRAME_OWNED_BY_INTERPRETER);
- // GH-99729: We need to unlink the frame *before* clearing it:
- _PyInterpreterFrame *dying = frame;
- frame = tstate->current_frame = dying->previous;
- _PyEval_FrameClearAndPop(tstate, dying);
- frame->return_offset = 0;
- assert(frame->owner == FRAME_OWNED_BY_INTERPRETER);
- /* Restore previous frame and exit */
- tstate->current_frame = frame->previous;
- return NULL;
-}
#ifdef DO_NOT_OPTIMIZE_INTERP_LOOP
# pragma optimize("", on)
_PyFrame_SetStackPointer(frame, stack_pointer); \
int lltrace = maybe_lltrace_resume_frame(frame, GLOBALS()); \
stack_pointer = _PyFrame_GetStackPointer(frame); \
- if (lltrace < 0) { \
- JUMP_TO_LABEL(exit_unwind); \
- } \
frame->lltrace = lltrace; \
} while (0)
#else
/* Tier-switching macros. */
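+// TIER1_TO_TIER2 enters an executor from the tier 1 interpreter through
+// _Py_jit_entry and resumes tier 1 dispatch when it returns;
+// TIER2_TO_TIER2 switches executors without leaving tier 2.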
-#ifdef _Py_JIT
-#define GOTO_TIER_TWO(EXECUTOR) \
+#define TIER1_TO_TIER2(EXECUTOR) \
do { \
OPT_STAT_INC(traces_executed); \
- _PyExecutorObject *_executor = (EXECUTOR); \
- jit_func jitted = _executor->jit_code; \
- /* Keep the shim frame alive via the executor: */ \
- Py_INCREF(_executor); \
- next_instr = jitted(frame, stack_pointer, tstate); \
- Py_DECREF(_executor); \
+ next_instr = _Py_jit_entry((EXECUTOR), frame, stack_pointer, tstate); \
frame = tstate->current_frame; \
stack_pointer = _PyFrame_GetStackPointer(frame); \
if (next_instr == NULL) { \
} \
DISPATCH(); \
} while (0)
-#else
-#define GOTO_TIER_TWO(EXECUTOR) \
-do { \
- OPT_STAT_INC(traces_executed); \
- _PyExecutorObject *_executor = (EXECUTOR); \
- next_uop = _executor->trace; \
- assert(next_uop->opcode == _START_EXECUTOR || next_uop->opcode == _COLD_EXIT); \
- goto enter_tier_two; \
+
+#define TIER2_TO_TIER2(EXECUTOR) \
+do { \
+ OPT_STAT_INC(traces_executed); \
+ current_executor = (EXECUTOR); \
+ goto tier2_start; \
} while (0)
-#endif
#define GOTO_TIER_ONE(TARGET) \
do \
{ \
tstate->current_executor = NULL; \
- next_instr = (TARGET); \
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); \
_PyFrame_SetStackPointer(frame, stack_pointer); \
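+ /* Tier 2 now runs in its own function, so hand the target instruction back to the tier 1 caller. */ \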
- stack_pointer = _PyFrame_GetStackPointer(frame); \
- if (next_instr == NULL) \
- { \
- next_instr = frame->instr_ptr; \
- goto error; \
- } \
- DISPATCH(); \
+ return TARGET; \
} while (0)
#define CURRENT_OPARG() (next_uop[-1].oparg)
}
#endif
tstate->jit_exit = exit;
- GOTO_TIER_TWO(exit->executor);
+ TIER2_TO_TIER2(exit->executor);
break;
}
case _START_EXECUTOR: {
PyObject *executor = (PyObject *)CURRENT_OPERAND0();
#ifndef _Py_JIT
- current_executor = (_PyExecutorObject*)executor;
+ assert(current_executor == (_PyExecutorObject*)executor);
#endif
assert(tstate->jit_exit == NULL || tstate->jit_exit->executor == current_executor);
tstate->current_executor = (PyObject *)executor;
}
assert(tstate->jit_exit == exit);
exit->executor = executor;
- GOTO_TIER_TWO(exit->executor);
+ TIER2_TO_TIER2(exit->executor);
break;
}
}
assert(executor != tstate->interp->cold_executor);
tstate->jit_exit = NULL;
- GOTO_TIER_TWO(executor);
+ TIER1_TO_TIER2(executor);
#else
Py_FatalError("ENTER_EXECUTOR is not supported in this build");
#endif /* _Py_TIER2 */
assert(tstate->current_executor == NULL);
assert(executor != tstate->interp->cold_executor);
tstate->jit_exit = NULL;
- GOTO_TIER_TWO(executor);
+ TIER1_TO_TIER2(executor);
}
}
else {
size_t code_size = 0;
size_t data_size = 0;
jit_state state = {0};
- group = &shim;
- code_size += group->code_size;
- data_size += group->data_size;
- combine_symbol_mask(group->trampoline_mask, state.trampolines.mask);
for (size_t i = 0; i < length; i++) {
const _PyUOpInstruction *instruction = &trace[i];
group = &stencil_groups[instruction->opcode];
unsigned char *code = memory;
state.trampolines.mem = memory + code_size;
unsigned char *data = memory + code_size + state.trampolines.size + code_padding;
- // Compile the shim, which handles converting between the native
- // calling convention and the calling convention used by jitted code
- // (which may be different for efficiency reasons).
- group = &shim;
- group->emit(code, data, executor, NULL, &state);
- code += group->code_size;
- data += group->data_size;
assert(trace[0].opcode == _START_EXECUTOR || trace[0].opcode == _COLD_EXIT);
for (size_t i = 0; i < length; i++) {
const _PyUOpInstruction *instruction = &trace[i];
return -1;
}
executor->jit_code = memory;
- executor->jit_side_entry = memory + shim.code_size;
executor->jit_size = total_size;
return 0;
}
+/* One-off compilation of the JIT entry trampoline.
+ * We compile this only once, as it is effectively a normal
+ * function, but we need to go through the JIT because it must
+ * understand the JIT-specific calling convention.
+ */
+static _PyJitEntryFuncPtr
+compile_trampoline(void)
+{
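+ /* The trampoline is not specific to any executor; the dummy below exists
+ * only to satisfy emit()'s signature. */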
+ _PyExecutorObject dummy;
+ const StencilGroup *group;
+ size_t code_size = 0;
+ size_t data_size = 0;
+ jit_state state = {0};
+ group = &trampoline;
+ code_size += group->code_size;
+ data_size += group->data_size;
+ combine_symbol_mask(group->trampoline_mask, state.trampolines.mask);
+ // Round up to the nearest page:
+ size_t page_size = get_page_size();
+ assert((page_size & (page_size - 1)) == 0);
+ size_t code_padding = DATA_ALIGN - ((code_size + state.trampolines.size) & (DATA_ALIGN - 1));
+ size_t padding = page_size - ((code_size + state.trampolines.size + code_padding + data_size) & (page_size - 1));
+ size_t total_size = code_size + state.trampolines.size + code_padding + data_size + padding;
+ unsigned char *memory = jit_alloc(total_size);
+ if (memory == NULL) {
+ return NULL;
+ }
+ unsigned char *code = memory;
+ state.trampolines.mem = memory + code_size;
+ unsigned char *data = memory + code_size + state.trampolines.size + code_padding;
+ // Compile the trampoline, which handles converting between the native
+ // calling convention and the calling convention used by jitted code
+ // (which may be different for efficiency reasons).
+ group = &trampoline;
+ group->emit(code, data, &dummy, NULL, &state);
+ code += group->code_size;
+ data += group->data_size;
+ assert(code == memory + code_size);
+ assert(data == memory + code_size + state.trampolines.size + code_padding + data_size);
+ if (mark_executable(memory, total_size)) {
+ jit_free(memory, total_size);
+ return NULL;
+ }
+ return (_PyJitEntryFuncPtr)memory;
+}
+
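+/* Serializes one-time compilation of the JIT entry trampoline. */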
+static PyMutex lazy_jit_mutex = { 0 };
+
+_Py_CODEUNIT *
+_Py_LazyJitTrampoline(
+ _PyExecutorObject *executor, _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate
+) {
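+ /* Take the lock and re-check: another thread may have already compiled
+ * and installed the trampoline. */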
+ PyMutex_Lock(&lazy_jit_mutex);
+ if (_Py_jit_entry == _Py_LazyJitTrampoline) {
+ _PyJitEntryFuncPtr trampoline = compile_trampoline();
+ if (trampoline == NULL) {
+ PyMutex_Unlock(&lazy_jit_mutex);
+ Py_FatalError("Cannot allocate core JIT code");
+ }
+ _Py_jit_entry = trampoline;
+ }
+ PyMutex_Unlock(&lazy_jit_mutex);
+ return _Py_jit_entry(executor, frame, stack_pointer, tstate);
+}
+
void
_PyJIT_Free(_PyExecutorObject *executor)
{
size_t size = executor->jit_size;
if (memory) {
executor->jit_code = NULL;
- executor->jit_side_entry = NULL;
executor->jit_size = 0;
if (jit_free(memory, size)) {
PyErr_FormatUnraisable("Exception ignored while "
#endif
#ifdef _Py_JIT
executor->jit_code = NULL;
- executor->jit_side_entry = NULL;
executor->jit_size = 0;
// This is initialized to true so we can prevent the executor
// from being immediately detected as cold and invalidated.
((_PyUOpInstruction *)cold->trace)->opcode = _COLD_EXIT;
#ifdef _Py_JIT
cold->jit_code = NULL;
- cold->jit_side_entry = NULL;
cold->jit_size = 0;
// This is initialized to true so we can prevent the executor
// from being immediately detected as cold and invalidated.
static inline int check_interpreter_whence(long);
#endif
+extern _Py_CODEUNIT *
+_Py_LazyJitTrampoline(
+ struct _PyExecutorObject *exec, _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate
+);
+
/* Get the interpreter state to a minimal consistent state.
Further init happens in pylifecycle.c before it can be used.
All fields not initialized here are expected to be zeroed out,
with tempfile.TemporaryDirectory() as tempdir:
work = pathlib.Path(tempdir).resolve()
async with asyncio.TaskGroup() as group:
- coro = self._compile("shim", TOOLS_JIT / "shim.c", work)
- tasks.append(group.create_task(coro, name="shim"))
+ coro = self._compile("trampoline", TOOLS_JIT / "trampoline.c", work)
+ tasks.append(group.create_task(coro, name="trampoline"))
template = TOOLS_JIT_TEMPLATE_C.read_text()
for case, opname in cases_and_opnames:
# Write out a copy of the template with *only* this case
yield " symbol_mask trampoline_mask;"
yield "} StencilGroup;"
yield ""
- yield f"static const StencilGroup shim = {groups['shim'].as_c('shim')};"
+ yield f"static const StencilGroup trampoline = {groups['trampoline'].as_c('trampoline')};"
yield ""
yield "static const StencilGroup stencil_groups[MAX_UOP_ID + 1] = {"
for opname, group in sorted(groups.items()):
- if opname == "shim":
+ if opname == "trampoline":
continue
yield f" [{opname}] = {group.as_c(opname)},"
yield "};"
+++ /dev/null
-#include "Python.h"
-
-#include "pycore_ceval.h"
-#include "pycore_frame.h"
-#include "pycore_jit.h"
-
-#include "jit.h"
-
-_Py_CODEUNIT *
-_JIT_ENTRY(_PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate)
-{
- // Note that this is *not* a tail call:
- DECLARE_TARGET(_JIT_CONTINUE);
- return _JIT_CONTINUE(frame, stack_pointer, tstate);
-}
#undef CURRENT_TARGET
#define CURRENT_TARGET() (_target)
-#undef GOTO_TIER_TWO
-#define GOTO_TIER_TWO(EXECUTOR) \
+#undef TIER2_TO_TIER2
+#define TIER2_TO_TIER2(EXECUTOR) \
do { \
OPT_STAT_INC(traces_executed); \
_PyExecutorObject *_executor = (EXECUTOR); \
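+ /* With the per-executor shim removed, jit_code is itself the preserve_none entry point. */ \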
- jit_func_preserve_none jitted = _executor->jit_side_entry; \
+ jit_func_preserve_none jitted = _executor->jit_code; \
__attribute__((musttail)) return jitted(frame, stack_pointer, tstate); \
} while (0)
--- /dev/null
+#include "Python.h"
+
+#include "pycore_ceval.h"
+#include "pycore_frame.h"
+#include "pycore_jit.h"
+
+#include "jit.h"
+
+_Py_CODEUNIT *
+_JIT_ENTRY(
+ _PyExecutorObject *exec, _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate
+) {
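+ // Switch from the native calling convention to the JIT calling convention
+ // and enter the executor's compiled code.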
+ typedef DECLARE_TARGET((*jit_func));
+ jit_func jitted = (jit_func)exec->jit_code;
+ return jitted(frame, stack_pointer, tstate);
+}