fail-fast: false
matrix:
target:
- - i686-pc-windows-msvc/msvc
- - x86_64-pc-windows-msvc/msvc
- - aarch64-pc-windows-msvc/msvc
+# To be re-enabled once we support these targets.
+# - i686-pc-windows-msvc/msvc
+# - x86_64-pc-windows-msvc/msvc
+# - aarch64-pc-windows-msvc/msvc
- x86_64-apple-darwin/clang
- aarch64-apple-darwin/clang
- x86_64-unknown-linux-gnu/gcc
llvm:
- 21
include:
- - target: i686-pc-windows-msvc/msvc
- architecture: Win32
- runner: windows-2022
- - target: x86_64-pc-windows-msvc/msvc
- architecture: x64
- runner: windows-2022
- - target: aarch64-pc-windows-msvc/msvc
- architecture: ARM64
- runner: windows-11-arm
+# To be re-enabled once we support these targets.
+# - target: i686-pc-windows-msvc/msvc
+# architecture: Win32
+# runner: windows-2022
+# - target: x86_64-pc-windows-msvc/msvc
+# architecture: x64
+# runner: windows-2022
+# - target: aarch64-pc-windows-msvc/msvc
+# architecture: ARM64
+# runner: windows-11-arm
- target: x86_64-apple-darwin/clang
architecture: x86_64
runner: macos-15-intel
uint64_t optimized_trace_length_hist[_Py_UOP_HIST_SIZE];
uint64_t optimizer_attempts;
uint64_t optimizer_successes;
+ uint64_t optimizer_contradiction;
+ uint64_t optimizer_frame_overflow;
uint64_t optimizer_failure_reason_no_memory;
uint64_t remove_globals_builtins_changed;
uint64_t remove_globals_incorrect_keys;
return counter.value_and_backoff < UNREACHABLE_BACKOFF;
}
+static inline _Py_BackoffCounter
+trigger_backoff_counter(void)
+{
+ _Py_BackoffCounter result;
+ result.value_and_backoff = 0;
+ return result;
+}
+
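// Illustrative sketch only (the helper name below is hypothetical, not part
// of the patch): because backoff_counter_triggers() simply compares
// value_and_backoff against UNREACHABLE_BACKOFF, the zeroed counter returned
// by trigger_backoff_counter() trips on its very first check.
static inline bool
example_zeroed_counter_triggers(void)
{
    return backoff_counter_triggers(trigger_backoff_counter());  // always true
}
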
// Initial JUMP_BACKWARD counter.
// Must be larger than ADAPTIVE_COOLDOWN_VALUE, otherwise when JIT code is
// invalidated we may construct a new trace before the bytecode has properly
// re-specialized:
-#define JUMP_BACKWARD_INITIAL_VALUE 4095
+// Note: this should be one less than a prime number. That makes it less
+// likely that the point at which tracing triggers coincides with a fixed
+// period in the loop being traced.
+// For example, 4095 does not work for the nqueens benchmark on pyperformance:
+// we always end up tracing the iteration on which the loop's iterator is
+// exhausted, which aborts the current tracer.
+#define JUMP_BACKWARD_INITIAL_VALUE 4000
#define JUMP_BACKWARD_INITIAL_BACKOFF 12
static inline _Py_BackoffCounter
initial_jump_backoff_counter(void)
* Must be larger than ADAPTIVE_COOLDOWN_VALUE,
* otherwise when a side exit warms up we may construct
* a new trace before the Tier 1 code has properly re-specialized. */
-#define SIDE_EXIT_INITIAL_VALUE 4095
+#define SIDE_EXIT_INITIAL_VALUE 4000
#define SIDE_EXIT_INITIAL_BACKOFF 12
static inline _Py_BackoffCounter
#define SPECIAL___AEXIT__ 3
#define SPECIAL_MAX 3
+PyAPI_DATA(const _Py_CODEUNIT *) _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS_PTR;
+
#ifdef __cplusplus
}
#endif
#include "pycore_structs.h" // PyHamtObject
#include "pycore_tstate.h" // _PyThreadStateImpl
#include "pycore_typedefs.h" // _PyRuntimeState
-#include "pycore_uop.h" // struct _PyUOpInstruction
-
#define CODE_MAX_WATCHERS 8
#define CONTEXT_MAX_WATCHERS 8
PyObject *common_consts[NUM_COMMON_CONSTANTS];
bool jit;
bool compiling;
- struct _PyUOpInstruction *jit_uop_buffer;
struct _PyExecutorObject *executor_list_head;
struct _PyExecutorObject *executor_deletion_list_head;
struct _PyExecutorObject *cold_executor;
+ struct _PyExecutorObject *cold_dynamic_executor;
int executor_deletion_list_remaining_capacity;
size_t executor_creation_counter;
_rare_events rare_events;
#define HAS_ERROR_NO_POP_FLAG (4096)
#define HAS_NO_SAVE_IP_FLAG (8192)
#define HAS_PERIODIC_FLAG (16384)
+#define HAS_UNPREDICTABLE_JUMP_FLAG (32768)
+#define HAS_NEEDS_GUARD_IP_FLAG (65536)
#define OPCODE_HAS_ARG(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_ARG_FLAG))
#define OPCODE_HAS_CONST(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_CONST_FLAG))
#define OPCODE_HAS_NAME(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_NAME_FLAG))
#define OPCODE_HAS_ERROR_NO_POP(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_ERROR_NO_POP_FLAG))
#define OPCODE_HAS_NO_SAVE_IP(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_NO_SAVE_IP_FLAG))
#define OPCODE_HAS_PERIODIC(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_PERIODIC_FLAG))
+#define OPCODE_HAS_UNPREDICTABLE_JUMP(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_UNPREDICTABLE_JUMP_FLAG))
+#define OPCODE_HAS_NEEDS_GUARD_IP(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_NEEDS_GUARD_IP_FLAG))
#define OPARG_SIMPLE 0
#define OPARG_CACHE_1 1
struct opcode_metadata {
uint8_t valid_entry;
uint8_t instr_format;
- uint16_t flags;
+ uint32_t flags;
};
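/* Illustrative check only, assuming this header is the one that defines the
 * flag bits above: HAS_NEEDS_GUARD_IP_FLAG is 1 << 16, so the full flag set
 * no longer fits in 16 bits -- hence widening `flags` (and _PyUop_Flags
 * elsewhere) to uint32_t. */
_Static_assert(HAS_NEEDS_GUARD_IP_FLAG > UINT16_MAX,
               "opcode flags no longer fit in uint16_t");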
extern const struct opcode_metadata _PyOpcode_opcode_metadata[267];
[BINARY_OP_MULTIPLY_FLOAT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG },
[BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG },
[BINARY_OP_SUBSCR_DICT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
- [BINARY_OP_SUBSCR_GETITEM] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG },
+ [BINARY_OP_SUBSCR_GETITEM] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_NEEDS_GUARD_IP_FLAG },
[BINARY_OP_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG },
[BINARY_OP_SUBSCR_LIST_SLICE] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[BINARY_OP_SUBSCR_STR_INT] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG },
[BUILD_TEMPLATE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[BUILD_TUPLE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG },
[CACHE] = { true, INSTR_FMT_IX, 0 },
- [CALL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG },
- [CALL_ALLOC_AND_ENTER_INIT] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG },
- [CALL_BOUND_METHOD_EXACT_ARGS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG },
- [CALL_BOUND_METHOD_GENERAL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG },
+ [CALL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG },
+ [CALL_ALLOC_AND_ENTER_INIT] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG },
+ [CALL_BOUND_METHOD_EXACT_ARGS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG },
+ [CALL_BOUND_METHOD_GENERAL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG },
[CALL_BUILTIN_CLASS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[CALL_BUILTIN_FAST] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[CALL_BUILTIN_FAST_WITH_KEYWORDS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[CALL_BUILTIN_O] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
- [CALL_FUNCTION_EX] = { true, INSTR_FMT_IX, HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG },
+ [CALL_FUNCTION_EX] = { true, INSTR_FMT_IX, HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG },
[CALL_INTRINSIC_1] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[CALL_INTRINSIC_2] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[CALL_ISINSTANCE] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG },
- [CALL_KW] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG },
- [CALL_KW_BOUND_METHOD] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
+ [CALL_KW] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG },
+ [CALL_KW_BOUND_METHOD] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG },
[CALL_KW_NON_PY] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
- [CALL_KW_PY] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
+ [CALL_KW_PY] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG },
[CALL_LEN] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG },
[CALL_LIST_APPEND] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[CALL_METHOD_DESCRIPTOR_FAST] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[CALL_METHOD_DESCRIPTOR_NOARGS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[CALL_METHOD_DESCRIPTOR_O] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[CALL_NON_PY_GENERAL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
- [CALL_PY_EXACT_ARGS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG },
- [CALL_PY_GENERAL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG },
+ [CALL_PY_EXACT_ARGS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_NEEDS_GUARD_IP_FLAG },
+ [CALL_PY_GENERAL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG },
[CALL_STR_1] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[CALL_TUPLE_1] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[CALL_TYPE_1] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG },
[DELETE_SUBSCR] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[DICT_MERGE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[DICT_UPDATE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
- [END_ASYNC_FOR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG },
+ [END_ASYNC_FOR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_UNPREDICTABLE_JUMP_FLAG | HAS_NEEDS_GUARD_IP_FLAG },
[END_FOR] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG | HAS_NO_SAVE_IP_FLAG },
[END_SEND] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG | HAS_PURE_FLAG },
[ENTER_EXECUTOR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG },
[EXTENDED_ARG] = { true, INSTR_FMT_IB, HAS_ARG_FLAG },
[FORMAT_SIMPLE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[FORMAT_WITH_SPEC] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
- [FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG },
- [FOR_ITER_GEN] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG },
- [FOR_ITER_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG },
- [FOR_ITER_RANGE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG },
- [FOR_ITER_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EXIT_FLAG },
+ [FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_UNPREDICTABLE_JUMP_FLAG },
+ [FOR_ITER_GEN] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_NEEDS_GUARD_IP_FLAG },
+ [FOR_ITER_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG | HAS_UNPREDICTABLE_JUMP_FLAG },
+ [FOR_ITER_RANGE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_UNPREDICTABLE_JUMP_FLAG },
+ [FOR_ITER_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EXIT_FLAG | HAS_UNPREDICTABLE_JUMP_FLAG },
[GET_AITER] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[GET_ANEXT] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG },
[GET_AWAITABLE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[GET_YIELD_FROM_ITER] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG },
[IMPORT_FROM] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[IMPORT_NAME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
- [INSTRUMENTED_CALL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG },
- [INSTRUMENTED_CALL_FUNCTION_EX] = { true, INSTR_FMT_IX, HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG },
- [INSTRUMENTED_CALL_KW] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG },
- [INSTRUMENTED_END_ASYNC_FOR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG },
+ [INSTRUMENTED_CALL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG },
+ [INSTRUMENTED_CALL_FUNCTION_EX] = { true, INSTR_FMT_IX, HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG },
+ [INSTRUMENTED_CALL_KW] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG },
+ [INSTRUMENTED_END_ASYNC_FOR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_UNPREDICTABLE_JUMP_FLAG | HAS_NEEDS_GUARD_IP_FLAG },
[INSTRUMENTED_END_FOR] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_NO_SAVE_IP_FLAG },
[INSTRUMENTED_END_SEND] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG },
- [INSTRUMENTED_FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG },
+ [INSTRUMENTED_FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_UNPREDICTABLE_JUMP_FLAG | HAS_NEEDS_GUARD_IP_FLAG },
[INSTRUMENTED_INSTRUCTION] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[INSTRUMENTED_JUMP_BACKWARD] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[INSTRUMENTED_JUMP_FORWARD] = { true, INSTR_FMT_IB, HAS_ARG_FLAG },
[INSTRUMENTED_POP_JUMP_IF_NOT_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ESCAPES_FLAG },
[INSTRUMENTED_POP_JUMP_IF_TRUE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG },
[INSTRUMENTED_RESUME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG },
- [INSTRUMENTED_RETURN_VALUE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
- [INSTRUMENTED_YIELD_VALUE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG },
+ [INSTRUMENTED_RETURN_VALUE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG },
+ [INSTRUMENTED_YIELD_VALUE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG },
[INTERPRETER_EXIT] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG },
[IS_OP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ESCAPES_FLAG },
[JUMP_BACKWARD] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[LOAD_ATTR] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[LOAD_ATTR_CLASS] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG },
[LOAD_ATTR_CLASS_WITH_METACLASS_CHECK] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG },
- [LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG },
+ [LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_NEEDS_GUARD_IP_FLAG },
[LOAD_ATTR_INSTANCE_VALUE] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG },
[LOAD_ATTR_METHOD_LAZY_DICT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG },
[LOAD_ATTR_METHOD_NO_DICT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_EXIT_FLAG },
[LOAD_ATTR_MODULE] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG },
[LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG },
[LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG },
- [LOAD_ATTR_PROPERTY] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG },
+ [LOAD_ATTR_PROPERTY] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_NEEDS_GUARD_IP_FLAG },
[LOAD_ATTR_SLOT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG },
[LOAD_ATTR_WITH_HINT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG },
[LOAD_BUILD_CLASS] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[RESERVED] = { true, INSTR_FMT_IX, 0 },
[RESUME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG },
[RESUME_CHECK] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG },
- [RETURN_GENERATOR] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
- [RETURN_VALUE] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG },
- [SEND] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
- [SEND_GEN] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG },
+ [RETURN_GENERATOR] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG },
+ [RETURN_VALUE] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG },
+ [SEND] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | HAS_UNPREDICTABLE_JUMP_FLAG | HAS_NEEDS_GUARD_IP_FLAG },
+ [SEND_GEN] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_NEEDS_GUARD_IP_FLAG },
[SETUP_ANNOTATIONS] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[SET_ADD] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[SET_FUNCTION_ATTRIBUTE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG },
[UNPACK_SEQUENCE_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG },
[UNPACK_SEQUENCE_TWO_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG },
[WITH_EXCEPT_START] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
- [YIELD_VALUE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG },
+ [YIELD_VALUE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NEEDS_GUARD_IP_FLAG },
[ANNOTATIONS_PLACEHOLDER] = { true, -1, HAS_PURE_FLAG },
[JUMP] = { true, -1, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[JUMP_IF_FALSE] = { true, -1, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[IMPORT_FROM] = { .nuops = 1, .uops = { { _IMPORT_FROM, OPARG_SIMPLE, 0 } } },
[IMPORT_NAME] = { .nuops = 1, .uops = { { _IMPORT_NAME, OPARG_SIMPLE, 0 } } },
[IS_OP] = { .nuops = 1, .uops = { { _IS_OP, OPARG_SIMPLE, 0 } } },
+ [JUMP_BACKWARD] = { .nuops = 2, .uops = { { _CHECK_PERIODIC, OPARG_SIMPLE, 1 }, { _JUMP_BACKWARD_NO_INTERRUPT, OPARG_REPLACED, 1 } } },
+ [JUMP_BACKWARD_NO_INTERRUPT] = { .nuops = 1, .uops = { { _JUMP_BACKWARD_NO_INTERRUPT, OPARG_REPLACED, 0 } } },
+ [JUMP_BACKWARD_NO_JIT] = { .nuops = 2, .uops = { { _CHECK_PERIODIC, OPARG_SIMPLE, 1 }, { _JUMP_BACKWARD_NO_INTERRUPT, OPARG_REPLACED, 1 } } },
[LIST_APPEND] = { .nuops = 1, .uops = { { _LIST_APPEND, OPARG_SIMPLE, 0 } } },
[LIST_EXTEND] = { .nuops = 1, .uops = { { _LIST_EXTEND, OPARG_SIMPLE, 0 } } },
[LOAD_ATTR] = { .nuops = 1, .uops = { { _LOAD_ATTR, OPARG_SIMPLE, 8 } } },
} _PyExecutorLinkListNode;
-/* Bloom filter with m = 256
- * https://en.wikipedia.org/wiki/Bloom_filter */
-#define _Py_BLOOM_FILTER_WORDS 8
-
-typedef struct {
- uint32_t bits[_Py_BLOOM_FILTER_WORDS];
-} _PyBloomFilter;
-
typedef struct {
uint8_t opcode;
uint8_t oparg;
typedef struct _PyExitData {
uint32_t target;
- uint16_t index;
+ uint16_t index:14;
+ uint16_t is_dynamic:1;
+ uint16_t is_control_flow:1;
_Py_BackoffCounter temperature;
struct _PyExecutorObject *executor;
} _PyExitData;
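
/* Illustrative sketch only; the helper name is hypothetical. The exit index
 * now shares its 16 bits with two flag bits, so an executor is limited to
 * (1 << 14) side exits, while dynamic and control-flow exits can be told
 * apart without any extra storage. */
static inline void
example_init_dynamic_exit(_PyExitData *exit, uint16_t index)
{
    assert(index < (1 << 14));       // `index` is a 14-bit field
    exit->index = index;
    exit->is_dynamic = 1;            // the exit target is only known at run time
    exit->is_control_flow = 0;
}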
// This value is arbitrary and was not optimized.
#define JIT_CLEANUP_THRESHOLD 1000
-#define TRACE_STACK_SIZE 5
-
-int _Py_uop_analyze_and_optimize(_PyInterpreterFrame *frame,
+int _Py_uop_analyze_and_optimize(
+ PyFunctionObject *func,
_PyUOpInstruction *trace, int trace_len, int curr_stackentries,
_PyBloomFilter *dependencies);
#define TY_ARENA_SIZE (UOP_MAX_TRACE_LENGTH * 5)
// Need extras for root frame and for overflow frame (see TRACE_STACK_PUSH())
-#define MAX_ABSTRACT_FRAME_DEPTH (TRACE_STACK_SIZE + 2)
+#define MAX_ABSTRACT_FRAME_DEPTH (16)
// The maximum number of side exits that we can take before requiring forward
// progress (and inserting a new ENTER_EXECUTOR instruction). In practice, this
int stack_len;
int locals_len;
PyFunctionObject *func;
+ PyCodeObject *code;
JitOptRef *stack_pointer;
JitOptRef *stack;
int curr_stackentries,
JitOptRef *args,
int arg_len);
-extern int _Py_uop_frame_pop(JitOptContext *ctx);
+extern int _Py_uop_frame_pop(JitOptContext *ctx, PyCodeObject *co, int curr_stackentries);
PyAPI_FUNC(PyObject *) _Py_uop_symbols_test(PyObject *self, PyObject *ignored);
-PyAPI_FUNC(int) _PyOptimizer_Optimize(_PyInterpreterFrame *frame, _Py_CODEUNIT *start, _PyExecutorObject **exec_ptr, int chain_depth);
+PyAPI_FUNC(int) _PyOptimizer_Optimize(_PyInterpreterFrame *frame, PyThreadState *tstate);
static inline _PyExecutorObject *_PyExecutor_FromExit(_PyExitData *exit)
{
}
extern _PyExecutorObject *_PyExecutor_GetColdExecutor(void);
+extern _PyExecutorObject *_PyExecutor_GetColdDynamicExecutor(void);
PyAPI_FUNC(void) _PyExecutor_ClearExit(_PyExitData *exit);
int opcode = uop->opcode;
return (
opcode == _EXIT_TRACE ||
- opcode == _JUMP_TO_TOP
+ opcode == _DEOPT ||
+ opcode == _JUMP_TO_TOP ||
+ opcode == _DYNAMIC_EXIT
);
}
extern void _Py_ClearExecutorDeletionList(PyInterpreterState *interp);
#endif
+int _PyJit_translate_single_bytecode_to_trace(PyThreadState *tstate, _PyInterpreterFrame *frame, _Py_CODEUNIT *next_instr, bool stop_tracing);
+
+int
+_PyJit_TryInitializeTracing(PyThreadState *tstate, _PyInterpreterFrame *frame,
+ _Py_CODEUNIT *curr_instr, _Py_CODEUNIT *start_instr,
+ _Py_CODEUNIT *close_loop_instr, int curr_stackdepth, int chain_depth, _PyExitData *exit,
+ int oparg);
+
+void _PyJit_FinalizeTracing(PyThreadState *tstate);
+
+void _PyJit_Tracer_InvalidateDependency(PyThreadState *old_tstate, void *obj);
+
#ifdef __cplusplus
}
#endif
#include "pycore_freelist_state.h" // struct _Py_freelists
#include "pycore_mimalloc.h" // struct _mimalloc_thread_state
#include "pycore_qsbr.h" // struct qsbr
-
+#include "pycore_uop.h" // struct _PyUOpInstruction
+#include "pycore_structs.h"
#ifdef Py_GIL_DISABLED
struct _gc_thread_state {
};
#endif
+#if _Py_TIER2
+typedef struct _PyJitTracerInitialState {
+ int stack_depth;
+ int chain_depth;
+ struct _PyExitData *exit;
+ PyCodeObject *code; // Strong
+ PyFunctionObject *func; // Strong
+ _Py_CODEUNIT *start_instr;
+ _Py_CODEUNIT *close_loop_instr;
+ _Py_CODEUNIT *jump_backward_instr;
+} _PyJitTracerInitialState;
+
+typedef struct _PyJitTracerPreviousState {
+ bool dependencies_still_valid;
+ bool instr_is_super;
+ int code_max_size;
+ int code_curr_size;
+ int instr_oparg;
+ int instr_stacklevel;
+ _Py_CODEUNIT *instr;
+ PyCodeObject *instr_code; // Strong
+ struct _PyInterpreterFrame *instr_frame;
+ _PyBloomFilter dependencies;
+} _PyJitTracerPreviousState;
+
+typedef struct _PyJitTracerState {
+ _PyUOpInstruction *code_buffer;
+ _PyJitTracerInitialState initial_state;
+ _PyJitTracerPreviousState prev_state;
+} _PyJitTracerState;
+#endif
+
// Every PyThreadState is actually allocated as a _PyThreadStateImpl. The
// PyThreadState fields are exposed as part of the C API, although most fields
// are intended to be private. The _PyThreadStateImpl fields not exposed.
#if defined(Py_REF_DEBUG) && defined(Py_GIL_DISABLED)
Py_ssize_t reftotal; // this thread's total refcount operations
#endif
-
+#if _Py_TIER2
+ _PyJitTracerState jit_tracer_state;
+#endif
} _PyThreadStateImpl;
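
#if _Py_TIER2
/* Illustrative sketch only; the accessor name is hypothetical. The tracer
 * state is per-thread, so tracer code can reach it by downcasting the public
 * PyThreadState (every PyThreadState is allocated as a _PyThreadStateImpl). */
static inline _PyJitTracerState *
example_get_jit_tracer_state(PyThreadState *tstate)
{
    return &((_PyThreadStateImpl *)tstate)->jit_tracer_state;
}
#endif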
#ifdef __cplusplus
#endif
} _PyUOpInstruction;
-// This is the length of the trace we project initially.
-#define UOP_MAX_TRACE_LENGTH 1200
+// This is the length of the trace we translate initially.
+#define UOP_MAX_TRACE_LENGTH 3000
#define UOP_BUFFER_SIZE (UOP_MAX_TRACE_LENGTH * sizeof(_PyUOpInstruction))
+/* Bloom filter with m = 256
+ * https://en.wikipedia.org/wiki/Bloom_filter */
+#define _Py_BLOOM_FILTER_WORDS 8
+
+typedef struct {
+ uint32_t bits[_Py_BLOOM_FILTER_WORDS];
+} _PyBloomFilter;
+
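/* Illustrative sketch only; the helper names are hypothetical and the real
 * hashing scheme lives in the optimizer. With m = 256 the filter is 8 x 32-bit
 * words, so a bit position splits into a word index (top 3 of its 8 bits) and
 * a bit index (low 5 bits). */
static inline void
example_bloom_set_bit(_PyBloomFilter *bf, uint32_t bit)
{
    bit &= 255;                                      // m = 256 bits total
    bf->bits[bit >> 5] |= (uint32_t)1 << (bit & 31);
}

static inline int
example_bloom_test_bit(const _PyBloomFilter *bf, uint32_t bit)
{
    bit &= 255;
    return (bf->bits[bit >> 5] >> (bit & 31)) & 1;
}
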
#ifdef __cplusplus
}
#endif
#define _CHECK_STACK_SPACE 357
#define _CHECK_STACK_SPACE_OPERAND 358
#define _CHECK_VALIDITY 359
-#define _COLD_EXIT 360
-#define _COMPARE_OP 361
-#define _COMPARE_OP_FLOAT 362
-#define _COMPARE_OP_INT 363
-#define _COMPARE_OP_STR 364
-#define _CONTAINS_OP 365
-#define _CONTAINS_OP_DICT 366
-#define _CONTAINS_OP_SET 367
+#define _COLD_DYNAMIC_EXIT 360
+#define _COLD_EXIT 361
+#define _COMPARE_OP 362
+#define _COMPARE_OP_FLOAT 363
+#define _COMPARE_OP_INT 364
+#define _COMPARE_OP_STR 365
+#define _CONTAINS_OP 366
+#define _CONTAINS_OP_DICT 367
+#define _CONTAINS_OP_SET 368
#define _CONVERT_VALUE CONVERT_VALUE
-#define _COPY 368
-#define _COPY_1 369
-#define _COPY_2 370
-#define _COPY_3 371
+#define _COPY 369
+#define _COPY_1 370
+#define _COPY_2 371
+#define _COPY_3 372
#define _COPY_FREE_VARS COPY_FREE_VARS
-#define _CREATE_INIT_FRAME 372
+#define _CREATE_INIT_FRAME 373
#define _DELETE_ATTR DELETE_ATTR
#define _DELETE_DEREF DELETE_DEREF
#define _DELETE_FAST DELETE_FAST
#define _DELETE_GLOBAL DELETE_GLOBAL
#define _DELETE_NAME DELETE_NAME
#define _DELETE_SUBSCR DELETE_SUBSCR
-#define _DEOPT 373
+#define _DEOPT 374
#define _DICT_MERGE DICT_MERGE
#define _DICT_UPDATE DICT_UPDATE
-#define _DO_CALL 374
-#define _DO_CALL_FUNCTION_EX 375
-#define _DO_CALL_KW 376
+#define _DO_CALL 375
+#define _DO_CALL_FUNCTION_EX 376
+#define _DO_CALL_KW 377
+#define _DYNAMIC_EXIT 378
#define _END_FOR END_FOR
#define _END_SEND END_SEND
-#define _ERROR_POP_N 377
+#define _ERROR_POP_N 379
#define _EXIT_INIT_CHECK EXIT_INIT_CHECK
-#define _EXPAND_METHOD 378
-#define _EXPAND_METHOD_KW 379
-#define _FATAL_ERROR 380
+#define _EXPAND_METHOD 380
+#define _EXPAND_METHOD_KW 381
+#define _FATAL_ERROR 382
#define _FORMAT_SIMPLE FORMAT_SIMPLE
#define _FORMAT_WITH_SPEC FORMAT_WITH_SPEC
-#define _FOR_ITER 381
-#define _FOR_ITER_GEN_FRAME 382
-#define _FOR_ITER_TIER_TWO 383
+#define _FOR_ITER 383
+#define _FOR_ITER_GEN_FRAME 384
+#define _FOR_ITER_TIER_TWO 385
#define _GET_AITER GET_AITER
#define _GET_ANEXT GET_ANEXT
#define _GET_AWAITABLE GET_AWAITABLE
#define _GET_ITER GET_ITER
#define _GET_LEN GET_LEN
#define _GET_YIELD_FROM_ITER GET_YIELD_FROM_ITER
-#define _GUARD_BINARY_OP_EXTEND 384
-#define _GUARD_CALLABLE_ISINSTANCE 385
-#define _GUARD_CALLABLE_LEN 386
-#define _GUARD_CALLABLE_LIST_APPEND 387
-#define _GUARD_CALLABLE_STR_1 388
-#define _GUARD_CALLABLE_TUPLE_1 389
-#define _GUARD_CALLABLE_TYPE_1 390
-#define _GUARD_DORV_NO_DICT 391
-#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 392
-#define _GUARD_GLOBALS_VERSION 393
-#define _GUARD_IS_FALSE_POP 394
-#define _GUARD_IS_NONE_POP 395
-#define _GUARD_IS_NOT_NONE_POP 396
-#define _GUARD_IS_TRUE_POP 397
-#define _GUARD_KEYS_VERSION 398
-#define _GUARD_NOS_DICT 399
-#define _GUARD_NOS_FLOAT 400
-#define _GUARD_NOS_INT 401
-#define _GUARD_NOS_LIST 402
-#define _GUARD_NOS_NOT_NULL 403
-#define _GUARD_NOS_NULL 404
-#define _GUARD_NOS_OVERFLOWED 405
-#define _GUARD_NOS_TUPLE 406
-#define _GUARD_NOS_UNICODE 407
-#define _GUARD_NOT_EXHAUSTED_LIST 408
-#define _GUARD_NOT_EXHAUSTED_RANGE 409
-#define _GUARD_NOT_EXHAUSTED_TUPLE 410
-#define _GUARD_THIRD_NULL 411
-#define _GUARD_TOS_ANY_SET 412
-#define _GUARD_TOS_DICT 413
-#define _GUARD_TOS_FLOAT 414
-#define _GUARD_TOS_INT 415
-#define _GUARD_TOS_LIST 416
-#define _GUARD_TOS_OVERFLOWED 417
-#define _GUARD_TOS_SLICE 418
-#define _GUARD_TOS_TUPLE 419
-#define _GUARD_TOS_UNICODE 420
-#define _GUARD_TYPE_VERSION 421
-#define _GUARD_TYPE_VERSION_AND_LOCK 422
-#define _HANDLE_PENDING_AND_DEOPT 423
+#define _GUARD_BINARY_OP_EXTEND 386
+#define _GUARD_CALLABLE_ISINSTANCE 387
+#define _GUARD_CALLABLE_LEN 388
+#define _GUARD_CALLABLE_LIST_APPEND 389
+#define _GUARD_CALLABLE_STR_1 390
+#define _GUARD_CALLABLE_TUPLE_1 391
+#define _GUARD_CALLABLE_TYPE_1 392
+#define _GUARD_DORV_NO_DICT 393
+#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 394
+#define _GUARD_GLOBALS_VERSION 395
+#define _GUARD_IP_RETURN_GENERATOR 396
+#define _GUARD_IP_RETURN_VALUE 397
+#define _GUARD_IP_YIELD_VALUE 398
+#define _GUARD_IP__PUSH_FRAME 399
+#define _GUARD_IS_FALSE_POP 400
+#define _GUARD_IS_NONE_POP 401
+#define _GUARD_IS_NOT_NONE_POP 402
+#define _GUARD_IS_TRUE_POP 403
+#define _GUARD_KEYS_VERSION 404
+#define _GUARD_NOS_DICT 405
+#define _GUARD_NOS_FLOAT 406
+#define _GUARD_NOS_INT 407
+#define _GUARD_NOS_LIST 408
+#define _GUARD_NOS_NOT_NULL 409
+#define _GUARD_NOS_NULL 410
+#define _GUARD_NOS_OVERFLOWED 411
+#define _GUARD_NOS_TUPLE 412
+#define _GUARD_NOS_UNICODE 413
+#define _GUARD_NOT_EXHAUSTED_LIST 414
+#define _GUARD_NOT_EXHAUSTED_RANGE 415
+#define _GUARD_NOT_EXHAUSTED_TUPLE 416
+#define _GUARD_THIRD_NULL 417
+#define _GUARD_TOS_ANY_SET 418
+#define _GUARD_TOS_DICT 419
+#define _GUARD_TOS_FLOAT 420
+#define _GUARD_TOS_INT 421
+#define _GUARD_TOS_LIST 422
+#define _GUARD_TOS_OVERFLOWED 423
+#define _GUARD_TOS_SLICE 424
+#define _GUARD_TOS_TUPLE 425
+#define _GUARD_TOS_UNICODE 426
+#define _GUARD_TYPE_VERSION 427
+#define _GUARD_TYPE_VERSION_AND_LOCK 428
+#define _HANDLE_PENDING_AND_DEOPT 429
#define _IMPORT_FROM IMPORT_FROM
#define _IMPORT_NAME IMPORT_NAME
-#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 424
-#define _INIT_CALL_PY_EXACT_ARGS 425
-#define _INIT_CALL_PY_EXACT_ARGS_0 426
-#define _INIT_CALL_PY_EXACT_ARGS_1 427
-#define _INIT_CALL_PY_EXACT_ARGS_2 428
-#define _INIT_CALL_PY_EXACT_ARGS_3 429
-#define _INIT_CALL_PY_EXACT_ARGS_4 430
-#define _INSERT_NULL 431
+#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 430
+#define _INIT_CALL_PY_EXACT_ARGS 431
+#define _INIT_CALL_PY_EXACT_ARGS_0 432
+#define _INIT_CALL_PY_EXACT_ARGS_1 433
+#define _INIT_CALL_PY_EXACT_ARGS_2 434
+#define _INIT_CALL_PY_EXACT_ARGS_3 435
+#define _INIT_CALL_PY_EXACT_ARGS_4 436
+#define _INSERT_NULL 437
#define _INSTRUMENTED_FOR_ITER INSTRUMENTED_FOR_ITER
#define _INSTRUMENTED_INSTRUCTION INSTRUMENTED_INSTRUCTION
#define _INSTRUMENTED_JUMP_FORWARD INSTRUMENTED_JUMP_FORWARD
#define _INSTRUMENTED_POP_JUMP_IF_NONE INSTRUMENTED_POP_JUMP_IF_NONE
#define _INSTRUMENTED_POP_JUMP_IF_NOT_NONE INSTRUMENTED_POP_JUMP_IF_NOT_NONE
#define _INSTRUMENTED_POP_JUMP_IF_TRUE INSTRUMENTED_POP_JUMP_IF_TRUE
-#define _IS_NONE 432
+#define _IS_NONE 438
#define _IS_OP IS_OP
-#define _ITER_CHECK_LIST 433
-#define _ITER_CHECK_RANGE 434
-#define _ITER_CHECK_TUPLE 435
-#define _ITER_JUMP_LIST 436
-#define _ITER_JUMP_RANGE 437
-#define _ITER_JUMP_TUPLE 438
-#define _ITER_NEXT_LIST 439
-#define _ITER_NEXT_LIST_TIER_TWO 440
-#define _ITER_NEXT_RANGE 441
-#define _ITER_NEXT_TUPLE 442
-#define _JUMP_TO_TOP 443
+#define _ITER_CHECK_LIST 439
+#define _ITER_CHECK_RANGE 440
+#define _ITER_CHECK_TUPLE 441
+#define _ITER_JUMP_LIST 442
+#define _ITER_JUMP_RANGE 443
+#define _ITER_JUMP_TUPLE 444
+#define _ITER_NEXT_LIST 445
+#define _ITER_NEXT_LIST_TIER_TWO 446
+#define _ITER_NEXT_RANGE 447
+#define _ITER_NEXT_TUPLE 448
+#define _JUMP_BACKWARD_NO_INTERRUPT JUMP_BACKWARD_NO_INTERRUPT
+#define _JUMP_TO_TOP 449
#define _LIST_APPEND LIST_APPEND
#define _LIST_EXTEND LIST_EXTEND
-#define _LOAD_ATTR 444
-#define _LOAD_ATTR_CLASS 445
+#define _LOAD_ATTR 450
+#define _LOAD_ATTR_CLASS 451
#define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN
-#define _LOAD_ATTR_INSTANCE_VALUE 446
-#define _LOAD_ATTR_METHOD_LAZY_DICT 447
-#define _LOAD_ATTR_METHOD_NO_DICT 448
-#define _LOAD_ATTR_METHOD_WITH_VALUES 449
-#define _LOAD_ATTR_MODULE 450
-#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 451
-#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 452
-#define _LOAD_ATTR_PROPERTY_FRAME 453
-#define _LOAD_ATTR_SLOT 454
-#define _LOAD_ATTR_WITH_HINT 455
+#define _LOAD_ATTR_INSTANCE_VALUE 452
+#define _LOAD_ATTR_METHOD_LAZY_DICT 453
+#define _LOAD_ATTR_METHOD_NO_DICT 454
+#define _LOAD_ATTR_METHOD_WITH_VALUES 455
+#define _LOAD_ATTR_MODULE 456
+#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 457
+#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 458
+#define _LOAD_ATTR_PROPERTY_FRAME 459
+#define _LOAD_ATTR_SLOT 460
+#define _LOAD_ATTR_WITH_HINT 461
#define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS
-#define _LOAD_BYTECODE 456
+#define _LOAD_BYTECODE 462
#define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT
#define _LOAD_CONST LOAD_CONST
-#define _LOAD_CONST_INLINE 457
-#define _LOAD_CONST_INLINE_BORROW 458
-#define _LOAD_CONST_UNDER_INLINE 459
-#define _LOAD_CONST_UNDER_INLINE_BORROW 460
+#define _LOAD_CONST_INLINE 463
+#define _LOAD_CONST_INLINE_BORROW 464
+#define _LOAD_CONST_UNDER_INLINE 465
+#define _LOAD_CONST_UNDER_INLINE_BORROW 466
#define _LOAD_DEREF LOAD_DEREF
-#define _LOAD_FAST 461
-#define _LOAD_FAST_0 462
-#define _LOAD_FAST_1 463
-#define _LOAD_FAST_2 464
-#define _LOAD_FAST_3 465
-#define _LOAD_FAST_4 466
-#define _LOAD_FAST_5 467
-#define _LOAD_FAST_6 468
-#define _LOAD_FAST_7 469
+#define _LOAD_FAST 467
+#define _LOAD_FAST_0 468
+#define _LOAD_FAST_1 469
+#define _LOAD_FAST_2 470
+#define _LOAD_FAST_3 471
+#define _LOAD_FAST_4 472
+#define _LOAD_FAST_5 473
+#define _LOAD_FAST_6 474
+#define _LOAD_FAST_7 475
#define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR
-#define _LOAD_FAST_BORROW 470
-#define _LOAD_FAST_BORROW_0 471
-#define _LOAD_FAST_BORROW_1 472
-#define _LOAD_FAST_BORROW_2 473
-#define _LOAD_FAST_BORROW_3 474
-#define _LOAD_FAST_BORROW_4 475
-#define _LOAD_FAST_BORROW_5 476
-#define _LOAD_FAST_BORROW_6 477
-#define _LOAD_FAST_BORROW_7 478
+#define _LOAD_FAST_BORROW 476
+#define _LOAD_FAST_BORROW_0 477
+#define _LOAD_FAST_BORROW_1 478
+#define _LOAD_FAST_BORROW_2 479
+#define _LOAD_FAST_BORROW_3 480
+#define _LOAD_FAST_BORROW_4 481
+#define _LOAD_FAST_BORROW_5 482
+#define _LOAD_FAST_BORROW_6 483
+#define _LOAD_FAST_BORROW_7 484
#define _LOAD_FAST_BORROW_LOAD_FAST_BORROW LOAD_FAST_BORROW_LOAD_FAST_BORROW
#define _LOAD_FAST_CHECK LOAD_FAST_CHECK
#define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST
#define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF
#define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS
-#define _LOAD_GLOBAL 479
-#define _LOAD_GLOBAL_BUILTINS 480
-#define _LOAD_GLOBAL_MODULE 481
+#define _LOAD_GLOBAL 485
+#define _LOAD_GLOBAL_BUILTINS 486
+#define _LOAD_GLOBAL_MODULE 487
#define _LOAD_LOCALS LOAD_LOCALS
#define _LOAD_NAME LOAD_NAME
-#define _LOAD_SMALL_INT 482
-#define _LOAD_SMALL_INT_0 483
-#define _LOAD_SMALL_INT_1 484
-#define _LOAD_SMALL_INT_2 485
-#define _LOAD_SMALL_INT_3 486
-#define _LOAD_SPECIAL 487
+#define _LOAD_SMALL_INT 488
+#define _LOAD_SMALL_INT_0 489
+#define _LOAD_SMALL_INT_1 490
+#define _LOAD_SMALL_INT_2 491
+#define _LOAD_SMALL_INT_3 492
+#define _LOAD_SPECIAL 493
#define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR
#define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD
-#define _MAKE_CALLARGS_A_TUPLE 488
+#define _MAKE_CALLARGS_A_TUPLE 494
#define _MAKE_CELL MAKE_CELL
#define _MAKE_FUNCTION MAKE_FUNCTION
-#define _MAKE_WARM 489
+#define _MAKE_WARM 495
#define _MAP_ADD MAP_ADD
#define _MATCH_CLASS MATCH_CLASS
#define _MATCH_KEYS MATCH_KEYS
#define _MATCH_MAPPING MATCH_MAPPING
#define _MATCH_SEQUENCE MATCH_SEQUENCE
-#define _MAYBE_EXPAND_METHOD 490
-#define _MAYBE_EXPAND_METHOD_KW 491
-#define _MONITOR_CALL 492
-#define _MONITOR_CALL_KW 493
-#define _MONITOR_JUMP_BACKWARD 494
-#define _MONITOR_RESUME 495
+#define _MAYBE_EXPAND_METHOD 496
+#define _MAYBE_EXPAND_METHOD_KW 497
+#define _MONITOR_CALL 498
+#define _MONITOR_CALL_KW 499
+#define _MONITOR_JUMP_BACKWARD 500
+#define _MONITOR_RESUME 501
#define _NOP NOP
-#define _POP_CALL 496
-#define _POP_CALL_LOAD_CONST_INLINE_BORROW 497
-#define _POP_CALL_ONE 498
-#define _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW 499
-#define _POP_CALL_TWO 500
-#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 501
+#define _POP_CALL 502
+#define _POP_CALL_LOAD_CONST_INLINE_BORROW 503
+#define _POP_CALL_ONE 504
+#define _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW 505
+#define _POP_CALL_TWO 506
+#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 507
#define _POP_EXCEPT POP_EXCEPT
#define _POP_ITER POP_ITER
-#define _POP_JUMP_IF_FALSE 502
-#define _POP_JUMP_IF_TRUE 503
+#define _POP_JUMP_IF_FALSE 508
+#define _POP_JUMP_IF_TRUE 509
#define _POP_TOP POP_TOP
-#define _POP_TOP_FLOAT 504
-#define _POP_TOP_INT 505
-#define _POP_TOP_LOAD_CONST_INLINE 506
-#define _POP_TOP_LOAD_CONST_INLINE_BORROW 507
-#define _POP_TOP_NOP 508
-#define _POP_TOP_UNICODE 509
-#define _POP_TWO 510
-#define _POP_TWO_LOAD_CONST_INLINE_BORROW 511
+#define _POP_TOP_FLOAT 510
+#define _POP_TOP_INT 511
+#define _POP_TOP_LOAD_CONST_INLINE 512
+#define _POP_TOP_LOAD_CONST_INLINE_BORROW 513
+#define _POP_TOP_NOP 514
+#define _POP_TOP_UNICODE 515
+#define _POP_TWO 516
+#define _POP_TWO_LOAD_CONST_INLINE_BORROW 517
#define _PUSH_EXC_INFO PUSH_EXC_INFO
-#define _PUSH_FRAME 512
+#define _PUSH_FRAME 518
#define _PUSH_NULL PUSH_NULL
-#define _PUSH_NULL_CONDITIONAL 513
-#define _PY_FRAME_GENERAL 514
-#define _PY_FRAME_KW 515
-#define _QUICKEN_RESUME 516
-#define _REPLACE_WITH_TRUE 517
+#define _PUSH_NULL_CONDITIONAL 519
+#define _PY_FRAME_GENERAL 520
+#define _PY_FRAME_KW 521
+#define _QUICKEN_RESUME 522
+#define _REPLACE_WITH_TRUE 523
#define _RESUME_CHECK RESUME_CHECK
#define _RETURN_GENERATOR RETURN_GENERATOR
#define _RETURN_VALUE RETURN_VALUE
-#define _SAVE_RETURN_OFFSET 518
-#define _SEND 519
-#define _SEND_GEN_FRAME 520
+#define _SAVE_RETURN_OFFSET 524
+#define _SEND 525
+#define _SEND_GEN_FRAME 526
#define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS
#define _SET_ADD SET_ADD
#define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE
#define _SET_UPDATE SET_UPDATE
-#define _START_EXECUTOR 521
-#define _STORE_ATTR 522
-#define _STORE_ATTR_INSTANCE_VALUE 523
-#define _STORE_ATTR_SLOT 524
-#define _STORE_ATTR_WITH_HINT 525
+#define _START_EXECUTOR 527
+#define _STORE_ATTR 528
+#define _STORE_ATTR_INSTANCE_VALUE 529
+#define _STORE_ATTR_SLOT 530
+#define _STORE_ATTR_WITH_HINT 531
#define _STORE_DEREF STORE_DEREF
-#define _STORE_FAST 526
-#define _STORE_FAST_0 527
-#define _STORE_FAST_1 528
-#define _STORE_FAST_2 529
-#define _STORE_FAST_3 530
-#define _STORE_FAST_4 531
-#define _STORE_FAST_5 532
-#define _STORE_FAST_6 533
-#define _STORE_FAST_7 534
+#define _STORE_FAST 532
+#define _STORE_FAST_0 533
+#define _STORE_FAST_1 534
+#define _STORE_FAST_2 535
+#define _STORE_FAST_3 536
+#define _STORE_FAST_4 537
+#define _STORE_FAST_5 538
+#define _STORE_FAST_6 539
+#define _STORE_FAST_7 540
#define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST
#define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST
#define _STORE_GLOBAL STORE_GLOBAL
#define _STORE_NAME STORE_NAME
-#define _STORE_SLICE 535
-#define _STORE_SUBSCR 536
-#define _STORE_SUBSCR_DICT 537
-#define _STORE_SUBSCR_LIST_INT 538
-#define _SWAP 539
-#define _SWAP_2 540
-#define _SWAP_3 541
-#define _TIER2_RESUME_CHECK 542
-#define _TO_BOOL 543
+#define _STORE_SLICE 541
+#define _STORE_SUBSCR 542
+#define _STORE_SUBSCR_DICT 543
+#define _STORE_SUBSCR_LIST_INT 544
+#define _SWAP 545
+#define _SWAP_2 546
+#define _SWAP_3 547
+#define _TIER2_RESUME_CHECK 548
+#define _TO_BOOL 549
#define _TO_BOOL_BOOL TO_BOOL_BOOL
#define _TO_BOOL_INT TO_BOOL_INT
-#define _TO_BOOL_LIST 544
+#define _TO_BOOL_LIST 550
#define _TO_BOOL_NONE TO_BOOL_NONE
-#define _TO_BOOL_STR 545
+#define _TO_BOOL_STR 551
#define _UNARY_INVERT UNARY_INVERT
#define _UNARY_NEGATIVE UNARY_NEGATIVE
#define _UNARY_NOT UNARY_NOT
#define _UNPACK_EX UNPACK_EX
-#define _UNPACK_SEQUENCE 546
-#define _UNPACK_SEQUENCE_LIST 547
-#define _UNPACK_SEQUENCE_TUPLE 548
-#define _UNPACK_SEQUENCE_TWO_TUPLE 549
+#define _UNPACK_SEQUENCE 552
+#define _UNPACK_SEQUENCE_LIST 553
+#define _UNPACK_SEQUENCE_TUPLE 554
+#define _UNPACK_SEQUENCE_TWO_TUPLE 555
#define _WITH_EXCEPT_START WITH_EXCEPT_START
#define _YIELD_VALUE YIELD_VALUE
-#define MAX_UOP_ID 549
+#define MAX_UOP_ID 555
#ifdef __cplusplus
}
#include <stdint.h>
#include "pycore_uop_ids.h"
-extern const uint16_t _PyUop_Flags[MAX_UOP_ID+1];
+extern const uint32_t _PyUop_Flags[MAX_UOP_ID+1];
typedef struct _rep_range { uint8_t start; uint8_t stop; } ReplicationRange;
extern const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1];
extern const char * const _PyOpcode_uop_name[MAX_UOP_ID+1];
extern int _PyUop_num_popped(int opcode, int oparg);
#ifdef NEED_OPCODE_METADATA
-const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
+const uint32_t _PyUop_Flags[MAX_UOP_ID+1] = {
[_NOP] = HAS_PURE_FLAG,
[_CHECK_PERIODIC] = HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_CHECK_PERIODIC_IF_NOT_YIELD_FROM] = HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_DELETE_SUBSCR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_CALL_INTRINSIC_1] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_CALL_INTRINSIC_2] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
- [_RETURN_VALUE] = HAS_ESCAPES_FLAG,
+ [_RETURN_VALUE] = HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG,
[_GET_AITER] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_GET_ANEXT] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG,
[_GET_AWAITABLE] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_SEND_GEN_FRAME] = HAS_ARG_FLAG | HAS_DEOPT_FLAG,
- [_YIELD_VALUE] = HAS_ARG_FLAG,
+ [_YIELD_VALUE] = HAS_ARG_FLAG | HAS_NEEDS_GUARD_IP_FLAG,
[_POP_EXCEPT] = HAS_ESCAPES_FLAG,
[_LOAD_COMMON_CONSTANT] = HAS_ARG_FLAG,
[_LOAD_BUILD_CLASS] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_INIT_CALL_PY_EXACT_ARGS_3] = HAS_PURE_FLAG,
[_INIT_CALL_PY_EXACT_ARGS_4] = HAS_PURE_FLAG,
[_INIT_CALL_PY_EXACT_ARGS] = HAS_ARG_FLAG | HAS_PURE_FLAG,
- [_PUSH_FRAME] = 0,
+ [_PUSH_FRAME] = HAS_NEEDS_GUARD_IP_FLAG,
[_GUARD_NOS_NULL] = HAS_DEOPT_FLAG,
[_GUARD_NOS_NOT_NULL] = HAS_EXIT_FLAG,
[_GUARD_THIRD_NULL] = HAS_DEOPT_FLAG,
[_MAKE_CALLARGS_A_TUPLE] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG,
[_MAKE_FUNCTION] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_SET_FUNCTION_ATTRIBUTE] = HAS_ARG_FLAG,
- [_RETURN_GENERATOR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
+ [_RETURN_GENERATOR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG,
[_BUILD_SLICE] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_CONVERT_VALUE] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_FORMAT_SIMPLE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_CHECK_STACK_SPACE_OPERAND] = HAS_DEOPT_FLAG,
[_SAVE_RETURN_OFFSET] = HAS_ARG_FLAG,
[_EXIT_TRACE] = HAS_ESCAPES_FLAG,
+ [_DYNAMIC_EXIT] = HAS_ESCAPES_FLAG,
[_CHECK_VALIDITY] = HAS_DEOPT_FLAG,
[_LOAD_CONST_INLINE] = HAS_PURE_FLAG,
[_POP_TOP_LOAD_CONST_INLINE] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG,
[_HANDLE_PENDING_AND_DEOPT] = HAS_ESCAPES_FLAG,
[_ERROR_POP_N] = HAS_ARG_FLAG,
[_TIER2_RESUME_CHECK] = HAS_PERIODIC_FLAG,
- [_COLD_EXIT] = HAS_ESCAPES_FLAG,
+ [_COLD_EXIT] = 0,
+ [_COLD_DYNAMIC_EXIT] = 0,
+ [_GUARD_IP__PUSH_FRAME] = HAS_EXIT_FLAG,
+ [_GUARD_IP_YIELD_VALUE] = HAS_EXIT_FLAG,
+ [_GUARD_IP_RETURN_VALUE] = HAS_EXIT_FLAG,
+ [_GUARD_IP_RETURN_GENERATOR] = HAS_EXIT_FLAG,
};
const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1] = {
[_CHECK_STACK_SPACE] = "_CHECK_STACK_SPACE",
[_CHECK_STACK_SPACE_OPERAND] = "_CHECK_STACK_SPACE_OPERAND",
[_CHECK_VALIDITY] = "_CHECK_VALIDITY",
+ [_COLD_DYNAMIC_EXIT] = "_COLD_DYNAMIC_EXIT",
[_COLD_EXIT] = "_COLD_EXIT",
[_COMPARE_OP] = "_COMPARE_OP",
[_COMPARE_OP_FLOAT] = "_COMPARE_OP_FLOAT",
[_DEOPT] = "_DEOPT",
[_DICT_MERGE] = "_DICT_MERGE",
[_DICT_UPDATE] = "_DICT_UPDATE",
+ [_DYNAMIC_EXIT] = "_DYNAMIC_EXIT",
[_END_FOR] = "_END_FOR",
[_END_SEND] = "_END_SEND",
[_ERROR_POP_N] = "_ERROR_POP_N",
[_GUARD_DORV_NO_DICT] = "_GUARD_DORV_NO_DICT",
[_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT] = "_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT",
[_GUARD_GLOBALS_VERSION] = "_GUARD_GLOBALS_VERSION",
+ [_GUARD_IP_RETURN_GENERATOR] = "_GUARD_IP_RETURN_GENERATOR",
+ [_GUARD_IP_RETURN_VALUE] = "_GUARD_IP_RETURN_VALUE",
+ [_GUARD_IP_YIELD_VALUE] = "_GUARD_IP_YIELD_VALUE",
+ [_GUARD_IP__PUSH_FRAME] = "_GUARD_IP__PUSH_FRAME",
[_GUARD_IS_FALSE_POP] = "_GUARD_IS_FALSE_POP",
[_GUARD_IS_NONE_POP] = "_GUARD_IS_NONE_POP",
[_GUARD_IS_NOT_NONE_POP] = "_GUARD_IS_NOT_NONE_POP",
return 0;
case _EXIT_TRACE:
return 0;
+ case _DYNAMIC_EXIT:
+ return 0;
case _CHECK_VALIDITY:
return 0;
case _LOAD_CONST_INLINE:
return 0;
case _COLD_EXIT:
return 0;
+ case _COLD_DYNAMIC_EXIT:
+ return 0;
+ case _GUARD_IP__PUSH_FRAME:
+ return 0;
+ case _GUARD_IP_YIELD_VALUE:
+ return 0;
+ case _GUARD_IP_RETURN_VALUE:
+ return 0;
+ case _GUARD_IP_RETURN_GENERATOR:
+ return 0;
default:
return -1;
}
class NodeTransformerTests(ASTTestMixin, unittest.TestCase):
def assertASTTransformation(self, transformer_class,
- initial_code, expected_code):
- initial_ast = ast.parse(dedent(initial_code))
+ code, expected_code):
+ initial_ast = ast.parse(dedent(code))
expected_ast = ast.parse(dedent(expected_code))
transformer = transformer_class()
uops = get_opnames(ex)
self.assertIn("_FOR_ITER_TIER_TWO", uops)
- def test_confidence_score(self):
- def testfunc(n):
- bits = 0
- for i in range(n):
- if i & 0x01:
- bits += 1
- if i & 0x02:
- bits += 1
- if i&0x04:
- bits += 1
- if i&0x08:
- bits += 1
- if i&0x10:
- bits += 1
- return bits
-
- x = testfunc(TIER2_THRESHOLD * 2)
-
- self.assertEqual(x, TIER2_THRESHOLD * 5)
- ex = get_first_executor(testfunc)
- self.assertIsNotNone(ex)
- ops = list(iter_opnames(ex))
- #Since branch is 50/50 the trace could go either way.
- count = ops.count("_GUARD_IS_TRUE_POP") + ops.count("_GUARD_IS_FALSE_POP")
- self.assertLessEqual(count, 2)
-
@requires_specialization
@unittest.skipIf(Py_GIL_DISABLED, "optimizer not yet supported in free-threaded builds")
self.assertLessEqual(len(guard_nos_unicode_count), 1)
self.assertIn("_COMPARE_OP_STR", uops)
- def test_type_inconsistency(self):
- ns = {}
- src = textwrap.dedent("""
- def testfunc(n):
- for i in range(n):
- x = _test_global + _test_global
- """)
- exec(src, ns, ns)
- testfunc = ns['testfunc']
- ns['_test_global'] = 0
- _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD - 1)
- self.assertIsNone(ex)
- ns['_test_global'] = 1
- _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD - 1)
- self.assertIsNotNone(ex)
- uops = get_opnames(ex)
- self.assertNotIn("_GUARD_TOS_INT", uops)
- self.assertNotIn("_GUARD_NOS_INT", uops)
- self.assertNotIn("_BINARY_OP_ADD_INT", uops)
- self.assertNotIn("_POP_TWO_LOAD_CONST_INLINE_BORROW", uops)
- # Try again, but between the runs, set the global to a float.
- # This should result in no executor the second time.
- ns = {}
- exec(src, ns, ns)
- testfunc = ns['testfunc']
- ns['_test_global'] = 0
- _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD - 1)
- self.assertIsNone(ex)
- ns['_test_global'] = 3.14
- _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD - 1)
- self.assertIsNone(ex)
-
+ @unittest.skip("gh-139109 WIP")
def test_combine_stack_space_checks_sequential(self):
def dummy12(x):
return x - 1
largest_stack = _testinternalcapi.get_co_framesize(dummy13.__code__)
self.assertIn(("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands)
+ @unittest.skip("gh-139109 WIP")
def test_combine_stack_space_checks_nested(self):
def dummy12(x):
return x + 3
)
self.assertIn(("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands)
+ @unittest.skip("gh-139109 WIP")
def test_combine_stack_space_checks_several_calls(self):
def dummy12(x):
return x + 3
)
self.assertIn(("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands)
+ @unittest.skip("gh-139109 WIP")
def test_combine_stack_space_checks_several_calls_different_order(self):
# same as `several_calls` but with top-level calls reversed
def dummy12(x):
)
self.assertIn(("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands)
+ @unittest.skip("gh-139109 WIP")
def test_combine_stack_space_complex(self):
def dummy0(x):
return x
("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands
)
+ @unittest.skip("gh-139109 WIP")
def test_combine_stack_space_checks_large_framesize(self):
# Create a function with a large framesize. This ensures _CHECK_STACK_SPACE is
# actually doing its job. Note that the resulting trace hits
("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands
)
+ @unittest.skip("gh-139109 WIP")
def test_combine_stack_space_checks_recursion(self):
def dummy15(x):
while x > 0:
def frame_3_jit() -> None:
# JITs just before the last loop:
- for i in range(_testinternalcapi.TIER2_THRESHOLD + 1):
+ # 1 extra iteration for tracing.
+ for i in range(_testinternalcapi.TIER2_THRESHOLD + 2):
# Careful, doing this in the reverse order breaks tracing:
- expected = {enabled} and i == _testinternalcapi.TIER2_THRESHOLD
+ expected = {enabled} and i >= _testinternalcapi.TIER2_THRESHOLD + 1
assert sys._jit.is_active() is expected
frame_2_jit(expected)
assert sys._jit.is_active() is expected
--- /dev/null
+A new tracing frontend for the JIT compiler has been implemented. Patch by Ken Jin. Design for CPython by Ken Jin, Mark Shannon and Brandt Bucher.
}
if (PyModule_Add(module, "TIER2_THRESHOLD",
- PyLong_FromLong(JUMP_BACKWARD_INITIAL_VALUE + 1)) < 0) {
+                     // Plus one more, because one loop iteration is spent tracing.
+ PyLong_FromLong(JUMP_BACKWARD_INITIAL_VALUE + 2)) < 0) {
return 1;
}
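/* Worked example (illustrative only): TIER2_THRESHOLD used to be
 * JUMP_BACKWARD_INITIAL_VALUE + 1 -- the iterations needed to warm a loop up.
 * With the tracing frontend, one further iteration is spent recording the
 * trace before an executor exists, so the value exported to the tests is
 * JUMP_BACKWARD_INITIAL_VALUE + 2 == 4002. */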
PyMem_Free(co_extra);
}
#ifdef _Py_TIER2
+ _PyJit_Tracer_InvalidateDependency(tstate, self);
if (co->co_executors != NULL) {
clear_executors(co);
}
#include "frameobject.h" // PyFrameLocalsProxyObject
#include "opcode.h" // EXTENDED_ARG
+#include "pycore_optimizer.h"
#include "clinic/frameobject.c.h"
return -1;
}
- _Py_Executors_InvalidateDependency(PyInterpreterState_Get(), co, 1);
+#if _Py_TIER2
+ _Py_Executors_InvalidateDependency(_PyInterpreterState_GET(), co, 1);
+ _PyJit_Tracer_InvalidateDependency(_PyThreadState_GET(), co);
+#endif
_PyLocals_Kind kind = _PyLocals_GetKind(co->co_localspluskinds, i);
_PyStackRef oldvalue = fast[i];
#include "pycore_setobject.h" // _PySet_NextEntry()
#include "pycore_stats.h"
#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS()
-
+#include "pycore_optimizer.h" // _PyJit_Tracer_InvalidateDependency
static const char *
func_event_name(PyFunction_WatchEvent event) {
if (_PyObject_ResurrectEnd(self)) {
return;
}
+#if _Py_TIER2
+ _Py_Executors_InvalidateDependency(_PyInterpreterState_GET(), self, 1);
+ _PyJit_Tracer_InvalidateDependency(_PyThreadState_GET(), self);
+#endif
_PyObject_GC_UNTRACK(op);
FT_CLEAR_WEAKREFS(self, op->func_weakreflist);
(void)func_clear((PyObject*)op);
JUMP_BACKWARD_JIT,
};
- tier1 op(_SPECIALIZE_JUMP_BACKWARD, (--)) {
- #if ENABLE_SPECIALIZATION_FT
+ specializing tier1 op(_SPECIALIZE_JUMP_BACKWARD, (--)) {
+ #if ENABLE_SPECIALIZATION
if (this_instr->op.code == JUMP_BACKWARD) {
uint8_t desired = tstate->interp->jit ? JUMP_BACKWARD_JIT : JUMP_BACKWARD_NO_JIT;
FT_ATOMIC_STORE_UINT8_RELAXED(this_instr->op.code, desired);
tier1 op(_JIT, (--)) {
#ifdef _Py_TIER2
_Py_BackoffCounter counter = this_instr[1].counter;
- if (backoff_counter_triggers(counter) && this_instr->op.code == JUMP_BACKWARD_JIT) {
- _Py_CODEUNIT *start = this_instr;
- /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */
+ if (!IS_JIT_TRACING() && backoff_counter_triggers(counter) &&
+ this_instr->op.code == JUMP_BACKWARD_JIT &&
+ next_instr->op.code != ENTER_EXECUTOR) {
+            /* Back up over EXTENDED_ARGs so the executor is inserted at the correct place */
+ _Py_CODEUNIT *insert_exec_at = this_instr;
while (oparg > 255) {
oparg >>= 8;
- start--;
+ insert_exec_at--;
}
- _PyExecutorObject *executor;
- int optimized = _PyOptimizer_Optimize(frame, start, &executor, 0);
- if (optimized <= 0) {
- this_instr[1].counter = restart_backoff_counter(counter);
- ERROR_IF(optimized < 0);
+ int succ = _PyJit_TryInitializeTracing(tstate, frame, this_instr, insert_exec_at, next_instr, STACK_LEVEL(), 0, NULL, oparg);
+ if (succ) {
+ ENTER_TRACING();
}
else {
- this_instr[1].counter = initial_jump_backoff_counter();
- assert(tstate->current_executor == NULL);
- assert(executor != tstate->interp->cold_executor);
- tstate->jit_exit = NULL;
- TIER1_TO_TIER2(executor);
+ this_instr[1].counter = restart_backoff_counter(counter);
}
}
else {
tier1 inst(ENTER_EXECUTOR, (--)) {
#ifdef _Py_TIER2
+ if (IS_JIT_TRACING()) {
+ next_instr = this_instr;
+ goto stop_tracing;
+ }
PyCodeObject *code = _PyFrame_GetCode(frame);
_PyExecutorObject *executor = code->co_executors->executors[oparg & 255];
assert(executor->vm_data.index == INSTR_OFFSET() - 1);
macro(POP_JUMP_IF_NOT_NONE) = unused/1 + _IS_NONE + _POP_JUMP_IF_FALSE;
- tier1 inst(JUMP_BACKWARD_NO_INTERRUPT, (--)) {
+ replaced inst(JUMP_BACKWARD_NO_INTERRUPT, (--)) {
/* This bytecode is used in the `yield from` or `await` loop.
* If there is an interrupt, we want it handled in the innermost
* generator or coroutine, so we deliberately do not check it here.
tier2 op(_EXIT_TRACE, (exit_p/4 --)) {
_PyExitData *exit = (_PyExitData *)exit_p;
#if defined(Py_DEBUG) && !defined(_Py_JIT)
- _Py_CODEUNIT *target = _PyFrame_GetBytecode(frame) + exit->target;
+ const _Py_CODEUNIT *target = ((frame->owner == FRAME_OWNED_BY_INTERPRETER)
+ ? _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS_PTR : _PyFrame_GetBytecode(frame))
+ + exit->target;
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
- if (frame->lltrace >= 2) {
+ if (frame->lltrace >= 3) {
printf("SIDE EXIT: [UOp ");
_PyUOpPrint(&next_uop[-1]);
- printf(", exit %tu, temp %d, target %d -> %s]\n",
+ printf(", exit %tu, temp %d, target %d -> %s, is_control_flow %d]\n",
exit - current_executor->exits, exit->temperature.value_and_backoff,
(int)(target - _PyFrame_GetBytecode(frame)),
- _PyOpcode_OpName[target->op.code]);
+ _PyOpcode_OpName[target->op.code], exit->is_control_flow);
}
#endif
tstate->jit_exit = exit;
TIER2_TO_TIER2(exit->executor);
}
+ tier2 op(_DYNAMIC_EXIT, (exit_p/4 --)) {
+ #if defined(Py_DEBUG) && !defined(_Py_JIT)
+ _PyExitData *exit = (_PyExitData *)exit_p;
+ _Py_CODEUNIT *target = frame->instr_ptr;
+ OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
+ if (frame->lltrace >= 3) {
+ printf("DYNAMIC EXIT: [UOp ");
+ _PyUOpPrint(&next_uop[-1]);
+ printf(", exit %tu, temp %d, target %d -> %s]\n",
+ exit - current_executor->exits, exit->temperature.value_and_backoff,
+ (int)(target - _PyFrame_GetBytecode(frame)),
+ _PyOpcode_OpName[target->op.code]);
+ }
+ #endif
+        // Disabled for now (gh-139109), as it slows down dynamic code tremendously.
+        // In the future, compile and jump to the cold dynamic executors instead.
+ GOTO_TIER_ONE(frame->instr_ptr);
+ }
+
tier2 op(_CHECK_VALIDITY, (--)) {
DEOPT_IF(!current_executor->vm_data.valid);
}
}
tier2 op(_DEOPT, (--)) {
- GOTO_TIER_ONE(_PyFrame_GetBytecode(frame) + CURRENT_TARGET());
+ GOTO_TIER_ONE((frame->owner == FRAME_OWNED_BY_INTERPRETER)
+ ? _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS_PTR : _PyFrame_GetBytecode(frame) + CURRENT_TARGET());
}
tier2 op(_HANDLE_PENDING_AND_DEOPT, (--)) {
tier2 op(_COLD_EXIT, ( -- )) {
_PyExitData *exit = tstate->jit_exit;
assert(exit != NULL);
+ assert(frame->owner < FRAME_OWNED_BY_INTERPRETER);
_Py_CODEUNIT *target = _PyFrame_GetBytecode(frame) + exit->target;
_Py_BackoffCounter temperature = exit->temperature;
- if (!backoff_counter_triggers(temperature)) {
- exit->temperature = advance_backoff_counter(temperature);
- GOTO_TIER_ONE(target);
- }
_PyExecutorObject *executor;
if (target->op.code == ENTER_EXECUTOR) {
PyCodeObject *code = _PyFrame_GetCode(frame);
executor = code->co_executors->executors[target->op.arg];
Py_INCREF(executor);
+ assert(tstate->jit_exit == exit);
+ exit->executor = executor;
+ TIER2_TO_TIER2(exit->executor);
}
else {
+ if (!backoff_counter_triggers(temperature)) {
+ exit->temperature = advance_backoff_counter(temperature);
+ GOTO_TIER_ONE(target);
+ }
_PyExecutorObject *previous_executor = _PyExecutor_FromExit(exit);
assert(tstate->current_executor == (PyObject *)previous_executor);
- int chain_depth = previous_executor->vm_data.chain_depth + 1;
- int optimized = _PyOptimizer_Optimize(frame, target, &executor, chain_depth);
- if (optimized <= 0) {
- exit->temperature = restart_backoff_counter(temperature);
- GOTO_TIER_ONE(optimized < 0 ? NULL : target);
+ // For control-flow guards, we don't want to increase the chain depth: those exits don't
+ // represent deopts, just normal program control flow.
+ int chain_depth = previous_executor->vm_data.chain_depth + !exit->is_control_flow;
+ // Note: it's safe to use target->op.arg here instead of the full oparg built up by
+ // EXTENDED_ARG. The optimizer maintains the invariant that the deopt target always points
+ // back to the first EXTENDED_ARG, so passing anything else would be wrong.
+ int succ = _PyJit_TryInitializeTracing(tstate, frame, target, target, target, STACK_LEVEL(), chain_depth, exit, target->op.arg);
+ exit->temperature = restart_backoff_counter(exit->temperature);
+ if (succ) {
+ GOTO_TIER_ONE_CONTINUE_TRACING(target);
}
- exit->temperature = initial_temperature_backoff_counter();
+ GOTO_TIER_ONE(target);
+ }
+ }
+
+ tier2 op(_COLD_DYNAMIC_EXIT, ( -- )) {
+ // TODO (gh-139109): This should be similar to _COLD_EXIT in the future.
+ _Py_CODEUNIT *target = frame->instr_ptr;
+ GOTO_TIER_ONE(target);
+ }
+
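+ // The _GUARD_IP_* uops below validate that the instruction pointer reached at runtime
+ // (after a frame push, return, yield, etc.) matches the IP recorded when the trace was
+ // built; on a mismatch they advance frame->instr_ptr and side-exit back to tier 1.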
+ tier2 op(_GUARD_IP__PUSH_FRAME, (ip/4 --)) {
+ _Py_CODEUNIT *target = frame->instr_ptr + IP_OFFSET_OF(_PUSH_FRAME);
+ if (target != (_Py_CODEUNIT *)ip) {
+ frame->instr_ptr += IP_OFFSET_OF(_PUSH_FRAME);
+ EXIT_IF(true);
+ }
+ }
+
+ tier2 op(_GUARD_IP_YIELD_VALUE, (ip/4 --)) {
+ _Py_CODEUNIT *target = frame->instr_ptr + IP_OFFSET_OF(YIELD_VALUE);
+ if (target != (_Py_CODEUNIT *)ip) {
+ frame->instr_ptr += IP_OFFSET_OF(YIELD_VALUE);
+ EXIT_IF(true);
+ }
+ }
+
+ tier2 op(_GUARD_IP_RETURN_VALUE, (ip/4 --)) {
+ _Py_CODEUNIT *target = frame->instr_ptr + IP_OFFSET_OF(RETURN_VALUE);
+ if (target != (_Py_CODEUNIT *)ip) {
+ frame->instr_ptr += IP_OFFSET_OF(RETURN_VALUE);
+ EXIT_IF(true);
+ }
+ }
+
+ tier2 op(_GUARD_IP_RETURN_GENERATOR, (ip/4 --)) {
+ _Py_CODEUNIT *target = frame->instr_ptr + IP_OFFSET_OF(RETURN_GENERATOR);
+ if (target != (_Py_CODEUNIT *)ip) {
+ frame->instr_ptr += IP_OFFSET_OF(RETURN_GENERATOR);
+ EXIT_IF(true);
}
- assert(tstate->jit_exit == exit);
- exit->executor = executor;
- TIER2_TO_TIER2(exit->executor);
}
label(pop_2_error) {
DISPATCH();
}
+ label(record_previous_inst) {
+#if _Py_TIER2
+ assert(IS_JIT_TRACING());
+ int opcode = next_instr->op.code;
+ bool stop_tracing = (opcode == WITH_EXCEPT_START ||
+ opcode == RERAISE || opcode == CLEANUP_THROW ||
+ opcode == PUSH_EXC_INFO || opcode == INTERPRETER_EXIT);
+ int full = !_PyJit_translate_single_bytecode_to_trace(tstate, frame, next_instr, stop_tracing);
+ if (full) {
+ LEAVE_TRACING();
+ int err = stop_tracing_and_jit(tstate, frame);
+ ERROR_IF(err < 0);
+ DISPATCH_GOTO_NON_TRACING();
+ }
+ // Super instructions: these specialized instructions normally skip the follow-up
+ // POP_TOP / STORE_FAST, so seeing it here means the instruction deopted and the stack
+ // level the optimizer expects would not match. Mark it so the trace reflects this correctly.
+ _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
+ if ((_tstate->jit_tracer_state.prev_state.instr->op.code == CALL_LIST_APPEND &&
+ opcode == POP_TOP) ||
+ (_tstate->jit_tracer_state.prev_state.instr->op.code == BINARY_OP_INPLACE_ADD_UNICODE &&
+ opcode == STORE_FAST)) {
+ _tstate->jit_tracer_state.prev_state.instr_is_super = true;
+ }
+ else {
+ _tstate->jit_tracer_state.prev_state.instr = next_instr;
+ }
+ PyObject *prev_code = PyStackRef_AsPyObjectBorrow(frame->f_executable);
+ if (_tstate->jit_tracer_state.prev_state.instr_code != (PyCodeObject *)prev_code) {
+ Py_SETREF(_tstate->jit_tracer_state.prev_state.instr_code, (PyCodeObject*)Py_NewRef((prev_code)));
+ }
+
+ _tstate->jit_tracer_state.prev_state.instr_frame = frame;
+ _tstate->jit_tracer_state.prev_state.instr_oparg = oparg;
+ _tstate->jit_tracer_state.prev_state.instr_stacklevel = PyStackRef_IsNone(frame->f_executable) ? 2 : STACK_LEVEL();
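+ // Reset the recorded instruction's adaptive counter so that it re-specializes immediately
+ // while the trace is being recorded (see the re-specialization note near
+ // ADAPTIVE_COUNTER_TRIGGERS).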
+ if (_PyOpcode_Caches[_PyOpcode_Deopt[opcode]]) {
+ (&next_instr[1])->counter = trigger_backoff_counter();
+ }
+ DISPATCH_GOTO_NON_TRACING();
+#else
+ Py_FatalError("JIT label executed in non-jit build.");
+#endif
+ }
+
+ label(stop_tracing) {
+#if _Py_TIER2
+ assert(IS_JIT_TRACING());
+ int opcode = next_instr->op.code;
+ _PyJit_translate_single_bytecode_to_trace(tstate, frame, NULL, true);
+ LEAVE_TRACING();
+ int err = stop_tracing_and_jit(tstate, frame);
+ ERROR_IF(err < 0);
+ DISPATCH_GOTO_NON_TRACING();
+#else
+ Py_FatalError("JIT label executed in non-jit build.");
+#endif
+ }
// END BYTECODES //
{ .op.code = RESUME, .op.arg = RESUME_OPARG_DEPTH1_MASK | RESUME_AT_FUNC_START }
};
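+// Exported so that tier-2 exit and deopt code can compute bytecode targets for frames that
+// are owned by the interpreter (see _DEOPT and _EXIT_TRACE above).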
+const _Py_CODEUNIT *_Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS_PTR = (_Py_CODEUNIT*)&_Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS;
+
#ifdef Py_DEBUG
extern void _PyUOpPrint(const _PyUOpInstruction *uop);
#endif
}
}
+#if _Py_TIER2
+// Returns 1 if an executor was created, 0 if not, and -1 on error.
+static int
+stop_tracing_and_jit(PyThreadState *tstate, _PyInterpreterFrame *frame)
+{
+ int _is_sys_tracing = (tstate->c_tracefunc != NULL) || (tstate->c_profilefunc != NULL);
+ int err = 0;
+ if (!_PyErr_Occurred(tstate) && !_is_sys_tracing) {
+ err = _PyOptimizer_Optimize(frame, tstate);
+ }
+ _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
+ // Update the backoff counters depending on whether compilation succeeded.
+ _PyExitData *exit = _tstate->jit_tracer_state.initial_state.exit;
+ if (exit == NULL) {
+ // We hold a strong reference to the code object, so the instruction won't be freed.
+ if (err <= 0) {
+ _Py_BackoffCounter counter = _tstate->jit_tracer_state.initial_state.jump_backward_instr[1].counter;
+ _tstate->jit_tracer_state.initial_state.jump_backward_instr[1].counter = restart_backoff_counter(counter);
+ }
+ else {
+ _tstate->jit_tracer_state.initial_state.jump_backward_instr[1].counter = initial_jump_backoff_counter();
+ }
+ }
+ else {
+ // Likewise, we hold a strong reference to the executor containing this exit, so the exit is guaranteed
+ // to be valid to access.
+ if (err <= 0) {
+ exit->temperature = restart_backoff_counter(exit->temperature);
+ }
+ else {
+ exit->temperature = initial_temperature_backoff_counter();
+ }
+ }
+ _PyJit_FinalizeTracing(tstate);
+ return err;
+}
+#endif
/* _PyEval_EvalFrameDefault is too large to optimize for speed with PGO on MSVC.
*/
stack_pointer = _PyFrame_GetStackPointer(frame);
#if _Py_TAIL_CALL_INTERP
# if Py_STATS
- return _TAIL_CALL_error(frame, stack_pointer, tstate, next_instr, instruction_funcptr_table, 0, lastopcode);
+ return _TAIL_CALL_error(frame, stack_pointer, tstate, next_instr, instruction_funcptr_handler_table, 0, lastopcode);
# else
- return _TAIL_CALL_error(frame, stack_pointer, tstate, next_instr, instruction_funcptr_table, 0);
+ return _TAIL_CALL_error(frame, stack_pointer, tstate, next_instr, instruction_funcptr_handler_table, 0);
# endif
#else
goto error;
#if _Py_TAIL_CALL_INTERP
# if Py_STATS
- return _TAIL_CALL_start_frame(frame, NULL, tstate, NULL, instruction_funcptr_table, 0, lastopcode);
+ return _TAIL_CALL_start_frame(frame, NULL, tstate, NULL, instruction_funcptr_handler_table, 0, lastopcode);
# else
- return _TAIL_CALL_start_frame(frame, NULL, tstate, NULL, instruction_funcptr_table, 0);
+ return _TAIL_CALL_start_frame(frame, NULL, tstate, NULL, instruction_funcptr_handler_table, 0);
# endif
#else
goto start_frame;
tier2_start:
next_uop = current_executor->trace;
- assert(next_uop->opcode == _START_EXECUTOR || next_uop->opcode == _COLD_EXIT);
+ assert(next_uop->opcode == _START_EXECUTOR ||
+ next_uop->opcode == _COLD_EXIT ||
+ next_uop->opcode == _COLD_DYNAMIC_EXIT);
#undef LOAD_IP
#define LOAD_IP(UNUSED) (void)0
uint64_t trace_uop_execution_counter = 0;
#endif
- assert(next_uop->opcode == _START_EXECUTOR || next_uop->opcode == _COLD_EXIT);
+ assert(next_uop->opcode == _START_EXECUTOR ||
+ next_uop->opcode == _COLD_EXIT ||
+ next_uop->opcode == _COLD_DYNAMIC_EXIT);
tier2_dispatch:
for (;;) {
uopcode = next_uop->opcode;
# define Py_PRESERVE_NONE_CC __attribute__((preserve_none))
Py_PRESERVE_NONE_CC typedef PyObject* (*py_tail_call_funcptr)(TAIL_CALL_PARAMS);
+# define DISPATCH_TABLE_VAR instruction_funcptr_table
+# define DISPATCH_TABLE instruction_funcptr_handler_table
+# define TRACING_DISPATCH_TABLE instruction_funcptr_tracing_table
# define TARGET(op) Py_PRESERVE_NONE_CC PyObject *_TAIL_CALL_##op(TAIL_CALL_PARAMS)
+
# define DISPATCH_GOTO() \
do { \
Py_MUSTTAIL return (((py_tail_call_funcptr *)instruction_funcptr_table)[opcode])(TAIL_CALL_ARGS); \
} while (0)
+# define DISPATCH_GOTO_NON_TRACING() \
+ do { \
+ Py_MUSTTAIL return (((py_tail_call_funcptr *)DISPATCH_TABLE)[opcode])(TAIL_CALL_ARGS); \
+ } while (0)
# define JUMP_TO_LABEL(name) \
do { \
Py_MUSTTAIL return (_TAIL_CALL_##name)(TAIL_CALL_ARGS); \
# endif
# define LABEL(name) TARGET(name)
#elif USE_COMPUTED_GOTOS
+# define DISPATCH_TABLE_VAR opcode_targets
+# define DISPATCH_TABLE opcode_targets_table
+# define TRACING_DISPATCH_TABLE opcode_tracing_targets_table
# define TARGET(op) TARGET_##op:
# define DISPATCH_GOTO() goto *opcode_targets[opcode]
+# define DISPATCH_GOTO_NON_TRACING() goto *DISPATCH_TABLE[opcode];
# define JUMP_TO_LABEL(name) goto name;
# define JUMP_TO_PREDICTED(name) goto PREDICTED_##name;
# define LABEL(name) name:
#else
# define TARGET(op) case op: TARGET_##op:
# define DISPATCH_GOTO() goto dispatch_opcode
+# define DISPATCH_GOTO_NON_TRACING() goto dispatch_opcode
# define JUMP_TO_LABEL(name) goto name;
# define JUMP_TO_PREDICTED(name) goto PREDICTED_##name;
# define LABEL(name) name:
#endif
+#if (_Py_TAIL_CALL_INTERP || USE_COMPUTED_GOTOS) && _Py_TIER2
+# define IS_JIT_TRACING() (DISPATCH_TABLE_VAR == TRACING_DISPATCH_TABLE)
+# define ENTER_TRACING() \
+ DISPATCH_TABLE_VAR = TRACING_DISPATCH_TABLE;
+# define LEAVE_TRACING() \
+ DISPATCH_TABLE_VAR = DISPATCH_TABLE;
+#else
+# define IS_JIT_TRACING() (0)
+# define ENTER_TRACING()
+# define LEAVE_TRACING()
+#endif
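+// Tracing mode is entered and left simply by swapping which dispatch table is in use:
+// while tracing, every bytecode first dispatches to the record_previous_inst label (or its
+// tail-call twin) and then reaches the normal handler via DISPATCH_GOTO_NON_TRACING().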
+
/* PRE_DISPATCH_GOTO() does lltrace if enabled. Normally a no-op */
#ifdef Py_DEBUG
#define PRE_DISPATCH_GOTO() if (frame->lltrace >= 5) { \
DISPATCH_GOTO(); \
}
+#define DISPATCH_NON_TRACING() \
+ { \
+ assert(frame->stackpointer == NULL); \
+ NEXTOPARG(); \
+ PRE_DISPATCH_GOTO(); \
+ DISPATCH_GOTO_NON_TRACING(); \
+ }
+
#define DISPATCH_SAME_OPARG() \
{ \
opcode = next_instr->op.code; \
PRE_DISPATCH_GOTO(); \
- DISPATCH_GOTO(); \
+ DISPATCH_GOTO_NON_TRACING(); \
}
#define DISPATCH_INLINED(NEW_FRAME) \
/* This takes a uint16_t instead of a _Py_BackoffCounter,
* because it is used directly on the cache entry in generated code,
* which is always an integral type. */
+// Force re-specialization when tracing a side exit to get good side exits.
#define ADAPTIVE_COUNTER_TRIGGERS(COUNTER) \
backoff_counter_triggers(forge_backoff_counter((COUNTER)))
next_instr = _Py_jit_entry((EXECUTOR), frame, stack_pointer, tstate); \
frame = tstate->current_frame; \
stack_pointer = _PyFrame_GetStackPointer(frame); \
+ int keep_tracing_bit = (uintptr_t)next_instr & 1; \
+ next_instr = (_Py_CODEUNIT *)(((uintptr_t)next_instr) & (~1)); \
if (next_instr == NULL) { \
/* gh-140104: The exception handler expects frame->instr_ptr
to after this_instr, not this_instr! */ \
next_instr = frame->instr_ptr + 1; \
JUMP_TO_LABEL(error); \
} \
+ if (keep_tracing_bit) { \
+ assert(((_PyThreadStateImpl *)tstate)->jit_tracer_state.prev_state.code_curr_size == 2); \
+ ENTER_TRACING(); \
+ DISPATCH_NON_TRACING(); \
+ } \
DISPATCH(); \
} while (0)
goto tier2_start; \
} while (0)
-#define GOTO_TIER_ONE(TARGET) \
- do \
- { \
- tstate->current_executor = NULL; \
- OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); \
- _PyFrame_SetStackPointer(frame, stack_pointer); \
- return TARGET; \
+#define GOTO_TIER_ONE_SETUP \
+ tstate->current_executor = NULL; \
+ OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); \
+ _PyFrame_SetStackPointer(frame, stack_pointer);
+
+#define GOTO_TIER_ONE(TARGET) \
+ do \
+ { \
+ GOTO_TIER_ONE_SETUP \
+ return (_Py_CODEUNIT *)(TARGET); \
+ } while (0)
+
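+// GOTO_TIER_ONE_CONTINUE_TRACING tags the returned code-unit pointer by setting its low bit
+// (_Py_CODEUNIT is 2 bytes wide, so the bit is otherwise unused). The tier-1 entry macro
+// recovers and strips it:
+//     int keep_tracing_bit = (uintptr_t)next_instr & 1;
+//     next_instr = (_Py_CODEUNIT *)(((uintptr_t)next_instr) & (~1));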
+#define GOTO_TIER_ONE_CONTINUE_TRACING(TARGET) \
+ do \
+ { \
+ GOTO_TIER_ONE_SETUP \
+ return (_Py_CODEUNIT *)(((uintptr_t)(TARGET))| 1); \
} while (0)
#define CURRENT_OPARG() (next_uop[-1].oparg)
break;
}
+ /* _JUMP_BACKWARD_NO_INTERRUPT is not a viable micro-op for tier 2 because it is replaced */
+
case _GET_LEN: {
_PyStackRef obj;
_PyStackRef len;
PyObject *exit_p = (PyObject *)CURRENT_OPERAND0();
_PyExitData *exit = (_PyExitData *)exit_p;
#if defined(Py_DEBUG) && !defined(_Py_JIT)
- _Py_CODEUNIT *target = _PyFrame_GetBytecode(frame) + exit->target;
+ const _Py_CODEUNIT *target = ((frame->owner == FRAME_OWNED_BY_INTERPRETER)
+ ? _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS_PTR : _PyFrame_GetBytecode(frame))
+ + exit->target;
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
- if (frame->lltrace >= 2) {
+ if (frame->lltrace >= 3) {
_PyFrame_SetStackPointer(frame, stack_pointer);
printf("SIDE EXIT: [UOp ");
_PyUOpPrint(&next_uop[-1]);
- printf(", exit %tu, temp %d, target %d -> %s]\n",
+ printf(", exit %tu, temp %d, target %d -> %s, is_control_flow %d]\n",
exit - current_executor->exits, exit->temperature.value_and_backoff,
(int)(target - _PyFrame_GetBytecode(frame)),
- _PyOpcode_OpName[target->op.code]);
+ _PyOpcode_OpName[target->op.code], exit->is_control_flow);
stack_pointer = _PyFrame_GetStackPointer(frame);
}
#endif
break;
}
+ case _DYNAMIC_EXIT: {
+ PyObject *exit_p = (PyObject *)CURRENT_OPERAND0();
+ #if defined(Py_DEBUG) && !defined(_Py_JIT)
+ _PyExitData *exit = (_PyExitData *)exit_p;
+ _Py_CODEUNIT *target = frame->instr_ptr;
+ OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
+ if (frame->lltrace >= 3) {
+ _PyFrame_SetStackPointer(frame, stack_pointer);
+ printf("DYNAMIC EXIT: [UOp ");
+ _PyUOpPrint(&next_uop[-1]);
+ printf(", exit %tu, temp %d, target %d -> %s]\n",
+ exit - current_executor->exits, exit->temperature.value_and_backoff,
+ (int)(target - _PyFrame_GetBytecode(frame)),
+ _PyOpcode_OpName[target->op.code]);
+ stack_pointer = _PyFrame_GetStackPointer(frame);
+ }
+ #endif
+
+ GOTO_TIER_ONE(frame->instr_ptr);
+ break;
+ }
+
case _CHECK_VALIDITY: {
if (!current_executor->vm_data.valid) {
UOP_STAT_INC(uopcode, miss);
}
case _DEOPT: {
- GOTO_TIER_ONE(_PyFrame_GetBytecode(frame) + CURRENT_TARGET());
+ GOTO_TIER_ONE((frame->owner == FRAME_OWNED_BY_INTERPRETER)
+ ? _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS_PTR : _PyFrame_GetBytecode(frame) + CURRENT_TARGET());
break;
}
case _COLD_EXIT: {
_PyExitData *exit = tstate->jit_exit;
assert(exit != NULL);
+ assert(frame->owner < FRAME_OWNED_BY_INTERPRETER);
_Py_CODEUNIT *target = _PyFrame_GetBytecode(frame) + exit->target;
_Py_BackoffCounter temperature = exit->temperature;
- if (!backoff_counter_triggers(temperature)) {
- exit->temperature = advance_backoff_counter(temperature);
- GOTO_TIER_ONE(target);
- }
_PyExecutorObject *executor;
if (target->op.code == ENTER_EXECUTOR) {
PyCodeObject *code = _PyFrame_GetCode(frame);
executor = code->co_executors->executors[target->op.arg];
Py_INCREF(executor);
+ assert(tstate->jit_exit == exit);
+ exit->executor = executor;
+ TIER2_TO_TIER2(exit->executor);
}
else {
- _PyFrame_SetStackPointer(frame, stack_pointer);
+ if (!backoff_counter_triggers(temperature)) {
+ exit->temperature = advance_backoff_counter(temperature);
+ GOTO_TIER_ONE(target);
+ }
_PyExecutorObject *previous_executor = _PyExecutor_FromExit(exit);
- stack_pointer = _PyFrame_GetStackPointer(frame);
assert(tstate->current_executor == (PyObject *)previous_executor);
- int chain_depth = previous_executor->vm_data.chain_depth + 1;
- _PyFrame_SetStackPointer(frame, stack_pointer);
- int optimized = _PyOptimizer_Optimize(frame, target, &executor, chain_depth);
- stack_pointer = _PyFrame_GetStackPointer(frame);
- if (optimized <= 0) {
- exit->temperature = restart_backoff_counter(temperature);
- GOTO_TIER_ONE(optimized < 0 ? NULL : target);
+ int chain_depth = previous_executor->vm_data.chain_depth + !exit->is_control_flow;
+ int succ = _PyJit_TryInitializeTracing(tstate, frame, target, target, target, STACK_LEVEL(), chain_depth, exit, target->op.arg);
+ exit->temperature = restart_backoff_counter(exit->temperature);
+ if (succ) {
+ GOTO_TIER_ONE_CONTINUE_TRACING(target);
}
- exit->temperature = initial_temperature_backoff_counter();
+ GOTO_TIER_ONE(target);
}
- assert(tstate->jit_exit == exit);
- exit->executor = executor;
- TIER2_TO_TIER2(exit->executor);
break;
}
+ case _COLD_DYNAMIC_EXIT: {
+ _Py_CODEUNIT *target = frame->instr_ptr;
+ GOTO_TIER_ONE(target);
+ break;
+ }
+
+ case _GUARD_IP__PUSH_FRAME: {
+ #define OFFSET_OF__PUSH_FRAME ((0))
+ PyObject *ip = (PyObject *)CURRENT_OPERAND0();
+ _Py_CODEUNIT *target = frame->instr_ptr + OFFSET_OF__PUSH_FRAME;
+ if (target != (_Py_CODEUNIT *)ip) {
+ frame->instr_ptr += OFFSET_OF__PUSH_FRAME;
+ if (true) {
+ UOP_STAT_INC(uopcode, miss);
+ JUMP_TO_JUMP_TARGET();
+ }
+ }
+ #undef OFFSET_OF__PUSH_FRAME
+ break;
+ }
+
+ case _GUARD_IP_YIELD_VALUE: {
+ #define OFFSET_OF_YIELD_VALUE ((1+INLINE_CACHE_ENTRIES_SEND))
+ PyObject *ip = (PyObject *)CURRENT_OPERAND0();
+ _Py_CODEUNIT *target = frame->instr_ptr + OFFSET_OF_YIELD_VALUE;
+ if (target != (_Py_CODEUNIT *)ip) {
+ frame->instr_ptr += OFFSET_OF_YIELD_VALUE;
+ if (true) {
+ UOP_STAT_INC(uopcode, miss);
+ JUMP_TO_JUMP_TARGET();
+ }
+ }
+ #undef OFFSET_OF_YIELD_VALUE
+ break;
+ }
+
+ case _GUARD_IP_RETURN_VALUE: {
+ #define OFFSET_OF_RETURN_VALUE ((frame->return_offset))
+ PyObject *ip = (PyObject *)CURRENT_OPERAND0();
+ _Py_CODEUNIT *target = frame->instr_ptr + OFFSET_OF_RETURN_VALUE;
+ if (target != (_Py_CODEUNIT *)ip) {
+ frame->instr_ptr += OFFSET_OF_RETURN_VALUE;
+ if (true) {
+ UOP_STAT_INC(uopcode, miss);
+ JUMP_TO_JUMP_TARGET();
+ }
+ }
+ #undef OFFSET_OF_RETURN_VALUE
+ break;
+ }
+
+ case _GUARD_IP_RETURN_GENERATOR: {
+ #define OFFSET_OF_RETURN_GENERATOR ((frame->return_offset))
+ PyObject *ip = (PyObject *)CURRENT_OPERAND0();
+ _Py_CODEUNIT *target = frame->instr_ptr + OFFSET_OF_RETURN_GENERATOR;
+ if (target != (_Py_CODEUNIT *)ip) {
+ frame->instr_ptr += OFFSET_OF_RETURN_GENERATOR;
+ if (true) {
+ UOP_STAT_INC(uopcode, miss);
+ JUMP_TO_JUMP_TARGET();
+ }
+ }
+ #undef OFFSET_OF_RETURN_GENERATOR
+ break;
+ }
+
+
#undef TIER_TWO
INSTRUCTION_STATS(ENTER_EXECUTOR);
opcode = ENTER_EXECUTOR;
#ifdef _Py_TIER2
+ if (IS_JIT_TRACING()) {
+ next_instr = this_instr;
+ JUMP_TO_LABEL(stop_tracing);
+ }
PyCodeObject *code = _PyFrame_GetCode(frame);
_PyExecutorObject *executor = code->co_executors->executors[oparg & 255];
assert(executor->vm_data.index == INSTR_OFFSET() - 1);
/* Skip 1 cache entry */
// _SPECIALIZE_JUMP_BACKWARD
{
- #if ENABLE_SPECIALIZATION_FT
+ #if ENABLE_SPECIALIZATION
if (this_instr->op.code == JUMP_BACKWARD) {
uint8_t desired = tstate->interp->jit ? JUMP_BACKWARD_JIT : JUMP_BACKWARD_NO_JIT;
FT_ATOMIC_STORE_UINT8_RELAXED(this_instr->op.code, desired);
{
#ifdef _Py_TIER2
_Py_BackoffCounter counter = this_instr[1].counter;
- if (backoff_counter_triggers(counter) && this_instr->op.code == JUMP_BACKWARD_JIT) {
- _Py_CODEUNIT *start = this_instr;
+ if (!IS_JIT_TRACING() && backoff_counter_triggers(counter) &&
+ this_instr->op.code == JUMP_BACKWARD_JIT &&
+ next_instr->op.code != ENTER_EXECUTOR) {
+ _Py_CODEUNIT *insert_exec_at = this_instr;
while (oparg > 255) {
oparg >>= 8;
- start--;
+ insert_exec_at--;
}
- _PyExecutorObject *executor;
- _PyFrame_SetStackPointer(frame, stack_pointer);
- int optimized = _PyOptimizer_Optimize(frame, start, &executor, 0);
- stack_pointer = _PyFrame_GetStackPointer(frame);
- if (optimized <= 0) {
- this_instr[1].counter = restart_backoff_counter(counter);
- if (optimized < 0) {
- JUMP_TO_LABEL(error);
- }
+ int succ = _PyJit_TryInitializeTracing(tstate, frame, this_instr, insert_exec_at, next_instr, STACK_LEVEL(), 0, NULL, oparg);
+ if (succ) {
+ ENTER_TRACING();
}
else {
- _PyFrame_SetStackPointer(frame, stack_pointer);
- this_instr[1].counter = initial_jump_backoff_counter();
- stack_pointer = _PyFrame_GetStackPointer(frame);
- assert(tstate->current_executor == NULL);
- assert(executor != tstate->interp->cold_executor);
- tstate->jit_exit = NULL;
- TIER1_TO_TIER2(executor);
+ this_instr[1].counter = restart_backoff_counter(counter);
}
}
else {
DISPATCH();
}
+ LABEL(record_previous_inst)
+ {
+ #if _Py_TIER2
+ assert(IS_JIT_TRACING());
+ int opcode = next_instr->op.code;
+ bool stop_tracing = (opcode == WITH_EXCEPT_START ||
+ opcode == RERAISE || opcode == CLEANUP_THROW ||
+ opcode == PUSH_EXC_INFO || opcode == INTERPRETER_EXIT);
+ _PyFrame_SetStackPointer(frame, stack_pointer);
+ int full = !_PyJit_translate_single_bytecode_to_trace(tstate, frame, next_instr, stop_tracing);
+ stack_pointer = _PyFrame_GetStackPointer(frame);
+ if (full) {
+ LEAVE_TRACING();
+ _PyFrame_SetStackPointer(frame, stack_pointer);
+ int err = stop_tracing_and_jit(tstate, frame);
+ stack_pointer = _PyFrame_GetStackPointer(frame);
+ if (err < 0) {
+ JUMP_TO_LABEL(error);
+ }
+ DISPATCH_GOTO_NON_TRACING();
+ }
+ _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
+ if ((_tstate->jit_tracer_state.prev_state.instr->op.code == CALL_LIST_APPEND &&
+ opcode == POP_TOP) ||
+ (_tstate->jit_tracer_state.prev_state.instr->op.code == BINARY_OP_INPLACE_ADD_UNICODE &&
+ opcode == STORE_FAST)) {
+ _tstate->jit_tracer_state.prev_state.instr_is_super = true;
+ }
+ else {
+ _tstate->jit_tracer_state.prev_state.instr = next_instr;
+ }
+ PyObject *prev_code = PyStackRef_AsPyObjectBorrow(frame->f_executable);
+ if (_tstate->jit_tracer_state.prev_state.instr_code != (PyCodeObject *)prev_code) {
+ _PyFrame_SetStackPointer(frame, stack_pointer);
+ Py_SETREF(_tstate->jit_tracer_state.prev_state.instr_code, (PyCodeObject*)Py_NewRef((prev_code)));
+ stack_pointer = _PyFrame_GetStackPointer(frame);
+ }
+ _tstate->jit_tracer_state.prev_state.instr_frame = frame;
+ _tstate->jit_tracer_state.prev_state.instr_oparg = oparg;
+ _tstate->jit_tracer_state.prev_state.instr_stacklevel = PyStackRef_IsNone(frame->f_executable) ? 2 : STACK_LEVEL();
+ if (_PyOpcode_Caches[_PyOpcode_Deopt[opcode]]) {
+ (&next_instr[1])->counter = trigger_backoff_counter();
+ }
+ DISPATCH_GOTO_NON_TRACING();
+ #else
+ Py_FatalError("JIT label executed in non-jit build.");
+ #endif
+ }
+
+ LABEL(stop_tracing)
+ {
+ #if _Py_TIER2
+ assert(IS_JIT_TRACING());
+ int opcode = next_instr->op.code;
+ _PyFrame_SetStackPointer(frame, stack_pointer);
+ _PyJit_translate_single_bytecode_to_trace(tstate, frame, NULL, true);
+ stack_pointer = _PyFrame_GetStackPointer(frame);
+ LEAVE_TRACING();
+ _PyFrame_SetStackPointer(frame, stack_pointer);
+ int err = stop_tracing_and_jit(tstate, frame);
+ stack_pointer = _PyFrame_GetStackPointer(frame);
+ if (err < 0) {
+ JUMP_TO_LABEL(error);
+ }
+ DISPATCH_GOTO_NON_TRACING();
+ #else
+ Py_FatalError("JIT label executed in non-jit build.");
+ #endif
+ }
+
/* END LABELS */
#undef TIER_ONE
#include "pycore_tuple.h" // _PyTuple_FromArraySteal()
#include "opcode_ids.h"
+#include "pycore_optimizer.h"
/* Uncomment this to dump debugging output when assertions fail */
_PyCode_Clear_Executors(code);
}
_Py_Executors_InvalidateDependency(interp, code, 1);
+ _PyJit_Tracer_InvalidateDependency(PyThreadState_GET(), code);
#endif
int code_len = (int)Py_SIZE(code);
/* Exit early to avoid creating instrumentation
unsigned char *code = memory;
state.trampolines.mem = memory + code_size;
unsigned char *data = memory + code_size + state.trampolines.size + code_padding;
- assert(trace[0].opcode == _START_EXECUTOR || trace[0].opcode == _COLD_EXIT);
+ assert(trace[0].opcode == _START_EXECUTOR || trace[0].opcode == _COLD_EXIT || trace[0].opcode == _COLD_DYNAMIC_EXIT);
for (size_t i = 0; i < length; i++) {
const _PyUOpInstruction *instruction = &trace[i];
group = &stencil_groups[instruction->opcode];
&&TARGET_INSTRUMENTED_LINE,
&&TARGET_ENTER_EXECUTOR,
};
+#if _Py_TIER2
+static void *opcode_tracing_targets_table[256] = {
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&_unknown_opcode,
+ &&_unknown_opcode,
+ &&_unknown_opcode,
+ &&_unknown_opcode,
+ &&_unknown_opcode,
+ &&_unknown_opcode,
+ &&_unknown_opcode,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&_unknown_opcode,
+ &&_unknown_opcode,
+ &&_unknown_opcode,
+ &&_unknown_opcode,
+ &&_unknown_opcode,
+ &&_unknown_opcode,
+ &&_unknown_opcode,
+ &&_unknown_opcode,
+ &&_unknown_opcode,
+ &&_unknown_opcode,
+ &&_unknown_opcode,
+ &&_unknown_opcode,
+ &&_unknown_opcode,
+ &&_unknown_opcode,
+ &&_unknown_opcode,
+ &&_unknown_opcode,
+ &&_unknown_opcode,
+ &&_unknown_opcode,
+ &&_unknown_opcode,
+ &&_unknown_opcode,
+ &&_unknown_opcode,
+ &&_unknown_opcode,
+ &&_unknown_opcode,
+ &&_unknown_opcode,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+ &&record_previous_inst,
+};
+#endif
#else /* _Py_TAIL_CALL_INTERP */
-static py_tail_call_funcptr instruction_funcptr_table[256];
+static py_tail_call_funcptr instruction_funcptr_handler_table[256];
+
+static py_tail_call_funcptr instruction_funcptr_tracing_table[256];
Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_pop_2_error(TAIL_CALL_PARAMS);
Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_pop_1_error(TAIL_CALL_PARAMS);
Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_exception_unwind(TAIL_CALL_PARAMS);
Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_exit_unwind(TAIL_CALL_PARAMS);
Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_start_frame(TAIL_CALL_PARAMS);
+Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_record_previous_inst(TAIL_CALL_PARAMS);
+Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_stop_tracing(TAIL_CALL_PARAMS);
Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_BINARY_OP(TAIL_CALL_PARAMS);
Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_BINARY_OP_ADD_FLOAT(TAIL_CALL_PARAMS);
JUMP_TO_LABEL(error);
}
-static py_tail_call_funcptr instruction_funcptr_table[256] = {
+static py_tail_call_funcptr instruction_funcptr_handler_table[256] = {
[BINARY_OP] = _TAIL_CALL_BINARY_OP,
[BINARY_OP_ADD_FLOAT] = _TAIL_CALL_BINARY_OP_ADD_FLOAT,
[BINARY_OP_ADD_INT] = _TAIL_CALL_BINARY_OP_ADD_INT,
[232] = _TAIL_CALL_UNKNOWN_OPCODE,
[233] = _TAIL_CALL_UNKNOWN_OPCODE,
};
+static py_tail_call_funcptr instruction_funcptr_tracing_table[256] = {
+ [BINARY_OP] = _TAIL_CALL_record_previous_inst,
+ [BINARY_OP_ADD_FLOAT] = _TAIL_CALL_record_previous_inst,
+ [BINARY_OP_ADD_INT] = _TAIL_CALL_record_previous_inst,
+ [BINARY_OP_ADD_UNICODE] = _TAIL_CALL_record_previous_inst,
+ [BINARY_OP_EXTEND] = _TAIL_CALL_record_previous_inst,
+ [BINARY_OP_INPLACE_ADD_UNICODE] = _TAIL_CALL_record_previous_inst,
+ [BINARY_OP_MULTIPLY_FLOAT] = _TAIL_CALL_record_previous_inst,
+ [BINARY_OP_MULTIPLY_INT] = _TAIL_CALL_record_previous_inst,
+ [BINARY_OP_SUBSCR_DICT] = _TAIL_CALL_record_previous_inst,
+ [BINARY_OP_SUBSCR_GETITEM] = _TAIL_CALL_record_previous_inst,
+ [BINARY_OP_SUBSCR_LIST_INT] = _TAIL_CALL_record_previous_inst,
+ [BINARY_OP_SUBSCR_LIST_SLICE] = _TAIL_CALL_record_previous_inst,
+ [BINARY_OP_SUBSCR_STR_INT] = _TAIL_CALL_record_previous_inst,
+ [BINARY_OP_SUBSCR_TUPLE_INT] = _TAIL_CALL_record_previous_inst,
+ [BINARY_OP_SUBTRACT_FLOAT] = _TAIL_CALL_record_previous_inst,
+ [BINARY_OP_SUBTRACT_INT] = _TAIL_CALL_record_previous_inst,
+ [BINARY_SLICE] = _TAIL_CALL_record_previous_inst,
+ [BUILD_INTERPOLATION] = _TAIL_CALL_record_previous_inst,
+ [BUILD_LIST] = _TAIL_CALL_record_previous_inst,
+ [BUILD_MAP] = _TAIL_CALL_record_previous_inst,
+ [BUILD_SET] = _TAIL_CALL_record_previous_inst,
+ [BUILD_SLICE] = _TAIL_CALL_record_previous_inst,
+ [BUILD_STRING] = _TAIL_CALL_record_previous_inst,
+ [BUILD_TEMPLATE] = _TAIL_CALL_record_previous_inst,
+ [BUILD_TUPLE] = _TAIL_CALL_record_previous_inst,
+ [CACHE] = _TAIL_CALL_record_previous_inst,
+ [CALL] = _TAIL_CALL_record_previous_inst,
+ [CALL_ALLOC_AND_ENTER_INIT] = _TAIL_CALL_record_previous_inst,
+ [CALL_BOUND_METHOD_EXACT_ARGS] = _TAIL_CALL_record_previous_inst,
+ [CALL_BOUND_METHOD_GENERAL] = _TAIL_CALL_record_previous_inst,
+ [CALL_BUILTIN_CLASS] = _TAIL_CALL_record_previous_inst,
+ [CALL_BUILTIN_FAST] = _TAIL_CALL_record_previous_inst,
+ [CALL_BUILTIN_FAST_WITH_KEYWORDS] = _TAIL_CALL_record_previous_inst,
+ [CALL_BUILTIN_O] = _TAIL_CALL_record_previous_inst,
+ [CALL_FUNCTION_EX] = _TAIL_CALL_record_previous_inst,
+ [CALL_INTRINSIC_1] = _TAIL_CALL_record_previous_inst,
+ [CALL_INTRINSIC_2] = _TAIL_CALL_record_previous_inst,
+ [CALL_ISINSTANCE] = _TAIL_CALL_record_previous_inst,
+ [CALL_KW] = _TAIL_CALL_record_previous_inst,
+ [CALL_KW_BOUND_METHOD] = _TAIL_CALL_record_previous_inst,
+ [CALL_KW_NON_PY] = _TAIL_CALL_record_previous_inst,
+ [CALL_KW_PY] = _TAIL_CALL_record_previous_inst,
+ [CALL_LEN] = _TAIL_CALL_record_previous_inst,
+ [CALL_LIST_APPEND] = _TAIL_CALL_record_previous_inst,
+ [CALL_METHOD_DESCRIPTOR_FAST] = _TAIL_CALL_record_previous_inst,
+ [CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = _TAIL_CALL_record_previous_inst,
+ [CALL_METHOD_DESCRIPTOR_NOARGS] = _TAIL_CALL_record_previous_inst,
+ [CALL_METHOD_DESCRIPTOR_O] = _TAIL_CALL_record_previous_inst,
+ [CALL_NON_PY_GENERAL] = _TAIL_CALL_record_previous_inst,
+ [CALL_PY_EXACT_ARGS] = _TAIL_CALL_record_previous_inst,
+ [CALL_PY_GENERAL] = _TAIL_CALL_record_previous_inst,
+ [CALL_STR_1] = _TAIL_CALL_record_previous_inst,
+ [CALL_TUPLE_1] = _TAIL_CALL_record_previous_inst,
+ [CALL_TYPE_1] = _TAIL_CALL_record_previous_inst,
+ [CHECK_EG_MATCH] = _TAIL_CALL_record_previous_inst,
+ [CHECK_EXC_MATCH] = _TAIL_CALL_record_previous_inst,
+ [CLEANUP_THROW] = _TAIL_CALL_record_previous_inst,
+ [COMPARE_OP] = _TAIL_CALL_record_previous_inst,
+ [COMPARE_OP_FLOAT] = _TAIL_CALL_record_previous_inst,
+ [COMPARE_OP_INT] = _TAIL_CALL_record_previous_inst,
+ [COMPARE_OP_STR] = _TAIL_CALL_record_previous_inst,
+ [CONTAINS_OP] = _TAIL_CALL_record_previous_inst,
+ [CONTAINS_OP_DICT] = _TAIL_CALL_record_previous_inst,
+ [CONTAINS_OP_SET] = _TAIL_CALL_record_previous_inst,
+ [CONVERT_VALUE] = _TAIL_CALL_record_previous_inst,
+ [COPY] = _TAIL_CALL_record_previous_inst,
+ [COPY_FREE_VARS] = _TAIL_CALL_record_previous_inst,
+ [DELETE_ATTR] = _TAIL_CALL_record_previous_inst,
+ [DELETE_DEREF] = _TAIL_CALL_record_previous_inst,
+ [DELETE_FAST] = _TAIL_CALL_record_previous_inst,
+ [DELETE_GLOBAL] = _TAIL_CALL_record_previous_inst,
+ [DELETE_NAME] = _TAIL_CALL_record_previous_inst,
+ [DELETE_SUBSCR] = _TAIL_CALL_record_previous_inst,
+ [DICT_MERGE] = _TAIL_CALL_record_previous_inst,
+ [DICT_UPDATE] = _TAIL_CALL_record_previous_inst,
+ [END_ASYNC_FOR] = _TAIL_CALL_record_previous_inst,
+ [END_FOR] = _TAIL_CALL_record_previous_inst,
+ [END_SEND] = _TAIL_CALL_record_previous_inst,
+ [ENTER_EXECUTOR] = _TAIL_CALL_record_previous_inst,
+ [EXIT_INIT_CHECK] = _TAIL_CALL_record_previous_inst,
+ [EXTENDED_ARG] = _TAIL_CALL_record_previous_inst,
+ [FORMAT_SIMPLE] = _TAIL_CALL_record_previous_inst,
+ [FORMAT_WITH_SPEC] = _TAIL_CALL_record_previous_inst,
+ [FOR_ITER] = _TAIL_CALL_record_previous_inst,
+ [FOR_ITER_GEN] = _TAIL_CALL_record_previous_inst,
+ [FOR_ITER_LIST] = _TAIL_CALL_record_previous_inst,
+ [FOR_ITER_RANGE] = _TAIL_CALL_record_previous_inst,
+ [FOR_ITER_TUPLE] = _TAIL_CALL_record_previous_inst,
+ [GET_AITER] = _TAIL_CALL_record_previous_inst,
+ [GET_ANEXT] = _TAIL_CALL_record_previous_inst,
+ [GET_AWAITABLE] = _TAIL_CALL_record_previous_inst,
+ [GET_ITER] = _TAIL_CALL_record_previous_inst,
+ [GET_LEN] = _TAIL_CALL_record_previous_inst,
+ [GET_YIELD_FROM_ITER] = _TAIL_CALL_record_previous_inst,
+ [IMPORT_FROM] = _TAIL_CALL_record_previous_inst,
+ [IMPORT_NAME] = _TAIL_CALL_record_previous_inst,
+ [INSTRUMENTED_CALL] = _TAIL_CALL_record_previous_inst,
+ [INSTRUMENTED_CALL_FUNCTION_EX] = _TAIL_CALL_record_previous_inst,
+ [INSTRUMENTED_CALL_KW] = _TAIL_CALL_record_previous_inst,
+ [INSTRUMENTED_END_ASYNC_FOR] = _TAIL_CALL_record_previous_inst,
+ [INSTRUMENTED_END_FOR] = _TAIL_CALL_record_previous_inst,
+ [INSTRUMENTED_END_SEND] = _TAIL_CALL_record_previous_inst,
+ [INSTRUMENTED_FOR_ITER] = _TAIL_CALL_record_previous_inst,
+ [INSTRUMENTED_INSTRUCTION] = _TAIL_CALL_record_previous_inst,
+ [INSTRUMENTED_JUMP_BACKWARD] = _TAIL_CALL_record_previous_inst,
+ [INSTRUMENTED_JUMP_FORWARD] = _TAIL_CALL_record_previous_inst,
+ [INSTRUMENTED_LINE] = _TAIL_CALL_record_previous_inst,
+ [INSTRUMENTED_LOAD_SUPER_ATTR] = _TAIL_CALL_record_previous_inst,
+ [INSTRUMENTED_NOT_TAKEN] = _TAIL_CALL_record_previous_inst,
+ [INSTRUMENTED_POP_ITER] = _TAIL_CALL_record_previous_inst,
+ [INSTRUMENTED_POP_JUMP_IF_FALSE] = _TAIL_CALL_record_previous_inst,
+ [INSTRUMENTED_POP_JUMP_IF_NONE] = _TAIL_CALL_record_previous_inst,
+ [INSTRUMENTED_POP_JUMP_IF_NOT_NONE] = _TAIL_CALL_record_previous_inst,
+ [INSTRUMENTED_POP_JUMP_IF_TRUE] = _TAIL_CALL_record_previous_inst,
+ [INSTRUMENTED_RESUME] = _TAIL_CALL_record_previous_inst,
+ [INSTRUMENTED_RETURN_VALUE] = _TAIL_CALL_record_previous_inst,
+ [INSTRUMENTED_YIELD_VALUE] = _TAIL_CALL_record_previous_inst,
+ [INTERPRETER_EXIT] = _TAIL_CALL_record_previous_inst,
+ [IS_OP] = _TAIL_CALL_record_previous_inst,
+ [JUMP_BACKWARD] = _TAIL_CALL_record_previous_inst,
+ [JUMP_BACKWARD_JIT] = _TAIL_CALL_record_previous_inst,
+ [JUMP_BACKWARD_NO_INTERRUPT] = _TAIL_CALL_record_previous_inst,
+ [JUMP_BACKWARD_NO_JIT] = _TAIL_CALL_record_previous_inst,
+ [JUMP_FORWARD] = _TAIL_CALL_record_previous_inst,
+ [LIST_APPEND] = _TAIL_CALL_record_previous_inst,
+ [LIST_EXTEND] = _TAIL_CALL_record_previous_inst,
+ [LOAD_ATTR] = _TAIL_CALL_record_previous_inst,
+ [LOAD_ATTR_CLASS] = _TAIL_CALL_record_previous_inst,
+ [LOAD_ATTR_CLASS_WITH_METACLASS_CHECK] = _TAIL_CALL_record_previous_inst,
+ [LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN] = _TAIL_CALL_record_previous_inst,
+ [LOAD_ATTR_INSTANCE_VALUE] = _TAIL_CALL_record_previous_inst,
+ [LOAD_ATTR_METHOD_LAZY_DICT] = _TAIL_CALL_record_previous_inst,
+ [LOAD_ATTR_METHOD_NO_DICT] = _TAIL_CALL_record_previous_inst,
+ [LOAD_ATTR_METHOD_WITH_VALUES] = _TAIL_CALL_record_previous_inst,
+ [LOAD_ATTR_MODULE] = _TAIL_CALL_record_previous_inst,
+ [LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = _TAIL_CALL_record_previous_inst,
+ [LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = _TAIL_CALL_record_previous_inst,
+ [LOAD_ATTR_PROPERTY] = _TAIL_CALL_record_previous_inst,
+ [LOAD_ATTR_SLOT] = _TAIL_CALL_record_previous_inst,
+ [LOAD_ATTR_WITH_HINT] = _TAIL_CALL_record_previous_inst,
+ [LOAD_BUILD_CLASS] = _TAIL_CALL_record_previous_inst,
+ [LOAD_COMMON_CONSTANT] = _TAIL_CALL_record_previous_inst,
+ [LOAD_CONST] = _TAIL_CALL_record_previous_inst,
+ [LOAD_DEREF] = _TAIL_CALL_record_previous_inst,
+ [LOAD_FAST] = _TAIL_CALL_record_previous_inst,
+ [LOAD_FAST_AND_CLEAR] = _TAIL_CALL_record_previous_inst,
+ [LOAD_FAST_BORROW] = _TAIL_CALL_record_previous_inst,
+ [LOAD_FAST_BORROW_LOAD_FAST_BORROW] = _TAIL_CALL_record_previous_inst,
+ [LOAD_FAST_CHECK] = _TAIL_CALL_record_previous_inst,
+ [LOAD_FAST_LOAD_FAST] = _TAIL_CALL_record_previous_inst,
+ [LOAD_FROM_DICT_OR_DEREF] = _TAIL_CALL_record_previous_inst,
+ [LOAD_FROM_DICT_OR_GLOBALS] = _TAIL_CALL_record_previous_inst,
+ [LOAD_GLOBAL] = _TAIL_CALL_record_previous_inst,
+ [LOAD_GLOBAL_BUILTIN] = _TAIL_CALL_record_previous_inst,
+ [LOAD_GLOBAL_MODULE] = _TAIL_CALL_record_previous_inst,
+ [LOAD_LOCALS] = _TAIL_CALL_record_previous_inst,
+ [LOAD_NAME] = _TAIL_CALL_record_previous_inst,
+ [LOAD_SMALL_INT] = _TAIL_CALL_record_previous_inst,
+ [LOAD_SPECIAL] = _TAIL_CALL_record_previous_inst,
+ [LOAD_SUPER_ATTR] = _TAIL_CALL_record_previous_inst,
+ [LOAD_SUPER_ATTR_ATTR] = _TAIL_CALL_record_previous_inst,
+ [LOAD_SUPER_ATTR_METHOD] = _TAIL_CALL_record_previous_inst,
+ [MAKE_CELL] = _TAIL_CALL_record_previous_inst,
+ [MAKE_FUNCTION] = _TAIL_CALL_record_previous_inst,
+ [MAP_ADD] = _TAIL_CALL_record_previous_inst,
+ [MATCH_CLASS] = _TAIL_CALL_record_previous_inst,
+ [MATCH_KEYS] = _TAIL_CALL_record_previous_inst,
+ [MATCH_MAPPING] = _TAIL_CALL_record_previous_inst,
+ [MATCH_SEQUENCE] = _TAIL_CALL_record_previous_inst,
+ [NOP] = _TAIL_CALL_record_previous_inst,
+ [NOT_TAKEN] = _TAIL_CALL_record_previous_inst,
+ [POP_EXCEPT] = _TAIL_CALL_record_previous_inst,
+ [POP_ITER] = _TAIL_CALL_record_previous_inst,
+ [POP_JUMP_IF_FALSE] = _TAIL_CALL_record_previous_inst,
+ [POP_JUMP_IF_NONE] = _TAIL_CALL_record_previous_inst,
+ [POP_JUMP_IF_NOT_NONE] = _TAIL_CALL_record_previous_inst,
+ [POP_JUMP_IF_TRUE] = _TAIL_CALL_record_previous_inst,
+ [POP_TOP] = _TAIL_CALL_record_previous_inst,
+ [PUSH_EXC_INFO] = _TAIL_CALL_record_previous_inst,
+ [PUSH_NULL] = _TAIL_CALL_record_previous_inst,
+ [RAISE_VARARGS] = _TAIL_CALL_record_previous_inst,
+ [RERAISE] = _TAIL_CALL_record_previous_inst,
+ [RESERVED] = _TAIL_CALL_record_previous_inst,
+ [RESUME] = _TAIL_CALL_record_previous_inst,
+ [RESUME_CHECK] = _TAIL_CALL_record_previous_inst,
+ [RETURN_GENERATOR] = _TAIL_CALL_record_previous_inst,
+ [RETURN_VALUE] = _TAIL_CALL_record_previous_inst,
+ [SEND] = _TAIL_CALL_record_previous_inst,
+ [SEND_GEN] = _TAIL_CALL_record_previous_inst,
+ [SETUP_ANNOTATIONS] = _TAIL_CALL_record_previous_inst,
+ [SET_ADD] = _TAIL_CALL_record_previous_inst,
+ [SET_FUNCTION_ATTRIBUTE] = _TAIL_CALL_record_previous_inst,
+ [SET_UPDATE] = _TAIL_CALL_record_previous_inst,
+ [STORE_ATTR] = _TAIL_CALL_record_previous_inst,
+ [STORE_ATTR_INSTANCE_VALUE] = _TAIL_CALL_record_previous_inst,
+ [STORE_ATTR_SLOT] = _TAIL_CALL_record_previous_inst,
+ [STORE_ATTR_WITH_HINT] = _TAIL_CALL_record_previous_inst,
+ [STORE_DEREF] = _TAIL_CALL_record_previous_inst,
+ [STORE_FAST] = _TAIL_CALL_record_previous_inst,
+ [STORE_FAST_LOAD_FAST] = _TAIL_CALL_record_previous_inst,
+ [STORE_FAST_STORE_FAST] = _TAIL_CALL_record_previous_inst,
+ [STORE_GLOBAL] = _TAIL_CALL_record_previous_inst,
+ [STORE_NAME] = _TAIL_CALL_record_previous_inst,
+ [STORE_SLICE] = _TAIL_CALL_record_previous_inst,
+ [STORE_SUBSCR] = _TAIL_CALL_record_previous_inst,
+ [STORE_SUBSCR_DICT] = _TAIL_CALL_record_previous_inst,
+ [STORE_SUBSCR_LIST_INT] = _TAIL_CALL_record_previous_inst,
+ [SWAP] = _TAIL_CALL_record_previous_inst,
+ [TO_BOOL] = _TAIL_CALL_record_previous_inst,
+ [TO_BOOL_ALWAYS_TRUE] = _TAIL_CALL_record_previous_inst,
+ [TO_BOOL_BOOL] = _TAIL_CALL_record_previous_inst,
+ [TO_BOOL_INT] = _TAIL_CALL_record_previous_inst,
+ [TO_BOOL_LIST] = _TAIL_CALL_record_previous_inst,
+ [TO_BOOL_NONE] = _TAIL_CALL_record_previous_inst,
+ [TO_BOOL_STR] = _TAIL_CALL_record_previous_inst,
+ [UNARY_INVERT] = _TAIL_CALL_record_previous_inst,
+ [UNARY_NEGATIVE] = _TAIL_CALL_record_previous_inst,
+ [UNARY_NOT] = _TAIL_CALL_record_previous_inst,
+ [UNPACK_EX] = _TAIL_CALL_record_previous_inst,
+ [UNPACK_SEQUENCE] = _TAIL_CALL_record_previous_inst,
+ [UNPACK_SEQUENCE_LIST] = _TAIL_CALL_record_previous_inst,
+ [UNPACK_SEQUENCE_TUPLE] = _TAIL_CALL_record_previous_inst,
+ [UNPACK_SEQUENCE_TWO_TUPLE] = _TAIL_CALL_record_previous_inst,
+ [WITH_EXCEPT_START] = _TAIL_CALL_record_previous_inst,
+ [YIELD_VALUE] = _TAIL_CALL_record_previous_inst,
+ [121] = _TAIL_CALL_UNKNOWN_OPCODE,
+ [122] = _TAIL_CALL_UNKNOWN_OPCODE,
+ [123] = _TAIL_CALL_UNKNOWN_OPCODE,
+ [124] = _TAIL_CALL_UNKNOWN_OPCODE,
+ [125] = _TAIL_CALL_UNKNOWN_OPCODE,
+ [126] = _TAIL_CALL_UNKNOWN_OPCODE,
+ [127] = _TAIL_CALL_UNKNOWN_OPCODE,
+ [210] = _TAIL_CALL_UNKNOWN_OPCODE,
+ [211] = _TAIL_CALL_UNKNOWN_OPCODE,
+ [212] = _TAIL_CALL_UNKNOWN_OPCODE,
+ [213] = _TAIL_CALL_UNKNOWN_OPCODE,
+ [214] = _TAIL_CALL_UNKNOWN_OPCODE,
+ [215] = _TAIL_CALL_UNKNOWN_OPCODE,
+ [216] = _TAIL_CALL_UNKNOWN_OPCODE,
+ [217] = _TAIL_CALL_UNKNOWN_OPCODE,
+ [218] = _TAIL_CALL_UNKNOWN_OPCODE,
+ [219] = _TAIL_CALL_UNKNOWN_OPCODE,
+ [220] = _TAIL_CALL_UNKNOWN_OPCODE,
+ [221] = _TAIL_CALL_UNKNOWN_OPCODE,
+ [222] = _TAIL_CALL_UNKNOWN_OPCODE,
+ [223] = _TAIL_CALL_UNKNOWN_OPCODE,
+ [224] = _TAIL_CALL_UNKNOWN_OPCODE,
+ [225] = _TAIL_CALL_UNKNOWN_OPCODE,
+ [226] = _TAIL_CALL_UNKNOWN_OPCODE,
+ [227] = _TAIL_CALL_UNKNOWN_OPCODE,
+ [228] = _TAIL_CALL_UNKNOWN_OPCODE,
+ [229] = _TAIL_CALL_UNKNOWN_OPCODE,
+ [230] = _TAIL_CALL_UNKNOWN_OPCODE,
+ [231] = _TAIL_CALL_UNKNOWN_OPCODE,
+ [232] = _TAIL_CALL_UNKNOWN_OPCODE,
+ [233] = _TAIL_CALL_UNKNOWN_OPCODE,
+};
#endif /* _Py_TAIL_CALL_INTERP */
#define MAX_EXECUTORS_SIZE 256
+// A trace that has made no progress contains only the prologue:
+// _START_EXECUTOR
+// _MAKE_WARM
+// _CHECK_VALIDITY
+// _SET_IP
+// which is 4-5 instructions.
+#define CODE_SIZE_NO_PROGRESS 5
+// We start with _START_EXECUTOR, _MAKE_WARM
+#define CODE_SIZE_EMPTY 2
+
#define _PyExecutorObject_CAST(op) ((_PyExecutorObject *)(op))
static bool
has_space_for_executor(PyCodeObject *code, _Py_CODEUNIT *instr)
{
+ if (code == (PyCodeObject *)&_Py_InitCleanup) {
+ return false;
+ }
if (instr->op.code == ENTER_EXECUTOR) {
return true;
}
}
static _PyExecutorObject *
-make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFilter *dependencies);
+make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFilter *dependencies, int chain_depth);
static int
-uop_optimize(_PyInterpreterFrame *frame, _Py_CODEUNIT *instr,
- _PyExecutorObject **exec_ptr, int curr_stackentries,
+uop_optimize(_PyInterpreterFrame *frame, PyThreadState *tstate,
+ _PyExecutorObject **exec_ptr,
bool progress_needed);
/* Returns 1 if optimized, 0 if not optimized, and -1 for an error.
// gh-137573: inlining this function causes stack overflows
Py_NO_INLINE int
_PyOptimizer_Optimize(
- _PyInterpreterFrame *frame, _Py_CODEUNIT *start,
- _PyExecutorObject **executor_ptr, int chain_depth)
+ _PyInterpreterFrame *frame, PyThreadState *tstate)
{
- _PyStackRef *stack_pointer = frame->stackpointer;
+ _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
+ int chain_depth = _tstate->jit_tracer_state.initial_state.chain_depth;
PyInterpreterState *interp = _PyInterpreterState_GET();
if (!interp->jit) {
// gh-140936: It is possible that interp->jit will become false during
return 0;
}
assert(!interp->compiling);
+ assert(_tstate->jit_tracer_state.initial_state.stack_depth >= 0);
#ifndef Py_GIL_DISABLED
+ assert(_tstate->jit_tracer_state.initial_state.func != NULL);
interp->compiling = true;
// The first executor in a chain and the MAX_CHAIN_DEPTH'th executor *must*
// make progress in order to avoid infinite loops or excessively-long
// this is true, since a deopt won't infinitely re-enter the executor:
chain_depth %= MAX_CHAIN_DEPTH;
bool progress_needed = chain_depth == 0;
- PyCodeObject *code = _PyFrame_GetCode(frame);
- assert(PyCode_Check(code));
+ PyCodeObject *code = (PyCodeObject *)_tstate->jit_tracer_state.initial_state.code;
+ _Py_CODEUNIT *start = _tstate->jit_tracer_state.initial_state.start_instr;
if (progress_needed && !has_space_for_executor(code, start)) {
interp->compiling = false;
return 0;
}
- int err = uop_optimize(frame, start, executor_ptr, (int)(stack_pointer - _PyFrame_Stackbase(frame)), progress_needed);
+ // One of our dependencies was invalidated while we were tracing; not worth compiling.
+ if (!_tstate->jit_tracer_state.prev_state.dependencies_still_valid) {
+ interp->compiling = false;
+ return 0;
+ }
+ _PyExecutorObject *executor;
+ int err = uop_optimize(frame, tstate, &executor, progress_needed);
if (err <= 0) {
interp->compiling = false;
return err;
}
- assert(*executor_ptr != NULL);
+ assert(executor != NULL);
if (progress_needed) {
int index = get_index_for_executor(code, start);
if (index < 0) {
* If an optimizer has already produced an executor,
* it might get confused by the executor disappearing,
* but there is not much we can do about that here. */
- Py_DECREF(*executor_ptr);
+ Py_DECREF(executor);
interp->compiling = false;
return 0;
}
- insert_executor(code, start, index, *executor_ptr);
+ insert_executor(code, start, index, executor);
}
else {
- (*executor_ptr)->vm_data.code = NULL;
+ executor->vm_data.code = NULL;
+ }
+ _PyExitData *exit = _tstate->jit_tracer_state.initial_state.exit;
+ if (exit != NULL) {
+ exit->executor = executor;
}
- (*executor_ptr)->vm_data.chain_depth = chain_depth;
- assert((*executor_ptr)->vm_data.valid);
+ executor->vm_data.chain_depth = chain_depth;
+ assert(executor->vm_data.valid);
interp->compiling = false;
return 1;
#else
[POP_JUMP_IF_NOT_NONE - POP_JUMP_IF_FALSE][1] = _GUARD_IS_NOT_NONE_POP,
};
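+// Map each uop that transfers control to a dynamically determined instruction pointer
+// (frame pushes, returns, yields) to the _GUARD_IP_* uop that validates the IP recorded
+// at trace time.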
+static const uint16_t
+guard_ip_uop[MAX_UOP_ID + 1] = {
+ [_PUSH_FRAME] = _GUARD_IP__PUSH_FRAME,
+ [_RETURN_GENERATOR] = _GUARD_IP_RETURN_GENERATOR,
+ [_RETURN_VALUE] = _GUARD_IP_RETURN_VALUE,
+ [_YIELD_VALUE] = _GUARD_IP_YIELD_VALUE,
+};
+
#define CONFIDENCE_RANGE 1000
#define CONFIDENCE_CUTOFF 333
DPRINTF(2, "No room for %s (need %d, got %d)\n", \
(opname), (n), max_length - trace_length); \
OPT_STAT_INC(trace_too_long); \
- goto done; \
- }
-
-// Reserve space for N uops, plus 3 for _SET_IP, _CHECK_VALIDITY and _EXIT_TRACE
-#define RESERVE(needed) RESERVE_RAW((needed) + 3, _PyUOpName(opcode))
-
-// Trace stack operations (used by _PUSH_FRAME, _RETURN_VALUE)
-#define TRACE_STACK_PUSH() \
- if (trace_stack_depth >= TRACE_STACK_SIZE) { \
- DPRINTF(2, "Trace stack overflow\n"); \
- OPT_STAT_INC(trace_stack_overflow); \
- return 0; \
- } \
- assert(func == NULL || func->func_code == (PyObject *)code); \
- trace_stack[trace_stack_depth].func = func; \
- trace_stack[trace_stack_depth].code = code; \
- trace_stack[trace_stack_depth].instr = instr; \
- trace_stack_depth++;
-#define TRACE_STACK_POP() \
- if (trace_stack_depth <= 0) { \
- Py_FatalError("Trace stack underflow\n"); \
- } \
- trace_stack_depth--; \
- func = trace_stack[trace_stack_depth].func; \
- code = trace_stack[trace_stack_depth].code; \
- assert(func == NULL || func->func_code == (PyObject *)code); \
- instr = trace_stack[trace_stack_depth].instr;
-
-/* Returns the length of the trace on success,
- * 0 if it failed to produce a worthwhile trace,
- * and -1 on an error.
+ goto full; \
+ }
+
+
+/* Returns 1 if the bytecode was added to the trace, 0 if the trace has ended.
*/
-static int
-translate_bytecode_to_trace(
+int
+_PyJit_translate_single_bytecode_to_trace(
+ PyThreadState *tstate,
_PyInterpreterFrame *frame,
- _Py_CODEUNIT *instr,
- _PyUOpInstruction *trace,
- int buffer_size,
- _PyBloomFilter *dependencies, bool progress_needed)
+ _Py_CODEUNIT *next_instr,
+ bool stop_tracing)
{
- bool first = true;
- PyCodeObject *code = _PyFrame_GetCode(frame);
- PyFunctionObject *func = _PyFrame_GetFunction(frame);
- assert(PyFunction_Check(func));
- PyCodeObject *initial_code = code;
- _Py_BloomFilter_Add(dependencies, initial_code);
- _Py_CODEUNIT *initial_instr = instr;
- int trace_length = 0;
- // Leave space for possible trailing _EXIT_TRACE
- int max_length = buffer_size-2;
- struct {
- PyFunctionObject *func;
- PyCodeObject *code;
- _Py_CODEUNIT *instr;
- } trace_stack[TRACE_STACK_SIZE];
- int trace_stack_depth = 0;
- int confidence = CONFIDENCE_RANGE; // Adjusted by branch instructions
- bool jump_seen = false;
#ifdef Py_DEBUG
char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
lltrace = *python_lltrace - '0'; // TODO: Parse an int and all that
}
#endif
-
- DPRINTF(2,
- "Optimizing %s (%s:%d) at byte offset %d\n",
- PyUnicode_AsUTF8(code->co_qualname),
- PyUnicode_AsUTF8(code->co_filename),
- code->co_firstlineno,
- 2 * INSTR_IP(initial_instr, code));
- ADD_TO_TRACE(_START_EXECUTOR, 0, (uintptr_t)instr, INSTR_IP(instr, code));
- ADD_TO_TRACE(_MAKE_WARM, 0, 0, 0);
+ _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
+ PyCodeObject *old_code = _tstate->jit_tracer_state.prev_state.instr_code;
+ bool progress_needed = (_tstate->jit_tracer_state.initial_state.chain_depth % MAX_CHAIN_DEPTH) == 0;
+ _PyBloomFilter *dependencies = &_tstate->jit_tracer_state.prev_state.dependencies;
+ int trace_length = _tstate->jit_tracer_state.prev_state.code_curr_size;
+ _PyUOpInstruction *trace = _tstate->jit_tracer_state.code_buffer;
+ int max_length = _tstate->jit_tracer_state.prev_state.code_max_size;
+
+ _Py_CODEUNIT *this_instr = _tstate->jit_tracer_state.prev_state.instr;
+ _Py_CODEUNIT *target_instr = this_instr;
uint32_t target = 0;
- for (;;) {
- target = INSTR_IP(instr, code);
- // One for possible _DEOPT, one because _CHECK_VALIDITY itself might _DEOPT
- max_length-=2;
- uint32_t opcode = instr->op.code;
- uint32_t oparg = instr->op.arg;
-
- if (!first && instr == initial_instr) {
- // We have looped around to the start:
- RESERVE(1);
- ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0);
- goto done;
+ target = Py_IsNone((PyObject *)old_code)
+ ? (int)(target_instr - _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS_PTR)
+ : INSTR_IP(target_instr, old_code);
+
+ // Rewind past any EXTENDED_ARG prefixes so that we see the whole instruction.
+ // The deopt target must point to the first EXTENDED_ARG.
+ int oparg = _tstate->jit_tracer_state.prev_state.instr_oparg;
+ int opcode = this_instr->op.code;
+ int rewind_oparg = oparg;
+ while (rewind_oparg > 255) {
+ rewind_oparg >>= 8;
+ target--;
+ }
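+ // For example (hypothetical oparg): with oparg == 0x0123 the instruction is encoded as
+ // EXTENDED_ARG 0x01 followed by the opcode with arg 0x23, so one rewind step moves the
+ // target back to the EXTENDED_ARG, which is where a deopt must re-enter.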
+
+ int old_stack_level = _tstate->jit_tracer_state.prev_state.instr_stacklevel;
+
+ // Unpredictable control flow: an instruction with a dynamic jump did not fall through.
+ bool has_dynamic_jump_taken = OPCODE_HAS_UNPREDICTABLE_JUMP(opcode) &&
+ (next_instr != this_instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]]);
+
+ /* Special case the first instruction,
+ * so that we can guarantee forward progress */
+ if (progress_needed && _tstate->jit_tracer_state.prev_state.code_curr_size < CODE_SIZE_NO_PROGRESS) {
+ if (OPCODE_HAS_EXIT(opcode) || OPCODE_HAS_DEOPT(opcode)) {
+ opcode = _PyOpcode_Deopt[opcode];
}
+ assert(!OPCODE_HAS_EXIT(opcode));
+ assert(!OPCODE_HAS_DEOPT(opcode));
+ }
- DPRINTF(2, "%d: %s(%d)\n", target, _PyOpcode_OpName[opcode], oparg);
+ bool needs_guard_ip = OPCODE_HAS_NEEDS_GUARD_IP(opcode);
+ if (has_dynamic_jump_taken && !needs_guard_ip) {
+ DPRINTF(2, "Unsupported: dynamic jump taken %s\n", _PyOpcode_OpName[opcode]);
+ goto unsupported;
+ }
- if (opcode == EXTENDED_ARG) {
- instr++;
- opcode = instr->op.code;
- oparg = (oparg << 8) | instr->op.arg;
- if (opcode == EXTENDED_ARG) {
- instr--;
- goto done;
+ int is_sys_tracing = (tstate->c_tracefunc != NULL) || (tstate->c_profilefunc != NULL);
+ if (is_sys_tracing) {
+ goto full;
+ }
+
+ if (stop_tracing) {
+ ADD_TO_TRACE(_DEOPT, 0, 0, target);
+ goto done;
+ }
+
+ DPRINTF(2, "%p %d: %s(%d) %d %d\n", old_code, target, _PyOpcode_OpName[opcode], oparg, needs_guard_ip, old_stack_level);
+
+#ifdef Py_DEBUG
+ if (oparg > 255) {
+ assert(_Py_GetBaseCodeUnit(old_code, target).op.code == EXTENDED_ARG);
+ }
+#endif
+
+ // Skip over super instructions.
+ if (_tstate->jit_tracer_state.prev_state.instr_is_super) {
+ _tstate->jit_tracer_state.prev_state.instr_is_super = false;
+ return 1;
+ }
+
+ if (opcode == ENTER_EXECUTOR) {
+ goto full;
+ }
+
+ if (!_tstate->jit_tracer_state.prev_state.dependencies_still_valid) {
+ goto done;
+ }
+
+ // This happens when a call occurs that we can't trace, such as a Python -> C -> Python call.
+ // If we haven't guarded the IP, then the jump is untraceable.
+ if (frame != _tstate->jit_tracer_state.prev_state.instr_frame && !needs_guard_ip) {
+ DPRINTF(2, "Unsupported: unguardable jump taken\n");
+ goto unsupported;
+ }
+
+ if (oparg > 0xFFFF) {
+ DPRINTF(2, "Unsupported: oparg too large\n");
+ goto unsupported;
+ }
+
+ // TODO (gh-140277): The constituent uops use one extra stack slot, so we need to check for headroom.
+ if (opcode == BINARY_OP_SUBSCR_GETITEM && old_stack_level + 1 > old_code->co_stacksize) {
+ unsupported:
+ {
+ // Rewind to previous instruction and replace with _EXIT_TRACE.
+ _PyUOpInstruction *curr = &trace[trace_length-1];
+ while (curr->opcode != _SET_IP && trace_length > 2) {
+ trace_length--;
+ curr = &trace[trace_length-1];
+ }
+ assert(curr->opcode == _SET_IP || trace_length == 2);
+ if (curr->opcode == _SET_IP) {
+ int32_t old_target = (int32_t)uop_get_target(curr);
+ curr++;
+ trace_length++;
+ curr->opcode = _EXIT_TRACE;
+ curr->format = UOP_FORMAT_TARGET;
+ curr->target = old_target;
}
- }
- if (opcode == ENTER_EXECUTOR) {
- // We have a couple of options here. We *could* peek "underneath"
- // this executor and continue tracing, which could give us a longer,
- // more optimizeable trace (at the expense of lots of duplicated
- // tier two code). Instead, we choose to just end here and stitch to
- // the other trace, which allows a side-exit traces to rejoin the
- // "main" trace periodically (and also helps protect us against
- // pathological behavior where the amount of tier two code explodes
- // for a medium-length, branchy code path). This seems to work
- // better in practice, but in the future we could be smarter about
- // what we do here:
goto done;
}
- assert(opcode != ENTER_EXECUTOR && opcode != EXTENDED_ARG);
- RESERVE_RAW(2, "_CHECK_VALIDITY");
- ADD_TO_TRACE(_CHECK_VALIDITY, 0, 0, target);
- if (!OPCODE_HAS_NO_SAVE_IP(opcode)) {
- RESERVE_RAW(2, "_SET_IP");
- ADD_TO_TRACE(_SET_IP, 0, (uintptr_t)instr, target);
- }
+ }
- /* Special case the first instruction,
- * so that we can guarantee forward progress */
- if (first && progress_needed) {
- assert(first);
- if (OPCODE_HAS_EXIT(opcode) || OPCODE_HAS_DEOPT(opcode)) {
- opcode = _PyOpcode_Deopt[opcode];
- }
- assert(!OPCODE_HAS_EXIT(opcode));
- assert(!OPCODE_HAS_DEOPT(opcode));
- }
+ if (opcode == NOP) {
+ return 1;
+ }
- if (OPCODE_HAS_EXIT(opcode)) {
- // Make space for side exit and final _EXIT_TRACE:
- RESERVE_RAW(2, "_EXIT_TRACE");
- max_length--;
- }
- if (OPCODE_HAS_ERROR(opcode)) {
- // Make space for error stub and final _EXIT_TRACE:
- RESERVE_RAW(2, "_ERROR_POP_N");
- max_length--;
- }
- switch (opcode) {
- case POP_JUMP_IF_NONE:
- case POP_JUMP_IF_NOT_NONE:
- case POP_JUMP_IF_FALSE:
- case POP_JUMP_IF_TRUE:
- {
- RESERVE(1);
- int counter = instr[1].cache;
- int bitcount = _Py_popcount32(counter);
- int jump_likely = bitcount > 8;
- /* If bitcount is 8 (half the jumps were taken), adjust confidence by 50%.
- For values in between, adjust proportionally. */
- if (jump_likely) {
- confidence = confidence * bitcount / 16;
- }
- else {
- confidence = confidence * (16 - bitcount) / 16;
- }
- uint32_t uopcode = BRANCH_TO_GUARD[opcode - POP_JUMP_IF_FALSE][jump_likely];
- DPRINTF(2, "%d: %s(%d): counter=%04x, bitcount=%d, likely=%d, confidence=%d, uopcode=%s\n",
- target, _PyOpcode_OpName[opcode], oparg,
- counter, bitcount, jump_likely, confidence, _PyUOpName(uopcode));
- if (confidence < CONFIDENCE_CUTOFF) {
- DPRINTF(2, "Confidence too low (%d < %d)\n", confidence, CONFIDENCE_CUTOFF);
- OPT_STAT_INC(low_confidence);
- goto done;
- }
- _Py_CODEUNIT *next_instr = instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]];
- _Py_CODEUNIT *target_instr = next_instr + oparg;
- if (jump_likely) {
- DPRINTF(2, "Jump likely (%04x = %d bits), continue at byte offset %d\n",
- instr[1].cache, bitcount, 2 * INSTR_IP(target_instr, code));
- instr = target_instr;
- ADD_TO_TRACE(uopcode, 0, 0, INSTR_IP(next_instr, code));
- goto top;
- }
- ADD_TO_TRACE(uopcode, 0, 0, INSTR_IP(target_instr, code));
- break;
- }
+ if (opcode == JUMP_FORWARD) {
+ return 1;
+ }
- case JUMP_BACKWARD:
- case JUMP_BACKWARD_JIT:
- ADD_TO_TRACE(_CHECK_PERIODIC, 0, 0, target);
- _Py_FALLTHROUGH;
- case JUMP_BACKWARD_NO_INTERRUPT:
- {
- instr += 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] - (int)oparg;
- if (jump_seen) {
- OPT_STAT_INC(inner_loop);
- DPRINTF(2, "JUMP_BACKWARD not to top ends trace\n");
- goto done;
- }
- jump_seen = true;
- goto top;
- }
+ if (opcode == EXTENDED_ARG) {
+ return 1;
+ }
- case JUMP_FORWARD:
- {
- RESERVE(0);
- // This will emit two _SET_IP instructions; leave it to the optimizer
- instr += oparg;
- break;
- }
+ // One for possible _DEOPT, one because _CHECK_VALIDITY itself might _DEOPT
+ max_length -= 2;
- case RESUME:
- /* Use a special tier 2 version of RESUME_CHECK to allow traces to
- * start with RESUME_CHECK */
- ADD_TO_TRACE(_TIER2_RESUME_CHECK, 0, 0, target);
- break;
+ const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode];
- default:
- {
- const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode];
- if (expansion->nuops > 0) {
- // Reserve space for nuops (+ _SET_IP + _EXIT_TRACE)
- int nuops = expansion->nuops;
- RESERVE(nuops + 1); /* One extra for exit */
- int16_t last_op = expansion->uops[nuops-1].uop;
- if (last_op == _RETURN_VALUE || last_op == _RETURN_GENERATOR || last_op == _YIELD_VALUE) {
- // Check for trace stack underflow now:
- // We can't bail e.g. in the middle of
- // LOAD_CONST + _RETURN_VALUE.
- if (trace_stack_depth == 0) {
- DPRINTF(2, "Trace stack underflow\n");
- OPT_STAT_INC(trace_stack_underflow);
- return 0;
- }
- }
- uint32_t orig_oparg = oparg; // For OPARG_TOP/BOTTOM
- for (int i = 0; i < nuops; i++) {
- oparg = orig_oparg;
- uint32_t uop = expansion->uops[i].uop;
- uint64_t operand = 0;
- // Add one to account for the actual opcode/oparg pair:
- int offset = expansion->uops[i].offset + 1;
- switch (expansion->uops[i].size) {
- case OPARG_SIMPLE:
- assert(opcode != JUMP_BACKWARD_NO_INTERRUPT && opcode != JUMP_BACKWARD);
- break;
- case OPARG_CACHE_1:
- operand = read_u16(&instr[offset].cache);
- break;
- case OPARG_CACHE_2:
- operand = read_u32(&instr[offset].cache);
- break;
- case OPARG_CACHE_4:
- operand = read_u64(&instr[offset].cache);
- break;
- case OPARG_TOP: // First half of super-instr
- oparg = orig_oparg >> 4;
- break;
- case OPARG_BOTTOM: // Second half of super-instr
- oparg = orig_oparg & 0xF;
- break;
- case OPARG_SAVE_RETURN_OFFSET: // op=_SAVE_RETURN_OFFSET; oparg=return_offset
- oparg = offset;
- assert(uop == _SAVE_RETURN_OFFSET);
- break;
- case OPARG_REPLACED:
- uop = _PyUOp_Replacements[uop];
- assert(uop != 0);
- uint32_t next_inst = target + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + (oparg > 255);
- if (uop == _TIER2_RESUME_CHECK) {
- target = next_inst;
- }
-#ifdef Py_DEBUG
- else {
- uint32_t jump_target = next_inst + oparg;
- assert(_Py_GetBaseCodeUnit(code, jump_target).op.code == END_FOR);
- assert(_Py_GetBaseCodeUnit(code, jump_target+1).op.code == POP_ITER);
- }
-#endif
- break;
- case OPERAND1_1:
- assert(trace[trace_length-1].opcode == uop);
- operand = read_u16(&instr[offset].cache);
- trace[trace_length-1].operand1 = operand;
- continue;
- case OPERAND1_2:
- assert(trace[trace_length-1].opcode == uop);
- operand = read_u32(&instr[offset].cache);
- trace[trace_length-1].operand1 = operand;
- continue;
- case OPERAND1_4:
- assert(trace[trace_length-1].opcode == uop);
- operand = read_u64(&instr[offset].cache);
- trace[trace_length-1].operand1 = operand;
- continue;
- default:
- fprintf(stderr,
- "opcode=%d, oparg=%d; nuops=%d, i=%d; size=%d, offset=%d\n",
- opcode, oparg, nuops, i,
- expansion->uops[i].size,
- expansion->uops[i].offset);
- Py_FatalError("garbled expansion");
- }
+ assert(opcode != ENTER_EXECUTOR && opcode != EXTENDED_ARG);
+ assert(!_PyErr_Occurred(tstate));
- if (uop == _RETURN_VALUE || uop == _RETURN_GENERATOR || uop == _YIELD_VALUE) {
- TRACE_STACK_POP();
- /* Set the operand to the function or code object returned to,
- * to assist optimization passes. (See _PUSH_FRAME below.)
- */
- if (func != NULL) {
- operand = (uintptr_t)func;
- }
- else if (code != NULL) {
- operand = (uintptr_t)code | 1;
- }
- else {
- operand = 0;
- }
- ADD_TO_TRACE(uop, oparg, operand, target);
- DPRINTF(2,
- "Returning to %s (%s:%d) at byte offset %d\n",
- PyUnicode_AsUTF8(code->co_qualname),
- PyUnicode_AsUTF8(code->co_filename),
- code->co_firstlineno,
- 2 * INSTR_IP(instr, code));
- goto top;
- }
- if (uop == _PUSH_FRAME) {
- assert(i + 1 == nuops);
- if (opcode == FOR_ITER_GEN ||
- opcode == LOAD_ATTR_PROPERTY ||
- opcode == BINARY_OP_SUBSCR_GETITEM ||
- opcode == SEND_GEN)
- {
- DPRINTF(2, "Bailing due to dynamic target\n");
- OPT_STAT_INC(unknown_callee);
- return 0;
- }
- assert(_PyOpcode_Deopt[opcode] == CALL || _PyOpcode_Deopt[opcode] == CALL_KW);
- int func_version_offset =
- offsetof(_PyCallCache, func_version)/sizeof(_Py_CODEUNIT)
- // Add one to account for the actual opcode/oparg pair:
- + 1;
- uint32_t func_version = read_u32(&instr[func_version_offset].cache);
- PyCodeObject *new_code = NULL;
- PyFunctionObject *new_func =
- _PyFunction_LookupByVersion(func_version, (PyObject **) &new_code);
- DPRINTF(2, "Function: version=%#x; new_func=%p, new_code=%p\n",
- (int)func_version, new_func, new_code);
- if (new_code != NULL) {
- if (new_code == code) {
- // Recursive call, bail (we could be here forever).
- DPRINTF(2, "Bailing on recursive call to %s (%s:%d)\n",
- PyUnicode_AsUTF8(new_code->co_qualname),
- PyUnicode_AsUTF8(new_code->co_filename),
- new_code->co_firstlineno);
- OPT_STAT_INC(recursive_call);
- ADD_TO_TRACE(uop, oparg, 0, target);
- ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0);
- goto done;
- }
- if (new_code->co_version != func_version) {
- // func.__code__ was updated.
- // Perhaps it may happen again, so don't bother tracing.
- // TODO: Reason about this -- is it better to bail or not?
- DPRINTF(2, "Bailing because co_version != func_version\n");
- ADD_TO_TRACE(uop, oparg, 0, target);
- ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0);
- goto done;
- }
- // Increment IP to the return address
- instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + 1;
- TRACE_STACK_PUSH();
- _Py_BloomFilter_Add(dependencies, new_code);
- /* Set the operand to the callee's function or code object,
- * to assist optimization passes.
- * We prefer setting it to the function
- * but if that's not available but the code is available,
- * use the code, setting the low bit so the optimizer knows.
- */
- if (new_func != NULL) {
- operand = (uintptr_t)new_func;
- }
- else if (new_code != NULL) {
- operand = (uintptr_t)new_code | 1;
- }
- else {
- operand = 0;
- }
- ADD_TO_TRACE(uop, oparg, operand, target);
- code = new_code;
- func = new_func;
- instr = _PyCode_CODE(code);
- DPRINTF(2,
- "Continuing in %s (%s:%d) at byte offset %d\n",
- PyUnicode_AsUTF8(code->co_qualname),
- PyUnicode_AsUTF8(code->co_filename),
- code->co_firstlineno,
- 2 * INSTR_IP(instr, code));
- goto top;
+ if (OPCODE_HAS_EXIT(opcode)) {
+ // Make space for side exit and final _EXIT_TRACE:
+ max_length--;
+ }
+ if (OPCODE_HAS_ERROR(opcode)) {
+ // Make space for error stub and final _EXIT_TRACE:
+ max_length--;
+ }
+
+ // _GUARD_IP leads to an exit.
+ max_length -= needs_guard_ip;
+
+ RESERVE_RAW(expansion->nuops + needs_guard_ip + 2 + (!OPCODE_HAS_NO_SAVE_IP(opcode)), "uop and various checks");
+
+ ADD_TO_TRACE(_CHECK_VALIDITY, 0, 0, target);
+
+ if (!OPCODE_HAS_NO_SAVE_IP(opcode)) {
+ ADD_TO_TRACE(_SET_IP, 0, (uintptr_t)target_instr, target);
+ }
+
+ // Can be NULL for the entry frame.
+ if (old_code != NULL) {
+ _Py_BloomFilter_Add(dependencies, old_code);
+ }
+
+ switch (opcode) {
+ case POP_JUMP_IF_NONE:
+ case POP_JUMP_IF_NOT_NONE:
+ case POP_JUMP_IF_FALSE:
+ case POP_JUMP_IF_TRUE:
+ {
+ _Py_CODEUNIT *computed_next_instr_without_modifiers = target_instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]];
+ _Py_CODEUNIT *computed_next_instr = computed_next_instr_without_modifiers + (computed_next_instr_without_modifiers->op.code == NOT_TAKEN);
+ _Py_CODEUNIT *computed_jump_instr = computed_next_instr_without_modifiers + oparg;
+ assert(next_instr == computed_next_instr || next_instr == computed_jump_instr);
+ int jump_happened = computed_jump_instr == next_instr;
+ assert(jump_happened == (target_instr[1].cache & 1));
+ uint32_t uopcode = BRANCH_TO_GUARD[opcode - POP_JUMP_IF_FALSE][jump_happened];
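+ // The guard's deopt target is the successor we did *not* take, so a later
+ // misprediction exits the trace at the correct instruction.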
+ ADD_TO_TRACE(uopcode, 0, 0, INSTR_IP(jump_happened ? computed_next_instr : computed_jump_instr, old_code));
+ break;
+ }
+ case JUMP_BACKWARD_JIT:
+ // This can happen because the JIT might have been re-enabled after it was disabled.
+ case JUMP_BACKWARD_NO_JIT:
+ case JUMP_BACKWARD:
+ ADD_TO_TRACE(_CHECK_PERIODIC, 0, 0, target);
+ _Py_FALLTHROUGH;
+ case JUMP_BACKWARD_NO_INTERRUPT:
+ {
+ if ((next_instr != _tstate->jit_tracer_state.initial_state.close_loop_instr) &&
+ (next_instr != _tstate->jit_tracer_state.initial_state.start_instr) &&
+ _tstate->jit_tracer_state.prev_state.code_curr_size > CODE_SIZE_NO_PROGRESS &&
+ // For side exits, we don't want to terminate them early.
+ _tstate->jit_tracer_state.initial_state.exit == NULL &&
+ // JUMP_BACKWARD_NO_INTERRUPT appears in coroutines, which we usually want to unroll.
+ opcode != JUMP_BACKWARD_NO_INTERRUPT) {
+ // We encountered a JUMP_BACKWARD but not to the top of our own loop.
+ // We don't want to continue tracing as we might get stuck in the
+ // inner loop. Instead, end the trace where the executor of the
+ // inner loop might start and let the traces rejoin.
+ OPT_STAT_INC(inner_loop);
+ ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
+ trace[trace_length-1].operand1 = true; // is_control_flow
+ DPRINTF(2, "JUMP_BACKWARD not to top ends trace %p %p %p\n", next_instr,
+ _tstate->jit_tracer_state.initial_state.close_loop_instr, _tstate->jit_tracer_state.initial_state.start_instr);
+ goto done;
+ }
+ break;
+ }
+
+ case RESUME:
+ case RESUME_CHECK:
+ /* Use a special tier 2 version of RESUME_CHECK to allow traces to
+ * start with RESUME_CHECK */
+ ADD_TO_TRACE(_TIER2_RESUME_CHECK, 0, 0, target);
+ break;
+ default:
+ {
+ const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode];
+ // Reserve space for nuops (+ _SET_IP + _EXIT_TRACE)
+ int nuops = expansion->nuops;
+ if (nuops == 0) {
+ DPRINTF(2, "Unsupported opcode %s\n", _PyOpcode_OpName[opcode]);
+ goto unsupported;
+ }
+ assert(nuops > 0);
+ uint32_t orig_oparg = oparg; // For OPARG_TOP/BOTTOM
+ uint32_t orig_target = target;
+ for (int i = 0; i < nuops; i++) {
+ oparg = orig_oparg;
+ target = orig_target;
+ uint32_t uop = expansion->uops[i].uop;
+ uint64_t operand = 0;
+ // Add one to account for the actual opcode/oparg pair:
+ int offset = expansion->uops[i].offset + 1;
+ switch (expansion->uops[i].size) {
+ case OPARG_SIMPLE:
+ assert(opcode != _JUMP_BACKWARD_NO_INTERRUPT && opcode != JUMP_BACKWARD);
+ break;
+ case OPARG_CACHE_1:
+ operand = read_u16(&this_instr[offset].cache);
+ break;
+ case OPARG_CACHE_2:
+ operand = read_u32(&this_instr[offset].cache);
+ break;
+ case OPARG_CACHE_4:
+ operand = read_u64(&this_instr[offset].cache);
+ break;
+ case OPARG_TOP: // First half of super-instr
+ assert(orig_oparg <= 255);
+ oparg = orig_oparg >> 4;
+ break;
+ case OPARG_BOTTOM: // Second half of super-instr
+ assert(orig_oparg <= 255);
+ oparg = orig_oparg & 0xF;
+ break;
+ case OPARG_SAVE_RETURN_OFFSET: // op=_SAVE_RETURN_OFFSET; oparg=return_offset
+ oparg = offset;
+ assert(uop == _SAVE_RETURN_OFFSET);
+ break;
+ case OPARG_REPLACED:
+ uop = _PyUOp_Replacements[uop];
+ assert(uop != 0);
+
+ uint32_t next_inst = target + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]];
+ if (uop == _TIER2_RESUME_CHECK) {
+ target = next_inst;
+ }
+ else {
+ int extended_arg = orig_oparg > 255;
+ uint32_t jump_target = next_inst + orig_oparg + extended_arg;
+ assert(_Py_GetBaseCodeUnit(old_code, jump_target).op.code == END_FOR);
+ assert(_Py_GetBaseCodeUnit(old_code, jump_target+1).op.code == POP_ITER);
+ if (is_for_iter_test[uop]) {
+ target = jump_target + 1;
}
- DPRINTF(2, "Bail, new_code == NULL\n");
- OPT_STAT_INC(unknown_callee);
- return 0;
}
-
- if (uop == _BINARY_OP_INPLACE_ADD_UNICODE) {
- assert(i + 1 == nuops);
- _Py_CODEUNIT *next_instr = instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]];
- assert(next_instr->op.code == STORE_FAST);
- operand = next_instr->op.arg;
- // Skip the STORE_FAST:
- instr++;
+ break;
+ case OPERAND1_1:
+ assert(trace[trace_length-1].opcode == uop);
+ operand = read_u16(&this_instr[offset].cache);
+ trace[trace_length-1].operand1 = operand;
+ continue;
+ case OPERAND1_2:
+ assert(trace[trace_length-1].opcode == uop);
+ operand = read_u32(&this_instr[offset].cache);
+ trace[trace_length-1].operand1 = operand;
+ continue;
+ case OPERAND1_4:
+ assert(trace[trace_length-1].opcode == uop);
+ operand = read_u64(&this_instr[offset].cache);
+ trace[trace_length-1].operand1 = operand;
+ continue;
+ default:
+ fprintf(stderr,
+ "opcode=%d, oparg=%d; nuops=%d, i=%d; size=%d, offset=%d\n",
+ opcode, oparg, nuops, i,
+ expansion->uops[i].size,
+ expansion->uops[i].offset);
+ Py_FatalError("garbled expansion");
+ }
+ if (uop == _PUSH_FRAME || uop == _RETURN_VALUE || uop == _RETURN_GENERATOR || uop == _YIELD_VALUE) {
+ PyCodeObject *new_code = (PyCodeObject *)PyStackRef_AsPyObjectBorrow(frame->f_executable);
+ PyFunctionObject *new_func = (PyFunctionObject *)PyStackRef_AsPyObjectBorrow(frame->f_funcobj);
+
+ operand = 0;
+ if (frame->owner < FRAME_OWNED_BY_INTERPRETER) {
+ // Don't add nested code objects to the dependencies;
+ // doing so causes endless re-traces.
+ if (new_func != NULL && !Py_IsNone((PyObject*)new_func) && !(new_code->co_flags & CO_NESTED)) {
+ operand = (uintptr_t)new_func;
+ DPRINTF(2, "Adding %p func to op\n", (void *)operand);
+ _Py_BloomFilter_Add(dependencies, new_func);
+ }
+ else if (new_code != NULL && !Py_IsNone((PyObject*)new_code)) {
+ operand = (uintptr_t)new_code | 1;
+ DPRINTF(2, "Adding %p code to op\n", (void *)operand);
+ _Py_BloomFilter_Add(dependencies, new_code);
}
-
- // All other instructions
- ADD_TO_TRACE(uop, oparg, operand, target);
}
+ ADD_TO_TRACE(uop, oparg, operand, target);
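+ // Stash the frame's current stack level in operand1; the optimizer's
+ // frame_pop reads it to rebuild the abstract frame on return.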
+ trace[trace_length - 1].operand1 = PyStackRef_IsNone(frame->f_executable) ? 2 : ((int)(frame->stackpointer - _PyFrame_Stackbase(frame)));
break;
}
- DPRINTF(2, "Unsupported opcode %s\n", _PyOpcode_OpName[opcode]);
- OPT_UNSUPPORTED_OPCODE(opcode);
- goto done; // Break out of loop
- } // End default
-
- } // End switch (opcode)
+ if (uop == _BINARY_OP_INPLACE_ADD_UNICODE) {
+ assert(i + 1 == nuops);
+ _Py_CODEUNIT *next = target_instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]];
+ assert(next->op.code == STORE_FAST);
+ operand = next->op.arg;
+ }
+ // All other instructions
+ ADD_TO_TRACE(uop, oparg, operand, target);
+ }
+ break;
+ } // End default
- instr++;
- // Add cache size for opcode
- instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]];
+ } // End switch (opcode)
- if (opcode == CALL_LIST_APPEND) {
- assert(instr->op.code == POP_TOP);
- instr++;
+ if (needs_guard_ip) {
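+ // Guard that execution really continued at the instruction we traced next;
+ // the guard variant is chosen from the uop that performed the jump.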
+ uint16_t guard_ip = guard_ip_uop[trace[trace_length-1].opcode];
+ if (guard_ip == 0) {
+ DPRINTF(1, "Unknown uop needing guard ip %s\n", _PyOpcode_uop_name[trace[trace_length-1].opcode]);
+ Py_UNREACHABLE();
}
- top:
- // Jump here after _PUSH_FRAME or likely branches.
- first = false;
- } // End for (;;)
-
+ ADD_TO_TRACE(guard_ip, 0, (uintptr_t)next_instr, 0);
+ }
+ // Loop back to the start
+ int is_first_instr = _tstate->jit_tracer_state.initial_state.close_loop_instr == next_instr ||
+ _tstate->jit_tracer_state.initial_state.start_instr == next_instr;
+ if (is_first_instr && _tstate->jit_tracer_state.prev_state.code_curr_size > CODE_SIZE_NO_PROGRESS) {
+ if (needs_guard_ip) {
+ ADD_TO_TRACE(_SET_IP, 0, (uintptr_t)next_instr, 0);
+ }
+ ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0);
+ goto done;
+ }
+ DPRINTF(2, "Trace continuing\n");
+ _tstate->jit_tracer_state.prev_state.code_curr_size = trace_length;
+ _tstate->jit_tracer_state.prev_state.code_max_size = max_length;
+ return 1;
done:
- while (trace_stack_depth > 0) {
- TRACE_STACK_POP();
- }
- assert(code == initial_code);
- // Skip short traces where we can't even translate a single instruction:
- if (first) {
- OPT_STAT_INC(trace_too_short);
- DPRINTF(2,
- "No trace for %s (%s:%d) at byte offset %d (no progress)\n",
- PyUnicode_AsUTF8(code->co_qualname),
- PyUnicode_AsUTF8(code->co_filename),
- code->co_firstlineno,
- 2 * INSTR_IP(initial_instr, code));
+ DPRINTF(2, "Trace done\n");
+ _tstate->jit_tracer_state.prev_state.code_curr_size = trace_length;
+ _tstate->jit_tracer_state.prev_state.code_max_size = max_length;
+ return 0;
+full:
+ DPRINTF(2, "Trace full\n");
+ if (!is_terminator(&_tstate->jit_tracer_state.code_buffer[trace_length-1])) {
+ // Undo the last few instructions.
+ trace_length = _tstate->jit_tracer_state.prev_state.code_curr_size;
+ max_length = _tstate->jit_tracer_state.prev_state.code_max_size;
+ // We previously reserved one.
+ max_length += 1;
+ ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
+ trace[trace_length-1].operand1 = true; // is_control_flow
+ }
+ _tstate->jit_tracer_state.prev_state.code_curr_size = trace_length;
+ _tstate->jit_tracer_state.prev_state.code_max_size = max_length;
+ return 0;
+}
+
+// Returns 0 for do not enter tracing, 1 on enter tracing.
+int
+_PyJit_TryInitializeTracing(
+ PyThreadState *tstate, _PyInterpreterFrame *frame, _Py_CODEUNIT *curr_instr,
+ _Py_CODEUNIT *start_instr, _Py_CODEUNIT *close_loop_instr, int curr_stackdepth, int chain_depth,
+ _PyExitData *exit, int oparg)
+{
+ _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
+ // A trace is already in progress (e.g. a recursive tracing request).
+ // Don't trace into the inner call: it would stomp on the previous trace, causing endless retraces.
+ if (_tstate->jit_tracer_state.prev_state.code_curr_size > CODE_SIZE_EMPTY) {
return 0;
}
- if (!is_terminator(&trace[trace_length-1])) {
- /* Allow space for _EXIT_TRACE */
- max_length += 2;
- ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
+ if (oparg > 0xFFFF) {
+ return 0;
+ }
+ if (_tstate->jit_tracer_state.code_buffer == NULL) {
+ _tstate->jit_tracer_state.code_buffer = (_PyUOpInstruction *)_PyObject_VirtualAlloc(UOP_BUFFER_SIZE);
+ if (_tstate->jit_tracer_state.code_buffer == NULL) {
+ // Don't error, just go to next instruction.
+ return 0;
+ }
+ }
+ PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj);
+ if (func == NULL) {
+ return 0;
+ }
+ PyCodeObject *code = _PyFrame_GetCode(frame);
+#ifdef Py_DEBUG
+ char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
+ int lltrace = 0;
+ if (python_lltrace != NULL && *python_lltrace >= '0') {
+ lltrace = *python_lltrace - '0'; // TODO: Parse an int and all that
}
- DPRINTF(1,
- "Created a proto-trace for %s (%s:%d) at byte offset %d -- length %d\n",
- PyUnicode_AsUTF8(code->co_qualname),
- PyUnicode_AsUTF8(code->co_filename),
- code->co_firstlineno,
- 2 * INSTR_IP(initial_instr, code),
- trace_length);
- OPT_HIST(trace_length, trace_length_hist);
- return trace_length;
+ DPRINTF(2,
+ "Tracing %s (%s:%d) at byte offset %d at chain depth %d\n",
+ PyUnicode_AsUTF8(code->co_qualname),
+ PyUnicode_AsUTF8(code->co_filename),
+ code->co_firstlineno,
+ 2 * INSTR_IP(close_loop_instr, code),
+ chain_depth);
+#endif
+
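+ // Seed the trace buffer with the standard prologue (_START_EXECUTOR, _MAKE_WARM)
+ // before any bytecode is recorded.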
+ add_to_trace(_tstate->jit_tracer_state.code_buffer, 0, _START_EXECUTOR, 0, (uintptr_t)start_instr, INSTR_IP(start_instr, code));
+ add_to_trace(_tstate->jit_tracer_state.code_buffer, 1, _MAKE_WARM, 0, 0, 0);
+ _tstate->jit_tracer_state.prev_state.code_curr_size = CODE_SIZE_EMPTY;
+
+ _tstate->jit_tracer_state.prev_state.code_max_size = UOP_MAX_TRACE_LENGTH;
+ _tstate->jit_tracer_state.initial_state.start_instr = start_instr;
+ _tstate->jit_tracer_state.initial_state.close_loop_instr = close_loop_instr;
+ _tstate->jit_tracer_state.initial_state.code = (PyCodeObject *)Py_NewRef(code);
+ _tstate->jit_tracer_state.initial_state.func = (PyFunctionObject *)Py_NewRef(func);
+ _tstate->jit_tracer_state.initial_state.exit = exit;
+ _tstate->jit_tracer_state.initial_state.stack_depth = curr_stackdepth;
+ _tstate->jit_tracer_state.initial_state.chain_depth = chain_depth;
+ _tstate->jit_tracer_state.prev_state.instr_frame = frame;
+ _tstate->jit_tracer_state.prev_state.dependencies_still_valid = true;
+ _tstate->jit_tracer_state.prev_state.instr_code = (PyCodeObject *)Py_NewRef(_PyFrame_GetCode(frame));
+ _tstate->jit_tracer_state.prev_state.instr = curr_instr;
+ _tstate->jit_tracer_state.prev_state.instr_frame = frame;
+ _tstate->jit_tracer_state.prev_state.instr_oparg = oparg;
+ _tstate->jit_tracer_state.prev_state.instr_stacklevel = curr_stackdepth;
+ _tstate->jit_tracer_state.prev_state.instr_is_super = false;
+ assert(curr_instr->op.code == JUMP_BACKWARD_JIT || (exit != NULL));
+ _tstate->jit_tracer_state.initial_state.jump_backward_instr = curr_instr;
+
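+ // Re-arm the counter on the loop-closing instruction (if it has an inline cache)
+ // so that it triggers again as soon as it is next reached.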
+ if (_PyOpcode_Caches[_PyOpcode_Deopt[close_loop_instr->op.code]]) {
+ close_loop_instr[1].counter = trigger_backoff_counter();
+ }
+ _Py_BloomFilter_Init(&_tstate->jit_tracer_state.prev_state.dependencies);
+ return 1;
+}
+
+void
+_PyJit_FinalizeTracing(PyThreadState *tstate)
+{
+ _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
+ Py_CLEAR(_tstate->jit_tracer_state.initial_state.code);
+ Py_CLEAR(_tstate->jit_tracer_state.initial_state.func);
+ Py_CLEAR(_tstate->jit_tracer_state.prev_state.instr_code);
+ _tstate->jit_tracer_state.prev_state.code_curr_size = CODE_SIZE_EMPTY;
+ _tstate->jit_tracer_state.prev_state.code_max_size = UOP_MAX_TRACE_LENGTH - 1;
}
+
#undef RESERVE
#undef RESERVE_RAW
#undef INSTR_IP
int exit_count = 0;
for (int i = 0; i < length; i++) {
int opcode = buffer[i].opcode;
- if (opcode == _EXIT_TRACE) {
+ if (opcode == _EXIT_TRACE || opcode == _DYNAMIC_EXIT) {
exit_count++;
}
}
return exit_count;
}
-static void make_exit(_PyUOpInstruction *inst, int opcode, int target)
+static void make_exit(_PyUOpInstruction *inst, int opcode, int target, bool is_control_flow)
{
inst->opcode = opcode;
inst->oparg = 0;
inst->operand0 = 0;
inst->format = UOP_FORMAT_TARGET;
inst->target = target;
+ inst->operand1 = is_control_flow;
#ifdef Py_STATS
inst->execution_count = 0;
#endif
exit_op = _HANDLE_PENDING_AND_DEOPT;
}
int32_t jump_target = target;
- if (is_for_iter_test[opcode]) {
- /* Target the POP_TOP immediately after the END_FOR,
- * leaving only the iterator on the stack. */
- int extended_arg = inst->oparg > 255;
- int32_t next_inst = target + 1 + INLINE_CACHE_ENTRIES_FOR_ITER + extended_arg;
- jump_target = next_inst + inst->oparg + 1;
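+ // IP guards exit through a dynamic exit stub, since their continuation
+ // target is only known at runtime.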
+ if (
+ opcode == _GUARD_IP__PUSH_FRAME ||
+ opcode == _GUARD_IP_RETURN_VALUE ||
+ opcode == _GUARD_IP_YIELD_VALUE ||
+ opcode == _GUARD_IP_RETURN_GENERATOR
+ ) {
+ exit_op = _DYNAMIC_EXIT;
}
+ bool is_control_flow = (opcode == _GUARD_IS_FALSE_POP || opcode == _GUARD_IS_TRUE_POP || is_for_iter_test[opcode]);
if (jump_target != current_jump_target || current_exit_op != exit_op) {
- make_exit(&buffer[next_spare], exit_op, jump_target);
+ make_exit(&buffer[next_spare], exit_op, jump_target, is_control_flow);
current_exit_op = exit_op;
current_jump_target = jump_target;
current_jump = next_spare;
current_popped = popped;
current_error = next_spare;
current_error_target = target;
- make_exit(&buffer[next_spare], _ERROR_POP_N, 0);
+ make_exit(&buffer[next_spare], _ERROR_POP_N, 0, false);
buffer[next_spare].operand0 = target;
next_spare++;
}
}
bool ended = false;
uint32_t i = 0;
- CHECK(executor->trace[0].opcode == _START_EXECUTOR || executor->trace[0].opcode == _COLD_EXIT);
+ CHECK(executor->trace[0].opcode == _START_EXECUTOR ||
+ executor->trace[0].opcode == _COLD_EXIT ||
+ executor->trace[0].opcode == _COLD_DYNAMIC_EXIT);
for (; i < executor->code_size; i++) {
const _PyUOpInstruction *inst = &executor->trace[i];
uint16_t opcode = inst->opcode;
opcode == _DEOPT ||
opcode == _HANDLE_PENDING_AND_DEOPT ||
opcode == _EXIT_TRACE ||
- opcode == _ERROR_POP_N);
+ opcode == _ERROR_POP_N ||
+ opcode == _DYNAMIC_EXIT);
}
}
* and not a NOP.
*/
static _PyExecutorObject *
-make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFilter *dependencies)
+make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFilter *dependencies, int chain_depth)
{
int exit_count = count_exits(buffer, length);
_PyExecutorObject *executor = allocate_executor(exit_count, length);
/* Initialize exits */
_PyExecutorObject *cold = _PyExecutor_GetColdExecutor();
+ _PyExecutorObject *cold_dynamic = _PyExecutor_GetColdDynamicExecutor();
+ cold->vm_data.chain_depth = chain_depth;
for (int i = 0; i < exit_count; i++) {
executor->exits[i].index = i;
executor->exits[i].temperature = initial_temperature_backoff_counter();
- executor->exits[i].executor = cold;
}
int next_exit = exit_count-1;
_PyUOpInstruction *dest = (_PyUOpInstruction *)&executor->trace[length];
int opcode = buffer[i].opcode;
dest--;
*dest = buffer[i];
- assert(opcode != _POP_JUMP_IF_FALSE && opcode != _POP_JUMP_IF_TRUE);
- if (opcode == _EXIT_TRACE) {
+ if (opcode == _EXIT_TRACE || opcode == _DYNAMIC_EXIT) {
_PyExitData *exit = &executor->exits[next_exit];
exit->target = buffer[i].target;
dest->operand0 = (uint64_t)exit;
+ exit->executor = opcode == _EXIT_TRACE ? cold : cold_dynamic;
+ exit->is_dynamic = (char)(opcode == _DYNAMIC_EXIT);
+ exit->is_control_flow = (char)buffer[i].operand1;
next_exit--;
}
}
static int
uop_optimize(
_PyInterpreterFrame *frame,
- _Py_CODEUNIT *instr,
+ PyThreadState *tstate,
_PyExecutorObject **exec_ptr,
- int curr_stackentries,
bool progress_needed)
{
- _PyBloomFilter dependencies;
- _Py_BloomFilter_Init(&dependencies);
- PyInterpreterState *interp = _PyInterpreterState_GET();
- if (interp->jit_uop_buffer == NULL) {
- interp->jit_uop_buffer = (_PyUOpInstruction *)_PyObject_VirtualAlloc(UOP_BUFFER_SIZE);
- if (interp->jit_uop_buffer == NULL) {
- return 0;
- }
- }
- _PyUOpInstruction *buffer = interp->jit_uop_buffer;
+ _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
+ _PyBloomFilter *dependencies = &_tstate->jit_tracer_state.prev_state.dependencies;
+ _PyUOpInstruction *buffer = _tstate->jit_tracer_state.code_buffer;
OPT_STAT_INC(attempts);
char *env_var = Py_GETENV("PYTHON_UOPS_OPTIMIZE");
bool is_noopt = true;
if (env_var == NULL || *env_var == '\0' || *env_var > '0') {
is_noopt = false;
}
- int length = translate_bytecode_to_trace(frame, instr, buffer, UOP_MAX_TRACE_LENGTH, &dependencies, progress_needed);
- if (length <= 0) {
- // Error or nothing translated
- return length;
+ int curr_stackentries = _tstate->jit_tracer_state.initial_state.stack_depth;
+ int length = _tstate->jit_tracer_state.prev_state.code_curr_size;
+ if (length <= CODE_SIZE_NO_PROGRESS) {
+ return 0;
}
+ assert(length > 0);
assert(length < UOP_MAX_TRACE_LENGTH);
OPT_STAT_INC(traces_created);
if (!is_noopt) {
- length = _Py_uop_analyze_and_optimize(frame, buffer,
- length,
- curr_stackentries, &dependencies);
+ length = _Py_uop_analyze_and_optimize(
+ _tstate->jit_tracer_state.initial_state.func,
+ buffer, length,
+ curr_stackentries, dependencies);
if (length <= 0) {
return length;
}
OPT_HIST(effective_trace_length(buffer, length), optimized_trace_length_hist);
length = prepare_for_execution(buffer, length);
assert(length <= UOP_MAX_TRACE_LENGTH);
- _PyExecutorObject *executor = make_executor_from_uops(buffer, length, &dependencies);
+ _PyExecutorObject *executor = make_executor_from_uops(
+ buffer, length, dependencies, _tstate->jit_tracer_state.initial_state.chain_depth);
if (executor == NULL) {
return -1;
}
assert(length <= UOP_MAX_TRACE_LENGTH);
// Check executor coldness
- PyThreadState *tstate = PyThreadState_Get();
// It's okay if this ends up going negative.
if (--tstate->interp->executor_creation_counter == 0) {
_Py_set_eval_breaker_bit(tstate, _PY_EVAL_JIT_INVALIDATE_COLD_BIT);
return cold;
}
+_PyExecutorObject *
+_PyExecutor_GetColdDynamicExecutor(void)
+{
+ PyInterpreterState *interp = _PyInterpreterState_GET();
+ if (interp->cold_dynamic_executor != NULL) {
+ assert(interp->cold_dynamic_executor->trace[0].opcode == _COLD_DYNAMIC_EXIT);
+ return interp->cold_dynamic_executor;
+ }
+ _PyExecutorObject *cold = allocate_executor(0, 1);
+ if (cold == NULL) {
+ Py_FatalError("Cannot allocate core JIT code");
+ }
+ ((_PyUOpInstruction *)cold->trace)->opcode = _COLD_DYNAMIC_EXIT;
+#ifdef _Py_JIT
+ cold->jit_code = NULL;
+ cold->jit_size = 0;
+ // This is initialized to true so we can prevent the executor
+ // from being immediately detected as cold and invalidated.
+ cold->vm_data.warm = true;
+ if (_PyJIT_Compile(cold, cold->trace, 1)) {
+ Py_DECREF(cold);
+ Py_FatalError("Cannot allocate core JIT code");
+ }
+#endif
+ _Py_SetImmortal((PyObject *)cold);
+ interp->cold_dynamic_executor = cold;
+ return cold;
+}
+
void
_PyExecutor_ClearExit(_PyExitData *exit)
{
return;
}
_PyExecutorObject *old = exit->executor;
- exit->executor = _PyExecutor_GetColdExecutor();
+ if (exit->is_dynamic) {
+ exit->executor = _PyExecutor_GetColdDynamicExecutor();
+ }
+ else {
+ exit->executor = _PyExecutor_GetColdExecutor();
+ }
Py_DECREF(old);
}
_Py_Executors_InvalidateAll(interp, is_invalidation);
}
+void
+_PyJit_Tracer_InvalidateDependency(PyThreadState *tstate, void *obj)
+{
+ _PyBloomFilter obj_filter;
+ _Py_BloomFilter_Init(&obj_filter);
+ _Py_BloomFilter_Add(&obj_filter, obj);
+ _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
+ if (bloom_filter_may_contain(&_tstate->jit_tracer_state.prev_state.dependencies, &obj_filter))
+ {
+ _tstate->jit_tracer_state.prev_state.dependencies_still_valid = false;
+ }
+}
/* Invalidate all executors */
void
_Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation)
#ifdef Py_STATS
fprintf(out, " <tr><td port=\"i%d\" border=\"1\" >%s -- %" PRIu64 "</td></tr>\n", i, opname, inst->execution_count);
#else
- fprintf(out, " <tr><td port=\"i%d\" border=\"1\" >%s</td></tr>\n", i, opname);
+ fprintf(out, " <tr><td port=\"i%d\" border=\"1\" >%s op0=%" PRIu64 "</td></tr>\n", i, opname, inst->operand0);
#endif
if (inst->opcode == _EXIT_TRACE || inst->opcode == _JUMP_TO_TOP) {
break;
fprintf(out, "]\n\n");
/* Write all the outgoing edges */
+ _PyExecutorObject *cold = _PyExecutor_GetColdExecutor();
+ _PyExecutorObject *cold_dynamic = _PyExecutor_GetColdDynamicExecutor();
for (uint32_t i = 0; i < executor->code_size; i++) {
_PyUOpInstruction const *inst = &executor->trace[i];
uint16_t flags = _PyUop_Flags[inst->opcode];
else if (flags & HAS_EXIT_FLAG) {
assert(inst->format == UOP_FORMAT_JUMP);
_PyUOpInstruction const *exit_inst = &executor->trace[inst->jump_target];
- assert(exit_inst->opcode == _EXIT_TRACE);
+ assert(exit_inst->opcode == _EXIT_TRACE || exit_inst->opcode == _DYNAMIC_EXIT);
exit = (_PyExitData *)exit_inst->operand0;
}
- if (exit != NULL && exit->executor != NULL) {
+ if (exit != NULL && exit->executor != cold && exit->executor != cold_dynamic) {
fprintf(out, "executor_%p:i%d -> executor_%p:start\n", executor, i, exit->executor);
}
if (inst->opcode == _EXIT_TRACE || inst->opcode == _JUMP_TO_TOP) {
#define STACK_LEVEL() ((int)(stack_pointer - ctx->frame->stack))
#define STACK_SIZE() ((int)(ctx->frame->stack_len))
+#define CURRENT_FRAME_IS_INIT_SHIM() (ctx->frame->code == ((PyCodeObject *)&_Py_InitCleanup))
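+// The init-cleanup shim is a synthetic frame whose stack we manipulate by hand,
+// so the usual stack-bounds check does not apply to it.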
+
#define WITHIN_STACK_BOUNDS() \
- (STACK_LEVEL() >= 0 && STACK_LEVEL() <= STACK_SIZE())
+ (CURRENT_FRAME_IS_INIT_SHIM() || (STACK_LEVEL() >= 0 && STACK_LEVEL() <= STACK_SIZE()))
#define GETLOCAL(idx) ((ctx->frame->locals[idx]))
PyCodeObject *
get_current_code_object(JitOptContext *ctx)
{
- return (PyCodeObject *)ctx->frame->func->func_code;
+ return (PyCodeObject *)ctx->frame->code;
}
static PyObject *
JitOptContext context;
JitOptContext *ctx = &context;
uint32_t opcode = UINT16_MAX;
- int curr_space = 0;
- int max_space = 0;
- _PyUOpInstruction *first_valid_check_stack = NULL;
- _PyUOpInstruction *corresponding_check_stack = NULL;
// Make sure that watchers are set up
PyInterpreterState *interp = _PyInterpreterState_GET();
ctx->frame = frame;
_PyUOpInstruction *this_instr = NULL;
+ JitOptRef *stack_pointer = ctx->frame->stack_pointer;
+
for (int i = 0; !ctx->done; i++) {
assert(i < trace_len);
this_instr = &trace[i];
int oparg = this_instr->oparg;
opcode = this_instr->opcode;
- JitOptRef *stack_pointer = ctx->frame->stack_pointer;
+
+ if (!CURRENT_FRAME_IS_INIT_SHIM()) {
+ stack_pointer = ctx->frame->stack_pointer;
+ }
#ifdef Py_DEBUG
if (get_lltrace() >= 3) {
Py_UNREACHABLE();
}
assert(ctx->frame != NULL);
- DPRINTF(3, " stack_level %d\n", STACK_LEVEL());
- ctx->frame->stack_pointer = stack_pointer;
- assert(STACK_LEVEL() >= 0);
+ if (!CURRENT_FRAME_IS_INIT_SHIM()) {
+ DPRINTF(3, " stack_level %d\n", STACK_LEVEL());
+ ctx->frame->stack_pointer = stack_pointer;
+ assert(STACK_LEVEL() >= 0);
+ }
}
if (ctx->out_of_space) {
DPRINTF(3, "\n");
}
if (ctx->contradiction) {
// Attempted to push a "bottom" (contradiction) symbol onto the stack.
- // This means that the abstract interpreter has hit unreachable code.
+ // This means that the abstract interpreter has optimized the trace
+ // into an unreachable state.
// We *could* generate an _EXIT_TRACE or _FATAL_ERROR here, but hitting
- // bottom indicates type instability, so we are probably better off
+ // bottom usually indicates an optimizer bug, so we are probably better off
// retrying later.
DPRINTF(3, "\n");
DPRINTF(1, "Hit bottom in abstract interpreter\n");
_Py_uop_abstractcontext_fini(ctx);
+ OPT_STAT_INC(optimizer_contradiction);
return 0;
}
/* Either reached the end or cannot optimize further, but there
* would be no benefit in retrying later */
_Py_uop_abstractcontext_fini(ctx);
- if (first_valid_check_stack != NULL) {
- assert(first_valid_check_stack->opcode == _CHECK_STACK_SPACE);
- assert(max_space > 0);
- assert(max_space <= INT_MAX);
- assert(max_space <= INT32_MAX);
- first_valid_check_stack->opcode = _CHECK_STACK_SPACE_OPERAND;
- first_valid_check_stack->operand0 = max_space;
- }
return trace_len;
error:
buffer[pc].opcode = _NOP;
}
break;
+ case _EXIT_TRACE:
default:
{
// Cancel out pushes and pops, repeatedly. So:
}
/* _PUSH_FRAME doesn't escape or error, but it
* does need the IP for the return address */
- bool needs_ip = opcode == _PUSH_FRAME;
+ bool needs_ip = (opcode == _PUSH_FRAME || opcode == _YIELD_VALUE || opcode == _DYNAMIC_EXIT || opcode == _EXIT_TRACE);
if (_PyUop_Flags[opcode] & HAS_ESCAPES_FLAG) {
needs_ip = true;
may_have_escaped = true;
buffer[last_set_ip].opcode = _SET_IP;
last_set_ip = -1;
}
+ if (opcode == _EXIT_TRACE) {
+ return pc + 1;
+ }
break;
}
case _JUMP_TO_TOP:
- case _EXIT_TRACE:
+ case _DYNAMIC_EXIT:
+ case _DEOPT:
return pc + 1;
}
}
// > 0 - length of optimized trace
int
_Py_uop_analyze_and_optimize(
- _PyInterpreterFrame *frame,
+ PyFunctionObject *func,
_PyUOpInstruction *buffer,
int length,
int curr_stacklen,
OPT_STAT_INC(optimizer_attempts);
length = optimize_uops(
- _PyFrame_GetFunction(frame), buffer,
- length, curr_stacklen, dependencies);
+ func, buffer,
+ length, curr_stacklen, dependencies);
if (length == 0) {
return length;
int already_bool = optimize_to_bool(this_instr, ctx, value, &value);
if (!already_bool) {
sym_set_type(value, &PyBool_Type);
- value = sym_new_truthiness(ctx, value, true);
}
}
}
op(_PY_FRAME_KW, (callable, self_or_null, args[oparg], kwnames -- new_frame)) {
- new_frame = PyJitRef_NULL;
- ctx->done = true;
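+ // The callee's code object is recorded on the _PUSH_FRAME two uops ahead;
+ // use it to model the frame being pushed.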
+ assert((this_instr + 2)->opcode == _PUSH_FRAME);
+ PyCodeObject *co = get_code_with_logging((this_instr + 2));
+ if (co == NULL) {
+ ctx->done = true;
+ break;
+ }
+
+ new_frame = PyJitRef_Wrap((JitOptSymbol *)frame_new(ctx, co, 0, NULL, 0));
}
op(_CHECK_AND_ALLOCATE_OBJECT, (type_version/2, callable, self_or_null, args[oparg] -- callable, self_or_null, args[oparg])) {
}
op(_CREATE_INIT_FRAME, (init, self, args[oparg] -- init_frame)) {
- init_frame = PyJitRef_NULL;
- ctx->done = true;
+ ctx->frame->stack_pointer = stack_pointer - oparg - 2;
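+ // Push a shim frame for the _Py_InitCleanup trampoline, mirroring what the
+ // interpreter does for __init__ calls, then build the real __init__ frame on top.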
+ _Py_UOpsAbstractFrame *shim = frame_new(ctx, (PyCodeObject *)&_Py_InitCleanup, 0, NULL, 0);
+ if (shim == NULL) {
+ break;
+ }
+ /* Push self onto stack of shim */
+ shim->stack[0] = self;
+ shim->stack_pointer++;
+ assert((int)(shim->stack_pointer - shim->stack) == 1);
+ ctx->frame = shim;
+ ctx->curr_frame_depth++;
+ assert((this_instr + 1)->opcode == _PUSH_FRAME);
+ PyCodeObject *co = get_code_with_logging((this_instr + 1));
+ init_frame = PyJitRef_Wrap((JitOptSymbol *)frame_new(ctx, co, 0, args-1, oparg+1));
}
op(_RETURN_VALUE, (retval -- res)) {
JitOptRef temp = PyJitRef_StripReferenceInfo(retval);
DEAD(retval);
SAVE_STACK();
- PyCodeObject *co = get_current_code_object(ctx);
ctx->frame->stack_pointer = stack_pointer;
- frame_pop(ctx);
+ PyCodeObject *returning_code = get_code_with_logging(this_instr);
+ if (returning_code == NULL) {
+ ctx->done = true;
+ break;
+ }
+ int returning_stacklevel = this_instr->operand1;
+ if (ctx->curr_frame_depth >= 2) {
+ PyCodeObject *expected_code = ctx->frames[ctx->curr_frame_depth - 2].code;
+ if (expected_code == returning_code) {
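+ // The return provably lands in the frame the trace expects, so the
+ // following IP guard is redundant and can be removed.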
+ assert((this_instr + 1)->opcode == _GUARD_IP_RETURN_VALUE);
+ REPLACE_OP((this_instr + 1), _NOP, 0, 0);
+ }
+ }
+ if (frame_pop(ctx, returning_code, returning_stacklevel)) {
+ break;
+ }
stack_pointer = ctx->frame->stack_pointer;
- /* Stack space handling */
- assert(corresponding_check_stack == NULL);
- assert(co != NULL);
- int framesize = co->co_framesize;
- assert(framesize > 0);
- assert(framesize <= curr_space);
- curr_space -= framesize;
-
RELOAD_STACK();
res = temp;
}
op(_RETURN_GENERATOR, ( -- res)) {
SYNC_SP();
- PyCodeObject *co = get_current_code_object(ctx);
ctx->frame->stack_pointer = stack_pointer;
- frame_pop(ctx);
+ PyCodeObject *returning_code = get_code_with_logging(this_instr);
+ if (returning_code == NULL) {
+ ctx->done = true;
+ break;
+ }
+ _Py_BloomFilter_Add(dependencies, returning_code);
+ int returning_stacklevel = this_instr->operand1;
+ if (frame_pop(ctx, returning_code, returning_stacklevel)) {
+ break;
+ }
stack_pointer = ctx->frame->stack_pointer;
res = sym_new_unknown(ctx);
-
- /* Stack space handling */
- assert(corresponding_check_stack == NULL);
- assert(co != NULL);
- int framesize = co->co_framesize;
- assert(framesize > 0);
- assert(framesize <= curr_space);
- curr_space -= framesize;
}
- op(_YIELD_VALUE, (unused -- value)) {
- value = sym_new_unknown(ctx);
+ op(_YIELD_VALUE, (retval -- value)) {
+ // Mimics PyStackRef_MakeHeapSafe in the interpreter.
+ JitOptRef temp = PyJitRef_StripReferenceInfo(retval);
+ DEAD(retval);
+ SAVE_STACK();
+ ctx->frame->stack_pointer = stack_pointer;
+ PyCodeObject *returning_code = get_code_with_logging(this_instr);
+ if (returning_code == NULL) {
+ ctx->done = true;
+ break;
+ }
+ _Py_BloomFilter_Add(dependencies, returning_code);
+ int returning_stacklevel = this_instr->operand1;
+ if (frame_pop(ctx, returning_code, returning_stacklevel)) {
+ break;
+ }
+ stack_pointer = ctx->frame->stack_pointer;
+ RELOAD_STACK();
+ value = temp;
}
op(_GET_ITER, (iterable -- iter, index_or_null)) {
}
op(_CHECK_STACK_SPACE, (unused, unused, unused[oparg] -- unused, unused, unused[oparg])) {
- assert(corresponding_check_stack == NULL);
- corresponding_check_stack = this_instr;
}
op (_CHECK_STACK_SPACE_OPERAND, (framesize/2 -- )) {
op(_PUSH_FRAME, (new_frame -- )) {
SYNC_SP();
- ctx->frame->stack_pointer = stack_pointer;
+ if (!CURRENT_FRAME_IS_INIT_SHIM()) {
+ ctx->frame->stack_pointer = stack_pointer;
+ }
ctx->frame = (_Py_UOpsAbstractFrame *)PyJitRef_Unwrap(new_frame);
ctx->curr_frame_depth++;
stack_pointer = ctx->frame->stack_pointer;
uint64_t operand = this_instr->operand0;
- if (operand == 0 || (operand & 1)) {
- // It's either a code object or NULL
+ if (operand == 0) {
ctx->done = true;
break;
}
- PyFunctionObject *func = (PyFunctionObject *)operand;
- PyCodeObject *co = (PyCodeObject *)func->func_code;
- assert(PyFunction_Check(func));
- ctx->frame->func = func;
- /* Stack space handling */
- int framesize = co->co_framesize;
- assert(framesize > 0);
- curr_space += framesize;
- if (curr_space < 0 || curr_space > INT32_MAX) {
- // won't fit in signed 32-bit int
- ctx->done = true;
- break;
- }
- max_space = curr_space > max_space ? curr_space : max_space;
- if (first_valid_check_stack == NULL) {
- first_valid_check_stack = corresponding_check_stack;
+ if (!(operand & 1)) {
+ PyFunctionObject *func = (PyFunctionObject *)operand;
+ // No need to re-add to dependencies here. Already
+ // handled by the tracer.
+ ctx->frame->func = func;
}
- else if (corresponding_check_stack) {
- // delete all but the first valid _CHECK_STACK_SPACE
- corresponding_check_stack->opcode = _NOP;
+ // Fixed calls don't need IP guards.
+ if ((this_instr-1)->opcode == _SAVE_RETURN_OFFSET ||
+ (this_instr-1)->opcode == _CREATE_INIT_FRAME) {
+ assert((this_instr+1)->opcode == _GUARD_IP__PUSH_FRAME);
+ REPLACE_OP(this_instr+1, _NOP, 0, 0);
}
- corresponding_check_stack = NULL;
}
op(_UNPACK_SEQUENCE, (seq -- values[oparg], top[0])) {
ctx->done = true;
}
+ op(_DEOPT, (--)) {
+ ctx->done = true;
+ }
+
op(_REPLACE_WITH_TRUE, (value -- res)) {
REPLACE_OP(this_instr, _POP_TOP_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)Py_True);
res = sym_new_const(ctx, Py_True);
int already_bool = optimize_to_bool(this_instr, ctx, value, &value);
if (!already_bool) {
sym_set_type(value, &PyBool_Type);
- value = sym_new_truthiness(ctx, value, true);
}
stack_pointer[-1] = value;
break;
JitOptRef temp = PyJitRef_StripReferenceInfo(retval);
stack_pointer += -1;
assert(WITHIN_STACK_BOUNDS());
- PyCodeObject *co = get_current_code_object(ctx);
ctx->frame->stack_pointer = stack_pointer;
- frame_pop(ctx);
+ PyCodeObject *returning_code = get_code_with_logging(this_instr);
+ if (returning_code == NULL) {
+ ctx->done = true;
+ break;
+ }
+ int returning_stacklevel = this_instr->operand1;
+ if (ctx->curr_frame_depth >= 2) {
+ PyCodeObject *expected_code = ctx->frames[ctx->curr_frame_depth - 2].code;
+ if (expected_code == returning_code) {
+ assert((this_instr + 1)->opcode == _GUARD_IP_RETURN_VALUE);
+ REPLACE_OP((this_instr + 1), _NOP, 0, 0);
+ }
+ }
+ if (frame_pop(ctx, returning_code, returning_stacklevel)) {
+ break;
+ }
stack_pointer = ctx->frame->stack_pointer;
- assert(corresponding_check_stack == NULL);
- assert(co != NULL);
- int framesize = co->co_framesize;
- assert(framesize > 0);
- assert(framesize <= curr_space);
- curr_space -= framesize;
res = temp;
stack_pointer[0] = res;
stack_pointer += 1;
}
case _YIELD_VALUE: {
+ JitOptRef retval;
JitOptRef value;
- value = sym_new_unknown(ctx);
- stack_pointer[-1] = value;
+ retval = stack_pointer[-1];
+ JitOptRef temp = PyJitRef_StripReferenceInfo(retval);
+ stack_pointer += -1;
+ assert(WITHIN_STACK_BOUNDS());
+ ctx->frame->stack_pointer = stack_pointer;
+ PyCodeObject *returning_code = get_code_with_logging(this_instr);
+ if (returning_code == NULL) {
+ ctx->done = true;
+ break;
+ }
+ _Py_BloomFilter_Add(dependencies, returning_code);
+ int returning_stacklevel = this_instr->operand1;
+ if (frame_pop(ctx, returning_code, returning_stacklevel)) {
+ break;
+ }
+ stack_pointer = ctx->frame->stack_pointer;
+ value = temp;
+ stack_pointer[0] = value;
+ stack_pointer += 1;
+ assert(WITHIN_STACK_BOUNDS());
break;
}
break;
}
+ /* _JUMP_BACKWARD_NO_INTERRUPT is not a viable micro-op for tier 2 */
+
case _GET_LEN: {
JitOptRef obj;
JitOptRef len;
}
case _CHECK_STACK_SPACE: {
- assert(corresponding_check_stack == NULL);
- corresponding_check_stack = this_instr;
break;
}
new_frame = stack_pointer[-1];
stack_pointer += -1;
assert(WITHIN_STACK_BOUNDS());
- ctx->frame->stack_pointer = stack_pointer;
+ if (!CURRENT_FRAME_IS_INIT_SHIM()) {
+ ctx->frame->stack_pointer = stack_pointer;
+ }
ctx->frame = (_Py_UOpsAbstractFrame *)PyJitRef_Unwrap(new_frame);
ctx->curr_frame_depth++;
stack_pointer = ctx->frame->stack_pointer;
uint64_t operand = this_instr->operand0;
- if (operand == 0 || (operand & 1)) {
+ if (operand == 0) {
ctx->done = true;
break;
}
- PyFunctionObject *func = (PyFunctionObject *)operand;
- PyCodeObject *co = (PyCodeObject *)func->func_code;
- assert(PyFunction_Check(func));
- ctx->frame->func = func;
- int framesize = co->co_framesize;
- assert(framesize > 0);
- curr_space += framesize;
- if (curr_space < 0 || curr_space > INT32_MAX) {
- ctx->done = true;
- break;
- }
- max_space = curr_space > max_space ? curr_space : max_space;
- if (first_valid_check_stack == NULL) {
- first_valid_check_stack = corresponding_check_stack;
+ if (!(operand & 1)) {
+ PyFunctionObject *func = (PyFunctionObject *)operand;
+ ctx->frame->func = func;
}
- else if (corresponding_check_stack) {
- corresponding_check_stack->opcode = _NOP;
+ if ((this_instr-1)->opcode == _SAVE_RETURN_OFFSET ||
+ (this_instr-1)->opcode == _CREATE_INIT_FRAME) {
+ assert((this_instr+1)->opcode == _GUARD_IP__PUSH_FRAME);
+ REPLACE_OP(this_instr+1, _NOP, 0, 0);
}
- corresponding_check_stack = NULL;
break;
}
}
case _CREATE_INIT_FRAME: {
+ JitOptRef *args;
+ JitOptRef self;
JitOptRef init_frame;
- init_frame = PyJitRef_NULL;
- ctx->done = true;
+ args = &stack_pointer[-oparg];
+ self = stack_pointer[-1 - oparg];
+ ctx->frame->stack_pointer = stack_pointer - oparg - 2;
+ _Py_UOpsAbstractFrame *shim = frame_new(ctx, (PyCodeObject *)&_Py_InitCleanup, 0, NULL, 0);
+ if (shim == NULL) {
+ break;
+ }
+ shim->stack[0] = self;
+ shim->stack_pointer++;
+ assert((int)(shim->stack_pointer - shim->stack) == 1);
+ ctx->frame = shim;
+ ctx->curr_frame_depth++;
+ assert((this_instr + 1)->opcode == _PUSH_FRAME);
+ PyCodeObject *co = get_code_with_logging((this_instr + 1));
+ init_frame = PyJitRef_Wrap((JitOptSymbol *)frame_new(ctx, co, 0, args-1, oparg+1));
stack_pointer[-2 - oparg] = init_frame;
stack_pointer += -1 - oparg;
assert(WITHIN_STACK_BOUNDS());
case _PY_FRAME_KW: {
JitOptRef new_frame;
- new_frame = PyJitRef_NULL;
- ctx->done = true;
+ assert((this_instr + 2)->opcode == _PUSH_FRAME);
+ PyCodeObject *co = get_code_with_logging((this_instr + 2));
+ if (co == NULL) {
+ ctx->done = true;
+ break;
+ }
+ new_frame = PyJitRef_Wrap((JitOptSymbol *)frame_new(ctx, co, 0, NULL, 0));
stack_pointer[-3 - oparg] = new_frame;
stack_pointer += -2 - oparg;
assert(WITHIN_STACK_BOUNDS());
case _RETURN_GENERATOR: {
JitOptRef res;
- PyCodeObject *co = get_current_code_object(ctx);
ctx->frame->stack_pointer = stack_pointer;
- frame_pop(ctx);
+ PyCodeObject *returning_code = get_code_with_logging(this_instr);
+ if (returning_code == NULL) {
+ ctx->done = true;
+ break;
+ }
+ _Py_BloomFilter_Add(dependencies, returning_code);
+ int returning_stacklevel = this_instr->operand1;
+ if (frame_pop(ctx, returning_code, returning_stacklevel)) {
+ break;
+ }
stack_pointer = ctx->frame->stack_pointer;
res = sym_new_unknown(ctx);
- assert(corresponding_check_stack == NULL);
- assert(co != NULL);
- int framesize = co->co_framesize;
- assert(framesize > 0);
- assert(framesize <= curr_space);
- curr_space -= framesize;
stack_pointer[0] = res;
stack_pointer += 1;
assert(WITHIN_STACK_BOUNDS());
break;
}
+ case _DYNAMIC_EXIT: {
+ break;
+ }
+
case _CHECK_VALIDITY: {
break;
}
}
case _DEOPT: {
+ ctx->done = true;
break;
}
break;
}
+ case _COLD_DYNAMIC_EXIT: {
+ break;
+ }
+
+ case _GUARD_IP__PUSH_FRAME: {
+ break;
+ }
+
+ case _GUARD_IP_YIELD_VALUE: {
+ break;
+ }
+
+ case _GUARD_IP_RETURN_VALUE: {
+ break;
+ }
+
+ case _GUARD_IP_RETURN_GENERATOR: {
+ break;
+ }
+
JitOptRef *args,
int arg_len)
{
- assert(ctx->curr_frame_depth < MAX_ABSTRACT_FRAME_DEPTH);
+ if (ctx->curr_frame_depth >= MAX_ABSTRACT_FRAME_DEPTH) {
+ ctx->done = true;
+ ctx->out_of_space = true;
+ OPT_STAT_INC(optimizer_frame_overflow);
+ return NULL;
+ }
_Py_UOpsAbstractFrame *frame = &ctx->frames[ctx->curr_frame_depth];
-
+ frame->code = co;
frame->stack_len = co->co_stacksize;
frame->locals_len = co->co_nlocalsplus;
}
int
-_Py_uop_frame_pop(JitOptContext *ctx)
+_Py_uop_frame_pop(JitOptContext *ctx, PyCodeObject *co, int curr_stackentries)
{
_Py_UOpsAbstractFrame *frame = ctx->frame;
ctx->n_consumed = frame->locals;
+
ctx->curr_frame_depth--;
- assert(ctx->curr_frame_depth >= 1);
- ctx->frame = &ctx->frames[ctx->curr_frame_depth - 1];
+
+ if (ctx->curr_frame_depth >= 1) {
+ ctx->frame = &ctx->frames[ctx->curr_frame_depth - 1];
+
+ // We returned to the correct code. Nothing to do here.
+ if (co == ctx->frame->code) {
+ return 0;
+ }
+ // Else: the code we recorded doesn't match the code we *think* we're
+ // returning to. The trace could have come from anywhere, so we can't
+ // simply return to the old frame. We have to restore what the tracer
+ // recorded as the next frame: remove the current frame here and swap
+ // in the right one below.
+ else {
+ ctx->curr_frame_depth--;
+ }
+ }
+ // Else: trace stack underflow.
+
+ // This handles swapping out frames.
+ assert(curr_stackentries >= 1);
+ // Use curr_stackentries - 1: the return value is pushed back onto the stack after this.
+ _Py_UOpsAbstractFrame *new_frame = _Py_uop_frame_new(ctx, co, curr_stackentries - 1, NULL, 0);
+ if (new_frame == NULL) {
+ ctx->done = true;
+ return 1;
+ }
+
+ ctx->curr_frame_depth++;
+ ctx->frame = new_frame;
return 0;
}
_Py_brc_init_state(interp);
#endif
-#ifdef _Py_TIER2
- // Ensure the buffer is to be set as NULL.
- interp->jit_uop_buffer = NULL;
-#endif
llist_init(&interp->mem_free_queue.head);
llist_init(&interp->asyncio_tasks_head);
interp->asyncio_tasks_lock = (PyMutex){0};
#ifdef _Py_TIER2
_Py_ClearExecutorDeletionList(interp);
- if (interp->jit_uop_buffer != NULL) {
- _PyObject_VirtualFree(interp->jit_uop_buffer, UOP_BUFFER_SIZE);
- interp->jit_uop_buffer = NULL;
- }
#endif
_PyAST_Fini(interp);
_PyAtExit_Fini(interp);
assert(cold->vm_data.warm);
_PyExecutor_Free(cold);
}
+
+ struct _PyExecutorObject *cold_dynamic = interp->cold_dynamic_executor;
+ if (cold_dynamic != NULL) {
+ interp->cold_dynamic_executor = NULL;
+ assert(cold_dynamic->vm_data.valid);
+ assert(cold_dynamic->vm_data.warm);
+ _PyExecutor_Free(cold_dynamic);
+ }
/* We don't clear sysdict and builtins until the end of this function.
Because clearing other attributes can execute arbitrary Python code
which requires sysdict and builtins. */
_tstate->asyncio_running_loop = NULL;
_tstate->asyncio_running_task = NULL;
+#ifdef _Py_TIER2
+ _tstate->jit_tracer_state.code_buffer = NULL;
+#endif
tstate->delete_later = NULL;
llist_init(&_tstate->mem_free_queue);
assert(tstate_impl->refcounts.values == NULL);
#endif
+#if _Py_TIER2
+ _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
+ if (_tstate->jit_tracer_state.code_buffer != NULL) {
+ _PyObject_VirtualFree(_tstate->jit_tracer_state.code_buffer, UOP_BUFFER_SIZE);
+ _tstate->jit_tracer_state.code_buffer = NULL;
+ }
+#endif
+
HEAD_UNLOCK(runtime);
// XXX Unbind in PyThreadState_Clear(), or earlier
Parser/lexer/lexer.c - type_comment_prefix -
Python/ceval.c - _PyEval_BinaryOps -
Python/ceval.c - _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS -
+Python/ceval.c - _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS_PTR -
Python/codecs.c - Py_hexdigits -
Python/codecs.c - codecs_builtin_error_handlers -
Python/codecs.c - ucnhash_capi -
side_exit: bool
pure: bool
uses_opcode: bool
+ needs_guard_ip: bool
+ unpredictable_jump: bool
tier: int | None = None
const_oparg: int = -1
needs_prev: bool = False
pure=all(p.pure for p in properties),
needs_prev=any(p.needs_prev for p in properties),
no_save_ip=all(p.no_save_ip for p in properties),
+ needs_guard_ip=any(p.needs_guard_ip for p in properties),
+ unpredictable_jump=any(p.unpredictable_jump for p in properties),
)
@property
side_exit=False,
pure=True,
no_save_ip=False,
+ needs_guard_ip=False,
+ unpredictable_jump=False,
)
"PyStackRef_Wrap",
"PyStackRef_Unwrap",
"_PyLong_CheckExactAndCompact",
+ "_PyExecutor_FromExit",
+ "_PyJit_TryInitializeTracing",
+ "_Py_unset_eval_breaker_bit",
+ "_Py_set_eval_breaker_bit",
+ "trigger_backoff_counter",
)
else:
assert False, "Unexpected statement type"
+def stmt_has_jump_on_unpredictable_path_body(stmts: list[Stmt] | None, branches_seen: int) -> tuple[bool, int]:
+ if not stmts:
+ return False, branches_seen
+ predict = False
+ seen = 0
+ for st in stmts:
+ predict_body, seen_body = stmt_has_jump_on_unpredictable_path(st, branches_seen)
+ predict = predict or predict_body
+ seen += seen_body
+ return predict, seen
+
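+# Detect instructions whose JUMPBY is executed on only some control-flow paths,
+# i.e. jumps whose target cannot be predicted statically by the tracer.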
+def stmt_has_jump_on_unpredictable_path(stmt: Stmt, branches_seen: int) -> tuple[bool, int]:
+ if isinstance(stmt, BlockStmt):
+ return stmt_has_jump_on_unpredictable_path_body(stmt.body, branches_seen)
+ elif isinstance(stmt, SimpleStmt):
+ for tkn in stmt.contents:
+ if tkn.text == "JUMPBY":
+ return True, branches_seen
+ return False, branches_seen
+ elif isinstance(stmt, IfStmt):
+ predict, seen = stmt_has_jump_on_unpredictable_path(stmt.body, branches_seen)
+ if stmt.else_body:
+ predict_else, seen_else = stmt_has_jump_on_unpredictable_path(stmt.else_body, branches_seen)
+ return predict != predict_else, seen + seen_else + 1
+ return predict, seen + 1
+ elif isinstance(stmt, MacroIfStmt):
+ predict, seen = stmt_has_jump_on_unpredictable_path_body(stmt.body, branches_seen)
+ if stmt.else_body:
+ predict_else, seen_else = stmt_has_jump_on_unpredictable_path_body(stmt.else_body, branches_seen)
+ return predict != predict_else, seen + seen_else
+ return predict, seen
+ elif isinstance(stmt, ForStmt):
+ unpredictable, branches_seen = stmt_has_jump_on_unpredictable_path(stmt.body, branches_seen)
+ return unpredictable, branches_seen + 1
+ elif isinstance(stmt, WhileStmt):
+ unpredictable, branches_seen = stmt_has_jump_on_unpredictable_path(stmt.body, branches_seen)
+ return unpredictable, branches_seen + 1
+ else:
+ assert False, f"Unexpected statement type {stmt}"
+
def compute_properties(op: parser.CodeDef) -> Properties:
escaping_calls = find_escaping_api_calls(op)
escapes = stmt_escapes(op.block)
pure = False if isinstance(op, parser.LabelDef) else "pure" in op.annotations
no_save_ip = False if isinstance(op, parser.LabelDef) else "no_save_ip" in op.annotations
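+ # A JUMPBY that only executes on some paths through the body makes the
+ # instruction's jump unpredictable; labels are never considered.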
+ unpredictable, branches_seen = stmt_has_jump_on_unpredictable_path(op.block, 0)
+ unpredictable_jump = False if isinstance(op, parser.LabelDef) else (unpredictable and branches_seen > 0)
return Properties(
escaping_calls=escaping_calls,
escapes=escapes,
no_save_ip=no_save_ip,
tier=tier_variable(op),
needs_prev=variable_used(op, "prev_instr"),
+ needs_guard_ip=(isinstance(op, parser.InstDef)
+ and (unpredictable_jump and "replaced" not in op.annotations))
+ or variable_used(op, "LOAD_IP")
+ or variable_used(op, "DISPATCH_INLINED"),
+ unpredictable_jump=unpredictable_jump,
)
def expand(items: list[StackItem], oparg: int) -> list[StackItem]:
analysis_error,
Label,
CodeSection,
+ Uop,
)
from cwriter import CWriter
from typing import Callable, TextIO, Iterator, Iterable
labels: dict[str, Label]
_replacers: dict[str, ReplacementFunctionType]
cannot_escape: bool
+ jump_prefix: str
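+ # Optional prefix prepended to emitted jump macros and predicted targets.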
- def __init__(self, out: CWriter, labels: dict[str, Label], cannot_escape: bool = False):
+ def __init__(self, out: CWriter, labels: dict[str, Label], cannot_escape: bool = False, jump_prefix: str = ""):
self._replacers = {
"EXIT_IF": self.exit_if,
"AT_END_EXIT_IF": self.exit_if_after,
self.out = out
self.labels = labels
self.cannot_escape = cannot_escape
+ self.jump_prefix = jump_prefix
def dispatch(
self,
family_name = inst.family.name
self.emit(f"UPDATE_MISS_STATS({family_name});\n")
self.emit(f"assert(_PyOpcode_Deopt[opcode] == ({family_name}));\n")
- self.emit(f"JUMP_TO_PREDICTED({family_name});\n")
+ self.emit(f"JUMP_TO_PREDICTED({self.jump_prefix}{family_name});\n")
self.emit("}\n")
return not always_true(first_tkn)
def goto_error(self, offset: int, storage: Storage) -> str:
if offset > 0:
- return f"JUMP_TO_LABEL(pop_{offset}_error);"
+ return f"{self.jump_prefix}JUMP_TO_LABEL(pop_{offset}_error);"
if offset < 0:
storage.copy().flush(self.out)
- return f"JUMP_TO_LABEL(error);"
+ return f"{self.jump_prefix}JUMP_TO_LABEL(error);"
def error_if(
self,
elif storage.spilled:
raise analysis_error("Cannot jump from spilled label without reloading the stack pointer", goto)
self.out.start_line()
- self.out.emit("JUMP_TO_LABEL(")
+ self.out.emit(f"{self.jump_prefix}JUMP_TO_LABEL(")
self.out.emit(label)
self.out.emit(")")
flags.append("HAS_PURE_FLAG")
if p.no_save_ip:
flags.append("HAS_NO_SAVE_IP_FLAG")
+ if p.unpredictable_jump:
+ flags.append("HAS_UNPREDICTABLE_JUMP_FLAG")
+ if p.needs_guard_ip:
+ flags.append("HAS_NEEDS_GUARD_IP_FLAG")
if flags:
return " | ".join(flags)
else:
"ERROR_NO_POP",
"NO_SAVE_IP",
"PERIODIC",
+ "UNPREDICTABLE_JUMP",
+ "NEEDS_GUARD_IP",
]
out.emit("struct opcode_metadata {\n")
out.emit("uint8_t valid_entry;\n")
out.emit("uint8_t instr_format;\n")
- out.emit("uint16_t flags;\n")
+ out.emit("uint32_t flags;\n")
out.emit("};\n\n")
out.emit(
f"extern const struct opcode_metadata _PyOpcode_opcode_metadata[{table_size}];\n"
for target in targets:
out.emit(target)
out.emit("};\n")
+ targets = ["&&_unknown_opcode,\n"] * 256
+ for name, op in analysis.opmap.items():
+ if op < 256:
+ targets[op] = "&&record_previous_inst,\n"
+ out.emit("#if _Py_TIER2\n")
+ out.emit("static void *opcode_tracing_targets_table[256] = {\n")
+ for target in targets:
+ out.emit(target)
+ out.emit("};\n")
+ out.emit(f"#endif\n")
out.emit("#else /* _Py_TAIL_CALL_INTERP */\n")
def function_proto(name: str) -> str:
def write_tailcall_dispatch_table(analysis: Analysis, out: CWriter) -> None:
- out.emit("static py_tail_call_funcptr instruction_funcptr_table[256];\n")
+ out.emit("static py_tail_call_funcptr instruction_funcptr_handler_table[256];\n")
+ out.emit("\n")
+ out.emit("static py_tail_call_funcptr instruction_funcptr_tracing_table[256];\n")
out.emit("\n")
# Emit function prototypes for labels.
out.emit("\n")
# Emit the dispatch table.
- out.emit("static py_tail_call_funcptr instruction_funcptr_table[256] = {\n")
+ out.emit("static py_tail_call_funcptr instruction_funcptr_handler_table[256] = {\n")
for name in sorted(analysis.instructions.keys()):
out.emit(f"[{name}] = _TAIL_CALL_{name},\n")
named_values = analysis.opmap.values()
if rest not in named_values:
out.emit(f"[{rest}] = _TAIL_CALL_UNKNOWN_OPCODE,\n")
out.emit("};\n")
+
+ # Emit the tracing dispatch table.
+ out.emit("static py_tail_call_funcptr instruction_funcptr_tracing_table[256] = {\n")
+ for name in sorted(analysis.instructions.keys()):
+ out.emit(f"[{name}] = _TAIL_CALL_record_previous_inst,\n")
+ named_values = analysis.opmap.values()
+ for rest in range(256):
+ if rest not in named_values:
+ out.emit(f"[{rest}] = _TAIL_CALL_UNKNOWN_OPCODE,\n")
+ out.emit("};\n")
outfile.write("#endif /* _Py_TAIL_CALL_INTERP */\n")
arg_parser = argparse.ArgumentParser(
def __init__(self, out: CWriter, labels: dict[str, Label]):
super().__init__(out, labels)
self._replacers["oparg"] = self.oparg
+ self._replacers["IP_OFFSET_OF"] = self.ip_offset_of
def goto_error(self, offset: int, storage: Storage) -> str:
# To do: Add jump targets for popping values.
self.out.emit_at(uop.name[-1], tkn)
return True
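+ # Handles IP_OFFSET_OF(<name>) in _GUARD_IP uops: emits the OFFSET_OF_<name>
+ # constant that write_uop #defines around the uop body.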
+ def ip_offset_of(
+ self,
+ tkn: Token,
+ tkn_iter: TokenIterator,
+ uop: CodeSection,
+ storage: Storage,
+ inst: Instruction | None,
+ ) -> bool:
+ assert uop.name.startswith("_GUARD_IP")
+ # LPAREN
+ next(tkn_iter)
+ tok = next(tkn_iter)
+ self.emit(f" OFFSET_OF_{tok.text};\n")
+ # RPAREN
+ next(tkn_iter)
+ # SEMI
+ next(tkn_iter)
+ return True
-def write_uop(uop: Uop, emitter: Emitter, stack: Stack) -> Stack:
+def write_uop(uop: Uop, emitter: Emitter, stack: Stack, offset_strs: dict[str, tuple[str, str]]) -> Stack:
locals: dict[str, Local] = {}
try:
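+ # When emitting a _GUARD_IP_<op> uop, temporarily #define OFFSET_OF_<op>
+ # to the IP offset expression recorded by populate_offset_strs().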
+ if name_offset_pair := offset_strs.get(uop.name):
+ emitter.emit(f"#define OFFSET_OF_{name_offset_pair[0]} ({name_offset_pair[1]})\n")
emitter.out.start_line()
if uop.properties.oparg:
emitter.emit("oparg = CURRENT_OPARG();\n")
idx += 1
_, storage = emitter.emit_tokens(uop, storage, None, False)
storage.flush(emitter.out)
+ if name_offset_pair:
+ emitter.emit(f"#undef OFFSET_OF_{name_offset_pair[0]}\n")
except StackError as ex:
raise analysis_error(ex.args[0], uop.body.open) from None
return storage.stack
SKIPS = ("_EXTENDED_ARG",)
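+# For each uop <op> that has a matching _GUARD_IP_<op> uop, record
+# (<op>, ip-offset expression) under the guard's name. The expression is
+# lifted from the single LOAD_IP(...) call in <op>'s body and is #defined
+# as OFFSET_OF_<op> while the guard uop is emitted.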
+def populate_offset_strs(analysis: Analysis) -> dict[str, tuple[str, str]]:
+ offset_strs: dict[str, tuple[str, str]] = {}
+ for name, uop in analysis.uops.items():
+ if not f"_GUARD_IP_{name}" in analysis.uops:
+ continue
+ tkn_iter = uop.body.tokens()
+ found = False
+ offset_str = ""
+ for token in tkn_iter:
+ if token.kind == "IDENTIFIER" and token.text == "LOAD_IP":
+ if found:
+ raise analysis_error("Cannot have two LOAD_IP in a guarded single uop.", uop.body.open)
+ offset = []
+ while token.kind != "SEMI":
+ offset.append(token.text)
+ token = next(tkn_iter)
+ # [1:] drops the leading LOAD_IP token text
+ offset_str = "".join(offset[1:])
+ found = True
+ assert offset_str
+ offset_strs[f"_GUARD_IP_{name}"] = (name, offset_str)
+ return offset_strs
+
def generate_tier2(
filenames: list[str], analysis: Analysis, outfile: TextIO, lines: bool
) -> None:
)
out = CWriter(outfile, 2, lines)
emitter = Tier2Emitter(out, analysis.labels)
+ offset_strs = populate_offset_strs(analysis)
out.emit("\n")
+
for name, uop in analysis.uops.items():
if uop.properties.tier == 1:
continue
out.emit(f"case {uop.name}: {{\n")
declare_variables(uop, out)
stack = Stack()
- stack = write_uop(uop, emitter, stack)
+ stack = write_uop(uop, emitter, stack, offset_strs)
out.start_line()
if not uop.properties.always_exits:
out.emit("break;\n")
out.start_line()
out.emit("}")
out.emit("\n\n")
+
+ out.emit("\n")
outfile.write("#undef TIER_TWO\n")
def generate_names_and_flags(analysis: Analysis, out: CWriter) -> None:
- out.emit("extern const uint16_t _PyUop_Flags[MAX_UOP_ID+1];\n")
+ out.emit("extern const uint32_t _PyUop_Flags[MAX_UOP_ID+1];\n")
out.emit("typedef struct _rep_range { uint8_t start; uint8_t stop; } ReplicationRange;\n")
out.emit("extern const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1];\n")
out.emit("extern const char * const _PyOpcode_uop_name[MAX_UOP_ID+1];\n\n")
out.emit("extern int _PyUop_num_popped(int opcode, int oparg);\n\n")
out.emit("#ifdef NEED_OPCODE_METADATA\n")
- out.emit("const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {\n")
+ out.emit("const uint32_t _PyUop_Flags[MAX_UOP_ID+1] = {\n")
for uop in analysis.uops.values():
if uop.is_viable() and uop.properties.tier != 1:
out.emit(f"[{uop.name}] = {cflags(uop.properties)},\n")
__attribute__((musttail)) return jitted(frame, stack_pointer, tstate); \
} while (0)
-#undef GOTO_TIER_ONE
-#define GOTO_TIER_ONE(TARGET) \
-do { \
- tstate->current_executor = NULL; \
- _PyFrame_SetStackPointer(frame, stack_pointer); \
- return TARGET; \
-} while (0)
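+/* Setup-only replacement for GOTO_TIER_ONE: clear the current executor and
+   save the stack pointer; the actual return to tier one is no longer part of
+   the macro. */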
+#undef GOTO_TIER_ONE_SETUP
+#define GOTO_TIER_ONE_SETUP \
+ tstate->current_executor = NULL; \
+ _PyFrame_SetStackPointer(frame, stack_pointer);
#undef LOAD_IP
#define LOAD_IP(UNUSED) \