#define _ITER_CHECK_RANGE 328
#define _IS_ITER_EXHAUSTED_RANGE 329
#define _ITER_NEXT_RANGE 330
-#define _POP_JUMP_IF_FALSE 331
-#define _POP_JUMP_IF_TRUE 332
-#define JUMP_TO_TOP 333
-#define INSERT 334
+#define _CHECK_PEP_523 331
+#define _CHECK_FUNCTION_EXACT_ARGS 332
+#define _CHECK_STACK_SPACE 333
+#define _INIT_CALL_PY_EXACT_ARGS 334
+#define _PUSH_FRAME 335
+#define _POP_JUMP_IF_FALSE 336
+#define _POP_JUMP_IF_TRUE 337
+#define JUMP_TO_TOP 338
+#define SAVE_CURRENT_IP 339
+#define INSERT 340
extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump);
#ifdef NEED_OPCODE_METADATA
return oparg + 2;
case CALL_BOUND_METHOD_EXACT_ARGS:
return oparg + 2;
+ case _CHECK_PEP_523:
+ return 0;
+ case _CHECK_FUNCTION_EXACT_ARGS:
+ return oparg + 2;
+ case _CHECK_STACK_SPACE:
+ return oparg + 2;
+ case _INIT_CALL_PY_EXACT_ARGS:
+ return oparg + 2;
+ case _PUSH_FRAME:
+ return 1;
case CALL_PY_EXACT_ARGS:
return oparg + 2;
case CALL_PY_WITH_DEFAULTS:
return 0;
case SAVE_IP:
return 0;
+ case SAVE_CURRENT_IP:
+ return 0;
case EXIT_TRACE:
return 0;
case INSERT:
return 1;
case CALL_BOUND_METHOD_EXACT_ARGS:
return 1;
+ case _CHECK_PEP_523:
+ return 0;
+ case _CHECK_FUNCTION_EXACT_ARGS:
+ return oparg + 2;
+ case _CHECK_STACK_SPACE:
+ return oparg + 2;
+ case _INIT_CALL_PY_EXACT_ARGS:
+ return 1;
+ case _PUSH_FRAME:
+ return 1;
case CALL_PY_EXACT_ARGS:
return 1;
case CALL_PY_WITH_DEFAULTS:
return 0;
case SAVE_IP:
return 0;
+ case SAVE_CURRENT_IP:
+ return 0;
case EXIT_TRACE:
return 0;
case INSERT:
enum InstructionFormat {
INSTR_FMT_IB,
INSTR_FMT_IBC,
+ INSTR_FMT_IBC0,
INSTR_FMT_IBC00,
INSTR_FMT_IBC000,
INSTR_FMT_IBC00000000,
#define OPARG_CACHE_4 4
#define OPARG_TOP 5
#define OPARG_BOTTOM 6
+#define OPARG_SAVE_IP 7
#define OPCODE_METADATA_FMT(OP) (_PyOpcode_opcode_metadata[(OP)].instr_format)
#define SAME_OPCODE_METADATA(OP1, OP2) \
[GET_YIELD_FROM_ITER] = { .nuops = 1, .uops = { { GET_YIELD_FROM_ITER, 0, 0 } } },
[WITH_EXCEPT_START] = { .nuops = 1, .uops = { { WITH_EXCEPT_START, 0, 0 } } },
[PUSH_EXC_INFO] = { .nuops = 1, .uops = { { PUSH_EXC_INFO, 0, 0 } } },
+ [CALL_PY_EXACT_ARGS] = { .nuops = 7, .uops = { { _CHECK_PEP_523, 0, 0 }, { _CHECK_FUNCTION_EXACT_ARGS, 2, 1 }, { _CHECK_STACK_SPACE, 0, 0 }, { _INIT_CALL_PY_EXACT_ARGS, 0, 0 }, { SAVE_IP, 7, 3 }, { SAVE_CURRENT_IP, 0, 0 }, { _PUSH_FRAME, 0, 0 } } },
[CALL_NO_KW_TYPE_1] = { .nuops = 1, .uops = { { CALL_NO_KW_TYPE_1, 0, 0 } } },
[CALL_NO_KW_STR_1] = { .nuops = 1, .uops = { { CALL_NO_KW_STR_1, 0, 0 } } },
[CALL_NO_KW_TUPLE_1] = { .nuops = 1, .uops = { { CALL_NO_KW_TUPLE_1, 0, 0 } } },
[_ITER_CHECK_RANGE] = "_ITER_CHECK_RANGE",
[_IS_ITER_EXHAUSTED_RANGE] = "_IS_ITER_EXHAUSTED_RANGE",
[_ITER_NEXT_RANGE] = "_ITER_NEXT_RANGE",
+ [_CHECK_PEP_523] = "_CHECK_PEP_523",
+ [_CHECK_FUNCTION_EXACT_ARGS] = "_CHECK_FUNCTION_EXACT_ARGS",
+ [_CHECK_STACK_SPACE] = "_CHECK_STACK_SPACE",
+ [_INIT_CALL_PY_EXACT_ARGS] = "_INIT_CALL_PY_EXACT_ARGS",
+ [_PUSH_FRAME] = "_PUSH_FRAME",
[_POP_JUMP_IF_FALSE] = "_POP_JUMP_IF_FALSE",
[_POP_JUMP_IF_TRUE] = "_POP_JUMP_IF_TRUE",
[JUMP_TO_TOP] = "JUMP_TO_TOP",
+ [SAVE_CURRENT_IP] = "SAVE_CURRENT_IP",
[INSERT] = "INSERT",
};
#endif // NEED_OPCODE_METADATA
with self.assertRaises(StopIteration):
next(it)
+ def test_call_py_exact_args(self):
+ def testfunc(n):
+ def dummy(x):
+ return x+1
+ for i in range(n):
+ dummy(i)
+
+ opt = _testinternalcapi.get_uop_optimizer()
+ with temporary_optimizer(opt):
+ testfunc(10)
+
+ ex = get_first_executor(testfunc)
+ self.assertIsNotNone(ex)
+ uops = {opname for opname, _, _ in ex}
+ self.assertIn("_PUSH_FRAME", uops)
+
+
if __name__ == "__main__":
unittest.main()
break;
}
+ case _CHECK_PEP_523: {
+ break;
+ }
+
+ case _CHECK_FUNCTION_EXACT_ARGS: {
+ break;
+ }
+
+ case _CHECK_STACK_SPACE: {
+ break;
+ }
+
+ case _INIT_CALL_PY_EXACT_ARGS: {
+ STACK_SHRINK(oparg);
+ STACK_SHRINK(1);
+ PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true);
+ break;
+ }
+
+ case _PUSH_FRAME: {
+ PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true);
+ break;
+ }
+
case CALL_NO_KW_TYPE_1: {
STACK_SHRINK(oparg);
STACK_SHRINK(1);
break;
}
+ case SAVE_CURRENT_IP: {
+ break;
+ }
+
case EXIT_TRACE: {
break;
}
{
PyGenObject *gen = (PyGenObject *)receiver;
_PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
- frame->return_offset = oparg;
STACK_SHRINK(1);
_PyFrame_StackPush(gen_frame, v);
gen->gi_frame_state = FRAME_EXECUTING;
gen->gi_exc_state.previous_item = tstate->exc_info;
tstate->exc_info = &gen->gi_exc_state;
SKIP_OVER(INLINE_CACHE_ENTRIES_SEND);
+ frame->return_offset = oparg;
DISPATCH_INLINED(gen_frame);
}
if (Py_IsNone(v) && PyIter_Check(receiver)) {
DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, SEND);
STAT_INC(SEND, hit);
_PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
- frame->return_offset = oparg;
STACK_SHRINK(1);
_PyFrame_StackPush(gen_frame, v);
gen->gi_frame_state = FRAME_EXECUTING;
gen->gi_exc_state.previous_item = tstate->exc_info;
tstate->exc_info = &gen->gi_exc_state;
SKIP_OVER(INLINE_CACHE_ENTRIES_SEND);
+ frame->return_offset = oparg;
DISPATCH_INLINED(gen_frame);
}
DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, FOR_ITER);
STAT_INC(FOR_ITER, hit);
_PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
- frame->return_offset = oparg;
_PyFrame_StackPush(gen_frame, Py_None);
gen->gi_frame_state = FRAME_EXECUTING;
gen->gi_exc_state.previous_item = tstate->exc_info;
SKIP_OVER(INLINE_CACHE_ENTRIES_FOR_ITER);
assert(next_instr[oparg].op.code == END_FOR ||
next_instr[oparg].op.code == INSTRUMENTED_END_FOR);
+ frame->return_offset = oparg;
DISPATCH_INLINED(gen_frame);
}
GO_TO_INSTRUCTION(CALL_PY_EXACT_ARGS);
}
- inst(CALL_PY_EXACT_ARGS, (unused/1, func_version/2, callable, self_or_null, args[oparg] -- unused)) {
- ASSERT_KWNAMES_IS_NULL();
+ op(_CHECK_PEP_523, (--)) {
DEOPT_IF(tstate->interp->eval_frame, CALL);
- int argcount = oparg;
- if (self_or_null != NULL) {
- args--;
- argcount++;
- }
+ }
+
+ op(_CHECK_FUNCTION_EXACT_ARGS, (func_version/2, callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) {
+ ASSERT_KWNAMES_IS_NULL();
DEOPT_IF(!PyFunction_Check(callable), CALL);
PyFunctionObject *func = (PyFunctionObject *)callable;
DEOPT_IF(func->func_version != func_version, CALL);
PyCodeObject *code = (PyCodeObject *)func->func_code;
- DEOPT_IF(code->co_argcount != argcount, CALL);
+ DEOPT_IF(code->co_argcount != oparg + (self_or_null != NULL), CALL);
+ }
+
+ op(_CHECK_STACK_SPACE, (callable, unused, unused[oparg] -- callable, unused, unused[oparg])) {
+ PyFunctionObject *func = (PyFunctionObject *)callable;
+ PyCodeObject *code = (PyCodeObject *)func->func_code;
DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), CALL);
+ }
+
+ op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null, args[oparg] -- new_frame: _PyInterpreterFrame*)) {
+ int argcount = oparg;
+ if (self_or_null != NULL) {
+ args--;
+ argcount++;
+ }
STAT_INC(CALL, hit);
- _PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(tstate, func, argcount);
+ PyFunctionObject *func = (PyFunctionObject *)callable;
+ new_frame = _PyFrame_PushUnchecked(tstate, func, argcount);
for (int i = 0; i < argcount; i++) {
new_frame->localsplus[i] = args[i];
}
- // Manipulate stack directly since we leave using DISPATCH_INLINED().
- STACK_SHRINK(oparg + 2);
- SKIP_OVER(INLINE_CACHE_ENTRIES_CALL);
+ }
+
+ // The 'unused' output effect represents the return value
+ // (which will be pushed when the frame returns).
+ // It is needed so CALL_PY_EXACT_ARGS matches its family.
+ op(_PUSH_FRAME, (new_frame: _PyInterpreterFrame* -- unused)) {
+ // Write it out explicitly because it's subtly different.
+ // Eventually this should be the only occurrence of this code.
frame->return_offset = 0;
- DISPATCH_INLINED(new_frame);
+ assert(tstate->interp->eval_frame == NULL);
+ _PyFrame_SetStackPointer(frame, stack_pointer);
+ new_frame->previous = frame;
+ CALL_STAT_INC(inlined_py_calls);
+ #if TIER_ONE
+ frame = cframe.current_frame = new_frame;
+ goto start_frame;
+ #endif
+ #if TIER_TWO
+ frame = tstate->cframe->current_frame = new_frame;
+ ERROR_IF(_Py_EnterRecursivePy(tstate), exit_unwind);
+ stack_pointer = _PyFrame_GetStackPointer(frame);
+ ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive;
+ #endif
}
+ macro(CALL_PY_EXACT_ARGS) =
+ unused/1 + // Skip over the counter
+ _CHECK_PEP_523 +
+ _CHECK_FUNCTION_EXACT_ARGS +
+ _CHECK_STACK_SPACE +
+ _INIT_CALL_PY_EXACT_ARGS +
+ SAVE_IP + // Tier 2 only; special-cased oparg
+ SAVE_CURRENT_IP + // Sets frame->prev_instr
+ _PUSH_FRAME;
+
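+// Editor's note (illustrative, not from the patch): for a call with
+// oparg == 1, the tier-2 projection of this macro comes out roughly as
+//   _CHECK_PEP_523, _CHECK_FUNCTION_EXACT_ARGS (operand: func_version),
+//   _CHECK_STACK_SPACE, _INIT_CALL_PY_EXACT_ARGS,
+//   SAVE_IP (oparg: IP of the next instruction), SAVE_CURRENT_IP,
+//   _PUSH_FRAME -- after which trace projection stops (see the
+//   _PUSH_FRAME special case in the trace translation below).
+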
inst(CALL_PY_WITH_DEFAULTS, (unused/1, func_version/2, callable, self_or_null, args[oparg] -- unused)) {
ASSERT_KWNAMES_IS_NULL();
DEOPT_IF(tstate->interp->eval_frame, CALL);
frame->prev_instr = ip_offset + oparg;
}
+ op(SAVE_CURRENT_IP, (--)) {
+ #if TIER_ONE
+ frame->prev_instr = next_instr - 1;
+ #endif
+ #if TIER_TWO
+ // Relies on a preceding SAVE_IP
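+        // (editor's note) That SAVE_IP, special-cased via OPARG_SAVE_IP,
+        // set prev_instr to the next instruction; backing up one code
+        // unit matches tier 1's `next_instr - 1` above.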
+ frame->prev_instr--;
+ #endif
+ }
+
op(EXIT_TRACE, (--)) {
frame->prev_instr--; // Back up to just before destination
_PyFrame_SetStackPointer(frame, stack_pointer);
return 0;
}
-static inline int _Py_EnterRecursivePy(PyThreadState *tstate) {
- return (tstate->py_recursion_remaining-- <= 0) &&
- _Py_CheckRecursiveCallPy(tstate);
-}
-
static inline void _Py_LeaveRecursiveCallPy(PyThreadState *tstate) {
tstate->py_recursion_remaining++;
#endif
{
+#define TIER_ONE 1
#include "generated_cases.c.h"
/* INSTRUMENTED_LINE has to be here, rather than in bytecodes.c,
#else
#define _Py_atomic_load_relaxed_int32(ATOMIC_VAL) _Py_atomic_load_relaxed(ATOMIC_VAL)
#endif
+
+static inline int _Py_EnterRecursivePy(PyThreadState *tstate) {
+ return (tstate->py_recursion_remaining-- <= 0) &&
+ _Py_CheckRecursiveCallPy(tstate);
+}
OBJECT_STAT_INC(optimization_uops_executed);
switch (opcode) {
+#define TIER_TWO 2
#include "executor_cases.c.h"
default:
pop_2_error:
STACK_SHRINK(1);
pop_1_error:
+pop_1_exit_unwind:
STACK_SHRINK(1);
error:
// On ERROR_IF we return NULL as the frame.
}
case TO_BOOL: {
- static_assert(INLINE_CACHE_ENTRIES_TO_BOOL == 3, "incorrect cache size");
PyObject *value;
PyObject *res;
value = stack_pointer[-1];
}
case BINARY_SUBSCR: {
- static_assert(INLINE_CACHE_ENTRIES_BINARY_SUBSCR == 1, "incorrect cache size");
PyObject *sub;
PyObject *container;
PyObject *res;
}
case STORE_SUBSCR: {
- static_assert(INLINE_CACHE_ENTRIES_STORE_SUBSCR == 1, "incorrect cache size");
PyObject *sub;
PyObject *container;
PyObject *v;
}
case UNPACK_SEQUENCE: {
- static_assert(INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE == 1, "incorrect cache size");
PyObject *seq;
seq = stack_pointer[-1];
#if ENABLE_SPECIALIZATION
}
case STORE_ATTR: {
- static_assert(INLINE_CACHE_ENTRIES_STORE_ATTR == 4, "incorrect cache size");
PyObject *owner;
PyObject *v;
owner = stack_pointer[-1];
}
case LOAD_GLOBAL: {
- static_assert(INLINE_CACHE_ENTRIES_LOAD_GLOBAL == 4, "incorrect cache size");
PyObject *res;
PyObject *null = NULL;
#if ENABLE_SPECIALIZATION
}
case LOAD_ATTR: {
- static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size");
PyObject *owner;
PyObject *attr;
PyObject *self_or_null = NULL;
}
case COMPARE_OP: {
- static_assert(INLINE_CACHE_ENTRIES_COMPARE_OP == 1, "incorrect cache size");
PyObject *right;
PyObject *left;
PyObject *res;
break;
}
+ case _CHECK_PEP_523: {
+ DEOPT_IF(tstate->interp->eval_frame, CALL);
+ break;
+ }
+
+ case _CHECK_FUNCTION_EXACT_ARGS: {
+ PyObject *self_or_null;
+ PyObject *callable;
+ self_or_null = stack_pointer[-1 - oparg];
+ callable = stack_pointer[-2 - oparg];
+ uint32_t func_version = (uint32_t)operand;
+ ASSERT_KWNAMES_IS_NULL();
+ DEOPT_IF(!PyFunction_Check(callable), CALL);
+ PyFunctionObject *func = (PyFunctionObject *)callable;
+ DEOPT_IF(func->func_version != func_version, CALL);
+ PyCodeObject *code = (PyCodeObject *)func->func_code;
+ DEOPT_IF(code->co_argcount != oparg + (self_or_null != NULL), CALL);
+ break;
+ }
+
+ case _CHECK_STACK_SPACE: {
+ PyObject *callable;
+ callable = stack_pointer[-2 - oparg];
+ PyFunctionObject *func = (PyFunctionObject *)callable;
+ PyCodeObject *code = (PyCodeObject *)func->func_code;
+ DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), CALL);
+ break;
+ }
+
+ case _INIT_CALL_PY_EXACT_ARGS: {
+ PyObject **args;
+ PyObject *self_or_null;
+ PyObject *callable;
+ _PyInterpreterFrame *new_frame;
+ args = stack_pointer - oparg;
+ self_or_null = stack_pointer[-1 - oparg];
+ callable = stack_pointer[-2 - oparg];
+ int argcount = oparg;
+ if (self_or_null != NULL) {
+ args--;
+ argcount++;
+ }
+ STAT_INC(CALL, hit);
+ PyFunctionObject *func = (PyFunctionObject *)callable;
+ new_frame = _PyFrame_PushUnchecked(tstate, func, argcount);
+ for (int i = 0; i < argcount; i++) {
+ new_frame->localsplus[i] = args[i];
+ }
+ STACK_SHRINK(oparg);
+ STACK_SHRINK(1);
+ stack_pointer[-1] = (PyObject *)new_frame;
+ break;
+ }
+
+ case _PUSH_FRAME: {
+ _PyInterpreterFrame *new_frame;
+ new_frame = (_PyInterpreterFrame *)stack_pointer[-1];
+ STACK_SHRINK(1);
+ // Write it out explicitly because it's subtly different.
+ // Eventually this should be the only occurrence of this code.
+ frame->return_offset = 0;
+ assert(tstate->interp->eval_frame == NULL);
+ _PyFrame_SetStackPointer(frame, stack_pointer);
+ new_frame->previous = frame;
+ CALL_STAT_INC(inlined_py_calls);
+ #if TIER_ONE
+ frame = cframe.current_frame = new_frame;
+ goto start_frame;
+ #endif
+ #if TIER_TWO
+ frame = tstate->cframe->current_frame = new_frame;
+ if (_Py_EnterRecursivePy(tstate)) goto pop_1_exit_unwind;
+ stack_pointer = _PyFrame_GetStackPointer(frame);
+ ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive;
+ #endif
+ break;
+ }
+
case CALL_NO_KW_TYPE_1: {
PyObject **args;
PyObject *null;
}
case BINARY_OP: {
- static_assert(INLINE_CACHE_ENTRIES_BINARY_OP == 1, "incorrect cache size");
PyObject *rhs;
PyObject *lhs;
PyObject *res;
break;
}
+ case SAVE_CURRENT_IP: {
+ #if TIER_ONE
+ frame->prev_instr = next_instr - 1;
+ #endif
+ #if TIER_TWO
+ // Relies on a preceding SAVE_IP
+ frame->prev_instr--;
+ #endif
+ break;
+ }
+
case EXIT_TRACE: {
frame->prev_instr--; // Back up to just before destination
_PyFrame_SetStackPointer(frame, stack_pointer);
{
PyGenObject *gen = (PyGenObject *)receiver;
_PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
- frame->return_offset = oparg;
STACK_SHRINK(1);
_PyFrame_StackPush(gen_frame, v);
gen->gi_frame_state = FRAME_EXECUTING;
gen->gi_exc_state.previous_item = tstate->exc_info;
tstate->exc_info = &gen->gi_exc_state;
SKIP_OVER(INLINE_CACHE_ENTRIES_SEND);
+ frame->return_offset = oparg;
DISPATCH_INLINED(gen_frame);
}
if (Py_IsNone(v) && PyIter_Check(receiver)) {
DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, SEND);
STAT_INC(SEND, hit);
_PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
- frame->return_offset = oparg;
STACK_SHRINK(1);
_PyFrame_StackPush(gen_frame, v);
gen->gi_frame_state = FRAME_EXECUTING;
gen->gi_exc_state.previous_item = tstate->exc_info;
tstate->exc_info = &gen->gi_exc_state;
SKIP_OVER(INLINE_CACHE_ENTRIES_SEND);
+ frame->return_offset = oparg;
DISPATCH_INLINED(gen_frame);
}
DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, FOR_ITER);
STAT_INC(FOR_ITER, hit);
_PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
- frame->return_offset = oparg;
_PyFrame_StackPush(gen_frame, Py_None);
gen->gi_frame_state = FRAME_EXECUTING;
gen->gi_exc_state.previous_item = tstate->exc_info;
SKIP_OVER(INLINE_CACHE_ENTRIES_FOR_ITER);
assert(next_instr[oparg].op.code == END_FOR ||
next_instr[oparg].op.code == INSTRUMENTED_END_FOR);
+ frame->return_offset = oparg;
DISPATCH_INLINED(gen_frame);
STACK_GROW(1);
}
TARGET(CALL_PY_EXACT_ARGS) {
PREDICTED(CALL_PY_EXACT_ARGS);
- PyObject **args;
PyObject *self_or_null;
PyObject *callable;
- args = stack_pointer - oparg;
+ PyObject **args;
+ _PyInterpreterFrame *new_frame;
+ // _CHECK_PEP_523
+ {
+ DEOPT_IF(tstate->interp->eval_frame, CALL);
+ }
+ // _CHECK_FUNCTION_EXACT_ARGS
self_or_null = stack_pointer[-1 - oparg];
callable = stack_pointer[-2 - oparg];
- uint32_t func_version = read_u32(&next_instr[1].cache);
- ASSERT_KWNAMES_IS_NULL();
- DEOPT_IF(tstate->interp->eval_frame, CALL);
- int argcount = oparg;
- if (self_or_null != NULL) {
- args--;
- argcount++;
+ {
+ uint32_t func_version = read_u32(&next_instr[1].cache);
+ ASSERT_KWNAMES_IS_NULL();
+ DEOPT_IF(!PyFunction_Check(callable), CALL);
+ PyFunctionObject *func = (PyFunctionObject *)callable;
+ DEOPT_IF(func->func_version != func_version, CALL);
+ PyCodeObject *code = (PyCodeObject *)func->func_code;
+ DEOPT_IF(code->co_argcount != oparg + (self_or_null != NULL), CALL);
+ }
+ // _CHECK_STACK_SPACE
+ callable = stack_pointer[-2 - oparg];
+ {
+ PyFunctionObject *func = (PyFunctionObject *)callable;
+ PyCodeObject *code = (PyCodeObject *)func->func_code;
+ DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), CALL);
}
- DEOPT_IF(!PyFunction_Check(callable), CALL);
- PyFunctionObject *func = (PyFunctionObject *)callable;
- DEOPT_IF(func->func_version != func_version, CALL);
- PyCodeObject *code = (PyCodeObject *)func->func_code;
- DEOPT_IF(code->co_argcount != argcount, CALL);
- DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), CALL);
- STAT_INC(CALL, hit);
- _PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(tstate, func, argcount);
- for (int i = 0; i < argcount; i++) {
- new_frame->localsplus[i] = args[i];
+ // _INIT_CALL_PY_EXACT_ARGS
+ args = stack_pointer - oparg;
+ self_or_null = stack_pointer[-1 - oparg];
+ callable = stack_pointer[-2 - oparg];
+ {
+ int argcount = oparg;
+ if (self_or_null != NULL) {
+ args--;
+ argcount++;
+ }
+ STAT_INC(CALL, hit);
+ PyFunctionObject *func = (PyFunctionObject *)callable;
+ new_frame = _PyFrame_PushUnchecked(tstate, func, argcount);
+ for (int i = 0; i < argcount; i++) {
+ new_frame->localsplus[i] = args[i];
+ }
}
- // Manipulate stack directly since we leave using DISPATCH_INLINED().
- STACK_SHRINK(oparg + 2);
- SKIP_OVER(INLINE_CACHE_ENTRIES_CALL);
- frame->return_offset = 0;
- DISPATCH_INLINED(new_frame);
+ // SAVE_CURRENT_IP
+ next_instr += 3;
+ {
+ #if TIER_ONE
+ frame->prev_instr = next_instr - 1;
+ #endif
+ #if TIER_TWO
+ // Relies on a preceding SAVE_IP
+ frame->prev_instr--;
+ #endif
+ }
+ // _PUSH_FRAME
STACK_SHRINK(oparg);
- STACK_SHRINK(1);
+ STACK_SHRINK(2);
+ {
+ // Write it out explicitly because it's subtly different.
+ // Eventually this should be the only occurrence of this code.
+ frame->return_offset = 0;
+ assert(tstate->interp->eval_frame == NULL);
+ _PyFrame_SetStackPointer(frame, stack_pointer);
+ new_frame->previous = frame;
+ CALL_STAT_INC(inlined_py_calls);
+ #if TIER_ONE
+ frame = cframe.current_frame = new_frame;
+ goto start_frame;
+ #endif
+ #if TIER_TWO
+ frame = tstate->cframe->current_frame = new_frame;
+ if (_Py_EnterRecursivePy(tstate)) goto pop_1_exit_unwind;
+ stack_pointer = _PyFrame_GetStackPointer(frame);
+ ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive;
+ #endif
+ }
}
TARGET(CALL_PY_WITH_DEFAULTS) {
case OPARG_BOTTOM: // Second half of super-instr
oparg = orig_oparg & 0xF;
break;
+ case OPARG_SAVE_IP: // op==SAVE_IP; oparg=next instr
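+                // (editor's note) `offset` here is the macro's cache_offset
+                // (see the OPARG_SAVE_IP branch in write_macro_expansions),
+                // so the saved IP is that of the following instruction.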
+ oparg = INSTR_IP(instr + offset, code);
+ break;
+
default:
fprintf(stderr,
"opcode=%d, oparg=%d; nuops=%d, i=%d; size=%d, offset=%d\n",
Py_FatalError("garbled expansion");
}
ADD_TO_TRACE(expansion->uops[i].uop, oparg, operand);
+ if (expansion->uops[i].uop == _PUSH_FRAME) {
+ assert(i + 1 == nuops);
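+                        // (editor's note) _PUSH_FRAME hands control to the
+                        // callee's frame and code object, so projection
+                        // cannot continue past it; the trailing SAVE_IP
+                        // records the resume point in the new frame.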
+ ADD_TO_TRACE(SAVE_IP, 0, 0);
+ goto done;
+ }
}
break;
}
if text == "#if":
if (
i + 1 < len(node.tokens)
- and node.tokens[i + 1].text == "ENABLE_SPECIALIZATION"
+ and node.tokens[i + 1].text in ("ENABLE_SPECIALIZATION", "TIER_ONE")
):
skipping = True
elif text in ("#else", "#endif"):
PseudoInstruction,
StackEffect,
OverriddenInstructionPlaceHolder,
+ TIER_ONE,
TIER_TWO,
)
import parsing
"OPARG_CACHE_4": 4,
"OPARG_TOP": 5,
"OPARG_BOTTOM": 6,
+ "OPARG_SAVE_IP": 7,
}
INSTR_FMT_PREFIX = "INSTR_FMT_"
if instr.kind == "inst" and instr.is_viable_uop():
# Construct a dummy Component -- input/output mappings are not used
part = Component(instr, instr.active_caches)
- self.write_macro_expansions(instr.name, [part])
+ self.write_macro_expansions(
+ instr.name, [part], instr.cache_offset
+ )
elif instr.kind == "inst" and variable_used(
instr.inst, "oparg1"
):
self.write_super_expansions(instr.name)
case parsing.Macro():
mac = self.macro_instrs[thing.name]
- self.write_macro_expansions(mac.name, mac.parts)
+ self.write_macro_expansions(
+ mac.name, mac.parts, mac.cache_offset
+ )
case parsing.Pseudo():
pass
case _:
if instr.kind == "op" and instr.is_viable_uop():
add(instr.name)
- def write_macro_expansions(self, name: str, parts: MacroParts) -> None:
+ def write_macro_expansions(
+ self, name: str, parts: MacroParts, cache_offset: int
+ ) -> None:
"""Write the macro expansions for a macro-instruction."""
-# TODO: Refactor to share code with write_cody(), is_viaible_uop(), etc.
+# TODO: Refactor to share code with write_body(), is_viable_uop(), etc.
offset = 0 # Cache effect offset
)
return
if not part.active_caches:
- size, offset = OPARG_SIZES["OPARG_FULL"], 0
+ if part.instr.name == "SAVE_IP":
+ size, offset = OPARG_SIZES["OPARG_SAVE_IP"], cache_offset
+ else:
+ size, offset = OPARG_SIZES["OPARG_FULL"], 0
else:
-# If this assert triggers, is_viable_uops() lied
+# If this assert triggers, is_viable_uop() lied
assert len(part.active_caches) == 1, (name, part.instr.name)
case parsing.Macro():
n_macros += 1
mac = self.macro_instrs[thing.name]
- stacking.write_macro_instr(mac, self.out, self.families.get(mac.name))
+ stacking.write_macro_instr(
+ mac, self.out, self.families.get(mac.name)
+ )
# self.write_macro(self.macro_instrs[thing.name])
case parsing.Pseudo():
pass
n_instrs += 1
self.out.emit("")
with self.out.block(f"case {thing.name}:"):
- instr.write(self.out, tier=TIER_TWO)
+ stacking.write_single_instr(
+ instr, self.out, tier=TIER_TWO
+ )
if instr.check_eval_breaker:
self.out.emit("CHECK_EVAL_BREAKER();")
self.out.emit("break;")
with self.out.block(f"TARGET({name})"):
if instr.predicted:
self.out.emit(f"PREDICTED({name});")
- instr.write(self.out)
+ self.out.static_assert_family_size(
+ instr.name, instr.family, instr.cache_offset
+ )
+ stacking.write_single_instr(instr, self.out, tier=TIER_ONE)
if not instr.always_exits:
+ if instr.cache_offset:
+ self.out.emit(f"next_instr += {instr.cache_offset};")
if instr.check_eval_breaker:
self.out.emit("CHECK_EVAL_BREAKER();")
self.out.emit(f"DISPATCH();")
block_line: int # First line of block in original code
# Computed by constructor
- always_exits: bool
+ always_exits: str # If the block always exits, its last line; else ""
has_deopt: bool
cache_offset: int
cache_effects: list[parsing.CacheEffect]
def is_viable_uop(self) -> bool:
"""Whether this instruction is viable as a uop."""
dprint: typing.Callable[..., None] = lambda *args, **kwargs: None
- # if self.name.startswith("CALL"):
- # dprint = print
+ if "FRAME" in self.name:
+ dprint = print
if self.name == "EXIT_TRACE":
return True # This has 'return frame' but it's okay
if self.always_exits:
- dprint(f"Skipping {self.name} because it always exits")
+ dprint(f"Skipping {self.name} because it always exits: {self.always_exits}")
return False
if len(self.active_caches) > 1:
# print(f"Skipping {self.name} because it has >1 cache entries")
res = False
return res
- def write(self, out: Formatter, tier: Tiers = TIER_ONE) -> None:
- """Write one instruction, sans prologue and epilogue."""
-
- # Write a static assertion that a family's cache size is correct
- out.static_assert_family_size(self.name, self.family, self.cache_offset)
-
- # Write input stack effect variable declarations and initializations
- stacking.write_single_instr(self, out, tier)
-
- # Skip the rest if the block always exits
- if self.always_exits:
- return
-
- # Write cache effect
- if tier == TIER_ONE and self.cache_offset:
- out.emit(f"next_instr += {self.cache_offset};")
-
def write_body(
self,
out: Formatter,
return blocklines, check_eval_breaker, block_line
-def always_exits(lines: list[str]) -> bool:
+def always_exits(lines: list[str]) -> str:
"""Determine whether a block always ends in a return/goto/etc."""
if not lines:
- return False
+ return ""
line = lines[-1].rstrip()
# Indent must match exactly (TODO: Do something better)
if line[:12] != " " * 12:
- return False
+ return ""
line = line[12:]
- return line.startswith(
+ if line.startswith(
(
"goto ",
"return ",
"Py_UNREACHABLE()",
"ERROR_IF(true, ",
)
- )
+ ):
+ return line
+ return ""
import dataclasses
import typing
+from flags import variable_used_unspecialized
from formatting import (
Formatter,
UNUSED,
# Track offsets from stack pointer
min_offset: StackOffset
final_offset: StackOffset
+ # Link to previous manager
+ pred: "EffectManager | None" = None
def __init__(
self,
self.pokes.append(StackItem(offset=self.final_offset.clone(), effect=eff))
self.final_offset.higher(eff)
- if pred:
+ self.pred = pred
+ while pred:
# Replace push(x) + pop(y) with copy(x, y).
# Check that the sources and destinations are disjoint.
sources: set[str] = set()
sources,
destinations,
)
+            # See if we can get more copies of an earlier predecessor.
+ if self.peeks and not pred.pokes and not pred.peeks:
+ pred = pred.pred
+ else:
+ pred = None # Break
def adjust_deeper(self, eff: StackEffect) -> None:
for peek in self.peeks:
[Component(instr, instr.active_caches)],
out,
tier,
+ 0,
)
except AssertionError as err:
raise AssertionError(f"Error writing instruction {instr.name}") from err
def write_macro_instr(
mac: MacroInstruction, out: Formatter, family: Family | None
) -> None:
- parts = [part for part in mac.parts if isinstance(part, Component)]
-
- cache_adjust = 0
- for part in mac.parts:
- match part:
- case CacheEffect(size=size):
- cache_adjust += size
- case Component(instr=instr):
- cache_adjust += instr.cache_offset
- case _:
- typing.assert_never(part)
-
+ parts = [
+ part
+ for part in mac.parts
+ if isinstance(part, Component) and part.instr.name != "SAVE_IP"
+ ]
out.emit("")
with out.block(f"TARGET({mac.name})"):
if mac.predicted:
out.emit(f"PREDICTED({mac.name});")
- out.static_assert_family_size(mac.name, family, cache_adjust)
+ out.static_assert_family_size(mac.name, family, mac.cache_offset)
try:
- write_components(parts, out, TIER_ONE)
+ next_instr_is_set = write_components(parts, out, TIER_ONE, mac.cache_offset)
except AssertionError as err:
raise AssertionError(f"Error writing macro {mac.name}") from err
- if cache_adjust:
- out.emit(f"next_instr += {cache_adjust};")
- out.emit("DISPATCH();")
+ if not parts[-1].instr.always_exits and not next_instr_is_set:
+ if mac.cache_offset:
+ out.emit(f"next_instr += {mac.cache_offset};")
+ out.emit("DISPATCH();")
def write_components(
parts: list[Component],
out: Formatter,
tier: Tiers,
-) -> None:
+ cache_offset: int,
+) -> bool:
managers = get_managers(parts)
all_vars: dict[str, StackEffect] = {}
for name, eff in all_vars.items():
out.declare(eff, None)
+ next_instr_is_set = False
for mgr in managers:
if len(parts) > 1:
out.emit(f"// {mgr.instr.name}")
poke.as_stack_effect(lax=True),
)
+ if mgr.instr.name == "_PUSH_FRAME":
+ # Adjust stack to min_offset (input effects materialized)
+ out.stack_adjust(mgr.min_offset.deep, mgr.min_offset.high)
+ # Use clone() since adjust_inverse() mutates final_offset
+ mgr.adjust_inverse(mgr.final_offset.clone())
+
+ if mgr.instr.name == "SAVE_CURRENT_IP":
+ next_instr_is_set = True
+ if cache_offset:
+ out.emit(f"next_instr += {cache_offset};")
+
if len(parts) == 1:
mgr.instr.write_body(out, 0, mgr.active_caches, tier)
else:
with out.block(""):
mgr.instr.write_body(out, -4, mgr.active_caches, tier)
- if mgr is managers[-1]:
+ if mgr is managers[-1] and not next_instr_is_set:
+ # TODO: Explain why this adjustment is needed.
out.stack_adjust(mgr.final_offset.deep, mgr.final_offset.high)
# Use clone() since adjust_inverse() mutates final_offset
mgr.adjust_inverse(mgr.final_offset.clone())
poke.effect,
)
+ return next_instr_is_set
+
def write_single_instr_for_abstract_interp(
instr: Instruction, out: Formatter