[_CHECK_FUNCTION_EXACT_ARGS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG,
[_CHECK_STACK_SPACE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG,
[_INIT_CALL_PY_EXACT_ARGS] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG,
- [_PUSH_FRAME] = 0,
+ [_PUSH_FRAME] = HAS_ESCAPES_FLAG,
[_CALL_TYPE_1] = HAS_ARG_FLAG | HAS_DEOPT_FLAG,
[_CALL_STR_1] = HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_CALL_TUPLE_1] = HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
#if TIER_ONE
assert(frame != &entry_frame);
#endif
- STORE_SP();
+ SYNC_SP();
+ _PyFrame_SetStackPointer(frame, stack_pointer);
assert(EMPTY());
_Py_LeaveRecursiveCallPy(tstate);
// GH-99729: We need to unlink the frame *before* clearing it:
// Write it out explicitly because it's subtly different.
// Eventually this should be the only occurrence of this code.
assert(tstate->interp->eval_frame == NULL);
- STORE_SP();
+ SYNC_SP();
+ _PyFrame_SetStackPointer(frame, stack_pointer);
new_frame->previous = frame;
CALL_STAT_INC(inlined_py_calls);
frame = tstate->current_frame = new_frame;
///////// Tier-2 only opcodes /////////
op (_GUARD_IS_TRUE_POP, (flag -- )) {
- DEOPT_IF(Py_IsFalse(flag));
+ SYNC_SP();
+ DEOPT_IF(!Py_IsTrue(flag));
assert(Py_IsTrue(flag));
}
op (_GUARD_IS_FALSE_POP, (flag -- )) {
- DEOPT_IF(Py_IsTrue(flag));
+ SYNC_SP();
+ DEOPT_IF(!Py_IsFalse(flag));
assert(Py_IsFalse(flag));
}
op (_GUARD_IS_NONE_POP, (val -- )) {
- DEOPT_IF(!Py_IsNone(val));
+ SYNC_SP();
+ if (!Py_IsNone(val)) {
+ Py_DECREF(val);
+ DEOPT_IF(1);
+ }
}
op (_GUARD_IS_NOT_NONE_POP, (val -- )) {
+ SYNC_SP();
DEOPT_IF(Py_IsNone(val));
Py_DECREF(val);
}
case _GUARD_IS_TRUE_POP: {
PyObject *flag;
flag = stack_pointer[-1];
- if (Py_IsFalse(flag)) goto deoptimize;
- assert(Py_IsTrue(flag));
stack_pointer += -1;
+ if (!Py_IsTrue(flag)) goto deoptimize;
+ assert(Py_IsTrue(flag));
break;
}
case _GUARD_IS_FALSE_POP: {
PyObject *flag;
flag = stack_pointer[-1];
- if (Py_IsTrue(flag)) goto deoptimize;
- assert(Py_IsFalse(flag));
stack_pointer += -1;
+ if (!Py_IsFalse(flag)) goto deoptimize;
+ assert(Py_IsFalse(flag));
break;
}
case _GUARD_IS_NONE_POP: {
PyObject *val;
val = stack_pointer[-1];
- if (!Py_IsNone(val)) goto deoptimize;
stack_pointer += -1;
+ if (!Py_IsNone(val)) {
+ Py_DECREF(val);
+ if (1) goto deoptimize;
+ }
break;
}
case _GUARD_IS_NOT_NONE_POP: {
PyObject *val;
val = stack_pointer[-1];
+ stack_pointer += -1;
if (Py_IsNone(val)) goto deoptimize;
Py_DECREF(val);
- stack_pointer += -1;
break;
}
goto done;
}
uint32_t uopcode = BRANCH_TO_GUARD[opcode - POP_JUMP_IF_FALSE][jump_likely];
- _Py_CODEUNIT *next_instr = instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]];
DPRINTF(2, "%s(%d): counter=%x, bitcount=%d, likely=%d, confidence=%d, uopcode=%s\n",
_PyOpcode_OpName[opcode], oparg,
counter, bitcount, jump_likely, confidence, _PyUOpName(uopcode));
- ADD_TO_TRACE(uopcode, max_length, 0, target);
+ _Py_CODEUNIT *next_instr = instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]];
+ _Py_CODEUNIT *target_instr = next_instr + oparg;
if (jump_likely) {
- _Py_CODEUNIT *target_instr = next_instr + oparg;
DPRINTF(2, "Jump likely (%x = %d bits), continue at byte offset %d\n",
instr[1].cache, bitcount, 2 * INSTR_IP(target_instr, code));
instr = target_instr;
+ ADD_TO_TRACE(uopcode, max_length, 0, INSTR_IP(next_instr, code));
goto top;
}
+ ADD_TO_TRACE(uopcode, max_length, 0, INSTR_IP(target_instr, code));
break;
}
ends_with_eval_breaker=eval_breaker_at_end(op),
needs_this=variable_used(op, "this_instr"),
always_exits=always_exits(op),
- stores_sp=variable_used(op, "STORE_SP"),
+ stores_sp=variable_used(op, "SYNC_SP"),
tier_one_only=variable_used(op, "TIER_ONE_ONLY"),
uses_co_consts=variable_used(op, "FRAME_CO_CONSTS"),
uses_co_names=variable_used(op, "FRAME_CO_NAMES"),
out.emit(f"Py_DECREF({var.name});\n")
-def replace_store_sp(
+def replace_sync_sp(
out: CWriter,
tkn: Token,
tkn_iter: Iterator[Token],
next(tkn_iter)
next(tkn_iter)
next(tkn_iter)
- out.emit_at("", tkn)
stack.flush(out)
- out.emit("_PyFrame_SetStackPointer(frame, stack_pointer);\n")
def replace_check_eval_breaker(
"ERROR_IF": replace_error,
"DECREF_INPUTS": replace_decrefs,
"CHECK_EVAL_BREAKER": replace_check_eval_breaker,
- "STORE_SP": replace_store_sp,
+ "SYNC_SP": replace_sync_sp,
}
ReplacementFunctionType = Callable[
bytecode instructions, the dispatching mechanism, error handling, and
tracing and instrumentation are all intermixed.
-This document proposes defining a custom C-like DSL for defining the
+This document proposes defining a custom C-like DSL for defining the
instruction semantics and tools for generating the code deriving from
the instruction definitions.
As we improve the performance of CPython, we need to optimize larger regions
of code, use more complex optimizations and, ultimately, translate to machine
-code.
+code.
All of these steps introduce the possibility of more bugs, and require more code
to be written. One way to mitigate this is through the use of code generators.
Rewriting all the instructions is tedious and error-prone, and changing the
instructions is a maintenance headache as both versions need to be kept in sync.
-By using a code generator and using a common source for the instructions, or
+By using a code generator and using a common source for the instructions, or
parts of instructions, we can reduce the potential for errors considerably.
Each op definition has a kind, a name, a stack and instruction stream effect,
and a piece of C code describing its semantics:
-
+
```
file:
(definition | family | pseudo)+
"op" "(" NAME "," stack_effect ")" "{" C-code "}"
|
"macro" "(" NAME ")" "=" uop ("+" uop)* ";"
-
+
stack_effect:
"(" [inputs] "--" [outputs] ")"
* `DEOPT_IF(cond, instruction)`. Deoptimize if `cond` is met.
* `ERROR_IF(cond, label)`. Jump to error handler at `label` if `cond` is true.
* `DECREF_INPUTS()`. Generate `Py_DECREF()` calls for the input stack effects.
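+* `SYNC_SP()`. Synchronizes the physical stack pointer with the stack effects. It does not save the stack pointer to the frame; call `_PyFrame_SetStackPointer()` explicitly when that is needed.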
Note that the use of `DECREF_INPUTS()` is optional -- manual calls
to `Py_DECREF()` or other approaches are also acceptable
stack_pointer += 1;
}
s1 = res;
- }
+ }
next_instr += (1 + 1 + 2 + 1 + 4);
stack_pointer[-1] = s1;
DISPATCH();
return ""
def flush(self, out: CWriter) -> None:
+ out.start_line()
for var in self.variables:
if not var.peek:
cast = "(PyObject *)" if var.type else ""
self.base_offset.clear()
self.top_offset.clear()
self.peek_offset.clear()
+ out.start_line()
def as_comment(self) -> str:
return f"/* Variables: {[v.name for v in self.variables]}. Base offset: {self.base_offset.to_c()}. Top offset: {self.top_offset.to_c()} */"