#ifndef NEED_OPCODE_METADATA
extern const struct opcode_metadata _PyOpcode_opcode_metadata[512];
extern const struct opcode_macro_expansion _PyOpcode_macro_expansion[256];
+#ifdef Py_DEBUG
+extern const char * const _PyOpcode_uop_name[512];
+#endif
#else
const struct opcode_metadata _PyOpcode_opcode_metadata[512] = {
[NOP] = { true, INSTR_FMT_IX, 0 },
[STORE_FAST] = { .nuops = 1, .uops = { { STORE_FAST, 0, 0 } } },
[POP_TOP] = { .nuops = 1, .uops = { { POP_TOP, 0, 0 } } },
[PUSH_NULL] = { .nuops = 1, .uops = { { PUSH_NULL, 0, 0 } } },
+ [END_FOR] = { .nuops = 2, .uops = { { POP_TOP, 0, 0 }, { POP_TOP, 0, 0 } } },
[END_SEND] = { .nuops = 1, .uops = { { END_SEND, 0, 0 } } },
[UNARY_NEGATIVE] = { .nuops = 1, .uops = { { UNARY_NEGATIVE, 0, 0 } } },
[UNARY_NOT] = { .nuops = 1, .uops = { { UNARY_NOT, 0, 0 } } },
[UNARY_INVERT] = { .nuops = 1, .uops = { { UNARY_INVERT, 0, 0 } } },
+ [BINARY_OP_MULTIPLY_INT] = { .nuops = 2, .uops = { { _GUARD_BOTH_INT, 0, 0 }, { _BINARY_OP_MULTIPLY_INT, 0, 0 } } },
+ [BINARY_OP_ADD_INT] = { .nuops = 2, .uops = { { _GUARD_BOTH_INT, 0, 0 }, { _BINARY_OP_ADD_INT, 0, 0 } } },
+ [BINARY_OP_SUBTRACT_INT] = { .nuops = 2, .uops = { { _GUARD_BOTH_INT, 0, 0 }, { _BINARY_OP_SUBTRACT_INT, 0, 0 } } },
+ [BINARY_OP_MULTIPLY_FLOAT] = { .nuops = 2, .uops = { { _GUARD_BOTH_FLOAT, 0, 0 }, { _BINARY_OP_MULTIPLY_FLOAT, 0, 0 } } },
+ [BINARY_OP_ADD_FLOAT] = { .nuops = 2, .uops = { { _GUARD_BOTH_FLOAT, 0, 0 }, { _BINARY_OP_ADD_FLOAT, 0, 0 } } },
+ [BINARY_OP_SUBTRACT_FLOAT] = { .nuops = 2, .uops = { { _GUARD_BOTH_FLOAT, 0, 0 }, { _BINARY_OP_SUBTRACT_FLOAT, 0, 0 } } },
+ [BINARY_OP_ADD_UNICODE] = { .nuops = 2, .uops = { { _GUARD_BOTH_UNICODE, 0, 0 }, { _BINARY_OP_ADD_UNICODE, 0, 0 } } },
[BINARY_SLICE] = { .nuops = 1, .uops = { { BINARY_SLICE, 0, 0 } } },
[STORE_SLICE] = { .nuops = 1, .uops = { { STORE_SLICE, 0, 0 } } },
[BINARY_SUBSCR_LIST_INT] = { .nuops = 1, .uops = { { BINARY_SUBSCR_LIST_INT, 0, 0 } } },
[DELETE_ATTR] = { .nuops = 1, .uops = { { DELETE_ATTR, 0, 0 } } },
[STORE_GLOBAL] = { .nuops = 1, .uops = { { STORE_GLOBAL, 0, 0 } } },
[DELETE_GLOBAL] = { .nuops = 1, .uops = { { DELETE_GLOBAL, 0, 0 } } },
+ [LOAD_LOCALS] = { .nuops = 1, .uops = { { _LOAD_LOCALS, 0, 0 } } },
+ [LOAD_NAME] = { .nuops = 2, .uops = { { _LOAD_LOCALS, 0, 0 }, { _LOAD_FROM_DICT_OR_GLOBALS, 0, 0 } } },
+ [LOAD_FROM_DICT_OR_GLOBALS] = { .nuops = 1, .uops = { { _LOAD_FROM_DICT_OR_GLOBALS, 0, 0 } } },
[DELETE_DEREF] = { .nuops = 1, .uops = { { DELETE_DEREF, 0, 0 } } },
[LOAD_FROM_DICT_OR_DEREF] = { .nuops = 1, .uops = { { LOAD_FROM_DICT_OR_DEREF, 0, 0 } } },
[LOAD_DEREF] = { .nuops = 1, .uops = { { LOAD_DEREF, 0, 0 } } },
[COPY] = { .nuops = 1, .uops = { { COPY, 0, 0 } } },
[SWAP] = { .nuops = 1, .uops = { { SWAP, 0, 0 } } },
};
+#ifdef Py_DEBUG
+const char * const _PyOpcode_uop_name[512] = {
+ [300] = "EXIT_TRACE",
+ [301] = "SET_IP",
+ [302] = "_GUARD_BOTH_INT",
+ [303] = "_BINARY_OP_MULTIPLY_INT",
+ [304] = "_BINARY_OP_ADD_INT",
+ [305] = "_BINARY_OP_SUBTRACT_INT",
+ [306] = "_GUARD_BOTH_FLOAT",
+ [307] = "_BINARY_OP_MULTIPLY_FLOAT",
+ [308] = "_BINARY_OP_ADD_FLOAT",
+ [309] = "_BINARY_OP_SUBTRACT_FLOAT",
+ [310] = "_GUARD_BOTH_UNICODE",
+ [311] = "_BINARY_OP_ADD_UNICODE",
+ [312] = "_LOAD_LOCALS",
+ [313] = "_LOAD_FROM_DICT_OR_GLOBALS",
+};
+#endif
#endif
f"(_PyOpcode_opcode_metadata[(OP)].flags & ({name}))")
+@dataclasses.dataclass
+class ActiveCacheEffect:
+ """Wraps a CacheEffect that is actually used, in context."""
+ effect: parser.CacheEffect
+ offset: int
+
+
FORBIDDEN_NAMES_IN_UOPS = (
"resume_with_error", # Proxy for "goto", which isn't an IDENTIFIER
"unbound_local_error",
unmoved_names: frozenset[str]
instr_fmt: str
instr_flags: InstructionFlags
+ active_caches: list[ActiveCacheEffect]
# Set later
family: parser.Family | None = None
self.instr_flags = InstructionFlags.fromInstruction(inst)
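+        # Remember which cache effects are actually named (not UNUSED),
+        # along with each one's offset into the instruction's cache array.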
+ self.active_caches = []
+ offset = 0
+ for effect in self.cache_effects:
+ if effect.name != UNUSED:
+ self.active_caches.append(ActiveCacheEffect(effect, offset))
+ offset += effect.size
+
if self.instr_flags.HAS_ARG_FLAG:
fmt = "IB"
else:
fmt = "IX"
- cache = "C"
- for ce in self.cache_effects:
- for _ in range(ce.size):
- fmt += cache
- cache = "0"
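+        # Mark the first cache code unit with 'C' and any remaining units with '0'.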
+ if offset:
+ fmt += "C" + "0"*(offset-1)
self.instr_fmt = fmt
def is_viable_uop(self) -> bool:
return False
if self.instr_flags.HAS_ARG_FLAG:
# If the instruction uses oparg, it cannot use any caches
- for c in self.cache_effects:
- if c.name != UNUSED:
- return False
+ if self.active_caches:
+ return False
else:
# If it doesn't use oparg, it can have one cache entry
- caches: list[parser.CacheEffect] = []
- cache_offset = 0
- for c in self.cache_effects:
- if c.name != UNUSED:
- caches.append(c)
- cache_offset += c.size
- if len(caches) > 1:
+ if len(self.active_caches) > 1:
return False
for forbidden in FORBIDDEN_NAMES_IN_UOPS:
# TODO: Don't check in '#ifdef ENABLE_SPECIALIZATION' regions
# out.emit(f"next_instr += OPSIZE({self.inst.name}) - 1;")
- self.write_body(out, 0, tier=tier)
+ self.write_body(out, 0, self.active_caches, tier=tier)
# Skip the rest if the block always exits
if self.always_exits:
self,
out: Formatter,
dedent: int,
- cache_adjust: int = 0,
+ active_caches: list[ActiveCacheEffect],
tier: Tiers = TIER_ONE,
) -> None:
"""Write the instruction body."""
# Write cache effect variable declarations and initializations
- cache_offset = cache_adjust
- for ceffect in self.cache_effects:
- if ceffect.name != UNUSED:
- bits = ceffect.size * BITS_PER_CODE_UNIT
- if bits == 64:
- # NOTE: We assume that 64-bit data in the cache
- # is always an object pointer.
- # If this becomes false, we need a way to specify
- # syntactically what type the cache data is.
- typ = "PyObject *"
- func = "read_obj"
- else:
- typ = f"uint{bits}_t "
- func = f"read_u{bits}"
- if tier == TIER_ONE:
- out.emit(
- f"{typ}{ceffect.name} = {func}(&next_instr[{cache_offset}].cache);"
- )
- else:
- out.emit(f"{typ}{ceffect.name} = operand;")
- cache_offset += ceffect.size
- assert cache_offset == self.cache_offset + cache_adjust
+ for active in active_caches:
+ ceffect = active.effect
+ bits = ceffect.size * BITS_PER_CODE_UNIT
+ if bits == 64:
+ # NOTE: We assume that 64-bit data in the cache
+ # is always an object pointer.
+ # If this becomes false, we need a way to specify
+ # syntactically what type the cache data is.
+ typ = "PyObject *"
+ func = "read_obj"
+ else:
+ typ = f"uint{bits}_t "
+ func = f"read_u{bits}"
+ if tier == TIER_ONE:
+ out.emit(
+ f"{typ}{ceffect.name} = {func}(&next_instr[{active.offset}].cache);"
+ )
+ else:
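+                # Tier 2: the cached value is passed in as the uop's 'operand'.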
+ out.emit(f"{typ}{ceffect.name} = operand;")
# Write the body, substituting a goto for ERROR_IF() and other stuff
assert dedent <= 0
instr: Instruction
input_mapping: StackEffectMapping
output_mapping: StackEffectMapping
+ active_caches: list[ActiveCacheEffect]
- def write_body(self, out: Formatter, cache_adjust: int) -> None:
+ def write_body(self, out: Formatter) -> None:
with out.block(""):
input_names = {ieffect.name for _, ieffect in self.input_mapping}
for var, ieffect in self.input_mapping:
if oeffect.name not in input_names:
out.declare(oeffect, None)
- self.instr.write_body(out, dedent=-4, cache_adjust=cache_adjust)
+ self.instr.write_body(out, -4, self.active_caches)
for var, oeffect in self.output_mapping:
out.assign(var, oeffect)
instr_flags: InstructionFlags
macro: parser.Macro
parts: list[Component | parser.CacheEffect]
+ cache_offset: int
predicted: bool = False
cache = instr.cache_offset
input = len(instr.input_effects)
output = len(instr.output_effects)
- elif macro := self.macro_instrs.get(name):
- cache, input, output = 0, 0, 0
- for part in macro.parts:
+ elif mac := self.macro_instrs.get(name):
+ cache = mac.cache_offset
+ input, output = 0, 0
+ for part in mac.parts:
if isinstance(part, Component):
- cache += part.instr.cache_offset
# A component may pop what the previous component pushed,
# so we offset the input/output counts by that.
delta_i = len(part.instr.input_effects)
offset = min(delta_i, output)
input += delta_i - offset
output += delta_o - offset
- else:
- assert isinstance(part, parser.CacheEffect), part
- cache += part.size
else:
assert False, f"Unknown instruction {name!r}"
return cache, input, output
stack, initial_sp = self.stack_analysis(components)
sp = initial_sp
parts: list[Component | parser.CacheEffect] = []
- format = "IB"
flags = InstructionFlags.newEmpty()
- cache = "C"
+ offset = 0
for component in components:
match component:
case parser.CacheEffect() as ceffect:
parts.append(ceffect)
- for _ in range(ceffect.size):
- format += cache
- cache = "0"
+ offset += ceffect.size
case Instruction() as instr:
- part, sp = self.analyze_instruction(instr, stack, sp)
+ part, sp, offset = self.analyze_instruction(instr, stack, sp, offset)
parts.append(part)
- for ce in instr.cache_effects:
- for _ in range(ce.size):
- format += cache
- cache = "0"
flags.add(instr.instr_flags)
case _:
typing.assert_never(component)
final_sp = sp
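+        # Macros always get the oparg byte ('IB'); cache entries, if any, follow it.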
+ format = "IB"
+ if offset:
+ format += "C" + "0"*(offset-1)
return MacroInstruction(
- macro.name, stack, initial_sp, final_sp, format, flags, macro, parts
+ macro.name, stack, initial_sp, final_sp, format, flags, macro, parts, offset
)
def analyze_pseudo(self, pseudo: parser.Pseudo) -> PseudoInstruction:
return PseudoInstruction(pseudo.name, targets, fmts[0], targets[0].instr_flags)
def analyze_instruction(
- self, instr: Instruction, stack: list[StackEffect], sp: int
- ) -> tuple[Component, int]:
+ self, instr: Instruction, stack: list[StackEffect], sp: int, offset: int
+ ) -> tuple[Component, int, int]:
input_mapping: StackEffectMapping = []
for ieffect in reversed(instr.input_effects):
sp -= 1
for oeffect in instr.output_effects:
output_mapping.append((stack[sp], oeffect))
sp += 1
- return Component(instr, input_mapping, output_mapping), sp
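+        # Collect this component's named cache effects, with offsets relative
+        # to the start of the macro's combined cache.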
+ active_effects: list[ActiveCacheEffect] = []
+ for ceffect in instr.cache_effects:
+ if ceffect.name != UNUSED:
+ active_effects.append(ActiveCacheEffect(ceffect, offset))
+ offset += ceffect.size
+ return Component(instr, input_mapping, output_mapping, active_effects), sp, offset
def check_macro_components(
self, macro: parser.Macro
def get_stack_effect_info(
self, thing: parser.InstDef | parser.Macro | parser.Pseudo
- ) -> tuple[AnyInstruction | None, str, str]:
+ ) -> tuple[AnyInstruction | None, str | None, str | None]:
def effect_str(effects: list[StackEffect]) -> str:
n_effect, sym_effect = list_effect_size(effects)
if sym_effect:
continue
instr, popped, pushed = self.get_stack_effect_info(thing)
if instr is not None:
+ assert popped is not None and pushed is not None
popped_data.append((instr, popped))
pushed_data.append((instr, pushed))
self.write_pseudo_instrs()
- self.write_uop_defines()
+ self.out.emit("")
+ self.write_uop_items(lambda name, counter: f"#define {name} {counter}")
self.write_stack_effect_functions()
self.out.emit("#ifndef NEED_OPCODE_METADATA")
self.out.emit("extern const struct opcode_metadata _PyOpcode_opcode_metadata[512];")
self.out.emit("extern const struct opcode_macro_expansion _PyOpcode_macro_expansion[256];")
+ self.out.emit("#ifdef Py_DEBUG")
+ self.out.emit("extern const char * const _PyOpcode_uop_name[512];")
+ self.out.emit("#endif")
self.out.emit("#else")
self.out.emit("const struct opcode_metadata _PyOpcode_opcode_metadata[512] = {")
pass
case parser.InstDef(name=name):
instr = self.instrs[name]
+ # Since an 'op' is not a bytecode, it has no expansion
if instr.kind != "op" and instr.is_viable_uop():
+ # Double check there aren't any used cache effects.
+ # If this fails, see write_macro_expansions().
+ assert not instr.active_caches, (instr.name, instr.cache_effects)
self.out.emit(
f"[{name}] = "
f"{{ .nuops = 1, .uops = {{ {{ {name}, 0, 0 }} }} }},"
)
case parser.Macro():
- # TODO: emit expansion if all parts are viable uops
- pass
+ self.write_macro_expansions(self.macro_instrs[thing.name])
case parser.Pseudo():
pass
case _:
typing.assert_never(thing)
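+        # Also emit a debug-only table mapping uop numbers back to their names.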
+ self.out.emit("#ifdef Py_DEBUG")
+ with self.out.block("const char * const _PyOpcode_uop_name[512] =", ";"):
+ self.write_uop_items(lambda name, counter: f"[{counter}] = \"{name}\",")
+ self.out.emit("#endif")
+
self.out.emit("#endif")
with open(self.pymetadata_filename, "w") as f:
self.out.emit(f" ((OP) == {op}) || \\")
self.out.emit(f" 0")
- def write_uop_defines(self) -> None:
+ def write_uop_items(self, make_text: typing.Callable[[str, int], str]) -> None:
"""Write '#define XXX NNN' for each uop"""
- self.out.emit("")
- counter = 300
+ counter = 300 # TODO: Avoid collision with pseudo instructions
def add(name: str) -> None:
nonlocal counter
- self.out.emit(f"#define {name} {counter}")
+ self.out.emit(make_text(name, counter))
counter += 1
add("EXIT_TRACE")
add("SET_IP")
if instr.kind == "op" and instr.is_viable_uop():
add(instr.name)
+ def write_macro_expansions(self, mac: MacroInstruction) -> None:
+ """Write the macro expansions for a macro-instruction."""
+        # TODO: Refactor to share code with write_body(), is_viable_uop(), etc.
+ offset = 0 # Cache effect offset
+ expansions: list[tuple[str, int, int]] = [] # [(name, size, offset), ...]
+ for part in mac.parts:
+ if isinstance(part, Component):
+ # All component instructions must be viable uops
+ if not part.instr.is_viable_uop():
+ print(f"NOTE: Part {part.instr.name} of {mac.name} is not a viable uop")
+ return
+ if part.instr.instr_flags.HAS_ARG_FLAG or not part.active_caches:
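+                # The uop either uses oparg or has no cache entries, so no cache operand.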
+ size, offset = 0, 0
+ else:
+                # If this assert triggers, is_viable_uop() lied
+ assert len(part.active_caches) == 1, (mac.name, part.instr.name)
+ cache = part.active_caches[0]
+ size, offset = cache.effect.size, cache.offset
+ expansions.append((part.instr.name, size, offset))
+ assert len(expansions) > 0, f"Macro {mac.name} has empty expansion?!"
+ pieces = [f"{{ {name}, {size}, {offset} }}" for name, size, offset in expansions]
+ self.out.emit(
+ f"[{mac.name}] = "
+ f"{{ .nuops = {len(expansions)}, .uops = {{ {', '.join(pieces)} }} }},"
+ )
+
def emit_metadata_entry(
self, name: str, fmt: str, flags: InstructionFlags
) -> None:
for thing in self.everything:
match thing:
case OverriddenInstructionPlaceHolder():
+ # TODO: Is this helpful?
self.write_overridden_instr_place_holder(thing)
case parser.InstDef():
instr = self.instrs[thing.name]
instr.write(self.out, tier=TIER_TWO)
self.out.emit("break;")
case parser.Macro():
- pass # TODO
+ pass
case parser.Pseudo():
pass
case _:
cache_adjust += size
case Component() as comp:
last_instr = comp.instr
- comp.write_body(self.out, cache_adjust)
+ comp.write_body(self.out)
cache_adjust += comp.instr.cache_offset
if cache_adjust: