]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-109287: Desugar inst(X) to op(X); macro(X) = X (#109294)
authorGuido van Rossum <guido@python.org>
Fri, 15 Sep 2023 15:39:05 +0000 (08:39 -0700)
committerGitHub <noreply@github.com>
Fri, 15 Sep 2023 15:39:05 +0000 (08:39 -0700)
This makes the internal representation in the code generator simpler: there's a list of ops, and a list of macros, and there's no special-casing needed for ops that aren't macros. (There's now special-casing for ops that are also macros, but that's simpler.)

Include/internal/pycore_opcode_metadata.h
Tools/cases_generator/analysis.py
Tools/cases_generator/generate_cases.py
Tools/cases_generator/instructions.py
Tools/cases_generator/stacking.py

index 856c3acb284b2340d2b892fbcadb0d6e73036773..bb37e9a1d1b6b64c4bc8b0d8100992b8599c3789 100644 (file)
@@ -170,6 +170,8 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump)  {
             return 2;
         case BINARY_OP_ADD_UNICODE:
             return 2;
+        case _BINARY_OP_INPLACE_ADD_UNICODE:
+            return 2;
         case BINARY_OP_INPLACE_ADD_UNICODE:
             return 2;
         case BINARY_SUBSCR:
@@ -430,6 +432,8 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump)  {
             return 0;
         case _ITER_CHECK_LIST:
             return 1;
+        case _ITER_JUMP_LIST:
+            return 1;
         case _IS_ITER_EXHAUSTED_LIST:
             return 1;
         case _ITER_NEXT_LIST:
@@ -438,6 +442,8 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump)  {
             return 1;
         case _ITER_CHECK_TUPLE:
             return 1;
+        case _ITER_JUMP_TUPLE:
+            return 1;
         case _IS_ITER_EXHAUSTED_TUPLE:
             return 1;
         case _ITER_NEXT_TUPLE:
@@ -446,6 +452,8 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump)  {
             return 1;
         case _ITER_CHECK_RANGE:
             return 1;
+        case _ITER_JUMP_RANGE:
+            return 1;
         case _IS_ITER_EXHAUSTED_RANGE:
             return 1;
         case _ITER_NEXT_RANGE:
@@ -702,6 +710,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump)  {
             return 1;
         case BINARY_OP_ADD_UNICODE:
             return 1;
+        case _BINARY_OP_INPLACE_ADD_UNICODE:
+            return 0;
         case BINARY_OP_INPLACE_ADD_UNICODE:
             return 0;
         case BINARY_SUBSCR:
@@ -962,6 +972,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump)  {
             return 0;
         case _ITER_CHECK_LIST:
             return 1;
+        case _ITER_JUMP_LIST:
+            return 1;
         case _IS_ITER_EXHAUSTED_LIST:
             return 2;
         case _ITER_NEXT_LIST:
@@ -970,6 +982,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump)  {
             return 2;
         case _ITER_CHECK_TUPLE:
             return 1;
+        case _ITER_JUMP_TUPLE:
+            return 1;
         case _IS_ITER_EXHAUSTED_TUPLE:
             return 2;
         case _ITER_NEXT_TUPLE:
@@ -978,6 +992,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump)  {
             return 2;
         case _ITER_CHECK_RANGE:
             return 1;
+        case _ITER_JUMP_RANGE:
+            return 1;
         case _IS_ITER_EXHAUSTED_RANGE:
             return 2;
         case _ITER_NEXT_RANGE:
@@ -1905,11 +1921,11 @@ const uint8_t _PyOpcode_Caches[256] = {
     [COMPARE_OP] = 1,
     [POP_JUMP_IF_FALSE] = 1,
     [POP_JUMP_IF_TRUE] = 1,
+    [POP_JUMP_IF_NONE] = 1,
+    [POP_JUMP_IF_NOT_NONE] = 1,
     [FOR_ITER] = 1,
     [CALL] = 3,
     [BINARY_OP] = 1,
-    [POP_JUMP_IF_NONE] = 1,
-    [POP_JUMP_IF_NOT_NONE] = 1,
     [JUMP_BACKWARD] = 1,
 };
 #endif // NEED_OPCODE_METADATA
index 9e0124bd90878ef758ecbdf4da6792458bd93fdb..b920c0aa8c1c8af9761da18aa948201b3a131216 100644 (file)
@@ -94,13 +94,8 @@ class Analyzer:
             self.parse_file(filename, instrs_idx)
 
         files = " + ".join(self.input_filenames)
-        n_instrs = 0
-        n_ops = 0
-        for instr in self.instrs.values():
-            if instr.kind == "op":
-                n_ops += 1
-            else:
-                n_instrs += 1
+        n_instrs = len(set(self.instrs) & set(self.macros))
+        n_ops = len(self.instrs) - n_instrs
         print(
             f"Read {n_instrs} instructions, {n_ops} ops, "
             f"{len(self.macros)} macros, {len(self.pseudos)} pseudos, "
@@ -145,6 +140,9 @@ class Analyzer:
 
             match thing:
                 case parsing.InstDef(name=name):
+                    macro: parsing.Macro | None = None
+                    if thing.kind == "inst":
+                        macro = parsing.Macro(name, [parsing.OpName(name)])
                     if name in self.instrs:
                         if not thing.override:
                             raise psr.make_syntax_error(
@@ -152,9 +150,12 @@ class Analyzer:
                                 f"previous definition @ {self.instrs[name].inst.context}",
                                 thing_first_token,
                             )
-                        self.everything[
-                            instrs_idx[name]
-                        ] = OverriddenInstructionPlaceHolder(name=name)
+                        placeholder = OverriddenInstructionPlaceHolder(name=name)
+                        self.everything[instrs_idx[name]] = placeholder
+                        if macro is not None:
+                            self.warning(
+                                f"Overriding desugared {macro.name} may not work", thing
+                            )
                     if name not in self.instrs and thing.override:
                         raise psr.make_syntax_error(
                             f"Definition of '{name}' @ {thing.context} is supposed to be "
@@ -164,6 +165,9 @@ class Analyzer:
                     self.instrs[name] = Instruction(thing)
                     instrs_idx[name] = len(self.everything)
                     self.everything.append(thing)
+                    if macro is not None:
+                        self.macros[macro.name] = macro
+                        self.everything.append(macro)
                 case parsing.Macro(name):
                     self.macros[name] = thing
                     self.everything.append(thing)
@@ -197,9 +201,9 @@ class Analyzer:
             for target in targets:
                 if target_instr := self.instrs.get(target):
                     target_instr.predicted = True
-                elif target_macro := self.macro_instrs.get(target):
+                if target_macro := self.macro_instrs.get(target):
                     target_macro.predicted = True
-                else:
+                if not target_instr and not target_macro:
                     self.error(
                         f"Unknown instruction {target!r} predicted in {instr.name!r}",
                         instr.inst,  # TODO: Use better location
@@ -263,11 +267,7 @@ class Analyzer:
                     )
 
     def effect_counts(self, name: str) -> tuple[int, int, int]:
-        if instr := self.instrs.get(name):
-            cache = instr.cache_offset
-            input = len(instr.input_effects)
-            output = len(instr.output_effects)
-        elif mac := self.macro_instrs.get(name):
+        if mac := self.macro_instrs.get(name):
             cache = mac.cache_offset
             input, output = 0, 0
             for part in mac.parts:
@@ -407,7 +407,8 @@ class Analyzer:
                 case parsing.OpName(name):
                     if name not in self.instrs:
                         self.error(f"Unknown instruction {name!r}", macro)
-                    components.append(self.instrs[name])
+                    else:
+                        components.append(self.instrs[name])
                 case parsing.CacheEffect():
                     components.append(uop)
                 case _:
index 3a738da6c05a51bf32471717847abd70f0492855..5ddcd6ef7bf2740005dd78f78f76ef4f2bb2cbc3 100644 (file)
@@ -160,14 +160,9 @@ class Generator(Analyzer):
         pushed: str | None = None
         match thing:
             case parsing.InstDef():
-                if thing.kind != "op" or self.instrs[thing.name].is_viable_uop():
-                    instr = self.instrs[thing.name]
-                    popped = effect_str(instr.input_effects)
-                    pushed = effect_str(instr.output_effects)
-                else:
-                    instr = None
-                    popped = ""
-                    pushed = ""
+                instr = self.instrs[thing.name]
+                popped = effect_str(instr.input_effects)
+                pushed = effect_str(instr.output_effects)
             case parsing.Macro():
                 instr = self.macro_instrs[thing.name]
                 popped, pushed = stacking.get_stack_effect_info_for_macro(instr)
@@ -208,6 +203,8 @@ class Generator(Analyzer):
         for thing in self.everything:
             if isinstance(thing, OverriddenInstructionPlaceHolder):
                 continue
+            if isinstance(thing, parsing.Macro) and thing.name in self.instrs:
+                continue
             instr, popped, pushed = self.get_stack_effect_info(thing)
             if instr is not None:
                 popped_data.append((instr, popped))
@@ -255,15 +252,11 @@ class Generator(Analyzer):
         ops: list[tuple[bool, str]] = []  # (has_arg, name) for each opcode
         instrumented_ops: list[str] = []
 
-        specialized_ops = set()
+        specialized_ops: set[str] = set()
         for name, family in self.families.items():
             specialized_ops.update(family.members)
 
-        for instr in itertools.chain(
-            [instr for instr in self.instrs.values() if instr.kind != "op"],
-            self.macro_instrs.values(),
-        ):
-            assert isinstance(instr, (Instruction, MacroInstruction, PseudoInstruction))
+        for instr in self.macro_instrs.values():
             name = instr.name
             if name in specialized_ops:
                 continue
@@ -320,7 +313,7 @@ class Generator(Analyzer):
             while opname[next_opcode] is not None:
                 next_opcode += 1
 
-        assert next_opcode < min_internal
+        assert next_opcode < min_internal, next_opcode
 
         for i, op in enumerate(sorted(specialized_ops)):
             map_op(min_internal + i, op)
@@ -421,13 +414,12 @@ class Generator(Analyzer):
 
             self.write_provenance_header()
 
-            self.out.emit("\n" + textwrap.dedent("""
-                #ifndef Py_BUILD_CORE
-                #  error "this header requires Py_BUILD_CORE define"
-                #endif
-            """).strip())
-
-            self.out.emit("\n#include <stdbool.h>              // bool")
+            self.out.emit("")
+            self.out.emit("#ifndef Py_BUILD_CORE")
+            self.out.emit('#  error "this header requires Py_BUILD_CORE define"')
+            self.out.emit("#endif")
+            self.out.emit("")
+            self.out.emit("#include <stdbool.h>              // bool")
 
             self.write_pseudo_instrs()
 
@@ -498,7 +490,10 @@ class Generator(Analyzer):
                         case parsing.InstDef():
                             self.write_metadata_for_inst(self.instrs[thing.name])
                         case parsing.Macro():
-                            self.write_metadata_for_macro(self.macro_instrs[thing.name])
+                            if thing.name not in self.instrs:
+                                self.write_metadata_for_macro(
+                                    self.macro_instrs[thing.name]
+                                )
                         case parsing.Pseudo():
                             self.write_metadata_for_pseudo(
                                 self.pseudo_instrs[thing.name]
@@ -513,35 +508,14 @@ class Generator(Analyzer):
                 ";",
             ):
                 # Write macro expansion for each non-pseudo instruction
-                for thing in self.everything:
-                    match thing:
-                        case OverriddenInstructionPlaceHolder():
-                            pass
-                        case parsing.InstDef(name=name):
-                            instr = self.instrs[name]
-                            # Since an 'op' is not a bytecode, it has no expansion; but 'inst' is
-                            if instr.kind == "inst" and instr.is_viable_uop():
-                                # Construct a dummy Component -- input/output mappings are not used
-                                part = Component(instr, instr.active_caches)
-                                self.write_macro_expansions(
-                                    instr.name, [part], instr.cache_offset
-                                )
-                            elif instr.kind == "inst" and variable_used(
-                                instr.inst, "oparg1"
-                            ):
-                                assert variable_used(
-                                    instr.inst, "oparg2"
-                                ), "Half super-instr?"
-                                self.write_super_expansions(instr.name)
-                        case parsing.Macro():
-                            mac = self.macro_instrs[thing.name]
-                            self.write_macro_expansions(
-                                mac.name, mac.parts, mac.cache_offset
-                            )
-                        case parsing.Pseudo():
-                            pass
-                        case _:
-                            assert_never(thing)
+                for mac in self.macro_instrs.values():
+                    if is_super_instruction(mac):
+                        # Special-case the heck out of super-instructions
+                        self.write_super_expansions(mac.name)
+                    else:
+                        self.write_macro_expansions(
+                            mac.name, mac.parts, mac.cache_offset
+                        )
 
             with self.metadata_item(
                 "const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE]", "=", ";"
@@ -564,19 +538,15 @@ class Generator(Analyzer):
                 family_member_names: set[str] = set()
                 for family in self.families.values():
                     family_member_names.update(family.members)
-                for instr in self.instrs.values():
+                for mac in self.macro_instrs.values():
                     if (
-                        instr.name not in family_member_names
-                        and instr.cache_offset > 0
-                        and instr.kind == "inst"
-                        and not instr.name.startswith("INSTRUMENTED_")
+                        mac.cache_offset > 0
+                        and mac.name not in family_member_names
+                        and not mac.name.startswith("INSTRUMENTED_")
                     ):
-                        self.out.emit(f"[{instr.name}] = {instr.cache_offset},")
-                for mac in self.macro_instrs.values():
-                    if mac.name not in family_member_names and mac.cache_offset > 0:
                         self.out.emit(f"[{mac.name}] = {mac.cache_offset},")
                 # Irregular case:
-                self.out.emit('[JUMP_BACKWARD] = 1,')
+                self.out.emit("[JUMP_BACKWARD] = 1,")
 
             deoptcodes = {}
             for name, op in self.opmap.items():
@@ -674,7 +644,8 @@ class Generator(Analyzer):
         add("_SET_IP")
 
         for instr in self.instrs.values():
-            if instr.kind == "op":
+            # Skip ops that are also macros -- those are desugared inst()s
+            if instr.name not in self.macros:
                 add(instr.name)
 
     def write_macro_expansions(
@@ -693,10 +664,11 @@ class Generator(Analyzer):
                     # It is sometimes emitted for macros that have a
                     # manual translation in translate_bytecode_to_trace()
                     # in Python/optimizer.c.
-                    self.note(
-                        f"Part {part.instr.name} of {name} is not a viable uop",
-                        part.instr.inst,
-                    )
+                    if len(parts) > 1 or part.instr.name != name:
+                        self.note(
+                            f"Part {part.instr.name} of {name} is not a viable uop",
+                            part.instr.inst,
+                        )
                     return
                 if not part.active_caches:
                     if part.instr.name == "_SET_IP":
@@ -792,31 +764,26 @@ class Generator(Analyzer):
             self.write_provenance_header()
 
             # Write and count instructions of all kinds
-            n_instrs = 0
             n_macros = 0
             for thing in self.everything:
                 match thing:
                     case OverriddenInstructionPlaceHolder():
                         self.write_overridden_instr_place_holder(thing)
                     case parsing.InstDef():
-                        if thing.kind != "op":
-                            n_instrs += 1
-                            self.write_instr(self.instrs[thing.name])
+                        pass
                     case parsing.Macro():
                         n_macros += 1
                         mac = self.macro_instrs[thing.name]
                         stacking.write_macro_instr(
                             mac, self.out, self.families.get(mac.name)
                         )
-                        # self.write_macro(self.macro_instrs[thing.name])
                     case parsing.Pseudo():
                         pass
                     case _:
                         assert_never(thing)
 
         print(
-            f"Wrote {n_instrs} instructions and {n_macros} macros "
-            f"to {output_filename}",
+            f"Wrote {n_macros} cases to {output_filename}",
             file=sys.stderr,
         )
 
@@ -824,41 +791,21 @@ class Generator(Analyzer):
         self, executor_filename: str, emit_line_directives: bool
     ) -> None:
         """Generate cases for the Tier 2 interpreter."""
-        n_instrs = 0
         n_uops = 0
         with open(executor_filename, "w") as f:
             self.out = Formatter(f, 8, emit_line_directives)
             self.write_provenance_header()
-            for thing in self.everything:
-                match thing:
-                    case OverriddenInstructionPlaceHolder():
-                        # TODO: Is this helpful?
-                        self.write_overridden_instr_place_holder(thing)
-                    case parsing.InstDef():
-                        instr = self.instrs[thing.name]
-                        if instr.is_viable_uop():
-                            if instr.kind == "op":
-                                n_uops += 1
-                            else:
-                                n_instrs += 1
-                            self.out.emit("")
-                            with self.out.block(f"case {thing.name}:"):
-                                stacking.write_single_instr(
-                                    instr, self.out, tier=TIER_TWO
-                                )
-                                if instr.check_eval_breaker:
-                                    self.out.emit("CHECK_EVAL_BREAKER();")
-                                self.out.emit("break;")
-                        # elif instr.kind != "op":
-                        #     print(f"NOTE: {thing.name} is not a viable uop")
-                    case parsing.Macro():
-                        pass
-                    case parsing.Pseudo():
-                        pass
-                    case _:
-                        assert_never(thing)
+            for instr in self.instrs.values():
+                if instr.is_viable_uop():
+                    n_uops += 1
+                    self.out.emit("")
+                    with self.out.block(f"case {instr.name}:"):
+                        stacking.write_single_instr(instr, self.out, tier=TIER_TWO)
+                        if instr.check_eval_breaker:
+                            self.out.emit("CHECK_EVAL_BREAKER();")
+                        self.out.emit("break;")
         print(
-            f"Wrote {n_instrs} instructions and {n_uops} ops to {executor_filename}",
+            f"Wrote {n_uops} cases to {executor_filename}",
             file=sys.stderr,
         )
 
@@ -869,26 +816,16 @@ class Generator(Analyzer):
         with open(abstract_interpreter_filename, "w") as f:
             self.out = Formatter(f, 8, emit_line_directives)
             self.write_provenance_header()
-            for thing in self.everything:
-                match thing:
-                    case OverriddenInstructionPlaceHolder():
-                        pass
-                    case parsing.InstDef():
-                        instr = AbstractInstruction(self.instrs[thing.name].inst)
-                        if (
-                            instr.is_viable_uop()
-                            and instr.name not in SPECIALLY_HANDLED_ABSTRACT_INSTR
-                        ):
-                            self.out.emit("")
-                            with self.out.block(f"case {thing.name}:"):
-                                instr.write(self.out, tier=TIER_TWO)
-                                self.out.emit("break;")
-                    case parsing.Macro():
-                        pass
-                    case parsing.Pseudo():
-                        pass
-                    case _:
-                        assert_never(thing)
+            for instr in self.instrs.values():
+                instr = AbstractInstruction(instr.inst)
+                if (
+                    instr.is_viable_uop()
+                    and instr.name not in SPECIALLY_HANDLED_ABSTRACT_INSTR
+                ):
+                    self.out.emit("")
+                    with self.out.block(f"case {instr.name}:"):
+                        instr.write(self.out, tier=TIER_TWO)
+                        self.out.emit("break;")
         print(
             f"Wrote some stuff to {abstract_interpreter_filename}",
             file=sys.stderr,
@@ -902,24 +839,17 @@ class Generator(Analyzer):
             f"{self.out.comment} TARGET({place_holder.name}) overridden by later definition"
         )
 
-    def write_instr(self, instr: Instruction) -> None:
-        name = instr.name
-        self.out.emit("")
-        if instr.inst.override:
-            self.out.emit("{self.out.comment} Override")
-        with self.out.block(f"TARGET({name})"):
-            if instr.predicted:
-                self.out.emit(f"PREDICTED({name});")
-            self.out.static_assert_family_size(
-                instr.name, instr.family, instr.cache_offset
-            )
-            stacking.write_single_instr(instr, self.out, tier=TIER_ONE)
-            if not instr.always_exits:
-                if instr.cache_offset:
-                    self.out.emit(f"next_instr += {instr.cache_offset};")
-                if instr.check_eval_breaker:
-                    self.out.emit("CHECK_EVAL_BREAKER();")
-                self.out.emit(f"DISPATCH();")
+
+def is_super_instruction(mac: MacroInstruction) -> bool:
+    if (
+        len(mac.parts) == 1
+        and isinstance(mac.parts[0], Component)
+        and variable_used(mac.parts[0].instr.inst, "oparg1")
+    ):
+        assert variable_used(mac.parts[0].instr.inst, "oparg2")
+        return True
+    else:
+        return False
 
 
 def main() -> None:
index 78b3c290a49273eabd939fd75ef984c42c4c54c4..6fbf7d93f42fdea5ed897c46db9886725f1d4e74 100644 (file)
@@ -53,7 +53,6 @@ class Instruction:
 
     # Parts of the underlying instruction definition
     inst: parsing.InstDef
-    kind: typing.Literal["inst", "op"]
     name: str
     block: parsing.Block
     block_text: list[str]  # Block.text, less curlies, less PREDICT() calls
@@ -77,7 +76,6 @@ class Instruction:
 
     def __init__(self, inst: parsing.InstDef):
         self.inst = inst
-        self.kind = inst.kind
         self.name = inst.name
         self.block = inst.block
         self.block_text, self.check_eval_breaker, self.block_line = extract_block_text(
index 9cf9ad1a6c9e6d552cd1104b2617e409f03bcf0c..1f9fda66a5f034068da8349e4e507ea4d084d5f0 100644 (file)
@@ -376,6 +376,8 @@ def write_macro_instr(
         if not parts[-1].instr.always_exits:
             if not next_instr_is_set and mac.cache_offset:
                 out.emit(f"next_instr += {mac.cache_offset};")
+            if parts[-1].instr.check_eval_breaker:
+                out.emit("CHECK_EVAL_BREAKER();")
             out.emit("DISPATCH();")