git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-104584: Move super-instruction special-casing to generator (#106500)
author: Guido van Rossum <guido@python.org>
Fri, 7 Jul 2023 17:42:10 +0000 (10:42 -0700)
committer: GitHub <noreply@github.com>
Fri, 7 Jul 2023 17:42:10 +0000 (17:42 +0000)
Instead of special-casing specific instructions,
we add a few more special values to the 'size' field of expansions,
so in the future we can automatically handle
additional super-instructions in the generator.

Python/opcode_metadata.h
Python/optimizer.c
Tools/cases_generator/generate_cases.py

index ac86a4abd9c1b380f56180c454bbd7cd2f5f9ef7..d29f7216ea65e93ea7b62dbcffd172e33fc9d432 100644 (file)
@@ -934,6 +934,12 @@ struct opcode_macro_expansion {
     struct { int16_t uop; int8_t size; int8_t offset; } uops[8];
 };
 
+#define OPARG_FULL 0
+#define OPARG_CACHE_1 1
+#define OPARG_CACHE_2 2
+#define OPARG_CACHE_4 4
+#define OPARG_TOP 5
+#define OPARG_BOTTOM 6
 
 #define OPCODE_METADATA_FMT(OP) (_PyOpcode_opcode_metadata[(OP)].instr_format)
 #define SAME_OPCODE_METADATA(OP1, OP2) \
@@ -1165,8 +1171,11 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[256] = {
     [LOAD_FAST_CHECK] = { .nuops = 1, .uops = { { LOAD_FAST_CHECK, 0, 0 } } },
     [LOAD_FAST] = { .nuops = 1, .uops = { { LOAD_FAST, 0, 0 } } },
     [LOAD_FAST_AND_CLEAR] = { .nuops = 1, .uops = { { LOAD_FAST_AND_CLEAR, 0, 0 } } },
+    [LOAD_FAST_LOAD_FAST] = { .nuops = 2, .uops = { { LOAD_FAST, 5, 0 }, { LOAD_FAST, 6, 0 } } },
     [LOAD_CONST] = { .nuops = 1, .uops = { { LOAD_CONST, 0, 0 } } },
     [STORE_FAST] = { .nuops = 1, .uops = { { STORE_FAST, 0, 0 } } },
+    [STORE_FAST_LOAD_FAST] = { .nuops = 2, .uops = { { STORE_FAST, 5, 0 }, { LOAD_FAST, 6, 0 } } },
+    [STORE_FAST_STORE_FAST] = { .nuops = 2, .uops = { { STORE_FAST, 5, 0 }, { STORE_FAST, 6, 0 } } },
     [POP_TOP] = { .nuops = 1, .uops = { { POP_TOP, 0, 0 } } },
     [PUSH_NULL] = { .nuops = 1, .uops = { { PUSH_NULL, 0, 0 } } },
     [END_FOR] = { .nuops = 2, .uops = { { POP_TOP, 0, 0 }, { POP_TOP, 0, 0 } } },
index db117bb180c1c8eefd497cd5f63f218d5d6b7a49..2870f2fd05052ec076df404066b6cc3fdcae9351 100644 (file)
@@ -411,44 +411,15 @@ translate_bytecode_to_trace(
     for (;;) {
         ADD_TO_TRACE(SAVE_IP, (int)(instr - (_Py_CODEUNIT *)code->co_code_adaptive));
         int opcode = instr->op.code;
-        uint64_t operand = instr->op.arg;
+        int oparg = instr->op.arg;
         int extras = 0;
         while (opcode == EXTENDED_ARG) {
             instr++;
             extras += 1;
             opcode = instr->op.code;
-            operand = (operand << 8) | instr->op.arg;
+            oparg = (oparg << 8) | instr->op.arg;
         }
         switch (opcode) {
-            case LOAD_FAST_LOAD_FAST:
-            case STORE_FAST_LOAD_FAST:
-            case STORE_FAST_STORE_FAST:
-            {
-                // Reserve space for two uops (+ SAVE_IP + EXIT_TRACE)
-                if (trace_length + 4 > max_length) {
-                    DPRINTF(1, "Ran out of space for LOAD_FAST_LOAD_FAST\n");
-                    goto done;
-                }
-                uint64_t oparg1 = operand >> 4;
-                uint64_t oparg2 = operand & 15;
-                switch (opcode) {
-                    case LOAD_FAST_LOAD_FAST:
-                        ADD_TO_TRACE(LOAD_FAST, oparg1);
-                        ADD_TO_TRACE(LOAD_FAST, oparg2);
-                        break;
-                    case STORE_FAST_LOAD_FAST:
-                        ADD_TO_TRACE(STORE_FAST, oparg1);
-                        ADD_TO_TRACE(LOAD_FAST, oparg2);
-                        break;
-                    case STORE_FAST_STORE_FAST:
-                        ADD_TO_TRACE(STORE_FAST, oparg1);
-                        ADD_TO_TRACE(STORE_FAST, oparg2);
-                        break;
-                    default:
-                        Py_FatalError("Missing case");
-                }
-                break;
-            }
             default:
             {
                 const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode];
@@ -462,9 +433,11 @@ translate_bytecode_to_trace(
                         goto done;
                     }
                     for (int i = 0; i < nuops; i++) {
+                        uint64_t operand;
                         int offset = expansion->uops[i].offset;
                         switch (expansion->uops[i].size) {
-                            case 0:
+                            case OPARG_FULL:
+                                operand = oparg;
                                 if (extras && OPCODE_HAS_JUMP(opcode)) {
                                     if (opcode == JUMP_BACKWARD_NO_INTERRUPT) {
                                         operand -= extras;
@@ -475,19 +448,25 @@ translate_bytecode_to_trace(
                                     }
                                 }
                                 break;
-                            case 1:
+                            case OPARG_CACHE_1:
                                 operand = read_u16(&instr[offset].cache);
                                 break;
-                            case 2:
+                            case OPARG_CACHE_2:
                                 operand = read_u32(&instr[offset].cache);
                                 break;
-                            case 4:
+                            case OPARG_CACHE_4:
                                 operand = read_u64(&instr[offset].cache);
                                 break;
+                            case OPARG_TOP:  // First half of super-instr
+                                operand = oparg >> 4;
+                                break;
+                            case OPARG_BOTTOM:  // Second half of super-instr
+                                operand = oparg & 0xF;
+                                break;
                             default:
                                 fprintf(stderr,
-                                        "opcode=%d, operand=%" PRIu64 "; nuops=%d, i=%d; size=%d, offset=%d\n",
-                                        opcode, operand, nuops, i,
+                                        "opcode=%d, oparg=%d; nuops=%d, i=%d; size=%d, offset=%d\n",
+                                        opcode, oparg, nuops, i,
                                         expansion->uops[i].size,
                                         expansion->uops[i].offset);
                                 Py_FatalError("garbled expansion");
index a90abfe20c17398499129641e93b0951661010dc..632834ce2316642bc35102f03f4fd5f8a3c39a82 100644 (file)
@@ -40,6 +40,17 @@ RE_PREDICTED = (
 UNUSED = "unused"
 BITS_PER_CODE_UNIT = 16
 
+# Constants used instead of size for macro expansions.
+# Note: 1, 2, 4 must match actual cache entry sizes.
+OPARG_SIZES = {
+    "OPARG_FULL": 0,
+    "OPARG_CACHE_1": 1,
+    "OPARG_CACHE_2": 2,
+    "OPARG_CACHE_4": 4,
+    "OPARG_TOP": 5,
+    "OPARG_BOTTOM": 6,
+}
+
 RESERVED_WORDS = {
     "co_consts" : "Use FRAME_CO_CONSTS.",
     "co_names": "Use FRAME_CO_NAMES.",
@@ -1213,7 +1224,10 @@ class Analyzer:
                 self.out.emit("struct { int16_t uop; int8_t size; int8_t offset; } uops[8];")
             self.out.emit("")
 
+            for key, value in OPARG_SIZES.items():
+                self.out.emit(f"#define {key} {value}")
             self.out.emit("")
+
             self.out.emit("#define OPCODE_METADATA_FMT(OP) "
                           "(_PyOpcode_opcode_metadata[(OP)].instr_format)")
             self.out.emit("#define SAME_OPCODE_METADATA(OP1, OP2) \\")
@@ -1263,6 +1277,9 @@ class Analyzer:
                                 # Construct a dummy Component -- input/output mappings are not used
                                 part = Component(instr, [], [], instr.active_caches)
                                 self.write_macro_expansions(instr.name, [part])
+                            elif instr.kind == "inst" and variable_used(instr.inst, "oparg1"):
+                                assert variable_used(instr.inst, "oparg2"), "Half super-instr?"
+                                self.write_super_expansions(instr.name)
                         case parser.Macro():
                             mac = self.macro_instrs[thing.name]
                             self.write_macro_expansions(mac.name, mac.parts)
@@ -1342,7 +1359,7 @@ class Analyzer:
                     print(f"NOTE: Part {part.instr.name} of {name} is not a viable uop")
                     return
                 if part.instr.instr_flags.HAS_ARG_FLAG or not part.active_caches:
-                    size, offset = 0, 0
+                    size, offset = OPARG_SIZES["OPARG_FULL"], 0
                 else:
                     # If this assert triggers, is_viable_uops() lied
                     assert len(part.active_caches) == 1, (name, part.instr.name)
@@ -1350,10 +1367,50 @@ class Analyzer:
                     size, offset = cache.effect.size, cache.offset
                 expansions.append((part.instr.name, size, offset))
         assert len(expansions) > 0, f"Macro {name} has empty expansion?!"
+        self.write_expansions(name, expansions)
+
+    def write_super_expansions(self, name: str) -> None:
+        """Write special macro expansions for super-instructions.
+
+        If you get an assertion failure here, you probably have accidentally
+        violated one of the assumptions here.
+
+        - A super-instruction's name is of the form FIRST_SECOND where
+          FIRST and SECOND are regular instructions whose name has the
+          form FOO_BAR. Thus, there must be exactly 3 underscores.
+          Example: LOAD_CONST_STORE_FAST.
+
+        - A super-instruction's body uses `oparg1` and `oparg2`, and no
+          other instruction's body uses those variable names.
+
+        - A super-instruction has no active (used) cache entries.
+
+        In the expansion, the first instruction's operand is all but the
+        bottom 4 bits of the super-instruction's oparg, and the second
+        instruction's operand is the bottom 4 bits. We use the special
+        size codes OPARG_TOP and OPARG_BOTTOM for these.
+        """
+        pieces = name.split("_")
+        assert len(pieces) == 4, f"{name} doesn't look like a super-instr"
+        name1 = "_".join(pieces[:2])
+        name2 = "_".join(pieces[2:])
+        assert name1 in self.instrs, f"{name1} doesn't match any instr"
+        assert name2 in self.instrs, f"{name2} doesn't match any instr"
+        instr1 = self.instrs[name1]
+        instr2 = self.instrs[name2]
+        assert not instr1.active_caches, f"{name1} has active caches"
+        assert not instr2.active_caches, f"{name2} has active caches"
+        expansions = [
+            (name1, OPARG_SIZES["OPARG_TOP"], 0),
+            (name2, OPARG_SIZES["OPARG_BOTTOM"], 0),
+        ]
+        self.write_expansions(name, expansions)
+
+    def write_expansions(self, name: str, expansions: list[tuple[str, int, int]]) -> None:
         pieces = [f"{{ {name}, {size}, {offset} }}" for name, size, offset in expansions]
         self.out.emit(
             f"[{name}] = "
-            f"{{ .nuops = {len(expansions)}, .uops = {{ {', '.join(pieces)} }} }},"
+            f"{{ .nuops = {len(pieces)}, .uops = {{ {', '.join(pieces)} }} }},"
         )
 
     def emit_metadata_entry(