]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-118423: Add `INSTRUCTION_SIZE` macro to code generator (GH-125467)
authorTomas R. <tomas.roun8@gmail.com>
Tue, 29 Oct 2024 17:25:05 +0000 (18:25 +0100)
committerGitHub <noreply@github.com>
Tue, 29 Oct 2024 17:25:05 +0000 (17:25 +0000)
Lib/test/test_generated_cases.py
Misc/NEWS.d/next/Core_and_Builtins/2024-10-14-17-13-12.gh-issue-118423.SkBoda.rst [new file with mode: 0644]
Python/bytecodes.c
Python/executor_cases.c.h
Python/generated_cases.c.h
Tools/cases_generator/analyzer.py
Tools/cases_generator/generators_common.py
Tools/cases_generator/interpreter_definition.md
Tools/cases_generator/tier1_generator.py

index 95813e1e32c7af8f21f2879dd88a7eddefb8b178..173e405b785ddc2bc4d896149295853556f3b2f4 100644 (file)
@@ -1398,6 +1398,37 @@ class TestGeneratedCases(unittest.TestCase):
         with self.assertRaises(SyntaxError):
             self.run_cases_test(input, output)
 
+    def test_instruction_size_macro(self):
+        input = """
+        inst(OP, (--)) {
+            frame->return_offset = INSTRUCTION_SIZE;
+        }
+        """
+
+        output = """
+        TARGET(OP) {
+            frame->instr_ptr = next_instr;
+            next_instr += 1;
+            INSTRUCTION_STATS(OP);
+            frame->return_offset = 1 ;
+            DISPATCH();
+        }
+        """
+        self.run_cases_test(input, output)
+
+        # Two instructions of different sizes referencing the same
+        # uop containing the `INSTRUCTION_SIZE` macro is not allowed.
+        input = """
+        inst(OP, (--)) {
+            frame->return_offset = INSTRUCTION_SIZE;
+        }
+        macro(OP2) = unused/1 + OP;
+        """
+
+        output = ""  # No output needed as this should raise an error.
+        with self.assertRaisesRegex(SyntaxError, "All instructions containing a uop"):
+            self.run_cases_test(input, output)
+
 
 class TestGeneratedAbstractCases(unittest.TestCase):
     def setUp(self) -> None:
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-10-14-17-13-12.gh-issue-118423.SkBoda.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-14-17-13-12.gh-issue-118423.SkBoda.rst
new file mode 100644 (file)
index 0000000..8511a8d
--- /dev/null
@@ -0,0 +1,2 @@
+Add a new ``INSTRUCTION_SIZE`` macro to the cases generator which returns
+the current instruction size.
index b7469c2dd8d42e6e1b87381742021fde988772ce..fa98af12c69aefad2c70ea1069ad39b60d7b8084 100644 (file)
@@ -859,7 +859,7 @@ dummy_func(
             new_frame->localsplus[0] = container;
             new_frame->localsplus[1] = sub;
             INPUTS_DEAD();
-            frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
+            frame->return_offset = INSTRUCTION_SIZE;
         }
 
         macro(BINARY_SUBSCR_GETITEM) =
@@ -1111,8 +1111,8 @@ dummy_func(
                 gen->gi_frame_state = FRAME_EXECUTING;
                 gen->gi_exc_state.previous_item = tstate->exc_info;
                 tstate->exc_info = &gen->gi_exc_state;
-                assert(next_instr - this_instr + oparg <= UINT16_MAX);
-                frame->return_offset = (uint16_t)(next_instr - this_instr + oparg);
+                assert(INSTRUCTION_SIZE + oparg <= UINT16_MAX);
+                frame->return_offset = (uint16_t)(INSTRUCTION_SIZE + oparg);
                 assert(gen_frame->previous == NULL);
                 gen_frame->previous = frame;
                 DISPATCH_INLINED(gen_frame);
@@ -1157,8 +1157,8 @@ dummy_func(
             gen->gi_frame_state = FRAME_EXECUTING;
             gen->gi_exc_state.previous_item = tstate->exc_info;
             tstate->exc_info = &gen->gi_exc_state;
-            assert(1 + INLINE_CACHE_ENTRIES_SEND + oparg <= UINT16_MAX);
-            frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_SEND + oparg);
+            assert(INSTRUCTION_SIZE + oparg <= UINT16_MAX);
+            frame->return_offset = (uint16_t)(INSTRUCTION_SIZE + oparg);
             gen_frame->previous = frame;
         }
 
@@ -2265,7 +2265,7 @@ dummy_func(
             new_frame->localsplus[0] = owner;
             DEAD(owner);
             new_frame->localsplus[1] = PyStackRef_FromPyObjectNew(name);
-            frame->return_offset = (uint16_t)(next_instr - this_instr);
+            frame->return_offset = INSTRUCTION_SIZE;
             DISPATCH_INLINED(new_frame);
         }
 
@@ -3062,7 +3062,7 @@ dummy_func(
             tstate->exc_info = &gen->gi_exc_state;
             gen_frame->previous = frame;
             // oparg is the return offset from the next instruction.
-            frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg);
+            frame->return_offset = (uint16_t)(INSTRUCTION_SIZE + oparg);
         }
 
         macro(FOR_ITER_GEN) =
@@ -3341,7 +3341,7 @@ dummy_func(
                 if (new_frame == NULL) {
                     ERROR_NO_POP();
                 }
-                frame->return_offset = (uint16_t)(next_instr - this_instr);
+                frame->return_offset = INSTRUCTION_SIZE;
                 DISPATCH_INLINED(new_frame);
             }
             /* Callable is not a normal Python function */
@@ -4205,8 +4205,8 @@ dummy_func(
                 if (new_frame == NULL) {
                     ERROR_NO_POP();
                 }
-                assert(next_instr - this_instr == 1 + INLINE_CACHE_ENTRIES_CALL_KW);
-                frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL_KW;
+                assert(INSTRUCTION_SIZE == 1 + INLINE_CACHE_ENTRIES_CALL_KW);
+                frame->return_offset = INSTRUCTION_SIZE;
                 DISPATCH_INLINED(new_frame);
             }
             /* Callable is not a normal Python function */
@@ -4472,7 +4472,7 @@ dummy_func(
                     if (new_frame == NULL) {
                         ERROR_NO_POP();
                     }
-                    assert(next_instr - this_instr == 1);
+                    assert(INSTRUCTION_SIZE == 1);
                     frame->return_offset = 1;
                     DISPATCH_INLINED(new_frame);
                 }
index 27b7e324ebea373dbd3085a191902971a4e43280..ff4a0a52a0b445110939ac6556ffea0c6d01a2fc 100644 (file)
             new_frame = _PyFrame_PushUnchecked(tstate, PyStackRef_FromPyObjectNew(getitem), 2, frame);
             new_frame->localsplus[0] = container;
             new_frame->localsplus[1] = sub;
-            frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
+            frame->return_offset = ;
             stack_pointer[-2].bits = (uintptr_t)new_frame;
             stack_pointer += -1;
             assert(WITHIN_STACK_BOUNDS());
             gen->gi_frame_state = FRAME_EXECUTING;
             gen->gi_exc_state.previous_item = tstate->exc_info;
             tstate->exc_info = &gen->gi_exc_state;
-            assert(1 + INLINE_CACHE_ENTRIES_SEND + oparg <= UINT16_MAX);
-            frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_SEND + oparg);
+            assert( 2 + oparg <= UINT16_MAX);
+            frame->return_offset = (uint16_t)( 2 + oparg);
             gen_frame->previous = frame;
             stack_pointer[-1].bits = (uintptr_t)gen_frame;
             break;
             break;
         }
 
-        /* _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN is not a viable micro-op for tier 2 because it uses the 'this_instr' variable */
+        /* _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN is not a viable micro-op for tier 2 because it has unused cache entries */
 
         case _GUARD_DORV_NO_DICT: {
             _PyStackRef owner;
             tstate->exc_info = &gen->gi_exc_state;
             gen_frame->previous = frame;
             // oparg is the return offset from the next instruction.
-            frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg);
+            frame->return_offset = (uint16_t)( 2 + oparg);
             stack_pointer[0].bits = (uintptr_t)gen_frame;
             stack_pointer += 1;
             assert(WITHIN_STACK_BOUNDS());
index a615f65c4133d3d3496d41278b975f3faf56558d..632cbc7790a4d8895d22a28221cbd27d388a63c7 100644 (file)
                 new_frame = _PyFrame_PushUnchecked(tstate, PyStackRef_FromPyObjectNew(getitem), 2, frame);
                 new_frame->localsplus[0] = container;
                 new_frame->localsplus[1] = sub;
-                frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
+                frame->return_offset = ;
             }
             // _PUSH_FRAME
             {
                     if (new_frame == NULL) {
                         goto error;
                     }
-                    frame->return_offset = (uint16_t)(next_instr - this_instr);
+                    frame->return_offset = ;
                     DISPATCH_INLINED(new_frame);
                 }
                 /* Callable is not a normal Python function */
                         if (new_frame == NULL) {
                             goto error;
                         }
-                        assert(next_instr - this_instr == 1);
+                        assert( 1 == 1);
                         frame->return_offset = 1;
                         DISPATCH_INLINED(new_frame);
                     }
                     if (new_frame == NULL) {
                         goto error;
                     }
-                    assert(next_instr - this_instr == 1 + INLINE_CACHE_ENTRIES_CALL_KW);
-                    frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL_KW;
+                    assert( 4 == 1 + INLINE_CACHE_ENTRIES_CALL_KW);
+                    frame->return_offset = ;
                     DISPATCH_INLINED(new_frame);
                 }
                 /* Callable is not a normal Python function */
                 tstate->exc_info = &gen->gi_exc_state;
                 gen_frame->previous = frame;
                 // oparg is the return offset from the next instruction.
-                frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg);
+                frame->return_offset = (uint16_t)( 2 + oparg);
             }
             // _PUSH_FRAME
             {
                     if (new_frame == NULL) {
                         goto error;
                     }
-                    frame->return_offset = (uint16_t)(next_instr - this_instr);
+                    frame->return_offset = ;
                     DISPATCH_INLINED(new_frame);
                 }
                 /* Callable is not a normal Python function */
             STACK_SHRINK(1);
             new_frame->localsplus[0] = owner;
             new_frame->localsplus[1] = PyStackRef_FromPyObjectNew(name);
-            frame->return_offset = (uint16_t)(next_instr - this_instr);
+            frame->return_offset = 10 ;
             DISPATCH_INLINED(new_frame);
         }
 
                     gen->gi_frame_state = FRAME_EXECUTING;
                     gen->gi_exc_state.previous_item = tstate->exc_info;
                     tstate->exc_info = &gen->gi_exc_state;
-                    assert(next_instr - this_instr + oparg <= UINT16_MAX);
-                    frame->return_offset = (uint16_t)(next_instr - this_instr + oparg);
+                    assert( 2 + oparg <= UINT16_MAX);
+                    frame->return_offset = (uint16_t)( 2 + oparg);
                     assert(gen_frame->previous == NULL);
                     gen_frame->previous = frame;
                     DISPATCH_INLINED(gen_frame);
                 gen->gi_frame_state = FRAME_EXECUTING;
                 gen->gi_exc_state.previous_item = tstate->exc_info;
                 tstate->exc_info = &gen->gi_exc_state;
-                assert(1 + INLINE_CACHE_ENTRIES_SEND + oparg <= UINT16_MAX);
-                frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_SEND + oparg);
+                assert( 2 + oparg <= UINT16_MAX);
+                frame->return_offset = (uint16_t)( 2 + oparg);
                 gen_frame->previous = frame;
             }
             // _PUSH_FRAME
index f41a8d161099dfcb631d9fa154580765fa59ab4f..66ead741b87a2bbf6284d33d196b2d368eacf40d 100644 (file)
@@ -173,6 +173,8 @@ class Uop:
     implicitly_created: bool = False
     replicated = 0
     replicates: "Uop | None" = None
+    # Size of the instruction(s), only set for uops containing the INSTRUCTION_SIZE macro
+    instruction_size: int | None = None
 
     def dump(self, indent: str) -> None:
         print(
@@ -1079,6 +1081,35 @@ def assign_opcodes(
     return instmap, len(no_arg), min_instrumented
 
 
+def get_instruction_size_for_uop(instructions: dict[str, Instruction], uop: Uop) -> int | None:
+    """Return the size of the instruction that contains the given uop or
+    `None` if the uop does not contains the `INSTRUCTION_SIZE` macro.
+
+    If there is more than one instruction that contains the uop,
+    ensure that they all have the same size.
+    """
+    for tkn in uop.body:
+          if tkn.text == "INSTRUCTION_SIZE":
+               break
+    else:
+        return None
+
+    size = None
+    for inst in instructions.values():
+        if uop in inst.parts:
+            if size is None:
+                size = inst.size
+            if size != inst.size:
+                raise analysis_error(
+                    "All instructions containing a uop with the `INSTRUCTION_SIZE` macro "
+                    f"must have the same size: {size} != {inst.size}",
+                    tkn
+                )
+    if size is None:
+        raise analysis_error(f"No instruction containing the uop '{uop.name}' was found", tkn)
+    return size
+
+
 def analyze_forest(forest: list[parser.AstNode]) -> Analysis:
     instructions: dict[str, Instruction] = {}
     uops: dict[str, Uop] = {}
@@ -1122,6 +1153,8 @@ def analyze_forest(forest: list[parser.AstNode]) -> Analysis:
                     continue
                 if target.text in instructions:
                     instructions[target.text].is_target = True
+    for uop in uops.values():
+        uop.instruction_size = get_instruction_size_for_uop(instructions, uop)
     # Special case BINARY_OP_INPLACE_ADD_UNICODE
     # BINARY_OP_INPLACE_ADD_UNICODE is not a normal family member,
     # as it is the wrong size, but we need it to maintain an
index 3b158f5ac4eb481d8d3c321d4b38087cf0187f15..dad2557e97a9482d4df1aa42e38f40e25a9fbe38 100644 (file)
@@ -9,9 +9,9 @@ from analyzer import (
     analysis_error,
 )
 from cwriter import CWriter
-from typing import Callable, Mapping, TextIO, Iterator, Iterable
+from typing import Callable, TextIO, Iterator, Iterable
 from lexer import Token
-from stack import Stack, Local, Storage, StackError
+from stack import Storage, StackError
 
 # Set this to true for voluminous output showing state of stack and locals
 PRINT_STACKS = False
@@ -118,7 +118,8 @@ class Emitter:
             "PyStackRef_CLOSE": self.stackref_close,
             "PyStackRef_CLOSE_SPECIALIZED": self.stackref_close,
             "PyStackRef_AsPyObjectSteal": self.stackref_steal,
-            "DISPATCH": self.dispatch
+            "DISPATCH": self.dispatch,
+            "INSTRUCTION_SIZE": self.instruction_size,
         }
         self.out = out
 
@@ -365,6 +366,19 @@ class Emitter:
         self.emit_reload(storage)
         return True
 
+    def instruction_size(self,
+        tkn: Token,
+        tkn_iter: TokenIterator,
+        uop: Uop,
+        storage: Storage,
+        inst: Instruction | None,
+    ) -> bool:
+        """Replace the INSTRUCTION_SIZE macro with the size of the current instruction."""
+        if uop.instruction_size is None:
+            raise analysis_error("The INSTRUCTION_SIZE macro requires uop.instruction_size to be set", tkn)
+        self.out.emit(f" {uop.instruction_size} ")
+        return True
+
     def _print_storage(self, storage: Storage) -> None:
         if PRINT_STACKS:
             self.out.start_line()
index 6cf36f343d5fa729497f2f152cf2b5641c08fbda..203286834e3e3f69f511b0908db167f30b2b2d9a 100644 (file)
@@ -178,15 +178,17 @@ list of annotations and their meanings are as follows:
 
 ### Special functions/macros
 
-The C code may include special functions that are understood by the tools as
+The C code may include special functions and macros that are understood by the tools as
 part of the DSL.
 
-Those functions include:
+Those include:
 
 * `DEOPT_IF(cond, instruction)`. Deoptimize if `cond` is met.
 * `ERROR_IF(cond, label)`. Jump to error handler at `label` if `cond` is true.
 * `DECREF_INPUTS()`. Generate `Py_DECREF()` calls for the input stack effects.
 * `SYNC_SP()`. Synchronizes the physical stack pointer with the stack effects.
+* `INSTRUCTION_SIZE`. Replaced with the size of the instruction which is equal
+to `1 + INLINE_CACHE_ENTRIES`.
 
 Note that the use of `DECREF_INPUTS()` is optional -- manual calls
 to `Py_DECREF()` or other approaches are also acceptable
index 8dadc5736c88890729891cd83a739a78cce0d691..fcdd3bdacd0e7af1007a41f96a67731e7df7a192 100644 (file)
@@ -22,7 +22,6 @@ from generators_common import (
     write_header,
     type_and_null,
     Emitter,
-    TokenIterator,
 )
 from cwriter import CWriter
 from typing import TextIO