git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
GH-130887: Always remove trailing jumps in AArch64 JIT stencils (GH-131042)
author: Diego Russo <diego.russo@arm.com>
Tue, 25 Mar 2025 17:15:36 +0000 (17:15 +0000)
committer: GitHub <noreply@github.com>
Tue, 25 Mar 2025 17:15:36 +0000 (10:15 -0700)
Misc/NEWS.d/next/Core_and_Builtins/2025-03-10-12-08-57.gh-issue-130887.f823Ih.rst [new file with mode: 0644]
Tools/jit/_stencils.py
Tools/jit/_targets.py

diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-03-10-12-08-57.gh-issue-130887.f823Ih.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-03-10-12-08-57.gh-issue-130887.f823Ih.rst
new file mode 100644 (file)
index 0000000..daf9c8e
--- /dev/null
@@ -0,0 +1 @@
+Optimize the AArch64 code generation for the JIT. Patch by Diego Russo
index 4ddbe967438bd138bab2d3c3557b8259593a02be..8faa9e8cac2d855b3048cdb1bfdc76fd4bbf0e30 100644 (file)
@@ -209,7 +209,24 @@ class Stencil:
             self.disassembly.append(f"{offset:x}: {' '.join(['00'] * padding)}")
         self.body.extend([0] * padding)
 
-    def remove_jump(self, *, alignment: int = 1) -> None:
+    def add_nops(self, nop: bytes, alignment: int) -> None:
+        """Append copies of *nop* to the body until its length is aligned.
+
+        Does nothing when the body length is already a multiple of
+        *alignment*; in that case *nop* is not inspected and may be empty.
+
+        Raises ValueError if the gap cannot be filled with a whole number
+        of *nop* sequences (including when *nop* is empty).
+        """
+        offset = len(self.body)
+        # Distance to the next multiple of alignment (0 if already aligned).
+        gap = -offset % alignment
+        if not gap:
+            return
+        nop_size = len(nop)
+        # Test for an empty nop first: "gap % 0" would otherwise raise
+        # ZeroDivisionError instead of the descriptive error below.
+        if not nop_size or gap % nop_size:
+            raise ValueError(
+                f"Cannot add nops of size '{nop_size}' to a body with "
+                f"offset '{offset}' to align with '{alignment}'"
+            )
+        self.body.extend(nop * (gap // nop_size))
+
+    def remove_jump(self) -> None:
         """Remove a zero-length continuation jump, if it exists."""
         hole = max(self.holes, key=lambda hole: hole.offset)
         match hole:
@@ -244,7 +261,7 @@ class Stencil:
                 jump = b"\x00\x00\x00\x14"
             case _:
                 return
-        if self.body[offset:] == jump and offset % alignment == 0:
+        if self.body[offset:] == jump:
             self.body = self.body[:offset]
             self.holes.remove(hole)
 
@@ -266,10 +283,7 @@ class StencilGroup:
     _trampolines: set[int] = dataclasses.field(default_factory=set, init=False)
 
     def process_relocations(
-        self,
-        known_symbols: dict[str, int],
-        *,
-        alignment: int = 1,
+        self, known_symbols: dict[str, int], *, alignment: int = 1, nop: bytes = b""
     ) -> None:
         """Fix up all GOT and internal relocations for this stencil group."""
         for hole in self.code.holes.copy():
@@ -289,8 +303,8 @@ class StencilGroup:
                 self._trampolines.add(ordinal)
                 hole.addend = ordinal
                 hole.symbol = None
-        self.code.remove_jump(alignment=alignment)
-        self.code.pad(alignment)
+        self.code.remove_jump()
+        self.code.add_nops(nop=nop, alignment=alignment)
         self.data.pad(8)
         for stencil in [self.code, self.data]:
             for hole in stencil.holes:
index aa2b56abf446b139d43508d3c5140d19ba8513a8..b5a839e07d4dafb359d1fcaf8cca1bc3754d1a99 100644 (file)
@@ -44,6 +44,15 @@ class _Target(typing.Generic[_S, _R]):
     verbose: bool = False
     known_symbols: dict[str, int] = dataclasses.field(default_factory=dict)
 
+    def _get_nop(self) -> bytes:
+        """Return the bytes of a single NOP for this target's triple.
+
+        Raises ValueError if the triple matches no known architecture.
+        """
+        triple = self.triple
+        if re.fullmatch(r"aarch64-.*", triple):
+            # Four-byte NOP used for AArch64 triples.
+            return b"\x1f\x20\x03\xD5"
+        if re.fullmatch(r"x86_64-.*|i686.*", triple):
+            # One-byte NOP used for x86_64 and i686 triples.
+            return b"\x90"
+        raise ValueError(f"NOP not defined for {self.triple}")
+
     def _compute_digest(self, out: pathlib.Path) -> str:
         hasher = hashlib.sha256()
         hasher.update(self.triple.encode())
@@ -172,7 +181,9 @@ class _Target(typing.Generic[_S, _R]):
         stencil_groups = {task.get_name(): task.result() for task in tasks}
         for stencil_group in stencil_groups.values():
             stencil_group.process_relocations(
-                known_symbols=self.known_symbols, alignment=self.alignment
+                known_symbols=self.known_symbols,
+                alignment=self.alignment,
+                nop=self._get_nop(),
             )
         return stencil_groups