git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
GH-135904: JIT compiler: Support 19 bit branch instructions on AArch64 for Mach-O...
author: Mark Shannon <mark@hotpy.org>
Thu, 23 Oct 2025 15:45:57 +0000 (16:45 +0100)
committer: GitHub <noreply@github.com>
Thu, 23 Oct 2025 15:45:57 +0000 (16:45 +0100)
* Insert labels into assembly for custom relocation during stencil creation.

Misc/NEWS.d/next/Core_and_Builtins/2025-10-22-11-30-16.gh-issue-135904.3WE5oW.rst [new file with mode: 0644]
Tools/jit/_optimizers.py
Tools/jit/_stencils.py
Tools/jit/_targets.py

diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-10-22-11-30-16.gh-issue-135904.3WE5oW.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-22-11-30-16.gh-issue-135904.3WE5oW.rst
new file mode 100644
index 0000000..b52a57d
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-22-11-30-16.gh-issue-135904.3WE5oW.rst
@@ -0,0 +1,3 @@
+Add special labels to the assembly created during stencil creation to
+support relocations that the native object file format does not support:
+specifically, 19-bit branches for AArch64 in Mach-O object files.
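diff --git a/Tools/jit/_optimizers.py b/Tools/jit/_optimizers.py
index 866417398b0ba53d00f7f5bc9b6c7660e6ba9265..0adc550ba5e84cc15b0bc3eddcd34f3ce1ccee9e 100644
--- a/Tools/jit/_optimizers.py
+++ b/Tools/jit/_optimizers.py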
@@ -9,7 +9,7 @@ import typing
 _RE_NEVER_MATCH = re.compile(r"(?!)")
 # Dictionary mapping branch instructions to their inverted branch instructions.
 # If a branch cannot be inverted, the value is None:
-_X86_BRANCHES = {
+_X86_BRANCH_NAMES = {
     # https://www.felixcloutier.com/x86/jcc
     "ja": "jna",
     "jae": "jnae",
@@ -37,7 +37,11 @@ _X86_BRANCHES = {
     "loopz": None,
 }
 # Update with all of the inverted branches, too:
-_X86_BRANCHES |= {v: k for k, v in _X86_BRANCHES.items() if v}
+_X86_BRANCH_NAMES |= {v: k for k, v in _X86_BRANCH_NAMES.items() if v}
+# No custom relocations needed
+_X86_BRANCHES: dict[str, tuple[str | None, str | None]] = {
+    k: (v, None) for k, v in _X86_BRANCH_NAMES.items()
+}
 
 _AARCH64_COND_CODES = {
     # https://developer.arm.com/documentation/dui0801/b/CJAJIHAD?lang=en
@@ -58,12 +62,15 @@ _AARCH64_COND_CODES = {
     "hi": "ls",
     "ls": "hi",
 }
+# MyPy doesn't understand that an invariant variable can be initialized by a covariant value
+CUSTOM_AARCH64_BRANCH19: str | None = "CUSTOM_AARCH64_BRANCH19"
+
 # Branches are either b.{cond} or bc.{cond}
-_AARCH64_BRANCHES = {
-    "b." + cond: ("b." + inverse if inverse else None)
+_AARCH64_BRANCHES: dict[str, tuple[str | None, str | None]] = {
+    "b." + cond: (("b." + inverse if inverse else None), CUSTOM_AARCH64_BRANCH19)
     for (cond, inverse) in _AARCH64_COND_CODES.items()
 } | {
-    "bc." + cond: ("bc." + inverse if inverse else None)
+    "bc." + cond: (("bc." + inverse if inverse else None), CUSTOM_AARCH64_BRANCH19)
     for (cond, inverse) in _AARCH64_COND_CODES.items()
 }
 
@@ -113,7 +120,8 @@ class Optimizer:
         r'\s*(?P<label>[\w."$?@]+):'
     )
     # Override everything that follows in subclasses:
-    _branches: typing.ClassVar[dict[str, str | None]] = {}
+    _supports_external_relocations = True
+    _branches: typing.ClassVar[dict[str, tuple[str | None, str | None]]] = {}
     # Two groups (instruction and target):
     _re_branch: typing.ClassVar[re.Pattern[str]] = _RE_NEVER_MATCH
     # One group (target):
@@ -170,7 +178,10 @@ class Optimizer:
     def _invert_branch(cls, line: str, target: str) -> str | None:
         match = cls._re_branch.match(line)
         assert match
-        inverted = cls._branches.get(match["instruction"])
+        inverted_reloc = cls._branches.get(match["instruction"])
+        if inverted_reloc is None:
+            return None
+        inverted = inverted_reloc[0]
         if not inverted:
             return None
         (a, b), (c, d) = match.span("instruction"), match.span("target")
@@ -302,27 +313,45 @@ class Optimizer:
                 block.fallthrough = True
                 block.instructions.pop()
 
+    def _fixup_external_labels(self) -> None:
+        if self._supports_external_relocations:
+            # Nothing to fix up
+            return
+        for block in self._blocks():
+            if block.target and block.fallthrough:
+                branch = block.instructions[-1]
+                match = self._re_branch.match(branch)
+                assert match is not None
+                target = match["target"]
+                reloc = self._branches[match["instruction"]][1]
+                if reloc is not None and not target.startswith(self.label_prefix):
+                    name = target[len(self.symbol_prefix) :]
+                    block.instructions[-1] = (
+                        f"// target='{target}' prefix='{self.label_prefix}'"
+                    )
+                    block.instructions.append(
+                        f"{self.symbol_prefix}{reloc}_JIT_RELOCATION_{name}:"
+                    )
+                    a, b = match.span("target")
+                    branch = "".join([branch[:a], "0", branch[b:]])
+                    block.instructions.append(branch)
+
     def run(self) -> None:
         """Run this optimizer."""
         self._insert_continue_label()
         self._mark_hot_blocks()
         self._invert_hot_branches()
         self._remove_redundant_jumps()
+        self._fixup_external_labels()
         self.path.write_text(self._body())
 
 
-# Mach-O does not support the 19 bit branch locations needed for branch reordering
-class OptimizerAArch64_MachO(Optimizer):  # pylint: disable = too-few-public-methods
-    """aarch64-apple-darwin"""
-
-    # https://developer.arm.com/documentation/ddi0602/2025-03/Base-Instructions/B--Branch-
-    _re_jump = re.compile(r"\s*b\s+(?P<target>[\w.]+)")
-
-
 class OptimizerAArch64(Optimizer):  # pylint: disable = too-few-public-methods
-    """aarch64-pc-windows-msvc/aarch64-unknown-linux-gnu"""
+    """aarch64-pc-windows-msvc/aarch64-apple-darwin/aarch64-unknown-linux-gnu"""
 
     _branches = _AARCH64_BRANCHES
+    # Mach-O does not support the 19 bit branch locations needed for branch reordering
+    _supports_external_relocations = False
     _re_branch = re.compile(
         rf"\s*(?P<instruction>{'|'.join(_AARCH64_BRANCHES)})\s+(.+,\s+)*(?P<target>[\w.]+)"
     )
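diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py
index 16bc1ea4e17e6b15ba5f7ef2741e485f31b5be21..777db7366b186bbb58cfec303c8e5a5d0f84cb0a 100644
--- a/Tools/jit/_stencils.py
+++ b/Tools/jit/_stencils.py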
@@ -58,6 +58,7 @@ _PATCH_FUNCS = {
     "ARM64_RELOC_PAGE21": "patch_aarch64_21r",
     "ARM64_RELOC_PAGEOFF12": "patch_aarch64_12",
     "ARM64_RELOC_UNSIGNED": "patch_64",
+    "CUSTOM_AARCH64_BRANCH19": "patch_aarch64_19r",
     # x86_64-pc-windows-msvc:
     "IMAGE_REL_AMD64_REL32": "patch_x86_64_32rx",
     # aarch64-pc-windows-msvc:
@@ -221,6 +222,17 @@ class StencilGroup:
     _got: dict[str, int] = dataclasses.field(default_factory=dict, init=False)
     _trampolines: set[int] = dataclasses.field(default_factory=set, init=False)
 
+    def convert_labels_to_relocations(self) -> None:
+        for name, hole_plus in self.symbols.items():
+            if isinstance(name, str) and "_JIT_RELOCATION_" in name:
+                _, offset = hole_plus
+                reloc, target = name.split("_JIT_RELOCATION_")
+                value, symbol = symbol_to_value(target)
+                hole = Hole(
+                    int(offset), typing.cast(_schema.HoleKind, reloc), value, symbol, 0
+                )
+                self.code.holes.append(hole)
+
     def process_relocations(self, known_symbols: dict[str, int]) -> None:
         """Fix up all GOT and internal relocations for this stencil group."""
         for hole in self.code.holes.copy():
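diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py
index 7ff7c4fba496521c8dc827c725733390c377ac1f..dcc0abaf23f16d7b1c8592f823e03c31752d65a0 100644
--- a/Tools/jit/_targets.py
+++ b/Tools/jit/_targets.py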
@@ -218,6 +218,7 @@ class _Target(typing.Generic[_S, _R]):
                     tasks.append(group.create_task(coro, name=opname))
         stencil_groups = {task.get_name(): task.result() for task in tasks}
         for stencil_group in stencil_groups.values():
+            stencil_group.convert_labels_to_relocations()
             stencil_group.process_relocations(self.known_symbols)
         return stencil_groups
 
@@ -565,7 +566,7 @@ def get_target(host: str) -> _COFF32 | _COFF64 | _ELF | _MachO:
     if re.fullmatch(r"aarch64-apple-darwin.*", host):
         host = "aarch64-apple-darwin"
         condition = "defined(__aarch64__) && defined(__APPLE__)"
-        optimizer = _optimizers.OptimizerAArch64_MachO
+        optimizer = _optimizers.OptimizerAArch64
         target = _MachO(host, condition, optimizer=optimizer)
     elif re.fullmatch(r"aarch64-pc-windows-msvc", host):
         host = "aarch64-pc-windows-msvc"