]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
GH-136895: Update JIT builds to use LLVM 20 (#140329)
authorSavannah Ostrowski <savannah@python.org>
Mon, 3 Nov 2025 18:01:44 +0000 (10:01 -0800)
committerGitHub <noreply@github.com>
Mon, 3 Nov 2025 18:01:44 +0000 (10:01 -0800)
Co-authored-by: Emma Harper Smith <emma@emmatyping.dev>
.github/workflows/jit.yml
Misc/NEWS.d/next/Core_and_Builtins/2025-10-19-10-32-28.gh-issue-136895.HfsEh0.rst [new file with mode: 0644]
PCbuild/get_external.py
PCbuild/get_externals.bat
Python/jit.c
Tools/jit/README.md
Tools/jit/_llvm.py
Tools/jit/_stencils.py
Tools/jit/_targets.py

index c32bf4fd63cc8f3737146bcb7b54945650a23169..151b17e8442582d9946920a54acd2cb72d4b9c63 100644 (file)
@@ -68,7 +68,7 @@ jobs:
           - true
           - false
         llvm:
-          - 19
+          - 20
         include:
           - target: i686-pc-windows-msvc/msvc
             architecture: Win32
@@ -138,7 +138,7 @@ jobs:
       fail-fast: false
       matrix:
         llvm:
-          - 19
+          - 20
     steps:
       - uses: actions/checkout@v4
         with:
@@ -166,7 +166,7 @@ jobs:
       fail-fast: false
       matrix:
         llvm:
-          - 19
+          - 20
     steps:
       - uses: actions/checkout@v4
         with:
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-10-19-10-32-28.gh-issue-136895.HfsEh0.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-19-10-32-28.gh-issue-136895.HfsEh0.rst
new file mode 100644 (file)
index 0000000..fffc264
--- /dev/null
@@ -0,0 +1 @@
+Update JIT compilation to use LLVM 20 at build time.
index a78aa6a23041adf4b3d378bc6893dab19121e749..07970624e8647e2c6e825a5baa2fb9b3e3b3c4c3 100755 (executable)
@@ -3,6 +3,7 @@
 import argparse
 import os
 import pathlib
+import shutil
 import sys
 import time
 import urllib.error
@@ -22,15 +23,13 @@ def retrieve_with_retries(download_location, output_path, reporthook,
             )
         except (urllib.error.URLError, ConnectionError) as ex:
             if attempt == max_retries:
-                msg = f"Download from {download_location} failed."
-                raise OSError(msg) from ex
+                raise OSError(f'Download from {download_location} failed.') from ex
             time.sleep(2.25**attempt)
         else:
             return resp
 
-
 def fetch_zip(commit_hash, zip_dir, *, org='python', binary=False, verbose):
-    repo = f'cpython-{"bin" if binary else "source"}-deps'
+    repo = 'cpython-bin-deps' if binary else 'cpython-source-deps'
     url = f'https://github.com/{org}/{repo}/archive/{commit_hash}.zip'
     reporthook = None
     if verbose:
@@ -44,6 +43,23 @@ def fetch_zip(commit_hash, zip_dir, *, org='python', binary=False, verbose):
     return filename
 
 
+def fetch_release(tag, tarball_dir, *, org='python', verbose=False):
+    url = f'https://github.com/{org}/cpython-bin-deps/releases/download/{tag}/{tag}.tar.xz'
+    reporthook = None
+    if verbose:
+        reporthook = print
+    tarball_dir.mkdir(parents=True, exist_ok=True)
+    output_path = tarball_dir / f'{tag}.tar.xz'
+    retrieve_with_retries(url, output_path, reporthook)
+    return output_path
+
+
+def extract_tarball(externals_dir, tarball_path, tag):
+    output_path = externals_dir / tag
+    shutil.unpack_archive(os.fspath(tarball_path), os.fspath(output_path))
+    return output_path
+
+
 def extract_zip(externals_dir, zip_path):
     with zipfile.ZipFile(os.fspath(zip_path)) as zf:
         zf.extractall(os.fspath(externals_dir))
@@ -55,6 +71,8 @@ def parse_args():
     p.add_argument('-v', '--verbose', action='store_true')
     p.add_argument('-b', '--binary', action='store_true',
                    help='Is the dependency in the binary repo?')
+    p.add_argument('-r', '--release', action='store_true',
+                   help='Download from GitHub release assets instead of branch')
     p.add_argument('-O', '--organization',
                    help='Organization owning the deps repos', default='python')
     p.add_argument('-e', '--externals-dir', type=pathlib.Path,
@@ -67,15 +85,36 @@ def parse_args():
 
 def main():
     args = parse_args()
-    zip_path = fetch_zip(
-        args.tag,
-        args.externals_dir / 'zips',
-        org=args.organization,
-        binary=args.binary,
-        verbose=args.verbose,
-    )
     final_name = args.externals_dir / args.tag
-    extracted = extract_zip(args.externals_dir, zip_path)
+
+    # Check if the dependency already exists in externals/ directory
+    # (either already downloaded/extracted, or checked into the git tree)
+    if final_name.exists():
+        if args.verbose:
+            print(f'{args.tag} already exists at {final_name}, skipping download.')
+        return
+
+    # Determine download method: release artifacts for large deps (like LLVM),
+    # otherwise zip download from GitHub branches
+    if args.release:
+        tarball_path = fetch_release(
+            args.tag,
+            args.externals_dir / 'tarballs',
+            org=args.organization,
+            verbose=args.verbose,
+        )
+        extracted = extract_tarball(args.externals_dir, tarball_path, args.tag)
+    else:
+        # Use zip download from GitHub branches
+        # (cpython-bin-deps if --binary, cpython-source-deps otherwise)
+        zip_path = fetch_zip(
+            args.tag,
+            args.externals_dir / 'zips',
+            org=args.organization,
+            binary=args.binary,
+            verbose=args.verbose,
+        )
+        extracted = extract_zip(args.externals_dir, zip_path)
     for wait in [1, 2, 3, 5, 8, 0]:
         try:
             extracted.replace(final_name)
index 50a227b563a7c0d99f8d8e6a3277db85b5a80505..319024e0f50f46541f252afe5c89e8bbe228d1f8 100644 (file)
@@ -82,7 +82,7 @@ if NOT "%IncludeLibffi%"=="false"  set binaries=%binaries% libffi-3.4.4
 if NOT "%IncludeSSL%"=="false"     set binaries=%binaries% openssl-bin-3.0.18
 if NOT "%IncludeTkinter%"=="false" set binaries=%binaries% tcltk-8.6.15.0
 if NOT "%IncludeSSLSrc%"=="false"  set binaries=%binaries% nasm-2.11.06
-if NOT "%IncludeLLVM%"=="false"    set binaries=%binaries% llvm-19.1.7.0
+if NOT "%IncludeLLVM%"=="false"    set binaries=%binaries% llvm-20.1.8.0
 
 for %%b in (%binaries%) do (
     if exist "%EXTERNALS_DIR%\%%b" (
@@ -92,7 +92,11 @@ for %%b in (%binaries%) do (
         git clone --depth 1 https://github.com/%ORG%/cpython-bin-deps --branch %%b "%EXTERNALS_DIR%\%%b"
     ) else (
         echo.Fetching %%b...
-        %PYTHON% -E "%PCBUILD%\get_external.py" -b -O %ORG% -e "%EXTERNALS_DIR%" %%b
+        if "%%b"=="llvm-20.1.8.0" (
+            %PYTHON% -E "%PCBUILD%\get_external.py" --release --organization %ORG% --externals-dir "%EXTERNALS_DIR%" %%b
+        ) else (
+            %PYTHON% -E "%PCBUILD%\get_external.py" --binary --organization %ORG% --externals-dir "%EXTERNALS_DIR%" %%b
+        )
     )
 )
 
index c3f3d686013fe4a3240ae33796dcec2c0567d29b..279e1ce6a0d2e5cba7b5c46fc3276d60c898bdfd 100644 (file)
@@ -444,17 +444,42 @@ patch_x86_64_32rx(unsigned char *location, uint64_t value)
 }
 
 void patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state);
+void patch_x86_64_trampoline(unsigned char *location, int ordinal, jit_state *state);
 
 #include "jit_stencils.h"
 
 #if defined(__aarch64__) || defined(_M_ARM64)
     #define TRAMPOLINE_SIZE 16
     #define DATA_ALIGN 8
+#elif defined(__x86_64__) && defined(__APPLE__)
+    // LLVM 20 on macOS x86_64 debug builds: GOT entries may exceed ±2GB PC-relative
+    // range.
+    #define TRAMPOLINE_SIZE 16  // 14 bytes + 2 bytes padding for alignment
+    #define DATA_ALIGN 8
 #else
     #define TRAMPOLINE_SIZE 0
     #define DATA_ALIGN 1
 #endif
 
+// Get the trampoline memory location for a given symbol ordinal.
+//
+// state->trampolines.mask is read as an array of 32-bit words with one bit
+// per symbol ordinal; the bit for `ordinal` must be set. Slots are laid out
+// TRAMPOLINE_SIZE bytes apart starting at state->trampolines.mem, and the
+// slot index is the number of set mask bits below `ordinal`.
+static unsigned char *
+get_trampoline_slot(int ordinal, jit_state *state)
+{
+    // Masking is done modulo 32 as each mask word holds 32 bits. Shift an
+    // unsigned value: `1 << 31` overflows a 32-bit signed int, which is
+    // undefined behavior when ordinal % 32 == 31.
+    const uint32_t symbol_mask = (uint32_t)1 << (ordinal % 32);
+    const uint32_t trampoline_mask = state->trampolines.mask[ordinal / 32];
+    // Callers may only ask for symbols whose bit is set in the mask.
+    assert(symbol_mask & trampoline_mask);
+
+    // Count the number of set bits in the trampoline mask lower than ordinal:
+    // first within the symbol's own mask word...
+    int index = _Py_popcount32(trampoline_mask & (symbol_mask - 1));
+    // ...then in every preceding mask word.
+    for (int i = 0; i < ordinal / 32; i++) {
+        index += _Py_popcount32(state->trampolines.mask[i]);
+    }
+
+    unsigned char *trampoline = state->trampolines.mem + index * TRAMPOLINE_SIZE;
+    // The whole slot must fit within state->trampolines.size bytes.
+    assert((size_t)(index + 1) * TRAMPOLINE_SIZE <= state->trampolines.size);
+    return trampoline;
+}
+
 // Generate and patch AArch64 trampolines. The symbols to jump to are stored
 // in the jit_stencils.h in the symbols_map.
 void
@@ -471,20 +496,8 @@ patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state)
         return;
     }
 
-    // Masking is done modulo 32 as the mask is stored as an array of uint32_t
-    const uint32_t symbol_mask = 1 << (ordinal % 32);
-    const uint32_t trampoline_mask = state->trampolines.mask[ordinal / 32];
-    assert(symbol_mask & trampoline_mask);
-
-    // Count the number of set bits in the trampoline mask lower than ordinal,
-    // this gives the index into the array of trampolines.
-    int index = _Py_popcount32(trampoline_mask & (symbol_mask - 1));
-    for (int i = 0; i < ordinal / 32; i++) {
-        index += _Py_popcount32(state->trampolines.mask[i]);
-    }
-
-    uint32_t *p = (uint32_t*)(state->trampolines.mem + index * TRAMPOLINE_SIZE);
-    assert((size_t)(index + 1) * TRAMPOLINE_SIZE <= state->trampolines.size);
+    // Out of range - need a trampoline
+    uint32_t *p = (uint32_t *)get_trampoline_slot(ordinal, state);
 
 
     /* Generate the trampoline
@@ -501,6 +514,37 @@ patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state)
     patch_aarch64_26r(location, (uintptr_t)p);
 }
 
+// Generate and patch x86_64 trampolines.
+//
+// Looks up the target address for `ordinal` in symbols_map. If the target is
+// within signed 32-bit range of `location`, the site is patched directly with
+// patch_32r(); otherwise a trampoline holding the full 64-bit address is
+// written into the slot returned by get_trampoline_slot() and the site is
+// patched to reference that trampoline instead.
+void
+patch_x86_64_trampoline(unsigned char *location, int ordinal, jit_state *state)
+{
+    uint64_t value = (uintptr_t)symbols_map[ordinal];
+    // Signed distance from `location` to (value - 4), i.e. the same operand
+    // that is handed to patch_32r() below.
+    // NOTE(review): presumably the 4 accounts for the 32-bit field itself;
+    // confirm against patch_32r().
+    int64_t range = (int64_t)value - 4 - (int64_t)location;
+
+    // If we are in range of 32 signed bits, we can patch directly
+    if (range >= -(1LL << 31) && range < (1LL << 31)) {
+        patch_32r(location, value - 4);
+        return;
+    }
+
+    // Out of range - need a trampoline
+    unsigned char *trampoline = get_trampoline_slot(ordinal, state);
+
+    /* Generate the trampoline (14 bytes, padded to 16):
+       0: ff 25 00 00 00 00    jmp *(%rip)
+       6: XX XX XX XX XX XX XX XX   (64-bit target address)
+
+       Reference: https://wiki.osdev.org/X86-64_Instruction_Encoding#FF (JMP r/m64)
+    */
+    // Opcode and ModRM bytes of the indirect jump.
+    trampoline[0] = 0xFF;
+    trampoline[1] = 0x25;
+    // Zero 32-bit displacement: the address operand sits directly after the
+    // 6-byte instruction, at offset 6.
+    memset(trampoline + 2, 0, 4);
+    // Store the 64-bit target address in the slot's bytes 6..13.
+    memcpy(trampoline + 6, &value, 8);
+
+    // Patch the call site to call the trampoline instead
+    patch_32r(location, (uintptr_t)trampoline - 4);
+}
+
 static void
 combine_symbol_mask(const symbol_mask src, symbol_mask dest)
 {
index 35c7ffd7a283f8c647fd64e8abe990f1eec3f84e..d83b09aab59f8cdda839a7ee9635b9547bb7c267 100644 (file)
@@ -9,32 +9,32 @@ Python 3.11 or newer is required to build the JIT.
 
 The JIT compiler does not require end users to install any third-party dependencies, but part of it must be *built* using LLVM[^why-llvm]. You are *not* required to build the rest of CPython using LLVM, or even the same version of LLVM (in fact, this is uncommon).
 
-LLVM version 19 is the officially supported version. You can modify if needed using the `LLVM_VERSION` env var during configure. Both `clang` and `llvm-readobj` need to be installed and discoverable (version suffixes, like `clang-19`, are okay). It's highly recommended that you also have `llvm-objdump` available, since this allows the build script to dump human-readable assembly for the generated code.
+LLVM version 20 is the officially supported version. You can override this, if needed, using the `LLVM_VERSION` env var during configure. Both `clang` and `llvm-readobj` need to be installed and discoverable (version suffixes, like `clang-20`, are okay). It's highly recommended that you also have `llvm-objdump` available, since this allows the build script to dump human-readable assembly for the generated code.
 
 It's easy to install all of the required tools:
 
 ### Linux
 
-Install LLVM 19 on Ubuntu/Debian:
+Install LLVM 20 on Ubuntu/Debian:
 
 ```sh
 wget https://apt.llvm.org/llvm.sh
 chmod +x llvm.sh
-sudo ./llvm.sh 19
+sudo ./llvm.sh 20
 ```
 
-Install LLVM 19 on Fedora Linux 40 or newer:
+Install LLVM 20 on Fedora Linux 40 or newer:
 
 ```sh
-sudo dnf install 'clang(major) = 19' 'llvm(major) = 19'
+sudo dnf install 'clang(major) = 20' 'llvm(major) = 20'
 ```
 
 ### macOS
 
-Install LLVM 19 with [Homebrew](https://brew.sh):
+Install LLVM 20 with [Homebrew](https://brew.sh):
 
 ```sh
-brew install llvm@19
+brew install llvm@20
 ```
 
 Homebrew won't add any of the tools to your `$PATH`. That's okay; the build script knows how to find them.
@@ -43,18 +43,18 @@ Homebrew won't add any of the tools to your `$PATH`. That's okay; the build scri
 
 LLVM is downloaded automatically (along with other external binary dependencies) by `PCbuild\build.bat`.
 
-Otherwise, you can install LLVM 19 [by searching for it on LLVM's GitHub releases page](https://github.com/llvm/llvm-project/releases?q=19), clicking on "Assets", downloading the appropriate Windows installer for your platform (likely the file ending with `-win64.exe`), and running it. **When installing, be sure to select the option labeled "Add LLVM to the system PATH".**
+Otherwise, you can install LLVM 20 [by searching for it on LLVM's GitHub releases page](https://github.com/llvm/llvm-project/releases?q=20), clicking on "Assets", downloading the appropriate Windows installer for your platform (likely the file ending with `-win64.exe`), and running it. **When installing, be sure to select the option labeled "Add LLVM to the system PATH".**
 
 Alternatively, you can use [chocolatey](https://chocolatey.org):
 
 ```sh
-choco install llvm --version=19.1.0
+choco install llvm --version=20.1.8
 ```
 
 ### Dev Containers
 
 If you are working on CPython in a [Codespaces instance](https://devguide.python.org/getting-started/setup-building/#using-codespaces), there's no 
-need to install LLVM as the Fedora 41 base image includes LLVM 19 out of the box.
+need to install LLVM as the Fedora 42 base image includes LLVM 20 out of the box.
 
 ## Building
 
index bc3b50ffe616348200a02976b57061451ca2df12..54c2bf86a36ed6c5f41860005a22cb09dbac08e2 100644 (file)
@@ -11,8 +11,8 @@ import typing
 import _targets
 
 
-_LLVM_VERSION = "19"
-_EXTERNALS_LLVM_TAG = "llvm-19.1.7.0"
+_LLVM_VERSION = "20"
+_EXTERNALS_LLVM_TAG = "llvm-20.1.8.0"
 
 _P = typing.ParamSpec("_P")
 _R = typing.TypeVar("_R")
index 777db7366b186bbb58cfec303c8e5a5d0f84cb0a..e717365b6b97854f2c5ae08035014b6b3baba3fc 100644 (file)
@@ -253,6 +253,23 @@ class StencilGroup:
                 self._trampolines.add(ordinal)
                 hole.addend = ordinal
                 hole.symbol = None
+            # x86_64 Darwin trampolines for external symbols
+            elif (
+                hole.kind == "X86_64_RELOC_BRANCH"
+                and hole.value is HoleValue.ZERO
+                and hole.symbol not in self.symbols
+            ):
+                hole.func = "patch_x86_64_trampoline"
+                hole.need_state = True
+                assert hole.symbol is not None
+                if hole.symbol in known_symbols:
+                    ordinal = known_symbols[hole.symbol]
+                else:
+                    ordinal = len(known_symbols)
+                    known_symbols[hole.symbol] = ordinal
+                self._trampolines.add(ordinal)
+                hole.addend = ordinal
+                hole.symbol = None
         self.data.pad(8)
         for stencil in [self.code, self.data]:
             for hole in stencil.holes:
index dcc0abaf23f16d7b1c8592f823e03c31752d65a0..a76d8ff2792602797f4dab86f99569e1ed41b1ed 100644 (file)
@@ -166,10 +166,6 @@ class _Target(typing.Generic[_S, _R]):
             "-fno-asynchronous-unwind-tables",
             # Don't call built-in functions that we can't find or patch:
             "-fno-builtin",
-            # Emit relaxable 64-bit calls/jumps, so we don't have to worry about
-            # about emitting in-range trampolines for out-of-range targets.
-            # We can probably remove this and emit trampolines in the future:
-            "-fno-plt",
             # Don't call stack-smashing canaries that we can't find or patch:
             "-fno-stack-protector",
             "-std=c11",
@@ -571,14 +567,14 @@ def get_target(host: str) -> _COFF32 | _COFF64 | _ELF | _MachO:
     elif re.fullmatch(r"aarch64-pc-windows-msvc", host):
         host = "aarch64-pc-windows-msvc"
         condition = "defined(_M_ARM64)"
-        args = ["-fms-runtime-lib=dll", "-fplt"]
+        args = ["-fms-runtime-lib=dll"]
         optimizer = _optimizers.OptimizerAArch64
         target = _COFF64(host, condition, args=args, optimizer=optimizer)
     elif re.fullmatch(r"aarch64-.*-linux-gnu", host):
         host = "aarch64-unknown-linux-gnu"
         condition = "defined(__aarch64__) && defined(__linux__)"
         # -mno-outline-atomics: Keep intrinsics from being emitted.
-        args = ["-fpic", "-mno-outline-atomics"]
+        args = ["-fpic", "-mno-outline-atomics", "-fno-plt"]
         optimizer = _optimizers.OptimizerAArch64
         target = _ELF(host, condition, args=args, optimizer=optimizer)
     elif re.fullmatch(r"i686-pc-windows-msvc", host):
@@ -602,7 +598,7 @@ def get_target(host: str) -> _COFF32 | _COFF64 | _ELF | _MachO:
     elif re.fullmatch(r"x86_64-.*-linux-gnu", host):
         host = "x86_64-unknown-linux-gnu"
         condition = "defined(__x86_64__) && defined(__linux__)"
-        args = ["-fno-pic", "-mcmodel=medium", "-mlarge-data-threshold=0"]
+        args = ["-fno-pic", "-mcmodel=medium", "-mlarge-data-threshold=0", "-fno-plt"]
         optimizer = _optimizers.OptimizerX86
         target = _ELF(host, condition, args=args, optimizer=optimizer)
     else: