From 4e2ff4ac4cc725572e1458a91418deeadd03d8aa Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Mon, 3 Nov 2025 10:01:44 -0800 Subject: [PATCH] GH-136895: Update JIT builds to use LLVM 20 (#140329) Co-authored-by: Emma Harper Smith --- .github/workflows/jit.yml | 6 +- ...-10-19-10-32-28.gh-issue-136895.HfsEh0.rst | 1 + PCbuild/get_external.py | 63 ++++++++++++---- PCbuild/get_externals.bat | 8 ++- Python/jit.c | 72 +++++++++++++++---- Tools/jit/README.md | 20 +++--- Tools/jit/_llvm.py | 4 +- Tools/jit/_stencils.py | 17 +++++ Tools/jit/_targets.py | 10 +-- 9 files changed, 151 insertions(+), 50 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-10-19-10-32-28.gh-issue-136895.HfsEh0.rst diff --git a/.github/workflows/jit.yml b/.github/workflows/jit.yml index c32bf4fd63cc..151b17e84425 100644 --- a/.github/workflows/jit.yml +++ b/.github/workflows/jit.yml @@ -68,7 +68,7 @@ jobs: - true - false llvm: - - 19 + - 20 include: - target: i686-pc-windows-msvc/msvc architecture: Win32 @@ -138,7 +138,7 @@ jobs: fail-fast: false matrix: llvm: - - 19 + - 20 steps: - uses: actions/checkout@v4 with: @@ -166,7 +166,7 @@ jobs: fail-fast: false matrix: llvm: - - 19 + - 20 steps: - uses: actions/checkout@v4 with: diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-10-19-10-32-28.gh-issue-136895.HfsEh0.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-19-10-32-28.gh-issue-136895.HfsEh0.rst new file mode 100644 index 000000000000..fffc264a8650 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-19-10-32-28.gh-issue-136895.HfsEh0.rst @@ -0,0 +1 @@ +Update JIT compilation to use LLVM 20 at build time. diff --git a/PCbuild/get_external.py b/PCbuild/get_external.py index a78aa6a23041..07970624e864 100755 --- a/PCbuild/get_external.py +++ b/PCbuild/get_external.py @@ -3,6 +3,7 @@ import argparse import os import pathlib +import shutil import sys import time import urllib.error @@ -22,15 +23,13 @@ def retrieve_with_retries(download_location, output_path, reporthook, ) except (urllib.error.URLError, ConnectionError) as ex: if attempt == max_retries: - msg = f"Download from {download_location} failed." - raise OSError(msg) from ex + raise OSError(f'Download from {download_location} failed.') from ex time.sleep(2.25**attempt) else: return resp - def fetch_zip(commit_hash, zip_dir, *, org='python', binary=False, verbose): - repo = f'cpython-{"bin" if binary else "source"}-deps' + repo = 'cpython-bin-deps' if binary else 'cpython-source-deps' url = f'https://github.com/{org}/{repo}/archive/{commit_hash}.zip' reporthook = None if verbose: @@ -44,6 +43,23 @@ def fetch_zip(commit_hash, zip_dir, *, org='python', binary=False, verbose): return filename +def fetch_release(tag, tarball_dir, *, org='python', verbose=False): + url = f'https://github.com/{org}/cpython-bin-deps/releases/download/{tag}/{tag}.tar.xz' + reporthook = None + if verbose: + reporthook = print + tarball_dir.mkdir(parents=True, exist_ok=True) + output_path = tarball_dir / f'{tag}.tar.xz' + retrieve_with_retries(url, output_path, reporthook) + return output_path + + +def extract_tarball(externals_dir, tarball_path, tag): + output_path = externals_dir / tag + shutil.unpack_archive(os.fspath(tarball_path), os.fspath(output_path)) + return output_path + + def extract_zip(externals_dir, zip_path): with zipfile.ZipFile(os.fspath(zip_path)) as zf: zf.extractall(os.fspath(externals_dir)) @@ -55,6 +71,8 @@ def parse_args(): p.add_argument('-v', '--verbose', action='store_true') p.add_argument('-b', '--binary', action='store_true', help='Is the dependency in the binary repo?') + p.add_argument('-r', '--release', action='store_true', + help='Download from GitHub release assets instead of branch') p.add_argument('-O', '--organization', help='Organization owning the deps repos', default='python') p.add_argument('-e', '--externals-dir', type=pathlib.Path, @@ -67,15 +85,36 @@ def parse_args(): def main(): args = parse_args() - zip_path = fetch_zip( - args.tag, - args.externals_dir / 'zips', - org=args.organization, - binary=args.binary, - verbose=args.verbose, - ) final_name = args.externals_dir / args.tag - extracted = extract_zip(args.externals_dir, zip_path) + + # Check if the dependency already exists in externals/ directory + # (either already downloaded/extracted, or checked into the git tree) + if final_name.exists(): + if args.verbose: + print(f'{args.tag} already exists at {final_name}, skipping download.') + return + + # Determine download method: release artifacts for large deps (like LLVM), + # otherwise zip download from GitHub branches + if args.release: + tarball_path = fetch_release( + args.tag, + args.externals_dir / 'tarballs', + org=args.organization, + verbose=args.verbose, + ) + extracted = extract_tarball(args.externals_dir, tarball_path, args.tag) + else: + # Use zip download from GitHub branches + # (cpython-bin-deps if --binary, cpython-source-deps otherwise) + zip_path = fetch_zip( + args.tag, + args.externals_dir / 'zips', + org=args.organization, + binary=args.binary, + verbose=args.verbose, + ) + extracted = extract_zip(args.externals_dir, zip_path) for wait in [1, 2, 3, 5, 8, 0]: try: extracted.replace(final_name) diff --git a/PCbuild/get_externals.bat b/PCbuild/get_externals.bat index 50a227b563a7..319024e0f50f 100644 --- a/PCbuild/get_externals.bat +++ b/PCbuild/get_externals.bat @@ -82,7 +82,7 @@ if NOT "%IncludeLibffi%"=="false" set binaries=%binaries% libffi-3.4.4 if NOT "%IncludeSSL%"=="false" set binaries=%binaries% openssl-bin-3.0.18 if NOT "%IncludeTkinter%"=="false" set binaries=%binaries% tcltk-8.6.15.0 if NOT "%IncludeSSLSrc%"=="false" set binaries=%binaries% nasm-2.11.06 -if NOT "%IncludeLLVM%"=="false" set binaries=%binaries% llvm-19.1.7.0 +if NOT "%IncludeLLVM%"=="false" set binaries=%binaries% llvm-20.1.8.0 for %%b in (%binaries%) do ( if exist "%EXTERNALS_DIR%\%%b" ( @@ -92,7 +92,11 @@ for %%b in (%binaries%) do ( git clone --depth 1 https://github.com/%ORG%/cpython-bin-deps --branch %%b "%EXTERNALS_DIR%\%%b" ) else ( echo.Fetching %%b... - %PYTHON% -E "%PCBUILD%\get_external.py" -b -O %ORG% -e "%EXTERNALS_DIR%" %%b + if "%%b"=="llvm-20.1.8.0" ( + %PYTHON% -E "%PCBUILD%\get_external.py" --release --organization %ORG% --externals-dir "%EXTERNALS_DIR%" %%b + ) else ( + %PYTHON% -E "%PCBUILD%\get_external.py" --binary --organization %ORG% --externals-dir "%EXTERNALS_DIR%" %%b + ) ) ) diff --git a/Python/jit.c b/Python/jit.c index c3f3d686013f..279e1ce6a0d2 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -444,17 +444,42 @@ patch_x86_64_32rx(unsigned char *location, uint64_t value) } void patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state); +void patch_x86_64_trampoline(unsigned char *location, int ordinal, jit_state *state); #include "jit_stencils.h" #if defined(__aarch64__) || defined(_M_ARM64) #define TRAMPOLINE_SIZE 16 #define DATA_ALIGN 8 +#elif defined(__x86_64__) && defined(__APPLE__) + // LLVM 20 on macOS x86_64 debug builds: GOT entries may exceed ±2GB PC-relative + // range. + #define TRAMPOLINE_SIZE 16 // 14 bytes + 2 bytes padding for alignment + #define DATA_ALIGN 8 #else #define TRAMPOLINE_SIZE 0 #define DATA_ALIGN 1 #endif +// Get the trampoline memory location for a given symbol ordinal. +static unsigned char * +get_trampoline_slot(int ordinal, jit_state *state) +{ + const uint32_t symbol_mask = 1 << (ordinal % 32); + const uint32_t trampoline_mask = state->trampolines.mask[ordinal / 32]; + assert(symbol_mask & trampoline_mask); + + // Count the number of set bits in the trampoline mask lower than ordinal + int index = _Py_popcount32(trampoline_mask & (symbol_mask - 1)); + for (int i = 0; i < ordinal / 32; i++) { + index += _Py_popcount32(state->trampolines.mask[i]); + } + + unsigned char *trampoline = state->trampolines.mem + index * TRAMPOLINE_SIZE; + assert((size_t)(index + 1) * TRAMPOLINE_SIZE <= state->trampolines.size); + return trampoline; +} + // Generate and patch AArch64 trampolines. The symbols to jump to are stored // in the jit_stencils.h in the symbols_map. void @@ -471,20 +496,8 @@ patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state) return; } - // Masking is done modulo 32 as the mask is stored as an array of uint32_t - const uint32_t symbol_mask = 1 << (ordinal % 32); - const uint32_t trampoline_mask = state->trampolines.mask[ordinal / 32]; - assert(symbol_mask & trampoline_mask); - - // Count the number of set bits in the trampoline mask lower than ordinal, - // this gives the index into the array of trampolines. - int index = _Py_popcount32(trampoline_mask & (symbol_mask - 1)); - for (int i = 0; i < ordinal / 32; i++) { - index += _Py_popcount32(state->trampolines.mask[i]); - } - - uint32_t *p = (uint32_t*)(state->trampolines.mem + index * TRAMPOLINE_SIZE); - assert((size_t)(index + 1) * TRAMPOLINE_SIZE <= state->trampolines.size); + // Out of range - need a trampoline + uint32_t *p = (uint32_t *)get_trampoline_slot(ordinal, state); /* Generate the trampoline @@ -501,6 +514,37 @@ patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state) patch_aarch64_26r(location, (uintptr_t)p); } +// Generate and patch x86_64 trampolines. +void +patch_x86_64_trampoline(unsigned char *location, int ordinal, jit_state *state) +{ + uint64_t value = (uintptr_t)symbols_map[ordinal]; + int64_t range = (int64_t)value - 4 - (int64_t)location; + + // If we are in range of 32 signed bits, we can patch directly + if (range >= -(1LL << 31) && range < (1LL << 31)) { + patch_32r(location, value - 4); + return; + } + + // Out of range - need a trampoline + unsigned char *trampoline = get_trampoline_slot(ordinal, state); + + /* Generate the trampoline (14 bytes, padded to 16): + 0: ff 25 00 00 00 00 jmp *(%rip) + 6: XX XX XX XX XX XX XX XX (64-bit target address) + + Reference: https://wiki.osdev.org/X86-64_Instruction_Encoding#FF (JMP r/m64) + */ + trampoline[0] = 0xFF; + trampoline[1] = 0x25; + memset(trampoline + 2, 0, 4); + memcpy(trampoline + 6, &value, 8); + + // Patch the call site to call the trampoline instead + patch_32r(location, (uintptr_t)trampoline - 4); +} + static void combine_symbol_mask(const symbol_mask src, symbol_mask dest) { diff --git a/Tools/jit/README.md b/Tools/jit/README.md index 35c7ffd7a283..d83b09aab59f 100644 --- a/Tools/jit/README.md +++ b/Tools/jit/README.md @@ -9,32 +9,32 @@ Python 3.11 or newer is required to build the JIT. The JIT compiler does not require end users to install any third-party dependencies, but part of it must be *built* using LLVM[^why-llvm]. You are *not* required to build the rest of CPython using LLVM, or even the same version of LLVM (in fact, this is uncommon). -LLVM version 19 is the officially supported version. You can modify if needed using the `LLVM_VERSION` env var during configure. Both `clang` and `llvm-readobj` need to be installed and discoverable (version suffixes, like `clang-19`, are okay). It's highly recommended that you also have `llvm-objdump` available, since this allows the build script to dump human-readable assembly for the generated code. +LLVM version 20 is the officially supported version. You can modify if needed using the `LLVM_VERSION` env var during configure. Both `clang` and `llvm-readobj` need to be installed and discoverable (version suffixes, like `clang-19`, are okay). It's highly recommended that you also have `llvm-objdump` available, since this allows the build script to dump human-readable assembly for the generated code. It's easy to install all of the required tools: ### Linux -Install LLVM 19 on Ubuntu/Debian: +Install LLVM 20 on Ubuntu/Debian: ```sh wget https://apt.llvm.org/llvm.sh chmod +x llvm.sh -sudo ./llvm.sh 19 +sudo ./llvm.sh 20 ``` -Install LLVM 19 on Fedora Linux 40 or newer: +Install LLVM 20 on Fedora Linux 40 or newer: ```sh -sudo dnf install 'clang(major) = 19' 'llvm(major) = 19' +sudo dnf install 'clang(major) = 20' 'llvm(major) = 20' ``` ### macOS -Install LLVM 19 with [Homebrew](https://brew.sh): +Install LLVM 20 with [Homebrew](https://brew.sh): ```sh -brew install llvm@19 +brew install llvm@20 ``` Homebrew won't add any of the tools to your `$PATH`. That's okay; the build script knows how to find them. @@ -43,18 +43,18 @@ Homebrew won't add any of the tools to your `$PATH`. That's okay; the build scri LLVM is downloaded automatically (along with other external binary dependencies) by `PCbuild\build.bat`. -Otherwise, you can install LLVM 19 [by searching for it on LLVM's GitHub releases page](https://github.com/llvm/llvm-project/releases?q=19), clicking on "Assets", downloading the appropriate Windows installer for your platform (likely the file ending with `-win64.exe`), and running it. **When installing, be sure to select the option labeled "Add LLVM to the system PATH".** +Otherwise, you can install LLVM 20 [by searching for it on LLVM's GitHub releases page](https://github.com/llvm/llvm-project/releases?q=20), clicking on "Assets", downloading the appropriate Windows installer for your platform (likely the file ending with `-win64.exe`), and running it. **When installing, be sure to select the option labeled "Add LLVM to the system PATH".** Alternatively, you can use [chocolatey](https://chocolatey.org): ```sh -choco install llvm --version=19.1.0 +choco install llvm --version=20.1.8 ``` ### Dev Containers If you are working on CPython in a [Codespaces instance](https://devguide.python.org/getting-started/setup-building/#using-codespaces), there's no -need to install LLVM as the Fedora 41 base image includes LLVM 19 out of the box. +need to install LLVM as the Fedora 42 base image includes LLVM 20 out of the box. ## Building diff --git a/Tools/jit/_llvm.py b/Tools/jit/_llvm.py index bc3b50ffe616..54c2bf86a36e 100644 --- a/Tools/jit/_llvm.py +++ b/Tools/jit/_llvm.py @@ -11,8 +11,8 @@ import typing import _targets -_LLVM_VERSION = "19" -_EXTERNALS_LLVM_TAG = "llvm-19.1.7.0" +_LLVM_VERSION = "20" +_EXTERNALS_LLVM_TAG = "llvm-20.1.8.0" _P = typing.ParamSpec("_P") _R = typing.TypeVar("_R") diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py index 777db7366b18..e717365b6b97 100644 --- a/Tools/jit/_stencils.py +++ b/Tools/jit/_stencils.py @@ -253,6 +253,23 @@ class StencilGroup: self._trampolines.add(ordinal) hole.addend = ordinal hole.symbol = None + # x86_64 Darwin trampolines for external symbols + elif ( + hole.kind == "X86_64_RELOC_BRANCH" + and hole.value is HoleValue.ZERO + and hole.symbol not in self.symbols + ): + hole.func = "patch_x86_64_trampoline" + hole.need_state = True + assert hole.symbol is not None + if hole.symbol in known_symbols: + ordinal = known_symbols[hole.symbol] + else: + ordinal = len(known_symbols) + known_symbols[hole.symbol] = ordinal + self._trampolines.add(ordinal) + hole.addend = ordinal + hole.symbol = None self.data.pad(8) for stencil in [self.code, self.data]: for hole in stencil.holes: diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index dcc0abaf23f1..a76d8ff27926 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -166,10 +166,6 @@ class _Target(typing.Generic[_S, _R]): "-fno-asynchronous-unwind-tables", # Don't call built-in functions that we can't find or patch: "-fno-builtin", - # Emit relaxable 64-bit calls/jumps, so we don't have to worry about - # about emitting in-range trampolines for out-of-range targets. - # We can probably remove this and emit trampolines in the future: - "-fno-plt", # Don't call stack-smashing canaries that we can't find or patch: "-fno-stack-protector", "-std=c11", @@ -571,14 +567,14 @@ def get_target(host: str) -> _COFF32 | _COFF64 | _ELF | _MachO: elif re.fullmatch(r"aarch64-pc-windows-msvc", host): host = "aarch64-pc-windows-msvc" condition = "defined(_M_ARM64)" - args = ["-fms-runtime-lib=dll", "-fplt"] + args = ["-fms-runtime-lib=dll"] optimizer = _optimizers.OptimizerAArch64 target = _COFF64(host, condition, args=args, optimizer=optimizer) elif re.fullmatch(r"aarch64-.*-linux-gnu", host): host = "aarch64-unknown-linux-gnu" condition = "defined(__aarch64__) && defined(__linux__)" # -mno-outline-atomics: Keep intrinsics from being emitted. - args = ["-fpic", "-mno-outline-atomics"] + args = ["-fpic", "-mno-outline-atomics", "-fno-plt"] optimizer = _optimizers.OptimizerAArch64 target = _ELF(host, condition, args=args, optimizer=optimizer) elif re.fullmatch(r"i686-pc-windows-msvc", host): @@ -602,7 +598,7 @@ def get_target(host: str) -> _COFF32 | _COFF64 | _ELF | _MachO: elif re.fullmatch(r"x86_64-.*-linux-gnu", host): host = "x86_64-unknown-linux-gnu" condition = "defined(__x86_64__) && defined(__linux__)" - args = ["-fno-pic", "-mcmodel=medium", "-mlarge-data-threshold=0"] + args = ["-fno-pic", "-mcmodel=medium", "-mlarge-data-threshold=0", "-fno-plt"] optimizer = _optimizers.OptimizerX86 target = _ELF(host, condition, args=args, optimizer=optimizer) else: -- 2.47.3