- true
- false
llvm:
- - 19
+ - 20
include:
- target: i686-pc-windows-msvc/msvc
architecture: Win32
fail-fast: false
matrix:
llvm:
- - 19
+ - 20
steps:
- uses: actions/checkout@v4
with:
fail-fast: false
matrix:
llvm:
- - 19
+ - 20
steps:
- uses: actions/checkout@v4
with:
--- /dev/null
+Update JIT compilation to use LLVM 20 at build time.
import argparse
import os
import pathlib
+import shutil
import sys
import time
import urllib.error
)
except (urllib.error.URLError, ConnectionError) as ex:
if attempt == max_retries:
- msg = f"Download from {download_location} failed."
- raise OSError(msg) from ex
+ raise OSError(f'Download from {download_location} failed.') from ex
time.sleep(2.25**attempt)
else:
return resp
-
def fetch_zip(commit_hash, zip_dir, *, org='python', binary=False, verbose):
- repo = f'cpython-{"bin" if binary else "source"}-deps'
+ repo = 'cpython-bin-deps' if binary else 'cpython-source-deps'
url = f'https://github.com/{org}/{repo}/archive/{commit_hash}.zip'
reporthook = None
if verbose:
return filename
+def fetch_release(tag, tarball_dir, *, org='python', verbose=False):
+ url = f'https://github.com/{org}/cpython-bin-deps/releases/download/{tag}/{tag}.tar.xz'
+ reporthook = None
+ if verbose:
+ reporthook = print
+ tarball_dir.mkdir(parents=True, exist_ok=True)
+ output_path = tarball_dir / f'{tag}.tar.xz'
+ retrieve_with_retries(url, output_path, reporthook)
+ return output_path
+
+
+def extract_tarball(externals_dir, tarball_path, tag):
+ output_path = externals_dir / tag
+ shutil.unpack_archive(os.fspath(tarball_path), os.fspath(output_path))
+ return output_path
+
+
def extract_zip(externals_dir, zip_path):
with zipfile.ZipFile(os.fspath(zip_path)) as zf:
zf.extractall(os.fspath(externals_dir))
p.add_argument('-v', '--verbose', action='store_true')
p.add_argument('-b', '--binary', action='store_true',
help='Is the dependency in the binary repo?')
+ p.add_argument('-r', '--release', action='store_true',
+ help='Download from GitHub release assets instead of branch')
p.add_argument('-O', '--organization',
help='Organization owning the deps repos', default='python')
p.add_argument('-e', '--externals-dir', type=pathlib.Path,
def main():
args = parse_args()
- zip_path = fetch_zip(
- args.tag,
- args.externals_dir / 'zips',
- org=args.organization,
- binary=args.binary,
- verbose=args.verbose,
- )
final_name = args.externals_dir / args.tag
- extracted = extract_zip(args.externals_dir, zip_path)
+
+ # Check if the dependency already exists in externals/ directory
+ # (either already downloaded/extracted, or checked into the git tree)
+ if final_name.exists():
+ if args.verbose:
+ print(f'{args.tag} already exists at {final_name}, skipping download.')
+ return
+
+ # Determine download method: release artifacts for large deps (like LLVM),
+ # otherwise zip download from GitHub branches
+ if args.release:
+ tarball_path = fetch_release(
+ args.tag,
+ args.externals_dir / 'tarballs',
+ org=args.organization,
+ verbose=args.verbose,
+ )
+ extracted = extract_tarball(args.externals_dir, tarball_path, args.tag)
+ else:
+ # Use zip download from GitHub branches
+ # (cpython-bin-deps if --binary, cpython-source-deps otherwise)
+ zip_path = fetch_zip(
+ args.tag,
+ args.externals_dir / 'zips',
+ org=args.organization,
+ binary=args.binary,
+ verbose=args.verbose,
+ )
+ extracted = extract_zip(args.externals_dir, zip_path)
for wait in [1, 2, 3, 5, 8, 0]:
try:
extracted.replace(final_name)
if NOT "%IncludeSSL%"=="false" set binaries=%binaries% openssl-bin-3.0.18
if NOT "%IncludeTkinter%"=="false" set binaries=%binaries% tcltk-8.6.15.0
if NOT "%IncludeSSLSrc%"=="false" set binaries=%binaries% nasm-2.11.06
-if NOT "%IncludeLLVM%"=="false" set binaries=%binaries% llvm-19.1.7.0
+if NOT "%IncludeLLVM%"=="false" set binaries=%binaries% llvm-20.1.8.0
for %%b in (%binaries%) do (
if exist "%EXTERNALS_DIR%\%%b" (
git clone --depth 1 https://github.com/%ORG%/cpython-bin-deps --branch %%b "%EXTERNALS_DIR%\%%b"
) else (
echo.Fetching %%b...
- %PYTHON% -E "%PCBUILD%\get_external.py" -b -O %ORG% -e "%EXTERNALS_DIR%" %%b
+ if "%%b"=="llvm-20.1.8.0" (
+ %PYTHON% -E "%PCBUILD%\get_external.py" --release --organization %ORG% --externals-dir "%EXTERNALS_DIR%" %%b
+ ) else (
+ %PYTHON% -E "%PCBUILD%\get_external.py" --binary --organization %ORG% --externals-dir "%EXTERNALS_DIR%" %%b
+ )
)
)
}
void patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state);
+void patch_x86_64_trampoline(unsigned char *location, int ordinal, jit_state *state);
#include "jit_stencils.h"
#if defined(__aarch64__) || defined(_M_ARM64)
#define TRAMPOLINE_SIZE 16
#define DATA_ALIGN 8
+#elif defined(__x86_64__) && defined(__APPLE__)
+ // LLVM 20 on macOS x86_64 debug builds: GOT entries may exceed ±2GB PC-relative
+ // range.
+ #define TRAMPOLINE_SIZE 16 // 14 bytes + 2 bytes padding for alignment
+ #define DATA_ALIGN 8
#else
#define TRAMPOLINE_SIZE 0
#define DATA_ALIGN 1
#endif
+// Get the trampoline memory location for a given symbol ordinal.
+//
+// state->trampolines.mask is a bitmap, stored as an array of uint32_t, with
+// one bit per symbol ordinal. The slot index of a symbol is the number of set
+// bits that precede its own bit, and each slot is TRAMPOLINE_SIZE bytes wide.
+// The symbol's bit must be set in the mask (checked by assert).
+// Returns a pointer into state->trampolines.mem.
+static unsigned char *
+get_trampoline_slot(int ordinal, jit_state *state)
+{
+    // Masking is done modulo 32 as the mask is stored as an array of uint32_t.
+    // The shifted operand is unsigned: `1 << 31` on a signed int is undefined
+    // behavior, and ordinal % 32 can be 31.
+    const uint32_t symbol_mask = (uint32_t)1 << (ordinal % 32);
+    const uint32_t trampoline_mask = state->trampolines.mask[ordinal / 32];
+    assert(symbol_mask & trampoline_mask);
+
+    // Count the number of set bits in the trampoline mask lower than ordinal,
+    // this gives the index into the array of trampolines.
+    int index = _Py_popcount32(trampoline_mask & (symbol_mask - 1));
+    for (int i = 0; i < ordinal / 32; i++) {
+        index += _Py_popcount32(state->trampolines.mask[i]);
+    }
+
+    unsigned char *trampoline = state->trampolines.mem + index * TRAMPOLINE_SIZE;
+    // The selected slot must lie entirely inside the trampoline area.
+    assert((size_t)(index + 1) * TRAMPOLINE_SIZE <= state->trampolines.size);
+    return trampoline;
+}
+
// Generate and patch AArch64 trampolines. The symbols to jump to are stored
// in the jit_stencils.h in the symbols_map.
void
return;
}
- // Masking is done modulo 32 as the mask is stored as an array of uint32_t
- const uint32_t symbol_mask = 1 << (ordinal % 32);
- const uint32_t trampoline_mask = state->trampolines.mask[ordinal / 32];
- assert(symbol_mask & trampoline_mask);
-
- // Count the number of set bits in the trampoline mask lower than ordinal,
- // this gives the index into the array of trampolines.
- int index = _Py_popcount32(trampoline_mask & (symbol_mask - 1));
- for (int i = 0; i < ordinal / 32; i++) {
- index += _Py_popcount32(state->trampolines.mask[i]);
- }
-
- uint32_t *p = (uint32_t*)(state->trampolines.mem + index * TRAMPOLINE_SIZE);
- assert((size_t)(index + 1) * TRAMPOLINE_SIZE <= state->trampolines.size);
+ // Out of range - need a trampoline
+ uint32_t *p = (uint32_t *)get_trampoline_slot(ordinal, state);
/* Generate the trampoline
patch_aarch64_26r(location, (uintptr_t)p);
}
+// Generate and patch x86_64 trampolines. The jump target is looked up by
+// ordinal in symbols_map. When the target is reachable from the call site
+// with a signed 32-bit PC-relative displacement, the call site is patched
+// directly; otherwise a trampoline slot is filled in and the call site is
+// pointed at that slot instead.
+void
+patch_x86_64_trampoline(unsigned char *location, int ordinal, jit_state *state)
+{
+    const uint64_t target = (uintptr_t)symbols_map[ordinal];
+    const int64_t displacement = (int64_t)target - 4 - (int64_t)location;
+    const int fits_in_rel32 =
+        -(1LL << 31) <= displacement && displacement < (1LL << 31);
+
+    if (!fits_in_rel32) {
+        // Out of range - go through a trampoline
+        unsigned char *slot = get_trampoline_slot(ordinal, state);
+
+        /* Trampoline layout (14 bytes, padded to 16):
+             0: ff 25 00 00 00 00        jmp *(%rip)
+             6: XX XX XX XX XX XX XX XX  (64-bit target address)
+
+           Reference: https://wiki.osdev.org/X86-64_Instruction_Encoding#FF (JMP r/m64)
+        */
+        static const unsigned char jmp_rip[6] = {
+            0xFF, 0x25, 0x00, 0x00, 0x00, 0x00,
+        };
+        memcpy(slot, jmp_rip, sizeof(jmp_rip));
+        memcpy(slot + sizeof(jmp_rip), &target, sizeof(target));
+
+        // Redirect the call site to the trampoline
+        patch_32r(location, (uintptr_t)slot - 4);
+        return;
+    }
+
+    // Within signed 32-bit range: patch the call site directly
+    patch_32r(location, target - 4);
+}
+
static void
combine_symbol_mask(const symbol_mask src, symbol_mask dest)
{
The JIT compiler does not require end users to install any third-party dependencies, but part of it must be *built* using LLVM[^why-llvm]. You are *not* required to build the rest of CPython using LLVM, or even the same version of LLVM (in fact, this is uncommon).
-LLVM version 19 is the officially supported version. You can modify if needed using the `LLVM_VERSION` env var during configure. Both `clang` and `llvm-readobj` need to be installed and discoverable (version suffixes, like `clang-19`, are okay). It's highly recommended that you also have `llvm-objdump` available, since this allows the build script to dump human-readable assembly for the generated code.
+LLVM version 20 is the officially supported version. You can override it if needed using the `LLVM_VERSION` env var during configure. Both `clang` and `llvm-readobj` need to be installed and discoverable (version suffixes, like `clang-20`, are okay). It's highly recommended that you also have `llvm-objdump` available, since this allows the build script to dump human-readable assembly for the generated code.
It's easy to install all of the required tools:
### Linux
-Install LLVM 19 on Ubuntu/Debian:
+Install LLVM 20 on Ubuntu/Debian:
```sh
wget https://apt.llvm.org/llvm.sh
chmod +x llvm.sh
-sudo ./llvm.sh 19
+sudo ./llvm.sh 20
```
-Install LLVM 19 on Fedora Linux 40 or newer:
+Install LLVM 20 on Fedora Linux 40 or newer:
```sh
-sudo dnf install 'clang(major) = 19' 'llvm(major) = 19'
+sudo dnf install 'clang(major) = 20' 'llvm(major) = 20'
```
### macOS
-Install LLVM 19 with [Homebrew](https://brew.sh):
+Install LLVM 20 with [Homebrew](https://brew.sh):
```sh
-brew install llvm@19
+brew install llvm@20
```
Homebrew won't add any of the tools to your `$PATH`. That's okay; the build script knows how to find them.
LLVM is downloaded automatically (along with other external binary dependencies) by `PCbuild\build.bat`.
-Otherwise, you can install LLVM 19 [by searching for it on LLVM's GitHub releases page](https://github.com/llvm/llvm-project/releases?q=19), clicking on "Assets", downloading the appropriate Windows installer for your platform (likely the file ending with `-win64.exe`), and running it. **When installing, be sure to select the option labeled "Add LLVM to the system PATH".**
+Otherwise, you can install LLVM 20 [by searching for it on LLVM's GitHub releases page](https://github.com/llvm/llvm-project/releases?q=20), clicking on "Assets", downloading the appropriate Windows installer for your platform (likely the file ending with `-win64.exe`), and running it. **When installing, be sure to select the option labeled "Add LLVM to the system PATH".**
Alternatively, you can use [chocolatey](https://chocolatey.org):
```sh
-choco install llvm --version=19.1.0
+choco install llvm --version=20.1.8
```
### Dev Containers
If you are working on CPython in a [Codespaces instance](https://devguide.python.org/getting-started/setup-building/#using-codespaces), there's no
-need to install LLVM as the Fedora 41 base image includes LLVM 19 out of the box.
+need to install LLVM as the Fedora 42 base image includes LLVM 20 out of the box.
## Building
import _targets
-_LLVM_VERSION = "19"
-_EXTERNALS_LLVM_TAG = "llvm-19.1.7.0"
+_LLVM_VERSION = "20"
+_EXTERNALS_LLVM_TAG = "llvm-20.1.8.0"
_P = typing.ParamSpec("_P")
_R = typing.TypeVar("_R")
self._trampolines.add(ordinal)
hole.addend = ordinal
hole.symbol = None
+ # x86_64 Darwin trampolines for external symbols
+ elif (
+ hole.kind == "X86_64_RELOC_BRANCH"
+ and hole.value is HoleValue.ZERO
+ and hole.symbol not in self.symbols
+ ):
+ hole.func = "patch_x86_64_trampoline"
+ hole.need_state = True
+ assert hole.symbol is not None
+ if hole.symbol in known_symbols:
+ ordinal = known_symbols[hole.symbol]
+ else:
+ ordinal = len(known_symbols)
+ known_symbols[hole.symbol] = ordinal
+ self._trampolines.add(ordinal)
+ hole.addend = ordinal
+ hole.symbol = None
self.data.pad(8)
for stencil in [self.code, self.data]:
for hole in stencil.holes:
"-fno-asynchronous-unwind-tables",
# Don't call built-in functions that we can't find or patch:
"-fno-builtin",
- # Emit relaxable 64-bit calls/jumps, so we don't have to worry about
- # about emitting in-range trampolines for out-of-range targets.
- # We can probably remove this and emit trampolines in the future:
- "-fno-plt",
# Don't call stack-smashing canaries that we can't find or patch:
"-fno-stack-protector",
"-std=c11",
elif re.fullmatch(r"aarch64-pc-windows-msvc", host):
host = "aarch64-pc-windows-msvc"
condition = "defined(_M_ARM64)"
- args = ["-fms-runtime-lib=dll", "-fplt"]
+ args = ["-fms-runtime-lib=dll"]
optimizer = _optimizers.OptimizerAArch64
target = _COFF64(host, condition, args=args, optimizer=optimizer)
elif re.fullmatch(r"aarch64-.*-linux-gnu", host):
host = "aarch64-unknown-linux-gnu"
condition = "defined(__aarch64__) && defined(__linux__)"
# -mno-outline-atomics: Keep intrinsics from being emitted.
- args = ["-fpic", "-mno-outline-atomics"]
+ args = ["-fpic", "-mno-outline-atomics", "-fno-plt"]
optimizer = _optimizers.OptimizerAArch64
target = _ELF(host, condition, args=args, optimizer=optimizer)
elif re.fullmatch(r"i686-pc-windows-msvc", host):
elif re.fullmatch(r"x86_64-.*-linux-gnu", host):
host = "x86_64-unknown-linux-gnu"
condition = "defined(__x86_64__) && defined(__linux__)"
- args = ["-fno-pic", "-mcmodel=medium", "-mlarge-data-threshold=0"]
+ args = ["-fno-pic", "-mcmodel=medium", "-mlarge-data-threshold=0", "-fno-plt"]
optimizer = _optimizers.OptimizerX86
target = _ELF(host, condition, args=args, optimizer=optimizer)
else: