git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
GH-126910: Build/link the JIT shim in the Python interpreter (#148872)
author Diego Russo <diego.russo@arm.com>
Thu, 23 Apr 2026 11:23:18 +0000 (12:23 +0100)
committer GitHub <noreply@github.com>
Thu, 23 Apr 2026 11:23:18 +0000 (12:23 +0100)
16 files changed:
Include/internal/pycore_ceval.h
Include/internal/pycore_jit.h
Makefile.pre.in
PCbuild/pyproject.props
PCbuild/pythoncore.vcxproj
PCbuild/regen.targets
Python/ceval.c
Python/jit.c
Python/pylifecycle.c
Python/pystate.c
Tools/jit/_targets.py
Tools/jit/_writer.py
Tools/jit/build.py
Tools/jit/shim.c
configure
configure.ac

index ee8eb1095fe541ef3974a82f65460b2603b88a98..f9507fda1606dbea2d8da130f38daa2915b06203 100644 (file)
@@ -121,18 +121,11 @@ _PyEval_EvalFrame(PyThreadState *tstate, _PyInterpreterFrame *frame, int throwfl
 }
 
 #ifdef _Py_TIER2
-#ifdef _Py_JIT
-_Py_CODEUNIT *_Py_LazyJitShim(
-    struct _PyExecutorObject *current_executor, _PyInterpreterFrame *frame,
-    _PyStackRef *stack_pointer, PyThreadState *tstate
-);
-#else
 _Py_CODEUNIT *_PyTier2Interpreter(
     struct _PyExecutorObject *current_executor, _PyInterpreterFrame *frame,
     _PyStackRef *stack_pointer, PyThreadState *tstate
 );
 #endif
-#endif
 
 extern _PyJitEntryFuncPtr _Py_jit_entry;
 
index 70bccce4166c18a04516b16bc725f1f8d4ba7219..b3cadcce8247d0d99f59ba4d394beda379d53a31 100644 (file)
@@ -23,9 +23,13 @@ typedef _Py_CODEUNIT *(*jit_func)(
     _PyStackRef _tos_cache0, _PyStackRef _tos_cache1, _PyStackRef _tos_cache2
 );
 
+_Py_CODEUNIT *_PyJIT(
+    _PyExecutorObject *executor, _PyInterpreterFrame *frame,
+    _PyStackRef *stack_pointer, PyThreadState *tstate
+);
+
 int _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction *trace, size_t length);
 void _PyJIT_Free(_PyExecutorObject *executor);
-void _PyJIT_Fini(void);
 PyAPI_FUNC(int) _PyJIT_AddressInJitCode(PyInterpreterState *interp, uintptr_t addr);
 
 #endif  // _Py_JIT
index 57fce05d476e9ec1596e0333f83527d0aa37617f..8b46db33a2ac1884332d97263edd4c94c9b2391a 100644 (file)
@@ -290,6 +290,7 @@ LDLIBRARYDIR=   @LDLIBRARYDIR@
 INSTSONAME=    @INSTSONAME@
 LIBRARY_DEPS=  @LIBRARY_DEPS@
 LINK_PYTHON_DEPS=@LINK_PYTHON_DEPS@
+JIT_OBJS=      @JIT_SHIM_O@
 PY_ENABLE_SHARED=      @PY_ENABLE_SHARED@
 STATIC_LIBPYTHON=      @STATIC_LIBPYTHON@
 
@@ -469,6 +470,7 @@ PYTHON_OBJS=        \
                Python/instruction_sequence.o \
                Python/intrinsics.o \
                Python/jit.o \
+               $(JIT_OBJS) \
                Python/legacy_tracing.o \
                Python/lock.o \
                Python/marshal.o \
@@ -3204,21 +3206,37 @@ Python/emscripten_trampoline_inner.wasm: $(srcdir)/Python/emscripten_trampoline_
 Python/emscripten_trampoline_wasm.c: Python/emscripten_trampoline_inner.wasm
        $(PYTHON_FOR_REGEN) $(srcdir)/Platforms/emscripten/prepare_external_wasm.py $< $@ getWasmTrampolineModule
 
+JIT_SHIM_BUILD_OBJS=   @JIT_SHIM_BUILD_O@
+JIT_BUILD_TARGETS=     jit_stencils.h @JIT_STENCILS_H@ $(JIT_SHIM_BUILD_OBJS)
+JIT_TARGETS=   $(JIT_BUILD_TARGETS) $(filter-out $(JIT_SHIM_BUILD_OBJS),$(JIT_OBJS))
+JIT_GENERATED_STAMP=   .jit-stamp
+
 JIT_DEPS = \
                $(srcdir)/Tools/jit/*.c \
+               $(srcdir)/Tools/jit/*.h \
                $(srcdir)/Tools/jit/*.py \
                $(srcdir)/Python/executor_cases.c.h \
                pyconfig.h
 
-jit_stencils.h @JIT_STENCILS_H@: $(JIT_DEPS)
+$(JIT_GENERATED_STAMP): $(JIT_DEPS)
        @REGEN_JIT_COMMAND@
+       @touch $@
+
+$(JIT_BUILD_TARGETS): $(JIT_GENERATED_STAMP)
+       @if test ! -f "$@"; then \
+               rm -f $(JIT_GENERATED_STAMP); \
+               $(MAKE) $(JIT_GENERATED_STAMP); \
+               test -f "$@"; \
+       fi
+
+jit_shim-universal2-apple-darwin.o: jit_shim-aarch64-apple-darwin.o jit_shim-x86_64-apple-darwin.o
+       lipo -create -output $@ jit_shim-aarch64-apple-darwin.o jit_shim-x86_64-apple-darwin.o
 
 Python/jit.o: $(srcdir)/Python/jit.c @JIT_STENCILS_H@
        $(CC) -c $(PY_CORE_CFLAGS) -o $@ $<
 
 .PHONY: regen-jit
-regen-jit:
-       @REGEN_JIT_COMMAND@
+regen-jit: $(JIT_TARGETS)
 
 # Some make's put the object file in the current directory
 .c.o:
@@ -3342,7 +3360,7 @@ clean-profile: clean-retain-profile clean-bolt
 # gh-141808: The JIT stencils are deliberately kept in clean-profile
 .PHONY: clean-jit-stencils
 clean-jit-stencils:
-       -rm -f jit_stencils*.h
+       -rm -f $(JIT_TARGETS) $(JIT_GENERATED_STAMP) jit_stencils*.h jit_shim*.o
 
 .PHONY: clean
 clean: clean-profile clean-jit-stencils
index 94ae718d58c4ba964e45668386cffaab99a26166..f79608e1d58dbc8c8266209c42882ec24081c96b 100644 (file)
@@ -12,8 +12,9 @@
     <IntDir>$(IntDir.Replace(`\\`, `\`))</IntDir>
     <GeneratedFrozenModulesDir>$(Py_IntDir)\$(MajorVersionNumber)$(MinorVersionNumber)_frozen\</GeneratedFrozenModulesDir>
     <GeneratedZlibNgDir>$(Py_IntDir)\$(MajorVersionNumber)$(MinorVersionNumber)$(ArchName)_$(Configuration)\zlib-ng\</GeneratedZlibNgDir>
-    <GeneratedJitStencilsDir>$(Py_IntDir)\$(MajorVersionNumber)$(MinorVersionNumber)_$(Configuration)</GeneratedJitStencilsDir>
-    <GeneratedJitStencilsDir Condition="$(Configuration) == 'PGUpdate'">$(Py_IntDir)\$(MajorVersionNumber)$(MinorVersionNumber)_PGInstrument</GeneratedJitStencilsDir>
+    <GeneratedJitStencilsDir>$(Py_IntDir)\$(MajorVersionNumber)$(MinorVersionNumber)_$(Configuration)\</GeneratedJitStencilsDir>
+    <GeneratedJitStencilsDir Condition="$(Configuration) == 'PGUpdate'">$(Py_IntDir)\$(MajorVersionNumber)$(MinorVersionNumber)_PGInstrument\</GeneratedJitStencilsDir>
+    <GeneratedJitStencilsDir>$(GeneratedJitStencilsDir.Replace(`\\`, `\`))</GeneratedJitStencilsDir>
     <TargetName Condition="'$(TargetName)' == ''">$(ProjectName)</TargetName>
     <TargetName>$(TargetName)$(PyDebugExt)</TargetName>
     <GenerateManifest>false</GenerateManifest>
index fe70e02536bbb600fcde24ba6f90ce28a935caea..07305add81d055a0803a98320a09f03b77214403 100644 (file)
     <Link>
       <AdditionalDependencies>version.lib;ws2_32.lib;pathcch.lib;bcrypt.lib;%(AdditionalDependencies)</AdditionalDependencies>
       <AdditionalDependencies Condition="$(IncludeExternals)">zlib-ng$(PyDebugExt).lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies Condition="'$(UseJIT)' == 'true' and $(Platform) == 'ARM64'">$(GeneratedJitStencilsDir)jit_shim-aarch64-pc-windows-msvc.o;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies Condition="'$(UseJIT)' == 'true' and $(Platform) == 'Win32'">$(GeneratedJitStencilsDir)jit_shim-i686-pc-windows-msvc.o;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies Condition="'$(UseJIT)' == 'true' and $(Platform) == 'x64'">$(GeneratedJitStencilsDir)jit_shim-x86_64-pc-windows-msvc.o;%(AdditionalDependencies)</AdditionalDependencies>
     </Link>
   </ItemDefinitionGroup>
   <ItemGroup>
index bb059f382eb375f2442c8c51c7e4d82cd570998e..9552e73ef6a2eca91868cfca7921290da5447709 100644 (file)
@@ -35,6 +35,9 @@
     <_JITOutputs Include="$(GeneratedJitStencilsDir)jit_stencils-aarch64-pc-windows-msvc.h" Condition="$(Platform) == 'ARM64'"/>
     <_JITOutputs Include="$(GeneratedJitStencilsDir)jit_stencils-i686-pc-windows-msvc.h" Condition="$(Platform) == 'Win32'"/>
     <_JITOutputs Include="$(GeneratedJitStencilsDir)jit_stencils-x86_64-pc-windows-msvc.h" Condition="$(Platform) == 'x64'"/>
+    <_JITOutputs Include="$(GeneratedJitStencilsDir)jit_shim-aarch64-pc-windows-msvc.o" Condition="$(Platform) == 'ARM64'"/>
+    <_JITOutputs Include="$(GeneratedJitStencilsDir)jit_shim-i686-pc-windows-msvc.o" Condition="$(Platform) == 'Win32'"/>
+    <_JITOutputs Include="$(GeneratedJitStencilsDir)jit_shim-x86_64-pc-windows-msvc.o" Condition="$(Platform) == 'x64'"/>
     <_CasesSources Include="$(PySourcePath)Python\bytecodes.c;$(PySourcePath)Python\optimizer_bytecodes.c;"/>
     <_CasesOutputs Include="$(PySourcePath)Python\generated_cases.c.h;$(PySourcePath)Include\opcode_ids.h;$(PySourcePath)Include\internal\pycore_uop_ids.h;$(PySourcePath)Python\opcode_targets.h;$(PySourcePath)Include\internal\pycore_opcode_metadata.h;$(PySourcePath)Include\internal\pycore_uop_metadata.h;$(PySourcePath)Python\optimizer_cases.c.h;$(PySourcePath)Lib\_opcode_metadata.py"/>
     <_SbomSources Include="$(PySourcePath)PCbuild\get_externals.bat" />
       <JITArgs Condition="$(Platform) == 'x64'">x86_64-pc-windows-msvc</JITArgs>
       <JITArgs Condition="$(Configuration) == 'Debug'">$(JITArgs) --debug</JITArgs>
     </PropertyGroup>
-    <Exec Command='$(PythonForBuild) "$(PySourcePath)Tools\jit\build.py" $(JITArgs) --output-dir "$(GeneratedJitStencilsDir)" --pyconfig-dir "$(PySourcePath)PC" --llvm-version="$(LLVM_VERSION)" --llvm-tools-install-dir="$(LLVM_TOOLS_INSTALL_DIR)"'/>
+    <Exec Command='$(PythonForBuild) "$(PySourcePath)Tools\jit\build.py" $(JITArgs) --output-dir "$(GeneratedJitStencilsDir.TrimEnd(`\`))" --pyconfig-dir "$(PySourcePath)PC" --llvm-version="$(LLVM_VERSION)" --llvm-tools-install-dir="$(LLVM_TOOLS_INSTALL_DIR)"'/>
   </Target>
   <Target Name="_CleanJIT" AfterTargets="Clean">
     <Delete Files="@(_JITOutputs)"/>
index 967d92f4ea68557b8e2411ab19d3453ddc12aed3..506ea591c385c0f9ebf863c7dac636229c820b53 100644 (file)
@@ -1305,7 +1305,7 @@ early_exit:
 }
 #ifdef _Py_TIER2
 #ifdef _Py_JIT
-_PyJitEntryFuncPtr _Py_jit_entry = _Py_LazyJitShim;
+_PyJitEntryFuncPtr _Py_jit_entry = _PyJIT;
 #else
 _PyJitEntryFuncPtr _Py_jit_entry = _PyTier2Interpreter;
 #endif
index af75acf1ff2bb342a8fe5689022ddb31e0631031..26e01b25d48c04a9165350273261eb71cc3652fa 100644 (file)
@@ -60,8 +60,6 @@ jit_error(const char *message)
     PyErr_Format(PyExc_RuntimeWarning, "JIT %s (%d)", message, hint);
 }
 
-static size_t _Py_jit_shim_size = 0;
-
 static int
 address_in_executor_array(_PyExecutorObject **ptrs, size_t count, uintptr_t addr)
 {
@@ -104,13 +102,6 @@ _PyJIT_AddressInJitCode(PyInterpreterState *interp, uintptr_t addr)
     if (interp == NULL) {
         return 0;
     }
-    if (_Py_jit_entry != _Py_LazyJitShim && _Py_jit_shim_size != 0) {
-        uintptr_t start = (uintptr_t)_Py_jit_entry;
-        uintptr_t end = start + _Py_jit_shim_size;
-        if (addr >= start && addr < end) {
-            return 1;
-        }
-    }
     if (address_in_executor_array(interp->executor_ptrs, interp->executor_count, addr)) {
         return 1;
     }
@@ -727,75 +718,6 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz
     return 0;
 }
 
-/* One-off compilation of the jit entry shim
- * We compile this once only as it effectively a normal
- * function, but we need to use the JIT because it needs
- * to understand the jit-specific calling convention.
- * Don't forget to call _PyJIT_Fini later!
- */
-static _PyJitEntryFuncPtr
-compile_shim(void)
-{
-    _PyExecutorObject dummy;
-    const StencilGroup *group;
-    size_t code_size = 0;
-    size_t data_size = 0;
-    jit_state state = {0};
-    group = &shim;
-    code_size += group->code_size;
-    data_size += group->data_size;
-    combine_symbol_mask(group->trampoline_mask, state.trampolines.mask);
-    combine_symbol_mask(group->got_mask, state.got_symbols.mask);
-    // Round up to the nearest page:
-    size_t page_size = get_page_size();
-    assert((page_size & (page_size - 1)) == 0);
-    size_t code_padding = DATA_ALIGN - ((code_size + state.trampolines.size) & (DATA_ALIGN - 1));
-    size_t padding = page_size - ((code_size + state.trampolines.size + code_padding + data_size + state.got_symbols.size) & (page_size - 1));
-    size_t total_size = code_size + state.trampolines.size + code_padding + data_size + state.got_symbols.size + padding;
-    unsigned char *memory = jit_alloc(total_size);
-    if (memory == NULL) {
-        return NULL;
-    }
-    unsigned char *code = memory;
-    state.trampolines.mem = memory + code_size;
-    unsigned char *data = memory + code_size + state.trampolines.size + code_padding;
-    state.got_symbols.mem = data + data_size;
-    // Compile the shim, which handles converting between the native
-    // calling convention and the calling convention used by jitted code
-    // (which may be different for efficiency reasons).
-    group = &shim;
-    group->emit(code, data, &dummy, NULL, &state);
-    code += group->code_size;
-    data += group->data_size;
-    assert(code == memory + code_size);
-    assert(data == memory + code_size + state.trampolines.size + code_padding + data_size);
-    if (mark_executable(memory, total_size)) {
-        jit_free(memory, total_size);
-        return NULL;
-    }
-    _Py_jit_shim_size = total_size;
-    return (_PyJitEntryFuncPtr)memory;
-}
-
-static PyMutex lazy_jit_mutex = { 0 };
-
-_Py_CODEUNIT *
-_Py_LazyJitShim(
-    _PyExecutorObject *executor, _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate
-) {
-    PyMutex_Lock(&lazy_jit_mutex);
-    if (_Py_jit_entry == _Py_LazyJitShim) {
-        _PyJitEntryFuncPtr shim = compile_shim();
-        if (shim == NULL) {
-            PyMutex_Unlock(&lazy_jit_mutex);
-            Py_FatalError("Cannot allocate core JIT code");
-        }
-        _Py_jit_entry = shim;
-    }
-    PyMutex_Unlock(&lazy_jit_mutex);
-    return _Py_jit_entry(executor, frame, stack_pointer, tstate);
-}
-
 // Free executor's memory allocated with _PyJIT_Compile
 void
 _PyJIT_Free(_PyExecutorObject *executor)
@@ -812,22 +734,4 @@ _PyJIT_Free(_PyExecutorObject *executor)
     }
 }
 
-// Free shim memory allocated with compile_shim
-void
-_PyJIT_Fini(void)
-{
-    PyMutex_Lock(&lazy_jit_mutex);
-    unsigned char *memory = (unsigned char *)_Py_jit_entry;
-    size_t size = _Py_jit_shim_size;
-    if (size) {
-        _Py_jit_entry = _Py_LazyJitShim;
-        _Py_jit_shim_size = 0;
-        if (jit_free(memory, size)) {
-            PyErr_FormatUnraisable("Exception ignored while "
-                                   "freeing JIT entry code");
-        }
-    }
-    PyMutex_Unlock(&lazy_jit_mutex);
-}
-
 #endif  // _Py_JIT
index 0232ed6c382c61db083f78c4a20d7b98ad8d8d45..0a88e32bb6b65e4ff8b0b043daafb398080c4111 100644 (file)
@@ -37,9 +37,6 @@
 #include "pycore_uniqueid.h"      // _PyObject_FinalizeUniqueIdPool()
 #include "pycore_warnings.h"      // _PyWarnings_InitState()
 #include "pycore_weakref.h"       // _PyWeakref_GET_REF()
-#ifdef _Py_JIT
-#include "pycore_jit.h"           // _PyJIT_Fini()
-#endif
 
 #if defined(PYMALLOC_USE_HUGEPAGES) && defined(MS_WINDOWS)
 #include <Windows.h>
@@ -2531,11 +2528,6 @@ _Py_Finalize(_PyRuntimeState *runtime)
 
     finalize_interp_clear(tstate);
 
-#ifdef _Py_JIT
-    /* Free JIT shim memory */
-    _PyJIT_Fini();
-#endif
-
 #ifdef Py_TRACE_REFS
     /* Display addresses (& refcnts) of all objects still alive.
      * An address can be used to find the repr of the object, printed
index d6a26f3339b863c91fd7b611e4d436837f8ad172..b7c838a1c156ae38c558208449415ba72faa4be5 100644 (file)
@@ -489,11 +489,6 @@ free_interpreter(PyInterpreterState *interp)
 static inline int check_interpreter_whence(long);
 #endif
 
-extern _Py_CODEUNIT *
-_Py_LazyJitShim(
-    struct _PyExecutorObject *exec, _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate
-);
-
 /* Get the interpreter state to a minimal consistent state.
    Further init happens in pylifecycle.c before it can be used.
    All fields not initialized here are expected to be zeroed out,
index f78e80db165fc8796c1bd20d04380e1cb525dbce..15cac3de3fe11fbdb3fd56e67ffe33e084610324 100644 (file)
@@ -57,6 +57,12 @@ class _Target(typing.Generic[_S, _R]):
     known_symbols: dict[str, int] = dataclasses.field(default_factory=dict)
     pyconfig_dir: pathlib.Path = pathlib.Path.cwd().resolve()
 
+    def _compile_args(self) -> list[str]:
+        return list(self.args)
+
+    def _shim_compile_args(self) -> list[str]:
+        return []
+
     def _get_nop(self) -> bytes:
         if re.fullmatch(r"aarch64-.*", self.triple):
             nop = b"\x1f\x20\x03\xd5"
@@ -139,12 +145,8 @@ class _Target(typing.Generic[_S, _R]):
     ) -> _stencils.Hole:
         raise NotImplementedError(type(self))
 
-    async def _compile(
-        self, opname: str, c: pathlib.Path, tempdir: pathlib.Path
-    ) -> _stencils.StencilGroup:
-        s = tempdir / f"{opname}.s"
-        o = tempdir / f"{opname}.o"
-        args_s = [
+    def _base_clang_args(self, opname: str, tempdir: pathlib.Path) -> list[str]:
+        return [
             f"--target={self.triple}",
             "-DPy_BUILD_CORE_MODULE",
             "-D_DEBUG" if self.debug else "-DNDEBUG",
@@ -167,29 +169,38 @@ class _Target(typing.Generic[_S, _R]):
             # generates better code than -O2 (and -O2 usually generates better
             # code than -O3). As a nice benefit, it uses less memory too:
             "-Os",
-            "-S",
             # Shorten full absolute file paths in the generated code (like the
             # __FILE__ macro and assert failure messages) for reproducibility:
             f"-ffile-prefix-map={CPYTHON}=.",
             f"-ffile-prefix-map={tempdir}=.",
-            # This debug info isn't necessary, and bloats out the JIT'ed code.
-            # We *may* be able to re-enable this, process it, and JIT it for a
-            # nicer debugging experience... but that needs a lot more research:
-            "-fno-asynchronous-unwind-tables",
             # Don't call built-in functions that we can't find or patch:
             "-fno-builtin",
             # Don't call stack-smashing canaries that we can't find or patch:
             "-fno-stack-protector",
             "-std=c11",
+        ]
+
+    async def _build_stencil_group(
+        self, opname: str, c: pathlib.Path, tempdir: pathlib.Path
+    ) -> _stencils.StencilGroup:
+        s = tempdir / f"{opname}.s"
+        o = tempdir / f"{opname}.o"
+        args_s = self._base_clang_args(opname, tempdir)
+        args_s += [
+            "-S",
+            # Stencils do not need unwind info, and the optimizer does not
+            # preserve .cfi_* directives correctly. On Darwin,
+            # -fno-asynchronous-unwind-tables alone still leaves synchronous
+            # unwind directives in the assembly, so disable both forms here.
+            "-fno-unwind-tables",
+            "-fno-asynchronous-unwind-tables",
             "-o",
             f"{s}",
             f"{c}",
         ]
-        is_shim = opname == "shim"
         if self.frame_pointers:
-            frame_pointer = "all" if is_shim else "reserved"
-            args_s += ["-Xclang", f"-mframe-pointer={frame_pointer}"]
-        args_s += self.args
+            args_s += ["-Xclang", "-mframe-pointer=reserved"]
+        args_s += self._compile_args()
         # Allow user-provided CFLAGS to override any defaults
         args_s += shlex.split(self.cflags)
         await _llvm.run(
@@ -199,14 +210,13 @@ class _Target(typing.Generic[_S, _R]):
             llvm_version=self.llvm_version,
             llvm_tools_install_dir=self.llvm_tools_install_dir,
         )
-        if not is_shim:
-            self.optimizer(
-                s,
-                label_prefix=self.label_prefix,
-                symbol_prefix=self.symbol_prefix,
-                re_global=self.re_global,
-                frame_pointers=self.frame_pointers,
-            ).run()
+        self.optimizer(
+            s,
+            label_prefix=self.label_prefix,
+            symbol_prefix=self.symbol_prefix,
+            re_global=self.re_global,
+            frame_pointers=self.frame_pointers,
+        ).run()
         args_o = [f"--target={self.triple}", "-c", "-o", f"{o}", f"{s}"]
         await _llvm.run(
             "clang",
@@ -217,6 +227,30 @@ class _Target(typing.Generic[_S, _R]):
         )
         return await self._parse(o)
 
+    async def _build_shim_object(self, output: pathlib.Path) -> None:
+        with tempfile.TemporaryDirectory() as tempdir:
+            work = pathlib.Path(tempdir).resolve()
+            args_o = self._base_clang_args("shim", work)
+            args_o += self._shim_compile_args()
+            args_o += [
+                "-c",
+                # The linked shim is a real function in the final binary, so
+                # keep unwind info for debuggers and stack walkers.
+                "-fasynchronous-unwind-tables",
+            ]
+            if self.frame_pointers:
+                args_o += ["-Xclang", "-mframe-pointer=all"]
+            args_o += self._compile_args()
+            args_o += shlex.split(self.cflags)
+            args_o += ["-o", f"{output}", f"{TOOLS_JIT / 'shim.c'}"]
+            await _llvm.run(
+                "clang",
+                args_o,
+                echo=self.verbose,
+                llvm_version=self.llvm_version,
+                llvm_tools_install_dir=self.llvm_tools_install_dir,
+            )
+
     async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]:
         generated_cases = PYTHON_EXECUTOR_CASES_C_H.read_text()
         cases_and_opnames = sorted(
@@ -231,8 +265,6 @@ class _Target(typing.Generic[_S, _R]):
         with tempfile.TemporaryDirectory() as tempdir:
             work = pathlib.Path(tempdir).resolve()
             async with asyncio.TaskGroup() as group:
-                coro = self._compile("shim", TOOLS_JIT / "shim.c", work)
-                tasks.append(group.create_task(coro, name="shim"))
                 template = TOOLS_JIT_TEMPLATE_C.read_text()
                 for case, opname in cases_and_opnames:
                     # Write out a copy of the template with *only* this case
@@ -242,7 +274,7 @@ class _Target(typing.Generic[_S, _R]):
                     # all of the other cases):
                     c = work / f"{opname}.c"
                     c.write_text(template.replace("CASE", case))
-                    coro = self._compile(opname, c, work)
+                    coro = self._build_stencil_group(opname, c, work)
                     tasks.append(group.create_task(coro, name=opname))
         stencil_groups = {task.get_name(): task.result() for task in tasks}
         for stencil_group in stencil_groups.values():
@@ -256,8 +288,9 @@ class _Target(typing.Generic[_S, _R]):
         comment: str = "",
         force: bool = False,
         jit_stencils: pathlib.Path,
+        jit_shim_object: pathlib.Path,
     ) -> None:
-        """Build jit_stencils.h in the given directory."""
+        """Build jit_stencils.h and the shim object in the given directory."""
         jit_stencils.parent.mkdir(parents=True, exist_ok=True)
         if not self.stable:
             warning = f"JIT support for {self.triple} is still experimental!"
@@ -271,8 +304,10 @@ class _Target(typing.Generic[_S, _R]):
             not force
             and jit_stencils.exists()
             and jit_stencils.read_text().startswith(digest)
+            and jit_shim_object.exists()
         ):
             return
+        ASYNCIO_RUNNER.run(self._build_shim_object(jit_shim_object))
         stencil_groups = ASYNCIO_RUNNER.run(self._build_stencils())
         jit_stencils_new = jit_stencils.parent / "jit_stencils.h.new"
         try:
@@ -296,6 +331,13 @@ class _Target(typing.Generic[_S, _R]):
 class _COFF(
     _Target[_schema.COFFSection, _schema.COFFRelocation]
 ):  # pylint: disable = too-few-public-methods
+    def _shim_compile_args(self) -> list[str]:
+        # The linked shim is part of pythoncore, not a shared extension.
+        # On Windows, Py_BUILD_CORE_MODULE makes public APIs import from
+        # pythonXY.lib, which creates a self-dependency when linking
+        # pythoncore.dll. Build the shim with builtin/core semantics.
+        return ["-UPy_BUILD_CORE_MODULE", "-DPy_BUILD_CORE_BUILTIN"]
+
     def _handle_section(
         self, section: _schema.COFFSection, group: _stencils.StencilGroup
     ) -> None:
@@ -396,6 +438,10 @@ class _COFF64(_COFF):
     symbol_prefix = ""
     re_global = re.compile(r'\s*\.def\s+(?P<label>[\w."$?@]+);')
 
+    def _compile_args(self) -> list[str]:
+        runtime = "-fms-runtime-lib=dll_dbg" if self.debug else "-fms-runtime-lib=dll"
+        return [runtime, *self.args]
+
 
 class _ELF(
     _Target[_schema.ELFSection, _schema.ELFRelocation]
@@ -607,9 +653,8 @@ def get_target(host: str) -> _COFF32 | _COFF64 | _ELF | _MachO:
     elif re.fullmatch(r"aarch64-pc-windows-msvc", host):
         host = "aarch64-pc-windows-msvc"
         condition = "defined(_M_ARM64)"
-        args = ["-fms-runtime-lib=dll"]
         optimizer = _optimizers.OptimizerAArch64
-        target = _COFF64(host, condition, args=args, optimizer=optimizer)
+        target = _COFF64(host, condition, optimizer=optimizer)
     elif re.fullmatch(r"aarch64-.*-linux-gnu", host):
         host = "aarch64-unknown-linux-gnu"
         condition = "defined(__aarch64__) && defined(__linux__)"
@@ -636,9 +681,8 @@ def get_target(host: str) -> _COFF32 | _COFF64 | _ELF | _MachO:
     elif re.fullmatch(r"x86_64-pc-windows-msvc", host):
         host = "x86_64-pc-windows-msvc"
         condition = "defined(_M_X64)"
-        args = ["-fms-runtime-lib=dll"]
         optimizer = _optimizers.OptimizerX86
-        target = _COFF64(host, condition, args=args, optimizer=optimizer)
+        target = _COFF64(host, condition, optimizer=optimizer)
     elif re.fullmatch(r"x86_64-.*-linux-gnu", host):
         host = "x86_64-unknown-linux-gnu"
         condition = "defined(__x86_64__) && defined(__linux__)"
index 20209450d0d8d0b751842f23fade070d053da9ac..a0b6cf9b3fa865a45c9c933e5f3821a1e1114069 100644 (file)
@@ -22,12 +22,8 @@ def _dump_footer(
     yield "    symbol_mask got_mask;"
     yield "} StencilGroup;"
     yield ""
-    yield f"static const StencilGroup shim = {groups['shim'].as_c('shim')};"
-    yield ""
     yield "static const StencilGroup stencil_groups[MAX_UOP_REGS_ID + 1] = {"
     for opname, group in sorted(groups.items()):
-        if opname == "shim":
-            continue
         yield f"    [{opname}] = {group.as_c(opname)},"
     yield "};"
     yield ""
index 5e1b05a3d86cb49b1e552c2032df752558180c35..60fa4e58e97af8be9422da1a945610bc3bb7d346 100644 (file)
@@ -61,6 +61,7 @@ if __name__ == "__main__":
             comment=comment,
             force=args.force,
             jit_stencils=args.output_dir / f"jit_stencils-{target.triple}.h",
+            jit_shim_object=args.output_dir / f"jit_shim-{target.triple}.o",
         )
     jit_stencils_h = args.output_dir / "jit_stencils.h"
     lines = [f"// {comment}\n"]
index 8ec4885a48354fd6ca10382d797d46aaed4c1e7c..f143e1dc1003b336209d03ce120bffb9cacb71a6 100644 (file)
@@ -7,7 +7,7 @@
 #include "jit.h"
 
 _Py_CODEUNIT *
-_JIT_ENTRY(
+_PyJIT(
     _PyExecutorObject *exec, _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate
 ) {
     // Note that this is *not* a tail call
index 49319bc2aa44599ac7209f631d693817e2b2e804..6cd7a1900463ee5bc4d25d9e34be23c51bf6a0df 100755 (executable)
--- a/configure
+++ b/configure
@@ -644,6 +644,8 @@ ac_includes_default="\
 ac_header_c_list=
 ac_subst_vars='LTLIBOBJS
 MODULE_BLOCK
+JIT_SHIM_BUILD_O
+JIT_SHIM_O
 JIT_STENCILS_H
 MODULE_XXLIMITED_35_FALSE
 MODULE_XXLIMITED_35_TRUE
@@ -34703,38 +34705,56 @@ printf "%s\n" "$py_cv_module_xxlimited_35" >&6; }
 
 # Determine JIT stencils header files based on target platform
 JIT_STENCILS_H=""
-if test "x$enable_experimental_jit" = xno
+JIT_SHIM_O=""
+JIT_SHIM_BUILD_O=""
+if ${jit_flags:+false} :
 then :
 
 else case e in #(
-  e) case "$host" in
-    aarch64-apple-darwin*)
-      JIT_STENCILS_H="jit_stencils-aarch64-apple-darwin.h"
-      ;;
-    x86_64-apple-darwin*)
-      JIT_STENCILS_H="jit_stencils-x86_64-apple-darwin.h"
-      ;;
-    aarch64-pc-windows-msvc)
-      JIT_STENCILS_H="jit_stencils-aarch64-pc-windows-msvc.h"
-      ;;
-    i686-pc-windows-msvc)
-      JIT_STENCILS_H="jit_stencils-i686-pc-windows-msvc.h"
-      ;;
-    x86_64-pc-windows-msvc)
-      JIT_STENCILS_H="jit_stencils-x86_64-pc-windows-msvc.h"
-      ;;
-    aarch64-*-linux-gnu)
-      JIT_STENCILS_H="jit_stencils-aarch64-unknown-linux-gnu.h"
-      ;;
-    x86_64-*-linux-gnu)
-      JIT_STENCILS_H="jit_stencils-x86_64-unknown-linux-gnu.h"
-      ;;
-  esac ;;
+  e) if test "${enable_universalsdk}" && test "$UNIVERSAL_ARCHS" = "universal2"; then
+     JIT_STENCILS_H="jit_stencils-aarch64-apple-darwin.h jit_stencils-x86_64-apple-darwin.h"
+     JIT_SHIM_O="jit_shim-universal2-apple-darwin.o"
+     JIT_SHIM_BUILD_O="jit_shim-aarch64-apple-darwin.o jit_shim-x86_64-apple-darwin.o"
+   else
+     case "$host" in
+       aarch64-apple-darwin*)
+         JIT_STENCILS_H="jit_stencils-aarch64-apple-darwin.h"
+         JIT_SHIM_O="jit_shim-aarch64-apple-darwin.o"
+         ;;
+       x86_64-apple-darwin*)
+         JIT_STENCILS_H="jit_stencils-x86_64-apple-darwin.h"
+         JIT_SHIM_O="jit_shim-x86_64-apple-darwin.o"
+         ;;
+       aarch64-pc-windows-msvc)
+         JIT_STENCILS_H="jit_stencils-aarch64-pc-windows-msvc.h"
+         JIT_SHIM_O="jit_shim-aarch64-pc-windows-msvc.o"
+         ;;
+       i686-pc-windows-msvc)
+         JIT_STENCILS_H="jit_stencils-i686-pc-windows-msvc.h"
+         JIT_SHIM_O="jit_shim-i686-pc-windows-msvc.o"
+         ;;
+       x86_64-pc-windows-msvc)
+         JIT_STENCILS_H="jit_stencils-x86_64-pc-windows-msvc.h"
+         JIT_SHIM_O="jit_shim-x86_64-pc-windows-msvc.o"
+         ;;
+       aarch64-*-linux-gnu)
+         JIT_STENCILS_H="jit_stencils-aarch64-unknown-linux-gnu.h"
+         JIT_SHIM_O="jit_shim-aarch64-unknown-linux-gnu.o"
+         ;;
+       x86_64-*-linux-gnu)
+         JIT_STENCILS_H="jit_stencils-x86_64-unknown-linux-gnu.h"
+         JIT_SHIM_O="jit_shim-x86_64-unknown-linux-gnu.o"
+         ;;
+     esac
+     JIT_SHIM_BUILD_O="$JIT_SHIM_O"
+   fi ;;
 esac
 fi
 
 
 
+
+
 # substitute multiline block, must come after last PY_STDLIB_MOD()
 
 
index 7b6f3c5e0ed5be8d7d75997eb88c5b10c6fe50fe..60511db39fad1e60167b28422dd551f86b6fffa9 100644 (file)
@@ -8384,33 +8384,52 @@ PY_STDLIB_MOD([xxlimited_35], [test "$TEST_MODULES" = yes], [test "$ac_cv_func_d
 
 # Determine JIT stencils header files based on target platform
 JIT_STENCILS_H=""
-AS_VAR_IF([enable_experimental_jit], [no],
+JIT_SHIM_O=""
+JIT_SHIM_BUILD_O=""
+AS_VAR_IF([jit_flags],
   [],
-  [case "$host" in
-    aarch64-apple-darwin*)
-      JIT_STENCILS_H="jit_stencils-aarch64-apple-darwin.h"
-      ;;
-    x86_64-apple-darwin*)
-      JIT_STENCILS_H="jit_stencils-x86_64-apple-darwin.h"
-      ;;
-    aarch64-pc-windows-msvc)
-      JIT_STENCILS_H="jit_stencils-aarch64-pc-windows-msvc.h"
-      ;;
-    i686-pc-windows-msvc)
-      JIT_STENCILS_H="jit_stencils-i686-pc-windows-msvc.h"
-      ;;
-    x86_64-pc-windows-msvc)
-      JIT_STENCILS_H="jit_stencils-x86_64-pc-windows-msvc.h"
-      ;;
-    aarch64-*-linux-gnu)
-      JIT_STENCILS_H="jit_stencils-aarch64-unknown-linux-gnu.h"
-      ;;
-    x86_64-*-linux-gnu)
-      JIT_STENCILS_H="jit_stencils-x86_64-unknown-linux-gnu.h"
-      ;;
-  esac])
+  [],
+  [if test "${enable_universalsdk}" && test "$UNIVERSAL_ARCHS" = "universal2"; then
+     JIT_STENCILS_H="jit_stencils-aarch64-apple-darwin.h jit_stencils-x86_64-apple-darwin.h"
+     JIT_SHIM_O="jit_shim-universal2-apple-darwin.o"
+     JIT_SHIM_BUILD_O="jit_shim-aarch64-apple-darwin.o jit_shim-x86_64-apple-darwin.o"
+   else
+     case "$host" in
+       aarch64-apple-darwin*)
+         JIT_STENCILS_H="jit_stencils-aarch64-apple-darwin.h"
+         JIT_SHIM_O="jit_shim-aarch64-apple-darwin.o"
+         ;;
+       x86_64-apple-darwin*)
+         JIT_STENCILS_H="jit_stencils-x86_64-apple-darwin.h"
+         JIT_SHIM_O="jit_shim-x86_64-apple-darwin.o"
+         ;;
+       aarch64-pc-windows-msvc)
+         JIT_STENCILS_H="jit_stencils-aarch64-pc-windows-msvc.h"
+         JIT_SHIM_O="jit_shim-aarch64-pc-windows-msvc.o"
+         ;;
+       i686-pc-windows-msvc)
+         JIT_STENCILS_H="jit_stencils-i686-pc-windows-msvc.h"
+         JIT_SHIM_O="jit_shim-i686-pc-windows-msvc.o"
+         ;;
+       x86_64-pc-windows-msvc)
+         JIT_STENCILS_H="jit_stencils-x86_64-pc-windows-msvc.h"
+         JIT_SHIM_O="jit_shim-x86_64-pc-windows-msvc.o"
+         ;;
+       aarch64-*-linux-gnu)
+         JIT_STENCILS_H="jit_stencils-aarch64-unknown-linux-gnu.h"
+         JIT_SHIM_O="jit_shim-aarch64-unknown-linux-gnu.o"
+         ;;
+       x86_64-*-linux-gnu)
+         JIT_STENCILS_H="jit_stencils-x86_64-unknown-linux-gnu.h"
+         JIT_SHIM_O="jit_shim-x86_64-unknown-linux-gnu.o"
+         ;;
+     esac
+     JIT_SHIM_BUILD_O="$JIT_SHIM_O"
+   fi])
 
 AC_SUBST([JIT_STENCILS_H])
+AC_SUBST([JIT_SHIM_O])
+AC_SUBST([JIT_SHIM_BUILD_O])
 
 # substitute multiline block, must come after last PY_STDLIB_MOD()
 AC_SUBST([MODULE_BLOCK])