From 96e4cd698a3000382f1796366e9c963902381382 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Fri, 30 Jan 2026 18:18:56 +0000 Subject: [PATCH] gh-144319: Fix huge page safety in pymalloc arenas (#144331) The pymalloc huge page support had two problems. First, on architectures where the default huge page size exceeds the arena size (e.g. 32 MiB on PPC, 512 MiB on ARM64 with 64 KB base pages), mmap with MAP_HUGETLB silently allocates a full huge page even when the requested size is smaller. The subsequent munmap with the original arena size then fails with EINVAL, permanently leaking the entire huge page. Second, huge pages were always attempted when compiled in, with no way to disable them at runtime. On Linux, if the huge page pool is exhausted, page faults including copy-on-write faults after fork deliver SIGBUS and kill the process. The arena allocator now queries the system huge page size from /proc/meminfo and skips MAP_HUGETLB when the arena size is not a multiple of it. Huge pages also now require explicit opt-in at runtime via the PYTHON_PYMALLOC_HUGEPAGES environment variable, which is read through PyConfig and respects -E and -I flags. The config field pymalloc_hugepages is propagated to the runtime allocators struct so the low-level arena allocator can check it without calling getenv directly. --- Doc/using/cmdline.rst | 21 +++++++ Doc/using/configure.rst | 6 ++ Doc/whatsnew/3.15.rst | 2 + Include/cpython/initconfig.h | 1 + Include/internal/pycore_runtime_structs.h | 1 + Lib/test/test_capi/test_config.py | 1 + Lib/test/test_embed.py | 4 ++ Objects/obmalloc.c | 75 +++++++++++++++++++---- Programs/_testembed.c | 2 + Python/initconfig.c | 18 ++++++ 10 files changed, 120 insertions(+), 11 deletions(-) diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index aff165191b76..c97058119ae8 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -1087,6 +1087,27 @@ conflict. It now has no effect if set to an empty string. +.. 
envvar:: PYTHON_PYMALLOC_HUGEPAGES + + If set to a non-zero integer, enable huge page support for + :ref:`pymalloc <pymalloc>` arenas. Set to ``0`` or unset to disable. + Python must be compiled with :option:`--with-pymalloc-hugepages` for this + variable to have any effect. + + When enabled, arena allocation uses ``MAP_HUGETLB`` (Linux) or + ``MEM_LARGE_PAGES`` (Windows) with automatic fallback to regular pages if + huge pages are not available. + + .. warning:: + + On Linux, if the huge-page pool is exhausted, page faults — including + copy-on-write faults triggered by :func:`os.fork` — deliver ``SIGBUS`` + and kill the process. Only enable this in environments where the + huge-page pool is properly sized and fork-safety is not a concern. + + .. versionadded:: next + + .. envvar:: PYTHONLEGACYWINDOWSFSENCODING If set to a non-empty string, the default :term:`filesystem encoding and diff --git a/Doc/using/configure.rst b/Doc/using/configure.rst index c455272af727..26322045879c 100644 --- a/Doc/using/configure.rst +++ b/Doc/using/configure.rst @@ -790,6 +790,12 @@ also be used to improve performance. 2 MiB and arena allocation uses ``MAP_HUGETLB`` (Linux) or ``MEM_LARGE_PAGES`` (Windows) with automatic fallback to regular pages. + Even when compiled with this option, huge pages are **not** used at runtime + unless the :envvar:`PYTHON_PYMALLOC_HUGEPAGES` environment variable is set + to ``1``. This opt-in is required because huge pages carry risks on Linux: + if the huge-page pool is exhausted, page faults (including copy-on-write + faults after :func:`os.fork`) deliver ``SIGBUS`` and kill the process. + The configure script checks that the platform supports ``MAP_HUGETLB`` and emits a warning if it is not available. 
diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 68c491f8a8cb..637dd0cca24b 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -1482,6 +1482,8 @@ Build changes increases to 2 MiB and allocation uses ``MAP_HUGETLB`` (Linux) or ``MEM_LARGE_PAGES`` (Windows) with automatic fallback to regular pages. On Windows, use ``build.bat --pymalloc-hugepages``. + At runtime, huge pages must be explicitly enabled by setting the + :envvar:`PYTHON_PYMALLOC_HUGEPAGES` environment variable to ``1``. * Annotating anonymous mmap usage is now supported if Linux kernel supports :manpage:`PR_SET_VMA_ANON_NAME <PR_SET_VMA(2const)>` (Linux 5.17 or newer). diff --git a/Include/cpython/initconfig.h b/Include/cpython/initconfig.h index 1c979d91a408..5606ebeb7c95 100644 --- a/Include/cpython/initconfig.h +++ b/Include/cpython/initconfig.h @@ -149,6 +149,7 @@ typedef struct PyConfig { int dump_refs; wchar_t *dump_refs_file; int malloc_stats; + int pymalloc_hugepages; wchar_t *filesystem_encoding; wchar_t *filesystem_errors; wchar_t *pycache_prefix; diff --git a/Include/internal/pycore_runtime_structs.h b/Include/internal/pycore_runtime_structs.h index 92387031ad74..f48d203dda00 100644 --- a/Include/internal/pycore_runtime_structs.h +++ b/Include/internal/pycore_runtime_structs.h @@ -31,6 +31,7 @@ struct _pymem_allocators { debug_alloc_api_t obj; } debug; int is_debug_enabled; + int use_hugepages; PyObjectArenaAllocator obj_arena; }; diff --git a/Lib/test/test_capi/test_config.py b/Lib/test/test_capi/test_config.py index 04a27de8d849..b04d0923926d 100644 --- a/Lib/test/test_capi/test_config.py +++ b/Lib/test/test_capi/test_config.py @@ -63,6 +63,7 @@ class CAPITests(unittest.TestCase): ("interactive", bool, None), ("isolated", bool, None), ("malloc_stats", bool, None), + ("pymalloc_hugepages", bool, None), ("module_search_paths", list[str], "path"), ("optimization_level", int, None), ("orig_argv", list[str], "orig_argv"), diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py 
index b53679412278..29b1249b10df 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -642,6 +642,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): 'dump_refs': False, 'dump_refs_file': None, 'malloc_stats': False, + 'pymalloc_hugepages': False, 'filesystem_encoding': GET_DEFAULT_CONFIG, 'filesystem_errors': GET_DEFAULT_CONFIG, @@ -1044,6 +1045,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): 'code_debug_ranges': False, 'show_ref_count': True, 'malloc_stats': True, + 'pymalloc_hugepages': True, 'stdio_encoding': 'iso8859-1', 'stdio_errors': 'replace', @@ -1109,6 +1111,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): 'import_time': 1, 'code_debug_ranges': False, 'malloc_stats': True, + 'pymalloc_hugepages': True, 'inspect': True, 'optimization_level': 2, 'pythonpath_env': '/my/path', @@ -1145,6 +1148,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): 'import_time': 1, 'code_debug_ranges': False, 'malloc_stats': True, + 'pymalloc_hugepages': True, 'inspect': True, 'optimization_level': 2, 'pythonpath_env': '/my/path', diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 71dc4bf0d046..ce2e39790bd7 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -13,6 +13,7 @@ #include <stdlib.h> // malloc() #include <stdbool.h> +#include <stdio.h> // fopen(), fgets(), sscanf() #ifdef WITH_MIMALLOC // Forward declarations of functions used in our mimalloc modifications static void _PyMem_mi_page_clear_qsbr(mi_page_t *page); @@ -492,16 +493,57 @@ _PyMem_DefaultRawWcsdup(const wchar_t *str) # endif #endif +/* Return the system's default huge page size in bytes, or 0 if it + * cannot be determined. The result is cached after the first call. + * + * This is Linux-only (/proc/meminfo). On other systems that define + * MAP_HUGETLB the caller should skip huge pages gracefully. 
*/ +#if defined(PYMALLOC_USE_HUGEPAGES) && defined(ARENAS_USE_MMAP) && defined(MAP_HUGETLB) +static size_t +_pymalloc_system_hugepage_size(void) +{ + static size_t hp_size = 0; + static int initialized = 0; + + if (initialized) { + return hp_size; + } + +#ifdef __linux__ + FILE *f = fopen("/proc/meminfo", "r"); + if (f != NULL) { + char line[256]; + while (fgets(line, sizeof(line), f)) { + unsigned long size_kb; + if (sscanf(line, "Hugepagesize: %lu kB", &size_kb) == 1) { + hp_size = (size_t)size_kb * 1024; + break; + } + } + fclose(f); + } +#endif + + initialized = 1; + return hp_size; +} +#endif + void * _PyMem_ArenaAlloc(void *Py_UNUSED(ctx), size_t size) { #ifdef MS_WINDOWS # ifdef PYMALLOC_USE_HUGEPAGES - void *ptr = VirtualAlloc(NULL, size, - MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, - PAGE_READWRITE); - if (ptr != NULL) - return ptr; + if (_PyRuntime.allocators.use_hugepages) { + SIZE_T lp_size = GetLargePageMinimum(); + if (lp_size > 0 && size % lp_size == 0) { + void *ptr = VirtualAlloc(NULL, size, + MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, + PAGE_READWRITE); + if (ptr != NULL) + return ptr; + } + } /* Fall back to regular pages */ # endif return VirtualAlloc(NULL, size, @@ -510,12 +552,23 @@ _PyMem_ArenaAlloc(void *Py_UNUSED(ctx), size_t size) void *ptr; # ifdef PYMALLOC_USE_HUGEPAGES # ifdef MAP_HUGETLB - ptr = mmap(NULL, size, PROT_READ|PROT_WRITE, - MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, -1, 0); - if (ptr != MAP_FAILED) { - assert(ptr != NULL); - (void)_PyAnnotateMemoryMap(ptr, size, "cpython:pymalloc:hugepage"); - return ptr; + if (_PyRuntime.allocators.use_hugepages) { + size_t hp_size = _pymalloc_system_hugepage_size(); + /* Only use huge pages if the arena size is a multiple of the + * system's default huge page size. When the arena is smaller + * than the huge page, mmap still succeeds but silently + * allocates an entire huge page; the subsequent munmap with + * the smaller arena size then fails with EINVAL, leaking + * all of that memory. 
*/ + if (hp_size > 0 && size % hp_size == 0) { + ptr = mmap(NULL, size, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, -1, 0); + if (ptr != MAP_FAILED) { + assert(ptr != NULL); + (void)_PyAnnotateMemoryMap(ptr, size, "cpython:pymalloc:hugepage"); + return ptr; + } + } } /* Fall back to regular pages */ # endif diff --git a/Programs/_testembed.c b/Programs/_testembed.c index c5e764e426b5..38f546b976ca 100644 --- a/Programs/_testembed.c +++ b/Programs/_testembed.c @@ -639,6 +639,7 @@ static int test_init_from_config(void) putenv("PYTHONMALLOCSTATS=0"); config.malloc_stats = 1; + config.pymalloc_hugepages = 1; putenv("PYTHONPYCACHEPREFIX=env_pycache_prefix"); config_set_string(&config, &config.pycache_prefix, L"conf_pycache_prefix"); @@ -795,6 +796,7 @@ static void set_most_env_vars(void) putenv("PYTHONPROFILEIMPORTTIME=1"); putenv("PYTHONNODEBUGRANGES=1"); putenv("PYTHONMALLOCSTATS=1"); + putenv("PYTHON_PYMALLOC_HUGEPAGES=1"); putenv("PYTHONUTF8=1"); putenv("PYTHONVERBOSE=1"); putenv("PYTHONINSPECT=1"); diff --git a/Python/initconfig.c b/Python/initconfig.c index 9cdc10c4e780..46fd8929041f 100644 --- a/Python/initconfig.c +++ b/Python/initconfig.c @@ -160,6 +160,7 @@ static const PyConfigSpec PYCONFIG_SPEC[] = { SPEC(legacy_windows_stdio, BOOL, READ_ONLY, NO_SYS), #endif SPEC(malloc_stats, BOOL, READ_ONLY, NO_SYS), + SPEC(pymalloc_hugepages, BOOL, READ_ONLY, NO_SYS), SPEC(orig_argv, WSTR_LIST, READ_ONLY, SYS_ATTR("orig_argv")), SPEC(parse_argv, BOOL, READ_ONLY, NO_SYS), SPEC(pathconfig_warnings, BOOL, READ_ONLY, NO_SYS), @@ -900,6 +901,7 @@ config_check_consistency(const PyConfig *config) assert(config->show_ref_count >= 0); assert(config->dump_refs >= 0); assert(config->malloc_stats >= 0); + assert(config->pymalloc_hugepages >= 0); assert(config->site_import >= 0); assert(config->bytes_warning >= 0); assert(config->warn_default_encoding >= 0); @@ -1879,6 +1881,18 @@ config_read_env_vars(PyConfig *config) if (config_get_env(config, 
"PYTHONMALLOCSTATS")) { config->malloc_stats = 1; } + { + const char *env = _Py_GetEnv(use_env, "PYTHON_PYMALLOC_HUGEPAGES"); + if (env) { + int value; + if (_Py_str_to_int(env, &value) < 0 || value < 0) { + /* PYTHON_PYMALLOC_HUGEPAGES=text or negative + behaves as PYTHON_PYMALLOC_HUGEPAGES=1 */ + value = 1; + } + config->pymalloc_hugepages = (value > 0); + } + } if (config->dump_refs_file == NULL) { status = CONFIG_GET_ENV_DUP(config, &config->dump_refs_file, @@ -2812,6 +2826,10 @@ _PyConfig_Write(const PyConfig *config, _PyRuntimeState *runtime) return _PyStatus_NO_MEMORY(); } +#ifdef PYMALLOC_USE_HUGEPAGES + runtime->allocators.use_hugepages = config->pymalloc_hugepages; +#endif + return _PyStatus_OK(); } -- 2.47.3