Python has a *pymalloc* allocator optimized for small objects (at most
512 bytes) with a short lifetime. It uses memory mappings called "arenas"
with a fixed size of either 256 KiB on 32-bit platforms or 1 MiB on 64-bit
-platforms. It falls back to :c:func:`PyMem_RawMalloc` and
+platforms. When Python is configured with :option:`--with-pymalloc-hugepages`,
+the arena size on 64-bit platforms is increased to 2 MiB to match the huge page
+size, and arena allocation will attempt to use huge pages (``MAP_HUGETLB`` on
+Linux, ``MEM_LARGE_PAGES`` on Windows) with automatic fallback to regular pages.
+It falls back to :c:func:`PyMem_RawMalloc` and
:c:func:`PyMem_RawRealloc` for allocations larger than 512 bytes.
*pymalloc* is the :ref:`default allocator <default-memory-allocators>` of the
See also the :envvar:`PYTHONMALLOC` environment variable.
+.. option:: --with-pymalloc-hugepages
+
+ Enable huge page support for :ref:`pymalloc <pymalloc>` arenas (disabled by
+ default). When enabled, the arena size on 64-bit platforms is increased to
+ 2 MiB and arena allocation uses ``MAP_HUGETLB`` (Linux) or
+ ``MEM_LARGE_PAGES`` (Windows) with automatic fallback to regular pages.
+
+ The configure script checks that the platform supports ``MAP_HUGETLB``.
+ If it is not available, a warning is emitted and huge page support is
+ left disabled.
+
+ On Windows, use the ``--pymalloc-hugepages`` flag with ``build.bat`` or
+ set the ``UsePymallocHugepages`` MSBuild property.
+
+ .. versionadded:: 3.15
+
.. option:: --without-doc-strings
Disable static documentation strings to reduce the memory footprint (enabled
modules that are missing or packaged separately.
(Contributed by Stan Ulbrych and Petr Viktorin in :gh:`139707`.)
+* The new configure option :option:`--with-pymalloc-hugepages` enables huge
+ page support for :ref:`pymalloc <pymalloc>` arenas. When enabled, the arena
+ size on 64-bit platforms increases to 2 MiB and allocation uses
+ ``MAP_HUGETLB`` (Linux) or ``MEM_LARGE_PAGES`` (Windows) with automatic
+ fallback to regular pages.
+ On Windows, use ``build.bat --pymalloc-hugepages``.
+
* Annotating anonymous mmap usage is now supported if the Linux kernel supports
:manpage:`PR_SET_VMA_ANON_NAME <PR_SET_VMA(2const)>` (Linux 5.17 or newer).
Annotations are visible in ``/proc/<pid>/maps`` if the kernel supports the feature
* mappings to reduce heap fragmentation.
*/
#ifdef USE_LARGE_ARENAS
-#define ARENA_BITS 20 /* 1 MiB */
+# ifdef PYMALLOC_USE_HUGEPAGES
+# define ARENA_BITS 21 /* 2 MiB */
+# else
+# define ARENA_BITS 20 /* 1 MiB */
+# endif
#else
#define ARENA_BITS 18 /* 256 KiB */
#endif
*/
/* How many arena_objects do we initially allocate?
- * 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the
+ * 16 = can allocate 16 arenas = 16 * ARENA_SIZE before growing the
* `arenas` vector.
*/
#define INITIAL_ARENA_OBJECTS 16
memory address bit allocation for keys
- 64-bit pointers, IGNORE_BITS=0 and 2^20 arena size:
+ ARENA_BITS is configurable: 20 (1 MiB) by default on 64-bit, or
+ 21 (2 MiB) when PYMALLOC_USE_HUGEPAGES is enabled. All bit widths
+ below are derived from ARENA_BITS automatically.
+
+ 64-bit pointers, IGNORE_BITS=0 and 2^20 arena size (default):
15 -> MAP_TOP_BITS
15 -> MAP_MID_BITS
14 -> MAP_BOT_BITS
----
64
+ 64-bit pointers, IGNORE_BITS=0 and 2^21 arena size (hugepages):
+ 15 -> MAP_TOP_BITS
+ 15 -> MAP_MID_BITS
+ 13 -> MAP_BOT_BITS
+ 21 -> ideal aligned arena
+ ----
+ 64
+
64-bit pointers, IGNORE_BITS=16, and 2^20 arena size:
16 -> IGNORE_BITS
10 -> MAP_TOP_BITS
--- /dev/null
+Add huge pages support for the pymalloc allocator. Patch by Pablo Galindo
_PyMem_ArenaAlloc(void *Py_UNUSED(ctx), size_t size)
{
#ifdef MS_WINDOWS
+# ifdef PYMALLOC_USE_HUGEPAGES
+ void *ptr = VirtualAlloc(NULL, size,
+ MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES,
+ PAGE_READWRITE);
+ if (ptr != NULL)
+ return ptr;
+ /* Fall back to regular pages */
+# endif
return VirtualAlloc(NULL, size,
MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
#elif defined(ARENAS_USE_MMAP)
void *ptr;
+# ifdef PYMALLOC_USE_HUGEPAGES
+# ifdef MAP_HUGETLB
+ ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, -1, 0);
+ if (ptr != MAP_FAILED) {
+ assert(ptr != NULL);
+ (void)_PyAnnotateMemoryMap(ptr, size, "cpython:pymalloc:hugepage");
+ return ptr;
+ }
+ /* Fall back to regular pages */
+# endif
+# endif
ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
if (ptr == MAP_FAILED)
echo. --pystats Enable PyStats collection.
echo. --tail-call-interp Enable tail-calling interpreter (requires LLVM 19 or higher).
echo. --enable-stackref-debug Enable stackref debugging mode.
+echo. --pymalloc-hugepages Enable huge page support for pymalloc arenas.
echo.
echo.Available flags to avoid building certain modules.
echo.These flags have no effect if '-e' is not given:
if "%~1"=="--pystats" (set PyStats=1) & shift & goto CheckOpts
if "%~1"=="--tail-call-interp" (set UseTailCallInterp=true) & shift & goto CheckOpts
if "%~1"=="--enable-stackref-debug" (set StackRefDebug=true) & shift & goto CheckOpts
+if "%~1"=="--pymalloc-hugepages" (set UsePymallocHugepages=true) & shift & goto CheckOpts
rem These use the actual property names used by MSBuild. We could just let
rem them in through the environment, but we specify them on the command line
rem anyway for visibility so set defaults after this
/p:UseTailCallInterp=%UseTailCallInterp%^
/p:DisableRemoteDebug=%DisableRemoteDebug%^
/p:StackRefDebug=%StackRefDebug%^
+ /p:UsePymallocHugepages=%UsePymallocHugepages%^
%1 %2 %3 %4 %5 %6 %7 %8 %9
@echo off
<_PlatformPreprocessorDefinition Condition="$(Platform) == 'x64' and $(PlatformToolset) != 'ClangCL'">_M_X64;$(_PlatformPreprocessorDefinition)</_PlatformPreprocessorDefinition>
<_Py3NamePreprocessorDefinition>PY3_DLLNAME=L"$(Py3DllName)$(PyDebugExt)";</_Py3NamePreprocessorDefinition>
<_FreeThreadedPreprocessorDefinition Condition="$(DisableGil) == 'true'">Py_GIL_DISABLED=1;</_FreeThreadedPreprocessorDefinition>
+ <_PymallocHugepagesPreprocessorDefinition Condition="$(UsePymallocHugepages) == 'true'">PYMALLOC_USE_HUGEPAGES=1;</_PymallocHugepagesPreprocessorDefinition>
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
<AdditionalIncludeDirectories>$(PySourcePath)Include;$(PySourcePath)Include\internal;$(PySourcePath)Include\internal\mimalloc;$(PySourcePath)PC;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;$(_Py3NamePreprocessorDefinition)$(_PlatformPreprocessorDefinition)$(_DebugPreprocessorDefinition)$(_PyStatsPreprocessorDefinition)$(_PydPreprocessorDefinition)$(_FreeThreadedPreprocessorDefinition)%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;$(_Py3NamePreprocessorDefinition)$(_PlatformPreprocessorDefinition)$(_DebugPreprocessorDefinition)$(_PyStatsPreprocessorDefinition)$(_PydPreprocessorDefinition)$(_FreeThreadedPreprocessorDefinition)$(_PymallocHugepagesPreprocessorDefinition)%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions Condition="'$(SupportPGO)' and ($(Configuration) == 'PGInstrument' or $(Configuration) == 'PGUpdate')">_Py_USING_PGO=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<Optimization>MaxSpeed</Optimization>
* WITH_COMPUTED_GOTOS: build the interpreter using "computed gotos".
Currently only supported by clang-cl.
+* UsePymallocHugepages: enable huge page support for pymalloc arenas.
+ When enabled, the arena size on 64-bit platforms is increased to 2 MiB
+ and arena allocation uses MEM_LARGE_PAGES with automatic fallback to
+ regular pages. Can also be enabled via the `--pymalloc-hugepages` flag
+ of build.bat.
+
Static library
--------------
with_doc_strings
with_mimalloc
with_pymalloc
+with_pymalloc_hugepages
with_c_locale_coercion
with_valgrind
with_dtrace
--with-mimalloc build with mimalloc memory allocator (default is yes
if C11 stdatomic.h is available.)
--with-pymalloc enable specialized mallocs (default is yes)
+ --with-pymalloc-hugepages
+ enable huge page support for pymalloc arenas
+ (default is no)
--with-c-locale-coercion
enable C locale coercion to a UTF-8 based locale
(default is yes)
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $with_pymalloc" >&5
printf "%s\n" "$with_pymalloc" >&6; }
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for --with-pymalloc-hugepages" >&5
+printf %s "checking for --with-pymalloc-hugepages... " >&6; }
+
+# Check whether --with-pymalloc-hugepages was given.
+if test ${with_pymalloc_hugepages+y}
+then :
+ withval=$with_pymalloc_hugepages;
+fi
+
+if test "$with_pymalloc_hugepages" = "yes"
+then
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+#include <sys/mman.h>
+
+int
+main (void)
+{
+
+int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
+(void)flags;
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+
+printf "%s\n" "#define PYMALLOC_USE_HUGEPAGES 1" >>confdefs.h
+
+else case e in #(
+ e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: --with-pymalloc-hugepages requested but MAP_HUGETLB not found" >&5
+printf "%s\n" "$as_me: WARNING: --with-pymalloc-hugepages requested but MAP_HUGETLB not found" >&2;}
+ with_pymalloc_hugepages=no ;;
+esac
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ${with_pymalloc_hugepages:-no}" >&5
+printf "%s\n" "${with_pymalloc_hugepages:-no}" >&6; }
+
# Check for --with-c-locale-coercion
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for --with-c-locale-coercion" >&5
printf %s "checking for --with-c-locale-coercion... " >&6; }
fi
AC_MSG_RESULT([$with_pymalloc])
+AC_MSG_CHECKING([for --with-pymalloc-hugepages])
+AC_ARG_WITH(
+ [pymalloc-hugepages],
+ [AS_HELP_STRING([--with-pymalloc-hugepages],
+ [enable huge page support for pymalloc arenas (default is no)])])
+if test "$with_pymalloc_hugepages" = "yes"
+then
+ dnl configure only runs on Unix-like systems; Windows uses MEM_LARGE_PAGES
+ dnl via VirtualAlloc but does not use configure. Only check MAP_HUGETLB here.
+ AC_COMPILE_IFELSE(
+ [AC_LANG_PROGRAM([[
+#include <sys/mman.h>
+ ]], [[
+int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
+(void)flags;
+ ]])],
+ [AC_DEFINE([PYMALLOC_USE_HUGEPAGES], [1],
+ [Define to use huge pages for pymalloc arenas])],
+ [AC_MSG_WARN([--with-pymalloc-hugepages requested but MAP_HUGETLB not found])
+ with_pymalloc_hugepages=no])
+fi
+AC_MSG_RESULT([${with_pymalloc_hugepages:-no}])
+
# Check for --with-c-locale-coercion
AC_MSG_CHECKING([for --with-c-locale-coercion])
AC_ARG_WITH(
/* Define as the preferred size in bits of long digits */
#undef PYLONG_BITS_IN_DIGIT
+/* Define to use huge pages for pymalloc arenas */
+#undef PYMALLOC_USE_HUGEPAGES
+
/* enabled builtin hash modules */
#undef PY_BUILTIN_HASHLIB_HASHES