git.ipfire.org Git - thirdparty/systemd.git/commitdiff
core: introduce MemoryTHP= unit file setting
author Usama Arif <usamaarif642@gmail.com>
Mon, 15 Sep 2025 12:33:28 +0000 (13:33 +0100)
committer Usama Arif <usamaarif642@gmail.com>
Tue, 6 Jan 2026 11:26:14 +0000 (03:26 -0800)
Transparent Hugepages (THP) is a Linux kernel feature that manages
memory using larger pages (2MB on x86, compared to the default 4KB).
The main goal is to improve memory management efficiency and system
performance, especially for memory-intensive applications.
However, it can cause drawbacks in some scenarios, such as memory
regression and latency spikes. THP policy is governed for the entire
system via /sys/kernel/mm/transparent_hugepage/enabled.
However, it can be overridden for individual workloads via prctl(2)
call.
MemoryTHP= is used to disable THPs at exec-invoke to stop
providing THPs for workloads where the drawbacks outweigh the advantages.
When set to "disable", MemoryTHP= disables THPs completely for the
process, irrespective of global THP controls.
When set to "madvise", MemoryTHP= disables THPs for the process except
when specifically madvised by the process with MADV_HUGEPAGE or MADV_COLLAPSE.

18 files changed:
man/org.freedesktop.systemd1.xml
man/systemd.exec.xml
src/core/dbus-execute.c
src/core/exec-invoke.c
src/core/execute-serialize.c
src/core/execute.c
src/core/execute.h
src/core/load-fragment-gperf.gperf.in
src/core/load-fragment.c
src/core/load-fragment.h
src/core/namespace.c
src/core/namespace.h
src/core/varlink-execute.c
src/include/musl/sys/prctl.h
src/shared/bus-unit-util.c
src/shared/exit-status.c
src/shared/exit-status.h
src/shared/varlink-io.systemd.Unit.c

index 708793d7d1bed3480d4b01c7486b5910f8081837..7a65bfa809cca09c312811c9f0a5dae15d012505 100644 (file)
@@ -3421,6 +3421,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly b MemoryKSM = ...;
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
+      readonly s MemoryTHP = '...';
+      @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly s UserNamespacePath = '...';
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly s NetworkNamespacePath = '...';
@@ -4036,6 +4038,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
 
     <!--property MemoryKSM is not documented!-->
 
+    <!--property MemoryTHP is not documented!-->
+
     <!--property UserNamespacePath is not documented!-->
 
     <!--property NetworkNamespacePath is not documented!-->
@@ -4784,6 +4788,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
 
     <variablelist class="dbus-property" generated="True" extra-ref="MemoryKSM"/>
 
+    <variablelist class="dbus-property" generated="True" extra-ref="MemoryTHP"/>
+
     <variablelist class="dbus-property" generated="True" extra-ref="UserNamespacePath"/>
 
     <variablelist class="dbus-property" generated="True" extra-ref="NetworkNamespacePath"/>
@@ -5684,6 +5690,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly b MemoryKSM = ...;
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
+      readonly s MemoryTHP = '...';
+      @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly s UserNamespacePath = '...';
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly s NetworkNamespacePath = '...';
@@ -6317,6 +6325,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
 
     <!--property MemoryKSM is not documented!-->
 
+    <!--property MemoryTHP is not documented!-->
+
     <!--property UserNamespacePath is not documented!-->
 
     <!--property NetworkNamespacePath is not documented!-->
@@ -7041,6 +7051,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
 
     <variablelist class="dbus-property" generated="True" extra-ref="MemoryKSM"/>
 
+    <variablelist class="dbus-property" generated="True" extra-ref="MemoryTHP"/>
+
     <variablelist class="dbus-property" generated="True" extra-ref="UserNamespacePath"/>
 
     <variablelist class="dbus-property" generated="True" extra-ref="NetworkNamespacePath"/>
@@ -7765,6 +7777,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly b MemoryKSM = ...;
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
+      readonly s MemoryTHP = '...';
+      @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly s UserNamespacePath = '...';
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly s NetworkNamespacePath = '...';
@@ -8322,6 +8336,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
 
     <!--property MemoryKSM is not documented!-->
 
+    <!--property MemoryTHP is not documented!-->
+
     <!--property UserNamespacePath is not documented!-->
 
     <!--property NetworkNamespacePath is not documented!-->
@@ -8954,6 +8970,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
 
     <variablelist class="dbus-property" generated="True" extra-ref="MemoryKSM"/>
 
+    <variablelist class="dbus-property" generated="True" extra-ref="MemoryTHP"/>
+
     <variablelist class="dbus-property" generated="True" extra-ref="UserNamespacePath"/>
 
     <variablelist class="dbus-property" generated="True" extra-ref="NetworkNamespacePath"/>
@@ -9811,6 +9829,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly b MemoryKSM = ...;
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
+      readonly s MemoryTHP = '...';
+      @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly s UserNamespacePath = '...';
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly s NetworkNamespacePath = '...';
@@ -10350,6 +10370,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
 
     <!--property MemoryKSM is not documented!-->
 
+    <!--property MemoryTHP is not documented!-->
+
     <!--property UserNamespacePath is not documented!-->
 
     <!--property NetworkNamespacePath is not documented!-->
@@ -10964,6 +10986,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
 
     <variablelist class="dbus-property" generated="True" extra-ref="MemoryKSM"/>
 
+    <variablelist class="dbus-property" generated="True" extra-ref="MemoryTHP"/>
+
     <variablelist class="dbus-property" generated="True" extra-ref="UserNamespacePath"/>
 
     <variablelist class="dbus-property" generated="True" extra-ref="NetworkNamespacePath"/>
@@ -12598,7 +12622,8 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \
       <varname>ManagedOOMKills</varname>,
       <varname>ExecReloadPost</varname>, and
       <varname>ExecReloadPostEx</varname> were added in version 259.</para>
-      <para><varname>BindNetworkInterface</varname> was added in version 260.</para>
+      <para><varname>BindNetworkInterface</varname> and
+      <varname>MemoryTHP</varname> were added in version 260.</para>
     </refsect2>
     <refsect2>
       <title>Socket Unit Objects</title>
@@ -12667,7 +12692,8 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \
       <para><varname>UserNamespacePath</varname>,
       <varname>OOMKills</varname>, and
       <varname>ManagedOOMKills</varname> were added in 259.</para>
-      <para><varname>BindNetworkInterface</varname> was added in version 260.</para>
+      <para><varname>BindNetworkInterface</varname> and
+      <varname>MemoryTHP</varname> were added in version 260.</para>
     </refsect2>
     <refsect2>
       <title>Mount Unit Objects</title>
@@ -12731,7 +12757,8 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \
       <para><varname>UserNamespacePath</varname>,
       <varname>OOMKills</varname>, and
       <varname>ManagedOOMKills</varname> were added in 259.</para>
-      <para><varname>BindNetworkInterface</varname> was added in version 260.</para>
+      <para><varname>BindNetworkInterface</varname> and
+      <varname>MemoryTHP</varname> were added in version 260.</para>
     </refsect2>
     <refsect2>
       <title>Swap Unit Objects</title>
@@ -12793,7 +12820,8 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \
       <para><varname>UserNamespacePath</varname>,
       <varname>OOMKills</varname>, and
       <varname>ManagedOOMKills</varname> were added in 259.</para>
-      <para><varname>BindNetworkInterface</varname> was added in version 260.</para>
+      <para><varname>BindNetworkInterface</varname> and
+      <varname>MemoryTHP</varname> were added in version 260.</para>
     </refsect2>
     <refsect2>
       <title>Slice Unit Objects</title>
index 927fa3e0c42329340e0ae6eff56ea62a002e4f9f..ae24b9dc14f0af9e3b3d3f209f2c4517312a4feb 100644 (file)
@@ -2134,6 +2134,41 @@ BindReadOnlyPaths=/var/lib/systemd</programlisting>
         </listitem>
       </varlistentry>
 
+      <varlistentry>
+        <term><varname>MemoryTHP=</varname></term>
+
+        <listitem><para>Transparent Hugepages (THPs) is a Linux kernel feature that manages memory
+        using larger pages (2MB on x86, compared to the default 4KB). The main goal is to improve memory management
+        efficiency and system performance, especially for memory-intensive applications.
+        However, it can cause drawbacks in some scenarios, such as memory regression and latency spikes.
+        THP policy is governed for the entire system via <filename>/sys/kernel/mm/transparent_hugepage/enabled</filename>.
+        However, it can be overridden for individual workloads via
+        <citerefentry><refentrytitle>prctl</refentrytitle><manvolnum>2</manvolnum></citerefentry>.
+        <varname>MemoryTHP=</varname> may be used to disable THPs at process invocation time to stop providing
+        THPs for workloads where the drawbacks outweigh the advantages.
+        When <varname>MemoryTHP=</varname> is set to <literal>inherit</literal> or not set at all, systemd
+        inherits THP settings from the process that starts it and no
+        <citerefentry><refentrytitle>prctl</refentrytitle><manvolnum>2</manvolnum></citerefentry>
+        <constant>PR_SET_THP_DISABLE</constant> call is made.
+        When set to <literal>disable</literal>, <varname>MemoryTHP=</varname> disables THPs completely for the process,
+        irrespective of global THP controls.
+        When set to <literal>madvise</literal>, <varname>MemoryTHP=</varname> disables THPs for the process except when
+        specifically requested via <citerefentry><refentrytitle>madvise</refentrytitle><manvolnum>2</manvolnum></citerefentry>
+        by the process with <constant>MADV_HUGEPAGE</constant> or <constant>MADV_COLLAPSE</constant>.
+        When set to <literal>system</literal>, <varname>MemoryTHP=</varname> resets the THP policy to the system-wide policy.
+        This can be used when the process that starts systemd has already disabled THPs via
+        <constant>PR_SET_THP_DISABLE</constant>, and we want to restore the system default THP setting at
+        process invocation time. For details, see
+        <ulink url="https://docs.kernel.org/admin-guide/mm/transhuge.html">Transparent Hugepage Support</ulink>
+        in the kernel documentation.</para>
+        <para>Note that this functionality might not be available, for example if THP is disabled in the
+        kernel, or the kernel does not support controlling THP at the process level through
+        <citerefentry><refentrytitle>prctl</refentrytitle><manvolnum>2</manvolnum></citerefentry>.</para>
+
+        <xi:include href="version-info.xml" xpointer="v260"/>
+        </listitem>
+      </varlistentry>
+
       <varlistentry>
         <term><varname>PrivatePIDs=</varname></term>
 
index 658fc1ee060d3ea7d1631f9bdb2f213a593835e7..19d454820ed0b6e80ba417a55129c21bfaf84b18 100644 (file)
@@ -58,6 +58,7 @@ static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_private_bpf, private_bpf, Priva
 static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_protect_home, protect_home, ProtectHome);
 static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_protect_system, protect_system, ProtectSystem);
 static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_personality, personality, unsigned long);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_memory_thp, memory_thp, MemoryTHP);
 static BUS_DEFINE_PROPERTY_GET(property_get_ioprio, "i", ExecContext, exec_context_get_effective_ioprio);
 static BUS_DEFINE_PROPERTY_GET(property_get_mount_apivfs, "b", ExecContext, exec_context_get_effective_mount_apivfs);
 static BUS_DEFINE_PROPERTY_GET(property_get_bind_log_sockets, "b", ExecContext, exec_context_get_effective_bind_log_sockets);
@@ -1407,6 +1408,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
         SD_BUS_PROPERTY("BPFDelegatePrograms", "s", property_get_bpf_delegate_programs, offsetof(ExecContext, bpf_delegate_programs), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("BPFDelegateAttachments", "s", property_get_bpf_delegate_attachments, offsetof(ExecContext, bpf_delegate_attachments), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("MemoryKSM", "b", bus_property_get_tristate, offsetof(ExecContext, memory_ksm), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("MemoryTHP", "s", property_get_memory_thp, offsetof(ExecContext, memory_thp), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("UserNamespacePath", "s", NULL, offsetof(ExecContext, user_namespace_path), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("NetworkNamespacePath", "s", NULL, offsetof(ExecContext, network_namespace_path), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("IPCNamespacePath", "s", NULL, offsetof(ExecContext, ipc_namespace_path), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -1845,6 +1847,7 @@ static BUS_DEFINE_SET_TRANSIENT_PARSE(keyring_mode, ExecKeyringMode, exec_keyrin
 static BUS_DEFINE_SET_TRANSIENT_PARSE(protect_proc, ProtectProc, protect_proc_from_string);
 static BUS_DEFINE_SET_TRANSIENT_PARSE(proc_subset, ProcSubset, proc_subset_from_string);
 static BUS_DEFINE_SET_TRANSIENT_PARSE(private_bpf, PrivateBPF, private_bpf_from_string);
+static BUS_DEFINE_SET_TRANSIENT_PARSE(memory_thp, MemoryTHP, memory_thp_from_string);
 static BUS_DEFINE_SET_TRANSIENT_PARSE_PTR(bpf_delegate_commands, uint64_t, bpf_delegate_commands_from_string);
 static BUS_DEFINE_SET_TRANSIENT_PARSE_PTR(bpf_delegate_maps, uint64_t, bpf_delegate_maps_from_string);
 static BUS_DEFINE_SET_TRANSIENT_PARSE_PTR(bpf_delegate_programs, uint64_t, bpf_delegate_programs_from_string);
@@ -2333,6 +2336,9 @@ int bus_exec_context_set_transient_property(
         if (streq(name, "MemoryKSM"))
                 return bus_set_transient_tristate(u, name, &c->memory_ksm, message, flags, reterr_error);
 
+        if (streq(name, "MemoryTHP"))
+                return bus_set_transient_memory_thp(u, name, &c->memory_thp, message, flags, reterr_error);
+
         if (streq(name, "UtmpIdentifier"))
                 return bus_set_transient_string(u, name, &c->utmp_id, message, flags, reterr_error);
 
index ef4c272bf7f5ab9896e182ad966c0fb86c82435d..d87392d53838702d769ccf876d0f726ea0e9f4ed 100644 (file)
@@ -4866,6 +4866,32 @@ static int exec_fd_mark_hot(
         return 1;
 }
 
+static int set_memory_thp(MemoryTHP thp) { /* Applies the MemoryTHP= policy via prctl(PR_SET_THP_DISABLE); returns 0 on success or a negative errno-style value */
+        switch (thp) {

+        case MEMORY_THP_INHERIT: /* Nothing to change: no prctl() call is made */
+                return 0;

+        case MEMORY_THP_DISABLE: /* Second argument 1, no flags */
+                if (prctl(PR_SET_THP_DISABLE, 1, 0, 0, 0) < 0)
+                        return errno == EINVAL ? -EOPNOTSUPP : -errno; /* EINVAL is reported as -EOPNOTSUPP, which exec_invoke() logs at debug level and ignores */
+                return 0;

+        case MEMORY_THP_MADVISE: /* Same as "disable", but with the PR_THP_DISABLE_EXCEPT_ADVISED flag as third argument */
+                if (prctl(PR_SET_THP_DISABLE, 1, PR_THP_DISABLE_EXCEPT_ADVISED, 0, 0) < 0)
+                        return errno == EINVAL ? -EOPNOTSUPP : -errno; /* Same EINVAL -> -EOPNOTSUPP mapping as above */
+                return 0;

+        case MEMORY_THP_SYSTEM: /* Second argument 0: documented for MemoryTHP=system as resetting to the system-wide policy */
+                if (prctl(PR_SET_THP_DISABLE, 0, 0, 0, 0) < 0)
+                        return errno == EINVAL ? -EOPNOTSUPP : -errno; /* Same EINVAL -> -EOPNOTSUPP mapping as above */
+                return 0;

+        default: /* Any value other than the four handled above is treated as a programming error */
+                assert_not_reached();
+        }
+}
+
 static int send_handoff_timestamp(
                 const ExecContext *c,
                 ExecParameters *p,
@@ -5550,6 +5576,16 @@ int exec_invoke(
                         }
                 }
 
+        r = set_memory_thp(context->memory_thp);
+        if (r == -EOPNOTSUPP)
+                log_debug_errno(r, "Setting MemoryTHP=%s is not supported, ignoring: %m",
+                                memory_thp_to_string(context->memory_thp));
+        else if (r < 0) {
+                *exit_status = EXIT_MEMORY_THP;
+                return log_error_errno(r, "Failed to set MemoryTHP=%s: %m",
+                                       memory_thp_to_string(context->memory_thp));
+        }
+
 #if ENABLE_UTMP
         if (context->utmp_id) {
                 _cleanup_free_ char *username_alloc = NULL;
index ba28e913aed8a43358cdd6474bfc2dda8f73ac22..538ce7239706097631e1c0b189120f87ab59cff8 100644 (file)
@@ -1664,6 +1664,10 @@ static int exec_context_serialize(const ExecContext *c, FILE *f) {
         if (r < 0)
                 return r;
 
+        r = serialize_item(f, "exec-context-memory-thp", memory_thp_to_string(c->memory_thp));
+        if (r < 0)
+                return r;
+
         r = serialize_item(f, "exec-context-private-tmp", private_tmp_to_string(c->private_tmp));
         if (r < 0)
                 return r;
@@ -2649,6 +2653,10 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) {
                         r = safe_atoi(val, &c->memory_ksm);
                         if (r < 0)
                                 return r;
+                } else if ((val = startswith(l, "exec-context-memory-thp="))) {
+                        c->memory_thp = memory_thp_from_string(val);
+                        if (c->memory_thp < 0)
+                                return c->memory_thp;
                 } else if ((val = startswith(l, "exec-context-private-tmp="))) {
                         c->private_tmp = private_tmp_from_string(val);
                         if (c->private_tmp < 0)
index a5cb664d6120d98e071669613eaeeabf2ffcd6e8..92c09947db5bb322ed04cb2b77dbb2729749b0fc 100644 (file)
@@ -1117,7 +1117,8 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
                 "%sProtectHostname: %s%s%s\n"
                 "%sProtectProc: %s\n"
                 "%sProcSubset: %s\n"
-                "%sPrivateBPF: %s\n",
+                "%sPrivateBPF: %s\n"
+                "%sMemoryTHP: %s\n",
                 prefix, c->umask,
                 prefix, empty_to_root(c->working_directory),
                 prefix, empty_to_root(c->root_directory),
@@ -1145,7 +1146,8 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
                 prefix, protect_hostname_to_string(c->protect_hostname), c->private_hostname ? ":" : "", strempty(c->private_hostname),
                 prefix, protect_proc_to_string(c->protect_proc),
                 prefix, proc_subset_to_string(c->proc_subset),
-                prefix, private_bpf_to_string(c->private_bpf));
+                prefix, private_bpf_to_string(c->private_bpf),
+                prefix, memory_thp_to_string(c->memory_thp));
 
         if (c->private_bpf == PRIVATE_BPF_YES) {
                 _cleanup_free_ char
index ae26be6781c2bee98a38a751c869d43b9e00288c..29a23f021fa87043c6ca5755321b1e3824f9165f 100644 (file)
@@ -311,6 +311,7 @@ typedef struct ExecContext {
         int mount_apivfs;
         int bind_log_sockets;
         int memory_ksm;
+        MemoryTHP memory_thp;
         PrivateTmp private_tmp;
         PrivateTmp private_var_tmp; /* This is not an independent parameter, but calculated from other
                                      * parameters in unit_patch_contexts(). */
index 69b92d03cb51ffcb9bbb66307ef0a9080f215d31..69e39ed0dc3a2c02d4cd79245f0e3d791086f1a7 100644 (file)
 {% endif %}
 {{type}}.ProtectHostname,                     config_parse_protect_hostname,                      0,                                  offsetof({{type}}, exec_context)
 {{type}}.MemoryKSM,                           config_parse_tristate,                              0,                                  offsetof({{type}}, exec_context.memory_ksm)
+{{type}}.MemoryTHP,                           config_parse_memory_thp,                            0,                                  offsetof({{type}}, exec_context.memory_thp)
 {%- endmacro -%}
 
 {%- macro KILL_CONTEXT_CONFIG_ITEMS(type) -%}
index 6a59d33af25799668596f6c44e85373705be68c3..7a5845ab7d5ff7fafaf1ae9f144a208b9fb668db 100644 (file)
@@ -163,6 +163,7 @@ DEFINE_CONFIG_PARSE_PTR(config_parse_bpf_delegate_commands, bpf_delegate_command
 DEFINE_CONFIG_PARSE_PTR(config_parse_bpf_delegate_maps, bpf_delegate_maps_from_string, uint64_t);
 DEFINE_CONFIG_PARSE_PTR(config_parse_bpf_delegate_programs, bpf_delegate_programs_from_string, uint64_t);
 DEFINE_CONFIG_PARSE_PTR(config_parse_bpf_delegate_attachments, bpf_delegate_attachments_from_string, uint64_t);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_memory_thp, memory_thp, MemoryTHP);
 
 bool contains_instance_specifier_superset(const char *s) {
         const char *p, *q;
index 336ba250bfc261d4c4feec37a471c84180d107cc..3b0ee5fb30cd70db6c5e14281552ee9e5c0aa4a1 100644 (file)
@@ -168,6 +168,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_cgroup_nft_set);
 CONFIG_PARSER_PROTOTYPE(config_parse_mount_node);
 CONFIG_PARSER_PROTOTYPE(config_parse_concurrency_max);
 CONFIG_PARSER_PROTOTYPE(config_parse_bind_network_interface);
+CONFIG_PARSER_PROTOTYPE(config_parse_memory_thp);
 
 /* gperf prototypes */
 const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, GPERF_LEN_TYPE length);
index d75d70f1bc2f0a0d1103fc6190bc504ab4f72113..8893daa230e7c396900099d402cf9b341ecb6627 100644 (file)
@@ -4047,6 +4047,15 @@ DEFINE_STRING_TABLE_LOOKUP(bpf_delegate_map_type, uint64_t);
 DEFINE_STRING_TABLE_LOOKUP(bpf_delegate_prog_type, uint64_t);
 DEFINE_STRING_TABLE_LOOKUP(bpf_delegate_attach_type, uint64_t);
 
+static const char* const memory_thp_table[_MEMORY_THP_MAX] = { /* String form of each MemoryTHP value, matching the values documented for MemoryTHP= */
+        [MEMORY_THP_INHERIT] = "inherit",
+        [MEMORY_THP_DISABLE] = "disable",
+        [MEMORY_THP_MADVISE] = "madvise",
+        [MEMORY_THP_SYSTEM]  = "system",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(memory_thp, MemoryTHP); /* presumably provides memory_thp_to_string()/memory_thp_from_string() used elsewhere in this change; confirm against the macro definition */
+
 char* bpf_delegate_to_string(uint64_t u, const char * (*parser)(uint64_t) _const_ ) {
         assert(parser);
 
index aae43db3896c79fb5ae41091ffa2f415a3f5c72d..d5a2198f1e350f44334aa3e7916ec6ec219889b0 100644 (file)
@@ -91,6 +91,24 @@ typedef enum PrivatePIDs {
         _PRIVATE_PIDS_INVALID = -EINVAL,
 } PrivatePIDs;
 
+typedef enum MemoryTHP {
+        /*
+         * Inherit default from process that starts systemd, i.e. do not make
+         * any PR_SET_THP_DISABLE call.
+         */
+        MEMORY_THP_INHERIT,
+        MEMORY_THP_DISABLE, /* Disable THPs completely for the process */
+        MEMORY_THP_MADVISE, /* Disable THPs for the process except when madvised */
+        /*
+         * Use system default THP setting. This can be used when the process that
+         * starts systemd has already disabled THPs via PR_SET_THP_DISABLE, and we
+         * want to restore the system default THP setting at process invocation time.
+         */
+        MEMORY_THP_SYSTEM,
+        _MEMORY_THP_MAX, /* Count of the values above; used as the size of memory_thp_table[] */
+        _MEMORY_THP_INVALID = -EINVAL, /* Negative sentinel, distinct from every valid value */
+} MemoryTHP;
+
 typedef struct BindMount {
         char *source;
         char *destination;
@@ -233,6 +251,8 @@ DECLARE_STRING_TABLE_LOOKUP(proc_subset, ProcSubset);
 
 DECLARE_STRING_TABLE_LOOKUP(private_bpf, PrivateBPF);
 
+DECLARE_STRING_TABLE_LOOKUP(memory_thp, MemoryTHP);
+
 DECLARE_STRING_TABLE_LOOKUP(bpf_delegate_cmd, uint64_t);
 
 DECLARE_STRING_TABLE_LOOKUP(bpf_delegate_map_type, uint64_t);
index 70e43386c4ed4f78628fa7632552966cc209e219..e3d736ad5fb26410a70ef4c812daefc709b00851 100644 (file)
@@ -908,6 +908,7 @@ int unit_exec_context_build_json(sd_json_variant **ret, const char *name, void *
                         SD_JSON_BUILD_PAIR_BOOLEAN("RemoveIPC", c->remove_ipc),
                         JSON_BUILD_PAIR_TRISTATE_NON_NULL("PrivateMounts", c->private_mounts),
                         JSON_BUILD_PAIR_STRING_NON_EMPTY("MountFlags", mount_propagation_flag_to_string(c->mount_propagation_flag)),
+                        SD_JSON_BUILD_PAIR_STRING("MemoryTHP", memory_thp_to_string(c->memory_thp)),
 
                         /* System Call Filtering */
                         JSON_BUILD_PAIR_CALLBACK_NON_NULL("SystemCallFilter", syscall_filter_build_json, c),
index 0473a706cce261c122ab542e2ccbce170c6b8673..cc1d4ac9833758a13c61dab2ccc61494737d27d4 100644 (file)
@@ -17,3 +17,7 @@
 #ifndef PR_SET_MEMORY_MERGE
 #define PR_SET_MEMORY_MERGE  67
 #endif
+
+#ifndef PR_THP_DISABLE_EXCEPT_ADVISED
+#define PR_THP_DISABLE_EXCEPT_ADVISED  (1 << 1) /* Fallback when not already defined; passed as the third prctl(PR_SET_THP_DISABLE) argument for MemoryTHP=madvise */
+#endif
index 0b9e6ba073b670ec5d1b8513cda1a03d8d9514ee..048e02e5274743c42ddea62d340544390bbb4eec 100644 (file)
@@ -2494,6 +2494,7 @@ static const BusProperty execute_properties[] = {
         { "LogRateLimitBurst",                     bus_append_safe_atou                          },
         { "TTYRows",                               bus_append_safe_atou                          },
         { "TTYColumns",                            bus_append_safe_atou                          },
+        { "MemoryTHP",                             bus_append_string                             },
         { "MountFlags",                            bus_append_mount_propagation_flag_from_string },
         { "Environment",                           bus_append_strv_cunescape                     },
         { "UnsetEnvironment",                      bus_append_strv_cunescape                     },
index d7567dc026f897b8aef381f4f78b930f6b64cf57..b40664d204ce207221836c981ec35cb07ae18ed0 100644 (file)
@@ -71,6 +71,7 @@ const ExitStatusMapping exit_status_mappings[256] = {
         [EXIT_CREDENTIALS] =             { "CREDENTIALS",             EXIT_STATUS_SYSTEMD },
         [EXIT_BPF] =                     { "BPF",                     EXIT_STATUS_SYSTEMD },
         [EXIT_KSM] =                     { "KSM",                     EXIT_STATUS_SYSTEMD },
+        [EXIT_MEMORY_THP] =              { "MEMORY_THP",              EXIT_STATUS_SYSTEMD },
 
         [EXIT_EXCEPTION] =               { "EXCEPTION",               EXIT_STATUS_SYSTEMD },
 
index 4c656c109f196c02790028bd8446468e0c41daeb..28730ff64d79210424d49f980550a5a53da2d327 100644 (file)
@@ -70,6 +70,7 @@ enum {
         EXIT_CREDENTIALS,
         EXIT_BPF,
         EXIT_KSM,
+        EXIT_MEMORY_THP,
 
         EXIT_EXCEPTION = 255,  /* Whenever we want to propagate an abnormal/signal exit, in line with bash */
 };
index 390e1b414b8b16483968b43bf144b300f2078684..9670951c47eb671bc7b836426e39dd4b0e894249 100644 (file)
@@ -626,6 +626,8 @@ static SD_VARLINK_DEFINE_STRUCT_TYPE(
                 SD_VARLINK_DEFINE_FIELD(PrivateMounts, SD_VARLINK_BOOL, SD_VARLINK_NULLABLE),
                 SD_VARLINK_FIELD_COMMENT("https://www.freedesktop.org/software/systemd/man"PROJECT_VERSION_STR"systemd.exec.html#MountFlags="),
                 SD_VARLINK_DEFINE_FIELD(MountFlags, SD_VARLINK_STRING, SD_VARLINK_NULLABLE),
+                SD_VARLINK_FIELD_COMMENT("https://www.freedesktop.org/software/systemd/man"PROJECT_VERSION_STR"systemd.exec.html#MemoryTHP="),
+                SD_VARLINK_DEFINE_FIELD(MemoryTHP, SD_VARLINK_STRING, SD_VARLINK_NULLABLE),
 
                 /* System Call Filtering
                  * https://www.freedesktop.org/software/systemd/man/latest/systemd.exec.html#System%20Call%20Filtering */