readonly t DefaultLimitRTTIMESoft = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly t DefaultTasksMax = ...;
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+ readonly t DefaultMemoryPressureThresholdUSec = ...;
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+ readonly s DefaultMemoryPressureWatch = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly t TimerSlackNSec = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
<!--property DefaultTasksMax is not documented!-->
+ <!--property DefaultMemoryPressureThresholdUSec is not documented!-->
+
+ <!--property DefaultMemoryPressureWatch is not documented!-->
+
<!--property TimerSlackNSec is not documented!-->
<!--property DefaultOOMPolicy is not documented!-->
<variablelist class="dbus-property" generated="True" extra-ref="DefaultTasksMax"/>
+ <variablelist class="dbus-property" generated="True" extra-ref="DefaultMemoryPressureThresholdUSec"/>
+
+ <variablelist class="dbus-property" generated="True" extra-ref="DefaultMemoryPressureWatch"/>
+
<variablelist class="dbus-property" generated="True" extra-ref="TimerSlackNSec"/>
<variablelist class="dbus-property" generated="True" extra-ref="DefaultOOMPolicy"/>
readonly a(iiqq) SocketBindDeny = [...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly (bas) RestrictNetworkInterfaces = ...;
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+ readonly s MemoryPressureWatch = '...';
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+ readonly t MemoryPressureThresholdUSec = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly as Environment = ['...', ...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
<!--property RestrictNetworkInterfaces is not documented!-->
+ <!--property MemoryPressureWatch is not documented!-->
+
+ <!--property MemoryPressureThresholdUSec is not documented!-->
+
<!--property EnvironmentFiles is not documented!-->
<!--property PassEnvironment is not documented!-->
<variablelist class="dbus-property" generated="True" extra-ref="RestrictNetworkInterfaces"/>
+ <variablelist class="dbus-property" generated="True" extra-ref="MemoryPressureWatch"/>
+
+ <variablelist class="dbus-property" generated="True" extra-ref="MemoryPressureThresholdUSec"/>
+
<variablelist class="dbus-property" generated="True" extra-ref="Environment"/>
<variablelist class="dbus-property" generated="True" extra-ref="EnvironmentFiles"/>
readonly a(iiqq) SocketBindDeny = [...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly (bas) RestrictNetworkInterfaces = ...;
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+ readonly s MemoryPressureWatch = '...';
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+ readonly t MemoryPressureThresholdUSec = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly as Environment = ['...', ...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
<!--property RestrictNetworkInterfaces is not documented!-->
+ <!--property MemoryPressureWatch is not documented!-->
+
+ <!--property MemoryPressureThresholdUSec is not documented!-->
+
<!--property EnvironmentFiles is not documented!-->
<!--property PassEnvironment is not documented!-->
<variablelist class="dbus-property" generated="True" extra-ref="RestrictNetworkInterfaces"/>
+ <variablelist class="dbus-property" generated="True" extra-ref="MemoryPressureWatch"/>
+
+ <variablelist class="dbus-property" generated="True" extra-ref="MemoryPressureThresholdUSec"/>
+
<variablelist class="dbus-property" generated="True" extra-ref="Environment"/>
<variablelist class="dbus-property" generated="True" extra-ref="EnvironmentFiles"/>
readonly a(iiqq) SocketBindDeny = [...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly (bas) RestrictNetworkInterfaces = ...;
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+ readonly s MemoryPressureWatch = '...';
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+ readonly t MemoryPressureThresholdUSec = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly as Environment = ['...', ...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
<!--property RestrictNetworkInterfaces is not documented!-->
+ <!--property MemoryPressureWatch is not documented!-->
+
+ <!--property MemoryPressureThresholdUSec is not documented!-->
+
<!--property EnvironmentFiles is not documented!-->
<!--property PassEnvironment is not documented!-->
<variablelist class="dbus-property" generated="True" extra-ref="RestrictNetworkInterfaces"/>
+ <variablelist class="dbus-property" generated="True" extra-ref="MemoryPressureWatch"/>
+
+ <variablelist class="dbus-property" generated="True" extra-ref="MemoryPressureThresholdUSec"/>
+
<variablelist class="dbus-property" generated="True" extra-ref="Environment"/>
<variablelist class="dbus-property" generated="True" extra-ref="EnvironmentFiles"/>
readonly a(iiqq) SocketBindDeny = [...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly (bas) RestrictNetworkInterfaces = ...;
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+ readonly s MemoryPressureWatch = '...';
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+ readonly t MemoryPressureThresholdUSec = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly as Environment = ['...', ...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
<!--property RestrictNetworkInterfaces is not documented!-->
+ <!--property MemoryPressureWatch is not documented!-->
+
+ <!--property MemoryPressureThresholdUSec is not documented!-->
+
<!--property EnvironmentFiles is not documented!-->
<!--property PassEnvironment is not documented!-->
<variablelist class="dbus-property" generated="True" extra-ref="RestrictNetworkInterfaces"/>
+ <variablelist class="dbus-property" generated="True" extra-ref="MemoryPressureWatch"/>
+
+ <variablelist class="dbus-property" generated="True" extra-ref="MemoryPressureThresholdUSec"/>
+
<variablelist class="dbus-property" generated="True" extra-ref="Environment"/>
<variablelist class="dbus-property" generated="True" extra-ref="EnvironmentFiles"/>
readonly a(iiqq) SocketBindDeny = [...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly (bas) RestrictNetworkInterfaces = ...;
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+ readonly s MemoryPressureWatch = '...';
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+ readonly t MemoryPressureThresholdUSec = ...;
};
interface org.freedesktop.DBus.Peer { ... };
interface org.freedesktop.DBus.Introspectable { ... };
<!--property RestrictNetworkInterfaces is not documented!-->
+ <!--property MemoryPressureWatch is not documented!-->
+
+ <!--property MemoryPressureThresholdUSec is not documented!-->
+
<!--Autogenerated cross-references for systemd.directives, do not edit-->
<variablelist class="dbus-interface" generated="True" extra-ref="org.freedesktop.systemd1.Unit"/>
<variablelist class="dbus-property" generated="True" extra-ref="RestrictNetworkInterfaces"/>
+ <variablelist class="dbus-property" generated="True" extra-ref="MemoryPressureWatch"/>
+
+ <variablelist class="dbus-property" generated="True" extra-ref="MemoryPressureThresholdUSec"/>
+
<!--End of Autogenerated section-->
<refsect2>
readonly a(iiqq) SocketBindDeny = [...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly (bas) RestrictNetworkInterfaces = ...;
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+ readonly s MemoryPressureWatch = '...';
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+ readonly t MemoryPressureThresholdUSec = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly s KillMode = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
<!--property RestrictNetworkInterfaces is not documented!-->
+ <!--property MemoryPressureWatch is not documented!-->
+
+ <!--property MemoryPressureThresholdUSec is not documented!-->
+
<!--property KillMode is not documented!-->
<!--property KillSignal is not documented!-->
<variablelist class="dbus-property" generated="True" extra-ref="RestrictNetworkInterfaces"/>
+ <variablelist class="dbus-property" generated="True" extra-ref="MemoryPressureWatch"/>
+
+ <variablelist class="dbus-property" generated="True" extra-ref="MemoryPressureThresholdUSec"/>
+
<variablelist class="dbus-property" generated="True" extra-ref="KillMode"/>
<variablelist class="dbus-property" generated="True" extra-ref="KillSignal"/>
to configure the rate limit window, and <varname>ReloadLimitBurst=</varname> takes a positive integer to
configure the maximum allowed number of reloads within the configured time window.</para></listitem>
</varlistentry>
+
+ <varlistentry>
+ <term><varname>DefaultMemoryPressureWatch=</varname></term>
+ <term><varname>DefaultMemoryPressureThresholdSec=</varname></term>
+
+ <listitem><para>Configures the default settings for the per-unit
+ <varname>MemoryPressureWatch=</varname> and <varname>MemoryPressureThresholdSec=</varname>
+ settings. See
+ <citerefentry><refentrytitle>systemd.resource-control</refentrytitle><manvolnum>5</manvolnum></citerefentry>
+ for details. Defaults to <literal>auto</literal> and <literal>100ms</literal>, respectively. This
+ also sets the memory pressure monitoring threshold for the service manager itself.</para></listitem>
+ </varlistentry>
</variablelist>
</refsect1>
</para></listitem>
</varlistentry>
+ <varlistentry>
+ <term><varname>$MEMORY_PRESSURE_WATCH</varname></term>
+ <term><varname>$MEMORY_PRESSURE_WRITE</varname></term>
+
+ <listitem><para>If memory pressure monitoring is enabled for this service unit, the path to watch
+ and the data to write into it. See <ulink url="https://systemd.io/MEMORY_PRESSURE">Memory Pressure
+ Handling</ulink> for details about these variables and the service protocol data they
+ convey.</para></listitem>
+ </varlistentry>
+
</variablelist>
<para>For system services, when <varname>PAMName=</varname> is enabled and <command>pam_systemd</command> is part
</para>
</listitem>
</varlistentry>
+
+ <varlistentry>
+ <term><varname>MemoryPressureWatch=</varname></term>
+
+ <listitem><para>Controls memory pressure monitoring for invoked processes. Takes one of
+ <literal>off</literal>, <literal>on</literal>, <literal>auto</literal> or <literal>skip</literal>. If
+ set to <literal>off</literal>, tells the service not to watch for memory pressure events, by setting the
+ <varname>$MEMORY_PRESSURE_WATCH</varname> environment variable to the literal string
+ <filename>/dev/null</filename>. If set to <literal>on</literal>, tells the service to watch for memory
+ pressure events. This enables memory accounting for the service, and ensures the
+ <filename>memory.pressure</filename> cgroup attribute file is accessible for reading and writing to the
+ service's user. It then sets the <varname>$MEMORY_PRESSURE_WATCH</varname> environment variable for
+ processes invoked by the unit to the file system path to this file. The threshold information
+ configured with <varname>MemoryPressureThresholdSec=</varname> is encoded in the
+ <varname>$MEMORY_PRESSURE_WRITE</varname> environment variable. If set to <literal>auto</literal>,
+ the protocol is enabled if memory accounting is enabled for the unit anyway, and disabled
+ otherwise. If set to <literal>skip</literal>, the logic is neither enabled nor disabled, and the two
+ environment variables are not set.</para>
+
+ <para>Note that services are free to use the two environment variables, but it's unproblematic if
+ they ignore them. Memory pressure handling must be implemented individually in each service, and
+ usually means different things for different software. For further details on memory pressure
+ handling see <ulink url="https://systemd.io/MEMORY_PRESSURE">Memory Pressure Handling in
+ systemd</ulink>.</para>
+
+ <para>Services implemented using
+ <citerefentry><refentrytitle>sd-event</refentrytitle><manvolnum>3</manvolnum></citerefentry> may use
+ <citerefentry><refentrytitle>sd_event_add_memory_pressure</refentrytitle><manvolnum>3</manvolnum></citerefentry>
+ to watch for and handle memory pressure events.</para>
+
+ <para>If not explicitly set, defaults to the <varname>DefaultMemoryPressureWatch=</varname> setting in
+ <citerefentry><refentrytitle>systemd-system.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>.</para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>MemoryPressureThresholdSec=</varname></term>
+
+ <listitem><para>Sets the memory pressure threshold time for the memory pressure monitor as configured via
+ <varname>MemoryPressureWatch=</varname>. Specifies the maximum allocation latency before a memory
+ pressure event is signalled to the service, per 1s window. If not specified, defaults to the
+ <varname>DefaultMemoryPressureThresholdSec=</varname> setting in
+ <citerefentry><refentrytitle>systemd-system.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>
+ (which in turn defaults to 100ms). The specified value expects a time unit such as
+ <literal>ms</literal> or <literal>µs</literal>, see
+ <citerefentry><refentrytitle>systemd.time</refentrytitle><manvolnum>7</manvolnum></citerefentry> for
+ details on the permitted syntax.</para></listitem>
+ </varlistentry>
</variablelist>
</refsect1>
.moom_swap = MANAGED_OOM_AUTO,
.moom_mem_pressure = MANAGED_OOM_AUTO,
.moom_preference = MANAGED_OOM_PREFERENCE_NONE,
+
+ .memory_pressure_watch = _CGROUP_PRESSURE_WATCH_INVALID,
+ .memory_pressure_threshold_usec = USEC_INFINITY,
};
}
"%sManagedOOMSwap: %s\n"
"%sManagedOOMMemoryPressure: %s\n"
"%sManagedOOMMemoryPressureLimit: " PERMYRIAD_AS_PERCENT_FORMAT_STR "\n"
- "%sManagedOOMPreference: %s\n",
+ "%sManagedOOMPreference: %s\n"
+ "%sMemoryPressureWatch: %s\n",
prefix, yes_no(c->cpu_accounting),
prefix, yes_no(c->io_accounting),
prefix, yes_no(c->blockio_accounting),
prefix, managed_oom_mode_to_string(c->moom_swap),
prefix, managed_oom_mode_to_string(c->moom_mem_pressure),
prefix, PERMYRIAD_AS_PERCENT_FORMAT_VAL(UINT32_SCALE_TO_PERMYRIAD(c->moom_mem_pressure_limit)),
- prefix, managed_oom_preference_to_string(c->moom_preference));
+ prefix, managed_oom_preference_to_string(c->moom_preference),
+ prefix, cgroup_pressure_watch_to_string(c->memory_pressure_watch));
+
+ if (c->memory_pressure_threshold_usec != USEC_INFINITY)
+ fprintf(f, "%sMemoryPressureThresholdSec: %s\n",
+ prefix, FORMAT_TIMESPAN(c->memory_pressure_threshold_usec, 1));
if (c->delegate) {
_cleanup_free_ char *t = NULL;
};
DEFINE_STRING_TABLE_LOOKUP(freezer_action, FreezerAction);
+
+/* String names for the CGroupPressureWatch values, indexed by enum value. These are the strings that
+ * cgroup_pressure_watch_to_string() returns and cgroup_pressure_watch_from_string() parses. */
+static const char* const cgroup_pressure_watch_table[_CGROUP_PRESSURE_WATCH_MAX] = {
+ [CGROUP_PRESSURE_WATCH_OFF] = "off",
+ [CGROUP_PRESSURE_WATCH_AUTO] = "auto",
+ [CGROUP_PRESSURE_WATCH_ON] = "on",
+ [CGROUP_PRESSURE_WATCH_SKIP] = "skip",
+};
+
+/* Generates the string lookup helpers for the table above. NOTE(review): the WITH_BOOLEAN variant
+ * presumably also accepts boolean strings, with a true value mapping to CGROUP_PRESSURE_WATCH_ON —
+ * confirm against the macro definition in string-table.h. */
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(cgroup_pressure_watch, CGroupPressureWatch, CGROUP_PRESSURE_WATCH_ON);
uint16_t port_min;
};
+/* Modes for the per-unit MemoryPressureWatch= setting and the manager-wide DefaultMemoryPressureWatch=. */
+typedef enum CGroupPressureWatch {
+ CGROUP_PRESSURE_WATCH_OFF, /* → tells the service payload explicitly not to watch for memory pressure */
+ CGROUP_PRESSURE_WATCH_AUTO, /* → on if memory accounting is on anyway for the unit, otherwise off */
+ CGROUP_PRESSURE_WATCH_ON, /* → tells the service payload to watch for memory pressure */
+ CGROUP_PRESSURE_WATCH_SKIP, /* → doesn't set up memory pressure watch, but also doesn't explicitly tell payload to avoid it */
+ _CGROUP_PRESSURE_WATCH_MAX,
+ _CGROUP_PRESSURE_WATCH_INVALID = -EINVAL,
+} CGroupPressureWatch;
+
struct CGroupContext {
bool cpu_accounting;
bool io_accounting;
ManagedOOMMode moom_mem_pressure;
uint32_t moom_mem_pressure_limit; /* Normalized to 2^32-1 == 100% */
ManagedOOMPreference moom_preference;
+
+ /* Memory pressure logic */
+ CGroupPressureWatch memory_pressure_watch;
+ usec_t memory_pressure_threshold_usec;
+ /* NB: For now we don't make the period configurable, nor the type, nor do we allow multiple
+ * triggers, nor triggers for non-memory pressure. We might add that later. */
};
/* Used when querying IP accounting data */
void cgroup_context_remove_bpf_foreign_program(CGroupContext *c, CGroupBPFForeignProgram *p);
void cgroup_context_remove_socket_bind(CGroupSocketBindItem **head);
+/* Returns true if memory pressure watching shall be set up for this cgroup context: always when the
+ * watch mode is explicitly "on", and in "auto" mode only if memory accounting is enabled. Any other
+ * mode yields false. */
+static inline bool cgroup_context_want_memory_pressure(const CGroupContext *c) {
+        assert(c);
+
+        switch (c->memory_pressure_watch) {
+
+        case CGROUP_PRESSURE_WATCH_ON:
+                /* Explicitly requested, regardless of accounting */
+                return true;
+
+        case CGROUP_PRESSURE_WATCH_AUTO:
+                /* Piggy-back on memory accounting: only watch if it is on anyway */
+                return c->memory_accounting;
+
+        default:
+                return false;
+        }
+}
+
int cgroup_add_device_allow(CGroupContext *c, const char *dev, const char *mode);
int cgroup_add_bpf_foreign_program(CGroupContext *c, uint32_t attach_type, const char *path);
const char* freezer_action_to_string(FreezerAction a) _const_;
FreezerAction freezer_action_from_string(const char *s) _pure_;
+
+const char* cgroup_pressure_watch_to_string(CGroupPressureWatch a) _const_;
+CGroupPressureWatch cgroup_pressure_watch_from_string(const char *s) _pure_;
#include "socket-util.h"
BUS_DEFINE_PROPERTY_GET(bus_property_get_tasks_max, "t", TasksMax, tasks_max_resolve);
+BUS_DEFINE_PROPERTY_GET_ENUM(bus_property_get_cgroup_pressure_watch, cgroup_pressure_watch, CGroupPressureWatch);
static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_cgroup_device_policy, cgroup_device_policy, CGroupDevicePolicy);
static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_managed_oom_mode, managed_oom_mode, ManagedOOMMode);
SD_BUS_PROPERTY("SocketBindAllow", "a(iiqq)", property_get_socket_bind, offsetof(CGroupContext, socket_bind_allow), 0),
SD_BUS_PROPERTY("SocketBindDeny", "a(iiqq)", property_get_socket_bind, offsetof(CGroupContext, socket_bind_deny), 0),
SD_BUS_PROPERTY("RestrictNetworkInterfaces", "(bas)", property_get_restrict_network_interfaces, 0, 0),
+ SD_BUS_PROPERTY("MemoryPressureWatch", "s", bus_property_get_cgroup_pressure_watch, offsetof(CGroupContext, memory_pressure_watch), 0),
+ SD_BUS_PROPERTY("MemoryPressureThresholdUSec", "t", bus_property_get_usec, offsetof(CGroupContext, memory_pressure_threshold_usec), 0),
SD_BUS_VTABLE_END
};
}
}
+ return 1;
+
+ } else if (streq(name, "MemoryPressureWatch")) {
+ CGroupPressureWatch p;
+ const char *t;
+
+ r = sd_bus_message_read(message, "s", &t);
+ if (r < 0)
+ return r;
+
+ if (isempty(t))
+ p = _CGROUP_PRESSURE_WATCH_INVALID;
+ else {
+ p = cgroup_pressure_watch_from_string(t);
+ if (p < 0)
+ return p;
+ }
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ c->memory_pressure_watch = p;
+ unit_write_settingf(u, flags, name, "MemoryPressureWatch=%s", strempty(cgroup_pressure_watch_to_string(p)));
+ }
+
+ return 1;
+
+ } else if (streq(name, "MemoryPressureThresholdUSec")) {
+ uint64_t t;
+
+ r = sd_bus_message_read(message, "t", &t);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ c->memory_pressure_threshold_usec = t;
+
+ if (t == UINT64_MAX)
+ unit_write_setting(u, flags, name, "MemoryPressureThresholdUSec=");
+ else
+ unit_write_settingf(u, flags, name, "MemoryPressureThresholdUSec=%" PRIu64, t);
+ }
+
return 1;
}
extern const sd_bus_vtable bus_cgroup_vtable[];
int bus_property_get_tasks_max(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *ret_error);
+int bus_property_get_cgroup_pressure_watch(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *ret_error);
int bus_cgroup_set_property(Unit *u, CGroupContext *c, const char *name, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
SD_BUS_PROPERTY("DefaultLimitRTTIME", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_RTTIME]), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("DefaultLimitRTTIMESoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_RTTIME]), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("DefaultTasksMax", "t", bus_property_get_tasks_max, offsetof(Manager, default_tasks_max), 0),
+ SD_BUS_PROPERTY("DefaultMemoryPressureThresholdUSec", "t", bus_property_get_usec, offsetof(Manager, default_memory_pressure_threshold_usec), 0),
+ SD_BUS_PROPERTY("DefaultMemoryPressureWatch", "s", bus_property_get_cgroup_pressure_watch, offsetof(Manager, default_memory_pressure_watch), 0),
SD_BUS_PROPERTY("TimerSlackNSec", "t", property_get_timer_slack_nsec, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("DefaultOOMPolicy", "s", bus_property_get_oom_policy, offsetof(Manager, default_oom_policy), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("DefaultOOMScoreAdjust", "i", property_get_oom_score_adjust, 0, SD_BUS_VTABLE_PROPERTY_CONST),
#include "parse-util.h"
#include "path-util.h"
#include "process-util.h"
+#include "psi-util.h"
#include "random-util.h"
#include "recurse-dir.h"
#include "rlimit-util.h"
const Unit *u,
const ExecContext *c,
const ExecParameters *p,
+ const CGroupContext *cgroup_context,
size_t n_fds,
char **fdnames,
const char *home,
const char *shell,
dev_t journal_stream_dev,
ino_t journal_stream_ino,
+ const char *memory_pressure_path,
char ***ret) {
_cleanup_strv_free_ char **our_env = NULL;
assert(p);
assert(ret);
-#define N_ENV_VARS 17
+#define N_ENV_VARS 19
our_env = new0(char*, N_ENV_VARS + _EXEC_DIRECTORY_TYPE_MAX);
if (!our_env)
return -ENOMEM;
our_env[n_env++] = x;
- our_env[n_env++] = NULL;
- assert(n_env <= N_ENV_VARS + _EXEC_DIRECTORY_TYPE_MAX);
+ if (memory_pressure_path) {
+ x = strjoin("MEMORY_PRESSURE_WATCH=", memory_pressure_path);
+ if (!x)
+ return -ENOMEM;
+
+ our_env[n_env++] = x;
+
+ if (cgroup_context && !path_equal(memory_pressure_path, "/dev/null")) {
+ _cleanup_free_ char *b = NULL, *e = NULL;
+
+ if (asprintf(&b, "%s " USEC_FMT " " USEC_FMT,
+ MEMORY_PRESSURE_DEFAULT_TYPE,
+ cgroup_context->memory_pressure_threshold_usec == USEC_INFINITY ? MEMORY_PRESSURE_DEFAULT_THRESHOLD_USEC :
+ CLAMP(cgroup_context->memory_pressure_threshold_usec, 1U, MEMORY_PRESSURE_DEFAULT_WINDOW_USEC),
+ MEMORY_PRESSURE_DEFAULT_WINDOW_USEC) < 0)
+ return -ENOMEM;
+
+ if (base64mem(b, strlen(b) + 1, &e) < 0)
+ return -ENOMEM;
+
+ x = strjoin("MEMORY_PRESSURE_WRITE=", e);
+ if (!x)
+ return -ENOMEM;
+
+ our_env[n_env++] = x;
+ }
+ }
+
+ assert(n_env < N_ENV_VARS + _EXEC_DIRECTORY_TYPE_MAX);
#undef N_ENV_VARS
*ret = TAKE_PTR(our_env);
const ExecParameters *params,
ExecRuntime *runtime,
DynamicCreds *dcreds,
+ const CGroupContext *cgroup_context,
int socket_fd,
const int named_iofds[static 3],
int *params_fds,
int r, ngids = 0, exec_fd;
_cleanup_free_ gid_t *supplementary_gids = NULL;
const char *username = NULL, *groupname = NULL;
- _cleanup_free_ char *home_buffer = NULL;
+ _cleanup_free_ char *home_buffer = NULL, *memory_pressure_path = NULL;
const char *home = NULL, *shell = NULL;
char **final_argv = NULL;
dev_t journal_stream_dev = 0;
}
}
- /* If delegation is enabled we'll pass ownership of the cgroup to the user of the new process. On cgroup v1
- * this is only about systemd's own hierarchy, i.e. not the controller hierarchies, simply because that's not
- * safe. On cgroup v2 there's only one hierarchy anyway, and delegation is safe there, hence in that case only
- * touch a single hierarchy too. */
- if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
- r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, uid, gid);
- if (r < 0) {
- *exit_status = EXIT_CGROUP;
- return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
+ if (params->cgroup_path) {
+ /* If delegation is enabled we'll pass ownership of the cgroup to the user of the new process. On cgroup v1
+ * this is only about systemd's own hierarchy, i.e. not the controller hierarchies, simply because that's not
+ * safe. On cgroup v2 there's only one hierarchy anyway, and delegation is safe there, hence in that case only
+ * touch a single hierarchy too. */
+
+ if (params->flags & EXEC_CGROUP_DELEGATE) {
+ r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, uid, gid);
+ if (r < 0) {
+ *exit_status = EXIT_CGROUP;
+ return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
+ }
+ }
+
+ if (cgroup_context && cg_unified() > 0 && is_pressure_supported() > 0) {
+ if (cgroup_context_want_memory_pressure(cgroup_context)) {
+ r = cg_get_path("memory", params->cgroup_path, "memory.pressure", &memory_pressure_path);
+ if (r < 0) {
+ *exit_status = EXIT_MEMORY;
+ return log_oom();
+ }
+
+ r = chmod_and_chown(memory_pressure_path, 0644, uid, gid);
+ if (r < 0) {
+ log_unit_full_errno(unit, r == -ENOENT || ERRNO_IS_PRIVILEGE(r) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to adjust ownership of '%s', ignoring: %m", memory_pressure_path);
+ memory_pressure_path = mfree(memory_pressure_path);
+ }
+ } else if (cgroup_context->memory_pressure_watch == CGROUP_PRESSURE_WATCH_OFF) {
+ memory_pressure_path = strdup("/dev/null"); /* /dev/null is explicit indicator for turning of memory pressure watch */
+ if (!memory_pressure_path) {
+ *exit_status = EXIT_MEMORY;
+ return log_oom();
+ }
+ }
}
}
unit,
context,
params,
+ cgroup_context,
n_fds,
fdnames,
home,
shell,
journal_stream_dev,
journal_stream_ino,
+ memory_pressure_path,
&our_env);
if (r < 0) {
*exit_status = EXIT_MEMORY;
const ExecParameters *params,
ExecRuntime *runtime,
DynamicCreds *dcreds,
+ const CGroupContext *cgroup_context,
pid_t *ret) {
int socket_fd, r, named_iofds[3] = { -1, -1, -1 }, *fds = NULL;
params,
runtime,
dcreds,
+ cgroup_context,
socket_fd,
named_iofds,
fds,
const ExecParameters *exec_params,
ExecRuntime *runtime,
DynamicCreds *dynamic_creds,
+ const CGroupContext *cgroup_context,
pid_t *ret);
void exec_command_done_array(ExecCommand *c, size_t n);
DEFINE_CONFIG_PARSE_ENUM(config_parse_socket_bind, socket_address_bind_ipv6_only_or_bool, SocketAddressBindIPv6Only, "Failed to parse bind IPv6 only value");
DEFINE_CONFIG_PARSE_ENUM(config_parse_oom_policy, oom_policy, OOMPolicy, "Failed to parse OOM policy");
DEFINE_CONFIG_PARSE_ENUM(config_parse_managed_oom_preference, managed_oom_preference, ManagedOOMPreference, "Failed to parse ManagedOOMPreference=");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_cgroup_pressure_watch, cgroup_pressure_watch, CGroupPressureWatch, "Failed to parse CGroupPressureWatch=");
DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(config_parse_ip_tos, ip_tos, int, -1, "Failed to parse IP TOS value");
DEFINE_CONFIG_PARSE_PTR(config_parse_blockio_weight, cg_blkio_weight_parse, uint64_t, "Invalid block IO weight");
DEFINE_CONFIG_PARSE_PTR(config_parse_cg_weight, cg_weight_parse, uint64_t, "Invalid weight");
CONFIG_PARSER_PROTOTYPE(config_parse_tty_size);
CONFIG_PARSER_PROTOTYPE(config_parse_log_filter_patterns);
CONFIG_PARSER_PROTOTYPE(config_parse_open_file);
+CONFIG_PARSER_PROTOTYPE(config_parse_cgroup_pressure_watch);
/* gperf prototypes */
const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, GPERF_LEN_TYPE length);
#include "pretty-print.h"
#include "proc-cmdline.h"
#include "process-util.h"
+#include "psi-util.h"
#include "random-util.h"
#include "rlimit-util.h"
#if HAVE_SECCOMP
static bool arg_default_memory_accounting;
static bool arg_default_tasks_accounting;
static TasksMax arg_default_tasks_max;
+static usec_t arg_default_memory_pressure_threshold_usec;
+static CGroupPressureWatch arg_default_memory_pressure_watch;
static sd_id128_t arg_machine_id;
static EmergencyAction arg_cad_burst_action;
static OOMPolicy arg_default_oom_policy;
{ "Manager", "DefaultMemoryAccounting", config_parse_bool, 0, &arg_default_memory_accounting },
{ "Manager", "DefaultTasksAccounting", config_parse_bool, 0, &arg_default_tasks_accounting },
{ "Manager", "DefaultTasksMax", config_parse_tasks_max, 0, &arg_default_tasks_max },
+ { "Manager", "DefaultMemoryPressureThresholdSec", config_parse_sec, 0, &arg_default_memory_pressure_threshold_usec },
+ { "Manager", "DefaultMemoryPressureWatch", config_parse_cgroup_pressure_watch, 0, &arg_default_memory_pressure_watch },
{ "Manager", "CtrlAltDelBurstAction", config_parse_emergency_action, arg_system, &arg_cad_burst_action },
{ "Manager", "DefaultOOMPolicy", config_parse_oom_policy, 0, &arg_default_oom_policy },
{ "Manager", "DefaultOOMScoreAdjust", config_parse_oom_score_adjust, 0, NULL },
m->default_memory_accounting = arg_default_memory_accounting;
m->default_tasks_accounting = arg_default_tasks_accounting;
m->default_tasks_max = arg_default_tasks_max;
+ m->default_memory_pressure_watch = arg_default_memory_pressure_watch;
+ m->default_memory_pressure_threshold_usec = arg_default_memory_pressure_threshold_usec;
m->default_oom_policy = arg_default_oom_policy;
m->default_oom_score_adjust_set = arg_default_oom_score_adjust_set;
m->default_oom_score_adjust = arg_default_oom_score_adjust;
arg_default_memory_accounting = MEMORY_ACCOUNTING_DEFAULT;
arg_default_tasks_accounting = true;
arg_default_tasks_max = DEFAULT_TASKS_MAX;
+ arg_default_memory_pressure_threshold_usec = MEMORY_PRESSURE_DEFAULT_THRESHOLD_USEC;
+ arg_default_memory_pressure_watch = CGROUP_PRESSURE_WATCH_AUTO;
arg_machine_id = (sd_id128_t) {};
arg_cad_burst_action = EMERGENCY_ACTION_REBOOT_FORCE;
arg_default_oom_policy = OOM_STOP;
#include "path-lookup.h"
#include "path-util.h"
#include "process-util.h"
+#include "psi-util.h"
#include "ratelimit.h"
#include "rlimit-util.h"
#include "rm-rf.h"
"LOG_NAMESPACE",
"MAINPID",
"MANAGERPID",
+ "MEMORY_PRESSURE_WATCH",
+ "MEMORY_PRESSURE_WRITE",
"MONITOR_EXIT_CODE",
"MONITOR_EXIT_STATUS",
"MONITOR_INVOCATION_ID",
if (r < 0)
log_full_errno(ERRNO_IS_NOT_SUPPORTED(r) || ERRNO_IS_PRIVILEGE(r) || (r == -EHOSTDOWN) ? LOG_DEBUG : LOG_NOTICE, r,
"Failed to establish memory pressure event source, ignoring: %m");
+ else if (m->default_memory_pressure_threshold_usec != USEC_INFINITY) {
+
+ /* If there's a default memory pressure threshold set, also apply it to the service manager itself */
+ r = sd_event_source_set_memory_pressure_period(
+ m->memory_pressure_event_source,
+ m->default_memory_pressure_threshold_usec,
+ MEMORY_PRESSURE_DEFAULT_WINDOW_USEC);
+ if (r < 0)
+ log_warning_errno(r, "Failed to adjust memory pressure threshold, ignoring: %m");
+ }
return 0;
}
.test_run_flags = test_run_flags,
.default_oom_policy = OOM_STOP,
+
+ .default_memory_pressure_watch = CGROUP_PRESSURE_WATCH_AUTO,
+ .default_memory_pressure_threshold_usec = USEC_INFINITY,
};
#if ENABLE_EFI
int default_oom_score_adjust;
bool default_oom_score_adjust_set;
+ CGroupPressureWatch default_memory_pressure_watch;
+ usec_t default_memory_pressure_threshold_usec;
+
int original_log_level;
LogTarget original_log_target;
bool log_level_overridden;
&exec_params,
m->exec_runtime,
&m->dynamic_creds,
+ &m->cgroup_context,
&pid);
if (r < 0)
return r;
&exec_params,
s->exec_runtime,
&s->dynamic_creds,
+ &s->cgroup_context,
&pid);
if (r < 0)
return r;
&exec_params,
s->exec_runtime,
&s->dynamic_creds,
+ &s->cgroup_context,
&pid);
if (r < 0)
return r;
&exec_params,
s->exec_runtime,
&s->dynamic_creds,
+ &s->cgroup_context,
&pid);
if (r < 0)
goto fail;
if (u->type != UNIT_SLICE)
cc->tasks_max = u->manager->default_tasks_max;
+
+ cc->memory_pressure_watch = u->manager->default_memory_pressure_watch;
+ cc->memory_pressure_threshold_usec = u->manager->default_memory_pressure_threshold_usec;
}
ec = unit_get_exec_context(u);
"Slice",
"ManagedOOMSwap",
"ManagedOOMMemoryPressure",
- "ManagedOOMPreference"))
+ "ManagedOOMPreference",
+ "MemoryPressureWatch"))
return bus_append_string(m, field, eq);
if (STR_IN_SET(field, "ManagedOOMMemoryPressureLimit")) {
return 1;
}
+ if (streq(field, "MemoryPressureThresholdSec"))
+ return bus_append_parse_sec_rename(m, field, eq);
+
return 0;
}