readonly t DefaultCPUPressureThresholdUSec = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly s DefaultCPUPressureWatch = '...';
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+ readonly t DefaultIOPressureThresholdUSec = ...;
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+ readonly s DefaultIOPressureWatch = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly t TimerSlackNSec = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
<!--property DefaultCPUPressureWatch is not documented!-->
+ <!--property DefaultIOPressureThresholdUSec is not documented!-->
+
+ <!--property DefaultIOPressureWatch is not documented!-->
+
<!--property TimerSlackNSec is not documented!-->
<!--property DefaultOOMPolicy is not documented!-->
<variablelist class="dbus-property" generated="True" extra-ref="DefaultCPUPressureWatch"/>
+ <variablelist class="dbus-property" generated="True" extra-ref="DefaultIOPressureThresholdUSec"/>
+
+ <variablelist class="dbus-property" generated="True" extra-ref="DefaultIOPressureWatch"/>
+
<variablelist class="dbus-property" generated="True" extra-ref="TimerSlackNSec"/>
<variablelist class="dbus-property" generated="True" extra-ref="DefaultOOMPolicy"/>
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly t CPUPressureThresholdUSec = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+ readonly s IOPressureWatch = '...';
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+ readonly t IOPressureThresholdUSec = ...;
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly a(iiss) NFTSet = [...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly b CoredumpReceive = ...;
<!--property CPUPressureThresholdUSec is not documented!-->
+ <!--property IOPressureWatch is not documented!-->
+
+ <!--property IOPressureThresholdUSec is not documented!-->
+
<!--property NFTSet is not documented!-->
<!--property CoredumpReceive is not documented!-->
<variablelist class="dbus-property" generated="True" extra-ref="CPUPressureThresholdUSec"/>
+ <variablelist class="dbus-property" generated="True" extra-ref="IOPressureWatch"/>
+
+ <variablelist class="dbus-property" generated="True" extra-ref="IOPressureThresholdUSec"/>
+
<variablelist class="dbus-property" generated="True" extra-ref="NFTSet"/>
<variablelist class="dbus-property" generated="True" extra-ref="CoredumpReceive"/>
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly t CPUPressureThresholdUSec = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+ readonly s IOPressureWatch = '...';
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+ readonly t IOPressureThresholdUSec = ...;
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly a(iiss) NFTSet = [...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly b CoredumpReceive = ...;
<!--property CPUPressureThresholdUSec is not documented!-->
+ <!--property IOPressureWatch is not documented!-->
+
+ <!--property IOPressureThresholdUSec is not documented!-->
+
<!--property NFTSet is not documented!-->
<!--property CoredumpReceive is not documented!-->
<variablelist class="dbus-property" generated="True" extra-ref="CPUPressureThresholdUSec"/>
+ <variablelist class="dbus-property" generated="True" extra-ref="IOPressureWatch"/>
+
+ <variablelist class="dbus-property" generated="True" extra-ref="IOPressureThresholdUSec"/>
+
<variablelist class="dbus-property" generated="True" extra-ref="NFTSet"/>
<variablelist class="dbus-property" generated="True" extra-ref="CoredumpReceive"/>
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly t CPUPressureThresholdUSec = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+ readonly s IOPressureWatch = '...';
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+ readonly t IOPressureThresholdUSec = ...;
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly a(iiss) NFTSet = [...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly b CoredumpReceive = ...;
<!--property CPUPressureThresholdUSec is not documented!-->
+ <!--property IOPressureWatch is not documented!-->
+
+ <!--property IOPressureThresholdUSec is not documented!-->
+
<!--property NFTSet is not documented!-->
<!--property CoredumpReceive is not documented!-->
<variablelist class="dbus-property" generated="True" extra-ref="CPUPressureThresholdUSec"/>
+ <variablelist class="dbus-property" generated="True" extra-ref="IOPressureWatch"/>
+
+ <variablelist class="dbus-property" generated="True" extra-ref="IOPressureThresholdUSec"/>
+
<variablelist class="dbus-property" generated="True" extra-ref="NFTSet"/>
<variablelist class="dbus-property" generated="True" extra-ref="CoredumpReceive"/>
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly t CPUPressureThresholdUSec = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+ readonly s IOPressureWatch = '...';
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+ readonly t IOPressureThresholdUSec = ...;
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly a(iiss) NFTSet = [...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly b CoredumpReceive = ...;
<!--property CPUPressureThresholdUSec is not documented!-->
+ <!--property IOPressureWatch is not documented!-->
+
+ <!--property IOPressureThresholdUSec is not documented!-->
+
<!--property NFTSet is not documented!-->
<!--property CoredumpReceive is not documented!-->
<variablelist class="dbus-property" generated="True" extra-ref="CPUPressureThresholdUSec"/>
+ <variablelist class="dbus-property" generated="True" extra-ref="IOPressureWatch"/>
+
+ <variablelist class="dbus-property" generated="True" extra-ref="IOPressureThresholdUSec"/>
+
<variablelist class="dbus-property" generated="True" extra-ref="NFTSet"/>
<variablelist class="dbus-property" generated="True" extra-ref="CoredumpReceive"/>
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly t CPUPressureThresholdUSec = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+ readonly s IOPressureWatch = '...';
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+ readonly t IOPressureThresholdUSec = ...;
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly a(iiss) NFTSet = [...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly b CoredumpReceive = ...;
<!--property CPUPressureThresholdUSec is not documented!-->
+ <!--property IOPressureWatch is not documented!-->
+
+ <!--property IOPressureThresholdUSec is not documented!-->
+
<!--property NFTSet is not documented!-->
<!--property CoredumpReceive is not documented!-->
<variablelist class="dbus-property" generated="True" extra-ref="CPUPressureThresholdUSec"/>
+ <variablelist class="dbus-property" generated="True" extra-ref="IOPressureWatch"/>
+
+ <variablelist class="dbus-property" generated="True" extra-ref="IOPressureThresholdUSec"/>
+
<variablelist class="dbus-property" generated="True" extra-ref="NFTSet"/>
<variablelist class="dbus-property" generated="True" extra-ref="CoredumpReceive"/>
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly t CPUPressureThresholdUSec = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+ readonly s IOPressureWatch = '...';
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+ readonly t IOPressureThresholdUSec = ...;
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly a(iiss) NFTSet = [...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly b CoredumpReceive = ...;
<!--property CPUPressureThresholdUSec is not documented!-->
+ <!--property IOPressureWatch is not documented!-->
+
+ <!--property IOPressureThresholdUSec is not documented!-->
+
<!--property NFTSet is not documented!-->
<!--property CoredumpReceive is not documented!-->
<variablelist class="dbus-property" generated="True" extra-ref="CPUPressureThresholdUSec"/>
+ <variablelist class="dbus-property" generated="True" extra-ref="IOPressureWatch"/>
+
+ <variablelist class="dbus-property" generated="True" extra-ref="IOPressureThresholdUSec"/>
+
<variablelist class="dbus-property" generated="True" extra-ref="NFTSet"/>
<variablelist class="dbus-property" generated="True" extra-ref="CoredumpReceive"/>
<function>KillUnitSubgroup()</function> were added in version 258.</para>
<para><varname>TransactionsWithOrderingCycle</varname> was added in version 259.</para>
<para><varname>DefaultMemoryZSwapWriteback</varname>,
- <varname>DefaultCPUPressureThresholdUSec</varname> and
- <varname>DefaultCPUPressureWatch</varname> were added in version 261.</para>
+ <varname>DefaultCPUPressureThresholdUSec</varname>,
+ <varname>DefaultCPUPressureWatch</varname>,
+ <varname>DefaultIOPressureThresholdUSec</varname>, and
+ <varname>DefaultIOPressureWatch</varname> were added in version 261.</para>
</refsect2>
<refsect2>
<title>Unit Objects</title>
<varname>ExecReloadPostEx</varname> were added in version 259.</para>
<para><varname>BindNetworkInterface</varname>, <varname>MemoryTHP</varname>,
<varname>RefreshOnReload</varname>, and <varname>RootMStack</varname> were added in version 260.</para>
- <para><varname>CPUPressureThresholdUSec</varname> and
- <varname>CPUPressureWatch</varname> were added in version 261.</para>
+ <para><varname>CPUPressureThresholdUSec</varname>,
+ <varname>CPUPressureWatch</varname>,
+ <varname>IOPressureThresholdUSec</varname>, and
+ <varname>IOPressureWatch</varname> were added in version 261.</para>
</refsect2>
<refsect2>
<title>Socket Unit Objects</title>
<varname>ManagedOOMKills</varname> were added in version 259.</para>
<para><varname>BindNetworkInterface</varname>, <varname>MemoryTHP</varname>, and
<varname>RootMStack</varname> were added in version 260.</para>
- <para><varname>CPUPressureThresholdUSec</varname> and
- <varname>CPUPressureWatch</varname> were added in version 261.</para>
+ <para><varname>CPUPressureThresholdUSec</varname>,
+ <varname>CPUPressureWatch</varname>,
+ <varname>IOPressureThresholdUSec</varname>, and
+ <varname>IOPressureWatch</varname> were added in version 261.</para>
</refsect2>
<refsect2>
<title>Mount Unit Objects</title>
<varname>ManagedOOMKills</varname> were added in version 259.</para>
<para><varname>BindNetworkInterface</varname>, <varname>MemoryTHP</varname>, and
<varname>RootMStack</varname> were added in version 260.</para>
- <para><varname>CPUPressureThresholdUSec</varname> and
- <varname>CPUPressureWatch</varname> were added in version 261.</para>
+ <para><varname>CPUPressureThresholdUSec</varname>,
+ <varname>CPUPressureWatch</varname>,
+ <varname>IOPressureThresholdUSec</varname>, and
+ <varname>IOPressureWatch</varname> were added in version 261.</para>
</refsect2>
<refsect2>
<title>Swap Unit Objects</title>
<varname>ManagedOOMKills</varname> were added in version 259.</para>
<para><varname>BindNetworkInterface</varname>, <varname>MemoryTHP</varname>, and
<varname>RootMStack</varname> were added in version 260.</para>
- <para><varname>CPUPressureThresholdUSec</varname> and
- <varname>CPUPressureWatch</varname> were added in version 261.</para>
+ <para><varname>CPUPressureThresholdUSec</varname>,
+ <varname>CPUPressureWatch</varname>,
+ <varname>IOPressureThresholdUSec</varname>, and
+ <varname>IOPressureWatch</varname> were added in version 261.</para>
</refsect2>
<refsect2>
<title>Slice Unit Objects</title>
<para><varname>OOMKills</varname> and
<varname>ManagedOOMKills</varname> were added in version 259.</para>
<para><varname>BindNetworkInterface</varname> was added in version 260.</para>
- <para><varname>CPUPressureThresholdUSec</varname> and
- <varname>CPUPressureWatch</varname> were added in version 261.</para>
+ <para><varname>CPUPressureThresholdUSec</varname>,
+ <varname>CPUPressureWatch</varname>,
+ <varname>IOPressureThresholdUSec</varname>, and
+ <varname>IOPressureWatch</varname> were added in version 261.</para>
</refsect2>
<refsect2>
<title>Scope Unit Objects</title>
<para><varname>OOMKills</varname> and
<varname>ManagedOOMKills</varname> were added in version 259.</para>
<para><varname>BindNetworkInterface</varname> was added in version 260.</para>
- <para><varname>CPUPressureThresholdUSec</varname> and
- <varname>CPUPressureWatch</varname> were added in version 261.</para>
+ <para><varname>CPUPressureThresholdUSec</varname>,
+ <varname>CPUPressureWatch</varname>,
+ <varname>IOPressureThresholdUSec</varname>, and
+ <varname>IOPressureWatch</varname> were added in version 261.</para>
</refsect2>
<refsect2>
<title>Job Objects</title>
['sd_event_add_memory_pressure',
'3',
['sd_event_add_cpu_pressure',
+ 'sd_event_add_io_pressure',
'sd_event_source_set_cpu_pressure_period',
'sd_event_source_set_cpu_pressure_type',
+ 'sd_event_source_set_io_pressure_period',
+ 'sd_event_source_set_io_pressure_type',
'sd_event_source_set_memory_pressure_period',
'sd_event_source_set_memory_pressure_type',
'sd_event_trim_memory'],
<refname>sd_event_source_set_cpu_pressure_type</refname>
<refname>sd_event_source_set_cpu_pressure_period</refname>
- <refpurpose>Add and configure an event source run as result of memory or CPU pressure</refpurpose>
+ <refname>sd_event_add_io_pressure</refname>
+ <refname>sd_event_source_set_io_pressure_type</refname>
+ <refname>sd_event_source_set_io_pressure_period</refname>
+
+ <refpurpose>Add and configure an event source for memory, CPU, or IO pressure notifications</refpurpose>
</refnamediv>
<refsynopsisdiv>
<paramdef>uint64_t <parameter>window_usec</parameter></paramdef>
</funcprototype>
+ <funcprototype>
+ <funcdef>int <function>sd_event_add_io_pressure</function></funcdef>
+ <paramdef>sd_event *<parameter>event</parameter></paramdef>
+ <paramdef>sd_event_source **<parameter>ret_source</parameter></paramdef>
+ <paramdef>sd_event_handler_t <parameter>handler</parameter></paramdef>
+ <paramdef>void *<parameter>userdata</parameter></paramdef>
+ </funcprototype>
+
+ <funcprototype>
+ <funcdef>int <function>sd_event_source_set_io_pressure_type</function></funcdef>
+ <paramdef>sd_event_source *<parameter>source</parameter></paramdef>
+ <paramdef>const char *<parameter>type</parameter></paramdef>
+ </funcprototype>
+
+ <funcprototype>
+ <funcdef>int <function>sd_event_source_set_io_pressure_period</function></funcdef>
+ <paramdef>sd_event_source *<parameter>source</parameter></paramdef>
+ <paramdef>uint64_t <parameter>threshold_usec</parameter></paramdef>
+ <paramdef>uint64_t <parameter>window_usec</parameter></paramdef>
+ </funcprototype>
+
<funcprototype>
<funcdef>int <function>sd_event_trim_memory</function></funcdef>
<paramdef>void</paramdef>
<para><function>sd_event_add_memory_pressure()</function> adds a new event source that is triggered
whenever memory pressure is seen. Similarly,
- <function>sd_event_add_cpu_pressure()</function> adds a new event source that is triggered whenever CPU
- pressure is seen. This functionality is built around the Linux kernel's <ulink
+ <function>sd_event_add_cpu_pressure()</function> and <function>sd_event_add_io_pressure()</function> add
+ new event sources that are triggered whenever CPU or IO pressure is seen, respectively. This functionality
+ is built around the Linux kernel's <ulink
url="https://docs.kernel.org/accounting/psi.html">Pressure Stall Information (PSI)</ulink> logic.</para>
- <para>Both functions expect an event loop object as first parameter, and return the allocated event source
+ <para>These functions expect an event loop object as first parameter, and return the allocated event source
object in the second parameter, on success. The <parameter>handler</parameter> parameter is a function to
call when pressure is seen, or <constant>NULL</constant>. The handler function will be passed the
<parameter>userdata</parameter> pointer, which may be chosen freely by the caller. The handler may return
negative to signal an error (see below); other return values are ignored. If
- <parameter>handler</parameter> is <constant>NULL</constant>, a default handler that compacts allocation
- caches maintained by <filename>libsystemd</filename> as well as glibc (via <citerefentry
- project='man-pages'><refentrytitle>malloc_trim</refentrytitle><manvolnum>3</manvolnum></citerefentry>)
- will be used.</para>
+ <parameter>handler</parameter> is <constant>NULL</constant>, a default handler is used. For
+ <function>sd_event_add_memory_pressure()</function>, the default handler compacts allocation caches
+ maintained by <filename>libsystemd</filename> as well as glibc (via <citerefentry
+ project='man-pages'><refentrytitle>malloc_trim</refentrytitle><manvolnum>3</manvolnum></citerefentry>).
+ For <function>sd_event_add_cpu_pressure()</function> and
+ <function>sd_event_add_io_pressure()</function>, the default handler is a no-op. It is recommended to
+ pass a custom handler for CPU and IO pressure to take meaningful action when pressure is
+ detected.</para>
<para>To destroy an event source object use
<citerefentry><refentrytitle>sd_event_source_unref</refentrytitle><manvolnum>3</manvolnum></citerefentry>,
<citerefentry><refentrytitle>sd_event_source_set_enabled</refentrytitle><manvolnum>3</manvolnum></citerefentry>
with <constant>SD_EVENT_OFF</constant>.</para>
- <para>If the second parameter of <function>sd_event_add_memory_pressure()</function> or
- <function>sd_event_add_cpu_pressure()</function> is
+ <para>If the second parameter of <function>sd_event_add_memory_pressure()</function>,
+ <function>sd_event_add_cpu_pressure()</function>, or <function>sd_event_add_io_pressure()</function> is
<constant>NULL</constant>, no reference to the event source object is returned. In this case, the event
source is considered "floating", and will be destroyed implicitly when the event loop itself is
destroyed.</para>
provides the <literal>some</literal> line, not the <literal>full</literal> line, so only
<literal>some</literal> is valid when watching at the system level.</para>
+ <para>The IO pressure event source follows the same logic, but uses the
+ <varname>$IO_PRESSURE_WATCH</varname>/<varname>$IO_PRESSURE_WRITE</varname> environment variables,
+ the <filename>io.pressure</filename> cgroup file, and the system-wide PSI interface file
+ <filename>/proc/pressure/io</filename> instead.</para>
+
<para>Or in other words: preferably any explicit configuration passed in by an invoking service manager
(or similar) is used as notification source, before falling back to local notifications of the service,
and finally to global notifications of the system.</para>
<para>Similarly, <function>sd_event_source_set_cpu_pressure_type()</function> and
<function>sd_event_source_set_cpu_pressure_period()</function> can be used to fine-tune the PSI
- parameters for CPU pressure notifications. They work identically to their memory pressure counterparts.
+ parameters for CPU pressure notifications, and
+ <function>sd_event_source_set_io_pressure_type()</function> and
+ <function>sd_event_source_set_io_pressure_period()</function> can be used to fine-tune the PSI
+ parameters for IO pressure notifications. They work identically to their memory pressure counterparts.
The type parameter takes either <literal>some</literal> or <literal>full</literal>, and the period
function takes threshold and window times in microseconds. The same constraints apply: these calls must
- be invoked immediately after allocating the event source, and will fail if CPU pressure parameterization
- has been passed in via the
- <varname>$CPU_PRESSURE_WATCH</varname>/<varname>$CPU_PRESSURE_WRITE</varname> environment
+ be invoked immediately after allocating the event source, and will fail if pressure parameterization
+ has been passed in via the corresponding
+ <varname>$*_PRESSURE_WATCH</varname>/<varname>$*_PRESSURE_WRITE</varname> environment
variables.</para>
<para>The <function>sd_event_trim_memory()</function> function releases various internal allocation
<varlistentry>
<term><constant>-EHOSTDOWN</constant></term>
- <listitem><para>The <varname>$MEMORY_PRESSURE_WATCH</varname> or
- <varname>$CPU_PRESSURE_WATCH</varname> variable has been set to the literal
- string <filename>/dev/null</filename>, in order to explicitly disable pressure
+ <listitem><para>The <varname>$MEMORY_PRESSURE_WATCH</varname>,
+ <varname>$CPU_PRESSURE_WATCH</varname>, or <varname>$IO_PRESSURE_WATCH</varname> variable has been
+ set to the literal string <filename>/dev/null</filename>, in order to explicitly disable pressure
handling.</para>
<xi:include href="version-info.xml" xpointer="v254"/></listitem>
<varlistentry>
<term><constant>-EBADMSG</constant></term>
- <listitem><para>The <varname>$MEMORY_PRESSURE_WATCH</varname> or
- <varname>$CPU_PRESSURE_WATCH</varname> variable has been set to an invalid
- string, for example a relative rather than an absolute path.</para>
+ <listitem><para>The <varname>$MEMORY_PRESSURE_WATCH</varname>,
+ <varname>$CPU_PRESSURE_WATCH</varname>, or <varname>$IO_PRESSURE_WATCH</varname> variable has been
+ set to an invalid string, for example a relative rather than an absolute path.</para>
<xi:include href="version-info.xml" xpointer="v254"/></listitem>
</varlistentry>
<varlistentry>
<term><constant>-ENOTTY</constant></term>
- <listitem><para>The <varname>$MEMORY_PRESSURE_WATCH</varname> or
- <varname>$CPU_PRESSURE_WATCH</varname> variable points to a regular file
- outside of the procfs or cgroupfs file systems.</para>
+ <listitem><para>The <varname>$MEMORY_PRESSURE_WATCH</varname>,
+ <varname>$CPU_PRESSURE_WATCH</varname>, or <varname>$IO_PRESSURE_WATCH</varname> variable points
+ to a regular file outside of the procfs or cgroupfs file systems.</para>
<xi:include href="version-info.xml" xpointer="v254"/></listitem>
</varlistentry>
<varlistentry>
<term><constant>-EOPNOTSUPP</constant></term>
- <listitem><para>No configuration via <varname>$MEMORY_PRESSURE_WATCH</varname> or
- <varname>$CPU_PRESSURE_WATCH</varname> has been specified and the local kernel does not support the
- PSI interface.</para>
+ <listitem><para>No configuration via <varname>$MEMORY_PRESSURE_WATCH</varname>,
+ <varname>$CPU_PRESSURE_WATCH</varname>, or <varname>$IO_PRESSURE_WATCH</varname> has been specified
+ and the local kernel does not support the PSI interface.</para>
<xi:include href="version-info.xml" xpointer="v254"/></listitem>
</varlistentry>
<listitem><para>This is returned by <function>sd_event_source_set_memory_pressure_type()</function>,
<function>sd_event_source_set_memory_pressure_period()</function>,
<function>sd_event_source_set_cpu_pressure_type()</function>,
- and <function>sd_event_source_set_cpu_pressure_period()</function> if invoked on event sources
+ <function>sd_event_source_set_cpu_pressure_period()</function>,
+ <function>sd_event_source_set_io_pressure_type()</function>,
+ and <function>sd_event_source_set_io_pressure_period()</function> if invoked on event sources
at a time later than immediately after allocating them.</para>
<xi:include href="version-info.xml" xpointer="v254"/></listitem>
<function>sd_event_source_set_memory_pressure_period()</function>, and
<function>sd_event_trim_memory()</function> were added in version 254.</para>
<para><function>sd_event_add_cpu_pressure()</function>,
- <function>sd_event_source_set_cpu_pressure_type()</function>, and
- <function>sd_event_source_set_cpu_pressure_period()</function> were added in version 261.</para>
+ <function>sd_event_source_set_cpu_pressure_type()</function>,
+ <function>sd_event_source_set_cpu_pressure_period()</function>,
+ <function>sd_event_add_io_pressure()</function>,
+ <function>sd_event_source_set_io_pressure_type()</function>, and
+ <function>sd_event_source_set_io_pressure_period()</function> were added in version 261.</para>
</refsect1>
<refsect1>
<xi:include href="version-info.xml" xpointer="v261"/></listitem>
</varlistentry>
+
+ <varlistentry>
+ <term><varname>DefaultIOPressureWatch=</varname></term>
+ <term><varname>DefaultIOPressureThresholdSec=</varname></term>
+
+ <listitem><para>Configures the default settings for the per-unit
+ <varname>IOPressureWatch=</varname> and <varname>IOPressureThresholdSec=</varname>
+ settings. See
+ <citerefentry><refentrytitle>systemd.resource-control</refentrytitle><manvolnum>5</manvolnum></citerefentry>
+ for details. Defaults to <literal>auto</literal> and <literal>200ms</literal>, respectively. This
+ also sets the IO pressure monitoring threshold for the service manager itself.</para>
+
+ <xi:include href="version-info.xml" xpointer="v261"/></listitem>
+ </varlistentry>
</variablelist>
</refsect1>
<xi:include href="version-info.xml" xpointer="v261"/></listitem>
</varlistentry>
+ <varlistentry>
+ <term><varname>$IO_PRESSURE_WATCH</varname></term>
+ <term><varname>$IO_PRESSURE_WRITE</varname></term>
+
+ <listitem><para>If IO pressure monitoring is enabled for this service unit, these variables contain
+ the path to watch and the data to write into it, respectively. See <ulink url="https://systemd.io/PRESSURE">Resource Pressure
+ Handling</ulink> for details about these variables and the service protocol data they
+ convey.</para>
+
+ <xi:include href="version-info.xml" xpointer="v261"/></listitem>
+ </varlistentry>
+
<varlistentry>
<term><varname>$FDSTORE</varname></term>
<xi:include href="version-info.xml" xpointer="v261"/></listitem>
</varlistentry>
+
+ <varlistentry>
+ <term><varname>IOPressureWatch=</varname></term>
+
+ <listitem><para>Controls IO pressure monitoring for invoked processes. Takes a boolean or one of
+ <literal>auto</literal> and <literal>skip</literal>. If <literal>no</literal>, tells the service not
+ to watch for IO pressure events, by setting the <varname>$IO_PRESSURE_WATCH</varname>
+ environment variable to the literal string <filename>/dev/null</filename>. If <literal>yes</literal>,
+ tells the service to watch for IO pressure events. This enables IO accounting for the
+ service, and ensures the <filename>io.pressure</filename> cgroup attribute file is accessible for
+ reading and writing by the service's user. It then sets the <varname>$IO_PRESSURE_WATCH</varname>
+ environment variable for processes invoked by the unit to the file system path to this file. The
+ threshold information configured with <varname>IOPressureThresholdSec=</varname> is encoded in
+ the <varname>$IO_PRESSURE_WRITE</varname> environment variable. If the <literal>auto</literal>
+ value is set, the protocol is enabled if IO accounting is enabled for the unit anyway (e.g. because
+ <varname>IOWeight=</varname> or <varname>IODeviceWeight=</varname> is set), and
+ disabled otherwise. If set to <literal>skip</literal>, the logic is neither enabled nor disabled, and
+ the two environment variables are not set.</para>
+
+ <para>Note that services are free to use the two environment variables, but it is unproblematic if
+ they ignore them. IO pressure handling must be implemented individually in each service, and
+ usually means different things for different software.</para>
+
+ <para>Services implemented using
+ <citerefentry><refentrytitle>sd-event</refentrytitle><manvolnum>3</manvolnum></citerefentry> may use
+ <citerefentry><refentrytitle>sd_event_add_io_pressure</refentrytitle><manvolnum>3</manvolnum></citerefentry>
+ to watch for and handle IO pressure events.</para>
+
+ <para>If not explicitly set, defaults to the <varname>DefaultIOPressureWatch=</varname> setting in
+ <citerefentry><refentrytitle>systemd-system.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>.</para>
+
+ <xi:include href="version-info.xml" xpointer="v261"/></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>IOPressureThresholdSec=</varname></term>
+
+ <listitem><para>Sets the IO pressure threshold time for the IO pressure monitoring configured via
+ <varname>IOPressureWatch=</varname>. Specifies the maximum IO stall time before an IO
+ pressure event is signalled to the service, per 2s window. If not specified, defaults to the
+ <varname>DefaultIOPressureThresholdSec=</varname> setting in
+ <citerefentry><refentrytitle>systemd-system.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>
+ (which in turn defaults to 200ms). The specified value expects a time unit such as
+ <literal>ms</literal> or <literal>μs</literal>, see
+ <citerefentry><refentrytitle>systemd.time</refentrytitle><manvolnum>7</manvolnum></citerefentry> for
+ details on the permitted syntax.</para>
+
+ <xi:include href="version-info.xml" xpointer="v261"/></listitem>
+ </varlistentry>
</variablelist>
</refsect2><refsect2><title>Coredump Control</title>
.env_watch = "CPU_PRESSURE_WATCH",
.env_write = "CPU_PRESSURE_WRITE",
},
+ [PRESSURE_IO] = {
+ .name = "io",
+ .env_watch = "IO_PRESSURE_WATCH",
+ .env_write = "IO_PRESSURE_WRITE",
+ },
};
static const char* const pressure_resource_table[_PRESSURE_RESOURCE_MAX] = {
[PRESSURE_MEMORY] = "memory",
[PRESSURE_CPU] = "cpu",
+ [PRESSURE_IO] = "io",
};
DEFINE_STRING_TABLE_LOOKUP(pressure_resource, PressureResource);
typedef enum PressureResource {
PRESSURE_MEMORY,
PRESSURE_CPU,
+ PRESSURE_IO,
_PRESSURE_RESOURCE_MAX,
_PRESSURE_RESOURCE_INVALID = -EINVAL,
} PressureResource;
.pressure = {
[PRESSURE_MEMORY] = { .watch = _CGROUP_PRESSURE_WATCH_INVALID, .threshold_usec = USEC_INFINITY },
[PRESSURE_CPU] = { .watch = _CGROUP_PRESSURE_WATCH_INVALID, .threshold_usec = USEC_INFINITY },
+ [PRESSURE_IO] = { .watch = _CGROUP_PRESSURE_WATCH_INVALID, .threshold_usec = USEC_INFINITY },
},
};
}
"%sManagedOOMPreference: %s\n"
"%sMemoryPressureWatch: %s\n"
"%sCPUPressureWatch: %s\n"
+ "%sIOPressureWatch: %s\n"
"%sCoredumpReceive: %s\n",
prefix, yes_no(c->io_accounting),
prefix, yes_no(c->memory_accounting),
prefix, managed_oom_preference_to_string(c->moom_preference),
prefix, cgroup_pressure_watch_to_string(c->pressure[PRESSURE_MEMORY].watch),
prefix, cgroup_pressure_watch_to_string(c->pressure[PRESSURE_CPU].watch),
+ prefix, cgroup_pressure_watch_to_string(c->pressure[PRESSURE_IO].watch),
prefix, yes_no(c->coredump_receive));
if (c->delegate_subgroup)
fprintf(f, "%sCPUPressureThresholdSec: %s\n",
prefix, FORMAT_TIMESPAN(c->pressure[PRESSURE_CPU].threshold_usec, 1));
+ if (c->pressure[PRESSURE_IO].threshold_usec != USEC_INFINITY)
+ fprintf(f, "%sIOPressureThresholdSec: %s\n",
+ prefix, FORMAT_TIMESPAN(c->pressure[PRESSURE_IO].threshold_usec, 1));
+
if (c->moom_mem_pressure_duration_usec != USEC_INFINITY)
fprintf(f, "%sManagedOOMMemoryPressureDurationSec: %s\n",
prefix, FORMAT_TIMESPAN(c->moom_mem_pressure_duration_usec, 1));
c->startup_cpu_weight != CGROUP_WEIGHT_INVALID ||
c->cpu_quota_per_sec_usec != USEC_INFINITY;
+ case PRESSURE_IO:
+ return c->io_accounting ||
+ c->io_weight != CGROUP_WEIGHT_INVALID ||
+ c->startup_io_weight != CGROUP_WEIGHT_INVALID ||
+ c->io_device_weights ||
+ c->io_device_latencies ||
+ c->io_device_limits;
+
default:
assert_not_reached();
}
SD_BUS_PROPERTY("MemoryPressureThresholdUSec", "t", bus_property_get_usec, offsetof(CGroupContext, pressure[PRESSURE_MEMORY].threshold_usec), 0),
SD_BUS_PROPERTY("CPUPressureWatch", "s", bus_property_get_cgroup_pressure_watch, offsetof(CGroupContext, pressure[PRESSURE_CPU].watch), 0),
SD_BUS_PROPERTY("CPUPressureThresholdUSec", "t", bus_property_get_usec, offsetof(CGroupContext, pressure[PRESSURE_CPU].threshold_usec), 0),
+ SD_BUS_PROPERTY("IOPressureWatch", "s", bus_property_get_cgroup_pressure_watch, offsetof(CGroupContext, pressure[PRESSURE_IO].watch), 0),
+ SD_BUS_PROPERTY("IOPressureThresholdUSec", "t", bus_property_get_usec, offsetof(CGroupContext, pressure[PRESSURE_IO].threshold_usec), 0),
SD_BUS_PROPERTY("NFTSet", "a(iiss)", property_get_cgroup_nft_set, 0, 0),
SD_BUS_PROPERTY("CoredumpReceive", "b", bus_property_get_bool, offsetof(CGroupContext, coredump_receive), 0),
return 1;
- } else if (STR_IN_SET(name, "MemoryPressureWatch", "CPUPressureWatch")) {
+ } else if (STR_IN_SET(name, "MemoryPressureWatch", "CPUPressureWatch", "IOPressureWatch")) {
CGroupPressureWatch p;
const char *t;
- PressureResource pt = streq(name, "MemoryPressureWatch") ? PRESSURE_MEMORY : PRESSURE_CPU;
+ PressureResource pt = streq(name, "MemoryPressureWatch") ? PRESSURE_MEMORY :
+ streq(name, "CPUPressureWatch") ? PRESSURE_CPU : PRESSURE_IO;
r = sd_bus_message_read(message, "s", &t);
if (r < 0)
return 1;
- } else if (STR_IN_SET(name, "MemoryPressureThresholdUSec", "CPUPressureThresholdUSec")) {
+ } else if (STR_IN_SET(name, "MemoryPressureThresholdUSec", "CPUPressureThresholdUSec", "IOPressureThresholdUSec")) {
uint64_t t;
- PressureResource pt = streq(name, "MemoryPressureThresholdUSec") ? PRESSURE_MEMORY : PRESSURE_CPU;
+ PressureResource pt = streq(name, "MemoryPressureThresholdUSec") ? PRESSURE_MEMORY :
+ streq(name, "CPUPressureThresholdUSec") ? PRESSURE_CPU : PRESSURE_IO;
r = sd_bus_message_read(message, "t", &t);
if (r < 0)
SD_BUS_PROPERTY("DefaultMemoryPressureWatch", "s", bus_property_get_cgroup_pressure_watch, offsetof(Manager, defaults.pressure[PRESSURE_MEMORY].watch), 0),
SD_BUS_PROPERTY("DefaultCPUPressureThresholdUSec", "t", bus_property_get_usec, offsetof(Manager, defaults.pressure[PRESSURE_CPU].threshold_usec), 0),
SD_BUS_PROPERTY("DefaultCPUPressureWatch", "s", bus_property_get_cgroup_pressure_watch, offsetof(Manager, defaults.pressure[PRESSURE_CPU].watch), 0),
+ SD_BUS_PROPERTY("DefaultIOPressureThresholdUSec", "t", bus_property_get_usec, offsetof(Manager, defaults.pressure[PRESSURE_IO].threshold_usec), 0),
+ SD_BUS_PROPERTY("DefaultIOPressureWatch", "s", bus_property_get_cgroup_pressure_watch, offsetof(Manager, defaults.pressure[PRESSURE_IO].watch), 0),
SD_BUS_PROPERTY("TimerSlackNSec", "t", property_get_timer_slack_nsec, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("DefaultOOMPolicy", "s", bus_property_get_oom_policy, offsetof(Manager, defaults.oom_policy), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("DefaultOOMScoreAdjust", "i", property_get_oom_score_adjust, 0, SD_BUS_VTABLE_PROPERTY_CONST),
if (r < 0)
return r;
+ r = serialize_item(f, "exec-cgroup-context-io-pressure-watch", cgroup_pressure_watch_to_string(c->pressure[PRESSURE_IO].watch));
+ if (r < 0)
+ return r;
+
r = serialize_item(f, "exec-cgroup-context-delegate-subgroup", c->delegate_subgroup);
if (r < 0)
return r;
return r;
}
+ if (c->pressure[PRESSURE_IO].threshold_usec != USEC_INFINITY) {
+ r = serialize_usec(f, "exec-cgroup-context-io-pressure-threshold-usec", c->pressure[PRESSURE_IO].threshold_usec);
+ if (r < 0)
+ return r;
+ }
+
LIST_FOREACH(device_allow, a, c->device_allow) {
r = serialize_item_format(f, "exec-cgroup-context-device-allow", "%s %s",
a->path,
c->pressure[PRESSURE_CPU].watch = cgroup_pressure_watch_from_string(val);
if (c->pressure[PRESSURE_CPU].watch < 0)
return -EINVAL;
+ } else if ((val = startswith(l, "exec-cgroup-context-io-pressure-watch="))) {
+ c->pressure[PRESSURE_IO].watch = cgroup_pressure_watch_from_string(val);
+ if (c->pressure[PRESSURE_IO].watch < 0)
+ return -EINVAL;
} else if ((val = startswith(l, "exec-cgroup-context-delegate-subgroup="))) {
r = free_and_strdup(&c->delegate_subgroup, val);
if (r < 0)
r = deserialize_usec(val, &c->pressure[PRESSURE_CPU].threshold_usec);
if (r < 0)
return r;
+ } else if ((val = startswith(l, "exec-cgroup-context-io-pressure-threshold-usec="))) {
+ r = deserialize_usec(val, &c->pressure[PRESSURE_IO].threshold_usec);
+ if (r < 0)
+ return r;
} else if ((val = startswith(l, "exec-cgroup-context-device-allow="))) {
_cleanup_free_ char *path = NULL, *rwm = NULL;
CGroupDevicePermissions p;
{{type}}.MemoryPressureWatch, config_parse_pressure_watch, 0, offsetof({{type}}, cgroup_context.pressure[PRESSURE_MEMORY].watch)
{{type}}.CPUPressureThresholdSec, config_parse_sec, 0, offsetof({{type}}, cgroup_context.pressure[PRESSURE_CPU].threshold_usec)
{{type}}.CPUPressureWatch, config_parse_pressure_watch, 0, offsetof({{type}}, cgroup_context.pressure[PRESSURE_CPU].watch)
+{{type}}.IOPressureThresholdSec, config_parse_sec, 0, offsetof({{type}}, cgroup_context.pressure[PRESSURE_IO].threshold_usec)
+{{type}}.IOPressureWatch, config_parse_pressure_watch, 0, offsetof({{type}}, cgroup_context.pressure[PRESSURE_IO].watch)
{{type}}.NFTSet, config_parse_cgroup_nft_set, NFT_SET_PARSE_CGROUP, offsetof({{type}}, cgroup_context)
{{type}}.CoredumpReceive, config_parse_bool, 0, offsetof({{type}}, cgroup_context.coredump_receive)
{{type}}.BindNetworkInterface, config_parse_bind_network_interface, 0, offsetof({{type}}, cgroup_context)
{ "Manager", "DefaultMemoryPressureWatch", config_parse_pressure_watch, 0, &arg_defaults.pressure[PRESSURE_MEMORY].watch },
{ "Manager", "DefaultCPUPressureThresholdSec", config_parse_sec, 0, &arg_defaults.pressure[PRESSURE_CPU].threshold_usec },
{ "Manager", "DefaultCPUPressureWatch", config_parse_pressure_watch, 0, &arg_defaults.pressure[PRESSURE_CPU].watch },
+ { "Manager", "DefaultIOPressureThresholdSec", config_parse_sec, 0, &arg_defaults.pressure[PRESSURE_IO].threshold_usec },
+ { "Manager", "DefaultIOPressureWatch", config_parse_pressure_watch, 0, &arg_defaults.pressure[PRESSURE_IO].watch },
{ "Manager", "CtrlAltDelBurstAction", config_parse_emergency_action, arg_runtime_scope, &arg_cad_burst_action },
{ "Manager", "DefaultOOMPolicy", config_parse_oom_policy, 0, &arg_defaults.oom_policy },
{ "Manager", "DefaultOOMScoreAdjust", config_parse_oom_score_adjust, 0, NULL },
"CREDENTIALS_DIRECTORY",
"EXIT_CODE",
"EXIT_STATUS",
+ "IO_PRESSURE_WATCH",
+ "IO_PRESSURE_WRITE",
"INVOCATION_ID",
"JOURNAL_STREAM",
"LISTEN_FDNAMES",
} pressure_dispatch_table[_PRESSURE_RESOURCE_MAX] = {
[PRESSURE_MEMORY] = { sd_event_add_memory_pressure, sd_event_source_set_memory_pressure_period },
[PRESSURE_CPU] = { sd_event_add_cpu_pressure, sd_event_source_set_cpu_pressure_period },
+ [PRESSURE_IO] = { sd_event_add_io_pressure, sd_event_source_set_io_pressure_period },
};
int manager_setup_pressure_event_source(Manager *m, PressureResource t) {
.pressure = {
[PRESSURE_MEMORY] = { .watch = CGROUP_PRESSURE_WATCH_AUTO, .threshold_usec = PRESSURE_DEFAULT_THRESHOLD_USEC },
[PRESSURE_CPU] = { .watch = CGROUP_PRESSURE_WATCH_AUTO, .threshold_usec = PRESSURE_DEFAULT_THRESHOLD_USEC },
+ [PRESSURE_IO] = { .watch = CGROUP_PRESSURE_WATCH_AUTO, .threshold_usec = PRESSURE_DEFAULT_THRESHOLD_USEC },
},
.oom_policy = OOM_STOP,
#DefaultMemoryPressureWatch=auto
#DefaultCPUPressureThresholdSec=200ms
#DefaultCPUPressureWatch=auto
+#DefaultIOPressureThresholdSec=200ms
+#DefaultIOPressureWatch=auto
#DefaultOOMPolicy=stop
#DefaultSmackProcessLabel=
#DefaultRestrictSUIDSGID=
#DefaultMemoryPressureWatch=auto
#DefaultCPUPressureThresholdSec=200ms
#DefaultCPUPressureWatch=auto
+#DefaultIOPressureThresholdSec=200ms
+#DefaultIOPressureWatch=auto
#DefaultSmackProcessLabel=
#DefaultRestrictSUIDSGID=
#ReloadLimitIntervalSec=
JSON_BUILD_PAIR_FINITE_USEC("MemoryPressureThresholdUSec", c->pressure[PRESSURE_MEMORY].threshold_usec),
SD_JSON_BUILD_PAIR_STRING("CPUPressureWatch", cgroup_pressure_watch_to_string(c->pressure[PRESSURE_CPU].watch)),
JSON_BUILD_PAIR_FINITE_USEC("CPUPressureThresholdUSec", c->pressure[PRESSURE_CPU].threshold_usec),
+ SD_JSON_BUILD_PAIR_STRING("IOPressureWatch", cgroup_pressure_watch_to_string(c->pressure[PRESSURE_IO].watch)),
+ JSON_BUILD_PAIR_FINITE_USEC("IOPressureThresholdUSec", c->pressure[PRESSURE_IO].threshold_usec),
/* Others */
SD_JSON_BUILD_PAIR_BOOLEAN("CoredumpReceive", c->coredump_receive));
SD_JSON_BUILD_PAIR_STRING("DefaultMemoryPressureWatch", cgroup_pressure_watch_to_string(m->defaults.pressure[PRESSURE_MEMORY].watch)),
JSON_BUILD_PAIR_FINITE_USEC("DefaultCPUPressureThresholdUSec", m->defaults.pressure[PRESSURE_CPU].threshold_usec),
SD_JSON_BUILD_PAIR_STRING("DefaultCPUPressureWatch", cgroup_pressure_watch_to_string(m->defaults.pressure[PRESSURE_CPU].watch)),
+ JSON_BUILD_PAIR_FINITE_USEC("DefaultIOPressureThresholdUSec", m->defaults.pressure[PRESSURE_IO].threshold_usec),
+ SD_JSON_BUILD_PAIR_STRING("DefaultIOPressureWatch", cgroup_pressure_watch_to_string(m->defaults.pressure[PRESSURE_IO].watch)),
JSON_BUILD_PAIR_FINITE_USEC("RuntimeWatchdogUSec", manager_get_watchdog(m, WATCHDOG_RUNTIME)),
JSON_BUILD_PAIR_FINITE_USEC("RebootWatchdogUSec", manager_get_watchdog(m, WATCHDOG_REBOOT)),
JSON_BUILD_PAIR_FINITE_USEC("KExecWatchdogUSec", manager_get_watchdog(m, WATCHDOG_KEXEC)),
sd_event_add_cpu_pressure;
sd_event_source_set_cpu_pressure_type;
sd_event_source_set_cpu_pressure_period;
+ sd_event_add_io_pressure;
+ sd_event_source_set_io_pressure_type;
+ sd_event_source_set_io_pressure_period;
} LIBSYSTEMD_260;
SOURCE_INOTIFY,
SOURCE_MEMORY_PRESSURE,
SOURCE_CPU_PRESSURE,
+ SOURCE_IO_PRESSURE,
_SOURCE_EVENT_SOURCE_TYPE_MAX,
_SOURCE_EVENT_SOURCE_TYPE_INVALID = -EINVAL,
} EventSourceType;
[SOURCE_INOTIFY] = "inotify",
[SOURCE_MEMORY_PRESSURE] = "memory-pressure",
[SOURCE_CPU_PRESSURE] = "cpu-pressure",
+ [SOURCE_IO_PRESSURE] = "io-pressure",
};
DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
SOURCE_DEFER, \
SOURCE_INOTIFY, \
SOURCE_MEMORY_PRESSURE, \
- SOURCE_CPU_PRESSURE)
+ SOURCE_CPU_PRESSURE, \
+ SOURCE_IO_PRESSURE)
/* This is used to assert that we didn't pass an unexpected source type to event_source_time_prioq_put().
* Time sources and ratelimited sources can be passed, so effectively this is the same as the
return 0;
}
-#define EVENT_SOURCE_IS_PRESSURE(s) IN_SET((s)->type, SOURCE_MEMORY_PRESSURE, SOURCE_CPU_PRESSURE)
+#define EVENT_SOURCE_IS_PRESSURE(s) IN_SET((s)->type, SOURCE_MEMORY_PRESSURE, SOURCE_CPU_PRESSURE, SOURCE_IO_PRESSURE)
static void source_pressure_unregister(sd_event_source *s) {
assert(s);
case SOURCE_MEMORY_PRESSURE:
case SOURCE_CPU_PRESSURE:
+ case SOURCE_IO_PRESSURE:
source_pressure_remove_from_write_list(s);
source_pressure_unregister(s);
break;
[SOURCE_INOTIFY] = endoffsetof_field(sd_event_source, inotify),
[SOURCE_MEMORY_PRESSURE] = endoffsetof_field(sd_event_source, pressure),
[SOURCE_CPU_PRESSURE] = endoffsetof_field(sd_event_source, pressure),
+ [SOURCE_IO_PRESSURE] = endoffsetof_field(sd_event_source, pressure),
};
sd_event_source *s;
* fd with the epoll right-away. Instead, we just add the event source to a list of pressure event
* sources on which writes must be executed before the first event loop iteration is executed. (We
* could also write the data here, right away, but we want to give the caller the freedom to call
- * sd_event_source_set_{memory,cpu}_pressure_type() and
- * sd_event_source_set_{memory,cpu}_pressure_period() before we write it. */
+ * sd_event_source_set_{memory,cpu,io}_pressure_type() and
+ * sd_event_source_set_{memory,cpu,io}_pressure_period() before we write it. */
if (s->pressure.write_buffer_size > 0)
source_pressure_add_to_write_list(s);
PRESSURE_CPU);
}
+static int io_pressure_callback(sd_event_source *s, void *userdata) {
+ assert(s);
+ /* Deliberately does nothing beyond checking 's' and reporting success; 'userdata' is unused. sd_event_add_io_pressure() hands this to event_add_pressure() (presumably as the handler used when the caller supplies none - confirm there). */
+ return 0;
+}
+
+_public_ int sd_event_add_io_pressure(
+ sd_event *e,
+ sd_event_source **ret,
+ sd_event_handler_t callback,
+ void *userdata) {
+ /* Adds an IO pressure event source by delegating to event_add_pressure(): the caller's arguments are forwarded unchanged, together with the IO-specific source type, the io_pressure_callback() handler and the PRESSURE_IO resource. Returns whatever event_add_pressure() returns. */
+ return event_add_pressure(
+ e, ret, callback, userdata,
+ SOURCE_IO_PRESSURE,
+ io_pressure_callback,
+ PRESSURE_IO);
+}
+
static void event_free_inotify_data(sd_event *e, InotifyData *d) {
assert(e);
case SOURCE_MEMORY_PRESSURE:
case SOURCE_CPU_PRESSURE:
+ case SOURCE_IO_PRESSURE:
source_pressure_unregister(s);
break;
case SOURCE_MEMORY_PRESSURE:
case SOURCE_CPU_PRESSURE:
+ case SOURCE_IO_PRESSURE:
/* As documented in sd_event_add_{memory,cpu,io}_pressure(), we can only register the PSI fd
* with epoll after writing the watch string. */
if (s->pressure.write_buffer_size == 0) {
case SOURCE_MEMORY_PRESSURE:
case SOURCE_CPU_PRESSURE:
+ case SOURCE_IO_PRESSURE:
r = s->pressure.callback(s, s->userdata);
break;
case SOURCE_MEMORY_PRESSURE:
case SOURCE_CPU_PRESSURE:
+ case SOURCE_IO_PRESSURE:
r = process_pressure(s, i->events);
break;
return event_source_set_pressure_type(s, ty);
}
+_public_ int sd_event_source_set_io_pressure_type(sd_event_source *s, const char *ty) {
+ assert_return(s, -EINVAL);
+ assert_return(s->type == SOURCE_IO_PRESSURE, -EDOM);
+ /* Sets the pressure type string 'ty' on an IO pressure source. The checks above yield -EINVAL for a NULL source and -EDOM for a source of any other type; everything else is left to the shared event_source_set_pressure_type() helper, whose result is returned as-is. */
+ return event_source_set_pressure_type(s, ty);
+}
+
static int event_source_set_pressure_period(sd_event_source *s, uint64_t threshold_usec, uint64_t window_usec) {
_cleanup_free_ char *b = NULL;
_cleanup_free_ void *w = NULL;
return event_source_set_pressure_period(s, threshold_usec, window_usec);
}
+
+_public_ int sd_event_source_set_io_pressure_period(sd_event_source *s, uint64_t threshold_usec, uint64_t window_usec) {
+ assert_return(s, -EINVAL);
+ assert_return(s->type == SOURCE_IO_PRESSURE, -EDOM);
+ /* Sets threshold and window (both in usec) on an IO pressure source. The checks above yield -EINVAL for a NULL source and -EDOM for a source of any other type; the shared event_source_set_pressure_period() helper does the actual work and its result is returned as-is. */
+ return event_source_set_pressure_period(s, threshold_usec, window_usec);
+}
{ "ManagedOOMPreference", bus_append_string },
{ "MemoryPressureWatch", bus_append_string },
{ "CPUPressureWatch", bus_append_string },
+ { "IOPressureWatch", bus_append_string },
{ "DelegateSubgroup", bus_append_string },
{ "ManagedOOMMemoryPressureLimit", bus_append_parse_permyriad },
{ "MemoryAccounting", bus_append_parse_boolean },
{ "SocketBindDeny", bus_append_socket_filter },
{ "MemoryPressureThresholdSec", bus_append_parse_sec_rename },
{ "CPUPressureThresholdSec", bus_append_parse_sec_rename },
+ { "IOPressureThresholdSec", bus_append_parse_sec_rename },
{ "NFTSet", bus_append_nft_set },
{ "BindNetworkInterface", bus_append_string },
SD_VARLINK_DEFINE_FIELD(DefaultCPUPressureThresholdUSec, SD_VARLINK_INT, 0),
SD_VARLINK_FIELD_COMMENT("https://www.freedesktop.org/software/systemd/man/"PROJECT_VERSION_STR"/systemd-system.conf.html#DefaultCPUPressureWatch="),
SD_VARLINK_DEFINE_FIELD(DefaultCPUPressureWatch, SD_VARLINK_STRING, 0),
+ /* The anchor is the setting name as spelled in system.conf (DefaultIOPressureThresholdSec=), not the Varlink field name. */
+ SD_VARLINK_FIELD_COMMENT("https://www.freedesktop.org/software/systemd/man/"PROJECT_VERSION_STR"/systemd-system.conf.html#DefaultIOPressureThresholdSec="),
+ SD_VARLINK_DEFINE_FIELD(DefaultIOPressureThresholdUSec, SD_VARLINK_INT, 0),
+ SD_VARLINK_FIELD_COMMENT("https://www.freedesktop.org/software/systemd/man/"PROJECT_VERSION_STR"/systemd-system.conf.html#DefaultIOPressureWatch="),
+ SD_VARLINK_DEFINE_FIELD(DefaultIOPressureWatch, SD_VARLINK_STRING, 0),
SD_VARLINK_FIELD_COMMENT("https://www.freedesktop.org/software/systemd/man/"PROJECT_VERSION_STR"/systemd-system.conf.html#RuntimeWatchdogSec="),
SD_VARLINK_DEFINE_FIELD(RuntimeWatchdogUSec, SD_VARLINK_INT, SD_VARLINK_NULLABLE),
SD_VARLINK_FIELD_COMMENT("https://www.freedesktop.org/software/systemd/man/"PROJECT_VERSION_STR"/systemd-system.conf.html#RebootWatchdogSec="),
SD_VARLINK_DEFINE_FIELD(CPUPressureWatch, SD_VARLINK_STRING, 0),
SD_VARLINK_FIELD_COMMENT("https://www.freedesktop.org/software/systemd/man/"PROJECT_VERSION_STR"/systemd.resource-control.html#CPUPressureThresholdSec="),
SD_VARLINK_DEFINE_FIELD(CPUPressureThresholdUSec, SD_VARLINK_INT, SD_VARLINK_NULLABLE),
+ SD_VARLINK_FIELD_COMMENT("https://www.freedesktop.org/software/systemd/man/"PROJECT_VERSION_STR"/systemd.resource-control.html#IOPressureWatch="),
+ SD_VARLINK_DEFINE_FIELD(IOPressureWatch, SD_VARLINK_STRING, 0),
+ SD_VARLINK_FIELD_COMMENT("https://www.freedesktop.org/software/systemd/man/"PROJECT_VERSION_STR"/systemd.resource-control.html#IOPressureThresholdSec="),
+ SD_VARLINK_DEFINE_FIELD(IOPressureThresholdUSec, SD_VARLINK_INT, SD_VARLINK_NULLABLE),
/* Others */
SD_VARLINK_FIELD_COMMENT("Reflects whether to forward coredumps for processes that crash within this cgroup"),
int sd_event_add_exit(sd_event *e, sd_event_source **ret, sd_event_handler_t callback, void *userdata);
int sd_event_add_memory_pressure(sd_event *e, sd_event_source **ret, sd_event_handler_t callback, void *userdata);
int sd_event_add_cpu_pressure(sd_event *e, sd_event_source **ret, sd_event_handler_t callback, void *userdata);
+int sd_event_add_io_pressure(sd_event *e, sd_event_source **ret, sd_event_handler_t callback, void *userdata);
int sd_event_prepare(sd_event *e);
int sd_event_wait(sd_event *e, uint64_t timeout);
int sd_event_source_set_memory_pressure_period(sd_event_source *s, uint64_t threshold_usec, uint64_t window_usec);
int sd_event_source_set_cpu_pressure_type(sd_event_source *s, const char *ty);
int sd_event_source_set_cpu_pressure_period(sd_event_source *s, uint64_t threshold_usec, uint64_t window_usec);
+int sd_event_source_set_io_pressure_type(sd_event_source *s, const char *ty);
+int sd_event_source_set_io_pressure_period(sd_event_source *s, uint64_t threshold_usec, uint64_t window_usec);
int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback);
int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret);
int sd_event_source_get_floating(sd_event_source *s);
test_fake_pressure("cpu", fake_cpu_pressure_wrapper);
}
+static int fake_io_pressure_wrapper(sd_event *e, sd_event_source **ret, sd_event_handler_t callback, void *userdata) {
+ return sd_event_add_io_pressure(e, ret, callback, userdata); /* plain pass-through: all four arguments are forwarded unchanged */
+}
+
+TEST(fake_io_pressure) {
+ test_fake_pressure("io", fake_io_pressure_wrapper); /* runs the shared test_fake_pressure() scenario for the "io" resource, creating the source through the wrapper */
+}
+
/* Shared infrastructure for real pressure tests */
struct real_pressure_context {
ASSERT_EQ(ex, 31);
}
+/* IO pressure real test: the pressure handler and the IO-generating child body used by TEST(real_io_pressure) */
+
+static int real_io_pressure_callback(sd_event_source *s, void *userdata) {
+ struct real_pressure_context *c = ASSERT_PTR(userdata);
+ const char *d;
+ /* Handler for the IO pressure source; 'userdata' must point to a struct real_pressure_context (asserted above) */
+ ASSERT_NOT_NULL(s);
+ ASSERT_OK(sd_event_source_get_description(s, &d));
+ /* Log which source fired, identified by its description */
+ log_notice("real io pressure event: %s", d);
+ /* SIGKILL the child through its child event source, then clear the reference; a second invocation would trip the ASSERT_NOT_NULL below */
+ ASSERT_NOT_NULL(c->pid);
+ ASSERT_OK(sd_event_source_send_child_signal(c->pid, SIGKILL, NULL, 0));
+ c->pid = NULL;
+
+ return 0;
+}
+
+_noreturn_ static void real_pressure_eat_io(int pipe_fd) {
+ char x;
+ ASSERT_EQ(read(pipe_fd, &x, 1), 1); /* Wait for the GO! */
+ /* Body of the forked child: blocks above until exactly one byte arrives on 'pipe_fd', then loops forever (hence _noreturn_) */
+ /* Write and fsync in a loop to generate IO pressure */
+ for (;;) {
+ _cleanup_close_ int fd = -EBADF;
+ /* Re-open and truncate the scratch file on every iteration; if that fails, just try again */
+ fd = open("/var/tmp/.io-pressure-test", O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, 0600);
+ if (fd < 0)
+ continue;
+ /* 256 writes of a 4096-byte buffer (1 MiB per iteration), stopping at the first failed write; the fsync result is deliberately ignored */
+ char buf[4096];
+ memset(buf, 'x', sizeof(buf));
+ for (int i = 0; i < 256; i++)
+ if (write(fd, buf, sizeof(buf)) < 0)
+ break;
+ (void) fsync(fd);
+ }
+}
+
+TEST(real_io_pressure) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_event_source_unrefp) sd_event_source *es = NULL, *cs = NULL;
+ _cleanup_(bus_wait_for_jobs_freep) BusWaitForJobs *w = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_close_pair_ int pipe_fd[2] = EBADF_PAIR;
+ _cleanup_(sd_event_unrefp) sd_event *e = NULL;
+ _cleanup_free_ char *scope = NULL;
+ const char *object;
+ int r;
+ /* End-to-end test against a running service manager: create a transient scope, throttle writes to /var/tmp, let a child hammer the disk and expect the IO pressure source to fire. Skipped rather than failed when the bus, the transient unit call or the pressure source is unavailable. System bus when running as root, user bus otherwise. */
+ if (getuid() == 0)
+ r = sd_bus_open_system(&bus);
+ else
+ r = sd_bus_open_user(&bus);
+ if (r < 0)
+ return (void) log_tests_skipped_errno(r, "can't connect to bus");
+
+ ASSERT_OK(bus_wait_for_jobs_new(bus, &w));
+ /* StartTransientUnit for "test-<random>.scope" in job mode "fail", with the properties PIDs=[0] and IOAccounting=true and no auxiliary units (PID 0 presumably stands for the calling process - confirm) */
+ ASSERT_OK(bus_message_new_method_call(bus, &m, bus_systemd_mgr, "StartTransientUnit"));
+ ASSERT_OK(asprintf(&scope, "test-%" PRIu64 ".scope", random_u64()));
+ ASSERT_OK(sd_bus_message_append(m, "ss", scope, "fail"));
+ ASSERT_OK(sd_bus_message_open_container(m, 'a', "(sv)"));
+ ASSERT_OK(sd_bus_message_append(m, "(sv)", "PIDs", "au", 1, 0));
+ ASSERT_OK(sd_bus_message_append(m, "(sv)", "IOAccounting", "b", true));
+ ASSERT_OK(sd_bus_message_close_container(m));
+ ASSERT_OK(sd_bus_message_append(m, "a(sa(sv))", 0));
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0)
+ return (void) log_tests_skipped_errno(r, "can't issue transient unit call");
+ /* The reply carries the object path of the queued job; wait for that job before going on */
+ ASSERT_OK(sd_bus_message_read(reply, "o", &object));
+
+ ASSERT_OK(bus_wait_for_jobs_one(w, object, /* flags= */ BUS_WAIT_JOBS_LOG_ERROR, /* extra_args= */ NULL));
+
+ ASSERT_OK(sd_event_default(&e));
+ /* Pipe used as the start signal for the child forked below: the child blocks reading pipe_fd[0] */
+ ASSERT_OK_ERRNO(pipe2(pipe_fd, O_CLOEXEC));
+
+ _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
+ r = pidref_safe_fork("(eat-io)", FORK_RESET_SIGNALS|FORK_DEATHSIG_SIGTERM, &pidref);
+ ASSERT_OK(r);
+ if (r == 0) {
+ real_pressure_eat_io(pipe_fd[0]);
+ _exit(EXIT_SUCCESS);
+ }
+ /* Parent: track the child with a child event source and mark the process as owned by that source */
+ ASSERT_OK(event_add_child_pidref(e, &cs, &pidref, WEXITED, real_pressure_child_callback, NULL));
+ ASSERT_OK(sd_event_source_set_child_process_own(cs, true));
+ /* Clear both IO_PRESSURE_* variables before creating the source (presumably sd_event_add_io_pressure() would otherwise pick them up - confirm) */
+ ASSERT_OK_ERRNO(unsetenv("IO_PRESSURE_WATCH"));
+ ASSERT_OK_ERRNO(unsetenv("IO_PRESSURE_WRITE"));
+
+ struct real_pressure_context context = {
+ .pid = cs,
+ };
+ /* The pressure callback receives the context so that it can kill the child through 'cs' */
+ r = sd_event_add_io_pressure(e, &es, real_io_pressure_callback, &context);
+ if (r < 0)
+ return (void) log_tests_skipped_errno(r, "can't allocate io pressure fd");
+ /* Configure the source: type "some", 70ms threshold, 2s window, one-shot. The period is set twice with identical values; the asserts expect a positive result the first time and zero the second (presumably "changed" vs. "unchanged" - confirm). */
+ ASSERT_OK(sd_event_source_set_description(es, "real pressure event source"));
+ ASSERT_OK_ZERO(sd_event_source_set_io_pressure_type(es, "some"));
+ /* Unprivileged writes require a window of at least 2s, otherwise the kernel will refuse the write. */
+ ASSERT_OK_POSITIVE(sd_event_source_set_io_pressure_period(es, 70 * USEC_PER_MSEC, 2 * USEC_PER_SEC));
+ ASSERT_OK_ZERO(sd_event_source_set_io_pressure_period(es, 70 * USEC_PER_MSEC, 2 * USEC_PER_SEC));
+ ASSERT_OK(sd_event_source_set_enabled(es, SD_EVENT_ONESHOT));
+
+ m = sd_bus_message_unref(m);
+ /* Reuse 'm' for SetUnitProperties on the scope (second argument true = runtime): a single IOWriteBandwidthMax entry limiting /var/tmp */
+ ASSERT_OK(bus_message_new_method_call(bus, &m, bus_systemd_mgr, "SetUnitProperties"));
+ ASSERT_OK(sd_bus_message_append(m, "sb", scope, true));
+ ASSERT_OK(sd_bus_message_open_container(m, 'a', "(sv)"));
+ ASSERT_OK(sd_bus_message_open_container(m, 'r', "sv"));
+ ASSERT_OK(sd_bus_message_append(m, "s", "IOWriteBandwidthMax"));
+ ASSERT_OK(sd_bus_message_open_container(m, 'v', "a(st)"));
+ ASSERT_OK(sd_bus_message_append(m, "a(st)", 1, "/var/tmp", (uint64_t) 1024*1024)); /* 1M/s */
+ ASSERT_OK(sd_bus_message_close_container(m));
+ ASSERT_OK(sd_bus_message_close_container(m));
+ ASSERT_OK(sd_bus_message_close_container(m));
+
+ ASSERT_OK(sd_bus_call(bus, m, 0, NULL, NULL));
+
+ /* Now start eating IO */
+ ASSERT_EQ(write(pipe_fd[1], &(const char) { 'x' }, 1), 1);
+ /* Run the loop until it exits and check its exit code; 31 is presumably set by real_pressure_child_callback once the killed child has been reaped - confirm */
+ ASSERT_OK(sd_event_loop(e));
+ int ex = 0;
+ ASSERT_OK(sd_event_get_exit_code(e, &ex));
+ ASSERT_EQ(ex, 31);
+}
+
static int outro(void) {
+ (void) unlink("/var/tmp/.io-pressure-test"); /* best-effort removal of the scratch file written by real_pressure_eat_io(); the result is ignored */
hashmap_trim_pools();
return 0;
}
rm "$SCRIPT"
+# Now test IO pressure: a transient service must find IO_PRESSURE_WATCH and IO_PRESSURE_WRITE in its environment
+# Without IO PSI in the kernel there is nothing to test: record success and stop
+if ! cat /proc/pressure/io >/dev/null ; then
+ echo "kernel has no IO PSI support." >&2
+ echo OK >/testok
+ exit 0
+fi
+# Same if "$CGROUP" has no io.pressure file
+if ! test -f "$CGROUP"/io.pressure ; then
+ echo "No IO accounting/PSI delegated via cgroup, can't test." >&2
+ echo OK >/testok
+ exit 0
+fi
+# Randomised unit name and helper script path
+UNIT="test-iopress-$RANDOM.service"
+SCRIPT="/tmp/iopress-$RANDOM.sh"
+# Helper that runs inside the unit and checks the environment it was given; the quoted 'EOF' means nothing below is expanded by this shell
+cat >"$SCRIPT" <<'EOF'
+#!/usr/bin/env bash
+
+set -ex
+
+export
+id
+
+test -n "$IO_PRESSURE_WATCH"
+test "$IO_PRESSURE_WATCH" != /dev/null
+test -w "$IO_PRESSURE_WATCH"
+
+ls -al "$IO_PRESSURE_WATCH"
+
+EXPECTED="$(echo -n -e "some 123000 2000000\x00" | base64)"
+
+test "$EXPECTED" = "$IO_PRESSURE_WRITE"
+
+EOF
+# Make the helper executable
+chmod +x "$SCRIPT"
+# Run the helper as a transient unit with IOPressureWatch=on and a 123ms threshold; the helper expects that threshold as 123000 in the base64-encoded $IO_PRESSURE_WRITE value
+systemd-run \
+ -u "$UNIT" \
+ -p Type=exec \
+ -p ProtectControlGroups=1 \
+ -p DynamicUser=1 \
+ -p IOPressureWatch=on \
+ -p IOPressureThresholdSec=123ms \
+ -p BindPaths=$SCRIPT \
+ `# Make sanitizers happy when DynamicUser=1 pulls in instrumented systemd NSS modules` \
+ -p EnvironmentFile=-/usr/lib/systemd/systemd-asan-env \
+ --wait "$SCRIPT"
+# The helper is no longer needed
+rm "$SCRIPT"
+
touch /testok