]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
Merge pull request #29159 from poettering/socket-pause
authorLennart Poettering <lennart@poettering.net>
Tue, 19 Sep 2023 07:40:27 +0000 (09:40 +0200)
committerGitHub <noreply@github.com>
Tue, 19 Sep 2023 07:40:27 +0000 (09:40 +0200)
core: add new "PollLimit" settings to .socket units

man/org.freedesktop.systemd1.xml
man/systemd.socket.xml
src/core/dbus-socket.c
src/core/load-fragment-gperf.gperf.in
src/core/socket.c
src/core/socket.h
src/shared/bus-unit-util.c
test/TEST-07-PID1/test.sh
test/units/testsuite-07.poll-limit.sh [new file with mode: 0755]

index 4ca0583d65522517d09c3ccaa61476dc988b407c..47d4b4828b43e5d9535f9b22815977bc3641b480 100644 (file)
@@ -4735,6 +4735,10 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
       readonly t TriggerLimitIntervalUSec = ...;
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly u TriggerLimitBurst = ...;
+      @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
+      readonly t PollLimitIntervalUSec = ...;
+      @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
+      readonly u PollLimitBurst = ...;
       readonly u UID = ...;
       readonly u GID = ...;
       @org.freedesktop.DBus.Property.EmitsChangedSignal("invalidates")
@@ -5969,6 +5973,10 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
 
     <variablelist class="dbus-property" generated="True" extra-ref="TriggerLimitBurst"/>
 
+    <variablelist class="dbus-property" generated="True" extra-ref="PollLimitIntervalUSec"/>
+
+    <variablelist class="dbus-property" generated="True" extra-ref="PollLimitBurst"/>
+
     <variablelist class="dbus-property" generated="True" extra-ref="UID"/>
 
     <variablelist class="dbus-property" generated="True" extra-ref="GID"/>
@@ -6505,6 +6513,10 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
 
     <!--End of Autogenerated section-->
 
+    <para>The <varname>PollLimitIntervalUSec</varname>/<varname>PollLimitBurst</varname> properties configure
+    the polling limit for the socket unit. They take a time in µs and an unsigned integer, respectively. If
+    either is set to zero, the limiting feature is turned off.</para>
+
     <refsect2>
       <title>Properties</title>
 
index f260c4ed26a8cdb15e126ff7bc8b9401d4aac02c..6137d94a0cbe43d500c9001fb33e7d1414e67d02 100644 (file)
         <term><varname>TriggerLimitIntervalSec=</varname></term>
         <term><varname>TriggerLimitBurst=</varname></term>
 
-        <listitem><para>Configures a limit on how often this socket unit may be activated within a specific time
-        interval. The <varname>TriggerLimitIntervalSec=</varname> may be used to configure the length of the time
-        interval in the usual time units <literal>us</literal>, <literal>ms</literal>, <literal>s</literal>,
-        <literal>min</literal>, <literal>h</literal>, … and defaults to 2s (See
-        <citerefentry><refentrytitle>systemd.time</refentrytitle><manvolnum>7</manvolnum></citerefentry> for details on
-        the various time units understood). The <varname>TriggerLimitBurst=</varname> setting takes a positive integer
-        value and specifies the number of permitted activations per time interval, and defaults to 200 for
-        <varname>Accept=yes</varname> sockets (thus by default permitting 200 activations per 2s), and 20 otherwise (20
-        activations per 2s). Set either to 0 to disable any form of trigger rate limiting. If the limit is hit, the
-        socket unit is placed into a failure mode, and will not be connectible anymore until restarted. Note that this
-        limit is enforced before the service activation is enqueued.</para>
+        <listitem><para>Configures a limit on how often this socket unit may be activated within a specific
+        time interval. The <varname>TriggerLimitIntervalSec=</varname> setting may be used to configure the
+        length of the time interval in the usual time units <literal>us</literal>, <literal>ms</literal>,
+        <literal>s</literal>, <literal>min</literal>, <literal>h</literal>, … and defaults to 2s (See
+        <citerefentry><refentrytitle>systemd.time</refentrytitle><manvolnum>7</manvolnum></citerefentry> for
+        details on the various time units understood). The <varname>TriggerLimitBurst=</varname> setting
+        takes a positive integer value and specifies the number of permitted activations per time interval,
+        and defaults to 200 for <varname>Accept=yes</varname> sockets (thus by default permitting 200
+        activations per 2s), and 20 otherwise (20 activations per 2s). Set either to 0 to disable any form of
+        trigger rate limiting.</para>
+
+        <para>If the limit is hit, the socket unit is placed into a failure mode, and will not be connectible
+        anymore until restarted. Note that this limit is enforced before the service activation is
+        enqueued.</para>
+
+        <para>Compare with <varname>PollLimitIntervalSec=</varname>/<varname>PollLimitBurst=</varname>
+        described below, which implements a temporary slowdown if a socket unit is flooded with incoming
+        traffic, as opposed to the permanent failure state
+        <varname>TriggerLimitIntervalSec=</varname>/<varname>TriggerLimitBurst=</varname> results in.</para>
 
         <xi:include href="version-info.xml" xpointer="v230"/></listitem>
       </varlistentry>
 
+      <varlistentry>
+        <term><varname>PollLimitIntervalSec=</varname></term>
+        <term><varname>PollLimitBurst=</varname></term>
+
+        <listitem><para>Configures a limit on how often polling events on the file descriptors backing this
+        socket unit will be considered. This pair of settings is similar to
+        <varname>TriggerLimitIntervalSec=</varname>/<varname>TriggerLimitBurst=</varname> but instead of
+        putting a (fatal) limit on the activation frequency puts a (transient) limit on the polling
+        frequency. The expected parameter syntax and range are identical to those of the aforementioned
+        options, and the limit can be disabled the same way.</para>
+
+        <para>If the polling limit is hit, polling is temporarily disabled on the affected file descriptor
+        until the specified time window passes. The polling limit hence slows down connection attempts if
+        hit, but unlike the trigger limit won't cause permanent failures. It's the recommended mechanism to
+        deal with DoS attempts through packet flooding.</para>
+
+        <para>The polling limit is enforced per file descriptor to listen on, as opposed to the trigger limit
+        which is enforced for the entire socket unit. This distinction matters for socket units that listen
+        on multiple file descriptors (i.e. have multiple <varname>ListenXYZ=</varname> stanzas).</para>
+
+        <para>These settings default to 150 (in case of <varname>Accept=yes</varname>) and 15 (otherwise)
+        polling events per 2s. This is considerably lower than the default values for the trigger limit (see
+        above) and means that the polling limit should typically ensure the trigger limit is never hit,
+        unless one of them is reconfigured or disabled.</para>
+
+        <xi:include href="version-info.xml" xpointer="v255"/></listitem>
+      </varlistentry>
+
     </variablelist>
 
     <xi:include href="systemd.service.xml" xpointer="shared-unit-options" />
index 09a3a9502b33f98ddafc13b9622c1483f11bd8cb..04552b7c60bd68bca1583cffee07513a93d36696 100644 (file)
@@ -129,6 +129,8 @@ const sd_bus_vtable bus_socket_vtable[] = {
         SD_BUS_PROPERTY("SocketProtocol", "i", bus_property_get_int, offsetof(Socket, socket_protocol), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("TriggerLimitIntervalUSec", "t", bus_property_get_usec, offsetof(Socket, trigger_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("TriggerLimitBurst", "u", bus_property_get_unsigned, offsetof(Socket, trigger_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("PollLimitIntervalUSec", "t", bus_property_get_usec, offsetof(Socket, poll_limit_interval), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("PollLimitBurst", "u", bus_property_get_unsigned, offsetof(Socket, poll_limit_burst), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("UID", "u", bus_property_get_uid, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
         SD_BUS_PROPERTY("GID", "u", bus_property_get_gid, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
         BUS_EXEC_COMMAND_LIST_VTABLE("ExecStartPre", offsetof(Socket, exec_command[SOCKET_EXEC_START_PRE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
@@ -248,6 +250,9 @@ static int bus_socket_set_transient_property(
         if (streq(name, "TriggerLimitBurst"))
                 return bus_set_transient_unsigned(u, name, &s->trigger_limit.burst, message, flags, error);
 
+        if (streq(name, "PollLimitBurst"))
+                return bus_set_transient_unsigned(u, name, &s->poll_limit_burst, message, flags, error);
+
         if (streq(name, "SocketMode"))
                 return bus_set_transient_mode_t(u, name, &s->socket_mode, message, flags, error);
 
@@ -275,6 +280,9 @@ static int bus_socket_set_transient_property(
         if (streq(name, "TriggerLimitIntervalUSec"))
                 return bus_set_transient_usec(u, name, &s->trigger_limit.interval, message, flags, error);
 
+        if (streq(name, "PollLimitIntervalUSec"))
+                return bus_set_transient_usec(u, name, &s->poll_limit_interval, message, flags, error);
+
         if (streq(name, "SmackLabel"))
                 return bus_set_transient_string(u, name, &s->smack, message, flags, error);
 
index b66adf281193617c56b70b462cee4ff29d8003c1..0d1ee9c231aa1c91eccb6dd63e1c6c8c0dcb5d77 100644 (file)
@@ -507,6 +507,8 @@ Socket.FileDescriptorName,               config_parse_fdname,
 Socket.Service,                          config_parse_socket_service,                 0,                                  0
 Socket.TriggerLimitIntervalSec,          config_parse_sec,                            0,                                  offsetof(Socket, trigger_limit.interval)
 Socket.TriggerLimitBurst,                config_parse_unsigned,                       0,                                  offsetof(Socket, trigger_limit.burst)
+Socket.PollLimitIntervalSec,             config_parse_sec,                            0,                                  offsetof(Socket, poll_limit_interval)
+Socket.PollLimitBurst,                   config_parse_unsigned,                       0,                                  offsetof(Socket, poll_limit_burst)
 {% if ENABLE_SMACK %}
 Socket.SmackLabel,                       config_parse_unit_string_printf,             0,                                  offsetof(Socket, smack)
 Socket.SmackLabelIPIn,                   config_parse_unit_string_printf,             0,                                  offsetof(Socket, smack_ip_in)
index 3304af831dbe24ee2111b40940e1d8c4043d5527..43f412bc895fcd0f362cbafde9926c9ce771755f 100644 (file)
@@ -102,6 +102,9 @@ static void socket_init(Unit *u) {
 
         s->trigger_limit.interval = USEC_INFINITY;
         s->trigger_limit.burst = UINT_MAX;
+
+        s->poll_limit_interval = USEC_INFINITY;
+        s->poll_limit_burst = UINT_MAX;
 }
 
 static void socket_unwatch_control_pid(Socket *s) {
@@ -307,17 +310,20 @@ static int socket_add_extras(Socket *s) {
          * off the queues, which it might not necessarily do. Moreover, while Accept=no services are supposed to
          * process whatever is queued in one go, and thus should normally never have to be started frequently. This is
          * different for Accept=yes where each connection is processed by a new service instance, and thus frequent
-         * service starts are typical. */
+         * service starts are typical.
+         *
+         * For the poll limit we follow a similar rule, but use 3/4th of the trigger limit parameters, to
+         * trigger this earlier. */
 
         if (s->trigger_limit.interval == USEC_INFINITY)
                 s->trigger_limit.interval = 2 * USEC_PER_SEC;
+        if (s->trigger_limit.burst == UINT_MAX)
+                s->trigger_limit.burst = s->accept ? 200 : 20;
 
-        if (s->trigger_limit.burst == UINT_MAX) {
-                if (s->accept)
-                        s->trigger_limit.burst = 200;
-                else
-                        s->trigger_limit.burst = 20;
-        }
+        if (s->poll_limit_interval == USEC_INFINITY)
+                s->poll_limit_interval = 2 * USEC_PER_SEC;
+        if (s->poll_limit_burst == UINT_MAX)
+                s->poll_limit_burst = s->accept ? 150 : 15;
 
         if (have_non_accept_socket(s)) {
 
@@ -767,9 +773,13 @@ static void socket_dump(Unit *u, FILE *f, const char *prefix) {
 
         fprintf(f,
                 "%sTriggerLimitIntervalSec: %s\n"
-                "%sTriggerLimitBurst: %u\n",
+                "%sTriggerLimitBurst: %u\n"
+                "%sPollLimitIntervalSec: %s\n"
+                "%sPollLimitBurst: %u\n",
                 prefix, FORMAT_TIMESPAN(s->trigger_limit.interval, USEC_PER_SEC),
-                prefix, s->trigger_limit.burst);
+                prefix, s->trigger_limit.burst,
+                prefix, FORMAT_TIMESPAN(s->poll_limit_interval, USEC_PER_SEC),
+                prefix, s->poll_limit_burst);
 
         str = ip_protocol_to_name(s->socket_protocol);
         if (str)
@@ -1761,6 +1771,10 @@ static int socket_watch_fds(Socket *s) {
 
                         (void) sd_event_source_set_description(p->event_source, "socket-port-io");
                 }
+
+                r = sd_event_source_set_ratelimit(p->event_source, s->poll_limit_interval, s->poll_limit_burst);
+                if (r < 0)
+                        log_unit_debug_errno(UNIT(s), r, "Failed to set poll limit on I/O event source, ignoring: %m");
         }
 
         return 0;
index 03b11c1a692dbcdbcdf6ee41145b8db933aba53b..0b82141659a081fe8c62662be4c51e52f4515da5 100644 (file)
@@ -159,6 +159,8 @@ struct Socket {
         char *fdname;
 
         RateLimit trigger_limit;
+        usec_t poll_limit_interval;
+        unsigned poll_limit_burst;
 };
 
 SocketPeer *socket_peer_ref(SocketPeer *p);
index ad1957d9d741fbce74a88da405db436c3abdd324..e2d6bfebf320fb335ad3f6dbbe33cfd3e101cef4 100644 (file)
@@ -2165,10 +2165,10 @@ static int bus_append_path_property(sd_bus_message *m, const char *field, const
                 return 1;
         }
 
-        if (streq(field, "TriggerLimitBurst"))
+        if (STR_IN_SET(field, "TriggerLimitBurst", "PollLimitBurst"))
                 return bus_append_safe_atou(m, field, eq);
 
-        if (streq(field, "TriggerLimitIntervalSec"))
+        if (STR_IN_SET(field, "TriggerLimitIntervalSec", "PollLimitIntervalSec"))
                 return bus_append_parse_sec_rename(m, field, eq);
 
         return 0;
@@ -2377,7 +2377,8 @@ static int bus_append_socket_property(sd_bus_message *m, const char *field, cons
                               "MaxConnections",
                               "MaxConnectionsPerSource",
                               "KeepAliveProbes",
-                              "TriggerLimitBurst"))
+                              "TriggerLimitBurst",
+                              "PollLimitBurst"))
                 return bus_append_safe_atou(m, field, eq);
 
         if (STR_IN_SET(field, "SocketMode",
@@ -2392,7 +2393,8 @@ static int bus_append_socket_property(sd_bus_message *m, const char *field, cons
                               "KeepAliveTimeSec",
                               "KeepAliveIntervalSec",
                               "DeferAcceptSec",
-                              "TriggerLimitIntervalSec"))
+                              "TriggerLimitIntervalSec",
+                              "PollLimitIntervalSec"))
                 return bus_append_parse_sec_rename(m, field, eq);
 
         if (STR_IN_SET(field, "ReceiveBuffer",
index 1f9a9294667bd5b1c6c94e626a71c1fcb92d0d2a..b3421520f65c4b839dba5bad651e646680874cbf 100755 (executable)
@@ -32,6 +32,8 @@ Alias=issue2730-alias.mount
 EOF
     "${SYSTEMCTL:?}" enable --root="$workspace" issue2730.mount
     ln -svrf "$workspace/etc/systemd/system/issue2730.mount" "$workspace/etc/systemd/system/issue2730-alias.mount"
+
+    image_install logger
 }
 
 do_test "$@"
diff --git a/test/units/testsuite-07.poll-limit.sh b/test/units/testsuite-07.poll-limit.sh
new file mode 100755 (executable)
index 0000000..480d7ee
--- /dev/null
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: LGPL-2.1-or-later
+set -eux
+set -o pipefail
+
+systemd-analyze log-level debug
+
+cat > /run/systemd/system/floodme@.service <<EOF
+[Service]
+ExecStart=/bin/true
+EOF
+
+cat > /run/systemd/system/floodme.socket <<EOF
+[Socket]
+ListenStream=/tmp/floodme
+PollLimitIntervalSec=10s
+Accept=yes
+PollLimitBurst=3
+EOF
+
+systemctl daemon-reload
+systemctl start floodme.socket
+
+START=$(date +%s%N)
+
+# Trigger this 100 times in a flood
+for (( i=0 ; i < 100; i++ )) ; do
+    logger -u /tmp/floodme foo &
+done
+
+# Let some time pass
+sleep 5
+
+END=$(date +%s%N)
+
+PASSED=$((END-START))
+
+# Calculate (round up) how many trigger events could have happened in the passed time
+MAXCOUNT=$(((PASSED+10000000000)*3/10000000000))
+
+# We started 100 connection attempts, but at most MAXCOUNT (3 per 10s window) should have gone through, as per limit
+test "$(systemctl show -P NAccepted floodme.socket)" -le "$MAXCOUNT"
+
+systemctl stop floodme.socket floodme@*.service
+
+rm /run/systemd/system/floodme@.service /run/systemd/system/floodme.socket /tmp/floodme
+
+systemctl daemon-reload