Merge pull request #29159 from poettering/socket-pause

author Lennart Poettering <lennart@poettering.net>

Tue, 19 Sep 2023 07:40:27 +0000 (09:40 +0200)

committer GitHub <noreply@github.com>

Tue, 19 Sep 2023 07:40:27 +0000 (09:40 +0200)
author Lennart Poettering <lennart@poettering.net>
Tue, 19 Sep 2023 07:40:27 +0000 (09:40 +0200)
committer GitHub <noreply@github.com>
Tue, 19 Sep 2023 07:40:27 +0000 (09:40 +0200)
diff --git a/man/org.freedesktop.systemd1.xml b/man/org.freedesktop.systemd1.xml

index 4ca0583d65522517d09c3ccaa61476dc988b407c..47d4b4828b43e5d9535f9b22815977bc3641b480 100644 (file)
--- a/man/org.freedesktop.systemd1.xml
+++ b/man/org.freedesktop.systemd1.xml
@@ -4735,6 +4735,10 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
        readonly t TriggerLimitIntervalUSec = ...;
        @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
        readonly u TriggerLimitBurst = ...;
+      @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
+      readonly t PollLimitIntervalUSec = ...;
+      @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
+      readonly u PollLimitBurst = ...;
        readonly u UID = ...;
        readonly u GID = ...;
        @org.freedesktop.DBus.Property.EmitsChangedSignal("invalidates")
@@ -5969,6 +5973,10 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
  
      <variablelist class="dbus-property" generated="True" extra-ref="TriggerLimitBurst"/>
  
+    <variablelist class="dbus-property" generated="True" extra-ref="PollLimitIntervalUSec"/>
+
+    <variablelist class="dbus-property" generated="True" extra-ref="PollLimitBurst"/>
+
      <variablelist class="dbus-property" generated="True" extra-ref="UID"/>
  
      <variablelist class="dbus-property" generated="True" extra-ref="GID"/>
@@ -6505,6 +6513,10 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
  
      <!--End of Autogenerated section-->
  
+    <para>The <varname>PollLimitIntervalUSec</varname>/<varname>PollLimitBurst</varname> properties configure
+    the polling limit for the socket unit. They expect a time in µs and an unsigned integer, respectively.
+    If either is set to zero, the limiting feature is turned off.</para>
+
      <refsect2>
        <title>Properties</title>
  
diff --git a/man/systemd.socket.xml b/man/systemd.socket.xml

index f260c4ed26a8cdb15e126ff7bc8b9401d4aac02c..6137d94a0cbe43d500c9001fb33e7d1414e67d02 100644 (file)
--- a/man/systemd.socket.xml
+++ b/man/systemd.socket.xml
@@ -868,21 +868,57 @@
          <term><varname>TriggerLimitIntervalSec=</varname></term>
          <term><varname>TriggerLimitBurst=</varname></term>
  
-        <listitem><para>Configures a limit on how often this socket unit may be activated within a specific time
-        interval. The <varname>TriggerLimitIntervalSec=</varname> may be used to configure the length of the time
-        interval in the usual time units <literal>us</literal>, <literal>ms</literal>, <literal>s</literal>,
-        <literal>min</literal>, <literal>h</literal>, … and defaults to 2s (See
-        <citerefentry><refentrytitle>systemd.time</refentrytitle><manvolnum>7</manvolnum></citerefentry> for details on
-        the various time units understood). The <varname>TriggerLimitBurst=</varname> setting takes a positive integer
-        value and specifies the number of permitted activations per time interval, and defaults to 200 for
-        <varname>Accept=yes</varname> sockets (thus by default permitting 200 activations per 2s), and 20 otherwise (20
-        activations per 2s). Set either to 0 to disable any form of trigger rate limiting. If the limit is hit, the
-        socket unit is placed into a failure mode, and will not be connectible anymore until restarted. Note that this
-        limit is enforced before the service activation is enqueued.</para>
+        <listitem><para>Configures a limit on how often this socket unit may be activated within a specific
+        time interval. The <varname>TriggerLimitIntervalSec=</varname> setting may be used to configure the
+        length of the time interval in the usual time units <literal>us</literal>, <literal>ms</literal>,
+        <literal>s</literal>, <literal>min</literal>, <literal>h</literal>, … and defaults to 2s (See
+        <citerefentry><refentrytitle>systemd.time</refentrytitle><manvolnum>7</manvolnum></citerefentry> for
+        details on the various time units understood). The <varname>TriggerLimitBurst=</varname> setting
+        takes a positive integer value and specifies the number of permitted activations per time interval,
+        and defaults to 200 for <varname>Accept=yes</varname> sockets (thus by default permitting 200
+        activations per 2s), and 20 otherwise (20 activations per 2s). Set either to 0 to disable any form of
+        trigger rate limiting.</para>
+
+        <para>If the limit is hit, the socket unit is placed into a failure mode, and will not be connectible
+        anymore until restarted. Note that this limit is enforced before the service activation is
+        enqueued.</para>
+
+        <para>Compare with <varname>PollLimitIntervalSec=</varname>/<varname>PollLimitBurst=</varname>
+        described below, which implements a temporary slowdown if a socket unit is flooded with incoming
+        traffic, as opposed to the permanent failure state
+        <varname>TriggerLimitIntervalSec=</varname>/<varname>TriggerLimitBurst=</varname> results in.</para>
  
          <xi:include href="version-info.xml" xpointer="v230"/></listitem>
        </varlistentry>
  
+      <varlistentry>
+        <term><varname>PollLimitIntervalSec=</varname></term>
+        <term><varname>PollLimitBurst=</varname></term>
+
+        <listitem><para>Configures a limit on how often polling events on the file descriptors backing this
+        socket unit will be considered. This pair of settings is similar to
+        <varname>TriggerLimitIntervalSec=</varname>/<varname>TriggerLimitBurst=</varname> but instead of
+        putting a (fatal) limit on the activation frequency puts a (transient) limit on the polling
+        frequency. The expected parameter syntax and range are identical to those of the aforementioned
+        options, and the limit can be disabled the same way (by setting either to 0).</para>
+
+        <para>If the polling limit is hit, polling is temporarily disabled on the affected file descriptor
+        until the specified time window passes. The polling limit hence slows down connection attempts if
+        hit, but unlike the trigger limit won't cause permanent failures. It's the recommended mechanism to
+        deal with DoS attempts through packet flooding.</para>
+
+        <para>The polling limit is enforced per file descriptor to listen on, as opposed to the trigger limit
+        which is enforced for the entire socket unit. This distinction matters for socket units that listen
+        on multiple file descriptors (i.e. have multiple <varname>ListenXYZ=</varname> stanzas).</para>
+
+        <para>These settings default to 150 (in case of <varname>Accept=yes</varname>) and 15 (otherwise)
+        polling events per 2s. This is considerably lower than the default values for the trigger limit (see
+        above) and means that the polling limit should typically ensure the trigger limit is never hit,
+        unless one of them is reconfigured or disabled.</para>
+
+        <xi:include href="version-info.xml" xpointer="v255"/></listitem>
+      </varlistentry>
+
      </variablelist>
  
      <xi:include href="systemd.service.xml" xpointer="shared-unit-options" />
diff --git a/src/core/dbus-socket.c b/src/core/dbus-socket.c

index 09a3a9502b33f98ddafc13b9622c1483f11bd8cb..04552b7c60bd68bca1583cffee07513a93d36696 100644 (file)
--- a/src/core/dbus-socket.c
+++ b/src/core/dbus-socket.c
@@ -129,6 +129,8 @@ const sd_bus_vtable bus_socket_vtable[] = {
          SD_BUS_PROPERTY("SocketProtocol", "i", bus_property_get_int, offsetof(Socket, socket_protocol), SD_BUS_VTABLE_PROPERTY_CONST),
          SD_BUS_PROPERTY("TriggerLimitIntervalUSec", "t", bus_property_get_usec, offsetof(Socket, trigger_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST),
          SD_BUS_PROPERTY("TriggerLimitBurst", "u", bus_property_get_unsigned, offsetof(Socket, trigger_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("PollLimitIntervalUSec", "t", bus_property_get_usec, offsetof(Socket, poll_limit_interval), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("PollLimitBurst", "u", bus_property_get_unsigned, offsetof(Socket, poll_limit_burst), SD_BUS_VTABLE_PROPERTY_CONST),
          SD_BUS_PROPERTY("UID", "u", bus_property_get_uid, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
          SD_BUS_PROPERTY("GID", "u", bus_property_get_gid, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
          BUS_EXEC_COMMAND_LIST_VTABLE("ExecStartPre", offsetof(Socket, exec_command[SOCKET_EXEC_START_PRE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
@@ -248,6 +250,9 @@ static int bus_socket_set_transient_property(
          if (streq(name, "TriggerLimitBurst"))
                  return bus_set_transient_unsigned(u, name, &s->trigger_limit.burst, message, flags, error);
  
+        if (streq(name, "PollLimitBurst"))
+                return bus_set_transient_unsigned(u, name, &s->poll_limit_burst, message, flags, error);
+
          if (streq(name, "SocketMode"))
                  return bus_set_transient_mode_t(u, name, &s->socket_mode, message, flags, error);
  
@@ -275,6 +280,9 @@ static int bus_socket_set_transient_property(
          if (streq(name, "TriggerLimitIntervalUSec"))
                  return bus_set_transient_usec(u, name, &s->trigger_limit.interval, message, flags, error);
  
+        if (streq(name, "PollLimitIntervalUSec"))
+                return bus_set_transient_usec(u, name, &s->poll_limit_interval, message, flags, error);
+
          if (streq(name, "SmackLabel"))
                  return bus_set_transient_string(u, name, &s->smack, message, flags, error);
  
diff --git a/src/core/load-fragment-gperf.gperf.in b/src/core/load-fragment-gperf.gperf.in

index b66adf281193617c56b70b462cee4ff29d8003c1..0d1ee9c231aa1c91eccb6dd63e1c6c8c0dcb5d77 100644 (file)
--- a/src/core/load-fragment-gperf.gperf.in
+++ b/src/core/load-fragment-gperf.gperf.in
@@ -507,6 +507,8 @@ Socket.FileDescriptorName,               config_parse_fdname,
  Socket.Service,                          config_parse_socket_service,                 0,                                  0
  Socket.TriggerLimitIntervalSec,          config_parse_sec,                            0,                                  offsetof(Socket, trigger_limit.interval)
  Socket.TriggerLimitBurst,                config_parse_unsigned,                       0,                                  offsetof(Socket, trigger_limit.burst)
+Socket.PollLimitIntervalSec,             config_parse_sec,                            0,                                  offsetof(Socket, poll_limit_interval)
+Socket.PollLimitBurst,                   config_parse_unsigned,                       0,                                  offsetof(Socket, poll_limit_burst)
  {% if ENABLE_SMACK %}
  Socket.SmackLabel,                       config_parse_unit_string_printf,             0,                                  offsetof(Socket, smack)
  Socket.SmackLabelIPIn,                   config_parse_unit_string_printf,             0,                                  offsetof(Socket, smack_ip_in)
diff --git a/src/core/socket.c b/src/core/socket.c

index 3304af831dbe24ee2111b40940e1d8c4043d5527..43f412bc895fcd0f362cbafde9926c9ce771755f 100644 (file)
--- a/src/core/socket.c
+++ b/src/core/socket.c
@@ -102,6 +102,9 @@ static void socket_init(Unit *u) {
  
          s->trigger_limit.interval = USEC_INFINITY;
          s->trigger_limit.burst = UINT_MAX;
+
+        s->poll_limit_interval = USEC_INFINITY;
+        s->poll_limit_burst = UINT_MAX;
  }
  
  static void socket_unwatch_control_pid(Socket *s) {
@@ -307,17 +310,20 @@ static int socket_add_extras(Socket *s) {
           * off the queues, which it might not necessarily do. Moreover, while Accept=no services are supposed to
           * process whatever is queued in one go, and thus should normally never have to be started frequently. This is
           * different for Accept=yes where each connection is processed by a new service instance, and thus frequent
-         * service starts are typical. */
+         * service starts are typical.
+         *
+         * For the poll limit we follow a similar rule, but use 3/4 of the trigger limit's burst value (with
+         * the same interval), so that it kicks in earlier. */
  
          if (s->trigger_limit.interval == USEC_INFINITY)
                  s->trigger_limit.interval = 2 * USEC_PER_SEC;
+        if (s->trigger_limit.burst == UINT_MAX)
+                s->trigger_limit.burst = s->accept ? 200 : 20;
  
-        if (s->trigger_limit.burst == UINT_MAX) {
-                if (s->accept)
-                        s->trigger_limit.burst = 200;
-                else
-                        s->trigger_limit.burst = 20;
-        }
+        if (s->poll_limit_interval == USEC_INFINITY)
+                s->poll_limit_interval = 2 * USEC_PER_SEC;
+        if (s->poll_limit_burst == UINT_MAX)
+                s->poll_limit_burst = s->accept ? 150 : 15;
  
          if (have_non_accept_socket(s)) {
  
@@ -767,9 +773,13 @@ static void socket_dump(Unit *u, FILE *f, const char *prefix) {
  
          fprintf(f,
                  "%sTriggerLimitIntervalSec: %s\n"
-                "%sTriggerLimitBurst: %u\n",
+                "%sTriggerLimitBurst: %u\n"
+                "%sPollLimitIntervalSec: %s\n"
+                "%sPollLimitBurst: %u\n",
                  prefix, FORMAT_TIMESPAN(s->trigger_limit.interval, USEC_PER_SEC),
-                prefix, s->trigger_limit.burst);
+                prefix, s->trigger_limit.burst,
+                prefix, FORMAT_TIMESPAN(s->poll_limit_interval, USEC_PER_SEC),
+                prefix, s->poll_limit_burst);
  
          str = ip_protocol_to_name(s->socket_protocol);
          if (str)
@@ -1761,6 +1771,10 @@ static int socket_watch_fds(Socket *s) {
  
                          (void) sd_event_source_set_description(p->event_source, "socket-port-io");
                  }
+
+                r = sd_event_source_set_ratelimit(p->event_source, s->poll_limit_interval, s->poll_limit_burst);
+                if (r < 0)
+                        log_unit_debug_errno(UNIT(s), r, "Failed to set poll limit on I/O event source, ignoring: %m");
          }
  
          return 0;
diff --git a/src/core/socket.h b/src/core/socket.h

index 03b11c1a692dbcdbcdf6ee41145b8db933aba53b..0b82141659a081fe8c62662be4c51e52f4515da5 100644 (file)
--- a/src/core/socket.h
+++ b/src/core/socket.h
@@ -159,6 +159,8 @@ struct Socket {
          char *fdname;
  
          RateLimit trigger_limit;
+        usec_t poll_limit_interval;
+        unsigned poll_limit_burst;
  };
  
  SocketPeer *socket_peer_ref(SocketPeer *p);
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c

index ad1957d9d741fbce74a88da405db436c3abdd324..e2d6bfebf320fb335ad3f6dbbe33cfd3e101cef4 100644 (file)
--- a/src/shared/bus-unit-util.c
+++ b/src/shared/bus-unit-util.c
@@ -2165,10 +2165,10 @@ static int bus_append_path_property(sd_bus_message *m, const char *field, const
                  return 1;
          }
  
-        if (streq(field, "TriggerLimitBurst"))
+        if (STR_IN_SET(field, "TriggerLimitBurst", "PollLimitBurst"))
                  return bus_append_safe_atou(m, field, eq);
  
-        if (streq(field, "TriggerLimitIntervalSec"))
+        if (STR_IN_SET(field, "TriggerLimitIntervalSec", "PollLimitIntervalSec"))
                  return bus_append_parse_sec_rename(m, field, eq);
  
          return 0;
@@ -2377,7 +2377,8 @@ static int bus_append_socket_property(sd_bus_message *m, const char *field, cons
                                "MaxConnections",
                                "MaxConnectionsPerSource",
                                "KeepAliveProbes",
-                              "TriggerLimitBurst"))
+                              "TriggerLimitBurst",
+                              "PollLimitBurst"))
                  return bus_append_safe_atou(m, field, eq);
  
          if (STR_IN_SET(field, "SocketMode",
@@ -2392,7 +2393,8 @@ static int bus_append_socket_property(sd_bus_message *m, const char *field, cons
                                "KeepAliveTimeSec",
                                "KeepAliveIntervalSec",
                                "DeferAcceptSec",
-                              "TriggerLimitIntervalSec"))
+                              "TriggerLimitIntervalSec",
+                              "PollLimitIntervalSec"))
                  return bus_append_parse_sec_rename(m, field, eq);
  
          if (STR_IN_SET(field, "ReceiveBuffer",
diff --git a/test/TEST-07-PID1/test.sh b/test/TEST-07-PID1/test.sh

index 1f9a9294667bd5b1c6c94e626a71c1fcb92d0d2a..b3421520f65c4b839dba5bad651e646680874cbf 100755 (executable)
--- a/test/TEST-07-PID1/test.sh
+++ b/test/TEST-07-PID1/test.sh
@@ -32,6 +32,8 @@ Alias=issue2730-alias.mount
  EOF
      "${SYSTEMCTL:?}" enable --root="$workspace" issue2730.mount
      ln -svrf "$workspace/etc/systemd/system/issue2730.mount" "$workspace/etc/systemd/system/issue2730-alias.mount"
+
+    image_install logger
  }
  
  do_test "$@"
diff --git a/test/units/testsuite-07.poll-limit.sh b/test/units/testsuite-07.poll-limit.sh

new file mode 100755 (executable)

index 0000000..480d7ee
--- /dev/null
+++ b/test/units/testsuite-07.poll-limit.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: LGPL-2.1-or-later
+set -eux
+set -o pipefail
+
+systemd-analyze log-level debug
+
+cat > /run/systemd/system/floodme@.service <<EOF
+[Service]
+ExecStart=/bin/true
+EOF
+
+cat > /run/systemd/system/floodme.socket <<EOF
+[Socket]
+ListenStream=/tmp/floodme
+PollLimitIntervalSec=10s
+Accept=yes
+PollLimitBurst=3
+EOF
+
+systemctl daemon-reload
+systemctl start floodme.socket
+
+START=$(date +%s%N)
+
+# Trigger this 100 times in a flood
+for (( i=0 ; i < 100; i++ )) ; do
+    logger -u /tmp/floodme foo &
+done
+
+# Let some time pass
+sleep 5
+
+END=$(date +%s%N)
+
+PASSED=$((END-START))
+
+# Calculate (round up) how many trigger events could have happened in the passed time
+MAXCOUNT=$(((PASSED+10000000000)*3/10000000000))
+
+# We started 100 connection attempts, but at most MAXCOUNT (3 per 10s window) should have gone through, as per limit
+test "$(systemctl show -P NAccepted floodme.socket)" -le "$MAXCOUNT"
+
+systemctl stop floodme.socket floodme@*.service
+
+rm /run/systemd/system/floodme@.service /run/systemd/system/floodme.socket /tmp/floodme
+
+systemctl daemon-reload
author	Lennart Poettering <lennart@poettering.net>
	Tue, 19 Sep 2023 07:40:27 +0000 (09:40 +0200)
committer	GitHub <noreply@github.com>
	Tue, 19 Sep 2023 07:40:27 +0000 (09:40 +0200)
man/org.freedesktop.systemd1.xml		patch \| blob \| blame \| history
man/systemd.socket.xml		patch \| blob \| blame \| history
src/core/dbus-socket.c		patch \| blob \| blame \| history
src/core/load-fragment-gperf.gperf.in		patch \| blob \| blame \| history
src/core/socket.c		patch \| blob \| blame \| history
src/core/socket.h		patch \| blob \| blame \| history
src/shared/bus-unit-util.c		patch \| blob \| blame \| history
test/TEST-07-PID1/test.sh		patch \| blob \| blame \| history
test/units/testsuite-07.poll-limit.sh	[new file with mode: 0755]	patch \| blob