]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
core: add RestartSteps= and RestartSecMax= for exponentially increasing
author Mike Yuan <me@yhndnzj.com>
Mon, 20 Mar 2023 10:49:39 +0000 (18:49 +0800)
committer Mike Yuan <me@yhndnzj.com>
Mon, 27 Mar 2023 11:31:12 +0000 (19:31 +0800)
interval between restarts

RestartSteps= accepts a positive integer as the number of steps
to take to increase the interval between auto-restarts from
RestartSec= to RestartSecMax=, or 0 to disable it.

Closes #6129

man/org.freedesktop.systemd1.xml
man/systemd.service.xml
src/core/dbus-service.c
src/core/load-fragment-gperf.gperf.in
src/core/meson.build
src/core/service.c
src/core/service.h

index bc5f8ea388e6abdd70f992a8282878c8a47d3d7c..e7254ca305339c6391e54ad743ee85add116119b 100644 (file)
@@ -2564,6 +2564,10 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly t RestartUSec = ...;
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
+      readonly u RestartSteps = ...;
+      @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
+      readonly t RestartUSecMax = ...;
+      @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly t TimeoutStartUSec = ...;
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly t TimeoutStopUSec = ...;
@@ -3188,6 +3192,10 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
 
     <!--property RestartUSec is not documented!-->
 
+    <!--property RestartSteps is not documented!-->
+
+    <!--property RestartUSecMax is not documented!-->
+
     <!--property TimeoutStartFailureMode is not documented!-->
 
     <!--property TimeoutStopFailureMode is not documented!-->
@@ -3746,6 +3754,10 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
 
     <variablelist class="dbus-property" generated="True" extra-ref="RestartUSec"/>
 
+    <variablelist class="dbus-property" generated="True" extra-ref="RestartSteps"/>
+
+    <variablelist class="dbus-property" generated="True" extra-ref="RestartUSecMax"/>
+
     <variablelist class="dbus-property" generated="True" extra-ref="TimeoutStartUSec"/>
 
     <variablelist class="dbus-property" generated="True" extra-ref="TimeoutStopUSec"/>
index a1a32e8cd905469ccfd1ecf0c385231a94b06017..e8be2ff46851f5c48e22bc62be3e287e550ae4ae 100644 (file)
         as "5min 20s". Defaults to 100ms.</para></listitem>
       </varlistentry>
 
+      <varlistentry>
+        <term><varname>RestartSteps=</varname></term>
+        <listitem><para>Configures the number of steps to take to increase the interval
+        of auto-restarts from <varname>RestartSec=</varname> to <varname>RestartSecMax=</varname>.
+        Takes a positive integer or 0 to disable it. Defaults to 0.</para>
+
+        <para>This setting is effective only if <varname>RestartSecMax=</varname> is also set.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>RestartSecMax=</varname></term>
+        <listitem><para>Configures the longest time to sleep before restarting a service
+        as the interval goes up with <varname>RestartSteps=</varname>. Takes a value
+        in the same format as <varname>RestartSec=</varname>, or <literal>infinity</literal>
+        to disable the setting. Defaults to <literal>infinity</literal>.</para>
+
+        <para>This setting is effective only if <varname>RestartSteps=</varname> is also set.</para></listitem>
+      </varlistentry>
+
       <varlistentry>
         <term><varname>TimeoutStartSec=</varname></term>
         <listitem><para>Configures the time to wait for start-up. If a daemon service does not signal
index 24297b52a012981e5dba28330b57508edca8f03c..704e59cc607059ede2573cab9fb03fb5a6c7b126 100644 (file)
@@ -225,6 +225,8 @@ const sd_bus_vtable bus_service_vtable[] = {
         SD_BUS_PROPERTY("PIDFile", "s", NULL, offsetof(Service, pid_file), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("NotifyAccess", "s", property_get_notify_access, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
         SD_BUS_PROPERTY("RestartUSec", "t", bus_property_get_usec, offsetof(Service, restart_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("RestartSteps", "u", bus_property_get_unsigned, offsetof(Service, restart_steps), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("RestartUSecMax", "t", bus_property_get_usec, offsetof(Service, restart_usec_max), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("TimeoutStartUSec", "t", bus_property_get_usec, offsetof(Service, timeout_start_usec), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("TimeoutStopUSec", "t", bus_property_get_usec, offsetof(Service, timeout_stop_usec), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("TimeoutAbortUSec", "t", property_get_timeout_abort_usec, 0, 0),
@@ -448,6 +450,12 @@ static int bus_service_set_transient_property(
         if (streq(name, "RestartUSec"))
                 return bus_set_transient_usec(u, name, &s->restart_usec, message, flags, error);
 
+        if (streq(name, "RestartSteps"))
+                return bus_set_transient_unsigned(u, name, &s->restart_steps, message, flags, error);
+
+        if (streq(name, "RestartUSecMax"))
+                return bus_set_transient_usec(u, name, &s->restart_usec_max, message, flags, error);
+
         if (streq(name, "TimeoutStartUSec")) {
                 r = bus_set_transient_usec(u, name, &s->timeout_start_usec, message, flags, error);
                 if (r >= 0 && !UNIT_WRITE_FLAGS_NOOP(flags))
index 9a3ec7faf69ccbeffee51d510785d8eefc2859fc..50ff57a9f8d689b543fa21453c67a6fe403cd477 100644 (file)
@@ -399,6 +399,8 @@ Service.ExecReload,                      config_parse_exec,
 Service.ExecStop,                        config_parse_exec,                           SERVICE_EXEC_STOP,                  offsetof(Service, exec_command)
 Service.ExecStopPost,                    config_parse_exec,                           SERVICE_EXEC_STOP_POST,             offsetof(Service, exec_command)
 Service.RestartSec,                      config_parse_sec,                            0,                                  offsetof(Service, restart_usec)
+Service.RestartSteps,                    config_parse_unsigned,                       0,                                  offsetof(Service, restart_steps)
+Service.RestartSecMax,                   config_parse_sec,                            0,                                  offsetof(Service, restart_usec_max)
 Service.TimeoutSec,                      config_parse_service_timeout,                0,                                  0
 Service.TimeoutStartSec,                 config_parse_service_timeout,                0,                                  0
 Service.TimeoutStopSec,                  config_parse_sec_fix_0,                      0,                                  offsetof(Service, timeout_stop_usec)
index e68c55917f8ae53f48a3f662f7c6d6e4cb3acd6d..af3eaa52d2b52b153014e4d549c6111e60c82f5d 100644 (file)
@@ -126,6 +126,7 @@ libcore = shared_library(
                         libblkid,
                         libdl,
                         libkmod,
+                        libm,
                         libmount,
                         libpam,
                         librt,
index 21d1bf6595b9e675d3f4bbadd7d73b99a3785d93..02d514d56a869793291195743255e30119f092f2 100644 (file)
@@ -1,6 +1,7 @@
 /* SPDX-License-Identifier: LGPL-2.1-or-later */
 
 #include <errno.h>
+#include <math.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <unistd.h>
@@ -114,6 +115,7 @@ static void service_init(Unit *u) {
         s->timeout_abort_usec = u->manager->default_timeout_abort_usec;
         s->timeout_abort_set = u->manager->default_timeout_abort_set;
         s->restart_usec = u->manager->default_restart_usec;
+        s->restart_usec_max = USEC_INFINITY;
         s->runtime_max_usec = USEC_INFINITY;
         s->type = _SERVICE_TYPE_INVALID;
         s->socket_fd = -EBADF;
@@ -262,6 +264,38 @@ static void service_start_watchdog(Service *s) {
                 log_unit_warning_errno(UNIT(s), r, "Failed to install watchdog timer: %m");
 }
 
+/* Returns the delay to wait before the next automatic restart of the service.
+ *
+ * If RestartSteps=/RestartSecMax= are not both configured, or no more than one restart has
+ * happened or is planned, this is plain s->restart_usec. Otherwise the delay grows
+ * geometrically from s->restart_usec towards s->restart_usec_max over s->restart_steps steps,
+ * and stays at s->restart_usec_max once the steps are used up. */
+usec_t service_restart_usec(Service *s) {
+        unsigned n_restarts;
+        long double ratio, exponent;
+        usec_t value;
+
+        assert(s);
+
+        /* s->n_restarts is not yet updated when we're in these states, so let's add 1 to it manually.
+         * Note that for SERVICE_AUTO_RESTART a restart job might have been enqueued,
+         * i.e. s->n_restarts is already increased. But we assume it's not since the time
+         * between job enqueuing and running is usually negligible compared to the time
+         * we'll be sleeping. */
+        n_restarts = s->n_restarts +
+                     (IN_SET(s->state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_AUTO_RESTART) ? 1 : 0);
+
+        /* n_restarts can be 0 if no restart has happened or is planned. A zero base interval
+         * cannot be scaled geometrically (and would divide by zero below), and a maximum that is
+         * not strictly larger than the base leaves nothing to step through, so in all of these
+         * cases we simply use the base interval. Using >= rather than == here means a violated
+         * invariant degrades gracefully instead of tripping an assertion. */
+        if (n_restarts <= 1 ||
+            s->restart_steps == 0 ||
+            s->restart_usec == 0 ||
+            s->restart_usec_max == USEC_INFINITY ||
+            s->restart_usec >= s->restart_usec_max)
+                return s->restart_usec;
+
+        if (n_restarts > s->restart_steps)
+                return s->restart_usec_max;
+
+        /* r_i = r_0 * (r_n / r_0) ^ (i / n)
+         * where,
+         *   r_0 : initial restart interval (s->restart_usec),
+         *   r_n : maximum restart interval (s->restart_usec_max),
+         *   i   : number of steps already taken (n_restarts - 1),
+         *   n   : total number of steps (s->restart_steps).
+         * Working on the ratio rather than on the difference in microseconds keeps the result
+         * independent of the time unit and spreads the growth evenly across all steps. */
+        ratio = (long double) s->restart_usec_max / s->restart_usec;
+        exponent = (long double) (n_restarts - 1) / s->restart_steps;
+        value = (usec_t) (s->restart_usec * powl(ratio, exponent));
+
+        /* Guard against floating point rounding pushing us past the configured maximum. */
+        return value > s->restart_usec_max ? s->restart_usec_max : value;
+}
+
 static void service_extend_event_source_timeout(Service *s, sd_event_source *source, usec_t extended) {
         usec_t current;
         int r;
@@ -644,6 +678,17 @@ static int service_verify(Service *s) {
         if (s->exit_type == SERVICE_EXIT_CGROUP && cg_unified() < CGROUP_UNIFIED_SYSTEMD)
                 log_unit_warning(UNIT(s), "Service has ExitType=cgroup set, but we are running with legacy cgroups v1, which might not work correctly. Continuing.");
 
+        if (s->restart_usec_max == USEC_INFINITY && s->restart_steps > 0)
+                log_unit_warning(UNIT(s), "Service has RestartSteps= but no RestartSecMax= setting. Ignoring.");
+
+        if (s->restart_usec_max != USEC_INFINITY && s->restart_steps == 0)
+                log_unit_warning(UNIT(s), "Service has RestartSecMax= but no RestartSteps= setting. Ignoring.");
+
+        if (s->restart_usec_max < s->restart_usec) {
+                log_unit_warning(UNIT(s), "RestartSecMax= has a value smaller than RestartSec=, resetting RestartSec= to RestartSecMax=.");
+                s->restart_usec = s->restart_usec_max;
+        }
+
         return 0;
 }
 
@@ -899,11 +944,15 @@ static void service_dump(Unit *u, FILE *f, const char *prefix) {
 
         fprintf(f,
                 "%sRestartSec: %s\n"
+                "%sRestartSteps: %u\n"
+                "%sRestartSecMax: %s\n"
                 "%sTimeoutStartSec: %s\n"
                 "%sTimeoutStopSec: %s\n"
                 "%sTimeoutStartFailureMode: %s\n"
                 "%sTimeoutStopFailureMode: %s\n",
                 prefix, FORMAT_TIMESPAN(s->restart_usec, USEC_PER_SEC),
+                prefix, s->restart_steps,
+                prefix, FORMAT_TIMESPAN(s->restart_usec_max, USEC_PER_SEC),
                 prefix, FORMAT_TIMESPAN(s->timeout_start_usec, USEC_PER_SEC),
                 prefix, FORMAT_TIMESPAN(s->timeout_stop_usec, USEC_PER_SEC),
                 prefix, service_timeout_failure_mode_to_string(s->timeout_start_failure_mode),
@@ -1215,7 +1264,7 @@ static usec_t service_coldplug_timeout(Service *s) {
                 return usec_add(UNIT(s)->state_change_timestamp.monotonic, service_timeout_abort_usec(s));
 
         case SERVICE_AUTO_RESTART:
-                return usec_add(UNIT(s)->inactive_enter_timestamp.monotonic, s->restart_usec);
+                return usec_add(UNIT(s)->inactive_enter_timestamp.monotonic, service_restart_usec(s));
 
         case SERVICE_CLEANING:
                 return usec_add(UNIT(s)->state_change_timestamp.monotonic, s->exec_context.timeout_clean_usec);
@@ -1901,7 +1950,7 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart)
         if (s->will_auto_restart) {
                 s->will_auto_restart = false;
 
-                r = service_arm_timer(s, /* relative= */ true, s->restart_usec);
+                r = service_arm_timer(s, /* relative= */ true, service_restart_usec(s));
                 if (r < 0) {
                         s->n_keep_fd_store--;
                         goto fail;
@@ -4116,8 +4165,8 @@ static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *us
         case SERVICE_AUTO_RESTART:
                 if (s->restart_usec > 0)
                         log_unit_debug(UNIT(s),
-                                       "Service RestartSec=%s expired, scheduling restart.",
-                                       FORMAT_TIMESPAN(s->restart_usec, USEC_PER_SEC));
+                                       "Service restart interval %s expired, scheduling restart.",
+                                       FORMAT_TIMESPAN(service_restart_usec(s), USEC_PER_SEC));
                 else
                         log_unit_debug(UNIT(s),
                                        "Service has no hold-off time (RestartSec=0), scheduling restart.");
index 7663f26f70a12a1f4ec797f405a6b1fdbbd9ff30..9d5b15d8c2e71a2b663408a5b0f86c37e6e360fd 100644 (file)
@@ -116,6 +116,8 @@ struct Service {
         char *pid_file;
 
         usec_t restart_usec;
+        unsigned restart_steps;
+        usec_t restart_usec_max;
         usec_t timeout_start_usec;
         usec_t timeout_stop_usec;
         usec_t timeout_abort_usec;
@@ -245,6 +247,8 @@ extern const UnitVTable service_vtable;
 int service_set_socket_fd(Service *s, int fd, struct Socket *socket, struct SocketPeer *peer, bool selinux_context_net);
 void service_close_socket_fd(Service *s);
 
+usec_t service_restart_usec(Service *s);
+
 const char* service_restart_to_string(ServiceRestart i) _const_;
 ServiceRestart service_restart_from_string(const char *s) _pure_;