git.ipfire.org Git - thirdparty/systemd.git/commitdiff
core: add RestartRandomizedDelaySec= service option
author Michael Vogt <michael@amutable.com>
Wed, 27 May 2026 16:32:04 +0000 (18:32 +0200)
committer Lennart Poettering <lennart@poettering.net>
Sat, 20 Jun 2026 10:31:28 +0000 (12:31 +0200)
We already support exponential backoff for automatic restarts via
RestartSec=/RestartSteps=/RestartMaxDelaySec=, but there is no way to
randomize the restart delay. When many instances of a service fail at
the same time (e.g. because a shared resource briefly went away) they
are all restarted in lockstep, creating a thundering herd problem.

So this commit adds a simple `RestartRandomizedDelaySec=` service
option, similar to the timer setting `RandomizedDelaySec=`, which
adds a randomized delay on top of the regular restart delay.

man/org.freedesktop.systemd1.xml
man/systemd.service.xml
src/core/dbus-service.c
src/core/load-fragment-gperf.gperf.in
src/core/service.c
src/core/service.h
src/core/varlink-service.c
src/shared/bus-unit-util.c
src/shared/varlink-io.systemd.Unit.c
test/units/TEST-03-JOBS.sh

index de4a6e229fab20c22d4f62888b659b1778925636..d3c2e39a18c2d25da092dbcaebe0a49c0c171654 100644 (file)
@@ -2846,6 +2846,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
       readonly u RestartSteps = ...;
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly t RestartMaxDelayUSec = ...;
+      @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
+      readonly t RestartRandomizedDelayUSec = ...;
       @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
       readonly t RestartUSecNext = ...;
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
@@ -3578,6 +3580,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
 
     <!--property RestartMaxDelayUSec is not documented!-->
 
+    <!--property RestartRandomizedDelayUSec is not documented!-->
+
     <!--property RestartUSecNext is not documented!-->
 
     <!--property TimeoutStartFailureMode is not documented!-->
@@ -4214,6 +4218,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
 
     <variablelist class="dbus-property" generated="True" extra-ref="RestartMaxDelayUSec"/>
 
+    <variablelist class="dbus-property" generated="True" extra-ref="RestartRandomizedDelayUSec"/>
+
     <variablelist class="dbus-property" generated="True" extra-ref="RestartUSecNext"/>
 
     <variablelist class="dbus-property" generated="True" extra-ref="TimeoutStartUSec"/>
@@ -12857,6 +12863,7 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \
       <varname>CPUSetPartition</varname>, and
       <varname>OOMRules</varname> were added in version 261.</para>
       <para><varname>LUOSession</varname> was added in version 262.</para>
+      <para><varname>RestartRandomizedDelayUSec</varname> was added in version 262.</para>
     </refsect2>
     <refsect2>
       <title>Socket Unit Objects</title>
index 0ae6086feb8a9bda6873d294afcdd2b796ead54d..965e57a817f1ee4b14523baaeef17e52709183e1 100644 (file)
@@ -642,6 +642,22 @@ RestartMaxDelaySec=160s</programlisting>
         <xi:include href="version-info.xml" xpointer="v254"/></listitem>
       </varlistentry>
 
+      <varlistentry>
+        <term><varname>RestartRandomizedDelaySec=</varname></term>
+        <listitem><para>Delay automatic restarts by a randomly selected, evenly distributed amount of time
+        between 0 and the specified time value, added on top of the delay otherwise configured via
+        <varname>RestartSec=</varname> (and <varname>RestartSteps=</varname>/<varname>RestartMaxDelaySec=</varname>,
+        if used). Takes a value in the same format as <varname>RestartSec=</varname>. Defaults to 0, indicating
+        that no randomized delay shall be applied.</para>
+
+        <para>This setting is useful to stretch out the restarts of similarly configured service instances that
+        fail at the same time, to prevent them from restarting simultaneously and possibly resulting in
+        resource congestion. It is the restart-side analogue of <varname>RandomizedDelaySec=</varname> in
+        <citerefentry><refentrytitle>systemd.timer</refentrytitle><manvolnum>5</manvolnum></citerefentry>.</para>
+
+        <xi:include href="version-info.xml" xpointer="v262"/></listitem>
+      </varlistentry>
+
       <varlistentry>
         <term><varname>TimeoutStartSec=</varname></term>
         <listitem><para>Configures the time to wait for start-up. If a daemon service does not signal
index 5fc06df714b7f77a15bc3fb990da08ee50113b97..923249cec89aa9c82307aff7bd4981d82db9d190 100644 (file)
@@ -358,6 +358,7 @@ const sd_bus_vtable bus_service_vtable[] = {
         SD_BUS_PROPERTY("RestartUSec", "t", bus_property_get_usec, offsetof(Service, restart_usec), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("RestartSteps", "u", bus_property_get_unsigned, offsetof(Service, restart_steps), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("RestartMaxDelayUSec", "t", bus_property_get_usec, offsetof(Service, restart_max_delay_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("RestartRandomizedDelayUSec", "t", bus_property_get_usec, offsetof(Service, restart_randomized_delay_usec), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("RestartUSecNext", "t", property_get_restart_usec_next, 0, 0),
         SD_BUS_PROPERTY("TimeoutStartUSec", "t", bus_property_get_usec, offsetof(Service, timeout_start_usec), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("TimeoutStopUSec", "t", bus_property_get_usec, offsetof(Service, timeout_stop_usec), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -611,6 +612,9 @@ static int bus_service_set_transient_property(
         if (streq(name, "RestartMaxDelayUSec"))
                 return bus_set_transient_usec(u, name, &s->restart_max_delay_usec, message, flags, reterr_error);
 
+        if (streq(name, "RestartRandomizedDelayUSec"))
+                return bus_set_transient_usec(u, name, &s->restart_randomized_delay_usec, message, flags, reterr_error);
+
         if (streq(name, "TimeoutStartUSec")) {
                 r = bus_set_transient_usec(u, name, &s->timeout_start_usec, message, flags, reterr_error);
                 if (r >= 0 && !UNIT_WRITE_FLAGS_NOOP(flags))
index fcfdbd3169f91d69a203fe20bd07169a5c3b4a35..2b8d2296f0922b18dd7a75b967edff08200c8324 100644 (file)
@@ -453,6 +453,7 @@ Service.ExecStopPost,                         config_parse_exec,
 Service.RestartSec,                           config_parse_sec,                                   0,                                  offsetof(Service, restart_usec)
 Service.RestartSteps,                         config_parse_unsigned,                              0,                                  offsetof(Service, restart_steps)
 Service.RestartMaxDelaySec,                   config_parse_sec,                                   0,                                  offsetof(Service, restart_max_delay_usec)
+Service.RestartRandomizedDelaySec,            config_parse_sec,                                   0,                                  offsetof(Service, restart_randomized_delay_usec)
 Service.TimeoutSec,                           config_parse_service_timeout,                       0,                                  0
 Service.TimeoutStartSec,                      config_parse_service_timeout,                       0,                                  0
 Service.TimeoutStopSec,                       config_parse_sec_fix_0,                             0,                                  offsetof(Service, timeout_stop_usec)
index 3ae59df7ab76d0d9588288de722e079ee67216fe..9e4af299f7ec9835265e7a87ffeec0b93db2df16 100644 (file)
@@ -400,6 +400,13 @@ usec_t service_restart_usec_next(const Service *s) {
                                                 (long double) (n_restarts_next - 1) / s->restart_steps));
 }
 
+static usec_t service_restart_usec_next_jittered(const Service *s) {
+        assert(s);
+
+        /* Single helper for the restart timer and the deadline reconstructed at coldplug so they can't drift.
+         * Yields service_restart_usec_next() plus the randomized delay already picked for the pending
+         * auto-restart (restart_randomized_delay_chosen_usec); no new random value is drawn here. */
+        return usec_add(service_restart_usec_next(s), s->restart_randomized_delay_chosen_usec);
+}
+
 static void service_extend_event_source_timeout(Service *s, sd_event_source *source, usec_t extended) {
         usec_t current;
         int r;
@@ -1080,6 +1087,11 @@ static int service_verify(Service *s) {
                 s->restart_usec = s->restart_max_delay_usec;
         }
 
+        if (s->restart_randomized_delay_usec == USEC_INFINITY) {
+                log_unit_warning(UNIT(s), "RestartRandomizedDelaySec= cannot be infinity, ignoring.");
+                s->restart_randomized_delay_usec = 0;
+        }
+
         if (s->refresh_on_reload_set && s->refresh_on_reload_flags != _SERVICE_REFRESH_ON_RELOAD_ALL) {
                 if (FLAGS_SET(s->refresh_on_reload_flags, SERVICE_RELOAD_EXTENSIONS))
                         service_can_reload_extensions(s, /* warn = */ true);
@@ -1388,6 +1400,7 @@ static void service_dump(Unit *u, FILE *f, const char *prefix) {
                 "%sRestartSec: %s\n"
                 "%sRestartSteps: %u\n"
                 "%sRestartMaxDelaySec: %s\n"
+                "%sRestartRandomizedDelaySec: %s\n"
                 "%sTimeoutStartSec: %s\n"
                 "%sTimeoutStopSec: %s\n"
                 "%sTimeoutStartFailureMode: %s\n"
@@ -1395,6 +1408,7 @@ static void service_dump(Unit *u, FILE *f, const char *prefix) {
                 prefix, FORMAT_TIMESPAN(s->restart_usec, USEC_PER_SEC),
                 prefix, s->restart_steps,
                 prefix, FORMAT_TIMESPAN(s->restart_max_delay_usec, USEC_PER_SEC),
+                prefix, FORMAT_TIMESPAN(s->restart_randomized_delay_usec, USEC_PER_SEC),
                 prefix, FORMAT_TIMESPAN(s->timeout_start_usec, USEC_PER_SEC),
                 prefix, FORMAT_TIMESPAN(s->timeout_stop_usec, USEC_PER_SEC),
                 prefix, service_timeout_failure_mode_to_string(s->timeout_start_failure_mode),
@@ -1741,7 +1755,8 @@ static usec_t service_coldplug_timeout(Service *s) {
                 return usec_add(UNIT(s)->state_change_timestamp.monotonic, service_timeout_abort_usec(s));
 
         case SERVICE_AUTO_RESTART:
-                return usec_add(UNIT(s)->inactive_enter_timestamp.monotonic, service_restart_usec_next(s));
+                return usec_add(UNIT(s)->inactive_enter_timestamp.monotonic,
+                                service_restart_usec_next_jittered(s));
 
         case SERVICE_CLEANING:
                 return usec_add(UNIT(s)->state_change_timestamp.monotonic, s->exec_context.timeout_clean_usec);
@@ -2514,7 +2529,11 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart)
                 if (s->restart_mode != SERVICE_RESTART_MODE_DIRECT)
                         service_set_state(s, restart_state);
 
-                restart_usec_next = service_restart_usec_next(s);
+                /* Do the randomized restart delay once and remember it so that it's stable across daemon-reload */
+                s->restart_randomized_delay_chosen_usec = s->restart_randomized_delay_usec > 0 ?
+                        random_u64_range(s->restart_randomized_delay_usec) : 0;
+
+                restart_usec_next = service_restart_usec_next_jittered(s);
 
                 r = service_arm_timer(s, /* relative= */ true, restart_usec_next);
                 if (r < 0) {
@@ -2534,7 +2553,9 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart)
                                 log_unit_notice(UNIT(s), "Service dead, subsequent restarts will be executed with debug level logging.");
                 }
 
-                log_unit_debug(UNIT(s), "Next restart interval calculated as: %s", FORMAT_TIMESPAN(restart_usec_next, 0));
+                log_unit_debug(UNIT(s), "Next restart interval calculated as: %s (randomized delay: %s)",
+                               FORMAT_TIMESPAN(restart_usec_next, 0),
+                               FORMAT_TIMESPAN(s->restart_randomized_delay_chosen_usec, 0));
 
                 service_set_state(s, SERVICE_AUTO_RESTART);
         } else {
@@ -3744,6 +3765,7 @@ static int service_serialize(Unit *u, FILE *f, FDSet *fds) {
         (void) serialize_bool(f, "bus-name-good", s->bus_name_good);
 
         (void) serialize_item_format(f, "n-restarts", "%u", s->n_restarts);
+        (void) serialize_usec(f, "restart-randomized-delay-chosen-usec", s->restart_randomized_delay_chosen_usec);
         (void) serialize_bool(f, "forbid-restart", s->forbid_restart);
 
         service_serialize_exec_command(u, f, s->control_command);
@@ -4197,6 +4219,9 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value,
                 if (r < 0)
                         log_unit_debug_errno(u, r, "Failed to parse serialized restart counter '%s': %m", value);
 
+        } else if (streq(key, "restart-randomized-delay-chosen-usec")) {
+                (void) deserialize_usec(value, &s->restart_randomized_delay_chosen_usec);
+
         } else if (streq(key, "forbid-restart")) {
                 r = parse_boolean(value);
                 if (r < 0)
index c8a09ca8293abf313283e182a02751d68577e8c8..c028248bba3df482c29eaae1315a6d24b8ca95c0 100644 (file)
@@ -155,6 +155,8 @@ typedef struct Service {
         unsigned restart_steps;
         usec_t restart_usec;
         usec_t restart_max_delay_usec;
+        usec_t restart_randomized_delay_usec;        /* configured upper bound for the randomized restart delay */
+        usec_t restart_randomized_delay_chosen_usec; /* the value actually picked for the pending auto-restart */
         usec_t timeout_start_usec;
         usec_t timeout_stop_usec;
         usec_t timeout_abort_usec;
index dde75a30ff8022fb05d218c499735616a0406221..a2d244f6e9fbc5659b37f4bc2f49da7a3aed7c7b 100644 (file)
@@ -118,6 +118,7 @@ int service_context_build_json(sd_json_variant **ret, const char *name, void *us
                         JSON_BUILD_PAIR_FINITE_USEC("RestartUSec", s->restart_usec),
                         JSON_BUILD_PAIR_UNSIGNED_NON_ZERO("RestartSteps", s->restart_steps),
                         JSON_BUILD_PAIR_FINITE_USEC_NON_ZERO("RestartMaxDelayUSec", s->restart_max_delay_usec),
+                        JSON_BUILD_PAIR_FINITE_USEC_NON_ZERO("RestartRandomizedDelayUSec", s->restart_randomized_delay_usec),
                         JSON_BUILD_PAIR_FINITE_USEC("TimeoutStartUSec", s->timeout_start_usec),
                         JSON_BUILD_PAIR_FINITE_USEC("TimeoutStopUSec", s->timeout_stop_usec),
                         JSON_BUILD_PAIR_ENUM("TimeoutStartFailureMode", service_timeout_failure_mode_to_string(s->timeout_start_failure_mode)),
index c36ada61db9cda37620831730ceaa04f3e2fdd9d..8b618bf5001d444163ed4a2057a0b4548fd11008 100644 (file)
@@ -2695,6 +2695,7 @@ static const BusProperty service_properties[] = {
         { "GuessMainPID",                          bus_append_parse_boolean                      },
         { "RestartSec",                            bus_append_parse_sec_rename                   },
         { "RestartMaxDelaySec",                    bus_append_parse_sec_rename                   },
+        { "RestartRandomizedDelaySec",             bus_append_parse_sec_rename                   },
         { "TimeoutStartSec",                       bus_append_parse_sec_rename                   },
         { "TimeoutStopSec",                        bus_append_parse_sec_rename                   },
         { "TimeoutAbortSec",                       bus_append_parse_sec_rename                   },
index 0c63725c7507791b58ba0e770b9787d55ee31bde..3fc2d6fc7942c9f18bece71835c1f6252fe737f8 100644 (file)
@@ -1429,6 +1429,8 @@ static SD_VARLINK_DEFINE_STRUCT_TYPE(
                 SD_VARLINK_DEFINE_FIELD(RestartSteps, SD_VARLINK_INT, SD_VARLINK_NULLABLE),
                 SD_VARLINK_FIELD_COMMENT("https://www.freedesktop.org/software/systemd/man/"PROJECT_VERSION_STR"/systemd.service.html#RestartMaxDelaySec="),
                 SD_VARLINK_DEFINE_FIELD(RestartMaxDelayUSec, SD_VARLINK_INT, SD_VARLINK_NULLABLE),
+                SD_VARLINK_FIELD_COMMENT("https://www.freedesktop.org/software/systemd/man/"PROJECT_VERSION_STR"/systemd.service.html#RestartRandomizedDelaySec="),
+                SD_VARLINK_DEFINE_FIELD(RestartRandomizedDelayUSec, SD_VARLINK_INT, SD_VARLINK_NULLABLE),
                 SD_VARLINK_FIELD_COMMENT("https://www.freedesktop.org/software/systemd/man/"PROJECT_VERSION_STR"/systemd.service.html#TimeoutStartSec="),
                 SD_VARLINK_DEFINE_FIELD(TimeoutStartUSec, SD_VARLINK_INT, SD_VARLINK_NULLABLE),
                 SD_VARLINK_FIELD_COMMENT("https://www.freedesktop.org/software/systemd/man/"PROJECT_VERSION_STR"/systemd.service.html#TimeoutStopSec="),
index 6f7494ef2f83ec320dc83c087d0abe73891d287e..67fe117fc8662423c4bd250470479cc319ea33eb 100755 (executable)
@@ -218,4 +218,76 @@ assert_eq "$(systemctl show "$UNIT_NAME" -P NRestarts)" "1"
 
 rm /run/systemd/system/"$UNIT_NAME"
 
+# Test RestartRandomizedDelaySec=
+
+export UNIT_NAME="TEST-03-JOBS-restart-randomized-delay.service"
+
+cat >"/run/systemd/system/$UNIT_NAME" <<EOF
+[Service]
+Type=simple
+ExecStart=false
+Restart=on-failure
+RestartSec=1
+RestartRandomizedDelaySec=1
+StartLimitIntervalSec=0
+EOF
+
+systemctl daemon-reload
+
+# The option should be parsed and exposed on the bus as RestartRandomizedDelayUSec (systemctl show renders the usec value as a timespan, hence "1s").
+assert_eq "$(systemctl show "$UNIT_NAME" -P RestartRandomizedDelayUSec)" "1s"
+
+# The chosen delay is logged at debug level when the unit enters auto-restart, so we can read it without
+# waiting for the delay to elapse.
+PREV_LOG_LEVEL="$(systemctl log-level)"
+
+restart_randomized_delay_cleanup() {
+    set +e  # best-effort cleanup: keep going even if one of the steps below fails
+    systemctl log-level "$PREV_LOG_LEVEL"  # restore the log level that was saved before switching to debug
+    systemctl stop "$UNIT_NAME"  # make sure no auto-restart of the test unit is left pending
+    rm -f /run/systemd/system/"$UNIT_NAME"
+    systemctl daemon-reload  # reload after the unit file has been removed
+}
+trap restart_randomized_delay_cleanup EXIT
+
+systemctl log-level debug
+
+get_restart_interval() {
+    # Enter auto-restart once, then stop again so the delay never has to elapse; prints "<total>|<delay>" taken from the last matching debug log line of the unit.
+    systemctl start --no-block "$UNIT_NAME"
+    timeout 10 bash -c 'while [[ "$(systemctl show "'"$UNIT_NAME"'" -P SubState)" != "auto-restart" ]]; do sleep .2; done'
+    systemctl stop "$UNIT_NAME"
+    journalctl --sync  # have pending log messages written to the journal before it is queried
+    # '|| true' is needed because of -o pipefail (presumably journalctl exits non-zero when --grep matches nothing)
+    { journalctl -q --no-pager -o cat -b -u "$UNIT_NAME" --grep="Next restart interval calculated as" || true; } |
+        sed -n 's/.*calculated as: \(.*\) (randomized delay: \(.*\))$/\1|\2/p' | tail -n1
+}
+
+# Draw several samples and only require that they are not all equal: comparing just two draws could falsely fail, since two draws can rarely (~1e-6) render identically.
+DELAYS=()
+TOTALS=()
+for _ in {1..4}; do
+    IFS='|' read -r total delay <<<"$(get_restart_interval)"  # split the "<total>|<delay>" pair
+    TOTALS+=("$total")
+    DELAYS+=("$delay")
+done
+
+systemctl log-level "$PREV_LOG_LEVEL"
+
+: "Chosen randomized restart delays: ${DELAYS[*]} (totals: ${TOTALS[*]})"
+for delay in "${DELAYS[@]}"; do
+    assert_neq "$delay" ""
+    # Within bound: a value below 1s is rendered in ms/us only, so a digit directly followed by "s" means the delay reached the 1s bound.
+    if [[ "$delay" =~ [0-9]s ]]; then
+        echo "FAIL: randomized restart delay '$delay' exceeds the configured 1s bound" >&2
+        exit 1
+    fi
+done
+# The totals must not all be equal, proving the jitter is folded into the armed timer (not merely logged).
+all_equal=1
+for total in "${TOTALS[@]}"; do
+    [[ "$total" == "${TOTALS[0]}" ]] || all_equal=0
+done
+assert_eq "$all_equal" "0"
+
 touch /testok