From: Michael Vogt Date: Wed, 27 May 2026 16:32:04 +0000 (+0200) Subject: core: add RestartRandomizedDelaySec= service option X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=046de3caeaa8fdfe6014ffdf23b2db6e4b6c60db;p=thirdparty%2Fsystemd.git core: add RestartRandomizedDelaySec= service option We already support exponential backoff for automatic restarts via RestartSec=/RestartSteps=/RestartMaxDelaySec=, but there is no way to randomize the restart delay. When many instances of a service fail at the same time (e.g. because a shared resource briefly went away) they are all restarted in lockstep, creating a thundering herd problem. So this commit adds a simple `RestartRandomizedDelaySec=` service option which is similar to the timer `RandomizedDelaySec=` and adds a randomized restart delay. --- diff --git a/man/org.freedesktop.systemd1.xml b/man/org.freedesktop.systemd1.xml index de4a6e229fa..d3c2e39a18c 100644 --- a/man/org.freedesktop.systemd1.xml +++ b/man/org.freedesktop.systemd1.xml @@ -2846,6 +2846,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { readonly u RestartSteps = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly t RestartMaxDelayUSec = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly t RestartRandomizedDelayUSec = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t RestartUSecNext = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") @@ -3578,6 +3580,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + + @@ -4214,6 +4218,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + + @@ -12857,6 +12863,7 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ CPUSetPartition, and OOMRules were added in version 261. LUOSession was added in version 262. + RestartRandomizedDelayUSec was added in version 262. 
Socket Unit Objects diff --git a/man/systemd.service.xml b/man/systemd.service.xml index 0ae6086feb8..965e57a817f 100644 --- a/man/systemd.service.xml +++ b/man/systemd.service.xml @@ -642,6 +642,22 @@ RestartMaxDelaySec=160s + + RestartRandomizedDelaySec= + Delay automatic restarts by a randomly selected, evenly distributed amount of time + between 0 and the specified time value, added on top of the delay otherwise configured via + RestartSec= (and RestartSteps=/RestartMaxDelaySec=, + if used). Takes a value in the same format as RestartSec=. Defaults to 0, indicating + that no randomized delay shall be applied. + + This setting is useful to stretch out the restarts of similarly configured service instances that + fail at the same time, to prevent them from restarting simultaneously and possibly resulting in + resource congestion. It is the restart-side analogue of RandomizedDelaySec= in + systemd.timer(5). + + + + TimeoutStartSec= Configures the time to wait for start-up. If a daemon service does not signal diff --git a/src/core/dbus-service.c b/src/core/dbus-service.c index 5fc06df714b..923249cec89 100644 --- a/src/core/dbus-service.c +++ b/src/core/dbus-service.c @@ -358,6 +358,7 @@ const sd_bus_vtable bus_service_vtable[] = { SD_BUS_PROPERTY("RestartUSec", "t", bus_property_get_usec, offsetof(Service, restart_usec), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("RestartSteps", "u", bus_property_get_unsigned, offsetof(Service, restart_steps), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("RestartMaxDelayUSec", "t", bus_property_get_usec, offsetof(Service, restart_max_delay_usec), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("RestartRandomizedDelayUSec", "t", bus_property_get_usec, offsetof(Service, restart_randomized_delay_usec), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("RestartUSecNext", "t", property_get_restart_usec_next, 0, 0), SD_BUS_PROPERTY("TimeoutStartUSec", "t", bus_property_get_usec, offsetof(Service, timeout_start_usec), 
SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("TimeoutStopUSec", "t", bus_property_get_usec, offsetof(Service, timeout_stop_usec), SD_BUS_VTABLE_PROPERTY_CONST), @@ -611,6 +612,9 @@ static int bus_service_set_transient_property( if (streq(name, "RestartMaxDelayUSec")) return bus_set_transient_usec(u, name, &s->restart_max_delay_usec, message, flags, reterr_error); + if (streq(name, "RestartRandomizedDelayUSec")) + return bus_set_transient_usec(u, name, &s->restart_randomized_delay_usec, message, flags, reterr_error); + if (streq(name, "TimeoutStartUSec")) { r = bus_set_transient_usec(u, name, &s->timeout_start_usec, message, flags, reterr_error); if (r >= 0 && !UNIT_WRITE_FLAGS_NOOP(flags)) diff --git a/src/core/load-fragment-gperf.gperf.in b/src/core/load-fragment-gperf.gperf.in index fcfdbd3169f..2b8d2296f09 100644 --- a/src/core/load-fragment-gperf.gperf.in +++ b/src/core/load-fragment-gperf.gperf.in @@ -453,6 +453,7 @@ Service.ExecStopPost, config_parse_exec, Service.RestartSec, config_parse_sec, 0, offsetof(Service, restart_usec) Service.RestartSteps, config_parse_unsigned, 0, offsetof(Service, restart_steps) Service.RestartMaxDelaySec, config_parse_sec, 0, offsetof(Service, restart_max_delay_usec) +Service.RestartRandomizedDelaySec, config_parse_sec, 0, offsetof(Service, restart_randomized_delay_usec) Service.TimeoutSec, config_parse_service_timeout, 0, 0 Service.TimeoutStartSec, config_parse_service_timeout, 0, 0 Service.TimeoutStopSec, config_parse_sec_fix_0, 0, offsetof(Service, timeout_stop_usec) diff --git a/src/core/service.c b/src/core/service.c index 3ae59df7ab7..9e4af299f7e 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -400,6 +400,13 @@ usec_t service_restart_usec_next(const Service *s) { (long double) (n_restarts_next - 1) / s->restart_steps)); } +static usec_t service_restart_usec_next_jittered(const Service *s) { + assert(s); + + /* Single helper for the restart timer and the deadline reconstructed at coldplug so they can't drift */ 
+ return usec_add(service_restart_usec_next(s), s->restart_randomized_delay_chosen_usec); +} + static void service_extend_event_source_timeout(Service *s, sd_event_source *source, usec_t extended) { usec_t current; int r; @@ -1080,6 +1087,11 @@ static int service_verify(Service *s) { s->restart_usec = s->restart_max_delay_usec; } + if (s->restart_randomized_delay_usec == USEC_INFINITY) { + log_unit_warning(UNIT(s), "RestartRandomizedDelaySec= cannot be infinity, ignoring."); + s->restart_randomized_delay_usec = 0; + } + if (s->refresh_on_reload_set && s->refresh_on_reload_flags != _SERVICE_REFRESH_ON_RELOAD_ALL) { if (FLAGS_SET(s->refresh_on_reload_flags, SERVICE_RELOAD_EXTENSIONS)) service_can_reload_extensions(s, /* warn = */ true); @@ -1388,6 +1400,7 @@ static void service_dump(Unit *u, FILE *f, const char *prefix) { "%sRestartSec: %s\n" "%sRestartSteps: %u\n" "%sRestartMaxDelaySec: %s\n" + "%sRestartRandomizedDelaySec: %s\n" "%sTimeoutStartSec: %s\n" "%sTimeoutStopSec: %s\n" "%sTimeoutStartFailureMode: %s\n" @@ -1395,6 +1408,7 @@ static void service_dump(Unit *u, FILE *f, const char *prefix) { prefix, FORMAT_TIMESPAN(s->restart_usec, USEC_PER_SEC), prefix, s->restart_steps, prefix, FORMAT_TIMESPAN(s->restart_max_delay_usec, USEC_PER_SEC), + prefix, FORMAT_TIMESPAN(s->restart_randomized_delay_usec, USEC_PER_SEC), prefix, FORMAT_TIMESPAN(s->timeout_start_usec, USEC_PER_SEC), prefix, FORMAT_TIMESPAN(s->timeout_stop_usec, USEC_PER_SEC), prefix, service_timeout_failure_mode_to_string(s->timeout_start_failure_mode), @@ -1741,7 +1755,8 @@ static usec_t service_coldplug_timeout(Service *s) { return usec_add(UNIT(s)->state_change_timestamp.monotonic, service_timeout_abort_usec(s)); case SERVICE_AUTO_RESTART: - return usec_add(UNIT(s)->inactive_enter_timestamp.monotonic, service_restart_usec_next(s)); + return usec_add(UNIT(s)->inactive_enter_timestamp.monotonic, + service_restart_usec_next_jittered(s)); case SERVICE_CLEANING: return 
usec_add(UNIT(s)->state_change_timestamp.monotonic, s->exec_context.timeout_clean_usec); @@ -2514,7 +2529,11 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart) if (s->restart_mode != SERVICE_RESTART_MODE_DIRECT) service_set_state(s, restart_state); - restart_usec_next = service_restart_usec_next(s); + /* Do the randomized restart delay once and remember it so that it's stable across daemon-reload */ + s->restart_randomized_delay_chosen_usec = s->restart_randomized_delay_usec > 0 ? + random_u64_range(s->restart_randomized_delay_usec) : 0; + + restart_usec_next = service_restart_usec_next_jittered(s); r = service_arm_timer(s, /* relative= */ true, restart_usec_next); if (r < 0) { @@ -2534,7 +2553,9 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart) log_unit_notice(UNIT(s), "Service dead, subsequent restarts will be executed with debug level logging."); } - log_unit_debug(UNIT(s), "Next restart interval calculated as: %s", FORMAT_TIMESPAN(restart_usec_next, 0)); + log_unit_debug(UNIT(s), "Next restart interval calculated as: %s (randomized delay: %s)", + FORMAT_TIMESPAN(restart_usec_next, 0), + FORMAT_TIMESPAN(s->restart_randomized_delay_chosen_usec, 0)); service_set_state(s, SERVICE_AUTO_RESTART); } else { @@ -3744,6 +3765,7 @@ static int service_serialize(Unit *u, FILE *f, FDSet *fds) { (void) serialize_bool(f, "bus-name-good", s->bus_name_good); (void) serialize_item_format(f, "n-restarts", "%u", s->n_restarts); + (void) serialize_usec(f, "restart-randomized-delay-chosen-usec", s->restart_randomized_delay_chosen_usec); (void) serialize_bool(f, "forbid-restart", s->forbid_restart); service_serialize_exec_command(u, f, s->control_command); @@ -4197,6 +4219,9 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value, if (r < 0) log_unit_debug_errno(u, r, "Failed to parse serialized restart counter '%s': %m", value); + } else if (streq(key, "restart-randomized-delay-chosen-usec")) 
{ + (void) deserialize_usec(value, &s->restart_randomized_delay_chosen_usec); + } else if (streq(key, "forbid-restart")) { r = parse_boolean(value); if (r < 0) diff --git a/src/core/service.h b/src/core/service.h index c8a09ca8293..c028248bba3 100644 --- a/src/core/service.h +++ b/src/core/service.h @@ -155,6 +155,8 @@ typedef struct Service { unsigned restart_steps; usec_t restart_usec; usec_t restart_max_delay_usec; + usec_t restart_randomized_delay_usec; /* configured upper bound for the randomized restart delay */ + usec_t restart_randomized_delay_chosen_usec; /* the value actually picked for the pending auto-restart */ usec_t timeout_start_usec; usec_t timeout_stop_usec; usec_t timeout_abort_usec; diff --git a/src/core/varlink-service.c b/src/core/varlink-service.c index dde75a30ff8..a2d244f6e9f 100644 --- a/src/core/varlink-service.c +++ b/src/core/varlink-service.c @@ -118,6 +118,7 @@ int service_context_build_json(sd_json_variant **ret, const char *name, void *us JSON_BUILD_PAIR_FINITE_USEC("RestartUSec", s->restart_usec), JSON_BUILD_PAIR_UNSIGNED_NON_ZERO("RestartSteps", s->restart_steps), JSON_BUILD_PAIR_FINITE_USEC_NON_ZERO("RestartMaxDelayUSec", s->restart_max_delay_usec), + JSON_BUILD_PAIR_FINITE_USEC_NON_ZERO("RestartRandomizedDelayUSec", s->restart_randomized_delay_usec), JSON_BUILD_PAIR_FINITE_USEC("TimeoutStartUSec", s->timeout_start_usec), JSON_BUILD_PAIR_FINITE_USEC("TimeoutStopUSec", s->timeout_stop_usec), JSON_BUILD_PAIR_ENUM("TimeoutStartFailureMode", service_timeout_failure_mode_to_string(s->timeout_start_failure_mode)), diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index c36ada61db9..8b618bf5001 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -2695,6 +2695,7 @@ static const BusProperty service_properties[] = { { "GuessMainPID", bus_append_parse_boolean }, { "RestartSec", bus_append_parse_sec_rename }, { "RestartMaxDelaySec", bus_append_parse_sec_rename }, + { "RestartRandomizedDelaySec", 
bus_append_parse_sec_rename }, { "TimeoutStartSec", bus_append_parse_sec_rename }, { "TimeoutStopSec", bus_append_parse_sec_rename }, { "TimeoutAbortSec", bus_append_parse_sec_rename }, diff --git a/src/shared/varlink-io.systemd.Unit.c b/src/shared/varlink-io.systemd.Unit.c index 0c63725c750..3fc2d6fc794 100644 --- a/src/shared/varlink-io.systemd.Unit.c +++ b/src/shared/varlink-io.systemd.Unit.c @@ -1429,6 +1429,8 @@ static SD_VARLINK_DEFINE_STRUCT_TYPE( SD_VARLINK_DEFINE_FIELD(RestartSteps, SD_VARLINK_INT, SD_VARLINK_NULLABLE), SD_VARLINK_FIELD_COMMENT("https://www.freedesktop.org/software/systemd/man/"PROJECT_VERSION_STR"/systemd.service.html#RestartMaxDelaySec="), SD_VARLINK_DEFINE_FIELD(RestartMaxDelayUSec, SD_VARLINK_INT, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("https://www.freedesktop.org/software/systemd/man/"PROJECT_VERSION_STR"/systemd.service.html#RestartRandomizedDelaySec="), + SD_VARLINK_DEFINE_FIELD(RestartRandomizedDelayUSec, SD_VARLINK_INT, SD_VARLINK_NULLABLE), SD_VARLINK_FIELD_COMMENT("https://www.freedesktop.org/software/systemd/man/"PROJECT_VERSION_STR"/systemd.service.html#TimeoutStartSec="), SD_VARLINK_DEFINE_FIELD(TimeoutStartUSec, SD_VARLINK_INT, SD_VARLINK_NULLABLE), SD_VARLINK_FIELD_COMMENT("https://www.freedesktop.org/software/systemd/man/"PROJECT_VERSION_STR"/systemd.service.html#TimeoutStopSec="), diff --git a/test/units/TEST-03-JOBS.sh b/test/units/TEST-03-JOBS.sh index 6f7494ef2f8..67fe117fc86 100755 --- a/test/units/TEST-03-JOBS.sh +++ b/test/units/TEST-03-JOBS.sh @@ -218,4 +218,76 @@ assert_eq "$(systemctl show "$UNIT_NAME" -P NRestarts)" "1" rm /run/systemd/system/"$UNIT_NAME" +# Test RestartRandomizedDelaySec= + +export UNIT_NAME="TEST-03-JOBS-restart-randomized-delay.service" + +cat >"/run/systemd/system/$UNIT_NAME" <|", then stop again so it never has to elapse. 
+ systemctl start --no-block "$UNIT_NAME" + timeout 10 bash -c 'while [[ "$(systemctl show "'"$UNIT_NAME"'" -P SubState)" != "auto-restart" ]]; do sleep .2; done' + systemctl stop "$UNIT_NAME" + journalctl --sync + # needed because of -o pipefail + { journalctl -q --no-pager -o cat -b -u "$UNIT_NAME" --grep="Next restart interval calculated as" || true; } | + sed -n 's/.*calculated as: \(.*\) (randomized delay: \(.*\))$/\1|\2/p' | tail -n1 +} + +# Several samples + "not all equal": two draws could rarely render identically (~1e-6) and falsely fail. +DELAYS=() +TOTALS=() +for _ in {1..4}; do + IFS='|' read -r total delay <<<"$(get_restart_interval)" + TOTALS+=("$total") + DELAYS+=("$delay") +done + +systemctl log-level "$PREV_LOG_LEVEL" + +: "Chosen randomized restart delays: ${DELAYS[*]} (totals: ${TOTALS[*]})" +for delay in "${DELAYS[@]}"; do + assert_neq "$delay" "" + # Within bound: a value below 1s never renders a bare "s" token (only ms/us). + if [[ "$delay" =~ [0-9]s ]]; then + echo "FAIL: randomized restart delay '$delay' exceeds the configured 1s bound" >&2 + exit 1 + fi +done +# Total must vary, proving the jitter is folded into the armed timer (not merely logged). +all_equal=1 +for total in "${TOTALS[@]}"; do + [[ "$total" == "${TOTALS[0]}" ]] || all_equal=0 +done +assert_eq "$all_equal" "0" + touch /testok