From: Luca Boccassi Date: Wed, 27 Mar 2024 21:14:15 +0000 (+0000) Subject: run: fix generated unit name clash after soft-reboot X-Git-Tag: v256-rc1~370 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ed358516937780b524a2cfa833427da3df1bc87f;p=thirdparty%2Fsystemd.git run: fix generated unit name clash after soft-reboot When sd-run connects to D-Bus rather than the private socket, it will generate the transient unit name using the bus ID assigned by the D-Bus broker/daemon. The issue is that this ID is only unique per D-Bus run, if the broker/daemon restarts it starts again from 1, and it's a simple incremental counter for each client. So if a transient unit run-u6.service starts and fails, and it is not collected (default on failure), and the system soft-reboots, any new transient unit might conflict as the counter will restart: Failed to start transient service unit: Unit run-u6.service was already loaded or has a fragment file. Get the soft-reboot counter, and if it's greater than zero, append it to the autogenerated unit name to avoid clashes. --- diff --git a/src/run/run.c b/src/run/run.c index 7f40ed00760..9349bfafbc4 100644 --- a/src/run/run.c +++ b/src/run/run.c @@ -1313,6 +1313,7 @@ static int transient_timer_set_properties(sd_bus_message *m) { } static int make_unit_name(sd_bus *bus, UnitType t, char **ret) { + unsigned soft_reboots_count = 0; const char *unique, *id; char *p; int r; @@ -1351,9 +1352,23 @@ static int make_unit_name(sd_bus *bus, UnitType t, char **ret) { "Unique name %s has unexpected format.", unique); - p = strjoin("run-u", id, ".", unit_type_to_string(t)); - if (!p) - return log_oom(); + /* The unique D-Bus names are actually unique per D-Bus instance, so on soft-reboot they will wrap + * and start over since the D-Bus broker is restarted. If there's a failed unit left behind that + * hasn't been garbage collected, we'll conflict. Append the soft-reboot counter to avoid clashing. */ + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + r = bus_get_property_trivial( + bus, bus_systemd_mgr, "SoftRebootsCount", &error, 'u', &soft_reboots_count); + if (r < 0) + log_debug_errno(r, "Failed to get SoftRebootsCount property, ignoring: %s", bus_error_message(&error, r)); + + if (soft_reboots_count > 0) { + if (asprintf(&p, "run-u%s-s%u.%s", id, soft_reboots_count, unit_type_to_string(t)) < 0) + return log_oom(); + } else { + p = strjoin("run-u", id, ".", unit_type_to_string(t)); + if (!p) + return log_oom(); + } *ret = p; return 0; diff --git a/test/units/testsuite-82.sh b/test/units/testsuite-82.sh index 8ff69c90e4d..74361455980 100755 --- a/test/units/testsuite-82.sh +++ b/test/units/testsuite-82.sh @@ -146,6 +146,11 @@ elif [ -f /run/testsuite82.touch ]; then # Restart the unit that is not supposed to survive systemd-run --collect --service-type=exec --unit=testsuite-82-nosurvive.service sleep infinity + # Now ensure there are no naming clashes and a bunch of transient units all succeed + for _ in $(seq 1 25); do + systemd-run --wait true + done + # Now issue the soft reboot. We should be right back soon. Given /run/nextroot exists, we should # automatically do a softreboot instead of normal reboot. touch /run/testsuite82.touch2 @@ -227,6 +232,14 @@ EOF systemd-inhibit --what=shutdown --who=test --why=test --mode=delay \ sleep infinity + # Enqueue a bunch of failing units to try and trigger the transient name clash that happens due to D-Bus + # being restarted and the "unique" bus IDs not being unique across restarts + for _ in $(seq 1 25); do + # Use --wait to ensure we connect to the system bus instead of the private bus (otherwise a UUID is + # used instead of the bus ID) + systemd-run --wait false || true + done + # Now issue the soft reboot. We should be right back soon. touch /run/testsuite82.touch systemctl --no-block --check-inhibitors=yes soft-reboot