From: Luca Boccassi Date: Fri, 8 May 2026 13:21:33 +0000 (+0100) Subject: homectl: retry DeactivateHome on transient busy errors X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=39863e2b1e4c98d126282a8c8272de379d248e08;p=thirdparty%2Fsystemd.git homectl: retry DeactivateHome on transient busy errors When 'homectl deactivate' is called immediately after a preceding operation, the umount inside systemd-homework can fail with EBUSY because something briefly holds a reference to the home mount (e.g. a concurrent inspect). systemd-homed already handles this gracefully by moving the home into the 'lingering' state and retrying deactivation after 15 seconds, but the bus reply for the original DeactivateHome call returns the org.freedesktop.home1.HomeBusy error immediately, which makes TEST-46-HOMED flaky. Fix homectl to follow homed and retry for up to 30 seconds on HomeBusy and add a test case trying to make the issue more reproducible. --- diff --git a/src/home/home-util.h b/src/home/home-util.h index 0b1781d2728..32dcae30442 100644 --- a/src/home/home-util.h +++ b/src/home/home-util.h @@ -35,5 +35,8 @@ int bus_message_append_secret(sd_bus_message *m, UserRecord *secret); * operations permit a *very* long timeout */ #define HOME_SLOW_BUS_CALL_TIMEOUT_USEC (2*USEC_PER_MINUTE) +/* Retry to deactivate home directories again and again every 15s until it works */ +#define HOME_RETRY_DEACTIVATE_USEC (15U * USEC_PER_SEC) + const char* home_record_dir(void); const char* home_system_blob_dir(void); diff --git a/src/home/homectl.c b/src/home/homectl.c index 3d3cd6087d1..c0dab67989d 100644 --- a/src/home/homectl.c +++ b/src/home/homectl.c @@ -2062,22 +2062,41 @@ static int verb_deactivate_home(int argc, char *argv[], uintptr_t _data, void *u return r; STRV_FOREACH(i, strv_skip(argv, 1)) { - _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; - _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; + /* The home directory might still be busy for a brief moment after a preceding operation + * (e.g. a concurrent inspect/deactivate, or a stray reference holding the mount busy at + * unmount time). homed will transition the home into "lingering" state and retry + * deactivation internally after some time, but rather than failing immediately let's just + * retry the bus call here for a while, so callers don't need to deal with this transient + * condition themselves. Use double the time homed waits to avoid racing with it. */ + usec_t end = usec_add(now(CLOCK_MONOTONIC), 2 * HOME_RETRY_DEACTIVATE_USEC); - r = bus_message_new_method_call(bus, &m, bus_mgr, "DeactivateHome"); - if (r < 0) - return bus_log_create_error(r); + for (;;) { + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; - r = sd_bus_message_append(m, "s", *i); - if (r < 0) - return bus_log_create_error(r); + r = bus_message_new_method_call(bus, &m, bus_mgr, "DeactivateHome"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append(m, "s", *i); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_call(bus, m, HOME_SLOW_BUS_CALL_TIMEOUT_USEC, &error, /* ret_reply= */ NULL); + if (r >= 0) + break; + + if (sd_bus_error_has_name(&error, BUS_ERROR_HOME_BUSY) && + now(CLOCK_MONOTONIC) < end) { + log_info("Home of user %s is currently busy, retrying deactivation.", *i); + (void) usleep_safe(1 * USEC_PER_SEC); + continue; + } - r = sd_bus_call(bus, m, HOME_SLOW_BUS_CALL_TIMEOUT_USEC, &error, NULL); - if (r < 0) { log_error_errno(r, "Failed to deactivate user home: %s", bus_error_message(&error, r)); if (ret == 0) ret = r; + break; } } diff --git a/src/home/homed-home.c b/src/home/homed-home.c index 33d39165592..012d65afc29 100644 --- a/src/home/homed-home.c +++ b/src/home/homed-home.c @@ -49,9 +49,6 @@ #include "user-record-util.h" #include "user-util.h" -/* Retry to deactivate home directories again and again every 15s until it works */ -#define RETRY_DEACTIVATE_USEC (15U * USEC_PER_SEC) - #define HOME_USERS_MAX 500 #define PENDING_OPERATIONS_MAX 100 @@ -514,7 +511,7 @@ static void home_start_retry_deactivate(Home *h) { h->manager->event, &h->retry_deactivate_event_source, CLOCK_MONOTONIC, - RETRY_DEACTIVATE_USEC, + HOME_RETRY_DEACTIVATE_USEC, 1*USEC_PER_MINUTE, home_on_retry_deactivate, h); diff --git a/test/units/TEST-46-HOMED.sh b/test/units/TEST-46-HOMED.sh index 5d090b016fc..595f46b8a87 100755 --- a/test/units/TEST-46-HOMED.sh +++ b/test/units/TEST-46-HOMED.sh @@ -1062,4 +1062,51 @@ testcase_fscrypt() { homectl remove fscrypttest } +testcase_deactivate_busy() { + # Verify that "homectl deactivate" is robust against transient EBUSY + # failures of the umount() inside systemd-homework. This used to make + # TEST-46-HOMED occasionally fail when something briefly held a reference + # to the home mount at the moment the deactivation tried to unmount it. + # + # Reproduce the situation deterministically by spawning a background + # process whose cwd is the home directory: that holds the mount busy via + # the kernel's cwd reference until the process exits, so the initial + # umount2() call in homework will fail with EBUSY. homectl is expected to + # transparently retry the bus call until it succeeds (once the holder + # exits). + + NEWPASSWORD=hunter2 homectl create \ + --storage=directory \ + --enforce-password-policy=no \ + busytest + PASSWORD=hunter2 homectl activate busytest + inspect busytest + + # Make sure the home is actually mounted before we try to hold it busy, + # otherwise the subshell below would silently fail to acquire the cwd + # reference. + mountpoint /home/busytest + + # Spawn a process whose cwd is inside the home mount. `cd` is a shell + # builtin so the subshell process itself acquires the cwd reference, and + # `exec sleep` then preserves it across the exec. + ( cd /home/busytest && exec sleep 10 ) & + local busy_pid=$! + + # Wait until the kernel actually reports the cwd of the background + # process as the home directory, so we know the busy reference is in + # place before we attempt to deactivate. + timeout 5 bash -c "until [[ \"\$(readlink /proc/${busy_pid}/cwd 2>/dev/null)\" == /home/busytest ]]; do sleep 0.1; done" + + # The deactivate must succeed eventually: the first umount2() will fail + # with EBUSY, but homectl retries the call for up to 30 seconds, by + # which time the background process will have exited and released the + # cwd reference. + homectl deactivate busytest + wait_for_state busytest inactive + + wait "$busy_pid" || true + homectl remove busytest +} + run_testcases