From: Yu Watanabe Date: Thu, 27 Mar 2025 03:57:30 +0000 (+0900) Subject: udev-watch: add inotify watch by manager process X-Git-Tag: v258-rc1~864^2~1 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=7e50402aa367d9d8b1a72e81795984dda704dab4;p=thirdparty%2Fsystemd.git udev-watch: add inotify watch by manager process Previously, inotify watch on a device node was added/removed by a worker process processing the relevant uevent. However, that could not avoid races. For example, 1. A device node X is removed by the kernel (e.g. unplug USB memory), and the kernel removes the inotify watch for the device node and produces IN_IGNORED event and 'remove' uevent for the device. 2. Before udevd processes the 'remove' uevent of the device, a worker process may try to add an inotify watch on another device node Y. As the inotify watch on X has been already removed, the worker may acquire the same watch handle that was previously assigned to X. 3. Since the 'remove' uevent for X is not processed yet, the symlink named with the watch handle still exists and points to X. So, the worker process for Y cannot add the symlink... To avoid such races, let's sequentially add/remove inotify watch by the manager process. Note, this potentially reduces performance on boot when there exists huge amount of disks and/or partitions. --- diff --git a/src/udev/udev-manager.c b/src/udev/udev-manager.c index 883b27a2328..6dd62d35e65 100644 --- a/src/udev/udev-manager.c +++ b/src/udev/udev-manager.c @@ -257,8 +257,10 @@ void manager_exit(Manager *manager) { /* close sources of new events and discard buffered events */ manager->ctrl = udev_ctrl_unref(manager->ctrl); manager->varlink_server = sd_varlink_server_unref(manager->varlink_server); + + /* Disable the event source, but does not close the inotify fd here, as we may still receive + * notification messages about requests to add or remove inotify watches. */ manager->inotify_event = sd_event_source_disable_unref(manager->inotify_event); - manager->inotify_fd = safe_close(manager->inotify_fd); /* Disable the device monitor but do not free device monitor, as it may be used when a worker failed, * and the manager needs to broadcast the kernel event assigned to the worker to libudev listeners. @@ -394,6 +396,7 @@ static int worker_spawn(Manager *manager, Event *event) { if (r < 0) return log_error_errno(r, "Worker: Failed to set unicast sender: %m"); + pid_t manager_pid = getpid_cached(); _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL; r = pidref_safe_fork("(udev-worker)", FORK_DEATHSIG_SIGTERM, &pidref); if (r < 0) { @@ -405,8 +408,8 @@ static int worker_spawn(Manager *manager, Event *event) { .monitor = TAKE_PTR(worker_monitor), .properties = TAKE_PTR(manager->properties), .rules = TAKE_PTR(manager->rules), - .inotify_fd = TAKE_FD(manager->inotify_fd), .config = manager->config, + .manager_pid = manager_pid, }; if (setenv("NOTIFY_SOCKET", manager->worker_notify_socket_path, /* overwrite = */ true) < 0) { @@ -820,6 +823,48 @@ static int on_worker_notify(sd_event_source *s, int fd, uint32_t revents, void * return 0; } + if (strv_contains(l, "INOTIFY_WATCH_ADD=1")) { + assert(worker->event); + + r = manager_add_watch(manager, worker->event->dev); + if (ERRNO_IS_NEG_DEVICE_ABSENT(r)) + r = 0; + if (r < 0) + log_device_warning_errno(worker->event->dev, r, "Failed to add inotify watch, ignoring: %m"); + + /* Send the result back to the worker process. */ + r = pidref_sigqueue(&sender, SIGUSR1, r); + if (r < 0) { + log_device_warning_errno(worker->event->dev, r, + "Failed to send signal to worker process ["PID_FMT"], killing the worker process: %m", + sender.pid); + + (void) pidref_kill(&sender, SIGTERM); + worker->state = WORKER_KILLED; + } + return 0; + } + + if (strv_contains(l, "INOTIFY_WATCH_REMOVE=1")) { + assert(worker->event); + + r = manager_remove_watch(manager, worker->event->dev); + if (r < 0) + log_device_warning_errno(worker->event->dev, r, "Failed to remove inotify watch, ignoring: %m"); + + /* Send the result back to the worker process. */ + r = pidref_sigqueue(&sender, SIGUSR1, r); + if (r < 0) { + log_device_warning_errno(worker->event->dev, r, + "Failed to send signal to worker process ["PID_FMT"], killing the worker process: %m", + sender.pid); + + (void) pidref_kill(&sender, SIGTERM); + worker->state = WORKER_KILLED; + } + return 0; + } + if (strv_contains(l, "TRY_AGAIN=1")) /* Worker cannot lock the device. Requeue the event. */ event_requeue(worker->event); diff --git a/src/udev/udev-watch.c b/src/udev/udev-watch.c index 84a7f542b1f..7adde732df3 100644 --- a/src/udev/udev-watch.c +++ b/src/udev/udev-watch.c @@ -22,6 +22,9 @@ #include "udev-trace.h" #include "udev-util.h" #include "udev-watch.h" +#include "udev-worker.h" + +static int udev_watch_clear_by_wd(sd_device *dev, int dirfd, int wd); static int device_new_from_watch_handle_at(sd_device **ret, int dirfd, int wd) { char path_wd[STRLEN("/run/udev/watch/") + DECIMAL_STR_MAX(int)]; @@ -240,11 +243,15 @@ static int manager_process_inotify(Manager *manager, const struct inotify_event assert(manager); assert(e); - /* Do not handle IN_IGNORED here. Especially, do not try to call udev_watch_end() from the - * main process. Otherwise, the pair of the symlinks may become inconsistent, and several - * garbage may remain. The old symlinks are removed by a worker that processes the - * corresponding 'remove' uevent; - * udev_event_execute_rules() -> event_execute_rules_on_remove() -> udev_watch_end(). */ + if (FLAGS_SET(e->mask, IN_IGNORED)) { + log_debug("Received inotify event about removal of watch handle %i.", e->wd); + + r = udev_watch_clear_by_wd(/* dev = */ NULL, /* dirfd = */ -EBADF, e->wd); + if (r < 0) + log_warning_errno(r, "Failed to remove saved symlink(s) for watch handle %i, ignoring: %m", e->wd); + + return 0; + } if (!FLAGS_SET(e->mask, IN_CLOSE_WRITE)) return 0; @@ -281,13 +288,13 @@ static int on_inotify(sd_event_source *s, int fd, uint32_t revents, void *userda return 0; } -static int udev_watch_restore(int inotify_fd) { +static int udev_watch_restore(Manager *manager) { _cleanup_(rm_rf_safep) const char *old = "/run/udev/watch.old/"; int r; /* Move any old watches directory out of the way, and then restore the watches. */ - assert(inotify_fd >= 0); + assert(manager); rm_rf_safe(old); if (rename("/run/udev/watch/", old) < 0) { @@ -320,7 +327,7 @@ static int udev_watch_restore(int inotify_fd) { continue; } - (void) udev_watch_begin(inotify_fd, dev); + (void) manager_add_watch(manager, dev); } return 0; @@ -351,7 +358,7 @@ int manager_init_inotify(Manager *manager, int fd) { log_debug("Initialized new inotify instance, restoring inotify watches of previous invocation."); manager->inotify_fd = fd; - (void) udev_watch_restore(fd); + (void) udev_watch_restore(manager); r = notify_push_fd(manager->inotify_fd, "inotify"); if (r < 0) @@ -385,6 +392,51 @@ int manager_start_inotify(Manager *manager) { return 0; } +static int udev_watch_clear_by_wd(sd_device *dev, int dirfd, int wd) { + int r; + + _cleanup_close_ int dirfd_close = -EBADF; + if (dirfd < 0) { + dirfd_close = RET_NERRNO(open("/run/udev/watch/", O_CLOEXEC | O_DIRECTORY | O_NOFOLLOW | O_RDONLY)); + if (dirfd_close < 0) + return log_device_debug_errno(dev, dirfd_close, "Failed to open '/run/udev/watch/': %m"); + + dirfd = dirfd_close; + } + + char wd_str[DECIMAL_STR_MAX(int)]; + xsprintf(wd_str, "%d", wd); + + _cleanup_free_ char *id = NULL, *wd_alloc = NULL; + r = readlinkat_malloc(dirfd, wd_str, &id); + if (r == -ENOENT) + return 0; + if (r < 0) { + log_device_debug_errno(dev, r, "Failed to read '/run/udev/watch/%s': %m", wd_str); + goto finalize; + } + + r = readlinkat_malloc(dirfd, id, &wd_alloc); + if (r < 0) { + log_device_debug_errno(dev, r, "Failed to read '/run/udev/watch/%s': %m", id); + goto finalize; + } + + if (!streq(wd_str, wd_alloc)) { + r = log_device_debug_errno(dev, SYNTHETIC_ERRNO(ESTALE), "Unmatching watch handle found: %s -> %s -> %s", wd_str, id, wd_alloc); + goto finalize; + } + + if (unlinkat(dirfd, id, 0) < 0 && errno != ENOENT) + r = log_device_debug_errno(dev, errno, "Failed to remove '/run/udev/watch/%s': %m", id); + +finalize: + if (unlinkat(dirfd, wd_str, 0) < 0 && errno != ENOENT) + RET_GATHER(r, log_device_debug_errno(dev, errno, "Failed to remove '/run/udev/watch/%s': %m", wd_str)); + + return r; +} + static int udev_watch_clear(sd_device *dev, int dirfd, int *ret_wd) { _cleanup_free_ char *wd_str = NULL, *buf = NULL; const char *id; @@ -454,13 +506,13 @@ finalize: return r; } -int udev_watch_begin(int inotify_fd, sd_device *dev) { +int manager_add_watch(Manager *manager, sd_device *dev) { char wd_str[DECIMAL_STR_MAX(int)]; _cleanup_close_ int dirfd = -EBADF; const char *devnode, *id; int wd, r; - assert(inotify_fd >= 0); + assert(manager); assert(dev); /* Ignore the request of watching the device node on remove event, as the device node specified by @@ -486,13 +538,16 @@ int udev_watch_begin(int inotify_fd, sd_device *dev) { /* 2. Add inotify watch */ log_device_debug(dev, "Adding watch on '%s'", devnode); - wd = inotify_add_watch(inotify_fd, devnode, IN_CLOSE_WRITE); + wd = inotify_add_watch(manager->inotify_fd, devnode, IN_CLOSE_WRITE); if (wd < 0) return log_device_debug_errno(dev, errno, "Failed to watch device node '%s': %m", devnode); + /* 3. Clear old symlinks by the newly acquired watch handle, for the case that the watch handle is reused. */ + (void) udev_watch_clear_by_wd(dev, dirfd, wd); + xsprintf(wd_str, "%d", wd); - /* 3. Create new symlinks */ + /* 4. Create new symlinks */ if (symlinkat(wd_str, dirfd, id) < 0) { r = log_device_debug_errno(dev, errno, "Failed to create symlink '/run/udev/watch/%s' to '%s': %m", id, wd_str); goto on_failure; @@ -509,20 +564,17 @@ int udev_watch_begin(int inotify_fd, sd_device *dev) { on_failure: (void) unlinkat(dirfd, id, 0); - (void) inotify_rm_watch(inotify_fd, wd); + (void) inotify_rm_watch(manager->inotify_fd, wd); return r; } -int udev_watch_end(int inotify_fd, sd_device *dev) { +int manager_remove_watch(Manager *manager, sd_device *dev) { _cleanup_close_ int dirfd = -EBADF; int wd, r; - assert(inotify_fd >= 0); + assert(manager); assert(dev); - if (sd_device_get_devname(dev, NULL) < 0) - return 0; - dirfd = RET_NERRNO(open("/run/udev/watch", O_CLOEXEC | O_DIRECTORY | O_NOFOLLOW | O_RDONLY)); if (dirfd == -ENOENT) return 0; @@ -536,7 +588,61 @@ int udev_watch_end(int inotify_fd, sd_device *dev) { /* Then, remove inotify watch. */ log_device_debug(dev, "Removing watch handle %i.", wd); - (void) inotify_rm_watch(inotify_fd, wd); + (void) inotify_rm_watch(manager->inotify_fd, wd); return 0; } + +static int on_sigusr1(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) { + UdevWorker *worker = ASSERT_PTR(userdata); + + if ((pid_t) si->ssi_pid != worker->manager_pid) { + log_debug("Received SIGUSR1 from unexpected process [%"PRIu32"], ignoring.", si->ssi_pid); + return 0; + } + + return sd_event_exit(sd_event_source_get_event(s), 0); +} + +static int notify_and_wait_signal(UdevWorker *worker, sd_device *dev, const char *msg) { + int r; + + assert(worker); + assert(dev); + assert(msg); + + if (sd_device_get_devname(dev, NULL) < 0) + return 0; + + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + r = sd_event_new(&e); + if (r < 0) + return r; + + r = sd_event_add_signal(e, /* ret_event_source = */ NULL, SIGUSR1 | SD_EVENT_SIGNAL_PROCMASK, on_sigusr1, worker); + if (r < 0) + return r; + + r = sd_notify(/* unset_environment = */ false, msg); + if (r <= 0) + return r; + + return sd_event_loop(e); +} + +int udev_watch_begin(UdevWorker *worker, sd_device *dev) { + assert(worker); + assert(dev); + + if (device_for_action(dev, SD_DEVICE_REMOVE)) + return 0; + + return notify_and_wait_signal(worker, dev, "INOTIFY_WATCH_ADD=1"); +} + +int udev_watch_end(UdevWorker *worker, sd_device *dev) { + assert(worker); + assert(dev); + + return notify_and_wait_signal(worker, dev, "INOTIFY_WATCH_REMOVE=1"); +} diff --git a/src/udev/udev-watch.h b/src/udev/udev-watch.h index fed5f9614c7..71945e06af0 100644 --- a/src/udev/udev-watch.h +++ b/src/udev/udev-watch.h @@ -4,11 +4,15 @@ #include "sd-device.h" typedef struct Manager Manager; +typedef struct UdevWorker UdevWorker; void udev_watch_dump(void); int manager_init_inotify(Manager *manager, int fd); int manager_start_inotify(Manager *manager); -int udev_watch_begin(int inotify_fd, sd_device *dev); -int udev_watch_end(int inotify_fd, sd_device *dev); +int manager_add_watch(Manager *manager, sd_device *dev); +int manager_remove_watch(Manager *manager, sd_device *dev); + +int udev_watch_begin(UdevWorker *worker, sd_device *dev); +int udev_watch_end(UdevWorker *worker, sd_device *dev); diff --git a/src/udev/udev-worker.c b/src/udev/udev-worker.c index 28ca21291ef..82085e86c48 100644 --- a/src/udev/udev-worker.c +++ b/src/udev/udev-worker.c @@ -210,7 +210,7 @@ static int worker_process_device(UdevWorker *worker, sd_device *dev) { (void) worker_mark_block_device_read_only(dev); /* Disable watch during event processing. */ - r = udev_watch_end(worker->inotify_fd, dev); + r = udev_watch_end(worker, dev); if (r < 0) log_device_warning_errno(dev, r, "Failed to remove inotify watch, ignoring: %m"); @@ -228,8 +228,8 @@ static int worker_process_device(UdevWorker *worker, sd_device *dev) { /* Enable watch if requested. */ if (udev_event->inotify_watch) { - r = udev_watch_begin(worker->inotify_fd, dev); - if (r < 0 && r != -ENOENT) /* The device may be already removed, ignore -ENOENT. */ + r = udev_watch_begin(worker, dev); + if (r < 0) log_device_warning_errno(dev, r, "Failed to add inotify watch, ignoring: %m"); } diff --git a/src/udev/udev-worker.h b/src/udev/udev-worker.h index 521141c170d..356084194e0 100644 --- a/src/udev/udev-worker.h +++ b/src/udev/udev-worker.h @@ -25,9 +25,9 @@ typedef struct UdevWorker { Hashmap *properties; UdevRules *rules; - int inotify_fd; /* Do not close! */ - UdevConfig config; + + pid_t manager_pid; } UdevWorker; void udev_worker_done(UdevWorker *worker);