From: Yu Watanabe
Date: Tue, 6 May 2025 16:09:09 +0000 (+0900)
Subject: udev: refactoring for managing events for locked block devices
X-Git-Tag: v258-rc1~646^2~2
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=f8b1d4fdfb6d58352faef47f2845310d53b547e9;p=thirdparty%2Fsystemd.git

udev: refactoring for managing events for locked block devices

Previously, when an event for a block device was queued, or an inotify
event for a block device was triggered, all queued events were checked
by event_queue_assume_block_device_unlocked(), which could be expensive
during early boot.

Now, locked events are managed in a separate prioq and hashmap. This
reduces the cost of event_queue_assume_block_device_unlocked(), which
is renamed to manager_requeue_locked_events_by_device().

This also changes the clockid of the timer events for requeueing, and
of the retry timeout, from CLOCK_BOOTTIME to CLOCK_MONOTONIC. Otherwise,
if the system is suspended while an event is locked, the event may be
requeued immediately after resuming from suspend and then time out.
---
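For review context: the core of the patch is that a locked event is now
indexed twice, once by the name of its whole disk (so a single uevent or
inotify event for that disk can requeue every event waiting on it), and
once by its requeue deadline (so the requeue timer only ever needs to
look at the earliest deadline). Below is a minimal standalone sketch of
that bookkeeping, assuming simplified stand-ins for systemd's Hashmap,
Prioq and LIST_* helpers; all names and structures in it (LockedEvent,
by_disk, by_time, enter_locked, peek_earliest) are illustrative only,
not part of the patch.

#include <inttypes.h>
#include <stdio.h>
#include <string.h>

typedef uint64_t usec_t;

typedef struct LockedEvent {
        const char *whole_disk;             /* key of the by-disk index */
        usec_t requeue_next_usec;           /* key of the by-time index */
        struct LockedEvent *same_disk_next; /* chain of events on the same whole disk */
} LockedEvent;

/* Stand-in for Hashmap *locked_events_by_disk: whole-disk name -> event chain. */
#define MAX_DISKS 16
static struct { const char *disk; LockedEvent *head; } by_disk[MAX_DISKS];

/* Stand-in for Prioq *locked_events_by_time: a flat array scanned linearly. */
static LockedEvent *by_time[256];
static size_t n_by_time;

static void enter_locked(LockedEvent *e) {
        /* Index 1: prepend to the chain of events waiting on this whole disk. */
        for (size_t i = 0; i < MAX_DISKS; i++)
                if (!by_disk[i].disk || strcmp(by_disk[i].disk, e->whole_disk) == 0) {
                        e->same_disk_next = by_disk[i].head;
                        by_disk[i].disk = e->whole_disk;
                        by_disk[i].head = e;
                        break;
                }

        /* Index 2: make the event findable by its requeue deadline. */
        by_time[n_by_time++] = e;
}

static LockedEvent* peek_earliest(void) {
        /* Stand-in for prioq_peek(): the deadline the requeue timer must fire at next. */
        LockedEvent *best = NULL;

        for (size_t i = 0; i < n_by_time; i++)
                if (!best || by_time[i]->requeue_next_usec < best->requeue_next_usec)
                        best = by_time[i];
        return best;
}

int main(void) {
        LockedEvent a = { .whole_disk = "/dev/sda", .requeue_next_usec = 300 };
        LockedEvent b = { .whole_disk = "/dev/sdb", .requeue_next_usec = 100 };

        enter_locked(&a);
        enter_locked(&b);

        /* The timer path asks the by-time index which deadline to arm next... */
        printf("arm timer for %s at t=%" PRIu64 "\n",
               peek_earliest()->whole_disk, peek_earliest()->requeue_next_usec);

        /* ...while an inotify event for /dev/sdb walks the by-disk chain and
         * requeues every event waiting on that disk at once. */
        for (size_t i = 0; i < MAX_DISKS; i++)
                if (by_disk[i].disk && strcmp(by_disk[i].disk, "/dev/sdb") == 0)
                        for (LockedEvent *e = by_disk[i].head; e; e = e->same_disk_next)
                                printf("requeue event on %s\n", e->whole_disk);

        return 0;
}

Compared with the removed event_queue_assume_block_device_unlocked(),
which walked the whole event queue on every such notification, both
lookups above touch only the locked events themselves; the real patch
gets the same effect with a hash table and a priority queue instead of
the linear scans used in this sketch.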
diff --git a/src/udev/udev-manager.c b/src/udev/udev-manager.c
index 1aaa9c7b36a..709ec418762 100644
--- a/src/udev/udev-manager.c
+++ b/src/udev/udev-manager.c
@@ -39,13 +39,14 @@
 #include "udev-watch.h"
 #include "udev-worker.h"
 
-#define EVENT_RETRY_INTERVAL_USEC (200 * USEC_PER_MSEC)
-#define EVENT_RETRY_TIMEOUT_USEC (3 * USEC_PER_MINUTE)
+#define EVENT_REQUEUE_INTERVAL_USEC (200 * USEC_PER_MSEC)
+#define EVENT_REQUEUE_TIMEOUT_USEC (3 * USEC_PER_MINUTE)
 
 typedef enum EventState {
         EVENT_UNDEF,
         EVENT_QUEUED,
         EVENT_RUNNING,
+        EVENT_LOCKED,
 } EventState;
 
 typedef struct Event {
@@ -63,9 +64,11 @@ typedef struct Event {
         const char *devnode;
 
         /* Used when the device is locked by another program. */
-        usec_t retry_again_next_usec;
-        usec_t retry_again_timeout_usec;
-        sd_event_source *retry_event_source;
+        usec_t requeue_next_usec;
+        usec_t requeue_timeout_usec;
+        unsigned locked_event_prioq_index;
+        char *whole_disk;
+        LIST_FIELDS(Event, same_disk);
 
         bool dependencies_built;
         Set *blocker_events;
@@ -107,11 +110,38 @@ static void event_clear_dependencies(Event *event) {
         event->dependencies_built = false;
 }
 
+static void event_unset_whole_disk(Event *event) {
+        Manager *manager = ASSERT_PTR(ASSERT_PTR(event)->manager);
+
+        if (!event->whole_disk)
+                return;
+
+        if (event->same_disk_prev)
+                /* If this is not the first event, then simply remove this event. */
+                event->same_disk_prev->same_disk_next = event->same_disk_next;
+        else if (event->same_disk_next)
+                /* If this is the first event, replace with the next event. */
+                assert_se(hashmap_replace(manager->locked_events_by_disk, event->same_disk_next->whole_disk, event->same_disk_next) >= 0);
+        else
+                /* Otherwise, remove the entry. */
+                assert_se(hashmap_remove(manager->locked_events_by_disk, event->whole_disk) == event);
+
+        if (event->same_disk_next)
+                event->same_disk_next->same_disk_prev = event->same_disk_prev;
+
+        event->same_disk_prev = event->same_disk_next = NULL;
+
+        event->whole_disk = mfree(event->whole_disk);
+}
+
 static Event* event_free(Event *event) {
         if (!event)
                 return NULL;
 
         if (event->manager) {
+                event_unset_whole_disk(event);
+                prioq_remove(event->manager->locked_events_by_time, event, &event->locked_event_prioq_index);
+
                 if (event->manager->last_event == event)
                         event->manager->last_event = event->event_prev;
                 LIST_REMOVE(event, event->manager->events, event);
@@ -124,8 +154,6 @@ static Event* event_free(Event *event) {
 
         sd_device_unref(event->dev);
 
-        sd_event_source_unref(event->retry_event_source);
-
         return mfree(event);
 }
 
@@ -169,6 +197,10 @@ Manager* manager_free(Manager *manager) {
         while (manager->events)
                 event_free(manager->events);
 
+        prioq_free(manager->locked_events_by_time);
+        hashmap_free(manager->locked_events_by_disk);
+        sd_event_source_unref(manager->requeue_locked_events_timer_event_source);
+
         safe_close(manager->inotify_fd);
 
         free(manager->worker_notify_socket_path);
@@ -279,6 +311,8 @@ void manager_exit(Manager *manager) {
 
         /* Kill all workers with SIGTERM, and disable unnecessary timer event source. */
         manager_kill_workers(manager, SIGTERM);
         manager->kill_workers_event = sd_event_source_disable_unref(manager->kill_workers_event);
+
+        (void) event_source_disable(manager->requeue_locked_events_timer_event_source);
 }
 
@@ -568,8 +602,6 @@ static int event_run(Event *event) {
 
         log_device_uevent(event->dev, "Device ready for processing");
 
-        (void) event_source_disable(event->retry_event_source);
-
         Worker *worker;
         HASHMAP_FOREACH(worker, manager->workers) {
                 if (worker->state != WORKER_IDLE)
@@ -649,19 +681,6 @@ static int event_is_blocked(Event *event) {
         int r;
 
         assert(event);
-        assert(event->manager);
-
-        if (event->retry_again_next_usec > 0) {
-                assert(event->dependencies_built);
-
-                usec_t now_usec;
-                r = sd_event_now(event->manager->event, CLOCK_BOOTTIME, &now_usec);
-                if (r < 0)
-                        return r;
-
-                if (event->retry_again_next_usec > now_usec)
-                        return true;
-        }
 
         r = event_build_dependencies(event);
         if (r < 0)
@@ -747,91 +766,130 @@ static int event_queue_start(Manager *manager) {
         return 0;
 }
 
-static int on_event_retry(sd_event_source *s, uint64_t usec, void *userdata) {
-        /* This does nothing. The on_post() callback will start the event if there exists an idle worker. */
+static int on_requeue_locked_events(sd_event_source *s, uint64_t usec, void *userdata) {
+        /* This does nothing. The on_post() callback will requeue locked events. */
         return 1;
 }
 
-static int event_requeue(Event *event) {
-        usec_t now_usec;
+static int manager_requeue_locked_events(Manager *manager) {
+        usec_t now_usec = 0;
         int r;
 
-        assert(event);
-        assert(event->manager);
-        assert(event->manager->event);
+        assert(manager);
+
+        for (;;) {
+                Event *event = prioq_peek(manager->locked_events_by_time);
+                if (!event)
+                        return event_source_disable(manager->requeue_locked_events_timer_event_source);
+
+                if (now_usec == 0) {
+                        r = sd_event_now(manager->event, CLOCK_MONOTONIC, &now_usec);
+                        if (r < 0)
+                                return r;
+                }
+
+                if (event->requeue_next_usec > now_usec)
+                        return event_reset_time(
+                                        manager->event,
+                                        &manager->requeue_locked_events_timer_event_source,
+                                        CLOCK_MONOTONIC,
+                                        event->requeue_next_usec,
+                                        USEC_PER_SEC,
+                                        on_requeue_locked_events,
+                                        /* userdata = */ NULL,
+                                        EVENT_PRIORITY_REQUEUE_EVENT,
+                                        "requeue-locked-events",
+                                        /* force_reset = */ true);
+
+                assert_se(prioq_pop(manager->locked_events_by_time) == event);
+                event_unset_whole_disk(event);
+                event->state = EVENT_QUEUED;
+        }
+}
+
+int manager_requeue_locked_events_by_device(Manager *manager, sd_device *dev) {
+        int r;
+
+        /* When a new event for a block device is queued or we get an inotify event, assume that the
+         * device is not locked anymore. The assumption may not be true, but that should not cause any
+         * issues, as in that case events will be requeued soon. */
+
+        if (hashmap_isempty(manager->locked_events_by_disk))
+                return 0;
+
+        const char *devname;
+        r = udev_get_whole_disk(dev, NULL, &devname);
+        if (r <= 0)
+                return r;
+
+        Event *first = hashmap_remove(manager->locked_events_by_disk, devname);
+        if (!first)
+                return 0;
+        Event *event;
+        while ((event = LIST_POP(same_disk, first))) {
+                assert_se(prioq_remove(manager->locked_events_by_time, event, &event->locked_event_prioq_index) > 0);
+                event->whole_disk = mfree(event->whole_disk);
+                event->state = EVENT_QUEUED;
+        }
+
+        return 0;
+}
+
+static int locked_event_compare(const Event *x, const Event *y) {
+        return CMP(x->requeue_next_usec, y->requeue_next_usec);
+}
+
+static int event_enter_locked(Event *event, const char *whole_disk) {
+        Manager *manager = ASSERT_PTR(ASSERT_PTR(event)->manager);
         sd_device *dev = ASSERT_PTR(event->dev);
+        usec_t now_usec;
+        int r;
 
         /* add a short delay to suppress busy loop */
-        r = sd_event_now(event->manager->event, CLOCK_BOOTTIME, &now_usec);
+        r = sd_event_now(manager->event, CLOCK_MONOTONIC, &now_usec);
         if (r < 0)
                 return log_device_warning_errno(
                                 dev, r,
                                 "Failed to get current time, skipping event (SEQNUM=%"PRIu64", ACTION=%s): %m",
                                 event->seqnum, strna(device_action_to_string(event->action)));
 
-        if (event->retry_again_timeout_usec > 0 && event->retry_again_timeout_usec <= now_usec)
+        if (event->requeue_timeout_usec > 0 && event->requeue_timeout_usec <= now_usec)
                 return log_device_warning_errno(
                                 dev, SYNTHETIC_ERRNO(ETIMEDOUT),
                                 "The underlying block device is locked by a process more than %s, skipping event (SEQNUM=%"PRIu64", ACTION=%s).",
-                                FORMAT_TIMESPAN(EVENT_RETRY_TIMEOUT_USEC, USEC_PER_MINUTE),
+                                FORMAT_TIMESPAN(EVENT_REQUEUE_TIMEOUT_USEC, USEC_PER_MINUTE),
                                 event->seqnum, strna(device_action_to_string(event->action)));
 
-        event->retry_again_next_usec = usec_add(now_usec, EVENT_RETRY_INTERVAL_USEC);
-        if (event->retry_again_timeout_usec == 0)
-                event->retry_again_timeout_usec = usec_add(now_usec, EVENT_RETRY_TIMEOUT_USEC);
+        event->requeue_next_usec = usec_add(now_usec, EVENT_REQUEUE_INTERVAL_USEC);
+        if (event->requeue_timeout_usec == 0)
+                event->requeue_timeout_usec = usec_add(now_usec, EVENT_REQUEUE_TIMEOUT_USEC);
 
-        r = event_reset_time_relative(
-                        event->manager->event,
-                        &event->retry_event_source,
-                        CLOCK_MONOTONIC,
-                        EVENT_RETRY_INTERVAL_USEC,
-                        /* accuracy = */ 0,
-                        on_event_retry,
-                        /* userdata = */ NULL,
-                        EVENT_PRIORITY_RETRY_EVENT,
-                        "retry-event",
-                        /* force_reset = */ true);
-        if (r < 0)
+        if (isempty(whole_disk))
                 return log_device_warning_errno(
-                                dev, r,
-                                "Failed to reset timer event source for retrying event, skipping event (SEQNUM=%"PRIu64", ACTION=%s): %m",
+                                dev, SYNTHETIC_ERRNO(EBADMSG),
+                                "Unexpected notify message received, skipping event (SEQNUM=%"PRIu64", ACTION=%s): %m",
                                 event->seqnum, strna(device_action_to_string(event->action)));
 
-        event->state = EVENT_QUEUED;
-        return 0;
-}
-
-int event_queue_assume_block_device_unlocked(Manager *manager, sd_device *dev) {
-        const char *devname;
-        int r;
-
-        /* When a new event for a block device is queued or we get an inotify event, assume that the
-         * device is not locked anymore. The assumption may not be true, but that should not cause any
-         * issues, as in that case events will be requeued soon. */
-
-        r = udev_get_whole_disk(dev, NULL, &devname);
-        if (r <= 0)
-                return r;
-
-        LIST_FOREACH(event, event, manager->events) {
-                const char *event_devname;
-
-                if (event->state != EVENT_QUEUED)
-                        continue;
-
-                if (event->retry_again_next_usec == 0)
-                        continue;
+        _cleanup_free_ char *whole_disk_copy = strdup(whole_disk);
+        if (!whole_disk_copy)
+                return log_oom();
 
-                if (udev_get_whole_disk(event->dev, NULL, &event_devname) <= 0)
-                        continue;
-
-                if (!streq(devname, event_devname))
-                        continue;
+        Event *first = hashmap_get(manager->locked_events_by_disk, whole_disk_copy);
+        LIST_PREPEND(same_disk, first, event);
 
-                event->retry_again_next_usec = 0;
+        r = hashmap_ensure_replace(&manager->locked_events_by_disk, &path_hash_ops, whole_disk_copy, first);
+        if (r < 0) {
+                LIST_REMOVE(same_disk, first, event);
+                return log_oom();
         }
 
+        event->whole_disk = TAKE_PTR(whole_disk_copy);
+
+        r = prioq_ensure_put(&manager->locked_events_by_time, locked_event_compare, event, &event->locked_event_prioq_index);
+        if (r < 0)
+                return log_oom();
+
+        event->state = EVENT_LOCKED;
         return 0;
 }
 
@@ -882,6 +940,7 @@ static int event_queue_insert(Manager *manager, sd_device *dev) {
                 .devpath_old = devpath_old,
                 .devnode = devnode,
                 .state = EVENT_QUEUED,
+                .locked_event_prioq_index = PRIOQ_IDX_NULL,
         };
 
         LIST_INSERT_AFTER(event, manager->events, manager->last_event, event);
@@ -1014,8 +1073,7 @@ static int on_uevent(sd_device_monitor *monitor, sd_device *dev, void *userdata)
                 return 1;
         }
 
-        (void) event_queue_assume_block_device_unlocked(manager, dev);
-
+        (void) manager_requeue_locked_events_by_device(manager, dev);
         return 1;
 }
 
@@ -1139,8 +1197,8 @@ static int on_worker_notify(sd_event_source *s, int fd, uint32_t revents, void *
         _cleanup_(event_freep) Event *event = worker_detach_event(worker);
 
         if (strv_contains(l, "TRY_AGAIN=1")) {
-                /* Worker cannot lock the device. Requeue the event. */
-                r = event_requeue(event);
+                /* Worker cannot lock the device. */
+                r = event_enter_locked(event, strv_find_startswith(l, "WHOLE_DISK="));
                 if (r < 0) {
                         (void) device_add_errno(event->dev, r);
                         (void) device_broadcast_on_error(event->dev, manager->monitor);
@@ -1231,6 +1289,8 @@ static int on_post(sd_event_source *s, void *userdata) {
                 return on_post_exit(manager);
 
         if (manager->events) {
+                (void) manager_requeue_locked_events(manager);
+
                 /* Try to process pending events if idle workers exist. Why is this necessary?
                  * When a worker finished an event and became idle, even if there was a pending event,
                  * the corresponding device might have been locked and the processing of the event
diff --git a/src/udev/udev-manager.h b/src/udev/udev-manager.h
index 87af431f2d9..d59ab0eb368 100644
--- a/src/udev/udev-manager.h
+++ b/src/udev/udev-manager.h
@@ -10,6 +10,7 @@
 #include "hashmap.h"
 #include "list.h"
 #include "macro.h"
+#include "prioq.h"
 #include "time-util.h"
 #include "udev-config.h"
 #include "udev-ctrl.h"
@@ -40,7 +41,7 @@
 #define EVENT_PRIORITY_VARLINK (SD_EVENT_PRIORITY_NORMAL + 2)
 #define EVENT_PRIORITY_CONTROL (SD_EVENT_PRIORITY_NORMAL + 2)
 /* The event is intended to trigger the post-event source, hence can be the lowest priority. */
-#define EVENT_PRIORITY_RETRY_EVENT (SD_EVENT_PRIORITY_NORMAL + 3)
+#define EVENT_PRIORITY_REQUEUE_EVENT (SD_EVENT_PRIORITY_NORMAL + 3)
 
 typedef struct Event Event;
 typedef struct UdevRules UdevRules;
@@ -69,6 +70,10 @@ typedef struct Manager {
 
         sd_event_source *kill_workers_event;
 
+        Hashmap *locked_events_by_disk;
+        Prioq *locked_events_by_time;
+        sd_event_source *requeue_locked_events_timer_event_source;
+
         usec_t last_usec;
 
         UdevConfig config_by_udev_conf;
@@ -98,4 +103,4 @@ int manager_reset_kill_workers_timer(Manager *manager);
 
 bool devpath_conflict(const char *a, const char *b);
 
-int event_queue_assume_block_device_unlocked(Manager *manager, sd_device *dev);
+int manager_requeue_locked_events_by_device(Manager *manager, sd_device *dev);
diff --git a/src/udev/udev-watch.c b/src/udev/udev-watch.c
index 003f61293f4..455ebd40e50 100644
--- a/src/udev/udev-watch.c
+++ b/src/udev/udev-watch.c
@@ -266,7 +266,7 @@ static int manager_process_inotify(Manager *manager, const struct inotify_event
         log_device_debug(dev, "Received inotify event of watch handle %i.", e->wd);
 
-        (void) event_queue_assume_block_device_unlocked(manager, dev);
+        (void) manager_requeue_locked_events_by_device(manager, dev);
         (void) synthesize_change(manager, dev);
 
         return 0;
 }
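
One more note on the clockid change described in the commit message: on
Linux, CLOCK_BOOTTIME keeps advancing while the system is suspended,
whereas CLOCK_MONOTONIC does not. A requeue deadline computed against
CLOCK_BOOTTIME shortly before a suspend can therefore already be expired
at resume, producing exactly the spurious immediate requeue and timeout
described above. A tiny standalone illustration (not part of the patch;
the now_usec() helper is purely illustrative) that compares the two
clocks; on a machine that has been suspended since boot, they differ by
the time spent asleep:

#define _GNU_SOURCE /* for CLOCK_BOOTTIME on older glibc */
#include <stdio.h>
#include <time.h>

static unsigned long long now_usec(clockid_t clock) {
        struct timespec ts;

        clock_gettime(clock, &ts);
        return (unsigned long long) ts.tv_sec * 1000000ULL +
               (unsigned long long) ts.tv_nsec / 1000;
}

int main(void) {
        /* CLOCK_MONOTONIC counts only time the system has actually been
         * running; CLOCK_BOOTTIME additionally counts time spent suspended. */
        printf("CLOCK_MONOTONIC: %llu us\n", now_usec(CLOCK_MONOTONIC));
        printf("CLOCK_BOOTTIME:  %llu us\n", now_usec(CLOCK_BOOTTIME));

        /* A "now + 3 min" timeout taken on CLOCK_BOOTTIME right before a
         * 10-minute suspend is already expired on resume; taken on
         * CLOCK_MONOTONIC, the full 3 minutes still remain. */
        return 0;
}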