1 /* SPDX-License-Identifier: GPL-2.0-or-later */
3 #include "blockdev-util.h"
4 #include "cgroup-util.h"
5 #include "common-signal.h"
6 #include "cpu-set-util.h"
7 #include "daemon-util.h"
8 #include "device-monitor-private.h"
9 #include "device-private.h"
10 #include "device-util.h"
11 #include "errno-list.h"
12 #include "event-util.h"
16 #include "inotify-util.h"
17 #include "iovec-util.h"
18 #include "limits-util.h"
21 #include "process-util.h"
22 #include "selinux-util.h"
23 #include "signal-util.h"
24 #include "socket-util.h"
25 #include "string-util.h"
26 #include "syslog-util.h"
27 #include "udev-builtin.h"
28 #include "udev-ctrl.h"
29 #include "udev-event.h"
30 #include "udev-manager.h"
31 #include "udev-node.h"
32 #include "udev-spawn.h"
33 #include "udev-trace.h"
34 #include "udev-util.h"
35 #include "udev-watch.h"
36 #include "udev-worker.h"
38 #define WORKER_NUM_MAX UINT64_C(2048)
40 #define EVENT_RETRY_INTERVAL_USEC (200 * USEC_PER_MSEC)
41 #define EVENT_RETRY_TIMEOUT_USEC (3 * USEC_PER_MINUTE)
43 typedef enum EventState
{
49 typedef struct Event
{
56 sd_device_action_t action
;
58 uint64_t blocker_seqnum
;
61 const char *devpath_old
;
64 /* Used when the device is locked by another program. */
65 usec_t retry_again_next_usec
;
66 usec_t retry_again_timeout_usec
;
67 sd_event_source
*retry_event_source
;
69 sd_event_source
*timeout_warning_event
;
70 sd_event_source
*timeout_event
;
72 LIST_FIELDS(Event
, event
);
75 typedef enum WorkerState
{
83 typedef struct Worker
{
86 sd_event_source
*child_event_source
;
87 sd_device_monitor
*monitor
;
92 static Event
*event_free(Event
*event
) {
96 assert(event
->manager
);
98 LIST_REMOVE(event
, event
->manager
->events
, event
);
99 sd_device_unref(event
->dev
);
101 sd_event_source_unref(event
->retry_event_source
);
102 sd_event_source_unref(event
->timeout_warning_event
);
103 sd_event_source_unref(event
->timeout_event
);
106 event
->worker
->event
= NULL
;
111 static void event_queue_cleanup(Manager
*manager
, EventState match_state
) {
112 LIST_FOREACH(event
, event
, manager
->events
) {
113 if (match_state
!= EVENT_UNDEF
&& match_state
!= event
->state
)
120 static Worker
*worker_free(Worker
*worker
) {
125 hashmap_remove(worker
->manager
->workers
, PID_TO_PTR(worker
->pid
));
127 sd_event_source_unref(worker
->child_event_source
);
128 sd_device_monitor_unref(worker
->monitor
);
129 event_free(worker
->event
);
131 return mfree(worker
);
134 DEFINE_TRIVIAL_CLEANUP_FUNC(Worker
*, worker_free
);
135 DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(worker_hash_op
, void, trivial_hash_func
, trivial_compare_func
, Worker
, worker_free
);
137 Manager
* manager_free(Manager
*manager
) {
143 hashmap_free_free_free(manager
->properties
);
144 udev_rules_free(manager
->rules
);
146 hashmap_free(manager
->workers
);
147 event_queue_cleanup(manager
, EVENT_UNDEF
);
149 safe_close(manager
->inotify_fd
);
150 safe_close_pair(manager
->worker_watch
);
152 sd_device_monitor_unref(manager
->monitor
);
153 udev_ctrl_unref(manager
->ctrl
);
155 sd_event_source_unref(manager
->inotify_event
);
156 sd_event_source_unref(manager
->kill_workers_event
);
157 sd_event_source_unref(manager
->memory_pressure_event_source
);
158 sd_event_source_unref(manager
->sigrtmin18_event_source
);
159 sd_event_unref(manager
->event
);
161 free(manager
->cgroup
);
162 return mfree(manager
);
165 static int on_sigchld(sd_event_source
*s
, const siginfo_t
*si
, void *userdata
);
167 static int worker_new(Worker
**ret
, Manager
*manager
, sd_device_monitor
*worker_monitor
, pid_t pid
) {
168 _cleanup_(worker_freep
) Worker
*worker
= NULL
;
173 assert(worker_monitor
);
176 /* close monitor, but keep address around */
177 device_monitor_disconnect(worker_monitor
);
179 worker
= new(Worker
, 1);
184 .monitor
= sd_device_monitor_ref(worker_monitor
),
188 r
= sd_event_add_child(manager
->event
, &worker
->child_event_source
, pid
, WEXITED
, on_sigchld
, worker
);
192 r
= hashmap_ensure_put(&manager
->workers
, &worker_hash_op
, PID_TO_PTR(pid
), worker
);
196 worker
->manager
= manager
;
198 *ret
= TAKE_PTR(worker
);
202 static void manager_kill_workers(Manager
*manager
, bool force
) {
207 HASHMAP_FOREACH(worker
, manager
->workers
) {
208 if (worker
->state
== WORKER_KILLED
)
211 if (worker
->state
== WORKER_RUNNING
&& !force
) {
212 worker
->state
= WORKER_KILLING
;
216 worker
->state
= WORKER_KILLED
;
217 (void) kill(worker
->pid
, SIGTERM
);
221 static void manager_exit(Manager
*manager
) {
224 manager
->exit
= true;
226 (void) sd_notify(/* unset= */ false, NOTIFY_STOPPING
);
228 /* close sources of new events and discard buffered events */
229 manager
->ctrl
= udev_ctrl_unref(manager
->ctrl
);
231 manager
->inotify_event
= sd_event_source_disable_unref(manager
->inotify_event
);
232 manager
->inotify_fd
= safe_close(manager
->inotify_fd
);
234 manager
->monitor
= sd_device_monitor_unref(manager
->monitor
);
236 /* discard queued events and kill workers */
237 event_queue_cleanup(manager
, EVENT_QUEUED
);
238 manager_kill_workers(manager
, true);
241 static void notify_ready(Manager
*manager
) {
246 r
= sd_notifyf(/* unset= */ false,
248 "STATUS=Processing with %u children at max", manager
->children_max
);
250 log_warning_errno(r
, "Failed to send readiness notification, ignoring: %m");
253 /* reload requested, HUP signal received, rules changed, builtin changed */
254 static void manager_reload(Manager
*manager
, bool force
) {
255 _cleanup_(udev_rules_freep
) UdevRules
*rules
= NULL
;
261 assert_se(sd_event_now(manager
->event
, CLOCK_MONOTONIC
, &now_usec
) >= 0);
262 if (!force
&& now_usec
< usec_add(manager
->last_usec
, 3 * USEC_PER_SEC
))
263 /* check for changed config, every 3 seconds at most */
265 manager
->last_usec
= now_usec
;
267 /* Reload SELinux label database, to make the child inherit the up-to-date database. */
268 mac_selinux_maybe_reload();
270 /* Nothing changed. It is not necessary to reload. */
271 if (!udev_rules_should_reload(manager
->rules
) && !udev_builtin_should_reload()) {
276 /* If we eat this up, then tell our service manager to just continue */
277 (void) sd_notifyf(/* unset= */ false,
279 "STATUS=Skipping configuration reloading, nothing changed.\n"
280 "MONOTONIC_USEC=" USEC_FMT
, now(CLOCK_MONOTONIC
));
282 (void) sd_notifyf(/* unset= */ false,
284 "STATUS=Flushing configuration...\n"
285 "MONOTONIC_USEC=" USEC_FMT
, now(CLOCK_MONOTONIC
));
287 manager_kill_workers(manager
, false);
292 r
= udev_rules_load(&rules
, manager
->resolve_name_timing
);
294 log_warning_errno(r
, "Failed to read udev rules, using the previously loaded rules, ignoring: %m");
296 udev_rules_free_and_replace(manager
->rules
, rules
);
299 notify_ready(manager
);
302 static int on_kill_workers_event(sd_event_source
*s
, uint64_t usec
, void *userdata
) {
303 Manager
*manager
= ASSERT_PTR(userdata
);
305 log_debug("Cleanup idle workers");
306 manager_kill_workers(manager
, false);
311 static int on_event_timeout(sd_event_source
*s
, uint64_t usec
, void *userdata
) {
312 Event
*event
= ASSERT_PTR(userdata
);
314 assert(event
->manager
);
315 assert(event
->worker
);
317 kill_and_sigcont(event
->worker
->pid
, event
->manager
->timeout_signal
);
318 event
->worker
->state
= WORKER_KILLED
;
320 log_device_error(event
->dev
, "Worker ["PID_FMT
"] processing SEQNUM=%"PRIu64
" killed", event
->worker
->pid
, event
->seqnum
);
325 static int on_event_timeout_warning(sd_event_source
*s
, uint64_t usec
, void *userdata
) {
326 Event
*event
= ASSERT_PTR(userdata
);
328 assert(event
->worker
);
330 log_device_warning(event
->dev
, "Worker ["PID_FMT
"] processing SEQNUM=%"PRIu64
" is taking a long time", event
->worker
->pid
, event
->seqnum
);
335 static usec_t
extra_timeout_usec(void) {
336 static usec_t saved
= 10 * USEC_PER_SEC
;
337 static bool parsed
= false;
347 e
= getenv("SYSTEMD_UDEV_EXTRA_TIMEOUT_SEC");
351 r
= parse_sec(e
, &timeout
);
353 log_debug_errno(r
, "Failed to parse $SYSTEMD_UDEV_EXTRA_TIMEOUT_SEC=%s, ignoring: %m", e
);
355 if (timeout
> 5 * USEC_PER_HOUR
) /* Add an arbitrary upper bound */
356 log_debug("Parsed $SYSTEMD_UDEV_EXTRA_TIMEOUT_SEC=%s is too large, ignoring.", e
);
363 static void worker_attach_event(Worker
*worker
, Event
*event
) {
364 Manager
*manager
= ASSERT_PTR(ASSERT_PTR(worker
)->manager
);
365 sd_event
*e
= ASSERT_PTR(manager
->event
);
368 assert(!event
->worker
);
369 assert(!worker
->event
);
371 worker
->state
= WORKER_RUNNING
;
372 worker
->event
= event
;
373 event
->state
= EVENT_RUNNING
;
374 event
->worker
= worker
;
376 (void) sd_event_add_time_relative(e
, &event
->timeout_warning_event
, CLOCK_MONOTONIC
,
377 udev_warn_timeout(manager
->timeout_usec
), USEC_PER_SEC
,
378 on_event_timeout_warning
, event
);
380 /* Manager.timeout_usec is also used as the timeout for running programs specified in
381 * IMPORT{program}=, PROGRAM=, or RUN=. Here, let's add some extra time before the manager
382 * kills a worker, so that the worker can detect that spawned programs timed out,
383 * kill them, and finalize the event. */
384 (void) sd_event_add_time_relative(e
, &event
->timeout_event
, CLOCK_MONOTONIC
,
385 usec_add(manager
->timeout_usec
, extra_timeout_usec()), USEC_PER_SEC
,
386 on_event_timeout
, event
);
389 static int worker_spawn(Manager
*manager
, Event
*event
) {
390 _cleanup_(sd_device_monitor_unrefp
) sd_device_monitor
*worker_monitor
= NULL
;
395 /* listen for new events */
396 r
= device_monitor_new_full(&worker_monitor
, MONITOR_GROUP_NONE
, -1);
400 (void) sd_device_monitor_set_description(worker_monitor
, "worker");
402 /* allow the main daemon netlink address to send devices to the worker */
403 r
= device_monitor_allow_unicast_sender(worker_monitor
, manager
->monitor
);
405 return log_error_errno(r
, "Worker: Failed to set unicast sender: %m");
407 r
= device_monitor_enable_receiving(worker_monitor
);
409 return log_error_errno(r
, "Worker: Failed to enable receiving of device: %m");
411 r
= safe_fork("(udev-worker)", FORK_DEATHSIG_SIGTERM
, &pid
);
413 event
->state
= EVENT_QUEUED
;
414 return log_error_errno(r
, "Failed to fork() worker: %m");
417 _cleanup_(udev_worker_done
) UdevWorker w
= {
418 .monitor
= TAKE_PTR(worker_monitor
),
419 .properties
= TAKE_PTR(manager
->properties
),
420 .rules
= TAKE_PTR(manager
->rules
),
421 .pipe_fd
= TAKE_FD(manager
->worker_watch
[WRITE_END
]),
422 .inotify_fd
= TAKE_FD(manager
->inotify_fd
),
423 .exec_delay_usec
= manager
->exec_delay_usec
,
424 .timeout_usec
= manager
->timeout_usec
,
425 .timeout_signal
= manager
->timeout_signal
,
426 .log_level
= manager
->log_level
,
427 .blockdev_read_only
= manager
->blockdev_read_only
,
431 r
= udev_worker_main(&w
, event
->dev
);
433 _exit(r
< 0 ? EXIT_FAILURE
: EXIT_SUCCESS
);
436 r
= worker_new(&worker
, manager
, worker_monitor
, pid
);
438 return log_error_errno(r
, "Failed to create worker object: %m");
440 worker_attach_event(worker
, event
);
442 log_device_debug(event
->dev
, "Worker ["PID_FMT
"] is forked for processing SEQNUM=%"PRIu64
".", pid
, event
->seqnum
);
446 static int event_run(Event
*event
) {
447 static bool log_children_max_reached
= true;
453 assert(event
->manager
);
455 log_device_uevent(event
->dev
, "Device ready for processing");
457 (void) event_source_disable(event
->retry_event_source
);
459 manager
= event
->manager
;
460 HASHMAP_FOREACH(worker
, manager
->workers
) {
461 if (worker
->state
!= WORKER_IDLE
)
464 r
= device_monitor_send_device(manager
->monitor
, worker
->monitor
, event
->dev
);
466 log_device_error_errno(event
->dev
, r
, "Worker ["PID_FMT
"] did not accept message, killing the worker: %m",
468 (void) kill(worker
->pid
, SIGKILL
);
469 worker
->state
= WORKER_KILLED
;
472 worker_attach_event(worker
, event
);
473 return 1; /* event is now processing. */
476 if (hashmap_size(manager
->workers
) >= manager
->children_max
) {
477 /* Avoid spamming the debug logs if the limit is already reached and
478 * many events still need to be processed */
479 if (log_children_max_reached
&& manager
->children_max
> 1) {
480 log_debug("Maximum number (%u) of children reached.", hashmap_size(manager
->workers
));
481 log_children_max_reached
= false;
483 return 0; /* no free worker */
486 /* Re-enable the debug message for the next batch of events */
487 log_children_max_reached
= true;
489 /* start new worker and pass initial device */
490 r
= worker_spawn(manager
, event
);
494 return 1; /* event is now processing. */
/* Tells whether two device paths refer to the same device or to a parent/child pair.
 *
 * a, b: NUL-terminated device paths; either may be NULL (e.g. an event without
 *       DEVPATH_OLD), in which case there is no conflict.
 *
 * Returns true when the paths are identical, or when one path is a prefix of the other
 * that ends exactly at a '/' component boundary. Returns false otherwise, including for
 * siblings that merely share a name prefix such as ".../sda" and ".../sdab". */
bool devpath_conflict(const char *a, const char *b) {
        /* This returns true when two paths are equivalent, or one is a child of another. */

        if (!a || !b)
                return false;

        /* Walk the common prefix; any differing character means the paths diverge. */
        for (; *a != '\0' && *b != '\0'; a++, b++)
                if (*a != *b)
                        return false;

        /* At least one string is exhausted here. The longer one must continue with '/'
         * (parent/child relationship), or both must have ended (identical paths). */
        return *a == '/' || *b == '/' || *a == *b;
}
510 static int event_is_blocked(Event
*event
) {
511 Event
*loop_event
= NULL
;
514 /* look up events for an identical, parent, or child device */
517 assert(event
->manager
);
518 assert(event
->blocker_seqnum
<= event
->seqnum
);
520 if (event
->retry_again_next_usec
> 0) {
523 r
= sd_event_now(event
->manager
->event
, CLOCK_BOOTTIME
, &now_usec
);
527 if (event
->retry_again_next_usec
> now_usec
)
531 if (event
->blocker_seqnum
== event
->seqnum
)
532 /* we have checked previously and no blocker found */
535 LIST_FOREACH(event
, e
, event
->manager
->events
) {
538 /* we already found a later event, earlier cannot block us, no need to check again */
539 if (loop_event
->seqnum
< event
->blocker_seqnum
)
542 /* event we checked earlier still exists, no need to check again */
543 if (loop_event
->seqnum
== event
->blocker_seqnum
)
546 /* found ourself, no later event can block us */
547 if (loop_event
->seqnum
>= event
->seqnum
)
550 /* found event we have not checked */
555 assert(loop_event
->seqnum
> event
->blocker_seqnum
&&
556 loop_event
->seqnum
< event
->seqnum
);
558 /* check if queue contains events we depend on */
559 LIST_FOREACH(event
, e
, loop_event
) {
562 /* found ourself, no later event can block us */
563 if (loop_event
->seqnum
>= event
->seqnum
)
566 if (streq_ptr(loop_event
->id
, event
->id
))
569 if (devpath_conflict(event
->devpath
, loop_event
->devpath
) ||
570 devpath_conflict(event
->devpath
, loop_event
->devpath_old
) ||
571 devpath_conflict(event
->devpath_old
, loop_event
->devpath
))
574 if (event
->devnode
&& streq_ptr(event
->devnode
, loop_event
->devnode
))
580 log_device_debug(event
->dev
, "SEQNUM=%" PRIu64
" blocked by SEQNUM=%" PRIu64
,
581 event
->seqnum
, loop_event
->seqnum
);
583 event
->blocker_seqnum
= loop_event
->seqnum
;
587 event
->blocker_seqnum
= event
->seqnum
;
591 static int event_queue_start(Manager
*manager
) {
596 if (!manager
->events
|| manager
->exit
|| manager
->stop_exec_queue
)
599 /* Make sure the stack directory /run/udev/links gets cleaned up later. */
600 manager
->udev_node_needs_cleanup
= true;
602 r
= event_source_disable(manager
->kill_workers_event
);
604 log_warning_errno(r
, "Failed to disable event source for cleaning up idle workers, ignoring: %m");
606 manager_reload(manager
, /* force = */ false);
608 LIST_FOREACH(event
, event
, manager
->events
) {
609 if (event
->state
!= EVENT_QUEUED
)
612 /* do not start event if parent or child event is still running or queued */
613 r
= event_is_blocked(event
);
617 log_device_warning_errno(event
->dev
, r
,
618 "Failed to check dependencies for event (SEQNUM=%"PRIu64
", ACTION=%s), "
619 "assuming there is no blocking event, ignoring: %m",
621 strna(device_action_to_string(event
->action
)));
623 r
= event_run(event
);
624 if (r
<= 0) /* 0 means there are no idle workers. Let's escape from the loop. */
631 static int on_event_retry(sd_event_source
*s
, uint64_t usec
, void *userdata
) {
632 /* This does nothing. The on_post() callback will start the event if there exists an idle worker. */
636 static int event_requeue(Event
*event
) {
641 assert(event
->manager
);
642 assert(event
->manager
->event
);
644 event
->timeout_warning_event
= sd_event_source_disable_unref(event
->timeout_warning_event
);
645 event
->timeout_event
= sd_event_source_disable_unref(event
->timeout_event
);
647 /* add a short delay to suppress busy loop */
648 r
= sd_event_now(event
->manager
->event
, CLOCK_BOOTTIME
, &now_usec
);
650 return log_device_warning_errno(event
->dev
, r
,
651 "Failed to get current time, "
652 "skipping event (SEQNUM=%"PRIu64
", ACTION=%s): %m",
653 event
->seqnum
, strna(device_action_to_string(event
->action
)));
655 if (event
->retry_again_timeout_usec
> 0 && event
->retry_again_timeout_usec
<= now_usec
)
656 return log_device_warning_errno(event
->dev
, SYNTHETIC_ERRNO(ETIMEDOUT
),
657 "The underlying block device is locked by a process more than %s, "
658 "skipping event (SEQNUM=%"PRIu64
", ACTION=%s).",
659 FORMAT_TIMESPAN(EVENT_RETRY_TIMEOUT_USEC
, USEC_PER_MINUTE
),
660 event
->seqnum
, strna(device_action_to_string(event
->action
)));
662 event
->retry_again_next_usec
= usec_add(now_usec
, EVENT_RETRY_INTERVAL_USEC
);
663 if (event
->retry_again_timeout_usec
== 0)
664 event
->retry_again_timeout_usec
= usec_add(now_usec
, EVENT_RETRY_TIMEOUT_USEC
);
666 r
= event_reset_time_relative(event
->manager
->event
, &event
->retry_event_source
,
667 CLOCK_MONOTONIC
, EVENT_RETRY_INTERVAL_USEC
, 0,
668 on_event_retry
, NULL
,
669 0, "retry-event", true);
671 return log_device_warning_errno(event
->dev
, r
, "Failed to reset timer event source for retrying event, "
672 "skipping event (SEQNUM=%"PRIu64
", ACTION=%s): %m",
673 event
->seqnum
, strna(device_action_to_string(event
->action
)));
675 if (event
->worker
&& event
->worker
->event
== event
)
676 event
->worker
->event
= NULL
;
677 event
->worker
= NULL
;
679 event
->state
= EVENT_QUEUED
;
683 static int event_queue_assume_block_device_unlocked(Manager
*manager
, sd_device
*dev
) {
687 /* When a new event for a block device is queued or we get an inotify event, assume that the
688 * device is not locked anymore. The assumption may not be true, but that should not cause any
689 * issues, as in that case events will be requeued soon. */
691 r
= udev_get_whole_disk(dev
, NULL
, &devname
);
695 LIST_FOREACH(event
, event
, manager
->events
) {
696 const char *event_devname
;
698 if (event
->state
!= EVENT_QUEUED
)
701 if (event
->retry_again_next_usec
== 0)
704 if (udev_get_whole_disk(event
->dev
, NULL
, &event_devname
) <= 0)
707 if (!streq(devname
, event_devname
))
710 event
->retry_again_next_usec
= 0;
716 static int event_queue_insert(Manager
*manager
, sd_device
*dev
) {
717 const char *devpath
, *devpath_old
= NULL
, *id
= NULL
, *devnode
= NULL
;
718 sd_device_action_t action
;
726 /* We only accept devices received by the device monitor. */
727 r
= sd_device_get_seqnum(dev
, &seqnum
);
731 r
= sd_device_get_action(dev
, &action
);
735 r
= sd_device_get_devpath(dev
, &devpath
);
739 r
= sd_device_get_property_value(dev
, "DEVPATH_OLD", &devpath_old
);
740 if (r
< 0 && r
!= -ENOENT
)
743 r
= device_get_device_id(dev
, &id
);
744 if (r
< 0 && r
!= -ENOENT
)
747 r
= sd_device_get_devname(dev
, &devnode
);
748 if (r
< 0 && r
!= -ENOENT
)
751 event
= new(Event
, 1);
757 .dev
= sd_device_ref(dev
),
762 .devpath_old
= devpath_old
,
764 .state
= EVENT_QUEUED
,
767 if (!manager
->events
) {
768 r
= touch("/run/udev/queue");
770 log_warning_errno(r
, "Failed to touch /run/udev/queue, ignoring: %m");
773 LIST_APPEND(event
, manager
->events
, event
);
775 log_device_uevent(dev
, "Device is queued");
780 static int on_uevent(sd_device_monitor
*monitor
, sd_device
*dev
, void *userdata
) {
781 Manager
*manager
= ASSERT_PTR(userdata
);
784 DEVICE_TRACE_POINT(kernel_uevent_received
, dev
);
786 device_ensure_usec_initialized(dev
, NULL
);
788 r
= event_queue_insert(manager
, dev
);
790 log_device_error_errno(dev
, r
, "Failed to insert device into event queue: %m");
794 (void) event_queue_assume_block_device_unlocked(manager
, dev
);
799 static int on_worker(sd_event_source
*s
, int fd
, uint32_t revents
, void *userdata
) {
800 Manager
*manager
= ASSERT_PTR(userdata
);
804 struct iovec iovec
= IOVEC_MAKE(&result
, sizeof(result
));
805 CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred
))) control
;
806 struct msghdr msghdr
= {
809 .msg_control
= &control
,
810 .msg_controllen
= sizeof(control
),
816 size
= recvmsg_safe(fd
, &msghdr
, MSG_DONTWAIT
);
820 /* nothing more to read */
823 return log_error_errno(size
, "Failed to receive message: %m");
825 cmsg_close_all(&msghdr
);
827 if (size
!= sizeof(result
)) {
828 log_warning("Ignoring worker message with invalid size %zi bytes", size
);
832 ucred
= CMSG_FIND_DATA(&msghdr
, SOL_SOCKET
, SCM_CREDENTIALS
, struct ucred
);
833 if (!ucred
|| ucred
->pid
<= 0) {
834 log_warning("Ignoring worker message without valid PID");
838 /* look up the worker that sent the message */
839 worker
= hashmap_get(manager
->workers
, PID_TO_PTR(ucred
->pid
));
841 log_debug("Worker ["PID_FMT
"] returned, but is no longer tracked", ucred
->pid
);
845 if (worker
->state
== WORKER_KILLING
) {
846 worker
->state
= WORKER_KILLED
;
847 (void) kill(worker
->pid
, SIGTERM
);
848 } else if (worker
->state
!= WORKER_KILLED
)
849 worker
->state
= WORKER_IDLE
;
851 /* worker returned */
852 if (result
== EVENT_RESULT_TRY_AGAIN
&&
853 event_requeue(worker
->event
) < 0)
854 udev_broadcast_result(manager
->monitor
, worker
->event
->dev
, -ETIMEDOUT
);
856 /* When event_requeue() succeeds, worker->event is NULL, and event_free() handles NULL gracefully. */
857 event_free(worker
->event
);
863 static void manager_set_default_children_max(Manager
*manager
) {
864 uint64_t cpu_limit
, mem_limit
, cpu_count
= 1;
869 if (manager
->children_max
!= 0)
872 r
= cpus_in_affinity_mask();
874 log_warning_errno(r
, "Failed to determine number of local CPUs, ignoring: %m");
878 cpu_limit
= cpu_count
* 2 + 16;
879 mem_limit
= MAX(physical_memory() / (128*1024*1024), UINT64_C(10));
881 manager
->children_max
= MIN3(cpu_limit
, mem_limit
, WORKER_NUM_MAX
);
882 log_debug("Set children_max to %u", manager
->children_max
);
885 /* receive the udevd message from userspace */
886 static int on_ctrl_msg(UdevCtrl
*uctrl
, UdevCtrlMessageType type
, const UdevCtrlMessageValue
*value
, void *userdata
) {
887 Manager
*manager
= ASSERT_PTR(userdata
);
893 case UDEV_CTRL_SET_LOG_LEVEL
:
894 if ((value
->intval
& LOG_PRIMASK
) != value
->intval
) {
895 log_debug("Received invalid udev control message (SET_LOG_LEVEL, %i), ignoring.", value
->intval
);
899 log_debug("Received udev control message (SET_LOG_LEVEL), setting log_level=%i", value
->intval
);
901 r
= log_get_max_level();
902 if (r
== value
->intval
)
905 log_set_max_level(value
->intval
);
906 manager
->log_level
= value
->intval
;
907 manager_kill_workers(manager
, false);
909 case UDEV_CTRL_STOP_EXEC_QUEUE
:
910 log_debug("Received udev control message (STOP_EXEC_QUEUE)");
911 manager
->stop_exec_queue
= true;
913 case UDEV_CTRL_START_EXEC_QUEUE
:
914 log_debug("Received udev control message (START_EXEC_QUEUE)");
915 manager
->stop_exec_queue
= false;
916 /* It is not necessary to call event_queue_start() here, as it will be called in on_post() if necessary. */
918 case UDEV_CTRL_RELOAD
:
919 log_debug("Received udev control message (RELOAD)");
920 manager_reload(manager
, /* force = */ true);
922 case UDEV_CTRL_SET_ENV
: {
923 _unused_ _cleanup_free_
char *old_val
= NULL
, *old_key
= NULL
;
924 _cleanup_free_
char *key
= NULL
, *val
= NULL
;
927 eq
= strchr(value
->buf
, '=');
929 log_error("Invalid key format '%s'", value
->buf
);
933 key
= strndup(value
->buf
, eq
- value
->buf
);
939 old_val
= hashmap_remove2(manager
->properties
, key
, (void **) &old_key
);
941 r
= hashmap_ensure_allocated(&manager
->properties
, &string_hash_ops
);
949 log_debug("Received udev control message (ENV), unsetting '%s'", key
);
957 log_debug("Received udev control message (ENV), setting '%s=%s'", key
, val
);
959 r
= hashmap_put(manager
->properties
, key
, val
);
967 manager_kill_workers(manager
, false);
970 case UDEV_CTRL_SET_CHILDREN_MAX
:
971 if (value
->intval
< 0) {
972 log_debug("Received invalid udev control message (SET_MAX_CHILDREN, %i), ignoring.", value
->intval
);
976 log_debug("Received udev control message (SET_MAX_CHILDREN), setting children_max=%i", value
->intval
);
977 manager
->children_max
= value
->intval
;
979 /* When 0 is specified, determine the maximum based on the system resources. */
980 manager_set_default_children_max(manager
);
982 notify_ready(manager
);
985 log_debug("Received udev control message (PING)");
988 log_debug("Received udev control message (EXIT)");
989 manager_exit(manager
);
992 log_debug("Received unknown udev control message, ignoring");
998 static int synthesize_change_one(sd_device
*dev
, sd_device
*target
) {
1001 if (DEBUG_LOGGING
) {
1002 const char *syspath
= NULL
;
1003 (void) sd_device_get_syspath(target
, &syspath
);
1004 log_device_debug(dev
, "device is closed, synthesising 'change' on %s", strna(syspath
));
1007 r
= sd_device_trigger(target
, SD_DEVICE_CHANGE
);
1009 return log_device_debug_errno(target
, r
, "Failed to trigger 'change' uevent: %m");
1011 DEVICE_TRACE_POINT(synthetic_change_event
, dev
);
1016 static int synthesize_change(sd_device
*dev
) {
1017 _cleanup_(sd_device_enumerator_unrefp
) sd_device_enumerator
*e
= NULL
;
1018 bool part_table_read
;
1019 const char *sysname
;
1022 r
= sd_device_get_sysname(dev
, &sysname
);
1026 if (startswith(sysname
, "dm-") || block_device_is_whole_disk(dev
) <= 0)
1027 return synthesize_change_one(dev
, dev
);
1029 r
= blockdev_reread_partition_table(dev
);
1031 log_device_debug_errno(dev
, r
, "Failed to re-read partition table, ignoring: %m");
1032 part_table_read
= r
>= 0;
1034 /* search for partitions */
1035 r
= partition_enumerator_new(dev
, &e
);
1039 /* We have partitions and re-read the table, the kernel already sent out a "change"
1040 * event for the disk, and "remove/add" for all partitions. */
1041 if (part_table_read
&& sd_device_enumerator_get_device_first(e
))
1044 /* We have partitions but re-reading the partition table did not work, synthesize
1045 * "change" for the disk and all partitions. */
1046 r
= synthesize_change_one(dev
, dev
);
1047 FOREACH_DEVICE(e
, d
) {
1048 k
= synthesize_change_one(dev
, d
);
1049 if (k
< 0 && r
>= 0)
1056 static int on_inotify(sd_event_source
*s
, int fd
, uint32_t revents
, void *userdata
) {
1057 Manager
*manager
= ASSERT_PTR(userdata
);
1058 union inotify_event_buffer buffer
;
1062 l
= read(fd
, &buffer
, sizeof(buffer
));
1064 if (ERRNO_IS_TRANSIENT(errno
))
1067 return log_error_errno(errno
, "Failed to read inotify fd: %m");
1070 FOREACH_INOTIFY_EVENT_WARN(e
, buffer
, l
) {
1071 _cleanup_(sd_device_unrefp
) sd_device
*dev
= NULL
;
1072 const char *devnode
;
1074 /* Do not handle IN_IGNORED here. Especially, do not try to call udev_watch_end() from the
1075 * main process. Otherwise, the pair of the symlinks may become inconsistent, and garbage
1076 * may be left behind. The old symlinks are removed by a worker that processes the
1077 * corresponding 'remove' uevent;
1078 * udev_event_execute_rules() -> event_execute_rules_on_remove() -> udev_watch_end(). */
1080 if (!FLAGS_SET(e
->mask
, IN_CLOSE_WRITE
))
1083 r
= device_new_from_watch_handle(&dev
, e
->wd
);
1085 /* The device may have been removed right after it was closed. */
1086 log_debug_errno(r
, "Failed to create sd_device object from watch handle, ignoring: %m");
1090 r
= sd_device_get_devname(dev
, &devnode
);
1092 /* Here too, the device may already have been removed. */
1093 log_device_debug_errno(dev
, r
, "Failed to get device node, ignoring: %m");
1097 log_device_debug(dev
, "Received inotify event for %s.", devnode
);
1099 (void) event_queue_assume_block_device_unlocked(manager
, dev
);
1100 (void) synthesize_change(dev
);
1106 static int on_sigterm(sd_event_source
*s
, const struct signalfd_siginfo
*si
, void *userdata
) {
1107 Manager
*manager
= ASSERT_PTR(userdata
);
1109 manager_exit(manager
);
1114 static int on_sighup(sd_event_source
*s
, const struct signalfd_siginfo
*si
, void *userdata
) {
1115 Manager
*manager
= ASSERT_PTR(userdata
);
1117 manager_reload(manager
, /* force = */ true);
1122 static int on_sigchld(sd_event_source
*s
, const siginfo_t
*si
, void *userdata
) {
1123 Worker
*worker
= ASSERT_PTR(userdata
);
1124 Manager
*manager
= ASSERT_PTR(worker
->manager
);
1125 sd_device
*dev
= worker
->event
? ASSERT_PTR(worker
->event
->dev
) : NULL
;
1130 switch (si
->si_code
) {
1132 if (si
->si_status
== 0)
1133 log_device_debug(dev
, "Worker ["PID_FMT
"] exited.", si
->si_pid
);
1135 log_device_warning(dev
, "Worker ["PID_FMT
"] exited with return code %i.",
1136 si
->si_pid
, si
->si_status
);
1137 result
= EVENT_RESULT_EXIT_STATUS_BASE
+ si
->si_status
;
1142 log_device_warning(dev
, "Worker ["PID_FMT
"] terminated by signal %i (%s).",
1143 si
->si_pid
, si
->si_status
, signal_to_string(si
->si_status
));
1144 result
= EVENT_RESULT_SIGNAL_BASE
+ si
->si_status
;
1148 assert_not_reached();
1151 if (result
!= EVENT_RESULT_SUCCESS
&& dev
) {
1152 /* delete state from disk */
1153 device_delete_db(dev
);
1154 device_tag_index(dev
, NULL
, false);
1156 /* Forward kernel event to libudev listeners */
1157 udev_broadcast_result(manager
->monitor
, dev
, result
);
1160 worker_free(worker
);
1165 static int on_post(sd_event_source
*s
, void *userdata
) {
1166 Manager
*manager
= ASSERT_PTR(userdata
);
1168 if (manager
->events
) {
1169 /* Try to process pending events if idle workers exist. Why is this necessary?
1170 * When a worker finished an event and became idle, even if there was a pending event,
1171 * the corresponding device might have been locked and the processing of the event
1172 * delayed for a while, preventing the worker from processing the event immediately.
1173 * Now, the device may be unlocked. Let's try again! */
1174 event_queue_start(manager
);
1178 /* There are no queued events. Let's remove /run/udev/queue and clean up the idle processes. */
1180 if (unlink("/run/udev/queue") < 0) {
1181 if (errno
!= ENOENT
)
1182 log_warning_errno(errno
, "Failed to unlink /run/udev/queue, ignoring: %m");
1184 log_debug("No events are queued, removing /run/udev/queue.");
1186 if (!hashmap_isempty(manager
->workers
)) {
1187 /* There are idle workers */
1188 (void) event_reset_time_relative(manager
->event
, &manager
->kill_workers_event
,
1189 CLOCK_MONOTONIC
, 3 * USEC_PER_SEC
, USEC_PER_SEC
,
1190 on_kill_workers_event
, manager
,
1191 0, "kill-workers-event", false);
1195 /* There are no idle workers. */
1197 if (manager
->udev_node_needs_cleanup
) {
1198 (void) udev_node_cleanup();
1199 manager
->udev_node_needs_cleanup
= false;
1203 return sd_event_exit(manager
->event
, 0);
1205 if (manager
->cgroup
)
1206 /* cleanup possible left-over processes in our cgroup */
1207 (void) cg_kill(manager
->cgroup
, SIGKILL
, CGROUP_IGNORE_SELF
, /* set=*/ NULL
, /* kill_log= */ NULL
, /* userdata= */ NULL
);
1212 Manager
* manager_new(void) {
1215 manager
= new(Manager
, 1);
1219 *manager
= (Manager
) {
1220 .inotify_fd
= -EBADF
,
1221 .worker_watch
= EBADF_PAIR
,
1222 .log_level
= LOG_INFO
,
1223 .resolve_name_timing
= RESOLVE_NAME_EARLY
,
1224 .timeout_usec
= DEFAULT_WORKER_TIMEOUT_USEC
,
1225 .timeout_signal
= SIGKILL
,
1231 void manager_adjust_arguments(Manager
*manager
) {
1234 if (manager
->timeout_usec
< MIN_WORKER_TIMEOUT_USEC
) {
1235 log_debug("Timeout (%s) for processing event is too small, using the default: %s",
1236 FORMAT_TIMESPAN(manager
->timeout_usec
, 1),
1237 FORMAT_TIMESPAN(DEFAULT_WORKER_TIMEOUT_USEC
, 1));
1239 manager
->timeout_usec
= DEFAULT_WORKER_TIMEOUT_USEC
;
1242 if (manager
->exec_delay_usec
>= manager
->timeout_usec
) {
1243 log_debug("Delay (%s) for executing RUN= commands is too large compared with the timeout (%s) for event execution, ignoring the delay.",
1244 FORMAT_TIMESPAN(manager
->exec_delay_usec
, 1),
1245 FORMAT_TIMESPAN(manager
->timeout_usec
, 1));
1247 manager
->exec_delay_usec
= 0;
/* Prepares the manager's communication endpoints and records environment information.
 *
 * manager:   object whose ctrl, monitor, log_level and cgroup members are filled in here.
 * fd_ctrl:   descriptor handed to udev_ctrl_new_from_fd() for the udev control socket.
 * fd_uevent: descriptor handed to device_monitor_new_full() for the kernel uevent monitor.
 *
 * Failures of the control socket or device monitor setup are reported through log_error_errno() and
 * its result is returned; a failure to determine the cgroup is only logged at debug level.
 *
 * NOTE(review): the conditions guarding the error returns and the final return statement are not
 * visible in this excerpt; presumably each return follows a check of r. Confirm against the full
 * file. */
1251 int manager_init(Manager
*manager
, int fd_ctrl
, int fd_uevent
) {
1252 _cleanup_free_
char *cgroup
= NULL
;
/* Control socket: create the object from the passed descriptor, then enable receiving on it. */
1257 r
= udev_ctrl_new_from_fd(&manager
->ctrl
, fd_ctrl
);
1259 return log_error_errno(r
, "Failed to initialize udev control socket: %m");
1261 r
= udev_ctrl_enable_receiving(manager
->ctrl
);
1263 return log_error_errno(r
, "Failed to bind udev control socket: %m");
/* Device monitor: created for MONITOR_GROUP_KERNEL from the passed descriptor. */
1265 r
= device_monitor_new_full(&manager
->monitor
, MONITOR_GROUP_KERNEL
, fd_uevent
);
1267 return log_error_errno(r
, "Failed to initialize device monitor: %m");
/* The description is set best-effort; the result is explicitly discarded. */
1269 (void) sd_device_monitor_set_description(manager
->monitor
, "manager");
1271 r
= device_monitor_enable_receiving(manager
->monitor
);
1273 return log_error_errno(r
, "Failed to bind netlink socket: %m");
/* Replace the LOG_INFO default set in manager_new() with the currently configured maximum level. */
1275 manager
->log_level
= log_get_max_level();
/* Query the cgroup path for pid 0; the path is stored in the manager only when it ends in
 * "/udev", otherwise manager->cgroup is left untouched. */
1277 r
= cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER
, 0, &cgroup
);
1279 log_debug_errno(r
, "Failed to get cgroup, ignoring: %m");
1280 else if (endswith(cgroup
, "/udev")) { /* If we are in a subcgroup /udev/ we assume it was delegated to us */
1281 log_debug("Running in delegated subcgroup '%s'.", cgroup
);
1282 manager
->cgroup
= TAKE_PTR(cgroup
);
1288 int manager_main(Manager
*manager
) {
1291 manager_set_default_children_max(manager
);
1293 /* unnamed socket from workers to the main daemon */
1294 r
= socketpair(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
, 0, manager
->worker_watch
);
1296 return log_error_errno(errno
, "Failed to create socketpair for communicating with workers: %m");
1298 fd_worker
= manager
->worker_watch
[READ_END
];
1300 r
= setsockopt_int(fd_worker
, SOL_SOCKET
, SO_PASSCRED
, true);
1302 return log_error_errno(r
, "Failed to enable SO_PASSCRED: %m");
1304 manager
->inotify_fd
= inotify_init1(IN_CLOEXEC
);
1305 if (manager
->inotify_fd
< 0)
1306 return log_error_errno(errno
, "Failed to create inotify descriptor: %m");
1308 udev_watch_restore(manager
->inotify_fd
);
1310 /* block SIGCHLD for listening child events. */
1311 assert_se(sigprocmask_many(SIG_BLOCK
, NULL
, SIGCHLD
, -1) >= 0);
1313 r
= sd_event_default(&manager
->event
);
1315 return log_error_errno(r
, "Failed to allocate event loop: %m");
1317 r
= sd_event_add_signal(manager
->event
, NULL
, SIGINT
| SD_EVENT_SIGNAL_PROCMASK
, on_sigterm
, manager
);
1319 return log_error_errno(r
, "Failed to create SIGINT event source: %m");
1321 r
= sd_event_add_signal(manager
->event
, NULL
, SIGTERM
| SD_EVENT_SIGNAL_PROCMASK
, on_sigterm
, manager
);
1323 return log_error_errno(r
, "Failed to create SIGTERM event source: %m");
1325 r
= sd_event_add_signal(manager
->event
, NULL
, SIGHUP
| SD_EVENT_SIGNAL_PROCMASK
, on_sighup
, manager
);
1327 return log_error_errno(r
, "Failed to create SIGHUP event source: %m");
1329 r
= sd_event_set_watchdog(manager
->event
, true);
1331 return log_error_errno(r
, "Failed to create watchdog event source: %m");
1333 r
= udev_ctrl_attach_event(manager
->ctrl
, manager
->event
);
1335 return log_error_errno(r
, "Failed to attach event to udev control: %m");
1337 r
= udev_ctrl_start(manager
->ctrl
, on_ctrl_msg
, manager
);
1339 return log_error_errno(r
, "Failed to start udev control: %m");
1341 /* This needs to be after the inotify and uevent handling, to make sure
1342 * that the ping is sent back after fully processing the pending uevents
1343 * (including the synthetic ones we may create due to inotify events).
1345 r
= sd_event_source_set_priority(udev_ctrl_get_event_source(manager
->ctrl
), SD_EVENT_PRIORITY_IDLE
);
1347 return log_error_errno(r
, "Failed to set IDLE event priority for udev control event source: %m");
1349 r
= sd_event_add_io(manager
->event
, &manager
->inotify_event
, manager
->inotify_fd
, EPOLLIN
, on_inotify
, manager
);
1351 return log_error_errno(r
, "Failed to create inotify event source: %m");
1353 r
= sd_device_monitor_attach_event(manager
->monitor
, manager
->event
);
1355 return log_error_errno(r
, "Failed to attach event to device monitor: %m");
1357 r
= sd_device_monitor_start(manager
->monitor
, on_uevent
, manager
);
1359 return log_error_errno(r
, "Failed to start device monitor: %m");
1361 r
= sd_event_add_io(manager
->event
, NULL
, fd_worker
, EPOLLIN
, on_worker
, manager
);
1363 return log_error_errno(r
, "Failed to create worker event source: %m");
1365 r
= sd_event_add_post(manager
->event
, NULL
, on_post
, manager
);
1367 return log_error_errno(r
, "Failed to create post event source: %m");
1369 /* Eventually, we probably want to do more here on memory pressure, for example, kill idle workers immediately */
1370 r
= sd_event_add_memory_pressure(manager
->event
, &manager
->memory_pressure_event_source
, NULL
, NULL
);
1372 log_full_errno(ERRNO_IS_NOT_SUPPORTED(r
) || ERRNO_IS_PRIVILEGE(r
) || (r
== -EHOSTDOWN
) ? LOG_DEBUG
: LOG_WARNING
, r
,
1373 "Failed to allocate memory pressure watch, ignoring: %m");
1375 r
= sd_event_add_signal(manager
->event
, &manager
->memory_pressure_event_source
,
1376 (SIGRTMIN
+18) | SD_EVENT_SIGNAL_PROCMASK
, sigrtmin18_handler
, NULL
);
1378 return log_error_errno(r
, "Failed to allocate SIGRTMIN+18 event source, ignoring: %m");
1380 manager
->last_usec
= now(CLOCK_MONOTONIC
);
1382 udev_builtin_init();
1384 r
= udev_rules_load(&manager
->rules
, manager
->resolve_name_timing
);
1386 return log_error_errno(r
, "Failed to read udev rules: %m");
1388 r
= udev_rules_apply_static_dev_perms(manager
->rules
);
1390 log_warning_errno(r
, "Failed to apply permissions on static device nodes, ignoring: %m");
1392 notify_ready(manager
);
1394 r
= sd_event_loop(manager
->event
);
1396 log_error_errno(r
, "Event loop failed: %m");
1398 (void) sd_notify(/* unset= */ false, NOTIFY_STOPPING
);