1 /* SPDX-License-Identifier: GPL-2.0-or-later */
3 #include "blockdev-util.h"
4 #include "cgroup-util.h"
5 #include "common-signal.h"
6 #include "cpu-set-util.h"
7 #include "daemon-util.h"
8 #include "device-monitor-private.h"
9 #include "device-private.h"
10 #include "device-util.h"
11 #include "errno-list.h"
12 #include "event-util.h"
16 #include "inotify-util.h"
17 #include "iovec-util.h"
18 #include "limits-util.h"
21 #include "process-util.h"
22 #include "selinux-util.h"
23 #include "signal-util.h"
24 #include "socket-util.h"
25 #include "string-util.h"
26 #include "syslog-util.h"
27 #include "udev-builtin.h"
28 #include "udev-ctrl.h"
29 #include "udev-event.h"
30 #include "udev-manager.h"
31 #include "udev-node.h"
32 #include "udev-spawn.h"
33 #include "udev-trace.h"
34 #include "udev-util.h"
35 #include "udev-watch.h"
36 #include "udev-worker.h"
38 #define WORKER_NUM_MAX UINT64_C(2048)
40 #define EVENT_RETRY_INTERVAL_USEC (200 * USEC_PER_MSEC)
41 #define EVENT_RETRY_TIMEOUT_USEC (3 * USEC_PER_MINUTE)
43 typedef enum EventState
{
49 typedef struct Event
{
56 sd_device_action_t action
;
58 uint64_t blocker_seqnum
;
61 const char *devpath_old
;
64 /* Used when the device is locked by another program. */
65 usec_t retry_again_next_usec
;
66 usec_t retry_again_timeout_usec
;
67 sd_event_source
*retry_event_source
;
69 sd_event_source
*timeout_warning_event
;
70 sd_event_source
*timeout_event
;
72 LIST_FIELDS(Event
, event
);
75 typedef enum WorkerState
{
83 typedef struct Worker
{
86 sd_event_source
*child_event_source
;
87 sd_device_monitor
*monitor
;
92 static Event
*event_free(Event
*event
) {
96 assert(event
->manager
);
98 LIST_REMOVE(event
, event
->manager
->events
, event
);
99 sd_device_unref(event
->dev
);
101 sd_event_source_unref(event
->retry_event_source
);
102 sd_event_source_unref(event
->timeout_warning_event
);
103 sd_event_source_unref(event
->timeout_event
);
106 event
->worker
->event
= NULL
;
111 static void event_queue_cleanup(Manager
*manager
, EventState match_state
) {
112 LIST_FOREACH(event
, event
, manager
->events
) {
113 if (match_state
!= EVENT_UNDEF
&& match_state
!= event
->state
)
120 static Worker
*worker_free(Worker
*worker
) {
125 hashmap_remove(worker
->manager
->workers
, PID_TO_PTR(worker
->pid
));
127 sd_event_source_unref(worker
->child_event_source
);
128 sd_device_monitor_unref(worker
->monitor
);
129 event_free(worker
->event
);
131 return mfree(worker
);
134 DEFINE_TRIVIAL_CLEANUP_FUNC(Worker
*, worker_free
);
135 DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(worker_hash_op
, void, trivial_hash_func
, trivial_compare_func
, Worker
, worker_free
);
137 Manager
* manager_free(Manager
*manager
) {
143 hashmap_free_free_free(manager
->properties
);
144 udev_rules_free(manager
->rules
);
146 hashmap_free(manager
->workers
);
147 event_queue_cleanup(manager
, EVENT_UNDEF
);
149 safe_close(manager
->inotify_fd
);
150 safe_close_pair(manager
->worker_watch
);
152 sd_device_monitor_unref(manager
->monitor
);
153 udev_ctrl_unref(manager
->ctrl
);
155 sd_event_source_unref(manager
->inotify_event
);
156 sd_event_source_unref(manager
->kill_workers_event
);
157 sd_event_source_unref(manager
->memory_pressure_event_source
);
158 sd_event_source_unref(manager
->sigrtmin18_event_source
);
159 sd_event_unref(manager
->event
);
161 free(manager
->cgroup
);
162 return mfree(manager
);
165 static int on_sigchld(sd_event_source
*s
, const siginfo_t
*si
, void *userdata
);
167 static int worker_new(Worker
**ret
, Manager
*manager
, sd_device_monitor
*worker_monitor
, pid_t pid
) {
168 _cleanup_(worker_freep
) Worker
*worker
= NULL
;
173 assert(worker_monitor
);
176 /* close monitor, but keep address around */
177 device_monitor_disconnect(worker_monitor
);
179 worker
= new(Worker
, 1);
184 .monitor
= sd_device_monitor_ref(worker_monitor
),
188 r
= sd_event_add_child(manager
->event
, &worker
->child_event_source
, pid
, WEXITED
, on_sigchld
, worker
);
192 r
= hashmap_ensure_put(&manager
->workers
, &worker_hash_op
, PID_TO_PTR(pid
), worker
);
196 worker
->manager
= manager
;
198 *ret
= TAKE_PTR(worker
);
202 static void manager_kill_workers(Manager
*manager
, bool force
) {
207 HASHMAP_FOREACH(worker
, manager
->workers
) {
208 if (worker
->state
== WORKER_KILLED
)
211 if (worker
->state
== WORKER_RUNNING
&& !force
) {
212 worker
->state
= WORKER_KILLING
;
216 worker
->state
= WORKER_KILLED
;
217 (void) kill(worker
->pid
, SIGTERM
);
221 static void manager_exit(Manager
*manager
) {
224 manager
->exit
= true;
226 (void) sd_notify(/* unset= */ false, NOTIFY_STOPPING
);
228 /* close sources of new events and discard buffered events */
229 manager
->ctrl
= udev_ctrl_unref(manager
->ctrl
);
231 manager
->inotify_event
= sd_event_source_disable_unref(manager
->inotify_event
);
232 manager
->inotify_fd
= safe_close(manager
->inotify_fd
);
234 manager
->monitor
= sd_device_monitor_unref(manager
->monitor
);
236 /* discard queued events and kill workers */
237 event_queue_cleanup(manager
, EVENT_QUEUED
);
238 manager_kill_workers(manager
, true);
241 static void notify_ready(Manager
*manager
) {
246 r
= sd_notifyf(/* unset= */ false,
248 "STATUS=Processing with %u children at max", manager
->children_max
);
250 log_warning_errno(r
, "Failed to send readiness notification, ignoring: %m");
253 /* reload requested, HUP signal received, rules changed, builtin changed */
254 static void manager_reload(Manager
*manager
, bool force
) {
255 _cleanup_(udev_rules_freep
) UdevRules
*rules
= NULL
;
261 assert_se(sd_event_now(manager
->event
, CLOCK_MONOTONIC
, &now_usec
) >= 0);
262 if (!force
&& now_usec
< usec_add(manager
->last_usec
, 3 * USEC_PER_SEC
))
263 /* check for changed config, every 3 seconds at most */
265 manager
->last_usec
= now_usec
;
267 /* Reload SELinux label database, to make the child inherit the up-to-date database. */
268 mac_selinux_maybe_reload();
270 /* Nothing changed. It is not necessary to reload. */
271 if (!udev_rules_should_reload(manager
->rules
) && !udev_builtin_should_reload()) {
276 /* If we eat this up, then tell our service manager to just continue */
277 (void) sd_notifyf(/* unset= */ false,
279 "STATUS=Skipping configuration reloading, nothing changed.\n"
280 "MONOTONIC_USEC=" USEC_FMT
, now(CLOCK_MONOTONIC
));
282 (void) sd_notifyf(/* unset= */ false,
284 "STATUS=Flushing configuration...\n"
285 "MONOTONIC_USEC=" USEC_FMT
, now(CLOCK_MONOTONIC
));
287 manager_kill_workers(manager
, false);
292 r
= udev_rules_load(&rules
, manager
->resolve_name_timing
);
294 log_warning_errno(r
, "Failed to read udev rules, using the previously loaded rules, ignoring: %m");
296 udev_rules_free_and_replace(manager
->rules
, rules
);
299 notify_ready(manager
);
302 static int on_kill_workers_event(sd_event_source
*s
, uint64_t usec
, void *userdata
) {
303 Manager
*manager
= ASSERT_PTR(userdata
);
305 log_debug("Cleanup idle workers");
306 manager_kill_workers(manager
, false);
311 static int on_event_timeout(sd_event_source
*s
, uint64_t usec
, void *userdata
) {
312 Event
*event
= ASSERT_PTR(userdata
);
314 assert(event
->manager
);
315 assert(event
->worker
);
317 kill_and_sigcont(event
->worker
->pid
, event
->manager
->timeout_signal
);
318 event
->worker
->state
= WORKER_KILLED
;
320 log_device_error(event
->dev
, "Worker ["PID_FMT
"] processing SEQNUM=%"PRIu64
" killed", event
->worker
->pid
, event
->seqnum
);
325 static int on_event_timeout_warning(sd_event_source
*s
, uint64_t usec
, void *userdata
) {
326 Event
*event
= ASSERT_PTR(userdata
);
328 assert(event
->worker
);
330 log_device_warning(event
->dev
, "Worker ["PID_FMT
"] processing SEQNUM=%"PRIu64
" is taking a long time", event
->worker
->pid
, event
->seqnum
);
335 static void worker_attach_event(Worker
*worker
, Event
*event
) {
336 Manager
*manager
= ASSERT_PTR(ASSERT_PTR(worker
)->manager
);
337 sd_event
*e
= ASSERT_PTR(manager
->event
);
340 assert(!event
->worker
);
341 assert(!worker
->event
);
343 worker
->state
= WORKER_RUNNING
;
344 worker
->event
= event
;
345 event
->state
= EVENT_RUNNING
;
346 event
->worker
= worker
;
348 (void) sd_event_add_time_relative(e
, &event
->timeout_warning_event
, CLOCK_MONOTONIC
,
349 udev_warn_timeout(manager
->timeout_usec
), USEC_PER_SEC
,
350 on_event_timeout_warning
, event
);
352 (void) sd_event_add_time_relative(e
, &event
->timeout_event
, CLOCK_MONOTONIC
,
353 manager
->timeout_usec
, USEC_PER_SEC
,
354 on_event_timeout
, event
);
357 static int worker_spawn(Manager
*manager
, Event
*event
) {
358 _cleanup_(sd_device_monitor_unrefp
) sd_device_monitor
*worker_monitor
= NULL
;
363 /* listen for new events */
364 r
= device_monitor_new_full(&worker_monitor
, MONITOR_GROUP_NONE
, -1);
368 (void) sd_device_monitor_set_description(worker_monitor
, "worker");
370 /* allow the main daemon netlink address to send devices to the worker */
371 r
= device_monitor_allow_unicast_sender(worker_monitor
, manager
->monitor
);
373 return log_error_errno(r
, "Worker: Failed to set unicast sender: %m");
375 r
= device_monitor_enable_receiving(worker_monitor
);
377 return log_error_errno(r
, "Worker: Failed to enable receiving of device: %m");
379 r
= safe_fork("(udev-worker)", FORK_DEATHSIG_SIGTERM
, &pid
);
381 event
->state
= EVENT_QUEUED
;
382 return log_error_errno(r
, "Failed to fork() worker: %m");
385 _cleanup_(udev_worker_done
) UdevWorker w
= {
386 .monitor
= TAKE_PTR(worker_monitor
),
387 .properties
= TAKE_PTR(manager
->properties
),
388 .rules
= TAKE_PTR(manager
->rules
),
389 .pipe_fd
= TAKE_FD(manager
->worker_watch
[WRITE_END
]),
390 .inotify_fd
= TAKE_FD(manager
->inotify_fd
),
391 .exec_delay_usec
= manager
->exec_delay_usec
,
392 .timeout_usec
= manager
->timeout_usec
,
393 .timeout_signal
= manager
->timeout_signal
,
394 .log_level
= manager
->log_level
,
395 .blockdev_read_only
= manager
->blockdev_read_only
,
399 r
= udev_worker_main(&w
, event
->dev
);
401 _exit(r
< 0 ? EXIT_FAILURE
: EXIT_SUCCESS
);
404 r
= worker_new(&worker
, manager
, worker_monitor
, pid
);
406 return log_error_errno(r
, "Failed to create worker object: %m");
408 worker_attach_event(worker
, event
);
410 log_device_debug(event
->dev
, "Worker ["PID_FMT
"] is forked for processing SEQNUM=%"PRIu64
".", pid
, event
->seqnum
);
414 static int event_run(Event
*event
) {
415 static bool log_children_max_reached
= true;
421 assert(event
->manager
);
423 log_device_uevent(event
->dev
, "Device ready for processing");
425 (void) event_source_disable(event
->retry_event_source
);
427 manager
= event
->manager
;
428 HASHMAP_FOREACH(worker
, manager
->workers
) {
429 if (worker
->state
!= WORKER_IDLE
)
432 r
= device_monitor_send_device(manager
->monitor
, worker
->monitor
, event
->dev
);
434 log_device_error_errno(event
->dev
, r
, "Worker ["PID_FMT
"] did not accept message, killing the worker: %m",
436 (void) kill(worker
->pid
, SIGKILL
);
437 worker
->state
= WORKER_KILLED
;
440 worker_attach_event(worker
, event
);
441 return 1; /* event is now processing. */
444 if (hashmap_size(manager
->workers
) >= manager
->children_max
) {
445 /* Avoid spamming the debug logs if the limit is already reached and
446 * many events still need to be processed */
447 if (log_children_max_reached
&& manager
->children_max
> 1) {
448 log_debug("Maximum number (%u) of children reached.", hashmap_size(manager
->workers
));
449 log_children_max_reached
= false;
451 return 0; /* no free worker */
454 /* Re-enable the debug message for the next batch of events */
455 log_children_max_reached
= true;
457 /* start new worker and pass initial device */
458 r
= worker_spawn(manager
, event
);
462 return 1; /* event is now processing. */
/* Returns true when the two devpaths are equivalent, or one is a child of the
 * other — i.e. events for them must not be processed concurrently.
 *
 * After the common prefix is consumed: equal paths end together ('\0'=='\0'),
 * a parent/child pair leaves one string at '/' exactly when the other ended. */
bool devpath_conflict(const char *a, const char *b) {
        if (!a || !b)
                return false;

        for (; *a != '\0' && *b != '\0'; a++, b++)
                if (*a != *b)
                        return false;

        return *a == '/' || *b == '/' || *a == *b;
}
478 static int event_is_blocked(Event
*event
) {
479 Event
*loop_event
= NULL
;
482 /* lookup event for identical, parent, child device */
485 assert(event
->manager
);
486 assert(event
->blocker_seqnum
<= event
->seqnum
);
488 if (event
->retry_again_next_usec
> 0) {
491 r
= sd_event_now(event
->manager
->event
, CLOCK_BOOTTIME
, &now_usec
);
495 if (event
->retry_again_next_usec
> now_usec
)
499 if (event
->blocker_seqnum
== event
->seqnum
)
500 /* we have checked previously and no blocker found */
503 LIST_FOREACH(event
, e
, event
->manager
->events
) {
506 /* we already found a later event, earlier cannot block us, no need to check again */
507 if (loop_event
->seqnum
< event
->blocker_seqnum
)
510 /* event we checked earlier still exists, no need to check again */
511 if (loop_event
->seqnum
== event
->blocker_seqnum
)
514 /* found ourself, no later event can block us */
515 if (loop_event
->seqnum
>= event
->seqnum
)
518 /* found event we have not checked */
523 assert(loop_event
->seqnum
> event
->blocker_seqnum
&&
524 loop_event
->seqnum
< event
->seqnum
);
526 /* check if queue contains events we depend on */
527 LIST_FOREACH(event
, e
, loop_event
) {
530 /* found ourself, no later event can block us */
531 if (loop_event
->seqnum
>= event
->seqnum
)
534 if (streq_ptr(loop_event
->id
, event
->id
))
537 if (devpath_conflict(event
->devpath
, loop_event
->devpath
) ||
538 devpath_conflict(event
->devpath
, loop_event
->devpath_old
) ||
539 devpath_conflict(event
->devpath_old
, loop_event
->devpath
))
542 if (event
->devnode
&& streq_ptr(event
->devnode
, loop_event
->devnode
))
548 log_device_debug(event
->dev
, "SEQNUM=%" PRIu64
" blocked by SEQNUM=%" PRIu64
,
549 event
->seqnum
, loop_event
->seqnum
);
551 event
->blocker_seqnum
= loop_event
->seqnum
;
555 event
->blocker_seqnum
= event
->seqnum
;
559 static int event_queue_start(Manager
*manager
) {
564 if (!manager
->events
|| manager
->exit
|| manager
->stop_exec_queue
)
567 /* To make the stack directory /run/udev/links cleaned up later. */
568 manager
->udev_node_needs_cleanup
= true;
570 r
= event_source_disable(manager
->kill_workers_event
);
572 log_warning_errno(r
, "Failed to disable event source for cleaning up idle workers, ignoring: %m");
574 manager_reload(manager
, /* force = */ false);
576 LIST_FOREACH(event
, event
, manager
->events
) {
577 if (event
->state
!= EVENT_QUEUED
)
580 /* do not start event if parent or child event is still running or queued */
581 r
= event_is_blocked(event
);
585 log_device_warning_errno(event
->dev
, r
,
586 "Failed to check dependencies for event (SEQNUM=%"PRIu64
", ACTION=%s), "
587 "assuming there is no blocking event, ignoring: %m",
589 strna(device_action_to_string(event
->action
)));
591 r
= event_run(event
);
592 if (r
<= 0) /* 0 means there are no idle workers. Let's escape from the loop. */
599 static int on_event_retry(sd_event_source
*s
, uint64_t usec
, void *userdata
) {
600 /* This does nothing. The on_post() callback will start the event if there exists an idle worker. */
604 static int event_requeue(Event
*event
) {
609 assert(event
->manager
);
610 assert(event
->manager
->event
);
612 event
->timeout_warning_event
= sd_event_source_disable_unref(event
->timeout_warning_event
);
613 event
->timeout_event
= sd_event_source_disable_unref(event
->timeout_event
);
615 /* add a short delay to suppress busy loop */
616 r
= sd_event_now(event
->manager
->event
, CLOCK_BOOTTIME
, &now_usec
);
618 return log_device_warning_errno(event
->dev
, r
,
619 "Failed to get current time, "
620 "skipping event (SEQNUM=%"PRIu64
", ACTION=%s): %m",
621 event
->seqnum
, strna(device_action_to_string(event
->action
)));
623 if (event
->retry_again_timeout_usec
> 0 && event
->retry_again_timeout_usec
<= now_usec
)
624 return log_device_warning_errno(event
->dev
, SYNTHETIC_ERRNO(ETIMEDOUT
),
625 "The underlying block device is locked by a process more than %s, "
626 "skipping event (SEQNUM=%"PRIu64
", ACTION=%s).",
627 FORMAT_TIMESPAN(EVENT_RETRY_TIMEOUT_USEC
, USEC_PER_MINUTE
),
628 event
->seqnum
, strna(device_action_to_string(event
->action
)));
630 event
->retry_again_next_usec
= usec_add(now_usec
, EVENT_RETRY_INTERVAL_USEC
);
631 if (event
->retry_again_timeout_usec
== 0)
632 event
->retry_again_timeout_usec
= usec_add(now_usec
, EVENT_RETRY_TIMEOUT_USEC
);
634 r
= event_reset_time_relative(event
->manager
->event
, &event
->retry_event_source
,
635 CLOCK_MONOTONIC
, EVENT_RETRY_INTERVAL_USEC
, 0,
636 on_event_retry
, NULL
,
637 0, "retry-event", true);
639 return log_device_warning_errno(event
->dev
, r
, "Failed to reset timer event source for retrying event, "
640 "skipping event (SEQNUM=%"PRIu64
", ACTION=%s): %m",
641 event
->seqnum
, strna(device_action_to_string(event
->action
)));
643 if (event
->worker
&& event
->worker
->event
== event
)
644 event
->worker
->event
= NULL
;
645 event
->worker
= NULL
;
647 event
->state
= EVENT_QUEUED
;
651 static int event_queue_assume_block_device_unlocked(Manager
*manager
, sd_device
*dev
) {
655 /* When a new event for a block device is queued or we get an inotify event, assume that the
656 * device is not locked anymore. The assumption may not be true, but that should not cause any
657 * issues, as in that case events will be requeued soon. */
659 r
= udev_get_whole_disk(dev
, NULL
, &devname
);
663 LIST_FOREACH(event
, event
, manager
->events
) {
664 const char *event_devname
;
666 if (event
->state
!= EVENT_QUEUED
)
669 if (event
->retry_again_next_usec
== 0)
672 if (udev_get_whole_disk(event
->dev
, NULL
, &event_devname
) <= 0)
675 if (!streq(devname
, event_devname
))
678 event
->retry_again_next_usec
= 0;
684 static int event_queue_insert(Manager
*manager
, sd_device
*dev
) {
685 const char *devpath
, *devpath_old
= NULL
, *id
= NULL
, *devnode
= NULL
;
686 sd_device_action_t action
;
694 /* We only accepts devices received by device monitor. */
695 r
= sd_device_get_seqnum(dev
, &seqnum
);
699 r
= sd_device_get_action(dev
, &action
);
703 r
= sd_device_get_devpath(dev
, &devpath
);
707 r
= sd_device_get_property_value(dev
, "DEVPATH_OLD", &devpath_old
);
708 if (r
< 0 && r
!= -ENOENT
)
711 r
= device_get_device_id(dev
, &id
);
712 if (r
< 0 && r
!= -ENOENT
)
715 r
= sd_device_get_devname(dev
, &devnode
);
716 if (r
< 0 && r
!= -ENOENT
)
719 event
= new(Event
, 1);
725 .dev
= sd_device_ref(dev
),
730 .devpath_old
= devpath_old
,
732 .state
= EVENT_QUEUED
,
735 if (!manager
->events
) {
736 r
= touch("/run/udev/queue");
738 log_warning_errno(r
, "Failed to touch /run/udev/queue, ignoring: %m");
741 LIST_APPEND(event
, manager
->events
, event
);
743 log_device_uevent(dev
, "Device is queued");
748 static int on_uevent(sd_device_monitor
*monitor
, sd_device
*dev
, void *userdata
) {
749 Manager
*manager
= ASSERT_PTR(userdata
);
752 DEVICE_TRACE_POINT(kernel_uevent_received
, dev
);
754 device_ensure_usec_initialized(dev
, NULL
);
756 r
= event_queue_insert(manager
, dev
);
758 log_device_error_errno(dev
, r
, "Failed to insert device into event queue: %m");
762 (void) event_queue_assume_block_device_unlocked(manager
, dev
);
767 static int on_worker(sd_event_source
*s
, int fd
, uint32_t revents
, void *userdata
) {
768 Manager
*manager
= ASSERT_PTR(userdata
);
772 struct iovec iovec
= IOVEC_MAKE(&result
, sizeof(result
));
773 CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred
))) control
;
774 struct msghdr msghdr
= {
777 .msg_control
= &control
,
778 .msg_controllen
= sizeof(control
),
784 size
= recvmsg_safe(fd
, &msghdr
, MSG_DONTWAIT
);
788 /* nothing more to read */
791 return log_error_errno(size
, "Failed to receive message: %m");
793 cmsg_close_all(&msghdr
);
795 if (size
!= sizeof(result
)) {
796 log_warning("Ignoring worker message with invalid size %zi bytes", size
);
800 ucred
= CMSG_FIND_DATA(&msghdr
, SOL_SOCKET
, SCM_CREDENTIALS
, struct ucred
);
801 if (!ucred
|| ucred
->pid
<= 0) {
802 log_warning("Ignoring worker message without valid PID");
806 /* lookup worker who sent the signal */
807 worker
= hashmap_get(manager
->workers
, PID_TO_PTR(ucred
->pid
));
809 log_debug("Worker ["PID_FMT
"] returned, but is no longer tracked", ucred
->pid
);
813 if (worker
->state
== WORKER_KILLING
) {
814 worker
->state
= WORKER_KILLED
;
815 (void) kill(worker
->pid
, SIGTERM
);
816 } else if (worker
->state
!= WORKER_KILLED
)
817 worker
->state
= WORKER_IDLE
;
819 /* worker returned */
820 if (result
== EVENT_RESULT_TRY_AGAIN
&&
821 event_requeue(worker
->event
) < 0)
822 udev_broadcast_result(manager
->monitor
, worker
->event
->dev
, -ETIMEDOUT
);
824 /* When event_requeue() succeeds, worker->event is NULL, and event_free() handles NULL gracefully. */
825 event_free(worker
->event
);
831 static void manager_set_default_children_max(Manager
*manager
) {
832 uint64_t cpu_limit
, mem_limit
, cpu_count
= 1;
837 if (manager
->children_max
!= 0)
840 r
= cpus_in_affinity_mask();
842 log_warning_errno(r
, "Failed to determine number of local CPUs, ignoring: %m");
846 cpu_limit
= cpu_count
* 2 + 16;
847 mem_limit
= MAX(physical_memory() / (128*1024*1024), UINT64_C(10));
849 manager
->children_max
= MIN3(cpu_limit
, mem_limit
, WORKER_NUM_MAX
);
850 log_debug("Set children_max to %u", manager
->children_max
);
853 /* receive the udevd message from userspace */
854 static int on_ctrl_msg(UdevCtrl
*uctrl
, UdevCtrlMessageType type
, const UdevCtrlMessageValue
*value
, void *userdata
) {
855 Manager
*manager
= ASSERT_PTR(userdata
);
861 case UDEV_CTRL_SET_LOG_LEVEL
:
862 if ((value
->intval
& LOG_PRIMASK
) != value
->intval
) {
863 log_debug("Received invalid udev control message (SET_LOG_LEVEL, %i), ignoring.", value
->intval
);
867 log_debug("Received udev control message (SET_LOG_LEVEL), setting log_level=%i", value
->intval
);
869 r
= log_get_max_level();
870 if (r
== value
->intval
)
873 log_set_max_level(value
->intval
);
874 manager
->log_level
= value
->intval
;
875 manager_kill_workers(manager
, false);
877 case UDEV_CTRL_STOP_EXEC_QUEUE
:
878 log_debug("Received udev control message (STOP_EXEC_QUEUE)");
879 manager
->stop_exec_queue
= true;
881 case UDEV_CTRL_START_EXEC_QUEUE
:
882 log_debug("Received udev control message (START_EXEC_QUEUE)");
883 manager
->stop_exec_queue
= false;
884 /* It is not necessary to call event_queue_start() here, as it will be called in on_post() if necessary. */
886 case UDEV_CTRL_RELOAD
:
887 log_debug("Received udev control message (RELOAD)");
888 manager_reload(manager
, /* force = */ true);
890 case UDEV_CTRL_SET_ENV
: {
891 _unused_ _cleanup_free_
char *old_val
= NULL
, *old_key
= NULL
;
892 _cleanup_free_
char *key
= NULL
, *val
= NULL
;
895 eq
= strchr(value
->buf
, '=');
897 log_error("Invalid key format '%s'", value
->buf
);
901 key
= strndup(value
->buf
, eq
- value
->buf
);
907 old_val
= hashmap_remove2(manager
->properties
, key
, (void **) &old_key
);
909 r
= hashmap_ensure_allocated(&manager
->properties
, &string_hash_ops
);
917 log_debug("Received udev control message (ENV), unsetting '%s'", key
);
925 log_debug("Received udev control message (ENV), setting '%s=%s'", key
, val
);
927 r
= hashmap_put(manager
->properties
, key
, val
);
935 manager_kill_workers(manager
, false);
938 case UDEV_CTRL_SET_CHILDREN_MAX
:
939 if (value
->intval
< 0) {
940 log_debug("Received invalid udev control message (SET_MAX_CHILDREN, %i), ignoring.", value
->intval
);
944 log_debug("Received udev control message (SET_MAX_CHILDREN), setting children_max=%i", value
->intval
);
945 manager
->children_max
= value
->intval
;
947 /* When 0 is specified, determine the maximum based on the system resources. */
948 manager_set_default_children_max(manager
);
950 notify_ready(manager
);
953 log_debug("Received udev control message (PING)");
956 log_debug("Received udev control message (EXIT)");
957 manager_exit(manager
);
960 log_debug("Received unknown udev control message, ignoring");
966 static int synthesize_change_one(sd_device
*dev
, sd_device
*target
) {
970 const char *syspath
= NULL
;
971 (void) sd_device_get_syspath(target
, &syspath
);
972 log_device_debug(dev
, "device is closed, synthesising 'change' on %s", strna(syspath
));
975 r
= sd_device_trigger(target
, SD_DEVICE_CHANGE
);
977 return log_device_debug_errno(target
, r
, "Failed to trigger 'change' uevent: %m");
979 DEVICE_TRACE_POINT(synthetic_change_event
, dev
);
984 static int synthesize_change(sd_device
*dev
) {
985 _cleanup_(sd_device_enumerator_unrefp
) sd_device_enumerator
*e
= NULL
;
986 bool part_table_read
;
990 r
= sd_device_get_sysname(dev
, &sysname
);
994 if (startswith(sysname
, "dm-") || block_device_is_whole_disk(dev
) <= 0)
995 return synthesize_change_one(dev
, dev
);
997 r
= blockdev_reread_partition_table(dev
);
999 log_device_debug_errno(dev
, r
, "Failed to re-read partition table, ignoring: %m");
1000 part_table_read
= r
>= 0;
1002 /* search for partitions */
1003 r
= partition_enumerator_new(dev
, &e
);
1007 /* We have partitions and re-read the table, the kernel already sent out a "change"
1008 * event for the disk, and "remove/add" for all partitions. */
1009 if (part_table_read
&& sd_device_enumerator_get_device_first(e
))
1012 /* We have partitions but re-reading the partition table did not work, synthesize
1013 * "change" for the disk and all partitions. */
1014 r
= synthesize_change_one(dev
, dev
);
1015 FOREACH_DEVICE(e
, d
) {
1016 k
= synthesize_change_one(dev
, d
);
1017 if (k
< 0 && r
>= 0)
1024 static int on_inotify(sd_event_source
*s
, int fd
, uint32_t revents
, void *userdata
) {
1025 Manager
*manager
= ASSERT_PTR(userdata
);
1026 union inotify_event_buffer buffer
;
1030 l
= read(fd
, &buffer
, sizeof(buffer
));
1032 if (ERRNO_IS_TRANSIENT(errno
))
1035 return log_error_errno(errno
, "Failed to read inotify fd: %m");
1038 FOREACH_INOTIFY_EVENT_WARN(e
, buffer
, l
) {
1039 _cleanup_(sd_device_unrefp
) sd_device
*dev
= NULL
;
1040 const char *devnode
;
1042 /* Do not handle IN_IGNORED here. Especially, do not try to call udev_watch_end() from the
1043 * main process. Otherwise, the pair of the symlinks may become inconsistent, and several
1044 * garbage may remain. The old symlinks are removed by a worker that processes the
1045 * corresponding 'remove' uevent;
1046 * udev_event_execute_rules() -> event_execute_rules_on_remove() -> udev_watch_end(). */
1048 if (!FLAGS_SET(e
->mask
, IN_CLOSE_WRITE
))
1051 r
= device_new_from_watch_handle(&dev
, e
->wd
);
1053 /* Device may be removed just after closed. */
1054 log_debug_errno(r
, "Failed to create sd_device object from watch handle, ignoring: %m");
1058 r
= sd_device_get_devname(dev
, &devnode
);
1060 /* Also here, device may be already removed. */
1061 log_device_debug_errno(dev
, r
, "Failed to get device node, ignoring: %m");
1065 log_device_debug(dev
, "Received inotify event for %s.", devnode
);
1067 (void) event_queue_assume_block_device_unlocked(manager
, dev
);
1068 (void) synthesize_change(dev
);
1074 static int on_sigterm(sd_event_source
*s
, const struct signalfd_siginfo
*si
, void *userdata
) {
1075 Manager
*manager
= ASSERT_PTR(userdata
);
1077 manager_exit(manager
);
1082 static int on_sighup(sd_event_source
*s
, const struct signalfd_siginfo
*si
, void *userdata
) {
1083 Manager
*manager
= ASSERT_PTR(userdata
);
1085 manager_reload(manager
, /* force = */ true);
1090 static int on_sigchld(sd_event_source
*s
, const siginfo_t
*si
, void *userdata
) {
1091 Worker
*worker
= ASSERT_PTR(userdata
);
1092 Manager
*manager
= ASSERT_PTR(worker
->manager
);
1093 sd_device
*dev
= worker
->event
? ASSERT_PTR(worker
->event
->dev
) : NULL
;
1098 switch (si
->si_code
) {
1100 if (si
->si_status
== 0)
1101 log_device_debug(dev
, "Worker ["PID_FMT
"] exited.", si
->si_pid
);
1103 log_device_warning(dev
, "Worker ["PID_FMT
"] exited with return code %i.",
1104 si
->si_pid
, si
->si_status
);
1105 result
= EVENT_RESULT_EXIT_STATUS_BASE
+ si
->si_status
;
1110 log_device_warning(dev
, "Worker ["PID_FMT
"] terminated by signal %i (%s).",
1111 si
->si_pid
, si
->si_status
, signal_to_string(si
->si_status
));
1112 result
= EVENT_RESULT_SIGNAL_BASE
+ si
->si_status
;
1116 assert_not_reached();
1119 if (result
!= EVENT_RESULT_SUCCESS
&& dev
) {
1120 /* delete state from disk */
1121 device_delete_db(dev
);
1122 device_tag_index(dev
, NULL
, false);
1124 /* Forward kernel event to libudev listeners */
1125 udev_broadcast_result(manager
->monitor
, dev
, result
);
1128 worker_free(worker
);
1133 static int on_post(sd_event_source
*s
, void *userdata
) {
1134 Manager
*manager
= ASSERT_PTR(userdata
);
1136 if (manager
->events
) {
1137 /* Try to process pending events if idle workers exist. Why is this necessary?
1138 * When a worker finished an event and became idle, even if there was a pending event,
1139 * the corresponding device might have been locked and the processing of the event
1140 * delayed for a while, preventing the worker from processing the event immediately.
1141 * Now, the device may be unlocked. Let's try again! */
1142 event_queue_start(manager
);
1146 /* There are no queued events. Let's remove /run/udev/queue and clean up the idle processes. */
1148 if (unlink("/run/udev/queue") < 0) {
1149 if (errno
!= ENOENT
)
1150 log_warning_errno(errno
, "Failed to unlink /run/udev/queue, ignoring: %m");
1152 log_debug("No events are queued, removing /run/udev/queue.");
1154 if (!hashmap_isempty(manager
->workers
)) {
1155 /* There are idle workers */
1156 (void) event_reset_time_relative(manager
->event
, &manager
->kill_workers_event
,
1157 CLOCK_MONOTONIC
, 3 * USEC_PER_SEC
, USEC_PER_SEC
,
1158 on_kill_workers_event
, manager
,
1159 0, "kill-workers-event", false);
1163 /* There are no idle workers. */
1165 if (manager
->udev_node_needs_cleanup
) {
1166 (void) udev_node_cleanup();
1167 manager
->udev_node_needs_cleanup
= false;
1171 return sd_event_exit(manager
->event
, 0);
1173 if (manager
->cgroup
)
1174 /* cleanup possible left-over processes in our cgroup */
1175 (void) cg_kill(manager
->cgroup
, SIGKILL
, CGROUP_IGNORE_SELF
, /* set=*/ NULL
, /* kill_log= */ NULL
, /* userdata= */ NULL
);
1180 Manager
* manager_new(void) {
1183 manager
= new(Manager
, 1);
1187 *manager
= (Manager
) {
1188 .inotify_fd
= -EBADF
,
1189 .worker_watch
= EBADF_PAIR
,
1190 .log_level
= LOG_INFO
,
1191 .resolve_name_timing
= RESOLVE_NAME_EARLY
,
1192 .timeout_usec
= DEFAULT_WORKER_TIMEOUT_USEC
,
1193 .timeout_signal
= SIGKILL
,
1199 void manager_adjust_arguments(Manager
*manager
) {
1202 if (manager
->timeout_usec
< MIN_WORKER_TIMEOUT_USEC
) {
1203 log_debug("Timeout (%s) for processing event is too small, using the default: %s",
1204 FORMAT_TIMESPAN(manager
->timeout_usec
, 1),
1205 FORMAT_TIMESPAN(DEFAULT_WORKER_TIMEOUT_USEC
, 1));
1207 manager
->timeout_usec
= DEFAULT_WORKER_TIMEOUT_USEC
;
1210 if (manager
->exec_delay_usec
>= manager
->timeout_usec
) {
1211 log_debug("Delay (%s) for executing RUN= commands is too large compared with the timeout (%s) for event execution, ignoring the delay.",
1212 FORMAT_TIMESPAN(manager
->exec_delay_usec
, 1),
1213 FORMAT_TIMESPAN(manager
->timeout_usec
, 1));
1215 manager
->exec_delay_usec
= 0;
1219 int manager_init(Manager
*manager
, int fd_ctrl
, int fd_uevent
) {
1220 _cleanup_free_
char *cgroup
= NULL
;
1225 r
= udev_ctrl_new_from_fd(&manager
->ctrl
, fd_ctrl
);
1227 return log_error_errno(r
, "Failed to initialize udev control socket: %m");
1229 r
= udev_ctrl_enable_receiving(manager
->ctrl
);
1231 return log_error_errno(r
, "Failed to bind udev control socket: %m");
1233 r
= device_monitor_new_full(&manager
->monitor
, MONITOR_GROUP_KERNEL
, fd_uevent
);
1235 return log_error_errno(r
, "Failed to initialize device monitor: %m");
1237 (void) sd_device_monitor_set_description(manager
->monitor
, "manager");
1239 r
= device_monitor_enable_receiving(manager
->monitor
);
1241 return log_error_errno(r
, "Failed to bind netlink socket: %m");
1243 manager
->log_level
= log_get_max_level();
1245 r
= cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER
, 0, &cgroup
);
1247 log_debug_errno(r
, "Failed to get cgroup, ignoring: %m");
1248 else if (endswith(cgroup
, "/udev")) { /* If we are in a subcgroup /udev/ we assume it was delegated to us */
1249 log_debug("Running in delegated subcgroup '%s'.", cgroup
);
1250 manager
->cgroup
= TAKE_PTR(cgroup
);
1256 int manager_main(Manager
*manager
) {
1259 manager_set_default_children_max(manager
);
1261 /* unnamed socket from workers to the main daemon */
1262 r
= socketpair(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
, 0, manager
->worker_watch
);
1264 return log_error_errno(errno
, "Failed to create socketpair for communicating with workers: %m");
1266 fd_worker
= manager
->worker_watch
[READ_END
];
1268 r
= setsockopt_int(fd_worker
, SOL_SOCKET
, SO_PASSCRED
, true);
1270 return log_error_errno(r
, "Failed to enable SO_PASSCRED: %m");
1272 manager
->inotify_fd
= inotify_init1(IN_CLOEXEC
);
1273 if (manager
->inotify_fd
< 0)
1274 return log_error_errno(errno
, "Failed to create inotify descriptor: %m");
1276 udev_watch_restore(manager
->inotify_fd
);
1278 /* block and listen to all signals on signalfd */
1279 assert_se(sigprocmask_many(SIG_BLOCK
, NULL
, SIGTERM
, SIGINT
, SIGHUP
, SIGCHLD
, SIGRTMIN
+18, -1) >= 0);
1281 r
= sd_event_default(&manager
->event
);
1283 return log_error_errno(r
, "Failed to allocate event loop: %m");
1285 r
= sd_event_add_signal(manager
->event
, NULL
, SIGINT
, on_sigterm
, manager
);
1287 return log_error_errno(r
, "Failed to create SIGINT event source: %m");
1289 r
= sd_event_add_signal(manager
->event
, NULL
, SIGTERM
, on_sigterm
, manager
);
1291 return log_error_errno(r
, "Failed to create SIGTERM event source: %m");
1293 r
= sd_event_add_signal(manager
->event
, NULL
, SIGHUP
, on_sighup
, manager
);
1295 return log_error_errno(r
, "Failed to create SIGHUP event source: %m");
1297 r
= sd_event_set_watchdog(manager
->event
, true);
1299 return log_error_errno(r
, "Failed to create watchdog event source: %m");
1301 r
= udev_ctrl_attach_event(manager
->ctrl
, manager
->event
);
1303 return log_error_errno(r
, "Failed to attach event to udev control: %m");
1305 r
= udev_ctrl_start(manager
->ctrl
, on_ctrl_msg
, manager
);
1307 return log_error_errno(r
, "Failed to start udev control: %m");
1309 /* This needs to be after the inotify and uevent handling, to make sure
1310 * that the ping is send back after fully processing the pending uevents
1311 * (including the synthetic ones we may create due to inotify events).
1313 r
= sd_event_source_set_priority(udev_ctrl_get_event_source(manager
->ctrl
), SD_EVENT_PRIORITY_IDLE
);
1315 return log_error_errno(r
, "Failed to set IDLE event priority for udev control event source: %m");
1317 r
= sd_event_add_io(manager
->event
, &manager
->inotify_event
, manager
->inotify_fd
, EPOLLIN
, on_inotify
, manager
);
1319 return log_error_errno(r
, "Failed to create inotify event source: %m");
1321 r
= sd_device_monitor_attach_event(manager
->monitor
, manager
->event
);
1323 return log_error_errno(r
, "Failed to attach event to device monitor: %m");
1325 r
= sd_device_monitor_start(manager
->monitor
, on_uevent
, manager
);
1327 return log_error_errno(r
, "Failed to start device monitor: %m");
1329 r
= sd_event_add_io(manager
->event
, NULL
, fd_worker
, EPOLLIN
, on_worker
, manager
);
1331 return log_error_errno(r
, "Failed to create worker event source: %m");
1333 r
= sd_event_add_post(manager
->event
, NULL
, on_post
, manager
);
1335 return log_error_errno(r
, "Failed to create post event source: %m");
1337 /* Eventually, we probably want to do more here on memory pressure, for example, kill idle workers immediately */
1338 r
= sd_event_add_memory_pressure(manager
->event
, &manager
->memory_pressure_event_source
, NULL
, NULL
);
1340 log_full_errno(ERRNO_IS_NOT_SUPPORTED(r
) || ERRNO_IS_PRIVILEGE(r
) || (r
== -EHOSTDOWN
) ? LOG_DEBUG
: LOG_WARNING
, r
,
1341 "Failed to allocate memory pressure watch, ignoring: %m");
1343 r
= sd_event_add_signal(manager
->event
, &manager
->memory_pressure_event_source
, SIGRTMIN
+18, sigrtmin18_handler
, NULL
);
1345 return log_error_errno(r
, "Failed to allocate SIGRTMIN+18 event source, ignoring: %m");
1347 manager
->last_usec
= now(CLOCK_MONOTONIC
);
1349 udev_builtin_init();
1351 r
= udev_rules_load(&manager
->rules
, manager
->resolve_name_timing
);
1353 return log_error_errno(r
, "Failed to read udev rules: %m");
1355 r
= udev_rules_apply_static_dev_perms(manager
->rules
);
1357 log_warning_errno(r
, "Failed to apply permissions on static device nodes, ignoring: %m");
1359 notify_ready(manager
);
1361 r
= sd_event_loop(manager
->event
);
1363 log_error_errno(r
, "Event loop failed: %m");
1365 (void) sd_notify(/* unset= */ false, NOTIFY_STOPPING
);