]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/udev/udevd.c
tree-wide: use -EBADF for fd initialization
[thirdparty/systemd.git] / src / udev / udevd.c
CommitLineData
f13467ec 1/* SPDX-License-Identifier: GPL-2.0-or-later */
7fafc032 2/*
810adae9
LP
3 * Copyright © 2004 Chris Friesen <chris_friesen@sympatico.ca>
4 * Copyright © 2009 Canonical Ltd.
5 * Copyright © 2009 Scott James Remnant <scott@netsplit.com>
7fafc032
KS
6 */
7
7fafc032 8#include <errno.h>
618234a5
LP
9#include <fcntl.h>
10#include <getopt.h>
618234a5
LP
11#include <stdbool.h>
12#include <stddef.h>
7fafc032
KS
13#include <stdio.h>
14#include <stdlib.h>
618234a5 15#include <sys/epoll.h>
3ebdb81e 16#include <sys/file.h>
618234a5
LP
17#include <sys/inotify.h>
18#include <sys/ioctl.h>
19#include <sys/mount.h>
1e03b754 20#include <sys/prctl.h>
1e03b754 21#include <sys/signalfd.h>
dc117daa 22#include <sys/stat.h>
618234a5
LP
23#include <sys/time.h>
24#include <sys/wait.h>
25#include <unistd.h>
7fafc032 26
392ef7a2 27#include "sd-daemon.h"
693d371d 28#include "sd-event.h"
8314de1d 29
b5efdb8a 30#include "alloc-util.h"
9409174e 31#include "blockdev-util.h"
a1f4fd38 32#include "cgroup-setup.h"
194bbe33 33#include "cgroup-util.h"
618234a5 34#include "cpu-set-util.h"
5ba2dc25 35#include "dev-setup.h"
7f2e3a14 36#include "device-monitor-private.h"
abde5ea8 37#include "device-private.h"
70068602 38#include "device-util.h"
6467bda5 39#include "errno-list.h"
6d63048a 40#include "event-util.h"
3ffd4af2 41#include "fd-util.h"
a5c32cff 42#include "fileio.h"
f97b34a6 43#include "format-util.h"
f4f15635 44#include "fs-util.h"
a505965d 45#include "hashmap.h"
9e5fd717 46#include "inotify-util.h"
c004493c 47#include "io-util.h"
eefc66aa 48#include "limits-util.h"
40a57716 49#include "list.h"
0c5a109a 50#include "main-func.h"
5ea78a39 51#include "mkdir.h"
618234a5 52#include "netlink-util.h"
6bedfcbb 53#include "parse-util.h"
a1f4fd38 54#include "path-util.h"
294bf0c3 55#include "pretty-print.h"
4e731273 56#include "proc-cmdline.h"
618234a5
LP
57#include "process-util.h"
58#include "selinux-util.h"
59#include "signal-util.h"
8f328d36 60#include "socket-util.h"
07630cea 61#include "string-util.h"
49fe5c09 62#include "strv.h"
5ea78a39 63#include "strxcpyx.h"
46f0fbd8 64#include "syslog-util.h"
63e2d171 65#include "udevd.h"
07a26e42 66#include "udev-builtin.h"
7d68eb1b 67#include "udev-ctrl.h"
25de7aa7 68#include "udev-event.h"
10551728 69#include "udev-node.h"
618234a5 70#include "udev-util.h"
70068602 71#include "udev-watch.h"
ee104e11 72#include "user-util.h"
47350c5f 73#include "version.h"
7fafc032 74
88bd5a32 75#define WORKER_NUM_MAX 2048U
5d354e52
YW
76#define EVENT_RETRY_INTERVAL_USEC (200 * USEC_PER_MSEC)
77#define EVENT_RETRY_TIMEOUT_USEC (3 * USEC_PER_MINUTE)
88bd5a32 78
bba7a484
TG
79static bool arg_debug = false;
80static int arg_daemonize = false;
c4d44cba 81static ResolveNameTiming arg_resolve_name_timing = RESOLVE_NAME_EARLY;
216e8bbe 82static unsigned arg_children_max = 0;
6b92f429 83static usec_t arg_exec_delay_usec = 0;
bba7a484 84static usec_t arg_event_timeout_usec = 180 * USEC_PER_SEC;
e2099267 85static int arg_timeout_signal = SIGKILL;
95ac5230 86static bool arg_blockdev_read_only = false;
c0c6806b 87
d9239923
YW
88typedef struct Event Event;
89typedef struct Worker Worker;
90
c0c6806b 91typedef struct Manager {
693d371d 92 sd_event *event;
c0c6806b 93 Hashmap *workers;
d9239923 94 LIST_HEAD(Event, events);
a1f4fd38 95 char *cgroup;
cb49a4f2 96 pid_t pid; /* the process that originally allocated the manager object */
1a0bd015 97 int log_level;
c0c6806b 98
9a07157d 99 UdevRules *rules;
9b5150b6 100 Hashmap *properties;
c0c6806b 101
0bed242c
YW
102 sd_netlink *rtnl;
103
7f2e3a14 104 sd_device_monitor *monitor;
e0d61dac 105 UdevCtrl *ctrl;
e237d8cb
TG
106 int worker_watch[2];
107
df7ee959
YW
108 /* used by udev-watch */
109 int inotify_fd;
693d371d 110 sd_event_source *inotify_event;
df7ee959 111
eca195ec 112 sd_event_source *kill_workers_event;
693d371d 113
7c4c7e89
TG
114 usec_t last_usec;
115
10551728 116 bool udev_node_needs_cleanup;
481f24d1
YW
117 bool stop_exec_queue;
118 bool exit;
c0c6806b 119} Manager;
1e03b754 120
/* Processing state of a queued Event. */
typedef enum EventState {
        EVENT_UNDEF,    /* also used as a "match any state" wildcard by event_queue_cleanup() */
        EVENT_QUEUED,
        EVENT_RUNNING,  /* a worker is attached to the event */
} EventState;
1e03b754 126
d9239923 127typedef struct Event {
cb49a4f2 128 Manager *manager;
d9239923
YW
129 Worker *worker;
130 EventState state;
eb546b35
YW
131
132 sd_device *dev;
eb546b35 133
0c3d8182 134 sd_device_action_t action;
eb546b35 135 uint64_t seqnum;
a1fa99d8 136 uint64_t blocker_seqnum;
29d02458
YW
137 const char *id;
138 const char *devpath;
139 const char *devpath_old;
34458dbb 140 const char *devnode;
4f294ffd
YW
141
142 /* Used when the device is locked by another program. */
5d354e52
YW
143 usec_t retry_again_next_usec;
144 usec_t retry_again_timeout_usec;
4f294ffd 145 sd_event_source *retry_event_source;
d8f462b4
YW
146
147 sd_event_source *timeout_warning_event;
148 sd_event_source *timeout_event;
eb546b35 149
d9239923
YW
150 LIST_FIELDS(Event, event);
151} Event;
1e03b754 152
/* Lifecycle state of a worker process as tracked by the main daemon. */
typedef enum WorkerState {
        WORKER_UNDEF,
        WORKER_RUNNING,  /* an event is attached to the worker */
        WORKER_IDLE,     /* may receive the next event */
        WORKER_KILLED,   /* a termination signal has been sent */
        WORKER_KILLING,  /* was running when a non-forced kill was requested */
} WorkerState;
1e03b754 160
d9239923 161typedef struct Worker {
c0c6806b 162 Manager *manager;
912541b0 163 pid_t pid;
82e0b631 164 sd_event_source *child_event_source;
7f2e3a14 165 sd_device_monitor *monitor;
d9239923
YW
166 WorkerState state;
167 Event *event;
168} Worker;
1e03b754
KS
169
170/* passed from worker to main process */
5d354e52 171typedef enum EventResult {
6467bda5
YW
172 EVENT_RESULT_NERRNO_MIN = -ERRNO_MAX,
173 EVENT_RESULT_NERRNO_MAX = -1,
a79cba33 174 EVENT_RESULT_SUCCESS = 0,
6467bda5
YW
175 EVENT_RESULT_EXIT_STATUS_BASE = 0,
176 EVENT_RESULT_EXIT_STATUS_MAX = 255,
177 EVENT_RESULT_TRY_AGAIN = 256, /* when the block device is locked by another process. */
178 EVENT_RESULT_SIGNAL_BASE = 257,
179 EVENT_RESULT_SIGNAL_MAX = EVENT_RESULT_SIGNAL_BASE + _NSIG,
5d354e52 180 _EVENT_RESULT_MAX,
6467bda5 181 _EVENT_RESULT_INVALID = -EINVAL,
5d354e52 182} EventResult;
1e03b754 183
5393c528 184static Event *event_free(Event *event) {
c6aa11f2 185 if (!event)
5393c528 186 return NULL;
ba47b71c 187
40a57716 188 assert(event->manager);
c6aa11f2 189
40a57716 190 LIST_REMOVE(event, event->manager->events, event);
eb546b35 191 sd_device_unref(event->dev);
c6aa11f2 192
72151060
YW
193 /* Do not use sd_event_source_disable_unref() here, as this is called by both workers and the
194 * main process. */
4f294ffd 195 sd_event_source_unref(event->retry_event_source);
72151060
YW
196 sd_event_source_unref(event->timeout_warning_event);
197 sd_event_source_unref(event->timeout_event);
693d371d 198
c6aa11f2
TG
199 if (event->worker)
200 event->worker->event = NULL;
201
5393c528 202 return mfree(event);
aa8734ff 203}
7a770250 204
419ec631 205static void event_queue_cleanup(Manager *manager, EventState match_state) {
80a226b2 206 LIST_FOREACH(event, event, manager->events) {
419ec631
YW
207 if (match_state != EVENT_UNDEF && match_state != event->state)
208 continue;
209
210 event_free(event);
211 }
212}
213
d9239923 214static Worker *worker_free(Worker *worker) {
c6aa11f2 215 if (!worker)
75db809a 216 return NULL;
bc113de9 217
82e0b631
YW
218 if (worker->manager)
219 hashmap_remove(worker->manager->workers, PID_TO_PTR(worker->pid));
c0c6806b 220
82e0b631 221 sd_event_source_unref(worker->child_event_source);
7f2e3a14 222 sd_device_monitor_unref(worker->monitor);
c6aa11f2
TG
223 event_free(worker->event);
224
75db809a 225 return mfree(worker);
ff2c503d
KS
226}
227
d9239923
YW
228DEFINE_TRIVIAL_CLEANUP_FUNC(Worker*, worker_free);
229DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(worker_hash_op, void, trivial_hash_func, trivial_compare_func, Worker, worker_free);
fc465079 230
419ec631
YW
231static void manager_clear_for_worker(Manager *manager) {
232 assert(manager);
233
f777e745
YW
234 /* Do not use sd_event_source_disable_unref() here, as this is called by both workers and the
235 * main process. */
236 manager->inotify_event = sd_event_source_unref(manager->inotify_event);
237 manager->kill_workers_event = sd_event_source_unref(manager->kill_workers_event);
419ec631
YW
238
239 manager->event = sd_event_unref(manager->event);
240
241 manager->workers = hashmap_free(manager->workers);
242 event_queue_cleanup(manager, EVENT_UNDEF);
243
244 manager->monitor = sd_device_monitor_unref(manager->monitor);
245 manager->ctrl = udev_ctrl_unref(manager->ctrl);
246
247 manager->worker_watch[READ_END] = safe_close(manager->worker_watch[READ_END]);
248}
249
250static Manager* manager_free(Manager *manager) {
251 if (!manager)
252 return NULL;
253
254 udev_builtin_exit();
255
419ec631
YW
256 manager_clear_for_worker(manager);
257
258 sd_netlink_unref(manager->rtnl);
259
260 hashmap_free_free_free(manager->properties);
261 udev_rules_free(manager->rules);
262
263 safe_close(manager->inotify_fd);
264 safe_close_pair(manager->worker_watch);
265
a1f4fd38 266 free(manager->cgroup);
419ec631
YW
267 return mfree(manager);
268}
269
270DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_free);
271
82e0b631
YW
272static int on_sigchld(sd_event_source *s, const siginfo_t *si, void *userdata);
273
d9239923
YW
274static int worker_new(Worker **ret, Manager *manager, sd_device_monitor *worker_monitor, pid_t pid) {
275 _cleanup_(worker_freep) Worker *worker = NULL;
a505965d 276 int r;
3a19b32a
TG
277
278 assert(ret);
c0c6806b 279 assert(manager);
3a19b32a
TG
280 assert(worker_monitor);
281 assert(pid > 1);
282
d4053464
YW
283 /* close monitor, but keep address around */
284 device_monitor_disconnect(worker_monitor);
285
d9239923 286 worker = new(Worker, 1);
3a19b32a
TG
287 if (!worker)
288 return -ENOMEM;
289
d9239923 290 *worker = (Worker) {
d4053464
YW
291 .monitor = sd_device_monitor_ref(worker_monitor),
292 .pid = pid,
293 };
a505965d 294
82e0b631
YW
295 r = sd_event_add_child(manager->event, &worker->child_event_source, pid, WEXITED, on_sigchld, worker);
296 if (r < 0)
297 return r;
298
92a74c47 299 r = hashmap_ensure_put(&manager->workers, &worker_hash_op, PID_TO_PTR(pid), worker);
a505965d
TG
300 if (r < 0)
301 return r;
302
82e0b631 303 worker->manager = manager;
3a19b32a 304
82e0b631 305 *ret = TAKE_PTR(worker);
3a19b32a
TG
306 return 0;
307}
308
419ec631
YW
309static void manager_kill_workers(Manager *manager, bool force) {
310 Worker *worker;
4fa4d885 311
419ec631 312 assert(manager);
4fa4d885 313
419ec631
YW
314 HASHMAP_FOREACH(worker, manager->workers) {
315 if (worker->state == WORKER_KILLED)
316 continue;
4fa4d885 317
419ec631
YW
318 if (worker->state == WORKER_RUNNING && !force) {
319 worker->state = WORKER_KILLING;
320 continue;
321 }
4fa4d885 322
419ec631
YW
323 worker->state = WORKER_KILLED;
324 (void) kill(worker->pid, SIGTERM);
325 }
4fa4d885
TG
326}
327
419ec631
YW
328static void manager_exit(Manager *manager) {
329 assert(manager);
693d371d 330
419ec631 331 manager->exit = true;
693d371d 332
419ec631
YW
333 sd_notify(false,
334 "STOPPING=1\n"
335 "STATUS=Starting shutdown...");
39c19cf1 336
419ec631
YW
337 /* close sources of new events and discard buffered events */
338 manager->ctrl = udev_ctrl_unref(manager->ctrl);
0bed242c 339
9612da36 340 manager->inotify_event = sd_event_source_disable_unref(manager->inotify_event);
419ec631 341 manager->inotify_fd = safe_close(manager->inotify_fd);
0bed242c 342
7f2e3a14 343 manager->monitor = sd_device_monitor_unref(manager->monitor);
0bed242c 344
419ec631
YW
345 /* discard queued events and kill workers */
346 event_queue_cleanup(manager, EVENT_QUEUED);
347 manager_kill_workers(manager, true);
0bed242c
YW
348}
349
4bf4f50f
ZJS
350static void notify_ready(void) {
351 int r;
352
353 r = sd_notifyf(false,
354 "READY=1\n"
355 "STATUS=Processing with %u children at max", arg_children_max);
356 if (r < 0)
357 log_warning_errno(r, "Failed to send readiness notification, ignoring: %m");
358}
359
419ec631 360/* reload requested, HUP signal received, rules changed, builtin changed */
e9d1fae3
YW
361static void manager_reload(Manager *manager, bool force) {
362 _cleanup_(udev_rules_freep) UdevRules *rules = NULL;
363 usec_t now_usec;
364 int r;
365
419ec631 366 assert(manager);
b2d21d93 367
e9d1fae3
YW
368 assert_se(sd_event_now(manager->event, CLOCK_MONOTONIC, &now_usec) >= 0);
369 if (!force && now_usec < usec_add(manager->last_usec, 3 * USEC_PER_SEC))
370 /* check for changed config, every 3 seconds at most */
371 return;
372 manager->last_usec = now_usec;
373
374 /* Reload SELinux label database, to make the child inherit the up-to-date database. */
375 mac_selinux_maybe_reload();
376
377 /* Nothing changed. It is not necessary to reload. */
f9b3b990 378 if (!udev_rules_should_reload(manager->rules) && !udev_builtin_should_reload())
e9d1fae3
YW
379 return;
380
419ec631
YW
381 sd_notify(false,
382 "RELOADING=1\n"
383 "STATUS=Flushing configuration...");
76e62a4d 384
419ec631 385 manager_kill_workers(manager, false);
e9d1fae3 386
419ec631 387 udev_builtin_exit();
e9d1fae3
YW
388 udev_builtin_init();
389
390 r = udev_rules_load(&rules, arg_resolve_name_timing);
391 if (r < 0)
392 log_warning_errno(r, "Failed to read udev rules, using the previously loaded rules, ignoring: %m");
393 else
394 udev_rules_free_and_replace(manager->rules, rules);
693d371d 395
4bf4f50f 396 notify_ready();
419ec631 397}
e237d8cb 398
419ec631 399static int on_kill_workers_event(sd_event_source *s, uint64_t usec, void *userdata) {
99534007 400 Manager *manager = ASSERT_PTR(userdata);
e237d8cb 401
419ec631
YW
402 log_debug("Cleanup idle workers");
403 manager_kill_workers(manager, false);
e237d8cb 404
419ec631
YW
405 return 1;
406}
e237d8cb 407
a79cba33 408static void device_broadcast(sd_device_monitor *monitor, sd_device *dev, EventResult result) {
c17ab900
YW
409 int r;
410
411 assert(dev);
412
413 /* On exit, manager->monitor is already NULL. */
414 if (!monitor)
415 return;
416
a79cba33 417 if (result != EVENT_RESULT_SUCCESS) {
6467bda5
YW
418 (void) device_add_property(dev, "UDEV_WORKER_FAILED", "1");
419
420 switch (result) {
fb544617
YW
421 case EVENT_RESULT_NERRNO_MIN ... EVENT_RESULT_NERRNO_MAX: {
422 const char *str;
423
6467bda5 424 (void) device_add_propertyf(dev, "UDEV_WORKER_ERRNO", "%i", -result);
6467bda5 425
fb544617
YW
426 str = errno_to_name(result);
427 if (str)
428 (void) device_add_property(dev, "UDEV_WORKER_ERRNO_NAME", str);
429 break;
430 }
6467bda5
YW
431 case EVENT_RESULT_EXIT_STATUS_BASE ... EVENT_RESULT_EXIT_STATUS_MAX:
432 (void) device_add_propertyf(dev, "UDEV_WORKER_EXIT_STATUS", "%i", result - EVENT_RESULT_EXIT_STATUS_BASE);
433 break;
434
435 case EVENT_RESULT_TRY_AGAIN:
436 assert_not_reached();
437 break;
438
fb544617
YW
439 case EVENT_RESULT_SIGNAL_BASE ... EVENT_RESULT_SIGNAL_MAX: {
440 const char *str;
441
6467bda5 442 (void) device_add_propertyf(dev, "UDEV_WORKER_SIGNAL", "%i", result - EVENT_RESULT_SIGNAL_BASE);
6467bda5 443
fb544617
YW
444 str = signal_to_string(result - EVENT_RESULT_SIGNAL_BASE);
445 if (str)
446 (void) device_add_property(dev, "UDEV_WORKER_SIGNAL_NAME", str);
447 break;
448 }
6467bda5
YW
449 default:
450 log_device_warning(dev, "Unknown event result \"%i\", ignoring.", result);
451 }
452 }
453
c17ab900
YW
454 r = device_monitor_send_device(monitor, NULL, dev);
455 if (r < 0)
456 log_device_warning_errno(dev, r,
457 "Failed to broadcast event to libudev listeners, ignoring: %m");
458}
459
a79cba33 460static int worker_send_result(Manager *manager, EventResult result) {
5d354e52
YW
461 assert(manager);
462 assert(manager->worker_watch[WRITE_END] >= 0);
9a73bd7c 463
5d354e52 464 return loop_write(manager->worker_watch[WRITE_END], &result, sizeof(result), false);
9a73bd7c
TG
465}
466
78e278ad 467static int device_get_whole_disk(sd_device *dev, sd_device **ret_device, const char **ret_devname) {
b97897e3
YW
468 const char *val;
469 int r;
fee854ee 470
0bed242c 471 assert(dev);
a1130022
LP
472
473 if (device_for_action(dev, SD_DEVICE_REMOVE))
7b7959fb 474 goto irrelevant;
0bed242c 475
b97897e3
YW
476 r = sd_device_get_sysname(dev, &val);
477 if (r < 0)
478 return log_device_debug_errno(dev, r, "Failed to get sysname: %m");
479
a9e83209
YW
480 /* Exclude the following devices:
481 * For "dm-", see the comment added by e918a1b5a94f270186dca59156354acd2a596494.
482 * For "md", see the commit message of 2e5b17d01347d3c3118be2b8ad63d20415dbb1f0,
483 * but not sure the assumption is still valid even when partitions are created on the md
484 * devices, surprisingly which seems to be possible, see PR #22973.
485 * For "drbd", see the commit message of fee854ee8ccde0cd28e0f925dea18cce35f3993d. */
49fe5c09 486 if (STARTSWITH_SET(val, "dm-", "md", "drbd"))
7b7959fb 487 goto irrelevant;
0bed242c 488
8f71534e
YW
489 r = block_device_get_whole_disk(dev, &dev);
490 if (IN_SET(r,
491 -ENOTBLK, /* The device is not a block device. */
492 -ENODEV /* The whole disk device was not found, it may already be removed. */))
493 goto irrelevant;
494 if (r < 0)
495 return log_device_debug_errno(dev, r, "Failed to get whole disk device: %m");
0bed242c 496
b97897e3 497 r = sd_device_get_devname(dev, &val);
b97897e3
YW
498 if (r < 0)
499 return log_device_debug_errno(dev, r, "Failed to get devname: %m");
0bed242c 500
78e278ad
YW
501 if (ret_device)
502 *ret_device = dev;
503 if (ret_devname)
504 *ret_devname = val;
7b7959fb
YW
505 return 1;
506
507irrelevant:
78e278ad
YW
508 if (ret_device)
509 *ret_device = NULL;
510 if (ret_devname)
511 *ret_devname = NULL;
7b7959fb
YW
512 return 0;
513}
514
03a24ce7 515static int worker_lock_whole_disk(sd_device *dev, int *ret_fd) {
254d1313 516 _cleanup_close_ int fd = -EBADF;
78e278ad 517 sd_device *dev_whole_disk;
7b7959fb
YW
518 const char *val;
519 int r;
520
521 assert(dev);
522 assert(ret_fd);
523
524 /* Take a shared lock on the device node; this establishes a concept of device "ownership" to
525 * serialize device access. External processes holding an exclusive lock will cause udev to skip the
526 * event handling; in the case udev acquired the lock, the external process can block until udev has
527 * finished its event handling. */
528
78e278ad 529 r = device_get_whole_disk(dev, &dev_whole_disk, &val);
7b7959fb
YW
530 if (r < 0)
531 return r;
532 if (r == 0)
533 goto nolock;
534
32703bd1 535 fd = sd_device_open(dev_whole_disk, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
b97897e3 536 if (fd < 0) {
78e278ad 537 bool ignore = ERRNO_IS_DEVICE_ABSENT(fd);
ef400c38 538
78e278ad 539 log_device_debug_errno(dev, fd, "Failed to open '%s'%s: %m", val, ignore ? ", ignoring" : "");
7b7959fb 540 if (!ignore)
78e278ad 541 return fd;
7b7959fb
YW
542
543 goto nolock;
b97897e3 544 }
0bed242c
YW
545
546 if (flock(fd, LOCK_SH|LOCK_NB) < 0)
b97897e3 547 return log_device_debug_errno(dev, errno, "Failed to flock(%s): %m", val);
0bed242c
YW
548
549 *ret_fd = TAKE_FD(fd);
550 return 1;
7b7959fb
YW
551
552nolock:
254d1313 553 *ret_fd = -EBADF;
7b7959fb 554 return 0;
fee854ee
RK
555}
556
95ac5230 557static int worker_mark_block_device_read_only(sd_device *dev) {
254d1313 558 _cleanup_close_ int fd = -EBADF;
95ac5230
LP
559 const char *val;
560 int state = 1, r;
561
562 assert(dev);
563
564 if (!arg_blockdev_read_only)
565 return 0;
566
567 /* Do this only once, when the block device is new. If the device is later retriggered let's not
568 * toggle the bit again, so that people can boot up with full read-only mode and then unset the bit
569 * for specific devices only. */
a1130022 570 if (!device_for_action(dev, SD_DEVICE_ADD))
95ac5230
LP
571 return 0;
572
573 r = sd_device_get_subsystem(dev, &val);
574 if (r < 0)
575 return log_device_debug_errno(dev, r, "Failed to get subsystem: %m");
576
577 if (!streq(val, "block"))
578 return 0;
579
580 r = sd_device_get_sysname(dev, &val);
581 if (r < 0)
582 return log_device_debug_errno(dev, r, "Failed to get sysname: %m");
583
584 /* Exclude synthetic devices for now, this is supposed to be a safety feature to avoid modification
585 * of physical devices, and what sits on top of those doesn't really matter if we don't allow the
cb713f16 586 * underlying block devices to receive changes. */
95ac5230
LP
587 if (STARTSWITH_SET(val, "dm-", "md", "drbd", "loop", "nbd", "zram"))
588 return 0;
589
32703bd1 590 fd = sd_device_open(dev, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
95ac5230 591 if (fd < 0)
78e278ad 592 return log_device_debug_errno(dev, fd, "Failed to open '%s', ignoring: %m", val);
95ac5230
LP
593
594 if (ioctl(fd, BLKROSET, &state) < 0)
595 return log_device_warning_errno(dev, errno, "Failed to mark block device '%s' read-only: %m", val);
596
597 log_device_info(dev, "Successfully marked block device '%s' read-only.", val);
598 return 0;
599}
600
abde5ea8 601static int worker_process_device(Manager *manager, sd_device *dev) {
2e088715 602 _cleanup_(udev_event_freep) UdevEvent *udev_event = NULL;
254d1313 603 _cleanup_close_ int fd_lock = -EBADF;
0bed242c 604 int r;
912541b0 605
0bed242c
YW
606 assert(manager);
607 assert(dev);
608
b2d9e58f 609 log_device_uevent(dev, "Processing device");
abde5ea8 610
1a0bd015 611 udev_event = udev_event_new(dev, arg_exec_delay_usec, manager->rtnl, manager->log_level);
0bed242c
YW
612 if (!udev_event)
613 return -ENOMEM;
614
5d354e52
YW
615 /* If this is a block device and the device is locked currently via the BSD advisory locks,
616 * someone else is using it exclusively. We don't run our udev rules now to not interfere.
617 * Instead of processing the event, we requeue the event and will try again after a delay.
618 *
619 * The user-facing side of this: https://systemd.io/BLOCK_DEVICE_LOCKING */
03a24ce7 620 r = worker_lock_whole_disk(dev, &fd_lock);
6467bda5
YW
621 if (r == -EAGAIN)
622 return EVENT_RESULT_TRY_AGAIN;
b6aab8ef 623 if (r < 0)
0bed242c 624 return r;
912541b0 625
95ac5230
LP
626 (void) worker_mark_block_device_read_only(dev);
627
0bed242c 628 /* apply rules, create node, symlinks */
df7ee959
YW
629 r = udev_event_execute_rules(
630 udev_event,
631 manager->inotify_fd,
632 arg_event_timeout_usec,
633 arg_timeout_signal,
634 manager->properties,
635 manager->rules);
99058cd6
YW
636 if (r < 0)
637 return r;
638
e2099267 639 udev_event_execute_run(udev_event, arg_event_timeout_usec, arg_timeout_signal);
2dd9f98d 640
0bed242c
YW
641 if (!manager->rtnl)
642 /* in case rtnl was initialized */
643 manager->rtnl = sd_netlink_ref(udev_event->rtnl);
912541b0 644
04b25410
YW
645 if (udev_event->inotify_watch) {
646 r = udev_watch_begin(manager->inotify_fd, dev);
647 if (r < 0 && r != -ENOENT) /* The device may be already removed, ignore -ENOENT. */
648 log_device_warning_errno(dev, r, "Failed to add inotify watch, ignoring: %m");
649 }
912541b0 650
b2d9e58f 651 log_device_uevent(dev, "Device processed");
0bed242c
YW
652 return 0;
653}
912541b0 654
e2130348 655static int worker_device_monitor_handler(sd_device_monitor *monitor, sd_device *dev, void *userdata) {
99534007 656 Manager *manager = ASSERT_PTR(userdata);
e2130348
YW
657 int r;
658
659 assert(dev);
e2130348
YW
660
661 r = worker_process_device(manager, dev);
6467bda5 662 if (r == EVENT_RESULT_TRY_AGAIN)
5d354e52 663 /* if we couldn't acquire the flock(), then requeue the event */
6467bda5
YW
664 log_device_debug(dev, "Block device is currently locked, requeueing the event.");
665 else {
666 if (r < 0)
667 log_device_warning_errno(dev, r, "Failed to process device, ignoring: %m");
e2130348 668
5abee64e 669 /* send processed event back to libudev listeners */
6467bda5
YW
670 device_broadcast(monitor, dev, r);
671 }
e2130348
YW
672
673 /* send udevd the result of the event execution */
6467bda5 674 r = worker_send_result(manager, r);
e2130348
YW
675 if (r < 0)
676 log_device_warning_errno(dev, r, "Failed to send signal to main daemon, ignoring: %m");
677
1a0bd015 678 /* Reset the log level, as it might be changed by "OPTIONS=log_level=". */
3cc6b14a 679 log_set_max_level(manager->log_level);
1a0bd015 680
e2130348
YW
681 return 1;
682}
683
7f2e3a14
YW
684static int worker_main(Manager *_manager, sd_device_monitor *monitor, sd_device *first_device) {
685 _cleanup_(sd_device_unrefp) sd_device *dev = first_device;
0bed242c 686 _cleanup_(manager_freep) Manager *manager = _manager;
44dcf454 687 int r;
145dae7e 688
0bed242c
YW
689 assert(manager);
690 assert(monitor);
691 assert(dev);
912541b0 692
44ee03d1 693 assert_se(unsetenv("NOTIFY_SOCKET") == 0);
3b64e4d4 694
e2130348
YW
695 assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, -1) >= 0);
696
697 /* Reset OOM score, we only protect the main daemon. */
698 r = set_oom_score_adjust(0);
699 if (r < 0)
700 log_debug_errno(r, "Failed to reset OOM score, ignoring: %m");
912541b0 701
7802194a 702 /* Clear unnecessary data in Manager object. */
e2130348 703 manager_clear_for_worker(manager);
3ebdb81e 704
e2130348
YW
705 r = sd_event_new(&manager->event);
706 if (r < 0)
707 return log_error_errno(r, "Failed to allocate event loop: %m");
912541b0 708
e2130348
YW
709 r = sd_event_add_signal(manager->event, NULL, SIGTERM, NULL, NULL);
710 if (r < 0)
711 return log_error_errno(r, "Failed to set SIGTERM event: %m");
4c83d994 712
e2130348
YW
713 r = sd_device_monitor_attach_event(monitor, manager->event);
714 if (r < 0)
715 return log_error_errno(r, "Failed to attach event loop to device monitor: %m");
912541b0 716
e2130348 717 r = sd_device_monitor_start(monitor, worker_device_monitor_handler, manager);
0bed242c 718 if (r < 0)
e2130348 719 return log_error_errno(r, "Failed to start device monitor: %m");
912541b0 720
e2130348
YW
721 /* Process first device */
722 (void) worker_device_monitor_handler(monitor, dev, manager);
912541b0 723
e2130348
YW
724 r = sd_event_loop(manager->event);
725 if (r < 0)
726 return log_error_errno(r, "Event loop failed: %m");
727
44dcf454 728 return 0;
0bed242c
YW
729}
730
419ec631 731static int on_event_timeout(sd_event_source *s, uint64_t usec, void *userdata) {
99534007 732 Event *event = ASSERT_PTR(userdata);
419ec631 733
419ec631
YW
734 assert(event->worker);
735
736 kill_and_sigcont(event->worker->pid, arg_timeout_signal);
737 event->worker->state = WORKER_KILLED;
738
739 log_device_error(event->dev, "Worker ["PID_FMT"] processing SEQNUM=%"PRIu64" killed", event->worker->pid, event->seqnum);
740
741 return 1;
742}
743
744static int on_event_timeout_warning(sd_event_source *s, uint64_t usec, void *userdata) {
99534007 745 Event *event = ASSERT_PTR(userdata);
419ec631 746
419ec631
YW
747 assert(event->worker);
748
749 log_device_warning(event->dev, "Worker ["PID_FMT"] processing SEQNUM=%"PRIu64" is taking a long time", event->worker->pid, event->seqnum);
750
751 return 1;
752}
753
754static void worker_attach_event(Worker *worker, Event *event) {
755 sd_event *e;
756
757 assert(worker);
758 assert(worker->manager);
759 assert(event);
760 assert(!event->worker);
761 assert(!worker->event);
762
763 worker->state = WORKER_RUNNING;
764 worker->event = event;
765 event->state = EVENT_RUNNING;
766 event->worker = worker;
767
768 e = worker->manager->event;
769
770 (void) sd_event_add_time_relative(e, &event->timeout_warning_event, CLOCK_MONOTONIC,
771 udev_warn_timeout(arg_event_timeout_usec), USEC_PER_SEC,
772 on_event_timeout_warning, event);
773
774 (void) sd_event_add_time_relative(e, &event->timeout_event, CLOCK_MONOTONIC,
775 arg_event_timeout_usec, USEC_PER_SEC,
776 on_event_timeout, event);
777}
778
d9239923 779static int worker_spawn(Manager *manager, Event *event) {
7f2e3a14 780 _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *worker_monitor = NULL;
d9239923 781 Worker *worker;
0bed242c 782 pid_t pid;
7443654e 783 int r;
0bed242c
YW
784
785 /* listen for new events */
7f2e3a14
YW
786 r = device_monitor_new_full(&worker_monitor, MONITOR_GROUP_NONE, -1);
787 if (r < 0)
788 return r;
7443654e 789
f714ecd4
YW
790 (void) sd_device_monitor_set_description(worker_monitor, "worker");
791
0bed242c 792 /* allow the main daemon netlink address to send devices to the worker */
7f2e3a14
YW
793 r = device_monitor_allow_unicast_sender(worker_monitor, manager->monitor);
794 if (r < 0)
795 return log_error_errno(r, "Worker: Failed to set unicast sender: %m");
796
797 r = device_monitor_enable_receiving(worker_monitor);
0bed242c 798 if (r < 0)
7f2e3a14 799 return log_error_errno(r, "Worker: Failed to enable receiving of device: %m");
0bed242c 800
ff86c92e 801 r = safe_fork(NULL, FORK_DEATHSIG, &pid);
7443654e
YW
802 if (r < 0) {
803 event->state = EVENT_QUEUED;
804 return log_error_errno(r, "Failed to fork() worker: %m");
805 }
806 if (r == 0) {
b428efa5
MS
807 DEVICE_TRACE_POINT(worker_spawned, event->dev, getpid());
808
7443654e 809 /* Worker process */
eb546b35 810 r = worker_main(manager, worker_monitor, sd_device_ref(event->dev));
baa30fbc 811 log_close();
8b46c3fc 812 _exit(r < 0 ? EXIT_FAILURE : EXIT_SUCCESS);
912541b0 813 }
e03c7cc2 814
7443654e
YW
815 r = worker_new(&worker, manager, worker_monitor, pid);
816 if (r < 0)
817 return log_error_errno(r, "Failed to create worker object: %m");
e03c7cc2 818
7443654e 819 worker_attach_event(worker, event);
39c19cf1 820
eb546b35 821 log_device_debug(event->dev, "Worker ["PID_FMT"] is forked for processing SEQNUM=%"PRIu64".", pid, event->seqnum);
7443654e 822 return 0;
7fafc032
KS
823}
824
f2a5412b 825static int event_run(Event *event) {
5406c368 826 static bool log_children_max_reached = true;
f2a5412b 827 Manager *manager;
d9239923 828 Worker *worker;
7f2e3a14 829 int r;
912541b0 830
c0c6806b 831 assert(event);
f2a5412b 832 assert(event->manager);
c0c6806b 833
b2d9e58f 834 log_device_uevent(event->dev, "Device ready for processing");
7600dbb8 835
4f294ffd
YW
836 (void) event_source_disable(event->retry_event_source);
837
f2a5412b 838 manager = event->manager;
90e74a66 839 HASHMAP_FOREACH(worker, manager->workers) {
912541b0
KS
840 if (worker->state != WORKER_IDLE)
841 continue;
842
eb546b35 843 r = device_monitor_send_device(manager->monitor, worker->monitor, event->dev);
7f2e3a14 844 if (r < 0) {
eb546b35 845 log_device_error_errno(event->dev, r, "Worker ["PID_FMT"] did not accept message, killing the worker: %m",
7f2e3a14 846 worker->pid);
cb542e84 847 (void) kill(worker->pid, SIGKILL);
912541b0
KS
848 worker->state = WORKER_KILLED;
849 continue;
850 }
39c19cf1 851 worker_attach_event(worker, event);
f2a5412b 852 return 1; /* event is now processing. */
912541b0
KS
853 }
854
c0c6806b 855 if (hashmap_size(manager->workers) >= arg_children_max) {
5406c368
FB
856 /* Avoid spamming the debug logs if the limit is already reached and
857 * many events still need to be processed */
858 if (log_children_max_reached && arg_children_max > 1) {
044497e2 859 log_debug("Maximum number (%u) of children reached.", hashmap_size(manager->workers));
5406c368
FB
860 log_children_max_reached = false;
861 }
f2a5412b 862 return 0; /* no free worker */
912541b0
KS
863 }
864
5406c368
FB
865 /* Re-enable the debug message for the next batch of events */
866 log_children_max_reached = true;
867
912541b0 868 /* start new worker and pass initial device */
f2a5412b
YW
869 r = worker_spawn(manager, event);
870 if (r < 0)
871 return r;
872
873 return 1; /* event is now processing. */
1e03b754
KS
874}
875
a1fa99d8 876static int event_is_blocked(Event *event) {
03677889 877 Event *loop_event = NULL;
29d02458 878 int r;
eb546b35 879
a1fa99d8
YW
880 /* lookup event for identical, parent, child device */
881
044ac33c
YW
882 assert(event);
883 assert(event->manager);
884 assert(event->blocker_seqnum <= event->seqnum);
885
5d354e52
YW
886 if (event->retry_again_next_usec > 0) {
887 usec_t now_usec;
888
ba4e0427 889 r = sd_event_now(event->manager->event, CLOCK_BOOTTIME, &now_usec);
5d354e52
YW
890 if (r < 0)
891 return r;
892
400e3d21 893 if (event->retry_again_next_usec > now_usec)
5d354e52
YW
894 return true;
895 }
896
044ac33c
YW
897 if (event->blocker_seqnum == event->seqnum)
898 /* we have checked previously and no blocker found */
899 return false;
900
03677889
YW
901 LIST_FOREACH(event, e, event->manager->events) {
902 loop_event = e;
903
044ac33c
YW
904 /* we already found a later event, earlier cannot block us, no need to check again */
905 if (loop_event->seqnum < event->blocker_seqnum)
906 continue;
907
908 /* event we checked earlier still exists, no need to check again */
909 if (loop_event->seqnum == event->blocker_seqnum)
910 return true;
911
912 /* found ourself, no later event can block us */
913 if (loop_event->seqnum >= event->seqnum)
914 goto no_blocker;
915
916 /* found event we have not checked */
917 break;
918 }
919
920 assert(loop_event);
921 assert(loop_event->seqnum > event->blocker_seqnum &&
922 loop_event->seqnum < event->seqnum);
923
912541b0 924 /* check if queue contains events we depend on */
03677889 925 LIST_FOREACH(event, e, loop_event) {
03677889
YW
926 loop_event = e;
927
912541b0
KS
928 /* found ourself, no later event can block us */
929 if (loop_event->seqnum >= event->seqnum)
044ac33c 930 goto no_blocker;
912541b0 931
29d02458 932 if (streq_ptr(loop_event->id, event->id))
a1fa99d8 933 break;
912541b0 934
a1af9668
YW
935 if (devpath_conflict(event->devpath, loop_event->devpath) ||
936 devpath_conflict(event->devpath, loop_event->devpath_old) ||
937 devpath_conflict(event->devpath_old, loop_event->devpath))
a1fa99d8 938 break;
912541b0 939
34458dbb 940 if (event->devnode && streq_ptr(event->devnode, loop_event->devnode))
a1fa99d8 941 break;
912541b0
KS
942 }
943
a1fa99d8 944 assert(loop_event);
0bd0407e 945
7600dbb8
UKK
946 log_device_debug(event->dev, "SEQNUM=%" PRIu64 " blocked by SEQNUM=%" PRIu64,
947 event->seqnum, loop_event->seqnum);
948
a1fa99d8 949 event->blocker_seqnum = loop_event->seqnum;
0bd0407e 950 return true;
044ac33c
YW
951
952no_blocker:
953 event->blocker_seqnum = event->seqnum;
954 return false;
7fafc032
KS
955}
956
0744e74c 957static int event_queue_start(Manager *manager) {
0725c4b9 958 int r;
8ab44e3f 959
c0c6806b
TG
960 assert(manager);
961
64903d18 962 if (!manager->events || manager->exit || manager->stop_exec_queue)
0744e74c 963 return 0;
7c4c7e89 964
10551728
YW
965 /* To make the stack directory /run/udev/links cleaned up later. */
966 manager->udev_node_needs_cleanup = true;
967
0725c4b9
YW
968 r = event_source_disable(manager->kill_workers_event);
969 if (r < 0)
970 log_warning_errno(r, "Failed to disable event source for cleaning up idle workers, ignoring: %m");
eca195ec 971
e9d1fae3 972 manager_reload(manager, /* force = */ false);
c02fb804 973
80a226b2 974 LIST_FOREACH(event, event, manager->events) {
912541b0
KS
975 if (event->state != EVENT_QUEUED)
976 continue;
0bc74ea7 977
c6f78234
YW
978 /* do not start event if parent or child event is still running or queued */
979 r = event_is_blocked(event);
2d40f02e
YW
980 if (r > 0)
981 continue;
0c3d8182 982 if (r < 0)
c6f78234 983 log_device_warning_errno(event->dev, r,
2d40f02e
YW
984 "Failed to check dependencies for event (SEQNUM=%"PRIu64", ACTION=%s), "
985 "assuming there is no blocking event, ignoring: %m",
c6f78234 986 event->seqnum,
0c3d8182 987 strna(device_action_to_string(event->action)));
fc465079 988
f2a5412b 989 r = event_run(event);
2d40f02e 990 if (r <= 0) /* 0 means there are no idle workers. Let's escape from the loop. */
f2a5412b 991 return r;
912541b0 992 }
0744e74c
YW
993
994 return 0;
1e03b754
KS
995}
996
4f294ffd
YW
997static int on_event_retry(sd_event_source *s, uint64_t usec, void *userdata) {
998 /* This does nothing. The on_post() callback will start the event if there exists an idle worker. */
999 return 1;
1000}
1001
5d354e52
YW
1002static int event_requeue(Event *event) {
1003 usec_t now_usec;
1004 int r;
1005
1006 assert(event);
1007 assert(event->manager);
1008 assert(event->manager->event);
1009
1010 event->timeout_warning_event = sd_event_source_disable_unref(event->timeout_warning_event);
1011 event->timeout_event = sd_event_source_disable_unref(event->timeout_event);
1012
1013 /* add a short delay to suppress busy loop */
ba4e0427 1014 r = sd_event_now(event->manager->event, CLOCK_BOOTTIME, &now_usec);
5d354e52
YW
1015 if (r < 0)
1016 return log_device_warning_errno(event->dev, r,
1017 "Failed to get current time, "
1018 "skipping event (SEQNUM=%"PRIu64", ACTION=%s): %m",
1019 event->seqnum, strna(device_action_to_string(event->action)));
1020
1021 if (event->retry_again_timeout_usec > 0 && event->retry_again_timeout_usec <= now_usec)
1022 return log_device_warning_errno(event->dev, SYNTHETIC_ERRNO(ETIMEDOUT),
1023 "The underlying block device is locked by a process more than %s, "
1024 "skipping event (SEQNUM=%"PRIu64", ACTION=%s).",
1025 FORMAT_TIMESPAN(EVENT_RETRY_TIMEOUT_USEC, USEC_PER_MINUTE),
1026 event->seqnum, strna(device_action_to_string(event->action)));
1027
1028 event->retry_again_next_usec = usec_add(now_usec, EVENT_RETRY_INTERVAL_USEC);
1029 if (event->retry_again_timeout_usec == 0)
1030 event->retry_again_timeout_usec = usec_add(now_usec, EVENT_RETRY_TIMEOUT_USEC);
1031
4f294ffd
YW
1032 r = event_reset_time_relative(event->manager->event, &event->retry_event_source,
1033 CLOCK_MONOTONIC, EVENT_RETRY_INTERVAL_USEC, 0,
1034 on_event_retry, NULL,
1035 0, "retry-event", true);
1036 if (r < 0)
1037 return log_device_warning_errno(event->dev, r, "Failed to reset timer event source for retrying event, "
1038 "skipping event (SEQNUM=%"PRIu64", ACTION=%s): %m",
1039 event->seqnum, strna(device_action_to_string(event->action)));
1040
5d354e52
YW
1041 if (event->worker && event->worker->event == event)
1042 event->worker->event = NULL;
1043 event->worker = NULL;
1044
1045 event->state = EVENT_QUEUED;
1046 return 0;
1047}
1048
82a5de9f
YW
1049static int event_queue_assume_block_device_unlocked(Manager *manager, sd_device *dev) {
1050 const char *devname;
1051 int r;
1052
1053 /* When a new event for a block device is queued or we get an inotify event, assume that the
1054 * device is not locked anymore. The assumption may not be true, but that should not cause any
1055 * issues, as in that case events will be requeued soon. */
1056
78e278ad 1057 r = device_get_whole_disk(dev, NULL, &devname);
82a5de9f
YW
1058 if (r <= 0)
1059 return r;
1060
1061 LIST_FOREACH(event, event, manager->events) {
1062 const char *event_devname;
1063
1064 if (event->state != EVENT_QUEUED)
1065 continue;
1066
1067 if (event->retry_again_next_usec == 0)
1068 continue;
1069
78e278ad 1070 if (device_get_whole_disk(event->dev, NULL, &event_devname) <= 0)
82a5de9f
YW
1071 continue;
1072
1073 if (!streq(devname, event_devname))
1074 continue;
1075
1076 event->retry_again_next_usec = 0;
1077 }
1078
1079 return 0;
1080}
1081
419ec631 1082static int event_queue_insert(Manager *manager, sd_device *dev) {
34458dbb 1083 const char *devpath, *devpath_old = NULL, *id = NULL, *devnode = NULL;
0c3d8182 1084 sd_device_action_t action;
419ec631 1085 uint64_t seqnum;
c9473aaa 1086 Event *event;
419ec631 1087 int r;
ff2c503d 1088
419ec631
YW
1089 assert(manager);
1090 assert(dev);
ff2c503d 1091
419ec631
YW
1092 /* only one process can add events to the queue */
1093 assert(manager->pid == getpid_cached());
1094
1095 /* We only accepts devices received by device monitor. */
1096 r = sd_device_get_seqnum(dev, &seqnum);
1097 if (r < 0)
1098 return r;
1099
0c3d8182
YW
1100 r = sd_device_get_action(dev, &action);
1101 if (r < 0)
1102 return r;
1103
29d02458
YW
1104 r = sd_device_get_devpath(dev, &devpath);
1105 if (r < 0)
1106 return r;
1107
1108 r = sd_device_get_property_value(dev, "DEVPATH_OLD", &devpath_old);
1109 if (r < 0 && r != -ENOENT)
1110 return r;
1111
1112 r = device_get_device_id(dev, &id);
1113 if (r < 0 && r != -ENOENT)
1114 return r;
1115
34458dbb
YW
1116 r = sd_device_get_devname(dev, &devnode);
1117 if (r < 0 && r != -ENOENT)
1118 return r;
1119
419ec631
YW
1120 event = new(Event, 1);
1121 if (!event)
1122 return -ENOMEM;
1123
1124 *event = (Event) {
1125 .manager = manager,
1126 .dev = sd_device_ref(dev),
419ec631 1127 .seqnum = seqnum,
0c3d8182 1128 .action = action,
29d02458
YW
1129 .id = id,
1130 .devpath = devpath,
1131 .devpath_old = devpath_old,
34458dbb 1132 .devnode = devnode,
419ec631
YW
1133 .state = EVENT_QUEUED,
1134 };
1135
64903d18 1136 if (!manager->events) {
419ec631
YW
1137 r = touch("/run/udev/queue");
1138 if (r < 0)
6be97d67 1139 log_warning_errno(r, "Failed to touch /run/udev/queue, ignoring: %m");
912541b0 1140 }
419ec631
YW
1141
1142 LIST_APPEND(event, manager->events, event);
1143
1144 log_device_uevent(dev, "Device is queued");
1145
1146 return 0;
1147}
1148
1149static int on_uevent(sd_device_monitor *monitor, sd_device *dev, void *userdata) {
99534007 1150 Manager *manager = ASSERT_PTR(userdata);
419ec631
YW
1151 int r;
1152
419ec631
YW
1153 DEVICE_TRACE_POINT(kernel_uevent_received, dev);
1154
1155 device_ensure_usec_initialized(dev, NULL);
1156
1157 r = event_queue_insert(manager, dev);
1158 if (r < 0) {
1159 log_device_error_errno(dev, r, "Failed to insert device into event queue: %m");
1160 return 1;
1161 }
1162
82a5de9f
YW
1163 (void) event_queue_assume_block_device_unlocked(manager, dev);
1164
419ec631 1165 return 1;
ff2c503d
KS
1166}
1167
e82e8fa5 1168static int on_worker(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
99534007 1169 Manager *manager = ASSERT_PTR(userdata);
c0c6806b 1170
912541b0 1171 for (;;) {
a79cba33 1172 EventResult result;
5d354e52 1173 struct iovec iovec = IOVEC_MAKE(&result, sizeof(result));
fb29cdbe 1174 CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred))) control;
979558f3
TG
1175 struct msghdr msghdr = {
1176 .msg_iov = &iovec,
1177 .msg_iovlen = 1,
1178 .msg_control = &control,
1179 .msg_controllen = sizeof(control),
1180 };
912541b0 1181 ssize_t size;
371d72e0 1182 struct ucred *ucred;
d9239923 1183 Worker *worker;
912541b0 1184
3691bcf3
LP
1185 size = recvmsg_safe(fd, &msghdr, MSG_DONTWAIT);
1186 if (size == -EINTR)
1187 continue;
1188 if (size == -EAGAIN)
1189 /* nothing more to read */
1190 break;
1191 if (size < 0)
1192 return log_error_errno(size, "Failed to receive message: %m");
1193
1194 cmsg_close_all(&msghdr);
979558f3 1195
6467bda5 1196 if (size != sizeof(result)) {
d4e98880 1197 log_warning("Ignoring worker message with invalid size %zi bytes", size);
e82e8fa5 1198 continue;
979558f3
TG
1199 }
1200
371d72e0 1201 ucred = CMSG_FIND_DATA(&msghdr, SOL_SOCKET, SCM_CREDENTIALS, struct ucred);
979558f3 1202 if (!ucred || ucred->pid <= 0) {
d4e98880 1203 log_warning("Ignoring worker message without valid PID");
979558f3
TG
1204 continue;
1205 }
912541b0
KS
1206
1207 /* lookup worker who sent the signal */
4a0b58c4 1208 worker = hashmap_get(manager->workers, PID_TO_PTR(ucred->pid));
a505965d 1209 if (!worker) {
044497e2 1210 log_debug("Worker ["PID_FMT"] returned, but is no longer tracked", ucred->pid);
a505965d 1211 continue;
912541b0 1212 }
c0bbfd72 1213
f257a8fc
YG
1214 if (worker->state == WORKER_KILLING) {
1215 worker->state = WORKER_KILLED;
1216 (void) kill(worker->pid, SIGTERM);
1217 } else if (worker->state != WORKER_KILLED)
a505965d
TG
1218 worker->state = WORKER_IDLE;
1219
1220 /* worker returned */
5d354e52
YW
1221 if (result == EVENT_RESULT_TRY_AGAIN &&
1222 event_requeue(worker->event) < 0)
6467bda5 1223 device_broadcast(manager->monitor, worker->event->dev, -ETIMEDOUT);
5d354e52
YW
1224
1225 /* When event_requeue() succeeds, worker->event is NULL, and event_free() handles NULL gracefully. */
a505965d 1226 event_free(worker->event);
912541b0 1227 }
e82e8fa5
TG
1228
1229 return 1;
1230}
1231
3b47c739 1232/* receive the udevd message from userspace */
e0d61dac 1233static int on_ctrl_msg(UdevCtrl *uctrl, UdevCtrlMessageType type, const UdevCtrlMessageValue *value, void *userdata) {
99534007 1234 Manager *manager = ASSERT_PTR(userdata);
d02c6f54 1235 int r;
912541b0 1236
d02c6f54 1237 assert(value);
e4f66b77 1238
d02c6f54
YW
1239 switch (type) {
1240 case UDEV_CTRL_SET_LOG_LEVEL:
53c8590f
YW
1241 if ((value->intval & LOG_PRIMASK) != value->intval) {
1242 log_debug("Received invalid udev control message (SET_LOG_LEVEL, %i), ignoring.", value->intval);
1243 break;
1244 }
1245
64a3494c 1246 log_debug("Received udev control message (SET_LOG_LEVEL), setting log_level=%i", value->intval);
53c8590f
YW
1247
1248 r = log_get_max_level();
1249 if (r == value->intval)
1250 break;
1251
3cc6b14a 1252 log_set_max_level(value->intval);
1a0bd015 1253 manager->log_level = value->intval;
f257a8fc 1254 manager_kill_workers(manager, false);
d02c6f54
YW
1255 break;
1256 case UDEV_CTRL_STOP_EXEC_QUEUE:
044497e2 1257 log_debug("Received udev control message (STOP_EXEC_QUEUE)");
c0c6806b 1258 manager->stop_exec_queue = true;
d02c6f54
YW
1259 break;
1260 case UDEV_CTRL_START_EXEC_QUEUE:
044497e2 1261 log_debug("Received udev control message (START_EXEC_QUEUE)");
c0c6806b 1262 manager->stop_exec_queue = false;
d7bddfa1 1263 /* It is not necessary to call event_queue_start() here, as it will be called in on_post() if necessary. */
d02c6f54
YW
1264 break;
1265 case UDEV_CTRL_RELOAD:
044497e2 1266 log_debug("Received udev control message (RELOAD)");
e9d1fae3 1267 manager_reload(manager, /* force = */ true);
d02c6f54
YW
1268 break;
1269 case UDEV_CTRL_SET_ENV: {
d7ac0952
FS
1270 _unused_ _cleanup_free_ char *old_val = NULL;
1271 _cleanup_free_ char *key = NULL, *val = NULL, *old_key = NULL;
d02c6f54 1272 const char *eq;
9b5150b6 1273
d02c6f54 1274 eq = strchr(value->buf, '=');
9b5150b6 1275 if (!eq) {
d02c6f54 1276 log_error("Invalid key format '%s'", value->buf);
9b5150b6
YW
1277 return 1;
1278 }
1279
d02c6f54 1280 key = strndup(value->buf, eq - value->buf);
9b5150b6
YW
1281 if (!key) {
1282 log_oom();
1283 return 1;
1284 }
1285
1286 old_val = hashmap_remove2(manager->properties, key, (void **) &old_key);
1287
1288 r = hashmap_ensure_allocated(&manager->properties, &string_hash_ops);
1289 if (r < 0) {
1290 log_oom();
1291 return 1;
912541b0 1292 }
9b5150b6
YW
1293
1294 eq++;
f053fc33 1295 if (isempty(eq)) {
044497e2 1296 log_debug("Received udev control message (ENV), unsetting '%s'", key);
9b5150b6
YW
1297
1298 r = hashmap_put(manager->properties, key, NULL);
1299 if (r < 0) {
1300 log_oom();
1301 return 1;
1302 }
1303 } else {
1304 val = strdup(eq);
1305 if (!val) {
1306 log_oom();
1307 return 1;
1308 }
1309
044497e2 1310 log_debug("Received udev control message (ENV), setting '%s=%s'", key, val);
9b5150b6
YW
1311
1312 r = hashmap_put(manager->properties, key, val);
1313 if (r < 0) {
1314 log_oom();
1315 return 1;
1316 }
1317 }
1318
1319 key = val = NULL;
f257a8fc 1320 manager_kill_workers(manager, false);
d02c6f54 1321 break;
912541b0 1322 }
d02c6f54
YW
1323 case UDEV_CTRL_SET_CHILDREN_MAX:
1324 if (value->intval <= 0) {
1325 log_debug("Received invalid udev control message (SET_MAX_CHILDREN, %i), ignoring.", value->intval);
1326 return 0;
1327 }
912541b0 1328
d02c6f54
YW
1329 log_debug("Received udev control message (SET_MAX_CHILDREN), setting children_max=%i", value->intval);
1330 arg_children_max = value->intval;
1ef72b55 1331
4bf4f50f 1332 notify_ready();
d02c6f54
YW
1333 break;
1334 case UDEV_CTRL_PING:
d30f43ee 1335 log_debug("Received udev control message (PING)");
d02c6f54
YW
1336 break;
1337 case UDEV_CTRL_EXIT:
044497e2 1338 log_debug("Received udev control message (EXIT)");
62d43dac 1339 manager_exit(manager);
d02c6f54
YW
1340 break;
1341 default:
1342 log_debug("Received unknown udev control message, ignoring");
912541b0 1343 }
e4f66b77 1344
e82e8fa5 1345 return 1;
88f4b648 1346}
4a231017 1347
25677a05 1348static int synthesize_change_one(sd_device *dev, sd_device *target) {
0584b17a
ZJS
1349 int r;
1350
25677a05
YW
1351 if (DEBUG_LOGGING) {
1352 const char *syspath = NULL;
1353 (void) sd_device_get_syspath(target, &syspath);
1354 log_device_debug(dev, "device is closed, synthesising 'change' on %s", strna(syspath));
1355 }
1356
1357 r = sd_device_trigger(target, SD_DEVICE_CHANGE);
0584b17a 1358 if (r < 0)
25677a05
YW
1359 return log_device_debug_errno(target, r, "Failed to trigger 'change' uevent: %m");
1360
b428efa5
MS
1361 DEVICE_TRACE_POINT(synthetic_change_event, dev);
1362
0584b17a
ZJS
1363 return 0;
1364}
1365
70068602 1366static int synthesize_change(sd_device *dev) {
2f9d2317
YW
1367 _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
1368 bool part_table_read;
1369 const char *sysname;
1370 sd_device *d;
1371 int r, k;
edd32000 1372
2f9d2317 1373 r = sd_device_get_sysname(dev, &sysname);
70068602
YW
1374 if (r < 0)
1375 return r;
1376
2f9d2317
YW
1377 if (startswith(sysname, "dm-") || block_device_is_whole_disk(dev) <= 0)
1378 return synthesize_change_one(dev, dev);
1379
1380 r = blockdev_reread_partition_table(dev);
70068602 1381 if (r < 0)
2f9d2317
YW
1382 log_device_debug_errno(dev, r, "Failed to re-read partition table, ignoring: %m");
1383 part_table_read = r >= 0;
70068602 1384
2f9d2317
YW
1385 /* search for partitions */
1386 r = partition_enumerator_new(dev, &e);
70068602
YW
1387 if (r < 0)
1388 return r;
1389
2f9d2317
YW
1390 /* We have partitions and re-read the table, the kernel already sent out a "change"
1391 * event for the disk, and "remove/add" for all partitions. */
1392 if (part_table_read && sd_device_enumerator_get_device_first(e))
1393 return 0;
e9fc29f4 1394
2f9d2317
YW
1395 /* We have partitions but re-reading the partition table did not work, synthesize
1396 * "change" for the disk and all partitions. */
1397 r = synthesize_change_one(dev, dev);
1398 FOREACH_DEVICE(e, d) {
1399 k = synthesize_change_one(dev, d);
1400 if (k < 0 && r >= 0)
1401 r = k;
1402 }
ede34445 1403
2f9d2317 1404 return r;
edd32000
KS
1405}
1406
e82e8fa5 1407static int on_inotify(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
ea8213dc 1408 Manager *manager = ASSERT_PTR(userdata);
0254e944 1409 union inotify_event_buffer buffer;
f7c1ad4f 1410 ssize_t l;
0725c4b9 1411 int r;
912541b0 1412
e82e8fa5 1413 l = read(fd, &buffer, sizeof(buffer));
f7c1ad4f 1414 if (l < 0) {
8add30a0 1415 if (ERRNO_IS_TRANSIENT(errno))
ea8213dc 1416 return 0;
912541b0 1417
f7c1ad4f 1418 return log_error_errno(errno, "Failed to read inotify fd: %m");
912541b0
KS
1419 }
1420
00adc340 1421 FOREACH_INOTIFY_EVENT_WARN(e, buffer, l) {
70068602
YW
1422 _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
1423 const char *devnode;
1424
cd66f972
YW
1425 /* Do not handle IN_IGNORED here. Especially, do not try to call udev_watch_end() from the
1426 * main process. Otherwise, the pair of the symlinks may become inconsistent, and several
1427 * garbage may remain. The old symlinks are removed by a worker that processes the
1428 * corresponding 'remove' uevent;
1429 * udev_event_execute_rules() -> event_execute_rules_on_remove() -> udev_watch_end(). */
1430
1431 if (!FLAGS_SET(e->mask, IN_CLOSE_WRITE))
1432 continue;
1433
e7f781e4
YW
1434 r = device_new_from_watch_handle(&dev, e->wd);
1435 if (r < 0) {
cd66f972 1436 /* Device may be removed just after closed. */
e7f781e4 1437 log_debug_errno(r, "Failed to create sd_device object from watch handle, ignoring: %m");
70068602 1438 continue;
e7f781e4 1439 }
912541b0 1440
cd66f972
YW
1441 r = sd_device_get_devname(dev, &devnode);
1442 if (r < 0) {
1443 /* Also here, device may be already removed. */
1444 log_device_debug_errno(dev, r, "Failed to get device node, ignoring: %m");
edd32000 1445 continue;
82a5de9f 1446 }
e7f781e4 1447
cd66f972
YW
1448 log_device_debug(dev, "Received inotify event for %s.", devnode);
1449
1450 (void) event_queue_assume_block_device_unlocked(manager, dev);
1451 (void) synthesize_change(dev);
912541b0
KS
1452 }
1453
ea8213dc 1454 return 0;
bd284db1
SJR
1455}
1456
0561329d 1457static int on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
99534007 1458 Manager *manager = ASSERT_PTR(userdata);
c0c6806b 1459
62d43dac 1460 manager_exit(manager);
912541b0 1461
e82e8fa5
TG
1462 return 1;
1463}
912541b0 1464
0561329d 1465static int on_sighup(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
99534007 1466 Manager *manager = ASSERT_PTR(userdata);
c0c6806b 1467
e9d1fae3 1468 manager_reload(manager, /* force = */ true);
912541b0 1469
e82e8fa5
TG
1470 return 1;
1471}
912541b0 1472
82e0b631
YW
1473static int on_sigchld(sd_event_source *s, const siginfo_t *si, void *userdata) {
1474 Worker *worker = ASSERT_PTR(userdata);
1475 Manager *manager = ASSERT_PTR(worker->manager);
1476 sd_device *dev = worker->event ? ASSERT_PTR(worker->event->dev) : NULL;
1477 EventResult result;
c0c6806b 1478
82e0b631 1479 assert(si);
e82e8fa5 1480
82e0b631
YW
1481 switch (si->si_code) {
1482 case CLD_EXITED:
1483 if (si->si_status == 0)
1484 log_device_debug(dev, "Worker ["PID_FMT"] exited.", si->si_pid);
1485 else
1486 log_device_warning(dev, "Worker ["PID_FMT"] exited with return code %i.",
1487 si->si_pid, si->si_status);
1488 result = EVENT_RESULT_EXIT_STATUS_BASE + si->si_status;
1489 break;
e82e8fa5 1490
82e0b631
YW
1491 case CLD_KILLED:
1492 case CLD_DUMPED:
1493 log_device_warning(dev, "Worker ["PID_FMT"] terminated by signal %i (%s).",
1494 si->si_pid, si->si_status, signal_to_string(si->si_status));
1495 result = EVENT_RESULT_SIGNAL_BASE + si->si_status;
1496 break;
dc7faf2a 1497
82e0b631
YW
1498 default:
1499 assert_not_reached();
1500 }
dc7faf2a 1501
82e0b631
YW
1502 if (result != EVENT_RESULT_SUCCESS && dev) {
1503 /* delete state from disk */
1504 device_delete_db(dev);
1505 device_tag_index(dev, NULL, false);
e82e8fa5 1506
82e0b631
YW
1507 /* Forward kernel event to libudev listeners */
1508 device_broadcast(manager->monitor, dev, result);
912541b0 1509 }
e82e8fa5 1510
82e0b631
YW
1511 worker_free(worker);
1512
e82e8fa5 1513 return 1;
f27125f9 1514}
1515
693d371d 1516static int on_post(sd_event_source *s, void *userdata) {
99534007 1517 Manager *manager = ASSERT_PTR(userdata);
693d371d 1518
64903d18 1519 if (manager->events) {
5d354e52
YW
1520 /* Try to process pending events if idle workers exist. Why is this necessary?
1521 * When a worker finished an event and became idle, even if there was a pending event,
1522 * the corresponding device might have been locked and the processing of the event
1523 * delayed for a while, preventing the worker from processing the event immediately.
1524 * Now, the device may be unlocked. Let's try again! */
1525 event_queue_start(manager);
b6107f01 1526 return 1;
5d354e52 1527 }
b6107f01 1528
40293280
YW
1529 /* There are no queued events. Let's remove /run/udev/queue and clean up the idle processes. */
1530
1531 if (unlink("/run/udev/queue") < 0) {
1532 if (errno != ENOENT)
1533 log_warning_errno(errno, "Failed to unlink /run/udev/queue, ignoring: %m");
1534 } else
1535 log_debug("No events are queued, removing /run/udev/queue.");
b6107f01
YW
1536
1537 if (!hashmap_isempty(manager->workers)) {
1538 /* There are idle workers */
8e543eb8
YW
1539 (void) event_reset_time_relative(manager->event, &manager->kill_workers_event,
1540 CLOCK_MONOTONIC, 3 * USEC_PER_SEC, USEC_PER_SEC,
1541 on_kill_workers_event, manager,
1542 0, "kill-workers-event", false);
b6107f01 1543 return 1;
693d371d
TG
1544 }
1545
b6107f01
YW
1546 /* There are no idle workers. */
1547
10551728
YW
1548 if (manager->udev_node_needs_cleanup) {
1549 (void) udev_node_cleanup();
1550 manager->udev_node_needs_cleanup = false;
1551 }
1552
b6107f01
YW
1553 if (manager->exit)
1554 return sd_event_exit(manager->event, 0);
1555
42670846
YW
1556 if (manager->cgroup)
1557 /* cleanup possible left-over processes in our cgroup */
1558 (void) cg_kill(SYSTEMD_CGROUP_CONTROLLER, manager->cgroup, SIGKILL, CGROUP_IGNORE_SELF, NULL, NULL, NULL);
b6107f01 1559
693d371d
TG
1560 return 1;
1561}
1562
c4b69e99 1563static int listen_fds(int *ret_ctrl, int *ret_netlink) {
254d1313 1564 int ctrl_fd = -EBADF, netlink_fd = -EBADF;
c4b69e99 1565 int fd, n;
912541b0 1566
c4b69e99
YW
1567 assert(ret_ctrl);
1568 assert(ret_netlink);
fcff1e72 1569
912541b0 1570 n = sd_listen_fds(true);
fcff1e72
TG
1571 if (n < 0)
1572 return n;
912541b0
KS
1573
1574 for (fd = SD_LISTEN_FDS_START; fd < n + SD_LISTEN_FDS_START; fd++) {
618b3642 1575 if (sd_is_socket(fd, AF_UNIX, SOCK_SEQPACKET, -1) > 0) {
fcff1e72
TG
1576 if (ctrl_fd >= 0)
1577 return -EINVAL;
1578 ctrl_fd = fd;
912541b0
KS
1579 continue;
1580 }
1581
c52cff07 1582 if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
fcff1e72
TG
1583 if (netlink_fd >= 0)
1584 return -EINVAL;
1585 netlink_fd = fd;
912541b0
KS
1586 continue;
1587 }
1588
fcff1e72 1589 return -EINVAL;
912541b0
KS
1590 }
1591
c4b69e99
YW
1592 *ret_ctrl = ctrl_fd;
1593 *ret_netlink = netlink_fd;
912541b0 1594
912541b0 1595 return 0;
7459bcdc
KS
1596}
1597
e6f86cac 1598/*
3f85ef0f 1599 * read the kernel command line, in case we need to get into debug mode
64a3494c 1600 * udev.log_level=<level> syslog priority
1d84ad94
LP
1601 * udev.children_max=<number of workers> events are fully serialized if set to 1
1602 * udev.exec_delay=<number of seconds> delay execution of every executed program
1603 * udev.event_timeout=<number of seconds> seconds to wait before terminating an event
95ac5230 1604 * udev.blockdev_read_only<=bool> mark all block devices read-only when they appear
e6f86cac 1605 */
96287a49 1606static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
95ac5230 1607 int r;
e6f86cac 1608
614a823c 1609 assert(key);
e6f86cac 1610
64a3494c
FB
1611 if (proc_cmdline_key_streq(key, "udev.log_level") ||
1612 proc_cmdline_key_streq(key, "udev.log_priority")) { /* kept for backward compatibility */
1d84ad94
LP
1613
1614 if (proc_cmdline_value_missing(key, value))
1615 return 0;
1616
46f0fbd8 1617 r = log_level_from_string(value);
92e72467
ZJS
1618 if (r >= 0)
1619 log_set_max_level(r);
1d84ad94
LP
1620
1621 } else if (proc_cmdline_key_streq(key, "udev.event_timeout")) {
1622
1623 if (proc_cmdline_value_missing(key, value))
1624 return 0;
1625
9d9264ba 1626 r = parse_sec(value, &arg_event_timeout_usec);
1d84ad94
LP
1627
1628 } else if (proc_cmdline_key_streq(key, "udev.children_max")) {
1629
1630 if (proc_cmdline_value_missing(key, value))
1631 return 0;
1632
020328e1 1633 r = safe_atou(value, &arg_children_max);
1d84ad94
LP
1634
1635 } else if (proc_cmdline_key_streq(key, "udev.exec_delay")) {
1636
1637 if (proc_cmdline_value_missing(key, value))
1638 return 0;
1639
6b92f429 1640 r = parse_sec(value, &arg_exec_delay_usec);
1d84ad94 1641
e2099267 1642 } else if (proc_cmdline_key_streq(key, "udev.timeout_signal")) {
95ac5230 1643
e2099267
MS
1644 if (proc_cmdline_value_missing(key, value))
1645 return 0;
1646
1647 r = signal_from_string(value);
1648 if (r > 0)
1649 arg_timeout_signal = r;
95ac5230
LP
1650
1651 } else if (proc_cmdline_key_streq(key, "udev.blockdev_read_only")) {
1652
1653 if (!value)
1654 arg_blockdev_read_only = true;
1655 else {
1656 r = parse_boolean(value);
1657 if (r < 0)
1658 log_warning_errno(r, "Failed to parse udev.blockdev-read-only argument, ignoring: %s", value);
1659 else
1660 arg_blockdev_read_only = r;
1661 }
1662
1663 if (arg_blockdev_read_only)
1664 log_notice("All physical block devices will be marked read-only.");
1665
1666 return 0;
1667
1668 } else {
1669 if (startswith(key, "udev."))
1670 log_warning("Unknown udev kernel command line option \"%s\", ignoring.", key);
1671
1672 return 0;
1673 }
614a823c 1674
92e72467
ZJS
1675 if (r < 0)
1676 log_warning_errno(r, "Failed to parse \"%s=%s\", ignoring: %m", key, value);
1d84ad94 1677
614a823c 1678 return 0;
e6f86cac
KS
1679}
1680
37ec0fdd
LP
1681static int help(void) {
1682 _cleanup_free_ char *link = NULL;
1683 int r;
1684
1685 r = terminal_urlify_man("systemd-udevd.service", "8", &link);
1686 if (r < 0)
1687 return log_oom();
1688
ed216e1f 1689 printf("%s [OPTIONS...]\n\n"
d1109e12 1690 "Rule-based manager for device events and files.\n\n"
5ac0162c 1691 " -h --help Print this message\n"
2d19c17e
MF
1692 " -V --version Print version of the program\n"
1693 " -d --daemon Detach and run in the background\n"
1694 " -D --debug Enable debug output\n"
1695 " -c --children-max=INT Set maximum number of workers\n"
1696 " -e --exec-delay=SECONDS Seconds to wait before executing RUN=\n"
1697 " -t --event-timeout=SECONDS Seconds to wait before terminating an event\n"
1698 " -N --resolve-names=early|late|never\n"
5ac0162c 1699 " When to resolve users and groups\n"
bc556335
DDM
1700 "\nSee the %s for details.\n",
1701 program_invocation_short_name,
1702 link);
37ec0fdd
LP
1703
1704 return 0;
ed216e1f
TG
1705}
1706
bba7a484 1707static int parse_argv(int argc, char *argv[]) {
e2099267
MS
1708 enum {
1709 ARG_TIMEOUT_SIGNAL,
1710 };
1711
912541b0 1712 static const struct option options[] = {
e2099267
MS
1713 { "daemon", no_argument, NULL, 'd' },
1714 { "debug", no_argument, NULL, 'D' },
1715 { "children-max", required_argument, NULL, 'c' },
1716 { "exec-delay", required_argument, NULL, 'e' },
1717 { "event-timeout", required_argument, NULL, 't' },
1718 { "resolve-names", required_argument, NULL, 'N' },
1719 { "help", no_argument, NULL, 'h' },
1720 { "version", no_argument, NULL, 'V' },
1721 { "timeout-signal", required_argument, NULL, ARG_TIMEOUT_SIGNAL },
912541b0
KS
1722 {}
1723 };
689a97f5 1724
044497e2 1725 int c, r;
689a97f5 1726
bba7a484
TG
1727 assert(argc >= 0);
1728 assert(argv);
912541b0 1729
e14b6f21 1730 while ((c = getopt_long(argc, argv, "c:de:Dt:N:hV", options, NULL)) >= 0) {
bba7a484 1731 switch (c) {
912541b0 1732
912541b0 1733 case 'd':
bba7a484 1734 arg_daemonize = true;
912541b0
KS
1735 break;
1736 case 'c':
020328e1 1737 r = safe_atou(optarg, &arg_children_max);
6f5cf8a8 1738 if (r < 0)
389f9bf2 1739 log_warning_errno(r, "Failed to parse --children-max= value '%s', ignoring: %m", optarg);
912541b0
KS
1740 break;
1741 case 'e':
6b92f429 1742 r = parse_sec(optarg, &arg_exec_delay_usec);
6f5cf8a8 1743 if (r < 0)
6b92f429 1744 log_warning_errno(r, "Failed to parse --exec-delay= value '%s', ignoring: %m", optarg);
912541b0 1745 break;
e2099267
MS
1746 case ARG_TIMEOUT_SIGNAL:
1747 r = signal_from_string(optarg);
1748 if (r <= 0)
1749 log_warning_errno(r, "Failed to parse --timeout-signal= value '%s', ignoring: %m", optarg);
1750 else
1751 arg_timeout_signal = r;
1752
1753 break;
9719859c 1754 case 't':
9d9264ba 1755 r = parse_sec(optarg, &arg_event_timeout_usec);
f1e8664e 1756 if (r < 0)
9d9264ba 1757 log_warning_errno(r, "Failed to parse --event-timeout= value '%s', ignoring: %m", optarg);
9719859c 1758 break;
912541b0 1759 case 'D':
bba7a484 1760 arg_debug = true;
912541b0 1761 break;
c4d44cba
YW
1762 case 'N': {
1763 ResolveNameTiming t;
1764
1765 t = resolve_name_timing_from_string(optarg);
1766 if (t < 0)
1767 log_warning("Invalid --resolve-names= value '%s', ignoring.", optarg);
1768 else
1769 arg_resolve_name_timing = t;
912541b0 1770 break;
c4d44cba 1771 }
912541b0 1772 case 'h':
37ec0fdd 1773 return help();
912541b0 1774 case 'V':
681bd2c5 1775 printf("%s\n", GIT_VERSION);
bba7a484
TG
1776 return 0;
1777 case '?':
1778 return -EINVAL;
912541b0 1779 default:
04499a70 1780 assert_not_reached();
bba7a484 1781
912541b0
KS
1782 }
1783 }
1784
bba7a484
TG
1785 return 1;
1786}
1787
a1f4fd38
YW
1788static int create_subcgroup(char **ret) {
1789 _cleanup_free_ char *cgroup = NULL, *subcgroup = NULL;
1790 int r;
1791
1792 if (getppid() != 1)
1793 return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Not invoked by PID1.");
1794
1795 r = sd_booted();
1796 if (r < 0)
1797 return log_debug_errno(r, "Failed to check if systemd is running: %m");
1798 if (r == 0)
1799 return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "systemd is not running.");
1800
1801 /* Get our own cgroup, we regularly kill everything udev has left behind.
1802 * We only do this on systemd systems, and only if we are directly spawned
1803 * by PID1. Otherwise we are not guaranteed to have a dedicated cgroup. */
1804
1805 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup);
1806 if (r < 0) {
1807 if (IN_SET(r, -ENOENT, -ENOMEDIUM))
1808 return log_debug_errno(r, "Dedicated cgroup not found: %m");
1809 return log_debug_errno(r, "Failed to get cgroup: %m");
1810 }
1811
1812 r = cg_get_xattr_bool(SYSTEMD_CGROUP_CONTROLLER, cgroup, "trusted.delegate");
00675c36 1813 if (r == 0 || (r < 0 && ERRNO_IS_XATTR_ABSENT(r)))
a1f4fd38
YW
1814 return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "The cgroup %s is not delegated to us.", cgroup);
1815 if (r < 0)
1816 return log_debug_errno(r, "Failed to read trusted.delegate attribute: %m");
1817
1818 /* We are invoked with our own delegated cgroup tree, let's move us one level down, so that we
1819 * don't collide with the "no processes in inner nodes" rule of cgroups, when the service
1820 * manager invokes the ExecReload= job in the .control/ subcgroup. */
1821
1822 subcgroup = path_join(cgroup, "/udev");
1823 if (!subcgroup)
1824 return log_oom_debug();
1825
1826 r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, subcgroup, 0);
1827 if (r < 0)
1828 return log_debug_errno(r, "Failed to create %s subcgroup: %m", subcgroup);
1829
1830 log_debug("Created %s subcgroup.", subcgroup);
1831 if (ret)
1832 *ret = TAKE_PTR(subcgroup);
1833 return 0;
1834}
1835
1836static int manager_new(Manager **ret, int fd_ctrl, int fd_uevent) {
c0c6806b 1837 _cleanup_(manager_freep) Manager *manager = NULL;
a1f4fd38 1838 _cleanup_free_ char *cgroup = NULL;
b5af8c8c 1839 int r;
c0c6806b
TG
1840
1841 assert(ret);
1842
a1f4fd38
YW
1843 (void) create_subcgroup(&cgroup);
1844
6f19b42f 1845 manager = new(Manager, 1);
c0c6806b
TG
1846 if (!manager)
1847 return log_oom();
1848
6f19b42f 1849 *manager = (Manager) {
254d1313 1850 .inotify_fd = -EBADF,
6f19b42f 1851 .worker_watch = { -1, -1 },
a1f4fd38 1852 .cgroup = TAKE_PTR(cgroup),
6f19b42f 1853 };
e237d8cb 1854
100bc5bf
YW
1855 r = udev_ctrl_new_from_fd(&manager->ctrl, fd_ctrl);
1856 if (r < 0)
1857 return log_error_errno(r, "Failed to initialize udev control socket: %m");
e237d8cb 1858
b5af8c8c
ZJS
1859 r = udev_ctrl_enable_receiving(manager->ctrl);
1860 if (r < 0)
1861 return log_error_errno(r, "Failed to bind udev control socket: %m");
c4b69e99 1862
7f2e3a14
YW
1863 r = device_monitor_new_full(&manager->monitor, MONITOR_GROUP_KERNEL, fd_uevent);
1864 if (r < 0)
1865 return log_error_errno(r, "Failed to initialize device monitor: %m");
e237d8cb 1866
1ffadeaa
FB
1867 /* Bump receiver buffer, but only if we are not called via socket activation, as in that
1868 * case systemd sets the receive buffer size for us, and the value in the .socket unit
1869 * should take full effect. */
e77f52e5
YW
1870 if (fd_uevent < 0) {
1871 r = sd_device_monitor_set_receive_buffer_size(manager->monitor, 128 * 1024 * 1024);
1872 if (r < 0)
1873 log_warning_errno(r, "Failed to set receive buffer size for device monitor, ignoring: %m");
1874 }
c4b69e99 1875
f714ecd4
YW
1876 (void) sd_device_monitor_set_description(manager->monitor, "manager");
1877
b5af8c8c
ZJS
1878 r = device_monitor_enable_receiving(manager->monitor);
1879 if (r < 0)
1880 return log_error_errno(r, "Failed to bind netlink socket: %m");
1881
1a0bd015
YW
1882 manager->log_level = log_get_max_level();
1883
b5af8c8c
ZJS
1884 *ret = TAKE_PTR(manager);
1885
1886 return 0;
1887}
1888
1889static int main_loop(Manager *manager) {
d02c6f54 1890 int fd_worker, r;
b5af8c8c 1891
76e62a4d
YW
1892 manager->pid = getpid_cached();
1893
e237d8cb 1894 /* unnamed socket from workers to the main daemon */
618b3642 1895 r = socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, manager->worker_watch);
e237d8cb 1896 if (r < 0)
044497e2 1897 return log_error_errno(errno, "Failed to create socketpair for communicating with workers: %m");
e237d8cb 1898
693d371d 1899 fd_worker = manager->worker_watch[READ_END];
e237d8cb 1900
2ff48e98 1901 r = setsockopt_int(fd_worker, SOL_SOCKET, SO_PASSCRED, true);
e237d8cb 1902 if (r < 0)
044497e2 1903 return log_error_errno(r, "Failed to enable SO_PASSCRED: %m");
e237d8cb 1904
df7ee959
YW
1905 manager->inotify_fd = inotify_init1(IN_CLOEXEC);
1906 if (manager->inotify_fd < 0)
1907 return log_error_errno(errno, "Failed to create inotify descriptor: %m");
e237d8cb 1908
df7ee959 1909 udev_watch_restore(manager->inotify_fd);
e237d8cb
TG
1910
1911 /* block and listen to all signals on signalfd */
72c0a2c2 1912 assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, SIGHUP, SIGCHLD, -1) >= 0);
693d371d 1913
49f997f3
TG
1914 r = sd_event_default(&manager->event);
1915 if (r < 0)
044497e2 1916 return log_error_errno(r, "Failed to allocate event loop: %m");
49f997f3 1917
693d371d
TG
1918 r = sd_event_add_signal(manager->event, NULL, SIGINT, on_sigterm, manager);
1919 if (r < 0)
044497e2 1920 return log_error_errno(r, "Failed to create SIGINT event source: %m");
693d371d
TG
1921
1922 r = sd_event_add_signal(manager->event, NULL, SIGTERM, on_sigterm, manager);
1923 if (r < 0)
044497e2 1924 return log_error_errno(r, "Failed to create SIGTERM event source: %m");
693d371d
TG
1925
1926 r = sd_event_add_signal(manager->event, NULL, SIGHUP, on_sighup, manager);
1927 if (r < 0)
044497e2 1928 return log_error_errno(r, "Failed to create SIGHUP event source: %m");
693d371d 1929
693d371d
TG
1930 r = sd_event_set_watchdog(manager->event, true);
1931 if (r < 0)
044497e2 1932 return log_error_errno(r, "Failed to create watchdog event source: %m");
693d371d 1933
d02c6f54
YW
1934 r = udev_ctrl_attach_event(manager->ctrl, manager->event);
1935 if (r < 0)
1936 return log_error_errno(r, "Failed to attach event to udev control: %m");
b5af8c8c 1937
d02c6f54 1938 r = udev_ctrl_start(manager->ctrl, on_ctrl_msg, manager);
693d371d 1939 if (r < 0)
d02c6f54 1940 return log_error_errno(r, "Failed to start device monitor: %m");
693d371d
TG
1941
1942 /* This needs to be after the inotify and uevent handling, to make sure
1943 * that the ping is send back after fully processing the pending uevents
1944 * (including the synthetic ones we may create due to inotify events).
1945 */
d02c6f54 1946 r = sd_event_source_set_priority(udev_ctrl_get_event_source(manager->ctrl), SD_EVENT_PRIORITY_IDLE);
693d371d 1947 if (r < 0)
044497e2 1948 return log_error_errno(r, "Failed to set IDLE event priority for udev control event source: %m");
693d371d 1949
df7ee959 1950 r = sd_event_add_io(manager->event, &manager->inotify_event, manager->inotify_fd, EPOLLIN, on_inotify, manager);
693d371d 1951 if (r < 0)
044497e2 1952 return log_error_errno(r, "Failed to create inotify event source: %m");
693d371d 1953
f00d2b6d
YW
1954 r = sd_device_monitor_attach_event(manager->monitor, manager->event);
1955 if (r < 0)
1956 return log_error_errno(r, "Failed to attach event to device monitor: %m");
1957
1958 r = sd_device_monitor_start(manager->monitor, on_uevent, manager);
693d371d 1959 if (r < 0)
f00d2b6d
YW
1960 return log_error_errno(r, "Failed to start device monitor: %m");
1961
693d371d
TG
1962 r = sd_event_add_io(manager->event, NULL, fd_worker, EPOLLIN, on_worker, manager);
1963 if (r < 0)
044497e2 1964 return log_error_errno(r, "Failed to create worker event source: %m");
693d371d
TG
1965
1966 r = sd_event_add_post(manager->event, NULL, on_post, manager);
1967 if (r < 0)
044497e2 1968 return log_error_errno(r, "Failed to create post event source: %m");
e237d8cb 1969
e9d1fae3
YW
1970 manager->last_usec = now(CLOCK_MONOTONIC);
1971
b5af8c8c 1972 udev_builtin_init();
077fc5e2 1973
c238a1f5 1974 r = udev_rules_load(&manager->rules, arg_resolve_name_timing);
298316eb 1975 if (r < 0)
b5af8c8c 1976 return log_error_errno(r, "Failed to read udev rules: %m");
077fc5e2
DH
1977
1978 r = udev_rules_apply_static_dev_perms(manager->rules);
1979 if (r < 0)
6e40ed53 1980 log_warning_errno(r, "Failed to apply permissions on static device nodes, ignoring: %m");
077fc5e2 1981
4bf4f50f 1982 notify_ready();
077fc5e2
DH
1983
1984 r = sd_event_loop(manager->event);
44dcf454 1985 if (r < 0)
044497e2 1986 log_error_errno(r, "Event loop failed: %m");
077fc5e2 1987
077fc5e2
DH
1988 sd_notify(false,
1989 "STOPPING=1\n"
1990 "STATUS=Shutting down...");
077fc5e2
DH
1991 return r;
1992}
1993
63e2d171 1994int run_udevd(int argc, char *argv[]) {
b5af8c8c 1995 _cleanup_(manager_freep) Manager *manager = NULL;
254d1313 1996 int fd_ctrl = -EBADF, fd_uevent = -EBADF;
e5d7bce1 1997 int r;
bba7a484 1998
bba7a484 1999 log_set_target(LOG_TARGET_AUTO);
6b413782 2000 log_open();
e2099267 2001 udev_parse_config_full(&arg_children_max, &arg_exec_delay_usec, &arg_event_timeout_usec, &arg_resolve_name_timing, &arg_timeout_signal);
bba7a484 2002 log_parse_environment();
6b413782 2003 log_open(); /* Done again to update after reading configuration. */
bba7a484 2004
bba7a484
TG
2005 r = parse_argv(argc, argv);
2006 if (r <= 0)
0c5a109a 2007 return r;
bba7a484 2008
1d84ad94 2009 r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, PROC_CMDLINE_STRIP_RD_PREFIX);
614a823c 2010 if (r < 0)
044497e2 2011 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
912541b0 2012
78d3e041
KS
2013 if (arg_debug) {
2014 log_set_target(LOG_TARGET_CONSOLE);
bba7a484 2015 log_set_max_level(LOG_DEBUG);
78d3e041 2016 }
bba7a484 2017
fba868fa
LP
2018 r = must_be_root();
2019 if (r < 0)
0c5a109a 2020 return r;
912541b0 2021
712cebf1 2022 if (arg_children_max == 0) {
fe56acd8 2023 unsigned long cpu_limit, mem_limit, cpu_count = 1;
d457ff83 2024
fe56acd8
LP
2025 r = cpus_in_affinity_mask();
2026 if (r < 0)
2027 log_warning_errno(r, "Failed to determine number of local CPUs, ignoring: %m");
2028 else
2029 cpu_count = r;
88bd5a32
FB
2030
2031 cpu_limit = cpu_count * 2 + 16;
2032 mem_limit = MAX(physical_memory() / (128UL*1024*1024), 10U);
912541b0 2033
88bd5a32
FB
2034 arg_children_max = MIN(cpu_limit, mem_limit);
2035 arg_children_max = MIN(WORKER_NUM_MAX, arg_children_max);
e438c57a 2036
044497e2 2037 log_debug("Set children_max to %u", arg_children_max);
d457ff83 2038 }
912541b0 2039
712cebf1 2040 /* set umask before creating any file/directory */
712cebf1 2041 umask(022);
912541b0 2042
c3dacc8b 2043 r = mac_selinux_init();
0c5a109a 2044 if (r < 0)
a9ba0e32 2045 return r;
912541b0 2046
3f692e2e 2047 r = RET_NERRNO(mkdir("/run/udev", 0755));
0c5a109a
ZJS
2048 if (r < 0 && r != -EEXIST)
2049 return log_error_errno(r, "Failed to create /run/udev: %m");
712cebf1 2050
b7f74dd4 2051 r = listen_fds(&fd_ctrl, &fd_uevent);
0c5a109a
ZJS
2052 if (r < 0)
2053 return log_error_errno(r, "Failed to listen on fds: %m");
b7f74dd4 2054
a1f4fd38 2055 r = manager_new(&manager, fd_ctrl, fd_uevent);
b5af8c8c
ZJS
2056 if (r < 0)
2057 return log_error_errno(r, "Failed to create manager: %m");
2058
bba7a484 2059 if (arg_daemonize) {
912541b0 2060 pid_t pid;
912541b0 2061
2751ad9c 2062 log_info("Starting systemd-udevd version " GIT_VERSION);
3cbb2057 2063
40e749b5 2064 /* connect /dev/null to stdin, stdout, stderr */
c76cf844
AK
2065 if (log_get_max_level() < LOG_DEBUG) {
2066 r = make_null_stdio();
2067 if (r < 0)
2068 log_warning_errno(r, "Failed to redirect standard streams to /dev/null: %m");
2069 }
2070
912541b0 2071 pid = fork();
0c5a109a
ZJS
2072 if (pid < 0)
2073 return log_error_errno(errno, "Failed to fork daemon: %m");
2074 if (pid > 0)
2075 /* parent */
2076 return 0;
912541b0 2077
0c5a109a 2078 /* child */
ece0fe12 2079 (void) setsid();
7500cd5e 2080 }
912541b0 2081
76e62a4d 2082 return main_loop(manager);
7fafc032 2083}