]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/udev/udevd.c
device-util: Declare iterator variables inline
[thirdparty/systemd.git] / src / udev / udevd.c
CommitLineData
f13467ec 1/* SPDX-License-Identifier: GPL-2.0-or-later */
7fafc032 2/*
810adae9
LP
3 * Copyright © 2004 Chris Friesen <chris_friesen@sympatico.ca>
4 * Copyright © 2009 Canonical Ltd.
5 * Copyright © 2009 Scott James Remnant <scott@netsplit.com>
7fafc032
KS
6 */
7
7fafc032 8#include <errno.h>
618234a5
LP
9#include <fcntl.h>
10#include <getopt.h>
618234a5
LP
11#include <stdbool.h>
12#include <stddef.h>
7fafc032
KS
13#include <stdio.h>
14#include <stdlib.h>
618234a5 15#include <sys/epoll.h>
3ebdb81e 16#include <sys/file.h>
618234a5
LP
17#include <sys/inotify.h>
18#include <sys/ioctl.h>
19#include <sys/mount.h>
1e03b754 20#include <sys/prctl.h>
1e03b754 21#include <sys/signalfd.h>
dc117daa 22#include <sys/stat.h>
618234a5
LP
23#include <sys/time.h>
24#include <sys/wait.h>
25#include <unistd.h>
7fafc032 26
392ef7a2 27#include "sd-daemon.h"
693d371d 28#include "sd-event.h"
8314de1d 29
b5efdb8a 30#include "alloc-util.h"
9409174e 31#include "blockdev-util.h"
a1f4fd38 32#include "cgroup-setup.h"
194bbe33 33#include "cgroup-util.h"
79d905af 34#include "common-signal.h"
618234a5 35#include "cpu-set-util.h"
f8433153 36#include "daemon-util.h"
5ba2dc25 37#include "dev-setup.h"
7f2e3a14 38#include "device-monitor-private.h"
abde5ea8 39#include "device-private.h"
70068602 40#include "device-util.h"
6467bda5 41#include "errno-list.h"
6d63048a 42#include "event-util.h"
3ffd4af2 43#include "fd-util.h"
a5c32cff 44#include "fileio.h"
f97b34a6 45#include "format-util.h"
f4f15635 46#include "fs-util.h"
a505965d 47#include "hashmap.h"
9e5fd717 48#include "inotify-util.h"
c004493c 49#include "io-util.h"
eefc66aa 50#include "limits-util.h"
40a57716 51#include "list.h"
0c5a109a 52#include "main-func.h"
5ea78a39 53#include "mkdir.h"
618234a5 54#include "netlink-util.h"
6bedfcbb 55#include "parse-util.h"
a1f4fd38 56#include "path-util.h"
294bf0c3 57#include "pretty-print.h"
4e731273 58#include "proc-cmdline.h"
618234a5
LP
59#include "process-util.h"
60#include "selinux-util.h"
61#include "signal-util.h"
8f328d36 62#include "socket-util.h"
07630cea 63#include "string-util.h"
49fe5c09 64#include "strv.h"
5ea78a39 65#include "strxcpyx.h"
46f0fbd8 66#include "syslog-util.h"
63e2d171 67#include "udevd.h"
07a26e42 68#include "udev-builtin.h"
7d68eb1b 69#include "udev-ctrl.h"
25de7aa7 70#include "udev-event.h"
10551728 71#include "udev-node.h"
618234a5 72#include "udev-util.h"
70068602 73#include "udev-watch.h"
ee104e11 74#include "user-util.h"
47350c5f 75#include "version.h"
7fafc032 76
/* Hard upper bound on the number of worker processes. */
#define WORKER_NUM_MAX 2048U

/* Retry pacing for events whose device is locked by another program (see the retry_* fields of Event):
 * the interval between attempts, and the overall time budget for retrying. */
#define EVENT_RETRY_INTERVAL_USEC (200 * USEC_PER_MSEC)
#define EVENT_RETRY_TIMEOUT_USEC  (3 * USEC_PER_MINUTE)
88bd5a32 80
bba7a484
TG
81static bool arg_debug = false;
82static int arg_daemonize = false;
c4d44cba 83static ResolveNameTiming arg_resolve_name_timing = RESOLVE_NAME_EARLY;
216e8bbe 84static unsigned arg_children_max = 0;
6b92f429 85static usec_t arg_exec_delay_usec = 0;
bba7a484 86static usec_t arg_event_timeout_usec = 180 * USEC_PER_SEC;
e2099267 87static int arg_timeout_signal = SIGKILL;
95ac5230 88static bool arg_blockdev_read_only = false;
c0c6806b 89
d9239923
YW
90typedef struct Event Event;
91typedef struct Worker Worker;
92
c0c6806b 93typedef struct Manager {
693d371d 94 sd_event *event;
c0c6806b 95 Hashmap *workers;
d9239923 96 LIST_HEAD(Event, events);
a1f4fd38 97 char *cgroup;
cb49a4f2 98 pid_t pid; /* the process that originally allocated the manager object */
1a0bd015 99 int log_level;
c0c6806b 100
9a07157d 101 UdevRules *rules;
9b5150b6 102 Hashmap *properties;
c0c6806b 103
0bed242c
YW
104 sd_netlink *rtnl;
105
7f2e3a14 106 sd_device_monitor *monitor;
e0d61dac 107 UdevCtrl *ctrl;
e237d8cb
TG
108 int worker_watch[2];
109
df7ee959
YW
110 /* used by udev-watch */
111 int inotify_fd;
693d371d 112 sd_event_source *inotify_event;
df7ee959 113
eca195ec 114 sd_event_source *kill_workers_event;
693d371d 115
79d905af
LP
116 sd_event_source *memory_pressure_event_source;
117 sd_event_source *sigrtmin18_event_source;
118
7c4c7e89
TG
119 usec_t last_usec;
120
10551728 121 bool udev_node_needs_cleanup;
481f24d1
YW
122 bool stop_exec_queue;
123 bool exit;
c0c6806b 124} Manager;
1e03b754 125
/* Lifecycle of a queued device event. EVENT_UNDEF doubles as the "match any state" wildcard of
 * event_queue_cleanup(). */
typedef enum EventState {
        EVENT_UNDEF,
        EVENT_QUEUED,   /* waiting in the queue */
        EVENT_RUNNING,  /* attached to a worker, see worker_attach_event() */
} EventState;
1e03b754 131
d9239923 132typedef struct Event {
cb49a4f2 133 Manager *manager;
d9239923
YW
134 Worker *worker;
135 EventState state;
eb546b35
YW
136
137 sd_device *dev;
eb546b35 138
0c3d8182 139 sd_device_action_t action;
eb546b35 140 uint64_t seqnum;
a1fa99d8 141 uint64_t blocker_seqnum;
29d02458
YW
142 const char *id;
143 const char *devpath;
144 const char *devpath_old;
34458dbb 145 const char *devnode;
4f294ffd
YW
146
147 /* Used when the device is locked by another program. */
5d354e52
YW
148 usec_t retry_again_next_usec;
149 usec_t retry_again_timeout_usec;
4f294ffd 150 sd_event_source *retry_event_source;
d8f462b4
YW
151
152 sd_event_source *timeout_warning_event;
153 sd_event_source *timeout_event;
eb546b35 154
d9239923
YW
155 LIST_FIELDS(Event, event);
156} Event;
1e03b754 157
/* State of a worker process as tracked by the main daemon. */
typedef enum WorkerState {
        WORKER_UNDEF,
        WORKER_RUNNING, /* currently processing an event */
        WORKER_IDLE,    /* may receive the next event, see event_run() */
        WORKER_KILLED,  /* a termination signal has already been sent */
        WORKER_KILLING, /* was running when a non-forced kill was requested, see manager_kill_workers() */
} WorkerState;
1e03b754 165
d9239923 166typedef struct Worker {
c0c6806b 167 Manager *manager;
912541b0 168 pid_t pid;
82e0b631 169 sd_event_source *child_event_source;
7f2e3a14 170 sd_device_monitor *monitor;
d9239923
YW
171 WorkerState state;
172 Event *event;
173} Worker;
1e03b754
KS
174
175/* passed from worker to main process */
5d354e52 176typedef enum EventResult {
6467bda5
YW
177 EVENT_RESULT_NERRNO_MIN = -ERRNO_MAX,
178 EVENT_RESULT_NERRNO_MAX = -1,
a79cba33 179 EVENT_RESULT_SUCCESS = 0,
6467bda5
YW
180 EVENT_RESULT_EXIT_STATUS_BASE = 0,
181 EVENT_RESULT_EXIT_STATUS_MAX = 255,
182 EVENT_RESULT_TRY_AGAIN = 256, /* when the block device is locked by another process. */
183 EVENT_RESULT_SIGNAL_BASE = 257,
184 EVENT_RESULT_SIGNAL_MAX = EVENT_RESULT_SIGNAL_BASE + _NSIG,
5d354e52 185 _EVENT_RESULT_MAX,
6467bda5 186 _EVENT_RESULT_INVALID = -EINVAL,
5d354e52 187} EventResult;
1e03b754 188
5393c528 189static Event *event_free(Event *event) {
c6aa11f2 190 if (!event)
5393c528 191 return NULL;
ba47b71c 192
40a57716 193 assert(event->manager);
c6aa11f2 194
40a57716 195 LIST_REMOVE(event, event->manager->events, event);
eb546b35 196 sd_device_unref(event->dev);
c6aa11f2 197
72151060
YW
198 /* Do not use sd_event_source_disable_unref() here, as this is called by both workers and the
199 * main process. */
4f294ffd 200 sd_event_source_unref(event->retry_event_source);
72151060
YW
201 sd_event_source_unref(event->timeout_warning_event);
202 sd_event_source_unref(event->timeout_event);
693d371d 203
c6aa11f2
TG
204 if (event->worker)
205 event->worker->event = NULL;
206
5393c528 207 return mfree(event);
aa8734ff 208}
7a770250 209
419ec631 210static void event_queue_cleanup(Manager *manager, EventState match_state) {
80a226b2 211 LIST_FOREACH(event, event, manager->events) {
419ec631
YW
212 if (match_state != EVENT_UNDEF && match_state != event->state)
213 continue;
214
215 event_free(event);
216 }
217}
218
d9239923 219static Worker *worker_free(Worker *worker) {
c6aa11f2 220 if (!worker)
75db809a 221 return NULL;
bc113de9 222
82e0b631
YW
223 if (worker->manager)
224 hashmap_remove(worker->manager->workers, PID_TO_PTR(worker->pid));
c0c6806b 225
82e0b631 226 sd_event_source_unref(worker->child_event_source);
7f2e3a14 227 sd_device_monitor_unref(worker->monitor);
c6aa11f2
TG
228 event_free(worker->event);
229
75db809a 230 return mfree(worker);
ff2c503d
KS
231}
232
d9239923
YW
233DEFINE_TRIVIAL_CLEANUP_FUNC(Worker*, worker_free);
234DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(worker_hash_op, void, trivial_hash_func, trivial_compare_func, Worker, worker_free);
fc465079 235
419ec631
YW
236static void manager_clear_for_worker(Manager *manager) {
237 assert(manager);
238
f777e745
YW
239 /* Do not use sd_event_source_disable_unref() here, as this is called by both workers and the
240 * main process. */
241 manager->inotify_event = sd_event_source_unref(manager->inotify_event);
242 manager->kill_workers_event = sd_event_source_unref(manager->kill_workers_event);
419ec631
YW
243
244 manager->event = sd_event_unref(manager->event);
245
246 manager->workers = hashmap_free(manager->workers);
247 event_queue_cleanup(manager, EVENT_UNDEF);
248
249 manager->monitor = sd_device_monitor_unref(manager->monitor);
250 manager->ctrl = udev_ctrl_unref(manager->ctrl);
251
252 manager->worker_watch[READ_END] = safe_close(manager->worker_watch[READ_END]);
253}
254
255static Manager* manager_free(Manager *manager) {
256 if (!manager)
257 return NULL;
258
259 udev_builtin_exit();
260
419ec631
YW
261 manager_clear_for_worker(manager);
262
263 sd_netlink_unref(manager->rtnl);
264
265 hashmap_free_free_free(manager->properties);
266 udev_rules_free(manager->rules);
267
268 safe_close(manager->inotify_fd);
269 safe_close_pair(manager->worker_watch);
270
79d905af
LP
271 sd_event_source_unref(manager->memory_pressure_event_source);
272 sd_event_source_unref(manager->sigrtmin18_event_source);
273
a1f4fd38 274 free(manager->cgroup);
419ec631
YW
275 return mfree(manager);
276}
277
278DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_free);
279
82e0b631
YW
280static int on_sigchld(sd_event_source *s, const siginfo_t *si, void *userdata);
281
d9239923
YW
282static int worker_new(Worker **ret, Manager *manager, sd_device_monitor *worker_monitor, pid_t pid) {
283 _cleanup_(worker_freep) Worker *worker = NULL;
a505965d 284 int r;
3a19b32a
TG
285
286 assert(ret);
c0c6806b 287 assert(manager);
3a19b32a
TG
288 assert(worker_monitor);
289 assert(pid > 1);
290
d4053464
YW
291 /* close monitor, but keep address around */
292 device_monitor_disconnect(worker_monitor);
293
d9239923 294 worker = new(Worker, 1);
3a19b32a
TG
295 if (!worker)
296 return -ENOMEM;
297
d9239923 298 *worker = (Worker) {
d4053464
YW
299 .monitor = sd_device_monitor_ref(worker_monitor),
300 .pid = pid,
301 };
a505965d 302
82e0b631
YW
303 r = sd_event_add_child(manager->event, &worker->child_event_source, pid, WEXITED, on_sigchld, worker);
304 if (r < 0)
305 return r;
306
92a74c47 307 r = hashmap_ensure_put(&manager->workers, &worker_hash_op, PID_TO_PTR(pid), worker);
a505965d
TG
308 if (r < 0)
309 return r;
310
82e0b631 311 worker->manager = manager;
3a19b32a 312
82e0b631 313 *ret = TAKE_PTR(worker);
3a19b32a
TG
314 return 0;
315}
316
419ec631
YW
317static void manager_kill_workers(Manager *manager, bool force) {
318 Worker *worker;
4fa4d885 319
419ec631 320 assert(manager);
4fa4d885 321
419ec631
YW
322 HASHMAP_FOREACH(worker, manager->workers) {
323 if (worker->state == WORKER_KILLED)
324 continue;
4fa4d885 325
419ec631
YW
326 if (worker->state == WORKER_RUNNING && !force) {
327 worker->state = WORKER_KILLING;
328 continue;
329 }
4fa4d885 330
419ec631
YW
331 worker->state = WORKER_KILLED;
332 (void) kill(worker->pid, SIGTERM);
333 }
4fa4d885
TG
334}
335
419ec631
YW
336static void manager_exit(Manager *manager) {
337 assert(manager);
693d371d 338
419ec631 339 manager->exit = true;
693d371d 340
f8433153 341 (void) sd_notify(/* unset= */ false, NOTIFY_STOPPING);
39c19cf1 342
419ec631
YW
343 /* close sources of new events and discard buffered events */
344 manager->ctrl = udev_ctrl_unref(manager->ctrl);
0bed242c 345
9612da36 346 manager->inotify_event = sd_event_source_disable_unref(manager->inotify_event);
419ec631 347 manager->inotify_fd = safe_close(manager->inotify_fd);
0bed242c 348
7f2e3a14 349 manager->monitor = sd_device_monitor_unref(manager->monitor);
0bed242c 350
419ec631
YW
351 /* discard queued events and kill workers */
352 event_queue_cleanup(manager, EVENT_QUEUED);
353 manager_kill_workers(manager, true);
0bed242c
YW
354}
355
4bf4f50f
ZJS
356static void notify_ready(void) {
357 int r;
358
f8433153 359 r = sd_notifyf(/* unset= */ false,
4bf4f50f
ZJS
360 "READY=1\n"
361 "STATUS=Processing with %u children at max", arg_children_max);
362 if (r < 0)
363 log_warning_errno(r, "Failed to send readiness notification, ignoring: %m");
364}
365
419ec631 366/* reload requested, HUP signal received, rules changed, builtin changed */
e9d1fae3
YW
367static void manager_reload(Manager *manager, bool force) {
368 _cleanup_(udev_rules_freep) UdevRules *rules = NULL;
369 usec_t now_usec;
370 int r;
371
419ec631 372 assert(manager);
b2d21d93 373
e9d1fae3
YW
374 assert_se(sd_event_now(manager->event, CLOCK_MONOTONIC, &now_usec) >= 0);
375 if (!force && now_usec < usec_add(manager->last_usec, 3 * USEC_PER_SEC))
376 /* check for changed config, every 3 seconds at most */
377 return;
378 manager->last_usec = now_usec;
379
380 /* Reload SELinux label database, to make the child inherit the up-to-date database. */
381 mac_selinux_maybe_reload();
382
383 /* Nothing changed. It is not necessary to reload. */
f8433153 384 if (!udev_rules_should_reload(manager->rules) && !udev_builtin_should_reload()) {
e9d1fae3 385
f8433153
LP
386 if (!force)
387 return;
76e62a4d 388
f8433153
LP
389 /* If we eat this up, then tell our service manager to just continue */
390 (void) sd_notifyf(/* unset= */ false,
391 "RELOADING=1\n"
392 "STATUS=Skipping configuration reloading, nothing changed.\n"
393 "MONOTONIC_USEC=" USEC_FMT, now(CLOCK_MONOTONIC));
394 } else {
395 (void) sd_notifyf(/* unset= */ false,
396 "RELOADING=1\n"
397 "STATUS=Flushing configuration...\n"
398 "MONOTONIC_USEC=" USEC_FMT, now(CLOCK_MONOTONIC));
e9d1fae3 399
f8433153 400 manager_kill_workers(manager, false);
e9d1fae3 401
f8433153
LP
402 udev_builtin_exit();
403 udev_builtin_init();
404
405 r = udev_rules_load(&rules, arg_resolve_name_timing);
406 if (r < 0)
407 log_warning_errno(r, "Failed to read udev rules, using the previously loaded rules, ignoring: %m");
408 else
409 udev_rules_free_and_replace(manager->rules, rules);
410 }
693d371d 411
4bf4f50f 412 notify_ready();
419ec631 413}
e237d8cb 414
419ec631 415static int on_kill_workers_event(sd_event_source *s, uint64_t usec, void *userdata) {
99534007 416 Manager *manager = ASSERT_PTR(userdata);
e237d8cb 417
419ec631
YW
418 log_debug("Cleanup idle workers");
419 manager_kill_workers(manager, false);
e237d8cb 420
419ec631
YW
421 return 1;
422}
e237d8cb 423
a79cba33 424static void device_broadcast(sd_device_monitor *monitor, sd_device *dev, EventResult result) {
c17ab900
YW
425 int r;
426
427 assert(dev);
428
429 /* On exit, manager->monitor is already NULL. */
430 if (!monitor)
431 return;
432
a79cba33 433 if (result != EVENT_RESULT_SUCCESS) {
6467bda5
YW
434 (void) device_add_property(dev, "UDEV_WORKER_FAILED", "1");
435
436 switch (result) {
fb544617
YW
437 case EVENT_RESULT_NERRNO_MIN ... EVENT_RESULT_NERRNO_MAX: {
438 const char *str;
439
6467bda5 440 (void) device_add_propertyf(dev, "UDEV_WORKER_ERRNO", "%i", -result);
6467bda5 441
fb544617
YW
442 str = errno_to_name(result);
443 if (str)
444 (void) device_add_property(dev, "UDEV_WORKER_ERRNO_NAME", str);
445 break;
446 }
6467bda5
YW
447 case EVENT_RESULT_EXIT_STATUS_BASE ... EVENT_RESULT_EXIT_STATUS_MAX:
448 (void) device_add_propertyf(dev, "UDEV_WORKER_EXIT_STATUS", "%i", result - EVENT_RESULT_EXIT_STATUS_BASE);
449 break;
450
451 case EVENT_RESULT_TRY_AGAIN:
452 assert_not_reached();
453 break;
454
fb544617
YW
455 case EVENT_RESULT_SIGNAL_BASE ... EVENT_RESULT_SIGNAL_MAX: {
456 const char *str;
457
6467bda5 458 (void) device_add_propertyf(dev, "UDEV_WORKER_SIGNAL", "%i", result - EVENT_RESULT_SIGNAL_BASE);
6467bda5 459
fb544617
YW
460 str = signal_to_string(result - EVENT_RESULT_SIGNAL_BASE);
461 if (str)
462 (void) device_add_property(dev, "UDEV_WORKER_SIGNAL_NAME", str);
463 break;
464 }
6467bda5
YW
465 default:
466 log_device_warning(dev, "Unknown event result \"%i\", ignoring.", result);
467 }
468 }
469
c17ab900
YW
470 r = device_monitor_send_device(monitor, NULL, dev);
471 if (r < 0)
472 log_device_warning_errno(dev, r,
473 "Failed to broadcast event to libudev listeners, ignoring: %m");
474}
475
a79cba33 476static int worker_send_result(Manager *manager, EventResult result) {
5d354e52
YW
477 assert(manager);
478 assert(manager->worker_watch[WRITE_END] >= 0);
9a73bd7c 479
5d354e52 480 return loop_write(manager->worker_watch[WRITE_END], &result, sizeof(result), false);
9a73bd7c
TG
481}
482
78e278ad 483static int device_get_whole_disk(sd_device *dev, sd_device **ret_device, const char **ret_devname) {
b97897e3
YW
484 const char *val;
485 int r;
fee854ee 486
0bed242c 487 assert(dev);
a1130022
LP
488
489 if (device_for_action(dev, SD_DEVICE_REMOVE))
7b7959fb 490 goto irrelevant;
0bed242c 491
b97897e3
YW
492 r = sd_device_get_sysname(dev, &val);
493 if (r < 0)
494 return log_device_debug_errno(dev, r, "Failed to get sysname: %m");
495
a9e83209
YW
496 /* Exclude the following devices:
497 * For "dm-", see the comment added by e918a1b5a94f270186dca59156354acd2a596494.
498 * For "md", see the commit message of 2e5b17d01347d3c3118be2b8ad63d20415dbb1f0,
499 * but not sure the assumption is still valid even when partitions are created on the md
500 * devices, surprisingly which seems to be possible, see PR #22973.
501 * For "drbd", see the commit message of fee854ee8ccde0cd28e0f925dea18cce35f3993d. */
49fe5c09 502 if (STARTSWITH_SET(val, "dm-", "md", "drbd"))
7b7959fb 503 goto irrelevant;
0bed242c 504
8f71534e
YW
505 r = block_device_get_whole_disk(dev, &dev);
506 if (IN_SET(r,
507 -ENOTBLK, /* The device is not a block device. */
508 -ENODEV /* The whole disk device was not found, it may already be removed. */))
509 goto irrelevant;
510 if (r < 0)
511 return log_device_debug_errno(dev, r, "Failed to get whole disk device: %m");
0bed242c 512
b97897e3 513 r = sd_device_get_devname(dev, &val);
b97897e3
YW
514 if (r < 0)
515 return log_device_debug_errno(dev, r, "Failed to get devname: %m");
0bed242c 516
78e278ad
YW
517 if (ret_device)
518 *ret_device = dev;
519 if (ret_devname)
520 *ret_devname = val;
7b7959fb
YW
521 return 1;
522
523irrelevant:
78e278ad
YW
524 if (ret_device)
525 *ret_device = NULL;
526 if (ret_devname)
527 *ret_devname = NULL;
7b7959fb
YW
528 return 0;
529}
530
03a24ce7 531static int worker_lock_whole_disk(sd_device *dev, int *ret_fd) {
254d1313 532 _cleanup_close_ int fd = -EBADF;
78e278ad 533 sd_device *dev_whole_disk;
7b7959fb
YW
534 const char *val;
535 int r;
536
537 assert(dev);
538 assert(ret_fd);
539
540 /* Take a shared lock on the device node; this establishes a concept of device "ownership" to
541 * serialize device access. External processes holding an exclusive lock will cause udev to skip the
542 * event handling; in the case udev acquired the lock, the external process can block until udev has
543 * finished its event handling. */
544
78e278ad 545 r = device_get_whole_disk(dev, &dev_whole_disk, &val);
7b7959fb
YW
546 if (r < 0)
547 return r;
548 if (r == 0)
549 goto nolock;
550
32703bd1 551 fd = sd_device_open(dev_whole_disk, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
b97897e3 552 if (fd < 0) {
78e278ad 553 bool ignore = ERRNO_IS_DEVICE_ABSENT(fd);
ef400c38 554
78e278ad 555 log_device_debug_errno(dev, fd, "Failed to open '%s'%s: %m", val, ignore ? ", ignoring" : "");
7b7959fb 556 if (!ignore)
78e278ad 557 return fd;
7b7959fb
YW
558
559 goto nolock;
b97897e3 560 }
0bed242c
YW
561
562 if (flock(fd, LOCK_SH|LOCK_NB) < 0)
b97897e3 563 return log_device_debug_errno(dev, errno, "Failed to flock(%s): %m", val);
0bed242c
YW
564
565 *ret_fd = TAKE_FD(fd);
566 return 1;
7b7959fb
YW
567
568nolock:
254d1313 569 *ret_fd = -EBADF;
7b7959fb 570 return 0;
fee854ee
RK
571}
572
95ac5230 573static int worker_mark_block_device_read_only(sd_device *dev) {
254d1313 574 _cleanup_close_ int fd = -EBADF;
95ac5230
LP
575 const char *val;
576 int state = 1, r;
577
578 assert(dev);
579
580 if (!arg_blockdev_read_only)
581 return 0;
582
583 /* Do this only once, when the block device is new. If the device is later retriggered let's not
584 * toggle the bit again, so that people can boot up with full read-only mode and then unset the bit
585 * for specific devices only. */
a1130022 586 if (!device_for_action(dev, SD_DEVICE_ADD))
95ac5230
LP
587 return 0;
588
589 r = sd_device_get_subsystem(dev, &val);
590 if (r < 0)
591 return log_device_debug_errno(dev, r, "Failed to get subsystem: %m");
592
593 if (!streq(val, "block"))
594 return 0;
595
596 r = sd_device_get_sysname(dev, &val);
597 if (r < 0)
598 return log_device_debug_errno(dev, r, "Failed to get sysname: %m");
599
600 /* Exclude synthetic devices for now, this is supposed to be a safety feature to avoid modification
601 * of physical devices, and what sits on top of those doesn't really matter if we don't allow the
cb713f16 602 * underlying block devices to receive changes. */
95ac5230
LP
603 if (STARTSWITH_SET(val, "dm-", "md", "drbd", "loop", "nbd", "zram"))
604 return 0;
605
32703bd1 606 fd = sd_device_open(dev, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
95ac5230 607 if (fd < 0)
78e278ad 608 return log_device_debug_errno(dev, fd, "Failed to open '%s', ignoring: %m", val);
95ac5230
LP
609
610 if (ioctl(fd, BLKROSET, &state) < 0)
611 return log_device_warning_errno(dev, errno, "Failed to mark block device '%s' read-only: %m", val);
612
613 log_device_info(dev, "Successfully marked block device '%s' read-only.", val);
614 return 0;
615}
616
abde5ea8 617static int worker_process_device(Manager *manager, sd_device *dev) {
2e088715 618 _cleanup_(udev_event_freep) UdevEvent *udev_event = NULL;
254d1313 619 _cleanup_close_ int fd_lock = -EBADF;
0bed242c 620 int r;
912541b0 621
0bed242c
YW
622 assert(manager);
623 assert(dev);
624
b2d9e58f 625 log_device_uevent(dev, "Processing device");
abde5ea8 626
1a0bd015 627 udev_event = udev_event_new(dev, arg_exec_delay_usec, manager->rtnl, manager->log_level);
0bed242c
YW
628 if (!udev_event)
629 return -ENOMEM;
630
5d354e52
YW
631 /* If this is a block device and the device is locked currently via the BSD advisory locks,
632 * someone else is using it exclusively. We don't run our udev rules now to not interfere.
633 * Instead of processing the event, we requeue the event and will try again after a delay.
634 *
635 * The user-facing side of this: https://systemd.io/BLOCK_DEVICE_LOCKING */
03a24ce7 636 r = worker_lock_whole_disk(dev, &fd_lock);
6467bda5
YW
637 if (r == -EAGAIN)
638 return EVENT_RESULT_TRY_AGAIN;
b6aab8ef 639 if (r < 0)
0bed242c 640 return r;
912541b0 641
95ac5230
LP
642 (void) worker_mark_block_device_read_only(dev);
643
0bed242c 644 /* apply rules, create node, symlinks */
df7ee959
YW
645 r = udev_event_execute_rules(
646 udev_event,
647 manager->inotify_fd,
648 arg_event_timeout_usec,
649 arg_timeout_signal,
650 manager->properties,
651 manager->rules);
99058cd6
YW
652 if (r < 0)
653 return r;
654
e2099267 655 udev_event_execute_run(udev_event, arg_event_timeout_usec, arg_timeout_signal);
2dd9f98d 656
0bed242c
YW
657 if (!manager->rtnl)
658 /* in case rtnl was initialized */
659 manager->rtnl = sd_netlink_ref(udev_event->rtnl);
912541b0 660
04b25410
YW
661 if (udev_event->inotify_watch) {
662 r = udev_watch_begin(manager->inotify_fd, dev);
663 if (r < 0 && r != -ENOENT) /* The device may be already removed, ignore -ENOENT. */
664 log_device_warning_errno(dev, r, "Failed to add inotify watch, ignoring: %m");
665 }
912541b0 666
b2d9e58f 667 log_device_uevent(dev, "Device processed");
0bed242c
YW
668 return 0;
669}
912541b0 670
e2130348 671static int worker_device_monitor_handler(sd_device_monitor *monitor, sd_device *dev, void *userdata) {
99534007 672 Manager *manager = ASSERT_PTR(userdata);
e2130348
YW
673 int r;
674
675 assert(dev);
e2130348
YW
676
677 r = worker_process_device(manager, dev);
6467bda5 678 if (r == EVENT_RESULT_TRY_AGAIN)
5d354e52 679 /* if we couldn't acquire the flock(), then requeue the event */
6467bda5
YW
680 log_device_debug(dev, "Block device is currently locked, requeueing the event.");
681 else {
682 if (r < 0)
683 log_device_warning_errno(dev, r, "Failed to process device, ignoring: %m");
e2130348 684
5abee64e 685 /* send processed event back to libudev listeners */
6467bda5
YW
686 device_broadcast(monitor, dev, r);
687 }
e2130348
YW
688
689 /* send udevd the result of the event execution */
6467bda5 690 r = worker_send_result(manager, r);
e2130348
YW
691 if (r < 0)
692 log_device_warning_errno(dev, r, "Failed to send signal to main daemon, ignoring: %m");
693
1a0bd015 694 /* Reset the log level, as it might be changed by "OPTIONS=log_level=". */
3cc6b14a 695 log_set_max_level(manager->log_level);
1a0bd015 696
e2130348
YW
697 return 1;
698}
699
7f2e3a14
YW
700static int worker_main(Manager *_manager, sd_device_monitor *monitor, sd_device *first_device) {
701 _cleanup_(sd_device_unrefp) sd_device *dev = first_device;
0bed242c 702 _cleanup_(manager_freep) Manager *manager = _manager;
44dcf454 703 int r;
145dae7e 704
0bed242c
YW
705 assert(manager);
706 assert(monitor);
707 assert(dev);
912541b0 708
e2130348
YW
709 assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, -1) >= 0);
710
711 /* Reset OOM score, we only protect the main daemon. */
712 r = set_oom_score_adjust(0);
713 if (r < 0)
714 log_debug_errno(r, "Failed to reset OOM score, ignoring: %m");
912541b0 715
7802194a 716 /* Clear unnecessary data in Manager object. */
e2130348 717 manager_clear_for_worker(manager);
3ebdb81e 718
e2130348
YW
719 r = sd_event_new(&manager->event);
720 if (r < 0)
721 return log_error_errno(r, "Failed to allocate event loop: %m");
912541b0 722
e2130348
YW
723 r = sd_event_add_signal(manager->event, NULL, SIGTERM, NULL, NULL);
724 if (r < 0)
725 return log_error_errno(r, "Failed to set SIGTERM event: %m");
4c83d994 726
e2130348
YW
727 r = sd_device_monitor_attach_event(monitor, manager->event);
728 if (r < 0)
729 return log_error_errno(r, "Failed to attach event loop to device monitor: %m");
912541b0 730
e2130348 731 r = sd_device_monitor_start(monitor, worker_device_monitor_handler, manager);
0bed242c 732 if (r < 0)
e2130348 733 return log_error_errno(r, "Failed to start device monitor: %m");
912541b0 734
e2130348
YW
735 /* Process first device */
736 (void) worker_device_monitor_handler(monitor, dev, manager);
912541b0 737
e2130348
YW
738 r = sd_event_loop(manager->event);
739 if (r < 0)
740 return log_error_errno(r, "Event loop failed: %m");
741
44dcf454 742 return 0;
0bed242c
YW
743}
744
419ec631 745static int on_event_timeout(sd_event_source *s, uint64_t usec, void *userdata) {
99534007 746 Event *event = ASSERT_PTR(userdata);
419ec631 747
419ec631
YW
748 assert(event->worker);
749
750 kill_and_sigcont(event->worker->pid, arg_timeout_signal);
751 event->worker->state = WORKER_KILLED;
752
753 log_device_error(event->dev, "Worker ["PID_FMT"] processing SEQNUM=%"PRIu64" killed", event->worker->pid, event->seqnum);
754
755 return 1;
756}
757
758static int on_event_timeout_warning(sd_event_source *s, uint64_t usec, void *userdata) {
99534007 759 Event *event = ASSERT_PTR(userdata);
419ec631 760
419ec631
YW
761 assert(event->worker);
762
763 log_device_warning(event->dev, "Worker ["PID_FMT"] processing SEQNUM=%"PRIu64" is taking a long time", event->worker->pid, event->seqnum);
764
765 return 1;
766}
767
768static void worker_attach_event(Worker *worker, Event *event) {
769 sd_event *e;
770
771 assert(worker);
772 assert(worker->manager);
773 assert(event);
774 assert(!event->worker);
775 assert(!worker->event);
776
777 worker->state = WORKER_RUNNING;
778 worker->event = event;
779 event->state = EVENT_RUNNING;
780 event->worker = worker;
781
782 e = worker->manager->event;
783
784 (void) sd_event_add_time_relative(e, &event->timeout_warning_event, CLOCK_MONOTONIC,
785 udev_warn_timeout(arg_event_timeout_usec), USEC_PER_SEC,
786 on_event_timeout_warning, event);
787
788 (void) sd_event_add_time_relative(e, &event->timeout_event, CLOCK_MONOTONIC,
789 arg_event_timeout_usec, USEC_PER_SEC,
790 on_event_timeout, event);
791}
792
d9239923 793static int worker_spawn(Manager *manager, Event *event) {
7f2e3a14 794 _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *worker_monitor = NULL;
d9239923 795 Worker *worker;
0bed242c 796 pid_t pid;
7443654e 797 int r;
0bed242c
YW
798
799 /* listen for new events */
7f2e3a14
YW
800 r = device_monitor_new_full(&worker_monitor, MONITOR_GROUP_NONE, -1);
801 if (r < 0)
802 return r;
7443654e 803
f714ecd4
YW
804 (void) sd_device_monitor_set_description(worker_monitor, "worker");
805
0bed242c 806 /* allow the main daemon netlink address to send devices to the worker */
7f2e3a14
YW
807 r = device_monitor_allow_unicast_sender(worker_monitor, manager->monitor);
808 if (r < 0)
809 return log_error_errno(r, "Worker: Failed to set unicast sender: %m");
810
811 r = device_monitor_enable_receiving(worker_monitor);
0bed242c 812 if (r < 0)
7f2e3a14 813 return log_error_errno(r, "Worker: Failed to enable receiving of device: %m");
0bed242c 814
e955a7f4 815 r = safe_fork("(udev-worker)", FORK_DEATHSIG, &pid);
7443654e
YW
816 if (r < 0) {
817 event->state = EVENT_QUEUED;
818 return log_error_errno(r, "Failed to fork() worker: %m");
819 }
820 if (r == 0) {
19b761a0 821 DEVICE_TRACE_POINT(worker_spawned, event->dev, getpid_cached());
b428efa5 822
7443654e 823 /* Worker process */
eb546b35 824 r = worker_main(manager, worker_monitor, sd_device_ref(event->dev));
baa30fbc 825 log_close();
8b46c3fc 826 _exit(r < 0 ? EXIT_FAILURE : EXIT_SUCCESS);
912541b0 827 }
e03c7cc2 828
7443654e
YW
829 r = worker_new(&worker, manager, worker_monitor, pid);
830 if (r < 0)
831 return log_error_errno(r, "Failed to create worker object: %m");
e03c7cc2 832
7443654e 833 worker_attach_event(worker, event);
39c19cf1 834
eb546b35 835 log_device_debug(event->dev, "Worker ["PID_FMT"] is forked for processing SEQNUM=%"PRIu64".", pid, event->seqnum);
7443654e 836 return 0;
7fafc032
KS
837}
838
f2a5412b 839static int event_run(Event *event) {
5406c368 840 static bool log_children_max_reached = true;
f2a5412b 841 Manager *manager;
d9239923 842 Worker *worker;
7f2e3a14 843 int r;
912541b0 844
c0c6806b 845 assert(event);
f2a5412b 846 assert(event->manager);
c0c6806b 847
b2d9e58f 848 log_device_uevent(event->dev, "Device ready for processing");
7600dbb8 849
4f294ffd
YW
850 (void) event_source_disable(event->retry_event_source);
851
f2a5412b 852 manager = event->manager;
90e74a66 853 HASHMAP_FOREACH(worker, manager->workers) {
912541b0
KS
854 if (worker->state != WORKER_IDLE)
855 continue;
856
eb546b35 857 r = device_monitor_send_device(manager->monitor, worker->monitor, event->dev);
7f2e3a14 858 if (r < 0) {
eb546b35 859 log_device_error_errno(event->dev, r, "Worker ["PID_FMT"] did not accept message, killing the worker: %m",
7f2e3a14 860 worker->pid);
cb542e84 861 (void) kill(worker->pid, SIGKILL);
912541b0
KS
862 worker->state = WORKER_KILLED;
863 continue;
864 }
39c19cf1 865 worker_attach_event(worker, event);
f2a5412b 866 return 1; /* event is now processing. */
912541b0
KS
867 }
868
c0c6806b 869 if (hashmap_size(manager->workers) >= arg_children_max) {
5406c368
FB
870 /* Avoid spamming the debug logs if the limit is already reached and
871 * many events still need to be processed */
872 if (log_children_max_reached && arg_children_max > 1) {
044497e2 873 log_debug("Maximum number (%u) of children reached.", hashmap_size(manager->workers));
5406c368
FB
874 log_children_max_reached = false;
875 }
f2a5412b 876 return 0; /* no free worker */
912541b0
KS
877 }
878
5406c368
FB
879 /* Re-enable the debug message for the next batch of events */
880 log_children_max_reached = true;
881
912541b0 882 /* start new worker and pass initial device */
f2a5412b
YW
883 r = worker_spawn(manager, event);
884 if (r < 0)
885 return r;
886
887 return 1; /* event is now processing. */
1e03b754
KS
888}
889
a1fa99d8 890static int event_is_blocked(Event *event) {
03677889 891 Event *loop_event = NULL;
29d02458 892 int r;
eb546b35 893
a1fa99d8
YW
894 /* lookup event for identical, parent, child device */
895
044ac33c
YW
896 assert(event);
897 assert(event->manager);
898 assert(event->blocker_seqnum <= event->seqnum);
899
5d354e52
YW
900 if (event->retry_again_next_usec > 0) {
901 usec_t now_usec;
902
ba4e0427 903 r = sd_event_now(event->manager->event, CLOCK_BOOTTIME, &now_usec);
5d354e52
YW
904 if (r < 0)
905 return r;
906
400e3d21 907 if (event->retry_again_next_usec > now_usec)
5d354e52
YW
908 return true;
909 }
910
044ac33c
YW
911 if (event->blocker_seqnum == event->seqnum)
912 /* we have checked previously and no blocker found */
913 return false;
914
03677889
YW
915 LIST_FOREACH(event, e, event->manager->events) {
916 loop_event = e;
917
044ac33c
YW
918 /* we already found a later event, earlier cannot block us, no need to check again */
919 if (loop_event->seqnum < event->blocker_seqnum)
920 continue;
921
922 /* event we checked earlier still exists, no need to check again */
923 if (loop_event->seqnum == event->blocker_seqnum)
924 return true;
925
926 /* found ourself, no later event can block us */
927 if (loop_event->seqnum >= event->seqnum)
928 goto no_blocker;
929
930 /* found event we have not checked */
931 break;
932 }
933
934 assert(loop_event);
935 assert(loop_event->seqnum > event->blocker_seqnum &&
936 loop_event->seqnum < event->seqnum);
937
912541b0 938 /* check if queue contains events we depend on */
03677889 939 LIST_FOREACH(event, e, loop_event) {
03677889
YW
940 loop_event = e;
941
912541b0
KS
942 /* found ourself, no later event can block us */
943 if (loop_event->seqnum >= event->seqnum)
044ac33c 944 goto no_blocker;
912541b0 945
29d02458 946 if (streq_ptr(loop_event->id, event->id))
a1fa99d8 947 break;
912541b0 948
a1af9668
YW
949 if (devpath_conflict(event->devpath, loop_event->devpath) ||
950 devpath_conflict(event->devpath, loop_event->devpath_old) ||
951 devpath_conflict(event->devpath_old, loop_event->devpath))
a1fa99d8 952 break;
912541b0 953
34458dbb 954 if (event->devnode && streq_ptr(event->devnode, loop_event->devnode))
a1fa99d8 955 break;
912541b0
KS
956 }
957
a1fa99d8 958 assert(loop_event);
0bd0407e 959
7600dbb8
UKK
960 log_device_debug(event->dev, "SEQNUM=%" PRIu64 " blocked by SEQNUM=%" PRIu64,
961 event->seqnum, loop_event->seqnum);
962
a1fa99d8 963 event->blocker_seqnum = loop_event->seqnum;
0bd0407e 964 return true;
044ac33c
YW
965
966no_blocker:
967 event->blocker_seqnum = event->seqnum;
968 return false;
7fafc032
KS
969}
970
0744e74c 971static int event_queue_start(Manager *manager) {
0725c4b9 972 int r;
8ab44e3f 973
c0c6806b
TG
974 assert(manager);
975
64903d18 976 if (!manager->events || manager->exit || manager->stop_exec_queue)
0744e74c 977 return 0;
7c4c7e89 978
10551728
YW
979 /* To make the stack directory /run/udev/links cleaned up later. */
980 manager->udev_node_needs_cleanup = true;
981
0725c4b9
YW
982 r = event_source_disable(manager->kill_workers_event);
983 if (r < 0)
984 log_warning_errno(r, "Failed to disable event source for cleaning up idle workers, ignoring: %m");
eca195ec 985
e9d1fae3 986 manager_reload(manager, /* force = */ false);
c02fb804 987
80a226b2 988 LIST_FOREACH(event, event, manager->events) {
912541b0
KS
989 if (event->state != EVENT_QUEUED)
990 continue;
0bc74ea7 991
c6f78234
YW
992 /* do not start event if parent or child event is still running or queued */
993 r = event_is_blocked(event);
2d40f02e
YW
994 if (r > 0)
995 continue;
0c3d8182 996 if (r < 0)
c6f78234 997 log_device_warning_errno(event->dev, r,
2d40f02e
YW
998 "Failed to check dependencies for event (SEQNUM=%"PRIu64", ACTION=%s), "
999 "assuming there is no blocking event, ignoring: %m",
c6f78234 1000 event->seqnum,
0c3d8182 1001 strna(device_action_to_string(event->action)));
fc465079 1002
f2a5412b 1003 r = event_run(event);
2d40f02e 1004 if (r <= 0) /* 0 means there are no idle workers. Let's escape from the loop. */
f2a5412b 1005 return r;
912541b0 1006 }
0744e74c
YW
1007
1008 return 0;
1e03b754
KS
1009}
1010
4f294ffd
YW
1011static int on_event_retry(sd_event_source *s, uint64_t usec, void *userdata) {
1012 /* This does nothing. The on_post() callback will start the event if there exists an idle worker. */
1013 return 1;
1014}
1015
5d354e52
YW
1016static int event_requeue(Event *event) {
1017 usec_t now_usec;
1018 int r;
1019
1020 assert(event);
1021 assert(event->manager);
1022 assert(event->manager->event);
1023
1024 event->timeout_warning_event = sd_event_source_disable_unref(event->timeout_warning_event);
1025 event->timeout_event = sd_event_source_disable_unref(event->timeout_event);
1026
1027 /* add a short delay to suppress busy loop */
ba4e0427 1028 r = sd_event_now(event->manager->event, CLOCK_BOOTTIME, &now_usec);
5d354e52
YW
1029 if (r < 0)
1030 return log_device_warning_errno(event->dev, r,
1031 "Failed to get current time, "
1032 "skipping event (SEQNUM=%"PRIu64", ACTION=%s): %m",
1033 event->seqnum, strna(device_action_to_string(event->action)));
1034
1035 if (event->retry_again_timeout_usec > 0 && event->retry_again_timeout_usec <= now_usec)
1036 return log_device_warning_errno(event->dev, SYNTHETIC_ERRNO(ETIMEDOUT),
1037 "The underlying block device is locked by a process more than %s, "
1038 "skipping event (SEQNUM=%"PRIu64", ACTION=%s).",
1039 FORMAT_TIMESPAN(EVENT_RETRY_TIMEOUT_USEC, USEC_PER_MINUTE),
1040 event->seqnum, strna(device_action_to_string(event->action)));
1041
1042 event->retry_again_next_usec = usec_add(now_usec, EVENT_RETRY_INTERVAL_USEC);
1043 if (event->retry_again_timeout_usec == 0)
1044 event->retry_again_timeout_usec = usec_add(now_usec, EVENT_RETRY_TIMEOUT_USEC);
1045
4f294ffd
YW
1046 r = event_reset_time_relative(event->manager->event, &event->retry_event_source,
1047 CLOCK_MONOTONIC, EVENT_RETRY_INTERVAL_USEC, 0,
1048 on_event_retry, NULL,
1049 0, "retry-event", true);
1050 if (r < 0)
1051 return log_device_warning_errno(event->dev, r, "Failed to reset timer event source for retrying event, "
1052 "skipping event (SEQNUM=%"PRIu64", ACTION=%s): %m",
1053 event->seqnum, strna(device_action_to_string(event->action)));
1054
5d354e52
YW
1055 if (event->worker && event->worker->event == event)
1056 event->worker->event = NULL;
1057 event->worker = NULL;
1058
1059 event->state = EVENT_QUEUED;
1060 return 0;
1061}
1062
82a5de9f
YW
1063static int event_queue_assume_block_device_unlocked(Manager *manager, sd_device *dev) {
1064 const char *devname;
1065 int r;
1066
1067 /* When a new event for a block device is queued or we get an inotify event, assume that the
1068 * device is not locked anymore. The assumption may not be true, but that should not cause any
1069 * issues, as in that case events will be requeued soon. */
1070
78e278ad 1071 r = device_get_whole_disk(dev, NULL, &devname);
82a5de9f
YW
1072 if (r <= 0)
1073 return r;
1074
1075 LIST_FOREACH(event, event, manager->events) {
1076 const char *event_devname;
1077
1078 if (event->state != EVENT_QUEUED)
1079 continue;
1080
1081 if (event->retry_again_next_usec == 0)
1082 continue;
1083
78e278ad 1084 if (device_get_whole_disk(event->dev, NULL, &event_devname) <= 0)
82a5de9f
YW
1085 continue;
1086
1087 if (!streq(devname, event_devname))
1088 continue;
1089
1090 event->retry_again_next_usec = 0;
1091 }
1092
1093 return 0;
1094}
1095
419ec631 1096static int event_queue_insert(Manager *manager, sd_device *dev) {
34458dbb 1097 const char *devpath, *devpath_old = NULL, *id = NULL, *devnode = NULL;
0c3d8182 1098 sd_device_action_t action;
419ec631 1099 uint64_t seqnum;
c9473aaa 1100 Event *event;
419ec631 1101 int r;
ff2c503d 1102
419ec631
YW
1103 assert(manager);
1104 assert(dev);
ff2c503d 1105
419ec631
YW
1106 /* only one process can add events to the queue */
1107 assert(manager->pid == getpid_cached());
1108
1109 /* We only accepts devices received by device monitor. */
1110 r = sd_device_get_seqnum(dev, &seqnum);
1111 if (r < 0)
1112 return r;
1113
0c3d8182
YW
1114 r = sd_device_get_action(dev, &action);
1115 if (r < 0)
1116 return r;
1117
29d02458
YW
1118 r = sd_device_get_devpath(dev, &devpath);
1119 if (r < 0)
1120 return r;
1121
1122 r = sd_device_get_property_value(dev, "DEVPATH_OLD", &devpath_old);
1123 if (r < 0 && r != -ENOENT)
1124 return r;
1125
1126 r = device_get_device_id(dev, &id);
1127 if (r < 0 && r != -ENOENT)
1128 return r;
1129
34458dbb
YW
1130 r = sd_device_get_devname(dev, &devnode);
1131 if (r < 0 && r != -ENOENT)
1132 return r;
1133
419ec631
YW
1134 event = new(Event, 1);
1135 if (!event)
1136 return -ENOMEM;
1137
1138 *event = (Event) {
1139 .manager = manager,
1140 .dev = sd_device_ref(dev),
419ec631 1141 .seqnum = seqnum,
0c3d8182 1142 .action = action,
29d02458
YW
1143 .id = id,
1144 .devpath = devpath,
1145 .devpath_old = devpath_old,
34458dbb 1146 .devnode = devnode,
419ec631
YW
1147 .state = EVENT_QUEUED,
1148 };
1149
64903d18 1150 if (!manager->events) {
419ec631
YW
1151 r = touch("/run/udev/queue");
1152 if (r < 0)
6be97d67 1153 log_warning_errno(r, "Failed to touch /run/udev/queue, ignoring: %m");
912541b0 1154 }
419ec631
YW
1155
1156 LIST_APPEND(event, manager->events, event);
1157
1158 log_device_uevent(dev, "Device is queued");
1159
1160 return 0;
1161}
1162
1163static int on_uevent(sd_device_monitor *monitor, sd_device *dev, void *userdata) {
99534007 1164 Manager *manager = ASSERT_PTR(userdata);
419ec631
YW
1165 int r;
1166
419ec631
YW
1167 DEVICE_TRACE_POINT(kernel_uevent_received, dev);
1168
1169 device_ensure_usec_initialized(dev, NULL);
1170
1171 r = event_queue_insert(manager, dev);
1172 if (r < 0) {
1173 log_device_error_errno(dev, r, "Failed to insert device into event queue: %m");
1174 return 1;
1175 }
1176
82a5de9f
YW
1177 (void) event_queue_assume_block_device_unlocked(manager, dev);
1178
419ec631 1179 return 1;
ff2c503d
KS
1180}
1181
e82e8fa5 1182static int on_worker(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
99534007 1183 Manager *manager = ASSERT_PTR(userdata);
c0c6806b 1184
912541b0 1185 for (;;) {
a79cba33 1186 EventResult result;
5d354e52 1187 struct iovec iovec = IOVEC_MAKE(&result, sizeof(result));
fb29cdbe 1188 CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred))) control;
979558f3
TG
1189 struct msghdr msghdr = {
1190 .msg_iov = &iovec,
1191 .msg_iovlen = 1,
1192 .msg_control = &control,
1193 .msg_controllen = sizeof(control),
1194 };
912541b0 1195 ssize_t size;
371d72e0 1196 struct ucred *ucred;
d9239923 1197 Worker *worker;
912541b0 1198
3691bcf3
LP
1199 size = recvmsg_safe(fd, &msghdr, MSG_DONTWAIT);
1200 if (size == -EINTR)
1201 continue;
1202 if (size == -EAGAIN)
1203 /* nothing more to read */
1204 break;
1205 if (size < 0)
1206 return log_error_errno(size, "Failed to receive message: %m");
1207
1208 cmsg_close_all(&msghdr);
979558f3 1209
6467bda5 1210 if (size != sizeof(result)) {
d4e98880 1211 log_warning("Ignoring worker message with invalid size %zi bytes", size);
e82e8fa5 1212 continue;
979558f3
TG
1213 }
1214
371d72e0 1215 ucred = CMSG_FIND_DATA(&msghdr, SOL_SOCKET, SCM_CREDENTIALS, struct ucred);
979558f3 1216 if (!ucred || ucred->pid <= 0) {
d4e98880 1217 log_warning("Ignoring worker message without valid PID");
979558f3
TG
1218 continue;
1219 }
912541b0
KS
1220
1221 /* lookup worker who sent the signal */
4a0b58c4 1222 worker = hashmap_get(manager->workers, PID_TO_PTR(ucred->pid));
a505965d 1223 if (!worker) {
044497e2 1224 log_debug("Worker ["PID_FMT"] returned, but is no longer tracked", ucred->pid);
a505965d 1225 continue;
912541b0 1226 }
c0bbfd72 1227
f257a8fc
YG
1228 if (worker->state == WORKER_KILLING) {
1229 worker->state = WORKER_KILLED;
1230 (void) kill(worker->pid, SIGTERM);
1231 } else if (worker->state != WORKER_KILLED)
a505965d
TG
1232 worker->state = WORKER_IDLE;
1233
1234 /* worker returned */
5d354e52
YW
1235 if (result == EVENT_RESULT_TRY_AGAIN &&
1236 event_requeue(worker->event) < 0)
6467bda5 1237 device_broadcast(manager->monitor, worker->event->dev, -ETIMEDOUT);
5d354e52
YW
1238
1239 /* When event_requeue() succeeds, worker->event is NULL, and event_free() handles NULL gracefully. */
a505965d 1240 event_free(worker->event);
912541b0 1241 }
e82e8fa5
TG
1242
1243 return 1;
1244}
1245
3b47c739 1246/* receive the udevd message from userspace */
e0d61dac 1247static int on_ctrl_msg(UdevCtrl *uctrl, UdevCtrlMessageType type, const UdevCtrlMessageValue *value, void *userdata) {
99534007 1248 Manager *manager = ASSERT_PTR(userdata);
d02c6f54 1249 int r;
912541b0 1250
d02c6f54 1251 assert(value);
e4f66b77 1252
d02c6f54
YW
1253 switch (type) {
1254 case UDEV_CTRL_SET_LOG_LEVEL:
53c8590f
YW
1255 if ((value->intval & LOG_PRIMASK) != value->intval) {
1256 log_debug("Received invalid udev control message (SET_LOG_LEVEL, %i), ignoring.", value->intval);
1257 break;
1258 }
1259
64a3494c 1260 log_debug("Received udev control message (SET_LOG_LEVEL), setting log_level=%i", value->intval);
53c8590f
YW
1261
1262 r = log_get_max_level();
1263 if (r == value->intval)
1264 break;
1265
3cc6b14a 1266 log_set_max_level(value->intval);
1a0bd015 1267 manager->log_level = value->intval;
f257a8fc 1268 manager_kill_workers(manager, false);
d02c6f54
YW
1269 break;
1270 case UDEV_CTRL_STOP_EXEC_QUEUE:
044497e2 1271 log_debug("Received udev control message (STOP_EXEC_QUEUE)");
c0c6806b 1272 manager->stop_exec_queue = true;
d02c6f54
YW
1273 break;
1274 case UDEV_CTRL_START_EXEC_QUEUE:
044497e2 1275 log_debug("Received udev control message (START_EXEC_QUEUE)");
c0c6806b 1276 manager->stop_exec_queue = false;
d7bddfa1 1277 /* It is not necessary to call event_queue_start() here, as it will be called in on_post() if necessary. */
d02c6f54
YW
1278 break;
1279 case UDEV_CTRL_RELOAD:
044497e2 1280 log_debug("Received udev control message (RELOAD)");
e9d1fae3 1281 manager_reload(manager, /* force = */ true);
d02c6f54
YW
1282 break;
1283 case UDEV_CTRL_SET_ENV: {
d7ac0952
FS
1284 _unused_ _cleanup_free_ char *old_val = NULL;
1285 _cleanup_free_ char *key = NULL, *val = NULL, *old_key = NULL;
d02c6f54 1286 const char *eq;
9b5150b6 1287
d02c6f54 1288 eq = strchr(value->buf, '=');
9b5150b6 1289 if (!eq) {
d02c6f54 1290 log_error("Invalid key format '%s'", value->buf);
9b5150b6
YW
1291 return 1;
1292 }
1293
d02c6f54 1294 key = strndup(value->buf, eq - value->buf);
9b5150b6
YW
1295 if (!key) {
1296 log_oom();
1297 return 1;
1298 }
1299
1300 old_val = hashmap_remove2(manager->properties, key, (void **) &old_key);
1301
1302 r = hashmap_ensure_allocated(&manager->properties, &string_hash_ops);
1303 if (r < 0) {
1304 log_oom();
1305 return 1;
912541b0 1306 }
9b5150b6
YW
1307
1308 eq++;
f053fc33 1309 if (isempty(eq)) {
044497e2 1310 log_debug("Received udev control message (ENV), unsetting '%s'", key);
9b5150b6
YW
1311
1312 r = hashmap_put(manager->properties, key, NULL);
1313 if (r < 0) {
1314 log_oom();
1315 return 1;
1316 }
1317 } else {
1318 val = strdup(eq);
1319 if (!val) {
1320 log_oom();
1321 return 1;
1322 }
1323
044497e2 1324 log_debug("Received udev control message (ENV), setting '%s=%s'", key, val);
9b5150b6
YW
1325
1326 r = hashmap_put(manager->properties, key, val);
1327 if (r < 0) {
1328 log_oom();
1329 return 1;
1330 }
1331 }
1332
1333 key = val = NULL;
f257a8fc 1334 manager_kill_workers(manager, false);
d02c6f54 1335 break;
912541b0 1336 }
d02c6f54
YW
1337 case UDEV_CTRL_SET_CHILDREN_MAX:
1338 if (value->intval <= 0) {
1339 log_debug("Received invalid udev control message (SET_MAX_CHILDREN, %i), ignoring.", value->intval);
1340 return 0;
1341 }
912541b0 1342
d02c6f54
YW
1343 log_debug("Received udev control message (SET_MAX_CHILDREN), setting children_max=%i", value->intval);
1344 arg_children_max = value->intval;
1ef72b55 1345
4bf4f50f 1346 notify_ready();
d02c6f54
YW
1347 break;
1348 case UDEV_CTRL_PING:
d30f43ee 1349 log_debug("Received udev control message (PING)");
d02c6f54
YW
1350 break;
1351 case UDEV_CTRL_EXIT:
044497e2 1352 log_debug("Received udev control message (EXIT)");
62d43dac 1353 manager_exit(manager);
d02c6f54
YW
1354 break;
1355 default:
1356 log_debug("Received unknown udev control message, ignoring");
912541b0 1357 }
e4f66b77 1358
e82e8fa5 1359 return 1;
88f4b648 1360}
4a231017 1361
25677a05 1362static int synthesize_change_one(sd_device *dev, sd_device *target) {
0584b17a
ZJS
1363 int r;
1364
25677a05
YW
1365 if (DEBUG_LOGGING) {
1366 const char *syspath = NULL;
1367 (void) sd_device_get_syspath(target, &syspath);
1368 log_device_debug(dev, "device is closed, synthesising 'change' on %s", strna(syspath));
1369 }
1370
1371 r = sd_device_trigger(target, SD_DEVICE_CHANGE);
0584b17a 1372 if (r < 0)
25677a05
YW
1373 return log_device_debug_errno(target, r, "Failed to trigger 'change' uevent: %m");
1374
b428efa5
MS
1375 DEVICE_TRACE_POINT(synthetic_change_event, dev);
1376
0584b17a
ZJS
1377 return 0;
1378}
1379
70068602 1380static int synthesize_change(sd_device *dev) {
2f9d2317
YW
1381 _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
1382 bool part_table_read;
1383 const char *sysname;
2f9d2317 1384 int r, k;
edd32000 1385
2f9d2317 1386 r = sd_device_get_sysname(dev, &sysname);
70068602
YW
1387 if (r < 0)
1388 return r;
1389
2f9d2317
YW
1390 if (startswith(sysname, "dm-") || block_device_is_whole_disk(dev) <= 0)
1391 return synthesize_change_one(dev, dev);
1392
1393 r = blockdev_reread_partition_table(dev);
70068602 1394 if (r < 0)
2f9d2317
YW
1395 log_device_debug_errno(dev, r, "Failed to re-read partition table, ignoring: %m");
1396 part_table_read = r >= 0;
70068602 1397
2f9d2317
YW
1398 /* search for partitions */
1399 r = partition_enumerator_new(dev, &e);
70068602
YW
1400 if (r < 0)
1401 return r;
1402
2f9d2317
YW
1403 /* We have partitions and re-read the table, the kernel already sent out a "change"
1404 * event for the disk, and "remove/add" for all partitions. */
1405 if (part_table_read && sd_device_enumerator_get_device_first(e))
1406 return 0;
e9fc29f4 1407
2f9d2317
YW
1408 /* We have partitions but re-reading the partition table did not work, synthesize
1409 * "change" for the disk and all partitions. */
1410 r = synthesize_change_one(dev, dev);
1411 FOREACH_DEVICE(e, d) {
1412 k = synthesize_change_one(dev, d);
1413 if (k < 0 && r >= 0)
1414 r = k;
1415 }
ede34445 1416
2f9d2317 1417 return r;
edd32000
KS
1418}
1419
e82e8fa5 1420static int on_inotify(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
ea8213dc 1421 Manager *manager = ASSERT_PTR(userdata);
0254e944 1422 union inotify_event_buffer buffer;
f7c1ad4f 1423 ssize_t l;
0725c4b9 1424 int r;
912541b0 1425
e82e8fa5 1426 l = read(fd, &buffer, sizeof(buffer));
f7c1ad4f 1427 if (l < 0) {
8add30a0 1428 if (ERRNO_IS_TRANSIENT(errno))
ea8213dc 1429 return 0;
912541b0 1430
f7c1ad4f 1431 return log_error_errno(errno, "Failed to read inotify fd: %m");
912541b0
KS
1432 }
1433
00adc340 1434 FOREACH_INOTIFY_EVENT_WARN(e, buffer, l) {
70068602
YW
1435 _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
1436 const char *devnode;
1437
cd66f972
YW
1438 /* Do not handle IN_IGNORED here. Especially, do not try to call udev_watch_end() from the
1439 * main process. Otherwise, the pair of the symlinks may become inconsistent, and several
1440 * garbage may remain. The old symlinks are removed by a worker that processes the
1441 * corresponding 'remove' uevent;
1442 * udev_event_execute_rules() -> event_execute_rules_on_remove() -> udev_watch_end(). */
1443
1444 if (!FLAGS_SET(e->mask, IN_CLOSE_WRITE))
1445 continue;
1446
e7f781e4
YW
1447 r = device_new_from_watch_handle(&dev, e->wd);
1448 if (r < 0) {
cd66f972 1449 /* Device may be removed just after closed. */
e7f781e4 1450 log_debug_errno(r, "Failed to create sd_device object from watch handle, ignoring: %m");
70068602 1451 continue;
e7f781e4 1452 }
912541b0 1453
cd66f972
YW
1454 r = sd_device_get_devname(dev, &devnode);
1455 if (r < 0) {
1456 /* Also here, device may be already removed. */
1457 log_device_debug_errno(dev, r, "Failed to get device node, ignoring: %m");
edd32000 1458 continue;
82a5de9f 1459 }
e7f781e4 1460
cd66f972
YW
1461 log_device_debug(dev, "Received inotify event for %s.", devnode);
1462
1463 (void) event_queue_assume_block_device_unlocked(manager, dev);
1464 (void) synthesize_change(dev);
912541b0
KS
1465 }
1466
ea8213dc 1467 return 0;
bd284db1
SJR
1468}
1469
0561329d 1470static int on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
99534007 1471 Manager *manager = ASSERT_PTR(userdata);
c0c6806b 1472
62d43dac 1473 manager_exit(manager);
912541b0 1474
e82e8fa5
TG
1475 return 1;
1476}
912541b0 1477
0561329d 1478static int on_sighup(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
99534007 1479 Manager *manager = ASSERT_PTR(userdata);
c0c6806b 1480
e9d1fae3 1481 manager_reload(manager, /* force = */ true);
912541b0 1482
e82e8fa5
TG
1483 return 1;
1484}
912541b0 1485
82e0b631
YW
1486static int on_sigchld(sd_event_source *s, const siginfo_t *si, void *userdata) {
1487 Worker *worker = ASSERT_PTR(userdata);
1488 Manager *manager = ASSERT_PTR(worker->manager);
1489 sd_device *dev = worker->event ? ASSERT_PTR(worker->event->dev) : NULL;
1490 EventResult result;
c0c6806b 1491
82e0b631 1492 assert(si);
e82e8fa5 1493
82e0b631
YW
1494 switch (si->si_code) {
1495 case CLD_EXITED:
1496 if (si->si_status == 0)
1497 log_device_debug(dev, "Worker ["PID_FMT"] exited.", si->si_pid);
1498 else
1499 log_device_warning(dev, "Worker ["PID_FMT"] exited with return code %i.",
1500 si->si_pid, si->si_status);
1501 result = EVENT_RESULT_EXIT_STATUS_BASE + si->si_status;
1502 break;
e82e8fa5 1503
82e0b631
YW
1504 case CLD_KILLED:
1505 case CLD_DUMPED:
1506 log_device_warning(dev, "Worker ["PID_FMT"] terminated by signal %i (%s).",
1507 si->si_pid, si->si_status, signal_to_string(si->si_status));
1508 result = EVENT_RESULT_SIGNAL_BASE + si->si_status;
1509 break;
dc7faf2a 1510
82e0b631
YW
1511 default:
1512 assert_not_reached();
1513 }
dc7faf2a 1514
82e0b631
YW
1515 if (result != EVENT_RESULT_SUCCESS && dev) {
1516 /* delete state from disk */
1517 device_delete_db(dev);
1518 device_tag_index(dev, NULL, false);
e82e8fa5 1519
82e0b631
YW
1520 /* Forward kernel event to libudev listeners */
1521 device_broadcast(manager->monitor, dev, result);
912541b0 1522 }
e82e8fa5 1523
82e0b631
YW
1524 worker_free(worker);
1525
e82e8fa5 1526 return 1;
f27125f9 1527}
1528
693d371d 1529static int on_post(sd_event_source *s, void *userdata) {
99534007 1530 Manager *manager = ASSERT_PTR(userdata);
693d371d 1531
64903d18 1532 if (manager->events) {
5d354e52
YW
1533 /* Try to process pending events if idle workers exist. Why is this necessary?
1534 * When a worker finished an event and became idle, even if there was a pending event,
1535 * the corresponding device might have been locked and the processing of the event
1536 * delayed for a while, preventing the worker from processing the event immediately.
1537 * Now, the device may be unlocked. Let's try again! */
1538 event_queue_start(manager);
b6107f01 1539 return 1;
5d354e52 1540 }
b6107f01 1541
40293280
YW
1542 /* There are no queued events. Let's remove /run/udev/queue and clean up the idle processes. */
1543
1544 if (unlink("/run/udev/queue") < 0) {
1545 if (errno != ENOENT)
1546 log_warning_errno(errno, "Failed to unlink /run/udev/queue, ignoring: %m");
1547 } else
1548 log_debug("No events are queued, removing /run/udev/queue.");
b6107f01
YW
1549
1550 if (!hashmap_isempty(manager->workers)) {
1551 /* There are idle workers */
8e543eb8
YW
1552 (void) event_reset_time_relative(manager->event, &manager->kill_workers_event,
1553 CLOCK_MONOTONIC, 3 * USEC_PER_SEC, USEC_PER_SEC,
1554 on_kill_workers_event, manager,
1555 0, "kill-workers-event", false);
b6107f01 1556 return 1;
693d371d
TG
1557 }
1558
b6107f01
YW
1559 /* There are no idle workers. */
1560
10551728
YW
1561 if (manager->udev_node_needs_cleanup) {
1562 (void) udev_node_cleanup();
1563 manager->udev_node_needs_cleanup = false;
1564 }
1565
b6107f01
YW
1566 if (manager->exit)
1567 return sd_event_exit(manager->event, 0);
1568
42670846
YW
1569 if (manager->cgroup)
1570 /* cleanup possible left-over processes in our cgroup */
1571 (void) cg_kill(SYSTEMD_CGROUP_CONTROLLER, manager->cgroup, SIGKILL, CGROUP_IGNORE_SELF, NULL, NULL, NULL);
b6107f01 1572
693d371d
TG
1573 return 1;
1574}
1575
c4b69e99 1576static int listen_fds(int *ret_ctrl, int *ret_netlink) {
254d1313 1577 int ctrl_fd = -EBADF, netlink_fd = -EBADF;
c4b69e99 1578 int fd, n;
912541b0 1579
c4b69e99
YW
1580 assert(ret_ctrl);
1581 assert(ret_netlink);
fcff1e72 1582
912541b0 1583 n = sd_listen_fds(true);
fcff1e72
TG
1584 if (n < 0)
1585 return n;
912541b0
KS
1586
1587 for (fd = SD_LISTEN_FDS_START; fd < n + SD_LISTEN_FDS_START; fd++) {
618b3642 1588 if (sd_is_socket(fd, AF_UNIX, SOCK_SEQPACKET, -1) > 0) {
fcff1e72
TG
1589 if (ctrl_fd >= 0)
1590 return -EINVAL;
1591 ctrl_fd = fd;
912541b0
KS
1592 continue;
1593 }
1594
c52cff07 1595 if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
fcff1e72
TG
1596 if (netlink_fd >= 0)
1597 return -EINVAL;
1598 netlink_fd = fd;
912541b0
KS
1599 continue;
1600 }
1601
fcff1e72 1602 return -EINVAL;
912541b0
KS
1603 }
1604
c4b69e99
YW
1605 *ret_ctrl = ctrl_fd;
1606 *ret_netlink = netlink_fd;
912541b0 1607
912541b0 1608 return 0;
7459bcdc
KS
1609}
1610
e6f86cac 1611/*
3f85ef0f 1612 * read the kernel command line, in case we need to get into debug mode
64a3494c 1613 * udev.log_level=<level> syslog priority
1d84ad94
LP
1614 * udev.children_max=<number of workers> events are fully serialized if set to 1
1615 * udev.exec_delay=<number of seconds> delay execution of every executed program
1616 * udev.event_timeout=<number of seconds> seconds to wait before terminating an event
95ac5230 1617 * udev.blockdev_read_only<=bool> mark all block devices read-only when they appear
e6f86cac 1618 */
96287a49 1619static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
95ac5230 1620 int r;
e6f86cac 1621
614a823c 1622 assert(key);
e6f86cac 1623
64a3494c
FB
1624 if (proc_cmdline_key_streq(key, "udev.log_level") ||
1625 proc_cmdline_key_streq(key, "udev.log_priority")) { /* kept for backward compatibility */
1d84ad94
LP
1626
1627 if (proc_cmdline_value_missing(key, value))
1628 return 0;
1629
46f0fbd8 1630 r = log_level_from_string(value);
92e72467
ZJS
1631 if (r >= 0)
1632 log_set_max_level(r);
1d84ad94
LP
1633
1634 } else if (proc_cmdline_key_streq(key, "udev.event_timeout")) {
1635
1636 if (proc_cmdline_value_missing(key, value))
1637 return 0;
1638
9d9264ba 1639 r = parse_sec(value, &arg_event_timeout_usec);
1d84ad94
LP
1640
1641 } else if (proc_cmdline_key_streq(key, "udev.children_max")) {
1642
1643 if (proc_cmdline_value_missing(key, value))
1644 return 0;
1645
020328e1 1646 r = safe_atou(value, &arg_children_max);
1d84ad94
LP
1647
1648 } else if (proc_cmdline_key_streq(key, "udev.exec_delay")) {
1649
1650 if (proc_cmdline_value_missing(key, value))
1651 return 0;
1652
6b92f429 1653 r = parse_sec(value, &arg_exec_delay_usec);
1d84ad94 1654
e2099267 1655 } else if (proc_cmdline_key_streq(key, "udev.timeout_signal")) {
95ac5230 1656
e2099267
MS
1657 if (proc_cmdline_value_missing(key, value))
1658 return 0;
1659
1660 r = signal_from_string(value);
1661 if (r > 0)
1662 arg_timeout_signal = r;
95ac5230
LP
1663
1664 } else if (proc_cmdline_key_streq(key, "udev.blockdev_read_only")) {
1665
1666 if (!value)
1667 arg_blockdev_read_only = true;
1668 else {
1669 r = parse_boolean(value);
1670 if (r < 0)
1671 log_warning_errno(r, "Failed to parse udev.blockdev-read-only argument, ignoring: %s", value);
1672 else
1673 arg_blockdev_read_only = r;
1674 }
1675
1676 if (arg_blockdev_read_only)
1677 log_notice("All physical block devices will be marked read-only.");
1678
1679 return 0;
1680
1681 } else {
1682 if (startswith(key, "udev."))
1683 log_warning("Unknown udev kernel command line option \"%s\", ignoring.", key);
1684
1685 return 0;
1686 }
614a823c 1687
92e72467
ZJS
1688 if (r < 0)
1689 log_warning_errno(r, "Failed to parse \"%s=%s\", ignoring: %m", key, value);
1d84ad94 1690
614a823c 1691 return 0;
e6f86cac
KS
1692}
1693
37ec0fdd
LP
1694static int help(void) {
1695 _cleanup_free_ char *link = NULL;
1696 int r;
1697
1698 r = terminal_urlify_man("systemd-udevd.service", "8", &link);
1699 if (r < 0)
1700 return log_oom();
1701
ed216e1f 1702 printf("%s [OPTIONS...]\n\n"
d1109e12 1703 "Rule-based manager for device events and files.\n\n"
5ac0162c 1704 " -h --help Print this message\n"
2d19c17e
MF
1705 " -V --version Print version of the program\n"
1706 " -d --daemon Detach and run in the background\n"
1707 " -D --debug Enable debug output\n"
1708 " -c --children-max=INT Set maximum number of workers\n"
1709 " -e --exec-delay=SECONDS Seconds to wait before executing RUN=\n"
1710 " -t --event-timeout=SECONDS Seconds to wait before terminating an event\n"
1711 " -N --resolve-names=early|late|never\n"
5ac0162c 1712 " When to resolve users and groups\n"
bc556335
DDM
1713 "\nSee the %s for details.\n",
1714 program_invocation_short_name,
1715 link);
37ec0fdd
LP
1716
1717 return 0;
ed216e1f
TG
1718}
1719
bba7a484 1720static int parse_argv(int argc, char *argv[]) {
e2099267
MS
1721 enum {
1722 ARG_TIMEOUT_SIGNAL,
1723 };
1724
912541b0 1725 static const struct option options[] = {
e2099267
MS
1726 { "daemon", no_argument, NULL, 'd' },
1727 { "debug", no_argument, NULL, 'D' },
1728 { "children-max", required_argument, NULL, 'c' },
1729 { "exec-delay", required_argument, NULL, 'e' },
1730 { "event-timeout", required_argument, NULL, 't' },
1731 { "resolve-names", required_argument, NULL, 'N' },
1732 { "help", no_argument, NULL, 'h' },
1733 { "version", no_argument, NULL, 'V' },
1734 { "timeout-signal", required_argument, NULL, ARG_TIMEOUT_SIGNAL },
912541b0
KS
1735 {}
1736 };
689a97f5 1737
044497e2 1738 int c, r;
689a97f5 1739
bba7a484
TG
1740 assert(argc >= 0);
1741 assert(argv);
912541b0 1742
e14b6f21 1743 while ((c = getopt_long(argc, argv, "c:de:Dt:N:hV", options, NULL)) >= 0) {
bba7a484 1744 switch (c) {
912541b0 1745
912541b0 1746 case 'd':
bba7a484 1747 arg_daemonize = true;
912541b0
KS
1748 break;
1749 case 'c':
020328e1 1750 r = safe_atou(optarg, &arg_children_max);
6f5cf8a8 1751 if (r < 0)
389f9bf2 1752 log_warning_errno(r, "Failed to parse --children-max= value '%s', ignoring: %m", optarg);
912541b0
KS
1753 break;
1754 case 'e':
6b92f429 1755 r = parse_sec(optarg, &arg_exec_delay_usec);
6f5cf8a8 1756 if (r < 0)
6b92f429 1757 log_warning_errno(r, "Failed to parse --exec-delay= value '%s', ignoring: %m", optarg);
912541b0 1758 break;
e2099267
MS
1759 case ARG_TIMEOUT_SIGNAL:
1760 r = signal_from_string(optarg);
1761 if (r <= 0)
1762 log_warning_errno(r, "Failed to parse --timeout-signal= value '%s', ignoring: %m", optarg);
1763 else
1764 arg_timeout_signal = r;
1765
1766 break;
9719859c 1767 case 't':
9d9264ba 1768 r = parse_sec(optarg, &arg_event_timeout_usec);
f1e8664e 1769 if (r < 0)
9d9264ba 1770 log_warning_errno(r, "Failed to parse --event-timeout= value '%s', ignoring: %m", optarg);
9719859c 1771 break;
912541b0 1772 case 'D':
bba7a484 1773 arg_debug = true;
912541b0 1774 break;
c4d44cba
YW
1775 case 'N': {
1776 ResolveNameTiming t;
1777
1778 t = resolve_name_timing_from_string(optarg);
1779 if (t < 0)
1780 log_warning("Invalid --resolve-names= value '%s', ignoring.", optarg);
1781 else
1782 arg_resolve_name_timing = t;
912541b0 1783 break;
c4d44cba 1784 }
912541b0 1785 case 'h':
37ec0fdd 1786 return help();
912541b0 1787 case 'V':
681bd2c5 1788 printf("%s\n", GIT_VERSION);
bba7a484
TG
1789 return 0;
1790 case '?':
1791 return -EINVAL;
912541b0 1792 default:
04499a70 1793 assert_not_reached();
bba7a484 1794
912541b0
KS
1795 }
1796 }
1797
bba7a484
TG
1798 return 1;
1799}
1800
a1f4fd38 1801static int manager_new(Manager **ret, int fd_ctrl, int fd_uevent) {
c0c6806b 1802 _cleanup_(manager_freep) Manager *manager = NULL;
a1f4fd38 1803 _cleanup_free_ char *cgroup = NULL;
b5af8c8c 1804 int r;
c0c6806b
TG
1805
1806 assert(ret);
1807
6f19b42f 1808 manager = new(Manager, 1);
c0c6806b
TG
1809 if (!manager)
1810 return log_oom();
1811
6f19b42f 1812 *manager = (Manager) {
254d1313 1813 .inotify_fd = -EBADF,
19ee48a6 1814 .worker_watch = PIPE_EBADF,
6f19b42f 1815 };
e237d8cb 1816
100bc5bf
YW
1817 r = udev_ctrl_new_from_fd(&manager->ctrl, fd_ctrl);
1818 if (r < 0)
1819 return log_error_errno(r, "Failed to initialize udev control socket: %m");
e237d8cb 1820
b5af8c8c
ZJS
1821 r = udev_ctrl_enable_receiving(manager->ctrl);
1822 if (r < 0)
1823 return log_error_errno(r, "Failed to bind udev control socket: %m");
c4b69e99 1824
7f2e3a14
YW
1825 r = device_monitor_new_full(&manager->monitor, MONITOR_GROUP_KERNEL, fd_uevent);
1826 if (r < 0)
1827 return log_error_errno(r, "Failed to initialize device monitor: %m");
e237d8cb 1828
1ffadeaa
FB
1829 /* Bump receiver buffer, but only if we are not called via socket activation, as in that
1830 * case systemd sets the receive buffer size for us, and the value in the .socket unit
1831 * should take full effect. */
e77f52e5
YW
1832 if (fd_uevent < 0) {
1833 r = sd_device_monitor_set_receive_buffer_size(manager->monitor, 128 * 1024 * 1024);
1834 if (r < 0)
1835 log_warning_errno(r, "Failed to set receive buffer size for device monitor, ignoring: %m");
1836 }
c4b69e99 1837
f714ecd4
YW
1838 (void) sd_device_monitor_set_description(manager->monitor, "manager");
1839
b5af8c8c
ZJS
1840 r = device_monitor_enable_receiving(manager->monitor);
1841 if (r < 0)
1842 return log_error_errno(r, "Failed to bind netlink socket: %m");
1843
1a0bd015
YW
1844 manager->log_level = log_get_max_level();
1845
f8371dbd
LP
1846 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup);
1847 if (r < 0)
1848 log_warning_errno(r, "Failed to get cgroup, ignoring: %m");
1849 else if (endswith(cgroup, "/udev")) { /* If we are in a subcgroup /udev/ we assume it was delegated to us */
1850 log_debug("Running in delegated subcgroup '%s'.", cgroup);
1851 manager->cgroup = TAKE_PTR(cgroup);
1852 }
1853
b5af8c8c
ZJS
1854 *ret = TAKE_PTR(manager);
1855
1856 return 0;
1857}
1858
1859static int main_loop(Manager *manager) {
d02c6f54 1860 int fd_worker, r;
b5af8c8c 1861
76e62a4d
YW
1862 manager->pid = getpid_cached();
1863
e237d8cb 1864 /* unnamed socket from workers to the main daemon */
618b3642 1865 r = socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, manager->worker_watch);
e237d8cb 1866 if (r < 0)
044497e2 1867 return log_error_errno(errno, "Failed to create socketpair for communicating with workers: %m");
e237d8cb 1868
693d371d 1869 fd_worker = manager->worker_watch[READ_END];
e237d8cb 1870
2ff48e98 1871 r = setsockopt_int(fd_worker, SOL_SOCKET, SO_PASSCRED, true);
e237d8cb 1872 if (r < 0)
044497e2 1873 return log_error_errno(r, "Failed to enable SO_PASSCRED: %m");
e237d8cb 1874
df7ee959
YW
1875 manager->inotify_fd = inotify_init1(IN_CLOEXEC);
1876 if (manager->inotify_fd < 0)
1877 return log_error_errno(errno, "Failed to create inotify descriptor: %m");
e237d8cb 1878
df7ee959 1879 udev_watch_restore(manager->inotify_fd);
e237d8cb
TG
1880
1881 /* block and listen to all signals on signalfd */
79d905af 1882 assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, SIGHUP, SIGCHLD, SIGRTMIN+18, -1) >= 0);
693d371d 1883
49f997f3
TG
1884 r = sd_event_default(&manager->event);
1885 if (r < 0)
044497e2 1886 return log_error_errno(r, "Failed to allocate event loop: %m");
49f997f3 1887
693d371d
TG
1888 r = sd_event_add_signal(manager->event, NULL, SIGINT, on_sigterm, manager);
1889 if (r < 0)
044497e2 1890 return log_error_errno(r, "Failed to create SIGINT event source: %m");
693d371d
TG
1891
1892 r = sd_event_add_signal(manager->event, NULL, SIGTERM, on_sigterm, manager);
1893 if (r < 0)
044497e2 1894 return log_error_errno(r, "Failed to create SIGTERM event source: %m");
693d371d
TG
1895
1896 r = sd_event_add_signal(manager->event, NULL, SIGHUP, on_sighup, manager);
1897 if (r < 0)
044497e2 1898 return log_error_errno(r, "Failed to create SIGHUP event source: %m");
693d371d 1899
693d371d
TG
1900 r = sd_event_set_watchdog(manager->event, true);
1901 if (r < 0)
044497e2 1902 return log_error_errno(r, "Failed to create watchdog event source: %m");
693d371d 1903
d02c6f54
YW
1904 r = udev_ctrl_attach_event(manager->ctrl, manager->event);
1905 if (r < 0)
1906 return log_error_errno(r, "Failed to attach event to udev control: %m");
b5af8c8c 1907
d02c6f54 1908 r = udev_ctrl_start(manager->ctrl, on_ctrl_msg, manager);
693d371d 1909 if (r < 0)
d02c6f54 1910 return log_error_errno(r, "Failed to start device monitor: %m");
693d371d
TG
1911
1912 /* This needs to be after the inotify and uevent handling, to make sure
1913 * that the ping is send back after fully processing the pending uevents
1914 * (including the synthetic ones we may create due to inotify events).
1915 */
d02c6f54 1916 r = sd_event_source_set_priority(udev_ctrl_get_event_source(manager->ctrl), SD_EVENT_PRIORITY_IDLE);
693d371d 1917 if (r < 0)
044497e2 1918 return log_error_errno(r, "Failed to set IDLE event priority for udev control event source: %m");
693d371d 1919
df7ee959 1920 r = sd_event_add_io(manager->event, &manager->inotify_event, manager->inotify_fd, EPOLLIN, on_inotify, manager);
693d371d 1921 if (r < 0)
044497e2 1922 return log_error_errno(r, "Failed to create inotify event source: %m");
693d371d 1923
f00d2b6d
YW
1924 r = sd_device_monitor_attach_event(manager->monitor, manager->event);
1925 if (r < 0)
1926 return log_error_errno(r, "Failed to attach event to device monitor: %m");
1927
1928 r = sd_device_monitor_start(manager->monitor, on_uevent, manager);
693d371d 1929 if (r < 0)
f00d2b6d
YW
1930 return log_error_errno(r, "Failed to start device monitor: %m");
1931
693d371d
TG
1932 r = sd_event_add_io(manager->event, NULL, fd_worker, EPOLLIN, on_worker, manager);
1933 if (r < 0)
044497e2 1934 return log_error_errno(r, "Failed to create worker event source: %m");
693d371d
TG
1935
1936 r = sd_event_add_post(manager->event, NULL, on_post, manager);
1937 if (r < 0)
044497e2 1938 return log_error_errno(r, "Failed to create post event source: %m");
e237d8cb 1939
79d905af
LP
1940 /* Eventually, we probably want to do more here on memory pressure, for example, kill idle workers immediately */
1941 r = sd_event_add_memory_pressure(manager->event, &manager->memory_pressure_event_source, NULL, NULL);
1942 if (r < 0)
1943 log_full_errno(ERRNO_IS_NOT_SUPPORTED(r) || ERRNO_IS_PRIVILEGE(r) || (r == -EHOSTDOWN) ? LOG_DEBUG : LOG_WARNING, r,
1944 "Failed to allocate memory pressure watch, ignoring: %m");
1945
1946 r = sd_event_add_signal(manager->event, &manager->memory_pressure_event_source, SIGRTMIN+18, sigrtmin18_handler, NULL);
1947 if (r < 0)
1948 return log_error_errno(r, "Failed to allocate SIGRTMIN+18 event source, ignoring: %m");
1949
e9d1fae3
YW
1950 manager->last_usec = now(CLOCK_MONOTONIC);
1951
b5af8c8c 1952 udev_builtin_init();
077fc5e2 1953
c238a1f5 1954 r = udev_rules_load(&manager->rules, arg_resolve_name_timing);
298316eb 1955 if (r < 0)
b5af8c8c 1956 return log_error_errno(r, "Failed to read udev rules: %m");
077fc5e2
DH
1957
1958 r = udev_rules_apply_static_dev_perms(manager->rules);
1959 if (r < 0)
6e40ed53 1960 log_warning_errno(r, "Failed to apply permissions on static device nodes, ignoring: %m");
077fc5e2 1961
4bf4f50f 1962 notify_ready();
077fc5e2
DH
1963
1964 r = sd_event_loop(manager->event);
44dcf454 1965 if (r < 0)
044497e2 1966 log_error_errno(r, "Event loop failed: %m");
077fc5e2 1967
f8433153 1968 (void) sd_notify(/* unset= */ false, NOTIFY_STOPPING);
077fc5e2
DH
1969 return r;
1970}
1971
63e2d171 1972int run_udevd(int argc, char *argv[]) {
b5af8c8c 1973 _cleanup_(manager_freep) Manager *manager = NULL;
254d1313 1974 int fd_ctrl = -EBADF, fd_uevent = -EBADF;
e5d7bce1 1975 int r;
bba7a484 1976
bba7a484 1977 log_set_target(LOG_TARGET_AUTO);
6b413782 1978 log_open();
e2099267 1979 udev_parse_config_full(&arg_children_max, &arg_exec_delay_usec, &arg_event_timeout_usec, &arg_resolve_name_timing, &arg_timeout_signal);
bba7a484 1980 log_parse_environment();
6b413782 1981 log_open(); /* Done again to update after reading configuration. */
bba7a484 1982
bba7a484
TG
1983 r = parse_argv(argc, argv);
1984 if (r <= 0)
0c5a109a 1985 return r;
bba7a484 1986
1d84ad94 1987 r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, PROC_CMDLINE_STRIP_RD_PREFIX);
614a823c 1988 if (r < 0)
044497e2 1989 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
912541b0 1990
78d3e041
KS
1991 if (arg_debug) {
1992 log_set_target(LOG_TARGET_CONSOLE);
bba7a484 1993 log_set_max_level(LOG_DEBUG);
78d3e041 1994 }
bba7a484 1995
fba868fa
LP
1996 r = must_be_root();
1997 if (r < 0)
0c5a109a 1998 return r;
912541b0 1999
712cebf1 2000 if (arg_children_max == 0) {
fe56acd8 2001 unsigned long cpu_limit, mem_limit, cpu_count = 1;
d457ff83 2002
fe56acd8
LP
2003 r = cpus_in_affinity_mask();
2004 if (r < 0)
2005 log_warning_errno(r, "Failed to determine number of local CPUs, ignoring: %m");
2006 else
2007 cpu_count = r;
88bd5a32
FB
2008
2009 cpu_limit = cpu_count * 2 + 16;
2010 mem_limit = MAX(physical_memory() / (128UL*1024*1024), 10U);
912541b0 2011
88bd5a32
FB
2012 arg_children_max = MIN(cpu_limit, mem_limit);
2013 arg_children_max = MIN(WORKER_NUM_MAX, arg_children_max);
e438c57a 2014
044497e2 2015 log_debug("Set children_max to %u", arg_children_max);
d457ff83 2016 }
912541b0 2017
712cebf1 2018 /* set umask before creating any file/directory */
712cebf1 2019 umask(022);
912541b0 2020
a452c807 2021 r = mac_init();
0c5a109a 2022 if (r < 0)
a9ba0e32 2023 return r;
912541b0 2024
3f692e2e 2025 r = RET_NERRNO(mkdir("/run/udev", 0755));
0c5a109a
ZJS
2026 if (r < 0 && r != -EEXIST)
2027 return log_error_errno(r, "Failed to create /run/udev: %m");
712cebf1 2028
b7f74dd4 2029 r = listen_fds(&fd_ctrl, &fd_uevent);
0c5a109a
ZJS
2030 if (r < 0)
2031 return log_error_errno(r, "Failed to listen on fds: %m");
b7f74dd4 2032
a1f4fd38 2033 r = manager_new(&manager, fd_ctrl, fd_uevent);
b5af8c8c
ZJS
2034 if (r < 0)
2035 return log_error_errno(r, "Failed to create manager: %m");
2036
bba7a484 2037 if (arg_daemonize) {
912541b0 2038 pid_t pid;
912541b0 2039
2751ad9c 2040 log_info("Starting systemd-udevd version " GIT_VERSION);
3cbb2057 2041
40e749b5 2042 /* connect /dev/null to stdin, stdout, stderr */
c76cf844
AK
2043 if (log_get_max_level() < LOG_DEBUG) {
2044 r = make_null_stdio();
2045 if (r < 0)
2046 log_warning_errno(r, "Failed to redirect standard streams to /dev/null: %m");
2047 }
2048
912541b0 2049 pid = fork();
0c5a109a
ZJS
2050 if (pid < 0)
2051 return log_error_errno(errno, "Failed to fork daemon: %m");
2052 if (pid > 0)
2053 /* parent */
2054 return 0;
912541b0 2055
0c5a109a 2056 /* child */
ece0fe12 2057 (void) setsid();
7500cd5e 2058 }
912541b0 2059
76e62a4d 2060 return main_loop(manager);
7fafc032 2061}