1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2010 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <linux/kd.h>
24 #include <signal.h>
25 #include <stdio_ext.h>
26 #include <string.h>
27 #include <sys/epoll.h>
28 #include <sys/inotify.h>
29 #include <sys/ioctl.h>
30 #include <sys/reboot.h>
31 #include <sys/timerfd.h>
32 #include <sys/wait.h>
33 #include <unistd.h>
34
35 #if HAVE_AUDIT
36 #include <libaudit.h>
37 #endif
38
39 #include "sd-daemon.h"
40 #include "sd-messages.h"
41 #include "sd-path.h"
42
43 #include "alloc-util.h"
44 #include "audit-fd.h"
45 #include "boot-timestamps.h"
46 #include "bus-common-errors.h"
47 #include "bus-error.h"
48 #include "bus-kernel.h"
49 #include "bus-util.h"
50 #include "clean-ipc.h"
51 #include "clock-util.h"
52 #include "dbus-job.h"
53 #include "dbus-manager.h"
54 #include "dbus-unit.h"
55 #include "dbus.h"
56 #include "dirent-util.h"
57 #include "env-util.h"
58 #include "escape.h"
59 #include "exec-util.h"
60 #include "execute.h"
61 #include "exit-status.h"
62 #include "fd-util.h"
63 #include "fileio.h"
64 #include "fs-util.h"
65 #include "hashmap.h"
66 #include "io-util.h"
67 #include "label.h"
68 #include "locale-setup.h"
69 #include "log.h"
70 #include "macro.h"
71 #include "manager.h"
72 #include "missing.h"
73 #include "mkdir.h"
74 #include "parse-util.h"
75 #include "path-lookup.h"
76 #include "path-util.h"
77 #include "process-util.h"
78 #include "ratelimit.h"
79 #include "rm-rf.h"
80 #include "signal-util.h"
81 #include "special.h"
82 #include "stat-util.h"
83 #include "string-table.h"
84 #include "string-util.h"
85 #include "strv.h"
86 #include "strxcpyx.h"
87 #include "terminal-util.h"
88 #include "time-util.h"
89 #include "transaction.h"
90 #include "umask-util.h"
91 #include "unit-name.h"
92 #include "user-util.h"
93 #include "util.h"
94 #include "virt.h"
95 #include "watchdog.h"
96
97 #define NOTIFY_RCVBUF_SIZE (8*1024*1024)
98 #define CGROUPS_AGENT_RCVBUF_SIZE (8*1024*1024)
99
100 /* Initial delay and the interval for printing status messages about running jobs */
101 #define JOBS_IN_PROGRESS_WAIT_USEC (5*USEC_PER_SEC)
102 #define JOBS_IN_PROGRESS_PERIOD_USEC (USEC_PER_SEC / 3)
103 #define JOBS_IN_PROGRESS_PERIOD_DIVISOR 3
104
105 /* If there are more than 1K bus messages queued across our API and direct busses, then let's not add more on top until
106  * the queue empties out a bit. */
107 #define MANAGER_BUS_BUSY_THRESHOLD 1024LU
108
109 /* How many units and jobs from the bus queue to process before returning to the event loop. */
110 #define MANAGER_BUS_MESSAGE_BUDGET 100U
111
112 static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
113 static int manager_dispatch_cgroups_agent_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
114 static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
115 static int manager_dispatch_time_change_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
116 static int manager_dispatch_idle_pipe_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
117 static int manager_dispatch_user_lookup_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
118 static int manager_dispatch_jobs_in_progress(sd_event_source *source, usec_t usec, void *userdata);
119 static int manager_dispatch_run_queue(sd_event_source *source, void *userdata);
120 static int manager_dispatch_sigchld(sd_event_source *source, void *userdata);
121 static int manager_run_environment_generators(Manager *m);
122 static int manager_run_generators(Manager *m);
123
124 static void manager_watch_jobs_in_progress(Manager *m) {
125 usec_t next;
126 int r;
127
128 assert(m);
129
130 /* We do not want to show the cylon animation if the user
131  * needs to confirm service executions; otherwise the
132  * confirmation messages would be garbled by the animation. */
133 if (!manager_is_confirm_spawn_disabled(m))
134 return;
135
136 if (m->jobs_in_progress_event_source)
137 return;
138
139 next = now(CLOCK_MONOTONIC) + JOBS_IN_PROGRESS_WAIT_USEC;
140 r = sd_event_add_time(
141 m->event,
142 &m->jobs_in_progress_event_source,
143 CLOCK_MONOTONIC,
144 next, 0,
145 manager_dispatch_jobs_in_progress, m);
146 if (r < 0)
147 return;
148
149 (void) sd_event_source_set_description(m->jobs_in_progress_event_source, "manager-jobs-in-progress");
150 }
151
152 #define CYLON_BUFFER_EXTRA (2*STRLEN(ANSI_RED) + STRLEN(ANSI_HIGHLIGHT_RED) + 2*STRLEN(ANSI_NORMAL))
153
154 static void draw_cylon(char buffer[], size_t buflen, unsigned width, unsigned pos) {
155 char *p = buffer;
156
157 assert(buflen >= CYLON_BUFFER_EXTRA + width + 1);
158 assert(pos <= width+1); /* 0 or width+1 mean that the center light is behind the corner */
159
160 if (pos > 1) {
161 if (pos > 2)
162 p = mempset(p, ' ', pos-2);
163 if (log_get_show_color())
164 p = stpcpy(p, ANSI_RED);
165 *p++ = '*';
166 }
167
168 if (pos > 0 && pos <= width) {
169 if (log_get_show_color())
170 p = stpcpy(p, ANSI_HIGHLIGHT_RED);
171 *p++ = '*';
172 }
173
174 if (log_get_show_color())
175 p = stpcpy(p, ANSI_NORMAL);
176
177 if (pos < width) {
178 if (log_get_show_color())
179 p = stpcpy(p, ANSI_RED);
180 *p++ = '*';
181 if (pos < width-1)
182 p = mempset(p, ' ', width-1-pos);
183 if (log_get_show_color())
184 strcpy(p, ANSI_NORMAL);
185 }
186 }
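/* For example, with width 6, pos 3, and colors disabled this renders " ***  ": a three-star "comet"
 * whose bright center star sweeps back and forth across the field as pos advances. */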
187
188 void manager_flip_auto_status(Manager *m, bool enable) {
189 assert(m);
190
191 if (enable) {
192 if (m->show_status == SHOW_STATUS_AUTO)
193 manager_set_show_status(m, SHOW_STATUS_TEMPORARY);
194 } else {
195 if (m->show_status == SHOW_STATUS_TEMPORARY)
196 manager_set_show_status(m, SHOW_STATUS_AUTO);
197 }
198 }
199
200 static void manager_print_jobs_in_progress(Manager *m) {
201 _cleanup_free_ char *job_of_n = NULL;
202 Iterator i;
203 Job *j;
204 unsigned counter = 0, print_nr;
205 char cylon[6 + CYLON_BUFFER_EXTRA + 1];
206 unsigned cylon_pos;
207 char time[FORMAT_TIMESPAN_MAX], limit[FORMAT_TIMESPAN_MAX] = "no limit";
208 uint64_t x;
209
210 assert(m);
211 assert(m->n_running_jobs > 0);
212
213 manager_flip_auto_status(m, true);
214
215 print_nr = (m->jobs_in_progress_iteration / JOBS_IN_PROGRESS_PERIOD_DIVISOR) % m->n_running_jobs;
216
217 HASHMAP_FOREACH(j, m->jobs, i)
218 if (j->state == JOB_RUNNING && counter++ == print_nr)
219 break;
220
221 /* m->n_running_jobs must be consistent with the contents of m->jobs,
222 * so the above loop must have succeeded in finding j. */
223 assert(counter == print_nr + 1);
224 assert(j);
225
226 cylon_pos = m->jobs_in_progress_iteration % 14;
227 if (cylon_pos >= 8)
228 cylon_pos = 14 - cylon_pos;
229 draw_cylon(cylon, sizeof(cylon), 6, cylon_pos);
230
231 m->jobs_in_progress_iteration++;
232
233 if (m->n_running_jobs > 1) {
234 if (asprintf(&job_of_n, "(%u of %u) ", counter, m->n_running_jobs) < 0)
235 job_of_n = NULL;
236 }
237
238 format_timespan(time, sizeof(time), now(CLOCK_MONOTONIC) - j->begin_usec, 1*USEC_PER_SEC);
239 if (job_get_timeout(j, &x) > 0)
240 format_timespan(limit, sizeof(limit), x - j->begin_usec, 1*USEC_PER_SEC);
241
242 manager_status_printf(m, STATUS_TYPE_EPHEMERAL, cylon,
243 "%sA %s job is running for %s (%s / %s)",
244 strempty(job_of_n),
245 job_type_to_string(j->type),
246 unit_description(j->unit),
247 time, limit);
248 }
249
250 static int have_ask_password(void) {
251 _cleanup_closedir_ DIR *dir;
252 struct dirent *de;
253
254 dir = opendir("/run/systemd/ask-password");
255 if (!dir) {
256 if (errno == ENOENT)
257 return false;
258 else
259 return -errno;
260 }
261
262 FOREACH_DIRENT_ALL(de, dir, return -errno) {
263 if (startswith(de->d_name, "ask."))
264 return true;
265 }
266 return false;
267 }
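/* The return value is effectively a tristate: true, false, or a negative errno on failure, which is
 * why m->have_ask_password is an int initialized to -EINVAL ("we don't know") rather than a bool. */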
268
269 static int manager_dispatch_ask_password_fd(sd_event_source *source,
270 int fd, uint32_t revents, void *userdata) {
271 Manager *m = userdata;
272
273 assert(m);
274
275 (void) flush_fd(fd);
276
277 m->have_ask_password = have_ask_password();
278 if (m->have_ask_password < 0)
279 /* Log error but continue. Negative have_ask_password
280 * is treated as unknown status. */
281 log_error_errno(m->have_ask_password, "Failed to list /run/systemd/ask-password: %m");
282
283 return 0;
284 }
285
286 static void manager_close_ask_password(Manager *m) {
287 assert(m);
288
289 m->ask_password_event_source = sd_event_source_unref(m->ask_password_event_source);
290 m->ask_password_inotify_fd = safe_close(m->ask_password_inotify_fd);
291 m->have_ask_password = -EINVAL;
292 }
293
294 static int manager_check_ask_password(Manager *m) {
295 int r;
296
297 assert(m);
298
299 if (!m->ask_password_event_source) {
300 assert(m->ask_password_inotify_fd < 0);
301
302 mkdir_p_label("/run/systemd/ask-password", 0755);
303
304 m->ask_password_inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
305 if (m->ask_password_inotify_fd < 0)
306 return log_error_errno(errno, "inotify_init1() failed: %m");
307
308 if (inotify_add_watch(m->ask_password_inotify_fd, "/run/systemd/ask-password", IN_CREATE|IN_DELETE|IN_MOVE) < 0) {
309 log_error_errno(errno, "Failed to add watch on /run/systemd/ask-password: %m");
310 manager_close_ask_password(m);
311 return -errno;
312 }
313
314 r = sd_event_add_io(m->event, &m->ask_password_event_source,
315 m->ask_password_inotify_fd, EPOLLIN,
316 manager_dispatch_ask_password_fd, m);
317 if (r < 0) {
318 log_error_errno(r, "Failed to add event source for /run/systemd/ask-password: %m");
319 manager_close_ask_password(m);
320 return r;
321 }
322
323 (void) sd_event_source_set_description(m->ask_password_event_source, "manager-ask-password");
324
325 /* Queries might have been added meanwhile... */
326 manager_dispatch_ask_password_fd(m->ask_password_event_source,
327 m->ask_password_inotify_fd, EPOLLIN, m);
328 }
329
330 return m->have_ask_password;
331 }
332
333 static int manager_watch_idle_pipe(Manager *m) {
334 int r;
335
336 assert(m);
337
338 if (m->idle_pipe_event_source)
339 return 0;
340
341 if (m->idle_pipe[2] < 0)
342 return 0;
343
344 r = sd_event_add_io(m->event, &m->idle_pipe_event_source, m->idle_pipe[2], EPOLLIN, manager_dispatch_idle_pipe_fd, m);
345 if (r < 0)
346 return log_error_errno(r, "Failed to watch idle pipe: %m");
347
348 (void) sd_event_source_set_description(m->idle_pipe_event_source, "manager-idle-pipe");
349
350 return 0;
351 }
352
353 static void manager_close_idle_pipe(Manager *m) {
354 assert(m);
355
356 m->idle_pipe_event_source = sd_event_source_unref(m->idle_pipe_event_source);
357
358 safe_close_pair(m->idle_pipe);
359 safe_close_pair(m->idle_pipe + 2);
360 }
361
362 static int manager_setup_time_change(Manager *m) {
363 int r;
364
365 /* We only care for the cancellation event, hence we set the
366 * timeout to the latest possible value. */
367 struct itimerspec its = {
368 .it_value.tv_sec = TIME_T_MAX,
369 };
370
371 assert(m);
372 assert_cc(sizeof(time_t) == sizeof(TIME_T_MAX));
373
374 if (m->test_run_flags)
375 return 0;
376
377 /* Uses TFD_TIMER_CANCEL_ON_SET to get notifications whenever
378 * CLOCK_REALTIME makes a jump relative to CLOCK_MONOTONIC */
379
380 m->time_change_fd = timerfd_create(CLOCK_REALTIME, TFD_NONBLOCK|TFD_CLOEXEC);
381 if (m->time_change_fd < 0)
382 return log_error_errno(errno, "Failed to create timerfd: %m");
383
384 if (timerfd_settime(m->time_change_fd, TFD_TIMER_ABSTIME|TFD_TIMER_CANCEL_ON_SET, &its, NULL) < 0) {
385 log_debug_errno(errno, "Failed to set up TFD_TIMER_CANCEL_ON_SET, ignoring: %m");
386 m->time_change_fd = safe_close(m->time_change_fd);
387 return 0;
388 }
389
390 r = sd_event_add_io(m->event, &m->time_change_event_source, m->time_change_fd, EPOLLIN, manager_dispatch_time_change_fd, m);
391 if (r < 0)
392 return log_error_errno(r, "Failed to create time change event source: %m");
393
394 (void) sd_event_source_set_description(m->time_change_event_source, "manager-time-change");
395
396 log_debug("Set up TFD_TIMER_CANCEL_ON_SET timerfd.");
397
398 return 0;
399 }
400
401 static int enable_special_signals(Manager *m) {
402 _cleanup_close_ int fd = -1;
403
404 assert(m);
405
406 if (m->test_run_flags)
407 return 0;
408
409 /* Enable that we get SIGINT on control-alt-del. In containers
410 * this will fail with EPERM (older) or EINVAL (newer), so
411 * ignore that. */
412 if (reboot(RB_DISABLE_CAD) < 0 && !IN_SET(errno, EPERM, EINVAL))
413 log_warning_errno(errno, "Failed to enable ctrl-alt-del handling: %m");
414
415 fd = open_terminal("/dev/tty0", O_RDWR|O_NOCTTY|O_CLOEXEC);
416 if (fd < 0) {
417 /* Support systems without virtual console */
418 if (fd != -ENOENT)
419 log_warning_errno(fd, "Failed to open /dev/tty0: %m");
420 } else {
421 /* Enable that we get SIGWINCH on kbrequest */
422 if (ioctl(fd, KDSIGACCEPT, SIGWINCH) < 0)
423 log_warning_errno(errno, "Failed to enable kbrequest handling: %m");
424 }
425
426 return 0;
427 }
428
429 static int manager_setup_signals(Manager *m) {
430 struct sigaction sa = {
431 .sa_handler = SIG_DFL,
432 .sa_flags = SA_NOCLDSTOP|SA_RESTART,
433 };
434 sigset_t mask;
435 int r;
436
437 assert(m);
438
439 assert_se(sigaction(SIGCHLD, &sa, NULL) == 0);
440
441 /* We make liberal use of realtime signals here. On
442 * Linux/glibc we have 30 of them (with the exception of Linux
443 * on hppa, see below), between SIGRTMIN+0 ... SIGRTMIN+30
444 * (aka SIGRTMAX). */
445
446 assert_se(sigemptyset(&mask) == 0);
447 sigset_add_many(&mask,
448 SIGCHLD, /* Child died */
449 SIGTERM, /* Reexecute daemon */
450 SIGHUP, /* Reload configuration */
451 SIGUSR1, /* systemd/upstart: reconnect to D-Bus */
452 SIGUSR2, /* systemd: dump status */
453 SIGINT, /* Kernel sends us this on control-alt-del */
454 SIGWINCH, /* Kernel sends us this on kbrequest (alt-arrowup) */
455 SIGPWR, /* Some kernel drivers and upsd send us this on power failure */
456
457 SIGRTMIN+0, /* systemd: start default.target */
458 SIGRTMIN+1, /* systemd: isolate rescue.target */
459 SIGRTMIN+2, /* systemd: isolate emergency.target */
460 SIGRTMIN+3, /* systemd: start halt.target */
461 SIGRTMIN+4, /* systemd: start poweroff.target */
462 SIGRTMIN+5, /* systemd: start reboot.target */
463 SIGRTMIN+6, /* systemd: start kexec.target */
464
465 /* ... space for more special targets ... */
466
467 SIGRTMIN+13, /* systemd: Immediate halt */
468 SIGRTMIN+14, /* systemd: Immediate poweroff */
469 SIGRTMIN+15, /* systemd: Immediate reboot */
470 SIGRTMIN+16, /* systemd: Immediate kexec */
471
472 /* ... space for more immediate system state changes ... */
473
474 SIGRTMIN+20, /* systemd: enable status messages */
475 SIGRTMIN+21, /* systemd: disable status messages */
476 SIGRTMIN+22, /* systemd: set log level to LOG_DEBUG */
477 SIGRTMIN+23, /* systemd: set log level to LOG_INFO */
478 SIGRTMIN+24, /* systemd: Immediate exit (--user only) */
479
480 /* .. one free signal here ... */
481
482 #if !defined(__hppa64__) && !defined(__hppa__)
483 /* Apparently Linux on hppa has fewer RT
484 * signals (SIGRTMAX is SIGRTMIN+25 there),
485 * hence let's not try to make use of them
486 * here. Since these commands are accessible
487 * by different means and only really a safety
488 * net, the missing functionality on hppa
489 * shouldn't matter. */
490
491 SIGRTMIN+26, /* systemd: set log target to journal-or-kmsg */
492 SIGRTMIN+27, /* systemd: set log target to console */
493 SIGRTMIN+28, /* systemd: set log target to kmsg */
494 SIGRTMIN+29, /* systemd: set log target to syslog-or-kmsg (obsolete) */
495
496 /* ... one free signal here SIGRTMIN+30 ... */
497 #endif
498 -1);
499 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
500
501 m->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
502 if (m->signal_fd < 0)
503 return -errno;
504
505 r = sd_event_add_io(m->event, &m->signal_event_source, m->signal_fd, EPOLLIN, manager_dispatch_signal_fd, m);
506 if (r < 0)
507 return r;
508
509 (void) sd_event_source_set_description(m->signal_event_source, "manager-signal");
510
511 /* Process signals a bit earlier than the rest of things, but later than notify_fd processing, so that the
512 * notify processing can still figure out to which process/service a message belongs, before we reap the
513 * process. Also, process this before handling cgroup notifications, so that we always collect child exit
514 * status information before detecting that there's no process in a cgroup. */
515 r = sd_event_source_set_priority(m->signal_event_source, SD_EVENT_PRIORITY_NORMAL-6);
516 if (r < 0)
517 return r;
518
519 if (MANAGER_IS_SYSTEM(m))
520 return enable_special_signals(m);
521
522 return 0;
523 }
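/* All of the signals listed above are blocked and consumed via the signalfd-based event source rather
 * than asynchronous handlers. For illustration, assuming a shell that understands RTMIN+n names,
 * "kill -s RTMIN+4 1" would ask the system instance to start poweroff.target. */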
524
525 static void manager_sanitize_environment(Manager *m) {
526 assert(m);
527
528 /* Let's remove some environment variables that we need ourselves to communicate with our clients */
529 strv_env_unset_many(
530 m->environment,
531 "EXIT_CODE",
532 "EXIT_STATUS",
533 "INVOCATION_ID",
534 "JOURNAL_STREAM",
535 "LISTEN_FDNAMES",
536 "LISTEN_FDS",
537 "LISTEN_PID",
538 "MAINPID",
539 "MANAGERPID",
540 "NOTIFY_SOCKET",
541 "REMOTE_ADDR",
542 "REMOTE_PORT",
543 "SERVICE_RESULT",
544 "WATCHDOG_PID",
545 "WATCHDOG_USEC",
546 NULL);
547
548 /* Let's order the environment alphabetically, just to make it pretty */
549 strv_sort(m->environment);
550 }
551
552 static int manager_default_environment(Manager *m) {
553 assert(m);
554
555 if (MANAGER_IS_SYSTEM(m)) {
556 /* The system manager always starts with a clean
557 * environment for its children. It does not import
558 * the kernel's or the parents' exported variables.
559 *
560 * The initial passed environment is untouched to keep
561 * /proc/self/environ valid; it is used for tagging
562 * the init process inside containers. */
563 m->environment = strv_new("PATH=" DEFAULT_PATH,
564 NULL);
565
566 /* Import locale variables LC_*= from configuration */
567 locale_setup(&m->environment);
568 } else
569 /* The user manager passes its own environment
570 * along to its children. */
571 m->environment = strv_copy(environ);
572
573 if (!m->environment)
574 return -ENOMEM;
575
576 manager_sanitize_environment(m);
577
578 return 0;
579 }
580
581 static int manager_setup_prefix(Manager *m) {
582 struct table_entry {
583 uint64_t type;
584 const char *suffix;
585 };
586
587 static const struct table_entry paths_system[_EXEC_DIRECTORY_TYPE_MAX] = {
588 [EXEC_DIRECTORY_RUNTIME] = { SD_PATH_SYSTEM_RUNTIME, NULL },
589 [EXEC_DIRECTORY_STATE] = { SD_PATH_SYSTEM_STATE_PRIVATE, NULL },
590 [EXEC_DIRECTORY_CACHE] = { SD_PATH_SYSTEM_STATE_CACHE, NULL },
591 [EXEC_DIRECTORY_LOGS] = { SD_PATH_SYSTEM_STATE_LOGS, NULL },
592 [EXEC_DIRECTORY_CONFIGURATION] = { SD_PATH_SYSTEM_CONFIGURATION, NULL },
593 };
594
595 static const struct table_entry paths_user[_EXEC_DIRECTORY_TYPE_MAX] = {
596 [EXEC_DIRECTORY_RUNTIME] = { SD_PATH_USER_RUNTIME, NULL },
597 [EXEC_DIRECTORY_STATE] = { SD_PATH_USER_CONFIGURATION, NULL },
598 [EXEC_DIRECTORY_CACHE] = { SD_PATH_USER_STATE_CACHE, NULL },
599 [EXEC_DIRECTORY_LOGS] = { SD_PATH_USER_CONFIGURATION, "log" },
600 [EXEC_DIRECTORY_CONFIGURATION] = { SD_PATH_USER_CONFIGURATION, NULL },
601 };
602
603 const struct table_entry *p;
604 ExecDirectoryType i;
605 int r;
606
607 assert(m);
608
609 if (MANAGER_IS_SYSTEM(m))
610 p = paths_system;
611 else
612 p = paths_user;
613
614 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++) {
615 r = sd_path_home(p[i].type, p[i].suffix, &m->prefix[i]);
616 if (r < 0)
617 return r;
618 }
619
620 return 0;
621 }
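/* Typically sd_path resolves EXEC_DIRECTORY_RUNTIME to /run for the system instance and to
 * $XDG_RUNTIME_DIR for the user instance; these prefixes are used below, e.g. to build the
 * notification socket path in manager_setup_notify(). */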
622
623 static int manager_setup_run_queue(Manager *m) {
624 int r;
625
626 assert(m);
627 assert(!m->run_queue_event_source);
628
629 r = sd_event_add_defer(m->event, &m->run_queue_event_source, manager_dispatch_run_queue, m);
630 if (r < 0)
631 return r;
632
633 r = sd_event_source_set_priority(m->run_queue_event_source, SD_EVENT_PRIORITY_IDLE);
634 if (r < 0)
635 return r;
636
637 r = sd_event_source_set_enabled(m->run_queue_event_source, SD_EVENT_OFF);
638 if (r < 0)
639 return r;
640
641 (void) sd_event_source_set_description(m->run_queue_event_source, "manager-run-queue");
642
643 return 0;
644 }
645
646 static int manager_setup_sigchld_event_source(Manager *m) {
647 int r;
648
649 assert(m);
650 assert(!m->sigchld_event_source);
651
652 r = sd_event_add_defer(m->event, &m->sigchld_event_source, manager_dispatch_sigchld, m);
653 if (r < 0)
654 return r;
655
656 r = sd_event_source_set_priority(m->sigchld_event_source, SD_EVENT_PRIORITY_NORMAL-7);
657 if (r < 0)
658 return r;
659
660 r = sd_event_source_set_enabled(m->sigchld_event_source, SD_EVENT_OFF);
661 if (r < 0)
662 return r;
663
664 (void) sd_event_source_set_description(m->sigchld_event_source, "manager-sigchld");
665
666 return 0;
667 }
668
669 int manager_new(UnitFileScope scope, unsigned test_run_flags, Manager **_m) {
670 Manager *m;
671 int r;
672
673 assert(_m);
674 assert(IN_SET(scope, UNIT_FILE_SYSTEM, UNIT_FILE_USER));
675
676 m = new0(Manager, 1);
677 if (!m)
678 return -ENOMEM;
679
680 m->unit_file_scope = scope;
681 m->exit_code = _MANAGER_EXIT_CODE_INVALID;
682 m->default_timer_accuracy_usec = USEC_PER_MINUTE;
683 m->default_memory_accounting = MEMORY_ACCOUNTING_DEFAULT;
684 m->default_tasks_accounting = true;
685 m->default_tasks_max = UINT64_MAX;
686 m->default_timeout_start_usec = DEFAULT_TIMEOUT_USEC;
687 m->default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC;
688 m->default_restart_usec = DEFAULT_RESTART_USEC;
689
690 #if ENABLE_EFI
691 if (MANAGER_IS_SYSTEM(m) && detect_container() <= 0)
692 boot_timestamps(m->timestamps + MANAGER_TIMESTAMP_USERSPACE,
693 m->timestamps + MANAGER_TIMESTAMP_FIRMWARE,
694 m->timestamps + MANAGER_TIMESTAMP_LOADER);
695 #endif
696
697 /* Prepare log fields we can use for structured logging */
698 if (MANAGER_IS_SYSTEM(m)) {
699 m->unit_log_field = "UNIT=";
700 m->unit_log_format_string = "UNIT=%s";
701
702 m->invocation_log_field = "INVOCATION_ID=";
703 m->invocation_log_format_string = "INVOCATION_ID=%s";
704 } else {
705 m->unit_log_field = "USER_UNIT=";
706 m->unit_log_format_string = "USER_UNIT=%s";
707
708 m->invocation_log_field = "USER_INVOCATION_ID=";
709 m->invocation_log_format_string = "USER_INVOCATION_ID=%s";
710 }
711
712 m->idle_pipe[0] = m->idle_pipe[1] = m->idle_pipe[2] = m->idle_pipe[3] = -1;
713
714 m->pin_cgroupfs_fd = m->notify_fd = m->cgroups_agent_fd = m->signal_fd = m->time_change_fd =
715 m->dev_autofs_fd = m->private_listen_fd = m->cgroup_inotify_fd =
716 m->ask_password_inotify_fd = -1;
717
718 m->user_lookup_fds[0] = m->user_lookup_fds[1] = -1;
719
720 m->current_job_id = 1; /* start as id #1, so that we can leave #0 around as "null-like" value */
721
722 m->have_ask_password = -EINVAL; /* we don't know */
723 m->first_boot = -1;
724
725 m->test_run_flags = test_run_flags;
726
727 /* Reboot immediately if the user hits C-A-D more often than 7x per 2s */
728 RATELIMIT_INIT(m->ctrl_alt_del_ratelimit, 2 * USEC_PER_SEC, 7);
729
730 r = manager_default_environment(m);
731 if (r < 0)
732 goto fail;
733
734 r = hashmap_ensure_allocated(&m->units, &string_hash_ops);
735 if (r < 0)
736 goto fail;
737
738 r = hashmap_ensure_allocated(&m->jobs, NULL);
739 if (r < 0)
740 goto fail;
741
742 r = hashmap_ensure_allocated(&m->cgroup_unit, &path_hash_ops);
743 if (r < 0)
744 goto fail;
745
746 r = hashmap_ensure_allocated(&m->watch_bus, &string_hash_ops);
747 if (r < 0)
748 goto fail;
749
750 r = sd_event_default(&m->event);
751 if (r < 0)
752 goto fail;
753
754 r = manager_setup_run_queue(m);
755 if (r < 0)
756 goto fail;
757
758 r = manager_setup_signals(m);
759 if (r < 0)
760 goto fail;
761
762 r = manager_setup_cgroup(m);
763 if (r < 0)
764 goto fail;
765
766 r = manager_setup_time_change(m);
767 if (r < 0)
768 goto fail;
769
770 r = manager_setup_sigchld_event_source(m);
771 if (r < 0)
772 goto fail;
773
774 m->udev = udev_new();
775 if (!m->udev) {
776 r = -ENOMEM;
777 goto fail;
778 }
779
780 r = manager_setup_prefix(m);
781 if (r < 0)
782 goto fail;
783
784 if (MANAGER_IS_SYSTEM(m) && test_run_flags == 0) {
785 r = mkdir_label("/run/systemd/units", 0755);
786 if (r < 0 && r != -EEXIST)
787 goto fail;
788 }
789
790 m->taint_usr =
791 !in_initrd() &&
792 dir_is_empty("/usr") > 0;
793
794 /* Note that we do not set up the notify fd here. We do that after deserialization,
795 * since they might have gotten serialized across the reexec. */
796
797 *_m = m;
798 return 0;
799
800 fail:
801 manager_free(m);
802 return r;
803 }
804
805 static int manager_setup_notify(Manager *m) {
806 int r;
807
808 if (m->test_run_flags)
809 return 0;
810
811 if (m->notify_fd < 0) {
812 _cleanup_close_ int fd = -1;
813 union sockaddr_union sa = {
814 .sa.sa_family = AF_UNIX,
815 };
816 static const int one = 1;
817
818 /* First free all secondary fields */
819 m->notify_socket = mfree(m->notify_socket);
820 m->notify_event_source = sd_event_source_unref(m->notify_event_source);
821
822 fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
823 if (fd < 0)
824 return log_error_errno(errno, "Failed to allocate notification socket: %m");
825
826 fd_inc_rcvbuf(fd, NOTIFY_RCVBUF_SIZE);
827
828 m->notify_socket = strappend(m->prefix[EXEC_DIRECTORY_RUNTIME], "/systemd/notify");
829 if (!m->notify_socket)
830 return log_oom();
831
832 (void) mkdir_parents_label(m->notify_socket, 0755);
833 (void) unlink(m->notify_socket);
834
835 strncpy(sa.un.sun_path, m->notify_socket, sizeof(sa.un.sun_path)-1);
836 r = bind(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
837 if (r < 0)
838 return log_error_errno(errno, "bind(%s) failed: %m", sa.un.sun_path);
839
840 r = setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
841 if (r < 0)
842 return log_error_errno(errno, "SO_PASSCRED failed: %m");
843
844 m->notify_fd = fd;
845 fd = -1;
846
847 log_debug("Using notification socket %s", m->notify_socket);
848 }
849
850 if (!m->notify_event_source) {
851 r = sd_event_add_io(m->event, &m->notify_event_source, m->notify_fd, EPOLLIN, manager_dispatch_notify_fd, m);
852 if (r < 0)
853 return log_error_errno(r, "Failed to allocate notify event source: %m");
854
855 /* Process notification messages a bit earlier than SIGCHLD, so that we can still identify to which
856 * service an exit message belongs. */
857 r = sd_event_source_set_priority(m->notify_event_source, SD_EVENT_PRIORITY_NORMAL-8);
858 if (r < 0)
859 return log_error_errno(r, "Failed to set priority of notify event source: %m");
860
861 (void) sd_event_source_set_description(m->notify_event_source, "manager-notify");
862 }
863
864 return 0;
865 }
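/* Services reach this socket via the $NOTIFY_SOCKET environment variable that the manager exports for
 * them elsewhere; e.g. a client would call sd_notify(0, "READY=1") from libsystemd, which sends a
 * datagram that manager_dispatch_notify_fd() then processes. */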
866
867 static int manager_setup_cgroups_agent(Manager *m) {
868
869 static const union sockaddr_union sa = {
870 .un.sun_family = AF_UNIX,
871 .un.sun_path = "/run/systemd/cgroups-agent",
872 };
873 int r;
874
875 /* This creates a listening socket we receive cgroups agent messages on. We do not use D-Bus for delivering
876 * these messages from the cgroups agent binary to PID 1, as the cgroups agent binary is very short-lived, and
877 * each instance of it needs a new D-Bus connection. Since D-Bus connections are SOCK_STREAM/AF_UNIX, on
878 * overloaded systems the backlog of the D-Bus socket becomes relevant, as not more than the configured number
879 * of D-Bus connections may be queued before the kernel starts dropping further incoming connections,
880 * possibly resulting in lost cgroups agent messages. To avoid this, we'll use a private SOCK_DGRAM/AF_UNIX
881 * socket, where no backlog is relevant as communication may take place without an actual connect() cycle, and
882 * we thus won't lose messages.
883 *
884 * Note that PID 1 will forward the agent message to system bus, so that the user systemd instance may listen
885 * to it. The system instance hence listens on this special socket, but the user instances listen on the system
886 * bus for these messages. */
887
888 if (m->test_run_flags)
889 return 0;
890
891 if (!MANAGER_IS_SYSTEM(m))
892 return 0;
893
894 r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
895 if (r < 0)
896 return log_error_errno(r, "Failed to determine whether unified cgroups hierarchy is used: %m");
897 if (r > 0) /* We don't need this anymore on the unified hierarchy */
898 return 0;
899
900 if (m->cgroups_agent_fd < 0) {
901 _cleanup_close_ int fd = -1;
902
903 /* First free all secondary fields */
904 m->cgroups_agent_event_source = sd_event_source_unref(m->cgroups_agent_event_source);
905
906 fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
907 if (fd < 0)
908 return log_error_errno(errno, "Failed to allocate cgroups agent socket: %m");
909
910 fd_inc_rcvbuf(fd, CGROUPS_AGENT_RCVBUF_SIZE);
911
912 (void) unlink(sa.un.sun_path);
913
914 /* Only allow root to connect to this socket */
915 RUN_WITH_UMASK(0077)
916 r = bind(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
917 if (r < 0)
918 return log_error_errno(errno, "bind(%s) failed: %m", sa.un.sun_path);
919
920 m->cgroups_agent_fd = fd;
921 fd = -1;
922 }
923
924 if (!m->cgroups_agent_event_source) {
925 r = sd_event_add_io(m->event, &m->cgroups_agent_event_source, m->cgroups_agent_fd, EPOLLIN, manager_dispatch_cgroups_agent_fd, m);
926 if (r < 0)
927 return log_error_errno(r, "Failed to allocate cgroups agent event source: %m");
928
929 /* Process cgroups notifications early, but after having processed service notification messages or
930 * SIGCHLD signals, so that a cgroup running empty is always just the last safety net of notification,
931 * and we collected the metadata the notification and SIGCHLD stuff offers first. Also see handling of
932 * cgroup inotify for the unified cgroup stuff. */
933 r = sd_event_source_set_priority(m->cgroups_agent_event_source, SD_EVENT_PRIORITY_NORMAL-4);
934 if (r < 0)
935 return log_error_errno(r, "Failed to set priority of cgroups agent event source: %m");
936
937 (void) sd_event_source_set_description(m->cgroups_agent_event_source, "manager-cgroups-agent");
938 }
939
940 return 0;
941 }
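/* Taken together, the event source priorities set up in this file order dispatching as: user lookups
 * (NORMAL-11) before notifications (NORMAL-8) before SIGCHLD (NORMAL-7) before other signals (NORMAL-6)
 * before cgroups agent messages (NORMAL-4), matching the rationale given in the comments above. */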
942
943 static int manager_setup_user_lookup_fd(Manager *m) {
944 int r;
945
946 assert(m);
947
948 /* Set up the socket pair used for passing UID/GID resolution results from forked off processes to PID
949 * 1. Background: we can't do name lookups (NSS) from PID 1, since it might involve IPC and thus activation,
950 * and we might hence deadlock on ourselves. Hence we do all user/group lookups asynchronously from the forked
951 * off processes right before executing the binaries to start. In order to be able to clean up any IPC objects
952 * created by a unit (see RemoveIPC=) we need to know in PID 1 the used UID/GID of the executed processes,
953 * hence we establish this communication channel so that forked off processes can pass their UID/GID
954 * information back to PID 1. The forked off processes send their resolved UID/GID to PID 1 in a simple
955 * datagram, along with their unit name, so that we can share one communication socket pair among all units for
956 * this purpose.
957 *
958 * You might wonder why we need a communication channel for this that is independent of the usual notification
959 * socket scheme (i.e. $NOTIFY_SOCKET). The primary difference is about trust: data sent via the $NOTIFY_SOCKET
960 * channel is only accepted if it originates from the right unit and if reception was enabled for it. The user
961 * lookup socket OTOH is only accessible by PID 1 and its children until they exec(), and always available.
962 *
963 * Note that this function is called under two circumstances: when we first initialize (in which case we
964 * allocate both the socket pair and the event source to listen on it), and when we deserialize after a reload
965 * (in which case the socket pair already exists but we still need to allocate the event source for it). */
966
967 if (m->user_lookup_fds[0] < 0) {
968
969 /* Free all secondary fields */
970 safe_close_pair(m->user_lookup_fds);
971 m->user_lookup_event_source = sd_event_source_unref(m->user_lookup_event_source);
972
973 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, m->user_lookup_fds) < 0)
974 return log_error_errno(errno, "Failed to allocate user lookup socket: %m");
975
976 (void) fd_inc_rcvbuf(m->user_lookup_fds[0], NOTIFY_RCVBUF_SIZE);
977 }
978
979 if (!m->user_lookup_event_source) {
980 r = sd_event_add_io(m->event, &m->user_lookup_event_source, m->user_lookup_fds[0], EPOLLIN, manager_dispatch_user_lookup_fd, m);
981 if (r < 0)
982 return log_error_errno(r, "Failed to allocate user lookup event source: %m");
983
984 /* Process even earlier than the notify event source, so that we always know first about valid UID/GID
985 * resolutions */
986 r = sd_event_source_set_priority(m->user_lookup_event_source, SD_EVENT_PRIORITY_NORMAL-11);
987 if (r < 0)
988 return log_error_errno(r, "Failed to set priority of user lookup event source: %m");
989
990 (void) sd_event_source_set_description(m->user_lookup_event_source, "user-lookup");
991 }
992
993 return 0;
994 }
995
996 static unsigned manager_dispatch_cleanup_queue(Manager *m) {
997 Unit *u;
998 unsigned n = 0;
999
1000 assert(m);
1001
1002 while ((u = m->cleanup_queue)) {
1003 assert(u->in_cleanup_queue);
1004
1005 unit_free(u);
1006 n++;
1007 }
1008
1009 return n;
1010 }
1011
1012 enum {
1013 GC_OFFSET_IN_PATH, /* This one is on the path we were traveling */
1014 GC_OFFSET_UNSURE, /* No clue */
1015 GC_OFFSET_GOOD, /* We still need this unit */
1016 GC_OFFSET_BAD, /* We don't need this unit anymore */
1017 _GC_OFFSET_MAX
1018 };
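/* A unit's GC state is stored as u->gc_marker = m->gc_marker + one of the offsets above. Since
 * m->gc_marker is advanced by _GC_OFFSET_MAX on every GC run (see manager_dispatch_gc_unit_queue()
 * below), markers left over from earlier runs are automatically stale and never need to be cleared. */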
1019
1020 static void unit_gc_mark_good(Unit *u, unsigned gc_marker) {
1021 Unit *other;
1022 Iterator i;
1023 void *v;
1024
1025 u->gc_marker = gc_marker + GC_OFFSET_GOOD;
1026
1027 /* Recursively mark referenced units as GOOD as well */
1028 HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_REFERENCES], i)
1029 if (other->gc_marker == gc_marker + GC_OFFSET_UNSURE)
1030 unit_gc_mark_good(other, gc_marker);
1031 }
1032
1033 static void unit_gc_sweep(Unit *u, unsigned gc_marker) {
1034 Unit *other;
1035 bool is_bad;
1036 Iterator i;
1037 void *v;
1038
1039 assert(u);
1040
1041 if (IN_SET(u->gc_marker - gc_marker,
1042 GC_OFFSET_GOOD, GC_OFFSET_BAD, GC_OFFSET_UNSURE, GC_OFFSET_IN_PATH))
1043 return;
1044
1045 if (u->in_cleanup_queue)
1046 goto bad;
1047
1048 if (!unit_may_gc(u))
1049 goto good;
1050
1051 u->gc_marker = gc_marker + GC_OFFSET_IN_PATH;
1052
1053 is_bad = true;
1054
1055 HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_REFERENCED_BY], i) {
1056 unit_gc_sweep(other, gc_marker);
1057
1058 if (other->gc_marker == gc_marker + GC_OFFSET_GOOD)
1059 goto good;
1060
1061 if (other->gc_marker != gc_marker + GC_OFFSET_BAD)
1062 is_bad = false;
1063 }
1064
1065 if (u->refs_by_target) {
1066 const UnitRef *ref;
1067
1068 LIST_FOREACH(refs_by_target, ref, u->refs_by_target) {
1069 unit_gc_sweep(ref->source, gc_marker);
1070
1071 if (ref->source->gc_marker == gc_marker + GC_OFFSET_GOOD)
1072 goto good;
1073
1074 if (ref->source->gc_marker != gc_marker + GC_OFFSET_BAD)
1075 is_bad = false;
1076 }
1077 }
1078
1079 if (is_bad)
1080 goto bad;
1081
1082 /* We were unable to find anything out about this entry, so
1083 * let's investigate it later */
1084 u->gc_marker = gc_marker + GC_OFFSET_UNSURE;
1085 unit_add_to_gc_queue(u);
1086 return;
1087
1088 bad:
1089 /* We definitely know that this one is not useful anymore, so
1090 * let's mark it for deletion */
1091 u->gc_marker = gc_marker + GC_OFFSET_BAD;
1092 unit_add_to_cleanup_queue(u);
1093 return;
1094
1095 good:
1096 unit_gc_mark_good(u, gc_marker);
1097 }
1098
1099 static unsigned manager_dispatch_gc_unit_queue(Manager *m) {
1100 unsigned n = 0, gc_marker;
1101 Unit *u;
1102
1103 assert(m);
1104
1105 /* log_debug("Running GC..."); */
1106
1107 m->gc_marker += _GC_OFFSET_MAX;
1108 if (m->gc_marker + _GC_OFFSET_MAX <= _GC_OFFSET_MAX)
1109 m->gc_marker = 1;
1110
1111 gc_marker = m->gc_marker;
1112
1113 while ((u = m->gc_unit_queue)) {
1114 assert(u->in_gc_queue);
1115
1116 unit_gc_sweep(u, gc_marker);
1117
1118 LIST_REMOVE(gc_queue, m->gc_unit_queue, u);
1119 u->in_gc_queue = false;
1120
1121 n++;
1122
1123 if (IN_SET(u->gc_marker - gc_marker,
1124 GC_OFFSET_BAD, GC_OFFSET_UNSURE)) {
1125 if (u->id)
1126 log_unit_debug(u, "Collecting.");
1127 u->gc_marker = gc_marker + GC_OFFSET_BAD;
1128 unit_add_to_cleanup_queue(u);
1129 }
1130 }
1131
1132 return n;
1133 }
1134
1135 static unsigned manager_dispatch_gc_job_queue(Manager *m) {
1136 unsigned n = 0;
1137 Job *j;
1138
1139 assert(m);
1140
1141 while ((j = m->gc_job_queue)) {
1142 assert(j->in_gc_queue);
1143
1144 LIST_REMOVE(gc_queue, m->gc_job_queue, j);
1145 j->in_gc_queue = false;
1146
1147 n++;
1148
1149 if (!job_may_gc(j))
1150 continue;
1151
1152 log_unit_debug(j->unit, "Collecting job.");
1153 (void) job_finish_and_invalidate(j, JOB_COLLECTED, false, false);
1154 }
1155
1156 return n;
1157 }
1158
1159 static void manager_clear_jobs_and_units(Manager *m) {
1160 Unit *u;
1161
1162 assert(m);
1163
1164 while ((u = hashmap_first(m->units)))
1165 unit_free(u);
1166
1167 manager_dispatch_cleanup_queue(m);
1168
1169 assert(!m->load_queue);
1170 assert(!m->run_queue);
1171 assert(!m->dbus_unit_queue);
1172 assert(!m->dbus_job_queue);
1173 assert(!m->cleanup_queue);
1174 assert(!m->gc_unit_queue);
1175 assert(!m->gc_job_queue);
1176
1177 assert(hashmap_isempty(m->jobs));
1178 assert(hashmap_isempty(m->units));
1179
1180 m->n_on_console = 0;
1181 m->n_running_jobs = 0;
1182 }
1183
1184 Manager* manager_free(Manager *m) {
1185 UnitType c;
1186 int i;
1187 ExecDirectoryType dt;
1188
1189 if (!m)
1190 return NULL;
1191
1192 manager_clear_jobs_and_units(m);
1193
1194 for (c = 0; c < _UNIT_TYPE_MAX; c++)
1195 if (unit_vtable[c]->shutdown)
1196 unit_vtable[c]->shutdown(m);
1197
1198 /* If we reexecute ourselves, we keep the root cgroup around */
1199 manager_shutdown_cgroup(m, m->exit_code != MANAGER_REEXECUTE);
1200
1201 lookup_paths_flush_generator(&m->lookup_paths);
1202
1203 bus_done(m);
1204
1205 exec_runtime_vacuum(m);
1206 hashmap_free(m->exec_runtime_by_id);
1207
1208 dynamic_user_vacuum(m, false);
1209 hashmap_free(m->dynamic_users);
1210
1211 hashmap_free(m->units);
1212 hashmap_free(m->units_by_invocation_id);
1213 hashmap_free(m->jobs);
1214 hashmap_free(m->watch_pids);
1215 hashmap_free(m->watch_bus);
1216
1217 set_free(m->startup_units);
1218 set_free(m->failed_units);
1219
1220 sd_event_source_unref(m->signal_event_source);
1221 sd_event_source_unref(m->sigchld_event_source);
1222 sd_event_source_unref(m->notify_event_source);
1223 sd_event_source_unref(m->cgroups_agent_event_source);
1224 sd_event_source_unref(m->time_change_event_source);
1225 sd_event_source_unref(m->jobs_in_progress_event_source);
1226 sd_event_source_unref(m->run_queue_event_source);
1227 sd_event_source_unref(m->user_lookup_event_source);
1228 sd_event_source_unref(m->sync_bus_names_event_source);
1229
1230 safe_close(m->signal_fd);
1231 safe_close(m->notify_fd);
1232 safe_close(m->cgroups_agent_fd);
1233 safe_close(m->time_change_fd);
1234 safe_close_pair(m->user_lookup_fds);
1235
1236 manager_close_ask_password(m);
1237
1238 manager_close_idle_pipe(m);
1239
1240 udev_unref(m->udev);
1241 sd_event_unref(m->event);
1242
1243 free(m->notify_socket);
1244
1245 lookup_paths_free(&m->lookup_paths);
1246 strv_free(m->environment);
1247
1248 hashmap_free(m->cgroup_unit);
1249 set_free_free(m->unit_path_cache);
1250
1251 free(m->switch_root);
1252 free(m->switch_root_init);
1253
1254 for (i = 0; i < _RLIMIT_MAX; i++)
1255 m->rlimit[i] = mfree(m->rlimit[i]);
1256
1257 assert(hashmap_isempty(m->units_requiring_mounts_for));
1258 hashmap_free(m->units_requiring_mounts_for);
1259
1260 hashmap_free(m->uid_refs);
1261 hashmap_free(m->gid_refs);
1262
1263 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++)
1264 m->prefix[dt] = mfree(m->prefix[dt]);
1265
1266 return mfree(m);
1267 }
1268
1269 void manager_enumerate(Manager *m) {
1270 UnitType c;
1271
1272 assert(m);
1273
1274 /* Let's ask every type to load all units from disk/kernel
1275 * that it might know */
1276 for (c = 0; c < _UNIT_TYPE_MAX; c++) {
1277 if (!unit_type_supported(c)) {
1278 log_debug("Unit type .%s is not supported on this system.", unit_type_to_string(c));
1279 continue;
1280 }
1281
1282 if (!unit_vtable[c]->enumerate)
1283 continue;
1284
1285 unit_vtable[c]->enumerate(m);
1286 }
1287
1288 manager_dispatch_load_queue(m);
1289 }
1290
1291 static void manager_coldplug(Manager *m) {
1292 Iterator i;
1293 Unit *u;
1294 char *k;
1295 int r;
1296
1297 assert(m);
1298
1299 /* Then, let's set up their initial state. */
1300 HASHMAP_FOREACH_KEY(u, k, m->units, i) {
1301
1302 /* ignore aliases */
1303 if (u->id != k)
1304 continue;
1305
1306 r = unit_coldplug(u);
1307 if (r < 0)
1308 log_warning_errno(r, "We couldn't coldplug %s, proceeding anyway: %m", u->id);
1309 }
1310 }
1311
1312 static void manager_build_unit_path_cache(Manager *m) {
1313 char **i;
1314 int r;
1315
1316 assert(m);
1317
1318 set_free_free(m->unit_path_cache);
1319
1320 m->unit_path_cache = set_new(&path_hash_ops);
1321 if (!m->unit_path_cache) {
1322 r = -ENOMEM;
1323 goto fail;
1324 }
1325
1326 /* This simply builds a list of files we know exist, so that
1327 * we don't always have to go to disk */
1328
1329 STRV_FOREACH(i, m->lookup_paths.search_path) {
1330 _cleanup_closedir_ DIR *d = NULL;
1331 struct dirent *de;
1332
1333 d = opendir(*i);
1334 if (!d) {
1335 if (errno != ENOENT)
1336 log_warning_errno(errno, "Failed to open directory %s, ignoring: %m", *i);
1337 continue;
1338 }
1339
1340 FOREACH_DIRENT(de, d, r = -errno; goto fail) {
1341 char *p;
1342
1343 p = strjoin(streq(*i, "/") ? "" : *i, "/", de->d_name);
1344 if (!p) {
1345 r = -ENOMEM;
1346 goto fail;
1347 }
1348
1349 r = set_consume(m->unit_path_cache, p);
1350 if (r < 0)
1351 goto fail;
1352 }
1353 }
1354
1355 return;
1356
1357 fail:
1358 log_warning_errno(r, "Failed to build unit path cache, proceeding without: %m");
1359 m->unit_path_cache = set_free_free(m->unit_path_cache);
1360 }
1361
1362 static void manager_distribute_fds(Manager *m, FDSet *fds) {
1363 Iterator i;
1364 Unit *u;
1365
1366 assert(m);
1367
1368 HASHMAP_FOREACH(u, m->units, i) {
1369
1370 if (fdset_size(fds) <= 0)
1371 break;
1372
1373 if (!UNIT_VTABLE(u)->distribute_fds)
1374 continue;
1375
1376 UNIT_VTABLE(u)->distribute_fds(u, fds);
1377 }
1378 }
1379
1380 static bool manager_dbus_is_running(Manager *m, bool deserialized) {
1381 Unit *u;
1382
1383 assert(m);
1384
1385 /* This checks whether the dbus instance we are supposed to expose our APIs on is up. We check both the socket
1386 * and the service unit. If the 'deserialized' parameter is true we'll check the deserialized state of the unit
1387 * rather than the current one. */
1388
1389 if (m->test_run_flags != 0)
1390 return false;
1391
1392 /* If we are in the user instance, and the env var is already set for us, then this means D-Bus is run
1393 * somewhere outside of our own logic. Let's use it. */
1394 if (MANAGER_IS_USER(m) && getenv("DBUS_SESSION_BUS_ADDRESS"))
1395 return true;
1396
1397 u = manager_get_unit(m, SPECIAL_DBUS_SOCKET);
1398 if (!u)
1399 return false;
1400 if ((deserialized ? SOCKET(u)->deserialized_state : SOCKET(u)->state) != SOCKET_RUNNING)
1401 return false;
1402
1403 u = manager_get_unit(m, SPECIAL_DBUS_SERVICE);
1404 if (!u)
1405 return false;
1406 if (!IN_SET((deserialized ? SERVICE(u)->deserialized_state : SERVICE(u)->state), SERVICE_RUNNING, SERVICE_RELOAD))
1407 return false;
1408
1409 return true;
1410 }
1411
1412 int manager_startup(Manager *m, FILE *serialization, FDSet *fds) {
1413 int r;
1414
1415 assert(m);
1416
1417 /* If we are running in test mode, we still want to run the generators,
1418 * but we should not touch the real generator directories. */
1419 r = lookup_paths_init(&m->lookup_paths, m->unit_file_scope,
1420 m->test_run_flags ? LOOKUP_PATHS_TEMPORARY_GENERATED : 0,
1421 NULL);
1422 if (r < 0)
1423 return r;
1424
1425 r = manager_run_environment_generators(m);
1426 if (r < 0)
1427 return r;
1428
1429 dual_timestamp_get(m->timestamps + MANAGER_TIMESTAMP_GENERATORS_START);
1430 r = manager_run_generators(m);
1431 dual_timestamp_get(m->timestamps + MANAGER_TIMESTAMP_GENERATORS_FINISH);
1432 if (r < 0)
1433 return r;
1434
1435 /* If this is the first boot, and we are in the host system, then preset everything */
1436 if (m->first_boot > 0 &&
1437 MANAGER_IS_SYSTEM(m) &&
1438 !m->test_run_flags) {
1439
1440 r = unit_file_preset_all(UNIT_FILE_SYSTEM, 0, NULL, UNIT_FILE_PRESET_ENABLE_ONLY, NULL, 0);
1441 if (r < 0)
1442 log_full_errno(r == -EEXIST ? LOG_NOTICE : LOG_WARNING, r,
1443 "Failed to populate /etc with preset unit settings, ignoring: %m");
1444 else
1445 log_info("Populated /etc with preset unit settings.");
1446 }
1447
1448 lookup_paths_reduce(&m->lookup_paths);
1449 manager_build_unit_path_cache(m);
1450
1451 /* If we are going to deserialize, make sure that during
1452 * enumeration this is already known, so we increase the
1453 * counter here already */
1454 if (serialization)
1455 m->n_reloading++;
1456
1457 /* First, enumerate what we can from all config files */
1458 dual_timestamp_get(m->timestamps + MANAGER_TIMESTAMP_UNITS_LOAD_START);
1459 manager_enumerate(m);
1460 dual_timestamp_get(m->timestamps + MANAGER_TIMESTAMP_UNITS_LOAD_FINISH);
1461
1462 /* Second, deserialize if there is something to deserialize */
1463 if (serialization) {
1464 r = manager_deserialize(m, serialization, fds);
1465 if (r < 0)
1466 return log_error_errno(r, "Deserialization failed: %m");
1467 }
1468
1469 /* Any fds left? Find some unit which wants them. This is
1470 * useful to allow container managers to pass some file
1471 * descriptors to us pre-initialized. This enables
1472 * socket-based activation of entire containers. */
1473 manager_distribute_fds(m, fds);
1474
1475 /* We might have deserialized the notify fd, but if we didn't,
1476 * then let's create the notify socket now */
1477 r = manager_setup_notify(m);
1478 if (r < 0)
1479 /* No sense to continue without notifications, our children would fail anyway. */
1480 return r;
1481
1482 r = manager_setup_cgroups_agent(m);
1483 if (r < 0)
1484 /* Likewise, no sense to continue without empty cgroup notifications. */
1485 return r;
1486
1487 r = manager_setup_user_lookup_fd(m);
1488 if (r < 0)
1489 /* This shouldn't fail, except if things are really broken. */
1490 return r;
1491
1492 /* Let's set up our private bus connection now, unconditionally */
1493 (void) bus_init_private(m);
1494
1495 /* If we are in --user mode also connect to the system bus now */
1496 if (MANAGER_IS_USER(m))
1497 (void) bus_init_system(m);
1498
1499 /* Let's connect to the bus now, but only if the unit is supposed to be up */
1500 if (manager_dbus_is_running(m, !!serialization)) {
1501 (void) bus_init_api(m);
1502
1503 if (MANAGER_IS_SYSTEM(m))
1504 (void) bus_init_system(m);
1505 }
1506
1507 /* Now that we are connected to all possible busses, let's deserialize who is tracking us. */
1508 (void) bus_track_coldplug(m, &m->subscribed, false, m->deserialized_subscribed);
1509 m->deserialized_subscribed = strv_free(m->deserialized_subscribed);
1510
1511 /* Third, fire things up! */
1512 manager_coldplug(m);
1513
1514 /* Release any dynamic users no longer referenced */
1515 dynamic_user_vacuum(m, true);
1516
1517 exec_runtime_vacuum(m);
1518
1519 /* Release any references to UIDs/GIDs no longer referenced, and destroy any IPC owned by them */
1520 manager_vacuum_uid_refs(m);
1521 manager_vacuum_gid_refs(m);
1522
1523 if (serialization) {
1524 assert(m->n_reloading > 0);
1525 m->n_reloading--;
1526
1527 /* Let's wait for the UnitNew/JobNew messages to be
1528 * sent before we notify that the reload is
1529 * finished */
1530 m->send_reloading_done = true;
1531 }
1532
1533 return 0;
1534 }
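/* In short, startup proceeds as: set up lookup paths, run environment generators and generators,
 * preset units on first boot, enumerate units, deserialize any previous state, distribute leftover
 * fds, set up the notify/cgroups-agent/user-lookup sockets, connect to the busses, coldplug units,
 * and finally vacuum unused dynamic users and UID/GID references. */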
1535
1536 int manager_add_job(Manager *m, JobType type, Unit *unit, JobMode mode, sd_bus_error *e, Job **_ret) {
1537 int r;
1538 Transaction *tr;
1539
1540 assert(m);
1541 assert(type < _JOB_TYPE_MAX);
1542 assert(unit);
1543 assert(mode < _JOB_MODE_MAX);
1544
1545 if (mode == JOB_ISOLATE && type != JOB_START)
1546 return sd_bus_error_setf(e, SD_BUS_ERROR_INVALID_ARGS, "Isolate is only valid for start.");
1547
1548 if (mode == JOB_ISOLATE && !unit->allow_isolate)
1549 return sd_bus_error_setf(e, BUS_ERROR_NO_ISOLATION, "Operation refused, unit may not be isolated.");
1550
1551 log_unit_debug(unit, "Trying to enqueue job %s/%s/%s", unit->id, job_type_to_string(type), job_mode_to_string(mode));
1552
1553 type = job_type_collapse(type, unit);
1554
1555 tr = transaction_new(mode == JOB_REPLACE_IRREVERSIBLY);
1556 if (!tr)
1557 return -ENOMEM;
1558
1559 r = transaction_add_job_and_dependencies(tr, type, unit, NULL, true, false,
1560 IN_SET(mode, JOB_IGNORE_DEPENDENCIES, JOB_IGNORE_REQUIREMENTS),
1561 mode == JOB_IGNORE_DEPENDENCIES, e);
1562 if (r < 0)
1563 goto tr_abort;
1564
1565 if (mode == JOB_ISOLATE) {
1566 r = transaction_add_isolate_jobs(tr, m);
1567 if (r < 0)
1568 goto tr_abort;
1569 }
1570
1571 r = transaction_activate(tr, m, mode, e);
1572 if (r < 0)
1573 goto tr_abort;
1574
1575 log_unit_debug(unit,
1576 "Enqueued job %s/%s as %u", unit->id,
1577 job_type_to_string(type), (unsigned) tr->anchor_job->id);
1578
1579 if (_ret)
1580 *_ret = tr->anchor_job;
1581
1582 transaction_free(tr);
1583 return 0;
1584
1585 tr_abort:
1586 transaction_abort(tr);
1587 transaction_free(tr);
1588 return r;
1589 }
1590
1591 int manager_add_job_by_name(Manager *m, JobType type, const char *name, JobMode mode, sd_bus_error *e, Job **ret) {
1592 Unit *unit = NULL; /* just to appease gcc, initialization is not really necessary */
1593 int r;
1594
1595 assert(m);
1596 assert(type < _JOB_TYPE_MAX);
1597 assert(name);
1598 assert(mode < _JOB_MODE_MAX);
1599
1600 r = manager_load_unit(m, name, NULL, NULL, &unit);
1601 if (r < 0)
1602 return r;
1603 assert(unit);
1604
1605 return manager_add_job(m, type, unit, mode, e, ret);
1606 }
1607
1608 int manager_add_job_by_name_and_warn(Manager *m, JobType type, const char *name, JobMode mode, Job **ret) {
1609 _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
1610 int r;
1611
1612 assert(m);
1613 assert(type < _JOB_TYPE_MAX);
1614 assert(name);
1615 assert(mode < _JOB_MODE_MAX);
1616
1617 r = manager_add_job_by_name(m, type, name, mode, &error, ret);
1618 if (r < 0)
1619 return log_warning_errno(r, "Failed to enqueue %s job for %s: %s", job_mode_to_string(mode), name, bus_error_message(&error, r));
1620
1621 return r;
1622 }
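/* For illustration, a hypothetical caller could do:
 *     (void) manager_add_job_by_name_and_warn(m, JOB_START, "foo.service", JOB_REPLACE, NULL);
 * which loads foo.service if necessary, enqueues a start job replacing conflicting jobs, and logs a
 * warning including the bus error message if the transaction cannot be activated. */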
1623
1624 int manager_propagate_reload(Manager *m, Unit *unit, JobMode mode, sd_bus_error *e) {
1625 int r;
1626 Transaction *tr;
1627
1628 assert(m);
1629 assert(unit);
1630 assert(mode < _JOB_MODE_MAX);
1631 assert(mode != JOB_ISOLATE); /* Isolate is only valid for start */
1632
1633 tr = transaction_new(mode == JOB_REPLACE_IRREVERSIBLY);
1634 if (!tr)
1635 return -ENOMEM;
1636
1637 /* We need an anchor job */
1638 r = transaction_add_job_and_dependencies(tr, JOB_NOP, unit, NULL, false, false, true, true, e);
1639 if (r < 0)
1640 goto tr_abort;
1641
1642 /* Failure in adding individual dependencies is ignored, so this always succeeds. */
1643 transaction_add_propagate_reload_jobs(tr, unit, tr->anchor_job, mode == JOB_IGNORE_DEPENDENCIES, e);
1644
1645 r = transaction_activate(tr, m, mode, e);
1646 if (r < 0)
1647 goto tr_abort;
1648
1649 transaction_free(tr);
1650 return 0;
1651
1652 tr_abort:
1653 transaction_abort(tr);
1654 transaction_free(tr);
1655 return r;
1656 }
1657
1658 Job *manager_get_job(Manager *m, uint32_t id) {
1659 assert(m);
1660
1661 return hashmap_get(m->jobs, UINT32_TO_PTR(id));
1662 }
1663
1664 Unit *manager_get_unit(Manager *m, const char *name) {
1665 assert(m);
1666 assert(name);
1667
1668 return hashmap_get(m->units, name);
1669 }
1670
1671 unsigned manager_dispatch_load_queue(Manager *m) {
1672 Unit *u;
1673 unsigned n = 0;
1674
1675 assert(m);
1676
1677 /* Make sure we are not run recursively */
1678 if (m->dispatching_load_queue)
1679 return 0;
1680
1681 m->dispatching_load_queue = true;
1682
1683 /* Dispatches the load queue. Takes a unit from the queue and
1684 * tries to load its data until the queue is empty */
1685
1686 while ((u = m->load_queue)) {
1687 assert(u->in_load_queue);
1688
1689 unit_load(u);
1690 n++;
1691 }
1692
1693 m->dispatching_load_queue = false;
1694 return n;
1695 }
1696
1697 int manager_load_unit_prepare(
1698 Manager *m,
1699 const char *name,
1700 const char *path,
1701 sd_bus_error *e,
1702 Unit **_ret) {
1703
1704 Unit *ret;
1705 UnitType t;
1706 int r;
1707
1708 assert(m);
1709 assert(name || path);
1710 assert(_ret);
1711
1712 /* This will prepare the unit for loading, but not actually
1713 * load anything from disk. */
1714
1715 if (path && !is_path(path))
1716 return sd_bus_error_setf(e, SD_BUS_ERROR_INVALID_ARGS, "Path %s is not absolute.", path);
1717
1718 if (!name)
1719 name = basename(path);
1720
1721 t = unit_name_to_type(name);
1722
1723 if (t == _UNIT_TYPE_INVALID || !unit_name_is_valid(name, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE)) {
1724 if (unit_name_is_valid(name, UNIT_NAME_TEMPLATE))
1725 return sd_bus_error_setf(e, SD_BUS_ERROR_INVALID_ARGS, "Unit name %s is missing the instance name.", name);
1726
1727 return sd_bus_error_setf(e, SD_BUS_ERROR_INVALID_ARGS, "Unit name %s is not valid.", name);
1728 }
1729
1730 ret = manager_get_unit(m, name);
1731 if (ret) {
1732 *_ret = ret;
1733 return 1;
1734 }
1735
1736 ret = unit_new(m, unit_vtable[t]->object_size);
1737 if (!ret)
1738 return -ENOMEM;
1739
1740 if (path) {
1741 ret->fragment_path = strdup(path);
1742 if (!ret->fragment_path) {
1743 unit_free(ret);
1744 return -ENOMEM;
1745 }
1746 }
1747
1748 r = unit_add_name(ret, name);
1749 if (r < 0) {
1750 unit_free(ret);
1751 return r;
1752 }
1753
1754 unit_add_to_load_queue(ret);
1755 unit_add_to_dbus_queue(ret);
1756 unit_add_to_gc_queue(ret);
1757
1758 *_ret = ret;
1759
1760 return 0;
1761 }
1762
1763 int manager_load_unit(
1764 Manager *m,
1765 const char *name,
1766 const char *path,
1767 sd_bus_error *e,
1768 Unit **_ret) {
1769
1770 int r;
1771
1772 assert(m);
1773 assert(_ret);
1774
1775 /* This will load the service information files, but not actually
1776 * start any services or anything. */
1777
1778 r = manager_load_unit_prepare(m, name, path, e, _ret);
1779 if (r != 0)
1780 return r;
1781
1782 manager_dispatch_load_queue(m);
1783
1784 *_ret = unit_follow_merge(*_ret);
1785
1786 return 0;
1787 }
1788
1789 void manager_dump_jobs(Manager *s, FILE *f, const char *prefix) {
1790 Iterator i;
1791 Job *j;
1792
1793 assert(s);
1794 assert(f);
1795
1796 HASHMAP_FOREACH(j, s->jobs, i)
1797 job_dump(j, f, prefix);
1798 }
1799
1800 void manager_dump_units(Manager *s, FILE *f, const char *prefix) {
1801 Iterator i;
1802 Unit *u;
1803 const char *t;
1804
1805 assert(s);
1806 assert(f);
1807
1808 HASHMAP_FOREACH_KEY(u, t, s->units, i)
1809 if (u->id == t)
1810 unit_dump(u, f, prefix);
1811 }
1812
1813 void manager_dump(Manager *m, FILE *f, const char *prefix) {
1814 ManagerTimestamp q;
1815
1816 assert(m);
1817 assert(f);
1818
1819 for (q = 0; q < _MANAGER_TIMESTAMP_MAX; q++) {
1820 char buf[FORMAT_TIMESTAMP_MAX];
1821
1822 if (dual_timestamp_is_set(m->timestamps + q))
1823 fprintf(f, "%sTimestamp %s: %s\n",
1824 strempty(prefix),
1825 manager_timestamp_to_string(q),
1826 format_timestamp(buf, sizeof(buf), m->timestamps[q].realtime));
1827 }
1828
1829 manager_dump_units(m, f, prefix);
1830 manager_dump_jobs(m, f, prefix);
1831 }
1832
1833 int manager_get_dump_string(Manager *m, char **ret) {
1834 _cleanup_free_ char *dump = NULL;
1835 _cleanup_fclose_ FILE *f = NULL;
1836 size_t size;
1837 int r;
1838
1839 assert(m);
1840 assert(ret);
1841
1842 f = open_memstream(&dump, &size);
1843 if (!f)
1844 return -errno;
1845
1846 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
1847
1848 manager_dump(m, f, NULL);
1849
1850 r = fflush_and_check(f);
1851 if (r < 0)
1852 return r;
1853
1854 f = safe_fclose(f);
1855
1856 *ret = dump;
1857 dump = NULL;
1858
1859 return 0;
1860 }
1861
1862 void manager_clear_jobs(Manager *m) {
1863 Job *j;
1864
1865 assert(m);
1866
1867 while ((j = hashmap_first(m->jobs)))
1868 /* No need to recurse. We're cancelling all jobs. */
1869 job_finish_and_invalidate(j, JOB_CANCELED, false, false);
1870 }
1871
1872 static int manager_dispatch_run_queue(sd_event_source *source, void *userdata) {
1873 Manager *m = userdata;
1874 Job *j;
1875
1876 assert(source);
1877 assert(m);
1878
1879 while ((j = m->run_queue)) {
1880 assert(j->installed);
1881 assert(j->in_run_queue);
1882
1883 job_run_and_invalidate(j);
1884 }
1885
1886 if (m->n_running_jobs > 0)
1887 manager_watch_jobs_in_progress(m);
1888
1889 if (m->n_on_console > 0)
1890 manager_watch_idle_pipe(m);
1891
1892 return 1;
1893 }
1894
1895 static unsigned manager_dispatch_dbus_queue(Manager *m) {
1896 unsigned n = 0, budget;
1897 Unit *u;
1898 Job *j;
1899
1900 assert(m);
1901
1902 if (m->dispatching_dbus_queue)
1903 return 0;
1904
1905 /* Anything to do at all? */
1906 if (!m->dbus_unit_queue && !m->dbus_job_queue && !m->send_reloading_done && !m->queued_message)
1907 return 0;
1908
1909 /* Are there too many messages queued at the moment? If so, let's not enqueue more on top; let's sit this
1910 * cycle out, and process things in a later cycle when the queues have emptied out a bit. */
1911 if (manager_bus_n_queued_write(m) > MANAGER_BUS_BUSY_THRESHOLD)
1912 return 0;
1913
1914 /* Only process a certain number of units/jobs per event loop iteration. Even if the bus queue wasn't overly
1915 * full before this call we shouldn't increase it in size too wildly in one step, and we shouldn't monopolize
1916 * CPU time with generating these messages. Note the difference in counting of this "budget" and the
1917 * "threshold" above: the "budget" is decreased only once per generated message, regardless of how many
1918 * busses/direct connections it is enqueued on, while the "threshold" is applied to each queued instance of bus
1919 * message, i.e. if the same message is enqueued to five busses/direct connections it will be counted five
1920 * times. This difference in counting ("references" vs. "instances") is primarily a result of the fact that
1921 * it's easier to implement it this way, however it also reflects the thinking that the "threshold" should put
1922 * a limit on used queue memory, i.e. space, while the "budget" should put a limit on time. Also note that
1923 * the "threshold" is currently chosen much higher than the "budget". */
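/* For example: a single unit change signal enqueued on the API bus and three direct connections costs one unit
* of "budget" here, but counts as four queued messages against the "threshold" above. */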
1924 budget = MANAGER_BUS_MESSAGE_BUDGET;
1925
1926 m->dispatching_dbus_queue = true;
1927
1928 while (budget > 0 && (u = m->dbus_unit_queue)) {
1929
1930 assert(u->in_dbus_queue);
1931
1932 bus_unit_send_change_signal(u);
1933 n++, budget--;
1934 }
1935
1936 while (budget > 0 && (j = m->dbus_job_queue)) {
1937 assert(j->in_dbus_queue);
1938
1939 bus_job_send_change_signal(j);
1940 n++, budget--;
1941 }
1942
1943 m->dispatching_dbus_queue = false;
1944
1945 if (budget > 0 && m->send_reloading_done) {
1946 m->send_reloading_done = false;
1947 bus_manager_send_reloading(m, false);
1948 n++, budget--;
1949 }
1950
1951 if (budget > 0 && m->queued_message) {
1952 bus_send_queued_message(m);
1953 n++;
1954 }
1955
1956 return n;
1957 }
1958
1959 static int manager_dispatch_cgroups_agent_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
1960 Manager *m = userdata;
1961 char buf[PATH_MAX+1];
1962 ssize_t n;
1963
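/* Each datagram on this socket is expected to carry exactly one cgroup path, sent by the cgroup agent when a
* cgroup has run empty. */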
1964 n = recv(fd, buf, sizeof(buf), 0);
1965 if (n < 0)
1966 return log_error_errno(errno, "Failed to read cgroups agent message: %m");
1967 if (n == 0) {
1968 log_error("Got zero-length cgroups agent message, ignoring.");
1969 return 0;
1970 }
1971 if ((size_t) n >= sizeof(buf)) {
1972 log_error("Got overly long cgroups agent message, ignoring.");
1973 return 0;
1974 }
1975
1976 if (memchr(buf, 0, n)) {
1977 log_error("Got cgroups agent message with embedded NUL byte, ignoring.");
1978 return 0;
1979 }
1980 buf[n] = 0;
1981
1982 manager_notify_cgroup_empty(m, buf);
1983 (void) bus_forward_agent_released(m, buf);
1984
1985 return 0;
1986 }
1987
1988 static void manager_invoke_notify_message(
1989 Manager *m,
1990 Unit *u,
1991 const struct ucred *ucred,
1992 const char *buf,
1993 FDSet *fds) {
1994
1995 assert(m);
1996 assert(u);
1997 assert(ucred);
1998 assert(buf);
1999
2000 if (u->notifygen == m->notifygen) /* Already invoked on this same unit in this same iteration? */
2001 return;
2002 u->notifygen = m->notifygen;
2003
2004 if (UNIT_VTABLE(u)->notify_message) {
2005 _cleanup_strv_free_ char **tags = NULL;
2006
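/* Notification messages are newline-separated VAR=VALUE assignments, hence split them up before handing them
* to the unit type's handler. */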
2007 tags = strv_split(buf, NEWLINE);
2008 if (!tags) {
2009 log_oom();
2010 return;
2011 }
2012
2013 UNIT_VTABLE(u)->notify_message(u, ucred, tags, fds);
2014
2015 } else if (DEBUG_LOGGING) {
2016 _cleanup_free_ char *x = NULL, *y = NULL;
2017
2018 x = ellipsize(buf, 20, 90);
2019 if (x)
2020 y = cescape(x);
2021
2022 log_unit_debug(u, "Got notification message \"%s\", ignoring.", strnull(y));
2023 }
2024 }
2025
2026 static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
2027
2028 _cleanup_fdset_free_ FDSet *fds = NULL;
2029 Manager *m = userdata;
2030 char buf[NOTIFY_BUFFER_MAX+1];
2031 struct iovec iovec = {
2032 .iov_base = buf,
2033 .iov_len = sizeof(buf)-1,
2034 };
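/* Control message space for one SCM_CREDENTIALS block plus up to NOTIFY_FD_MAX fds passed via SCM_RIGHTS. */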
2035 union {
2036 struct cmsghdr cmsghdr;
2037 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
2038 CMSG_SPACE(sizeof(int) * NOTIFY_FD_MAX)];
2039 } control = {};
2040 struct msghdr msghdr = {
2041 .msg_iov = &iovec,
2042 .msg_iovlen = 1,
2043 .msg_control = &control,
2044 .msg_controllen = sizeof(control),
2045 };
2046
2047 struct cmsghdr *cmsg;
2048 struct ucred *ucred = NULL;
2049 _cleanup_free_ Unit **array_copy = NULL;
2050 Unit *u1, *u2, **array;
2051 int r, *fd_array = NULL;
2052 unsigned n_fds = 0;
2053 bool found = false;
2054 ssize_t n;
2055
2056 assert(m);
2057 assert(m->notify_fd == fd);
2058
2059 if (revents != EPOLLIN) {
2060 log_warning("Got unexpected poll event for notify fd.");
2061 return 0;
2062 }
2063
2064 n = recvmsg(m->notify_fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC|MSG_TRUNC);
2065 if (n < 0) {
2066 if (IN_SET(errno, EAGAIN, EINTR))
2067 return 0; /* Spurious wakeup, try again */
2068
2069 /* If this is any other, real error, then let's stop processing this socket. This of course means we
2070 * won't take notification messages anymore, but that's still better than busy looping around this:
2071 * being woken up over and over again but being unable to actually read the message off the socket. */
2072 return log_error_errno(errno, "Failed to receive notification message: %m");
2073 }
2074
2075 CMSG_FOREACH(cmsg, &msghdr) {
2076 if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
2077
2078 fd_array = (int*) CMSG_DATA(cmsg);
2079 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
2080
2081 } else if (cmsg->cmsg_level == SOL_SOCKET &&
2082 cmsg->cmsg_type == SCM_CREDENTIALS &&
2083 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred))) {
2084
2085 ucred = (struct ucred*) CMSG_DATA(cmsg);
2086 }
2087 }
2088
2089 if (n_fds > 0) {
2090 assert(fd_array);
2091
2092 r = fdset_new_array(&fds, fd_array, n_fds);
2093 if (r < 0) {
2094 close_many(fd_array, n_fds);
2095 log_oom();
2096 return 0;
2097 }
2098 }
2099
2100 if (!ucred || !pid_is_valid(ucred->pid)) {
2101 log_warning("Received notify message without valid credentials. Ignoring.");
2102 return 0;
2103 }
2104
2105 if ((size_t) n >= sizeof(buf) || (msghdr.msg_flags & MSG_TRUNC)) {
2106 log_warning("Received notify message exceeded maximum size. Ignoring.");
2107 return 0;
2108 }
2109
2110 /* As an extra safety check, let's make sure the string we get doesn't contain embedded NUL bytes. We permit one
2111 * trailing NUL byte in the message, but don't expect it. */
2112 if (n > 1 && memchr(buf, 0, n-1)) {
2113 log_warning("Received notify message with embedded NUL bytes. Ignoring.");
2114 return 0;
2115 }
2116
2117 /* Make sure it's NUL-terminated. */
2118 buf[n] = 0;
2119
2120 /* Increase the generation counter used for filtering out duplicate unit invocations. */
2121 m->notifygen++;
2122
2123 /* Notify every unit that might be interested, which might be multiple. */
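/* The watch_pids map keeps a single watching unit under the positive PID, and a NULL-terminated array of
* additional watchers under the negated PID. */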
2124 u1 = manager_get_unit_by_pid_cgroup(m, ucred->pid);
2125 u2 = hashmap_get(m->watch_pids, PID_TO_PTR(ucred->pid));
2126 array = hashmap_get(m->watch_pids, PID_TO_PTR(-ucred->pid));
2127 if (array) {
2128 size_t k = 0;
2129
2130 while (array[k])
2131 k++;
2132
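/* Make a copy of the array so that we don't trip up on it changing beneath us while the handlers run. */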
2133 array_copy = newdup(Unit*, array, k+1);
2134 if (!array_copy)
2135 log_oom();
2136 }
2137 /* And now invoke the per-unit callbacks. Note that manager_invoke_notify_message() will handle duplicate units
2138 * and make sure we only invoke each unit's handler once. */
2139 if (u1) {
2140 manager_invoke_notify_message(m, u1, ucred, buf, fds);
2141 found = true;
2142 }
2143 if (u2) {
2144 manager_invoke_notify_message(m, u2, ucred, buf, fds);
2145 found = true;
2146 }
2147 if (array_copy)
2148 for (size_t i = 0; array_copy[i]; i++) {
2149 manager_invoke_notify_message(m, array_copy[i], ucred, buf, fds);
2150 found = true;
2151 }
2152
2153 if (!found)
2154 log_warning("Cannot find unit for notify message of PID "PID_FMT", ignoring.", ucred->pid);
2155
2156 if (fdset_size(fds) > 0)
2157 log_warning("Got extra auxiliary fds with notification message, closing them.");
2158
2159 return 0;
2160 }
2161
2162 static void manager_invoke_sigchld_event(
2163 Manager *m,
2164 Unit *u,
2165 const siginfo_t *si) {
2166
2167 assert(m);
2168 assert(u);
2169 assert(si);
2170
2171 /* Already invoked the handler of this unit in this iteration? Then don't process this again */
2172 if (u->sigchldgen == m->sigchldgen)
2173 return;
2174 u->sigchldgen = m->sigchldgen;
2175
2176 log_unit_debug(u, "Child "PID_FMT" belongs to %s.", si->si_pid, u->id);
2177 unit_unwatch_pid(u, si->si_pid);
2178
2179 if (UNIT_VTABLE(u)->sigchld_event)
2180 UNIT_VTABLE(u)->sigchld_event(u, si->si_pid, si->si_code, si->si_status);
2181 }
2182
2183 static int manager_dispatch_sigchld(sd_event_source *source, void *userdata) {
2184 Manager *m = userdata;
2185 siginfo_t si = {};
2186 int r;
2187
2188 assert(source);
2189 assert(m);
2190
2191 /* First we call waitid() for a PID and do not reap the zombie. That way we can still access /proc/$PID for it
2192 * while it is a zombie. */
2193
2194 if (waitid(P_ALL, 0, &si, WEXITED|WNOHANG|WNOWAIT) < 0) {
2195
2196 if (errno != ECHILD)
2197 log_error_errno(errno, "Failed to peek for child with waitid(), ignoring: %m");
2198
2199 goto turn_off;
2200 }
2201
2202 if (si.si_pid <= 0)
2203 goto turn_off;
2204
2205 if (IN_SET(si.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED)) {
2206 _cleanup_free_ Unit **array_copy = NULL;
2207 _cleanup_free_ char *name = NULL;
2208 Unit *u1, *u2, **array;
2209
2210 (void) get_process_comm(si.si_pid, &name);
2211
2212 log_debug("Child "PID_FMT" (%s) died (code=%s, status=%i/%s)",
2213 si.si_pid, strna(name),
2214 sigchld_code_to_string(si.si_code),
2215 si.si_status,
2216 strna(si.si_code == CLD_EXITED
2217 ? exit_status_to_string(si.si_status, EXIT_STATUS_FULL)
2218 : signal_to_string(si.si_status)));
2219
2220 /* Increase the generation counter used for filtering out duplicate unit invocations */
2221 m->sigchldgen++;
2222
2223 /* And now figure out the unit this belongs to, it might be multiple... */
2224 u1 = manager_get_unit_by_pid_cgroup(m, si.si_pid);
2225 u2 = hashmap_get(m->watch_pids, PID_TO_PTR(si.si_pid));
2226 array = hashmap_get(m->watch_pids, PID_TO_PTR(-si.si_pid));
2227 if (array) {
2228 size_t n = 0;
2229
2230 /* Count how many entries the array has */
2231 while (array[n])
2232 n++;
2233
2234 /* Make a copy of the array so that we don't trip up on the array changing beneath us */
2235 array_copy = newdup(Unit*, array, n+1);
2236 if (!array_copy)
2237 log_oom();
2238 }
2239
2240 /* Finally, execute them all. Note that u1, u2 and the array might contain duplicates, but
2241 * that's fine, manager_invoke_sigchld_event() will ensure we only invoke the handlers once for
2242 * each iteration. */
2243 if (u1)
2244 manager_invoke_sigchld_event(m, u1, &si);
2245 if (u2)
2246 manager_invoke_sigchld_event(m, u2, &si);
2247 if (array_copy)
2248 for (size_t i = 0; array_copy[i]; i++)
2249 manager_invoke_sigchld_event(m, array_copy[i], &si);
2250 }
2251
2252 /* And now, we actually reap the zombie. */
2253 if (waitid(P_PID, si.si_pid, &si, WEXITED) < 0) {
2254 log_error_errno(errno, "Failed to dequeue child, ignoring: %m");
2255 return 0;
2256 }
2257
2258 return 0;
2259
2260 turn_off:
2261 /* All children processed for now, turn off event source */
2262
2263 r = sd_event_source_set_enabled(m->sigchld_event_source, SD_EVENT_OFF);
2264 if (r < 0)
2265 return log_error_errno(r, "Failed to disable SIGCHLD event source: %m");
2266
2267 return 0;
2268 }
2269
2270 static void manager_start_target(Manager *m, const char *name, JobMode mode) {
2271 _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
2272 int r;
2273
2274 log_debug("Activating special unit %s", name);
2275
2276 r = manager_add_job_by_name(m, JOB_START, name, mode, &error, NULL);
2277 if (r < 0)
2278 log_error("Failed to enqueue %s job: %s", name, bus_error_message(&error, r));
2279 }
2280
2281 static void manager_handle_ctrl_alt_del(Manager *m) {
2282 /* If the user presses C-A-D more than
2283 * 7 times within 2s, we reboot or shut down immediately,
2284 * unless it was disabled in system.conf */
2285
2286 if (ratelimit_test(&m->ctrl_alt_del_ratelimit) || m->cad_burst_action == EMERGENCY_ACTION_NONE)
2287 manager_start_target(m, SPECIAL_CTRL_ALT_DEL_TARGET, JOB_REPLACE_IRREVERSIBLY);
2288 else
2289 emergency_action(m, m->cad_burst_action, NULL,
2290 "Ctrl-Alt-Del was pressed more than 7 times within 2s");
2291 }
2292
2293 static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
2294 Manager *m = userdata;
2295 ssize_t n;
2296 struct signalfd_siginfo sfsi;
2297 int r;
2298
2299 assert(m);
2300 assert(m->signal_fd == fd);
2301
2302 if (revents != EPOLLIN) {
2303 log_warning("Got unexpected events from signal file descriptor.");
2304 return 0;
2305 }
2306
2307 n = read(m->signal_fd, &sfsi, sizeof(sfsi));
2308 if (n != sizeof(sfsi)) {
2309 if (n >= 0) {
2310 log_warning("Truncated read from signal fd (%zi bytes), ignoring!", n);
2311 return 0;
2312 }
2313
2314 if (IN_SET(errno, EINTR, EAGAIN))
2315 return 0;
2316
2317 /* We return an error here, which will kill this handler,
2318 * to avoid a busy loop on read error. */
2319 return log_error_errno(errno, "Reading from signal fd failed: %m");
2320 }
2321
2322 log_received_signal(sfsi.ssi_signo == SIGCHLD ||
2323 (sfsi.ssi_signo == SIGTERM && MANAGER_IS_USER(m))
2324 ? LOG_DEBUG : LOG_INFO,
2325 &sfsi);
2326
2327 switch (sfsi.ssi_signo) {
2328
2329 case SIGCHLD:
2330 r = sd_event_source_set_enabled(m->sigchld_event_source, SD_EVENT_ON);
2331 if (r < 0)
2332 log_warning_errno(r, "Failed to enable SIGCHLD event source, ignoring: %m");
2333
2334 break;
2335
2336 case SIGTERM:
2337 if (MANAGER_IS_SYSTEM(m)) {
2338 /* This is for compatibility with the
2339 * original sysvinit */
2340 r = verify_run_space_and_log("Refusing to reexecute");
2341 if (r >= 0)
2342 m->exit_code = MANAGER_REEXECUTE;
2343 break;
2344 }
2345
2346 _fallthrough_;
2347 case SIGINT:
2348 if (MANAGER_IS_SYSTEM(m))
2349 manager_handle_ctrl_alt_del(m);
2350 else
2351 manager_start_target(m, SPECIAL_EXIT_TARGET,
2352 JOB_REPLACE_IRREVERSIBLY);
2353 break;
2354
2355 case SIGWINCH:
2356 if (MANAGER_IS_SYSTEM(m))
2357 manager_start_target(m, SPECIAL_KBREQUEST_TARGET, JOB_REPLACE);
2358
2359 /* This is a nop on non-init */
2360 break;
2361
2362 case SIGPWR:
2363 if (MANAGER_IS_SYSTEM(m))
2364 manager_start_target(m, SPECIAL_SIGPWR_TARGET, JOB_REPLACE);
2365
2366 /* This is a nop on non-init */
2367 break;
2368
2369 case SIGUSR1:
2370
2371 if (manager_dbus_is_running(m, false)) {
2372 log_info("Trying to reconnect to bus...");
2373
2374 (void) bus_init_api(m);
2375
2376 if (MANAGER_IS_SYSTEM(m))
2377 (void) bus_init_system(m);
2378 } else {
2379 log_info("Starting D-Bus service...");
2380 manager_start_target(m, SPECIAL_DBUS_SERVICE, JOB_REPLACE);
2381 }
2382
2383 break;
2384
2385 case SIGUSR2: {
2386 _cleanup_free_ char *dump = NULL;
2387
2388 r = manager_get_dump_string(m, &dump);
2389 if (r < 0) {
2390 log_warning_errno(r, "Failed to acquire manager dump: %m");
2391 break;
2392 }
2393
2394 log_dump(LOG_INFO, dump);
2395 break;
2396 }
2397
2398 case SIGHUP:
2399 r = verify_run_space_and_log("Refusing to reload");
2400 if (r >= 0)
2401 m->exit_code = MANAGER_RELOAD;
2402 break;
2403
2404 default: {
2405
2406 /* Starting SIGRTMIN+0 */
2407 static const struct {
2408 const char *target;
2409 JobMode mode;
2410 } target_table[] = {
2411 [0] = { SPECIAL_DEFAULT_TARGET, JOB_ISOLATE },
2412 [1] = { SPECIAL_RESCUE_TARGET, JOB_ISOLATE },
2413 [2] = { SPECIAL_EMERGENCY_TARGET, JOB_ISOLATE },
2414 [3] = { SPECIAL_HALT_TARGET, JOB_REPLACE_IRREVERSIBLY },
2415 [4] = { SPECIAL_POWEROFF_TARGET, JOB_REPLACE_IRREVERSIBLY },
2416 [5] = { SPECIAL_REBOOT_TARGET, JOB_REPLACE_IRREVERSIBLY },
2417 [6] = { SPECIAL_KEXEC_TARGET, JOB_REPLACE_IRREVERSIBLY },
2418 };
2419
2420 /* Starting SIGRTMIN+13, so that target halt and system halt are 10 apart */
2421 static const ManagerExitCode code_table[] = {
2422 [0] = MANAGER_HALT,
2423 [1] = MANAGER_POWEROFF,
2424 [2] = MANAGER_REBOOT,
2425 [3] = MANAGER_KEXEC,
2426 };
2427
2428 if ((int) sfsi.ssi_signo >= SIGRTMIN+0 &&
2429 (int) sfsi.ssi_signo < SIGRTMIN+(int) ELEMENTSOF(target_table)) {
2430 int idx = (int) sfsi.ssi_signo - SIGRTMIN;
2431 manager_start_target(m, target_table[idx].target,
2432 target_table[idx].mode);
2433 break;
2434 }
2435
2436 if ((int) sfsi.ssi_signo >= SIGRTMIN+13 &&
2437 (int) sfsi.ssi_signo < SIGRTMIN+13+(int) ELEMENTSOF(code_table)) {
2438 m->exit_code = code_table[sfsi.ssi_signo - SIGRTMIN - 13];
2439 break;
2440 }
2441
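/* The remaining realtime signals tweak runtime behaviour: +20/+21 toggle status output, +22/+23 set the log
* level, +24 makes a user manager exit, and +26..+29 switch the log target. */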
2442 switch (sfsi.ssi_signo - SIGRTMIN) {
2443
2444 case 20:
2445 manager_set_show_status(m, SHOW_STATUS_YES);
2446 break;
2447
2448 case 21:
2449 manager_set_show_status(m, SHOW_STATUS_NO);
2450 break;
2451
2452 case 22:
2453 log_set_max_level(LOG_DEBUG);
2454 log_info("Setting log level to debug.");
2455 break;
2456
2457 case 23:
2458 log_set_max_level(LOG_INFO);
2459 log_info("Setting log level to info.");
2460 break;
2461
2462 case 24:
2463 if (MANAGER_IS_USER(m)) {
2464 m->exit_code = MANAGER_EXIT;
2465 return 0;
2466 }
2467
2468 /* This is a nop on init */
2469 break;
2470
2471 case 26:
2472 case 29: /* compatibility: used to be mapped to LOG_TARGET_SYSLOG_OR_KMSG */
2473 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
2474 log_notice("Setting log target to journal-or-kmsg.");
2475 break;
2476
2477 case 27:
2478 log_set_target(LOG_TARGET_CONSOLE);
2479 log_notice("Setting log target to console.");
2480 break;
2481
2482 case 28:
2483 log_set_target(LOG_TARGET_KMSG);
2484 log_notice("Setting log target to kmsg.");
2485 break;
2486
2487 default:
2488 log_warning("Got unhandled signal <%s>.", signal_to_string(sfsi.ssi_signo));
2489 }
2490 }}
2491
2492 return 0;
2493 }
2494
2495 static int manager_dispatch_time_change_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
2496 Manager *m = userdata;
2497 Iterator i;
2498 Unit *u;
2499
2500 assert(m);
2501 assert(m->time_change_fd == fd);
2502
2503 log_struct(LOG_DEBUG,
2504 "MESSAGE_ID=" SD_MESSAGE_TIME_CHANGE_STR,
2505 LOG_MESSAGE("Time has been changed"),
2506 NULL);
2507
2508 /* Restart the watch */
2509 m->time_change_event_source = sd_event_source_unref(m->time_change_event_source);
2510 m->time_change_fd = safe_close(m->time_change_fd);
2511
2512 manager_setup_time_change(m);
2513
2514 HASHMAP_FOREACH(u, m->units, i)
2515 if (UNIT_VTABLE(u)->time_change)
2516 UNIT_VTABLE(u)->time_change(u);
2517
2518 return 0;
2519 }
2520
2521 static int manager_dispatch_idle_pipe_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
2522 Manager *m = userdata;
2523
2524 assert(m);
2525 assert(m->idle_pipe[2] == fd);
2526
2527 /* There's at least one Type=idle child that just gave up on us waiting for the boot process to complete. Let's
2528 * now turn off any further console output if there's at least one service that needs console access, so that
2529 * from now on our own output does not spill into that service's output anymore. After all, we support
2530 * Type=idle only to beautify console output and it generally is set on services that want to own the console
2531 * exclusively without our interference. */
2532 m->no_console_output = m->n_on_console > 0;
2533
2534 /* Acknowledge the child's request, and let all other children know too that they shouldn't wait any longer
2535 * by closing the pipes towards them, which is what they are waiting for. */
2536 manager_close_idle_pipe(m);
2537
2538 return 0;
2539 }
2540
2541 static int manager_dispatch_jobs_in_progress(sd_event_source *source, usec_t usec, void *userdata) {
2542 Manager *m = userdata;
2543 int r;
2544 uint64_t next;
2545
2546 assert(m);
2547 assert(source);
2548
2549 manager_print_jobs_in_progress(m);
2550
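/* Re-arm ourselves as a one-shot timer, so that the "jobs in progress" status line keeps being refreshed. */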
2551 next = now(CLOCK_MONOTONIC) + JOBS_IN_PROGRESS_PERIOD_USEC;
2552 r = sd_event_source_set_time(source, next);
2553 if (r < 0)
2554 return r;
2555
2556 return sd_event_source_set_enabled(source, SD_EVENT_ONESHOT);
2557 }
2558
2559 int manager_loop(Manager *m) {
2560 int r;
2561
2562 RATELIMIT_DEFINE(rl, 1*USEC_PER_SEC, 50000);
2563
2564 assert(m);
2565 m->exit_code = MANAGER_OK;
2566
2567 /* Release the path cache */
2568 m->unit_path_cache = set_free_free(m->unit_path_cache);
2569
2570 manager_check_finished(m);
2571
2572 /* There might still be some zombies hanging around from before we were exec()'ed. Let's reap them. */
2573 r = sd_event_source_set_enabled(m->sigchld_event_source, SD_EVENT_ON);
2574 if (r < 0)
2575 return log_error_errno(r, "Failed to enable SIGCHLD event source: %m");
2576
2577 while (m->exit_code == MANAGER_OK) {
2578 usec_t wait_usec;
2579
2580 if (m->runtime_watchdog > 0 && m->runtime_watchdog != USEC_INFINITY && MANAGER_IS_SYSTEM(m))
2581 watchdog_ping();
2582
2583 if (!ratelimit_test(&rl)) {
2584 /* Yay, something is going seriously wrong, pause a little */
2585 log_warning("Looping too fast. Throttling execution a little.");
2586 sleep(1);
2587 }
2588
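/* Drain our internal work queues before going back to sleep in the event loop. Each dispatcher returns > 0 if
* it did something, in which case we start over from the top. */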
2589 if (manager_dispatch_load_queue(m) > 0)
2590 continue;
2591
2592 if (manager_dispatch_gc_job_queue(m) > 0)
2593 continue;
2594
2595 if (manager_dispatch_gc_unit_queue(m) > 0)
2596 continue;
2597
2598 if (manager_dispatch_cleanup_queue(m) > 0)
2599 continue;
2600
2601 if (manager_dispatch_cgroup_realize_queue(m) > 0)
2602 continue;
2603
2604 if (manager_dispatch_dbus_queue(m) > 0)
2605 continue;
2606
2607 /* Sleep for half the watchdog time */
2608 if (m->runtime_watchdog > 0 && m->runtime_watchdog != USEC_INFINITY && MANAGER_IS_SYSTEM(m)) {
2609 wait_usec = m->runtime_watchdog / 2;
2610 if (wait_usec <= 0)
2611 wait_usec = 1;
2612 } else
2613 wait_usec = USEC_INFINITY;
2614
2615 r = sd_event_run(m->event, wait_usec);
2616 if (r < 0)
2617 return log_error_errno(r, "Failed to run event loop: %m");
2618 }
2619
2620 return m->exit_code;
2621 }
2622
2623 int manager_load_unit_from_dbus_path(Manager *m, const char *s, sd_bus_error *e, Unit **_u) {
2624 _cleanup_free_ char *n = NULL;
2625 sd_id128_t invocation_id;
2626 Unit *u;
2627 int r;
2628
2629 assert(m);
2630 assert(s);
2631 assert(_u);
2632
2633 r = unit_name_from_dbus_path(s, &n);
2634 if (r < 0)
2635 return r;
2636
2637 /* Permit addressing units by invocation ID: if the passed bus path is suffixed by a 128bit ID then we use it
2638 * as invocation ID. */
2639 r = sd_id128_from_string(n, &invocation_id);
2640 if (r >= 0) {
2641 u = hashmap_get(m->units_by_invocation_id, &invocation_id);
2642 if (u) {
2643 *_u = u;
2644 return 0;
2645 }
2646
2647 return sd_bus_error_setf(e, BUS_ERROR_NO_UNIT_FOR_INVOCATION_ID, "No unit with the specified invocation ID " SD_ID128_FORMAT_STR " known.", SD_ID128_FORMAT_VAL(invocation_id));
2648 }
2649
2650 /* If this didn't work, we check if this is a unit name */
2651 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
2652 return sd_bus_error_setf(e, SD_BUS_ERROR_INVALID_ARGS, "Unit name %s is neither a valid invocation ID nor unit name.", n);
2653
2654 r = manager_load_unit(m, n, NULL, e, &u);
2655 if (r < 0)
2656 return r;
2657
2658 *_u = u;
2659 return 0;
2660 }
2661
2662 int manager_get_job_from_dbus_path(Manager *m, const char *s, Job **_j) {
2663 const char *p;
2664 unsigned id;
2665 Job *j;
2666 int r;
2667
2668 assert(m);
2669 assert(s);
2670 assert(_j);
2671
2672 p = startswith(s, "/org/freedesktop/systemd1/job/");
2673 if (!p)
2674 return -EINVAL;
2675
2676 r = safe_atou(p, &id);
2677 if (r < 0)
2678 return r;
2679
2680 j = manager_get_job(m, id);
2681 if (!j)
2682 return -ENOENT;
2683
2684 *_j = j;
2685
2686 return 0;
2687 }
2688
2689 void manager_send_unit_audit(Manager *m, Unit *u, int type, bool success) {
2690
2691 #if HAVE_AUDIT
2692 _cleanup_free_ char *p = NULL;
2693 const char *msg;
2694 int audit_fd, r;
2695
2696 if (!MANAGER_IS_SYSTEM(m))
2697 return;
2698
2699 audit_fd = get_audit_fd();
2700 if (audit_fd < 0)
2701 return;
2702
2703 /* Don't generate audit events if the service was already
2704 * started and we're just deserializing */
2705 if (MANAGER_IS_RELOADING(m))
2706 return;
2707
2708 if (u->type != UNIT_SERVICE)
2709 return;
2710
2711 r = unit_name_to_prefix_and_instance(u->id, &p);
2712 if (r < 0) {
2713 log_error_errno(r, "Failed to extract prefix and instance of unit name: %m");
2714 return;
2715 }
2716
2717 msg = strjoina("unit=", p);
2718 if (audit_log_user_comm_message(audit_fd, type, msg, "systemd", NULL, NULL, NULL, success) < 0) {
2719 if (errno == EPERM)
2720 /* We aren't allowed to send audit messages?
2721 * Then let's not retry again. */
2722 close_audit_fd();
2723 else
2724 log_warning_errno(errno, "Failed to send audit message: %m");
2725 }
2726 #endif
2727
2728 }
2729
2730 void manager_send_unit_plymouth(Manager *m, Unit *u) {
2731 static const union sockaddr_union sa = PLYMOUTH_SOCKET;
2732 _cleanup_free_ char *message = NULL;
2733 _cleanup_close_ int fd = -1;
2734 int n = 0;
2735
2736 /* Don't generate plymouth events if the service was already
2737 * started and we're just deserializing */
2738 if (MANAGER_IS_RELOADING(m))
2739 return;
2740
2741 if (!MANAGER_IS_SYSTEM(m))
2742 return;
2743
2744 if (detect_container() > 0)
2745 return;
2746
2747 if (!IN_SET(u->type, UNIT_SERVICE, UNIT_MOUNT, UNIT_SWAP))
2748 return;
2749
2750 /* We set SOCK_NONBLOCK here so that we rather drop the
2751 * message than wait for plymouth */
2752 fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2753 if (fd < 0) {
2754 log_error_errno(errno, "socket() failed: %m");
2755 return;
2756 }
2757
2758 if (connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0) {
2759 if (!IN_SET(errno, EPIPE, EAGAIN, ENOENT, ECONNREFUSED, ECONNRESET, ECONNABORTED))
2760 log_error_errno(errno, "connect() failed: %m");
2761 return;
2762 }
2763
2764 if (asprintf(&message, "U\002%c%s%n", (int) (strlen(u->id) + 1), u->id, &n) < 0) {
2765 log_oom();
2766 return;
2767 }
2768
2769 errno = 0;
2770 if (write(fd, message, n + 1) != n + 1)
2771 if (!IN_SET(errno, EPIPE, EAGAIN, ENOENT, ECONNREFUSED, ECONNRESET, ECONNABORTED))
2772 log_error_errno(errno, "Failed to write Plymouth message: %m");
2773 }
2774
2775 int manager_open_serialization(Manager *m, FILE **_f) {
2776 int fd;
2777 FILE *f;
2778
2779 assert(_f);
2780
2781 fd = open_serialization_fd("systemd-state");
2782 if (fd < 0)
2783 return fd;
2784
2785 f = fdopen(fd, "w+");
2786 if (!f) {
2787 safe_close(fd);
2788 return -errno;
2789 }
2790
2791 *_f = f;
2792 return 0;
2793 }
2794
2795 int manager_serialize(Manager *m, FILE *f, FDSet *fds, bool switching_root) {
2796 ManagerTimestamp q;
2797 const char *t;
2798 Iterator i;
2799 Unit *u;
2800 int r;
2801
2802 assert(m);
2803 assert(f);
2804 assert(fds);
2805
2806 m->n_reloading++;
2807
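/* The serialization format is line-based: first a series of key=value lines with global manager state, then an
* empty line, then one block per unit, each introduced by the unit's name. */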
2808 fprintf(f, "current-job-id=%"PRIu32"\n", m->current_job_id);
2809 fprintf(f, "n-installed-jobs=%u\n", m->n_installed_jobs);
2810 fprintf(f, "n-failed-jobs=%u\n", m->n_failed_jobs);
2811 fprintf(f, "taint-usr=%s\n", yes_no(m->taint_usr));
2812 fprintf(f, "ready-sent=%s\n", yes_no(m->ready_sent));
2813 fprintf(f, "taint-logged=%s\n", yes_no(m->taint_logged));
2814 fprintf(f, "service-watchdogs=%s\n", yes_no(m->service_watchdogs));
2815
2816 for (q = 0; q < _MANAGER_TIMESTAMP_MAX; q++) {
2817 /* The userspace and finish timestamps only apply to the host system, hence only serialize them there */
2818 if (in_initrd() && IN_SET(q, MANAGER_TIMESTAMP_USERSPACE, MANAGER_TIMESTAMP_FINISH))
2819 continue;
2820
2821 t = manager_timestamp_to_string(q);
2822 {
2823 char field[strlen(t) + STRLEN("-timestamp") + 1];
2824 strcpy(stpcpy(field, t), "-timestamp");
2825 dual_timestamp_serialize(f, field, m->timestamps + q);
2826 }
2827 }
2828
2829 if (!switching_root)
2830 (void) serialize_environment(f, m->environment);
2831
2832 if (m->notify_fd >= 0) {
2833 int copy;
2834
2835 copy = fdset_put_dup(fds, m->notify_fd);
2836 if (copy < 0)
2837 return copy;
2838
2839 fprintf(f, "notify-fd=%i\n", copy);
2840 fprintf(f, "notify-socket=%s\n", m->notify_socket);
2841 }
2842
2843 if (m->cgroups_agent_fd >= 0) {
2844 int copy;
2845
2846 copy = fdset_put_dup(fds, m->cgroups_agent_fd);
2847 if (copy < 0)
2848 return copy;
2849
2850 fprintf(f, "cgroups-agent-fd=%i\n", copy);
2851 }
2852
2853 if (m->user_lookup_fds[0] >= 0) {
2854 int copy0, copy1;
2855
2856 copy0 = fdset_put_dup(fds, m->user_lookup_fds[0]);
2857 if (copy0 < 0)
2858 return copy0;
2859
2860 copy1 = fdset_put_dup(fds, m->user_lookup_fds[1]);
2861 if (copy1 < 0)
2862 return copy1;
2863
2864 fprintf(f, "user-lookup=%i %i\n", copy0, copy1);
2865 }
2866
2867 bus_track_serialize(m->subscribed, f, "subscribed");
2868
2869 r = dynamic_user_serialize(m, f, fds);
2870 if (r < 0)
2871 return r;
2872
2873 manager_serialize_uid_refs(m, f);
2874 manager_serialize_gid_refs(m, f);
2875
2876 r = exec_runtime_serialize(m, f, fds);
2877 if (r < 0)
2878 return r;
2879
2880 (void) fputc('\n', f);
2881
2882 HASHMAP_FOREACH_KEY(u, t, m->units, i) {
2883 if (u->id != t)
2884 continue;
2885
2886 /* Start marker */
2887 fputs(u->id, f);
2888 fputc('\n', f);
2889
2890 r = unit_serialize(u, f, fds, !switching_root);
2891 if (r < 0) {
2892 m->n_reloading--;
2893 return r;
2894 }
2895 }
2896
2897 assert(m->n_reloading > 0);
2898 m->n_reloading--;
2899
2900 if (ferror(f))
2901 return -EIO;
2902
2903 r = bus_fdset_add_all(m, fds);
2904 if (r < 0)
2905 return r;
2906
2907 return 0;
2908 }
2909
2910 int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
2911 int r = 0;
2912
2913 assert(m);
2914 assert(f);
2915
2916 log_debug("Deserializing state...");
2917
2918 m->n_reloading++;
2919
2920 for (;;) {
2921 char line[LINE_MAX];
2922 const char *val, *l;
2923
2924 if (!fgets(line, sizeof(line), f)) {
2925 if (feof(f))
2926 r = 0;
2927 else
2928 r = -errno;
2929
2930 goto finish;
2931 }
2932
2933 char_array_0(line);
2934 l = strstrip(line);
2935
2936 if (l[0] == 0)
2937 break;
2938
2939 if ((val = startswith(l, "current-job-id="))) {
2940 uint32_t id;
2941
2942 if (safe_atou32(val, &id) < 0)
2943 log_notice("Failed to parse current job id value %s", val);
2944 else
2945 m->current_job_id = MAX(m->current_job_id, id);
2946
2947 } else if ((val = startswith(l, "n-installed-jobs="))) {
2948 uint32_t n;
2949
2950 if (safe_atou32(val, &n) < 0)
2951 log_notice("Failed to parse installed jobs counter %s", val);
2952 else
2953 m->n_installed_jobs += n;
2954
2955 } else if ((val = startswith(l, "n-failed-jobs="))) {
2956 uint32_t n;
2957
2958 if (safe_atou32(val, &n) < 0)
2959 log_notice("Failed to parse failed jobs counter %s", val);
2960 else
2961 m->n_failed_jobs += n;
2962
2963 } else if ((val = startswith(l, "taint-usr="))) {
2964 int b;
2965
2966 b = parse_boolean(val);
2967 if (b < 0)
2968 log_notice("Failed to parse taint /usr flag %s", val);
2969 else
2970 m->taint_usr = m->taint_usr || b;
2971
2972 } else if ((val = startswith(l, "ready-sent="))) {
2973 int b;
2974
2975 b = parse_boolean(val);
2976 if (b < 0)
2977 log_notice("Failed to parse ready-sent flag %s", val);
2978 else
2979 m->ready_sent = m->ready_sent || b;
2980
2981 } else if ((val = startswith(l, "taint-logged="))) {
2982 int b;
2983
2984 b = parse_boolean(val);
2985 if (b < 0)
2986 log_notice("Failed to parse taint-logged flag %s", val);
2987 else
2988 m->taint_logged = m->taint_logged || b;
2989
2990 } else if ((val = startswith(l, "service-watchdogs="))) {
2991 int b;
2992
2993 b = parse_boolean(val);
2994 if (b < 0)
2995 log_notice("Failed to parse service-watchdogs flag %s", val);
2996 else
2997 m->service_watchdogs = b;
2998
2999 } else if (startswith(l, "env=")) {
3000 r = deserialize_environment(&m->environment, l);
3001 if (r == -ENOMEM)
3002 goto finish;
3003 if (r < 0)
3004 log_notice_errno(r, "Failed to parse environment entry: \"%s\": %m", l);
3005
3006 } else if ((val = startswith(l, "notify-fd="))) {
3007 int fd;
3008
3009 if (safe_atoi(val, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd))
3010 log_notice("Failed to parse notify fd: \"%s\"", val);
3011 else {
3012 m->notify_event_source = sd_event_source_unref(m->notify_event_source);
3013 safe_close(m->notify_fd);
3014 m->notify_fd = fdset_remove(fds, fd);
3015 }
3016
3017 } else if ((val = startswith(l, "notify-socket="))) {
3018 char *n;
3019
3020 n = strdup(val);
3021 if (!n) {
3022 r = -ENOMEM;
3023 goto finish;
3024 }
3025
3026 free(m->notify_socket);
3027 m->notify_socket = n;
3028
3029 } else if ((val = startswith(l, "cgroups-agent-fd="))) {
3030 int fd;
3031
3032 if (safe_atoi(val, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd))
3033 log_notice("Failed to parse cgroups agent fd: %s", val);
3034 else {
3035 m->cgroups_agent_event_source = sd_event_source_unref(m->cgroups_agent_event_source);
3036 safe_close(m->cgroups_agent_fd);
3037 m->cgroups_agent_fd = fdset_remove(fds, fd);
3038 }
3039
3040 } else if ((val = startswith(l, "user-lookup="))) {
3041 int fd0, fd1;
3042
3043 if (sscanf(val, "%i %i", &fd0, &fd1) != 2 || fd0 < 0 || fd1 < 0 || fd0 == fd1 || !fdset_contains(fds, fd0) || !fdset_contains(fds, fd1))
3044 log_notice("Failed to parse user lookup fd: %s", val);
3045 else {
3046 m->user_lookup_event_source = sd_event_source_unref(m->user_lookup_event_source);
3047 safe_close_pair(m->user_lookup_fds);
3048 m->user_lookup_fds[0] = fdset_remove(fds, fd0);
3049 m->user_lookup_fds[1] = fdset_remove(fds, fd1);
3050 }
3051
3052 } else if ((val = startswith(l, "dynamic-user=")))
3053 dynamic_user_deserialize_one(m, val, fds);
3054 else if ((val = startswith(l, "destroy-ipc-uid=")))
3055 manager_deserialize_uid_refs_one(m, val);
3056 else if ((val = startswith(l, "destroy-ipc-gid=")))
3057 manager_deserialize_gid_refs_one(m, val);
3058 else if ((val = startswith(l, "exec-runtime=")))
3059 exec_runtime_deserialize_one(m, val, fds);
3060 else if ((val = startswith(l, "subscribed="))) {
3061
3062 if (strv_extend(&m->deserialized_subscribed, val) < 0)
3063 log_oom();
3064 } else {
3065 ManagerTimestamp q;
3066
3067 for (q = 0; q < _MANAGER_TIMESTAMP_MAX; q++) {
3068 val = startswith(l, manager_timestamp_to_string(q));
3069 if (!val)
3070 continue;
3071
3072 val = startswith(val, "-timestamp=");
3073 if (val)
3074 break;
3075 }
3076
3077 if (q < _MANAGER_TIMESTAMP_MAX) /* found it */
3078 dual_timestamp_deserialize(val, m->timestamps + q);
3079 else if (!startswith(l, "kdbus-fd=")) /* ignore kdbus */
3080 log_notice("Unknown serialization item '%s'", l);
3081 }
3082 }
3083
3084 for (;;) {
3085 Unit *u;
3086 char name[UNIT_NAME_MAX+2];
3087 const char* unit_name;
3088
3089 /* Start marker */
3090 if (!fgets(name, sizeof(name), f)) {
3091 if (feof(f))
3092 r = 0;
3093 else
3094 r = -errno;
3095
3096 goto finish;
3097 }
3098
3099 char_array_0(name);
3100 unit_name = strstrip(name);
3101
3102 r = manager_load_unit(m, unit_name, NULL, NULL, &u);
3103 if (r < 0) {
3104 log_notice_errno(r, "Failed to load unit \"%s\", skipping deserialization: %m", unit_name);
3105 if (r == -ENOMEM)
3106 goto finish;
3107 unit_deserialize_skip(f);
3108 continue;
3109 }
3110
3111 r = unit_deserialize(u, f, fds);
3112 if (r < 0) {
3113 log_notice_errno(r, "Failed to deserialize unit \"%s\": %m", unit_name);
3114 if (r == -ENOMEM)
3115 goto finish;
3116 }
3117 }
3118
3119 finish:
3120 if (ferror(f))
3121 r = -EIO;
3122
3123 assert(m->n_reloading > 0);
3124 m->n_reloading--;
3125
3126 return r;
3127 }
3128
3129 int manager_reload(Manager *m) {
3130 int r, q;
3131 _cleanup_fclose_ FILE *f = NULL;
3132 _cleanup_fdset_free_ FDSet *fds = NULL;
3133
3134 assert(m);
3135
3136 r = manager_open_serialization(m, &f);
3137 if (r < 0)
3138 return r;
3139
3140 m->n_reloading++;
3141 bus_manager_send_reloading(m, true);
3142
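/* The reload dance: serialize the current state into the stream opened above, tear everything down, re-run the
* generators, and then deserialize the state right back in below. */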
3143 fds = fdset_new();
3144 if (!fds) {
3145 m->n_reloading--;
3146 return -ENOMEM;
3147 }
3148
3149 r = manager_serialize(m, f, fds, false);
3150 if (r < 0) {
3151 m->n_reloading--;
3152 return r;
3153 }
3154
3155 if (fseeko(f, 0, SEEK_SET) < 0) {
3156 m->n_reloading--;
3157 return -errno;
3158 }
3159
3160 /* From here on there is no way back. */
3161 manager_clear_jobs_and_units(m);
3162 lookup_paths_flush_generator(&m->lookup_paths);
3163 lookup_paths_free(&m->lookup_paths);
3164 exec_runtime_vacuum(m);
3165 dynamic_user_vacuum(m, false);
3166 m->uid_refs = hashmap_free(m->uid_refs);
3167 m->gid_refs = hashmap_free(m->gid_refs);
3168
3169 q = lookup_paths_init(&m->lookup_paths, m->unit_file_scope, 0, NULL);
3170 if (q < 0 && r >= 0)
3171 r = q;
3172
3173 q = manager_run_environment_generators(m);
3174 if (q < 0 && r >= 0)
3175 r = q;
3176
3177 /* Find new unit paths */
3178 q = manager_run_generators(m);
3179 if (q < 0 && r >= 0)
3180 r = q;
3181
3182 lookup_paths_reduce(&m->lookup_paths);
3183 manager_build_unit_path_cache(m);
3184
3185 /* First, enumerate what we can from all config files */
3186 manager_enumerate(m);
3187
3188 /* Second, deserialize our stored data */
3189 q = manager_deserialize(m, f, fds);
3190 if (q < 0) {
3191 log_error_errno(q, "Deserialization failed: %m");
3192
3193 if (r >= 0)
3194 r = q;
3195 }
3196
3197 fclose(f);
3198 f = NULL;
3199
3200 /* Re-register notify_fd as event source */
3201 q = manager_setup_notify(m);
3202 if (q < 0 && r >= 0)
3203 r = q;
3204
3205 q = manager_setup_cgroups_agent(m);
3206 if (q < 0 && r >= 0)
3207 r = q;
3208
3209 q = manager_setup_user_lookup_fd(m);
3210 if (q < 0 && r >= 0)
3211 r = q;
3212
3213 /* Third, fire things up! */
3214 manager_coldplug(m);
3215
3216 /* Release any dynamic users no longer referenced */
3217 dynamic_user_vacuum(m, true);
3218
3219 /* Release any references to UIDs/GIDs no longer referenced, and destroy any IPC owned by them */
3220 manager_vacuum_uid_refs(m);
3221 manager_vacuum_gid_refs(m);
3222
3223 exec_runtime_vacuum(m);
3224
3225 /* It might be safe to log to the journal now and connect to dbus */
3226 manager_recheck_journal(m);
3227 manager_recheck_dbus(m);
3228
3229 /* Sync current state of bus names with our set of listening units */
3230 q = manager_enqueue_sync_bus_names(m);
3231 if (q < 0 && r >= 0)
3232 r = q;
3233
3234 assert(m->n_reloading > 0);
3235 m->n_reloading--;
3236
3237 m->send_reloading_done = true;
3238
3239 return r;
3240 }
3241
3242 void manager_reset_failed(Manager *m) {
3243 Unit *u;
3244 Iterator i;
3245
3246 assert(m);
3247
3248 HASHMAP_FOREACH(u, m->units, i)
3249 unit_reset_failed(u);
3250 }
3251
3252 bool manager_unit_inactive_or_pending(Manager *m, const char *name) {
3253 Unit *u;
3254
3255 assert(m);
3256 assert(name);
3257
3258 /* Returns true if the unit is inactive or going down */
3259 u = manager_get_unit(m, name);
3260 if (!u)
3261 return true;
3262
3263 return unit_inactive_or_pending(u);
3264 }
3265
3266 static void log_taint_string(Manager *m) {
3267 _cleanup_free_ char *taint = NULL;
3268
3269 assert(m);
3270
3271 if (MANAGER_IS_USER(m) || m->taint_logged)
3272 return;
3273
3274 m->taint_logged = true; /* only check for taint once */
3275
3276 taint = manager_taint_string(m);
3277 if (isempty(taint))
3278 return;
3279
3280 log_struct(LOG_NOTICE,
3281 LOG_MESSAGE("System is tainted: %s", taint),
3282 "TAINT=%s", taint,
3283 "MESSAGE_ID=" SD_MESSAGE_TAINTED_STR,
3284 NULL);
3285 }
3286
3287 static void manager_notify_finished(Manager *m) {
3288 char userspace[FORMAT_TIMESPAN_MAX], initrd[FORMAT_TIMESPAN_MAX], kernel[FORMAT_TIMESPAN_MAX], sum[FORMAT_TIMESPAN_MAX];
3289 usec_t firmware_usec, loader_usec, kernel_usec, initrd_usec, userspace_usec, total_usec;
3290
3291 if (m->test_run_flags)
3292 return;
3293
3294 if (MANAGER_IS_SYSTEM(m) && detect_container() <= 0) {
3295 char ts[FORMAT_TIMESPAN_MAX];
3296 char buf[FORMAT_TIMESPAN_MAX + STRLEN(" (firmware) + ") + FORMAT_TIMESPAN_MAX + STRLEN(" (loader) + ")]
3297 = {};
3298 char *p = buf;
3299 size_t size = sizeof buf;
3300
3301 /* Note that MANAGER_TIMESTAMP_KERNEL's monotonic value is always at 0, and
3302 * MANAGER_TIMESTAMP_FIRMWARE's and MANAGER_TIMESTAMP_LOADER's monotonic value should be considered
3303 * negative values. */
3304
3305 firmware_usec = m->timestamps[MANAGER_TIMESTAMP_FIRMWARE].monotonic - m->timestamps[MANAGER_TIMESTAMP_LOADER].monotonic;
3306 loader_usec = m->timestamps[MANAGER_TIMESTAMP_LOADER].monotonic - m->timestamps[MANAGER_TIMESTAMP_KERNEL].monotonic;
3307 userspace_usec = m->timestamps[MANAGER_TIMESTAMP_FINISH].monotonic - m->timestamps[MANAGER_TIMESTAMP_USERSPACE].monotonic;
3308 total_usec = m->timestamps[MANAGER_TIMESTAMP_FIRMWARE].monotonic + m->timestamps[MANAGER_TIMESTAMP_FINISH].monotonic;
3309
3310 if (firmware_usec > 0)
3311 size = strpcpyf(&p, size, "%s (firmware) + ", format_timespan(ts, sizeof(ts), firmware_usec, USEC_PER_MSEC));
3312 if (loader_usec > 0)
3313 size = strpcpyf(&p, size, "%s (loader) + ", format_timespan(ts, sizeof(ts), loader_usec, USEC_PER_MSEC));
3314
3315 if (dual_timestamp_is_set(&m->timestamps[MANAGER_TIMESTAMP_INITRD])) {
3316
3317 /* The initrd case on bare metal */
3318 kernel_usec = m->timestamps[MANAGER_TIMESTAMP_INITRD].monotonic - m->timestamps[MANAGER_TIMESTAMP_KERNEL].monotonic;
3319 initrd_usec = m->timestamps[MANAGER_TIMESTAMP_USERSPACE].monotonic - m->timestamps[MANAGER_TIMESTAMP_INITRD].monotonic;
3320
3321 log_struct(LOG_INFO,
3322 "MESSAGE_ID=" SD_MESSAGE_STARTUP_FINISHED_STR,
3323 "KERNEL_USEC="USEC_FMT, kernel_usec,
3324 "INITRD_USEC="USEC_FMT, initrd_usec,
3325 "USERSPACE_USEC="USEC_FMT, userspace_usec,
3326 LOG_MESSAGE("Startup finished in %s%s (kernel) + %s (initrd) + %s (userspace) = %s.",
3327 buf,
3328 format_timespan(kernel, sizeof(kernel), kernel_usec, USEC_PER_MSEC),
3329 format_timespan(initrd, sizeof(initrd), initrd_usec, USEC_PER_MSEC),
3330 format_timespan(userspace, sizeof(userspace), userspace_usec, USEC_PER_MSEC),
3331 format_timespan(sum, sizeof(sum), total_usec, USEC_PER_MSEC)),
3332 NULL);
3333 } else {
3334 /* The initrd-less case on bare metal */
3335
3336 kernel_usec = m->timestamps[MANAGER_TIMESTAMP_USERSPACE].monotonic - m->timestamps[MANAGER_TIMESTAMP_KERNEL].monotonic;
3337 initrd_usec = 0;
3338
3339 log_struct(LOG_INFO,
3340 "MESSAGE_ID=" SD_MESSAGE_STARTUP_FINISHED_STR,
3341 "KERNEL_USEC="USEC_FMT, kernel_usec,
3342 "USERSPACE_USEC="USEC_FMT, userspace_usec,
3343 LOG_MESSAGE("Startup finished in %s%s (kernel) + %s (userspace) = %s.",
3344 buf,
3345 format_timespan(kernel, sizeof(kernel), kernel_usec, USEC_PER_MSEC),
3346 format_timespan(userspace, sizeof(userspace), userspace_usec, USEC_PER_MSEC),
3347 format_timespan(sum, sizeof(sum), total_usec, USEC_PER_MSEC)),
3348 NULL);
3349 }
3350 } else {
3351 /* The container and --user case */
3352 firmware_usec = loader_usec = initrd_usec = kernel_usec = 0;
3353 total_usec = userspace_usec = m->timestamps[MANAGER_TIMESTAMP_FINISH].monotonic - m->timestamps[MANAGER_TIMESTAMP_USERSPACE].monotonic;
3354
3355 log_struct(LOG_INFO,
3356 "MESSAGE_ID=" SD_MESSAGE_USER_STARTUP_FINISHED_STR,
3357 "USERSPACE_USEC="USEC_FMT, userspace_usec,
3358 LOG_MESSAGE("Startup finished in %s.",
3359 format_timespan(sum, sizeof(sum), total_usec, USEC_PER_MSEC)),
3360 NULL);
3361 }
3362
3363 bus_manager_send_finished(m, firmware_usec, loader_usec, kernel_usec, initrd_usec, userspace_usec, total_usec);
3364
3365 sd_notifyf(false,
3366 m->ready_sent ? "STATUS=Startup finished in %s."
3367 : "READY=1\n"
3368 "STATUS=Startup finished in %s.",
3369 format_timespan(sum, sizeof(sum), total_usec, USEC_PER_MSEC));
3370 m->ready_sent = true;
3371
3372 log_taint_string(m);
3373 }
3374
3375 static void manager_send_ready(Manager *m) {
3376 assert(m);
3377
3378 /* We send READY=1 on reaching basic.target only when running in --user mode. */
3379 if (!MANAGER_IS_USER(m) || m->ready_sent)
3380 return;
3381
3382 m->ready_sent = true;
3383
3384 sd_notifyf(false,
3385 "READY=1\n"
3386 "STATUS=Reached " SPECIAL_BASIC_TARGET ".");
3387 }
3388
3389 static void manager_check_basic_target(Manager *m) {
3390 Unit *u;
3391
3392 assert(m);
3393
3394 /* Small shortcut */
3395 if (m->ready_sent && m->taint_logged)
3396 return;
3397
3398 u = manager_get_unit(m, SPECIAL_BASIC_TARGET);
3399 if (!u || !UNIT_IS_ACTIVE_OR_RELOADING(unit_active_state(u)))
3400 return;
3401
3402 /* For user managers, send out READY=1 as soon as we reach basic.target */
3403 manager_send_ready(m);
3404
3405 /* Log the taint string as soon as we reach basic.target */
3406 log_taint_string(m);
3407 }
3408
3409 void manager_check_finished(Manager *m) {
3410 assert(m);
3411
3412 if (MANAGER_IS_RELOADING(m))
3413 return;
3414
3415 /* Verify that we have entered the event loop already, and not left it again. */
3416 if (!MANAGER_IS_RUNNING(m))
3417 return;
3418
3419 manager_check_basic_target(m);
3420
3421 if (hashmap_size(m->jobs) > 0) {
3422 if (m->jobs_in_progress_event_source)
3423 /* Ignore any failure, this is only for feedback */
3424 (void) sd_event_source_set_time(m->jobs_in_progress_event_source, now(CLOCK_MONOTONIC) + JOBS_IN_PROGRESS_WAIT_USEC);
3425
3426 return;
3427 }
3428
3429 manager_flip_auto_status(m, false);
3430
3431 /* Notify Type=idle units that we are done now */
3432 manager_close_idle_pipe(m);
3433
3434 /* Turn off confirm spawn now */
3435 m->confirm_spawn = NULL;
3436
3437 /* No need to update ask password status when we're going non-interactive */
3438 manager_close_ask_password(m);
3439
3440 /* This is no longer the first boot */
3441 manager_set_first_boot(m, false);
3442
3443 if (MANAGER_IS_FINISHED(m))
3444 return;
3445
3446 dual_timestamp_get(m->timestamps + MANAGER_TIMESTAMP_FINISH);
3447
3448 manager_notify_finished(m);
3449
3450 manager_invalidate_startup_units(m);
3451 }
3452
3453 static bool generator_path_any(const char* const* paths) {
3454 char **path;
3455 bool found = false;
3456
3457 /* Optimization: if no generators are found, skip the whole process and don't
3458 * even create the output directories. */
3459 STRV_FOREACH(path, (char**) paths)
3460 if (access(*path, F_OK) == 0)
3461 found = true;
3462 else if (errno != ENOENT)
3463 log_warning_errno(errno, "Failed to open generator directory %s: %m", *path);
3464
3465 return found;
3466 }
3467
3468 static const char* system_env_generator_binary_paths[] = {
3469 "/run/systemd/system-environment-generators",
3470 "/etc/systemd/system-environment-generators",
3471 "/usr/local/lib/systemd/system-environment-generators",
3472 SYSTEM_ENV_GENERATOR_PATH,
3473 NULL
3474 };
3475
3476 static const char* user_env_generator_binary_paths[] = {
3477 "/run/systemd/user-environment-generators",
3478 "/etc/systemd/user-environment-generators",
3479 "/usr/local/lib/systemd/user-environment-generators",
3480 USER_ENV_GENERATOR_PATH,
3481 NULL
3482 };
3483
3484 static int manager_run_environment_generators(Manager *m) {
3485 char **tmp = NULL; /* this is only used in the forked process, no cleanup here */
3486 const char **paths;
3487 void* args[] = {&tmp, &tmp, &m->environment};
3488
3489 if (m->test_run_flags && !(m->test_run_flags & MANAGER_TEST_RUN_ENV_GENERATORS))
3490 return 0;
3491
3492 paths = MANAGER_IS_SYSTEM(m) ? system_env_generator_binary_paths : user_env_generator_binary_paths;
3493
3494 if (!generator_path_any(paths))
3495 return 0;
3496
3497 return execute_directories(paths, DEFAULT_TIMEOUT_USEC, gather_environment, args, NULL);
3498 }
3499
3500 static int manager_run_generators(Manager *m) {
3501 _cleanup_strv_free_ char **paths = NULL;
3502 const char *argv[5];
3503 int r;
3504
3505 assert(m);
3506
3507 if (m->test_run_flags && !(m->test_run_flags & MANAGER_TEST_RUN_GENERATORS))
3508 return 0;
3509
3510 paths = generator_binary_paths(m->unit_file_scope);
3511 if (!paths)
3512 return log_oom();
3513
3514 if (!generator_path_any((const char* const*) paths))
3515 return 0;
3516
3517 r = lookup_paths_mkdir_generator(&m->lookup_paths);
3518 if (r < 0)
3519 goto finish;
3520
3521 argv[0] = NULL; /* Leave this empty, execute_directory() will fill something in */
3522 argv[1] = m->lookup_paths.generator;
3523 argv[2] = m->lookup_paths.generator_early;
3524 argv[3] = m->lookup_paths.generator_late;
3525 argv[4] = NULL;
3526
3527 RUN_WITH_UMASK(0022)
3528 execute_directories((const char* const*) paths, DEFAULT_TIMEOUT_USEC,
3529 NULL, NULL, (char**) argv);
3530
3531 finish:
3532 lookup_paths_trim_generator(&m->lookup_paths);
3533 return r;
3534 }
3535
3536 int manager_environment_add(Manager *m, char **minus, char **plus) {
3537 char **a = NULL, **b = NULL, **l;
3538 assert(m);
3539
3540 l = m->environment;
3541
3542 if (!strv_isempty(minus)) {
3543 a = strv_env_delete(l, 1, minus);
3544 if (!a)
3545 return -ENOMEM;
3546
3547 l = a;
3548 }
3549
3550 if (!strv_isempty(plus)) {
3551 b = strv_env_merge(2, l, plus);
3552 if (!b) {
3553 strv_free(a);
3554 return -ENOMEM;
3555 }
3556
3557 l = b;
3558 }
3559
3560 if (m->environment != l)
3561 strv_free(m->environment);
3562 if (a != l)
3563 strv_free(a);
3564 if (b != l)
3565 strv_free(b);
3566
3567 m->environment = l;
3568 manager_sanitize_environment(m);
3569
3570 return 0;
3571 }
3572
3573 int manager_set_default_rlimits(Manager *m, struct rlimit **default_rlimit) {
3574 int i;
3575
3576 assert(m);
3577
3578 for (i = 0; i < _RLIMIT_MAX; i++) {
3579 m->rlimit[i] = mfree(m->rlimit[i]);
3580
3581 if (!default_rlimit[i])
3582 continue;
3583
3584 m->rlimit[i] = newdup(struct rlimit, default_rlimit[i], 1);
3585 if (!m->rlimit[i])
3586 return log_oom();
3587 }
3588
3589 return 0;
3590 }
3591
3592 void manager_recheck_dbus(Manager *m) {
3593 assert(m);
3594
3595 /* Connects to the bus if the dbus service and socket are running. If we are running in user mode this is all
3596 * it does. In system mode we'll also connect to the system bus (which will most likely just reuse the
3597 * connection of the API bus). That's because the system bus after all runs as service of the system instance,
3598 * while in the user instance we can assume it's already there. */
3599
3600 if (manager_dbus_is_running(m, false)) {
3601 (void) bus_init_api(m);
3602
3603 if (MANAGER_IS_SYSTEM(m))
3604 (void) bus_init_system(m);
3605 } else {
3606 (void) bus_done_api(m);
3607
3608 if (MANAGER_IS_SYSTEM(m))
3609 (void) bus_done_system(m);
3610 }
3611 }
3612
3613 static bool manager_journal_is_running(Manager *m) {
3614 Unit *u;
3615
3616 assert(m);
3617
3618 if (m->test_run_flags != 0)
3619 return false;
3620
3621 /* If we are the user manager we can safely assume that the journal is up */
3622 if (!MANAGER_IS_SYSTEM(m))
3623 return true;
3624
3625 /* Check that the socket is not only up, but in RUNNING state */
3626 u = manager_get_unit(m, SPECIAL_JOURNALD_SOCKET);
3627 if (!u)
3628 return false;
3629 if (SOCKET(u)->state != SOCKET_RUNNING)
3630 return false;
3631
3632 /* Similarly, check that the daemon itself is fully up, too */
3633 u = manager_get_unit(m, SPECIAL_JOURNALD_SERVICE);
3634 if (!u)
3635 return false;
3636 if (!IN_SET(SERVICE(u)->state, SERVICE_RELOAD, SERVICE_RUNNING))
3637 return false;
3638
3639 return true;
3640 }
3641
3642 void manager_recheck_journal(Manager *m) {
3643
3644 assert(m);
3645
3646 /* Don't bother with this unless we are in the special situation of being PID 1 */
3647 if (getpid_cached() != 1)
3648 return;
3649
3650 /* The journal is fully and entirely up? If so, let's permit logging to it, if that's configured. If the
3651 * journal is down, don't ever log to it, otherwise we might end up deadlocking ourselves, as we might trigger
3652 * an activation of the journal that we cannot fulfill ourselves. */
3653 log_set_prohibit_ipc(!manager_journal_is_running(m));
3654 log_open();
3655 }
3656
3657 void manager_set_show_status(Manager *m, ShowStatus mode) {
3658 assert(m);
3659 assert(IN_SET(mode, SHOW_STATUS_AUTO, SHOW_STATUS_NO, SHOW_STATUS_YES, SHOW_STATUS_TEMPORARY));
3660
3661 if (!MANAGER_IS_SYSTEM(m))
3662 return;
3663
3664 if (m->show_status != mode)
3665 log_debug("%s showing of status.",
3666 mode == SHOW_STATUS_NO ? "Disabling" : "Enabling");
3667 m->show_status = mode;
3668
3669 if (mode > 0)
3670 (void) touch("/run/systemd/show-status");
3671 else
3672 (void) unlink("/run/systemd/show-status");
3673 }
3674
3675 static bool manager_get_show_status(Manager *m, StatusType type) {
3676 assert(m);
3677
3678 if (!MANAGER_IS_SYSTEM(m))
3679 return false;
3680
3681 if (m->no_console_output)
3682 return false;
3683
3684 if (!IN_SET(manager_state(m), MANAGER_INITIALIZING, MANAGER_STARTING, MANAGER_STOPPING))
3685 return false;
3686
3687 /* If we cannot find out the status properly, just proceed. */
3688 if (type != STATUS_TYPE_EMERGENCY && manager_check_ask_password(m) > 0)
3689 return false;
3690
3691 return m->show_status > 0;
3692 }
3693
3694 const char *manager_get_confirm_spawn(Manager *m) {
3695 static int last_errno = 0;
3696 const char *vc = m->confirm_spawn;
3697 struct stat st;
3698 int r;
3699
3700 /* Here's the deal: we want to test the validity of the console but don't want
3701 * PID1 to go through the whole console process which might block. But we also
3702 * want to warn the user only once if something is wrong with the console so we
3703 * cannot do the sanity checks after spawning our children. So here we simply do
3704 * really basic tests to hopefully trap common errors.
3705 *
3706 * If the console suddenly disappears at the time our children really need it,
3707 * then they will simply fail to acquire it and a positive answer will be
3708 * assumed. New children will fall back to /dev/console though.
3709 *
3710 * Note: TTYs are devices that can come and go any time, and frequently aren't
3711 * available yet during early boot (consider a USB rs232 dongle...). If for any
3712 * reason the configured console is not ready, we fall back to the default
3713 * console. */
3714
3715 if (!vc || path_equal(vc, "/dev/console"))
3716 return vc;
3717
3718 r = stat(vc, &st);
3719 if (r < 0)
3720 goto fail;
3721
3722 if (!S_ISCHR(st.st_mode)) {
3723 errno = ENOTTY;
3724 goto fail;
3725 }
3726
3727 last_errno = 0;
3728 return vc;
3729 fail:
3730 if (last_errno != errno) {
3731 last_errno = errno;
3732 log_warning_errno(errno, "Failed to open %s: %m, using default console", vc);
3733 }
3734 return "/dev/console";
3735 }
3736
3737 void manager_set_first_boot(Manager *m, bool b) {
3738 assert(m);
3739
3740 if (!MANAGER_IS_SYSTEM(m))
3741 return;
3742
3743 if (m->first_boot != (int) b) {
3744 if (b)
3745 (void) touch("/run/systemd/first-boot");
3746 else
3747 (void) unlink("/run/systemd/first-boot");
3748 }
3749
3750 m->first_boot = b;
3751 }
3752
3753 void manager_disable_confirm_spawn(void) {
3754 (void) touch("/run/systemd/confirm_spawn_disabled");
3755 }
3756
3757 bool manager_is_confirm_spawn_disabled(Manager *m) {
3758 if (!m->confirm_spawn)
3759 return true;
3760
3761 return access("/run/systemd/confirm_spawn_disabled", F_OK) >= 0;
3762 }
3763
3764 void manager_status_printf(Manager *m, StatusType type, const char *status, const char *format, ...) {
3765 va_list ap;
3766
3767 /* If m is NULL, assume we're after shutdown and let the messages through. */
3768
3769 if (m && !manager_get_show_status(m, type))
3770 return;
3771
3772 /* XXX We should totally drop the check for ephemeral here
3773 * and thus effectively make 'Type=idle' pointless. */
3774 if (type == STATUS_TYPE_EPHEMERAL && m && m->n_on_console > 0)
3775 return;
3776
3777 va_start(ap, format);
3778 status_vprintf(status, true, type == STATUS_TYPE_EPHEMERAL, format, ap);
3779 va_end(ap);
3780 }
3781
3782 Set *manager_get_units_requiring_mounts_for(Manager *m, const char *path) {
3783 char p[strlen(path)+1];
3784
3785 assert(m);
3786 assert(path);
3787
3788 strcpy(p, path);
3789 path_kill_slashes(p);
3790
3791 return hashmap_get(m->units_requiring_mounts_for, streq(p, "/") ? "" : p);
3792 }
3793
3794 int manager_update_failed_units(Manager *m, Unit *u, bool failed) {
3795 unsigned size;
3796 int r;
3797
3798 assert(m);
3799 assert(u->manager == m);
3800
3801 size = set_size(m->failed_units);
3802
3803 if (failed) {
3804 r = set_ensure_allocated(&m->failed_units, NULL);
3805 if (r < 0)
3806 return log_oom();
3807
3808 if (set_put(m->failed_units, u) < 0)
3809 return log_oom();
3810 } else
3811 (void) set_remove(m->failed_units, u);
3812
3813 if (set_size(m->failed_units) != size)
3814 bus_manager_send_change_signal(m);
3815
3816 return 0;
3817 }
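
/* Editorial note: this is expected to be invoked from the unit state machine whenever a unit enters or
 * leaves the "failed" state; a non-empty failed_units set is what makes manager_state() below report
 * MANAGER_DEGRADED. */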
3818
3819 ManagerState manager_state(Manager *m) {
3820 Unit *u;
3821
3822 assert(m);
3823
3824 /* Did we ever finish booting? If not then we are still starting up */
3825 if (!MANAGER_IS_FINISHED(m)) {
3826
3827 u = manager_get_unit(m, SPECIAL_BASIC_TARGET);
3828 if (!u || !UNIT_IS_ACTIVE_OR_RELOADING(unit_active_state(u)))
3829 return MANAGER_INITIALIZING;
3830
3831 return MANAGER_STARTING;
3832 }
3833
3834 /* Is the special shutdown target active or queued? If so, we are in shutdown state */
3835 u = manager_get_unit(m, SPECIAL_SHUTDOWN_TARGET);
3836 if (u && unit_active_or_pending(u))
3837 return MANAGER_STOPPING;
3838
3839 if (MANAGER_IS_SYSTEM(m)) {
3840 /* Are the rescue or emergency targets active or queued? If so we are in maintenance state */
3841 u = manager_get_unit(m, SPECIAL_RESCUE_TARGET);
3842 if (u && unit_active_or_pending(u))
3843 return MANAGER_MAINTENANCE;
3844
3845 u = manager_get_unit(m, SPECIAL_EMERGENCY_TARGET);
3846 if (u && unit_active_or_pending(u))
3847 return MANAGER_MAINTENANCE;
3848 }
3849
3850 /* Are there any failed units? If so, we are in degraded mode */
3851 if (set_size(m->failed_units) > 0)
3852 return MANAGER_DEGRADED;
3853
3854 return MANAGER_RUNNING;
3855 }
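
/* Editorial note: these states are also what `systemctl is-system-running` prints, via the
 * manager_state_table at the end of this file ("initializing", "starting", "running", "degraded",
 * "maintenance", "stopping"). */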
3856
3857 #define DESTROY_IPC_FLAG (UINT32_C(1) << 31)
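
/* Editorial note: the hashmap value for each UID/GID packs a reference count and the "destroy IPC on last
 * unref" flag into a single 32-bit word. For example:
 *
 *     c = 3                       -> 3 references, no IPC cleanup requested
 *     c = DESTROY_IPC_FLAG | 3    -> 3 references, clean up the IPC objects when the count reaches 0
 *
 * The helpers below therefore decode it as:
 *
 *     n = c & ~DESTROY_IPC_FLAG;         (the plain counter)
 *     destroy = c & DESTROY_IPC_FLAG;    (the cleanup flag)
 */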
3858
3859 static void manager_unref_uid_internal(
3860 Manager *m,
3861 Hashmap **uid_refs,
3862 uid_t uid,
3863 bool destroy_now,
3864 int (*_clean_ipc)(uid_t uid)) {
3865
3866 uint32_t c, n;
3867
3868 assert(m);
3869 assert(uid_refs);
3870 assert(uid_is_valid(uid));
3871 assert(_clean_ipc);
3872
3873 /* A generic implementation, covering both manager_unref_uid() and manager_unref_gid(), under the assumption
3874 * that uid_t and gid_t are actually defined the same way, with the same validity rules.
3875 *
3876 * We store a hashmap where the UID/GID is the key and the value is a 32-bit reference counter, whose highest
3877 * bit is used as a flag marking UIDs/GIDs whose IPC objects shall be removed when the last reference to the
3878 * UID/GID is dropped. The flag is set once at least one reference from a unit with RemoveIPC= set is added
3879 * for a UID/GID. It is reset when the UID's/GID's reference counter drops to 0 again. */
3880
3881 assert_cc(sizeof(uid_t) == sizeof(gid_t));
3882 assert_cc(UID_INVALID == (uid_t) GID_INVALID);
3883
3884 if (uid == 0) /* We don't keep track of root, and will never destroy it */
3885 return;
3886
3887 c = PTR_TO_UINT32(hashmap_get(*uid_refs, UID_TO_PTR(uid)));
3888
3889 n = c & ~DESTROY_IPC_FLAG;
3890 assert(n > 0);
3891 n--;
3892
3893 if (destroy_now && n == 0) {
3894 hashmap_remove(*uid_refs, UID_TO_PTR(uid));
3895
3896 if (c & DESTROY_IPC_FLAG) {
3897 log_debug("%s " UID_FMT " is no longer referenced, cleaning up its IPC.",
3898 _clean_ipc == clean_ipc_by_uid ? "UID" : "GID",
3899 uid);
3900 (void) _clean_ipc(uid);
3901 }
3902 } else {
3903 c = n | (c & DESTROY_IPC_FLAG);
3904 assert_se(hashmap_update(*uid_refs, UID_TO_PTR(uid), UINT32_TO_PTR(c)) >= 0);
3905 }
3906 }
3907
3908 void manager_unref_uid(Manager *m, uid_t uid, bool destroy_now) {
3909 manager_unref_uid_internal(m, &m->uid_refs, uid, destroy_now, clean_ipc_by_uid);
3910 }
3911
3912 void manager_unref_gid(Manager *m, gid_t gid, bool destroy_now) {
3913 manager_unref_uid_internal(m, &m->gid_refs, (uid_t) gid, destroy_now, clean_ipc_by_gid);
3914 }
3915
3916 static int manager_ref_uid_internal(
3917 Manager *m,
3918 Hashmap **uid_refs,
3919 uid_t uid,
3920 bool clean_ipc) {
3921
3922 uint32_t c, n;
3923 int r;
3924
3925 assert(m);
3926 assert(uid_refs);
3927 assert(uid_is_valid(uid));
3928
3929 /* A generic implementation, covering both manager_ref_uid() and manager_ref_gid(), under the assumption
3930 * that uid_t and gid_t are actually defined the same way, with the same validity rules. */
3931
3932 assert_cc(sizeof(uid_t) == sizeof(gid_t));
3933 assert_cc(UID_INVALID == (uid_t) GID_INVALID);
3934
3935 if (uid == 0) /* We don't keep track of root, and will never destroy it */
3936 return 0;
3937
3938 r = hashmap_ensure_allocated(uid_refs, &trivial_hash_ops);
3939 if (r < 0)
3940 return r;
3941
3942 c = PTR_TO_UINT32(hashmap_get(*uid_refs, UID_TO_PTR(uid)));
3943
3944 n = c & ~DESTROY_IPC_FLAG;
3945 n++;
3946
3947 if (n & DESTROY_IPC_FLAG) /* check for overflow */
3948 return -EOVERFLOW;
3949
3950 c = n | (c & DESTROY_IPC_FLAG) | (clean_ipc ? DESTROY_IPC_FLAG : 0);
3951
3952 return hashmap_replace(*uid_refs, UID_TO_PTR(uid), UINT32_TO_PTR(c));
3953 }
3954
3955 int manager_ref_uid(Manager *m, uid_t uid, bool clean_ipc) {
3956 return manager_ref_uid_internal(m, &m->uid_refs, uid, clean_ipc);
3957 }
3958
3959 int manager_ref_gid(Manager *m, gid_t gid, bool clean_ipc) {
3960 return manager_ref_uid_internal(m, &m->gid_refs, (uid_t) gid, clean_ipc);
3961 }
3962
3963 static void manager_vacuum_uid_refs_internal(
3964 Manager *m,
3965 Hashmap **uid_refs,
3966 int (*_clean_ipc)(uid_t uid)) {
3967
3968 Iterator i;
3969 void *p, *k;
3970
3971 assert(m);
3972 assert(uid_refs);
3973 assert(_clean_ipc);
3974
3975 HASHMAP_FOREACH_KEY(p, k, *uid_refs, i) {
3976 uint32_t c, n;
3977 uid_t uid;
3978
3979 uid = PTR_TO_UID(k);
3980 c = PTR_TO_UINT32(p);
3981
3982 n = c & ~DESTROY_IPC_FLAG;
3983 if (n > 0)
3984 continue;
3985
3986 if (c & DESTROY_IPC_FLAG) {
3987 log_debug("Found unreferenced %s " UID_FMT " after reload/reexec. Cleaning up.",
3988 _clean_ipc == clean_ipc_by_uid ? "UID" : "GID",
3989 uid);
3990 (void) _clean_ipc(uid);
3991 }
3992
3993 assert_se(hashmap_remove(*uid_refs, k) == p);
3994 }
3995 }
3996
3997 void manager_vacuum_uid_refs(Manager *m) {
3998 manager_vacuum_uid_refs_internal(m, &m->uid_refs, clean_ipc_by_uid);
3999 }
4000
4001 void manager_vacuum_gid_refs(Manager *m) {
4002 manager_vacuum_uid_refs_internal(m, &m->gid_refs, clean_ipc_by_gid);
4003 }
4004
4005 static void manager_serialize_uid_refs_internal(
4006 Manager *m,
4007 FILE *f,
4008 Hashmap **uid_refs,
4009 const char *field_name) {
4010
4011 Iterator i;
4012 void *p, *k;
4013
4014 assert(m);
4015 assert(f);
4016 assert(uid_refs);
4017 assert(field_name);
4018
4019 /* Serialize the UID reference table. Or actually, just its IPC destruction flags, as the actual reference
4020 * counts are better rebuilt after a reload/reexec. */
4021
4022 HASHMAP_FOREACH_KEY(p, k, *uid_refs, i) {
4023 uint32_t c;
4024 uid_t uid;
4025
4026 uid = PTR_TO_UID(k);
4027 c = PTR_TO_UINT32(p);
4028
4029 if (!(c & DESTROY_IPC_FLAG))
4030 continue;
4031
4032 fprintf(f, "%s=" UID_FMT "\n", field_name, uid);
4033 }
4034 }
4035
4036 void manager_serialize_uid_refs(Manager *m, FILE *f) {
4037 manager_serialize_uid_refs_internal(m, f, &m->uid_refs, "destroy-ipc-uid");
4038 }
4039
4040 void manager_serialize_gid_refs(Manager *m, FILE *f) {
4041 manager_serialize_uid_refs_internal(m, f, &m->gid_refs, "destroy-ipc-gid");
4042 }
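
/* Editorial note: the serialized form is one "<field>=<numeric ID>" line per UID/GID that has the IPC
 * destruction flag set, for example:
 *
 *     destroy-ipc-uid=1000
 *     destroy-ipc-gid=1000
 *
 * (UID/GID 1000 is used purely as an example.) These lines are parsed again by
 * manager_deserialize_uid_refs_one()/manager_deserialize_gid_refs_one() below. */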
4043
4044 static void manager_deserialize_uid_refs_one_internal(
4045 Manager *m,
4046 Hashmap** uid_refs,
4047 const char *value) {
4048
4049 uid_t uid;
4050 uint32_t c;
4051 int r;
4052
4053 assert(m);
4054 assert(uid_refs);
4055 assert(value);
4056
4057 r = parse_uid(value, &uid);
4058 if (r < 0 || uid == 0) {
4059 log_debug("Unable to parse UID reference serialization");
4060 return;
4061 }
4062
4063 r = hashmap_ensure_allocated(uid_refs, &trivial_hash_ops);
4064 if (r < 0) {
4065 log_oom();
4066 return;
4067 }
4068
4069 c = PTR_TO_UINT32(hashmap_get(*uid_refs, UID_TO_PTR(uid)));
4070 if (c & DESTROY_IPC_FLAG)
4071 return;
4072
4073 c |= DESTROY_IPC_FLAG;
4074
4075 r = hashmap_replace(*uid_refs, UID_TO_PTR(uid), UINT32_TO_PTR(c));
4076 if (r < 0) {
4077 log_debug("Failed to add UID reference entry");
4078 return;
4079 }
4080 }
4081
4082 void manager_deserialize_uid_refs_one(Manager *m, const char *value) {
4083 manager_deserialize_uid_refs_one_internal(m, &m->uid_refs, value);
4084 }
4085
4086 void manager_deserialize_gid_refs_one(Manager *m, const char *value) {
4087 manager_deserialize_uid_refs_one_internal(m, &m->gid_refs, value);
4088 }
4089
4090 int manager_dispatch_user_lookup_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
4091 struct buffer {
4092 uid_t uid;
4093 gid_t gid;
4094 char unit_name[UNIT_NAME_MAX+1];
4095 } _packed_ buffer;
4096
4097 Manager *m = userdata;
4098 ssize_t l;
4099 size_t n;
4100 Unit *u;
4101
4102 assert_se(source);
4103 assert_se(m);
4104
4105 /* Invoked whenever a child process succeeded in resolving the user/group to use and sent us the resulting
4106 * UID/GID in a datagram. We parse the datagram here and pass it on to the unit, so that it can add a
4107 * reference to the UID/GID and destroy the UID/GID's IPC objects once the reference counter drops to 0. */
4108
4109 l = recv(fd, &buffer, sizeof(buffer), MSG_DONTWAIT);
4110 if (l < 0) {
4111 if (IN_SET(errno, EINTR, EAGAIN))
4112 return 0;
4113
4114 return log_error_errno(errno, "Failed to read from user lookup fd: %m");
4115 }
4116
4117 if ((size_t) l <= offsetof(struct buffer, unit_name)) {
4118 log_warning("Received too short user lookup message, ignoring.");
4119 return 0;
4120 }
4121
4122 if ((size_t) l > offsetof(struct buffer, unit_name) + UNIT_NAME_MAX) {
4123 log_warning("Received too long user lookup message, ignoring.");
4124 return 0;
4125 }
4126
4127 if (!uid_is_valid(buffer.uid) && !gid_is_valid(buffer.gid)) {
4128 log_warning("Got user lookup message with invalid UID/GID pair, ignoring.");
4129 return 0;
4130 }
4131
4132 n = (size_t) l - offsetof(struct buffer, unit_name);
4133 if (memchr(buffer.unit_name, 0, n)) {
4134 log_warning("Received lookup message with embedded NUL character, ignoring.");
4135 return 0;
4136 }
4137
4138 buffer.unit_name[n] = 0;
4139 u = manager_get_unit(m, buffer.unit_name);
4140 if (!u) {
4141 log_debug("Got user lookup message but unit doesn't exist, ignoring.");
4142 return 0;
4143 }
4144
4145 log_unit_debug(u, "User lookup succeeded: uid=" UID_FMT " gid=" GID_FMT, buffer.uid, buffer.gid);
4146
4147 unit_notify_user_lookup(u, buffer.uid, buffer.gid);
4148 return 0;
4149 }
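
/* Editorial sketch (not part of the upstream source): the sending side of this datagram protocol, for
 * illustration only. How the write end of the socket reaches the child and the helper's name are
 * assumptions; only the wire format mirrors the receiver above. */
#if 0
static int sketch_send_user_lookup(int user_lookup_fd, uid_t uid, gid_t gid, const char *unit_name) {
        struct buffer {
                uid_t uid;
                gid_t gid;
                char unit_name[UNIT_NAME_MAX+1];
        } _packed_ buffer = {
                .uid = uid,
                .gid = gid,
        };
        size_t l;

        /* The receiver expects a fixed uid/gid header followed by a non-NUL-terminated unit name of
         * 1 .. UNIT_NAME_MAX bytes. */
        l = strlen(unit_name);
        if (l < 1 || l > UNIT_NAME_MAX)
                return -EINVAL;
        memcpy(buffer.unit_name, unit_name, l);

        if (send(user_lookup_fd, &buffer, offsetof(struct buffer, unit_name) + l, MSG_DONTWAIT) < 0)
                return -errno;

        return 0;
}
#endif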
4150
4151 char *manager_taint_string(Manager *m) {
4152 _cleanup_free_ char *destination = NULL, *overflowuid = NULL, *overflowgid = NULL;
4153 char *buf, *e;
4154 int r;
4155
4156 /* Returns a "taint string", e.g. "local-hwclock:var-run-bad".
4157 * Only things that are detected at runtime should be tagged
4158 * here. For stuff that is set during compilation, emit a warning
4159 * in the configuration phase. */
4160
4161 assert(m);
4162
4163 buf = new(char, sizeof("split-usr:"
4164 "cgroups-missing:"
4165 "local-hwclock:"
4166 "var-run-bad:"
4167 "overflowuid-not-65534:"
4168 "overflowgid-not-65534:"));
4169 if (!buf)
4170 return NULL;
4171
4172 e = buf;
4173 buf[0] = 0;
4174
4175 if (m->taint_usr)
4176 e = stpcpy(e, "split-usr:");
4177
4178 if (access("/proc/cgroups", F_OK) < 0)
4179 e = stpcpy(e, "cgroups-missing:");
4180
4181 if (clock_is_localtime(NULL) > 0)
4182 e = stpcpy(e, "local-hwclock:");
4183
4184 r = readlink_malloc("/var/run", &destination);
4185 if (r < 0 || !PATH_IN_SET(destination, "../run", "/run"))
4186 e = stpcpy(e, "var-run-bad:");
4187
4188 r = read_one_line_file("/proc/sys/kernel/overflowuid", &overflowuid);
4189 if (r >= 0 && !streq(overflowuid, "65534"))
4190 e = stpcpy(e, "overflowuid-not-65534:");
4191
4192 r = read_one_line_file("/proc/sys/kernel/overflowgid", &overflowgid);
4193 if (r >= 0 && !streq(overflowgid, "65534"))
4194 e = stpcpy(e, "overflowgid-not-65534:");
4195
4196 /* remove the last ':' */
4197 if (e != buf)
4198 e[-1] = 0;
4199
4200 return buf;
4201 }
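
/* Editorial sketch (not part of the upstream source): how a caller might consume the taint string; the
 * log wording is an assumption for illustration. */
#if 0
static void sketch_log_taint(Manager *m) {
        _cleanup_free_ char *taint = NULL;

        taint = manager_taint_string(m);
        if (!isempty(taint))
                log_warning("System is tainted: %s", taint);
}
#endif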
4202
4203 void manager_ref_console(Manager *m) {
4204 assert(m);
4205
4206 m->n_on_console++;
4207 }
4208
4209 void manager_unref_console(Manager *m) {
4210
4211 assert(m->n_on_console > 0);
4212 m->n_on_console--;
4213
4214 if (m->n_on_console == 0)
4215 m->no_console_output = false; /* unset no_console_output flag, since the console is definitely free now */
4216 }
4217
4218 static const char *const manager_state_table[_MANAGER_STATE_MAX] = {
4219 [MANAGER_INITIALIZING] = "initializing",
4220 [MANAGER_STARTING] = "starting",
4221 [MANAGER_RUNNING] = "running",
4222 [MANAGER_DEGRADED] = "degraded",
4223 [MANAGER_MAINTENANCE] = "maintenance",
4224 [MANAGER_STOPPING] = "stopping",
4225 };
4226
4227 DEFINE_STRING_TABLE_LOOKUP(manager_state, ManagerState);
4228
4229 static const char *const manager_timestamp_table[_MANAGER_TIMESTAMP_MAX] = {
4230 [MANAGER_TIMESTAMP_FIRMWARE] = "firmware",
4231 [MANAGER_TIMESTAMP_LOADER] = "loader",
4232 [MANAGER_TIMESTAMP_KERNEL] = "kernel",
4233 [MANAGER_TIMESTAMP_INITRD] = "initrd",
4234 [MANAGER_TIMESTAMP_USERSPACE] = "userspace",
4235 [MANAGER_TIMESTAMP_FINISH] = "finish",
4236 [MANAGER_TIMESTAMP_SECURITY_START] = "security-start",
4237 [MANAGER_TIMESTAMP_SECURITY_FINISH] = "security-finish",
4238 [MANAGER_TIMESTAMP_GENERATORS_START] = "generators-start",
4239 [MANAGER_TIMESTAMP_GENERATORS_FINISH] = "generators-finish",
4240 [MANAGER_TIMESTAMP_UNITS_LOAD_START] = "units-load-start",
4241 [MANAGER_TIMESTAMP_UNITS_LOAD_FINISH] = "units-load-finish",
4242 };
4243
4244 DEFINE_STRING_TABLE_LOOKUP(manager_timestamp, ManagerTimestamp);