]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/manager.c
manager: make use of pid_is_valid() where appropriate
[thirdparty/systemd.git] / src / core / manager.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2010 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <linux/kd.h>
24 #include <signal.h>
25 #include <stdio_ext.h>
26 #include <string.h>
27 #include <sys/epoll.h>
28 #include <sys/inotify.h>
29 #include <sys/ioctl.h>
30 #include <sys/reboot.h>
31 #include <sys/timerfd.h>
32 #include <sys/wait.h>
33 #include <unistd.h>
34
35 #if HAVE_AUDIT
36 #include <libaudit.h>
37 #endif
38
39 #include "sd-daemon.h"
40 #include "sd-messages.h"
41 #include "sd-path.h"
42
43 #include "alloc-util.h"
44 #include "audit-fd.h"
45 #include "boot-timestamps.h"
46 #include "bus-common-errors.h"
47 #include "bus-error.h"
48 #include "bus-kernel.h"
49 #include "bus-util.h"
50 #include "clean-ipc.h"
51 #include "clock-util.h"
52 #include "dbus-job.h"
53 #include "dbus-manager.h"
54 #include "dbus-unit.h"
55 #include "dbus.h"
56 #include "dirent-util.h"
57 #include "env-util.h"
58 #include "escape.h"
59 #include "exec-util.h"
60 #include "execute.h"
61 #include "exit-status.h"
62 #include "fd-util.h"
63 #include "fileio.h"
64 #include "fs-util.h"
65 #include "hashmap.h"
66 #include "io-util.h"
67 #include "label.h"
68 #include "locale-setup.h"
69 #include "log.h"
70 #include "macro.h"
71 #include "manager.h"
72 #include "missing.h"
73 #include "mkdir.h"
74 #include "parse-util.h"
75 #include "path-lookup.h"
76 #include "path-util.h"
77 #include "process-util.h"
78 #include "ratelimit.h"
79 #include "rm-rf.h"
80 #include "signal-util.h"
81 #include "special.h"
82 #include "stat-util.h"
83 #include "string-table.h"
84 #include "string-util.h"
85 #include "strv.h"
86 #include "terminal-util.h"
87 #include "time-util.h"
88 #include "transaction.h"
89 #include "umask-util.h"
90 #include "unit-name.h"
91 #include "user-util.h"
92 #include "util.h"
93 #include "virt.h"
94 #include "watchdog.h"
95
/* Receive buffer sizes (8 MiB each) requested for the notification socket and the cgroups agent socket */
#define NOTIFY_RCVBUF_SIZE (8*1024*1024)
#define CGROUPS_AGENT_RCVBUF_SIZE (8*1024*1024)

/* Timing of the "jobs in progress" status output: how long to wait before the first message, and the
 * period/divisor pair used while cycling through the running jobs */
#define JOBS_IN_PROGRESS_WAIT_USEC (5*USEC_PER_SEC)
#define JOBS_IN_PROGRESS_PERIOD_USEC (USEC_PER_SEC / 3)
#define JOBS_IN_PROGRESS_PERIOD_DIVISOR 3
103
104 static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
105 static int manager_dispatch_cgroups_agent_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
106 static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
107 static int manager_dispatch_time_change_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
108 static int manager_dispatch_idle_pipe_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
109 static int manager_dispatch_user_lookup_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
110 static int manager_dispatch_jobs_in_progress(sd_event_source *source, usec_t usec, void *userdata);
111 static int manager_dispatch_run_queue(sd_event_source *source, void *userdata);
112 static int manager_run_environment_generators(Manager *m);
113 static int manager_run_generators(Manager *m);
114
115 static void manager_watch_jobs_in_progress(Manager *m) {
116 usec_t next;
117 int r;
118
119 assert(m);
120
121 /* We do not want to show the cylon animation if the user
122 * needs to confirm service executions otherwise confirmation
123 * messages will be screwed by the cylon animation. */
124 if (!manager_is_confirm_spawn_disabled(m))
125 return;
126
127 if (m->jobs_in_progress_event_source)
128 return;
129
130 next = now(CLOCK_MONOTONIC) + JOBS_IN_PROGRESS_WAIT_USEC;
131 r = sd_event_add_time(
132 m->event,
133 &m->jobs_in_progress_event_source,
134 CLOCK_MONOTONIC,
135 next, 0,
136 manager_dispatch_jobs_in_progress, m);
137 if (r < 0)
138 return;
139
140 (void) sd_event_source_set_description(m->jobs_in_progress_event_source, "manager-jobs-in-progress");
141 }
142
143 #define CYLON_BUFFER_EXTRA (2*STRLEN(ANSI_RED) + STRLEN(ANSI_HIGHLIGHT_RED) + 2*STRLEN(ANSI_NORMAL))
144
145 static void draw_cylon(char buffer[], size_t buflen, unsigned width, unsigned pos) {
146 char *p = buffer;
147
148 assert(buflen >= CYLON_BUFFER_EXTRA + width + 1);
149 assert(pos <= width+1); /* 0 or width+1 mean that the center light is behind the corner */
150
151 if (pos > 1) {
152 if (pos > 2)
153 p = mempset(p, ' ', pos-2);
154 if (log_get_show_color())
155 p = stpcpy(p, ANSI_RED);
156 *p++ = '*';
157 }
158
159 if (pos > 0 && pos <= width) {
160 if (log_get_show_color())
161 p = stpcpy(p, ANSI_HIGHLIGHT_RED);
162 *p++ = '*';
163 }
164
165 if (log_get_show_color())
166 p = stpcpy(p, ANSI_NORMAL);
167
168 if (pos < width) {
169 if (log_get_show_color())
170 p = stpcpy(p, ANSI_RED);
171 *p++ = '*';
172 if (pos < width-1)
173 p = mempset(p, ' ', width-1-pos);
174 if (log_get_show_color())
175 strcpy(p, ANSI_NORMAL);
176 }
177 }
178
179 void manager_flip_auto_status(Manager *m, bool enable) {
180 assert(m);
181
182 if (enable) {
183 if (m->show_status == SHOW_STATUS_AUTO)
184 manager_set_show_status(m, SHOW_STATUS_TEMPORARY);
185 } else {
186 if (m->show_status == SHOW_STATUS_TEMPORARY)
187 manager_set_show_status(m, SHOW_STATUS_AUTO);
188 }
189 }
190
191 static void manager_print_jobs_in_progress(Manager *m) {
192 _cleanup_free_ char *job_of_n = NULL;
193 Iterator i;
194 Job *j;
195 unsigned counter = 0, print_nr;
196 char cylon[6 + CYLON_BUFFER_EXTRA + 1];
197 unsigned cylon_pos;
198 char time[FORMAT_TIMESPAN_MAX], limit[FORMAT_TIMESPAN_MAX] = "no limit";
199 uint64_t x;
200
201 assert(m);
202 assert(m->n_running_jobs > 0);
203
204 manager_flip_auto_status(m, true);
205
206 print_nr = (m->jobs_in_progress_iteration / JOBS_IN_PROGRESS_PERIOD_DIVISOR) % m->n_running_jobs;
207
208 HASHMAP_FOREACH(j, m->jobs, i)
209 if (j->state == JOB_RUNNING && counter++ == print_nr)
210 break;
211
212 /* m->n_running_jobs must be consistent with the contents of m->jobs,
213 * so the above loop must have succeeded in finding j. */
214 assert(counter == print_nr + 1);
215 assert(j);
216
217 cylon_pos = m->jobs_in_progress_iteration % 14;
218 if (cylon_pos >= 8)
219 cylon_pos = 14 - cylon_pos;
220 draw_cylon(cylon, sizeof(cylon), 6, cylon_pos);
221
222 m->jobs_in_progress_iteration++;
223
224 if (m->n_running_jobs > 1) {
225 if (asprintf(&job_of_n, "(%u of %u) ", counter, m->n_running_jobs) < 0)
226 job_of_n = NULL;
227 }
228
229 format_timespan(time, sizeof(time), now(CLOCK_MONOTONIC) - j->begin_usec, 1*USEC_PER_SEC);
230 if (job_get_timeout(j, &x) > 0)
231 format_timespan(limit, sizeof(limit), x - j->begin_usec, 1*USEC_PER_SEC);
232
233 manager_status_printf(m, STATUS_TYPE_EPHEMERAL, cylon,
234 "%sA %s job is running for %s (%s / %s)",
235 strempty(job_of_n),
236 job_type_to_string(j->type),
237 unit_description(j->unit),
238 time, limit);
239 }
240
241 static int have_ask_password(void) {
242 _cleanup_closedir_ DIR *dir;
243 struct dirent *de;
244
245 dir = opendir("/run/systemd/ask-password");
246 if (!dir) {
247 if (errno == ENOENT)
248 return false;
249 else
250 return -errno;
251 }
252
253 FOREACH_DIRENT_ALL(de, dir, return -errno) {
254 if (startswith(de->d_name, "ask."))
255 return true;
256 }
257 return false;
258 }
259
260 static int manager_dispatch_ask_password_fd(sd_event_source *source,
261 int fd, uint32_t revents, void *userdata) {
262 Manager *m = userdata;
263
264 assert(m);
265
266 (void) flush_fd(fd);
267
268 m->have_ask_password = have_ask_password();
269 if (m->have_ask_password < 0)
270 /* Log error but continue. Negative have_ask_password
271 * is treated as unknown status. */
272 log_error_errno(m->have_ask_password, "Failed to list /run/systemd/ask-password: %m");
273
274 return 0;
275 }
276
277 static void manager_close_ask_password(Manager *m) {
278 assert(m);
279
280 m->ask_password_event_source = sd_event_source_unref(m->ask_password_event_source);
281 m->ask_password_inotify_fd = safe_close(m->ask_password_inotify_fd);
282 m->have_ask_password = -EINVAL;
283 }
284
285 static int manager_check_ask_password(Manager *m) {
286 int r;
287
288 assert(m);
289
290 if (!m->ask_password_event_source) {
291 assert(m->ask_password_inotify_fd < 0);
292
293 mkdir_p_label("/run/systemd/ask-password", 0755);
294
295 m->ask_password_inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
296 if (m->ask_password_inotify_fd < 0)
297 return log_error_errno(errno, "inotify_init1() failed: %m");
298
299 if (inotify_add_watch(m->ask_password_inotify_fd, "/run/systemd/ask-password", IN_CREATE|IN_DELETE|IN_MOVE) < 0) {
300 log_error_errno(errno, "Failed to add watch on /run/systemd/ask-password: %m");
301 manager_close_ask_password(m);
302 return -errno;
303 }
304
305 r = sd_event_add_io(m->event, &m->ask_password_event_source,
306 m->ask_password_inotify_fd, EPOLLIN,
307 manager_dispatch_ask_password_fd, m);
308 if (r < 0) {
309 log_error_errno(errno, "Failed to add event source for /run/systemd/ask-password: %m");
310 manager_close_ask_password(m);
311 return -errno;
312 }
313
314 (void) sd_event_source_set_description(m->ask_password_event_source, "manager-ask-password");
315
316 /* Queries might have been added meanwhile... */
317 manager_dispatch_ask_password_fd(m->ask_password_event_source,
318 m->ask_password_inotify_fd, EPOLLIN, m);
319 }
320
321 return m->have_ask_password;
322 }
323
324 static int manager_watch_idle_pipe(Manager *m) {
325 int r;
326
327 assert(m);
328
329 if (m->idle_pipe_event_source)
330 return 0;
331
332 if (m->idle_pipe[2] < 0)
333 return 0;
334
335 r = sd_event_add_io(m->event, &m->idle_pipe_event_source, m->idle_pipe[2], EPOLLIN, manager_dispatch_idle_pipe_fd, m);
336 if (r < 0)
337 return log_error_errno(r, "Failed to watch idle pipe: %m");
338
339 (void) sd_event_source_set_description(m->idle_pipe_event_source, "manager-idle-pipe");
340
341 return 0;
342 }
343
344 static void manager_close_idle_pipe(Manager *m) {
345 assert(m);
346
347 m->idle_pipe_event_source = sd_event_source_unref(m->idle_pipe_event_source);
348
349 safe_close_pair(m->idle_pipe);
350 safe_close_pair(m->idle_pipe + 2);
351 }
352
353 static int manager_setup_time_change(Manager *m) {
354 int r;
355
356 /* We only care for the cancellation event, hence we set the
357 * timeout to the latest possible value. */
358 struct itimerspec its = {
359 .it_value.tv_sec = TIME_T_MAX,
360 };
361
362 assert(m);
363 assert_cc(sizeof(time_t) == sizeof(TIME_T_MAX));
364
365 if (m->test_run_flags)
366 return 0;
367
368 /* Uses TFD_TIMER_CANCEL_ON_SET to get notifications whenever
369 * CLOCK_REALTIME makes a jump relative to CLOCK_MONOTONIC */
370
371 m->time_change_fd = timerfd_create(CLOCK_REALTIME, TFD_NONBLOCK|TFD_CLOEXEC);
372 if (m->time_change_fd < 0)
373 return log_error_errno(errno, "Failed to create timerfd: %m");
374
375 if (timerfd_settime(m->time_change_fd, TFD_TIMER_ABSTIME|TFD_TIMER_CANCEL_ON_SET, &its, NULL) < 0) {
376 log_debug_errno(errno, "Failed to set up TFD_TIMER_CANCEL_ON_SET, ignoring: %m");
377 m->time_change_fd = safe_close(m->time_change_fd);
378 return 0;
379 }
380
381 r = sd_event_add_io(m->event, &m->time_change_event_source, m->time_change_fd, EPOLLIN, manager_dispatch_time_change_fd, m);
382 if (r < 0)
383 return log_error_errno(r, "Failed to create time change event source: %m");
384
385 (void) sd_event_source_set_description(m->time_change_event_source, "manager-time-change");
386
387 log_debug("Set up TFD_TIMER_CANCEL_ON_SET timerfd.");
388
389 return 0;
390 }
391
392 static int enable_special_signals(Manager *m) {
393 _cleanup_close_ int fd = -1;
394
395 assert(m);
396
397 if (m->test_run_flags)
398 return 0;
399
400 /* Enable that we get SIGINT on control-alt-del. In containers
401 * this will fail with EPERM (older) or EINVAL (newer), so
402 * ignore that. */
403 if (reboot(RB_DISABLE_CAD) < 0 && !IN_SET(errno, EPERM, EINVAL))
404 log_warning_errno(errno, "Failed to enable ctrl-alt-del handling: %m");
405
406 fd = open_terminal("/dev/tty0", O_RDWR|O_NOCTTY|O_CLOEXEC);
407 if (fd < 0) {
408 /* Support systems without virtual console */
409 if (fd != -ENOENT)
410 log_warning_errno(errno, "Failed to open /dev/tty0: %m");
411 } else {
412 /* Enable that we get SIGWINCH on kbrequest */
413 if (ioctl(fd, KDSIGACCEPT, SIGWINCH) < 0)
414 log_warning_errno(errno, "Failed to enable kbrequest handling: %m");
415 }
416
417 return 0;
418 }
419
420 static int manager_setup_signals(Manager *m) {
421 struct sigaction sa = {
422 .sa_handler = SIG_DFL,
423 .sa_flags = SA_NOCLDSTOP|SA_RESTART,
424 };
425 sigset_t mask;
426 int r;
427
428 assert(m);
429
430 assert_se(sigaction(SIGCHLD, &sa, NULL) == 0);
431
432 /* We make liberal use of realtime signals here. On
433 * Linux/glibc we have 30 of them (with the exception of Linux
434 * on hppa, see below), between SIGRTMIN+0 ... SIGRTMIN+30
435 * (aka SIGRTMAX). */
436
437 assert_se(sigemptyset(&mask) == 0);
438 sigset_add_many(&mask,
439 SIGCHLD, /* Child died */
440 SIGTERM, /* Reexecute daemon */
441 SIGHUP, /* Reload configuration */
442 SIGUSR1, /* systemd/upstart: reconnect to D-Bus */
443 SIGUSR2, /* systemd: dump status */
444 SIGINT, /* Kernel sends us this on control-alt-del */
445 SIGWINCH, /* Kernel sends us this on kbrequest (alt-arrowup) */
446 SIGPWR, /* Some kernel drivers and upsd send us this on power failure */
447
448 SIGRTMIN+0, /* systemd: start default.target */
449 SIGRTMIN+1, /* systemd: isolate rescue.target */
450 SIGRTMIN+2, /* systemd: isolate emergency.target */
451 SIGRTMIN+3, /* systemd: start halt.target */
452 SIGRTMIN+4, /* systemd: start poweroff.target */
453 SIGRTMIN+5, /* systemd: start reboot.target */
454 SIGRTMIN+6, /* systemd: start kexec.target */
455
456 /* ... space for more special targets ... */
457
458 SIGRTMIN+13, /* systemd: Immediate halt */
459 SIGRTMIN+14, /* systemd: Immediate poweroff */
460 SIGRTMIN+15, /* systemd: Immediate reboot */
461 SIGRTMIN+16, /* systemd: Immediate kexec */
462
463 /* ... space for more immediate system state changes ... */
464
465 SIGRTMIN+20, /* systemd: enable status messages */
466 SIGRTMIN+21, /* systemd: disable status messages */
467 SIGRTMIN+22, /* systemd: set log level to LOG_DEBUG */
468 SIGRTMIN+23, /* systemd: set log level to LOG_INFO */
469 SIGRTMIN+24, /* systemd: Immediate exit (--user only) */
470
471 /* .. one free signal here ... */
472
473 #if !defined(__hppa64__) && !defined(__hppa__)
474 /* Apparently Linux on hppa has fewer RT
475 * signals (SIGRTMAX is SIGRTMIN+25 there),
476 * hence let's not try to make use of them
477 * here. Since these commands are accessible
478 * by different means and only really a safety
479 * net, the missing functionality on hppa
480 * shouldn't matter. */
481
482 SIGRTMIN+26, /* systemd: set log target to journal-or-kmsg */
483 SIGRTMIN+27, /* systemd: set log target to console */
484 SIGRTMIN+28, /* systemd: set log target to kmsg */
485 SIGRTMIN+29, /* systemd: set log target to syslog-or-kmsg (obsolete) */
486
487 /* ... one free signal here SIGRTMIN+30 ... */
488 #endif
489 -1);
490 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
491
492 m->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
493 if (m->signal_fd < 0)
494 return -errno;
495
496 r = sd_event_add_io(m->event, &m->signal_event_source, m->signal_fd, EPOLLIN, manager_dispatch_signal_fd, m);
497 if (r < 0)
498 return r;
499
500 (void) sd_event_source_set_description(m->signal_event_source, "manager-signal");
501
502 /* Process signals a bit earlier than the rest of things, but later than notify_fd processing, so that the
503 * notify processing can still figure out to which process/service a message belongs, before we reap the
504 * process. Also, process this before handling cgroup notifications, so that we always collect child exit
505 * status information before detecting that there's no process in a cgroup. */
506 r = sd_event_source_set_priority(m->signal_event_source, SD_EVENT_PRIORITY_NORMAL-6);
507 if (r < 0)
508 return r;
509
510 if (MANAGER_IS_SYSTEM(m))
511 return enable_special_signals(m);
512
513 return 0;
514 }
515
516 static void manager_sanitize_environment(Manager *m) {
517 assert(m);
518
519 /* Let's remove some environment variables that we need ourselves to communicate with our clients */
520 strv_env_unset_many(
521 m->environment,
522 "EXIT_CODE",
523 "EXIT_STATUS",
524 "INVOCATION_ID",
525 "JOURNAL_STREAM",
526 "LISTEN_FDNAMES",
527 "LISTEN_FDS",
528 "LISTEN_PID",
529 "MAINPID",
530 "MANAGERPID",
531 "NOTIFY_SOCKET",
532 "REMOTE_ADDR",
533 "REMOTE_PORT",
534 "SERVICE_RESULT",
535 "WATCHDOG_PID",
536 "WATCHDOG_USEC",
537 NULL);
538
539 /* Let's order the environment alphabetically, just to make it pretty */
540 strv_sort(m->environment);
541 }
542
543 static int manager_default_environment(Manager *m) {
544 assert(m);
545
546 if (MANAGER_IS_SYSTEM(m)) {
547 /* The system manager always starts with a clean
548 * environment for its children. It does not import
549 * the kernel's or the parents' exported variables.
550 *
551 * The initial passed environment is untouched to keep
552 * /proc/self/environ valid; it is used for tagging
553 * the init process inside containers. */
554 m->environment = strv_new("PATH=" DEFAULT_PATH,
555 NULL);
556
557 /* Import locale variables LC_*= from configuration */
558 locale_setup(&m->environment);
559 } else
560 /* The user manager passes its own environment
561 * along to its children. */
562 m->environment = strv_copy(environ);
563
564 if (!m->environment)
565 return -ENOMEM;
566
567 manager_sanitize_environment(m);
568
569 return 0;
570 }
571
572 static int manager_setup_prefix(Manager *m) {
573 struct table_entry {
574 uint64_t type;
575 const char *suffix;
576 };
577
578 static const struct table_entry paths_system[_EXEC_DIRECTORY_TYPE_MAX] = {
579 [EXEC_DIRECTORY_RUNTIME] = { SD_PATH_SYSTEM_RUNTIME, NULL },
580 [EXEC_DIRECTORY_STATE] = { SD_PATH_SYSTEM_STATE_PRIVATE, NULL },
581 [EXEC_DIRECTORY_CACHE] = { SD_PATH_SYSTEM_STATE_CACHE, NULL },
582 [EXEC_DIRECTORY_LOGS] = { SD_PATH_SYSTEM_STATE_LOGS, NULL },
583 [EXEC_DIRECTORY_CONFIGURATION] = { SD_PATH_SYSTEM_CONFIGURATION, NULL },
584 };
585
586 static const struct table_entry paths_user[_EXEC_DIRECTORY_TYPE_MAX] = {
587 [EXEC_DIRECTORY_RUNTIME] = { SD_PATH_USER_RUNTIME, NULL },
588 [EXEC_DIRECTORY_STATE] = { SD_PATH_USER_CONFIGURATION, NULL },
589 [EXEC_DIRECTORY_CACHE] = { SD_PATH_USER_STATE_CACHE, NULL },
590 [EXEC_DIRECTORY_LOGS] = { SD_PATH_USER_CONFIGURATION, "log" },
591 [EXEC_DIRECTORY_CONFIGURATION] = { SD_PATH_USER_CONFIGURATION, NULL },
592 };
593
594 const struct table_entry *p;
595 ExecDirectoryType i;
596 int r;
597
598 assert(m);
599
600 if (MANAGER_IS_SYSTEM(m))
601 p = paths_system;
602 else
603 p = paths_user;
604
605 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++) {
606 r = sd_path_home(p[i].type, p[i].suffix, &m->prefix[i]);
607 if (r < 0)
608 return r;
609 }
610
611 return 0;
612 }
613
614 static int manager_setup_run_queue(Manager *m) {
615 int r;
616
617 assert(m);
618 assert(!m->run_queue_event_source);
619
620 r = sd_event_add_defer(m->event, &m->run_queue_event_source, manager_dispatch_run_queue, m);
621 if (r < 0)
622 return r;
623
624 r = sd_event_source_set_priority(m->run_queue_event_source, SD_EVENT_PRIORITY_IDLE);
625 if (r < 0)
626 return r;
627
628 r = sd_event_source_set_enabled(m->run_queue_event_source, SD_EVENT_OFF);
629 if (r < 0)
630 return r;
631
632 (void) sd_event_source_set_description(m->run_queue_event_source, "manager-run-queue");
633
634 return 0;
635 }
636
637 int manager_new(UnitFileScope scope, unsigned test_run_flags, Manager **_m) {
638 Manager *m;
639 int r;
640
641 assert(_m);
642 assert(IN_SET(scope, UNIT_FILE_SYSTEM, UNIT_FILE_USER));
643
644 m = new0(Manager, 1);
645 if (!m)
646 return -ENOMEM;
647
648 m->unit_file_scope = scope;
649 m->exit_code = _MANAGER_EXIT_CODE_INVALID;
650 m->default_timer_accuracy_usec = USEC_PER_MINUTE;
651 m->default_tasks_accounting = true;
652 m->default_tasks_max = UINT64_MAX;
653 m->default_timeout_start_usec = DEFAULT_TIMEOUT_USEC;
654 m->default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC;
655 m->default_restart_usec = DEFAULT_RESTART_USEC;
656
657 #if ENABLE_EFI
658 if (MANAGER_IS_SYSTEM(m) && detect_container() <= 0)
659 boot_timestamps(m->timestamps + MANAGER_TIMESTAMP_USERSPACE,
660 m->timestamps + MANAGER_TIMESTAMP_FIRMWARE,
661 m->timestamps + MANAGER_TIMESTAMP_LOADER);
662 #endif
663
664 /* Prepare log fields we can use for structured logging */
665 if (MANAGER_IS_SYSTEM(m)) {
666 m->unit_log_field = "UNIT=";
667 m->unit_log_format_string = "UNIT=%s";
668
669 m->invocation_log_field = "INVOCATION_ID=";
670 m->invocation_log_format_string = "INVOCATION_ID=%s";
671 } else {
672 m->unit_log_field = "USER_UNIT=";
673 m->unit_log_format_string = "USER_UNIT=%s";
674
675 m->invocation_log_field = "USER_INVOCATION_ID=";
676 m->invocation_log_format_string = "USER_INVOCATION_ID=%s";
677 }
678
679 m->idle_pipe[0] = m->idle_pipe[1] = m->idle_pipe[2] = m->idle_pipe[3] = -1;
680
681 m->pin_cgroupfs_fd = m->notify_fd = m->cgroups_agent_fd = m->signal_fd = m->time_change_fd =
682 m->dev_autofs_fd = m->private_listen_fd = m->cgroup_inotify_fd =
683 m->ask_password_inotify_fd = -1;
684
685 m->user_lookup_fds[0] = m->user_lookup_fds[1] = -1;
686
687 m->current_job_id = 1; /* start as id #1, so that we can leave #0 around as "null-like" value */
688
689 m->have_ask_password = -EINVAL; /* we don't know */
690 m->first_boot = -1;
691
692 m->test_run_flags = test_run_flags;
693
694 /* Reboot immediately if the user hits C-A-D more often than 7x per 2s */
695 RATELIMIT_INIT(m->ctrl_alt_del_ratelimit, 2 * USEC_PER_SEC, 7);
696
697 r = manager_default_environment(m);
698 if (r < 0)
699 goto fail;
700
701 r = hashmap_ensure_allocated(&m->units, &string_hash_ops);
702 if (r < 0)
703 goto fail;
704
705 r = hashmap_ensure_allocated(&m->jobs, NULL);
706 if (r < 0)
707 goto fail;
708
709 r = hashmap_ensure_allocated(&m->cgroup_unit, &string_hash_ops);
710 if (r < 0)
711 goto fail;
712
713 r = hashmap_ensure_allocated(&m->watch_bus, &string_hash_ops);
714 if (r < 0)
715 goto fail;
716
717 r = sd_event_default(&m->event);
718 if (r < 0)
719 goto fail;
720
721 r = manager_setup_run_queue(m);
722 if (r < 0)
723 goto fail;
724
725 r = manager_setup_signals(m);
726 if (r < 0)
727 goto fail;
728
729 r = manager_setup_cgroup(m);
730 if (r < 0)
731 goto fail;
732
733 r = manager_setup_time_change(m);
734 if (r < 0)
735 goto fail;
736
737 m->udev = udev_new();
738 if (!m->udev) {
739 r = -ENOMEM;
740 goto fail;
741 }
742
743 r = manager_setup_prefix(m);
744 if (r < 0)
745 goto fail;
746
747 if (MANAGER_IS_SYSTEM(m) && test_run_flags == 0) {
748 r = mkdir_label("/run/systemd/units", 0755);
749 if (r < 0 && r != -EEXIST)
750 goto fail;
751 }
752
753 m->taint_usr =
754 !in_initrd() &&
755 dir_is_empty("/usr") > 0;
756
757 /* Note that we do not set up the notify fd here. We do that after deserialization,
758 * since they might have gotten serialized across the reexec. */
759
760 *_m = m;
761 return 0;
762
763 fail:
764 manager_free(m);
765 return r;
766 }
767
768 static int manager_setup_notify(Manager *m) {
769 int r;
770
771 if (m->test_run_flags)
772 return 0;
773
774 if (m->notify_fd < 0) {
775 _cleanup_close_ int fd = -1;
776 union sockaddr_union sa = {
777 .sa.sa_family = AF_UNIX,
778 };
779 static const int one = 1;
780
781 /* First free all secondary fields */
782 m->notify_socket = mfree(m->notify_socket);
783 m->notify_event_source = sd_event_source_unref(m->notify_event_source);
784
785 fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
786 if (fd < 0)
787 return log_error_errno(errno, "Failed to allocate notification socket: %m");
788
789 fd_inc_rcvbuf(fd, NOTIFY_RCVBUF_SIZE);
790
791 m->notify_socket = strappend(m->prefix[EXEC_DIRECTORY_RUNTIME], "/systemd/notify");
792 if (!m->notify_socket)
793 return log_oom();
794
795 (void) mkdir_parents_label(m->notify_socket, 0755);
796 (void) unlink(m->notify_socket);
797
798 strncpy(sa.un.sun_path, m->notify_socket, sizeof(sa.un.sun_path)-1);
799 r = bind(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
800 if (r < 0)
801 return log_error_errno(errno, "bind(%s) failed: %m", sa.un.sun_path);
802
803 r = setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
804 if (r < 0)
805 return log_error_errno(errno, "SO_PASSCRED failed: %m");
806
807 m->notify_fd = fd;
808 fd = -1;
809
810 log_debug("Using notification socket %s", m->notify_socket);
811 }
812
813 if (!m->notify_event_source) {
814 r = sd_event_add_io(m->event, &m->notify_event_source, m->notify_fd, EPOLLIN, manager_dispatch_notify_fd, m);
815 if (r < 0)
816 return log_error_errno(r, "Failed to allocate notify event source: %m");
817
818 /* Process notification messages a bit earlier than SIGCHLD, so that we can still identify to which
819 * service an exit message belongs. */
820 r = sd_event_source_set_priority(m->notify_event_source, SD_EVENT_PRIORITY_NORMAL-7);
821 if (r < 0)
822 return log_error_errno(r, "Failed to set priority of notify event source: %m");
823
824 (void) sd_event_source_set_description(m->notify_event_source, "manager-notify");
825 }
826
827 return 0;
828 }
829
830 static int manager_setup_cgroups_agent(Manager *m) {
831
832 static const union sockaddr_union sa = {
833 .un.sun_family = AF_UNIX,
834 .un.sun_path = "/run/systemd/cgroups-agent",
835 };
836 int r;
837
838 /* This creates a listening socket we receive cgroups agent messages on. We do not use D-Bus for delivering
839 * these messages from the cgroups agent binary to PID 1, as the cgroups agent binary is very short-living, and
840 * each instance of it needs a new D-Bus connection. Since D-Bus connections are SOCK_STREAM/AF_UNIX, on
841 * overloaded systems the backlog of the D-Bus socket becomes relevant, as not more than the configured number
842 * of D-Bus connections may be queued until the kernel will start dropping further incoming connections,
843 * possibly resulting in lost cgroups agent messages. To avoid this, we'll use a private SOCK_DGRAM/AF_UNIX
844 * socket, where no backlog is relevant as communication may take place without an actual connect() cycle, and
845 * we thus won't lose messages.
846 *
847 * Note that PID 1 will forward the agent message to system bus, so that the user systemd instance may listen
848 * to it. The system instance hence listens on this special socket, but the user instances listen on the system
849 * bus for these messages. */
850
851 if (m->test_run_flags)
852 return 0;
853
854 if (!MANAGER_IS_SYSTEM(m))
855 return 0;
856
857 r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
858 if (r < 0)
859 return log_error_errno(r, "Failed to determine whether unified cgroups hierarchy is used: %m");
860 if (r > 0) /* We don't need this anymore on the unified hierarchy */
861 return 0;
862
863 if (m->cgroups_agent_fd < 0) {
864 _cleanup_close_ int fd = -1;
865
866 /* First free all secondary fields */
867 m->cgroups_agent_event_source = sd_event_source_unref(m->cgroups_agent_event_source);
868
869 fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
870 if (fd < 0)
871 return log_error_errno(errno, "Failed to allocate cgroups agent socket: %m");
872
873 fd_inc_rcvbuf(fd, CGROUPS_AGENT_RCVBUF_SIZE);
874
875 (void) unlink(sa.un.sun_path);
876
877 /* Only allow root to connect to this socket */
878 RUN_WITH_UMASK(0077)
879 r = bind(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
880 if (r < 0)
881 return log_error_errno(errno, "bind(%s) failed: %m", sa.un.sun_path);
882
883 m->cgroups_agent_fd = fd;
884 fd = -1;
885 }
886
887 if (!m->cgroups_agent_event_source) {
888 r = sd_event_add_io(m->event, &m->cgroups_agent_event_source, m->cgroups_agent_fd, EPOLLIN, manager_dispatch_cgroups_agent_fd, m);
889 if (r < 0)
890 return log_error_errno(r, "Failed to allocate cgroups agent event source: %m");
891
892 /* Process cgroups notifications early, but after having processed service notification messages or
893 * SIGCHLD signals, so that a cgroup running empty is always just the last safety net of notification,
894 * and we collected the metadata the notification and SIGCHLD stuff offers first. Also see handling of
895 * cgroup inotify for the unified cgroup stuff. */
896 r = sd_event_source_set_priority(m->cgroups_agent_event_source, SD_EVENT_PRIORITY_NORMAL-4);
897 if (r < 0)
898 return log_error_errno(r, "Failed to set priority of cgroups agent event source: %m");
899
900 (void) sd_event_source_set_description(m->cgroups_agent_event_source, "manager-cgroups-agent");
901 }
902
903 return 0;
904 }
905
906 static int manager_setup_user_lookup_fd(Manager *m) {
907 int r;
908
909 assert(m);
910
911 /* Set up the socket pair used for passing UID/GID resolution results from forked off processes to PID
912 * 1. Background: we can't do name lookups (NSS) from PID 1, since it might involve IPC and thus activation,
913 * and we might hence deadlock on ourselves. Hence we do all user/group lookups asynchronously from the forked
914 * off processes right before executing the binaries to start. In order to be able to clean up any IPC objects
915 * created by a unit (see RemoveIPC=) we need to know in PID 1 the used UID/GID of the executed processes,
916 * hence we establish this communication channel so that forked off processes can pass their UID/GID
917 * information back to PID 1. The forked off processes send their resolved UID/GID to PID 1 in a simple
918 * datagram, along with their unit name, so that we can share one communication socket pair among all units for
919 * this purpose.
920 *
921 * You might wonder why we need a communication channel for this that is independent of the usual notification
922 * socket scheme (i.e. $NOTIFY_SOCKET). The primary difference is about trust: data sent via the $NOTIFY_SOCKET
923 * channel is only accepted if it originates from the right unit and if reception was enabled for it. The user
924 * lookup socket OTOH is only accessible by PID 1 and its children until they exec(), and always available.
925 *
926 * Note that this function is called under two circumstances: when we first initialize (in which case we
927 * allocate both the socket pair and the event source to listen on it), and when we deserialize after a reload
928 * (in which case the socket pair already exists but we still need to allocate the event source for it). */
929
930 if (m->user_lookup_fds[0] < 0) {
931
932 /* Free all secondary fields */
933 safe_close_pair(m->user_lookup_fds);
934 m->user_lookup_event_source = sd_event_source_unref(m->user_lookup_event_source);
935
936 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, m->user_lookup_fds) < 0)
937 return log_error_errno(errno, "Failed to allocate user lookup socket: %m");
938
939 (void) fd_inc_rcvbuf(m->user_lookup_fds[0], NOTIFY_RCVBUF_SIZE);
940 }
941
942 if (!m->user_lookup_event_source) {
943 r = sd_event_add_io(m->event, &m->user_lookup_event_source, m->user_lookup_fds[0], EPOLLIN, manager_dispatch_user_lookup_fd, m);
944 if (r < 0)
945 return log_error_errno(errno, "Failed to allocate user lookup event source: %m");
946
947 /* Process even earlier than the notify event source, so that we always know first about valid UID/GID
948 * resolutions */
949 r = sd_event_source_set_priority(m->user_lookup_event_source, SD_EVENT_PRIORITY_NORMAL-8);
950 if (r < 0)
951 return log_error_errno(errno, "Failed to set priority ot user lookup event source: %m");
952
953 (void) sd_event_source_set_description(m->user_lookup_event_source, "user-lookup");
954 }
955
956 return 0;
957 }
958
959 static int manager_connect_bus(Manager *m, bool reexecuting) {
960 bool try_bus_connect;
961 Unit *u = NULL;
962
963 assert(m);
964
965 if (m->test_run_flags)
966 return 0;
967
968 u = manager_get_unit(m, SPECIAL_DBUS_SERVICE);
969
970 try_bus_connect =
971 (u && SERVICE(u)->deserialized_state == SERVICE_RUNNING) &&
972 (reexecuting ||
973 (MANAGER_IS_USER(m) && getenv("DBUS_SESSION_BUS_ADDRESS")));
974
975 /* Try to connect to the buses, if possible. */
976 return bus_init(m, try_bus_connect);
977 }
978
979 static unsigned manager_dispatch_cleanup_queue(Manager *m) {
980 Unit *u;
981 unsigned n = 0;
982
983 assert(m);
984
985 while ((u = m->cleanup_queue)) {
986 assert(u->in_cleanup_queue);
987
988 unit_free(u);
989 n++;
990 }
991
992 return n;
993 }
994
/* Offsets added to the per-run GC marker to record what the sweep concluded about a unit. */
enum {
        GC_OFFSET_IN_PATH,  /* Unit is on the path currently being traversed by the sweep */
        GC_OFFSET_UNSURE,   /* Nothing conclusive is known about the unit yet */
        GC_OFFSET_GOOD,     /* Unit is still needed */
        GC_OFFSET_BAD,      /* Unit is no longer needed */
        _GC_OFFSET_MAX
};
1002
1003 static void unit_gc_mark_good(Unit *u, unsigned gc_marker) {
1004 Unit *other;
1005 Iterator i;
1006 void *v;
1007
1008 u->gc_marker = gc_marker + GC_OFFSET_GOOD;
1009
1010 /* Recursively mark referenced units as GOOD as well */
1011 HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_REFERENCES], i)
1012 if (other->gc_marker == gc_marker + GC_OFFSET_UNSURE)
1013 unit_gc_mark_good(other, gc_marker);
1014 }
1015
1016 static void unit_gc_sweep(Unit *u, unsigned gc_marker) {
1017 Unit *other;
1018 bool is_bad;
1019 Iterator i;
1020 void *v;
1021
1022 assert(u);
1023
1024 if (IN_SET(u->gc_marker - gc_marker,
1025 GC_OFFSET_GOOD, GC_OFFSET_BAD, GC_OFFSET_UNSURE, GC_OFFSET_IN_PATH))
1026 return;
1027
1028 if (u->in_cleanup_queue)
1029 goto bad;
1030
1031 if (unit_check_gc(u))
1032 goto good;
1033
1034 u->gc_marker = gc_marker + GC_OFFSET_IN_PATH;
1035
1036 is_bad = true;
1037
1038 HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_REFERENCED_BY], i) {
1039 unit_gc_sweep(other, gc_marker);
1040
1041 if (other->gc_marker == gc_marker + GC_OFFSET_GOOD)
1042 goto good;
1043
1044 if (other->gc_marker != gc_marker + GC_OFFSET_BAD)
1045 is_bad = false;
1046 }
1047
1048 if (is_bad)
1049 goto bad;
1050
1051 /* We were unable to find anything out about this entry, so
1052 * let's investigate it later */
1053 u->gc_marker = gc_marker + GC_OFFSET_UNSURE;
1054 unit_add_to_gc_queue(u);
1055 return;
1056
1057 bad:
1058 /* We definitely know that this one is not useful anymore, so
1059 * let's mark it for deletion */
1060 u->gc_marker = gc_marker + GC_OFFSET_BAD;
1061 unit_add_to_cleanup_queue(u);
1062 return;
1063
1064 good:
1065 unit_gc_mark_good(u, gc_marker);
1066 }
1067
1068 static unsigned manager_dispatch_gc_unit_queue(Manager *m) {
1069 unsigned n = 0, gc_marker;
1070 Unit *u;
1071
1072 assert(m);
1073
1074 /* log_debug("Running GC..."); */
1075
1076 m->gc_marker += _GC_OFFSET_MAX;
1077 if (m->gc_marker + _GC_OFFSET_MAX <= _GC_OFFSET_MAX)
1078 m->gc_marker = 1;
1079
1080 gc_marker = m->gc_marker;
1081
1082 while ((u = m->gc_unit_queue)) {
1083 assert(u->in_gc_queue);
1084
1085 unit_gc_sweep(u, gc_marker);
1086
1087 LIST_REMOVE(gc_queue, m->gc_unit_queue, u);
1088 u->in_gc_queue = false;
1089
1090 n++;
1091
1092 if (IN_SET(u->gc_marker - gc_marker,
1093 GC_OFFSET_BAD, GC_OFFSET_UNSURE)) {
1094 if (u->id)
1095 log_unit_debug(u, "Collecting.");
1096 u->gc_marker = gc_marker + GC_OFFSET_BAD;
1097 unit_add_to_cleanup_queue(u);
1098 }
1099 }
1100
1101 return n;
1102 }
1103
1104 static unsigned manager_dispatch_gc_job_queue(Manager *m) {
1105 unsigned n = 0;
1106 Job *j;
1107
1108 assert(m);
1109
1110 while ((j = m->gc_job_queue)) {
1111 assert(j->in_gc_queue);
1112
1113 LIST_REMOVE(gc_queue, m->gc_job_queue, j);
1114 j->in_gc_queue = false;
1115
1116 n++;
1117
1118 if (job_check_gc(j))
1119 continue;
1120
1121 log_unit_debug(j->unit, "Collecting job.");
1122 (void) job_finish_and_invalidate(j, JOB_COLLECTED, false, false);
1123 }
1124
1125 return n;
1126 }
1127
1128 static void manager_clear_jobs_and_units(Manager *m) {
1129 Unit *u;
1130
1131 assert(m);
1132
1133 while ((u = hashmap_first(m->units)))
1134 unit_free(u);
1135
1136 manager_dispatch_cleanup_queue(m);
1137
1138 assert(!m->load_queue);
1139 assert(!m->run_queue);
1140 assert(!m->dbus_unit_queue);
1141 assert(!m->dbus_job_queue);
1142 assert(!m->cleanup_queue);
1143 assert(!m->gc_unit_queue);
1144 assert(!m->gc_job_queue);
1145
1146 assert(hashmap_isempty(m->jobs));
1147 assert(hashmap_isempty(m->units));
1148
1149 m->n_on_console = 0;
1150 m->n_running_jobs = 0;
1151 }
1152
1153 Manager* manager_free(Manager *m) {
1154 UnitType c;
1155 int i;
1156 ExecDirectoryType dt;
1157
1158 if (!m)
1159 return NULL;
1160
1161 manager_clear_jobs_and_units(m);
1162
1163 for (c = 0; c < _UNIT_TYPE_MAX; c++)
1164 if (unit_vtable[c]->shutdown)
1165 unit_vtable[c]->shutdown(m);
1166
1167 /* If we reexecute ourselves, we keep the root cgroup around */
1168 manager_shutdown_cgroup(m, m->exit_code != MANAGER_REEXECUTE);
1169
1170 lookup_paths_flush_generator(&m->lookup_paths);
1171
1172 bus_done(m);
1173
1174 dynamic_user_vacuum(m, false);
1175 hashmap_free(m->dynamic_users);
1176
1177 hashmap_free(m->units);
1178 hashmap_free(m->units_by_invocation_id);
1179 hashmap_free(m->jobs);
1180 hashmap_free(m->watch_pids1);
1181 hashmap_free(m->watch_pids2);
1182 hashmap_free(m->watch_bus);
1183
1184 set_free(m->startup_units);
1185 set_free(m->failed_units);
1186
1187 sd_event_source_unref(m->signal_event_source);
1188 sd_event_source_unref(m->notify_event_source);
1189 sd_event_source_unref(m->cgroups_agent_event_source);
1190 sd_event_source_unref(m->time_change_event_source);
1191 sd_event_source_unref(m->jobs_in_progress_event_source);
1192 sd_event_source_unref(m->run_queue_event_source);
1193 sd_event_source_unref(m->user_lookup_event_source);
1194
1195 safe_close(m->signal_fd);
1196 safe_close(m->notify_fd);
1197 safe_close(m->cgroups_agent_fd);
1198 safe_close(m->time_change_fd);
1199 safe_close_pair(m->user_lookup_fds);
1200
1201 manager_close_ask_password(m);
1202
1203 manager_close_idle_pipe(m);
1204
1205 udev_unref(m->udev);
1206 sd_event_unref(m->event);
1207
1208 free(m->notify_socket);
1209
1210 lookup_paths_free(&m->lookup_paths);
1211 strv_free(m->environment);
1212
1213 hashmap_free(m->cgroup_unit);
1214 set_free_free(m->unit_path_cache);
1215
1216 free(m->switch_root);
1217 free(m->switch_root_init);
1218
1219 for (i = 0; i < _RLIMIT_MAX; i++)
1220 m->rlimit[i] = mfree(m->rlimit[i]);
1221
1222 assert(hashmap_isempty(m->units_requiring_mounts_for));
1223 hashmap_free(m->units_requiring_mounts_for);
1224
1225 hashmap_free(m->uid_refs);
1226 hashmap_free(m->gid_refs);
1227
1228 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++)
1229 m->prefix[dt] = mfree(m->prefix[dt]);
1230
1231 return mfree(m);
1232 }
1233
1234 void manager_enumerate(Manager *m) {
1235 UnitType c;
1236
1237 assert(m);
1238
1239 /* Let's ask every type to load all units from disk/kernel
1240 * that it might know */
1241 for (c = 0; c < _UNIT_TYPE_MAX; c++) {
1242 if (!unit_type_supported(c)) {
1243 log_debug("Unit type .%s is not supported on this system.", unit_type_to_string(c));
1244 continue;
1245 }
1246
1247 if (!unit_vtable[c]->enumerate)
1248 continue;
1249
1250 unit_vtable[c]->enumerate(m);
1251 }
1252
1253 manager_dispatch_load_queue(m);
1254 }
1255
1256 static void manager_coldplug(Manager *m) {
1257 Iterator i;
1258 Unit *u;
1259 char *k;
1260 int r;
1261
1262 assert(m);
1263
1264 /* Then, let's set up their initial state. */
1265 HASHMAP_FOREACH_KEY(u, k, m->units, i) {
1266
1267 /* ignore aliases */
1268 if (u->id != k)
1269 continue;
1270
1271 r = unit_coldplug(u);
1272 if (r < 0)
1273 log_warning_errno(r, "We couldn't coldplug %s, proceeding anyway: %m", u->id);
1274 }
1275 }
1276
1277 static void manager_build_unit_path_cache(Manager *m) {
1278 char **i;
1279 int r;
1280
1281 assert(m);
1282
1283 set_free_free(m->unit_path_cache);
1284
1285 m->unit_path_cache = set_new(&string_hash_ops);
1286 if (!m->unit_path_cache) {
1287 r = -ENOMEM;
1288 goto fail;
1289 }
1290
1291 /* This simply builds a list of files we know exist, so that
1292 * we don't always have to go to disk */
1293
1294 STRV_FOREACH(i, m->lookup_paths.search_path) {
1295 _cleanup_closedir_ DIR *d = NULL;
1296 struct dirent *de;
1297
1298 d = opendir(*i);
1299 if (!d) {
1300 if (errno != ENOENT)
1301 log_warning_errno(errno, "Failed to open directory %s, ignoring: %m", *i);
1302 continue;
1303 }
1304
1305 FOREACH_DIRENT(de, d, r = -errno; goto fail) {
1306 char *p;
1307
1308 p = strjoin(streq(*i, "/") ? "" : *i, "/", de->d_name);
1309 if (!p) {
1310 r = -ENOMEM;
1311 goto fail;
1312 }
1313
1314 r = set_consume(m->unit_path_cache, p);
1315 if (r < 0)
1316 goto fail;
1317 }
1318 }
1319
1320 return;
1321
1322 fail:
1323 log_warning_errno(r, "Failed to build unit path cache, proceeding without: %m");
1324 m->unit_path_cache = set_free_free(m->unit_path_cache);
1325 }
1326
1327 static void manager_distribute_fds(Manager *m, FDSet *fds) {
1328 Iterator i;
1329 Unit *u;
1330
1331 assert(m);
1332
1333 HASHMAP_FOREACH(u, m->units, i) {
1334
1335 if (fdset_size(fds) <= 0)
1336 break;
1337
1338 if (!UNIT_VTABLE(u)->distribute_fds)
1339 continue;
1340
1341 UNIT_VTABLE(u)->distribute_fds(u, fds);
1342 }
1343 }
1344
1345 int manager_startup(Manager *m, FILE *serialization, FDSet *fds) {
1346 int r;
1347
1348 assert(m);
1349
1350 /* If we are running in test mode, we still want to run the generators,
1351 * but we should not touch the real generator directories. */
1352 r = lookup_paths_init(&m->lookup_paths, m->unit_file_scope,
1353 m->test_run_flags ? LOOKUP_PATHS_TEMPORARY_GENERATED : 0,
1354 NULL);
1355 if (r < 0)
1356 return r;
1357
1358 r = manager_run_environment_generators(m);
1359 if (r < 0)
1360 return r;
1361
1362 dual_timestamp_get(m->timestamps + MANAGER_TIMESTAMP_GENERATORS_START);
1363 r = manager_run_generators(m);
1364 dual_timestamp_get(m->timestamps + MANAGER_TIMESTAMP_GENERATORS_FINISH);
1365 if (r < 0)
1366 return r;
1367
1368 /* If this is the first boot, and we are in the host system, then preset everything */
1369 if (m->first_boot > 0 &&
1370 MANAGER_IS_SYSTEM(m) &&
1371 !m->test_run_flags) {
1372
1373 r = unit_file_preset_all(UNIT_FILE_SYSTEM, 0, NULL, UNIT_FILE_PRESET_ENABLE_ONLY, NULL, 0);
1374 if (r < 0)
1375 log_full_errno(r == -EEXIST ? LOG_NOTICE : LOG_WARNING, r,
1376 "Failed to populate /etc with preset unit settings, ignoring: %m");
1377 else
1378 log_info("Populated /etc with preset unit settings.");
1379 }
1380
1381 lookup_paths_reduce(&m->lookup_paths);
1382 manager_build_unit_path_cache(m);
1383
1384 /* If we will deserialize make sure that during enumeration
1385 * this is already known, so we increase the counter here
1386 * already */
1387 if (serialization)
1388 m->n_reloading++;
1389
1390 /* First, enumerate what we can from all config files */
1391 dual_timestamp_get(m->timestamps + MANAGER_TIMESTAMP_UNITS_LOAD_START);
1392 manager_enumerate(m);
1393 dual_timestamp_get(m->timestamps + MANAGER_TIMESTAMP_UNITS_LOAD_FINISH);
1394
1395 /* Second, deserialize if there is something to deserialize */
1396 if (serialization) {
1397 r = manager_deserialize(m, serialization, fds);
1398 if (r < 0)
1399 return log_error_errno(r, "Deserialization failed: %m");
1400 }
1401
1402 /* Any fds left? Find some unit which wants them. This is
1403 * useful to allow container managers to pass some file
1404 * descriptors to us pre-initialized. This enables
1405 * socket-based activation of entire containers. */
1406 manager_distribute_fds(m, fds);
1407
1408 /* We might have deserialized the notify fd, but if we didn't
1409 * then let's create the bus now */
1410 r = manager_setup_notify(m);
1411 if (r < 0)
1412 /* No sense to continue without notifications, our children would fail anyway. */
1413 return r;
1414
1415 r = manager_setup_cgroups_agent(m);
1416 if (r < 0)
1417 /* Likewise, no sense to continue without empty cgroup notifications. */
1418 return r;
1419
1420 r = manager_setup_user_lookup_fd(m);
1421 if (r < 0)
1422 /* This shouldn't fail, except if things are really broken. */
1423 return r;
1424
1425 /* Let's connect to the bus now. */
1426 (void) manager_connect_bus(m, !!serialization);
1427
1428 (void) bus_track_coldplug(m, &m->subscribed, false, m->deserialized_subscribed);
1429 m->deserialized_subscribed = strv_free(m->deserialized_subscribed);
1430
1431 /* Third, fire things up! */
1432 manager_coldplug(m);
1433
1434 /* Release any dynamic users no longer referenced */
1435 dynamic_user_vacuum(m, true);
1436
1437 /* Release any references to UIDs/GIDs no longer referenced, and destroy any IPC owned by them */
1438 manager_vacuum_uid_refs(m);
1439 manager_vacuum_gid_refs(m);
1440
1441 if (serialization) {
1442 assert(m->n_reloading > 0);
1443 m->n_reloading--;
1444
1445 /* Let's wait for the UnitNew/JobNew messages being
1446 * sent, before we notify that the reload is
1447 * finished */
1448 m->send_reloading_done = true;
1449 }
1450
1451 return 0;
1452 }
1453
1454 int manager_add_job(Manager *m, JobType type, Unit *unit, JobMode mode, sd_bus_error *e, Job **_ret) {
1455 int r;
1456 Transaction *tr;
1457
1458 assert(m);
1459 assert(type < _JOB_TYPE_MAX);
1460 assert(unit);
1461 assert(mode < _JOB_MODE_MAX);
1462
1463 if (mode == JOB_ISOLATE && type != JOB_START)
1464 return sd_bus_error_setf(e, SD_BUS_ERROR_INVALID_ARGS, "Isolate is only valid for start.");
1465
1466 if (mode == JOB_ISOLATE && !unit->allow_isolate)
1467 return sd_bus_error_setf(e, BUS_ERROR_NO_ISOLATION, "Operation refused, unit may not be isolated.");
1468
1469 log_unit_debug(unit, "Trying to enqueue job %s/%s/%s", unit->id, job_type_to_string(type), job_mode_to_string(mode));
1470
1471 type = job_type_collapse(type, unit);
1472
1473 tr = transaction_new(mode == JOB_REPLACE_IRREVERSIBLY);
1474 if (!tr)
1475 return -ENOMEM;
1476
1477 r = transaction_add_job_and_dependencies(tr, type, unit, NULL, true, false,
1478 IN_SET(mode, JOB_IGNORE_DEPENDENCIES, JOB_IGNORE_REQUIREMENTS),
1479 mode == JOB_IGNORE_DEPENDENCIES, e);
1480 if (r < 0)
1481 goto tr_abort;
1482
1483 if (mode == JOB_ISOLATE) {
1484 r = transaction_add_isolate_jobs(tr, m);
1485 if (r < 0)
1486 goto tr_abort;
1487 }
1488
1489 r = transaction_activate(tr, m, mode, e);
1490 if (r < 0)
1491 goto tr_abort;
1492
1493 log_unit_debug(unit,
1494 "Enqueued job %s/%s as %u", unit->id,
1495 job_type_to_string(type), (unsigned) tr->anchor_job->id);
1496
1497 if (_ret)
1498 *_ret = tr->anchor_job;
1499
1500 transaction_free(tr);
1501 return 0;
1502
1503 tr_abort:
1504 transaction_abort(tr);
1505 transaction_free(tr);
1506 return r;
1507 }
1508
1509 int manager_add_job_by_name(Manager *m, JobType type, const char *name, JobMode mode, sd_bus_error *e, Job **ret) {
1510 Unit *unit = NULL; /* just to appease gcc, initialization is not really necessary */
1511 int r;
1512
1513 assert(m);
1514 assert(type < _JOB_TYPE_MAX);
1515 assert(name);
1516 assert(mode < _JOB_MODE_MAX);
1517
1518 r = manager_load_unit(m, name, NULL, NULL, &unit);
1519 if (r < 0)
1520 return r;
1521 assert(unit);
1522
1523 return manager_add_job(m, type, unit, mode, e, ret);
1524 }
1525
1526 int manager_add_job_by_name_and_warn(Manager *m, JobType type, const char *name, JobMode mode, Job **ret) {
1527 _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
1528 int r;
1529
1530 assert(m);
1531 assert(type < _JOB_TYPE_MAX);
1532 assert(name);
1533 assert(mode < _JOB_MODE_MAX);
1534
1535 r = manager_add_job_by_name(m, type, name, mode, &error, ret);
1536 if (r < 0)
1537 return log_warning_errno(r, "Failed to enqueue %s job for %s: %s", job_mode_to_string(mode), name, bus_error_message(&error, r));
1538
1539 return r;
1540 }
1541
1542 int manager_propagate_reload(Manager *m, Unit *unit, JobMode mode, sd_bus_error *e) {
1543 int r;
1544 Transaction *tr;
1545
1546 assert(m);
1547 assert(unit);
1548 assert(mode < _JOB_MODE_MAX);
1549 assert(mode != JOB_ISOLATE); /* Isolate is only valid for start */
1550
1551 tr = transaction_new(mode == JOB_REPLACE_IRREVERSIBLY);
1552 if (!tr)
1553 return -ENOMEM;
1554
1555 /* We need an anchor job */
1556 r = transaction_add_job_and_dependencies(tr, JOB_NOP, unit, NULL, false, false, true, true, e);
1557 if (r < 0)
1558 goto tr_abort;
1559
1560 /* Failure in adding individual dependencies is ignored, so this always succeeds. */
1561 transaction_add_propagate_reload_jobs(tr, unit, tr->anchor_job, mode == JOB_IGNORE_DEPENDENCIES, e);
1562
1563 r = transaction_activate(tr, m, mode, e);
1564 if (r < 0)
1565 goto tr_abort;
1566
1567 transaction_free(tr);
1568 return 0;
1569
1570 tr_abort:
1571 transaction_abort(tr);
1572 transaction_free(tr);
1573 return r;
1574 }
1575
1576 Job *manager_get_job(Manager *m, uint32_t id) {
1577 assert(m);
1578
1579 return hashmap_get(m->jobs, UINT32_TO_PTR(id));
1580 }
1581
1582 Unit *manager_get_unit(Manager *m, const char *name) {
1583 assert(m);
1584 assert(name);
1585
1586 return hashmap_get(m->units, name);
1587 }
1588
1589 unsigned manager_dispatch_load_queue(Manager *m) {
1590 Unit *u;
1591 unsigned n = 0;
1592
1593 assert(m);
1594
1595 /* Make sure we are not run recursively */
1596 if (m->dispatching_load_queue)
1597 return 0;
1598
1599 m->dispatching_load_queue = true;
1600
1601 /* Dispatches the load queue. Takes a unit from the queue and
1602 * tries to load its data until the queue is empty */
1603
1604 while ((u = m->load_queue)) {
1605 assert(u->in_load_queue);
1606
1607 unit_load(u);
1608 n++;
1609 }
1610
1611 m->dispatching_load_queue = false;
1612 return n;
1613 }
1614
1615 int manager_load_unit_prepare(
1616 Manager *m,
1617 const char *name,
1618 const char *path,
1619 sd_bus_error *e,
1620 Unit **_ret) {
1621
1622 Unit *ret;
1623 UnitType t;
1624 int r;
1625
1626 assert(m);
1627 assert(name || path);
1628 assert(_ret);
1629
1630 /* This will prepare the unit for loading, but not actually
1631 * load anything from disk. */
1632
1633 if (path && !is_path(path))
1634 return sd_bus_error_setf(e, SD_BUS_ERROR_INVALID_ARGS, "Path %s is not absolute.", path);
1635
1636 if (!name)
1637 name = basename(path);
1638
1639 t = unit_name_to_type(name);
1640
1641 if (t == _UNIT_TYPE_INVALID || !unit_name_is_valid(name, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE)) {
1642 if (unit_name_is_valid(name, UNIT_NAME_TEMPLATE))
1643 return sd_bus_error_setf(e, SD_BUS_ERROR_INVALID_ARGS, "Unit name %s is missing the instance name.", name);
1644
1645 return sd_bus_error_setf(e, SD_BUS_ERROR_INVALID_ARGS, "Unit name %s is not valid.", name);
1646 }
1647
1648 ret = manager_get_unit(m, name);
1649 if (ret) {
1650 *_ret = ret;
1651 return 1;
1652 }
1653
1654 ret = unit_new(m, unit_vtable[t]->object_size);
1655 if (!ret)
1656 return -ENOMEM;
1657
1658 if (path) {
1659 ret->fragment_path = strdup(path);
1660 if (!ret->fragment_path) {
1661 unit_free(ret);
1662 return -ENOMEM;
1663 }
1664 }
1665
1666 r = unit_add_name(ret, name);
1667 if (r < 0) {
1668 unit_free(ret);
1669 return r;
1670 }
1671
1672 unit_add_to_load_queue(ret);
1673 unit_add_to_dbus_queue(ret);
1674 unit_add_to_gc_queue(ret);
1675
1676 *_ret = ret;
1677
1678 return 0;
1679 }
1680
1681 int manager_load_unit(
1682 Manager *m,
1683 const char *name,
1684 const char *path,
1685 sd_bus_error *e,
1686 Unit **_ret) {
1687
1688 int r;
1689
1690 assert(m);
1691 assert(_ret);
1692
1693 /* This will load the service information files, but not actually
1694 * start any services or anything. */
1695
1696 r = manager_load_unit_prepare(m, name, path, e, _ret);
1697 if (r != 0)
1698 return r;
1699
1700 manager_dispatch_load_queue(m);
1701
1702 *_ret = unit_follow_merge(*_ret);
1703
1704 return 0;
1705 }
1706
1707 void manager_dump_jobs(Manager *s, FILE *f, const char *prefix) {
1708 Iterator i;
1709 Job *j;
1710
1711 assert(s);
1712 assert(f);
1713
1714 HASHMAP_FOREACH(j, s->jobs, i)
1715 job_dump(j, f, prefix);
1716 }
1717
1718 void manager_dump_units(Manager *s, FILE *f, const char *prefix) {
1719 Iterator i;
1720 Unit *u;
1721 const char *t;
1722
1723 assert(s);
1724 assert(f);
1725
1726 HASHMAP_FOREACH_KEY(u, t, s->units, i)
1727 if (u->id == t)
1728 unit_dump(u, f, prefix);
1729 }
1730
1731 void manager_dump(Manager *m, FILE *f, const char *prefix) {
1732 ManagerTimestamp q;
1733
1734 assert(m);
1735 assert(f);
1736
1737 for (q = 0; q < _MANAGER_TIMESTAMP_MAX; q++) {
1738 char buf[FORMAT_TIMESTAMP_MAX];
1739
1740 if (dual_timestamp_is_set(m->timestamps + q))
1741 fprintf(f, "%sTimestamp %s: %s\n",
1742 strempty(prefix),
1743 manager_timestamp_to_string(q),
1744 format_timestamp(buf, sizeof(buf), m->timestamps[q].realtime));
1745 }
1746
1747 manager_dump_units(m, f, prefix);
1748 manager_dump_jobs(m, f, prefix);
1749 }
1750
1751 int manager_get_dump_string(Manager *m, char **ret) {
1752 _cleanup_free_ char *dump = NULL;
1753 _cleanup_fclose_ FILE *f = NULL;
1754 size_t size;
1755 int r;
1756
1757 assert(m);
1758 assert(ret);
1759
1760 f = open_memstream(&dump, &size);
1761 if (!f)
1762 return -errno;
1763
1764 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
1765
1766 manager_dump(m, f, NULL);
1767
1768 r = fflush_and_check(f);
1769 if (r < 0)
1770 return r;
1771
1772 f = safe_fclose(f);
1773
1774 *ret = dump;
1775 dump = NULL;
1776
1777 return 0;
1778 }
1779
1780 void manager_clear_jobs(Manager *m) {
1781 Job *j;
1782
1783 assert(m);
1784
1785 while ((j = hashmap_first(m->jobs)))
1786 /* No need to recurse. We're cancelling all jobs. */
1787 job_finish_and_invalidate(j, JOB_CANCELED, false, false);
1788 }
1789
1790 static int manager_dispatch_run_queue(sd_event_source *source, void *userdata) {
1791 Manager *m = userdata;
1792 Job *j;
1793
1794 assert(source);
1795 assert(m);
1796
1797 while ((j = m->run_queue)) {
1798 assert(j->installed);
1799 assert(j->in_run_queue);
1800
1801 job_run_and_invalidate(j);
1802 }
1803
1804 if (m->n_running_jobs > 0)
1805 manager_watch_jobs_in_progress(m);
1806
1807 if (m->n_on_console > 0)
1808 manager_watch_idle_pipe(m);
1809
1810 return 1;
1811 }
1812
1813 static unsigned manager_dispatch_dbus_queue(Manager *m) {
1814 Job *j;
1815 Unit *u;
1816 unsigned n = 0;
1817
1818 assert(m);
1819
1820 if (m->dispatching_dbus_queue)
1821 return 0;
1822
1823 m->dispatching_dbus_queue = true;
1824
1825 while ((u = m->dbus_unit_queue)) {
1826 assert(u->in_dbus_queue);
1827
1828 bus_unit_send_change_signal(u);
1829 n++;
1830 }
1831
1832 while ((j = m->dbus_job_queue)) {
1833 assert(j->in_dbus_queue);
1834
1835 bus_job_send_change_signal(j);
1836 n++;
1837 }
1838
1839 m->dispatching_dbus_queue = false;
1840
1841 if (m->send_reloading_done) {
1842 m->send_reloading_done = false;
1843
1844 bus_manager_send_reloading(m, false);
1845 }
1846
1847 if (m->queued_message)
1848 bus_send_queued_message(m);
1849
1850 return n;
1851 }
1852
1853 static int manager_dispatch_cgroups_agent_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
1854 Manager *m = userdata;
1855 char buf[PATH_MAX+1];
1856 ssize_t n;
1857
1858 n = recv(fd, buf, sizeof(buf), 0);
1859 if (n < 0)
1860 return log_error_errno(errno, "Failed to read cgroups agent message: %m");
1861 if (n == 0) {
1862 log_error("Got zero-length cgroups agent message, ignoring.");
1863 return 0;
1864 }
1865 if ((size_t) n >= sizeof(buf)) {
1866 log_error("Got overly long cgroups agent message, ignoring.");
1867 return 0;
1868 }
1869
1870 if (memchr(buf, 0, n)) {
1871 log_error("Got cgroups agent message with embedded NUL byte, ignoring.");
1872 return 0;
1873 }
1874 buf[n] = 0;
1875
1876 manager_notify_cgroup_empty(m, buf);
1877 (void) bus_forward_agent_released(m, buf);
1878
1879 return 0;
1880 }
1881
1882 static void manager_invoke_notify_message(Manager *m, Unit *u, pid_t pid, const char *buf, FDSet *fds) {
1883 _cleanup_strv_free_ char **tags = NULL;
1884
1885 assert(m);
1886 assert(u);
1887 assert(buf);
1888
1889 tags = strv_split(buf, NEWLINE);
1890 if (!tags) {
1891 log_oom();
1892 return;
1893 }
1894
1895 if (UNIT_VTABLE(u)->notify_message)
1896 UNIT_VTABLE(u)->notify_message(u, pid, tags, fds);
1897 else if (DEBUG_LOGGING) {
1898 _cleanup_free_ char *x = NULL, *y = NULL;
1899
1900 x = ellipsize(buf, 20, 90);
1901 if (x)
1902 y = cescape(x);
1903
1904 log_unit_debug(u, "Got notification message \"%s\", ignoring.", strnull(y));
1905 }
1906 }
1907
1908 static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
1909
1910 _cleanup_fdset_free_ FDSet *fds = NULL;
1911 Manager *m = userdata;
1912 char buf[NOTIFY_BUFFER_MAX+1];
1913 struct iovec iovec = {
1914 .iov_base = buf,
1915 .iov_len = sizeof(buf)-1,
1916 };
1917 union {
1918 struct cmsghdr cmsghdr;
1919 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1920 CMSG_SPACE(sizeof(int) * NOTIFY_FD_MAX)];
1921 } control = {};
1922 struct msghdr msghdr = {
1923 .msg_iov = &iovec,
1924 .msg_iovlen = 1,
1925 .msg_control = &control,
1926 .msg_controllen = sizeof(control),
1927 };
1928
1929 struct cmsghdr *cmsg;
1930 struct ucred *ucred = NULL;
1931 Unit *u1, *u2, *u3;
1932 int r, *fd_array = NULL;
1933 unsigned n_fds = 0;
1934 ssize_t n;
1935
1936 assert(m);
1937 assert(m->notify_fd == fd);
1938
1939 if (revents != EPOLLIN) {
1940 log_warning("Got unexpected poll event for notify fd.");
1941 return 0;
1942 }
1943
1944 n = recvmsg(m->notify_fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC|MSG_TRUNC);
1945 if (n < 0) {
1946 if (IN_SET(errno, EAGAIN, EINTR))
1947 return 0; /* Spurious wakeup, try again */
1948
1949 /* If this is any other, real error, then let's stop processing this socket. This of course means we
1950 * won't take notification messages anymore, but that's still better than busy looping around this:
1951 * being woken up over and over again but being unable to actually read the message off the socket. */
1952 return log_error_errno(errno, "Failed to receive notification message: %m");
1953 }
1954
1955 CMSG_FOREACH(cmsg, &msghdr) {
1956 if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
1957
1958 fd_array = (int*) CMSG_DATA(cmsg);
1959 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1960
1961 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1962 cmsg->cmsg_type == SCM_CREDENTIALS &&
1963 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred))) {
1964
1965 ucred = (struct ucred*) CMSG_DATA(cmsg);
1966 }
1967 }
1968
1969 if (n_fds > 0) {
1970 assert(fd_array);
1971
1972 r = fdset_new_array(&fds, fd_array, n_fds);
1973 if (r < 0) {
1974 close_many(fd_array, n_fds);
1975 log_oom();
1976 return 0;
1977 }
1978 }
1979
1980 if (!ucred || !pid_is_valid(ucred->pid)) {
1981 log_warning("Received notify message without valid credentials. Ignoring.");
1982 return 0;
1983 }
1984
1985 if ((size_t) n >= sizeof(buf) || (msghdr.msg_flags & MSG_TRUNC)) {
1986 log_warning("Received notify message exceeded maximum size. Ignoring.");
1987 return 0;
1988 }
1989
1990 /* As extra safety check, let's make sure the string we get doesn't contain embedded NUL bytes. We permit one
1991 * trailing NUL byte in the message, but don't expect it. */
1992 if (n > 1 && memchr(buf, 0, n-1)) {
1993 log_warning("Received notify message with embedded NUL bytes. Ignoring.");
1994 return 0;
1995 }
1996
1997 /* Make sure it's NUL-terminated. */
1998 buf[n] = 0;
1999
2000 /* Notify every unit that might be interested, but try
2001 * to avoid notifying the same one multiple times. */
2002 u1 = manager_get_unit_by_pid_cgroup(m, ucred->pid);
2003 if (u1)
2004 manager_invoke_notify_message(m, u1, ucred->pid, buf, fds);
2005
2006 u2 = hashmap_get(m->watch_pids1, PID_TO_PTR(ucred->pid));
2007 if (u2 && u2 != u1)
2008 manager_invoke_notify_message(m, u2, ucred->pid, buf, fds);
2009
2010 u3 = hashmap_get(m->watch_pids2, PID_TO_PTR(ucred->pid));
2011 if (u3 && u3 != u2 && u3 != u1)
2012 manager_invoke_notify_message(m, u3, ucred->pid, buf, fds);
2013
2014 if (!u1 && !u2 && !u3)
2015 log_warning("Cannot find unit for notify message of PID "PID_FMT".", ucred->pid);
2016
2017 if (fdset_size(fds) > 0)
2018 log_warning("Got extra auxiliary fds with notification message, closing them.");
2019
2020 return 0;
2021 }
2022
2023 static void invoke_sigchld_event(Manager *m, Unit *u, const siginfo_t *si) {
2024 uint64_t iteration;
2025
2026 assert(m);
2027 assert(u);
2028 assert(si);
2029
2030 sd_event_get_iteration(m->event, &iteration);
2031
2032 log_unit_debug(u, "Child "PID_FMT" belongs to %s", si->si_pid, u->id);
2033
2034 unit_unwatch_pid(u, si->si_pid);
2035
2036 if (UNIT_VTABLE(u)->sigchld_event) {
2037 if (set_size(u->pids) <= 1 ||
2038 iteration != u->sigchldgen ||
2039 unit_main_pid(u) == si->si_pid ||
2040 unit_control_pid(u) == si->si_pid) {
2041 UNIT_VTABLE(u)->sigchld_event(u, si->si_pid, si->si_code, si->si_status);
2042 u->sigchldgen = iteration;
2043 } else
2044 log_debug("%s already issued a sigchld this iteration %" PRIu64 ", skipping. Pids still being watched %d", u->id, iteration, set_size(u->pids));
2045 }
2046 }
2047
2048 static int manager_dispatch_sigchld(Manager *m) {
2049 assert(m);
2050
2051 for (;;) {
2052 siginfo_t si = {};
2053
2054 /* First we call waitd() for a PID and do not reap the
2055 * zombie. That way we can still access /proc/$PID for
2056 * it while it is a zombie. */
2057 if (waitid(P_ALL, 0, &si, WEXITED|WNOHANG|WNOWAIT) < 0) {
2058
2059 if (errno == ECHILD)
2060 break;
2061
2062 if (errno == EINTR)
2063 continue;
2064
2065 return -errno;
2066 }
2067
2068 if (si.si_pid <= 0)
2069 break;
2070
2071 if (IN_SET(si.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED)) {
2072 _cleanup_free_ char *name = NULL;
2073 Unit *u1, *u2, *u3;
2074
2075 get_process_comm(si.si_pid, &name);
2076
2077 log_debug("Child "PID_FMT" (%s) died (code=%s, status=%i/%s)",
2078 si.si_pid, strna(name),
2079 sigchld_code_to_string(si.si_code),
2080 si.si_status,
2081 strna(si.si_code == CLD_EXITED
2082 ? exit_status_to_string(si.si_status, EXIT_STATUS_FULL)
2083 : signal_to_string(si.si_status)));
2084
2085 /* And now figure out the unit this belongs
2086 * to, it might be multiple... */
2087 u1 = manager_get_unit_by_pid_cgroup(m, si.si_pid);
2088 if (u1)
2089 invoke_sigchld_event(m, u1, &si);
2090 u2 = hashmap_get(m->watch_pids1, PID_TO_PTR(si.si_pid));
2091 if (u2 && u2 != u1)
2092 invoke_sigchld_event(m, u2, &si);
2093 u3 = hashmap_get(m->watch_pids2, PID_TO_PTR(si.si_pid));
2094 if (u3 && u3 != u2 && u3 != u1)
2095 invoke_sigchld_event(m, u3, &si);
2096 }
2097
2098 /* And now, we actually reap the zombie. */
2099 if (waitid(P_PID, si.si_pid, &si, WEXITED) < 0) {
2100 if (errno == EINTR)
2101 continue;
2102
2103 return -errno;
2104 }
2105 }
2106
2107 return 0;
2108 }
2109
2110 static void manager_start_target(Manager *m, const char *name, JobMode mode) {
2111 _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
2112 int r;
2113
2114 log_debug("Activating special unit %s", name);
2115
2116 r = manager_add_job_by_name(m, JOB_START, name, mode, &error, NULL);
2117 if (r < 0)
2118 log_error("Failed to enqueue %s job: %s", name, bus_error_message(&error, r));
2119 }
2120
2121 static void manager_handle_ctrl_alt_del(Manager *m) {
2122 /* If the user presses C-A-D more than
2123 * 7 times within 2s, we reboot/shutdown immediately,
2124 * unless it was disabled in system.conf */
2125
2126 if (ratelimit_test(&m->ctrl_alt_del_ratelimit) || m->cad_burst_action == EMERGENCY_ACTION_NONE)
2127 manager_start_target(m, SPECIAL_CTRL_ALT_DEL_TARGET, JOB_REPLACE_IRREVERSIBLY);
2128 else
2129 emergency_action(m, m->cad_burst_action, NULL,
2130 "Ctrl-Alt-Del was pressed more than 7 times within 2s");
2131 }
2132
2133 static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
2134 Manager *m = userdata;
2135 ssize_t n;
2136 struct signalfd_siginfo sfsi;
2137 bool sigchld = false;
2138 int r;
2139
2140 assert(m);
2141 assert(m->signal_fd == fd);
2142
2143 if (revents != EPOLLIN) {
2144 log_warning("Got unexpected events from signal file descriptor.");
2145 return 0;
2146 }
2147
2148 for (;;) {
2149 n = read(m->signal_fd, &sfsi, sizeof(sfsi));
2150 if (n != sizeof(sfsi)) {
2151 if (n >= 0) {
2152 log_warning("Truncated read from signal fd (%zu bytes)!", n);
2153 return 0;
2154 }
2155
2156 if (IN_SET(errno, EINTR, EAGAIN))
2157 break;
2158
2159 /* We return an error here, which will kill this handler,
2160 * to avoid a busy loop on read error. */
2161 return log_error_errno(errno, "Reading from signal fd failed: %m");
2162 }
2163
2164 log_received_signal(sfsi.ssi_signo == SIGCHLD ||
2165 (sfsi.ssi_signo == SIGTERM && MANAGER_IS_USER(m))
2166 ? LOG_DEBUG : LOG_INFO,
2167 &sfsi);
2168
2169 switch (sfsi.ssi_signo) {
2170
2171 case SIGCHLD:
2172 sigchld = true;
2173 break;
2174
2175 case SIGTERM:
2176 if (MANAGER_IS_SYSTEM(m)) {
2177 /* This is for compatibility with the
2178 * original sysvinit */
2179 r = verify_run_space_and_log("Refusing to reexecute");
2180 if (r >= 0)
2181 m->exit_code = MANAGER_REEXECUTE;
2182 break;
2183 }
2184
2185 _fallthrough_;
2186 case SIGINT:
2187 if (MANAGER_IS_SYSTEM(m))
2188 manager_handle_ctrl_alt_del(m);
2189 else
2190 manager_start_target(m, SPECIAL_EXIT_TARGET,
2191 JOB_REPLACE_IRREVERSIBLY);
2192 break;
2193
2194 case SIGWINCH:
2195 if (MANAGER_IS_SYSTEM(m))
2196 manager_start_target(m, SPECIAL_KBREQUEST_TARGET, JOB_REPLACE);
2197
2198 /* This is a nop on non-init */
2199 break;
2200
2201 case SIGPWR:
2202 if (MANAGER_IS_SYSTEM(m))
2203 manager_start_target(m, SPECIAL_SIGPWR_TARGET, JOB_REPLACE);
2204
2205 /* This is a nop on non-init */
2206 break;
2207
2208 case SIGUSR1: {
2209 Unit *u;
2210
2211 u = manager_get_unit(m, SPECIAL_DBUS_SERVICE);
2212
2213 if (!u || UNIT_IS_ACTIVE_OR_RELOADING(unit_active_state(u))) {
2214 log_info("Trying to reconnect to bus...");
2215 bus_init(m, true);
2216 }
2217
2218 if (!u || !UNIT_IS_ACTIVE_OR_ACTIVATING(unit_active_state(u))) {
2219 log_info("Loading D-Bus service...");
2220 manager_start_target(m, SPECIAL_DBUS_SERVICE, JOB_REPLACE);
2221 }
2222
2223 break;
2224 }
2225
2226 case SIGUSR2: {
2227 _cleanup_free_ char *dump = NULL;
2228
2229 r = manager_get_dump_string(m, &dump);
2230 if (r < 0) {
2231 log_warning_errno(errno, "Failed to acquire manager dump: %m");
2232 break;
2233 }
2234
2235 log_dump(LOG_INFO, dump);
2236 break;
2237 }
2238
2239 case SIGHUP:
2240 r = verify_run_space_and_log("Refusing to reload");
2241 if (r >= 0)
2242 m->exit_code = MANAGER_RELOAD;
2243 break;
2244
2245 default: {
2246
2247 /* Starting SIGRTMIN+0 */
2248 static const struct {
2249 const char *target;
2250 JobMode mode;
2251 } target_table[] = {
2252 [0] = { SPECIAL_DEFAULT_TARGET, JOB_ISOLATE },
2253 [1] = { SPECIAL_RESCUE_TARGET, JOB_ISOLATE },
2254 [2] = { SPECIAL_EMERGENCY_TARGET, JOB_ISOLATE },
2255 [3] = { SPECIAL_HALT_TARGET, JOB_REPLACE_IRREVERSIBLY },
2256 [4] = { SPECIAL_POWEROFF_TARGET, JOB_REPLACE_IRREVERSIBLY },
2257 [5] = { SPECIAL_REBOOT_TARGET, JOB_REPLACE_IRREVERSIBLY },
2258 [6] = { SPECIAL_KEXEC_TARGET, JOB_REPLACE_IRREVERSIBLY }
2259 };
2260
2261 /* Starting SIGRTMIN+13, so that target halt and system halt are 10 apart */
2262 static const ManagerExitCode code_table[] = {
2263 [0] = MANAGER_HALT,
2264 [1] = MANAGER_POWEROFF,
2265 [2] = MANAGER_REBOOT,
2266 [3] = MANAGER_KEXEC
2267 };
2268
2269 if ((int) sfsi.ssi_signo >= SIGRTMIN+0 &&
2270 (int) sfsi.ssi_signo < SIGRTMIN+(int) ELEMENTSOF(target_table)) {
2271 int idx = (int) sfsi.ssi_signo - SIGRTMIN;
2272 manager_start_target(m, target_table[idx].target,
2273 target_table[idx].mode);
2274 break;
2275 }
2276
2277 if ((int) sfsi.ssi_signo >= SIGRTMIN+13 &&
2278 (int) sfsi.ssi_signo < SIGRTMIN+13+(int) ELEMENTSOF(code_table)) {
2279 m->exit_code = code_table[sfsi.ssi_signo - SIGRTMIN - 13];
2280 break;
2281 }
2282
2283 switch (sfsi.ssi_signo - SIGRTMIN) {
2284
2285 case 20:
2286 manager_set_show_status(m, SHOW_STATUS_YES);
2287 break;
2288
2289 case 21:
2290 manager_set_show_status(m, SHOW_STATUS_NO);
2291 break;
2292
2293 case 22:
2294 log_set_max_level(LOG_DEBUG);
2295 log_info("Setting log level to debug.");
2296 break;
2297
2298 case 23:
2299 log_set_max_level(LOG_INFO);
2300 log_info("Setting log level to info.");
2301 break;
2302
2303 case 24:
2304 if (MANAGER_IS_USER(m)) {
2305 m->exit_code = MANAGER_EXIT;
2306 return 0;
2307 }
2308
2309 /* This is a nop on init */
2310 break;
2311
2312 case 26:
2313 case 29: /* compatibility: used to be mapped to LOG_TARGET_SYSLOG_OR_KMSG */
2314 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
2315 log_notice("Setting log target to journal-or-kmsg.");
2316 break;
2317
2318 case 27:
2319 log_set_target(LOG_TARGET_CONSOLE);
2320 log_notice("Setting log target to console.");
2321 break;
2322
2323 case 28:
2324 log_set_target(LOG_TARGET_KMSG);
2325 log_notice("Setting log target to kmsg.");
2326 break;
2327
2328 default:
2329 log_warning("Got unhandled signal <%s>.", signal_to_string(sfsi.ssi_signo));
2330 }
2331 }
2332 }
2333 }
2334
2335 if (sigchld)
2336 manager_dispatch_sigchld(m);
2337
2338 return 0;
2339 }
2340
2341 static int manager_dispatch_time_change_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
2342 Manager *m = userdata;
2343 Iterator i;
2344 Unit *u;
2345
2346 assert(m);
2347 assert(m->time_change_fd == fd);
2348
2349 log_struct(LOG_DEBUG,
2350 "MESSAGE_ID=" SD_MESSAGE_TIME_CHANGE_STR,
2351 LOG_MESSAGE("Time has been changed"),
2352 NULL);
2353
2354 /* Restart the watch */
2355 m->time_change_event_source = sd_event_source_unref(m->time_change_event_source);
2356 m->time_change_fd = safe_close(m->time_change_fd);
2357
2358 manager_setup_time_change(m);
2359
2360 HASHMAP_FOREACH(u, m->units, i)
2361 if (UNIT_VTABLE(u)->time_change)
2362 UNIT_VTABLE(u)->time_change(u);
2363
2364 return 0;
2365 }
2366
2367 static int manager_dispatch_idle_pipe_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
2368 Manager *m = userdata;
2369
2370 assert(m);
2371 assert(m->idle_pipe[2] == fd);
2372
2373 m->no_console_output = m->n_on_console > 0;
2374
2375 manager_close_idle_pipe(m);
2376
2377 return 0;
2378 }
2379
2380 static int manager_dispatch_jobs_in_progress(sd_event_source *source, usec_t usec, void *userdata) {
2381 Manager *m = userdata;
2382 int r;
2383 uint64_t next;
2384
2385 assert(m);
2386 assert(source);
2387
2388 manager_print_jobs_in_progress(m);
2389
2390 next = now(CLOCK_MONOTONIC) + JOBS_IN_PROGRESS_PERIOD_USEC;
2391 r = sd_event_source_set_time(source, next);
2392 if (r < 0)
2393 return r;
2394
2395 return sd_event_source_set_enabled(source, SD_EVENT_ONESHOT);
2396 }
2397
2398 int manager_loop(Manager *m) {
2399 int r;
2400
2401 RATELIMIT_DEFINE(rl, 1*USEC_PER_SEC, 50000);
2402
2403 assert(m);
2404 m->exit_code = MANAGER_OK;
2405
2406 /* Release the path cache */
2407 m->unit_path_cache = set_free_free(m->unit_path_cache);
2408
2409 manager_check_finished(m);
2410
2411 /* There might still be some zombies hanging around from
2412 * before we were exec()'ed. Let's reap them. */
2413 r = manager_dispatch_sigchld(m);
2414 if (r < 0)
2415 return r;
2416
2417 while (m->exit_code == MANAGER_OK) {
2418 usec_t wait_usec;
2419
2420 if (m->runtime_watchdog > 0 && m->runtime_watchdog != USEC_INFINITY && MANAGER_IS_SYSTEM(m))
2421 watchdog_ping();
2422
2423 if (!ratelimit_test(&rl)) {
2424 /* Yay, something is going seriously wrong, pause a little */
2425 log_warning("Looping too fast. Throttling execution a little.");
2426 sleep(1);
2427 }
2428
2429 if (manager_dispatch_load_queue(m) > 0)
2430 continue;
2431
2432 if (manager_dispatch_gc_job_queue(m) > 0)
2433 continue;
2434
2435 if (manager_dispatch_gc_unit_queue(m) > 0)
2436 continue;
2437
2438 if (manager_dispatch_cleanup_queue(m) > 0)
2439 continue;
2440
2441 if (manager_dispatch_cgroup_realize_queue(m) > 0)
2442 continue;
2443
2444 if (manager_dispatch_dbus_queue(m) > 0)
2445 continue;
2446
2447 /* Sleep for half the watchdog time */
2448 if (m->runtime_watchdog > 0 && m->runtime_watchdog != USEC_INFINITY && MANAGER_IS_SYSTEM(m)) {
2449 wait_usec = m->runtime_watchdog / 2;
2450 if (wait_usec <= 0)
2451 wait_usec = 1;
2452 } else
2453 wait_usec = USEC_INFINITY;
2454
2455 r = sd_event_run(m->event, wait_usec);
2456 if (r < 0)
2457 return log_error_errno(r, "Failed to run event loop: %m");
2458 }
2459
2460 return m->exit_code;
2461 }
2462
2463 int manager_load_unit_from_dbus_path(Manager *m, const char *s, sd_bus_error *e, Unit **_u) {
2464 _cleanup_free_ char *n = NULL;
2465 sd_id128_t invocation_id;
2466 Unit *u;
2467 int r;
2468
2469 assert(m);
2470 assert(s);
2471 assert(_u);
2472
2473 r = unit_name_from_dbus_path(s, &n);
2474 if (r < 0)
2475 return r;
2476
2477 /* Permit addressing units by invocation ID: if the passed bus path is suffixed by a 128bit ID then we use it
2478 * as invocation ID. */
2479 r = sd_id128_from_string(n, &invocation_id);
2480 if (r >= 0) {
2481 u = hashmap_get(m->units_by_invocation_id, &invocation_id);
2482 if (u) {
2483 *_u = u;
2484 return 0;
2485 }
2486
2487 return sd_bus_error_setf(e, BUS_ERROR_NO_UNIT_FOR_INVOCATION_ID, "No unit with the specified invocation ID " SD_ID128_FORMAT_STR " known.", SD_ID128_FORMAT_VAL(invocation_id));
2488 }
2489
2490 /* If this didn't work, we check if this is a unit name */
2491 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
2492 return sd_bus_error_setf(e, SD_BUS_ERROR_INVALID_ARGS, "Unit name %s is neither a valid invocation ID nor unit name.", n);
2493
2494 r = manager_load_unit(m, n, NULL, e, &u);
2495 if (r < 0)
2496 return r;
2497
2498 *_u = u;
2499 return 0;
2500 }
2501
2502 int manager_get_job_from_dbus_path(Manager *m, const char *s, Job **_j) {
2503 const char *p;
2504 unsigned id;
2505 Job *j;
2506 int r;
2507
2508 assert(m);
2509 assert(s);
2510 assert(_j);
2511
2512 p = startswith(s, "/org/freedesktop/systemd1/job/");
2513 if (!p)
2514 return -EINVAL;
2515
2516 r = safe_atou(p, &id);
2517 if (r < 0)
2518 return r;
2519
2520 j = manager_get_job(m, id);
2521 if (!j)
2522 return -ENOENT;
2523
2524 *_j = j;
2525
2526 return 0;
2527 }
2528
2529 void manager_send_unit_audit(Manager *m, Unit *u, int type, bool success) {
2530
2531 #if HAVE_AUDIT
2532 _cleanup_free_ char *p = NULL;
2533 const char *msg;
2534 int audit_fd, r;
2535
2536 if (!MANAGER_IS_SYSTEM(m))
2537 return;
2538
2539 audit_fd = get_audit_fd();
2540 if (audit_fd < 0)
2541 return;
2542
2543 /* Don't generate audit events if the service was already
2544 * started and we're just deserializing */
2545 if (MANAGER_IS_RELOADING(m))
2546 return;
2547
2548 if (u->type != UNIT_SERVICE)
2549 return;
2550
2551 r = unit_name_to_prefix_and_instance(u->id, &p);
2552 if (r < 0) {
2553 log_error_errno(r, "Failed to extract prefix and instance of unit name: %m");
2554 return;
2555 }
2556
2557 msg = strjoina("unit=", p);
2558 if (audit_log_user_comm_message(audit_fd, type, msg, "systemd", NULL, NULL, NULL, success) < 0) {
2559 if (errno == EPERM)
2560 /* We aren't allowed to send audit messages?
2561 * Then let's not retry again. */
2562 close_audit_fd();
2563 else
2564 log_warning_errno(errno, "Failed to send audit message: %m");
2565 }
2566 #endif
2567
2568 }
2569
2570 void manager_send_unit_plymouth(Manager *m, Unit *u) {
2571 static const union sockaddr_union sa = PLYMOUTH_SOCKET;
2572 _cleanup_free_ char *message = NULL;
2573 _cleanup_close_ int fd = -1;
2574 int n = 0;
2575
2576 /* Don't generate plymouth events if the service was already
2577 * started and we're just deserializing */
2578 if (MANAGER_IS_RELOADING(m))
2579 return;
2580
2581 if (!MANAGER_IS_SYSTEM(m))
2582 return;
2583
2584 if (detect_container() > 0)
2585 return;
2586
2587 if (!IN_SET(u->type, UNIT_SERVICE, UNIT_MOUNT, UNIT_SWAP))
2588 return;
2589
2590 /* We set SOCK_NONBLOCK here so that we rather drop the
2591 * message then wait for plymouth */
2592 fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2593 if (fd < 0) {
2594 log_error_errno(errno, "socket() failed: %m");
2595 return;
2596 }
2597
2598 if (connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0) {
2599
2600 if (!IN_SET(errno, EPIPE, EAGAIN, ENOENT, ECONNREFUSED, ECONNRESET, ECONNABORTED))
2601 log_error_errno(errno, "connect() failed: %m");
2602 return;
2603 }
2604
2605 if (asprintf(&message, "U\002%c%s%n", (int) (strlen(u->id) + 1), u->id, &n) < 0) {
2606 log_oom();
2607 return;
2608 }
2609
2610 errno = 0;
2611 if (write(fd, message, n + 1) != n + 1)
2612 if (!IN_SET(errno, EPIPE, EAGAIN, ENOENT, ECONNREFUSED, ECONNRESET, ECONNABORTED))
2613 log_error_errno(errno, "Failed to write Plymouth message: %m");
2614 }
2615
2616 int manager_open_serialization(Manager *m, FILE **_f) {
2617 int fd;
2618 FILE *f;
2619
2620 assert(_f);
2621
2622 fd = open_serialization_fd("systemd-state");
2623 if (fd < 0)
2624 return fd;
2625
2626 f = fdopen(fd, "w+");
2627 if (!f) {
2628 safe_close(fd);
2629 return -errno;
2630 }
2631
2632 *_f = f;
2633 return 0;
2634 }
2635
2636 int manager_serialize(Manager *m, FILE *f, FDSet *fds, bool switching_root) {
2637 ManagerTimestamp q;
2638 const char *t;
2639 Iterator i;
2640 Unit *u;
2641 int r;
2642
2643 assert(m);
2644 assert(f);
2645 assert(fds);
2646
2647 m->n_reloading++;
2648
2649 fprintf(f, "current-job-id=%"PRIu32"\n", m->current_job_id);
2650 fprintf(f, "n-installed-jobs=%u\n", m->n_installed_jobs);
2651 fprintf(f, "n-failed-jobs=%u\n", m->n_failed_jobs);
2652 fprintf(f, "taint-usr=%s\n", yes_no(m->taint_usr));
2653 fprintf(f, "ready-sent=%s\n", yes_no(m->ready_sent));
2654
2655 for (q = 0; q < _MANAGER_TIMESTAMP_MAX; q++) {
2656 /* The userspace and finish timestamps only apply to the host system, hence only serialize them there */
2657 if (in_initrd() && IN_SET(q, MANAGER_TIMESTAMP_USERSPACE, MANAGER_TIMESTAMP_FINISH))
2658 continue;
2659
2660 t = manager_timestamp_to_string(q);
2661 {
2662 char field[strlen(t) + STRLEN("-timestamp") + 1];
2663 strcpy(stpcpy(field, t), "-timestamp");
2664 dual_timestamp_serialize(f, field, m->timestamps + q);
2665 }
2666 }
2667
2668 if (!switching_root)
2669 (void) serialize_environment(f, m->environment);
2670
2671 if (m->notify_fd >= 0) {
2672 int copy;
2673
2674 copy = fdset_put_dup(fds, m->notify_fd);
2675 if (copy < 0)
2676 return copy;
2677
2678 fprintf(f, "notify-fd=%i\n", copy);
2679 fprintf(f, "notify-socket=%s\n", m->notify_socket);
2680 }
2681
2682 if (m->cgroups_agent_fd >= 0) {
2683 int copy;
2684
2685 copy = fdset_put_dup(fds, m->cgroups_agent_fd);
2686 if (copy < 0)
2687 return copy;
2688
2689 fprintf(f, "cgroups-agent-fd=%i\n", copy);
2690 }
2691
2692 if (m->user_lookup_fds[0] >= 0) {
2693 int copy0, copy1;
2694
2695 copy0 = fdset_put_dup(fds, m->user_lookup_fds[0]);
2696 if (copy0 < 0)
2697 return copy0;
2698
2699 copy1 = fdset_put_dup(fds, m->user_lookup_fds[1]);
2700 if (copy1 < 0)
2701 return copy1;
2702
2703 fprintf(f, "user-lookup=%i %i\n", copy0, copy1);
2704 }
2705
2706 bus_track_serialize(m->subscribed, f, "subscribed");
2707
2708 r = dynamic_user_serialize(m, f, fds);
2709 if (r < 0)
2710 return r;
2711
2712 manager_serialize_uid_refs(m, f);
2713 manager_serialize_gid_refs(m, f);
2714
2715 (void) fputc('\n', f);
2716
2717 HASHMAP_FOREACH_KEY(u, t, m->units, i) {
2718 if (u->id != t)
2719 continue;
2720
2721 /* Start marker */
2722 fputs(u->id, f);
2723 fputc('\n', f);
2724
2725 r = unit_serialize(u, f, fds, !switching_root);
2726 if (r < 0) {
2727 m->n_reloading--;
2728 return r;
2729 }
2730 }
2731
2732 assert(m->n_reloading > 0);
2733 m->n_reloading--;
2734
2735 if (ferror(f))
2736 return -EIO;
2737
2738 r = bus_fdset_add_all(m, fds);
2739 if (r < 0)
2740 return r;
2741
2742 return 0;
2743 }
2744
2745 int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
2746 int r = 0;
2747
2748 assert(m);
2749 assert(f);
2750
2751 log_debug("Deserializing state...");
2752
2753 m->n_reloading++;
2754
2755 for (;;) {
2756 char line[LINE_MAX];
2757 const char *val, *l;
2758
2759 if (!fgets(line, sizeof(line), f)) {
2760 if (feof(f))
2761 r = 0;
2762 else
2763 r = -errno;
2764
2765 goto finish;
2766 }
2767
2768 char_array_0(line);
2769 l = strstrip(line);
2770
2771 if (l[0] == 0)
2772 break;
2773
2774 if ((val = startswith(l, "current-job-id="))) {
2775 uint32_t id;
2776
2777 if (safe_atou32(val, &id) < 0)
2778 log_notice("Failed to parse current job id value %s", val);
2779 else
2780 m->current_job_id = MAX(m->current_job_id, id);
2781
2782 } else if ((val = startswith(l, "n-installed-jobs="))) {
2783 uint32_t n;
2784
2785 if (safe_atou32(val, &n) < 0)
2786 log_notice("Failed to parse installed jobs counter %s", val);
2787 else
2788 m->n_installed_jobs += n;
2789
2790 } else if ((val = startswith(l, "n-failed-jobs="))) {
2791 uint32_t n;
2792
2793 if (safe_atou32(val, &n) < 0)
2794 log_notice("Failed to parse failed jobs counter %s", val);
2795 else
2796 m->n_failed_jobs += n;
2797
2798 } else if ((val = startswith(l, "taint-usr="))) {
2799 int b;
2800
2801 b = parse_boolean(val);
2802 if (b < 0)
2803 log_notice("Failed to parse taint /usr flag %s", val);
2804 else
2805 m->taint_usr = m->taint_usr || b;
2806
2807 } else if ((val = startswith(l, "ready-sent="))) {
2808 int b;
2809
2810 b = parse_boolean(val);
2811 if (b < 0)
2812 log_notice("Failed to parse ready-sent flag %s", val);
2813 else
2814 m->ready_sent = m->ready_sent || b;
2815
2816 } else if (startswith(l, "env=")) {
2817 r = deserialize_environment(&m->environment, l);
2818 if (r == -ENOMEM)
2819 goto finish;
2820 if (r < 0)
2821 log_notice_errno(r, "Failed to parse environment entry: \"%s\": %m", l);
2822
2823 } else if ((val = startswith(l, "notify-fd="))) {
2824 int fd;
2825
2826 if (safe_atoi(val, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd))
2827 log_notice("Failed to parse notify fd: \"%s\"", val);
2828 else {
2829 m->notify_event_source = sd_event_source_unref(m->notify_event_source);
2830 safe_close(m->notify_fd);
2831 m->notify_fd = fdset_remove(fds, fd);
2832 }
2833
2834 } else if ((val = startswith(l, "notify-socket="))) {
2835 char *n;
2836
2837 n = strdup(val);
2838 if (!n) {
2839 r = -ENOMEM;
2840 goto finish;
2841 }
2842
2843 free(m->notify_socket);
2844 m->notify_socket = n;
2845
2846 } else if ((val = startswith(l, "cgroups-agent-fd="))) {
2847 int fd;
2848
2849 if (safe_atoi(val, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd))
2850 log_notice("Failed to parse cgroups agent fd: %s", val);
2851 else {
2852 m->cgroups_agent_event_source = sd_event_source_unref(m->cgroups_agent_event_source);
2853 safe_close(m->cgroups_agent_fd);
2854 m->cgroups_agent_fd = fdset_remove(fds, fd);
2855 }
2856
2857 } else if ((val = startswith(l, "user-lookup="))) {
2858 int fd0, fd1;
2859
2860 if (sscanf(val, "%i %i", &fd0, &fd1) != 2 || fd0 < 0 || fd1 < 0 || fd0 == fd1 || !fdset_contains(fds, fd0) || !fdset_contains(fds, fd1))
2861 log_notice("Failed to parse user lookup fd: %s", val);
2862 else {
2863 m->user_lookup_event_source = sd_event_source_unref(m->user_lookup_event_source);
2864 safe_close_pair(m->user_lookup_fds);
2865 m->user_lookup_fds[0] = fdset_remove(fds, fd0);
2866 m->user_lookup_fds[1] = fdset_remove(fds, fd1);
2867 }
2868
2869 } else if ((val = startswith(l, "dynamic-user=")))
2870 dynamic_user_deserialize_one(m, val, fds);
2871 else if ((val = startswith(l, "destroy-ipc-uid=")))
2872 manager_deserialize_uid_refs_one(m, val);
2873 else if ((val = startswith(l, "destroy-ipc-gid=")))
2874 manager_deserialize_gid_refs_one(m, val);
2875 else if ((val = startswith(l, "subscribed="))) {
2876
2877 if (strv_extend(&m->deserialized_subscribed, val) < 0)
2878 log_oom();
2879 } else {
2880 ManagerTimestamp q;
2881
2882 for (q = 0; q < _MANAGER_TIMESTAMP_MAX; q++) {
2883 val = startswith(l, manager_timestamp_to_string(q));
2884 if (!val)
2885 continue;
2886
2887 val = startswith(val, "-timestamp=");
2888 if (val)
2889 break;
2890 }
2891
2892 if (q < _MANAGER_TIMESTAMP_MAX) /* found it */
2893 dual_timestamp_deserialize(val, m->timestamps + q);
2894 else if (!startswith(l, "kdbus-fd=")) /* ignore kdbus */
2895 log_notice("Unknown serialization item '%s'", l);
2896 }
2897 }
2898
2899 for (;;) {
2900 Unit *u;
2901 char name[UNIT_NAME_MAX+2];
2902 const char* unit_name;
2903
2904 /* Start marker */
2905 if (!fgets(name, sizeof(name), f)) {
2906 if (feof(f))
2907 r = 0;
2908 else
2909 r = -errno;
2910
2911 goto finish;
2912 }
2913
2914 char_array_0(name);
2915 unit_name = strstrip(name);
2916
2917 r = manager_load_unit(m, unit_name, NULL, NULL, &u);
2918 if (r < 0) {
2919 log_notice_errno(r, "Failed to load unit \"%s\", skipping deserialization: %m", unit_name);
2920 if (r == -ENOMEM)
2921 goto finish;
2922 unit_deserialize_skip(f);
2923 continue;
2924 }
2925
2926 r = unit_deserialize(u, f, fds);
2927 if (r < 0) {
2928 log_notice_errno(r, "Failed to deserialize unit \"%s\": %m", unit_name);
2929 if (r == -ENOMEM)
2930 goto finish;
2931 }
2932 }
2933
2934 finish:
2935 if (ferror(f))
2936 r = -EIO;
2937
2938 assert(m->n_reloading > 0);
2939 m->n_reloading--;
2940
2941 return r;
2942 }
2943
2944 int manager_reload(Manager *m) {
2945 int r, q;
2946 _cleanup_fclose_ FILE *f = NULL;
2947 _cleanup_fdset_free_ FDSet *fds = NULL;
2948
2949 assert(m);
2950
2951 r = manager_open_serialization(m, &f);
2952 if (r < 0)
2953 return r;
2954
2955 m->n_reloading++;
2956 bus_manager_send_reloading(m, true);
2957
2958 fds = fdset_new();
2959 if (!fds) {
2960 m->n_reloading--;
2961 return -ENOMEM;
2962 }
2963
2964 r = manager_serialize(m, f, fds, false);
2965 if (r < 0) {
2966 m->n_reloading--;
2967 return r;
2968 }
2969
2970 if (fseeko(f, 0, SEEK_SET) < 0) {
2971 m->n_reloading--;
2972 return -errno;
2973 }
2974
2975 /* From here on there is no way back. */
2976 manager_clear_jobs_and_units(m);
2977 lookup_paths_flush_generator(&m->lookup_paths);
2978 lookup_paths_free(&m->lookup_paths);
2979 dynamic_user_vacuum(m, false);
2980 m->uid_refs = hashmap_free(m->uid_refs);
2981 m->gid_refs = hashmap_free(m->gid_refs);
2982
2983 q = lookup_paths_init(&m->lookup_paths, m->unit_file_scope, 0, NULL);
2984 if (q < 0 && r >= 0)
2985 r = q;
2986
2987 q = manager_run_environment_generators(m);
2988 if (q < 0 && r >= 0)
2989 r = q;
2990
2991 /* Find new unit paths */
2992 q = manager_run_generators(m);
2993 if (q < 0 && r >= 0)
2994 r = q;
2995
2996 lookup_paths_reduce(&m->lookup_paths);
2997 manager_build_unit_path_cache(m);
2998
2999 /* First, enumerate what we can from all config files */
3000 manager_enumerate(m);
3001
3002 /* Second, deserialize our stored data */
3003 q = manager_deserialize(m, f, fds);
3004 if (q < 0) {
3005 log_error_errno(q, "Deserialization failed: %m");
3006
3007 if (r >= 0)
3008 r = q;
3009 }
3010
3011 fclose(f);
3012 f = NULL;
3013
3014 /* Re-register notify_fd as event source */
3015 q = manager_setup_notify(m);
3016 if (q < 0 && r >= 0)
3017 r = q;
3018
3019 q = manager_setup_cgroups_agent(m);
3020 if (q < 0 && r >= 0)
3021 r = q;
3022
3023 q = manager_setup_user_lookup_fd(m);
3024 if (q < 0 && r >= 0)
3025 r = q;
3026
3027 /* Third, fire things up! */
3028 manager_coldplug(m);
3029
3030 /* Release any dynamic users no longer referenced */
3031 dynamic_user_vacuum(m, true);
3032
3033 /* Release any references to UIDs/GIDs no longer referenced, and destroy any IPC owned by them */
3034 manager_vacuum_uid_refs(m);
3035 manager_vacuum_gid_refs(m);
3036
3037 /* Sync current state of bus names with our set of listening units */
3038 if (m->api_bus)
3039 manager_sync_bus_names(m, m->api_bus);
3040
3041 assert(m->n_reloading > 0);
3042 m->n_reloading--;
3043
3044 m->send_reloading_done = true;
3045
3046 return r;
3047 }
3048
3049 void manager_reset_failed(Manager *m) {
3050 Unit *u;
3051 Iterator i;
3052
3053 assert(m);
3054
3055 HASHMAP_FOREACH(u, m->units, i)
3056 unit_reset_failed(u);
3057 }
3058
3059 bool manager_unit_inactive_or_pending(Manager *m, const char *name) {
3060 Unit *u;
3061
3062 assert(m);
3063 assert(name);
3064
3065 /* Returns true if the unit is inactive or going down */
3066 u = manager_get_unit(m, name);
3067 if (!u)
3068 return true;
3069
3070 return unit_inactive_or_pending(u);
3071 }
3072
3073 static void manager_notify_finished(Manager *m) {
3074 char userspace[FORMAT_TIMESPAN_MAX], initrd[FORMAT_TIMESPAN_MAX], kernel[FORMAT_TIMESPAN_MAX], sum[FORMAT_TIMESPAN_MAX];
3075 usec_t firmware_usec, loader_usec, kernel_usec, initrd_usec, userspace_usec, total_usec;
3076
3077 if (m->test_run_flags)
3078 return;
3079
3080 if (MANAGER_IS_SYSTEM(m) && detect_container() <= 0) {
3081
3082 /* Note that MANAGER_TIMESTAMP_KERNEL's monotonic value is always at 0, and
3083 * MANAGER_TIMESTAMP_FIRMWARE's and MANAGER_TIMESTAMP_LOADER's monotonic value should be considered
3084 * negative values. */
3085
3086 firmware_usec = m->timestamps[MANAGER_TIMESTAMP_FIRMWARE].monotonic - m->timestamps[MANAGER_TIMESTAMP_LOADER].monotonic;
3087 loader_usec = m->timestamps[MANAGER_TIMESTAMP_LOADER].monotonic - m->timestamps[MANAGER_TIMESTAMP_KERNEL].monotonic;
3088 userspace_usec = m->timestamps[MANAGER_TIMESTAMP_FINISH].monotonic - m->timestamps[MANAGER_TIMESTAMP_USERSPACE].monotonic;
3089 total_usec = m->timestamps[MANAGER_TIMESTAMP_FIRMWARE].monotonic + m->timestamps[MANAGER_TIMESTAMP_FINISH].monotonic;
3090
3091 if (dual_timestamp_is_set(&m->timestamps[MANAGER_TIMESTAMP_INITRD])) {
3092
3093 /* The initrd case on bare-metal*/
3094 kernel_usec = m->timestamps[MANAGER_TIMESTAMP_INITRD].monotonic - m->timestamps[MANAGER_TIMESTAMP_KERNEL].monotonic;
3095 initrd_usec = m->timestamps[MANAGER_TIMESTAMP_USERSPACE].monotonic - m->timestamps[MANAGER_TIMESTAMP_INITRD].monotonic;
3096
3097 log_struct(LOG_INFO,
3098 "MESSAGE_ID=" SD_MESSAGE_STARTUP_FINISHED_STR,
3099 "KERNEL_USEC="USEC_FMT, kernel_usec,
3100 "INITRD_USEC="USEC_FMT, initrd_usec,
3101 "USERSPACE_USEC="USEC_FMT, userspace_usec,
3102 LOG_MESSAGE("Startup finished in %s (kernel) + %s (initrd) + %s (userspace) = %s.",
3103 format_timespan(kernel, sizeof(kernel), kernel_usec, USEC_PER_MSEC),
3104 format_timespan(initrd, sizeof(initrd), initrd_usec, USEC_PER_MSEC),
3105 format_timespan(userspace, sizeof(userspace), userspace_usec, USEC_PER_MSEC),
3106 format_timespan(sum, sizeof(sum), total_usec, USEC_PER_MSEC)),
3107 NULL);
3108 } else {
3109 /* The initrd-less case on bare-metal*/
3110
3111 kernel_usec = m->timestamps[MANAGER_TIMESTAMP_USERSPACE].monotonic - m->timestamps[MANAGER_TIMESTAMP_KERNEL].monotonic;
3112 initrd_usec = 0;
3113
3114 log_struct(LOG_INFO,
3115 "MESSAGE_ID=" SD_MESSAGE_STARTUP_FINISHED_STR,
3116 "KERNEL_USEC="USEC_FMT, kernel_usec,
3117 "USERSPACE_USEC="USEC_FMT, userspace_usec,
3118 LOG_MESSAGE("Startup finished in %s (kernel) + %s (userspace) = %s.",
3119 format_timespan(kernel, sizeof(kernel), kernel_usec, USEC_PER_MSEC),
3120 format_timespan(userspace, sizeof(userspace), userspace_usec, USEC_PER_MSEC),
3121 format_timespan(sum, sizeof(sum), total_usec, USEC_PER_MSEC)),
3122 NULL);
3123 }
3124 } else {
3125 /* The container case */
3126 firmware_usec = loader_usec = initrd_usec = kernel_usec = 0;
3127 total_usec = userspace_usec = m->timestamps[MANAGER_TIMESTAMP_FINISH].monotonic - m->timestamps[MANAGER_TIMESTAMP_USERSPACE].monotonic;
3128
3129 log_struct(LOG_INFO,
3130 "MESSAGE_ID=" SD_MESSAGE_USER_STARTUP_FINISHED_STR,
3131 "USERSPACE_USEC="USEC_FMT, userspace_usec,
3132 LOG_MESSAGE("Startup finished in %s.",
3133 format_timespan(sum, sizeof(sum), total_usec, USEC_PER_MSEC)),
3134 NULL);
3135 }
3136
3137 bus_manager_send_finished(m, firmware_usec, loader_usec, kernel_usec, initrd_usec, userspace_usec, total_usec);
3138
3139 sd_notifyf(false,
3140 m->ready_sent ? "STATUS=Startup finished in %s."
3141 : "READY=1\n"
3142 "STATUS=Startup finished in %s.",
3143 format_timespan(sum, sizeof(sum), total_usec, USEC_PER_MSEC));
3144 m->ready_sent = true;
3145 }
3146
3147 void manager_check_finished(Manager *m) {
3148 assert(m);
3149
3150 if (MANAGER_IS_RELOADING(m))
3151 return;
3152
3153 /* Verify that we are actually running currently. Initially
3154 * the exit code is set to invalid, and during operation it is
3155 * then set to MANAGER_OK */
3156 if (m->exit_code != MANAGER_OK)
3157 return;
3158
3159 /* For user managers, send out READY=1 as soon as we reach basic.target */
3160 if (MANAGER_IS_USER(m) && !m->ready_sent) {
3161 Unit *u;
3162
3163 u = manager_get_unit(m, SPECIAL_BASIC_TARGET);
3164 if (u && !u->job) {
3165 sd_notifyf(false,
3166 "READY=1\n"
3167 "STATUS=Reached " SPECIAL_BASIC_TARGET ".");
3168 m->ready_sent = true;
3169 }
3170 }
3171
3172 if (hashmap_size(m->jobs) > 0) {
3173 if (m->jobs_in_progress_event_source)
3174 /* Ignore any failure, this is only for feedback */
3175 (void) sd_event_source_set_time(m->jobs_in_progress_event_source, now(CLOCK_MONOTONIC) + JOBS_IN_PROGRESS_WAIT_USEC);
3176
3177 return;
3178 }
3179
3180 manager_flip_auto_status(m, false);
3181
3182 /* Notify Type=idle units that we are done now */
3183 manager_close_idle_pipe(m);
3184
3185 /* Turn off confirm spawn now */
3186 m->confirm_spawn = NULL;
3187
3188 /* No need to update ask password status when we're going non-interactive */
3189 manager_close_ask_password(m);
3190
3191 /* This is no longer the first boot */
3192 manager_set_first_boot(m, false);
3193
3194 if (MANAGER_IS_FINISHED(m))
3195 return;
3196
3197 dual_timestamp_get(m->timestamps + MANAGER_TIMESTAMP_FINISH);
3198
3199 manager_notify_finished(m);
3200
3201 manager_invalidate_startup_units(m);
3202 }
3203
3204 static bool generator_path_any(const char* const* paths) {
3205 char **path;
3206 bool found = false;
3207
3208 /* Optimize by skipping the whole process by not creating output directories
3209 * if no generators are found. */
3210 STRV_FOREACH(path, (char**) paths)
3211 if (access(*path, F_OK) == 0)
3212 found = true;
3213 else if (errno != ENOENT)
3214 log_warning_errno(errno, "Failed to open generator directory %s: %m", *path);
3215
3216 return found;
3217 }
3218
3219 static const char* system_env_generator_binary_paths[] = {
3220 "/run/systemd/system-environment-generators",
3221 "/etc/systemd/system-environment-generators",
3222 "/usr/local/lib/systemd/system-environment-generators",
3223 SYSTEM_ENV_GENERATOR_PATH,
3224 NULL
3225 };
3226
3227 static const char* user_env_generator_binary_paths[] = {
3228 "/run/systemd/user-environment-generators",
3229 "/etc/systemd/user-environment-generators",
3230 "/usr/local/lib/systemd/user-environment-generators",
3231 USER_ENV_GENERATOR_PATH,
3232 NULL
3233 };
3234
3235 static int manager_run_environment_generators(Manager *m) {
3236 char **tmp = NULL; /* this is only used in the forked process, no cleanup here */
3237 const char **paths;
3238 void* args[] = {&tmp, &tmp, &m->environment};
3239
3240 if (m->test_run_flags && !(m->test_run_flags & MANAGER_TEST_RUN_ENV_GENERATORS))
3241 return 0;
3242
3243 paths = MANAGER_IS_SYSTEM(m) ? system_env_generator_binary_paths : user_env_generator_binary_paths;
3244
3245 if (!generator_path_any(paths))
3246 return 0;
3247
3248 return execute_directories(paths, DEFAULT_TIMEOUT_USEC, gather_environment, args, NULL);
3249 }
3250
3251 static int manager_run_generators(Manager *m) {
3252 _cleanup_strv_free_ char **paths = NULL;
3253 const char *argv[5];
3254 int r;
3255
3256 assert(m);
3257
3258 if (m->test_run_flags && !(m->test_run_flags & MANAGER_TEST_RUN_GENERATORS))
3259 return 0;
3260
3261 paths = generator_binary_paths(m->unit_file_scope);
3262 if (!paths)
3263 return log_oom();
3264
3265 if (!generator_path_any((const char* const*) paths))
3266 return 0;
3267
3268 r = lookup_paths_mkdir_generator(&m->lookup_paths);
3269 if (r < 0)
3270 goto finish;
3271
3272 argv[0] = NULL; /* Leave this empty, execute_directory() will fill something in */
3273 argv[1] = m->lookup_paths.generator;
3274 argv[2] = m->lookup_paths.generator_early;
3275 argv[3] = m->lookup_paths.generator_late;
3276 argv[4] = NULL;
3277
3278 RUN_WITH_UMASK(0022)
3279 execute_directories((const char* const*) paths, DEFAULT_TIMEOUT_USEC,
3280 NULL, NULL, (char**) argv);
3281
3282 finish:
3283 lookup_paths_trim_generator(&m->lookup_paths);
3284 return r;
3285 }
3286
3287 int manager_environment_add(Manager *m, char **minus, char **plus) {
3288 char **a = NULL, **b = NULL, **l;
3289 assert(m);
3290
3291 l = m->environment;
3292
3293 if (!strv_isempty(minus)) {
3294 a = strv_env_delete(l, 1, minus);
3295 if (!a)
3296 return -ENOMEM;
3297
3298 l = a;
3299 }
3300
3301 if (!strv_isempty(plus)) {
3302 b = strv_env_merge(2, l, plus);
3303 if (!b) {
3304 strv_free(a);
3305 return -ENOMEM;
3306 }
3307
3308 l = b;
3309 }
3310
3311 if (m->environment != l)
3312 strv_free(m->environment);
3313 if (a != l)
3314 strv_free(a);
3315 if (b != l)
3316 strv_free(b);
3317
3318 m->environment = l;
3319 manager_sanitize_environment(m);
3320
3321 return 0;
3322 }
3323
3324 int manager_set_default_rlimits(Manager *m, struct rlimit **default_rlimit) {
3325 int i;
3326
3327 assert(m);
3328
3329 for (i = 0; i < _RLIMIT_MAX; i++) {
3330 m->rlimit[i] = mfree(m->rlimit[i]);
3331
3332 if (!default_rlimit[i])
3333 continue;
3334
3335 m->rlimit[i] = newdup(struct rlimit, default_rlimit[i], 1);
3336 if (!m->rlimit[i])
3337 return log_oom();
3338 }
3339
3340 return 0;
3341 }
3342
3343 void manager_recheck_journal(Manager *m) {
3344 Unit *u;
3345
3346 assert(m);
3347
3348 if (!MANAGER_IS_SYSTEM(m))
3349 return;
3350
3351 u = manager_get_unit(m, SPECIAL_JOURNALD_SOCKET);
3352 if (u && SOCKET(u)->state != SOCKET_RUNNING) {
3353 log_close_journal();
3354 return;
3355 }
3356
3357 u = manager_get_unit(m, SPECIAL_JOURNALD_SERVICE);
3358 if (u && SERVICE(u)->state != SERVICE_RUNNING) {
3359 log_close_journal();
3360 return;
3361 }
3362
3363 /* Hmm, OK, so the socket is fully up and the service is up
3364 * too, then let's make use of the thing. */
3365 log_open();
3366 }
3367
3368 void manager_set_show_status(Manager *m, ShowStatus mode) {
3369 assert(m);
3370 assert(IN_SET(mode, SHOW_STATUS_AUTO, SHOW_STATUS_NO, SHOW_STATUS_YES, SHOW_STATUS_TEMPORARY));
3371
3372 if (!MANAGER_IS_SYSTEM(m))
3373 return;
3374
3375 if (m->show_status != mode)
3376 log_debug("%s showing of status.",
3377 mode == SHOW_STATUS_NO ? "Disabling" : "Enabling");
3378 m->show_status = mode;
3379
3380 if (mode > 0)
3381 (void) touch("/run/systemd/show-status");
3382 else
3383 (void) unlink("/run/systemd/show-status");
3384 }
3385
3386 static bool manager_get_show_status(Manager *m, StatusType type) {
3387 assert(m);
3388
3389 if (!MANAGER_IS_SYSTEM(m))
3390 return false;
3391
3392 if (m->no_console_output)
3393 return false;
3394
3395 if (!IN_SET(manager_state(m), MANAGER_INITIALIZING, MANAGER_STARTING, MANAGER_STOPPING))
3396 return false;
3397
3398 /* If we cannot find out the status properly, just proceed. */
3399 if (type != STATUS_TYPE_EMERGENCY && manager_check_ask_password(m) > 0)
3400 return false;
3401
3402 if (m->show_status > 0)
3403 return true;
3404
3405 return false;
3406 }
3407
3408 const char *manager_get_confirm_spawn(Manager *m) {
3409 static int last_errno = 0;
3410 const char *vc = m->confirm_spawn;
3411 struct stat st;
3412 int r;
3413
3414 /* Here's the deal: we want to test the validity of the console but don't want
3415 * PID1 to go through the whole console process which might block. But we also
3416 * want to warn the user only once if something is wrong with the console so we
3417 * cannot do the sanity checks after spawning our children. So here we simply do
3418 * really basic tests to hopefully trap common errors.
3419 *
3420 * If the console suddenly disappear at the time our children will really it
3421 * then they will simply fail to acquire it and a positive answer will be
3422 * assumed. New children will fallback to /dev/console though.
3423 *
3424 * Note: TTYs are devices that can come and go any time, and frequently aren't
3425 * available yet during early boot (consider a USB rs232 dongle...). If for any
3426 * reason the configured console is not ready, we fallback to the default
3427 * console. */
3428
3429 if (!vc || path_equal(vc, "/dev/console"))
3430 return vc;
3431
3432 r = stat(vc, &st);
3433 if (r < 0)
3434 goto fail;
3435
3436 if (!S_ISCHR(st.st_mode)) {
3437 errno = ENOTTY;
3438 goto fail;
3439 }
3440
3441 last_errno = 0;
3442 return vc;
3443 fail:
3444 if (last_errno != errno) {
3445 last_errno = errno;
3446 log_warning_errno(errno, "Failed to open %s: %m, using default console", vc);
3447 }
3448 return "/dev/console";
3449 }
3450
3451 void manager_set_first_boot(Manager *m, bool b) {
3452 assert(m);
3453
3454 if (!MANAGER_IS_SYSTEM(m))
3455 return;
3456
3457 if (m->first_boot != (int) b) {
3458 if (b)
3459 (void) touch("/run/systemd/first-boot");
3460 else
3461 (void) unlink("/run/systemd/first-boot");
3462 }
3463
3464 m->first_boot = b;
3465 }
3466
/* Creates the flag file /run/systemd/confirm_spawn_disabled, which manager_is_confirm_spawn_disabled() checks
 * for. A failure to create it is ignored. */
void manager_disable_confirm_spawn(void) {
        (void) touch("/run/systemd/confirm_spawn_disabled");
}
3470
3471 bool manager_is_confirm_spawn_disabled(Manager *m) {
3472 if (!m->confirm_spawn)
3473 return true;
3474
3475 return access("/run/systemd/confirm_spawn_disabled", F_OK) >= 0;
3476 }
3477
3478 void manager_status_printf(Manager *m, StatusType type, const char *status, const char *format, ...) {
3479 va_list ap;
3480
3481 /* If m is NULL, assume we're after shutdown and let the messages through. */
3482
3483 if (m && !manager_get_show_status(m, type))
3484 return;
3485
3486 /* XXX We should totally drop the check for ephemeral here
3487 * and thus effectively make 'Type=idle' pointless. */
3488 if (type == STATUS_TYPE_EPHEMERAL && m && m->n_on_console > 0)
3489 return;
3490
3491 va_start(ap, format);
3492 status_vprintf(status, true, type == STATUS_TYPE_EPHEMERAL, format, ap);
3493 va_end(ap);
3494 }
3495
3496 Set *manager_get_units_requiring_mounts_for(Manager *m, const char *path) {
3497 char p[strlen(path)+1];
3498
3499 assert(m);
3500 assert(path);
3501
3502 strcpy(p, path);
3503 path_kill_slashes(p);
3504
3505 return hashmap_get(m->units_requiring_mounts_for, streq(p, "/") ? "" : p);
3506 }
3507
3508 void manager_set_exec_params(Manager *m, ExecParameters *p) {
3509 assert(m);
3510 assert(p);
3511
3512 p->environment = m->environment;
3513 p->confirm_spawn = manager_get_confirm_spawn(m);
3514 p->cgroup_supported = m->cgroup_supported;
3515 p->prefix = m->prefix;
3516
3517 SET_FLAG(p->flags, EXEC_PASS_LOG_UNIT|EXEC_CHOWN_DIRECTORIES, MANAGER_IS_SYSTEM(m));
3518 }
3519
3520 int manager_update_failed_units(Manager *m, Unit *u, bool failed) {
3521 unsigned size;
3522 int r;
3523
3524 assert(m);
3525 assert(u->manager == m);
3526
3527 size = set_size(m->failed_units);
3528
3529 if (failed) {
3530 r = set_ensure_allocated(&m->failed_units, NULL);
3531 if (r < 0)
3532 return log_oom();
3533
3534 if (set_put(m->failed_units, u) < 0)
3535 return log_oom();
3536 } else
3537 (void) set_remove(m->failed_units, u);
3538
3539 if (set_size(m->failed_units) != size)
3540 bus_manager_send_change_signal(m);
3541
3542 return 0;
3543 }
3544
3545 ManagerState manager_state(Manager *m) {
3546 Unit *u;
3547
3548 assert(m);
3549
3550 /* Did we ever finish booting? If not then we are still starting up */
3551 if (!MANAGER_IS_FINISHED(m)) {
3552
3553 u = manager_get_unit(m, SPECIAL_BASIC_TARGET);
3554 if (!u || !UNIT_IS_ACTIVE_OR_RELOADING(unit_active_state(u)))
3555 return MANAGER_INITIALIZING;
3556
3557 return MANAGER_STARTING;
3558 }
3559
3560 /* Is the special shutdown target active or queued? If so, we are in shutdown state */
3561 u = manager_get_unit(m, SPECIAL_SHUTDOWN_TARGET);
3562 if (u && unit_active_or_pending(u))
3563 return MANAGER_STOPPING;
3564
3565 if (MANAGER_IS_SYSTEM(m)) {
3566 /* Are the rescue or emergency targets active or queued? If so we are in maintenance state */
3567 u = manager_get_unit(m, SPECIAL_RESCUE_TARGET);
3568 if (u && unit_active_or_pending(u))
3569 return MANAGER_MAINTENANCE;
3570
3571 u = manager_get_unit(m, SPECIAL_EMERGENCY_TARGET);
3572 if (u && unit_active_or_pending(u))
3573 return MANAGER_MAINTENANCE;
3574 }
3575
3576 /* Are there any failed units? If so, we are in degraded mode */
3577 if (set_size(m->failed_units) > 0)
3578 return MANAGER_DEGRADED;
3579
3580 return MANAGER_RUNNING;
3581 }
3582
/* Bit 31 of a UID/GID reference counter value: marks entries whose IPC objects shall be removed once the
 * counter in the low 31 bits drops to zero. */
#define DESTROY_IPC_FLAG (UINT32_C(0x80000000))
3584
3585 static void manager_unref_uid_internal(
3586 Manager *m,
3587 Hashmap **uid_refs,
3588 uid_t uid,
3589 bool destroy_now,
3590 int (*_clean_ipc)(uid_t uid)) {
3591
3592 uint32_t c, n;
3593
3594 assert(m);
3595 assert(uid_refs);
3596 assert(uid_is_valid(uid));
3597 assert(_clean_ipc);
3598
3599 /* A generic implementation, covering both manager_unref_uid() and manager_unref_gid(), under the assumption
3600 * that uid_t and gid_t are actually defined the same way, with the same validity rules.
3601 *
3602 * We store a hashmap where the UID/GID is they key and the value is a 32bit reference counter, whose highest
3603 * bit is used as flag for marking UIDs/GIDs whose IPC objects to remove when the last reference to the UID/GID
3604 * is dropped. The flag is set to on, once at least one reference from a unit where RemoveIPC= is set is added
3605 * on a UID/GID. It is reset when the UID's/GID's reference counter drops to 0 again. */
3606
3607 assert_cc(sizeof(uid_t) == sizeof(gid_t));
3608 assert_cc(UID_INVALID == (uid_t) GID_INVALID);
3609
3610 if (uid == 0) /* We don't keep track of root, and will never destroy it */
3611 return;
3612
3613 c = PTR_TO_UINT32(hashmap_get(*uid_refs, UID_TO_PTR(uid)));
3614
3615 n = c & ~DESTROY_IPC_FLAG;
3616 assert(n > 0);
3617 n--;
3618
3619 if (destroy_now && n == 0) {
3620 hashmap_remove(*uid_refs, UID_TO_PTR(uid));
3621
3622 if (c & DESTROY_IPC_FLAG) {
3623 log_debug("%s " UID_FMT " is no longer referenced, cleaning up its IPC.",
3624 _clean_ipc == clean_ipc_by_uid ? "UID" : "GID",
3625 uid);
3626 (void) _clean_ipc(uid);
3627 }
3628 } else {
3629 c = n | (c & DESTROY_IPC_FLAG);
3630 assert_se(hashmap_update(*uid_refs, UID_TO_PTR(uid), UINT32_TO_PTR(c)) >= 0);
3631 }
3632 }
3633
3634 void manager_unref_uid(Manager *m, uid_t uid, bool destroy_now) {
3635 manager_unref_uid_internal(m, &m->uid_refs, uid, destroy_now, clean_ipc_by_uid);
3636 }
3637
3638 void manager_unref_gid(Manager *m, gid_t gid, bool destroy_now) {
3639 manager_unref_uid_internal(m, &m->gid_refs, (uid_t) gid, destroy_now, clean_ipc_by_gid);
3640 }
3641
3642 static int manager_ref_uid_internal(
3643 Manager *m,
3644 Hashmap **uid_refs,
3645 uid_t uid,
3646 bool clean_ipc) {
3647
3648 uint32_t c, n;
3649 int r;
3650
3651 assert(m);
3652 assert(uid_refs);
3653 assert(uid_is_valid(uid));
3654
3655 /* A generic implementation, covering both manager_ref_uid() and manager_ref_gid(), under the assumption
3656 * that uid_t and gid_t are actually defined the same way, with the same validity rules. */
3657
3658 assert_cc(sizeof(uid_t) == sizeof(gid_t));
3659 assert_cc(UID_INVALID == (uid_t) GID_INVALID);
3660
3661 if (uid == 0) /* We don't keep track of root, and will never destroy it */
3662 return 0;
3663
3664 r = hashmap_ensure_allocated(uid_refs, &trivial_hash_ops);
3665 if (r < 0)
3666 return r;
3667
3668 c = PTR_TO_UINT32(hashmap_get(*uid_refs, UID_TO_PTR(uid)));
3669
3670 n = c & ~DESTROY_IPC_FLAG;
3671 n++;
3672
3673 if (n & DESTROY_IPC_FLAG) /* check for overflow */
3674 return -EOVERFLOW;
3675
3676 c = n | (c & DESTROY_IPC_FLAG) | (clean_ipc ? DESTROY_IPC_FLAG : 0);
3677
3678 return hashmap_replace(*uid_refs, UID_TO_PTR(uid), UINT32_TO_PTR(c));
3679 }
3680
3681 int manager_ref_uid(Manager *m, uid_t uid, bool clean_ipc) {
3682 return manager_ref_uid_internal(m, &m->uid_refs, uid, clean_ipc);
3683 }
3684
3685 int manager_ref_gid(Manager *m, gid_t gid, bool clean_ipc) {
3686 return manager_ref_uid_internal(m, &m->gid_refs, (uid_t) gid, clean_ipc);
3687 }
3688
3689 static void manager_vacuum_uid_refs_internal(
3690 Manager *m,
3691 Hashmap **uid_refs,
3692 int (*_clean_ipc)(uid_t uid)) {
3693
3694 Iterator i;
3695 void *p, *k;
3696
3697 assert(m);
3698 assert(uid_refs);
3699 assert(_clean_ipc);
3700
3701 HASHMAP_FOREACH_KEY(p, k, *uid_refs, i) {
3702 uint32_t c, n;
3703 uid_t uid;
3704
3705 uid = PTR_TO_UID(k);
3706 c = PTR_TO_UINT32(p);
3707
3708 n = c & ~DESTROY_IPC_FLAG;
3709 if (n > 0)
3710 continue;
3711
3712 if (c & DESTROY_IPC_FLAG) {
3713 log_debug("Found unreferenced %s " UID_FMT " after reload/reexec. Cleaning up.",
3714 _clean_ipc == clean_ipc_by_uid ? "UID" : "GID",
3715 uid);
3716 (void) _clean_ipc(uid);
3717 }
3718
3719 assert_se(hashmap_remove(*uid_refs, k) == p);
3720 }
3721 }
3722
3723 void manager_vacuum_uid_refs(Manager *m) {
3724 manager_vacuum_uid_refs_internal(m, &m->uid_refs, clean_ipc_by_uid);
3725 }
3726
3727 void manager_vacuum_gid_refs(Manager *m) {
3728 manager_vacuum_uid_refs_internal(m, &m->gid_refs, clean_ipc_by_gid);
3729 }
3730
3731 static void manager_serialize_uid_refs_internal(
3732 Manager *m,
3733 FILE *f,
3734 Hashmap **uid_refs,
3735 const char *field_name) {
3736
3737 Iterator i;
3738 void *p, *k;
3739
3740 assert(m);
3741 assert(f);
3742 assert(uid_refs);
3743 assert(field_name);
3744
3745 /* Serialize the UID reference table. Or actually, just the IPC destruction flag of it, as the actual counter
3746 * of it is better rebuild after a reload/reexec. */
3747
3748 HASHMAP_FOREACH_KEY(p, k, *uid_refs, i) {
3749 uint32_t c;
3750 uid_t uid;
3751
3752 uid = PTR_TO_UID(k);
3753 c = PTR_TO_UINT32(p);
3754
3755 if (!(c & DESTROY_IPC_FLAG))
3756 continue;
3757
3758 fprintf(f, "%s=" UID_FMT "\n", field_name, uid);
3759 }
3760 }
3761
3762 void manager_serialize_uid_refs(Manager *m, FILE *f) {
3763 manager_serialize_uid_refs_internal(m, f, &m->uid_refs, "destroy-ipc-uid");
3764 }
3765
3766 void manager_serialize_gid_refs(Manager *m, FILE *f) {
3767 manager_serialize_uid_refs_internal(m, f, &m->gid_refs, "destroy-ipc-gid");
3768 }
3769
3770 static void manager_deserialize_uid_refs_one_internal(
3771 Manager *m,
3772 Hashmap** uid_refs,
3773 const char *value) {
3774
3775 uid_t uid;
3776 uint32_t c;
3777 int r;
3778
3779 assert(m);
3780 assert(uid_refs);
3781 assert(value);
3782
3783 r = parse_uid(value, &uid);
3784 if (r < 0 || uid == 0) {
3785 log_debug("Unable to parse UID reference serialization");
3786 return;
3787 }
3788
3789 r = hashmap_ensure_allocated(uid_refs, &trivial_hash_ops);
3790 if (r < 0) {
3791 log_oom();
3792 return;
3793 }
3794
3795 c = PTR_TO_UINT32(hashmap_get(*uid_refs, UID_TO_PTR(uid)));
3796 if (c & DESTROY_IPC_FLAG)
3797 return;
3798
3799 c |= DESTROY_IPC_FLAG;
3800
3801 r = hashmap_replace(*uid_refs, UID_TO_PTR(uid), UINT32_TO_PTR(c));
3802 if (r < 0) {
3803 log_debug("Failed to add UID reference entry");
3804 return;
3805 }
3806 }
3807
3808 void manager_deserialize_uid_refs_one(Manager *m, const char *value) {
3809 manager_deserialize_uid_refs_one_internal(m, &m->uid_refs, value);
3810 }
3811
3812 void manager_deserialize_gid_refs_one(Manager *m, const char *value) {
3813 manager_deserialize_uid_refs_one_internal(m, &m->gid_refs, value);
3814 }
3815
3816 int manager_dispatch_user_lookup_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
3817 struct buffer {
3818 uid_t uid;
3819 gid_t gid;
3820 char unit_name[UNIT_NAME_MAX+1];
3821 } _packed_ buffer;
3822
3823 Manager *m = userdata;
3824 ssize_t l;
3825 size_t n;
3826 Unit *u;
3827
3828 assert_se(source);
3829 assert_se(m);
3830
3831 /* Invoked whenever a child process succeeded resolving its user/group to use and sent us the resulting UID/GID
3832 * in a datagram. We parse the datagram here and pass it off to the unit, so that it can add a reference to the
3833 * UID/GID so that it can destroy the UID/GID's IPC objects when the reference counter drops to 0. */
3834
3835 l = recv(fd, &buffer, sizeof(buffer), MSG_DONTWAIT);
3836 if (l < 0) {
3837 if (IN_SET(errno, EINTR, EAGAIN))
3838 return 0;
3839
3840 return log_error_errno(errno, "Failed to read from user lookup fd: %m");
3841 }
3842
3843 if ((size_t) l <= offsetof(struct buffer, unit_name)) {
3844 log_warning("Received too short user lookup message, ignoring.");
3845 return 0;
3846 }
3847
3848 if ((size_t) l > offsetof(struct buffer, unit_name) + UNIT_NAME_MAX) {
3849 log_warning("Received too long user lookup message, ignoring.");
3850 return 0;
3851 }
3852
3853 if (!uid_is_valid(buffer.uid) && !gid_is_valid(buffer.gid)) {
3854 log_warning("Got user lookup message with invalid UID/GID pair, ignoring.");
3855 return 0;
3856 }
3857
3858 n = (size_t) l - offsetof(struct buffer, unit_name);
3859 if (memchr(buffer.unit_name, 0, n)) {
3860 log_warning("Received lookup message with embedded NUL character, ignoring.");
3861 return 0;
3862 }
3863
3864 buffer.unit_name[n] = 0;
3865 u = manager_get_unit(m, buffer.unit_name);
3866 if (!u) {
3867 log_debug("Got user lookup message but unit doesn't exist, ignoring.");
3868 return 0;
3869 }
3870
3871 log_unit_debug(u, "User lookup succeeded: uid=" UID_FMT " gid=" GID_FMT, buffer.uid, buffer.gid);
3872
3873 unit_notify_user_lookup(u, buffer.uid, buffer.gid);
3874 return 0;
3875 }
3876
3877 char *manager_taint_string(Manager *m) {
3878 _cleanup_free_ char *destination = NULL, *overflowuid = NULL, *overflowgid = NULL;
3879 char *buf, *e;
3880 int r;
3881
3882 /* Returns a "taint string", e.g. "local-hwclock:var-run-bad".
3883 * Only things that are detected at runtime should be tagged
3884 * here. For stuff that is set during compilation, emit a warning
3885 * in the configuration phase. */
3886
3887 assert(m);
3888
3889 buf = new(char, sizeof("split-usr:"
3890 "cgroups-missing:"
3891 "local-hwclock:"
3892 "var-run-bad:"
3893 "overflowuid-not-65534:"
3894 "overflowgid-not-65534:"));
3895 if (!buf)
3896 return NULL;
3897
3898 e = buf;
3899 buf[0] = 0;
3900
3901 if (m->taint_usr)
3902 e = stpcpy(e, "split-usr:");
3903
3904 if (access("/proc/cgroups", F_OK) < 0)
3905 e = stpcpy(e, "cgroups-missing:");
3906
3907 if (clock_is_localtime(NULL) > 0)
3908 e = stpcpy(e, "local-hwclock:");
3909
3910 r = readlink_malloc("/var/run", &destination);
3911 if (r < 0 || !PATH_IN_SET(destination, "../run", "/run"))
3912 e = stpcpy(e, "var-run-bad:");
3913
3914 r = read_one_line_file("/proc/sys/kernel/overflowuid", &overflowuid);
3915 if (r >= 0 && !streq(overflowuid, "65534"))
3916 e = stpcpy(e, "overflowuid-not-65534:");
3917
3918 r = read_one_line_file("/proc/sys/kernel/overflowgid", &overflowgid);
3919 if (r >= 0 && !streq(overflowgid, "65534"))
3920 e = stpcpy(e, "overflowgid-not-65534:");
3921
3922 /* remove the last ':' */
3923 if (e != buf)
3924 e[-1] = 0;
3925
3926 return buf;
3927 }
3928
3929 static const char *const manager_state_table[_MANAGER_STATE_MAX] = {
3930 [MANAGER_INITIALIZING] = "initializing",
3931 [MANAGER_STARTING] = "starting",
3932 [MANAGER_RUNNING] = "running",
3933 [MANAGER_DEGRADED] = "degraded",
3934 [MANAGER_MAINTENANCE] = "maintenance",
3935 [MANAGER_STOPPING] = "stopping",
3936 };
3937
3938 DEFINE_STRING_TABLE_LOOKUP(manager_state, ManagerState);
3939
3940 static const char *const manager_timestamp_table[_MANAGER_TIMESTAMP_MAX] = {
3941 [MANAGER_TIMESTAMP_FIRMWARE] = "firmware",
3942 [MANAGER_TIMESTAMP_LOADER] = "loader",
3943 [MANAGER_TIMESTAMP_KERNEL] = "kernel",
3944 [MANAGER_TIMESTAMP_INITRD] = "initrd",
3945 [MANAGER_TIMESTAMP_USERSPACE] = "userspace",
3946 [MANAGER_TIMESTAMP_FINISH] = "finish",
3947 [MANAGER_TIMESTAMP_SECURITY_START] = "security-start",
3948 [MANAGER_TIMESTAMP_SECURITY_FINISH] = "security-finish",
3949 [MANAGER_TIMESTAMP_GENERATORS_START] = "generators-start",
3950 [MANAGER_TIMESTAMP_GENERATORS_FINISH] = "generators-finish",
3951 [MANAGER_TIMESTAMP_UNITS_LOAD_START] = "units-load-start",
3952 [MANAGER_TIMESTAMP_UNITS_LOAD_FINISH] = "units-load-finish",
3953 };
3954
3955 DEFINE_STRING_TABLE_LOOKUP(manager_timestamp, ManagerTimestamp);