]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/main.c
Merge pull request #8533 from poettering/bootup-shutdown-phase2
[thirdparty/systemd.git] / src / core / main.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2010 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <getopt.h>
24 #include <signal.h>
25 #include <stdio.h>
26 #include <string.h>
27 #include <sys/mount.h>
28 #include <sys/prctl.h>
29 #include <sys/reboot.h>
30 #include <sys/stat.h>
31 #include <unistd.h>
32 #if HAVE_SECCOMP
33 #include <seccomp.h>
34 #endif
35 #if HAVE_VALGRIND_VALGRIND_H
36 #include <valgrind/valgrind.h>
37 #endif
38
39 #include "sd-bus.h"
40 #include "sd-daemon.h"
41 #include "sd-messages.h"
42
43 #include "alloc-util.h"
44 #include "architecture.h"
45 #include "build.h"
46 #include "bus-error.h"
47 #include "bus-util.h"
48 #include "capability-util.h"
49 #include "clock-util.h"
50 #include "conf-parser.h"
51 #include "cpu-set-util.h"
52 #include "dbus-manager.h"
53 #include "def.h"
54 #include "emergency-action.h"
55 #include "env-util.h"
56 #include "fd-util.h"
57 #include "fdset.h"
58 #include "fileio.h"
59 #include "format-util.h"
60 #include "fs-util.h"
61 #include "hostname-setup.h"
62 #include "ima-setup.h"
63 #include "killall.h"
64 #include "kmod-setup.h"
65 #include "load-fragment.h"
66 #include "log.h"
67 #include "loopback-setup.h"
68 #include "machine-id-setup.h"
69 #include "manager.h"
70 #include "missing.h"
71 #include "mount-setup.h"
72 #include "pager.h"
73 #include "parse-util.h"
74 #include "path-util.h"
75 #include "proc-cmdline.h"
76 #include "process-util.h"
77 #include "raw-clone.h"
78 #include "rlimit-util.h"
79 #if HAVE_SECCOMP
80 #include "seccomp-util.h"
81 #endif
82 #include "selinux-setup.h"
83 #include "selinux-util.h"
84 #include "signal-util.h"
85 #include "smack-setup.h"
86 #include "special.h"
87 #include "stat-util.h"
88 #include "stdio-util.h"
89 #include "strv.h"
90 #include "switch-root.h"
91 #include "terminal-util.h"
92 #include "umask-util.h"
93 #include "user-util.h"
94 #include "util.h"
95 #include "virt.h"
96 #include "watchdog.h"
97
98 static enum {
99 ACTION_RUN,
100 ACTION_HELP,
101 ACTION_VERSION,
102 ACTION_TEST,
103 ACTION_DUMP_CONFIGURATION_ITEMS
104 } arg_action = ACTION_RUN;
105 static char *arg_default_unit = NULL;
106 static bool arg_system = false;
107 static bool arg_dump_core = true;
108 static int arg_crash_chvt = -1;
109 static bool arg_crash_shell = false;
110 static bool arg_crash_reboot = false;
111 static char *arg_confirm_spawn = NULL;
112 static ShowStatus arg_show_status = _SHOW_STATUS_UNSET;
113 static bool arg_switched_root = false;
114 static bool arg_no_pager = false;
115 static bool arg_service_watchdogs = true;
116 static char ***arg_join_controllers = NULL;
117 static ExecOutput arg_default_std_output = EXEC_OUTPUT_JOURNAL;
118 static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT;
119 static usec_t arg_default_restart_usec = DEFAULT_RESTART_USEC;
120 static usec_t arg_default_timeout_start_usec = DEFAULT_TIMEOUT_USEC;
121 static usec_t arg_default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC;
122 static usec_t arg_default_start_limit_interval = DEFAULT_START_LIMIT_INTERVAL;
123 static unsigned arg_default_start_limit_burst = DEFAULT_START_LIMIT_BURST;
124 static usec_t arg_runtime_watchdog = 0;
125 static usec_t arg_shutdown_watchdog = 10 * USEC_PER_MINUTE;
126 static char *arg_watchdog_device = NULL;
127 static char **arg_default_environment = NULL;
128 static struct rlimit *arg_default_rlimit[_RLIMIT_MAX] = {};
129 static uint64_t arg_capability_bounding_set = CAP_ALL;
130 static bool arg_no_new_privs = false;
131 static nsec_t arg_timer_slack_nsec = NSEC_INFINITY;
132 static usec_t arg_default_timer_accuracy_usec = 1 * USEC_PER_MINUTE;
133 static Set* arg_syscall_archs = NULL;
134 static FILE* arg_serialization = NULL;
135 static bool arg_default_cpu_accounting = false;
136 static bool arg_default_io_accounting = false;
137 static bool arg_default_ip_accounting = false;
138 static bool arg_default_blockio_accounting = false;
139 static bool arg_default_memory_accounting = MEMORY_ACCOUNTING_DEFAULT;
140 static bool arg_default_tasks_accounting = true;
141 static uint64_t arg_default_tasks_max = UINT64_MAX;
142 static sd_id128_t arg_machine_id = {};
143 static EmergencyAction arg_cad_burst_action = EMERGENCY_ACTION_REBOOT_FORCE;
144
145 _noreturn_ static void freeze_or_reboot(void) {
146
147 if (arg_crash_reboot) {
148 log_notice("Rebooting in 10s...");
149 (void) sleep(10);
150
151 log_notice("Rebooting now...");
152 (void) reboot(RB_AUTOBOOT);
153 log_emergency_errno(errno, "Failed to reboot: %m");
154 }
155
156 log_emergency("Freezing execution.");
157 freeze();
158 }
159
/* Signal handler registered by install_crash_handler() for the signals in SIGNALS_CRASH_HANDLER.
 *
 * Steps, in order:
 *   1. If we are not PID 1 the signal is simply raised again. If core dumping is disabled
 *      (arg_dump_core) this is only logged. Otherwise a child is cloned which resets the signal
 *      to its default disposition and kills itself with it, and we wait for that child.
 *   2. If arg_crash_chvt is non-negative, switch to that virtual terminal.
 *   3. SIGCHLD is set to SIG_IGN with SA_NOCLDWAIT.
 *   4. If arg_crash_shell is set, wait ten seconds, spawn /bin/sh on the console and wait for it.
 *   5. Hand over to freeze_or_reboot().
 *
 * All failures on the way are logged and otherwise ignored. */
160 _noreturn_ static void crash(int sig) {
161 struct sigaction sa;
162 pid_t pid;
163
164 if (getpid_cached() != 1)
165 /* Pass this on immediately, if this is not PID 1 */
166 (void) raise(sig);
167 else if (!arg_dump_core)
168 log_emergency("Caught <%s>, not dumping core.", signal_to_string(sig));
169 else {
170 sa = (struct sigaction) {
171 .sa_handler = nop_signal_handler,
172 .sa_flags = SA_NOCLDSTOP|SA_RESTART,
173 };
174
175 /* We want to wait for the core process, hence let's enable SIGCHLD */
176 (void) sigaction(SIGCHLD, &sa, NULL);
177
178 pid = raw_clone(SIGCHLD);
179 if (pid < 0)
180 log_emergency_errno(errno, "Caught <%s>, cannot fork for core dump: %m", signal_to_string(sig));
181 else if (pid == 0) {
182 /* Enable default signal handler for core dump */
183
184 sa = (struct sigaction) {
185 .sa_handler = SIG_DFL,
186 };
187 (void) sigaction(sig, &sa, NULL);
188
189 /* Don't limit the coredump size */
190 (void) setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY));
191
192 /* Just to be sure... */
193 (void) chdir("/");
194
195 /* Raise the signal again */
196 pid = raw_getpid();
197 (void) kill(pid, sig); /* raise() would kill the parent */
198
/* The kill() above is expected to terminate the child; the _exit() is a last resort */
199 assert_not_reached("We shouldn't be here...");
200 _exit(EXIT_FAILURE);
201 } else {
202 siginfo_t status;
203 int r;
204
205 /* Order things nicely. */
206 r = wait_for_terminate(pid, &status);
207 if (r < 0)
208 log_emergency_errno(r, "Caught <%s>, waitpid() failed: %m", signal_to_string(sig));
/* Any si_code other than CLD_DUMPED is reported as a failed core dump */
209 else if (status.si_code != CLD_DUMPED)
210 log_emergency("Caught <%s>, core dump failed (child "PID_FMT", code=%s, status=%i/%s).",
211 signal_to_string(sig),
212 pid, sigchld_code_to_string(status.si_code),
213 status.si_status,
214 strna(status.si_code == CLD_EXITED
215 ? exit_status_to_string(status.si_status, EXIT_STATUS_MINIMAL)
216 : signal_to_string(status.si_status)));
217 else
218 log_emergency("Caught <%s>, dumped core as pid "PID_FMT".", signal_to_string(sig), pid);
219 }
220 }
221
/* A negative arg_crash_chvt means no VT switch was requested */
222 if (arg_crash_chvt >= 0)
223 (void) chvt(arg_crash_chvt);
224
225 sa = (struct sigaction) {
226 .sa_handler = SIG_IGN,
227 .sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT|SA_RESTART,
228 };
229
230 /* Let the kernel reap children for us */
231 (void) sigaction(SIGCHLD, &sa, NULL);
232
233 if (arg_crash_shell) {
234 log_notice("Executing crash shell in 10s...");
235 (void) sleep(10);
236
237 pid = raw_clone(SIGCHLD);
238 if (pid < 0)
239 log_emergency_errno(errno, "Failed to fork off crash shell: %m");
240 else if (pid == 0) {
/* Child: new session, console as stdio, then replace ourselves with the shell */
241 (void) setsid();
242 (void) make_console_stdio();
243 (void) execle("/bin/sh", "/bin/sh", NULL, environ);
244
245 log_emergency_errno(errno, "execle() failed: %m");
246 _exit(EXIT_FAILURE);
247 } else {
248 log_info("Spawned crash shell as PID "PID_FMT".", pid);
249 (void) wait_for_terminate(pid, NULL);
250 }
251 }
252
253 freeze_or_reboot();
254 }
255
256 static void install_crash_handler(void) {
257 static const struct sigaction sa = {
258 .sa_handler = crash,
259 .sa_flags = SA_NODEFER, /* So that we can raise the signal again from the signal handler */
260 };
261 int r;
262
263 /* We ignore the return value here, since, we don't mind if we
264 * cannot set up a crash handler */
265 r = sigaction_many(&sa, SIGNALS_CRASH_HANDLER, -1);
266 if (r < 0)
267 log_debug_errno(r, "I had trouble setting up the crash handler, ignoring: %m");
268 }
269
270 static int console_setup(void) {
271 _cleanup_close_ int tty_fd = -1;
272 int r;
273
274 tty_fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
275 if (tty_fd < 0)
276 return log_error_errno(tty_fd, "Failed to open /dev/console: %m");
277
278 /* We don't want to force text mode. plymouth may be showing
279 * pictures already from initrd. */
280 r = reset_terminal_fd(tty_fd, false);
281 if (r < 0)
282 return log_error_errno(r, "Failed to reset /dev/console: %m");
283
284 return 0;
285 }
286
287 static int parse_crash_chvt(const char *value) {
288 int b;
289
290 if (safe_atoi(value, &arg_crash_chvt) >= 0)
291 return 0;
292
293 b = parse_boolean(value);
294 if (b < 0)
295 return b;
296
297 if (b > 0)
298 arg_crash_chvt = 0; /* switch to where kmsg goes */
299 else
300 arg_crash_chvt = -1; /* turn off switching */
301
302 return 0;
303 }
304
/* Translates a confirm-spawn setting into the path of the console to ask on.
 *
 *   - NULL or boolean true     -> "/dev/console"
 *   - boolean false            -> NULL (feature off)
 *   - something is_path() likes -> used verbatim
 *   - anything else            -> prefixed with "/dev/"
 *
 * On success 0 is returned and *console is set to a newly allocated string (or NULL). Returns
 * -ENOMEM if the allocation fails, in which case *console is left untouched. */
static int parse_confirm_spawn(const char *value, char **console) {
        char *path;
        int enabled;

        enabled = value ? parse_boolean(value) : 1;
        if (enabled == 0) {
                *console = NULL;
                return 0;
        }

        path = enabled > 0 ? strdup("/dev/console") :
               is_path(value) ? strdup(value) :
               strjoin("/dev/", value);
        if (!path)
                return -ENOMEM;

        *console = path;
        return 0;
}
326
327 static int set_machine_id(const char *m) {
328 sd_id128_t t;
329 assert(m);
330
331 if (sd_id128_from_string(m, &t) < 0)
332 return -EINVAL;
333
334 if (sd_id128_is_null(t))
335 return -EINVAL;
336
337 arg_machine_id = t;
338 return 0;
339 }
340
341 static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
342
343 int r;
344
345 assert(key);
346
347 if (STR_IN_SET(key, "systemd.unit", "rd.systemd.unit")) {
348
349 if (proc_cmdline_value_missing(key, value))
350 return 0;
351
352 if (!unit_name_is_valid(value, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
353 log_warning("Unit name specified on %s= is not valid, ignoring: %s", key, value);
354 else if (in_initrd() == !!startswith(key, "rd.")) {
355 if (free_and_strdup(&arg_default_unit, value) < 0)
356 return log_oom();
357 }
358
359 } else if (proc_cmdline_key_streq(key, "systemd.dump_core")) {
360
361 r = value ? parse_boolean(value) : true;
362 if (r < 0)
363 log_warning("Failed to parse dump core switch %s. Ignoring.", value);
364 else
365 arg_dump_core = r;
366
367 } else if (proc_cmdline_key_streq(key, "systemd.crash_chvt")) {
368
369 if (!value)
370 arg_crash_chvt = 0; /* turn on */
371 else if (parse_crash_chvt(value) < 0)
372 log_warning("Failed to parse crash chvt switch %s. Ignoring.", value);
373
374 } else if (proc_cmdline_key_streq(key, "systemd.crash_shell")) {
375
376 r = value ? parse_boolean(value) : true;
377 if (r < 0)
378 log_warning("Failed to parse crash shell switch %s. Ignoring.", value);
379 else
380 arg_crash_shell = r;
381
382 } else if (proc_cmdline_key_streq(key, "systemd.crash_reboot")) {
383
384 r = value ? parse_boolean(value) : true;
385 if (r < 0)
386 log_warning("Failed to parse crash reboot switch %s. Ignoring.", value);
387 else
388 arg_crash_reboot = r;
389
390 } else if (proc_cmdline_key_streq(key, "systemd.confirm_spawn")) {
391 char *s;
392
393 r = parse_confirm_spawn(value, &s);
394 if (r < 0)
395 log_warning_errno(r, "Failed to parse confirm_spawn switch %s. Ignoring.", value);
396 else {
397 free(arg_confirm_spawn);
398 arg_confirm_spawn = s;
399 }
400
401 } else if (proc_cmdline_key_streq(key, "systemd.service_watchdogs")) {
402
403 r = value ? parse_boolean(value) : true;
404 if (r < 0)
405 log_warning("Failed to parse service watchdog switch %s. Ignoring.", value);
406 else
407 arg_service_watchdogs = r;
408
409 } else if (proc_cmdline_key_streq(key, "systemd.show_status")) {
410
411 if (value) {
412 r = parse_show_status(value, &arg_show_status);
413 if (r < 0)
414 log_warning("Failed to parse show status switch %s. Ignoring.", value);
415 } else
416 arg_show_status = SHOW_STATUS_YES;
417
418 } else if (proc_cmdline_key_streq(key, "systemd.default_standard_output")) {
419
420 if (proc_cmdline_value_missing(key, value))
421 return 0;
422
423 r = exec_output_from_string(value);
424 if (r < 0)
425 log_warning("Failed to parse default standard output switch %s. Ignoring.", value);
426 else
427 arg_default_std_output = r;
428
429 } else if (proc_cmdline_key_streq(key, "systemd.default_standard_error")) {
430
431 if (proc_cmdline_value_missing(key, value))
432 return 0;
433
434 r = exec_output_from_string(value);
435 if (r < 0)
436 log_warning("Failed to parse default standard error switch %s. Ignoring.", value);
437 else
438 arg_default_std_error = r;
439
440 } else if (streq(key, "systemd.setenv")) {
441
442 if (proc_cmdline_value_missing(key, value))
443 return 0;
444
445 if (env_assignment_is_valid(value)) {
446 char **env;
447
448 env = strv_env_set(arg_default_environment, value);
449 if (!env)
450 return log_oom();
451
452 arg_default_environment = env;
453 } else
454 log_warning("Environment variable name '%s' is not valid. Ignoring.", value);
455
456 } else if (proc_cmdline_key_streq(key, "systemd.machine_id")) {
457
458 if (proc_cmdline_value_missing(key, value))
459 return 0;
460
461 r = set_machine_id(value);
462 if (r < 0)
463 log_warning("MachineID '%s' is not valid. Ignoring.", value);
464
465 } else if (proc_cmdline_key_streq(key, "systemd.default_timeout_start_sec")) {
466
467 if (proc_cmdline_value_missing(key, value))
468 return 0;
469
470 r = parse_sec(value, &arg_default_timeout_start_usec);
471 if (r < 0)
472 log_warning_errno(r, "Failed to parse default start timeout: %s, ignoring.", value);
473
474 if (arg_default_timeout_start_usec <= 0)
475 arg_default_timeout_start_usec = USEC_INFINITY;
476
477 } else if (proc_cmdline_key_streq(key, "systemd.watchdog_device")) {
478
479 if (proc_cmdline_value_missing(key, value))
480 return 0;
481
482 parse_path_argument_and_warn(value, false, &arg_watchdog_device);
483
484 } else if (streq(key, "quiet") && !value) {
485
486 if (arg_show_status == _SHOW_STATUS_UNSET)
487 arg_show_status = SHOW_STATUS_AUTO;
488
489 } else if (streq(key, "debug") && !value) {
490
491 /* Note that log_parse_environment() handles 'debug'
492 * too, and sets the log level to LOG_DEBUG. */
493
494 if (detect_container() > 0)
495 log_set_target(LOG_TARGET_CONSOLE);
496
497 } else if (!value) {
498 const char *target;
499
500 /* SysV compatibility */
501 target = runlevel_to_target(key);
502 if (target)
503 return free_and_strdup(&arg_default_unit, target);
504 }
505
506 return 0;
507 }
508
509 #define DEFINE_SETTER(name, func, descr) \
510 static int name(const char *unit, \
511 const char *filename, \
512 unsigned line, \
513 const char *section, \
514 unsigned section_line, \
515 const char *lvalue, \
516 int ltype, \
517 const char *rvalue, \
518 void *data, \
519 void *userdata) { \
520 \
521 int r; \
522 \
523 assert(filename); \
524 assert(lvalue); \
525 assert(rvalue); \
526 \
527 r = func(rvalue); \
528 if (r < 0) \
529 log_syntax(unit, LOG_ERR, filename, line, r, \
530 "Invalid " descr "'%s': %m", \
531 rvalue); \
532 \
533 return 0; \
534 }
535
536 DEFINE_SETTER(config_parse_level2, log_set_max_level_from_string, "log level")
537 DEFINE_SETTER(config_parse_target, log_set_target_from_string, "target")
538 DEFINE_SETTER(config_parse_color, log_show_color_from_string, "color" )
539 DEFINE_SETTER(config_parse_location, log_show_location_from_string, "location")
540
541 static int config_parse_cpu_affinity2(
542 const char *unit,
543 const char *filename,
544 unsigned line,
545 const char *section,
546 unsigned section_line,
547 const char *lvalue,
548 int ltype,
549 const char *rvalue,
550 void *data,
551 void *userdata) {
552
553 _cleanup_cpu_free_ cpu_set_t *c = NULL;
554 int ncpus;
555
556 ncpus = parse_cpu_set_and_warn(rvalue, &c, unit, filename, line, lvalue);
557 if (ncpus < 0)
558 return ncpus;
559
560 if (sched_setaffinity(0, CPU_ALLOC_SIZE(ncpus), c) < 0)
561 log_warning_errno(errno, "Failed to set CPU affinity: %m");
562
563 return 0;
564 }
565
566 static int config_parse_show_status(
567 const char* unit,
568 const char *filename,
569 unsigned line,
570 const char *section,
571 unsigned section_line,
572 const char *lvalue,
573 int ltype,
574 const char *rvalue,
575 void *data,
576 void *userdata) {
577
578 int k;
579 ShowStatus *b = data;
580
581 assert(filename);
582 assert(lvalue);
583 assert(rvalue);
584 assert(data);
585
586 k = parse_show_status(rvalue, b);
587 if (k < 0) {
588 log_syntax(unit, LOG_ERR, filename, line, k, "Failed to parse show status setting, ignoring: %s", rvalue);
589 return 0;
590 }
591
592 return 0;
593 }
594
595 static int config_parse_output_restricted(
596 const char* unit,
597 const char *filename,
598 unsigned line,
599 const char *section,
600 unsigned section_line,
601 const char *lvalue,
602 int ltype,
603 const char *rvalue,
604 void *data,
605 void *userdata) {
606
607 ExecOutput t, *eo = data;
608
609 assert(filename);
610 assert(lvalue);
611 assert(rvalue);
612 assert(data);
613
614 t = exec_output_from_string(rvalue);
615 if (t < 0) {
616 log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse output type, ignoring: %s", rvalue);
617 return 0;
618 }
619
620 if (IN_SET(t, EXEC_OUTPUT_SOCKET, EXEC_OUTPUT_NAMED_FD, EXEC_OUTPUT_FILE)) {
621 log_syntax(unit, LOG_ERR, filename, line, 0, "Standard output types socket, fd:, file: are not supported as defaults, ignoring: %s", rvalue);
622 return 0;
623 }
624
625 *eo = t;
626 return 0;
627 }
628
629 static int config_parse_crash_chvt(
630 const char* unit,
631 const char *filename,
632 unsigned line,
633 const char *section,
634 unsigned section_line,
635 const char *lvalue,
636 int ltype,
637 const char *rvalue,
638 void *data,
639 void *userdata) {
640
641 int r;
642
643 assert(filename);
644 assert(lvalue);
645 assert(rvalue);
646
647 r = parse_crash_chvt(rvalue);
648 if (r < 0) {
649 log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse CrashChangeVT= setting, ignoring: %s", rvalue);
650 return 0;
651 }
652
653 return 0;
654 }
655
656 static int parse_config_file(void) {
657
658 const ConfigTableItem items[] = {
659 { "Manager", "LogLevel", config_parse_level2, 0, NULL },
660 { "Manager", "LogTarget", config_parse_target, 0, NULL },
661 { "Manager", "LogColor", config_parse_color, 0, NULL },
662 { "Manager", "LogLocation", config_parse_location, 0, NULL },
663 { "Manager", "DumpCore", config_parse_bool, 0, &arg_dump_core },
664 { "Manager", "CrashChVT", /* legacy */ config_parse_crash_chvt, 0, NULL },
665 { "Manager", "CrashChangeVT", config_parse_crash_chvt, 0, NULL },
666 { "Manager", "CrashShell", config_parse_bool, 0, &arg_crash_shell },
667 { "Manager", "CrashReboot", config_parse_bool, 0, &arg_crash_reboot },
668 { "Manager", "ShowStatus", config_parse_show_status, 0, &arg_show_status },
669 { "Manager", "CPUAffinity", config_parse_cpu_affinity2, 0, NULL },
670 { "Manager", "JoinControllers", config_parse_join_controllers, 0, &arg_join_controllers },
671 { "Manager", "RuntimeWatchdogSec", config_parse_sec, 0, &arg_runtime_watchdog },
672 { "Manager", "ShutdownWatchdogSec", config_parse_sec, 0, &arg_shutdown_watchdog },
673 { "Manager", "WatchdogDevice", config_parse_path, 0, &arg_watchdog_device },
674 { "Manager", "CapabilityBoundingSet", config_parse_capability_set, 0, &arg_capability_bounding_set },
675 { "Manager", "NoNewPrivileges", config_parse_bool, 0, &arg_no_new_privs },
676 #if HAVE_SECCOMP
677 { "Manager", "SystemCallArchitectures", config_parse_syscall_archs, 0, &arg_syscall_archs },
678 #endif
679 { "Manager", "TimerSlackNSec", config_parse_nsec, 0, &arg_timer_slack_nsec },
680 { "Manager", "DefaultTimerAccuracySec", config_parse_sec, 0, &arg_default_timer_accuracy_usec },
681 { "Manager", "DefaultStandardOutput", config_parse_output_restricted,0, &arg_default_std_output },
682 { "Manager", "DefaultStandardError", config_parse_output_restricted,0, &arg_default_std_error },
683 { "Manager", "DefaultTimeoutStartSec", config_parse_sec, 0, &arg_default_timeout_start_usec },
684 { "Manager", "DefaultTimeoutStopSec", config_parse_sec, 0, &arg_default_timeout_stop_usec },
685 { "Manager", "DefaultRestartSec", config_parse_sec, 0, &arg_default_restart_usec },
686 { "Manager", "DefaultStartLimitInterval", config_parse_sec, 0, &arg_default_start_limit_interval }, /* obsolete alias */
687 { "Manager", "DefaultStartLimitIntervalSec",config_parse_sec, 0, &arg_default_start_limit_interval },
688 { "Manager", "DefaultStartLimitBurst", config_parse_unsigned, 0, &arg_default_start_limit_burst },
689 { "Manager", "DefaultEnvironment", config_parse_environ, 0, &arg_default_environment },
690 { "Manager", "DefaultLimitCPU", config_parse_limit, RLIMIT_CPU, arg_default_rlimit },
691 { "Manager", "DefaultLimitFSIZE", config_parse_limit, RLIMIT_FSIZE, arg_default_rlimit },
692 { "Manager", "DefaultLimitDATA", config_parse_limit, RLIMIT_DATA, arg_default_rlimit },
693 { "Manager", "DefaultLimitSTACK", config_parse_limit, RLIMIT_STACK, arg_default_rlimit },
694 { "Manager", "DefaultLimitCORE", config_parse_limit, RLIMIT_CORE, arg_default_rlimit },
695 { "Manager", "DefaultLimitRSS", config_parse_limit, RLIMIT_RSS, arg_default_rlimit },
696 { "Manager", "DefaultLimitNOFILE", config_parse_limit, RLIMIT_NOFILE, arg_default_rlimit },
697 { "Manager", "DefaultLimitAS", config_parse_limit, RLIMIT_AS, arg_default_rlimit },
698 { "Manager", "DefaultLimitNPROC", config_parse_limit, RLIMIT_NPROC, arg_default_rlimit },
699 { "Manager", "DefaultLimitMEMLOCK", config_parse_limit, RLIMIT_MEMLOCK, arg_default_rlimit },
700 { "Manager", "DefaultLimitLOCKS", config_parse_limit, RLIMIT_LOCKS, arg_default_rlimit },
701 { "Manager", "DefaultLimitSIGPENDING", config_parse_limit, RLIMIT_SIGPENDING, arg_default_rlimit },
702 { "Manager", "DefaultLimitMSGQUEUE", config_parse_limit, RLIMIT_MSGQUEUE, arg_default_rlimit },
703 { "Manager", "DefaultLimitNICE", config_parse_limit, RLIMIT_NICE, arg_default_rlimit },
704 { "Manager", "DefaultLimitRTPRIO", config_parse_limit, RLIMIT_RTPRIO, arg_default_rlimit },
705 { "Manager", "DefaultLimitRTTIME", config_parse_limit, RLIMIT_RTTIME, arg_default_rlimit },
706 { "Manager", "DefaultCPUAccounting", config_parse_bool, 0, &arg_default_cpu_accounting },
707 { "Manager", "DefaultIOAccounting", config_parse_bool, 0, &arg_default_io_accounting },
708 { "Manager", "DefaultIPAccounting", config_parse_bool, 0, &arg_default_ip_accounting },
709 { "Manager", "DefaultBlockIOAccounting", config_parse_bool, 0, &arg_default_blockio_accounting },
710 { "Manager", "DefaultMemoryAccounting", config_parse_bool, 0, &arg_default_memory_accounting },
711 { "Manager", "DefaultTasksAccounting", config_parse_bool, 0, &arg_default_tasks_accounting },
712 { "Manager", "DefaultTasksMax", config_parse_tasks_max, 0, &arg_default_tasks_max },
713 { "Manager", "CtrlAltDelBurstAction", config_parse_emergency_action, 0, &arg_cad_burst_action },
714 {}
715 };
716
717 const char *fn, *conf_dirs_nulstr;
718
719 fn = arg_system ?
720 PKGSYSCONFDIR "/system.conf" :
721 PKGSYSCONFDIR "/user.conf";
722
723 conf_dirs_nulstr = arg_system ?
724 CONF_PATHS_NULSTR("systemd/system.conf.d") :
725 CONF_PATHS_NULSTR("systemd/user.conf.d");
726
727 (void) config_parse_many_nulstr(fn, conf_dirs_nulstr, "Manager\0", config_item_table_lookup, items, CONFIG_PARSE_WARN, NULL);
728
729 /* Traditionally "0" was used to turn off the default unit timeouts. Fix this up so that we used USEC_INFINITY
730 * like everywhere else. */
731 if (arg_default_timeout_start_usec <= 0)
732 arg_default_timeout_start_usec = USEC_INFINITY;
733 if (arg_default_timeout_stop_usec <= 0)
734 arg_default_timeout_stop_usec = USEC_INFINITY;
735
736 return 0;
737 }
738
739 static void set_manager_defaults(Manager *m) {
740
741 assert(m);
742
743 m->default_timer_accuracy_usec = arg_default_timer_accuracy_usec;
744 m->default_std_output = arg_default_std_output;
745 m->default_std_error = arg_default_std_error;
746 m->default_timeout_start_usec = arg_default_timeout_start_usec;
747 m->default_timeout_stop_usec = arg_default_timeout_stop_usec;
748 m->default_restart_usec = arg_default_restart_usec;
749 m->default_start_limit_interval = arg_default_start_limit_interval;
750 m->default_start_limit_burst = arg_default_start_limit_burst;
751 m->default_cpu_accounting = arg_default_cpu_accounting;
752 m->default_io_accounting = arg_default_io_accounting;
753 m->default_ip_accounting = arg_default_ip_accounting;
754 m->default_blockio_accounting = arg_default_blockio_accounting;
755 m->default_memory_accounting = arg_default_memory_accounting;
756 m->default_tasks_accounting = arg_default_tasks_accounting;
757 m->default_tasks_max = arg_default_tasks_max;
758
759 manager_set_default_rlimits(m, arg_default_rlimit);
760 manager_environment_add(m, NULL, arg_default_environment);
761 }
762
763 static void set_manager_settings(Manager *m) {
764
765 assert(m);
766
767 m->confirm_spawn = arg_confirm_spawn;
768 m->service_watchdogs = arg_service_watchdogs;
769 m->runtime_watchdog = arg_runtime_watchdog;
770 m->shutdown_watchdog = arg_shutdown_watchdog;
771 m->cad_burst_action = arg_cad_burst_action;
772
773 manager_set_show_status(m, arg_show_status);
774 }
775
776 static int parse_argv(int argc, char *argv[]) {
777
778 enum {
779 ARG_LOG_LEVEL = 0x100,
780 ARG_LOG_TARGET,
781 ARG_LOG_COLOR,
782 ARG_LOG_LOCATION,
783 ARG_UNIT,
784 ARG_SYSTEM,
785 ARG_USER,
786 ARG_TEST,
787 ARG_NO_PAGER,
788 ARG_VERSION,
789 ARG_DUMP_CONFIGURATION_ITEMS,
790 ARG_DUMP_CORE,
791 ARG_CRASH_CHVT,
792 ARG_CRASH_SHELL,
793 ARG_CRASH_REBOOT,
794 ARG_CONFIRM_SPAWN,
795 ARG_SHOW_STATUS,
796 ARG_DESERIALIZE,
797 ARG_SWITCHED_ROOT,
798 ARG_DEFAULT_STD_OUTPUT,
799 ARG_DEFAULT_STD_ERROR,
800 ARG_MACHINE_ID,
801 ARG_SERVICE_WATCHDOGS,
802 };
803
804 static const struct option options[] = {
805 { "log-level", required_argument, NULL, ARG_LOG_LEVEL },
806 { "log-target", required_argument, NULL, ARG_LOG_TARGET },
807 { "log-color", optional_argument, NULL, ARG_LOG_COLOR },
808 { "log-location", optional_argument, NULL, ARG_LOG_LOCATION },
809 { "unit", required_argument, NULL, ARG_UNIT },
810 { "system", no_argument, NULL, ARG_SYSTEM },
811 { "user", no_argument, NULL, ARG_USER },
812 { "test", no_argument, NULL, ARG_TEST },
813 { "no-pager", no_argument, NULL, ARG_NO_PAGER },
814 { "help", no_argument, NULL, 'h' },
815 { "version", no_argument, NULL, ARG_VERSION },
816 { "dump-configuration-items", no_argument, NULL, ARG_DUMP_CONFIGURATION_ITEMS },
817 { "dump-core", optional_argument, NULL, ARG_DUMP_CORE },
818 { "crash-chvt", required_argument, NULL, ARG_CRASH_CHVT },
819 { "crash-shell", optional_argument, NULL, ARG_CRASH_SHELL },
820 { "crash-reboot", optional_argument, NULL, ARG_CRASH_REBOOT },
821 { "confirm-spawn", optional_argument, NULL, ARG_CONFIRM_SPAWN },
822 { "show-status", optional_argument, NULL, ARG_SHOW_STATUS },
823 { "deserialize", required_argument, NULL, ARG_DESERIALIZE },
824 { "switched-root", no_argument, NULL, ARG_SWITCHED_ROOT },
825 { "default-standard-output", required_argument, NULL, ARG_DEFAULT_STD_OUTPUT, },
826 { "default-standard-error", required_argument, NULL, ARG_DEFAULT_STD_ERROR, },
827 { "machine-id", required_argument, NULL, ARG_MACHINE_ID },
828 { "service-watchdogs", required_argument, NULL, ARG_SERVICE_WATCHDOGS },
829 {}
830 };
831
832 int c, r;
833
834 assert(argc >= 1);
835 assert(argv);
836
837 if (getpid_cached() == 1)
838 opterr = 0;
839
840 while ((c = getopt_long(argc, argv, "hDbsz:", options, NULL)) >= 0)
841
842 switch (c) {
843
844 case ARG_LOG_LEVEL:
845 r = log_set_max_level_from_string(optarg);
846 if (r < 0) {
847 log_error("Failed to parse log level %s.", optarg);
848 return r;
849 }
850
851 break;
852
853 case ARG_LOG_TARGET:
854 r = log_set_target_from_string(optarg);
855 if (r < 0) {
856 log_error("Failed to parse log target %s.", optarg);
857 return r;
858 }
859
860 break;
861
862 case ARG_LOG_COLOR:
863
864 if (optarg) {
865 r = log_show_color_from_string(optarg);
866 if (r < 0) {
867 log_error("Failed to parse log color setting %s.", optarg);
868 return r;
869 }
870 } else
871 log_show_color(true);
872
873 break;
874
875 case ARG_LOG_LOCATION:
876 if (optarg) {
877 r = log_show_location_from_string(optarg);
878 if (r < 0) {
879 log_error("Failed to parse log location setting %s.", optarg);
880 return r;
881 }
882 } else
883 log_show_location(true);
884
885 break;
886
887 case ARG_DEFAULT_STD_OUTPUT:
888 r = exec_output_from_string(optarg);
889 if (r < 0) {
890 log_error("Failed to parse default standard output setting %s.", optarg);
891 return r;
892 } else
893 arg_default_std_output = r;
894 break;
895
896 case ARG_DEFAULT_STD_ERROR:
897 r = exec_output_from_string(optarg);
898 if (r < 0) {
899 log_error("Failed to parse default standard error output setting %s.", optarg);
900 return r;
901 } else
902 arg_default_std_error = r;
903 break;
904
905 case ARG_UNIT:
906 r = free_and_strdup(&arg_default_unit, optarg);
907 if (r < 0)
908 return log_error_errno(r, "Failed to set default unit %s: %m", optarg);
909
910 break;
911
912 case ARG_SYSTEM:
913 arg_system = true;
914 break;
915
916 case ARG_USER:
917 arg_system = false;
918 break;
919
920 case ARG_TEST:
921 arg_action = ACTION_TEST;
922 break;
923
924 case ARG_NO_PAGER:
925 arg_no_pager = true;
926 break;
927
928 case ARG_VERSION:
929 arg_action = ACTION_VERSION;
930 break;
931
932 case ARG_DUMP_CONFIGURATION_ITEMS:
933 arg_action = ACTION_DUMP_CONFIGURATION_ITEMS;
934 break;
935
936 case ARG_DUMP_CORE:
937 if (!optarg)
938 arg_dump_core = true;
939 else {
940 r = parse_boolean(optarg);
941 if (r < 0)
942 return log_error_errno(r, "Failed to parse dump core boolean: %s", optarg);
943 arg_dump_core = r;
944 }
945 break;
946
947 case ARG_CRASH_CHVT:
948 r = parse_crash_chvt(optarg);
949 if (r < 0)
950 return log_error_errno(r, "Failed to parse crash virtual terminal index: %s", optarg);
951 break;
952
953 case ARG_CRASH_SHELL:
954 if (!optarg)
955 arg_crash_shell = true;
956 else {
957 r = parse_boolean(optarg);
958 if (r < 0)
959 return log_error_errno(r, "Failed to parse crash shell boolean: %s", optarg);
960 arg_crash_shell = r;
961 }
962 break;
963
964 case ARG_CRASH_REBOOT:
965 if (!optarg)
966 arg_crash_reboot = true;
967 else {
968 r = parse_boolean(optarg);
969 if (r < 0)
970 return log_error_errno(r, "Failed to parse crash shell boolean: %s", optarg);
971 arg_crash_reboot = r;
972 }
973 break;
974
975 case ARG_CONFIRM_SPAWN:
976 arg_confirm_spawn = mfree(arg_confirm_spawn);
977
978 r = parse_confirm_spawn(optarg, &arg_confirm_spawn);
979 if (r < 0)
980 return log_error_errno(r, "Failed to parse confirm spawn option: %m");
981 break;
982
983 case ARG_SERVICE_WATCHDOGS:
984 r = parse_boolean(optarg);
985 if (r < 0)
986 return log_error_errno(r, "Failed to parse service watchdogs boolean: %s", optarg);
987 arg_service_watchdogs = r;
988 break;
989
990 case ARG_SHOW_STATUS:
991 if (optarg) {
992 r = parse_show_status(optarg, &arg_show_status);
993 if (r < 0) {
994 log_error("Failed to parse show status boolean %s.", optarg);
995 return r;
996 }
997 } else
998 arg_show_status = SHOW_STATUS_YES;
999 break;
1000
1001 case ARG_DESERIALIZE: {
1002 int fd;
1003 FILE *f;
1004
1005 r = safe_atoi(optarg, &fd);
1006 if (r < 0 || fd < 0) {
1007 log_error("Failed to parse deserialize option %s.", optarg);
1008 return -EINVAL;
1009 }
1010
1011 (void) fd_cloexec(fd, true);
1012
1013 f = fdopen(fd, "r");
1014 if (!f)
1015 return log_error_errno(errno, "Failed to open serialization fd: %m");
1016
1017 safe_fclose(arg_serialization);
1018 arg_serialization = f;
1019
1020 break;
1021 }
1022
1023 case ARG_SWITCHED_ROOT:
1024 arg_switched_root = true;
1025 break;
1026
1027 case ARG_MACHINE_ID:
1028 r = set_machine_id(optarg);
1029 if (r < 0)
1030 return log_error_errno(r, "MachineID '%s' is not valid.", optarg);
1031 break;
1032
1033 case 'h':
1034 arg_action = ACTION_HELP;
1035 break;
1036
1037 case 'D':
1038 log_set_max_level(LOG_DEBUG);
1039 break;
1040
1041 case 'b':
1042 case 's':
1043 case 'z':
1044 /* Just to eat away the sysvinit kernel
1045 * cmdline args without getopt() error
1046 * messages that we'll parse in
1047 * parse_proc_cmdline_word() or ignore. */
1048
1049 case '?':
1050 if (getpid_cached() != 1)
1051 return -EINVAL;
1052 else
1053 return 0;
1054
1055 default:
1056 assert_not_reached("Unhandled option code.");
1057 }
1058
1059 if (optind < argc && getpid_cached() != 1) {
1060 /* Hmm, when we aren't run as init system
1061 * let's complain about excess arguments */
1062
1063 log_error("Excess arguments.");
1064 return -EINVAL;
1065 }
1066
1067 return 0;
1068 }
1069
/* Prints the command line usage summary to stdout. Always returns 0. */
static int help(void) {
        static const char option_text[] =
                "Starts up and maintains the system or user services.\n\n"
                " -h --help Show this help\n"
                " --version Show version\n"
                " --test Determine startup sequence, dump it and exit\n"
                " --no-pager Do not pipe output into a pager\n"
                " --dump-configuration-items Dump understood unit configuration items\n"
                " --unit=UNIT Set default unit\n"
                " --system Run a system instance, even if PID != 1\n"
                " --user Run a user instance\n"
                " --dump-core[=BOOL] Dump core on crash\n"
                " --crash-vt=NR Change to specified VT on crash\n"
                " --crash-reboot[=BOOL] Reboot on crash\n"
                " --crash-shell[=BOOL] Run shell on crash\n"
                " --confirm-spawn[=BOOL] Ask for confirmation when spawning processes\n"
                " --show-status[=BOOL] Show status updates on the console during bootup\n"
                " --log-target=TARGET Set log target (console, journal, kmsg, journal-or-kmsg, null)\n"
                " --log-level=LEVEL Set log level (debug, info, notice, warning, err, crit, alert, emerg)\n"
                " --log-color[=BOOL] Highlight important log messages\n"
                " --log-location[=BOOL] Include code location in log messages\n"
                " --default-standard-output= Set default standard output for services\n"
                " --default-standard-error= Set default standard error output for services\n";

        /* Only the first line depends on how we were invoked, the remainder is a fixed text. */
        printf("%s [OPTIONS...]\n\n", program_invocation_short_name);
        fputs(option_text, stdout);

        return 0;
}
1098
1099 static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool switching_root) {
1100 _cleanup_fdset_free_ FDSet *fds = NULL;
1101 _cleanup_fclose_ FILE *f = NULL;
1102 int r;
1103
1104 assert(m);
1105 assert(_f);
1106 assert(_fds);
1107
1108 r = manager_open_serialization(m, &f);
1109 if (r < 0)
1110 return log_error_errno(r, "Failed to create serialization file: %m");
1111
1112 /* Make sure nothing is really destructed when we shut down */
1113 m->n_reloading++;
1114 bus_manager_send_reloading(m, true);
1115
1116 fds = fdset_new();
1117 if (!fds)
1118 return log_oom();
1119
1120 r = manager_serialize(m, f, fds, switching_root);
1121 if (r < 0)
1122 return log_error_errno(r, "Failed to serialize state: %m");
1123
1124 if (fseeko(f, 0, SEEK_SET) == (off_t) -1)
1125 return log_error_errno(errno, "Failed to rewind serialization fd: %m");
1126
1127 r = fd_cloexec(fileno(f), false);
1128 if (r < 0)
1129 return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization: %m");
1130
1131 r = fdset_cloexec(fds, false);
1132 if (r < 0)
1133 return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization fds: %m");
1134
1135 *_f = f;
1136 *_fds = fds;
1137
1138 f = NULL;
1139 fds = NULL;
1140
1141 return 0;
1142 }
1143
1144 static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
1145 struct rlimit nl;
1146 int r;
1147 int min_max;
1148 _cleanup_free_ char *nr_open = NULL;
1149
1150 assert(saved_rlimit);
1151
1152 /* Save the original RLIMIT_NOFILE so that we can reset it
1153 * later when transitioning from the initrd to the main
1154 * systemd or suchlike. */
1155 if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0)
1156 return log_warning_errno(errno, "Reading RLIMIT_NOFILE failed, ignoring: %m");
1157
1158 /* Make sure forked processes get the default kernel setting */
1159 if (!arg_default_rlimit[RLIMIT_NOFILE]) {
1160 struct rlimit *rl;
1161
1162 rl = newdup(struct rlimit, saved_rlimit, 1);
1163 if (!rl)
1164 return log_oom();
1165
1166 arg_default_rlimit[RLIMIT_NOFILE] = rl;
1167 }
1168
1169 /* Get current RLIMIT_NOFILE maximum compiled into the kernel. */
1170 r = read_one_line_file("/proc/sys/fs/nr_open", &nr_open);
1171 if (r >= 0)
1172 r = safe_atoi(nr_open, &min_max);
1173 /* If we fail, fallback to the hard-coded kernel limit of 1024 * 1024. */
1174 if (r < 0)
1175 min_max = 1024 * 1024;
1176
1177 /* Bump up the resource limit for ourselves substantially */
1178 nl.rlim_cur = nl.rlim_max = min_max;
1179 r = setrlimit_closest(RLIMIT_NOFILE, &nl);
1180 if (r < 0)
1181 return log_warning_errno(r, "Setting RLIMIT_NOFILE failed, ignoring: %m");
1182
1183 return 0;
1184 }
1185
/* Stores the current RLIMIT_MEMLOCK in *saved_rlimit and then raises the limit to at least 16 MiB. A limit that
 * is already higher (or unlimited) is never lowered. Returns 0 on success (including the case where nothing
 * had to be changed), negative on failure; failures are logged here. */
static int bump_rlimit_memlock(struct rlimit *saved_rlimit) {
        struct rlimit new_rlimit;
        int r;

        assert(saved_rlimit);
        assert(getuid() == 0);

        /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even though we have CAP_IPC_LOCK which
         * should normally disable such checks. We need them to implement IPAddressAllow= and IPAddressDeny=, hence
         * let's bump the value high enough for the root user. */

        if (getrlimit(RLIMIT_MEMLOCK, saved_rlimit) < 0)
                return log_warning_errno(errno, "Reading RLIMIT_MEMLOCK failed, ignoring: %m");

        /* Only ever raise the limit. rlim_t is unsigned and RLIM_INFINITY is its largest value, hence MAX()
         * does the right thing for an unlimited setting, too. */
        new_rlimit.rlim_cur = MAX((rlim_t) (1024ULL*1024ULL*16ULL), saved_rlimit->rlim_cur);
        new_rlimit.rlim_max = MAX(new_rlimit.rlim_cur, saved_rlimit->rlim_max);

        /* Already high enough? Then there is nothing to do. */
        if (new_rlimit.rlim_cur == saved_rlimit->rlim_cur &&
            new_rlimit.rlim_max == saved_rlimit->rlim_max)
                return 0;

        r = setrlimit_closest(RLIMIT_MEMLOCK, &new_rlimit);
        if (r < 0)
                return log_warning_errno(r, "Setting RLIMIT_MEMLOCK failed, ignoring: %m");

        return 0;
}
1205
/* Logs a warning if /usr turns out to be an empty directory, which is taken as a sign that it lives on a
 * separate file system that has not been mounted yet. */
static void test_usr(void) {

        /* A populated /usr (or an error while checking it) means there is nothing to complain about */
        if (dir_is_empty("/usr") > 0)
                log_warning("/usr appears to be on its own filesystem and is not already mounted. This is not a supported setup. "
                            "Some things will probably break (sometimes even silently) in mysterious ways. "
                            "Consult http://freedesktop.org/wiki/Software/systemd/separate-usr-is-broken for more information.");
}
1217
1218 static int enforce_syscall_archs(Set *archs) {
1219 #if HAVE_SECCOMP
1220 int r;
1221
1222 if (!is_seccomp_available())
1223 return 0;
1224
1225 r = seccomp_restrict_archs(arg_syscall_archs);
1226 if (r < 0)
1227 return log_error_errno(r, "Failed to enforce system call architecture restrication: %m");
1228 #endif
1229 return 0;
1230 }
1231
1232 static int status_welcome(void) {
1233 _cleanup_free_ char *pretty_name = NULL, *ansi_color = NULL;
1234 const char *fn;
1235 int r;
1236
1237 if (arg_show_status <= 0)
1238 return 0;
1239
1240 FOREACH_STRING(fn, "/etc/os-release", "/usr/lib/os-release") {
1241 r = parse_env_file(fn, NEWLINE,
1242 "PRETTY_NAME", &pretty_name,
1243 "ANSI_COLOR", &ansi_color,
1244 NULL);
1245
1246 if (r != -ENOENT)
1247 break;
1248 }
1249 if (r < 0 && r != -ENOENT)
1250 log_warning_errno(r, "Failed to read os-release file, ignoring: %m");
1251
1252 if (log_get_show_color())
1253 return status_printf(NULL, false, false,
1254 "\nWelcome to \x1B[%sm%s\x1B[0m!\n",
1255 isempty(ansi_color) ? "1" : ansi_color,
1256 isempty(pretty_name) ? "Linux" : pretty_name);
1257 else
1258 return status_printf(NULL, false, false,
1259 "\nWelcome to %s!\n",
1260 isempty(pretty_name) ? "Linux" : pretty_name);
1261 }
1262
1263 static int write_container_id(void) {
1264 const char *c;
1265 int r;
1266
1267 c = getenv("container");
1268 if (isempty(c))
1269 return 0;
1270
1271 RUN_WITH_UMASK(0022)
1272 r = write_string_file("/run/systemd/container", c, WRITE_STRING_FILE_CREATE);
1273 if (r < 0)
1274 return log_warning_errno(r, "Failed to write /run/systemd/container, ignoring: %m");
1275
1276 return 1;
1277 }
1278
1279 static int bump_unix_max_dgram_qlen(void) {
1280 _cleanup_free_ char *qlen = NULL;
1281 unsigned long v;
1282 int r;
1283
1284 /* Let's bump the net.unix.max_dgram_qlen sysctl. The kernel
1285 * default of 16 is simply too low. We set the value really
1286 * really early during boot, so that it is actually applied to
1287 * all our sockets, including the $NOTIFY_SOCKET one. */
1288
1289 r = read_one_line_file("/proc/sys/net/unix/max_dgram_qlen", &qlen);
1290 if (r < 0)
1291 return log_warning_errno(r, "Failed to read AF_UNIX datagram queue length, ignoring: %m");
1292
1293 r = safe_atolu(qlen, &v);
1294 if (r < 0)
1295 return log_warning_errno(r, "Failed to parse AF_UNIX datagram queue length, ignoring: %m");
1296
1297 if (v >= DEFAULT_UNIX_MAX_DGRAM_QLEN)
1298 return 0;
1299
1300 qlen = mfree(qlen);
1301 if (asprintf(&qlen, "%lu\n", DEFAULT_UNIX_MAX_DGRAM_QLEN) < 0)
1302 return log_oom();
1303
1304 r = write_string_file("/proc/sys/net/unix/max_dgram_qlen", qlen, 0);
1305 if (r < 0)
1306 return log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
1307 "Failed to bump AF_UNIX datagram queue length, ignoring: %m");
1308
1309 return 1;
1310 }
1311
1312 static int fixup_environment(void) {
1313 _cleanup_free_ char *term = NULL;
1314 const char *t;
1315 int r;
1316
1317 /* Only fix up the environment when we are started as PID 1 */
1318 if (getpid_cached() != 1)
1319 return 0;
1320
1321 /* We expect the environment to be set correctly if run inside a container. */
1322 if (detect_container() > 0)
1323 return 0;
1324
1325 /* When started as PID1, the kernel uses /dev/console for our stdios and uses TERM=linux whatever the backend
1326 * device used by the console. We try to make a better guess here since some consoles might not have support
1327 * for color mode for example.
1328 *
1329 * However if TERM was configured through the kernel command line then leave it alone. */
1330 r = proc_cmdline_get_key("TERM", 0, &term);
1331 if (r < 0)
1332 return r;
1333
1334 t = term ?: default_term_for_tty("/dev/console");
1335
1336 if (setenv("TERM", t, 1) < 0)
1337 return -errno;
1338
1339 return 0;
1340 }
1341
/* SysV compatibility: if we are not PID 1 but were invoked under a name containing "init", execute the
 * systemctl binary with our argument vector instead. Only returns if no redirection is needed (or SysV
 * compatibility is compiled out); a failed exec terminates the process with EXIT_FAILURE. */
static void redirect_telinit(int argc, char *argv[]) {

        /* On SysV calling init as a user is identical to calling telinit, and we want to support that, too. */

#if HAVE_SYSV_COMPAT
        if (getpid_cached() == 1 || !strstr(program_invocation_short_name, "init"))
                return;

        execv(SYSTEMCTL_BINARY_PATH, argv);

        /* We only get here if the exec failed */
        log_error_errno(errno, "Failed to exec " SYSTEMCTL_BINARY_PATH ": %m");
        exit(EXIT_FAILURE);
#endif
}
1358
1359 static int become_shutdown(
1360 const char *shutdown_verb,
1361 int retval) {
1362
1363 char log_level[DECIMAL_STR_MAX(int) + 1],
1364 exit_code[DECIMAL_STR_MAX(uint8_t) + 1],
1365 timeout[DECIMAL_STR_MAX(usec_t) + 1];
1366
1367 const char* command_line[13] = {
1368 SYSTEMD_SHUTDOWN_BINARY_PATH,
1369 shutdown_verb,
1370 "--timeout", timeout,
1371 "--log-level", log_level,
1372 "--log-target",
1373 };
1374
1375 _cleanup_strv_free_ char **env_block = NULL;
1376 size_t pos = 7;
1377 int r;
1378
1379 assert(shutdown_verb);
1380 assert(!command_line[pos]);
1381 env_block = strv_copy(environ);
1382
1383 xsprintf(log_level, "%d", log_get_max_level());
1384 xsprintf(timeout, "%" PRI_USEC "us", arg_default_timeout_stop_usec);
1385
1386 switch (log_get_target()) {
1387
1388 case LOG_TARGET_KMSG:
1389 case LOG_TARGET_JOURNAL_OR_KMSG:
1390 case LOG_TARGET_SYSLOG_OR_KMSG:
1391 command_line[pos++] = "kmsg";
1392 break;
1393
1394 case LOG_TARGET_NULL:
1395 command_line[pos++] = "null";
1396 break;
1397
1398 case LOG_TARGET_CONSOLE:
1399 default:
1400 command_line[pos++] = "console";
1401 break;
1402 };
1403
1404 if (log_get_show_color())
1405 command_line[pos++] = "--log-color";
1406
1407 if (log_get_show_location())
1408 command_line[pos++] = "--log-location";
1409
1410 if (streq(shutdown_verb, "exit")) {
1411 command_line[pos++] = "--exit-code";
1412 command_line[pos++] = exit_code;
1413 xsprintf(exit_code, "%d", retval);
1414 }
1415
1416 assert(pos < ELEMENTSOF(command_line));
1417
1418 if (streq(shutdown_verb, "reboot") &&
1419 arg_shutdown_watchdog > 0 &&
1420 arg_shutdown_watchdog != USEC_INFINITY) {
1421
1422 char *e;
1423
1424 /* If we reboot let's set the shutdown
1425 * watchdog and tell the shutdown binary to
1426 * repeatedly ping it */
1427 r = watchdog_set_timeout(&arg_shutdown_watchdog);
1428 watchdog_close(r < 0);
1429
1430 /* Tell the binary how often to ping, ignore failure */
1431 if (asprintf(&e, "WATCHDOG_USEC="USEC_FMT, arg_shutdown_watchdog) > 0)
1432 (void) strv_consume(&env_block, e);
1433
1434 if (arg_watchdog_device &&
1435 asprintf(&e, "WATCHDOG_DEVICE=%s", arg_watchdog_device) > 0)
1436 (void) strv_consume(&env_block, e);
1437 } else
1438 watchdog_close(true);
1439
1440 /* Avoid the creation of new processes forked by the
1441 * kernel; at this point, we will not listen to the
1442 * signals anyway */
1443 if (detect_container() <= 0)
1444 (void) cg_uninstall_release_agent(SYSTEMD_CGROUP_CONTROLLER);
1445
1446 execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);
1447 return -errno;
1448 }
1449
/* Sets up the system clock early on: applies the time zone delta if the RTC is configured to run in local time,
 * otherwise (and only outside of the initrd) resets the kernel's time warp logic, and finally applies the
 * epoch via clock_apply_epoch(). Failures are logged, but otherwise ignored. */
static void initialize_clock(void) {
        int r;

        if (clock_is_localtime(NULL) > 0) {
                int delta_minutes;

                /*
                 * The very first call of settimeofday() also does a time warp in the kernel.
                 *
                 * In the rtc-in-local time mode, we set the kernel's timezone, and rely on external tools to
                 * take care of maintaining the RTC and do all adjustments. This matches the behavior of
                 * Windows, which leaves the RTC alone if the registry tells that the RTC runs in UTC.
                 */
                r = clock_set_timezone(&delta_minutes);
                if (r >= 0)
                        log_info("RTC configured in localtime, applying delta of %i minutes to system time.", delta_minutes);
                else
                        log_error_errno(r, "Failed to apply local time delta, ignoring: %m");

        } else if (!in_initrd()) {
                /*
                 * Do a dummy very first call to seal the kernel's time warp magic.
                 *
                 * Do not call this from inside the initrd. The initrd might not carry /etc/adjtime with LOCAL,
                 * but the real system could be set up that way. In such case, we need to delay the time-warp
                 * or the sealing until we reach the real system.
                 *
                 * Do not set the kernel's timezone. The concept of local time cannot be supported reliably,
                 * the time will jump or be incorrect at every daylight saving time change. All kernel local
                 * time concepts will be treated as UTC that way.
                 */
                (void) clock_reset_timewarp();
        }

        r = clock_apply_epoch();
        if (r > 0)
                log_info("System time before build time, advancing clock.");
        else if (r < 0)
                log_error_errno(r, "Current system time is before build time, but cannot correct: %m");
}
1490
1491 static void initialize_coredump(bool skip_setup) {
1492
1493 if (getpid_cached() != 1)
1494 return;
1495
1496 /* Don't limit the core dump size, so that coredump handlers such as systemd-coredump (which honour the limit)
1497 * will process core dumps for system services by default. */
1498 if (setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY)) < 0)
1499 log_warning_errno(errno, "Failed to set RLIMIT_CORE: %m");
1500
1501 /* But at the same time, turn off the core_pattern logic by default, so that no coredumps are stored
1502 * until the systemd-coredump tool is enabled via sysctl. */
1503 if (!skip_setup)
1504 disable_coredumps();
1505 }
1506
1507 static void do_reexecute(
1508 int argc,
1509 char *argv[],
1510 const struct rlimit *saved_rlimit_nofile,
1511 const struct rlimit *saved_rlimit_memlock,
1512 FDSet *fds,
1513 const char *switch_root_dir,
1514 const char *switch_root_init,
1515 const char **ret_error_message) {
1516
1517 unsigned i, j, args_size;
1518 const char **args;
1519 int r;
1520
1521 assert(saved_rlimit_nofile);
1522 assert(saved_rlimit_memlock);
1523 assert(ret_error_message);
1524
1525 /* Close and disarm the watchdog, so that the new instance can reinitialize it, but doesn't get rebooted while
1526 * we do that */
1527 watchdog_close(true);
1528
1529 /* Reset the RLIMIT_NOFILE to the kernel default, so that the new systemd can pass the kernel default to its
1530 * child processes */
1531
1532 if (saved_rlimit_nofile->rlim_cur > 0)
1533 (void) setrlimit(RLIMIT_NOFILE, saved_rlimit_nofile);
1534 if (saved_rlimit_memlock->rlim_cur != (rlim_t) -1)
1535 (void) setrlimit(RLIMIT_MEMLOCK, saved_rlimit_memlock);
1536
1537 if (switch_root_dir) {
1538 /* Kill all remaining processes from the initrd, but don't wait for them, so that we can handle the
1539 * SIGCHLD for them after deserializing. */
1540 broadcast_signal(SIGTERM, false, true, arg_default_timeout_stop_usec);
1541
1542 /* And switch root with MS_MOVE, because we remove the old directory afterwards and detach it. */
1543 r = switch_root(switch_root_dir, "/mnt", true, MS_MOVE);
1544 if (r < 0)
1545 log_error_errno(r, "Failed to switch root, trying to continue: %m");
1546 }
1547
1548 args_size = MAX(6, argc+1);
1549 args = newa(const char*, args_size);
1550
1551 if (!switch_root_init) {
1552 char sfd[DECIMAL_STR_MAX(int) + 1];
1553
1554 /* First try to spawn ourselves with the right path, and with full serialization. We do this only if
1555 * the user didn't specify an explicit init to spawn. */
1556
1557 assert(arg_serialization);
1558 assert(fds);
1559
1560 xsprintf(sfd, "%i", fileno(arg_serialization));
1561
1562 i = 0;
1563 args[i++] = SYSTEMD_BINARY_PATH;
1564 if (switch_root_dir)
1565 args[i++] = "--switched-root";
1566 args[i++] = arg_system ? "--system" : "--user";
1567 args[i++] = "--deserialize";
1568 args[i++] = sfd;
1569 args[i++] = NULL;
1570
1571 assert(i <= args_size);
1572
1573 /*
1574 * We want valgrind to print its memory usage summary before reexecution. Valgrind won't do this is on
1575 * its own on exec(), but it will do it on exit(). Hence, to ensure we get a summary here, fork() off
1576 * a child, let it exit() cleanly, so that it prints the summary, and wait() for it in the parent,
1577 * before proceeding into the exec().
1578 */
1579 valgrind_summary_hack();
1580
1581 (void) execv(args[0], (char* const*) args);
1582 log_debug_errno(errno, "Failed to execute our own binary, trying fallback: %m");
1583 }
1584
1585 /* Try the fallback, if there is any, without any serialization. We pass the original argv[] and envp[]. (Well,
1586 * modulo the ordering changes due to getopt() in argv[], and some cleanups in envp[], but let's hope that
1587 * doesn't matter.) */
1588
1589 arg_serialization = safe_fclose(arg_serialization);
1590 fds = fdset_free(fds);
1591
1592 /* Reopen the console */
1593 (void) make_console_stdio();
1594
1595 for (j = 1, i = 1; j < (unsigned) argc; j++)
1596 args[i++] = argv[j];
1597 args[i++] = NULL;
1598 assert(i <= args_size);
1599
1600 /* Reenable any blocked signals, especially important if we switch from initial ramdisk to init=... */
1601 (void) reset_all_signal_handlers();
1602 (void) reset_signal_mask();
1603
1604 if (switch_root_init) {
1605 args[0] = switch_root_init;
1606 (void) execv(args[0], (char* const*) args);
1607 log_warning_errno(errno, "Failed to execute configured init, trying fallback: %m");
1608 }
1609
1610 args[0] = "/sbin/init";
1611 (void) execv(args[0], (char* const*) args);
1612 r = -errno;
1613
1614 manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
1615 ANSI_HIGHLIGHT_RED " !! " ANSI_NORMAL,
1616 "Failed to execute /sbin/init");
1617
1618 if (r == -ENOENT) {
1619 log_warning("No /sbin/init, trying fallback");
1620
1621 args[0] = "/bin/sh";
1622 args[1] = NULL;
1623 (void) execv(args[0], (char* const*) args);
1624 log_error_errno(errno, "Failed to execute /bin/sh, giving up: %m");
1625 } else
1626 log_warning_errno(r, "Failed to execute /sbin/init, giving up: %m");
1627
1628 *ret_error_message = "Failed to execute fallback shell";
1629 }
1630
1631 static int invoke_main_loop(
1632 Manager *m,
1633 bool *ret_reexecute,
1634 int *ret_retval, /* Return parameters relevant for shutting down */
1635 const char **ret_shutdown_verb, /* … */
1636 FDSet **ret_fds, /* Return parameters for reexecuting */
1637 char **ret_switch_root_dir, /* … */
1638 char **ret_switch_root_init, /* … */
1639 const char **ret_error_message) {
1640
1641 int r;
1642
1643 assert(m);
1644 assert(ret_reexecute);
1645 assert(ret_retval);
1646 assert(ret_shutdown_verb);
1647 assert(ret_fds);
1648 assert(ret_switch_root_dir);
1649 assert(ret_switch_root_init);
1650 assert(ret_error_message);
1651
1652 for (;;) {
1653 r = manager_loop(m);
1654 if (r < 0) {
1655 *ret_error_message = "Failed to run main loop";
1656 return log_emergency_errno(r, "Failed to run main loop: %m");
1657 }
1658
1659 switch (m->exit_code) {
1660
1661 case MANAGER_RELOAD:
1662 log_info("Reloading.");
1663
1664 r = parse_config_file();
1665 if (r < 0)
1666 log_warning_errno(r, "Failed to parse config file, ignoring: %m");
1667
1668 set_manager_defaults(m);
1669
1670 r = manager_reload(m);
1671 if (r < 0)
1672 log_warning_errno(r, "Failed to reload, ignoring: %m");
1673
1674 break;
1675
1676 case MANAGER_REEXECUTE:
1677
1678 r = prepare_reexecute(m, &arg_serialization, ret_fds, false);
1679 if (r < 0) {
1680 *ret_error_message = "Failed to prepare for reexecution";
1681 return r;
1682 }
1683
1684 log_notice("Reexecuting.");
1685
1686 *ret_reexecute = true;
1687 *ret_retval = EXIT_SUCCESS;
1688 *ret_shutdown_verb = NULL;
1689 *ret_switch_root_dir = *ret_switch_root_init = NULL;
1690
1691 return 0;
1692
1693 case MANAGER_SWITCH_ROOT:
1694 if (!m->switch_root_init) {
1695 r = prepare_reexecute(m, &arg_serialization, ret_fds, true);
1696 if (r < 0) {
1697 *ret_error_message = "Failed to prepare for reexecution";
1698 return r;
1699 }
1700 } else
1701 *ret_fds = NULL;
1702
1703 log_notice("Switching root.");
1704
1705 *ret_reexecute = true;
1706 *ret_retval = EXIT_SUCCESS;
1707 *ret_shutdown_verb = NULL;
1708
1709 /* Steal the switch root parameters */
1710 *ret_switch_root_dir = m->switch_root;
1711 *ret_switch_root_init = m->switch_root_init;
1712 m->switch_root = m->switch_root_init = NULL;
1713
1714 return 0;
1715
1716 case MANAGER_EXIT:
1717
1718 if (MANAGER_IS_USER(m)) {
1719 log_debug("Exit.");
1720
1721 *ret_reexecute = false;
1722 *ret_retval = m->return_value;
1723 *ret_shutdown_verb = NULL;
1724 *ret_fds = NULL;
1725 *ret_switch_root_dir = *ret_switch_root_init = NULL;
1726
1727 return 0;
1728 }
1729
1730 _fallthrough_;
1731 case MANAGER_REBOOT:
1732 case MANAGER_POWEROFF:
1733 case MANAGER_HALT:
1734 case MANAGER_KEXEC: {
1735 static const char * const table[_MANAGER_EXIT_CODE_MAX] = {
1736 [MANAGER_EXIT] = "exit",
1737 [MANAGER_REBOOT] = "reboot",
1738 [MANAGER_POWEROFF] = "poweroff",
1739 [MANAGER_HALT] = "halt",
1740 [MANAGER_KEXEC] = "kexec"
1741 };
1742
1743 log_notice("Shutting down.");
1744
1745 *ret_reexecute = false;
1746 *ret_retval = m->return_value;
1747 assert_se(*ret_shutdown_verb = table[m->exit_code]);
1748 *ret_fds = NULL;
1749 *ret_switch_root_dir = *ret_switch_root_init = NULL;
1750
1751 return 0;
1752 }
1753
1754 default:
1755 assert_not_reached("Unknown exit code.");
1756 }
1757 }
1758 }
1759
1760 static void log_execution_mode(bool *ret_first_boot) {
1761 assert(ret_first_boot);
1762
1763 if (arg_system) {
1764 int v;
1765
1766 log_info(PACKAGE_STRING " running in %ssystem mode. (" SYSTEMD_FEATURES ")",
1767 arg_action == ACTION_TEST ? "test " : "" );
1768
1769 v = detect_virtualization();
1770 if (v > 0)
1771 log_info("Detected virtualization %s.", virtualization_to_string(v));
1772
1773 log_info("Detected architecture %s.", architecture_to_string(uname_architecture()));
1774
1775 if (in_initrd()) {
1776 *ret_first_boot = false;
1777 log_info("Running in initial RAM disk.");
1778 } else {
1779 /* Let's check whether we are in first boot, i.e. whether /etc is still unpopulated. We use
1780 * /etc/machine-id as flag file, for this: if it exists we assume /etc is populated, if it
1781 * doesn't it's unpopulated. This allows container managers and installers to provision a
1782 * couple of files already. If the container manager wants to provision the machine ID itself
1783 * it should pass $container_uuid to PID 1. */
1784
1785 *ret_first_boot = access("/etc/machine-id", F_OK) < 0;
1786 if (*ret_first_boot)
1787 log_info("Running with unpopulated /etc.");
1788 }
1789 } else {
1790 if (DEBUG_LOGGING) {
1791 _cleanup_free_ char *t;
1792
1793 t = uid_to_name(getuid());
1794 log_debug(PACKAGE_STRING " running in %suser mode for user " UID_FMT "/%s. (" SYSTEMD_FEATURES ")",
1795 arg_action == ACTION_TEST ? " test" : "", getuid(), strna(t));
1796 }
1797
1798 *ret_first_boot = false;
1799 }
1800 }
1801
1802 static int initialize_runtime(
1803 bool skip_setup,
1804 struct rlimit *saved_rlimit_nofile,
1805 struct rlimit *saved_rlimit_memlock,
1806 const char **ret_error_message) {
1807
1808 int r;
1809
1810 assert(ret_error_message);
1811
1812 /* Sets up various runtime parameters. Many of these initializations are conditionalized:
1813 *
1814 * - Some only apply to --system instances
1815 * - Some only apply to --user instances
1816 * - Some only apply when we first start up, but not when we reexecute
1817 */
1818
1819 if (arg_action != ACTION_RUN)
1820 return 0;
1821
1822 if (arg_system) {
1823 /* Make sure we leave a core dump without panicing the kernel. */
1824 install_crash_handler();
1825
1826 if (!skip_setup) {
1827 r = mount_cgroup_controllers(arg_join_controllers);
1828 if (r < 0) {
1829 *ret_error_message = "Failed to mount cgroup hierarchies";
1830 return r;
1831 }
1832
1833 status_welcome();
1834 hostname_setup();
1835 machine_id_setup(NULL, arg_machine_id, NULL);
1836 loopback_setup();
1837 bump_unix_max_dgram_qlen();
1838 test_usr();
1839 write_container_id();
1840 }
1841
1842 if (arg_watchdog_device) {
1843 r = watchdog_set_device(arg_watchdog_device);
1844 if (r < 0)
1845 log_warning_errno(r, "Failed to set watchdog device to %s, ignoring: %m", arg_watchdog_device);
1846 }
1847
1848 if (arg_runtime_watchdog > 0 && arg_runtime_watchdog != USEC_INFINITY)
1849 watchdog_set_timeout(&arg_runtime_watchdog);
1850 }
1851
1852 if (arg_timer_slack_nsec != NSEC_INFINITY)
1853 if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0)
1854 log_warning_errno(errno, "Failed to adjust timer slack, ignoring: %m");
1855
1856 if (arg_system && !cap_test_all(arg_capability_bounding_set)) {
1857 r = capability_bounding_set_drop_usermode(arg_capability_bounding_set);
1858 if (r < 0) {
1859 *ret_error_message = "Failed to drop capability bounding set of usermode helpers";
1860 return log_emergency_errno(r, "Failed to drop capability bounding set of usermode helpers: %m");
1861 }
1862
1863 r = capability_bounding_set_drop(arg_capability_bounding_set, true);
1864 if (r < 0) {
1865 *ret_error_message = "Failed to drop capability bounding set";
1866 return log_emergency_errno(r, "Failed to drop capability bounding set: %m");
1867 }
1868 }
1869
1870 if (arg_system && arg_no_new_privs) {
1871 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1872 *ret_error_message = "Failed to disable new privileges";
1873 return log_emergency_errno(errno, "Failed to disable new privileges: %m");
1874 }
1875 }
1876
1877 if (arg_syscall_archs) {
1878 r = enforce_syscall_archs(arg_syscall_archs);
1879 if (r < 0) {
1880 *ret_error_message = "Failed to set syscall architectures";
1881 return r;
1882 }
1883 }
1884
1885 if (!arg_system)
1886 /* Become reaper of our children */
1887 if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0)
1888 log_warning_errno(errno, "Failed to make us a subreaper: %m");
1889
1890 if (arg_system) {
1891 /* Bump up RLIMIT_NOFILE for systemd itself */
1892 (void) bump_rlimit_nofile(saved_rlimit_nofile);
1893 (void) bump_rlimit_memlock(saved_rlimit_memlock);
1894 }
1895
1896 return 0;
1897 }
1898
1899 static int do_queue_default_job(
1900 Manager *m,
1901 const char **ret_error_message) {
1902
1903 _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
1904 Job *default_unit_job;
1905 Unit *target = NULL;
1906 int r;
1907
1908 log_debug("Activating default unit: %s", arg_default_unit);
1909
1910 r = manager_load_unit(m, arg_default_unit, NULL, &error, &target);
1911 if (r < 0)
1912 log_error("Failed to load default target: %s", bus_error_message(&error, r));
1913 else if (IN_SET(target->load_state, UNIT_ERROR, UNIT_NOT_FOUND))
1914 log_error_errno(target->load_error, "Failed to load default target: %m");
1915 else if (target->load_state == UNIT_MASKED)
1916 log_error("Default target masked.");
1917
1918 if (!target || target->load_state != UNIT_LOADED) {
1919 log_info("Trying to load rescue target...");
1920
1921 r = manager_load_unit(m, SPECIAL_RESCUE_TARGET, NULL, &error, &target);
1922 if (r < 0) {
1923 *ret_error_message = "Failed to load rescue target";
1924 return log_emergency_errno(r, "Failed to load rescue target: %s", bus_error_message(&error, r));
1925 } else if (IN_SET(target->load_state, UNIT_ERROR, UNIT_NOT_FOUND)) {
1926 *ret_error_message = "Failed to load rescue target";
1927 return log_emergency_errno(target->load_error, "Failed to load rescue target: %m");
1928 } else if (target->load_state == UNIT_MASKED) {
1929 *ret_error_message = "Rescue target masked";
1930 log_emergency("Rescue target masked.");
1931 return -ERFKILL;
1932 }
1933 }
1934
1935 assert(target->load_state == UNIT_LOADED);
1936
1937 r = manager_add_job(m, JOB_START, target, JOB_ISOLATE, &error, &default_unit_job);
1938 if (r == -EPERM) {
1939 log_debug_errno(r, "Default target could not be isolated, starting instead: %s", bus_error_message(&error, r));
1940
1941 sd_bus_error_free(&error);
1942
1943 r = manager_add_job(m, JOB_START, target, JOB_REPLACE, &error, &default_unit_job);
1944 if (r < 0) {
1945 *ret_error_message = "Failed to start default target";
1946 return log_emergency_errno(r, "Failed to start default target: %s", bus_error_message(&error, r));
1947 }
1948
1949 } else if (r < 0) {
1950 *ret_error_message = "Failed to isolate default target";
1951 return log_emergency_errno(r, "Failed to isolate default target: %s", bus_error_message(&error, r));
1952 }
1953
1954 m->default_unit_job_id = default_unit_job->id;
1955
1956 return 0;
1957 }
1958
1959 static void free_arguments(void) {
1960 size_t j;
1961
1962 /* Frees all arg_* variables, with the exception of arg_serialization */
1963
1964 for (j = 0; j < ELEMENTSOF(arg_default_rlimit); j++)
1965 arg_default_rlimit[j] = mfree(arg_default_rlimit[j]);
1966
1967 arg_default_unit = mfree(arg_default_unit);
1968 arg_confirm_spawn = mfree(arg_confirm_spawn);
1969 arg_join_controllers = strv_free_free(arg_join_controllers);
1970 arg_default_environment = strv_free(arg_default_environment);
1971 arg_syscall_archs = set_free(arg_syscall_archs);
1972 }
1973
1974 static int load_configuration(int argc, char **argv, const char **ret_error_message) {
1975 int r;
1976
1977 assert(ret_error_message);
1978
1979 arg_default_tasks_max = system_tasks_max_scale(DEFAULT_TASKS_MAX_PERCENTAGE, 100U);
1980
1981 r = parse_config_file();
1982 if (r < 0) {
1983 *ret_error_message = "Failed to parse config file";
1984 return r;
1985 }
1986
1987 if (arg_system) {
1988 r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, 0);
1989 if (r < 0)
1990 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
1991 }
1992
1993 /* Note that this also parses bits from the kernel command line, including "debug". */
1994 log_parse_environment();
1995
1996 r = parse_argv(argc, argv);
1997 if (r < 0) {
1998 *ret_error_message = "Failed to parse commandline arguments";
1999 return r;
2000 }
2001
2002 /* Initialize default unit */
2003 if (!arg_default_unit) {
2004 arg_default_unit = strdup(SPECIAL_DEFAULT_TARGET);
2005 if (!arg_default_unit) {
2006 *ret_error_message = "Failed to set default unit";
2007 return log_oom();
2008 }
2009 }
2010
2011 /* Initialize the show status setting if it hasn't been set explicitly yet */
2012 if (arg_show_status == _SHOW_STATUS_UNSET)
2013 arg_show_status = SHOW_STATUS_YES;
2014
2015 return 0;
2016 }
2017
2018 static int safety_checks(void) {
2019
2020 if (getpid_cached() == 1 &&
2021 arg_action != ACTION_RUN) {
2022 log_error("Unsupported execution mode while PID 1.");
2023 return -EPERM;
2024 }
2025
2026 if (getpid_cached() == 1 &&
2027 !arg_system) {
2028 log_error("Can't run --user mode as PID 1.");
2029 return -EPERM;
2030 }
2031
2032 if (arg_action == ACTION_RUN &&
2033 arg_system &&
2034 getpid_cached() != 1) {
2035 log_error("Can't run system mode unless PID 1.");
2036 return -EPERM;
2037 }
2038
2039 if (arg_action == ACTION_TEST &&
2040 geteuid() == 0) {
2041 log_error("Don't run test mode as root.");
2042 return -EPERM;
2043 }
2044
2045 if (!arg_system &&
2046 arg_action == ACTION_RUN &&
2047 sd_booted() <= 0) {
2048 log_error("Trying to run as user instance, but the system has not been booted with systemd.");
2049 return -EOPNOTSUPP;
2050 }
2051
2052 if (!arg_system &&
2053 arg_action == ACTION_RUN &&
2054 !getenv("XDG_RUNTIME_DIR")) {
2055 log_error("Trying to run as user instance, but $XDG_RUNTIME_DIR is not set.");
2056 return -EUNATCH;
2057 }
2058
2059 if (arg_system &&
2060 arg_action == ACTION_RUN &&
2061 running_in_chroot() > 0) {
2062 log_error("Cannot be run in a chroot() environment.");
2063 return -EOPNOTSUPP;
2064 }
2065
2066 return 0;
2067 }
2068
2069 static int initialize_security(
2070 bool *loaded_policy,
2071 dual_timestamp *security_start_timestamp,
2072 dual_timestamp *security_finish_timestamp,
2073 const char **ret_error_message) {
2074
2075 int r;
2076
2077 assert(loaded_policy);
2078 assert(security_start_timestamp);
2079 assert(security_finish_timestamp);
2080 assert(ret_error_message);
2081
2082 dual_timestamp_get(security_start_timestamp);
2083
2084 r = mac_selinux_setup(loaded_policy);
2085 if (r < 0) {
2086 *ret_error_message = "Failed to load SELinux policy";
2087 return r;
2088 }
2089
2090 r = mac_smack_setup(loaded_policy);
2091 if (r < 0) {
2092 *ret_error_message = "Failed to load SMACK policy";
2093 return r;
2094 }
2095
2096 r = ima_setup();
2097 if (r < 0) {
2098 *ret_error_message = "Failed to load IMA policy";
2099 return r;
2100 }
2101
2102 dual_timestamp_get(security_finish_timestamp);
2103 return 0;
2104 }
2105
2106 static void test_summary(Manager *m) {
2107 assert(m);
2108
2109 printf("-> By units:\n");
2110 manager_dump_units(m, stdout, "\t");
2111
2112 printf("-> By jobs:\n");
2113 manager_dump_jobs(m, stdout, "\t");
2114 }
2115
2116 static int collect_fds(FDSet **ret_fds, const char **ret_error_message) {
2117 int r;
2118
2119 assert(ret_fds);
2120 assert(ret_error_message);
2121
2122 r = fdset_new_fill(ret_fds);
2123 if (r < 0) {
2124 *ret_error_message = "Failed to allocate fd set";
2125 return log_emergency_errno(r, "Failed to allocate fd set: %m");
2126 }
2127
2128 fdset_cloexec(*ret_fds, true);
2129
2130 if (arg_serialization)
2131 assert_se(fdset_remove(*ret_fds, fileno(arg_serialization)) >= 0);
2132
2133 return 0;
2134 }
2135
2136 static void setup_console_terminal(bool skip_setup) {
2137
2138 if (!arg_system)
2139 return;
2140
2141 /* Become a session leader if we aren't one yet. */
2142 (void) setsid();
2143
2144 /* If we are init, we connect stdin/stdout/stderr to /dev/null and make sure we don't have a controlling
2145 * tty. */
2146 (void) release_terminal();
2147
2148 /* Reset the console, but only if this is really init and we are freshly booted */
2149 if (getpid_cached() == 1 && !skip_setup)
2150 (void) console_setup();
2151 }
2152
/* Takes a quick look at the command line, long before the real option parsing, to tell a re-execution from a
 * regular boot. Returns true if the extensive system setup should be skipped: "--deserialize" was passed and
 * "--switched-root" was not. After a root switch the full setup is done even though deserialization also
 * takes place. */
static bool early_skip_setup_check(int argc, char *argv[]) {
        bool reexecuting = false;
        int n;

        for (n = 1; n < argc; n++) {
                const char *arg = argv[n];

                /* A root switch always wins, wherever it appears on the command line. */
                if (streq(arg, "--switched-root"))
                        return false;

                if (streq(arg, "--deserialize"))
                        reexecuting = true;
        }

        /* Deserializing means we are re-executing, so the expensive setup can be skipped. */
        return reexecuting;
}
2171
2172 int main(int argc, char *argv[]) {
2173
2174 dual_timestamp initrd_timestamp = DUAL_TIMESTAMP_NULL, userspace_timestamp = DUAL_TIMESTAMP_NULL, kernel_timestamp = DUAL_TIMESTAMP_NULL,
2175 security_start_timestamp = DUAL_TIMESTAMP_NULL, security_finish_timestamp = DUAL_TIMESTAMP_NULL;
2176 struct rlimit saved_rlimit_nofile = RLIMIT_MAKE_CONST(0), saved_rlimit_memlock = RLIMIT_MAKE_CONST((rlim_t) -1);
2177 bool skip_setup, loaded_policy = false, queue_default_job = false, first_boot = false, reexecute = false;
2178 char *switch_root_dir = NULL, *switch_root_init = NULL;
2179 usec_t before_startup, after_startup;
2180 static char systemd[] = "systemd";
2181 char timespan[FORMAT_TIMESPAN_MAX];
2182 const char *shutdown_verb = NULL, *error_message = NULL;
2183 int r, retval = EXIT_FAILURE;
2184 Manager *m = NULL;
2185 FDSet *fds = NULL;
2186
2187 /* SysV compatibility: redirect init → telinit */
2188 redirect_telinit(argc, argv);
2189
2190 /* Take timestamps early on */
2191 dual_timestamp_from_monotonic(&kernel_timestamp, 0);
2192 dual_timestamp_get(&userspace_timestamp);
2193
2194 /* Figure out whether we need to do initialize the system, or if we already did that because we are
2195 * reexecuting */
2196 skip_setup = early_skip_setup_check(argc, argv);
2197
2198 /* If we get started via the /sbin/init symlink then we are called 'init'. After a subsequent reexecution we
2199 * are then called 'systemd'. That is confusing, hence let's call us systemd right-away. */
2200 program_invocation_short_name = systemd;
2201 (void) prctl(PR_SET_NAME, systemd);
2202
2203 /* Save the original command line */
2204 saved_argv = argv;
2205 saved_argc = argc;
2206
2207 /* Make sure that if the user says "syslog" we actually log to the journal. */
2208 log_set_upgrade_syslog_to_journal(true);
2209
2210 if (getpid_cached() == 1) {
2211 /* Disable the umask logic */
2212 umask(0);
2213
2214 /* Make sure that at least initially we do not ever log to journald/syslogd, because it might not be activated
2215 * yet (even though the log socket for it exists). */
2216 log_set_prohibit_ipc(true);
2217
2218 /* Always reopen /dev/console when running as PID 1 or one of its pre-execve() children. This is
2219 * important so that we never end up logging to any foreign stderr, for example if we have to log in a
2220 * child process right before execve()'ing the actual binary, at a point in time where socket
2221 * activation stderr/stdout area already set up. */
2222 log_set_always_reopen_console(true);
2223 }
2224
2225 if (getpid_cached() == 1 && detect_container() <= 0) {
2226
2227 /* Running outside of a container as PID 1 */
2228 arg_system = true;
2229 log_set_target(LOG_TARGET_KMSG);
2230 log_open();
2231
2232 if (in_initrd())
2233 initrd_timestamp = userspace_timestamp;
2234
2235 if (!skip_setup) {
2236 r = mount_setup_early();
2237 if (r < 0) {
2238 error_message = "Failed to mount early API filesystems";
2239 goto finish;
2240 }
2241
2242 r = initialize_security(
2243 &loaded_policy,
2244 &security_start_timestamp,
2245 &security_finish_timestamp,
2246 &error_message);
2247 if (r < 0)
2248 goto finish;
2249 }
2250
2251 if (mac_selinux_init() < 0) {
2252 error_message = "Failed to initialize SELinux policy";
2253 goto finish;
2254 }
2255
2256 if (!skip_setup)
2257 initialize_clock();
2258
2259 /* Set the default for later on, but don't actually
2260 * open the logs like this for now. Note that if we
2261 * are transitioning from the initrd there might still
2262 * be journal fd open, and we shouldn't attempt
2263 * opening that before we parsed /proc/cmdline which
2264 * might redirect output elsewhere. */
2265 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
2266
2267 } else if (getpid_cached() == 1) {
2268 /* Running inside a container, as PID 1 */
2269 arg_system = true;
2270 log_set_target(LOG_TARGET_CONSOLE);
2271 log_open();
2272
2273 /* For later on, see above... */
2274 log_set_target(LOG_TARGET_JOURNAL);
2275
2276 /* clear the kernel timestamp,
2277 * because we are in a container */
2278 kernel_timestamp = DUAL_TIMESTAMP_NULL;
2279 } else {
2280 /* Running as user instance */
2281 arg_system = false;
2282 log_set_target(LOG_TARGET_AUTO);
2283 log_open();
2284
2285 /* clear the kernel timestamp,
2286 * because we are not PID 1 */
2287 kernel_timestamp = DUAL_TIMESTAMP_NULL;
2288 }
2289
2290 initialize_coredump(skip_setup);
2291
2292 r = fixup_environment();
2293 if (r < 0) {
2294 log_emergency_errno(r, "Failed to fix up PID 1 environment: %m");
2295 error_message = "Failed to fix up PID1 environment";
2296 goto finish;
2297 }
2298
2299 if (arg_system) {
2300
2301 /* Try to figure out if we can use colors with the console. No
2302 * need to do that for user instances since they never log
2303 * into the console. */
2304 log_show_color(colors_enabled());
2305 r = make_null_stdio();
2306 if (r < 0)
2307 log_warning_errno(r, "Failed to redirect standard streams to /dev/null: %m");
2308 }
2309
2310 /* Mount /proc, /sys and friends, so that /proc/cmdline and
2311 * /proc/$PID/fd is available. */
2312 if (getpid_cached() == 1) {
2313
2314 /* Load the kernel modules early. */
2315 if (!skip_setup)
2316 kmod_setup();
2317
2318 r = mount_setup(loaded_policy);
2319 if (r < 0) {
2320 error_message = "Failed to mount API filesystems";
2321 goto finish;
2322 }
2323 }
2324
2325 /* Reset all signal handlers. */
2326 (void) reset_all_signal_handlers();
2327 (void) ignore_signals(SIGNALS_IGNORE, -1);
2328
2329 r = load_configuration(argc, argv, &error_message);
2330 if (r < 0)
2331 goto finish;
2332
2333 r = safety_checks();
2334 if (r < 0)
2335 goto finish;
2336
2337 if (IN_SET(arg_action, ACTION_TEST, ACTION_HELP, ACTION_DUMP_CONFIGURATION_ITEMS))
2338 (void) pager_open(arg_no_pager, false);
2339
2340 if (arg_action != ACTION_RUN)
2341 skip_setup = true;
2342
2343 if (arg_action == ACTION_HELP) {
2344 retval = help();
2345 goto finish;
2346 } else if (arg_action == ACTION_VERSION) {
2347 retval = version();
2348 goto finish;
2349 } else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) {
2350 unit_dump_config_items(stdout);
2351 retval = EXIT_SUCCESS;
2352 goto finish;
2353 }
2354
2355 assert_se(IN_SET(arg_action, ACTION_RUN, ACTION_TEST));
2356
2357 /* Move out of the way, so that we won't block unmounts */
2358 assert_se(chdir("/") == 0);
2359
2360 if (arg_action == ACTION_RUN) {
2361
2362 /* Close logging fds, in order not to confuse collecting passed fds and terminal logic below */
2363 log_close();
2364
2365 /* Remember open file descriptors for later deserialization */
2366 r = collect_fds(&fds, &error_message);
2367 if (r < 0)
2368 goto finish;
2369
2370 /* Give up any control of the console, but make sure its initialized. */
2371 setup_console_terminal(skip_setup);
2372
2373 /* Open the logging devices, if possible and necessary */
2374 log_open();
2375 }
2376
2377 log_execution_mode(&first_boot);
2378
2379 r = initialize_runtime(skip_setup,
2380 &saved_rlimit_nofile,
2381 &saved_rlimit_memlock,
2382 &error_message);
2383 if (r < 0)
2384 goto finish;
2385
2386 r = manager_new(arg_system ? UNIT_FILE_SYSTEM : UNIT_FILE_USER,
2387 arg_action == ACTION_TEST ? MANAGER_TEST_FULL : 0,
2388 &m);
2389 if (r < 0) {
2390 log_emergency_errno(r, "Failed to allocate manager object: %m");
2391 error_message = "Failed to allocate manager object";
2392 goto finish;
2393 }
2394
2395 m->timestamps[MANAGER_TIMESTAMP_KERNEL] = kernel_timestamp;
2396 m->timestamps[MANAGER_TIMESTAMP_INITRD] = initrd_timestamp;
2397 m->timestamps[MANAGER_TIMESTAMP_USERSPACE] = userspace_timestamp;
2398 m->timestamps[MANAGER_TIMESTAMP_SECURITY_START] = security_start_timestamp;
2399 m->timestamps[MANAGER_TIMESTAMP_SECURITY_FINISH] = security_finish_timestamp;
2400
2401 set_manager_defaults(m);
2402 set_manager_settings(m);
2403 manager_set_first_boot(m, first_boot);
2404
2405 /* Remember whether we should queue the default job */
2406 queue_default_job = !arg_serialization || arg_switched_root;
2407
2408 before_startup = now(CLOCK_MONOTONIC);
2409
2410 r = manager_startup(m, arg_serialization, fds);
2411 if (r < 0) {
2412 log_error_errno(r, "Failed to fully start up daemon: %m");
2413 error_message = "Failed to start up manager";
2414 goto finish;
2415 }
2416
2417 /* This will close all file descriptors that were opened, but not claimed by any unit. */
2418 fds = fdset_free(fds);
2419 arg_serialization = safe_fclose(arg_serialization);
2420
2421 if (queue_default_job) {
2422 r = do_queue_default_job(m, &error_message);
2423 if (r < 0)
2424 goto finish;
2425 }
2426
2427 after_startup = now(CLOCK_MONOTONIC);
2428
2429 log_full(arg_action == ACTION_TEST ? LOG_INFO : LOG_DEBUG,
2430 "Loaded units and determined initial transaction in %s.",
2431 format_timespan(timespan, sizeof(timespan), after_startup - before_startup, 100 * USEC_PER_MSEC));
2432
2433 if (arg_action == ACTION_TEST) {
2434 test_summary(m);
2435 retval = EXIT_SUCCESS;
2436 goto finish;
2437 }
2438
2439 (void) invoke_main_loop(m,
2440 &reexecute,
2441 &retval,
2442 &shutdown_verb,
2443 &fds,
2444 &switch_root_dir,
2445 &switch_root_init,
2446 &error_message);
2447
2448 finish:
2449 pager_close();
2450
2451 if (m)
2452 arg_shutdown_watchdog = m->shutdown_watchdog;
2453
2454 m = manager_free(m);
2455
2456 free_arguments();
2457 mac_selinux_finish();
2458
2459 if (reexecute)
2460 do_reexecute(argc, argv,
2461 &saved_rlimit_nofile,
2462 &saved_rlimit_memlock,
2463 fds,
2464 switch_root_dir,
2465 switch_root_init,
2466 &error_message); /* This only returns if reexecution failed */
2467
2468 arg_serialization = safe_fclose(arg_serialization);
2469 fds = fdset_free(fds);
2470
2471 #if HAVE_VALGRIND_VALGRIND_H
2472 /* If we are PID 1 and running under valgrind, then let's exit
2473 * here explicitly. valgrind will only generate nice output on
2474 * exit(), not on exec(), hence let's do the former not the
2475 * latter here. */
2476 if (getpid_cached() == 1 && RUNNING_ON_VALGRIND) {
2477 /* Cleanup watchdog_device strings for valgrind. We need them
2478 * in become_shutdown() so normally we cannot free them yet. */
2479 watchdog_free_device();
2480 arg_watchdog_device = mfree(arg_watchdog_device);
2481 return 0;
2482 }
2483 #endif
2484
2485 if (shutdown_verb) {
2486 r = become_shutdown(shutdown_verb, retval);
2487
2488 log_error_errno(r, "Failed to execute shutdown binary, %s: %m", getpid_cached() == 1 ? "freezing" : "quitting");
2489 error_message = "Failed to execute shutdown binary";
2490 }
2491
2492 watchdog_free_device();
2493 arg_watchdog_device = mfree(arg_watchdog_device);
2494
2495 if (getpid_cached() == 1) {
2496 if (error_message)
2497 manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
2498 ANSI_HIGHLIGHT_RED "!!!!!!" ANSI_NORMAL,
2499 "%s, freezing.", error_message);
2500 freeze_or_reboot();
2501 }
2502
2503 return retval;
2504 }