git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/main.c
tree-wide: use TAKE_PTR() and TAKE_FD() macros
[thirdparty/systemd.git] / src / core / main.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2010 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <getopt.h>
24 #include <signal.h>
25 #include <stdio.h>
26 #include <string.h>
27 #include <sys/mount.h>
28 #include <sys/prctl.h>
29 #include <sys/reboot.h>
30 #include <sys/stat.h>
31 #include <unistd.h>
32 #if HAVE_SECCOMP
33 #include <seccomp.h>
34 #endif
35 #if HAVE_VALGRIND_VALGRIND_H
36 #include <valgrind/valgrind.h>
37 #endif
38
39 #include "sd-bus.h"
40 #include "sd-daemon.h"
41 #include "sd-messages.h"
42
43 #include "alloc-util.h"
44 #include "architecture.h"
45 #include "build.h"
46 #include "bus-error.h"
47 #include "bus-util.h"
48 #include "capability-util.h"
49 #include "clock-util.h"
50 #include "conf-parser.h"
51 #include "cpu-set-util.h"
52 #include "dbus-manager.h"
53 #include "def.h"
54 #include "emergency-action.h"
55 #include "env-util.h"
56 #include "fd-util.h"
57 #include "fdset.h"
58 #include "fileio.h"
59 #include "format-util.h"
60 #include "fs-util.h"
61 #include "hostname-setup.h"
62 #include "ima-setup.h"
63 #include "killall.h"
64 #include "kmod-setup.h"
65 #include "load-fragment.h"
66 #include "log.h"
67 #include "loopback-setup.h"
68 #include "machine-id-setup.h"
69 #include "manager.h"
70 #include "missing.h"
71 #include "mount-setup.h"
72 #include "pager.h"
73 #include "parse-util.h"
74 #include "path-util.h"
75 #include "proc-cmdline.h"
76 #include "process-util.h"
77 #include "raw-clone.h"
78 #include "rlimit-util.h"
79 #if HAVE_SECCOMP
80 #include "seccomp-util.h"
81 #endif
82 #include "selinux-setup.h"
83 #include "selinux-util.h"
84 #include "signal-util.h"
85 #include "smack-setup.h"
86 #include "special.h"
87 #include "stat-util.h"
88 #include "stdio-util.h"
89 #include "strv.h"
90 #include "switch-root.h"
91 #include "terminal-util.h"
92 #include "umask-util.h"
93 #include "user-util.h"
94 #include "util.h"
95 #include "virt.h"
96 #include "watchdog.h"
97
98 static enum {
99 ACTION_RUN,
100 ACTION_HELP,
101 ACTION_VERSION,
102 ACTION_TEST,
103 ACTION_DUMP_CONFIGURATION_ITEMS
104 } arg_action = ACTION_RUN;
105 static char *arg_default_unit = NULL;
106 static bool arg_system = false;
107 static bool arg_dump_core = true;
108 static int arg_crash_chvt = -1;
109 static bool arg_crash_shell = false;
110 static bool arg_crash_reboot = false;
111 static char *arg_confirm_spawn = NULL;
112 static ShowStatus arg_show_status = _SHOW_STATUS_UNSET;
113 static bool arg_switched_root = false;
114 static bool arg_no_pager = false;
115 static bool arg_service_watchdogs = true;
116 static char ***arg_join_controllers = NULL;
117 static ExecOutput arg_default_std_output = EXEC_OUTPUT_JOURNAL;
118 static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT;
119 static usec_t arg_default_restart_usec = DEFAULT_RESTART_USEC;
120 static usec_t arg_default_timeout_start_usec = DEFAULT_TIMEOUT_USEC;
121 static usec_t arg_default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC;
122 static usec_t arg_default_start_limit_interval = DEFAULT_START_LIMIT_INTERVAL;
123 static unsigned arg_default_start_limit_burst = DEFAULT_START_LIMIT_BURST;
124 static usec_t arg_runtime_watchdog = 0;
125 static usec_t arg_shutdown_watchdog = 10 * USEC_PER_MINUTE;
126 static char *arg_watchdog_device = NULL;
127 static char **arg_default_environment = NULL;
128 static struct rlimit *arg_default_rlimit[_RLIMIT_MAX] = {};
129 static uint64_t arg_capability_bounding_set = CAP_ALL;
130 static bool arg_no_new_privs = false;
131 static nsec_t arg_timer_slack_nsec = NSEC_INFINITY;
132 static usec_t arg_default_timer_accuracy_usec = 1 * USEC_PER_MINUTE;
133 static Set* arg_syscall_archs = NULL;
134 static FILE* arg_serialization = NULL;
135 static bool arg_default_cpu_accounting = false;
136 static bool arg_default_io_accounting = false;
137 static bool arg_default_ip_accounting = false;
138 static bool arg_default_blockio_accounting = false;
139 static bool arg_default_memory_accounting = MEMORY_ACCOUNTING_DEFAULT;
140 static bool arg_default_tasks_accounting = true;
141 static uint64_t arg_default_tasks_max = UINT64_MAX;
142 static sd_id128_t arg_machine_id = {};
143 static EmergencyAction arg_cad_burst_action = EMERGENCY_ACTION_REBOOT_FORCE;
144
145 _noreturn_ static void freeze_or_reboot(void) {
146
147 if (arg_crash_reboot) {
148 log_notice("Rebooting in 10s...");
149 (void) sleep(10);
150
151 log_notice("Rebooting now...");
152 (void) reboot(RB_AUTOBOOT);
153 log_emergency_errno(errno, "Failed to reboot: %m");
154 }
155
156 log_emergency("Freezing execution.");
157 freeze();
158 }
159
160 _noreturn_ static void crash(int sig) {
161 struct sigaction sa;
162 pid_t pid;
163
164 if (getpid_cached() != 1)
165 /* Pass this on immediately, if this is not PID 1 */
166 (void) raise(sig);
167 else if (!arg_dump_core)
168 log_emergency("Caught <%s>, not dumping core.", signal_to_string(sig));
169 else {
170 sa = (struct sigaction) {
171 .sa_handler = nop_signal_handler,
172 .sa_flags = SA_NOCLDSTOP|SA_RESTART,
173 };
174
175 /* We want to wait for the core process, hence let's enable SIGCHLD */
176 (void) sigaction(SIGCHLD, &sa, NULL);
177
178 pid = raw_clone(SIGCHLD);
179 if (pid < 0)
180 log_emergency_errno(errno, "Caught <%s>, cannot fork for core dump: %m", signal_to_string(sig));
181 else if (pid == 0) {
182 /* Enable default signal handler for core dump */
183
184 sa = (struct sigaction) {
185 .sa_handler = SIG_DFL,
186 };
187 (void) sigaction(sig, &sa, NULL);
188
189 /* Don't limit the coredump size */
190 (void) setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY));
191
192 /* Just to be sure... */
193 (void) chdir("/");
194
195 /* Raise the signal again */
196 pid = raw_getpid();
197 (void) kill(pid, sig); /* raise() would kill the parent */
198
199 assert_not_reached("We shouldn't be here...");
200 _exit(EXIT_FAILURE);
201 } else {
202 siginfo_t status;
203 int r;
204
205 /* Order things nicely. */
206 r = wait_for_terminate(pid, &status);
207 if (r < 0)
208 log_emergency_errno(r, "Caught <%s>, waitpid() failed: %m", signal_to_string(sig));
209 else if (status.si_code != CLD_DUMPED)
210 log_emergency("Caught <%s>, core dump failed (child "PID_FMT", code=%s, status=%i/%s).",
211 signal_to_string(sig),
212 pid, sigchld_code_to_string(status.si_code),
213 status.si_status,
214 strna(status.si_code == CLD_EXITED
215 ? exit_status_to_string(status.si_status, EXIT_STATUS_MINIMAL)
216 : signal_to_string(status.si_status)));
217 else
218 log_emergency("Caught <%s>, dumped core as pid "PID_FMT".", signal_to_string(sig), pid);
219 }
220 }
221
222 if (arg_crash_chvt >= 0)
223 (void) chvt(arg_crash_chvt);
224
225 sa = (struct sigaction) {
226 .sa_handler = SIG_IGN,
227 .sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT|SA_RESTART,
228 };
229
230 /* Let the kernel reap children for us */
231 (void) sigaction(SIGCHLD, &sa, NULL);
232
233 if (arg_crash_shell) {
234 log_notice("Executing crash shell in 10s...");
235 (void) sleep(10);
236
237 pid = raw_clone(SIGCHLD);
238 if (pid < 0)
239 log_emergency_errno(errno, "Failed to fork off crash shell: %m");
240 else if (pid == 0) {
241 (void) setsid();
242 (void) make_console_stdio();
243 (void) execle("/bin/sh", "/bin/sh", NULL, environ);
244
245 log_emergency_errno(errno, "execle() failed: %m");
246 _exit(EXIT_FAILURE);
247 } else {
248 log_info("Spawned crash shell as PID "PID_FMT".", pid);
249 (void) wait_for_terminate(pid, NULL);
250 }
251 }
252
253 freeze_or_reboot();
254 }
255
256 static void install_crash_handler(void) {
257 static const struct sigaction sa = {
258 .sa_handler = crash,
259 .sa_flags = SA_NODEFER, /* So that we can raise the signal again from the signal handler */
260 };
261 int r;
262
263 /* We ignore the return value here, since, we don't mind if we
264 * cannot set up a crash handler */
265 r = sigaction_many(&sa, SIGNALS_CRASH_HANDLER, -1);
266 if (r < 0)
267 log_debug_errno(r, "I had trouble setting up the crash handler, ignoring: %m");
268 }
269
270 static int console_setup(void) {
271 _cleanup_close_ int tty_fd = -1;
272 int r;
273
274 tty_fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
275 if (tty_fd < 0)
276 return log_error_errno(tty_fd, "Failed to open /dev/console: %m");
277
278 /* We don't want to force text mode. plymouth may be showing
279 * pictures already from initrd. */
280 r = reset_terminal_fd(tty_fd, false);
281 if (r < 0)
282 return log_error_errno(r, "Failed to reset /dev/console: %m");
283
284 return 0;
285 }
286
287 static int parse_crash_chvt(const char *value) {
288 int b;
289
290 if (safe_atoi(value, &arg_crash_chvt) >= 0)
291 return 0;
292
293 b = parse_boolean(value);
294 if (b < 0)
295 return b;
296
297 if (b > 0)
298 arg_crash_chvt = 0; /* switch to where kmsg goes */
299 else
300 arg_crash_chvt = -1; /* turn off switching */
301
302 return 0;
303 }
304
/* Translates a confirm-spawn setting into the console path to use.
 *
 * A NULL value or a boolean "true" selects /dev/console, a boolean "false" yields NULL, a value
 * accepted by is_path() is used verbatim, and anything else gets "/dev/" prefixed.
 *
 * On success *console receives a newly allocated string (or NULL) and 0 is returned; -ENOMEM is
 * returned if the allocation fails, in which case *console is left untouched. */
static int parse_confirm_spawn(const char *value, char **console) {
        char *path;
        int enabled;

        enabled = value ? parse_boolean(value) : 1;
        if (enabled == 0) {
                /* Explicitly turned off */
                *console = NULL;
                return 0;
        }

        if (enabled > 0)
                /* Turned on, use the default tty */
                path = strdup("/dev/console");
        else if (is_path(value))
                /* Not a boolean, but a fully qualified path */
                path = strdup(value);
        else
                /* Not a boolean, only a tty file name */
                path = strjoin("/dev/", value);

        if (!path)
                return -ENOMEM;

        *console = path;
        return 0;
}
326
327 static int set_machine_id(const char *m) {
328 sd_id128_t t;
329 assert(m);
330
331 if (sd_id128_from_string(m, &t) < 0)
332 return -EINVAL;
333
334 if (sd_id128_is_null(t))
335 return -EINVAL;
336
337 arg_machine_id = t;
338 return 0;
339 }
340
341 static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
342
343 int r;
344
345 assert(key);
346
347 if (STR_IN_SET(key, "systemd.unit", "rd.systemd.unit")) {
348
349 if (proc_cmdline_value_missing(key, value))
350 return 0;
351
352 if (!unit_name_is_valid(value, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
353 log_warning("Unit name specified on %s= is not valid, ignoring: %s", key, value);
354 else if (in_initrd() == !!startswith(key, "rd.")) {
355 if (free_and_strdup(&arg_default_unit, value) < 0)
356 return log_oom();
357 }
358
359 } else if (proc_cmdline_key_streq(key, "systemd.dump_core")) {
360
361 r = value ? parse_boolean(value) : true;
362 if (r < 0)
363 log_warning("Failed to parse dump core switch %s. Ignoring.", value);
364 else
365 arg_dump_core = r;
366
367 } else if (proc_cmdline_key_streq(key, "systemd.crash_chvt")) {
368
369 if (!value)
370 arg_crash_chvt = 0; /* turn on */
371 else if (parse_crash_chvt(value) < 0)
372 log_warning("Failed to parse crash chvt switch %s. Ignoring.", value);
373
374 } else if (proc_cmdline_key_streq(key, "systemd.crash_shell")) {
375
376 r = value ? parse_boolean(value) : true;
377 if (r < 0)
378 log_warning("Failed to parse crash shell switch %s. Ignoring.", value);
379 else
380 arg_crash_shell = r;
381
382 } else if (proc_cmdline_key_streq(key, "systemd.crash_reboot")) {
383
384 r = value ? parse_boolean(value) : true;
385 if (r < 0)
386 log_warning("Failed to parse crash reboot switch %s. Ignoring.", value);
387 else
388 arg_crash_reboot = r;
389
390 } else if (proc_cmdline_key_streq(key, "systemd.confirm_spawn")) {
391 char *s;
392
393 r = parse_confirm_spawn(value, &s);
394 if (r < 0)
395 log_warning_errno(r, "Failed to parse confirm_spawn switch %s. Ignoring.", value);
396 else {
397 free(arg_confirm_spawn);
398 arg_confirm_spawn = s;
399 }
400
401 } else if (proc_cmdline_key_streq(key, "systemd.service_watchdogs")) {
402
403 r = value ? parse_boolean(value) : true;
404 if (r < 0)
405 log_warning("Failed to parse service watchdog switch %s. Ignoring.", value);
406 else
407 arg_service_watchdogs = r;
408
409 } else if (proc_cmdline_key_streq(key, "systemd.show_status")) {
410
411 if (value) {
412 r = parse_show_status(value, &arg_show_status);
413 if (r < 0)
414 log_warning("Failed to parse show status switch %s. Ignoring.", value);
415 } else
416 arg_show_status = SHOW_STATUS_YES;
417
418 } else if (proc_cmdline_key_streq(key, "systemd.default_standard_output")) {
419
420 if (proc_cmdline_value_missing(key, value))
421 return 0;
422
423 r = exec_output_from_string(value);
424 if (r < 0)
425 log_warning("Failed to parse default standard output switch %s. Ignoring.", value);
426 else
427 arg_default_std_output = r;
428
429 } else if (proc_cmdline_key_streq(key, "systemd.default_standard_error")) {
430
431 if (proc_cmdline_value_missing(key, value))
432 return 0;
433
434 r = exec_output_from_string(value);
435 if (r < 0)
436 log_warning("Failed to parse default standard error switch %s. Ignoring.", value);
437 else
438 arg_default_std_error = r;
439
440 } else if (streq(key, "systemd.setenv")) {
441
442 if (proc_cmdline_value_missing(key, value))
443 return 0;
444
445 if (env_assignment_is_valid(value)) {
446 char **env;
447
448 env = strv_env_set(arg_default_environment, value);
449 if (!env)
450 return log_oom();
451
452 arg_default_environment = env;
453 } else
454 log_warning("Environment variable name '%s' is not valid. Ignoring.", value);
455
456 } else if (proc_cmdline_key_streq(key, "systemd.machine_id")) {
457
458 if (proc_cmdline_value_missing(key, value))
459 return 0;
460
461 r = set_machine_id(value);
462 if (r < 0)
463 log_warning("MachineID '%s' is not valid. Ignoring.", value);
464
465 } else if (proc_cmdline_key_streq(key, "systemd.default_timeout_start_sec")) {
466
467 if (proc_cmdline_value_missing(key, value))
468 return 0;
469
470 r = parse_sec(value, &arg_default_timeout_start_usec);
471 if (r < 0)
472 log_warning_errno(r, "Failed to parse default start timeout: %s, ignoring.", value);
473
474 if (arg_default_timeout_start_usec <= 0)
475 arg_default_timeout_start_usec = USEC_INFINITY;
476
477 } else if (proc_cmdline_key_streq(key, "systemd.watchdog_device")) {
478
479 if (proc_cmdline_value_missing(key, value))
480 return 0;
481
482 parse_path_argument_and_warn(value, false, &arg_watchdog_device);
483
484 } else if (streq(key, "quiet") && !value) {
485
486 if (arg_show_status == _SHOW_STATUS_UNSET)
487 arg_show_status = SHOW_STATUS_AUTO;
488
489 } else if (streq(key, "debug") && !value) {
490
491 /* Note that log_parse_environment() handles 'debug'
492 * too, and sets the log level to LOG_DEBUG. */
493
494 if (detect_container() > 0)
495 log_set_target(LOG_TARGET_CONSOLE);
496
497 } else if (!value) {
498 const char *target;
499
500 /* SysV compatibility */
501 target = runlevel_to_target(key);
502 if (target)
503 return free_and_strdup(&arg_default_unit, target);
504 }
505
506 return 0;
507 }
508
/* Generates a configuration parser callback called `name` that passes the right-hand side of the
 * assignment to `func`. If `func` fails, a syntax error mentioning `descr` and the offending
 * value is logged; the callback itself always returns 0, so a bad value never aborts parsing. */
#define DEFINE_SETTER(name, func, descr)                              \
        static int name(const char *unit,                             \
                        const char *filename,                         \
                        unsigned line,                                \
                        const char *section,                          \
                        unsigned section_line,                        \
                        const char *lvalue,                           \
                        int ltype,                                    \
                        const char *rvalue,                           \
                        void *data,                                   \
                        void *userdata) {                             \
                                                                      \
                int r;                                                \
                                                                      \
                assert(filename);                                     \
                assert(lvalue);                                       \
                assert(rvalue);                                       \
                                                                      \
                r = func(rvalue);                                     \
                if (r < 0)                                            \
                        log_syntax(unit, LOG_ERR, filename, line, r,  \
                                   "Invalid " descr " '%s': %m",      \
                                   rvalue);                           \
                                                                      \
                return 0;                                             \
        }

DEFINE_SETTER(config_parse_level2, log_set_max_level_from_string, "log level")
DEFINE_SETTER(config_parse_target, log_set_target_from_string, "target")
DEFINE_SETTER(config_parse_color, log_show_color_from_string, "color")
DEFINE_SETTER(config_parse_location, log_show_location_from_string, "location")
540
541 static int config_parse_cpu_affinity2(
542 const char *unit,
543 const char *filename,
544 unsigned line,
545 const char *section,
546 unsigned section_line,
547 const char *lvalue,
548 int ltype,
549 const char *rvalue,
550 void *data,
551 void *userdata) {
552
553 _cleanup_cpu_free_ cpu_set_t *c = NULL;
554 int ncpus;
555
556 ncpus = parse_cpu_set_and_warn(rvalue, &c, unit, filename, line, lvalue);
557 if (ncpus < 0)
558 return ncpus;
559
560 if (sched_setaffinity(0, CPU_ALLOC_SIZE(ncpus), c) < 0)
561 log_warning_errno(errno, "Failed to set CPU affinity: %m");
562
563 return 0;
564 }
565
566 static int config_parse_show_status(
567 const char* unit,
568 const char *filename,
569 unsigned line,
570 const char *section,
571 unsigned section_line,
572 const char *lvalue,
573 int ltype,
574 const char *rvalue,
575 void *data,
576 void *userdata) {
577
578 int k;
579 ShowStatus *b = data;
580
581 assert(filename);
582 assert(lvalue);
583 assert(rvalue);
584 assert(data);
585
586 k = parse_show_status(rvalue, b);
587 if (k < 0) {
588 log_syntax(unit, LOG_ERR, filename, line, k, "Failed to parse show status setting, ignoring: %s", rvalue);
589 return 0;
590 }
591
592 return 0;
593 }
594
595 static int config_parse_output_restricted(
596 const char* unit,
597 const char *filename,
598 unsigned line,
599 const char *section,
600 unsigned section_line,
601 const char *lvalue,
602 int ltype,
603 const char *rvalue,
604 void *data,
605 void *userdata) {
606
607 ExecOutput t, *eo = data;
608
609 assert(filename);
610 assert(lvalue);
611 assert(rvalue);
612 assert(data);
613
614 t = exec_output_from_string(rvalue);
615 if (t < 0) {
616 log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse output type, ignoring: %s", rvalue);
617 return 0;
618 }
619
620 if (IN_SET(t, EXEC_OUTPUT_SOCKET, EXEC_OUTPUT_NAMED_FD, EXEC_OUTPUT_FILE)) {
621 log_syntax(unit, LOG_ERR, filename, line, 0, "Standard output types socket, fd:, file: are not supported as defaults, ignoring: %s", rvalue);
622 return 0;
623 }
624
625 *eo = t;
626 return 0;
627 }
628
/* Configuration callback for CrashChangeVT= (and its legacy alias CrashChVT=): feeds rvalue to
 * parse_crash_chvt(). Unparsable values are logged and ignored; the callback always returns 0. */
static int config_parse_crash_chvt(
                const char* unit,
                const char *filename,
                unsigned line,
                const char *section,
                unsigned section_line,
                const char *lvalue,
                int ltype,
                const char *rvalue,
                void *data,
                void *userdata) {

        int r;

        assert(filename);
        assert(lvalue);
        assert(rvalue);

        r = parse_crash_chvt(rvalue);
        if (r < 0)
                log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse CrashChangeVT= setting, ignoring: %s", rvalue);

        return 0;
}
655
656 static int parse_config_file(void) {
657
658 const ConfigTableItem items[] = {
659 { "Manager", "LogLevel", config_parse_level2, 0, NULL },
660 { "Manager", "LogTarget", config_parse_target, 0, NULL },
661 { "Manager", "LogColor", config_parse_color, 0, NULL },
662 { "Manager", "LogLocation", config_parse_location, 0, NULL },
663 { "Manager", "DumpCore", config_parse_bool, 0, &arg_dump_core },
664 { "Manager", "CrashChVT", /* legacy */ config_parse_crash_chvt, 0, NULL },
665 { "Manager", "CrashChangeVT", config_parse_crash_chvt, 0, NULL },
666 { "Manager", "CrashShell", config_parse_bool, 0, &arg_crash_shell },
667 { "Manager", "CrashReboot", config_parse_bool, 0, &arg_crash_reboot },
668 { "Manager", "ShowStatus", config_parse_show_status, 0, &arg_show_status },
669 { "Manager", "CPUAffinity", config_parse_cpu_affinity2, 0, NULL },
670 { "Manager", "JoinControllers", config_parse_join_controllers, 0, &arg_join_controllers },
671 { "Manager", "RuntimeWatchdogSec", config_parse_sec, 0, &arg_runtime_watchdog },
672 { "Manager", "ShutdownWatchdogSec", config_parse_sec, 0, &arg_shutdown_watchdog },
673 { "Manager", "WatchdogDevice", config_parse_path, 0, &arg_watchdog_device },
674 { "Manager", "CapabilityBoundingSet", config_parse_capability_set, 0, &arg_capability_bounding_set },
675 { "Manager", "NoNewPrivileges", config_parse_bool, 0, &arg_no_new_privs },
676 #if HAVE_SECCOMP
677 { "Manager", "SystemCallArchitectures", config_parse_syscall_archs, 0, &arg_syscall_archs },
678 #endif
679 { "Manager", "TimerSlackNSec", config_parse_nsec, 0, &arg_timer_slack_nsec },
680 { "Manager", "DefaultTimerAccuracySec", config_parse_sec, 0, &arg_default_timer_accuracy_usec },
681 { "Manager", "DefaultStandardOutput", config_parse_output_restricted,0, &arg_default_std_output },
682 { "Manager", "DefaultStandardError", config_parse_output_restricted,0, &arg_default_std_error },
683 { "Manager", "DefaultTimeoutStartSec", config_parse_sec, 0, &arg_default_timeout_start_usec },
684 { "Manager", "DefaultTimeoutStopSec", config_parse_sec, 0, &arg_default_timeout_stop_usec },
685 { "Manager", "DefaultRestartSec", config_parse_sec, 0, &arg_default_restart_usec },
686 { "Manager", "DefaultStartLimitInterval", config_parse_sec, 0, &arg_default_start_limit_interval }, /* obsolete alias */
687 { "Manager", "DefaultStartLimitIntervalSec",config_parse_sec, 0, &arg_default_start_limit_interval },
688 { "Manager", "DefaultStartLimitBurst", config_parse_unsigned, 0, &arg_default_start_limit_burst },
689 { "Manager", "DefaultEnvironment", config_parse_environ, 0, &arg_default_environment },
690 { "Manager", "DefaultLimitCPU", config_parse_limit, RLIMIT_CPU, arg_default_rlimit },
691 { "Manager", "DefaultLimitFSIZE", config_parse_limit, RLIMIT_FSIZE, arg_default_rlimit },
692 { "Manager", "DefaultLimitDATA", config_parse_limit, RLIMIT_DATA, arg_default_rlimit },
693 { "Manager", "DefaultLimitSTACK", config_parse_limit, RLIMIT_STACK, arg_default_rlimit },
694 { "Manager", "DefaultLimitCORE", config_parse_limit, RLIMIT_CORE, arg_default_rlimit },
695 { "Manager", "DefaultLimitRSS", config_parse_limit, RLIMIT_RSS, arg_default_rlimit },
696 { "Manager", "DefaultLimitNOFILE", config_parse_limit, RLIMIT_NOFILE, arg_default_rlimit },
697 { "Manager", "DefaultLimitAS", config_parse_limit, RLIMIT_AS, arg_default_rlimit },
698 { "Manager", "DefaultLimitNPROC", config_parse_limit, RLIMIT_NPROC, arg_default_rlimit },
699 { "Manager", "DefaultLimitMEMLOCK", config_parse_limit, RLIMIT_MEMLOCK, arg_default_rlimit },
700 { "Manager", "DefaultLimitLOCKS", config_parse_limit, RLIMIT_LOCKS, arg_default_rlimit },
701 { "Manager", "DefaultLimitSIGPENDING", config_parse_limit, RLIMIT_SIGPENDING, arg_default_rlimit },
702 { "Manager", "DefaultLimitMSGQUEUE", config_parse_limit, RLIMIT_MSGQUEUE, arg_default_rlimit },
703 { "Manager", "DefaultLimitNICE", config_parse_limit, RLIMIT_NICE, arg_default_rlimit },
704 { "Manager", "DefaultLimitRTPRIO", config_parse_limit, RLIMIT_RTPRIO, arg_default_rlimit },
705 { "Manager", "DefaultLimitRTTIME", config_parse_limit, RLIMIT_RTTIME, arg_default_rlimit },
706 { "Manager", "DefaultCPUAccounting", config_parse_bool, 0, &arg_default_cpu_accounting },
707 { "Manager", "DefaultIOAccounting", config_parse_bool, 0, &arg_default_io_accounting },
708 { "Manager", "DefaultIPAccounting", config_parse_bool, 0, &arg_default_ip_accounting },
709 { "Manager", "DefaultBlockIOAccounting", config_parse_bool, 0, &arg_default_blockio_accounting },
710 { "Manager", "DefaultMemoryAccounting", config_parse_bool, 0, &arg_default_memory_accounting },
711 { "Manager", "DefaultTasksAccounting", config_parse_bool, 0, &arg_default_tasks_accounting },
712 { "Manager", "DefaultTasksMax", config_parse_tasks_max, 0, &arg_default_tasks_max },
713 { "Manager", "CtrlAltDelBurstAction", config_parse_emergency_action, 0, &arg_cad_burst_action },
714 {}
715 };
716
717 const char *fn, *conf_dirs_nulstr;
718
719 fn = arg_system ?
720 PKGSYSCONFDIR "/system.conf" :
721 PKGSYSCONFDIR "/user.conf";
722
723 conf_dirs_nulstr = arg_system ?
724 CONF_PATHS_NULSTR("systemd/system.conf.d") :
725 CONF_PATHS_NULSTR("systemd/user.conf.d");
726
727 (void) config_parse_many_nulstr(fn, conf_dirs_nulstr, "Manager\0", config_item_table_lookup, items, CONFIG_PARSE_WARN, NULL);
728
729 /* Traditionally "0" was used to turn off the default unit timeouts. Fix this up so that we used USEC_INFINITY
730 * like everywhere else. */
731 if (arg_default_timeout_start_usec <= 0)
732 arg_default_timeout_start_usec = USEC_INFINITY;
733 if (arg_default_timeout_stop_usec <= 0)
734 arg_default_timeout_stop_usec = USEC_INFINITY;
735
736 return 0;
737 }
738
739 static void set_manager_defaults(Manager *m) {
740
741 assert(m);
742
743 m->default_timer_accuracy_usec = arg_default_timer_accuracy_usec;
744 m->default_std_output = arg_default_std_output;
745 m->default_std_error = arg_default_std_error;
746 m->default_timeout_start_usec = arg_default_timeout_start_usec;
747 m->default_timeout_stop_usec = arg_default_timeout_stop_usec;
748 m->default_restart_usec = arg_default_restart_usec;
749 m->default_start_limit_interval = arg_default_start_limit_interval;
750 m->default_start_limit_burst = arg_default_start_limit_burst;
751 m->default_cpu_accounting = arg_default_cpu_accounting;
752 m->default_io_accounting = arg_default_io_accounting;
753 m->default_ip_accounting = arg_default_ip_accounting;
754 m->default_blockio_accounting = arg_default_blockio_accounting;
755 m->default_memory_accounting = arg_default_memory_accounting;
756 m->default_tasks_accounting = arg_default_tasks_accounting;
757 m->default_tasks_max = arg_default_tasks_max;
758
759 manager_set_default_rlimits(m, arg_default_rlimit);
760 manager_environment_add(m, NULL, arg_default_environment);
761 }
762
763 static void set_manager_settings(Manager *m) {
764
765 assert(m);
766
767 m->confirm_spawn = arg_confirm_spawn;
768 m->service_watchdogs = arg_service_watchdogs;
769 m->runtime_watchdog = arg_runtime_watchdog;
770 m->shutdown_watchdog = arg_shutdown_watchdog;
771 m->cad_burst_action = arg_cad_burst_action;
772
773 manager_set_show_status(m, arg_show_status);
774 }
775
776 static int parse_argv(int argc, char *argv[]) {
777 enum {
778 ARG_LOG_LEVEL = 0x100,
779 ARG_LOG_TARGET,
780 ARG_LOG_COLOR,
781 ARG_LOG_LOCATION,
782 ARG_UNIT,
783 ARG_SYSTEM,
784 ARG_USER,
785 ARG_TEST,
786 ARG_NO_PAGER,
787 ARG_VERSION,
788 ARG_DUMP_CONFIGURATION_ITEMS,
789 ARG_DUMP_CORE,
790 ARG_CRASH_CHVT,
791 ARG_CRASH_SHELL,
792 ARG_CRASH_REBOOT,
793 ARG_CONFIRM_SPAWN,
794 ARG_SHOW_STATUS,
795 ARG_DESERIALIZE,
796 ARG_SWITCHED_ROOT,
797 ARG_DEFAULT_STD_OUTPUT,
798 ARG_DEFAULT_STD_ERROR,
799 ARG_MACHINE_ID,
800 ARG_SERVICE_WATCHDOGS,
801 };
802
803 static const struct option options[] = {
804 { "log-level", required_argument, NULL, ARG_LOG_LEVEL },
805 { "log-target", required_argument, NULL, ARG_LOG_TARGET },
806 { "log-color", optional_argument, NULL, ARG_LOG_COLOR },
807 { "log-location", optional_argument, NULL, ARG_LOG_LOCATION },
808 { "unit", required_argument, NULL, ARG_UNIT },
809 { "system", no_argument, NULL, ARG_SYSTEM },
810 { "user", no_argument, NULL, ARG_USER },
811 { "test", no_argument, NULL, ARG_TEST },
812 { "no-pager", no_argument, NULL, ARG_NO_PAGER },
813 { "help", no_argument, NULL, 'h' },
814 { "version", no_argument, NULL, ARG_VERSION },
815 { "dump-configuration-items", no_argument, NULL, ARG_DUMP_CONFIGURATION_ITEMS },
816 { "dump-core", optional_argument, NULL, ARG_DUMP_CORE },
817 { "crash-chvt", required_argument, NULL, ARG_CRASH_CHVT },
818 { "crash-shell", optional_argument, NULL, ARG_CRASH_SHELL },
819 { "crash-reboot", optional_argument, NULL, ARG_CRASH_REBOOT },
820 { "confirm-spawn", optional_argument, NULL, ARG_CONFIRM_SPAWN },
821 { "show-status", optional_argument, NULL, ARG_SHOW_STATUS },
822 { "deserialize", required_argument, NULL, ARG_DESERIALIZE },
823 { "switched-root", no_argument, NULL, ARG_SWITCHED_ROOT },
824 { "default-standard-output", required_argument, NULL, ARG_DEFAULT_STD_OUTPUT, },
825 { "default-standard-error", required_argument, NULL, ARG_DEFAULT_STD_ERROR, },
826 { "machine-id", required_argument, NULL, ARG_MACHINE_ID },
827 { "service-watchdogs", required_argument, NULL, ARG_SERVICE_WATCHDOGS },
828 {}
829 };
830
831 int c, r;
832
833 assert(argc >= 1);
834 assert(argv);
835
836 if (getpid_cached() == 1)
837 opterr = 0;
838
839 while ((c = getopt_long(argc, argv, "hDbsz:", options, NULL)) >= 0)
840
841 switch (c) {
842
843 case ARG_LOG_LEVEL:
844 r = log_set_max_level_from_string(optarg);
845 if (r < 0) {
846 log_error("Failed to parse log level %s.", optarg);
847 return r;
848 }
849
850 break;
851
852 case ARG_LOG_TARGET:
853 r = log_set_target_from_string(optarg);
854 if (r < 0) {
855 log_error("Failed to parse log target %s.", optarg);
856 return r;
857 }
858
859 break;
860
861 case ARG_LOG_COLOR:
862
863 if (optarg) {
864 r = log_show_color_from_string(optarg);
865 if (r < 0) {
866 log_error("Failed to parse log color setting %s.", optarg);
867 return r;
868 }
869 } else
870 log_show_color(true);
871
872 break;
873
874 case ARG_LOG_LOCATION:
875 if (optarg) {
876 r = log_show_location_from_string(optarg);
877 if (r < 0) {
878 log_error("Failed to parse log location setting %s.", optarg);
879 return r;
880 }
881 } else
882 log_show_location(true);
883
884 break;
885
886 case ARG_DEFAULT_STD_OUTPUT:
887 r = exec_output_from_string(optarg);
888 if (r < 0) {
889 log_error("Failed to parse default standard output setting %s.", optarg);
890 return r;
891 } else
892 arg_default_std_output = r;
893 break;
894
895 case ARG_DEFAULT_STD_ERROR:
896 r = exec_output_from_string(optarg);
897 if (r < 0) {
898 log_error("Failed to parse default standard error output setting %s.", optarg);
899 return r;
900 } else
901 arg_default_std_error = r;
902 break;
903
904 case ARG_UNIT:
905 r = free_and_strdup(&arg_default_unit, optarg);
906 if (r < 0)
907 return log_error_errno(r, "Failed to set default unit %s: %m", optarg);
908
909 break;
910
911 case ARG_SYSTEM:
912 arg_system = true;
913 break;
914
915 case ARG_USER:
916 arg_system = false;
917 break;
918
919 case ARG_TEST:
920 arg_action = ACTION_TEST;
921 break;
922
923 case ARG_NO_PAGER:
924 arg_no_pager = true;
925 break;
926
927 case ARG_VERSION:
928 arg_action = ACTION_VERSION;
929 break;
930
931 case ARG_DUMP_CONFIGURATION_ITEMS:
932 arg_action = ACTION_DUMP_CONFIGURATION_ITEMS;
933 break;
934
935 case ARG_DUMP_CORE:
936 if (!optarg)
937 arg_dump_core = true;
938 else {
939 r = parse_boolean(optarg);
940 if (r < 0)
941 return log_error_errno(r, "Failed to parse dump core boolean: %s", optarg);
942 arg_dump_core = r;
943 }
944 break;
945
946 case ARG_CRASH_CHVT:
947 r = parse_crash_chvt(optarg);
948 if (r < 0)
949 return log_error_errno(r, "Failed to parse crash virtual terminal index: %s", optarg);
950 break;
951
952 case ARG_CRASH_SHELL:
953 if (!optarg)
954 arg_crash_shell = true;
955 else {
956 r = parse_boolean(optarg);
957 if (r < 0)
958 return log_error_errno(r, "Failed to parse crash shell boolean: %s", optarg);
959 arg_crash_shell = r;
960 }
961 break;
962
963 case ARG_CRASH_REBOOT:
964 if (!optarg)
965 arg_crash_reboot = true;
966 else {
967 r = parse_boolean(optarg);
968 if (r < 0)
969 return log_error_errno(r, "Failed to parse crash shell boolean: %s", optarg);
970 arg_crash_reboot = r;
971 }
972 break;
973
974 case ARG_CONFIRM_SPAWN:
975 arg_confirm_spawn = mfree(arg_confirm_spawn);
976
977 r = parse_confirm_spawn(optarg, &arg_confirm_spawn);
978 if (r < 0)
979 return log_error_errno(r, "Failed to parse confirm spawn option: %m");
980 break;
981
982 case ARG_SERVICE_WATCHDOGS:
983 r = parse_boolean(optarg);
984 if (r < 0)
985 return log_error_errno(r, "Failed to parse service watchdogs boolean: %s", optarg);
986 arg_service_watchdogs = r;
987 break;
988
989 case ARG_SHOW_STATUS:
990 if (optarg) {
991 r = parse_show_status(optarg, &arg_show_status);
992 if (r < 0) {
993 log_error("Failed to parse show status boolean %s.", optarg);
994 return r;
995 }
996 } else
997 arg_show_status = SHOW_STATUS_YES;
998 break;
999
1000 case ARG_DESERIALIZE: {
1001 int fd;
1002 FILE *f;
1003
1004 r = safe_atoi(optarg, &fd);
1005 if (r < 0 || fd < 0) {
1006 log_error("Failed to parse deserialize option %s.", optarg);
1007 return -EINVAL;
1008 }
1009
1010 (void) fd_cloexec(fd, true);
1011
1012 f = fdopen(fd, "r");
1013 if (!f)
1014 return log_error_errno(errno, "Failed to open serialization fd: %m");
1015
1016 safe_fclose(arg_serialization);
1017 arg_serialization = f;
1018
1019 break;
1020 }
1021
1022 case ARG_SWITCHED_ROOT:
1023 arg_switched_root = true;
1024 break;
1025
1026 case ARG_MACHINE_ID:
1027 r = set_machine_id(optarg);
1028 if (r < 0)
1029 return log_error_errno(r, "MachineID '%s' is not valid.", optarg);
1030 break;
1031
1032 case 'h':
1033 arg_action = ACTION_HELP;
1034 break;
1035
1036 case 'D':
1037 log_set_max_level(LOG_DEBUG);
1038 break;
1039
1040 case 'b':
1041 case 's':
1042 case 'z':
1043 /* Just to eat away the sysvinit kernel
1044 * cmdline args without getopt() error
1045 * messages that we'll parse in
1046 * parse_proc_cmdline_word() or ignore. */
1047
1048 case '?':
1049 if (getpid_cached() != 1)
1050 return -EINVAL;
1051 else
1052 return 0;
1053
1054 default:
1055 assert_not_reached("Unhandled option code.");
1056 }
1057
1058 if (optind < argc && getpid_cached() != 1) {
1059 /* Hmm, when we aren't run as init system
1060 * let's complain about excess arguments */
1061
1062 log_error("Excess arguments.");
1063 return -EINVAL;
1064 }
1065
1066 return 0;
1067 }
1068
1069 static int help(void) {
1070
1071 printf("%s [OPTIONS...]\n\n"
1072 "Starts up and maintains the system or user services.\n\n"
1073 " -h --help Show this help\n"
1074 " --version Show version\n"
1075 " --test Determine startup sequence, dump it and exit\n"
1076 " --no-pager Do not pipe output into a pager\n"
1077 " --dump-configuration-items Dump understood unit configuration items\n"
1078 " --unit=UNIT Set default unit\n"
1079 " --system Run a system instance, even if PID != 1\n"
1080 " --user Run a user instance\n"
1081 " --dump-core[=BOOL] Dump core on crash\n"
1082 " --crash-vt=NR Change to specified VT on crash\n"
1083 " --crash-reboot[=BOOL] Reboot on crash\n"
1084 " --crash-shell[=BOOL] Run shell on crash\n"
1085 " --confirm-spawn[=BOOL] Ask for confirmation when spawning processes\n"
1086 " --show-status[=BOOL] Show status updates on the console during bootup\n"
1087 " --log-target=TARGET Set log target (console, journal, kmsg, journal-or-kmsg, null)\n"
1088 " --log-level=LEVEL Set log level (debug, info, notice, warning, err, crit, alert, emerg)\n"
1089 " --log-color[=BOOL] Highlight important log messages\n"
1090 " --log-location[=BOOL] Include code location in log messages\n"
1091 " --default-standard-output= Set default standard output for services\n"
1092 " --default-standard-error= Set default standard error output for services\n",
1093 program_invocation_short_name);
1094
1095 return 0;
1096 }
1097
1098 static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool switching_root) {
1099 _cleanup_fdset_free_ FDSet *fds = NULL;
1100 _cleanup_fclose_ FILE *f = NULL;
1101 int r;
1102
1103 assert(m);
1104 assert(_f);
1105 assert(_fds);
1106
1107 r = manager_open_serialization(m, &f);
1108 if (r < 0)
1109 return log_error_errno(r, "Failed to create serialization file: %m");
1110
1111 /* Make sure nothing is really destructed when we shut down */
1112 m->n_reloading++;
1113 bus_manager_send_reloading(m, true);
1114
1115 fds = fdset_new();
1116 if (!fds)
1117 return log_oom();
1118
1119 r = manager_serialize(m, f, fds, switching_root);
1120 if (r < 0)
1121 return log_error_errno(r, "Failed to serialize state: %m");
1122
1123 if (fseeko(f, 0, SEEK_SET) == (off_t) -1)
1124 return log_error_errno(errno, "Failed to rewind serialization fd: %m");
1125
1126 r = fd_cloexec(fileno(f), false);
1127 if (r < 0)
1128 return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization: %m");
1129
1130 r = fdset_cloexec(fds, false);
1131 if (r < 0)
1132 return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization fds: %m");
1133
1134 *_f = TAKE_PTR(f);
1135 *_fds = TAKE_PTR(fds);
1136
1137 return 0;
1138 }
1139
1140 static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
1141 struct rlimit nl;
1142 int r;
1143 int min_max;
1144 _cleanup_free_ char *nr_open = NULL;
1145
1146 assert(saved_rlimit);
1147
1148 /* Save the original RLIMIT_NOFILE so that we can reset it
1149 * later when transitioning from the initrd to the main
1150 * systemd or suchlike. */
1151 if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0)
1152 return log_warning_errno(errno, "Reading RLIMIT_NOFILE failed, ignoring: %m");
1153
1154 /* Make sure forked processes get the default kernel setting */
1155 if (!arg_default_rlimit[RLIMIT_NOFILE]) {
1156 struct rlimit *rl;
1157
1158 rl = newdup(struct rlimit, saved_rlimit, 1);
1159 if (!rl)
1160 return log_oom();
1161
1162 arg_default_rlimit[RLIMIT_NOFILE] = rl;
1163 }
1164
1165 /* Get current RLIMIT_NOFILE maximum compiled into the kernel. */
1166 r = read_one_line_file("/proc/sys/fs/nr_open", &nr_open);
1167 if (r >= 0)
1168 r = safe_atoi(nr_open, &min_max);
1169 /* If we fail, fallback to the hard-coded kernel limit of 1024 * 1024. */
1170 if (r < 0)
1171 min_max = 1024 * 1024;
1172
1173 /* Bump up the resource limit for ourselves substantially */
1174 nl.rlim_cur = nl.rlim_max = min_max;
1175 r = setrlimit_closest(RLIMIT_NOFILE, &nl);
1176 if (r < 0)
1177 return log_warning_errno(r, "Setting RLIMIT_NOFILE failed, ignoring: %m");
1178
1179 return 0;
1180 }
1181
1182 static int bump_rlimit_memlock(struct rlimit *saved_rlimit) {
1183 int r;
1184
1185 assert(saved_rlimit);
1186 assert(getuid() == 0);
1187
1188 /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even though we have CAP_IPC_LOCK which
1189 * should normally disable such checks. We need them to implement IPAccessAllow= and IPAccessDeny=, hence let's
1190 * bump the value high enough for the root user. */
1191
1192 if (getrlimit(RLIMIT_MEMLOCK, saved_rlimit) < 0)
1193 return log_warning_errno(errno, "Reading RLIMIT_MEMLOCK failed, ignoring: %m");
1194
1195 r = setrlimit_closest(RLIMIT_MEMLOCK, &RLIMIT_MAKE_CONST(1024ULL*1024ULL*16ULL));
1196 if (r < 0)
1197 return log_warning_errno(r, "Setting RLIMIT_MEMLOCK failed, ignoring: %m");
1198
1199 return 0;
1200 }
1201
/* Logs a warning if /usr turns out to be an empty directory, i.e. is apparently a separate file system that
 * has not been mounted yet. Errors while checking are silently ignored. */
static void test_usr(void) {
        int empty;

        empty = dir_is_empty("/usr");
        if (empty > 0)
                log_warning("/usr appears to be on its own filesystem and is not already mounted. This is not a supported setup. "
                            "Some things will probably break (sometimes even silently) in mysterious ways. "
                            "Consult http://freedesktop.org/wiki/Software/systemd/separate-usr-is-broken for more information.");
}
1213
1214 static int enforce_syscall_archs(Set *archs) {
1215 #if HAVE_SECCOMP
1216 int r;
1217
1218 if (!is_seccomp_available())
1219 return 0;
1220
1221 r = seccomp_restrict_archs(arg_syscall_archs);
1222 if (r < 0)
1223 return log_error_errno(r, "Failed to enforce system call architecture restrication: %m");
1224 #endif
1225 return 0;
1226 }
1227
1228 static int status_welcome(void) {
1229 _cleanup_free_ char *pretty_name = NULL, *ansi_color = NULL;
1230 const char *fn;
1231 int r;
1232
1233 if (arg_show_status <= 0)
1234 return 0;
1235
1236 FOREACH_STRING(fn, "/etc/os-release", "/usr/lib/os-release") {
1237 r = parse_env_file(fn, NEWLINE,
1238 "PRETTY_NAME", &pretty_name,
1239 "ANSI_COLOR", &ansi_color,
1240 NULL);
1241
1242 if (r != -ENOENT)
1243 break;
1244 }
1245 if (r < 0 && r != -ENOENT)
1246 log_warning_errno(r, "Failed to read os-release file, ignoring: %m");
1247
1248 if (log_get_show_color())
1249 return status_printf(NULL, false, false,
1250 "\nWelcome to \x1B[%sm%s\x1B[0m!\n",
1251 isempty(ansi_color) ? "1" : ansi_color,
1252 isempty(pretty_name) ? "Linux" : pretty_name);
1253 else
1254 return status_printf(NULL, false, false,
1255 "\nWelcome to %s!\n",
1256 isempty(pretty_name) ? "Linux" : pretty_name);
1257 }
1258
1259 static int write_container_id(void) {
1260 const char *c;
1261 int r;
1262
1263 c = getenv("container");
1264 if (isempty(c))
1265 return 0;
1266
1267 RUN_WITH_UMASK(0022)
1268 r = write_string_file("/run/systemd/container", c, WRITE_STRING_FILE_CREATE);
1269 if (r < 0)
1270 return log_warning_errno(r, "Failed to write /run/systemd/container, ignoring: %m");
1271
1272 return 1;
1273 }
1274
1275 static int bump_unix_max_dgram_qlen(void) {
1276 _cleanup_free_ char *qlen = NULL;
1277 unsigned long v;
1278 int r;
1279
1280 /* Let's bump the net.unix.max_dgram_qlen sysctl. The kernel
1281 * default of 16 is simply too low. We set the value really
1282 * really early during boot, so that it is actually applied to
1283 * all our sockets, including the $NOTIFY_SOCKET one. */
1284
1285 r = read_one_line_file("/proc/sys/net/unix/max_dgram_qlen", &qlen);
1286 if (r < 0)
1287 return log_warning_errno(r, "Failed to read AF_UNIX datagram queue length, ignoring: %m");
1288
1289 r = safe_atolu(qlen, &v);
1290 if (r < 0)
1291 return log_warning_errno(r, "Failed to parse AF_UNIX datagram queue length, ignoring: %m");
1292
1293 if (v >= DEFAULT_UNIX_MAX_DGRAM_QLEN)
1294 return 0;
1295
1296 qlen = mfree(qlen);
1297 if (asprintf(&qlen, "%lu\n", DEFAULT_UNIX_MAX_DGRAM_QLEN) < 0)
1298 return log_oom();
1299
1300 r = write_string_file("/proc/sys/net/unix/max_dgram_qlen", qlen, 0);
1301 if (r < 0)
1302 return log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
1303 "Failed to bump AF_UNIX datagram queue length, ignoring: %m");
1304
1305 return 1;
1306 }
1307
1308 static int fixup_environment(void) {
1309 _cleanup_free_ char *term = NULL;
1310 const char *t;
1311 int r;
1312
1313 /* Only fix up the environment when we are started as PID 1 */
1314 if (getpid_cached() != 1)
1315 return 0;
1316
1317 /* We expect the environment to be set correctly if run inside a container. */
1318 if (detect_container() > 0)
1319 return 0;
1320
1321 /* When started as PID1, the kernel uses /dev/console for our stdios and uses TERM=linux whatever the backend
1322 * device used by the console. We try to make a better guess here since some consoles might not have support
1323 * for color mode for example.
1324 *
1325 * However if TERM was configured through the kernel command line then leave it alone. */
1326 r = proc_cmdline_get_key("TERM", 0, &term);
1327 if (r < 0)
1328 return r;
1329
1330 t = term ?: default_term_for_tty("/dev/console");
1331
1332 if (setenv("TERM", t, 1) < 0)
1333 return -errno;
1334
1335 return 0;
1336 }
1337
/* SysV compatibility: when SysV compat is compiled in, we are not PID 1, and our invocation name contains
 * "init", execute systemctl with our argument vector instead. If the execution fails the process exits with
 * EXIT_FAILURE. Without SysV compat this is a NOP. */
static void redirect_telinit(int argc, char *argv[]) {

        /* This is compatibility support for SysV, where calling init as a user is identical to telinit. */

#if HAVE_SYSV_COMPAT
        if (getpid_cached() != 1 &&
            strstr(program_invocation_short_name, "init")) {

                execv(SYSTEMCTL_BINARY_PATH, argv);

                /* We only get here if execv() failed */
                log_error_errno(errno, "Failed to exec " SYSTEMCTL_BINARY_PATH ": %m");
                exit(EXIT_FAILURE);
        }
#endif
}
1354
1355 static int become_shutdown(
1356 const char *shutdown_verb,
1357 int retval) {
1358
1359 char log_level[DECIMAL_STR_MAX(int) + 1],
1360 exit_code[DECIMAL_STR_MAX(uint8_t) + 1],
1361 timeout[DECIMAL_STR_MAX(usec_t) + 1];
1362
1363 const char* command_line[13] = {
1364 SYSTEMD_SHUTDOWN_BINARY_PATH,
1365 shutdown_verb,
1366 "--timeout", timeout,
1367 "--log-level", log_level,
1368 "--log-target",
1369 };
1370
1371 _cleanup_strv_free_ char **env_block = NULL;
1372 size_t pos = 7;
1373 int r;
1374
1375 assert(shutdown_verb);
1376 assert(!command_line[pos]);
1377 env_block = strv_copy(environ);
1378
1379 xsprintf(log_level, "%d", log_get_max_level());
1380 xsprintf(timeout, "%" PRI_USEC "us", arg_default_timeout_stop_usec);
1381
1382 switch (log_get_target()) {
1383
1384 case LOG_TARGET_KMSG:
1385 case LOG_TARGET_JOURNAL_OR_KMSG:
1386 case LOG_TARGET_SYSLOG_OR_KMSG:
1387 command_line[pos++] = "kmsg";
1388 break;
1389
1390 case LOG_TARGET_NULL:
1391 command_line[pos++] = "null";
1392 break;
1393
1394 case LOG_TARGET_CONSOLE:
1395 default:
1396 command_line[pos++] = "console";
1397 break;
1398 };
1399
1400 if (log_get_show_color())
1401 command_line[pos++] = "--log-color";
1402
1403 if (log_get_show_location())
1404 command_line[pos++] = "--log-location";
1405
1406 if (streq(shutdown_verb, "exit")) {
1407 command_line[pos++] = "--exit-code";
1408 command_line[pos++] = exit_code;
1409 xsprintf(exit_code, "%d", retval);
1410 }
1411
1412 assert(pos < ELEMENTSOF(command_line));
1413
1414 if (streq(shutdown_verb, "reboot") &&
1415 arg_shutdown_watchdog > 0 &&
1416 arg_shutdown_watchdog != USEC_INFINITY) {
1417
1418 char *e;
1419
1420 /* If we reboot let's set the shutdown
1421 * watchdog and tell the shutdown binary to
1422 * repeatedly ping it */
1423 r = watchdog_set_timeout(&arg_shutdown_watchdog);
1424 watchdog_close(r < 0);
1425
1426 /* Tell the binary how often to ping, ignore failure */
1427 if (asprintf(&e, "WATCHDOG_USEC="USEC_FMT, arg_shutdown_watchdog) > 0)
1428 (void) strv_consume(&env_block, e);
1429
1430 if (arg_watchdog_device &&
1431 asprintf(&e, "WATCHDOG_DEVICE=%s", arg_watchdog_device) > 0)
1432 (void) strv_consume(&env_block, e);
1433 } else
1434 watchdog_close(true);
1435
1436 /* Avoid the creation of new processes forked by the
1437 * kernel; at this point, we will not listen to the
1438 * signals anyway */
1439 if (detect_container() <= 0)
1440 (void) cg_uninstall_release_agent(SYSTEMD_CGROUP_CONTROLLER);
1441
1442 execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);
1443 return -errno;
1444 }
1445
/* Early clock setup: depending on whether the RTC runs in local time either applies the timezone delta to
 * the kernel, or (outside of the initrd) resets the kernel's time warp logic. Afterwards applies the epoch
 * to the system clock. All failures are logged and otherwise ignored. */
static void initialize_clock(void) {
        int r;

        if (clock_is_localtime(NULL) > 0) {
                int delta_minutes;

                /*
                 * The very first call of settimeofday() also does a time warp in the kernel.
                 *
                 * In the rtc-in-local time mode, we set the kernel's timezone, and rely on external tools to
                 * take care of maintaining the RTC and do all adjustments. This matches the behavior of
                 * Windows, which leaves the RTC alone if the registry tells that the RTC runs in UTC.
                 */
                r = clock_set_timezone(&delta_minutes);
                if (r >= 0)
                        log_info("RTC configured in localtime, applying delta of %i minutes to system time.", delta_minutes);
                else
                        log_error_errno(r, "Failed to apply local time delta, ignoring: %m");

        } else if (!in_initrd())
                /*
                 * Do a dummy very first call to seal the kernel's time warp magic.
                 *
                 * Do not call this from inside the initrd. The initrd might not carry /etc/adjtime with
                 * LOCAL, but the real system could be set up that way. In such case, we need to delay the
                 * time-warp or the sealing until we reach the real system.
                 *
                 * Do no set the kernel's timezone. The concept of local time cannot be supported reliably,
                 * the time will jump or be incorrect at every daylight saving time change. All kernel local
                 * time concepts will be treated as UTC that way.
                 */
                (void) clock_reset_timewarp();

        r = clock_apply_epoch();
        if (r > 0)
                log_info("System time before build time, advancing clock.");
        else if (r < 0)
                log_error_errno(r, "Current system time is before build time, but cannot correct: %m");
}
1486
1487 static void initialize_coredump(bool skip_setup) {
1488
1489 if (getpid_cached() != 1)
1490 return;
1491
1492 /* Don't limit the core dump size, so that coredump handlers such as systemd-coredump (which honour the limit)
1493 * will process core dumps for system services by default. */
1494 if (setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY)) < 0)
1495 log_warning_errno(errno, "Failed to set RLIMIT_CORE: %m");
1496
1497 /* But at the same time, turn off the core_pattern logic by default, so that no coredumps are stored
1498 * until the systemd-coredump tool is enabled via sysctl. */
1499 if (!skip_setup)
1500 disable_coredumps();
1501 }
1502
1503 static void do_reexecute(
1504 int argc,
1505 char *argv[],
1506 const struct rlimit *saved_rlimit_nofile,
1507 const struct rlimit *saved_rlimit_memlock,
1508 FDSet *fds,
1509 const char *switch_root_dir,
1510 const char *switch_root_init,
1511 const char **ret_error_message) {
1512
1513 unsigned i, j, args_size;
1514 const char **args;
1515 int r;
1516
1517 assert(saved_rlimit_nofile);
1518 assert(saved_rlimit_memlock);
1519 assert(ret_error_message);
1520
1521 /* Close and disarm the watchdog, so that the new instance can reinitialize it, but doesn't get rebooted while
1522 * we do that */
1523 watchdog_close(true);
1524
1525 /* Reset the RLIMIT_NOFILE to the kernel default, so that the new systemd can pass the kernel default to its
1526 * child processes */
1527
1528 if (saved_rlimit_nofile->rlim_cur > 0)
1529 (void) setrlimit(RLIMIT_NOFILE, saved_rlimit_nofile);
1530 if (saved_rlimit_memlock->rlim_cur != (rlim_t) -1)
1531 (void) setrlimit(RLIMIT_MEMLOCK, saved_rlimit_memlock);
1532
1533 if (switch_root_dir) {
1534 /* Kill all remaining processes from the initrd, but don't wait for them, so that we can handle the
1535 * SIGCHLD for them after deserializing. */
1536 broadcast_signal(SIGTERM, false, true, arg_default_timeout_stop_usec);
1537
1538 /* And switch root with MS_MOVE, because we remove the old directory afterwards and detach it. */
1539 r = switch_root(switch_root_dir, "/mnt", true, MS_MOVE);
1540 if (r < 0)
1541 log_error_errno(r, "Failed to switch root, trying to continue: %m");
1542 }
1543
1544 args_size = MAX(6, argc+1);
1545 args = newa(const char*, args_size);
1546
1547 if (!switch_root_init) {
1548 char sfd[DECIMAL_STR_MAX(int) + 1];
1549
1550 /* First try to spawn ourselves with the right path, and with full serialization. We do this only if
1551 * the user didn't specify an explicit init to spawn. */
1552
1553 assert(arg_serialization);
1554 assert(fds);
1555
1556 xsprintf(sfd, "%i", fileno(arg_serialization));
1557
1558 i = 0;
1559 args[i++] = SYSTEMD_BINARY_PATH;
1560 if (switch_root_dir)
1561 args[i++] = "--switched-root";
1562 args[i++] = arg_system ? "--system" : "--user";
1563 args[i++] = "--deserialize";
1564 args[i++] = sfd;
1565 args[i++] = NULL;
1566
1567 assert(i <= args_size);
1568
1569 /*
1570 * We want valgrind to print its memory usage summary before reexecution. Valgrind won't do this is on
1571 * its own on exec(), but it will do it on exit(). Hence, to ensure we get a summary here, fork() off
1572 * a child, let it exit() cleanly, so that it prints the summary, and wait() for it in the parent,
1573 * before proceeding into the exec().
1574 */
1575 valgrind_summary_hack();
1576
1577 (void) execv(args[0], (char* const*) args);
1578 log_debug_errno(errno, "Failed to execute our own binary, trying fallback: %m");
1579 }
1580
1581 /* Try the fallback, if there is any, without any serialization. We pass the original argv[] and envp[]. (Well,
1582 * modulo the ordering changes due to getopt() in argv[], and some cleanups in envp[], but let's hope that
1583 * doesn't matter.) */
1584
1585 arg_serialization = safe_fclose(arg_serialization);
1586 fds = fdset_free(fds);
1587
1588 /* Reopen the console */
1589 (void) make_console_stdio();
1590
1591 for (j = 1, i = 1; j < (unsigned) argc; j++)
1592 args[i++] = argv[j];
1593 args[i++] = NULL;
1594 assert(i <= args_size);
1595
1596 /* Reenable any blocked signals, especially important if we switch from initial ramdisk to init=... */
1597 (void) reset_all_signal_handlers();
1598 (void) reset_signal_mask();
1599
1600 if (switch_root_init) {
1601 args[0] = switch_root_init;
1602 (void) execv(args[0], (char* const*) args);
1603 log_warning_errno(errno, "Failed to execute configured init, trying fallback: %m");
1604 }
1605
1606 args[0] = "/sbin/init";
1607 (void) execv(args[0], (char* const*) args);
1608 r = -errno;
1609
1610 manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
1611 ANSI_HIGHLIGHT_RED " !! " ANSI_NORMAL,
1612 "Failed to execute /sbin/init");
1613
1614 if (r == -ENOENT) {
1615 log_warning("No /sbin/init, trying fallback");
1616
1617 args[0] = "/bin/sh";
1618 args[1] = NULL;
1619 (void) execv(args[0], (char* const*) args);
1620 log_error_errno(errno, "Failed to execute /bin/sh, giving up: %m");
1621 } else
1622 log_warning_errno(r, "Failed to execute /sbin/init, giving up: %m");
1623
1624 *ret_error_message = "Failed to execute fallback shell";
1625 }
1626
1627 static int invoke_main_loop(
1628 Manager *m,
1629 bool *ret_reexecute,
1630 int *ret_retval, /* Return parameters relevant for shutting down */
1631 const char **ret_shutdown_verb, /* … */
1632 FDSet **ret_fds, /* Return parameters for reexecuting */
1633 char **ret_switch_root_dir, /* … */
1634 char **ret_switch_root_init, /* … */
1635 const char **ret_error_message) {
1636
1637 int r;
1638
1639 assert(m);
1640 assert(ret_reexecute);
1641 assert(ret_retval);
1642 assert(ret_shutdown_verb);
1643 assert(ret_fds);
1644 assert(ret_switch_root_dir);
1645 assert(ret_switch_root_init);
1646 assert(ret_error_message);
1647
1648 for (;;) {
1649 r = manager_loop(m);
1650 if (r < 0) {
1651 *ret_error_message = "Failed to run main loop";
1652 return log_emergency_errno(r, "Failed to run main loop: %m");
1653 }
1654
1655 switch (m->exit_code) {
1656
1657 case MANAGER_RELOAD:
1658 log_info("Reloading.");
1659
1660 r = parse_config_file();
1661 if (r < 0)
1662 log_warning_errno(r, "Failed to parse config file, ignoring: %m");
1663
1664 set_manager_defaults(m);
1665
1666 r = manager_reload(m);
1667 if (r < 0)
1668 log_warning_errno(r, "Failed to reload, ignoring: %m");
1669
1670 break;
1671
1672 case MANAGER_REEXECUTE:
1673
1674 r = prepare_reexecute(m, &arg_serialization, ret_fds, false);
1675 if (r < 0) {
1676 *ret_error_message = "Failed to prepare for reexecution";
1677 return r;
1678 }
1679
1680 log_notice("Reexecuting.");
1681
1682 *ret_reexecute = true;
1683 *ret_retval = EXIT_SUCCESS;
1684 *ret_shutdown_verb = NULL;
1685 *ret_switch_root_dir = *ret_switch_root_init = NULL;
1686
1687 return 0;
1688
1689 case MANAGER_SWITCH_ROOT:
1690 if (!m->switch_root_init) {
1691 r = prepare_reexecute(m, &arg_serialization, ret_fds, true);
1692 if (r < 0) {
1693 *ret_error_message = "Failed to prepare for reexecution";
1694 return r;
1695 }
1696 } else
1697 *ret_fds = NULL;
1698
1699 log_notice("Switching root.");
1700
1701 *ret_reexecute = true;
1702 *ret_retval = EXIT_SUCCESS;
1703 *ret_shutdown_verb = NULL;
1704
1705 /* Steal the switch root parameters */
1706 *ret_switch_root_dir = m->switch_root;
1707 *ret_switch_root_init = m->switch_root_init;
1708 m->switch_root = m->switch_root_init = NULL;
1709
1710 return 0;
1711
1712 case MANAGER_EXIT:
1713
1714 if (MANAGER_IS_USER(m)) {
1715 log_debug("Exit.");
1716
1717 *ret_reexecute = false;
1718 *ret_retval = m->return_value;
1719 *ret_shutdown_verb = NULL;
1720 *ret_fds = NULL;
1721 *ret_switch_root_dir = *ret_switch_root_init = NULL;
1722
1723 return 0;
1724 }
1725
1726 _fallthrough_;
1727 case MANAGER_REBOOT:
1728 case MANAGER_POWEROFF:
1729 case MANAGER_HALT:
1730 case MANAGER_KEXEC: {
1731 static const char * const table[_MANAGER_EXIT_CODE_MAX] = {
1732 [MANAGER_EXIT] = "exit",
1733 [MANAGER_REBOOT] = "reboot",
1734 [MANAGER_POWEROFF] = "poweroff",
1735 [MANAGER_HALT] = "halt",
1736 [MANAGER_KEXEC] = "kexec"
1737 };
1738
1739 log_notice("Shutting down.");
1740
1741 *ret_reexecute = false;
1742 *ret_retval = m->return_value;
1743 assert_se(*ret_shutdown_verb = table[m->exit_code]);
1744 *ret_fds = NULL;
1745 *ret_switch_root_dir = *ret_switch_root_init = NULL;
1746
1747 return 0;
1748 }
1749
1750 default:
1751 assert_not_reached("Unknown exit code.");
1752 }
1753 }
1754 }
1755
1756 static void log_execution_mode(bool *ret_first_boot) {
1757 assert(ret_first_boot);
1758
1759 if (arg_system) {
1760 int v;
1761
1762 log_info(PACKAGE_STRING " running in %ssystem mode. (" SYSTEMD_FEATURES ")",
1763 arg_action == ACTION_TEST ? "test " : "" );
1764
1765 v = detect_virtualization();
1766 if (v > 0)
1767 log_info("Detected virtualization %s.", virtualization_to_string(v));
1768
1769 log_info("Detected architecture %s.", architecture_to_string(uname_architecture()));
1770
1771 if (in_initrd()) {
1772 *ret_first_boot = false;
1773 log_info("Running in initial RAM disk.");
1774 } else {
1775 /* Let's check whether we are in first boot, i.e. whether /etc is still unpopulated. We use
1776 * /etc/machine-id as flag file, for this: if it exists we assume /etc is populated, if it
1777 * doesn't it's unpopulated. This allows container managers and installers to provision a
1778 * couple of files already. If the container manager wants to provision the machine ID itself
1779 * it should pass $container_uuid to PID 1. */
1780
1781 *ret_first_boot = access("/etc/machine-id", F_OK) < 0;
1782 if (*ret_first_boot)
1783 log_info("Running with unpopulated /etc.");
1784 }
1785 } else {
1786 if (DEBUG_LOGGING) {
1787 _cleanup_free_ char *t;
1788
1789 t = uid_to_name(getuid());
1790 log_debug(PACKAGE_STRING " running in %suser mode for user " UID_FMT "/%s. (" SYSTEMD_FEATURES ")",
1791 arg_action == ACTION_TEST ? " test" : "", getuid(), strna(t));
1792 }
1793
1794 *ret_first_boot = false;
1795 }
1796 }
1797
1798 static int initialize_runtime(
1799 bool skip_setup,
1800 struct rlimit *saved_rlimit_nofile,
1801 struct rlimit *saved_rlimit_memlock,
1802 const char **ret_error_message) {
1803
1804 int r;
1805
1806 assert(ret_error_message);
1807
1808 /* Sets up various runtime parameters. Many of these initializations are conditionalized:
1809 *
1810 * - Some only apply to --system instances
1811 * - Some only apply to --user instances
1812 * - Some only apply when we first start up, but not when we reexecute
1813 */
1814
1815 if (arg_action != ACTION_RUN)
1816 return 0;
1817
1818 if (arg_system) {
1819 /* Make sure we leave a core dump without panicing the kernel. */
1820 install_crash_handler();
1821
1822 if (!skip_setup) {
1823 r = mount_cgroup_controllers(arg_join_controllers);
1824 if (r < 0) {
1825 *ret_error_message = "Failed to mount cgroup hierarchies";
1826 return r;
1827 }
1828
1829 status_welcome();
1830 hostname_setup();
1831 machine_id_setup(NULL, arg_machine_id, NULL);
1832 loopback_setup();
1833 bump_unix_max_dgram_qlen();
1834 test_usr();
1835 write_container_id();
1836 }
1837
1838 if (arg_watchdog_device) {
1839 r = watchdog_set_device(arg_watchdog_device);
1840 if (r < 0)
1841 log_warning_errno(r, "Failed to set watchdog device to %s, ignoring: %m", arg_watchdog_device);
1842 }
1843
1844 if (arg_runtime_watchdog > 0 && arg_runtime_watchdog != USEC_INFINITY)
1845 watchdog_set_timeout(&arg_runtime_watchdog);
1846 }
1847
1848 if (arg_timer_slack_nsec != NSEC_INFINITY)
1849 if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0)
1850 log_warning_errno(errno, "Failed to adjust timer slack, ignoring: %m");
1851
1852 if (arg_system && !cap_test_all(arg_capability_bounding_set)) {
1853 r = capability_bounding_set_drop_usermode(arg_capability_bounding_set);
1854 if (r < 0) {
1855 *ret_error_message = "Failed to drop capability bounding set of usermode helpers";
1856 return log_emergency_errno(r, "Failed to drop capability bounding set of usermode helpers: %m");
1857 }
1858
1859 r = capability_bounding_set_drop(arg_capability_bounding_set, true);
1860 if (r < 0) {
1861 *ret_error_message = "Failed to drop capability bounding set";
1862 return log_emergency_errno(r, "Failed to drop capability bounding set: %m");
1863 }
1864 }
1865
1866 if (arg_system && arg_no_new_privs) {
1867 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1868 *ret_error_message = "Failed to disable new privileges";
1869 return log_emergency_errno(errno, "Failed to disable new privileges: %m");
1870 }
1871 }
1872
1873 if (arg_syscall_archs) {
1874 r = enforce_syscall_archs(arg_syscall_archs);
1875 if (r < 0) {
1876 *ret_error_message = "Failed to set syscall architectures";
1877 return r;
1878 }
1879 }
1880
1881 if (!arg_system)
1882 /* Become reaper of our children */
1883 if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0)
1884 log_warning_errno(errno, "Failed to make us a subreaper: %m");
1885
1886 if (arg_system) {
1887 /* Bump up RLIMIT_NOFILE for systemd itself */
1888 (void) bump_rlimit_nofile(saved_rlimit_nofile);
1889 (void) bump_rlimit_memlock(saved_rlimit_memlock);
1890 }
1891
1892 return 0;
1893 }
1894
1895 static int do_queue_default_job(
1896 Manager *m,
1897 const char **ret_error_message) {
1898
1899 _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
1900 Job *default_unit_job;
1901 Unit *target = NULL;
1902 int r;
1903
1904 log_debug("Activating default unit: %s", arg_default_unit);
1905
1906 r = manager_load_unit(m, arg_default_unit, NULL, &error, &target);
1907 if (r < 0)
1908 log_error("Failed to load default target: %s", bus_error_message(&error, r));
1909 else if (IN_SET(target->load_state, UNIT_ERROR, UNIT_NOT_FOUND))
1910 log_error_errno(target->load_error, "Failed to load default target: %m");
1911 else if (target->load_state == UNIT_MASKED)
1912 log_error("Default target masked.");
1913
1914 if (!target || target->load_state != UNIT_LOADED) {
1915 log_info("Trying to load rescue target...");
1916
1917 r = manager_load_unit(m, SPECIAL_RESCUE_TARGET, NULL, &error, &target);
1918 if (r < 0) {
1919 *ret_error_message = "Failed to load rescue target";
1920 return log_emergency_errno(r, "Failed to load rescue target: %s", bus_error_message(&error, r));
1921 } else if (IN_SET(target->load_state, UNIT_ERROR, UNIT_NOT_FOUND)) {
1922 *ret_error_message = "Failed to load rescue target";
1923 return log_emergency_errno(target->load_error, "Failed to load rescue target: %m");
1924 } else if (target->load_state == UNIT_MASKED) {
1925 *ret_error_message = "Rescue target masked";
1926 log_emergency("Rescue target masked.");
1927 return -ERFKILL;
1928 }
1929 }
1930
1931 assert(target->load_state == UNIT_LOADED);
1932
1933 r = manager_add_job(m, JOB_START, target, JOB_ISOLATE, &error, &default_unit_job);
1934 if (r == -EPERM) {
1935 log_debug_errno(r, "Default target could not be isolated, starting instead: %s", bus_error_message(&error, r));
1936
1937 sd_bus_error_free(&error);
1938
1939 r = manager_add_job(m, JOB_START, target, JOB_REPLACE, &error, &default_unit_job);
1940 if (r < 0) {
1941 *ret_error_message = "Failed to start default target";
1942 return log_emergency_errno(r, "Failed to start default target: %s", bus_error_message(&error, r));
1943 }
1944
1945 } else if (r < 0) {
1946 *ret_error_message = "Failed to isolate default target";
1947 return log_emergency_errno(r, "Failed to isolate default target: %s", bus_error_message(&error, r));
1948 }
1949
1950 m->default_unit_job_id = default_unit_job->id;
1951
1952 return 0;
1953 }
1954
1955 static void free_arguments(void) {
1956 size_t j;
1957
1958 /* Frees all arg_* variables, with the exception of arg_serialization */
1959
1960 for (j = 0; j < ELEMENTSOF(arg_default_rlimit); j++)
1961 arg_default_rlimit[j] = mfree(arg_default_rlimit[j]);
1962
1963 arg_default_unit = mfree(arg_default_unit);
1964 arg_confirm_spawn = mfree(arg_confirm_spawn);
1965 arg_join_controllers = strv_free_free(arg_join_controllers);
1966 arg_default_environment = strv_free(arg_default_environment);
1967 arg_syscall_archs = set_free(arg_syscall_archs);
1968 }
1969
1970 static int load_configuration(int argc, char **argv, const char **ret_error_message) {
1971 int r;
1972
1973 assert(ret_error_message);
1974
1975 arg_default_tasks_max = system_tasks_max_scale(DEFAULT_TASKS_MAX_PERCENTAGE, 100U);
1976
1977 r = parse_config_file();
1978 if (r < 0) {
1979 *ret_error_message = "Failed to parse config file";
1980 return r;
1981 }
1982
1983 if (arg_system) {
1984 r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, 0);
1985 if (r < 0)
1986 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
1987 }
1988
1989 /* Note that this also parses bits from the kernel command line, including "debug". */
1990 log_parse_environment();
1991
1992 r = parse_argv(argc, argv);
1993 if (r < 0) {
1994 *ret_error_message = "Failed to parse commandline arguments";
1995 return r;
1996 }
1997
1998 /* Initialize default unit */
1999 if (!arg_default_unit) {
2000 arg_default_unit = strdup(SPECIAL_DEFAULT_TARGET);
2001 if (!arg_default_unit) {
2002 *ret_error_message = "Failed to set default unit";
2003 return log_oom();
2004 }
2005 }
2006
2007 /* Initialize the show status setting if it hasn't been set explicitly yet */
2008 if (arg_show_status == _SHOW_STATUS_UNSET)
2009 arg_show_status = SHOW_STATUS_YES;
2010
2011 return 0;
2012 }
2013
2014 static int safety_checks(void) {
2015
2016 if (getpid_cached() == 1 &&
2017 arg_action != ACTION_RUN) {
2018 log_error("Unsupported execution mode while PID 1.");
2019 return -EPERM;
2020 }
2021
2022 if (getpid_cached() == 1 &&
2023 !arg_system) {
2024 log_error("Can't run --user mode as PID 1.");
2025 return -EPERM;
2026 }
2027
2028 if (arg_action == ACTION_RUN &&
2029 arg_system &&
2030 getpid_cached() != 1) {
2031 log_error("Can't run system mode unless PID 1.");
2032 return -EPERM;
2033 }
2034
2035 if (arg_action == ACTION_TEST &&
2036 geteuid() == 0) {
2037 log_error("Don't run test mode as root.");
2038 return -EPERM;
2039 }
2040
2041 if (!arg_system &&
2042 arg_action == ACTION_RUN &&
2043 sd_booted() <= 0) {
2044 log_error("Trying to run as user instance, but the system has not been booted with systemd.");
2045 return -EOPNOTSUPP;
2046 }
2047
2048 if (!arg_system &&
2049 arg_action == ACTION_RUN &&
2050 !getenv("XDG_RUNTIME_DIR")) {
2051 log_error("Trying to run as user instance, but $XDG_RUNTIME_DIR is not set.");
2052 return -EUNATCH;
2053 }
2054
2055 if (arg_system &&
2056 arg_action == ACTION_RUN &&
2057 running_in_chroot() > 0) {
2058 log_error("Cannot be run in a chroot() environment.");
2059 return -EOPNOTSUPP;
2060 }
2061
2062 return 0;
2063 }
2064
2065 static int initialize_security(
2066 bool *loaded_policy,
2067 dual_timestamp *security_start_timestamp,
2068 dual_timestamp *security_finish_timestamp,
2069 const char **ret_error_message) {
2070
2071 int r;
2072
2073 assert(loaded_policy);
2074 assert(security_start_timestamp);
2075 assert(security_finish_timestamp);
2076 assert(ret_error_message);
2077
2078 dual_timestamp_get(security_start_timestamp);
2079
2080 r = mac_selinux_setup(loaded_policy);
2081 if (r < 0) {
2082 *ret_error_message = "Failed to load SELinux policy";
2083 return r;
2084 }
2085
2086 r = mac_smack_setup(loaded_policy);
2087 if (r < 0) {
2088 *ret_error_message = "Failed to load SMACK policy";
2089 return r;
2090 }
2091
2092 r = ima_setup();
2093 if (r < 0) {
2094 *ret_error_message = "Failed to load IMA policy";
2095 return r;
2096 }
2097
2098 dual_timestamp_get(security_finish_timestamp);
2099 return 0;
2100 }
2101
2102 static void test_summary(Manager *m) {
2103 assert(m);
2104
2105 printf("-> By units:\n");
2106 manager_dump_units(m, stdout, "\t");
2107
2108 printf("-> By jobs:\n");
2109 manager_dump_jobs(m, stdout, "\t");
2110 }
2111
2112 static int collect_fds(FDSet **ret_fds, const char **ret_error_message) {
2113 int r;
2114
2115 assert(ret_fds);
2116 assert(ret_error_message);
2117
2118 r = fdset_new_fill(ret_fds);
2119 if (r < 0) {
2120 *ret_error_message = "Failed to allocate fd set";
2121 return log_emergency_errno(r, "Failed to allocate fd set: %m");
2122 }
2123
2124 fdset_cloexec(*ret_fds, true);
2125
2126 if (arg_serialization)
2127 assert_se(fdset_remove(*ret_fds, fileno(arg_serialization)) >= 0);
2128
2129 return 0;
2130 }
2131
2132 static void setup_console_terminal(bool skip_setup) {
2133
2134 if (!arg_system)
2135 return;
2136
2137 /* Become a session leader if we aren't one yet. */
2138 (void) setsid();
2139
2140 /* If we are init, we connect stdin/stdout/stderr to /dev/null and make sure we don't have a controlling
2141 * tty. */
2142 (void) release_terminal();
2143
2144 /* Reset the console, but only if this is really init and we are freshly booted */
2145 if (getpid_cached() == 1 && !skip_setup)
2146 (void) console_setup();
2147 }
2148
static bool early_skip_setup_check(int argc, char *argv[]) {
        bool deserializing = false;
        int n = 1; /* argv[0] is the program name, start behind it */

        /* Quick peek at the command line, long before the real parsing takes place, in order to tell a
         * reexecution apart from a normal bootup.
         *
         * Returns true (i.e. "skip the extensive setup") if "--deserialize" was passed, since that means we
         * are reexecuting. An exception is made for "--switched-root": after switching root all the
         * special setup is done anyway, even though deserialization takes place in that case too, hence
         * false is returned as soon as that switch is seen, regardless of its position. */

        while (n < argc) {
                const char *arg = argv[n++];

                if (strcmp(arg, "--switched-root") == 0)
                        return false; /* Switched root: never skip the setup. */

                if (strcmp(arg, "--deserialize") == 0)
                        deserializing = true;
        }

        return deserializing;
}
2167
2168 int main(int argc, char *argv[]) {
2169
2170 dual_timestamp initrd_timestamp = DUAL_TIMESTAMP_NULL, userspace_timestamp = DUAL_TIMESTAMP_NULL, kernel_timestamp = DUAL_TIMESTAMP_NULL,
2171 security_start_timestamp = DUAL_TIMESTAMP_NULL, security_finish_timestamp = DUAL_TIMESTAMP_NULL;
2172 struct rlimit saved_rlimit_nofile = RLIMIT_MAKE_CONST(0), saved_rlimit_memlock = RLIMIT_MAKE_CONST((rlim_t) -1);
2173 bool skip_setup, loaded_policy = false, queue_default_job = false, first_boot = false, reexecute = false;
2174 char *switch_root_dir = NULL, *switch_root_init = NULL;
2175 usec_t before_startup, after_startup;
2176 static char systemd[] = "systemd";
2177 char timespan[FORMAT_TIMESPAN_MAX];
2178 const char *shutdown_verb = NULL, *error_message = NULL;
2179 int r, retval = EXIT_FAILURE;
2180 Manager *m = NULL;
2181 FDSet *fds = NULL;
2182
2183 /* SysV compatibility: redirect init → telinit */
2184 redirect_telinit(argc, argv);
2185
2186 /* Take timestamps early on */
2187 dual_timestamp_from_monotonic(&kernel_timestamp, 0);
2188 dual_timestamp_get(&userspace_timestamp);
2189
2190 /* Figure out whether we need to do initialize the system, or if we already did that because we are
2191 * reexecuting */
2192 skip_setup = early_skip_setup_check(argc, argv);
2193
2194 /* If we get started via the /sbin/init symlink then we are called 'init'. After a subsequent reexecution we
2195 * are then called 'systemd'. That is confusing, hence let's call us systemd right-away. */
2196 program_invocation_short_name = systemd;
2197 (void) prctl(PR_SET_NAME, systemd);
2198
2199 /* Save the original command line */
2200 saved_argv = argv;
2201 saved_argc = argc;
2202
2203 /* Make sure that if the user says "syslog" we actually log to the journal. */
2204 log_set_upgrade_syslog_to_journal(true);
2205
2206 if (getpid_cached() == 1) {
2207 /* Disable the umask logic */
2208 umask(0);
2209
2210 /* Make sure that at least initially we do not ever log to journald/syslogd, because it might not be activated
2211 * yet (even though the log socket for it exists). */
2212 log_set_prohibit_ipc(true);
2213
2214 /* Always reopen /dev/console when running as PID 1 or one of its pre-execve() children. This is
2215 * important so that we never end up logging to any foreign stderr, for example if we have to log in a
2216 * child process right before execve()'ing the actual binary, at a point in time where socket
2217 * activation stderr/stdout area already set up. */
2218 log_set_always_reopen_console(true);
2219 }
2220
2221 if (getpid_cached() == 1 && detect_container() <= 0) {
2222
2223 /* Running outside of a container as PID 1 */
2224 arg_system = true;
2225 log_set_target(LOG_TARGET_KMSG);
2226 log_open();
2227
2228 if (in_initrd())
2229 initrd_timestamp = userspace_timestamp;
2230
2231 if (!skip_setup) {
2232 r = mount_setup_early();
2233 if (r < 0) {
2234 error_message = "Failed to mount early API filesystems";
2235 goto finish;
2236 }
2237
2238 r = initialize_security(
2239 &loaded_policy,
2240 &security_start_timestamp,
2241 &security_finish_timestamp,
2242 &error_message);
2243 if (r < 0)
2244 goto finish;
2245 }
2246
2247 if (mac_selinux_init() < 0) {
2248 error_message = "Failed to initialize SELinux policy";
2249 goto finish;
2250 }
2251
2252 if (!skip_setup)
2253 initialize_clock();
2254
2255 /* Set the default for later on, but don't actually
2256 * open the logs like this for now. Note that if we
2257 * are transitioning from the initrd there might still
2258 * be journal fd open, and we shouldn't attempt
2259 * opening that before we parsed /proc/cmdline which
2260 * might redirect output elsewhere. */
2261 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
2262
2263 } else if (getpid_cached() == 1) {
2264 /* Running inside a container, as PID 1 */
2265 arg_system = true;
2266 log_set_target(LOG_TARGET_CONSOLE);
2267 log_open();
2268
2269 /* For later on, see above... */
2270 log_set_target(LOG_TARGET_JOURNAL);
2271
2272 /* clear the kernel timestamp,
2273 * because we are in a container */
2274 kernel_timestamp = DUAL_TIMESTAMP_NULL;
2275 } else {
2276 /* Running as user instance */
2277 arg_system = false;
2278 log_set_target(LOG_TARGET_AUTO);
2279 log_open();
2280
2281 /* clear the kernel timestamp,
2282 * because we are not PID 1 */
2283 kernel_timestamp = DUAL_TIMESTAMP_NULL;
2284 }
2285
2286 initialize_coredump(skip_setup);
2287
2288 r = fixup_environment();
2289 if (r < 0) {
2290 log_emergency_errno(r, "Failed to fix up PID 1 environment: %m");
2291 error_message = "Failed to fix up PID1 environment";
2292 goto finish;
2293 }
2294
2295 if (arg_system) {
2296
2297 /* Try to figure out if we can use colors with the console. No
2298 * need to do that for user instances since they never log
2299 * into the console. */
2300 log_show_color(colors_enabled());
2301 r = make_null_stdio();
2302 if (r < 0)
2303 log_warning_errno(r, "Failed to redirect standard streams to /dev/null: %m");
2304 }
2305
2306 /* Mount /proc, /sys and friends, so that /proc/cmdline and
2307 * /proc/$PID/fd is available. */
2308 if (getpid_cached() == 1) {
2309
2310 /* Load the kernel modules early. */
2311 if (!skip_setup)
2312 kmod_setup();
2313
2314 r = mount_setup(loaded_policy);
2315 if (r < 0) {
2316 error_message = "Failed to mount API filesystems";
2317 goto finish;
2318 }
2319 }
2320
2321 /* Reset all signal handlers. */
2322 (void) reset_all_signal_handlers();
2323 (void) ignore_signals(SIGNALS_IGNORE, -1);
2324
2325 r = load_configuration(argc, argv, &error_message);
2326 if (r < 0)
2327 goto finish;
2328
2329 r = safety_checks();
2330 if (r < 0)
2331 goto finish;
2332
2333 if (IN_SET(arg_action, ACTION_TEST, ACTION_HELP, ACTION_DUMP_CONFIGURATION_ITEMS))
2334 (void) pager_open(arg_no_pager, false);
2335
2336 if (arg_action != ACTION_RUN)
2337 skip_setup = true;
2338
2339 if (arg_action == ACTION_HELP) {
2340 retval = help();
2341 goto finish;
2342 } else if (arg_action == ACTION_VERSION) {
2343 retval = version();
2344 goto finish;
2345 } else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) {
2346 unit_dump_config_items(stdout);
2347 retval = EXIT_SUCCESS;
2348 goto finish;
2349 }
2350
2351 assert_se(IN_SET(arg_action, ACTION_RUN, ACTION_TEST));
2352
2353 /* Move out of the way, so that we won't block unmounts */
2354 assert_se(chdir("/") == 0);
2355
2356 if (arg_action == ACTION_RUN) {
2357
2358 /* Close logging fds, in order not to confuse collecting passed fds and terminal logic below */
2359 log_close();
2360
2361 /* Remember open file descriptors for later deserialization */
2362 r = collect_fds(&fds, &error_message);
2363 if (r < 0)
2364 goto finish;
2365
2366 /* Give up any control of the console, but make sure its initialized. */
2367 setup_console_terminal(skip_setup);
2368
2369 /* Open the logging devices, if possible and necessary */
2370 log_open();
2371 }
2372
2373 log_execution_mode(&first_boot);
2374
2375 r = initialize_runtime(skip_setup,
2376 &saved_rlimit_nofile,
2377 &saved_rlimit_memlock,
2378 &error_message);
2379 if (r < 0)
2380 goto finish;
2381
2382 r = manager_new(arg_system ? UNIT_FILE_SYSTEM : UNIT_FILE_USER,
2383 arg_action == ACTION_TEST ? MANAGER_TEST_FULL : 0,
2384 &m);
2385 if (r < 0) {
2386 log_emergency_errno(r, "Failed to allocate manager object: %m");
2387 error_message = "Failed to allocate manager object";
2388 goto finish;
2389 }
2390
2391 m->timestamps[MANAGER_TIMESTAMP_KERNEL] = kernel_timestamp;
2392 m->timestamps[MANAGER_TIMESTAMP_INITRD] = initrd_timestamp;
2393 m->timestamps[MANAGER_TIMESTAMP_USERSPACE] = userspace_timestamp;
2394 m->timestamps[MANAGER_TIMESTAMP_SECURITY_START] = security_start_timestamp;
2395 m->timestamps[MANAGER_TIMESTAMP_SECURITY_FINISH] = security_finish_timestamp;
2396
2397 set_manager_defaults(m);
2398 set_manager_settings(m);
2399 manager_set_first_boot(m, first_boot);
2400
2401 /* Remember whether we should queue the default job */
2402 queue_default_job = !arg_serialization || arg_switched_root;
2403
2404 before_startup = now(CLOCK_MONOTONIC);
2405
2406 r = manager_startup(m, arg_serialization, fds);
2407 if (r < 0) {
2408 log_error_errno(r, "Failed to fully start up daemon: %m");
2409 error_message = "Failed to start up manager";
2410 goto finish;
2411 }
2412
2413 /* This will close all file descriptors that were opened, but not claimed by any unit. */
2414 fds = fdset_free(fds);
2415 arg_serialization = safe_fclose(arg_serialization);
2416
2417 if (queue_default_job) {
2418 r = do_queue_default_job(m, &error_message);
2419 if (r < 0)
2420 goto finish;
2421 }
2422
2423 after_startup = now(CLOCK_MONOTONIC);
2424
2425 log_full(arg_action == ACTION_TEST ? LOG_INFO : LOG_DEBUG,
2426 "Loaded units and determined initial transaction in %s.",
2427 format_timespan(timespan, sizeof(timespan), after_startup - before_startup, 100 * USEC_PER_MSEC));
2428
2429 if (arg_action == ACTION_TEST) {
2430 test_summary(m);
2431 retval = EXIT_SUCCESS;
2432 goto finish;
2433 }
2434
2435 (void) invoke_main_loop(m,
2436 &reexecute,
2437 &retval,
2438 &shutdown_verb,
2439 &fds,
2440 &switch_root_dir,
2441 &switch_root_init,
2442 &error_message);
2443
2444 finish:
2445 pager_close();
2446
2447 if (m)
2448 arg_shutdown_watchdog = m->shutdown_watchdog;
2449
2450 m = manager_free(m);
2451
2452 free_arguments();
2453 mac_selinux_finish();
2454
2455 if (reexecute)
2456 do_reexecute(argc, argv,
2457 &saved_rlimit_nofile,
2458 &saved_rlimit_memlock,
2459 fds,
2460 switch_root_dir,
2461 switch_root_init,
2462 &error_message); /* This only returns if reexecution failed */
2463
2464 arg_serialization = safe_fclose(arg_serialization);
2465 fds = fdset_free(fds);
2466
2467 #if HAVE_VALGRIND_VALGRIND_H
2468 /* If we are PID 1 and running under valgrind, then let's exit
2469 * here explicitly. valgrind will only generate nice output on
2470 * exit(), not on exec(), hence let's do the former not the
2471 * latter here. */
2472 if (getpid_cached() == 1 && RUNNING_ON_VALGRIND) {
2473 /* Cleanup watchdog_device strings for valgrind. We need them
2474 * in become_shutdown() so normally we cannot free them yet. */
2475 watchdog_free_device();
2476 arg_watchdog_device = mfree(arg_watchdog_device);
2477 return retval;
2478 }
2479 #endif
2480
2481 if (shutdown_verb) {
2482 r = become_shutdown(shutdown_verb, retval);
2483
2484 log_error_errno(r, "Failed to execute shutdown binary, %s: %m", getpid_cached() == 1 ? "freezing" : "quitting");
2485 error_message = "Failed to execute shutdown binary";
2486 }
2487
2488 watchdog_free_device();
2489 arg_watchdog_device = mfree(arg_watchdog_device);
2490
2491 if (getpid_cached() == 1) {
2492 if (error_message)
2493 manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
2494 ANSI_HIGHLIGHT_RED "!!!!!!" ANSI_NORMAL,
2495 "%s, freezing.", error_message);
2496 freeze_or_reboot();
2497 }
2498
2499 return retval;
2500 }