]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/main.c
Merge pull request #7591 from poettering/retry-on-servfail
[thirdparty/systemd.git] / src / core / main.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2010 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <getopt.h>
24 #include <signal.h>
25 #include <stdio.h>
26 #include <string.h>
27 #include <sys/mount.h>
28 #include <sys/prctl.h>
29 #include <sys/reboot.h>
30 #include <sys/stat.h>
31 #include <unistd.h>
32 #if HAVE_SECCOMP
33 #include <seccomp.h>
34 #endif
35 #if HAVE_VALGRIND_VALGRIND_H
36 #include <valgrind/valgrind.h>
37 #endif
38
39 #include "sd-bus.h"
40 #include "sd-daemon.h"
41
42 #include "alloc-util.h"
43 #include "architecture.h"
44 #include "build.h"
45 #include "bus-error.h"
46 #include "bus-util.h"
47 #include "capability-util.h"
48 #include "clock-util.h"
49 #include "conf-parser.h"
50 #include "cpu-set-util.h"
51 #include "dbus-manager.h"
52 #include "def.h"
53 #include "emergency-action.h"
54 #include "env-util.h"
55 #include "fd-util.h"
56 #include "fdset.h"
57 #include "fileio.h"
58 #include "format-util.h"
59 #include "fs-util.h"
60 #include "hostname-setup.h"
61 #include "ima-setup.h"
62 #include "killall.h"
63 #include "kmod-setup.h"
64 #include "load-fragment.h"
65 #include "log.h"
66 #include "loopback-setup.h"
67 #include "machine-id-setup.h"
68 #include "manager.h"
69 #include "missing.h"
70 #include "mount-setup.h"
71 #include "pager.h"
72 #include "parse-util.h"
73 #include "path-util.h"
74 #include "proc-cmdline.h"
75 #include "process-util.h"
76 #include "raw-clone.h"
77 #include "rlimit-util.h"
78 #if HAVE_SECCOMP
79 #include "seccomp-util.h"
80 #endif
81 #include "selinux-setup.h"
82 #include "selinux-util.h"
83 #include "signal-util.h"
84 #include "smack-setup.h"
85 #include "special.h"
86 #include "stat-util.h"
87 #include "stdio-util.h"
88 #include "strv.h"
89 #include "switch-root.h"
90 #include "terminal-util.h"
91 #include "umask-util.h"
92 #include "user-util.h"
93 #include "virt.h"
94 #include "watchdog.h"
95
96 static enum {
97 ACTION_RUN,
98 ACTION_HELP,
99 ACTION_VERSION,
100 ACTION_TEST,
101 ACTION_DUMP_CONFIGURATION_ITEMS
102 } arg_action = ACTION_RUN;
103 static char *arg_default_unit = NULL;
104 static bool arg_system = false;
105 static bool arg_dump_core = true;
106 static int arg_crash_chvt = -1;
107 static bool arg_crash_shell = false;
108 static bool arg_crash_reboot = false;
109 static char *arg_confirm_spawn = NULL;
110 static ShowStatus arg_show_status = _SHOW_STATUS_UNSET;
111 static bool arg_switched_root = false;
112 static bool arg_no_pager = false;
113 static char ***arg_join_controllers = NULL;
114 static ExecOutput arg_default_std_output = EXEC_OUTPUT_JOURNAL;
115 static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT;
116 static usec_t arg_default_restart_usec = DEFAULT_RESTART_USEC;
117 static usec_t arg_default_timeout_start_usec = DEFAULT_TIMEOUT_USEC;
118 static usec_t arg_default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC;
119 static usec_t arg_default_start_limit_interval = DEFAULT_START_LIMIT_INTERVAL;
120 static unsigned arg_default_start_limit_burst = DEFAULT_START_LIMIT_BURST;
121 static usec_t arg_runtime_watchdog = 0;
122 static usec_t arg_shutdown_watchdog = 10 * USEC_PER_MINUTE;
123 static char *arg_watchdog_device = NULL;
124 static char **arg_default_environment = NULL;
125 static struct rlimit *arg_default_rlimit[_RLIMIT_MAX] = {};
126 static uint64_t arg_capability_bounding_set = CAP_ALL;
127 static nsec_t arg_timer_slack_nsec = NSEC_INFINITY;
128 static usec_t arg_default_timer_accuracy_usec = 1 * USEC_PER_MINUTE;
129 static Set* arg_syscall_archs = NULL;
130 static FILE* arg_serialization = NULL;
131 static bool arg_default_cpu_accounting = false;
132 static bool arg_default_io_accounting = false;
133 static bool arg_default_ip_accounting = false;
134 static bool arg_default_blockio_accounting = false;
135 static bool arg_default_memory_accounting = false;
136 static bool arg_default_tasks_accounting = true;
137 static uint64_t arg_default_tasks_max = UINT64_MAX;
138 static sd_id128_t arg_machine_id = {};
139 static EmergencyAction arg_cad_burst_action = EMERGENCY_ACTION_REBOOT_FORCE;
140
141 noreturn static void freeze_or_reboot(void) {
142
143 if (arg_crash_reboot) {
144 log_notice("Rebooting in 10s...");
145 (void) sleep(10);
146
147 log_notice("Rebooting now...");
148 (void) reboot(RB_AUTOBOOT);
149 log_emergency_errno(errno, "Failed to reboot: %m");
150 }
151
152 log_emergency("Freezing execution.");
153 freeze();
154 }
155
156 noreturn static void crash(int sig) {
157 struct sigaction sa;
158 pid_t pid;
159
160 if (getpid_cached() != 1)
161 /* Pass this on immediately, if this is not PID 1 */
162 (void) raise(sig);
163 else if (!arg_dump_core)
164 log_emergency("Caught <%s>, not dumping core.", signal_to_string(sig));
165 else {
166 sa = (struct sigaction) {
167 .sa_handler = nop_signal_handler,
168 .sa_flags = SA_NOCLDSTOP|SA_RESTART,
169 };
170
171 /* We want to wait for the core process, hence let's enable SIGCHLD */
172 (void) sigaction(SIGCHLD, &sa, NULL);
173
174 pid = raw_clone(SIGCHLD);
175 if (pid < 0)
176 log_emergency_errno(errno, "Caught <%s>, cannot fork for core dump: %m", signal_to_string(sig));
177 else if (pid == 0) {
178 /* Enable default signal handler for core dump */
179
180 sa = (struct sigaction) {
181 .sa_handler = SIG_DFL,
182 };
183 (void) sigaction(sig, &sa, NULL);
184
185 /* Don't limit the coredump size */
186 (void) setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY));
187
188 /* Just to be sure... */
189 (void) chdir("/");
190
191 /* Raise the signal again */
192 pid = raw_getpid();
193 (void) kill(pid, sig); /* raise() would kill the parent */
194
195 assert_not_reached("We shouldn't be here...");
196 _exit(EXIT_FAILURE);
197 } else {
198 siginfo_t status;
199 int r;
200
201 /* Order things nicely. */
202 r = wait_for_terminate(pid, &status);
203 if (r < 0)
204 log_emergency_errno(r, "Caught <%s>, waitpid() failed: %m", signal_to_string(sig));
205 else if (status.si_code != CLD_DUMPED)
206 log_emergency("Caught <%s>, core dump failed (child "PID_FMT", code=%s, status=%i/%s).",
207 signal_to_string(sig),
208 pid, sigchld_code_to_string(status.si_code),
209 status.si_status,
210 strna(status.si_code == CLD_EXITED
211 ? exit_status_to_string(status.si_status, EXIT_STATUS_MINIMAL)
212 : signal_to_string(status.si_status)));
213 else
214 log_emergency("Caught <%s>, dumped core as pid "PID_FMT".", signal_to_string(sig), pid);
215 }
216 }
217
218 if (arg_crash_chvt >= 0)
219 (void) chvt(arg_crash_chvt);
220
221 sa = (struct sigaction) {
222 .sa_handler = SIG_IGN,
223 .sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT|SA_RESTART,
224 };
225
226 /* Let the kernel reap children for us */
227 (void) sigaction(SIGCHLD, &sa, NULL);
228
229 if (arg_crash_shell) {
230 log_notice("Executing crash shell in 10s...");
231 (void) sleep(10);
232
233 pid = raw_clone(SIGCHLD);
234 if (pid < 0)
235 log_emergency_errno(errno, "Failed to fork off crash shell: %m");
236 else if (pid == 0) {
237 (void) setsid();
238 (void) make_console_stdio();
239 (void) execle("/bin/sh", "/bin/sh", NULL, environ);
240
241 log_emergency_errno(errno, "execle() failed: %m");
242 _exit(EXIT_FAILURE);
243 } else {
244 log_info("Spawned crash shell as PID "PID_FMT".", pid);
245 (void) wait_for_terminate(pid, NULL);
246 }
247 }
248
249 freeze_or_reboot();
250 }
251
252 static void install_crash_handler(void) {
253 static const struct sigaction sa = {
254 .sa_handler = crash,
255 .sa_flags = SA_NODEFER, /* So that we can raise the signal again from the signal handler */
256 };
257 int r;
258
259 /* We ignore the return value here, since, we don't mind if we
260 * cannot set up a crash handler */
261 r = sigaction_many(&sa, SIGNALS_CRASH_HANDLER, -1);
262 if (r < 0)
263 log_debug_errno(r, "I had trouble setting up the crash handler, ignoring: %m");
264 }
265
266 static int console_setup(void) {
267 _cleanup_close_ int tty_fd = -1;
268 int r;
269
270 tty_fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
271 if (tty_fd < 0)
272 return log_error_errno(tty_fd, "Failed to open /dev/console: %m");
273
274 /* We don't want to force text mode. plymouth may be showing
275 * pictures already from initrd. */
276 r = reset_terminal_fd(tty_fd, false);
277 if (r < 0)
278 return log_error_errno(r, "Failed to reset /dev/console: %m");
279
280 return 0;
281 }
282
283 static int parse_crash_chvt(const char *value) {
284 int b;
285
286 if (safe_atoi(value, &arg_crash_chvt) >= 0)
287 return 0;
288
289 b = parse_boolean(value);
290 if (b < 0)
291 return b;
292
293 if (b > 0)
294 arg_crash_chvt = 0; /* switch to where kmsg goes */
295 else
296 arg_crash_chvt = -1; /* turn off switching */
297
298 return 0;
299 }
300
/* Translates a confirm-spawn setting into a console path stored in *console.
 *
 *   NULL or boolean true  → "/dev/console"
 *   boolean false         → NULL
 *   a path                → a copy of that path
 *   anything else         → "/dev/" followed by the value
 *
 * Returns 0 on success (the caller owns the string), -ENOMEM if allocation fails (in which case *console is
 * left untouched). */
static int parse_confirm_spawn(const char *value, char **console) {
        char *path;
        int enabled = 1;

        if (value)
                enabled = parse_boolean(value);

        if (enabled == 0) {
                *console = NULL;
                return 0;
        }

        path = enabled > 0     ? strdup("/dev/console") :  /* on, default tty */
               is_path(value)  ? strdup(value) :           /* on, fully qualified path */
                                 strjoin("/dev/", value);  /* on, bare tty file name */
        if (!path)
                return -ENOMEM;

        *console = path;
        return 0;
}
322
323 static int set_machine_id(const char *m) {
324 sd_id128_t t;
325 assert(m);
326
327 if (sd_id128_from_string(m, &t) < 0)
328 return -EINVAL;
329
330 if (sd_id128_is_null(t))
331 return -EINVAL;
332
333 arg_machine_id = t;
334 return 0;
335 }
336
337 static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
338
339 int r;
340
341 assert(key);
342
343 if (STR_IN_SET(key, "systemd.unit", "rd.systemd.unit")) {
344
345 if (proc_cmdline_value_missing(key, value))
346 return 0;
347
348 if (!unit_name_is_valid(value, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
349 log_warning("Unit name specified on %s= is not valid, ignoring: %s", key, value);
350 else if (in_initrd() == !!startswith(key, "rd.")) {
351 if (free_and_strdup(&arg_default_unit, value) < 0)
352 return log_oom();
353 }
354
355 } else if (proc_cmdline_key_streq(key, "systemd.dump_core")) {
356
357 r = value ? parse_boolean(value) : true;
358 if (r < 0)
359 log_warning("Failed to parse dump core switch %s. Ignoring.", value);
360 else
361 arg_dump_core = r;
362
363 } else if (proc_cmdline_key_streq(key, "systemd.crash_chvt")) {
364
365 if (!value)
366 arg_crash_chvt = 0; /* turn on */
367 else if (parse_crash_chvt(value) < 0)
368 log_warning("Failed to parse crash chvt switch %s. Ignoring.", value);
369
370 } else if (proc_cmdline_key_streq(key, "systemd.crash_shell")) {
371
372 r = value ? parse_boolean(value) : true;
373 if (r < 0)
374 log_warning("Failed to parse crash shell switch %s. Ignoring.", value);
375 else
376 arg_crash_shell = r;
377
378 } else if (proc_cmdline_key_streq(key, "systemd.crash_reboot")) {
379
380 r = value ? parse_boolean(value) : true;
381 if (r < 0)
382 log_warning("Failed to parse crash reboot switch %s. Ignoring.", value);
383 else
384 arg_crash_reboot = r;
385
386 } else if (proc_cmdline_key_streq(key, "systemd.confirm_spawn")) {
387 char *s;
388
389 r = parse_confirm_spawn(value, &s);
390 if (r < 0)
391 log_warning_errno(r, "Failed to parse confirm_spawn switch %s. Ignoring.", value);
392 else {
393 free(arg_confirm_spawn);
394 arg_confirm_spawn = s;
395 }
396
397 } else if (proc_cmdline_key_streq(key, "systemd.show_status")) {
398
399 if (value) {
400 r = parse_show_status(value, &arg_show_status);
401 if (r < 0)
402 log_warning("Failed to parse show status switch %s. Ignoring.", value);
403 } else
404 arg_show_status = SHOW_STATUS_YES;
405
406 } else if (proc_cmdline_key_streq(key, "systemd.default_standard_output")) {
407
408 if (proc_cmdline_value_missing(key, value))
409 return 0;
410
411 r = exec_output_from_string(value);
412 if (r < 0)
413 log_warning("Failed to parse default standard output switch %s. Ignoring.", value);
414 else
415 arg_default_std_output = r;
416
417 } else if (proc_cmdline_key_streq(key, "systemd.default_standard_error")) {
418
419 if (proc_cmdline_value_missing(key, value))
420 return 0;
421
422 r = exec_output_from_string(value);
423 if (r < 0)
424 log_warning("Failed to parse default standard error switch %s. Ignoring.", value);
425 else
426 arg_default_std_error = r;
427
428 } else if (streq(key, "systemd.setenv")) {
429
430 if (proc_cmdline_value_missing(key, value))
431 return 0;
432
433 if (env_assignment_is_valid(value)) {
434 char **env;
435
436 env = strv_env_set(arg_default_environment, value);
437 if (!env)
438 return log_oom();
439
440 arg_default_environment = env;
441 } else
442 log_warning("Environment variable name '%s' is not valid. Ignoring.", value);
443
444 } else if (proc_cmdline_key_streq(key, "systemd.machine_id")) {
445
446 if (proc_cmdline_value_missing(key, value))
447 return 0;
448
449 r = set_machine_id(value);
450 if (r < 0)
451 log_warning("MachineID '%s' is not valid. Ignoring.", value);
452
453 } else if (proc_cmdline_key_streq(key, "systemd.default_timeout_start_sec")) {
454
455 if (proc_cmdline_value_missing(key, value))
456 return 0;
457
458 r = parse_sec(value, &arg_default_timeout_start_usec);
459 if (r < 0)
460 log_warning_errno(r, "Failed to parse default start timeout: %s, ignoring.", value);
461
462 if (arg_default_timeout_start_usec <= 0)
463 arg_default_timeout_start_usec = USEC_INFINITY;
464
465 } else if (proc_cmdline_key_streq(key, "systemd.watchdog_device")) {
466
467 if (proc_cmdline_value_missing(key, value))
468 return 0;
469
470 parse_path_argument_and_warn(value, false, &arg_watchdog_device);
471
472 } else if (streq(key, "quiet") && !value) {
473
474 if (arg_show_status == _SHOW_STATUS_UNSET)
475 arg_show_status = SHOW_STATUS_AUTO;
476
477 } else if (streq(key, "debug") && !value) {
478
479 /* Note that log_parse_environment() handles 'debug'
480 * too, and sets the log level to LOG_DEBUG. */
481
482 if (detect_container() > 0)
483 log_set_target(LOG_TARGET_CONSOLE);
484
485 } else if (!value) {
486 const char *target;
487
488 /* SysV compatibility */
489 target = runlevel_to_target(key);
490 if (target)
491 return free_and_strdup(&arg_default_unit, target);
492 }
493
494 return 0;
495 }
496
/* Defines a configuration file parser callback called name, which passes the right-hand side of the assignment
 * to func(). If func() returns a negative error, a syntax error mentioning descr and the offending value is
 * logged; the callback itself always returns 0, so that a bad value never aborts parsing. The data and
 * userdata pointers are not used. */
#define DEFINE_SETTER(name, func, descr)                              \
        static int name(const char *unit,                             \
                        const char *filename,                         \
                        unsigned line,                                \
                        const char *section,                          \
                        unsigned section_line,                        \
                        const char *lvalue,                           \
                        int ltype,                                    \
                        const char *rvalue,                           \
                        void *data,                                   \
                        void *userdata) {                             \
                                                                      \
                int r;                                                \
                                                                      \
                assert(filename);                                     \
                assert(lvalue);                                       \
                assert(rvalue);                                       \
                                                                      \
                r = func(rvalue);                                     \
                if (r < 0)                                            \
                        log_syntax(unit, LOG_ERR, filename, line, r,  \
                                   "Invalid " descr " '%s': %m",      \
                                   rvalue);                           \
                                                                      \
                return 0;                                             \
        }

DEFINE_SETTER(config_parse_level2, log_set_max_level_from_string, "log level")
DEFINE_SETTER(config_parse_target, log_set_target_from_string, "target")
DEFINE_SETTER(config_parse_color, log_show_color_from_string, "color")
DEFINE_SETTER(config_parse_location, log_show_location_from_string, "location")
528
529 static int config_parse_cpu_affinity2(
530 const char *unit,
531 const char *filename,
532 unsigned line,
533 const char *section,
534 unsigned section_line,
535 const char *lvalue,
536 int ltype,
537 const char *rvalue,
538 void *data,
539 void *userdata) {
540
541 _cleanup_cpu_free_ cpu_set_t *c = NULL;
542 int ncpus;
543
544 ncpus = parse_cpu_set_and_warn(rvalue, &c, unit, filename, line, lvalue);
545 if (ncpus < 0)
546 return ncpus;
547
548 if (sched_setaffinity(0, CPU_ALLOC_SIZE(ncpus), c) < 0)
549 log_warning_errno(errno, "Failed to set CPU affinity: %m");
550
551 return 0;
552 }
553
554 static int config_parse_show_status(
555 const char* unit,
556 const char *filename,
557 unsigned line,
558 const char *section,
559 unsigned section_line,
560 const char *lvalue,
561 int ltype,
562 const char *rvalue,
563 void *data,
564 void *userdata) {
565
566 int k;
567 ShowStatus *b = data;
568
569 assert(filename);
570 assert(lvalue);
571 assert(rvalue);
572 assert(data);
573
574 k = parse_show_status(rvalue, b);
575 if (k < 0) {
576 log_syntax(unit, LOG_ERR, filename, line, k, "Failed to parse show status setting, ignoring: %s", rvalue);
577 return 0;
578 }
579
580 return 0;
581 }
582
583 static int config_parse_output_restricted(
584 const char* unit,
585 const char *filename,
586 unsigned line,
587 const char *section,
588 unsigned section_line,
589 const char *lvalue,
590 int ltype,
591 const char *rvalue,
592 void *data,
593 void *userdata) {
594
595 ExecOutput t, *eo = data;
596
597 assert(filename);
598 assert(lvalue);
599 assert(rvalue);
600 assert(data);
601
602 t = exec_output_from_string(rvalue);
603 if (t < 0) {
604 log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse output type, ignoring: %s", rvalue);
605 return 0;
606 }
607
608 if (IN_SET(t, EXEC_OUTPUT_SOCKET, EXEC_OUTPUT_NAMED_FD, EXEC_OUTPUT_FILE)) {
609 log_syntax(unit, LOG_ERR, filename, line, 0, "Standard output types socket, fd:, file: are not supported as defaults, ignoring: %s", rvalue);
610 return 0;
611 }
612
613 *eo = t;
614 return 0;
615 }
616
/* Configuration parser callback wrapping parse_crash_chvt(), which updates arg_crash_chvt. Invalid values are
 * logged and ignored; always returns 0. */
static int config_parse_crash_chvt(
                const char* unit,
                const char *filename,
                unsigned line,
                const char *section,
                unsigned section_line,
                const char *lvalue,
                int ltype,
                const char *rvalue,
                void *data,
                void *userdata) {

        int r;

        assert(filename);
        assert(lvalue);
        assert(rvalue);

        r = parse_crash_chvt(rvalue);
        if (r < 0)
                log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse CrashChangeVT= setting, ignoring: %s", rvalue);

        return 0;
}
643
644 static int config_parse_join_controllers(const char *unit,
645 const char *filename,
646 unsigned line,
647 const char *section,
648 unsigned section_line,
649 const char *lvalue,
650 int ltype,
651 const char *rvalue,
652 void *data,
653 void *userdata) {
654
655 const char *whole_rvalue = rvalue;
656 unsigned n = 0;
657
658 assert(filename);
659 assert(lvalue);
660 assert(rvalue);
661
662 arg_join_controllers = strv_free_free(arg_join_controllers);
663
664 for (;;) {
665 _cleanup_free_ char *word = NULL;
666 char **l;
667 int r;
668
669 r = extract_first_word(&rvalue, &word, NULL, EXTRACT_QUOTES);
670 if (r < 0) {
671 log_syntax(unit, LOG_ERR, filename, line, r, "Invalid value for %s: %s", lvalue, whole_rvalue);
672 return r;
673 }
674 if (r == 0)
675 break;
676
677 l = strv_split(word, ",");
678 if (!l)
679 return log_oom();
680 strv_uniq(l);
681
682 if (strv_length(l) <= 1) {
683 strv_free(l);
684 continue;
685 }
686
687 if (!arg_join_controllers) {
688 arg_join_controllers = new(char**, 2);
689 if (!arg_join_controllers) {
690 strv_free(l);
691 return log_oom();
692 }
693
694 arg_join_controllers[0] = l;
695 arg_join_controllers[1] = NULL;
696
697 n = 1;
698 } else {
699 char ***a;
700 char ***t;
701
702 t = new0(char**, n+2);
703 if (!t) {
704 strv_free(l);
705 return log_oom();
706 }
707
708 n = 0;
709
710 for (a = arg_join_controllers; *a; a++) {
711
712 if (strv_overlap(*a, l)) {
713 if (strv_extend_strv(&l, *a, false) < 0) {
714 strv_free(l);
715 strv_free_free(t);
716 return log_oom();
717 }
718
719 } else {
720 char **c;
721
722 c = strv_copy(*a);
723 if (!c) {
724 strv_free(l);
725 strv_free_free(t);
726 return log_oom();
727 }
728
729 t[n++] = c;
730 }
731 }
732
733 t[n++] = strv_uniq(l);
734
735 strv_free_free(arg_join_controllers);
736 arg_join_controllers = t;
737 }
738 }
739 if (!isempty(rvalue))
740 log_syntax(unit, LOG_ERR, filename, line, 0, "Trailing garbage, ignoring.");
741
742 return 0;
743 }
744
745 static int parse_config_file(void) {
746
747 const ConfigTableItem items[] = {
748 { "Manager", "LogLevel", config_parse_level2, 0, NULL },
749 { "Manager", "LogTarget", config_parse_target, 0, NULL },
750 { "Manager", "LogColor", config_parse_color, 0, NULL },
751 { "Manager", "LogLocation", config_parse_location, 0, NULL },
752 { "Manager", "DumpCore", config_parse_bool, 0, &arg_dump_core },
753 { "Manager", "CrashChVT", /* legacy */ config_parse_crash_chvt, 0, NULL },
754 { "Manager", "CrashChangeVT", config_parse_crash_chvt, 0, NULL },
755 { "Manager", "CrashShell", config_parse_bool, 0, &arg_crash_shell },
756 { "Manager", "CrashReboot", config_parse_bool, 0, &arg_crash_reboot },
757 { "Manager", "ShowStatus", config_parse_show_status, 0, &arg_show_status },
758 { "Manager", "CPUAffinity", config_parse_cpu_affinity2, 0, NULL },
759 { "Manager", "JoinControllers", config_parse_join_controllers, 0, &arg_join_controllers },
760 { "Manager", "RuntimeWatchdogSec", config_parse_sec, 0, &arg_runtime_watchdog },
761 { "Manager", "ShutdownWatchdogSec", config_parse_sec, 0, &arg_shutdown_watchdog },
762 { "Manager", "WatchdogDevice", config_parse_path, 0, &arg_watchdog_device },
763 { "Manager", "CapabilityBoundingSet", config_parse_capability_set, 0, &arg_capability_bounding_set },
764 #if HAVE_SECCOMP
765 { "Manager", "SystemCallArchitectures", config_parse_syscall_archs, 0, &arg_syscall_archs },
766 #endif
767 { "Manager", "TimerSlackNSec", config_parse_nsec, 0, &arg_timer_slack_nsec },
768 { "Manager", "DefaultTimerAccuracySec", config_parse_sec, 0, &arg_default_timer_accuracy_usec },
769 { "Manager", "DefaultStandardOutput", config_parse_output_restricted,0, &arg_default_std_output },
770 { "Manager", "DefaultStandardError", config_parse_output_restricted,0, &arg_default_std_error },
771 { "Manager", "DefaultTimeoutStartSec", config_parse_sec, 0, &arg_default_timeout_start_usec },
772 { "Manager", "DefaultTimeoutStopSec", config_parse_sec, 0, &arg_default_timeout_stop_usec },
773 { "Manager", "DefaultRestartSec", config_parse_sec, 0, &arg_default_restart_usec },
774 { "Manager", "DefaultStartLimitInterval", config_parse_sec, 0, &arg_default_start_limit_interval }, /* obsolete alias */
775 { "Manager", "DefaultStartLimitIntervalSec",config_parse_sec, 0, &arg_default_start_limit_interval },
776 { "Manager", "DefaultStartLimitBurst", config_parse_unsigned, 0, &arg_default_start_limit_burst },
777 { "Manager", "DefaultEnvironment", config_parse_environ, 0, &arg_default_environment },
778 { "Manager", "DefaultLimitCPU", config_parse_limit, RLIMIT_CPU, arg_default_rlimit },
779 { "Manager", "DefaultLimitFSIZE", config_parse_limit, RLIMIT_FSIZE, arg_default_rlimit },
780 { "Manager", "DefaultLimitDATA", config_parse_limit, RLIMIT_DATA, arg_default_rlimit },
781 { "Manager", "DefaultLimitSTACK", config_parse_limit, RLIMIT_STACK, arg_default_rlimit },
782 { "Manager", "DefaultLimitCORE", config_parse_limit, RLIMIT_CORE, arg_default_rlimit },
783 { "Manager", "DefaultLimitRSS", config_parse_limit, RLIMIT_RSS, arg_default_rlimit },
784 { "Manager", "DefaultLimitNOFILE", config_parse_limit, RLIMIT_NOFILE, arg_default_rlimit },
785 { "Manager", "DefaultLimitAS", config_parse_limit, RLIMIT_AS, arg_default_rlimit },
786 { "Manager", "DefaultLimitNPROC", config_parse_limit, RLIMIT_NPROC, arg_default_rlimit },
787 { "Manager", "DefaultLimitMEMLOCK", config_parse_limit, RLIMIT_MEMLOCK, arg_default_rlimit },
788 { "Manager", "DefaultLimitLOCKS", config_parse_limit, RLIMIT_LOCKS, arg_default_rlimit },
789 { "Manager", "DefaultLimitSIGPENDING", config_parse_limit, RLIMIT_SIGPENDING, arg_default_rlimit },
790 { "Manager", "DefaultLimitMSGQUEUE", config_parse_limit, RLIMIT_MSGQUEUE, arg_default_rlimit },
791 { "Manager", "DefaultLimitNICE", config_parse_limit, RLIMIT_NICE, arg_default_rlimit },
792 { "Manager", "DefaultLimitRTPRIO", config_parse_limit, RLIMIT_RTPRIO, arg_default_rlimit },
793 { "Manager", "DefaultLimitRTTIME", config_parse_limit, RLIMIT_RTTIME, arg_default_rlimit },
794 { "Manager", "DefaultCPUAccounting", config_parse_bool, 0, &arg_default_cpu_accounting },
795 { "Manager", "DefaultIOAccounting", config_parse_bool, 0, &arg_default_io_accounting },
796 { "Manager", "DefaultIPAccounting", config_parse_bool, 0, &arg_default_ip_accounting },
797 { "Manager", "DefaultBlockIOAccounting", config_parse_bool, 0, &arg_default_blockio_accounting },
798 { "Manager", "DefaultMemoryAccounting", config_parse_bool, 0, &arg_default_memory_accounting },
799 { "Manager", "DefaultTasksAccounting", config_parse_bool, 0, &arg_default_tasks_accounting },
800 { "Manager", "DefaultTasksMax", config_parse_tasks_max, 0, &arg_default_tasks_max },
801 { "Manager", "CtrlAltDelBurstAction", config_parse_emergency_action, 0, &arg_cad_burst_action },
802 {}
803 };
804
805 const char *fn, *conf_dirs_nulstr;
806
807 fn = arg_system ?
808 PKGSYSCONFDIR "/system.conf" :
809 PKGSYSCONFDIR "/user.conf";
810
811 conf_dirs_nulstr = arg_system ?
812 CONF_PATHS_NULSTR("systemd/system.conf.d") :
813 CONF_PATHS_NULSTR("systemd/user.conf.d");
814
815 (void) config_parse_many_nulstr(fn, conf_dirs_nulstr, "Manager\0", config_item_table_lookup, items, CONFIG_PARSE_WARN, NULL);
816
817 /* Traditionally "0" was used to turn off the default unit timeouts. Fix this up so that we used USEC_INFINITY
818 * like everywhere else. */
819 if (arg_default_timeout_start_usec <= 0)
820 arg_default_timeout_start_usec = USEC_INFINITY;
821 if (arg_default_timeout_stop_usec <= 0)
822 arg_default_timeout_stop_usec = USEC_INFINITY;
823
824 return 0;
825 }
826
827 static void set_manager_defaults(Manager *m) {
828
829 assert(m);
830
831 m->default_timer_accuracy_usec = arg_default_timer_accuracy_usec;
832 m->default_std_output = arg_default_std_output;
833 m->default_std_error = arg_default_std_error;
834 m->default_timeout_start_usec = arg_default_timeout_start_usec;
835 m->default_timeout_stop_usec = arg_default_timeout_stop_usec;
836 m->default_restart_usec = arg_default_restart_usec;
837 m->default_start_limit_interval = arg_default_start_limit_interval;
838 m->default_start_limit_burst = arg_default_start_limit_burst;
839 m->default_cpu_accounting = arg_default_cpu_accounting;
840 m->default_io_accounting = arg_default_io_accounting;
841 m->default_ip_accounting = arg_default_ip_accounting;
842 m->default_blockio_accounting = arg_default_blockio_accounting;
843 m->default_memory_accounting = arg_default_memory_accounting;
844 m->default_tasks_accounting = arg_default_tasks_accounting;
845 m->default_tasks_max = arg_default_tasks_max;
846
847 manager_set_default_rlimits(m, arg_default_rlimit);
848 manager_environment_add(m, NULL, arg_default_environment);
849 }
850
851 static void set_manager_settings(Manager *m) {
852
853 assert(m);
854
855 m->confirm_spawn = arg_confirm_spawn;
856 m->runtime_watchdog = arg_runtime_watchdog;
857 m->shutdown_watchdog = arg_shutdown_watchdog;
858 m->cad_burst_action = arg_cad_burst_action;
859
860 manager_set_show_status(m, arg_show_status);
861 }
862
863 static int parse_argv(int argc, char *argv[]) {
864
865 enum {
866 ARG_LOG_LEVEL = 0x100,
867 ARG_LOG_TARGET,
868 ARG_LOG_COLOR,
869 ARG_LOG_LOCATION,
870 ARG_UNIT,
871 ARG_SYSTEM,
872 ARG_USER,
873 ARG_TEST,
874 ARG_NO_PAGER,
875 ARG_VERSION,
876 ARG_DUMP_CONFIGURATION_ITEMS,
877 ARG_DUMP_CORE,
878 ARG_CRASH_CHVT,
879 ARG_CRASH_SHELL,
880 ARG_CRASH_REBOOT,
881 ARG_CONFIRM_SPAWN,
882 ARG_SHOW_STATUS,
883 ARG_DESERIALIZE,
884 ARG_SWITCHED_ROOT,
885 ARG_DEFAULT_STD_OUTPUT,
886 ARG_DEFAULT_STD_ERROR,
887 ARG_MACHINE_ID
888 };
889
890 static const struct option options[] = {
891 { "log-level", required_argument, NULL, ARG_LOG_LEVEL },
892 { "log-target", required_argument, NULL, ARG_LOG_TARGET },
893 { "log-color", optional_argument, NULL, ARG_LOG_COLOR },
894 { "log-location", optional_argument, NULL, ARG_LOG_LOCATION },
895 { "unit", required_argument, NULL, ARG_UNIT },
896 { "system", no_argument, NULL, ARG_SYSTEM },
897 { "user", no_argument, NULL, ARG_USER },
898 { "test", no_argument, NULL, ARG_TEST },
899 { "no-pager", no_argument, NULL, ARG_NO_PAGER },
900 { "help", no_argument, NULL, 'h' },
901 { "version", no_argument, NULL, ARG_VERSION },
902 { "dump-configuration-items", no_argument, NULL, ARG_DUMP_CONFIGURATION_ITEMS },
903 { "dump-core", optional_argument, NULL, ARG_DUMP_CORE },
904 { "crash-chvt", required_argument, NULL, ARG_CRASH_CHVT },
905 { "crash-shell", optional_argument, NULL, ARG_CRASH_SHELL },
906 { "crash-reboot", optional_argument, NULL, ARG_CRASH_REBOOT },
907 { "confirm-spawn", optional_argument, NULL, ARG_CONFIRM_SPAWN },
908 { "show-status", optional_argument, NULL, ARG_SHOW_STATUS },
909 { "deserialize", required_argument, NULL, ARG_DESERIALIZE },
910 { "switched-root", no_argument, NULL, ARG_SWITCHED_ROOT },
911 { "default-standard-output", required_argument, NULL, ARG_DEFAULT_STD_OUTPUT, },
912 { "default-standard-error", required_argument, NULL, ARG_DEFAULT_STD_ERROR, },
913 { "machine-id", required_argument, NULL, ARG_MACHINE_ID },
914 {}
915 };
916
917 int c, r;
918
919 assert(argc >= 1);
920 assert(argv);
921
922 if (getpid_cached() == 1)
923 opterr = 0;
924
925 while ((c = getopt_long(argc, argv, "hDbsz:", options, NULL)) >= 0)
926
927 switch (c) {
928
929 case ARG_LOG_LEVEL:
930 r = log_set_max_level_from_string(optarg);
931 if (r < 0) {
932 log_error("Failed to parse log level %s.", optarg);
933 return r;
934 }
935
936 break;
937
938 case ARG_LOG_TARGET:
939 r = log_set_target_from_string(optarg);
940 if (r < 0) {
941 log_error("Failed to parse log target %s.", optarg);
942 return r;
943 }
944
945 break;
946
947 case ARG_LOG_COLOR:
948
949 if (optarg) {
950 r = log_show_color_from_string(optarg);
951 if (r < 0) {
952 log_error("Failed to parse log color setting %s.", optarg);
953 return r;
954 }
955 } else
956 log_show_color(true);
957
958 break;
959
960 case ARG_LOG_LOCATION:
961 if (optarg) {
962 r = log_show_location_from_string(optarg);
963 if (r < 0) {
964 log_error("Failed to parse log location setting %s.", optarg);
965 return r;
966 }
967 } else
968 log_show_location(true);
969
970 break;
971
972 case ARG_DEFAULT_STD_OUTPUT:
973 r = exec_output_from_string(optarg);
974 if (r < 0) {
975 log_error("Failed to parse default standard output setting %s.", optarg);
976 return r;
977 } else
978 arg_default_std_output = r;
979 break;
980
981 case ARG_DEFAULT_STD_ERROR:
982 r = exec_output_from_string(optarg);
983 if (r < 0) {
984 log_error("Failed to parse default standard error output setting %s.", optarg);
985 return r;
986 } else
987 arg_default_std_error = r;
988 break;
989
990 case ARG_UNIT:
991 r = free_and_strdup(&arg_default_unit, optarg);
992 if (r < 0)
993 return log_error_errno(r, "Failed to set default unit %s: %m", optarg);
994
995 break;
996
997 case ARG_SYSTEM:
998 arg_system = true;
999 break;
1000
1001 case ARG_USER:
1002 arg_system = false;
1003 break;
1004
1005 case ARG_TEST:
1006 arg_action = ACTION_TEST;
1007 break;
1008
1009 case ARG_NO_PAGER:
1010 arg_no_pager = true;
1011 break;
1012
1013 case ARG_VERSION:
1014 arg_action = ACTION_VERSION;
1015 break;
1016
1017 case ARG_DUMP_CONFIGURATION_ITEMS:
1018 arg_action = ACTION_DUMP_CONFIGURATION_ITEMS;
1019 break;
1020
1021 case ARG_DUMP_CORE:
1022 if (!optarg)
1023 arg_dump_core = true;
1024 else {
1025 r = parse_boolean(optarg);
1026 if (r < 0)
1027 return log_error_errno(r, "Failed to parse dump core boolean: %s", optarg);
1028 arg_dump_core = r;
1029 }
1030 break;
1031
1032 case ARG_CRASH_CHVT:
1033 r = parse_crash_chvt(optarg);
1034 if (r < 0)
1035 return log_error_errno(r, "Failed to parse crash virtual terminal index: %s", optarg);
1036 break;
1037
1038 case ARG_CRASH_SHELL:
1039 if (!optarg)
1040 arg_crash_shell = true;
1041 else {
1042 r = parse_boolean(optarg);
1043 if (r < 0)
1044 return log_error_errno(r, "Failed to parse crash shell boolean: %s", optarg);
1045 arg_crash_shell = r;
1046 }
1047 break;
1048
1049 case ARG_CRASH_REBOOT:
1050 if (!optarg)
1051 arg_crash_reboot = true;
1052 else {
1053 r = parse_boolean(optarg);
1054 if (r < 0)
1055 return log_error_errno(r, "Failed to parse crash shell boolean: %s", optarg);
1056 arg_crash_reboot = r;
1057 }
1058 break;
1059
1060 case ARG_CONFIRM_SPAWN:
1061 arg_confirm_spawn = mfree(arg_confirm_spawn);
1062
1063 r = parse_confirm_spawn(optarg, &arg_confirm_spawn);
1064 if (r < 0)
1065 return log_error_errno(r, "Failed to parse confirm spawn option: %m");
1066 break;
1067
1068 case ARG_SHOW_STATUS:
1069 if (optarg) {
1070 r = parse_show_status(optarg, &arg_show_status);
1071 if (r < 0) {
1072 log_error("Failed to parse show status boolean %s.", optarg);
1073 return r;
1074 }
1075 } else
1076 arg_show_status = SHOW_STATUS_YES;
1077 break;
1078
1079 case ARG_DESERIALIZE: {
1080 int fd;
1081 FILE *f;
1082
1083 r = safe_atoi(optarg, &fd);
1084 if (r < 0 || fd < 0) {
1085 log_error("Failed to parse deserialize option %s.", optarg);
1086 return -EINVAL;
1087 }
1088
1089 (void) fd_cloexec(fd, true);
1090
1091 f = fdopen(fd, "r");
1092 if (!f)
1093 return log_error_errno(errno, "Failed to open serialization fd: %m");
1094
1095 safe_fclose(arg_serialization);
1096 arg_serialization = f;
1097
1098 break;
1099 }
1100
1101 case ARG_SWITCHED_ROOT:
1102 arg_switched_root = true;
1103 break;
1104
1105 case ARG_MACHINE_ID:
1106 r = set_machine_id(optarg);
1107 if (r < 0)
1108 return log_error_errno(r, "MachineID '%s' is not valid.", optarg);
1109 break;
1110
1111 case 'h':
1112 arg_action = ACTION_HELP;
1113 break;
1114
1115 case 'D':
1116 log_set_max_level(LOG_DEBUG);
1117 break;
1118
1119 case 'b':
1120 case 's':
1121 case 'z':
1122 /* Just to eat away the sysvinit kernel
1123 * cmdline args without getopt() error
1124 * messages that we'll parse in
1125 * parse_proc_cmdline_word() or ignore. */
1126
1127 case '?':
1128 if (getpid_cached() != 1)
1129 return -EINVAL;
1130 else
1131 return 0;
1132
1133 default:
1134 assert_not_reached("Unhandled option code.");
1135 }
1136
1137 if (optind < argc && getpid_cached() != 1) {
1138 /* Hmm, when we aren't run as init system
1139 * let's complain about excess arguments */
1140
1141 log_error("Excess arguments.");
1142 return -EINVAL;
1143 }
1144
1145 return 0;
1146 }
1147
1148 static int help(void) {
1149
1150 printf("%s [OPTIONS...]\n\n"
1151 "Starts up and maintains the system or user services.\n\n"
1152 " -h --help Show this help\n"
1153 " --version Show version\n"
1154 " --test Determine startup sequence, dump it and exit\n"
1155 " --no-pager Do not pipe output into a pager\n"
1156 " --dump-configuration-items Dump understood unit configuration items\n"
1157 " --unit=UNIT Set default unit\n"
1158 " --system Run a system instance, even if PID != 1\n"
1159 " --user Run a user instance\n"
1160 " --dump-core[=BOOL] Dump core on crash\n"
1161 " --crash-vt=NR Change to specified VT on crash\n"
1162 " --crash-reboot[=BOOL] Reboot on crash\n"
1163 " --crash-shell[=BOOL] Run shell on crash\n"
1164 " --confirm-spawn[=BOOL] Ask for confirmation when spawning processes\n"
1165 " --show-status[=BOOL] Show status updates on the console during bootup\n"
1166 " --log-target=TARGET Set log target (console, journal, kmsg, journal-or-kmsg, null)\n"
1167 " --log-level=LEVEL Set log level (debug, info, notice, warning, err, crit, alert, emerg)\n"
1168 " --log-color[=BOOL] Highlight important log messages\n"
1169 " --log-location[=BOOL] Include code location in log messages\n"
1170 " --default-standard-output= Set default standard output for services\n"
1171 " --default-standard-error= Set default standard error output for services\n",
1172 program_invocation_short_name);
1173
1174 return 0;
1175 }
1176
1177 static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool switching_root) {
1178 _cleanup_fdset_free_ FDSet *fds = NULL;
1179 _cleanup_fclose_ FILE *f = NULL;
1180 int r;
1181
1182 assert(m);
1183 assert(_f);
1184 assert(_fds);
1185
1186 r = manager_open_serialization(m, &f);
1187 if (r < 0)
1188 return log_error_errno(r, "Failed to create serialization file: %m");
1189
1190 /* Make sure nothing is really destructed when we shut down */
1191 m->n_reloading++;
1192 bus_manager_send_reloading(m, true);
1193
1194 fds = fdset_new();
1195 if (!fds)
1196 return log_oom();
1197
1198 r = manager_serialize(m, f, fds, switching_root);
1199 if (r < 0)
1200 return log_error_errno(r, "Failed to serialize state: %m");
1201
1202 if (fseeko(f, 0, SEEK_SET) == (off_t) -1)
1203 return log_error_errno(errno, "Failed to rewind serialization fd: %m");
1204
1205 r = fd_cloexec(fileno(f), false);
1206 if (r < 0)
1207 return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization: %m");
1208
1209 r = fdset_cloexec(fds, false);
1210 if (r < 0)
1211 return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization fds: %m");
1212
1213 *_f = f;
1214 *_fds = fds;
1215
1216 f = NULL;
1217 fds = NULL;
1218
1219 return 0;
1220 }
1221
1222 static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
1223 struct rlimit nl;
1224 int r;
1225 int min_max;
1226 _cleanup_free_ char *nr_open = NULL;
1227
1228 assert(saved_rlimit);
1229
1230 /* Save the original RLIMIT_NOFILE so that we can reset it
1231 * later when transitioning from the initrd to the main
1232 * systemd or suchlike. */
1233 if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0)
1234 return log_warning_errno(errno, "Reading RLIMIT_NOFILE failed, ignoring: %m");
1235
1236 /* Make sure forked processes get the default kernel setting */
1237 if (!arg_default_rlimit[RLIMIT_NOFILE]) {
1238 struct rlimit *rl;
1239
1240 rl = newdup(struct rlimit, saved_rlimit, 1);
1241 if (!rl)
1242 return log_oom();
1243
1244 arg_default_rlimit[RLIMIT_NOFILE] = rl;
1245 }
1246
1247 /* Get current RLIMIT_NOFILE maximum compiled into the kernel. */
1248 r = read_one_line_file("/proc/sys/fs/nr_open", &nr_open);
1249 if (r >= 0)
1250 r = safe_atoi(nr_open, &min_max);
1251 /* If we fail, fallback to the hard-coded kernel limit of 1024 * 1024. */
1252 if (r < 0)
1253 min_max = 1024 * 1024;
1254
1255 /* Bump up the resource limit for ourselves substantially */
1256 nl.rlim_cur = nl.rlim_max = min_max;
1257 r = setrlimit_closest(RLIMIT_NOFILE, &nl);
1258 if (r < 0)
1259 return log_warning_errno(r, "Setting RLIMIT_NOFILE failed, ignoring: %m");
1260
1261 return 0;
1262 }
1263
1264 static int bump_rlimit_memlock(struct rlimit *saved_rlimit) {
1265 int r;
1266
1267 assert(saved_rlimit);
1268 assert(getuid() == 0);
1269
1270 /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even though we have CAP_IPC_LOCK which
1271 * should normally disable such checks. We need them to implement IPAccessAllow= and IPAccessDeny=, hence let's
1272 * bump the value high enough for the root user. */
1273
1274 if (getrlimit(RLIMIT_MEMLOCK, saved_rlimit) < 0)
1275 return log_warning_errno(errno, "Reading RLIMIT_MEMLOCK failed, ignoring: %m");
1276
1277 r = setrlimit_closest(RLIMIT_MEMLOCK, &RLIMIT_MAKE_CONST(1024ULL*1024ULL*16ULL));
1278 if (r < 0)
1279 return log_warning_errno(r, "Setting RLIMIT_MEMLOCK failed, ignoring: %m");
1280
1281 return 0;
1282 }
1283
/* Warns if /usr looks like an unmounted separate file system, i.e. if the directory exists but is empty. */
static void test_usr(void) {

        /* dir_is_empty() returns > 0 only for an empty directory; a populated /usr or an error are both
         * silently accepted. */
        if (dir_is_empty("/usr") > 0)
                log_warning("/usr appears to be on its own filesystem and is not already mounted. This is not a supported setup. "
                            "Some things will probably break (sometimes even silently) in mysterious ways. "
                            "Consult http://freedesktop.org/wiki/Software/systemd/separate-usr-is-broken for more information.");
}
1295
1296 static int initialize_join_controllers(void) {
1297 /* By default, mount "cpu" + "cpuacct" together, and "net_cls"
1298 * + "net_prio". We'd like to add "cpuset" to the mix, but
1299 * "cpuset" doesn't really work for groups with no initialized
1300 * attributes. */
1301
1302 arg_join_controllers = new(char**, 3);
1303 if (!arg_join_controllers)
1304 return -ENOMEM;
1305
1306 arg_join_controllers[0] = strv_new("cpu", "cpuacct", NULL);
1307 if (!arg_join_controllers[0])
1308 goto oom;
1309
1310 arg_join_controllers[1] = strv_new("net_cls", "net_prio", NULL);
1311 if (!arg_join_controllers[1])
1312 goto oom;
1313
1314 arg_join_controllers[2] = NULL;
1315 return 0;
1316
1317 oom:
1318 arg_join_controllers = strv_free_free(arg_join_controllers);
1319 return -ENOMEM;
1320 }
1321
1322 static int enforce_syscall_archs(Set *archs) {
1323 #if HAVE_SECCOMP
1324 int r;
1325
1326 if (!is_seccomp_available())
1327 return 0;
1328
1329 r = seccomp_restrict_archs(arg_syscall_archs);
1330 if (r < 0)
1331 return log_error_errno(r, "Failed to enforce system call architecture restrication: %m");
1332 #endif
1333 return 0;
1334 }
1335
1336 static int status_welcome(void) {
1337 _cleanup_free_ char *pretty_name = NULL, *ansi_color = NULL;
1338 int r;
1339
1340 r = parse_env_file("/etc/os-release", NEWLINE,
1341 "PRETTY_NAME", &pretty_name,
1342 "ANSI_COLOR", &ansi_color,
1343 NULL);
1344 if (r == -ENOENT)
1345 r = parse_env_file("/usr/lib/os-release", NEWLINE,
1346 "PRETTY_NAME", &pretty_name,
1347 "ANSI_COLOR", &ansi_color,
1348 NULL);
1349
1350 if (r < 0 && r != -ENOENT)
1351 log_warning_errno(r, "Failed to read os-release file: %m");
1352
1353 if (log_get_show_color())
1354 return status_printf(NULL, false, false,
1355 "\nWelcome to \x1B[%sm%s\x1B[0m!\n",
1356 isempty(ansi_color) ? "1" : ansi_color,
1357 isempty(pretty_name) ? "Linux" : pretty_name);
1358 else
1359 return status_printf(NULL, false, false,
1360 "\nWelcome to %s!\n",
1361 isempty(pretty_name) ? "Linux" : pretty_name);
1362 }
1363
1364 static int write_container_id(void) {
1365 const char *c;
1366 int r;
1367
1368 c = getenv("container");
1369 if (isempty(c))
1370 return 0;
1371
1372 RUN_WITH_UMASK(0022)
1373 r = write_string_file("/run/systemd/container", c, WRITE_STRING_FILE_CREATE);
1374 if (r < 0)
1375 return log_warning_errno(r, "Failed to write /run/systemd/container, ignoring: %m");
1376
1377 return 1;
1378 }
1379
1380 static int bump_unix_max_dgram_qlen(void) {
1381 _cleanup_free_ char *qlen = NULL;
1382 unsigned long v;
1383 int r;
1384
1385 /* Let's bump the net.unix.max_dgram_qlen sysctl. The kernel
1386 * default of 16 is simply too low. We set the value really
1387 * really early during boot, so that it is actually applied to
1388 * all our sockets, including the $NOTIFY_SOCKET one. */
1389
1390 r = read_one_line_file("/proc/sys/net/unix/max_dgram_qlen", &qlen);
1391 if (r < 0)
1392 return log_warning_errno(r, "Failed to read AF_UNIX datagram queue length, ignoring: %m");
1393
1394 r = safe_atolu(qlen, &v);
1395 if (r < 0)
1396 return log_warning_errno(r, "Failed to parse AF_UNIX datagram queue length, ignoring: %m");
1397
1398 if (v >= DEFAULT_UNIX_MAX_DGRAM_QLEN)
1399 return 0;
1400
1401 qlen = mfree(qlen);
1402 if (asprintf(&qlen, "%lu\n", DEFAULT_UNIX_MAX_DGRAM_QLEN) < 0)
1403 return log_oom();
1404
1405 r = write_string_file("/proc/sys/net/unix/max_dgram_qlen", qlen, 0);
1406 if (r < 0)
1407 return log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
1408 "Failed to bump AF_UNIX datagram queue length, ignoring: %m");
1409
1410 return 1;
1411 }
1412
1413 static int fixup_environment(void) {
1414 _cleanup_free_ char *term = NULL;
1415 int r;
1416
1417 /* We expect the environment to be set correctly
1418 * if run inside a container. */
1419 if (detect_container() > 0)
1420 return 0;
1421
1422 /* When started as PID1, the kernel uses /dev/console
1423 * for our stdios and uses TERM=linux whatever the
1424 * backend device used by the console. We try to make
1425 * a better guess here since some consoles might not
1426 * have support for color mode for example.
1427 *
1428 * However if TERM was configured through the kernel
1429 * command line then leave it alone. */
1430
1431 r = proc_cmdline_get_key("TERM", 0, &term);
1432 if (r < 0)
1433 return r;
1434 if (r == 0) {
1435 term = strdup(default_term_for_tty("/dev/console"));
1436 if (!term)
1437 return -ENOMEM;
1438 }
1439
1440 if (setenv("TERM", term, 1) < 0)
1441 return -errno;
1442
1443 return 0;
1444 }
1445
/* When built with SysV compatibility: if we are not PID 1 and were invoked under a name containing
 * "init", replace the process with SYSTEMCTL_BINARY_PATH, passing argv through unchanged. If that exec
 * fails the error is logged and the process exits with status 1. Without SysV compatibility this does
 * nothing. */
static void redirect_telinit(int argc, char *argv[]) {

        /* This is compatibility support for SysV, where calling init as a user is identical to telinit. */

#if HAVE_SYSV_COMPAT
        if (getpid_cached() == 1 ||
            !strstr(program_invocation_short_name, "init"))
                return;

        execv(SYSTEMCTL_BINARY_PATH, argv);

        /* We only get here if the exec did not work */
        log_error_errno(errno, "Failed to exec " SYSTEMCTL_BINARY_PATH ": %m");
        exit(1);
#endif
}
1462
1463 static int become_shutdown(
1464 const char *shutdown_verb,
1465 int retval) {
1466
1467 char log_level[DECIMAL_STR_MAX(int) + 1],
1468 exit_code[DECIMAL_STR_MAX(uint8_t) + 1];
1469
1470 const char* command_line[11] = {
1471 SYSTEMD_SHUTDOWN_BINARY_PATH,
1472 shutdown_verb,
1473 "--log-level", log_level,
1474 "--log-target",
1475 };
1476
1477 _cleanup_strv_free_ char **env_block = NULL;
1478 size_t pos = 5;
1479 int r;
1480
1481 assert(shutdown_verb);
1482 assert(!command_line[pos]);
1483 env_block = strv_copy(environ);
1484
1485 xsprintf(log_level, "%d", log_get_max_level());
1486
1487 switch (log_get_target()) {
1488
1489 case LOG_TARGET_KMSG:
1490 case LOG_TARGET_JOURNAL_OR_KMSG:
1491 case LOG_TARGET_SYSLOG_OR_KMSG:
1492 command_line[pos++] = "kmsg";
1493 break;
1494
1495 case LOG_TARGET_NULL:
1496 command_line[pos++] = "null";
1497 break;
1498
1499 case LOG_TARGET_CONSOLE:
1500 default:
1501 command_line[pos++] = "console";
1502 break;
1503 };
1504
1505 if (log_get_show_color())
1506 command_line[pos++] = "--log-color";
1507
1508 if (log_get_show_location())
1509 command_line[pos++] = "--log-location";
1510
1511 if (streq(shutdown_verb, "exit")) {
1512 command_line[pos++] = "--exit-code";
1513 command_line[pos++] = exit_code;
1514 xsprintf(exit_code, "%d", retval);
1515 }
1516
1517 assert(pos < ELEMENTSOF(command_line));
1518
1519 if (streq(shutdown_verb, "reboot") &&
1520 arg_shutdown_watchdog > 0 &&
1521 arg_shutdown_watchdog != USEC_INFINITY) {
1522
1523 char *e;
1524
1525 /* If we reboot let's set the shutdown
1526 * watchdog and tell the shutdown binary to
1527 * repeatedly ping it */
1528 r = watchdog_set_timeout(&arg_shutdown_watchdog);
1529 watchdog_close(r < 0);
1530
1531 /* Tell the binary how often to ping, ignore failure */
1532 if (asprintf(&e, "WATCHDOG_USEC="USEC_FMT, arg_shutdown_watchdog) > 0)
1533 (void) strv_consume(&env_block, e);
1534
1535 if (arg_watchdog_device &&
1536 asprintf(&e, "WATCHDOG_DEVICE=%s", arg_watchdog_device) > 0)
1537 (void) strv_consume(&env_block, e);
1538 } else
1539 watchdog_close(true);
1540
1541 /* Avoid the creation of new processes forked by the
1542 * kernel; at this point, we will not listen to the
1543 * signals anyway */
1544 if (detect_container() <= 0)
1545 (void) cg_uninstall_release_agent(SYSTEMD_CGROUP_CONTROLLER);
1546
1547 execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);
1548 return -errno;
1549 }
1550
/* Early clock setup: applies the kernel time zone if the RTC runs in local time, otherwise (outside of the
 * initrd) resets the kernel's time warp, and finally invokes clock_apply_epoch(). All failures are logged
 * and otherwise ignored. */
static void initialize_clock(void) {
        int r;

        if (clock_is_localtime(NULL) > 0) {
                int delta_minutes;

                /*
                 * The very first call of settimeofday() also does a time warp in the kernel.
                 *
                 * In the rtc-in-local time mode, we set the kernel's timezone, and rely on external tools
                 * to take care of maintaining the RTC and do all adjustments. This matches the behavior of
                 * Windows, which leaves the RTC alone if the registry tells that the RTC runs in UTC.
                 */
                r = clock_set_timezone(&delta_minutes);
                if (r >= 0)
                        log_info("RTC configured in localtime, applying delta of %i minutes to system time.", delta_minutes);
                else
                        log_error_errno(r, "Failed to apply local time delta, ignoring: %m");

        } else if (!in_initrd())
                /*
                 * Do a dummy very first call to seal the kernel's time warp magic.
                 *
                 * Do not call this from inside the initrd. The initrd might not carry /etc/adjtime with
                 * LOCAL, but the real system could be set up that way. In such case, we need to delay the
                 * time-warp or the sealing until we reach the real system.
                 *
                 * Do not set the kernel's timezone. The concept of local time cannot be supported
                 * reliably, the time will jump or be incorrect at every daylight saving time change. All
                 * kernel local time concepts will be treated as UTC that way.
                 */
                (void) clock_reset_timewarp();

        r = clock_apply_epoch();
        if (r > 0)
                log_info("System time before build time, advancing clock.");
        else if (r < 0)
                log_error_errno(r, "Current system time is before build time, but cannot correct: %m");
}
1591
1592 static void initialize_coredump(bool skip_setup) {
1593
1594 if (getpid_cached() != 1)
1595 return;
1596
1597 /* Don't limit the core dump size, so that coredump handlers such as systemd-coredump (which honour the limit)
1598 * will process core dumps for system services by default. */
1599 if (setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY)) < 0)
1600 log_warning_errno(errno, "Failed to set RLIMIT_CORE: %m");
1601
1602 /* But at the same time, turn off the core_pattern logic by default, so that no coredumps are stored
1603 * until the systemd-coredump tool is enabled via sysctl. */
1604 if (!skip_setup)
1605 (void) write_string_file("/proc/sys/kernel/core_pattern", "|/bin/false", 0);
1606 }
1607
1608 static void do_reexecute(
1609 int argc,
1610 char *argv[],
1611 const struct rlimit *saved_rlimit_nofile,
1612 const struct rlimit *saved_rlimit_memlock,
1613 FDSet *fds,
1614 const char *switch_root_dir,
1615 const char *switch_root_init,
1616 const char **ret_error_message) {
1617
1618 unsigned i, j, args_size;
1619 const char **args;
1620 int r;
1621
1622 assert(saved_rlimit_nofile);
1623 assert(saved_rlimit_memlock);
1624 assert(ret_error_message);
1625
1626 /* Close and disarm the watchdog, so that the new instance can reinitialize it, but doesn't get rebooted while
1627 * we do that */
1628 watchdog_close(true);
1629
1630 /* Reset the RLIMIT_NOFILE to the kernel default, so that the new systemd can pass the kernel default to its
1631 * child processes */
1632
1633 if (saved_rlimit_nofile->rlim_cur > 0)
1634 (void) setrlimit(RLIMIT_NOFILE, saved_rlimit_nofile);
1635 if (saved_rlimit_memlock->rlim_cur != (rlim_t) -1)
1636 (void) setrlimit(RLIMIT_MEMLOCK, saved_rlimit_memlock);
1637
1638 if (switch_root_dir) {
1639 /* Kill all remaining processes from the initrd, but don't wait for them, so that we can handle the
1640 * SIGCHLD for them after deserializing. */
1641 broadcast_signal(SIGTERM, false, true);
1642
1643 /* And switch root with MS_MOVE, because we remove the old directory afterwards and detach it. */
1644 r = switch_root(switch_root_dir, "/mnt", true, MS_MOVE);
1645 if (r < 0)
1646 log_error_errno(r, "Failed to switch root, trying to continue: %m");
1647 }
1648
1649 args_size = MAX(6, argc+1);
1650 args = newa(const char*, args_size);
1651
1652 if (!switch_root_init) {
1653 char sfd[DECIMAL_STR_MAX(int) + 1];
1654
1655 /* First try to spawn ourselves with the right path, and with full serialization. We do this only if
1656 * the user didn't specify an explicit init to spawn. */
1657
1658 assert(arg_serialization);
1659 assert(fds);
1660
1661 xsprintf(sfd, "%i", fileno(arg_serialization));
1662
1663 i = 0;
1664 args[i++] = SYSTEMD_BINARY_PATH;
1665 if (switch_root_dir)
1666 args[i++] = "--switched-root";
1667 args[i++] = arg_system ? "--system" : "--user";
1668 args[i++] = "--deserialize";
1669 args[i++] = sfd;
1670 args[i++] = NULL;
1671
1672 assert(i <= args_size);
1673
1674 /*
1675 * We want valgrind to print its memory usage summary before reexecution. Valgrind won't do this is on
1676 * its own on exec(), but it will do it on exit(). Hence, to ensure we get a summary here, fork() off
1677 * a child, let it exit() cleanly, so that it prints the summary, and wait() for it in the parent,
1678 * before proceeding into the exec().
1679 */
1680 valgrind_summary_hack();
1681
1682 (void) execv(args[0], (char* const*) args);
1683 log_debug_errno(errno, "Failed to execute our own binary, trying fallback: %m");
1684 }
1685
1686 /* Try the fallback, if there is any, without any serialization. We pass the original argv[] and envp[]. (Well,
1687 * modulo the ordering changes due to getopt() in argv[], and some cleanups in envp[], but let's hope that
1688 * doesn't matter.) */
1689
1690 arg_serialization = safe_fclose(arg_serialization);
1691 fds = fdset_free(fds);
1692
1693 /* Reopen the console */
1694 (void) make_console_stdio();
1695
1696 for (j = 1, i = 1; j < (unsigned) argc; j++)
1697 args[i++] = argv[j];
1698 args[i++] = NULL;
1699 assert(i <= args_size);
1700
1701 /* Reenable any blocked signals, especially important if we switch from initial ramdisk to init=... */
1702 (void) reset_all_signal_handlers();
1703 (void) reset_signal_mask();
1704
1705 if (switch_root_init) {
1706 args[0] = switch_root_init;
1707 (void) execv(args[0], (char* const*) args);
1708 log_warning_errno(errno, "Failed to execute configured init, trying fallback: %m");
1709 }
1710
1711 args[0] = "/sbin/init";
1712 (void) execv(args[0], (char* const*) args);
1713 r = -errno;
1714
1715 manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
1716 ANSI_HIGHLIGHT_RED " !! " ANSI_NORMAL,
1717 "Failed to execute /sbin/init");
1718
1719 if (r == -ENOENT) {
1720 log_warning("No /sbin/init, trying fallback");
1721
1722 args[0] = "/bin/sh";
1723 args[1] = NULL;
1724 (void) execv(args[0], (char* const*) args);
1725 log_error_errno(errno, "Failed to execute /bin/sh, giving up: %m");
1726 } else
1727 log_warning_errno(r, "Failed to execute /sbin/init, giving up: %m");
1728
1729 *ret_error_message = "Failed to execute fallback shell";
1730 }
1731
1732 static int invoke_main_loop(
1733 Manager *m,
1734 bool *ret_reexecute,
1735 int *ret_retval, /* Return parameters relevant for shutting down */
1736 const char **ret_shutdown_verb, /* … */
1737 FDSet **ret_fds, /* Return parameters for reexecuting */
1738 char **ret_switch_root_dir, /* … */
1739 char **ret_switch_root_init, /* … */
1740 const char **ret_error_message) {
1741
1742 int r;
1743
1744 assert(m);
1745 assert(ret_reexecute);
1746 assert(ret_retval);
1747 assert(ret_shutdown_verb);
1748 assert(ret_fds);
1749 assert(ret_switch_root_dir);
1750 assert(ret_switch_root_init);
1751 assert(ret_error_message);
1752
1753 for (;;) {
1754 r = manager_loop(m);
1755 if (r < 0) {
1756 *ret_error_message = "Failed to run main loop";
1757 return log_emergency_errno(r, "Failed to run main loop: %m");
1758 }
1759
1760 switch (m->exit_code) {
1761
1762 case MANAGER_RELOAD:
1763 log_info("Reloading.");
1764
1765 r = parse_config_file();
1766 if (r < 0)
1767 log_warning_errno(r, "Failed to parse config file, ignoring: %m");
1768
1769 set_manager_defaults(m);
1770
1771 r = manager_reload(m);
1772 if (r < 0)
1773 log_warning_errno(r, "Failed to reload, ignoring: %m");
1774
1775 break;
1776
1777 case MANAGER_REEXECUTE:
1778
1779 r = prepare_reexecute(m, &arg_serialization, ret_fds, false);
1780 if (r < 0) {
1781 *ret_error_message = "Failed to prepare for reexecution";
1782 return r;
1783 }
1784
1785 log_notice("Reexecuting.");
1786
1787 *ret_reexecute = true;
1788 *ret_retval = EXIT_SUCCESS;
1789 *ret_shutdown_verb = NULL;
1790 *ret_switch_root_dir = *ret_switch_root_init = NULL;
1791
1792 return 0;
1793
1794 case MANAGER_SWITCH_ROOT:
1795 if (!m->switch_root_init) {
1796 r = prepare_reexecute(m, &arg_serialization, ret_fds, true);
1797 if (r < 0) {
1798 *ret_error_message = "Failed to prepare for reexecution";
1799 return r;
1800 }
1801 } else
1802 *ret_fds = NULL;
1803
1804 log_notice("Switching root.");
1805
1806 *ret_reexecute = true;
1807 *ret_retval = EXIT_SUCCESS;
1808 *ret_shutdown_verb = NULL;
1809
1810 /* Steal the switch root parameters */
1811 *ret_switch_root_dir = m->switch_root;
1812 *ret_switch_root_init = m->switch_root_init;
1813 m->switch_root = m->switch_root_init = NULL;
1814
1815 return 0;
1816
1817 case MANAGER_EXIT:
1818
1819 if (MANAGER_IS_USER(m)) {
1820 log_debug("Exit.");
1821
1822 *ret_reexecute = false;
1823 *ret_retval = m->return_value;
1824 *ret_shutdown_verb = NULL;
1825 *ret_fds = NULL;
1826 *ret_switch_root_dir = *ret_switch_root_init = NULL;
1827
1828 return 0;
1829 }
1830
1831 _fallthrough_;
1832 case MANAGER_REBOOT:
1833 case MANAGER_POWEROFF:
1834 case MANAGER_HALT:
1835 case MANAGER_KEXEC: {
1836 static const char * const table[_MANAGER_EXIT_CODE_MAX] = {
1837 [MANAGER_EXIT] = "exit",
1838 [MANAGER_REBOOT] = "reboot",
1839 [MANAGER_POWEROFF] = "poweroff",
1840 [MANAGER_HALT] = "halt",
1841 [MANAGER_KEXEC] = "kexec"
1842 };
1843
1844 log_notice("Shutting down.");
1845
1846 *ret_reexecute = false;
1847 *ret_retval = m->return_value;
1848 assert_se(*ret_shutdown_verb = table[m->exit_code]);
1849 *ret_fds = NULL;
1850 *ret_switch_root_dir = *ret_switch_root_init = NULL;
1851
1852 return 0;
1853 }
1854
1855 default:
1856 assert_not_reached("Unknown exit code.");
1857 }
1858 }
1859 }
1860
1861 static void log_execution_mode(bool *ret_first_boot) {
1862 assert(ret_first_boot);
1863
1864 if (arg_system) {
1865 int v;
1866
1867 log_info(PACKAGE_STRING " running in %ssystem mode. (" SYSTEMD_FEATURES ")",
1868 arg_action == ACTION_TEST ? "test " : "" );
1869
1870 v = detect_virtualization();
1871 if (v > 0)
1872 log_info("Detected virtualization %s.", virtualization_to_string(v));
1873
1874 log_info("Detected architecture %s.", architecture_to_string(uname_architecture()));
1875
1876 if (in_initrd()) {
1877 *ret_first_boot = false;
1878 log_info("Running in initial RAM disk.");
1879 } else {
1880 /* Let's check whether we are in first boot, i.e. whether /etc is still unpopulated. We use
1881 * /etc/machine-id as flag file, for this: if it exists we assume /etc is populated, if it
1882 * doesn't it's unpopulated. This allows container managers and installers to provision a
1883 * couple of files already. If the container manager wants to provision the machine ID itself
1884 * it should pass $container_uuid to PID 1. */
1885
1886 *ret_first_boot = access("/etc/machine-id", F_OK) < 0;
1887 if (*ret_first_boot)
1888 log_info("Running with unpopulated /etc.");
1889 }
1890 } else {
1891 _cleanup_free_ char *t;
1892
1893 t = uid_to_name(getuid());
1894 log_debug(PACKAGE_STRING " running in %suser mode for user " UID_FMT "/%s. (" SYSTEMD_FEATURES ")",
1895 arg_action == ACTION_TEST ? " test" : "", getuid(), strna(t));
1896
1897 *ret_first_boot = false;
1898 }
1899 }
1900
1901 static int initialize_runtime(
1902 bool skip_setup,
1903 struct rlimit *saved_rlimit_nofile,
1904 struct rlimit *saved_rlimit_memlock,
1905 const char **ret_error_message) {
1906
1907 int r;
1908
1909 assert(ret_error_message);
1910
1911 /* Sets up various runtime parameters. Many of these initializations are conditionalized:
1912 *
1913 * - Some only apply to --system instances
1914 * - Some only apply to --user instances
1915 * - Some only apply when we first start up, but not when we reexecute
1916 */
1917
1918 if (arg_system && !skip_setup) {
1919 if (arg_show_status > 0)
1920 status_welcome();
1921
1922 hostname_setup();
1923 machine_id_setup(NULL, arg_machine_id, NULL);
1924 loopback_setup();
1925 bump_unix_max_dgram_qlen();
1926 test_usr();
1927 write_container_id();
1928 }
1929
1930 if (arg_system && arg_watchdog_device) {
1931 r = watchdog_set_device(arg_watchdog_device);
1932 if (r < 0)
1933 log_warning_errno(r, "Failed to set watchdog device to %s, ignoring: %m",
1934 arg_watchdog_device);
1935 }
1936
1937 if (arg_system && arg_runtime_watchdog > 0 && arg_runtime_watchdog != USEC_INFINITY)
1938 watchdog_set_timeout(&arg_runtime_watchdog);
1939
1940 if (arg_timer_slack_nsec != NSEC_INFINITY)
1941 if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0)
1942 log_error_errno(errno, "Failed to adjust timer slack: %m");
1943
1944 if (arg_system && !cap_test_all(arg_capability_bounding_set)) {
1945 r = capability_bounding_set_drop_usermode(arg_capability_bounding_set);
1946 if (r < 0) {
1947 *ret_error_message = "Failed to drop capability bounding set of usermode helpers";
1948 return log_emergency_errno(r, "Failed to drop capability bounding set of usermode helpers: %m");
1949 }
1950
1951 r = capability_bounding_set_drop(arg_capability_bounding_set, true);
1952 if (r < 0) {
1953 *ret_error_message = "Failed to drop capability bounding set";
1954 return log_emergency_errno(r, "Failed to drop capability bounding set: %m");
1955 }
1956 }
1957
1958 if (arg_syscall_archs) {
1959 r = enforce_syscall_archs(arg_syscall_archs);
1960 if (r < 0) {
1961 *ret_error_message = "Failed to set syscall architectures";
1962 return r;
1963 }
1964 }
1965
1966 if (!arg_system)
1967 /* Become reaper of our children */
1968 if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0)
1969 log_warning_errno(errno, "Failed to make us a subreaper: %m");
1970
1971 if (arg_system) {
1972 /* Bump up RLIMIT_NOFILE for systemd itself */
1973 (void) bump_rlimit_nofile(saved_rlimit_nofile);
1974 (void) bump_rlimit_memlock(saved_rlimit_memlock);
1975 }
1976
1977 return 0;
1978 }
1979
1980 static int do_queue_default_job(
1981 Manager *m,
1982 const char **ret_error_message) {
1983
1984 _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
1985 Job *default_unit_job;
1986 Unit *target = NULL;
1987 int r;
1988
1989 log_debug("Activating default unit: %s", arg_default_unit);
1990
1991 r = manager_load_unit(m, arg_default_unit, NULL, &error, &target);
1992 if (r < 0)
1993 log_error("Failed to load default target: %s", bus_error_message(&error, r));
1994 else if (IN_SET(target->load_state, UNIT_ERROR, UNIT_NOT_FOUND))
1995 log_error_errno(target->load_error, "Failed to load default target: %m");
1996 else if (target->load_state == UNIT_MASKED)
1997 log_error("Default target masked.");
1998
1999 if (!target || target->load_state != UNIT_LOADED) {
2000 log_info("Trying to load rescue target...");
2001
2002 r = manager_load_unit(m, SPECIAL_RESCUE_TARGET, NULL, &error, &target);
2003 if (r < 0) {
2004 *ret_error_message = "Failed to load rescue target";
2005 return log_emergency_errno(r, "Failed to load rescue target: %s", bus_error_message(&error, r));
2006 } else if (IN_SET(target->load_state, UNIT_ERROR, UNIT_NOT_FOUND)) {
2007 *ret_error_message = "Failed to load rescue target";
2008 return log_emergency_errno(target->load_error, "Failed to load rescue target: %m");
2009 } else if (target->load_state == UNIT_MASKED) {
2010 *ret_error_message = "Rescue target masked";
2011 log_emergency("Rescue target masked.");
2012 return -ERFKILL;
2013 }
2014 }
2015
2016 assert(target->load_state == UNIT_LOADED);
2017
2018 r = manager_add_job(m, JOB_START, target, JOB_ISOLATE, &error, &default_unit_job);
2019 if (r == -EPERM) {
2020 log_debug_errno(r, "Default target could not be isolated, starting instead: %s", bus_error_message(&error, r));
2021
2022 sd_bus_error_free(&error);
2023
2024 r = manager_add_job(m, JOB_START, target, JOB_REPLACE, &error, &default_unit_job);
2025 if (r < 0) {
2026 *ret_error_message = "Failed to start default target";
2027 return log_emergency_errno(r, "Failed to start default target: %s", bus_error_message(&error, r));
2028 }
2029
2030 } else if (r < 0) {
2031 *ret_error_message = "Failed to isolate default target";
2032 return log_emergency_errno(r, "Failed to isolate default target: %s", bus_error_message(&error, r));
2033 }
2034
2035 m->default_unit_job_id = default_unit_job->id;
2036
2037 return 0;
2038 }
2039
2040 static void free_arguments(void) {
2041 size_t j;
2042
2043 /* Frees all arg_* variables, with the exception of arg_serialization */
2044
2045 for (j = 0; j < ELEMENTSOF(arg_default_rlimit); j++)
2046 arg_default_rlimit[j] = mfree(arg_default_rlimit[j]);
2047
2048 arg_default_unit = mfree(arg_default_unit);
2049 arg_confirm_spawn = mfree(arg_confirm_spawn);
2050 arg_join_controllers = strv_free_free(arg_join_controllers);
2051 arg_default_environment = strv_free(arg_default_environment);
2052 arg_syscall_archs = set_free(arg_syscall_archs);
2053 }
2054
2055 int main(int argc, char *argv[]) {
2056 Manager *m = NULL;
2057 int r, retval = EXIT_FAILURE;
2058 usec_t before_startup, after_startup;
2059 char timespan[FORMAT_TIMESPAN_MAX];
2060 FDSet *fds = NULL;
2061 bool reexecute = false;
2062 const char *shutdown_verb = NULL;
2063 dual_timestamp initrd_timestamp = DUAL_TIMESTAMP_NULL;
2064 dual_timestamp userspace_timestamp = DUAL_TIMESTAMP_NULL;
2065 dual_timestamp kernel_timestamp = DUAL_TIMESTAMP_NULL;
2066 dual_timestamp security_start_timestamp = DUAL_TIMESTAMP_NULL;
2067 dual_timestamp security_finish_timestamp = DUAL_TIMESTAMP_NULL;
2068 static char systemd[] = "systemd";
2069 bool skip_setup = false;
2070 bool loaded_policy = false;
2071 bool queue_default_job = false;
2072 bool first_boot = false;
2073 char *switch_root_dir = NULL, *switch_root_init = NULL;
2074 struct rlimit saved_rlimit_nofile = RLIMIT_MAKE_CONST(0), saved_rlimit_memlock = RLIMIT_MAKE_CONST((rlim_t) -1);
2075 const char *error_message = NULL;
2076
2077 redirect_telinit(argc, argv);
2078
2079 dual_timestamp_from_monotonic(&kernel_timestamp, 0);
2080 dual_timestamp_get(&userspace_timestamp);
2081
2082 /* Determine if this is a reexecution or normal bootup. We do
2083 * the full command line parsing much later, so let's just
2084 * have a quick peek here. */
2085 if (strv_find(argv+1, "--deserialize"))
2086 skip_setup = true;
2087
2088 /* If we have switched root, do all the special setup
2089 * things */
2090 if (strv_find(argv+1, "--switched-root"))
2091 skip_setup = false;
2092
2093 /* If we get started via the /sbin/init symlink then we are
2094 called 'init'. After a subsequent reexecution we are then
2095 called 'systemd'. That is confusing, hence let's call us
2096 systemd right-away. */
2097 program_invocation_short_name = systemd;
2098 (void) prctl(PR_SET_NAME, systemd);
2099
2100 saved_argv = argv;
2101 saved_argc = argc;
2102
2103 log_set_upgrade_syslog_to_journal(true);
2104
2105 if (getpid_cached() == 1) {
2106 /* Disable the umask logic */
2107 umask(0);
2108
2109 /* Always reopen /dev/console when running as PID 1 or one of its pre-execve() children. This is
2110 * important so that we never end up logging to any foreign stderr, for example if we have to log in a
2111 * child process right before execve()'ing the actual binary, at a point in time where socket
2112 * activation stderr/stdout area already set up. */
2113 log_set_always_reopen_console(true);
2114 }
2115
2116 if (getpid_cached() == 1 && detect_container() <= 0) {
2117
2118 /* Running outside of a container as PID 1 */
2119 arg_system = true;
2120 log_set_target(LOG_TARGET_KMSG);
2121 log_open();
2122
2123 if (in_initrd())
2124 initrd_timestamp = userspace_timestamp;
2125
2126 if (!skip_setup) {
2127 r = mount_setup_early();
2128 if (r < 0) {
2129 error_message = "Failed to mount early API filesystems";
2130 goto finish;
2131 }
2132
2133 dual_timestamp_get(&security_start_timestamp);
2134 if (mac_selinux_setup(&loaded_policy) < 0) {
2135 error_message = "Failed to load SELinux policy";
2136 goto finish;
2137 } else if (mac_smack_setup(&loaded_policy) < 0) {
2138 error_message = "Failed to load SMACK policy";
2139 goto finish;
2140 } else if (ima_setup() < 0) {
2141 error_message = "Failed to load IMA policy";
2142 goto finish;
2143 }
2144 dual_timestamp_get(&security_finish_timestamp);
2145 }
2146
2147 if (mac_selinux_init() < 0) {
2148 error_message = "Failed to initialize SELinux policy";
2149 goto finish;
2150 }
2151
2152 if (!skip_setup)
2153 initialize_clock();
2154
2155 /* Set the default for later on, but don't actually
2156 * open the logs like this for now. Note that if we
2157 * are transitioning from the initrd there might still
2158 * be journal fd open, and we shouldn't attempt
2159 * opening that before we parsed /proc/cmdline which
2160 * might redirect output elsewhere. */
2161 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
2162
2163 } else if (getpid_cached() == 1) {
2164 /* Running inside a container, as PID 1 */
2165 arg_system = true;
2166 log_set_target(LOG_TARGET_CONSOLE);
2167 log_close_console(); /* force reopen of /dev/console */
2168 log_open();
2169
2170 /* For later on, see above... */
2171 log_set_target(LOG_TARGET_JOURNAL);
2172
2173 /* clear the kernel timestamp,
2174 * because we are in a container */
2175 kernel_timestamp = DUAL_TIMESTAMP_NULL;
2176 } else {
2177 /* Running as user instance */
2178 arg_system = false;
2179 log_set_target(LOG_TARGET_AUTO);
2180 log_open();
2181
2182 /* clear the kernel timestamp,
2183 * because we are not PID 1 */
2184 kernel_timestamp = DUAL_TIMESTAMP_NULL;
2185 }
2186
2187 initialize_coredump(skip_setup);
2188
2189 if (arg_system) {
2190 if (fixup_environment() < 0) {
2191 error_message = "Failed to fix up PID1 environment";
2192 goto finish;
2193 }
2194
2195 /* Try to figure out if we can use colors with the console. No
2196 * need to do that for user instances since they never log
2197 * into the console. */
2198 log_show_color(colors_enabled());
2199 r = make_null_stdio();
2200 if (r < 0)
2201 log_warning_errno(r, "Failed to redirect standard streams to /dev/null: %m");
2202 }
2203
2204 r = initialize_join_controllers();
2205 if (r < 0) {
2206 error_message = "Failed to initialize cgroup controllers";
2207 goto finish;
2208 }
2209
2210 /* Mount /proc, /sys and friends, so that /proc/cmdline and
2211 * /proc/$PID/fd is available. */
2212 if (getpid_cached() == 1) {
2213
2214 /* Load the kernel modules early. */
2215 if (!skip_setup)
2216 kmod_setup();
2217
2218 r = mount_setup(loaded_policy);
2219 if (r < 0) {
2220 error_message = "Failed to mount API filesystems";
2221 goto finish;
2222 }
2223 }
2224
2225 /* Reset all signal handlers. */
2226 (void) reset_all_signal_handlers();
2227 (void) ignore_signals(SIGNALS_IGNORE, -1);
2228
2229 arg_default_tasks_max = system_tasks_max_scale(DEFAULT_TASKS_MAX_PERCENTAGE, 100U);
2230
2231 if (parse_config_file() < 0) {
2232 error_message = "Failed to parse config file";
2233 goto finish;
2234 }
2235
2236 if (arg_system) {
2237 r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, 0);
2238 if (r < 0)
2239 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
2240 }
2241
2242 /* Note that this also parses bits from the kernel command
2243 * line, including "debug". */
2244 log_parse_environment();
2245
2246 if (parse_argv(argc, argv) < 0) {
2247 error_message = "Failed to parse commandline arguments";
2248 goto finish;
2249 }
2250
2251 /* Initialize default unit */
2252 if (!arg_default_unit) {
2253 arg_default_unit = strdup(SPECIAL_DEFAULT_TARGET);
2254 if (!arg_default_unit) {
2255 r = log_oom();
2256 error_message = "Failed to set default unit";
2257 goto finish;
2258 }
2259 }
2260
2261 if (arg_action == ACTION_TEST &&
2262 geteuid() == 0) {
2263 log_error("Don't run test mode as root.");
2264 goto finish;
2265 }
2266
2267 if (!arg_system &&
2268 arg_action == ACTION_RUN &&
2269 sd_booted() <= 0) {
2270 log_error("Trying to run as user instance, but the system has not been booted with systemd.");
2271 goto finish;
2272 }
2273
2274 if (arg_system &&
2275 arg_action == ACTION_RUN &&
2276 running_in_chroot() > 0) {
2277 log_error("Cannot be run in a chroot() environment.");
2278 goto finish;
2279 }
2280
2281 if (IN_SET(arg_action, ACTION_TEST, ACTION_HELP)) {
2282 pager_open(arg_no_pager, false);
2283 skip_setup = true;
2284 }
2285
2286 if (arg_action == ACTION_HELP) {
2287 retval = help();
2288 goto finish;
2289 } else if (arg_action == ACTION_VERSION) {
2290 retval = version();
2291 goto finish;
2292 } else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) {
2293 pager_open(arg_no_pager, false);
2294 unit_dump_config_items(stdout);
2295 retval = EXIT_SUCCESS;
2296 goto finish;
2297 }
2298
2299 if (!arg_system &&
2300 !getenv("XDG_RUNTIME_DIR")) {
2301 log_error("Trying to run as user instance, but $XDG_RUNTIME_DIR is not set.");
2302 goto finish;
2303 }
2304
2305 assert_se(IN_SET(arg_action, ACTION_RUN, ACTION_TEST));
2306
2307 /* Close logging fds, in order not to confuse fdset below */
2308 log_close();
2309
2310 /* Remember open file descriptors for later deserialization */
2311 if (arg_action == ACTION_RUN) {
2312 r = fdset_new_fill(&fds);
2313 if (r < 0) {
2314 log_emergency_errno(r, "Failed to allocate fd set: %m");
2315 error_message = "Failed to allocate fd set";
2316 goto finish;
2317 } else
2318 fdset_cloexec(fds, true);
2319
2320 if (arg_serialization)
2321 assert_se(fdset_remove(fds, fileno(arg_serialization)) >= 0);
2322
2323 if (arg_system)
2324 /* Become a session leader if we aren't one yet. */
2325 setsid();
2326 }
2327
2328 /* Move out of the way, so that we won't block unmounts */
2329 assert_se(chdir("/") == 0);
2330
2331 /* Reset the console, but only if this is really init and we
2332 * are freshly booted */
2333 if (arg_system && arg_action == ACTION_RUN) {
2334
2335 /* If we are init, we connect stdin/stdout/stderr to
2336 * /dev/null and make sure we don't have a controlling
2337 * tty. */
2338 release_terminal();
2339
2340 if (getpid_cached() == 1 && !skip_setup)
2341 console_setup();
2342 }
2343
2344 /* Open the logging devices, if possible and necessary */
2345 log_open();
2346
2347 if (arg_show_status == _SHOW_STATUS_UNSET)
2348 arg_show_status = SHOW_STATUS_YES;
2349
2350 /* Make sure we leave a core dump without panicing the
2351 * kernel. */
2352 if (getpid_cached() == 1) {
2353 install_crash_handler();
2354
2355 r = mount_cgroup_controllers(arg_join_controllers);
2356 if (r < 0)
2357 goto finish;
2358 }
2359
2360 log_execution_mode(&first_boot);
2361
2362 if (arg_action == ACTION_RUN) {
2363 r = initialize_runtime(skip_setup,
2364 &saved_rlimit_nofile,
2365 &saved_rlimit_memlock,
2366 &error_message);
2367 if (r < 0)
2368 goto finish;
2369 }
2370
2371 r = manager_new(arg_system ? UNIT_FILE_SYSTEM : UNIT_FILE_USER,
2372 arg_action == ACTION_TEST ? MANAGER_TEST_FULL : 0,
2373 &m);
2374 if (r < 0) {
2375 log_emergency_errno(r, "Failed to allocate manager object: %m");
2376 error_message = "Failed to allocate manager object";
2377 goto finish;
2378 }
2379
2380 m->timestamps[MANAGER_TIMESTAMP_KERNEL] = kernel_timestamp;
2381 m->timestamps[MANAGER_TIMESTAMP_INITRD] = initrd_timestamp;
2382 m->timestamps[MANAGER_TIMESTAMP_USERSPACE] = userspace_timestamp;
2383 m->timestamps[MANAGER_TIMESTAMP_SECURITY_START] = security_start_timestamp;
2384 m->timestamps[MANAGER_TIMESTAMP_SECURITY_FINISH] = security_finish_timestamp;
2385
2386 set_manager_defaults(m);
2387 set_manager_settings(m);
2388 manager_set_first_boot(m, first_boot);
2389
2390 /* Remember whether we should queue the default job */
2391 queue_default_job = !arg_serialization || arg_switched_root;
2392
2393 before_startup = now(CLOCK_MONOTONIC);
2394
2395 r = manager_startup(m, arg_serialization, fds);
2396 if (r < 0) {
2397 log_error_errno(r, "Failed to fully start up daemon: %m");
2398 error_message = "Failed to start up manager";
2399 goto finish;
2400 }
2401
2402 /* This will close all file descriptors that were opened, but not claimed by any unit. */
2403 fds = fdset_free(fds);
2404 arg_serialization = safe_fclose(arg_serialization);
2405
2406 if (queue_default_job) {
2407 r = do_queue_default_job(m, &error_message);
2408 if (r < 0)
2409 goto finish;
2410 }
2411
2412 after_startup = now(CLOCK_MONOTONIC);
2413
2414 log_full(arg_action == ACTION_TEST ? LOG_INFO : LOG_DEBUG,
2415 "Loaded units and determined initial transaction in %s.",
2416 format_timespan(timespan, sizeof(timespan), after_startup - before_startup, 100 * USEC_PER_MSEC));
2417
2418 if (arg_system) {
2419 _cleanup_free_ char *taint;
2420
2421 taint = manager_taint_string(m);
2422 if (!isempty(taint))
2423 log_notice("System is tainted: %s", taint);
2424 }
2425
2426 if (arg_action == ACTION_TEST) {
2427 printf("-> By units:\n");
2428 manager_dump_units(m, stdout, "\t");
2429
2430 printf("-> By jobs:\n");
2431 manager_dump_jobs(m, stdout, "\t");
2432 retval = EXIT_SUCCESS;
2433 goto finish;
2434 }
2435
2436 r = invoke_main_loop(m,
2437 &reexecute,
2438 &retval,
2439 &shutdown_verb,
2440 &fds,
2441 &switch_root_dir,
2442 &switch_root_init,
2443 &error_message);
2444
2445 finish:
2446 pager_close();
2447
2448 if (m)
2449 arg_shutdown_watchdog = m->shutdown_watchdog;
2450
2451 m = manager_free(m);
2452
2453 free_arguments();
2454 mac_selinux_finish();
2455
2456 if (reexecute)
2457 do_reexecute(argc, argv,
2458 &saved_rlimit_nofile,
2459 &saved_rlimit_memlock,
2460 fds,
2461 switch_root_dir,
2462 switch_root_init,
2463 &error_message); /* This only returns if reexecution failed */
2464
2465 arg_serialization = safe_fclose(arg_serialization);
2466 fds = fdset_free(fds);
2467
2468 #if HAVE_VALGRIND_VALGRIND_H
2469 /* If we are PID 1 and running under valgrind, then let's exit
2470 * here explicitly. valgrind will only generate nice output on
2471 * exit(), not on exec(), hence let's do the former not the
2472 * latter here. */
2473 if (getpid_cached() == 1 && RUNNING_ON_VALGRIND) {
2474 /* Cleanup watchdog_device strings for valgrind. We need them
2475 * in become_shutdown() so normally we cannot free them yet. */
2476 watchdog_free_device();
2477 arg_watchdog_device = mfree(arg_watchdog_device);
2478 return 0;
2479 }
2480 #endif
2481
2482 if (shutdown_verb) {
2483 r = become_shutdown(shutdown_verb, retval);
2484
2485 log_error_errno(r, "Failed to execute shutdown binary, %s: %m", getpid_cached() == 1 ? "freezing" : "quitting");
2486 error_message = "Failed to execute shutdown binary";
2487 }
2488
2489 watchdog_free_device();
2490 arg_watchdog_device = mfree(arg_watchdog_device);
2491
2492 if (getpid_cached() == 1) {
2493 if (error_message)
2494 manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
2495 ANSI_HIGHLIGHT_RED "!!!!!!" ANSI_NORMAL,
2496 "%s, freezing.", error_message);
2497 freeze_or_reboot();
2498 }
2499
2500 return retval;
2501 }