]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/main.c
main: split out security policy loading into its own function
[thirdparty/systemd.git] / src / core / main.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2010 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <getopt.h>
24 #include <signal.h>
25 #include <stdio.h>
26 #include <string.h>
27 #include <sys/mount.h>
28 #include <sys/prctl.h>
29 #include <sys/reboot.h>
30 #include <sys/stat.h>
31 #include <unistd.h>
32 #if HAVE_SECCOMP
33 #include <seccomp.h>
34 #endif
35 #if HAVE_VALGRIND_VALGRIND_H
36 #include <valgrind/valgrind.h>
37 #endif
38
39 #include "sd-bus.h"
40 #include "sd-daemon.h"
41 #include "sd-messages.h"
42
43 #include "alloc-util.h"
44 #include "architecture.h"
45 #include "build.h"
46 #include "bus-error.h"
47 #include "bus-util.h"
48 #include "capability-util.h"
49 #include "clock-util.h"
50 #include "conf-parser.h"
51 #include "cpu-set-util.h"
52 #include "dbus-manager.h"
53 #include "def.h"
54 #include "emergency-action.h"
55 #include "env-util.h"
56 #include "fd-util.h"
57 #include "fdset.h"
58 #include "fileio.h"
59 #include "format-util.h"
60 #include "fs-util.h"
61 #include "hostname-setup.h"
62 #include "ima-setup.h"
63 #include "killall.h"
64 #include "kmod-setup.h"
65 #include "load-fragment.h"
66 #include "log.h"
67 #include "loopback-setup.h"
68 #include "machine-id-setup.h"
69 #include "manager.h"
70 #include "missing.h"
71 #include "mount-setup.h"
72 #include "pager.h"
73 #include "parse-util.h"
74 #include "path-util.h"
75 #include "proc-cmdline.h"
76 #include "process-util.h"
77 #include "raw-clone.h"
78 #include "rlimit-util.h"
79 #if HAVE_SECCOMP
80 #include "seccomp-util.h"
81 #endif
82 #include "selinux-setup.h"
83 #include "selinux-util.h"
84 #include "signal-util.h"
85 #include "smack-setup.h"
86 #include "special.h"
87 #include "stat-util.h"
88 #include "stdio-util.h"
89 #include "strv.h"
90 #include "switch-root.h"
91 #include "terminal-util.h"
92 #include "umask-util.h"
93 #include "user-util.h"
94 #include "virt.h"
95 #include "watchdog.h"
96
/* Global settings of the manager process. They start out with the defaults given here and are
 * overridden by the parsers further down in this file: parse_proc_cmdline_item() (kernel command
 * line), parse_config_file() (system.conf/user.conf) and parse_argv() (process command line). */

/* The operation to carry out; changed by parse_argv() switches such as --test or --version. */
97 static enum {
98 ACTION_RUN,
99 ACTION_HELP,
100 ACTION_VERSION,
101 ACTION_TEST,
102 ACTION_DUMP_CONFIGURATION_ITEMS
103 } arg_action = ACTION_RUN;
104 static char *arg_default_unit = NULL;
105 static bool arg_system = false; /* selects system.conf vs. user.conf in parse_config_file() */
106 static bool arg_dump_core = true; /* read by crash() */
107 static int arg_crash_chvt = -1; /* negative: no VT switch in crash(), see parse_crash_chvt() */
108 static bool arg_crash_shell = false; /* read by crash() */
109 static bool arg_crash_reboot = false; /* read by freeze_or_reboot() */
110 static char *arg_confirm_spawn = NULL; /* handed to the Manager by set_manager_settings() */
111 static ShowStatus arg_show_status = _SHOW_STATUS_UNSET; /* handed to the Manager by set_manager_settings() */
112 static bool arg_switched_root = false;
113 static bool arg_no_pager = false;
114 static char ***arg_join_controllers = NULL; /* NULL-terminated array of string lists, built by config_parse_join_controllers() */
/* The arg_default_* values below are copied into the Manager by set_manager_defaults(). */
115 static ExecOutput arg_default_std_output = EXEC_OUTPUT_JOURNAL;
116 static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT;
117 static usec_t arg_default_restart_usec = DEFAULT_RESTART_USEC;
118 static usec_t arg_default_timeout_start_usec = DEFAULT_TIMEOUT_USEC;
119 static usec_t arg_default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC;
120 static usec_t arg_default_start_limit_interval = DEFAULT_START_LIMIT_INTERVAL;
121 static unsigned arg_default_start_limit_burst = DEFAULT_START_LIMIT_BURST;
122 static usec_t arg_runtime_watchdog = 0; /* handed to the Manager by set_manager_settings() */
123 static usec_t arg_shutdown_watchdog = 10 * USEC_PER_MINUTE; /* handed to the Manager by set_manager_settings() */
124 static char *arg_watchdog_device = NULL;
125 static char **arg_default_environment = NULL;
126 static struct rlimit *arg_default_rlimit[_RLIMIT_MAX] = {}; /* indexed by RLIMIT_* constant, see parse_config_file() */
127 static uint64_t arg_capability_bounding_set = CAP_ALL;
128 static nsec_t arg_timer_slack_nsec = NSEC_INFINITY;
129 static usec_t arg_default_timer_accuracy_usec = 1 * USEC_PER_MINUTE;
130 static Set* arg_syscall_archs = NULL;
131 static FILE* arg_serialization = NULL; /* stream opened by parse_argv() from the fd passed with --deserialize */
132 static bool arg_default_cpu_accounting = false;
133 static bool arg_default_io_accounting = false;
134 static bool arg_default_ip_accounting = false;
135 static bool arg_default_blockio_accounting = false;
136 static bool arg_default_memory_accounting = false;
137 static bool arg_default_tasks_accounting = true;
138 static uint64_t arg_default_tasks_max = UINT64_MAX;
139 static sd_id128_t arg_machine_id = {}; /* only ever set to a non-null ID, see set_machine_id() */
140 static EmergencyAction arg_cad_burst_action = EMERGENCY_ACTION_REBOOT_FORCE; /* handed to the Manager by set_manager_settings() */
141
142 noreturn static void freeze_or_reboot(void) {
143
144 if (arg_crash_reboot) {
145 log_notice("Rebooting in 10s...");
146 (void) sleep(10);
147
148 log_notice("Rebooting now...");
149 (void) reboot(RB_AUTOBOOT);
150 log_emergency_errno(errno, "Failed to reboot: %m");
151 }
152
153 log_emergency("Freezing execution.");
154 freeze();
155 }
156
/* Handler for fatal signals, installed by install_crash_handler(). Never returns.
 *
 * Step 1 depends on who we are:
 *   - not PID 1: the signal is re-raised;
 *   - PID 1 with arg_dump_core off: only a message is logged;
 *   - PID 1 with arg_dump_core on: a child is forked that restores the default disposition of the
 *     signal and kills itself with it, so that the kernel can write a core dump; the parent waits for
 *     the child and logs whether a core was actually dumped.
 *
 * Afterwards the VT is switched if arg_crash_chvt >= 0, /bin/sh is spawned as crash shell if
 * arg_crash_shell is set, and finally freeze_or_reboot() is called. */
157 noreturn static void crash(int sig) {
158 struct sigaction sa;
159 pid_t pid;
160
161 if (getpid_cached() != 1)
162 /* Pass this on immediately, if this is not PID 1 */
163 (void) raise(sig);
164 else if (!arg_dump_core)
165 log_emergency("Caught <%s>, not dumping core.", signal_to_string(sig));
166 else {
167 sa = (struct sigaction) {
168 .sa_handler = nop_signal_handler,
169 .sa_flags = SA_NOCLDSTOP|SA_RESTART,
170 };
171
172 /* We want to wait for the core process, hence let's enable SIGCHLD */
173 (void) sigaction(SIGCHLD, &sa, NULL);
174
/* Fork off a child whose only job is to die from the same signal */
175 pid = raw_clone(SIGCHLD);
176 if (pid < 0)
177 log_emergency_errno(errno, "Caught <%s>, cannot fork for core dump: %m", signal_to_string(sig));
178 else if (pid == 0) {
179 /* Enable default signal handler for core dump */
180
181 sa = (struct sigaction) {
182 .sa_handler = SIG_DFL,
183 };
184 (void) sigaction(sig, &sa, NULL);
185
186 /* Don't limit the coredump size */
187 (void) setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY));
188
189 /* Just to be sure... */
190 (void) chdir("/");
191
192 /* Raise the signal again */
193 pid = raw_getpid();
194 (void) kill(pid, sig); /* raise() would kill the parent */
195
196 assert_not_reached("We shouldn't be here...");
197 _exit(EXIT_FAILURE);
198 } else {
199 siginfo_t status;
200 int r;
201
202 /* Order things nicely. */
203 r = wait_for_terminate(pid, &status);
204 if (r < 0)
205 log_emergency_errno(r, "Caught <%s>, waitpid() failed: %m", signal_to_string(sig));
/* Anything but CLD_DUMPED means the child ended without leaving a core */
206 else if (status.si_code != CLD_DUMPED)
207 log_emergency("Caught <%s>, core dump failed (child "PID_FMT", code=%s, status=%i/%s).",
208 signal_to_string(sig),
209 pid, sigchld_code_to_string(status.si_code),
210 status.si_status,
211 strna(status.si_code == CLD_EXITED
212 ? exit_status_to_string(status.si_status, EXIT_STATUS_MINIMAL)
213 : signal_to_string(status.si_status)));
214 else
215 log_emergency("Caught <%s>, dumped core as pid "PID_FMT".", signal_to_string(sig), pid);
216 }
217 }
218
/* A negative arg_crash_chvt means that no VT switch was requested */
219 if (arg_crash_chvt >= 0)
220 (void) chvt(arg_crash_chvt);
221
222 sa = (struct sigaction) {
223 .sa_handler = SIG_IGN,
224 .sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT|SA_RESTART,
225 };
226
227 /* Let the kernel reap children for us */
228 (void) sigaction(SIGCHLD, &sa, NULL);
229
230 if (arg_crash_shell) {
231 log_notice("Executing crash shell in 10s...");
232 (void) sleep(10);
233
234 pid = raw_clone(SIGCHLD);
235 if (pid < 0)
236 log_emergency_errno(errno, "Failed to fork off crash shell: %m");
237 else if (pid == 0) {
/* Child: new session, console as stdio, then become the shell */
238 (void) setsid();
239 (void) make_console_stdio();
240 (void) execle("/bin/sh", "/bin/sh", NULL, environ);
241
242 log_emergency_errno(errno, "execle() failed: %m");
243 _exit(EXIT_FAILURE);
244 } else {
245 log_info("Spawned crash shell as PID "PID_FMT".", pid);
246 (void) wait_for_terminate(pid, NULL);
247 }
248 }
249
/* Reboots or freezes, depending on arg_crash_reboot */
250 freeze_or_reboot();
251 }
252
253 static void install_crash_handler(void) {
254 static const struct sigaction sa = {
255 .sa_handler = crash,
256 .sa_flags = SA_NODEFER, /* So that we can raise the signal again from the signal handler */
257 };
258 int r;
259
260 /* We ignore the return value here, since, we don't mind if we
261 * cannot set up a crash handler */
262 r = sigaction_many(&sa, SIGNALS_CRASH_HANDLER, -1);
263 if (r < 0)
264 log_debug_errno(r, "I had trouble setting up the crash handler, ignoring: %m");
265 }
266
267 static int console_setup(void) {
268 _cleanup_close_ int tty_fd = -1;
269 int r;
270
271 tty_fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
272 if (tty_fd < 0)
273 return log_error_errno(tty_fd, "Failed to open /dev/console: %m");
274
275 /* We don't want to force text mode. plymouth may be showing
276 * pictures already from initrd. */
277 r = reset_terminal_fd(tty_fd, false);
278 if (r < 0)
279 return log_error_errno(r, "Failed to reset /dev/console: %m");
280
281 return 0;
282 }
283
284 static int parse_crash_chvt(const char *value) {
285 int b;
286
287 if (safe_atoi(value, &arg_crash_chvt) >= 0)
288 return 0;
289
290 b = parse_boolean(value);
291 if (b < 0)
292 return b;
293
294 if (b > 0)
295 arg_crash_chvt = 0; /* switch to where kmsg goes */
296 else
297 arg_crash_chvt = -1; /* turn off switching */
298
299 return 0;
300 }
301
/* Turns a confirm-spawn setting into the path of the console to ask on.
 *
 * value:   NULL or a true boolean selects "/dev/console"; a false boolean disables the feature; any
 *          other string is used verbatim if is_path() accepts it and gets "/dev/" prepended otherwise.
 * console: receives a newly allocated path, or NULL when disabled. Left untouched on failure.
 *
 * Returns 0 on success, -ENOMEM if the path could not be allocated. */
static int parse_confirm_spawn(const char *value, char **console) {
        char *path;
        int b;

        b = value ? parse_boolean(value) : 1;

        if (b == 0) {
                /* Explicitly turned off */
                *console = NULL;
                return 0;
        }

        if (b > 0)
                /* On, with the default tty */
                path = strdup("/dev/console");
        else if (is_path(value))
                /* On, with a fully qualified path */
                path = strdup(value);
        else
                /* On, with only a tty file name */
                path = strjoin("/dev/", value);

        if (!path)
                return -ENOMEM;

        *console = path;
        return 0;
}
323
324 static int set_machine_id(const char *m) {
325 sd_id128_t t;
326 assert(m);
327
328 if (sd_id128_from_string(m, &t) < 0)
329 return -EINVAL;
330
331 if (sd_id128_is_null(t))
332 return -EINVAL;
333
334 arg_machine_id = t;
335 return 0;
336 }
337
338 static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
339
340 int r;
341
342 assert(key);
343
344 if (STR_IN_SET(key, "systemd.unit", "rd.systemd.unit")) {
345
346 if (proc_cmdline_value_missing(key, value))
347 return 0;
348
349 if (!unit_name_is_valid(value, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
350 log_warning("Unit name specified on %s= is not valid, ignoring: %s", key, value);
351 else if (in_initrd() == !!startswith(key, "rd.")) {
352 if (free_and_strdup(&arg_default_unit, value) < 0)
353 return log_oom();
354 }
355
356 } else if (proc_cmdline_key_streq(key, "systemd.dump_core")) {
357
358 r = value ? parse_boolean(value) : true;
359 if (r < 0)
360 log_warning("Failed to parse dump core switch %s. Ignoring.", value);
361 else
362 arg_dump_core = r;
363
364 } else if (proc_cmdline_key_streq(key, "systemd.crash_chvt")) {
365
366 if (!value)
367 arg_crash_chvt = 0; /* turn on */
368 else if (parse_crash_chvt(value) < 0)
369 log_warning("Failed to parse crash chvt switch %s. Ignoring.", value);
370
371 } else if (proc_cmdline_key_streq(key, "systemd.crash_shell")) {
372
373 r = value ? parse_boolean(value) : true;
374 if (r < 0)
375 log_warning("Failed to parse crash shell switch %s. Ignoring.", value);
376 else
377 arg_crash_shell = r;
378
379 } else if (proc_cmdline_key_streq(key, "systemd.crash_reboot")) {
380
381 r = value ? parse_boolean(value) : true;
382 if (r < 0)
383 log_warning("Failed to parse crash reboot switch %s. Ignoring.", value);
384 else
385 arg_crash_reboot = r;
386
387 } else if (proc_cmdline_key_streq(key, "systemd.confirm_spawn")) {
388 char *s;
389
390 r = parse_confirm_spawn(value, &s);
391 if (r < 0)
392 log_warning_errno(r, "Failed to parse confirm_spawn switch %s. Ignoring.", value);
393 else {
394 free(arg_confirm_spawn);
395 arg_confirm_spawn = s;
396 }
397
398 } else if (proc_cmdline_key_streq(key, "systemd.show_status")) {
399
400 if (value) {
401 r = parse_show_status(value, &arg_show_status);
402 if (r < 0)
403 log_warning("Failed to parse show status switch %s. Ignoring.", value);
404 } else
405 arg_show_status = SHOW_STATUS_YES;
406
407 } else if (proc_cmdline_key_streq(key, "systemd.default_standard_output")) {
408
409 if (proc_cmdline_value_missing(key, value))
410 return 0;
411
412 r = exec_output_from_string(value);
413 if (r < 0)
414 log_warning("Failed to parse default standard output switch %s. Ignoring.", value);
415 else
416 arg_default_std_output = r;
417
418 } else if (proc_cmdline_key_streq(key, "systemd.default_standard_error")) {
419
420 if (proc_cmdline_value_missing(key, value))
421 return 0;
422
423 r = exec_output_from_string(value);
424 if (r < 0)
425 log_warning("Failed to parse default standard error switch %s. Ignoring.", value);
426 else
427 arg_default_std_error = r;
428
429 } else if (streq(key, "systemd.setenv")) {
430
431 if (proc_cmdline_value_missing(key, value))
432 return 0;
433
434 if (env_assignment_is_valid(value)) {
435 char **env;
436
437 env = strv_env_set(arg_default_environment, value);
438 if (!env)
439 return log_oom();
440
441 arg_default_environment = env;
442 } else
443 log_warning("Environment variable name '%s' is not valid. Ignoring.", value);
444
445 } else if (proc_cmdline_key_streq(key, "systemd.machine_id")) {
446
447 if (proc_cmdline_value_missing(key, value))
448 return 0;
449
450 r = set_machine_id(value);
451 if (r < 0)
452 log_warning("MachineID '%s' is not valid. Ignoring.", value);
453
454 } else if (proc_cmdline_key_streq(key, "systemd.default_timeout_start_sec")) {
455
456 if (proc_cmdline_value_missing(key, value))
457 return 0;
458
459 r = parse_sec(value, &arg_default_timeout_start_usec);
460 if (r < 0)
461 log_warning_errno(r, "Failed to parse default start timeout: %s, ignoring.", value);
462
463 if (arg_default_timeout_start_usec <= 0)
464 arg_default_timeout_start_usec = USEC_INFINITY;
465
466 } else if (proc_cmdline_key_streq(key, "systemd.watchdog_device")) {
467
468 if (proc_cmdline_value_missing(key, value))
469 return 0;
470
471 parse_path_argument_and_warn(value, false, &arg_watchdog_device);
472
473 } else if (streq(key, "quiet") && !value) {
474
475 if (arg_show_status == _SHOW_STATUS_UNSET)
476 arg_show_status = SHOW_STATUS_AUTO;
477
478 } else if (streq(key, "debug") && !value) {
479
480 /* Note that log_parse_environment() handles 'debug'
481 * too, and sets the log level to LOG_DEBUG. */
482
483 if (detect_container() > 0)
484 log_set_target(LOG_TARGET_CONSOLE);
485
486 } else if (!value) {
487 const char *target;
488
489 /* SysV compatibility */
490 target = runlevel_to_target(key);
491 if (target)
492 return free_and_strdup(&arg_default_unit, target);
493 }
494
495 return 0;
496 }
497
498 #define DEFINE_SETTER(name, func, descr) \
499 static int name(const char *unit, \
500 const char *filename, \
501 unsigned line, \
502 const char *section, \
503 unsigned section_line, \
504 const char *lvalue, \
505 int ltype, \
506 const char *rvalue, \
507 void *data, \
508 void *userdata) { \
509 \
510 int r; \
511 \
512 assert(filename); \
513 assert(lvalue); \
514 assert(rvalue); \
515 \
516 r = func(rvalue); \
517 if (r < 0) \
518 log_syntax(unit, LOG_ERR, filename, line, r, \
519 "Invalid " descr "'%s': %m", \
520 rvalue); \
521 \
522 return 0; \
523 }
524
525 DEFINE_SETTER(config_parse_level2, log_set_max_level_from_string, "log level")
526 DEFINE_SETTER(config_parse_target, log_set_target_from_string, "target")
527 DEFINE_SETTER(config_parse_color, log_show_color_from_string, "color" )
528 DEFINE_SETTER(config_parse_location, log_show_location_from_string, "location")
529
530 static int config_parse_cpu_affinity2(
531 const char *unit,
532 const char *filename,
533 unsigned line,
534 const char *section,
535 unsigned section_line,
536 const char *lvalue,
537 int ltype,
538 const char *rvalue,
539 void *data,
540 void *userdata) {
541
542 _cleanup_cpu_free_ cpu_set_t *c = NULL;
543 int ncpus;
544
545 ncpus = parse_cpu_set_and_warn(rvalue, &c, unit, filename, line, lvalue);
546 if (ncpus < 0)
547 return ncpus;
548
549 if (sched_setaffinity(0, CPU_ALLOC_SIZE(ncpus), c) < 0)
550 log_warning_errno(errno, "Failed to set CPU affinity: %m");
551
552 return 0;
553 }
554
555 static int config_parse_show_status(
556 const char* unit,
557 const char *filename,
558 unsigned line,
559 const char *section,
560 unsigned section_line,
561 const char *lvalue,
562 int ltype,
563 const char *rvalue,
564 void *data,
565 void *userdata) {
566
567 int k;
568 ShowStatus *b = data;
569
570 assert(filename);
571 assert(lvalue);
572 assert(rvalue);
573 assert(data);
574
575 k = parse_show_status(rvalue, b);
576 if (k < 0) {
577 log_syntax(unit, LOG_ERR, filename, line, k, "Failed to parse show status setting, ignoring: %s", rvalue);
578 return 0;
579 }
580
581 return 0;
582 }
583
584 static int config_parse_output_restricted(
585 const char* unit,
586 const char *filename,
587 unsigned line,
588 const char *section,
589 unsigned section_line,
590 const char *lvalue,
591 int ltype,
592 const char *rvalue,
593 void *data,
594 void *userdata) {
595
596 ExecOutput t, *eo = data;
597
598 assert(filename);
599 assert(lvalue);
600 assert(rvalue);
601 assert(data);
602
603 t = exec_output_from_string(rvalue);
604 if (t < 0) {
605 log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse output type, ignoring: %s", rvalue);
606 return 0;
607 }
608
609 if (IN_SET(t, EXEC_OUTPUT_SOCKET, EXEC_OUTPUT_NAMED_FD, EXEC_OUTPUT_FILE)) {
610 log_syntax(unit, LOG_ERR, filename, line, 0, "Standard output types socket, fd:, file: are not supported as defaults, ignoring: %s", rvalue);
611 return 0;
612 }
613
614 *eo = t;
615 return 0;
616 }
617
618 static int config_parse_crash_chvt(
619 const char* unit,
620 const char *filename,
621 unsigned line,
622 const char *section,
623 unsigned section_line,
624 const char *lvalue,
625 int ltype,
626 const char *rvalue,
627 void *data,
628 void *userdata) {
629
630 int r;
631
632 assert(filename);
633 assert(lvalue);
634 assert(rvalue);
635
636 r = parse_crash_chvt(rvalue);
637 if (r < 0) {
638 log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse CrashChangeVT= setting, ignoring: %s", rvalue);
639 return 0;
640 }
641
642 return 0;
643 }
644
645 static int config_parse_join_controllers(const char *unit,
646 const char *filename,
647 unsigned line,
648 const char *section,
649 unsigned section_line,
650 const char *lvalue,
651 int ltype,
652 const char *rvalue,
653 void *data,
654 void *userdata) {
655
656 const char *whole_rvalue = rvalue;
657 unsigned n = 0;
658
659 assert(filename);
660 assert(lvalue);
661 assert(rvalue);
662
663 arg_join_controllers = strv_free_free(arg_join_controllers);
664
665 for (;;) {
666 _cleanup_free_ char *word = NULL;
667 char **l;
668 int r;
669
670 r = extract_first_word(&rvalue, &word, NULL, EXTRACT_QUOTES);
671 if (r < 0) {
672 log_syntax(unit, LOG_ERR, filename, line, r, "Invalid value for %s: %s", lvalue, whole_rvalue);
673 return r;
674 }
675 if (r == 0)
676 break;
677
678 l = strv_split(word, ",");
679 if (!l)
680 return log_oom();
681 strv_uniq(l);
682
683 if (strv_length(l) <= 1) {
684 strv_free(l);
685 continue;
686 }
687
688 if (!arg_join_controllers) {
689 arg_join_controllers = new(char**, 2);
690 if (!arg_join_controllers) {
691 strv_free(l);
692 return log_oom();
693 }
694
695 arg_join_controllers[0] = l;
696 arg_join_controllers[1] = NULL;
697
698 n = 1;
699 } else {
700 char ***a;
701 char ***t;
702
703 t = new0(char**, n+2);
704 if (!t) {
705 strv_free(l);
706 return log_oom();
707 }
708
709 n = 0;
710
711 for (a = arg_join_controllers; *a; a++) {
712
713 if (strv_overlap(*a, l)) {
714 if (strv_extend_strv(&l, *a, false) < 0) {
715 strv_free(l);
716 strv_free_free(t);
717 return log_oom();
718 }
719
720 } else {
721 char **c;
722
723 c = strv_copy(*a);
724 if (!c) {
725 strv_free(l);
726 strv_free_free(t);
727 return log_oom();
728 }
729
730 t[n++] = c;
731 }
732 }
733
734 t[n++] = strv_uniq(l);
735
736 strv_free_free(arg_join_controllers);
737 arg_join_controllers = t;
738 }
739 }
740 if (!isempty(rvalue))
741 log_syntax(unit, LOG_ERR, filename, line, 0, "Trailing garbage, ignoring.");
742
743 return 0;
744 }
745
746 static int parse_config_file(void) {
747
748 const ConfigTableItem items[] = {
749 { "Manager", "LogLevel", config_parse_level2, 0, NULL },
750 { "Manager", "LogTarget", config_parse_target, 0, NULL },
751 { "Manager", "LogColor", config_parse_color, 0, NULL },
752 { "Manager", "LogLocation", config_parse_location, 0, NULL },
753 { "Manager", "DumpCore", config_parse_bool, 0, &arg_dump_core },
754 { "Manager", "CrashChVT", /* legacy */ config_parse_crash_chvt, 0, NULL },
755 { "Manager", "CrashChangeVT", config_parse_crash_chvt, 0, NULL },
756 { "Manager", "CrashShell", config_parse_bool, 0, &arg_crash_shell },
757 { "Manager", "CrashReboot", config_parse_bool, 0, &arg_crash_reboot },
758 { "Manager", "ShowStatus", config_parse_show_status, 0, &arg_show_status },
759 { "Manager", "CPUAffinity", config_parse_cpu_affinity2, 0, NULL },
760 { "Manager", "JoinControllers", config_parse_join_controllers, 0, &arg_join_controllers },
761 { "Manager", "RuntimeWatchdogSec", config_parse_sec, 0, &arg_runtime_watchdog },
762 { "Manager", "ShutdownWatchdogSec", config_parse_sec, 0, &arg_shutdown_watchdog },
763 { "Manager", "WatchdogDevice", config_parse_path, 0, &arg_watchdog_device },
764 { "Manager", "CapabilityBoundingSet", config_parse_capability_set, 0, &arg_capability_bounding_set },
765 #if HAVE_SECCOMP
766 { "Manager", "SystemCallArchitectures", config_parse_syscall_archs, 0, &arg_syscall_archs },
767 #endif
768 { "Manager", "TimerSlackNSec", config_parse_nsec, 0, &arg_timer_slack_nsec },
769 { "Manager", "DefaultTimerAccuracySec", config_parse_sec, 0, &arg_default_timer_accuracy_usec },
770 { "Manager", "DefaultStandardOutput", config_parse_output_restricted,0, &arg_default_std_output },
771 { "Manager", "DefaultStandardError", config_parse_output_restricted,0, &arg_default_std_error },
772 { "Manager", "DefaultTimeoutStartSec", config_parse_sec, 0, &arg_default_timeout_start_usec },
773 { "Manager", "DefaultTimeoutStopSec", config_parse_sec, 0, &arg_default_timeout_stop_usec },
774 { "Manager", "DefaultRestartSec", config_parse_sec, 0, &arg_default_restart_usec },
775 { "Manager", "DefaultStartLimitInterval", config_parse_sec, 0, &arg_default_start_limit_interval }, /* obsolete alias */
776 { "Manager", "DefaultStartLimitIntervalSec",config_parse_sec, 0, &arg_default_start_limit_interval },
777 { "Manager", "DefaultStartLimitBurst", config_parse_unsigned, 0, &arg_default_start_limit_burst },
778 { "Manager", "DefaultEnvironment", config_parse_environ, 0, &arg_default_environment },
779 { "Manager", "DefaultLimitCPU", config_parse_limit, RLIMIT_CPU, arg_default_rlimit },
780 { "Manager", "DefaultLimitFSIZE", config_parse_limit, RLIMIT_FSIZE, arg_default_rlimit },
781 { "Manager", "DefaultLimitDATA", config_parse_limit, RLIMIT_DATA, arg_default_rlimit },
782 { "Manager", "DefaultLimitSTACK", config_parse_limit, RLIMIT_STACK, arg_default_rlimit },
783 { "Manager", "DefaultLimitCORE", config_parse_limit, RLIMIT_CORE, arg_default_rlimit },
784 { "Manager", "DefaultLimitRSS", config_parse_limit, RLIMIT_RSS, arg_default_rlimit },
785 { "Manager", "DefaultLimitNOFILE", config_parse_limit, RLIMIT_NOFILE, arg_default_rlimit },
786 { "Manager", "DefaultLimitAS", config_parse_limit, RLIMIT_AS, arg_default_rlimit },
787 { "Manager", "DefaultLimitNPROC", config_parse_limit, RLIMIT_NPROC, arg_default_rlimit },
788 { "Manager", "DefaultLimitMEMLOCK", config_parse_limit, RLIMIT_MEMLOCK, arg_default_rlimit },
789 { "Manager", "DefaultLimitLOCKS", config_parse_limit, RLIMIT_LOCKS, arg_default_rlimit },
790 { "Manager", "DefaultLimitSIGPENDING", config_parse_limit, RLIMIT_SIGPENDING, arg_default_rlimit },
791 { "Manager", "DefaultLimitMSGQUEUE", config_parse_limit, RLIMIT_MSGQUEUE, arg_default_rlimit },
792 { "Manager", "DefaultLimitNICE", config_parse_limit, RLIMIT_NICE, arg_default_rlimit },
793 { "Manager", "DefaultLimitRTPRIO", config_parse_limit, RLIMIT_RTPRIO, arg_default_rlimit },
794 { "Manager", "DefaultLimitRTTIME", config_parse_limit, RLIMIT_RTTIME, arg_default_rlimit },
795 { "Manager", "DefaultCPUAccounting", config_parse_bool, 0, &arg_default_cpu_accounting },
796 { "Manager", "DefaultIOAccounting", config_parse_bool, 0, &arg_default_io_accounting },
797 { "Manager", "DefaultIPAccounting", config_parse_bool, 0, &arg_default_ip_accounting },
798 { "Manager", "DefaultBlockIOAccounting", config_parse_bool, 0, &arg_default_blockio_accounting },
799 { "Manager", "DefaultMemoryAccounting", config_parse_bool, 0, &arg_default_memory_accounting },
800 { "Manager", "DefaultTasksAccounting", config_parse_bool, 0, &arg_default_tasks_accounting },
801 { "Manager", "DefaultTasksMax", config_parse_tasks_max, 0, &arg_default_tasks_max },
802 { "Manager", "CtrlAltDelBurstAction", config_parse_emergency_action, 0, &arg_cad_burst_action },
803 {}
804 };
805
806 const char *fn, *conf_dirs_nulstr;
807
808 fn = arg_system ?
809 PKGSYSCONFDIR "/system.conf" :
810 PKGSYSCONFDIR "/user.conf";
811
812 conf_dirs_nulstr = arg_system ?
813 CONF_PATHS_NULSTR("systemd/system.conf.d") :
814 CONF_PATHS_NULSTR("systemd/user.conf.d");
815
816 (void) config_parse_many_nulstr(fn, conf_dirs_nulstr, "Manager\0", config_item_table_lookup, items, CONFIG_PARSE_WARN, NULL);
817
818 /* Traditionally "0" was used to turn off the default unit timeouts. Fix this up so that we used USEC_INFINITY
819 * like everywhere else. */
820 if (arg_default_timeout_start_usec <= 0)
821 arg_default_timeout_start_usec = USEC_INFINITY;
822 if (arg_default_timeout_stop_usec <= 0)
823 arg_default_timeout_stop_usec = USEC_INFINITY;
824
825 return 0;
826 }
827
828 static void set_manager_defaults(Manager *m) {
829
830 assert(m);
831
832 m->default_timer_accuracy_usec = arg_default_timer_accuracy_usec;
833 m->default_std_output = arg_default_std_output;
834 m->default_std_error = arg_default_std_error;
835 m->default_timeout_start_usec = arg_default_timeout_start_usec;
836 m->default_timeout_stop_usec = arg_default_timeout_stop_usec;
837 m->default_restart_usec = arg_default_restart_usec;
838 m->default_start_limit_interval = arg_default_start_limit_interval;
839 m->default_start_limit_burst = arg_default_start_limit_burst;
840 m->default_cpu_accounting = arg_default_cpu_accounting;
841 m->default_io_accounting = arg_default_io_accounting;
842 m->default_ip_accounting = arg_default_ip_accounting;
843 m->default_blockio_accounting = arg_default_blockio_accounting;
844 m->default_memory_accounting = arg_default_memory_accounting;
845 m->default_tasks_accounting = arg_default_tasks_accounting;
846 m->default_tasks_max = arg_default_tasks_max;
847
848 manager_set_default_rlimits(m, arg_default_rlimit);
849 manager_environment_add(m, NULL, arg_default_environment);
850 }
851
852 static void set_manager_settings(Manager *m) {
853
854 assert(m);
855
856 m->confirm_spawn = arg_confirm_spawn;
857 m->runtime_watchdog = arg_runtime_watchdog;
858 m->shutdown_watchdog = arg_shutdown_watchdog;
859 m->cad_burst_action = arg_cad_burst_action;
860
861 manager_set_show_status(m, arg_show_status);
862 }
863
864 static int parse_argv(int argc, char *argv[]) {
865
866 enum {
867 ARG_LOG_LEVEL = 0x100,
868 ARG_LOG_TARGET,
869 ARG_LOG_COLOR,
870 ARG_LOG_LOCATION,
871 ARG_UNIT,
872 ARG_SYSTEM,
873 ARG_USER,
874 ARG_TEST,
875 ARG_NO_PAGER,
876 ARG_VERSION,
877 ARG_DUMP_CONFIGURATION_ITEMS,
878 ARG_DUMP_CORE,
879 ARG_CRASH_CHVT,
880 ARG_CRASH_SHELL,
881 ARG_CRASH_REBOOT,
882 ARG_CONFIRM_SPAWN,
883 ARG_SHOW_STATUS,
884 ARG_DESERIALIZE,
885 ARG_SWITCHED_ROOT,
886 ARG_DEFAULT_STD_OUTPUT,
887 ARG_DEFAULT_STD_ERROR,
888 ARG_MACHINE_ID
889 };
890
891 static const struct option options[] = {
892 { "log-level", required_argument, NULL, ARG_LOG_LEVEL },
893 { "log-target", required_argument, NULL, ARG_LOG_TARGET },
894 { "log-color", optional_argument, NULL, ARG_LOG_COLOR },
895 { "log-location", optional_argument, NULL, ARG_LOG_LOCATION },
896 { "unit", required_argument, NULL, ARG_UNIT },
897 { "system", no_argument, NULL, ARG_SYSTEM },
898 { "user", no_argument, NULL, ARG_USER },
899 { "test", no_argument, NULL, ARG_TEST },
900 { "no-pager", no_argument, NULL, ARG_NO_PAGER },
901 { "help", no_argument, NULL, 'h' },
902 { "version", no_argument, NULL, ARG_VERSION },
903 { "dump-configuration-items", no_argument, NULL, ARG_DUMP_CONFIGURATION_ITEMS },
904 { "dump-core", optional_argument, NULL, ARG_DUMP_CORE },
905 { "crash-chvt", required_argument, NULL, ARG_CRASH_CHVT },
906 { "crash-shell", optional_argument, NULL, ARG_CRASH_SHELL },
907 { "crash-reboot", optional_argument, NULL, ARG_CRASH_REBOOT },
908 { "confirm-spawn", optional_argument, NULL, ARG_CONFIRM_SPAWN },
909 { "show-status", optional_argument, NULL, ARG_SHOW_STATUS },
910 { "deserialize", required_argument, NULL, ARG_DESERIALIZE },
911 { "switched-root", no_argument, NULL, ARG_SWITCHED_ROOT },
912 { "default-standard-output", required_argument, NULL, ARG_DEFAULT_STD_OUTPUT, },
913 { "default-standard-error", required_argument, NULL, ARG_DEFAULT_STD_ERROR, },
914 { "machine-id", required_argument, NULL, ARG_MACHINE_ID },
915 {}
916 };
917
918 int c, r;
919
920 assert(argc >= 1);
921 assert(argv);
922
923 if (getpid_cached() == 1)
924 opterr = 0;
925
926 while ((c = getopt_long(argc, argv, "hDbsz:", options, NULL)) >= 0)
927
928 switch (c) {
929
930 case ARG_LOG_LEVEL:
931 r = log_set_max_level_from_string(optarg);
932 if (r < 0) {
933 log_error("Failed to parse log level %s.", optarg);
934 return r;
935 }
936
937 break;
938
939 case ARG_LOG_TARGET:
940 r = log_set_target_from_string(optarg);
941 if (r < 0) {
942 log_error("Failed to parse log target %s.", optarg);
943 return r;
944 }
945
946 break;
947
948 case ARG_LOG_COLOR:
949
950 if (optarg) {
951 r = log_show_color_from_string(optarg);
952 if (r < 0) {
953 log_error("Failed to parse log color setting %s.", optarg);
954 return r;
955 }
956 } else
957 log_show_color(true);
958
959 break;
960
961 case ARG_LOG_LOCATION:
962 if (optarg) {
963 r = log_show_location_from_string(optarg);
964 if (r < 0) {
965 log_error("Failed to parse log location setting %s.", optarg);
966 return r;
967 }
968 } else
969 log_show_location(true);
970
971 break;
972
973 case ARG_DEFAULT_STD_OUTPUT:
974 r = exec_output_from_string(optarg);
975 if (r < 0) {
976 log_error("Failed to parse default standard output setting %s.", optarg);
977 return r;
978 } else
979 arg_default_std_output = r;
980 break;
981
982 case ARG_DEFAULT_STD_ERROR:
983 r = exec_output_from_string(optarg);
984 if (r < 0) {
985 log_error("Failed to parse default standard error output setting %s.", optarg);
986 return r;
987 } else
988 arg_default_std_error = r;
989 break;
990
991 case ARG_UNIT:
992 r = free_and_strdup(&arg_default_unit, optarg);
993 if (r < 0)
994 return log_error_errno(r, "Failed to set default unit %s: %m", optarg);
995
996 break;
997
998 case ARG_SYSTEM:
999 arg_system = true;
1000 break;
1001
1002 case ARG_USER:
1003 arg_system = false;
1004 break;
1005
1006 case ARG_TEST:
1007 arg_action = ACTION_TEST;
1008 break;
1009
1010 case ARG_NO_PAGER:
1011 arg_no_pager = true;
1012 break;
1013
1014 case ARG_VERSION:
1015 arg_action = ACTION_VERSION;
1016 break;
1017
1018 case ARG_DUMP_CONFIGURATION_ITEMS:
1019 arg_action = ACTION_DUMP_CONFIGURATION_ITEMS;
1020 break;
1021
1022 case ARG_DUMP_CORE:
1023 if (!optarg)
1024 arg_dump_core = true;
1025 else {
1026 r = parse_boolean(optarg);
1027 if (r < 0)
1028 return log_error_errno(r, "Failed to parse dump core boolean: %s", optarg);
1029 arg_dump_core = r;
1030 }
1031 break;
1032
1033 case ARG_CRASH_CHVT:
1034 r = parse_crash_chvt(optarg);
1035 if (r < 0)
1036 return log_error_errno(r, "Failed to parse crash virtual terminal index: %s", optarg);
1037 break;
1038
1039 case ARG_CRASH_SHELL:
1040 if (!optarg)
1041 arg_crash_shell = true;
1042 else {
1043 r = parse_boolean(optarg);
1044 if (r < 0)
1045 return log_error_errno(r, "Failed to parse crash shell boolean: %s", optarg);
1046 arg_crash_shell = r;
1047 }
1048 break;
1049
1050 case ARG_CRASH_REBOOT:
1051 if (!optarg)
1052 arg_crash_reboot = true;
1053 else {
1054 r = parse_boolean(optarg);
1055 if (r < 0)
1056 return log_error_errno(r, "Failed to parse crash shell boolean: %s", optarg);
1057 arg_crash_reboot = r;
1058 }
1059 break;
1060
1061 case ARG_CONFIRM_SPAWN:
1062 arg_confirm_spawn = mfree(arg_confirm_spawn);
1063
1064 r = parse_confirm_spawn(optarg, &arg_confirm_spawn);
1065 if (r < 0)
1066 return log_error_errno(r, "Failed to parse confirm spawn option: %m");
1067 break;
1068
1069 case ARG_SHOW_STATUS:
1070 if (optarg) {
1071 r = parse_show_status(optarg, &arg_show_status);
1072 if (r < 0) {
1073 log_error("Failed to parse show status boolean %s.", optarg);
1074 return r;
1075 }
1076 } else
1077 arg_show_status = SHOW_STATUS_YES;
1078 break;
1079
1080 case ARG_DESERIALIZE: {
1081 int fd;
1082 FILE *f;
1083
1084 r = safe_atoi(optarg, &fd);
1085 if (r < 0 || fd < 0) {
1086 log_error("Failed to parse deserialize option %s.", optarg);
1087 return -EINVAL;
1088 }
1089
1090 (void) fd_cloexec(fd, true);
1091
1092 f = fdopen(fd, "r");
1093 if (!f)
1094 return log_error_errno(errno, "Failed to open serialization fd: %m");
1095
1096 safe_fclose(arg_serialization);
1097 arg_serialization = f;
1098
1099 break;
1100 }
1101
1102 case ARG_SWITCHED_ROOT:
1103 arg_switched_root = true;
1104 break;
1105
1106 case ARG_MACHINE_ID:
1107 r = set_machine_id(optarg);
1108 if (r < 0)
1109 return log_error_errno(r, "MachineID '%s' is not valid.", optarg);
1110 break;
1111
1112 case 'h':
1113 arg_action = ACTION_HELP;
1114 break;
1115
1116 case 'D':
1117 log_set_max_level(LOG_DEBUG);
1118 break;
1119
1120 case 'b':
1121 case 's':
1122 case 'z':
1123 /* Just to eat away the sysvinit kernel
1124 * cmdline args without getopt() error
1125 * messages that we'll parse in
1126 * parse_proc_cmdline_word() or ignore. */
1127
1128 case '?':
1129 if (getpid_cached() != 1)
1130 return -EINVAL;
1131 else
1132 return 0;
1133
1134 default:
1135 assert_not_reached("Unhandled option code.");
1136 }
1137
1138 if (optind < argc && getpid_cached() != 1) {
1139 /* Hmm, when we aren't run as init system
1140 * let's complain about excess arguments */
1141
1142 log_error("Excess arguments.");
1143 return -EINVAL;
1144 }
1145
1146 return 0;
1147 }
1148
static int help(void) {
        /* Description and option summary that follow the usage line. */
        static const char summary[] =
                "Starts up and maintains the system or user services.\n\n"
                " -h --help Show this help\n"
                " --version Show version\n"
                " --test Determine startup sequence, dump it and exit\n"
                " --no-pager Do not pipe output into a pager\n"
                " --dump-configuration-items Dump understood unit configuration items\n"
                " --unit=UNIT Set default unit\n"
                " --system Run a system instance, even if PID != 1\n"
                " --user Run a user instance\n"
                " --dump-core[=BOOL] Dump core on crash\n"
                " --crash-vt=NR Change to specified VT on crash\n"
                " --crash-reboot[=BOOL] Reboot on crash\n"
                " --crash-shell[=BOOL] Run shell on crash\n"
                " --confirm-spawn[=BOOL] Ask for confirmation when spawning processes\n"
                " --show-status[=BOOL] Show status updates on the console during bootup\n"
                " --log-target=TARGET Set log target (console, journal, kmsg, journal-or-kmsg, null)\n"
                " --log-level=LEVEL Set log level (debug, info, notice, warning, err, crit, alert, emerg)\n"
                " --log-color[=BOOL] Highlight important log messages\n"
                " --log-location[=BOOL] Include code location in log messages\n"
                " --default-standard-output= Set default standard output for services\n"
                " --default-standard-error= Set default standard error output for services\n";

        /* Prints the usage text to stdout. Always returns 0. */

        printf("%s [OPTIONS...]\n\n", program_invocation_short_name);
        fputs(summary, stdout);

        return 0;
}
1177
1178 static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool switching_root) {
1179 _cleanup_fdset_free_ FDSet *fds = NULL;
1180 _cleanup_fclose_ FILE *f = NULL;
1181 int r;
1182
1183 assert(m);
1184 assert(_f);
1185 assert(_fds);
1186
1187 r = manager_open_serialization(m, &f);
1188 if (r < 0)
1189 return log_error_errno(r, "Failed to create serialization file: %m");
1190
1191 /* Make sure nothing is really destructed when we shut down */
1192 m->n_reloading++;
1193 bus_manager_send_reloading(m, true);
1194
1195 fds = fdset_new();
1196 if (!fds)
1197 return log_oom();
1198
1199 r = manager_serialize(m, f, fds, switching_root);
1200 if (r < 0)
1201 return log_error_errno(r, "Failed to serialize state: %m");
1202
1203 if (fseeko(f, 0, SEEK_SET) == (off_t) -1)
1204 return log_error_errno(errno, "Failed to rewind serialization fd: %m");
1205
1206 r = fd_cloexec(fileno(f), false);
1207 if (r < 0)
1208 return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization: %m");
1209
1210 r = fdset_cloexec(fds, false);
1211 if (r < 0)
1212 return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization fds: %m");
1213
1214 *_f = f;
1215 *_fds = fds;
1216
1217 f = NULL;
1218 fds = NULL;
1219
1220 return 0;
1221 }
1222
1223 static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
1224 struct rlimit nl;
1225 int r;
1226 int min_max;
1227 _cleanup_free_ char *nr_open = NULL;
1228
1229 assert(saved_rlimit);
1230
1231 /* Save the original RLIMIT_NOFILE so that we can reset it
1232 * later when transitioning from the initrd to the main
1233 * systemd or suchlike. */
1234 if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0)
1235 return log_warning_errno(errno, "Reading RLIMIT_NOFILE failed, ignoring: %m");
1236
1237 /* Make sure forked processes get the default kernel setting */
1238 if (!arg_default_rlimit[RLIMIT_NOFILE]) {
1239 struct rlimit *rl;
1240
1241 rl = newdup(struct rlimit, saved_rlimit, 1);
1242 if (!rl)
1243 return log_oom();
1244
1245 arg_default_rlimit[RLIMIT_NOFILE] = rl;
1246 }
1247
1248 /* Get current RLIMIT_NOFILE maximum compiled into the kernel. */
1249 r = read_one_line_file("/proc/sys/fs/nr_open", &nr_open);
1250 if (r >= 0)
1251 r = safe_atoi(nr_open, &min_max);
1252 /* If we fail, fallback to the hard-coded kernel limit of 1024 * 1024. */
1253 if (r < 0)
1254 min_max = 1024 * 1024;
1255
1256 /* Bump up the resource limit for ourselves substantially */
1257 nl.rlim_cur = nl.rlim_max = min_max;
1258 r = setrlimit_closest(RLIMIT_NOFILE, &nl);
1259 if (r < 0)
1260 return log_warning_errno(r, "Setting RLIMIT_NOFILE failed, ignoring: %m");
1261
1262 return 0;
1263 }
1264
1265 static int bump_rlimit_memlock(struct rlimit *saved_rlimit) {
1266 int r;
1267
1268 assert(saved_rlimit);
1269 assert(getuid() == 0);
1270
1271 /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even though we have CAP_IPC_LOCK which
1272 * should normally disable such checks. We need them to implement IPAccessAllow= and IPAccessDeny=, hence let's
1273 * bump the value high enough for the root user. */
1274
1275 if (getrlimit(RLIMIT_MEMLOCK, saved_rlimit) < 0)
1276 return log_warning_errno(errno, "Reading RLIMIT_MEMLOCK failed, ignoring: %m");
1277
1278 r = setrlimit_closest(RLIMIT_MEMLOCK, &RLIMIT_MAKE_CONST(1024ULL*1024ULL*16ULL));
1279 if (r < 0)
1280 return log_warning_errno(r, "Setting RLIMIT_MEMLOCK failed, ignoring: %m");
1281
1282 return 0;
1283 }
1284
static void test_usr(void) {

        /* Check that /usr is not a separate fs: warn only if dir_is_empty() positively reports /usr as
         * empty; errors and a populated directory are both silently accepted. */

        if (dir_is_empty("/usr") > 0)
                log_warning("/usr appears to be on its own filesystem and is not already mounted. This is not a supported setup. "
                            "Some things will probably break (sometimes even silently) in mysterious ways. "
                            "Consult http://freedesktop.org/wiki/Software/systemd/separate-usr-is-broken for more information.");
}
1296
1297 static int initialize_join_controllers(void) {
1298 /* By default, mount "cpu" + "cpuacct" together, and "net_cls"
1299 * + "net_prio". We'd like to add "cpuset" to the mix, but
1300 * "cpuset" doesn't really work for groups with no initialized
1301 * attributes. */
1302
1303 arg_join_controllers = new(char**, 3);
1304 if (!arg_join_controllers)
1305 return -ENOMEM;
1306
1307 arg_join_controllers[0] = strv_new("cpu", "cpuacct", NULL);
1308 if (!arg_join_controllers[0])
1309 goto oom;
1310
1311 arg_join_controllers[1] = strv_new("net_cls", "net_prio", NULL);
1312 if (!arg_join_controllers[1])
1313 goto oom;
1314
1315 arg_join_controllers[2] = NULL;
1316 return 0;
1317
1318 oom:
1319 arg_join_controllers = strv_free_free(arg_join_controllers);
1320 return -ENOMEM;
1321 }
1322
1323 static int enforce_syscall_archs(Set *archs) {
1324 #if HAVE_SECCOMP
1325 int r;
1326
1327 if (!is_seccomp_available())
1328 return 0;
1329
1330 r = seccomp_restrict_archs(arg_syscall_archs);
1331 if (r < 0)
1332 return log_error_errno(r, "Failed to enforce system call architecture restrication: %m");
1333 #endif
1334 return 0;
1335 }
1336
1337 static int status_welcome(void) {
1338 _cleanup_free_ char *pretty_name = NULL, *ansi_color = NULL;
1339 int r;
1340
1341 r = parse_env_file("/etc/os-release", NEWLINE,
1342 "PRETTY_NAME", &pretty_name,
1343 "ANSI_COLOR", &ansi_color,
1344 NULL);
1345 if (r == -ENOENT)
1346 r = parse_env_file("/usr/lib/os-release", NEWLINE,
1347 "PRETTY_NAME", &pretty_name,
1348 "ANSI_COLOR", &ansi_color,
1349 NULL);
1350
1351 if (r < 0 && r != -ENOENT)
1352 log_warning_errno(r, "Failed to read os-release file: %m");
1353
1354 if (log_get_show_color())
1355 return status_printf(NULL, false, false,
1356 "\nWelcome to \x1B[%sm%s\x1B[0m!\n",
1357 isempty(ansi_color) ? "1" : ansi_color,
1358 isempty(pretty_name) ? "Linux" : pretty_name);
1359 else
1360 return status_printf(NULL, false, false,
1361 "\nWelcome to %s!\n",
1362 isempty(pretty_name) ? "Linux" : pretty_name);
1363 }
1364
1365 static int write_container_id(void) {
1366 const char *c;
1367 int r;
1368
1369 c = getenv("container");
1370 if (isempty(c))
1371 return 0;
1372
1373 RUN_WITH_UMASK(0022)
1374 r = write_string_file("/run/systemd/container", c, WRITE_STRING_FILE_CREATE);
1375 if (r < 0)
1376 return log_warning_errno(r, "Failed to write /run/systemd/container, ignoring: %m");
1377
1378 return 1;
1379 }
1380
1381 static int bump_unix_max_dgram_qlen(void) {
1382 _cleanup_free_ char *qlen = NULL;
1383 unsigned long v;
1384 int r;
1385
1386 /* Let's bump the net.unix.max_dgram_qlen sysctl. The kernel
1387 * default of 16 is simply too low. We set the value really
1388 * really early during boot, so that it is actually applied to
1389 * all our sockets, including the $NOTIFY_SOCKET one. */
1390
1391 r = read_one_line_file("/proc/sys/net/unix/max_dgram_qlen", &qlen);
1392 if (r < 0)
1393 return log_warning_errno(r, "Failed to read AF_UNIX datagram queue length, ignoring: %m");
1394
1395 r = safe_atolu(qlen, &v);
1396 if (r < 0)
1397 return log_warning_errno(r, "Failed to parse AF_UNIX datagram queue length, ignoring: %m");
1398
1399 if (v >= DEFAULT_UNIX_MAX_DGRAM_QLEN)
1400 return 0;
1401
1402 qlen = mfree(qlen);
1403 if (asprintf(&qlen, "%lu\n", DEFAULT_UNIX_MAX_DGRAM_QLEN) < 0)
1404 return log_oom();
1405
1406 r = write_string_file("/proc/sys/net/unix/max_dgram_qlen", qlen, 0);
1407 if (r < 0)
1408 return log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
1409 "Failed to bump AF_UNIX datagram queue length, ignoring: %m");
1410
1411 return 1;
1412 }
1413
1414 static int fixup_environment(void) {
1415 _cleanup_free_ char *term = NULL;
1416 const char *t;
1417 int r;
1418
1419 /* We expect the environment to be set correctly
1420 * if run inside a container. */
1421 if (detect_container() > 0)
1422 return 0;
1423
1424 /* When started as PID1, the kernel uses /dev/console
1425 * for our stdios and uses TERM=linux whatever the
1426 * backend device used by the console. We try to make
1427 * a better guess here since some consoles might not
1428 * have support for color mode for example.
1429 *
1430 * However if TERM was configured through the kernel
1431 * command line then leave it alone. */
1432
1433 r = proc_cmdline_get_key("TERM", 0, &term);
1434 if (r < 0)
1435 return r;
1436
1437 t = term ?: default_term_for_tty("/dev/console");
1438
1439 if (setenv("TERM", t, 1) < 0)
1440 return -errno;
1441
1442 return 0;
1443 }
1444
static void redirect_telinit(int argc, char *argv[]) {

        /* This is compatibility support for SysV, where calling init as a user is identical to telinit.
         *
         * If we are not PID 1 and our invocation name contains "init", the systemctl binary is executed
         * with our argument vector; should that fail we log and exit with status 1. Without SysV
         * compatibility compiled in this function does nothing. */

#if HAVE_SYSV_COMPAT
        if (getpid_cached() == 1 ||
            !strstr(program_invocation_short_name, "init"))
                return;

        execv(SYSTEMCTL_BINARY_PATH, argv);

        /* Only reached if the exec failed */
        log_error_errno(errno, "Failed to exec " SYSTEMCTL_BINARY_PATH ": %m");
        exit(1);
#endif
}
1461
1462 static int become_shutdown(
1463 const char *shutdown_verb,
1464 int retval) {
1465
1466 char log_level[DECIMAL_STR_MAX(int) + 1],
1467 exit_code[DECIMAL_STR_MAX(uint8_t) + 1];
1468
1469 const char* command_line[11] = {
1470 SYSTEMD_SHUTDOWN_BINARY_PATH,
1471 shutdown_verb,
1472 "--log-level", log_level,
1473 "--log-target",
1474 };
1475
1476 _cleanup_strv_free_ char **env_block = NULL;
1477 size_t pos = 5;
1478 int r;
1479
1480 assert(shutdown_verb);
1481 assert(!command_line[pos]);
1482 env_block = strv_copy(environ);
1483
1484 xsprintf(log_level, "%d", log_get_max_level());
1485
1486 switch (log_get_target()) {
1487
1488 case LOG_TARGET_KMSG:
1489 case LOG_TARGET_JOURNAL_OR_KMSG:
1490 case LOG_TARGET_SYSLOG_OR_KMSG:
1491 command_line[pos++] = "kmsg";
1492 break;
1493
1494 case LOG_TARGET_NULL:
1495 command_line[pos++] = "null";
1496 break;
1497
1498 case LOG_TARGET_CONSOLE:
1499 default:
1500 command_line[pos++] = "console";
1501 break;
1502 };
1503
1504 if (log_get_show_color())
1505 command_line[pos++] = "--log-color";
1506
1507 if (log_get_show_location())
1508 command_line[pos++] = "--log-location";
1509
1510 if (streq(shutdown_verb, "exit")) {
1511 command_line[pos++] = "--exit-code";
1512 command_line[pos++] = exit_code;
1513 xsprintf(exit_code, "%d", retval);
1514 }
1515
1516 assert(pos < ELEMENTSOF(command_line));
1517
1518 if (streq(shutdown_verb, "reboot") &&
1519 arg_shutdown_watchdog > 0 &&
1520 arg_shutdown_watchdog != USEC_INFINITY) {
1521
1522 char *e;
1523
1524 /* If we reboot let's set the shutdown
1525 * watchdog and tell the shutdown binary to
1526 * repeatedly ping it */
1527 r = watchdog_set_timeout(&arg_shutdown_watchdog);
1528 watchdog_close(r < 0);
1529
1530 /* Tell the binary how often to ping, ignore failure */
1531 if (asprintf(&e, "WATCHDOG_USEC="USEC_FMT, arg_shutdown_watchdog) > 0)
1532 (void) strv_consume(&env_block, e);
1533
1534 if (arg_watchdog_device &&
1535 asprintf(&e, "WATCHDOG_DEVICE=%s", arg_watchdog_device) > 0)
1536 (void) strv_consume(&env_block, e);
1537 } else
1538 watchdog_close(true);
1539
1540 /* Avoid the creation of new processes forked by the
1541 * kernel; at this point, we will not listen to the
1542 * signals anyway */
1543 if (detect_container() <= 0)
1544 (void) cg_uninstall_release_agent(SYSTEMD_CGROUP_CONTROLLER);
1545
1546 execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);
1547 return -errno;
1548 }
1549
static void initialize_clock(void) {
        int q;

        /* Applies the timezone delta or resets the kernel time warp, depending on whether the RTC is
         * reported as running in local time, and afterwards applies the clock epoch. All failures are
         * only logged. */

        if (clock_is_localtime(NULL) > 0) {
                int delta_minutes;

                /*
                 * The very first call of settimeofday() also does a time warp in the kernel.
                 *
                 * In the rtc-in-local time mode, we set the kernel's timezone, and rely on external tools to take care
                 * of maintaining the RTC and do all adjustments. This matches the behavior of Windows, which leaves
                 * the RTC alone if the registry tells that the RTC runs in UTC.
                 */
                q = clock_set_timezone(&delta_minutes);
                if (q >= 0)
                        log_info("RTC configured in localtime, applying delta of %i minutes to system time.", delta_minutes);
                else
                        log_error_errno(q, "Failed to apply local time delta, ignoring: %m");

        } else if (!in_initrd())
                /*
                 * Do a dummy very first call to seal the kernel's time warp magic.
                 *
                 * Do not call this from inside the initrd. The initrd might not carry /etc/adjtime with LOCAL, but the
                 * real system could be set up that way. In such case, we need to delay the time-warp or the sealing
                 * until we reach the real system.
                 *
                 * Do no set the kernel's timezone. The concept of local time cannot be supported reliably, the time
                 * will jump or be incorrect at every daylight saving time change. All kernel local time concepts will
                 * be treated as UTC that way.
                 */
                (void) clock_reset_timewarp();

        q = clock_apply_epoch();
        if (q > 0)
                log_info("System time before build time, advancing clock.");
        else if (q < 0)
                log_error_errno(q, "Current system time is before build time, but cannot correct: %m");
}
1590
1591 static void initialize_coredump(bool skip_setup) {
1592
1593 if (getpid_cached() != 1)
1594 return;
1595
1596 /* Don't limit the core dump size, so that coredump handlers such as systemd-coredump (which honour the limit)
1597 * will process core dumps for system services by default. */
1598 if (setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY)) < 0)
1599 log_warning_errno(errno, "Failed to set RLIMIT_CORE: %m");
1600
1601 /* But at the same time, turn off the core_pattern logic by default, so that no coredumps are stored
1602 * until the systemd-coredump tool is enabled via sysctl. */
1603 if (!skip_setup)
1604 (void) write_string_file("/proc/sys/kernel/core_pattern", "|/bin/false", 0);
1605 }
1606
1607 static void do_reexecute(
1608 int argc,
1609 char *argv[],
1610 const struct rlimit *saved_rlimit_nofile,
1611 const struct rlimit *saved_rlimit_memlock,
1612 FDSet *fds,
1613 const char *switch_root_dir,
1614 const char *switch_root_init,
1615 const char **ret_error_message) {
1616
1617 unsigned i, j, args_size;
1618 const char **args;
1619 int r;
1620
1621 assert(saved_rlimit_nofile);
1622 assert(saved_rlimit_memlock);
1623 assert(ret_error_message);
1624
1625 /* Close and disarm the watchdog, so that the new instance can reinitialize it, but doesn't get rebooted while
1626 * we do that */
1627 watchdog_close(true);
1628
1629 /* Reset the RLIMIT_NOFILE to the kernel default, so that the new systemd can pass the kernel default to its
1630 * child processes */
1631
1632 if (saved_rlimit_nofile->rlim_cur > 0)
1633 (void) setrlimit(RLIMIT_NOFILE, saved_rlimit_nofile);
1634 if (saved_rlimit_memlock->rlim_cur != (rlim_t) -1)
1635 (void) setrlimit(RLIMIT_MEMLOCK, saved_rlimit_memlock);
1636
1637 if (switch_root_dir) {
1638 /* Kill all remaining processes from the initrd, but don't wait for them, so that we can handle the
1639 * SIGCHLD for them after deserializing. */
1640 broadcast_signal(SIGTERM, false, true);
1641
1642 /* And switch root with MS_MOVE, because we remove the old directory afterwards and detach it. */
1643 r = switch_root(switch_root_dir, "/mnt", true, MS_MOVE);
1644 if (r < 0)
1645 log_error_errno(r, "Failed to switch root, trying to continue: %m");
1646 }
1647
1648 args_size = MAX(6, argc+1);
1649 args = newa(const char*, args_size);
1650
1651 if (!switch_root_init) {
1652 char sfd[DECIMAL_STR_MAX(int) + 1];
1653
1654 /* First try to spawn ourselves with the right path, and with full serialization. We do this only if
1655 * the user didn't specify an explicit init to spawn. */
1656
1657 assert(arg_serialization);
1658 assert(fds);
1659
1660 xsprintf(sfd, "%i", fileno(arg_serialization));
1661
1662 i = 0;
1663 args[i++] = SYSTEMD_BINARY_PATH;
1664 if (switch_root_dir)
1665 args[i++] = "--switched-root";
1666 args[i++] = arg_system ? "--system" : "--user";
1667 args[i++] = "--deserialize";
1668 args[i++] = sfd;
1669 args[i++] = NULL;
1670
1671 assert(i <= args_size);
1672
1673 /*
1674 * We want valgrind to print its memory usage summary before reexecution. Valgrind won't do this is on
1675 * its own on exec(), but it will do it on exit(). Hence, to ensure we get a summary here, fork() off
1676 * a child, let it exit() cleanly, so that it prints the summary, and wait() for it in the parent,
1677 * before proceeding into the exec().
1678 */
1679 valgrind_summary_hack();
1680
1681 (void) execv(args[0], (char* const*) args);
1682 log_debug_errno(errno, "Failed to execute our own binary, trying fallback: %m");
1683 }
1684
1685 /* Try the fallback, if there is any, without any serialization. We pass the original argv[] and envp[]. (Well,
1686 * modulo the ordering changes due to getopt() in argv[], and some cleanups in envp[], but let's hope that
1687 * doesn't matter.) */
1688
1689 arg_serialization = safe_fclose(arg_serialization);
1690 fds = fdset_free(fds);
1691
1692 /* Reopen the console */
1693 (void) make_console_stdio();
1694
1695 for (j = 1, i = 1; j < (unsigned) argc; j++)
1696 args[i++] = argv[j];
1697 args[i++] = NULL;
1698 assert(i <= args_size);
1699
1700 /* Reenable any blocked signals, especially important if we switch from initial ramdisk to init=... */
1701 (void) reset_all_signal_handlers();
1702 (void) reset_signal_mask();
1703
1704 if (switch_root_init) {
1705 args[0] = switch_root_init;
1706 (void) execv(args[0], (char* const*) args);
1707 log_warning_errno(errno, "Failed to execute configured init, trying fallback: %m");
1708 }
1709
1710 args[0] = "/sbin/init";
1711 (void) execv(args[0], (char* const*) args);
1712 r = -errno;
1713
1714 manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
1715 ANSI_HIGHLIGHT_RED " !! " ANSI_NORMAL,
1716 "Failed to execute /sbin/init");
1717
1718 if (r == -ENOENT) {
1719 log_warning("No /sbin/init, trying fallback");
1720
1721 args[0] = "/bin/sh";
1722 args[1] = NULL;
1723 (void) execv(args[0], (char* const*) args);
1724 log_error_errno(errno, "Failed to execute /bin/sh, giving up: %m");
1725 } else
1726 log_warning_errno(r, "Failed to execute /sbin/init, giving up: %m");
1727
1728 *ret_error_message = "Failed to execute fallback shell";
1729 }
1730
1731 static int invoke_main_loop(
1732 Manager *m,
1733 bool *ret_reexecute,
1734 int *ret_retval, /* Return parameters relevant for shutting down */
1735 const char **ret_shutdown_verb, /* … */
1736 FDSet **ret_fds, /* Return parameters for reexecuting */
1737 char **ret_switch_root_dir, /* … */
1738 char **ret_switch_root_init, /* … */
1739 const char **ret_error_message) {
1740
1741 int r;
1742
1743 assert(m);
1744 assert(ret_reexecute);
1745 assert(ret_retval);
1746 assert(ret_shutdown_verb);
1747 assert(ret_fds);
1748 assert(ret_switch_root_dir);
1749 assert(ret_switch_root_init);
1750 assert(ret_error_message);
1751
1752 for (;;) {
1753 r = manager_loop(m);
1754 if (r < 0) {
1755 *ret_error_message = "Failed to run main loop";
1756 return log_emergency_errno(r, "Failed to run main loop: %m");
1757 }
1758
1759 switch (m->exit_code) {
1760
1761 case MANAGER_RELOAD:
1762 log_info("Reloading.");
1763
1764 r = parse_config_file();
1765 if (r < 0)
1766 log_warning_errno(r, "Failed to parse config file, ignoring: %m");
1767
1768 set_manager_defaults(m);
1769
1770 r = manager_reload(m);
1771 if (r < 0)
1772 log_warning_errno(r, "Failed to reload, ignoring: %m");
1773
1774 break;
1775
1776 case MANAGER_REEXECUTE:
1777
1778 r = prepare_reexecute(m, &arg_serialization, ret_fds, false);
1779 if (r < 0) {
1780 *ret_error_message = "Failed to prepare for reexecution";
1781 return r;
1782 }
1783
1784 log_notice("Reexecuting.");
1785
1786 *ret_reexecute = true;
1787 *ret_retval = EXIT_SUCCESS;
1788 *ret_shutdown_verb = NULL;
1789 *ret_switch_root_dir = *ret_switch_root_init = NULL;
1790
1791 return 0;
1792
1793 case MANAGER_SWITCH_ROOT:
1794 if (!m->switch_root_init) {
1795 r = prepare_reexecute(m, &arg_serialization, ret_fds, true);
1796 if (r < 0) {
1797 *ret_error_message = "Failed to prepare for reexecution";
1798 return r;
1799 }
1800 } else
1801 *ret_fds = NULL;
1802
1803 log_notice("Switching root.");
1804
1805 *ret_reexecute = true;
1806 *ret_retval = EXIT_SUCCESS;
1807 *ret_shutdown_verb = NULL;
1808
1809 /* Steal the switch root parameters */
1810 *ret_switch_root_dir = m->switch_root;
1811 *ret_switch_root_init = m->switch_root_init;
1812 m->switch_root = m->switch_root_init = NULL;
1813
1814 return 0;
1815
1816 case MANAGER_EXIT:
1817
1818 if (MANAGER_IS_USER(m)) {
1819 log_debug("Exit.");
1820
1821 *ret_reexecute = false;
1822 *ret_retval = m->return_value;
1823 *ret_shutdown_verb = NULL;
1824 *ret_fds = NULL;
1825 *ret_switch_root_dir = *ret_switch_root_init = NULL;
1826
1827 return 0;
1828 }
1829
1830 _fallthrough_;
1831 case MANAGER_REBOOT:
1832 case MANAGER_POWEROFF:
1833 case MANAGER_HALT:
1834 case MANAGER_KEXEC: {
1835 static const char * const table[_MANAGER_EXIT_CODE_MAX] = {
1836 [MANAGER_EXIT] = "exit",
1837 [MANAGER_REBOOT] = "reboot",
1838 [MANAGER_POWEROFF] = "poweroff",
1839 [MANAGER_HALT] = "halt",
1840 [MANAGER_KEXEC] = "kexec"
1841 };
1842
1843 log_notice("Shutting down.");
1844
1845 *ret_reexecute = false;
1846 *ret_retval = m->return_value;
1847 assert_se(*ret_shutdown_verb = table[m->exit_code]);
1848 *ret_fds = NULL;
1849 *ret_switch_root_dir = *ret_switch_root_init = NULL;
1850
1851 return 0;
1852 }
1853
1854 default:
1855 assert_not_reached("Unknown exit code.");
1856 }
1857 }
1858 }
1859
1860 static void log_execution_mode(bool *ret_first_boot) {
1861 assert(ret_first_boot);
1862
1863 if (arg_system) {
1864 int v;
1865
1866 log_info(PACKAGE_STRING " running in %ssystem mode. (" SYSTEMD_FEATURES ")",
1867 arg_action == ACTION_TEST ? "test " : "" );
1868
1869 v = detect_virtualization();
1870 if (v > 0)
1871 log_info("Detected virtualization %s.", virtualization_to_string(v));
1872
1873 log_info("Detected architecture %s.", architecture_to_string(uname_architecture()));
1874
1875 if (in_initrd()) {
1876 *ret_first_boot = false;
1877 log_info("Running in initial RAM disk.");
1878 } else {
1879 /* Let's check whether we are in first boot, i.e. whether /etc is still unpopulated. We use
1880 * /etc/machine-id as flag file, for this: if it exists we assume /etc is populated, if it
1881 * doesn't it's unpopulated. This allows container managers and installers to provision a
1882 * couple of files already. If the container manager wants to provision the machine ID itself
1883 * it should pass $container_uuid to PID 1. */
1884
1885 *ret_first_boot = access("/etc/machine-id", F_OK) < 0;
1886 if (*ret_first_boot)
1887 log_info("Running with unpopulated /etc.");
1888 }
1889 } else {
1890 _cleanup_free_ char *t;
1891
1892 t = uid_to_name(getuid());
1893 log_debug(PACKAGE_STRING " running in %suser mode for user " UID_FMT "/%s. (" SYSTEMD_FEATURES ")",
1894 arg_action == ACTION_TEST ? " test" : "", getuid(), strna(t));
1895
1896 *ret_first_boot = false;
1897 }
1898 }
1899
1900 static int initialize_runtime(
1901 bool skip_setup,
1902 struct rlimit *saved_rlimit_nofile,
1903 struct rlimit *saved_rlimit_memlock,
1904 const char **ret_error_message) {
1905
1906 int r;
1907
1908 assert(ret_error_message);
1909
1910 /* Sets up various runtime parameters. Many of these initializations are conditionalized:
1911 *
1912 * - Some only apply to --system instances
1913 * - Some only apply to --user instances
1914 * - Some only apply when we first start up, but not when we reexecute
1915 */
1916
1917 if (arg_system && !skip_setup) {
1918 if (arg_show_status > 0)
1919 status_welcome();
1920
1921 hostname_setup();
1922 machine_id_setup(NULL, arg_machine_id, NULL);
1923 loopback_setup();
1924 bump_unix_max_dgram_qlen();
1925 test_usr();
1926 write_container_id();
1927 }
1928
1929 if (arg_system && arg_watchdog_device) {
1930 r = watchdog_set_device(arg_watchdog_device);
1931 if (r < 0)
1932 log_warning_errno(r, "Failed to set watchdog device to %s, ignoring: %m",
1933 arg_watchdog_device);
1934 }
1935
1936 if (arg_system && arg_runtime_watchdog > 0 && arg_runtime_watchdog != USEC_INFINITY)
1937 watchdog_set_timeout(&arg_runtime_watchdog);
1938
1939 if (arg_timer_slack_nsec != NSEC_INFINITY)
1940 if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0)
1941 log_error_errno(errno, "Failed to adjust timer slack: %m");
1942
1943 if (arg_system && !cap_test_all(arg_capability_bounding_set)) {
1944 r = capability_bounding_set_drop_usermode(arg_capability_bounding_set);
1945 if (r < 0) {
1946 *ret_error_message = "Failed to drop capability bounding set of usermode helpers";
1947 return log_emergency_errno(r, "Failed to drop capability bounding set of usermode helpers: %m");
1948 }
1949
1950 r = capability_bounding_set_drop(arg_capability_bounding_set, true);
1951 if (r < 0) {
1952 *ret_error_message = "Failed to drop capability bounding set";
1953 return log_emergency_errno(r, "Failed to drop capability bounding set: %m");
1954 }
1955 }
1956
1957 if (arg_syscall_archs) {
1958 r = enforce_syscall_archs(arg_syscall_archs);
1959 if (r < 0) {
1960 *ret_error_message = "Failed to set syscall architectures";
1961 return r;
1962 }
1963 }
1964
1965 if (!arg_system)
1966 /* Become reaper of our children */
1967 if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0)
1968 log_warning_errno(errno, "Failed to make us a subreaper: %m");
1969
1970 if (arg_system) {
1971 /* Bump up RLIMIT_NOFILE for systemd itself */
1972 (void) bump_rlimit_nofile(saved_rlimit_nofile);
1973 (void) bump_rlimit_memlock(saved_rlimit_memlock);
1974 }
1975
1976 return 0;
1977 }
1978
1979 static int do_queue_default_job(
1980 Manager *m,
1981 const char **ret_error_message) {
1982
1983 _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
1984 Job *default_unit_job;
1985 Unit *target = NULL;
1986 int r;
1987
1988 log_debug("Activating default unit: %s", arg_default_unit);
1989
1990 r = manager_load_unit(m, arg_default_unit, NULL, &error, &target);
1991 if (r < 0)
1992 log_error("Failed to load default target: %s", bus_error_message(&error, r));
1993 else if (IN_SET(target->load_state, UNIT_ERROR, UNIT_NOT_FOUND))
1994 log_error_errno(target->load_error, "Failed to load default target: %m");
1995 else if (target->load_state == UNIT_MASKED)
1996 log_error("Default target masked.");
1997
1998 if (!target || target->load_state != UNIT_LOADED) {
1999 log_info("Trying to load rescue target...");
2000
2001 r = manager_load_unit(m, SPECIAL_RESCUE_TARGET, NULL, &error, &target);
2002 if (r < 0) {
2003 *ret_error_message = "Failed to load rescue target";
2004 return log_emergency_errno(r, "Failed to load rescue target: %s", bus_error_message(&error, r));
2005 } else if (IN_SET(target->load_state, UNIT_ERROR, UNIT_NOT_FOUND)) {
2006 *ret_error_message = "Failed to load rescue target";
2007 return log_emergency_errno(target->load_error, "Failed to load rescue target: %m");
2008 } else if (target->load_state == UNIT_MASKED) {
2009 *ret_error_message = "Rescue target masked";
2010 log_emergency("Rescue target masked.");
2011 return -ERFKILL;
2012 }
2013 }
2014
2015 assert(target->load_state == UNIT_LOADED);
2016
2017 r = manager_add_job(m, JOB_START, target, JOB_ISOLATE, &error, &default_unit_job);
2018 if (r == -EPERM) {
2019 log_debug_errno(r, "Default target could not be isolated, starting instead: %s", bus_error_message(&error, r));
2020
2021 sd_bus_error_free(&error);
2022
2023 r = manager_add_job(m, JOB_START, target, JOB_REPLACE, &error, &default_unit_job);
2024 if (r < 0) {
2025 *ret_error_message = "Failed to start default target";
2026 return log_emergency_errno(r, "Failed to start default target: %s", bus_error_message(&error, r));
2027 }
2028
2029 } else if (r < 0) {
2030 *ret_error_message = "Failed to isolate default target";
2031 return log_emergency_errno(r, "Failed to isolate default target: %s", bus_error_message(&error, r));
2032 }
2033
2034 m->default_unit_job_id = default_unit_job->id;
2035
2036 return 0;
2037 }
2038
2039 static void free_arguments(void) {
2040 size_t j;
2041
2042 /* Frees all arg_* variables, with the exception of arg_serialization */
2043
2044 for (j = 0; j < ELEMENTSOF(arg_default_rlimit); j++)
2045 arg_default_rlimit[j] = mfree(arg_default_rlimit[j]);
2046
2047 arg_default_unit = mfree(arg_default_unit);
2048 arg_confirm_spawn = mfree(arg_confirm_spawn);
2049 arg_join_controllers = strv_free_free(arg_join_controllers);
2050 arg_default_environment = strv_free(arg_default_environment);
2051 arg_syscall_archs = set_free(arg_syscall_archs);
2052 }
2053
2054 static int load_configuration(int argc, char **argv, const char **ret_error_message) {
2055 int r;
2056
2057 assert(ret_error_message);
2058
2059 arg_default_tasks_max = system_tasks_max_scale(DEFAULT_TASKS_MAX_PERCENTAGE, 100U);
2060
2061 r = parse_config_file();
2062 if (r < 0) {
2063 *ret_error_message = "Failed to parse config file";
2064 return r;
2065 }
2066
2067 if (arg_system) {
2068 r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, 0);
2069 if (r < 0)
2070 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
2071 }
2072
2073 /* Note that this also parses bits from the kernel command line, including "debug". */
2074 log_parse_environment();
2075
2076 r = parse_argv(argc, argv);
2077 if (r < 0) {
2078 *ret_error_message = "Failed to parse commandline arguments";
2079 return r;
2080 }
2081
2082 /* Initialize default unit */
2083 if (!arg_default_unit) {
2084 arg_default_unit = strdup(SPECIAL_DEFAULT_TARGET);
2085 if (!arg_default_unit) {
2086 *ret_error_message = "Failed to set default unit";
2087 return log_oom();
2088 }
2089 }
2090
2091 return 0;
2092 }
2093
2094 static int safety_checks(void) {
2095
2096 if (arg_action == ACTION_TEST &&
2097 geteuid() == 0) {
2098 log_error("Don't run test mode as root.");
2099 return -EPERM;
2100 }
2101
2102 if (!arg_system &&
2103 arg_action == ACTION_RUN &&
2104 sd_booted() <= 0) {
2105 log_error("Trying to run as user instance, but the system has not been booted with systemd.");
2106 return -EOPNOTSUPP;
2107 }
2108
2109 if (!arg_system &&
2110 arg_action == ACTION_RUN &&
2111 !getenv("XDG_RUNTIME_DIR")) {
2112 log_error("Trying to run as user instance, but $XDG_RUNTIME_DIR is not set.");
2113 return -EUNATCH;
2114 }
2115
2116 if (arg_system &&
2117 arg_action == ACTION_RUN &&
2118 running_in_chroot() > 0) {
2119 log_error("Cannot be run in a chroot() environment.");
2120 return -EOPNOTSUPP;
2121 }
2122
2123 return 0;
2124 }
2125
2126 static int initialize_security(
2127 bool *loaded_policy,
2128 dual_timestamp *security_start_timestamp,
2129 dual_timestamp *security_finish_timestamp,
2130 const char **ret_error_message) {
2131
2132 int r;
2133
2134 assert(loaded_policy);
2135 assert(security_start_timestamp);
2136 assert(security_finish_timestamp);
2137 assert(ret_error_message);
2138
2139 dual_timestamp_get(security_start_timestamp);
2140
2141 r = mac_selinux_setup(loaded_policy) < 0;
2142 if (r < 0) {
2143 *ret_error_message = "Failed to load SELinux policy";
2144 return r;
2145 }
2146
2147 r = mac_smack_setup(loaded_policy);
2148 if (r < 0) {
2149 *ret_error_message = "Failed to load SMACK policy";
2150 return r;
2151 }
2152
2153 r = ima_setup();
2154 if (r < 0) {
2155 *ret_error_message = "Failed to load IMA policy";
2156 return r;
2157 }
2158
2159 dual_timestamp_get(security_finish_timestamp);
2160 return 0;
2161 }
2162
2163 int main(int argc, char *argv[]) {
2164 Manager *m = NULL;
2165 int r, retval = EXIT_FAILURE;
2166 usec_t before_startup, after_startup;
2167 char timespan[FORMAT_TIMESPAN_MAX];
2168 FDSet *fds = NULL;
2169 bool reexecute = false;
2170 const char *shutdown_verb = NULL;
2171 dual_timestamp initrd_timestamp = DUAL_TIMESTAMP_NULL;
2172 dual_timestamp userspace_timestamp = DUAL_TIMESTAMP_NULL;
2173 dual_timestamp kernel_timestamp = DUAL_TIMESTAMP_NULL;
2174 dual_timestamp security_start_timestamp = DUAL_TIMESTAMP_NULL;
2175 dual_timestamp security_finish_timestamp = DUAL_TIMESTAMP_NULL;
2176 static char systemd[] = "systemd";
2177 bool skip_setup = false;
2178 bool loaded_policy = false;
2179 bool queue_default_job = false;
2180 bool first_boot = false;
2181 char *switch_root_dir = NULL, *switch_root_init = NULL;
2182 struct rlimit saved_rlimit_nofile = RLIMIT_MAKE_CONST(0), saved_rlimit_memlock = RLIMIT_MAKE_CONST((rlim_t) -1);
2183 const char *error_message = NULL;
2184
2185 redirect_telinit(argc, argv);
2186
2187 dual_timestamp_from_monotonic(&kernel_timestamp, 0);
2188 dual_timestamp_get(&userspace_timestamp);
2189
2190 /* Determine if this is a reexecution or normal bootup. We do
2191 * the full command line parsing much later, so let's just
2192 * have a quick peek here. */
2193 if (strv_find(argv+1, "--deserialize"))
2194 skip_setup = true;
2195
2196 /* If we have switched root, do all the special setup
2197 * things */
2198 if (strv_find(argv+1, "--switched-root"))
2199 skip_setup = false;
2200
2201 /* If we get started via the /sbin/init symlink then we are
2202 called 'init'. After a subsequent reexecution we are then
2203 called 'systemd'. That is confusing, hence let's call us
2204 systemd right-away. */
2205 program_invocation_short_name = systemd;
2206 (void) prctl(PR_SET_NAME, systemd);
2207
2208 saved_argv = argv;
2209 saved_argc = argc;
2210
2211 log_set_upgrade_syslog_to_journal(true);
2212
2213 if (getpid_cached() == 1) {
2214 /* Disable the umask logic */
2215 umask(0);
2216
2217 /* Always reopen /dev/console when running as PID 1 or one of its pre-execve() children. This is
2218 * important so that we never end up logging to any foreign stderr, for example if we have to log in a
2219 * child process right before execve()'ing the actual binary, at a point in time where socket
2220 * activation stderr/stdout area already set up. */
2221 log_set_always_reopen_console(true);
2222 }
2223
2224 if (getpid_cached() == 1 && detect_container() <= 0) {
2225
2226 /* Running outside of a container as PID 1 */
2227 arg_system = true;
2228 log_set_target(LOG_TARGET_KMSG);
2229 log_open();
2230
2231 if (in_initrd())
2232 initrd_timestamp = userspace_timestamp;
2233
2234 if (!skip_setup) {
2235 r = mount_setup_early();
2236 if (r < 0) {
2237 error_message = "Failed to mount early API filesystems";
2238 goto finish;
2239 }
2240
2241 r = initialize_security(
2242 &loaded_policy,
2243 &security_start_timestamp,
2244 &security_finish_timestamp,
2245 &error_message);
2246 if (r < 0)
2247 goto finish;
2248 }
2249
2250 if (mac_selinux_init() < 0) {
2251 error_message = "Failed to initialize SELinux policy";
2252 goto finish;
2253 }
2254
2255 if (!skip_setup)
2256 initialize_clock();
2257
2258 /* Set the default for later on, but don't actually
2259 * open the logs like this for now. Note that if we
2260 * are transitioning from the initrd there might still
2261 * be journal fd open, and we shouldn't attempt
2262 * opening that before we parsed /proc/cmdline which
2263 * might redirect output elsewhere. */
2264 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
2265
2266 } else if (getpid_cached() == 1) {
2267 /* Running inside a container, as PID 1 */
2268 arg_system = true;
2269 log_set_target(LOG_TARGET_CONSOLE);
2270 log_close_console(); /* force reopen of /dev/console */
2271 log_open();
2272
2273 /* For later on, see above... */
2274 log_set_target(LOG_TARGET_JOURNAL);
2275
2276 /* clear the kernel timestamp,
2277 * because we are in a container */
2278 kernel_timestamp = DUAL_TIMESTAMP_NULL;
2279 } else {
2280 /* Running as user instance */
2281 arg_system = false;
2282 log_set_target(LOG_TARGET_AUTO);
2283 log_open();
2284
2285 /* clear the kernel timestamp,
2286 * because we are not PID 1 */
2287 kernel_timestamp = DUAL_TIMESTAMP_NULL;
2288 }
2289
2290 initialize_coredump(skip_setup);
2291
2292 if (arg_system) {
2293 if (fixup_environment() < 0) {
2294 error_message = "Failed to fix up PID1 environment";
2295 goto finish;
2296 }
2297
2298 /* Try to figure out if we can use colors with the console. No
2299 * need to do that for user instances since they never log
2300 * into the console. */
2301 log_show_color(colors_enabled());
2302 r = make_null_stdio();
2303 if (r < 0)
2304 log_warning_errno(r, "Failed to redirect standard streams to /dev/null: %m");
2305 }
2306
2307 r = initialize_join_controllers();
2308 if (r < 0) {
2309 error_message = "Failed to initialize cgroup controllers";
2310 goto finish;
2311 }
2312
2313 /* Mount /proc, /sys and friends, so that /proc/cmdline and
2314 * /proc/$PID/fd is available. */
2315 if (getpid_cached() == 1) {
2316
2317 /* Load the kernel modules early. */
2318 if (!skip_setup)
2319 kmod_setup();
2320
2321 r = mount_setup(loaded_policy);
2322 if (r < 0) {
2323 error_message = "Failed to mount API filesystems";
2324 goto finish;
2325 }
2326 }
2327
2328 /* Reset all signal handlers. */
2329 (void) reset_all_signal_handlers();
2330 (void) ignore_signals(SIGNALS_IGNORE, -1);
2331
2332 r = load_configuration(argc, argv, &error_message);
2333 if (r < 0)
2334 goto finish;
2335
2336 r = safety_checks();
2337 if (r < 0)
2338 goto finish;
2339
2340 if (IN_SET(arg_action, ACTION_TEST, ACTION_HELP, ACTION_DUMP_CONFIGURATION_ITEMS))
2341 pager_open(arg_no_pager, false);
2342
2343 if (arg_action != ACTION_RUN)
2344 skip_setup = true;
2345
2346 if (arg_action == ACTION_HELP) {
2347 retval = help();
2348 goto finish;
2349 } else if (arg_action == ACTION_VERSION) {
2350 retval = version();
2351 goto finish;
2352 } else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) {
2353 unit_dump_config_items(stdout);
2354 retval = EXIT_SUCCESS;
2355 goto finish;
2356 }
2357
2358 assert_se(IN_SET(arg_action, ACTION_RUN, ACTION_TEST));
2359
2360 /* Close logging fds, in order not to confuse fdset below */
2361 log_close();
2362
2363 /* Remember open file descriptors for later deserialization */
2364 if (arg_action == ACTION_RUN) {
2365 r = fdset_new_fill(&fds);
2366 if (r < 0) {
2367 log_emergency_errno(r, "Failed to allocate fd set: %m");
2368 error_message = "Failed to allocate fd set";
2369 goto finish;
2370 } else
2371 fdset_cloexec(fds, true);
2372
2373 if (arg_serialization)
2374 assert_se(fdset_remove(fds, fileno(arg_serialization)) >= 0);
2375
2376 if (arg_system)
2377 /* Become a session leader if we aren't one yet. */
2378 setsid();
2379 }
2380
2381 /* Move out of the way, so that we won't block unmounts */
2382 assert_se(chdir("/") == 0);
2383
2384 /* Reset the console, but only if this is really init and we
2385 * are freshly booted */
2386 if (arg_system && arg_action == ACTION_RUN) {
2387
2388 /* If we are init, we connect stdin/stdout/stderr to
2389 * /dev/null and make sure we don't have a controlling
2390 * tty. */
2391 release_terminal();
2392
2393 if (getpid_cached() == 1 && !skip_setup)
2394 console_setup();
2395 }
2396
2397 /* Open the logging devices, if possible and necessary */
2398 log_open();
2399
2400 if (arg_show_status == _SHOW_STATUS_UNSET)
2401 arg_show_status = SHOW_STATUS_YES;
2402
2403 /* Make sure we leave a core dump without panicing the
2404 * kernel. */
2405 if (getpid_cached() == 1) {
2406 install_crash_handler();
2407
2408 r = mount_cgroup_controllers(arg_join_controllers);
2409 if (r < 0)
2410 goto finish;
2411 }
2412
2413 log_execution_mode(&first_boot);
2414
2415 if (arg_action == ACTION_RUN) {
2416 r = initialize_runtime(skip_setup,
2417 &saved_rlimit_nofile,
2418 &saved_rlimit_memlock,
2419 &error_message);
2420 if (r < 0)
2421 goto finish;
2422 }
2423
2424 r = manager_new(arg_system ? UNIT_FILE_SYSTEM : UNIT_FILE_USER,
2425 arg_action == ACTION_TEST ? MANAGER_TEST_FULL : 0,
2426 &m);
2427 if (r < 0) {
2428 log_emergency_errno(r, "Failed to allocate manager object: %m");
2429 error_message = "Failed to allocate manager object";
2430 goto finish;
2431 }
2432
2433 m->timestamps[MANAGER_TIMESTAMP_KERNEL] = kernel_timestamp;
2434 m->timestamps[MANAGER_TIMESTAMP_INITRD] = initrd_timestamp;
2435 m->timestamps[MANAGER_TIMESTAMP_USERSPACE] = userspace_timestamp;
2436 m->timestamps[MANAGER_TIMESTAMP_SECURITY_START] = security_start_timestamp;
2437 m->timestamps[MANAGER_TIMESTAMP_SECURITY_FINISH] = security_finish_timestamp;
2438
2439 set_manager_defaults(m);
2440 set_manager_settings(m);
2441 manager_set_first_boot(m, first_boot);
2442
2443 /* Remember whether we should queue the default job */
2444 queue_default_job = !arg_serialization || arg_switched_root;
2445
2446 before_startup = now(CLOCK_MONOTONIC);
2447
2448 r = manager_startup(m, arg_serialization, fds);
2449 if (r < 0) {
2450 log_error_errno(r, "Failed to fully start up daemon: %m");
2451 error_message = "Failed to start up manager";
2452 goto finish;
2453 }
2454
2455 /* This will close all file descriptors that were opened, but not claimed by any unit. */
2456 fds = fdset_free(fds);
2457 arg_serialization = safe_fclose(arg_serialization);
2458
2459 if (queue_default_job) {
2460 r = do_queue_default_job(m, &error_message);
2461 if (r < 0)
2462 goto finish;
2463 }
2464
2465 after_startup = now(CLOCK_MONOTONIC);
2466
2467 log_full(arg_action == ACTION_TEST ? LOG_INFO : LOG_DEBUG,
2468 "Loaded units and determined initial transaction in %s.",
2469 format_timespan(timespan, sizeof(timespan), after_startup - before_startup, 100 * USEC_PER_MSEC));
2470
2471 if (arg_system) {
2472 _cleanup_free_ char *taint;
2473
2474 taint = manager_taint_string(m);
2475 if (!isempty(taint))
2476 log_struct(LOG_NOTICE,
2477 LOG_MESSAGE("System is tainted: %s", taint),
2478 "TAINT=%s", taint,
2479 "MESSAGE_ID=" SD_MESSAGE_TAINTED_STR,
2480 NULL);
2481 }
2482
2483 if (arg_action == ACTION_TEST) {
2484 printf("-> By units:\n");
2485 manager_dump_units(m, stdout, "\t");
2486
2487 printf("-> By jobs:\n");
2488 manager_dump_jobs(m, stdout, "\t");
2489 retval = EXIT_SUCCESS;
2490 goto finish;
2491 }
2492
2493 r = invoke_main_loop(m,
2494 &reexecute,
2495 &retval,
2496 &shutdown_verb,
2497 &fds,
2498 &switch_root_dir,
2499 &switch_root_init,
2500 &error_message);
2501
2502 finish:
2503 pager_close();
2504
2505 if (m)
2506 arg_shutdown_watchdog = m->shutdown_watchdog;
2507
2508 m = manager_free(m);
2509
2510 free_arguments();
2511 mac_selinux_finish();
2512
2513 if (reexecute)
2514 do_reexecute(argc, argv,
2515 &saved_rlimit_nofile,
2516 &saved_rlimit_memlock,
2517 fds,
2518 switch_root_dir,
2519 switch_root_init,
2520 &error_message); /* This only returns if reexecution failed */
2521
2522 arg_serialization = safe_fclose(arg_serialization);
2523 fds = fdset_free(fds);
2524
2525 #if HAVE_VALGRIND_VALGRIND_H
2526 /* If we are PID 1 and running under valgrind, then let's exit
2527 * here explicitly. valgrind will only generate nice output on
2528 * exit(), not on exec(), hence let's do the former not the
2529 * latter here. */
2530 if (getpid_cached() == 1 && RUNNING_ON_VALGRIND) {
2531 /* Cleanup watchdog_device strings for valgrind. We need them
2532 * in become_shutdown() so normally we cannot free them yet. */
2533 watchdog_free_device();
2534 arg_watchdog_device = mfree(arg_watchdog_device);
2535 return 0;
2536 }
2537 #endif
2538
2539 if (shutdown_verb) {
2540 r = become_shutdown(shutdown_verb, retval);
2541
2542 log_error_errno(r, "Failed to execute shutdown binary, %s: %m", getpid_cached() == 1 ? "freezing" : "quitting");
2543 error_message = "Failed to execute shutdown binary";
2544 }
2545
2546 watchdog_free_device();
2547 arg_watchdog_device = mfree(arg_watchdog_device);
2548
2549 if (getpid_cached() == 1) {
2550 if (error_message)
2551 manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
2552 ANSI_HIGHLIGHT_RED "!!!!!!" ANSI_NORMAL,
2553 "%s, freezing.", error_message);
2554 freeze_or_reboot();
2555 }
2556
2557 return retval;
2558 }