]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/main.c
meson: add -Dmemory-accounting-default=true|false
[thirdparty/systemd.git] / src / core / main.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2010 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <getopt.h>
24 #include <signal.h>
25 #include <stdio.h>
26 #include <string.h>
27 #include <sys/mount.h>
28 #include <sys/prctl.h>
29 #include <sys/reboot.h>
30 #include <sys/stat.h>
31 #include <unistd.h>
32 #if HAVE_SECCOMP
33 #include <seccomp.h>
34 #endif
35 #if HAVE_VALGRIND_VALGRIND_H
36 #include <valgrind/valgrind.h>
37 #endif
38
39 #include "sd-bus.h"
40 #include "sd-daemon.h"
41 #include "sd-messages.h"
42
43 #include "alloc-util.h"
44 #include "architecture.h"
45 #include "build.h"
46 #include "bus-error.h"
47 #include "bus-util.h"
48 #include "capability-util.h"
49 #include "clock-util.h"
50 #include "conf-parser.h"
51 #include "cpu-set-util.h"
52 #include "dbus-manager.h"
53 #include "def.h"
54 #include "emergency-action.h"
55 #include "env-util.h"
56 #include "fd-util.h"
57 #include "fdset.h"
58 #include "fileio.h"
59 #include "format-util.h"
60 #include "fs-util.h"
61 #include "hostname-setup.h"
62 #include "ima-setup.h"
63 #include "killall.h"
64 #include "kmod-setup.h"
65 #include "load-fragment.h"
66 #include "log.h"
67 #include "loopback-setup.h"
68 #include "machine-id-setup.h"
69 #include "manager.h"
70 #include "missing.h"
71 #include "mount-setup.h"
72 #include "pager.h"
73 #include "parse-util.h"
74 #include "path-util.h"
75 #include "proc-cmdline.h"
76 #include "process-util.h"
77 #include "raw-clone.h"
78 #include "rlimit-util.h"
79 #if HAVE_SECCOMP
80 #include "seccomp-util.h"
81 #endif
82 #include "selinux-setup.h"
83 #include "selinux-util.h"
84 #include "signal-util.h"
85 #include "smack-setup.h"
86 #include "special.h"
87 #include "stat-util.h"
88 #include "stdio-util.h"
89 #include "strv.h"
90 #include "switch-root.h"
91 #include "terminal-util.h"
92 #include "umask-util.h"
93 #include "user-util.h"
94 #include "util.h"
95 #include "virt.h"
96 #include "watchdog.h"
97
/* Operation requested for this invocation. ACTION_RUN is the default; the command line parser may select
 * one of the others. */
static enum {
        ACTION_RUN,
        ACTION_HELP,
        ACTION_VERSION,
        ACTION_TEST,
        ACTION_DUMP_CONFIGURATION_ITEMS
} arg_action = ACTION_RUN;

/* Unit to start. Set from systemd.unit=/rd.systemd.unit= or a SysV runlevel word on the kernel command
 * line, or from --unit=. */
static char *arg_default_unit = NULL;
/* Toggled by --system and --user. */
static bool arg_system = false;

/* Crash handling knobs, read by crash() and freeze_or_reboot(). */
static bool arg_dump_core = true;
static int arg_crash_chvt = -1; /* VT to switch to on crash; negative means "do not switch" */
static bool arg_crash_shell = false;
static bool arg_crash_reboot = false;

/* Console path filled in by parse_confirm_spawn(); NULL when the switch was given a false value. */
static char *arg_confirm_spawn = NULL;
static ShowStatus arg_show_status = _SHOW_STATUS_UNSET;
static bool arg_switched_root = false;
static bool arg_no_pager = false;
static bool arg_service_watchdogs = true;
/* NULL-terminated array of string lists, rebuilt by config_parse_join_controllers(). */
static char ***arg_join_controllers = NULL;

/* Defaults for units; the ones listed in set_manager_defaults() are copied into the Manager there. */
static ExecOutput arg_default_std_output = EXEC_OUTPUT_JOURNAL;
static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT;
static usec_t arg_default_restart_usec = DEFAULT_RESTART_USEC;
static usec_t arg_default_timeout_start_usec = DEFAULT_TIMEOUT_USEC;
static usec_t arg_default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC;
static usec_t arg_default_start_limit_interval = DEFAULT_START_LIMIT_INTERVAL;
static unsigned arg_default_start_limit_burst = DEFAULT_START_LIMIT_BURST;

/* Watchdog configuration (RuntimeWatchdogSec=, ShutdownWatchdogSec=, WatchdogDevice=). */
static usec_t arg_runtime_watchdog = 0;
static usec_t arg_shutdown_watchdog = 10 * USEC_PER_MINUTE;
static char *arg_watchdog_device = NULL;

static char **arg_default_environment = NULL;
/* One slot per resource limit, indexed by the RLIMIT_* constant (see the DefaultLimit*= table entries). */
static struct rlimit *arg_default_rlimit[_RLIMIT_MAX] = {};
static uint64_t arg_capability_bounding_set = CAP_ALL;
static nsec_t arg_timer_slack_nsec = NSEC_INFINITY;
static usec_t arg_default_timer_accuracy_usec = 1 * USEC_PER_MINUTE;
static Set* arg_syscall_archs = NULL;
static FILE* arg_serialization = NULL;

/* Default*Accounting= switches. The memory accounting default comes from the build-time
 * MEMORY_ACCOUNTING_DEFAULT setting. */
static bool arg_default_cpu_accounting = false;
static bool arg_default_io_accounting = false;
static bool arg_default_ip_accounting = false;
static bool arg_default_blockio_accounting = false;
static bool arg_default_memory_accounting = MEMORY_ACCOUNTING_DEFAULT;
static bool arg_default_tasks_accounting = true;
static uint64_t arg_default_tasks_max = UINT64_MAX;

/* Machine ID override; only ever set to a non-null ID by set_machine_id(). */
static sd_id128_t arg_machine_id = {};
static EmergencyAction arg_cad_burst_action = EMERGENCY_ACTION_REBOOT_FORCE;
143
144 noreturn static void freeze_or_reboot(void) {
145
146 if (arg_crash_reboot) {
147 log_notice("Rebooting in 10s...");
148 (void) sleep(10);
149
150 log_notice("Rebooting now...");
151 (void) reboot(RB_AUTOBOOT);
152 log_emergency_errno(errno, "Failed to reboot: %m");
153 }
154
155 log_emergency("Freezing execution.");
156 freeze();
157 }
158
159 noreturn static void crash(int sig) {
160 struct sigaction sa;
161 pid_t pid;
162
163 if (getpid_cached() != 1)
164 /* Pass this on immediately, if this is not PID 1 */
165 (void) raise(sig);
166 else if (!arg_dump_core)
167 log_emergency("Caught <%s>, not dumping core.", signal_to_string(sig));
168 else {
169 sa = (struct sigaction) {
170 .sa_handler = nop_signal_handler,
171 .sa_flags = SA_NOCLDSTOP|SA_RESTART,
172 };
173
174 /* We want to wait for the core process, hence let's enable SIGCHLD */
175 (void) sigaction(SIGCHLD, &sa, NULL);
176
177 pid = raw_clone(SIGCHLD);
178 if (pid < 0)
179 log_emergency_errno(errno, "Caught <%s>, cannot fork for core dump: %m", signal_to_string(sig));
180 else if (pid == 0) {
181 /* Enable default signal handler for core dump */
182
183 sa = (struct sigaction) {
184 .sa_handler = SIG_DFL,
185 };
186 (void) sigaction(sig, &sa, NULL);
187
188 /* Don't limit the coredump size */
189 (void) setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY));
190
191 /* Just to be sure... */
192 (void) chdir("/");
193
194 /* Raise the signal again */
195 pid = raw_getpid();
196 (void) kill(pid, sig); /* raise() would kill the parent */
197
198 assert_not_reached("We shouldn't be here...");
199 _exit(EXIT_FAILURE);
200 } else {
201 siginfo_t status;
202 int r;
203
204 /* Order things nicely. */
205 r = wait_for_terminate(pid, &status);
206 if (r < 0)
207 log_emergency_errno(r, "Caught <%s>, waitpid() failed: %m", signal_to_string(sig));
208 else if (status.si_code != CLD_DUMPED)
209 log_emergency("Caught <%s>, core dump failed (child "PID_FMT", code=%s, status=%i/%s).",
210 signal_to_string(sig),
211 pid, sigchld_code_to_string(status.si_code),
212 status.si_status,
213 strna(status.si_code == CLD_EXITED
214 ? exit_status_to_string(status.si_status, EXIT_STATUS_MINIMAL)
215 : signal_to_string(status.si_status)));
216 else
217 log_emergency("Caught <%s>, dumped core as pid "PID_FMT".", signal_to_string(sig), pid);
218 }
219 }
220
221 if (arg_crash_chvt >= 0)
222 (void) chvt(arg_crash_chvt);
223
224 sa = (struct sigaction) {
225 .sa_handler = SIG_IGN,
226 .sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT|SA_RESTART,
227 };
228
229 /* Let the kernel reap children for us */
230 (void) sigaction(SIGCHLD, &sa, NULL);
231
232 if (arg_crash_shell) {
233 log_notice("Executing crash shell in 10s...");
234 (void) sleep(10);
235
236 pid = raw_clone(SIGCHLD);
237 if (pid < 0)
238 log_emergency_errno(errno, "Failed to fork off crash shell: %m");
239 else if (pid == 0) {
240 (void) setsid();
241 (void) make_console_stdio();
242 (void) execle("/bin/sh", "/bin/sh", NULL, environ);
243
244 log_emergency_errno(errno, "execle() failed: %m");
245 _exit(EXIT_FAILURE);
246 } else {
247 log_info("Spawned crash shell as PID "PID_FMT".", pid);
248 (void) wait_for_terminate(pid, NULL);
249 }
250 }
251
252 freeze_or_reboot();
253 }
254
255 static void install_crash_handler(void) {
256 static const struct sigaction sa = {
257 .sa_handler = crash,
258 .sa_flags = SA_NODEFER, /* So that we can raise the signal again from the signal handler */
259 };
260 int r;
261
262 /* We ignore the return value here, since, we don't mind if we
263 * cannot set up a crash handler */
264 r = sigaction_many(&sa, SIGNALS_CRASH_HANDLER, -1);
265 if (r < 0)
266 log_debug_errno(r, "I had trouble setting up the crash handler, ignoring: %m");
267 }
268
269 static int console_setup(void) {
270 _cleanup_close_ int tty_fd = -1;
271 int r;
272
273 tty_fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
274 if (tty_fd < 0)
275 return log_error_errno(tty_fd, "Failed to open /dev/console: %m");
276
277 /* We don't want to force text mode. plymouth may be showing
278 * pictures already from initrd. */
279 r = reset_terminal_fd(tty_fd, false);
280 if (r < 0)
281 return log_error_errno(r, "Failed to reset /dev/console: %m");
282
283 return 0;
284 }
285
286 static int parse_crash_chvt(const char *value) {
287 int b;
288
289 if (safe_atoi(value, &arg_crash_chvt) >= 0)
290 return 0;
291
292 b = parse_boolean(value);
293 if (b < 0)
294 return b;
295
296 if (b > 0)
297 arg_crash_chvt = 0; /* switch to where kmsg goes */
298 else
299 arg_crash_chvt = -1; /* turn off switching */
300
301 return 0;
302 }
303
/* Turns a confirm-spawn argument into a console path.
 *
 * value:   NULL (treated like "true"), a boolean string, a path, or a bare tty name.
 * console: receives NULL if the feature is switched off, otherwise a newly allocated path that the
 *          caller owns. Left untouched on failure.
 *
 * Returns 0 on success, -ENOMEM if the path could not be allocated. */
static int parse_confirm_spawn(const char *value, char **console) {
        char *path;
        int enabled;

        enabled = value ? parse_boolean(value) : 1;
        if (enabled == 0) {
                /* Explicitly turned off */
                *console = NULL;
                return 0;
        }

        if (enabled > 0)
                /* Turned on, use the default tty */
                path = strdup("/dev/console");
        else if (!is_path(value))
                /* Not a boolean and not a path: take it as a tty name below /dev/ */
                path = strjoin("/dev/", value);
        else
                /* Not a boolean, but a fully qualified path */
                path = strdup(value);
        if (!path)
                return -ENOMEM;

        *console = path;
        return 0;
}
325
326 static int set_machine_id(const char *m) {
327 sd_id128_t t;
328 assert(m);
329
330 if (sd_id128_from_string(m, &t) < 0)
331 return -EINVAL;
332
333 if (sd_id128_is_null(t))
334 return -EINVAL;
335
336 arg_machine_id = t;
337 return 0;
338 }
339
340 static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
341
342 int r;
343
344 assert(key);
345
346 if (STR_IN_SET(key, "systemd.unit", "rd.systemd.unit")) {
347
348 if (proc_cmdline_value_missing(key, value))
349 return 0;
350
351 if (!unit_name_is_valid(value, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
352 log_warning("Unit name specified on %s= is not valid, ignoring: %s", key, value);
353 else if (in_initrd() == !!startswith(key, "rd.")) {
354 if (free_and_strdup(&arg_default_unit, value) < 0)
355 return log_oom();
356 }
357
358 } else if (proc_cmdline_key_streq(key, "systemd.dump_core")) {
359
360 r = value ? parse_boolean(value) : true;
361 if (r < 0)
362 log_warning("Failed to parse dump core switch %s. Ignoring.", value);
363 else
364 arg_dump_core = r;
365
366 } else if (proc_cmdline_key_streq(key, "systemd.crash_chvt")) {
367
368 if (!value)
369 arg_crash_chvt = 0; /* turn on */
370 else if (parse_crash_chvt(value) < 0)
371 log_warning("Failed to parse crash chvt switch %s. Ignoring.", value);
372
373 } else if (proc_cmdline_key_streq(key, "systemd.crash_shell")) {
374
375 r = value ? parse_boolean(value) : true;
376 if (r < 0)
377 log_warning("Failed to parse crash shell switch %s. Ignoring.", value);
378 else
379 arg_crash_shell = r;
380
381 } else if (proc_cmdline_key_streq(key, "systemd.crash_reboot")) {
382
383 r = value ? parse_boolean(value) : true;
384 if (r < 0)
385 log_warning("Failed to parse crash reboot switch %s. Ignoring.", value);
386 else
387 arg_crash_reboot = r;
388
389 } else if (proc_cmdline_key_streq(key, "systemd.confirm_spawn")) {
390 char *s;
391
392 r = parse_confirm_spawn(value, &s);
393 if (r < 0)
394 log_warning_errno(r, "Failed to parse confirm_spawn switch %s. Ignoring.", value);
395 else {
396 free(arg_confirm_spawn);
397 arg_confirm_spawn = s;
398 }
399
400 } else if (proc_cmdline_key_streq(key, "systemd.service_watchdogs")) {
401
402 r = value ? parse_boolean(value) : true;
403 if (r < 0)
404 log_warning("Failed to parse service watchdog switch %s. Ignoring.", value);
405 else
406 arg_service_watchdogs = r;
407
408 } else if (proc_cmdline_key_streq(key, "systemd.show_status")) {
409
410 if (value) {
411 r = parse_show_status(value, &arg_show_status);
412 if (r < 0)
413 log_warning("Failed to parse show status switch %s. Ignoring.", value);
414 } else
415 arg_show_status = SHOW_STATUS_YES;
416
417 } else if (proc_cmdline_key_streq(key, "systemd.default_standard_output")) {
418
419 if (proc_cmdline_value_missing(key, value))
420 return 0;
421
422 r = exec_output_from_string(value);
423 if (r < 0)
424 log_warning("Failed to parse default standard output switch %s. Ignoring.", value);
425 else
426 arg_default_std_output = r;
427
428 } else if (proc_cmdline_key_streq(key, "systemd.default_standard_error")) {
429
430 if (proc_cmdline_value_missing(key, value))
431 return 0;
432
433 r = exec_output_from_string(value);
434 if (r < 0)
435 log_warning("Failed to parse default standard error switch %s. Ignoring.", value);
436 else
437 arg_default_std_error = r;
438
439 } else if (streq(key, "systemd.setenv")) {
440
441 if (proc_cmdline_value_missing(key, value))
442 return 0;
443
444 if (env_assignment_is_valid(value)) {
445 char **env;
446
447 env = strv_env_set(arg_default_environment, value);
448 if (!env)
449 return log_oom();
450
451 arg_default_environment = env;
452 } else
453 log_warning("Environment variable name '%s' is not valid. Ignoring.", value);
454
455 } else if (proc_cmdline_key_streq(key, "systemd.machine_id")) {
456
457 if (proc_cmdline_value_missing(key, value))
458 return 0;
459
460 r = set_machine_id(value);
461 if (r < 0)
462 log_warning("MachineID '%s' is not valid. Ignoring.", value);
463
464 } else if (proc_cmdline_key_streq(key, "systemd.default_timeout_start_sec")) {
465
466 if (proc_cmdline_value_missing(key, value))
467 return 0;
468
469 r = parse_sec(value, &arg_default_timeout_start_usec);
470 if (r < 0)
471 log_warning_errno(r, "Failed to parse default start timeout: %s, ignoring.", value);
472
473 if (arg_default_timeout_start_usec <= 0)
474 arg_default_timeout_start_usec = USEC_INFINITY;
475
476 } else if (proc_cmdline_key_streq(key, "systemd.watchdog_device")) {
477
478 if (proc_cmdline_value_missing(key, value))
479 return 0;
480
481 parse_path_argument_and_warn(value, false, &arg_watchdog_device);
482
483 } else if (streq(key, "quiet") && !value) {
484
485 if (arg_show_status == _SHOW_STATUS_UNSET)
486 arg_show_status = SHOW_STATUS_AUTO;
487
488 } else if (streq(key, "debug") && !value) {
489
490 /* Note that log_parse_environment() handles 'debug'
491 * too, and sets the log level to LOG_DEBUG. */
492
493 if (detect_container() > 0)
494 log_set_target(LOG_TARGET_CONSOLE);
495
496 } else if (!value) {
497 const char *target;
498
499 /* SysV compatibility */
500 target = runlevel_to_target(key);
501 if (target)
502 return free_and_strdup(&arg_default_unit, target);
503 }
504
505 return 0;
506 }
507
/* Generates a config parser callback called `name` that passes the right-hand side of the assignment
 * to func(). descr is spliced into the message logged when func() returns a negative value, e.g.
 * "Invalid log level 'foo': ...". The generated callback always returns 0, so an invalid value is
 * reported through the log message only. */
#define DEFINE_SETTER(name, func, descr)                              \
        static int name(const char *unit,                             \
                        const char *filename,                         \
                        unsigned line,                                \
                        const char *section,                          \
                        unsigned section_line,                        \
                        const char *lvalue,                           \
                        int ltype,                                    \
                        const char *rvalue,                           \
                        void *data,                                   \
                        void *userdata) {                             \
                                                                      \
                int r;                                                \
                                                                      \
                assert(filename);                                     \
                assert(lvalue);                                       \
                assert(rvalue);                                       \
                                                                      \
                r = func(rvalue);                                     \
                if (r < 0)                                            \
                        log_syntax(unit, LOG_ERR, filename, line, r,  \
                                   "Invalid " descr " '%s': %m",      \
                                   rvalue);                           \
                                                                      \
                return 0;                                             \
        }

DEFINE_SETTER(config_parse_level2, log_set_max_level_from_string, "log level")
DEFINE_SETTER(config_parse_target, log_set_target_from_string, "target")
DEFINE_SETTER(config_parse_color, log_show_color_from_string, "color")
DEFINE_SETTER(config_parse_location, log_show_location_from_string, "location")
539
540 static int config_parse_cpu_affinity2(
541 const char *unit,
542 const char *filename,
543 unsigned line,
544 const char *section,
545 unsigned section_line,
546 const char *lvalue,
547 int ltype,
548 const char *rvalue,
549 void *data,
550 void *userdata) {
551
552 _cleanup_cpu_free_ cpu_set_t *c = NULL;
553 int ncpus;
554
555 ncpus = parse_cpu_set_and_warn(rvalue, &c, unit, filename, line, lvalue);
556 if (ncpus < 0)
557 return ncpus;
558
559 if (sched_setaffinity(0, CPU_ALLOC_SIZE(ncpus), c) < 0)
560 log_warning_errno(errno, "Failed to set CPU affinity: %m");
561
562 return 0;
563 }
564
565 static int config_parse_show_status(
566 const char* unit,
567 const char *filename,
568 unsigned line,
569 const char *section,
570 unsigned section_line,
571 const char *lvalue,
572 int ltype,
573 const char *rvalue,
574 void *data,
575 void *userdata) {
576
577 int k;
578 ShowStatus *b = data;
579
580 assert(filename);
581 assert(lvalue);
582 assert(rvalue);
583 assert(data);
584
585 k = parse_show_status(rvalue, b);
586 if (k < 0) {
587 log_syntax(unit, LOG_ERR, filename, line, k, "Failed to parse show status setting, ignoring: %s", rvalue);
588 return 0;
589 }
590
591 return 0;
592 }
593
594 static int config_parse_output_restricted(
595 const char* unit,
596 const char *filename,
597 unsigned line,
598 const char *section,
599 unsigned section_line,
600 const char *lvalue,
601 int ltype,
602 const char *rvalue,
603 void *data,
604 void *userdata) {
605
606 ExecOutput t, *eo = data;
607
608 assert(filename);
609 assert(lvalue);
610 assert(rvalue);
611 assert(data);
612
613 t = exec_output_from_string(rvalue);
614 if (t < 0) {
615 log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse output type, ignoring: %s", rvalue);
616 return 0;
617 }
618
619 if (IN_SET(t, EXEC_OUTPUT_SOCKET, EXEC_OUTPUT_NAMED_FD, EXEC_OUTPUT_FILE)) {
620 log_syntax(unit, LOG_ERR, filename, line, 0, "Standard output types socket, fd:, file: are not supported as defaults, ignoring: %s", rvalue);
621 return 0;
622 }
623
624 *eo = t;
625 return 0;
626 }
627
/* Config parser callback for CrashChangeVT= (and the legacy CrashChVT=): hands rvalue to
 * parse_crash_chvt(). Always returns 0; a value that cannot be parsed is logged and ignored. */
static int config_parse_crash_chvt(
                const char* unit,
                const char *filename,
                unsigned line,
                const char *section,
                unsigned section_line,
                const char *lvalue,
                int ltype,
                const char *rvalue,
                void *data,
                void *userdata) {

        int k;

        assert(filename);
        assert(lvalue);
        assert(rvalue);

        k = parse_crash_chvt(rvalue);
        if (k < 0)
                log_syntax(unit, LOG_ERR, filename, line, k, "Failed to parse CrashChangeVT= setting, ignoring: %s", rvalue);

        return 0;
}
654
655 static int config_parse_join_controllers(const char *unit,
656 const char *filename,
657 unsigned line,
658 const char *section,
659 unsigned section_line,
660 const char *lvalue,
661 int ltype,
662 const char *rvalue,
663 void *data,
664 void *userdata) {
665
666 const char *whole_rvalue = rvalue;
667 unsigned n = 0;
668
669 assert(filename);
670 assert(lvalue);
671 assert(rvalue);
672
673 arg_join_controllers = strv_free_free(arg_join_controllers);
674
675 for (;;) {
676 _cleanup_free_ char *word = NULL;
677 char **l;
678 int r;
679
680 r = extract_first_word(&rvalue, &word, NULL, EXTRACT_QUOTES);
681 if (r < 0) {
682 log_syntax(unit, LOG_ERR, filename, line, r, "Invalid value for %s: %s", lvalue, whole_rvalue);
683 return r;
684 }
685 if (r == 0)
686 break;
687
688 l = strv_split(word, ",");
689 if (!l)
690 return log_oom();
691 strv_uniq(l);
692
693 if (strv_length(l) <= 1) {
694 strv_free(l);
695 continue;
696 }
697
698 if (!arg_join_controllers) {
699 arg_join_controllers = new(char**, 2);
700 if (!arg_join_controllers) {
701 strv_free(l);
702 return log_oom();
703 }
704
705 arg_join_controllers[0] = l;
706 arg_join_controllers[1] = NULL;
707
708 n = 1;
709 } else {
710 char ***a;
711 char ***t;
712
713 t = new0(char**, n+2);
714 if (!t) {
715 strv_free(l);
716 return log_oom();
717 }
718
719 n = 0;
720
721 for (a = arg_join_controllers; *a; a++) {
722
723 if (strv_overlap(*a, l)) {
724 if (strv_extend_strv(&l, *a, false) < 0) {
725 strv_free(l);
726 strv_free_free(t);
727 return log_oom();
728 }
729
730 } else {
731 char **c;
732
733 c = strv_copy(*a);
734 if (!c) {
735 strv_free(l);
736 strv_free_free(t);
737 return log_oom();
738 }
739
740 t[n++] = c;
741 }
742 }
743
744 t[n++] = strv_uniq(l);
745
746 strv_free_free(arg_join_controllers);
747 arg_join_controllers = t;
748 }
749 }
750 if (!isempty(rvalue))
751 log_syntax(unit, LOG_ERR, filename, line, 0, "Trailing garbage, ignoring.");
752
753 return 0;
754 }
755
756 static int parse_config_file(void) {
757
758 const ConfigTableItem items[] = {
759 { "Manager", "LogLevel", config_parse_level2, 0, NULL },
760 { "Manager", "LogTarget", config_parse_target, 0, NULL },
761 { "Manager", "LogColor", config_parse_color, 0, NULL },
762 { "Manager", "LogLocation", config_parse_location, 0, NULL },
763 { "Manager", "DumpCore", config_parse_bool, 0, &arg_dump_core },
764 { "Manager", "CrashChVT", /* legacy */ config_parse_crash_chvt, 0, NULL },
765 { "Manager", "CrashChangeVT", config_parse_crash_chvt, 0, NULL },
766 { "Manager", "CrashShell", config_parse_bool, 0, &arg_crash_shell },
767 { "Manager", "CrashReboot", config_parse_bool, 0, &arg_crash_reboot },
768 { "Manager", "ShowStatus", config_parse_show_status, 0, &arg_show_status },
769 { "Manager", "CPUAffinity", config_parse_cpu_affinity2, 0, NULL },
770 { "Manager", "JoinControllers", config_parse_join_controllers, 0, &arg_join_controllers },
771 { "Manager", "RuntimeWatchdogSec", config_parse_sec, 0, &arg_runtime_watchdog },
772 { "Manager", "ShutdownWatchdogSec", config_parse_sec, 0, &arg_shutdown_watchdog },
773 { "Manager", "WatchdogDevice", config_parse_path, 0, &arg_watchdog_device },
774 { "Manager", "CapabilityBoundingSet", config_parse_capability_set, 0, &arg_capability_bounding_set },
775 #if HAVE_SECCOMP
776 { "Manager", "SystemCallArchitectures", config_parse_syscall_archs, 0, &arg_syscall_archs },
777 #endif
778 { "Manager", "TimerSlackNSec", config_parse_nsec, 0, &arg_timer_slack_nsec },
779 { "Manager", "DefaultTimerAccuracySec", config_parse_sec, 0, &arg_default_timer_accuracy_usec },
780 { "Manager", "DefaultStandardOutput", config_parse_output_restricted,0, &arg_default_std_output },
781 { "Manager", "DefaultStandardError", config_parse_output_restricted,0, &arg_default_std_error },
782 { "Manager", "DefaultTimeoutStartSec", config_parse_sec, 0, &arg_default_timeout_start_usec },
783 { "Manager", "DefaultTimeoutStopSec", config_parse_sec, 0, &arg_default_timeout_stop_usec },
784 { "Manager", "DefaultRestartSec", config_parse_sec, 0, &arg_default_restart_usec },
785 { "Manager", "DefaultStartLimitInterval", config_parse_sec, 0, &arg_default_start_limit_interval }, /* obsolete alias */
786 { "Manager", "DefaultStartLimitIntervalSec",config_parse_sec, 0, &arg_default_start_limit_interval },
787 { "Manager", "DefaultStartLimitBurst", config_parse_unsigned, 0, &arg_default_start_limit_burst },
788 { "Manager", "DefaultEnvironment", config_parse_environ, 0, &arg_default_environment },
789 { "Manager", "DefaultLimitCPU", config_parse_limit, RLIMIT_CPU, arg_default_rlimit },
790 { "Manager", "DefaultLimitFSIZE", config_parse_limit, RLIMIT_FSIZE, arg_default_rlimit },
791 { "Manager", "DefaultLimitDATA", config_parse_limit, RLIMIT_DATA, arg_default_rlimit },
792 { "Manager", "DefaultLimitSTACK", config_parse_limit, RLIMIT_STACK, arg_default_rlimit },
793 { "Manager", "DefaultLimitCORE", config_parse_limit, RLIMIT_CORE, arg_default_rlimit },
794 { "Manager", "DefaultLimitRSS", config_parse_limit, RLIMIT_RSS, arg_default_rlimit },
795 { "Manager", "DefaultLimitNOFILE", config_parse_limit, RLIMIT_NOFILE, arg_default_rlimit },
796 { "Manager", "DefaultLimitAS", config_parse_limit, RLIMIT_AS, arg_default_rlimit },
797 { "Manager", "DefaultLimitNPROC", config_parse_limit, RLIMIT_NPROC, arg_default_rlimit },
798 { "Manager", "DefaultLimitMEMLOCK", config_parse_limit, RLIMIT_MEMLOCK, arg_default_rlimit },
799 { "Manager", "DefaultLimitLOCKS", config_parse_limit, RLIMIT_LOCKS, arg_default_rlimit },
800 { "Manager", "DefaultLimitSIGPENDING", config_parse_limit, RLIMIT_SIGPENDING, arg_default_rlimit },
801 { "Manager", "DefaultLimitMSGQUEUE", config_parse_limit, RLIMIT_MSGQUEUE, arg_default_rlimit },
802 { "Manager", "DefaultLimitNICE", config_parse_limit, RLIMIT_NICE, arg_default_rlimit },
803 { "Manager", "DefaultLimitRTPRIO", config_parse_limit, RLIMIT_RTPRIO, arg_default_rlimit },
804 { "Manager", "DefaultLimitRTTIME", config_parse_limit, RLIMIT_RTTIME, arg_default_rlimit },
805 { "Manager", "DefaultCPUAccounting", config_parse_bool, 0, &arg_default_cpu_accounting },
806 { "Manager", "DefaultIOAccounting", config_parse_bool, 0, &arg_default_io_accounting },
807 { "Manager", "DefaultIPAccounting", config_parse_bool, 0, &arg_default_ip_accounting },
808 { "Manager", "DefaultBlockIOAccounting", config_parse_bool, 0, &arg_default_blockio_accounting },
809 { "Manager", "DefaultMemoryAccounting", config_parse_bool, 0, &arg_default_memory_accounting },
810 { "Manager", "DefaultTasksAccounting", config_parse_bool, 0, &arg_default_tasks_accounting },
811 { "Manager", "DefaultTasksMax", config_parse_tasks_max, 0, &arg_default_tasks_max },
812 { "Manager", "CtrlAltDelBurstAction", config_parse_emergency_action, 0, &arg_cad_burst_action },
813 {}
814 };
815
816 const char *fn, *conf_dirs_nulstr;
817
818 fn = arg_system ?
819 PKGSYSCONFDIR "/system.conf" :
820 PKGSYSCONFDIR "/user.conf";
821
822 conf_dirs_nulstr = arg_system ?
823 CONF_PATHS_NULSTR("systemd/system.conf.d") :
824 CONF_PATHS_NULSTR("systemd/user.conf.d");
825
826 (void) config_parse_many_nulstr(fn, conf_dirs_nulstr, "Manager\0", config_item_table_lookup, items, CONFIG_PARSE_WARN, NULL);
827
828 /* Traditionally "0" was used to turn off the default unit timeouts. Fix this up so that we used USEC_INFINITY
829 * like everywhere else. */
830 if (arg_default_timeout_start_usec <= 0)
831 arg_default_timeout_start_usec = USEC_INFINITY;
832 if (arg_default_timeout_stop_usec <= 0)
833 arg_default_timeout_stop_usec = USEC_INFINITY;
834
835 return 0;
836 }
837
838 static void set_manager_defaults(Manager *m) {
839
840 assert(m);
841
842 m->default_timer_accuracy_usec = arg_default_timer_accuracy_usec;
843 m->default_std_output = arg_default_std_output;
844 m->default_std_error = arg_default_std_error;
845 m->default_timeout_start_usec = arg_default_timeout_start_usec;
846 m->default_timeout_stop_usec = arg_default_timeout_stop_usec;
847 m->default_restart_usec = arg_default_restart_usec;
848 m->default_start_limit_interval = arg_default_start_limit_interval;
849 m->default_start_limit_burst = arg_default_start_limit_burst;
850 m->default_cpu_accounting = arg_default_cpu_accounting;
851 m->default_io_accounting = arg_default_io_accounting;
852 m->default_ip_accounting = arg_default_ip_accounting;
853 m->default_blockio_accounting = arg_default_blockio_accounting;
854 m->default_memory_accounting = arg_default_memory_accounting;
855 m->default_tasks_accounting = arg_default_tasks_accounting;
856 m->default_tasks_max = arg_default_tasks_max;
857
858 manager_set_default_rlimits(m, arg_default_rlimit);
859 manager_environment_add(m, NULL, arg_default_environment);
860 }
861
862 static void set_manager_settings(Manager *m) {
863
864 assert(m);
865
866 m->confirm_spawn = arg_confirm_spawn;
867 m->service_watchdogs = arg_service_watchdogs;
868 m->runtime_watchdog = arg_runtime_watchdog;
869 m->shutdown_watchdog = arg_shutdown_watchdog;
870 m->cad_burst_action = arg_cad_burst_action;
871
872 manager_set_show_status(m, arg_show_status);
873 }
874
875 static int parse_argv(int argc, char *argv[]) {
876
877 enum {
878 ARG_LOG_LEVEL = 0x100,
879 ARG_LOG_TARGET,
880 ARG_LOG_COLOR,
881 ARG_LOG_LOCATION,
882 ARG_UNIT,
883 ARG_SYSTEM,
884 ARG_USER,
885 ARG_TEST,
886 ARG_NO_PAGER,
887 ARG_VERSION,
888 ARG_DUMP_CONFIGURATION_ITEMS,
889 ARG_DUMP_CORE,
890 ARG_CRASH_CHVT,
891 ARG_CRASH_SHELL,
892 ARG_CRASH_REBOOT,
893 ARG_CONFIRM_SPAWN,
894 ARG_SHOW_STATUS,
895 ARG_DESERIALIZE,
896 ARG_SWITCHED_ROOT,
897 ARG_DEFAULT_STD_OUTPUT,
898 ARG_DEFAULT_STD_ERROR,
899 ARG_MACHINE_ID,
900 ARG_SERVICE_WATCHDOGS,
901 };
902
903 static const struct option options[] = {
904 { "log-level", required_argument, NULL, ARG_LOG_LEVEL },
905 { "log-target", required_argument, NULL, ARG_LOG_TARGET },
906 { "log-color", optional_argument, NULL, ARG_LOG_COLOR },
907 { "log-location", optional_argument, NULL, ARG_LOG_LOCATION },
908 { "unit", required_argument, NULL, ARG_UNIT },
909 { "system", no_argument, NULL, ARG_SYSTEM },
910 { "user", no_argument, NULL, ARG_USER },
911 { "test", no_argument, NULL, ARG_TEST },
912 { "no-pager", no_argument, NULL, ARG_NO_PAGER },
913 { "help", no_argument, NULL, 'h' },
914 { "version", no_argument, NULL, ARG_VERSION },
915 { "dump-configuration-items", no_argument, NULL, ARG_DUMP_CONFIGURATION_ITEMS },
916 { "dump-core", optional_argument, NULL, ARG_DUMP_CORE },
917 { "crash-chvt", required_argument, NULL, ARG_CRASH_CHVT },
918 { "crash-shell", optional_argument, NULL, ARG_CRASH_SHELL },
919 { "crash-reboot", optional_argument, NULL, ARG_CRASH_REBOOT },
920 { "confirm-spawn", optional_argument, NULL, ARG_CONFIRM_SPAWN },
921 { "show-status", optional_argument, NULL, ARG_SHOW_STATUS },
922 { "deserialize", required_argument, NULL, ARG_DESERIALIZE },
923 { "switched-root", no_argument, NULL, ARG_SWITCHED_ROOT },
924 { "default-standard-output", required_argument, NULL, ARG_DEFAULT_STD_OUTPUT, },
925 { "default-standard-error", required_argument, NULL, ARG_DEFAULT_STD_ERROR, },
926 { "machine-id", required_argument, NULL, ARG_MACHINE_ID },
927 { "service-watchdogs", required_argument, NULL, ARG_SERVICE_WATCHDOGS },
928 {}
929 };
930
931 int c, r;
932
933 assert(argc >= 1);
934 assert(argv);
935
936 if (getpid_cached() == 1)
937 opterr = 0;
938
939 while ((c = getopt_long(argc, argv, "hDbsz:", options, NULL)) >= 0)
940
941 switch (c) {
942
943 case ARG_LOG_LEVEL:
944 r = log_set_max_level_from_string(optarg);
945 if (r < 0) {
946 log_error("Failed to parse log level %s.", optarg);
947 return r;
948 }
949
950 break;
951
952 case ARG_LOG_TARGET:
953 r = log_set_target_from_string(optarg);
954 if (r < 0) {
955 log_error("Failed to parse log target %s.", optarg);
956 return r;
957 }
958
959 break;
960
961 case ARG_LOG_COLOR:
962
963 if (optarg) {
964 r = log_show_color_from_string(optarg);
965 if (r < 0) {
966 log_error("Failed to parse log color setting %s.", optarg);
967 return r;
968 }
969 } else
970 log_show_color(true);
971
972 break;
973
974 case ARG_LOG_LOCATION:
975 if (optarg) {
976 r = log_show_location_from_string(optarg);
977 if (r < 0) {
978 log_error("Failed to parse log location setting %s.", optarg);
979 return r;
980 }
981 } else
982 log_show_location(true);
983
984 break;
985
986 case ARG_DEFAULT_STD_OUTPUT:
987 r = exec_output_from_string(optarg);
988 if (r < 0) {
989 log_error("Failed to parse default standard output setting %s.", optarg);
990 return r;
991 } else
992 arg_default_std_output = r;
993 break;
994
995 case ARG_DEFAULT_STD_ERROR:
996 r = exec_output_from_string(optarg);
997 if (r < 0) {
998 log_error("Failed to parse default standard error output setting %s.", optarg);
999 return r;
1000 } else
1001 arg_default_std_error = r;
1002 break;
1003
1004 case ARG_UNIT:
1005 r = free_and_strdup(&arg_default_unit, optarg);
1006 if (r < 0)
1007 return log_error_errno(r, "Failed to set default unit %s: %m", optarg);
1008
1009 break;
1010
1011 case ARG_SYSTEM:
1012 arg_system = true;
1013 break;
1014
1015 case ARG_USER:
1016 arg_system = false;
1017 break;
1018
1019 case ARG_TEST:
1020 arg_action = ACTION_TEST;
1021 break;
1022
1023 case ARG_NO_PAGER:
1024 arg_no_pager = true;
1025 break;
1026
1027 case ARG_VERSION:
1028 arg_action = ACTION_VERSION;
1029 break;
1030
1031 case ARG_DUMP_CONFIGURATION_ITEMS:
1032 arg_action = ACTION_DUMP_CONFIGURATION_ITEMS;
1033 break;
1034
1035 case ARG_DUMP_CORE:
1036 if (!optarg)
1037 arg_dump_core = true;
1038 else {
1039 r = parse_boolean(optarg);
1040 if (r < 0)
1041 return log_error_errno(r, "Failed to parse dump core boolean: %s", optarg);
1042 arg_dump_core = r;
1043 }
1044 break;
1045
1046 case ARG_CRASH_CHVT:
1047 r = parse_crash_chvt(optarg);
1048 if (r < 0)
1049 return log_error_errno(r, "Failed to parse crash virtual terminal index: %s", optarg);
1050 break;
1051
1052 case ARG_CRASH_SHELL:
1053 if (!optarg)
1054 arg_crash_shell = true;
1055 else {
1056 r = parse_boolean(optarg);
1057 if (r < 0)
1058 return log_error_errno(r, "Failed to parse crash shell boolean: %s", optarg);
1059 arg_crash_shell = r;
1060 }
1061 break;
1062
1063 case ARG_CRASH_REBOOT:
1064 if (!optarg)
1065 arg_crash_reboot = true;
1066 else {
1067 r = parse_boolean(optarg);
1068 if (r < 0)
1069 return log_error_errno(r, "Failed to parse crash shell boolean: %s", optarg);
1070 arg_crash_reboot = r;
1071 }
1072 break;
1073
1074 case ARG_CONFIRM_SPAWN:
1075 arg_confirm_spawn = mfree(arg_confirm_spawn);
1076
1077 r = parse_confirm_spawn(optarg, &arg_confirm_spawn);
1078 if (r < 0)
1079 return log_error_errno(r, "Failed to parse confirm spawn option: %m");
1080 break;
1081
1082 case ARG_SERVICE_WATCHDOGS:
1083 r = parse_boolean(optarg);
1084 if (r < 0)
1085 return log_error_errno(r, "Failed to parse service watchdogs boolean: %s", optarg);
1086 arg_service_watchdogs = r;
1087 break;
1088
1089 case ARG_SHOW_STATUS:
1090 if (optarg) {
1091 r = parse_show_status(optarg, &arg_show_status);
1092 if (r < 0) {
1093 log_error("Failed to parse show status boolean %s.", optarg);
1094 return r;
1095 }
1096 } else
1097 arg_show_status = SHOW_STATUS_YES;
1098 break;
1099
1100 case ARG_DESERIALIZE: {
1101 int fd;
1102 FILE *f;
1103
1104 r = safe_atoi(optarg, &fd);
1105 if (r < 0 || fd < 0) {
1106 log_error("Failed to parse deserialize option %s.", optarg);
1107 return -EINVAL;
1108 }
1109
1110 (void) fd_cloexec(fd, true);
1111
1112 f = fdopen(fd, "r");
1113 if (!f)
1114 return log_error_errno(errno, "Failed to open serialization fd: %m");
1115
1116 safe_fclose(arg_serialization);
1117 arg_serialization = f;
1118
1119 break;
1120 }
1121
1122 case ARG_SWITCHED_ROOT:
1123 arg_switched_root = true;
1124 break;
1125
1126 case ARG_MACHINE_ID:
1127 r = set_machine_id(optarg);
1128 if (r < 0)
1129 return log_error_errno(r, "MachineID '%s' is not valid.", optarg);
1130 break;
1131
1132 case 'h':
1133 arg_action = ACTION_HELP;
1134 break;
1135
1136 case 'D':
1137 log_set_max_level(LOG_DEBUG);
1138 break;
1139
1140 case 'b':
1141 case 's':
1142 case 'z':
1143 /* Just to eat away the sysvinit kernel
1144 * cmdline args without getopt() error
1145 * messages that we'll parse in
1146 * parse_proc_cmdline_word() or ignore. */
1147
1148 case '?':
1149 if (getpid_cached() != 1)
1150 return -EINVAL;
1151 else
1152 return 0;
1153
1154 default:
1155 assert_not_reached("Unhandled option code.");
1156 }
1157
1158 if (optind < argc && getpid_cached() != 1) {
1159 /* Hmm, when we aren't run as init system
1160 * let's complain about excess arguments */
1161
1162 log_error("Excess arguments.");
1163 return -EINVAL;
1164 }
1165
1166 return 0;
1167 }
1168
1169 static int help(void) {
1170
1171 printf("%s [OPTIONS...]\n\n"
1172 "Starts up and maintains the system or user services.\n\n"
1173 " -h --help Show this help\n"
1174 " --version Show version\n"
1175 " --test Determine startup sequence, dump it and exit\n"
1176 " --no-pager Do not pipe output into a pager\n"
1177 " --dump-configuration-items Dump understood unit configuration items\n"
1178 " --unit=UNIT Set default unit\n"
1179 " --system Run a system instance, even if PID != 1\n"
1180 " --user Run a user instance\n"
1181 " --dump-core[=BOOL] Dump core on crash\n"
1182 " --crash-vt=NR Change to specified VT on crash\n"
1183 " --crash-reboot[=BOOL] Reboot on crash\n"
1184 " --crash-shell[=BOOL] Run shell on crash\n"
1185 " --confirm-spawn[=BOOL] Ask for confirmation when spawning processes\n"
1186 " --show-status[=BOOL] Show status updates on the console during bootup\n"
1187 " --log-target=TARGET Set log target (console, journal, kmsg, journal-or-kmsg, null)\n"
1188 " --log-level=LEVEL Set log level (debug, info, notice, warning, err, crit, alert, emerg)\n"
1189 " --log-color[=BOOL] Highlight important log messages\n"
1190 " --log-location[=BOOL] Include code location in log messages\n"
1191 " --default-standard-output= Set default standard output for services\n"
1192 " --default-standard-error= Set default standard error output for services\n",
1193 program_invocation_short_name);
1194
1195 return 0;
1196 }
1197
1198 static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool switching_root) {
1199 _cleanup_fdset_free_ FDSet *fds = NULL;
1200 _cleanup_fclose_ FILE *f = NULL;
1201 int r;
1202
1203 assert(m);
1204 assert(_f);
1205 assert(_fds);
1206
1207 r = manager_open_serialization(m, &f);
1208 if (r < 0)
1209 return log_error_errno(r, "Failed to create serialization file: %m");
1210
1211 /* Make sure nothing is really destructed when we shut down */
1212 m->n_reloading++;
1213 bus_manager_send_reloading(m, true);
1214
1215 fds = fdset_new();
1216 if (!fds)
1217 return log_oom();
1218
1219 r = manager_serialize(m, f, fds, switching_root);
1220 if (r < 0)
1221 return log_error_errno(r, "Failed to serialize state: %m");
1222
1223 if (fseeko(f, 0, SEEK_SET) == (off_t) -1)
1224 return log_error_errno(errno, "Failed to rewind serialization fd: %m");
1225
1226 r = fd_cloexec(fileno(f), false);
1227 if (r < 0)
1228 return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization: %m");
1229
1230 r = fdset_cloexec(fds, false);
1231 if (r < 0)
1232 return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization fds: %m");
1233
1234 *_f = f;
1235 *_fds = fds;
1236
1237 f = NULL;
1238 fds = NULL;
1239
1240 return 0;
1241 }
1242
1243 static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
1244 struct rlimit nl;
1245 int r;
1246 int min_max;
1247 _cleanup_free_ char *nr_open = NULL;
1248
1249 assert(saved_rlimit);
1250
1251 /* Save the original RLIMIT_NOFILE so that we can reset it
1252 * later when transitioning from the initrd to the main
1253 * systemd or suchlike. */
1254 if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0)
1255 return log_warning_errno(errno, "Reading RLIMIT_NOFILE failed, ignoring: %m");
1256
1257 /* Make sure forked processes get the default kernel setting */
1258 if (!arg_default_rlimit[RLIMIT_NOFILE]) {
1259 struct rlimit *rl;
1260
1261 rl = newdup(struct rlimit, saved_rlimit, 1);
1262 if (!rl)
1263 return log_oom();
1264
1265 arg_default_rlimit[RLIMIT_NOFILE] = rl;
1266 }
1267
1268 /* Get current RLIMIT_NOFILE maximum compiled into the kernel. */
1269 r = read_one_line_file("/proc/sys/fs/nr_open", &nr_open);
1270 if (r >= 0)
1271 r = safe_atoi(nr_open, &min_max);
1272 /* If we fail, fallback to the hard-coded kernel limit of 1024 * 1024. */
1273 if (r < 0)
1274 min_max = 1024 * 1024;
1275
1276 /* Bump up the resource limit for ourselves substantially */
1277 nl.rlim_cur = nl.rlim_max = min_max;
1278 r = setrlimit_closest(RLIMIT_NOFILE, &nl);
1279 if (r < 0)
1280 return log_warning_errno(r, "Setting RLIMIT_NOFILE failed, ignoring: %m");
1281
1282 return 0;
1283 }
1284
/* Stores the current RLIMIT_MEMLOCK in *saved_rlimit and then makes sure both the soft and the hard limit
 * are at least 16M. A limit that is already higher (including an unlimited one) is left untouched, so
 * that this function only ever raises the limit and never lowers it.
 *
 * Must be called as root. Failures are logged with "ignoring"; returns 0 on success (also when nothing
 * needed to be changed) and a negative errno-style value otherwise. */
static int bump_rlimit_memlock(struct rlimit *saved_rlimit) {
        const rlim_t wanted = 1024ULL*1024ULL*16ULL;
        struct rlimit bumped;
        int r;

        assert(saved_rlimit);
        assert(getuid() == 0);

        /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even though we have CAP_IPC_LOCK which
         * should normally disable such checks. We need them to implement IPAccessAllow= and IPAccessDeny=, hence let's
         * bump the value high enough for the root user. */

        if (getrlimit(RLIMIT_MEMLOCK, saved_rlimit) < 0)
                return log_warning_errno(errno, "Reading RLIMIT_MEMLOCK failed, ignoring: %m");

        /* Start from what we have and only raise the fields that are below what we want. RLIM_INFINITY
         * compares larger than any finite limit, hence an unlimited setting is preserved, too. */
        bumped = *saved_rlimit;
        if (bumped.rlim_cur < wanted)
                bumped.rlim_cur = wanted;
        if (bumped.rlim_max < wanted)
                bumped.rlim_max = wanted;

        /* Already high enough? Then there is nothing to do. */
        if (bumped.rlim_cur == saved_rlimit->rlim_cur &&
            bumped.rlim_max == saved_rlimit->rlim_max)
                return 0;

        r = setrlimit_closest(RLIMIT_MEMLOCK, &bumped);
        if (r < 0)
                return log_warning_errno(r, "Setting RLIMIT_MEMLOCK failed, ignoring: %m");

        return 0;
}
1304
/* Logs a warning if /usr is reported as an empty directory, i.e. if it looks like a separate file system
 * that has not been mounted yet. Does nothing otherwise, including when the check itself fails. */
static void test_usr(void) {

        if (dir_is_empty("/usr") > 0)
                log_warning("/usr appears to be on its own filesystem and is not already mounted. This is not a supported setup. "
                            "Some things will probably break (sometimes even silently) in mysterious ways. "
                            "Consult http://freedesktop.org/wiki/Software/systemd/separate-usr-is-broken for more information.");
}
1316
1317 static int initialize_join_controllers(void) {
1318 /* By default, mount "cpu" + "cpuacct" together, and "net_cls"
1319 * + "net_prio". We'd like to add "cpuset" to the mix, but
1320 * "cpuset" doesn't really work for groups with no initialized
1321 * attributes. */
1322
1323 arg_join_controllers = new(char**, 3);
1324 if (!arg_join_controllers)
1325 return -ENOMEM;
1326
1327 arg_join_controllers[0] = strv_new("cpu", "cpuacct", NULL);
1328 if (!arg_join_controllers[0])
1329 goto oom;
1330
1331 arg_join_controllers[1] = strv_new("net_cls", "net_prio", NULL);
1332 if (!arg_join_controllers[1])
1333 goto oom;
1334
1335 arg_join_controllers[2] = NULL;
1336 return 0;
1337
1338 oom:
1339 arg_join_controllers = strv_free_free(arg_join_controllers);
1340 return -ENOMEM;
1341 }
1342
1343 static int enforce_syscall_archs(Set *archs) {
1344 #if HAVE_SECCOMP
1345 int r;
1346
1347 if (!is_seccomp_available())
1348 return 0;
1349
1350 r = seccomp_restrict_archs(arg_syscall_archs);
1351 if (r < 0)
1352 return log_error_errno(r, "Failed to enforce system call architecture restrication: %m");
1353 #endif
1354 return 0;
1355 }
1356
1357 static int status_welcome(void) {
1358 _cleanup_free_ char *pretty_name = NULL, *ansi_color = NULL;
1359 const char *fn;
1360 int r;
1361
1362 if (arg_show_status <= 0)
1363 return 0;
1364
1365 FOREACH_STRING(fn, "/etc/os-release", "/usr/lib/os-release") {
1366 r = parse_env_file(fn, NEWLINE,
1367 "PRETTY_NAME", &pretty_name,
1368 "ANSI_COLOR", &ansi_color,
1369 NULL);
1370
1371 if (r != -ENOENT)
1372 break;
1373 }
1374 if (r < 0 && r != -ENOENT)
1375 log_warning_errno(r, "Failed to read os-release file, ignoring: %m");
1376
1377 if (log_get_show_color())
1378 return status_printf(NULL, false, false,
1379 "\nWelcome to \x1B[%sm%s\x1B[0m!\n",
1380 isempty(ansi_color) ? "1" : ansi_color,
1381 isempty(pretty_name) ? "Linux" : pretty_name);
1382 else
1383 return status_printf(NULL, false, false,
1384 "\nWelcome to %s!\n",
1385 isempty(pretty_name) ? "Linux" : pretty_name);
1386 }
1387
1388 static int write_container_id(void) {
1389 const char *c;
1390 int r;
1391
1392 c = getenv("container");
1393 if (isempty(c))
1394 return 0;
1395
1396 RUN_WITH_UMASK(0022)
1397 r = write_string_file("/run/systemd/container", c, WRITE_STRING_FILE_CREATE);
1398 if (r < 0)
1399 return log_warning_errno(r, "Failed to write /run/systemd/container, ignoring: %m");
1400
1401 return 1;
1402 }
1403
1404 static int bump_unix_max_dgram_qlen(void) {
1405 _cleanup_free_ char *qlen = NULL;
1406 unsigned long v;
1407 int r;
1408
1409 /* Let's bump the net.unix.max_dgram_qlen sysctl. The kernel
1410 * default of 16 is simply too low. We set the value really
1411 * really early during boot, so that it is actually applied to
1412 * all our sockets, including the $NOTIFY_SOCKET one. */
1413
1414 r = read_one_line_file("/proc/sys/net/unix/max_dgram_qlen", &qlen);
1415 if (r < 0)
1416 return log_warning_errno(r, "Failed to read AF_UNIX datagram queue length, ignoring: %m");
1417
1418 r = safe_atolu(qlen, &v);
1419 if (r < 0)
1420 return log_warning_errno(r, "Failed to parse AF_UNIX datagram queue length, ignoring: %m");
1421
1422 if (v >= DEFAULT_UNIX_MAX_DGRAM_QLEN)
1423 return 0;
1424
1425 qlen = mfree(qlen);
1426 if (asprintf(&qlen, "%lu\n", DEFAULT_UNIX_MAX_DGRAM_QLEN) < 0)
1427 return log_oom();
1428
1429 r = write_string_file("/proc/sys/net/unix/max_dgram_qlen", qlen, 0);
1430 if (r < 0)
1431 return log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
1432 "Failed to bump AF_UNIX datagram queue length, ignoring: %m");
1433
1434 return 1;
1435 }
1436
1437 static int fixup_environment(void) {
1438 _cleanup_free_ char *term = NULL;
1439 const char *t;
1440 int r;
1441
1442 /* Only fix up the environment when we are started as PID 1 */
1443 if (getpid_cached() != 1)
1444 return 0;
1445
1446 /* We expect the environment to be set correctly if run inside a container. */
1447 if (detect_container() > 0)
1448 return 0;
1449
1450 /* When started as PID1, the kernel uses /dev/console for our stdios and uses TERM=linux whatever the backend
1451 * device used by the console. We try to make a better guess here since some consoles might not have support
1452 * for color mode for example.
1453 *
1454 * However if TERM was configured through the kernel command line then leave it alone. */
1455 r = proc_cmdline_get_key("TERM", 0, &term);
1456 if (r < 0)
1457 return r;
1458
1459 t = term ?: default_term_for_tty("/dev/console");
1460
1461 if (setenv("TERM", t, 1) < 0)
1462 return -errno;
1463
1464 return 0;
1465 }
1466
/* SysV compatibility: when we are not PID 1 but were invoked under a name containing "init", behave like
 * telinit by executing the systemctl binary with our argument vector. If the exec fails an error is
 * logged and the process exits with EXIT_FAILURE. A NOP when built without SysV compatibility. */
static void redirect_telinit(int argc, char *argv[]) {

#if HAVE_SYSV_COMPAT
        /* Nothing to redirect if we are the real init, or if we weren't called as "init" at all */
        if (getpid_cached() == 1 || !strstr(program_invocation_short_name, "init"))
                return;

        execv(SYSTEMCTL_BINARY_PATH, argv);

        /* execv() only returns on failure */
        log_error_errno(errno, "Failed to exec " SYSTEMCTL_BINARY_PATH ": %m");
        exit(EXIT_FAILURE);
#endif
}
1483
1484 static int become_shutdown(
1485 const char *shutdown_verb,
1486 int retval) {
1487
1488 char log_level[DECIMAL_STR_MAX(int) + 1],
1489 exit_code[DECIMAL_STR_MAX(uint8_t) + 1],
1490 timeout[DECIMAL_STR_MAX(usec_t) + 1];
1491
1492 const char* command_line[13] = {
1493 SYSTEMD_SHUTDOWN_BINARY_PATH,
1494 shutdown_verb,
1495 "--timeout", timeout,
1496 "--log-level", log_level,
1497 "--log-target",
1498 };
1499
1500 _cleanup_strv_free_ char **env_block = NULL;
1501 size_t pos = 7;
1502 int r;
1503
1504 assert(shutdown_verb);
1505 assert(!command_line[pos]);
1506 env_block = strv_copy(environ);
1507
1508 xsprintf(log_level, "%d", log_get_max_level());
1509 xsprintf(timeout, "%" PRI_USEC "us", arg_default_timeout_stop_usec);
1510
1511 switch (log_get_target()) {
1512
1513 case LOG_TARGET_KMSG:
1514 case LOG_TARGET_JOURNAL_OR_KMSG:
1515 case LOG_TARGET_SYSLOG_OR_KMSG:
1516 command_line[pos++] = "kmsg";
1517 break;
1518
1519 case LOG_TARGET_NULL:
1520 command_line[pos++] = "null";
1521 break;
1522
1523 case LOG_TARGET_CONSOLE:
1524 default:
1525 command_line[pos++] = "console";
1526 break;
1527 };
1528
1529 if (log_get_show_color())
1530 command_line[pos++] = "--log-color";
1531
1532 if (log_get_show_location())
1533 command_line[pos++] = "--log-location";
1534
1535 if (streq(shutdown_verb, "exit")) {
1536 command_line[pos++] = "--exit-code";
1537 command_line[pos++] = exit_code;
1538 xsprintf(exit_code, "%d", retval);
1539 }
1540
1541 assert(pos < ELEMENTSOF(command_line));
1542
1543 if (streq(shutdown_verb, "reboot") &&
1544 arg_shutdown_watchdog > 0 &&
1545 arg_shutdown_watchdog != USEC_INFINITY) {
1546
1547 char *e;
1548
1549 /* If we reboot let's set the shutdown
1550 * watchdog and tell the shutdown binary to
1551 * repeatedly ping it */
1552 r = watchdog_set_timeout(&arg_shutdown_watchdog);
1553 watchdog_close(r < 0);
1554
1555 /* Tell the binary how often to ping, ignore failure */
1556 if (asprintf(&e, "WATCHDOG_USEC="USEC_FMT, arg_shutdown_watchdog) > 0)
1557 (void) strv_consume(&env_block, e);
1558
1559 if (arg_watchdog_device &&
1560 asprintf(&e, "WATCHDOG_DEVICE=%s", arg_watchdog_device) > 0)
1561 (void) strv_consume(&env_block, e);
1562 } else
1563 watchdog_close(true);
1564
1565 /* Avoid the creation of new processes forked by the
1566 * kernel; at this point, we will not listen to the
1567 * signals anyway */
1568 if (detect_container() <= 0)
1569 (void) cg_uninstall_release_agent(SYSTEMD_CGROUP_CONTROLLER);
1570
1571 execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);
1572 return -errno;
1573 }
1574
/* Early clock setup: applies the kernel timezone if the RTC is configured in local time, otherwise (and
 * only outside of the initrd) resets the kernel's time warp logic. Afterwards the clock epoch is applied.
 * All failures are logged only. */
static void initialize_clock(void) {
        int r;

        if (clock_is_localtime(NULL) > 0) {
                int delta_minutes;

                /*
                 * The very first call of settimeofday() also does a time warp in the kernel.
                 *
                 * In the rtc-in-local time mode, we set the kernel's timezone, and rely on external tools to take care
                 * of maintaining the RTC and do all adjustments. This matches the behavior of Windows, which leaves
                 * the RTC alone if the registry tells that the RTC runs in UTC.
                 */
                r = clock_set_timezone(&delta_minutes);
                if (r >= 0)
                        log_info("RTC configured in localtime, applying delta of %i minutes to system time.", delta_minutes);
                else
                        log_error_errno(r, "Failed to apply local time delta, ignoring: %m");

        } else if (!in_initrd()) {
                /*
                 * Do a dummy very first call to seal the kernel's time warp magic.
                 *
                 * Do not call this from inside the initrd. The initrd might not carry /etc/adjtime with LOCAL, but the
                 * real system could be set up that way. In such case, we need to delay the time-warp or the sealing
                 * until we reach the real system.
                 *
                 * Do not set the kernel's timezone. The concept of local time cannot be supported reliably, the time
                 * will jump or be incorrect at every daylight saving time change. All kernel local time concepts will
                 * be treated as UTC that way.
                 */
                (void) clock_reset_timewarp();
        }

        r = clock_apply_epoch();
        if (r > 0)
                log_info("System time before build time, advancing clock.");
        else if (r < 0)
                log_error_errno(r, "Current system time is before build time, but cannot correct: %m");
}
1615
1616 static void initialize_coredump(bool skip_setup) {
1617
1618 if (getpid_cached() != 1)
1619 return;
1620
1621 /* Don't limit the core dump size, so that coredump handlers such as systemd-coredump (which honour the limit)
1622 * will process core dumps for system services by default. */
1623 if (setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY)) < 0)
1624 log_warning_errno(errno, "Failed to set RLIMIT_CORE: %m");
1625
1626 /* But at the same time, turn off the core_pattern logic by default, so that no coredumps are stored
1627 * until the systemd-coredump tool is enabled via sysctl. */
1628 if (!skip_setup)
1629 disable_coredumps();
1630 }
1631
1632 static void do_reexecute(
1633 int argc,
1634 char *argv[],
1635 const struct rlimit *saved_rlimit_nofile,
1636 const struct rlimit *saved_rlimit_memlock,
1637 FDSet *fds,
1638 const char *switch_root_dir,
1639 const char *switch_root_init,
1640 const char **ret_error_message) {
1641
1642 unsigned i, j, args_size;
1643 const char **args;
1644 int r;
1645
1646 assert(saved_rlimit_nofile);
1647 assert(saved_rlimit_memlock);
1648 assert(ret_error_message);
1649
1650 /* Close and disarm the watchdog, so that the new instance can reinitialize it, but doesn't get rebooted while
1651 * we do that */
1652 watchdog_close(true);
1653
1654 /* Reset the RLIMIT_NOFILE to the kernel default, so that the new systemd can pass the kernel default to its
1655 * child processes */
1656
1657 if (saved_rlimit_nofile->rlim_cur > 0)
1658 (void) setrlimit(RLIMIT_NOFILE, saved_rlimit_nofile);
1659 if (saved_rlimit_memlock->rlim_cur != (rlim_t) -1)
1660 (void) setrlimit(RLIMIT_MEMLOCK, saved_rlimit_memlock);
1661
1662 if (switch_root_dir) {
1663 /* Kill all remaining processes from the initrd, but don't wait for them, so that we can handle the
1664 * SIGCHLD for them after deserializing. */
1665 broadcast_signal(SIGTERM, false, true, arg_default_timeout_stop_usec);
1666
1667 /* And switch root with MS_MOVE, because we remove the old directory afterwards and detach it. */
1668 r = switch_root(switch_root_dir, "/mnt", true, MS_MOVE);
1669 if (r < 0)
1670 log_error_errno(r, "Failed to switch root, trying to continue: %m");
1671 }
1672
1673 args_size = MAX(6, argc+1);
1674 args = newa(const char*, args_size);
1675
1676 if (!switch_root_init) {
1677 char sfd[DECIMAL_STR_MAX(int) + 1];
1678
1679 /* First try to spawn ourselves with the right path, and with full serialization. We do this only if
1680 * the user didn't specify an explicit init to spawn. */
1681
1682 assert(arg_serialization);
1683 assert(fds);
1684
1685 xsprintf(sfd, "%i", fileno(arg_serialization));
1686
1687 i = 0;
1688 args[i++] = SYSTEMD_BINARY_PATH;
1689 if (switch_root_dir)
1690 args[i++] = "--switched-root";
1691 args[i++] = arg_system ? "--system" : "--user";
1692 args[i++] = "--deserialize";
1693 args[i++] = sfd;
1694 args[i++] = NULL;
1695
1696 assert(i <= args_size);
1697
1698 /*
1699 * We want valgrind to print its memory usage summary before reexecution. Valgrind won't do this is on
1700 * its own on exec(), but it will do it on exit(). Hence, to ensure we get a summary here, fork() off
1701 * a child, let it exit() cleanly, so that it prints the summary, and wait() for it in the parent,
1702 * before proceeding into the exec().
1703 */
1704 valgrind_summary_hack();
1705
1706 (void) execv(args[0], (char* const*) args);
1707 log_debug_errno(errno, "Failed to execute our own binary, trying fallback: %m");
1708 }
1709
1710 /* Try the fallback, if there is any, without any serialization. We pass the original argv[] and envp[]. (Well,
1711 * modulo the ordering changes due to getopt() in argv[], and some cleanups in envp[], but let's hope that
1712 * doesn't matter.) */
1713
1714 arg_serialization = safe_fclose(arg_serialization);
1715 fds = fdset_free(fds);
1716
1717 /* Reopen the console */
1718 (void) make_console_stdio();
1719
1720 for (j = 1, i = 1; j < (unsigned) argc; j++)
1721 args[i++] = argv[j];
1722 args[i++] = NULL;
1723 assert(i <= args_size);
1724
1725 /* Reenable any blocked signals, especially important if we switch from initial ramdisk to init=... */
1726 (void) reset_all_signal_handlers();
1727 (void) reset_signal_mask();
1728
1729 if (switch_root_init) {
1730 args[0] = switch_root_init;
1731 (void) execv(args[0], (char* const*) args);
1732 log_warning_errno(errno, "Failed to execute configured init, trying fallback: %m");
1733 }
1734
1735 args[0] = "/sbin/init";
1736 (void) execv(args[0], (char* const*) args);
1737 r = -errno;
1738
1739 manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
1740 ANSI_HIGHLIGHT_RED " !! " ANSI_NORMAL,
1741 "Failed to execute /sbin/init");
1742
1743 if (r == -ENOENT) {
1744 log_warning("No /sbin/init, trying fallback");
1745
1746 args[0] = "/bin/sh";
1747 args[1] = NULL;
1748 (void) execv(args[0], (char* const*) args);
1749 log_error_errno(errno, "Failed to execute /bin/sh, giving up: %m");
1750 } else
1751 log_warning_errno(r, "Failed to execute /sbin/init, giving up: %m");
1752
1753 *ret_error_message = "Failed to execute fallback shell";
1754 }
1755
1756 static int invoke_main_loop(
1757 Manager *m,
1758 bool *ret_reexecute,
1759 int *ret_retval, /* Return parameters relevant for shutting down */
1760 const char **ret_shutdown_verb, /* … */
1761 FDSet **ret_fds, /* Return parameters for reexecuting */
1762 char **ret_switch_root_dir, /* … */
1763 char **ret_switch_root_init, /* … */
1764 const char **ret_error_message) {
1765
1766 int r;
1767
1768 assert(m);
1769 assert(ret_reexecute);
1770 assert(ret_retval);
1771 assert(ret_shutdown_verb);
1772 assert(ret_fds);
1773 assert(ret_switch_root_dir);
1774 assert(ret_switch_root_init);
1775 assert(ret_error_message);
1776
1777 for (;;) {
1778 r = manager_loop(m);
1779 if (r < 0) {
1780 *ret_error_message = "Failed to run main loop";
1781 return log_emergency_errno(r, "Failed to run main loop: %m");
1782 }
1783
1784 switch (m->exit_code) {
1785
1786 case MANAGER_RELOAD:
1787 log_info("Reloading.");
1788
1789 r = parse_config_file();
1790 if (r < 0)
1791 log_warning_errno(r, "Failed to parse config file, ignoring: %m");
1792
1793 set_manager_defaults(m);
1794
1795 r = manager_reload(m);
1796 if (r < 0)
1797 log_warning_errno(r, "Failed to reload, ignoring: %m");
1798
1799 break;
1800
1801 case MANAGER_REEXECUTE:
1802
1803 r = prepare_reexecute(m, &arg_serialization, ret_fds, false);
1804 if (r < 0) {
1805 *ret_error_message = "Failed to prepare for reexecution";
1806 return r;
1807 }
1808
1809 log_notice("Reexecuting.");
1810
1811 *ret_reexecute = true;
1812 *ret_retval = EXIT_SUCCESS;
1813 *ret_shutdown_verb = NULL;
1814 *ret_switch_root_dir = *ret_switch_root_init = NULL;
1815
1816 return 0;
1817
1818 case MANAGER_SWITCH_ROOT:
1819 if (!m->switch_root_init) {
1820 r = prepare_reexecute(m, &arg_serialization, ret_fds, true);
1821 if (r < 0) {
1822 *ret_error_message = "Failed to prepare for reexecution";
1823 return r;
1824 }
1825 } else
1826 *ret_fds = NULL;
1827
1828 log_notice("Switching root.");
1829
1830 *ret_reexecute = true;
1831 *ret_retval = EXIT_SUCCESS;
1832 *ret_shutdown_verb = NULL;
1833
1834 /* Steal the switch root parameters */
1835 *ret_switch_root_dir = m->switch_root;
1836 *ret_switch_root_init = m->switch_root_init;
1837 m->switch_root = m->switch_root_init = NULL;
1838
1839 return 0;
1840
1841 case MANAGER_EXIT:
1842
1843 if (MANAGER_IS_USER(m)) {
1844 log_debug("Exit.");
1845
1846 *ret_reexecute = false;
1847 *ret_retval = m->return_value;
1848 *ret_shutdown_verb = NULL;
1849 *ret_fds = NULL;
1850 *ret_switch_root_dir = *ret_switch_root_init = NULL;
1851
1852 return 0;
1853 }
1854
1855 _fallthrough_;
1856 case MANAGER_REBOOT:
1857 case MANAGER_POWEROFF:
1858 case MANAGER_HALT:
1859 case MANAGER_KEXEC: {
1860 static const char * const table[_MANAGER_EXIT_CODE_MAX] = {
1861 [MANAGER_EXIT] = "exit",
1862 [MANAGER_REBOOT] = "reboot",
1863 [MANAGER_POWEROFF] = "poweroff",
1864 [MANAGER_HALT] = "halt",
1865 [MANAGER_KEXEC] = "kexec"
1866 };
1867
1868 log_notice("Shutting down.");
1869
1870 *ret_reexecute = false;
1871 *ret_retval = m->return_value;
1872 assert_se(*ret_shutdown_verb = table[m->exit_code]);
1873 *ret_fds = NULL;
1874 *ret_switch_root_dir = *ret_switch_root_init = NULL;
1875
1876 return 0;
1877 }
1878
1879 default:
1880 assert_not_reached("Unknown exit code.");
1881 }
1882 }
1883 }
1884
1885 static void log_execution_mode(bool *ret_first_boot) {
1886 assert(ret_first_boot);
1887
1888 if (arg_system) {
1889 int v;
1890
1891 log_info(PACKAGE_STRING " running in %ssystem mode. (" SYSTEMD_FEATURES ")",
1892 arg_action == ACTION_TEST ? "test " : "" );
1893
1894 v = detect_virtualization();
1895 if (v > 0)
1896 log_info("Detected virtualization %s.", virtualization_to_string(v));
1897
1898 log_info("Detected architecture %s.", architecture_to_string(uname_architecture()));
1899
1900 if (in_initrd()) {
1901 *ret_first_boot = false;
1902 log_info("Running in initial RAM disk.");
1903 } else {
1904 /* Let's check whether we are in first boot, i.e. whether /etc is still unpopulated. We use
1905 * /etc/machine-id as flag file, for this: if it exists we assume /etc is populated, if it
1906 * doesn't it's unpopulated. This allows container managers and installers to provision a
1907 * couple of files already. If the container manager wants to provision the machine ID itself
1908 * it should pass $container_uuid to PID 1. */
1909
1910 *ret_first_boot = access("/etc/machine-id", F_OK) < 0;
1911 if (*ret_first_boot)
1912 log_info("Running with unpopulated /etc.");
1913 }
1914 } else {
1915 if (DEBUG_LOGGING) {
1916 _cleanup_free_ char *t;
1917
1918 t = uid_to_name(getuid());
1919 log_debug(PACKAGE_STRING " running in %suser mode for user " UID_FMT "/%s. (" SYSTEMD_FEATURES ")",
1920 arg_action == ACTION_TEST ? " test" : "", getuid(), strna(t));
1921 }
1922
1923 *ret_first_boot = false;
1924 }
1925 }
1926
1927 static int initialize_runtime(
1928 bool skip_setup,
1929 struct rlimit *saved_rlimit_nofile,
1930 struct rlimit *saved_rlimit_memlock,
1931 const char **ret_error_message) {
1932
1933 int r;
1934
1935 assert(ret_error_message);
1936
1937 /* Sets up various runtime parameters. Many of these initializations are conditionalized:
1938 *
1939 * - Some only apply to --system instances
1940 * - Some only apply to --user instances
1941 * - Some only apply when we first start up, but not when we reexecute
1942 */
1943
1944 if (arg_action != ACTION_RUN)
1945 return 0;
1946
1947 if (arg_system) {
1948 /* Make sure we leave a core dump without panicing the kernel. */
1949 install_crash_handler();
1950
1951 if (!skip_setup) {
1952 r = mount_cgroup_controllers(arg_join_controllers);
1953 if (r < 0) {
1954 *ret_error_message = "Failed to mount cgroup hierarchies";
1955 return r;
1956 }
1957
1958 status_welcome();
1959 hostname_setup();
1960 machine_id_setup(NULL, arg_machine_id, NULL);
1961 loopback_setup();
1962 bump_unix_max_dgram_qlen();
1963 test_usr();
1964 write_container_id();
1965 }
1966
1967 if (arg_watchdog_device) {
1968 r = watchdog_set_device(arg_watchdog_device);
1969 if (r < 0)
1970 log_warning_errno(r, "Failed to set watchdog device to %s, ignoring: %m", arg_watchdog_device);
1971 }
1972
1973 if (arg_runtime_watchdog > 0 && arg_runtime_watchdog != USEC_INFINITY)
1974 watchdog_set_timeout(&arg_runtime_watchdog);
1975 }
1976
1977 if (arg_timer_slack_nsec != NSEC_INFINITY)
1978 if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0)
1979 log_warning_errno(errno, "Failed to adjust timer slack, ignoring: %m");
1980
1981 if (arg_system && !cap_test_all(arg_capability_bounding_set)) {
1982 r = capability_bounding_set_drop_usermode(arg_capability_bounding_set);
1983 if (r < 0) {
1984 *ret_error_message = "Failed to drop capability bounding set of usermode helpers";
1985 return log_emergency_errno(r, "Failed to drop capability bounding set of usermode helpers: %m");
1986 }
1987
1988 r = capability_bounding_set_drop(arg_capability_bounding_set, true);
1989 if (r < 0) {
1990 *ret_error_message = "Failed to drop capability bounding set";
1991 return log_emergency_errno(r, "Failed to drop capability bounding set: %m");
1992 }
1993 }
1994
1995 if (arg_syscall_archs) {
1996 r = enforce_syscall_archs(arg_syscall_archs);
1997 if (r < 0) {
1998 *ret_error_message = "Failed to set syscall architectures";
1999 return r;
2000 }
2001 }
2002
2003 if (!arg_system)
2004 /* Become reaper of our children */
2005 if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0)
2006 log_warning_errno(errno, "Failed to make us a subreaper: %m");
2007
2008 if (arg_system) {
2009 /* Bump up RLIMIT_NOFILE for systemd itself */
2010 (void) bump_rlimit_nofile(saved_rlimit_nofile);
2011 (void) bump_rlimit_memlock(saved_rlimit_memlock);
2012 }
2013
2014 return 0;
2015 }
2016
2017 static int do_queue_default_job(
2018 Manager *m,
2019 const char **ret_error_message) {
2020
2021 _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
2022 Job *default_unit_job;
2023 Unit *target = NULL;
2024 int r;
2025
2026 log_debug("Activating default unit: %s", arg_default_unit);
2027
2028 r = manager_load_unit(m, arg_default_unit, NULL, &error, &target);
2029 if (r < 0)
2030 log_error("Failed to load default target: %s", bus_error_message(&error, r));
2031 else if (IN_SET(target->load_state, UNIT_ERROR, UNIT_NOT_FOUND))
2032 log_error_errno(target->load_error, "Failed to load default target: %m");
2033 else if (target->load_state == UNIT_MASKED)
2034 log_error("Default target masked.");
2035
2036 if (!target || target->load_state != UNIT_LOADED) {
2037 log_info("Trying to load rescue target...");
2038
2039 r = manager_load_unit(m, SPECIAL_RESCUE_TARGET, NULL, &error, &target);
2040 if (r < 0) {
2041 *ret_error_message = "Failed to load rescue target";
2042 return log_emergency_errno(r, "Failed to load rescue target: %s", bus_error_message(&error, r));
2043 } else if (IN_SET(target->load_state, UNIT_ERROR, UNIT_NOT_FOUND)) {
2044 *ret_error_message = "Failed to load rescue target";
2045 return log_emergency_errno(target->load_error, "Failed to load rescue target: %m");
2046 } else if (target->load_state == UNIT_MASKED) {
2047 *ret_error_message = "Rescue target masked";
2048 log_emergency("Rescue target masked.");
2049 return -ERFKILL;
2050 }
2051 }
2052
2053 assert(target->load_state == UNIT_LOADED);
2054
2055 r = manager_add_job(m, JOB_START, target, JOB_ISOLATE, &error, &default_unit_job);
2056 if (r == -EPERM) {
2057 log_debug_errno(r, "Default target could not be isolated, starting instead: %s", bus_error_message(&error, r));
2058
2059 sd_bus_error_free(&error);
2060
2061 r = manager_add_job(m, JOB_START, target, JOB_REPLACE, &error, &default_unit_job);
2062 if (r < 0) {
2063 *ret_error_message = "Failed to start default target";
2064 return log_emergency_errno(r, "Failed to start default target: %s", bus_error_message(&error, r));
2065 }
2066
2067 } else if (r < 0) {
2068 *ret_error_message = "Failed to isolate default target";
2069 return log_emergency_errno(r, "Failed to isolate default target: %s", bus_error_message(&error, r));
2070 }
2071
2072 m->default_unit_job_id = default_unit_job->id;
2073
2074 return 0;
2075 }
2076
2077 static void free_arguments(void) {
2078 size_t j;
2079
2080 /* Frees all arg_* variables, with the exception of arg_serialization */
2081
2082 for (j = 0; j < ELEMENTSOF(arg_default_rlimit); j++)
2083 arg_default_rlimit[j] = mfree(arg_default_rlimit[j]);
2084
2085 arg_default_unit = mfree(arg_default_unit);
2086 arg_confirm_spawn = mfree(arg_confirm_spawn);
2087 arg_join_controllers = strv_free_free(arg_join_controllers);
2088 arg_default_environment = strv_free(arg_default_environment);
2089 arg_syscall_archs = set_free(arg_syscall_archs);
2090 }
2091
2092 static int load_configuration(int argc, char **argv, const char **ret_error_message) {
2093 int r;
2094
2095 assert(ret_error_message);
2096
2097 r = initialize_join_controllers();
2098 if (r < 0) {
2099 *ret_error_message = "Failed to initialize cgroup controller joining table";
2100 return r;
2101 }
2102
2103 arg_default_tasks_max = system_tasks_max_scale(DEFAULT_TASKS_MAX_PERCENTAGE, 100U);
2104
2105 r = parse_config_file();
2106 if (r < 0) {
2107 *ret_error_message = "Failed to parse config file";
2108 return r;
2109 }
2110
2111 if (arg_system) {
2112 r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, 0);
2113 if (r < 0)
2114 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
2115 }
2116
2117 /* Note that this also parses bits from the kernel command line, including "debug". */
2118 log_parse_environment();
2119
2120 r = parse_argv(argc, argv);
2121 if (r < 0) {
2122 *ret_error_message = "Failed to parse commandline arguments";
2123 return r;
2124 }
2125
2126 /* Initialize default unit */
2127 if (!arg_default_unit) {
2128 arg_default_unit = strdup(SPECIAL_DEFAULT_TARGET);
2129 if (!arg_default_unit) {
2130 *ret_error_message = "Failed to set default unit";
2131 return log_oom();
2132 }
2133 }
2134
2135 /* Initialize the show status setting if it hasn't been set explicitly yet */
2136 if (arg_show_status == _SHOW_STATUS_UNSET)
2137 arg_show_status = SHOW_STATUS_YES;
2138
2139 return 0;
2140 }
2141
2142 static int safety_checks(void) {
2143
2144 if (getpid_cached() == 1 &&
2145 arg_action != ACTION_RUN) {
2146 log_error("Unsupported execution mode while PID 1.");
2147 return -EPERM;
2148 }
2149
2150 if (getpid_cached() == 1 &&
2151 !arg_system) {
2152 log_error("Can't run --user mode as PID 1.");
2153 return -EPERM;
2154 }
2155
2156 if (arg_action == ACTION_RUN &&
2157 arg_system &&
2158 getpid_cached() != 1) {
2159 log_error("Can't run system mode unless PID 1.");
2160 return -EPERM;
2161 }
2162
2163 if (arg_action == ACTION_TEST &&
2164 geteuid() == 0) {
2165 log_error("Don't run test mode as root.");
2166 return -EPERM;
2167 }
2168
2169 if (!arg_system &&
2170 arg_action == ACTION_RUN &&
2171 sd_booted() <= 0) {
2172 log_error("Trying to run as user instance, but the system has not been booted with systemd.");
2173 return -EOPNOTSUPP;
2174 }
2175
2176 if (!arg_system &&
2177 arg_action == ACTION_RUN &&
2178 !getenv("XDG_RUNTIME_DIR")) {
2179 log_error("Trying to run as user instance, but $XDG_RUNTIME_DIR is not set.");
2180 return -EUNATCH;
2181 }
2182
2183 if (arg_system &&
2184 arg_action == ACTION_RUN &&
2185 running_in_chroot() > 0) {
2186 log_error("Cannot be run in a chroot() environment.");
2187 return -EOPNOTSUPP;
2188 }
2189
2190 return 0;
2191 }
2192
2193 static int initialize_security(
2194 bool *loaded_policy,
2195 dual_timestamp *security_start_timestamp,
2196 dual_timestamp *security_finish_timestamp,
2197 const char **ret_error_message) {
2198
2199 int r;
2200
2201 assert(loaded_policy);
2202 assert(security_start_timestamp);
2203 assert(security_finish_timestamp);
2204 assert(ret_error_message);
2205
2206 dual_timestamp_get(security_start_timestamp);
2207
2208 r = mac_selinux_setup(loaded_policy);
2209 if (r < 0) {
2210 *ret_error_message = "Failed to load SELinux policy";
2211 return r;
2212 }
2213
2214 r = mac_smack_setup(loaded_policy);
2215 if (r < 0) {
2216 *ret_error_message = "Failed to load SMACK policy";
2217 return r;
2218 }
2219
2220 r = ima_setup();
2221 if (r < 0) {
2222 *ret_error_message = "Failed to load IMA policy";
2223 return r;
2224 }
2225
2226 dual_timestamp_get(security_finish_timestamp);
2227 return 0;
2228 }
2229
2230 static void test_summary(Manager *m) {
2231 assert(m);
2232
2233 printf("-> By units:\n");
2234 manager_dump_units(m, stdout, "\t");
2235
2236 printf("-> By jobs:\n");
2237 manager_dump_jobs(m, stdout, "\t");
2238 }
2239
2240 static int collect_fds(FDSet **ret_fds, const char **ret_error_message) {
2241 int r;
2242
2243 assert(ret_fds);
2244 assert(ret_error_message);
2245
2246 r = fdset_new_fill(ret_fds);
2247 if (r < 0) {
2248 *ret_error_message = "Failed to allocate fd set";
2249 return log_emergency_errno(r, "Failed to allocate fd set: %m");
2250 }
2251
2252 fdset_cloexec(*ret_fds, true);
2253
2254 if (arg_serialization)
2255 assert_se(fdset_remove(*ret_fds, fileno(arg_serialization)) >= 0);
2256
2257 return 0;
2258 }
2259
2260 static void setup_console_terminal(bool skip_setup) {
2261
2262 if (!arg_system)
2263 return;
2264
2265 /* Become a session leader if we aren't one yet. */
2266 (void) setsid();
2267
2268 /* If we are init, we connect stdin/stdout/stderr to /dev/null and make sure we don't have a controlling
2269 * tty. */
2270 (void) release_terminal();
2271
2272 /* Reset the console, but only if this is really init and we are freshly booted */
2273 if (getpid_cached() == 1 && !skip_setup)
2274 (void) console_setup();
2275 }
2276
/* Takes a quick peek at the command line, long before it is parsed for real, to determine whether this is a
 * reexecution or a normal bootup.
 *
 * Returns true if the extensive setup shall be skipped, i.e. if deserialization was requested and
 * "--switched-root" was not passed. If we have switched root all the special setup is done anyway, even though
 * we deserialize in that case, too. argv[0] is not looked at.
 *
 * Both spellings of the deserialization switch are recognized: "--deserialize" followed by a separate argument,
 * and the joined "--deserialize=FD" form.
 * NOTE(review): this assumes parse_argv() accepts the joined form as well (as getopt_long() does for long
 * options that take an argument) — confirm against the option table. */
static bool early_skip_setup_check(int argc, char *argv[]) {
        static const char deserialize_joined[] = "--deserialize=";
        bool found_deserialize = false;
        int i;

        for (i = 1; i < argc; i++) {
                const char *arg = argv[i];

                if (strcmp(arg, "--switched-root") == 0)
                        return false; /* If we switched root, don't skip the setup. */

                if (strcmp(arg, "--deserialize") == 0 ||
                    strncmp(arg, deserialize_joined, sizeof(deserialize_joined) - 1) == 0)
                        found_deserialize = true;
        }

        return found_deserialize; /* When we are deserializing, then we are reexecuting, hence avoid the extensive setup */
}
2295
2296 int main(int argc, char *argv[]) {
2297
2298 dual_timestamp initrd_timestamp = DUAL_TIMESTAMP_NULL, userspace_timestamp = DUAL_TIMESTAMP_NULL, kernel_timestamp = DUAL_TIMESTAMP_NULL,
2299 security_start_timestamp = DUAL_TIMESTAMP_NULL, security_finish_timestamp = DUAL_TIMESTAMP_NULL;
2300 struct rlimit saved_rlimit_nofile = RLIMIT_MAKE_CONST(0), saved_rlimit_memlock = RLIMIT_MAKE_CONST((rlim_t) -1);
2301 bool skip_setup, loaded_policy = false, queue_default_job = false, first_boot = false, reexecute = false;
2302 char *switch_root_dir = NULL, *switch_root_init = NULL;
2303 usec_t before_startup, after_startup;
2304 static char systemd[] = "systemd";
2305 char timespan[FORMAT_TIMESPAN_MAX];
2306 const char *shutdown_verb = NULL, *error_message = NULL;
2307 int r, retval = EXIT_FAILURE;
2308 Manager *m = NULL;
2309 FDSet *fds = NULL;
2310
2311 /* SysV compatibility: redirect init → telinit */
2312 redirect_telinit(argc, argv);
2313
2314 /* Take timestamps early on */
2315 dual_timestamp_from_monotonic(&kernel_timestamp, 0);
2316 dual_timestamp_get(&userspace_timestamp);
2317
2318 /* Figure out whether we need to do initialize the system, or if we already did that because we are
2319 * reexecuting */
2320 skip_setup = early_skip_setup_check(argc, argv);
2321
2322 /* If we get started via the /sbin/init symlink then we are called 'init'. After a subsequent reexecution we
2323 * are then called 'systemd'. That is confusing, hence let's call us systemd right-away. */
2324 program_invocation_short_name = systemd;
2325 (void) prctl(PR_SET_NAME, systemd);
2326
2327 /* Save the original command line */
2328 saved_argv = argv;
2329 saved_argc = argc;
2330
2331 /* Make sure that if the user says "syslog" we actually log to the journal. */
2332 log_set_upgrade_syslog_to_journal(true);
2333
2334 if (getpid_cached() == 1) {
2335 /* Disable the umask logic */
2336 umask(0);
2337
2338 /* Make sure that at least initially we do not ever log to journald/syslogd, because it might not be activated
2339 * yet (even though the log socket for it exists). */
2340 log_set_prohibit_ipc(true);
2341
2342 /* Always reopen /dev/console when running as PID 1 or one of its pre-execve() children. This is
2343 * important so that we never end up logging to any foreign stderr, for example if we have to log in a
2344 * child process right before execve()'ing the actual binary, at a point in time where socket
2345 * activation stderr/stdout area already set up. */
2346 log_set_always_reopen_console(true);
2347 }
2348
2349 if (getpid_cached() == 1 && detect_container() <= 0) {
2350
2351 /* Running outside of a container as PID 1 */
2352 arg_system = true;
2353 log_set_target(LOG_TARGET_KMSG);
2354 log_open();
2355
2356 if (in_initrd())
2357 initrd_timestamp = userspace_timestamp;
2358
2359 if (!skip_setup) {
2360 r = mount_setup_early();
2361 if (r < 0) {
2362 error_message = "Failed to mount early API filesystems";
2363 goto finish;
2364 }
2365
2366 r = initialize_security(
2367 &loaded_policy,
2368 &security_start_timestamp,
2369 &security_finish_timestamp,
2370 &error_message);
2371 if (r < 0)
2372 goto finish;
2373 }
2374
2375 if (mac_selinux_init() < 0) {
2376 error_message = "Failed to initialize SELinux policy";
2377 goto finish;
2378 }
2379
2380 if (!skip_setup)
2381 initialize_clock();
2382
2383 /* Set the default for later on, but don't actually
2384 * open the logs like this for now. Note that if we
2385 * are transitioning from the initrd there might still
2386 * be journal fd open, and we shouldn't attempt
2387 * opening that before we parsed /proc/cmdline which
2388 * might redirect output elsewhere. */
2389 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
2390
2391 } else if (getpid_cached() == 1) {
2392 /* Running inside a container, as PID 1 */
2393 arg_system = true;
2394 log_set_target(LOG_TARGET_CONSOLE);
2395 log_open();
2396
2397 /* For later on, see above... */
2398 log_set_target(LOG_TARGET_JOURNAL);
2399
2400 /* clear the kernel timestamp,
2401 * because we are in a container */
2402 kernel_timestamp = DUAL_TIMESTAMP_NULL;
2403 } else {
2404 /* Running as user instance */
2405 arg_system = false;
2406 log_set_target(LOG_TARGET_AUTO);
2407 log_open();
2408
2409 /* clear the kernel timestamp,
2410 * because we are not PID 1 */
2411 kernel_timestamp = DUAL_TIMESTAMP_NULL;
2412 }
2413
2414 initialize_coredump(skip_setup);
2415
2416 r = fixup_environment();
2417 if (r < 0) {
2418 log_emergency_errno(r, "Failed to fix up PID 1 environment: %m");
2419 error_message = "Failed to fix up PID1 environment";
2420 goto finish;
2421 }
2422
2423 if (arg_system) {
2424
2425 /* Try to figure out if we can use colors with the console. No
2426 * need to do that for user instances since they never log
2427 * into the console. */
2428 log_show_color(colors_enabled());
2429 r = make_null_stdio();
2430 if (r < 0)
2431 log_warning_errno(r, "Failed to redirect standard streams to /dev/null: %m");
2432 }
2433
2434 /* Mount /proc, /sys and friends, so that /proc/cmdline and
2435 * /proc/$PID/fd is available. */
2436 if (getpid_cached() == 1) {
2437
2438 /* Load the kernel modules early. */
2439 if (!skip_setup)
2440 kmod_setup();
2441
2442 r = mount_setup(loaded_policy);
2443 if (r < 0) {
2444 error_message = "Failed to mount API filesystems";
2445 goto finish;
2446 }
2447 }
2448
2449 /* Reset all signal handlers. */
2450 (void) reset_all_signal_handlers();
2451 (void) ignore_signals(SIGNALS_IGNORE, -1);
2452
2453 r = load_configuration(argc, argv, &error_message);
2454 if (r < 0)
2455 goto finish;
2456
2457 r = safety_checks();
2458 if (r < 0)
2459 goto finish;
2460
2461 if (IN_SET(arg_action, ACTION_TEST, ACTION_HELP, ACTION_DUMP_CONFIGURATION_ITEMS))
2462 pager_open(arg_no_pager, false);
2463
2464 if (arg_action != ACTION_RUN)
2465 skip_setup = true;
2466
2467 if (arg_action == ACTION_HELP) {
2468 retval = help();
2469 goto finish;
2470 } else if (arg_action == ACTION_VERSION) {
2471 retval = version();
2472 goto finish;
2473 } else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) {
2474 unit_dump_config_items(stdout);
2475 retval = EXIT_SUCCESS;
2476 goto finish;
2477 }
2478
2479 assert_se(IN_SET(arg_action, ACTION_RUN, ACTION_TEST));
2480
2481 /* Move out of the way, so that we won't block unmounts */
2482 assert_se(chdir("/") == 0);
2483
2484 if (arg_action == ACTION_RUN) {
2485
2486 /* Close logging fds, in order not to confuse collecting passed fds and terminal logic below */
2487 log_close();
2488
2489 /* Remember open file descriptors for later deserialization */
2490 r = collect_fds(&fds, &error_message);
2491 if (r < 0)
2492 goto finish;
2493
2494 /* Give up any control of the console, but make sure its initialized. */
2495 setup_console_terminal(skip_setup);
2496
2497 /* Open the logging devices, if possible and necessary */
2498 log_open();
2499 }
2500
2501 log_execution_mode(&first_boot);
2502
2503 r = initialize_runtime(skip_setup,
2504 &saved_rlimit_nofile,
2505 &saved_rlimit_memlock,
2506 &error_message);
2507 if (r < 0)
2508 goto finish;
2509
2510 r = manager_new(arg_system ? UNIT_FILE_SYSTEM : UNIT_FILE_USER,
2511 arg_action == ACTION_TEST ? MANAGER_TEST_FULL : 0,
2512 &m);
2513 if (r < 0) {
2514 log_emergency_errno(r, "Failed to allocate manager object: %m");
2515 error_message = "Failed to allocate manager object";
2516 goto finish;
2517 }
2518
2519 m->timestamps[MANAGER_TIMESTAMP_KERNEL] = kernel_timestamp;
2520 m->timestamps[MANAGER_TIMESTAMP_INITRD] = initrd_timestamp;
2521 m->timestamps[MANAGER_TIMESTAMP_USERSPACE] = userspace_timestamp;
2522 m->timestamps[MANAGER_TIMESTAMP_SECURITY_START] = security_start_timestamp;
2523 m->timestamps[MANAGER_TIMESTAMP_SECURITY_FINISH] = security_finish_timestamp;
2524
2525 set_manager_defaults(m);
2526 set_manager_settings(m);
2527 manager_set_first_boot(m, first_boot);
2528
2529 /* Remember whether we should queue the default job */
2530 queue_default_job = !arg_serialization || arg_switched_root;
2531
2532 before_startup = now(CLOCK_MONOTONIC);
2533
2534 r = manager_startup(m, arg_serialization, fds);
2535 if (r < 0) {
2536 log_error_errno(r, "Failed to fully start up daemon: %m");
2537 error_message = "Failed to start up manager";
2538 goto finish;
2539 }
2540
2541 /* This will close all file descriptors that were opened, but not claimed by any unit. */
2542 fds = fdset_free(fds);
2543 arg_serialization = safe_fclose(arg_serialization);
2544
2545 if (queue_default_job) {
2546 r = do_queue_default_job(m, &error_message);
2547 if (r < 0)
2548 goto finish;
2549 }
2550
2551 after_startup = now(CLOCK_MONOTONIC);
2552
2553 log_full(arg_action == ACTION_TEST ? LOG_INFO : LOG_DEBUG,
2554 "Loaded units and determined initial transaction in %s.",
2555 format_timespan(timespan, sizeof(timespan), after_startup - before_startup, 100 * USEC_PER_MSEC));
2556
2557 if (arg_action == ACTION_TEST) {
2558 test_summary(m);
2559 retval = EXIT_SUCCESS;
2560 goto finish;
2561 }
2562
2563 (void) invoke_main_loop(m,
2564 &reexecute,
2565 &retval,
2566 &shutdown_verb,
2567 &fds,
2568 &switch_root_dir,
2569 &switch_root_init,
2570 &error_message);
2571
2572 finish:
2573 pager_close();
2574
2575 if (m)
2576 arg_shutdown_watchdog = m->shutdown_watchdog;
2577
2578 m = manager_free(m);
2579
2580 free_arguments();
2581 mac_selinux_finish();
2582
2583 if (reexecute)
2584 do_reexecute(argc, argv,
2585 &saved_rlimit_nofile,
2586 &saved_rlimit_memlock,
2587 fds,
2588 switch_root_dir,
2589 switch_root_init,
2590 &error_message); /* This only returns if reexecution failed */
2591
2592 arg_serialization = safe_fclose(arg_serialization);
2593 fds = fdset_free(fds);
2594
2595 #if HAVE_VALGRIND_VALGRIND_H
2596 /* If we are PID 1 and running under valgrind, then let's exit
2597 * here explicitly. valgrind will only generate nice output on
2598 * exit(), not on exec(), hence let's do the former not the
2599 * latter here. */
2600 if (getpid_cached() == 1 && RUNNING_ON_VALGRIND) {
2601 /* Cleanup watchdog_device strings for valgrind. We need them
2602 * in become_shutdown() so normally we cannot free them yet. */
2603 watchdog_free_device();
2604 arg_watchdog_device = mfree(arg_watchdog_device);
2605 return 0;
2606 }
2607 #endif
2608
2609 if (shutdown_verb) {
2610 r = become_shutdown(shutdown_verb, retval);
2611
2612 log_error_errno(r, "Failed to execute shutdown binary, %s: %m", getpid_cached() == 1 ? "freezing" : "quitting");
2613 error_message = "Failed to execute shutdown binary";
2614 }
2615
2616 watchdog_free_device();
2617 arg_watchdog_device = mfree(arg_watchdog_device);
2618
2619 if (getpid_cached() == 1) {
2620 if (error_message)
2621 manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
2622 ANSI_HIGHLIGHT_RED "!!!!!!" ANSI_NORMAL,
2623 "%s, freezing.", error_message);
2624 freeze_or_reboot();
2625 }
2626
2627 return retval;
2628 }