git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/main.c
Merge pull request #8106 from dqminh/route-expires-kernel
[thirdparty/systemd.git] / src / core / main.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2010 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <getopt.h>
24 #include <signal.h>
25 #include <stdio.h>
26 #include <string.h>
27 #include <sys/mount.h>
28 #include <sys/prctl.h>
29 #include <sys/reboot.h>
30 #include <sys/stat.h>
31 #include <unistd.h>
32 #if HAVE_SECCOMP
33 #include <seccomp.h>
34 #endif
35 #if HAVE_VALGRIND_VALGRIND_H
36 #include <valgrind/valgrind.h>
37 #endif
38
39 #include "sd-bus.h"
40 #include "sd-daemon.h"
41 #include "sd-messages.h"
42
43 #include "alloc-util.h"
44 #include "architecture.h"
45 #include "build.h"
46 #include "bus-error.h"
47 #include "bus-util.h"
48 #include "capability-util.h"
49 #include "clock-util.h"
50 #include "conf-parser.h"
51 #include "cpu-set-util.h"
52 #include "dbus-manager.h"
53 #include "def.h"
54 #include "emergency-action.h"
55 #include "env-util.h"
56 #include "fd-util.h"
57 #include "fdset.h"
58 #include "fileio.h"
59 #include "format-util.h"
60 #include "fs-util.h"
61 #include "hostname-setup.h"
62 #include "ima-setup.h"
63 #include "killall.h"
64 #include "kmod-setup.h"
65 #include "load-fragment.h"
66 #include "log.h"
67 #include "loopback-setup.h"
68 #include "machine-id-setup.h"
69 #include "manager.h"
70 #include "missing.h"
71 #include "mount-setup.h"
72 #include "pager.h"
73 #include "parse-util.h"
74 #include "path-util.h"
75 #include "proc-cmdline.h"
76 #include "process-util.h"
77 #include "raw-clone.h"
78 #include "rlimit-util.h"
79 #if HAVE_SECCOMP
80 #include "seccomp-util.h"
81 #endif
82 #include "selinux-setup.h"
83 #include "selinux-util.h"
84 #include "signal-util.h"
85 #include "smack-setup.h"
86 #include "special.h"
87 #include "stat-util.h"
88 #include "stdio-util.h"
89 #include "strv.h"
90 #include "switch-root.h"
91 #include "terminal-util.h"
92 #include "umask-util.h"
93 #include "user-util.h"
94 #include "util.h"
95 #include "virt.h"
96 #include "watchdog.h"
97
98 static enum {
99 ACTION_RUN,
100 ACTION_HELP,
101 ACTION_VERSION,
102 ACTION_TEST,
103 ACTION_DUMP_CONFIGURATION_ITEMS
104 } arg_action = ACTION_RUN;
105 static char *arg_default_unit = NULL;
106 static bool arg_system = false;
107 static bool arg_dump_core = true;
108 static int arg_crash_chvt = -1;
109 static bool arg_crash_shell = false;
110 static bool arg_crash_reboot = false;
111 static char *arg_confirm_spawn = NULL;
112 static ShowStatus arg_show_status = _SHOW_STATUS_UNSET;
113 static bool arg_switched_root = false;
114 static bool arg_no_pager = false;
115 static bool arg_service_watchdogs = true;
116 static char ***arg_join_controllers = NULL;
117 static ExecOutput arg_default_std_output = EXEC_OUTPUT_JOURNAL;
118 static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT;
119 static usec_t arg_default_restart_usec = DEFAULT_RESTART_USEC;
120 static usec_t arg_default_timeout_start_usec = DEFAULT_TIMEOUT_USEC;
121 static usec_t arg_default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC;
122 static usec_t arg_default_start_limit_interval = DEFAULT_START_LIMIT_INTERVAL;
123 static unsigned arg_default_start_limit_burst = DEFAULT_START_LIMIT_BURST;
124 static usec_t arg_runtime_watchdog = 0;
125 static usec_t arg_shutdown_watchdog = 10 * USEC_PER_MINUTE;
126 static char *arg_watchdog_device = NULL;
127 static char **arg_default_environment = NULL;
128 static struct rlimit *arg_default_rlimit[_RLIMIT_MAX] = {};
129 static uint64_t arg_capability_bounding_set = CAP_ALL;
130 static nsec_t arg_timer_slack_nsec = NSEC_INFINITY;
131 static usec_t arg_default_timer_accuracy_usec = 1 * USEC_PER_MINUTE;
132 static Set* arg_syscall_archs = NULL;
133 static FILE* arg_serialization = NULL;
134 static bool arg_default_cpu_accounting = false;
135 static bool arg_default_io_accounting = false;
136 static bool arg_default_ip_accounting = false;
137 static bool arg_default_blockio_accounting = false;
138 static bool arg_default_memory_accounting = MEMORY_ACCOUNTING_DEFAULT;
139 static bool arg_default_tasks_accounting = true;
140 static uint64_t arg_default_tasks_max = UINT64_MAX;
141 static sd_id128_t arg_machine_id = {};
142 static EmergencyAction arg_cad_burst_action = EMERGENCY_ACTION_REBOOT_FORCE;
143
144 _noreturn_ static void freeze_or_reboot(void) {
145
146 if (arg_crash_reboot) {
147 log_notice("Rebooting in 10s...");
148 (void) sleep(10);
149
150 log_notice("Rebooting now...");
151 (void) reboot(RB_AUTOBOOT);
152 log_emergency_errno(errno, "Failed to reboot: %m");
153 }
154
155 log_emergency("Freezing execution.");
156 freeze();
157 }
158
159 _noreturn_ static void crash(int sig) {
160 struct sigaction sa;
161 pid_t pid;
162
163 if (getpid_cached() != 1)
164 /* Pass this on immediately, if this is not PID 1 */
165 (void) raise(sig);
166 else if (!arg_dump_core)
167 log_emergency("Caught <%s>, not dumping core.", signal_to_string(sig));
168 else {
169 sa = (struct sigaction) {
170 .sa_handler = nop_signal_handler,
171 .sa_flags = SA_NOCLDSTOP|SA_RESTART,
172 };
173
174 /* We want to wait for the core process, hence let's enable SIGCHLD */
175 (void) sigaction(SIGCHLD, &sa, NULL);
176
177 pid = raw_clone(SIGCHLD);
178 if (pid < 0)
179 log_emergency_errno(errno, "Caught <%s>, cannot fork for core dump: %m", signal_to_string(sig));
180 else if (pid == 0) {
181 /* Enable default signal handler for core dump */
182
183 sa = (struct sigaction) {
184 .sa_handler = SIG_DFL,
185 };
186 (void) sigaction(sig, &sa, NULL);
187
188 /* Don't limit the coredump size */
189 (void) setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY));
190
191 /* Just to be sure... */
192 (void) chdir("/");
193
194 /* Raise the signal again */
195 pid = raw_getpid();
196 (void) kill(pid, sig); /* raise() would kill the parent */
197
198 assert_not_reached("We shouldn't be here...");
199 _exit(EXIT_FAILURE);
200 } else {
201 siginfo_t status;
202 int r;
203
204 /* Order things nicely. */
205 r = wait_for_terminate(pid, &status);
206 if (r < 0)
207 log_emergency_errno(r, "Caught <%s>, waitpid() failed: %m", signal_to_string(sig));
208 else if (status.si_code != CLD_DUMPED)
209 log_emergency("Caught <%s>, core dump failed (child "PID_FMT", code=%s, status=%i/%s).",
210 signal_to_string(sig),
211 pid, sigchld_code_to_string(status.si_code),
212 status.si_status,
213 strna(status.si_code == CLD_EXITED
214 ? exit_status_to_string(status.si_status, EXIT_STATUS_MINIMAL)
215 : signal_to_string(status.si_status)));
216 else
217 log_emergency("Caught <%s>, dumped core as pid "PID_FMT".", signal_to_string(sig), pid);
218 }
219 }
220
221 if (arg_crash_chvt >= 0)
222 (void) chvt(arg_crash_chvt);
223
224 sa = (struct sigaction) {
225 .sa_handler = SIG_IGN,
226 .sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT|SA_RESTART,
227 };
228
229 /* Let the kernel reap children for us */
230 (void) sigaction(SIGCHLD, &sa, NULL);
231
232 if (arg_crash_shell) {
233 log_notice("Executing crash shell in 10s...");
234 (void) sleep(10);
235
236 pid = raw_clone(SIGCHLD);
237 if (pid < 0)
238 log_emergency_errno(errno, "Failed to fork off crash shell: %m");
239 else if (pid == 0) {
240 (void) setsid();
241 (void) make_console_stdio();
242 (void) execle("/bin/sh", "/bin/sh", NULL, environ);
243
244 log_emergency_errno(errno, "execle() failed: %m");
245 _exit(EXIT_FAILURE);
246 } else {
247 log_info("Spawned crash shell as PID "PID_FMT".", pid);
248 (void) wait_for_terminate(pid, NULL);
249 }
250 }
251
252 freeze_or_reboot();
253 }
254
255 static void install_crash_handler(void) {
256 static const struct sigaction sa = {
257 .sa_handler = crash,
258 .sa_flags = SA_NODEFER, /* So that we can raise the signal again from the signal handler */
259 };
260 int r;
261
262 /* We ignore the return value here, since, we don't mind if we
263 * cannot set up a crash handler */
264 r = sigaction_many(&sa, SIGNALS_CRASH_HANDLER, -1);
265 if (r < 0)
266 log_debug_errno(r, "I had trouble setting up the crash handler, ignoring: %m");
267 }
268
269 static int console_setup(void) {
270 _cleanup_close_ int tty_fd = -1;
271 int r;
272
273 tty_fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
274 if (tty_fd < 0)
275 return log_error_errno(tty_fd, "Failed to open /dev/console: %m");
276
277 /* We don't want to force text mode. plymouth may be showing
278 * pictures already from initrd. */
279 r = reset_terminal_fd(tty_fd, false);
280 if (r < 0)
281 return log_error_errno(r, "Failed to reset /dev/console: %m");
282
283 return 0;
284 }
285
286 static int parse_crash_chvt(const char *value) {
287 int b;
288
289 if (safe_atoi(value, &arg_crash_chvt) >= 0)
290 return 0;
291
292 b = parse_boolean(value);
293 if (b < 0)
294 return b;
295
296 if (b > 0)
297 arg_crash_chvt = 0; /* switch to where kmsg goes */
298 else
299 arg_crash_chvt = -1; /* turn off switching */
300
301 return 0;
302 }
303
/* Translates a confirm-spawn setting into a console path.
 *
 * value:   NULL or a true boolean selects "/dev/console"; a false boolean turns the feature
 *          off; anything else is taken as a path if is_path() says so, and otherwise as a
 *          name to be prefixed with "/dev/".
 * console: receives a newly allocated path, or NULL when turned off. Left untouched on error.
 *
 * Returns 0 on success, -ENOMEM if the path could not be allocated. */
static int parse_confirm_spawn(const char *value, char **console) {
        char *path;
        int enabled;

        enabled = value ? parse_boolean(value) : 1;
        if (enabled == 0) {
                *console = NULL;
                return 0;
        }

        if (enabled > 0)
                /* Enabled, use the default tty */
                path = strdup("/dev/console");
        else if (is_path(value))
                /* Not a boolean, but a fully qualified path */
                path = strdup(value);
        else
                /* Not a boolean, just the file name of a tty */
                path = strjoin("/dev/", value);

        if (!path)
                return -ENOMEM;

        *console = path;
        return 0;
}
325
326 static int set_machine_id(const char *m) {
327 sd_id128_t t;
328 assert(m);
329
330 if (sd_id128_from_string(m, &t) < 0)
331 return -EINVAL;
332
333 if (sd_id128_is_null(t))
334 return -EINVAL;
335
336 arg_machine_id = t;
337 return 0;
338 }
339
340 static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
341
342 int r;
343
344 assert(key);
345
346 if (STR_IN_SET(key, "systemd.unit", "rd.systemd.unit")) {
347
348 if (proc_cmdline_value_missing(key, value))
349 return 0;
350
351 if (!unit_name_is_valid(value, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
352 log_warning("Unit name specified on %s= is not valid, ignoring: %s", key, value);
353 else if (in_initrd() == !!startswith(key, "rd.")) {
354 if (free_and_strdup(&arg_default_unit, value) < 0)
355 return log_oom();
356 }
357
358 } else if (proc_cmdline_key_streq(key, "systemd.dump_core")) {
359
360 r = value ? parse_boolean(value) : true;
361 if (r < 0)
362 log_warning("Failed to parse dump core switch %s. Ignoring.", value);
363 else
364 arg_dump_core = r;
365
366 } else if (proc_cmdline_key_streq(key, "systemd.crash_chvt")) {
367
368 if (!value)
369 arg_crash_chvt = 0; /* turn on */
370 else if (parse_crash_chvt(value) < 0)
371 log_warning("Failed to parse crash chvt switch %s. Ignoring.", value);
372
373 } else if (proc_cmdline_key_streq(key, "systemd.crash_shell")) {
374
375 r = value ? parse_boolean(value) : true;
376 if (r < 0)
377 log_warning("Failed to parse crash shell switch %s. Ignoring.", value);
378 else
379 arg_crash_shell = r;
380
381 } else if (proc_cmdline_key_streq(key, "systemd.crash_reboot")) {
382
383 r = value ? parse_boolean(value) : true;
384 if (r < 0)
385 log_warning("Failed to parse crash reboot switch %s. Ignoring.", value);
386 else
387 arg_crash_reboot = r;
388
389 } else if (proc_cmdline_key_streq(key, "systemd.confirm_spawn")) {
390 char *s;
391
392 r = parse_confirm_spawn(value, &s);
393 if (r < 0)
394 log_warning_errno(r, "Failed to parse confirm_spawn switch %s. Ignoring.", value);
395 else {
396 free(arg_confirm_spawn);
397 arg_confirm_spawn = s;
398 }
399
400 } else if (proc_cmdline_key_streq(key, "systemd.service_watchdogs")) {
401
402 r = value ? parse_boolean(value) : true;
403 if (r < 0)
404 log_warning("Failed to parse service watchdog switch %s. Ignoring.", value);
405 else
406 arg_service_watchdogs = r;
407
408 } else if (proc_cmdline_key_streq(key, "systemd.show_status")) {
409
410 if (value) {
411 r = parse_show_status(value, &arg_show_status);
412 if (r < 0)
413 log_warning("Failed to parse show status switch %s. Ignoring.", value);
414 } else
415 arg_show_status = SHOW_STATUS_YES;
416
417 } else if (proc_cmdline_key_streq(key, "systemd.default_standard_output")) {
418
419 if (proc_cmdline_value_missing(key, value))
420 return 0;
421
422 r = exec_output_from_string(value);
423 if (r < 0)
424 log_warning("Failed to parse default standard output switch %s. Ignoring.", value);
425 else
426 arg_default_std_output = r;
427
428 } else if (proc_cmdline_key_streq(key, "systemd.default_standard_error")) {
429
430 if (proc_cmdline_value_missing(key, value))
431 return 0;
432
433 r = exec_output_from_string(value);
434 if (r < 0)
435 log_warning("Failed to parse default standard error switch %s. Ignoring.", value);
436 else
437 arg_default_std_error = r;
438
439 } else if (streq(key, "systemd.setenv")) {
440
441 if (proc_cmdline_value_missing(key, value))
442 return 0;
443
444 if (env_assignment_is_valid(value)) {
445 char **env;
446
447 env = strv_env_set(arg_default_environment, value);
448 if (!env)
449 return log_oom();
450
451 arg_default_environment = env;
452 } else
453 log_warning("Environment variable name '%s' is not valid. Ignoring.", value);
454
455 } else if (proc_cmdline_key_streq(key, "systemd.machine_id")) {
456
457 if (proc_cmdline_value_missing(key, value))
458 return 0;
459
460 r = set_machine_id(value);
461 if (r < 0)
462 log_warning("MachineID '%s' is not valid. Ignoring.", value);
463
464 } else if (proc_cmdline_key_streq(key, "systemd.default_timeout_start_sec")) {
465
466 if (proc_cmdline_value_missing(key, value))
467 return 0;
468
469 r = parse_sec(value, &arg_default_timeout_start_usec);
470 if (r < 0)
471 log_warning_errno(r, "Failed to parse default start timeout: %s, ignoring.", value);
472
473 if (arg_default_timeout_start_usec <= 0)
474 arg_default_timeout_start_usec = USEC_INFINITY;
475
476 } else if (proc_cmdline_key_streq(key, "systemd.watchdog_device")) {
477
478 if (proc_cmdline_value_missing(key, value))
479 return 0;
480
481 parse_path_argument_and_warn(value, false, &arg_watchdog_device);
482
483 } else if (streq(key, "quiet") && !value) {
484
485 if (arg_show_status == _SHOW_STATUS_UNSET)
486 arg_show_status = SHOW_STATUS_AUTO;
487
488 } else if (streq(key, "debug") && !value) {
489
490 /* Note that log_parse_environment() handles 'debug'
491 * too, and sets the log level to LOG_DEBUG. */
492
493 if (detect_container() > 0)
494 log_set_target(LOG_TARGET_CONSOLE);
495
496 } else if (!value) {
497 const char *target;
498
499 /* SysV compatibility */
500 target = runlevel_to_target(key);
501 if (target)
502 return free_and_strdup(&arg_default_unit, target);
503 }
504
505 return 0;
506 }
507
508 #define DEFINE_SETTER(name, func, descr) \
509 static int name(const char *unit, \
510 const char *filename, \
511 unsigned line, \
512 const char *section, \
513 unsigned section_line, \
514 const char *lvalue, \
515 int ltype, \
516 const char *rvalue, \
517 void *data, \
518 void *userdata) { \
519 \
520 int r; \
521 \
522 assert(filename); \
523 assert(lvalue); \
524 assert(rvalue); \
525 \
526 r = func(rvalue); \
527 if (r < 0) \
528 log_syntax(unit, LOG_ERR, filename, line, r, \
529 "Invalid " descr "'%s': %m", \
530 rvalue); \
531 \
532 return 0; \
533 }
534
535 DEFINE_SETTER(config_parse_level2, log_set_max_level_from_string, "log level")
536 DEFINE_SETTER(config_parse_target, log_set_target_from_string, "target")
537 DEFINE_SETTER(config_parse_color, log_show_color_from_string, "color" )
538 DEFINE_SETTER(config_parse_location, log_show_location_from_string, "location")
539
540 static int config_parse_cpu_affinity2(
541 const char *unit,
542 const char *filename,
543 unsigned line,
544 const char *section,
545 unsigned section_line,
546 const char *lvalue,
547 int ltype,
548 const char *rvalue,
549 void *data,
550 void *userdata) {
551
552 _cleanup_cpu_free_ cpu_set_t *c = NULL;
553 int ncpus;
554
555 ncpus = parse_cpu_set_and_warn(rvalue, &c, unit, filename, line, lvalue);
556 if (ncpus < 0)
557 return ncpus;
558
559 if (sched_setaffinity(0, CPU_ALLOC_SIZE(ncpus), c) < 0)
560 log_warning_errno(errno, "Failed to set CPU affinity: %m");
561
562 return 0;
563 }
564
565 static int config_parse_show_status(
566 const char* unit,
567 const char *filename,
568 unsigned line,
569 const char *section,
570 unsigned section_line,
571 const char *lvalue,
572 int ltype,
573 const char *rvalue,
574 void *data,
575 void *userdata) {
576
577 int k;
578 ShowStatus *b = data;
579
580 assert(filename);
581 assert(lvalue);
582 assert(rvalue);
583 assert(data);
584
585 k = parse_show_status(rvalue, b);
586 if (k < 0) {
587 log_syntax(unit, LOG_ERR, filename, line, k, "Failed to parse show status setting, ignoring: %s", rvalue);
588 return 0;
589 }
590
591 return 0;
592 }
593
594 static int config_parse_output_restricted(
595 const char* unit,
596 const char *filename,
597 unsigned line,
598 const char *section,
599 unsigned section_line,
600 const char *lvalue,
601 int ltype,
602 const char *rvalue,
603 void *data,
604 void *userdata) {
605
606 ExecOutput t, *eo = data;
607
608 assert(filename);
609 assert(lvalue);
610 assert(rvalue);
611 assert(data);
612
613 t = exec_output_from_string(rvalue);
614 if (t < 0) {
615 log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse output type, ignoring: %s", rvalue);
616 return 0;
617 }
618
619 if (IN_SET(t, EXEC_OUTPUT_SOCKET, EXEC_OUTPUT_NAMED_FD, EXEC_OUTPUT_FILE)) {
620 log_syntax(unit, LOG_ERR, filename, line, 0, "Standard output types socket, fd:, file: are not supported as defaults, ignoring: %s", rvalue);
621 return 0;
622 }
623
624 *eo = t;
625 return 0;
626 }
627
628 static int config_parse_crash_chvt(
629 const char* unit,
630 const char *filename,
631 unsigned line,
632 const char *section,
633 unsigned section_line,
634 const char *lvalue,
635 int ltype,
636 const char *rvalue,
637 void *data,
638 void *userdata) {
639
640 int r;
641
642 assert(filename);
643 assert(lvalue);
644 assert(rvalue);
645
646 r = parse_crash_chvt(rvalue);
647 if (r < 0) {
648 log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse CrashChangeVT= setting, ignoring: %s", rvalue);
649 return 0;
650 }
651
652 return 0;
653 }
654
655 static int parse_config_file(void) {
656
657 const ConfigTableItem items[] = {
658 { "Manager", "LogLevel", config_parse_level2, 0, NULL },
659 { "Manager", "LogTarget", config_parse_target, 0, NULL },
660 { "Manager", "LogColor", config_parse_color, 0, NULL },
661 { "Manager", "LogLocation", config_parse_location, 0, NULL },
662 { "Manager", "DumpCore", config_parse_bool, 0, &arg_dump_core },
663 { "Manager", "CrashChVT", /* legacy */ config_parse_crash_chvt, 0, NULL },
664 { "Manager", "CrashChangeVT", config_parse_crash_chvt, 0, NULL },
665 { "Manager", "CrashShell", config_parse_bool, 0, &arg_crash_shell },
666 { "Manager", "CrashReboot", config_parse_bool, 0, &arg_crash_reboot },
667 { "Manager", "ShowStatus", config_parse_show_status, 0, &arg_show_status },
668 { "Manager", "CPUAffinity", config_parse_cpu_affinity2, 0, NULL },
669 { "Manager", "JoinControllers", config_parse_join_controllers, 0, &arg_join_controllers },
670 { "Manager", "RuntimeWatchdogSec", config_parse_sec, 0, &arg_runtime_watchdog },
671 { "Manager", "ShutdownWatchdogSec", config_parse_sec, 0, &arg_shutdown_watchdog },
672 { "Manager", "WatchdogDevice", config_parse_path, 0, &arg_watchdog_device },
673 { "Manager", "CapabilityBoundingSet", config_parse_capability_set, 0, &arg_capability_bounding_set },
674 #if HAVE_SECCOMP
675 { "Manager", "SystemCallArchitectures", config_parse_syscall_archs, 0, &arg_syscall_archs },
676 #endif
677 { "Manager", "TimerSlackNSec", config_parse_nsec, 0, &arg_timer_slack_nsec },
678 { "Manager", "DefaultTimerAccuracySec", config_parse_sec, 0, &arg_default_timer_accuracy_usec },
679 { "Manager", "DefaultStandardOutput", config_parse_output_restricted,0, &arg_default_std_output },
680 { "Manager", "DefaultStandardError", config_parse_output_restricted,0, &arg_default_std_error },
681 { "Manager", "DefaultTimeoutStartSec", config_parse_sec, 0, &arg_default_timeout_start_usec },
682 { "Manager", "DefaultTimeoutStopSec", config_parse_sec, 0, &arg_default_timeout_stop_usec },
683 { "Manager", "DefaultRestartSec", config_parse_sec, 0, &arg_default_restart_usec },
684 { "Manager", "DefaultStartLimitInterval", config_parse_sec, 0, &arg_default_start_limit_interval }, /* obsolete alias */
685 { "Manager", "DefaultStartLimitIntervalSec",config_parse_sec, 0, &arg_default_start_limit_interval },
686 { "Manager", "DefaultStartLimitBurst", config_parse_unsigned, 0, &arg_default_start_limit_burst },
687 { "Manager", "DefaultEnvironment", config_parse_environ, 0, &arg_default_environment },
688 { "Manager", "DefaultLimitCPU", config_parse_limit, RLIMIT_CPU, arg_default_rlimit },
689 { "Manager", "DefaultLimitFSIZE", config_parse_limit, RLIMIT_FSIZE, arg_default_rlimit },
690 { "Manager", "DefaultLimitDATA", config_parse_limit, RLIMIT_DATA, arg_default_rlimit },
691 { "Manager", "DefaultLimitSTACK", config_parse_limit, RLIMIT_STACK, arg_default_rlimit },
692 { "Manager", "DefaultLimitCORE", config_parse_limit, RLIMIT_CORE, arg_default_rlimit },
693 { "Manager", "DefaultLimitRSS", config_parse_limit, RLIMIT_RSS, arg_default_rlimit },
694 { "Manager", "DefaultLimitNOFILE", config_parse_limit, RLIMIT_NOFILE, arg_default_rlimit },
695 { "Manager", "DefaultLimitAS", config_parse_limit, RLIMIT_AS, arg_default_rlimit },
696 { "Manager", "DefaultLimitNPROC", config_parse_limit, RLIMIT_NPROC, arg_default_rlimit },
697 { "Manager", "DefaultLimitMEMLOCK", config_parse_limit, RLIMIT_MEMLOCK, arg_default_rlimit },
698 { "Manager", "DefaultLimitLOCKS", config_parse_limit, RLIMIT_LOCKS, arg_default_rlimit },
699 { "Manager", "DefaultLimitSIGPENDING", config_parse_limit, RLIMIT_SIGPENDING, arg_default_rlimit },
700 { "Manager", "DefaultLimitMSGQUEUE", config_parse_limit, RLIMIT_MSGQUEUE, arg_default_rlimit },
701 { "Manager", "DefaultLimitNICE", config_parse_limit, RLIMIT_NICE, arg_default_rlimit },
702 { "Manager", "DefaultLimitRTPRIO", config_parse_limit, RLIMIT_RTPRIO, arg_default_rlimit },
703 { "Manager", "DefaultLimitRTTIME", config_parse_limit, RLIMIT_RTTIME, arg_default_rlimit },
704 { "Manager", "DefaultCPUAccounting", config_parse_bool, 0, &arg_default_cpu_accounting },
705 { "Manager", "DefaultIOAccounting", config_parse_bool, 0, &arg_default_io_accounting },
706 { "Manager", "DefaultIPAccounting", config_parse_bool, 0, &arg_default_ip_accounting },
707 { "Manager", "DefaultBlockIOAccounting", config_parse_bool, 0, &arg_default_blockio_accounting },
708 { "Manager", "DefaultMemoryAccounting", config_parse_bool, 0, &arg_default_memory_accounting },
709 { "Manager", "DefaultTasksAccounting", config_parse_bool, 0, &arg_default_tasks_accounting },
710 { "Manager", "DefaultTasksMax", config_parse_tasks_max, 0, &arg_default_tasks_max },
711 { "Manager", "CtrlAltDelBurstAction", config_parse_emergency_action, 0, &arg_cad_burst_action },
712 {}
713 };
714
715 const char *fn, *conf_dirs_nulstr;
716
717 fn = arg_system ?
718 PKGSYSCONFDIR "/system.conf" :
719 PKGSYSCONFDIR "/user.conf";
720
721 conf_dirs_nulstr = arg_system ?
722 CONF_PATHS_NULSTR("systemd/system.conf.d") :
723 CONF_PATHS_NULSTR("systemd/user.conf.d");
724
725 (void) config_parse_many_nulstr(fn, conf_dirs_nulstr, "Manager\0", config_item_table_lookup, items, CONFIG_PARSE_WARN, NULL);
726
727 /* Traditionally "0" was used to turn off the default unit timeouts. Fix this up so that we used USEC_INFINITY
728 * like everywhere else. */
729 if (arg_default_timeout_start_usec <= 0)
730 arg_default_timeout_start_usec = USEC_INFINITY;
731 if (arg_default_timeout_stop_usec <= 0)
732 arg_default_timeout_stop_usec = USEC_INFINITY;
733
734 return 0;
735 }
736
737 static void set_manager_defaults(Manager *m) {
738
739 assert(m);
740
741 m->default_timer_accuracy_usec = arg_default_timer_accuracy_usec;
742 m->default_std_output = arg_default_std_output;
743 m->default_std_error = arg_default_std_error;
744 m->default_timeout_start_usec = arg_default_timeout_start_usec;
745 m->default_timeout_stop_usec = arg_default_timeout_stop_usec;
746 m->default_restart_usec = arg_default_restart_usec;
747 m->default_start_limit_interval = arg_default_start_limit_interval;
748 m->default_start_limit_burst = arg_default_start_limit_burst;
749 m->default_cpu_accounting = arg_default_cpu_accounting;
750 m->default_io_accounting = arg_default_io_accounting;
751 m->default_ip_accounting = arg_default_ip_accounting;
752 m->default_blockio_accounting = arg_default_blockio_accounting;
753 m->default_memory_accounting = arg_default_memory_accounting;
754 m->default_tasks_accounting = arg_default_tasks_accounting;
755 m->default_tasks_max = arg_default_tasks_max;
756
757 manager_set_default_rlimits(m, arg_default_rlimit);
758 manager_environment_add(m, NULL, arg_default_environment);
759 }
760
761 static void set_manager_settings(Manager *m) {
762
763 assert(m);
764
765 m->confirm_spawn = arg_confirm_spawn;
766 m->service_watchdogs = arg_service_watchdogs;
767 m->runtime_watchdog = arg_runtime_watchdog;
768 m->shutdown_watchdog = arg_shutdown_watchdog;
769 m->cad_burst_action = arg_cad_burst_action;
770
771 manager_set_show_status(m, arg_show_status);
772 }
773
774 static int parse_argv(int argc, char *argv[]) {
775
776 enum {
777 ARG_LOG_LEVEL = 0x100,
778 ARG_LOG_TARGET,
779 ARG_LOG_COLOR,
780 ARG_LOG_LOCATION,
781 ARG_UNIT,
782 ARG_SYSTEM,
783 ARG_USER,
784 ARG_TEST,
785 ARG_NO_PAGER,
786 ARG_VERSION,
787 ARG_DUMP_CONFIGURATION_ITEMS,
788 ARG_DUMP_CORE,
789 ARG_CRASH_CHVT,
790 ARG_CRASH_SHELL,
791 ARG_CRASH_REBOOT,
792 ARG_CONFIRM_SPAWN,
793 ARG_SHOW_STATUS,
794 ARG_DESERIALIZE,
795 ARG_SWITCHED_ROOT,
796 ARG_DEFAULT_STD_OUTPUT,
797 ARG_DEFAULT_STD_ERROR,
798 ARG_MACHINE_ID,
799 ARG_SERVICE_WATCHDOGS,
800 };
801
802 static const struct option options[] = {
803 { "log-level", required_argument, NULL, ARG_LOG_LEVEL },
804 { "log-target", required_argument, NULL, ARG_LOG_TARGET },
805 { "log-color", optional_argument, NULL, ARG_LOG_COLOR },
806 { "log-location", optional_argument, NULL, ARG_LOG_LOCATION },
807 { "unit", required_argument, NULL, ARG_UNIT },
808 { "system", no_argument, NULL, ARG_SYSTEM },
809 { "user", no_argument, NULL, ARG_USER },
810 { "test", no_argument, NULL, ARG_TEST },
811 { "no-pager", no_argument, NULL, ARG_NO_PAGER },
812 { "help", no_argument, NULL, 'h' },
813 { "version", no_argument, NULL, ARG_VERSION },
814 { "dump-configuration-items", no_argument, NULL, ARG_DUMP_CONFIGURATION_ITEMS },
815 { "dump-core", optional_argument, NULL, ARG_DUMP_CORE },
816 { "crash-chvt", required_argument, NULL, ARG_CRASH_CHVT },
817 { "crash-shell", optional_argument, NULL, ARG_CRASH_SHELL },
818 { "crash-reboot", optional_argument, NULL, ARG_CRASH_REBOOT },
819 { "confirm-spawn", optional_argument, NULL, ARG_CONFIRM_SPAWN },
820 { "show-status", optional_argument, NULL, ARG_SHOW_STATUS },
821 { "deserialize", required_argument, NULL, ARG_DESERIALIZE },
822 { "switched-root", no_argument, NULL, ARG_SWITCHED_ROOT },
823 { "default-standard-output", required_argument, NULL, ARG_DEFAULT_STD_OUTPUT, },
824 { "default-standard-error", required_argument, NULL, ARG_DEFAULT_STD_ERROR, },
825 { "machine-id", required_argument, NULL, ARG_MACHINE_ID },
826 { "service-watchdogs", required_argument, NULL, ARG_SERVICE_WATCHDOGS },
827 {}
828 };
829
830 int c, r;
831
832 assert(argc >= 1);
833 assert(argv);
834
835 if (getpid_cached() == 1)
836 opterr = 0;
837
838 while ((c = getopt_long(argc, argv, "hDbsz:", options, NULL)) >= 0)
839
840 switch (c) {
841
842 case ARG_LOG_LEVEL:
843 r = log_set_max_level_from_string(optarg);
844 if (r < 0) {
845 log_error("Failed to parse log level %s.", optarg);
846 return r;
847 }
848
849 break;
850
851 case ARG_LOG_TARGET:
852 r = log_set_target_from_string(optarg);
853 if (r < 0) {
854 log_error("Failed to parse log target %s.", optarg);
855 return r;
856 }
857
858 break;
859
860 case ARG_LOG_COLOR:
861
862 if (optarg) {
863 r = log_show_color_from_string(optarg);
864 if (r < 0) {
865 log_error("Failed to parse log color setting %s.", optarg);
866 return r;
867 }
868 } else
869 log_show_color(true);
870
871 break;
872
873 case ARG_LOG_LOCATION:
874 if (optarg) {
875 r = log_show_location_from_string(optarg);
876 if (r < 0) {
877 log_error("Failed to parse log location setting %s.", optarg);
878 return r;
879 }
880 } else
881 log_show_location(true);
882
883 break;
884
885 case ARG_DEFAULT_STD_OUTPUT:
886 r = exec_output_from_string(optarg);
887 if (r < 0) {
888 log_error("Failed to parse default standard output setting %s.", optarg);
889 return r;
890 } else
891 arg_default_std_output = r;
892 break;
893
894 case ARG_DEFAULT_STD_ERROR:
895 r = exec_output_from_string(optarg);
896 if (r < 0) {
897 log_error("Failed to parse default standard error output setting %s.", optarg);
898 return r;
899 } else
900 arg_default_std_error = r;
901 break;
902
903 case ARG_UNIT:
904 r = free_and_strdup(&arg_default_unit, optarg);
905 if (r < 0)
906 return log_error_errno(r, "Failed to set default unit %s: %m", optarg);
907
908 break;
909
910 case ARG_SYSTEM:
911 arg_system = true;
912 break;
913
914 case ARG_USER:
915 arg_system = false;
916 break;
917
918 case ARG_TEST:
919 arg_action = ACTION_TEST;
920 break;
921
922 case ARG_NO_PAGER:
923 arg_no_pager = true;
924 break;
925
926 case ARG_VERSION:
927 arg_action = ACTION_VERSION;
928 break;
929
930 case ARG_DUMP_CONFIGURATION_ITEMS:
931 arg_action = ACTION_DUMP_CONFIGURATION_ITEMS;
932 break;
933
934 case ARG_DUMP_CORE:
935 if (!optarg)
936 arg_dump_core = true;
937 else {
938 r = parse_boolean(optarg);
939 if (r < 0)
940 return log_error_errno(r, "Failed to parse dump core boolean: %s", optarg);
941 arg_dump_core = r;
942 }
943 break;
944
945 case ARG_CRASH_CHVT:
946 r = parse_crash_chvt(optarg);
947 if (r < 0)
948 return log_error_errno(r, "Failed to parse crash virtual terminal index: %s", optarg);
949 break;
950
951 case ARG_CRASH_SHELL:
952 if (!optarg)
953 arg_crash_shell = true;
954 else {
955 r = parse_boolean(optarg);
956 if (r < 0)
957 return log_error_errno(r, "Failed to parse crash shell boolean: %s", optarg);
958 arg_crash_shell = r;
959 }
960 break;
961
962 case ARG_CRASH_REBOOT:
963 if (!optarg)
964 arg_crash_reboot = true;
965 else {
966 r = parse_boolean(optarg);
967 if (r < 0)
968 return log_error_errno(r, "Failed to parse crash shell boolean: %s", optarg);
969 arg_crash_reboot = r;
970 }
971 break;
972
973 case ARG_CONFIRM_SPAWN:
974 arg_confirm_spawn = mfree(arg_confirm_spawn);
975
976 r = parse_confirm_spawn(optarg, &arg_confirm_spawn);
977 if (r < 0)
978 return log_error_errno(r, "Failed to parse confirm spawn option: %m");
979 break;
980
981 case ARG_SERVICE_WATCHDOGS:
982 r = parse_boolean(optarg);
983 if (r < 0)
984 return log_error_errno(r, "Failed to parse service watchdogs boolean: %s", optarg);
985 arg_service_watchdogs = r;
986 break;
987
988 case ARG_SHOW_STATUS:
989 if (optarg) {
990 r = parse_show_status(optarg, &arg_show_status);
991 if (r < 0) {
992 log_error("Failed to parse show status boolean %s.", optarg);
993 return r;
994 }
995 } else
996 arg_show_status = SHOW_STATUS_YES;
997 break;
998
999 case ARG_DESERIALIZE: {
1000 int fd;
1001 FILE *f;
1002
1003 r = safe_atoi(optarg, &fd);
1004 if (r < 0 || fd < 0) {
1005 log_error("Failed to parse deserialize option %s.", optarg);
1006 return -EINVAL;
1007 }
1008
1009 (void) fd_cloexec(fd, true);
1010
1011 f = fdopen(fd, "r");
1012 if (!f)
1013 return log_error_errno(errno, "Failed to open serialization fd: %m");
1014
1015 safe_fclose(arg_serialization);
1016 arg_serialization = f;
1017
1018 break;
1019 }
1020
1021 case ARG_SWITCHED_ROOT:
1022 arg_switched_root = true;
1023 break;
1024
1025 case ARG_MACHINE_ID:
1026 r = set_machine_id(optarg);
1027 if (r < 0)
1028 return log_error_errno(r, "MachineID '%s' is not valid.", optarg);
1029 break;
1030
1031 case 'h':
1032 arg_action = ACTION_HELP;
1033 break;
1034
1035 case 'D':
1036 log_set_max_level(LOG_DEBUG);
1037 break;
1038
1039 case 'b':
1040 case 's':
1041 case 'z':
1042 /* Just to eat away the sysvinit kernel
1043 * cmdline args without getopt() error
1044 * messages that we'll parse in
1045 * parse_proc_cmdline_word() or ignore. */
1046
1047 case '?':
1048 if (getpid_cached() != 1)
1049 return -EINVAL;
1050 else
1051 return 0;
1052
1053 default:
1054 assert_not_reached("Unhandled option code.");
1055 }
1056
1057 if (optind < argc && getpid_cached() != 1) {
1058 /* Hmm, when we aren't run as init system
1059 * let's complain about excess arguments */
1060
1061 log_error("Excess arguments.");
1062 return -EINVAL;
1063 }
1064
1065 return 0;
1066 }
1067
1068 static int help(void) {
1069
1070 printf("%s [OPTIONS...]\n\n"
1071 "Starts up and maintains the system or user services.\n\n"
1072 " -h --help Show this help\n"
1073 " --version Show version\n"
1074 " --test Determine startup sequence, dump it and exit\n"
1075 " --no-pager Do not pipe output into a pager\n"
1076 " --dump-configuration-items Dump understood unit configuration items\n"
1077 " --unit=UNIT Set default unit\n"
1078 " --system Run a system instance, even if PID != 1\n"
1079 " --user Run a user instance\n"
1080 " --dump-core[=BOOL] Dump core on crash\n"
1081 " --crash-vt=NR Change to specified VT on crash\n"
1082 " --crash-reboot[=BOOL] Reboot on crash\n"
1083 " --crash-shell[=BOOL] Run shell on crash\n"
1084 " --confirm-spawn[=BOOL] Ask for confirmation when spawning processes\n"
1085 " --show-status[=BOOL] Show status updates on the console during bootup\n"
1086 " --log-target=TARGET Set log target (console, journal, kmsg, journal-or-kmsg, null)\n"
1087 " --log-level=LEVEL Set log level (debug, info, notice, warning, err, crit, alert, emerg)\n"
1088 " --log-color[=BOOL] Highlight important log messages\n"
1089 " --log-location[=BOOL] Include code location in log messages\n"
1090 " --default-standard-output= Set default standard output for services\n"
1091 " --default-standard-error= Set default standard error output for services\n",
1092 program_invocation_short_name);
1093
1094 return 0;
1095 }
1096
1097 static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool switching_root) {
1098 _cleanup_fdset_free_ FDSet *fds = NULL;
1099 _cleanup_fclose_ FILE *f = NULL;
1100 int r;
1101
1102 assert(m);
1103 assert(_f);
1104 assert(_fds);
1105
1106 r = manager_open_serialization(m, &f);
1107 if (r < 0)
1108 return log_error_errno(r, "Failed to create serialization file: %m");
1109
1110 /* Make sure nothing is really destructed when we shut down */
1111 m->n_reloading++;
1112 bus_manager_send_reloading(m, true);
1113
1114 fds = fdset_new();
1115 if (!fds)
1116 return log_oom();
1117
1118 r = manager_serialize(m, f, fds, switching_root);
1119 if (r < 0)
1120 return log_error_errno(r, "Failed to serialize state: %m");
1121
1122 if (fseeko(f, 0, SEEK_SET) == (off_t) -1)
1123 return log_error_errno(errno, "Failed to rewind serialization fd: %m");
1124
1125 r = fd_cloexec(fileno(f), false);
1126 if (r < 0)
1127 return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization: %m");
1128
1129 r = fdset_cloexec(fds, false);
1130 if (r < 0)
1131 return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization fds: %m");
1132
1133 *_f = f;
1134 *_fds = fds;
1135
1136 f = NULL;
1137 fds = NULL;
1138
1139 return 0;
1140 }
1141
1142 static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
1143 struct rlimit nl;
1144 int r;
1145 int min_max;
1146 _cleanup_free_ char *nr_open = NULL;
1147
1148 assert(saved_rlimit);
1149
1150 /* Save the original RLIMIT_NOFILE so that we can reset it
1151 * later when transitioning from the initrd to the main
1152 * systemd or suchlike. */
1153 if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0)
1154 return log_warning_errno(errno, "Reading RLIMIT_NOFILE failed, ignoring: %m");
1155
1156 /* Make sure forked processes get the default kernel setting */
1157 if (!arg_default_rlimit[RLIMIT_NOFILE]) {
1158 struct rlimit *rl;
1159
1160 rl = newdup(struct rlimit, saved_rlimit, 1);
1161 if (!rl)
1162 return log_oom();
1163
1164 arg_default_rlimit[RLIMIT_NOFILE] = rl;
1165 }
1166
1167 /* Get current RLIMIT_NOFILE maximum compiled into the kernel. */
1168 r = read_one_line_file("/proc/sys/fs/nr_open", &nr_open);
1169 if (r >= 0)
1170 r = safe_atoi(nr_open, &min_max);
1171 /* If we fail, fallback to the hard-coded kernel limit of 1024 * 1024. */
1172 if (r < 0)
1173 min_max = 1024 * 1024;
1174
1175 /* Bump up the resource limit for ourselves substantially */
1176 nl.rlim_cur = nl.rlim_max = min_max;
1177 r = setrlimit_closest(RLIMIT_NOFILE, &nl);
1178 if (r < 0)
1179 return log_warning_errno(r, "Setting RLIMIT_NOFILE failed, ignoring: %m");
1180
1181 return 0;
1182 }
1183
1184 static int bump_rlimit_memlock(struct rlimit *saved_rlimit) {
1185 int r;
1186
1187 assert(saved_rlimit);
1188 assert(getuid() == 0);
1189
1190 /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even though we have CAP_IPC_LOCK which
1191 * should normally disable such checks. We need them to implement IPAccessAllow= and IPAccessDeny=, hence let's
1192 * bump the value high enough for the root user. */
1193
1194 if (getrlimit(RLIMIT_MEMLOCK, saved_rlimit) < 0)
1195 return log_warning_errno(errno, "Reading RLIMIT_MEMLOCK failed, ignoring: %m");
1196
1197 r = setrlimit_closest(RLIMIT_MEMLOCK, &RLIMIT_MAKE_CONST(1024ULL*1024ULL*16ULL));
1198 if (r < 0)
1199 return log_warning_errno(r, "Setting RLIMIT_MEMLOCK failed, ignoring: %m");
1200
1201 return 0;
1202 }
1203
/* Logs a warning if /usr looks like it lives on a separate, not yet mounted file system, which is detected
 * by /usr being an empty directory. Any other result of the emptiness check (non-empty, or an error) is
 * silently accepted. */
static void test_usr(void) {

        if (dir_is_empty("/usr") > 0)
                log_warning("/usr appears to be on its own filesystem and is not already mounted. This is not a supported setup. "
                            "Some things will probably break (sometimes even silently) in mysterious ways. "
                            "Consult http://freedesktop.org/wiki/Software/systemd/separate-usr-is-broken for more information.");
}
1215
1216 static int enforce_syscall_archs(Set *archs) {
1217 #if HAVE_SECCOMP
1218 int r;
1219
1220 if (!is_seccomp_available())
1221 return 0;
1222
1223 r = seccomp_restrict_archs(arg_syscall_archs);
1224 if (r < 0)
1225 return log_error_errno(r, "Failed to enforce system call architecture restrication: %m");
1226 #endif
1227 return 0;
1228 }
1229
1230 static int status_welcome(void) {
1231 _cleanup_free_ char *pretty_name = NULL, *ansi_color = NULL;
1232 const char *fn;
1233 int r;
1234
1235 if (arg_show_status <= 0)
1236 return 0;
1237
1238 FOREACH_STRING(fn, "/etc/os-release", "/usr/lib/os-release") {
1239 r = parse_env_file(fn, NEWLINE,
1240 "PRETTY_NAME", &pretty_name,
1241 "ANSI_COLOR", &ansi_color,
1242 NULL);
1243
1244 if (r != -ENOENT)
1245 break;
1246 }
1247 if (r < 0 && r != -ENOENT)
1248 log_warning_errno(r, "Failed to read os-release file, ignoring: %m");
1249
1250 if (log_get_show_color())
1251 return status_printf(NULL, false, false,
1252 "\nWelcome to \x1B[%sm%s\x1B[0m!\n",
1253 isempty(ansi_color) ? "1" : ansi_color,
1254 isempty(pretty_name) ? "Linux" : pretty_name);
1255 else
1256 return status_printf(NULL, false, false,
1257 "\nWelcome to %s!\n",
1258 isempty(pretty_name) ? "Linux" : pretty_name);
1259 }
1260
1261 static int write_container_id(void) {
1262 const char *c;
1263 int r;
1264
1265 c = getenv("container");
1266 if (isempty(c))
1267 return 0;
1268
1269 RUN_WITH_UMASK(0022)
1270 r = write_string_file("/run/systemd/container", c, WRITE_STRING_FILE_CREATE);
1271 if (r < 0)
1272 return log_warning_errno(r, "Failed to write /run/systemd/container, ignoring: %m");
1273
1274 return 1;
1275 }
1276
1277 static int bump_unix_max_dgram_qlen(void) {
1278 _cleanup_free_ char *qlen = NULL;
1279 unsigned long v;
1280 int r;
1281
1282 /* Let's bump the net.unix.max_dgram_qlen sysctl. The kernel
1283 * default of 16 is simply too low. We set the value really
1284 * really early during boot, so that it is actually applied to
1285 * all our sockets, including the $NOTIFY_SOCKET one. */
1286
1287 r = read_one_line_file("/proc/sys/net/unix/max_dgram_qlen", &qlen);
1288 if (r < 0)
1289 return log_warning_errno(r, "Failed to read AF_UNIX datagram queue length, ignoring: %m");
1290
1291 r = safe_atolu(qlen, &v);
1292 if (r < 0)
1293 return log_warning_errno(r, "Failed to parse AF_UNIX datagram queue length, ignoring: %m");
1294
1295 if (v >= DEFAULT_UNIX_MAX_DGRAM_QLEN)
1296 return 0;
1297
1298 qlen = mfree(qlen);
1299 if (asprintf(&qlen, "%lu\n", DEFAULT_UNIX_MAX_DGRAM_QLEN) < 0)
1300 return log_oom();
1301
1302 r = write_string_file("/proc/sys/net/unix/max_dgram_qlen", qlen, 0);
1303 if (r < 0)
1304 return log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
1305 "Failed to bump AF_UNIX datagram queue length, ignoring: %m");
1306
1307 return 1;
1308 }
1309
1310 static int fixup_environment(void) {
1311 _cleanup_free_ char *term = NULL;
1312 const char *t;
1313 int r;
1314
1315 /* Only fix up the environment when we are started as PID 1 */
1316 if (getpid_cached() != 1)
1317 return 0;
1318
1319 /* We expect the environment to be set correctly if run inside a container. */
1320 if (detect_container() > 0)
1321 return 0;
1322
1323 /* When started as PID1, the kernel uses /dev/console for our stdios and uses TERM=linux whatever the backend
1324 * device used by the console. We try to make a better guess here since some consoles might not have support
1325 * for color mode for example.
1326 *
1327 * However if TERM was configured through the kernel command line then leave it alone. */
1328 r = proc_cmdline_get_key("TERM", 0, &term);
1329 if (r < 0)
1330 return r;
1331
1332 t = term ?: default_term_for_tty("/dev/console");
1333
1334 if (setenv("TERM", t, 1) < 0)
1335 return -errno;
1336
1337 return 0;
1338 }
1339
/* SysV compatibility: when invoked under a name containing "init" while not being PID 1, behave like
 * telinit by executing the systemctl binary with the unmodified argv[]. If the exec fails an error is
 * logged and the process exits with EXIT_FAILURE. Without SysV compatibility compiled in this is a no-op. */
static void redirect_telinit(int argc, char *argv[]) {

#if HAVE_SYSV_COMPAT
        /* The real init (PID 1), or a binary not called "*init*", is not redirected */
        if (getpid_cached() == 1 || !strstr(program_invocation_short_name, "init"))
                return;

        execv(SYSTEMCTL_BINARY_PATH, argv);

        /* Only reached if execv() failed */
        log_error_errno(errno, "Failed to exec " SYSTEMCTL_BINARY_PATH ": %m");
        exit(EXIT_FAILURE);
#endif
}
1356
1357 static int become_shutdown(
1358 const char *shutdown_verb,
1359 int retval) {
1360
1361 char log_level[DECIMAL_STR_MAX(int) + 1],
1362 exit_code[DECIMAL_STR_MAX(uint8_t) + 1],
1363 timeout[DECIMAL_STR_MAX(usec_t) + 1];
1364
1365 const char* command_line[13] = {
1366 SYSTEMD_SHUTDOWN_BINARY_PATH,
1367 shutdown_verb,
1368 "--timeout", timeout,
1369 "--log-level", log_level,
1370 "--log-target",
1371 };
1372
1373 _cleanup_strv_free_ char **env_block = NULL;
1374 size_t pos = 7;
1375 int r;
1376
1377 assert(shutdown_verb);
1378 assert(!command_line[pos]);
1379 env_block = strv_copy(environ);
1380
1381 xsprintf(log_level, "%d", log_get_max_level());
1382 xsprintf(timeout, "%" PRI_USEC "us", arg_default_timeout_stop_usec);
1383
1384 switch (log_get_target()) {
1385
1386 case LOG_TARGET_KMSG:
1387 case LOG_TARGET_JOURNAL_OR_KMSG:
1388 case LOG_TARGET_SYSLOG_OR_KMSG:
1389 command_line[pos++] = "kmsg";
1390 break;
1391
1392 case LOG_TARGET_NULL:
1393 command_line[pos++] = "null";
1394 break;
1395
1396 case LOG_TARGET_CONSOLE:
1397 default:
1398 command_line[pos++] = "console";
1399 break;
1400 };
1401
1402 if (log_get_show_color())
1403 command_line[pos++] = "--log-color";
1404
1405 if (log_get_show_location())
1406 command_line[pos++] = "--log-location";
1407
1408 if (streq(shutdown_verb, "exit")) {
1409 command_line[pos++] = "--exit-code";
1410 command_line[pos++] = exit_code;
1411 xsprintf(exit_code, "%d", retval);
1412 }
1413
1414 assert(pos < ELEMENTSOF(command_line));
1415
1416 if (streq(shutdown_verb, "reboot") &&
1417 arg_shutdown_watchdog > 0 &&
1418 arg_shutdown_watchdog != USEC_INFINITY) {
1419
1420 char *e;
1421
1422 /* If we reboot let's set the shutdown
1423 * watchdog and tell the shutdown binary to
1424 * repeatedly ping it */
1425 r = watchdog_set_timeout(&arg_shutdown_watchdog);
1426 watchdog_close(r < 0);
1427
1428 /* Tell the binary how often to ping, ignore failure */
1429 if (asprintf(&e, "WATCHDOG_USEC="USEC_FMT, arg_shutdown_watchdog) > 0)
1430 (void) strv_consume(&env_block, e);
1431
1432 if (arg_watchdog_device &&
1433 asprintf(&e, "WATCHDOG_DEVICE=%s", arg_watchdog_device) > 0)
1434 (void) strv_consume(&env_block, e);
1435 } else
1436 watchdog_close(true);
1437
1438 /* Avoid the creation of new processes forked by the
1439 * kernel; at this point, we will not listen to the
1440 * signals anyway */
1441 if (detect_container() <= 0)
1442 (void) cg_uninstall_release_agent(SYSTEMD_CGROUP_CONTROLLER);
1443
1444 execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);
1445 return -errno;
1446 }
1447
1448 static void initialize_clock(void) {
1449 int r;
1450
1451 if (clock_is_localtime(NULL) > 0) {
1452 int min;
1453
1454 /*
1455 * The very first call of settimeofday() also does a time warp in the kernel.
1456 *
1457 * In the rtc-in-local time mode, we set the kernel's timezone, and rely on external tools to take care
1458 * of maintaining the RTC and do all adjustments. This matches the behavior of Windows, which leaves
1459 * the RTC alone if the registry tells that the RTC runs in UTC.
1460 */
1461 r = clock_set_timezone(&min);
1462 if (r < 0)
1463 log_error_errno(r, "Failed to apply local time delta, ignoring: %m");
1464 else
1465 log_info("RTC configured in localtime, applying delta of %i minutes to system time.", min);
1466
1467 } else if (!in_initrd()) {
1468 /*
1469 * Do a dummy very first call to seal the kernel's time warp magic.
1470 *
1471 * Do not call this from inside the initrd. The initrd might not carry /etc/adjtime with LOCAL, but the
1472 * real system could be set up that way. In such case, we need to delay the time-warp or the sealing
1473 * until we reach the real system.
1474 *
1475 * Do no set the kernel's timezone. The concept of local time cannot be supported reliably, the time
1476 * will jump or be incorrect at every daylight saving time change. All kernel local time concepts will
1477 * be treated as UTC that way.
1478 */
1479 (void) clock_reset_timewarp();
1480 }
1481
1482 r = clock_apply_epoch();
1483 if (r < 0)
1484 log_error_errno(r, "Current system time is before build time, but cannot correct: %m");
1485 else if (r > 0)
1486 log_info("System time before build time, advancing clock.");
1487 }
1488
1489 static void initialize_coredump(bool skip_setup) {
1490
1491 if (getpid_cached() != 1)
1492 return;
1493
1494 /* Don't limit the core dump size, so that coredump handlers such as systemd-coredump (which honour the limit)
1495 * will process core dumps for system services by default. */
1496 if (setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY)) < 0)
1497 log_warning_errno(errno, "Failed to set RLIMIT_CORE: %m");
1498
1499 /* But at the same time, turn off the core_pattern logic by default, so that no coredumps are stored
1500 * until the systemd-coredump tool is enabled via sysctl. */
1501 if (!skip_setup)
1502 disable_coredumps();
1503 }
1504
1505 static void do_reexecute(
1506 int argc,
1507 char *argv[],
1508 const struct rlimit *saved_rlimit_nofile,
1509 const struct rlimit *saved_rlimit_memlock,
1510 FDSet *fds,
1511 const char *switch_root_dir,
1512 const char *switch_root_init,
1513 const char **ret_error_message) {
1514
1515 unsigned i, j, args_size;
1516 const char **args;
1517 int r;
1518
1519 assert(saved_rlimit_nofile);
1520 assert(saved_rlimit_memlock);
1521 assert(ret_error_message);
1522
1523 /* Close and disarm the watchdog, so that the new instance can reinitialize it, but doesn't get rebooted while
1524 * we do that */
1525 watchdog_close(true);
1526
1527 /* Reset the RLIMIT_NOFILE to the kernel default, so that the new systemd can pass the kernel default to its
1528 * child processes */
1529
1530 if (saved_rlimit_nofile->rlim_cur > 0)
1531 (void) setrlimit(RLIMIT_NOFILE, saved_rlimit_nofile);
1532 if (saved_rlimit_memlock->rlim_cur != (rlim_t) -1)
1533 (void) setrlimit(RLIMIT_MEMLOCK, saved_rlimit_memlock);
1534
1535 if (switch_root_dir) {
1536 /* Kill all remaining processes from the initrd, but don't wait for them, so that we can handle the
1537 * SIGCHLD for them after deserializing. */
1538 broadcast_signal(SIGTERM, false, true, arg_default_timeout_stop_usec);
1539
1540 /* And switch root with MS_MOVE, because we remove the old directory afterwards and detach it. */
1541 r = switch_root(switch_root_dir, "/mnt", true, MS_MOVE);
1542 if (r < 0)
1543 log_error_errno(r, "Failed to switch root, trying to continue: %m");
1544 }
1545
1546 args_size = MAX(6, argc+1);
1547 args = newa(const char*, args_size);
1548
1549 if (!switch_root_init) {
1550 char sfd[DECIMAL_STR_MAX(int) + 1];
1551
1552 /* First try to spawn ourselves with the right path, and with full serialization. We do this only if
1553 * the user didn't specify an explicit init to spawn. */
1554
1555 assert(arg_serialization);
1556 assert(fds);
1557
1558 xsprintf(sfd, "%i", fileno(arg_serialization));
1559
1560 i = 0;
1561 args[i++] = SYSTEMD_BINARY_PATH;
1562 if (switch_root_dir)
1563 args[i++] = "--switched-root";
1564 args[i++] = arg_system ? "--system" : "--user";
1565 args[i++] = "--deserialize";
1566 args[i++] = sfd;
1567 args[i++] = NULL;
1568
1569 assert(i <= args_size);
1570
1571 /*
1572 * We want valgrind to print its memory usage summary before reexecution. Valgrind won't do this is on
1573 * its own on exec(), but it will do it on exit(). Hence, to ensure we get a summary here, fork() off
1574 * a child, let it exit() cleanly, so that it prints the summary, and wait() for it in the parent,
1575 * before proceeding into the exec().
1576 */
1577 valgrind_summary_hack();
1578
1579 (void) execv(args[0], (char* const*) args);
1580 log_debug_errno(errno, "Failed to execute our own binary, trying fallback: %m");
1581 }
1582
1583 /* Try the fallback, if there is any, without any serialization. We pass the original argv[] and envp[]. (Well,
1584 * modulo the ordering changes due to getopt() in argv[], and some cleanups in envp[], but let's hope that
1585 * doesn't matter.) */
1586
1587 arg_serialization = safe_fclose(arg_serialization);
1588 fds = fdset_free(fds);
1589
1590 /* Reopen the console */
1591 (void) make_console_stdio();
1592
1593 for (j = 1, i = 1; j < (unsigned) argc; j++)
1594 args[i++] = argv[j];
1595 args[i++] = NULL;
1596 assert(i <= args_size);
1597
1598 /* Reenable any blocked signals, especially important if we switch from initial ramdisk to init=... */
1599 (void) reset_all_signal_handlers();
1600 (void) reset_signal_mask();
1601
1602 if (switch_root_init) {
1603 args[0] = switch_root_init;
1604 (void) execv(args[0], (char* const*) args);
1605 log_warning_errno(errno, "Failed to execute configured init, trying fallback: %m");
1606 }
1607
1608 args[0] = "/sbin/init";
1609 (void) execv(args[0], (char* const*) args);
1610 r = -errno;
1611
1612 manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
1613 ANSI_HIGHLIGHT_RED " !! " ANSI_NORMAL,
1614 "Failed to execute /sbin/init");
1615
1616 if (r == -ENOENT) {
1617 log_warning("No /sbin/init, trying fallback");
1618
1619 args[0] = "/bin/sh";
1620 args[1] = NULL;
1621 (void) execv(args[0], (char* const*) args);
1622 log_error_errno(errno, "Failed to execute /bin/sh, giving up: %m");
1623 } else
1624 log_warning_errno(r, "Failed to execute /sbin/init, giving up: %m");
1625
1626 *ret_error_message = "Failed to execute fallback shell";
1627 }
1628
1629 static int invoke_main_loop(
1630 Manager *m,
1631 bool *ret_reexecute,
1632 int *ret_retval, /* Return parameters relevant for shutting down */
1633 const char **ret_shutdown_verb, /* … */
1634 FDSet **ret_fds, /* Return parameters for reexecuting */
1635 char **ret_switch_root_dir, /* … */
1636 char **ret_switch_root_init, /* … */
1637 const char **ret_error_message) {
1638
1639 int r;
1640
1641 assert(m);
1642 assert(ret_reexecute);
1643 assert(ret_retval);
1644 assert(ret_shutdown_verb);
1645 assert(ret_fds);
1646 assert(ret_switch_root_dir);
1647 assert(ret_switch_root_init);
1648 assert(ret_error_message);
1649
1650 for (;;) {
1651 r = manager_loop(m);
1652 if (r < 0) {
1653 *ret_error_message = "Failed to run main loop";
1654 return log_emergency_errno(r, "Failed to run main loop: %m");
1655 }
1656
1657 switch (m->exit_code) {
1658
1659 case MANAGER_RELOAD:
1660 log_info("Reloading.");
1661
1662 r = parse_config_file();
1663 if (r < 0)
1664 log_warning_errno(r, "Failed to parse config file, ignoring: %m");
1665
1666 set_manager_defaults(m);
1667
1668 r = manager_reload(m);
1669 if (r < 0)
1670 log_warning_errno(r, "Failed to reload, ignoring: %m");
1671
1672 break;
1673
1674 case MANAGER_REEXECUTE:
1675
1676 r = prepare_reexecute(m, &arg_serialization, ret_fds, false);
1677 if (r < 0) {
1678 *ret_error_message = "Failed to prepare for reexecution";
1679 return r;
1680 }
1681
1682 log_notice("Reexecuting.");
1683
1684 *ret_reexecute = true;
1685 *ret_retval = EXIT_SUCCESS;
1686 *ret_shutdown_verb = NULL;
1687 *ret_switch_root_dir = *ret_switch_root_init = NULL;
1688
1689 return 0;
1690
1691 case MANAGER_SWITCH_ROOT:
1692 if (!m->switch_root_init) {
1693 r = prepare_reexecute(m, &arg_serialization, ret_fds, true);
1694 if (r < 0) {
1695 *ret_error_message = "Failed to prepare for reexecution";
1696 return r;
1697 }
1698 } else
1699 *ret_fds = NULL;
1700
1701 log_notice("Switching root.");
1702
1703 *ret_reexecute = true;
1704 *ret_retval = EXIT_SUCCESS;
1705 *ret_shutdown_verb = NULL;
1706
1707 /* Steal the switch root parameters */
1708 *ret_switch_root_dir = m->switch_root;
1709 *ret_switch_root_init = m->switch_root_init;
1710 m->switch_root = m->switch_root_init = NULL;
1711
1712 return 0;
1713
1714 case MANAGER_EXIT:
1715
1716 if (MANAGER_IS_USER(m)) {
1717 log_debug("Exit.");
1718
1719 *ret_reexecute = false;
1720 *ret_retval = m->return_value;
1721 *ret_shutdown_verb = NULL;
1722 *ret_fds = NULL;
1723 *ret_switch_root_dir = *ret_switch_root_init = NULL;
1724
1725 return 0;
1726 }
1727
1728 _fallthrough_;
1729 case MANAGER_REBOOT:
1730 case MANAGER_POWEROFF:
1731 case MANAGER_HALT:
1732 case MANAGER_KEXEC: {
1733 static const char * const table[_MANAGER_EXIT_CODE_MAX] = {
1734 [MANAGER_EXIT] = "exit",
1735 [MANAGER_REBOOT] = "reboot",
1736 [MANAGER_POWEROFF] = "poweroff",
1737 [MANAGER_HALT] = "halt",
1738 [MANAGER_KEXEC] = "kexec"
1739 };
1740
1741 log_notice("Shutting down.");
1742
1743 *ret_reexecute = false;
1744 *ret_retval = m->return_value;
1745 assert_se(*ret_shutdown_verb = table[m->exit_code]);
1746 *ret_fds = NULL;
1747 *ret_switch_root_dir = *ret_switch_root_init = NULL;
1748
1749 return 0;
1750 }
1751
1752 default:
1753 assert_not_reached("Unknown exit code.");
1754 }
1755 }
1756 }
1757
1758 static void log_execution_mode(bool *ret_first_boot) {
1759 assert(ret_first_boot);
1760
1761 if (arg_system) {
1762 int v;
1763
1764 log_info(PACKAGE_STRING " running in %ssystem mode. (" SYSTEMD_FEATURES ")",
1765 arg_action == ACTION_TEST ? "test " : "" );
1766
1767 v = detect_virtualization();
1768 if (v > 0)
1769 log_info("Detected virtualization %s.", virtualization_to_string(v));
1770
1771 log_info("Detected architecture %s.", architecture_to_string(uname_architecture()));
1772
1773 if (in_initrd()) {
1774 *ret_first_boot = false;
1775 log_info("Running in initial RAM disk.");
1776 } else {
1777 /* Let's check whether we are in first boot, i.e. whether /etc is still unpopulated. We use
1778 * /etc/machine-id as flag file, for this: if it exists we assume /etc is populated, if it
1779 * doesn't it's unpopulated. This allows container managers and installers to provision a
1780 * couple of files already. If the container manager wants to provision the machine ID itself
1781 * it should pass $container_uuid to PID 1. */
1782
1783 *ret_first_boot = access("/etc/machine-id", F_OK) < 0;
1784 if (*ret_first_boot)
1785 log_info("Running with unpopulated /etc.");
1786 }
1787 } else {
1788 if (DEBUG_LOGGING) {
1789 _cleanup_free_ char *t;
1790
1791 t = uid_to_name(getuid());
1792 log_debug(PACKAGE_STRING " running in %suser mode for user " UID_FMT "/%s. (" SYSTEMD_FEATURES ")",
1793 arg_action == ACTION_TEST ? " test" : "", getuid(), strna(t));
1794 }
1795
1796 *ret_first_boot = false;
1797 }
1798 }
1799
1800 static int initialize_runtime(
1801 bool skip_setup,
1802 struct rlimit *saved_rlimit_nofile,
1803 struct rlimit *saved_rlimit_memlock,
1804 const char **ret_error_message) {
1805
1806 int r;
1807
1808 assert(ret_error_message);
1809
1810 /* Sets up various runtime parameters. Many of these initializations are conditionalized:
1811 *
1812 * - Some only apply to --system instances
1813 * - Some only apply to --user instances
1814 * - Some only apply when we first start up, but not when we reexecute
1815 */
1816
1817 if (arg_action != ACTION_RUN)
1818 return 0;
1819
1820 if (arg_system) {
1821 /* Make sure we leave a core dump without panicing the kernel. */
1822 install_crash_handler();
1823
1824 if (!skip_setup) {
1825 r = mount_cgroup_controllers(arg_join_controllers);
1826 if (r < 0) {
1827 *ret_error_message = "Failed to mount cgroup hierarchies";
1828 return r;
1829 }
1830
1831 status_welcome();
1832 hostname_setup();
1833 machine_id_setup(NULL, arg_machine_id, NULL);
1834 loopback_setup();
1835 bump_unix_max_dgram_qlen();
1836 test_usr();
1837 write_container_id();
1838 }
1839
1840 if (arg_watchdog_device) {
1841 r = watchdog_set_device(arg_watchdog_device);
1842 if (r < 0)
1843 log_warning_errno(r, "Failed to set watchdog device to %s, ignoring: %m", arg_watchdog_device);
1844 }
1845
1846 if (arg_runtime_watchdog > 0 && arg_runtime_watchdog != USEC_INFINITY)
1847 watchdog_set_timeout(&arg_runtime_watchdog);
1848 }
1849
1850 if (arg_timer_slack_nsec != NSEC_INFINITY)
1851 if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0)
1852 log_warning_errno(errno, "Failed to adjust timer slack, ignoring: %m");
1853
1854 if (arg_system && !cap_test_all(arg_capability_bounding_set)) {
1855 r = capability_bounding_set_drop_usermode(arg_capability_bounding_set);
1856 if (r < 0) {
1857 *ret_error_message = "Failed to drop capability bounding set of usermode helpers";
1858 return log_emergency_errno(r, "Failed to drop capability bounding set of usermode helpers: %m");
1859 }
1860
1861 r = capability_bounding_set_drop(arg_capability_bounding_set, true);
1862 if (r < 0) {
1863 *ret_error_message = "Failed to drop capability bounding set";
1864 return log_emergency_errno(r, "Failed to drop capability bounding set: %m");
1865 }
1866 }
1867
1868 if (arg_syscall_archs) {
1869 r = enforce_syscall_archs(arg_syscall_archs);
1870 if (r < 0) {
1871 *ret_error_message = "Failed to set syscall architectures";
1872 return r;
1873 }
1874 }
1875
1876 if (!arg_system)
1877 /* Become reaper of our children */
1878 if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0)
1879 log_warning_errno(errno, "Failed to make us a subreaper: %m");
1880
1881 if (arg_system) {
1882 /* Bump up RLIMIT_NOFILE for systemd itself */
1883 (void) bump_rlimit_nofile(saved_rlimit_nofile);
1884 (void) bump_rlimit_memlock(saved_rlimit_memlock);
1885 }
1886
1887 return 0;
1888 }
1889
1890 static int do_queue_default_job(
1891 Manager *m,
1892 const char **ret_error_message) {
1893
1894 _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
1895 Job *default_unit_job;
1896 Unit *target = NULL;
1897 int r;
1898
1899 log_debug("Activating default unit: %s", arg_default_unit);
1900
1901 r = manager_load_unit(m, arg_default_unit, NULL, &error, &target);
1902 if (r < 0)
1903 log_error("Failed to load default target: %s", bus_error_message(&error, r));
1904 else if (IN_SET(target->load_state, UNIT_ERROR, UNIT_NOT_FOUND))
1905 log_error_errno(target->load_error, "Failed to load default target: %m");
1906 else if (target->load_state == UNIT_MASKED)
1907 log_error("Default target masked.");
1908
1909 if (!target || target->load_state != UNIT_LOADED) {
1910 log_info("Trying to load rescue target...");
1911
1912 r = manager_load_unit(m, SPECIAL_RESCUE_TARGET, NULL, &error, &target);
1913 if (r < 0) {
1914 *ret_error_message = "Failed to load rescue target";
1915 return log_emergency_errno(r, "Failed to load rescue target: %s", bus_error_message(&error, r));
1916 } else if (IN_SET(target->load_state, UNIT_ERROR, UNIT_NOT_FOUND)) {
1917 *ret_error_message = "Failed to load rescue target";
1918 return log_emergency_errno(target->load_error, "Failed to load rescue target: %m");
1919 } else if (target->load_state == UNIT_MASKED) {
1920 *ret_error_message = "Rescue target masked";
1921 log_emergency("Rescue target masked.");
1922 return -ERFKILL;
1923 }
1924 }
1925
1926 assert(target->load_state == UNIT_LOADED);
1927
1928 r = manager_add_job(m, JOB_START, target, JOB_ISOLATE, &error, &default_unit_job);
1929 if (r == -EPERM) {
1930 log_debug_errno(r, "Default target could not be isolated, starting instead: %s", bus_error_message(&error, r));
1931
1932 sd_bus_error_free(&error);
1933
1934 r = manager_add_job(m, JOB_START, target, JOB_REPLACE, &error, &default_unit_job);
1935 if (r < 0) {
1936 *ret_error_message = "Failed to start default target";
1937 return log_emergency_errno(r, "Failed to start default target: %s", bus_error_message(&error, r));
1938 }
1939
1940 } else if (r < 0) {
1941 *ret_error_message = "Failed to isolate default target";
1942 return log_emergency_errno(r, "Failed to isolate default target: %s", bus_error_message(&error, r));
1943 }
1944
1945 m->default_unit_job_id = default_unit_job->id;
1946
1947 return 0;
1948 }
1949
1950 static void free_arguments(void) {
1951 size_t j;
1952
1953 /* Frees all arg_* variables, with the exception of arg_serialization */
1954
1955 for (j = 0; j < ELEMENTSOF(arg_default_rlimit); j++)
1956 arg_default_rlimit[j] = mfree(arg_default_rlimit[j]);
1957
1958 arg_default_unit = mfree(arg_default_unit);
1959 arg_confirm_spawn = mfree(arg_confirm_spawn);
1960 arg_join_controllers = strv_free_free(arg_join_controllers);
1961 arg_default_environment = strv_free(arg_default_environment);
1962 arg_syscall_archs = set_free(arg_syscall_archs);
1963 }
1964
1965 static int load_configuration(int argc, char **argv, const char **ret_error_message) {
1966 int r;
1967
1968 assert(ret_error_message);
1969
1970 arg_default_tasks_max = system_tasks_max_scale(DEFAULT_TASKS_MAX_PERCENTAGE, 100U);
1971
1972 r = parse_config_file();
1973 if (r < 0) {
1974 *ret_error_message = "Failed to parse config file";
1975 return r;
1976 }
1977
1978 if (arg_system) {
1979 r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, 0);
1980 if (r < 0)
1981 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
1982 }
1983
1984 /* Note that this also parses bits from the kernel command line, including "debug". */
1985 log_parse_environment();
1986
1987 r = parse_argv(argc, argv);
1988 if (r < 0) {
1989 *ret_error_message = "Failed to parse commandline arguments";
1990 return r;
1991 }
1992
1993 /* Initialize default unit */
1994 if (!arg_default_unit) {
1995 arg_default_unit = strdup(SPECIAL_DEFAULT_TARGET);
1996 if (!arg_default_unit) {
1997 *ret_error_message = "Failed to set default unit";
1998 return log_oom();
1999 }
2000 }
2001
2002 /* Initialize the show status setting if it hasn't been set explicitly yet */
2003 if (arg_show_status == _SHOW_STATUS_UNSET)
2004 arg_show_status = SHOW_STATUS_YES;
2005
2006 return 0;
2007 }
2008
2009 static int safety_checks(void) {
2010
2011 if (getpid_cached() == 1 &&
2012 arg_action != ACTION_RUN) {
2013 log_error("Unsupported execution mode while PID 1.");
2014 return -EPERM;
2015 }
2016
2017 if (getpid_cached() == 1 &&
2018 !arg_system) {
2019 log_error("Can't run --user mode as PID 1.");
2020 return -EPERM;
2021 }
2022
2023 if (arg_action == ACTION_RUN &&
2024 arg_system &&
2025 getpid_cached() != 1) {
2026 log_error("Can't run system mode unless PID 1.");
2027 return -EPERM;
2028 }
2029
2030 if (arg_action == ACTION_TEST &&
2031 geteuid() == 0) {
2032 log_error("Don't run test mode as root.");
2033 return -EPERM;
2034 }
2035
2036 if (!arg_system &&
2037 arg_action == ACTION_RUN &&
2038 sd_booted() <= 0) {
2039 log_error("Trying to run as user instance, but the system has not been booted with systemd.");
2040 return -EOPNOTSUPP;
2041 }
2042
2043 if (!arg_system &&
2044 arg_action == ACTION_RUN &&
2045 !getenv("XDG_RUNTIME_DIR")) {
2046 log_error("Trying to run as user instance, but $XDG_RUNTIME_DIR is not set.");
2047 return -EUNATCH;
2048 }
2049
2050 if (arg_system &&
2051 arg_action == ACTION_RUN &&
2052 running_in_chroot() > 0) {
2053 log_error("Cannot be run in a chroot() environment.");
2054 return -EOPNOTSUPP;
2055 }
2056
2057 return 0;
2058 }
2059
2060 static int initialize_security(
2061 bool *loaded_policy,
2062 dual_timestamp *security_start_timestamp,
2063 dual_timestamp *security_finish_timestamp,
2064 const char **ret_error_message) {
2065
2066 int r;
2067
2068 assert(loaded_policy);
2069 assert(security_start_timestamp);
2070 assert(security_finish_timestamp);
2071 assert(ret_error_message);
2072
2073 dual_timestamp_get(security_start_timestamp);
2074
2075 r = mac_selinux_setup(loaded_policy);
2076 if (r < 0) {
2077 *ret_error_message = "Failed to load SELinux policy";
2078 return r;
2079 }
2080
2081 r = mac_smack_setup(loaded_policy);
2082 if (r < 0) {
2083 *ret_error_message = "Failed to load SMACK policy";
2084 return r;
2085 }
2086
2087 r = ima_setup();
2088 if (r < 0) {
2089 *ret_error_message = "Failed to load IMA policy";
2090 return r;
2091 }
2092
2093 dual_timestamp_get(security_finish_timestamp);
2094 return 0;
2095 }
2096
2097 static void test_summary(Manager *m) {
2098 assert(m);
2099
2100 printf("-> By units:\n");
2101 manager_dump_units(m, stdout, "\t");
2102
2103 printf("-> By jobs:\n");
2104 manager_dump_jobs(m, stdout, "\t");
2105 }
2106
2107 static int collect_fds(FDSet **ret_fds, const char **ret_error_message) {
2108 int r;
2109
2110 assert(ret_fds);
2111 assert(ret_error_message);
2112
2113 r = fdset_new_fill(ret_fds);
2114 if (r < 0) {
2115 *ret_error_message = "Failed to allocate fd set";
2116 return log_emergency_errno(r, "Failed to allocate fd set: %m");
2117 }
2118
2119 fdset_cloexec(*ret_fds, true);
2120
2121 if (arg_serialization)
2122 assert_se(fdset_remove(*ret_fds, fileno(arg_serialization)) >= 0);
2123
2124 return 0;
2125 }
2126
2127 static void setup_console_terminal(bool skip_setup) {
2128
2129 if (!arg_system)
2130 return;
2131
2132 /* Become a session leader if we aren't one yet. */
2133 (void) setsid();
2134
2135 /* If we are init, we connect stdin/stdout/stderr to /dev/null and make sure we don't have a controlling
2136 * tty. */
2137 (void) release_terminal();
2138
2139 /* Reset the console, but only if this is really init and we are freshly booted */
2140 if (getpid_cached() == 1 && !skip_setup)
2141 (void) console_setup();
2142 }
2143
/* Takes a quick peek at the command line (the full parsing happens much later) to tell a
 * reexecution apart from a normal bootup.
 *
 * Returns true if "--deserialize" is among argv[1..argc-1] and "--switched-root" is not, i.e. we
 * are reexecuting and the extensive setup can be skipped. Returns false otherwise: after switching
 * root all the special setup is done anyway, even though deserialization takes place then, too. */
static bool early_skip_setup_check(int argc, char *argv[]) {
        bool deserializing = false;

        for (int k = 1; k < argc; k++) {
                const char *arg = argv[k];

                /* If we switched root, don't skip the setup, regardless of what else is specified. */
                if (streq(arg, "--switched-root"))
                        return false;

                if (streq(arg, "--deserialize"))
                        deserializing = true;
        }

        return deserializing;
}
2162
/* Entry point of the service manager.
 *
 * Visible flow: redirect SysV "init" invocations to telinit, record early timestamps, pick the initial log
 * target depending on whether we run as PID 1 outside of a container, as PID 1 inside a container, or as a
 * user instance, mount the API file systems and (outside of containers, unless setup is skipped) initialize
 * the security frameworks, load the configuration, run the safety checks, handle the actions that do not
 * need a manager (help, version, dump configuration items), then create and start up the Manager,
 * optionally queue the default job and enter the main loop.
 *
 * All exits funnel through the "finish" label, which frees the manager and the parsed arguments, attempts
 * a reexecution or the execution of the shutdown binary when requested, and, when running as PID 1, calls
 * freeze_or_reboot() rather than simply returning.
 *
 * Returns retval, which starts out as EXIT_FAILURE and is updated by the action handlers and the main loop.
 * The valgrind-only PID 1 exit path returns 0 instead. */
2163 int main(int argc, char *argv[]) {
2164
2165 dual_timestamp initrd_timestamp = DUAL_TIMESTAMP_NULL, userspace_timestamp = DUAL_TIMESTAMP_NULL, kernel_timestamp = DUAL_TIMESTAMP_NULL,
2166 security_start_timestamp = DUAL_TIMESTAMP_NULL, security_finish_timestamp = DUAL_TIMESTAMP_NULL;
2167 struct rlimit saved_rlimit_nofile = RLIMIT_MAKE_CONST(0), saved_rlimit_memlock = RLIMIT_MAKE_CONST((rlim_t) -1);
2168 bool skip_setup, loaded_policy = false, queue_default_job = false, first_boot = false, reexecute = false;
2169 char *switch_root_dir = NULL, *switch_root_init = NULL;
2170 usec_t before_startup, after_startup;
2171 static char systemd[] = "systemd";
2172 char timespan[FORMAT_TIMESPAN_MAX];
2173 const char *shutdown_verb = NULL, *error_message = NULL;
2174 int r, retval = EXIT_FAILURE;
2175 Manager *m = NULL;
2176 FDSet *fds = NULL;
2177
2178 /* SysV compatibility: redirect init → telinit */
2179 redirect_telinit(argc, argv);
2180
2181 /* Take timestamps early on */
2182 dual_timestamp_from_monotonic(&kernel_timestamp, 0);
2183 dual_timestamp_get(&userspace_timestamp);
2184
2185 /* Figure out whether we need to initialize the system, or if we already did that because we are
2186 * reexecuting */
2187 skip_setup = early_skip_setup_check(argc, argv);
2188
2189 /* If we get started via the /sbin/init symlink then we are called 'init'. After a subsequent reexecution we
2190 * are then called 'systemd'. That is confusing, hence let's call us systemd right-away. */
2191 program_invocation_short_name = systemd;
2192 (void) prctl(PR_SET_NAME, systemd);
2193
2194 /* Save the original command line */
2195 saved_argv = argv;
2196 saved_argc = argc;
2197
2198 /* Make sure that if the user says "syslog" we actually log to the journal. */
2199 log_set_upgrade_syslog_to_journal(true);
2200
2201 if (getpid_cached() == 1) {
2202 /* Disable the umask logic */
2203 umask(0);
2204
2205 /* Make sure that at least initially we do not ever log to journald/syslogd, because it might not be activated
2206 * yet (even though the log socket for it exists). */
2207 log_set_prohibit_ipc(true);
2208
2209 /* Always reopen /dev/console when running as PID 1 or one of its pre-execve() children. This is
2210 * important so that we never end up logging to any foreign stderr, for example if we have to log in a
2211 * child process right before execve()'ing the actual binary, at a point in time where socket
2212 * activation stderr/stdout are already set up. */
2213 log_set_always_reopen_console(true);
2214 }
2215
2216 if (getpid_cached() == 1 && detect_container() <= 0) {
2217
2218 /* Running outside of a container as PID 1 */
2219 arg_system = true;
2220 log_set_target(LOG_TARGET_KMSG);
2221 log_open();
2222
2223 if (in_initrd())
2224 initrd_timestamp = userspace_timestamp;
2225
2226 if (!skip_setup) {
2227 r = mount_setup_early();
2228 if (r < 0) {
2229 error_message = "Failed to mount early API filesystems";
2230 goto finish;
2231 }
2232
2233 r = initialize_security(
2234 &loaded_policy,
2235 &security_start_timestamp,
2236 &security_finish_timestamp,
2237 &error_message);
2238 if (r < 0)
2239 goto finish;
2240 }
2241
2242 if (mac_selinux_init() < 0) {
2243 error_message = "Failed to initialize SELinux policy";
2244 goto finish;
2245 }
2246
2247 if (!skip_setup)
2248 initialize_clock();
2249
2250 /* Set the default for later on, but don't actually
2251 * open the logs like this for now. Note that if we
2252 * are transitioning from the initrd there might still
2253 * be journal fd open, and we shouldn't attempt
2254 * opening that before we parsed /proc/cmdline which
2255 * might redirect output elsewhere. */
2256 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
2257
2258 } else if (getpid_cached() == 1) {
2259 /* Running inside a container, as PID 1 */
2260 arg_system = true;
2261 log_set_target(LOG_TARGET_CONSOLE);
2262 log_open();
2263
2264 /* For later on, see above... */
2265 log_set_target(LOG_TARGET_JOURNAL);
2266
2267 /* clear the kernel timestamp,
2268 * because we are in a container */
2269 kernel_timestamp = DUAL_TIMESTAMP_NULL;
2270 } else {
2271 /* Running as user instance */
2272 arg_system = false;
2273 log_set_target(LOG_TARGET_AUTO);
2274 log_open();
2275
2276 /* clear the kernel timestamp,
2277 * because we are not PID 1 */
2278 kernel_timestamp = DUAL_TIMESTAMP_NULL;
2279 }
2280
2281 initialize_coredump(skip_setup);
2282
2283 r = fixup_environment();
2284 if (r < 0) {
2285 log_emergency_errno(r, "Failed to fix up PID 1 environment: %m");
2286 error_message = "Failed to fix up PID1 environment";
2287 goto finish;
2288 }
2289
2290 if (arg_system) {
2291
2292 /* Try to figure out if we can use colors with the console. No
2293 * need to do that for user instances since they never log
2294 * into the console. */
2295 log_show_color(colors_enabled());
2296 r = make_null_stdio();
2297 if (r < 0)
2298 log_warning_errno(r, "Failed to redirect standard streams to /dev/null: %m");
2299 }
2300
2301 /* Mount /proc, /sys and friends, so that /proc/cmdline and
2302 * /proc/$PID/fd is available. */
2303 if (getpid_cached() == 1) {
2304
2305 /* Load the kernel modules early. */
2306 if (!skip_setup)
2307 kmod_setup();
2308
2309 r = mount_setup(loaded_policy);
2310 if (r < 0) {
2311 error_message = "Failed to mount API filesystems";
2312 goto finish;
2313 }
2314 }
2315
2316 /* Reset all signal handlers. */
2317 (void) reset_all_signal_handlers();
2318 (void) ignore_signals(SIGNALS_IGNORE, -1);
2319
2320 r = load_configuration(argc, argv, &error_message);
2321 if (r < 0)
2322 goto finish;
2323
2324 r = safety_checks();
2325 if (r < 0)
2326 goto finish;
2327
2328 if (IN_SET(arg_action, ACTION_TEST, ACTION_HELP, ACTION_DUMP_CONFIGURATION_ITEMS))
2329 (void) pager_open(arg_no_pager, false);
2330
/* For every action other than ACTION_RUN force skip_setup, so that initialize_runtime() below is told to
 * skip the setup. */
2331 if (arg_action != ACTION_RUN)
2332 skip_setup = true;
2333
2334 if (arg_action == ACTION_HELP) {
2335 retval = help();
2336 goto finish;
2337 } else if (arg_action == ACTION_VERSION) {
2338 retval = version();
2339 goto finish;
2340 } else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) {
2341 unit_dump_config_items(stdout);
2342 retval = EXIT_SUCCESS;
2343 goto finish;
2344 }
2345
2346 assert_se(IN_SET(arg_action, ACTION_RUN, ACTION_TEST));
2347
2348 /* Move out of the way, so that we won't block unmounts */
2349 assert_se(chdir("/") == 0);
2350
2351 if (arg_action == ACTION_RUN) {
2352
2353 /* Close logging fds, in order not to confuse collecting passed fds and terminal logic below */
2354 log_close();
2355
2356 /* Remember open file descriptors for later deserialization */
2357 r = collect_fds(&fds, &error_message);
2358 if (r < 0)
2359 goto finish;
2360
2361 /* Give up any control of the console, but make sure it's initialized. */
2362 setup_console_terminal(skip_setup);
2363
2364 /* Open the logging devices, if possible and necessary */
2365 log_open();
2366 }
2367
2368 log_execution_mode(&first_boot);
2369
2370 r = initialize_runtime(skip_setup,
2371 &saved_rlimit_nofile,
2372 &saved_rlimit_memlock,
2373 &error_message);
2374 if (r < 0)
2375 goto finish;
2376
2377 r = manager_new(arg_system ? UNIT_FILE_SYSTEM : UNIT_FILE_USER,
2378 arg_action == ACTION_TEST ? MANAGER_TEST_FULL : 0,
2379 &m);
2380 if (r < 0) {
2381 log_emergency_errno(r, "Failed to allocate manager object: %m");
2382 error_message = "Failed to allocate manager object";
2383 goto finish;
2384 }
2385
/* Hand the timestamps collected so far over to the manager. */
2386 m->timestamps[MANAGER_TIMESTAMP_KERNEL] = kernel_timestamp;
2387 m->timestamps[MANAGER_TIMESTAMP_INITRD] = initrd_timestamp;
2388 m->timestamps[MANAGER_TIMESTAMP_USERSPACE] = userspace_timestamp;
2389 m->timestamps[MANAGER_TIMESTAMP_SECURITY_START] = security_start_timestamp;
2390 m->timestamps[MANAGER_TIMESTAMP_SECURITY_FINISH] = security_finish_timestamp;
2391
2392 set_manager_defaults(m);
2393 set_manager_settings(m);
2394 manager_set_first_boot(m, first_boot);
2395
2396 /* Remember whether we should queue the default job */
/* This has to be evaluated here: arg_serialization is reassigned from safe_fclose() right after
 * manager_startup() below. */
2397 queue_default_job = !arg_serialization || arg_switched_root;
2398
2399 before_startup = now(CLOCK_MONOTONIC);
2400
2401 r = manager_startup(m, arg_serialization, fds);
2402 if (r < 0) {
2403 log_error_errno(r, "Failed to fully start up daemon: %m");
2404 error_message = "Failed to start up manager";
2405 goto finish;
2406 }
2407
2408 /* This will close all file descriptors that were opened, but not claimed by any unit. */
2409 fds = fdset_free(fds);
2410 arg_serialization = safe_fclose(arg_serialization);
2411
2412 if (queue_default_job) {
2413 r = do_queue_default_job(m, &error_message);
2414 if (r < 0)
2415 goto finish;
2416 }
2417
2418 after_startup = now(CLOCK_MONOTONIC);
2419
2420 log_full(arg_action == ACTION_TEST ? LOG_INFO : LOG_DEBUG,
2421 "Loaded units and determined initial transaction in %s.",
2422 format_timespan(timespan, sizeof(timespan), after_startup - before_startup, 100 * USEC_PER_MSEC));
2423
2424 if (arg_action == ACTION_TEST) {
2425 test_summary(m);
2426 retval = EXIT_SUCCESS;
2427 goto finish;
2428 }
2429
/* The return value is ignored here; the outcome is reported through the out-parameters (reexecute, retval,
 * shutdown_verb, fds, switch_root_dir, switch_root_init, error_message) that the finish path consumes. */
2430 (void) invoke_main_loop(m,
2431 &reexecute,
2432 &retval,
2433 &shutdown_verb,
2434 &fds,
2435 &switch_root_dir,
2436 &switch_root_init,
2437 &error_message);
2438
2439 finish:
2440 pager_close();
2441
/* Copy the shutdown watchdog setting out of the manager before the manager is freed just below. */
2442 if (m)
2443 arg_shutdown_watchdog = m->shutdown_watchdog;
2444
2445 m = manager_free(m);
2446
2447 free_arguments();
2448 mac_selinux_finish();
2449
2450 if (reexecute)
2451 do_reexecute(argc, argv,
2452 &saved_rlimit_nofile,
2453 &saved_rlimit_memlock,
2454 fds,
2455 switch_root_dir,
2456 switch_root_init,
2457 &error_message); /* This only returns if reexecution failed */
2458
2459 arg_serialization = safe_fclose(arg_serialization);
2460 fds = fdset_free(fds);
2461
2462 #if HAVE_VALGRIND_VALGRIND_H
2463 /* If we are PID 1 and running under valgrind, then let's exit
2464 * here explicitly. valgrind will only generate nice output on
2465 * exit(), not on exec(), hence let's do the former not the
2466 * latter here. */
2467 if (getpid_cached() == 1 && RUNNING_ON_VALGRIND) {
2468 /* Cleanup watchdog_device strings for valgrind. We need them
2469 * in become_shutdown() so normally we cannot free them yet. */
2470 watchdog_free_device();
2471 arg_watchdog_device = mfree(arg_watchdog_device);
2472 return 0;
2473 }
2474 #endif
2475
2476 if (shutdown_verb) {
2477 r = become_shutdown(shutdown_verb, retval);
2478
/* Getting here is treated as a failure of become_shutdown(): the error is logged unconditionally. */
2479 log_error_errno(r, "Failed to execute shutdown binary, %s: %m", getpid_cached() == 1 ? "freezing" : "quitting");
2480 error_message = "Failed to execute shutdown binary";
2481 }
2482
2483 watchdog_free_device();
2484 arg_watchdog_device = mfree(arg_watchdog_device);
2485
/* As PID 1, show the pending error message (if any) on the status output and then call
 * freeze_or_reboot(). */
2486 if (getpid_cached() == 1) {
2487 if (error_message)
2488 manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
2489 ANSI_HIGHLIGHT_RED "!!!!!!" ANSI_NORMAL,
2490 "%s, freezing.", error_message);
2491 freeze_or_reboot();
2492 }
2493
2494 return retval;
2495 }