X-Git-Url: http://git.ipfire.org/?a=blobdiff_plain;f=src%2Fnspawn%2Fnspawn.c;h=96075327df449adbfb702edfb2aa9baff6290a7b;hb=0b452006de98294d1690f045f6ea2f7f6630ec3b;hp=a9b9a3e062f7a6d0abd78660ab5c3e22bd3996b6;hpb=4aab5d0cbd979b2cccb88534f118bceaa86466d8;p=thirdparty%2Fsystemd.git diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index a9b9a3e062f..96075327df4 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -23,27 +23,21 @@ #include #include #include -#include #include -#include #include #include #include #include #include #include -#include -#include #include #include -#include #include #include #include #include #include #include -#include #include #ifdef HAVE_SELINUX @@ -65,8 +59,8 @@ #include "log.h" #include "util.h" #include "mkdir.h" +#include "rm-rf.h" #include "macro.h" -#include "audit.h" #include "missing.h" #include "cgroup-util.h" #include "strv.h" @@ -79,9 +73,7 @@ #include "bus-util.h" #include "bus-error.h" #include "ptyfwd.h" -#include "bus-kernel.h" #include "env-util.h" -#include "def.h" #include "rtnl-util.h" #include "udev-util.h" #include "blkid-util.h" @@ -99,6 +91,8 @@ #include "in-addr-util.h" #include "fw-util.h" #include "local-addresses.h" +#include "formats-util.h" +#include "process-util.h" #ifdef HAVE_SECCOMP #include "seccomp-util.h" @@ -190,6 +184,7 @@ static ExposePort *arg_expose_ports = NULL; static char **arg_property = NULL; static uid_t arg_uid_shift = UID_INVALID, arg_uid_range = 0x10000U; static bool arg_userns = false; +static int arg_kill_signal = 0; static void help(void) { printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n" @@ -238,6 +233,7 @@ static void help(void) { " --capability=CAP In addition to the default, retain specified\n" " capability\n" " --drop-capability=CAP Drop the specified capability from the default set\n" + " --kill-signal=SIGNAL Select signal to use for shutting down PID 1\n" " --link-journal=MODE Link up guest journal, one of no, auto, guest, host,\n" " try-guest, try-host\n" " -j Equivalent to --link-journal=try-guest\n" @@ -302,6 +298,7 @@ static int parse_argv(int argc, char *argv[]) { ARG_TEMPLATE, ARG_PROPERTY, ARG_PRIVATE_USERS, + ARG_KILL_SIGNAL, }; static const struct option options[] = { @@ -341,6 +338,7 @@ static int parse_argv(int argc, char *argv[]) { { "port", required_argument, NULL, 'p' }, { "property", required_argument, NULL, ARG_PROPERTY }, { "private-users", optional_argument, NULL, ARG_PRIVATE_USERS }, + { "kill-signal", required_argument, NULL, ARG_KILL_SIGNAL }, {} }; @@ -776,6 +774,15 @@ static int parse_argv(int argc, char *argv[]) { arg_userns = true; break; + case ARG_KILL_SIGNAL: + arg_kill_signal = signal_from_string_try_harder(optarg); + if (arg_kill_signal < 0) { + log_error("Cannot parse signal: %s", optarg); + return -EINVAL; + } + + break; + case '?': return -EINVAL; @@ -838,6 +845,9 @@ static int parse_argv(int argc, char *argv[]) { arg_retain = (arg_retain | plus | (arg_private_network ? 1ULL << CAP_NET_ADMIN : 0)) & ~minus; + if (arg_boot && arg_kill_signal <= 0) + arg_kill_signal = SIGRTMIN+3; + return 1; } @@ -881,7 +891,7 @@ static int mount_all(const char *dest) { return log_oom(); t = path_is_mount_point(where, true); - if (t < 0) { + if (t < 0 && t != -ENOENT) { log_error_errno(t, "Failed to detect whether %s is a mount point: %m", where); if (r == 0) @@ -1000,7 +1010,7 @@ static int mount_binds(const char *dest, char **l, bool ro) { return log_error_errno(r, "Failed to create mount point %s: %m", where); } - if (mount(*x, where, "bind", MS_BIND, NULL) < 0) + if (mount(*x, where, NULL, MS_BIND, NULL) < 0) return log_error_errno(errno, "mount(%s) failed: %m", where); if (ro) { @@ -1020,7 +1030,7 @@ static int mount_cgroup_hierarchy(const char *dest, const char *controller, cons to = strjoina(dest, "/sys/fs/cgroup/", hierarchy); r = path_is_mount_point(to, false); - if (r < 0) + if (r < 0 && r != -ENOENT) return log_error_errno(r, "Failed to determine if %s is mounted already: %m", to); if (r > 0) return 0; @@ -1316,7 +1326,7 @@ static int setup_volatile(const char *directory) { goto fail; } - if (mount(f, t, "bind", MS_BIND|MS_REC, NULL) < 0) { + if (mount(f, t, NULL, MS_BIND|MS_REC, NULL) < 0) { log_error_errno(errno, "Failed to create /usr bind mount: %m"); r = -errno; goto fail; @@ -1387,10 +1397,10 @@ static int setup_boot_id(const char *dest) { if (r < 0) return log_error_errno(r, "Failed to write boot id: %m"); - if (mount(from, to, "bind", MS_BIND, NULL) < 0) { + if (mount(from, to, NULL, MS_BIND, NULL) < 0) { log_error_errno(errno, "Failed to bind mount boot id: %m"); r = -errno; - } else if (mount(from, to, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL)) + } else if (mount(from, to, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY, NULL)) log_warning_errno(errno, "Failed to make boot id read-only: %m"); unlink(from); @@ -1442,8 +1452,18 @@ static int copy_devnodes(const char *dest) { return -r; } - if (mknod(to, st.st_mode, st.st_rdev) < 0) - return log_error_errno(errno, "mknod(%s) failed: %m", to); + if (mknod(to, st.st_mode, st.st_rdev) < 0) { + if (errno != EPERM) + return log_error_errno(errno, "mknod(%s) failed: %m", to); + + /* Some systems abusively restrict mknod but + * allow bind mounts. */ + r = touch(to); + if (r < 0) + return log_error_errno(r, "touch (%s) failed: %m", to); + if (mount(from, to, NULL, MS_BIND, NULL) < 0) + return log_error_errno(errno, "Both mknod and bind mount (%s) failed: %m", to); + } if (arg_userns && arg_uid_shift != UID_INVALID) if (lchown(to, arg_uid_shift, arg_uid_shift) < 0) @@ -1474,7 +1494,6 @@ static int setup_ptmx(const char *dest) { static int setup_dev_console(const char *dest, const char *console) { _cleanup_umask_ mode_t u; const char *to; - struct stat st; int r; assert(dest); @@ -1482,26 +1501,20 @@ static int setup_dev_console(const char *dest, const char *console) { u = umask(0000); - if (stat("/dev/null", &st) < 0) - return log_error_errno(errno, "Failed to stat /dev/null: %m"); - r = chmod_and_chown(console, 0600, 0, 0); if (r < 0) return log_error_errno(r, "Failed to correct access mode for TTY: %m"); /* We need to bind mount the right tty to /dev/console since * ptys can only exist on pts file systems. To have something - * to bind mount things on we create a device node first, and - * use /dev/null for that since we the cgroups device policy - * allows us to create that freely, while we cannot create - * /dev/console. (Note that the major minor doesn't actually - * matter here, since we mount it over anyway). */ + * to bind mount things on we create a empty regular file. */ to = strjoina(dest, "/dev/console"); - if (mknod(to, (st.st_mode & ~07777) | 0600, st.st_rdev) < 0) - return log_error_errno(errno, "mknod() for /dev/console failed: %m"); + r = touch(to); + if (r < 0) + return log_error_errno(r, "touch() for /dev/console failed: %m"); - if (mount(console, to, "bind", MS_BIND, NULL) < 0) + if (mount(console, to, NULL, MS_BIND, NULL) < 0) return log_error_errno(errno, "Bind mount for /dev/console failed: %m"); return 0; @@ -1544,7 +1557,7 @@ static int setup_kmsg(const char *dest, int kmsg_socket) { if (r < 0) return log_error_errno(r, "Failed to correct access mode for /dev/kmsg: %m"); - if (mount(from, to, "bind", MS_BIND, NULL) < 0) + if (mount(from, to, NULL, MS_BIND, NULL) < 0) return log_error_errno(errno, "Bind mount for /proc/kmsg failed: %m"); fd = open(from, O_RDWR|O_NDELAY|O_CLOEXEC); @@ -1919,7 +1932,7 @@ static int setup_journal(const char *directory) { return r; } - if (mount(p, q, "bind", MS_BIND, NULL) < 0) + if (mount(p, q, NULL, MS_BIND, NULL) < 0) return log_error_errno(errno, "Failed to bind mount journal from host into guest: %m"); return 0; @@ -2560,19 +2573,19 @@ static int setup_ipvlan(pid_t pid) { static int setup_seccomp(void) { #ifdef HAVE_SECCOMP - static const int blacklist[] = { - SCMP_SYS(kexec_load), - SCMP_SYS(open_by_handle_at), - SCMP_SYS(iopl), - SCMP_SYS(ioperm), - SCMP_SYS(swapon), - SCMP_SYS(swapoff), - }; - - static const int kmod_blacklist[] = { - SCMP_SYS(init_module), - SCMP_SYS(finit_module), - SCMP_SYS(delete_module), + static const struct { + uint64_t capability; + int syscall_num; + } blacklist[] = { + { CAP_SYS_RAWIO, SCMP_SYS(iopl)}, + { CAP_SYS_RAWIO, SCMP_SYS(ioperm)}, + { CAP_SYS_BOOT, SCMP_SYS(kexec_load)}, + { CAP_SYS_ADMIN, SCMP_SYS(swapon)}, + { CAP_SYS_ADMIN, SCMP_SYS(swapoff)}, + { CAP_SYS_ADMIN, SCMP_SYS(open_by_handle_at)}, + { CAP_SYS_MODULE, SCMP_SYS(init_module)}, + { CAP_SYS_MODULE, SCMP_SYS(finit_module)}, + { CAP_SYS_MODULE, SCMP_SYS(delete_module)}, }; scmp_filter_ctx seccomp; @@ -2590,7 +2603,10 @@ static int setup_seccomp(void) { } for (i = 0; i < ELEMENTSOF(blacklist); i++) { - r = seccomp_rule_add(seccomp, SCMP_ACT_ERRNO(EPERM), blacklist[i], 0); + if (arg_retain & (1ULL << blacklist[i].capability)) + continue; + + r = seccomp_rule_add(seccomp, SCMP_ACT_ERRNO(EPERM), blacklist[i].syscall_num, 0); if (r == -EFAULT) continue; /* unknown syscall */ if (r < 0) { @@ -2599,19 +2615,6 @@ static int setup_seccomp(void) { } } - /* If the CAP_SYS_MODULE capability is not requested then - * we'll block the kmod syscalls too */ - if (!(arg_retain & (1ULL << CAP_SYS_MODULE))) { - for (i = 0; i < ELEMENTSOF(kmod_blacklist); i++) { - r = seccomp_rule_add(seccomp, SCMP_ACT_ERRNO(EPERM), kmod_blacklist[i], 0); - if (r == -EFAULT) - continue; /* unknown syscall */ - if (r < 0) { - log_error_errno(r, "Failed to block syscall: %m"); - goto finish; - } - } - } /* Audit is broken in containers, much of the userspace audit @@ -2826,7 +2829,7 @@ static int dissect_image( return -errno; } - blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL); + (void) blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL); is_gpt = streq_ptr(pttype, "gpt"); is_mbr = streq_ptr(pttype, "dos"); @@ -3128,7 +3131,7 @@ static int dissect_image( return 0; #else log_error("--image= is not supported, compiled without blkid support."); - return -ENOTSUP; + return -EOPNOTSUPP; #endif } @@ -3183,7 +3186,7 @@ static int mount_device(const char *what, const char *where, const char *directo if (streq(fstype, "crypto_LUKS")) { log_error("nspawn currently does not support LUKS disk images."); - return -ENOTSUP; + return -EOPNOTSUPP; } if (mount(what, p, fstype, MS_NODEV|(rw ? 0 : MS_RDONLY), NULL) < 0) @@ -3192,7 +3195,7 @@ static int mount_device(const char *what, const char *where, const char *directo return 0; #else log_error("--image= is not supported, compiled without blkid support."); - return -ENOTSUP; + return -EOPNOTSUPP; #endif } @@ -3568,7 +3571,7 @@ static int on_orderly_shutdown(sd_event_source *s, const struct signalfd_siginfo pid = PTR_TO_UINT32(userdata); if (pid > 0) { - if (kill(pid, SIGRTMIN+3) >= 0) { + if (kill(pid, arg_kill_signal) >= 0) { log_info("Trying to halt container. Send SIGTERM again to trigger immediate termination."); sd_event_source_set_userdata(s, NULL); return 0; @@ -3710,12 +3713,6 @@ int main(int argc, char *argv[]) { goto finish; } - if (sd_booted() <= 0) { - log_error("Not running on a systemd system."); - r = -EINVAL; - goto finish; - } - log_close(); n_fd_passed = sd_listen_fds(false); if (n_fd_passed > 0) { @@ -3738,7 +3735,7 @@ int main(int argc, char *argv[]) { } if (arg_ephemeral) { - char *np; + _cleanup_free_ char *np = NULL; /* If the specified path is a mount point we * generate the new snapshot immediately @@ -3766,15 +3763,15 @@ int main(int argc, char *argv[]) { goto finish; } - r = btrfs_subvol_snapshot(arg_directory, np, arg_read_only, true); + r = btrfs_subvol_snapshot(arg_directory, np, (arg_read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) | BTRFS_SNAPSHOT_FALLBACK_COPY | BTRFS_SNAPSHOT_RECURSIVE); if (r < 0) { - free(np); log_error_errno(r, "Failed to create snapshot %s from %s: %m", np, arg_directory); goto finish; } free(arg_directory); arg_directory = np; + np = NULL; remove_subvol = true; @@ -3790,7 +3787,7 @@ int main(int argc, char *argv[]) { } if (arg_template) { - r = btrfs_subvol_snapshot(arg_template, arg_directory, arg_read_only, true); + r = btrfs_subvol_snapshot(arg_template, arg_directory, (arg_read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) | BTRFS_SNAPSHOT_FALLBACK_COPY | BTRFS_SNAPSHOT_RECURSIVE); if (r == -EEXIST) { if (!arg_quiet) log_info("Directory %s already exists, not populating from template %s.", arg_directory, arg_template); @@ -4037,7 +4034,7 @@ int main(int argc, char *argv[]) { _exit(EXIT_FAILURE); /* Turn directory into bind mount */ - if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REC, NULL) < 0) { + if (mount(arg_directory, arg_directory, NULL, MS_BIND|MS_REC, NULL) < 0) { log_error_errno(errno, "Failed to make bind mount: %m"); _exit(EXIT_FAILURE); } @@ -4365,7 +4362,7 @@ int main(int argc, char *argv[]) { goto finish; } - if (arg_boot) { + if (arg_kill_signal > 0) { /* Try to kill the init system on SIGINT or SIGTERM */ sd_event_add_signal(event, NULL, SIGINT, on_orderly_shutdown, UINT32_TO_PTR(pid)); sd_event_add_signal(event, NULL, SIGTERM, on_orderly_shutdown, UINT32_TO_PTR(pid)); @@ -4464,7 +4461,7 @@ finish: if (remove_subvol && arg_directory) { int k; - k = btrfs_subvol_remove(arg_directory); + k = btrfs_subvol_remove(arg_directory, true); if (k < 0) log_warning_errno(k, "Cannot remove subvolume '%s', ignoring: %m", arg_directory); } @@ -4473,7 +4470,7 @@ finish: const char *p; p = strjoina("/run/systemd/nspawn/propagate/", arg_machine); - (void) rm_rf(p, false, true, false); + (void) rm_rf(p, REMOVE_ROOT); } free(arg_directory);