]> git.ipfire.org Git - thirdparty/systemd.git/blobdiff - src/nspawn/nspawn.c
nspawn-network: Split off udev checking from parse_interface.
[thirdparty/systemd.git] / src / nspawn / nspawn.c
index c87e8591949facd285c9bb4a06124c2d2c0dcad6..13c91c40bfcbb9eb915f19b51a2bc3338a974cb0 100644 (file)
@@ -290,7 +290,7 @@ static int help(void) {
                 return log_oom();
 
         printf("%1$s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
-               "Spawn a command or OS in a light-weight container.\n\n"
+               "%5$sSpawn a command or OS in a light-weight container.%6$s\n\n"
                "  -h --help                 Show this help\n"
                "     --version              Print version string\n"
                "  -q --quiet                Do not show status information\n"
@@ -405,7 +405,9 @@ static int help(void) {
                "\nSee the %2$s for details.\n"
                , program_invocation_short_name
                , link
-               , ansi_underline(), ansi_normal());
+               , ansi_underline(), ansi_normal()
+               , ansi_highlight(), ansi_normal()
+        );
 
         return 0;
 }
@@ -437,14 +439,9 @@ static int detect_unified_cgroup_hierarchy_from_environment(void) {
 
         e = getenv(var);
         if (!e) {
-                static bool warned = false;
-
+                /* $UNIFIED_CGROUP_HIERARCHY has been renamed to $SYSTEMD_NSPAWN_UNIFIED_HIERARCHY. */
                 var = "UNIFIED_CGROUP_HIERARCHY";
                 e = getenv(var);
-                if (e && !warned) {
-                        log_info("$UNIFIED_CGROUP_HIERARCHY has been renamed to $SYSTEMD_NSPAWN_UNIFIED_HIERARCHY.");
-                        warned = true;
-                }
         }
 
         if (!isempty(e)) {
@@ -497,6 +494,46 @@ static int detect_unified_cgroup_hierarchy_from_image(const char *directory) {
         return 0;
 }
 
+static int parse_capability_spec(const char *spec, uint64_t *ret_mask) {
+        uint64_t mask = 0;
+        int r;
+
+        for (;;) {
+                _cleanup_free_ char *t = NULL;
+
+                r = extract_first_word(&spec, &t, ",", 0);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to parse capability %s.", t);
+                if (r == 0)
+                        break;
+
+                if (streq(t, "help")) {
+                        for (int i = 0; i < capability_list_length(); i++) {
+                                const char *name;
+
+                                name = capability_to_name(i);
+                                if (name)
+                                        puts(name);
+                        }
+
+                        return 0; /* quit */
+                }
+
+                if (streq(t, "all"))
+                        mask = (uint64_t) -1;
+                else {
+                        r = capability_from_name(t);
+                        if (r < 0)
+                                return log_error_errno(r, "Failed to parse capability %s.", t);
+
+                        mask |= 1ULL << r;
+                }
+        }
+
+        *ret_mask = mask;
+        return 1; /* continue */
+}
+
 static int parse_share_ns_env(const char *name, unsigned long ns_flag) {
         int r;
 
@@ -700,7 +737,6 @@ static int parse_argv(int argc, char *argv[]) {
         };
 
         int c, r;
-        const char *p;
         uint64_t plus = 0, minus = 0;
         bool mask_all_settings = false, mask_no_settings = false;
 
@@ -812,6 +848,10 @@ static int parse_argv(int argc, char *argv[]) {
                                 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
                                                        "Network interface name not valid: %s", optarg);
 
+                        r = test_network_interface_initialized(optarg);
+                        if (r < 0)
+                                return r;
+
                         if (strv_extend(&arg_network_interfaces, optarg) < 0)
                                 return log_oom();
 
@@ -825,6 +865,10 @@ static int parse_argv(int argc, char *argv[]) {
                                 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
                                                        "MACVLAN network interface name not valid: %s", optarg);
 
+                        r = test_network_interface_initialized(optarg);
+                        if (r < 0)
+                                return r;
+
                         if (strv_extend(&arg_network_macvlan, optarg) < 0)
                                 return log_oom();
 
@@ -838,6 +882,10 @@ static int parse_argv(int argc, char *argv[]) {
                                 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
                                                        "IPVLAN network interface name not valid: %s", optarg);
 
+                        r = test_network_interface_initialized(optarg);
+                        if (r < 0)
+                                return r;
+
                         if (strv_extend(&arg_network_ipvlan, optarg) < 0)
                                 return log_oom();
 
@@ -942,37 +990,18 @@ static int parse_argv(int argc, char *argv[]) {
 
                 case ARG_CAPABILITY:
                 case ARG_DROP_CAPABILITY: {
-                        p = optarg;
-                        for (;;) {
-                                _cleanup_free_ char *t = NULL;
-
-                                r = extract_first_word(&p, &t, ",", 0);
-                                if (r < 0)
-                                        return log_error_errno(r, "Failed to parse capability %s.", t);
-                                if (r == 0)
-                                        break;
-
-                                if (streq(t, "all")) {
-                                        if (c == ARG_CAPABILITY)
-                                                plus = (uint64_t) -1;
-                                        else
-                                                minus = (uint64_t) -1;
-                                } else {
-                                        r = capability_from_name(t);
-                                        if (r < 0)
-                                                return log_error_errno(r, "Failed to parse capability %s.", t);
-
-                                        if (c == ARG_CAPABILITY)
-                                                plus |= 1ULL << r;
-                                        else
-                                                minus |= 1ULL << r;
-                                }
-                        }
+                        uint64_t m;
+                        r = parse_capability_spec(optarg, &m);
+                        if (r <= 0)
+                                return r;
 
+                        if (c == ARG_CAPABILITY)
+                                plus |= m;
+                        else
+                                minus |= m;
                         arg_settings_mask |= SETTING_CAPABILITY;
                         break;
                 }
-
                 case ARG_NO_NEW_PRIVILEGES:
                         r = parse_boolean(optarg);
                         if (r < 0)
@@ -1545,13 +1574,13 @@ static int verify_arguments(void) {
         if (arg_userns_chown && arg_volatile_mode != VOLATILE_NO)
                 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--volatile= and --private-users-chown may not be combined.");
 
-        /* If --network-namespace-path is given with any other network-related option, we need to error out,
-         * to avoid conflicts between different network options. */
+        /* If --network-namespace-path is given with any other network-related option (except --private-network),
+         * we need to error out, to avoid conflicts between different network options. */
         if (arg_network_namespace_path &&
                 (arg_network_interfaces || arg_network_macvlan ||
                  arg_network_ipvlan || arg_network_veth_extra ||
                  arg_network_bridge || arg_network_zone ||
-                 arg_network_veth || arg_private_network))
+                 arg_network_veth))
                 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--network-namespace-path= cannot be combined with other network options.");
 
         if (arg_network_bridge && arg_network_zone)
@@ -2212,9 +2241,9 @@ static int setup_hostname(void) {
 
 static int setup_journal(const char *directory) {
         _cleanup_free_ char *d = NULL;
+        char id[SD_ID128_STRING_MAX];
         const char *dirname, *p, *q;
         sd_id128_t this_id;
-        char id[33];
         bool try;
         int r;
 
@@ -2849,7 +2878,7 @@ static int inner_child(
                 FDSet *fds) {
 
         _cleanup_free_ char *home = NULL;
-        char as_uuid[37];
+        char as_uuid[ID128_UUID_STRING_MAX];
         size_t n_env = 1;
         const char *envp[] = {
                 "PATH=" DEFAULT_PATH_COMPAT,
@@ -2958,11 +2987,9 @@ static int inner_child(
                         "/",
                         arg_custom_mounts,
                         arg_n_custom_mounts,
-                        false,
-                        0,
                         0,
                         arg_selinux_apifs_context,
-                        true);
+                        MOUNT_NON_ROOT_ONLY | MOUNT_IN_USERNS);
         if (r < 0)
                 return r;
 
@@ -3237,6 +3264,7 @@ static int outer_child(
                 int netns_fd) {
 
         _cleanup_close_ int fd = -1;
+        const char *p;
         pid_t pid;
         ssize_t l;
         int r;
@@ -3328,13 +3356,6 @@ static int outer_child(
                         return r;
 
                 directory = "/run/systemd/nspawn-root";
-
-        } else if (!dissected_image) {
-                /* Turn directory into bind mount (we need that so that we can move the bind mount to root
-                 * later on). */
-                r = mount_verbose(LOG_ERR, directory, directory, NULL, MS_BIND|MS_REC, NULL);
-                if (r < 0)
-                        return r;
         }
 
         r = setup_pivot_root(
@@ -3347,13 +3368,28 @@ static int outer_child(
         r = setup_volatile_mode(
                         directory,
                         arg_volatile_mode,
-                        arg_userns_mode != USER_NAMESPACE_NO,
                         arg_uid_shift,
-                        arg_uid_range,
                         arg_selinux_apifs_context);
         if (r < 0)
                 return r;
 
+        r = mount_custom(
+                        directory,
+                        arg_custom_mounts,
+                        arg_n_custom_mounts,
+                        arg_uid_shift,
+                        arg_selinux_apifs_context,
+                        MOUNT_ROOT_ONLY);
+        if (r < 0)
+                return r;
+
+        /* Make sure we always have a mount that we can move to root later on. */
+        if (!path_is_mount_point(directory, NULL, 0)) {
+                r = mount_verbose(LOG_ERR, directory, directory, NULL, MS_BIND|MS_REC, NULL);
+                if (r < 0)
+                        return r;
+        }
+
         if (dissected_image) {
                 /* Now we know the uid shift, let's now mount everything else that might be in the image. */
                 r = dissected_image_mount(dissected_image, directory, arg_uid_shift,
@@ -3384,7 +3420,12 @@ static int outer_child(
          * inside the container that create a new mount namespace.
          * See https://github.com/systemd/systemd/issues/3860
          * Further submounts (such as /dev) done after this will inherit the
-         * shared propagation mode. */
+         * shared propagation mode.
+         *
+         * IMPORTANT: Do not overmount the root directory anymore from now on to
+         * enable moving the root directory mount to root later on.
+         * https://github.com/systemd/systemd/issues/3847#issuecomment-562735251
+         */
         r = mount_verbose(LOG_ERR, NULL, directory, NULL, MS_SHARED|MS_REC, NULL);
         if (r < 0)
                 return r;
@@ -3419,7 +3460,9 @@ static int outer_child(
                 return r;
 
         (void) dev_setup(directory, arg_uid_shift, arg_uid_shift);
-        (void) make_inaccessible_nodes(directory, arg_uid_shift, arg_uid_shift);
+
+        p = prefix_roota(directory, "/run/systemd");
+        (void) make_inaccessible_nodes(p, arg_uid_shift, arg_uid_shift);
 
         r = setup_pts(directory);
         if (r < 0)
@@ -3453,11 +3496,9 @@ static int outer_child(
                         directory,
                         arg_custom_mounts,
                         arg_n_custom_mounts,
-                        arg_userns_mode != USER_NAMESPACE_NO,
                         arg_uid_shift,
-                        arg_uid_range,
                         arg_selinux_apifs_context,
-                        false);
+                        MOUNT_NON_ROOT_ONLY);
         if (r < 0)
                 return r;
 
@@ -3775,6 +3816,7 @@ static int merge_settings(Settings *settings, const char *path) {
 
         if ((arg_settings_mask & SETTING_CAPABILITY) == 0) {
                 uint64_t plus, minus;
+                uint64_t network_minus = 0;
 
                 /* Note that we copy both the simple plus/minus caps here, and the full quintet from the
                  * Settings structure */
@@ -3786,14 +3828,16 @@ static int merge_settings(Settings *settings, const char *path) {
                         if (settings_private_network(settings))
                                 plus |= UINT64_C(1) << CAP_NET_ADMIN;
                         else
-                                minus |= UINT64_C(1) << CAP_NET_ADMIN;
+                                network_minus |= UINT64_C(1) << CAP_NET_ADMIN;
                 }
 
                 if (!arg_settings_trusted && plus != 0) {
                         if (settings->capability != 0)
                                 log_warning("Ignoring Capability= setting, file %s is not trusted.", path);
-                } else
+                } else {
+                        arg_caps_retain &= ~network_minus;
                         arg_caps_retain |= plus;
+                }
 
                 arg_caps_retain &= ~minus;
 
@@ -5016,7 +5060,7 @@ static int run(int argc, char *argv[]) {
                         goto finish;
                 }
 
-                r = loop_device_make_by_path(arg_image, arg_read_only ? O_RDONLY : O_RDWR, &loop);
+                r = loop_device_make_by_path(arg_image, arg_read_only ? O_RDONLY : O_RDWR, LO_FLAGS_PARTSCAN, &loop);
                 if (r < 0) {
                         log_error_errno(r, "Failed to set up loopback block device: %m");
                         goto finish;