]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
Merge pull request #7469 from kinvolk/dongsu/nspawn-netns
authorZbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Thu, 14 Dec 2017 21:47:57 +0000 (22:47 +0100)
committerGitHub <noreply@github.com>
Thu, 14 Dec 2017 21:47:57 +0000 (22:47 +0100)
nspawn: introduce an option for specifying network namespace path

man/systemd-nspawn.xml
src/basic/missing.h
src/basic/stat-util.c
src/basic/stat-util.h
src/nspawn/nspawn.c
test/TEST-13-NSPAWN-SMOKE/test.sh

index cd45ceb2a16b0defc7018fffbb07b2ee382ce468..3dbdf376d3cfe0aa5bd274a0a41da28ae48ec1fe 100644 (file)
         <option>--drop-capability=</option>.</para></listitem>
       </varlistentry>
 
+      <varlistentry>
+        <term><option>--network-namespace-path=</option></term>
+
+        <listitem><para>Takes the path to a file representing a kernel
+        network namespace that the container shall run in. The specified path
+        should refer to a (possibly bind-mounted) network namespace file, as
+        exposed by the kernel below <filename>/proc/$PID/ns/net</filename>.
+        This makes the container enter the given network namespace. One of the
+        typical use cases is to give a network namespace under
+        <filename>/run/netns</filename> created by <citerefentry
+        project='man-pages'><refentrytitle>ip-netns</refentrytitle><manvolnum>8</manvolnum></citerefentry>,
+        for example, <option>--network-namespace-path=/run/netns/foo</option>.
+        Note that this option cannot be used together with other
+        network-related options, such as <option>--private-network</option>
+        or <option>--network-interface=</option>.</para></listitem>
+      </varlistentry>
+
       <varlistentry>
         <term><option>--network-interface=</option></term>
 
index bbdded984fb3a75c1be0afaf6103e6ac3e48c6ef..790f9f55a59ffb1c4770c85434dcf8dd0d0c9a33 100644 (file)
@@ -1271,4 +1271,12 @@ struct fib_rule_uid_range {
 #  define EXT4_IOC_RESIZE_FS              _IOW('f', 16, __u64)
 #endif
 
+#ifndef NSFS_MAGIC
+#define NSFS_MAGIC 0x6e736673
+#endif
+
+#ifndef NS_GET_NSTYPE
+#define NS_GET_NSTYPE _IO(0xb7, 0x3)
+#endif
+
 #include "missing_syscall.h"
index c6b8507e9d8541240ee50a6d7f74fb00f535a956..96fc8b3787233fe0f68c92b5dbb66d41723dbd0d 100644 (file)
@@ -226,6 +226,18 @@ int fd_is_temporary_fs(int fd) {
         return is_temporary_fs(&s);
 }
 
+int fd_is_network_ns(int fd) {
+        int r;
+
+        r = fd_is_fs_type(fd, NSFS_MAGIC);
+        if (r <= 0)
+                return r;
+        r = ioctl(fd, NS_GET_NSTYPE);
+        if (r < 0)
+                return -errno;
+        return r == CLONE_NEWNET;
+}
+
 int path_is_temporary_fs(const char *path) {
         _cleanup_close_ int fd = -1;
 
index 8b8d128121124c19362b14c44b95816362604d4c..d8d3c204960921e006a9cf9c8130fc2ea7394aca 100644 (file)
@@ -62,6 +62,7 @@ int path_is_fs_type(const char *path, statfs_f_type_t magic_value);
 
 bool is_temporary_fs(const struct statfs *s) _pure_;
 int fd_is_temporary_fs(int fd);
+int fd_is_network_ns(int fd);
 int path_is_temporary_fs(const char *path);
 
 /* Because statfs.t_type can be int on some architectures, we have to cast
index f85856c077ad186977001e3603a2450613f08d7f..71b14e23029dd97a894009e6b782f22eaf1dd0f2 100644 (file)
@@ -190,6 +190,7 @@ static bool arg_network_veth = false;
 static char **arg_network_veth_extra = NULL;
 static char *arg_network_bridge = NULL;
 static char *arg_network_zone = NULL;
+static char *arg_network_namespace_path = NULL;
 static unsigned long arg_personality = PERSONALITY_INVALID;
 static char *arg_image = NULL;
 static VolatileMode arg_volatile_mode = VOLATILE_NO;
@@ -260,6 +261,9 @@ static void help(void) {
                "                            and attach it to an existing bridge on the host\n"
                "     --network-zone=NAME    Similar, but attach the new interface to an\n"
                "                            an automatically managed bridge interface\n"
+               "     --network-namespace-path=PATH\n"
+               "                            Set network namespace to the one represented by\n"
+               "                            the specified kernel namespace file node\n"
                "  -p --port=[PROTOCOL:]HOSTPORT[:CONTAINERPORT]\n"
                "                            Expose a container IP port on the host\n"
                "  -Z --selinux-context=SECLABEL\n"
@@ -434,6 +438,7 @@ static int parse_argv(int argc, char *argv[]) {
                 ARG_NETWORK_BRIDGE,
                 ARG_NETWORK_ZONE,
                 ARG_NETWORK_VETH_EXTRA,
+                ARG_NETWORK_NAMESPACE_PATH,
                 ARG_PERSONALITY,
                 ARG_VOLATILE,
                 ARG_TEMPLATE,
@@ -450,55 +455,56 @@ static int parse_argv(int argc, char *argv[]) {
         };
 
         static const struct option options[] = {
-                { "help",                  no_argument,       NULL, 'h'                     },
-                { "version",               no_argument,       NULL, ARG_VERSION             },
-                { "directory",             required_argument, NULL, 'D'                     },
-                { "template",              required_argument, NULL, ARG_TEMPLATE            },
-                { "ephemeral",             no_argument,       NULL, 'x'                     },
-                { "user",                  required_argument, NULL, 'u'                     },
-                { "private-network",       no_argument,       NULL, ARG_PRIVATE_NETWORK     },
-                { "as-pid2",               no_argument,       NULL, 'a'                     },
-                { "boot",                  no_argument,       NULL, 'b'                     },
-                { "uuid",                  required_argument, NULL, ARG_UUID                },
-                { "read-only",             no_argument,       NULL, ARG_READ_ONLY           },
-                { "capability",            required_argument, NULL, ARG_CAPABILITY          },
-                { "drop-capability",       required_argument, NULL, ARG_DROP_CAPABILITY     },
-                { "link-journal",          required_argument, NULL, ARG_LINK_JOURNAL        },
-                { "bind",                  required_argument, NULL, ARG_BIND                },
-                { "bind-ro",               required_argument, NULL, ARG_BIND_RO             },
-                { "tmpfs",                 required_argument, NULL, ARG_TMPFS               },
-                { "overlay",               required_argument, NULL, ARG_OVERLAY             },
-                { "overlay-ro",            required_argument, NULL, ARG_OVERLAY_RO          },
-                { "machine",               required_argument, NULL, 'M'                     },
-                { "slice",                 required_argument, NULL, 'S'                     },
-                { "setenv",                required_argument, NULL, 'E'                     },
-                { "selinux-context",       required_argument, NULL, 'Z'                     },
-                { "selinux-apifs-context", required_argument, NULL, 'L'                     },
-                { "quiet",                 no_argument,       NULL, 'q'                     },
-                { "share-system",          no_argument,       NULL, ARG_SHARE_SYSTEM        }, /* not documented */
-                { "register",              required_argument, NULL, ARG_REGISTER            },
-                { "keep-unit",             no_argument,       NULL, ARG_KEEP_UNIT           },
-                { "network-interface",     required_argument, NULL, ARG_NETWORK_INTERFACE   },
-                { "network-macvlan",       required_argument, NULL, ARG_NETWORK_MACVLAN     },
-                { "network-ipvlan",        required_argument, NULL, ARG_NETWORK_IPVLAN      },
-                { "network-veth",          no_argument,       NULL, 'n'                     },
-                { "network-veth-extra",    required_argument, NULL, ARG_NETWORK_VETH_EXTRA  },
-                { "network-bridge",        required_argument, NULL, ARG_NETWORK_BRIDGE      },
-                { "network-zone",          required_argument, NULL, ARG_NETWORK_ZONE        },
-                { "personality",           required_argument, NULL, ARG_PERSONALITY         },
-                { "image",                 required_argument, NULL, 'i'                     },
-                { "volatile",              optional_argument, NULL, ARG_VOLATILE            },
-                { "port",                  required_argument, NULL, 'p'                     },
-                { "property",              required_argument, NULL, ARG_PROPERTY            },
-                { "private-users",         optional_argument, NULL, ARG_PRIVATE_USERS       },
-                { "private-users-chown",   optional_argument, NULL, ARG_PRIVATE_USERS_CHOWN },
-                { "kill-signal",           required_argument, NULL, ARG_KILL_SIGNAL         },
-                { "settings",              required_argument, NULL, ARG_SETTINGS            },
-                { "chdir",                 required_argument, NULL, ARG_CHDIR               },
-                { "pivot-root",            required_argument, NULL, ARG_PIVOT_ROOT          },
-                { "notify-ready",          required_argument, NULL, ARG_NOTIFY_READY        },
-                { "root-hash",             required_argument, NULL, ARG_ROOT_HASH           },
-                { "system-call-filter",    required_argument, NULL, ARG_SYSTEM_CALL_FILTER  },
+                { "help",                   no_argument,       NULL, 'h'                        },
+                { "version",                no_argument,       NULL, ARG_VERSION                },
+                { "directory",              required_argument, NULL, 'D'                        },
+                { "template",               required_argument, NULL, ARG_TEMPLATE               },
+                { "ephemeral",              no_argument,       NULL, 'x'                        },
+                { "user",                   required_argument, NULL, 'u'                        },
+                { "private-network",        no_argument,       NULL, ARG_PRIVATE_NETWORK        },
+                { "as-pid2",                no_argument,       NULL, 'a'                        },
+                { "boot",                   no_argument,       NULL, 'b'                        },
+                { "uuid",                   required_argument, NULL, ARG_UUID                   },
+                { "read-only",              no_argument,       NULL, ARG_READ_ONLY              },
+                { "capability",             required_argument, NULL, ARG_CAPABILITY             },
+                { "drop-capability",        required_argument, NULL, ARG_DROP_CAPABILITY        },
+                { "link-journal",           required_argument, NULL, ARG_LINK_JOURNAL           },
+                { "bind",                   required_argument, NULL, ARG_BIND                   },
+                { "bind-ro",                required_argument, NULL, ARG_BIND_RO                },
+                { "tmpfs",                  required_argument, NULL, ARG_TMPFS                  },
+                { "overlay",                required_argument, NULL, ARG_OVERLAY                },
+                { "overlay-ro",             required_argument, NULL, ARG_OVERLAY_RO             },
+                { "machine",                required_argument, NULL, 'M'                        },
+                { "slice",                  required_argument, NULL, 'S'                        },
+                { "setenv",                 required_argument, NULL, 'E'                        },
+                { "selinux-context",        required_argument, NULL, 'Z'                        },
+                { "selinux-apifs-context",  required_argument, NULL, 'L'                        },
+                { "quiet",                  no_argument,       NULL, 'q'                        },
+                { "share-system",           no_argument,       NULL, ARG_SHARE_SYSTEM           }, /* not documented */
+                { "register",               required_argument, NULL, ARG_REGISTER               },
+                { "keep-unit",              no_argument,       NULL, ARG_KEEP_UNIT              },
+                { "network-interface",      required_argument, NULL, ARG_NETWORK_INTERFACE      },
+                { "network-macvlan",        required_argument, NULL, ARG_NETWORK_MACVLAN        },
+                { "network-ipvlan",         required_argument, NULL, ARG_NETWORK_IPVLAN         },
+                { "network-veth",           no_argument,       NULL, 'n'                        },
+                { "network-veth-extra",     required_argument, NULL, ARG_NETWORK_VETH_EXTRA     },
+                { "network-bridge",         required_argument, NULL, ARG_NETWORK_BRIDGE         },
+                { "network-zone",           required_argument, NULL, ARG_NETWORK_ZONE           },
+                { "network-namespace-path", required_argument, NULL, ARG_NETWORK_NAMESPACE_PATH },
+                { "personality",            required_argument, NULL, ARG_PERSONALITY            },
+                { "image",                  required_argument, NULL, 'i'                        },
+                { "volatile",               optional_argument, NULL, ARG_VOLATILE               },
+                { "port",                   required_argument, NULL, 'p'                        },
+                { "property",               required_argument, NULL, ARG_PROPERTY               },
+                { "private-users",          optional_argument, NULL, ARG_PRIVATE_USERS          },
+                { "private-users-chown",    optional_argument, NULL, ARG_PRIVATE_USERS_CHOWN    },
+                { "kill-signal",            required_argument, NULL, ARG_KILL_SIGNAL            },
+                { "settings",               required_argument, NULL, ARG_SETTINGS               },
+                { "chdir",                  required_argument, NULL, ARG_CHDIR                  },
+                { "pivot-root",             required_argument, NULL, ARG_PIVOT_ROOT             },
+                { "notify-ready",           required_argument, NULL, ARG_NOTIFY_READY           },
+                { "root-hash",              required_argument, NULL, ARG_ROOT_HASH              },
+                { "system-call-filter",     required_argument, NULL, ARG_SYSTEM_CALL_FILTER     },
                 {}
         };
 
@@ -644,6 +650,13 @@ static int parse_argv(int argc, char *argv[]) {
                         arg_settings_mask |= SETTING_NETWORK;
                         break;
 
+                case ARG_NETWORK_NAMESPACE_PATH:
+                        r = parse_path_argument_and_warn(optarg, false, &arg_network_namespace_path);
+                        if (r < 0)
+                                return r;
+
+                        break;
+
                 case 'b':
                         if (arg_start_mode == START_PID2) {
                                 log_error("--boot and --as-pid2 may not be combined.");
@@ -1103,6 +1116,17 @@ static int parse_argv(int argc, char *argv[]) {
                         assert_not_reached("Unhandled option");
                 }
 
+        /* If --network-namespace-path is given with any other network-related option,
+         * we need to error out, to avoid conflicts between different network options. */
+        if (arg_network_namespace_path &&
+                (arg_network_interfaces || arg_network_macvlan ||
+                 arg_network_ipvlan || arg_network_veth_extra ||
+                 arg_network_bridge || arg_network_zone ||
+                 arg_network_veth || arg_private_network)) {
+                log_error("--network-namespace-path cannot be combined with other network options.");
+                return -EINVAL;
+        }
+
         parse_share_ns_env("SYSTEMD_NSPAWN_SHARE_NS_IPC", CLONE_NEWIPC);
         parse_share_ns_env("SYSTEMD_NSPAWN_SHARE_NS_PID", CLONE_NEWPID);
         parse_share_ns_env("SYSTEMD_NSPAWN_SHARE_NS_UTS", CLONE_NEWUTS);
@@ -2532,12 +2556,14 @@ static int outer_child(
                 int rtnl_socket,
                 int uid_shift_socket,
                 int unified_cgroup_hierarchy_socket,
-                FDSet *fds) {
+                FDSet *fds,
+                int netns_fd) {
 
         pid_t pid;
         ssize_t l;
         int r;
         _cleanup_close_ int fd = -1;
+        bool create_netns;
 
         assert(barrier);
         assert(directory);
@@ -2788,9 +2814,11 @@ static int outer_child(
         if (fd < 0)
                 return fd;
 
+        create_netns = !arg_network_namespace_path && arg_private_network;
+
         pid = raw_clone(SIGCHLD|CLONE_NEWNS|
                         arg_clone_ns_flags |
-                        (arg_private_network ? CLONE_NEWNET : 0) |
+                        (create_netns ? CLONE_NEWNET : 0) |
                         (arg_userns_mode != USER_NAMESPACE_NO ? CLONE_NEWUSER : 0));
         if (pid < 0)
                 return log_error_errno(errno, "Failed to fork inner child: %m");
@@ -2804,6 +2832,12 @@ static int outer_child(
                  * requested, so that we all are owned by the user if
                  * user namespaces are turned on. */
 
+                if (arg_network_namespace_path) {
+                        r = namespace_enter(-1, -1, netns_fd, -1, -1);
+                        if (r < 0)
+                                return r;
+                }
+
                 r = inner_child(barrier, directory, secondary, kmsg_socket, rtnl_socket, fds);
                 if (r < 0)
                         _exit(EXIT_FAILURE);
@@ -2836,6 +2870,7 @@ static int outer_child(
         notify_socket = safe_close(notify_socket);
         kmsg_socket = safe_close(kmsg_socket);
         rtnl_socket = safe_close(rtnl_socket);
+        netns_fd = safe_close(netns_fd);
 
         return 0;
 }
@@ -3311,6 +3346,7 @@ static int run(int master,
         int ifi = 0, r;
         ssize_t l;
         sigset_t mask_chld;
+        _cleanup_close_ int netns_fd = -1;
 
         assert_se(sigemptyset(&mask_chld) == 0);
         assert_se(sigaddset(&mask_chld, SIGCHLD) == 0);
@@ -3365,6 +3401,20 @@ static int run(int master,
         if (r < 0)
                 return log_error_errno(errno, "Failed to install SIGCHLD handler: %m");
 
+        if (arg_network_namespace_path) {
+                netns_fd = open(arg_network_namespace_path, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+                if (netns_fd < 0)
+                        return log_error_errno(errno, "Cannot open file %s: %m", arg_network_namespace_path);
+
+                r = fd_is_network_ns(netns_fd);
+                if (r < 0 && r != -ENOTTY)
+                        return log_error_errno(r, "Failed to check %s fs type: %m", arg_network_namespace_path);
+                if (r == 0) {
+                        log_error("Path %s doesn't refer to a network namespace", arg_network_namespace_path);
+                        return -EINVAL;
+                }
+        }
+
         *pid = raw_clone(SIGCHLD|CLONE_NEWNS);
         if (*pid < 0)
                 return log_error_errno(errno, "clone() failed%s: %m",
@@ -3401,7 +3451,8 @@ static int run(int master,
                                 rtnl_socket_pair[1],
                                 uid_shift_socket_pair[1],
                                 unified_cgroup_hierarchy_socket_pair[1],
-                                fds);
+                                fds,
+                                netns_fd);
                 if (r < 0)
                         _exit(EXIT_FAILURE);
 
index 7f7380fd6267eb77db360ac78c57ad189727fcba..239c7e0731e648c9131bab3531d9e16bc0abee6b 100755 (executable)
@@ -18,12 +18,12 @@ test_setup() {
         eval $(udevadm info --export --query=env --name=${LOOPDEV}p2)
 
         setup_basic_environment
-        dracut_install busybox chmod rmdir unshare
+        dracut_install busybox chmod rmdir unshare ip
 
         cp create-busybox-container $initdir/
 
         ./create-busybox-container $initdir/nc-container
-        initdir="$initdir/nc-container" dracut_install nc
+        initdir="$initdir/nc-container" dracut_install nc ip
 
         # setup the testsuite service
         cat >$initdir/etc/systemd/system/testsuite.service <<EOF
@@ -34,6 +34,8 @@ After=multi-user.target
 [Service]
 ExecStart=/test-nspawn.sh
 Type=oneshot
+StandardOutput=tty
+StandardError=tty
 EOF
 
         cat >$initdir/test-nspawn.sh <<'EOF'
@@ -107,6 +109,52 @@ function run {
        [[ "$is_user_ns_supported" = "no" && "$3" = "yes" ]] && return 1
     fi
 
+    local _netns_opt="--network-namespace-path=/proc/self/ns/net"
+
+    # --network-namespace-path and network-related options cannot be used together
+    if UNIFIED_CGROUP_HIERARCHY="$1" SYSTEMD_NSPAWN_USE_CGNS="$2" SYSTEMD_NSPAWN_API_VFS_WRITABLE="$3" systemd-nspawn --register=no -D "$_root" "$_netns_opt" --network-interface=lo -b; then
+       return 1
+    fi
+
+    if UNIFIED_CGROUP_HIERARCHY="$1" SYSTEMD_NSPAWN_USE_CGNS="$2" SYSTEMD_NSPAWN_API_VFS_WRITABLE="$3" systemd-nspawn --register=no -D "$_root" "$_netns_opt" --network-macvlan=lo -b; then
+       return 1
+    fi
+
+    if UNIFIED_CGROUP_HIERARCHY="$1" SYSTEMD_NSPAWN_USE_CGNS="$2" SYSTEMD_NSPAWN_API_VFS_WRITABLE="$3" systemd-nspawn --register=no -D "$_root" "$_netns_opt" --network-ipvlan=lo -b; then
+       return 1
+    fi
+
+    if UNIFIED_CGROUP_HIERARCHY="$1" SYSTEMD_NSPAWN_USE_CGNS="$2" SYSTEMD_NSPAWN_API_VFS_WRITABLE="$3" systemd-nspawn --register=no -D "$_root" "$_netns_opt" --network-veth -b; then
+       return 1
+    fi
+
+    if UNIFIED_CGROUP_HIERARCHY="$1" SYSTEMD_NSPAWN_USE_CGNS="$2" SYSTEMD_NSPAWN_API_VFS_WRITABLE="$3" systemd-nspawn --register=no -D "$_root" "$_netns_opt" --network-veth-extra=lo -b; then
+       return 1
+    fi
+
+    if UNIFIED_CGROUP_HIERARCHY="$1" SYSTEMD_NSPAWN_USE_CGNS="$2" SYSTEMD_NSPAWN_API_VFS_WRITABLE="$3" systemd-nspawn --register=no -D "$_root" "$_netns_opt" --network-bridge=lo -b; then
+       return 1
+    fi
+
+    if UNIFIED_CGROUP_HIERARCHY="$1" SYSTEMD_NSPAWN_USE_CGNS="$2" SYSTEMD_NSPAWN_API_VFS_WRITABLE="$3" systemd-nspawn --register=no -D "$_root" "$_netns_opt" --network-zone=zone -b; then
+       return 1
+    fi
+
+    if UNIFIED_CGROUP_HIERARCHY="$1" SYSTEMD_NSPAWN_USE_CGNS="$2" SYSTEMD_NSPAWN_API_VFS_WRITABLE="$3" systemd-nspawn --register=no -D "$_root" "$_netns_opt" --private-network -b; then
+       return 1
+    fi
+
+    # test --network-namespace-path works with a network namespace created by "ip netns"
+    ip netns add nspawn_test
+    _netns_opt="--network-namespace-path=/run/netns/nspawn_test"
+    UNIFIED_CGROUP_HIERARCHY="$1" SYSTEMD_NSPAWN_USE_CGNS="$2" SYSTEMD_NSPAWN_API_VFS_WRITABLE="$3" systemd-nspawn --register=no -D "$_root" "$_netns_opt" ip a | grep -E '^1: lo.*DOWN'
+    local r=$?
+    ip netns del nspawn_test
+
+    if [ $r -ne 0 ]; then
+       return 1
+    fi
+
     return 0
 }