git.ipfire.org Git - thirdparty/systemd.git/commitdiff
nspawn: introduce an option for specifying network namespace path
author Dongsu Park <dongsu@kinvolk.io>
Fri, 24 Nov 2017 17:22:17 +0000 (18:22 +0100)
committer Iago López Galeiras <iago@kinvolk.io>
Wed, 13 Dec 2017 10:21:06 +0000 (10:21 +0000)
Add a new option `--network-namespace-path` to systemd-nspawn to allow
users to specify an arbitrary network namespace, e.g. `/run/netns/foo`.
Then systemd-nspawn will open the netns file, pass the fd to
outer_child, and enter the namespace represented by the fd before
running inner_child.

```
$ sudo ip netns add foo
$ mount | grep /run/netns/foo
nsfs on /run/netns/foo type nsfs (rw)
...
$ sudo systemd-nspawn -D /srv/fc27 --network-namespace-path=/run/netns/foo \
  /bin/readlink -f /proc/self/ns/net
/proc/1/ns/net:[4026532009]
```

Note that the option `--network-namespace-path=` cannot be used together
with other network-related options such as `--private-network` so that
the options do not conflict with each other.

Fixes https://github.com/systemd/systemd/issues/7361

man/systemd-nspawn.xml
src/basic/missing.h
src/basic/stat-util.c
src/basic/stat-util.h
src/nspawn/nspawn.c

index cd45ceb2a16b0defc7018fffbb07b2ee382ce468..3dbdf376d3cfe0aa5bd274a0a41da28ae48ec1fe 100644 (file)
         <option>--drop-capability=</option>.</para></listitem>
       </varlistentry>
 
+      <varlistentry>
+        <term><option>--network-namespace-path=</option></term>
+
+        <listitem><para>Takes the path to a file representing a kernel
+        network namespace that the container shall run in. The specified path
+        should refer to a (possibly bind-mounted) network namespace file, as
+        exposed by the kernel below <filename>/proc/$PID/ns/net</filename>.
+        This makes the container enter the given network namespace. One of the
+        typical use cases is to give a network namespace under
+        <filename>/run/netns</filename> created by <citerefentry
+        project='man-pages'><refentrytitle>ip-netns</refentrytitle><manvolnum>8</manvolnum></citerefentry>,
+        for example, <option>--network-namespace-path=/run/netns/foo</option>.
+        Note that this option cannot be used together with other
+        network-related options, such as <option>--private-network</option>
+        or <option>--network-interface=</option>.</para></listitem>
+      </varlistentry>
+
       <varlistentry>
         <term><option>--network-interface=</option></term>
 
index bbdded984fb3a75c1be0afaf6103e6ac3e48c6ef..790f9f55a59ffb1c4770c85434dcf8dd0d0c9a33 100644 (file)
@@ -1271,4 +1271,12 @@ struct fib_rule_uid_range {
 #  define EXT4_IOC_RESIZE_FS              _IOW('f', 16, __u64)
 #endif
 
+#ifndef NSFS_MAGIC
+#define NSFS_MAGIC 0x6e736673
+#endif
+
+#ifndef NS_GET_NSTYPE
+#define NS_GET_NSTYPE _IO(0xb7, 0x3)
+#endif
+
 #include "missing_syscall.h"
index c6b8507e9d8541240ee50a6d7f74fb00f535a956..96fc8b3787233fe0f68c92b5dbb66d41723dbd0d 100644 (file)
@@ -226,6 +226,18 @@ int fd_is_temporary_fs(int fd) {
         return is_temporary_fs(&s);
 }
 
+int fd_is_network_ns(int fd) {
+        int r;
+
+        r = fd_is_fs_type(fd, NSFS_MAGIC);
+        if (r <= 0)
+                return r;
+        r = ioctl(fd, NS_GET_NSTYPE);
+        if (r < 0)
+                return -errno;
+        return r == CLONE_NEWNET;
+}
+
 int path_is_temporary_fs(const char *path) {
         _cleanup_close_ int fd = -1;
 
index 8b8d128121124c19362b14c44b95816362604d4c..d8d3c204960921e006a9cf9c8130fc2ea7394aca 100644 (file)
@@ -62,6 +62,7 @@ int path_is_fs_type(const char *path, statfs_f_type_t magic_value);
 
 bool is_temporary_fs(const struct statfs *s) _pure_;
 int fd_is_temporary_fs(int fd);
+int fd_is_network_ns(int fd);
 int path_is_temporary_fs(const char *path);
 
 /* Because statfs.t_type can be int on some architectures, we have to cast
index f217def92d53b7b03aabc929bd0a1c469b3890d4..64819b5e853e68b2cdd59ecfc7976e84c99e4cd9 100644 (file)
@@ -190,6 +190,7 @@ static bool arg_network_veth = false;
 static char **arg_network_veth_extra = NULL;
 static char *arg_network_bridge = NULL;
 static char *arg_network_zone = NULL;
+static char *arg_network_namespace_path = NULL;
 static unsigned long arg_personality = PERSONALITY_INVALID;
 static char *arg_image = NULL;
 static VolatileMode arg_volatile_mode = VOLATILE_NO;
@@ -260,6 +261,9 @@ static void help(void) {
                "                            and attach it to an existing bridge on the host\n"
                "     --network-zone=NAME    Similar, but attach the new interface to an\n"
                "                            an automatically managed bridge interface\n"
+               "     --network-namespace-path=PATH\n"
+               "                            Set network namespace to the one represented by\n"
+               "                            the specified kernel namespace file node\n"
                "  -p --port=[PROTOCOL:]HOSTPORT[:CONTAINERPORT]\n"
                "                            Expose a container IP port on the host\n"
                "  -Z --selinux-context=SECLABEL\n"
@@ -434,6 +438,7 @@ static int parse_argv(int argc, char *argv[]) {
                 ARG_NETWORK_BRIDGE,
                 ARG_NETWORK_ZONE,
                 ARG_NETWORK_VETH_EXTRA,
+                ARG_NETWORK_NAMESPACE_PATH,
                 ARG_PERSONALITY,
                 ARG_VOLATILE,
                 ARG_TEMPLATE,
@@ -450,55 +455,56 @@ static int parse_argv(int argc, char *argv[]) {
         };
 
         static const struct option options[] = {
-                { "help",                  no_argument,       NULL, 'h'                     },
-                { "version",               no_argument,       NULL, ARG_VERSION             },
-                { "directory",             required_argument, NULL, 'D'                     },
-                { "template",              required_argument, NULL, ARG_TEMPLATE            },
-                { "ephemeral",             no_argument,       NULL, 'x'                     },
-                { "user",                  required_argument, NULL, 'u'                     },
-                { "private-network",       no_argument,       NULL, ARG_PRIVATE_NETWORK     },
-                { "as-pid2",               no_argument,       NULL, 'a'                     },
-                { "boot",                  no_argument,       NULL, 'b'                     },
-                { "uuid",                  required_argument, NULL, ARG_UUID                },
-                { "read-only",             no_argument,       NULL, ARG_READ_ONLY           },
-                { "capability",            required_argument, NULL, ARG_CAPABILITY          },
-                { "drop-capability",       required_argument, NULL, ARG_DROP_CAPABILITY     },
-                { "link-journal",          required_argument, NULL, ARG_LINK_JOURNAL        },
-                { "bind",                  required_argument, NULL, ARG_BIND                },
-                { "bind-ro",               required_argument, NULL, ARG_BIND_RO             },
-                { "tmpfs",                 required_argument, NULL, ARG_TMPFS               },
-                { "overlay",               required_argument, NULL, ARG_OVERLAY             },
-                { "overlay-ro",            required_argument, NULL, ARG_OVERLAY_RO          },
-                { "machine",               required_argument, NULL, 'M'                     },
-                { "slice",                 required_argument, NULL, 'S'                     },
-                { "setenv",                required_argument, NULL, 'E'                     },
-                { "selinux-context",       required_argument, NULL, 'Z'                     },
-                { "selinux-apifs-context", required_argument, NULL, 'L'                     },
-                { "quiet",                 no_argument,       NULL, 'q'                     },
-                { "share-system",          no_argument,       NULL, ARG_SHARE_SYSTEM        }, /* not documented */
-                { "register",              required_argument, NULL, ARG_REGISTER            },
-                { "keep-unit",             no_argument,       NULL, ARG_KEEP_UNIT           },
-                { "network-interface",     required_argument, NULL, ARG_NETWORK_INTERFACE   },
-                { "network-macvlan",       required_argument, NULL, ARG_NETWORK_MACVLAN     },
-                { "network-ipvlan",        required_argument, NULL, ARG_NETWORK_IPVLAN      },
-                { "network-veth",          no_argument,       NULL, 'n'                     },
-                { "network-veth-extra",    required_argument, NULL, ARG_NETWORK_VETH_EXTRA  },
-                { "network-bridge",        required_argument, NULL, ARG_NETWORK_BRIDGE      },
-                { "network-zone",          required_argument, NULL, ARG_NETWORK_ZONE        },
-                { "personality",           required_argument, NULL, ARG_PERSONALITY         },
-                { "image",                 required_argument, NULL, 'i'                     },
-                { "volatile",              optional_argument, NULL, ARG_VOLATILE            },
-                { "port",                  required_argument, NULL, 'p'                     },
-                { "property",              required_argument, NULL, ARG_PROPERTY            },
-                { "private-users",         optional_argument, NULL, ARG_PRIVATE_USERS       },
-                { "private-users-chown",   optional_argument, NULL, ARG_PRIVATE_USERS_CHOWN },
-                { "kill-signal",           required_argument, NULL, ARG_KILL_SIGNAL         },
-                { "settings",              required_argument, NULL, ARG_SETTINGS            },
-                { "chdir",                 required_argument, NULL, ARG_CHDIR               },
-                { "pivot-root",            required_argument, NULL, ARG_PIVOT_ROOT          },
-                { "notify-ready",          required_argument, NULL, ARG_NOTIFY_READY        },
-                { "root-hash",             required_argument, NULL, ARG_ROOT_HASH           },
-                { "system-call-filter",    required_argument, NULL, ARG_SYSTEM_CALL_FILTER  },
+                { "help",                   no_argument,       NULL, 'h'                        },
+                { "version",                no_argument,       NULL, ARG_VERSION                },
+                { "directory",              required_argument, NULL, 'D'                        },
+                { "template",               required_argument, NULL, ARG_TEMPLATE               },
+                { "ephemeral",              no_argument,       NULL, 'x'                        },
+                { "user",                   required_argument, NULL, 'u'                        },
+                { "private-network",        no_argument,       NULL, ARG_PRIVATE_NETWORK        },
+                { "as-pid2",                no_argument,       NULL, 'a'                        },
+                { "boot",                   no_argument,       NULL, 'b'                        },
+                { "uuid",                   required_argument, NULL, ARG_UUID                   },
+                { "read-only",              no_argument,       NULL, ARG_READ_ONLY              },
+                { "capability",             required_argument, NULL, ARG_CAPABILITY             },
+                { "drop-capability",        required_argument, NULL, ARG_DROP_CAPABILITY        },
+                { "link-journal",           required_argument, NULL, ARG_LINK_JOURNAL           },
+                { "bind",                   required_argument, NULL, ARG_BIND                   },
+                { "bind-ro",                required_argument, NULL, ARG_BIND_RO                },
+                { "tmpfs",                  required_argument, NULL, ARG_TMPFS                  },
+                { "overlay",                required_argument, NULL, ARG_OVERLAY                },
+                { "overlay-ro",             required_argument, NULL, ARG_OVERLAY_RO             },
+                { "machine",                required_argument, NULL, 'M'                        },
+                { "slice",                  required_argument, NULL, 'S'                        },
+                { "setenv",                 required_argument, NULL, 'E'                        },
+                { "selinux-context",        required_argument, NULL, 'Z'                        },
+                { "selinux-apifs-context",  required_argument, NULL, 'L'                        },
+                { "quiet",                  no_argument,       NULL, 'q'                        },
+                { "share-system",           no_argument,       NULL, ARG_SHARE_SYSTEM           }, /* not documented */
+                { "register",               required_argument, NULL, ARG_REGISTER               },
+                { "keep-unit",              no_argument,       NULL, ARG_KEEP_UNIT              },
+                { "network-interface",      required_argument, NULL, ARG_NETWORK_INTERFACE      },
+                { "network-macvlan",        required_argument, NULL, ARG_NETWORK_MACVLAN        },
+                { "network-ipvlan",         required_argument, NULL, ARG_NETWORK_IPVLAN         },
+                { "network-veth",           no_argument,       NULL, 'n'                        },
+                { "network-veth-extra",     required_argument, NULL, ARG_NETWORK_VETH_EXTRA     },
+                { "network-bridge",         required_argument, NULL, ARG_NETWORK_BRIDGE         },
+                { "network-zone",           required_argument, NULL, ARG_NETWORK_ZONE           },
+                { "network-namespace-path", required_argument, NULL, ARG_NETWORK_NAMESPACE_PATH },
+                { "personality",            required_argument, NULL, ARG_PERSONALITY            },
+                { "image",                  required_argument, NULL, 'i'                        },
+                { "volatile",               optional_argument, NULL, ARG_VOLATILE               },
+                { "port",                   required_argument, NULL, 'p'                        },
+                { "property",               required_argument, NULL, ARG_PROPERTY               },
+                { "private-users",          optional_argument, NULL, ARG_PRIVATE_USERS          },
+                { "private-users-chown",    optional_argument, NULL, ARG_PRIVATE_USERS_CHOWN    },
+                { "kill-signal",            required_argument, NULL, ARG_KILL_SIGNAL            },
+                { "settings",               required_argument, NULL, ARG_SETTINGS               },
+                { "chdir",                  required_argument, NULL, ARG_CHDIR                  },
+                { "pivot-root",             required_argument, NULL, ARG_PIVOT_ROOT             },
+                { "notify-ready",           required_argument, NULL, ARG_NOTIFY_READY           },
+                { "root-hash",              required_argument, NULL, ARG_ROOT_HASH              },
+                { "system-call-filter",     required_argument, NULL, ARG_SYSTEM_CALL_FILTER     },
                 {}
         };
 
@@ -644,6 +650,13 @@ static int parse_argv(int argc, char *argv[]) {
                         arg_settings_mask |= SETTING_NETWORK;
                         break;
 
+                case ARG_NETWORK_NAMESPACE_PATH:
+                        r = parse_path_argument_and_warn(optarg, false, &arg_network_namespace_path);
+                        if (r < 0)
+                                return r;
+
+                        break;
+
                 case 'b':
                         if (arg_start_mode == START_PID2) {
                                 log_error("--boot and --as-pid2 may not be combined.");
@@ -1103,6 +1116,17 @@ static int parse_argv(int argc, char *argv[]) {
                         assert_not_reached("Unhandled option");
                 }
 
+        /* If --network-namespace-path is given with any other network-related option,
+         * we need to error out, to avoid conflicts between different network options. */
+        if (arg_network_namespace_path &&
+                (arg_network_interfaces || arg_network_macvlan ||
+                 arg_network_ipvlan || arg_network_veth_extra ||
+                 arg_network_bridge || arg_network_zone ||
+                 arg_network_veth || arg_private_network)) {
+                log_error("--network-namespace-path cannot be combined with other network options.");
+                return -EINVAL;
+        }
+
         parse_share_ns_env("SYSTEMD_NSPAWN_SHARE_NS_IPC", CLONE_NEWIPC);
         parse_share_ns_env("SYSTEMD_NSPAWN_SHARE_NS_PID", CLONE_NEWPID);
         parse_share_ns_env("SYSTEMD_NSPAWN_SHARE_NS_UTS", CLONE_NEWUTS);
@@ -2532,12 +2556,14 @@ static int outer_child(
                 int rtnl_socket,
                 int uid_shift_socket,
                 int unified_cgroup_hierarchy_socket,
-                FDSet *fds) {
+                FDSet *fds,
+                int netns_fd) {
 
         pid_t pid;
         ssize_t l;
         int r;
         _cleanup_close_ int fd = -1;
+        bool create_netns;
 
         assert(barrier);
         assert(directory);
@@ -2788,9 +2814,11 @@ static int outer_child(
         if (fd < 0)
                 return fd;
 
+        create_netns = !arg_network_namespace_path && arg_private_network;
+
         pid = raw_clone(SIGCHLD|CLONE_NEWNS|
                         arg_clone_ns_flags |
-                        (arg_private_network ? CLONE_NEWNET : 0) |
+                        (create_netns ? CLONE_NEWNET : 0) |
                         (arg_userns_mode != USER_NAMESPACE_NO ? CLONE_NEWUSER : 0));
         if (pid < 0)
                 return log_error_errno(errno, "Failed to fork inner child: %m");
@@ -2804,6 +2832,12 @@ static int outer_child(
                  * requested, so that we all are owned by the user if
                  * user namespaces are turned on. */
 
+                if (arg_network_namespace_path) {
+                        r = namespace_enter(-1, -1, netns_fd, -1, -1);
+                        if (r < 0)
+                                return r;
+                }
+
                 r = inner_child(barrier, directory, secondary, kmsg_socket, rtnl_socket, fds);
                 if (r < 0)
                         _exit(EXIT_FAILURE);
@@ -2836,6 +2870,7 @@ static int outer_child(
         notify_socket = safe_close(notify_socket);
         kmsg_socket = safe_close(kmsg_socket);
         rtnl_socket = safe_close(rtnl_socket);
+        netns_fd = safe_close(netns_fd);
 
         return 0;
 }
@@ -3311,6 +3346,7 @@ static int run(int master,
         int ifi = 0, r;
         ssize_t l;
         sigset_t mask_chld;
+        _cleanup_close_ int netns_fd = -1;
 
         assert_se(sigemptyset(&mask_chld) == 0);
         assert_se(sigaddset(&mask_chld, SIGCHLD) == 0);
@@ -3365,6 +3401,20 @@ static int run(int master,
         if (r < 0)
                 return log_error_errno(errno, "Failed to install SIGCHLD handler: %m");
 
+        if (arg_network_namespace_path) {
+                netns_fd = open(arg_network_namespace_path, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+                if (netns_fd < 0)
+                        return log_error_errno(errno, "Cannot open file %s: %m", arg_network_namespace_path);
+
+                r = fd_is_network_ns(netns_fd);
+                if (r < 0 && r != -ENOTTY)
+                        return log_error_errno(r, "Failed to check %s fs type: %m", arg_network_namespace_path);
+                if (r == 0) {
+                        log_error("Path %s doesn't refer to a network namespace", arg_network_namespace_path);
+                        return -EINVAL;
+                }
+        }
+
         *pid = raw_clone(SIGCHLD|CLONE_NEWNS);
         if (*pid < 0)
                 return log_error_errno(errno, "clone() failed%s: %m",
@@ -3401,7 +3451,8 @@ static int run(int master,
                                 rtnl_socket_pair[1],
                                 uid_shift_socket_pair[1],
                                 unified_cgroup_hierarchy_socket_pair[1],
-                                fds);
+                                fds,
+                                netns_fd);
                 if (r < 0)
                         _exit(EXIT_FAILURE);