#define LXC_USERNIC_CREATE 0
#define LXC_USERNIC_DELETE 1
+static bool is_privileged_over_netns(int netns_fd)
+{
+ int ret;
+ uid_t euid, ruid, suid;
+ bool bret = false;
+ int ofd = -1;
+
+ ofd = lxc_preserve_ns(getpid(), "net");
+ if (ofd < 0) {
+ usernic_error("Failed opening network namespace path for %d", getpid());
+ return false;
+ }
+
+ ret = getresuid(&ruid, &euid, &suid);
+ if (ret < 0) {
+ usernic_error("Failed to retrieve real, effective, and saved "
+ "user IDs: %s\n",
+ strerror(errno));
+ goto do_partial_cleanup;
+ }
+
+ ret = setns(netns_fd, CLONE_NEWNET);
+ if (ret < 0) {
+ usernic_error("Failed to setns() to network namespace %s\n",
+ strerror(errno));
+ goto do_partial_cleanup;
+ }
+
+ ret = setresuid(ruid, ruid, 0);
+ if (ret < 0) {
+ usernic_error("Failed to drop privilege by setting effective "
+ "user id and real user id to %d, and saved user "
+ "ID to 0: %s\n",
+ ruid, strerror(errno));
+ /* It's ok to jump to do_full_cleanup here since setresuid()
+ * will succeed when trying to set real, effective, and saved to
+ * values they currently have.
+ */
+ goto do_full_cleanup;
+ }
+
+ /* Test whether we are privileged over the network namespace. To do this
+ * we try to delete the loopback interface which is not possible. If we
+ * are privileged over the network namespace we will get ENOTSUP. If we
+ * are not privileged over the network namespace we will get EPERM.
+ */
+ ret = lxc_netdev_delete_by_name("lo");
+ if (ret == -ENOTSUP)
+ bret = true;
+
+do_full_cleanup:
+ ret = setresuid(ruid, euid, suid);
+ if (ret < 0) {
+ usernic_error("Failed to restore privilege by setting "
+ "effective user id to %d, real user id to %d, "
+ "and saved user ID to %d: %s\n", ruid, euid, suid,
+ strerror(errno));
+
+ bret = false;
+ }
+
+ ret = setns(ofd, CLONE_NEWNET);
+ if (ret < 0) {
+ usernic_error("Failed to setns() to original network namespace "
+ "of PID %d: %s\n", ofd, strerror(errno));
+
+ bret = false;
+ }
+
+do_partial_cleanup:
+
+ close(ofd);
+ return bret;
+}
+
int main(int argc, char *argv[])
{
int fd, ifindex, n, pid, request, ret;
char *me, *newname;
+ struct user_nic_args args;
+ int netns_fd = -1;
char *cnic = NULL, *nicname = NULL;
struct alloted_s *alloted = NULL;
- struct user_nic_args args;
if (argc < 7 || argc > 8) {
usage(argv[0], true);
exit(EXIT_FAILURE);
}
- ret = lxc_safe_int(args.pid, &pid);
- if (ret < 0) {
- usernic_error("Could not read pid: %s\n", args.pid);
- exit(EXIT_FAILURE);
+ if (request == LXC_USERNIC_CREATE) {
+ ret = lxc_safe_int(args.pid, &pid);
+ if (ret < 0) {
+ usernic_error("Could not read pid: %s\n", args.pid);
+ exit(EXIT_FAILURE);
+ }
+ } else if (request == LXC_USERNIC_DELETE) {
+ netns_fd = open(args.pid, O_RDONLY);
+ if (netns_fd < 0) {
+ usernic_error("Could not open \"%s\": %s\n", args.pid,
+ strerror(errno));
+ exit(EXIT_FAILURE);
+ }
}
if (!create_db_dir(LXC_USERNIC_DB)) {
usernic_error("%s", "Failed to create directory for db file\n");
+ if (netns_fd >= 0)
+ close(netns_fd);
exit(EXIT_FAILURE);
}
fd = open_and_lock(LXC_USERNIC_DB);
if (fd < 0) {
usernic_error("Failed to lock %s\n", LXC_USERNIC_DB);
+ if (netns_fd >= 0)
+ close(netns_fd);
exit(EXIT_FAILURE);
}
- if (!may_access_netns(pid)) {
- usernic_error("User %s may not modify netns for pid %d\n", me, pid);
- exit(EXIT_FAILURE);
+ if (request == LXC_USERNIC_CREATE) {
+ if (!may_access_netns(pid)) {
+ usernic_error("User %s may not modify netns for pid %d\n", me, pid);
+ exit(EXIT_FAILURE);
+ }
+ } else if (request == LXC_USERNIC_DELETE) {
+ bool has_priv;
+ has_priv = is_privileged_over_netns(netns_fd);
+ close(netns_fd);
+ if (!has_priv) {
+ usernic_error("%s", "Process is not privileged over "
+ "network namespace\n");
+ exit(EXIT_FAILURE);
+ }
}
n = get_alloted(me, args.type, args.link, &alloted);
return 0;
}
-static int lxc_delete_network_unpriv(const char *lxcpath, char *lxcname,
- struct lxc_netdev *netdev, pid_t pid)
+static int lxc_delete_network_unpriv_exec(const char *lxcpath, char *lxcname,
+ struct lxc_netdev *netdev,
+ const char *netns_path)
{
int bytes, ret;
pid_t child;
if (child == 0) {
int ret;
- char pidstr[LXC_NUMSTRLEN64];
close(pipefd[0]);
SYSERROR("Network link for network device \"%s\" is "
"missing", netdev->priv.veth_attr.pair);
- ret = snprintf(pidstr, LXC_NUMSTRLEN64, "%d", pid);
- if (ret < 0 || ret >= LXC_NUMSTRLEN64)
- exit(EXIT_FAILURE);
- pidstr[LXC_NUMSTRLEN64 - 1] = '\0';
-
INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath,
- lxcname, pidstr, netdev->link, netdev->priv.veth_attr.pair);
+ lxcname, netns_path, netdev->link, netdev->priv.veth_attr.pair);
execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath,
- lxcname, pidstr, "veth", netdev->link,
+ lxcname, netns_path, "veth", netdev->link,
netdev->priv.veth_attr.pair, (char *)NULL);
SYSERROR("Failed to exec lxc-user-nic.");
exit(EXIT_FAILURE);
return 0;
}
+bool lxc_delete_network_unpriv(struct lxc_handler *handler)
+{
+ int ret;
+ struct lxc_list *iterator;
+ struct lxc_list *network = &handler->conf->network;
+ /* strlen("/proc/") = 6
+ * +
+ * LXC_NUMSTRLEN64
+ * +
+ * strlen("/fd/") = 4
+ * +
+ * LXC_NUMSTRLEN64
+ * +
+ * \0
+ */
+ char netns_path[6 + LXC_NUMSTRLEN64 + 4 + LXC_NUMSTRLEN64 + 1];
+ bool deleted_all = true;
+
+ if (!am_unpriv())
+ return true;
+
+ *netns_path = '\0';
+
+ if (handler->netnsfd < 0) {
+ DEBUG("Cannot not guarantee safe deletion of network devices. "
+ "Manual cleanup maybe needed");
+ return false;
+ }
+
+ ret = snprintf(netns_path, sizeof(netns_path), "/proc/%d/fd/%d",
+ getpid(), handler->netnsfd);
+ if (ret < 0 || ret >= sizeof(netns_path))
+ return false;
+
+ lxc_list_for_each(iterator, network) {
+ char *hostveth = NULL;
+ struct lxc_netdev *netdev = iterator->elem;
+
+ /* We can only delete devices whose ifindex we have. If we don't
+ * have the index it means that we didn't create it.
+ */
+ if (!netdev->ifindex)
+ continue;
+
+ if (netdev->type == LXC_NET_PHYS) {
+ ret = lxc_netdev_rename_by_index(netdev->ifindex,
+ netdev->link);
+ if (ret < 0)
+ WARN("Failed to rename interface with index %d "
+ "to its initial name \"%s\"",
+ netdev->ifindex, netdev->link);
+ else
+ TRACE("Renamed interface with index %d to its "
+ "initial name \"%s\"",
+ netdev->ifindex, netdev->link);
+ continue;
+ }
+
+ ret = netdev_deconf[netdev->type](handler, netdev);
+ if (ret < 0)
+ WARN("Failed to deconfigure network device");
+
+ if (netdev->type != LXC_NET_VETH)
+ continue;
+
+ if (!is_ovs_bridge(netdev->link))
+ continue;
+
+ ret = lxc_delete_network_unpriv_exec(handler->lxcpath,
+ handler->name, netdev,
+ netns_path);
+ if (ret < 0) {
+ deleted_all = false;
+ WARN("Failed to remove port \"%s\" from openvswitch "
+ "bridge \"%s\"",
+ netdev->priv.veth_attr.pair, netdev->link);
+ continue;
+ }
+ INFO("Removed interface \"%s\" from \"%s\"", hostveth,
+ netdev->link);
+ }
+
+ return deleted_all;
+}
+
int lxc_create_network_priv(struct lxc_handler *handler)
{
bool am_root;
return 0;
}
-bool lxc_delete_network(struct lxc_handler *handler)
+bool lxc_delete_network_priv(struct lxc_handler *handler)
{
int ret;
struct lxc_list *iterator;
struct lxc_list *network = &handler->conf->network;
bool deleted_all = true;
+ if (am_unpriv())
+ return true;
+
lxc_list_for_each(iterator, network) {
char *hostveth = NULL;
struct lxc_netdev *netdev = iterator->elem;
* namespace is destroyed but in case we did not move the
* interface to the network namespace, we have to destroy it.
*/
- if (!am_unpriv()) {
- ret = lxc_netdev_delete_by_index(netdev->ifindex);
- if (-ret == ENODEV) {
- INFO("Interface \"%s\" with index %d already "
- "deleted or existing in different network "
- "namespace",
- netdev->name ? netdev->name : "(null)",
- netdev->ifindex);
- } else if (ret < 0) {
- deleted_all = false;
- WARN("Failed to remove interface \"%s\" with "
- "index %d: %s",
- netdev->name ? netdev->name : "(null)",
- netdev->ifindex, strerror(-ret));
- continue;
- }
- INFO("Removed interface \"%s\" with index %d",
- netdev->name ? netdev->name : "(null)",
- netdev->ifindex);
+ ret = lxc_netdev_delete_by_index(netdev->ifindex);
+ if (-ret == ENODEV) {
+ INFO("Interface \"%s\" with index %d already "
+ "deleted or existing in different network "
+ "namespace",
+ netdev->name ? netdev->name : "(null)",
+ netdev->ifindex);
+ } else if (ret < 0) {
+ deleted_all = false;
+ WARN("Failed to remove interface \"%s\" with "
+ "index %d: %s",
+ netdev->name ? netdev->name : "(null)",
+ netdev->ifindex, strerror(-ret));
+ continue;
}
+ INFO("Removed interface \"%s\" with index %d",
+ netdev->name ? netdev->name : "(null)",
+ netdev->ifindex);
if (netdev->type != LXC_NET_VETH)
continue;
- if (am_unpriv()) {
- if (is_ovs_bridge(netdev->link)) {
- ret = lxc_delete_network_unpriv(handler->lxcpath,
- handler->name,
- netdev, getpid());
- if (ret < 0)
- WARN("Failed to remove port \"%s\" "
- "from openvswitch bridge \"%s\"",
- netdev->priv.veth_attr.pair,
- netdev->link);
- }
-
- continue;
- }
-
/* Explicitly delete host veth device to prevent lingering
* devices. We had issues in LXD around this.
*/
extern int setup_private_host_hw_addr(char *veth1);
extern int netdev_get_mtu(int ifindex);
extern int lxc_create_network_priv(struct lxc_handler *handler);
-extern bool lxc_delete_network(struct lxc_handler *handler);
+extern bool lxc_delete_network_priv(struct lxc_handler *handler);
+extern bool lxc_delete_network_unpriv(struct lxc_handler *handler);
extern int lxc_find_gateway_addresses(struct lxc_handler *handler);
extern int lxc_create_network(const char *lxcpath, char *lxcname,
struct lxc_list *network, pid_t pid);
SYSERROR("Failed to clone a new set of namespaces.");
goto out_delete_net;
}
+
for (i = 0; i < LXC_NS_MAX; i++)
if (flags & ns_info[i].clone_flag)
INFO("Cloned %s.", ns_info[i].flag_name);
if (failed_before_rename)
goto out_delete_net;
+ handler->netnsfd = lxc_preserve_ns(handler->pid, "net");
+ if (handler->netnsfd < 0) {
+ ERROR("Failed to preserve network namespace");
+ goto out_delete_net;
+ }
+
/* Create the network configuration. */
if (handler->clone_flags & CLONE_NEWNET) {
if (lxc_create_network(handler->lxcpath, handler->name,
}
lxc_sync_fini(handler);
- handler->netnsfd = lxc_preserve_ns(handler->pid, "net");
return 0;
out_delete_net:
if (cgroups_connected)
cgroup_disconnect();
- if (handler->clone_flags & CLONE_NEWNET)
- lxc_delete_network(handler);
+
+ if (handler->clone_flags & CLONE_NEWNET) {
+ DEBUG("Tearing down network devices");
+ if (!lxc_delete_network_priv(handler))
+ DEBUG("Failed tearing down network devices");
+
+ if (!lxc_delete_network_unpriv(handler))
+ DEBUG("Failed tearing down network devices");
+ }
+
out_abort:
lxc_abort(name, handler);
lxc_sync_fini(handler);
handler->pinfd = -1;
}
+ if (handler->netnsfd >= 0) {
+ close(handler->netnsfd);
+ handler->netnsfd = -1;
+ }
+
return -1;
}
{
int status;
int err = -1;
- bool removed_all_netdevs = true;
struct lxc_conf *conf = handler->conf;
if (lxc_init(name, handler) < 0) {
err = lxc_poll(name, handler);
if (err) {
ERROR("LXC mainloop exited with error: %d.", err);
- if (handler->netnsfd >= 0) {
- close(handler->netnsfd);
- handler->netnsfd = -1;
- }
goto out_abort;
}
DEBUG("Pushing physical nics back to host namespace");
lxc_restore_phys_nics_to_netns(handler->netnsfd, handler->conf);
- DEBUG("Tearing down virtual network devices used by container \"%s\".", name);
- removed_all_netdevs = lxc_delete_network(handler);
-
if (handler->pinfd >= 0) {
close(handler->pinfd);
handler->pinfd = -1;
lxc_monitor_send_exit_code(name, status, handler->lxcpath);
err = lxc_error_set_and_log(handler->pid, status);
+
out_fini:
- if (!removed_all_netdevs) {
- DEBUG("Failed tearing down network devices used by container. Trying again!");
- removed_all_netdevs = lxc_delete_network(handler);
- if (!removed_all_netdevs)
- DEBUG("Failed tearing down network devices used by container. Not trying again!");
+ DEBUG("Tearing down network devices");
+ if (!lxc_delete_network_priv(handler))
+ DEBUG("Failed tearing down network devices");
+
+ if (!lxc_delete_network_unpriv(handler))
+ DEBUG("Failed tearing down network devices");
+
+ if (handler->netnsfd >= 0) {
+ close(handler->netnsfd);
+ handler->netnsfd = -1;
}
out_detach_blockdev: