From: Christian Brauner Date: Mon, 28 Aug 2017 10:23:29 +0000 (+0200) Subject: network: non-functional changes X-Git-Tag: lxc-2.0.9~49^2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4d26f2474ae2de8aa9e1b295513f49b0fcb52992;p=thirdparty%2Flxc.git network: non-functional changes This moves all of the network handling code into network.{c,h}. This makes what is going on much clearer. Also it's easier to find relevant code if it is all in one place. Signed-off-by: Christian Brauner --- diff --git a/src/lxc/conf.c b/src/lxc/conf.c index 9baf46b2b..d92937ce7 100644 --- a/src/lxc/conf.c +++ b/src/lxc/conf.c @@ -235,8 +235,6 @@ char *lxchook_names[NUM_LXC_HOOKS] = {"pre-start", "pre-mount", "mount", "autodev", "start", "stop", "post-stop", "clone", "destroy"}; -typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *); - struct mount_opt { char *name; int clear; @@ -262,38 +260,6 @@ struct lxc_conf *current_config; /* Declare this here, since we don't want to reshuffle the whole file. */ static int in_caplist(int cap, struct lxc_list *caps); -static int instantiate_veth(struct lxc_handler *, struct lxc_netdev *); -static int instantiate_macvlan(struct lxc_handler *, struct lxc_netdev *); -static int instantiate_vlan(struct lxc_handler *, struct lxc_netdev *); -static int instantiate_phys(struct lxc_handler *, struct lxc_netdev *); -static int instantiate_empty(struct lxc_handler *, struct lxc_netdev *); -static int instantiate_none(struct lxc_handler *, struct lxc_netdev *); - -static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = { - [LXC_NET_VETH] = instantiate_veth, - [LXC_NET_MACVLAN] = instantiate_macvlan, - [LXC_NET_VLAN] = instantiate_vlan, - [LXC_NET_PHYS] = instantiate_phys, - [LXC_NET_EMPTY] = instantiate_empty, - [LXC_NET_NONE] = instantiate_none, -}; - -static int shutdown_veth(struct lxc_handler *, struct lxc_netdev *); -static int shutdown_macvlan(struct lxc_handler *, struct lxc_netdev *); -static int shutdown_vlan(struct lxc_handler *, struct lxc_netdev *); -static int shutdown_phys(struct lxc_handler *, struct lxc_netdev *); -static int shutdown_empty(struct lxc_handler *, struct lxc_netdev *); -static int shutdown_none(struct lxc_handler *, struct lxc_netdev *); - -static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = { - [LXC_NET_VETH] = shutdown_veth, - [LXC_NET_MACVLAN] = shutdown_macvlan, - [LXC_NET_VLAN] = shutdown_vlan, - [LXC_NET_PHYS] = shutdown_phys, - [LXC_NET_EMPTY] = shutdown_empty, - [LXC_NET_NONE] = shutdown_none, -}; - static struct mount_opt mount_opt[] = { { "async", 1, MS_SYNCHRONOUS }, { "atime", 1, MS_NOATIME }, @@ -471,8 +437,7 @@ static int run_script_argv(const char *name, const char *section, return run_buffer(buffer); } -static int run_script(const char *name, const char *section, const char *script, - ...) +int run_script(const char *name, const char *section, const char *script, ...) { int ret; char *buffer, *p; @@ -2286,331 +2251,6 @@ static int dropcaps_except(struct lxc_list *caps) return 0; } -static int setup_hw_addr(char *hwaddr, const char *ifname) -{ - struct sockaddr sockaddr; - struct ifreq ifr; - int ret, fd, saved_errno; - - ret = lxc_convert_mac(hwaddr, &sockaddr); - if (ret) { - ERROR("mac address '%s' conversion failed : %s", - hwaddr, strerror(-ret)); - return -1; - } - - memcpy(ifr.ifr_name, ifname, IFNAMSIZ); - ifr.ifr_name[IFNAMSIZ-1] = '\0'; - memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr)); - - fd = socket(AF_INET, SOCK_DGRAM, 0); - if (fd < 0) { - ERROR("socket failure : %s", strerror(errno)); - return -1; - } - - ret = ioctl(fd, SIOCSIFHWADDR, &ifr); - saved_errno = errno; - close(fd); - if (ret) - ERROR("ioctl failure : %s", strerror(saved_errno)); - - DEBUG("mac address '%s' on '%s' has been setup", hwaddr, ifr.ifr_name); - - return ret; -} - -static int setup_ipv4_addr(struct lxc_list *ip, int ifindex) -{ - struct lxc_list *iterator; - struct lxc_inetdev *inetdev; - int err; - - lxc_list_for_each(iterator, ip) { - - inetdev = iterator->elem; - - err = lxc_ipv4_addr_add(ifindex, &inetdev->addr, - &inetdev->bcast, inetdev->prefix); - if (err) { - ERROR("failed to setup_ipv4_addr ifindex %d : %s", - ifindex, strerror(-err)); - return -1; - } - } - - return 0; -} - -static int setup_ipv6_addr(struct lxc_list *ip, int ifindex) -{ - struct lxc_list *iterator; - struct lxc_inet6dev *inet6dev; - int err; - - lxc_list_for_each(iterator, ip) { - - inet6dev = iterator->elem; - - err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr, - &inet6dev->mcast, &inet6dev->acast, - inet6dev->prefix); - if (err) { - ERROR("failed to setup_ipv6_addr ifindex %d : %s", - ifindex, strerror(-err)); - return -1; - } - } - - return 0; -} - -static int setup_netdev(struct lxc_netdev *netdev) -{ - char ifname[IFNAMSIZ]; - char *current_ifname = ifname; - int err; - - /* empty network namespace */ - if (!netdev->ifindex) { - if (netdev->flags & IFF_UP) { - err = lxc_netdev_up("lo"); - if (err) { - ERROR("failed to set the loopback up : %s", - strerror(-err)); - return -1; - } - } - if (netdev->type != LXC_NET_VETH) - return 0; - netdev->ifindex = if_nametoindex(netdev->name); - } - - /* get the new ifindex in case of physical netdev */ - if (netdev->type == LXC_NET_PHYS) { - if (!(netdev->ifindex = if_nametoindex(netdev->link))) { - ERROR("failed to get ifindex for %s", - netdev->link); - return -1; - } - } - - /* retrieve the name of the interface */ - if (!if_indextoname(netdev->ifindex, current_ifname)) { - ERROR("no interface corresponding to index '%d'", - netdev->ifindex); - return -1; - } - - /* Default: let the system to choose one interface name. - * When the IFLA_IFNAME attribute is passed something like "%d" - * netlink will replace the format specifier with an appropriate index. - */ - if (!netdev->name) - netdev->name = netdev->type == LXC_NET_PHYS ? - netdev->link : "eth%d"; - - /* rename the interface name */ - if (strcmp(ifname, netdev->name) != 0) { - err = lxc_netdev_rename_by_name(ifname, netdev->name); - if (err) { - ERROR("failed to rename %s->%s : %s", ifname, netdev->name, - strerror(-err)); - return -1; - } - } - - /* Re-read the name of the interface because its name has changed - * and would be automatically allocated by the system - */ - if (!if_indextoname(netdev->ifindex, current_ifname)) { - ERROR("no interface corresponding to index '%d'", - netdev->ifindex); - return -1; - } - - /* set a mac address */ - if (netdev->hwaddr) { - if (setup_hw_addr(netdev->hwaddr, current_ifname)) { - ERROR("failed to setup hw address for '%s'", - current_ifname); - return -1; - } - } - - /* setup ipv4 addresses on the interface */ - if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) { - ERROR("failed to setup ip addresses for '%s'", - ifname); - return -1; - } - - /* setup ipv6 addresses on the interface */ - if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) { - ERROR("failed to setup ipv6 addresses for '%s'", - ifname); - return -1; - } - - /* set the network device up */ - if (netdev->flags & IFF_UP) { - int err; - - err = lxc_netdev_up(current_ifname); - if (err) { - ERROR("failed to set '%s' up : %s", current_ifname, - strerror(-err)); - return -1; - } - - /* the network is up, make the loopback up too */ - err = lxc_netdev_up("lo"); - if (err) { - ERROR("failed to set the loopback up : %s", - strerror(-err)); - return -1; - } - } - - /* We can only set up the default routes after bringing - * up the interface, sine bringing up the interface adds - * the link-local routes and we can't add a default - * route if the gateway is not reachable. */ - - /* setup ipv4 gateway on the interface */ - if (netdev->ipv4_gateway) { - if (!(netdev->flags & IFF_UP)) { - ERROR("Cannot add ipv4 gateway for %s when not bringing up the interface", ifname); - return -1; - } - - if (lxc_list_empty(&netdev->ipv4)) { - ERROR("Cannot add ipv4 gateway for %s when not assigning an address", ifname); - return -1; - } - - err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway); - if (err) { - err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway); - if (err) { - ERROR("failed to add ipv4 dest for '%s': %s", - ifname, strerror(-err)); - } - - err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway); - if (err) { - ERROR("failed to setup ipv4 gateway for '%s': %s", - ifname, strerror(-err)); - if (netdev->ipv4_gateway_auto) { - char buf[INET_ADDRSTRLEN]; - inet_ntop(AF_INET, netdev->ipv4_gateway, buf, sizeof(buf)); - ERROR("tried to set autodetected ipv4 gateway '%s'", buf); - } - return -1; - } - } - } - - /* setup ipv6 gateway on the interface */ - if (netdev->ipv6_gateway) { - if (!(netdev->flags & IFF_UP)) { - ERROR("Cannot add ipv6 gateway for %s when not bringing up the interface", ifname); - return -1; - } - - if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) { - ERROR("Cannot add ipv6 gateway for %s when not assigning an address", ifname); - return -1; - } - - err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway); - if (err) { - err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway); - if (err) { - ERROR("failed to add ipv6 dest for '%s': %s", - ifname, strerror(-err)); - } - - err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway); - if (err) { - ERROR("failed to setup ipv6 gateway for '%s': %s", - ifname, strerror(-err)); - if (netdev->ipv6_gateway_auto) { - char buf[INET6_ADDRSTRLEN]; - inet_ntop(AF_INET6, netdev->ipv6_gateway, buf, sizeof(buf)); - ERROR("tried to set autodetected ipv6 gateway '%s'", buf); - } - return -1; - } - } - } - - DEBUG("'%s' has been setup", current_ifname); - - return 0; -} - -static int setup_network(struct lxc_list *network) -{ - struct lxc_list *iterator; - struct lxc_netdev *netdev; - - lxc_list_for_each(iterator, network) { - - netdev = iterator->elem; - - if (setup_netdev(netdev)) { - ERROR("failed to setup netdev"); - return -1; - } - } - - if (!lxc_list_empty(network)) - INFO("network has been setup"); - - return 0; -} - -/* try to move physical nics to the init netns */ -void lxc_restore_phys_nics_to_netns(int netnsfd, struct lxc_conf *conf) -{ - int i, oldfd; - char ifname[IFNAMSIZ]; - - if (netnsfd < 0 || conf->num_savednics == 0) - return; - - INFO("Running to reset %d nic names.", conf->num_savednics); - - oldfd = lxc_preserve_ns(getpid(), "net"); - if (oldfd < 0) { - SYSERROR("Failed to open monitor netns fd."); - return; - } - - if (setns(netnsfd, 0) != 0) { - SYSERROR("Failed to enter container netns to reset nics"); - close(oldfd); - return; - } - for (i=0; inum_savednics; i++) { - struct saved_nic *s = &conf->saved_nics[i]; - /* retrieve the name of the interface */ - if (!if_indextoname(s->ifindex, ifname)) { - WARN("no interface corresponding to index '%d'", s->ifindex); - continue; - } - if (lxc_netdev_move_by_name(ifname, 1, s->orig_name)) - WARN("Error moving nic name:%s back to host netns", ifname); - free(s->orig_name); - } - conf->num_savednics = 0; - - if (setns(oldfd, 0) != 0) - SYSERROR("Failed to re-enter monitor's netns"); - close(oldfd); -} - static char *default_rootfs_mount = LXCROOTFSMOUNT; struct lxc_conf *lxc_conf_init(void) @@ -2675,688 +2315,6 @@ struct lxc_conf *lxc_conf_init(void) return new; } -static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev) -{ - char *veth1, *veth2; - char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ]; - int bridge_index, err; - unsigned int mtu = 0; - - if (netdev->priv.veth_attr.pair) { - veth1 = netdev->priv.veth_attr.pair; - if (handler->conf->reboot) - lxc_netdev_delete_by_name(veth1); - } else { - err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX"); - if (err >= sizeof(veth1buf)) { /* can't *really* happen, but... */ - ERROR("veth1 name too long"); - return -1; - } - veth1 = lxc_mkifname(veth1buf); - if (!veth1) { - ERROR("failed to allocate a temporary name"); - return -1; - } - /* store away for deconf */ - memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ); - } - - snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX"); - veth2 = lxc_mkifname(veth2buf); - if (!veth2) { - ERROR("failed to allocate a temporary name"); - goto out_delete; - } - - err = lxc_veth_create(veth1, veth2); - if (err) { - ERROR("failed to create veth pair \"%s\" and \"%s\": %s", veth1, - veth2, strerror(-err)); - goto out_delete; - } - - /* changing the high byte of the mac address to 0xfe, the bridge interface - * will always keep the host's mac address and not take the mac address - * of a container */ - err = setup_private_host_hw_addr(veth1); - if (err) { - ERROR("failed to change mac address of host interface \"%s\": %s", - veth1, strerror(-err)); - goto out_delete; - } - - netdev->ifindex = if_nametoindex(veth2); - if (!netdev->ifindex) { - ERROR("failed to retrieve the index for \"%s\"", veth2); - goto out_delete; - } - - if (netdev->mtu) { - if (lxc_safe_uint(netdev->mtu, &mtu) < 0) - WARN("failed to parse mtu from"); - else - INFO("retrieved mtu %d", mtu); - } else if (netdev->link) { - bridge_index = if_nametoindex(netdev->link); - if (bridge_index) { - mtu = netdev_get_mtu(bridge_index); - INFO("retrieved mtu %d from %s", mtu, netdev->link); - } else { - mtu = netdev_get_mtu(netdev->ifindex); - INFO("retrieved mtu %d from %s", mtu, veth2); - } - } - - if (mtu) { - err = lxc_netdev_set_mtu(veth1, mtu); - if (!err) - err = lxc_netdev_set_mtu(veth2, mtu); - if (err) { - ERROR("failed to set mtu \"%d\" for veth pair \"%s\" " - "and \"%s\": %s", - mtu, veth1, veth2, strerror(-err)); - goto out_delete; - } - } - - if (netdev->link) { - err = lxc_bridge_attach(netdev->link, veth1); - if (err) { - ERROR("failed to attach \"%s\" to bridge \"%s\": %s", - veth1, netdev->link, strerror(-err)); - goto out_delete; - } - INFO("attached \"%s\" to bridge \"%s\"", veth1, netdev->link); - } - - err = lxc_netdev_up(veth1); - if (err) { - ERROR("failed to set \"%s\" up: %s", veth1, strerror(-err)); - goto out_delete; - } - - if (netdev->upscript) { - err = run_script(handler->name, "net", netdev->upscript, "up", - "veth", veth1, (char*) NULL); - if (err) - goto out_delete; - } - - DEBUG("instantiated veth \"%s/%s\", index is \"%d\"", veth1, veth2, - netdev->ifindex); - - return 0; - -out_delete: - if (netdev->ifindex != 0) - lxc_netdev_delete_by_name(veth1); - if (!netdev->priv.veth_attr.pair) - free(veth1); - free(veth2); - return -1; -} - -static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev) -{ - char *veth1; - int err; - - if (netdev->priv.veth_attr.pair) - veth1 = netdev->priv.veth_attr.pair; - else - veth1 = netdev->priv.veth_attr.veth1; - - if (netdev->downscript) { - err = run_script(handler->name, "net", netdev->downscript, - "down", "veth", veth1, (char*) NULL); - if (err) - return -1; - } - return 0; -} - -static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev) -{ - char peerbuf[IFNAMSIZ], *peer; - int err; - - if (!netdev->link) { - ERROR("no link specified for macvlan netdev"); - return -1; - } - - err = snprintf(peerbuf, sizeof(peerbuf), "mcXXXXXX"); - if (err >= sizeof(peerbuf)) - return -1; - - peer = lxc_mkifname(peerbuf); - if (!peer) { - ERROR("failed to make a temporary name"); - return -1; - } - - err = lxc_macvlan_create(netdev->link, peer, - netdev->priv.macvlan_attr.mode); - if (err) { - ERROR("failed to create macvlan interface '%s' on '%s' : %s", - peer, netdev->link, strerror(-err)); - goto out; - } - - netdev->ifindex = if_nametoindex(peer); - if (!netdev->ifindex) { - ERROR("failed to retrieve the index for %s", peer); - goto out; - } - - if (netdev->upscript) { - err = run_script(handler->name, "net", netdev->upscript, "up", - "macvlan", netdev->link, (char*) NULL); - if (err) - goto out; - } - - DEBUG("instantiated macvlan '%s', index is '%d' and mode '%d'", - peer, netdev->ifindex, netdev->priv.macvlan_attr.mode); - - return 0; -out: - lxc_netdev_delete_by_name(peer); - free(peer); - return -1; -} - -static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev) -{ - int err; - - if (netdev->downscript) { - err = run_script(handler->name, "net", netdev->downscript, - "down", "macvlan", netdev->link, - (char*) NULL); - if (err) - return -1; - } - return 0; -} - -/* XXX: merge with instantiate_macvlan */ -static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev) -{ - char peer[IFNAMSIZ]; - int err; - static uint16_t vlan_cntr = 0; - unsigned int mtu = 0; - - if (!netdev->link) { - ERROR("no link specified for vlan netdev"); - return -1; - } - - err = snprintf(peer, sizeof(peer), "vlan%d-%d", netdev->priv.vlan_attr.vid, vlan_cntr++); - if (err >= sizeof(peer)) { - ERROR("peer name too long"); - return -1; - } - - err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid); - if (err) { - ERROR("failed to create vlan interface '%s' on '%s' : %s", - peer, netdev->link, strerror(-err)); - return -1; - } - - netdev->ifindex = if_nametoindex(peer); - if (!netdev->ifindex) { - ERROR("failed to retrieve the ifindex for %s", peer); - lxc_netdev_delete_by_name(peer); - return -1; - } - - DEBUG("instantiated vlan '%s', ifindex is '%d'", " vlan1000", - netdev->ifindex); - if (netdev->mtu) { - if (lxc_safe_uint(netdev->mtu, &mtu) < 0) { - ERROR("Failed to retrieve mtu from: '%d'/'%s'.", - netdev->ifindex, netdev->name); - return -1; - } - err = lxc_netdev_set_mtu(peer, mtu); - if (err) { - ERROR("failed to set mtu '%s' for %s : %s", - netdev->mtu, peer, strerror(-err)); - lxc_netdev_delete_by_name(peer); - return -1; - } - } - - return 0; -} - -static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev) -{ - return 0; -} - -static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev) -{ - if (!netdev->link) { - ERROR("no link specified for the physical interface"); - return -1; - } - - netdev->ifindex = if_nametoindex(netdev->link); - if (!netdev->ifindex) { - ERROR("failed to retrieve the index for %s", netdev->link); - return -1; - } - - if (netdev->upscript) { - int err; - err = run_script(handler->name, "net", netdev->upscript, - "up", "phys", netdev->link, (char*) NULL); - if (err) - return -1; - } - - return 0; -} - -static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev) -{ - int err; - - if (netdev->downscript) { - err = run_script(handler->name, "net", netdev->downscript, - "down", "phys", netdev->link, (char*) NULL); - if (err) - return -1; - } - return 0; -} - -static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev) -{ - netdev->ifindex = 0; - return 0; -} - -static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev) -{ - netdev->ifindex = 0; - if (netdev->upscript) { - int err; - err = run_script(handler->name, "net", netdev->upscript, - "up", "empty", (char*) NULL); - if (err) - return -1; - } - return 0; -} - -static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev) -{ - int err; - - if (netdev->downscript) { - err = run_script(handler->name, "net", netdev->downscript, - "down", "empty", (char*) NULL); - if (err) - return -1; - } - return 0; -} - -static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev) -{ - return 0; -} - -int lxc_requests_empty_network(struct lxc_handler *handler) -{ - struct lxc_list *network = &handler->conf->network; - struct lxc_list *iterator; - struct lxc_netdev *netdev; - bool found_none = false, found_nic = false; - - if (lxc_list_empty(network)) - return 0; - - lxc_list_for_each(iterator, network) { - - netdev = iterator->elem; - - if (netdev->type == LXC_NET_NONE) - found_none = true; - else - found_nic = true; - } - if (found_none && !found_nic) - return 1; - return 0; -} - -int lxc_create_network(struct lxc_handler *handler) -{ - struct lxc_list *network = &handler->conf->network; - struct lxc_list *iterator; - struct lxc_netdev *netdev; - int am_root = (getuid() == 0); - - if (!am_root) - return 0; - - lxc_log_configured_netdevs(handler->conf); - - lxc_list_for_each(iterator, network) { - - netdev = iterator->elem; - - if (netdev_conf[netdev->type](handler, netdev)) { - ERROR("failed to create netdev"); - return -1; - } - - } - - return 0; -} - -bool lxc_delete_network(struct lxc_handler *handler) -{ - int ret; - struct lxc_list *iterator; - struct lxc_list *network = &handler->conf->network; - bool deleted_all = true; - - lxc_list_for_each(iterator, network) { - char *hostveth = NULL; - struct lxc_netdev *netdev = iterator->elem; - - /* We can only delete devices whose ifindex we have. If we don't - * have the index it means that we didn't create it. - */ - if (!netdev->ifindex) - continue; - - if (netdev->type == LXC_NET_PHYS) { - ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link); - if (ret < 0) - WARN("Failed to rename interface with index %d " - "to its initial name \"%s\"", - netdev->ifindex, netdev->link); - else - TRACE("Renamed interface with index %d to its " - "initial name \"%s\"", - netdev->ifindex, netdev->link); - continue; - } - - ret = netdev_deconf[netdev->type](handler, netdev); - if (ret < 0) - WARN("Failed to deconfigure network device"); - - /* Recent kernels remove the virtual interfaces when the network - * namespace is destroyed but in case we did not move the - * interface to the network namespace, we have to destroy it. - */ - if (!am_unpriv()) { - ret = lxc_netdev_delete_by_index(netdev->ifindex); - if (-ret == ENODEV) { - INFO("Interface \"%s\" with index %d already " - "deleted or existing in different network " - "namespace", - netdev->name ? netdev->name : "(null)", - netdev->ifindex); - } else if (ret < 0) { - deleted_all = false; - WARN("Failed to remove interface \"%s\" with " - "index %d: %s", - netdev->name ? netdev->name : "(null)", - netdev->ifindex, strerror(-ret)); - continue; - } - INFO("Removed interface \"%s\" with index %d", - netdev->name ? netdev->name : "(null)", - netdev->ifindex); - } - - if (netdev->type != LXC_NET_VETH) - continue; - - if (am_unpriv()) { - if (is_ovs_bridge(netdev->link)) { - ret = lxc_unpriv_delete_nic(handler->lxcpath, - handler->name, - netdev, getpid()); - if (ret < 0) - WARN("Failed to remove port \"%s\" " - "from openvswitch bridge \"%s\"", - netdev->priv.veth_attr.pair, - netdev->link); - } - - continue; - } - - /* Explicitly delete host veth device to prevent lingering - * devices. We had issues in LXD around this. - */ - if (netdev->priv.veth_attr.pair) - hostveth = netdev->priv.veth_attr.pair; - else - hostveth = netdev->priv.veth_attr.veth1; - if (*hostveth == '\0') - continue; - - ret = lxc_netdev_delete_by_name(hostveth); - if (ret < 0) { - deleted_all = false; - WARN("Failed to remove interface \"%s\" from \"%s\": %s", - hostveth, netdev->link, strerror(-ret)); - continue; - } - INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link); - - if (!is_ovs_bridge(netdev->link)) { - netdev->priv.veth_attr.veth1[0] = '\0'; - continue; - } - - /* Delete the openvswitch port. */ - ret = lxc_ovs_delete_port(netdev->link, hostveth); - if (ret < 0) - WARN("Failed to remove port \"%s\" from openvswitch " - "bridge \"%s\"", hostveth, netdev->link); - else - INFO("Removed port \"%s\" from openvswitch bridge \"%s\"", - hostveth, netdev->link); - - netdev->priv.veth_attr.veth1[0] = '\0'; - } - - return deleted_all; -} - -#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic" -static int unpriv_assign_nic(const char *lxcpath, char *lxcname, - struct lxc_netdev *netdev, pid_t pid) -{ - int ret; - pid_t child; - int bytes, pipefd[2]; - char *token, *saveptr = NULL; - char netdev_link[IFNAMSIZ + 1]; - char buffer[MAXPATHLEN] = {0}; - - if (netdev->type != LXC_NET_VETH) { - ERROR("nic type %d not support for unprivileged use", - netdev->type); - return -1; - } - - if (pipe(pipefd) < 0) { - SYSERROR("pipe failed"); - return -1; - } - - child = fork(); - if (child < 0) { - SYSERROR("fork"); - close(pipefd[0]); - close(pipefd[1]); - return -1; - } - - if (child == 0) { // child - /* Call lxc-user-nic pid type bridge. */ - int ret; - char pidstr[LXC_NUMSTRLEN64]; - - close(pipefd[0]); /* Close the read-end of the pipe. */ - - /* Redirect stdout to write-end of the pipe. */ - ret = dup2(pipefd[1], STDOUT_FILENO); - if (ret >= 0) - ret = dup2(pipefd[1], STDERR_FILENO); - close(pipefd[1]); /* Close the write-end of the pipe. */ - if (ret < 0) { - SYSERROR("Failed to dup2() to redirect stdout to pipe file descriptor."); - exit(EXIT_FAILURE); - } - - if (netdev->link) - strncpy(netdev_link, netdev->link, IFNAMSIZ); - else - strncpy(netdev_link, "none", IFNAMSIZ); - - ret = snprintf(pidstr, LXC_NUMSTRLEN64, "%d", pid); - if (ret < 0 || ret >= LXC_NUMSTRLEN64) - exit(EXIT_FAILURE); - pidstr[LXC_NUMSTRLEN64 - 1] = '\0'; - - INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath, - lxcname, pidstr, netdev_link, - netdev->name ? netdev->name : "(null)"); - if (netdev->name) - execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create", - lxcpath, lxcname, pidstr, "veth", netdev_link, - netdev->name, (char *)NULL); - else - execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create", - lxcpath, lxcname, pidstr, "veth", netdev_link, - (char *)NULL); - SYSERROR("Failed to exec lxc-user-nic."); - exit(EXIT_FAILURE); - } - - /* close the write-end of the pipe */ - close(pipefd[1]); - - bytes = read(pipefd[0], &buffer, MAXPATHLEN); - if (bytes < 0) { - SYSERROR("Failed to read from pipe file descriptor."); - close(pipefd[0]); - return -1; - } - buffer[bytes - 1] = '\0'; - - if (wait_for_pid(child) != 0) { - ERROR("lxc-user-nic failed to configure requested network: %s", - buffer[0] != '\0' ? buffer : "(null)"); - close(pipefd[0]); - return -1; - } - TRACE("Received output \"%s\" from lxc-user-nic", buffer); - - /* close the read-end of the pipe */ - close(pipefd[0]); - - /* fill netdev->name field */ - token = strtok_r(buffer, ":", &saveptr); - if (!token) - return -1; - - netdev->name = malloc(IFNAMSIZ + 1); - if (!netdev->name) { - SYSERROR("Failed to allocate memory."); - return -1; - } - memset(netdev->name, 0, IFNAMSIZ + 1); - strncpy(netdev->name, token, IFNAMSIZ); - - /* fill netdev->veth_attr.pair field */ - token = strtok_r(NULL, ":", &saveptr); - if (!token) - return -1; - - netdev->priv.veth_attr.pair = strdup(token); - if (!netdev->priv.veth_attr.pair) { - ERROR("Failed to allocate memory."); - return -1; - } - - /* fill netdev->veth_attr.pair field */ - token = strtok_r(NULL, ":", &saveptr); - if (!token) - return -1; - - ret = lxc_safe_int(token, &netdev->ifindex); - if (ret < 0) { - ERROR("Failed to parse ifindex for network device \"%s\"", netdev->name); - return -1; - } - - return 0; -} - -int lxc_assign_network(const char *lxcpath, char *lxcname, - struct lxc_list *network, pid_t pid) -{ - struct lxc_list *iterator; - struct lxc_netdev *netdev; - char ifname[IFNAMSIZ]; - int am_root = (getuid() == 0); - int err; - - lxc_list_for_each(iterator, network) { - - netdev = iterator->elem; - - if (netdev->type == LXC_NET_VETH && !am_root) { - if (netdev->mtu) - INFO("mtu ignored due to insufficient privilege"); - if (unpriv_assign_nic(lxcpath, lxcname, netdev, pid)) - return -1; - // lxc-user-nic has moved the nic to the new ns. - // unpriv_assign_nic() fills in netdev->name. - // netdev->ifindex will be filed in at setup_netdev. - continue; - } - - /* empty network namespace, nothing to move */ - if (!netdev->ifindex) - continue; - - /* retrieve the name of the interface */ - if (!if_indextoname(netdev->ifindex, ifname)) { - ERROR("no interface corresponding to index '%d'", netdev->ifindex); - return -1; - } - - err = lxc_netdev_move_by_name(ifname, pid, NULL); - if (err) { - ERROR("failed to move '%s' to the container : %s", - netdev->link, strerror(-err)); - return -1; - } - - DEBUG("move '%s'/'%s' to '%d': .", ifname, netdev->name, pid); - } - - return 0; -} - static int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf, size_t buf_size) { @@ -3628,54 +2586,6 @@ again: return freeid; } -int lxc_find_gateway_addresses(struct lxc_handler *handler) -{ - struct lxc_list *network = &handler->conf->network; - struct lxc_list *iterator; - struct lxc_netdev *netdev; - int link_index; - - lxc_list_for_each(iterator, network) { - netdev = iterator->elem; - - if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto) - continue; - - if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) { - ERROR("gateway = auto only supported for " - "veth and macvlan"); - return -1; - } - - if (!netdev->link) { - ERROR("gateway = auto needs a link interface"); - return -1; - } - - link_index = if_nametoindex(netdev->link); - if (!link_index) - return -EINVAL; - - if (netdev->ipv4_gateway_auto) { - if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) { - ERROR("failed to automatically find ipv4 gateway " - "address from link interface '%s'", netdev->link); - return -1; - } - } - - if (netdev->ipv6_gateway_auto) { - if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) { - ERROR("failed to automatically find ipv6 gateway " - "address from link interface '%s'", netdev->link); - return -1; - } - } - } - - return 0; -} - int lxc_create_tty(const char *name, struct lxc_conf *conf) { struct lxc_tty_info *tty_info = &conf->tty_info; @@ -4153,7 +3063,7 @@ int lxc_setup(struct lxc_handler *handler) } } - if (setup_network(&lxc_conf->network)) { + if (lxc_setup_network_in_child_namespaces(lxc_conf, &lxc_conf->network)) { ERROR("failed to setup the network for '%s'", name); return -1; } @@ -4509,7 +3419,6 @@ int lxc_clear_environment(struct lxc_conf *c) return 0; } - int lxc_clear_mount_entries(struct lxc_conf *c) { struct lxc_list *it,*next; @@ -5113,89 +4022,3 @@ struct lxc_list *sort_cgroup_settings(struct lxc_list* cgroup_settings) return result; } - -int lxc_unpriv_delete_nic(const char *lxcpath, char *lxcname, - struct lxc_netdev *netdev, pid_t pid) -{ - pid_t child; - int bytes, pipefd[2]; - char buffer[MAXPATHLEN] = {0}; - - if (netdev->type != LXC_NET_VETH) { - ERROR("nic type %d not support for unprivileged use", - netdev->type); - return -1; - } - - if (pipe(pipefd) < 0) { - SYSERROR("pipe failed"); - return -1; - } - - child = fork(); - if (child < 0) { - SYSERROR("fork"); - close(pipefd[0]); - close(pipefd[1]); - return -1; - } - - if (child == 0) { /* child */ - /* Call lxc-user-nic pid type bridge. */ - int ret; - char pidstr[LXC_NUMSTRLEN64]; - - close(pipefd[0]); /* Close the read-end of the pipe. */ - - /* Redirect stdout to write-end of the pipe. */ - ret = dup2(pipefd[1], STDOUT_FILENO); - if (ret >= 0) - ret = dup2(pipefd[1], STDERR_FILENO); - close(pipefd[1]); /* Close the write-end of the pipe. */ - if (ret < 0) { - SYSERROR("Failed to dup2() to redirect stdout to pipe file descriptor."); - exit(EXIT_FAILURE); - } - - if (!netdev->link) - SYSERROR("Network link for network device \"%s\" is " - "missing", netdev->priv.veth_attr.pair); - - ret = snprintf(pidstr, LXC_NUMSTRLEN64, "%d", pid); - if (ret < 0 || ret >= LXC_NUMSTRLEN64) - exit(EXIT_FAILURE); - pidstr[LXC_NUMSTRLEN64 - 1] = '\0'; - - INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath, - lxcname, pidstr, netdev->link, netdev->priv.veth_attr.pair); - execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath, - lxcname, pidstr, "veth", netdev->link, - netdev->priv.veth_attr.pair, (char *)NULL); - SYSERROR("Failed to exec lxc-user-nic."); - exit(EXIT_FAILURE); - } - - /* close the write-end of the pipe */ - close(pipefd[1]); - - bytes = read(pipefd[0], &buffer, MAXPATHLEN); - if (bytes < 0) { - SYSERROR("Failed to read from pipe file descriptor."); - close(pipefd[0]); - return -1; - } - buffer[bytes - 1] = '\0'; - - if (wait_for_pid(child) != 0) { - ERROR("lxc-user-nic failed to delete requested network: %s", - buffer[0] != '\0' ? buffer : "(null)"); - close(pipefd[0]); - return -1; - } - TRACE("Received output \"%s\" from lxc-user-nic", buffer); - - /* close the read-end of the pipe */ - close(pipefd[0]); - - return 0; -} diff --git a/src/lxc/conf.h b/src/lxc/conf.h index 813cc49b4..43e836fa6 100644 --- a/src/lxc/conf.h +++ b/src/lxc/conf.h @@ -43,103 +43,6 @@ typedef void * scmp_filter_ctx; #define subuidfile "/etc/subuid" #define subgidfile "/etc/subgid" -enum { - LXC_NET_EMPTY, - LXC_NET_VETH, - LXC_NET_MACVLAN, - LXC_NET_PHYS, - LXC_NET_VLAN, - LXC_NET_NONE, - LXC_NET_MAXCONFTYPE, -}; - -/* - * Defines the structure to configure an ipv4 address - * @address : ipv4 address - * @broadcast : ipv4 broadcast address - * @mask : network mask - */ -struct lxc_inetdev { - struct in_addr addr; - struct in_addr bcast; - unsigned int prefix; -}; - -struct lxc_route { - struct in_addr addr; -}; - -/* - * Defines the structure to configure an ipv6 address - * @flags : set the address up - * @address : ipv6 address - * @broadcast : ipv6 broadcast address - * @mask : network mask - */ -struct lxc_inet6dev { - struct in6_addr addr; - struct in6_addr mcast; - struct in6_addr acast; - unsigned int prefix; -}; - -struct lxc_route6 { - struct in6_addr addr; -}; - -struct ifla_veth { - char *pair; /* pair name */ - char veth1[IFNAMSIZ]; /* needed for deconf */ -}; - -struct ifla_vlan { - unsigned int flags; - unsigned int fmask; - unsigned short vid; - unsigned short pad; -}; - -struct ifla_macvlan { - int mode; /* private, vepa, bridge, passthru */ -}; - -union netdev_p { - struct ifla_veth veth_attr; - struct ifla_vlan vlan_attr; - struct ifla_macvlan macvlan_attr; -}; - -/* - * Defines a structure to configure a network device - * @link : lxc.network.link, name of bridge or host iface to attach if any - * @name : lxc.network.name, name of iface on the container side - * @flags : flag of the network device (IFF_UP, ... ) - * @ipv4 : a list of ipv4 addresses to be set on the network device - * @ipv6 : a list of ipv6 addresses to be set on the network device - * @upscript : a script filename to be executed during interface configuration - * @downscript : a script filename to be executed during interface destruction - * @idx : network counter - */ -struct lxc_netdev { - ssize_t idx; - int type; - int flags; - int ifindex; - char *link; - char *name; - char *hwaddr; - char *mtu; - union netdev_p priv; - struct lxc_list ipv4; - struct lxc_list ipv6; - struct in_addr *ipv4_gateway; - bool ipv4_gateway_auto; - struct in6_addr *ipv6_gateway; - bool ipv6_gateway_auto; - char *upscript; - char *downscript; -}; - /* * Defines a generic struct to configure the control group. * It is up to the programmer to specify the right subsystem. @@ -282,16 +185,20 @@ enum { * @lsm_se_context : selinux type to switch to or NULL */ enum lxchooks { - LXCHOOK_PRESTART, LXCHOOK_PREMOUNT, LXCHOOK_MOUNT, LXCHOOK_AUTODEV, - LXCHOOK_START, LXCHOOK_STOP, LXCHOOK_POSTSTOP, LXCHOOK_CLONE, LXCHOOK_DESTROY, - NUM_LXC_HOOKS}; -extern char *lxchook_names[NUM_LXC_HOOKS]; - -struct saved_nic { - int ifindex; - char *orig_name; + LXCHOOK_PRESTART, + LXCHOOK_PREMOUNT, + LXCHOOK_MOUNT, + LXCHOOK_AUTODEV, + LXCHOOK_START, + LXCHOOK_STOP, + LXCHOOK_POSTSTOP, + LXCHOOK_CLONE, + LXCHOOK_DESTROY, + NUM_LXC_HOOKS }; +extern char *lxchook_names[NUM_LXC_HOOKS]; + struct lxc_conf { int is_execute; char *fstab; @@ -400,15 +307,7 @@ extern struct lxc_conf *lxc_conf_init(void); extern void lxc_conf_free(struct lxc_conf *conf); extern int pin_rootfs(const char *rootfs); - -extern int lxc_requests_empty_network(struct lxc_handler *handler); -extern int lxc_create_network(struct lxc_handler *handler); -extern bool lxc_delete_network(struct lxc_handler *handler); -extern int lxc_assign_network(const char *lxcpath, char *lxcname, - struct lxc_list *networks, pid_t pid); extern int lxc_map_ids(struct lxc_list *idmap, pid_t pid); -extern int lxc_find_gateway_addresses(struct lxc_handler *handler); - extern int lxc_create_tty(const char *name, struct lxc_conf *conf); extern void lxc_delete_tty(struct lxc_tty_info *tty_info); @@ -435,9 +334,6 @@ extern int do_rootfs_setup(struct lxc_conf *conf, const char *name, struct cgroup_process_info; extern int lxc_setup(struct lxc_handler *handler); - -extern void lxc_restore_phys_nics_to_netns(int netnsfd, struct lxc_conf *conf); - extern int find_unmapped_nsid(struct lxc_conf *conf, enum idtype idtype); extern int mapped_hostid(unsigned id, struct lxc_conf *conf, enum idtype idtype); extern int chown_mapped_root(char *path, struct lxc_conf *conf); @@ -453,7 +349,7 @@ extern FILE *make_anonymous_mount_file(struct lxc_list *mount); extern struct lxc_list *sort_cgroup_settings(struct lxc_list *cgroup_settings); extern unsigned long add_required_remount_flags(const char *s, const char *d, unsigned long flags); -extern int lxc_unpriv_delete_nic(const char *lxcpath, char *lxcname, - struct lxc_netdev *netdev, pid_t pid); +extern int run_script(const char *name, const char *section, const char *script, + ...); #endif /* __LXC_CONF_H */ diff --git a/src/lxc/confile_utils.c b/src/lxc/confile_utils.c index 25c42810a..3e8334a0d 100644 --- a/src/lxc/confile_utils.c +++ b/src/lxc/confile_utils.c @@ -29,6 +29,8 @@ #include "error.h" #include "log.h" #include "list.h" +#include "network.h" +#include "parse.h" #include "utils.h" lxc_log_define(lxc_confile_utils, lxc); diff --git a/src/lxc/lxc_user_nic.c b/src/lxc/lxc_user_nic.c index 0237a9268..f55cc6d77 100644 --- a/src/lxc/lxc_user_nic.c +++ b/src/lxc/lxc_user_nic.c @@ -17,7 +17,7 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ -#define _GNU_SOURCE /* See feature_test_macros(7) */ +#define _GNU_SOURCE #include #include #include diff --git a/src/lxc/network.c b/src/lxc/network.c index 6685792b6..bbb586ca3 100644 --- a/src/lxc/network.c +++ b/src/lxc/network.c @@ -21,8 +21,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "config.h" - +#define _GNU_SOURCE #include #include #include @@ -47,6 +46,8 @@ #include #include "conf.h" +#include "config.h" +#include "confile_utils.h" #include "log.h" #include "network.h" #include "nl.h" @@ -92,7 +93,356 @@ lxc_log_define(lxc_network, lxc); -int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char* ifname) +typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *); + +static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev) +{ + int bridge_index, err; + char *veth1, *veth2; + char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ]; + unsigned int mtu = 0; + + if (netdev->priv.veth_attr.pair) { + veth1 = netdev->priv.veth_attr.pair; + if (handler->conf->reboot) + lxc_netdev_delete_by_name(veth1); + } else { + err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX"); + if (err < 0 || (size_t)err >= sizeof(veth1buf)) + return -1; + + veth1 = lxc_mkifname(veth1buf); + if (!veth1) + return -1; + + /* store away for deconf */ + memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ); + } + + snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX"); + veth2 = lxc_mkifname(veth2buf); + if (!veth2) + goto out_delete; + + err = lxc_veth_create(veth1, veth2); + if (err) { + ERROR("Failed to create veth pair \"%s\" and \"%s\": %s", veth1, + veth2, strerror(-err)); + goto out_delete; + } + + /* changing the high byte of the mac address to 0xfe, the bridge interface + * will always keep the host's mac address and not take the mac address + * of a container */ + err = setup_private_host_hw_addr(veth1); + if (err) { + ERROR("Failed to change mac address of host interface \"%s\": %s", + veth1, strerror(-err)); + goto out_delete; + } + + netdev->ifindex = if_nametoindex(veth2); + if (!netdev->ifindex) { + ERROR("Failed to retrieve ifindex for \"%s\"", veth2); + goto out_delete; + } + + if (netdev->mtu) { + if (lxc_safe_uint(netdev->mtu, &mtu) < 0) + WARN("Failed to parse mtu"); + else + INFO("Retrieved mtu %d", mtu); + } else if (netdev->link) { + bridge_index = if_nametoindex(netdev->link); + if (bridge_index) { + mtu = netdev_get_mtu(bridge_index); + INFO("Retrieved mtu %d from %s", mtu, netdev->link); + } else { + mtu = netdev_get_mtu(netdev->ifindex); + INFO("Retrieved mtu %d from %s", mtu, veth2); + } + } + + if (mtu) { + err = lxc_netdev_set_mtu(veth1, mtu); + if (!err) + err = lxc_netdev_set_mtu(veth2, mtu); + if (err) { + ERROR("Failed to set mtu \"%d\" for veth pair \"%s\" " + "and \"%s\": %s", + mtu, veth1, veth2, strerror(-err)); + goto out_delete; + } + } + + if (netdev->link) { + err = lxc_bridge_attach(netdev->link, veth1); + if (err) { + ERROR("Failed to attach \"%s\" to bridge \"%s\": %s", + veth1, netdev->link, strerror(-err)); + goto out_delete; + } + INFO("Attached \"%s\" to bridge \"%s\"", veth1, netdev->link); + } + + err = lxc_netdev_up(veth1); + if (err) { + ERROR("Failed to set \"%s\" up: %s", veth1, strerror(-err)); + goto out_delete; + } + + if (netdev->upscript) { + err = run_script(handler->name, "net", netdev->upscript, "up", + "veth", veth1, (char*) NULL); + if (err) + goto out_delete; + } + + DEBUG("Instantiated veth \"%s/%s\", index is \"%d\"", veth1, veth2, + netdev->ifindex); + + return 0; + +out_delete: + if (netdev->ifindex != 0) + lxc_netdev_delete_by_name(veth1); + if (!netdev->priv.veth_attr.pair) + free(veth1); + free(veth2); + return -1; +} + +static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev) +{ + char peerbuf[IFNAMSIZ], *peer; + int err; + + if (!netdev->link) { + ERROR("No link for macvlan network device specified"); + return -1; + } + + err = snprintf(peerbuf, sizeof(peerbuf), "mcXXXXXX"); + if (err < 0 || (size_t)err >= sizeof(peerbuf)) + return -1; + + peer = lxc_mkifname(peerbuf); + if (!peer) + return -1; + + err = lxc_macvlan_create(netdev->link, peer, + netdev->priv.macvlan_attr.mode); + if (err) { + ERROR("Failed to create macvlan interface \"%s\" on \"%s\": %s", + peer, netdev->link, strerror(-err)); + goto out; + } + + netdev->ifindex = if_nametoindex(peer); + if (!netdev->ifindex) { + ERROR("Failed to retrieve ifindex for \"%s\"", peer); + goto out; + } + + if (netdev->upscript) { + err = run_script(handler->name, "net", netdev->upscript, "up", + "macvlan", netdev->link, (char*) NULL); + if (err) + goto out; + } + + DEBUG("Instantiated macvlan \"%s\" with ifindex is %d and mode %d", + peer, netdev->ifindex, netdev->priv.macvlan_attr.mode); + + return 0; +out: + lxc_netdev_delete_by_name(peer); + free(peer); + return -1; +} + +static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev) +{ + char peer[IFNAMSIZ]; + int err; + static uint16_t vlan_cntr = 0; + unsigned int mtu = 0; + + if (!netdev->link) { + ERROR("No link for vlan network device specified"); + return -1; + } + + err = snprintf(peer, sizeof(peer), "vlan%d-%d", netdev->priv.vlan_attr.vid, vlan_cntr++); + if (err < 0 || (size_t)err >= sizeof(peer)) + return -1; + + err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid); + if (err) { + ERROR("Failed to create vlan interface \"%s\" on \"%s\": %s", + peer, netdev->link, strerror(-err)); + return -1; + } + + netdev->ifindex = if_nametoindex(peer); + if (!netdev->ifindex) { + ERROR("Failed to retrieve ifindex for \"%s\"", peer); + lxc_netdev_delete_by_name(peer); + return -1; + } + + DEBUG("Instantiated vlan \"%s\" with ifindex is \"%d\" (vlan1000)", + peer, netdev->ifindex); + if (netdev->mtu) { + if (lxc_safe_uint(netdev->mtu, &mtu) < 0) { + ERROR("Failed to retrieve mtu from \"%d\"/\"%s\".", + netdev->ifindex, + netdev->name ? netdev->name : "(null)"); + return -1; + } + err = lxc_netdev_set_mtu(peer, mtu); + if (err) { + ERROR("Failed to set mtu \"%s\" for \"%s\": %s", + netdev->mtu, peer, strerror(-err)); + lxc_netdev_delete_by_name(peer); + return -1; + } + } + + return 0; +} + +static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev) +{ + if (!netdev->link) { + ERROR("No link for physical interface specified"); + return -1; + } + + netdev->ifindex = if_nametoindex(netdev->link); + if (!netdev->ifindex) { + ERROR("Failed to retrieve ifindex for \"%s\"", netdev->link); + return -1; + } + + if (netdev->upscript) { + int err; + err = run_script(handler->name, "net", netdev->upscript, + "up", "phys", netdev->link, (char*) NULL); + if (err) + return -1; + } + + return 0; +} + +static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev) +{ + netdev->ifindex = 0; + if (netdev->upscript) { + int err; + err = run_script(handler->name, "net", netdev->upscript, + "up", "empty", (char*) NULL); + if (err) + return -1; + } + return 0; +} + +static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev) +{ + netdev->ifindex = 0; + return 0; +} + +static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = { + [LXC_NET_VETH] = instantiate_veth, + [LXC_NET_MACVLAN] = instantiate_macvlan, + [LXC_NET_VLAN] = instantiate_vlan, + [LXC_NET_PHYS] = instantiate_phys, + [LXC_NET_EMPTY] = instantiate_empty, + [LXC_NET_NONE] = instantiate_none, +}; + +static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev) +{ + char *veth1; + int err; + + if (netdev->priv.veth_attr.pair) + veth1 = netdev->priv.veth_attr.pair; + else + veth1 = netdev->priv.veth_attr.veth1; + + if (netdev->downscript) { + err = run_script(handler->name, "net", netdev->downscript, + "down", "veth", veth1, (char*) NULL); + if (err) + return -1; + } + return 0; +} + +static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev) +{ + int err; + + if (netdev->downscript) { + err = run_script(handler->name, "net", netdev->downscript, + "down", "macvlan", netdev->link, + (char*) NULL); + if (err) + return -1; + } + return 0; +} + +static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev) +{ + return 0; +} + +static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev) +{ + int err; + + if (netdev->downscript) { + err = run_script(handler->name, "net", netdev->downscript, + "down", "phys", netdev->link, (char*) NULL); + if (err) + return -1; + } + return 0; +} + +static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev) +{ + int err; + + if (netdev->downscript) { + err = run_script(handler->name, "net", netdev->downscript, + "down", "empty", (char*) NULL); + if (err) + return -1; + } + return 0; +} + +static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev) +{ + return 0; +} + +static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = { + [LXC_NET_VETH] = shutdown_veth, + [LXC_NET_MACVLAN] = shutdown_macvlan, + [LXC_NET_VLAN] = shutdown_vlan, + [LXC_NET_PHYS] = shutdown_phys, + [LXC_NET_EMPTY] = shutdown_empty, + [LXC_NET_NONE] = shutdown_none, +}; + +int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char *ifname) { struct nl_handler nlh; struct nlmsg *nlmsg = NULL; @@ -1519,7 +1869,7 @@ const char *lxc_net_type_to_str(int type) static const char padchar[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; -char *lxc_mkifname(char *template) +char *lxc_mkifname(const char *template) { char *name = NULL; size_t i = 0; @@ -1605,3 +1955,842 @@ int setup_private_host_hw_addr(char *veth1) return 0; } + +int lxc_find_gateway_addresses(struct lxc_handler *handler) +{ + struct lxc_list *network = &handler->conf->network; + struct lxc_list *iterator; + struct lxc_netdev *netdev; + int link_index; + + lxc_list_for_each(iterator, network) { + netdev = iterator->elem; + + if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto) + continue; + + if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) { + ERROR("Automatic gateway detection is only supported " + "for veth and macvlan"); + return -1; + } + + if (!netdev->link) { + ERROR("Automatic gateway detection needs a link interface"); + return -1; + } + + link_index = if_nametoindex(netdev->link); + if (!link_index) + return -EINVAL; + + if (netdev->ipv4_gateway_auto) { + if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) { + ERROR("Failed to automatically find ipv4 gateway " + "address from link interface \"%s\"", netdev->link); + return -1; + } + } + + if (netdev->ipv6_gateway_auto) { + if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) { + ERROR("Failed to automatically find ipv6 gateway " + "address from link interface \"%s\"", netdev->link); + return -1; + } + } + } + + return 0; +} + +#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic" +static int lxc_create_network_unpriv(const char *lxcpath, char *lxcname, + struct lxc_netdev *netdev, pid_t pid) +{ + int ret; + pid_t child; + int bytes, pipefd[2]; + char *token, *saveptr = NULL; + char netdev_link[IFNAMSIZ + 1]; + char buffer[MAXPATHLEN] = {0}; + + if (netdev->type != LXC_NET_VETH) { + ERROR("Network type %d not support for unprivileged use", netdev->type); + return -1; + } + + ret = pipe(pipefd); + if (ret < 0) { + SYSERROR("Failed to create pipe"); + return -1; + } + + child = fork(); + if (child < 0) { + SYSERROR("Failed to create new process"); + close(pipefd[0]); + close(pipefd[1]); + return -1; + } + + if (child == 0) { + int ret; + char pidstr[LXC_NUMSTRLEN64]; + + close(pipefd[0]); + + ret = dup2(pipefd[1], STDOUT_FILENO); + if (ret >= 0) + ret = dup2(pipefd[1], STDERR_FILENO); + close(pipefd[1]); + if (ret < 0) { + SYSERROR("Failed to duplicate std{err,out} file descriptor"); + exit(EXIT_FAILURE); + } + + if (netdev->link) + strncpy(netdev_link, netdev->link, IFNAMSIZ); + else + strncpy(netdev_link, "none", IFNAMSIZ); + + ret = snprintf(pidstr, LXC_NUMSTRLEN64, "%d", pid); + if (ret < 0 || ret >= LXC_NUMSTRLEN64) + exit(EXIT_FAILURE); + pidstr[LXC_NUMSTRLEN64 - 1] = '\0'; + + INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath, + lxcname, pidstr, netdev_link, + netdev->name ? netdev->name : "(null)"); + if (netdev->name) + execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create", + lxcpath, lxcname, pidstr, "veth", netdev_link, + netdev->name, (char *)NULL); + else + execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create", + lxcpath, lxcname, pidstr, "veth", netdev_link, + (char *)NULL); + SYSERROR("Failed to execute lxc-user-nic"); + exit(EXIT_FAILURE); + } + + /* close the write-end of the pipe */ + close(pipefd[1]); + + bytes = read(pipefd[0], &buffer, MAXPATHLEN); + if (bytes < 0) { + SYSERROR("Failed to read from pipe file descriptor."); + close(pipefd[0]); + return -1; + } + buffer[bytes - 1] = '\0'; + + ret = wait_for_pid(child); + close(pipefd[0]); + if (ret != 0) { + ERROR("lxc-user-nic failed to configure requested network: %s", + buffer[0] != '\0' ? buffer : "(null)"); + return -1; + } + TRACE("Received output \"%s\" from lxc-user-nic", buffer); + + /* netdev->name */ + token = strtok_r(buffer, ":", &saveptr); + if (!token) + return -1; + + netdev->name = malloc(IFNAMSIZ + 1); + if (!netdev->name) { + SYSERROR("Failed to allocate memory."); + return -1; + } + memset(netdev->name, 0, IFNAMSIZ + 1); + strncpy(netdev->name, token, IFNAMSIZ); + + /* netdev->priv.veth_attr.pair */ + token = strtok_r(NULL, ":", &saveptr); + if (!token) + return -1; + + netdev->priv.veth_attr.pair = strdup(token); + if (!netdev->priv.veth_attr.pair) { + ERROR("Failed to allocate memory."); + return -1; + } + + /* netdev->ifindex */ + token = strtok_r(NULL, ":", &saveptr); + if (!token) + return -1; + + ret = lxc_safe_int(token, &netdev->ifindex); + if (ret < 0) { + ERROR("Failed to parse ifindex for network device \"%s\"", netdev->name); + return -1; + } + + return 0; +} + +static int lxc_delete_network_unpriv(const char *lxcpath, char *lxcname, + struct lxc_netdev *netdev, pid_t pid) +{ + int bytes, ret; + pid_t child; + int pipefd[2]; + char buffer[MAXPATHLEN] = {0}; + + if (netdev->type != LXC_NET_VETH) { + ERROR("Network type %d not support for unprivileged use", netdev->type); + return -1; + } + + ret = pipe(pipefd); + if (ret < 0) { + SYSERROR("Failed to create pipe"); + return -1; + } + + child = fork(); + if (child < 0) { + SYSERROR("Failed to create new process"); + close(pipefd[0]); + close(pipefd[1]); + return -1; + } + + if (child == 0) { + int ret; + char pidstr[LXC_NUMSTRLEN64]; + + close(pipefd[0]); + + ret = dup2(pipefd[1], STDOUT_FILENO); + if (ret >= 0) + ret = dup2(pipefd[1], STDERR_FILENO); + close(pipefd[1]); + if (ret < 0) { + SYSERROR("Failed to duplicate std{err,out} file descriptor"); + exit(EXIT_FAILURE); + } + + if (!netdev->link) + SYSERROR("Network link for network device \"%s\" is " + "missing", netdev->priv.veth_attr.pair); + + ret = snprintf(pidstr, LXC_NUMSTRLEN64, "%d", pid); + if (ret < 0 || ret >= LXC_NUMSTRLEN64) + exit(EXIT_FAILURE); + pidstr[LXC_NUMSTRLEN64 - 1] = '\0'; + + INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath, + lxcname, pidstr, netdev->link, netdev->priv.veth_attr.pair); + execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath, + lxcname, pidstr, "veth", netdev->link, + netdev->priv.veth_attr.pair, (char *)NULL); + SYSERROR("Failed to exec lxc-user-nic."); + exit(EXIT_FAILURE); + } + + close(pipefd[1]); + + bytes = read(pipefd[0], &buffer, MAXPATHLEN); + if (bytes < 0) { + SYSERROR("Failed to read from pipe file descriptor."); + close(pipefd[0]); + return -1; + } + buffer[bytes - 1] = '\0'; + + if (wait_for_pid(child) != 0) { + ERROR("lxc-user-nic failed to delete requested network: %s", + buffer[0] != '\0' ? buffer : "(null)"); + close(pipefd[0]); + return -1; + } + + close(pipefd[0]); + + return 0; +} + +int lxc_create_network_priv(struct lxc_handler *handler) +{ + bool am_root; + struct lxc_list *iterator; + struct lxc_list *network = &handler->conf->network; + + /* We need to be root. */ + am_root = (getuid() == 0); + if (!am_root) + return 0; + + lxc_list_for_each(iterator, network) { + struct lxc_netdev *netdev = iterator->elem; + + if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) { + ERROR("Invalid network configuration type %d", netdev->type); + return -1; + } + + if (netdev_conf[netdev->type](handler, netdev)) { + ERROR("Failed to create network device"); + return -1; + } + + } + + return 0; +} + +int lxc_create_network(const char *lxcpath, char *lxcname, + struct lxc_list *network, pid_t pid) +{ + int err; + bool am_root; + char ifname[IFNAMSIZ]; + struct lxc_list *iterator; + + am_root = (getuid() == 0); + + lxc_list_for_each(iterator, network) { + struct lxc_netdev *netdev = iterator->elem; + + if (netdev->type == LXC_NET_VETH && !am_root) { + if (netdev->mtu) + INFO("mtu ignored due to insufficient privilege"); + if (lxc_create_network_unpriv(lxcpath, lxcname, netdev, pid)) + return -1; + /* lxc-user-nic has moved the nic to the new ns. + * unpriv_assign_nic() fills in netdev->name. + * netdev->ifindex will be filled in at + * lxc_setup_netdev_in_child_namespaces(). + */ + continue; + } + + /* empty network namespace, nothing to move */ + if (!netdev->ifindex) + continue; + + /* retrieve the name of the interface */ + if (!if_indextoname(netdev->ifindex, ifname)) { + ERROR("No interface corresponding to ifindex \"%d\"", + netdev->ifindex); + return -1; + } + + err = lxc_netdev_move_by_name(ifname, pid, NULL); + if (err) { + ERROR("Failed to move network device \"%s\" to " + "network namespace %d: %s", ifname, pid, + strerror(-err)); + return -1; + } + + DEBUG("Moved network device \"%s\"/\"%s\" to network namespace " + "of %d:", ifname, netdev->name ? netdev->name : "(null)", + pid); + } + + return 0; +} + +bool lxc_delete_network(struct lxc_handler *handler) +{ + int ret; + struct lxc_list *iterator; + struct lxc_list *network = &handler->conf->network; + bool deleted_all = true; + + lxc_list_for_each(iterator, network) { + char *hostveth = NULL; + struct lxc_netdev *netdev = iterator->elem; + + /* We can only delete devices whose ifindex we have. If we don't + * have the index it means that we didn't create it. + */ + if (!netdev->ifindex) + continue; + + if (netdev->type == LXC_NET_PHYS) { + ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link); + if (ret < 0) + WARN("Failed to rename interface with index %d " + "to its initial name \"%s\"", + netdev->ifindex, netdev->link); + else + TRACE("Renamed interface with index %d to its " + "initial name \"%s\"", + netdev->ifindex, netdev->link); + continue; + } + + ret = netdev_deconf[netdev->type](handler, netdev); + if (ret < 0) + WARN("Failed to deconfigure network device"); + + /* Recent kernels remove the virtual interfaces when the network + * namespace is destroyed but in case we did not move the + * interface to the network namespace, we have to destroy it. + */ + if (!am_unpriv()) { + ret = lxc_netdev_delete_by_index(netdev->ifindex); + if (-ret == ENODEV) { + INFO("Interface \"%s\" with index %d already " + "deleted or existing in different network " + "namespace", + netdev->name ? netdev->name : "(null)", + netdev->ifindex); + } else if (ret < 0) { + deleted_all = false; + WARN("Failed to remove interface \"%s\" with " + "index %d: %s", + netdev->name ? netdev->name : "(null)", + netdev->ifindex, strerror(-ret)); + continue; + } + INFO("Removed interface \"%s\" with index %d", + netdev->name ? netdev->name : "(null)", + netdev->ifindex); + } + + if (netdev->type != LXC_NET_VETH) + continue; + + if (am_unpriv()) { + if (is_ovs_bridge(netdev->link)) { + ret = lxc_delete_network_unpriv(handler->lxcpath, + handler->name, + netdev, getpid()); + if (ret < 0) + WARN("Failed to remove port \"%s\" " + "from openvswitch bridge \"%s\"", + netdev->priv.veth_attr.pair, + netdev->link); + } + + continue; + } + + /* Explicitly delete host veth device to prevent lingering + * devices. We had issues in LXD around this. + */ + if (netdev->priv.veth_attr.pair) + hostveth = netdev->priv.veth_attr.pair; + else + hostveth = netdev->priv.veth_attr.veth1; + if (*hostveth == '\0') + continue; + + ret = lxc_netdev_delete_by_name(hostveth); + if (ret < 0) { + deleted_all = false; + WARN("Failed to remove interface \"%s\" from \"%s\": %s", + hostveth, netdev->link, strerror(-ret)); + continue; + } + INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link); + + if (!is_ovs_bridge(netdev->link)) { + netdev->priv.veth_attr.veth1[0] = '\0'; + continue; + } + + /* Delete the openvswitch port. */ + ret = lxc_ovs_delete_port(netdev->link, hostveth); + if (ret < 0) + WARN("Failed to remove port \"%s\" from openvswitch " + "bridge \"%s\"", hostveth, netdev->link); + else + INFO("Removed port \"%s\" from openvswitch bridge \"%s\"", + hostveth, netdev->link); + + netdev->priv.veth_attr.veth1[0] = '\0'; + } + + return deleted_all; +} + +int lxc_requests_empty_network(struct lxc_handler *handler) +{ + struct lxc_list *network = &handler->conf->network; + struct lxc_list *iterator; + bool found_none = false, found_nic = false; + + if (lxc_list_empty(network)) + return 0; + + lxc_list_for_each(iterator, network) { + struct lxc_netdev *netdev = iterator->elem; + + if (netdev->type == LXC_NET_NONE) + found_none = true; + else + found_nic = true; + } + if (found_none && !found_nic) + return 1; + return 0; +} + +/* try to move physical nics to the init netns */ +void lxc_restore_phys_nics_to_netns(int netnsfd, struct lxc_conf *conf) +{ + int ret; + int i, oldfd; + char ifname[IFNAMSIZ]; + + if (netnsfd < 0 || conf->num_savednics == 0) + return; + + INFO("Trying to restore network device names in original namespace for " + "%d network devices", conf->num_savednics); + + oldfd = lxc_preserve_ns(getpid(), "net"); + if (oldfd < 0) { + SYSERROR("Failed to preserve network namespace"); + return; + } + + ret = setns(netnsfd, 0); + if (ret < 0) { + SYSERROR("Failed to enter network namespace"); + close(oldfd); + return; + } + + for (i = 0; i < conf->num_savednics; i++) { + struct saved_nic *s = &conf->saved_nics[i]; + + /* retrieve the name of the interface */ + if (!if_indextoname(s->ifindex, ifname)) { + WARN("No interface corresponding to ifindex %d", + s->ifindex); + continue; + } + if (lxc_netdev_move_by_name(ifname, 1, s->orig_name)) + WARN("Error moving network device \"%s\" back to " + "network namespace", ifname); + free(s->orig_name); + } + conf->num_savednics = 0; + + ret = setns(oldfd, 0); + if (ret < 0) + SYSERROR("Failed to enter network namespace"); + close(oldfd); +} + +static int setup_hw_addr(char *hwaddr, const char *ifname) +{ + struct sockaddr sockaddr; + struct ifreq ifr; + int ret, fd, saved_errno; + + ret = lxc_convert_mac(hwaddr, &sockaddr); + if (ret) { + ERROR("Mac address \"%s\" conversion failed: %s", hwaddr, + strerror(-ret)); + return -1; + } + + memcpy(ifr.ifr_name, ifname, IFNAMSIZ); + ifr.ifr_name[IFNAMSIZ-1] = '\0'; + memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr)); + + fd = socket(AF_INET, SOCK_DGRAM, 0); + if (fd < 0) + return -1; + + ret = ioctl(fd, SIOCSIFHWADDR, &ifr); + saved_errno = errno; + close(fd); + if (ret) + ERROR("Failed to perform ioctl: %s", strerror(saved_errno)); + + DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr, + ifr.ifr_name); + + return ret; +} + +static int setup_ipv4_addr(struct lxc_list *ip, int ifindex) +{ + struct lxc_list *iterator; + int err; + + lxc_list_for_each(iterator, ip) { + struct lxc_inetdev *inetdev = iterator->elem; + + err = lxc_ipv4_addr_add(ifindex, &inetdev->addr, + &inetdev->bcast, inetdev->prefix); + if (err) { + ERROR("Failed to setup ipv4 address for network device " + "with eifindex %d: %s", ifindex, strerror(-err)); + return -1; + } + } + + return 0; +} + +static int setup_ipv6_addr(struct lxc_list *ip, int ifindex) +{ + struct lxc_list *iterator; + int err; + + lxc_list_for_each(iterator, ip) { + struct lxc_inet6dev *inet6dev = iterator->elem; + + err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr, + &inet6dev->mcast, &inet6dev->acast, + inet6dev->prefix); + if (err) { + ERROR("Failed to setup ipv6 address for network device " + "with eifindex %d: %s", ifindex, strerror(-err)); + return -1; + } + } + + return 0; +} + +static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev *netdev) +{ + char ifname[IFNAMSIZ]; + int err; + const char *net_type_name; + char *current_ifname = ifname; + + /* empty network namespace */ + if (!netdev->ifindex) { + if (netdev->flags & IFF_UP) { + err = lxc_netdev_up("lo"); + if (err) { + ERROR("Failed to set the loopback network " + "device up: %s", + strerror(-err)); + return -1; + } + } + + if (netdev->type == LXC_NET_EMPTY) + return 0; + + if (netdev->type == LXC_NET_NONE) + return 0; + + if (netdev->type != LXC_NET_VETH) { + net_type_name = lxc_net_type_to_str(netdev->type); + ERROR("%s networks are not supported for containers " + "not setup up by privileged users", + net_type_name); + return -1; + } + + netdev->ifindex = if_nametoindex(netdev->name); + } + + /* get the new ifindex in case of physical netdev */ + if (netdev->type == LXC_NET_PHYS) { + netdev->ifindex = if_nametoindex(netdev->link); + if (!netdev->ifindex) { + ERROR("Failed to get ifindex for network device \"%s\"", + netdev->link); + return -1; + } + } + + /* retrieve the name of the interface */ + if (!if_indextoname(netdev->ifindex, current_ifname)) { + ERROR("Failed get name for network device with ifindex %d", + netdev->ifindex); + return -1; + } + + /* Default: let the system to choose one interface name. + * When the IFLA_IFNAME attribute is passed something like "%d" + * netlink will replace the format specifier with an appropriate index. + */ + if (!netdev->name) + netdev->name = netdev->type == LXC_NET_PHYS ? + netdev->link : "eth%d"; + + /* rename the interface name */ + if (strcmp(ifname, netdev->name) != 0) { + err = lxc_netdev_rename_by_name(ifname, netdev->name); + if (err) { + ERROR("Failed to rename network device \"%s\" to " + "\"%s\": %s", ifname, netdev->name, strerror(-err)); + return -1; + } + } + + /* Re-read the name of the interface because its name has changed + * and would be automatically allocated by the system + */ + if (!if_indextoname(netdev->ifindex, current_ifname)) { + ERROR("Failed get name for network device with ifindex %d", + netdev->ifindex); + return -1; + } + + /* set a mac address */ + if (netdev->hwaddr) { + if (setup_hw_addr(netdev->hwaddr, current_ifname)) { + ERROR("Failed to setup hw address for network device \"%s\"", + current_ifname); + return -1; + } + } + + /* setup ipv4 addresses on the interface */ + if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) { + ERROR("Failed to setup ip addresses for network device \"%s\"", + ifname); + return -1; + } + + /* setup ipv6 addresses on the interface */ + if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) { + ERROR("Failed to setup ipv6 addresses for network device \"%s\"", + ifname); + return -1; + } + + /* set the network device up */ + if (netdev->flags & IFF_UP) { + int err; + + err = lxc_netdev_up(current_ifname); + if (err) { + ERROR("Failed to set network device \"%s\" up: %s", + current_ifname, strerror(-err)); + return -1; + } + + /* the network is up, make the loopback up too */ + err = lxc_netdev_up("lo"); + if (err) { + ERROR("Failed to set the loopback network device up: %s", + strerror(-err)); + return -1; + } + } + + /* We can only set up the default routes after bringing + * up the interface, sine bringing up the interface adds + * the link-local routes and we can't add a default + * route if the gateway is not reachable. */ + + /* setup ipv4 gateway on the interface */ + if (netdev->ipv4_gateway) { + if (!(netdev->flags & IFF_UP)) { + ERROR("Cannot add ipv4 gateway for network device " + "\"%s\" when not bringing up the interface", ifname); + return -1; + } + + if (lxc_list_empty(&netdev->ipv4)) { + ERROR("Cannot add ipv4 gateway for network device " + "\"%s\" when not assigning an address", ifname); + return -1; + } + + err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway); + if (err) { + err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway); + if (err) { + ERROR("Failed to add ipv4 dest for network " + "device \"%s\": %s", ifname, strerror(-err)); + } + + err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway); + if (err) { + ERROR("Failed to setup ipv4 gateway for " + "network device \"%s\": %s", + ifname, strerror(-err)); + if (netdev->ipv4_gateway_auto) { + char buf[INET_ADDRSTRLEN]; + inet_ntop(AF_INET, netdev->ipv4_gateway, buf, sizeof(buf)); + ERROR("Fried to set autodetected ipv4 gateway \"%s\"", buf); + } + return -1; + } + } + } + + /* setup ipv6 gateway on the interface */ + if (netdev->ipv6_gateway) { + if (!(netdev->flags & IFF_UP)) { + ERROR("Cannot add ipv6 gateway for network device " + "\"%s\" when not bringing up the interface", ifname); + return -1; + } + + if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) { + ERROR("Cannot add ipv6 gateway for network device " + "\"%s\" when not assigning an address", ifname); + return -1; + } + + err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway); + if (err) { + err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway); + if (err) { + ERROR("Failed to add ipv6 dest for network " + "device \"%s\": %s", ifname, strerror(-err)); + } + + err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway); + if (err) { + ERROR("Failed to setup ipv6 gateway for " + "network device \"%s\": %s", ifname, + strerror(-err)); + if (netdev->ipv6_gateway_auto) { + char buf[INET6_ADDRSTRLEN]; + inet_ntop(AF_INET6, netdev->ipv6_gateway, buf, sizeof(buf)); + ERROR("Tried to set autodetected ipv6 " + "gateway for network device " + "\"%s\"", buf); + } + return -1; + } + } + } + + DEBUG("Network devie \"%s\" has been setup", current_ifname); + + return 0; +} + +int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf, + struct lxc_list *network) +{ + struct lxc_list *iterator; + struct lxc_netdev *netdev; + + lxc_log_configured_netdevs(conf); + + lxc_list_for_each(iterator, network) { + netdev = iterator->elem; + + /* REMOVE in LXC 3.0 */ + if (netdev->idx < 0) { + ERROR("WARNING: using \"lxc.network.*\" keys to define " + "networks is DEPRECATED, please switch to using " + "\"lxc.net.[i].* keys\""); + } + + if (lxc_setup_netdev_in_child_namespaces(netdev)) { + ERROR("failed to setup netdev"); + return -1; + } + } + + if (!lxc_list_empty(network)) + INFO("network has been setup"); + + return 0; +} diff --git a/src/lxc/network.h b/src/lxc/network.h index b9dec641b..8557b2acf 100644 --- a/src/lxc/network.h +++ b/src/lxc/network.h @@ -23,9 +23,121 @@ #ifndef __LXC_NETWORK_H #define __LXC_NETWORK_H +#include +#include +#include +#include +#include + +#include "list.h" + +struct lxc_conf; +struct lxc_handler; +struct lxc_netdev; + +enum { + LXC_NET_EMPTY, + LXC_NET_VETH, + LXC_NET_MACVLAN, + LXC_NET_PHYS, + LXC_NET_VLAN, + LXC_NET_NONE, + LXC_NET_MAXCONFTYPE, +}; + +/* + * Defines the structure to configure an ipv4 address + * @address : ipv4 address + * @broadcast : ipv4 broadcast address + * @mask : network mask + */ +struct lxc_inetdev { + struct in_addr addr; + struct in_addr bcast; + unsigned int prefix; +}; + +struct lxc_route { + struct in_addr addr; +}; + +/* + * Defines the structure to configure an ipv6 address + * @flags : set the address up + * @address : ipv6 address + * @broadcast : ipv6 broadcast address + * @mask : network mask + */ +struct lxc_inet6dev { + struct in6_addr addr; + struct in6_addr mcast; + struct in6_addr acast; + unsigned int prefix; +}; + +struct lxc_route6 { + struct in6_addr addr; +}; + +struct ifla_veth { + char *pair; /* pair name */ + char veth1[IFNAMSIZ]; /* needed for deconf */ +}; + +struct ifla_vlan { + unsigned int flags; + unsigned int fmask; + unsigned short vid; + unsigned short pad; +}; + +struct ifla_macvlan { + int mode; /* private, vepa, bridge, passthru */ +}; + +union netdev_p { + struct ifla_veth veth_attr; + struct ifla_vlan vlan_attr; + struct ifla_macvlan macvlan_attr; +}; + /* - * Convert a string mac address to a socket structure + * Defines a structure to configure a network device + * @link : lxc.net.[i].link, name of bridge or host iface to attach if any + * @name : lxc.net.[i].name, name of iface on the container side + * @flags : flag of the network device (IFF_UP, ... ) + * @ipv4 : a list of ipv4 addresses to be set on the network device + * @ipv6 : a list of ipv6 addresses to be set on the network device + * @upscript : a script filename to be executed during interface configuration + * @downscript : a script filename to be executed during interface destruction + * @idx : network counter */ +struct lxc_netdev { + ssize_t idx; + int type; + int flags; + int ifindex; + char *link; + char *name; + char *hwaddr; + char *mtu; + union netdev_p priv; + struct lxc_list ipv4; + struct lxc_list ipv6; + struct in_addr *ipv4_gateway; + bool ipv4_gateway_auto; + struct in6_addr *ipv6_gateway; + bool ipv6_gateway_auto; + char *upscript; + char *downscript; +}; + +struct saved_nic { + int ifindex; + char *orig_name; +}; + +/* Convert a string mac address to a socket structure. */ extern int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr); /* @@ -136,12 +248,22 @@ extern int lxc_neigh_proxy_on(const char *name, int family); */ extern int lxc_neigh_proxy_off(const char *name, int family); -/* - * Generate a new unique network interface name +/* Generate a new unique network interface name. + * Allocated memory must be freed by caller. */ -extern char *lxc_mkifname(char *template); +extern char *lxc_mkifname(const char *template); extern const char *lxc_net_type_to_str(int type); extern int setup_private_host_hw_addr(char *veth1); extern int netdev_get_mtu(int ifindex); -#endif +extern int lxc_create_network_priv(struct lxc_handler *handler); +extern bool lxc_delete_network(struct lxc_handler *handler); +extern int lxc_find_gateway_addresses(struct lxc_handler *handler); +extern int lxc_create_network(const char *lxcpath, char *lxcname, + struct lxc_list *network, pid_t pid); +extern int lxc_requests_empty_network(struct lxc_handler *handler); +extern void lxc_restore_phys_nics_to_netns(int netnsfd, struct lxc_conf *conf); +extern int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf, + struct lxc_list *network); + +#endif /* __LXC_NETWORK_H */ diff --git a/src/lxc/start.c b/src/lxc/start.c index 19deddb5c..cf37281c2 100644 --- a/src/lxc/start.c +++ b/src/lxc/start.c @@ -70,6 +70,7 @@ #include "mainloop.h" #include "monitor.h" #include "namespace.h" +#include "network.h" #include "start.h" #include "storage.h" #include "storage_utils.h" @@ -1311,7 +1312,7 @@ static int lxc_spawn(struct lxc_handler *handler) /* That should be done before the clone because we will * fill the netdev index and use them in the child. */ - if (lxc_create_network(handler)) { + if (lxc_create_network_priv(handler)) { ERROR("Failed to create the network."); lxc_sync_fini(handler); return -1; @@ -1429,7 +1430,7 @@ static int lxc_spawn(struct lxc_handler *handler) /* Create the network configuration. */ if (handler->clone_flags & CLONE_NEWNET) { - if (lxc_assign_network(handler->lxcpath, handler->name, + if (lxc_create_network(handler->lxcpath, handler->name, &handler->conf->network, handler->pid)) { ERROR("Failed to create the configured network."); goto out_delete_net;