From: Lennart Poettering Date: Sun, 9 Mar 2025 07:14:09 +0000 (+0100) Subject: nsresourced: add ability to delegate tap device X-Git-Tag: v258-rc1~1062^2~10 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=1365034727b3322e0adf371700cc540a1bcd95c1;p=thirdparty%2Fsystemd.git nsresourced: add ability to delegate tap device --- diff --git a/src/nsresourced/nsresourcework.c b/src/nsresourced/nsresourcework.c index a7274d22516..abbd09e8e29 100644 --- a/src/nsresourced/nsresourcework.c +++ b/src/nsresourced/nsresourcework.c @@ -1,9 +1,12 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ +/* Make sure the net/if.h header is included before any linux/ one */ +#include + #include +#include #include #include -#include #include #include #include @@ -1468,7 +1471,7 @@ static int create_veth( _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL; r = sd_netlink_open(&rtnl); if (r < 0) - return r; + return log_error_errno(r, "Failed to allocation netlink connection: %m"); _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; r = sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, 0); @@ -1530,6 +1533,65 @@ static int create_veth( return 0; } +static int create_tap( + int userns_fd, + const char *ifname_host, + char *const *altifname_host, + struct ether_addr *mac_host) { + + int r; + + assert(ifname_host); + assert(mac_host); + + if (!ifname_valid(ifname_host)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid interface name: %s", ifname_host); + + log_debug("Creating tap link on host %s (%s) with address %s", + ifname_host, strna(altifname_host ? 
altifname_host[0] : NULL), ETHER_ADDR_TO_STR(mac_host)); + + _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL; + if (altifname_host) { + r = sd_netlink_open(&rtnl); + if (r < 0) + return log_error_errno(r, "Failed to allocate netlink connection: %m"); + } + + uid_t uid; + r = userns_get_base_uid(userns_fd, &uid, /* ret_gid= */ NULL); + if (r < 0) + return log_error_errno(r, "Failed to get namespace base UID/GID: %m"); + + struct ifreq ifr = { + .ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR, + }; + + assert(strlen(ifname_host) < sizeof(ifr.ifr_name)); + strcpy(ifr.ifr_name, ifname_host); + + _cleanup_close_ int fd = open("/dev/net/tun", O_RDWR|O_CLOEXEC); + if (fd < 0) { + if (errno == ENOENT) /* Turn ENOENT → EOPNOTSUPP */ + return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Network tap device node /dev/net/tun not found, cannot create network interface."); + + return log_error_errno(errno, "Failed to open /dev/net/tun: %m"); + } + + if (ioctl(fd, TUNSETIFF, &ifr) < 0) + return log_error_errno(errno, "TUNSETIFF failed: %m"); + + if (ioctl(fd, TUNSETOWNER, uid) < 0) + return log_error_errno(errno, "TUNSETOWNER failed: %m"); + + if (!strv_isempty(altifname_host)) { + r = rtnl_set_link_alternative_names_by_ifname(&rtnl, ifname_host, altifname_host); + if (r < 0) + log_warning_errno(r, "Failed to set alternative interface names, ignoring: %m"); + } + + return TAKE_FD(fd); +} + static int validate_netns(sd_varlink *link, int userns_fd, int netns_fd) { int r; @@ -1596,7 +1658,7 @@ typedef struct AddNetworkParameters { static int vl_method_add_netif_to_user_namespace(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) { static const sd_json_dispatch_field parameter_dispatch_table[] = { { "userNamespaceFileDescriptor", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint, offsetof(AddNetworkParameters, userns_fd_idx), SD_JSON_MANDATORY }, - { "networkNamespaceFileDescriptor", _SD_JSON_VARIANT_TYPE_INVALID, 
sd_json_dispatch_uint, offsetof(AddNetworkParameters, netns_fd_idx), SD_JSON_MANDATORY }, + { "networkNamespaceFileDescriptor", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint, offsetof(AddNetworkParameters, netns_fd_idx), 0 }, { "namespaceInterfaceName", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(AddNetworkParameters, ifname), 0 }, { "mode", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(AddNetworkParameters, mode), SD_JSON_MANDATORY }, {} @@ -1607,9 +1669,6 @@ static int vl_method_add_netif_to_user_namespace(sd_varlink *link, sd_json_varia .userns_fd_idx = UINT_MAX, .netns_fd_idx = UINT_MAX, }; - _cleanup_(userns_info_freep) UserNamespaceInfo *userns_info = NULL; - struct stat userns_st; - uid_t peer_uid; int r; assert(link); @@ -1631,32 +1690,49 @@ static int vl_method_add_netif_to_user_namespace(sd_varlink *link, sd_json_varia if (r != 0) return r; + struct stat userns_st; if (fstat(userns_fd, &userns_st) < 0) return -errno; - netns_fd = sd_varlink_take_fd(link, p.netns_fd_idx); - if (netns_fd < 0) - return netns_fd; - - r = validate_netns(link, userns_fd, netns_fd); - if (r != 0) - return r; + if (p.netns_fd_idx != UINT_MAX) { + netns_fd = sd_varlink_take_fd(link, p.netns_fd_idx); + if (netns_fd < 0) + return netns_fd; - if (!streq_ptr(p.mode, "veth")) - return sd_varlink_error_invalid_parameter_name(link, "mode"); + r = validate_netns(link, userns_fd, netns_fd); + if (r != 0) + return r; + } if (p.ifname && !ifname_valid(p.ifname)) return sd_varlink_error_invalid_parameter_name(link, "interfaceName"); + if (streq(p.mode, "veth")) { + /* In veth mode we need a netns */ + + if (netns_fd < 0) + return sd_varlink_error_invalid_parameter_name(link, "networkNamespaceFileDescriptor"); + + } else if (streq(p.mode, "tap")) { + /* In tap mode we want neither a netns nor an interface name for it */ + + if (p.ifname) + return sd_varlink_error_invalid_parameter_name(link, "namespaceInterfaceName"); + + if (netns_fd >= 0) + return 
sd_varlink_error_invalid_parameter_name(link, "networkNamespaceFileDescriptor"); + } else + return sd_varlink_error_invalid_parameter_name(link, "mode"); + registry_dir_fd = userns_registry_open_fd(); if (registry_dir_fd < 0) return registry_dir_fd; - _cleanup_close_ int lock_fd = -EBADF; - lock_fd = userns_registry_lock(registry_dir_fd); + _cleanup_close_ int lock_fd = userns_registry_lock(registry_dir_fd); if (lock_fd < 0) return log_debug_errno(lock_fd, "Failed to open nsresource registry lock file: %m"); + _cleanup_(userns_info_freep) UserNamespaceInfo *userns_info = NULL; r = userns_registry_load_by_userns_inode( registry_dir_fd, userns_st.st_ino, @@ -1667,6 +1743,7 @@ static int vl_method_add_netif_to_user_namespace(sd_varlink *link, sd_json_varia return r; /* Registering a network interface for this client is only allowed for the root or the owner of a userns */ + uid_t peer_uid; r = sd_varlink_get_peer_uid(link, &peer_uid); if (r < 0) return r; @@ -1690,24 +1767,55 @@ static int vl_method_add_netif_to_user_namespace(sd_varlink *link, sd_json_varia if (r < 0) return -ENOMEM; - struct ether_addr ether_addr_host, ether_addr_namespace; + if (!ifname_valid_full(altifname_host, IFNAME_VALID_ALTERNATIVE)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Generated alternative interface name not valid: %s", altifname_host); + struct ether_addr ether_addr_host; hash_ether_addr(userns_info, p.ifname, 0, &ether_addr_host); - hash_ether_addr(userns_info, p.ifname, 1, &ether_addr_namespace); - r = create_veth(netns_fd, - ifname_host, altifname_host, &ether_addr_host, - ifname_namespace, &ether_addr_namespace); - if (r < 0) - return r; + if (streq(p.mode, "veth")) { + struct ether_addr ether_addr_namespace; + hash_ether_addr(userns_info, p.ifname, 1, &ether_addr_namespace); + + r = create_veth(netns_fd, + ifname_host, altifname_host, &ether_addr_host, + ifname_namespace, &ether_addr_namespace); + if (r < 0) + return r; - log_debug("Adding veth tunnel %s from host to userns " INO_FMT " ('%s' 
@ UID " UID_FMT ", interface %s).", - ifname_host, userns_st.st_ino, userns_info->name, userns_info->start_uid, ifname_namespace); + log_debug("Added veth tunnel %s from host to userns " INO_FMT " ('%s' @ UID " UID_FMT ", interface %s).", + ifname_host, userns_st.st_ino, userns_info->name, userns_info->start_uid, ifname_namespace); - return sd_varlink_replybo( - link, - SD_JSON_BUILD_PAIR("hostInterfaceName", SD_JSON_BUILD_STRING(ifname_host)), - SD_JSON_BUILD_PAIR("namespaceInterfaceName", SD_JSON_BUILD_STRING(ifname_namespace))); + return sd_varlink_replybo( + link, + SD_JSON_BUILD_PAIR("hostInterfaceName", SD_JSON_BUILD_STRING(ifname_host)), + SD_JSON_BUILD_PAIR("namespaceInterfaceName", SD_JSON_BUILD_STRING(ifname_namespace))); + + } else if (streq(p.mode, "tap")) { + /* NB: when we do the "tap" stuff we do not actually do any namespace operation here, neither + * netns nor userns. We use the userns only as conduit for user identity information and + * indication that the calling user has some control over the UID they want to assign the tap + * device to. 
*/ + + _cleanup_close_ int tap_fd = create_tap(userns_fd, ifname_host, STRV_MAKE(altifname_host), &ether_addr_host); + if (tap_fd < 0) + return tap_fd; + + log_debug("Added tap device %s from host to userns " INO_FMT " ('%s' @ UID " UID_FMT ").", + ifname_host, userns_st.st_ino, userns_info->name, userns_info->start_uid); + + int fd_index = sd_varlink_push_fd(link, tap_fd); + if (fd_index < 0) + return log_error_errno(fd_index, "Failed to push tap fd into varlink socket: %m"); + + TAKE_FD(tap_fd); + + return sd_varlink_replybo( + link, + SD_JSON_BUILD_PAIR_STRING("hostInterfaceName", ifname_host), + SD_JSON_BUILD_PAIR_INTEGER("interfaceFileDescriptor", fd_index)); + } else + assert_not_reached(); } static int process_connection(sd_varlink_server *server, int _fd) { diff --git a/src/shared/varlink-io.systemd.NamespaceResource.c b/src/shared/varlink-io.systemd.NamespaceResource.c index 145d706c1b6..40d5c82663c 100644 --- a/src/shared/varlink-io.systemd.NamespaceResource.c +++ b/src/shared/varlink-io.systemd.NamespaceResource.c @@ -41,11 +41,12 @@ static SD_VARLINK_DEFINE_METHOD( static SD_VARLINK_DEFINE_METHOD( AddNetworkToUserNamespace, SD_VARLINK_DEFINE_INPUT(userNamespaceFileDescriptor, SD_VARLINK_INT, 0), - SD_VARLINK_DEFINE_INPUT(networkNamespaceFileDescriptor, SD_VARLINK_INT, 0), + SD_VARLINK_DEFINE_INPUT(networkNamespaceFileDescriptor, SD_VARLINK_INT, SD_VARLINK_NULLABLE), SD_VARLINK_DEFINE_INPUT(namespaceInterfaceName, SD_VARLINK_STRING, SD_VARLINK_NULLABLE), SD_VARLINK_DEFINE_INPUT(mode, SD_VARLINK_STRING, 0), SD_VARLINK_DEFINE_OUTPUT(hostInterfaceName, SD_VARLINK_STRING, 0), - SD_VARLINK_DEFINE_OUTPUT(namespaceInterfaceName, SD_VARLINK_STRING, 0)); + SD_VARLINK_DEFINE_OUTPUT(namespaceInterfaceName, SD_VARLINK_STRING, SD_VARLINK_NULLABLE), + SD_VARLINK_DEFINE_OUTPUT(interfaceFileDescriptor, SD_VARLINK_INT, SD_VARLINK_NULLABLE)); static SD_VARLINK_DEFINE_ERROR(UserNamespaceInterfaceNotSupported); static SD_VARLINK_DEFINE_ERROR(NameExists);