From: Lennart Poettering Date: Fri, 8 Nov 2024 11:15:16 +0000 (+0100) Subject: mntfsd: add api to mount dirs for containers X-Git-Tag: v258-rc1~1502^2~7 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=d6f8e1ae879ed1676406b61b6f4dba1bdd3749ae;p=thirdparty%2Fsystemd.git mntfsd: add api to mount dirs for containers systemd-mountfsd so far provided a MountImage() API call for mounting a disk image and returning a set of mount fds. This complements the API with a new MountDirectory() API call, that operates on a directory instead of an image file. Now, what makes this interesting is that it applies an idmapping from the foreign UID range to the provided target userns – and in that case unprivileged operation is allowed (well, under some conditions: in particular the client must own a parent dir of the provided path). This allows container managers to run fully unprivileged from directories – as long as those directories are owned by the foreign UID range. Basic operation is like this: 1. acquire a transient userns from systemd-nsresourced with 64K users 2. ask systemd-mountfsd for an idmapped mount of the container dir matching that userns 3. join the userns and bind the mount fd as root. Note that we have to drop various sandboxing knobs from the mountfsd service file for this to work, since the kernel's security checks that try to ensure that an obstructed /proc/ cannot be circumvented via mounting a new procfs will otherwise prevent mountfsd from duplicating the mounts properly. --- diff --git a/src/mountfsd/io.systemd.mount-file-system.policy b/src/mountfsd/io.systemd.mount-file-system.policy index 6a151eb4374..78613bfdaf6 100644 --- a/src/mountfsd/io.systemd.mount-file-system.policy +++ b/src/mountfsd/io.systemd.mount-file-system.policy @@ -67,4 +67,53 @@ io.systemd.mount-file-system.mount-image-privately + + + + + Allow mounting of directory + Authentication is required for an application to mount directory $(directory). 
+ + auth_admin_keep + auth_admin_keep + yes + + + + + + Allow mounting of untrusted directory + Authentication is required for an application to mount directory $(directory) which is not owned by the user. + + auth_admin + auth_admin + auth_admin + + + io.systemd.mount-file-system.mount-directory + + + + + Allow private mounting of directory + Authentication is required for an application to privately mount directory $(directory). + + yes + yes + yes + + + + + Allow private mounting of untrusted directory + Authentication is required for an application to privately mount directory $(directory) which is not owned by the user. + + auth_admin + auth_admin + auth_admin + + + io.systemd.mount-file-system.mount-directory-privately + diff --git a/src/mountfsd/mountwork.c b/src/mountfsd/mountwork.c index 31f2d47546c..2fd610f5d65 100644 --- a/src/mountfsd/mountwork.c +++ b/src/mountfsd/mountwork.c @@ -1,5 +1,10 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ +#include +#if WANT_LINUX_FS_H +#include +#endif + #include "sd-daemon.h" #include "sd-varlink.h" @@ -15,12 +20,17 @@ #include "json-util.h" #include "main-func.h" #include "missing_loop.h" +#include "missing_mount.h" +#include "missing_syscall.h" #include "namespace-util.h" #include "nsresource.h" #include "nulstr-util.h" #include "os-util.h" #include "process-util.h" #include "stat-util.h" +#include "string-table.h" +#include "uid-classification.h" +#include "uid-range.h" #include "user-util.h" #include "varlink-io.systemd.MountFileSystem.h" #include "varlink-util.h" @@ -532,6 +542,349 @@ static int vl_method_mount_image( SD_JSON_BUILD_PAIR_CONDITION(!sd_id128_is_null(di->image_uuid), "imageUuid", SD_JSON_BUILD_UUID(di->image_uuid))); } +typedef enum MountMapMode { + MOUNT_MAP_AUTO = 0, /* determine automatically from image and caller */ + MOUNT_MAP_ROOT, /* map caller's UID to root in namespace (map 1 UID only) */ + MOUNT_MAP_FOREIGN, /* map foreign UID range to base in namespace (map 64K) */ + 
MOUNT_MAP_IDENTITY, /* apply identity mapping (map 64K) */ + _MOUNT_MAP_MODE_MAX, + _MOUNT_MAP_MODE_INVALID = -EINVAL, +} MountMapMode; + +static const char *const mount_map_mode_table[_MOUNT_MAP_MODE_MAX] = { + [MOUNT_MAP_AUTO] = "auto", + [MOUNT_MAP_ROOT] = "root", + [MOUNT_MAP_FOREIGN] = "foreign", + [MOUNT_MAP_IDENTITY] = "identity", +}; + +DEFINE_PRIVATE_STRING_TABLE_LOOKUP(mount_map_mode, MountMapMode); + +typedef struct MountDirectoryParameters { + MountMapMode mode; + unsigned directory_fd_idx; + unsigned userns_fd_idx; + int read_only; +} MountDirectoryParameters; + +typedef enum DirectoryOwnership { + DIRECTORY_IS_ROOT_PEER_OWNED, /* This is returned if the directory is owned by the root user and the peer is root */ + DIRECTORY_IS_ROOT_OWNED, /* This is returned if the directory is owned by the root user (and the peer user is not root) */ + DIRECTORY_IS_PEER_OWNED, /* This is returned if the directory is owned by the peer user (who is not root) */ + DIRECTORY_IS_FOREIGN_OWNED, /* This is returned if the directory is owned by the foreign UID range */ + DIRECTORY_IS_OTHERWISE_OWNED, /* This is returned if the directory is owned by something else */ + _DIRECTORY_OWNERSHIP_MAX, + _DIRECTORY_OWNERSHIP_ERRNO_MAX = -ERRNO_MAX, /* Guarantee the whole negative errno range fits */ +} DirectoryOwnership; + +static MountMapMode default_mount_map_mode(DirectoryOwnership ownership) { + /* Derives a suitable mapping mode from the ownership of the base tree */ + + switch (ownership) { + case DIRECTORY_IS_PEER_OWNED: + return MOUNT_MAP_ROOT; /* Map the peer's UID to root in the container */ + + case DIRECTORY_IS_FOREIGN_OWNED: + return MOUNT_MAP_FOREIGN; /* Map the foreign UID range to the container's UID range */ + + case DIRECTORY_IS_ROOT_PEER_OWNED: + case DIRECTORY_IS_ROOT_OWNED: + case DIRECTORY_IS_OTHERWISE_OWNED: + return MOUNT_MAP_IDENTITY; /* Don't map */ + + default: + return _MOUNT_MAP_MODE_INVALID; + } +} + +static 
JSON_DISPATCH_ENUM_DEFINE(dispatch_mount_directory_mode, MountMapMode, mount_map_mode_from_string); + +static DirectoryOwnership validate_directory_fd(int fd, uid_t peer_uid) { + int r, fl; + + assert(fd >= 0); + + /* Checks if the specified directory fd looks sane. Returns a DirectoryOwnership that categorizes the + * ownership situation in comparison to the peer's UID. + * + * Note one key difference to image validation (as implemented above): for regular files if the + * client provided us with an open fd it implies the client has access, as well as what kind of + * access (i.e. ro or rw). But for directories this doesn't work the same way, as directories are + * always opened read-only. Hence we use a different mechanism to validate access to them: we + * check if the directory is owned by the peer UID or by the foreign UID range (in the latter case + * one of the parent directories must be owned by the peer though). */ + + struct stat st; + if (fstat(fd, &st) < 0) + return log_debug_errno(errno, "Failed to stat() directory fd: %m"); + + r = stat_verify_directory(&st); + if (r < 0) + return r; + + fl = fd_verify_safe_flags_full(fd, O_DIRECTORY); + if (fl < 0) + return log_debug_errno(fl, "Directory file descriptor has unsafe flags set: %m"); + + if (st.st_uid == 0) { + if (peer_uid == 0) { + log_debug("Directory file descriptor points to root owned directory, who is also the peer."); + return DIRECTORY_IS_ROOT_PEER_OWNED; + } + log_debug("Directory file descriptor points to root owned directory."); + return DIRECTORY_IS_ROOT_OWNED; + } + if (st.st_uid == peer_uid) { + log_debug("Directory file descriptor points to peer owned directory."); + return DIRECTORY_IS_PEER_OWNED; + } + + /* For bind mounted directories we check if they are either owned by the client's UID, or by the + * foreign UID set, but in that case the parent directory must be owned by the client's UID, or some + * directory iteratively up the chain */ + + _cleanup_close_ int parent_fd = 
-EBADF; + unsigned n_level; + for (n_level = 0; n_level < 16; n_level++) { + /* Stop iteration if we find a directory up the tree that is neither owned by the user, nor is from the foreign UID range */ + if (!uid_is_foreign(st.st_uid) || !gid_is_foreign(st.st_gid)) { + log_debug("Directory file descriptor points to directory which itself or its parents is neither owned by foreign UID range nor by the user."); + return DIRECTORY_IS_OTHERWISE_OWNED; + } + + /* If the peer is root, then it doesn't matter if we find a parent owned by root, let's shortcut things. */ + if (peer_uid == 0) { + log_debug("Directory file descriptor is owned by foreign UID range, and peer is root."); + return DIRECTORY_IS_FOREIGN_OWNED; + } + + /* Go one level up, relative to the directory inspected in the previous iteration (or the + * original fd on the first iteration), so that we actually walk up the chain of ancestors + * instead of reopening the same parent over and over. */ + _cleanup_close_ int new_parent_fd = openat(parent_fd >= 0 ? parent_fd : fd, "..", O_DIRECTORY|O_PATH|O_CLOEXEC); + if (new_parent_fd < 0) + return log_debug_errno(errno, "Failed to open parent directory of directory file descriptor: %m"); + + struct stat new_st; + if (fstat(new_parent_fd, &new_st) < 0) + return log_debug_errno(errno, "Failed to stat parent directory of directory file descriptor: %m"); + + /* Safety check to see if we hit the root dir */ + if (stat_inode_same(&st, &new_st)) { + log_debug("Directory file descriptor is owned by foreign UID range, but didn't find parent directory that is owned by peer among ancestors."); + return DIRECTORY_IS_OTHERWISE_OWNED; + } + + if (new_st.st_uid == peer_uid) { /* Parent inode is owned by the peer. That's good! Everything's fine. 
*/ + log_debug("Directory file descriptor is owned by foreign UID range, and ancestor is owned by peer."); + return DIRECTORY_IS_FOREIGN_OWNED; + } + + close_and_replace(parent_fd, new_parent_fd); + st = new_st; + } + + log_debug("Failed to find peer owned parent directory after %u levels, refusing.", n_level); + return DIRECTORY_IS_OTHERWISE_OWNED; +} + +static int vl_method_mount_directory( + sd_varlink *link, + sd_json_variant *parameters, + sd_varlink_method_flags_t flags, + void *userdata) { + + static const sd_json_dispatch_field dispatch_table[] = { + { "mode", SD_JSON_VARIANT_STRING, dispatch_mount_directory_mode, offsetof(MountDirectoryParameters, mode), 0 }, + { "directoryFileDescriptor", SD_JSON_VARIANT_UNSIGNED, sd_json_dispatch_uint, offsetof(MountDirectoryParameters, directory_fd_idx), SD_JSON_MANDATORY }, + { "userNamespaceFileDescriptor", SD_JSON_VARIANT_UNSIGNED, sd_json_dispatch_uint, offsetof(MountDirectoryParameters, userns_fd_idx), 0 }, + { "readOnly", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_tristate, offsetof(MountDirectoryParameters, read_only), 0 }, + VARLINK_DISPATCH_POLKIT_FIELD, + {} + }; + + MountDirectoryParameters p = { + .mode = MOUNT_MAP_AUTO, + .directory_fd_idx = UINT_MAX, + .userns_fd_idx = UINT_MAX, + .read_only = -1, + }; + _cleanup_close_ int directory_fd = -EBADF, userns_fd = -EBADF; + Hashmap **polkit_registry = ASSERT_PTR(userdata); + int r; + + r = sd_varlink_dispatch(link, parameters, dispatch_table, &p); + if (r != 0) + return r; + + if (p.directory_fd_idx == UINT_MAX) + return sd_varlink_error_invalid_parameter_name(link, "directoryFileDescriptor"); + + directory_fd = sd_varlink_peek_dup_fd(link, p.directory_fd_idx); + if (directory_fd < 0) + return log_debug_errno(directory_fd, "Failed to peek directory fd from client: %m"); + + if (p.userns_fd_idx != UINT_MAX) { + userns_fd = sd_varlink_peek_dup_fd(link, p.userns_fd_idx); + if (userns_fd < 0) + return log_debug_errno(userns_fd, "Failed to peek user namespace fd from 
client: %m"); + } + + uid_t peer_uid; + r = sd_varlink_get_peer_uid(link, &peer_uid); + if (r < 0) + return log_debug_errno(r, "Failed to get client UID: %m"); + + DirectoryOwnership owned_by = validate_directory_fd(directory_fd, peer_uid); + if (owned_by < 0) + return owned_by; + + r = validate_userns(link, &userns_fd); + if (r != 0) + return r; + + /* If no mode is specified, pick sensible default */ + if (p.mode <= 0) { + p.mode = default_mount_map_mode(owned_by); + assert(p.mode > 0); + } + + _cleanup_free_ char *directory_path = NULL; + (void) fd_get_path(directory_fd, &directory_path); + + log_debug("Mounting '%s' with mapping mode: %s", strna(directory_path), mount_map_mode_to_string(p.mode)); + + const char *polkit_details[] = { + "read_only", one_zero(p.read_only > 0), + "directory", strna(directory_path), + NULL, + }; + + const char *polkit_action, *polkit_untrusted_action; + PolkitFlags polkit_flags; + if (userns_fd < 0) { + /* Mount into the host user namespace */ + polkit_action = "io.systemd.mount-file-system.mount-directory"; + polkit_untrusted_action = "io.systemd.mount-file-system.mount-untrusted-directory"; + polkit_flags = 0; + } else { + /* Mount into a private user namespace */ + polkit_action = "io.systemd.mount-file-system.mount-directory-privately"; + polkit_untrusted_action = "io.systemd.mount-file-system.mount-untrusted-directory-privately"; + + /* If polkit is not around, let's allow mounting authenticated images by default */ + polkit_flags = POLKIT_DEFAULT_ALLOW; + } + + /* We consider a directory "trusted" if it is owned by the peer or the foreign UID range */ + bool trusted_directory = IN_SET(owned_by, DIRECTORY_IS_ROOT_PEER_OWNED, DIRECTORY_IS_PEER_OWNED, DIRECTORY_IS_FOREIGN_OWNED); + + /* Let's definitely acquire the regular action privilege, for mounting properly signed images */ + r = varlink_verify_polkit_async_full( + link, + /* bus= */ NULL, + trusted_directory ? 
polkit_action : polkit_untrusted_action, + polkit_details, + /* good_user= */ UID_INVALID, + trusted_directory ? polkit_flags : 0, + polkit_registry); + if (r <= 0) + return r; + + /* Generate the common dissection directory here. We are not going to use it, but the clients might, + * and they likely are unprivileged, hence cannot create it themselves. Hence let's just create it + * here, if it is missing. */ + r = get_common_dissect_directory(NULL); + if (r < 0) + return r; + + _cleanup_close_ int mount_fd = open_tree(directory_fd, "", OPEN_TREE_CLONE|OPEN_TREE_CLOEXEC|AT_SYMLINK_NOFOLLOW|AT_EMPTY_PATH); + if (mount_fd < 0) + return log_debug_errno(errno, "Failed to issue open_tree() of provided directory '%s': %m", strna(directory_path)); + + if (p.read_only > 0 && mount_setattr( + mount_fd, "", AT_EMPTY_PATH, + &(struct mount_attr) { + .attr_set = MOUNT_ATTR_RDONLY, + }, MOUNT_ATTR_SIZE_VER0) < 0) + return log_debug_errno(errno, "Failed to enable read-only mode: %m"); + + if (p.mode != MOUNT_MAP_IDENTITY) { + uid_t start; + + if (userns_fd >= 0) { + _cleanup_(uid_range_freep) UIDRange *uid_range_outside = NULL, *uid_range_inside = NULL, *gid_range_outside = NULL, *gid_range_inside = NULL; + r = uid_range_load_userns_by_fd(userns_fd, UID_RANGE_USERNS_OUTSIDE, &uid_range_outside); + if (r < 0) + return log_debug_errno(r, "Failed to load outside UID range of provided userns: %m"); + r = uid_range_load_userns_by_fd(userns_fd, UID_RANGE_USERNS_INSIDE, &uid_range_inside); + if (r < 0) + return log_debug_errno(r, "Failed to load inside UID range of provided userns: %m"); + r = uid_range_load_userns_by_fd(userns_fd, GID_RANGE_USERNS_OUTSIDE, &gid_range_outside); + if (r < 0) + return log_debug_errno(r, "Failed to load outside GID range of provided userns: %m"); + r = uid_range_load_userns_by_fd(userns_fd, GID_RANGE_USERNS_INSIDE, &gid_range_inside); + if (r < 0) + return log_debug_errno(r, "Failed to load inside GID range of provided userns: %m"); + + /* Be very strict 
for now */ + if (!uid_range_equal(uid_range_outside, gid_range_outside) || + !uid_range_equal(uid_range_inside, gid_range_inside) || + uid_range_outside->n_entries != 1 || + uid_range_outside->entries[0].nr != 0x10000 || + uid_range_inside->n_entries != 1 || + uid_range_inside->entries[0].start != 0 || + uid_range_inside->entries[0].nr != 0x10000) + return sd_varlink_error_invalid_parameter_name(link, "userNamespaceFileDescriptor"); + + start = uid_range_outside->entries[0].start; + } else + start = 0; + + _cleanup_free_ char *new_uid_map = NULL; + switch (p.mode) { + case MOUNT_MAP_ROOT: + r = strextendf(&new_uid_map, UID_FMT " " UID_FMT " " UID_FMT, + peer_uid, start, (uid_t) 1); + break; + case MOUNT_MAP_FOREIGN: + r = strextendf(&new_uid_map, UID_FMT " " UID_FMT " " UID_FMT, + (uid_t) FOREIGN_UID_MIN, start, (uid_t) 0x10000); + break; + default: + assert_not_reached(); + } + if (r < 0) + return r; + + _cleanup_close_ int idmap_userns_fd = userns_acquire(new_uid_map, new_uid_map); + if (idmap_userns_fd < 0) + return log_debug_errno(idmap_userns_fd, "Failed to acquire user namespace for id mapping: %m"); + + if (mount_setattr(mount_fd, "", AT_EMPTY_PATH, + &(struct mount_attr) { + .attr_set = MOUNT_ATTR_IDMAP, + .userns_fd = idmap_userns_fd, + .propagation = MS_PRIVATE, + }, MOUNT_ATTR_SIZE_VER0) < 0) + return log_debug_errno(errno, "Failed to enable id mapping: %m"); + } + + if (userns_fd >= 0) { + r = nsresource_add_mount(userns_fd, mount_fd); + if (r < 0) + return r; + } + + int fd_idx = sd_varlink_push_fd(link, mount_fd); + if (fd_idx < 0) + return fd_idx; + + TAKE_FD(mount_fd); + + return sd_varlink_replybo( + link, + SD_JSON_BUILD_PAIR("mountFileDescriptor", SD_JSON_BUILD_INTEGER(fd_idx))); +} + static int process_connection(sd_varlink_server *server, int _fd) { _cleanup_close_ int fd = TAKE_FD(_fd); /* always take possession */ _cleanup_(sd_varlink_close_unrefp) sd_varlink *vl = NULL; @@ -601,7 +954,8 @@ static int run(int argc, char *argv[]) { r = 
sd_varlink_server_bind_method_many( server, - "io.systemd.MountFileSystem.MountImage", vl_method_mount_image); + "io.systemd.MountFileSystem.MountImage", vl_method_mount_image, + "io.systemd.MountFileSystem.MountDirectory", vl_method_mount_directory); if (r < 0) return log_error_errno(r, "Failed to bind methods: %m"); diff --git a/src/shared/varlink-io.systemd.MountFileSystem.c b/src/shared/varlink-io.systemd.MountFileSystem.c index 43b812b0d23..78e7ce06ab4 100644 --- a/src/shared/varlink-io.systemd.MountFileSystem.c +++ b/src/shared/varlink-io.systemd.MountFileSystem.c @@ -49,6 +49,31 @@ static SD_VARLINK_DEFINE_METHOD( SD_VARLINK_DEFINE_OUTPUT(imageName, SD_VARLINK_STRING, SD_VARLINK_NULLABLE), SD_VARLINK_DEFINE_OUTPUT(imageUuid, SD_VARLINK_STRING, SD_VARLINK_NULLABLE)); +static SD_VARLINK_DEFINE_ENUM_TYPE( + MountMapMode, + SD_VARLINK_FIELD_COMMENT("Map the caller's UID to root in the user namespace, do not map anything else."), + SD_VARLINK_DEFINE_ENUM_VALUE(root), + SD_VARLINK_FIELD_COMMENT("Map the foreign UID range to the base UID range in the user namespace (i.e. UID zero and above), covering 64K users."), + SD_VARLINK_DEFINE_ENUM_VALUE(foreign), + SD_VARLINK_FIELD_COMMENT("Apply an identity (1:1) mapping, but limit it to 64K users."), + SD_VARLINK_DEFINE_ENUM_VALUE(identity), + SD_VARLINK_FIELD_COMMENT("Determine automatically based on provided directory and caller."), + SD_VARLINK_DEFINE_ENUM_VALUE(auto)); + +static SD_VARLINK_DEFINE_METHOD( + MountDirectory, + SD_VARLINK_FIELD_COMMENT("Directory file descriptor of the directory to assign to the user namespace. Must be a regular, i.e. non-O_PATH file descriptor."), + SD_VARLINK_DEFINE_INPUT(directoryFileDescriptor, SD_VARLINK_INT, 0), + SD_VARLINK_FIELD_COMMENT("File descriptor to the user namespace to assign this directory to. 
If not specified uses the host user namespace."), + SD_VARLINK_DEFINE_INPUT(userNamespaceFileDescriptor, SD_VARLINK_INT, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("Whether to mark the resulting mount file descriptor as read-only. If not specified defaults to false."), + SD_VARLINK_DEFINE_INPUT(readOnly, SD_VARLINK_BOOL, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("Which kinda of UID/GID mapping to apply to the resulting mount file descriptor."), + SD_VARLINK_DEFINE_INPUT_BY_TYPE(mode, MountMapMode, SD_VARLINK_NULLABLE), + VARLINK_DEFINE_POLKIT_INPUT, + SD_VARLINK_FIELD_COMMENT("The freshly allocated mount file descriptor for the mount."), + SD_VARLINK_DEFINE_OUTPUT(mountFileDescriptor, SD_VARLINK_INT, 0)); + static SD_VARLINK_DEFINE_ERROR(IncompatibleImage); static SD_VARLINK_DEFINE_ERROR(MultipleRootPartitionsFound); static SD_VARLINK_DEFINE_ERROR(RootPartitionNotFound); @@ -59,9 +84,17 @@ static SD_VARLINK_DEFINE_ERROR(VerityFailure); SD_VARLINK_DEFINE_INTERFACE( io_systemd_MountFileSystem, "io.systemd.MountFileSystem", + SD_VARLINK_INTERFACE_COMMENT("APIs for unpriviliged mounting."), + SD_VARLINK_SYMBOL_COMMENT("Encodes the designated purpose of a partition."), &vl_type_PartitionDesignator, + SD_VARLINK_SYMBOL_COMMENT("Information about a specific partition."), &vl_type_PartitionInfo, + SD_VARLINK_SYMBOL_COMMENT("Selects the type of UID/GID mapping to apply."), + &vl_type_MountMapMode, + SD_VARLINK_SYMBOL_COMMENT("Takes a disk image file descriptor as input, returns a set of mount file descriptors for it."), &vl_method_MountImage, + SD_VARLINK_SYMBOL_COMMENT("Takes a directory file descriptor as input, returns a mount file descriptor."), + &vl_method_MountDirectory, &vl_error_IncompatibleImage, &vl_error_MultipleRootPartitionsFound, &vl_error_RootPartitionNotFound, diff --git a/units/systemd-mountfsd.service.in b/units/systemd-mountfsd.service.in index ef413684c36..00a0827fbcd 100644 --- a/units/systemd-mountfsd.service.in +++ 
b/units/systemd-mountfsd.service.in @@ -25,13 +25,7 @@ LimitNOFILE={{HIGH_RLIMIT_NOFILE}} LockPersonality=yes MemoryDenyWriteExecute=yes NoNewPrivileges=yes -ProtectProc=invisible -ProtectControlGroups=yes -ProtectHome=yes ProtectHostname=yes -ProtectKernelLogs=yes -ProtectKernelModules=yes -ProtectSystem=strict RestrictAddressFamilies=AF_UNIX AF_NETLINK AF_INET AF_INET6 RestrictRealtime=yes RestrictSUIDSGID=yes