From: Yu Watanabe Date: Mon, 17 Feb 2025 06:06:10 +0000 (+0900) Subject: nspawn: enable FUSE unconditionally X-Git-Tag: v258-rc1~1312^2~3 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3cc23a2c2345eb188551565349c89ec1fa8f650f;p=thirdparty%2Fsystemd.git nspawn: enable FUSE unconditionally FUSE has been userns-safe since kernel v4.18 (da315f6e03988a7127680bbc26e1028991b899b8), and our kernel baseline is now 5.4. Let's drop the logic of checking the version of FUSE, and unconditionally enable FUSE. --- diff --git a/src/nspawn/nspawn-register.c b/src/nspawn/nspawn-register.c index 4193a338137..0387e0b7836 100644 --- a/src/nspawn/nspawn-register.c +++ b/src/nspawn/nspawn-register.c @@ -15,7 +15,6 @@ static int append_machine_properties( sd_bus_message *m, - bool enable_fuse, CustomMount *mounts, unsigned n_mounts, int kill_signal, @@ -31,21 +30,17 @@ static int append_machine_properties( /* If you make changes here, also make sure to update systemd-nspawn@.service, to keep the device * policies in sync regardless if we are run with or without the --keep-unit switch. */ - r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 2, - /* Allow the container to access and create the API device nodes, so that - * PrivateDevices= in the container can work fine */ + r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 3, + /* Allow the container to access and create the API device node, so that + * PrivateDevices= in the container can work fine. */ "/dev/net/tun", "rwm", - /* Allow the container access to ptys. However, do not permit the container + /* Allow the container to access ptys. However, do not permit the container * to ever create these device nodes. */ - "char-pts", "rw"); + "char-pts", "rw", + /* Allow the container to access and create the FUSE API device node. 
*/ + "/dev/fuse", "rwm"); if (r < 0) return bus_log_create_error(r); - if (enable_fuse) { - r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 1, - "/dev/fuse", "rwm"); - if (r < 0) - return bus_log_create_error(r); - } FOREACH_ARRAY(cm, mounts, n_mounts) { if (cm->type != CUSTOM_MOUNT_BIND) @@ -204,7 +199,6 @@ int register_machine( r = append_machine_properties( m, - FLAGS_SET(flags, REGISTER_MACHINE_ENABLE_FUSE), mounts, n_mounts, kill_signal, @@ -325,7 +319,6 @@ int allocate_scope( r = append_machine_properties( m, - FLAGS_SET(flags, ALLOCATE_SCOPE_ENABLE_FUSE), mounts, n_mounts, kill_signal, diff --git a/src/nspawn/nspawn-register.h b/src/nspawn/nspawn-register.h index 5e187e33bb8..89e35f02a78 100644 --- a/src/nspawn/nspawn-register.h +++ b/src/nspawn/nspawn-register.h @@ -10,7 +10,6 @@ typedef enum RegisterMachineFlags { REGISTER_MACHINE_KEEP_UNIT = 1 << 0, - REGISTER_MACHINE_ENABLE_FUSE = 1 << 1, } RegisterMachineFlags; int register_machine( @@ -32,7 +31,6 @@ int unregister_machine(sd_bus *bus, const char *machine_name); typedef enum AllocateScopeFlags { ALLOCATE_SCOPE_ALLOW_PIDFD = 1 << 0, - ALLOCATE_SCOPE_ENABLE_FUSE = 1 << 1, } AllocateScopeFlags; int allocate_scope( diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 8fa05b9bc23..31757f4ee10 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -2170,85 +2170,6 @@ static int setup_boot_id(void) { return mount_nofollow_verbose(LOG_ERR, NULL, to, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL); } -static int get_fuse_version(uint32_t *ret_major, uint32_t *ret_minor) { - /* Must be called with mount privileges, either via arg_privileged or by being uid=0 in new - * CLONE_NEWUSER/CLONE_NEWNS namespaces. This is true when called from outer_child(). 
*/ - ssize_t n; - _cleanup_close_ int fuse_fd = -EBADF, mnt_fd = -EBADF; - _cleanup_free_ char *opts = NULL; - union { - char unstructured[FUSE_MIN_READ_BUFFER]; - struct { - struct fuse_in_header header; - /* Don't use :`struct fuse_init_in` because a newer fuse.h might give - * us a bigger struct than what an older kernel actually gives us, and that would - * break our .header.len check. */ - struct { - uint32_t major; - uint32_t minor; - } body; - } structured; - } request; - - assert(ret_major); - assert(ret_minor); - - /* Get a FUSE handle. */ - fuse_fd = open("/dev/fuse", O_CLOEXEC|O_RDWR); - if (fuse_fd < 0) - return log_debug_errno(errno, "Failed to open /dev/fuse: %m"); - if (asprintf(&opts, "fd=%i,rootmode=40000,user_id=0,group_id=0", fuse_fd) < 0) - return log_oom_debug(); - mnt_fd = make_fsmount(LOG_DEBUG, "nspawn-fuse", "fuse.nspawn", 0, opts, -EBADF); - if (mnt_fd < 0) - return mnt_fd; - - /* Read a request from the FUSE handle. */ - n = read(fuse_fd, &request.unstructured, sizeof request); - if (n < 0) - return log_debug_errno(errno, "Failed to read /dev/fuse: %m"); - if ((size_t) n < sizeof request.structured.header || - (size_t) n < request.structured.header.len) - return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Failed to read /dev/fuse: Short read"); - - /* Assume that the request is a FUSE_INIT request, and return the version information from it. 
*/ - if (request.structured.header.opcode != FUSE_INIT) - return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Initial request from /dev/fuse should have opcode=%i (FUSE_INIT), but has opcode=%"PRIu32, - FUSE_INIT, request.structured.header.opcode); - if (request.structured.header.len < sizeof request.structured) - return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Initial FUSE_INIT request from /dev/fuse is too short"); - *ret_major = request.structured.body.major; - *ret_minor = request.structured.body.minor; - return 0; -} - -static bool should_enable_fuse(void) { - uint32_t fuse_major, fuse_minor; - int r; - - r = get_fuse_version(&fuse_major, &fuse_minor); - if (r < 0) { - if (ERRNO_IS_NEG_DEVICE_ABSENT(r)) - log_debug_errno(r, "Disabling FUSE: FUSE appears to be disabled on the host: %m"); - else if (ERRNO_IS_NEG_NOT_SUPPORTED(r)) - log_debug_errno(r, "Disabling FUSE: Kernel does not support the fsopen() family of syscalls: %m"); - else - log_full_errno(ERRNO_IS_NEG_PRIVILEGE(r) ? LOG_DEBUG : LOG_WARNING, r, - "Disabling FUSE: Failed to determine FUSE version: %m"); - return false; - } - - /* FUSE is only userns-safe in FUSE version 7.27 and later. - * https://github.com/torvalds/linux/commit/da315f6e03988a7127680bbc26e1028991b899b8 */ - if (fuse_major < 7 || (fuse_major == 7 && fuse_minor < 27)) { - log_debug("Disabling FUSE: FUSE version %" PRIu32 ".%" PRIu32 " is too old to support user namespaces", - fuse_major, fuse_minor); - return false; - } - - return true; -} - static int bind_mount_devnode(const char *from, const char *to) { int r; @@ -2367,7 +2288,7 @@ static int copy_devnode_one(const char *dest, const char *node, bool ignore_mkno return 0; } -static int copy_devnodes(const char *dest, bool enable_fuse) { +static int copy_devnodes(const char *dest) { int r = 0; assert(dest); @@ -2378,7 +2299,10 @@ static int copy_devnodes(const char *dest, bool enable_fuse) { return r; } - if (enable_fuse) { + /* Create /dev/fuse only when it is accessible. 
The check is necessary, as some custom service + * units that invoke nspawn may enable DevicePolicy= without DeviceAllow= for the device node. */ + _cleanup_close_ int fuse_fd = open("/dev/fuse", O_CLOEXEC|O_RDWR); + if (fuse_fd >= 0) { r = copy_devnode_one(dest, "fuse", /* ignore_mknod_failure = */ false); if (r < 0) return r; @@ -3970,7 +3894,7 @@ static int outer_child( _cleanup_(bind_user_context_freep) BindUserContext *bind_user_context = NULL; _cleanup_strv_free_ char **os_release_pairs = NULL; - bool idmap = false, enable_fuse; + bool idmap = false; const char *p; pid_t pid; ssize_t l; @@ -4314,12 +4238,7 @@ static int outer_child( if (r < 0) return r; - enable_fuse = should_enable_fuse(); - l = send(fd_outer_socket, &enable_fuse, sizeof enable_fuse, 0); - if (l < 0) - return log_error_errno(errno, "Failed to send whether to enable FUSE: %m"); - - r = copy_devnodes(directory, enable_fuse); + r = copy_devnodes(directory); if (r < 0) return r; @@ -5275,7 +5194,6 @@ static int run_container( ssize_t l; sigset_t mask_chld; _cleanup_close_ int child_netns_fd = -EBADF; - bool enable_fuse; assert_se(sigemptyset(&mask_chld) == 0); assert_se(sigaddset(&mask_chld, SIGCHLD) == 0); @@ -5459,12 +5377,6 @@ static int run_container( l, l == 0 ? " The child is most likely dead." : ""); } - l = recv(fd_outer_socket_pair[0], &enable_fuse, sizeof enable_fuse, 0); - if (l < 0) - return log_error_errno(errno, "Failed to read whether to enable FUSE: %m"); - if (l != sizeof enable_fuse) - return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short read while reading whether to enable FUSE."); - /* Wait for the outer child. 
*/ r = wait_for_terminate_and_check("(sd-namespace)", *pid, WAIT_LOG_ABNORMAL); if (r < 0) @@ -5619,7 +5531,6 @@ static int run_container( if (arg_register) { RegisterMachineFlags flags = 0; SET_FLAG(flags, REGISTER_MACHINE_KEEP_UNIT, arg_keep_unit); - SET_FLAG(flags, REGISTER_MACHINE_ENABLE_FUSE, enable_fuse); r = register_machine( bus, arg_machine, @@ -5640,7 +5551,6 @@ static int run_container( } else if (!arg_keep_unit) { AllocateScopeFlags flags = ALLOCATE_SCOPE_ALLOW_PIDFD; - SET_FLAG(flags, ALLOCATE_SCOPE_ENABLE_FUSE, enable_fuse); r = allocate_scope( bus, arg_machine,