git.ipfire.org Git - thirdparty/systemd.git/commitdiff
nspawn: enable FUSE unconditionally
author Yu Watanabe <watanabe.yu+github@gmail.com>
Mon, 17 Feb 2025 06:06:10 +0000 (15:06 +0900)
committer Yu Watanabe <watanabe.yu+github@gmail.com>
Tue, 18 Feb 2025 14:24:20 +0000 (23:24 +0900)
FUSE has been userns-safe since kernel v4.18 (da315f6e03988a7127680bbc26e1028991b899b8),
and our kernel baseline is now 5.4. Let's drop the logic that checks
the FUSE version, and enable FUSE unconditionally.

src/nspawn/nspawn-register.c
src/nspawn/nspawn-register.h
src/nspawn/nspawn.c

index 4193a338137de3ba7fe917fffc465b3498b60fc1..0387e0b7836e2d500b3721222616f59c30c938ee 100644 (file)
@@ -15,7 +15,6 @@
 
 static int append_machine_properties(
                 sd_bus_message *m,
-                bool enable_fuse,
                 CustomMount *mounts,
                 unsigned n_mounts,
                 int kill_signal,
@@ -31,21 +30,17 @@ static int append_machine_properties(
 
         /* If you make changes here, also make sure to update systemd-nspawn@.service, to keep the device
          * policies in sync regardless if we are run with or without the --keep-unit switch. */
-        r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 2,
-                                  /* Allow the container to access and create the API device nodes, so that
-                                   * PrivateDevices= in the container can work fine */
+        r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 3,
+                                  /* Allow the container to access and create the API device node, so that
+                                   * PrivateDevices= in the container can work fine. */
                                   "/dev/net/tun", "rwm",
-                                  /* Allow the container access to ptys. However, do not permit the container
+                                  /* Allow the container to access ptys. However, do not permit the container
                                    * to ever create these device nodes. */
-                                  "char-pts", "rw");
+                                  "char-pts", "rw",
+                                  /* Allow the container to access and create the FUSE API device node. */
+                                  "/dev/fuse", "rwm");
         if (r < 0)
                 return bus_log_create_error(r);
-        if (enable_fuse) {
-                r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 1,
-                                          "/dev/fuse", "rwm");
-                if (r < 0)
-                        return bus_log_create_error(r);
-        }
 
         FOREACH_ARRAY(cm, mounts, n_mounts) {
                 if (cm->type != CUSTOM_MOUNT_BIND)
@@ -204,7 +199,6 @@ int register_machine(
 
                 r = append_machine_properties(
                                 m,
-                                FLAGS_SET(flags, REGISTER_MACHINE_ENABLE_FUSE),
                                 mounts,
                                 n_mounts,
                                 kill_signal,
@@ -325,7 +319,6 @@ int allocate_scope(
 
         r = append_machine_properties(
                         m,
-                        FLAGS_SET(flags, ALLOCATE_SCOPE_ENABLE_FUSE),
                         mounts,
                         n_mounts,
                         kill_signal,
index 5e187e33bb8727c99e104aca800c602e0b79ad98..89e35f02a781688273e45ba40d96679a307d0264 100644 (file)
@@ -10,7 +10,6 @@
 
 typedef enum RegisterMachineFlags {
         REGISTER_MACHINE_KEEP_UNIT   = 1 << 0,
-        REGISTER_MACHINE_ENABLE_FUSE = 1 << 1,
 } RegisterMachineFlags;
 
 int register_machine(
@@ -32,7 +31,6 @@ int unregister_machine(sd_bus *bus, const char *machine_name);
 
 typedef enum AllocateScopeFlags {
         ALLOCATE_SCOPE_ALLOW_PIDFD = 1 << 0,
-        ALLOCATE_SCOPE_ENABLE_FUSE = 1 << 1,
 } AllocateScopeFlags;
 
 int allocate_scope(
index 8fa05b9bc2346b32446bd2eb1a8305c9b6d59b35..31757f4ee102af44c38b3cd122fb3457bac4de13 100644 (file)
@@ -2170,85 +2170,6 @@ static int setup_boot_id(void) {
         return mount_nofollow_verbose(LOG_ERR, NULL, to, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL);
 }
 
-static int get_fuse_version(uint32_t *ret_major, uint32_t *ret_minor) {
-        /* Must be called with mount privileges, either via arg_privileged or by being uid=0 in new
-         * CLONE_NEWUSER/CLONE_NEWNS namespaces. This is true when called from outer_child(). */
-        ssize_t n;
-        _cleanup_close_ int fuse_fd = -EBADF, mnt_fd = -EBADF;
-        _cleanup_free_ char *opts = NULL;
-        union {
-                char unstructured[FUSE_MIN_READ_BUFFER];
-                struct {
-                        struct fuse_in_header header;
-                        /* Don't use <linux/fuse.h>:`struct fuse_init_in` because a newer fuse.h might give
-                         * us a bigger struct than what an older kernel actually gives us, and that would
-                         * break our .header.len check. */
-                        struct {
-                                uint32_t major;
-                                uint32_t minor;
-                        } body;
-                } structured;
-        } request;
-
-        assert(ret_major);
-        assert(ret_minor);
-
-        /* Get a FUSE handle. */
-        fuse_fd = open("/dev/fuse", O_CLOEXEC|O_RDWR);
-        if (fuse_fd < 0)
-                return log_debug_errno(errno, "Failed to open /dev/fuse: %m");
-        if (asprintf(&opts, "fd=%i,rootmode=40000,user_id=0,group_id=0", fuse_fd) < 0)
-                return log_oom_debug();
-        mnt_fd = make_fsmount(LOG_DEBUG, "nspawn-fuse", "fuse.nspawn", 0, opts, -EBADF);
-        if (mnt_fd < 0)
-                return mnt_fd;
-
-        /* Read a request from the FUSE handle. */
-        n = read(fuse_fd, &request.unstructured, sizeof request);
-        if (n < 0)
-                return log_debug_errno(errno, "Failed to read /dev/fuse: %m");
-        if ((size_t) n < sizeof request.structured.header ||
-            (size_t) n < request.structured.header.len)
-                return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Failed to read /dev/fuse: Short read");
-
-        /* Assume that the request is a FUSE_INIT request, and return the version information from it. */
-        if (request.structured.header.opcode != FUSE_INIT)
-                return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Initial request from /dev/fuse should have opcode=%i (FUSE_INIT), but has opcode=%"PRIu32,
-                                       FUSE_INIT, request.structured.header.opcode);
-        if (request.structured.header.len < sizeof request.structured)
-                return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Initial FUSE_INIT request from /dev/fuse is too short");
-        *ret_major = request.structured.body.major;
-        *ret_minor = request.structured.body.minor;
-        return 0;
-}
-
-static bool should_enable_fuse(void) {
-        uint32_t fuse_major, fuse_minor;
-        int r;
-
-        r = get_fuse_version(&fuse_major, &fuse_minor);
-        if (r < 0) {
-                if (ERRNO_IS_NEG_DEVICE_ABSENT(r))
-                        log_debug_errno(r, "Disabling FUSE: FUSE appears to be disabled on the host: %m");
-                else if (ERRNO_IS_NEG_NOT_SUPPORTED(r))
-                        log_debug_errno(r, "Disabling FUSE: Kernel does not support the fsopen() family of syscalls: %m");
-                else
-                        log_full_errno(ERRNO_IS_NEG_PRIVILEGE(r) ? LOG_DEBUG : LOG_WARNING, r,
-                                       "Disabling FUSE: Failed to determine FUSE version: %m");
-                return false;
-        }
-
-        /* FUSE is only userns-safe in FUSE version 7.27 and later.
-         * https://github.com/torvalds/linux/commit/da315f6e03988a7127680bbc26e1028991b899b8 */
-        if (fuse_major < 7 || (fuse_major == 7 && fuse_minor < 27)) {
-                log_debug("Disabling FUSE: FUSE version %" PRIu32 ".%" PRIu32 " is too old to support user namespaces",
-                          fuse_major, fuse_minor);
-                return false;
-        }
-
-        return true;
-}
-
 static int bind_mount_devnode(const char *from, const char *to) {
         int r;
 
@@ -2367,7 +2288,7 @@ static int copy_devnode_one(const char *dest, const char *node, bool ignore_mkno
         return 0;
 }
 
-static int copy_devnodes(const char *dest, bool enable_fuse) {
+static int copy_devnodes(const char *dest) {
         int r = 0;
 
         assert(dest);
@@ -2378,7 +2299,10 @@ static int copy_devnodes(const char *dest, bool enable_fuse) {
                         return r;
         }
 
-        if (enable_fuse) {
+        /* Create /dev/fuse only when it is accessible. The check is necessary, as some custom service
+         * units that invoke nspawn may enable DevicePolicy= without DeviceAllow= for the device node. */
+        _cleanup_close_ int fuse_fd = open("/dev/fuse", O_CLOEXEC|O_RDWR);
+        if (fuse_fd >= 0) {
                 r = copy_devnode_one(dest, "fuse", /* ignore_mknod_failure = */ false);
                 if (r < 0)
                         return r;
@@ -3970,7 +3894,7 @@ static int outer_child(
 
         _cleanup_(bind_user_context_freep) BindUserContext *bind_user_context = NULL;
         _cleanup_strv_free_ char **os_release_pairs = NULL;
-        bool idmap = false, enable_fuse;
+        bool idmap = false;
         const char *p;
         pid_t pid;
         ssize_t l;
@@ -4314,12 +4238,7 @@ static int outer_child(
         if (r < 0)
                 return r;
 
-        enable_fuse = should_enable_fuse();
-        l = send(fd_outer_socket, &enable_fuse, sizeof enable_fuse, 0);
-        if (l < 0)
-                return log_error_errno(errno, "Failed to send whether to enable FUSE: %m");
-
-        r = copy_devnodes(directory, enable_fuse);
+        r = copy_devnodes(directory);
         if (r < 0)
                 return r;
 
@@ -5275,7 +5194,6 @@ static int run_container(
         ssize_t l;
         sigset_t mask_chld;
         _cleanup_close_ int child_netns_fd = -EBADF;
-        bool enable_fuse;
 
         assert_se(sigemptyset(&mask_chld) == 0);
         assert_se(sigaddset(&mask_chld, SIGCHLD) == 0);
@@ -5459,12 +5377,6 @@ static int run_container(
                                                l, l == 0 ? " The child is most likely dead." : "");
         }
 
-        l = recv(fd_outer_socket_pair[0], &enable_fuse, sizeof enable_fuse, 0);
-        if (l < 0)
-                return log_error_errno(errno, "Failed to read whether to enable FUSE: %m");
-        if (l != sizeof enable_fuse)
-                return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short read while reading whether to enable FUSE.");
-
         /* Wait for the outer child. */
         r = wait_for_terminate_and_check("(sd-namespace)", *pid, WAIT_LOG_ABNORMAL);
         if (r < 0)
@@ -5619,7 +5531,6 @@ static int run_container(
         if (arg_register) {
                 RegisterMachineFlags flags = 0;
                 SET_FLAG(flags, REGISTER_MACHINE_KEEP_UNIT, arg_keep_unit);
-                SET_FLAG(flags, REGISTER_MACHINE_ENABLE_FUSE, enable_fuse);
                 r = register_machine(
                                 bus,
                                 arg_machine,
@@ -5640,7 +5551,6 @@ static int run_container(
 
         } else if (!arg_keep_unit) {
                 AllocateScopeFlags flags = ALLOCATE_SCOPE_ALLOW_PIDFD;
-                SET_FLAG(flags, ALLOCATE_SCOPE_ENABLE_FUSE, enable_fuse);
                 r = allocate_scope(
                                 bus,
                                 arg_machine,