#include <errno.h>
#include <getopt.h>
+#include <linux/fuse.h>
#include <linux/loop.h>
#if HAVE_SELINUX
#include <selinux/selinux.h>
return mount_nofollow_verbose(LOG_ERR, NULL, to, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL);
}
-static int copy_devnodes(const char *dest) {
+static int get_fuse_version(uint32_t *ret_major, uint32_t *ret_minor) {
+        /* Probes the FUSE protocol version announced by the kernel and stores it in *ret_major and
+         * *ret_minor. Returns 0 on success and a negative errno-style value on failure.
+         *
+         * The probe opens /dev/fuse, passes that fd to make_fsmount() through the "fd=" mount option,
+         * and then reads one request from the handle. That request is expected to be FUSE_INIT, whose
+         * body starts with the major and minor version numbers.
+         *
+         * Must be called with mount privileges, either via arg_privileged or by being uid=0 in new
+         * CLONE_NEWUSER/CLONE_NEWNS namespaces. This is true when called from outer_child(). */
+        _cleanup_close_ int fuse_fd = -EBADF, mnt_fd = -EBADF;
+        _cleanup_free_ char *mount_opts = NULL;
+        ssize_t nread;
+        union {
+                /* Raw view, sized so that the read() below offers at least FUSE_MIN_READ_BUFFER bytes. */
+                char raw[FUSE_MIN_READ_BUFFER];
+                struct {
+                        struct fuse_in_header header;
+                        /* Deliberately not <linux/fuse.h>:`struct fuse_init_in`: a newer fuse.h might
+                         * define a bigger struct than what an older kernel actually sends, and that
+                         * would break the header.len check further down. Only the two leading fields
+                         * are needed here. */
+                        struct {
+                                uint32_t major;
+                                uint32_t minor;
+                        } version;
+                } init;
+        } req;
+        const struct fuse_in_header *hdr = &req.init.header;
+
+        assert(ret_major);
+        assert(ret_minor);
+
+        /* Get a FUSE handle and hand it to make_fsmount() via the fd= option. */
+        fuse_fd = open("/dev/fuse", O_CLOEXEC|O_RDWR);
+        if (fuse_fd < 0)
+                return log_debug_errno(errno, "Failed to open /dev/fuse: %m");
+
+        if (asprintf(&mount_opts, "fd=%i,rootmode=40000,user_id=0,group_id=0", fuse_fd) < 0)
+                return log_oom_debug();
+
+        mnt_fd = make_fsmount(LOG_DEBUG, "nspawn-fuse", "fuse.nspawn", 0, mount_opts, -EBADF);
+        if (mnt_fd < 0)
+                return mnt_fd;
+
+        /* Read a single request from the FUSE handle. */
+        nread = read(fuse_fd, &req, sizeof req);
+        if (nread < 0)
+                return log_debug_errno(errno, "Failed to read /dev/fuse: %m");
+
+        /* The header must have arrived completely before its len field may be consulted. */
+        if ((size_t) nread < sizeof req.init.header)
+                return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Failed to read /dev/fuse: Short read");
+        if ((size_t) nread < hdr->len)
+                return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Failed to read /dev/fuse: Short read");
+
+        /* Assume that the request is a FUSE_INIT request; anything else is treated as an I/O error. */
+        if (hdr->opcode != FUSE_INIT)
+                return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Initial request from /dev/fuse should have opcode=%i (FUSE_INIT), but has opcode=%"PRIu32,
+                                       FUSE_INIT, hdr->opcode);
+
+        /* The request has to be long enough to actually contain the two version fields. */
+        if (hdr->len < sizeof req.init)
+                return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Initial FUSE_INIT request from /dev/fuse is too short");
+
+        *ret_major = req.init.version.major;
+        *ret_minor = req.init.version.minor;
+        return 0;
+}
+
+static bool should_enable_fuse(void) {
+        /* Returns true only if the FUSE version could be determined via get_fuse_version() and is at
+         * least 7.27. Every reason for returning false is logged here, so callers need not log. */
+        uint32_t major_ver, minor_ver;
+        bool userns_safe;
+        int r;
+
+        r = get_fuse_version(&major_ver, &minor_ver);
+        if (r >= 0) {
+                /* FUSE is only userns-safe in FUSE version 7.27 and later.
+                 * https://github.com/torvalds/linux/commit/da315f6e03988a7127680bbc26e1028991b899b8 */
+                userns_safe = major_ver > 7 || (major_ver == 7 && minor_ver >= 27);
+                if (!userns_safe)
+                        log_debug("Disabling FUSE: FUSE version %" PRIu32 ".%" PRIu32 " is too old to support user namespaces",
+                                  major_ver, minor_ver);
+
+                return userns_safe;
+        }
+
+        /* The probe failed. The two expected causes are logged at debug level; anything else is
+         * unexpected and logged as a warning. FUSE stays disabled in all three cases. */
+        if (ERRNO_IS_NEG_DEVICE_ABSENT(r))
+                log_debug_errno(r, "Disabling FUSE: FUSE appears to be disabled on the host: %m");
+        else if (r == -ENOSYS)
+                log_debug_errno(r, "Disabling FUSE: Kernel does not support the fsopen() family of syscalls: %m");
+        else
+                log_warning_errno(r, "Disabling FUSE: Failed to determine FUSE version: %m");
+
+        return false;
+}
+
+static int copy_devnodes(const char *dest, bool enable_fuse) {
_cleanup_strv_free_ char **devnodes = NULL;
int r = 0;
"random",
"urandom",
"tty",
+ STRV_IFNOTNULL(enable_fuse ? "fuse" : NULL),
"net/tun");
if (!devnodes)
return log_oom();
_cleanup_(bind_user_context_freep) BindUserContext *bind_user_context = NULL;
_cleanup_strv_free_ char **os_release_pairs = NULL;
_cleanup_close_ int fd = -EBADF, mntns_fd = -EBADF;
- bool idmap = false;
+ bool idmap = false, enable_fuse;
const char *p;
pid_t pid;
ssize_t l;
if (r < 0)
return r;
- r = copy_devnodes(directory);
+ enable_fuse = should_enable_fuse();
+ l = send(fd_outer_socket, &enable_fuse, sizeof enable_fuse, 0);
+ if (l < 0)
+ return log_error_errno(errno, "Failed to send whether to enable FUSE: %m");
+
+ r = copy_devnodes(directory, enable_fuse);
if (r < 0)
return r;
ssize_t l;
sigset_t mask_chld;
_cleanup_close_ int child_netns_fd = -EBADF;
+ bool enable_fuse;
assert_se(sigemptyset(&mask_chld) == 0);
assert_se(sigaddset(&mask_chld, SIGCHLD) == 0);
l, l == 0 ? " The child is most likely dead." : "");
}
+ l = recv(fd_outer_socket_pair[0], &enable_fuse, sizeof enable_fuse, 0);
+ if (l < 0)
+ return log_error_errno(errno, "Failed to read whether to enable FUSE: %m");
+ if (l != sizeof enable_fuse)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short read while reading whether to enable FUSE.");
+
/* Wait for the outer child. */
r = wait_for_terminate_and_check("(sd-namespace)", *pid, WAIT_LOG_ABNORMAL);
if (r < 0)
if (arg_register) {
RegisterMachineFlags flags = 0;
SET_FLAG(flags, REGISTER_MACHINE_KEEP_UNIT, arg_keep_unit);
+ SET_FLAG(flags, REGISTER_MACHINE_ENABLE_FUSE, enable_fuse);
r = register_machine(
bus,
arg_machine,
} else if (!arg_keep_unit) {
AllocateScopeFlags flags = ALLOCATE_SCOPE_ALLOW_PIDFD;
+ SET_FLAG(flags, ALLOCATE_SCOPE_ENABLE_FUSE, enable_fuse);
r = allocate_scope(
bus,
arg_machine,
echo hello | cmp "$tmpdir/stdout.txt" -
}
+testcase_fuse() {  # Checks that /dev/fuse inside an nspawn container can be opened read/write.
+ if [[ "$(cat <>/dev/fuse 2>&1)" != 'cat: -: Operation not permitted' ]]; then  # Same probe, run on the host first.
+ echo "FUSE is not supported, skipping the test..."
+ return 0
+ fi
+
+ # Assume that the tests are running on a kernel that is new enough for FUSE
+ # to have user-namespace support, so we expect that nspawn enables FUSE.
+ # This test does not validate that the version check disables FUSE on old
+ # kernels.
+
+ local root
+
+ root="$(mktemp -d /var/lib/machines/TEST-13-NSPAWN.fuse.XXX)"
+ create_dummy_container "$root"
+
+ # To avoid adding any complex dependencies to the test, we simply check
+ # that /dev/fuse can be opened for reading and writing (O_RDWR), but that
+ # actually reading from it fails with EPERM. This can be done with a
+ # simple Bash script: run `cat <>/dev/fuse`; if the EPERM error message
+ # comes from "bash" then we know it couldn't be opened, while if it comes
+ # from "cat" then we know that it was opened but not read. If we are able
+ # to read from the file, then this indicates that it's not a real FUSE
+ # device (a real one returns EPERM on read until a type="fuse" filesystem
+ # has been mounted with the option string "fd=${num}" naming the
+ # /dev/fuse FD); if this happens then most likely nspawn didn't create
+ # the file at all and Bash "<>" simply created a new normal file.
+ # The three possible outputs and their meaning:
+ #
+ # "cat: -: Operation not permitted" # pass the test; opened but not read
+ # "bash: line 1: /dev/fuse: Operation not permitted" # fail the test; could not open
+ # "" # fail the test; reading worked
+ [[ "$(systemd-nspawn --pipe --directory="$root" \
+ bash -c 'cat <>/dev/fuse' 2>&1)" == 'cat: -: Operation not permitted' ]]
+
+ rm -fr "$root"  # NOTE(review): skipped if the check above aborts the testcase; confirm cleanup on failure
+}
+
+testcase_unpriv_fuse() {
+ # Same check as testcase_fuse, but for unprivileged operation: nspawn runs as testuser on a disk image.
+
+ if [[ "$(cat <>/dev/fuse 2>&1)" != 'cat: -: Operation not permitted' ]]; then  # Host probe, as in testcase_fuse.
+ echo "FUSE is not supported, skipping the test..."
+ return 0
+ fi
+ if ! can_do_rootless_nspawn; then
+ echo "Skipping rootless test..."
+ return 0
+ fi
+
+ local tmpdir name
+ tmpdir="$(mktemp -d /var/tmp/TEST-13-NSPAWN.unpriv-fuse.XXX)"
+ # $name must be such that len("ns-$(id -u testuser)-nspawn-${name}-65535")
+ # <= 31, or nsresourced will reject the request for a namespace.
+ # Therefore, len($name) <= 10 bytes.
+ name="ufuse-${tmpdir##*.}"  # "ufuse-" plus the random suffix that mktemp substituted for XXX
+ trap 'rm -fr ${tmpdir@Q} || true; rm -f /run/verity.d/test-13-nspawn-${name@Q} || true' RETURN ERR  # best-effort cleanup
+ create_dummy_ddi "$tmpdir" "$name"
+ chown --recursive testuser: "$tmpdir"
+
+ [[ "$(systemd-run \
+ --pipe \
+ --uid=testuser \
+ --property=Delegate=yes \
+ --setenv=SYSTEMD_LOG_LEVEL \
+ --setenv=SYSTEMD_LOG_TARGET \
+ -- \
+ systemd-nspawn --pipe --private-network --register=no --keep-unit --image="$tmpdir/$name.raw" \
+ bash -c 'cat <>/dev/fuse' 2>&1)" == *'cat: -: Operation not permitted' ]]  # leading * allows other output before the message
+}
+
run_testcases