git.ipfire.org Git - thirdparty/linux.git/commitdiff
fuse: {io-uring} Handle SQEs - register commands
authorBernd Schubert <bschubert@ddn.com>
Mon, 20 Jan 2025 01:28:59 +0000 (02:28 +0100)
committerMiklos Szeredi <mszeredi@redhat.com>
Fri, 24 Jan 2025 10:54:08 +0000 (11:54 +0100)
This adds basic support for ring SQEs (with opcode=IORING_OP_URING_CMD).
For now only FUSE_IO_URING_CMD_REGISTER is handled to register queue
entries.

Signed-off-by: Bernd Schubert <bschubert@ddn.com>
Reviewed-by: Pavel Begunkov <asml.silence@gmail.com> # io_uring
Reviewed-by: Luis Henriques <luis@igalia.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
fs/fuse/Kconfig
fs/fuse/Makefile
fs/fuse/dev_uring.c [new file with mode: 0644]
fs/fuse/dev_uring_i.h [new file with mode: 0644]
fs/fuse/fuse_i.h
fs/fuse/inode.c
include/uapi/linux/fuse.h

index 8674dbfbe59dbf79c304c587b08ebba3cfe405be..ca215a3cba3e310d1359d069202193acdcdb172b 100644 (file)
@@ -63,3 +63,15 @@ config FUSE_PASSTHROUGH
          to be performed directly on a backing file.
 
          If you want to allow passthrough operations, answer Y.
+
+config FUSE_IO_URING
+       bool "FUSE communication over io-uring"
+       default y
+       depends on FUSE_FS
+       depends on IO_URING
+       help
+         This allows sending FUSE requests over the io-uring interface and
+         also adds request core affinity.
+
+         If you want to allow fuse server/client communication through io-uring,
+         answer Y.
index 2c372180d631eb340eca36f19ee2c2686de9714d..3f0f312a31c1cc200c0c91a086b30a8318e39d94 100644 (file)
@@ -15,5 +15,6 @@ fuse-y += iomode.o
 fuse-$(CONFIG_FUSE_DAX) += dax.o
 fuse-$(CONFIG_FUSE_PASSTHROUGH) += passthrough.o
 fuse-$(CONFIG_SYSCTL) += sysctl.o
+fuse-$(CONFIG_FUSE_IO_URING) += dev_uring.o
 
 virtiofs-y := virtio_fs.o
diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
new file mode 100644 (file)
index 0000000..42092a2
--- /dev/null
@@ -0,0 +1,326 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * FUSE: Filesystem in Userspace
+ * Copyright (c) 2023-2024 DataDirect Networks.
+ */
+
+#include "fuse_i.h"
+#include "dev_uring_i.h"
+#include "fuse_dev_i.h"
+
+#include <linux/fs.h>
+#include <linux/io_uring/cmd.h>
+
+/*
+ * Opt-in switch: fuse-io-uring stays disabled unless this module
+ * parameter is set (0644: runtime-writable via sysfs).
+ */
+static bool __read_mostly enable_uring;
+module_param(enable_uring, bool, 0644);
+MODULE_PARM_DESC(enable_uring,
+                "Enable userspace communication through io-uring");
+
+#define FUSE_URING_IOV_SEGS 2 /* header and payload */
+
+
+/* True when the "enable_uring" module parameter is set */
+bool fuse_uring_enabled(void)
+{
+       return enable_uring;
+}
+
+/*
+ * Free all ring queues and the ring itself and clear fc->ring.
+ * Safe to call when no ring was ever created (fc->ring == NULL).
+ * Queue lists are expected to be empty by now; the WARN_ONs flag
+ * a lifetime bug otherwise.
+ */
+void fuse_uring_destruct(struct fuse_conn *fc)
+{
+       struct fuse_ring *ring = fc->ring;
+       int qid;
+
+       if (!ring)
+               return;
+
+       for (qid = 0; qid < ring->nr_queues; qid++) {
+               struct fuse_ring_queue *queue = ring->queues[qid];
+
+               if (!queue)
+                       continue;
+
+               WARN_ON(!list_empty(&queue->ent_avail_queue));
+               WARN_ON(!list_empty(&queue->ent_commit_queue));
+
+               kfree(queue);
+               ring->queues[qid] = NULL;
+       }
+
+       kfree(ring->queues);
+       kfree(ring);
+       fc->ring = NULL;
+}
+
+/*
+ * Basic ring setup for this connection based on the provided configuration.
+ * Returns the ring now installed in fc->ring (this thread's allocation or
+ * a concurrently created one), or NULL on allocation failure.
+ */
+static struct fuse_ring *fuse_uring_create(struct fuse_conn *fc)
+{
+       struct fuse_ring *ring;
+       size_t nr_queues = num_possible_cpus();
+       /* res holds the winner of a creation race; stays NULL on alloc error */
+       struct fuse_ring *res = NULL;
+       size_t max_payload_size;
+
+       ring = kzalloc(sizeof(*fc->ring), GFP_KERNEL_ACCOUNT);
+       if (!ring)
+               return NULL;
+
+       /* one queue pointer per possible CPU; queues are created on demand */
+       ring->queues = kcalloc(nr_queues, sizeof(struct fuse_ring_queue *),
+                              GFP_KERNEL_ACCOUNT);
+       if (!ring->queues)
+               goto out_err;
+
+       /* payload must fit any request: reads, writes and page-sized args */
+       max_payload_size = max(FUSE_MIN_READ_BUFFER, fc->max_write);
+       max_payload_size = max(max_payload_size, fc->max_pages * PAGE_SIZE);
+
+       spin_lock(&fc->lock);
+       if (fc->ring) {
+               /* race, another thread created the ring in the meantime */
+               spin_unlock(&fc->lock);
+               res = fc->ring;
+               goto out_err;
+       }
+
+       fc->ring = ring;
+       ring->nr_queues = nr_queues;
+       ring->fc = fc;
+       ring->max_payload_sz = max_payload_size;
+
+       spin_unlock(&fc->lock);
+       return ring;
+
+out_err:
+       kfree(ring->queues);
+       kfree(ring);
+       return res;
+}
+
+/*
+ * Allocate and publish the queue for @qid. If another thread won the
+ * race, the existing queue is returned and the local allocation freed.
+ * Returns NULL only on allocation failure.
+ */
+static struct fuse_ring_queue *fuse_uring_create_queue(struct fuse_ring *ring,
+                                                      int qid)
+{
+       struct fuse_conn *fc = ring->fc;
+       struct fuse_ring_queue *queue;
+
+       queue = kzalloc(sizeof(*queue), GFP_KERNEL_ACCOUNT);
+       if (!queue)
+               return NULL;
+       queue->qid = qid;
+       queue->ring = ring;
+       spin_lock_init(&queue->lock);
+
+       INIT_LIST_HEAD(&queue->ent_avail_queue);
+       INIT_LIST_HEAD(&queue->ent_commit_queue);
+
+       /* fc->lock serializes concurrent creators of the same qid */
+       spin_lock(&fc->lock);
+       if (ring->queues[qid]) {
+               spin_unlock(&fc->lock);
+               kfree(queue);
+               return ring->queues[qid];
+       }
+
+       /*
+        * write_once and lock as the caller mostly doesn't take the lock at all
+        */
+       WRITE_ONCE(ring->queues[qid], queue);
+       spin_unlock(&fc->lock);
+
+       return queue;
+}
+
+/*
+ * Make a ring entry available for fuse_req assignment.
+ * Caller must hold queue->lock (it protects ent->state and the lists).
+ */
+static void fuse_uring_ent_avail(struct fuse_ring_ent *ent,
+                                struct fuse_ring_queue *queue)
+{
+       /* an available entry must have a cmd to complete it with later */
+       WARN_ON_ONCE(!ent->cmd);
+       list_move(&ent->list, &queue->ent_avail_queue);
+       ent->state = FRRS_AVAILABLE;
+}
+
+/*
+ * fuse_uring_req_fetch command handling: remember the registering cmd
+ * on the entry and park the entry on the queue's available list.
+ */
+static void fuse_uring_do_register(struct fuse_ring_ent *ent,
+                                  struct io_uring_cmd *cmd,
+                                  unsigned int issue_flags)
+{
+       struct fuse_ring_queue *queue = ent->queue;
+
+       spin_lock(&queue->lock);
+       ent->cmd = cmd;
+       fuse_uring_ent_avail(ent, queue);
+       spin_unlock(&queue->lock);
+}
+
+/*
+ * sqe->addr is a ptr to an iovec array, iov[0] has the headers, iov[1]
+ * the payload. Copies both iovecs into the caller-provided array;
+ * returns 0 or a negative errno.
+ */
+static int fuse_uring_get_iovec_from_sqe(const struct io_uring_sqe *sqe,
+                                        struct iovec iov[FUSE_URING_IOV_SEGS])
+{
+       struct iovec __user *uiov = u64_to_user_ptr(READ_ONCE(sqe->addr));
+       struct iov_iter iter;
+       ssize_t ret;
+
+       if (sqe->len != FUSE_URING_IOV_SEGS)
+               return -EINVAL;
+
+       /*
+        * Direction for buffer access will actually be READ and WRITE,
+        * using write for the import should include READ access as well.
+        */
+       ret = import_iovec(WRITE, uiov, FUSE_URING_IOV_SEGS,
+                          FUSE_URING_IOV_SEGS, &iov, &iter);
+       if (ret < 0)
+               return ret;
+
+       /*
+        * NOTE(review): relies on import_iovec() filling the on-stack fast
+        * array (nr_segs == fast_segs), so nothing is allocated and nothing
+        * needs freeing here — confirm against the iov_iter API.
+        */
+       return 0;
+}
+
+/*
+ * Allocate a ring entry from the iovecs in the SQE: iov[0] must be big
+ * enough for the request header, iov[1] must cover the ring's maximum
+ * payload size. Returns ERR_PTR(-EINVAL) on malformed buffers and
+ * ERR_PTR(-ENOMEM) on allocation failure.
+ */
+static struct fuse_ring_ent *
+fuse_uring_create_ring_ent(struct io_uring_cmd *cmd,
+                          struct fuse_ring_queue *queue)
+{
+       struct fuse_ring *ring = queue->ring;
+       struct fuse_ring_ent *ent;
+       size_t payload_size;
+       struct iovec iov[FUSE_URING_IOV_SEGS];
+       int err;
+
+       err = fuse_uring_get_iovec_from_sqe(cmd->sqe, iov);
+       if (err) {
+               pr_info_ratelimited("Failed to get iovec from sqe, err=%d\n",
+                                   err);
+               return ERR_PTR(err);
+       }
+
+       err = -EINVAL;
+       if (iov[0].iov_len < sizeof(struct fuse_uring_req_header)) {
+               pr_info_ratelimited("Invalid header len %zu\n", iov[0].iov_len);
+               return ERR_PTR(err);
+       }
+
+       /* the payload buffer must be able to hold a maximum-size request */
+       payload_size = iov[1].iov_len;
+       if (payload_size < ring->max_payload_sz) {
+               pr_info_ratelimited("Invalid req payload len %zu\n",
+                                   payload_size);
+               return ERR_PTR(err);
+       }
+
+       err = -ENOMEM;
+       ent = kzalloc(sizeof(*ent), GFP_KERNEL_ACCOUNT);
+       if (!ent)
+               return ERR_PTR(err);
+
+       INIT_LIST_HEAD(&ent->list);
+
+       ent->queue = queue;
+       ent->headers = iov[0].iov_base;
+       ent->payload = iov[1].iov_base;
+
+       return ent;
+}
+
+/*
+ * Register header and payload buffer with the kernel and puts the
+ * entry as "ready to get fuse requests" on the queue.
+ * Creates the ring and the target queue lazily on first use.
+ */
+static int fuse_uring_register(struct io_uring_cmd *cmd,
+                              unsigned int issue_flags, struct fuse_conn *fc)
+{
+       const struct fuse_uring_cmd_req *cmd_req = io_uring_sqe_cmd(cmd->sqe);
+       struct fuse_ring *ring = fc->ring;
+       struct fuse_ring_queue *queue;
+       struct fuse_ring_ent *ent;
+       int err;
+       /* qid is picked by the fuse server; range-checked below */
+       unsigned int qid = READ_ONCE(cmd_req->qid);
+
+       err = -ENOMEM;
+       if (!ring) {
+               ring = fuse_uring_create(fc);
+               if (!ring)
+                       return err;
+       }
+
+       if (qid >= ring->nr_queues) {
+               pr_info_ratelimited("fuse: Invalid ring qid %u\n", qid);
+               return -EINVAL;
+       }
+
+       /*
+        * NOTE(review): queues[qid] is published with WRITE_ONCE() in
+        * fuse_uring_create_queue(); a READ_ONCE() here would be the
+        * symmetric access — confirm whether that matters on this path.
+        */
+       queue = ring->queues[qid];
+       if (!queue) {
+               queue = fuse_uring_create_queue(ring, qid);
+               if (!queue)
+                       return err;
+       }
+
+       /*
+        * The created queue above does not need to be destructed in
+        * case of entry errors below, will be done at ring destruction time.
+        */
+
+       ent = fuse_uring_create_ring_ent(cmd, queue);
+       if (IS_ERR(ent))
+               return PTR_ERR(ent);
+
+       fuse_uring_do_register(ent, cmd, issue_flags);
+
+       return 0;
+}
+
+/*
+ * Entry function from io_uring to handle the given passthrough command
+ * (op code IORING_OP_URING_CMD). Returns -EIOCBQUEUED on success to
+ * tell io_uring the command stays in flight, a negative errno otherwise.
+ */
+int __maybe_unused fuse_uring_cmd(struct io_uring_cmd *cmd,
+                                 unsigned int issue_flags)
+{
+       struct fuse_dev *fud;
+       struct fuse_conn *fc;
+       u32 cmd_op = cmd->cmd_op;
+       int err;
+
+       if (!enable_uring) {
+               pr_info_ratelimited("fuse-io-uring is disabled\n");
+               return -EOPNOTSUPP;
+       }
+
+       /* This extra SQE size holds struct fuse_uring_cmd_req */
+       if (!(issue_flags & IO_URING_F_SQE128))
+               return -EINVAL;
+
+       fud = fuse_get_dev(cmd->file);
+       if (!fud) {
+               pr_info_ratelimited("No fuse device found\n");
+               return -ENOTCONN;
+       }
+       fc = fud->fc;
+
+       /* reject commands on dead or dying connections */
+       if (fc->aborted)
+               return -ECONNABORTED;
+       if (!fc->connected)
+               return -ENOTCONN;
+
+       /*
+        * fuse_uring_register() needs the ring to be initialized,
+        * we need to know the max payload size
+        */
+       if (!fc->initialized)
+               return -EAGAIN;
+
+       switch (cmd_op) {
+       case FUSE_IO_URING_CMD_REGISTER:
+               err = fuse_uring_register(cmd, issue_flags, fc);
+               if (err) {
+                       pr_info_once("FUSE_IO_URING_CMD_REGISTER failed err=%d\n",
+                                    err);
+                       return err;
+               }
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return -EIOCBQUEUED;
+}
diff --git a/fs/fuse/dev_uring_i.h b/fs/fuse/dev_uring_i.h
new file mode 100644 (file)
index 0000000..ae15363
--- /dev/null
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * FUSE: Filesystem in Userspace
+ * Copyright (c) 2023-2024 DataDirect Networks.
+ */
+
+#ifndef _FS_FUSE_DEV_URING_I_H
+#define _FS_FUSE_DEV_URING_I_H
+
+#include "fuse_i.h"
+
+#ifdef CONFIG_FUSE_IO_URING
+
+/* Lifecycle states of a struct fuse_ring_ent (protected by queue->lock) */
+enum fuse_ring_req_state {
+       FRRS_INVALID = 0,
+
+       /* The ring entry received from userspace and it is being processed */
+       FRRS_COMMIT,
+
+       /* The ring entry is waiting for new fuse requests */
+       FRRS_AVAILABLE,
+
+       /* The ring entry is in or on the way to user space */
+       FRRS_USERSPACE,
+};
+
+/** A fuse ring entry, part of the ring queue */
+struct fuse_ring_ent {
+       /* userspace buffer */
+       struct fuse_uring_req_header __user *headers;
+       void __user *payload;
+
+       /* the ring queue that owns the request */
+       struct fuse_ring_queue *queue;
+
+       /* fields below are protected by queue->lock */
+
+       /* the io_uring command that registered this entry */
+       struct io_uring_cmd *cmd;
+
+       /* membership on ent_avail_queue / ent_commit_queue */
+       struct list_head list;
+
+       enum fuse_ring_req_state state;
+
+       /* NOTE(review): not yet used in this patch — assigned later */
+       struct fuse_req *fuse_req;
+};
+
+/* Per-qid queue of ring entries; one slot per possible CPU core */
+struct fuse_ring_queue {
+       /*
+        * back pointer to the main fuse uring structure that holds this
+        * queue
+        */
+       struct fuse_ring *ring;
+
+       /* queue id, corresponds to the cpu core */
+       unsigned int qid;
+
+       /*
+        * queue lock, taken when any value in the queue changes _and_ also
+        * a ring entry state changes.
+        */
+       spinlock_t lock;
+
+       /* available ring entries (struct fuse_ring_ent) */
+       struct list_head ent_avail_queue;
+
+       /*
+        * entries in the process of being committed or in the process
+        * to be sent to userspace
+        */
+       struct list_head ent_commit_queue;
+};
+
+/**
+ * Describes if uring is used for communication and holds all the data
+ * needed for uring communication
+ */
+struct fuse_ring {
+       /* back pointer */
+       struct fuse_conn *fc;
+
+       /* number of ring queues */
+       size_t nr_queues;
+
+       /* maximum payload/arg size */
+       size_t max_payload_sz;
+
+       /* per-qid queue pointers, created on demand (see dev_uring.c) */
+       struct fuse_ring_queue **queues;
+};
+
+bool fuse_uring_enabled(void);
+void fuse_uring_destruct(struct fuse_conn *fc);
+int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags);
+
+#else /* CONFIG_FUSE_IO_URING */
+
+struct fuse_ring;
+
+/*
+ * NOTE(review): this stub has no CONFIG_FUSE_IO_URING counterpart —
+ * fuse_uring_create() in dev_uring.c is static and returns a
+ * struct fuse_ring *. The stub appears unused; confirm it can be dropped.
+ */
+static inline void fuse_uring_create(struct fuse_conn *fc)
+{
+}
+
+/* no-op when fuse-io-uring support is compiled out */
+static inline void fuse_uring_destruct(struct fuse_conn *fc)
+{
+}
+
+static inline bool fuse_uring_enabled(void)
+{
+       return false;
+}
+
+#endif /* CONFIG_FUSE_IO_URING */
+
+#endif /* _FS_FUSE_DEV_URING_I_H */
index 5666900bee5e28cacfb03728f84571f5c0c94784..bce8cc482d6425a64c930c9b646d2f74e81323c8 100644 (file)
@@ -923,6 +923,11 @@ struct fuse_conn {
        /** IDR for backing files ids */
        struct idr backing_files_map;
 #endif
+
+#ifdef CONFIG_FUSE_IO_URING
+       /** uring connection information */
+       struct fuse_ring *ring;
+#endif
 };
 
 /*
index 3ce4f4e81d09e867c3a7db7b1dbb819f88ed34ef..e4f9bbacfc1bc6f51d5d01b4c47b42cc159ed783 100644 (file)
@@ -7,6 +7,7 @@
 */
 
 #include "fuse_i.h"
+#include "dev_uring_i.h"
 
 #include <linux/pagemap.h>
 #include <linux/slab.h>
@@ -992,6 +993,8 @@ static void delayed_release(struct rcu_head *p)
 {
        struct fuse_conn *fc = container_of(p, struct fuse_conn, rcu);
 
+       fuse_uring_destruct(fc);
+
        put_user_ns(fc->user_ns);
        fc->release(fc);
 }
@@ -1446,6 +1449,13 @@ void fuse_send_init(struct fuse_mount *fm)
        if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
                flags |= FUSE_PASSTHROUGH;
 
+       /*
+        * This is just an information flag for fuse server. No need to check
+        * the reply - server is either sending IORING_OP_URING_CMD or not.
+        */
+       if (fuse_uring_enabled())
+               flags |= FUSE_OVER_IO_URING;
+
        ia->in.flags = flags;
        ia->in.flags2 = flags >> 32;
 
index f1e99458e29e4fdce5273bc3def242342f207ebd..5e0eb41d967e9de5951673de4405a3ed22cdd8e2 100644 (file)
  *
  *  7.41
  *  - add FUSE_ALLOW_IDMAP
+ *  7.42
+ *  - Add FUSE_OVER_IO_URING and all other io-uring related flags and data
+ *    structures:
+ *    - struct fuse_uring_ent_in_out
+ *    - struct fuse_uring_req_header
+ *    - struct fuse_uring_cmd_req
+ *    - FUSE_URING_IN_OUT_HEADER_SZ
+ *    - FUSE_URING_OP_IN_OUT_SZ
+ *    - enum fuse_uring_cmd
  */
 
 #ifndef _LINUX_FUSE_H
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 41
+#define FUSE_KERNEL_MINOR_VERSION 42
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -425,6 +434,7 @@ struct fuse_file_lock {
  * FUSE_HAS_RESEND: kernel supports resending pending requests, and the high bit
  *                 of the request ID indicates resend requests
  * FUSE_ALLOW_IDMAP: allow creation of idmapped mounts
+ * FUSE_OVER_IO_URING: Indicate that client supports io-uring
  */
 #define FUSE_ASYNC_READ                (1 << 0)
 #define FUSE_POSIX_LOCKS       (1 << 1)
@@ -471,6 +481,7 @@ struct fuse_file_lock {
 /* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */
 #define FUSE_DIRECT_IO_RELAX   FUSE_DIRECT_IO_ALLOW_MMAP
 #define FUSE_ALLOW_IDMAP       (1ULL << 40)
+#define FUSE_OVER_IO_URING     (1ULL << 41)
 
 /**
  * CUSE INIT request/reply flags
@@ -1206,4 +1217,67 @@ struct fuse_supp_groups {
        uint32_t        groups[];
 };
 
+/**
+ * Size of the ring buffer header
+ */
+#define FUSE_URING_IN_OUT_HEADER_SZ 128
+#define FUSE_URING_OP_IN_OUT_SZ 128
+
+/* Used as part of the fuse_uring_req_header */
+struct fuse_uring_ent_in_out {
+       uint64_t flags;
+
+       /*
+        * commit ID to be used in a reply to a ring request (see also
+        * struct fuse_uring_cmd_req)
+        */
+       uint64_t commit_id;
+
+       /* size of user payload buffer */
+       uint32_t payload_sz;
+       /* pads payload_sz out to a 64-bit boundary — presumably for ABI
+        * stability of this uapi struct; keep zeroed */
+       uint32_t padding;
+
+       uint64_t reserved;
+};
+
+/**
+ * Header for all fuse-io-uring requests
+ */
+struct fuse_uring_req_header {
+       /* struct fuse_in_header / struct fuse_out_header */
+       char in_out[FUSE_URING_IN_OUT_HEADER_SZ];
+
+       /* per op code header */
+       char op_in[FUSE_URING_OP_IN_OUT_SZ];
+
+       struct fuse_uring_ent_in_out ring_ent_in_out;
+};
+
+/**
+ * sqe commands to the kernel
+ */
+enum fuse_uring_cmd {
+       FUSE_IO_URING_CMD_INVALID = 0,
+
+       /* register the request buffer and fetch a fuse request */
+       FUSE_IO_URING_CMD_REGISTER = 1,
+
+       /* commit fuse request result and fetch next request */
+       FUSE_IO_URING_CMD_COMMIT_AND_FETCH = 2,
+};
+
+/**
+ * In the 80B command area of the SQE.
+ */
+struct fuse_uring_cmd_req {
+       uint64_t flags;
+
+       /* entry identifier for commits */
+       uint64_t commit_id;
+
+       /* queue the command is for (queue index) */
+       uint16_t qid;
+       /* pads the struct to an 8-byte multiple; keep zeroed */
+       uint8_t padding[6];
+};
+
 #endif /* _LINUX_FUSE_H */