ublk: handle UBLK_U_IO_PREP_IO_CMDS
author Ming Lei <ming.lei@redhat.com>
Fri, 16 Jan 2026 14:18:37 +0000 (22:18 +0800)
committer Jens Axboe <axboe@kernel.dk>
Fri, 23 Jan 2026 03:05:40 +0000 (20:05 -0700)
Implement handling of the UBLK_U_IO_PREP_IO_CMDS command, which allows
userspace to prepare a batch of I/O requests with a single uring_cmd.
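
For illustration only, a rough userspace sketch of issuing one such batch with
liburing. It assumes a ring created with IORING_SETUP_SQE128, that struct
ublk_batch_io and struct ublk_elem_header are the uapi types from
<linux/ublk_cmd.h> (only the fields referenced by this patch are used), and
that io_uring_sqe_cmd() returns the SQE's command payload area; the helper
name prep_io_cmds() is made up and error handling is omitted:

    #include <liburing.h>
    #include <linux/ublk_cmd.h>
    #include <stdint.h>
    #include <string.h>

    static int prep_io_cmds(struct io_uring *ring, int ublk_ch_fd,
                            __u16 q_id, __u16 q_depth,
                            struct ublk_elem_header *elems)
    {
            struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
            struct ublk_batch_io *uc;
            __u16 tag;

            /* one fixed-size element per tag, no per-element buffer address */
            for (tag = 0; tag < q_depth; tag++) {
                    elems[tag].tag = tag;
                    elems[tag].buf_index = tag; /* only used with auto buffer registration */
            }

            memset(sqe, 0, 2 * sizeof(*sqe));       /* SQE128 ring: clear all 128 bytes */
            sqe->opcode = IORING_OP_URING_CMD;
            sqe->fd = ublk_ch_fd;                   /* ublk char device fd */
            sqe->cmd_op = UBLK_U_IO_PREP_IO_CMDS;
            sqe->addr = (__u64)(uintptr_t)elems;    /* element buffer walked by the driver */

            uc = io_uring_sqe_cmd(sqe);             /* batch header in the SQE command area */
            uc->q_id = q_id;
            uc->flags = 0;                          /* no UBLK_BATCH_F_HAS_BUF_ADDR */
            uc->nr_elem = q_depth;
            uc->elem_bytes = sizeof(*elems);

            return io_uring_submit(ring);
    }

The driver copies this element buffer in with copy_from_user() while handling
the command, walks it element by element, and prepares the matching ublk_io
for each tag.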

The core of this change is the `ublk_walk_cmd_buf` function, which iterates
over the elements of the command buffer that userspace passes via the
uring_cmd's sqe->addr. For each element, it parses the I/O details, finds the
corresponding `ublk_io` structure, and prepares it for future dispatch.
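
As a reference for the element layout the walk assumes, here is a hypothetical
element carrying a buffer address; the wrapper struct name is invented, and
only struct ublk_elem_header, UBLK_BATCH_F_HAS_BUF_ADDR and the trailing
__u64 address read by ublk_batch_buf_addr() come from the interface:

    #include <linux/types.h>
    #include <linux/ublk_cmd.h>

    /* Illustration only: assuming the header is 8 bytes, the address
     * immediately follows it, which matches ublk_batch_buf_addr(). */
    struct prep_elem_with_addr {
            struct ublk_elem_header hdr;    /* tag, buf_index, ... */
            __u64 buf_addr;                 /* per-io buffer address, used when the
                                             * device needs the driver to map I/O */
    };

elem_bytes in struct ublk_batch_io tells the walk how far to advance per
element, which is why the temporary copy buffer below is sized as a multiple
of 8, 16 and 24 bytes.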

Add a per-io lock to protect io state against concurrent delivery and
committing.

Reviewed-by: Caleb Sander Mateos <csander@purestorage.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
drivers/block/ublk_drv.c
include/uapi/linux/ublk_cmd.h

index 22c7296d90f39d755a71e489ab5ac8d3748f3b42..a3840b3f108110932a34c145fdabb75283912236 100644 (file)
@@ -208,6 +208,7 @@ struct ublk_io {
        unsigned task_registered_buffers;
 
        void *buf_ctx_handle;
+       spinlock_t lock;
 } ____cacheline_aligned_in_smp;
 
 struct ublk_queue {
@@ -280,6 +281,16 @@ static inline bool ublk_dev_support_batch_io(const struct ublk_device *ub)
        return false;
 }
 
+static inline void ublk_io_lock(struct ublk_io *io)
+{
+       spin_lock(&io->lock);
+}
+
+static inline void ublk_io_unlock(struct ublk_io *io)
+{
+       spin_unlock(&io->lock);
+}
+
 static inline struct ublksrv_io_desc *
 ublk_get_iod(const struct ublk_queue *ubq, unsigned tag)
 {
@@ -2699,6 +2710,171 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
        return ublk_ch_uring_cmd_local(cmd, issue_flags);
 }
 
+static inline __u64 ublk_batch_buf_addr(const struct ublk_batch_io *uc,
+                                       const struct ublk_elem_header *elem)
+{
+       const void *buf = elem;
+
+       if (uc->flags & UBLK_BATCH_F_HAS_BUF_ADDR)
+               return *(const __u64 *)(buf + sizeof(*elem));
+       return 0;
+}
+
+static struct ublk_auto_buf_reg
+ublk_batch_auto_buf_reg(const struct ublk_batch_io *uc,
+                       const struct ublk_elem_header *elem)
+{
+       struct ublk_auto_buf_reg reg = {
+               .index = elem->buf_index,
+               .flags = (uc->flags & UBLK_BATCH_F_AUTO_BUF_REG_FALLBACK) ?
+                       UBLK_AUTO_BUF_REG_FALLBACK : 0,
+       };
+
+       return reg;
+}
+
+/*
+ * 48 can hold any type of buffer element (8, 16 and 24 bytes) because
+ * it is the least common multiple (LCM) of 8, 16 and 24
+ */
+#define UBLK_CMD_BATCH_TMP_BUF_SZ  (48 * 10)
+struct ublk_batch_io_iter {
+       void __user *uaddr;
+       unsigned done, total;
+       unsigned char elem_bytes;
+       /* copy to this buffer from user space */
+       unsigned char buf[UBLK_CMD_BATCH_TMP_BUF_SZ];
+};
+
+static inline int
+__ublk_walk_cmd_buf(struct ublk_queue *ubq,
+                   struct ublk_batch_io_iter *iter,
+                   const struct ublk_batch_io_data *data,
+                   unsigned bytes,
+                   int (*cb)(struct ublk_queue *q,
+                           const struct ublk_batch_io_data *data,
+                           const struct ublk_elem_header *elem))
+{
+       unsigned int i;
+       int ret = 0;
+
+       for (i = 0; i < bytes; i += iter->elem_bytes) {
+               const struct ublk_elem_header *elem =
+                       (const struct ublk_elem_header *)&iter->buf[i];
+
+               if (unlikely(elem->tag >= data->ub->dev_info.queue_depth)) {
+                       ret = -EINVAL;
+                       break;
+               }
+
+               ret = cb(ubq, data, elem);
+               if (unlikely(ret))
+                       break;
+       }
+
+       iter->done += i;
+       return ret;
+}
+
+static int ublk_walk_cmd_buf(struct ublk_batch_io_iter *iter,
+                            const struct ublk_batch_io_data *data,
+                            int (*cb)(struct ublk_queue *q,
+                                    const struct ublk_batch_io_data *data,
+                                    const struct ublk_elem_header *elem))
+{
+       struct ublk_queue *ubq = ublk_get_queue(data->ub, data->header.q_id);
+       int ret = 0;
+
+       while (iter->done < iter->total) {
+               unsigned int len = min(sizeof(iter->buf), iter->total - iter->done);
+
+               if (copy_from_user(iter->buf, iter->uaddr + iter->done, len)) {
+                       pr_warn("ublk%d: read batch cmd buffer failed\n",
+                                       data->ub->dev_info.dev_id);
+                       return -EFAULT;
+               }
+
+               ret = __ublk_walk_cmd_buf(ubq, iter, data, len, cb);
+               if (ret)
+                       return ret;
+       }
+       return 0;
+}
+
+static int ublk_batch_unprep_io(struct ublk_queue *ubq,
+                               const struct ublk_batch_io_data *data,
+                               const struct ublk_elem_header *elem)
+{
+       struct ublk_io *io = &ubq->ios[elem->tag];
+
+       data->ub->nr_io_ready--;
+       ublk_io_lock(io);
+       io->flags = 0;
+       ublk_io_unlock(io);
+       return 0;
+}
+
+static void ublk_batch_revert_prep_cmd(struct ublk_batch_io_iter *iter,
+                                      const struct ublk_batch_io_data *data)
+{
+       int ret;
+
+       /* Re-process only what we've already processed, starting from the beginning */
+       iter->total = iter->done;
+       iter->done = 0;
+
+       ret = ublk_walk_cmd_buf(iter, data, ublk_batch_unprep_io);
+       WARN_ON_ONCE(ret);
+}
+
+static int ublk_batch_prep_io(struct ublk_queue *ubq,
+                             const struct ublk_batch_io_data *data,
+                             const struct ublk_elem_header *elem)
+{
+       struct ublk_io *io = &ubq->ios[elem->tag];
+       const struct ublk_batch_io *uc = &data->header;
+       union ublk_io_buf buf = { 0 };
+       int ret;
+
+       if (ublk_dev_support_auto_buf_reg(data->ub))
+               buf.auto_reg = ublk_batch_auto_buf_reg(uc, elem);
+       else if (ublk_dev_need_map_io(data->ub)) {
+               buf.addr = ublk_batch_buf_addr(uc, elem);
+
+               ret = ublk_check_fetch_buf(data->ub, buf.addr);
+               if (ret)
+                       return ret;
+       }
+
+       ublk_io_lock(io);
+       ret = __ublk_fetch(data->cmd, data->ub, io);
+       if (!ret)
+               io->buf = buf;
+       ublk_io_unlock(io);
+
+       return ret;
+}
+
+static int ublk_handle_batch_prep_cmd(const struct ublk_batch_io_data *data)
+{
+       const struct ublk_batch_io *uc = &data->header;
+       struct io_uring_cmd *cmd = data->cmd;
+       struct ublk_batch_io_iter iter = {
+               .uaddr = u64_to_user_ptr(READ_ONCE(cmd->sqe->addr)),
+               .total = uc->nr_elem * uc->elem_bytes,
+               .elem_bytes = uc->elem_bytes,
+       };
+       int ret;
+
+       mutex_lock(&data->ub->mutex);
+       ret = ublk_walk_cmd_buf(&iter, data, ublk_batch_prep_io);
+
+       if (ret && iter.done)
+               ublk_batch_revert_prep_cmd(&iter, data);
+       mutex_unlock(&data->ub->mutex);
+       return ret;
+}
+
 static int ublk_check_batch_cmd_flags(const struct ublk_batch_io *uc)
 {
        unsigned elem_bytes = sizeof(struct ublk_elem_header);
@@ -2765,6 +2941,11 @@ static int ublk_ch_batch_io_uring_cmd(struct io_uring_cmd *cmd,
 
        switch (cmd_op) {
        case UBLK_U_IO_PREP_IO_CMDS:
+               ret = ublk_check_batch_cmd(&data);
+               if (ret)
+                       goto out;
+               ret = ublk_handle_batch_prep_cmd(&data);
+               break;
        case UBLK_U_IO_COMMIT_IO_CMDS:
                ret = ublk_check_batch_cmd(&data);
                if (ret)
@@ -2952,7 +3133,7 @@ static int ublk_init_queue(struct ublk_device *ub, int q_id)
        struct ublk_queue *ubq;
        struct page *page;
        int numa_node;
-       int size;
+       int size, i;
 
        /* Determine NUMA node based on queue's CPU affinity */
        numa_node = ublk_get_queue_numa_node(ub, q_id);
@@ -2977,6 +3158,9 @@ static int ublk_init_queue(struct ublk_device *ub, int q_id)
        }
        ubq->io_cmd_buf = page_address(page);
 
+       for (i = 0; i < ubq->q_depth; i++)
+               spin_lock_init(&ubq->ios[i].lock);
+
        ub->queues[q_id] = ubq;
        ubq->dev = ub;
        return 0;
@@ -3220,6 +3404,11 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub,
                return -EINVAL;
 
        mutex_lock(&ub->mutex);
+       /* device may become non-ready in case of F_BATCH */
+       if (!ublk_dev_ready(ub)) {
+               ret = -EINVAL;
+               goto out_unlock;
+       }
        if (ub->dev_info.state == UBLK_S_DEV_LIVE ||
            test_bit(UB_STATE_USED, &ub->state)) {
                ret = -EEXIST;
index 0cc58e19d401c74d877627a0d92e555ec4259d7c..1a3d4d33c1d1239d44f2d2c865369d95ca4b89aa 100644 (file)
        _IOWR('u', 0x23, struct ublksrv_io_cmd)
 #define        UBLK_U_IO_UNREGISTER_IO_BUF     \
        _IOWR('u', 0x24, struct ublksrv_io_cmd)
+
+/*
+ * return 0 if the command is run successfully, otherwise failure code
+ * is returned
+ */
 #define        UBLK_U_IO_PREP_IO_CMDS  \
        _IOWR('u', 0x25, struct ublk_batch_io)
 #define        UBLK_U_IO_COMMIT_IO_CMDS        \