// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2011-2014, Intel Corporation.
 * Copyright (c) 2017-2021 Christoph Hellwig.
 */
#include <linux/ptrace.h>	/* for force_successful_syscall_return */
#include <linux/nvme_ioctl.h>
#include <linux/io_uring.h>

enum {
	NVME_IOCTL_VEC		= (1 << 0),
	NVME_IOCTL_PARTITION	= (1 << 1),
};
static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
		unsigned int flags, bool open_for_write)
{
	u32 effects;

	if (capable(CAP_SYS_ADMIN))
		return true;

	/*
	 * Do not allow unprivileged passthrough on partitions, as that allows an
	 * escape from the containment of the partition.
	 */
	if (flags & NVME_IOCTL_PARTITION)
		return false;

	/*
	 * Do not allow unprivileged processes to send vendor specific or fabrics
	 * commands as we can't be sure about their effects.
	 */
	if (c->common.opcode >= nvme_cmd_vendor_start ||
	    c->common.opcode == nvme_fabrics_command)
		return false;

	/*
	 * Do not allow unprivileged passthrough of admin commands except
	 * for a subset of identify commands that contain information required
	 * to form proper I/O commands in userspace and do not expose any
	 * potentially sensitive information.
	 */
	if (!ns) {
		if (c->common.opcode == nvme_admin_identify) {
			switch (c->identify.cns) {
			case NVME_ID_CNS_NS:
			case NVME_ID_CNS_CS_NS:
			case NVME_ID_CNS_NS_CS_INDEP:
			case NVME_ID_CNS_CS_CTRL:
			case NVME_ID_CNS_CTRL:
				return true;
			}
		}
		return false;
	}

	/*
	 * Check if the controller provides a Commands Supported and Effects log
	 * and marks this command as supported.  If not reject unprivileged
	 * passthrough.
	 */
	effects = nvme_command_effects(ns->ctrl, ns, c->common.opcode);
	if (!(effects & NVME_CMD_EFFECTS_CSUPP))
		return false;

	/*
	 * Don't allow passthrough for commands that have intrusive (or unknown)
	 * effects.
	 */
	if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC |
			NVME_CMD_EFFECTS_UUID_SEL |
			NVME_CMD_EFFECTS_SCOPE_MASK))
		return false;

	/*
	 * Only allow I/O commands that transfer data to the controller or that
	 * change the logical block contents if the file descriptor is open for
	 * writing.
	 */
	if (nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC))
		return open_for_write;
	return true;
}
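
/*
 * Editorial note (illustrative, not part of the original source): for callers
 * without CAP_SYS_ADMIN the policy above ties passthrough rights to how the
 * device node was opened.  A minimal sketch, assuming a namespace character
 * device at /dev/ng0n1 whose permissions let the caller open it:
 *
 *	int fd = open("/dev/ng0n1", O_RDWR);
 *
 * With O_RDONLY instead of O_RDWR, open_for_write is false and any command
 * that transfers data to the device or changes logical block contents is
 * rejected (-EACCES in the callers below), while supported read-only
 * commands still pass.
 */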
/*
 * Convert integer values from ioctl structures to user pointers, silently
 * ignoring the upper bits in the compat case to match behaviour of 32-bit
 * kernels.
 */
static void __user *nvme_to_user_ptr(uintptr_t ptrval)
{
	if (in_compat_syscall())
		ptrval = (compat_uptr_t)ptrval;
	return (void __user *)ptrval;
}
static void *nvme_add_user_metadata(struct request *req, void __user *ubuf,
		unsigned len, u32 seed)

	struct bio_integrity_payload *bip;
	struct bio *bio = req->bio;

	buf = kmalloc(len, GFP_KERNEL);

	if (req_op(req) == REQ_OP_DRV_OUT) {
		if (copy_from_user(buf, ubuf, len))

	bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);

	bip->bip_iter.bi_sector = seed;
	ret = bio_integrity_add_page(bio, virt_to_page(buf), len,
			offset_in_page(buf));

	req->cmd_flags |= REQ_INTEGRITY;
static int nvme_finish_user_metadata(struct request *req, void __user *ubuf,
		void *meta, unsigned len, int ret)

	if (!ret && req_op(req) == REQ_OP_DRV_IN &&
	    copy_to_user(ubuf, meta, len))
static struct request *nvme_alloc_user_request(struct request_queue *q,
		struct nvme_command *cmd, blk_opf_t rq_flags,
		blk_mq_req_flags_t blk_flags)

	req = blk_mq_alloc_request(q, nvme_req_op(cmd) | rq_flags, blk_flags);

	nvme_init_request(req, cmd);
	nvme_req(req)->flags |= NVME_REQ_USERCMD;
static int nvme_map_user_request(struct request *req, u64 ubuffer,
		unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
		u32 meta_seed, void **metap, struct io_uring_cmd *ioucmd,
		unsigned int flags)

	struct request_queue *q = req->q;
	struct nvme_ns *ns = q->queuedata;
	struct block_device *bdev = ns ? ns->disk->part0 : NULL;
	struct bio *bio = NULL;

	if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) {
		struct iov_iter iter;

		/* fixedbufs is only for non-vectored io */
		if (WARN_ON_ONCE(flags & NVME_IOCTL_VEC))

		ret = io_uring_cmd_import_fixed(ubuffer, bufflen,
				rq_data_dir(req), &iter, ioucmd);

		ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL);

		ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer),
				bufflen, GFP_KERNEL, flags & NVME_IOCTL_VEC, 0,
				0, rq_data_dir(req));

	bio_set_dev(bio, bdev);

	if (bdev && meta_buffer && meta_len) {
		meta = nvme_add_user_metadata(req, meta_buffer, meta_len,
				meta_seed);

	blk_rq_unmap_user(bio);

	blk_mq_free_request(req);
static int nvme_submit_user_cmd(struct request_queue *q,
		struct nvme_command *cmd, u64 ubuffer, unsigned bufflen,
		void __user *meta_buffer, unsigned meta_len, u32 meta_seed,
		u64 *result, unsigned timeout, unsigned int flags)

	struct nvme_ns *ns = q->queuedata;
	struct nvme_ctrl *ctrl;

	req = nvme_alloc_user_request(q, cmd, 0, 0);

	req->timeout = timeout;
	if (ubuffer && bufflen) {
		ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer,
				meta_len, meta_seed, &meta, NULL, flags);

	ctrl = nvme_req(req)->ctrl;

	effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode);
	ret = nvme_execute_rq(req, false);

	*result = le64_to_cpu(nvme_req(req)->result.u64);

	ret = nvme_finish_user_metadata(req, meta_buffer, meta,
			meta_len, ret);

	blk_rq_unmap_user(bio);
	blk_mq_free_request(req);

	nvme_passthru_end(ctrl, ns, effects, cmd, ret);
static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)

	struct nvme_user_io io;
	struct nvme_command c;
	unsigned length, meta_len;
	void __user *metadata;

	if (copy_from_user(&io, uio, sizeof(io)))

	case nvme_cmd_compare:

	length = (io.nblocks + 1) << ns->lba_shift;

	if ((io.control & NVME_RW_PRINFO_PRACT) &&
	    ns->ms == sizeof(struct t10_pi_tuple)) {
		/*
		 * Protection information is stripped/inserted by the
		 * controller.
		 */
		if (nvme_to_user_ptr(io.metadata))

	meta_len = (io.nblocks + 1) * ns->ms;
	metadata = nvme_to_user_ptr(io.metadata);

	if (ns->features & NVME_NS_EXT_LBAS) {

	} else if (meta_len) {
		if ((io.metadata & 3) || !io.metadata)

	memset(&c, 0, sizeof(c));
	c.rw.opcode = io.opcode;
	c.rw.flags = io.flags;
	c.rw.nsid = cpu_to_le32(ns->head->ns_id);
	c.rw.slba = cpu_to_le64(io.slba);
	c.rw.length = cpu_to_le16(io.nblocks);
	c.rw.control = cpu_to_le16(io.control);
	c.rw.dsmgmt = cpu_to_le32(io.dsmgmt);
	c.rw.reftag = cpu_to_le32(io.reftag);
	c.rw.apptag = cpu_to_le16(io.apptag);
	c.rw.appmask = cpu_to_le16(io.appmask);

	return nvme_submit_user_cmd(ns->queue, &c, io.addr, length, metadata,
			meta_len, lower_32_bits(io.slba), NULL, 0, 0);
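
/*
 * Editorial note (illustrative, not part of the original source): a minimal
 * user-space sketch of driving nvme_submit_io() through the legacy
 * NVME_IOCTL_SUBMIT_IO ioctl.  Field names follow struct nvme_user_io from
 * <linux/nvme_ioctl.h>; the device path, LBA and 512-byte block size are
 * assumptions.  nblocks is 0-based, matching the (io.nblocks + 1) arithmetic
 * above.
 *
 *	__u8 data[512];
 *	struct nvme_user_io io = {
 *		.opcode  = 0x02,	// NVMe Read
 *		.nblocks = 0,		// 0 => one logical block
 *		.slba    = 0,
 *		.addr    = (__u64)(uintptr_t)data,
 *	};
 *	int fd = open("/dev/nvme0n1", O_RDWR);
 *	int err = ioctl(fd, NVME_IOCTL_SUBMIT_IO, &io);
 */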
static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl,
		struct nvme_ns *ns, __u32 nsid)

	if (ns && nsid != ns->head->ns_id) {
		dev_err(ctrl->device,
			"%s: nsid (%u) in cmd does not match nsid (%u)",
			current->comm, nsid, ns->head->ns_id);
static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
		struct nvme_passthru_cmd __user *ucmd, unsigned int flags,
		bool open_for_write)

	struct nvme_passthru_cmd cmd;
	struct nvme_command c;
	unsigned timeout = 0;

	if (copy_from_user(&cmd, ucmd, sizeof(cmd)))

	if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid))

	memset(&c, 0, sizeof(c));
	c.common.opcode = cmd.opcode;
	c.common.flags = cmd.flags;
	c.common.nsid = cpu_to_le32(cmd.nsid);
	c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
	c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
	c.common.cdw10 = cpu_to_le32(cmd.cdw10);
	c.common.cdw11 = cpu_to_le32(cmd.cdw11);
	c.common.cdw12 = cpu_to_le32(cmd.cdw12);
	c.common.cdw13 = cpu_to_le32(cmd.cdw13);
	c.common.cdw14 = cpu_to_le32(cmd.cdw14);
	c.common.cdw15 = cpu_to_le32(cmd.cdw15);

	if (!nvme_cmd_allowed(ns, &c, 0, open_for_write))

		timeout = msecs_to_jiffies(cmd.timeout_ms);

	status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
			cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata),
			cmd.metadata_len, 0, &result, timeout, 0);

	if (put_user(result, &ucmd->result))
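
/*
 * Editorial note (illustrative, not part of the original source): a minimal
 * user-space sketch of reaching nvme_user_cmd() via NVME_IOCTL_ADMIN_CMD on
 * the controller character device.  Field names follow struct
 * nvme_passthru_cmd from <linux/nvme_ioctl.h>; the device path and buffer
 * size are assumptions.  Identify Controller (CNS 01h) is one of the few
 * admin commands nvme_cmd_allowed() accepts without CAP_SYS_ADMIN.
 *
 *	__u8 id[4096];
 *	struct nvme_passthru_cmd cmd = {
 *		.opcode   = 0x06,	// Identify
 *		.nsid     = 0,
 *		.addr     = (__u64)(uintptr_t)id,
 *		.data_len = sizeof(id),
 *		.cdw10    = 1,		// CNS 01h: Identify Controller
 *	};
 *	int fd = open("/dev/nvme0", O_RDONLY);
 *	int err = ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd);
 */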
static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
		struct nvme_passthru_cmd64 __user *ucmd, unsigned int flags,
		bool open_for_write)

	struct nvme_passthru_cmd64 cmd;
	struct nvme_command c;
	unsigned timeout = 0;

	if (copy_from_user(&cmd, ucmd, sizeof(cmd)))

	if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid))

	memset(&c, 0, sizeof(c));
	c.common.opcode = cmd.opcode;
	c.common.flags = cmd.flags;
	c.common.nsid = cpu_to_le32(cmd.nsid);
	c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
	c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
	c.common.cdw10 = cpu_to_le32(cmd.cdw10);
	c.common.cdw11 = cpu_to_le32(cmd.cdw11);
	c.common.cdw12 = cpu_to_le32(cmd.cdw12);
	c.common.cdw13 = cpu_to_le32(cmd.cdw13);
	c.common.cdw14 = cpu_to_le32(cmd.cdw14);
	c.common.cdw15 = cpu_to_le32(cmd.cdw15);

	if (!nvme_cmd_allowed(ns, &c, flags, open_for_write))

		timeout = msecs_to_jiffies(cmd.timeout_ms);

	status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
			cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata),
			cmd.metadata_len, 0, &cmd.result, timeout, flags);

	if (put_user(cmd.result, &ucmd->result))
struct nvme_uring_data {

/*
 * This overlays struct io_uring_cmd pdu.
 * Expect build errors if this grows larger than that.
 */
struct nvme_uring_cmd_pdu {
	void *meta; /* kernel-resident buffer */
	void __user *meta_buffer;

static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu(
		struct io_uring_cmd *ioucmd)

	return (struct nvme_uring_cmd_pdu *)&ioucmd->pdu;
static void nvme_uring_task_meta_cb(struct io_uring_cmd *ioucmd,
		unsigned issue_flags)

	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
	struct request *req = pdu->req;

	if (nvme_req(req)->flags & NVME_REQ_CANCELLED)

	status = nvme_req(req)->status;

	result = le64_to_cpu(nvme_req(req)->result.u64);

	status = nvme_finish_user_metadata(req, pdu->u.meta_buffer,
			pdu->u.meta, pdu->meta_len, status);

	blk_rq_unmap_user(req->bio);
	blk_mq_free_request(req);

	io_uring_cmd_done(ioucmd, status, result, issue_flags);
static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd,
		unsigned issue_flags)

	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);

	blk_rq_unmap_user(pdu->bio);

	io_uring_cmd_done(ioucmd, pdu->nvme_status, pdu->u.result, issue_flags);
static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
						blk_status_t err)

	struct io_uring_cmd *ioucmd = req->end_io_data;
	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);

	if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
		pdu->nvme_status = -EINTR;
	else
		pdu->nvme_status = nvme_req(req)->status;
	pdu->u.result = le64_to_cpu(nvme_req(req)->result.u64);

	/*
	 * For iopoll, complete it directly.
	 * Otherwise, move the completion to task work.
	 */
	if (blk_rq_is_poll(req)) {
		WRITE_ONCE(ioucmd->cookie, NULL);
		nvme_uring_task_cb(ioucmd, IO_URING_F_UNLOCKED);
	} else {
		io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb);
	}

	return RQ_END_IO_FREE;
static enum rq_end_io_ret nvme_uring_cmd_end_io_meta(struct request *req,
						     blk_status_t err)

	struct io_uring_cmd *ioucmd = req->end_io_data;
	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);

	/*
	 * For iopoll, complete it directly.
	 * Otherwise, move the completion to task work.
	 */
	if (blk_rq_is_poll(req)) {
		WRITE_ONCE(ioucmd->cookie, NULL);
		nvme_uring_task_meta_cb(ioucmd, IO_URING_F_UNLOCKED);
	} else {
		io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_meta_cb);
	}

	return RQ_END_IO_NONE;
static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
		struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec)

	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
	const struct nvme_uring_cmd *cmd = io_uring_sqe_cmd(ioucmd->sqe);
	struct request_queue *q = ns ? ns->queue : ctrl->admin_q;
	struct nvme_uring_data d;
	struct nvme_command c;
	blk_opf_t rq_flags = REQ_ALLOC_CACHE;
	blk_mq_req_flags_t blk_flags = 0;

	c.common.opcode = READ_ONCE(cmd->opcode);
	c.common.flags = READ_ONCE(cmd->flags);

	c.common.command_id = 0;
	c.common.nsid = cpu_to_le32(cmd->nsid);
	if (!nvme_validate_passthru_nsid(ctrl, ns, le32_to_cpu(c.common.nsid)))

	c.common.cdw2[0] = cpu_to_le32(READ_ONCE(cmd->cdw2));
	c.common.cdw2[1] = cpu_to_le32(READ_ONCE(cmd->cdw3));
	c.common.metadata = 0;
	c.common.dptr.prp1 = c.common.dptr.prp2 = 0;
	c.common.cdw10 = cpu_to_le32(READ_ONCE(cmd->cdw10));
	c.common.cdw11 = cpu_to_le32(READ_ONCE(cmd->cdw11));
	c.common.cdw12 = cpu_to_le32(READ_ONCE(cmd->cdw12));
	c.common.cdw13 = cpu_to_le32(READ_ONCE(cmd->cdw13));
	c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14));
	c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15));

	if (!nvme_cmd_allowed(ns, &c, 0, ioucmd->file->f_mode & FMODE_WRITE))

	d.metadata = READ_ONCE(cmd->metadata);
	d.addr = READ_ONCE(cmd->addr);
	d.data_len = READ_ONCE(cmd->data_len);
	d.metadata_len = READ_ONCE(cmd->metadata_len);
	d.timeout_ms = READ_ONCE(cmd->timeout_ms);

	if (issue_flags & IO_URING_F_NONBLOCK) {
		rq_flags |= REQ_NOWAIT;
		blk_flags = BLK_MQ_REQ_NOWAIT;
	}
	if (issue_flags & IO_URING_F_IOPOLL)
		rq_flags |= REQ_POLLED;

	req = nvme_alloc_user_request(q, &c, rq_flags, blk_flags);

	req->timeout = d.timeout_ms ? msecs_to_jiffies(d.timeout_ms) : 0;

	if (d.addr && d.data_len) {
		ret = nvme_map_user_request(req, d.addr,
			d.data_len, nvme_to_user_ptr(d.metadata),
			d.metadata_len, 0, &meta, ioucmd, vec);

	if (blk_rq_is_poll(req)) {
		ioucmd->flags |= IORING_URING_CMD_POLLED;
		WRITE_ONCE(ioucmd->cookie, req);
	}

	/* to free bio on completion, as req->bio will be null at that time */

	pdu->meta_len = d.metadata_len;
	req->end_io_data = ioucmd;

		pdu->u.meta_buffer = nvme_to_user_ptr(d.metadata);
		req->end_io = nvme_uring_cmd_end_io_meta;
	} else {
		req->end_io = nvme_uring_cmd_end_io;
	}

	blk_execute_rq_nowait(req, false);
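
/*
 * Editorial note (illustrative, not part of the original source): a rough
 * user-space sketch of submitting a passthrough read asynchronously with
 * io_uring, assuming liburing and the uapi definitions of struct
 * nvme_uring_cmd and NVME_URING_CMD_IO; field placement follows the uapi
 * headers at the time of writing.  The ring must be created with
 * IORING_SETUP_SQE128 | IORING_SETUP_CQE32 (see nvme_uring_cmd_checks()
 * below): the NVMe command lives in the big SQE and the 64-bit result comes
 * back in the extended CQE.
 *
 *	struct io_uring ring;
 *	struct io_uring_params p = {
 *		.flags = IORING_SETUP_SQE128 | IORING_SETUP_CQE32,
 *	};
 *	io_uring_queue_init_params(8, &ring, &p);
 *
 *	__u8 buf[4096];
 *	int fd = open("/dev/ng0n1", O_RDWR);
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *	struct nvme_uring_cmd *cmd = (struct nvme_uring_cmd *)sqe->cmd;
 *
 *	memset(sqe, 0, 128);			// big SQE
 *	sqe->opcode = IORING_OP_URING_CMD;
 *	sqe->fd = fd;
 *	sqe->cmd_op = NVME_URING_CMD_IO;
 *	cmd->opcode = 0x02;			// NVMe Read
 *	cmd->nsid = 1;				// assumed namespace ID
 *	cmd->addr = (__u64)(uintptr_t)buf;
 *	cmd->data_len = sizeof(buf);
 *	cmd->cdw12 = 7;				// 0-based count: 8 x 512B blocks
 *
 *	io_uring_submit(&ring);
 */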
static bool is_ctrl_ioctl(unsigned int cmd)
{
	if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD)
		return true;
	if (is_sed_ioctl(cmd))
		return true;
	return false;
}
static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd,
		void __user *argp, bool open_for_write)
{
	switch (cmd) {
	case NVME_IOCTL_ADMIN_CMD:
		return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write);
	case NVME_IOCTL_ADMIN64_CMD:
		return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write);
	default:
		return sed_ioctl(ctrl->opal_dev, cmd, argp);
	}
}
#ifdef COMPAT_FOR_U64_ALIGNMENT
struct nvme_user_io32 {

} __attribute__((__packed__));
#define NVME_IOCTL_SUBMIT_IO32	_IOW('N', 0x42, struct nvme_user_io32)
#endif /* COMPAT_FOR_U64_ALIGNMENT */
static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
		void __user *argp, unsigned int flags, bool open_for_write)
{
	switch (cmd) {
	case NVME_IOCTL_ID:
		force_successful_syscall_return();
		return ns->head->ns_id;
	case NVME_IOCTL_IO_CMD:
		return nvme_user_cmd(ns->ctrl, ns, argp, flags, open_for_write);
	/*
	 * struct nvme_user_io can have different padding on some 32-bit ABIs.
	 * Just accept the compat version as all fields that are used are the
	 * same size and at the same offset.
	 */
#ifdef COMPAT_FOR_U64_ALIGNMENT
	case NVME_IOCTL_SUBMIT_IO32:
#endif
	case NVME_IOCTL_SUBMIT_IO:
		return nvme_submit_io(ns, argp);
	case NVME_IOCTL_IO64_CMD_VEC:
		flags |= NVME_IOCTL_VEC;
		fallthrough;
	case NVME_IOCTL_IO64_CMD:
		return nvme_user_cmd64(ns->ctrl, ns, argp, flags,
				       open_for_write);
int nvme_ioctl(struct block_device *bdev, blk_mode_t mode,
		unsigned int cmd, unsigned long arg)
{
	struct nvme_ns *ns = bdev->bd_disk->private_data;
	bool open_for_write = mode & BLK_OPEN_WRITE;
	void __user *argp = (void __user *)arg;
	unsigned int flags = 0;

	if (bdev_is_partition(bdev))
		flags |= NVME_IOCTL_PARTITION;

	if (is_ctrl_ioctl(cmd))
		return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write);
	return nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write);
}
long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct nvme_ns *ns =
		container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev);
	bool open_for_write = file->f_mode & FMODE_WRITE;
	void __user *argp = (void __user *)arg;

	if (is_ctrl_ioctl(cmd))
		return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write);
	return nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write);
}
static int nvme_uring_cmd_checks(unsigned int issue_flags)

	/* NVMe passthrough requires big SQE/CQE support */
	if ((issue_flags & (IO_URING_F_SQE128|IO_URING_F_CQE32)) !=
	    (IO_URING_F_SQE128|IO_URING_F_CQE32))
static int nvme_ns_uring_cmd(struct nvme_ns *ns, struct io_uring_cmd *ioucmd,
		unsigned int issue_flags)

	struct nvme_ctrl *ctrl = ns->ctrl;

	BUILD_BUG_ON(sizeof(struct nvme_uring_cmd_pdu) > sizeof(ioucmd->pdu));

	ret = nvme_uring_cmd_checks(issue_flags);

	switch (ioucmd->cmd_op) {
	case NVME_URING_CMD_IO:
		ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, false);
		break;
	case NVME_URING_CMD_IO_VEC:
		ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, true);
		break;
int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
{
	struct nvme_ns *ns = container_of(file_inode(ioucmd->file)->i_cdev,
			struct nvme_ns, cdev);

	return nvme_ns_uring_cmd(ns, ioucmd, issue_flags);
}
int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
				 struct io_comp_batch *iob,
				 unsigned int poll_flags)

	if (!(ioucmd->flags & IORING_URING_CMD_POLLED))

	req = READ_ONCE(ioucmd->cookie);
	if (req && blk_rq_is_poll(req))
		ret = blk_rq_poll(req, iob, poll_flags);
#ifdef CONFIG_NVME_MULTIPATH
static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
		void __user *argp, struct nvme_ns_head *head, int srcu_idx,
		bool open_for_write)
	__releases(&head->srcu)

	struct nvme_ctrl *ctrl = ns->ctrl;

	nvme_get_ctrl(ns->ctrl);
	srcu_read_unlock(&head->srcu, srcu_idx);
	ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write);
int nvme_ns_head_ioctl(struct block_device *bdev, blk_mode_t mode,
		unsigned int cmd, unsigned long arg)

	struct nvme_ns_head *head = bdev->bd_disk->private_data;
	bool open_for_write = mode & BLK_OPEN_WRITE;
	void __user *argp = (void __user *)arg;
	int srcu_idx, ret = -EWOULDBLOCK;
	unsigned int flags = 0;

	if (bdev_is_partition(bdev))
		flags |= NVME_IOCTL_PARTITION;

	srcu_idx = srcu_read_lock(&head->srcu);
	ns = nvme_find_path(head);

	/*
	 * Handle ioctls that apply to the controller instead of the namespace
	 * separately and drop the ns SRCU reference early.  This avoids a
	 * deadlock when deleting namespaces using the passthrough interface.
	 */
	if (is_ctrl_ioctl(cmd))
		return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx,
					       open_for_write);

	ret = nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write);

	srcu_read_unlock(&head->srcu, srcu_idx);
long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
		unsigned long arg)

	bool open_for_write = file->f_mode & FMODE_WRITE;
	struct cdev *cdev = file_inode(file)->i_cdev;
	struct nvme_ns_head *head =
		container_of(cdev, struct nvme_ns_head, cdev);
	void __user *argp = (void __user *)arg;
	int srcu_idx, ret = -EWOULDBLOCK;

	srcu_idx = srcu_read_lock(&head->srcu);
	ns = nvme_find_path(head);

	if (is_ctrl_ioctl(cmd))
		return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx,
					       open_for_write);

	ret = nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write);

	srcu_read_unlock(&head->srcu, srcu_idx);
int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd,
		unsigned int issue_flags)

	struct cdev *cdev = file_inode(ioucmd->file)->i_cdev;
	struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev);
	int srcu_idx = srcu_read_lock(&head->srcu);
	struct nvme_ns *ns = nvme_find_path(head);

	ret = nvme_ns_uring_cmd(ns, ioucmd, issue_flags);
	srcu_read_unlock(&head->srcu, srcu_idx);

#endif /* CONFIG_NVME_MULTIPATH */
int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)

	struct nvme_ctrl *ctrl = ioucmd->file->private_data;

	/* IOPOLL not supported yet */
	if (issue_flags & IO_URING_F_IOPOLL)

	ret = nvme_uring_cmd_checks(issue_flags);

	switch (ioucmd->cmd_op) {
	case NVME_URING_CMD_ADMIN:
		ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, false);
		break;
	case NVME_URING_CMD_ADMIN_VEC:
		ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, true);
		break;
static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp,
		bool open_for_write)

	down_read(&ctrl->namespaces_rwsem);
	if (list_empty(&ctrl->namespaces)) {

	ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list);
	if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) {
		dev_warn(ctrl->device,
			"NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n");

	dev_warn(ctrl->device,
		"using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n");

	up_read(&ctrl->namespaces_rwsem);

	ret = nvme_user_cmd(ctrl, ns, argp, 0, open_for_write);

	up_read(&ctrl->namespaces_rwsem);
long nvme_dev_ioctl(struct file *file, unsigned int cmd,
		unsigned long arg)

	bool open_for_write = file->f_mode & FMODE_WRITE;
	struct nvme_ctrl *ctrl = file->private_data;
	void __user *argp = (void __user *)arg;

	switch (cmd) {
	case NVME_IOCTL_ADMIN_CMD:
		return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write);
	case NVME_IOCTL_ADMIN64_CMD:
		return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write);
	case NVME_IOCTL_IO_CMD:
		return nvme_dev_user_cmd(ctrl, argp, open_for_write);
	case NVME_IOCTL_RESET:
		if (!capable(CAP_SYS_ADMIN))
			return -EACCES;
		dev_warn(ctrl->device, "resetting controller\n");
		return nvme_reset_ctrl_sync(ctrl);
	case NVME_IOCTL_SUBSYS_RESET:
		if (!capable(CAP_SYS_ADMIN))
			return -EACCES;
		return nvme_reset_subsystem(ctrl);
	case NVME_IOCTL_RESCAN:
		if (!capable(CAP_SYS_ADMIN))
			return -EACCES;
		nvme_queue_scan(ctrl);