1 // SPDX-License-Identifier: GPL-2.0
3 * NVMe I/O command implementation.
4 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7 #include <linux/blkdev.h>
8 #include <linux/blk-integrity.h>
9 #include <linux/memremap.h>
10 #include <linux/module.h>
/*
 * nvmet_bdev_set_limits() - derive Identify Namespace limit fields from a
 * block device.
 * @bdev: block device whose physical/logical block sizes, discard
 *        granularity and optimal I/O size are queried
 * @id:   Identify Namespace structure that receives the results
 *
 * Every value computed in the visible code is a byte quantity divided by
 * the logical block size and then passed through to0based().
 *
 * NOTE(review): several statements of this function are not visible in
 * this excerpt; only the lpp0b, npdg and nows computations can be
 * confirmed from here.
 */
13 void nvmet_bdev_set_limits(struct block_device
*bdev
, struct nvme_id_ns
*id
)
15 /* Logical blocks per physical block, 0's based. */
16 const __le16 lpp0b
= to0based(bdev_physical_block_size(bdev
) /
17 bdev_logical_block_size(bdev
));
20 * For NVMe 1.2 and later, bit 1 indicates that the fields NAWUN,
21 * NAWUPF, and NACWU are defined for this namespace and should be
22 * used by the host for this namespace instead of the AWUN, AWUPF,
23 * and ACWU fields in the Identify Controller data structure. If
24 * any of these fields are zero that means that the corresponding
25 * field from the identify controller data structure should be used.
33 * Bit 4 indicates that the fields NPWG, NPWA, NPDG, NPDA, and
34 * NOWS are defined for this namespace and should be used by
35 * the host for I/O optimization.
38 /* NPWG = Namespace Preferred Write Granularity. 0's based */
40 /* NPWA = Namespace Preferred Write Alignment. 0's based */
42 /* NPDG = Namespace Preferred Deallocate Granularity. 0's based */
/* Discard granularity expressed in logical blocks. */
43 id
->npdg
= to0based(bdev_discard_granularity(bdev
) /
44 bdev_logical_block_size(bdev
));
45 /* NPDA = Namespace Preferred Deallocate Alignment */
47 /* NOWS = Namespace Optimal Write Size */
/* Optimal I/O size expressed in logical blocks. */
48 id
->nows
= to0based(bdev_io_opt(bdev
) / bdev_logical_block_size(bdev
));
/*
 * nvmet_bdev_ns_disable() - drop the block device handle of a namespace.
 * @ns: namespace whose bdev_handle is released
 *
 * bdev_release() is only called when ns->bdev_handle is non-NULL, and the
 * handle pointer is cleared afterwards, so calling this again on the same
 * namespace does not release the handle a second time.
 */
51 void nvmet_bdev_ns_disable(struct nvmet_ns
*ns
)
53 if (ns
->bdev_handle
) {
54 bdev_release(ns
->bdev_handle
);
/* Clear the pointer so a later call sees no handle to release. */
56 ns
->bdev_handle
= NULL
;
/*
 * nvmet_bdev_ns_enable_integrity() - set namespace metadata fields from the
 * integrity profile of the backing block device.
 * @ns: namespace to update; ns->bdev is the device that is queried
 *
 * ns->metadata_size is taken from the profile's tuple_size. The profile
 * t10_pi_type1_crc selects NVME_NS_DPS_PI_TYPE1 and t10_pi_type3_crc
 * selects NVME_NS_DPS_PI_TYPE3. In the remaining branch (marked
 * "Unsupported metadata type") metadata_size is reset to 0.
 *
 * NOTE(review): the guard around the bi dereference is not visible in this
 * excerpt - confirm that a NULL result of bdev_get_integrity() is handled.
 */
60 static void nvmet_bdev_ns_enable_integrity(struct nvmet_ns
*ns
)
62 struct blk_integrity
*bi
= bdev_get_integrity(ns
->bdev
);
65 ns
->metadata_size
= bi
->tuple_size
;
66 if (bi
->profile
== &t10_pi_type1_crc
)
67 ns
->pi_type
= NVME_NS_DPS_PI_TYPE1
;
68 else if (bi
->profile
== &t10_pi_type3_crc
)
69 ns
->pi_type
= NVME_NS_DPS_PI_TYPE3
;
71 /* Unsupported metadata type */
72 ns
->metadata_size
= 0;
/*
 * nvmet_bdev_ns_enable() - open the block device behind a namespace and
 * initialise the namespace fields that depend on it.
 * @ns: namespace to enable; ns->device_path names the device to open
 *
 * The device is opened for reading and writing. On failure the error is
 * logged unless it is -ENOTBLK, and ns->bdev_handle is reset to NULL.
 * On success ns->bdev, ns->size and ns->blksize_shift are filled in,
 * ns->metadata_size is cleared and, when CONFIG_BLK_DEV_INTEGRITY_T10 is
 * enabled, nvmet_bdev_ns_enable_integrity() is called. For a zoned device
 * nvmet_bdev_zns_enable() must succeed, otherwise the namespace is disabled
 * again; on success ns->csi becomes NVME_CSI_ZNS.
 *
 * NOTE(review): the return statements and the buffered_io check announced
 * by the comment below are not visible in this excerpt.
 */
76 int nvmet_bdev_ns_enable(struct nvmet_ns
*ns
)
81 * When buffered_io namespace attribute is enabled that means user want
82 * this block device to be used as a file, so block device can take
83 * an advantage of cache.
88 ns
->bdev_handle
= bdev_open_by_path(ns
->device_path
,
89 BLK_OPEN_READ
| BLK_OPEN_WRITE
, NULL
, NULL
);
90 if (IS_ERR(ns
->bdev_handle
)) {
91 ret
= PTR_ERR(ns
->bdev_handle
);
/*
 * -ENOTBLK is not logged; presumably the caller treats it as "not a block
 * device" and tries another backend - confirm against the caller.
 */
92 if (ret
!= -ENOTBLK
) {
93 pr_err("failed to open block device %s: (%d)\n",
94 ns
->device_path
, ret
);
/* Do not leave an ERR_PTR value behind in the namespace. */
96 ns
->bdev_handle
= NULL
;
99 ns
->bdev
= ns
->bdev_handle
->bdev
;
100 ns
->size
= bdev_nr_bytes(ns
->bdev
);
101 ns
->blksize_shift
= blksize_bits(bdev_logical_block_size(ns
->bdev
));
/* Start without metadata; the integrity helper may set it below. */
104 ns
->metadata_size
= 0;
105 if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY_T10
))
106 nvmet_bdev_ns_enable_integrity(ns
);
108 if (bdev_is_zoned(ns
->bdev
)) {
/* Undo the open when the zoned setup is rejected. */
109 if (!nvmet_bdev_zns_enable(ns
)) {
110 nvmet_bdev_ns_disable(ns
);
113 ns
->csi
= NVME_CSI_ZNS
;
/*
 * nvmet_bdev_ns_revalidate() - refresh the cached namespace size.
 * @ns: namespace whose ns->size is reloaded from bdev_nr_bytes(ns->bdev)
 */
119 void nvmet_bdev_ns_revalidate(struct nvmet_ns
*ns
)
121 ns
->size
= bdev_nr_bytes(ns
->bdev
);
/*
 * blk_to_nvme_status() - map a block layer status to an NVMe status code.
 * @req:     request being completed; req->error_loc and req->error_slba
 *           are updated as a side effect
 * @blk_sts: block layer status to translate
 *
 * status starts out as NVME_SC_SUCCESS and BLK_STS_OK is tested first as
 * the likely case. For the error cases req->error_loc is set to the offset
 * of a command field (length, slba, opcode or nsid), and a second switch on
 * the opcode stores the starting LBA of the command in req->error_slba.
 *
 * NOTE(review): most case labels and the return statements are not visible
 * in this excerpt, so the exact blk_status_t to NVMe status pairing cannot
 * be confirmed from here.
 */
124 u16
blk_to_nvme_status(struct nvmet_req
*req
, blk_status_t blk_sts
)
126 u16 status
= NVME_SC_SUCCESS
;
128 if (likely(blk_sts
== BLK_STS_OK
))
131 * Right now there exists M : 1 mapping between block layer error
132 * to the NVMe status code (see nvme_error_status()). For consistency,
133 * when we reverse map we use most appropriate NVMe Status code from
134 * the group of the NVMe staus codes used in the nvme_error_status().
138 status
= NVME_SC_CAP_EXCEEDED
| NVME_SC_DNR
;
139 req
->error_loc
= offsetof(struct nvme_rw_command
, length
);
142 status
= NVME_SC_LBA_RANGE
| NVME_SC_DNR
;
143 req
->error_loc
= offsetof(struct nvme_rw_command
, slba
);
/* Unsupported operation: the reported status depends on the opcode. */
145 case BLK_STS_NOTSUPP
:
146 req
->error_loc
= offsetof(struct nvme_common_command
, opcode
);
147 switch (req
->cmd
->common
.opcode
) {
149 case nvme_cmd_write_zeroes
:
150 status
= NVME_SC_ONCS_NOT_SUPPORTED
| NVME_SC_DNR
;
153 status
= NVME_SC_INVALID_OPCODE
| NVME_SC_DNR
;
/* The only visible status that is not OR-ed with NVME_SC_DNR. */
157 status
= NVME_SC_ACCESS_DENIED
;
158 req
->error_loc
= offsetof(struct nvme_rw_command
, nsid
);
162 status
= NVME_SC_INTERNAL
| NVME_SC_DNR
;
163 req
->error_loc
= offsetof(struct nvme_common_command
, opcode
);
/* Record the starting LBA of the failed command, converted to CPU order. */
166 switch (req
->cmd
->common
.opcode
) {
169 req
->error_slba
= le64_to_cpu(req
->cmd
->rw
.slba
);
171 case nvme_cmd_write_zeroes
:
173 le64_to_cpu(req
->cmd
->write_zeroes
.slba
);
/*
 * nvmet_bio_done() - bio completion callback used by the submit paths in
 * this file (they install it as bio->bi_end_io).
 * @bio: completed bio; bio->bi_private holds the owning nvmet_req
 *
 * The request is completed with the NVMe status derived from
 * bio->bi_status, and the bio is then handed to nvmet_req_bio_put().
 */
181 static void nvmet_bio_done(struct bio
*bio
)
183 struct nvmet_req
*req
= bio
->bi_private
;
/* bi_status is read here, before the bio is passed to nvmet_req_bio_put(). */
185 nvmet_req_complete(req
, blk_to_nvme_status(req
, bio
->bi_status
));
186 nvmet_req_bio_put(req
, bio
);
/*
 * nvmet_bdev_alloc_bip() - attach an integrity payload to a bio and fill it
 * with metadata pages.
 * @req:   request that owns the metadata scatterlist
 * @bio:   bio that receives the integrity payload
 * @miter: scatterlist mapping iterator positioned in the metadata
 *
 * Two variants exist, selected by CONFIG_BLK_DEV_INTEGRITY. The first one
 * allocates a payload with GFP_NOIO sized for req->metadata_sg_cnt
 * segments, seeds it from the bio start sector and adds pages from @miter
 * until bio_integrity_bytes() worth of data for this bio has been added.
 *
 * NOTE(review): return statements and the body of the second variant are
 * not visible in this excerpt.
 */
189 #ifdef CONFIG_BLK_DEV_INTEGRITY
190 static int nvmet_bdev_alloc_bip(struct nvmet_req
*req
, struct bio
*bio
,
191 struct sg_mapping_iter
*miter
)
193 struct blk_integrity
*bi
;
194 struct bio_integrity_payload
*bip
;
198 bi
= bdev_get_integrity(req
->ns
->bdev
);
200 pr_err("Unable to locate bio_integrity\n");
204 bip
= bio_integrity_alloc(bio
, GFP_NOIO
,
205 bio_max_segs(req
->metadata_sg_cnt
));
207 pr_err("Unable to allocate bio_integrity_payload\n");
211 /* virtual start sector must be in integrity interval units */
212 bip_set_seed(bip
, bio
->bi_iter
.bi_sector
>>
213 (bi
->interval_exp
- SECTOR_SHIFT
));
/* resid counts the integrity bytes still to be added for this bio. */
215 resid
= bio_integrity_bytes(bi
, bio_sectors(bio
));
216 while (resid
> 0 && sg_miter_next(miter
)) {
/* Never take more from the current mapping than is still needed. */
217 len
= min_t(size_t, miter
->length
, resid
);
218 rc
= bio_integrity_add_page(bio
, miter
->page
, len
,
219 offset_in_page(miter
->addr
));
/* Anything other than the full length counts as a failure. */
220 if (unlikely(rc
!= len
)) {
221 pr_err("bio_integrity_add_page() failed; %d\n", rc
);
222 sg_miter_stop(miter
);
/*
 * Only part of the mapping was used: reduce consumed by the unused tail,
 * presumably so the next iteration step resumes there - confirm against
 * the sg_miter documentation.
 */
227 if (len
< miter
->length
)
228 miter
->consumed
-= miter
->length
- len
;
230 sg_miter_stop(miter
);
/* Variant for builds without CONFIG_BLK_DEV_INTEGRITY (body not visible). */
235 static int nvmet_bdev_alloc_bip(struct nvmet_req
*req
, struct bio
*bio
,
236 struct sg_mapping_iter
*miter
)
240 #endif /* CONFIG_BLK_DEV_INTEGRITY */
/*
 * nvmet_bdev_execute_rw() - execute an NVMe read or write on the backing
 * block device.
 * @req: request to execute
 *
 * The expected transfer length is the data length plus req->metadata_len.
 * The pages of req->sg are added to a bio, either the inline bio of the
 * request or one obtained from bio_alloc(). When bio_add_page() cannot
 * take a page, a further bio is allocated and chained to the previous one.
 * With metadata present, nvmet_bdev_alloc_bip() attaches the integrity
 * payload to each bio. Completion is reported through nvmet_bio_done().
 *
 * NOTE(review): the submit_bio() calls, the error paths and several
 * declarations are not visible in this excerpt.
 */
242 static void nvmet_bdev_execute_rw(struct nvmet_req
*req
)
244 unsigned int sg_cnt
= req
->sg_cnt
;
246 struct scatterlist
*sg
;
247 struct blk_plug plug
;
251 struct sg_mapping_iter prot_miter
;
252 unsigned int iter_flags
;
253 unsigned int total_len
= nvmet_rw_data_len(req
) + req
->metadata_len
;
255 if (!nvmet_check_transfer_len(req
, total_len
))
/*
 * Completes with status 0 without issuing I/O; the guarding condition is
 * not visible here (presumably an empty scatterlist - confirm).
 */
259 nvmet_req_complete(req
, 0);
263 if (req
->cmd
->rw
.opcode
== nvme_cmd_write
) {
264 opf
= REQ_OP_WRITE
| REQ_SYNC
| REQ_IDLE
;
265 if (req
->cmd
->rw
.control
& cpu_to_le16(NVME_RW_FUA
))
267 iter_flags
= SG_MITER_TO_SG
;
/* Presumably the non-write branch (its else line is not visible). */
270 iter_flags
= SG_MITER_FROM_SG
;
273 if (is_pci_p2pdma_page(sg_page(req
->sg
)))
276 sector
= nvmet_lba_to_sect(req
->ns
, req
->cmd
->rw
.slba
);
/* Use the bio embedded in the request when the inline bvecs suffice. */
278 if (nvmet_use_inline_bvec(req
)) {
279 bio
= &req
->b
.inline_bio
;
280 bio_init(bio
, req
->ns
->bdev
, req
->inline_bvec
,
281 ARRAY_SIZE(req
->inline_bvec
), opf
);
283 bio
= bio_alloc(req
->ns
->bdev
, bio_max_segs(sg_cnt
), opf
,
286 bio
->bi_iter
.bi_sector
= sector
;
287 bio
->bi_private
= req
;
288 bio
->bi_end_io
= nvmet_bio_done
;
290 blk_start_plug(&plug
);
/* The metadata iterator is only started when the request has metadata. */
291 if (req
->metadata_len
)
292 sg_miter_start(&prot_miter
, req
->metadata_sg
,
293 req
->metadata_sg_cnt
, iter_flags
);
295 for_each_sg(req
->sg
, sg
, req
->sg_cnt
, i
) {
296 while (bio_add_page(bio
, sg_page(sg
), sg
->length
, sg
->offset
)
/* Keep the bio that could not take the page so the new one can chain to it. */
298 struct bio
*prev
= bio
;
300 if (req
->metadata_len
) {
301 rc
= nvmet_bdev_alloc_bip(req
, bio
,
309 bio
= bio_alloc(req
->ns
->bdev
, bio_max_segs(sg_cnt
),
/* The new bio starts at the sector reached so far. */
311 bio
->bi_iter
.bi_sector
= sector
;
313 bio_chain(bio
, prev
);
/* sg->length is in bytes; shifting by 9 converts to 512-byte sectors. */
317 sector
+= sg
->length
>> 9;
/* Integrity payload for the last bio built by the loop above. */
321 if (req
->metadata_len
) {
322 rc
= nvmet_bdev_alloc_bip(req
, bio
, &prot_miter
);
330 blk_finish_plug(&plug
);
/*
 * nvmet_bdev_execute_flush() - execute an NVMe Flush command.
 * @req: request to execute
 *
 * When bdev_write_cache() reports no write cache, the request is completed
 * with NVME_SC_SUCCESS straight away. Otherwise the inline bio of the
 * request is initialised as REQ_OP_WRITE | REQ_PREFLUSH, with
 * nvmet_bio_done() as its completion callback.
 *
 * NOTE(review): the return statements and the bio submission are not
 * visible in this excerpt.
 */
333 static void nvmet_bdev_execute_flush(struct nvmet_req
*req
)
335 struct bio
*bio
= &req
->b
.inline_bio
;
/* Checked before the transfer length validation below. */
337 if (!bdev_write_cache(req
->ns
->bdev
)) {
338 nvmet_req_complete(req
, NVME_SC_SUCCESS
);
/* A flush is expected to carry no data. */
342 if (!nvmet_check_transfer_len(req
, 0))
345 bio_init(bio
, req
->ns
->bdev
, req
->inline_bvec
,
346 ARRAY_SIZE(req
->inline_bvec
), REQ_OP_WRITE
| REQ_PREFLUSH
);
347 bio
->bi_private
= req
;
348 bio
->bi_end_io
= nvmet_bio_done
;
/*
 * nvmet_bdev_flush() - flush the backing block device through
 * blkdev_issue_flush().
 * @req: request whose namespace device is flushed
 *
 * Return: NVME_SC_INTERNAL | NVME_SC_DNR when blkdev_issue_flush() fails.
 * NOTE(review): the values returned when the device has no write cache and
 * on success are not visible in this excerpt.
 */
353 u16
nvmet_bdev_flush(struct nvmet_req
*req
)
355 if (!bdev_write_cache(req
->ns
->bdev
))
358 if (blkdev_issue_flush(req
->ns
->bdev
))
359 return NVME_SC_INTERNAL
| NVME_SC_DNR
;
/*
 * nvmet_bdev_discard_range() - issue a discard for one DSM range.
 * @req:   request the range belongs to; req->error_slba is set on failure
 * @range: range descriptor providing slba and nlb
 * @bio:   bio pointer of the caller (presumably forwarded to
 *         __blkdev_issue_discard(); that argument line is not visible)
 *
 * Return: NVME_SC_SUCCESS when the discard call returns 0 or -EOPNOTSUPP,
 * otherwise the NVMe status produced by errno_to_nvme_status().
 */
363 static u16
nvmet_bdev_discard_range(struct nvmet_req
*req
,
364 struct nvme_dsm_range
*range
, struct bio
**bio
)
366 struct nvmet_ns
*ns
= req
->ns
;
369 ret
= __blkdev_issue_discard(ns
->bdev
,
370 nvmet_lba_to_sect(ns
, range
->slba
),
/* nlb counts logical blocks; the shift converts it to 512-byte sectors. */
371 le32_to_cpu(range
->nlb
) << (ns
->blksize_shift
- 9),
/* -EOPNOTSUPP is not reported to the host as an error. */
373 if (ret
&& ret
!= -EOPNOTSUPP
) {
374 req
->error_slba
= le64_to_cpu(range
->slba
);
375 return errno_to_nvme_status(req
, ret
);
377 return NVME_SC_SUCCESS
;
/*
 * nvmet_bdev_execute_discard() - process the ranges of a DSM deallocate
 * request.
 * @req: request to execute
 *
 * Each range descriptor is copied out of the request SGL at offset
 * i * sizeof(range) and passed to nvmet_bdev_discard_range(). A resulting
 * bio gets nvmet_bio_done() as its completion callback; the function also
 * contains a direct nvmet_req_complete(req, status) call.
 *
 * NOTE(review): the conditions selecting between these paths and the bio
 * submission are not visible in this excerpt.
 */
380 static void nvmet_bdev_execute_discard(struct nvmet_req
*req
)
382 struct nvme_dsm_range range
;
383 struct bio
*bio
= NULL
;
/*
 * The bound is inclusive; presumably dsm.nr is a 0's based count - confirm
 * against the NVMe specification.
 */
387 for (i
= 0; i
<= le32_to_cpu(req
->cmd
->dsm
.nr
); i
++) {
388 status
= nvmet_copy_from_sgl(req
, i
* sizeof(range
), &range
,
393 status
= nvmet_bdev_discard_range(req
, &range
, &bio
);
399 bio
->bi_private
= req
;
400 bio
->bi_end_io
= nvmet_bio_done
;
406 nvmet_req_complete(req
, status
);
/*
 * nvmet_bdev_execute_dsm() - dispatch a Dataset Management command.
 * @req: request to execute
 *
 * The data length is validated against nvmet_dsm_len() before the
 * attributes field is examined. One case calls
 * nvmet_bdev_execute_discard(); the NVME_DSMGMT_IDR and NVME_DSMGMT_IDW
 * cases are followed by the "Not supported yet" path, which completes the
 * request with status 0.
 *
 * NOTE(review): the case label selecting the discard path is not visible
 * in this excerpt.
 */
410 static void nvmet_bdev_execute_dsm(struct nvmet_req
*req
)
412 if (!nvmet_check_data_len_lte(req
, nvmet_dsm_len(req
)))
415 switch (le32_to_cpu(req
->cmd
->dsm
.attributes
)) {
417 nvmet_bdev_execute_discard(req
);
419 case NVME_DSMGMT_IDR
:
420 case NVME_DSMGMT_IDW
:
422 /* Not supported yet */
423 nvmet_req_complete(req
, 0);
/*
 * nvmet_bdev_execute_write_zeroes() - execute an NVMe Write Zeroes command.
 * @req: request to execute
 *
 * The command is expected to carry no data. The start sector and sector
 * count are derived from the command and passed to
 * __blkdev_issue_zeroout() with GFP_KERNEL and flags 0. A resulting bio
 * gets nvmet_bio_done() as its completion callback; the function also
 * contains a direct completion with errno_to_nvme_status(req, ret).
 *
 * NOTE(review): the condition selecting between these two paths and the
 * bio submission are not visible in this excerpt.
 */
428 static void nvmet_bdev_execute_write_zeroes(struct nvmet_req
*req
)
430 struct nvme_write_zeroes_cmd
*write_zeroes
= &req
->cmd
->write_zeroes
;
431 struct bio
*bio
= NULL
;
436 if (!nvmet_check_transfer_len(req
, 0))
439 sector
= nvmet_lba_to_sect(req
->ns
, write_zeroes
->slba
);
/*
 * The 16-bit length is widened to sector_t before the + 1 and the shift.
 * The + 1 presumably accounts for a 0's based length field - confirm
 * against the NVMe specification.
 */
440 nr_sector
= (((sector_t
)le16_to_cpu(write_zeroes
->length
) + 1) <<
441 (req
->ns
->blksize_shift
- 9));
443 ret
= __blkdev_issue_zeroout(req
->ns
->bdev
, sector
, nr_sector
,
444 GFP_KERNEL
, &bio
, 0);
446 bio
->bi_private
= req
;
447 bio
->bi_end_io
= nvmet_bio_done
;
450 nvmet_req_complete(req
, errno_to_nvme_status(req
, ret
));
/*
 * nvmet_bdev_parse_io_cmd() - choose the execute handler for an I/O command
 * on a block-device backed namespace.
 * @req: request whose opcode is examined; req->execute is set on a match
 *
 * Handlers assigned in the visible code: nvmet_bdev_execute_rw,
 * nvmet_bdev_execute_flush, nvmet_bdev_execute_dsm and
 * nvmet_bdev_execute_write_zeroes. On the read/write path
 * req->metadata_len is additionally set when the controller has pi_support
 * and the namespace has PI.
 *
 * Return: the result of nvmet_report_invalid_opcode() on the path that
 * matches no handler.
 * NOTE(review): the other case labels and the value returned after a
 * handler is assigned are not visible in this excerpt.
 */
454 u16
nvmet_bdev_parse_io_cmd(struct nvmet_req
*req
)
456 switch (req
->cmd
->common
.opcode
) {
459 req
->execute
= nvmet_bdev_execute_rw
;
/* Metadata is only transferred when both controller and namespace use PI. */
460 if (req
->sq
->ctrl
->pi_support
&& nvmet_ns_has_pi(req
->ns
))
461 req
->metadata_len
= nvmet_rw_metadata_len(req
);
464 req
->execute
= nvmet_bdev_execute_flush
;
467 req
->execute
= nvmet_bdev_execute_dsm
;
469 case nvme_cmd_write_zeroes
:
470 req
->execute
= nvmet_bdev_execute_write_zeroes
;
473 return nvmet_report_invalid_opcode(req
);