// drivers/block/virtio_blk.c — virtio block device driver
09c434b8 1// SPDX-License-Identifier: GPL-2.0-only
e467cde2
RR
2//#define DEBUG
3#include <linux/spinlock.h>
5a0e3ad6 4#include <linux/slab.h>
e467cde2
RR
5#include <linux/blkdev.h>
6#include <linux/hdreg.h>
0c8d44f2 7#include <linux/module.h>
4678d6f9 8#include <linux/mutex.h>
ad71473d 9#include <linux/interrupt.h>
e467cde2
RR
10#include <linux/virtio.h>
11#include <linux/virtio_blk.h>
3d1266c7 12#include <linux/scatterlist.h>
7a7c924c 13#include <linux/string_helpers.h>
5087a50e 14#include <linux/idr.h>
1cf7e9c6 15#include <linux/blk-mq.h>
ad71473d 16#include <linux/blk-mq-virtio.h>
1cf7e9c6 17#include <linux/numa.h>
55a2415b 18#include <uapi/linux/virtio_ring.h>
3d1266c7 19
/* Number of minor-number bits reserved per disk for partitions. */
#define PART_BITS 4
/* Length of a per-virtqueue name ("req.%d"). */
#define VQ_NAME_LEN 16
/* Driver-side cap on discard/write-zeroes segments per request. */
#define MAX_DISCARD_SEGMENTS 256u
e467cde2 23
/* Block major number allocated in init() via register_blkdev(). */
static int major;
/* Allocator for per-device indices (drives minor numbers and vdX names). */
static DEFINE_IDA(vd_index_ida);

/* Workqueue used to handle config-change notifications in process context. */
static struct workqueue_struct *virtblk_wq;
4f3bf19c 28
6a27b656
ML
/* Per-virtqueue state; cacheline-aligned to avoid false sharing between queues. */
struct virtio_blk_vq {
	struct virtqueue *vq;
	spinlock_t lock;		/* serializes add/kick/get on this vq */
	char name[VQ_NAME_LEN];		/* "req.%d", handed to virtio_find_vqs() */
} ____cacheline_aligned_in_smp;
34
/* Per-device driver state, hung off vdev->priv. */
struct virtio_blk {
	struct virtio_device *vdev;

	/* The disk structure for the kernel. */
	struct gendisk *disk;

	/* Block layer tags. */
	struct blk_mq_tag_set tag_set;

	/* Process context for config space updates */
	struct work_struct config_work;

	/* What host tells us, plus 2 for header & trailer (status byte). */
	unsigned int sg_elems;

	/* Ida index - used to track minor number allocations. */
	int index;

	/* num of vqs */
	int num_vqs;
	struct virtio_blk_vq *vqs;
};
57
/*
 * Per-request driver data, allocated by blk-mq as the request PDU
 * (see tag_set.cmd_size in virtblk_probe()).
 */
struct virtblk_req {
	struct virtio_blk_outhdr out_hdr;	/* device-readable request header */
	u8 status;				/* device-written completion status */
	struct scatterlist sg[];		/* flexible array, sg_elems entries */
};
63
2a842aca 64static inline blk_status_t virtblk_result(struct virtblk_req *vbr)
a98755c5
AH
65{
66 switch (vbr->status) {
67 case VIRTIO_BLK_S_OK:
2a842aca 68 return BLK_STS_OK;
a98755c5 69 case VIRTIO_BLK_S_UNSUPP:
2a842aca 70 return BLK_STS_NOTSUPP;
a98755c5 71 default:
2a842aca 72 return BLK_STS_IOERR;
a98755c5
AH
73 }
74}
75
97b50a65
CH
76static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr,
77 struct scatterlist *data_sg, bool have_data)
78{
79 struct scatterlist hdr, status, *sgs[3];
80 unsigned int num_out = 0, num_in = 0;
81
82 sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr));
83 sgs[num_out++] = &hdr;
20af3cfd 84
0a11cc36 85 if (have_data) {
19c1c5a6 86 if (vbr->out_hdr.type & cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT))
20af3cfd 87 sgs[num_out++] = data_sg;
8f39db9d 88 else
20af3cfd
PB
89 sgs[num_out + num_in++] = data_sg;
90 }
91
8f39db9d
PB
92 sg_init_one(&status, &vbr->status, sizeof(vbr->status));
93 sgs[num_out + num_in++] = &status;
94
95 return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
5ee21a52
PB
96}
97
1f23816b
CL
/*
 * Build the virtio discard/write-zeroes payload: one
 * struct virtio_blk_discard_write_zeroes per bio in the request, attached
 * to the request as a special payload so blk_rq_map_sg() picks it up as
 * the data segment.  The buffer is freed in virtblk_request_done().
 * Returns 0 or -ENOMEM.
 */
static int virtblk_setup_discard_write_zeroes(struct request *req, bool unmap)
{
	unsigned short segments = blk_rq_nr_discard_segments(req);
	unsigned short n = 0;
	struct virtio_blk_discard_write_zeroes *range;
	struct bio *bio;
	u32 flags = 0;

	/* UNMAP only applies to write-zeroes; discard leaves flags at 0. */
	if (unmap)
		flags |= VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP;

	/* GFP_ATOMIC: we are on the blk-mq dispatch path. */
	range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
	if (!range)
		return -ENOMEM;

	/* One descriptor per bio; fields are little-endian per the spec. */
	__rq_for_each_bio(bio, req) {
		u64 sector = bio->bi_iter.bi_sector;
		u32 num_sectors = bio->bi_iter.bi_size >> SECTOR_SHIFT;

		range[n].flags = cpu_to_le32(flags);
		range[n].num_sectors = cpu_to_le32(num_sectors);
		range[n].sector = cpu_to_le64(sector);
		n++;
	}

	/* Hand the buffer to the block layer as the request's only segment. */
	req->special_vec.bv_page = virt_to_page(range);
	req->special_vec.bv_offset = offset_in_page(range);
	req->special_vec.bv_len = sizeof(*range) * segments;
	req->rq_flags |= RQF_SPECIAL_PAYLOAD;

	return 0;
}
130
5124c285 131static inline void virtblk_request_done(struct request *req)
a98755c5 132{
9d74e257 133 struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
a98755c5 134
1f23816b
CL
135 if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
136 kfree(page_address(req->special_vec.bv_page) +
137 req->special_vec.bv_offset);
138 }
139
d19633d5 140 blk_mq_end_request(req, virtblk_result(vbr));
a98755c5
AH
141}
142
/*
 * Virtqueue interrupt callback: drain completed requests and hand them to
 * blk-mq.  The disable_cb/enable_cb loop closes the race where the device
 * adds a buffer between the final get_buf and re-enabling callbacks.
 */
static void virtblk_done(struct virtqueue *vq)
{
	struct virtio_blk *vblk = vq->vdev->priv;
	bool req_done = false;
	int qid = vq->index;
	struct virtblk_req *vbr;
	unsigned long flags;
	unsigned int len;

	spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
	do {
		virtqueue_disable_cb(vq);
		while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) {
			struct request *req = blk_mq_rq_from_pdu(vbr);

			/* Defers to virtblk_request_done() via ->complete. */
			blk_mq_complete_request(req);
			req_done = true;
		}
		/* A broken device will never deliver further buffers. */
		if (unlikely(virtqueue_is_broken(vq)))
			break;
	} while (!virtqueue_enable_cb(vq));

	/* In case queue is stopped waiting for more buffers. */
	if (req_done)
		blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
}
170
944e7c87
JA
171static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx)
172{
173 struct virtio_blk *vblk = hctx->queue->queuedata;
174 struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num];
175 bool kick;
176
177 spin_lock_irq(&vq->lock);
178 kick = virtqueue_kick_prepare(vq->vq);
179 spin_unlock_irq(&vq->lock);
180
181 if (kick)
182 virtqueue_notify(vq->vq);
183}
184
fc17b653 185static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
74c45052 186 const struct blk_mq_queue_data *bd)
e467cde2 187{
1cf7e9c6 188 struct virtio_blk *vblk = hctx->queue->queuedata;
74c45052 189 struct request *req = bd->rq;
9d74e257 190 struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
1cf7e9c6 191 unsigned long flags;
20af3cfd 192 unsigned int num;
6a27b656 193 int qid = hctx->queue_num;
5261b85e 194 int err;
e8edca6f 195 bool notify = false;
1f23816b 196 bool unmap = false;
aebf526b 197 u32 type;
e467cde2 198
1cf7e9c6 199 BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
e467cde2 200
aebf526b
CH
201 switch (req_op(req)) {
202 case REQ_OP_READ:
203 case REQ_OP_WRITE:
204 type = 0;
205 break;
206 case REQ_OP_FLUSH:
207 type = VIRTIO_BLK_T_FLUSH;
208 break;
1f23816b
CL
209 case REQ_OP_DISCARD:
210 type = VIRTIO_BLK_T_DISCARD;
211 break;
212 case REQ_OP_WRITE_ZEROES:
213 type = VIRTIO_BLK_T_WRITE_ZEROES;
214 unmap = !(req->cmd_flags & REQ_NOUNMAP);
215 break;
aebf526b
CH
216 case REQ_OP_DRV_IN:
217 type = VIRTIO_BLK_T_GET_ID;
218 break;
219 default:
220 WARN_ON_ONCE(1);
fc17b653 221 return BLK_STS_IOERR;
e467cde2
RR
222 }
223
aebf526b
CH
224 vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, type);
225 vbr->out_hdr.sector = type ?
226 0 : cpu_to_virtio64(vblk->vdev, blk_rq_pos(req));
227 vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(req));
228
e2490073
CH
229 blk_mq_start_request(req);
230
1f23816b
CL
231 if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) {
232 err = virtblk_setup_discard_write_zeroes(req, unmap);
233 if (err)
234 return BLK_STS_RESOURCE;
235 }
236
85dada09 237 num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
1cde26f9 238 if (num) {
85dada09 239 if (rq_data_dir(req) == WRITE)
19c1c5a6 240 vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT);
20af3cfd 241 else
19c1c5a6 242 vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_IN);
e467cde2
RR
243 }
244
6a27b656 245 spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
782e067d 246 err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num);
5261b85e 247 if (err) {
6a27b656 248 virtqueue_kick(vblk->vqs[qid].vq);
f5f6b95c
HP
249 /* Don't stop the queue if -ENOMEM: we may have failed to
250 * bounce the buffer due to global resource outage.
251 */
252 if (err == -ENOSPC)
253 blk_mq_stop_hw_queue(hctx);
6a27b656 254 spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
3d973b2e
HP
255 switch (err) {
256 case -ENOSPC:
86ff7c2a 257 return BLK_STS_DEV_RESOURCE;
3d973b2e
HP
258 case -ENOMEM:
259 return BLK_STS_RESOURCE;
260 default:
261 return BLK_STS_IOERR;
262 }
a98755c5
AH
263 }
264
74c45052 265 if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
e8edca6f 266 notify = true;
6a27b656 267 spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
e8edca6f
ML
268
269 if (notify)
6a27b656 270 virtqueue_notify(vblk->vqs[qid].vq);
fc17b653 271 return BLK_STS_OK;
a98755c5
AH
272}
273
/*
 * Fetch the device serial-number string into *id_str (VIRTIO_BLK_ID_BYTES
 * bytes) by issuing a synchronous VIRTIO_BLK_T_GET_ID command.
 * Returns 0 on success or a negative errno (-EIO if the device does not
 * support the command).  May sleep; caller provides the buffer.
 */
static int virtblk_get_id(struct gendisk *disk, char *id_str)
{
	struct virtio_blk *vblk = disk->private_data;
	struct request_queue *q = vblk->disk->queue;
	struct request *req;
	int err;

	/* REQ_OP_DRV_IN is mapped to VIRTIO_BLK_T_GET_ID in virtio_queue_rq(). */
	req = blk_get_request(q, REQ_OP_DRV_IN, 0);
	if (IS_ERR(req))
		return PTR_ERR(req);

	err = blk_rq_map_kern(q, req, id_str, VIRTIO_BLK_ID_BYTES, GFP_KERNEL);
	if (err)
		goto out;

	/* Execute synchronously and convert the virtio status to an errno. */
	blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
	err = blk_status_to_errno(virtblk_result(blk_mq_rq_to_pdu(req)));
out:
	blk_put_request(req);
	return err;
}
297
135da0b0
CB
298/* We provide getgeo only to please some old bootloader/partitioning tools */
299static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
300{
48e4043d 301 struct virtio_blk *vblk = bd->bd_disk->private_data;
48e4043d
RH
302
303 /* see if the host passed in geometry config */
855e0c52
RR
304 if (virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_GEOMETRY)) {
305 virtio_cread(vblk->vdev, struct virtio_blk_config,
306 geometry.cylinders, &geo->cylinders);
307 virtio_cread(vblk->vdev, struct virtio_blk_config,
308 geometry.heads, &geo->heads);
309 virtio_cread(vblk->vdev, struct virtio_blk_config,
310 geometry.sectors, &geo->sectors);
48e4043d
RH
311 } else {
312 /* some standard values, similar to sd */
313 geo->heads = 1 << 6;
314 geo->sectors = 1 << 5;
315 geo->cylinders = get_capacity(bd->bd_disk) >> 11;
316 }
135da0b0
CB
317 return 0;
318}
319
/* Minimal block_device_operations: only geometry is device-specific. */
static const struct block_device_operations virtblk_fops = {
	.owner = THIS_MODULE,
	.getgeo = virtblk_getgeo,
};
324
d50ed907
CB
325static int index_to_minor(int index)
326{
327 return index << PART_BITS;
328}
329
5087a50e
MT
330static int minor_to_index(int minor)
331{
332 return minor >> PART_BITS;
333}
334
e982c4d0
HR
335static ssize_t serial_show(struct device *dev,
336 struct device_attribute *attr, char *buf)
a5eb9e4f
RH
337{
338 struct gendisk *disk = dev_to_disk(dev);
339 int err;
340
341 /* sysfs gives us a PAGE_SIZE buffer */
342 BUILD_BUG_ON(PAGE_SIZE < VIRTIO_BLK_ID_BYTES);
343
344 buf[VIRTIO_BLK_ID_BYTES] = '\0';
345 err = virtblk_get_id(disk, buf);
346 if (!err)
347 return strlen(buf);
348
349 if (err == -EIO) /* Unsupported? Make it empty. */
350 return 0;
351
352 return err;
353}
393c525b 354
e982c4d0 355static DEVICE_ATTR_RO(serial);
a5eb9e4f 356
daf2a501
SH
/* The queue's logical block size must be set before calling this */
static void virtblk_update_capacity(struct virtio_blk *vblk, bool resize)
{
	struct virtio_device *vdev = vblk->vdev;
	struct request_queue *q = vblk->disk->queue;
	char cap_str_2[10], cap_str_10[10];
	unsigned long long nblocks;
	u64 capacity;

	/* Host must always specify the capacity. */
	virtio_cread(vdev, struct virtio_blk_config, capacity, &capacity);

	/* If capacity is too big, truncate with warning. */
	if ((sector_t)capacity != capacity) {
		dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
			 (unsigned long long)capacity);
		capacity = (sector_t)-1;
	}

	/* capacity is in 512-byte sectors; convert to logical blocks. */
	nblocks = DIV_ROUND_UP_ULL(capacity, queue_logical_block_size(q) >> 9);

	/* Human-readable sizes in both binary and decimal units for the log. */
	string_get_size(nblocks, queue_logical_block_size(q),
			STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
	string_get_size(nblocks, queue_logical_block_size(q),
			STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));

	dev_notice(&vdev->dev,
		   "[%s] %s%llu %d-byte logical blocks (%s/%s)\n",
		   vblk->disk->disk_name,
		   resize ? "new size: " : "",
		   nblocks,
		   queue_logical_block_size(q),
		   cap_str_10,
		   cap_str_2);

	/* Update gendisk capacity; on resize this also emits a uevent. */
	set_capacity_revalidate_and_notify(vblk->disk, capacity, true);
}
394
/* Process-context handler for config-change interrupts (capacity only). */
static void virtblk_config_changed_work(struct work_struct *work)
{
	struct virtio_blk *vblk =
		container_of(work, struct virtio_blk, config_work);

	virtblk_update_capacity(vblk, true);
}
402
/* Config-change callback (interrupt context): defer to the workqueue. */
static void virtblk_config_changed(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;

	queue_work(virtblk_wq, &vblk->config_work);
}
409
6abd6e5a
AS
/*
 * Discover and initialize the device's virtqueues.  Honors the
 * VIRTIO_BLK_F_MQ queue count (capped at nr_cpu_ids, defaulting to 1) and
 * fills vblk->vqs/num_vqs.  The names/callbacks/vqs scratch arrays are
 * temporary and always freed; vblk->vqs is freed only on failure.
 * Returns 0 or a negative errno.
 */
static int init_vq(struct virtio_blk *vblk)
{
	int err;
	int i;
	vq_callback_t **callbacks;
	const char **names;
	struct virtqueue **vqs;
	unsigned short num_vqs;
	struct virtio_device *vdev = vblk->vdev;
	struct irq_affinity desc = { 0, };

	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_MQ,
				   struct virtio_blk_config, num_queues,
				   &num_vqs);
	if (err)
		num_vqs = 1;	/* feature absent: single queue */

	/* More queues than CPUs buys nothing. */
	num_vqs = min_t(unsigned int, nr_cpu_ids, num_vqs);

	vblk->vqs = kmalloc_array(num_vqs, sizeof(*vblk->vqs), GFP_KERNEL);
	if (!vblk->vqs)
		return -ENOMEM;

	names = kmalloc_array(num_vqs, sizeof(*names), GFP_KERNEL);
	callbacks = kmalloc_array(num_vqs, sizeof(*callbacks), GFP_KERNEL);
	vqs = kmalloc_array(num_vqs, sizeof(*vqs), GFP_KERNEL);
	if (!names || !callbacks || !vqs) {
		err = -ENOMEM;
		goto out;
	}

	for (i = 0; i < num_vqs; i++) {
		callbacks[i] = virtblk_done;
		snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req.%d", i);
		names[i] = vblk->vqs[i].name;
	}

	/* Discover virtqueues and write information to configuration. */
	err = virtio_find_vqs(vdev, num_vqs, vqs, callbacks, names, &desc);
	if (err)
		goto out;

	for (i = 0; i < num_vqs; i++) {
		spin_lock_init(&vblk->vqs[i].lock);
		vblk->vqs[i].vq = vqs[i];
	}
	vblk->num_vqs = num_vqs;

out:
	/* Scratch arrays are no longer needed whether we succeeded or not. */
	kfree(vqs);
	kfree(callbacks);
	kfree(names);
	if (err)
		kfree(vblk->vqs);
	return err;
}
466
c0aa3e09
RM
/*
 * Legacy naming scheme used for virtio devices.  We are stuck with it for
 * virtio blk but don't ever use it for any new driver.
 *
 * Produces prefix + bijective base-26 suffix ("vda" .. "vdz", "vdaa", ...)
 * into buf (buflen bytes including the terminating NUL).
 *
 * @prefix: NUL-terminated name prefix (const — never written through;
 *          callers pass a string literal).
 * @index:  zero-based device index to encode.
 * Returns 0 on success, -EINVAL if buf is too small.
 */
static int virtblk_name_format(const char *prefix, int index, char *buf, int buflen)
{
	const int base = 'z' - 'a' + 1;
	char *begin = buf + strlen(prefix);
	char *end = buf + buflen;
	char *p;
	int unit;

	p = end - 1;
	*p = '\0';
	unit = base;
	/* Emit least-significant letter first, working backwards from end. */
	do {
		if (p == begin)
			return -EINVAL;
		*--p = 'a' + (index % unit);
		index = (index / unit) - 1;	/* bijective numeration step */
	} while (index >= 0);

	/* Slide the suffix up against the prefix, then write the prefix. */
	memmove(begin, p, end - p);
	memcpy(buf, prefix, strlen(prefix));

	return 0;
}
494
cd5d5038
PB
495static int virtblk_get_cache_mode(struct virtio_device *vdev)
496{
497 u8 writeback;
498 int err;
499
855e0c52
RR
500 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE,
501 struct virtio_blk_config, wce,
502 &writeback);
592002f5
MT
503
504 /*
505 * If WCE is not configurable and flush is not available,
506 * assume no writeback cache is in use.
507 */
cd5d5038 508 if (err)
592002f5 509 writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH);
cd5d5038
PB
510
511 return writeback;
512}
513
/* Push the device's current cache mode into the block layer and revalidate. */
static void virtblk_update_cache_mode(struct virtio_device *vdev)
{
	u8 writeback = virtblk_get_cache_mode(vdev);
	struct virtio_blk *vblk = vdev->priv;

	/* No FUA support is advertised (second argument false). */
	blk_queue_write_cache(vblk->disk->queue, writeback, false);
	revalidate_disk(vblk->disk);
}
522
/* sysfs cache_type values; index matches the config-space wce byte. */
static const char *const virtblk_cache_types[] = {
	"write through", "write back"
};
526
527static ssize_t
e982c4d0
HR
528cache_type_store(struct device *dev, struct device_attribute *attr,
529 const char *buf, size_t count)
cd5d5038
PB
530{
531 struct gendisk *disk = dev_to_disk(dev);
532 struct virtio_blk *vblk = disk->private_data;
533 struct virtio_device *vdev = vblk->vdev;
534 int i;
cd5d5038
PB
535
536 BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE));
f53d5aa0 537 i = sysfs_match_string(virtblk_cache_types, buf);
cd5d5038 538 if (i < 0)
f53d5aa0 539 return i;
cd5d5038 540
855e0c52 541 virtio_cwrite8(vdev, offsetof(struct virtio_blk_config, wce), i);
cd5d5038
PB
542 virtblk_update_cache_mode(vdev);
543 return count;
544}
545
546static ssize_t
e982c4d0 547cache_type_show(struct device *dev, struct device_attribute *attr, char *buf)
cd5d5038
PB
548{
549 struct gendisk *disk = dev_to_disk(dev);
550 struct virtio_blk *vblk = disk->private_data;
551 u8 writeback = virtblk_get_cache_mode(vblk->vdev);
552
553 BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types));
554 return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]);
555}
556
e982c4d0
HR
557static DEVICE_ATTR_RW(cache_type);
558
/* sysfs attributes attached to the gendisk at device_add_disk() time. */
static struct attribute *virtblk_attrs[] = {
	&dev_attr_serial.attr,
	&dev_attr_cache_type.attr,
	NULL,
};

/* Demote cache_type to read-only when the device's WCE is not writable. */
static umode_t virtblk_attrs_are_visible(struct kobject *kobj,
		struct attribute *a, int n)
{
	struct device *dev = container_of(kobj, struct device, kobj);
	struct gendisk *disk = dev_to_disk(dev);
	struct virtio_blk *vblk = disk->private_data;
	struct virtio_device *vdev = vblk->vdev;

	if (a == &dev_attr_cache_type.attr &&
	    !virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE))
		return S_IRUGO;

	return a->mode;
}

static const struct attribute_group virtblk_attr_group = {
	.attrs = virtblk_attrs,
	.is_visible = virtblk_attrs_are_visible,
};

static const struct attribute_group *virtblk_attr_groups[] = {
	&virtblk_attr_group,
	NULL,
};
cd5d5038 589
d6296d39
CH
590static int virtblk_init_request(struct blk_mq_tag_set *set, struct request *rq,
591 unsigned int hctx_idx, unsigned int numa_node)
e9b267d9 592{
d6296d39 593 struct virtio_blk *vblk = set->driver_data;
e9b267d9
CH
594 struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq);
595
596 sg_init_table(vbr->sg, vblk->sg_elems);
597 return 0;
598}
599
ad71473d
CH
/* blk-mq ->map_queues: map hw queues to CPUs using the virtio IRQ affinity. */
static int virtblk_map_queues(struct blk_mq_tag_set *set)
{
	struct virtio_blk *vblk = set->driver_data;

	return blk_mq_virtio_map_queues(&set->map[HCTX_TYPE_DEFAULT],
					vblk->vdev, 0);
}
607
/* blk-mq operations for virtio-blk. */
static const struct blk_mq_ops virtio_mq_ops = {
	.queue_rq	= virtio_queue_rq,
	.commit_rqs	= virtio_commit_rqs,
	.complete	= virtblk_request_done,
	.init_request	= virtblk_init_request,
	.map_queues	= virtblk_map_queues,
};
615
24d2f903
CH
/* Optional module parameter; 0 (default) means size to the virtqueue ring. */
static unsigned int virtblk_queue_depth;
module_param_named(queue_depth, virtblk_queue_depth, uint, 0444);
/*
 * Probe a virtio block device: allocate the index and driver state, set up
 * virtqueues and the blk-mq tag set, create the gendisk, apply all
 * feature-negotiated queue limits, and register the disk.
 * Error paths unwind in strict reverse order of acquisition.
 */
static int virtblk_probe(struct virtio_device *vdev)
{
	struct virtio_blk *vblk;
	struct request_queue *q;
	int err, index;

	u32 v, blk_size, max_size, sg_elems, opt_io_size;
	u16 min_io_size;
	u8 physical_block_exp, alignment_offset;

	/* Config space access is mandatory for this driver. */
	if (!vdev->config->get) {
		dev_err(&vdev->dev, "%s failure: config access disabled\n",
			__func__);
		return -EINVAL;
	}

	/* Reserve a device index; it determines the minor range and name. */
	err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS),
			     GFP_KERNEL);
	if (err < 0)
		goto out;
	index = err;

	/* We need to know how many segments before we allocate. */
	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SEG_MAX,
				   struct virtio_blk_config, seg_max,
				   &sg_elems);

	/* We need at least one SG element, whatever they say. */
	if (err || !sg_elems)
		sg_elems = 1;

	/* We need an extra sg elements at head and tail. */
	sg_elems += 2;
	vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
	if (!vblk) {
		err = -ENOMEM;
		goto out_free_index;
	}

	vblk->vdev = vdev;
	vblk->sg_elems = sg_elems;

	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);

	err = init_vq(vblk);
	if (err)
		goto out_free_vblk;

	/* FIXME: How many partitions?  How long is a piece of string? */
	vblk->disk = alloc_disk(1 << PART_BITS);
	if (!vblk->disk) {
		err = -ENOMEM;
		goto out_free_vq;
	}

	/* Default queue sizing is to fill the ring. */
	if (!virtblk_queue_depth) {
		virtblk_queue_depth = vblk->vqs[0].vq->num_free;
		/* ... but without indirect descs, we use 2 descs per req */
		if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC))
			virtblk_queue_depth /= 2;
	}

	memset(&vblk->tag_set, 0, sizeof(vblk->tag_set));
	vblk->tag_set.ops = &virtio_mq_ops;
	vblk->tag_set.queue_depth = virtblk_queue_depth;
	vblk->tag_set.numa_node = NUMA_NO_NODE;
	vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
	/* PDU = request header/status plus the variable scatterlist. */
	vblk->tag_set.cmd_size =
		sizeof(struct virtblk_req) +
		sizeof(struct scatterlist) * sg_elems;
	vblk->tag_set.driver_data = vblk;
	vblk->tag_set.nr_hw_queues = vblk->num_vqs;

	err = blk_mq_alloc_tag_set(&vblk->tag_set);
	if (err)
		goto out_put_disk;

	q = blk_mq_init_queue(&vblk->tag_set);
	if (IS_ERR(q)) {
		err = -ENOMEM;
		goto out_free_tags;
	}
	vblk->disk->queue = q;

	q->queuedata = vblk;

	virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);

	vblk->disk->major = major;
	vblk->disk->first_minor = index_to_minor(index);
	vblk->disk->private_data = vblk;
	vblk->disk->fops = &virtblk_fops;
	vblk->disk->flags |= GENHD_FL_EXT_DEVT;
	vblk->index = index;

	/* configure queue flush support */
	virtblk_update_cache_mode(vdev);

	/* If disk is read-only in the host, the guest should obey */
	if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
		set_disk_ro(vblk->disk, 1);

	/* We can handle whatever the host told us to handle. */
	blk_queue_max_segments(q, vblk->sg_elems-2);

	/* No real sector limit. */
	blk_queue_max_hw_sectors(q, -1U);

	max_size = virtio_max_dma_size(vdev);

	/* Host can optionally specify maximum segment size and number of
	 * segments. */
	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SIZE_MAX,
				   struct virtio_blk_config, size_max, &v);
	if (!err)
		max_size = min(max_size, v);

	blk_queue_max_segment_size(q, max_size);

	/* Host can optionally specify the block size of the device */
	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE,
				   struct virtio_blk_config, blk_size,
				   &blk_size);
	if (!err)
		blk_queue_logical_block_size(q, blk_size);
	else
		blk_size = queue_logical_block_size(q);

	/* Use topology information if available */
	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
				   struct virtio_blk_config, physical_block_exp,
				   &physical_block_exp);
	if (!err && physical_block_exp)
		blk_queue_physical_block_size(q,
				blk_size * (1 << physical_block_exp));

	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
				   struct virtio_blk_config, alignment_offset,
				   &alignment_offset);
	if (!err && alignment_offset)
		blk_queue_alignment_offset(q, blk_size * alignment_offset);

	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
				   struct virtio_blk_config, min_io_size,
				   &min_io_size);
	if (!err && min_io_size)
		blk_queue_io_min(q, blk_size * min_io_size);

	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
				   struct virtio_blk_config, opt_io_size,
				   &opt_io_size);
	if (!err && opt_io_size)
		blk_queue_io_opt(q, blk_size * opt_io_size);

	/* Import the host's discard limits, defaulting to "unlimited". */
	if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
		q->limits.discard_granularity = blk_size;

		virtio_cread(vdev, struct virtio_blk_config,
			     discard_sector_alignment, &v);
		q->limits.discard_alignment = v ? v << SECTOR_SHIFT : 0;

		virtio_cread(vdev, struct virtio_blk_config,
			     max_discard_sectors, &v);
		blk_queue_max_discard_sectors(q, v ? v : UINT_MAX);

		virtio_cread(vdev, struct virtio_blk_config, max_discard_seg,
			     &v);
		blk_queue_max_discard_segments(q,
					       min_not_zero(v,
							    MAX_DISCARD_SEGMENTS));

		blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
	}

	if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
		virtio_cread(vdev, struct virtio_blk_config,
			     max_write_zeroes_sectors, &v);
		blk_queue_max_write_zeroes_sectors(q, v ? v : UINT_MAX);
	}

	virtblk_update_capacity(vblk, false);
	virtio_device_ready(vdev);

	device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups);
	return 0;

out_free_tags:
	blk_mq_free_tag_set(&vblk->tag_set);
out_put_disk:
	put_disk(vblk->disk);
out_free_vq:
	vdev->config->del_vqs(vdev);
out_free_vblk:
	kfree(vblk);
out_free_index:
	ida_simple_remove(&vd_index_ida, index);
out:
	return err;
}
819
/* Tear down a virtio block device in reverse order of virtblk_probe(). */
static void virtblk_remove(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;
	int index = vblk->index;
	int refc;

	/* Make sure no work handler is accessing the device. */
	flush_work(&vblk->config_work);

	del_gendisk(vblk->disk);
	blk_cleanup_queue(vblk->disk->queue);

	blk_mq_free_tag_set(&vblk->tag_set);

	/* Stop all the virtqueues. */
	vdev->config->reset(vdev);

	/*
	 * Snapshot the disk refcount before dropping ours; if we held the
	 * last reference, the index can be recycled immediately.
	 * NOTE(review): kref_read + put_disk is not atomic — presumably
	 * acceptable here; confirm against open()/release() paths.
	 */
	refc = kref_read(&disk_to_dev(vblk->disk)->kobj.kref);
	put_disk(vblk->disk);
	vdev->config->del_vqs(vdev);
	kfree(vblk->vqs);
	kfree(vblk);

	/* Only free device id if we don't have any users */
	if (refc == 1)
		ida_simple_remove(&vd_index_ida, index);
}
847
#ifdef CONFIG_PM_SLEEP
/* Suspend: quiesce I/O and drop the virtqueues (restore recreates them). */
static int virtblk_freeze(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;

	/* Ensure we don't receive any more interrupts */
	vdev->config->reset(vdev);

	/* Make sure no work handler is accessing the device. */
	flush_work(&vblk->config_work);

	/* Block new dispatches and wait for in-flight queue_rq calls. */
	blk_mq_quiesce_queue(vblk->disk->queue);

	vdev->config->del_vqs(vdev);
	return 0;
}
864
/* Resume: rebuild the virtqueues and let I/O flow again. */
static int virtblk_restore(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;
	int ret;

	ret = init_vq(vdev->priv);
	if (ret)
		return ret;

	virtio_device_ready(vdev);

	blk_mq_unquiesce_queue(vblk->disk->queue);
	return 0;
}
#endif
880
/* Match any virtio block device, regardless of vendor. */
static const struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ 0 },
};
885
/* Features negotiated with legacy (pre-virtio-1.0) devices. */
static unsigned int features_legacy[] = {
	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
}
;
/* Features negotiated with modern (virtio-1.0+) devices. */
static unsigned int features[] = {
	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
};
899
/* Virtio driver registration for the block device class. */
static struct virtio_driver virtio_blk = {
	.feature_table			= features,
	.feature_table_size		= ARRAY_SIZE(features),
	.feature_table_legacy		= features_legacy,
	.feature_table_size_legacy	= ARRAY_SIZE(features_legacy),
	.driver.name			= KBUILD_MODNAME,
	.driver.owner			= THIS_MODULE,
	.id_table			= id_table,
	.probe				= virtblk_probe,
	.remove				= virtblk_remove,
	.config_changed			= virtblk_config_changed,
#ifdef CONFIG_PM_SLEEP
	.freeze				= virtblk_freeze,
	.restore			= virtblk_restore,
#endif
};
916
917static int __init init(void)
918{
7a7c924c
CH
919 int error;
920
921 virtblk_wq = alloc_workqueue("virtio-blk", 0, 0);
922 if (!virtblk_wq)
923 return -ENOMEM;
924
4f3bf19c 925 major = register_blkdev(0, "virtblk");
7a7c924c
CH
926 if (major < 0) {
927 error = major;
928 goto out_destroy_workqueue;
929 }
930
931 error = register_virtio_driver(&virtio_blk);
932 if (error)
933 goto out_unregister_blkdev;
934 return 0;
935
936out_unregister_blkdev:
937 unregister_blkdev(major, "virtblk");
938out_destroy_workqueue:
939 destroy_workqueue(virtblk_wq);
940 return error;
e467cde2
RR
941}
942
/* Module exit: undo init() in reverse order. */
static void __exit fini(void)
{
	unregister_virtio_driver(&virtio_blk);
	unregister_blkdev(major, "virtblk");
	destroy_workqueue(virtblk_wq);
}
module_init(init);
module_exit(fini);

MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio block driver");
MODULE_LICENSE("GPL");