// SPDX-License-Identifier: GPL-2.0-only
/*
 * VDPA simulator for block device.
 *
 * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
 * Copyright (c) 2021, Red Hat Inc. All rights reserved.
 *
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/vringh.h>
#include <linux/vdpa.h>
#include <uapi/linux/virtio_blk.h>

#include "vdpa_sim.h"

#define DRV_VERSION  "0.1"
#define DRV_AUTHOR   "Max Gurtovoy <mgurtovoy@nvidia.com>"
#define DRV_DESC     "vDPA Device Simulator for block device"
#define DRV_LICENSE  "GPL v2"

#define VDPASIM_BLK_FEATURES	(VDPASIM_FEATURES | \
				 (1ULL << VIRTIO_BLK_F_FLUSH) | \
				 (1ULL << VIRTIO_BLK_F_SIZE_MAX) | \
				 (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
				 (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
				 (1ULL << VIRTIO_BLK_F_TOPOLOGY) | \
				 (1ULL << VIRTIO_BLK_F_MQ) | \
				 (1ULL << VIRTIO_BLK_F_DISCARD) | \
				 (1ULL << VIRTIO_BLK_F_WRITE_ZEROES))

#define VDPASIM_BLK_CAPACITY		0x40000
#define VDPASIM_BLK_SIZE_MAX		0x1000
#define VDPASIM_BLK_SEG_MAX		32
#define VDPASIM_BLK_DWZ_MAX_SECTORS	UINT_MAX

/* 1 virtqueue, 1 address space, 1 virtqueue group */
#define VDPASIM_BLK_VQ_NUM	1
#define VDPASIM_BLK_AS_NUM	1
#define VDPASIM_BLK_GROUP_NUM	1

struct vdpasim_blk {
	struct vdpasim vdpasim;
	void *buffer;
	bool shared_backend;
};

static struct vdpasim_blk *sim_to_blk(struct vdpasim *vdpasim)
{
	return container_of(vdpasim, struct vdpasim_blk, vdpasim);
}

static char vdpasim_blk_id[VIRTIO_BLK_ID_BYTES] = "vdpa_blk_sim";

static bool shared_backend;
module_param(shared_backend, bool, 0444);
MODULE_PARM_DESC(shared_backend, "Enable the shared backend between virtio-blk devices");
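
/*
 * With shared_backend=1, every device created from this management device
 * uses the single buffer below instead of a private one. A usage sketch
 * (assuming the iproute2 'vdpa' tool is available; device names are
 * illustrative):
 *   modprobe vdpa_sim_blk shared_backend=1
 *   vdpa dev add mgmtdev vdpasim_blk name blk0
 *   vdpa dev add mgmtdev vdpasim_blk name blk1
 */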
static void *shared_buffer;
/* mutex to synchronize shared_buffer access */
static DEFINE_MUTEX(shared_buffer_mutex);

static void vdpasim_blk_buffer_lock(struct vdpasim_blk *blk)
{
	if (blk->shared_backend)
		mutex_lock(&shared_buffer_mutex);
}

static void vdpasim_blk_buffer_unlock(struct vdpasim_blk *blk)
{
	if (blk->shared_backend)
		mutex_unlock(&shared_buffer_mutex);
}
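
/* Returns 'true' if the range [start_sector, start_sector + num_sectors)
 * fits in the simulated capacity and num_sectors does not exceed the
 * per-request limit, 'false' otherwise.
 */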
static bool vdpasim_blk_check_range(struct vdpasim *vdpasim, u64 start_sector,
				    u64 num_sectors, u64 max_sectors)
{
	if (start_sector > VDPASIM_BLK_CAPACITY) {
		dev_dbg(&vdpasim->vdpa.dev,
			"starting sector exceeds the capacity - start: 0x%llx capacity: 0x%x\n",
			start_sector, VDPASIM_BLK_CAPACITY);
		return false;
	}

	if (num_sectors > max_sectors) {
		dev_dbg(&vdpasim->vdpa.dev,
			"number of sectors exceeds the max allowed in a request - num: 0x%llx max: 0x%llx\n",
			num_sectors, max_sectors);
		return false;
	}

	if (num_sectors > VDPASIM_BLK_CAPACITY - start_sector) {
		dev_dbg(&vdpasim->vdpa.dev,
			"request exceeds the capacity - start: 0x%llx num: 0x%llx capacity: 0x%x\n",
			start_sector, num_sectors, VDPASIM_BLK_CAPACITY);
		return false;
	}

	return true;
}
/* Returns 'true' if the request is handled (with or without an I/O error)
 * and the status is correctly written in the last byte of the 'in iov',
 * 'false' otherwise.
 */
static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
				   struct vdpasim_virtqueue *vq)
{
	struct vdpasim_blk *blk = sim_to_blk(vdpasim);
	size_t pushed = 0, to_pull, to_push;
	struct virtio_blk_outhdr hdr;
	bool handled = false;
	ssize_t bytes;
	loff_t offset;
	u64 sector;
	u8 status;
	u32 type;
	int ret;

	ret = vringh_getdesc_iotlb(&vq->vring, &vq->out_iov, &vq->in_iov,
				   &vq->head, GFP_ATOMIC);
	if (ret != 1)
		return false;

	if (vq->out_iov.used < 1 || vq->in_iov.used < 1) {
		dev_dbg(&vdpasim->vdpa.dev, "missing headers - out_iov: %u in_iov %u\n",
			vq->out_iov.used, vq->in_iov.used);
		goto err;
	}

	if (vq->in_iov.iov[vq->in_iov.used - 1].iov_len < 1) {
		dev_dbg(&vdpasim->vdpa.dev, "request in header too short\n");
		goto err;
	}

	/* The last byte is the status and we checked if the last iov has
	 * enough room for it.
	 */
	to_push = vringh_kiov_length(&vq->in_iov) - 1;

	to_pull = vringh_kiov_length(&vq->out_iov);

	bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &hdr,
				      sizeof(hdr));
	if (bytes != sizeof(hdr)) {
		dev_dbg(&vdpasim->vdpa.dev, "request out header too short\n");
		goto err;
	}

	to_pull -= bytes;

	type = vdpasim32_to_cpu(vdpasim, hdr.type);
	sector = vdpasim64_to_cpu(vdpasim, hdr.sector);
	offset = sector << SECTOR_SHIFT;
	status = VIRTIO_BLK_S_OK;

	if (type != VIRTIO_BLK_T_IN && type != VIRTIO_BLK_T_OUT &&
	    sector != 0) {
		dev_dbg(&vdpasim->vdpa.dev,
			"sector must be 0 for %u request - sector: 0x%llx\n",
			type, sector);
		status = VIRTIO_BLK_S_IOERR;
		goto err_status;
	}

	switch (type) {
	case VIRTIO_BLK_T_IN:
		if (!vdpasim_blk_check_range(vdpasim, sector,
					     to_push >> SECTOR_SHIFT,
					     VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)) {
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		vdpasim_blk_buffer_lock(blk);
		bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov,
					      blk->buffer + offset, to_push);
		vdpasim_blk_buffer_unlock(blk);
		if (bytes < 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"vringh_iov_push_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
				bytes, offset, to_push);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		pushed += bytes;
		break;

	case VIRTIO_BLK_T_OUT:
		if (!vdpasim_blk_check_range(vdpasim, sector,
					     to_pull >> SECTOR_SHIFT,
					     VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)) {
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		vdpasim_blk_buffer_lock(blk);
		bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov,
					      blk->buffer + offset, to_pull);
		vdpasim_blk_buffer_unlock(blk);
		if (bytes < 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
				bytes, offset, to_pull);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}
		break;

	case VIRTIO_BLK_T_GET_ID:
		bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov,
					      vdpasim_blk_id,
					      VIRTIO_BLK_ID_BYTES);
		if (bytes < 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"vringh_iov_push_iotlb() error: %zd\n", bytes);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		pushed += bytes;
		break;

	case VIRTIO_BLK_T_FLUSH:
		/* nothing to do */
		break;

	case VIRTIO_BLK_T_DISCARD:
	case VIRTIO_BLK_T_WRITE_ZEROES: {
		struct virtio_blk_discard_write_zeroes range;
		u32 num_sectors, flags;

		if (to_pull != sizeof(range)) {
			dev_dbg(&vdpasim->vdpa.dev,
				"discard/write_zeroes header len: 0x%zx [expected: 0x%zx]\n",
				to_pull, sizeof(range));
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &range,
					      to_pull);
		if (bytes < 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
				bytes, offset, to_pull);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		sector = le64_to_cpu(range.sector);
		offset = sector << SECTOR_SHIFT;
		num_sectors = le32_to_cpu(range.num_sectors);
		flags = le32_to_cpu(range.flags);

		if (type == VIRTIO_BLK_T_DISCARD && flags != 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"discard unexpected flags set - flags: 0x%x\n",
				flags);
			status = VIRTIO_BLK_S_UNSUPP;
			break;
		}

		if (type == VIRTIO_BLK_T_WRITE_ZEROES &&
		    flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
			dev_dbg(&vdpasim->vdpa.dev,
				"write_zeroes unexpected flags set - flags: 0x%x\n",
				flags);
			status = VIRTIO_BLK_S_UNSUPP;
			break;
		}

		if (!vdpasim_blk_check_range(vdpasim, sector, num_sectors,
					     VDPASIM_BLK_DWZ_MAX_SECTORS)) {
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
			vdpasim_blk_buffer_lock(blk);
			memset(blk->buffer + offset, 0,
			       num_sectors << SECTOR_SHIFT);
			vdpasim_blk_buffer_unlock(blk);
		}

		break;
	}
	default:
		dev_dbg(&vdpasim->vdpa.dev,
			"Unsupported request type %d\n", type);
		status = VIRTIO_BLK_S_IOERR;
		break;
	}

err_status:
	/* If some operations fail, we need to skip the remaining bytes
	 * to put the status in the last byte
	 */
	if (to_push - pushed > 0)
		vringh_kiov_advance(&vq->in_iov, to_push - pushed);

	/* Last byte is the status */
	bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov, &status, 1);
	if (bytes != 1)
		goto err;

	pushed += bytes;

	/* Make sure data is written before advancing index */
	smp_wmb();

	handled = true;

err:
	vringh_complete_iotlb(&vq->vring, vq->head, pushed);

	return handled;
}
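
/* Process requests in small batches: after a handful of completions,
 * break out and reschedule the work so one busy virtqueue cannot
 * monopolize the workqueue.
 */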
static void vdpasim_blk_work(struct vdpasim *vdpasim)
{
	bool reschedule = false;
	int i;

	mutex_lock(&vdpasim->mutex);

	if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
		goto out;

	if (!vdpasim->running)
		goto out;

	for (i = 0; i < VDPASIM_BLK_VQ_NUM; i++) {
		struct vdpasim_virtqueue *vq = &vdpasim->vqs[i];
		int reqs = 0;

		if (!vq->ready)
			continue;

		while (vdpasim_blk_handle_req(vdpasim, vq)) {
			/* Make sure used is visible before raising the interrupt. */
			smp_wmb();

			local_bh_disable();
			if (vringh_need_notify_iotlb(&vq->vring) > 0)
				vringh_notify(&vq->vring);
			local_bh_enable();

			if (++reqs > 4) {
				reschedule = true;
				break;
			}
		}
	}
out:
	mutex_unlock(&vdpasim->mutex);

	if (reschedule)
		vdpasim_schedule_work(vdpasim);
}
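
/* Fill the virtio-blk config space. Multi-byte fields are converted with
 * the cpu_to_vdpasim*() helpers so they match the endianness expected by
 * the driver.
 */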
static void vdpasim_blk_get_config(struct vdpasim *vdpasim, void *config)
{
	struct virtio_blk_config *blk_config = config;

	memset(config, 0, sizeof(struct virtio_blk_config));

	blk_config->capacity = cpu_to_vdpasim64(vdpasim, VDPASIM_BLK_CAPACITY);
	blk_config->size_max = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_SIZE_MAX);
	blk_config->seg_max = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_SEG_MAX);
	blk_config->num_queues = cpu_to_vdpasim16(vdpasim, VDPASIM_BLK_VQ_NUM);
	blk_config->min_io_size = cpu_to_vdpasim16(vdpasim, 1);
	blk_config->opt_io_size = cpu_to_vdpasim32(vdpasim, 1);
	blk_config->blk_size = cpu_to_vdpasim32(vdpasim, SECTOR_SIZE);
	/* VIRTIO_BLK_F_DISCARD */
	blk_config->discard_sector_alignment =
		cpu_to_vdpasim32(vdpasim, SECTOR_SIZE);
	blk_config->max_discard_sectors =
		cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_DWZ_MAX_SECTORS);
	blk_config->max_discard_seg = cpu_to_vdpasim32(vdpasim, 1);
	/* VIRTIO_BLK_F_WRITE_ZEROES */
	blk_config->max_write_zeroes_sectors =
		cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_DWZ_MAX_SECTORS);
	blk_config->max_write_zeroes_seg = cpu_to_vdpasim32(vdpasim, 1);
}

static void vdpasim_blk_free(struct vdpasim *vdpasim)
{
	struct vdpasim_blk *blk = sim_to_blk(vdpasim);

	if (!blk->shared_backend)
		kvfree(blk->buffer);
}

static void vdpasim_blk_mgmtdev_release(struct device *dev)
{
}

static struct device vdpasim_blk_mgmtdev = {
	.init_name = "vdpasim_blk",
	.release = vdpasim_blk_mgmtdev_release,
};
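
/* Management ops: ->dev_add() creates a simulated device and allocates
 * (or shares) its backing buffer; ->dev_del() unregisters it.
 */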
static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
			       const struct vdpa_dev_set_config *config)
{
	struct vdpasim_dev_attr dev_attr = {};
	struct vdpasim_blk *blk;
	struct vdpasim *simdev;
	int ret;

	dev_attr.mgmt_dev = mdev;
	dev_attr.name = name;
	dev_attr.id = VIRTIO_ID_BLOCK;
	dev_attr.supported_features = VDPASIM_BLK_FEATURES;
	dev_attr.nvqs = VDPASIM_BLK_VQ_NUM;
	dev_attr.ngroups = VDPASIM_BLK_GROUP_NUM;
	dev_attr.nas = VDPASIM_BLK_AS_NUM;
	dev_attr.alloc_size = sizeof(struct vdpasim_blk);
	dev_attr.config_size = sizeof(struct virtio_blk_config);
	dev_attr.get_config = vdpasim_blk_get_config;
	dev_attr.work_fn = vdpasim_blk_work;
	dev_attr.free = vdpasim_blk_free;

	simdev = vdpasim_create(&dev_attr, config);
	if (IS_ERR(simdev))
		return PTR_ERR(simdev);

	blk = sim_to_blk(simdev);
	blk->shared_backend = shared_backend;

	if (blk->shared_backend) {
		blk->buffer = shared_buffer;
	} else {
		blk->buffer = kvzalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT,
				       GFP_KERNEL);
		if (!blk->buffer) {
			ret = -ENOMEM;
			goto put_dev;
		}
	}

	ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_BLK_VQ_NUM);
	if (ret)
		goto put_dev;

	return 0;

put_dev:
	put_device(&simdev->vdpa.dev);
	return ret;
}

static void vdpasim_blk_dev_del(struct vdpa_mgmt_dev *mdev,
				struct vdpa_device *dev)
{
	struct vdpasim *simdev = container_of(dev, struct vdpasim, vdpa);

	_vdpa_unregister_device(&simdev->vdpa);
}

static const struct vdpa_mgmtdev_ops vdpasim_blk_mgmtdev_ops = {
	.dev_add = vdpasim_blk_dev_add,
	.dev_del = vdpasim_blk_dev_del
};

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static struct vdpa_mgmt_dev mgmt_dev = {
	.device = &vdpasim_blk_mgmtdev,
	.id_table = id_table,
	.ops = &vdpasim_blk_mgmtdev_ops,
};
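
/* Module init: register the management device, then the vdpa mgmt dev;
 * with shared_backend=1, also allocate the backing buffer shared by all
 * simulated devices.
 */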
static int __init vdpasim_blk_init(void)
{
	int ret;

	ret = device_register(&vdpasim_blk_mgmtdev);
	if (ret) {
		put_device(&vdpasim_blk_mgmtdev);
		return ret;
	}

	ret = vdpa_mgmtdev_register(&mgmt_dev);
	if (ret)
		goto parent_err;

	if (shared_backend) {
		shared_buffer = kvzalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT,
					 GFP_KERNEL);
		if (!shared_buffer) {
			ret = -ENOMEM;
			goto mgmt_dev_err;
		}
	}

	return 0;
mgmt_dev_err:
	vdpa_mgmtdev_unregister(&mgmt_dev);
parent_err:
	device_unregister(&vdpasim_blk_mgmtdev);
	return ret;
}

static void __exit vdpasim_blk_exit(void)
{
	kvfree(shared_buffer);
	vdpa_mgmtdev_unregister(&mgmt_dev);
	device_unregister(&vdpasim_blk_mgmtdev);
}

module_init(vdpasim_blk_init)
module_exit(vdpasim_blk_exit)

MODULE_VERSION(DRV_VERSION);
MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);