// SPDX-License-Identifier: GPL-2.0-only
/*
 * VDUSE: vDPA Device in Userspace
 *
 * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
 *
 * Author: Xie Yongji <xieyongji@bytedance.com>
 *
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/eventfd.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/dma-map-ops.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <linux/vmalloc.h>
#include <linux/sched/mm.h>
#include <uapi/linux/vduse.h>
#include <uapi/linux/vdpa.h>
#include <uapi/linux/virtio_config.h>
#include <uapi/linux/virtio_ids.h>
#include <uapi/linux/virtio_blk.h>
#include <linux/mod_devicetable.h>

#include "iova_domain.h"

#define DRV_AUTHOR "Yongji Xie <xieyongji@bytedance.com>"
#define DRV_DESC "vDPA Device in Userspace"
#define DRV_LICENSE "GPL v2"

#define VDUSE_DEV_MAX (1U << MINORBITS)
#define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
#define VDUSE_IOVA_SIZE (128 * 1024 * 1024)
#define VDUSE_MSG_DEFAULT_TIMEOUT 30

#define IRQ_UNBOUND -1

struct vduse_virtqueue {
	u16 index;
	u16 num_max;
	u32 num;
	u64 desc_addr;
	u64 driver_addr;
	u64 device_addr;
	struct vdpa_vq_state state;
	bool ready;
	bool kicked;
	spinlock_t kick_lock;
	spinlock_t irq_lock;
	struct eventfd_ctx *kickfd;
	struct vdpa_callback cb;
	struct work_struct inject;
	struct work_struct kick;
	int irq_effective_cpu;
	struct cpumask irq_affinity;
};

struct vduse_dev;

struct vduse_vdpa {
	struct vdpa_device vdpa;
	struct vduse_dev *dev;
};

struct vduse_umem {
	unsigned long iova;
	unsigned long npages;
	struct page **pages;
	struct mm_struct *mm;
};

struct vduse_dev {
	struct vduse_vdpa *vdev;
	struct device *dev;
	struct vduse_virtqueue **vqs;
	struct vduse_iova_domain *domain;
	char *name;
	struct mutex lock;
	spinlock_t msg_lock;
	u64 msg_unique;
	u32 msg_timeout;
	wait_queue_head_t waitq;
	struct list_head send_list;
	struct list_head recv_list;
	struct vdpa_callback config_cb;
	struct work_struct inject;
	spinlock_t irq_lock;
	struct rw_semaphore rwsem;
	int minor;
	bool broken;
	bool connected;
	u64 api_version;
	u64 device_features;
	u64 driver_features;
	u32 device_id;
	u32 vendor_id;
	u32 generation;
	u32 config_size;
	void *config;
	u8 status;
	u32 vq_num;
	u32 vq_align;
	struct vduse_umem *umem;
	struct mutex mem_lock;
};

struct vduse_dev_msg {
	struct vduse_dev_request req;
	struct vduse_dev_response resp;
	struct list_head list;
	wait_queue_head_t waitq;
	bool completed;
};

struct vduse_control {
	u64 api_version;
};

static DEFINE_MUTEX(vduse_lock);
static DEFINE_IDR(vduse_idr);

static dev_t vduse_major;
static struct class *vduse_class;
static struct cdev vduse_ctrl_cdev;
static struct cdev vduse_cdev;
static struct workqueue_struct *vduse_irq_wq;
static struct workqueue_struct *vduse_irq_bound_wq;

static u32 allowed_device_id[] = {
	VIRTIO_ID_BLOCK,
};

static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa)
{
	struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa);

	return vdev->dev;
}

static inline struct vduse_dev *dev_to_vduse(struct device *dev)
{
	struct vdpa_device *vdpa = dev_to_vdpa(dev);

	return vdpa_to_vduse(vdpa);
}

static struct vduse_dev_msg *vduse_find_msg(struct list_head *head,
					    uint32_t request_id)
{
	struct vduse_dev_msg *msg;

	list_for_each_entry(msg, head, list) {
		if (msg->req.request_id == request_id) {
			list_del(&msg->list);
			return msg;
		}
	}

	return NULL;
}

static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head)
{
	struct vduse_dev_msg *msg = NULL;

	if (!list_empty(head)) {
		msg = list_first_entry(head, struct vduse_dev_msg, list);
		list_del(&msg->list);
	}

	return msg;
}

static void vduse_enqueue_msg(struct list_head *head,
			      struct vduse_dev_msg *msg)
{
	list_add_tail(&msg->list, head);
}

static void vduse_dev_broken(struct vduse_dev *dev)
{
	struct vduse_dev_msg *msg, *tmp;

	if (unlikely(dev->broken))
		return;

	list_splice_init(&dev->recv_list, &dev->send_list);
	list_for_each_entry_safe(msg, tmp, &dev->send_list, list) {
		list_del(&msg->list);
		msg->completed = 1;
		msg->resp.result = VDUSE_REQ_RESULT_FAILED;
		wake_up(&msg->waitq);
	}
	dev->broken = true;
	wake_up(&dev->waitq);
}

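/*
 * Send a request to the userspace daemon and wait for its response.
 * The message is queued on send_list for the daemon to read; the caller
 * sleeps until the matching response arrives or, if msg_timeout is set,
 * until it expires, in which case the device is marked broken.
 */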
static int vduse_dev_msg_sync(struct vduse_dev *dev,
			      struct vduse_dev_msg *msg)
{
	int ret;

	if (unlikely(dev->broken))
		return -EIO;

	init_waitqueue_head(&msg->waitq);
	spin_lock(&dev->msg_lock);
	if (unlikely(dev->broken)) {
		spin_unlock(&dev->msg_lock);
		return -EIO;
	}
	msg->req.request_id = dev->msg_unique++;
	vduse_enqueue_msg(&dev->send_list, msg);
	wake_up(&dev->waitq);
	spin_unlock(&dev->msg_lock);
	if (dev->msg_timeout)
		ret = wait_event_killable_timeout(msg->waitq, msg->completed,
						  (long)dev->msg_timeout * HZ);
	else
		ret = wait_event_killable(msg->waitq, msg->completed);

	spin_lock(&dev->msg_lock);
	if (!msg->completed) {
		list_del(&msg->list);
		msg->resp.result = VDUSE_REQ_RESULT_FAILED;
		/* Mark the device as malfunctioning when there is a timeout */
		if (!ret)
			vduse_dev_broken(dev);
	}
	ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO;
	spin_unlock(&dev->msg_lock);

	return ret;
}

static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev,
					 struct vduse_virtqueue *vq,
					 struct vdpa_vq_state_packed *packed)
{
	struct vduse_dev_msg msg = { 0 };
	int ret;

	msg.req.type = VDUSE_GET_VQ_STATE;
	msg.req.vq_state.index = vq->index;

	ret = vduse_dev_msg_sync(dev, &msg);
	if (ret)
		return ret;

	packed->last_avail_counter =
		msg.resp.vq_state.packed.last_avail_counter & 0x0001;
	packed->last_avail_idx =
		msg.resp.vq_state.packed.last_avail_idx & 0x7FFF;
	packed->last_used_counter =
		msg.resp.vq_state.packed.last_used_counter & 0x0001;
	packed->last_used_idx =
		msg.resp.vq_state.packed.last_used_idx & 0x7FFF;

	return 0;
}

static int vduse_dev_get_vq_state_split(struct vduse_dev *dev,
					struct vduse_virtqueue *vq,
					struct vdpa_vq_state_split *split)
{
	struct vduse_dev_msg msg = { 0 };
	int ret;

	msg.req.type = VDUSE_GET_VQ_STATE;
	msg.req.vq_state.index = vq->index;

	ret = vduse_dev_msg_sync(dev, &msg);
	if (ret)
		return ret;

	split->avail_index = msg.resp.vq_state.split.avail_index;

	return 0;
}

static int vduse_dev_set_status(struct vduse_dev *dev, u8 status)
{
	struct vduse_dev_msg msg = { 0 };

	msg.req.type = VDUSE_SET_STATUS;
	msg.req.s.status = status;

	return vduse_dev_msg_sync(dev, &msg);
}

static int vduse_dev_update_iotlb(struct vduse_dev *dev,
				  u64 start, u64 last)
{
	struct vduse_dev_msg msg = { 0 };

	if (last < start)
		return -EINVAL;

	msg.req.type = VDUSE_UPDATE_IOTLB;
	msg.req.iova.start = start;
	msg.req.iova.last = last;

	return vduse_dev_msg_sync(dev, &msg);
}

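/*
 * read() on /dev/vduse/$DEVICE: hand the next pending request on
 * send_list to the userspace daemon and move it to recv_list, where
 * it waits for the daemon to write back the matching response.
 */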
static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct vduse_dev *dev = file->private_data;
	struct vduse_dev_msg *msg;
	int size = sizeof(struct vduse_dev_request);
	ssize_t ret;

	if (iov_iter_count(to) < size)
		return -EINVAL;

	spin_lock(&dev->msg_lock);
	while (1) {
		msg = vduse_dequeue_msg(&dev->send_list);
		if (msg)
			break;

		ret = -EAGAIN;
		if (file->f_flags & O_NONBLOCK)
			goto unlock;

		spin_unlock(&dev->msg_lock);
		ret = wait_event_interruptible_exclusive(dev->waitq,
					!list_empty(&dev->send_list));
		if (ret)
			return ret;

		spin_lock(&dev->msg_lock);
	}
	spin_unlock(&dev->msg_lock);
	ret = copy_to_iter(&msg->req, size, to);
	spin_lock(&dev->msg_lock);
	if (ret != size) {
		ret = -EFAULT;
		vduse_enqueue_msg(&dev->send_list, msg);
		goto unlock;
	}
	vduse_enqueue_msg(&dev->recv_list, msg);
unlock:
	spin_unlock(&dev->msg_lock);

	return ret;
}

static bool is_mem_zero(const char *ptr, int size)
{
	int i;

	for (i = 0; i < size; i++) {
		if (ptr[i])
			return false;
	}
	return true;
}

static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct vduse_dev *dev = file->private_data;
	struct vduse_dev_response resp;
	struct vduse_dev_msg *msg;
	size_t ret;

	ret = copy_from_iter(&resp, sizeof(resp), from);
	if (ret != sizeof(resp))
		return -EINVAL;

	if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved)))
		return -EINVAL;

	spin_lock(&dev->msg_lock);
	msg = vduse_find_msg(&dev->recv_list, resp.request_id);
	if (!msg) {
		ret = -ENOENT;
		goto unlock;
	}

	memcpy(&msg->resp, &resp, sizeof(resp));
	msg->completed = 1;
	wake_up(&msg->waitq);
unlock:
	spin_unlock(&dev->msg_lock);

	return ret;
}

static __poll_t vduse_dev_poll(struct file *file, poll_table *wait)
{
	struct vduse_dev *dev = file->private_data;
	__poll_t mask = 0;

	poll_wait(file, &dev->waitq, wait);

	spin_lock(&dev->msg_lock);

	if (unlikely(dev->broken))
		mask |= EPOLLERR;
	if (!list_empty(&dev->send_list))
		mask |= EPOLLIN | EPOLLRDNORM;
	if (!list_empty(&dev->recv_list))
		mask |= EPOLLOUT | EPOLLWRNORM;

	spin_unlock(&dev->msg_lock);

	return mask;
}

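/*
 * Return the device to its initial state: reset the bounce map, clear
 * the status and negotiated features, and drop all callbacks, kickfds
 * and per-virtqueue state. rwsem is taken for writing to exclude
 * concurrent interrupt injection.
 */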
static void vduse_dev_reset(struct vduse_dev *dev)
{
	int i;
	struct vduse_iova_domain *domain = dev->domain;

	/* The coherent mappings are handled in vduse_dev_free_coherent() */
	if (domain->bounce_map)
		vduse_domain_reset_bounce_map(domain);

	down_write(&dev->rwsem);

	dev->status = 0;
	dev->driver_features = 0;
	dev->generation++;
	spin_lock(&dev->irq_lock);
	dev->config_cb.callback = NULL;
	dev->config_cb.private = NULL;
	spin_unlock(&dev->irq_lock);
	flush_work(&dev->inject);

	for (i = 0; i < dev->vq_num; i++) {
		struct vduse_virtqueue *vq = dev->vqs[i];

		vq->ready = false;
		vq->desc_addr = 0;
		vq->driver_addr = 0;
		vq->device_addr = 0;
		vq->num = 0;
		memset(&vq->state, 0, sizeof(vq->state));

		spin_lock(&vq->kick_lock);
		vq->kicked = false;
		if (vq->kickfd)
			eventfd_ctx_put(vq->kickfd);
		vq->kickfd = NULL;
		spin_unlock(&vq->kick_lock);

		spin_lock(&vq->irq_lock);
		vq->cb.callback = NULL;
		vq->cb.private = NULL;
		spin_unlock(&vq->irq_lock);
		flush_work(&vq->inject);
		flush_work(&vq->kick);
	}

	up_write(&dev->rwsem);
}

static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
				     u64 desc_area, u64 driver_area,
				     u64 device_area)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	vq->desc_addr = desc_area;
	vq->driver_addr = driver_area;
	vq->device_addr = device_area;

	return 0;
}

static void vduse_vq_kick(struct vduse_virtqueue *vq)
{
	spin_lock(&vq->kick_lock);
	if (!vq->ready)
		goto unlock;

	if (vq->kickfd)
		eventfd_signal(vq->kickfd, 1);
	else
		vq->kicked = true;
unlock:
	spin_unlock(&vq->kick_lock);
}

static void vduse_vq_kick_work(struct work_struct *work)
{
	struct vduse_virtqueue *vq = container_of(work,
					struct vduse_virtqueue, kick);

	vduse_vq_kick(vq);
}

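/*
 * Kicks may arrive in contexts where signaling an eventfd is not
 * allowed (see eventfd_signal_allowed()); in that case the kick is
 * deferred to a workqueue instead of being delivered inline.
 */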
static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	if (!eventfd_signal_allowed()) {
		schedule_work(&vq->kick);
		return;
	}
	vduse_vq_kick(vq);
}

static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
				 struct vdpa_callback *cb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	spin_lock(&vq->irq_lock);
	vq->cb.callback = cb->callback;
	vq->cb.private = cb->private;
	spin_unlock(&vq->irq_lock);
}

static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	vq->num = num;
}

static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
				    u16 idx, bool ready)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	vq->ready = ready;
}

static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	return vq->ready;
}

static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
				   const struct vdpa_vq_state *state)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
		vq->state.packed.last_avail_counter =
				state->packed.last_avail_counter;
		vq->state.packed.last_avail_idx = state->packed.last_avail_idx;
		vq->state.packed.last_used_counter =
				state->packed.last_used_counter;
		vq->state.packed.last_used_idx = state->packed.last_used_idx;
	} else
		vq->state.split.avail_index = state->split.avail_index;

	return 0;
}

static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
				   struct vdpa_vq_state *state)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
		return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);

	return vduse_dev_get_vq_state_split(dev, vq, &state->split);
}

static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->vq_align;
}

static u64 vduse_vdpa_get_device_features(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->device_features;
}

static int vduse_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	dev->driver_features = features;
	return 0;
}

static u64 vduse_vdpa_get_driver_features(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->driver_features;
}

static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa,
				     struct vdpa_callback *cb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	spin_lock(&dev->irq_lock);
	dev->config_cb.callback = cb->callback;
	dev->config_cb.private = cb->private;
	spin_unlock(&dev->irq_lock);
}

static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	u16 num_max = 0;
	int i;

	for (i = 0; i < dev->vq_num; i++)
		if (num_max < dev->vqs[i]->num_max)
			num_max = dev->vqs[i]->num_max;

	return num_max;
}

static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->device_id;
}

static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->vendor_id;
}

static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->status;
}

static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	if (vduse_dev_set_status(dev, status))
		return;

	dev->status = status;
}

static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->config_size;
}

static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset,
				  void *buf, unsigned int len)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	/* Initialize the buffer in case of partial copy. */
	memset(buf, 0, len);

	if (offset > dev->config_size)
		return;

	if (len > dev->config_size - offset)
		len = dev->config_size - offset;

	memcpy(buf, dev->config + offset, len);
}

static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset,
				  const void *buf, unsigned int len)
{
	/* Now we only support read-only configuration space */
}

static int vduse_vdpa_reset(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	int ret = vduse_dev_set_status(dev, 0);

	vduse_dev_reset(dev);

	return ret;
}

static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->generation;
}

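/*
 * The set_vq_affinity callback: record which CPUs this virtqueue's
 * interrupt work may run on. The effective CPU is then picked from
 * this mask by vduse_vq_update_effective_cpu().
 */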
static int vduse_vdpa_set_vq_affinity(struct vdpa_device *vdpa, u16 idx,
				      const struct cpumask *cpu_mask)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	cpumask_copy(&dev->vqs[idx]->irq_affinity, cpu_mask);
	return 0;
}

static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
			      unsigned int asid,
			      struct vhost_iotlb *iotlb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	int ret;

	ret = vduse_domain_set_map(dev->domain, iotlb);
	if (ret)
		return ret;

	ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX);
	if (ret) {
		vduse_domain_clear_map(dev->domain, iotlb);
		return ret;
	}

	return 0;
}

static void vduse_vdpa_free(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	dev->vdev = NULL;
}

static const struct vdpa_config_ops vduse_vdpa_config_ops = {
	.set_vq_address = vduse_vdpa_set_vq_address,
	.kick_vq = vduse_vdpa_kick_vq,
	.set_vq_cb = vduse_vdpa_set_vq_cb,
	.set_vq_num = vduse_vdpa_set_vq_num,
	.set_vq_ready = vduse_vdpa_set_vq_ready,
	.get_vq_ready = vduse_vdpa_get_vq_ready,
	.set_vq_state = vduse_vdpa_set_vq_state,
	.get_vq_state = vduse_vdpa_get_vq_state,
	.get_vq_align = vduse_vdpa_get_vq_align,
	.get_device_features = vduse_vdpa_get_device_features,
	.set_driver_features = vduse_vdpa_set_driver_features,
	.get_driver_features = vduse_vdpa_get_driver_features,
	.set_config_cb = vduse_vdpa_set_config_cb,
	.get_vq_num_max = vduse_vdpa_get_vq_num_max,
	.get_device_id = vduse_vdpa_get_device_id,
	.get_vendor_id = vduse_vdpa_get_vendor_id,
	.get_status = vduse_vdpa_get_status,
	.set_status = vduse_vdpa_set_status,
	.get_config_size = vduse_vdpa_get_config_size,
	.get_config = vduse_vdpa_get_config,
	.set_config = vduse_vdpa_set_config,
	.get_generation = vduse_vdpa_get_generation,
	.set_vq_affinity = vduse_vdpa_set_vq_affinity,
	.reset = vduse_vdpa_reset,
	.set_map = vduse_vdpa_set_map,
	.free = vduse_vdpa_free,
};

static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page,
				     unsigned long offset, size_t size,
				     enum dma_data_direction dir,
				     unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	return vduse_domain_map_page(domain, page, offset, size, dir, attrs);
}

static void vduse_dev_unmap_page(struct device *dev, dma_addr_t dma_addr,
				 size_t size, enum dma_data_direction dir,
				 unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs);
}

static void *vduse_dev_alloc_coherent(struct device *dev, size_t size,
				      dma_addr_t *dma_addr, gfp_t flag,
				      unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;
	unsigned long iova;
	void *addr;

	*dma_addr = DMA_MAPPING_ERROR;
	addr = vduse_domain_alloc_coherent(domain, size,
				(dma_addr_t *)&iova, flag, attrs);
	if (!addr)
		return NULL;

	*dma_addr = (dma_addr_t)iova;

	return addr;
}

static void vduse_dev_free_coherent(struct device *dev, size_t size,
				    void *vaddr, dma_addr_t dma_addr,
				    unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs);
}

static size_t vduse_dev_max_mapping_size(struct device *dev)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	return domain->bounce_size;
}

static const struct dma_map_ops vduse_dev_dma_ops = {
	.map_page = vduse_dev_map_page,
	.unmap_page = vduse_dev_unmap_page,
	.alloc = vduse_dev_alloc_coherent,
	.free = vduse_dev_free_coherent,
	.max_mapping_size = vduse_dev_max_mapping_size,
};

static unsigned int perm_to_file_flags(u8 perm)
{
	unsigned int flags = 0;

	switch (perm) {
	case VDUSE_ACCESS_WO:
		flags |= O_WRONLY;
		break;
	case VDUSE_ACCESS_RO:
		flags |= O_RDONLY;
		break;
	case VDUSE_ACCESS_RW:
		flags |= O_RDWR;
		break;
	default:
		WARN(1, "invalid vhost IOTLB permission\n");
		break;
	}

	return flags;
}

static int vduse_kickfd_setup(struct vduse_dev *dev,
			      struct vduse_vq_eventfd *eventfd)
{
	struct eventfd_ctx *ctx = NULL;
	struct vduse_virtqueue *vq;
	u32 index;

	if (eventfd->index >= dev->vq_num)
		return -EINVAL;

	index = array_index_nospec(eventfd->index, dev->vq_num);
	vq = dev->vqs[index];
	if (eventfd->fd >= 0) {
		ctx = eventfd_ctx_fdget(eventfd->fd);
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);
	} else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN)
		return 0;

	spin_lock(&vq->kick_lock);
	if (vq->kickfd)
		eventfd_ctx_put(vq->kickfd);
	vq->kickfd = ctx;
	if (vq->ready && vq->kicked && vq->kickfd) {
		eventfd_signal(vq->kickfd, 1);
		vq->kicked = false;
	}
	spin_unlock(&vq->kick_lock);

	return 0;
}

static bool vduse_dev_is_ready(struct vduse_dev *dev)
{
	int i;

	for (i = 0; i < dev->vq_num; i++)
		if (!dev->vqs[i]->num_max)
			return false;

	return true;
}

static void vduse_dev_irq_inject(struct work_struct *work)
{
	struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);

	spin_lock_irq(&dev->irq_lock);
	if (dev->config_cb.callback)
		dev->config_cb.callback(dev->config_cb.private);
	spin_unlock_irq(&dev->irq_lock);
}

static void vduse_vq_irq_inject(struct work_struct *work)
{
	struct vduse_virtqueue *vq = container_of(work,
					struct vduse_virtqueue, inject);

	spin_lock_irq(&vq->irq_lock);
	if (vq->ready && vq->cb.callback)
		vq->cb.callback(vq->cb.private);
	spin_unlock_irq(&vq->irq_lock);
}

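/*
 * Queue interrupt injection work on the unbound workqueue, or on the
 * bound per-CPU workqueue when an effective CPU has been picked from
 * the vq's affinity mask. Injection is only allowed once the driver
 * has set DRIVER_OK; rwsem is held for reading to serialize against
 * vduse_dev_reset().
 */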
static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
				    struct work_struct *irq_work,
				    int irq_effective_cpu)
{
	int ret = -EINVAL;

	down_read(&dev->rwsem);
	if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
		goto unlock;

	ret = 0;
	if (irq_effective_cpu == IRQ_UNBOUND)
		queue_work(vduse_irq_wq, irq_work);
	else
		queue_work_on(irq_effective_cpu,
			      vduse_irq_bound_wq, irq_work);
unlock:
	up_read(&dev->rwsem);

	return ret;
}

static int vduse_dev_dereg_umem(struct vduse_dev *dev,
				u64 iova, u64 size)
{
	int ret;

	mutex_lock(&dev->mem_lock);
	ret = -ENOENT;
	if (!dev->umem)
		goto unlock;

	ret = -EINVAL;
	if (dev->umem->iova != iova || size != dev->domain->bounce_size)
		goto unlock;

	vduse_domain_remove_user_bounce_pages(dev->domain);
	unpin_user_pages_dirty_lock(dev->umem->pages,
				    dev->umem->npages, true);
	atomic64_sub(dev->umem->npages, &dev->umem->mm->pinned_vm);
	mmdrop(dev->umem->mm);
	vfree(dev->umem->pages);
	kfree(dev->umem);
	dev->umem = NULL;
	ret = 0;
unlock:
	mutex_unlock(&dev->mem_lock);
	return ret;
}

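/*
 * Register a userspace memory region backing the bounce buffer. The
 * region must match the bounce map exactly (iova 0, bounce_size bytes,
 * page-aligned address); its pages are pinned long-term and accounted
 * against RLIMIT_MEMLOCK before they replace the kernel bounce pages.
 */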
static int vduse_dev_reg_umem(struct vduse_dev *dev,
			      u64 iova, u64 uaddr, u64 size)
{
	struct page **page_list = NULL;
	struct vduse_umem *umem = NULL;
	long pinned = 0;
	unsigned long npages, lock_limit;
	int ret;

	if (!dev->domain->bounce_map ||
	    size != dev->domain->bounce_size ||
	    iova != 0 || uaddr & ~PAGE_MASK)
		return -EINVAL;

	mutex_lock(&dev->mem_lock);
	ret = -EEXIST;
	if (dev->umem)
		goto unlock;

	ret = -ENOMEM;
	npages = size >> PAGE_SHIFT;
	page_list = __vmalloc(array_size(npages, sizeof(struct page *)),
			      GFP_KERNEL_ACCOUNT);
	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!page_list || !umem)
		goto unlock;

	mmap_read_lock(current->mm);

	lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
	if (npages + atomic64_read(&current->mm->pinned_vm) > lock_limit)
		goto out;

	pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE,
				page_list, NULL);
	if (pinned != npages) {
		ret = pinned < 0 ? pinned : -ENOMEM;
		goto out;
	}

	ret = vduse_domain_add_user_bounce_pages(dev->domain,
						 page_list, pinned);
	if (ret)
		goto out;

	atomic64_add(npages, &current->mm->pinned_vm);

	umem->pages = page_list;
	umem->npages = pinned;
	umem->iova = iova;
	umem->mm = current->mm;
	mmgrab(current->mm);

	dev->umem = umem;
out:
	if (ret && pinned > 0)
		unpin_user_pages(page_list, pinned);

	mmap_read_unlock(current->mm);
unlock:
	if (ret) {
		vfree(page_list);
		kfree(umem);
	}
	mutex_unlock(&dev->mem_lock);
	return ret;
}

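/*
 * Round-robin to the next online CPU in the vq's affinity mask,
 * wrapping around past the end of the mask. The result is the CPU
 * that the next interrupt injection work will be queued on.
 */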
static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq)
{
	int curr_cpu = vq->irq_effective_cpu;

	while (true) {
		curr_cpu = cpumask_next(curr_cpu, &vq->irq_affinity);
		if (cpu_online(curr_cpu))
			break;

		if (curr_cpu >= nr_cpu_ids)
			curr_cpu = IRQ_UNBOUND;
	}

	vq->irq_effective_cpu = curr_cpu;
}

static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
			    unsigned long arg)
{
	struct vduse_dev *dev = file->private_data;
	void __user *argp = (void __user *)arg;
	int ret;

	if (unlikely(dev->broken))
		return -EPERM;

	switch (cmd) {
	case VDUSE_IOTLB_GET_FD: {
		struct vduse_iotlb_entry entry;
		struct vhost_iotlb_map *map;
		struct vdpa_map_file *map_file;
		struct vduse_iova_domain *domain = dev->domain;
		struct file *f = NULL;

		ret = -EFAULT;
		if (copy_from_user(&entry, argp, sizeof(entry)))
			break;

		ret = -EINVAL;
		if (entry.start > entry.last)
			break;

		spin_lock(&domain->iotlb_lock);
		map = vhost_iotlb_itree_first(domain->iotlb,
					      entry.start, entry.last);
		if (map) {
			map_file = (struct vdpa_map_file *)map->opaque;
			f = get_file(map_file->file);
			entry.offset = map_file->offset;
			entry.start = map->start;
			entry.last = map->last;
			entry.perm = map->perm;
		}
		spin_unlock(&domain->iotlb_lock);
		ret = -EINVAL;
		if (!f)
			break;

		ret = -EFAULT;
		if (copy_to_user(argp, &entry, sizeof(entry))) {
			fput(f);
			break;
		}
		ret = receive_fd(f, perm_to_file_flags(entry.perm));
		fput(f);
		break;
	}
	case VDUSE_DEV_GET_FEATURES:
		/*
		 * Just mirror what driver wrote here.
		 * The driver is expected to check FEATURE_OK later.
		 */
		ret = put_user(dev->driver_features, (u64 __user *)argp);
		break;
	case VDUSE_DEV_SET_CONFIG: {
		struct vduse_config_data config;
		unsigned long size = offsetof(struct vduse_config_data,
					      buffer);

		ret = -EFAULT;
		if (copy_from_user(&config, argp, size))
			break;

		ret = -EINVAL;
		if (config.offset > dev->config_size ||
		    config.length == 0 ||
		    config.length > dev->config_size - config.offset)
			break;

		ret = -EFAULT;
		if (copy_from_user(dev->config + config.offset, argp + size,
				   config.length))
			break;

		ret = 0;
		break;
	}
	case VDUSE_DEV_INJECT_CONFIG_IRQ:
		ret = vduse_dev_queue_irq_work(dev, &dev->inject, IRQ_UNBOUND);
		break;
	case VDUSE_VQ_SETUP: {
		struct vduse_vq_config config;
		u32 index;

		ret = -EFAULT;
		if (copy_from_user(&config, argp, sizeof(config)))
			break;

		ret = -EINVAL;
		if (config.index >= dev->vq_num)
			break;

		if (!is_mem_zero((const char *)config.reserved,
				 sizeof(config.reserved)))
			break;

		index = array_index_nospec(config.index, dev->vq_num);
		dev->vqs[index]->num_max = config.max_size;
		ret = 0;
		break;
	}
	case VDUSE_VQ_GET_INFO: {
		struct vduse_vq_info vq_info;
		struct vduse_virtqueue *vq;
		u32 index;

		ret = -EFAULT;
		if (copy_from_user(&vq_info, argp, sizeof(vq_info)))
			break;

		ret = -EINVAL;
		if (vq_info.index >= dev->vq_num)
			break;

		index = array_index_nospec(vq_info.index, dev->vq_num);
		vq = dev->vqs[index];
		vq_info.desc_addr = vq->desc_addr;
		vq_info.driver_addr = vq->driver_addr;
		vq_info.device_addr = vq->device_addr;
		vq_info.num = vq->num;

		if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
			vq_info.packed.last_avail_counter =
				vq->state.packed.last_avail_counter;
			vq_info.packed.last_avail_idx =
				vq->state.packed.last_avail_idx;
			vq_info.packed.last_used_counter =
				vq->state.packed.last_used_counter;
			vq_info.packed.last_used_idx =
				vq->state.packed.last_used_idx;
		} else
			vq_info.split.avail_index =
				vq->state.split.avail_index;

		vq_info.ready = vq->ready;

		ret = -EFAULT;
		if (copy_to_user(argp, &vq_info, sizeof(vq_info)))
			break;

		ret = 0;
		break;
	}
	case VDUSE_VQ_SETUP_KICKFD: {
		struct vduse_vq_eventfd eventfd;

		ret = -EFAULT;
		if (copy_from_user(&eventfd, argp, sizeof(eventfd)))
			break;

		ret = vduse_kickfd_setup(dev, &eventfd);
		break;
	}
	case VDUSE_VQ_INJECT_IRQ: {
		u32 index;

		ret = -EFAULT;
		if (get_user(index, (u32 __user *)argp))
			break;

		ret = -EINVAL;
		if (index >= dev->vq_num)
			break;

		index = array_index_nospec(index, dev->vq_num);

		vduse_vq_update_effective_cpu(dev->vqs[index]);
		ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index]->inject,
					dev->vqs[index]->irq_effective_cpu);
		break;
	}
	case VDUSE_IOTLB_REG_UMEM: {
		struct vduse_iova_umem umem;

		ret = -EFAULT;
		if (copy_from_user(&umem, argp, sizeof(umem)))
			break;

		ret = -EINVAL;
		if (!is_mem_zero((const char *)umem.reserved,
				 sizeof(umem.reserved)))
			break;

		ret = vduse_dev_reg_umem(dev, umem.iova,
					 umem.uaddr, umem.size);
		break;
	}
	case VDUSE_IOTLB_DEREG_UMEM: {
		struct vduse_iova_umem umem;

		ret = -EFAULT;
		if (copy_from_user(&umem, argp, sizeof(umem)))
			break;

		ret = -EINVAL;
		if (!is_mem_zero((const char *)umem.reserved,
				 sizeof(umem.reserved)))
			break;

		ret = vduse_dev_dereg_umem(dev, umem.iova,
					   umem.size);
		break;
	}
	case VDUSE_IOTLB_GET_INFO: {
		struct vduse_iova_info info;
		struct vhost_iotlb_map *map;
		struct vduse_iova_domain *domain = dev->domain;

		ret = -EFAULT;
		if (copy_from_user(&info, argp, sizeof(info)))
			break;

		ret = -EINVAL;
		if (info.start > info.last)
			break;

		if (!is_mem_zero((const char *)info.reserved,
				 sizeof(info.reserved)))
			break;

		spin_lock(&domain->iotlb_lock);
		map = vhost_iotlb_itree_first(domain->iotlb,
					      info.start, info.last);
		if (map) {
			info.start = map->start;
			info.last = map->last;
			info.capability = 0;
			if (domain->bounce_map && map->start == 0 &&
			    map->last == domain->bounce_size - 1)
				info.capability |= VDUSE_IOVA_CAP_UMEM;
		}
		spin_unlock(&domain->iotlb_lock);
		if (!map)
			break;

		ret = -EFAULT;
		if (copy_to_user(argp, &info, sizeof(info)))
			break;

		ret = 0;
		break;
	}
	default:
		ret = -ENOIOCTLCMD;
		break;
	}

	return ret;
}

static int vduse_dev_release(struct inode *inode, struct file *file)
{
	struct vduse_dev *dev = file->private_data;

	vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
	spin_lock(&dev->msg_lock);
	/* Make sure the inflight messages can be processed after reconnection */
	list_splice_init(&dev->recv_list, &dev->send_list);
	spin_unlock(&dev->msg_lock);
	dev->connected = false;

	return 0;
}

static struct vduse_dev *vduse_dev_get_from_minor(int minor)
{
	struct vduse_dev *dev;

	mutex_lock(&vduse_lock);
	dev = idr_find(&vduse_idr, minor);
	mutex_unlock(&vduse_lock);

	return dev;
}

static int vduse_dev_open(struct inode *inode, struct file *file)
{
	int ret;
	struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode));

	if (!dev)
		return -ENODEV;

	ret = -EBUSY;
	mutex_lock(&dev->lock);
	if (dev->connected)
		goto unlock;

	ret = 0;
	dev->connected = true;
	file->private_data = dev;
unlock:
	mutex_unlock(&dev->lock);

	return ret;
}

static const struct file_operations vduse_dev_fops = {
	.owner = THIS_MODULE,
	.open = vduse_dev_open,
	.release = vduse_dev_release,
	.read_iter = vduse_dev_read_iter,
	.write_iter = vduse_dev_write_iter,
	.poll = vduse_dev_poll,
	.unlocked_ioctl = vduse_dev_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.llseek = noop_llseek,
};

static void vduse_dev_deinit_vqs(struct vduse_dev *dev)
{
	int i;

	if (!dev->vqs)
		return;

	for (i = 0; i < dev->vq_num; i++)
		kfree(dev->vqs[i]);
	kfree(dev->vqs);
}

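/*
 * Allocate and initialize the per-device virtqueue array. Each vq
 * starts unbound (IRQ_UNBOUND) with an all-CPUs affinity mask until
 * the driver narrows it via the set_vq_affinity callback.
 */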
static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
{
	int i;

	dev->vq_align = vq_align;
	dev->vq_num = vq_num;
	dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
	if (!dev->vqs)
		return -ENOMEM;

	for (i = 0; i < vq_num; i++) {
		dev->vqs[i] = kzalloc(sizeof(*dev->vqs[i]), GFP_KERNEL);
		if (!dev->vqs[i])
			goto err;

		dev->vqs[i]->index = i;
		dev->vqs[i]->irq_effective_cpu = IRQ_UNBOUND;
		INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
		INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
		spin_lock_init(&dev->vqs[i]->kick_lock);
		spin_lock_init(&dev->vqs[i]->irq_lock);
		cpumask_setall(&dev->vqs[i]->irq_affinity);
	}

	return 0;
err:
	while (i--)
		kfree(dev->vqs[i]);
	kfree(dev->vqs);
	dev->vqs = NULL;
	return -ENOMEM;
}

static struct vduse_dev *vduse_dev_create(void)
{
	struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);

	if (!dev)
		return NULL;

	mutex_init(&dev->lock);
	mutex_init(&dev->mem_lock);
	spin_lock_init(&dev->msg_lock);
	INIT_LIST_HEAD(&dev->send_list);
	INIT_LIST_HEAD(&dev->recv_list);
	spin_lock_init(&dev->irq_lock);
	init_rwsem(&dev->rwsem);

	INIT_WORK(&dev->inject, vduse_dev_irq_inject);
	init_waitqueue_head(&dev->waitq);

	return dev;
}

static void vduse_dev_destroy(struct vduse_dev *dev)
{
	kfree(dev);
}

static struct vduse_dev *vduse_find_dev(const char *name)
{
	struct vduse_dev *dev;
	int id;

	idr_for_each_entry(&vduse_idr, dev, id)
		if (!strcmp(dev->name, name))
			return dev;

	return NULL;
}

static int vduse_destroy_dev(char *name)
{
	struct vduse_dev *dev = vduse_find_dev(name);

	if (!dev)
		return -EINVAL;

	mutex_lock(&dev->lock);
	if (dev->vdev || dev->connected) {
		mutex_unlock(&dev->lock);
		return -EBUSY;
	}
	dev->connected = true;
	mutex_unlock(&dev->lock);

	vduse_dev_reset(dev);
	device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
	idr_remove(&vduse_idr, dev->minor);
	kvfree(dev->config);
	vduse_dev_deinit_vqs(dev);
	vduse_domain_destroy(dev->domain);
	kfree(dev->name);
	vduse_dev_destroy(dev);
	module_put(THIS_MODULE);

	return 0;
}

static bool device_is_allowed(u32 device_id)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++)
		if (allowed_device_id[i] == device_id)
			return true;

	return false;
}

static bool features_is_valid(u64 features)
{
	if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM)))
		return false;

	/* Now we only support read-only configuration space */
	if (features & (1ULL << VIRTIO_BLK_F_CONFIG_WCE))
		return false;

	return true;
}

static bool vduse_validate_config(struct vduse_dev_config *config)
{
	if (!is_mem_zero((const char *)config->reserved,
			 sizeof(config->reserved)))
		return false;

	if (config->vq_align > PAGE_SIZE)
		return false;

	if (config->config_size > PAGE_SIZE)
		return false;

	if (config->vq_num > 0xffff)
		return false;

	if (!device_is_allowed(config->device_id))
		return false;

	if (!features_is_valid(config->features))
		return false;

	return true;
}

static ssize_t msg_timeout_show(struct device *device,
				struct device_attribute *attr, char *buf)
{
	struct vduse_dev *dev = dev_get_drvdata(device);

	return sysfs_emit(buf, "%u\n", dev->msg_timeout);
}

static ssize_t msg_timeout_store(struct device *device,
				 struct device_attribute *attr,
				 const char *buf, size_t count)
{
	struct vduse_dev *dev = dev_get_drvdata(device);
	int ret;

	ret = kstrtouint(buf, 10, &dev->msg_timeout);
	if (ret < 0)
		return ret;

	return count;
}

static DEVICE_ATTR_RW(msg_timeout);

static struct attribute *vduse_dev_attrs[] = {
	&dev_attr_msg_timeout.attr,
	NULL
};

ATTRIBUTE_GROUPS(vduse_dev);

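/*
 * Create a VDUSE device instance from a validated configuration:
 * set up the IOVA domain and virtqueues, reserve a minor and publish
 * the corresponding /dev/vduse/$NAME character device.
 */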
static int vduse_create_dev(struct vduse_dev_config *config,
			    void *config_buf, u64 api_version)
{
	int ret;
	struct vduse_dev *dev;

	ret = -EEXIST;
	if (vduse_find_dev(config->name))
		goto err;

	ret = -ENOMEM;
	dev = vduse_dev_create();
	if (!dev)
		goto err;

	dev->api_version = api_version;
	dev->device_features = config->features;
	dev->device_id = config->device_id;
	dev->vendor_id = config->vendor_id;
	dev->name = kstrdup(config->name, GFP_KERNEL);
	if (!dev->name)
		goto err_str;

	dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
					  VDUSE_BOUNCE_SIZE);
	if (!dev->domain)
		goto err_domain;

	dev->config = config_buf;
	dev->config_size = config->config_size;

	ret = vduse_dev_init_vqs(dev, config->vq_align, config->vq_num);
	if (ret)
		goto err_vqs;

	ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
	if (ret < 0)
		goto err_idr;

	dev->minor = ret;
	dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
	dev->dev = device_create_with_groups(vduse_class, NULL,
				MKDEV(MAJOR(vduse_major), dev->minor),
				dev, vduse_dev_groups, "%s", config->name);
	if (IS_ERR(dev->dev)) {
		ret = PTR_ERR(dev->dev);
		goto err_dev;
	}
	__module_get(THIS_MODULE);

	return 0;
err_dev:
	idr_remove(&vduse_idr, dev->minor);
err_idr:
	vduse_dev_deinit_vqs(dev);
err_vqs:
	vduse_domain_destroy(dev->domain);
err_domain:
	kfree(dev->name);
err_str:
	vduse_dev_destroy(dev);
err:
	return ret;
}

static long vduse_ioctl(struct file *file, unsigned int cmd,
			unsigned long arg)
{
	int ret;
	void __user *argp = (void __user *)arg;
	struct vduse_control *control = file->private_data;

	mutex_lock(&vduse_lock);
	switch (cmd) {
	case VDUSE_GET_API_VERSION:
		ret = put_user(control->api_version, (u64 __user *)argp);
		break;
	case VDUSE_SET_API_VERSION: {
		u64 api_version;

		ret = -EFAULT;
		if (get_user(api_version, (u64 __user *)argp))
			break;

		ret = -EINVAL;
		if (api_version > VDUSE_API_VERSION)
			break;

		ret = 0;
		control->api_version = api_version;
		break;
	}
	case VDUSE_CREATE_DEV: {
		struct vduse_dev_config config;
		unsigned long size = offsetof(struct vduse_dev_config, config);
		void *buf;

		ret = -EFAULT;
		if (copy_from_user(&config, argp, size))
			break;

		ret = -EINVAL;
		if (vduse_validate_config(&config) == false)
			break;

		buf = vmemdup_user(argp + size, config.config_size);
		if (IS_ERR(buf)) {
			ret = PTR_ERR(buf);
			break;
		}
		config.name[VDUSE_NAME_MAX - 1] = '\0';
		ret = vduse_create_dev(&config, buf, control->api_version);
		if (ret)
			kvfree(buf);
		break;
	}
	case VDUSE_DESTROY_DEV: {
		char name[VDUSE_NAME_MAX];

		ret = -EFAULT;
		if (copy_from_user(name, argp, VDUSE_NAME_MAX))
			break;

		name[VDUSE_NAME_MAX - 1] = '\0';
		ret = vduse_destroy_dev(name);
		break;
	}
	default:
		ret = -EINVAL;
		break;
	}
	mutex_unlock(&vduse_lock);

	return ret;
}

static int vduse_release(struct inode *inode, struct file *file)
{
	struct vduse_control *control = file->private_data;

	kfree(control);
	return 0;
}

static int vduse_open(struct inode *inode, struct file *file)
{
	struct vduse_control *control;

	control = kmalloc(sizeof(struct vduse_control), GFP_KERNEL);
	if (!control)
		return -ENOMEM;

	control->api_version = VDUSE_API_VERSION;
	file->private_data = control;

	return 0;
}

static const struct file_operations vduse_ctrl_fops = {
	.owner = THIS_MODULE,
	.open = vduse_open,
	.release = vduse_release,
	.unlocked_ioctl = vduse_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.llseek = noop_llseek,
};

static char *vduse_devnode(const struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
}

struct vduse_mgmt_dev {
	struct vdpa_mgmt_dev mgmt_dev;
	struct device dev;
};

static struct vduse_mgmt_dev *vduse_mgmt;

static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
{
	struct vduse_vdpa *vdev;
	int ret;

	if (dev->vdev)
		return -EEXIST;

	vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
				 &vduse_vdpa_config_ops, 1, 1, name, true);
	if (IS_ERR(vdev))
		return PTR_ERR(vdev);

	dev->vdev = vdev;
	vdev->dev = dev;
	vdev->vdpa.dev.dma_mask = &vdev->vdpa.dev.coherent_dma_mask;
	ret = dma_set_mask_and_coherent(&vdev->vdpa.dev, DMA_BIT_MASK(64));
	if (ret) {
		put_device(&vdev->vdpa.dev);
		return ret;
	}
	set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops);
	vdev->vdpa.dma_dev = &vdev->vdpa.dev;
	vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev;

	return 0;
}

static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
			const struct vdpa_dev_set_config *config)
{
	struct vduse_dev *dev;
	int ret;

	mutex_lock(&vduse_lock);
	dev = vduse_find_dev(name);
	if (!dev || !vduse_dev_is_ready(dev)) {
		mutex_unlock(&vduse_lock);
		return -EINVAL;
	}
	ret = vduse_dev_init_vdpa(dev, name);
	mutex_unlock(&vduse_lock);
	if (ret)
		return ret;

	ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
	if (ret) {
		put_device(&dev->vdev->vdpa.dev);
		return ret;
	}

	return 0;
}

static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
{
	_vdpa_unregister_device(dev);
}

static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = {
	.dev_add = vdpa_dev_add,
	.dev_del = vdpa_dev_del,
};

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static void vduse_mgmtdev_release(struct device *dev)
{
	struct vduse_mgmt_dev *mgmt_dev;

	mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev);
	kfree(mgmt_dev);
}

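/*
 * Register the "vduse" vdpa management device, through which created
 * VDUSE instances are attached to the vdpa bus via dev_add/dev_del.
 */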
static int vduse_mgmtdev_init(void)
{
	int ret;

	vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL);
	if (!vduse_mgmt)
		return -ENOMEM;

	ret = dev_set_name(&vduse_mgmt->dev, "vduse");
	if (ret) {
		kfree(vduse_mgmt);
		return ret;
	}

	vduse_mgmt->dev.release = vduse_mgmtdev_release;

	ret = device_register(&vduse_mgmt->dev);
	if (ret)
		goto dev_reg_err;

	vduse_mgmt->mgmt_dev.id_table = id_table;
	vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops;
	vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev;
	ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev);
	if (ret)
		device_unregister(&vduse_mgmt->dev);

	return ret;

dev_reg_err:
	put_device(&vduse_mgmt->dev);
	return ret;
}

static void vduse_mgmtdev_exit(void)
{
	vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev);
	device_unregister(&vduse_mgmt->dev);
}

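/*
 * Module init: create the vduse class, the /dev/vduse/control node,
 * the per-device chardev region, the interrupt workqueues, the IOVA
 * domain infrastructure and finally the vdpa management device.
 */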
static int vduse_init(void)
{
	int ret;
	struct device *dev;

	vduse_class = class_create(THIS_MODULE, "vduse");
	if (IS_ERR(vduse_class))
		return PTR_ERR(vduse_class);

	vduse_class->devnode = vduse_devnode;

	ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
	if (ret)
		goto err_chardev_region;

	/* /dev/vduse/control */
	cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops);
	vduse_ctrl_cdev.owner = THIS_MODULE;
	ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1);
	if (ret)
		goto err_ctrl_cdev;

	dev = device_create(vduse_class, NULL, vduse_major, NULL, "control");
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto err_device;
	}

	/* /dev/vduse/$DEVICE */
	cdev_init(&vduse_cdev, &vduse_dev_fops);
	vduse_cdev.owner = THIS_MODULE;
	ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1),
		       VDUSE_DEV_MAX - 1);
	if (ret)
		goto err_cdev;

	ret = -ENOMEM;
	vduse_irq_wq = alloc_workqueue("vduse-irq",
				WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
	if (!vduse_irq_wq)
		goto err_wq;

	vduse_irq_bound_wq = alloc_workqueue("vduse-irq-bound", WQ_HIGHPRI, 0);
	if (!vduse_irq_bound_wq)
		goto err_bound_wq;

	ret = vduse_domain_init();
	if (ret)
		goto err_domain;

	ret = vduse_mgmtdev_init();
	if (ret)
		goto err_mgmtdev;

	return 0;
err_mgmtdev:
	vduse_domain_exit();
err_domain:
	destroy_workqueue(vduse_irq_bound_wq);
err_bound_wq:
	destroy_workqueue(vduse_irq_wq);
err_wq:
	cdev_del(&vduse_cdev);
err_cdev:
	device_destroy(vduse_class, vduse_major);
err_device:
	cdev_del(&vduse_ctrl_cdev);
err_ctrl_cdev:
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
err_chardev_region:
	class_destroy(vduse_class);
	return ret;
}
module_init(vduse_init);

static void vduse_exit(void)
{
	vduse_mgmtdev_exit();
	vduse_domain_exit();
	destroy_workqueue(vduse_irq_bound_wq);
	destroy_workqueue(vduse_irq_wq);
	cdev_del(&vduse_cdev);
	device_destroy(vduse_class, vduse_major);
	cdev_del(&vduse_ctrl_cdev);
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
	class_destroy(vduse_class);
}
module_exit(vduse_exit);

MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);