]>
Commit | Line | Data |
---|---|---|
c8a6153b XY |
1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* | |
3 | * VDUSE: vDPA Device in Userspace | |
4 | * | |
5 | * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved. | |
6 | * | |
7 | * Author: Xie Yongji <xieyongji@bytedance.com> | |
8 | * | |
9 | */ | |
10 | ||
11 | #include <linux/init.h> | |
12 | #include <linux/module.h> | |
13 | #include <linux/cdev.h> | |
14 | #include <linux/device.h> | |
15 | #include <linux/eventfd.h> | |
16 | #include <linux/slab.h> | |
17 | #include <linux/wait.h> | |
18 | #include <linux/dma-map-ops.h> | |
19 | #include <linux/poll.h> | |
20 | #include <linux/file.h> | |
21 | #include <linux/uio.h> | |
22 | #include <linux/vdpa.h> | |
23 | #include <linux/nospec.h> | |
79a463be XY |
24 | #include <linux/vmalloc.h> |
25 | #include <linux/sched/mm.h> | |
c8a6153b XY |
26 | #include <uapi/linux/vduse.h> |
27 | #include <uapi/linux/vdpa.h> | |
28 | #include <uapi/linux/virtio_config.h> | |
29 | #include <uapi/linux/virtio_ids.h> | |
30 | #include <uapi/linux/virtio_blk.h> | |
31 | #include <linux/mod_devicetable.h> | |
32 | ||
33 | #include "iova_domain.h" | |
34 | ||
35 | #define DRV_AUTHOR "Yongji Xie <xieyongji@bytedance.com>" | |
36 | #define DRV_DESC "vDPA Device in Userspace" | |
37 | #define DRV_LICENSE "GPL v2" | |
38 | ||
39 | #define VDUSE_DEV_MAX (1U << MINORBITS) | |
40 | #define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024) | |
41 | #define VDUSE_IOVA_SIZE (128 * 1024 * 1024) | |
42 | #define VDUSE_MSG_DEFAULT_TIMEOUT 30 | |
43 | ||
28f6288e XY |
44 | #define IRQ_UNBOUND -1 |
45 | ||
c8a6153b XY |
46 | struct vduse_virtqueue { |
47 | u16 index; | |
48 | u16 num_max; | |
49 | u32 num; | |
50 | u64 desc_addr; | |
51 | u64 driver_addr; | |
52 | u64 device_addr; | |
53 | struct vdpa_vq_state state; | |
54 | bool ready; | |
55 | bool kicked; | |
56 | spinlock_t kick_lock; | |
57 | spinlock_t irq_lock; | |
58 | struct eventfd_ctx *kickfd; | |
59 | struct vdpa_callback cb; | |
60 | struct work_struct inject; | |
61 | struct work_struct kick; | |
28f6288e XY |
62 | int irq_effective_cpu; |
63 | struct cpumask irq_affinity; | |
c8a6153b XY |
64 | }; |
65 | ||
66 | struct vduse_dev; | |
67 | ||
68 | struct vduse_vdpa { | |
69 | struct vdpa_device vdpa; | |
70 | struct vduse_dev *dev; | |
71 | }; | |
72 | ||
79a463be XY |
73 | struct vduse_umem { |
74 | unsigned long iova; | |
75 | unsigned long npages; | |
76 | struct page **pages; | |
77 | struct mm_struct *mm; | |
78 | }; | |
79 | ||
c8a6153b XY |
80 | struct vduse_dev { |
81 | struct vduse_vdpa *vdev; | |
82 | struct device *dev; | |
78885597 | 83 | struct vduse_virtqueue **vqs; |
c8a6153b XY |
84 | struct vduse_iova_domain *domain; |
85 | char *name; | |
86 | struct mutex lock; | |
87 | spinlock_t msg_lock; | |
88 | u64 msg_unique; | |
89 | u32 msg_timeout; | |
90 | wait_queue_head_t waitq; | |
91 | struct list_head send_list; | |
92 | struct list_head recv_list; | |
93 | struct vdpa_callback config_cb; | |
94 | struct work_struct inject; | |
95 | spinlock_t irq_lock; | |
0943aacf | 96 | struct rw_semaphore rwsem; |
c8a6153b XY |
97 | int minor; |
98 | bool broken; | |
99 | bool connected; | |
100 | u64 api_version; | |
101 | u64 device_features; | |
102 | u64 driver_features; | |
103 | u32 device_id; | |
104 | u32 vendor_id; | |
105 | u32 generation; | |
106 | u32 config_size; | |
107 | void *config; | |
108 | u8 status; | |
109 | u32 vq_num; | |
110 | u32 vq_align; | |
79a463be XY |
111 | struct vduse_umem *umem; |
112 | struct mutex mem_lock; | |
c8a6153b XY |
113 | }; |
114 | ||
115 | struct vduse_dev_msg { | |
116 | struct vduse_dev_request req; | |
117 | struct vduse_dev_response resp; | |
118 | struct list_head list; | |
119 | wait_queue_head_t waitq; | |
120 | bool completed; | |
121 | }; | |
122 | ||
123 | struct vduse_control { | |
124 | u64 api_version; | |
125 | }; | |
126 | ||
127 | static DEFINE_MUTEX(vduse_lock); | |
128 | static DEFINE_IDR(vduse_idr); | |
129 | ||
130 | static dev_t vduse_major; | |
131 | static struct class *vduse_class; | |
132 | static struct cdev vduse_ctrl_cdev; | |
133 | static struct cdev vduse_cdev; | |
134 | static struct workqueue_struct *vduse_irq_wq; | |
28f6288e | 135 | static struct workqueue_struct *vduse_irq_bound_wq; |
c8a6153b XY |
136 | |
137 | static u32 allowed_device_id[] = { | |
138 | VIRTIO_ID_BLOCK, | |
139 | }; | |
140 | ||
141 | static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa) | |
142 | { | |
143 | struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa); | |
144 | ||
145 | return vdev->dev; | |
146 | } | |
147 | ||
148 | static inline struct vduse_dev *dev_to_vduse(struct device *dev) | |
149 | { | |
150 | struct vdpa_device *vdpa = dev_to_vdpa(dev); | |
151 | ||
152 | return vdpa_to_vduse(vdpa); | |
153 | } | |
154 | ||
155 | static struct vduse_dev_msg *vduse_find_msg(struct list_head *head, | |
156 | uint32_t request_id) | |
157 | { | |
158 | struct vduse_dev_msg *msg; | |
159 | ||
160 | list_for_each_entry(msg, head, list) { | |
161 | if (msg->req.request_id == request_id) { | |
162 | list_del(&msg->list); | |
163 | return msg; | |
164 | } | |
165 | } | |
166 | ||
167 | return NULL; | |
168 | } | |
169 | ||
170 | static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head) | |
171 | { | |
172 | struct vduse_dev_msg *msg = NULL; | |
173 | ||
174 | if (!list_empty(head)) { | |
175 | msg = list_first_entry(head, struct vduse_dev_msg, list); | |
176 | list_del(&msg->list); | |
177 | } | |
178 | ||
179 | return msg; | |
180 | } | |
181 | ||
182 | static void vduse_enqueue_msg(struct list_head *head, | |
183 | struct vduse_dev_msg *msg) | |
184 | { | |
185 | list_add_tail(&msg->list, head); | |
186 | } | |
187 | ||
188 | static void vduse_dev_broken(struct vduse_dev *dev) | |
189 | { | |
190 | struct vduse_dev_msg *msg, *tmp; | |
191 | ||
192 | if (unlikely(dev->broken)) | |
193 | return; | |
194 | ||
195 | list_splice_init(&dev->recv_list, &dev->send_list); | |
196 | list_for_each_entry_safe(msg, tmp, &dev->send_list, list) { | |
197 | list_del(&msg->list); | |
198 | msg->completed = 1; | |
199 | msg->resp.result = VDUSE_REQ_RESULT_FAILED; | |
200 | wake_up(&msg->waitq); | |
201 | } | |
202 | dev->broken = true; | |
203 | wake_up(&dev->waitq); | |
204 | } | |
205 | ||
206 | static int vduse_dev_msg_sync(struct vduse_dev *dev, | |
207 | struct vduse_dev_msg *msg) | |
208 | { | |
209 | int ret; | |
210 | ||
211 | if (unlikely(dev->broken)) | |
212 | return -EIO; | |
213 | ||
214 | init_waitqueue_head(&msg->waitq); | |
215 | spin_lock(&dev->msg_lock); | |
216 | if (unlikely(dev->broken)) { | |
217 | spin_unlock(&dev->msg_lock); | |
218 | return -EIO; | |
219 | } | |
220 | msg->req.request_id = dev->msg_unique++; | |
221 | vduse_enqueue_msg(&dev->send_list, msg); | |
222 | wake_up(&dev->waitq); | |
223 | spin_unlock(&dev->msg_lock); | |
224 | if (dev->msg_timeout) | |
225 | ret = wait_event_killable_timeout(msg->waitq, msg->completed, | |
226 | (long)dev->msg_timeout * HZ); | |
227 | else | |
228 | ret = wait_event_killable(msg->waitq, msg->completed); | |
229 | ||
230 | spin_lock(&dev->msg_lock); | |
231 | if (!msg->completed) { | |
232 | list_del(&msg->list); | |
233 | msg->resp.result = VDUSE_REQ_RESULT_FAILED; | |
234 | /* Mark the device as malfunction when there is a timeout */ | |
235 | if (!ret) | |
236 | vduse_dev_broken(dev); | |
237 | } | |
238 | ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO; | |
239 | spin_unlock(&dev->msg_lock); | |
240 | ||
241 | return ret; | |
242 | } | |
243 | ||
244 | static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev, | |
245 | struct vduse_virtqueue *vq, | |
246 | struct vdpa_vq_state_packed *packed) | |
247 | { | |
248 | struct vduse_dev_msg msg = { 0 }; | |
249 | int ret; | |
250 | ||
251 | msg.req.type = VDUSE_GET_VQ_STATE; | |
252 | msg.req.vq_state.index = vq->index; | |
253 | ||
254 | ret = vduse_dev_msg_sync(dev, &msg); | |
255 | if (ret) | |
256 | return ret; | |
257 | ||
258 | packed->last_avail_counter = | |
259 | msg.resp.vq_state.packed.last_avail_counter & 0x0001; | |
260 | packed->last_avail_idx = | |
261 | msg.resp.vq_state.packed.last_avail_idx & 0x7FFF; | |
262 | packed->last_used_counter = | |
263 | msg.resp.vq_state.packed.last_used_counter & 0x0001; | |
264 | packed->last_used_idx = | |
265 | msg.resp.vq_state.packed.last_used_idx & 0x7FFF; | |
266 | ||
267 | return 0; | |
268 | } | |
269 | ||
270 | static int vduse_dev_get_vq_state_split(struct vduse_dev *dev, | |
271 | struct vduse_virtqueue *vq, | |
272 | struct vdpa_vq_state_split *split) | |
273 | { | |
274 | struct vduse_dev_msg msg = { 0 }; | |
275 | int ret; | |
276 | ||
277 | msg.req.type = VDUSE_GET_VQ_STATE; | |
278 | msg.req.vq_state.index = vq->index; | |
279 | ||
280 | ret = vduse_dev_msg_sync(dev, &msg); | |
281 | if (ret) | |
282 | return ret; | |
283 | ||
284 | split->avail_index = msg.resp.vq_state.split.avail_index; | |
285 | ||
286 | return 0; | |
287 | } | |
288 | ||
289 | static int vduse_dev_set_status(struct vduse_dev *dev, u8 status) | |
290 | { | |
291 | struct vduse_dev_msg msg = { 0 }; | |
292 | ||
293 | msg.req.type = VDUSE_SET_STATUS; | |
294 | msg.req.s.status = status; | |
295 | ||
296 | return vduse_dev_msg_sync(dev, &msg); | |
297 | } | |
298 | ||
299 | static int vduse_dev_update_iotlb(struct vduse_dev *dev, | |
300 | u64 start, u64 last) | |
301 | { | |
302 | struct vduse_dev_msg msg = { 0 }; | |
303 | ||
304 | if (last < start) | |
305 | return -EINVAL; | |
306 | ||
307 | msg.req.type = VDUSE_UPDATE_IOTLB; | |
308 | msg.req.iova.start = start; | |
309 | msg.req.iova.last = last; | |
310 | ||
311 | return vduse_dev_msg_sync(dev, &msg); | |
312 | } | |
313 | ||
314 | static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to) | |
315 | { | |
316 | struct file *file = iocb->ki_filp; | |
317 | struct vduse_dev *dev = file->private_data; | |
318 | struct vduse_dev_msg *msg; | |
319 | int size = sizeof(struct vduse_dev_request); | |
320 | ssize_t ret; | |
321 | ||
322 | if (iov_iter_count(to) < size) | |
323 | return -EINVAL; | |
324 | ||
325 | spin_lock(&dev->msg_lock); | |
326 | while (1) { | |
327 | msg = vduse_dequeue_msg(&dev->send_list); | |
328 | if (msg) | |
329 | break; | |
330 | ||
331 | ret = -EAGAIN; | |
332 | if (file->f_flags & O_NONBLOCK) | |
333 | goto unlock; | |
334 | ||
335 | spin_unlock(&dev->msg_lock); | |
336 | ret = wait_event_interruptible_exclusive(dev->waitq, | |
337 | !list_empty(&dev->send_list)); | |
338 | if (ret) | |
339 | return ret; | |
340 | ||
341 | spin_lock(&dev->msg_lock); | |
342 | } | |
343 | spin_unlock(&dev->msg_lock); | |
344 | ret = copy_to_iter(&msg->req, size, to); | |
345 | spin_lock(&dev->msg_lock); | |
346 | if (ret != size) { | |
347 | ret = -EFAULT; | |
348 | vduse_enqueue_msg(&dev->send_list, msg); | |
349 | goto unlock; | |
350 | } | |
351 | vduse_enqueue_msg(&dev->recv_list, msg); | |
352 | unlock: | |
353 | spin_unlock(&dev->msg_lock); | |
354 | ||
355 | return ret; | |
356 | } | |
357 | ||
/* Return true iff the first @size bytes at @ptr are all zero. */
static bool is_mem_zero(const char *ptr, int size)
{
	const char *end = ptr + size;

	while (ptr < end) {
		if (*ptr++)
			return false;
	}
	return true;
}
368 | ||
369 | static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from) | |
370 | { | |
371 | struct file *file = iocb->ki_filp; | |
372 | struct vduse_dev *dev = file->private_data; | |
373 | struct vduse_dev_response resp; | |
374 | struct vduse_dev_msg *msg; | |
375 | size_t ret; | |
376 | ||
377 | ret = copy_from_iter(&resp, sizeof(resp), from); | |
378 | if (ret != sizeof(resp)) | |
379 | return -EINVAL; | |
380 | ||
381 | if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved))) | |
382 | return -EINVAL; | |
383 | ||
384 | spin_lock(&dev->msg_lock); | |
385 | msg = vduse_find_msg(&dev->recv_list, resp.request_id); | |
386 | if (!msg) { | |
387 | ret = -ENOENT; | |
388 | goto unlock; | |
389 | } | |
390 | ||
391 | memcpy(&msg->resp, &resp, sizeof(resp)); | |
392 | msg->completed = 1; | |
393 | wake_up(&msg->waitq); | |
394 | unlock: | |
395 | spin_unlock(&dev->msg_lock); | |
396 | ||
397 | return ret; | |
398 | } | |
399 | ||
400 | static __poll_t vduse_dev_poll(struct file *file, poll_table *wait) | |
401 | { | |
402 | struct vduse_dev *dev = file->private_data; | |
403 | __poll_t mask = 0; | |
404 | ||
405 | poll_wait(file, &dev->waitq, wait); | |
406 | ||
407 | spin_lock(&dev->msg_lock); | |
408 | ||
409 | if (unlikely(dev->broken)) | |
410 | mask |= EPOLLERR; | |
411 | if (!list_empty(&dev->send_list)) | |
412 | mask |= EPOLLIN | EPOLLRDNORM; | |
413 | if (!list_empty(&dev->recv_list)) | |
414 | mask |= EPOLLOUT | EPOLLWRNORM; | |
415 | ||
416 | spin_unlock(&dev->msg_lock); | |
417 | ||
418 | return mask; | |
419 | } | |
420 | ||
421 | static void vduse_dev_reset(struct vduse_dev *dev) | |
422 | { | |
423 | int i; | |
424 | struct vduse_iova_domain *domain = dev->domain; | |
425 | ||
426 | /* The coherent mappings are handled in vduse_dev_free_coherent() */ | |
427 | if (domain->bounce_map) | |
428 | vduse_domain_reset_bounce_map(domain); | |
429 | ||
0943aacf XY |
430 | down_write(&dev->rwsem); |
431 | ||
c8a6153b XY |
432 | dev->status = 0; |
433 | dev->driver_features = 0; | |
434 | dev->generation++; | |
435 | spin_lock(&dev->irq_lock); | |
436 | dev->config_cb.callback = NULL; | |
437 | dev->config_cb.private = NULL; | |
438 | spin_unlock(&dev->irq_lock); | |
439 | flush_work(&dev->inject); | |
440 | ||
441 | for (i = 0; i < dev->vq_num; i++) { | |
78885597 | 442 | struct vduse_virtqueue *vq = dev->vqs[i]; |
c8a6153b XY |
443 | |
444 | vq->ready = false; | |
445 | vq->desc_addr = 0; | |
446 | vq->driver_addr = 0; | |
447 | vq->device_addr = 0; | |
448 | vq->num = 0; | |
449 | memset(&vq->state, 0, sizeof(vq->state)); | |
450 | ||
451 | spin_lock(&vq->kick_lock); | |
452 | vq->kicked = false; | |
453 | if (vq->kickfd) | |
454 | eventfd_ctx_put(vq->kickfd); | |
455 | vq->kickfd = NULL; | |
456 | spin_unlock(&vq->kick_lock); | |
457 | ||
458 | spin_lock(&vq->irq_lock); | |
459 | vq->cb.callback = NULL; | |
460 | vq->cb.private = NULL; | |
461 | spin_unlock(&vq->irq_lock); | |
462 | flush_work(&vq->inject); | |
463 | flush_work(&vq->kick); | |
464 | } | |
0943aacf XY |
465 | |
466 | up_write(&dev->rwsem); | |
c8a6153b XY |
467 | } |
468 | ||
469 | static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx, | |
470 | u64 desc_area, u64 driver_area, | |
471 | u64 device_area) | |
472 | { | |
473 | struct vduse_dev *dev = vdpa_to_vduse(vdpa); | |
78885597 | 474 | struct vduse_virtqueue *vq = dev->vqs[idx]; |
c8a6153b XY |
475 | |
476 | vq->desc_addr = desc_area; | |
477 | vq->driver_addr = driver_area; | |
478 | vq->device_addr = device_area; | |
479 | ||
480 | return 0; | |
481 | } | |
482 | ||
483 | static void vduse_vq_kick(struct vduse_virtqueue *vq) | |
484 | { | |
485 | spin_lock(&vq->kick_lock); | |
486 | if (!vq->ready) | |
487 | goto unlock; | |
488 | ||
489 | if (vq->kickfd) | |
490 | eventfd_signal(vq->kickfd, 1); | |
491 | else | |
492 | vq->kicked = true; | |
493 | unlock: | |
494 | spin_unlock(&vq->kick_lock); | |
495 | } | |
496 | ||
497 | static void vduse_vq_kick_work(struct work_struct *work) | |
498 | { | |
499 | struct vduse_virtqueue *vq = container_of(work, | |
500 | struct vduse_virtqueue, kick); | |
501 | ||
502 | vduse_vq_kick(vq); | |
503 | } | |
504 | ||
505 | static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx) | |
506 | { | |
507 | struct vduse_dev *dev = vdpa_to_vduse(vdpa); | |
78885597 | 508 | struct vduse_virtqueue *vq = dev->vqs[idx]; |
c8a6153b | 509 | |
78e70952 | 510 | if (!eventfd_signal_allowed()) { |
c8a6153b XY |
511 | schedule_work(&vq->kick); |
512 | return; | |
513 | } | |
514 | vduse_vq_kick(vq); | |
515 | } | |
516 | ||
517 | static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx, | |
518 | struct vdpa_callback *cb) | |
519 | { | |
520 | struct vduse_dev *dev = vdpa_to_vduse(vdpa); | |
78885597 | 521 | struct vduse_virtqueue *vq = dev->vqs[idx]; |
c8a6153b XY |
522 | |
523 | spin_lock(&vq->irq_lock); | |
524 | vq->cb.callback = cb->callback; | |
525 | vq->cb.private = cb->private; | |
526 | spin_unlock(&vq->irq_lock); | |
527 | } | |
528 | ||
529 | static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num) | |
530 | { | |
531 | struct vduse_dev *dev = vdpa_to_vduse(vdpa); | |
78885597 | 532 | struct vduse_virtqueue *vq = dev->vqs[idx]; |
c8a6153b XY |
533 | |
534 | vq->num = num; | |
535 | } | |
536 | ||
537 | static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa, | |
538 | u16 idx, bool ready) | |
539 | { | |
540 | struct vduse_dev *dev = vdpa_to_vduse(vdpa); | |
78885597 | 541 | struct vduse_virtqueue *vq = dev->vqs[idx]; |
c8a6153b XY |
542 | |
543 | vq->ready = ready; | |
544 | } | |
545 | ||
546 | static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx) | |
547 | { | |
548 | struct vduse_dev *dev = vdpa_to_vduse(vdpa); | |
78885597 | 549 | struct vduse_virtqueue *vq = dev->vqs[idx]; |
c8a6153b XY |
550 | |
551 | return vq->ready; | |
552 | } | |
553 | ||
554 | static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx, | |
555 | const struct vdpa_vq_state *state) | |
556 | { | |
557 | struct vduse_dev *dev = vdpa_to_vduse(vdpa); | |
78885597 | 558 | struct vduse_virtqueue *vq = dev->vqs[idx]; |
c8a6153b XY |
559 | |
560 | if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) { | |
561 | vq->state.packed.last_avail_counter = | |
562 | state->packed.last_avail_counter; | |
563 | vq->state.packed.last_avail_idx = state->packed.last_avail_idx; | |
564 | vq->state.packed.last_used_counter = | |
565 | state->packed.last_used_counter; | |
566 | vq->state.packed.last_used_idx = state->packed.last_used_idx; | |
567 | } else | |
568 | vq->state.split.avail_index = state->split.avail_index; | |
569 | ||
570 | return 0; | |
571 | } | |
572 | ||
573 | static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx, | |
574 | struct vdpa_vq_state *state) | |
575 | { | |
576 | struct vduse_dev *dev = vdpa_to_vduse(vdpa); | |
78885597 | 577 | struct vduse_virtqueue *vq = dev->vqs[idx]; |
c8a6153b XY |
578 | |
579 | if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) | |
580 | return vduse_dev_get_vq_state_packed(dev, vq, &state->packed); | |
581 | ||
582 | return vduse_dev_get_vq_state_split(dev, vq, &state->split); | |
583 | } | |
584 | ||
585 | static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa) | |
586 | { | |
587 | struct vduse_dev *dev = vdpa_to_vduse(vdpa); | |
588 | ||
589 | return dev->vq_align; | |
590 | } | |
591 | ||
a64917bc | 592 | static u64 vduse_vdpa_get_device_features(struct vdpa_device *vdpa) |
c8a6153b XY |
593 | { |
594 | struct vduse_dev *dev = vdpa_to_vduse(vdpa); | |
595 | ||
596 | return dev->device_features; | |
597 | } | |
598 | ||
a64917bc | 599 | static int vduse_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features) |
c8a6153b XY |
600 | { |
601 | struct vduse_dev *dev = vdpa_to_vduse(vdpa); | |
602 | ||
603 | dev->driver_features = features; | |
604 | return 0; | |
605 | } | |
606 | ||
a64917bc EC |
607 | static u64 vduse_vdpa_get_driver_features(struct vdpa_device *vdpa) |
608 | { | |
609 | struct vduse_dev *dev = vdpa_to_vduse(vdpa); | |
610 | ||
611 | return dev->driver_features; | |
612 | } | |
613 | ||
c8a6153b XY |
614 | static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa, |
615 | struct vdpa_callback *cb) | |
616 | { | |
617 | struct vduse_dev *dev = vdpa_to_vduse(vdpa); | |
618 | ||
619 | spin_lock(&dev->irq_lock); | |
620 | dev->config_cb.callback = cb->callback; | |
621 | dev->config_cb.private = cb->private; | |
622 | spin_unlock(&dev->irq_lock); | |
623 | } | |
624 | ||
625 | static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa) | |
626 | { | |
627 | struct vduse_dev *dev = vdpa_to_vduse(vdpa); | |
628 | u16 num_max = 0; | |
629 | int i; | |
630 | ||
631 | for (i = 0; i < dev->vq_num; i++) | |
78885597 XY |
632 | if (num_max < dev->vqs[i]->num_max) |
633 | num_max = dev->vqs[i]->num_max; | |
c8a6153b XY |
634 | |
635 | return num_max; | |
636 | } | |
637 | ||
638 | static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa) | |
639 | { | |
640 | struct vduse_dev *dev = vdpa_to_vduse(vdpa); | |
641 | ||
642 | return dev->device_id; | |
643 | } | |
644 | ||
645 | static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa) | |
646 | { | |
647 | struct vduse_dev *dev = vdpa_to_vduse(vdpa); | |
648 | ||
649 | return dev->vendor_id; | |
650 | } | |
651 | ||
652 | static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa) | |
653 | { | |
654 | struct vduse_dev *dev = vdpa_to_vduse(vdpa); | |
655 | ||
656 | return dev->status; | |
657 | } | |
658 | ||
659 | static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status) | |
660 | { | |
661 | struct vduse_dev *dev = vdpa_to_vduse(vdpa); | |
662 | ||
663 | if (vduse_dev_set_status(dev, status)) | |
664 | return; | |
665 | ||
666 | dev->status = status; | |
667 | } | |
668 | ||
669 | static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa) | |
670 | { | |
671 | struct vduse_dev *dev = vdpa_to_vduse(vdpa); | |
672 | ||
673 | return dev->config_size; | |
674 | } | |
675 | ||
676 | static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset, | |
677 | void *buf, unsigned int len) | |
678 | { | |
679 | struct vduse_dev *dev = vdpa_to_vduse(vdpa); | |
680 | ||
46f8a292 MC |
681 | /* Initialize the buffer in case of partial copy. */ |
682 | memset(buf, 0, len); | |
683 | ||
684 | if (offset > dev->config_size) | |
c8a6153b XY |
685 | return; |
686 | ||
46f8a292 MC |
687 | if (len > dev->config_size - offset) |
688 | len = dev->config_size - offset; | |
689 | ||
c8a6153b XY |
690 | memcpy(buf, dev->config + offset, len); |
691 | } | |
692 | ||
693 | static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset, | |
694 | const void *buf, unsigned int len) | |
695 | { | |
696 | /* Now we only support read-only configuration space */ | |
697 | } | |
698 | ||
699 | static int vduse_vdpa_reset(struct vdpa_device *vdpa) | |
700 | { | |
701 | struct vduse_dev *dev = vdpa_to_vduse(vdpa); | |
7bb5fb20 | 702 | int ret = vduse_dev_set_status(dev, 0); |
c8a6153b XY |
703 | |
704 | vduse_dev_reset(dev); | |
705 | ||
7bb5fb20 | 706 | return ret; |
c8a6153b XY |
707 | } |
708 | ||
709 | static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa) | |
710 | { | |
711 | struct vduse_dev *dev = vdpa_to_vduse(vdpa); | |
712 | ||
713 | return dev->generation; | |
714 | } | |
715 | ||
28f6288e XY |
716 | static int vduse_vdpa_set_vq_affinity(struct vdpa_device *vdpa, u16 idx, |
717 | const struct cpumask *cpu_mask) | |
718 | { | |
719 | struct vduse_dev *dev = vdpa_to_vduse(vdpa); | |
720 | ||
721 | cpumask_copy(&dev->vqs[idx]->irq_affinity, cpu_mask); | |
722 | return 0; | |
723 | } | |
724 | ||
c8a6153b | 725 | static int vduse_vdpa_set_map(struct vdpa_device *vdpa, |
db9adcbf | 726 | unsigned int asid, |
c8a6153b XY |
727 | struct vhost_iotlb *iotlb) |
728 | { | |
729 | struct vduse_dev *dev = vdpa_to_vduse(vdpa); | |
730 | int ret; | |
731 | ||
732 | ret = vduse_domain_set_map(dev->domain, iotlb); | |
733 | if (ret) | |
734 | return ret; | |
735 | ||
736 | ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX); | |
737 | if (ret) { | |
738 | vduse_domain_clear_map(dev->domain, iotlb); | |
739 | return ret; | |
740 | } | |
741 | ||
742 | return 0; | |
743 | } | |
744 | ||
745 | static void vduse_vdpa_free(struct vdpa_device *vdpa) | |
746 | { | |
747 | struct vduse_dev *dev = vdpa_to_vduse(vdpa); | |
748 | ||
749 | dev->vdev = NULL; | |
750 | } | |
751 | ||
752 | static const struct vdpa_config_ops vduse_vdpa_config_ops = { | |
753 | .set_vq_address = vduse_vdpa_set_vq_address, | |
754 | .kick_vq = vduse_vdpa_kick_vq, | |
755 | .set_vq_cb = vduse_vdpa_set_vq_cb, | |
756 | .set_vq_num = vduse_vdpa_set_vq_num, | |
757 | .set_vq_ready = vduse_vdpa_set_vq_ready, | |
758 | .get_vq_ready = vduse_vdpa_get_vq_ready, | |
759 | .set_vq_state = vduse_vdpa_set_vq_state, | |
760 | .get_vq_state = vduse_vdpa_get_vq_state, | |
761 | .get_vq_align = vduse_vdpa_get_vq_align, | |
a64917bc EC |
762 | .get_device_features = vduse_vdpa_get_device_features, |
763 | .set_driver_features = vduse_vdpa_set_driver_features, | |
764 | .get_driver_features = vduse_vdpa_get_driver_features, | |
c8a6153b XY |
765 | .set_config_cb = vduse_vdpa_set_config_cb, |
766 | .get_vq_num_max = vduse_vdpa_get_vq_num_max, | |
767 | .get_device_id = vduse_vdpa_get_device_id, | |
768 | .get_vendor_id = vduse_vdpa_get_vendor_id, | |
769 | .get_status = vduse_vdpa_get_status, | |
770 | .set_status = vduse_vdpa_set_status, | |
771 | .get_config_size = vduse_vdpa_get_config_size, | |
772 | .get_config = vduse_vdpa_get_config, | |
773 | .set_config = vduse_vdpa_set_config, | |
774 | .get_generation = vduse_vdpa_get_generation, | |
28f6288e | 775 | .set_vq_affinity = vduse_vdpa_set_vq_affinity, |
c8a6153b XY |
776 | .reset = vduse_vdpa_reset, |
777 | .set_map = vduse_vdpa_set_map, | |
778 | .free = vduse_vdpa_free, | |
779 | }; | |
780 | ||
781 | static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page, | |
782 | unsigned long offset, size_t size, | |
783 | enum dma_data_direction dir, | |
784 | unsigned long attrs) | |
785 | { | |
786 | struct vduse_dev *vdev = dev_to_vduse(dev); | |
787 | struct vduse_iova_domain *domain = vdev->domain; | |
788 | ||
789 | return vduse_domain_map_page(domain, page, offset, size, dir, attrs); | |
790 | } | |
791 | ||
792 | static void vduse_dev_unmap_page(struct device *dev, dma_addr_t dma_addr, | |
793 | size_t size, enum dma_data_direction dir, | |
794 | unsigned long attrs) | |
795 | { | |
796 | struct vduse_dev *vdev = dev_to_vduse(dev); | |
797 | struct vduse_iova_domain *domain = vdev->domain; | |
798 | ||
799 | return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs); | |
800 | } | |
801 | ||
802 | static void *vduse_dev_alloc_coherent(struct device *dev, size_t size, | |
803 | dma_addr_t *dma_addr, gfp_t flag, | |
804 | unsigned long attrs) | |
805 | { | |
806 | struct vduse_dev *vdev = dev_to_vduse(dev); | |
807 | struct vduse_iova_domain *domain = vdev->domain; | |
808 | unsigned long iova; | |
809 | void *addr; | |
810 | ||
811 | *dma_addr = DMA_MAPPING_ERROR; | |
812 | addr = vduse_domain_alloc_coherent(domain, size, | |
813 | (dma_addr_t *)&iova, flag, attrs); | |
814 | if (!addr) | |
815 | return NULL; | |
816 | ||
817 | *dma_addr = (dma_addr_t)iova; | |
818 | ||
819 | return addr; | |
820 | } | |
821 | ||
822 | static void vduse_dev_free_coherent(struct device *dev, size_t size, | |
823 | void *vaddr, dma_addr_t dma_addr, | |
824 | unsigned long attrs) | |
825 | { | |
826 | struct vduse_dev *vdev = dev_to_vduse(dev); | |
827 | struct vduse_iova_domain *domain = vdev->domain; | |
828 | ||
829 | vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs); | |
830 | } | |
831 | ||
832 | static size_t vduse_dev_max_mapping_size(struct device *dev) | |
833 | { | |
834 | struct vduse_dev *vdev = dev_to_vduse(dev); | |
835 | struct vduse_iova_domain *domain = vdev->domain; | |
836 | ||
837 | return domain->bounce_size; | |
838 | } | |
839 | ||
840 | static const struct dma_map_ops vduse_dev_dma_ops = { | |
841 | .map_page = vduse_dev_map_page, | |
842 | .unmap_page = vduse_dev_unmap_page, | |
843 | .alloc = vduse_dev_alloc_coherent, | |
844 | .free = vduse_dev_free_coherent, | |
845 | .max_mapping_size = vduse_dev_max_mapping_size, | |
846 | }; | |
847 | ||
848 | static unsigned int perm_to_file_flags(u8 perm) | |
849 | { | |
850 | unsigned int flags = 0; | |
851 | ||
852 | switch (perm) { | |
853 | case VDUSE_ACCESS_WO: | |
854 | flags |= O_WRONLY; | |
855 | break; | |
856 | case VDUSE_ACCESS_RO: | |
857 | flags |= O_RDONLY; | |
858 | break; | |
859 | case VDUSE_ACCESS_RW: | |
860 | flags |= O_RDWR; | |
861 | break; | |
862 | default: | |
863 | WARN(1, "invalidate vhost IOTLB permission\n"); | |
864 | break; | |
865 | } | |
866 | ||
867 | return flags; | |
868 | } | |
869 | ||
870 | static int vduse_kickfd_setup(struct vduse_dev *dev, | |
871 | struct vduse_vq_eventfd *eventfd) | |
872 | { | |
873 | struct eventfd_ctx *ctx = NULL; | |
874 | struct vduse_virtqueue *vq; | |
875 | u32 index; | |
876 | ||
877 | if (eventfd->index >= dev->vq_num) | |
878 | return -EINVAL; | |
879 | ||
880 | index = array_index_nospec(eventfd->index, dev->vq_num); | |
78885597 | 881 | vq = dev->vqs[index]; |
c8a6153b XY |
882 | if (eventfd->fd >= 0) { |
883 | ctx = eventfd_ctx_fdget(eventfd->fd); | |
884 | if (IS_ERR(ctx)) | |
885 | return PTR_ERR(ctx); | |
886 | } else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN) | |
887 | return 0; | |
888 | ||
889 | spin_lock(&vq->kick_lock); | |
890 | if (vq->kickfd) | |
891 | eventfd_ctx_put(vq->kickfd); | |
892 | vq->kickfd = ctx; | |
893 | if (vq->ready && vq->kicked && vq->kickfd) { | |
894 | eventfd_signal(vq->kickfd, 1); | |
895 | vq->kicked = false; | |
896 | } | |
897 | spin_unlock(&vq->kick_lock); | |
898 | ||
899 | return 0; | |
900 | } | |
901 | ||
902 | static bool vduse_dev_is_ready(struct vduse_dev *dev) | |
903 | { | |
904 | int i; | |
905 | ||
906 | for (i = 0; i < dev->vq_num; i++) | |
78885597 | 907 | if (!dev->vqs[i]->num_max) |
c8a6153b XY |
908 | return false; |
909 | ||
910 | return true; | |
911 | } | |
912 | ||
913 | static void vduse_dev_irq_inject(struct work_struct *work) | |
914 | { | |
915 | struct vduse_dev *dev = container_of(work, struct vduse_dev, inject); | |
916 | ||
917 | spin_lock_irq(&dev->irq_lock); | |
918 | if (dev->config_cb.callback) | |
919 | dev->config_cb.callback(dev->config_cb.private); | |
920 | spin_unlock_irq(&dev->irq_lock); | |
921 | } | |
922 | ||
923 | static void vduse_vq_irq_inject(struct work_struct *work) | |
924 | { | |
925 | struct vduse_virtqueue *vq = container_of(work, | |
926 | struct vduse_virtqueue, inject); | |
927 | ||
928 | spin_lock_irq(&vq->irq_lock); | |
929 | if (vq->ready && vq->cb.callback) | |
930 | vq->cb.callback(vq->cb.private); | |
931 | spin_unlock_irq(&vq->irq_lock); | |
932 | } | |
933 | ||
0943aacf | 934 | static int vduse_dev_queue_irq_work(struct vduse_dev *dev, |
28f6288e XY |
935 | struct work_struct *irq_work, |
936 | int irq_effective_cpu) | |
0943aacf XY |
937 | { |
938 | int ret = -EINVAL; | |
939 | ||
940 | down_read(&dev->rwsem); | |
941 | if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK)) | |
942 | goto unlock; | |
943 | ||
944 | ret = 0; | |
28f6288e XY |
945 | if (irq_effective_cpu == IRQ_UNBOUND) |
946 | queue_work(vduse_irq_wq, irq_work); | |
947 | else | |
948 | queue_work_on(irq_effective_cpu, | |
949 | vduse_irq_bound_wq, irq_work); | |
0943aacf XY |
950 | unlock: |
951 | up_read(&dev->rwsem); | |
952 | ||
953 | return ret; | |
954 | } | |
955 | ||
79a463be XY |
956 | static int vduse_dev_dereg_umem(struct vduse_dev *dev, |
957 | u64 iova, u64 size) | |
958 | { | |
959 | int ret; | |
960 | ||
961 | mutex_lock(&dev->mem_lock); | |
962 | ret = -ENOENT; | |
963 | if (!dev->umem) | |
964 | goto unlock; | |
965 | ||
966 | ret = -EINVAL; | |
967 | if (dev->umem->iova != iova || size != dev->domain->bounce_size) | |
968 | goto unlock; | |
969 | ||
970 | vduse_domain_remove_user_bounce_pages(dev->domain); | |
971 | unpin_user_pages_dirty_lock(dev->umem->pages, | |
972 | dev->umem->npages, true); | |
973 | atomic64_sub(dev->umem->npages, &dev->umem->mm->pinned_vm); | |
974 | mmdrop(dev->umem->mm); | |
975 | vfree(dev->umem->pages); | |
976 | kfree(dev->umem); | |
977 | dev->umem = NULL; | |
978 | ret = 0; | |
979 | unlock: | |
980 | mutex_unlock(&dev->mem_lock); | |
981 | return ret; | |
982 | } | |
983 | ||
/*
 * Register a userspace memory region as the backing store of the bounce
 * buffer, replacing the kernel-allocated bounce pages.
 *
 * Only the whole bounce region is accepted: @iova must be 0, @size must
 * equal the domain's bounce size, and @uaddr must be page-aligned.  The
 * pages are long-term pinned and accounted against RLIMIT_MEMLOCK.
 *
 * Returns 0 on success, -EEXIST if a region is already registered,
 * -EINVAL on a malformed request, -ENOMEM on allocation/limit failure,
 * or a pin_user_pages() error.  Serialized by dev->mem_lock.
 */
static int vduse_dev_reg_umem(struct vduse_dev *dev,
			      u64 iova, u64 uaddr, u64 size)
{
	struct page **page_list = NULL;
	struct vduse_umem *umem = NULL;
	long pinned = 0;
	unsigned long npages, lock_limit;
	int ret;

	if (!dev->domain->bounce_map ||
	    size != dev->domain->bounce_size ||
	    iova != 0 || uaddr & ~PAGE_MASK)
		return -EINVAL;

	mutex_lock(&dev->mem_lock);
	ret = -EEXIST;
	if (dev->umem)
		goto unlock;

	ret = -ENOMEM;
	npages = size >> PAGE_SHIFT;
	/* page_list can be large (bounce_size >> PAGE_SHIFT entries) */
	page_list = __vmalloc(array_size(npages, sizeof(struct page *)),
			      GFP_KERNEL_ACCOUNT);
	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!page_list || !umem)
		goto unlock;

	mmap_read_lock(current->mm);

	/* Enforce the caller's locked-memory rlimit before pinning */
	lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
	if (npages + atomic64_read(&current->mm->pinned_vm) > lock_limit)
		goto out;

	pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE,
				page_list, NULL);
	if (pinned != npages) {
		ret = pinned < 0 ? pinned : -ENOMEM;
		goto out;
	}

	ret = vduse_domain_add_user_bounce_pages(dev->domain,
						 page_list, pinned);
	if (ret)
		goto out;

	atomic64_add(npages, &current->mm->pinned_vm);

	umem->pages = page_list;
	umem->npages = pinned;
	umem->iova = iova;
	umem->mm = current->mm;
	/* Keep the mm alive so deregistration can adjust pinned_vm */
	mmgrab(current->mm);

	dev->umem = umem;
out:
	if (ret && pinned > 0)
		unpin_user_pages(page_list, pinned);

	mmap_read_unlock(current->mm);
unlock:
	if (ret) {
		vfree(page_list);
		kfree(umem);
	}
	mutex_unlock(&dev->mem_lock);
	return ret;
}
1051 | ||
28f6288e XY |
1052 | static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq) |
1053 | { | |
1054 | int curr_cpu = vq->irq_effective_cpu; | |
1055 | ||
1056 | while (true) { | |
1057 | curr_cpu = cpumask_next(curr_cpu, &vq->irq_affinity); | |
1058 | if (cpu_online(curr_cpu)) | |
1059 | break; | |
1060 | ||
1061 | if (curr_cpu >= nr_cpu_ids) | |
1062 | curr_cpu = IRQ_UNBOUND; | |
1063 | } | |
1064 | ||
1065 | vq->irq_effective_cpu = curr_cpu; | |
1066 | } | |
1067 | ||
c8a6153b XY |
/*
 * ioctl handler for /dev/vduse/$DEVICE (the per-device char device).
 *
 * Handles IOTLB queries, config-space writes, virtqueue setup/state
 * queries, kickfd wiring, interrupt injection and userspace bounce-memory
 * (de)registration.  A device marked broken rejects everything with
 * -EPERM.  Returns 0 or a positive fd on success, negative errno on
 * failure, -ENOIOCTLCMD for unknown commands.
 */
static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
			    unsigned long arg)
{
	struct vduse_dev *dev = file->private_data;
	void __user *argp = (void __user *)arg;
	int ret;

	if (unlikely(dev->broken))
		return -EPERM;

	switch (cmd) {
	case VDUSE_IOTLB_GET_FD: {
		/*
		 * Look up the first IOTLB mapping intersecting the queried
		 * range and install its backing file as a new fd in the
		 * caller's fd table.
		 */
		struct vduse_iotlb_entry entry;
		struct vhost_iotlb_map *map;
		struct vdpa_map_file *map_file;
		struct vduse_iova_domain *domain = dev->domain;
		struct file *f = NULL;

		ret = -EFAULT;
		if (copy_from_user(&entry, argp, sizeof(entry)))
			break;

		ret = -EINVAL;
		if (entry.start > entry.last)
			break;

		spin_lock(&domain->iotlb_lock);
		map = vhost_iotlb_itree_first(domain->iotlb,
					      entry.start, entry.last);
		if (map) {
			map_file = (struct vdpa_map_file *)map->opaque;
			/* hold a reference while the lock is dropped */
			f = get_file(map_file->file);
			entry.offset = map_file->offset;
			entry.start = map->start;
			entry.last = map->last;
			entry.perm = map->perm;
		}
		spin_unlock(&domain->iotlb_lock);
		ret = -EINVAL;
		if (!f)
			break;

		ret = -EFAULT;
		if (copy_to_user(argp, &entry, sizeof(entry))) {
			fput(f);
			break;
		}
		/* receive_fd() takes its own reference on success */
		ret = receive_fd(f, perm_to_file_flags(entry.perm));
		fput(f);
		break;
	}
	case VDUSE_DEV_GET_FEATURES:
		/*
		 * Just mirror what driver wrote here.
		 * The driver is expected to check FEATURE_OK later.
		 */
		ret = put_user(dev->driver_features, (u64 __user *)argp);
		break;
	case VDUSE_DEV_SET_CONFIG: {
		/* Write a range of the device config space from userspace */
		struct vduse_config_data config;
		unsigned long size = offsetof(struct vduse_config_data,
					      buffer);

		ret = -EFAULT;
		if (copy_from_user(&config, argp, size))
			break;

		ret = -EINVAL;
		/* reject zero-length and out-of-bounds ranges (no overflow) */
		if (config.offset > dev->config_size ||
		    config.length == 0 ||
		    config.length > dev->config_size - config.offset)
			break;

		ret = -EFAULT;
		if (copy_from_user(dev->config + config.offset, argp + size,
				   config.length))
			break;

		ret = 0;
		break;
	}
	case VDUSE_DEV_INJECT_CONFIG_IRQ:
		/* Config-change interrupts are never CPU-bound */
		ret = vduse_dev_queue_irq_work(dev, &dev->inject, IRQ_UNBOUND);
		break;
	case VDUSE_VQ_SETUP: {
		/* Set the maximum queue size for one virtqueue */
		struct vduse_vq_config config;
		u32 index;

		ret = -EFAULT;
		if (copy_from_user(&config, argp, sizeof(config)))
			break;

		ret = -EINVAL;
		if (config.index >= dev->vq_num)
			break;

		if (!is_mem_zero((const char *)config.reserved,
				 sizeof(config.reserved)))
			break;

		/* clamp speculation on the userspace-controlled index */
		index = array_index_nospec(config.index, dev->vq_num);
		dev->vqs[index]->num_max = config.max_size;
		ret = 0;
		break;
	}
	case VDUSE_VQ_GET_INFO: {
		/* Report address/size/state/readiness of one virtqueue */
		struct vduse_vq_info vq_info;
		struct vduse_virtqueue *vq;
		u32 index;

		ret = -EFAULT;
		if (copy_from_user(&vq_info, argp, sizeof(vq_info)))
			break;

		ret = -EINVAL;
		if (vq_info.index >= dev->vq_num)
			break;

		index = array_index_nospec(vq_info.index, dev->vq_num);
		vq = dev->vqs[index];
		vq_info.desc_addr = vq->desc_addr;
		vq_info.driver_addr = vq->driver_addr;
		vq_info.device_addr = vq->device_addr;
		vq_info.num = vq->num;

		/* state layout depends on the negotiated ring format */
		if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
			vq_info.packed.last_avail_counter =
				vq->state.packed.last_avail_counter;
			vq_info.packed.last_avail_idx =
				vq->state.packed.last_avail_idx;
			vq_info.packed.last_used_counter =
				vq->state.packed.last_used_counter;
			vq_info.packed.last_used_idx =
				vq->state.packed.last_used_idx;
		} else
			vq_info.split.avail_index =
				vq->state.split.avail_index;

		vq_info.ready = vq->ready;

		ret = -EFAULT;
		if (copy_to_user(argp, &vq_info, sizeof(vq_info)))
			break;

		ret = 0;
		break;
	}
	case VDUSE_VQ_SETUP_KICKFD: {
		/* Attach an eventfd used to signal guest kicks to userspace */
		struct vduse_vq_eventfd eventfd;

		ret = -EFAULT;
		if (copy_from_user(&eventfd, argp, sizeof(eventfd)))
			break;

		ret = vduse_kickfd_setup(dev, &eventfd);
		break;
	}
	case VDUSE_VQ_INJECT_IRQ: {
		/* Inject a used-buffer interrupt for one virtqueue */
		u32 index;

		ret = -EFAULT;
		if (get_user(index, (u32 __user *)argp))
			break;

		ret = -EINVAL;
		if (index >= dev->vq_num)
			break;

		index = array_index_nospec(index, dev->vq_num);

		/* re-pick the bound CPU in case affinity/hotplug changed */
		vduse_vq_update_effective_cpu(dev->vqs[index]);
		ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index]->inject,
					dev->vqs[index]->irq_effective_cpu);
		break;
	}
	case VDUSE_IOTLB_REG_UMEM: {
		/* Provide userspace memory to back the bounce buffer */
		struct vduse_iova_umem umem;

		ret = -EFAULT;
		if (copy_from_user(&umem, argp, sizeof(umem)))
			break;

		ret = -EINVAL;
		if (!is_mem_zero((const char *)umem.reserved,
				 sizeof(umem.reserved)))
			break;

		ret = vduse_dev_reg_umem(dev, umem.iova,
					 umem.uaddr, umem.size);
		break;
	}
	case VDUSE_IOTLB_DEREG_UMEM: {
		/* Withdraw the previously registered bounce-buffer memory */
		struct vduse_iova_umem umem;

		ret = -EFAULT;
		if (copy_from_user(&umem, argp, sizeof(umem)))
			break;

		ret = -EINVAL;
		if (!is_mem_zero((const char *)umem.reserved,
				 sizeof(umem.reserved)))
			break;

		ret = vduse_dev_dereg_umem(dev, umem.iova,
					   umem.size);
		break;
	}
	case VDUSE_IOTLB_GET_INFO: {
		/* Query capabilities of the IOTLB region covering a range */
		struct vduse_iova_info info;
		struct vhost_iotlb_map *map;
		struct vduse_iova_domain *domain = dev->domain;

		ret = -EFAULT;
		if (copy_from_user(&info, argp, sizeof(info)))
			break;

		ret = -EINVAL;
		if (info.start > info.last)
			break;

		if (!is_mem_zero((const char *)info.reserved,
				 sizeof(info.reserved)))
			break;

		spin_lock(&domain->iotlb_lock);
		map = vhost_iotlb_itree_first(domain->iotlb,
					      info.start, info.last);
		if (map) {
			info.start = map->start;
			info.last = map->last;
			info.capability = 0;
			/* the bounce region may be backed by user memory */
			if (domain->bounce_map && map->start == 0 &&
			    map->last == domain->bounce_size - 1)
				info.capability |= VDUSE_IOVA_CAP_UMEM;
		}
		spin_unlock(&domain->iotlb_lock);
		if (!map)
			break;

		ret = -EFAULT;
		if (copy_to_user(argp, &info, sizeof(info)))
			break;

		ret = 0;
		break;
	}
	default:
		ret = -ENOIOCTLCMD;
		break;
	}

	return ret;
}
1321 | ||
/*
 * Called when userspace closes /dev/vduse/$DEVICE: drop any registered
 * bounce-buffer memory and mark the device disconnected so it can be
 * re-opened later.
 */
static int vduse_dev_release(struct inode *inode, struct file *file)
{
	struct vduse_dev *dev = file->private_data;

	vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
	spin_lock(&dev->msg_lock);
	/* Make sure the inflight messages can be processed after reconnection */
	list_splice_init(&dev->recv_list, &dev->send_list);
	spin_unlock(&dev->msg_lock);
	dev->connected = false;

	return 0;
}
1335 | ||
1336 | static struct vduse_dev *vduse_dev_get_from_minor(int minor) | |
1337 | { | |
1338 | struct vduse_dev *dev; | |
1339 | ||
1340 | mutex_lock(&vduse_lock); | |
1341 | dev = idr_find(&vduse_idr, minor); | |
1342 | mutex_unlock(&vduse_lock); | |
1343 | ||
1344 | return dev; | |
1345 | } | |
1346 | ||
1347 | static int vduse_dev_open(struct inode *inode, struct file *file) | |
1348 | { | |
1349 | int ret; | |
1350 | struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode)); | |
1351 | ||
1352 | if (!dev) | |
1353 | return -ENODEV; | |
1354 | ||
1355 | ret = -EBUSY; | |
1356 | mutex_lock(&dev->lock); | |
1357 | if (dev->connected) | |
1358 | goto unlock; | |
1359 | ||
1360 | ret = 0; | |
1361 | dev->connected = true; | |
1362 | file->private_data = dev; | |
1363 | unlock: | |
1364 | mutex_unlock(&dev->lock); | |
1365 | ||
1366 | return ret; | |
1367 | } | |
1368 | ||
/* File operations for the per-device char device /dev/vduse/$DEVICE */
static const struct file_operations vduse_dev_fops = {
	.owner		= THIS_MODULE,
	.open		= vduse_dev_open,
	.release	= vduse_dev_release,
	.read_iter	= vduse_dev_read_iter,
	.write_iter	= vduse_dev_write_iter,
	.poll		= vduse_dev_poll,
	.unlocked_ioctl	= vduse_dev_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.llseek		= noop_llseek,
};
1380 | ||
78885597 XY |
1381 | static void vduse_dev_deinit_vqs(struct vduse_dev *dev) |
1382 | { | |
1383 | int i; | |
1384 | ||
1385 | if (!dev->vqs) | |
1386 | return; | |
1387 | ||
1388 | for (i = 0; i < dev->vq_num; i++) | |
1389 | kfree(dev->vqs[i]); | |
1390 | kfree(dev->vqs); | |
1391 | } | |
1392 | ||
1393 | static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num) | |
1394 | { | |
1395 | int i; | |
1396 | ||
1397 | dev->vq_align = vq_align; | |
1398 | dev->vq_num = vq_num; | |
1399 | dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL); | |
1400 | if (!dev->vqs) | |
1401 | return -ENOMEM; | |
1402 | ||
1403 | for (i = 0; i < vq_num; i++) { | |
1404 | dev->vqs[i] = kzalloc(sizeof(*dev->vqs[i]), GFP_KERNEL); | |
1405 | if (!dev->vqs[i]) | |
1406 | goto err; | |
1407 | ||
1408 | dev->vqs[i]->index = i; | |
28f6288e | 1409 | dev->vqs[i]->irq_effective_cpu = IRQ_UNBOUND; |
78885597 XY |
1410 | INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject); |
1411 | INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work); | |
1412 | spin_lock_init(&dev->vqs[i]->kick_lock); | |
1413 | spin_lock_init(&dev->vqs[i]->irq_lock); | |
28f6288e | 1414 | cpumask_setall(&dev->vqs[i]->irq_affinity); |
78885597 XY |
1415 | } |
1416 | ||
1417 | return 0; | |
1418 | err: | |
1419 | while (i--) | |
1420 | kfree(dev->vqs[i]); | |
1421 | kfree(dev->vqs); | |
1422 | dev->vqs = NULL; | |
1423 | return -ENOMEM; | |
1424 | } | |
1425 | ||
c8a6153b XY |
1426 | static struct vduse_dev *vduse_dev_create(void) |
1427 | { | |
1428 | struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL); | |
1429 | ||
1430 | if (!dev) | |
1431 | return NULL; | |
1432 | ||
1433 | mutex_init(&dev->lock); | |
79a463be | 1434 | mutex_init(&dev->mem_lock); |
c8a6153b XY |
1435 | spin_lock_init(&dev->msg_lock); |
1436 | INIT_LIST_HEAD(&dev->send_list); | |
1437 | INIT_LIST_HEAD(&dev->recv_list); | |
1438 | spin_lock_init(&dev->irq_lock); | |
0943aacf | 1439 | init_rwsem(&dev->rwsem); |
c8a6153b XY |
1440 | |
1441 | INIT_WORK(&dev->inject, vduse_dev_irq_inject); | |
1442 | init_waitqueue_head(&dev->waitq); | |
1443 | ||
1444 | return dev; | |
1445 | } | |
1446 | ||
/* Counterpart of vduse_dev_create(): frees the device structure itself. */
static void vduse_dev_destroy(struct vduse_dev *dev)
{
	kfree(dev);
}
1451 | ||
1452 | static struct vduse_dev *vduse_find_dev(const char *name) | |
1453 | { | |
1454 | struct vduse_dev *dev; | |
1455 | int id; | |
1456 | ||
1457 | idr_for_each_entry(&vduse_idr, dev, id) | |
1458 | if (!strcmp(dev->name, name)) | |
1459 | return dev; | |
1460 | ||
1461 | return NULL; | |
1462 | } | |
1463 | ||
/*
 * Destroy the named VDUSE device and release all of its resources.
 *
 * Fails -EINVAL if no such device exists, -EBUSY if a vDPA device is
 * instantiated on it or userspace is still connected.  The caller must
 * hold vduse_lock.
 */
static int vduse_destroy_dev(char *name)
{
	struct vduse_dev *dev = vduse_find_dev(name);

	if (!dev)
		return -EINVAL;

	mutex_lock(&dev->lock);
	if (dev->vdev || dev->connected) {
		mutex_unlock(&dev->lock);
		return -EBUSY;
	}
	/*
	 * Pretend to be connected so a racing open() fails -EBUSY while
	 * the device is being torn down.
	 */
	dev->connected = true;
	mutex_unlock(&dev->lock);

	vduse_dev_reset(dev);
	device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
	idr_remove(&vduse_idr, dev->minor);
	kvfree(dev->config);
	vduse_dev_deinit_vqs(dev);
	vduse_domain_destroy(dev->domain);
	kfree(dev->name);
	vduse_dev_destroy(dev);
	/* balance the __module_get() taken in vduse_create_dev() */
	module_put(THIS_MODULE);

	return 0;
}
1491 | ||
1492 | static bool device_is_allowed(u32 device_id) | |
1493 | { | |
1494 | int i; | |
1495 | ||
1496 | for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++) | |
1497 | if (allowed_device_id[i] == device_id) | |
1498 | return true; | |
1499 | ||
1500 | return false; | |
1501 | } | |
1502 | ||
1503 | static bool features_is_valid(u64 features) | |
1504 | { | |
1505 | if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) | |
1506 | return false; | |
1507 | ||
1508 | /* Now we only support read-only configuration space */ | |
1509 | if (features & (1ULL << VIRTIO_BLK_F_CONFIG_WCE)) | |
1510 | return false; | |
1511 | ||
1512 | return true; | |
1513 | } | |
1514 | ||
1515 | static bool vduse_validate_config(struct vduse_dev_config *config) | |
1516 | { | |
1517 | if (!is_mem_zero((const char *)config->reserved, | |
1518 | sizeof(config->reserved))) | |
1519 | return false; | |
1520 | ||
1521 | if (config->vq_align > PAGE_SIZE) | |
1522 | return false; | |
1523 | ||
1524 | if (config->config_size > PAGE_SIZE) | |
1525 | return false; | |
1526 | ||
937c783a HM |
1527 | if (config->vq_num > 0xffff) |
1528 | return false; | |
1529 | ||
c8a6153b XY |
1530 | if (!device_is_allowed(config->device_id)) |
1531 | return false; | |
1532 | ||
1533 | if (!features_is_valid(config->features)) | |
1534 | return false; | |
1535 | ||
1536 | return true; | |
1537 | } | |
1538 | ||
/* sysfs read of the per-device userspace message timeout (seconds) */
static ssize_t msg_timeout_show(struct device *device,
				struct device_attribute *attr, char *buf)
{
	struct vduse_dev *dev = dev_get_drvdata(device);

	return sysfs_emit(buf, "%u\n", dev->msg_timeout);
}
1546 | ||
1547 | static ssize_t msg_timeout_store(struct device *device, | |
1548 | struct device_attribute *attr, | |
1549 | const char *buf, size_t count) | |
1550 | { | |
1551 | struct vduse_dev *dev = dev_get_drvdata(device); | |
1552 | int ret; | |
1553 | ||
1554 | ret = kstrtouint(buf, 10, &dev->msg_timeout); | |
1555 | if (ret < 0) | |
1556 | return ret; | |
1557 | ||
1558 | return count; | |
1559 | } | |
1560 | ||
static DEVICE_ATTR_RW(msg_timeout);

/* sysfs attributes exposed by each VDUSE device node */
static struct attribute *vduse_dev_attrs[] = {
	&dev_attr_msg_timeout.attr,
	NULL
};

ATTRIBUTE_GROUPS(vduse_dev);
1569 | ||
/*
 * Create a VDUSE device from a validated VDUSE_CREATE_DEV request.
 *
 * On success the device owns @config_buf (freed on destroy); on failure
 * the caller keeps ownership and must free it.  The caller must hold
 * vduse_lock.  Returns 0 on success, negative errno on failure.
 */
static int vduse_create_dev(struct vduse_dev_config *config,
			    void *config_buf, u64 api_version)
{
	int ret;
	struct vduse_dev *dev;

	ret = -EEXIST;
	if (vduse_find_dev(config->name))
		goto err;

	ret = -ENOMEM;
	dev = vduse_dev_create();
	if (!dev)
		goto err;

	dev->api_version = api_version;
	dev->device_features = config->features;
	dev->device_id = config->device_id;
	dev->vendor_id = config->vendor_id;
	dev->name = kstrdup(config->name, GFP_KERNEL);
	if (!dev->name)
		goto err_str;

	dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
					  VDUSE_BOUNCE_SIZE);
	if (!dev->domain)
		goto err_domain;

	dev->config = config_buf;
	dev->config_size = config->config_size;

	ret = vduse_dev_init_vqs(dev, config->vq_align, config->vq_num);
	if (ret)
		goto err_vqs;

	/* minor 0 is reserved for /dev/vduse/control */
	ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
	if (ret < 0)
		goto err_idr;

	dev->minor = ret;
	dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
	dev->dev = device_create_with_groups(vduse_class, NULL,
				MKDEV(MAJOR(vduse_major), dev->minor),
				dev, vduse_dev_groups, "%s", config->name);
	if (IS_ERR(dev->dev)) {
		ret = PTR_ERR(dev->dev);
		goto err_dev;
	}
	/* pin the module while a device exists; dropped in destroy */
	__module_get(THIS_MODULE);

	return 0;
err_dev:
	idr_remove(&vduse_idr, dev->minor);
err_idr:
	vduse_dev_deinit_vqs(dev);
err_vqs:
	vduse_domain_destroy(dev->domain);
err_domain:
	kfree(dev->name);
err_str:
	vduse_dev_destroy(dev);
err:
	return ret;
}
1634 | ||
/*
 * ioctl handler for /dev/vduse/control: API-version negotiation and
 * device creation/destruction.  All commands run under vduse_lock.
 */
static long vduse_ioctl(struct file *file, unsigned int cmd,
			unsigned long arg)
{
	int ret;
	void __user *argp = (void __user *)arg;
	struct vduse_control *control = file->private_data;

	mutex_lock(&vduse_lock);
	switch (cmd) {
	case VDUSE_GET_API_VERSION:
		ret = put_user(control->api_version, (u64 __user *)argp);
		break;
	case VDUSE_SET_API_VERSION: {
		u64 api_version;

		ret = -EFAULT;
		if (get_user(api_version, (u64 __user *)argp))
			break;

		ret = -EINVAL;
		/* only allow downgrading, never a version we don't know */
		if (api_version > VDUSE_API_VERSION)
			break;

		ret = 0;
		control->api_version = api_version;
		break;
	}
	case VDUSE_CREATE_DEV: {
		struct vduse_dev_config config;
		unsigned long size = offsetof(struct vduse_dev_config, config);
		void *buf;

		ret = -EFAULT;
		if (copy_from_user(&config, argp, size))
			break;

		ret = -EINVAL;
		if (vduse_validate_config(&config) == false)
			break;

		/* copy the trailing initial config space separately */
		buf = vmemdup_user(argp + size, config.config_size);
		if (IS_ERR(buf)) {
			ret = PTR_ERR(buf);
			break;
		}
		config.name[VDUSE_NAME_MAX - 1] = '\0';
		ret = vduse_create_dev(&config, buf, control->api_version);
		/* on success the device owns buf; free it only on failure */
		if (ret)
			kvfree(buf);
		break;
	}
	case VDUSE_DESTROY_DEV: {
		char name[VDUSE_NAME_MAX];

		ret = -EFAULT;
		if (copy_from_user(name, argp, VDUSE_NAME_MAX))
			break;

		name[VDUSE_NAME_MAX - 1] = '\0';
		ret = vduse_destroy_dev(name);
		break;
	}
	default:
		ret = -EINVAL;
		break;
	}
	mutex_unlock(&vduse_lock);

	return ret;
}
1705 | ||
/* Release handler for /dev/vduse/control: frees the per-open state. */
static int vduse_release(struct inode *inode, struct file *file)
{
	struct vduse_control *control = file->private_data;

	kfree(control);
	return 0;
}
1713 | ||
1714 | static int vduse_open(struct inode *inode, struct file *file) | |
1715 | { | |
1716 | struct vduse_control *control; | |
1717 | ||
1718 | control = kmalloc(sizeof(struct vduse_control), GFP_KERNEL); | |
1719 | if (!control) | |
1720 | return -ENOMEM; | |
1721 | ||
1722 | control->api_version = VDUSE_API_VERSION; | |
1723 | file->private_data = control; | |
1724 | ||
1725 | return 0; | |
1726 | } | |
1727 | ||
/* File operations for the control char device /dev/vduse/control */
static const struct file_operations vduse_ctrl_fops = {
	.owner		= THIS_MODULE,
	.open		= vduse_open,
	.release	= vduse_release,
	.unlocked_ioctl	= vduse_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.llseek		= noop_llseek,
};
1736 | ||
/* Place all VDUSE device nodes under the /dev/vduse/ directory */
static char *vduse_devnode(const struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
}
1741 | ||
0e0348ac PP |
/*
 * The single VDUSE vDPA management device: pairs the vdpa_mgmt_dev
 * registered with the vDPA bus with its backing struct device.
 */
struct vduse_mgmt_dev {
	struct vdpa_mgmt_dev mgmt_dev;
	struct device dev;
};

static struct vduse_mgmt_dev *vduse_mgmt;
c8a6153b XY |
1748 | |
/*
 * Instantiate the vDPA device on top of an existing VDUSE device.
 *
 * Returns -EEXIST if one is already instantiated.  After a successful
 * vdpa_alloc_device() the vdpa dev refcount owns the allocation, so
 * error paths release it with put_device() rather than kfree().
 */
static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
{
	struct vduse_vdpa *vdev;
	int ret;

	if (dev->vdev)
		return -EEXIST;

	vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
				 &vduse_vdpa_config_ops, 1, 1, name, true);
	if (IS_ERR(vdev))
		return PTR_ERR(vdev);

	dev->vdev = vdev;
	vdev->dev = dev;
	vdev->vdpa.dev.dma_mask = &vdev->vdpa.dev.coherent_dma_mask;
	ret = dma_set_mask_and_coherent(&vdev->vdpa.dev, DMA_BIT_MASK(64));
	if (ret) {
		put_device(&vdev->vdpa.dev);
		return ret;
	}
	/* route DMA through the VDUSE IOVA domain (bounce/user pages) */
	set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops);
	vdev->vdpa.dma_dev = &vdev->vdpa.dev;
	vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev;

	return 0;
}
1776 | ||
d8ca2fa5 PP |
/*
 * vDPA management op: attach a vDPA device to the named VDUSE device.
 *
 * The VDUSE device must exist and have completed userspace setup
 * (vduse_dev_is_ready()).  On registration failure the vdpa device
 * reference taken in vduse_dev_init_vdpa() is dropped.
 */
static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
			const struct vdpa_dev_set_config *config)
{
	struct vduse_dev *dev;
	int ret;

	mutex_lock(&vduse_lock);
	dev = vduse_find_dev(name);
	if (!dev || !vduse_dev_is_ready(dev)) {
		mutex_unlock(&vduse_lock);
		return -EINVAL;
	}
	ret = vduse_dev_init_vdpa(dev, name);
	mutex_unlock(&vduse_lock);
	if (ret)
		return ret;

	ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
	if (ret) {
		put_device(&dev->vdev->vdpa.dev);
		return ret;
	}

	return 0;
}
1802 | ||
/* vDPA management op: detach and unregister the vDPA device. */
static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
{
	_vdpa_unregister_device(dev);
}
1807 | ||
/* Management-device callbacks registered with the vDPA bus */
static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = {
	.dev_add = vdpa_dev_add,
	.dev_del = vdpa_dev_del,
};

/* Device types this management device can instantiate */
static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ 0 },
};
1817 | ||
0e0348ac PP |
1818 | static void vduse_mgmtdev_release(struct device *dev) |
1819 | { | |
1820 | struct vduse_mgmt_dev *mgmt_dev; | |
1821 | ||
1822 | mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev); | |
1823 | kfree(mgmt_dev); | |
1824 | } | |
c8a6153b XY |
1825 | |
/*
 * Register the VDUSE vDPA management device with the vDPA bus.
 *
 * Error-path asymmetry: before device_register() succeeds the struct is
 * freed with kfree()/put_device(); afterwards device_unregister() drops
 * the reference and vduse_mgmtdev_release() frees it.
 */
static int vduse_mgmtdev_init(void)
{
	int ret;

	vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL);
	if (!vduse_mgmt)
		return -ENOMEM;

	ret = dev_set_name(&vduse_mgmt->dev, "vduse");
	if (ret) {
		kfree(vduse_mgmt);
		return ret;
	}

	vduse_mgmt->dev.release = vduse_mgmtdev_release;

	ret = device_register(&vduse_mgmt->dev);
	if (ret)
		goto dev_reg_err;

	vduse_mgmt->mgmt_dev.id_table = id_table;
	vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops;
	vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev;
	ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev);
	if (ret)
		device_unregister(&vduse_mgmt->dev);

	return ret;

dev_reg_err:
	/* device_register() failed: drop the initialized ref to free it */
	put_device(&vduse_mgmt->dev);
	return ret;
}
1859 | ||
/* Unregister the management device from the vDPA bus and release it. */
static void vduse_mgmtdev_exit(void)
{
	vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev);
	device_unregister(&vduse_mgmt->dev);
}
1865 | ||
/*
 * Module init: set up the vduse class, the char-device region (minor 0
 * is /dev/vduse/control, minors 1..VDUSE_DEV_MAX-1 are per-device
 * nodes), the two interrupt workqueues, the IOVA domain layer and the
 * vDPA management device.  Unwinds in reverse order on failure.
 */
static int vduse_init(void)
{
	int ret;
	struct device *dev;

	vduse_class = class_create(THIS_MODULE, "vduse");
	if (IS_ERR(vduse_class))
		return PTR_ERR(vduse_class);

	vduse_class->devnode = vduse_devnode;

	ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
	if (ret)
		goto err_chardev_region;

	/* /dev/vduse/control */
	cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops);
	vduse_ctrl_cdev.owner = THIS_MODULE;
	ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1);
	if (ret)
		goto err_ctrl_cdev;

	dev = device_create(vduse_class, NULL, vduse_major, NULL, "control");
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto err_device;
	}

	/* /dev/vduse/$DEVICE */
	cdev_init(&vduse_cdev, &vduse_dev_fops);
	vduse_cdev.owner = THIS_MODULE;
	ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1),
		       VDUSE_DEV_MAX - 1);
	if (ret)
		goto err_cdev;

	ret = -ENOMEM;
	/* unbound queue for IRQ work not pinned to a CPU */
	vduse_irq_wq = alloc_workqueue("vduse-irq",
				WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
	if (!vduse_irq_wq)
		goto err_wq;

	/* per-CPU queue for IRQ work bound via queue_work_on() */
	vduse_irq_bound_wq = alloc_workqueue("vduse-irq-bound", WQ_HIGHPRI, 0);
	if (!vduse_irq_bound_wq)
		goto err_bound_wq;

	ret = vduse_domain_init();
	if (ret)
		goto err_domain;

	ret = vduse_mgmtdev_init();
	if (ret)
		goto err_mgmtdev;

	return 0;
err_mgmtdev:
	vduse_domain_exit();
err_domain:
	destroy_workqueue(vduse_irq_bound_wq);
err_bound_wq:
	destroy_workqueue(vduse_irq_wq);
err_wq:
	cdev_del(&vduse_cdev);
err_cdev:
	device_destroy(vduse_class, vduse_major);
err_device:
	cdev_del(&vduse_ctrl_cdev);
err_ctrl_cdev:
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
err_chardev_region:
	class_destroy(vduse_class);
	return ret;
}
1939 | module_init(vduse_init); | |
1940 | ||
/* Module exit: tear down everything vduse_init() set up, in reverse. */
static void vduse_exit(void)
{
	vduse_mgmtdev_exit();
	vduse_domain_exit();
	destroy_workqueue(vduse_irq_bound_wq);
	destroy_workqueue(vduse_irq_wq);
	cdev_del(&vduse_cdev);
	device_destroy(vduse_class, vduse_major);
	cdev_del(&vduse_ctrl_cdev);
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
	class_destroy(vduse_class);
}
1953 | module_exit(vduse_exit); | |
1954 | ||
1955 | MODULE_LICENSE(DRV_LICENSE); | |
1956 | MODULE_AUTHOR(DRV_AUTHOR); | |
1957 | MODULE_DESCRIPTION(DRV_DESC); |