1 /*
2 * Virtio Support
3 *
4 * Copyright IBM, Corp. 2007
5 *
6 * Authors:
7 * Anthony Liguori <aliguori@us.ibm.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
11 *
12 */
13
14 #include "qemu/osdep.h"
15 #include "qapi/error.h"
16 #include "qapi/qapi-commands-virtio.h"
17 #include "trace.h"
18 #include "qemu/error-report.h"
19 #include "qemu/log.h"
20 #include "qemu/main-loop.h"
21 #include "qemu/module.h"
22 #include "qom/object_interfaces.h"
23 #include "hw/core/cpu.h"
24 #include "hw/virtio/virtio.h"
25 #include "hw/virtio/vhost.h"
26 #include "migration/qemu-file-types.h"
27 #include "qemu/atomic.h"
28 #include "hw/virtio/virtio-bus.h"
29 #include "hw/qdev-properties.h"
30 #include "hw/virtio/virtio-access.h"
31 #include "sysemu/dma.h"
32 #include "sysemu/runstate.h"
33 #include "virtio-qmp.h"
34
35 #include "standard-headers/linux/virtio_ids.h"
36 #include "standard-headers/linux/vhost_types.h"
37 #include "standard-headers/linux/virtio_blk.h"
38 #include "standard-headers/linux/virtio_console.h"
39 #include "standard-headers/linux/virtio_gpu.h"
40 #include "standard-headers/linux/virtio_net.h"
41 #include "standard-headers/linux/virtio_scsi.h"
42 #include "standard-headers/linux/virtio_i2c.h"
43 #include "standard-headers/linux/virtio_balloon.h"
44 #include "standard-headers/linux/virtio_iommu.h"
45 #include "standard-headers/linux/virtio_mem.h"
46 #include "standard-headers/linux/virtio_vsock.h"
47
48 QmpVirtIODeviceList virtio_list;
49
50 /*
51 * Maximum size of virtio device config space
52 */
53 #define VHOST_USER_MAX_CONFIG_SIZE 256
54
55 /*
56 * The alignment to use between consumer and producer parts of vring.
57 * x86 pagesize again. This is the default, used by transports like PCI
58 * which don't provide a means for the guest to tell the host the alignment.
59 */
60 #define VIRTIO_PCI_VRING_ALIGN 4096
61
62 typedef struct VRingDesc
63 {
64 uint64_t addr;
65 uint32_t len;
66 uint16_t flags;
67 uint16_t next;
68 } VRingDesc;
69
70 typedef struct VRingPackedDesc {
71 uint64_t addr;
72 uint32_t len;
73 uint16_t id;
74 uint16_t flags;
75 } VRingPackedDesc;
76
77 typedef struct VRingAvail
78 {
79 uint16_t flags;
80 uint16_t idx;
81 uint16_t ring[];
82 } VRingAvail;
83
84 typedef struct VRingUsedElem
85 {
86 uint32_t id;
87 uint32_t len;
88 } VRingUsedElem;
89
90 typedef struct VRingUsed
91 {
92 uint16_t flags;
93 uint16_t idx;
94 VRingUsedElem ring[];
95 } VRingUsed;
96
97 typedef struct VRingMemoryRegionCaches {
98 struct rcu_head rcu;
99 MemoryRegionCache desc;
100 MemoryRegionCache avail;
101 MemoryRegionCache used;
102 } VRingMemoryRegionCaches;
103
104 typedef struct VRing
105 {
106 unsigned int num;
107 unsigned int num_default;
108 unsigned int align;
109 hwaddr desc;
110 hwaddr avail;
111 hwaddr used;
112 VRingMemoryRegionCaches *caches;
113 } VRing;
114
115 typedef struct VRingPackedDescEvent {
116 uint16_t off_wrap;
117 uint16_t flags;
118 } VRingPackedDescEvent;
119
120 struct VirtQueue
121 {
122 VRing vring;
123 VirtQueueElement *used_elems;
124
125 /* Next head to pop */
126 uint16_t last_avail_idx;
127 bool last_avail_wrap_counter;
128
129 /* Last avail_idx read from VQ. */
130 uint16_t shadow_avail_idx;
131 bool shadow_avail_wrap_counter;
132
133 uint16_t used_idx;
134 bool used_wrap_counter;
135
136 /* Last used index value we have signalled on */
137 uint16_t signalled_used;
138
139 /* Is the above signalled_used value valid? */
140 bool signalled_used_valid;
141
142 /* Notification enabled? */
143 bool notification;
144
145 uint16_t queue_index;
146
147 unsigned int inuse;
148
149 uint16_t vector;
150 VirtIOHandleOutput handle_output;
151 VirtIODevice *vdev;
152 EventNotifier guest_notifier;
153 EventNotifier host_notifier;
154 bool host_notifier_enabled;
155 QLIST_ENTRY(VirtQueue) node;
156 };
157
158 const char *virtio_device_names[] = {
159 [VIRTIO_ID_NET] = "virtio-net",
160 [VIRTIO_ID_BLOCK] = "virtio-blk",
161 [VIRTIO_ID_CONSOLE] = "virtio-serial",
162 [VIRTIO_ID_RNG] = "virtio-rng",
163 [VIRTIO_ID_BALLOON] = "virtio-balloon",
164 [VIRTIO_ID_IOMEM] = "virtio-iomem",
165 [VIRTIO_ID_RPMSG] = "virtio-rpmsg",
166 [VIRTIO_ID_SCSI] = "virtio-scsi",
167 [VIRTIO_ID_9P] = "virtio-9p",
168 [VIRTIO_ID_MAC80211_WLAN] = "virtio-mac-wlan",
169 [VIRTIO_ID_RPROC_SERIAL] = "virtio-rproc-serial",
170 [VIRTIO_ID_CAIF] = "virtio-caif",
171 [VIRTIO_ID_MEMORY_BALLOON] = "virtio-mem-balloon",
172 [VIRTIO_ID_GPU] = "virtio-gpu",
173 [VIRTIO_ID_CLOCK] = "virtio-clk",
174 [VIRTIO_ID_INPUT] = "virtio-input",
175 [VIRTIO_ID_VSOCK] = "vhost-vsock",
176 [VIRTIO_ID_CRYPTO] = "virtio-crypto",
177 [VIRTIO_ID_SIGNAL_DIST] = "virtio-signal",
178 [VIRTIO_ID_PSTORE] = "virtio-pstore",
179 [VIRTIO_ID_IOMMU] = "virtio-iommu",
180 [VIRTIO_ID_MEM] = "virtio-mem",
181 [VIRTIO_ID_SOUND] = "virtio-sound",
182 [VIRTIO_ID_FS] = "virtio-user-fs",
183 [VIRTIO_ID_PMEM] = "virtio-pmem",
184 [VIRTIO_ID_RPMB] = "virtio-rpmb",
185 [VIRTIO_ID_MAC80211_HWSIM] = "virtio-mac-hwsim",
186 [VIRTIO_ID_VIDEO_ENCODER] = "virtio-vid-encoder",
187 [VIRTIO_ID_VIDEO_DECODER] = "virtio-vid-decoder",
188 [VIRTIO_ID_SCMI] = "virtio-scmi",
189 [VIRTIO_ID_NITRO_SEC_MOD] = "virtio-nitro-sec-mod",
190 [VIRTIO_ID_I2C_ADAPTER] = "vhost-user-i2c",
191 [VIRTIO_ID_WATCHDOG] = "virtio-watchdog",
192 [VIRTIO_ID_CAN] = "virtio-can",
193 [VIRTIO_ID_DMABUF] = "virtio-dmabuf",
194 [VIRTIO_ID_PARAM_SERV] = "virtio-param-serv",
195 [VIRTIO_ID_AUDIO_POLICY] = "virtio-audio-pol",
196 [VIRTIO_ID_BT] = "virtio-bluetooth",
197 [VIRTIO_ID_GPIO] = "virtio-gpio"
198 };
199
200 static const char *virtio_id_to_name(uint16_t device_id)
201 {
202 assert(device_id < G_N_ELEMENTS(virtio_device_names));
203 const char *name = virtio_device_names[device_id];
204 assert(name != NULL);
205 return name;
206 }
207
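/*
 * Illustrative use (not part of the build): map a device ID from the
 * standard headers to QEMU's device name, e.g.
 *
 *     virtio_id_to_name(VIRTIO_ID_NET);   returns "virtio-net"
 *
 * Both assertions fire for IDs outside virtio_device_names[] or for slots
 * left NULL, so this is only safe for the IDs listed above.
 */
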
208 /* Called within call_rcu(). */
209 static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
210 {
211 assert(caches != NULL);
212 address_space_cache_destroy(&caches->desc);
213 address_space_cache_destroy(&caches->avail);
214 address_space_cache_destroy(&caches->used);
215 g_free(caches);
216 }
217
218 static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq)
219 {
220 VRingMemoryRegionCaches *caches;
221
222 caches = qatomic_read(&vq->vring.caches);
223 qatomic_rcu_set(&vq->vring.caches, NULL);
224 if (caches) {
225 call_rcu(caches, virtio_free_region_cache, rcu);
226 }
227 }
228
229 static void virtio_init_region_cache(VirtIODevice *vdev, int n)
230 {
231 VirtQueue *vq = &vdev->vq[n];
232 VRingMemoryRegionCaches *old = vq->vring.caches;
233 VRingMemoryRegionCaches *new = NULL;
234 hwaddr addr, size;
235 int64_t len;
236 bool packed;
237
238
239 addr = vq->vring.desc;
240 if (!addr) {
241 goto out_no_cache;
242 }
243 new = g_new0(VRingMemoryRegionCaches, 1);
244 size = virtio_queue_get_desc_size(vdev, n);
245 packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
246 true : false;
247 len = address_space_cache_init(&new->desc, vdev->dma_as,
248 addr, size, packed);
249 if (len < size) {
250 virtio_error(vdev, "Cannot map desc");
251 goto err_desc;
252 }
253
254 size = virtio_queue_get_used_size(vdev, n);
255 len = address_space_cache_init(&new->used, vdev->dma_as,
256 vq->vring.used, size, true);
257 if (len < size) {
258 virtio_error(vdev, "Cannot map used");
259 goto err_used;
260 }
261
262 size = virtio_queue_get_avail_size(vdev, n);
263 len = address_space_cache_init(&new->avail, vdev->dma_as,
264 vq->vring.avail, size, false);
265 if (len < size) {
266 virtio_error(vdev, "Cannot map avail");
267 goto err_avail;
268 }
269
270 qatomic_rcu_set(&vq->vring.caches, new);
271 if (old) {
272 call_rcu(old, virtio_free_region_cache, rcu);
273 }
274 return;
275
276 err_avail:
277 address_space_cache_destroy(&new->avail);
278 err_used:
279 address_space_cache_destroy(&new->used);
280 err_desc:
281 address_space_cache_destroy(&new->desc);
282 out_no_cache:
283 g_free(new);
284 virtio_virtqueue_reset_region_cache(vq);
285 }
286
287 /* virt queue functions */
288 void virtio_queue_update_rings(VirtIODevice *vdev, int n)
289 {
290 VRing *vring = &vdev->vq[n].vring;
291
292 if (!vring->num || !vring->desc || !vring->align) {
293 /* not yet setup -> nothing to do */
294 return;
295 }
296 vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
297 vring->used = vring_align(vring->avail +
298 offsetof(VRingAvail, ring[vring->num]),
299 vring->align);
300 virtio_init_region_cache(vdev, n);
301 }
302
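/*
 * Example (illustrative only): for a split ring with num = 256 descriptors
 * and the default 4096-byte alignment, starting at guest address D:
 *
 *     avail = D + 256 * sizeof(VRingDesc)                       = D + 4096
 *     used  = vring_align(avail + offsetof(VRingAvail, ring[256]), 4096)
 *           = vring_align(D + 4096 + 4 + 2 * 256, 4096)         = D + 8192
 *
 * Transports that let the driver place each part separately (e.g. modern
 * virtio-pci) set vring.desc/avail/used directly and do not rely on this
 * contiguous layout.
 */
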
303 /* Called within rcu_read_lock(). */
304 static void vring_split_desc_read(VirtIODevice *vdev, VRingDesc *desc,
305 MemoryRegionCache *cache, int i)
306 {
307 address_space_read_cached(cache, i * sizeof(VRingDesc),
308 desc, sizeof(VRingDesc));
309 virtio_tswap64s(vdev, &desc->addr);
310 virtio_tswap32s(vdev, &desc->len);
311 virtio_tswap16s(vdev, &desc->flags);
312 virtio_tswap16s(vdev, &desc->next);
313 }
314
315 static void vring_packed_event_read(VirtIODevice *vdev,
316 MemoryRegionCache *cache,
317 VRingPackedDescEvent *e)
318 {
319 hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap);
320 hwaddr off_flags = offsetof(VRingPackedDescEvent, flags);
321
322 e->flags = virtio_lduw_phys_cached(vdev, cache, off_flags);
323 /* Make sure flags is seen before off_wrap */
324 smp_rmb();
325 e->off_wrap = virtio_lduw_phys_cached(vdev, cache, off_off);
326 virtio_tswap16s(vdev, &e->flags);
327 }
328
329 static void vring_packed_off_wrap_write(VirtIODevice *vdev,
330 MemoryRegionCache *cache,
331 uint16_t off_wrap)
332 {
333 hwaddr off = offsetof(VRingPackedDescEvent, off_wrap);
334
335 virtio_stw_phys_cached(vdev, cache, off, off_wrap);
336 address_space_cache_invalidate(cache, off, sizeof(off_wrap));
337 }
338
339 static void vring_packed_flags_write(VirtIODevice *vdev,
340 MemoryRegionCache *cache, uint16_t flags)
341 {
342 hwaddr off = offsetof(VRingPackedDescEvent, flags);
343
344 virtio_stw_phys_cached(vdev, cache, off, flags);
345 address_space_cache_invalidate(cache, off, sizeof(flags));
346 }
347
348 /* Called within rcu_read_lock(). */
349 static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
350 {
351 return qatomic_rcu_read(&vq->vring.caches);
352 }
353
354 /* Called within rcu_read_lock(). */
355 static inline uint16_t vring_avail_flags(VirtQueue *vq)
356 {
357 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
358 hwaddr pa = offsetof(VRingAvail, flags);
359
360 if (!caches) {
361 return 0;
362 }
363
364 return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
365 }
366
367 /* Called within rcu_read_lock(). */
368 static inline uint16_t vring_avail_idx(VirtQueue *vq)
369 {
370 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
371 hwaddr pa = offsetof(VRingAvail, idx);
372
373 if (!caches) {
374 return 0;
375 }
376
377 vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
378 return vq->shadow_avail_idx;
379 }
380
381 /* Called within rcu_read_lock(). */
382 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
383 {
384 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
385 hwaddr pa = offsetof(VRingAvail, ring[i]);
386
387 if (!caches) {
388 return 0;
389 }
390
391 return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
392 }
393
394 /* Called within rcu_read_lock(). */
395 static inline uint16_t vring_get_used_event(VirtQueue *vq)
396 {
397 return vring_avail_ring(vq, vq->vring.num);
398 }
399
400 /* Called within rcu_read_lock(). */
401 static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
402 int i)
403 {
404 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
405 hwaddr pa = offsetof(VRingUsed, ring[i]);
406
407 if (!caches) {
408 return;
409 }
410
411 virtio_tswap32s(vq->vdev, &uelem->id);
412 virtio_tswap32s(vq->vdev, &uelem->len);
413 address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
414 address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
415 }
416
417 /* Called within rcu_read_lock(). */
418 static inline uint16_t vring_used_flags(VirtQueue *vq)
419 {
420 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
421 hwaddr pa = offsetof(VRingUsed, flags);
422
423 if (!caches) {
424 return 0;
425 }
426
427 return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
428 }
429
430 /* Called within rcu_read_lock(). */
431 static uint16_t vring_used_idx(VirtQueue *vq)
432 {
433 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
434 hwaddr pa = offsetof(VRingUsed, idx);
435
436 if (!caches) {
437 return 0;
438 }
439
440 return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
441 }
442
443 /* Called within rcu_read_lock(). */
444 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
445 {
446 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
447 hwaddr pa = offsetof(VRingUsed, idx);
448
449 if (caches) {
450 virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
451 address_space_cache_invalidate(&caches->used, pa, sizeof(val));
452 }
453
454 vq->used_idx = val;
455 }
456
457 /* Called within rcu_read_lock(). */
458 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
459 {
460 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
461 VirtIODevice *vdev = vq->vdev;
462 hwaddr pa = offsetof(VRingUsed, flags);
463 uint16_t flags;
464
465 if (!caches) {
466 return;
467 }
468
469 flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
470 virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
471 address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
472 }
473
474 /* Called within rcu_read_lock(). */
475 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
476 {
477 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
478 VirtIODevice *vdev = vq->vdev;
479 hwaddr pa = offsetof(VRingUsed, flags);
480 uint16_t flags;
481
482 if (!caches) {
483 return;
484 }
485
486 flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
487 virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
488 address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
489 }
490
491 /* Called within rcu_read_lock(). */
492 static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
493 {
494 VRingMemoryRegionCaches *caches;
495 hwaddr pa;
496 if (!vq->notification) {
497 return;
498 }
499
500 caches = vring_get_region_caches(vq);
501 if (!caches) {
502 return;
503 }
504
505 pa = offsetof(VRingUsed, ring[vq->vring.num]);
506 virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
507 address_space_cache_invalidate(&caches->used, pa, sizeof(val));
508 }
509
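/*
 * Layout note for VIRTIO_RING_F_EVENT_IDX (illustrative): the two event
 * words live in the trailing slot of the opposite ring:
 *
 *     used_event  = avail->ring[num]   read by vring_get_used_event()
 *     avail_event = used->ring[num]    written here
 *
 * The device publishes in avail_event the avail index it wants to be
 * kicked for, so the driver can skip notifications until that index is
 * reached.
 */
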
510 static void virtio_queue_split_set_notification(VirtQueue *vq, int enable)
511 {
512 RCU_READ_LOCK_GUARD();
513
514 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
515 vring_set_avail_event(vq, vring_avail_idx(vq));
516 } else if (enable) {
517 vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
518 } else {
519 vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
520 }
521 if (enable) {
522 /* Expose avail event/used flags before caller checks the avail idx. */
523 smp_mb();
524 }
525 }
526
527 static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
528 {
529 uint16_t off_wrap;
530 VRingPackedDescEvent e;
531 VRingMemoryRegionCaches *caches;
532
533 RCU_READ_LOCK_GUARD();
534 caches = vring_get_region_caches(vq);
535 if (!caches) {
536 return;
537 }
538
539 vring_packed_event_read(vq->vdev, &caches->used, &e);
540
541 if (!enable) {
542 e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
543 } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
544 off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15;
545 vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
546 /* Make sure off_wrap is written before flags */
547 smp_wmb();
548 e.flags = VRING_PACKED_EVENT_FLAG_DESC;
549 } else {
550 e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
551 }
552
553 vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
554 if (enable) {
555 /* Expose avail event/used flags before caller checks the avail idx. */
556 smp_mb();
557 }
558 }
559
560 bool virtio_queue_get_notification(VirtQueue *vq)
561 {
562 return vq->notification;
563 }
564
565 void virtio_queue_set_notification(VirtQueue *vq, int enable)
566 {
567 vq->notification = enable;
568
569 if (!vq->vring.desc) {
570 return;
571 }
572
573 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
574 virtio_queue_packed_set_notification(vq, enable);
575 } else {
576 virtio_queue_split_set_notification(vq, enable);
577 }
578 }
579
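/*
 * A common emulation pattern (sketch only, error handling omitted):
 * suppress notifications while draining the ring, then re-enable and
 * re-check to close the race with a guest that kicks just before the
 * re-enable becomes visible:
 *
 *     do {
 *         virtio_queue_set_notification(vq, 0);
 *         while ((elem = virtqueue_pop(vq, sizeof(VirtQueueElement)))) {
 *             ... process and virtqueue_push() the element ...
 *             g_free(elem);
 *         }
 *         virtio_queue_set_notification(vq, 1);
 *     } while (!virtio_queue_empty(vq));
 */
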
580 int virtio_queue_ready(VirtQueue *vq)
581 {
582 return vq->vring.avail != 0;
583 }
584
585 static void vring_packed_desc_read_flags(VirtIODevice *vdev,
586 uint16_t *flags,
587 MemoryRegionCache *cache,
588 int i)
589 {
590 hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
591
592 *flags = virtio_lduw_phys_cached(vdev, cache, off);
593 }
594
595 static void vring_packed_desc_read(VirtIODevice *vdev,
596 VRingPackedDesc *desc,
597 MemoryRegionCache *cache,
598 int i, bool strict_order)
599 {
600 hwaddr off = i * sizeof(VRingPackedDesc);
601
602 vring_packed_desc_read_flags(vdev, &desc->flags, cache, i);
603
604 if (strict_order) {
605 /* Make sure flags is read before the rest of the fields. */
606 smp_rmb();
607 }
608
609 address_space_read_cached(cache, off + offsetof(VRingPackedDesc, addr),
610 &desc->addr, sizeof(desc->addr));
611 address_space_read_cached(cache, off + offsetof(VRingPackedDesc, id),
612 &desc->id, sizeof(desc->id));
613 address_space_read_cached(cache, off + offsetof(VRingPackedDesc, len),
614 &desc->len, sizeof(desc->len));
615 virtio_tswap64s(vdev, &desc->addr);
616 virtio_tswap16s(vdev, &desc->id);
617 virtio_tswap32s(vdev, &desc->len);
618 }
619
620 static void vring_packed_desc_write_data(VirtIODevice *vdev,
621 VRingPackedDesc *desc,
622 MemoryRegionCache *cache,
623 int i)
624 {
625 hwaddr off_id = i * sizeof(VRingPackedDesc) +
626 offsetof(VRingPackedDesc, id);
627 hwaddr off_len = i * sizeof(VRingPackedDesc) +
628 offsetof(VRingPackedDesc, len);
629
630 virtio_tswap32s(vdev, &desc->len);
631 virtio_tswap16s(vdev, &desc->id);
632 address_space_write_cached(cache, off_id, &desc->id, sizeof(desc->id));
633 address_space_cache_invalidate(cache, off_id, sizeof(desc->id));
634 address_space_write_cached(cache, off_len, &desc->len, sizeof(desc->len));
635 address_space_cache_invalidate(cache, off_len, sizeof(desc->len));
636 }
637
638 static void vring_packed_desc_write_flags(VirtIODevice *vdev,
639 VRingPackedDesc *desc,
640 MemoryRegionCache *cache,
641 int i)
642 {
643 hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
644
645 virtio_stw_phys_cached(vdev, cache, off, desc->flags);
646 address_space_cache_invalidate(cache, off, sizeof(desc->flags));
647 }
648
649 static void vring_packed_desc_write(VirtIODevice *vdev,
650 VRingPackedDesc *desc,
651 MemoryRegionCache *cache,
652 int i, bool strict_order)
653 {
654 vring_packed_desc_write_data(vdev, desc, cache, i);
655 if (strict_order) {
656 /* Make sure data is written before flags. */
657 smp_wmb();
658 }
659 vring_packed_desc_write_flags(vdev, desc, cache, i);
660 }
661
662 static inline bool is_desc_avail(uint16_t flags, bool wrap_counter)
663 {
664 bool avail, used;
665
666 avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
667 used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
668 return (avail != used) && (avail == wrap_counter);
669 }
670
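/*
 * Illustrative truth table: a packed descriptor is available to the device
 * when its AVAIL bit matches the current wrap counter and its USED bit
 * does not, i.e. (avail != used) && (avail == wrap_counter):
 *
 *     wrap_counter  AVAIL  USED   available?
 *          1          1      0       yes
 *          1          0      1       no
 *          0          0      1       yes
 *          0          1      0       no
 */
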
671 /* Fetch avail_idx from VQ memory only when we really need to know if
672  * the guest has added some buffers.
673 * Called within rcu_read_lock(). */
674 static int virtio_queue_empty_rcu(VirtQueue *vq)
675 {
676 if (virtio_device_disabled(vq->vdev)) {
677 return 1;
678 }
679
680 if (unlikely(!vq->vring.avail)) {
681 return 1;
682 }
683
684 if (vq->shadow_avail_idx != vq->last_avail_idx) {
685 return 0;
686 }
687
688 return vring_avail_idx(vq) == vq->last_avail_idx;
689 }
690
691 static int virtio_queue_split_empty(VirtQueue *vq)
692 {
693 bool empty;
694
695 if (virtio_device_disabled(vq->vdev)) {
696 return 1;
697 }
698
699 if (unlikely(!vq->vring.avail)) {
700 return 1;
701 }
702
703 if (vq->shadow_avail_idx != vq->last_avail_idx) {
704 return 0;
705 }
706
707 RCU_READ_LOCK_GUARD();
708 empty = vring_avail_idx(vq) == vq->last_avail_idx;
709 return empty;
710 }
711
712 /* Called within rcu_read_lock(). */
713 static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
714 {
715 struct VRingPackedDesc desc;
716 VRingMemoryRegionCaches *cache;
717
718 if (unlikely(!vq->vring.desc)) {
719 return 1;
720 }
721
722 cache = vring_get_region_caches(vq);
723 if (!cache) {
724 return 1;
725 }
726
727 vring_packed_desc_read_flags(vq->vdev, &desc.flags, &cache->desc,
728 vq->last_avail_idx);
729
730 return !is_desc_avail(desc.flags, vq->last_avail_wrap_counter);
731 }
732
733 static int virtio_queue_packed_empty(VirtQueue *vq)
734 {
735 RCU_READ_LOCK_GUARD();
736 return virtio_queue_packed_empty_rcu(vq);
737 }
738
739 int virtio_queue_empty(VirtQueue *vq)
740 {
741 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
742 return virtio_queue_packed_empty(vq);
743 } else {
744 return virtio_queue_split_empty(vq);
745 }
746 }
747
748 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
749 unsigned int len)
750 {
751 AddressSpace *dma_as = vq->vdev->dma_as;
752 unsigned int offset;
753 int i;
754
755 offset = 0;
756 for (i = 0; i < elem->in_num; i++) {
757 size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
758
759 dma_memory_unmap(dma_as, elem->in_sg[i].iov_base,
760 elem->in_sg[i].iov_len,
761 DMA_DIRECTION_FROM_DEVICE, size);
762
763 offset += size;
764 }
765
766 for (i = 0; i < elem->out_num; i++)
767 dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
768 elem->out_sg[i].iov_len,
769 DMA_DIRECTION_TO_DEVICE,
770 elem->out_sg[i].iov_len);
771 }
772
773 /* virtqueue_detach_element:
774 * @vq: The #VirtQueue
775 * @elem: The #VirtQueueElement
776 * @len: number of bytes written
777 *
778 * Detach the element from the virtqueue. This function is suitable for device
779 * reset or other situations where a #VirtQueueElement is simply freed and will
780 * not be pushed or discarded.
781 */
782 void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
783 unsigned int len)
784 {
785 vq->inuse -= elem->ndescs;
786 virtqueue_unmap_sg(vq, elem, len);
787 }
788
789 static void virtqueue_split_rewind(VirtQueue *vq, unsigned int num)
790 {
791 vq->last_avail_idx -= num;
792 }
793
794 static void virtqueue_packed_rewind(VirtQueue *vq, unsigned int num)
795 {
796 if (vq->last_avail_idx < num) {
797 vq->last_avail_idx = vq->vring.num + vq->last_avail_idx - num;
798 vq->last_avail_wrap_counter ^= 1;
799 } else {
800 vq->last_avail_idx -= num;
801 }
802 }
803
804 /* virtqueue_unpop:
805 * @vq: The #VirtQueue
806 * @elem: The #VirtQueueElement
807 * @len: number of bytes written
808 *
809 * Pretend the most recent element wasn't popped from the virtqueue. The next
810 * call to virtqueue_pop() will refetch the element.
811 */
812 void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
813 unsigned int len)
814 {
815
816 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
817 virtqueue_packed_rewind(vq, 1);
818 } else {
819 virtqueue_split_rewind(vq, 1);
820 }
821
822 virtqueue_detach_element(vq, elem, len);
823 }
824
825 /* virtqueue_rewind:
826 * @vq: The #VirtQueue
827 * @num: Number of elements to push back
828 *
829 * Pretend that elements weren't popped from the virtqueue. The next
830 * virtqueue_pop() will refetch the oldest element.
831 *
832 * Use virtqueue_unpop() instead if you have a VirtQueueElement.
833 *
834 * Returns: true on success, false if @num is greater than the number of in use
835 * elements.
836 */
837 bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
838 {
839 if (num > vq->inuse) {
840 return false;
841 }
842
843 vq->inuse -= num;
844 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
845 virtqueue_packed_rewind(vq, num);
846 } else {
847 virtqueue_split_rewind(vq, num);
848 }
849 return true;
850 }
851
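/*
 * Example (sketch): after discarding its private copies of n popped
 * requests, a device can make them visible to the next virtqueue_pop():
 *
 *     if (!virtqueue_rewind(vq, n)) {
 *         ... n was larger than the number of in-flight elements ...
 *     }
 */
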
852 static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
853 unsigned int len, unsigned int idx)
854 {
855 VRingUsedElem uelem;
856
857 if (unlikely(!vq->vring.used)) {
858 return;
859 }
860
861 idx = (idx + vq->used_idx) % vq->vring.num;
862
863 uelem.id = elem->index;
864 uelem.len = len;
865 vring_used_write(vq, &uelem, idx);
866 }
867
868 static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
869 unsigned int len, unsigned int idx)
870 {
871 vq->used_elems[idx].index = elem->index;
872 vq->used_elems[idx].len = len;
873 vq->used_elems[idx].ndescs = elem->ndescs;
874 }
875
876 static void virtqueue_packed_fill_desc(VirtQueue *vq,
877 const VirtQueueElement *elem,
878 unsigned int idx,
879 bool strict_order)
880 {
881 uint16_t head;
882 VRingMemoryRegionCaches *caches;
883 VRingPackedDesc desc = {
884 .id = elem->index,
885 .len = elem->len,
886 };
887 bool wrap_counter = vq->used_wrap_counter;
888
889 if (unlikely(!vq->vring.desc)) {
890 return;
891 }
892
893 head = vq->used_idx + idx;
894 if (head >= vq->vring.num) {
895 head -= vq->vring.num;
896 wrap_counter ^= 1;
897 }
898 if (wrap_counter) {
899 desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL);
900 desc.flags |= (1 << VRING_PACKED_DESC_F_USED);
901 } else {
902 desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL);
903 desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED);
904 }
905
906 caches = vring_get_region_caches(vq);
907 if (!caches) {
908 return;
909 }
910
911 vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order);
912 }
913
914 /* Called within rcu_read_lock(). */
915 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
916 unsigned int len, unsigned int idx)
917 {
918 trace_virtqueue_fill(vq, elem, len, idx);
919
920 virtqueue_unmap_sg(vq, elem, len);
921
922 if (virtio_device_disabled(vq->vdev)) {
923 return;
924 }
925
926 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
927 virtqueue_packed_fill(vq, elem, len, idx);
928 } else {
929 virtqueue_split_fill(vq, elem, len, idx);
930 }
931 }
932
933 /* Called within rcu_read_lock(). */
934 static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
935 {
936 uint16_t old, new;
937
938 if (unlikely(!vq->vring.used)) {
939 return;
940 }
941
942 /* Make sure buffer is written before we update index. */
943 smp_wmb();
944 trace_virtqueue_flush(vq, count);
945 old = vq->used_idx;
946 new = old + count;
947 vring_used_idx_set(vq, new);
948 vq->inuse -= count;
949 if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
950 vq->signalled_used_valid = false;
951 }
952
953 static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
954 {
955 unsigned int i, ndescs = 0;
956
957 if (unlikely(!vq->vring.desc)) {
958 return;
959 }
960
961 for (i = 1; i < count; i++) {
962 virtqueue_packed_fill_desc(vq, &vq->used_elems[i], i, false);
963 ndescs += vq->used_elems[i].ndescs;
964 }
965 virtqueue_packed_fill_desc(vq, &vq->used_elems[0], 0, true);
966 ndescs += vq->used_elems[0].ndescs;
967
968 vq->inuse -= ndescs;
969 vq->used_idx += ndescs;
970 if (vq->used_idx >= vq->vring.num) {
971 vq->used_idx -= vq->vring.num;
972 vq->used_wrap_counter ^= 1;
973 vq->signalled_used_valid = false;
974 }
975 }
976
977 void virtqueue_flush(VirtQueue *vq, unsigned int count)
978 {
979 if (virtio_device_disabled(vq->vdev)) {
980 vq->inuse -= count;
981 return;
982 }
983
984 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
985 virtqueue_packed_flush(vq, count);
986 } else {
987 virtqueue_split_flush(vq, count);
988 }
989 }
990
991 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
992 unsigned int len)
993 {
994 RCU_READ_LOCK_GUARD();
995 virtqueue_fill(vq, elem, len, 0);
996 virtqueue_flush(vq, 1);
997 }
998
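/*
 * virtqueue_push() is the single-element form of fill + flush.  A device
 * completing several elements at once can batch them (sketch only):
 *
 *     RCU_READ_LOCK_GUARD();
 *     for (i = 0; i < n; i++) {
 *         virtqueue_fill(vq, elems[i], lens[i], i);
 *     }
 *     virtqueue_flush(vq, n);
 *
 * so the used index is published once for the whole batch; virtio_notify()
 * can then also be called just once.
 */
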
999 /* Called within rcu_read_lock(). */
1000 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
1001 {
1002 uint16_t num_heads = vring_avail_idx(vq) - idx;
1003
1004 /* Check it isn't doing very strange things with descriptor numbers. */
1005 if (num_heads > vq->vring.num) {
1006 virtio_error(vq->vdev, "Guest moved used index from %u to %u",
1007 idx, vq->shadow_avail_idx);
1008 return -EINVAL;
1009 }
1010 /* On success, callers read a descriptor at vq->last_avail_idx.
1011 * Make sure descriptor read does not bypass avail index read. */
1012 if (num_heads) {
1013 smp_rmb();
1014 }
1015
1016 return num_heads;
1017 }
1018
1019 /* Called within rcu_read_lock(). */
1020 static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
1021 unsigned int *head)
1022 {
1023 /* Grab the next descriptor number they're advertising, and increment
1024 * the index we've seen. */
1025 *head = vring_avail_ring(vq, idx % vq->vring.num);
1026
1027 /* If their number is silly, that's a fatal mistake. */
1028 if (*head >= vq->vring.num) {
1029 virtio_error(vq->vdev, "Guest says index %u is available", *head);
1030 return false;
1031 }
1032
1033 return true;
1034 }
1035
1036 enum {
1037 VIRTQUEUE_READ_DESC_ERROR = -1,
1038 VIRTQUEUE_READ_DESC_DONE = 0, /* end of chain */
1039 VIRTQUEUE_READ_DESC_MORE = 1, /* more buffers in chain */
1040 };
1041
1042 static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
1043 MemoryRegionCache *desc_cache,
1044 unsigned int max, unsigned int *next)
1045 {
1046 /* If this descriptor says it doesn't chain, we're done. */
1047 if (!(desc->flags & VRING_DESC_F_NEXT)) {
1048 return VIRTQUEUE_READ_DESC_DONE;
1049 }
1050
1051 /* Check they're not leading us off the end of the descriptor table. */
1052 *next = desc->next;
1053 /* Make sure compiler knows to grab that: we don't want it changing! */
1054 smp_wmb();
1055
1056 if (*next >= max) {
1057 virtio_error(vdev, "Desc next is %u", *next);
1058 return VIRTQUEUE_READ_DESC_ERROR;
1059 }
1060
1061 vring_split_desc_read(vdev, desc, desc_cache, *next);
1062 return VIRTQUEUE_READ_DESC_MORE;
1063 }
1064
1065 /* Called within rcu_read_lock(). */
1066 static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
1067 unsigned int *in_bytes, unsigned int *out_bytes,
1068 unsigned max_in_bytes, unsigned max_out_bytes,
1069 VRingMemoryRegionCaches *caches)
1070 {
1071 VirtIODevice *vdev = vq->vdev;
1072 unsigned int max, idx;
1073 unsigned int total_bufs, in_total, out_total;
1074 MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1075 int64_t len = 0;
1076 int rc;
1077
1078 idx = vq->last_avail_idx;
1079 total_bufs = in_total = out_total = 0;
1080
1081 max = vq->vring.num;
1082
1083 while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
1084 MemoryRegionCache *desc_cache = &caches->desc;
1085 unsigned int num_bufs;
1086 VRingDesc desc;
1087 unsigned int i;
1088
1089 num_bufs = total_bufs;
1090
1091 if (!virtqueue_get_head(vq, idx++, &i)) {
1092 goto err;
1093 }
1094
1095 vring_split_desc_read(vdev, &desc, desc_cache, i);
1096
1097 if (desc.flags & VRING_DESC_F_INDIRECT) {
1098 if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1099 virtio_error(vdev, "Invalid size for indirect buffer table");
1100 goto err;
1101 }
1102
1103 /* If we've got too many, that implies a descriptor loop. */
1104 if (num_bufs >= max) {
1105 virtio_error(vdev, "Looped descriptor");
1106 goto err;
1107 }
1108
1109 /* loop over the indirect descriptor table */
1110 len = address_space_cache_init(&indirect_desc_cache,
1111 vdev->dma_as,
1112 desc.addr, desc.len, false);
1113 desc_cache = &indirect_desc_cache;
1114 if (len < desc.len) {
1115 virtio_error(vdev, "Cannot map indirect buffer");
1116 goto err;
1117 }
1118
1119 max = desc.len / sizeof(VRingDesc);
1120 num_bufs = i = 0;
1121 vring_split_desc_read(vdev, &desc, desc_cache, i);
1122 }
1123
1124 do {
1125 /* If we've got too many, that implies a descriptor loop. */
1126 if (++num_bufs > max) {
1127 virtio_error(vdev, "Looped descriptor");
1128 goto err;
1129 }
1130
1131 if (desc.flags & VRING_DESC_F_WRITE) {
1132 in_total += desc.len;
1133 } else {
1134 out_total += desc.len;
1135 }
1136 if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1137 goto done;
1138 }
1139
1140 rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1141 } while (rc == VIRTQUEUE_READ_DESC_MORE);
1142
1143 if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1144 goto err;
1145 }
1146
1147 if (desc_cache == &indirect_desc_cache) {
1148 address_space_cache_destroy(&indirect_desc_cache);
1149 total_bufs++;
1150 } else {
1151 total_bufs = num_bufs;
1152 }
1153 }
1154
1155 if (rc < 0) {
1156 goto err;
1157 }
1158
1159 done:
1160 address_space_cache_destroy(&indirect_desc_cache);
1161 if (in_bytes) {
1162 *in_bytes = in_total;
1163 }
1164 if (out_bytes) {
1165 *out_bytes = out_total;
1166 }
1167 return;
1168
1169 err:
1170 in_total = out_total = 0;
1171 goto done;
1172 }
1173
1174 static int virtqueue_packed_read_next_desc(VirtQueue *vq,
1175 VRingPackedDesc *desc,
1176 MemoryRegionCache
1177 *desc_cache,
1178 unsigned int max,
1179 unsigned int *next,
1180 bool indirect)
1181 {
1182 /* If this descriptor says it doesn't chain, we're done. */
1183 if (!indirect && !(desc->flags & VRING_DESC_F_NEXT)) {
1184 return VIRTQUEUE_READ_DESC_DONE;
1185 }
1186
1187 ++*next;
1188 if (*next == max) {
1189 if (indirect) {
1190 return VIRTQUEUE_READ_DESC_DONE;
1191 } else {
1192 (*next) -= vq->vring.num;
1193 }
1194 }
1195
1196 vring_packed_desc_read(vq->vdev, desc, desc_cache, *next, false);
1197 return VIRTQUEUE_READ_DESC_MORE;
1198 }
1199
1200 /* Called within rcu_read_lock(). */
1201 static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
1202 unsigned int *in_bytes,
1203 unsigned int *out_bytes,
1204 unsigned max_in_bytes,
1205 unsigned max_out_bytes,
1206 VRingMemoryRegionCaches *caches)
1207 {
1208 VirtIODevice *vdev = vq->vdev;
1209 unsigned int max, idx;
1210 unsigned int total_bufs, in_total, out_total;
1211 MemoryRegionCache *desc_cache;
1212 MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1213 int64_t len = 0;
1214 VRingPackedDesc desc;
1215 bool wrap_counter;
1216
1217 idx = vq->last_avail_idx;
1218 wrap_counter = vq->last_avail_wrap_counter;
1219 total_bufs = in_total = out_total = 0;
1220
1221 max = vq->vring.num;
1222
1223 for (;;) {
1224 unsigned int num_bufs = total_bufs;
1225 unsigned int i = idx;
1226 int rc;
1227
1228 desc_cache = &caches->desc;
1229 vring_packed_desc_read(vdev, &desc, desc_cache, idx, true);
1230 if (!is_desc_avail(desc.flags, wrap_counter)) {
1231 break;
1232 }
1233
1234 if (desc.flags & VRING_DESC_F_INDIRECT) {
1235 if (desc.len % sizeof(VRingPackedDesc)) {
1236 virtio_error(vdev, "Invalid size for indirect buffer table");
1237 goto err;
1238 }
1239
1240 /* If we've got too many, that implies a descriptor loop. */
1241 if (num_bufs >= max) {
1242 virtio_error(vdev, "Looped descriptor");
1243 goto err;
1244 }
1245
1246 /* loop over the indirect descriptor table */
1247 len = address_space_cache_init(&indirect_desc_cache,
1248 vdev->dma_as,
1249 desc.addr, desc.len, false);
1250 desc_cache = &indirect_desc_cache;
1251 if (len < desc.len) {
1252 virtio_error(vdev, "Cannot map indirect buffer");
1253 goto err;
1254 }
1255
1256 max = desc.len / sizeof(VRingPackedDesc);
1257 num_bufs = i = 0;
1258 vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1259 }
1260
1261 do {
1262 /* If we've got too many, that implies a descriptor loop. */
1263 if (++num_bufs > max) {
1264 virtio_error(vdev, "Looped descriptor");
1265 goto err;
1266 }
1267
1268 if (desc.flags & VRING_DESC_F_WRITE) {
1269 in_total += desc.len;
1270 } else {
1271 out_total += desc.len;
1272 }
1273 if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1274 goto done;
1275 }
1276
1277 rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max,
1278 &i, desc_cache ==
1279 &indirect_desc_cache);
1280 } while (rc == VIRTQUEUE_READ_DESC_MORE);
1281
1282 if (desc_cache == &indirect_desc_cache) {
1283 address_space_cache_destroy(&indirect_desc_cache);
1284 total_bufs++;
1285 idx++;
1286 } else {
1287 idx += num_bufs - total_bufs;
1288 total_bufs = num_bufs;
1289 }
1290
1291 if (idx >= vq->vring.num) {
1292 idx -= vq->vring.num;
1293 wrap_counter ^= 1;
1294 }
1295 }
1296
1297 /* Record the index and wrap counter for a kick we want */
1298 vq->shadow_avail_idx = idx;
1299 vq->shadow_avail_wrap_counter = wrap_counter;
1300 done:
1301 address_space_cache_destroy(&indirect_desc_cache);
1302 if (in_bytes) {
1303 *in_bytes = in_total;
1304 }
1305 if (out_bytes) {
1306 *out_bytes = out_total;
1307 }
1308 return;
1309
1310 err:
1311 in_total = out_total = 0;
1312 goto done;
1313 }
1314
1315 void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
1316 unsigned int *out_bytes,
1317 unsigned max_in_bytes, unsigned max_out_bytes)
1318 {
1319 uint16_t desc_size;
1320 VRingMemoryRegionCaches *caches;
1321
1322 RCU_READ_LOCK_GUARD();
1323
1324 if (unlikely(!vq->vring.desc)) {
1325 goto err;
1326 }
1327
1328 caches = vring_get_region_caches(vq);
1329 if (!caches) {
1330 goto err;
1331 }
1332
1333 desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
1334 sizeof(VRingPackedDesc) : sizeof(VRingDesc);
1335 if (caches->desc.len < vq->vring.num * desc_size) {
1336 virtio_error(vq->vdev, "Cannot map descriptor ring");
1337 goto err;
1338 }
1339
1340 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1341 virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
1342 max_in_bytes, max_out_bytes,
1343 caches);
1344 } else {
1345 virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
1346 max_in_bytes, max_out_bytes,
1347 caches);
1348 }
1349
1350 return;
1351 err:
1352 if (in_bytes) {
1353 *in_bytes = 0;
1354 }
1355 if (out_bytes) {
1356 *out_bytes = 0;
1357 }
1358 }
1359
1360 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
1361 unsigned int out_bytes)
1362 {
1363 unsigned int in_total, out_total;
1364
1365 virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
1366 return in_bytes <= in_total && out_bytes <= out_total;
1367 }
1368
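/*
 * Example (sketch; struct foo_req/foo_resp are hypothetical): check that
 * the guest has queued enough readable and writable space before popping:
 *
 *     if (!virtqueue_avail_bytes(vq, sizeof(struct foo_resp),
 *                                sizeof(struct foo_req))) {
 *         return;   wait for the guest to add more buffers
 *     }
 *
 * The second argument is the device-writable ("in") space required, the
 * third the device-readable ("out") space, matching in_bytes/out_bytes
 * above.
 */
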
1369 static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
1370 hwaddr *addr, struct iovec *iov,
1371 unsigned int max_num_sg, bool is_write,
1372 hwaddr pa, size_t sz)
1373 {
1374 bool ok = false;
1375 unsigned num_sg = *p_num_sg;
1376 assert(num_sg <= max_num_sg);
1377
1378 if (!sz) {
1379 virtio_error(vdev, "virtio: zero sized buffers are not allowed");
1380 goto out;
1381 }
1382
1383 while (sz) {
1384 hwaddr len = sz;
1385
1386 if (num_sg == max_num_sg) {
1387 virtio_error(vdev, "virtio: too many write descriptors in "
1388 "indirect table");
1389 goto out;
1390 }
1391
1392 iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len,
1393 is_write ?
1394 DMA_DIRECTION_FROM_DEVICE :
1395 DMA_DIRECTION_TO_DEVICE,
1396 MEMTXATTRS_UNSPECIFIED);
1397 if (!iov[num_sg].iov_base) {
1398 virtio_error(vdev, "virtio: bogus descriptor or out of resources");
1399 goto out;
1400 }
1401
1402 iov[num_sg].iov_len = len;
1403 addr[num_sg] = pa;
1404
1405 sz -= len;
1406 pa += len;
1407 num_sg++;
1408 }
1409 ok = true;
1410
1411 out:
1412 *p_num_sg = num_sg;
1413 return ok;
1414 }
1415
1416 /* Only used by error code paths before we have a VirtQueueElement (therefore
1417 * virtqueue_unmap_sg() can't be used). Assumes buffers weren't written to
1418 * yet.
1419 */
1420 static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
1421 struct iovec *iov)
1422 {
1423 unsigned int i;
1424
1425 for (i = 0; i < out_num + in_num; i++) {
1426 int is_write = i >= out_num;
1427
1428 cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
1429 iov++;
1430 }
1431 }
1432
1433 static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
1434 hwaddr *addr, unsigned int num_sg,
1435 bool is_write)
1436 {
1437 unsigned int i;
1438 hwaddr len;
1439
1440 for (i = 0; i < num_sg; i++) {
1441 len = sg[i].iov_len;
1442 sg[i].iov_base = dma_memory_map(vdev->dma_as,
1443 addr[i], &len, is_write ?
1444 DMA_DIRECTION_FROM_DEVICE :
1445 DMA_DIRECTION_TO_DEVICE,
1446 MEMTXATTRS_UNSPECIFIED);
1447 if (!sg[i].iov_base) {
1448 error_report("virtio: error trying to map MMIO memory");
1449 exit(1);
1450 }
1451 if (len != sg[i].iov_len) {
1452 error_report("virtio: unexpected memory split");
1453 exit(1);
1454 }
1455 }
1456 }
1457
1458 void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
1459 {
1460 virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num, true);
1461 virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num,
1462 false);
1463 }
1464
1465 static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
1466 {
1467 VirtQueueElement *elem;
1468 size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
1469 size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
1470 size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
1471 size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
1472 size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
1473 size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
1474
1475 assert(sz >= sizeof(VirtQueueElement));
1476 elem = g_malloc(out_sg_end);
1477 trace_virtqueue_alloc_element(elem, sz, in_num, out_num);
1478 elem->out_num = out_num;
1479 elem->in_num = in_num;
1480 elem->in_addr = (void *)elem + in_addr_ofs;
1481 elem->out_addr = (void *)elem + out_addr_ofs;
1482 elem->in_sg = (void *)elem + in_sg_ofs;
1483 elem->out_sg = (void *)elem + out_sg_ofs;
1484 return elem;
1485 }
1486
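/*
 * The element and its four variable-length arrays share one allocation
 * (illustrative layout, each array aligned as computed above):
 *
 *     | VirtQueueElement (sz bytes, often a larger device request struct) |
 *     | in_addr[in_num] | out_addr[out_num] | in_sg[in_num] | out_sg[out_num] |
 *
 * Devices typically embed VirtQueueElement as the first member of their own
 * request struct and pass sizeof(that struct) as sz to virtqueue_pop().
 */
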
1487 static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
1488 {
1489 unsigned int i, head, max;
1490 VRingMemoryRegionCaches *caches;
1491 MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1492 MemoryRegionCache *desc_cache;
1493 int64_t len;
1494 VirtIODevice *vdev = vq->vdev;
1495 VirtQueueElement *elem = NULL;
1496 unsigned out_num, in_num, elem_entries;
1497 hwaddr addr[VIRTQUEUE_MAX_SIZE];
1498 struct iovec iov[VIRTQUEUE_MAX_SIZE];
1499 VRingDesc desc;
1500 int rc;
1501
1502 RCU_READ_LOCK_GUARD();
1503 if (virtio_queue_empty_rcu(vq)) {
1504 goto done;
1505 }
1506 /* Needed after virtio_queue_empty(), see comment in
1507 * virtqueue_num_heads(). */
1508 smp_rmb();
1509
1510 /* When we start there are neither input nor output buffers. */
1511 out_num = in_num = elem_entries = 0;
1512
1513 max = vq->vring.num;
1514
1515 if (vq->inuse >= vq->vring.num) {
1516 virtio_error(vdev, "Virtqueue size exceeded");
1517 goto done;
1518 }
1519
1520 if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
1521 goto done;
1522 }
1523
1524 if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
1525 vring_set_avail_event(vq, vq->last_avail_idx);
1526 }
1527
1528 i = head;
1529
1530 caches = vring_get_region_caches(vq);
1531 if (!caches) {
1532 virtio_error(vdev, "Region caches not initialized");
1533 goto done;
1534 }
1535
1536 if (caches->desc.len < max * sizeof(VRingDesc)) {
1537 virtio_error(vdev, "Cannot map descriptor ring");
1538 goto done;
1539 }
1540
1541 desc_cache = &caches->desc;
1542 vring_split_desc_read(vdev, &desc, desc_cache, i);
1543 if (desc.flags & VRING_DESC_F_INDIRECT) {
1544 if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1545 virtio_error(vdev, "Invalid size for indirect buffer table");
1546 goto done;
1547 }
1548
1549 /* loop over the indirect descriptor table */
1550 len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1551 desc.addr, desc.len, false);
1552 desc_cache = &indirect_desc_cache;
1553 if (len < desc.len) {
1554 virtio_error(vdev, "Cannot map indirect buffer");
1555 goto done;
1556 }
1557
1558 max = desc.len / sizeof(VRingDesc);
1559 i = 0;
1560 vring_split_desc_read(vdev, &desc, desc_cache, i);
1561 }
1562
1563 /* Collect all the descriptors */
1564 do {
1565 bool map_ok;
1566
1567 if (desc.flags & VRING_DESC_F_WRITE) {
1568 map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1569 iov + out_num,
1570 VIRTQUEUE_MAX_SIZE - out_num, true,
1571 desc.addr, desc.len);
1572 } else {
1573 if (in_num) {
1574 virtio_error(vdev, "Incorrect order for descriptors");
1575 goto err_undo_map;
1576 }
1577 map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1578 VIRTQUEUE_MAX_SIZE, false,
1579 desc.addr, desc.len);
1580 }
1581 if (!map_ok) {
1582 goto err_undo_map;
1583 }
1584
1585 /* If we've got too many, that implies a descriptor loop. */
1586 if (++elem_entries > max) {
1587 virtio_error(vdev, "Looped descriptor");
1588 goto err_undo_map;
1589 }
1590
1591 rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1592 } while (rc == VIRTQUEUE_READ_DESC_MORE);
1593
1594 if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1595 goto err_undo_map;
1596 }
1597
1598 /* Now copy what we have collected and mapped */
1599 elem = virtqueue_alloc_element(sz, out_num, in_num);
1600 elem->index = head;
1601 elem->ndescs = 1;
1602 for (i = 0; i < out_num; i++) {
1603 elem->out_addr[i] = addr[i];
1604 elem->out_sg[i] = iov[i];
1605 }
1606 for (i = 0; i < in_num; i++) {
1607 elem->in_addr[i] = addr[out_num + i];
1608 elem->in_sg[i] = iov[out_num + i];
1609 }
1610
1611 vq->inuse++;
1612
1613 trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1614 done:
1615 address_space_cache_destroy(&indirect_desc_cache);
1616
1617 return elem;
1618
1619 err_undo_map:
1620 virtqueue_undo_map_desc(out_num, in_num, iov);
1621 goto done;
1622 }
1623
1624 static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
1625 {
1626 unsigned int i, max;
1627 VRingMemoryRegionCaches *caches;
1628 MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1629 MemoryRegionCache *desc_cache;
1630 int64_t len;
1631 VirtIODevice *vdev = vq->vdev;
1632 VirtQueueElement *elem = NULL;
1633 unsigned out_num, in_num, elem_entries;
1634 hwaddr addr[VIRTQUEUE_MAX_SIZE];
1635 struct iovec iov[VIRTQUEUE_MAX_SIZE];
1636 VRingPackedDesc desc;
1637 uint16_t id;
1638 int rc;
1639
1640 RCU_READ_LOCK_GUARD();
1641 if (virtio_queue_packed_empty_rcu(vq)) {
1642 goto done;
1643 }
1644
1645 /* When we start there are neither input nor output buffers. */
1646 out_num = in_num = elem_entries = 0;
1647
1648 max = vq->vring.num;
1649
1650 if (vq->inuse >= vq->vring.num) {
1651 virtio_error(vdev, "Virtqueue size exceeded");
1652 goto done;
1653 }
1654
1655 i = vq->last_avail_idx;
1656
1657 caches = vring_get_region_caches(vq);
1658 if (!caches) {
1659 virtio_error(vdev, "Region caches not initialized");
1660 goto done;
1661 }
1662
1663 if (caches->desc.len < max * sizeof(VRingDesc)) {
1664 virtio_error(vdev, "Cannot map descriptor ring");
1665 goto done;
1666 }
1667
1668 desc_cache = &caches->desc;
1669 vring_packed_desc_read(vdev, &desc, desc_cache, i, true);
1670 id = desc.id;
1671 if (desc.flags & VRING_DESC_F_INDIRECT) {
1672 if (desc.len % sizeof(VRingPackedDesc)) {
1673 virtio_error(vdev, "Invalid size for indirect buffer table");
1674 goto done;
1675 }
1676
1677 /* loop over the indirect descriptor table */
1678 len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1679 desc.addr, desc.len, false);
1680 desc_cache = &indirect_desc_cache;
1681 if (len < desc.len) {
1682 virtio_error(vdev, "Cannot map indirect buffer");
1683 goto done;
1684 }
1685
1686 max = desc.len / sizeof(VRingPackedDesc);
1687 i = 0;
1688 vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1689 }
1690
1691 /* Collect all the descriptors */
1692 do {
1693 bool map_ok;
1694
1695 if (desc.flags & VRING_DESC_F_WRITE) {
1696 map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1697 iov + out_num,
1698 VIRTQUEUE_MAX_SIZE - out_num, true,
1699 desc.addr, desc.len);
1700 } else {
1701 if (in_num) {
1702 virtio_error(vdev, "Incorrect order for descriptors");
1703 goto err_undo_map;
1704 }
1705 map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1706 VIRTQUEUE_MAX_SIZE, false,
1707 desc.addr, desc.len);
1708 }
1709 if (!map_ok) {
1710 goto err_undo_map;
1711 }
1712
1713 /* If we've got too many, that implies a descriptor loop. */
1714 if (++elem_entries > max) {
1715 virtio_error(vdev, "Looped descriptor");
1716 goto err_undo_map;
1717 }
1718
1719 rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, &i,
1720 desc_cache ==
1721 &indirect_desc_cache);
1722 } while (rc == VIRTQUEUE_READ_DESC_MORE);
1723
1724 /* Now copy what we have collected and mapped */
1725 elem = virtqueue_alloc_element(sz, out_num, in_num);
1726 for (i = 0; i < out_num; i++) {
1727 elem->out_addr[i] = addr[i];
1728 elem->out_sg[i] = iov[i];
1729 }
1730 for (i = 0; i < in_num; i++) {
1731 elem->in_addr[i] = addr[out_num + i];
1732 elem->in_sg[i] = iov[out_num + i];
1733 }
1734
1735 elem->index = id;
1736 elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries;
1737 vq->last_avail_idx += elem->ndescs;
1738 vq->inuse += elem->ndescs;
1739
1740 if (vq->last_avail_idx >= vq->vring.num) {
1741 vq->last_avail_idx -= vq->vring.num;
1742 vq->last_avail_wrap_counter ^= 1;
1743 }
1744
1745 vq->shadow_avail_idx = vq->last_avail_idx;
1746 vq->shadow_avail_wrap_counter = vq->last_avail_wrap_counter;
1747
1748 trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1749 done:
1750 address_space_cache_destroy(&indirect_desc_cache);
1751
1752 return elem;
1753
1754 err_undo_map:
1755 virtqueue_undo_map_desc(out_num, in_num, iov);
1756 goto done;
1757 }
1758
1759 void *virtqueue_pop(VirtQueue *vq, size_t sz)
1760 {
1761 if (virtio_device_disabled(vq->vdev)) {
1762 return NULL;
1763 }
1764
1765 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1766 return virtqueue_packed_pop(vq, sz);
1767 } else {
1768 return virtqueue_split_pop(vq, sz);
1769 }
1770 }
1771
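/*
 * Typical request loop in a device's handle_output callback (sketch only;
 * VirtIOFooReq is a hypothetical request struct whose first member is a
 * VirtQueueElement named elem, and error handling is omitted):
 *
 *     VirtIOFooReq *req;
 *
 *     while ((req = virtqueue_pop(vq, sizeof(*req)))) {
 *         ... read the request from req->elem.out_sg,
 *             write the response into req->elem.in_sg ...
 *         virtqueue_push(vq, &req->elem, bytes_written);
 *         virtio_notify(vdev, vq);
 *         g_free(req);
 *     }
 *
 * The element is allocated with g_malloc() by virtqueue_alloc_element()
 * and must be freed by the device once it has been pushed or detached.
 */
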
1772 static unsigned int virtqueue_packed_drop_all(VirtQueue *vq)
1773 {
1774 VRingMemoryRegionCaches *caches;
1775 MemoryRegionCache *desc_cache;
1776 unsigned int dropped = 0;
1777 VirtQueueElement elem = {};
1778 VirtIODevice *vdev = vq->vdev;
1779 VRingPackedDesc desc;
1780
1781 RCU_READ_LOCK_GUARD();
1782
1783 caches = vring_get_region_caches(vq);
1784 if (!caches) {
1785 return 0;
1786 }
1787
1788 desc_cache = &caches->desc;
1789
1790 virtio_queue_set_notification(vq, 0);
1791
1792 while (vq->inuse < vq->vring.num) {
1793 unsigned int idx = vq->last_avail_idx;
1794 /*
1795  * Works like virtqueue_pop() but does not map buffers
1796 * and does not allocate any memory.
1797 */
1798 vring_packed_desc_read(vdev, &desc, desc_cache,
1799 vq->last_avail_idx, true);
1800 if (!is_desc_avail(desc.flags, vq->last_avail_wrap_counter)) {
1801 break;
1802 }
1803 elem.index = desc.id;
1804 elem.ndescs = 1;
1805 while (virtqueue_packed_read_next_desc(vq, &desc, desc_cache,
1806 vq->vring.num, &idx, false)) {
1807 ++elem.ndescs;
1808 }
1809 /*
1810 * immediately push the element, nothing to unmap
1811 * as both in_num and out_num are set to 0.
1812 */
1813 virtqueue_push(vq, &elem, 0);
1814 dropped++;
1815 vq->last_avail_idx += elem.ndescs;
1816 if (vq->last_avail_idx >= vq->vring.num) {
1817 vq->last_avail_idx -= vq->vring.num;
1818 vq->last_avail_wrap_counter ^= 1;
1819 }
1820 }
1821
1822 return dropped;
1823 }
1824
1825 static unsigned int virtqueue_split_drop_all(VirtQueue *vq)
1826 {
1827 unsigned int dropped = 0;
1828 VirtQueueElement elem = {};
1829 VirtIODevice *vdev = vq->vdev;
1830 bool fEventIdx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1831
1832 while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) {
1833 /* Works like virtqueue_pop() but does not map buffers
1834 * and does not allocate any memory */
1835 smp_rmb();
1836 if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) {
1837 break;
1838 }
1839 vq->inuse++;
1840 vq->last_avail_idx++;
1841 if (fEventIdx) {
1842 vring_set_avail_event(vq, vq->last_avail_idx);
1843 }
1844 /* immediately push the element, nothing to unmap
1845 * as both in_num and out_num are set to 0 */
1846 virtqueue_push(vq, &elem, 0);
1847 dropped++;
1848 }
1849
1850 return dropped;
1851 }
1852
1853 /* virtqueue_drop_all:
1854 * @vq: The #VirtQueue
1855 * Drops all queued buffers and indicates them to the guest
1856  * as if they were done. Useful when buffers cannot be
1857 * processed but must be returned to the guest.
1858 */
1859 unsigned int virtqueue_drop_all(VirtQueue *vq)
1860 {
1861 struct VirtIODevice *vdev = vq->vdev;
1862
1863 if (virtio_device_disabled(vq->vdev)) {
1864 return 0;
1865 }
1866
1867 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1868 return virtqueue_packed_drop_all(vq);
1869 } else {
1870 return virtqueue_split_drop_all(vq);
1871 }
1872 }
1873
1874 /* Reading and writing a structure directly to QEMUFile is *awful*, but
1875 * it is what QEMU has always done by mistake. We can change it sooner
1876 * or later by bumping the version number of the affected vm states.
1877  * In the meantime, since the in-memory layout of VirtQueueElement
1878 * has changed, we need to marshal to and from the layout that was
1879 * used before the change.
1880 */
1881 typedef struct VirtQueueElementOld {
1882 unsigned int index;
1883 unsigned int out_num;
1884 unsigned int in_num;
1885 hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
1886 hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
1887 struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
1888 struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
1889 } VirtQueueElementOld;
1890
1891 void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
1892 {
1893 VirtQueueElement *elem;
1894 VirtQueueElementOld data;
1895 int i;
1896
1897 qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1898
1899 /* TODO: teach all callers that this can fail, and return failure instead
1900 * of asserting here.
1901 * This is just one thing (there are probably more) that must be
1902 * fixed before we can allow NDEBUG compilation.
1903 */
1904 assert(ARRAY_SIZE(data.in_addr) >= data.in_num);
1905 assert(ARRAY_SIZE(data.out_addr) >= data.out_num);
1906
1907 elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
1908 elem->index = data.index;
1909
1910 for (i = 0; i < elem->in_num; i++) {
1911 elem->in_addr[i] = data.in_addr[i];
1912 }
1913
1914 for (i = 0; i < elem->out_num; i++) {
1915 elem->out_addr[i] = data.out_addr[i];
1916 }
1917
1918 for (i = 0; i < elem->in_num; i++) {
1919 /* Base is overwritten by virtqueue_map. */
1920 elem->in_sg[i].iov_base = 0;
1921 elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
1922 }
1923
1924 for (i = 0; i < elem->out_num; i++) {
1925 /* Base is overwritten by virtqueue_map. */
1926 elem->out_sg[i].iov_base = 0;
1927 elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
1928 }
1929
1930 if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1931 qemu_get_be32s(f, &elem->ndescs);
1932 }
1933
1934 virtqueue_map(vdev, elem);
1935 return elem;
1936 }
1937
1938 void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
1939 VirtQueueElement *elem)
1940 {
1941 VirtQueueElementOld data;
1942 int i;
1943
1944 memset(&data, 0, sizeof(data));
1945 data.index = elem->index;
1946 data.in_num = elem->in_num;
1947 data.out_num = elem->out_num;
1948
1949 for (i = 0; i < elem->in_num; i++) {
1950 data.in_addr[i] = elem->in_addr[i];
1951 }
1952
1953 for (i = 0; i < elem->out_num; i++) {
1954 data.out_addr[i] = elem->out_addr[i];
1955 }
1956
1957 for (i = 0; i < elem->in_num; i++) {
1958 /* Base is overwritten by virtqueue_map when loading. Do not
1959 * save it, as it would leak the QEMU address space layout. */
1960 data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
1961 }
1962
1963 for (i = 0; i < elem->out_num; i++) {
1964 /* Do not save iov_base as above. */
1965 data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
1966 }
1967
1968 if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1969 qemu_put_be32s(f, &elem->ndescs);
1970 }
1971
1972 qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1973 }
1974
1975 /* virtio device */
1976 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
1977 {
1978 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1979 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1980
1981 if (virtio_device_disabled(vdev)) {
1982 return;
1983 }
1984
1985 if (k->notify) {
1986 k->notify(qbus->parent, vector);
1987 }
1988 }
1989
1990 void virtio_update_irq(VirtIODevice *vdev)
1991 {
1992 virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
1993 }
1994
1995 static int virtio_validate_features(VirtIODevice *vdev)
1996 {
1997 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1998
1999 if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
2000 !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
2001 return -EFAULT;
2002 }
2003
2004 if (k->validate_features) {
2005 return k->validate_features(vdev);
2006 } else {
2007 return 0;
2008 }
2009 }
2010
2011 int virtio_set_status(VirtIODevice *vdev, uint8_t val)
2012 {
2013 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2014 trace_virtio_set_status(vdev, val);
2015
2016 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2017 if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
2018 val & VIRTIO_CONFIG_S_FEATURES_OK) {
2019 int ret = virtio_validate_features(vdev);
2020
2021 if (ret) {
2022 return ret;
2023 }
2024 }
2025 }
2026
2027 if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
2028 (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
2029 virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
2030 }
2031
2032 if (k->set_status) {
2033 k->set_status(vdev, val);
2034 }
2035 vdev->status = val;
2036
2037 return 0;
2038 }
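
/*
 * For orientation (virtio 1.x negotiation, simplified): a driver normally
 * walks the status byte through ACKNOWLEDGE -> DRIVER -> FEATURES_OK ->
 * DRIVER_OK. The FEATURES_OK transition is where virtio_validate_features()
 * above can veto the negotiated set; e.g. an illustrative transport call
 *
 *     ret = virtio_set_status(vdev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
 *                                   VIRTIO_CONFIG_S_DRIVER |
 *                                   VIRTIO_CONFIG_S_FEATURES_OK);
 *
 * fails with -EFAULT if VIRTIO_F_IOMMU_PLATFORM was offered but not
 * accepted, and the transport then must not latch FEATURES_OK.
 */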
2039
2040 static enum virtio_device_endian virtio_default_endian(void)
2041 {
2042 if (target_words_bigendian()) {
2043 return VIRTIO_DEVICE_ENDIAN_BIG;
2044 } else {
2045 return VIRTIO_DEVICE_ENDIAN_LITTLE;
2046 }
2047 }
2048
2049 static enum virtio_device_endian virtio_current_cpu_endian(void)
2050 {
2051 if (cpu_virtio_is_big_endian(current_cpu)) {
2052 return VIRTIO_DEVICE_ENDIAN_BIG;
2053 } else {
2054 return VIRTIO_DEVICE_ENDIAN_LITTLE;
2055 }
2056 }
2057
2058 static void __virtio_queue_reset(VirtIODevice *vdev, uint32_t i)
2059 {
2060 vdev->vq[i].vring.desc = 0;
2061 vdev->vq[i].vring.avail = 0;
2062 vdev->vq[i].vring.used = 0;
2063 vdev->vq[i].last_avail_idx = 0;
2064 vdev->vq[i].shadow_avail_idx = 0;
2065 vdev->vq[i].used_idx = 0;
2066 vdev->vq[i].last_avail_wrap_counter = true;
2067 vdev->vq[i].shadow_avail_wrap_counter = true;
2068 vdev->vq[i].used_wrap_counter = true;
2069 virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
2070 vdev->vq[i].signalled_used = 0;
2071 vdev->vq[i].signalled_used_valid = false;
2072 vdev->vq[i].notification = true;
2073 vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
2074 vdev->vq[i].inuse = 0;
2075 virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
2076 }
2077
2078 void virtio_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
2079 {
2080 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2081
2082 if (k->queue_reset) {
2083 k->queue_reset(vdev, queue_index);
2084 }
2085
2086 __virtio_queue_reset(vdev, queue_index);
2087 }
2088
2089 void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
2090 {
2091 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2092
2093 /*
2094 * TODO: SeaBIOS is currently out of spec and triggers this error.
2095 * It needs to be fixed in SeaBIOS first; this check can then be
2096 * re-enabled for new machine types only, and also after
2097 * being converted to LOG_GUEST_ERROR.
2098 *
2099 if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2100 error_report("queue_enable is only supported in devices of virtio "
2101 "1.0 or later.");
2102 }
2103 */
2104
2105 if (k->queue_enable) {
2106 k->queue_enable(vdev, queue_index);
2107 }
2108 }
2109
2110 void virtio_reset(void *opaque)
2111 {
2112 VirtIODevice *vdev = opaque;
2113 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2114 int i;
2115
2116 virtio_set_status(vdev, 0);
2117 if (current_cpu) {
2118 /* Guest initiated reset */
2119 vdev->device_endian = virtio_current_cpu_endian();
2120 } else {
2121 /* System reset */
2122 vdev->device_endian = virtio_default_endian();
2123 }
2124
2125 if (k->reset) {
2126 k->reset(vdev);
2127 }
2128
2129 vdev->start_on_kick = false;
2130 vdev->started = false;
2131 vdev->broken = false;
2132 vdev->guest_features = 0;
2133 vdev->queue_sel = 0;
2134 vdev->status = 0;
2135 vdev->disabled = false;
2136 qatomic_set(&vdev->isr, 0);
2137 vdev->config_vector = VIRTIO_NO_VECTOR;
2138 virtio_notify_vector(vdev, vdev->config_vector);
2139
2140 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2141 __virtio_queue_reset(vdev, i);
2142 }
2143 }
2144
2145 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
2146 {
2147 if (!vdev->vq[n].vring.num) {
2148 return;
2149 }
2150 vdev->vq[n].vring.desc = addr;
2151 virtio_queue_update_rings(vdev, n);
2152 }
2153
2154 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
2155 {
2156 return vdev->vq[n].vring.desc;
2157 }
2158
2159 void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
2160 hwaddr avail, hwaddr used)
2161 {
2162 if (!vdev->vq[n].vring.num) {
2163 return;
2164 }
2165 vdev->vq[n].vring.desc = desc;
2166 vdev->vq[n].vring.avail = avail;
2167 vdev->vq[n].vring.used = used;
2168 virtio_init_region_cache(vdev, n);
2169 }
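
/*
 * Transport usage sketch (illustrative, simplified): a legacy transport only
 * learns the descriptor table base from the driver and derives avail/used
 * from the fixed layout, roughly
 *
 *     virtio_queue_set_num(vdev, n, num);
 *     virtio_queue_set_addr(vdev, n, pfn << 12);   (page-aligned PFN)
 *
 * whereas a virtio-1 transport passes all three ring addresses explicitly
 * to virtio_queue_set_rings() once the driver has programmed its
 * queue_desc/queue_driver/queue_device registers.
 */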
2170
2171 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
2172 {
2173 /* Don't allow guest to flip queue between existent and
2174 * nonexistent states, or to set it to an invalid size.
2175 */
2176 if (!!num != !!vdev->vq[n].vring.num ||
2177 num > VIRTQUEUE_MAX_SIZE ||
2178 num < 0) {
2179 return;
2180 }
2181 vdev->vq[n].vring.num = num;
2182 }
2183
2184 VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
2185 {
2186 return QLIST_FIRST(&vdev->vector_queues[vector]);
2187 }
2188
2189 VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
2190 {
2191 return QLIST_NEXT(vq, node);
2192 }
2193
2194 int virtio_queue_get_num(VirtIODevice *vdev, int n)
2195 {
2196 return vdev->vq[n].vring.num;
2197 }
2198
2199 int virtio_queue_get_max_num(VirtIODevice *vdev, int n)
2200 {
2201 return vdev->vq[n].vring.num_default;
2202 }
2203
2204 int virtio_get_num_queues(VirtIODevice *vdev)
2205 {
2206 int i;
2207
2208 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2209 if (!virtio_queue_get_num(vdev, i)) {
2210 break;
2211 }
2212 }
2213
2214 return i;
2215 }
2216
2217 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
2218 {
2219 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2220 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2221
2222 /* virtio-1 compliant devices cannot change the alignment */
2223 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2224 error_report("tried to modify queue alignment for virtio-1 device");
2225 return;
2226 }
2227 /* Check that the transport told us it was going to do this
2228 * (so a buggy transport will immediately assert rather than
2229 * silently failing to migrate this state)
2230 */
2231 assert(k->has_variable_vring_alignment);
2232
2233 if (align) {
2234 vdev->vq[n].vring.align = align;
2235 virtio_queue_update_rings(vdev, n);
2236 }
2237 }
2238
2239 static void virtio_queue_notify_vq(VirtQueue *vq)
2240 {
2241 if (vq->vring.desc && vq->handle_output) {
2242 VirtIODevice *vdev = vq->vdev;
2243
2244 if (unlikely(vdev->broken)) {
2245 return;
2246 }
2247
2248 trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2249 vq->handle_output(vdev, vq);
2250
2251 if (unlikely(vdev->start_on_kick)) {
2252 virtio_set_started(vdev, true);
2253 }
2254 }
2255 }
2256
2257 void virtio_queue_notify(VirtIODevice *vdev, int n)
2258 {
2259 VirtQueue *vq = &vdev->vq[n];
2260
2261 if (unlikely(!vq->vring.desc || vdev->broken)) {
2262 return;
2263 }
2264
2265 trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2266 if (vq->host_notifier_enabled) {
2267 event_notifier_set(&vq->host_notifier);
2268 } else if (vq->handle_output) {
2269 vq->handle_output(vdev, vq);
2270
2271 if (unlikely(vdev->start_on_kick)) {
2272 virtio_set_started(vdev, true);
2273 }
2274 }
2275 }
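
/*
 * Note (informal): when an ioeventfd/vhost backend owns the queue,
 * host_notifier_enabled is true and the kick is forwarded by signalling the
 * host notifier eventfd; otherwise handle_output() runs inline on the
 * thread that performed the MMIO/PIO kick. Both paths are skipped entirely
 * once the device is marked broken.
 */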
2276
2277 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
2278 {
2279 return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
2280 VIRTIO_NO_VECTOR;
2281 }
2282
2283 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
2284 {
2285 VirtQueue *vq = &vdev->vq[n];
2286
2287 if (n < VIRTIO_QUEUE_MAX) {
2288 if (vdev->vector_queues &&
2289 vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
2290 QLIST_REMOVE(vq, node);
2291 }
2292 vdev->vq[n].vector = vector;
2293 if (vdev->vector_queues &&
2294 vector != VIRTIO_NO_VECTOR) {
2295 QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
2296 }
2297 }
2298 }
2299
2300 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
2301 VirtIOHandleOutput handle_output)
2302 {
2303 int i;
2304
2305 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2306 if (vdev->vq[i].vring.num == 0)
2307 break;
2308 }
2309
2310 if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
2311 abort();
2312
2313 vdev->vq[i].vring.num = queue_size;
2314 vdev->vq[i].vring.num_default = queue_size;
2315 vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
2316 vdev->vq[i].handle_output = handle_output;
2317 vdev->vq[i].used_elems = g_new0(VirtQueueElement, queue_size);
2318
2319 return &vdev->vq[i];
2320 }
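
/*
 * Typical usage from a device's realize path (hypothetical names, shown
 * only to illustrate the API):
 *
 *     s->cmd_vq = virtio_add_queue(vdev, 128, my_dev_handle_cmd);
 *
 * The returned VirtQueue remains valid until virtio_del_queue() /
 * virtio_delete_queue() below tears it down again.
 */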
2321
2322 void virtio_delete_queue(VirtQueue *vq)
2323 {
2324 vq->vring.num = 0;
2325 vq->vring.num_default = 0;
2326 vq->handle_output = NULL;
2327 g_free(vq->used_elems);
2328 vq->used_elems = NULL;
2329 virtio_virtqueue_reset_region_cache(vq);
2330 }
2331
2332 void virtio_del_queue(VirtIODevice *vdev, int n)
2333 {
2334 if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
2335 abort();
2336 }
2337
2338 virtio_delete_queue(&vdev->vq[n]);
2339 }
2340
2341 static void virtio_set_isr(VirtIODevice *vdev, int value)
2342 {
2343 uint8_t old = qatomic_read(&vdev->isr);
2344
2345 /* Do not write ISR if it does not change, so that its cacheline remains
2346 * shared in the common case where the guest does not read it.
2347 */
2348 if ((old & value) != value) {
2349 qatomic_or(&vdev->isr, value);
2350 }
2351 }
2352
2353 /* Called within rcu_read_lock(). */
2354 static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2355 {
2356 uint16_t old, new;
2357 bool v;
2358 /* We need to expose used array entries before checking used event. */
2359 smp_mb();
2360 /* Always notify when the queue is empty (if the feature was acknowledged) */
2361 if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
2362 !vq->inuse && virtio_queue_empty(vq)) {
2363 return true;
2364 }
2365
2366 if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2367 return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
2368 }
2369
2370 v = vq->signalled_used_valid;
2371 vq->signalled_used_valid = true;
2372 old = vq->signalled_used;
2373 new = vq->signalled_used = vq->used_idx;
2374 return !v || vring_need_event(vring_get_used_event(vq), new, old);
2375 }
2376
2377 static bool vring_packed_need_event(VirtQueue *vq, bool wrap,
2378 uint16_t off_wrap, uint16_t new,
2379 uint16_t old)
2380 {
2381 int off = off_wrap & ~(1 << 15);
2382
2383 if (wrap != off_wrap >> 15) {
2384 off -= vq->vring.num;
2385 }
2386
2387 return vring_need_event(off, new, old);
2388 }
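
/*
 * For reference, vring_need_event() (from the standard virtio ring headers)
 * is roughly the 16-bit modular comparison
 *
 *     (uint16_t)(new - event_idx - 1) < (uint16_t)(new - old)
 *
 * i.e. notify only if the event index falls inside the window of entries
 * published since the last notification. The wrapper above folds the packed
 * ring's off_wrap encoding (offset in bits 0..14, wrap counter in bit 15)
 * into that same comparison.
 */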
2389
2390 /* Called within rcu_read_lock(). */
2391 static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2392 {
2393 VRingPackedDescEvent e;
2394 uint16_t old, new;
2395 bool v;
2396 VRingMemoryRegionCaches *caches;
2397
2398 caches = vring_get_region_caches(vq);
2399 if (!caches) {
2400 return false;
2401 }
2402
2403 vring_packed_event_read(vdev, &caches->avail, &e);
2404
2405 old = vq->signalled_used;
2406 new = vq->signalled_used = vq->used_idx;
2407 v = vq->signalled_used_valid;
2408 vq->signalled_used_valid = true;
2409
2410 if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) {
2411 return false;
2412 } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) {
2413 return true;
2414 }
2415
2416 return !v || vring_packed_need_event(vq, vq->used_wrap_counter,
2417 e.off_wrap, new, old);
2418 }
2419
2420 /* Called within rcu_read_lock(). */
2421 static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2422 {
2423 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2424 return virtio_packed_should_notify(vdev, vq);
2425 } else {
2426 return virtio_split_should_notify(vdev, vq);
2427 }
2428 }
2429
2430 void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
2431 {
2432 WITH_RCU_READ_LOCK_GUARD() {
2433 if (!virtio_should_notify(vdev, vq)) {
2434 return;
2435 }
2436 }
2437
2438 trace_virtio_notify_irqfd(vdev, vq);
2439
2440 /*
2441 * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
2442 * Windows drivers included in virtio-win 1.8.0 (circa 2015) are
2443 * incorrectly polling this bit during crashdump and hibernation
2444 * in MSI mode, causing a hang if this bit is never updated.
2445 * Recent releases of Windows do not really shut down, but rather
2446 * log out and hibernate to make the next startup faster. Hence,
2447 * this manifested as a more serious hang during shutdown.
2448 *
2449 * The next driver release, from 2016, fixed this problem, so working
2450 * around it is not a must, but it is easy to do, so let's do it here.
2451 *
2452 * Note: it's safe to update ISR from any thread as it was switched
2453 * to an atomic operation.
2454 */
2455 virtio_set_isr(vq->vdev, 0x1);
2456 event_notifier_set(&vq->guest_notifier);
2457 }
2458
2459 static void virtio_irq(VirtQueue *vq)
2460 {
2461 virtio_set_isr(vq->vdev, 0x1);
2462 virtio_notify_vector(vq->vdev, vq->vector);
2463 }
2464
2465 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
2466 {
2467 WITH_RCU_READ_LOCK_GUARD() {
2468 if (!virtio_should_notify(vdev, vq)) {
2469 return;
2470 }
2471 }
2472
2473 trace_virtio_notify(vdev, vq);
2474 virtio_irq(vq);
2475 }
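
/*
 * Note (informal): virtio_notify_irqfd() signals the guest notifier eventfd
 * directly (the irqfd path used with MSI-X/vhost and callable from any
 * thread), while virtio_notify() raises ISR bit 0 and goes through the
 * transport's notify callback; both share the virtio_should_notify()
 * suppression logic above.
 */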
2476
2477 void virtio_notify_config(VirtIODevice *vdev)
2478 {
2479 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2480 return;
2481
2482 virtio_set_isr(vdev, 0x3);
2483 vdev->generation++;
2484 virtio_notify_vector(vdev, vdev->config_vector);
2485 }
2486
2487 static bool virtio_device_endian_needed(void *opaque)
2488 {
2489 VirtIODevice *vdev = opaque;
2490
2491 assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
2492 if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2493 return vdev->device_endian != virtio_default_endian();
2494 }
2495 /* Devices conforming to VIRTIO 1.0 or later are always LE. */
2496 return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
2497 }
2498
2499 static bool virtio_64bit_features_needed(void *opaque)
2500 {
2501 VirtIODevice *vdev = opaque;
2502
2503 return (vdev->host_features >> 32) != 0;
2504 }
2505
2506 static bool virtio_virtqueue_needed(void *opaque)
2507 {
2508 VirtIODevice *vdev = opaque;
2509
2510 return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
2511 }
2512
2513 static bool virtio_packed_virtqueue_needed(void *opaque)
2514 {
2515 VirtIODevice *vdev = opaque;
2516
2517 return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED);
2518 }
2519
2520 static bool virtio_ringsize_needed(void *opaque)
2521 {
2522 VirtIODevice *vdev = opaque;
2523 int i;
2524
2525 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2526 if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
2527 return true;
2528 }
2529 }
2530 return false;
2531 }
2532
2533 static bool virtio_extra_state_needed(void *opaque)
2534 {
2535 VirtIODevice *vdev = opaque;
2536 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2537 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2538
2539 return k->has_extra_state &&
2540 k->has_extra_state(qbus->parent);
2541 }
2542
2543 static bool virtio_broken_needed(void *opaque)
2544 {
2545 VirtIODevice *vdev = opaque;
2546
2547 return vdev->broken;
2548 }
2549
2550 static bool virtio_started_needed(void *opaque)
2551 {
2552 VirtIODevice *vdev = opaque;
2553
2554 return vdev->started;
2555 }
2556
2557 static bool virtio_disabled_needed(void *opaque)
2558 {
2559 VirtIODevice *vdev = opaque;
2560
2561 return vdev->disabled;
2562 }
2563
2564 static const VMStateDescription vmstate_virtqueue = {
2565 .name = "virtqueue_state",
2566 .version_id = 1,
2567 .minimum_version_id = 1,
2568 .fields = (VMStateField[]) {
2569 VMSTATE_UINT64(vring.avail, struct VirtQueue),
2570 VMSTATE_UINT64(vring.used, struct VirtQueue),
2571 VMSTATE_END_OF_LIST()
2572 }
2573 };
2574
2575 static const VMStateDescription vmstate_packed_virtqueue = {
2576 .name = "packed_virtqueue_state",
2577 .version_id = 1,
2578 .minimum_version_id = 1,
2579 .fields = (VMStateField[]) {
2580 VMSTATE_UINT16(last_avail_idx, struct VirtQueue),
2581 VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue),
2582 VMSTATE_UINT16(used_idx, struct VirtQueue),
2583 VMSTATE_BOOL(used_wrap_counter, struct VirtQueue),
2584 VMSTATE_UINT32(inuse, struct VirtQueue),
2585 VMSTATE_END_OF_LIST()
2586 }
2587 };
2588
2589 static const VMStateDescription vmstate_virtio_virtqueues = {
2590 .name = "virtio/virtqueues",
2591 .version_id = 1,
2592 .minimum_version_id = 1,
2593 .needed = &virtio_virtqueue_needed,
2594 .fields = (VMStateField[]) {
2595 VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2596 VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
2597 VMSTATE_END_OF_LIST()
2598 }
2599 };
2600
2601 static const VMStateDescription vmstate_virtio_packed_virtqueues = {
2602 .name = "virtio/packed_virtqueues",
2603 .version_id = 1,
2604 .minimum_version_id = 1,
2605 .needed = &virtio_packed_virtqueue_needed,
2606 .fields = (VMStateField[]) {
2607 VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2608 VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue),
2609 VMSTATE_END_OF_LIST()
2610 }
2611 };
2612
2613 static const VMStateDescription vmstate_ringsize = {
2614 .name = "ringsize_state",
2615 .version_id = 1,
2616 .minimum_version_id = 1,
2617 .fields = (VMStateField[]) {
2618 VMSTATE_UINT32(vring.num_default, struct VirtQueue),
2619 VMSTATE_END_OF_LIST()
2620 }
2621 };
2622
2623 static const VMStateDescription vmstate_virtio_ringsize = {
2624 .name = "virtio/ringsize",
2625 .version_id = 1,
2626 .minimum_version_id = 1,
2627 .needed = &virtio_ringsize_needed,
2628 .fields = (VMStateField[]) {
2629 VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2630 VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
2631 VMSTATE_END_OF_LIST()
2632 }
2633 };
2634
2635 static int get_extra_state(QEMUFile *f, void *pv, size_t size,
2636 const VMStateField *field)
2637 {
2638 VirtIODevice *vdev = pv;
2639 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2640 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2641
2642 if (!k->load_extra_state) {
2643 return -1;
2644 } else {
2645 return k->load_extra_state(qbus->parent, f);
2646 }
2647 }
2648
2649 static int put_extra_state(QEMUFile *f, void *pv, size_t size,
2650 const VMStateField *field, JSONWriter *vmdesc)
2651 {
2652 VirtIODevice *vdev = pv;
2653 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2654 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2655
2656 k->save_extra_state(qbus->parent, f);
2657 return 0;
2658 }
2659
2660 static const VMStateInfo vmstate_info_extra_state = {
2661 .name = "virtqueue_extra_state",
2662 .get = get_extra_state,
2663 .put = put_extra_state,
2664 };
2665
2666 static const VMStateDescription vmstate_virtio_extra_state = {
2667 .name = "virtio/extra_state",
2668 .version_id = 1,
2669 .minimum_version_id = 1,
2670 .needed = &virtio_extra_state_needed,
2671 .fields = (VMStateField[]) {
2672 {
2673 .name = "extra_state",
2674 .version_id = 0,
2675 .field_exists = NULL,
2676 .size = 0,
2677 .info = &vmstate_info_extra_state,
2678 .flags = VMS_SINGLE,
2679 .offset = 0,
2680 },
2681 VMSTATE_END_OF_LIST()
2682 }
2683 };
2684
2685 static const VMStateDescription vmstate_virtio_device_endian = {
2686 .name = "virtio/device_endian",
2687 .version_id = 1,
2688 .minimum_version_id = 1,
2689 .needed = &virtio_device_endian_needed,
2690 .fields = (VMStateField[]) {
2691 VMSTATE_UINT8(device_endian, VirtIODevice),
2692 VMSTATE_END_OF_LIST()
2693 }
2694 };
2695
2696 static const VMStateDescription vmstate_virtio_64bit_features = {
2697 .name = "virtio/64bit_features",
2698 .version_id = 1,
2699 .minimum_version_id = 1,
2700 .needed = &virtio_64bit_features_needed,
2701 .fields = (VMStateField[]) {
2702 VMSTATE_UINT64(guest_features, VirtIODevice),
2703 VMSTATE_END_OF_LIST()
2704 }
2705 };
2706
2707 static const VMStateDescription vmstate_virtio_broken = {
2708 .name = "virtio/broken",
2709 .version_id = 1,
2710 .minimum_version_id = 1,
2711 .needed = &virtio_broken_needed,
2712 .fields = (VMStateField[]) {
2713 VMSTATE_BOOL(broken, VirtIODevice),
2714 VMSTATE_END_OF_LIST()
2715 }
2716 };
2717
2718 static const VMStateDescription vmstate_virtio_started = {
2719 .name = "virtio/started",
2720 .version_id = 1,
2721 .minimum_version_id = 1,
2722 .needed = &virtio_started_needed,
2723 .fields = (VMStateField[]) {
2724 VMSTATE_BOOL(started, VirtIODevice),
2725 VMSTATE_END_OF_LIST()
2726 }
2727 };
2728
2729 static const VMStateDescription vmstate_virtio_disabled = {
2730 .name = "virtio/disabled",
2731 .version_id = 1,
2732 .minimum_version_id = 1,
2733 .needed = &virtio_disabled_needed,
2734 .fields = (VMStateField[]) {
2735 VMSTATE_BOOL(disabled, VirtIODevice),
2736 VMSTATE_END_OF_LIST()
2737 }
2738 };
2739
2740 static const VMStateDescription vmstate_virtio = {
2741 .name = "virtio",
2742 .version_id = 1,
2743 .minimum_version_id = 1,
2744 .fields = (VMStateField[]) {
2745 VMSTATE_END_OF_LIST()
2746 },
2747 .subsections = (const VMStateDescription*[]) {
2748 &vmstate_virtio_device_endian,
2749 &vmstate_virtio_64bit_features,
2750 &vmstate_virtio_virtqueues,
2751 &vmstate_virtio_ringsize,
2752 &vmstate_virtio_broken,
2753 &vmstate_virtio_extra_state,
2754 &vmstate_virtio_started,
2755 &vmstate_virtio_packed_virtqueues,
2756 &vmstate_virtio_disabled,
2757 NULL
2758 }
2759 };
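
/*
 * Reminder on the layout above: the top-level "virtio" section has no fields
 * of its own; all optional state lives in subsections, and a subsection is
 * only written when its .needed callback returns true. That is what keeps
 * streams produced here loadable by older QEMU versions that predate, for
 * example, packed virtqueues or the disabled flag.
 */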
2760
2761 int virtio_save(VirtIODevice *vdev, QEMUFile *f)
2762 {
2763 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2764 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2765 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2766 uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
2767 int i;
2768
2769 if (k->save_config) {
2770 k->save_config(qbus->parent, f);
2771 }
2772
2773 qemu_put_8s(f, &vdev->status);
2774 qemu_put_8s(f, &vdev->isr);
2775 qemu_put_be16s(f, &vdev->queue_sel);
2776 qemu_put_be32s(f, &guest_features_lo);
2777 qemu_put_be32(f, vdev->config_len);
2778 qemu_put_buffer(f, vdev->config, vdev->config_len);
2779
2780 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2781 if (vdev->vq[i].vring.num == 0)
2782 break;
2783 }
2784
2785 qemu_put_be32(f, i);
2786
2787 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2788 if (vdev->vq[i].vring.num == 0)
2789 break;
2790
2791 qemu_put_be32(f, vdev->vq[i].vring.num);
2792 if (k->has_variable_vring_alignment) {
2793 qemu_put_be32(f, vdev->vq[i].vring.align);
2794 }
2795 /*
2796 * Save desc now, the rest of the ring addresses are saved in
2797 * subsections for VIRTIO-1 devices.
2798 */
2799 qemu_put_be64(f, vdev->vq[i].vring.desc);
2800 qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
2801 if (k->save_queue) {
2802 k->save_queue(qbus->parent, i, f);
2803 }
2804 }
2805
2806 if (vdc->save != NULL) {
2807 vdc->save(vdev, f);
2808 }
2809
2810 if (vdc->vmsd) {
2811 int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL);
2812 if (ret) {
2813 return ret;
2814 }
2815 }
2816
2817 /* Subsections */
2818 return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
2819 }
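
/*
 * Rough shape of the stream written above (for orientation, not a format
 * specification): transport config, status, isr, queue_sel, low 32 feature
 * bits, config length and blob, the number of in-use queues, then per queue
 * the size, optional alignment, desc address, last_avail_idx and transport
 * queue state, followed by the device's own save()/vmsd data and finally
 * the "virtio" subsections.
 */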
2820
2821 /* A wrapper for use as a VMState .put function */
2822 static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
2823 const VMStateField *field, JSONWriter *vmdesc)
2824 {
2825 return virtio_save(VIRTIO_DEVICE(opaque), f);
2826 }
2827
2828 /* A wrapper for use as a VMState .get function */
2829 static int virtio_device_get(QEMUFile *f, void *opaque, size_t size,
2830 const VMStateField *field)
2831 {
2832 VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
2833 DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
2834
2835 return virtio_load(vdev, f, dc->vmsd->version_id);
2836 }
2837
2838 const VMStateInfo virtio_vmstate_info = {
2839 .name = "virtio",
2840 .get = virtio_device_get,
2841 .put = virtio_device_put,
2842 };
2843
2844 static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
2845 {
2846 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2847 bool bad = (val & ~(vdev->host_features)) != 0;
2848
2849 val &= vdev->host_features;
2850 if (k->set_features) {
2851 k->set_features(vdev, val);
2852 }
2853 vdev->guest_features = val;
2854 return bad ? -1 : 0;
2855 }
2856
2857 int virtio_set_features(VirtIODevice *vdev, uint64_t val)
2858 {
2859 int ret;
2860 /*
2861 * The driver must not attempt to set features after feature negotiation
2862 * has finished.
2863 */
2864 if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
2865 return -EINVAL;
2866 }
2867
2868 if (val & (1ull << VIRTIO_F_BAD_FEATURE)) {
2869 qemu_log_mask(LOG_GUEST_ERROR,
2870 "%s: guest driver for %s has enabled UNUSED(30) feature bit!\n",
2871 __func__, vdev->name);
2872 }
2873
2874 ret = virtio_set_features_nocheck(vdev, val);
2875 if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2876 /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches. */
2877 int i;
2878 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2879 if (vdev->vq[i].vring.num != 0) {
2880 virtio_init_region_cache(vdev, i);
2881 }
2882 }
2883 }
2884 if (!ret) {
2885 if (!virtio_device_started(vdev, vdev->status) &&
2886 !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2887 vdev->start_on_kick = true;
2888 }
2889 }
2890 return ret;
2891 }
2892
2893 size_t virtio_get_config_size(const VirtIOConfigSizeParams *params,
2894 uint64_t host_features)
2895 {
2896 size_t config_size = params->min_size;
2897 const VirtIOFeature *feature_sizes = params->feature_sizes;
2898 size_t i;
2899
2900 for (i = 0; feature_sizes[i].flags != 0; i++) {
2901 if (host_features & feature_sizes[i].flags) {
2902 config_size = MAX(feature_sizes[i].end, config_size);
2903 }
2904 }
2905
2906 assert(config_size <= params->max_size);
2907 return config_size;
2908 }
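
/*
 * Worked example with a hypothetical feature table (names are made up):
 *
 *     static const VirtIOFeature sizes[] = {
 *         { .flags = 1ULL << MY_FEATURE_BIT, .end = 24 },
 *         {}
 *     };
 *     const VirtIOConfigSizeParams p = {
 *         .min_size = 8, .max_size = 24, .feature_sizes = sizes,
 *     };
 *
 * virtio_get_config_size(&p, host_features) returns 8 when MY_FEATURE_BIT is
 * not offered and 24 when it is; the assert guards against a table entry
 * ever exceeding max_size.
 */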
2909
2910 int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
2911 {
2912 int i, ret;
2913 int32_t config_len;
2914 uint32_t num;
2915 uint32_t features;
2916 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2917 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2918 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2919
2920 /*
2921 * We poison the endianness to ensure it does not get used before
2922 * subsections have been loaded.
2923 */
2924 vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
2925
2926 if (k->load_config) {
2927 ret = k->load_config(qbus->parent, f);
2928 if (ret)
2929 return ret;
2930 }
2931
2932 qemu_get_8s(f, &vdev->status);
2933 qemu_get_8s(f, &vdev->isr);
2934 qemu_get_be16s(f, &vdev->queue_sel);
2935 if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
2936 return -1;
2937 }
2938 qemu_get_be32s(f, &features);
2939
2940 /*
2941 * Temporarily set guest_features low bits - needed by
2942 * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
2943 * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
2944 *
2945 * Note: devices should always test host features in future - don't create
2946 * new dependencies like this.
2947 */
2948 vdev->guest_features = features;
2949
2950 config_len = qemu_get_be32(f);
2951
2952 /*
2953 * There are cases where the incoming config can be bigger or smaller
2954 * than what we have; so load what we have space for, and skip
2955 * any excess that's in the stream.
2956 */
2957 qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
2958
2959 while (config_len > vdev->config_len) {
2960 qemu_get_byte(f);
2961 config_len--;
2962 }
2963
2964 num = qemu_get_be32(f);
2965
2966 if (num > VIRTIO_QUEUE_MAX) {
2967 error_report("Invalid number of virtqueues: 0x%x", num);
2968 return -1;
2969 }
2970
2971 for (i = 0; i < num; i++) {
2972 vdev->vq[i].vring.num = qemu_get_be32(f);
2973 if (k->has_variable_vring_alignment) {
2974 vdev->vq[i].vring.align = qemu_get_be32(f);
2975 }
2976 vdev->vq[i].vring.desc = qemu_get_be64(f);
2977 qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
2978 vdev->vq[i].signalled_used_valid = false;
2979 vdev->vq[i].notification = true;
2980
2981 if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
2982 error_report("VQ %d address 0x0 "
2983 "inconsistent with Host index 0x%x",
2984 i, vdev->vq[i].last_avail_idx);
2985 return -1;
2986 }
2987 if (k->load_queue) {
2988 ret = k->load_queue(qbus->parent, i, f);
2989 if (ret)
2990 return ret;
2991 }
2992 }
2993
2994 virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
2995
2996 if (vdc->load != NULL) {
2997 ret = vdc->load(vdev, f, version_id);
2998 if (ret) {
2999 return ret;
3000 }
3001 }
3002
3003 if (vdc->vmsd) {
3004 ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
3005 if (ret) {
3006 return ret;
3007 }
3008 }
3009
3010 /* Subsections */
3011 ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
3012 if (ret) {
3013 return ret;
3014 }
3015
3016 if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
3017 vdev->device_endian = virtio_default_endian();
3018 }
3019
3020 if (virtio_64bit_features_needed(vdev)) {
3021 /*
3022 * Subsection load filled vdev->guest_features. Run them
3023 * through virtio_set_features to sanity-check them against
3024 * host_features.
3025 */
3026 uint64_t features64 = vdev->guest_features;
3027 if (virtio_set_features_nocheck(vdev, features64) < 0) {
3028 error_report("Features 0x%" PRIx64 " unsupported. "
3029 "Allowed features: 0x%" PRIx64,
3030 features64, vdev->host_features);
3031 return -1;
3032 }
3033 } else {
3034 if (virtio_set_features_nocheck(vdev, features) < 0) {
3035 error_report("Features 0x%x unsupported. "
3036 "Allowed features: 0x%" PRIx64,
3037 features, vdev->host_features);
3038 return -1;
3039 }
3040 }
3041
3042 if (!virtio_device_started(vdev, vdev->status) &&
3043 !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3044 vdev->start_on_kick = true;
3045 }
3046
3047 RCU_READ_LOCK_GUARD();
3048 for (i = 0; i < num; i++) {
3049 if (vdev->vq[i].vring.desc) {
3050 uint16_t nheads;
3051
3052 /*
3053 * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
3054 * only the region cache needs to be set up. Legacy devices need
3055 * to calculate used and avail ring addresses based on the desc
3056 * address.
3057 */
3058 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3059 virtio_init_region_cache(vdev, i);
3060 } else {
3061 virtio_queue_update_rings(vdev, i);
3062 }
3063
3064 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3065 vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx;
3066 vdev->vq[i].shadow_avail_wrap_counter =
3067 vdev->vq[i].last_avail_wrap_counter;
3068 continue;
3069 }
3070
3071 nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
3072 /* Check it isn't doing strange things with descriptor numbers. */
3073 if (nheads > vdev->vq[i].vring.num) {
3074 virtio_error(vdev, "VQ %d size 0x%x Guest index 0x%x "
3075 "inconsistent with Host index 0x%x: delta 0x%x",
3076 i, vdev->vq[i].vring.num,
3077 vring_avail_idx(&vdev->vq[i]),
3078 vdev->vq[i].last_avail_idx, nheads);
3079 vdev->vq[i].used_idx = 0;
3080 vdev->vq[i].shadow_avail_idx = 0;
3081 vdev->vq[i].inuse = 0;
3082 continue;
3083 }
3084 vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
3085 vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
3086
3087 /*
3088 * Some devices migrate VirtQueueElements that have been popped
3089 * from the avail ring but not yet returned to the used ring.
3090 * Since max ring size < UINT16_MAX it's safe to use modulo
3091 * UINT16_MAX + 1 subtraction.
3092 */
3093 vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
3094 vdev->vq[i].used_idx);
3095 if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
3096 error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
3097 "used_idx 0x%x",
3098 i, vdev->vq[i].vring.num,
3099 vdev->vq[i].last_avail_idx,
3100 vdev->vq[i].used_idx);
3101 return -1;
3102 }
3103 }
3104 }
3105
3106 if (vdc->post_load) {
3107 ret = vdc->post_load(vdev);
3108 if (ret) {
3109 return ret;
3110 }
3111 }
3112
3113 return 0;
3114 }
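
/*
 * Example of the inuse recovery above (illustrative numbers): with
 * last_avail_idx = 2 and used_idx = 0xffff, the 16-bit subtraction yields
 * inuse = 3, i.e. three elements had been popped from the avail ring but
 * not yet pushed to the used ring when the state was saved; any value
 * larger than vring.num is rejected as corrupt.
 */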
3115
3116 void virtio_cleanup(VirtIODevice *vdev)
3117 {
3118 qemu_del_vm_change_state_handler(vdev->vmstate);
3119 }
3120
3121 static void virtio_vmstate_change(void *opaque, bool running, RunState state)
3122 {
3123 VirtIODevice *vdev = opaque;
3124 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3125 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3126 bool backend_run = running && virtio_device_started(vdev, vdev->status);
3127 vdev->vm_running = running;
3128
3129 if (backend_run) {
3130 virtio_set_status(vdev, vdev->status);
3131 }
3132
3133 if (k->vmstate_change) {
3134 k->vmstate_change(qbus->parent, backend_run);
3135 }
3136
3137 if (!backend_run) {
3138 virtio_set_status(vdev, vdev->status);
3139 }
3140 }
3141
3142 void virtio_instance_init_common(Object *proxy_obj, void *data,
3143 size_t vdev_size, const char *vdev_name)
3144 {
3145 DeviceState *vdev = data;
3146
3147 object_initialize_child_with_props(proxy_obj, "virtio-backend", vdev,
3148 vdev_size, vdev_name, &error_abort,
3149 NULL);
3150 qdev_alias_all_properties(vdev, proxy_obj);
3151 }
3152
3153 void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t config_size)
3154 {
3155 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3156 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3157 int i;
3158 int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
3159
3160 if (nvectors) {
3161 vdev->vector_queues =
3162 g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
3163 }
3164
3165 vdev->start_on_kick = false;
3166 vdev->started = false;
3167 vdev->vhost_started = false;
3168 vdev->device_id = device_id;
3169 vdev->status = 0;
3170 qatomic_set(&vdev->isr, 0);
3171 vdev->queue_sel = 0;
3172 vdev->config_vector = VIRTIO_NO_VECTOR;
3173 vdev->vq = g_new0(VirtQueue, VIRTIO_QUEUE_MAX);
3174 vdev->vm_running = runstate_is_running();
3175 vdev->broken = false;
3176 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3177 vdev->vq[i].vector = VIRTIO_NO_VECTOR;
3178 vdev->vq[i].vdev = vdev;
3179 vdev->vq[i].queue_index = i;
3180 vdev->vq[i].host_notifier_enabled = false;
3181 }
3182
3183 vdev->name = virtio_id_to_name(device_id);
3184 vdev->config_len = config_size;
3185 if (vdev->config_len) {
3186 vdev->config = g_malloc0(config_size);
3187 } else {
3188 vdev->config = NULL;
3189 }
3190 vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
3191 virtio_vmstate_change, vdev);
3192 vdev->device_endian = virtio_default_endian();
3193 vdev->use_guest_notifier_mask = true;
3194 }
3195
3196 /*
3197 * Only devices that already existed before the virtio standard was defined
3198 * support legacy mode; this includes devices not specified in the standard.
3199 * All newer devices conform to the virtio standard only.
3200 */
3201 bool virtio_legacy_allowed(VirtIODevice *vdev)
3202 {
3203 switch (vdev->device_id) {
3204 case VIRTIO_ID_NET:
3205 case VIRTIO_ID_BLOCK:
3206 case VIRTIO_ID_CONSOLE:
3207 case VIRTIO_ID_RNG:
3208 case VIRTIO_ID_BALLOON:
3209 case VIRTIO_ID_RPMSG:
3210 case VIRTIO_ID_SCSI:
3211 case VIRTIO_ID_9P:
3212 case VIRTIO_ID_RPROC_SERIAL:
3213 case VIRTIO_ID_CAIF:
3214 return true;
3215 default:
3216 return false;
3217 }
3218 }
3219
3220 bool virtio_legacy_check_disabled(VirtIODevice *vdev)
3221 {
3222 return vdev->disable_legacy_check;
3223 }
3224
3225 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
3226 {
3227 return vdev->vq[n].vring.desc;
3228 }
3229
3230 bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n)
3231 {
3232 return virtio_queue_get_desc_addr(vdev, n) != 0;
3233 }
3234
3235 bool virtio_queue_enabled(VirtIODevice *vdev, int n)
3236 {
3237 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3238 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3239
3240 if (k->queue_enabled) {
3241 return k->queue_enabled(qbus->parent, n);
3242 }
3243 return virtio_queue_enabled_legacy(vdev, n);
3244 }
3245
3246 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
3247 {
3248 return vdev->vq[n].vring.avail;
3249 }
3250
3251 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
3252 {
3253 return vdev->vq[n].vring.used;
3254 }
3255
3256 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
3257 {
3258 return sizeof(VRingDesc) * vdev->vq[n].vring.num;
3259 }
3260
3261 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
3262 {
3263 int s;
3264
3265 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3266 return sizeof(struct VRingPackedDescEvent);
3267 }
3268
3269 s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3270 return offsetof(VRingAvail, ring) +
3271 sizeof(uint16_t) * vdev->vq[n].vring.num + s;
3272 }
3273
3274 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
3275 {
3276 int s;
3277
3278 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3279 return sizeof(struct VRingPackedDescEvent);
3280 }
3281
3282 s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3283 return offsetof(VRingUsed, ring) +
3284 sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
3285 }
3286
3287 static unsigned int virtio_queue_packed_get_last_avail_idx(VirtIODevice *vdev,
3288 int n)
3289 {
3290 unsigned int avail, used;
3291
3292 avail = vdev->vq[n].last_avail_idx;
3293 avail |= ((uint16_t)vdev->vq[n].last_avail_wrap_counter) << 15;
3294
3295 used = vdev->vq[n].used_idx;
3296 used |= ((uint16_t)vdev->vq[n].used_wrap_counter) << 15;
3297
3298 return avail | used << 16;
3299 }
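
/*
 * Encoding used by the packed-ring get/set helpers here: bits 0..14 carry
 * last_avail_idx, bit 15 its wrap counter, bits 16..30 used_idx and bit 31
 * the used wrap counter, so the whole queue position round-trips through one
 * 32-bit value (this mirrors the packed-ring vring base layout that
 * vhost-user uses).
 */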
3300
3301 static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev,
3302 int n)
3303 {
3304 return vdev->vq[n].last_avail_idx;
3305 }
3306
3307 unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
3308 {
3309 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3310 return virtio_queue_packed_get_last_avail_idx(vdev, n);
3311 } else {
3312 return virtio_queue_split_get_last_avail_idx(vdev, n);
3313 }
3314 }
3315
3316 static void virtio_queue_packed_set_last_avail_idx(VirtIODevice *vdev,
3317 int n, unsigned int idx)
3318 {
3319 struct VirtQueue *vq = &vdev->vq[n];
3320
3321 vq->last_avail_idx = vq->shadow_avail_idx = idx & 0x7fff;
3322 vq->last_avail_wrap_counter =
3323 vq->shadow_avail_wrap_counter = !!(idx & 0x8000);
3324 idx >>= 16;
3325 vq->used_idx = idx & 0x7fff;
3326 vq->used_wrap_counter = !!(idx & 0x8000);
3327 }
3328
3329 static void virtio_queue_split_set_last_avail_idx(VirtIODevice *vdev,
3330 int n, unsigned int idx)
3331 {
3332 vdev->vq[n].last_avail_idx = idx;
3333 vdev->vq[n].shadow_avail_idx = idx;
3334 }
3335
3336 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n,
3337 unsigned int idx)
3338 {
3339 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3340 virtio_queue_packed_set_last_avail_idx(vdev, n, idx);
3341 } else {
3342 virtio_queue_split_set_last_avail_idx(vdev, n, idx);
3343 }
3344 }
3345
3346 static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev,
3347 int n)
3348 {
3349 /* Packed rings have no index in shared memory to restore from */
3350 return;
3351 }
3352
3353 static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev,
3354 int n)
3355 {
3356 RCU_READ_LOCK_GUARD();
3357 if (vdev->vq[n].vring.desc) {
3358 vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
3359 vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
3360 }
3361 }
3362
3363 void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
3364 {
3365 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3366 virtio_queue_packed_restore_last_avail_idx(vdev, n);
3367 } else {
3368 virtio_queue_split_restore_last_avail_idx(vdev, n);
3369 }
3370 }
3371
3372 static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n)
3373 {
3374 /* used idx was updated through set_last_avail_idx() */
3375 return;
3376 }
3377
3378 static void virtio_queue_split_update_used_idx(VirtIODevice *vdev, int n)
3379 {
3380 RCU_READ_LOCK_GUARD();
3381 if (vdev->vq[n].vring.desc) {
3382 vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
3383 }
3384 }
3385
3386 void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
3387 {
3388 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3389 return virtio_queue_packed_update_used_idx(vdev, n);
3390 } else {
3391 return virtio_queue_split_update_used_idx(vdev, n);
3392 }
3393 }
3394
3395 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
3396 {
3397 vdev->vq[n].signalled_used_valid = false;
3398 }
3399
3400 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
3401 {
3402 return vdev->vq + n;
3403 }
3404
3405 uint16_t virtio_get_queue_index(VirtQueue *vq)
3406 {
3407 return vq->queue_index;
3408 }
3409
3410 static void virtio_queue_guest_notifier_read(EventNotifier *n)
3411 {
3412 VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
3413 if (event_notifier_test_and_clear(n)) {
3414 virtio_irq(vq);
3415 }
3416 }
3417 static void virtio_config_guest_notifier_read(EventNotifier *n)
3418 {
3419 VirtIODevice *vdev = container_of(n, VirtIODevice, config_notifier);
3420
3421 if (event_notifier_test_and_clear(n)) {
3422 virtio_notify_config(vdev);
3423 }
3424 }
3425 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
3426 bool with_irqfd)
3427 {
3428 if (assign && !with_irqfd) {
3429 event_notifier_set_handler(&vq->guest_notifier,
3430 virtio_queue_guest_notifier_read);
3431 } else {
3432 event_notifier_set_handler(&vq->guest_notifier, NULL);
3433 }
3434 if (!assign) {
3435 /* Test and clear notifier before closing it,
3436 * in case poll callback didn't have time to run. */
3437 virtio_queue_guest_notifier_read(&vq->guest_notifier);
3438 }
3439 }
3440
3441 void virtio_config_set_guest_notifier_fd_handler(VirtIODevice *vdev,
3442 bool assign, bool with_irqfd)
3443 {
3444 EventNotifier *n;
3445 n = &vdev->config_notifier;
3446 if (assign && !with_irqfd) {
3447 event_notifier_set_handler(n, virtio_config_guest_notifier_read);
3448 } else {
3449 event_notifier_set_handler(n, NULL);
3450 }
3451 if (!assign) {
3452 /* Test and clear notifier before closing it,
3453 * in case poll callback didn't have time to run. */
3454 virtio_config_guest_notifier_read(n);
3455 }
3456 }
3457
3458 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
3459 {
3460 return &vq->guest_notifier;
3461 }
3462
3463 static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
3464 {
3465 VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3466
3467 virtio_queue_set_notification(vq, 0);
3468 }
3469
3470 static bool virtio_queue_host_notifier_aio_poll(void *opaque)
3471 {
3472 EventNotifier *n = opaque;
3473 VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3474
3475 return vq->vring.desc && !virtio_queue_empty(vq);
3476 }
3477
3478 static void virtio_queue_host_notifier_aio_poll_ready(EventNotifier *n)
3479 {
3480 VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3481
3482 virtio_queue_notify_vq(vq);
3483 }
3484
3485 static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
3486 {
3487 VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3488
3489 /* Caller polls once more after this to catch requests that race with us */
3490 virtio_queue_set_notification(vq, 1);
3491 }
3492
3493 void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
3494 {
3495 aio_set_event_notifier(ctx, &vq->host_notifier, true,
3496 virtio_queue_host_notifier_read,
3497 virtio_queue_host_notifier_aio_poll,
3498 virtio_queue_host_notifier_aio_poll_ready);
3499 aio_set_event_notifier_poll(ctx, &vq->host_notifier,
3500 virtio_queue_host_notifier_aio_poll_begin,
3501 virtio_queue_host_notifier_aio_poll_end);
3502 }
3503
3504 /*
3505 * Same as virtio_queue_aio_attach_host_notifier() but without polling. Use
3506 * this for rx virtqueues and similar cases where the virtqueue handler
3507 * function does not pop all elements. When the virtqueue is left non-empty,
3508 * polling consumes CPU cycles and should not be used.
3509 */
3510 void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx)
3511 {
3512 aio_set_event_notifier(ctx, &vq->host_notifier, true,
3513 virtio_queue_host_notifier_read,
3514 NULL, NULL);
3515 }
3516
3517 void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx)
3518 {
3519 aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL, NULL);
3520 /* Test and clear notifier after disabling the event,
3521 * in case poll callback didn't have time to run. */
3522 virtio_queue_host_notifier_read(&vq->host_notifier);
3523 }
3524
3525 void virtio_queue_host_notifier_read(EventNotifier *n)
3526 {
3527 VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3528 if (event_notifier_test_and_clear(n)) {
3529 virtio_queue_notify_vq(vq);
3530 }
3531 }
3532
3533 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
3534 {
3535 return &vq->host_notifier;
3536 }
3537
3538 EventNotifier *virtio_config_get_guest_notifier(VirtIODevice *vdev)
3539 {
3540 return &vdev->config_notifier;
3541 }
3542
3543 void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled)
3544 {
3545 vq->host_notifier_enabled = enabled;
3546 }
3547
3548 int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
3549 MemoryRegion *mr, bool assign)
3550 {
3551 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3552 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3553
3554 if (k->set_host_notifier_mr) {
3555 return k->set_host_notifier_mr(qbus->parent, n, mr, assign);
3556 }
3557
3558 return -1;
3559 }
3560
3561 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
3562 {
3563 g_free(vdev->bus_name);
3564 vdev->bus_name = g_strdup(bus_name);
3565 }
3566
3567 void G_GNUC_PRINTF(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
3568 {
3569 va_list ap;
3570
3571 va_start(ap, fmt);
3572 error_vreport(fmt, ap);
3573 va_end(ap);
3574
3575 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3576 vdev->status = vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET;
3577 virtio_notify_config(vdev);
3578 }
3579
3580 vdev->broken = true;
3581 }
3582
3583 static void virtio_memory_listener_commit(MemoryListener *listener)
3584 {
3585 VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
3586 int i;
3587
3588 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3589 if (vdev->vq[i].vring.num == 0) {
3590 break;
3591 }
3592 virtio_init_region_cache(vdev, i);
3593 }
3594 }
3595
3596 static void virtio_device_realize(DeviceState *dev, Error **errp)
3597 {
3598 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3599 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3600 Error *err = NULL;
3601
3602 /* Devices should either use vmsd or the load/save methods */
3603 assert(!vdc->vmsd || !vdc->load);
3604
3605 if (vdc->realize != NULL) {
3606 vdc->realize(dev, &err);
3607 if (err != NULL) {
3608 error_propagate(errp, err);
3609 return;
3610 }
3611 }
3612
3613 virtio_bus_device_plugged(vdev, &err);
3614 if (err != NULL) {
3615 error_propagate(errp, err);
3616 vdc->unrealize(dev);
3617 return;
3618 }
3619
3620 vdev->listener.commit = virtio_memory_listener_commit;
3621 vdev->listener.name = "virtio";
3622 memory_listener_register(&vdev->listener, vdev->dma_as);
3623 QTAILQ_INSERT_TAIL(&virtio_list, vdev, next);
3624 }
3625
3626 static void virtio_device_unrealize(DeviceState *dev)
3627 {
3628 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3629 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3630
3631 memory_listener_unregister(&vdev->listener);
3632 virtio_bus_device_unplugged(vdev);
3633
3634 if (vdc->unrealize != NULL) {
3635 vdc->unrealize(dev);
3636 }
3637
3638 QTAILQ_REMOVE(&virtio_list, vdev, next);
3639 g_free(vdev->bus_name);
3640 vdev->bus_name = NULL;
3641 }
3642
3643 static void virtio_device_free_virtqueues(VirtIODevice *vdev)
3644 {
3645 int i;
3646 if (!vdev->vq) {
3647 return;
3648 }
3649
3650 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3651 if (vdev->vq[i].vring.num == 0) {
3652 break;
3653 }
3654 virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
3655 }
3656 g_free(vdev->vq);
3657 }
3658
3659 static void virtio_device_instance_finalize(Object *obj)
3660 {
3661 VirtIODevice *vdev = VIRTIO_DEVICE(obj);
3662
3663 virtio_device_free_virtqueues(vdev);
3664
3665 g_free(vdev->config);
3666 g_free(vdev->vector_queues);
3667 }
3668
3669 static Property virtio_properties[] = {
3670 DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
3671 DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
3672 DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true),
3673 DEFINE_PROP_BOOL("x-disable-legacy-check", VirtIODevice,
3674 disable_legacy_check, false),
3675 DEFINE_PROP_END_OF_LIST(),
3676 };
3677
3678 static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
3679 {
3680 VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3681 int i, n, r, err;
3682
3683 /*
3684 * Batch all the host notifiers in a single transaction to avoid
3685 * quadratic time complexity in address_space_update_ioeventfds().
3686 */
3687 memory_region_transaction_begin();
3688 for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3689 VirtQueue *vq = &vdev->vq[n];
3690 if (!virtio_queue_get_num(vdev, n)) {
3691 continue;
3692 }
3693 r = virtio_bus_set_host_notifier(qbus, n, true);
3694 if (r < 0) {
3695 err = r;
3696 goto assign_error;
3697 }
3698 event_notifier_set_handler(&vq->host_notifier,
3699 virtio_queue_host_notifier_read);
3700 }
3701
3702 for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3703 /* Kick right away to begin processing requests already in vring */
3704 VirtQueue *vq = &vdev->vq[n];
3705 if (!vq->vring.num) {
3706 continue;
3707 }
3708 event_notifier_set(&vq->host_notifier);
3709 }
3710 memory_region_transaction_commit();
3711 return 0;
3712
3713 assign_error:
3714 i = n; /* save n for a second iteration after transaction is committed. */
3715 while (--n >= 0) {
3716 VirtQueue *vq = &vdev->vq[n];
3717 if (!virtio_queue_get_num(vdev, n)) {
3718 continue;
3719 }
3720
3721 event_notifier_set_handler(&vq->host_notifier, NULL);
3722 r = virtio_bus_set_host_notifier(qbus, n, false);
3723 assert(r >= 0);
3724 }
3725 /*
3726 * The transaction expects the ioeventfds to be open when it
3727 * commits. Do it now, before the cleanup loop.
3728 */
3729 memory_region_transaction_commit();
3730
3731 while (--i >= 0) {
3732 if (!virtio_queue_get_num(vdev, i)) {
3733 continue;
3734 }
3735 virtio_bus_cleanup_host_notifier(qbus, i);
3736 }
3737 return err;
3738 }
3739
3740 int virtio_device_start_ioeventfd(VirtIODevice *vdev)
3741 {
3742 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3743 VirtioBusState *vbus = VIRTIO_BUS(qbus);
3744
3745 return virtio_bus_start_ioeventfd(vbus);
3746 }
3747
3748 static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
3749 {
3750 VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3751 int n, r;
3752
3753 /*
3754 * Batch all the host notifiers in a single transaction to avoid
3755 * quadratic time complexity in address_space_update_ioeventfds().
3756 */
3757 memory_region_transaction_begin();
3758 for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3759 VirtQueue *vq = &vdev->vq[n];
3760
3761 if (!virtio_queue_get_num(vdev, n)) {
3762 continue;
3763 }
3764 event_notifier_set_handler(&vq->host_notifier, NULL);
3765 r = virtio_bus_set_host_notifier(qbus, n, false);
3766 assert(r >= 0);
3767 }
3768 /*
3769 * The transaction expects the ioeventfds to be open when it
3770 * commits. Do it now, before the cleanup loop.
3771 */
3772 memory_region_transaction_commit();
3773
3774 for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3775 if (!virtio_queue_get_num(vdev, n)) {
3776 continue;
3777 }
3778 virtio_bus_cleanup_host_notifier(qbus, n);
3779 }
3780 }
3781
3782 int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
3783 {
3784 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3785 VirtioBusState *vbus = VIRTIO_BUS(qbus);
3786
3787 return virtio_bus_grab_ioeventfd(vbus);
3788 }
3789
3790 void virtio_device_release_ioeventfd(VirtIODevice *vdev)
3791 {
3792 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3793 VirtioBusState *vbus = VIRTIO_BUS(qbus);
3794
3795 virtio_bus_release_ioeventfd(vbus);
3796 }
3797
3798 static void virtio_device_class_init(ObjectClass *klass, void *data)
3799 {
3800 /* Set the default value here. */
3801 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3802 DeviceClass *dc = DEVICE_CLASS(klass);
3803
3804 dc->realize = virtio_device_realize;
3805 dc->unrealize = virtio_device_unrealize;
3806 dc->bus_type = TYPE_VIRTIO_BUS;
3807 device_class_set_props(dc, virtio_properties);
3808 vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
3809 vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
3810
3811 vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
3812
3813 QTAILQ_INIT(&virtio_list);
3814 }
3815
3816 bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
3817 {
3818 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3819 VirtioBusState *vbus = VIRTIO_BUS(qbus);
3820
3821 return virtio_bus_ioeventfd_enabled(vbus);
3822 }
3823
3824 VirtQueueStatus *qmp_x_query_virtio_queue_status(const char *path,
3825 uint16_t queue,
3826 Error **errp)
3827 {
3828 VirtIODevice *vdev;
3829 VirtQueueStatus *status;
3830
3831 vdev = qmp_find_virtio_device(path);
3832 if (vdev == NULL) {
3833 error_setg(errp, "Path %s is not a VirtIODevice", path);
3834 return NULL;
3835 }
3836
3837 if (queue >= VIRTIO_QUEUE_MAX || !virtio_queue_get_num(vdev, queue)) {
3838 error_setg(errp, "Invalid virtqueue number %d", queue);
3839 return NULL;
3840 }
3841
3842 status = g_new0(VirtQueueStatus, 1);
3843 status->name = g_strdup(vdev->name);
3844 status->queue_index = vdev->vq[queue].queue_index;
3845 status->inuse = vdev->vq[queue].inuse;
3846 status->vring_num = vdev->vq[queue].vring.num;
3847 status->vring_num_default = vdev->vq[queue].vring.num_default;
3848 status->vring_align = vdev->vq[queue].vring.align;
3849 status->vring_desc = vdev->vq[queue].vring.desc;
3850 status->vring_avail = vdev->vq[queue].vring.avail;
3851 status->vring_used = vdev->vq[queue].vring.used;
3852 status->used_idx = vdev->vq[queue].used_idx;
3853 status->signalled_used = vdev->vq[queue].signalled_used;
3854 status->signalled_used_valid = vdev->vq[queue].signalled_used_valid;
3855
3856 if (vdev->vhost_started) {
3857 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
3858 struct vhost_dev *hdev = vdc->get_vhost(vdev);
3859
3860 /* check if vq index exists for vhost as well */
3861 if (queue >= hdev->vq_index && queue < hdev->vq_index + hdev->nvqs) {
3862 status->has_last_avail_idx = true;
3863
3864 int vhost_vq_index =
3865 hdev->vhost_ops->vhost_get_vq_index(hdev, queue);
3866 struct vhost_vring_state state = {
3867 .index = vhost_vq_index,
3868 };
3869
3870 status->last_avail_idx =
3871 hdev->vhost_ops->vhost_get_vring_base(hdev, &state);
3872 }
3873 } else {
3874 status->has_shadow_avail_idx = true;
3875 status->has_last_avail_idx = true;
3876 status->last_avail_idx = vdev->vq[queue].last_avail_idx;
3877 status->shadow_avail_idx = vdev->vq[queue].shadow_avail_idx;
3878 }
3879
3880 return status;
3881 }
3882
3883 static strList *qmp_decode_vring_desc_flags(uint16_t flags)
3884 {
3885 strList *list = NULL;
3886 strList *node;
3887 int i;
3888
3889 struct {
3890 uint16_t flag;
3891 const char *value;
3892 } map[] = {
3893 { VRING_DESC_F_NEXT, "next" },
3894 { VRING_DESC_F_WRITE, "write" },
3895 { VRING_DESC_F_INDIRECT, "indirect" },
3896 { 1 << VRING_PACKED_DESC_F_AVAIL, "avail" },
3897 { 1 << VRING_PACKED_DESC_F_USED, "used" },
3898 { 0, "" }
3899 };
3900
3901 for (i = 0; map[i].flag; i++) {
3902 if ((map[i].flag & flags) == 0) {
3903 continue;
3904 }
3905 node = g_malloc0(sizeof(strList));
3906 node->value = g_strdup(map[i].value);
3907 node->next = list;
3908 list = node;
3909 }
3910
3911 return list;
3912 }
3913
3914 VirtioQueueElement *qmp_x_query_virtio_queue_element(const char *path,
3915 uint16_t queue,
3916 bool has_index,
3917 uint16_t index,
3918 Error **errp)
3919 {
3920 VirtIODevice *vdev;
3921 VirtQueue *vq;
3922 VirtioQueueElement *element = NULL;
3923
3924 vdev = qmp_find_virtio_device(path);
3925 if (vdev == NULL) {
3926 error_setg(errp, "Path %s is not a VirtIO device", path);
3927 return NULL;
3928 }
3929
3930 if (queue >= VIRTIO_QUEUE_MAX || !virtio_queue_get_num(vdev, queue)) {
3931 error_setg(errp, "Invalid virtqueue number %d", queue);
3932 return NULL;
3933 }
3934 vq = &vdev->vq[queue];
3935
3936 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3937 error_setg(errp, "Packed ring not supported");
3938 return NULL;
3939 } else {
3940 unsigned int head, i, max;
3941 VRingMemoryRegionCaches *caches;
3942 MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
3943 MemoryRegionCache *desc_cache;
3944 VRingDesc desc;
3945 VirtioRingDescList *list = NULL;
3946 VirtioRingDescList *node;
3947 int rc; int ndescs;
3948
3949 RCU_READ_LOCK_GUARD();
3950
3951 max = vq->vring.num;
3952
3953 if (!has_index) {
3954 head = vring_avail_ring(vq, vq->last_avail_idx % vq->vring.num);
3955 } else {
3956 head = vring_avail_ring(vq, index % vq->vring.num);
3957 }
3958 i = head;
3959
3960 caches = vring_get_region_caches(vq);
3961 if (!caches) {
3962 error_setg(errp, "Region caches not initialized");
3963 return NULL;
3964 }
3965 if (caches->desc.len < max * sizeof(VRingDesc)) {
3966 error_setg(errp, "Cannot map descriptor ring");
3967 return NULL;
3968 }
3969
3970 desc_cache = &caches->desc;
3971 vring_split_desc_read(vdev, &desc, desc_cache, i);
3972 if (desc.flags & VRING_DESC_F_INDIRECT) {
3973 int64_t len;
3974 len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
3975 desc.addr, desc.len, false);
3976 desc_cache = &indirect_desc_cache;
3977 if (len < desc.len) {
3978 error_setg(errp, "Cannot map indirect buffer");
3979 goto done;
3980 }
3981
3982 max = desc.len / sizeof(VRingDesc);
3983 i = 0;
3984 vring_split_desc_read(vdev, &desc, desc_cache, i);
3985 }
3986
3987 element = g_new0(VirtioQueueElement, 1);
3988 element->avail = g_new0(VirtioRingAvail, 1);
3989 element->used = g_new0(VirtioRingUsed, 1);
3990 element->name = g_strdup(vdev->name);
3991 element->index = head;
3992 element->avail->flags = vring_avail_flags(vq);
3993 element->avail->idx = vring_avail_idx(vq);
3994 element->avail->ring = head;
3995 element->used->flags = vring_used_flags(vq);
3996 element->used->idx = vring_used_idx(vq);
3997 ndescs = 0;
3998
3999 do {
4000 /* A buggy driver may produce an infinite loop */
4001 if (ndescs >= max) {
4002 break;
4003 }
4004 node = g_new0(VirtioRingDescList, 1);
4005 node->value = g_new0(VirtioRingDesc, 1);
4006 node->value->addr = desc.addr;
4007 node->value->len = desc.len;
4008 node->value->flags = qmp_decode_vring_desc_flags(desc.flags);
4009 node->next = list;
4010 list = node;
4011
4012 ndescs++;
4013 rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache,
4014 max, &i);
4015 } while (rc == VIRTQUEUE_READ_DESC_MORE);
4016 element->descs = list;
4017 done:
4018 address_space_cache_destroy(&indirect_desc_cache);
4019 }
4020
4021 return element;
4022 }
4023
4024 static const TypeInfo virtio_device_info = {
4025 .name = TYPE_VIRTIO_DEVICE,
4026 .parent = TYPE_DEVICE,
4027 .instance_size = sizeof(VirtIODevice),
4028 .class_init = virtio_device_class_init,
4029 .instance_finalize = virtio_device_instance_finalize,
4030 .abstract = true,
4031 .class_size = sizeof(VirtioDeviceClass),
4032 };
4033
4034 static void virtio_register_types(void)
4035 {
4036 type_register_static(&virtio_device_info);
4037 }
4038
4039 type_init(virtio_register_types)