1 /*
2 * Virtio Support
3 *
4 * Copyright IBM, Corp. 2007
5 *
6 * Authors:
7 * Anthony Liguori <aliguori@us.ibm.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
11 *
12 */
13
14 #include "qemu/osdep.h"
15 #include "qapi/error.h"
16 #include "qapi/qapi-commands-virtio.h"
17 #include "trace.h"
18 #include "qemu/error-report.h"
19 #include "qemu/log.h"
20 #include "qemu/main-loop.h"
21 #include "qemu/module.h"
22 #include "qom/object_interfaces.h"
23 #include "hw/core/cpu.h"
24 #include "hw/virtio/virtio.h"
25 #include "hw/virtio/vhost.h"
26 #include "migration/qemu-file-types.h"
27 #include "qemu/atomic.h"
28 #include "hw/virtio/virtio-bus.h"
29 #include "hw/qdev-properties.h"
30 #include "hw/virtio/virtio-access.h"
31 #include "sysemu/dma.h"
32 #include "sysemu/runstate.h"
33 #include "virtio-qmp.h"
34
35 #include "standard-headers/linux/virtio_ids.h"
36 #include "standard-headers/linux/vhost_types.h"
37 #include "standard-headers/linux/virtio_blk.h"
38 #include "standard-headers/linux/virtio_console.h"
39 #include "standard-headers/linux/virtio_gpu.h"
40 #include "standard-headers/linux/virtio_net.h"
41 #include "standard-headers/linux/virtio_scsi.h"
42 #include "standard-headers/linux/virtio_i2c.h"
43 #include "standard-headers/linux/virtio_balloon.h"
44 #include "standard-headers/linux/virtio_iommu.h"
45 #include "standard-headers/linux/virtio_mem.h"
46 #include "standard-headers/linux/virtio_vsock.h"
47
48 QmpVirtIODeviceList virtio_list;
49
50 /*
51 * Maximum size of virtio device config space
52 */
53 #define VHOST_USER_MAX_CONFIG_SIZE 256
54
55 /*
56 * The alignment to use between consumer and producer parts of vring.
57 * x86 pagesize again. This is the default, used by transports like PCI
58 * which don't provide a means for the guest to tell the host the alignment.
59 */
60 #define VIRTIO_PCI_VRING_ALIGN 4096
61
62 typedef struct VRingDesc
63 {
64 uint64_t addr;
65 uint32_t len;
66 uint16_t flags;
67 uint16_t next;
68 } VRingDesc;
69
70 typedef struct VRingPackedDesc {
71 uint64_t addr;
72 uint32_t len;
73 uint16_t id;
74 uint16_t flags;
75 } VRingPackedDesc;
76
77 typedef struct VRingAvail
78 {
79 uint16_t flags;
80 uint16_t idx;
81 uint16_t ring[];
82 } VRingAvail;
83
84 typedef struct VRingUsedElem
85 {
86 uint32_t id;
87 uint32_t len;
88 } VRingUsedElem;
89
90 typedef struct VRingUsed
91 {
92 uint16_t flags;
93 uint16_t idx;
94 VRingUsedElem ring[];
95 } VRingUsed;
96
97 typedef struct VRingMemoryRegionCaches {
98 struct rcu_head rcu;
99 MemoryRegionCache desc;
100 MemoryRegionCache avail;
101 MemoryRegionCache used;
102 } VRingMemoryRegionCaches;
103
104 typedef struct VRing
105 {
106 unsigned int num;
107 unsigned int num_default;
108 unsigned int align;
109 hwaddr desc;
110 hwaddr avail;
111 hwaddr used;
112 VRingMemoryRegionCaches *caches;
113 } VRing;
114
115 typedef struct VRingPackedDescEvent {
116 uint16_t off_wrap;
117 uint16_t flags;
118 } VRingPackedDescEvent;
119
120 struct VirtQueue
121 {
122 VRing vring;
123 VirtQueueElement *used_elems;
124
125 /* Next head to pop */
126 uint16_t last_avail_idx;
127 bool last_avail_wrap_counter;
128
129 /* Last avail_idx read from VQ. */
130 uint16_t shadow_avail_idx;
131 bool shadow_avail_wrap_counter;
132
133 uint16_t used_idx;
134 bool used_wrap_counter;
135
136 /* Last used index value we have signalled on */
137 uint16_t signalled_used;
138
139 /* Whether signalled_used is valid */
140 bool signalled_used_valid;
141
142 /* Notification enabled? */
143 bool notification;
144
145 uint16_t queue_index;
146
147 unsigned int inuse;
148
149 uint16_t vector;
150 VirtIOHandleOutput handle_output;
151 VirtIODevice *vdev;
152 EventNotifier guest_notifier;
153 EventNotifier host_notifier;
154 bool host_notifier_enabled;
155 QLIST_ENTRY(VirtQueue) node;
156 };
157
158 const char *virtio_device_names[] = {
159 [VIRTIO_ID_NET] = "virtio-net",
160 [VIRTIO_ID_BLOCK] = "virtio-blk",
161 [VIRTIO_ID_CONSOLE] = "virtio-serial",
162 [VIRTIO_ID_RNG] = "virtio-rng",
163 [VIRTIO_ID_BALLOON] = "virtio-balloon",
164 [VIRTIO_ID_IOMEM] = "virtio-iomem",
165 [VIRTIO_ID_RPMSG] = "virtio-rpmsg",
166 [VIRTIO_ID_SCSI] = "virtio-scsi",
167 [VIRTIO_ID_9P] = "virtio-9p",
168 [VIRTIO_ID_MAC80211_WLAN] = "virtio-mac-wlan",
169 [VIRTIO_ID_RPROC_SERIAL] = "virtio-rproc-serial",
170 [VIRTIO_ID_CAIF] = "virtio-caif",
171 [VIRTIO_ID_MEMORY_BALLOON] = "virtio-mem-balloon",
172 [VIRTIO_ID_GPU] = "virtio-gpu",
173 [VIRTIO_ID_CLOCK] = "virtio-clk",
174 [VIRTIO_ID_INPUT] = "virtio-input",
175 [VIRTIO_ID_VSOCK] = "vhost-vsock",
176 [VIRTIO_ID_CRYPTO] = "virtio-crypto",
177 [VIRTIO_ID_SIGNAL_DIST] = "virtio-signal",
178 [VIRTIO_ID_PSTORE] = "virtio-pstore",
179 [VIRTIO_ID_IOMMU] = "virtio-iommu",
180 [VIRTIO_ID_MEM] = "virtio-mem",
181 [VIRTIO_ID_SOUND] = "virtio-sound",
182 [VIRTIO_ID_FS] = "virtio-user-fs",
183 [VIRTIO_ID_PMEM] = "virtio-pmem",
184 [VIRTIO_ID_RPMB] = "virtio-rpmb",
185 [VIRTIO_ID_MAC80211_HWSIM] = "virtio-mac-hwsim",
186 [VIRTIO_ID_VIDEO_ENCODER] = "virtio-vid-encoder",
187 [VIRTIO_ID_VIDEO_DECODER] = "virtio-vid-decoder",
188 [VIRTIO_ID_SCMI] = "virtio-scmi",
189 [VIRTIO_ID_NITRO_SEC_MOD] = "virtio-nitro-sec-mod",
190 [VIRTIO_ID_I2C_ADAPTER] = "vhost-user-i2c",
191 [VIRTIO_ID_WATCHDOG] = "virtio-watchdog",
192 [VIRTIO_ID_CAN] = "virtio-can",
193 [VIRTIO_ID_DMABUF] = "virtio-dmabuf",
194 [VIRTIO_ID_PARAM_SERV] = "virtio-param-serv",
195 [VIRTIO_ID_AUDIO_POLICY] = "virtio-audio-pol",
196 [VIRTIO_ID_BT] = "virtio-bluetooth",
197 [VIRTIO_ID_GPIO] = "virtio-gpio"
198 };
199
200 static const char *virtio_id_to_name(uint16_t device_id)
201 {
202 assert(device_id < G_N_ELEMENTS(virtio_device_names));
203 const char *name = virtio_device_names[device_id];
204 assert(name != NULL);
205 return name;
206 }
207
208 /* Called within call_rcu(). */
209 static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
210 {
211 assert(caches != NULL);
212 address_space_cache_destroy(&caches->desc);
213 address_space_cache_destroy(&caches->avail);
214 address_space_cache_destroy(&caches->used);
215 g_free(caches);
216 }
217
218 static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq)
219 {
220 VRingMemoryRegionCaches *caches;
221
222 caches = qatomic_read(&vq->vring.caches);
223 qatomic_rcu_set(&vq->vring.caches, NULL);
224 if (caches) {
225 call_rcu(caches, virtio_free_region_cache, rcu);
226 }
227 }
228
229 void virtio_init_region_cache(VirtIODevice *vdev, int n)
230 {
231 VirtQueue *vq = &vdev->vq[n];
232 VRingMemoryRegionCaches *old = vq->vring.caches;
233 VRingMemoryRegionCaches *new = NULL;
234 hwaddr addr, size;
235 int64_t len;
236 bool packed;
237
238
239 addr = vq->vring.desc;
240 if (!addr) {
241 goto out_no_cache;
242 }
243 new = g_new0(VRingMemoryRegionCaches, 1);
244 size = virtio_queue_get_desc_size(vdev, n);
245 packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
246 true : false;
247 len = address_space_cache_init(&new->desc, vdev->dma_as,
248 addr, size, packed);
249 if (len < size) {
250 virtio_error(vdev, "Cannot map desc");
251 goto err_desc;
252 }
253
254 size = virtio_queue_get_used_size(vdev, n);
255 len = address_space_cache_init(&new->used, vdev->dma_as,
256 vq->vring.used, size, true);
257 if (len < size) {
258 virtio_error(vdev, "Cannot map used");
259 goto err_used;
260 }
261
262 size = virtio_queue_get_avail_size(vdev, n);
263 len = address_space_cache_init(&new->avail, vdev->dma_as,
264 vq->vring.avail, size, false);
265 if (len < size) {
266 virtio_error(vdev, "Cannot map avail");
267 goto err_avail;
268 }
269
270 qatomic_rcu_set(&vq->vring.caches, new);
271 if (old) {
272 call_rcu(old, virtio_free_region_cache, rcu);
273 }
274 return;
275
276 err_avail:
277 address_space_cache_destroy(&new->avail);
278 err_used:
279 address_space_cache_destroy(&new->used);
280 err_desc:
281 address_space_cache_destroy(&new->desc);
282 out_no_cache:
283 g_free(new);
284 virtio_virtqueue_reset_region_cache(vq);
285 }
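
/*
 * Note on the lifetime of VRingMemoryRegionCaches: readers fetch the
 * current caches with qatomic_rcu_read() under rcu_read_lock() (see
 * vring_get_region_caches() below), while writers publish a new set with
 * qatomic_rcu_set() and reclaim the old one via call_rcu(), so a reader
 * that raced with an update keeps a valid mapping until its read-side
 * critical section ends.  Informal sketch of the reader side only:
 *
 *     RCU_READ_LOCK_GUARD();
 *     caches = vring_get_region_caches(vq);
 *     if (!caches) {
 *         return;            // queue not (yet) set up
 *     }
 *     ... use caches->desc / caches->avail / caches->used ...
 */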
286
287 /* virt queue functions */
288 void virtio_queue_update_rings(VirtIODevice *vdev, int n)
289 {
290 VRing *vring = &vdev->vq[n].vring;
291
292 if (!vring->num || !vring->desc || !vring->align) {
293 /* not yet setup -> nothing to do */
294 return;
295 }
296 vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
297 vring->used = vring_align(vring->avail +
298 offsetof(VRingAvail, ring[vring->num]),
299 vring->align);
300 virtio_init_region_cache(vdev, n);
301 }
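
/*
 * Worked example of the split-ring layout computed above (illustrative
 * only): with num = 256 and align = VIRTIO_PCI_VRING_ALIGN (4096),
 *
 *     avail = desc + 256 * sizeof(VRingDesc)                = desc + 4096
 *     used  = vring_align(avail + offsetof(VRingAvail, ring[256]), 4096)
 *           = vring_align(avail + 4 + 2 * 256, 4096)        = desc + 8192
 *
 * i.e. the descriptor table, avail ring and used ring occupy consecutive,
 * page-aligned regions of guest memory.
 */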
302
303 /* Called within rcu_read_lock(). */
304 static void vring_split_desc_read(VirtIODevice *vdev, VRingDesc *desc,
305 MemoryRegionCache *cache, int i)
306 {
307 address_space_read_cached(cache, i * sizeof(VRingDesc),
308 desc, sizeof(VRingDesc));
309 virtio_tswap64s(vdev, &desc->addr);
310 virtio_tswap32s(vdev, &desc->len);
311 virtio_tswap16s(vdev, &desc->flags);
312 virtio_tswap16s(vdev, &desc->next);
313 }
314
315 static void vring_packed_event_read(VirtIODevice *vdev,
316 MemoryRegionCache *cache,
317 VRingPackedDescEvent *e)
318 {
319 hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap);
320 hwaddr off_flags = offsetof(VRingPackedDescEvent, flags);
321
322 e->flags = virtio_lduw_phys_cached(vdev, cache, off_flags);
323 /* Make sure flags is seen before off_wrap */
324 smp_rmb();
325 e->off_wrap = virtio_lduw_phys_cached(vdev, cache, off_off);
326 virtio_tswap16s(vdev, &e->flags);
327 }
328
329 static void vring_packed_off_wrap_write(VirtIODevice *vdev,
330 MemoryRegionCache *cache,
331 uint16_t off_wrap)
332 {
333 hwaddr off = offsetof(VRingPackedDescEvent, off_wrap);
334
335 virtio_stw_phys_cached(vdev, cache, off, off_wrap);
336 address_space_cache_invalidate(cache, off, sizeof(off_wrap));
337 }
338
339 static void vring_packed_flags_write(VirtIODevice *vdev,
340 MemoryRegionCache *cache, uint16_t flags)
341 {
342 hwaddr off = offsetof(VRingPackedDescEvent, flags);
343
344 virtio_stw_phys_cached(vdev, cache, off, flags);
345 address_space_cache_invalidate(cache, off, sizeof(flags));
346 }
347
348 /* Called within rcu_read_lock(). */
349 static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
350 {
351 return qatomic_rcu_read(&vq->vring.caches);
352 }
353
354 /* Called within rcu_read_lock(). */
355 static inline uint16_t vring_avail_flags(VirtQueue *vq)
356 {
357 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
358 hwaddr pa = offsetof(VRingAvail, flags);
359
360 if (!caches) {
361 return 0;
362 }
363
364 return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
365 }
366
367 /* Called within rcu_read_lock(). */
368 static inline uint16_t vring_avail_idx(VirtQueue *vq)
369 {
370 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
371 hwaddr pa = offsetof(VRingAvail, idx);
372
373 if (!caches) {
374 return 0;
375 }
376
377 vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
378 return vq->shadow_avail_idx;
379 }
380
381 /* Called within rcu_read_lock(). */
382 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
383 {
384 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
385 hwaddr pa = offsetof(VRingAvail, ring[i]);
386
387 if (!caches) {
388 return 0;
389 }
390
391 return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
392 }
393
394 /* Called within rcu_read_lock(). */
395 static inline uint16_t vring_get_used_event(VirtQueue *vq)
396 {
397 return vring_avail_ring(vq, vq->vring.num);
398 }
399
400 /* Called within rcu_read_lock(). */
401 static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
402 int i)
403 {
404 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
405 hwaddr pa = offsetof(VRingUsed, ring[i]);
406
407 if (!caches) {
408 return;
409 }
410
411 virtio_tswap32s(vq->vdev, &uelem->id);
412 virtio_tswap32s(vq->vdev, &uelem->len);
413 address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
414 address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
415 }
416
417 /* Called within rcu_read_lock(). */
418 static inline uint16_t vring_used_flags(VirtQueue *vq)
419 {
420 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
421 hwaddr pa = offsetof(VRingUsed, flags);
422
423 if (!caches) {
424 return 0;
425 }
426
427 return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
428 }
429
430 /* Called within rcu_read_lock(). */
431 static uint16_t vring_used_idx(VirtQueue *vq)
432 {
433 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
434 hwaddr pa = offsetof(VRingUsed, idx);
435
436 if (!caches) {
437 return 0;
438 }
439
440 return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
441 }
442
443 /* Called within rcu_read_lock(). */
444 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
445 {
446 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
447 hwaddr pa = offsetof(VRingUsed, idx);
448
449 if (caches) {
450 virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
451 address_space_cache_invalidate(&caches->used, pa, sizeof(val));
452 }
453
454 vq->used_idx = val;
455 }
456
457 /* Called within rcu_read_lock(). */
458 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
459 {
460 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
461 VirtIODevice *vdev = vq->vdev;
462 hwaddr pa = offsetof(VRingUsed, flags);
463 uint16_t flags;
464
465 if (!caches) {
466 return;
467 }
468
469 flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
470 virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
471 address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
472 }
473
474 /* Called within rcu_read_lock(). */
475 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
476 {
477 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
478 VirtIODevice *vdev = vq->vdev;
479 hwaddr pa = offsetof(VRingUsed, flags);
480 uint16_t flags;
481
482 if (!caches) {
483 return;
484 }
485
486 flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
487 virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
488 address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
489 }
490
491 /* Called within rcu_read_lock(). */
492 static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
493 {
494 VRingMemoryRegionCaches *caches;
495 hwaddr pa;
496 if (!vq->notification) {
497 return;
498 }
499
500 caches = vring_get_region_caches(vq);
501 if (!caches) {
502 return;
503 }
504
505 pa = offsetof(VRingUsed, ring[vq->vring.num]);
506 virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
507 address_space_cache_invalidate(&caches->used, pa, sizeof(val));
508 }
509
510 static void virtio_queue_split_set_notification(VirtQueue *vq, int enable)
511 {
512 RCU_READ_LOCK_GUARD();
513
514 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
515 vring_set_avail_event(vq, vring_avail_idx(vq));
516 } else if (enable) {
517 vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
518 } else {
519 vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
520 }
521 if (enable) {
522 /* Expose avail event/used flags before caller checks the avail idx. */
523 smp_mb();
524 }
525 }
526
527 static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
528 {
529 uint16_t off_wrap;
530 VRingPackedDescEvent e;
531 VRingMemoryRegionCaches *caches;
532
533 RCU_READ_LOCK_GUARD();
534 caches = vring_get_region_caches(vq);
535 if (!caches) {
536 return;
537 }
538
539 vring_packed_event_read(vq->vdev, &caches->used, &e);
540
541 if (!enable) {
542 e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
543 } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
544 off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15;
545 vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
546 /* Make sure off_wrap is written before flags */
547 smp_wmb();
548 e.flags = VRING_PACKED_EVENT_FLAG_DESC;
549 } else {
550 e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
551 }
552
553 vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
554 if (enable) {
555 /* Expose avail event/used flags before caller checks the avail idx. */
556 smp_mb();
557 }
558 }
559
560 bool virtio_queue_get_notification(VirtQueue *vq)
561 {
562 return vq->notification;
563 }
564
565 void virtio_queue_set_notification(VirtQueue *vq, int enable)
566 {
567 vq->notification = enable;
568
569 if (!vq->vring.desc) {
570 return;
571 }
572
573 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
574 virtio_queue_packed_set_notification(vq, enable);
575 } else {
576 virtio_queue_split_set_notification(vq, enable);
577 }
578 }
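
/*
 * A common caller pattern (informal sketch, not copied from any one
 * device): disable notifications while draining the queue, then re-enable
 * them and re-check emptiness so a buffer added in the window is not
 * missed.
 *
 *     virtio_queue_set_notification(vq, 0);
 *     while ((elem = virtqueue_pop(vq, sizeof(*elem)))) {
 *         ... process elem, virtqueue_push(), g_free(elem) ...
 *     }
 *     virtio_queue_set_notification(vq, 1);
 *     if (!virtio_queue_empty(vq)) {
 *         ... more buffers arrived meanwhile, drain again ...
 *     }
 */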
579
580 int virtio_queue_ready(VirtQueue *vq)
581 {
582 return vq->vring.avail != 0;
583 }
584
585 static void vring_packed_desc_read_flags(VirtIODevice *vdev,
586 uint16_t *flags,
587 MemoryRegionCache *cache,
588 int i)
589 {
590 hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
591
592 *flags = virtio_lduw_phys_cached(vdev, cache, off);
593 }
594
595 static void vring_packed_desc_read(VirtIODevice *vdev,
596 VRingPackedDesc *desc,
597 MemoryRegionCache *cache,
598 int i, bool strict_order)
599 {
600 hwaddr off = i * sizeof(VRingPackedDesc);
601
602 vring_packed_desc_read_flags(vdev, &desc->flags, cache, i);
603
604 if (strict_order) {
605 /* Make sure flags is read before the rest of the fields. */
606 smp_rmb();
607 }
608
609 address_space_read_cached(cache, off + offsetof(VRingPackedDesc, addr),
610 &desc->addr, sizeof(desc->addr));
611 address_space_read_cached(cache, off + offsetof(VRingPackedDesc, id),
612 &desc->id, sizeof(desc->id));
613 address_space_read_cached(cache, off + offsetof(VRingPackedDesc, len),
614 &desc->len, sizeof(desc->len));
615 virtio_tswap64s(vdev, &desc->addr);
616 virtio_tswap16s(vdev, &desc->id);
617 virtio_tswap32s(vdev, &desc->len);
618 }
619
620 static void vring_packed_desc_write_data(VirtIODevice *vdev,
621 VRingPackedDesc *desc,
622 MemoryRegionCache *cache,
623 int i)
624 {
625 hwaddr off_id = i * sizeof(VRingPackedDesc) +
626 offsetof(VRingPackedDesc, id);
627 hwaddr off_len = i * sizeof(VRingPackedDesc) +
628 offsetof(VRingPackedDesc, len);
629
630 virtio_tswap32s(vdev, &desc->len);
631 virtio_tswap16s(vdev, &desc->id);
632 address_space_write_cached(cache, off_id, &desc->id, sizeof(desc->id));
633 address_space_cache_invalidate(cache, off_id, sizeof(desc->id));
634 address_space_write_cached(cache, off_len, &desc->len, sizeof(desc->len));
635 address_space_cache_invalidate(cache, off_len, sizeof(desc->len));
636 }
637
638 static void vring_packed_desc_write_flags(VirtIODevice *vdev,
639 VRingPackedDesc *desc,
640 MemoryRegionCache *cache,
641 int i)
642 {
643 hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
644
645 virtio_stw_phys_cached(vdev, cache, off, desc->flags);
646 address_space_cache_invalidate(cache, off, sizeof(desc->flags));
647 }
648
649 static void vring_packed_desc_write(VirtIODevice *vdev,
650 VRingPackedDesc *desc,
651 MemoryRegionCache *cache,
652 int i, bool strict_order)
653 {
654 vring_packed_desc_write_data(vdev, desc, cache, i);
655 if (strict_order) {
656 /* Make sure data is written before flags. */
657 smp_wmb();
658 }
659 vring_packed_desc_write_flags(vdev, desc, cache, i);
660 }
661
662 static inline bool is_desc_avail(uint16_t flags, bool wrap_counter)
663 {
664 bool avail, used;
665
666 avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
667 used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
668 return (avail != used) && (avail == wrap_counter);
669 }
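
/*
 * Informal summary: a packed descriptor belongs to the device when its
 * AVAIL bit differs from its USED bit and the AVAIL bit matches the wrap
 * counter the device expects.  For example, with wrap_counter == 1:
 *
 *     AVAIL=1 USED=0  ->  available to the device
 *     AVAIL=1 USED=1  ->  not available (already marked used)
 *     AVAIL=0 USED=x  ->  not available on this lap of the ring
 */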
670
671 /* Fetch avail_idx from VQ memory only when we really need to know if
672 * guest has added some buffers.
673 * Called within rcu_read_lock(). */
674 static int virtio_queue_empty_rcu(VirtQueue *vq)
675 {
676 if (virtio_device_disabled(vq->vdev)) {
677 return 1;
678 }
679
680 if (unlikely(!vq->vring.avail)) {
681 return 1;
682 }
683
684 if (vq->shadow_avail_idx != vq->last_avail_idx) {
685 return 0;
686 }
687
688 return vring_avail_idx(vq) == vq->last_avail_idx;
689 }
690
691 static int virtio_queue_split_empty(VirtQueue *vq)
692 {
693 bool empty;
694
695 if (virtio_device_disabled(vq->vdev)) {
696 return 1;
697 }
698
699 if (unlikely(!vq->vring.avail)) {
700 return 1;
701 }
702
703 if (vq->shadow_avail_idx != vq->last_avail_idx) {
704 return 0;
705 }
706
707 RCU_READ_LOCK_GUARD();
708 empty = vring_avail_idx(vq) == vq->last_avail_idx;
709 return empty;
710 }
711
712 /* Called within rcu_read_lock(). */
713 static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
714 {
715 struct VRingPackedDesc desc;
716 VRingMemoryRegionCaches *cache;
717
718 if (unlikely(!vq->vring.desc)) {
719 return 1;
720 }
721
722 cache = vring_get_region_caches(vq);
723 if (!cache) {
724 return 1;
725 }
726
727 vring_packed_desc_read_flags(vq->vdev, &desc.flags, &cache->desc,
728 vq->last_avail_idx);
729
730 return !is_desc_avail(desc.flags, vq->last_avail_wrap_counter);
731 }
732
733 static int virtio_queue_packed_empty(VirtQueue *vq)
734 {
735 RCU_READ_LOCK_GUARD();
736 return virtio_queue_packed_empty_rcu(vq);
737 }
738
739 int virtio_queue_empty(VirtQueue *vq)
740 {
741 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
742 return virtio_queue_packed_empty(vq);
743 } else {
744 return virtio_queue_split_empty(vq);
745 }
746 }
747
748 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
749 unsigned int len)
750 {
751 AddressSpace *dma_as = vq->vdev->dma_as;
752 unsigned int offset;
753 int i;
754
755 offset = 0;
756 for (i = 0; i < elem->in_num; i++) {
757 size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
758
759 dma_memory_unmap(dma_as, elem->in_sg[i].iov_base,
760 elem->in_sg[i].iov_len,
761 DMA_DIRECTION_FROM_DEVICE, size);
762
763 offset += size;
764 }
765
766 for (i = 0; i < elem->out_num; i++)
767 dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
768 elem->out_sg[i].iov_len,
769 DMA_DIRECTION_TO_DEVICE,
770 elem->out_sg[i].iov_len);
771 }
772
773 /* virtqueue_detach_element:
774 * @vq: The #VirtQueue
775 * @elem: The #VirtQueueElement
776 * @len: number of bytes written
777 *
778 * Detach the element from the virtqueue. This function is suitable for device
779 * reset or other situations where a #VirtQueueElement is simply freed and will
780 * not be pushed or discarded.
781 */
782 void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
783 unsigned int len)
784 {
785 vq->inuse -= elem->ndescs;
786 virtqueue_unmap_sg(vq, elem, len);
787 }
788
789 static void virtqueue_split_rewind(VirtQueue *vq, unsigned int num)
790 {
791 vq->last_avail_idx -= num;
792 }
793
794 static void virtqueue_packed_rewind(VirtQueue *vq, unsigned int num)
795 {
796 if (vq->last_avail_idx < num) {
797 vq->last_avail_idx = vq->vring.num + vq->last_avail_idx - num;
798 vq->last_avail_wrap_counter ^= 1;
799 } else {
800 vq->last_avail_idx -= num;
801 }
802 }
803
804 /* virtqueue_unpop:
805 * @vq: The #VirtQueue
806 * @elem: The #VirtQueueElement
807 * @len: number of bytes written
808 *
809 * Pretend the most recent element wasn't popped from the virtqueue. The next
810 * call to virtqueue_pop() will refetch the element.
811 */
812 void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
813 unsigned int len)
814 {
815
816 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
817 virtqueue_packed_rewind(vq, 1);
818 } else {
819 virtqueue_split_rewind(vq, 1);
820 }
821
822 virtqueue_detach_element(vq, elem, len);
823 }
824
825 /* virtqueue_rewind:
826 * @vq: The #VirtQueue
827 * @num: Number of elements to push back
828 *
829 * Pretend that elements weren't popped from the virtqueue. The next
830 * virtqueue_pop() will refetch the oldest element.
831 *
832 * Use virtqueue_unpop() instead if you have a VirtQueueElement.
833 *
834 * Returns: true on success, false if @num is greater than the number of in use
835 * elements.
836 */
837 bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
838 {
839 if (num > vq->inuse) {
840 return false;
841 }
842
843 vq->inuse -= num;
844 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
845 virtqueue_packed_rewind(vq, num);
846 } else {
847 virtqueue_split_rewind(vq, num);
848 }
849 return true;
850 }
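
/*
 * Informal usage sketch (backend_ready is a hypothetical condition): a
 * device that popped an element it cannot process right now can hand it
 * back with virtqueue_unpop(), or use virtqueue_rewind() if it only
 * tracked how many elements it had popped.
 *
 *     elem = virtqueue_pop(vq, sizeof(*elem));
 *     if (elem && !backend_ready) {
 *         virtqueue_unpop(vq, elem, 0);
 *         g_free(elem);
 *         return;
 *     }
 */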
851
852 static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
853 unsigned int len, unsigned int idx)
854 {
855 VRingUsedElem uelem;
856
857 if (unlikely(!vq->vring.used)) {
858 return;
859 }
860
861 idx = (idx + vq->used_idx) % vq->vring.num;
862
863 uelem.id = elem->index;
864 uelem.len = len;
865 vring_used_write(vq, &uelem, idx);
866 }
867
868 static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
869 unsigned int len, unsigned int idx)
870 {
871 vq->used_elems[idx].index = elem->index;
872 vq->used_elems[idx].len = len;
873 vq->used_elems[idx].ndescs = elem->ndescs;
874 }
875
876 static void virtqueue_packed_fill_desc(VirtQueue *vq,
877 const VirtQueueElement *elem,
878 unsigned int idx,
879 bool strict_order)
880 {
881 uint16_t head;
882 VRingMemoryRegionCaches *caches;
883 VRingPackedDesc desc = {
884 .id = elem->index,
885 .len = elem->len,
886 };
887 bool wrap_counter = vq->used_wrap_counter;
888
889 if (unlikely(!vq->vring.desc)) {
890 return;
891 }
892
893 head = vq->used_idx + idx;
894 if (head >= vq->vring.num) {
895 head -= vq->vring.num;
896 wrap_counter ^= 1;
897 }
898 if (wrap_counter) {
899 desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL);
900 desc.flags |= (1 << VRING_PACKED_DESC_F_USED);
901 } else {
902 desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL);
903 desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED);
904 }
905
906 caches = vring_get_region_caches(vq);
907 if (!caches) {
908 return;
909 }
910
911 vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order);
912 }
913
914 /* Called within rcu_read_lock(). */
915 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
916 unsigned int len, unsigned int idx)
917 {
918 trace_virtqueue_fill(vq, elem, len, idx);
919
920 virtqueue_unmap_sg(vq, elem, len);
921
922 if (virtio_device_disabled(vq->vdev)) {
923 return;
924 }
925
926 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
927 virtqueue_packed_fill(vq, elem, len, idx);
928 } else {
929 virtqueue_split_fill(vq, elem, len, idx);
930 }
931 }
932
933 /* Called within rcu_read_lock(). */
934 static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
935 {
936 uint16_t old, new;
937
938 if (unlikely(!vq->vring.used)) {
939 return;
940 }
941
942 /* Make sure buffer is written before we update index. */
943 smp_wmb();
944 trace_virtqueue_flush(vq, count);
945 old = vq->used_idx;
946 new = old + count;
947 vring_used_idx_set(vq, new);
948 vq->inuse -= count;
949 if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
950 vq->signalled_used_valid = false;
951 }
952
953 static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
954 {
955 unsigned int i, ndescs = 0;
956
957 if (unlikely(!vq->vring.desc)) {
958 return;
959 }
960
961 for (i = 1; i < count; i++) {
962 virtqueue_packed_fill_desc(vq, &vq->used_elems[i], i, false);
963 ndescs += vq->used_elems[i].ndescs;
964 }
965 virtqueue_packed_fill_desc(vq, &vq->used_elems[0], 0, true);
966 ndescs += vq->used_elems[0].ndescs;
967
968 vq->inuse -= ndescs;
969 vq->used_idx += ndescs;
970 if (vq->used_idx >= vq->vring.num) {
971 vq->used_idx -= vq->vring.num;
972 vq->used_wrap_counter ^= 1;
973 vq->signalled_used_valid = false;
974 }
975 }
976
977 void virtqueue_flush(VirtQueue *vq, unsigned int count)
978 {
979 if (virtio_device_disabled(vq->vdev)) {
980 vq->inuse -= count;
981 return;
982 }
983
984 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
985 virtqueue_packed_flush(vq, count);
986 } else {
987 virtqueue_split_flush(vq, count);
988 }
989 }
990
991 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
992 unsigned int len)
993 {
994 RCU_READ_LOCK_GUARD();
995 virtqueue_fill(vq, elem, len, 0);
996 virtqueue_flush(vq, 1);
997 }
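
/*
 * virtqueue_push() is the single-element completion path.  Devices that
 * complete several elements in one go typically call virtqueue_fill()
 * once per element and a single virtqueue_flush() at the end (under
 * rcu_read_lock(), as the comments above require), then raise the
 * interrupt with virtio_notify().  Informal sketch:
 *
 *     RCU_READ_LOCK_GUARD();
 *     for (i = 0; i < n; i++) {
 *         virtqueue_fill(vq, elems[i], lens[i], i);
 *     }
 *     virtqueue_flush(vq, n);
 */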
998
999 /* Called within rcu_read_lock(). */
1000 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
1001 {
1002 uint16_t num_heads = vring_avail_idx(vq) - idx;
1003
1004 /* Check it isn't doing very strange things with descriptor numbers. */
1005 if (num_heads > vq->vring.num) {
1006 virtio_error(vq->vdev, "Guest moved used index from %u to %u",
1007 idx, vq->shadow_avail_idx);
1008 return -EINVAL;
1009 }
1010 /* On success, callers read a descriptor at vq->last_avail_idx.
1011 * Make sure descriptor read does not bypass avail index read. */
1012 if (num_heads) {
1013 smp_rmb();
1014 }
1015
1016 return num_heads;
1017 }
1018
1019 /* Called within rcu_read_lock(). */
1020 static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
1021 unsigned int *head)
1022 {
1023 /* Grab the next descriptor number they're advertising, and increment
1024 * the index we've seen. */
1025 *head = vring_avail_ring(vq, idx % vq->vring.num);
1026
1027 /* If their number is silly, that's a fatal mistake. */
1028 if (*head >= vq->vring.num) {
1029 virtio_error(vq->vdev, "Guest says index %u is available", *head);
1030 return false;
1031 }
1032
1033 return true;
1034 }
1035
1036 enum {
1037 VIRTQUEUE_READ_DESC_ERROR = -1,
1038 VIRTQUEUE_READ_DESC_DONE = 0, /* end of chain */
1039 VIRTQUEUE_READ_DESC_MORE = 1, /* more buffers in chain */
1040 };
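
/*
 * These return codes drive the descriptor-chain walks below; the callers
 * all follow the same shape (sketch):
 *
 *     do {
 *         ... map or account for desc ...
 *         rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache,
 *                                             max, &i);
 *     } while (rc == VIRTQUEUE_READ_DESC_MORE);
 *     if (rc == VIRTQUEUE_READ_DESC_ERROR) {
 *         ... abandon the request ...
 *     }
 */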
1041
1042 static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
1043 MemoryRegionCache *desc_cache,
1044 unsigned int max, unsigned int *next)
1045 {
1046 /* If this descriptor says it doesn't chain, we're done. */
1047 if (!(desc->flags & VRING_DESC_F_NEXT)) {
1048 return VIRTQUEUE_READ_DESC_DONE;
1049 }
1050
1051 /* Check they're not leading us off end of descriptors. */
1052 *next = desc->next;
1053 /* Make sure compiler knows to grab that: we don't want it changing! */
1054 smp_wmb();
1055
1056 if (*next >= max) {
1057 virtio_error(vdev, "Desc next is %u", *next);
1058 return VIRTQUEUE_READ_DESC_ERROR;
1059 }
1060
1061 vring_split_desc_read(vdev, desc, desc_cache, *next);
1062 return VIRTQUEUE_READ_DESC_MORE;
1063 }
1064
1065 /* Called within rcu_read_lock(). */
1066 static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
1067 unsigned int *in_bytes, unsigned int *out_bytes,
1068 unsigned max_in_bytes, unsigned max_out_bytes,
1069 VRingMemoryRegionCaches *caches)
1070 {
1071 VirtIODevice *vdev = vq->vdev;
1072 unsigned int idx;
1073 unsigned int total_bufs, in_total, out_total;
1074 MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1075 int64_t len = 0;
1076 int rc;
1077
1078 idx = vq->last_avail_idx;
1079 total_bufs = in_total = out_total = 0;
1080
1081 while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
1082 MemoryRegionCache *desc_cache = &caches->desc;
1083 unsigned int num_bufs;
1084 VRingDesc desc;
1085 unsigned int i;
1086 unsigned int max = vq->vring.num;
1087
1088 num_bufs = total_bufs;
1089
1090 if (!virtqueue_get_head(vq, idx++, &i)) {
1091 goto err;
1092 }
1093
1094 vring_split_desc_read(vdev, &desc, desc_cache, i);
1095
1096 if (desc.flags & VRING_DESC_F_INDIRECT) {
1097 if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1098 virtio_error(vdev, "Invalid size for indirect buffer table");
1099 goto err;
1100 }
1101
1102 /* If we've got too many, that implies a descriptor loop. */
1103 if (num_bufs >= max) {
1104 virtio_error(vdev, "Looped descriptor");
1105 goto err;
1106 }
1107
1108 /* loop over the indirect descriptor table */
1109 len = address_space_cache_init(&indirect_desc_cache,
1110 vdev->dma_as,
1111 desc.addr, desc.len, false);
1112 desc_cache = &indirect_desc_cache;
1113 if (len < desc.len) {
1114 virtio_error(vdev, "Cannot map indirect buffer");
1115 goto err;
1116 }
1117
1118 max = desc.len / sizeof(VRingDesc);
1119 num_bufs = i = 0;
1120 vring_split_desc_read(vdev, &desc, desc_cache, i);
1121 }
1122
1123 do {
1124 /* If we've got too many, that implies a descriptor loop. */
1125 if (++num_bufs > max) {
1126 virtio_error(vdev, "Looped descriptor");
1127 goto err;
1128 }
1129
1130 if (desc.flags & VRING_DESC_F_WRITE) {
1131 in_total += desc.len;
1132 } else {
1133 out_total += desc.len;
1134 }
1135 if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1136 goto done;
1137 }
1138
1139 rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1140 } while (rc == VIRTQUEUE_READ_DESC_MORE);
1141
1142 if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1143 goto err;
1144 }
1145
1146 if (desc_cache == &indirect_desc_cache) {
1147 address_space_cache_destroy(&indirect_desc_cache);
1148 total_bufs++;
1149 } else {
1150 total_bufs = num_bufs;
1151 }
1152 }
1153
1154 if (rc < 0) {
1155 goto err;
1156 }
1157
1158 done:
1159 address_space_cache_destroy(&indirect_desc_cache);
1160 if (in_bytes) {
1161 *in_bytes = in_total;
1162 }
1163 if (out_bytes) {
1164 *out_bytes = out_total;
1165 }
1166 return;
1167
1168 err:
1169 in_total = out_total = 0;
1170 goto done;
1171 }
1172
1173 static int virtqueue_packed_read_next_desc(VirtQueue *vq,
1174 VRingPackedDesc *desc,
1175 MemoryRegionCache
1176 *desc_cache,
1177 unsigned int max,
1178 unsigned int *next,
1179 bool indirect)
1180 {
1181 /* If this descriptor says it doesn't chain, we're done. */
1182 if (!indirect && !(desc->flags & VRING_DESC_F_NEXT)) {
1183 return VIRTQUEUE_READ_DESC_DONE;
1184 }
1185
1186 ++*next;
1187 if (*next == max) {
1188 if (indirect) {
1189 return VIRTQUEUE_READ_DESC_DONE;
1190 } else {
1191 (*next) -= vq->vring.num;
1192 }
1193 }
1194
1195 vring_packed_desc_read(vq->vdev, desc, desc_cache, *next, false);
1196 return VIRTQUEUE_READ_DESC_MORE;
1197 }
1198
1199 /* Called within rcu_read_lock(). */
1200 static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
1201 unsigned int *in_bytes,
1202 unsigned int *out_bytes,
1203 unsigned max_in_bytes,
1204 unsigned max_out_bytes,
1205 VRingMemoryRegionCaches *caches)
1206 {
1207 VirtIODevice *vdev = vq->vdev;
1208 unsigned int idx;
1209 unsigned int total_bufs, in_total, out_total;
1210 MemoryRegionCache *desc_cache;
1211 MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1212 int64_t len = 0;
1213 VRingPackedDesc desc;
1214 bool wrap_counter;
1215
1216 idx = vq->last_avail_idx;
1217 wrap_counter = vq->last_avail_wrap_counter;
1218 total_bufs = in_total = out_total = 0;
1219
1220 for (;;) {
1221 unsigned int num_bufs = total_bufs;
1222 unsigned int i = idx;
1223 int rc;
1224 unsigned int max = vq->vring.num;
1225
1226 desc_cache = &caches->desc;
1227
1228 vring_packed_desc_read(vdev, &desc, desc_cache, idx, true);
1229 if (!is_desc_avail(desc.flags, wrap_counter)) {
1230 break;
1231 }
1232
1233 if (desc.flags & VRING_DESC_F_INDIRECT) {
1234 if (desc.len % sizeof(VRingPackedDesc)) {
1235 virtio_error(vdev, "Invalid size for indirect buffer table");
1236 goto err;
1237 }
1238
1239 /* If we've got too many, that implies a descriptor loop. */
1240 if (num_bufs >= max) {
1241 virtio_error(vdev, "Looped descriptor");
1242 goto err;
1243 }
1244
1245 /* loop over the indirect descriptor table */
1246 len = address_space_cache_init(&indirect_desc_cache,
1247 vdev->dma_as,
1248 desc.addr, desc.len, false);
1249 desc_cache = &indirect_desc_cache;
1250 if (len < desc.len) {
1251 virtio_error(vdev, "Cannot map indirect buffer");
1252 goto err;
1253 }
1254
1255 max = desc.len / sizeof(VRingPackedDesc);
1256 num_bufs = i = 0;
1257 vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1258 }
1259
1260 do {
1261 /* If we've got too many, that implies a descriptor loop. */
1262 if (++num_bufs > max) {
1263 virtio_error(vdev, "Looped descriptor");
1264 goto err;
1265 }
1266
1267 if (desc.flags & VRING_DESC_F_WRITE) {
1268 in_total += desc.len;
1269 } else {
1270 out_total += desc.len;
1271 }
1272 if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1273 goto done;
1274 }
1275
1276 rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max,
1277 &i, desc_cache ==
1278 &indirect_desc_cache);
1279 } while (rc == VIRTQUEUE_READ_DESC_MORE);
1280
1281 if (desc_cache == &indirect_desc_cache) {
1282 address_space_cache_destroy(&indirect_desc_cache);
1283 total_bufs++;
1284 idx++;
1285 } else {
1286 idx += num_bufs - total_bufs;
1287 total_bufs = num_bufs;
1288 }
1289
1290 if (idx >= vq->vring.num) {
1291 idx -= vq->vring.num;
1292 wrap_counter ^= 1;
1293 }
1294 }
1295
1296 /* Record the index and wrap counter for a kick we want */
1297 vq->shadow_avail_idx = idx;
1298 vq->shadow_avail_wrap_counter = wrap_counter;
1299 done:
1300 address_space_cache_destroy(&indirect_desc_cache);
1301 if (in_bytes) {
1302 *in_bytes = in_total;
1303 }
1304 if (out_bytes) {
1305 *out_bytes = out_total;
1306 }
1307 return;
1308
1309 err:
1310 in_total = out_total = 0;
1311 goto done;
1312 }
1313
1314 void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
1315 unsigned int *out_bytes,
1316 unsigned max_in_bytes, unsigned max_out_bytes)
1317 {
1318 uint16_t desc_size;
1319 VRingMemoryRegionCaches *caches;
1320
1321 RCU_READ_LOCK_GUARD();
1322
1323 if (unlikely(!vq->vring.desc)) {
1324 goto err;
1325 }
1326
1327 caches = vring_get_region_caches(vq);
1328 if (!caches) {
1329 goto err;
1330 }
1331
1332 desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
1333 sizeof(VRingPackedDesc) : sizeof(VRingDesc);
1334 if (caches->desc.len < vq->vring.num * desc_size) {
1335 virtio_error(vq->vdev, "Cannot map descriptor ring");
1336 goto err;
1337 }
1338
1339 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1340 virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
1341 max_in_bytes, max_out_bytes,
1342 caches);
1343 } else {
1344 virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
1345 max_in_bytes, max_out_bytes,
1346 caches);
1347 }
1348
1349 return;
1350 err:
1351 if (in_bytes) {
1352 *in_bytes = 0;
1353 }
1354 if (out_bytes) {
1355 *out_bytes = 0;
1356 }
1357 }
1358
1359 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
1360 unsigned int out_bytes)
1361 {
1362 unsigned int in_total, out_total;
1363
1364 virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
1365 return in_bytes <= in_total && out_bytes <= out_total;
1366 }
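
/*
 * Informal usage sketch (hdr and payload_len are hypothetical,
 * device-specific names): a device can check that the guest has queued
 * enough device-writable buffer space before committing to a pop.
 *
 *     if (!virtqueue_avail_bytes(vq, sizeof(hdr) + payload_len, 0)) {
 *         return;    // wait until the guest adds a large enough buffer
 *     }
 */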
1367
1368 static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
1369 hwaddr *addr, struct iovec *iov,
1370 unsigned int max_num_sg, bool is_write,
1371 hwaddr pa, size_t sz)
1372 {
1373 bool ok = false;
1374 unsigned num_sg = *p_num_sg;
1375 assert(num_sg <= max_num_sg);
1376
1377 if (!sz) {
1378 virtio_error(vdev, "virtio: zero sized buffers are not allowed");
1379 goto out;
1380 }
1381
1382 while (sz) {
1383 hwaddr len = sz;
1384
1385 if (num_sg == max_num_sg) {
1386 virtio_error(vdev, "virtio: too many write descriptors in "
1387 "indirect table");
1388 goto out;
1389 }
1390
1391 iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len,
1392 is_write ?
1393 DMA_DIRECTION_FROM_DEVICE :
1394 DMA_DIRECTION_TO_DEVICE,
1395 MEMTXATTRS_UNSPECIFIED);
1396 if (!iov[num_sg].iov_base) {
1397 virtio_error(vdev, "virtio: bogus descriptor or out of resources");
1398 goto out;
1399 }
1400
1401 iov[num_sg].iov_len = len;
1402 addr[num_sg] = pa;
1403
1404 sz -= len;
1405 pa += len;
1406 num_sg++;
1407 }
1408 ok = true;
1409
1410 out:
1411 *p_num_sg = num_sg;
1412 return ok;
1413 }
1414
1415 /* Only used by error code paths before we have a VirtQueueElement (therefore
1416 * virtqueue_unmap_sg() can't be used). Assumes buffers weren't written to
1417 * yet.
1418 */
1419 static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
1420 struct iovec *iov)
1421 {
1422 unsigned int i;
1423
1424 for (i = 0; i < out_num + in_num; i++) {
1425 int is_write = i >= out_num;
1426
1427 cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
1428 iov++;
1429 }
1430 }
1431
1432 static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
1433 hwaddr *addr, unsigned int num_sg,
1434 bool is_write)
1435 {
1436 unsigned int i;
1437 hwaddr len;
1438
1439 for (i = 0; i < num_sg; i++) {
1440 len = sg[i].iov_len;
1441 sg[i].iov_base = dma_memory_map(vdev->dma_as,
1442 addr[i], &len, is_write ?
1443 DMA_DIRECTION_FROM_DEVICE :
1444 DMA_DIRECTION_TO_DEVICE,
1445 MEMTXATTRS_UNSPECIFIED);
1446 if (!sg[i].iov_base) {
1447 error_report("virtio: error trying to map MMIO memory");
1448 exit(1);
1449 }
1450 if (len != sg[i].iov_len) {
1451 error_report("virtio: unexpected memory split");
1452 exit(1);
1453 }
1454 }
1455 }
1456
1457 void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
1458 {
1459 virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num, true);
1460 virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num,
1461 false);
1462 }
1463
1464 static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
1465 {
1466 VirtQueueElement *elem;
1467 size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
1468 size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
1469 size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
1470 size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
1471 size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
1472 size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
1473
1474 assert(sz >= sizeof(VirtQueueElement));
1475 elem = g_malloc(out_sg_end);
1476 trace_virtqueue_alloc_element(elem, sz, in_num, out_num);
1477 elem->out_num = out_num;
1478 elem->in_num = in_num;
1479 elem->in_addr = (void *)elem + in_addr_ofs;
1480 elem->out_addr = (void *)elem + out_addr_ofs;
1481 elem->in_sg = (void *)elem + in_sg_ofs;
1482 elem->out_sg = (void *)elem + out_sg_ofs;
1483 return elem;
1484 }
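
/*
 * The element and its arrays live in a single allocation (freed with one
 * g_free()), laid out roughly as follows:
 *
 *     | caller struct, sz bytes | in_addr[in_num] | out_addr[out_num] |
 *     | in_sg[in_num] | out_sg[out_num] |
 *
 * with the in_addr and in_sg sections aligned via QEMU_ALIGN_UP as
 * computed above.
 */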
1485
1486 static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
1487 {
1488 unsigned int i, head, max;
1489 VRingMemoryRegionCaches *caches;
1490 MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1491 MemoryRegionCache *desc_cache;
1492 int64_t len;
1493 VirtIODevice *vdev = vq->vdev;
1494 VirtQueueElement *elem = NULL;
1495 unsigned out_num, in_num, elem_entries;
1496 hwaddr addr[VIRTQUEUE_MAX_SIZE];
1497 struct iovec iov[VIRTQUEUE_MAX_SIZE];
1498 VRingDesc desc;
1499 int rc;
1500
1501 RCU_READ_LOCK_GUARD();
1502 if (virtio_queue_empty_rcu(vq)) {
1503 goto done;
1504 }
1505 /* Needed after virtio_queue_empty(), see comment in
1506 * virtqueue_num_heads(). */
1507 smp_rmb();
1508
1509 /* When we start there are no input or output descriptors. */
1510 out_num = in_num = elem_entries = 0;
1511
1512 max = vq->vring.num;
1513
1514 if (vq->inuse >= vq->vring.num) {
1515 virtio_error(vdev, "Virtqueue size exceeded");
1516 goto done;
1517 }
1518
1519 if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
1520 goto done;
1521 }
1522
1523 if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
1524 vring_set_avail_event(vq, vq->last_avail_idx);
1525 }
1526
1527 i = head;
1528
1529 caches = vring_get_region_caches(vq);
1530 if (!caches) {
1531 virtio_error(vdev, "Region caches not initialized");
1532 goto done;
1533 }
1534
1535 if (caches->desc.len < max * sizeof(VRingDesc)) {
1536 virtio_error(vdev, "Cannot map descriptor ring");
1537 goto done;
1538 }
1539
1540 desc_cache = &caches->desc;
1541 vring_split_desc_read(vdev, &desc, desc_cache, i);
1542 if (desc.flags & VRING_DESC_F_INDIRECT) {
1543 if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1544 virtio_error(vdev, "Invalid size for indirect buffer table");
1545 goto done;
1546 }
1547
1548 /* loop over the indirect descriptor table */
1549 len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1550 desc.addr, desc.len, false);
1551 desc_cache = &indirect_desc_cache;
1552 if (len < desc.len) {
1553 virtio_error(vdev, "Cannot map indirect buffer");
1554 goto done;
1555 }
1556
1557 max = desc.len / sizeof(VRingDesc);
1558 i = 0;
1559 vring_split_desc_read(vdev, &desc, desc_cache, i);
1560 }
1561
1562 /* Collect all the descriptors */
1563 do {
1564 bool map_ok;
1565
1566 if (desc.flags & VRING_DESC_F_WRITE) {
1567 map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1568 iov + out_num,
1569 VIRTQUEUE_MAX_SIZE - out_num, true,
1570 desc.addr, desc.len);
1571 } else {
1572 if (in_num) {
1573 virtio_error(vdev, "Incorrect order for descriptors");
1574 goto err_undo_map;
1575 }
1576 map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1577 VIRTQUEUE_MAX_SIZE, false,
1578 desc.addr, desc.len);
1579 }
1580 if (!map_ok) {
1581 goto err_undo_map;
1582 }
1583
1584 /* If we've got too many, that implies a descriptor loop. */
1585 if (++elem_entries > max) {
1586 virtio_error(vdev, "Looped descriptor");
1587 goto err_undo_map;
1588 }
1589
1590 rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1591 } while (rc == VIRTQUEUE_READ_DESC_MORE);
1592
1593 if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1594 goto err_undo_map;
1595 }
1596
1597 /* Now copy what we have collected and mapped */
1598 elem = virtqueue_alloc_element(sz, out_num, in_num);
1599 elem->index = head;
1600 elem->ndescs = 1;
1601 for (i = 0; i < out_num; i++) {
1602 elem->out_addr[i] = addr[i];
1603 elem->out_sg[i] = iov[i];
1604 }
1605 for (i = 0; i < in_num; i++) {
1606 elem->in_addr[i] = addr[out_num + i];
1607 elem->in_sg[i] = iov[out_num + i];
1608 }
1609
1610 vq->inuse++;
1611
1612 trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1613 done:
1614 address_space_cache_destroy(&indirect_desc_cache);
1615
1616 return elem;
1617
1618 err_undo_map:
1619 virtqueue_undo_map_desc(out_num, in_num, iov);
1620 goto done;
1621 }
1622
1623 static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
1624 {
1625 unsigned int i, max;
1626 VRingMemoryRegionCaches *caches;
1627 MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1628 MemoryRegionCache *desc_cache;
1629 int64_t len;
1630 VirtIODevice *vdev = vq->vdev;
1631 VirtQueueElement *elem = NULL;
1632 unsigned out_num, in_num, elem_entries;
1633 hwaddr addr[VIRTQUEUE_MAX_SIZE];
1634 struct iovec iov[VIRTQUEUE_MAX_SIZE];
1635 VRingPackedDesc desc;
1636 uint16_t id;
1637 int rc;
1638
1639 RCU_READ_LOCK_GUARD();
1640 if (virtio_queue_packed_empty_rcu(vq)) {
1641 goto done;
1642 }
1643
1644 /* When we start there are no input or output descriptors. */
1645 out_num = in_num = elem_entries = 0;
1646
1647 max = vq->vring.num;
1648
1649 if (vq->inuse >= vq->vring.num) {
1650 virtio_error(vdev, "Virtqueue size exceeded");
1651 goto done;
1652 }
1653
1654 i = vq->last_avail_idx;
1655
1656 caches = vring_get_region_caches(vq);
1657 if (!caches) {
1658 virtio_error(vdev, "Region caches not initialized");
1659 goto done;
1660 }
1661
1662 if (caches->desc.len < max * sizeof(VRingDesc)) {
1663 virtio_error(vdev, "Cannot map descriptor ring");
1664 goto done;
1665 }
1666
1667 desc_cache = &caches->desc;
1668 vring_packed_desc_read(vdev, &desc, desc_cache, i, true);
1669 id = desc.id;
1670 if (desc.flags & VRING_DESC_F_INDIRECT) {
1671 if (desc.len % sizeof(VRingPackedDesc)) {
1672 virtio_error(vdev, "Invalid size for indirect buffer table");
1673 goto done;
1674 }
1675
1676 /* loop over the indirect descriptor table */
1677 len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1678 desc.addr, desc.len, false);
1679 desc_cache = &indirect_desc_cache;
1680 if (len < desc.len) {
1681 virtio_error(vdev, "Cannot map indirect buffer");
1682 goto done;
1683 }
1684
1685 max = desc.len / sizeof(VRingPackedDesc);
1686 i = 0;
1687 vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1688 }
1689
1690 /* Collect all the descriptors */
1691 do {
1692 bool map_ok;
1693
1694 if (desc.flags & VRING_DESC_F_WRITE) {
1695 map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1696 iov + out_num,
1697 VIRTQUEUE_MAX_SIZE - out_num, true,
1698 desc.addr, desc.len);
1699 } else {
1700 if (in_num) {
1701 virtio_error(vdev, "Incorrect order for descriptors");
1702 goto err_undo_map;
1703 }
1704 map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1705 VIRTQUEUE_MAX_SIZE, false,
1706 desc.addr, desc.len);
1707 }
1708 if (!map_ok) {
1709 goto err_undo_map;
1710 }
1711
1712 /* If we've got too many, that implies a descriptor loop. */
1713 if (++elem_entries > max) {
1714 virtio_error(vdev, "Looped descriptor");
1715 goto err_undo_map;
1716 }
1717
1718 rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, &i,
1719 desc_cache ==
1720 &indirect_desc_cache);
1721 } while (rc == VIRTQUEUE_READ_DESC_MORE);
1722
1723 /* Now copy what we have collected and mapped */
1724 elem = virtqueue_alloc_element(sz, out_num, in_num);
1725 for (i = 0; i < out_num; i++) {
1726 elem->out_addr[i] = addr[i];
1727 elem->out_sg[i] = iov[i];
1728 }
1729 for (i = 0; i < in_num; i++) {
1730 elem->in_addr[i] = addr[out_num + i];
1731 elem->in_sg[i] = iov[out_num + i];
1732 }
1733
1734 elem->index = id;
1735 elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries;
1736 vq->last_avail_idx += elem->ndescs;
1737 vq->inuse += elem->ndescs;
1738
1739 if (vq->last_avail_idx >= vq->vring.num) {
1740 vq->last_avail_idx -= vq->vring.num;
1741 vq->last_avail_wrap_counter ^= 1;
1742 }
1743
1744 vq->shadow_avail_idx = vq->last_avail_idx;
1745 vq->shadow_avail_wrap_counter = vq->last_avail_wrap_counter;
1746
1747 trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1748 done:
1749 address_space_cache_destroy(&indirect_desc_cache);
1750
1751 return elem;
1752
1753 err_undo_map:
1754 virtqueue_undo_map_desc(out_num, in_num, iov);
1755 goto done;
1756 }
1757
1758 void *virtqueue_pop(VirtQueue *vq, size_t sz)
1759 {
1760 if (virtio_device_disabled(vq->vdev)) {
1761 return NULL;
1762 }
1763
1764 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1765 return virtqueue_packed_pop(vq, sz);
1766 } else {
1767 return virtqueue_split_pop(vq, sz);
1768 }
1769 }
1770
1771 static unsigned int virtqueue_packed_drop_all(VirtQueue *vq)
1772 {
1773 VRingMemoryRegionCaches *caches;
1774 MemoryRegionCache *desc_cache;
1775 unsigned int dropped = 0;
1776 VirtQueueElement elem = {};
1777 VirtIODevice *vdev = vq->vdev;
1778 VRingPackedDesc desc;
1779
1780 RCU_READ_LOCK_GUARD();
1781
1782 caches = vring_get_region_caches(vq);
1783 if (!caches) {
1784 return 0;
1785 }
1786
1787 desc_cache = &caches->desc;
1788
1789 virtio_queue_set_notification(vq, 0);
1790
1791 while (vq->inuse < vq->vring.num) {
1792 unsigned int idx = vq->last_avail_idx;
1793 /*
1794 * Works similarly to virtqueue_pop() but does not map buffers
1795 * and does not allocate any memory.
1796 */
1797 vring_packed_desc_read(vdev, &desc, desc_cache,
1798 vq->last_avail_idx, true);
1799 if (!is_desc_avail(desc.flags, vq->last_avail_wrap_counter)) {
1800 break;
1801 }
1802 elem.index = desc.id;
1803 elem.ndescs = 1;
1804 while (virtqueue_packed_read_next_desc(vq, &desc, desc_cache,
1805 vq->vring.num, &idx, false)) {
1806 ++elem.ndescs;
1807 }
1808 /*
1809 * immediately push the element, nothing to unmap
1810 * as both in_num and out_num are set to 0.
1811 */
1812 virtqueue_push(vq, &elem, 0);
1813 dropped++;
1814 vq->last_avail_idx += elem.ndescs;
1815 if (vq->last_avail_idx >= vq->vring.num) {
1816 vq->last_avail_idx -= vq->vring.num;
1817 vq->last_avail_wrap_counter ^= 1;
1818 }
1819 }
1820
1821 return dropped;
1822 }
1823
1824 static unsigned int virtqueue_split_drop_all(VirtQueue *vq)
1825 {
1826 unsigned int dropped = 0;
1827 VirtQueueElement elem = {};
1828 VirtIODevice *vdev = vq->vdev;
1829 bool fEventIdx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1830
1831 while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) {
1832 /* works similarly to virtqueue_pop() but does not map buffers
1833 * and does not allocate any memory */
1834 smp_rmb();
1835 if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) {
1836 break;
1837 }
1838 vq->inuse++;
1839 vq->last_avail_idx++;
1840 if (fEventIdx) {
1841 vring_set_avail_event(vq, vq->last_avail_idx);
1842 }
1843 /* immediately push the element, nothing to unmap
1844 * as both in_num and out_num are set to 0 */
1845 virtqueue_push(vq, &elem, 0);
1846 dropped++;
1847 }
1848
1849 return dropped;
1850 }
1851
1852 /* virtqueue_drop_all:
1853 * @vq: The #VirtQueue
1854 * Drops all queued buffers and indicates them to the guest
1855 * as if they are done. Useful when buffers can not be
1856 * processed but must be returned to the guest.
1857 */
1858 unsigned int virtqueue_drop_all(VirtQueue *vq)
1859 {
1860 struct VirtIODevice *vdev = vq->vdev;
1861
1862 if (virtio_device_disabled(vq->vdev)) {
1863 return 0;
1864 }
1865
1866 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1867 return virtqueue_packed_drop_all(vq);
1868 } else {
1869 return virtqueue_split_drop_all(vq);
1870 }
1871 }
1872
1873 /* Reading and writing a structure directly to QEMUFile is *awful*, but
1874 * it is what QEMU has always done by mistake. We can change it sooner
1875 * or later by bumping the version number of the affected vm states.
1876 * In the meanwhile, since the in-memory layout of VirtQueueElement
1877 * has changed, we need to marshal to and from the layout that was
1878 * used before the change.
1879 */
1880 typedef struct VirtQueueElementOld {
1881 unsigned int index;
1882 unsigned int out_num;
1883 unsigned int in_num;
1884 hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
1885 hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
1886 struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
1887 struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
1888 } VirtQueueElementOld;
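
/*
 * The migration stream therefore carries a fixed-size VirtQueueElementOld
 * blob (the iov_base pointers are zeroed on save and rebuilt by
 * virtqueue_map() on load), optionally followed by a 32-bit ndescs field
 * when VIRTIO_F_RING_PACKED is offered -- see qemu_get_virtqueue_element()
 * and qemu_put_virtqueue_element() below.
 */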
1889
1890 void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
1891 {
1892 VirtQueueElement *elem;
1893 VirtQueueElementOld data;
1894 int i;
1895
1896 qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1897
1898 /* TODO: teach all callers that this can fail, and return failure instead
1899 * of asserting here.
1900 * This is just one thing (there are probably more) that must be
1901 * fixed before we can allow NDEBUG compilation.
1902 */
1903 assert(ARRAY_SIZE(data.in_addr) >= data.in_num);
1904 assert(ARRAY_SIZE(data.out_addr) >= data.out_num);
1905
1906 elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
1907 elem->index = data.index;
1908
1909 for (i = 0; i < elem->in_num; i++) {
1910 elem->in_addr[i] = data.in_addr[i];
1911 }
1912
1913 for (i = 0; i < elem->out_num; i++) {
1914 elem->out_addr[i] = data.out_addr[i];
1915 }
1916
1917 for (i = 0; i < elem->in_num; i++) {
1918 /* Base is overwritten by virtqueue_map. */
1919 elem->in_sg[i].iov_base = 0;
1920 elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
1921 }
1922
1923 for (i = 0; i < elem->out_num; i++) {
1924 /* Base is overwritten by virtqueue_map. */
1925 elem->out_sg[i].iov_base = 0;
1926 elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
1927 }
1928
1929 if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1930 qemu_get_be32s(f, &elem->ndescs);
1931 }
1932
1933 virtqueue_map(vdev, elem);
1934 return elem;
1935 }
1936
1937 void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
1938 VirtQueueElement *elem)
1939 {
1940 VirtQueueElementOld data;
1941 int i;
1942
1943 memset(&data, 0, sizeof(data));
1944 data.index = elem->index;
1945 data.in_num = elem->in_num;
1946 data.out_num = elem->out_num;
1947
1948 for (i = 0; i < elem->in_num; i++) {
1949 data.in_addr[i] = elem->in_addr[i];
1950 }
1951
1952 for (i = 0; i < elem->out_num; i++) {
1953 data.out_addr[i] = elem->out_addr[i];
1954 }
1955
1956 for (i = 0; i < elem->in_num; i++) {
1957 /* Base is overwritten by virtqueue_map when loading. Do not
1958 * save it, as it would leak the QEMU address space layout. */
1959 data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
1960 }
1961
1962 for (i = 0; i < elem->out_num; i++) {
1963 /* Do not save iov_base as above. */
1964 data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
1965 }
1966
1967 if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1968 qemu_put_be32s(f, &elem->ndescs);
1969 }
1970
1971 qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1972 }
1973
1974 /* virtio device */
1975 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
1976 {
1977 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1978 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1979
1980 if (virtio_device_disabled(vdev)) {
1981 return;
1982 }
1983
1984 if (k->notify) {
1985 k->notify(qbus->parent, vector);
1986 }
1987 }
1988
1989 void virtio_update_irq(VirtIODevice *vdev)
1990 {
1991 virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
1992 }
1993
1994 static int virtio_validate_features(VirtIODevice *vdev)
1995 {
1996 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1997
1998 if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
1999 !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
2000 return -EFAULT;
2001 }
2002
2003 if (k->validate_features) {
2004 return k->validate_features(vdev);
2005 } else {
2006 return 0;
2007 }
2008 }
2009
2010 int virtio_set_status(VirtIODevice *vdev, uint8_t val)
2011 {
2012 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2013 trace_virtio_set_status(vdev, val);
2014
2015 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2016 if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
2017 val & VIRTIO_CONFIG_S_FEATURES_OK) {
2018 int ret = virtio_validate_features(vdev);
2019
2020 if (ret) {
2021 return ret;
2022 }
2023 }
2024 }
2025
2026 if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
2027 (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
2028 virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
2029 }
2030
2031 if (k->set_status) {
2032 k->set_status(vdev, val);
2033 }
2034 vdev->status = val;
2035
2036 return 0;
2037 }
2038
2039 static enum virtio_device_endian virtio_default_endian(void)
2040 {
2041 if (target_words_bigendian()) {
2042 return VIRTIO_DEVICE_ENDIAN_BIG;
2043 } else {
2044 return VIRTIO_DEVICE_ENDIAN_LITTLE;
2045 }
2046 }
2047
2048 static enum virtio_device_endian virtio_current_cpu_endian(void)
2049 {
2050 if (cpu_virtio_is_big_endian(current_cpu)) {
2051 return VIRTIO_DEVICE_ENDIAN_BIG;
2052 } else {
2053 return VIRTIO_DEVICE_ENDIAN_LITTLE;
2054 }
2055 }
2056
2057 static void __virtio_queue_reset(VirtIODevice *vdev, uint32_t i)
2058 {
2059 vdev->vq[i].vring.desc = 0;
2060 vdev->vq[i].vring.avail = 0;
2061 vdev->vq[i].vring.used = 0;
2062 vdev->vq[i].last_avail_idx = 0;
2063 vdev->vq[i].shadow_avail_idx = 0;
2064 vdev->vq[i].used_idx = 0;
2065 vdev->vq[i].last_avail_wrap_counter = true;
2066 vdev->vq[i].shadow_avail_wrap_counter = true;
2067 vdev->vq[i].used_wrap_counter = true;
2068 virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
2069 vdev->vq[i].signalled_used = 0;
2070 vdev->vq[i].signalled_used_valid = false;
2071 vdev->vq[i].notification = true;
2072 vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
2073 vdev->vq[i].inuse = 0;
2074 virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
2075 }
2076
2077 void virtio_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
2078 {
2079 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2080
2081 if (k->queue_reset) {
2082 k->queue_reset(vdev, queue_index);
2083 }
2084
2085 __virtio_queue_reset(vdev, queue_index);
2086 }
2087
2088 void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
2089 {
2090 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2091
2092 /*
2093 * TODO: SeaBIOS is currently out of spec and triggers this error,
2094 * so it needs to be fixed in SeaBIOS first; then this check can be
2095 * re-enabled for new machine types only, after being converted to
2096 * LOG_GUEST_ERROR.
2097 *
2098 if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2099 error_report("queue_enable is only supported by virtio 1.0 "
2100 "and later devices.");
2101 }
2102 */
2103
2104 if (k->queue_enable) {
2105 k->queue_enable(vdev, queue_index);
2106 }
2107 }
2108
2109 void virtio_reset(void *opaque)
2110 {
2111 VirtIODevice *vdev = opaque;
2112 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2113 int i;
2114
2115 virtio_set_status(vdev, 0);
2116 if (current_cpu) {
2117 /* Guest initiated reset */
2118 vdev->device_endian = virtio_current_cpu_endian();
2119 } else {
2120 /* System reset */
2121 vdev->device_endian = virtio_default_endian();
2122 }
2123
2124 if (k->reset) {
2125 k->reset(vdev);
2126 }
2127
2128 vdev->start_on_kick = false;
2129 vdev->started = false;
2130 vdev->broken = false;
2131 vdev->guest_features = 0;
2132 vdev->queue_sel = 0;
2133 vdev->status = 0;
2134 vdev->disabled = false;
2135 qatomic_set(&vdev->isr, 0);
2136 vdev->config_vector = VIRTIO_NO_VECTOR;
2137 virtio_notify_vector(vdev, vdev->config_vector);
2138
2139 for(i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2140 __virtio_queue_reset(vdev, i);
2141 }
2142 }
2143
2144 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
2145 {
2146 if (!vdev->vq[n].vring.num) {
2147 return;
2148 }
2149 vdev->vq[n].vring.desc = addr;
2150 virtio_queue_update_rings(vdev, n);
2151 }
2152
2153 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
2154 {
2155 return vdev->vq[n].vring.desc;
2156 }
2157
2158 void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
2159 hwaddr avail, hwaddr used)
2160 {
2161 if (!vdev->vq[n].vring.num) {
2162 return;
2163 }
2164 vdev->vq[n].vring.desc = desc;
2165 vdev->vq[n].vring.avail = avail;
2166 vdev->vq[n].vring.used = used;
2167 virtio_init_region_cache(vdev, n);
2168 }
2169
2170 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
2171 {
2172 /* Don't allow guest to flip queue between existent and
2173 * nonexistent states, or to set it to an invalid size.
2174 */
2175 if (!!num != !!vdev->vq[n].vring.num ||
2176 num > VIRTQUEUE_MAX_SIZE ||
2177 num < 0) {
2178 return;
2179 }
2180 vdev->vq[n].vring.num = num;
2181 }
2182
2183 VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
2184 {
2185 return QLIST_FIRST(&vdev->vector_queues[vector]);
2186 }
2187
2188 VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
2189 {
2190 return QLIST_NEXT(vq, node);
2191 }
2192
2193 int virtio_queue_get_num(VirtIODevice *vdev, int n)
2194 {
2195 return vdev->vq[n].vring.num;
2196 }
2197
2198 int virtio_queue_get_max_num(VirtIODevice *vdev, int n)
2199 {
2200 return vdev->vq[n].vring.num_default;
2201 }
2202
2203 int virtio_get_num_queues(VirtIODevice *vdev)
2204 {
2205 int i;
2206
2207 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2208 if (!virtio_queue_get_num(vdev, i)) {
2209 break;
2210 }
2211 }
2212
2213 return i;
2214 }
2215
2216 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
2217 {
2218 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2219 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2220
2221 /* virtio-1 compliant devices cannot change the alignment */
2222 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2223 error_report("tried to modify queue alignment for virtio-1 device");
2224 return;
2225 }
2226 /* Check that the transport told us it was going to do this
2227 * (so a buggy transport will immediately assert rather than
2228 * silently failing to migrate this state)
2229 */
2230 assert(k->has_variable_vring_alignment);
2231
2232 if (align) {
2233 vdev->vq[n].vring.align = align;
2234 virtio_queue_update_rings(vdev, n);
2235 }
2236 }
2237
2238 static void virtio_queue_notify_vq(VirtQueue *vq)
2239 {
2240 if (vq->vring.desc && vq->handle_output) {
2241 VirtIODevice *vdev = vq->vdev;
2242
2243 if (unlikely(vdev->broken)) {
2244 return;
2245 }
2246
2247 trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2248 vq->handle_output(vdev, vq);
2249
2250 if (unlikely(vdev->start_on_kick)) {
2251 virtio_set_started(vdev, true);
2252 }
2253 }
2254 }
2255
2256 void virtio_queue_notify(VirtIODevice *vdev, int n)
2257 {
2258 VirtQueue *vq = &vdev->vq[n];
2259
2260 if (unlikely(!vq->vring.desc || vdev->broken)) {
2261 return;
2262 }
2263
2264 trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2265 if (vq->host_notifier_enabled) {
2266 event_notifier_set(&vq->host_notifier);
2267 } else if (vq->handle_output) {
2268 vq->handle_output(vdev, vq);
2269
2270 if (unlikely(vdev->start_on_kick)) {
2271 virtio_set_started(vdev, true);
2272 }
2273 }
2274 }
2275
2276 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
2277 {
2278 return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
2279 VIRTIO_NO_VECTOR;
2280 }
2281
2282 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
2283 {
2284 VirtQueue *vq = &vdev->vq[n];
2285
2286 if (n < VIRTIO_QUEUE_MAX) {
2287 if (vdev->vector_queues &&
2288 vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
2289 QLIST_REMOVE(vq, node);
2290 }
2291 vdev->vq[n].vector = vector;
2292 if (vdev->vector_queues &&
2293 vector != VIRTIO_NO_VECTOR) {
2294 QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
2295 }
2296 }
2297 }
2298
2299 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
2300 VirtIOHandleOutput handle_output)
2301 {
2302 int i;
2303
2304 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2305 if (vdev->vq[i].vring.num == 0)
2306 break;
2307 }
2308
2309 if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
2310 abort();
2311
2312 vdev->vq[i].vring.num = queue_size;
2313 vdev->vq[i].vring.num_default = queue_size;
2314 vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
2315 vdev->vq[i].handle_output = handle_output;
2316 vdev->vq[i].used_elems = g_new0(VirtQueueElement, queue_size);
2317
2318 return &vdev->vq[i];
2319 }
2320
2321 void virtio_delete_queue(VirtQueue *vq)
2322 {
2323 vq->vring.num = 0;
2324 vq->vring.num_default = 0;
2325 vq->handle_output = NULL;
2326 g_free(vq->used_elems);
2327 vq->used_elems = NULL;
2328 virtio_virtqueue_reset_region_cache(vq);
2329 }
2330
2331 void virtio_del_queue(VirtIODevice *vdev, int n)
2332 {
2333 if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
2334 abort();
2335 }
2336
2337 virtio_delete_queue(&vdev->vq[n]);
2338 }
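
/*
 * Illustrative sketch (not part of the original file): device models pair
 * virtio_add_queue() in their realize path with virtio_delete_queue() (or
 * virtio_del_queue() by index) in unrealize. The handler name and queue
 * size below are hypothetical.
 */
#if 0 /* example only */
static void virtio_foo_handle_vq(VirtIODevice *vdev, VirtQueue *vq);

static VirtQueue *virtio_foo_setup_queue(VirtIODevice *vdev)
{
    /* 128-entry request queue; the handler runs when the guest kicks it. */
    return virtio_add_queue(vdev, 128, virtio_foo_handle_vq);
}

static void virtio_foo_teardown_queue(VirtQueue *vq)
{
    virtio_delete_queue(vq);
}
#endif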
2339
2340 static void virtio_set_isr(VirtIODevice *vdev, int value)
2341 {
2342 uint8_t old = qatomic_read(&vdev->isr);
2343
2344 /* Do not write ISR if it does not change, so that its cacheline remains
2345 * shared in the common case where the guest does not read it.
2346 */
2347 if ((old & value) != value) {
2348 qatomic_or(&vdev->isr, value);
2349 }
2350 }
2351
2352 /* Called within rcu_read_lock(). */
2353 static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2354 {
2355 uint16_t old, new;
2356 bool v;
2357 /* We need to expose used array entries before checking used event. */
2358 smp_mb();
2359 /* Always notify when the queue is empty, if VIRTIO_F_NOTIFY_ON_EMPTY was negotiated. */
2360 if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
2361 !vq->inuse && virtio_queue_empty(vq)) {
2362 return true;
2363 }
2364
2365 if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2366 return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
2367 }
2368
2369 v = vq->signalled_used_valid;
2370 vq->signalled_used_valid = true;
2371 old = vq->signalled_used;
2372 new = vq->signalled_used = vq->used_idx;
2373 return !v || vring_need_event(vring_get_used_event(vq), new, old);
2374 }
2375
2376 static bool vring_packed_need_event(VirtQueue *vq, bool wrap,
2377 uint16_t off_wrap, uint16_t new,
2378 uint16_t old)
2379 {
2380 int off = off_wrap & ~(1 << 15);
2381
2382 if (wrap != off_wrap >> 15) {
2383 off -= vq->vring.num;
2384 }
2385
2386 return vring_need_event(off, new, old);
2387 }
2388
2389 /* Called within rcu_read_lock(). */
2390 static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2391 {
2392 VRingPackedDescEvent e;
2393 uint16_t old, new;
2394 bool v;
2395 VRingMemoryRegionCaches *caches;
2396
2397 caches = vring_get_region_caches(vq);
2398 if (!caches) {
2399 return false;
2400 }
2401
2402 vring_packed_event_read(vdev, &caches->avail, &e);
2403
2404 old = vq->signalled_used;
2405 new = vq->signalled_used = vq->used_idx;
2406 v = vq->signalled_used_valid;
2407 vq->signalled_used_valid = true;
2408
2409 if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) {
2410 return false;
2411 } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) {
2412 return true;
2413 }
2414
2415 return !v || vring_packed_need_event(vq, vq->used_wrap_counter,
2416 e.off_wrap, new, old);
2417 }
2418
2419 /* Called within rcu_read_lock(). */
2420 static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2421 {
2422 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2423 return virtio_packed_should_notify(vdev, vq);
2424 } else {
2425 return virtio_split_should_notify(vdev, vq);
2426 }
2427 }
2428
2429 void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
2430 {
2431 WITH_RCU_READ_LOCK_GUARD() {
2432 if (!virtio_should_notify(vdev, vq)) {
2433 return;
2434 }
2435 }
2436
2437 trace_virtio_notify_irqfd(vdev, vq);
2438
2439 /*
2440 * The virtio 1.0 spec says ISR bit 0 should be ignored with MSI, but
2441 * the Windows drivers included in virtio-win 1.8.0 (circa 2015)
2442 * incorrectly poll this bit during crashdump and hibernation in MSI
2443 * mode, causing a hang if the bit is never updated.
2444 * Recent releases of Windows do not really shut down, but rather
2445 * log out and hibernate to make the next startup faster. Hence,
2446 * this also manifested as a more serious hang during shutdown.
2447 *
2448 * The next driver release, from 2016, fixed the problem, so working
2449 * around it is not a must, but it's easy to do, so let's do it here.
2450 *
2451 * Note: it's safe to update the ISR from any thread, as it was
2452 * switched to an atomic operation.
2453 */
2454 virtio_set_isr(vq->vdev, 0x1);
2455 event_notifier_set(&vq->guest_notifier);
2456 }
2457
2458 static void virtio_irq(VirtQueue *vq)
2459 {
2460 virtio_set_isr(vq->vdev, 0x1);
2461 virtio_notify_vector(vq->vdev, vq->vector);
2462 }
2463
2464 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
2465 {
2466 WITH_RCU_READ_LOCK_GUARD() {
2467 if (!virtio_should_notify(vdev, vq)) {
2468 return;
2469 }
2470 }
2471
2472 trace_virtio_notify(vdev, vq);
2473 virtio_irq(vq);
2474 }
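
/*
 * Illustrative sketch (not part of the original file): the usual shape of a
 * VirtIOHandleOutput callback - pop requests, complete them with
 * virtqueue_push(), then raise a single interrupt with virtio_notify().
 * Request processing is elided and the names are hypothetical.
 */
#if 0 /* example only */
static void virtio_foo_handle_vq(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtQueueElement *elem;
    bool progress = false;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }
        /* ... read elem->out_sg, write the reply into elem->in_sg ... */
        virtqueue_push(vq, elem, 0 /* bytes written into in_sg */);
        g_free(elem);
        progress = true;
    }
    if (progress) {
        virtio_notify(vdev, vq);
    }
}
#endif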
2475
2476 void virtio_notify_config(VirtIODevice *vdev)
2477 {
2478 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2479 return;
2480
2481 virtio_set_isr(vdev, 0x3);
2482 vdev->generation++;
2483 virtio_notify_vector(vdev, vdev->config_vector);
2484 }
2485
2486 static bool virtio_device_endian_needed(void *opaque)
2487 {
2488 VirtIODevice *vdev = opaque;
2489
2490 assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
2491 if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2492 return vdev->device_endian != virtio_default_endian();
2493 }
2494 /* Devices conforming to VIRTIO 1.0 or later are always LE. */
2495 return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
2496 }
2497
2498 static bool virtio_64bit_features_needed(void *opaque)
2499 {
2500 VirtIODevice *vdev = opaque;
2501
2502 return (vdev->host_features >> 32) != 0;
2503 }
2504
2505 static bool virtio_virtqueue_needed(void *opaque)
2506 {
2507 VirtIODevice *vdev = opaque;
2508
2509 return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
2510 }
2511
2512 static bool virtio_packed_virtqueue_needed(void *opaque)
2513 {
2514 VirtIODevice *vdev = opaque;
2515
2516 return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED);
2517 }
2518
2519 static bool virtio_ringsize_needed(void *opaque)
2520 {
2521 VirtIODevice *vdev = opaque;
2522 int i;
2523
2524 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2525 if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
2526 return true;
2527 }
2528 }
2529 return false;
2530 }
2531
2532 static bool virtio_extra_state_needed(void *opaque)
2533 {
2534 VirtIODevice *vdev = opaque;
2535 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2536 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2537
2538 return k->has_extra_state &&
2539 k->has_extra_state(qbus->parent);
2540 }
2541
2542 static bool virtio_broken_needed(void *opaque)
2543 {
2544 VirtIODevice *vdev = opaque;
2545
2546 return vdev->broken;
2547 }
2548
2549 static bool virtio_started_needed(void *opaque)
2550 {
2551 VirtIODevice *vdev = opaque;
2552
2553 return vdev->started;
2554 }
2555
2556 static bool virtio_disabled_needed(void *opaque)
2557 {
2558 VirtIODevice *vdev = opaque;
2559
2560 return vdev->disabled;
2561 }
2562
2563 static const VMStateDescription vmstate_virtqueue = {
2564 .name = "virtqueue_state",
2565 .version_id = 1,
2566 .minimum_version_id = 1,
2567 .fields = (VMStateField[]) {
2568 VMSTATE_UINT64(vring.avail, struct VirtQueue),
2569 VMSTATE_UINT64(vring.used, struct VirtQueue),
2570 VMSTATE_END_OF_LIST()
2571 }
2572 };
2573
2574 static const VMStateDescription vmstate_packed_virtqueue = {
2575 .name = "packed_virtqueue_state",
2576 .version_id = 1,
2577 .minimum_version_id = 1,
2578 .fields = (VMStateField[]) {
2579 VMSTATE_UINT16(last_avail_idx, struct VirtQueue),
2580 VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue),
2581 VMSTATE_UINT16(used_idx, struct VirtQueue),
2582 VMSTATE_BOOL(used_wrap_counter, struct VirtQueue),
2583 VMSTATE_UINT32(inuse, struct VirtQueue),
2584 VMSTATE_END_OF_LIST()
2585 }
2586 };
2587
2588 static const VMStateDescription vmstate_virtio_virtqueues = {
2589 .name = "virtio/virtqueues",
2590 .version_id = 1,
2591 .minimum_version_id = 1,
2592 .needed = &virtio_virtqueue_needed,
2593 .fields = (VMStateField[]) {
2594 VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2595 VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
2596 VMSTATE_END_OF_LIST()
2597 }
2598 };
2599
2600 static const VMStateDescription vmstate_virtio_packed_virtqueues = {
2601 .name = "virtio/packed_virtqueues",
2602 .version_id = 1,
2603 .minimum_version_id = 1,
2604 .needed = &virtio_packed_virtqueue_needed,
2605 .fields = (VMStateField[]) {
2606 VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2607 VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue),
2608 VMSTATE_END_OF_LIST()
2609 }
2610 };
2611
2612 static const VMStateDescription vmstate_ringsize = {
2613 .name = "ringsize_state",
2614 .version_id = 1,
2615 .minimum_version_id = 1,
2616 .fields = (VMStateField[]) {
2617 VMSTATE_UINT32(vring.num_default, struct VirtQueue),
2618 VMSTATE_END_OF_LIST()
2619 }
2620 };
2621
2622 static const VMStateDescription vmstate_virtio_ringsize = {
2623 .name = "virtio/ringsize",
2624 .version_id = 1,
2625 .minimum_version_id = 1,
2626 .needed = &virtio_ringsize_needed,
2627 .fields = (VMStateField[]) {
2628 VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2629 VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
2630 VMSTATE_END_OF_LIST()
2631 }
2632 };
2633
2634 static int get_extra_state(QEMUFile *f, void *pv, size_t size,
2635 const VMStateField *field)
2636 {
2637 VirtIODevice *vdev = pv;
2638 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2639 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2640
2641 if (!k->load_extra_state) {
2642 return -1;
2643 } else {
2644 return k->load_extra_state(qbus->parent, f);
2645 }
2646 }
2647
2648 static int put_extra_state(QEMUFile *f, void *pv, size_t size,
2649 const VMStateField *field, JSONWriter *vmdesc)
2650 {
2651 VirtIODevice *vdev = pv;
2652 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2653 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2654
2655 k->save_extra_state(qbus->parent, f);
2656 return 0;
2657 }
2658
2659 static const VMStateInfo vmstate_info_extra_state = {
2660 .name = "virtqueue_extra_state",
2661 .get = get_extra_state,
2662 .put = put_extra_state,
2663 };
2664
2665 static const VMStateDescription vmstate_virtio_extra_state = {
2666 .name = "virtio/extra_state",
2667 .version_id = 1,
2668 .minimum_version_id = 1,
2669 .needed = &virtio_extra_state_needed,
2670 .fields = (VMStateField[]) {
2671 {
2672 .name = "extra_state",
2673 .version_id = 0,
2674 .field_exists = NULL,
2675 .size = 0,
2676 .info = &vmstate_info_extra_state,
2677 .flags = VMS_SINGLE,
2678 .offset = 0,
2679 },
2680 VMSTATE_END_OF_LIST()
2681 }
2682 };
2683
2684 static const VMStateDescription vmstate_virtio_device_endian = {
2685 .name = "virtio/device_endian",
2686 .version_id = 1,
2687 .minimum_version_id = 1,
2688 .needed = &virtio_device_endian_needed,
2689 .fields = (VMStateField[]) {
2690 VMSTATE_UINT8(device_endian, VirtIODevice),
2691 VMSTATE_END_OF_LIST()
2692 }
2693 };
2694
2695 static const VMStateDescription vmstate_virtio_64bit_features = {
2696 .name = "virtio/64bit_features",
2697 .version_id = 1,
2698 .minimum_version_id = 1,
2699 .needed = &virtio_64bit_features_needed,
2700 .fields = (VMStateField[]) {
2701 VMSTATE_UINT64(guest_features, VirtIODevice),
2702 VMSTATE_END_OF_LIST()
2703 }
2704 };
2705
2706 static const VMStateDescription vmstate_virtio_broken = {
2707 .name = "virtio/broken",
2708 .version_id = 1,
2709 .minimum_version_id = 1,
2710 .needed = &virtio_broken_needed,
2711 .fields = (VMStateField[]) {
2712 VMSTATE_BOOL(broken, VirtIODevice),
2713 VMSTATE_END_OF_LIST()
2714 }
2715 };
2716
2717 static const VMStateDescription vmstate_virtio_started = {
2718 .name = "virtio/started",
2719 .version_id = 1,
2720 .minimum_version_id = 1,
2721 .needed = &virtio_started_needed,
2722 .fields = (VMStateField[]) {
2723 VMSTATE_BOOL(started, VirtIODevice),
2724 VMSTATE_END_OF_LIST()
2725 }
2726 };
2727
2728 static const VMStateDescription vmstate_virtio_disabled = {
2729 .name = "virtio/disabled",
2730 .version_id = 1,
2731 .minimum_version_id = 1,
2732 .needed = &virtio_disabled_needed,
2733 .fields = (VMStateField[]) {
2734 VMSTATE_BOOL(disabled, VirtIODevice),
2735 VMSTATE_END_OF_LIST()
2736 }
2737 };
2738
2739 static const VMStateDescription vmstate_virtio = {
2740 .name = "virtio",
2741 .version_id = 1,
2742 .minimum_version_id = 1,
2743 .fields = (VMStateField[]) {
2744 VMSTATE_END_OF_LIST()
2745 },
2746 .subsections = (const VMStateDescription*[]) {
2747 &vmstate_virtio_device_endian,
2748 &vmstate_virtio_64bit_features,
2749 &vmstate_virtio_virtqueues,
2750 &vmstate_virtio_ringsize,
2751 &vmstate_virtio_broken,
2752 &vmstate_virtio_extra_state,
2753 &vmstate_virtio_started,
2754 &vmstate_virtio_packed_virtqueues,
2755 &vmstate_virtio_disabled,
2756 NULL
2757 }
2758 };
2759
2760 int virtio_save(VirtIODevice *vdev, QEMUFile *f)
2761 {
2762 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2763 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2764 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2765 uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
2766 int i;
2767
2768 if (k->save_config) {
2769 k->save_config(qbus->parent, f);
2770 }
2771
2772 qemu_put_8s(f, &vdev->status);
2773 qemu_put_8s(f, &vdev->isr);
2774 qemu_put_be16s(f, &vdev->queue_sel);
2775 qemu_put_be32s(f, &guest_features_lo);
2776 qemu_put_be32(f, vdev->config_len);
2777 qemu_put_buffer(f, vdev->config, vdev->config_len);
2778
2779 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2780 if (vdev->vq[i].vring.num == 0)
2781 break;
2782 }
2783
2784 qemu_put_be32(f, i);
2785
2786 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2787 if (vdev->vq[i].vring.num == 0)
2788 break;
2789
2790 qemu_put_be32(f, vdev->vq[i].vring.num);
2791 if (k->has_variable_vring_alignment) {
2792 qemu_put_be32(f, vdev->vq[i].vring.align);
2793 }
2794 /*
2795 * Save desc now; the rest of the ring addresses are saved in
2796 * subsections for VIRTIO-1 devices.
2797 */
2798 qemu_put_be64(f, vdev->vq[i].vring.desc);
2799 qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
2800 if (k->save_queue) {
2801 k->save_queue(qbus->parent, i, f);
2802 }
2803 }
2804
2805 if (vdc->save != NULL) {
2806 vdc->save(vdev, f);
2807 }
2808
2809 if (vdc->vmsd) {
2810 int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL);
2811 if (ret) {
2812 return ret;
2813 }
2814 }
2815
2816 /* Subsections */
2817 return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
2818 }
2819
2820 /* A wrapper for use as a VMState .put function */
2821 static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
2822 const VMStateField *field, JSONWriter *vmdesc)
2823 {
2824 return virtio_save(VIRTIO_DEVICE(opaque), f);
2825 }
2826
2827 /* A wrapper for use as a VMState .get function */
2828 static int virtio_device_get(QEMUFile *f, void *opaque, size_t size,
2829 const VMStateField *field)
2830 {
2831 VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
2832 DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
2833
2834 return virtio_load(vdev, f, dc->vmsd->version_id);
2835 }
2836
2837 const VMStateInfo virtio_vmstate_info = {
2838 .name = "virtio",
2839 .get = virtio_device_get,
2840 .put = virtio_device_put,
2841 };
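
/*
 * Illustrative sketch (not part of the original file): virtio_vmstate_info
 * is normally reached through the VMSTATE_VIRTIO_DEVICE field macro in a
 * device's VMStateDescription, so that the device's state is routed through
 * virtio_save()/virtio_load(). The device name and version are hypothetical.
 */
#if 0 /* example only */
static const VMStateDescription vmstate_virtio_foo = {
    .name = "virtio-foo",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
};
#endif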
2842
2843 static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
2844 {
2845 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2846 bool bad = (val & ~(vdev->host_features)) != 0;
2847
2848 val &= vdev->host_features;
2849 if (k->set_features) {
2850 k->set_features(vdev, val);
2851 }
2852 vdev->guest_features = val;
2853 return bad ? -1 : 0;
2854 }
2855
2856 int virtio_set_features(VirtIODevice *vdev, uint64_t val)
2857 {
2858 int ret;
2859 /*
2860 * The driver must not attempt to set features after feature negotiation
2861 * has finished.
2862 */
2863 if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
2864 return -EINVAL;
2865 }
2866
2867 if (val & (1ull << VIRTIO_F_BAD_FEATURE)) {
2868 qemu_log_mask(LOG_GUEST_ERROR,
2869 "%s: guest driver for %s has enabled UNUSED(30) feature bit!\n",
2870 __func__, vdev->name);
2871 }
2872
2873 ret = virtio_set_features_nocheck(vdev, val);
2874 if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2875 /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches. */
2876 int i;
2877 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2878 if (vdev->vq[i].vring.num != 0) {
2879 virtio_init_region_cache(vdev, i);
2880 }
2881 }
2882 }
2883 if (!ret) {
2884 if (!virtio_device_started(vdev, vdev->status) &&
2885 !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2886 vdev->start_on_kick = true;
2887 }
2888 }
2889 return ret;
2890 }
2891
2892 size_t virtio_get_config_size(const VirtIOConfigSizeParams *params,
2893 uint64_t host_features)
2894 {
2895 size_t config_size = params->min_size;
2896 const VirtIOFeature *feature_sizes = params->feature_sizes;
2897 size_t i;
2898
2899 for (i = 0; feature_sizes[i].flags != 0; i++) {
2900 if (host_features & feature_sizes[i].flags) {
2901 config_size = MAX(feature_sizes[i].end, config_size);
2902 }
2903 }
2904
2905 assert(config_size <= params->max_size);
2906 return config_size;
2907 }
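
/*
 * Illustrative sketch (not part of the original file): how a device might
 * describe feature-dependent config space for virtio_get_config_size().
 * Field names follow the accesses above; the feature bit and config struct
 * are borrowed from virtio-net purely as an example.
 */
#if 0 /* example only */
static const VirtIOFeature foo_feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {}
};

static const VirtIOConfigSizeParams foo_cfg_size_params = {
    .min_size = endof(struct virtio_net_config, status),
    .max_size = sizeof(struct virtio_net_config),
    .feature_sizes = foo_feature_sizes
};

/* size_t sz = virtio_get_config_size(&foo_cfg_size_params, host_features); */
#endif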
2908
2909 int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
2910 {
2911 int i, ret;
2912 int32_t config_len;
2913 uint32_t num;
2914 uint32_t features;
2915 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2916 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2917 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2918
2919 /*
2920 * We poison the endianness to ensure it does not get used before
2921 * subsections have been loaded.
2922 */
2923 vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
2924
2925 if (k->load_config) {
2926 ret = k->load_config(qbus->parent, f);
2927 if (ret)
2928 return ret;
2929 }
2930
2931 qemu_get_8s(f, &vdev->status);
2932 qemu_get_8s(f, &vdev->isr);
2933 qemu_get_be16s(f, &vdev->queue_sel);
2934 if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
2935 return -1;
2936 }
2937 qemu_get_be32s(f, &features);
2938
2939 /*
2940 * Temporarily set the low bits of guest_features - needed by the
2941 * virtio-net load code, which tests for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
2942 * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
2943 *
2944 * Note: devices should always test host features in the future - don't
2945 * create new dependencies like this.
2946 */
2947 vdev->guest_features = features;
2948
2949 config_len = qemu_get_be32(f);
2950
2951 /*
2952 * There are cases where the incoming config can be bigger or smaller
2953 * than what we have; so load what we have space for, and skip
2954 * any excess that's in the stream.
2955 */
2956 qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
2957
2958 while (config_len > vdev->config_len) {
2959 qemu_get_byte(f);
2960 config_len--;
2961 }
2962
2963 num = qemu_get_be32(f);
2964
2965 if (num > VIRTIO_QUEUE_MAX) {
2966 error_report("Invalid number of virtqueues: 0x%x", num);
2967 return -1;
2968 }
2969
2970 for (i = 0; i < num; i++) {
2971 vdev->vq[i].vring.num = qemu_get_be32(f);
2972 if (k->has_variable_vring_alignment) {
2973 vdev->vq[i].vring.align = qemu_get_be32(f);
2974 }
2975 vdev->vq[i].vring.desc = qemu_get_be64(f);
2976 qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
2977 vdev->vq[i].signalled_used_valid = false;
2978 vdev->vq[i].notification = true;
2979
2980 if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
2981 error_report("VQ %d address 0x0 "
2982 "inconsistent with Host index 0x%x",
2983 i, vdev->vq[i].last_avail_idx);
2984 return -1;
2985 }
2986 if (k->load_queue) {
2987 ret = k->load_queue(qbus->parent, i, f);
2988 if (ret)
2989 return ret;
2990 }
2991 }
2992
2993 virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
2994
2995 if (vdc->load != NULL) {
2996 ret = vdc->load(vdev, f, version_id);
2997 if (ret) {
2998 return ret;
2999 }
3000 }
3001
3002 if (vdc->vmsd) {
3003 ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
3004 if (ret) {
3005 return ret;
3006 }
3007 }
3008
3009 /* Subsections */
3010 ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
3011 if (ret) {
3012 return ret;
3013 }
3014
3015 if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
3016 vdev->device_endian = virtio_default_endian();
3017 }
3018
3019 if (virtio_64bit_features_needed(vdev)) {
3020 /*
3021 * Subsection load filled vdev->guest_features. Run them
3022 * through virtio_set_features to sanity-check them against
3023 * host_features.
3024 */
3025 uint64_t features64 = vdev->guest_features;
3026 if (virtio_set_features_nocheck(vdev, features64) < 0) {
3027 error_report("Features 0x%" PRIx64 " unsupported. "
3028 "Allowed features: 0x%" PRIx64,
3029 features64, vdev->host_features);
3030 return -1;
3031 }
3032 } else {
3033 if (virtio_set_features_nocheck(vdev, features) < 0) {
3034 error_report("Features 0x%x unsupported. "
3035 "Allowed features: 0x%" PRIx64,
3036 features, vdev->host_features);
3037 return -1;
3038 }
3039 }
3040
3041 if (!virtio_device_started(vdev, vdev->status) &&
3042 !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3043 vdev->start_on_kick = true;
3044 }
3045
3046 RCU_READ_LOCK_GUARD();
3047 for (i = 0; i < num; i++) {
3048 if (vdev->vq[i].vring.desc) {
3049 uint16_t nheads;
3050
3051 /*
3052 * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
3053 * only the region cache needs to be set up. Legacy devices need
3054 * to calculate used and avail ring addresses based on the desc
3055 * address.
3056 */
3057 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3058 virtio_init_region_cache(vdev, i);
3059 } else {
3060 virtio_queue_update_rings(vdev, i);
3061 }
3062
3063 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3064 vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx;
3065 vdev->vq[i].shadow_avail_wrap_counter =
3066 vdev->vq[i].last_avail_wrap_counter;
3067 continue;
3068 }
3069
3070 nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
3071 /* Check it isn't doing strange things with descriptor numbers. */
3072 if (nheads > vdev->vq[i].vring.num) {
3073 virtio_error(vdev, "VQ %d size 0x%x Guest index 0x%x "
3074 "inconsistent with Host index 0x%x: delta 0x%x",
3075 i, vdev->vq[i].vring.num,
3076 vring_avail_idx(&vdev->vq[i]),
3077 vdev->vq[i].last_avail_idx, nheads);
3078 vdev->vq[i].used_idx = 0;
3079 vdev->vq[i].shadow_avail_idx = 0;
3080 vdev->vq[i].inuse = 0;
3081 continue;
3082 }
3083 vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
3084 vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
3085
3086 /*
3087 * Some devices migrate VirtQueueElements that have been popped
3088 * from the avail ring but not yet returned to the used ring.
3089 * Since the maximum ring size is less than UINT16_MAX, it is safe
3090 * to compute the difference modulo UINT16_MAX + 1.
3091 */
3092 vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
3093 vdev->vq[i].used_idx);
3094 if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
3095 error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
3096 "used_idx 0x%x",
3097 i, vdev->vq[i].vring.num,
3098 vdev->vq[i].last_avail_idx,
3099 vdev->vq[i].used_idx);
3100 return -1;
3101 }
3102 }
3103 }
3104
3105 if (vdc->post_load) {
3106 ret = vdc->post_load(vdev);
3107 if (ret) {
3108 return ret;
3109 }
3110 }
3111
3112 return 0;
3113 }
3114
3115 void virtio_cleanup(VirtIODevice *vdev)
3116 {
3117 qemu_del_vm_change_state_handler(vdev->vmstate);
3118 }
3119
3120 static void virtio_vmstate_change(void *opaque, bool running, RunState state)
3121 {
3122 VirtIODevice *vdev = opaque;
3123 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3124 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3125 bool backend_run = running && virtio_device_started(vdev, vdev->status);
3126 vdev->vm_running = running;
3127
3128 if (backend_run) {
3129 virtio_set_status(vdev, vdev->status);
3130 }
3131
3132 if (k->vmstate_change) {
3133 k->vmstate_change(qbus->parent, backend_run);
3134 }
3135
3136 if (!backend_run) {
3137 virtio_set_status(vdev, vdev->status);
3138 }
3139 }
3140
3141 void virtio_instance_init_common(Object *proxy_obj, void *data,
3142 size_t vdev_size, const char *vdev_name)
3143 {
3144 DeviceState *vdev = data;
3145
3146 object_initialize_child_with_props(proxy_obj, "virtio-backend", vdev,
3147 vdev_size, vdev_name, &error_abort,
3148 NULL);
3149 qdev_alias_all_properties(vdev, proxy_obj);
3150 }
3151
3152 void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t config_size)
3153 {
3154 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3155 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3156 int i;
3157 int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
3158
3159 if (nvectors) {
3160 vdev->vector_queues =
3161 g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
3162 }
3163
3164 vdev->start_on_kick = false;
3165 vdev->started = false;
3166 vdev->vhost_started = false;
3167 vdev->device_id = device_id;
3168 vdev->status = 0;
3169 qatomic_set(&vdev->isr, 0);
3170 vdev->queue_sel = 0;
3171 vdev->config_vector = VIRTIO_NO_VECTOR;
3172 vdev->vq = g_new0(VirtQueue, VIRTIO_QUEUE_MAX);
3173 vdev->vm_running = runstate_is_running();
3174 vdev->broken = false;
3175 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3176 vdev->vq[i].vector = VIRTIO_NO_VECTOR;
3177 vdev->vq[i].vdev = vdev;
3178 vdev->vq[i].queue_index = i;
3179 vdev->vq[i].host_notifier_enabled = false;
3180 }
3181
3182 vdev->name = virtio_id_to_name(device_id);
3183 vdev->config_len = config_size;
3184 if (vdev->config_len) {
3185 vdev->config = g_malloc0(config_size);
3186 } else {
3187 vdev->config = NULL;
3188 }
3189 vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
3190 virtio_vmstate_change, vdev);
3191 vdev->device_endian = virtio_default_endian();
3192 vdev->use_guest_notifier_mask = true;
3193 }
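
/*
 * Illustrative sketch (not part of the original file): the usual realize /
 * unrealize pairing around virtio_init() and virtio_cleanup(). The device
 * id and config struct are borrowed from virtio-net as an example; the
 * "virtio_foo" names are hypothetical.
 */
#if 0 /* example only */
static void virtio_foo_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);

    virtio_init(vdev, VIRTIO_ID_NET, sizeof(struct virtio_net_config));
    /* ... virtio_add_queue() calls and other device setup follow ... */
}

static void virtio_foo_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);

    /* ... virtio_del_queue()/virtio_delete_queue() calls go here ... */
    virtio_cleanup(vdev);
}
#endif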
3194
3195 /*
3196 * Only devices that already existed before the virtio standard was
3197 * defined support legacy mode; this includes devices not specified in
3198 * the standard. All newer devices conform to the virtio standard only.
3199 */
3200 bool virtio_legacy_allowed(VirtIODevice *vdev)
3201 {
3202 switch (vdev->device_id) {
3203 case VIRTIO_ID_NET:
3204 case VIRTIO_ID_BLOCK:
3205 case VIRTIO_ID_CONSOLE:
3206 case VIRTIO_ID_RNG:
3207 case VIRTIO_ID_BALLOON:
3208 case VIRTIO_ID_RPMSG:
3209 case VIRTIO_ID_SCSI:
3210 case VIRTIO_ID_9P:
3211 case VIRTIO_ID_RPROC_SERIAL:
3212 case VIRTIO_ID_CAIF:
3213 return true;
3214 default:
3215 return false;
3216 }
3217 }
3218
3219 bool virtio_legacy_check_disabled(VirtIODevice *vdev)
3220 {
3221 return vdev->disable_legacy_check;
3222 }
3223
3224 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
3225 {
3226 return vdev->vq[n].vring.desc;
3227 }
3228
3229 bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n)
3230 {
3231 return virtio_queue_get_desc_addr(vdev, n) != 0;
3232 }
3233
3234 bool virtio_queue_enabled(VirtIODevice *vdev, int n)
3235 {
3236 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3237 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3238
3239 if (k->queue_enabled) {
3240 return k->queue_enabled(qbus->parent, n);
3241 }
3242 return virtio_queue_enabled_legacy(vdev, n);
3243 }
3244
3245 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
3246 {
3247 return vdev->vq[n].vring.avail;
3248 }
3249
3250 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
3251 {
3252 return vdev->vq[n].vring.used;
3253 }
3254
3255 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
3256 {
3257 return sizeof(VRingDesc) * vdev->vq[n].vring.num;
3258 }
3259
3260 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
3261 {
3262 int s;
3263
3264 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3265 return sizeof(struct VRingPackedDescEvent);
3266 }
3267
3268 s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3269 return offsetof(VRingAvail, ring) +
3270 sizeof(uint16_t) * vdev->vq[n].vring.num + s;
3271 }
3272
3273 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
3274 {
3275 int s;
3276
3277 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3278 return sizeof(struct VRingPackedDescEvent);
3279 }
3280
3281 s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3282 return offsetof(VRingUsed, ring) +
3283 sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
3284 }
3285
3286 static unsigned int virtio_queue_packed_get_last_avail_idx(VirtIODevice *vdev,
3287 int n)
3288 {
3289 unsigned int avail, used;
3290
3291 avail = vdev->vq[n].last_avail_idx;
3292 avail |= ((uint16_t)vdev->vq[n].last_avail_wrap_counter) << 15;
3293
3294 used = vdev->vq[n].used_idx;
3295 used |= ((uint16_t)vdev->vq[n].used_wrap_counter) << 15;
3296
3297 return avail | used << 16;
3298 }
3299
3300 static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev,
3301 int n)
3302 {
3303 return vdev->vq[n].last_avail_idx;
3304 }
3305
3306 unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
3307 {
3308 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3309 return virtio_queue_packed_get_last_avail_idx(vdev, n);
3310 } else {
3311 return virtio_queue_split_get_last_avail_idx(vdev, n);
3312 }
3313 }
3314
3315 static void virtio_queue_packed_set_last_avail_idx(VirtIODevice *vdev,
3316 int n, unsigned int idx)
3317 {
3318 struct VirtQueue *vq = &vdev->vq[n];
3319
3320 vq->last_avail_idx = vq->shadow_avail_idx = idx & 0x7fff;
3321 vq->last_avail_wrap_counter =
3322 vq->shadow_avail_wrap_counter = !!(idx & 0x8000);
3323 idx >>= 16;
3324 vq->used_idx = idx & 0x7fff;
3325 vq->used_wrap_counter = !!(idx & 0x8000);
3326 }
3327
3328 static void virtio_queue_split_set_last_avail_idx(VirtIODevice *vdev,
3329 int n, unsigned int idx)
3330 {
3331 vdev->vq[n].last_avail_idx = idx;
3332 vdev->vq[n].shadow_avail_idx = idx;
3333 }
3334
3335 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n,
3336 unsigned int idx)
3337 {
3338 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3339 virtio_queue_packed_set_last_avail_idx(vdev, n, idx);
3340 } else {
3341 virtio_queue_split_set_last_avail_idx(vdev, n, idx);
3342 }
3343 }
3344
3345 static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev,
3346 int n)
3347 {
3348 /* The packed ring has no used index in guest memory to restore from. */
3349 return;
3350 }
3351
3352 static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev,
3353 int n)
3354 {
3355 RCU_READ_LOCK_GUARD();
3356 if (vdev->vq[n].vring.desc) {
3357 vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
3358 vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
3359 }
3360 }
3361
3362 void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
3363 {
3364 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3365 virtio_queue_packed_restore_last_avail_idx(vdev, n);
3366 } else {
3367 virtio_queue_split_restore_last_avail_idx(vdev, n);
3368 }
3369 }
3370
3371 static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n)
3372 {
3373 /* used idx was updated through set_last_avail_idx() */
3374 return;
3375 }
3376
3377 static void virtio_queue_split_update_used_idx(VirtIODevice *vdev, int n)
3378 {
3379 RCU_READ_LOCK_GUARD();
3380 if (vdev->vq[n].vring.desc) {
3381 vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
3382 }
3383 }
3384
3385 void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
3386 {
3387 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3388 virtio_queue_packed_update_used_idx(vdev, n);
3389 } else {
3390 virtio_queue_split_update_used_idx(vdev, n);
3391 }
3392 }
3393
3394 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
3395 {
3396 vdev->vq[n].signalled_used_valid = false;
3397 }
3398
3399 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
3400 {
3401 return vdev->vq + n;
3402 }
3403
3404 uint16_t virtio_get_queue_index(VirtQueue *vq)
3405 {
3406 return vq->queue_index;
3407 }
3408
3409 static void virtio_queue_guest_notifier_read(EventNotifier *n)
3410 {
3411 VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
3412 if (event_notifier_test_and_clear(n)) {
3413 virtio_irq(vq);
3414 }
3415 }
3416 static void virtio_config_guest_notifier_read(EventNotifier *n)
3417 {
3418 VirtIODevice *vdev = container_of(n, VirtIODevice, config_notifier);
3419
3420 if (event_notifier_test_and_clear(n)) {
3421 virtio_notify_config(vdev);
3422 }
3423 }
3424 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
3425 bool with_irqfd)
3426 {
3427 if (assign && !with_irqfd) {
3428 event_notifier_set_handler(&vq->guest_notifier,
3429 virtio_queue_guest_notifier_read);
3430 } else {
3431 event_notifier_set_handler(&vq->guest_notifier, NULL);
3432 }
3433 if (!assign) {
3434 /* Test and clear notifier before closing it,
3435 * in case poll callback didn't have time to run. */
3436 virtio_queue_guest_notifier_read(&vq->guest_notifier);
3437 }
3438 }
3439
3440 void virtio_config_set_guest_notifier_fd_handler(VirtIODevice *vdev,
3441 bool assign, bool with_irqfd)
3442 {
3443 EventNotifier *n;
3444 n = &vdev->config_notifier;
3445 if (assign && !with_irqfd) {
3446 event_notifier_set_handler(n, virtio_config_guest_notifier_read);
3447 } else {
3448 event_notifier_set_handler(n, NULL);
3449 }
3450 if (!assign) {
3451 /* Test and clear notifier before closing it,
3452 * in case poll callback didn't have time to run. */
3453 virtio_config_guest_notifier_read(n);
3454 }
3455 }
3456
3457 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
3458 {
3459 return &vq->guest_notifier;
3460 }
3461
3462 static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
3463 {
3464 VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3465
3466 virtio_queue_set_notification(vq, 0);
3467 }
3468
3469 static bool virtio_queue_host_notifier_aio_poll(void *opaque)
3470 {
3471 EventNotifier *n = opaque;
3472 VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3473
3474 return vq->vring.desc && !virtio_queue_empty(vq);
3475 }
3476
3477 static void virtio_queue_host_notifier_aio_poll_ready(EventNotifier *n)
3478 {
3479 VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3480
3481 virtio_queue_notify_vq(vq);
3482 }
3483
3484 static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
3485 {
3486 VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3487
3488 /* Caller polls once more after this to catch requests that race with us */
3489 virtio_queue_set_notification(vq, 1);
3490 }
3491
3492 void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
3493 {
3494 aio_set_event_notifier(ctx, &vq->host_notifier, false,
3495 virtio_queue_host_notifier_read,
3496 virtio_queue_host_notifier_aio_poll,
3497 virtio_queue_host_notifier_aio_poll_ready);
3498 aio_set_event_notifier_poll(ctx, &vq->host_notifier,
3499 virtio_queue_host_notifier_aio_poll_begin,
3500 virtio_queue_host_notifier_aio_poll_end);
3501 }
3502
3503 /*
3504 * Same as virtio_queue_aio_attach_host_notifier() but without polling. Use
3505 * this for rx virtqueues and similar cases where the virtqueue handler
3506 * function does not pop all elements. When the virtqueue is left
3507 * non-empty, polling consumes CPU cycles and should not be used.
3508 */
3509 void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx)
3510 {
3511 aio_set_event_notifier(ctx, &vq->host_notifier, false,
3512 virtio_queue_host_notifier_read,
3513 NULL, NULL);
3514 }
3515
3516 void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx)
3517 {
3518 aio_set_event_notifier(ctx, &vq->host_notifier, false, NULL, NULL, NULL);
3519 }
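
/*
 * Illustrative sketch (not part of the original file): a dataplane-style
 * start/stop path that moves a virtqueue's host notifier into an IOThread's
 * AioContext and back. In practice ctx would come from something like
 * iothread_get_aio_context(); the names here are hypothetical.
 */
#if 0 /* example only */
static void virtio_foo_dataplane_start(VirtQueue *vq, AioContext *ctx)
{
    virtio_queue_aio_attach_host_notifier(vq, ctx);
}

static void virtio_foo_dataplane_stop(VirtQueue *vq, AioContext *ctx)
{
    virtio_queue_aio_detach_host_notifier(vq, ctx);
    /* From here on, notifications are handled outside the IOThread again. */
}
#endif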
3520
3521 void virtio_queue_host_notifier_read(EventNotifier *n)
3522 {
3523 VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3524 if (event_notifier_test_and_clear(n)) {
3525 virtio_queue_notify_vq(vq);
3526 }
3527 }
3528
3529 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
3530 {
3531 return &vq->host_notifier;
3532 }
3533
3534 EventNotifier *virtio_config_get_guest_notifier(VirtIODevice *vdev)
3535 {
3536 return &vdev->config_notifier;
3537 }
3538
3539 void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled)
3540 {
3541 vq->host_notifier_enabled = enabled;
3542 }
3543
3544 int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
3545 MemoryRegion *mr, bool assign)
3546 {
3547 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3548 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3549
3550 if (k->set_host_notifier_mr) {
3551 return k->set_host_notifier_mr(qbus->parent, n, mr, assign);
3552 }
3553
3554 return -1;
3555 }
3556
3557 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
3558 {
3559 g_free(vdev->bus_name);
3560 vdev->bus_name = g_strdup(bus_name);
3561 }
3562
3563 void G_GNUC_PRINTF(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
3564 {
3565 va_list ap;
3566
3567 va_start(ap, fmt);
3568 error_vreport(fmt, ap);
3569 va_end(ap);
3570
3571 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3572 vdev->status = vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET;
3573 virtio_notify_config(vdev);
3574 }
3575
3576 vdev->broken = true;
3577 }
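
/*
 * Illustrative sketch (not part of the original file): virtio_error() is how
 * a request handler marks the device broken when the guest hands it
 * something malformed, instead of asserting. The check and names below are
 * hypothetical.
 */
#if 0 /* example only */
static bool virtio_foo_check_request(VirtIODevice *vdev, VirtQueueElement *elem)
{
    if (elem->out_num < 1) {
        virtio_error(vdev, "virtio-foo request is missing its header");
        return false;   /* caller should drop the element and stop popping */
    }
    return true;
}
#endif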
3578
3579 static void virtio_memory_listener_commit(MemoryListener *listener)
3580 {
3581 VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
3582 int i;
3583
3584 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3585 if (vdev->vq[i].vring.num == 0) {
3586 break;
3587 }
3588 virtio_init_region_cache(vdev, i);
3589 }
3590 }
3591
3592 static void virtio_device_realize(DeviceState *dev, Error **errp)
3593 {
3594 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3595 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3596 Error *err = NULL;
3597
3598 /* Devices should either use vmsd or the load/save methods */
3599 assert(!vdc->vmsd || !vdc->load);
3600
3601 if (vdc->realize != NULL) {
3602 vdc->realize(dev, &err);
3603 if (err != NULL) {
3604 error_propagate(errp, err);
3605 return;
3606 }
3607 }
3608
3609 virtio_bus_device_plugged(vdev, &err);
3610 if (err != NULL) {
3611 error_propagate(errp, err);
3612 vdc->unrealize(dev);
3613 return;
3614 }
3615
3616 vdev->listener.commit = virtio_memory_listener_commit;
3617 vdev->listener.name = "virtio";
3618 memory_listener_register(&vdev->listener, vdev->dma_as);
3619 QTAILQ_INSERT_TAIL(&virtio_list, vdev, next);
3620 }
3621
3622 static void virtio_device_unrealize(DeviceState *dev)
3623 {
3624 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3625 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3626
3627 memory_listener_unregister(&vdev->listener);
3628 virtio_bus_device_unplugged(vdev);
3629
3630 if (vdc->unrealize != NULL) {
3631 vdc->unrealize(dev);
3632 }
3633
3634 QTAILQ_REMOVE(&virtio_list, vdev, next);
3635 g_free(vdev->bus_name);
3636 vdev->bus_name = NULL;
3637 }
3638
3639 static void virtio_device_free_virtqueues(VirtIODevice *vdev)
3640 {
3641 int i;
3642 if (!vdev->vq) {
3643 return;
3644 }
3645
3646 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3647 if (vdev->vq[i].vring.num == 0) {
3648 break;
3649 }
3650 virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
3651 }
3652 g_free(vdev->vq);
3653 }
3654
3655 static void virtio_device_instance_finalize(Object *obj)
3656 {
3657 VirtIODevice *vdev = VIRTIO_DEVICE(obj);
3658
3659 virtio_device_free_virtqueues(vdev);
3660
3661 g_free(vdev->config);
3662 g_free(vdev->vector_queues);
3663 }
3664
3665 static Property virtio_properties[] = {
3666 DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
3667 DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
3668 DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true),
3669 DEFINE_PROP_BOOL("x-disable-legacy-check", VirtIODevice,
3670 disable_legacy_check, false),
3671 DEFINE_PROP_END_OF_LIST(),
3672 };
3673
3674 static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
3675 {
3676 VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3677 int i, n, r, err;
3678
3679 /*
3680 * Batch all the host notifiers in a single transaction to avoid
3681 * quadratic time complexity in address_space_update_ioeventfds().
3682 */
3683 memory_region_transaction_begin();
3684 for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3685 VirtQueue *vq = &vdev->vq[n];
3686 if (!virtio_queue_get_num(vdev, n)) {
3687 continue;
3688 }
3689 r = virtio_bus_set_host_notifier(qbus, n, true);
3690 if (r < 0) {
3691 err = r;
3692 goto assign_error;
3693 }
3694 event_notifier_set_handler(&vq->host_notifier,
3695 virtio_queue_host_notifier_read);
3696 }
3697
3698 for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3699 /* Kick right away to begin processing requests already in vring */
3700 VirtQueue *vq = &vdev->vq[n];
3701 if (!vq->vring.num) {
3702 continue;
3703 }
3704 event_notifier_set(&vq->host_notifier);
3705 }
3706 memory_region_transaction_commit();
3707 return 0;
3708
3709 assign_error:
3710 i = n; /* save n for a second iteration after transaction is committed. */
3711 while (--n >= 0) {
3712 VirtQueue *vq = &vdev->vq[n];
3713 if (!virtio_queue_get_num(vdev, n)) {
3714 continue;
3715 }
3716
3717 event_notifier_set_handler(&vq->host_notifier, NULL);
3718 r = virtio_bus_set_host_notifier(qbus, n, false);
3719 assert(r >= 0);
3720 }
3721 /*
3722 * The transaction expects the ioeventfds to be open when it
3723 * commits. Do it now, before the cleanup loop.
3724 */
3725 memory_region_transaction_commit();
3726
3727 while (--i >= 0) {
3728 if (!virtio_queue_get_num(vdev, i)) {
3729 continue;
3730 }
3731 virtio_bus_cleanup_host_notifier(qbus, i);
3732 }
3733 return err;
3734 }
3735
3736 int virtio_device_start_ioeventfd(VirtIODevice *vdev)
3737 {
3738 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3739 VirtioBusState *vbus = VIRTIO_BUS(qbus);
3740
3741 return virtio_bus_start_ioeventfd(vbus);
3742 }
3743
3744 static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
3745 {
3746 VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3747 int n, r;
3748
3749 /*
3750 * Batch all the host notifiers in a single transaction to avoid
3751 * quadratic time complexity in address_space_update_ioeventfds().
3752 */
3753 memory_region_transaction_begin();
3754 for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3755 VirtQueue *vq = &vdev->vq[n];
3756
3757 if (!virtio_queue_get_num(vdev, n)) {
3758 continue;
3759 }
3760 event_notifier_set_handler(&vq->host_notifier, NULL);
3761 r = virtio_bus_set_host_notifier(qbus, n, false);
3762 assert(r >= 0);
3763 }
3764 /*
3765 * The transaction expects the ioeventfds to be open when it
3766 * commits. Do it now, before the cleanup loop.
3767 */
3768 memory_region_transaction_commit();
3769
3770 for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3771 if (!virtio_queue_get_num(vdev, n)) {
3772 continue;
3773 }
3774 virtio_bus_cleanup_host_notifier(qbus, n);
3775 }
3776 }
3777
3778 int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
3779 {
3780 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3781 VirtioBusState *vbus = VIRTIO_BUS(qbus);
3782
3783 return virtio_bus_grab_ioeventfd(vbus);
3784 }
3785
3786 void virtio_device_release_ioeventfd(VirtIODevice *vdev)
3787 {
3788 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3789 VirtioBusState *vbus = VIRTIO_BUS(qbus);
3790
3791 virtio_bus_release_ioeventfd(vbus);
3792 }
3793
3794 static void virtio_device_class_init(ObjectClass *klass, void *data)
3795 {
3796 /* Set the default value here. */
3797 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3798 DeviceClass *dc = DEVICE_CLASS(klass);
3799
3800 dc->realize = virtio_device_realize;
3801 dc->unrealize = virtio_device_unrealize;
3802 dc->bus_type = TYPE_VIRTIO_BUS;
3803 device_class_set_props(dc, virtio_properties);
3804 vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
3805 vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
3806
3807 vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
3808
3809 QTAILQ_INIT(&virtio_list);
3810 }
3811
3812 bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
3813 {
3814 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3815 VirtioBusState *vbus = VIRTIO_BUS(qbus);
3816
3817 return virtio_bus_ioeventfd_enabled(vbus);
3818 }
3819
3820 VirtQueueStatus *qmp_x_query_virtio_queue_status(const char *path,
3821 uint16_t queue,
3822 Error **errp)
3823 {
3824 VirtIODevice *vdev;
3825 VirtQueueStatus *status;
3826
3827 vdev = qmp_find_virtio_device(path);
3828 if (vdev == NULL) {
3829 error_setg(errp, "Path %s is not a VirtIODevice", path);
3830 return NULL;
3831 }
3832
3833 if (queue >= VIRTIO_QUEUE_MAX || !virtio_queue_get_num(vdev, queue)) {
3834 error_setg(errp, "Invalid virtqueue number %d", queue);
3835 return NULL;
3836 }
3837
3838 status = g_new0(VirtQueueStatus, 1);
3839 status->name = g_strdup(vdev->name);
3840 status->queue_index = vdev->vq[queue].queue_index;
3841 status->inuse = vdev->vq[queue].inuse;
3842 status->vring_num = vdev->vq[queue].vring.num;
3843 status->vring_num_default = vdev->vq[queue].vring.num_default;
3844 status->vring_align = vdev->vq[queue].vring.align;
3845 status->vring_desc = vdev->vq[queue].vring.desc;
3846 status->vring_avail = vdev->vq[queue].vring.avail;
3847 status->vring_used = vdev->vq[queue].vring.used;
3848 status->used_idx = vdev->vq[queue].used_idx;
3849 status->signalled_used = vdev->vq[queue].signalled_used;
3850 status->signalled_used_valid = vdev->vq[queue].signalled_used_valid;
3851
3852 if (vdev->vhost_started) {
3853 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
3854 struct vhost_dev *hdev = vdc->get_vhost(vdev);
3855
3856 /* check if vq index exists for vhost as well */
3857 if (queue >= hdev->vq_index && queue < hdev->vq_index + hdev->nvqs) {
3858 status->has_last_avail_idx = true;
3859
3860 int vhost_vq_index =
3861 hdev->vhost_ops->vhost_get_vq_index(hdev, queue);
3862 struct vhost_vring_state state = {
3863 .index = vhost_vq_index,
3864 };
3865
3866 hdev->vhost_ops->vhost_get_vring_base(hdev, &state);
3867 status->last_avail_idx = state.num;
3868 }
3869 } else {
3870 status->has_shadow_avail_idx = true;
3871 status->has_last_avail_idx = true;
3872 status->last_avail_idx = vdev->vq[queue].last_avail_idx;
3873 status->shadow_avail_idx = vdev->vq[queue].shadow_avail_idx;
3874 }
3875
3876 return status;
3877 }
3878
3879 static strList *qmp_decode_vring_desc_flags(uint16_t flags)
3880 {
3881 strList *list = NULL;
3882 strList *node;
3883 int i;
3884
3885 struct {
3886 uint16_t flag;
3887 const char *value;
3888 } map[] = {
3889 { VRING_DESC_F_NEXT, "next" },
3890 { VRING_DESC_F_WRITE, "write" },
3891 { VRING_DESC_F_INDIRECT, "indirect" },
3892 { 1 << VRING_PACKED_DESC_F_AVAIL, "avail" },
3893 { 1 << VRING_PACKED_DESC_F_USED, "used" },
3894 { 0, "" }
3895 };
3896
3897 for (i = 0; map[i].flag; i++) {
3898 if ((map[i].flag & flags) == 0) {
3899 continue;
3900 }
3901 node = g_malloc0(sizeof(strList));
3902 node->value = g_strdup(map[i].value);
3903 node->next = list;
3904 list = node;
3905 }
3906
3907 return list;
3908 }
3909
3910 VirtioQueueElement *qmp_x_query_virtio_queue_element(const char *path,
3911 uint16_t queue,
3912 bool has_index,
3913 uint16_t index,
3914 Error **errp)
3915 {
3916 VirtIODevice *vdev;
3917 VirtQueue *vq;
3918 VirtioQueueElement *element = NULL;
3919
3920 vdev = qmp_find_virtio_device(path);
3921 if (vdev == NULL) {
3922 error_setg(errp, "Path %s is not a VirtIO device", path);
3923 return NULL;
3924 }
3925
3926 if (queue >= VIRTIO_QUEUE_MAX || !virtio_queue_get_num(vdev, queue)) {
3927 error_setg(errp, "Invalid virtqueue number %d", queue);
3928 return NULL;
3929 }
3930 vq = &vdev->vq[queue];
3931
3932 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3933 error_setg(errp, "Packed ring not supported");
3934 return NULL;
3935 } else {
3936 unsigned int head, i, max;
3937 VRingMemoryRegionCaches *caches;
3938 MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
3939 MemoryRegionCache *desc_cache;
3940 VRingDesc desc;
3941 VirtioRingDescList *list = NULL;
3942 VirtioRingDescList *node;
3943 int rc, ndescs;
3944
3945 RCU_READ_LOCK_GUARD();
3946
3947 max = vq->vring.num;
3948
3949 if (!has_index) {
3950 head = vring_avail_ring(vq, vq->last_avail_idx % vq->vring.num);
3951 } else {
3952 head = vring_avail_ring(vq, index % vq->vring.num);
3953 }
3954 i = head;
3955
3956 caches = vring_get_region_caches(vq);
3957 if (!caches) {
3958 error_setg(errp, "Region caches not initialized");
3959 return NULL;
3960 }
3961 if (caches->desc.len < max * sizeof(VRingDesc)) {
3962 error_setg(errp, "Cannot map descriptor ring");
3963 return NULL;
3964 }
3965
3966 desc_cache = &caches->desc;
3967 vring_split_desc_read(vdev, &desc, desc_cache, i);
3968 if (desc.flags & VRING_DESC_F_INDIRECT) {
3969 int64_t len;
3970 len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
3971 desc.addr, desc.len, false);
3972 desc_cache = &indirect_desc_cache;
3973 if (len < desc.len) {
3974 error_setg(errp, "Cannot map indirect buffer");
3975 goto done;
3976 }
3977
3978 max = desc.len / sizeof(VRingDesc);
3979 i = 0;
3980 vring_split_desc_read(vdev, &desc, desc_cache, i);
3981 }
3982
3983 element = g_new0(VirtioQueueElement, 1);
3984 element->avail = g_new0(VirtioRingAvail, 1);
3985 element->used = g_new0(VirtioRingUsed, 1);
3986 element->name = g_strdup(vdev->name);
3987 element->index = head;
3988 element->avail->flags = vring_avail_flags(vq);
3989 element->avail->idx = vring_avail_idx(vq);
3990 element->avail->ring = head;
3991 element->used->flags = vring_used_flags(vq);
3992 element->used->idx = vring_used_idx(vq);
3993 ndescs = 0;
3994
3995 do {
3996 /* A buggy driver may produce an infinite loop */
3997 if (ndescs >= max) {
3998 break;
3999 }
4000 node = g_new0(VirtioRingDescList, 1);
4001 node->value = g_new0(VirtioRingDesc, 1);
4002 node->value->addr = desc.addr;
4003 node->value->len = desc.len;
4004 node->value->flags = qmp_decode_vring_desc_flags(desc.flags);
4005 node->next = list;
4006 list = node;
4007
4008 ndescs++;
4009 rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache,
4010 max, &i);
4011 } while (rc == VIRTQUEUE_READ_DESC_MORE);
4012 element->descs = list;
4013 done:
4014 address_space_cache_destroy(&indirect_desc_cache);
4015 }
4016
4017 return element;
4018 }
4019
4020 static const TypeInfo virtio_device_info = {
4021 .name = TYPE_VIRTIO_DEVICE,
4022 .parent = TYPE_DEVICE,
4023 .instance_size = sizeof(VirtIODevice),
4024 .class_init = virtio_device_class_init,
4025 .instance_finalize = virtio_device_instance_finalize,
4026 .abstract = true,
4027 .class_size = sizeof(VirtioDeviceClass),
4028 };
4029
4030 static void virtio_register_types(void)
4031 {
4032 type_register_static(&virtio_device_info);
4033 }
4034
4035 type_init(virtio_register_types)