[thirdparty/qemu.git] hw/virtio/virtio.c
hw/virtio: Rename virtio_device_find() -> qmp_find_virtio_device()
1 /*
2 * Virtio Support
3 *
4 * Copyright IBM, Corp. 2007
5 *
6 * Authors:
7 * Anthony Liguori <aliguori@us.ibm.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
11 *
12 */
13
14 #include "qemu/osdep.h"
15 #include "qapi/error.h"
16 #include "qapi/qmp/qdict.h"
17 #include "qapi/qapi-commands-virtio.h"
18 #include "qapi/qapi-commands-qom.h"
19 #include "qapi/qmp/qjson.h"
20 #include "trace.h"
21 #include "qemu/error-report.h"
22 #include "qemu/log.h"
23 #include "qemu/main-loop.h"
24 #include "qemu/module.h"
25 #include "qom/object_interfaces.h"
26 #include "hw/core/cpu.h"
27 #include "hw/virtio/virtio.h"
28 #include "migration/qemu-file-types.h"
29 #include "qemu/atomic.h"
30 #include "hw/virtio/virtio-bus.h"
31 #include "hw/qdev-properties.h"
32 #include "hw/virtio/virtio-access.h"
33 #include "sysemu/dma.h"
34 #include "sysemu/runstate.h"
35 #include "virtio-qmp.h"
36
37 #include "standard-headers/linux/virtio_ids.h"
38 #include "standard-headers/linux/vhost_types.h"
39 #include "standard-headers/linux/virtio_blk.h"
40 #include "standard-headers/linux/virtio_console.h"
41 #include "standard-headers/linux/virtio_gpu.h"
42 #include "standard-headers/linux/virtio_net.h"
43 #include "standard-headers/linux/virtio_scsi.h"
44 #include "standard-headers/linux/virtio_i2c.h"
45 #include "standard-headers/linux/virtio_balloon.h"
46 #include "standard-headers/linux/virtio_iommu.h"
47 #include "standard-headers/linux/virtio_mem.h"
48 #include "standard-headers/linux/virtio_vsock.h"
49
50 /* QAPI list of realized VirtIODevices */
51 static QTAILQ_HEAD(, VirtIODevice) virtio_list;
52
53 /*
54 * Maximum size of virtio device config space
55 */
56 #define VHOST_USER_MAX_CONFIG_SIZE 256
57
58 /*
59 * The alignment to use between consumer and producer parts of vring.
60 * This matches the x86 page size. It is the default, used by transports like PCI
61 * which don't provide a means for the guest to tell the host the alignment.
62 */
63 #define VIRTIO_PCI_VRING_ALIGN 4096
64
65 typedef struct VRingDesc
66 {
67 uint64_t addr;
68 uint32_t len;
69 uint16_t flags;
70 uint16_t next;
71 } VRingDesc;
72
73 typedef struct VRingPackedDesc {
74 uint64_t addr;
75 uint32_t len;
76 uint16_t id;
77 uint16_t flags;
78 } VRingPackedDesc;
79
80 typedef struct VRingAvail
81 {
82 uint16_t flags;
83 uint16_t idx;
84 uint16_t ring[];
85 } VRingAvail;
86
87 typedef struct VRingUsedElem
88 {
89 uint32_t id;
90 uint32_t len;
91 } VRingUsedElem;
92
93 typedef struct VRingUsed
94 {
95 uint16_t flags;
96 uint16_t idx;
97 VRingUsedElem ring[];
98 } VRingUsed;
99
100 typedef struct VRingMemoryRegionCaches {
101 struct rcu_head rcu;
102 MemoryRegionCache desc;
103 MemoryRegionCache avail;
104 MemoryRegionCache used;
105 } VRingMemoryRegionCaches;
106
107 typedef struct VRing
108 {
109 unsigned int num;
110 unsigned int num_default;
111 unsigned int align;
112 hwaddr desc;
113 hwaddr avail;
114 hwaddr used;
115 VRingMemoryRegionCaches *caches;
116 } VRing;
117
118 typedef struct VRingPackedDescEvent {
119 uint16_t off_wrap;
120 uint16_t flags;
121 } VRingPackedDescEvent ;
122
123 struct VirtQueue
124 {
125 VRing vring;
126 VirtQueueElement *used_elems;
127
128 /* Next head to pop */
129 uint16_t last_avail_idx;
130 bool last_avail_wrap_counter;
131
132 /* Last avail_idx read from VQ. */
133 uint16_t shadow_avail_idx;
134 bool shadow_avail_wrap_counter;
135
136 uint16_t used_idx;
137 bool used_wrap_counter;
138
139 /* Last used index value we have signalled on */
140 uint16_t signalled_used;
141
142 /* Whether the value in signalled_used is valid */
143 bool signalled_used_valid;
144
145 /* Notification enabled? */
146 bool notification;
147
148 uint16_t queue_index;
149
150 unsigned int inuse;
151
152 uint16_t vector;
153 VirtIOHandleOutput handle_output;
154 VirtIODevice *vdev;
155 EventNotifier guest_notifier;
156 EventNotifier host_notifier;
157 bool host_notifier_enabled;
158 QLIST_ENTRY(VirtQueue) node;
159 };
160
161 const char *virtio_device_names[] = {
162 [VIRTIO_ID_NET] = "virtio-net",
163 [VIRTIO_ID_BLOCK] = "virtio-blk",
164 [VIRTIO_ID_CONSOLE] = "virtio-serial",
165 [VIRTIO_ID_RNG] = "virtio-rng",
166 [VIRTIO_ID_BALLOON] = "virtio-balloon",
167 [VIRTIO_ID_IOMEM] = "virtio-iomem",
168 [VIRTIO_ID_RPMSG] = "virtio-rpmsg",
169 [VIRTIO_ID_SCSI] = "virtio-scsi",
170 [VIRTIO_ID_9P] = "virtio-9p",
171 [VIRTIO_ID_MAC80211_WLAN] = "virtio-mac-wlan",
172 [VIRTIO_ID_RPROC_SERIAL] = "virtio-rproc-serial",
173 [VIRTIO_ID_CAIF] = "virtio-caif",
174 [VIRTIO_ID_MEMORY_BALLOON] = "virtio-mem-balloon",
175 [VIRTIO_ID_GPU] = "virtio-gpu",
176 [VIRTIO_ID_CLOCK] = "virtio-clk",
177 [VIRTIO_ID_INPUT] = "virtio-input",
178 [VIRTIO_ID_VSOCK] = "vhost-vsock",
179 [VIRTIO_ID_CRYPTO] = "virtio-crypto",
180 [VIRTIO_ID_SIGNAL_DIST] = "virtio-signal",
181 [VIRTIO_ID_PSTORE] = "virtio-pstore",
182 [VIRTIO_ID_IOMMU] = "virtio-iommu",
183 [VIRTIO_ID_MEM] = "virtio-mem",
184 [VIRTIO_ID_SOUND] = "virtio-sound",
185 [VIRTIO_ID_FS] = "virtio-user-fs",
186 [VIRTIO_ID_PMEM] = "virtio-pmem",
187 [VIRTIO_ID_RPMB] = "virtio-rpmb",
188 [VIRTIO_ID_MAC80211_HWSIM] = "virtio-mac-hwsim",
189 [VIRTIO_ID_VIDEO_ENCODER] = "virtio-vid-encoder",
190 [VIRTIO_ID_VIDEO_DECODER] = "virtio-vid-decoder",
191 [VIRTIO_ID_SCMI] = "virtio-scmi",
192 [VIRTIO_ID_NITRO_SEC_MOD] = "virtio-nitro-sec-mod",
193 [VIRTIO_ID_I2C_ADAPTER] = "vhost-user-i2c",
194 [VIRTIO_ID_WATCHDOG] = "virtio-watchdog",
195 [VIRTIO_ID_CAN] = "virtio-can",
196 [VIRTIO_ID_DMABUF] = "virtio-dmabuf",
197 [VIRTIO_ID_PARAM_SERV] = "virtio-param-serv",
198 [VIRTIO_ID_AUDIO_POLICY] = "virtio-audio-pol",
199 [VIRTIO_ID_BT] = "virtio-bluetooth",
200 [VIRTIO_ID_GPIO] = "virtio-gpio"
201 };
202
203 static const char *virtio_id_to_name(uint16_t device_id)
204 {
205 assert(device_id < G_N_ELEMENTS(virtio_device_names));
206 const char *name = virtio_device_names[device_id];
207 assert(name != NULL);
208 return name;
209 }
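
/*
 * For example, virtio_id_to_name(VIRTIO_ID_BLOCK) returns "virtio-blk";
 * an ID beyond the table, or one whose slot has no name string, trips the
 * assertions above, so callers must pass only IDs known to be listed.
 */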
210
211 /* Called within call_rcu(). */
212 static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
213 {
214 assert(caches != NULL);
215 address_space_cache_destroy(&caches->desc);
216 address_space_cache_destroy(&caches->avail);
217 address_space_cache_destroy(&caches->used);
218 g_free(caches);
219 }
220
221 static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq)
222 {
223 VRingMemoryRegionCaches *caches;
224
225 caches = qatomic_read(&vq->vring.caches);
226 qatomic_rcu_set(&vq->vring.caches, NULL);
227 if (caches) {
228 call_rcu(caches, virtio_free_region_cache, rcu);
229 }
230 }
231
232 static void virtio_init_region_cache(VirtIODevice *vdev, int n)
233 {
234 VirtQueue *vq = &vdev->vq[n];
235 VRingMemoryRegionCaches *old = vq->vring.caches;
236 VRingMemoryRegionCaches *new = NULL;
237 hwaddr addr, size;
238 int64_t len;
239 bool packed;
240
242 addr = vq->vring.desc;
243 if (!addr) {
244 goto out_no_cache;
245 }
246 new = g_new0(VRingMemoryRegionCaches, 1);
247 size = virtio_queue_get_desc_size(vdev, n);
248 packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
249 true : false;
250 len = address_space_cache_init(&new->desc, vdev->dma_as,
251 addr, size, packed);
252 if (len < size) {
253 virtio_error(vdev, "Cannot map desc");
254 goto err_desc;
255 }
256
257 size = virtio_queue_get_used_size(vdev, n);
258 len = address_space_cache_init(&new->used, vdev->dma_as,
259 vq->vring.used, size, true);
260 if (len < size) {
261 virtio_error(vdev, "Cannot map used");
262 goto err_used;
263 }
264
265 size = virtio_queue_get_avail_size(vdev, n);
266 len = address_space_cache_init(&new->avail, vdev->dma_as,
267 vq->vring.avail, size, false);
268 if (len < size) {
269 virtio_error(vdev, "Cannot map avail");
270 goto err_avail;
271 }
272
273 qatomic_rcu_set(&vq->vring.caches, new);
274 if (old) {
275 call_rcu(old, virtio_free_region_cache, rcu);
276 }
277 return;
278
279 err_avail:
280 address_space_cache_destroy(&new->avail);
281 err_used:
282 address_space_cache_destroy(&new->used);
283 err_desc:
284 address_space_cache_destroy(&new->desc);
285 out_no_cache:
286 g_free(new);
287 virtio_virtqueue_reset_region_cache(vq);
288 }
289
290 /* virt queue functions */
291 void virtio_queue_update_rings(VirtIODevice *vdev, int n)
292 {
293 VRing *vring = &vdev->vq[n].vring;
294
295 if (!vring->num || !vring->desc || !vring->align) {
296 /* not yet setup -> nothing to do */
297 return;
298 }
299 vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
300 vring->used = vring_align(vring->avail +
301 offsetof(VRingAvail, ring[vring->num]),
302 vring->align);
303 virtio_init_region_cache(vdev, n);
304 }
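
/*
 * Worked example of the split-ring layout computed above (illustrative
 * numbers): with num = 256 and align = VIRTIO_PCI_VRING_ALIGN (4096), and
 * the descriptor table at guest physical address A:
 *
 *     desc  = A                                  256 * 16 bytes = 4096
 *     avail = A + 4096                           4 + 2 * 256 bytes = 516
 *     used  = vring_align(A + 4096 + 516, 4096) = A + 8192
 *
 * Transports that let the guest place each part independently (e.g. modern
 * virtio-pci) typically set vring.desc/avail/used directly instead.
 */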
305
306 /* Called within rcu_read_lock(). */
307 static void vring_split_desc_read(VirtIODevice *vdev, VRingDesc *desc,
308 MemoryRegionCache *cache, int i)
309 {
310 address_space_read_cached(cache, i * sizeof(VRingDesc),
311 desc, sizeof(VRingDesc));
312 virtio_tswap64s(vdev, &desc->addr);
313 virtio_tswap32s(vdev, &desc->len);
314 virtio_tswap16s(vdev, &desc->flags);
315 virtio_tswap16s(vdev, &desc->next);
316 }
317
318 static void vring_packed_event_read(VirtIODevice *vdev,
319 MemoryRegionCache *cache,
320 VRingPackedDescEvent *e)
321 {
322 hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap);
323 hwaddr off_flags = offsetof(VRingPackedDescEvent, flags);
324
325 e->flags = virtio_lduw_phys_cached(vdev, cache, off_flags);
326 /* Make sure flags is seen before off_wrap */
327 smp_rmb();
328 e->off_wrap = virtio_lduw_phys_cached(vdev, cache, off_off);
329 virtio_tswap16s(vdev, &e->flags);
330 }
331
332 static void vring_packed_off_wrap_write(VirtIODevice *vdev,
333 MemoryRegionCache *cache,
334 uint16_t off_wrap)
335 {
336 hwaddr off = offsetof(VRingPackedDescEvent, off_wrap);
337
338 virtio_stw_phys_cached(vdev, cache, off, off_wrap);
339 address_space_cache_invalidate(cache, off, sizeof(off_wrap));
340 }
341
342 static void vring_packed_flags_write(VirtIODevice *vdev,
343 MemoryRegionCache *cache, uint16_t flags)
344 {
345 hwaddr off = offsetof(VRingPackedDescEvent, flags);
346
347 virtio_stw_phys_cached(vdev, cache, off, flags);
348 address_space_cache_invalidate(cache, off, sizeof(flags));
349 }
350
351 /* Called within rcu_read_lock(). */
352 static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
353 {
354 return qatomic_rcu_read(&vq->vring.caches);
355 }
356
357 /* Called within rcu_read_lock(). */
358 static inline uint16_t vring_avail_flags(VirtQueue *vq)
359 {
360 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
361 hwaddr pa = offsetof(VRingAvail, flags);
362
363 if (!caches) {
364 return 0;
365 }
366
367 return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
368 }
369
370 /* Called within rcu_read_lock(). */
371 static inline uint16_t vring_avail_idx(VirtQueue *vq)
372 {
373 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
374 hwaddr pa = offsetof(VRingAvail, idx);
375
376 if (!caches) {
377 return 0;
378 }
379
380 vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
381 return vq->shadow_avail_idx;
382 }
383
384 /* Called within rcu_read_lock(). */
385 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
386 {
387 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
388 hwaddr pa = offsetof(VRingAvail, ring[i]);
389
390 if (!caches) {
391 return 0;
392 }
393
394 return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
395 }
396
397 /* Called within rcu_read_lock(). */
398 static inline uint16_t vring_get_used_event(VirtQueue *vq)
399 {
400 return vring_avail_ring(vq, vq->vring.num);
401 }
402
403 /* Called within rcu_read_lock(). */
404 static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
405 int i)
406 {
407 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
408 hwaddr pa = offsetof(VRingUsed, ring[i]);
409
410 if (!caches) {
411 return;
412 }
413
414 virtio_tswap32s(vq->vdev, &uelem->id);
415 virtio_tswap32s(vq->vdev, &uelem->len);
416 address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
417 address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
418 }
419
420 /* Called within rcu_read_lock(). */
421 static inline uint16_t vring_used_flags(VirtQueue *vq)
422 {
423 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
424 hwaddr pa = offsetof(VRingUsed, flags);
425
426 if (!caches) {
427 return 0;
428 }
429
430 return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
431 }
432
433 /* Called within rcu_read_lock(). */
434 static uint16_t vring_used_idx(VirtQueue *vq)
435 {
436 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
437 hwaddr pa = offsetof(VRingUsed, idx);
438
439 if (!caches) {
440 return 0;
441 }
442
443 return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
444 }
445
446 /* Called within rcu_read_lock(). */
447 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
448 {
449 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
450 hwaddr pa = offsetof(VRingUsed, idx);
451
452 if (caches) {
453 virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
454 address_space_cache_invalidate(&caches->used, pa, sizeof(val));
455 }
456
457 vq->used_idx = val;
458 }
459
460 /* Called within rcu_read_lock(). */
461 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
462 {
463 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
464 VirtIODevice *vdev = vq->vdev;
465 hwaddr pa = offsetof(VRingUsed, flags);
466 uint16_t flags;
467
468 if (!caches) {
469 return;
470 }
471
472 flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
473 virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
474 address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
475 }
476
477 /* Called within rcu_read_lock(). */
478 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
479 {
480 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
481 VirtIODevice *vdev = vq->vdev;
482 hwaddr pa = offsetof(VRingUsed, flags);
483 uint16_t flags;
484
485 if (!caches) {
486 return;
487 }
488
489 flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
490 virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
491 address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
492 }
493
494 /* Called within rcu_read_lock(). */
495 static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
496 {
497 VRingMemoryRegionCaches *caches;
498 hwaddr pa;
499 if (!vq->notification) {
500 return;
501 }
502
503 caches = vring_get_region_caches(vq);
504 if (!caches) {
505 return;
506 }
507
508 pa = offsetof(VRingUsed, ring[vq->vring.num]);
509 virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
510 address_space_cache_invalidate(&caches->used, pa, sizeof(val));
511 }
512
513 static void virtio_queue_split_set_notification(VirtQueue *vq, int enable)
514 {
515 RCU_READ_LOCK_GUARD();
516
517 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
518 vring_set_avail_event(vq, vring_avail_idx(vq));
519 } else if (enable) {
520 vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
521 } else {
522 vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
523 }
524 if (enable) {
525 /* Expose avail event/used flags before caller checks the avail idx. */
526 smp_mb();
527 }
528 }
529
530 static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
531 {
532 uint16_t off_wrap;
533 VRingPackedDescEvent e;
534 VRingMemoryRegionCaches *caches;
535
536 RCU_READ_LOCK_GUARD();
537 caches = vring_get_region_caches(vq);
538 if (!caches) {
539 return;
540 }
541
542 vring_packed_event_read(vq->vdev, &caches->used, &e);
543
544 if (!enable) {
545 e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
546 } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
547 off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15;
548 vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
549 /* Make sure off_wrap is written before flags */
550 smp_wmb();
551 e.flags = VRING_PACKED_EVENT_FLAG_DESC;
552 } else {
553 e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
554 }
555
556 vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
557 if (enable) {
558 /* Expose avail event/used flags before caller checks the avail idx. */
559 smp_mb();
560 }
561 }
562
563 bool virtio_queue_get_notification(VirtQueue *vq)
564 {
565 return vq->notification;
566 }
567
568 void virtio_queue_set_notification(VirtQueue *vq, int enable)
569 {
570 vq->notification = enable;
571
572 if (!vq->vring.desc) {
573 return;
574 }
575
576 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
577 virtio_queue_packed_set_notification(vq, enable);
578 } else {
579 virtio_queue_split_set_notification(vq, enable);
580 }
581 }
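
/*
 * A sketch of the usual way a device consumes this: suppress guest kicks
 * while draining, then re-check after re-enabling so a buffer added in the
 * window is not missed (the smp_mb() above orders the flag write before
 * that re-check):
 *
 *     virtio_queue_set_notification(vq, 0);
 *     ... drain vq with virtqueue_pop() ...
 *     virtio_queue_set_notification(vq, 1);
 *     if (!virtio_queue_empty(vq)) {
 *         ... more work arrived in the window, go around again ...
 *     }
 */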
582
583 int virtio_queue_ready(VirtQueue *vq)
584 {
585 return vq->vring.avail != 0;
586 }
587
588 static void vring_packed_desc_read_flags(VirtIODevice *vdev,
589 uint16_t *flags,
590 MemoryRegionCache *cache,
591 int i)
592 {
593 hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
594
595 *flags = virtio_lduw_phys_cached(vdev, cache, off);
596 }
597
598 static void vring_packed_desc_read(VirtIODevice *vdev,
599 VRingPackedDesc *desc,
600 MemoryRegionCache *cache,
601 int i, bool strict_order)
602 {
603 hwaddr off = i * sizeof(VRingPackedDesc);
604
605 vring_packed_desc_read_flags(vdev, &desc->flags, cache, i);
606
607 if (strict_order) {
608 /* Make sure flags is read before the rest of the fields. */
609 smp_rmb();
610 }
611
612 address_space_read_cached(cache, off + offsetof(VRingPackedDesc, addr),
613 &desc->addr, sizeof(desc->addr));
614 address_space_read_cached(cache, off + offsetof(VRingPackedDesc, id),
615 &desc->id, sizeof(desc->id));
616 address_space_read_cached(cache, off + offsetof(VRingPackedDesc, len),
617 &desc->len, sizeof(desc->len));
618 virtio_tswap64s(vdev, &desc->addr);
619 virtio_tswap16s(vdev, &desc->id);
620 virtio_tswap32s(vdev, &desc->len);
621 }
622
623 static void vring_packed_desc_write_data(VirtIODevice *vdev,
624 VRingPackedDesc *desc,
625 MemoryRegionCache *cache,
626 int i)
627 {
628 hwaddr off_id = i * sizeof(VRingPackedDesc) +
629 offsetof(VRingPackedDesc, id);
630 hwaddr off_len = i * sizeof(VRingPackedDesc) +
631 offsetof(VRingPackedDesc, len);
632
633 virtio_tswap32s(vdev, &desc->len);
634 virtio_tswap16s(vdev, &desc->id);
635 address_space_write_cached(cache, off_id, &desc->id, sizeof(desc->id));
636 address_space_cache_invalidate(cache, off_id, sizeof(desc->id));
637 address_space_write_cached(cache, off_len, &desc->len, sizeof(desc->len));
638 address_space_cache_invalidate(cache, off_len, sizeof(desc->len));
639 }
640
641 static void vring_packed_desc_write_flags(VirtIODevice *vdev,
642 VRingPackedDesc *desc,
643 MemoryRegionCache *cache,
644 int i)
645 {
646 hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
647
648 virtio_stw_phys_cached(vdev, cache, off, desc->flags);
649 address_space_cache_invalidate(cache, off, sizeof(desc->flags));
650 }
651
652 static void vring_packed_desc_write(VirtIODevice *vdev,
653 VRingPackedDesc *desc,
654 MemoryRegionCache *cache,
655 int i, bool strict_order)
656 {
657 vring_packed_desc_write_data(vdev, desc, cache, i);
658 if (strict_order) {
659 /* Make sure data is written before flags. */
660 smp_wmb();
661 }
662 vring_packed_desc_write_flags(vdev, desc, cache, i);
663 }
664
665 static inline bool is_desc_avail(uint16_t flags, bool wrap_counter)
666 {
667 bool avail, used;
668
669 avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
670 used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
671 return (avail != used) && (avail == wrap_counter);
672 }
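
/*
 * Example: the driver's avail wrap counter starts at 1, so a freshly
 * published descriptor has AVAIL=1, USED=0 -- avail != used and
 * avail == wrap_counter, hence available.  Once the device marks it used
 * (both bits equal again, see virtqueue_packed_fill_desc()), the test
 * fails until the ring wraps and the driver flips its wrap counter.
 */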
673
674 /* Fetch avail_idx from VQ memory only when we really need to know if
675 * the guest has added some buffers.
676 * Called within rcu_read_lock(). */
677 static int virtio_queue_empty_rcu(VirtQueue *vq)
678 {
679 if (virtio_device_disabled(vq->vdev)) {
680 return 1;
681 }
682
683 if (unlikely(!vq->vring.avail)) {
684 return 1;
685 }
686
687 if (vq->shadow_avail_idx != vq->last_avail_idx) {
688 return 0;
689 }
690
691 return vring_avail_idx(vq) == vq->last_avail_idx;
692 }
693
694 static int virtio_queue_split_empty(VirtQueue *vq)
695 {
696 bool empty;
697
698 if (virtio_device_disabled(vq->vdev)) {
699 return 1;
700 }
701
702 if (unlikely(!vq->vring.avail)) {
703 return 1;
704 }
705
706 if (vq->shadow_avail_idx != vq->last_avail_idx) {
707 return 0;
708 }
709
710 RCU_READ_LOCK_GUARD();
711 empty = vring_avail_idx(vq) == vq->last_avail_idx;
712 return empty;
713 }
714
715 /* Called within rcu_read_lock(). */
716 static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
717 {
718 struct VRingPackedDesc desc;
719 VRingMemoryRegionCaches *cache;
720
721 if (unlikely(!vq->vring.desc)) {
722 return 1;
723 }
724
725 cache = vring_get_region_caches(vq);
726 if (!cache) {
727 return 1;
728 }
729
730 vring_packed_desc_read_flags(vq->vdev, &desc.flags, &cache->desc,
731 vq->last_avail_idx);
732
733 return !is_desc_avail(desc.flags, vq->last_avail_wrap_counter);
734 }
735
736 static int virtio_queue_packed_empty(VirtQueue *vq)
737 {
738 RCU_READ_LOCK_GUARD();
739 return virtio_queue_packed_empty_rcu(vq);
740 }
741
742 int virtio_queue_empty(VirtQueue *vq)
743 {
744 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
745 return virtio_queue_packed_empty(vq);
746 } else {
747 return virtio_queue_split_empty(vq);
748 }
749 }
750
751 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
752 unsigned int len)
753 {
754 AddressSpace *dma_as = vq->vdev->dma_as;
755 unsigned int offset;
756 int i;
757
758 offset = 0;
759 for (i = 0; i < elem->in_num; i++) {
760 size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
761
762 dma_memory_unmap(dma_as, elem->in_sg[i].iov_base,
763 elem->in_sg[i].iov_len,
764 DMA_DIRECTION_FROM_DEVICE, size);
765
766 offset += size;
767 }
768
769 for (i = 0; i < elem->out_num; i++)
770 dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
771 elem->out_sg[i].iov_len,
772 DMA_DIRECTION_TO_DEVICE,
773 elem->out_sg[i].iov_len);
774 }
775
776 /* virtqueue_detach_element:
777 * @vq: The #VirtQueue
778 * @elem: The #VirtQueueElement
779 * @len: number of bytes written
780 *
781 * Detach the element from the virtqueue. This function is suitable for device
782 * reset or other situations where a #VirtQueueElement is simply freed and will
783 * not be pushed or discarded.
784 */
785 void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
786 unsigned int len)
787 {
788 vq->inuse -= elem->ndescs;
789 virtqueue_unmap_sg(vq, elem, len);
790 }
791
792 static void virtqueue_split_rewind(VirtQueue *vq, unsigned int num)
793 {
794 vq->last_avail_idx -= num;
795 }
796
797 static void virtqueue_packed_rewind(VirtQueue *vq, unsigned int num)
798 {
799 if (vq->last_avail_idx < num) {
800 vq->last_avail_idx = vq->vring.num + vq->last_avail_idx - num;
801 vq->last_avail_wrap_counter ^= 1;
802 } else {
803 vq->last_avail_idx -= num;
804 }
805 }
806
807 /* virtqueue_unpop:
808 * @vq: The #VirtQueue
809 * @elem: The #VirtQueueElement
810 * @len: number of bytes written
811 *
812 * Pretend the most recent element wasn't popped from the virtqueue. The next
813 * call to virtqueue_pop() will refetch the element.
814 */
815 void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
816 unsigned int len)
817 {
819 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
820 virtqueue_packed_rewind(vq, 1);
821 } else {
822 virtqueue_split_rewind(vq, 1);
823 }
824
825 virtqueue_detach_element(vq, elem, len);
826 }
827
828 /* virtqueue_rewind:
829 * @vq: The #VirtQueue
830 * @num: Number of elements to push back
831 *
832 * Pretend that elements weren't popped from the virtqueue. The next
833 * virtqueue_pop() will refetch the oldest element.
834 *
835 * Use virtqueue_unpop() instead if you have a VirtQueueElement.
836 *
837 * Returns: true on success, false if @num is greater than the number of in use
838 * elements.
839 */
840 bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
841 {
842 if (num > vq->inuse) {
843 return false;
844 }
845
846 vq->inuse -= num;
847 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
848 virtqueue_packed_rewind(vq, num);
849 } else {
850 virtqueue_split_rewind(vq, num);
851 }
852 return true;
853 }
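
/*
 * Sketch of how a device might use these helpers when it cannot finish a
 * batch (backend_busy() is a stand-in for a device-specific condition):
 *
 *     while ((elem = virtqueue_pop(vq, sizeof(VirtQueueElement)))) {
 *         if (backend_busy()) {
 *             virtqueue_unpop(vq, elem, 0);
 *             g_free(elem);
 *             break;
 *         }
 *         ... process and push elem ...
 *     }
 *
 * virtqueue_rewind() covers the same need when only a count of popped
 * elements is tracked rather than the elements themselves.
 */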
854
855 static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
856 unsigned int len, unsigned int idx)
857 {
858 VRingUsedElem uelem;
859
860 if (unlikely(!vq->vring.used)) {
861 return;
862 }
863
864 idx = (idx + vq->used_idx) % vq->vring.num;
865
866 uelem.id = elem->index;
867 uelem.len = len;
868 vring_used_write(vq, &uelem, idx);
869 }
870
871 static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
872 unsigned int len, unsigned int idx)
873 {
874 vq->used_elems[idx].index = elem->index;
875 vq->used_elems[idx].len = len;
876 vq->used_elems[idx].ndescs = elem->ndescs;
877 }
878
879 static void virtqueue_packed_fill_desc(VirtQueue *vq,
880 const VirtQueueElement *elem,
881 unsigned int idx,
882 bool strict_order)
883 {
884 uint16_t head;
885 VRingMemoryRegionCaches *caches;
886 VRingPackedDesc desc = {
887 .id = elem->index,
888 .len = elem->len,
889 };
890 bool wrap_counter = vq->used_wrap_counter;
891
892 if (unlikely(!vq->vring.desc)) {
893 return;
894 }
895
896 head = vq->used_idx + idx;
897 if (head >= vq->vring.num) {
898 head -= vq->vring.num;
899 wrap_counter ^= 1;
900 }
901 if (wrap_counter) {
902 desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL);
903 desc.flags |= (1 << VRING_PACKED_DESC_F_USED);
904 } else {
905 desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL);
906 desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED);
907 }
908
909 caches = vring_get_region_caches(vq);
910 if (!caches) {
911 return;
912 }
913
914 vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order);
915 }
916
917 /* Called within rcu_read_lock(). */
918 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
919 unsigned int len, unsigned int idx)
920 {
921 trace_virtqueue_fill(vq, elem, len, idx);
922
923 virtqueue_unmap_sg(vq, elem, len);
924
925 if (virtio_device_disabled(vq->vdev)) {
926 return;
927 }
928
929 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
930 virtqueue_packed_fill(vq, elem, len, idx);
931 } else {
932 virtqueue_split_fill(vq, elem, len, idx);
933 }
934 }
935
936 /* Called within rcu_read_lock(). */
937 static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
938 {
939 uint16_t old, new;
940
941 if (unlikely(!vq->vring.used)) {
942 return;
943 }
944
945 /* Make sure buffer is written before we update index. */
946 smp_wmb();
947 trace_virtqueue_flush(vq, count);
948 old = vq->used_idx;
949 new = old + count;
950 vring_used_idx_set(vq, new);
951 vq->inuse -= count;
952 if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
953 vq->signalled_used_valid = false;
954 }
955
956 static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
957 {
958 unsigned int i, ndescs = 0;
959
960 if (unlikely(!vq->vring.desc)) {
961 return;
962 }
963
964 for (i = 1; i < count; i++) {
965 virtqueue_packed_fill_desc(vq, &vq->used_elems[i], i, false);
966 ndescs += vq->used_elems[i].ndescs;
967 }
968 virtqueue_packed_fill_desc(vq, &vq->used_elems[0], 0, true);
969 ndescs += vq->used_elems[0].ndescs;
970
971 vq->inuse -= ndescs;
972 vq->used_idx += ndescs;
973 if (vq->used_idx >= vq->vring.num) {
974 vq->used_idx -= vq->vring.num;
975 vq->used_wrap_counter ^= 1;
976 vq->signalled_used_valid = false;
977 }
978 }
979
980 void virtqueue_flush(VirtQueue *vq, unsigned int count)
981 {
982 if (virtio_device_disabled(vq->vdev)) {
983 vq->inuse -= count;
984 return;
985 }
986
987 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
988 virtqueue_packed_flush(vq, count);
989 } else {
990 virtqueue_split_flush(vq, count);
991 }
992 }
993
994 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
995 unsigned int len)
996 {
997 RCU_READ_LOCK_GUARD();
998 virtqueue_fill(vq, elem, len, 0);
999 virtqueue_flush(vq, 1);
1000 }
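
/*
 * virtqueue_push() is the single-element shorthand for the two calls above;
 * a device completing several requests can batch the used-index update by
 * filling consecutive slots first, e.g.:
 *
 *     RCU_READ_LOCK_GUARD();
 *     virtqueue_fill(vq, elem0, len0, 0);
 *     virtqueue_fill(vq, elem1, len1, 1);
 *     virtqueue_flush(vq, 2);
 */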
1001
1002 /* Called within rcu_read_lock(). */
1003 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
1004 {
1005 uint16_t num_heads = vring_avail_idx(vq) - idx;
1006
1007 /* Check it isn't doing very strange things with descriptor numbers. */
1008 if (num_heads > vq->vring.num) {
1009 virtio_error(vq->vdev, "Guest moved used index from %u to %u",
1010 idx, vq->shadow_avail_idx);
1011 return -EINVAL;
1012 }
1013 /* On success, callers read a descriptor at vq->last_avail_idx.
1014 * Make sure descriptor read does not bypass avail index read. */
1015 if (num_heads) {
1016 smp_rmb();
1017 }
1018
1019 return num_heads;
1020 }
1021
1022 /* Called within rcu_read_lock(). */
1023 static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
1024 unsigned int *head)
1025 {
1026 /* Grab the next descriptor number they're advertising, and increment
1027 * the index we've seen. */
1028 *head = vring_avail_ring(vq, idx % vq->vring.num);
1029
1030 /* If their number is silly, that's a fatal mistake. */
1031 if (*head >= vq->vring.num) {
1032 virtio_error(vq->vdev, "Guest says index %u is available", *head);
1033 return false;
1034 }
1035
1036 return true;
1037 }
1038
1039 enum {
1040 VIRTQUEUE_READ_DESC_ERROR = -1,
1041 VIRTQUEUE_READ_DESC_DONE = 0, /* end of chain */
1042 VIRTQUEUE_READ_DESC_MORE = 1, /* more buffers in chain */
1043 };
1044
1045 static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
1046 MemoryRegionCache *desc_cache,
1047 unsigned int max, unsigned int *next)
1048 {
1049 /* If this descriptor says it doesn't chain, we're done. */
1050 if (!(desc->flags & VRING_DESC_F_NEXT)) {
1051 return VIRTQUEUE_READ_DESC_DONE;
1052 }
1053
1054 /* Check they're not leading us off the end of the descriptors. */
1055 *next = desc->next;
1056 /* Make sure compiler knows to grab that: we don't want it changing! */
1057 smp_wmb();
1058
1059 if (*next >= max) {
1060 virtio_error(vdev, "Desc next is %u", *next);
1061 return VIRTQUEUE_READ_DESC_ERROR;
1062 }
1063
1064 vring_split_desc_read(vdev, desc, desc_cache, *next);
1065 return VIRTQUEUE_READ_DESC_MORE;
1066 }
1067
1068 /* Called within rcu_read_lock(). */
1069 static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
1070 unsigned int *in_bytes, unsigned int *out_bytes,
1071 unsigned max_in_bytes, unsigned max_out_bytes,
1072 VRingMemoryRegionCaches *caches)
1073 {
1074 VirtIODevice *vdev = vq->vdev;
1075 unsigned int max, idx;
1076 unsigned int total_bufs, in_total, out_total;
1077 MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1078 int64_t len = 0;
1079 int rc;
1080
1081 idx = vq->last_avail_idx;
1082 total_bufs = in_total = out_total = 0;
1083
1084 max = vq->vring.num;
1085
1086 while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
1087 MemoryRegionCache *desc_cache = &caches->desc;
1088 unsigned int num_bufs;
1089 VRingDesc desc;
1090 unsigned int i;
1091
1092 num_bufs = total_bufs;
1093
1094 if (!virtqueue_get_head(vq, idx++, &i)) {
1095 goto err;
1096 }
1097
1098 vring_split_desc_read(vdev, &desc, desc_cache, i);
1099
1100 if (desc.flags & VRING_DESC_F_INDIRECT) {
1101 if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1102 virtio_error(vdev, "Invalid size for indirect buffer table");
1103 goto err;
1104 }
1105
1106 /* If we've got too many, that implies a descriptor loop. */
1107 if (num_bufs >= max) {
1108 virtio_error(vdev, "Looped descriptor");
1109 goto err;
1110 }
1111
1112 /* loop over the indirect descriptor table */
1113 len = address_space_cache_init(&indirect_desc_cache,
1114 vdev->dma_as,
1115 desc.addr, desc.len, false);
1116 desc_cache = &indirect_desc_cache;
1117 if (len < desc.len) {
1118 virtio_error(vdev, "Cannot map indirect buffer");
1119 goto err;
1120 }
1121
1122 max = desc.len / sizeof(VRingDesc);
1123 num_bufs = i = 0;
1124 vring_split_desc_read(vdev, &desc, desc_cache, i);
1125 }
1126
1127 do {
1128 /* If we've got too many, that implies a descriptor loop. */
1129 if (++num_bufs > max) {
1130 virtio_error(vdev, "Looped descriptor");
1131 goto err;
1132 }
1133
1134 if (desc.flags & VRING_DESC_F_WRITE) {
1135 in_total += desc.len;
1136 } else {
1137 out_total += desc.len;
1138 }
1139 if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1140 goto done;
1141 }
1142
1143 rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1144 } while (rc == VIRTQUEUE_READ_DESC_MORE);
1145
1146 if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1147 goto err;
1148 }
1149
1150 if (desc_cache == &indirect_desc_cache) {
1151 address_space_cache_destroy(&indirect_desc_cache);
1152 total_bufs++;
1153 } else {
1154 total_bufs = num_bufs;
1155 }
1156 }
1157
1158 if (rc < 0) {
1159 goto err;
1160 }
1161
1162 done:
1163 address_space_cache_destroy(&indirect_desc_cache);
1164 if (in_bytes) {
1165 *in_bytes = in_total;
1166 }
1167 if (out_bytes) {
1168 *out_bytes = out_total;
1169 }
1170 return;
1171
1172 err:
1173 in_total = out_total = 0;
1174 goto done;
1175 }
1176
1177 static int virtqueue_packed_read_next_desc(VirtQueue *vq,
1178 VRingPackedDesc *desc,
1179 MemoryRegionCache
1180 *desc_cache,
1181 unsigned int max,
1182 unsigned int *next,
1183 bool indirect)
1184 {
1185 /* If this descriptor says it doesn't chain, we're done. */
1186 if (!indirect && !(desc->flags & VRING_DESC_F_NEXT)) {
1187 return VIRTQUEUE_READ_DESC_DONE;
1188 }
1189
1190 ++*next;
1191 if (*next == max) {
1192 if (indirect) {
1193 return VIRTQUEUE_READ_DESC_DONE;
1194 } else {
1195 (*next) -= vq->vring.num;
1196 }
1197 }
1198
1199 vring_packed_desc_read(vq->vdev, desc, desc_cache, *next, false);
1200 return VIRTQUEUE_READ_DESC_MORE;
1201 }
1202
1203 /* Called within rcu_read_lock(). */
1204 static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
1205 unsigned int *in_bytes,
1206 unsigned int *out_bytes,
1207 unsigned max_in_bytes,
1208 unsigned max_out_bytes,
1209 VRingMemoryRegionCaches *caches)
1210 {
1211 VirtIODevice *vdev = vq->vdev;
1212 unsigned int max, idx;
1213 unsigned int total_bufs, in_total, out_total;
1214 MemoryRegionCache *desc_cache;
1215 MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1216 int64_t len = 0;
1217 VRingPackedDesc desc;
1218 bool wrap_counter;
1219
1220 idx = vq->last_avail_idx;
1221 wrap_counter = vq->last_avail_wrap_counter;
1222 total_bufs = in_total = out_total = 0;
1223
1224 max = vq->vring.num;
1225
1226 for (;;) {
1227 unsigned int num_bufs = total_bufs;
1228 unsigned int i = idx;
1229 int rc;
1230
1231 desc_cache = &caches->desc;
1232 vring_packed_desc_read(vdev, &desc, desc_cache, idx, true);
1233 if (!is_desc_avail(desc.flags, wrap_counter)) {
1234 break;
1235 }
1236
1237 if (desc.flags & VRING_DESC_F_INDIRECT) {
1238 if (desc.len % sizeof(VRingPackedDesc)) {
1239 virtio_error(vdev, "Invalid size for indirect buffer table");
1240 goto err;
1241 }
1242
1243 /* If we've got too many, that implies a descriptor loop. */
1244 if (num_bufs >= max) {
1245 virtio_error(vdev, "Looped descriptor");
1246 goto err;
1247 }
1248
1249 /* loop over the indirect descriptor table */
1250 len = address_space_cache_init(&indirect_desc_cache,
1251 vdev->dma_as,
1252 desc.addr, desc.len, false);
1253 desc_cache = &indirect_desc_cache;
1254 if (len < desc.len) {
1255 virtio_error(vdev, "Cannot map indirect buffer");
1256 goto err;
1257 }
1258
1259 max = desc.len / sizeof(VRingPackedDesc);
1260 num_bufs = i = 0;
1261 vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1262 }
1263
1264 do {
1265 /* If we've got too many, that implies a descriptor loop. */
1266 if (++num_bufs > max) {
1267 virtio_error(vdev, "Looped descriptor");
1268 goto err;
1269 }
1270
1271 if (desc.flags & VRING_DESC_F_WRITE) {
1272 in_total += desc.len;
1273 } else {
1274 out_total += desc.len;
1275 }
1276 if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1277 goto done;
1278 }
1279
1280 rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max,
1281 &i, desc_cache ==
1282 &indirect_desc_cache);
1283 } while (rc == VIRTQUEUE_READ_DESC_MORE);
1284
1285 if (desc_cache == &indirect_desc_cache) {
1286 address_space_cache_destroy(&indirect_desc_cache);
1287 total_bufs++;
1288 idx++;
1289 } else {
1290 idx += num_bufs - total_bufs;
1291 total_bufs = num_bufs;
1292 }
1293
1294 if (idx >= vq->vring.num) {
1295 idx -= vq->vring.num;
1296 wrap_counter ^= 1;
1297 }
1298 }
1299
1300 /* Record the index and wrap counter for a kick we want */
1301 vq->shadow_avail_idx = idx;
1302 vq->shadow_avail_wrap_counter = wrap_counter;
1303 done:
1304 address_space_cache_destroy(&indirect_desc_cache);
1305 if (in_bytes) {
1306 *in_bytes = in_total;
1307 }
1308 if (out_bytes) {
1309 *out_bytes = out_total;
1310 }
1311 return;
1312
1313 err:
1314 in_total = out_total = 0;
1315 goto done;
1316 }
1317
1318 void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
1319 unsigned int *out_bytes,
1320 unsigned max_in_bytes, unsigned max_out_bytes)
1321 {
1322 uint16_t desc_size;
1323 VRingMemoryRegionCaches *caches;
1324
1325 RCU_READ_LOCK_GUARD();
1326
1327 if (unlikely(!vq->vring.desc)) {
1328 goto err;
1329 }
1330
1331 caches = vring_get_region_caches(vq);
1332 if (!caches) {
1333 goto err;
1334 }
1335
1336 desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
1337 sizeof(VRingPackedDesc) : sizeof(VRingDesc);
1338 if (caches->desc.len < vq->vring.num * desc_size) {
1339 virtio_error(vq->vdev, "Cannot map descriptor ring");
1340 goto err;
1341 }
1342
1343 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1344 virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
1345 max_in_bytes, max_out_bytes,
1346 caches);
1347 } else {
1348 virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
1349 max_in_bytes, max_out_bytes,
1350 caches);
1351 }
1352
1353 return;
1354 err:
1355 if (in_bytes) {
1356 *in_bytes = 0;
1357 }
1358 if (out_bytes) {
1359 *out_bytes = 0;
1360 }
1361 }
1362
1363 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
1364 unsigned int out_bytes)
1365 {
1366 unsigned int in_total, out_total;
1367
1368 virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
1369 return in_bytes <= in_total && out_bytes <= out_total;
1370 }
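
/*
 * For example, a device that needs to read a 16-byte request header from
 * the guest and write back a 4-byte status could check
 * virtqueue_avail_bytes(vq, 4, 16) before popping (sizes are illustrative).
 */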
1371
1372 static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
1373 hwaddr *addr, struct iovec *iov,
1374 unsigned int max_num_sg, bool is_write,
1375 hwaddr pa, size_t sz)
1376 {
1377 bool ok = false;
1378 unsigned num_sg = *p_num_sg;
1379 assert(num_sg <= max_num_sg);
1380
1381 if (!sz) {
1382 virtio_error(vdev, "virtio: zero sized buffers are not allowed");
1383 goto out;
1384 }
1385
1386 while (sz) {
1387 hwaddr len = sz;
1388
1389 if (num_sg == max_num_sg) {
1390 virtio_error(vdev, "virtio: too many write descriptors in "
1391 "indirect table");
1392 goto out;
1393 }
1394
1395 iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len,
1396 is_write ?
1397 DMA_DIRECTION_FROM_DEVICE :
1398 DMA_DIRECTION_TO_DEVICE,
1399 MEMTXATTRS_UNSPECIFIED);
1400 if (!iov[num_sg].iov_base) {
1401 virtio_error(vdev, "virtio: bogus descriptor or out of resources");
1402 goto out;
1403 }
1404
1405 iov[num_sg].iov_len = len;
1406 addr[num_sg] = pa;
1407
1408 sz -= len;
1409 pa += len;
1410 num_sg++;
1411 }
1412 ok = true;
1413
1414 out:
1415 *p_num_sg = num_sg;
1416 return ok;
1417 }
1418
1419 /* Only used by error code paths before we have a VirtQueueElement (therefore
1420 * virtqueue_unmap_sg() can't be used). Assumes buffers weren't written to
1421 * yet.
1422 */
1423 static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
1424 struct iovec *iov)
1425 {
1426 unsigned int i;
1427
1428 for (i = 0; i < out_num + in_num; i++) {
1429 int is_write = i >= out_num;
1430
1431 cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
1432 iov++;
1433 }
1434 }
1435
1436 static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
1437 hwaddr *addr, unsigned int num_sg,
1438 bool is_write)
1439 {
1440 unsigned int i;
1441 hwaddr len;
1442
1443 for (i = 0; i < num_sg; i++) {
1444 len = sg[i].iov_len;
1445 sg[i].iov_base = dma_memory_map(vdev->dma_as,
1446 addr[i], &len, is_write ?
1447 DMA_DIRECTION_FROM_DEVICE :
1448 DMA_DIRECTION_TO_DEVICE,
1449 MEMTXATTRS_UNSPECIFIED);
1450 if (!sg[i].iov_base) {
1451 error_report("virtio: error trying to map MMIO memory");
1452 exit(1);
1453 }
1454 if (len != sg[i].iov_len) {
1455 error_report("virtio: unexpected memory split");
1456 exit(1);
1457 }
1458 }
1459 }
1460
1461 void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
1462 {
1463 virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num, true);
1464 virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num,
1465 false);
1466 }
1467
1468 static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
1469 {
1470 VirtQueueElement *elem;
1471 size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
1472 size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
1473 size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
1474 size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
1475 size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
1476 size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
1477
1478 assert(sz >= sizeof(VirtQueueElement));
1479 elem = g_malloc(out_sg_end);
1480 trace_virtqueue_alloc_element(elem, sz, in_num, out_num);
1481 elem->out_num = out_num;
1482 elem->in_num = in_num;
1483 elem->in_addr = (void *)elem + in_addr_ofs;
1484 elem->out_addr = (void *)elem + out_addr_ofs;
1485 elem->in_sg = (void *)elem + in_sg_ofs;
1486 elem->out_sg = (void *)elem + out_sg_ofs;
1487 return elem;
1488 }
1489
1490 static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
1491 {
1492 unsigned int i, head, max;
1493 VRingMemoryRegionCaches *caches;
1494 MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1495 MemoryRegionCache *desc_cache;
1496 int64_t len;
1497 VirtIODevice *vdev = vq->vdev;
1498 VirtQueueElement *elem = NULL;
1499 unsigned out_num, in_num, elem_entries;
1500 hwaddr addr[VIRTQUEUE_MAX_SIZE];
1501 struct iovec iov[VIRTQUEUE_MAX_SIZE];
1502 VRingDesc desc;
1503 int rc;
1504
1505 RCU_READ_LOCK_GUARD();
1506 if (virtio_queue_empty_rcu(vq)) {
1507 goto done;
1508 }
1509 /* Needed after virtio_queue_empty(), see comment in
1510 * virtqueue_num_heads(). */
1511 smp_rmb();
1512
1513 /* When we start there are no input or output descriptors. */
1514 out_num = in_num = elem_entries = 0;
1515
1516 max = vq->vring.num;
1517
1518 if (vq->inuse >= vq->vring.num) {
1519 virtio_error(vdev, "Virtqueue size exceeded");
1520 goto done;
1521 }
1522
1523 if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
1524 goto done;
1525 }
1526
1527 if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
1528 vring_set_avail_event(vq, vq->last_avail_idx);
1529 }
1530
1531 i = head;
1532
1533 caches = vring_get_region_caches(vq);
1534 if (!caches) {
1535 virtio_error(vdev, "Region caches not initialized");
1536 goto done;
1537 }
1538
1539 if (caches->desc.len < max * sizeof(VRingDesc)) {
1540 virtio_error(vdev, "Cannot map descriptor ring");
1541 goto done;
1542 }
1543
1544 desc_cache = &caches->desc;
1545 vring_split_desc_read(vdev, &desc, desc_cache, i);
1546 if (desc.flags & VRING_DESC_F_INDIRECT) {
1547 if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1548 virtio_error(vdev, "Invalid size for indirect buffer table");
1549 goto done;
1550 }
1551
1552 /* loop over the indirect descriptor table */
1553 len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1554 desc.addr, desc.len, false);
1555 desc_cache = &indirect_desc_cache;
1556 if (len < desc.len) {
1557 virtio_error(vdev, "Cannot map indirect buffer");
1558 goto done;
1559 }
1560
1561 max = desc.len / sizeof(VRingDesc);
1562 i = 0;
1563 vring_split_desc_read(vdev, &desc, desc_cache, i);
1564 }
1565
1566 /* Collect all the descriptors */
1567 do {
1568 bool map_ok;
1569
1570 if (desc.flags & VRING_DESC_F_WRITE) {
1571 map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1572 iov + out_num,
1573 VIRTQUEUE_MAX_SIZE - out_num, true,
1574 desc.addr, desc.len);
1575 } else {
1576 if (in_num) {
1577 virtio_error(vdev, "Incorrect order for descriptors");
1578 goto err_undo_map;
1579 }
1580 map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1581 VIRTQUEUE_MAX_SIZE, false,
1582 desc.addr, desc.len);
1583 }
1584 if (!map_ok) {
1585 goto err_undo_map;
1586 }
1587
1588 /* If we've got too many, that implies a descriptor loop. */
1589 if (++elem_entries > max) {
1590 virtio_error(vdev, "Looped descriptor");
1591 goto err_undo_map;
1592 }
1593
1594 rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1595 } while (rc == VIRTQUEUE_READ_DESC_MORE);
1596
1597 if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1598 goto err_undo_map;
1599 }
1600
1601 /* Now copy what we have collected and mapped */
1602 elem = virtqueue_alloc_element(sz, out_num, in_num);
1603 elem->index = head;
1604 elem->ndescs = 1;
1605 for (i = 0; i < out_num; i++) {
1606 elem->out_addr[i] = addr[i];
1607 elem->out_sg[i] = iov[i];
1608 }
1609 for (i = 0; i < in_num; i++) {
1610 elem->in_addr[i] = addr[out_num + i];
1611 elem->in_sg[i] = iov[out_num + i];
1612 }
1613
1614 vq->inuse++;
1615
1616 trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1617 done:
1618 address_space_cache_destroy(&indirect_desc_cache);
1619
1620 return elem;
1621
1622 err_undo_map:
1623 virtqueue_undo_map_desc(out_num, in_num, iov);
1624 goto done;
1625 }
1626
1627 static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
1628 {
1629 unsigned int i, max;
1630 VRingMemoryRegionCaches *caches;
1631 MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1632 MemoryRegionCache *desc_cache;
1633 int64_t len;
1634 VirtIODevice *vdev = vq->vdev;
1635 VirtQueueElement *elem = NULL;
1636 unsigned out_num, in_num, elem_entries;
1637 hwaddr addr[VIRTQUEUE_MAX_SIZE];
1638 struct iovec iov[VIRTQUEUE_MAX_SIZE];
1639 VRingPackedDesc desc;
1640 uint16_t id;
1641 int rc;
1642
1643 RCU_READ_LOCK_GUARD();
1644 if (virtio_queue_packed_empty_rcu(vq)) {
1645 goto done;
1646 }
1647
1648 /* When we start there are no input or output descriptors. */
1649 out_num = in_num = elem_entries = 0;
1650
1651 max = vq->vring.num;
1652
1653 if (vq->inuse >= vq->vring.num) {
1654 virtio_error(vdev, "Virtqueue size exceeded");
1655 goto done;
1656 }
1657
1658 i = vq->last_avail_idx;
1659
1660 caches = vring_get_region_caches(vq);
1661 if (!caches) {
1662 virtio_error(vdev, "Region caches not initialized");
1663 goto done;
1664 }
1665
1666 if (caches->desc.len < max * sizeof(VRingDesc)) {
1667 virtio_error(vdev, "Cannot map descriptor ring");
1668 goto done;
1669 }
1670
1671 desc_cache = &caches->desc;
1672 vring_packed_desc_read(vdev, &desc, desc_cache, i, true);
1673 id = desc.id;
1674 if (desc.flags & VRING_DESC_F_INDIRECT) {
1675 if (desc.len % sizeof(VRingPackedDesc)) {
1676 virtio_error(vdev, "Invalid size for indirect buffer table");
1677 goto done;
1678 }
1679
1680 /* loop over the indirect descriptor table */
1681 len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1682 desc.addr, desc.len, false);
1683 desc_cache = &indirect_desc_cache;
1684 if (len < desc.len) {
1685 virtio_error(vdev, "Cannot map indirect buffer");
1686 goto done;
1687 }
1688
1689 max = desc.len / sizeof(VRingPackedDesc);
1690 i = 0;
1691 vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1692 }
1693
1694 /* Collect all the descriptors */
1695 do {
1696 bool map_ok;
1697
1698 if (desc.flags & VRING_DESC_F_WRITE) {
1699 map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1700 iov + out_num,
1701 VIRTQUEUE_MAX_SIZE - out_num, true,
1702 desc.addr, desc.len);
1703 } else {
1704 if (in_num) {
1705 virtio_error(vdev, "Incorrect order for descriptors");
1706 goto err_undo_map;
1707 }
1708 map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1709 VIRTQUEUE_MAX_SIZE, false,
1710 desc.addr, desc.len);
1711 }
1712 if (!map_ok) {
1713 goto err_undo_map;
1714 }
1715
1716 /* If we've got too many, that implies a descriptor loop. */
1717 if (++elem_entries > max) {
1718 virtio_error(vdev, "Looped descriptor");
1719 goto err_undo_map;
1720 }
1721
1722 rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, &i,
1723 desc_cache ==
1724 &indirect_desc_cache);
1725 } while (rc == VIRTQUEUE_READ_DESC_MORE);
1726
1727 /* Now copy what we have collected and mapped */
1728 elem = virtqueue_alloc_element(sz, out_num, in_num);
1729 for (i = 0; i < out_num; i++) {
1730 elem->out_addr[i] = addr[i];
1731 elem->out_sg[i] = iov[i];
1732 }
1733 for (i = 0; i < in_num; i++) {
1734 elem->in_addr[i] = addr[out_num + i];
1735 elem->in_sg[i] = iov[out_num + i];
1736 }
1737
1738 elem->index = id;
1739 elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries;
1740 vq->last_avail_idx += elem->ndescs;
1741 vq->inuse += elem->ndescs;
1742
1743 if (vq->last_avail_idx >= vq->vring.num) {
1744 vq->last_avail_idx -= vq->vring.num;
1745 vq->last_avail_wrap_counter ^= 1;
1746 }
1747
1748 vq->shadow_avail_idx = vq->last_avail_idx;
1749 vq->shadow_avail_wrap_counter = vq->last_avail_wrap_counter;
1750
1751 trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1752 done:
1753 address_space_cache_destroy(&indirect_desc_cache);
1754
1755 return elem;
1756
1757 err_undo_map:
1758 virtqueue_undo_map_desc(out_num, in_num, iov);
1759 goto done;
1760 }
1761
1762 void *virtqueue_pop(VirtQueue *vq, size_t sz)
1763 {
1764 if (virtio_device_disabled(vq->vdev)) {
1765 return NULL;
1766 }
1767
1768 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1769 return virtqueue_packed_pop(vq, sz);
1770 } else {
1771 return virtqueue_split_pop(vq, sz);
1772 }
1773 }
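
/*
 * A minimal sketch of a queue handler built on virtqueue_pop(); the sz
 * argument sizes the allocation, so a device that embeds VirtQueueElement
 * at the start of a larger request struct passes sizeof that struct:
 *
 *     static void hypothetical_handle_output(VirtIODevice *vdev, VirtQueue *vq)
 *     {
 *         VirtQueueElement *elem;
 *
 *         while ((elem = virtqueue_pop(vq, sizeof(VirtQueueElement)))) {
 *             size_t written = 0;
 *             ... read elem->out_sg, write results into elem->in_sg ...
 *             virtqueue_push(vq, elem, written);
 *             g_free(elem);
 *         }
 *         virtio_notify(vdev, vq);
 *     }
 */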
1774
1775 static unsigned int virtqueue_packed_drop_all(VirtQueue *vq)
1776 {
1777 VRingMemoryRegionCaches *caches;
1778 MemoryRegionCache *desc_cache;
1779 unsigned int dropped = 0;
1780 VirtQueueElement elem = {};
1781 VirtIODevice *vdev = vq->vdev;
1782 VRingPackedDesc desc;
1783
1784 RCU_READ_LOCK_GUARD();
1785
1786 caches = vring_get_region_caches(vq);
1787 if (!caches) {
1788 return 0;
1789 }
1790
1791 desc_cache = &caches->desc;
1792
1793 virtio_queue_set_notification(vq, 0);
1794
1795 while (vq->inuse < vq->vring.num) {
1796 unsigned int idx = vq->last_avail_idx;
1797 /*
1798 * works similarly to virtqueue_pop but does not map buffers
1799 * and does not allocate any memory.
1800 */
1801 vring_packed_desc_read(vdev, &desc, desc_cache,
1802 vq->last_avail_idx, true);
1803 if (!is_desc_avail(desc.flags, vq->last_avail_wrap_counter)) {
1804 break;
1805 }
1806 elem.index = desc.id;
1807 elem.ndescs = 1;
1808 while (virtqueue_packed_read_next_desc(vq, &desc, desc_cache,
1809 vq->vring.num, &idx, false)) {
1810 ++elem.ndescs;
1811 }
1812 /*
1813 * immediately push the element, nothing to unmap
1814 * as both in_num and out_num are set to 0.
1815 */
1816 virtqueue_push(vq, &elem, 0);
1817 dropped++;
1818 vq->last_avail_idx += elem.ndescs;
1819 if (vq->last_avail_idx >= vq->vring.num) {
1820 vq->last_avail_idx -= vq->vring.num;
1821 vq->last_avail_wrap_counter ^= 1;
1822 }
1823 }
1824
1825 return dropped;
1826 }
1827
1828 static unsigned int virtqueue_split_drop_all(VirtQueue *vq)
1829 {
1830 unsigned int dropped = 0;
1831 VirtQueueElement elem = {};
1832 VirtIODevice *vdev = vq->vdev;
1833 bool fEventIdx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1834
1835 while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) {
1836 /* works similarly to virtqueue_pop but does not map buffers
1837 * and does not allocate any memory */
1838 smp_rmb();
1839 if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) {
1840 break;
1841 }
1842 vq->inuse++;
1843 vq->last_avail_idx++;
1844 if (fEventIdx) {
1845 vring_set_avail_event(vq, vq->last_avail_idx);
1846 }
1847 /* immediately push the element, nothing to unmap
1848 * as both in_num and out_num are set to 0 */
1849 virtqueue_push(vq, &elem, 0);
1850 dropped++;
1851 }
1852
1853 return dropped;
1854 }
1855
1856 /* virtqueue_drop_all:
1857 * @vq: The #VirtQueue
1858 * Drops all queued buffers and indicates them to the guest
1859 * as if they are done. Useful when buffers can not be
1860 * processed but must be returned to the guest.
1861 */
1862 unsigned int virtqueue_drop_all(VirtQueue *vq)
1863 {
1864 struct VirtIODevice *vdev = vq->vdev;
1865
1866 if (virtio_device_disabled(vq->vdev)) {
1867 return 0;
1868 }
1869
1870 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1871 return virtqueue_packed_drop_all(vq);
1872 } else {
1873 return virtqueue_split_drop_all(vq);
1874 }
1875 }
1876
1877 /* Reading and writing a structure directly to QEMUFile is *awful*, but
1878 * it is what QEMU has always done by mistake. We can change it sooner
1879 * or later by bumping the version number of the affected vm states.
1880 * In the meantime, since the in-memory layout of VirtQueueElement
1881 * has changed, we need to marshal to and from the layout that was
1882 * used before the change.
1883 */
1884 typedef struct VirtQueueElementOld {
1885 unsigned int index;
1886 unsigned int out_num;
1887 unsigned int in_num;
1888 hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
1889 hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
1890 struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
1891 struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
1892 } VirtQueueElementOld;
1893
1894 void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
1895 {
1896 VirtQueueElement *elem;
1897 VirtQueueElementOld data;
1898 int i;
1899
1900 qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1901
1902 /* TODO: teach all callers that this can fail, and return failure instead
1903 * of asserting here.
1904 * This is just one thing (there are probably more) that must be
1905 * fixed before we can allow NDEBUG compilation.
1906 */
1907 assert(ARRAY_SIZE(data.in_addr) >= data.in_num);
1908 assert(ARRAY_SIZE(data.out_addr) >= data.out_num);
1909
1910 elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
1911 elem->index = data.index;
1912
1913 for (i = 0; i < elem->in_num; i++) {
1914 elem->in_addr[i] = data.in_addr[i];
1915 }
1916
1917 for (i = 0; i < elem->out_num; i++) {
1918 elem->out_addr[i] = data.out_addr[i];
1919 }
1920
1921 for (i = 0; i < elem->in_num; i++) {
1922 /* Base is overwritten by virtqueue_map. */
1923 elem->in_sg[i].iov_base = 0;
1924 elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
1925 }
1926
1927 for (i = 0; i < elem->out_num; i++) {
1928 /* Base is overwritten by virtqueue_map. */
1929 elem->out_sg[i].iov_base = 0;
1930 elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
1931 }
1932
1933 if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1934 qemu_get_be32s(f, &elem->ndescs);
1935 }
1936
1937 virtqueue_map(vdev, elem);
1938 return elem;
1939 }
1940
1941 void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
1942 VirtQueueElement *elem)
1943 {
1944 VirtQueueElementOld data;
1945 int i;
1946
1947 memset(&data, 0, sizeof(data));
1948 data.index = elem->index;
1949 data.in_num = elem->in_num;
1950 data.out_num = elem->out_num;
1951
1952 for (i = 0; i < elem->in_num; i++) {
1953 data.in_addr[i] = elem->in_addr[i];
1954 }
1955
1956 for (i = 0; i < elem->out_num; i++) {
1957 data.out_addr[i] = elem->out_addr[i];
1958 }
1959
1960 for (i = 0; i < elem->in_num; i++) {
1961 /* Base is overwritten by virtqueue_map when loading. Do not
1962 * save it, as it would leak the QEMU address space layout. */
1963 data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
1964 }
1965
1966 for (i = 0; i < elem->out_num; i++) {
1967 /* Do not save iov_base as above. */
1968 data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
1969 }
1970
1971 if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1972 qemu_put_be32s(f, &elem->ndescs);
1973 }
1974
1975 qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1976 }
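
/*
 * Devices use this pair symmetrically in their migration code, roughly:
 *
 *     save:  qemu_put_virtqueue_element(vdev, f, elem);
 *     load:  elem = qemu_get_virtqueue_element(vdev, f,
 *                                               sizeof(VirtQueueElement));
 *
 * Only lengths and guest addresses travel in the stream; the iov_base
 * pointers are rebuilt on load by the virtqueue_map() call above.
 */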
1977
1978 /* virtio device */
1979 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
1980 {
1981 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1982 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1983
1984 if (virtio_device_disabled(vdev)) {
1985 return;
1986 }
1987
1988 if (k->notify) {
1989 k->notify(qbus->parent, vector);
1990 }
1991 }
1992
1993 void virtio_update_irq(VirtIODevice *vdev)
1994 {
1995 virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
1996 }
1997
1998 static int virtio_validate_features(VirtIODevice *vdev)
1999 {
2000 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2001
2002 if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
2003 !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
2004 return -EFAULT;
2005 }
2006
2007 if (k->validate_features) {
2008 return k->validate_features(vdev);
2009 } else {
2010 return 0;
2011 }
2012 }
2013
2014 int virtio_set_status(VirtIODevice *vdev, uint8_t val)
2015 {
2016 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2017 trace_virtio_set_status(vdev, val);
2018
2019 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2020 if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
2021 val & VIRTIO_CONFIG_S_FEATURES_OK) {
2022 int ret = virtio_validate_features(vdev);
2023
2024 if (ret) {
2025 return ret;
2026 }
2027 }
2028 }
2029
2030 if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
2031 (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
2032 virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
2033 }
2034
2035 if (k->set_status) {
2036 k->set_status(vdev, val);
2037 }
2038 vdev->status = val;
2039
2040 return 0;
2041 }
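
/*
 * A condensed sketch of the virtio-1 initialization sequence as a transport
 * relays it to this core (error handling mostly elided; "negotiated" stands
 * for whatever feature subset the guest driver accepted):
 *
 *     virtio_set_status(vdev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
 *     virtio_set_status(vdev, vdev->status | VIRTIO_CONFIG_S_DRIVER);
 *     virtio_set_features(vdev, negotiated);
 *     if (virtio_set_status(vdev, vdev->status | VIRTIO_CONFIG_S_FEATURES_OK)) {
 *         // virtio_validate_features() rejected the negotiated set
 *     }
 *     virtio_set_status(vdev, vdev->status | VIRTIO_CONFIG_S_DRIVER_OK);
 */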
2042
2043 static enum virtio_device_endian virtio_default_endian(void)
2044 {
2045 if (target_words_bigendian()) {
2046 return VIRTIO_DEVICE_ENDIAN_BIG;
2047 } else {
2048 return VIRTIO_DEVICE_ENDIAN_LITTLE;
2049 }
2050 }
2051
2052 static enum virtio_device_endian virtio_current_cpu_endian(void)
2053 {
2054 if (cpu_virtio_is_big_endian(current_cpu)) {
2055 return VIRTIO_DEVICE_ENDIAN_BIG;
2056 } else {
2057 return VIRTIO_DEVICE_ENDIAN_LITTLE;
2058 }
2059 }
2060
2061 static void __virtio_queue_reset(VirtIODevice *vdev, uint32_t i)
2062 {
2063 vdev->vq[i].vring.desc = 0;
2064 vdev->vq[i].vring.avail = 0;
2065 vdev->vq[i].vring.used = 0;
2066 vdev->vq[i].last_avail_idx = 0;
2067 vdev->vq[i].shadow_avail_idx = 0;
2068 vdev->vq[i].used_idx = 0;
2069 vdev->vq[i].last_avail_wrap_counter = true;
2070 vdev->vq[i].shadow_avail_wrap_counter = true;
2071 vdev->vq[i].used_wrap_counter = true;
2072 virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
2073 vdev->vq[i].signalled_used = 0;
2074 vdev->vq[i].signalled_used_valid = false;
2075 vdev->vq[i].notification = true;
2076 vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
2077 vdev->vq[i].inuse = 0;
2078 virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
2079 }
2080
2081 void virtio_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
2082 {
2083 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2084
2085 if (k->queue_reset) {
2086 k->queue_reset(vdev, queue_index);
2087 }
2088
2089 __virtio_queue_reset(vdev, queue_index);
2090 }
2091
2092 void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
2093 {
2094 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2095
2096 /*
2097 * TODO: SeaBIOS is currently out of spec and triggers this error.
2098 * It needs to be fixed in SeaBIOS first; then this check can be
2099 * re-enabled for new machine types only, and also after
2100 * being converted to LOG_GUEST_ERROR.
2101 *
2102 if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2103 error_report("queue_enable is only suppported in devices of virtio "
2104 "1.0 or later.");
2105 }
2106 */
2107
2108 if (k->queue_enable) {
2109 k->queue_enable(vdev, queue_index);
2110 }
2111 }
2112
2113 void virtio_reset(void *opaque)
2114 {
2115 VirtIODevice *vdev = opaque;
2116 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2117 int i;
2118
2119 virtio_set_status(vdev, 0);
2120 if (current_cpu) {
2121 /* Guest initiated reset */
2122 vdev->device_endian = virtio_current_cpu_endian();
2123 } else {
2124 /* System reset */
2125 vdev->device_endian = virtio_default_endian();
2126 }
2127
2128 if (k->reset) {
2129 k->reset(vdev);
2130 }
2131
2132 vdev->start_on_kick = false;
2133 vdev->started = false;
2134 vdev->broken = false;
2135 vdev->guest_features = 0;
2136 vdev->queue_sel = 0;
2137 vdev->status = 0;
2138 vdev->disabled = false;
2139 qatomic_set(&vdev->isr, 0);
2140 vdev->config_vector = VIRTIO_NO_VECTOR;
2141 virtio_notify_vector(vdev, vdev->config_vector);
2142
2143 for(i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2144 __virtio_queue_reset(vdev, i);
2145 }
2146 }
2147
2148 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
2149 {
2150 if (!vdev->vq[n].vring.num) {
2151 return;
2152 }
2153 vdev->vq[n].vring.desc = addr;
2154 virtio_queue_update_rings(vdev, n);
2155 }
2156
2157 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
2158 {
2159 return vdev->vq[n].vring.desc;
2160 }
2161
2162 void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
2163 hwaddr avail, hwaddr used)
2164 {
2165 if (!vdev->vq[n].vring.num) {
2166 return;
2167 }
2168 vdev->vq[n].vring.desc = desc;
2169 vdev->vq[n].vring.avail = avail;
2170 vdev->vq[n].vring.used = used;
2171 virtio_init_region_cache(vdev, n);
2172 }
2173
2174 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
2175 {
2176 /* Don't allow guest to flip queue between existent and
2177 * nonexistent states, or to set it to an invalid size.
2178 */
2179 if (!!num != !!vdev->vq[n].vring.num ||
2180 num > VIRTQUEUE_MAX_SIZE ||
2181 num < 0) {
2182 return;
2183 }
2184 vdev->vq[n].vring.num = num;
2185 }
2186
2187 VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
2188 {
2189 return QLIST_FIRST(&vdev->vector_queues[vector]);
2190 }
2191
2192 VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
2193 {
2194 return QLIST_NEXT(vq, node);
2195 }
2196
2197 int virtio_queue_get_num(VirtIODevice *vdev, int n)
2198 {
2199 return vdev->vq[n].vring.num;
2200 }
2201
2202 int virtio_queue_get_max_num(VirtIODevice *vdev, int n)
2203 {
2204 return vdev->vq[n].vring.num_default;
2205 }
2206
2207 int virtio_get_num_queues(VirtIODevice *vdev)
2208 {
2209 int i;
2210
2211 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2212 if (!virtio_queue_get_num(vdev, i)) {
2213 break;
2214 }
2215 }
2216
2217 return i;
2218 }
2219
2220 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
2221 {
2222 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2223 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2224
2225 /* virtio-1 compliant devices cannot change the alignment */
2226 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2227 error_report("tried to modify queue alignment for virtio-1 device");
2228 return;
2229 }
2230 /* Check that the transport told us it was going to do this
2231 * (so a buggy transport will immediately assert rather than
2232 * silently failing to migrate this state)
2233 */
2234 assert(k->has_variable_vring_alignment);
2235
2236 if (align) {
2237 vdev->vq[n].vring.align = align;
2238 virtio_queue_update_rings(vdev, n);
2239 }
2240 }
2241
2242 static void virtio_queue_notify_vq(VirtQueue *vq)
2243 {
2244 if (vq->vring.desc && vq->handle_output) {
2245 VirtIODevice *vdev = vq->vdev;
2246
2247 if (unlikely(vdev->broken)) {
2248 return;
2249 }
2250
2251 trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2252 vq->handle_output(vdev, vq);
2253
2254 if (unlikely(vdev->start_on_kick)) {
2255 virtio_set_started(vdev, true);
2256 }
2257 }
2258 }
2259
2260 void virtio_queue_notify(VirtIODevice *vdev, int n)
2261 {
2262 VirtQueue *vq = &vdev->vq[n];
2263
2264 if (unlikely(!vq->vring.desc || vdev->broken)) {
2265 return;
2266 }
2267
2268 trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2269 if (vq->host_notifier_enabled) {
2270 event_notifier_set(&vq->host_notifier);
2271 } else if (vq->handle_output) {
2272 vq->handle_output(vdev, vq);
2273
2274 if (unlikely(vdev->start_on_kick)) {
2275 virtio_set_started(vdev, true);
2276 }
2277 }
2278 }
2279
2280 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
2281 {
2282 return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
2283 VIRTIO_NO_VECTOR;
2284 }
2285
2286 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
2287 {
2288 VirtQueue *vq = &vdev->vq[n];
2289
2290 if (n < VIRTIO_QUEUE_MAX) {
2291 if (vdev->vector_queues &&
2292 vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
2293 QLIST_REMOVE(vq, node);
2294 }
2295 vdev->vq[n].vector = vector;
2296 if (vdev->vector_queues &&
2297 vector != VIRTIO_NO_VECTOR) {
2298 QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
2299 }
2300 }
2301 }
2302
2303 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
2304 VirtIOHandleOutput handle_output)
2305 {
2306 int i;
2307
2308 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2309 if (vdev->vq[i].vring.num == 0)
2310 break;
2311 }
2312
2313 if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
2314 abort();
2315
2316 vdev->vq[i].vring.num = queue_size;
2317 vdev->vq[i].vring.num_default = queue_size;
2318 vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
2319 vdev->vq[i].handle_output = handle_output;
2320 vdev->vq[i].used_elems = g_new0(VirtQueueElement, queue_size);
2321
2322 return &vdev->vq[i];
2323 }
2324
2325 void virtio_delete_queue(VirtQueue *vq)
2326 {
2327 vq->vring.num = 0;
2328 vq->vring.num_default = 0;
2329 vq->handle_output = NULL;
2330 g_free(vq->used_elems);
2331 vq->used_elems = NULL;
2332 virtio_virtqueue_reset_region_cache(vq);
2333 }
2334
2335 void virtio_del_queue(VirtIODevice *vdev, int n)
2336 {
2337 if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
2338 abort();
2339 }
2340
2341 virtio_delete_queue(&vdev->vq[n]);
2342 }
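
/*
 * A minimal sketch of the usual pairing in a device model's realize and
 * unrealize hooks (the device id, queue size and my_* names are purely
 * illustrative):
 *
 *     static void my_dev_realize(DeviceState *dev, Error **errp)
 *     {
 *         VirtIODevice *vdev = VIRTIO_DEVICE(dev);
 *         VirtIOMyDev *s = VIRTIO_MY_DEV(dev);
 *
 *         virtio_init(vdev, VIRTIO_ID_BLOCK, sizeof(struct virtio_blk_config));
 *         s->vq = virtio_add_queue(vdev, 128, my_dev_handle_output);
 *     }
 *
 *     static void my_dev_unrealize(DeviceState *dev)
 *     {
 *         VirtIOMyDev *s = VIRTIO_MY_DEV(dev);
 *
 *         virtio_delete_queue(s->vq);
 *         virtio_cleanup(VIRTIO_DEVICE(dev));
 *     }
 */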
2343
2344 static void virtio_set_isr(VirtIODevice *vdev, int value)
2345 {
2346 uint8_t old = qatomic_read(&vdev->isr);
2347
2348 /* Do not write ISR if it does not change, so that its cacheline remains
2349 * shared in the common case where the guest does not read it.
2350 */
2351 if ((old & value) != value) {
2352 qatomic_or(&vdev->isr, value);
2353 }
2354 }
2355
2356 /* Called within rcu_read_lock(). */
2357 static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2358 {
2359 uint16_t old, new;
2360 bool v;
2361 /* We need to expose used array entries before checking used event. */
2362 smp_mb();
2363 /* Always notify when queue is empty (if the feature was acknowledged) */
2364 if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
2365 !vq->inuse && virtio_queue_empty(vq)) {
2366 return true;
2367 }
2368
2369 if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2370 return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
2371 }
2372
2373 v = vq->signalled_used_valid;
2374 vq->signalled_used_valid = true;
2375 old = vq->signalled_used;
2376 new = vq->signalled_used = vq->used_idx;
2377 return !v || vring_need_event(vring_get_used_event(vq), new, old);
2378 }
2379
2380 static bool vring_packed_need_event(VirtQueue *vq, bool wrap,
2381 uint16_t off_wrap, uint16_t new,
2382 uint16_t old)
2383 {
2384 int off = off_wrap & ~(1 << 15);
2385
2386 if (wrap != off_wrap >> 15) {
2387 off -= vq->vring.num;
2388 }
2389
2390 return vring_need_event(off, new, old);
2391 }
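
/*
 * Worked example for the split ring, using the vring_need_event() helper
 * from the standard headers, which evaluates
 * (uint16_t)(new - event_idx - 1) < (uint16_t)(new - old):
 *
 *     used event = 5, old signalled_used = 4, new used_idx = 6
 *         (6 - 5 - 1) = 0      <  (6 - 4) = 2        -> notify the guest
 *     used event = 5, old signalled_used = 3, new used_idx = 5
 *         (5 - 5 - 1) = 0xffff <  (5 - 3) = 2 fails  -> suppress
 *
 * The packed variant above applies the same test after stripping the wrap
 * bit from off_wrap and, when the wrap counters differ, shifting the event
 * offset down by vring.num so the unsigned arithmetic stays consistent.
 */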
2392
2393 /* Called within rcu_read_lock(). */
2394 static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2395 {
2396 VRingPackedDescEvent e;
2397 uint16_t old, new;
2398 bool v;
2399 VRingMemoryRegionCaches *caches;
2400
2401 caches = vring_get_region_caches(vq);
2402 if (!caches) {
2403 return false;
2404 }
2405
2406 vring_packed_event_read(vdev, &caches->avail, &e);
2407
2408 old = vq->signalled_used;
2409 new = vq->signalled_used = vq->used_idx;
2410 v = vq->signalled_used_valid;
2411 vq->signalled_used_valid = true;
2412
2413 if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) {
2414 return false;
2415 } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) {
2416 return true;
2417 }
2418
2419 return !v || vring_packed_need_event(vq, vq->used_wrap_counter,
2420 e.off_wrap, new, old);
2421 }
2422
2423 /* Called within rcu_read_lock(). */
2424 static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2425 {
2426 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2427 return virtio_packed_should_notify(vdev, vq);
2428 } else {
2429 return virtio_split_should_notify(vdev, vq);
2430 }
2431 }
2432
2433 void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
2434 {
2435 WITH_RCU_READ_LOCK_GUARD() {
2436 if (!virtio_should_notify(vdev, vq)) {
2437 return;
2438 }
2439 }
2440
2441 trace_virtio_notify_irqfd(vdev, vq);
2442
2443 /*
2444 * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
2445 * windows drivers included in virtio-win 1.8.0 (circa 2015) are
2446 * incorrectly polling this bit during crashdump and hibernation
2447 * in MSI mode, causing a hang if this bit is never updated.
2448 * Recent releases of Windows do not really shut down, but rather
2449 * log out and hibernate to make the next startup faster. Hence,
2450 * this manifested as a more serious hang during shutdown.
2451 *
2452 * The next driver release, from 2016, fixed this problem, so working
2453 * around it is not a must; but it's easy to do, so let's do it here.
2454 *
2455 * Note: it's safe to update ISR from any thread as it was switched
2456 * to an atomic operation.
2457 */
2458 virtio_set_isr(vq->vdev, 0x1);
2459 event_notifier_set(&vq->guest_notifier);
2460 }
2461
2462 static void virtio_irq(VirtQueue *vq)
2463 {
2464 virtio_set_isr(vq->vdev, 0x1);
2465 virtio_notify_vector(vq->vdev, vq->vector);
2466 }
2467
2468 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
2469 {
2470 WITH_RCU_READ_LOCK_GUARD() {
2471 if (!virtio_should_notify(vdev, vq)) {
2472 return;
2473 }
2474 }
2475
2476 trace_virtio_notify(vdev, vq);
2477 virtio_irq(vq);
2478 }
2479
2480 void virtio_notify_config(VirtIODevice *vdev)
2481 {
2482 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2483 return;
2484
2485 virtio_set_isr(vdev, 0x3);
2486 vdev->generation++;
2487 virtio_notify_vector(vdev, vdev->config_vector);
2488 }
2489
2490 static bool virtio_device_endian_needed(void *opaque)
2491 {
2492 VirtIODevice *vdev = opaque;
2493
2494 assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
2495 if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2496 return vdev->device_endian != virtio_default_endian();
2497 }
2498 /* Devices conforming to VIRTIO 1.0 or later are always LE. */
2499 return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
2500 }
2501
2502 static bool virtio_64bit_features_needed(void *opaque)
2503 {
2504 VirtIODevice *vdev = opaque;
2505
2506 return (vdev->host_features >> 32) != 0;
2507 }
2508
2509 static bool virtio_virtqueue_needed(void *opaque)
2510 {
2511 VirtIODevice *vdev = opaque;
2512
2513 return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
2514 }
2515
2516 static bool virtio_packed_virtqueue_needed(void *opaque)
2517 {
2518 VirtIODevice *vdev = opaque;
2519
2520 return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED);
2521 }
2522
2523 static bool virtio_ringsize_needed(void *opaque)
2524 {
2525 VirtIODevice *vdev = opaque;
2526 int i;
2527
2528 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2529 if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
2530 return true;
2531 }
2532 }
2533 return false;
2534 }
2535
2536 static bool virtio_extra_state_needed(void *opaque)
2537 {
2538 VirtIODevice *vdev = opaque;
2539 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2540 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2541
2542 return k->has_extra_state &&
2543 k->has_extra_state(qbus->parent);
2544 }
2545
2546 static bool virtio_broken_needed(void *opaque)
2547 {
2548 VirtIODevice *vdev = opaque;
2549
2550 return vdev->broken;
2551 }
2552
2553 static bool virtio_started_needed(void *opaque)
2554 {
2555 VirtIODevice *vdev = opaque;
2556
2557 return vdev->started;
2558 }
2559
2560 static bool virtio_disabled_needed(void *opaque)
2561 {
2562 VirtIODevice *vdev = opaque;
2563
2564 return vdev->disabled;
2565 }
2566
2567 static const VMStateDescription vmstate_virtqueue = {
2568 .name = "virtqueue_state",
2569 .version_id = 1,
2570 .minimum_version_id = 1,
2571 .fields = (VMStateField[]) {
2572 VMSTATE_UINT64(vring.avail, struct VirtQueue),
2573 VMSTATE_UINT64(vring.used, struct VirtQueue),
2574 VMSTATE_END_OF_LIST()
2575 }
2576 };
2577
2578 static const VMStateDescription vmstate_packed_virtqueue = {
2579 .name = "packed_virtqueue_state",
2580 .version_id = 1,
2581 .minimum_version_id = 1,
2582 .fields = (VMStateField[]) {
2583 VMSTATE_UINT16(last_avail_idx, struct VirtQueue),
2584 VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue),
2585 VMSTATE_UINT16(used_idx, struct VirtQueue),
2586 VMSTATE_BOOL(used_wrap_counter, struct VirtQueue),
2587 VMSTATE_UINT32(inuse, struct VirtQueue),
2588 VMSTATE_END_OF_LIST()
2589 }
2590 };
2591
2592 static const VMStateDescription vmstate_virtio_virtqueues = {
2593 .name = "virtio/virtqueues",
2594 .version_id = 1,
2595 .minimum_version_id = 1,
2596 .needed = &virtio_virtqueue_needed,
2597 .fields = (VMStateField[]) {
2598 VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2599 VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
2600 VMSTATE_END_OF_LIST()
2601 }
2602 };
2603
2604 static const VMStateDescription vmstate_virtio_packed_virtqueues = {
2605 .name = "virtio/packed_virtqueues",
2606 .version_id = 1,
2607 .minimum_version_id = 1,
2608 .needed = &virtio_packed_virtqueue_needed,
2609 .fields = (VMStateField[]) {
2610 VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2611 VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue),
2612 VMSTATE_END_OF_LIST()
2613 }
2614 };
2615
2616 static const VMStateDescription vmstate_ringsize = {
2617 .name = "ringsize_state",
2618 .version_id = 1,
2619 .minimum_version_id = 1,
2620 .fields = (VMStateField[]) {
2621 VMSTATE_UINT32(vring.num_default, struct VirtQueue),
2622 VMSTATE_END_OF_LIST()
2623 }
2624 };
2625
2626 static const VMStateDescription vmstate_virtio_ringsize = {
2627 .name = "virtio/ringsize",
2628 .version_id = 1,
2629 .minimum_version_id = 1,
2630 .needed = &virtio_ringsize_needed,
2631 .fields = (VMStateField[]) {
2632 VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2633 VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
2634 VMSTATE_END_OF_LIST()
2635 }
2636 };
2637
2638 static int get_extra_state(QEMUFile *f, void *pv, size_t size,
2639 const VMStateField *field)
2640 {
2641 VirtIODevice *vdev = pv;
2642 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2643 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2644
2645 if (!k->load_extra_state) {
2646 return -1;
2647 } else {
2648 return k->load_extra_state(qbus->parent, f);
2649 }
2650 }
2651
2652 static int put_extra_state(QEMUFile *f, void *pv, size_t size,
2653 const VMStateField *field, JSONWriter *vmdesc)
2654 {
2655 VirtIODevice *vdev = pv;
2656 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2657 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2658
2659 k->save_extra_state(qbus->parent, f);
2660 return 0;
2661 }
2662
2663 static const VMStateInfo vmstate_info_extra_state = {
2664 .name = "virtqueue_extra_state",
2665 .get = get_extra_state,
2666 .put = put_extra_state,
2667 };
2668
2669 static const VMStateDescription vmstate_virtio_extra_state = {
2670 .name = "virtio/extra_state",
2671 .version_id = 1,
2672 .minimum_version_id = 1,
2673 .needed = &virtio_extra_state_needed,
2674 .fields = (VMStateField[]) {
2675 {
2676 .name = "extra_state",
2677 .version_id = 0,
2678 .field_exists = NULL,
2679 .size = 0,
2680 .info = &vmstate_info_extra_state,
2681 .flags = VMS_SINGLE,
2682 .offset = 0,
2683 },
2684 VMSTATE_END_OF_LIST()
2685 }
2686 };
2687
2688 static const VMStateDescription vmstate_virtio_device_endian = {
2689 .name = "virtio/device_endian",
2690 .version_id = 1,
2691 .minimum_version_id = 1,
2692 .needed = &virtio_device_endian_needed,
2693 .fields = (VMStateField[]) {
2694 VMSTATE_UINT8(device_endian, VirtIODevice),
2695 VMSTATE_END_OF_LIST()
2696 }
2697 };
2698
2699 static const VMStateDescription vmstate_virtio_64bit_features = {
2700 .name = "virtio/64bit_features",
2701 .version_id = 1,
2702 .minimum_version_id = 1,
2703 .needed = &virtio_64bit_features_needed,
2704 .fields = (VMStateField[]) {
2705 VMSTATE_UINT64(guest_features, VirtIODevice),
2706 VMSTATE_END_OF_LIST()
2707 }
2708 };
2709
2710 static const VMStateDescription vmstate_virtio_broken = {
2711 .name = "virtio/broken",
2712 .version_id = 1,
2713 .minimum_version_id = 1,
2714 .needed = &virtio_broken_needed,
2715 .fields = (VMStateField[]) {
2716 VMSTATE_BOOL(broken, VirtIODevice),
2717 VMSTATE_END_OF_LIST()
2718 }
2719 };
2720
2721 static const VMStateDescription vmstate_virtio_started = {
2722 .name = "virtio/started",
2723 .version_id = 1,
2724 .minimum_version_id = 1,
2725 .needed = &virtio_started_needed,
2726 .fields = (VMStateField[]) {
2727 VMSTATE_BOOL(started, VirtIODevice),
2728 VMSTATE_END_OF_LIST()
2729 }
2730 };
2731
2732 static const VMStateDescription vmstate_virtio_disabled = {
2733 .name = "virtio/disabled",
2734 .version_id = 1,
2735 .minimum_version_id = 1,
2736 .needed = &virtio_disabled_needed,
2737 .fields = (VMStateField[]) {
2738 VMSTATE_BOOL(disabled, VirtIODevice),
2739 VMSTATE_END_OF_LIST()
2740 }
2741 };
2742
2743 static const VMStateDescription vmstate_virtio = {
2744 .name = "virtio",
2745 .version_id = 1,
2746 .minimum_version_id = 1,
2747 .fields = (VMStateField[]) {
2748 VMSTATE_END_OF_LIST()
2749 },
2750 .subsections = (const VMStateDescription*[]) {
2751 &vmstate_virtio_device_endian,
2752 &vmstate_virtio_64bit_features,
2753 &vmstate_virtio_virtqueues,
2754 &vmstate_virtio_ringsize,
2755 &vmstate_virtio_broken,
2756 &vmstate_virtio_extra_state,
2757 &vmstate_virtio_started,
2758 &vmstate_virtio_packed_virtqueues,
2759 &vmstate_virtio_disabled,
2760 NULL
2761 }
2762 };
2763
2764 int virtio_save(VirtIODevice *vdev, QEMUFile *f)
2765 {
2766 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2767 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2768 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2769 uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
2770 int i;
2771
2772 if (k->save_config) {
2773 k->save_config(qbus->parent, f);
2774 }
2775
2776 qemu_put_8s(f, &vdev->status);
2777 qemu_put_8s(f, &vdev->isr);
2778 qemu_put_be16s(f, &vdev->queue_sel);
2779 qemu_put_be32s(f, &guest_features_lo);
2780 qemu_put_be32(f, vdev->config_len);
2781 qemu_put_buffer(f, vdev->config, vdev->config_len);
2782
2783 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2784 if (vdev->vq[i].vring.num == 0)
2785 break;
2786 }
2787
2788 qemu_put_be32(f, i);
2789
2790 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2791 if (vdev->vq[i].vring.num == 0)
2792 break;
2793
2794 qemu_put_be32(f, vdev->vq[i].vring.num);
2795 if (k->has_variable_vring_alignment) {
2796 qemu_put_be32(f, vdev->vq[i].vring.align);
2797 }
2798 /*
2799 * Save desc now; the rest of the ring addresses are saved in
2800 * subsections for VIRTIO-1 devices.
2801 */
2802 qemu_put_be64(f, vdev->vq[i].vring.desc);
2803 qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
2804 if (k->save_queue) {
2805 k->save_queue(qbus->parent, i, f);
2806 }
2807 }
2808
2809 if (vdc->save != NULL) {
2810 vdc->save(vdev, f);
2811 }
2812
2813 if (vdc->vmsd) {
2814 int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL);
2815 if (ret) {
2816 return ret;
2817 }
2818 }
2819
2820 /* Subsections */
2821 return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
2822 }
2823
2824 /* A wrapper for use as a VMState .put function */
2825 static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
2826 const VMStateField *field, JSONWriter *vmdesc)
2827 {
2828 return virtio_save(VIRTIO_DEVICE(opaque), f);
2829 }
2830
2831 /* A wrapper for use as a VMState .get function */
2832 static int virtio_device_get(QEMUFile *f, void *opaque, size_t size,
2833 const VMStateField *field)
2834 {
2835 VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
2836 DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
2837
2838 return virtio_load(vdev, f, dc->vmsd->version_id);
2839 }
2840
2841 const VMStateInfo virtio_vmstate_info = {
2842 .name = "virtio",
2843 .get = virtio_device_get,
2844 .put = virtio_device_put,
2845 };
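
/*
 * Devices normally pull virtio_vmstate_info in through the
 * VMSTATE_VIRTIO_DEVICE macro from the virtio header rather than naming it
 * directly; a minimal sketch with a hypothetical device name:
 *
 *     static const VMStateDescription vmstate_virtio_my_dev = {
 *         .name = "virtio-my-dev",
 *         .version_id = 1,
 *         .minimum_version_id = 1,
 *         .fields = (VMStateField[]) {
 *             VMSTATE_VIRTIO_DEVICE,   // VMS_SINGLE field backed by
 *                                      // virtio_vmstate_info
 *             VMSTATE_END_OF_LIST()
 *         },
 *     };
 */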
2846
2847 static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
2848 {
2849 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2850 bool bad = (val & ~(vdev->host_features)) != 0;
2851
2852 val &= vdev->host_features;
2853 if (k->set_features) {
2854 k->set_features(vdev, val);
2855 }
2856 vdev->guest_features = val;
2857 return bad ? -1 : 0;
2858 }
2859
2860 int virtio_set_features(VirtIODevice *vdev, uint64_t val)
2861 {
2862 int ret;
2863 /*
2864 * The driver must not attempt to set features after feature negotiation
2865 * has finished.
2866 */
2867 if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
2868 return -EINVAL;
2869 }
2870
2871 if (val & (1ull << VIRTIO_F_BAD_FEATURE)) {
2872 qemu_log_mask(LOG_GUEST_ERROR,
2873 "%s: guest driver for %s has enabled UNUSED(30) feature bit!\n",
2874 __func__, vdev->name);
2875 }
2876
2877 ret = virtio_set_features_nocheck(vdev, val);
2878 if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2879 /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches. */
2880 int i;
2881 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2882 if (vdev->vq[i].vring.num != 0) {
2883 virtio_init_region_cache(vdev, i);
2884 }
2885 }
2886 }
2887 if (!ret) {
2888 if (!virtio_device_started(vdev, vdev->status) &&
2889 !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2890 vdev->start_on_kick = true;
2891 }
2892 }
2893 return ret;
2894 }
2895
2896 size_t virtio_get_config_size(const VirtIOConfigSizeParams *params,
2897 uint64_t host_features)
2898 {
2899 size_t config_size = params->min_size;
2900 const VirtIOFeature *feature_sizes = params->feature_sizes;
2901 size_t i;
2902
2903 for (i = 0; feature_sizes[i].flags != 0; i++) {
2904 if (host_features & feature_sizes[i].flags) {
2905 config_size = MAX(feature_sizes[i].end, config_size);
2906 }
2907 }
2908
2909 assert(config_size <= params->max_size);
2910 return config_size;
2911 }
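
/*
 * A sketch of the tables a device feeds into virtio_get_config_size(); the
 * virtio-net field names are only illustrative, and the my_* names are
 * hypothetical. The feature_sizes array must be terminated by an entry with
 * flags == 0:
 *
 *     static const VirtIOFeature my_feature_sizes[] = {
 *         {.flags = 1ULL << VIRTIO_NET_F_MQ,
 *          .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
 *         {}
 *     };
 *
 *     static const VirtIOConfigSizeParams my_cfg_size_params = {
 *         .min_size = endof(struct virtio_net_config, status),
 *         .max_size = sizeof(struct virtio_net_config),
 *         .feature_sizes = my_feature_sizes,
 *     };
 *
 *     config_size = virtio_get_config_size(&my_cfg_size_params,
 *                                          vdev->host_features);
 */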
2912
2913 int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
2914 {
2915 int i, ret;
2916 int32_t config_len;
2917 uint32_t num;
2918 uint32_t features;
2919 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2920 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2921 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2922
2923 /*
2924 * We poison the endianness to ensure it does not get used before
2925 * subsections have been loaded.
2926 */
2927 vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
2928
2929 if (k->load_config) {
2930 ret = k->load_config(qbus->parent, f);
2931 if (ret)
2932 return ret;
2933 }
2934
2935 qemu_get_8s(f, &vdev->status);
2936 qemu_get_8s(f, &vdev->isr);
2937 qemu_get_be16s(f, &vdev->queue_sel);
2938 if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
2939 return -1;
2940 }
2941 qemu_get_be32s(f, &features);
2942
2943 /*
2944 * Temporarily set guest_features low bits - needed by
2945 * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
2946 * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
2947 *
2948 * Note: devices should always test host features in the future - don't
2949 * create new dependencies like this.
2950 */
2951 vdev->guest_features = features;
2952
2953 config_len = qemu_get_be32(f);
2954
2955 /*
2956 * There are cases where the incoming config can be bigger or smaller
2957 * than what we have; so load what we have space for, and skip
2958 * any excess that's in the stream.
2959 */
2960 qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
2961
2962 while (config_len > vdev->config_len) {
2963 qemu_get_byte(f);
2964 config_len--;
2965 }
2966
2967 num = qemu_get_be32(f);
2968
2969 if (num > VIRTIO_QUEUE_MAX) {
2970 error_report("Invalid number of virtqueues: 0x%x", num);
2971 return -1;
2972 }
2973
2974 for (i = 0; i < num; i++) {
2975 vdev->vq[i].vring.num = qemu_get_be32(f);
2976 if (k->has_variable_vring_alignment) {
2977 vdev->vq[i].vring.align = qemu_get_be32(f);
2978 }
2979 vdev->vq[i].vring.desc = qemu_get_be64(f);
2980 qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
2981 vdev->vq[i].signalled_used_valid = false;
2982 vdev->vq[i].notification = true;
2983
2984 if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
2985 error_report("VQ %d address 0x0 "
2986 "inconsistent with Host index 0x%x",
2987 i, vdev->vq[i].last_avail_idx);
2988 return -1;
2989 }
2990 if (k->load_queue) {
2991 ret = k->load_queue(qbus->parent, i, f);
2992 if (ret)
2993 return ret;
2994 }
2995 }
2996
2997 virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
2998
2999 if (vdc->load != NULL) {
3000 ret = vdc->load(vdev, f, version_id);
3001 if (ret) {
3002 return ret;
3003 }
3004 }
3005
3006 if (vdc->vmsd) {
3007 ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
3008 if (ret) {
3009 return ret;
3010 }
3011 }
3012
3013 /* Subsections */
3014 ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
3015 if (ret) {
3016 return ret;
3017 }
3018
3019 if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
3020 vdev->device_endian = virtio_default_endian();
3021 }
3022
3023 if (virtio_64bit_features_needed(vdev)) {
3024 /*
3025 * Subsection load filled vdev->guest_features. Run them
3026 * through virtio_set_features to sanity-check them against
3027 * host_features.
3028 */
3029 uint64_t features64 = vdev->guest_features;
3030 if (virtio_set_features_nocheck(vdev, features64) < 0) {
3031 error_report("Features 0x%" PRIx64 " unsupported. "
3032 "Allowed features: 0x%" PRIx64,
3033 features64, vdev->host_features);
3034 return -1;
3035 }
3036 } else {
3037 if (virtio_set_features_nocheck(vdev, features) < 0) {
3038 error_report("Features 0x%x unsupported. "
3039 "Allowed features: 0x%" PRIx64,
3040 features, vdev->host_features);
3041 return -1;
3042 }
3043 }
3044
3045 if (!virtio_device_started(vdev, vdev->status) &&
3046 !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3047 vdev->start_on_kick = true;
3048 }
3049
3050 RCU_READ_LOCK_GUARD();
3051 for (i = 0; i < num; i++) {
3052 if (vdev->vq[i].vring.desc) {
3053 uint16_t nheads;
3054
3055 /*
3056 * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
3057 * only the region cache needs to be set up. Legacy devices need
3058 * to calculate used and avail ring addresses based on the desc
3059 * address.
3060 */
3061 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3062 virtio_init_region_cache(vdev, i);
3063 } else {
3064 virtio_queue_update_rings(vdev, i);
3065 }
3066
3067 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3068 vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx;
3069 vdev->vq[i].shadow_avail_wrap_counter =
3070 vdev->vq[i].last_avail_wrap_counter;
3071 continue;
3072 }
3073
3074 nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
3075 /* Check it isn't doing strange things with descriptor numbers. */
3076 if (nheads > vdev->vq[i].vring.num) {
3077 virtio_error(vdev, "VQ %d size 0x%x Guest index 0x%x "
3078 "inconsistent with Host index 0x%x: delta 0x%x",
3079 i, vdev->vq[i].vring.num,
3080 vring_avail_idx(&vdev->vq[i]),
3081 vdev->vq[i].last_avail_idx, nheads);
3082 vdev->vq[i].used_idx = 0;
3083 vdev->vq[i].shadow_avail_idx = 0;
3084 vdev->vq[i].inuse = 0;
3085 continue;
3086 }
3087 vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
3088 vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
3089
3090 /*
3091 * Some devices migrate VirtQueueElements that have been popped
3092 * from the avail ring but not yet returned to the used ring.
3093 * Since max ring size < UINT16_MAX it's safe to use modulo
3094 * UINT16_MAX + 1 subtraction.
3095 */
3096 vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
3097 vdev->vq[i].used_idx);
3098 if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
3099 error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
3100 "used_idx 0x%x",
3101 i, vdev->vq[i].vring.num,
3102 vdev->vq[i].last_avail_idx,
3103 vdev->vq[i].used_idx);
3104 return -1;
3105 }
3106 }
3107 }
3108
3109 if (vdc->post_load) {
3110 ret = vdc->post_load(vdev);
3111 if (ret) {
3112 return ret;
3113 }
3114 }
3115
3116 return 0;
3117 }
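
/*
 * A worked example of the wrap-safe "inuse" computation above: with
 * last_avail_idx = 2 and used_idx = 0xfffe (the free-running 16-bit
 * counters have wrapped), (uint16_t)(2 - 0xfffe) = 4, i.e. four elements
 * were popped from the avail ring but not yet pushed back to the used ring.
 */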
3118
3119 void virtio_cleanup(VirtIODevice *vdev)
3120 {
3121 qemu_del_vm_change_state_handler(vdev->vmstate);
3122 }
3123
3124 static void virtio_vmstate_change(void *opaque, bool running, RunState state)
3125 {
3126 VirtIODevice *vdev = opaque;
3127 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3128 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3129 bool backend_run = running && virtio_device_started(vdev, vdev->status);
3130 vdev->vm_running = running;
3131
3132 if (backend_run) {
3133 virtio_set_status(vdev, vdev->status);
3134 }
3135
3136 if (k->vmstate_change) {
3137 k->vmstate_change(qbus->parent, backend_run);
3138 }
3139
3140 if (!backend_run) {
3141 virtio_set_status(vdev, vdev->status);
3142 }
3143 }
3144
3145 void virtio_instance_init_common(Object *proxy_obj, void *data,
3146 size_t vdev_size, const char *vdev_name)
3147 {
3148 DeviceState *vdev = data;
3149
3150 object_initialize_child_with_props(proxy_obj, "virtio-backend", vdev,
3151 vdev_size, vdev_name, &error_abort,
3152 NULL);
3153 qdev_alias_all_properties(vdev, proxy_obj);
3154 }
3155
3156 void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t config_size)
3157 {
3158 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3159 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3160 int i;
3161 int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
3162
3163 if (nvectors) {
3164 vdev->vector_queues =
3165 g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
3166 }
3167
3168 vdev->start_on_kick = false;
3169 vdev->started = false;
3170 vdev->vhost_started = false;
3171 vdev->device_id = device_id;
3172 vdev->status = 0;
3173 qatomic_set(&vdev->isr, 0);
3174 vdev->queue_sel = 0;
3175 vdev->config_vector = VIRTIO_NO_VECTOR;
3176 vdev->vq = g_new0(VirtQueue, VIRTIO_QUEUE_MAX);
3177 vdev->vm_running = runstate_is_running();
3178 vdev->broken = false;
3179 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3180 vdev->vq[i].vector = VIRTIO_NO_VECTOR;
3181 vdev->vq[i].vdev = vdev;
3182 vdev->vq[i].queue_index = i;
3183 vdev->vq[i].host_notifier_enabled = false;
3184 }
3185
3186 vdev->name = virtio_id_to_name(device_id);
3187 vdev->config_len = config_size;
3188 if (vdev->config_len) {
3189 vdev->config = g_malloc0(config_size);
3190 } else {
3191 vdev->config = NULL;
3192 }
3193 vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
3194 virtio_vmstate_change, vdev);
3195 vdev->device_endian = virtio_default_endian();
3196 vdev->use_guest_notifier_mask = true;
3197 }
3198
3199 /*
3200 * Only devices that have already been around prior to defining the virtio
3201 * standard support legacy mode; this includes devices not specified in the
3202 * standard. All newer devices conform to the virtio standard only.
3203 */
3204 bool virtio_legacy_allowed(VirtIODevice *vdev)
3205 {
3206 switch (vdev->device_id) {
3207 case VIRTIO_ID_NET:
3208 case VIRTIO_ID_BLOCK:
3209 case VIRTIO_ID_CONSOLE:
3210 case VIRTIO_ID_RNG:
3211 case VIRTIO_ID_BALLOON:
3212 case VIRTIO_ID_RPMSG:
3213 case VIRTIO_ID_SCSI:
3214 case VIRTIO_ID_9P:
3215 case VIRTIO_ID_RPROC_SERIAL:
3216 case VIRTIO_ID_CAIF:
3217 return true;
3218 default:
3219 return false;
3220 }
3221 }
3222
3223 bool virtio_legacy_check_disabled(VirtIODevice *vdev)
3224 {
3225 return vdev->disable_legacy_check;
3226 }
3227
3228 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
3229 {
3230 return vdev->vq[n].vring.desc;
3231 }
3232
3233 bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n)
3234 {
3235 return virtio_queue_get_desc_addr(vdev, n) != 0;
3236 }
3237
3238 bool virtio_queue_enabled(VirtIODevice *vdev, int n)
3239 {
3240 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3241 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3242
3243 if (k->queue_enabled) {
3244 return k->queue_enabled(qbus->parent, n);
3245 }
3246 return virtio_queue_enabled_legacy(vdev, n);
3247 }
3248
3249 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
3250 {
3251 return vdev->vq[n].vring.avail;
3252 }
3253
3254 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
3255 {
3256 return vdev->vq[n].vring.used;
3257 }
3258
3259 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
3260 {
3261 return sizeof(VRingDesc) * vdev->vq[n].vring.num;
3262 }
3263
3264 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
3265 {
3266 int s;
3267
3268 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3269 return sizeof(struct VRingPackedDescEvent);
3270 }
3271
3272 s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3273 return offsetof(VRingAvail, ring) +
3274 sizeof(uint16_t) * vdev->vq[n].vring.num + s;
3275 }
3276
3277 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
3278 {
3279 int s;
3280
3281 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3282 return sizeof(struct VRingPackedDescEvent);
3283 }
3284
3285 s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3286 return offsetof(VRingUsed, ring) +
3287 sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
3288 }
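
/*
 * A quick size example for a split ring with vring.num = 256 and
 * VIRTIO_RING_F_EVENT_IDX negotiated:
 *
 *     desc  = sizeof(VRingDesc) * 256             = 16 * 256     = 4096 bytes
 *     avail = 4 + sizeof(uint16_t) * 256 + 2      = 4 + 512 + 2  =  518 bytes
 *     used  = 4 + sizeof(VRingUsedElem) * 256 + 2 = 4 + 2048 + 2 = 2054 bytes
 *
 * With VIRTIO_F_RING_PACKED the avail and used areas each shrink to a
 * single 4-byte VRingPackedDescEvent (the driver and device event
 * suppression structures).
 */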
3289
3290 static unsigned int virtio_queue_packed_get_last_avail_idx(VirtIODevice *vdev,
3291 int n)
3292 {
3293 unsigned int avail, used;
3294
3295 avail = vdev->vq[n].last_avail_idx;
3296 avail |= ((uint16_t)vdev->vq[n].last_avail_wrap_counter) << 15;
3297
3298 used = vdev->vq[n].used_idx;
3299 used |= ((uint16_t)vdev->vq[n].used_wrap_counter) << 15;
3300
3301 return avail | used << 16;
3302 }
3303
3304 static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev,
3305 int n)
3306 {
3307 return vdev->vq[n].last_avail_idx;
3308 }
3309
3310 unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
3311 {
3312 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3313 return virtio_queue_packed_get_last_avail_idx(vdev, n);
3314 } else {
3315 return virtio_queue_split_get_last_avail_idx(vdev, n);
3316 }
3317 }
3318
3319 static void virtio_queue_packed_set_last_avail_idx(VirtIODevice *vdev,
3320 int n, unsigned int idx)
3321 {
3322 struct VirtQueue *vq = &vdev->vq[n];
3323
3324 vq->last_avail_idx = vq->shadow_avail_idx = idx & 0x7fff;
3325 vq->last_avail_wrap_counter =
3326 vq->shadow_avail_wrap_counter = !!(idx & 0x8000);
3327 idx >>= 16;
3328 vq->used_idx = idx & 0x7fff;
3329 vq->used_wrap_counter = !!(idx & 0x8000);
3330 }
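
/*
 * The packed-ring get/set helpers above fold two 15-bit indices and their
 * wrap counters into one 32-bit value: bits 0-14 last_avail_idx, bit 15 its
 * wrap counter, bits 16-30 used_idx, bit 31 its wrap counter. For example,
 * last_avail_idx = 7 with the wrap counter set and used_idx = 3 with the
 * wrap counter clear round-trip as:
 *
 *     uint32_t idx = (7 | 0x8000) | ((uint32_t)3 << 16);   // 0x00038007
 *     virtio_queue_packed_set_last_avail_idx(vdev, n, idx);
 *     assert(virtio_queue_packed_get_last_avail_idx(vdev, n) == idx);
 */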
3331
3332 static void virtio_queue_split_set_last_avail_idx(VirtIODevice *vdev,
3333 int n, unsigned int idx)
3334 {
3335 vdev->vq[n].last_avail_idx = idx;
3336 vdev->vq[n].shadow_avail_idx = idx;
3337 }
3338
3339 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n,
3340 unsigned int idx)
3341 {
3342 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3343 virtio_queue_packed_set_last_avail_idx(vdev, n, idx);
3344 } else {
3345 virtio_queue_split_set_last_avail_idx(vdev, n, idx);
3346 }
3347 }
3348
3349 static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev,
3350 int n)
3351 {
3352 /* The packed ring has no used index in guest memory to restore from */
3353 return;
3354 }
3355
3356 static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev,
3357 int n)
3358 {
3359 RCU_READ_LOCK_GUARD();
3360 if (vdev->vq[n].vring.desc) {
3361 vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
3362 vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
3363 }
3364 }
3365
3366 void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
3367 {
3368 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3369 virtio_queue_packed_restore_last_avail_idx(vdev, n);
3370 } else {
3371 virtio_queue_split_restore_last_avail_idx(vdev, n);
3372 }
3373 }
3374
3375 static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n)
3376 {
3377 /* used idx was updated through set_last_avail_idx() */
3378 return;
3379 }
3380
3381 static void virtio_split_packed_update_used_idx(VirtIODevice *vdev, int n)
3382 {
3383 RCU_READ_LOCK_GUARD();
3384 if (vdev->vq[n].vring.desc) {
3385 vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
3386 }
3387 }
3388
3389 void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
3390 {
3391 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3392 return virtio_queue_packed_update_used_idx(vdev, n);
3393 } else {
3394 return virtio_split_packed_update_used_idx(vdev, n);
3395 }
3396 }
3397
3398 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
3399 {
3400 vdev->vq[n].signalled_used_valid = false;
3401 }
3402
3403 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
3404 {
3405 return vdev->vq + n;
3406 }
3407
3408 uint16_t virtio_get_queue_index(VirtQueue *vq)
3409 {
3410 return vq->queue_index;
3411 }
3412
3413 static void virtio_queue_guest_notifier_read(EventNotifier *n)
3414 {
3415 VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
3416 if (event_notifier_test_and_clear(n)) {
3417 virtio_irq(vq);
3418 }
3419 }
3420 static void virtio_config_guest_notifier_read(EventNotifier *n)
3421 {
3422 VirtIODevice *vdev = container_of(n, VirtIODevice, config_notifier);
3423
3424 if (event_notifier_test_and_clear(n)) {
3425 virtio_notify_config(vdev);
3426 }
3427 }
3428 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
3429 bool with_irqfd)
3430 {
3431 if (assign && !with_irqfd) {
3432 event_notifier_set_handler(&vq->guest_notifier,
3433 virtio_queue_guest_notifier_read);
3434 } else {
3435 event_notifier_set_handler(&vq->guest_notifier, NULL);
3436 }
3437 if (!assign) {
3438 /* Test and clear notifier before closing it,
3439 * in case poll callback didn't have time to run. */
3440 virtio_queue_guest_notifier_read(&vq->guest_notifier);
3441 }
3442 }
3443
3444 void virtio_config_set_guest_notifier_fd_handler(VirtIODevice *vdev,
3445 bool assign, bool with_irqfd)
3446 {
3447 EventNotifier *n;
3448 n = &vdev->config_notifier;
3449 if (assign && !with_irqfd) {
3450 event_notifier_set_handler(n, virtio_config_guest_notifier_read);
3451 } else {
3452 event_notifier_set_handler(n, NULL);
3453 }
3454 if (!assign) {
3455 /* Test and clear notifier before closing it,
3456 * in case poll callback didn't have time to run. */
3457 virtio_config_guest_notifier_read(n);
3458 }
3459 }
3460
3461 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
3462 {
3463 return &vq->guest_notifier;
3464 }
3465
3466 static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
3467 {
3468 VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3469
3470 virtio_queue_set_notification(vq, 0);
3471 }
3472
3473 static bool virtio_queue_host_notifier_aio_poll(void *opaque)
3474 {
3475 EventNotifier *n = opaque;
3476 VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3477
3478 return vq->vring.desc && !virtio_queue_empty(vq);
3479 }
3480
3481 static void virtio_queue_host_notifier_aio_poll_ready(EventNotifier *n)
3482 {
3483 VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3484
3485 virtio_queue_notify_vq(vq);
3486 }
3487
3488 static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
3489 {
3490 VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3491
3492 /* Caller polls once more after this to catch requests that race with us */
3493 virtio_queue_set_notification(vq, 1);
3494 }
3495
3496 void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
3497 {
3498 aio_set_event_notifier(ctx, &vq->host_notifier, true,
3499 virtio_queue_host_notifier_read,
3500 virtio_queue_host_notifier_aio_poll,
3501 virtio_queue_host_notifier_aio_poll_ready);
3502 aio_set_event_notifier_poll(ctx, &vq->host_notifier,
3503 virtio_queue_host_notifier_aio_poll_begin,
3504 virtio_queue_host_notifier_aio_poll_end);
3505 }
3506
3507 /*
3508 * Same as virtio_queue_aio_attach_host_notifier() but without polling. Use
3509 * this for rx virtqueues and similar cases where the virtqueue handler
3510 * function does not pop all elements. When the virtqueue is left non-empty,
3511 * polling consumes CPU cycles and should not be used.
3512 */
3513 void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx)
3514 {
3515 aio_set_event_notifier(ctx, &vq->host_notifier, true,
3516 virtio_queue_host_notifier_read,
3517 NULL, NULL);
3518 }
3519
3520 void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx)
3521 {
3522 aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL, NULL);
3523 /* Test and clear notifier after disabling the event notifier,
3524 * in case poll callback didn't have time to run. */
3525 virtio_queue_host_notifier_read(&vq->host_notifier);
3526 }
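
/*
 * A rough sketch of how a device's dataplane might wire its virtqueues to
 * an IOThread's AioContext with the helpers above; which queues use the
 * no_poll variant is a per-device choice, and the rx/tx split here is only
 * illustrative:
 *
 *     AioContext *ctx = iothread_get_aio_context(s->iothread);
 *
 *     aio_context_acquire(ctx);
 *     virtio_queue_aio_attach_host_notifier_no_poll(rx_vq, ctx);
 *     virtio_queue_aio_attach_host_notifier(tx_vq, ctx);
 *     aio_context_release(ctx);
 *     ...
 *     aio_context_acquire(ctx);
 *     virtio_queue_aio_detach_host_notifier(rx_vq, ctx);
 *     virtio_queue_aio_detach_host_notifier(tx_vq, ctx);
 *     aio_context_release(ctx);
 */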
3527
3528 void virtio_queue_host_notifier_read(EventNotifier *n)
3529 {
3530 VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3531 if (event_notifier_test_and_clear(n)) {
3532 virtio_queue_notify_vq(vq);
3533 }
3534 }
3535
3536 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
3537 {
3538 return &vq->host_notifier;
3539 }
3540
3541 EventNotifier *virtio_config_get_guest_notifier(VirtIODevice *vdev)
3542 {
3543 return &vdev->config_notifier;
3544 }
3545
3546 void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled)
3547 {
3548 vq->host_notifier_enabled = enabled;
3549 }
3550
3551 int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
3552 MemoryRegion *mr, bool assign)
3553 {
3554 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3555 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3556
3557 if (k->set_host_notifier_mr) {
3558 return k->set_host_notifier_mr(qbus->parent, n, mr, assign);
3559 }
3560
3561 return -1;
3562 }
3563
3564 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
3565 {
3566 g_free(vdev->bus_name);
3567 vdev->bus_name = g_strdup(bus_name);
3568 }
3569
3570 void G_GNUC_PRINTF(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
3571 {
3572 va_list ap;
3573
3574 va_start(ap, fmt);
3575 error_vreport(fmt, ap);
3576 va_end(ap);
3577
3578 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3579 vdev->status = vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET;
3580 virtio_notify_config(vdev);
3581 }
3582
3583 vdev->broken = true;
3584 }
3585
3586 static void virtio_memory_listener_commit(MemoryListener *listener)
3587 {
3588 VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
3589 int i;
3590
3591 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3592 if (vdev->vq[i].vring.num == 0) {
3593 break;
3594 }
3595 virtio_init_region_cache(vdev, i);
3596 }
3597 }
3598
3599 static void virtio_device_realize(DeviceState *dev, Error **errp)
3600 {
3601 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3602 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3603 Error *err = NULL;
3604
3605 /* Devices should either use vmsd or the load/save methods */
3606 assert(!vdc->vmsd || !vdc->load);
3607
3608 if (vdc->realize != NULL) {
3609 vdc->realize(dev, &err);
3610 if (err != NULL) {
3611 error_propagate(errp, err);
3612 return;
3613 }
3614 }
3615
3616 virtio_bus_device_plugged(vdev, &err);
3617 if (err != NULL) {
3618 error_propagate(errp, err);
3619 vdc->unrealize(dev);
3620 return;
3621 }
3622
3623 vdev->listener.commit = virtio_memory_listener_commit;
3624 vdev->listener.name = "virtio";
3625 memory_listener_register(&vdev->listener, vdev->dma_as);
3626 QTAILQ_INSERT_TAIL(&virtio_list, vdev, next);
3627 }
3628
3629 static void virtio_device_unrealize(DeviceState *dev)
3630 {
3631 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3632 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3633
3634 memory_listener_unregister(&vdev->listener);
3635 virtio_bus_device_unplugged(vdev);
3636
3637 if (vdc->unrealize != NULL) {
3638 vdc->unrealize(dev);
3639 }
3640
3641 QTAILQ_REMOVE(&virtio_list, vdev, next);
3642 g_free(vdev->bus_name);
3643 vdev->bus_name = NULL;
3644 }
3645
3646 static void virtio_device_free_virtqueues(VirtIODevice *vdev)
3647 {
3648 int i;
3649 if (!vdev->vq) {
3650 return;
3651 }
3652
3653 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3654 if (vdev->vq[i].vring.num == 0) {
3655 break;
3656 }
3657 virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
3658 }
3659 g_free(vdev->vq);
3660 }
3661
3662 static void virtio_device_instance_finalize(Object *obj)
3663 {
3664 VirtIODevice *vdev = VIRTIO_DEVICE(obj);
3665
3666 virtio_device_free_virtqueues(vdev);
3667
3668 g_free(vdev->config);
3669 g_free(vdev->vector_queues);
3670 }
3671
3672 static Property virtio_properties[] = {
3673 DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
3674 DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
3675 DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true),
3676 DEFINE_PROP_BOOL("x-disable-legacy-check", VirtIODevice,
3677 disable_legacy_check, false),
3678 DEFINE_PROP_END_OF_LIST(),
3679 };
3680
3681 static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
3682 {
3683 VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3684 int i, n, r, err;
3685
3686 /*
3687 * Batch all the host notifiers in a single transaction to avoid
3688 * quadratic time complexity in address_space_update_ioeventfds().
3689 */
3690 memory_region_transaction_begin();
3691 for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3692 VirtQueue *vq = &vdev->vq[n];
3693 if (!virtio_queue_get_num(vdev, n)) {
3694 continue;
3695 }
3696 r = virtio_bus_set_host_notifier(qbus, n, true);
3697 if (r < 0) {
3698 err = r;
3699 goto assign_error;
3700 }
3701 event_notifier_set_handler(&vq->host_notifier,
3702 virtio_queue_host_notifier_read);
3703 }
3704
3705 for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3706 /* Kick right away to begin processing requests already in vring */
3707 VirtQueue *vq = &vdev->vq[n];
3708 if (!vq->vring.num) {
3709 continue;
3710 }
3711 event_notifier_set(&vq->host_notifier);
3712 }
3713 memory_region_transaction_commit();
3714 return 0;
3715
3716 assign_error:
3717 i = n; /* save n for a second iteration after transaction is committed. */
3718 while (--n >= 0) {
3719 VirtQueue *vq = &vdev->vq[n];
3720 if (!virtio_queue_get_num(vdev, n)) {
3721 continue;
3722 }
3723
3724 event_notifier_set_handler(&vq->host_notifier, NULL);
3725 r = virtio_bus_set_host_notifier(qbus, n, false);
3726 assert(r >= 0);
3727 }
3728 /*
3729 * The transaction expects the ioeventfds to be open when it
3730 * commits. Do it now, before the cleanup loop.
3731 */
3732 memory_region_transaction_commit();
3733
3734 while (--i >= 0) {
3735 if (!virtio_queue_get_num(vdev, i)) {
3736 continue;
3737 }
3738 virtio_bus_cleanup_host_notifier(qbus, i);
3739 }
3740 return err;
3741 }
3742
3743 int virtio_device_start_ioeventfd(VirtIODevice *vdev)
3744 {
3745 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3746 VirtioBusState *vbus = VIRTIO_BUS(qbus);
3747
3748 return virtio_bus_start_ioeventfd(vbus);
3749 }
3750
3751 static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
3752 {
3753 VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3754 int n, r;
3755
3756 /*
3757 * Batch all the host notifiers in a single transaction to avoid
3758 * quadratic time complexity in address_space_update_ioeventfds().
3759 */
3760 memory_region_transaction_begin();
3761 for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3762 VirtQueue *vq = &vdev->vq[n];
3763
3764 if (!virtio_queue_get_num(vdev, n)) {
3765 continue;
3766 }
3767 event_notifier_set_handler(&vq->host_notifier, NULL);
3768 r = virtio_bus_set_host_notifier(qbus, n, false);
3769 assert(r >= 0);
3770 }
3771 /*
3772 * The transaction expects the ioeventfds to be open when it
3773 * commits. Do it now, before the cleanup loop.
3774 */
3775 memory_region_transaction_commit();
3776
3777 for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3778 if (!virtio_queue_get_num(vdev, n)) {
3779 continue;
3780 }
3781 virtio_bus_cleanup_host_notifier(qbus, n);
3782 }
3783 }
3784
3785 int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
3786 {
3787 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3788 VirtioBusState *vbus = VIRTIO_BUS(qbus);
3789
3790 return virtio_bus_grab_ioeventfd(vbus);
3791 }
3792
3793 void virtio_device_release_ioeventfd(VirtIODevice *vdev)
3794 {
3795 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3796 VirtioBusState *vbus = VIRTIO_BUS(qbus);
3797
3798 virtio_bus_release_ioeventfd(vbus);
3799 }
3800
3801 static void virtio_device_class_init(ObjectClass *klass, void *data)
3802 {
3803 /* Set the default value here. */
3804 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3805 DeviceClass *dc = DEVICE_CLASS(klass);
3806
3807 dc->realize = virtio_device_realize;
3808 dc->unrealize = virtio_device_unrealize;
3809 dc->bus_type = TYPE_VIRTIO_BUS;
3810 device_class_set_props(dc, virtio_properties);
3811 vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
3812 vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
3813
3814 vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
3815
3816 QTAILQ_INIT(&virtio_list);
3817 }
3818
3819 bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
3820 {
3821 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3822 VirtioBusState *vbus = VIRTIO_BUS(qbus);
3823
3824 return virtio_bus_ioeventfd_enabled(vbus);
3825 }
3826
3827 VirtioInfoList *qmp_x_query_virtio(Error **errp)
3828 {
3829 VirtioInfoList *list = NULL;
3830 VirtioInfoList *node;
3831 VirtIODevice *vdev;
3832
3833 QTAILQ_FOREACH(vdev, &virtio_list, next) {
3834 DeviceState *dev = DEVICE(vdev);
3835 Error *err = NULL;
3836 QObject *obj = qmp_qom_get(dev->canonical_path, "realized", &err);
3837
3838 if (err == NULL) {
3839 GString *is_realized = qobject_to_json_pretty(obj, true);
3840 /* virtio device is NOT realized, remove it from list */
3841 if (!strncmp(is_realized->str, "false", 5)) {
3842 QTAILQ_REMOVE(&virtio_list, vdev, next);
3843 } else {
3844 node = g_new0(VirtioInfoList, 1);
3845 node->value = g_new(VirtioInfo, 1);
3846 node->value->path = g_strdup(dev->canonical_path);
3847 node->value->name = g_strdup(vdev->name);
3848 QAPI_LIST_PREPEND(list, node->value);
3849 }
3850 g_string_free(is_realized, true);
3851 }
3852 qobject_unref(obj);
3853 }
3854
3855 return list;
3856 }
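
/*
 * This handler backs the x-query-virtio QMP command; a typical exchange
 * (illustrative path and name) looks roughly like:
 *
 *     -> { "execute": "x-query-virtio" }
 *     <- { "return": [
 *            { "path": "/machine/peripheral-anon/device[1]/virtio-backend",
 *              "name": "virtio-net" } ] }
 */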
3857
3858 static VirtIODevice *qmp_find_virtio_device(const char *path)
3859 {
3860 VirtIODevice *vdev;
3861
3862 QTAILQ_FOREACH(vdev, &virtio_list, next) {
3863 DeviceState *dev = DEVICE(vdev);
3864
3865 if (strcmp(dev->canonical_path, path) != 0) {
3866 continue;
3867 }
3868
3869 Error *err = NULL;
3870 QObject *obj = qmp_qom_get(dev->canonical_path, "realized", &err);
3871 if (err == NULL) {
3872 GString *is_realized = qobject_to_json_pretty(obj, true);
3873 /* virtio device is NOT realized, remove it from list */
3874 if (!strncmp(is_realized->str, "false", 5)) {
3875 g_string_free(is_realized, true);
3876 qobject_unref(obj);
3877 QTAILQ_REMOVE(&virtio_list, vdev, next);
3878 return NULL;
3879 }
3880 g_string_free(is_realized, true);
3881 } else {
3882 /* virtio device doesn't exist in QOM tree */
3883 QTAILQ_REMOVE(&virtio_list, vdev, next);
3884 qobject_unref(obj);
3885 return NULL;
3886 }
3887 /* device exists in QOM tree & is realized */
3888 qobject_unref(obj);
3889 return vdev;
3890 }
3891 return NULL;
3892 }
3893
3894 VirtioStatus *qmp_x_query_virtio_status(const char *path, Error **errp)
3895 {
3896 VirtIODevice *vdev;
3897 VirtioStatus *status;
3898
3899 vdev = qmp_find_virtio_device(path);
3900 if (vdev == NULL) {
3901 error_setg(errp, "Path %s is not a VirtIODevice", path);
3902 return NULL;
3903 }
3904
3905 status = g_new0(VirtioStatus, 1);
3906 status->name = g_strdup(vdev->name);
3907 status->device_id = vdev->device_id;
3908 status->vhost_started = vdev->vhost_started;
3909 status->guest_features = qmp_decode_features(vdev->device_id,
3910 vdev->guest_features);
3911 status->host_features = qmp_decode_features(vdev->device_id,
3912 vdev->host_features);
3913 status->backend_features = qmp_decode_features(vdev->device_id,
3914 vdev->backend_features);
3915
3916 switch (vdev->device_endian) {
3917 case VIRTIO_DEVICE_ENDIAN_LITTLE:
3918 status->device_endian = g_strdup("little");
3919 break;
3920 case VIRTIO_DEVICE_ENDIAN_BIG:
3921 status->device_endian = g_strdup("big");
3922 break;
3923 default:
3924 status->device_endian = g_strdup("unknown");
3925 break;
3926 }
3927
3928 status->num_vqs = virtio_get_num_queues(vdev);
3929 status->status = qmp_decode_status(vdev->status);
3930 status->isr = vdev->isr;
3931 status->queue_sel = vdev->queue_sel;
3932 status->vm_running = vdev->vm_running;
3933 status->broken = vdev->broken;
3934 status->disabled = vdev->disabled;
3935 status->use_started = vdev->use_started;
3936 status->started = vdev->started;
3937 status->start_on_kick = vdev->start_on_kick;
3938 status->disable_legacy_check = vdev->disable_legacy_check;
3939 status->bus_name = g_strdup(vdev->bus_name);
3940 status->use_guest_notifier_mask = vdev->use_guest_notifier_mask;
3941
3942 if (vdev->vhost_started) {
3943 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
3944 struct vhost_dev *hdev = vdc->get_vhost(vdev);
3945
3946 status->vhost_dev = g_new0(VhostStatus, 1);
3947 status->vhost_dev->n_mem_sections = hdev->n_mem_sections;
3948 status->vhost_dev->n_tmp_sections = hdev->n_tmp_sections;
3949 status->vhost_dev->nvqs = hdev->nvqs;
3950 status->vhost_dev->vq_index = hdev->vq_index;
3951 status->vhost_dev->features =
3952 qmp_decode_features(vdev->device_id, hdev->features);
3953 status->vhost_dev->acked_features =
3954 qmp_decode_features(vdev->device_id, hdev->acked_features);
3955 status->vhost_dev->backend_features =
3956 qmp_decode_features(vdev->device_id, hdev->backend_features);
3957 status->vhost_dev->protocol_features =
3958 qmp_decode_protocols(hdev->protocol_features);
3959 status->vhost_dev->max_queues = hdev->max_queues;
3960 status->vhost_dev->backend_cap = hdev->backend_cap;
3961 status->vhost_dev->log_enabled = hdev->log_enabled;
3962 status->vhost_dev->log_size = hdev->log_size;
3963 }
3964
3965 return status;
3966 }
3967
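/*
 * QMP handler for "x-query-virtio-vhost-queue-status": expose the vhost
 * backend's view of virtqueue @queue (kick/call fds, ring addresses and
 * mapping sizes) for the device at QOM path @path.
 */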
3968 VirtVhostQueueStatus *qmp_x_query_virtio_vhost_queue_status(const char *path,
3969 uint16_t queue,
3970 Error **errp)
3971 {
3972 VirtIODevice *vdev;
3973 VirtVhostQueueStatus *status;
3974
3975 vdev = qmp_find_virtio_device(path);
3976 if (vdev == NULL) {
3977 error_setg(errp, "Path %s is not a VirtIODevice", path);
3978 return NULL;
3979 }
3980
3981 if (!vdev->vhost_started) {
3982 error_setg(errp, "vhost device has not started yet");
3983 return NULL;
3984 }
3985
3986 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
3987 struct vhost_dev *hdev = vdc->get_vhost(vdev);
3988
3989 if (queue < hdev->vq_index || queue >= hdev->vq_index + hdev->nvqs) {
3990 error_setg(errp, "Invalid vhost virtqueue number %d", queue);
3991 return NULL;
3992 }
3993
3994 status = g_new0(VirtVhostQueueStatus, 1);
3995 status->name = g_strdup(vdev->name);
3996 status->kick = hdev->vqs[queue].kick;
3997 status->call = hdev->vqs[queue].call;
3998 status->desc = (uintptr_t)hdev->vqs[queue].desc;
3999 status->avail = (uintptr_t)hdev->vqs[queue].avail;
4000 status->used = (uintptr_t)hdev->vqs[queue].used;
4001 status->num = hdev->vqs[queue].num;
4002 status->desc_phys = hdev->vqs[queue].desc_phys;
4003 status->desc_size = hdev->vqs[queue].desc_size;
4004 status->avail_phys = hdev->vqs[queue].avail_phys;
4005 status->avail_size = hdev->vqs[queue].avail_size;
4006 status->used_phys = hdev->vqs[queue].used_phys;
4007 status->used_size = hdev->vqs[queue].used_size;
4008
4009 return status;
4010 }
4011
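/*
 * QMP handler for "x-query-virtio-queue-status": report QEMU's own view
 * of virtqueue @queue (vring geometry and avail/used indices) for the
 * device at QOM path @path.
 */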
4012 VirtQueueStatus *qmp_x_query_virtio_queue_status(const char *path,
4013 uint16_t queue,
4014 Error **errp)
4015 {
4016 VirtIODevice *vdev;
4017 VirtQueueStatus *status;
4018
4019 vdev = qmp_find_virtio_device(path);
4020 if (vdev == NULL) {
4021 error_setg(errp, "Path %s is not a VirtIODevice", path);
4022 return NULL;
4023 }
4024
4025 if (queue >= VIRTIO_QUEUE_MAX || !virtio_queue_get_num(vdev, queue)) {
4026 error_setg(errp, "Invalid virtqueue number %d", queue);
4027 return NULL;
4028 }
4029
4030 status = g_new0(VirtQueueStatus, 1);
4031 status->name = g_strdup(vdev->name);
4032 status->queue_index = vdev->vq[queue].queue_index;
4033 status->inuse = vdev->vq[queue].inuse;
4034 status->vring_num = vdev->vq[queue].vring.num;
4035 status->vring_num_default = vdev->vq[queue].vring.num_default;
4036 status->vring_align = vdev->vq[queue].vring.align;
4037 status->vring_desc = vdev->vq[queue].vring.desc;
4038 status->vring_avail = vdev->vq[queue].vring.avail;
4039 status->vring_used = vdev->vq[queue].vring.used;
4040 status->used_idx = vdev->vq[queue].used_idx;
4041 status->signalled_used = vdev->vq[queue].signalled_used;
4042 status->signalled_used_valid = vdev->vq[queue].signalled_used_valid;
4043
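/*
 * While vhost is running QEMU's shadow copy of last_avail_idx may be
 * stale, so query the vhost backend for it instead.
 */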
4044 if (vdev->vhost_started) {
4045 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
4046 struct vhost_dev *hdev = vdc->get_vhost(vdev);
4047
4048 /* check if vq index exists for vhost as well */
4049 if (queue >= hdev->vq_index && queue < hdev->vq_index + hdev->nvqs) {
4050 status->has_last_avail_idx = true;
4051
4052 int vhost_vq_index =
4053 hdev->vhost_ops->vhost_get_vq_index(hdev, queue);
4054 struct vhost_vring_state state = {
4055 .index = vhost_vq_index,
4056 };
4057
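/*
 * vhost_get_vring_base() returns a status code and writes the
 * ring base (last_avail_idx) into state.num.
 */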
4058 hdev->vhost_ops->vhost_get_vring_base(hdev, &state);
4059 status->last_avail_idx = state.num;
4060 }
4061 } else {
4062 status->has_shadow_avail_idx = true;
4063 status->has_last_avail_idx = true;
4064 status->last_avail_idx = vdev->vq[queue].last_avail_idx;
4065 status->shadow_avail_idx = vdev->vq[queue].shadow_avail_idx;
4066 }
4067
4068 return status;
4069 }
4070
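/*
 * Translate VRING_DESC_F_* flag bits (plus the packed-ring avail/used
 * bits) into a list of human-readable strings for the QMP reply.
 */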
4071 static strList *qmp_decode_vring_desc_flags(uint16_t flags)
4072 {
4073 strList *list = NULL;
4074 strList *node;
4075 int i;
4076
4077 struct {
4078 uint16_t flag;
4079 const char *value;
4080 } map[] = {
4081 { VRING_DESC_F_NEXT, "next" },
4082 { VRING_DESC_F_WRITE, "write" },
4083 { VRING_DESC_F_INDIRECT, "indirect" },
4084 { 1 << VRING_PACKED_DESC_F_AVAIL, "avail" },
4085 { 1 << VRING_PACKED_DESC_F_USED, "used" },
4086 { 0, "" }
4087 };
4088
4089 for (i = 0; map[i].flag; i++) {
4090 if ((map[i].flag & flags) == 0) {
4091 continue;
4092 }
4093 node = g_malloc0(sizeof(strList));
4094 node->value = g_strdup(map[i].value);
4095 node->next = list;
4096 list = node;
4097 }
4098
4099 return list;
4100 }
4101
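/*
 * QMP handler for "x-query-virtio-queue-element": decode the descriptor
 * chain referenced by avail-ring slot @index of virtqueue @queue, or by
 * last_avail_idx if no @index is given.  Only split virtqueues are
 * supported.  Example invocation (the QOM path shown is illustrative):
 *
 *   -> { "execute": "x-query-virtio-queue-element",
 *        "arguments": {
 *            "path": "/machine/peripheral-anon/device[1]/virtio-backend",
 *            "queue": 0 } }
 */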
4102 VirtioQueueElement *qmp_x_query_virtio_queue_element(const char *path,
4103 uint16_t queue,
4104 bool has_index,
4105 uint16_t index,
4106 Error **errp)
4107 {
4108 VirtIODevice *vdev;
4109 VirtQueue *vq;
4110 VirtioQueueElement *element = NULL;
4111
4112 vdev = qmp_find_virtio_device(path);
4113 if (vdev == NULL) {
4114 error_setg(errp, "Path %s is not a VirtIODevice", path);
4115 return NULL;
4116 }
4117
4118 if (queue >= VIRTIO_QUEUE_MAX || !virtio_queue_get_num(vdev, queue)) {
4119 error_setg(errp, "Invalid virtqueue number %d", queue);
4120 return NULL;
4121 }
4122 vq = &vdev->vq[queue];
4123
4124 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
4125 error_setg(errp, "Packed ring not supported");
4126 return NULL;
4127 } else {
4128 unsigned int head, i, max;
4129 VRingMemoryRegionCaches *caches;
4130 MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
4131 MemoryRegionCache *desc_cache;
4132 VRingDesc desc;
4133 VirtioRingDescList *list = NULL;
4134 VirtioRingDescList *node;
4135 int rc, ndescs;
4136
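/* The vring memory region caches are RCU-protected */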
4137 RCU_READ_LOCK_GUARD();
4138
4139 max = vq->vring.num;
4140
4141 if (!has_index) {
4142 head = vring_avail_ring(vq, vq->last_avail_idx % vq->vring.num);
4143 } else {
4144 head = vring_avail_ring(vq, index % vq->vring.num);
4145 }
4146 i = head;
4147
4148 caches = vring_get_region_caches(vq);
4149 if (!caches) {
4150 error_setg(errp, "Region caches not initialized");
4151 return NULL;
4152 }
4153 if (caches->desc.len < max * sizeof(VRingDesc)) {
4154 error_setg(errp, "Cannot map descriptor ring");
4155 return NULL;
4156 }
4157
4158 desc_cache = &caches->desc;
4159 vring_split_desc_read(vdev, &desc, desc_cache, i);
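/*
 * An indirect descriptor points to a separate descriptor table in
 * guest memory; switch to that table and restart from its first entry.
 */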
4160 if (desc.flags & VRING_DESC_F_INDIRECT) {
4161 int64_t len;
4162 len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
4163 desc.addr, desc.len, false);
4164 desc_cache = &indirect_desc_cache;
4165 if (len < desc.len) {
4166 error_setg(errp, "Cannot map indirect buffer");
4167 goto done;
4168 }
4169
4170 max = desc.len / sizeof(VRingDesc);
4171 i = 0;
4172 vring_split_desc_read(vdev, &desc, desc_cache, i);
4173 }
4174
4175 element = g_new0(VirtioQueueElement, 1);
4176 element->avail = g_new0(VirtioRingAvail, 1);
4177 element->used = g_new0(VirtioRingUsed, 1);
4178 element->name = g_strdup(vdev->name);
4179 element->index = head;
4180 element->avail->flags = vring_avail_flags(vq);
4181 element->avail->idx = vring_avail_idx(vq);
4182 element->avail->ring = head;
4183 element->used->flags = vring_used_flags(vq);
4184 element->used->idx = vring_used_idx(vq);
4185 ndescs = 0;
4186
4187 do {
4188 /* A buggy driver may produce an infinite loop */
4189 if (ndescs >= max) {
4190 break;
4191 }
4192 node = g_new0(VirtioRingDescList, 1);
4193 node->value = g_new0(VirtioRingDesc, 1);
4194 node->value->addr = desc.addr;
4195 node->value->len = desc.len;
4196 node->value->flags = qmp_decode_vring_desc_flags(desc.flags);
4197 node->next = list;
4198 list = node;
4199
4200 ndescs++;
4201 rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache,
4202 max, &i);
4203 } while (rc == VIRTQUEUE_READ_DESC_MORE);
4204 element->descs = list;
4205 done:
4206 address_space_cache_destroy(&indirect_desc_cache);
4207 }
4208
4209 return element;
4210 }
4211
4212 static const TypeInfo virtio_device_info = {
4213 .name = TYPE_VIRTIO_DEVICE,
4214 .parent = TYPE_DEVICE,
4215 .instance_size = sizeof(VirtIODevice),
4216 .class_init = virtio_device_class_init,
4217 .instance_finalize = virtio_device_instance_finalize,
4218 .abstract = true,
4219 .class_size = sizeof(VirtioDeviceClass),
4220 };
4221
4222 static void virtio_register_types(void)
4223 {
4224 type_register_static(&virtio_device_info);
4225 }
4226
4227 type_init(virtio_register_types)