vhost: basic in order support
Author:     Jason Wang <jasowang@redhat.com>
AuthorDate: Mon, 14 Jul 2025 08:47:54 +0000 (16:47 +0800)
Commit:     Michael S. Tsirkin <mst@redhat.com>
CommitDate: Fri, 1 Aug 2025 13:11:09 +0000 (09:11 -0400)
This patch adds basic in-order support for vhost. Two optimizations
are implemented in this patch:

1) Since the driver uses descriptors in order, vhost can deduce the
   next avail ring head by counting the number of descriptors that
   have been used, tracked in next_avail_head. This eliminates the
   need for vhost to access the available ring (see the sketch after
   this list).

2) vhost_add_used_and_signal_n() is extended to accept the number of
   batched buffers per used elem. While this increases the number of
   userspace memory accesses, it helps to reduce used ring accesses
   by both the driver and vhost (see the usage sketch further below).
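
A minimal sketch of the first optimization, paraphrasing the
vhost_get_vq_desc() change in the diff below (read_avail_ring_head()
is a hypothetical stand-in for the userspace access that the real
code performs inline):

    /* With VIRTIO_F_IN_ORDER the guest makes descriptors available
     * sequentially, so the next head is just a running counter
     * masked by the (power-of-two) ring size. Without the feature,
     * the head must be fetched from the avail ring in guest memory.
     */
    if (in_order)
        head = vq->next_avail_head & (vq->num - 1);
    else
        head = read_avail_ring_head(vq, vq->last_avail_idx);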

Vhost-net will be the first user of this.
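
As a rough usage illustration of the extended interface (a sketch
with made-up values, not actual vhost-net code), a caller that
consumed three descriptors behind a single used elem can report them
in one entry, while passing NULL for nheads keeps the old
out-of-order behaviour:

    u16 nheads[1] = { 3 };  /* one used elem covering 3 descriptors */

    /* Writes one used ring entry but advances last_used_idx by 3. */
    vhost_add_used_and_signal_n(dev, vq, heads, nheads, 1);

    /* NULL nheads: one used ring entry per elem, as before. */
    vhost_add_used_and_signal_n(dev, vq, heads, NULL, count);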

Acked-by: Jonah Palmer <jonah.palmer@oracle.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Message-Id: <20250714084755.11921-3-jasowang@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Lei Yang <leiyang@redhat.com>
drivers/vhost/net.c
drivers/vhost/vhost.c
drivers/vhost/vhost.h

drivers/vhost/net.c
index 7cbfc7d718b3fdc09d53a1846ea5b5c701a6a41b..4f9c67f17b49c941abc02a3d4ae4caa2e49ae30b 100644
@@ -374,7 +374,8 @@ static void vhost_zerocopy_signal_used(struct vhost_net *net,
        while (j) {
                add = min(UIO_MAXIOV - nvq->done_idx, j);
                vhost_add_used_and_signal_n(vq->dev, vq,
-                                           &vq->heads[nvq->done_idx], add);
+                                           &vq->heads[nvq->done_idx],
+                                           NULL, add);
                nvq->done_idx = (nvq->done_idx + add) % UIO_MAXIOV;
                j -= add;
        }
@@ -457,7 +458,8 @@ static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq)
        if (!nvq->done_idx)
                return;
 
-       vhost_add_used_and_signal_n(dev, vq, vq->heads, nvq->done_idx);
+       vhost_add_used_and_signal_n(dev, vq, vq->heads, NULL,
+                                   nvq->done_idx);
        nvq->done_idx = 0;
 }
 
drivers/vhost/vhost.c
index b38e39242fb9eb45a22169e095af2dbe1a360518..a4873d116df1425d2ee0117d9513893791fd10c2 100644
@@ -372,6 +372,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
        vq->avail = NULL;
        vq->used = NULL;
        vq->last_avail_idx = 0;
+       vq->next_avail_head = 0;
        vq->avail_idx = 0;
        vq->last_used_idx = 0;
        vq->signalled_used = 0;
@@ -501,6 +502,8 @@ static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
        vq->log = NULL;
        kfree(vq->heads);
        vq->heads = NULL;
+       kfree(vq->nheads);
+       vq->nheads = NULL;
 }
 
 /* Helper to allocate iovec buffers for all vqs. */
@@ -518,7 +521,9 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
                                        GFP_KERNEL);
                vq->heads = kmalloc_array(dev->iov_limit, sizeof(*vq->heads),
                                          GFP_KERNEL);
-               if (!vq->indirect || !vq->log || !vq->heads)
+               vq->nheads = kmalloc_array(dev->iov_limit, sizeof(*vq->nheads),
+                                          GFP_KERNEL);
+               if (!vq->indirect || !vq->log || !vq->heads || !vq->nheads)
                        goto err_nomem;
        }
        return 0;
@@ -2159,14 +2164,15 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
                        break;
                }
                if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
-                       vq->last_avail_idx = s.num & 0xffff;
+                       vq->next_avail_head = vq->last_avail_idx =
+                                             s.num & 0xffff;
                        vq->last_used_idx = (s.num >> 16) & 0xffff;
                } else {
                        if (s.num > 0xffff) {
                                r = -EINVAL;
                                break;
                        }
-                       vq->last_avail_idx = s.num;
+                       vq->next_avail_head = vq->last_avail_idx = s.num;
                }
                /* Forget the cached index value. */
                vq->avail_idx = vq->last_avail_idx;
@@ -2798,11 +2804,12 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
                      unsigned int *out_num, unsigned int *in_num,
                      struct vhost_log *log, unsigned int *log_num)
 {
+       bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
        struct vring_desc desc;
        unsigned int i, head, found = 0;
        u16 last_avail_idx = vq->last_avail_idx;
        __virtio16 ring_head;
-       int ret, access;
+       int ret, access, c = 0;
 
        if (vq->avail_idx == vq->last_avail_idx) {
                ret = vhost_get_avail_idx(vq);
@@ -2813,17 +2820,21 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
                        return vq->num;
        }
 
-       /* Grab the next descriptor number they're advertising, and increment
-        * the index we've seen. */
-       if (unlikely(vhost_get_avail_head(vq, &ring_head, last_avail_idx))) {
-               vq_err(vq, "Failed to read head: idx %d address %p\n",
-                      last_avail_idx,
-                      &vq->avail->ring[last_avail_idx % vq->num]);
-               return -EFAULT;
+       if (in_order)
+               head = vq->next_avail_head & (vq->num - 1);
+       else {
+               /* Grab the next descriptor number they're
+                * advertising, and increment the index we've seen. */
+               if (unlikely(vhost_get_avail_head(vq, &ring_head,
+                                                 last_avail_idx))) {
+                       vq_err(vq, "Failed to read head: idx %d address %p\n",
+                               last_avail_idx,
+                               &vq->avail->ring[last_avail_idx % vq->num]);
+                       return -EFAULT;
+               }
+               head = vhost16_to_cpu(vq, ring_head);
        }
 
-       head = vhost16_to_cpu(vq, ring_head);
-
        /* If their number is silly, that's an error. */
        if (unlikely(head >= vq->num)) {
                vq_err(vq, "Guest says index %u > %u is available",
@@ -2866,6 +2877,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
                                                "in indirect descriptor at idx %d\n", i);
                                return ret;
                        }
+                       ++c;
                        continue;
                }
 
@@ -2901,10 +2913,12 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
                        }
                        *out_num += ret;
                }
+               ++c;
        } while ((i = next_desc(vq, &desc)) != -1);
 
        /* On success, increment avail index. */
        vq->last_avail_idx++;
+       vq->next_avail_head += c;
 
        /* Assume notifications from guest are disabled at this point,
         * if they aren't we would need to update avail_event index. */
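
(A hedged reading of the counting above, not part of the patch: c is
incremented once per descriptor table slot the buffer consumes, one
per directly chained descriptor and one per indirect descriptor. For
example, a two-descriptor direct chain advances next_avail_head by 2,
while one indirect descriptor advances it by 1 regardless of the
indirect table's length, matching the order in which an in-order
driver hands out descriptor slots.)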
@@ -2928,8 +2942,9 @@ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
                cpu_to_vhost32(vq, head),
                cpu_to_vhost32(vq, len)
        };
+       u16 nheads = 1;
 
-       return vhost_add_used_n(vq, &heads, 1);
+       return vhost_add_used_n(vq, &heads, &nheads, 1);
 }
 EXPORT_SYMBOL_GPL(vhost_add_used);
 
@@ -2965,10 +2980,9 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
        return 0;
 }
 
-/* After we've used one of their buffers, we tell them about it.  We'll then
- * want to notify the guest, using eventfd. */
-int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
-                    unsigned count)
+static int vhost_add_used_n_ooo(struct vhost_virtqueue *vq,
+                               struct vring_used_elem *heads,
+                               unsigned count)
 {
        int start, n, r;
 
@@ -2981,7 +2995,69 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
                heads += n;
                count -= n;
        }
-       r = __vhost_add_used_n(vq, heads, count);
+       return __vhost_add_used_n(vq, heads, count);
+}
+
+static int vhost_add_used_n_in_order(struct vhost_virtqueue *vq,
+                                    struct vring_used_elem *heads,
+                                    const u16 *nheads,
+                                    unsigned count)
+{
+       vring_used_elem_t __user *used;
+       u16 old, new = vq->last_used_idx;
+       int start, i;
+
+       if (!nheads)
+               return -EINVAL;
+
+       start = vq->last_used_idx & (vq->num - 1);
+       used = vq->used->ring + start;
+
+       for (i = 0; i < count; i++) {
+               if (vhost_put_used(vq, &heads[i], start, 1)) {
+                       vq_err(vq, "Failed to write used");
+                       return -EFAULT;
+               }
+               start += nheads[i];
+               new += nheads[i];
+               if (start >= vq->num)
+                       start -= vq->num;
+       }
+
+       if (unlikely(vq->log_used)) {
+               /* Make sure data is seen before log. */
+               smp_wmb();
+               /* Log used ring entry write. */
+               log_used(vq, ((void __user *)used - (void __user *)vq->used),
+                        (vq->num - start) * sizeof *used);
+               if (start + count > vq->num)
+                       log_used(vq, 0,
+                                (start + count - vq->num) * sizeof *used);
+       }
+
+       old = vq->last_used_idx;
+       vq->last_used_idx = new;
+       /* If the driver never bothers to signal in a very long while,
+        * used index might wrap around. If that happens, invalidate
+        * signalled_used index we stored. TODO: make sure driver
+        * signals at least once in 2^16 and remove this. */
+       if (unlikely((u16)(new - vq->signalled_used) < (u16)(new - old)))
+               vq->signalled_used_valid = false;
+       return 0;
+}
+
+/* After we've used one of their buffers, we tell them about it.  We'll then
+ * want to notify the guest, using eventfd. */
+int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
+                    u16 *nheads, unsigned count)
+{
+       bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
+       int r;
+
+       if (!in_order || !nheads)
+               r = vhost_add_used_n_ooo(vq, heads, count);
+       else
+               r = vhost_add_used_n_in_order(vq, heads, nheads, count);
 
        if (r < 0)
                return r;
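
To make the index arithmetic of vhost_add_used_n_in_order() concrete,
a worked example with hypothetical values:

    /* Suppose vq->num = 256, last_used_idx = 254, count = 2 and
     * nheads = {3, 2}. Entry 0 is written at slot 254; start then
     * advances to 257 and wraps to 1, where entry 1 is written.
     * last_used_idx becomes 254 + 3 + 2 = 259: five descriptors are
     * consumed but only two used ring entries are written.
     */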
@@ -3064,9 +3140,11 @@ EXPORT_SYMBOL_GPL(vhost_add_used_and_signal);
 /* multi-buffer version of vhost_add_used_and_signal */
 void vhost_add_used_and_signal_n(struct vhost_dev *dev,
                                 struct vhost_virtqueue *vq,
-                                struct vring_used_elem *heads, unsigned count)
+                                struct vring_used_elem *heads,
+                                u16 *nheads,
+                                unsigned count)
 {
-       vhost_add_used_n(vq, heads, count);
+       vhost_add_used_n(vq, heads, nheads, count);
        vhost_signal(dev, vq);
 }
 EXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n);
drivers/vhost/vhost.h
index ab704d84fb3446c47501ba8b3ee7adfa45c584a5..24f3540b08a27473b7a358ec098a540fccb4585a 100644
@@ -115,6 +115,8 @@ struct vhost_virtqueue {
         * Values are limited to 0x7fff, and the high bit is used as
         * a wrap counter when using VIRTIO_F_RING_PACKED. */
        u16 last_avail_idx;
+       /* Next avail ring head when VIRTIO_F_IN_ORDER is negotiated */
+       u16 next_avail_head;
 
        /* Caches available index value from user. */
        u16 avail_idx;
@@ -141,6 +143,7 @@ struct vhost_virtqueue {
        struct iovec iotlb_iov[64];
        struct iovec *indirect;
        struct vring_used_elem *heads;
+       u16 *nheads;
        /* Protected by virtqueue mutex. */
        struct vhost_iotlb *umem;
        struct vhost_iotlb *iotlb;
@@ -235,11 +238,12 @@ bool vhost_vq_is_setup(struct vhost_virtqueue *vq);
 int vhost_vq_init_access(struct vhost_virtqueue *);
 int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len);
 int vhost_add_used_n(struct vhost_virtqueue *, struct vring_used_elem *heads,
-                    unsigned count);
+                    u16 *nheads, unsigned count);
 void vhost_add_used_and_signal(struct vhost_dev *, struct vhost_virtqueue *,
                               unsigned int id, int len);
 void vhost_add_used_and_signal_n(struct vhost_dev *, struct vhost_virtqueue *,
-                              struct vring_used_elem *heads, unsigned count);
+                                struct vring_used_elem *heads, u16 *nheads,
+                                unsigned count);
 void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *);
 void vhost_disable_notify(struct vhost_dev *, struct vhost_virtqueue *);
 bool vhost_vq_avail_empty(struct vhost_dev *, struct vhost_virtqueue *);