]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
vhost-net: wake queue of tun/tap after ptr_ring consume
author Simon Schippers <simon.schippers@tu-dortmund.de>
Sun, 10 May 2026 15:15:27 +0000 (17:15 +0200)
committer Jakub Kicinski <kuba@kernel.org>
Thu, 14 May 2026 00:52:55 +0000 (17:52 -0700)
Add tun_wake_queue() to tun.c and export it for use by vhost-net. The
function validates that the file belongs to a tun/tap device and that
the tfile exists, dereferences the tun_struct under RCU, and delegates
to __tun_wake_queue().

vhost_net_buf_produce() now calls tun_wake_queue() after a successful
batched consume of the ring to allow the netdev subqueue to be woken up.
The point is to allow the queue to be stopped when it gets full, which
is required for traffic shaping - implemented by the following patch,
"avoid ptr_ring tail-drop when a qdisc is present".

Without the corresponding queue stopping, this patch alone causes no
throughput regression for a tap+vhost-net setup sending to a qemu VM:
3.857 Mpps to 3.891 Mpps.

Details: AMD Ryzen 5 5600X at 4.3 GHz, 3200 MHz RAM, isolated QEMU
threads, XDP drop program active in VM, pktgen sender; Avg over
50 runs @ 100,000,000 packets. SRSO and spectre v2 mitigations disabled.

Co-developed-by: Tim Gebauer <tim.gebauer@tu-dortmund.de>
Signed-off-by: Tim Gebauer <tim.gebauer@tu-dortmund.de>
Signed-off-by: Simon Schippers <simon.schippers@tu-dortmund.de>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Link: https://patch.msgid.link/20260510151529.43895-3-simon.schippers@tu-dortmund.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
drivers/net/tun.c
drivers/vhost/net.c
include/linux/if_tun.h

index 3dded7c7d12da0cb6b931487af01bec45b52cad1..cbec66646a4b963a77f249f5e85a794f4c32b482 100644 (file)
@@ -3783,6 +3783,29 @@ struct ptr_ring *tun_get_tx_ring(struct file *file)
 }
 EXPORT_SYMBOL_GPL(tun_get_tx_ring);
 
+/* Callers must hold ring.consumer_lock */
+void tun_wake_queue(struct file *file, int consumed)
+{
+       struct tun_file *tfile;
+       struct tun_struct *tun;
+
+       if (file->f_op != &tun_fops)
+               return;
+
+       tfile = file->private_data;
+       if (!tfile)
+               return;
+
+       rcu_read_lock();
+
+       tun = rcu_dereference(tfile->tun);
+       if (tun)
+               __tun_wake_queue(tun, tfile, consumed);
+
+       rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(tun_wake_queue);
+
 module_init(tun_init);
 module_exit(tun_cleanup);
 MODULE_DESCRIPTION(DRV_DESCRIPTION);
index c6536cad9c4f94f6ad38f116363c9f1949cf373d..db341c9226739c4e1bd8cc0f2182cecb1ff82cdd 100644 (file)
@@ -176,13 +176,21 @@ static void *vhost_net_buf_consume(struct vhost_net_buf *rxq)
        return ret;
 }
 
-static int vhost_net_buf_produce(struct vhost_net_virtqueue *nvq)
+static int vhost_net_buf_produce(struct sock *sk,
+                                struct vhost_net_virtqueue *nvq)
 {
+       struct file *file = sk->sk_socket->file;
        struct vhost_net_buf *rxq = &nvq->rxq;
 
        rxq->head = 0;
-       rxq->tail = ptr_ring_consume_batched(nvq->rx_ring, rxq->queue,
-                                             VHOST_NET_BATCH);
+       spin_lock(&nvq->rx_ring->consumer_lock);
+       rxq->tail = __ptr_ring_consume_batched(nvq->rx_ring, rxq->queue,
+                                              VHOST_NET_BATCH);
+
+       if (rxq->tail)
+               tun_wake_queue(file, rxq->tail);
+
+       spin_unlock(&nvq->rx_ring->consumer_lock);
        return rxq->tail;
 }
 
@@ -209,14 +217,15 @@ static int vhost_net_buf_peek_len(void *ptr)
        return __skb_array_len_with_tag(ptr);
 }
 
-static int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq)
+static int vhost_net_buf_peek(struct sock *sk,
+                             struct vhost_net_virtqueue *nvq)
 {
        struct vhost_net_buf *rxq = &nvq->rxq;
 
        if (!vhost_net_buf_is_empty(rxq))
                goto out;
 
-       if (!vhost_net_buf_produce(nvq))
+       if (!vhost_net_buf_produce(sk, nvq))
                return 0;
 
 out:
@@ -995,7 +1004,7 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk)
        unsigned long flags;
 
        if (rvq->rx_ring)
-               return vhost_net_buf_peek(rvq);
+               return vhost_net_buf_peek(sk, rvq);
 
        spin_lock_irqsave(&sk->sk_receive_queue.lock, flags);
        head = skb_peek(&sk->sk_receive_queue);
index 80166eb62f41c7a04d63c11c02df4e5ab20120a6..5f3e206c7a737584a7014c33b0f52354e128dc0a 100644 (file)
@@ -22,6 +22,7 @@ struct tun_msg_ctl {
 #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
 struct socket *tun_get_socket(struct file *);
 struct ptr_ring *tun_get_tx_ring(struct file *file);
+void tun_wake_queue(struct file *file, int consumed);
 
 static inline bool tun_is_xdp_frame(void *ptr)
 {
@@ -55,6 +56,8 @@ static inline struct ptr_ring *tun_get_tx_ring(struct file *f)
        return ERR_PTR(-EINVAL);
 }
 
+static inline void tun_wake_queue(struct file *f, int consumed) {}
+
 static inline bool tun_is_xdp_frame(void *ptr)
 {
        return false;