net: add net.core.qdisc_max_burst
author Eric Dumazet <edumazet@google.com>
Wed, 7 Jan 2026 10:41:59 +0000 (10:41 +0000)
committer Paolo Abeni <pabeni@redhat.com>
Tue, 13 Jan 2026 09:12:11 +0000 (10:12 +0100)
In the blamed commit, I added a check against the temporary queue
built in __dev_xmit_skb(). The idea was to drop packets early,
before any spinlock was acquired.

	if (unlikely(defer_count > READ_ONCE(q->limit))) {
		kfree_skb_reason(skb, SKB_DROP_REASON_QDISC_DROP);
		return NET_XMIT_DROP;
	}

It turned out that the HTB qdisc has a zero q->limit, because HTB
limits packets on a per-class basis. Some of our tests became flaky.

Add a new sysctl, net.core.qdisc_max_burst, to control how many
packets can be stored in the temporary lockless queue.

Also add a new QDISC_BURST_DROP drop reason to better diagnose
future issues.
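
The new knob will appear as /proc/sys/net/core/qdisc_max_burst. A
minimal userspace sketch reading it back (illustration only, not part
of this patch; the helper name is hypothetical):

	#include <stdio.h>

	/* Hypothetical helper: read the current qdisc_max_burst value. */
	static int read_qdisc_max_burst(void)
	{
		FILE *f = fopen("/proc/sys/net/core/qdisc_max_burst", "r");
		int val = -1;

		if (f) {
			if (fscanf(f, "%d", &val) != 1)
				val = -1;
			fclose(f);
		}
		return val;
	}

	int main(void)
	{
		printf("qdisc_max_burst = %d\n", read_qdisc_max_burst());
		return 0;
	}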

Thanks Neal!

Fixes: 100dfa74cad9 ("net: dev_queue_xmit() llist adoption")
Reported-and-bisected-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Neal Cardwell <ncardwell@google.com>
Link: https://patch.msgid.link/20260107104159.3669285-1-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Documentation/admin-guide/sysctl/net.rst
include/net/dropreason-core.h
include/net/hotdata.h
net/core/dev.c
net/core/hotdata.c
net/core/sysctl_net_core.c

index 369a738a68193e897d880eeb2c5a22cd90833938..91fa4ccd326c2b6351fd028a1c5d1c69126bee5f 100644 (file)
@@ -303,6 +303,14 @@ netdev_max_backlog
 Maximum number of packets, queued on the INPUT side, when the interface
 receives packets faster than kernel can process them.
 
+qdisc_max_burst
+---------------
+
+Maximum number of packets that can be temporarily stored before
+reaching the qdisc.
+
+Default: 1000
+
 netdev_rss_key
 --------------
 
index 58d91ccc56e0b54368c432fb9075ab174dc3a09f..a7b7abd66e215c4bcaece6f00ca03de3ac81396f 100644 (file)
@@ -67,6 +67,7 @@
        FN(TC_EGRESS)                   \
        FN(SECURITY_HOOK)               \
        FN(QDISC_DROP)                  \
+       FN(QDISC_BURST_DROP)            \
        FN(QDISC_OVERLIMIT)             \
        FN(QDISC_CONGESTED)             \
        FN(CAKE_FLOOD)                  \
@@ -374,6 +375,11 @@ enum skb_drop_reason {
         * failed to enqueue to current qdisc)
         */
        SKB_DROP_REASON_QDISC_DROP,
+       /**
+        * @SKB_DROP_REASON_QDISC_BURST_DROP: dropped when net.core.qdisc_max_burst
+        * limit is hit.
+        */
+       SKB_DROP_REASON_QDISC_BURST_DROP,
        /**
         * @SKB_DROP_REASON_QDISC_OVERLIMIT: dropped by qdisc when a qdisc
         * instance exceeds its total buffer size limit.
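
Drop reasons from this FN() list surface in the skb:kfree_skb
tracepoint, so the new QDISC_BURST_DROP string can be watched live.
A hedged sketch, assuming tracefs is mounted at /sys/kernel/tracing
(the mount point can differ per system; requires root):

	#include <stdio.h>
	#include <string.h>

	/*
	 * Sketch: enable the skb:kfree_skb tracepoint, then scan
	 * trace_pipe for QDISC_BURST_DROP entries.
	 */
	int main(void)
	{
		FILE *en = fopen("/sys/kernel/tracing/events/skb/kfree_skb/enable", "w");
		FILE *tp;
		char line[512];

		if (!en) {
			perror("kfree_skb enable");
			return 1;
		}
		fputs("1", en);
		fclose(en);

		tp = fopen("/sys/kernel/tracing/trace_pipe", "r");
		if (!tp) {
			perror("trace_pipe");
			return 1;
		}
		while (fgets(line, sizeof(line), tp))
			if (strstr(line, "QDISC_BURST_DROP"))
				fputs(line, stdout);
		fclose(tp);
		return 0;
	}
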
index 4acec191c54ab367ca12fff590d1f8c8aad64651..6632b1aa7584821fd4ab42163b77dfff6732a45e 100644 (file)
@@ -42,6 +42,7 @@ struct net_hotdata {
        int                     netdev_budget_usecs;
        int                     tstamp_prequeue;
        int                     max_backlog;
+       int                     qdisc_max_burst;
        int                     dev_tx_weight;
        int                     dev_rx_weight;
        int                     sysctl_max_skb_frags;
index 9af9c3df452f7f736430c2e39d16ef004aeaae4b..ccef685023c299dbd9fc1ccb7a914a282219a327 100644 (file)
@@ -4203,8 +4203,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
        do {
                if (first_n && !defer_count) {
                        defer_count = atomic_long_inc_return(&q->defer_count);
-                       if (unlikely(defer_count > READ_ONCE(q->limit))) {
-                               kfree_skb_reason(skb, SKB_DROP_REASON_QDISC_DROP);
+                       if (unlikely(defer_count > READ_ONCE(net_hotdata.qdisc_max_burst))) {
+                               kfree_skb_reason(skb, SKB_DROP_REASON_QDISC_BURST_DROP);
                                return NET_XMIT_DROP;
                        }
                }
@@ -4222,7 +4222,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
        ll_list = llist_del_all(&q->defer_list);
        /* There is a small race because we clear defer_count not atomically
         * with the prior llist_del_all(). This means defer_list could grow
-        * over q->limit.
+        * over qdisc_max_burst.
         */
        atomic_long_set(&q->defer_count, 0);
 
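The race noted in the comment above comes from pairing a lock-free
list with a counter that is cleared non-atomically after the drain. A
standalone userspace sketch of the same pattern (C11 atomics, not
kernel code) shows why the next batch can briefly overshoot the limit:

	#include <stdatomic.h>
	#include <stdio.h>

	#define MAX_BURST 1000

	static atomic_long defer_count = 0;

	/* Producer: count an enqueue, as atomic_long_inc_return() does. */
	static int try_defer(void)
	{
		long n = atomic_fetch_add(&defer_count, 1) + 1;

		return n > MAX_BURST ? -1 /* drop */ : 0 /* queue */;
	}

	/*
	 * Consumer: drain the (imagined) list, then reset the counter.
	 * Producers running between the drain and this store keep
	 * incrementing defer_count, so the following batch may grow a
	 * little past MAX_BURST -- the benign race __dev_xmit_skb()
	 * documents.
	 */
	static void drain(void)
	{
		/* An llist_del_all()-style drain would happen here. */
		atomic_store(&defer_count, 0);
	}

	int main(void)
	{
		printf("%s\n", try_defer() ? "dropped" : "queued");
		drain();
		return 0;
	}
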
index dddd5c287cf08ba75aec1cc546fd1bc48c0f7b26..a6db365808178d243f53ae1a817938fb17c3f968 100644 (file)
@@ -17,6 +17,7 @@ struct net_hotdata net_hotdata __cacheline_aligned = {
 
        .tstamp_prequeue = 1,
        .max_backlog = 1000,
+       .qdisc_max_burst = 1000,
        .dev_tx_weight = 64,
        .dev_rx_weight = 64,
        .sysctl_max_skb_frags = MAX_SKB_FRAGS,
index 8d4decb2606fa18222a02e59dc889efa995d2eaa..05dd55cf8b58e6c6fce498a11c09f23fd56d8f34 100644 (file)
@@ -429,6 +429,13 @@ static struct ctl_table net_core_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec
        },
+       {
+               .procname       = "qdisc_max_burst",
+               .data           = &net_hotdata.qdisc_max_burst,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec
+       },
        {
                .procname       = "netdev_rss_key",
                .data           = &netdev_rss_key,
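
With the table entry in place, tuning is a plain write to the proc
file; proc_dointvec applies no range clamping, so any int is accepted.
A hedged sketch raising the limit (the value 2000 is arbitrary;
requires root):

	#include <stdio.h>

	/* Sketch: write a new qdisc_max_burst value. */
	int main(void)
	{
		FILE *f = fopen("/proc/sys/net/core/qdisc_max_burst", "w");

		if (!f) {
			perror("qdisc_max_burst");
			return 1;
		}
		fprintf(f, "%d\n", 2000);
		fclose(f);
		return 0;
	}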