From ffe4ccd359d006eba559cb1a3c6113144b7fb38c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 7 Jan 2026 10:41:59 +0000 Subject: [PATCH] net: add net.core.qdisc_max_burst In blamed commit, I added a check against the temporary queue built in __dev_xmit_skb(). Idea was to drop packets early, before any spinlock was acquired. if (unlikely(defer_count > READ_ONCE(q->limit))) { kfree_skb_reason(skb, SKB_DROP_REASON_QDISC_DROP); return NET_XMIT_DROP; } It turned out that HTB Qdisc has a zero q->limit. HTB limits packets on a per-class basis. Some of our tests became flaky. Add a new sysctl : net.core.qdisc_max_burst to control how many packets can be stored in the temporary lockless queue. Also add a new QDISC_BURST_DROP drop reason to better diagnose future issues. Thanks Neal ! Fixes: 100dfa74cad9 ("net: dev_queue_xmit() llist adoption") Reported-and-bisected-by: Neal Cardwell Signed-off-by: Eric Dumazet Reviewed-by: Neal Cardwell Link: https://patch.msgid.link/20260107104159.3669285-1-edumazet@google.com Signed-off-by: Paolo Abeni --- Documentation/admin-guide/sysctl/net.rst | 8 ++++++++ include/net/dropreason-core.h | 6 ++++++ include/net/hotdata.h | 1 + net/core/dev.c | 6 +++--- net/core/hotdata.c | 1 + net/core/sysctl_net_core.c | 7 +++++++ 6 files changed, 26 insertions(+), 3 deletions(-) diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst index 369a738a6819..91fa4ccd326c 100644 --- a/Documentation/admin-guide/sysctl/net.rst +++ b/Documentation/admin-guide/sysctl/net.rst @@ -303,6 +303,14 @@ netdev_max_backlog Maximum number of packets, queued on the INPUT side, when the interface receives packets faster than kernel can process them. +qdisc_max_burst +------------------ + +Maximum number of packets that can be temporarily stored before +reaching qdisc. + +Default: 1000 + netdev_rss_key -------------- diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index 58d91ccc56e0..a7b7abd66e21 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -67,6 +67,7 @@ FN(TC_EGRESS) \ FN(SECURITY_HOOK) \ FN(QDISC_DROP) \ + FN(QDISC_BURST_DROP) \ FN(QDISC_OVERLIMIT) \ FN(QDISC_CONGESTED) \ FN(CAKE_FLOOD) \ @@ -374,6 +375,11 @@ enum skb_drop_reason { * failed to enqueue to current qdisc) */ SKB_DROP_REASON_QDISC_DROP, + /** + * @SKB_DROP_REASON_QDISC_BURST_DROP: dropped when net.core.qdisc_max_burst + * limit is hit. + */ + SKB_DROP_REASON_QDISC_BURST_DROP, /** * @SKB_DROP_REASON_QDISC_OVERLIMIT: dropped by qdisc when a qdisc * instance exceeds its total buffer size limit. diff --git a/include/net/hotdata.h b/include/net/hotdata.h index 4acec191c54a..6632b1aa7584 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -42,6 +42,7 @@ struct net_hotdata { int netdev_budget_usecs; int tstamp_prequeue; int max_backlog; + int qdisc_max_burst; int dev_tx_weight; int dev_rx_weight; int sysctl_max_skb_frags; diff --git a/net/core/dev.c b/net/core/dev.c index 9af9c3df452f..ccef685023c2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4203,8 +4203,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, do { if (first_n && !defer_count) { defer_count = atomic_long_inc_return(&q->defer_count); - if (unlikely(defer_count > READ_ONCE(q->limit))) { - kfree_skb_reason(skb, SKB_DROP_REASON_QDISC_DROP); + if (unlikely(defer_count > READ_ONCE(net_hotdata.qdisc_max_burst))) { + kfree_skb_reason(skb, SKB_DROP_REASON_QDISC_BURST_DROP); return NET_XMIT_DROP; } } @@ -4222,7 +4222,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, ll_list = llist_del_all(&q->defer_list); /* There is a small race because we clear defer_count not atomically * with the prior llist_del_all(). This means defer_list could grow - * over q->limit. + * over qdisc_max_burst. */ atomic_long_set(&q->defer_count, 0); diff --git a/net/core/hotdata.c b/net/core/hotdata.c index dddd5c287cf0..a6db36580817 100644 --- a/net/core/hotdata.c +++ b/net/core/hotdata.c @@ -17,6 +17,7 @@ struct net_hotdata net_hotdata __cacheline_aligned = { .tstamp_prequeue = 1, .max_backlog = 1000, + .qdisc_max_burst = 1000, .dev_tx_weight = 64, .dev_rx_weight = 64, .sysctl_max_skb_frags = MAX_SKB_FRAGS, diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 8d4decb2606f..05dd55cf8b58 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -429,6 +429,13 @@ static struct ctl_table net_core_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "qdisc_max_burst", + .data = &net_hotdata.qdisc_max_burst, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { .procname = "netdev_rss_key", .data = &netdev_rss_key, -- 2.47.3