From 3f3aa77ff1c8b45ec8c9e40212f1a24a93e00df3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Fri, 1 May 2026 13:59:16 +0000
Subject: [PATCH] net/sched: add qstats_cpu_drop_inc() helper

1) Using this_cpu_inc() is better than going through this_cpu_ptr():
   - Single instruction on x86.
   - Store tearing prevention.

2) Change tcf_action_update_stats() to use this_cpu_add().

3) Add WRITE_ONCE() to __qdisc_qstats_drop() and qstats_drop_inc()
   in preparation for lockless "tc qdisc show".

$ scripts/bloat-o-meter -t vmlinux.old vmlinux.new
add/remove: 0/0 grow/shrink: 3/17 up/down: 72/-216 (-144)
Function                                     old     new   delta
dualpi2_enqueue_skb                          462     511     +49
tcf_ife_act                                 1061    1077     +16
taprio_enqueue                               613     620      +7
codel_qdisc_enqueue                          149     143      -6
tcf_vlan_act                                 684     676      -8
tcf_skbedit_act                              626     618      -8
tcf_police_act                               725     717      -8
tcf_mpls_act                                1297    1289      -8
tcf_gate_act                                 310     302      -8
tcf_gact_act                                 222     214      -8
tcf_csum_act                                2438    2430      -8
tcf_bpf_act                                  709     701      -8
tcf_action_update_stats                      124     115      -9
pie_qdisc_enqueue                            865     856      -9
pfifo_enqueue                                116     107      -9
choke_enqueue                               2069    2059     -10
plug_enqueue                                 139     128     -11
bfifo_enqueue                                121     110     -11
tcf_nat_act                                 1501    1489     -12
gred_enqueue                                1743    1668     -75
Total: Before=24388609, After=24388465, chg -0.00%

Signed-off-by: Eric Dumazet
Reviewed-by: Jamal Hadi Salim
Link: https://patch.msgid.link/20260501135916.2566766-1-edumazet@google.com
Signed-off-by: Jakub Kicinski
---
 include/net/act_api.h     | 2 +-
 include/net/sch_generic.h | 9 +++++++--
 net/sched/act_api.c       | 2 +-
 net/sched/act_bpf.c       | 2 +-
 net/sched/act_ife.c       | 8 ++++----
 net/sched/act_mpls.c      | 2 +-
 net/sched/act_police.c    | 2 +-
 net/sched/act_skbedit.c   | 2 +-
 net/sched/sch_cake.c      | 2 +-
 net/sched/sch_fq_codel.c  | 2 +-
 net/sched/sch_gred.c      | 2 +-
 11 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 2ec4ef9a5d0c8..167435c5615e0 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -241,7 +241,7 @@ static inline void tcf_action_update_bstats(struct tc_action *a,
static inline void tcf_action_inc_drop_qstats(struct tc_action *a) { if (likely(a->cpu_qstats)) { - qstats_drop_inc(this_cpu_ptr(a->cpu_qstats)); + qstats_cpu_drop_inc(a->cpu_qstats); return; } atomic_inc(&a->tcfa_drops); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index cbfe9ed435fd7..ccfabfac674ef 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -996,12 +996,17 @@ static inline void qdisc_qstats_cpu_requeues_inc(struct Qdisc *sch) static inline void __qdisc_qstats_drop(struct Qdisc *sch, int count) { - sch->qstats.drops += count; + WRITE_ONCE(sch->qstats.drops, sch->qstats.drops + count); } static inline void qstats_drop_inc(struct gnet_stats_queue *qstats) { - qstats->drops++; + WRITE_ONCE(qstats->drops, qstats->drops + 1); +} + +static inline void qstats_cpu_drop_inc(struct gnet_stats_queue __percpu *qstats) +{ + this_cpu_inc(qstats->drops); } static inline void qstats_cpu_overlimit_inc(struct gnet_stats_queue __percpu *qstats) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 332fd9695e54a..551992683d9e6 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1578,7 +1578,7 @@ void tcf_action_update_stats(struct tc_action *a, u64 bytes, u64 packets, if (a->cpu_bstats) { _bstats_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); - this_cpu_ptr(a->cpu_qstats)->drops += drops; + this_cpu_add(a->cpu_qstats->drops, drops); if (hw) _bstats_update(this_cpu_ptr(a->cpu_bstats_hw), diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index c2b5bc19e0911..58a0746511767 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -76,7 +76,7 @@ TC_INDIRECT_SCOPE int tcf_bpf_act(struct sk_buff *skb, break; case TC_ACT_SHOT: action = filter_res; - qstats_drop_inc(this_cpu_ptr(prog->common.cpu_qstats)); + qstats_cpu_drop_inc(prog->common.cpu_qstats); break; case TC_ACT_UNSPEC: action = prog->tcf_action; diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index e1b825e14900d..065228026c58e 100644 --- 
a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -727,7 +727,7 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, tlv_data = ife_decode(skb, &metalen); if (unlikely(!tlv_data)) { - qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats)); + qstats_cpu_drop_inc(ife->common.cpu_qstats); return TC_ACT_SHOT; } @@ -740,7 +740,7 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, curr_data = ife_tlv_meta_decode(tlv_data, ifehdr_end, &mtype, &dlen, NULL); if (!curr_data) { - qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats)); + qstats_cpu_drop_inc(ife->common.cpu_qstats); return TC_ACT_SHOT; } @@ -755,7 +755,7 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, } if (WARN_ON(tlv_data != ifehdr_end)) { - qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats)); + qstats_cpu_drop_inc(ife->common.cpu_qstats); return TC_ACT_SHOT; } @@ -821,7 +821,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, * so lets be conservative.. 
*/ if ((action == TC_ACT_SHOT) || exceed_mtu) { drop: - qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats)); + qstats_cpu_drop_inc(ife->common.cpu_qstats); return TC_ACT_SHOT; } diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index 1abfaf9d99f1f..4ea8b2e08c3a4 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -123,7 +123,7 @@ TC_INDIRECT_SCOPE int tcf_mpls_act(struct sk_buff *skb, return p->action; drop: - qstats_drop_inc(this_cpu_ptr(m->common.cpu_qstats)); + qstats_cpu_drop_inc(m->common.cpu_qstats); return TC_ACT_SHOT; } diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 8060f43e4d11c..b16468a98c55e 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -310,7 +310,7 @@ inc_overlimits: qstats_cpu_overlimit_inc(police->common.cpu_qstats); inc_drops: if (ret == TC_ACT_SHOT) - qstats_drop_inc(this_cpu_ptr(police->common.cpu_qstats)); + qstats_cpu_drop_inc(police->common.cpu_qstats); end: return ret; } diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index a778cdba9258c..bfec6b6684103 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -86,7 +86,7 @@ TC_INDIRECT_SCOPE int tcf_skbedit_act(struct sk_buff *skb, return params->action; err: - qstats_drop_inc(this_cpu_ptr(d->common.cpu_qstats)); + qstats_cpu_drop_inc(d->common.cpu_qstats); return TC_ACT_SHOT; } diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 13c6d1869a144..c779e72f153c9 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1845,7 +1845,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (ack) { WRITE_ONCE(b->ack_drops, b->ack_drops + 1); - sch->qstats.drops++; + qdisc_qstats_drop(sch); ack_pkt_len = qdisc_pkt_len(ack); WRITE_ONCE(b->bytes, b->bytes + ack_pkt_len); q->buffer_used += skb->truesize - ack->truesize; diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 0664b2f2d6f28..8ba722faf7e3c 100644 --- a/net/sched/sch_fq_codel.c +++ 
b/net/sched/sch_fq_codel.c @@ -176,7 +176,7 @@ static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets, flow->cvars.count += i; q->backlogs[idx] -= len; q->memory_usage -= mem; - sch->qstats.drops += i; + __qdisc_qstats_drop(sch, i); sch->qstats.backlog -= len; sch->q.qlen -= i; return idx; diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 36d0cafac2063..8ae65572162c1 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -389,7 +389,7 @@ static int gred_offload_dump_stats(struct Qdisc *sch) packets += u64_stats_read(&hw_stats->stats.bstats[i].packets); sch->qstats.qlen += hw_stats->stats.qstats[i].qlen; sch->qstats.backlog += hw_stats->stats.qstats[i].backlog; - sch->qstats.drops += hw_stats->stats.qstats[i].drops; + __qdisc_qstats_drop(sch, hw_stats->stats.qstats[i].drops); sch->qstats.requeues += hw_stats->stats.qstats[i].requeues; sch->qstats.overlimits += hw_stats->stats.qstats[i].overlimits; } -- 2.47.3