net/sched: sch_pie: annotate data-races in pie_dump_stats()
Author:     Eric Dumazet <edumazet@google.com>
AuthorDate: Tue, 21 Apr 2026 14:29:44 +0000 (14:29 +0000)
Commit:     Jakub Kicinski <kuba@kernel.org>
CommitDate: Thu, 23 Apr 2026 04:12:47 +0000 (21:12 -0700)
pie_dump_stats() runs with only RTNL held, reading fields that can
be changed concurrently by the qdisc fast path.

Add READ_ONCE()/WRITE_ONCE() annotations.
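
For reference, the pattern being applied is a single annotated store on
the update side paired with an annotated load on the lockless dump side.
Below is a minimal, self-contained userspace sketch; the fake_* names are
invented for illustration, and the READ_ONCE()/WRITE_ONCE() macros are
simplified stand-ins for the kernel's versions in include/linux/compiler.h
(they force exactly one untorn access and keep the compiler from caching,
tearing or re-fetching the value):

#include <stdio.h>

#define READ_ONCE(x)		(*(const volatile typeof(x) *)&(x))
#define WRITE_ONCE(x, val)	(*(volatile typeof(x) *)&(x) = (val))

struct fake_stats {
	unsigned int dropped;	/* bumped in the "fast path" */
};

static struct fake_stats stats;

/* fast path: annotated store, pairing with the dump side's READ_ONCE() */
static void fake_enqueue_drop(void)
{
	WRITE_ONCE(stats.dropped, stats.dropped + 1);
}

/* dump path: runs without the writer's lock, so the load is annotated too */
static unsigned int fake_dump_dropped(void)
{
	return READ_ONCE(stats.dropped);
}

int main(void)
{
	fake_enqueue_drop();
	printf("dropped=%u\n", fake_dump_dropped());
	return 0;
}

The plain read in "stats.dropped + 1" is fine because only the fast path
writes these counters; what the annotation guarantees is that each racy
access is performed exactly once, untorn.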

An alternative would be to acquire the qdisc spinlock, but our long-term
goal is to make qdisc dump operations as lockless as we can.
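
To make the contrast concrete, here is a sketch of that alternative in
userspace form, with a pthread mutex standing in for the qdisc spinlock
(illustrative code, not kernel APIs). In sch_pie the fast path already
runs under the qdisc spinlock, so taking it in the dump path too would
make plain reads safe again, at the cost of dumps contending with packet
processing:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t qdisc_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned int dropped;

/* fast path: already serialized by the lock, so plain accesses suffice */
static void enqueue_drop(void)
{
	pthread_mutex_lock(&qdisc_lock);
	dropped++;
	pthread_mutex_unlock(&qdisc_lock);
}

/* dump path: taking the same lock makes the snapshot trivially safe,
 * but every dump now stalls the fast path for its duration
 */
static unsigned int dump_dropped(void)
{
	unsigned int snapshot;

	pthread_mutex_lock(&qdisc_lock);
	snapshot = dropped;
	pthread_mutex_unlock(&qdisc_lock);
	return snapshot;
}

int main(void)
{
	enqueue_drop();
	printf("dropped=%u\n", dump_dropped());
	return 0;
}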

tc_pie_xstats fields do not need to be latched as one atomic snapshot;
each field can be sampled independently. Had a consistent snapshot been
required, this bug would have been caught earlier.

Fixes: edb09eb17ed8 ("net: sched: do not acquire qdisc spinlock in qdisc/class stats dump")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://patch.msgid.link/20260421142944.4009941-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
include/net/pie.h
net/sched/sch_pie.c

diff --git a/include/net/pie.h b/include/net/pie.h
index 01cbc66825a40bd21c0a044b1180cbbc346785df..1f3db0c355149b41823a891c9156cac625122031 100644
--- a/include/net/pie.h
+++ b/include/net/pie.h
@@ -104,7 +104,7 @@ static inline void pie_vars_init(struct pie_vars *vars)
        vars->dq_tstamp = DTIME_INVALID;
        vars->accu_prob = 0;
        vars->dq_count = DQCOUNT_INVALID;
-       vars->avg_dq_rate = 0;
+       WRITE_ONCE(vars->avg_dq_rate, 0);
 }
 
 static inline struct pie_skb_cb *get_pie_cb(const struct sk_buff *skb)
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index 16f3f629cb8e4be71431f7e50a278e3c7fdba8d0..fb53fbf0e328571be72b66ba4e75a938e1963422 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -90,7 +90,7 @@ static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
        bool enqueue = false;
 
        if (unlikely(qdisc_qlen(sch) >= sch->limit)) {
-               q->stats.overlimit++;
+               WRITE_ONCE(q->stats.overlimit, q->stats.overlimit + 1);
                goto out;
        }
 
@@ -104,7 +104,7 @@ static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                /* If packet is ecn capable, mark it if drop probability
                 * is lower than 10%, else drop it.
                 */
-               q->stats.ecn_mark++;
+               WRITE_ONCE(q->stats.ecn_mark, q->stats.ecn_mark + 1);
                enqueue = true;
        }
 
@@ -114,15 +114,15 @@ static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                if (!q->params.dq_rate_estimator)
                        pie_set_enqueue_time(skb);
 
-               q->stats.packets_in++;
+               WRITE_ONCE(q->stats.packets_in, q->stats.packets_in + 1);
                if (qdisc_qlen(sch) > q->stats.maxq)
-                       q->stats.maxq = qdisc_qlen(sch);
+                       WRITE_ONCE(q->stats.maxq, qdisc_qlen(sch));
 
                return qdisc_enqueue_tail(skb, sch);
        }
 
 out:
-       q->stats.dropped++;
+       WRITE_ONCE(q->stats.dropped, q->stats.dropped + 1);
        q->vars.accu_prob = 0;
        return qdisc_drop_reason(skb, sch, to_free, reason);
 }
@@ -267,11 +267,11 @@ void pie_process_dequeue(struct sk_buff *skb, struct pie_params *params,
                        count = count / dtime;
 
                        if (vars->avg_dq_rate == 0)
-                               vars->avg_dq_rate = count;
+                               WRITE_ONCE(vars->avg_dq_rate, count);
                        else
-                               vars->avg_dq_rate =
+                               WRITE_ONCE(vars->avg_dq_rate,
                                    (vars->avg_dq_rate -
-                                    (vars->avg_dq_rate >> 3)) + (count >> 3);
+                                    (vars->avg_dq_rate >> 3)) + (count >> 3));
 
                        /* If the queue has receded below the threshold, we hold
                         * on to the last drain rate calculated, else we reset
@@ -381,7 +381,7 @@ void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars,
        if (delta > 0) {
                /* prevent overflow */
                if (vars->prob < oldprob) {
-                       vars->prob = MAX_PROB;
+                       WRITE_ONCE(vars->prob, MAX_PROB);
                        /* Prevent normalization error. If probability is at
                         * maximum value already, we normalize it here, and
                         * skip the check to do a non-linear drop in the next
@@ -392,7 +392,7 @@ void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars,
        } else {
                /* prevent underflow */
                if (vars->prob > oldprob)
-                       vars->prob = 0;
+                       WRITE_ONCE(vars->prob, 0);
        }
 
        /* Non-linear drop in probability: Reduce drop probability quickly if
@@ -403,7 +403,7 @@ void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars,
                /* Reduce drop probability to 98.4% */
                vars->prob -= vars->prob / 64;
 
-       vars->qdelay = qdelay;
+       WRITE_ONCE(vars->qdelay, qdelay);
        vars->backlog_old = backlog;
 
        /* We restart the measurement cycle if the following conditions are met
@@ -502,21 +502,21 @@ static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
        struct pie_sched_data *q = qdisc_priv(sch);
        struct tc_pie_xstats st = {
                .prob           = q->vars.prob << BITS_PER_BYTE,
-               .delay          = ((u32)PSCHED_TICKS2NS(q->vars.qdelay)) /
+               .delay          = ((u32)PSCHED_TICKS2NS(READ_ONCE(q->vars.qdelay))) /
                                   NSEC_PER_USEC,
-               .packets_in     = q->stats.packets_in,
-               .overlimit      = q->stats.overlimit,
-               .maxq           = q->stats.maxq,
-               .dropped        = q->stats.dropped,
-               .ecn_mark       = q->stats.ecn_mark,
+               .packets_in     = READ_ONCE(q->stats.packets_in),
+               .overlimit      = READ_ONCE(q->stats.overlimit),
+               .maxq           = READ_ONCE(q->stats.maxq),
+               .dropped        = READ_ONCE(q->stats.dropped),
+               .ecn_mark       = READ_ONCE(q->stats.ecn_mark),
        };
 
        /* avg_dq_rate is only valid if dq_rate_estimator is enabled */
        st.dq_rate_estimating = q->params.dq_rate_estimator;
 
        /* unscale and return dq_rate in bytes per sec */
-       if (q->params.dq_rate_estimator)
-               st.avg_dq_rate = q->vars.avg_dq_rate *
+       if (st.dq_rate_estimating)
+               st.avg_dq_rate = READ_ONCE(q->vars.avg_dq_rate) *
                                 (PSCHED_TICKS_PER_SEC) >> PIE_SCALE;
 
        return gnet_stats_copy_app(d, &st, sizeof(st));
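
For reference, the dump path patched above is what a userspace statistics
query ultimately reaches; with a pie qdisc attached, something like

	tc -s qdisc show dev eth0

(device name assumed here) reports the tc_pie_xstats counters snapshotted
in pie_dump_stats().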