git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
codel: annotate data-races in codel_dump_stats()
author: Eric Dumazet <edumazet@google.com>
Tue, 7 Apr 2026 14:30:53 +0000 (14:30 +0000)
committer: Jakub Kicinski <kuba@kernel.org>
Thu, 9 Apr 2026 02:18:52 +0000 (19:18 -0700)
codel_dump_stats() only runs with RTNL held,
reading fields that can be changed in qdisc fast path.

Add READ_ONCE()/WRITE_ONCE() annotations.

Alternative would be to acquire the qdisc spinlock, but our long-term
goal is to make qdisc dump operations lockless as much as we can.

tc_codel_xstats fields don't need to be latched atomically,
otherwise this bug would have been caught earlier.

No change in kernel size:

$ scripts/bloat-o-meter -t vmlinux.0 vmlinux
add/remove: 0/0 grow/shrink: 1/1 up/down: 3/-1 (2)
Function                                     old     new   delta
codel_qdisc_dequeue                         2462    2465      +3
codel_dump_stats                             250     249      -1
Total: Before=29739919, After=29739921, chg +0.00%

Fixes: 76e3cc126bb2 ("codel: Controlled Delay AQM")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260407143053.1570620-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
include/net/codel_impl.h
net/sched/sch_codel.c

index b2c359c6dd1b84e3aeb77834272641bac73f8e6a..2c1f0ec309e9fe02e79d377129da6c5c0cbce19b 100644 (file)
@@ -120,10 +120,10 @@ static bool codel_should_drop(const struct sk_buff *skb,
        }
 
        skb_len = skb_len_func(skb);
-       vars->ldelay = now - skb_time_func(skb);
+       WRITE_ONCE(vars->ldelay, now - skb_time_func(skb));
 
        if (unlikely(skb_len > stats->maxpacket))
-               stats->maxpacket = skb_len;
+               WRITE_ONCE(stats->maxpacket, skb_len);
 
        if (codel_time_before(vars->ldelay, params->target) ||
            *backlog <= params->mtu) {
@@ -159,7 +159,7 @@ static struct sk_buff *codel_dequeue(void *ctx,
 
        if (!skb) {
                vars->first_above_time = 0;
-               vars->dropping = false;
+               WRITE_ONCE(vars->dropping, false);
                return skb;
        }
        now = codel_get_time();
@@ -168,7 +168,7 @@ static struct sk_buff *codel_dequeue(void *ctx,
        if (vars->dropping) {
                if (!drop) {
                        /* sojourn time below target - leave dropping state */
-                       vars->dropping = false;
+                       WRITE_ONCE(vars->dropping, false);
                } else if (codel_time_after_eq(now, vars->drop_next)) {
                        /* It's time for the next drop. Drop the current
                         * packet and dequeue the next. The dequeue might
@@ -180,16 +180,18 @@ static struct sk_buff *codel_dequeue(void *ctx,
                         */
                        while (vars->dropping &&
                               codel_time_after_eq(now, vars->drop_next)) {
-                               vars->count++; /* dont care of possible wrap
-                                               * since there is no more divide
-                                               */
+                               /* dont care of possible wrap
+                                * since there is no more divide.
+                                */
+                               WRITE_ONCE(vars->count, vars->count + 1);
                                codel_Newton_step(vars);
                                if (params->ecn && INET_ECN_set_ce(skb)) {
-                                       stats->ecn_mark++;
-                                       vars->drop_next =
+                                       WRITE_ONCE(stats->ecn_mark,
+                                                  stats->ecn_mark + 1);
+                                       WRITE_ONCE(vars->drop_next,
                                                codel_control_law(vars->drop_next,
                                                                  params->interval,
-                                                                 vars->rec_inv_sqrt);
+                                                                 vars->rec_inv_sqrt));
                                        goto end;
                                }
                                stats->drop_len += skb_len_func(skb);
@@ -202,13 +204,13 @@ static struct sk_buff *codel_dequeue(void *ctx,
                                                       skb_time_func,
                                                       backlog, now)) {
                                        /* leave dropping state */
-                                       vars->dropping = false;
+                                       WRITE_ONCE(vars->dropping, false);
                                } else {
                                        /* and schedule the next drop */
-                                       vars->drop_next =
+                                       WRITE_ONCE(vars->drop_next,
                                                codel_control_law(vars->drop_next,
                                                                  params->interval,
-                                                                 vars->rec_inv_sqrt);
+                                                                 vars->rec_inv_sqrt));
                                }
                        }
                }
@@ -216,7 +218,7 @@ static struct sk_buff *codel_dequeue(void *ctx,
                u32 delta;
 
                if (params->ecn && INET_ECN_set_ce(skb)) {
-                       stats->ecn_mark++;
+                       WRITE_ONCE(stats->ecn_mark, stats->ecn_mark + 1);
                } else {
                        stats->drop_len += skb_len_func(skb);
                        drop_func(skb, ctx);
@@ -227,7 +229,7 @@ static struct sk_buff *codel_dequeue(void *ctx,
                                                 stats, skb_len_func,
                                                 skb_time_func, backlog, now);
                }
-               vars->dropping = true;
+               WRITE_ONCE(vars->dropping, true);
                /* if min went above target close to when we last went below it
                 * assume that the drop rate that controlled the queue on the
                 * last cycle is a good starting point to control it now.
@@ -236,19 +238,20 @@ static struct sk_buff *codel_dequeue(void *ctx,
                if (delta > 1 &&
                    codel_time_before(now - vars->drop_next,
                                      16 * params->interval)) {
-                       vars->count = delta;
+                       WRITE_ONCE(vars->count, delta);
                        /* we dont care if rec_inv_sqrt approximation
                         * is not very precise :
                         * Next Newton steps will correct it quadratically.
                         */
                        codel_Newton_step(vars);
                } else {
-                       vars->count = 1;
+                       WRITE_ONCE(vars->count, 1);
                        vars->rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT;
                }
-               vars->lastcount = vars->count;
-               vars->drop_next = codel_control_law(now, params->interval,
-                                                   vars->rec_inv_sqrt);
+               WRITE_ONCE(vars->lastcount, vars->count);
+               WRITE_ONCE(vars->drop_next,
+                          codel_control_law(now, params->interval,
+                                            vars->rec_inv_sqrt));
        }
 end:
        if (skb && codel_time_after(vars->ldelay, params->ce_threshold)) {
@@ -262,7 +265,7 @@ end:
                                   params->ce_threshold_selector));
                }
                if (set_ce && INET_ECN_set_ce(skb))
-                       stats->ce_mark++;
+                       WRITE_ONCE(stats->ce_mark, stats->ce_mark + 1);
        }
        return skb;
 }
index dc2be90666ffbd715550800790a0091acd28701d..317aae0ec7bd6aedb4bae09b18423c981fed16e7 100644 (file)
@@ -85,7 +85,7 @@ static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                return qdisc_enqueue_tail(skb, sch);
        }
        q = qdisc_priv(sch);
-       q->drop_overlimit++;
+       WRITE_ONCE(q->drop_overlimit, q->drop_overlimit + 1);
        return qdisc_drop_reason(skb, sch, to_free, QDISC_DROP_OVERLIMIT);
 }
 
@@ -221,18 +221,18 @@ static int codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 {
        const struct codel_sched_data *q = qdisc_priv(sch);
        struct tc_codel_xstats st = {
-               .maxpacket      = q->stats.maxpacket,
-               .count          = q->vars.count,
-               .lastcount      = q->vars.lastcount,
-               .drop_overlimit = q->drop_overlimit,
-               .ldelay         = codel_time_to_us(q->vars.ldelay),
-               .dropping       = q->vars.dropping,
-               .ecn_mark       = q->stats.ecn_mark,
-               .ce_mark        = q->stats.ce_mark,
+               .maxpacket      = READ_ONCE(q->stats.maxpacket),
+               .count          = READ_ONCE(q->vars.count),
+               .lastcount      = READ_ONCE(q->vars.lastcount),
+               .drop_overlimit = READ_ONCE(q->drop_overlimit),
+               .ldelay         = codel_time_to_us(READ_ONCE(q->vars.ldelay)),
+               .dropping       = READ_ONCE(q->vars.dropping),
+               .ecn_mark       = READ_ONCE(q->stats.ecn_mark),
+               .ce_mark        = READ_ONCE(q->stats.ce_mark),
        };
 
-       if (q->vars.dropping) {
-               codel_tdiff_t delta = q->vars.drop_next - codel_get_time();
+       if (st.dropping) {
+               codel_tdiff_t delta = READ_ONCE(q->vars.drop_next) - codel_get_time();
 
                if (delta >= 0)
                        st.drop_next = codel_time_to_us(delta);