From: Maoyi Xie Date: Wed, 6 May 2026 08:24:15 +0000 (+0800) Subject: ipv6: flowlabel: take ip6_fl_lock across mem_check and fl_intern X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=7ce5556f255a680d80daa31b1cedecf7f89e2c22;p=thirdparty%2Fkernel%2Fstable.git ipv6: flowlabel: take ip6_fl_lock across mem_check and fl_intern mem_check() in net/ipv6/ip6_flowlabel.c reads fl_size without holding ip6_fl_lock. fl_intern() takes the lock immediately afterwards. The two checks therefore race against concurrent fl_intern, ip6_fl_gc and ip6_fl_purge writers, which makes the mem_check budget check approximate. Move spin_lock_bh(&ip6_fl_lock) and the matching unlock from fl_intern() into its only caller ipv6_flowlabel_get(). The mem_check() call now runs under the same critical section as the fl_intern() insert, so the budget check is exact. With all writers and the read of fl_size under ip6_fl_lock, convert fl_size from atomic_t to plain int. The four sites that update or read fl_size are fl_intern (insert path), ip6_fl_gc (garbage collector, the !sched check and the per-entry decrement), ip6_fl_purge (per-netns purge), and mem_check (budget check), and all four now run under ip6_fl_lock. This is a prerequisite for adding a per-netns budget alongside fl_size. The follow-up patch adds netns_ipv6::flowlabel_count and folds it into mem_check(). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Suggested-by: Willem de Bruijn Reviewed-by: Willem de Bruijn Signed-off-by: Maoyi Xie Link: https://patch.msgid.link/20260506082416.2259567-2-maoyixie.tju@gmail.com Signed-off-by: Jakub Kicinski --- diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index c92f98c6f6ec..a8974643195a 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -40,7 +40,7 @@ #define FL_HASH_MASK 255 #define FL_HASH(l) (ntohl(l)&FL_HASH_MASK) -static atomic_t fl_size = ATOMIC_INIT(0); +static int fl_size; static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1]; static void ip6_fl_gc(struct timer_list *unused); @@ -163,7 +163,7 @@ static void ip6_fl_gc(struct timer_list *unused) if (time_after_eq(now, ttd)) { *flp = fl->next; fl_free(fl); - atomic_dec(&fl_size); + fl_size--; continue; } if (!sched || time_before(ttd, sched)) @@ -172,7 +172,7 @@ static void ip6_fl_gc(struct timer_list *unused) flp = &fl->next; } } - if (!sched && atomic_read(&fl_size)) + if (!sched && fl_size) sched = now + FL_MAX_LINGER; if (sched) { mod_timer(&ip6_fl_gc_timer, sched); @@ -196,7 +196,7 @@ static void __net_exit ip6_fl_purge(struct net *net) atomic_read(&fl->users) == 0) { *flp = fl->next; fl_free(fl); - atomic_dec(&fl_size); + fl_size--; continue; } flp = &fl->next; @@ -210,10 +210,10 @@ static struct ip6_flowlabel *fl_intern(struct net *net, { struct ip6_flowlabel *lfl; + lockdep_assert_held(&ip6_fl_lock); + fl->label = label & IPV6_FLOWLABEL_MASK; - rcu_read_lock(); - spin_lock_bh(&ip6_fl_lock); if (label == 0) { for (;;) { fl->label = htonl(get_random_u32())&IPV6_FLOWLABEL_MASK; @@ -235,8 +235,6 @@ static struct ip6_flowlabel *fl_intern(struct net *net, lfl = __fl_lookup(net, fl->label); if (lfl) { atomic_inc(&lfl->users); - spin_unlock_bh(&ip6_fl_lock); - rcu_read_unlock(); return lfl; } } @@ -244,9 +242,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net, fl->lastuse = jiffies; fl->next = fl_ht[FL_HASH(fl->label)]; rcu_assign_pointer(fl_ht[FL_HASH(fl->label)], fl); - atomic_inc(&fl_size); - spin_unlock_bh(&ip6_fl_lock); - rcu_read_unlock(); + fl_size++; return NULL; } @@ -464,10 +460,14 @@ done: static int mem_check(struct sock *sk) { - int room = FL_MAX_SIZE - atomic_read(&fl_size); + int room; struct ipv6_fl_socklist *sfl; int count = 0; + lockdep_assert_held(&ip6_fl_lock); + + room = FL_MAX_SIZE - fl_size; + if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK) return 0; @@ -692,11 +692,19 @@ release: if (!sfl1) goto done; + rcu_read_lock(); + spin_lock_bh(&ip6_fl_lock); err = mem_check(sk); + if (err == 0) + fl1 = fl_intern(net, fl, freq->flr_label); + else + fl1 = NULL; + spin_unlock_bh(&ip6_fl_lock); + rcu_read_unlock(); + if (err != 0) goto done; - fl1 = fl_intern(net, fl, freq->flr_label); if (fl1) goto recheck;