git.ipfire.org Git - thirdparty/linux.git/commitdiff
ipv6: flowlabel: enforce per-netns limit for unprivileged callers
author Maoyi Xie <maoyi.xie@ntu.edu.sg>
Wed, 6 May 2026 08:24:16 +0000 (16:24 +0800)
committer Jakub Kicinski <kuba@kernel.org>
Fri, 8 May 2026 21:59:14 +0000 (14:59 -0700)
fl_size, fl_ht and ip6_fl_lock in net/ipv6/ip6_flowlabel.c have
file scope and are shared across all netns. mem_check() reads the
global fl_size to decide whether to deny non-CAP_NET_ADMIN callers.
capable() runs against init_user_ns, so callers in a non-init userns
are always treated as unprivileged and all draw from the same global
budget. An unprivileged user in any non-init userns can therefore
push fl_size past FL_MAX_SIZE - FL_MAX_SIZE / 4 and starve every
other unprivileged userns on the host.

Add struct netns_ipv6::flowlabel_count, bumped and decremented
next to fl_size in fl_intern, ip6_fl_gc and ip6_fl_purge. The new
field fills the existing 4-byte hole after ipmr_seq, so struct
netns_ipv6 stays the same size on 64-bit builds.

Bump FL_MAX_SIZE from 4096 to 8192. It has been 4096 since the
file was added. Machines and connection counts have grown.

mem_check() folds an extra per-netns ceiling into the existing
non-CAP_NET_ADMIN conditional. The ceiling is half of the total
budget available to unprivileged callers, i.e. with the new
FL_MAX_SIZE of 8192, (FL_MAX_SIZE - FL_MAX_SIZE / 4) / 2 = 3072
entries. With FL_MAX_SIZE doubled, this preserves the original
per-user reach of 3K (what an unprivileged caller could already
obtain before this change, when the global unprivileged budget was
4096 - 4096 / 4 = 3072), while forcing an attacker to spread
allocations across at least two netns to exhaust the global
non-CAP_NET_ADMIN budget.

CAP_NET_ADMIN against init_user_ns still bypasses both caps.

The previous patch took ip6_fl_lock across mem_check and
fl_intern, so the new flowlabel_count read in mem_check and the
new flowlabel_count++ in fl_intern run under the same critical
section. flowlabel_count is therefore plain int, like fl_size.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Suggested-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Cc: stable@vger.kernel.org # v5.15+
Signed-off-by: Maoyi Xie <maoyi.xie@ntu.edu.sg>
Link: https://patch.msgid.link/20260506082416.2259567-3-maoyixie.tju@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
include/net/netns/ipv6.h
net/ipv6/ip6_flowlabel.c

index 499e4288170fc6c59039614addcc82267a317d6b..875916d60bfe4e640d2fd834603f1896eb1b4842 100644 (file)
@@ -119,6 +119,7 @@ struct netns_ipv6 {
        struct fib_notifier_ops *notifier_ops;
        struct fib_notifier_ops *ip6mr_notifier_ops;
        atomic_t                ipmr_seq;
+       int                     flowlabel_count;
        struct {
                struct hlist_head head;
                spinlock_t      lock;
index a8974643195a1547843632bcf8de7dcbba327a06..b1ccdf0dc64699f55ce1cd0c0cc92eaef3909e28 100644 (file)
@@ -36,7 +36,7 @@
 /* FL hash table */
 
 #define FL_MAX_PER_SOCK        32
-#define FL_MAX_SIZE    4096
+#define FL_MAX_SIZE    8192
 #define FL_HASH_MASK   255
 #define FL_HASH(l)     (ntohl(l)&FL_HASH_MASK)
 
@@ -162,8 +162,9 @@ static void ip6_fl_gc(struct timer_list *unused)
                                ttd = fl->expires;
                                if (time_after_eq(now, ttd)) {
                                        *flp = fl->next;
-                                       fl_free(fl);
                                        fl_size--;
+                                       fl->fl_net->ipv6.flowlabel_count--;
+                                       fl_free(fl);
                                        continue;
                                }
                                if (!sched || time_before(ttd, sched))
@@ -197,6 +198,7 @@ static void __net_exit ip6_fl_purge(struct net *net)
                                *flp = fl->next;
                                fl_free(fl);
                                fl_size--;
+                               net->ipv6.flowlabel_count--;
                                continue;
                        }
                        flp = &fl->next;
@@ -243,6 +245,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net,
        fl->next = fl_ht[FL_HASH(fl->label)];
        rcu_assign_pointer(fl_ht[FL_HASH(fl->label)], fl);
        fl_size++;
+       net->ipv6.flowlabel_count++;
        return NULL;
 }
 
@@ -460,6 +463,9 @@ done:
 
 static int mem_check(struct sock *sk)
 {
+       const int unpriv_total_limit = FL_MAX_SIZE - (FL_MAX_SIZE / 4);
+       const int unpriv_user_limit = unpriv_total_limit / 2;
+       struct net *net = sock_net(sk);
        int room;
        struct ipv6_fl_socklist *sfl;
        int count = 0;
@@ -478,7 +484,9 @@ static int mem_check(struct sock *sk)
 
        if (room <= 0 ||
            ((count >= FL_MAX_PER_SOCK ||
-             (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4) &&
+             (count > 0 && room < FL_MAX_SIZE / 2) ||
+             room < FL_MAX_SIZE / 4 ||
+             net->ipv6.flowlabel_count >= unpriv_user_limit) &&
             !capable(CAP_NET_ADMIN)))
                return -ENOBUFS;