+From b8cad071e722b1def54d161bc61100c1b8b5c7b4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 4 Jul 2025 05:48:18 +0000
+Subject: netlink: Fix wraparounds of sk->sk_rmem_alloc.
+
+From: Kuniyuki Iwashima <kuniyu@google.com>
+
+[ Upstream commit ae8f160e7eb24240a2a79fc4c815c6a0d4ee16cc ]
+
+Netlink has this pattern in some places:
+
+  if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
+          ...; /* drop or wait */
+
+  atomic_add(skb->truesize, &sk->sk_rmem_alloc);
+
+This has the same problem fixed by commit 5a465a0da13e ("udp:
+Fix multiple wraparounds of sk->sk_rmem_alloc.").
+
+For example, if SO_RCVBUFFORCE is set to INT_MAX, the condition is
+always false because both operands are compared as int and no int
+value can exceed INT_MAX.
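+
+A minimal userspace sketch of the failure (illustrative only: plain
+unsigned arithmetic stands in for the wrapping atomic_t, and the
+truesize and iteration values are made up):
+
+  #include <limits.h>
+  #include <stdio.h>
+
+  int main(void)
+  {
+          unsigned int rmem = 0;       /* sk->sk_rmem_alloc; atomics wrap like unsigned */
+          int rcvbuf = INT_MAX;        /* SO_RCVBUFFORCE set to INT_MAX */
+          unsigned int truesize = 640; /* a made-up skb->truesize */
+          long i;
+
+          for (i = 0; i < 5000000L; i++) {
+                  /* the old check: always false, no int exceeds INT_MAX */
+                  if ((int)rmem > rcvbuf)
+                          break;
+                  /* charged anyway; the counter sails past INT_MAX and wraps */
+                  rmem += truesize;
+          }
+
+          /* prints a negative value, matching the "Before" Recv-Q below */
+          printf("sk_rmem_alloc as int: %d\n", (int)rmem);
+          return 0;
+  }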
+
+Then, a single socket can queue skbs without bound until OOM, and
+sk->sk_rmem_alloc wraps around multiple times along the way.
+
+Let's fix it by using atomic_add_return() and comparing the two
+variables as unsigned int.
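+
+Sketched in userspace C, the fixed pattern looks roughly like this
+(try_charge() and add_return() are illustrative stand-ins for the
+patched netlink paths and atomic_add_return(), not kernel API):
+
+  #include <limits.h>
+  #include <stdbool.h>
+
+  /* Stand-in for atomic_add_return(); the kernel does this atomically. */
+  static unsigned int add_return(unsigned int *v, unsigned int delta)
+  {
+          *v += delta;
+          return *v;
+  }
+
+  static bool try_charge(unsigned int *rmem_alloc, int rcvbuf,
+                         unsigned int truesize)
+  {
+          unsigned int rmem = add_return(rmem_alloc, truesize);
+
+          /* rmem == truesize means the queue was empty, so the first skb
+           * is always accepted.  Otherwise compare as unsigned int: once
+           * rmem grows past rcvbuf (even rcvbuf == INT_MAX), the check
+           * fails instead of silently wrapping.
+           */
+          if (rmem == truesize || rmem <= (unsigned int)rcvbuf)
+                  return true;
+
+          *rmem_alloc -= truesize; /* atomic_sub() backs out the charge */
+          return false;
+  }
+
+  int main(void)
+  {
+          unsigned int rmem_alloc = 0;
+
+          /* Charging now stops within one truesize of the limit. */
+          while (try_charge(&rmem_alloc, INT_MAX, 576))
+                  ;
+          return !(rmem_alloc <= (unsigned int)INT_MAX);
+  }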
+
+Before:
+ [root@fedora ~]# ss -f netlink
+ Recv-Q Send-Q Local Address:Port Peer Address:Port
+ -1668710080 0 rtnl:nl_wraparound/293 *
+
+After:
+ [root@fedora ~]# ss -f netlink
+ Recv-Q Send-Q Local Address:Port Peer Address:Port
+ 2147483072 0 rtnl:nl_wraparound/290 *
+ ^
+ `--- INT_MAX - 576 (stops within one skb truesize of the limit)
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Reported-by: Jason Baron <jbaron@akamai.com>
+Closes: https://lore.kernel.org/netdev/cover.1750285100.git.jbaron@akamai.com/
+Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
+Link: https://patch.msgid.link/20250704054824.1580222-1-kuniyu@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netlink/af_netlink.c | 81 ++++++++++++++++++++++++----------------
+ 1 file changed, 49 insertions(+), 32 deletions(-)
+
+diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
+index 17d86eee8bd8b..4c7af7f5117ae 100644
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -378,7 +378,6 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
+ WARN_ON(skb->sk != NULL);
+ skb->sk = sk;
+ skb->destructor = netlink_skb_destructor;
+- atomic_add(skb->truesize, &sk->sk_rmem_alloc);
+ sk_mem_charge(sk, skb->truesize);
+ }
+
+@@ -1206,41 +1205,48 @@ static struct sk_buff *netlink_alloc_large_skb(unsigned int size,
+ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
+ long *timeo, struct sock *ssk)
+ {
++ DECLARE_WAITQUEUE(wait, current);
+ struct netlink_sock *nlk;
++ unsigned int rmem;
+
+ nlk = nlk_sk(sk);
++ rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
+
+- if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
+- test_bit(NETLINK_S_CONGESTED, &nlk->state))) {
+- DECLARE_WAITQUEUE(wait, current);
+- if (!*timeo) {
+- if (!ssk || netlink_is_kernel(ssk))
+- netlink_overrun(sk);
+- sock_put(sk);
+- kfree_skb(skb);
+- return -EAGAIN;
+- }
+-
+- __set_current_state(TASK_INTERRUPTIBLE);
+- add_wait_queue(&nlk->wait, &wait);
++ if ((rmem == skb->truesize || rmem < READ_ONCE(sk->sk_rcvbuf)) &&
++ !test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
++ netlink_skb_set_owner_r(skb, sk);
++ return 0;
++ }
+
+- if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
+- test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
+- !sock_flag(sk, SOCK_DEAD))
+- *timeo = schedule_timeout(*timeo);
++ atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
+
+- __set_current_state(TASK_RUNNING);
+- remove_wait_queue(&nlk->wait, &wait);
++ if (!*timeo) {
++ if (!ssk || netlink_is_kernel(ssk))
++ netlink_overrun(sk);
+ sock_put(sk);
++ kfree_skb(skb);
++ return -EAGAIN;
++ }
+
+- if (signal_pending(current)) {
+- kfree_skb(skb);
+- return sock_intr_errno(*timeo);
+- }
+- return 1;
++ __set_current_state(TASK_INTERRUPTIBLE);
++ add_wait_queue(&nlk->wait, &wait);
++ rmem = atomic_read(&sk->sk_rmem_alloc);
++
++ if (((rmem && rmem + skb->truesize > READ_ONCE(sk->sk_rcvbuf)) ||
++ test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
++ !sock_flag(sk, SOCK_DEAD))
++ *timeo = schedule_timeout(*timeo);
++
++ __set_current_state(TASK_RUNNING);
++ remove_wait_queue(&nlk->wait, &wait);
++ sock_put(sk);
++
++ if (signal_pending(current)) {
++ kfree_skb(skb);
++ return sock_intr_errno(*timeo);
+ }
+- netlink_skb_set_owner_r(skb, sk);
+- return 0;
++
++ return 1;
+ }
+
+ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
+@@ -1300,6 +1306,7 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
+ ret = -ECONNREFUSED;
+ if (nlk->netlink_rcv != NULL) {
+ ret = skb->len;
++ atomic_add(skb->truesize, &sk->sk_rmem_alloc);
+ netlink_skb_set_owner_r(skb, sk);
+ NETLINK_CB(skb).sk = ssk;
+ netlink_deliver_tap_kernel(sk, ssk, skb);
+@@ -1378,13 +1385,19 @@ EXPORT_SYMBOL_GPL(netlink_strict_get_check);
+ static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
+ {
+ struct netlink_sock *nlk = nlk_sk(sk);
++ unsigned int rmem, rcvbuf;
+
+- if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
++ rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
++ rcvbuf = READ_ONCE(sk->sk_rcvbuf);
++
++ if ((rmem == skb->truesize || rmem <= rcvbuf) &&
+ !test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
+ netlink_skb_set_owner_r(skb, sk);
+ __netlink_sendskb(sk, skb);
+- return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
++ return rmem > (rcvbuf >> 1);
+ }
++
++ atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
+ return -1;
+ }
+
+@@ -2177,6 +2190,7 @@ static int netlink_dump(struct sock *sk)
+ struct module *module;
+ int err = -ENOBUFS;
+ int alloc_min_size;
++ unsigned int rmem;
+ int alloc_size;
+
+ mutex_lock(nlk->cb_mutex);
+@@ -2185,9 +2199,6 @@ static int netlink_dump(struct sock *sk)
+ goto errout_skb;
+ }
+
+- if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
+- goto errout_skb;
+-
+ /* NLMSG_GOODSIZE is small to avoid high order allocations being
+ * required, but it makes sense to _attempt_ a 16K bytes allocation
+ * to reduce number of system calls on dump operations, if user
+@@ -2209,6 +2220,12 @@ static int netlink_dump(struct sock *sk)
+ if (!skb)
+ goto errout_skb;
+
++ rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
++ if (rmem >= READ_ONCE(sk->sk_rcvbuf)) {
++ atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
++ goto errout_skb;
++ }
++
+ /* Trim skb to allocated size. User is expected to provide buffer as
+ * large as max(min_dump_alloc, 16KiB (mac_recvmsg_len capped at
+ * netlink_recvmsg())). dump will pack as many smaller messages as
+--
+2.39.5
+