--- /dev/null
+From 2cef01bde92c5bcec824604bdec23da7a03f24b6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 5 Nov 2024 17:52:34 -0800
+Subject: netlink: terminate outstanding dump on socket close
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 1904fb9ebf911441f90a68e96b22aa73e4410505 ]
+
+Netlink supports iterative dumping of data. It provides the families
+the following ops:
+ - start - (optional) kicks off the dumping process
+ - dump - actual dump helper, keeps getting called until it returns 0
+ - done - (optional) pairs with .start, can be used for cleanup
+The whole process is asynchronous and the repeated calls to .dump
+don't actually happen in a tight loop, but rather are triggered
+in response to recvmsg() on the socket.
+
+This gives the user full control over the dump, but also means that
+the user can close the socket without getting to the end of the dump.
+To make sure .start is always paired with .done we check if there
+is an ongoing dump before freeing the socket, and if so call .done.
+
+The complication is that sockets can get freed from BH and .done
+is allowed to sleep. So we use a workqueue to defer the call, when
+needed.
+
+Unfortunately this does not work correctly. What we defer is not
+the cleanup but rather releasing a reference on the socket.
+We have no guarantee that we own the last reference, if someone
+else holds the socket they may release it in BH and we're back
+to square one.
+
+The whole dance, however, appears to be unnecessary. Only the user
+can interact with dumps, so we can clean up when socket is closed.
+And close always happens in process context. Some async code may
+still access the socket after close, queue notification skbs to it etc.
+but no dumps can start, end or otherwise make progress.
+
+Delete the workqueue and flush the dump state directly from the release
+handler. Note that further cleanup is possible in -next, for instance
+we now always call .done before releasing the main module reference,
+so dump doesn't have to take a reference of its own.
+
+Reported-by: syzkaller <syzkaller@googlegroups.com>
+Fixes: ed5d7788a934 ("netlink: Do not schedule work from sk_destruct")
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://patch.msgid.link/20241106015235.2458807-1-kuba@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netlink/af_netlink.c | 31 ++++++++-----------------------
+ net/netlink/af_netlink.h | 2 --
+ 2 files changed, 8 insertions(+), 25 deletions(-)
+
+diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
+index 2e26ecb46707c..8c397697e47fa 100644
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -393,15 +393,6 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
+
+ static void netlink_sock_destruct(struct sock *sk)
+ {
+- struct netlink_sock *nlk = nlk_sk(sk);
+-
+- if (nlk->cb_running) {
+- if (nlk->cb.done)
+- nlk->cb.done(&nlk->cb);
+- module_put(nlk->cb.module);
+- kfree_skb(nlk->cb.skb);
+- }
+-
+ skb_queue_purge(&sk->sk_receive_queue);
+
+ if (!sock_flag(sk, SOCK_DEAD)) {
+@@ -414,14 +405,6 @@ static void netlink_sock_destruct(struct sock *sk)
+ WARN_ON(nlk_sk(sk)->groups);
+ }
+
+-static void netlink_sock_destruct_work(struct work_struct *work)
+-{
+- struct netlink_sock *nlk = container_of(work, struct netlink_sock,
+- work);
+-
+- sk_free(&nlk->sk);
+-}
+-
+ /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
+ * SMP. Look, when several writers sleep and reader wakes them up, all but one
+ * immediately hit write lock and grab all the cpus. Exclusive sleep solves
+@@ -738,12 +721,6 @@ static void deferred_put_nlk_sk(struct rcu_head *head)
+ if (!refcount_dec_and_test(&sk->sk_refcnt))
+ return;
+
+- if (nlk->cb_running && nlk->cb.done) {
+- INIT_WORK(&nlk->work, netlink_sock_destruct_work);
+- schedule_work(&nlk->work);
+- return;
+- }
+-
+ sk_free(sk);
+ }
+
+@@ -793,6 +770,14 @@ static int netlink_release(struct socket *sock)
+ NETLINK_URELEASE, &n);
+ }
+
++ /* Terminate any outstanding dump */
++ if (nlk->cb_running) {
++ if (nlk->cb.done)
++ nlk->cb.done(&nlk->cb);
++ module_put(nlk->cb.module);
++ kfree_skb(nlk->cb.skb);
++ }
++
+ module_put(nlk->module);
+
+ if (netlink_is_kernel(sk)) {
+diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
+index 962de7b3c023d..c61e634693eaa 100644
+--- a/net/netlink/af_netlink.h
++++ b/net/netlink/af_netlink.h
+@@ -4,7 +4,6 @@
+
+ #include <linux/rhashtable.h>
+ #include <linux/atomic.h>
+-#include <linux/workqueue.h>
+ #include <net/sock.h>
+
+ /* flags */
+@@ -45,7 +44,6 @@ struct netlink_sock {
+
+ struct rhash_head node;
+ struct rcu_head rcu;
+- struct work_struct work;
+ };
+
+ static inline struct netlink_sock *nlk_sk(struct sock *sk)
+--
+2.43.0
+