Fixes for 4.19

author Sasha Levin <sashal@kernel.org>

Sun, 17 Nov 2024 14:35:21 +0000 (09:35 -0500)

committer Sasha Levin <sashal@kernel.org>

Sun, 17 Nov 2024 14:35:21 +0000 (09:35 -0500)
author Sasha Levin <sashal@kernel.org>
Sun, 17 Nov 2024 14:35:21 +0000 (09:35 -0500)
committer Sasha Levin <sashal@kernel.org>
Sun, 17 Nov 2024 14:35:21 +0000 (09:35 -0500)
diff --git a/queue-4.19/netlink-terminate-outstanding-dump-on-socket-close.patch b/queue-4.19/netlink-terminate-outstanding-dump-on-socket-close.patch

new file mode 100644 (file)

index 0000000..765e166
--- /dev/null
+++ b/queue-4.19/netlink-terminate-outstanding-dump-on-socket-close.patch
@@ -0,0 +1,142 @@
+From 2cef01bde92c5bcec824604bdec23da7a03f24b6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 5 Nov 2024 17:52:34 -0800
+Subject: netlink: terminate outstanding dump on socket close
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 1904fb9ebf911441f90a68e96b22aa73e4410505 ]
+
+Netlink supports iterative dumping of data. It provides the families
+the following ops:
+ - start - (optional) kicks off the dumping process
+ - dump  - actual dump helper, keeps getting called until it returns 0
+ - done  - (optional) pairs with .start, can be used for cleanup
+The whole process is asynchronous and the repeated calls to .dump
+don't actually happen in a tight loop, but rather are triggered
+in response to recvmsg() on the socket.
+
+This gives the user full control over the dump, but also means that
+the user can close the socket without getting to the end of the dump.
+To make sure .start is always paired with .done we check if there
+is an ongoing dump before freeing the socket, and if so call .done.
+
+The complication is that sockets can get freed from BH and .done
+is allowed to sleep. So we use a workqueue to defer the call, when
+needed.
+
+Unfortunately this does not work correctly. What we defer is not
+the cleanup but rather releasing a reference on the socket.
+We have no guarantee that we own the last reference, if someone
+else holds the socket they may release it in BH and we're back
+to square one.
+
+The whole dance, however, appears to be unnecessary. Only the user
+can interact with dumps, so we can clean up when socket is closed.
+And close always happens in process context. Some async code may
+still access the socket after close, queue notification skbs to it etc.
+but no dumps can start, end or otherwise make progress.
+
+Delete the workqueue and flush the dump state directly from the release
+handler. Note that further cleanup is possible in -next, for instance
+we now always call .done before releasing the main module reference,
+so dump doesn't have to take a reference of its own.
+
+Reported-by: syzkaller <syzkaller@googlegroups.com>
+Fixes: ed5d7788a934 ("netlink: Do not schedule work from sk_destruct")
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://patch.msgid.link/20241106015235.2458807-1-kuba@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netlink/af_netlink.c | 31 ++++++++-----------------------
+ net/netlink/af_netlink.h |  2 --
+ 2 files changed, 8 insertions(+), 25 deletions(-)
+
+diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
+index 2e26ecb46707c..8c397697e47fa 100644
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -393,15 +393,6 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
+ 
+ static void netlink_sock_destruct(struct sock *sk)
+ {
+-      struct netlink_sock *nlk = nlk_sk(sk);
+-
+-      if (nlk->cb_running) {
+-              if (nlk->cb.done)
+-                      nlk->cb.done(&nlk->cb);
+-              module_put(nlk->cb.module);
+-              kfree_skb(nlk->cb.skb);
+-      }
+-
+       skb_queue_purge(&sk->sk_receive_queue);
+ 
+       if (!sock_flag(sk, SOCK_DEAD)) {
+@@ -414,14 +405,6 @@ static void netlink_sock_destruct(struct sock *sk)
+       WARN_ON(nlk_sk(sk)->groups);
+ }
+ 
+-static void netlink_sock_destruct_work(struct work_struct *work)
+-{
+-      struct netlink_sock *nlk = container_of(work, struct netlink_sock,
+-                                              work);
+-
+-      sk_free(&nlk->sk);
+-}
+-
+ /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
+  * SMP. Look, when several writers sleep and reader wakes them up, all but one
+  * immediately hit write lock and grab all the cpus. Exclusive sleep solves
+@@ -738,12 +721,6 @@ static void deferred_put_nlk_sk(struct rcu_head *head)
+       if (!refcount_dec_and_test(&sk->sk_refcnt))
+               return;
+ 
+-      if (nlk->cb_running && nlk->cb.done) {
+-              INIT_WORK(&nlk->work, netlink_sock_destruct_work);
+-              schedule_work(&nlk->work);
+-              return;
+-      }
+-
+       sk_free(sk);
+ }
+ 
+@@ -793,6 +770,14 @@ static int netlink_release(struct socket *sock)
+                               NETLINK_URELEASE, &n);
+       }
+ 
++      /* Terminate any outstanding dump */
++      if (nlk->cb_running) {
++              if (nlk->cb.done)
++                      nlk->cb.done(&nlk->cb);
++              module_put(nlk->cb.module);
++              kfree_skb(nlk->cb.skb);
++      }
++
+       module_put(nlk->module);
+ 
+       if (netlink_is_kernel(sk)) {
+diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
+index 962de7b3c023d..c61e634693eaa 100644
+--- a/net/netlink/af_netlink.h
++++ b/net/netlink/af_netlink.h
+@@ -4,7 +4,6 @@
+ 
+ #include <linux/rhashtable.h>
+ #include <linux/atomic.h>
+-#include <linux/workqueue.h>
+ #include <net/sock.h>
+ 
+ /* flags */
+@@ -45,7 +44,6 @@ struct netlink_sock {
+ 
+       struct rhash_head       node;
+       struct rcu_head         rcu;
+-      struct work_struct      work;
+ };
+ 
+ static inline struct netlink_sock *nlk_sk(struct sock *sk)
+-- 
+2.43.0
+
diff --git a/queue-4.19/series b/queue-4.19/series

new file mode 100644 (file)

index 0000000..a8381bd
--- /dev/null
+++ b/queue-4.19/series
@@ -0,0 +1 @@
+netlink-terminate-outstanding-dump-on-socket-close.patch
author	Sasha Levin <sashal@kernel.org>
	Sun, 17 Nov 2024 14:35:21 +0000 (09:35 -0500)
committer	Sasha Levin <sashal@kernel.org>
	Sun, 17 Nov 2024 14:35:21 +0000 (09:35 -0500)
queue-4.19/netlink-terminate-outstanding-dump-on-socket-close.patch	[new file with mode: 0644]	patch \| blob
queue-4.19/series	[new file with mode: 0644]	patch \| blob