]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blame - releases/6.6.26/tcp-properly-terminate-timers-for-kernel-sockets.patch
Linux 6.6.26
[thirdparty/kernel/stable-queue.git] / releases / 6.6.26 / tcp-properly-terminate-timers-for-kernel-sockets.patch
CommitLineData
ffc1c2fe
SL
1From a1fdf0e0f4f97b1fbabc2fa920b1866b3fd5f65d Mon Sep 17 00:00:00 2001
2From: Sasha Levin <sashal@kernel.org>
3Date: Fri, 22 Mar 2024 13:57:32 +0000
4Subject: tcp: properly terminate timers for kernel sockets
5
6From: Eric Dumazet <edumazet@google.com>
7
8[ Upstream commit 151c9c724d05d5b0dd8acd3e11cb69ef1f2dbada ]
9
10We had various syzbot reports about tcp timers firing after
11the corresponding netns has been dismantled.
12
13Fortunately Josef Bacik could trigger the issue more often,
14and could test a patch I wrote two years ago.
15
16When TCP sockets are closed, we call inet_csk_clear_xmit_timers()
17to 'stop' the timers.
18
19inet_csk_clear_xmit_timers() can be called from any context,
20including when socket lock is held.
21This is the reason it uses sk_stop_timer(), aka del_timer().
22This means that ongoing timers might finish much later.
23
24For user sockets, this is fine because each running timer
25holds a reference on the socket, and the user socket holds
26a reference on the netns.
27
28For kernel sockets, we risk that the netns is freed before
29timer can complete, because kernel sockets do not hold
30reference on the netns.
31
32This patch adds inet_csk_clear_xmit_timers_sync() function
33that using sk_stop_timer_sync() to make sure all timers
34are terminated before the kernel socket is released.
35Modules using kernel sockets close them in their netns exit()
36handler.
37
38Also add sock_not_owned_by_me() helper to get LOCKDEP
39support : inet_csk_clear_xmit_timers_sync() must not be called
40while socket lock is held.
41
42It is very possible we can revert in the future commit
433a58f13a881e ("net: rds: acquire refcount on TCP sockets")
44which attempted to solve the issue in rds only.
45(net/smc/af_smc.c and net/mptcp/subflow.c have similar code)
46
47We probably can remove the check_net() tests from
48tcp_out_of_resources() and __tcp_close() in the future.
49
50Reported-by: Josef Bacik <josef@toxicpanda.com>
51Closes: https://lore.kernel.org/netdev/20240314210740.GA2823176@perftesting/
52Fixes: 26abe14379f8 ("net: Modify sk_alloc to not reference count the netns of kernel sockets.")
53Fixes: 8a68173691f0 ("net: sk_clone_lock() should only do get_net() if the parent is not a kernel socket")
54Link: https://lore.kernel.org/bpf/CANn89i+484ffqb93aQm1N-tjxxvb3WDKX0EbD7318RwRgsatjw@mail.gmail.com/
55Signed-off-by: Eric Dumazet <edumazet@google.com>
56Tested-by: Josef Bacik <josef@toxicpanda.com>
57Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
58Link: https://lore.kernel.org/r/20240322135732.1535772-1-edumazet@google.com
59Signed-off-by: Jakub Kicinski <kuba@kernel.org>
60Signed-off-by: Sasha Levin <sashal@kernel.org>
61---
62 include/net/inet_connection_sock.h | 1 +
63 include/net/sock.h | 7 +++++++
64 net/ipv4/inet_connection_sock.c | 14 ++++++++++++++
65 net/ipv4/tcp.c | 2 ++
66 4 files changed, 24 insertions(+)
67
68diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
69index 01a73bf74fa19..6ecac01115d9c 100644
70--- a/include/net/inet_connection_sock.h
71+++ b/include/net/inet_connection_sock.h
72@@ -173,6 +173,7 @@ void inet_csk_init_xmit_timers(struct sock *sk,
73 void (*delack_handler)(struct timer_list *),
74 void (*keepalive_handler)(struct timer_list *));
75 void inet_csk_clear_xmit_timers(struct sock *sk);
76+void inet_csk_clear_xmit_timers_sync(struct sock *sk);
77
78 static inline void inet_csk_schedule_ack(struct sock *sk)
79 {
80diff --git a/include/net/sock.h b/include/net/sock.h
81index e70c903b04f30..25780942ec8bf 100644
82--- a/include/net/sock.h
83+++ b/include/net/sock.h
84@@ -1808,6 +1808,13 @@ static inline void sock_owned_by_me(const struct sock *sk)
85 #endif
86 }
87
88+static inline void sock_not_owned_by_me(const struct sock *sk)
89+{
90+#ifdef CONFIG_LOCKDEP
91+ WARN_ON_ONCE(lockdep_sock_is_held(sk) && debug_locks);
92+#endif
93+}
94+
95 static inline bool sock_owned_by_user(const struct sock *sk)
96 {
97 sock_owned_by_me(sk);
98diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
99index 762817d6c8d70..a587cb6be807c 100644
100--- a/net/ipv4/inet_connection_sock.c
101+++ b/net/ipv4/inet_connection_sock.c
102@@ -774,6 +774,20 @@ void inet_csk_clear_xmit_timers(struct sock *sk)
103 }
104 EXPORT_SYMBOL(inet_csk_clear_xmit_timers);
105
106+void inet_csk_clear_xmit_timers_sync(struct sock *sk)
107+{
108+ struct inet_connection_sock *icsk = inet_csk(sk);
109+
110+ /* ongoing timer handlers need to acquire socket lock. */
111+ sock_not_owned_by_me(sk);
112+
113+ icsk->icsk_pending = icsk->icsk_ack.pending = 0;
114+
115+ sk_stop_timer_sync(sk, &icsk->icsk_retransmit_timer);
116+ sk_stop_timer_sync(sk, &icsk->icsk_delack_timer);
117+ sk_stop_timer_sync(sk, &sk->sk_timer);
118+}
119+
120 void inet_csk_delete_keepalive_timer(struct sock *sk)
121 {
122 sk_stop_timer(sk, &sk->sk_timer);
123diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
124index 68bb8d6bcc113..f8df35f7352a5 100644
125--- a/net/ipv4/tcp.c
126+++ b/net/ipv4/tcp.c
127@@ -2931,6 +2931,8 @@ void tcp_close(struct sock *sk, long timeout)
128 lock_sock(sk);
129 __tcp_close(sk, timeout);
130 release_sock(sk);
131+ if (!sk->sk_net_refcnt)
132+ inet_csk_clear_xmit_timers_sync(sk);
133 sock_put(sk);
134 }
135 EXPORT_SYMBOL(tcp_close);
136--
1372.43.0
138