]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blob - queue-6.6/tcp-properly-terminate-timers-for-kernel-sockets.patch
4da92a2b3fe0f859f7d895ea98087e4a9933c2e8
[thirdparty/kernel/stable-queue.git] / queue-6.6 / tcp-properly-terminate-timers-for-kernel-sockets.patch
1 From a1fdf0e0f4f97b1fbabc2fa920b1866b3fd5f65d Mon Sep 17 00:00:00 2001
2 From: Sasha Levin <sashal@kernel.org>
3 Date: Fri, 22 Mar 2024 13:57:32 +0000
4 Subject: tcp: properly terminate timers for kernel sockets
5
6 From: Eric Dumazet <edumazet@google.com>
7
8 [ Upstream commit 151c9c724d05d5b0dd8acd3e11cb69ef1f2dbada ]
9
10 We had various syzbot reports about tcp timers firing after
11 the corresponding netns has been dismantled.
12
13 Fortunately Josef Bacik could trigger the issue more often,
14 and could test a patch I wrote two years ago.
15
16 When TCP sockets are closed, we call inet_csk_clear_xmit_timers()
17 to 'stop' the timers.
18
19 inet_csk_clear_xmit_timers() can be called from any context,
20 including when socket lock is held.
21 This is the reason it uses sk_stop_timer(), aka del_timer().
22 This means that ongoing timers might finish much later.
23
24 For user sockets, this is fine because each running timer
25 holds a reference on the socket, and the user socket holds
26 a reference on the netns.
27
28 For kernel sockets, we risk that the netns is freed before
29 timer can complete, because kernel sockets do not hold
30 reference on the netns.
31
32 This patch adds inet_csk_clear_xmit_timers_sync() function
33 that using sk_stop_timer_sync() to make sure all timers
34 are terminated before the kernel socket is released.
35 Modules using kernel sockets close them in their netns exit()
36 handler.
37
38 Also add sock_not_owned_by_me() helper to get LOCKDEP
39 support : inet_csk_clear_xmit_timers_sync() must not be called
40 while socket lock is held.
41
42 It is very possible we can revert in the future commit
43 3a58f13a881e ("net: rds: acquire refcount on TCP sockets")
44 which attempted to solve the issue in rds only.
45 (net/smc/af_smc.c and net/mptcp/subflow.c have similar code)
46
47 We probably can remove the check_net() tests from
48 tcp_out_of_resources() and __tcp_close() in the future.
49
50 Reported-by: Josef Bacik <josef@toxicpanda.com>
51 Closes: https://lore.kernel.org/netdev/20240314210740.GA2823176@perftesting/
52 Fixes: 26abe14379f8 ("net: Modify sk_alloc to not reference count the netns of kernel sockets.")
53 Fixes: 8a68173691f0 ("net: sk_clone_lock() should only do get_net() if the parent is not a kernel socket")
54 Link: https://lore.kernel.org/bpf/CANn89i+484ffqb93aQm1N-tjxxvb3WDKX0EbD7318RwRgsatjw@mail.gmail.com/
55 Signed-off-by: Eric Dumazet <edumazet@google.com>
56 Tested-by: Josef Bacik <josef@toxicpanda.com>
57 Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
58 Link: https://lore.kernel.org/r/20240322135732.1535772-1-edumazet@google.com
59 Signed-off-by: Jakub Kicinski <kuba@kernel.org>
60 Signed-off-by: Sasha Levin <sashal@kernel.org>
61 ---
62 include/net/inet_connection_sock.h | 1 +
63 include/net/sock.h | 7 +++++++
64 net/ipv4/inet_connection_sock.c | 14 ++++++++++++++
65 net/ipv4/tcp.c | 2 ++
66 4 files changed, 24 insertions(+)
67
68 diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
69 index 01a73bf74fa19..6ecac01115d9c 100644
70 --- a/include/net/inet_connection_sock.h
71 +++ b/include/net/inet_connection_sock.h
72 @@ -173,6 +173,7 @@ void inet_csk_init_xmit_timers(struct sock *sk,
73 void (*delack_handler)(struct timer_list *),
74 void (*keepalive_handler)(struct timer_list *));
75 void inet_csk_clear_xmit_timers(struct sock *sk);
76 +void inet_csk_clear_xmit_timers_sync(struct sock *sk);
77
78 static inline void inet_csk_schedule_ack(struct sock *sk)
79 {
80 diff --git a/include/net/sock.h b/include/net/sock.h
81 index e70c903b04f30..25780942ec8bf 100644
82 --- a/include/net/sock.h
83 +++ b/include/net/sock.h
84 @@ -1808,6 +1808,13 @@ static inline void sock_owned_by_me(const struct sock *sk)
85 #endif
86 }
87
88 +static inline void sock_not_owned_by_me(const struct sock *sk)
89 +{
90 +#ifdef CONFIG_LOCKDEP
91 + WARN_ON_ONCE(lockdep_sock_is_held(sk) && debug_locks);
92 +#endif
93 +}
94 +
95 static inline bool sock_owned_by_user(const struct sock *sk)
96 {
97 sock_owned_by_me(sk);
98 diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
99 index 762817d6c8d70..a587cb6be807c 100644
100 --- a/net/ipv4/inet_connection_sock.c
101 +++ b/net/ipv4/inet_connection_sock.c
102 @@ -774,6 +774,20 @@ void inet_csk_clear_xmit_timers(struct sock *sk)
103 }
104 EXPORT_SYMBOL(inet_csk_clear_xmit_timers);
105
106 +void inet_csk_clear_xmit_timers_sync(struct sock *sk)
107 +{
108 + struct inet_connection_sock *icsk = inet_csk(sk);
109 +
110 + /* ongoing timer handlers need to acquire socket lock. */
111 + sock_not_owned_by_me(sk);
112 +
113 + icsk->icsk_pending = icsk->icsk_ack.pending = 0;
114 +
115 + sk_stop_timer_sync(sk, &icsk->icsk_retransmit_timer);
116 + sk_stop_timer_sync(sk, &icsk->icsk_delack_timer);
117 + sk_stop_timer_sync(sk, &sk->sk_timer);
118 +}
119 +
120 void inet_csk_delete_keepalive_timer(struct sock *sk)
121 {
122 sk_stop_timer(sk, &sk->sk_timer);
123 diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
124 index 68bb8d6bcc113..f8df35f7352a5 100644
125 --- a/net/ipv4/tcp.c
126 +++ b/net/ipv4/tcp.c
127 @@ -2931,6 +2931,8 @@ void tcp_close(struct sock *sk, long timeout)
128 lock_sock(sk);
129 __tcp_close(sk, timeout);
130 release_sock(sk);
131 + if (!sk->sk_net_refcnt)
132 + inet_csk_clear_xmit_timers_sync(sk);
133 sock_put(sk);
134 }
135 EXPORT_SYMBOL(tcp_close);
136 --
137 2.43.0
138