]>
Commit | Line | Data |
---|---|---|
ffc1c2fe SL |
1 | From a1fdf0e0f4f97b1fbabc2fa920b1866b3fd5f65d Mon Sep 17 00:00:00 2001 |
2 | From: Sasha Levin <sashal@kernel.org> | |
3 | Date: Fri, 22 Mar 2024 13:57:32 +0000 | |
4 | Subject: tcp: properly terminate timers for kernel sockets | |
5 | ||
6 | From: Eric Dumazet <edumazet@google.com> | |
7 | ||
8 | [ Upstream commit 151c9c724d05d5b0dd8acd3e11cb69ef1f2dbada ] | |
9 | ||
10 | We had various syzbot reports about tcp timers firing after | |
11 | the corresponding netns has been dismantled. | |
12 | ||
13 | Fortunately Josef Bacik could trigger the issue more often, | |
14 | and could test a patch I wrote two years ago. | |
15 | ||
16 | When TCP sockets are closed, we call inet_csk_clear_xmit_timers() | |
17 | to 'stop' the timers. | |
18 | ||
19 | inet_csk_clear_xmit_timers() can be called from any context, | |
20 | including when socket lock is held. | |
21 | This is the reason it uses sk_stop_timer(), aka del_timer(). | |
22 | This means that ongoing timers might finish much later. | |
23 | ||
24 | For user sockets, this is fine because each running timer | |
25 | holds a reference on the socket, and the user socket holds | |
26 | a reference on the netns. | |
27 | ||
28 | For kernel sockets, we risk that the netns is freed before | |
29 | timer can complete, because kernel sockets do not hold | |
30 | reference on the netns. | |
31 | ||
32 | This patch adds inet_csk_clear_xmit_timers_sync() function | |
33 | that using sk_stop_timer_sync() to make sure all timers | |
34 | are terminated before the kernel socket is released. | |
35 | Modules using kernel sockets close them in their netns exit() | |
36 | handler. | |
37 | ||
38 | Also add sock_not_owned_by_me() helper to get LOCKDEP | |
39 | support : inet_csk_clear_xmit_timers_sync() must not be called | |
40 | while socket lock is held. | |
41 | ||
42 | It is very possible we can revert in the future commit | |
43 | 3a58f13a881e ("net: rds: acquire refcount on TCP sockets") | |
44 | which attempted to solve the issue in rds only. | |
45 | (net/smc/af_smc.c and net/mptcp/subflow.c have similar code) | |
46 | ||
47 | We probably can remove the check_net() tests from | |
48 | tcp_out_of_resources() and __tcp_close() in the future. | |
49 | ||
50 | Reported-by: Josef Bacik <josef@toxicpanda.com> | |
51 | Closes: https://lore.kernel.org/netdev/20240314210740.GA2823176@perftesting/ | |
52 | Fixes: 26abe14379f8 ("net: Modify sk_alloc to not reference count the netns of kernel sockets.") | |
53 | Fixes: 8a68173691f0 ("net: sk_clone_lock() should only do get_net() if the parent is not a kernel socket") | |
54 | Link: https://lore.kernel.org/bpf/CANn89i+484ffqb93aQm1N-tjxxvb3WDKX0EbD7318RwRgsatjw@mail.gmail.com/ | |
55 | Signed-off-by: Eric Dumazet <edumazet@google.com> | |
56 | Tested-by: Josef Bacik <josef@toxicpanda.com> | |
57 | Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> | |
58 | Link: https://lore.kernel.org/r/20240322135732.1535772-1-edumazet@google.com | |
59 | Signed-off-by: Jakub Kicinski <kuba@kernel.org> | |
60 | Signed-off-by: Sasha Levin <sashal@kernel.org> | |
61 | --- | |
62 | include/net/inet_connection_sock.h | 1 + | |
63 | include/net/sock.h | 7 +++++++ | |
64 | net/ipv4/inet_connection_sock.c | 14 ++++++++++++++ | |
65 | net/ipv4/tcp.c | 2 ++ | |
66 | 4 files changed, 24 insertions(+) | |
67 | ||
68 | diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h | |
69 | index 01a73bf74fa19..6ecac01115d9c 100644 | |
70 | --- a/include/net/inet_connection_sock.h | |
71 | +++ b/include/net/inet_connection_sock.h | |
72 | @@ -173,6 +173,7 @@ void inet_csk_init_xmit_timers(struct sock *sk, | |
73 | void (*delack_handler)(struct timer_list *), | |
74 | void (*keepalive_handler)(struct timer_list *)); | |
75 | void inet_csk_clear_xmit_timers(struct sock *sk); | |
76 | +void inet_csk_clear_xmit_timers_sync(struct sock *sk); | |
77 | ||
78 | static inline void inet_csk_schedule_ack(struct sock *sk) | |
79 | { | |
80 | diff --git a/include/net/sock.h b/include/net/sock.h | |
81 | index e70c903b04f30..25780942ec8bf 100644 | |
82 | --- a/include/net/sock.h | |
83 | +++ b/include/net/sock.h | |
84 | @@ -1808,6 +1808,13 @@ static inline void sock_owned_by_me(const struct sock *sk) | |
85 | #endif | |
86 | } | |
87 | ||
88 | +static inline void sock_not_owned_by_me(const struct sock *sk) | |
89 | +{ | |
90 | +#ifdef CONFIG_LOCKDEP | |
91 | + WARN_ON_ONCE(lockdep_sock_is_held(sk) && debug_locks); | |
92 | +#endif | |
93 | +} | |
94 | + | |
95 | static inline bool sock_owned_by_user(const struct sock *sk) | |
96 | { | |
97 | sock_owned_by_me(sk); | |
98 | diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c | |
99 | index 762817d6c8d70..a587cb6be807c 100644 | |
100 | --- a/net/ipv4/inet_connection_sock.c | |
101 | +++ b/net/ipv4/inet_connection_sock.c | |
102 | @@ -774,6 +774,20 @@ void inet_csk_clear_xmit_timers(struct sock *sk) | |
103 | } | |
104 | EXPORT_SYMBOL(inet_csk_clear_xmit_timers); | |
105 | ||
106 | +void inet_csk_clear_xmit_timers_sync(struct sock *sk) | |
107 | +{ | |
108 | + struct inet_connection_sock *icsk = inet_csk(sk); | |
109 | + | |
110 | + /* ongoing timer handlers need to acquire socket lock. */ | |
111 | + sock_not_owned_by_me(sk); | |
112 | + | |
113 | + icsk->icsk_pending = icsk->icsk_ack.pending = 0; | |
114 | + | |
115 | + sk_stop_timer_sync(sk, &icsk->icsk_retransmit_timer); | |
116 | + sk_stop_timer_sync(sk, &icsk->icsk_delack_timer); | |
117 | + sk_stop_timer_sync(sk, &sk->sk_timer); | |
118 | +} | |
119 | + | |
120 | void inet_csk_delete_keepalive_timer(struct sock *sk) | |
121 | { | |
122 | sk_stop_timer(sk, &sk->sk_timer); | |
123 | diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c | |
124 | index 68bb8d6bcc113..f8df35f7352a5 100644 | |
125 | --- a/net/ipv4/tcp.c | |
126 | +++ b/net/ipv4/tcp.c | |
127 | @@ -2931,6 +2931,8 @@ void tcp_close(struct sock *sk, long timeout) | |
128 | lock_sock(sk); | |
129 | __tcp_close(sk, timeout); | |
130 | release_sock(sk); | |
131 | + if (!sk->sk_net_refcnt) | |
132 | + inet_csk_clear_xmit_timers_sync(sk); | |
133 | sock_put(sk); | |
134 | } | |
135 | EXPORT_SYMBOL(tcp_close); | |
136 | -- | |
137 | 2.43.0 | |
138 |