]>
Commit | Line | Data |
---|---|---|
fa55523a GKH |
1 | From foo@baz Thu Dec 14 11:45:40 CET 2017 |
2 | From: Eric Dumazet <edumazet@google.com> | |
3 | Date: Fri, 1 Dec 2017 10:06:56 -0800 | |
4 | Subject: tcp/dccp: block bh before arming time_wait timer | |
5 | ||
6 | From: Eric Dumazet <edumazet@google.com> | |
7 | ||
8 | ||
9 | [ Upstream commit cfac7f836a715b91f08c851df915d401a4d52783 ] | |
10 | ||
11 | Maciej Żenczykowski reported some panics in tcp_twsk_destructor() | |
12 | that might be caused by the following bug. | |
13 | ||
14 | timewait timer is pinned to the cpu, because we want to transition | |
15 | timewait refcount from 0 to 4 in one go, once everything has been | |
16 | initialized. | |
17 | ||
18 | At the time commit ed2e92394589 ("tcp/dccp: fix timewait races in timer | |
19 | handling") was merged, TCP was always running from BH handler. | |
20 | ||
21 | After commit 5413d1babe8f ("net: do not block BH while processing | |
22 | socket backlog") we definitely can run tcp_time_wait() from process | |
23 | context. | |
24 | ||
25 | We need to block BH in the critical section so that the pinned timer | |
26 | still serves its purpose. | |
27 | ||
28 | This bug is more likely to happen under stress and when very small RTO | |
29 | are used in datacenter flows. | |
30 | ||
31 | Fixes: 5413d1babe8f ("net: do not block BH while processing socket backlog") | |
32 | Signed-off-by: Eric Dumazet <edumazet@google.com> | |
33 | Reported-by: Maciej Żenczykowski <maze@google.com> | |
34 | Acked-by: Maciej Żenczykowski <maze@google.com> | |
35 | Signed-off-by: David S. Miller <davem@davemloft.net> | |
36 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
37 | --- | |
38 | net/dccp/minisocks.c | 6 ++++++ | |
39 | net/ipv4/tcp_minisocks.c | 6 ++++++ | |
40 | 2 files changed, 12 insertions(+) | |
41 | ||
42 | --- a/net/dccp/minisocks.c | |
43 | +++ b/net/dccp/minisocks.c | |
44 | @@ -57,10 +57,16 @@ void dccp_time_wait(struct sock *sk, int | |
45 | if (state == DCCP_TIME_WAIT) | |
46 | timeo = DCCP_TIMEWAIT_LEN; | |
47 | ||
48 | + /* tw_timer is pinned, so we need to make sure BH are disabled | |
49 | + * in following section, otherwise timer handler could run before | |
50 | + * we complete the initialization. | |
51 | + */ | |
52 | + local_bh_disable(); | |
53 | inet_twsk_schedule(tw, timeo); | |
54 | /* Linkage updates. */ | |
55 | __inet_twsk_hashdance(tw, sk, &dccp_hashinfo); | |
56 | inet_twsk_put(tw); | |
57 | + local_bh_enable(); | |
58 | } else { | |
59 | /* Sorry, if we're out of memory, just CLOSE this | |
60 | * socket up. We've got bigger problems than | |
61 | --- a/net/ipv4/tcp_minisocks.c | |
62 | +++ b/net/ipv4/tcp_minisocks.c | |
63 | @@ -312,10 +312,16 @@ void tcp_time_wait(struct sock *sk, int | |
64 | if (state == TCP_TIME_WAIT) | |
65 | timeo = TCP_TIMEWAIT_LEN; | |
66 | ||
67 | + /* tw_timer is pinned, so we need to make sure BH are disabled | |
68 | + * in following section, otherwise timer handler could run before | |
69 | + * we complete the initialization. | |
70 | + */ | |
71 | + local_bh_disable(); | |
72 | inet_twsk_schedule(tw, timeo); | |
73 | /* Linkage updates. */ | |
74 | __inet_twsk_hashdance(tw, sk, &tcp_hashinfo); | |
75 | inet_twsk_put(tw); | |
76 | + local_bh_enable(); | |
77 | } else { | |
78 | /* Sorry, if we're out of memory, just CLOSE this | |
79 | * socket up. We've got bigger problems than |