]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blob - queue-4.19/netfilter-conntrack-tcp-only-close-if-rst-matches-ex.patch
07b7f5b38cb13168703b517b5767e2c8401f39aa
[thirdparty/kernel/stable-queue.git] / queue-4.19 / netfilter-conntrack-tcp-only-close-if-rst-matches-ex.patch
1 From 41934b967f9122547f0b18e59c825520e8472667 Mon Sep 17 00:00:00 2001
2 From: Florian Westphal <fw@strlen.de>
3 Date: Thu, 21 Feb 2019 17:09:31 +0100
4 Subject: netfilter: conntrack: tcp: only close if RST matches exact sequence
5
6 [ Upstream commit be0502a3f2e94211a8809a09ecbc3a017189b8fb ]
7
8 TCP resets cause instant transition from established to closed state
9 provided the reset is in-window. Endpoints that implement RFC 5961
10 require resets to match the next expected sequence number.
11 RST segments that are in-window (but that do not match RCV.NXT) are
12 ignored, and a "challenge ACK" is sent back.
13
14 The main problem for conntrack is that it's a middlebox, i.e. whereas an end
15 host might have ACK'd SEQ (and would thus accept an RST with this
16 sequence number), conntrack might not have seen this ACK (yet).
17
18 Therefore we can't simply flag RSTs with non-exact match as invalid.
19
20 This updates RST processing as follows:
21
22 1. If the connection is in a state other than ESTABLISHED, nothing is
23 changed: the RST is subject to the normal in-window check.
24
25 2. If the RST's sequence number exactly matches RCV.NXT, the
26 connection state moves to CLOSE.
27
28 3. The same applies if the RST sequence number aligns with a previous
29 packet in the same direction.
30
31 In all other cases, the connection remains in ESTABLISHED state.
32 If the normal in-window check passes, the timeout will be lowered
33 to that of CLOSE.
34
35 If the peer sends a challenge ack, connection timeout will be reset.
36
37 If the challenge ACK triggers another RST (RST was valid after all),
38 this 2nd RST will match expected sequence and conntrack state changes to
39 CLOSE.
40
41 If no challenge ACK is received, the connection will time out after
42 CLOSE seconds (10 seconds by default), just like without this patch.
43
44 Packetdrill test case:
45
46 0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
47 0.000 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
48 0.000 bind(3, ..., ...) = 0
49 0.000 listen(3, 1) = 0
50
51 0.100 < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 7>
52 0.100 > S. 0:0(0) ack 1 win 64240 <mss 1460,nop,nop,sackOK,nop,wscale 7>
53 0.200 < . 1:1(0) ack 1 win 257
54 0.200 accept(3, ..., ...) = 4
55
56 // Receive a segment.
57 0.210 < P. 1:1001(1000) ack 1 win 46
58 0.210 > . 1:1(0) ack 1001
59
60 // Application writes 1000 bytes.
61 0.250 write(4, ..., 1000) = 1000
62 0.250 > P. 1:1001(1000) ack 1001
63
64 // First reset, old sequence. Conntrack (correctly) considers this
65 // invalid due to failed window validation (regardless of this patch).
66 0.260 < R 2:2(0) ack 1001 win 260
67
68 // 2nd reset, but too far ahead sequence. Same: correctly handled
69 // as invalid.
70 0.270 < R 99990001:99990001(0) ack 1001 win 260
71
72 // in-window, but not exact sequence.
73 // Current Linux kernels might reply with a challenge ack, and do not
74 // remove connection.
75 // Without this patch, conntrack state moves to CLOSE.
76 // With patch, timeout is lowered like CLOSE, but connection stays
77 // in ESTABLISHED state.
78 0.280 < R 1010:1010(0) ack 1001 win 260
79
80 // Expect challenge ACK
81 0.281 > . 1001:1001(0) ack 1001 win 501
82
83 // With or without this patch, RST will cause connection
84 // to move to CLOSE (sequence number matches)
85 // 0.282 < R 1001:1001(0) ack 1001 win 260
86
87 // ACK
88 0.300 < . 1001:1001(0) ack 1001 win 257
89
90 // more data could be exchanged here, connection
91 // is still established
92
93 // Client closes the connection.
94 0.610 < F. 1001:1001(0) ack 1001 win 260
95 0.650 > . 1001:1001(0) ack 1002
96
97 // Close the connection without reading outstanding data
98 0.700 close(4) = 0
99
100 // so one more reset. Will be deemed acceptable with patch as well:
101 // connection is already closing.
102 0.701 > R. 1001:1001(0) ack 1002 win 501
103 // End packetdrill test case.
104
105 With the patch, this generates the following conntrack events:
106 [NEW] 120 SYN_SENT src=10.0.2.1 dst=10.0.0.1 sport=5437 dport=80 [UNREPLIED]
107 [UPDATE] 60 SYN_RECV src=10.0.2.1 dst=10.0.0.1 sport=5437 dport=80
108 [UPDATE] 432000 ESTABLISHED src=10.0.2.1 dst=10.0.0.1 sport=5437 dport=80 [ASSURED]
109 [UPDATE] 120 FIN_WAIT src=10.0.2.1 dst=10.0.0.1 sport=5437 dport=80 [ASSURED]
110 [UPDATE] 60 CLOSE_WAIT src=10.0.2.1 dst=10.0.0.1 sport=5437 dport=80 [ASSURED]
111 [UPDATE] 10 CLOSE src=10.0.2.1 dst=10.0.0.1 sport=5437 dport=80 [ASSURED]
112
113 Without the patch, the first RST moves the connection to CLOSE, whereas the
114 socket state does not change until the FIN is received.
115 [NEW] 120 SYN_SENT src=10.0.2.1 dst=10.0.0.1 sport=5141 dport=80 [UNREPLIED]
116 [UPDATE] 60 SYN_RECV src=10.0.2.1 dst=10.0.0.1 sport=5141 dport=80
117 [UPDATE] 432000 ESTABLISHED src=10.0.2.1 dst=10.0.0.1 sport=5141 dport=80 [ASSURED]
118 [UPDATE] 10 CLOSE src=10.0.2.1 dst=10.0.0.1 sport=5141 dport=80 [ASSURED]
119
120 Cc: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
121 Signed-off-by: Florian Westphal <fw@strlen.de>
122 Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
123 Signed-off-by: Sasha Levin <sashal@kernel.org>
124 ---
125 net/netfilter/nf_conntrack_proto_tcp.c | 50 ++++++++++++++++++++------
126 1 file changed, 40 insertions(+), 10 deletions(-)
127
128 diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
129 index 247b89784a6f..842f3f86fb2e 100644
130 --- a/net/netfilter/nf_conntrack_proto_tcp.c
131 +++ b/net/netfilter/nf_conntrack_proto_tcp.c
132 @@ -769,6 +769,12 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
133 return NF_ACCEPT;
134 }
135
136 +static bool nf_conntrack_tcp_established(const struct nf_conn *ct)
137 +{
138 + return ct->proto.tcp.state == TCP_CONNTRACK_ESTABLISHED &&
139 + test_bit(IPS_ASSURED_BIT, &ct->status);
140 +}
141 +
142 /* Returns verdict for packet, or -1 for invalid. */
143 static int tcp_packet(struct nf_conn *ct,
144 const struct sk_buff *skb,
145 @@ -963,16 +969,38 @@ static int tcp_packet(struct nf_conn *ct,
146 new_state = TCP_CONNTRACK_ESTABLISHED;
147 break;
148 case TCP_CONNTRACK_CLOSE:
149 - if (index == TCP_RST_SET
150 - && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
151 - && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
152 - /* Invalid RST */
153 - spin_unlock_bh(&ct->lock);
154 - nf_ct_l4proto_log_invalid(skb, ct, "invalid rst");
155 - return -NF_ACCEPT;
156 + if (index != TCP_RST_SET)
157 + break;
158 +
159 + if (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) {
160 + u32 seq = ntohl(th->seq);
161 +
162 + if (before(seq, ct->proto.tcp.seen[!dir].td_maxack)) {
163 + /* Invalid RST */
164 + spin_unlock_bh(&ct->lock);
165 + nf_ct_l4proto_log_invalid(skb, ct, "invalid rst");
166 + return -NF_ACCEPT;
167 + }
168 +
169 + if (!nf_conntrack_tcp_established(ct) ||
170 + seq == ct->proto.tcp.seen[!dir].td_maxack)
171 + break;
172 +
173 + /* Check if rst is part of train, such as
174 + * foo:80 > bar:4379: P, 235946583:235946602(19) ack 42
175 + * foo:80 > bar:4379: R, 235946602:235946602(0) ack 42
176 + */
177 + if (ct->proto.tcp.last_index == TCP_ACK_SET &&
178 + ct->proto.tcp.last_dir == dir &&
179 + seq == ct->proto.tcp.last_end)
180 + break;
181 +
182 + /* ... RST sequence number doesn't match exactly, keep
183 + * established state to allow a possible challenge ACK.
184 + */
185 + new_state = old_state;
186 }
187 - if (index == TCP_RST_SET
188 - && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
189 + if (((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
190 && ct->proto.tcp.last_index == TCP_SYN_SET)
191 || (!test_bit(IPS_ASSURED_BIT, &ct->status)
192 && ct->proto.tcp.last_index == TCP_ACK_SET))
193 @@ -988,7 +1016,7 @@ static int tcp_packet(struct nf_conn *ct,
194 * segments we ignored. */
195 goto in_window;
196 }
197 - /* Just fall through */
198 + break;
199 default:
200 /* Keep compilers happy. */
201 break;
202 @@ -1023,6 +1051,8 @@ static int tcp_packet(struct nf_conn *ct,
203 if (ct->proto.tcp.retrans >= tn->tcp_max_retrans &&
204 timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
205 timeout = timeouts[TCP_CONNTRACK_RETRANS];
206 + else if (unlikely(index == TCP_RST_SET))
207 + timeout = timeouts[TCP_CONNTRACK_CLOSE];
208 else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
209 IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
210 timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
211 --
212 2.19.1
213