/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 *
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *		See tcp.c for author information
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*
 * Changes:
 *		David S. Miller	:	New socket lookup architecture.
 *					This code is dedicated to John Dyson.
 *		David S. Miller :	Change semantics of established hash,
 *					half is devoted to TIME_WAIT sockets
 *					and the rest go in the other half.
 *		Andi Kleen :		Add support for syncookies and fixed
 *					some bugs: ip options weren't passed to
 *					the TCP layer, missed a check for an
 *					ACK bit.
 *		Andi Kleen :		Implemented fast path mtu discovery.
 *					Fixed many serious bugs in the
 *					request_sock handling and moved
 *					most of it into the af independent code.
 *					Added tail drop and some other bugfixes.
 *					Added new listen semantics.
 *		Mike McLagan	:	Routing by source
 *	Juan Jose Ciarlante:		ip_dynaddr bits
 *		Andi Kleen:		various fixes.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year
 *					coma.
 *	Andi Kleen		:	Fix new listen.
 *	Andi Kleen		:	Fix accept error reporting.
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 */

#define pr_fmt(fmt) "TCP: " fmt

#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/netdma.h>
#include <net/secure_seq.h>
#include <net/tcp_memcontrol.h>
#include <net/busy_poll.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>

int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_low_latency);


#ifdef CONFIG_TCP_MD5SIG
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
                               __be32 daddr, __be32 saddr, const struct tcphdr *th);
#endif

struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);

static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
{
        return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
                                          ip_hdr(skb)->saddr,
                                          tcp_hdr(skb)->dest,
                                          tcp_hdr(skb)->source);
}

int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
        const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
        struct tcp_sock *tp = tcp_sk(sk);

        /* With PAWS, it is safe from the viewpoint
           of data integrity. Even without PAWS it is safe provided sequence
           spaces do not overlap i.e. at data rates <= 80Mbit/sec.

           Actually, the idea is close to VJ's one, only timestamp cache is
           held not per host, but per port pair and TW bucket is used as state
           holder.

           If TW bucket has been already destroyed we fall back to VJ's scheme
           and use initial timestamp retrieved from peer table.
         */
        if (tcptw->tw_ts_recent_stamp &&
            (twp == NULL || (sysctl_tcp_tw_reuse &&
                             get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
                tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
                if (tp->write_seq == 0)
                        tp->write_seq = 1;
                tp->rx_opt.ts_recent       = tcptw->tw_ts_recent;
                tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
                sock_hold(sktw);
                return 1;
        }

        return 0;
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);

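/* Note on tcp_twsk_unique() above (illustrative): when a TIME-WAIT bucket is
 * reused, write_seq is seeded with tw_snd_nxt + 65535 + 2, i.e. just past the
 * largest unscaled window the previous incarnation could still have had
 * outstanding, so data from the two incarnations cannot be confused even
 * when PAWS timestamps are unavailable.
 */
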
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
        struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
        struct inet_sock *inet = inet_sk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        __be16 orig_sport, orig_dport;
        __be32 daddr, nexthop;
        struct flowi4 *fl4;
        struct rtable *rt;
        int err;
        struct ip_options_rcu *inet_opt;

        if (addr_len < sizeof(struct sockaddr_in))
                return -EINVAL;

        if (usin->sin_family != AF_INET)
                return -EAFNOSUPPORT;

        nexthop = daddr = usin->sin_addr.s_addr;
        inet_opt = rcu_dereference_protected(inet->inet_opt,
                                             sock_owned_by_user(sk));
        if (inet_opt && inet_opt->opt.srr) {
                if (!daddr)
                        return -EINVAL;
                nexthop = inet_opt->opt.faddr;
        }

        orig_sport = inet->inet_sport;
        orig_dport = usin->sin_port;
        fl4 = &inet->cork.fl.u.ip4;
        rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
                              RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
                              IPPROTO_TCP,
                              orig_sport, orig_dport, sk);
        if (IS_ERR(rt)) {
                err = PTR_ERR(rt);
                if (err == -ENETUNREACH)
                        IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
                return err;
        }

        if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
                ip_rt_put(rt);
                return -ENETUNREACH;
        }

        if (!inet_opt || !inet_opt->opt.srr)
                daddr = fl4->daddr;

        if (!inet->inet_saddr)
                inet->inet_saddr = fl4->saddr;
        inet->inet_rcv_saddr = inet->inet_saddr;

        if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
                /* Reset inherited state */
                tp->rx_opt.ts_recent       = 0;
                tp->rx_opt.ts_recent_stamp = 0;
                if (likely(!tp->repair))
                        tp->write_seq      = 0;
        }

        if (tcp_death_row.sysctl_tw_recycle &&
            !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
                tcp_fetch_timewait_stamp(sk, &rt->dst);

        inet->inet_dport = usin->sin_port;
        inet->inet_daddr = daddr;

        inet_csk(sk)->icsk_ext_hdr_len = 0;
        if (inet_opt)
                inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

        tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

        /* Socket identity is still unknown (sport may be zero).
         * However we set state to SYN-SENT and, without releasing the socket
         * lock, select a source port, enter ourselves into the hash tables and
         * complete initialization after this.
         */
        tcp_set_state(sk, TCP_SYN_SENT);
        err = inet_hash_connect(&tcp_death_row, sk);
        if (err)
                goto failure;

        rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
                               inet->inet_sport, inet->inet_dport, sk);
        if (IS_ERR(rt)) {
                err = PTR_ERR(rt);
                rt = NULL;
                goto failure;
        }
        /* OK, now commit destination to socket.  */
        sk->sk_gso_type = SKB_GSO_TCPV4;
        sk_setup_caps(sk, &rt->dst);

        if (!tp->write_seq && likely(!tp->repair))
                tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
                                                           inet->inet_daddr,
                                                           inet->inet_sport,
                                                           usin->sin_port);

        inet->inet_id = tp->write_seq ^ jiffies;

        err = tcp_connect(sk);

        rt = NULL;
        if (err)
                goto failure;

        return 0;

failure:
        /*
         * This unhashes the socket and releases the local port,
         * if necessary.
         */
        tcp_set_state(sk, TCP_CLOSE);
        ip_rt_put(rt);
        sk->sk_route_caps = 0;
        inet->inet_dport = 0;
        return err;
}
EXPORT_SYMBOL(tcp_v4_connect);

/*
 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
 * It can be called through tcp_release_cb() if socket was owned by user
 * at the time tcp_v4_err() was called to handle ICMP message.
 */
static void tcp_v4_mtu_reduced(struct sock *sk)
{
        struct dst_entry *dst;
        struct inet_sock *inet = inet_sk(sk);
        u32 mtu = tcp_sk(sk)->mtu_info;

        dst = inet_csk_update_pmtu(sk, mtu);
        if (!dst)
                return;

        /* Something is about to be wrong... Remember soft error
         * for the case, if this connection will not be able to recover.
         */
        if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
                sk->sk_err_soft = EMSGSIZE;

        mtu = dst_mtu(dst);

        if (inet->pmtudisc != IP_PMTUDISC_DONT &&
            ip_sk_accept_pmtu(sk) &&
            inet_csk(sk)->icsk_pmtu_cookie > mtu) {
                tcp_sync_mss(sk, mtu);

                /* Resend the TCP packet because it's
                 * clear that the old packet has been
                 * dropped. This is the new "fast" path mtu
                 * discovery.
                 */
                tcp_simple_retransmit(sk);
        } /* else let the usual retransmit timer handle it */
}

static void do_redirect(struct sk_buff *skb, struct sock *sk)
{
        struct dst_entry *dst = __sk_dst_check(sk, 0);

        if (dst)
                dst->ops->redirect(dst, sk, skb);
}

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 *
 */

void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
        const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
        struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
        struct inet_connection_sock *icsk;
        struct tcp_sock *tp;
        struct inet_sock *inet;
        const int type = icmp_hdr(icmp_skb)->type;
        const int code = icmp_hdr(icmp_skb)->code;
        struct sock *sk;
        struct sk_buff *skb;
        struct request_sock *req;
        __u32 seq;
        __u32 remaining;
        int err;
        struct net *net = dev_net(icmp_skb->dev);

        if (icmp_skb->len < (iph->ihl << 2) + 8) {
                ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
                return;
        }

        sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
                         iph->saddr, th->source, inet_iif(icmp_skb));
        if (!sk) {
                ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
                return;
        }
        if (sk->sk_state == TCP_TIME_WAIT) {
                inet_twsk_put(inet_twsk(sk));
                return;
        }

        bh_lock_sock(sk);
        /* If too many ICMPs get dropped on busy
         * servers this needs to be solved differently.
         * We do take care of PMTU discovery (RFC1191) special case :
         * we can receive locally generated ICMP messages while socket is held.
         */
        if (sock_owned_by_user(sk)) {
                if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
                        NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
        }
        if (sk->sk_state == TCP_CLOSE)
                goto out;

        if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
                NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
                goto out;
        }

        icsk = inet_csk(sk);
        tp = tcp_sk(sk);
        req = tp->fastopen_rsk;
        seq = ntohl(th->seq);
        if (sk->sk_state != TCP_LISTEN &&
            !between(seq, tp->snd_una, tp->snd_nxt) &&
            (req == NULL || seq != tcp_rsk(req)->snt_isn)) {
                /* For a Fast Open socket, allow seq to be snt_isn. */
                NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
                goto out;
        }

        switch (type) {
        case ICMP_REDIRECT:
                do_redirect(icmp_skb, sk);
                goto out;
        case ICMP_SOURCE_QUENCH:
                /* Just silently ignore these. */
                goto out;
        case ICMP_PARAMETERPROB:
                err = EPROTO;
                break;
        case ICMP_DEST_UNREACH:
                if (code > NR_ICMP_UNREACH)
                        goto out;

                if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
                        /* We are not interested in TCP_LISTEN and open_requests
                         * (SYN-ACKs sent out by Linux are always <576bytes so
                         * they should go through unfragmented).
                         */
                        if (sk->sk_state == TCP_LISTEN)
                                goto out;

                        tp->mtu_info = info;
                        if (!sock_owned_by_user(sk)) {
                                tcp_v4_mtu_reduced(sk);
                        } else {
                                if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
                                        sock_hold(sk);
                        }
                        goto out;
                }

                err = icmp_err_convert[code].errno;
                /* check if icmp_skb allows revert of backoff
                 * (see draft-zimmermann-tcp-lcd) */
                if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
                        break;
                if (seq != tp->snd_una || !icsk->icsk_retransmits ||
                    !icsk->icsk_backoff)
                        break;

                /* XXX (TFO) - revisit the following logic for TFO */

                if (sock_owned_by_user(sk))
                        break;

                icsk->icsk_backoff--;
                inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
                        TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
                tcp_bound_rto(sk);

                skb = tcp_write_queue_head(sk);
                BUG_ON(!skb);

                remaining = icsk->icsk_rto - min(icsk->icsk_rto,
                                tcp_time_stamp - TCP_SKB_CB(skb)->when);

                if (remaining) {
                        inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
                                                  remaining, TCP_RTO_MAX);
                } else {
                        /* RTO revert clocked out retransmission.
                         * Will retransmit now */
                        tcp_retransmit_timer(sk);
                }

                break;
        case ICMP_TIME_EXCEEDED:
                err = EHOSTUNREACH;
                break;
        default:
                goto out;
        }

        /* XXX (TFO) - if it's a TFO socket and has been accepted, rather
         * than following the TCP_SYN_RECV case and closing the socket,
         * we ignore the ICMP error and keep trying like a fully established
         * socket. Is this the right thing to do?
         */
        if (req && req->sk == NULL)
                goto out;

        switch (sk->sk_state) {
                struct request_sock *req, **prev;
        case TCP_LISTEN:
                if (sock_owned_by_user(sk))
                        goto out;

                req = inet_csk_search_req(sk, &prev, th->dest,
                                          iph->daddr, iph->saddr);
                if (!req)
                        goto out;

                /* ICMPs are not backlogged, hence we cannot get
                   an established socket here.
                 */
                WARN_ON(req->sk);

                if (seq != tcp_rsk(req)->snt_isn) {
                        NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
                        goto out;
                }

                /*
                 * Still in SYN_RECV, just remove it silently.
                 * There is no good way to pass the error to the newly
                 * created socket, and POSIX does not want network
                 * errors returned from accept().
                 */
                inet_csk_reqsk_queue_drop(sk, req, prev);
                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
                goto out;

        case TCP_SYN_SENT:
        case TCP_SYN_RECV:  /* Cannot happen.
                               It can happen e.g. if SYNs crossed,
                               or Fast Open.
                             */
                if (!sock_owned_by_user(sk)) {
                        sk->sk_err = err;

                        sk->sk_error_report(sk);

                        tcp_done(sk);
                } else {
                        sk->sk_err_soft = err;
                }
                goto out;
        }

        /* If we've already connected we will keep trying
         * until we time out, or the user gives up.
         *
         * rfc1122 4.2.3.9 allows to consider as hard errors
         * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
         * but it is obsoleted by pmtu discovery).
         *
         * Note, that in modern internet, where routing is unreliable
         * and in each dark corner broken firewalls sit, sending random
         * errors ordered by their masters even these two messages finally lose
         * their original sense (even Linux sends invalid PORT_UNREACHs)
         *
         * Now we are in compliance with RFCs.
         * --ANK (980905)
         */

        inet = inet_sk(sk);
        if (!sock_owned_by_user(sk) && inet->recverr) {
                sk->sk_err = err;
                sk->sk_error_report(sk);
        } else  { /* Only an error on timeout */
                sk->sk_err_soft = err;
        }

out:
        bh_unlock_sock(sk);
        sock_put(sk);
}

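/* Worked example for the RTO back-off revert in tcp_v4_err() (illustrative
 * numbers): with an srtt-derived RTO of 200 ms and icsk_backoff == 3, a
 * net/host-unreachable ICMP matching the oldest unacked segment drops the
 * backoff to 2, giving an RTO of 200 ms << 2 = 800 ms.  If 500 ms have
 * already elapsed since that segment was last (re)transmitted, the
 * retransmit timer is re-armed for the remaining 300 ms; if the new RTO has
 * already expired, the segment is retransmitted immediately.
 */
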
void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
        struct tcphdr *th = tcp_hdr(skb);

        if (skb->ip_summed == CHECKSUM_PARTIAL) {
                th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
                skb->csum_start = skb_transport_header(skb) - skb->head;
                skb->csum_offset = offsetof(struct tcphdr, check);
        } else {
                th->check = tcp_v4_check(skb->len, saddr, daddr,
                                         csum_partial(th,
                                                      th->doff << 2,
                                                      skb->csum));
        }
}

/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
{
        const struct inet_sock *inet = inet_sk(sk);

        __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_send_check);

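/* Note on __tcp_v4_send_check() above: in the CHECKSUM_PARTIAL case only the
 * pseudo-header sum is placed in th->check, and csum_start/csum_offset tell
 * the device (or the software checksum helpers) where to finish the
 * computation; otherwise the full checksum over the header plus the
 * accumulated skb->csum is computed here in software.
 */
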
/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 *	Answer: if a packet caused RST, it is not for a socket
 *		existing in our system, if it is matched to a socket,
 *		it is just duplicate segment or bug in other side's TCP.
 *	So that we build reply only based on parameters
 *	arrived with segment.
 *	Exception: precedence violation. We do not implement it in any case.
 */

static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
{
        const struct tcphdr *th = tcp_hdr(skb);
        struct {
                struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
                __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
        } rep;
        struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
        struct tcp_md5sig_key *key;
        const __u8 *hash_location = NULL;
        unsigned char newhash[16];
        int genhash;
        struct sock *sk1 = NULL;
#endif
        struct net *net;

        /* Never send a reset in response to a reset. */
        if (th->rst)
                return;

        if (skb_rtable(skb)->rt_type != RTN_LOCAL)
                return;

        /* Swap the send and the receive. */
        memset(&rep, 0, sizeof(rep));
        rep.th.dest   = th->source;
        rep.th.source = th->dest;
        rep.th.doff   = sizeof(struct tcphdr) / 4;
        rep.th.rst    = 1;

        if (th->ack) {
                rep.th.seq = th->ack_seq;
        } else {
                rep.th.ack = 1;
                rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
                                       skb->len - (th->doff << 2));
        }

        memset(&arg, 0, sizeof(arg));
        arg.iov[0].iov_base = (unsigned char *)&rep;
        arg.iov[0].iov_len  = sizeof(rep.th);

#ifdef CONFIG_TCP_MD5SIG
        hash_location = tcp_parse_md5sig_option(th);
        if (!sk && hash_location) {
                /*
                 * active side is lost. Try to find listening socket through
                 * source port, and then find md5 key through listening socket.
                 * we are not losing security here:
                 * Incoming packet is checked with md5 hash with finding key,
                 * no RST generated if md5 hash doesn't match.
                 */
                sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
                                             &tcp_hashinfo, ip_hdr(skb)->saddr,
                                             th->source, ip_hdr(skb)->daddr,
                                             ntohs(th->source), inet_iif(skb));
                /* don't send rst if it can't find key */
                if (!sk1)
                        return;
                rcu_read_lock();
                key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
                                        &ip_hdr(skb)->saddr, AF_INET);
                if (!key)
                        goto release_sk1;

                genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
                if (genhash || memcmp(hash_location, newhash, 16) != 0)
                        goto release_sk1;
        } else {
                key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
                                             &ip_hdr(skb)->saddr,
                                             AF_INET) : NULL;
        }

        if (key) {
                rep.opt[0] = htonl((TCPOPT_NOP << 24) |
                                   (TCPOPT_NOP << 16) |
                                   (TCPOPT_MD5SIG << 8) |
                                   TCPOLEN_MD5SIG);
                /* Update length and the length the header thinks exists */
                arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
                rep.th.doff = arg.iov[0].iov_len / 4;

                tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
                                    key, ip_hdr(skb)->saddr,
                                    ip_hdr(skb)->daddr, &rep.th);
        }
#endif
        arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
                                      ip_hdr(skb)->saddr, /* XXX */
                                      arg.iov[0].iov_len, IPPROTO_TCP, 0);
        arg.csumoffset = offsetof(struct tcphdr, check) / 2;
        arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
        /* When socket is gone, all binding information is lost.
         * routing might fail in this case. No choice here, if we choose to force
         * input interface, we will misroute in case of asymmetric route.
         */
        if (sk)
                arg.bound_dev_if = sk->sk_bound_dev_if;

        net = dev_net(skb_dst(skb)->dev);
        arg.tos = ip_hdr(skb)->tos;
        ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
                              ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);

        TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
        TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);

#ifdef CONFIG_TCP_MD5SIG
release_sk1:
        if (sk1) {
                rcu_read_unlock();
                sock_put(sk1);
        }
#endif
}

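/* Worked example for the un-ACKed branch in tcp_v4_send_reset() above: for an
 * incoming segment carrying sequence number S, 100 bytes of payload and a FIN,
 * the generated RST acknowledges S + 100 + 1, i.e. every sequence number the
 * offending segment consumed (payload plus SYN/FIN flags).
 */
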
/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
   outside socket context is ugly, certainly. What can I do?
 */

static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
                            u32 win, u32 tsval, u32 tsecr, int oif,
                            struct tcp_md5sig_key *key,
                            int reply_flags, u8 tos)
{
        const struct tcphdr *th = tcp_hdr(skb);
        struct {
                struct tcphdr th;
                __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
                           + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
                        ];
        } rep;
        struct ip_reply_arg arg;
        struct net *net = dev_net(skb_dst(skb)->dev);

        memset(&rep.th, 0, sizeof(struct tcphdr));
        memset(&arg, 0, sizeof(arg));

        arg.iov[0].iov_base = (unsigned char *)&rep;
        arg.iov[0].iov_len  = sizeof(rep.th);
        if (tsecr) {
                rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                   (TCPOPT_TIMESTAMP << 8) |
                                   TCPOLEN_TIMESTAMP);
                rep.opt[1] = htonl(tsval);
                rep.opt[2] = htonl(tsecr);
                arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
        }

        /* Swap the send and the receive. */
        rep.th.dest    = th->source;
        rep.th.source  = th->dest;
        rep.th.doff    = arg.iov[0].iov_len / 4;
        rep.th.seq     = htonl(seq);
        rep.th.ack_seq = htonl(ack);
        rep.th.ack     = 1;
        rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
        if (key) {
                int offset = (tsecr) ? 3 : 0;

                rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
                                          (TCPOPT_NOP << 16) |
                                          (TCPOPT_MD5SIG << 8) |
                                          TCPOLEN_MD5SIG);
                arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
                rep.th.doff = arg.iov[0].iov_len/4;

                tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
                                    key, ip_hdr(skb)->saddr,
                                    ip_hdr(skb)->daddr, &rep.th);
        }
#endif
        arg.flags = reply_flags;
        arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
                                      ip_hdr(skb)->saddr, /* XXX */
                                      arg.iov[0].iov_len, IPPROTO_TCP, 0);
        arg.csumoffset = offsetof(struct tcphdr, check) / 2;
        if (oif)
                arg.bound_dev_if = oif;
        arg.tos = tos;
        ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
                              ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);

        TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
}

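/* Layout of rep.opt[] built by tcp_v4_send_ack() above: when a timestamp is
 * echoed (tsecr != 0), opt[0..2] carry NOP,NOP,TIMESTAMP plus the tsval/tsecr
 * pair and an MD5 signature option, if any, starts at opt[3]; without a
 * timestamp the MD5 option starts at opt[0].
 */
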
static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
        struct inet_timewait_sock *tw = inet_twsk(sk);
        struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

        tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
                        tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
                        tcp_time_stamp + tcptw->tw_ts_offset,
                        tcptw->tw_ts_recent,
                        tw->tw_bound_dev_if,
                        tcp_twsk_md5_key(tcptw),
                        tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
                        tw->tw_tos
                        );

        inet_twsk_put(tw);
}

static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
                                  struct request_sock *req)
{
        /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
         * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
         */
        tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
                        tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
                        tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
                        tcp_time_stamp,
                        req->ts_recent,
                        0,
                        tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
                                          AF_INET),
                        inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
                        ip_hdr(skb)->tos);
}

/*
 *	Send a SYN-ACK after having received a SYN.
 *	This still operates on a request_sock only, not on a big
 *	socket.
 */
static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
                              struct request_sock *req,
                              u16 queue_mapping)
{
        const struct inet_request_sock *ireq = inet_rsk(req);
        struct flowi4 fl4;
        int err = -1;
        struct sk_buff *skb;

        /* First, grab a route. */
        if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
                return -1;

        skb = tcp_make_synack(sk, dst, req, NULL);

        if (skb) {
                __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);

                skb_set_queue_mapping(skb, queue_mapping);
                err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
                                            ireq->ir_rmt_addr,
                                            ireq->opt);
                err = net_xmit_eval(err);
                if (!tcp_rsk(req)->snt_synack && !err)
                        tcp_rsk(req)->snt_synack = tcp_time_stamp;
        }

        return err;
}

static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
{
        int res = tcp_v4_send_synack(sk, NULL, req, 0);

        if (!res)
                TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
        return res;
}

/*
 *	IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
        kfree(inet_rsk(req)->opt);
}

/*
 * Return true if a syncookie should be sent
 */
bool tcp_syn_flood_action(struct sock *sk,
                          const struct sk_buff *skb,
                          const char *proto)
{
        const char *msg = "Dropping request";
        bool want_cookie = false;
        struct listen_sock *lopt;

#ifdef CONFIG_SYN_COOKIES
        if (sysctl_tcp_syncookies) {
                msg = "Sending cookies";
                want_cookie = true;
                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
        } else
#endif
                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);

        lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
        if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) {
                lopt->synflood_warned = 1;
                pr_info("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
                        proto, ntohs(tcp_hdr(skb)->dest), msg);
        }
        return want_cookie;
}
EXPORT_SYMBOL(tcp_syn_flood_action);

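/* The counters bumped in tcp_syn_flood_action() above are exported under the
 * TcpExt group, so a flood that triggered the warning can be confirmed from
 * user space with e.g. "nstat -az TcpExtTCPReqQFullDoCookies
 * TcpExtTCPReqQFullDrop" (counter names assumed from the usual
 * LINUX_MIB_* to TcpExt procfs mapping).
 */
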
/*
 * Save and compile IPv4 options into the request_sock if needed.
 */
static struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
{
        const struct ip_options *opt = &(IPCB(skb)->opt);
        struct ip_options_rcu *dopt = NULL;

        if (opt && opt->optlen) {
                int opt_size = sizeof(*dopt) + opt->optlen;

                dopt = kmalloc(opt_size, GFP_ATOMIC);
                if (dopt) {
                        if (ip_options_echo(&dopt->opt, skb)) {
                                kfree(dopt);
                                dopt = NULL;
                        }
                }
        }
        return dopt;
}

#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

/* Find the Key structure for an address.  */
struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
                                         const union tcp_md5_addr *addr,
                                         int family)
{
        struct tcp_sock *tp = tcp_sk(sk);
        struct tcp_md5sig_key *key;
        unsigned int size = sizeof(struct in_addr);
        struct tcp_md5sig_info *md5sig;

        /* caller either holds rcu_read_lock() or socket lock */
        md5sig = rcu_dereference_check(tp->md5sig_info,
                                       sock_owned_by_user(sk) ||
                                       lockdep_is_held(&sk->sk_lock.slock));
        if (!md5sig)
                return NULL;
#if IS_ENABLED(CONFIG_IPV6)
        if (family == AF_INET6)
                size = sizeof(struct in6_addr);
#endif
        hlist_for_each_entry_rcu(key, &md5sig->head, node) {
                if (key->family != family)
                        continue;
                if (!memcmp(&key->addr, addr, size))
                        return key;
        }
        return NULL;
}
EXPORT_SYMBOL(tcp_md5_do_lookup);

struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
                                         struct sock *addr_sk)
{
        union tcp_md5_addr *addr;

        addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
        return tcp_md5_do_lookup(sk, addr, AF_INET);
}
EXPORT_SYMBOL(tcp_v4_md5_lookup);

static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
                                                      struct request_sock *req)
{
        union tcp_md5_addr *addr;

        addr = (union tcp_md5_addr *)&inet_rsk(req)->ir_rmt_addr;
        return tcp_md5_do_lookup(sk, addr, AF_INET);
}

/* This can be called on a newly created socket, from other files */
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
                   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
{
        /* Add Key to the list */
        struct tcp_md5sig_key *key;
        struct tcp_sock *tp = tcp_sk(sk);
        struct tcp_md5sig_info *md5sig;

        key = tcp_md5_do_lookup(sk, addr, family);
        if (key) {
                /* Pre-existing entry - just update that one. */
                memcpy(key->key, newkey, newkeylen);
                key->keylen = newkeylen;
                return 0;
        }

        md5sig = rcu_dereference_protected(tp->md5sig_info,
                                           sock_owned_by_user(sk));
        if (!md5sig) {
                md5sig = kmalloc(sizeof(*md5sig), gfp);
                if (!md5sig)
                        return -ENOMEM;

                sk_nocaps_add(sk, NETIF_F_GSO_MASK);
                INIT_HLIST_HEAD(&md5sig->head);
                rcu_assign_pointer(tp->md5sig_info, md5sig);
        }

        key = sock_kmalloc(sk, sizeof(*key), gfp);
        if (!key)
                return -ENOMEM;
        if (!tcp_alloc_md5sig_pool()) {
                sock_kfree_s(sk, key, sizeof(*key));
                return -ENOMEM;
        }

        memcpy(key->key, newkey, newkeylen);
        key->keylen = newkeylen;
        key->family = family;
        memcpy(&key->addr, addr,
               (family == AF_INET6) ? sizeof(struct in6_addr) :
                                      sizeof(struct in_addr));
        hlist_add_head_rcu(&key->node, &md5sig->head);
        return 0;
}
EXPORT_SYMBOL(tcp_md5_do_add);

int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
{
        struct tcp_md5sig_key *key;

        key = tcp_md5_do_lookup(sk, addr, family);
        if (!key)
                return -ENOENT;
        hlist_del_rcu(&key->node);
        atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
        kfree_rcu(key, rcu);
        return 0;
}
EXPORT_SYMBOL(tcp_md5_do_del);

static void tcp_clear_md5_list(struct sock *sk)
{
        struct tcp_sock *tp = tcp_sk(sk);
        struct tcp_md5sig_key *key;
        struct hlist_node *n;
        struct tcp_md5sig_info *md5sig;

        md5sig = rcu_dereference_protected(tp->md5sig_info, 1);

        hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
                hlist_del_rcu(&key->node);
                atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
                kfree_rcu(key, rcu);
        }
}

static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
                                 int optlen)
{
        struct tcp_md5sig cmd;
        struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;

        if (optlen < sizeof(cmd))
                return -EINVAL;

        if (copy_from_user(&cmd, optval, sizeof(cmd)))
                return -EFAULT;

        if (sin->sin_family != AF_INET)
                return -EINVAL;

        if (!cmd.tcpm_key || !cmd.tcpm_keylen)
                return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
                                      AF_INET);

        if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
                return -EINVAL;

        return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
                              AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
                              GFP_KERNEL);
}

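/* Illustrative user-space counterpart of tcp_v4_parse_md5_keys() above (a
 * sketch, not part of the kernel sources; field names follow the struct
 * tcp_md5sig uapi definition):
 *
 *	struct tcp_md5sig md5 = { 0 };
 *	struct sockaddr_in *peer = (struct sockaddr_in *)&md5.tcpm_addr;
 *
 *	peer->sin_family = AF_INET;
 *	inet_pton(AF_INET, "192.0.2.1", &peer->sin_addr);
 *	md5.tcpm_keylen = 6;
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * Passing tcpm_keylen == 0 deletes the key for that peer, mirroring the
 * tcp_md5_do_del() path above.
 */
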
static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
                                        __be32 daddr, __be32 saddr, int nbytes)
{
        struct tcp4_pseudohdr *bp;
        struct scatterlist sg;

        bp = &hp->md5_blk.ip4;

        /*
         * 1. the TCP pseudo-header (in the order: source IP address,
         * destination IP address, zero-padded protocol number, and
         * segment length)
         */
        bp->saddr = saddr;
        bp->daddr = daddr;
        bp->pad = 0;
        bp->protocol = IPPROTO_TCP;
        bp->len = cpu_to_be16(nbytes);

        sg_init_one(&sg, bp, sizeof(*bp));
        return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
}

static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
                               __be32 daddr, __be32 saddr, const struct tcphdr *th)
{
        struct tcp_md5sig_pool *hp;
        struct hash_desc *desc;

        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
        desc = &hp->md5_desc;

        if (crypto_hash_init(desc))
                goto clear_hash;
        if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
                goto clear_hash;
        if (tcp_md5_hash_header(hp, th))
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
        if (crypto_hash_final(desc, md5_hash))
                goto clear_hash;

        tcp_put_md5sig_pool();
        return 0;

clear_hash:
        tcp_put_md5sig_pool();
clear_hash_noput:
        memset(md5_hash, 0, 16);
        return 1;
}

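/* The helpers above feed the RFC 2385 digest in the required order: the IPv4
 * pseudo-header (saddr, daddr, zero pad, protocol, length), then the TCP
 * header with its checksum field zeroed, then (for full segments, see
 * tcp_v4_md5_hash_skb() below) the payload, and finally the key itself.
 */
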
int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
                        const struct sock *sk, const struct request_sock *req,
                        const struct sk_buff *skb)
{
        struct tcp_md5sig_pool *hp;
        struct hash_desc *desc;
        const struct tcphdr *th = tcp_hdr(skb);
        __be32 saddr, daddr;

        if (sk) {
                saddr = inet_sk(sk)->inet_saddr;
                daddr = inet_sk(sk)->inet_daddr;
        } else if (req) {
                saddr = inet_rsk(req)->ir_loc_addr;
                daddr = inet_rsk(req)->ir_rmt_addr;
        } else {
                const struct iphdr *iph = ip_hdr(skb);
                saddr = iph->saddr;
                daddr = iph->daddr;
        }

        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
        desc = &hp->md5_desc;

        if (crypto_hash_init(desc))
                goto clear_hash;

        if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
                goto clear_hash;
        if (tcp_md5_hash_header(hp, th))
                goto clear_hash;
        if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
        if (crypto_hash_final(desc, md5_hash))
                goto clear_hash;

        tcp_put_md5sig_pool();
        return 0;

clear_hash:
        tcp_put_md5sig_pool();
clear_hash_noput:
        memset(md5_hash, 0, 16);
        return 1;
}
EXPORT_SYMBOL(tcp_v4_md5_hash_skb);

static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
{
        /*
         * This gets called for each TCP segment that arrives
         * so we want to be efficient.
         * We have 3 drop cases:
         * o No MD5 hash and one expected.
         * o MD5 hash and we're not expecting one.
         * o MD5 hash and it's wrong.
         */
        const __u8 *hash_location = NULL;
        struct tcp_md5sig_key *hash_expected;
        const struct iphdr *iph = ip_hdr(skb);
        const struct tcphdr *th = tcp_hdr(skb);
        int genhash;
        unsigned char newhash[16];

        hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
                                          AF_INET);
        hash_location = tcp_parse_md5sig_option(th);

        /* We've parsed the options - do we have a hash? */
        if (!hash_expected && !hash_location)
                return false;

        if (hash_expected && !hash_location) {
                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
                return true;
        }

        if (!hash_expected && hash_location) {
                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
                return true;
        }

        /* Okay, so this is hash_expected and hash_location -
         * so we need to calculate the checksum.
         */
        genhash = tcp_v4_md5_hash_skb(newhash,
                                      hash_expected,
                                      NULL, NULL, skb);

        if (genhash || memcmp(hash_location, newhash, 16) != 0) {
                net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
                                     &iph->saddr, ntohs(th->source),
                                     &iph->daddr, ntohs(th->dest),
                                     genhash ? " tcp_v4_calc_md5_hash failed"
                                     : "");
                return true;
        }
        return false;
}

#endif

struct request_sock_ops tcp_request_sock_ops __read_mostly = {
        .family		=	PF_INET,
        .obj_size	=	sizeof(struct tcp_request_sock),
        .rtx_syn_ack	=	tcp_v4_rtx_synack,
        .send_ack	=	tcp_v4_reqsk_send_ack,
        .destructor	=	tcp_v4_reqsk_destructor,
        .send_reset	=	tcp_v4_send_reset,
        .syn_ack_timeout =	tcp_syn_ack_timeout,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
        .md5_lookup	=	tcp_v4_reqsk_md5_lookup,
        .calc_md5_hash	=	tcp_v4_md5_hash_skb,
};
#endif

static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
                               struct request_sock *req,
                               struct tcp_fastopen_cookie *foc,
                               struct tcp_fastopen_cookie *valid_foc)
{
        bool skip_cookie = false;
        struct fastopen_queue *fastopenq;

        if (likely(!fastopen_cookie_present(foc))) {
                /* See include/net/tcp.h for the meaning of these knobs */
                if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) ||
                    ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) &&
                    (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1)))
                        skip_cookie = true; /* no cookie to validate */
                else
                        return false;
        }
        fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq;
        /* A FO option is present; bump the counter. */
        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE);

        /* Make sure the listener has enabled fastopen, and we don't
         * exceed the max # of pending TFO requests allowed before trying
         * to validate the cookie in order to avoid burning CPU cycles
         * unnecessarily.
         *
         * XXX (TFO) - The implication of checking the max_qlen before
         * processing a cookie request is that clients can't differentiate
         * between qlen overflow causing Fast Open to be disabled
         * temporarily vs a server not supporting Fast Open at all.
         */
        if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 ||
            fastopenq == NULL || fastopenq->max_qlen == 0)
                return false;

        if (fastopenq->qlen >= fastopenq->max_qlen) {
                struct request_sock *req1;
                spin_lock(&fastopenq->lock);
                req1 = fastopenq->rskq_rst_head;
                if ((req1 == NULL) || time_after(req1->expires, jiffies)) {
                        spin_unlock(&fastopenq->lock);
                        NET_INC_STATS_BH(sock_net(sk),
                                         LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
                        /* Avoid bumping LINUX_MIB_TCPFASTOPENPASSIVEFAIL*/
                        foc->len = -1;
                        return false;
                }
                fastopenq->rskq_rst_head = req1->dl_next;
                fastopenq->qlen--;
                spin_unlock(&fastopenq->lock);
                reqsk_free(req1);
        }
        if (skip_cookie) {
                tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
                return true;
        }

        if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) {
                if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) {
                        tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
                                                ip_hdr(skb)->daddr, valid_foc);
                        if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) ||
                            memcmp(&foc->val[0], &valid_foc->val[0],
                                   TCP_FASTOPEN_COOKIE_SIZE) != 0)
                                return false;
                        valid_foc->len = -1;
                }
                /* Acknowledge the data received from the peer. */
                tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
                return true;
        } else if (foc->len == 0) { /* Client requesting a cookie */
                tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
                                        ip_hdr(skb)->daddr, valid_foc);
                NET_INC_STATS_BH(sock_net(sk),
                                 LINUX_MIB_TCPFASTOPENCOOKIEREQD);
        } else {
                /* Client sent a cookie with wrong size. Treat it
                 * the same as invalid and return a valid one.
                 */
                tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
                                        ip_hdr(skb)->daddr, valid_foc);
        }
        return false;
}

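/* Return-value summary for tcp_fastopen_check() above: true means the SYN may
 * be accepted as a Fast Open request (its data, if any, is acknowledged and
 * later queued by tcp_v4_conn_req_fastopen() below); false means the request
 * falls back to the regular three-way handshake, with valid_foc echoed in the
 * SYN-ACK whenever a fresh cookie has to be handed to the client.
 */
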
static int tcp_v4_conn_req_fastopen(struct sock *sk,
                                    struct sk_buff *skb,
                                    struct sk_buff *skb_synack,
                                    struct request_sock *req)
{
        struct tcp_sock *tp = tcp_sk(sk);
        struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
        const struct inet_request_sock *ireq = inet_rsk(req);
        struct sock *child;
        int err;

        req->num_retrans = 0;
        req->num_timeout = 0;
        req->sk = NULL;

        child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
        if (child == NULL) {
                NET_INC_STATS_BH(sock_net(sk),
                                 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
                kfree_skb(skb_synack);
                return -1;
        }
        err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr,
                                    ireq->ir_rmt_addr, ireq->opt);
        err = net_xmit_eval(err);
        if (!err)
                tcp_rsk(req)->snt_synack = tcp_time_stamp;
        /* XXX (TFO) - is it ok to ignore error and continue? */

        spin_lock(&queue->fastopenq->lock);
        queue->fastopenq->qlen++;
        spin_unlock(&queue->fastopenq->lock);

        /* Initialize the child socket. Have to fix some values to take
         * into account the child is a Fast Open socket and is created
         * only out of the bits carried in the SYN packet.
         */
        tp = tcp_sk(child);

        tp->fastopen_rsk = req;
        /* Do a hold on the listener sk so that if the listener is being
         * closed, the child that has been accepted can live on and still
         * access listen_lock.
         */
        sock_hold(sk);
        tcp_rsk(req)->listener = sk;

        /* RFC1323: The window in SYN & SYN/ACK segments is never
         * scaled. So correct it appropriately.
         */
        tp->snd_wnd = ntohs(tcp_hdr(skb)->window);

        /* Activate the retrans timer so that SYNACK can be retransmitted.
         * The request socket is not added to the SYN table of the parent
         * because it's been added to the accept queue directly.
         */
        inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
                                  TCP_TIMEOUT_INIT, TCP_RTO_MAX);

        /* Add the child socket directly into the accept queue */
        inet_csk_reqsk_queue_add(sk, req, child);

        /* Now finish processing the fastopen child socket. */
        inet_csk(child)->icsk_af_ops->rebuild_header(child);
        tcp_init_congestion_control(child);
        tcp_mtup_init(child);
        tcp_init_metrics(child);
        tcp_init_buffer_space(child);

        /* Queue the data carried in the SYN packet. We need to first
         * bump skb's refcnt because the caller will attempt to free it.
         *
         * XXX (TFO) - we honor a zero-payload TFO request for now.
         * (Any reason not to?)
         */
        if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) {
                /* Don't queue the skb if there is no payload in SYN.
                 * XXX (TFO) - How about SYN+FIN?
                 */
                tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
        } else {
                skb = skb_get(skb);
                skb_dst_drop(skb);
                __skb_pull(skb, tcp_hdr(skb)->doff * 4);
                skb_set_owner_r(skb, child);
                __skb_queue_tail(&child->sk_receive_queue, skb);
                tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
                tp->syn_data_acked = 1;
        }
        sk->sk_data_ready(sk, 0);
        bh_unlock_sock(child);
        sock_put(child);
        WARN_ON(req->sk == NULL);
        return 0;
}

int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
        struct tcp_options_received tmp_opt;
        struct request_sock *req;
        struct inet_request_sock *ireq;
        struct tcp_sock *tp = tcp_sk(sk);
        struct dst_entry *dst = NULL;
        __be32 saddr = ip_hdr(skb)->saddr;
        __be32 daddr = ip_hdr(skb)->daddr;
        __u32 isn = TCP_SKB_CB(skb)->when;
        bool want_cookie = false;
        struct flowi4 fl4;
        struct tcp_fastopen_cookie foc = { .len = -1 };
        struct tcp_fastopen_cookie valid_foc = { .len = -1 };
        struct sk_buff *skb_synack;
        int do_fastopen;

        /* Never answer to SYNs sent to broadcast or multicast */
        if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
                goto drop;

        /* TW buckets are converted to open requests without
         * limitations, they conserve resources and peer is
         * evidently real one.
         */
        if ((sysctl_tcp_syncookies == 2 ||
             inet_csk_reqsk_queue_is_full(sk)) && !isn) {
                want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
                if (!want_cookie)
                        goto drop;
        }

        /* Accept backlog is full. If we have already queued enough
         * of warm entries in syn queue, drop request. It is better than
         * clogging syn queue with openreqs with exponentially increasing
         * timeout.
         */
        if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
                goto drop;
        }

        req = inet_reqsk_alloc(&tcp_request_sock_ops);
        if (!req)
                goto drop;

#ifdef CONFIG_TCP_MD5SIG
        tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
#endif

        tcp_clear_options(&tmp_opt);
        tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
        tmp_opt.user_mss  = tp->rx_opt.user_mss;
        tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);

        if (want_cookie && !tmp_opt.saw_tstamp)
                tcp_clear_options(&tmp_opt);

        tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
        tcp_openreq_init(req, &tmp_opt, skb);

        ireq = inet_rsk(req);
        ireq->ir_loc_addr = daddr;
        ireq->ir_rmt_addr = saddr;
        ireq->no_srccheck = inet_sk(sk)->transparent;
        ireq->opt = tcp_v4_save_options(skb);

        if (security_inet_conn_request(sk, skb, req))
                goto drop_and_free;

        if (!want_cookie || tmp_opt.tstamp_ok)
                TCP_ECN_create_request(req, skb, sock_net(sk));

        if (want_cookie) {
                isn = cookie_v4_init_sequence(sk, skb, &req->mss);
                req->cookie_ts = tmp_opt.tstamp_ok;
        } else if (!isn) {
                /* VJ's idea. We save last timestamp seen
                 * from the destination in peer table, when entering
                 * state TIME-WAIT, and check against it before
                 * accepting new connection request.
                 *
                 * If "isn" is not zero, this request hit alive
                 * timewait bucket, so that all the necessary checks
                 * are made in the function processing timewait state.
                 */
                if (tmp_opt.saw_tstamp &&
                    tcp_death_row.sysctl_tw_recycle &&
                    (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
                    fl4.daddr == saddr) {
                        if (!tcp_peer_is_proven(req, dst, true)) {
                                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
                                goto drop_and_release;
                        }
                }
                /* Kill the following clause, if you dislike this way. */
                else if (!sysctl_tcp_syncookies &&
                         (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
                          (sysctl_max_syn_backlog >> 2)) &&
                         !tcp_peer_is_proven(req, dst, false)) {
                        /* Without syncookies last quarter of
                         * backlog is filled with destinations,
                         * proven to be alive.
                         * It means that we continue to communicate
                         * to destinations, already remembered
                         * to the moment of synflood.
                         */
                        LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
                                       &saddr, ntohs(tcp_hdr(skb)->source));
                        goto drop_and_release;
                }

                isn = tcp_v4_init_sequence(skb);
        }
        tcp_rsk(req)->snt_isn = isn;

        if (dst == NULL) {
                dst = inet_csk_route_req(sk, &fl4, req);
                if (dst == NULL)
                        goto drop_and_free;
        }
        do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc);

        /* We don't call tcp_v4_send_synack() directly because we need
         * to make sure a child socket can be created successfully before
         * sending back synack!
         *
         * XXX (TFO) - Ideally one would simply call tcp_v4_send_synack()
         * (or better yet, call tcp_send_synack() in the child context
         * directly, but will have to fix bunch of other code first)
         * after syn_recv_sock() except one will need to first fix the
         * latter to remove its dependency on the current implementation
         * of tcp_v4_send_synack()->tcp_select_initial_window().
         */
        skb_synack = tcp_make_synack(sk, dst, req,
            fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL);

        if (skb_synack) {
                __tcp_v4_send_check(skb_synack, ireq->ir_loc_addr, ireq->ir_rmt_addr);
                skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb));
        } else
                goto drop_and_free;

        if (likely(!do_fastopen)) {
                int err;
                err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr,
                                            ireq->ir_rmt_addr, ireq->opt);
                err = net_xmit_eval(err);
                if (err || want_cookie)
                        goto drop_and_free;

                tcp_rsk(req)->snt_synack = tcp_time_stamp;
                tcp_rsk(req)->listener = NULL;
                /* Add the request_sock to the SYN table */
                inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
                if (fastopen_cookie_present(&foc) && foc.len != 0)
                        NET_INC_STATS_BH(sock_net(sk),
                                         LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
        } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req))
                goto drop_and_free;

        return 0;

drop_and_release:
        dst_release(dst);
drop_and_free:
        reqsk_free(req);
drop:
        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
        return 0;
}
EXPORT_SYMBOL(tcp_v4_conn_request);


1618/*
1619 * The three way handshake has completed - we got a valid synack -
1620 * now create the new socket.
1621 */
1622struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
60236fdd 1623 struct request_sock *req,
1da177e4
LT
1624 struct dst_entry *dst)
1625{
2e6599cb 1626 struct inet_request_sock *ireq;
1da177e4
LT
1627 struct inet_sock *newinet;
1628 struct tcp_sock *newtp;
1629 struct sock *newsk;
cfb6eeb4
YH
1630#ifdef CONFIG_TCP_MD5SIG
1631 struct tcp_md5sig_key *key;
1632#endif
f6d8bd05 1633 struct ip_options_rcu *inet_opt;
1da177e4
LT
1634
1635 if (sk_acceptq_is_full(sk))
1636 goto exit_overflow;
1637
1da177e4
LT
1638 newsk = tcp_create_openreq_child(sk, req, skb);
1639 if (!newsk)
093d2823 1640 goto exit_nonewsk;
1da177e4 1641
bcd76111 1642 newsk->sk_gso_type = SKB_GSO_TCPV4;
fae6ef87 1643 inet_sk_rx_dst_set(newsk, skb);
1da177e4
LT
1644
1645 newtp = tcp_sk(newsk);
1646 newinet = inet_sk(newsk);
2e6599cb 1647 ireq = inet_rsk(req);
634fb979
ED
1648 newinet->inet_daddr = ireq->ir_rmt_addr;
1649 newinet->inet_rcv_saddr = ireq->ir_loc_addr;
1650 newinet->inet_saddr = ireq->ir_loc_addr;
f6d8bd05
ED
1651 inet_opt = ireq->opt;
1652 rcu_assign_pointer(newinet->inet_opt, inet_opt);
2e6599cb 1653 ireq->opt = NULL;
463c84b9 1654 newinet->mc_index = inet_iif(skb);
eddc9ec5 1655 newinet->mc_ttl = ip_hdr(skb)->ttl;
4c507d28 1656 newinet->rcv_tos = ip_hdr(skb)->tos;
d83d8461 1657 inet_csk(newsk)->icsk_ext_hdr_len = 0;
f6d8bd05
ED
1658 if (inet_opt)
1659 inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
c720c7e8 1660 newinet->inet_id = newtp->write_seq ^ jiffies;
1da177e4 1661
dfd25fff
ED
1662 if (!dst) {
1663 dst = inet_csk_route_child_sock(sk, newsk, req);
1664 if (!dst)
1665 goto put_and_exit;
1666 } else {
1667 /* syncookie case : see end of cookie_v4_check() */
1668 }
0e734419
DM
1669 sk_setup_caps(newsk, dst);
1670
1da177e4 1671 tcp_sync_mss(newsk, dst_mtu(dst));
0dbaee3b 1672 newtp->advmss = dst_metric_advmss(dst);
f5fff5dc
TQ
1673 if (tcp_sk(sk)->rx_opt.user_mss &&
1674 tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1675 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1676
1da177e4
LT
1677 tcp_initialize_rcv_mss(newsk);
1678
cfb6eeb4
YH
1679#ifdef CONFIG_TCP_MD5SIG
1680 /* Copy over the MD5 key from the original socket */
a915da9b
ED
1681 key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1682 AF_INET);
c720c7e8 1683 if (key != NULL) {
cfb6eeb4
YH
1684 /*
1685 * We're using one, so create a matching key
1686 * on the newsk structure. If we fail to get
1687 * memory, then we end up not copying the key
1688 * across. Shucks.
1689 */
a915da9b
ED
1690 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
1691 AF_INET, key->key, key->keylen, GFP_ATOMIC);
a465419b 1692 sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
cfb6eeb4
YH
1693 }
1694#endif
1695
0e734419
DM
1696 if (__inet_inherit_port(sk, newsk) < 0)
1697 goto put_and_exit;
9327f705 1698 __inet_hash_nolisten(newsk, NULL);
1da177e4
LT
1699
1700 return newsk;
1701
1702exit_overflow:
de0744af 1703 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
093d2823
BS
1704exit_nonewsk:
1705 dst_release(dst);
1da177e4 1706exit:
de0744af 1707 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1da177e4 1708 return NULL;
0e734419 1709put_and_exit:
e337e24d
CP
1710 inet_csk_prepare_forced_close(newsk);
1711 tcp_done(newsk);
0e734419 1712 goto exit;
1da177e4 1713}
4bc2f18b 1714EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
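The CONFIG_TCP_MD5SIG block above copies a key configured on the listener onto each accepted child. A hedged user-space sketch of configuring such a key follows; TCP_MD5SIG and struct tcp_md5sig are the standard RFC 2385 API (from <netinet/tcp.h> on recent glibc, <linux/tcp.h> otherwise), and the peer address and key text are made-up example values.

/* Sketch: attach an RFC 2385 TCP MD5 key for one peer to a socket.
 * Requires CONFIG_TCP_MD5SIG in the running kernel; the peer address
 * and key passed by the caller are purely illustrative.
 */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netinet/tcp.h>	/* struct tcp_md5sig on recent glibc */
#include <string.h>
#include <sys/socket.h>

static int set_md5_key(int fd, const char *peer_ip, const char *key)
{
	struct tcp_md5sig md5;
	struct sockaddr_in *sin = (struct sockaddr_in *)&md5.tcpm_addr;
	size_t len = strlen(key);

	if (len > TCP_MD5SIG_MAXKEYLEN)
		return -1;

	memset(&md5, 0, sizeof(md5));
	sin->sin_family = AF_INET;
	if (inet_pton(AF_INET, peer_ip, &sin->sin_addr) != 1)
		return -1;

	md5.tcpm_keylen = len;
	memcpy(md5.tcpm_key, key, len);

	/* On a listening socket this key is inherited by children
	 * accepted from that peer, which is what the #ifdef block in
	 * tcp_v4_syn_recv_sock() implements on the kernel side.
	 */
	return setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
}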
1da177e4
LT
1715
1716static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1717{
aa8223c7 1718 struct tcphdr *th = tcp_hdr(skb);
eddc9ec5 1719 const struct iphdr *iph = ip_hdr(skb);
1da177e4 1720 struct sock *nsk;
60236fdd 1721 struct request_sock **prev;
1da177e4 1722 /* Find possible connection requests. */
463c84b9
ACM
1723 struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1724 iph->saddr, iph->daddr);
1da177e4 1725 if (req)
8336886f 1726 return tcp_check_req(sk, skb, req, prev, false);
1da177e4 1727
3b1e0a65 1728 nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
c67499c0 1729 th->source, iph->daddr, th->dest, inet_iif(skb));
1da177e4
LT
1730
1731 if (nsk) {
1732 if (nsk->sk_state != TCP_TIME_WAIT) {
1733 bh_lock_sock(nsk);
1734 return nsk;
1735 }
9469c7b4 1736 inet_twsk_put(inet_twsk(nsk));
1da177e4
LT
1737 return NULL;
1738 }
1739
1740#ifdef CONFIG_SYN_COOKIES
af9b4738 1741 if (!th->syn)
1da177e4
LT
1742 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1743#endif
1744 return sk;
1745}
1746
b51655b9 1747static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1da177e4 1748{
eddc9ec5
ACM
1749 const struct iphdr *iph = ip_hdr(skb);
1750
84fa7933 1751 if (skb->ip_summed == CHECKSUM_COMPLETE) {
eddc9ec5
ACM
1752 if (!tcp_v4_check(skb->len, iph->saddr,
1753 iph->daddr, skb->csum)) {
fb286bb2 1754 skb->ip_summed = CHECKSUM_UNNECESSARY;
1da177e4 1755 return 0;
fb286bb2 1756 }
1da177e4 1757 }
fb286bb2 1758
eddc9ec5 1759 skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
fb286bb2
HX
1760 skb->len, IPPROTO_TCP, 0);
1761
1da177e4 1762 if (skb->len <= 76) {
fb286bb2 1763 return __skb_checksum_complete(skb);
1da177e4
LT
1764 }
1765 return 0;
1766}
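tcp_v4_checksum_init() either verifies a CHECKSUM_COMPLETE value against the pseudo-header or seeds skb->csum with the pseudo-header sum for later folding. The sketch below reproduces the same RFC 1071 arithmetic in plain user-space C; the function names are hypothetical and only illustrate what csum_tcpudp_nofold()/tcp_v4_check() compute.

/* Illustration of the TCP/IPv4 pseudo-header checksum. Addresses are
 * passed in host byte order; example segment contents are made up.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static uint16_t csum_fold32(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

/* RFC 1071 sum over a buffer, continuing from an initial value. */
static uint32_t csum_partial16(const uint8_t *p, size_t len, uint32_t sum)
{
	while (len > 1) {
		sum += (uint32_t)p[0] << 8 | p[1];
		p += 2;
		len -= 2;
	}
	if (len)			/* odd trailing byte */
		sum += (uint32_t)p[0] << 8;
	return sum;
}

/* Checksum over pseudo-header (saddr, daddr, proto, length) + segment.
 * With the checksum field zeroed this is the value to store; computed
 * over a segment that already carries a valid checksum it returns 0,
 * which is exactly the "return 0 means good" convention above.
 */
static uint16_t tcp4_checksum(uint32_t saddr, uint32_t daddr,
			      const uint8_t *seg, size_t len)
{
	uint32_t sum = 0;

	sum += (saddr >> 16) + (saddr & 0xffff);
	sum += (daddr >> 16) + (daddr & 0xffff);
	sum += 6;			/* IPPROTO_TCP */
	sum += (uint32_t)len;
	return csum_fold32(csum_partial16(seg, len, sum));
}

int main(void)
{
	/* 20-byte header (doff=5, SYN set, checksum field 0) + 4 data bytes */
	uint8_t seg[24] = { [12] = 0x50, [13] = 0x02,
			    [20] = 'd', [21] = 'a', [22] = 't', [23] = 'a' };

	printf("checksum to store: 0x%04x\n",
	       tcp4_checksum(0xc0000201, 0xc0000202, seg, sizeof(seg)));
	return 0;
}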
1767
1768
 1769/* The socket must have its spinlock held when we get
 1770 * here.
1771 *
1772 * We have a potential double-lock case here, so even when
1773 * doing backlog processing we use the BH locking scheme.
1774 * This is because we cannot sleep with the original spinlock
1775 * held.
1776 */
1777int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1778{
cfb6eeb4
YH
1779 struct sock *rsk;
1780#ifdef CONFIG_TCP_MD5SIG
1781 /*
1782 * We really want to reject the packet as early as possible
1783 * if:
 1784	 * o We're expecting an MD5'd packet and there is no MD5 TCP option
1785 * o There is an MD5 option and we're not expecting one
1786 */
7174259e 1787 if (tcp_v4_inbound_md5_hash(sk, skb))
cfb6eeb4
YH
1788 goto discard;
1789#endif
1790
1da177e4 1791 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
404e0a8b
ED
1792 struct dst_entry *dst = sk->sk_rx_dst;
1793
bdeab991 1794 sock_rps_save_rxhash(sk, skb);
404e0a8b 1795 if (dst) {
505fbcf0
ED
1796 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1797 dst->ops->check(dst, 0) == NULL) {
92101b3b
DM
1798 dst_release(dst);
1799 sk->sk_rx_dst = NULL;
1800 }
1801 }
c995ae22 1802 tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
1da177e4
LT
1803 return 0;
1804 }
1805
ab6a5bb6 1806 if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1da177e4
LT
1807 goto csum_err;
1808
1809 if (sk->sk_state == TCP_LISTEN) {
1810 struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1811 if (!nsk)
1812 goto discard;
1813
1814 if (nsk != sk) {
bdeab991 1815 sock_rps_save_rxhash(nsk, skb);
cfb6eeb4
YH
1816 if (tcp_child_process(sk, nsk, skb)) {
1817 rsk = nsk;
1da177e4 1818 goto reset;
cfb6eeb4 1819 }
1da177e4
LT
1820 return 0;
1821 }
ca55158c 1822 } else
bdeab991 1823 sock_rps_save_rxhash(sk, skb);
ca55158c 1824
aa8223c7 1825 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
cfb6eeb4 1826 rsk = sk;
1da177e4 1827 goto reset;
cfb6eeb4 1828 }
1da177e4
LT
1829 return 0;
1830
1831reset:
cfb6eeb4 1832 tcp_v4_send_reset(rsk, skb);
1da177e4
LT
1833discard:
1834 kfree_skb(skb);
1835 /* Be careful here. If this function gets more complicated and
1836 * gcc suffers from register pressure on the x86, sk (in %ebx)
1837 * might be destroyed here. This current version compiles correctly,
1838 * but you have been warned.
1839 */
1840 return 0;
1841
1842csum_err:
6a5dc9e5 1843 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
63231bdd 1844 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1da177e4
LT
1845 goto discard;
1846}
4bc2f18b 1847EXPORT_SYMBOL(tcp_v4_do_rcv);
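The error paths in tcp_v4_do_rcv() and its callers bump SNMP MIB counters; the LINUX_MIB_* entries surface in the TcpExt section of /proc/net/netstat and the TCP_MIB_* entries in /proc/net/snmp. A small sketch that dumps the TcpExt name/value pairs (e.g. ListenDrops, TCPBacklogDrop), assuming only the familiar two-line name/value layout of that file:

#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <string.h>

int main(void)
{
	FILE *f = fopen("/proc/net/netstat", "r");
	char names[4096], values[4096];

	if (!f) {
		perror("fopen");
		return 1;
	}

	/* Each section is a pair of lines: "TcpExt: <names>" followed
	 * by "TcpExt: <values>"; walk them in lockstep.
	 */
	while (fgets(names, sizeof(names), f) &&
	       fgets(values, sizeof(values), f)) {
		char *n, *v, *sn, *sv;

		if (strncmp(names, "TcpExt:", 7) != 0)
			continue;

		n = strtok_r(names + 7, " \n", &sn);
		v = strtok_r(values + 7, " \n", &sv);
		while (n && v) {
			printf("%-24s %s\n", n, v);
			n = strtok_r(NULL, " \n", &sn);
			v = strtok_r(NULL, " \n", &sv);
		}
	}
	fclose(f);
	return 0;
}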
1da177e4 1848
160eb5a6 1849void tcp_v4_early_demux(struct sk_buff *skb)
41063e9d 1850{
41063e9d
DM
1851 const struct iphdr *iph;
1852 const struct tcphdr *th;
1853 struct sock *sk;
41063e9d 1854
41063e9d 1855 if (skb->pkt_type != PACKET_HOST)
160eb5a6 1856 return;
41063e9d 1857
45f00f99 1858 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
160eb5a6 1859 return;
41063e9d
DM
1860
1861 iph = ip_hdr(skb);
45f00f99 1862 th = tcp_hdr(skb);
41063e9d
DM
1863
1864 if (th->doff < sizeof(struct tcphdr) / 4)
160eb5a6 1865 return;
41063e9d 1866
45f00f99 1867 sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
41063e9d 1868 iph->saddr, th->source,
7011d085 1869 iph->daddr, ntohs(th->dest),
9cb429d6 1870 skb->skb_iif);
41063e9d
DM
1871 if (sk) {
1872 skb->sk = sk;
1873 skb->destructor = sock_edemux;
1874 if (sk->sk_state != TCP_TIME_WAIT) {
1875 struct dst_entry *dst = sk->sk_rx_dst;
505fbcf0 1876
41063e9d
DM
1877 if (dst)
1878 dst = dst_check(dst, 0);
92101b3b 1879 if (dst &&
505fbcf0 1880 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
92101b3b 1881 skb_dst_set_noref(skb, dst);
41063e9d
DM
1882 }
1883 }
41063e9d
DM
1884}
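tcp_v4_early_demux() does the expensive established-socket lookup once, straight from the headers, and caches the route in sk->sk_rx_dst so later packets of the same flow can skip a routing lookup. The toy below illustrates only that lookup-once-then-reuse shape with a made-up flow table; none of the names correspond to kernel structures, and the kernel keys the cache on the socket rather than on a separate table.

/* Toy flow cache keyed by the 4-tuple; the string stands in for the
 * cached dst entry. All names and types are hypothetical.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct flow_key {
	uint32_t saddr, daddr;
	uint16_t sport, dport;
};

struct flow_entry {
	struct flow_key key;
	char cached_route[32];	/* stands in for sk->sk_rx_dst */
	int valid;
};

#define FLOW_BUCKETS 256
static struct flow_entry table[FLOW_BUCKETS];

static unsigned int flow_hash(const struct flow_key *k)
{
	/* simple mix; the kernel uses a keyed jhash instead */
	return (k->saddr ^ k->daddr ^ ((unsigned int)k->sport << 16 | k->dport)) % FLOW_BUCKETS;
}

static const char *flow_lookup(const struct flow_key *k)
{
	struct flow_entry *e = &table[flow_hash(k)];

	if (e->valid && !memcmp(&e->key, k, sizeof(*k)))
		return e->cached_route;	/* hit: skip the slow path */
	return NULL;
}

static void flow_cache(const struct flow_key *k, const char *route)
{
	struct flow_entry *e = &table[flow_hash(k)];

	e->key = *k;
	snprintf(e->cached_route, sizeof(e->cached_route), "%s", route);
	e->valid = 1;
}

int main(void)
{
	struct flow_key k = { 0xc0000201, 0xc0000202, 12345, 80 };

	if (!flow_lookup(&k))
		flow_cache(&k, "dst#1");	/* slow path, then cache */
	printf("cached: %s\n", flow_lookup(&k));
	return 0;
}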
1885
b2fb4f54
ED
1886/* Packet is added to VJ-style prequeue for processing in process
1887 * context, if a reader task is waiting. Apparently, this exciting
1888 * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
1889 * failed somewhere. Latency? Burstiness? Well, at least now we will
 1890 * see why it failed. 8)8) --ANK
1891 *
1892 */
1893bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
1894{
1895 struct tcp_sock *tp = tcp_sk(sk);
1896
1897 if (sysctl_tcp_low_latency || !tp->ucopy.task)
1898 return false;
1899
1900 if (skb->len <= tcp_hdrlen(skb) &&
1901 skb_queue_len(&tp->ucopy.prequeue) == 0)
1902 return false;
1903
58717686 1904 skb_dst_force(skb);
b2fb4f54
ED
1905 __skb_queue_tail(&tp->ucopy.prequeue, skb);
1906 tp->ucopy.memory += skb->truesize;
1907 if (tp->ucopy.memory > sk->sk_rcvbuf) {
1908 struct sk_buff *skb1;
1909
1910 BUG_ON(sock_owned_by_user(sk));
1911
1912 while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
1913 sk_backlog_rcv(sk, skb1);
1914 NET_INC_STATS_BH(sock_net(sk),
1915 LINUX_MIB_TCPPREQUEUEDROPPED);
1916 }
1917
1918 tp->ucopy.memory = 0;
1919 } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
1920 wake_up_interruptible_sync_poll(sk_sleep(sk),
1921 POLLIN | POLLRDNORM | POLLRDBAND);
1922 if (!inet_csk_ack_scheduled(sk))
1923 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
1924 (3 * tcp_rto_min(sk)) / 4,
1925 TCP_RTO_MAX);
1926 }
1927 return true;
1928}
1929EXPORT_SYMBOL(tcp_prequeue);
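tcp_prequeue() is bypassed entirely when sysctl_tcp_low_latency is set (or when no reader task is blocked in receive). The knob is exported as net.ipv4.tcp_low_latency; a minimal sketch that reads it:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/net/ipv4/tcp_low_latency", "r");
	int val;

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fscanf(f, "%d", &val) != 1) {
		fclose(f);
		return 1;
	}
	fclose(f);

	printf("tcp_low_latency = %d (%s)\n", val,
	       val ? "prequeue bypassed" : "prequeue may be used");
	return 0;
}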
1930
1da177e4
LT
1931/*
1932 * From tcp_input.c
1933 */
1934
1935int tcp_v4_rcv(struct sk_buff *skb)
1936{
eddc9ec5 1937 const struct iphdr *iph;
cf533ea5 1938 const struct tcphdr *th;
1da177e4
LT
1939 struct sock *sk;
1940 int ret;
a86b1e30 1941 struct net *net = dev_net(skb->dev);
1da177e4
LT
1942
1943 if (skb->pkt_type != PACKET_HOST)
1944 goto discard_it;
1945
1946 /* Count it even if it's bad */
63231bdd 1947 TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1da177e4
LT
1948
1949 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1950 goto discard_it;
1951
aa8223c7 1952 th = tcp_hdr(skb);
1da177e4
LT
1953
1954 if (th->doff < sizeof(struct tcphdr) / 4)
1955 goto bad_packet;
1956 if (!pskb_may_pull(skb, th->doff * 4))
1957 goto discard_it;
1958
1959 /* An explanation is required here, I think.
1960 * Packet length and doff are validated by header prediction,
caa20d9a 1961	 * provided the case of th->doff==0 is eliminated.
1da177e4 1962 * So, we defer the checks. */
60476372 1963 if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
6a5dc9e5 1964 goto csum_error;
1da177e4 1965
aa8223c7 1966 th = tcp_hdr(skb);
eddc9ec5 1967 iph = ip_hdr(skb);
1da177e4
LT
1968 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1969 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1970 skb->len - th->doff * 4);
1971 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1972 TCP_SKB_CB(skb)->when = 0;
b82d1bb4 1973 TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1da177e4
LT
1974 TCP_SKB_CB(skb)->sacked = 0;
1975
9a1f27c4 1976 sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1da177e4
LT
1977 if (!sk)
1978 goto no_tcp_socket;
1979
bb134d5d
ED
1980process:
1981 if (sk->sk_state == TCP_TIME_WAIT)
1982 goto do_time_wait;
1983
6cce09f8
ED
1984 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1985 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
d218d111 1986 goto discard_and_relse;
6cce09f8 1987 }
d218d111 1988
1da177e4
LT
1989 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1990 goto discard_and_relse;
b59c2701 1991 nf_reset(skb);
1da177e4 1992
fda9ef5d 1993 if (sk_filter(sk, skb))
1da177e4
LT
1994 goto discard_and_relse;
1995
8b80cda5 1996 sk_mark_napi_id(sk, skb);
1da177e4
LT
1997 skb->dev = NULL;
1998
c6366184 1999 bh_lock_sock_nested(sk);
1da177e4
LT
2000 ret = 0;
2001 if (!sock_owned_by_user(sk)) {
1a2449a8
CL
2002#ifdef CONFIG_NET_DMA
2003 struct tcp_sock *tp = tcp_sk(sk);
2004 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
a2bd1140 2005 tp->ucopy.dma_chan = net_dma_find_channel();
1a2449a8 2006 if (tp->ucopy.dma_chan)
1da177e4 2007 ret = tcp_v4_do_rcv(sk, skb);
1a2449a8
CL
2008 else
2009#endif
2010 {
2011 if (!tcp_prequeue(sk, skb))
ae8d7f88 2012 ret = tcp_v4_do_rcv(sk, skb);
1a2449a8 2013 }
da882c1f
ED
2014 } else if (unlikely(sk_add_backlog(sk, skb,
2015 sk->sk_rcvbuf + sk->sk_sndbuf))) {
6b03a53a 2016 bh_unlock_sock(sk);
6cce09f8 2017 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
6b03a53a
ZY
2018 goto discard_and_relse;
2019 }
1da177e4
LT
2020 bh_unlock_sock(sk);
2021
2022 sock_put(sk);
2023
2024 return ret;
2025
2026no_tcp_socket:
2027 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
2028 goto discard_it;
2029
2030 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
6a5dc9e5
ED
2031csum_error:
2032 TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
1da177e4 2033bad_packet:
63231bdd 2034 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1da177e4 2035 } else {
cfb6eeb4 2036 tcp_v4_send_reset(NULL, skb);
1da177e4
LT
2037 }
2038
2039discard_it:
2040 /* Discard frame. */
2041 kfree_skb(skb);
e905a9ed 2042 return 0;
1da177e4
LT
2043
2044discard_and_relse:
2045 sock_put(sk);
2046 goto discard_it;
2047
2048do_time_wait:
2049 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
9469c7b4 2050 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
2051 goto discard_it;
2052 }
2053
6a5dc9e5 2054 if (skb->len < (th->doff << 2)) {
9469c7b4 2055 inet_twsk_put(inet_twsk(sk));
6a5dc9e5
ED
2056 goto bad_packet;
2057 }
2058 if (tcp_checksum_complete(skb)) {
2059 inet_twsk_put(inet_twsk(sk));
2060 goto csum_error;
1da177e4 2061 }
9469c7b4 2062 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1da177e4 2063 case TCP_TW_SYN: {
c346dca1 2064 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
c67499c0 2065 &tcp_hashinfo,
da5e3630 2066 iph->saddr, th->source,
eddc9ec5 2067 iph->daddr, th->dest,
463c84b9 2068 inet_iif(skb));
1da177e4 2069 if (sk2) {
9469c7b4
YH
2070 inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
2071 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
2072 sk = sk2;
2073 goto process;
2074 }
2075 /* Fall through to ACK */
2076 }
2077 case TCP_TW_ACK:
2078 tcp_v4_timewait_ack(sk, skb);
2079 break;
2080 case TCP_TW_RST:
2081 goto no_tcp_socket;
2082 case TCP_TW_SUCCESS:;
2083 }
2084 goto discard_it;
2085}
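tcp_v4_rcv() precomputes the control-block sequence numbers: end_seq = seq + SYN + FIN + payload length, where the payload length is skb->len minus th->doff * 4 header bytes. A worked example of that arithmetic on made-up header values:

/* Worked example of the TCP_SKB_CB() bookkeeping in tcp_v4_rcv(). */
#include <stdint.h>
#include <stdio.h>

struct tiny_tcphdr {
	uint32_t seq;
	uint8_t  doff;		/* header length in 32-bit words */
	uint8_t  syn, fin;	/* flag bits, already extracted */
};

int main(void)
{
	struct tiny_tcphdr th = { .seq = 1000, .doff = 8, .syn = 0, .fin = 1 };
	uint32_t skb_len = 552;	/* total TCP segment length in bytes */
	uint32_t payload = skb_len - th.doff * 4;
	uint32_t end_seq = th.seq + th.syn + th.fin + payload;

	/* 552 - 32 = 520 payload bytes, plus 1 for FIN: end_seq = 1521 */
	printf("seq=%u payload=%u end_seq=%u\n", th.seq, payload, end_seq);
	return 0;
}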
2086
ccb7c410
DM
2087static struct timewait_sock_ops tcp_timewait_sock_ops = {
2088 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
2089 .twsk_unique = tcp_twsk_unique,
2090 .twsk_destructor= tcp_twsk_destructor,
ccb7c410 2091};
1da177e4 2092
63d02d15 2093void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
5d299f3d
ED
2094{
2095 struct dst_entry *dst = skb_dst(skb);
2096
2097 dst_hold(dst);
2098 sk->sk_rx_dst = dst;
2099 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
2100}
63d02d15 2101EXPORT_SYMBOL(inet_sk_rx_dst_set);
5d299f3d 2102
3b401a81 2103const struct inet_connection_sock_af_ops ipv4_specific = {
543d9cfe
ACM
2104 .queue_xmit = ip_queue_xmit,
2105 .send_check = tcp_v4_send_check,
2106 .rebuild_header = inet_sk_rebuild_header,
5d299f3d 2107 .sk_rx_dst_set = inet_sk_rx_dst_set,
543d9cfe
ACM
2108 .conn_request = tcp_v4_conn_request,
2109 .syn_recv_sock = tcp_v4_syn_recv_sock,
543d9cfe
ACM
2110 .net_header_len = sizeof(struct iphdr),
2111 .setsockopt = ip_setsockopt,
2112 .getsockopt = ip_getsockopt,
2113 .addr2sockaddr = inet_csk_addr2sockaddr,
2114 .sockaddr_len = sizeof(struct sockaddr_in),
ab1e0a13 2115 .bind_conflict = inet_csk_bind_conflict,
3fdadf7d 2116#ifdef CONFIG_COMPAT
543d9cfe
ACM
2117 .compat_setsockopt = compat_ip_setsockopt,
2118 .compat_getsockopt = compat_ip_getsockopt,
3fdadf7d 2119#endif
1da177e4 2120};
4bc2f18b 2121EXPORT_SYMBOL(ipv4_specific);
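ipv4_specific is the IPv4 instance of the connection-socket ops table: everything address-family specific sits behind const function pointers that the generic TCP code reaches through icsk_af_ops. A generic sketch of that pattern, with all names hypothetical:

/* The "af_ops" pattern: per-family behaviour behind a const table of
 * function pointers, dispatched by family-agnostic code.
 */
#include <stdio.h>

struct conn;				/* opaque to the generic code */

struct af_ops {
	int  (*queue_xmit)(struct conn *c, const char *data);
	void (*send_check)(struct conn *c);
	int   header_len;
};

static int v4_queue_xmit(struct conn *c, const char *data)
{
	(void)c;
	printf("v4 xmit: %s\n", data);
	return 0;
}

static void v4_send_check(struct conn *c)
{
	(void)c;
	printf("v4 checksum\n");
}

static const struct af_ops v4_ops = {
	.queue_xmit = v4_queue_xmit,
	.send_check = v4_send_check,
	.header_len = 20,		/* analogue of sizeof(struct iphdr) */
};

/* Generic code only ever goes through the table, like the TCP core
 * going through icsk->icsk_af_ops.
 */
static int conn_send(struct conn *c, const struct af_ops *ops, const char *d)
{
	ops->send_check(c);
	return ops->queue_xmit(c, d);
}

int main(void)
{
	conn_send(NULL, &v4_ops, "hello");
	return 0;
}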
1da177e4 2122
cfb6eeb4 2123#ifdef CONFIG_TCP_MD5SIG
b2e4b3de 2124static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
cfb6eeb4 2125 .md5_lookup = tcp_v4_md5_lookup,
49a72dfb 2126 .calc_md5_hash = tcp_v4_md5_hash_skb,
cfb6eeb4 2127 .md5_parse = tcp_v4_parse_md5_keys,
cfb6eeb4 2128};
b6332e6c 2129#endif
cfb6eeb4 2130
1da177e4
LT
2131/* NOTE: A lot of things set to zero explicitly by call to
2132 * sk_alloc() so need not be done here.
2133 */
2134static int tcp_v4_init_sock(struct sock *sk)
2135{
6687e988 2136 struct inet_connection_sock *icsk = inet_csk(sk);
1da177e4 2137
900f65d3 2138 tcp_init_sock(sk);
1da177e4 2139
8292a17a 2140 icsk->icsk_af_ops = &ipv4_specific;
900f65d3 2141
cfb6eeb4 2142#ifdef CONFIG_TCP_MD5SIG
ac807fa8 2143 tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
cfb6eeb4 2144#endif
1da177e4 2145
1da177e4
LT
2146 return 0;
2147}
2148
7d06b2e0 2149void tcp_v4_destroy_sock(struct sock *sk)
1da177e4
LT
2150{
2151 struct tcp_sock *tp = tcp_sk(sk);
2152
2153 tcp_clear_xmit_timers(sk);
2154
6687e988 2155 tcp_cleanup_congestion_control(sk);
317a76f9 2156
1da177e4 2157	/* Clean up the write buffer. */
fe067e8a 2158 tcp_write_queue_purge(sk);
1da177e4
LT
2159
2160 /* Cleans up our, hopefully empty, out_of_order_queue. */
e905a9ed 2161 __skb_queue_purge(&tp->out_of_order_queue);
1da177e4 2162
cfb6eeb4
YH
2163#ifdef CONFIG_TCP_MD5SIG
2164 /* Clean up the MD5 key list, if any */
2165 if (tp->md5sig_info) {
a915da9b 2166 tcp_clear_md5_list(sk);
a8afca03 2167 kfree_rcu(tp->md5sig_info, rcu);
cfb6eeb4
YH
2168 tp->md5sig_info = NULL;
2169 }
2170#endif
2171
1a2449a8
CL
2172#ifdef CONFIG_NET_DMA
2173 /* Cleans up our sk_async_wait_queue */
e905a9ed 2174 __skb_queue_purge(&sk->sk_async_wait_queue);
1a2449a8
CL
2175#endif
2176
1da177e4
LT
 2178	/* Clean the prequeue; it really must be empty by now */
2178 __skb_queue_purge(&tp->ucopy.prequeue);
2179
2180 /* Clean up a referenced TCP bind bucket. */
463c84b9 2181 if (inet_csk(sk)->icsk_bind_hash)
ab1e0a13 2182 inet_put_port(sk);
1da177e4 2183
168a8f58 2184 BUG_ON(tp->fastopen_rsk != NULL);
435cf559 2185
cf60af03
YC
2186 /* If socket is aborted during connect operation */
2187 tcp_free_fastopen_req(tp);
2188
180d8cd9 2189 sk_sockets_allocated_dec(sk);
d1a4c0b3 2190 sock_release_memcg(sk);
1da177e4 2191}
1da177e4
LT
2192EXPORT_SYMBOL(tcp_v4_destroy_sock);
2193
2194#ifdef CONFIG_PROC_FS
2195/* Proc filesystem TCP sock list dumping. */
2196
a8b690f9
TH
2197/*
 2198 * Get the next listener socket following cur. If cur is NULL, get the first socket
2199 * starting from bucket given in st->bucket; when st->bucket is zero the
2200 * very first socket in the hash table is returned.
2201 */
1da177e4
LT
2202static void *listening_get_next(struct seq_file *seq, void *cur)
2203{
463c84b9 2204 struct inet_connection_sock *icsk;
c25eb3bf 2205 struct hlist_nulls_node *node;
1da177e4 2206 struct sock *sk = cur;
5caea4ea 2207 struct inet_listen_hashbucket *ilb;
5799de0b 2208 struct tcp_iter_state *st = seq->private;
a4146b1b 2209 struct net *net = seq_file_net(seq);
1da177e4
LT
2210
2211 if (!sk) {
a8b690f9 2212 ilb = &tcp_hashinfo.listening_hash[st->bucket];
5caea4ea 2213 spin_lock_bh(&ilb->lock);
c25eb3bf 2214 sk = sk_nulls_head(&ilb->head);
a8b690f9 2215 st->offset = 0;
1da177e4
LT
2216 goto get_sk;
2217 }
5caea4ea 2218 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1da177e4 2219 ++st->num;
a8b690f9 2220 ++st->offset;
1da177e4
LT
2221
2222 if (st->state == TCP_SEQ_STATE_OPENREQ) {
60236fdd 2223 struct request_sock *req = cur;
1da177e4 2224
72a3effa 2225 icsk = inet_csk(st->syn_wait_sk);
1da177e4
LT
2226 req = req->dl_next;
2227 while (1) {
2228 while (req) {
bdccc4ca 2229 if (req->rsk_ops->family == st->family) {
1da177e4
LT
2230 cur = req;
2231 goto out;
2232 }
2233 req = req->dl_next;
2234 }
72a3effa 2235 if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
1da177e4
LT
2236 break;
2237get_req:
463c84b9 2238 req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
1da177e4 2239 }
1bde5ac4 2240 sk = sk_nulls_next(st->syn_wait_sk);
1da177e4 2241 st->state = TCP_SEQ_STATE_LISTENING;
463c84b9 2242 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4 2243 } else {
e905a9ed 2244 icsk = inet_csk(sk);
463c84b9
ACM
2245 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2246 if (reqsk_queue_len(&icsk->icsk_accept_queue))
1da177e4 2247 goto start_req;
463c84b9 2248 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1bde5ac4 2249 sk = sk_nulls_next(sk);
1da177e4
LT
2250 }
2251get_sk:
c25eb3bf 2252 sk_nulls_for_each_from(sk, node) {
8475ef9f
PE
2253 if (!net_eq(sock_net(sk), net))
2254 continue;
2255 if (sk->sk_family == st->family) {
1da177e4
LT
2256 cur = sk;
2257 goto out;
2258 }
e905a9ed 2259 icsk = inet_csk(sk);
463c84b9
ACM
2260 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2261 if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
1da177e4
LT
2262start_req:
2263 st->uid = sock_i_uid(sk);
2264 st->syn_wait_sk = sk;
2265 st->state = TCP_SEQ_STATE_OPENREQ;
2266 st->sbucket = 0;
2267 goto get_req;
2268 }
463c84b9 2269 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4 2270 }
5caea4ea 2271 spin_unlock_bh(&ilb->lock);
a8b690f9 2272 st->offset = 0;
0f7ff927 2273 if (++st->bucket < INET_LHTABLE_SIZE) {
5caea4ea
ED
2274 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2275 spin_lock_bh(&ilb->lock);
c25eb3bf 2276 sk = sk_nulls_head(&ilb->head);
1da177e4
LT
2277 goto get_sk;
2278 }
2279 cur = NULL;
2280out:
2281 return cur;
2282}
2283
2284static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2285{
a8b690f9
TH
2286 struct tcp_iter_state *st = seq->private;
2287 void *rc;
2288
2289 st->bucket = 0;
2290 st->offset = 0;
2291 rc = listening_get_next(seq, NULL);
1da177e4
LT
2292
2293 while (rc && *pos) {
2294 rc = listening_get_next(seq, rc);
2295 --*pos;
2296 }
2297 return rc;
2298}
2299
05dbc7b5 2300static inline bool empty_bucket(const struct tcp_iter_state *st)
6eac5604 2301{
05dbc7b5 2302 return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
6eac5604
AK
2303}
2304
a8b690f9
TH
2305/*
 2306 * Get the first established socket, starting from the bucket given in st->bucket.
2307 * If st->bucket is zero, the very first socket in the hash is returned.
2308 */
1da177e4
LT
2309static void *established_get_first(struct seq_file *seq)
2310{
5799de0b 2311 struct tcp_iter_state *st = seq->private;
a4146b1b 2312 struct net *net = seq_file_net(seq);
1da177e4
LT
2313 void *rc = NULL;
2314
a8b690f9
TH
2315 st->offset = 0;
2316 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
1da177e4 2317 struct sock *sk;
3ab5aee7 2318 struct hlist_nulls_node *node;
9db66bdc 2319 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
1da177e4 2320
6eac5604
AK
2321 /* Lockless fast path for the common case of empty buckets */
2322 if (empty_bucket(st))
2323 continue;
2324
9db66bdc 2325 spin_lock_bh(lock);
3ab5aee7 2326 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
f40c8174 2327 if (sk->sk_family != st->family ||
878628fb 2328 !net_eq(sock_net(sk), net)) {
1da177e4
LT
2329 continue;
2330 }
2331 rc = sk;
2332 goto out;
2333 }
9db66bdc 2334 spin_unlock_bh(lock);
1da177e4
LT
2335 }
2336out:
2337 return rc;
2338}
2339
2340static void *established_get_next(struct seq_file *seq, void *cur)
2341{
2342 struct sock *sk = cur;
3ab5aee7 2343 struct hlist_nulls_node *node;
5799de0b 2344 struct tcp_iter_state *st = seq->private;
a4146b1b 2345 struct net *net = seq_file_net(seq);
1da177e4
LT
2346
2347 ++st->num;
a8b690f9 2348 ++st->offset;
1da177e4 2349
05dbc7b5 2350 sk = sk_nulls_next(sk);
1da177e4 2351
3ab5aee7 2352 sk_nulls_for_each_from(sk, node) {
878628fb 2353 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
05dbc7b5 2354 return sk;
1da177e4
LT
2355 }
2356
05dbc7b5
ED
2357 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2358 ++st->bucket;
2359 return established_get_first(seq);
1da177e4
LT
2360}
2361
2362static void *established_get_idx(struct seq_file *seq, loff_t pos)
2363{
a8b690f9
TH
2364 struct tcp_iter_state *st = seq->private;
2365 void *rc;
2366
2367 st->bucket = 0;
2368 rc = established_get_first(seq);
1da177e4
LT
2369
2370 while (rc && pos) {
2371 rc = established_get_next(seq, rc);
2372 --pos;
7174259e 2373 }
1da177e4
LT
2374 return rc;
2375}
2376
2377static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2378{
2379 void *rc;
5799de0b 2380 struct tcp_iter_state *st = seq->private;
1da177e4 2381
1da177e4
LT
2382 st->state = TCP_SEQ_STATE_LISTENING;
2383 rc = listening_get_idx(seq, &pos);
2384
2385 if (!rc) {
1da177e4
LT
2386 st->state = TCP_SEQ_STATE_ESTABLISHED;
2387 rc = established_get_idx(seq, pos);
2388 }
2389
2390 return rc;
2391}
2392
a8b690f9
TH
2393static void *tcp_seek_last_pos(struct seq_file *seq)
2394{
2395 struct tcp_iter_state *st = seq->private;
2396 int offset = st->offset;
2397 int orig_num = st->num;
2398 void *rc = NULL;
2399
2400 switch (st->state) {
2401 case TCP_SEQ_STATE_OPENREQ:
2402 case TCP_SEQ_STATE_LISTENING:
2403 if (st->bucket >= INET_LHTABLE_SIZE)
2404 break;
2405 st->state = TCP_SEQ_STATE_LISTENING;
2406 rc = listening_get_next(seq, NULL);
2407 while (offset-- && rc)
2408 rc = listening_get_next(seq, rc);
2409 if (rc)
2410 break;
2411 st->bucket = 0;
05dbc7b5 2412 st->state = TCP_SEQ_STATE_ESTABLISHED;
a8b690f9
TH
2413 /* Fallthrough */
2414 case TCP_SEQ_STATE_ESTABLISHED:
a8b690f9
TH
2415 if (st->bucket > tcp_hashinfo.ehash_mask)
2416 break;
2417 rc = established_get_first(seq);
2418 while (offset-- && rc)
2419 rc = established_get_next(seq, rc);
2420 }
2421
2422 st->num = orig_num;
2423
2424 return rc;
2425}
2426
1da177e4
LT
2427static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2428{
5799de0b 2429 struct tcp_iter_state *st = seq->private;
a8b690f9
TH
2430 void *rc;
2431
2432 if (*pos && *pos == st->last_pos) {
2433 rc = tcp_seek_last_pos(seq);
2434 if (rc)
2435 goto out;
2436 }
2437
1da177e4
LT
2438 st->state = TCP_SEQ_STATE_LISTENING;
2439 st->num = 0;
a8b690f9
TH
2440 st->bucket = 0;
2441 st->offset = 0;
2442 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2443
2444out:
2445 st->last_pos = *pos;
2446 return rc;
1da177e4
LT
2447}
2448
2449static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2450{
a8b690f9 2451 struct tcp_iter_state *st = seq->private;
1da177e4 2452 void *rc = NULL;
1da177e4
LT
2453
2454 if (v == SEQ_START_TOKEN) {
2455 rc = tcp_get_idx(seq, 0);
2456 goto out;
2457 }
1da177e4
LT
2458
2459 switch (st->state) {
2460 case TCP_SEQ_STATE_OPENREQ:
2461 case TCP_SEQ_STATE_LISTENING:
2462 rc = listening_get_next(seq, v);
2463 if (!rc) {
1da177e4 2464 st->state = TCP_SEQ_STATE_ESTABLISHED;
a8b690f9
TH
2465 st->bucket = 0;
2466 st->offset = 0;
1da177e4
LT
2467 rc = established_get_first(seq);
2468 }
2469 break;
2470 case TCP_SEQ_STATE_ESTABLISHED:
1da177e4
LT
2471 rc = established_get_next(seq, v);
2472 break;
2473 }
2474out:
2475 ++*pos;
a8b690f9 2476 st->last_pos = *pos;
1da177e4
LT
2477 return rc;
2478}
2479
2480static void tcp_seq_stop(struct seq_file *seq, void *v)
2481{
5799de0b 2482 struct tcp_iter_state *st = seq->private;
1da177e4
LT
2483
2484 switch (st->state) {
2485 case TCP_SEQ_STATE_OPENREQ:
2486 if (v) {
463c84b9
ACM
2487 struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2488 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4
LT
2489 }
2490 case TCP_SEQ_STATE_LISTENING:
2491 if (v != SEQ_START_TOKEN)
5caea4ea 2492 spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
1da177e4 2493 break;
1da177e4
LT
2494 case TCP_SEQ_STATE_ESTABLISHED:
2495 if (v)
9db66bdc 2496 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
1da177e4
LT
2497 break;
2498 }
2499}
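tcp_seq_start/next/stop above implement the usual seq_file contract, with extra bucket/offset bookkeeping so iteration over the hash tables can resume between reads. A minimal module sketch of the bare contract, iterating a static array instead of sockets (names made up, and the proc_create()/file_operations interface matches the same kernel era as this file):

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static const int demo[] = { 10, 20, 30 };

static void *demo_start(struct seq_file *seq, loff_t *pos)
{
	return *pos < ARRAY_SIZE(demo) ? (void *)&demo[*pos] : NULL;
}

static void *demo_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return demo_start(seq, pos);
}

static void demo_stop(struct seq_file *seq, void *v)
{
	/* nothing to unlock in this toy; tcp_seq_stop() drops the
	 * bucket lock taken by listening/established_get_*()
	 */
}

static int demo_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%d\n", *(const int *)v);
	return 0;
}

static const struct seq_operations demo_seq_ops = {
	.start = demo_start,
	.next  = demo_next,
	.stop  = demo_stop,
	.show  = demo_show,
};

static int demo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &demo_seq_ops);
}

static const struct file_operations demo_fops = {
	.owner   = THIS_MODULE,
	.open    = demo_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};

static int __init demo_init(void)
{
	return proc_create("seqfile_demo", 0444, NULL, &demo_fops) ? 0 : -ENOMEM;
}

static void __exit demo_exit(void)
{
	remove_proc_entry("seqfile_demo", NULL);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");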
2500
73cb88ec 2501int tcp_seq_open(struct inode *inode, struct file *file)
1da177e4 2502{
d9dda78b 2503 struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
1da177e4 2504 struct tcp_iter_state *s;
52d6f3f1 2505 int err;
1da177e4 2506
52d6f3f1
DL
2507 err = seq_open_net(inode, file, &afinfo->seq_ops,
2508 sizeof(struct tcp_iter_state));
2509 if (err < 0)
2510 return err;
f40c8174 2511
52d6f3f1 2512 s = ((struct seq_file *)file->private_data)->private;
1da177e4 2513 s->family = afinfo->family;
a8b690f9 2514 s->last_pos = 0;
f40c8174
DL
2515 return 0;
2516}
73cb88ec 2517EXPORT_SYMBOL(tcp_seq_open);
f40c8174 2518
6f8b13bc 2519int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
1da177e4
LT
2520{
2521 int rc = 0;
2522 struct proc_dir_entry *p;
2523
9427c4b3
DL
2524 afinfo->seq_ops.start = tcp_seq_start;
2525 afinfo->seq_ops.next = tcp_seq_next;
2526 afinfo->seq_ops.stop = tcp_seq_stop;
2527
84841c3c 2528 p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
73cb88ec 2529 afinfo->seq_fops, afinfo);
84841c3c 2530 if (!p)
1da177e4
LT
2531 rc = -ENOMEM;
2532 return rc;
2533}
4bc2f18b 2534EXPORT_SYMBOL(tcp_proc_register);
1da177e4 2535
6f8b13bc 2536void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
1da177e4 2537{
ece31ffd 2538 remove_proc_entry(afinfo->name, net->proc_net);
1da177e4 2539}
4bc2f18b 2540EXPORT_SYMBOL(tcp_proc_unregister);
1da177e4 2541
cf533ea5 2542static void get_openreq4(const struct sock *sk, const struct request_sock *req,
652586df 2543 struct seq_file *f, int i, kuid_t uid)
1da177e4 2544{
2e6599cb 2545 const struct inet_request_sock *ireq = inet_rsk(req);
a399a805 2546 long delta = req->expires - jiffies;
1da177e4 2547
5e659e4c 2548 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
652586df 2549 " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
1da177e4 2550 i,
634fb979 2551 ireq->ir_loc_addr,
c720c7e8 2552 ntohs(inet_sk(sk)->inet_sport),
634fb979
ED
2553 ireq->ir_rmt_addr,
2554 ntohs(ireq->ir_rmt_port),
1da177e4
LT
2555 TCP_SYN_RECV,
2556 0, 0, /* could print option size, but that is af dependent. */
2557 1, /* timers active (only the expire timer) */
a399a805 2558 jiffies_delta_to_clock_t(delta),
e6c022a4 2559 req->num_timeout,
a7cb5a49 2560 from_kuid_munged(seq_user_ns(f), uid),
1da177e4
LT
2561 0, /* non standard timer */
2562 0, /* open_requests have no inode */
2563 atomic_read(&sk->sk_refcnt),
652586df 2564 req);
1da177e4
LT
2565}
2566
652586df 2567static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
1da177e4
LT
2568{
2569 int timer_active;
2570 unsigned long timer_expires;
cf533ea5 2571 const struct tcp_sock *tp = tcp_sk(sk);
cf4c6bf8 2572 const struct inet_connection_sock *icsk = inet_csk(sk);
cf533ea5 2573 const struct inet_sock *inet = inet_sk(sk);
168a8f58 2574 struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
c720c7e8
ED
2575 __be32 dest = inet->inet_daddr;
2576 __be32 src = inet->inet_rcv_saddr;
2577 __u16 destp = ntohs(inet->inet_dport);
2578 __u16 srcp = ntohs(inet->inet_sport);
49d09007 2579 int rx_queue;
1da177e4 2580
6ba8a3b1
ND
2581 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2582 icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
2583 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1da177e4 2584 timer_active = 1;
463c84b9
ACM
2585 timer_expires = icsk->icsk_timeout;
2586 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1da177e4 2587 timer_active = 4;
463c84b9 2588 timer_expires = icsk->icsk_timeout;
cf4c6bf8 2589 } else if (timer_pending(&sk->sk_timer)) {
1da177e4 2590 timer_active = 2;
cf4c6bf8 2591 timer_expires = sk->sk_timer.expires;
1da177e4
LT
2592 } else {
2593 timer_active = 0;
2594 timer_expires = jiffies;
2595 }
2596
49d09007
ED
2597 if (sk->sk_state == TCP_LISTEN)
2598 rx_queue = sk->sk_ack_backlog;
2599 else
2600 /*
 2602	 * because we don't lock the socket, we might find a transient negative value
2602 */
2603 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2604
5e659e4c 2605 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
652586df 2606 "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
cf4c6bf8 2607 i, src, srcp, dest, destp, sk->sk_state,
47da8ee6 2608 tp->write_seq - tp->snd_una,
49d09007 2609 rx_queue,
1da177e4 2610 timer_active,
a399a805 2611 jiffies_delta_to_clock_t(timer_expires - jiffies),
463c84b9 2612 icsk->icsk_retransmits,
a7cb5a49 2613 from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
6687e988 2614 icsk->icsk_probes_out,
cf4c6bf8
IJ
2615 sock_i_ino(sk),
2616 atomic_read(&sk->sk_refcnt), sk,
7be87351
SH
2617 jiffies_to_clock_t(icsk->icsk_rto),
2618 jiffies_to_clock_t(icsk->icsk_ack.ato),
463c84b9 2619 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
1da177e4 2620 tp->snd_cwnd,
168a8f58
JC
2621 sk->sk_state == TCP_LISTEN ?
2622 (fastopenq ? fastopenq->max_qlen : 0) :
652586df 2623 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
1da177e4
LT
2624}
2625
cf533ea5 2626static void get_timewait4_sock(const struct inet_timewait_sock *tw,
652586df 2627 struct seq_file *f, int i)
1da177e4 2628{
23f33c2d 2629 __be32 dest, src;
1da177e4 2630 __u16 destp, srcp;
a399a805 2631 long delta = tw->tw_ttd - jiffies;
1da177e4
LT
2632
2633 dest = tw->tw_daddr;
2634 src = tw->tw_rcv_saddr;
2635 destp = ntohs(tw->tw_dport);
2636 srcp = ntohs(tw->tw_sport);
2637
5e659e4c 2638 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
652586df 2639 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
1da177e4 2640 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
a399a805 2641 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
652586df 2642 atomic_read(&tw->tw_refcnt), tw);
1da177e4
LT
2643}
2644
2645#define TMPSZ 150
2646
2647static int tcp4_seq_show(struct seq_file *seq, void *v)
2648{
5799de0b 2649 struct tcp_iter_state *st;
05dbc7b5 2650 struct sock *sk = v;
1da177e4 2651
652586df 2652 seq_setwidth(seq, TMPSZ - 1);
1da177e4 2653 if (v == SEQ_START_TOKEN) {
652586df 2654 seq_puts(seq, " sl local_address rem_address st tx_queue "
1da177e4
LT
2655 "rx_queue tr tm->when retrnsmt uid timeout "
2656 "inode");
2657 goto out;
2658 }
2659 st = seq->private;
2660
2661 switch (st->state) {
2662 case TCP_SEQ_STATE_LISTENING:
2663 case TCP_SEQ_STATE_ESTABLISHED:
05dbc7b5 2664 if (sk->sk_state == TCP_TIME_WAIT)
652586df 2665 get_timewait4_sock(v, seq, st->num);
05dbc7b5 2666 else
652586df 2667 get_tcp4_sock(v, seq, st->num);
1da177e4
LT
2668 break;
2669 case TCP_SEQ_STATE_OPENREQ:
652586df 2670 get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid);
1da177e4
LT
2671 break;
2672 }
1da177e4 2673out:
652586df 2674 seq_pad(seq, '\n');
1da177e4
LT
2675 return 0;
2676}
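get_openreq4(), get_tcp4_sock() and get_timewait4_sock() produce the familiar /proc/net/tcp rows behind the header printed above. A user-space sketch that decodes the hex local/remote address columns back into dotted quads (minimal error handling; it assumes the reader runs on the same-endian machine that wrote the file, which is how the %08X of a __be32 comes out):

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/net/tcp", "r");
	char line[512];

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (!fgets(line, sizeof(line), f)) {	/* skip the header row */
		fclose(f);
		return 1;
	}

	while (fgets(line, sizeof(line), f)) {
		unsigned int laddr, raddr, lport, rport, state;
		struct in_addr la, ra;
		char lbuf[INET_ADDRSTRLEN], rbuf[INET_ADDRSTRLEN];

		if (sscanf(line, "%*d: %8X:%4X %8X:%4X %2X",
			   &laddr, &lport, &raddr, &rport, &state) != 5)
			continue;

		/* The hex string is the network-byte-order address read as
		 * a host-endian integer, so it can be stored into s_addr
		 * as-is on the same machine.
		 */
		la.s_addr = laddr;
		ra.s_addr = raddr;
		printf("%s:%u -> %s:%u state %02X\n",
		       inet_ntop(AF_INET, &la, lbuf, sizeof(lbuf)), lport,
		       inet_ntop(AF_INET, &ra, rbuf, sizeof(rbuf)), rport,
		       state);
	}
	fclose(f);
	return 0;
}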
2677
73cb88ec
AV
2678static const struct file_operations tcp_afinfo_seq_fops = {
2679 .owner = THIS_MODULE,
2680 .open = tcp_seq_open,
2681 .read = seq_read,
2682 .llseek = seq_lseek,
2683 .release = seq_release_net
2684};
2685
1da177e4 2686static struct tcp_seq_afinfo tcp4_seq_afinfo = {
1da177e4
LT
2687 .name = "tcp",
2688 .family = AF_INET,
73cb88ec 2689 .seq_fops = &tcp_afinfo_seq_fops,
9427c4b3
DL
2690 .seq_ops = {
2691 .show = tcp4_seq_show,
2692 },
1da177e4
LT
2693};
2694
2c8c1e72 2695static int __net_init tcp4_proc_init_net(struct net *net)
757764f6
PE
2696{
2697 return tcp_proc_register(net, &tcp4_seq_afinfo);
2698}
2699
2c8c1e72 2700static void __net_exit tcp4_proc_exit_net(struct net *net)
757764f6
PE
2701{
2702 tcp_proc_unregister(net, &tcp4_seq_afinfo);
2703}
2704
2705static struct pernet_operations tcp4_net_ops = {
2706 .init = tcp4_proc_init_net,
2707 .exit = tcp4_proc_exit_net,
2708};
2709
1da177e4
LT
2710int __init tcp4_proc_init(void)
2711{
757764f6 2712 return register_pernet_subsys(&tcp4_net_ops);
1da177e4
LT
2713}
2714
2715void tcp4_proc_exit(void)
2716{
757764f6 2717 unregister_pernet_subsys(&tcp4_net_ops);
1da177e4
LT
2718}
2719#endif /* CONFIG_PROC_FS */
2720
2721struct proto tcp_prot = {
2722 .name = "TCP",
2723 .owner = THIS_MODULE,
2724 .close = tcp_close,
2725 .connect = tcp_v4_connect,
2726 .disconnect = tcp_disconnect,
463c84b9 2727 .accept = inet_csk_accept,
1da177e4
LT
2728 .ioctl = tcp_ioctl,
2729 .init = tcp_v4_init_sock,
2730 .destroy = tcp_v4_destroy_sock,
2731 .shutdown = tcp_shutdown,
2732 .setsockopt = tcp_setsockopt,
2733 .getsockopt = tcp_getsockopt,
1da177e4 2734 .recvmsg = tcp_recvmsg,
7ba42910
CG
2735 .sendmsg = tcp_sendmsg,
2736 .sendpage = tcp_sendpage,
1da177e4 2737 .backlog_rcv = tcp_v4_do_rcv,
46d3ceab 2738 .release_cb = tcp_release_cb,
563d34d0 2739 .mtu_reduced = tcp_v4_mtu_reduced,
ab1e0a13
ACM
2740 .hash = inet_hash,
2741 .unhash = inet_unhash,
2742 .get_port = inet_csk_get_port,
1da177e4 2743 .enter_memory_pressure = tcp_enter_memory_pressure,
c9bee3b7 2744 .stream_memory_free = tcp_stream_memory_free,
1da177e4 2745 .sockets_allocated = &tcp_sockets_allocated,
0a5578cf 2746 .orphan_count = &tcp_orphan_count,
1da177e4
LT
2747 .memory_allocated = &tcp_memory_allocated,
2748 .memory_pressure = &tcp_memory_pressure,
a4fe34bf 2749 .sysctl_mem = sysctl_tcp_mem,
1da177e4
LT
2750 .sysctl_wmem = sysctl_tcp_wmem,
2751 .sysctl_rmem = sysctl_tcp_rmem,
2752 .max_header = MAX_TCP_HEADER,
2753 .obj_size = sizeof(struct tcp_sock),
3ab5aee7 2754 .slab_flags = SLAB_DESTROY_BY_RCU,
6d6ee43e 2755 .twsk_prot = &tcp_timewait_sock_ops,
60236fdd 2756 .rsk_prot = &tcp_request_sock_ops,
39d8cda7 2757 .h.hashinfo = &tcp_hashinfo,
7ba42910 2758 .no_autobind = true,
543d9cfe
ACM
2759#ifdef CONFIG_COMPAT
2760 .compat_setsockopt = compat_tcp_setsockopt,
2761 .compat_getsockopt = compat_tcp_getsockopt,
2762#endif
c255a458 2763#ifdef CONFIG_MEMCG_KMEM
d1a4c0b3
GC
2764 .init_cgroup = tcp_init_cgroup,
2765 .destroy_cgroup = tcp_destroy_cgroup,
2766 .proto_cgroup = tcp_proto_cgroup,
2767#endif
1da177e4 2768};
4bc2f18b 2769EXPORT_SYMBOL(tcp_prot);
1da177e4 2770
046ee902
DL
2771static int __net_init tcp_sk_init(struct net *net)
2772{
5d134f1c 2773 net->ipv4.sysctl_tcp_ecn = 2;
be9f4a44 2774 return 0;
046ee902
DL
2775}
2776
2777static void __net_exit tcp_sk_exit(struct net *net)
2778{
b099ce26
EB
2779}
2780
2781static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2782{
2783 inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
046ee902
DL
2784}
2785
2786static struct pernet_operations __net_initdata tcp_sk_ops = {
b099ce26
EB
2787 .init = tcp_sk_init,
2788 .exit = tcp_sk_exit,
2789 .exit_batch = tcp_sk_exit_batch,
046ee902
DL
2790};
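tcp_sk_init() gives every network namespace an ECN default of 2 (answer ECN when the peer requests it, but do not request it on outgoing connections); the value is exported as net.ipv4.tcp_ecn. A small sketch that reads it back:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/net/ipv4/tcp_ecn", "r");
	int ecn;

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fscanf(f, "%d", &ecn) != 1)
		ecn = -1;
	fclose(f);

	printf("tcp_ecn = %d (0=off, 1=request and accept, 2=accept only)\n", ecn);
	return 0;
}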
2791
9b0f976f 2792void __init tcp_v4_init(void)
1da177e4 2793{
5caea4ea 2794 inet_hashinfo_init(&tcp_hashinfo);
6a1b3054 2795 if (register_pernet_subsys(&tcp_sk_ops))
1da177e4 2796 panic("Failed to create the TCP control socket.\n");
1da177e4 2797}