/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 *
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *		See tcp.c for author information
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*
 * Changes:
 *		David S. Miller	:	New socket lookup architecture.
 *					This code is dedicated to John Dyson.
 *		David S. Miller :	Change semantics of established hash,
 *					half is devoted to TIME_WAIT sockets
 *					and the rest go in the other half.
 *		Andi Kleen :		Add support for syncookies and fixed
 *					some bugs: ip options weren't passed to
 *					the TCP layer, missed a check for an
 *					ACK bit.
 *		Andi Kleen :		Implemented fast path mtu discovery.
 *					Fixed many serious bugs in the
 *					request_sock handling and moved
 *					most of it into the af independent code.
 *					Added tail drop and some other bugfixes.
 *					Added new listen semantics.
 *		Mike McLagan	:	Routing by source
 *		Juan Jose Ciarlante:	ip_dynaddr bits
 *		Andi Kleen:		various fixes.
 *		Vitaly E. Lavrov :	Transparent proxy revived after year
 *					coma.
 *		Andi Kleen :		Fix new listen.
 *		Andi Kleen :		Fix accept error reporting.
 *		YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *		Alexey Kuznetsov:		allow both IPv4 and IPv6 sockets to bind
 *						a single port at the same time.
 */

#define pr_fmt(fmt) "TCP: " fmt

#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/inetdevice.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

int sysctl_tcp_low_latency __read_mostly;

#ifdef CONFIG_TCP_MD5SIG
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
#endif

struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);

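/* Generate a hard-to-predict initial sequence number for an incoming
 * connection, keyed by the address/port 4-tuple (RFC 6528 style), and a
 * per-connection timestamp offset derived from the address pair.
 */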
static u32 tcp_v4_init_seq(const struct sk_buff *skb)
{
	return secure_tcp_seq(ip_hdr(skb)->daddr,
			      ip_hdr(skb)->saddr,
			      tcp_hdr(skb)->dest,
			      tcp_hdr(skb)->source);
}

static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr);
}

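/* Decide whether a new outgoing connection may reuse a port pair still
 * held by a TIME-WAIT socket.  Returns 1 (taking a reference on the
 * TIME-WAIT socket) if reuse is safe, 0 otherwise.
 */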
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's one, only timestamp cache is
	   held not per host, but per port pair and TW bucket is used as state
	   holder.

	   If TW bucket has been already destroyed we fall back to VJ's scheme
	   and use initial timestamp retrieved from peer table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (!twp || (sock_net(sk)->ipv4.sysctl_tcp_tw_reuse &&
		      get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
		if (tp->write_seq == 0)
			tp->write_seq = 1;
		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		sock_hold(sktw);
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);

/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__be16 orig_sport, orig_dport;
	__be32 daddr, nexthop;
	struct flowi4 *fl4;
	struct rtable *rt;
	int err;
	struct ip_options_rcu *inet_opt;
	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     lockdep_sock_is_held(sk));
	if (inet_opt && inet_opt->opt.srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet_opt->opt.faddr;
	}

	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_TCP,
			      orig_sport, orig_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return err;
	}

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet_opt || !inet_opt->opt.srr)
		daddr = fl4->daddr;

	if (!inet->inet_saddr)
		inet->inet_saddr = fl4->saddr;
	sk_rcv_saddr_set(sk, inet->inet_saddr);

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		if (likely(!tp->repair))
			tp->write_seq	   = 0;
	}

	inet->inet_dport = usin->sin_port;
	sk_daddr_set(sk, daddr);

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and, without releasing the socket
	 * lock, select a source port, enter ourselves into the hash tables
	 * and complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(tcp_death_row, sk);
	if (err)
		goto failure;

	sk_set_txhash(sk);

	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto failure;
	}
	/* OK, now commit destination to socket.  */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);
	rt = NULL;

	if (likely(!tp->repair)) {
		if (!tp->write_seq)
			tp->write_seq = secure_tcp_seq(inet->inet_saddr,
						       inet->inet_daddr,
						       inet->inet_sport,
						       usin->sin_port);
		tp->tsoffset = secure_tcp_ts_off(sock_net(sk),
						 inet->inet_saddr,
						 inet->inet_daddr);
	}

	inet->inet_id = tp->write_seq ^ jiffies;

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto failure;

	err = tcp_connect(sk);

	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}
EXPORT_SYMBOL(tcp_v4_connect);

/*
 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
 * It can be called through tcp_release_cb() if socket was owned by user
 * at the time tcp_v4_err() was called to handle ICMP message.
 */
void tcp_v4_mtu_reduced(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct dst_entry *dst;
	u32 mtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;
	mtu = tcp_sk(sk)->mtu_info;
	dst = inet_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection will not be able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    ip_sk_accept_pmtu(sk) &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}
EXPORT_SYMBOL(tcp_v4_mtu_reduced);

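/* Handle an ICMP redirect by revalidating the cached route and letting
 * the dst's protocol-independent redirect handler update it.
 */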
static void do_redirect(struct sk_buff *skb, struct sock *sk)
{
	struct dst_entry *dst = __sk_dst_check(sk, 0);

	if (dst)
		dst->ops->redirect(dst, sk, skb);
}

/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
void tcp_req_err(struct sock *sk, u32 seq, bool abort)
{
	struct request_sock *req = inet_reqsk(sk);
	struct net *net = sock_net(sk);

	/* ICMPs are not backlogged, hence we cannot get
	 * an established socket here.
	 */
	if (seq != tcp_rsk(req)->snt_isn) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
	} else if (abort) {
		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(req->rsk_listener, req);
		tcp_listendrop(req->rsk_listener);
	}
	reqsk_put(req);
}
EXPORT_SYMBOL(tcp_req_err);

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 *
 */

void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
	struct inet_connection_sock *icsk;
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(icmp_skb)->type;
	const int code = icmp_hdr(icmp_skb)->code;
	struct sock *sk;
	struct sk_buff *skb;
	struct request_sock *fastopen;
	u32 seq, snd_una;
	s32 remaining;
	u32 delta_us;
	int err;
	struct net *net = dev_net(icmp_skb->dev);

	sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
				       th->dest, iph->saddr, ntohs(th->source),
				       inet_iif(icmp_skb));
	if (!sk) {
		__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}
	seq = ntohl(th->seq);
	if (sk->sk_state == TCP_NEW_SYN_RECV)
		return tcp_req_err(sk, seq,
				   type == ICMP_PARAMETERPROB ||
				   type == ICMP_TIME_EXCEEDED ||
				   (type == ICMP_DEST_UNREACH &&
				    (code == ICMP_NET_UNREACH ||
				     code == ICMP_HOST_UNREACH)));

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 * We do take care of PMTU discovery (RFC1191) special case :
	 * we can receive locally generated ICMP messages while socket is held.
	 */
	if (sock_owned_by_user(sk)) {
		if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
			__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
	}
	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	icsk = inet_csk(sk);
	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = tp->fastopen_rsk;
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_REDIRECT:
		if (!sock_owned_by_user(sk))
			do_redirect(icmp_skb, sk);
		goto out;
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			/* We are not interested in TCP_LISTEN and open_requests
			 * (SYN-ACKs sent out by Linux are always <576 bytes, so
			 * they should go through unfragmented).
			 */
			if (sk->sk_state == TCP_LISTEN)
				goto out;

			tp->mtu_info = info;
			if (!sock_owned_by_user(sk)) {
				tcp_v4_mtu_reduced(sk);
			} else {
				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags))
					sock_hold(sk);
			}
			goto out;
		}

		err = icmp_err_convert[code].errno;
		/* check if icmp_skb allows revert of backoff
		 * (see draft-zimmermann-tcp-lcd) */
		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
			break;
		if (seq != tp->snd_una || !icsk->icsk_retransmits ||
		    !icsk->icsk_backoff || fastopen)
			break;

		if (sock_owned_by_user(sk))
			break;

		icsk->icsk_backoff--;
		icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
					       TCP_TIMEOUT_INIT;
		icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);

		skb = tcp_write_queue_head(sk);
		BUG_ON(!skb);

		tcp_mstamp_refresh(tp);
		delta_us = (u32)(tp->tcp_mstamp - skb->skb_mstamp);
		remaining = icsk->icsk_rto -
			    usecs_to_jiffies(delta_us);

		if (remaining > 0) {
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  remaining, TCP_RTO_MAX);
		} else {
			/* RTO revert clocked out retransmission.
			 * Will retransmit now */
			tcp_retransmit_timer(sk);
		}

		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket
		 * is already accepted, it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters even these two messages finally lose
	 * their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 * --ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else {	/* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

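/* Fill in the TCP checksum of an outgoing segment.  With CHECKSUM_PARTIAL
 * only the pseudo-header sum is stored and the device completes the
 * checksum; otherwise it is computed in full over header and payload.
 */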
void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct tcphdr *th = tcp_hdr(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct tcphdr, check);
	} else {
		th->check = tcp_v4_check(skb->len, saddr, daddr,
					 csum_partial(th,
						      th->doff << 2,
						      skb->csum));
	}
}

/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
{
	const struct inet_sock *inet = inet_sk(sk);

	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_send_check);

/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 *	Answer: if a packet caused RST, it is not for a socket
 *		existing in our system, if it is matched to a socket,
 *		it is just duplicate segment or bug in other side's TCP.
 *	So we build the reply based only on the parameters that
 *	arrived with the segment.
 *	Exception: precedence violation. We do not implement it in any case.
 */

static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key = NULL;
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	struct net *net;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	/* If sk not NULL, it means we did a successful lookup and incoming
	 * route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
	rcu_read_lock();
	hash_location = tcp_parse_md5sig_option(th);
	if (sk && sk_fullsock(sk)) {
		key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
					&ip_hdr(skb)->saddr, AF_INET);
	} else if (hash_location) {
		/*
		 * active side is lost. Try to find listening socket through
		 * source port, and then find md5 key through listening socket.
		 * We are not losing security here:
		 * Incoming packet is checked with md5 hash with finding key,
		 * no RST generated if md5 hash doesn't match.
		 */
		sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
					     ip_hdr(skb)->saddr,
					     th->source, ip_hdr(skb)->daddr,
					     ntohs(th->source), inet_iif(skb));
		/* don't send rst if it can't find key */
		if (!sk1)
			goto out;

		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
					&ip_hdr(skb)->saddr, AF_INET);
		if (!key)
			goto out;

		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto out;
	}

	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk_transparent(sk)) ? IP_REPLY_ARG_NOSRCCHECK : 0;

	/* When socket is gone, all binding information is lost.
	 * routing might fail in this case. No choice here, if we choose to force
	 * input interface, we will misroute in case of asymmetric route.
	 */
	if (sk)
		arg.bound_dev_if = sk->sk_bound_dev_if;

	BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
		     offsetof(struct inet_timewait_sock, tw_bound_dev_if));

	arg.tos = ip_hdr(skb)->tos;
	arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	local_bh_disable();
	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
			      &arg, arg.iov[0].iov_len);

	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
	__TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
	local_bh_enable();

#ifdef CONFIG_TCP_MD5SIG
out:
	rcu_read_unlock();
#endif
}

/* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
 * outside socket context, is ugly, certainly. What can I do?
 */

static void tcp_v4_send_ack(const struct sock *sk,
			    struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key,
			    int reply_flags, u8 tos)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct net *net = sock_net(sk);
	struct ip_reply_arg arg;

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	if (tsecr) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tsval);
		rep.opt[2] = htonl(tsecr);
		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
	rep.th.seq     = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack     = 1;
	rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		int offset = (tsecr) ? 3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len/4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.flags = reply_flags;
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	if (oif)
		arg.bound_dev_if = oif;
	arg.tos = tos;
	arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
	local_bh_disable();
	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
			      &arg, arg.iov[0].iov_len);

	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
	local_bh_enable();
}

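/* Reply to a segment that arrives for a connection in TIME-WAIT with an
 * ACK built from the state saved in the timewait bucket (sequence
 * numbers, window, timestamps), without reviving the connection.
 */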
static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v4_send_ack(sk, skb,
			tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent,
			tw->tw_bound_dev_if,
			tcp_twsk_md5_key(tcptw),
			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
			tw->tw_tos
			);

	inet_twsk_put(tw);
}

static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
						 tcp_sk(sk)->snd_nxt;

	/* RFC 7323 2.3
	 * The window field (SEG.WND) of every outgoing segment, with the
	 * exception of <SYN> segments, MUST be right-shifted by
	 * Rcv.Wind.Shift bits:
	 */
	tcp_v4_send_ack(sk, skb, seq,
			tcp_rsk(req)->rcv_nxt,
			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
			req->ts_recent,
			0,
			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
					  AF_INET),
			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
			ip_hdr(skb)->tos);
}

/*
 *	Send a SYN-ACK after having received a SYN.
 *	This still operates on a request_sock only, not on a big
 *	socket.
 */
static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct flowi4 fl4;
	int err = -1;
	struct sk_buff *skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
		return -1;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type);

	if (skb) {
		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);

		err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
					    ireq->ir_rmt_addr,
					    ireq->opt);
		err = net_xmit_eval(err);
	}

	return err;
}

/*
 *	IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->opt);
}

#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

/* Find the Key structure for an address.  When several configured keys
 * match, the one with the longest prefix wins.
 */
struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
					 const union tcp_md5_addr *addr,
					 int family)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	const struct tcp_md5sig_info *md5sig;
	__be32 mask;
	struct tcp_md5sig_key *best_match = NULL;
	bool match;

	/* caller either holds rcu_read_lock() or socket lock */
	md5sig = rcu_dereference_check(tp->md5sig_info,
				       lockdep_sock_is_held(sk));
	if (!md5sig)
		return NULL;

	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
		if (key->family != family)
			continue;

		if (family == AF_INET) {
			mask = inet_make_mask(key->prefixlen);
			match = (key->addr.a4.s_addr & mask) ==
				(addr->a4.s_addr & mask);
#if IS_ENABLED(CONFIG_IPV6)
		} else if (family == AF_INET6) {
			match = ipv6_prefix_equal(&key->addr.a6, &addr->a6,
						  key->prefixlen);
#endif
		} else {
			match = false;
		}

		if (match && (!best_match ||
			      key->prefixlen > best_match->prefixlen))
			best_match = key;
	}
	return best_match;
}
EXPORT_SYMBOL(tcp_md5_do_lookup);

static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
						      const union tcp_md5_addr *addr,
						      int family, u8 prefixlen)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	unsigned int size = sizeof(struct in_addr);
	const struct tcp_md5sig_info *md5sig;

	/* caller either holds rcu_read_lock() or socket lock */
	md5sig = rcu_dereference_check(tp->md5sig_info,
				       lockdep_sock_is_held(sk));
	if (!md5sig)
		return NULL;
#if IS_ENABLED(CONFIG_IPV6)
	if (family == AF_INET6)
		size = sizeof(struct in6_addr);
#endif
	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
		if (key->family != family)
			continue;
		if (!memcmp(&key->addr, addr, size) &&
		    key->prefixlen == prefixlen)
			return key;
	}
	return NULL;
}

struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
					 const struct sock *addr_sk)
{
	const union tcp_md5_addr *addr;

	addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
	return tcp_md5_do_lookup(sk, addr, AF_INET);
}
EXPORT_SYMBOL(tcp_v4_md5_lookup);

/* This can be called on a newly created socket, from other files */
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
		   int family, u8 prefixlen, const u8 *newkey, u8 newkeylen,
		   gfp_t gfp)
{
	/* Add Key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_info *md5sig;

	key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
	if (key) {
		/* Pre-existing entry - just update that one. */
		memcpy(key->key, newkey, newkeylen);
		key->keylen = newkeylen;
		return 0;
	}

	md5sig = rcu_dereference_protected(tp->md5sig_info,
					   lockdep_sock_is_held(sk));
	if (!md5sig) {
		md5sig = kmalloc(sizeof(*md5sig), gfp);
		if (!md5sig)
			return -ENOMEM;

		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		INIT_HLIST_HEAD(&md5sig->head);
		rcu_assign_pointer(tp->md5sig_info, md5sig);
	}

	key = sock_kmalloc(sk, sizeof(*key), gfp);
	if (!key)
		return -ENOMEM;
	if (!tcp_alloc_md5sig_pool()) {
		sock_kfree_s(sk, key, sizeof(*key));
		return -ENOMEM;
	}

	memcpy(key->key, newkey, newkeylen);
	key->keylen = newkeylen;
	key->family = family;
	key->prefixlen = prefixlen;
	memcpy(&key->addr, addr,
	       (family == AF_INET6) ? sizeof(struct in6_addr) :
				      sizeof(struct in_addr));
	hlist_add_head_rcu(&key->node, &md5sig->head);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_add);

int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family,
		   u8 prefixlen)
{
	struct tcp_md5sig_key *key;

	key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
	if (!key)
		return -ENOENT;
	hlist_del_rcu(&key->node);
	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
	kfree_rcu(key, rcu);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_del);

static void tcp_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	struct hlist_node *n;
	struct tcp_md5sig_info *md5sig;

	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);

	hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
		hlist_del_rcu(&key->node);
		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
		kfree_rcu(key, rcu);
	}
}

static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
				 char __user *optval, int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
	u8 prefixlen = 32;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin->sin_family != AF_INET)
		return -EINVAL;

	if (optname == TCP_MD5SIG_EXT &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
		prefixlen = cmd.tcpm_prefixlen;
		if (prefixlen > 32)
			return -EINVAL;
	}

	if (!cmd.tcpm_keylen)
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
				      AF_INET, prefixlen);

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
			      AF_INET, prefixlen, cmd.tcpm_key, cmd.tcpm_keylen,
			      GFP_KERNEL);
}

static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
				   __be32 daddr, __be32 saddr,
				   const struct tcphdr *th, int nbytes)
{
	struct tcp4_pseudohdr *bp;
	struct scatterlist sg;
	struct tcphdr *_th;

	bp = hp->scratch;
	bp->saddr = saddr;
	bp->daddr = daddr;
	bp->pad = 0;
	bp->protocol = IPPROTO_TCP;
	bp->len = cpu_to_be16(nbytes);

	_th = (struct tcphdr *)(bp + 1);
	memcpy(_th, th, sizeof(*th));
	_th->check = 0;

	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
				sizeof(*bp) + sizeof(*th));
	return crypto_ahash_update(hp->md5_req);
}

static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;
	if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
			const struct sock *sk,
			const struct sk_buff *skb)
{
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;
	const struct tcphdr *th = tcp_hdr(skb);
	__be32 saddr, daddr;

	if (sk) { /* valid for establish/request sockets */
		saddr = sk->sk_rcv_saddr;
		daddr = sk->sk_daddr;
	} else {
		const struct iphdr *iph = ip_hdr(skb);
		saddr = iph->saddr;
		daddr = iph->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;

	if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
EXPORT_SYMBOL(tcp_v4_md5_hash_skb);

#endif

/* Called with rcu_read_lock() */
static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
				    const struct sk_buff *skb)
{
#ifdef CONFIG_TCP_MD5SIG
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and it's wrong.
	 */
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct iphdr *iph = ip_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	unsigned char newhash[16];

	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
					  AF_INET);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return false;

	if (hash_expected && !hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
	}

	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
				     &iph->saddr, ntohs(th->source),
				     &iph->daddr, ntohs(th->dest),
				     genhash ? " tcp_v4_calc_md5_hash failed"
					     : "");
		return true;
	}
	return false;
#endif
	return false;
}

static void tcp_v4_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
			    struct sk_buff *skb)
{
	struct inet_request_sock *ireq = inet_rsk(req);

	sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
	sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
	ireq->opt = tcp_v4_save_options(skb);
}

static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
					  struct flowi *fl,
					  const struct request_sock *req)
{
	return inet_csk_route_req(sk, &fl->u.ip4, req);
}

struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		=	PF_INET,
	.obj_size	=	sizeof(struct tcp_request_sock),
	.rtx_syn_ack	=	tcp_rtx_synack,
	.send_ack	=	tcp_v4_reqsk_send_ack,
	.destructor	=	tcp_v4_reqsk_destructor,
	.send_reset	=	tcp_v4_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.mss_clamp	=	TCP_MSS_DEFAULT,
#ifdef CONFIG_TCP_MD5SIG
	.req_md5_lookup	=	tcp_v4_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
#endif
	.init_req	=	tcp_v4_init_req,
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq =	cookie_v4_init_sequence,
#endif
	.route_req	=	tcp_v4_route_req,
	.init_seq	=	tcp_v4_init_seq,
	.init_ts_off	=	tcp_v4_init_ts_off,
	.send_synack	=	tcp_v4_send_synack,
};

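/* Entry point for an incoming SYN on a listening socket.  Broadcast and
 * multicast destinations never get an answer; everything else is handed
 * to the protocol-independent tcp_conn_request() with the IPv4 ops above.
 */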
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	/* Never answer to SYNs sent to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	return tcp_conn_request(&tcp_request_sock_ops,
				&tcp_request_sock_ipv4_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0;
}
EXPORT_SYMBOL(tcp_v4_conn_request);

/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
 */
struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst,
				  struct request_sock *req_unhash,
				  bool *own_req)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct ip_options_rcu *inet_opt;

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit_nonewsk;

	newsk->sk_gso_type = SKB_GSO_TCPV4;
	inet_sk_rx_dst_set(newsk, skb);

	newtp		      = tcp_sk(newsk);
	newinet		      = inet_sk(newsk);
	ireq		      = inet_rsk(req);
	sk_daddr_set(newsk, ireq->ir_rmt_addr);
	sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
	newsk->sk_bound_dev_if = ireq->ir_iif;
	newinet->inet_saddr   = ireq->ir_loc_addr;
	inet_opt	      = ireq->opt;
	rcu_assign_pointer(newinet->inet_opt, inet_opt);
	ireq->opt	      = NULL;
	newinet->mc_index     = inet_iif(skb);
	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
	newinet->rcv_tos      = ip_hdr(skb)->tos;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
	newinet->inet_id = newtp->write_seq ^ jiffies;

	if (!dst) {
		dst = inet_csk_route_child_sock(sk, newsk, req);
		if (!dst)
			goto put_and_exit;
	} else {
		/* syncookie case : see end of cookie_v4_check() */
	}
	sk_setup_caps(newsk, dst);

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
				AF_INET);
	if (key) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
			       AF_INET, 32, key->key, key->keylen, GFP_ATOMIC);
		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0)
		goto put_and_exit;
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
	if (*own_req)
		tcp_move_syn(newtp, req);

	return newsk;

exit_overflow:
	NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
	dst_release(dst);
exit:
	tcp_listendrop(sk);
	return NULL;
put_and_exit:
	inet_csk_prepare_forced_close(newsk);
	tcp_done(newsk);
	goto exit;
}
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	const struct tcphdr *th = tcp_hdr(skb);

	if (!th->syn)
		sk = cookie_v4_check(sk, skb);
#endif
	return sk;
}

/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst = sk->sk_rx_dst;

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
			    !dst->ops->check(dst, 0)) {
				dst_release(dst);
				sk->sk_rx_dst = NULL;
			}
		}
		tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v4_cookie_check(sk, skb);

		if (!nsk)
			goto discard;
		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb)) {
				rsk = nsk;
				goto reset;
			}
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb)) {
		rsk = sk;
		goto reset;
	}
	return 0;

reset:
	tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here. This current version compiles correctly,
	 * but you have been warned.
	 */
	return 0;

csum_err:
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;
}
EXPORT_SYMBOL(tcp_v4_do_rcv);

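/* Early demux: look up an established socket at the IP layer, before
 * routing, so that a cached input route (sk->sk_rx_dst) can be reused
 * and a second socket lookup avoided on the normal receive path.
 */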
void tcp_v4_early_demux(struct sk_buff *skb)
{
	const struct iphdr *iph;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	iph = ip_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
				       iph->saddr, th->source,
				       iph->daddr, ntohs(th->dest),
				       skb->skb_iif);
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk_fullsock(sk)) {
			struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);

			if (dst)
				dst = dst_check(dst, 0);
			if (dst &&
			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}

/* Packet is added to VJ-style prequeue for processing in process
 * context, if a reader task is waiting. Apparently, this exciting
 * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
 * failed somewhere. Latency? Burstiness? Well, at least now we will
 * see, why it failed. 8)8)				  --ANK
 *
 */
bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (sysctl_tcp_low_latency || !tp->ucopy.task)
		return false;

	if (skb->len <= tcp_hdrlen(skb) &&
	    skb_queue_len(&tp->ucopy.prequeue) == 0)
		return false;

	/* Before escaping RCU protected region, we need to take care of skb
	 * dst. Prequeue is only enabled for established sockets.
	 * For such sockets, we might need the skb dst only to set sk->sk_rx_dst
	 * Instead of doing full sk_rx_dst validity here, let's perform
	 * an optimistic check.
	 */
	if (likely(sk->sk_rx_dst))
		skb_dst_drop(skb);
	else
		skb_dst_force_safe(skb);

	__skb_queue_tail(&tp->ucopy.prequeue, skb);
	tp->ucopy.memory += skb->truesize;
	if (skb_queue_len(&tp->ucopy.prequeue) >= 32 ||
	    tp->ucopy.memory + atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) {
		struct sk_buff *skb1;

		BUG_ON(sock_owned_by_user(sk));
		__NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUEDROPPED,
				skb_queue_len(&tp->ucopy.prequeue));

		while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
			sk_backlog_rcv(sk, skb1);

		tp->ucopy.memory = 0;
	} else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
		wake_up_interruptible_sync_poll(sk_sleep(sk),
						POLLIN | POLLRDNORM | POLLRDBAND);
		if (!inet_csk_ack_scheduled(sk))
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
						  (3 * tcp_rto_min(sk)) / 4,
						  TCP_RTO_MAX);
	}
	return true;
}
EXPORT_SYMBOL(tcp_prequeue);

bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
{
	u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf;

	/* Only socket owner can try to collapse/prune rx queues
	 * to reduce memory overhead, so add a little headroom here.
	 * Few sockets backlog are possibly concurrently non empty.
	 */
	limit += 64*1024;

	/* In case all data was pulled from skb frags (in __pskb_pull_tail()),
	 * we can fix skb->truesize to its real value to avoid future drops.
	 * This is valid because skb is not yet charged to the socket.
	 * It has been noticed pure SACK packets were sometimes dropped
	 * (if cooked by drivers without copybreak feature).
	 */
	skb_condense(skb);

	if (unlikely(sk_add_backlog(sk, skb, limit))) {
		bh_unlock_sock(sk);
		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP);
		return true;
	}
	return false;
}
EXPORT_SYMBOL(tcp_add_backlog);

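/* Run the socket filter on an incoming segment, allowing it to trim the
 * skb but never below the TCP header; end_seq is adjusted by the number
 * of payload bytes the filter removed.
 */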
ac6e7800
ED
1626int tcp_filter(struct sock *sk, struct sk_buff *skb)
1627{
1628 struct tcphdr *th = (struct tcphdr *)skb->data;
1629 unsigned int eaten = skb->len;
1630 int err;
1631
1632 err = sk_filter_trim_cap(sk, skb, th->doff * 4);
1633 if (!err) {
1634 eaten -= skb->len;
1635 TCP_SKB_CB(skb)->end_seq -= eaten;
1636 }
1637 return err;
1638}
1639EXPORT_SYMBOL(tcp_filter);
1640
1da177e4
LT
1641/*
1642 * From tcp_input.c
1643 */
1644
1645int tcp_v4_rcv(struct sk_buff *skb)
1646{
3b24d854 1647 struct net *net = dev_net(skb->dev);
eddc9ec5 1648 const struct iphdr *iph;
cf533ea5 1649 const struct tcphdr *th;
3b24d854 1650 bool refcounted;
1da177e4
LT
1651 struct sock *sk;
1652 int ret;
1653
1654 if (skb->pkt_type != PACKET_HOST)
1655 goto discard_it;
1656
1657 /* Count it even if it's bad */
90bbcc60 1658 __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1da177e4
LT
1659
1660 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1661 goto discard_it;
1662
ea1627c2 1663 th = (const struct tcphdr *)skb->data;
1da177e4 1664
ea1627c2 1665 if (unlikely(th->doff < sizeof(struct tcphdr) / 4))
1da177e4
LT
1666 goto bad_packet;
1667 if (!pskb_may_pull(skb, th->doff * 4))
1668 goto discard_it;
1669
1670 /* An explanation is required here, I think.
1671 * Packet length and doff are validated by header prediction,
caa20d9a 1672 * provided the case of th->doff==0 is eliminated.
1da177e4 1673 * So, we defer the checks. */
ed70fcfc
TH
1674
1675 if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
6a5dc9e5 1676 goto csum_error;
1da177e4 1677
ea1627c2 1678 th = (const struct tcphdr *)skb->data;
eddc9ec5 1679 iph = ip_hdr(skb);
971f10ec
ED
1680 /* This is tricky: we move IPCB to its correct location inside TCP_SKB_CB().
1681 * barrier() makes sure the compiler won't play fool^Waliasing games.
1682 */
1683 memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
1684 sizeof(struct inet_skb_parm));
1685 barrier();
1686
1da177e4
LT
1687 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1688 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1689 skb->len - th->doff * 4);
1690 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
e11ecddf 1691 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
04317daf 1692 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
b82d1bb4 1693 TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1da177e4
LT
1694 TCP_SKB_CB(skb)->sacked = 0;
1695
4bdc3d66 1696lookup:
a583636a 1697 sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
3b24d854 1698 th->dest, &refcounted);
1da177e4
LT
1699 if (!sk)
1700 goto no_tcp_socket;
1701
bb134d5d
ED
1702process:
1703 if (sk->sk_state == TCP_TIME_WAIT)
1704 goto do_time_wait;
1705
079096f1
ED
1706 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1707 struct request_sock *req = inet_reqsk(sk);
7716682c 1708 struct sock *nsk;
079096f1
ED
1709
1710 sk = req->rsk_listener;
72923555 1711 if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
e65c332d 1712 sk_drops_add(sk, skb);
72923555
ED
1713 reqsk_put(req);
1714 goto discard_it;
1715 }
7716682c 1716 if (unlikely(sk->sk_state != TCP_LISTEN)) {
f03f2e15 1717 inet_csk_reqsk_queue_drop_and_put(sk, req);
4bdc3d66
ED
1718 goto lookup;
1719 }
3b24d854
ED
1720 /* We own a reference on the listener; increase it again,
1721 * as we might lose it too soon.
1722 */
7716682c 1723 sock_hold(sk);
3b24d854 1724 refcounted = true;
d624d276
ED
1725 if (tcp_filter(sk, skb))
1726 goto discard_and_relse;
7716682c 1727 nsk = tcp_check_req(sk, skb, req, false);
079096f1
ED
1728 if (!nsk) {
1729 reqsk_put(req);
7716682c 1730 goto discard_and_relse;
079096f1
ED
1731 }
1732 if (nsk == sk) {
079096f1
ED
1733 reqsk_put(req);
1734 } else if (tcp_child_process(sk, nsk, skb)) {
1735 tcp_v4_send_reset(nsk, skb);
7716682c 1736 goto discard_and_relse;
079096f1 1737 } else {
7716682c 1738 sock_put(sk);
079096f1
ED
1739 return 0;
1740 }
1741 }
6cce09f8 1742 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
02a1d6e7 1743 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
d218d111 1744 goto discard_and_relse;
6cce09f8 1745 }
d218d111 1746
1da177e4
LT
1747 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1748 goto discard_and_relse;
9ea88a15 1749
9ea88a15
DP
1750 if (tcp_v4_inbound_md5_hash(sk, skb))
1751 goto discard_and_relse;
9ea88a15 1752
b59c2701 1753 nf_reset(skb);
1da177e4 1754
ac6e7800 1755 if (tcp_filter(sk, skb))
1da177e4 1756 goto discard_and_relse;
ac6e7800
ED
1757 th = (const struct tcphdr *)skb->data;
1758 iph = ip_hdr(skb);
1da177e4
LT
1759
1760 skb->dev = NULL;
1761
e994b2f0
ED
1762 if (sk->sk_state == TCP_LISTEN) {
1763 ret = tcp_v4_do_rcv(sk, skb);
1764 goto put_and_return;
1765 }
1766
1767 sk_incoming_cpu_update(sk);
1768
c6366184 1769 bh_lock_sock_nested(sk);
a44d6eac 1770 tcp_segs_in(tcp_sk(sk), skb);
1da177e4
LT
1771 ret = 0;
1772 if (!sock_owned_by_user(sk)) {
7bced397 1773 if (!tcp_prequeue(sk, skb))
1da177e4 1774 ret = tcp_v4_do_rcv(sk, skb);
c9c33212 1775 } else if (tcp_add_backlog(sk, skb)) {
6b03a53a
ZY
1776 goto discard_and_relse;
1777 }
1da177e4
LT
1778 bh_unlock_sock(sk);
1779
e994b2f0 1780put_and_return:
3b24d854
ED
1781 if (refcounted)
1782 sock_put(sk);
1da177e4
LT
1783
1784 return ret;
1785
1786no_tcp_socket:
1787 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1788 goto discard_it;
1789
12e25e10 1790 if (tcp_checksum_complete(skb)) {
6a5dc9e5 1791csum_error:
90bbcc60 1792 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1da177e4 1793bad_packet:
90bbcc60 1794 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1da177e4 1795 } else {
cfb6eeb4 1796 tcp_v4_send_reset(NULL, skb);
1da177e4
LT
1797 }
1798
1799discard_it:
1800 /* Discard frame. */
1801 kfree_skb(skb);
e905a9ed 1802 return 0;
1da177e4
LT
1803
1804discard_and_relse:
532182cd 1805 sk_drops_add(sk, skb);
3b24d854
ED
1806 if (refcounted)
1807 sock_put(sk);
1da177e4
LT
1808 goto discard_it;
1809
1810do_time_wait:
1811 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
9469c7b4 1812 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
1813 goto discard_it;
1814 }
1815
6a5dc9e5
ED
1816 if (tcp_checksum_complete(skb)) {
1817 inet_twsk_put(inet_twsk(sk));
1818 goto csum_error;
1da177e4 1819 }
9469c7b4 1820 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1da177e4 1821 case TCP_TW_SYN: {
c346dca1 1822 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
a583636a
CG
1823 &tcp_hashinfo, skb,
1824 __tcp_hdrlen(th),
da5e3630 1825 iph->saddr, th->source,
eddc9ec5 1826 iph->daddr, th->dest,
463c84b9 1827 inet_iif(skb));
1da177e4 1828 if (sk2) {
dbe7faa4 1829 inet_twsk_deschedule_put(inet_twsk(sk));
1da177e4 1830 sk = sk2;
3b24d854 1831 refcounted = false;
1da177e4
LT
1832 goto process;
1833 }
1834 /* Fall through to ACK */
1835 }
1836 case TCP_TW_ACK:
1837 tcp_v4_timewait_ack(sk, skb);
1838 break;
1839 case TCP_TW_RST:
271c3b9b
FW
1840 tcp_v4_send_reset(sk, skb);
1841 inet_twsk_deschedule_put(inet_twsk(sk));
1842 goto discard_it;
1da177e4
LT
1843 case TCP_TW_SUCCESS:;
1844 }
1845 goto discard_it;
1846}
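
/* A compressed map of tcp_v4_rcv() above, summarising the existing
 * flow: validate the header and checksum, demux with
 * __inet_lookup_skb(), then dispatch -- TCP_NEW_SYN_RECV goes through
 * tcp_check_req() on its listener, TCP_TIME_WAIT through
 * tcp_timewait_state_process(), and every other socket is fed to
 * tcp_v4_do_rcv() directly, via the prequeue, or via the backlog,
 * depending on who currently owns the socket.
 */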
1847
ccb7c410
DM
1848static struct timewait_sock_ops tcp_timewait_sock_ops = {
1849 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
1850 .twsk_unique = tcp_twsk_unique,
1851 .twsk_destructor= tcp_twsk_destructor,
ccb7c410 1852};
1da177e4 1853
63d02d15 1854void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
5d299f3d
ED
1855{
1856 struct dst_entry *dst = skb_dst(skb);
1857
5037e9ef 1858 if (dst && dst_hold_safe(dst)) {
ca777eff
ED
1859 sk->sk_rx_dst = dst;
1860 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
1861 }
5d299f3d 1862}
63d02d15 1863EXPORT_SYMBOL(inet_sk_rx_dst_set);
5d299f3d 1864
3b401a81 1865const struct inet_connection_sock_af_ops ipv4_specific = {
543d9cfe
ACM
1866 .queue_xmit = ip_queue_xmit,
1867 .send_check = tcp_v4_send_check,
1868 .rebuild_header = inet_sk_rebuild_header,
5d299f3d 1869 .sk_rx_dst_set = inet_sk_rx_dst_set,
543d9cfe
ACM
1870 .conn_request = tcp_v4_conn_request,
1871 .syn_recv_sock = tcp_v4_syn_recv_sock,
543d9cfe
ACM
1872 .net_header_len = sizeof(struct iphdr),
1873 .setsockopt = ip_setsockopt,
1874 .getsockopt = ip_getsockopt,
1875 .addr2sockaddr = inet_csk_addr2sockaddr,
1876 .sockaddr_len = sizeof(struct sockaddr_in),
3fdadf7d 1877#ifdef CONFIG_COMPAT
543d9cfe
ACM
1878 .compat_setsockopt = compat_ip_setsockopt,
1879 .compat_getsockopt = compat_ip_getsockopt,
3fdadf7d 1880#endif
4fab9071 1881 .mtu_reduced = tcp_v4_mtu_reduced,
1da177e4 1882};
4bc2f18b 1883EXPORT_SYMBOL(ipv4_specific);
1da177e4 1884
cfb6eeb4 1885#ifdef CONFIG_TCP_MD5SIG
b2e4b3de 1886static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
cfb6eeb4 1887 .md5_lookup = tcp_v4_md5_lookup,
49a72dfb 1888 .calc_md5_hash = tcp_v4_md5_hash_skb,
cfb6eeb4 1889 .md5_parse = tcp_v4_parse_md5_keys,
cfb6eeb4 1890};
b6332e6c 1891#endif
cfb6eeb4 1892
1da177e4
LT
1893/* NOTE: A lot of things are set to zero explicitly by the call to
1894 * sk_alloc(), so they need not be done here.
1895 */
1896static int tcp_v4_init_sock(struct sock *sk)
1897{
6687e988 1898 struct inet_connection_sock *icsk = inet_csk(sk);
1da177e4 1899
900f65d3 1900 tcp_init_sock(sk);
1da177e4 1901
8292a17a 1902 icsk->icsk_af_ops = &ipv4_specific;
900f65d3 1903
cfb6eeb4 1904#ifdef CONFIG_TCP_MD5SIG
ac807fa8 1905 tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
cfb6eeb4 1906#endif
1da177e4 1907
1da177e4
LT
1908 return 0;
1909}
1910
7d06b2e0 1911void tcp_v4_destroy_sock(struct sock *sk)
1da177e4
LT
1912{
1913 struct tcp_sock *tp = tcp_sk(sk);
1914
1915 tcp_clear_xmit_timers(sk);
1916
6687e988 1917 tcp_cleanup_congestion_control(sk);
317a76f9 1918
734942cc
DW
1919 tcp_cleanup_ulp(sk);
1920
1da177e4 1921 /* Clean up the write buffer. */
fe067e8a 1922 tcp_write_queue_purge(sk);
1da177e4 1923
cf1ef3f0
WW
1924 /* Check if we want to disable active TFO */
1925 tcp_fastopen_active_disable_ofo_check(sk);
1926
1da177e4 1927 /* Clean up our, hopefully empty, out_of_order_queue. */
9f5afeae 1928 skb_rbtree_purge(&tp->out_of_order_queue);
1da177e4 1929
cfb6eeb4
YH
1930#ifdef CONFIG_TCP_MD5SIG
1931 /* Clean up the MD5 key list, if any */
1932 if (tp->md5sig_info) {
a915da9b 1933 tcp_clear_md5_list(sk);
a8afca03 1934 kfree_rcu(tp->md5sig_info, rcu);
cfb6eeb4
YH
1935 tp->md5sig_info = NULL;
1936 }
1937#endif
1a2449a8 1938
1da177e4
LT
1939 /* Clean the prequeue; it really must be empty. */
1940 __skb_queue_purge(&tp->ucopy.prequeue);
1941
1942 /* Clean up a referenced TCP bind bucket. */
463c84b9 1943 if (inet_csk(sk)->icsk_bind_hash)
ab1e0a13 1944 inet_put_port(sk);
1da177e4 1945
00db4124 1946 BUG_ON(tp->fastopen_rsk);
435cf559 1947
cf60af03
YC
1948 /* If socket is aborted during connect operation */
1949 tcp_free_fastopen_req(tp);
cd8ae852 1950 tcp_saved_syn_free(tp);
cf60af03 1951
180d8cd9 1952 sk_sockets_allocated_dec(sk);
1da177e4 1953}
1da177e4
LT
1954EXPORT_SYMBOL(tcp_v4_destroy_sock);
1955
1956#ifdef CONFIG_PROC_FS
1957/* Proc filesystem TCP sock list dumping. */
1958
a8b690f9
TH
1959/*
1960 * Get the next listener socket following cur. If cur is NULL, get the
1961 * first socket starting from the bucket given in st->bucket; when
1962 * st->bucket is zero, the very first socket in the hash table is returned.
1963 */
1da177e4
LT
1964static void *listening_get_next(struct seq_file *seq, void *cur)
1965{
5799de0b 1966 struct tcp_iter_state *st = seq->private;
a4146b1b 1967 struct net *net = seq_file_net(seq);
3b24d854 1968 struct inet_listen_hashbucket *ilb;
3b24d854 1969 struct sock *sk = cur;
1da177e4
LT
1970
1971 if (!sk) {
3b24d854 1972get_head:
a8b690f9 1973 ilb = &tcp_hashinfo.listening_hash[st->bucket];
9652dc2e 1974 spin_lock(&ilb->lock);
3b24d854 1975 sk = sk_head(&ilb->head);
a8b690f9 1976 st->offset = 0;
1da177e4
LT
1977 goto get_sk;
1978 }
5caea4ea 1979 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1da177e4 1980 ++st->num;
a8b690f9 1981 ++st->offset;
1da177e4 1982
3b24d854 1983 sk = sk_next(sk);
1da177e4 1984get_sk:
3b24d854 1985 sk_for_each_from(sk) {
8475ef9f
PE
1986 if (!net_eq(sock_net(sk), net))
1987 continue;
3b24d854
ED
1988 if (sk->sk_family == st->family)
1989 return sk;
1da177e4 1990 }
9652dc2e 1991 spin_unlock(&ilb->lock);
a8b690f9 1992 st->offset = 0;
3b24d854
ED
1993 if (++st->bucket < INET_LHTABLE_SIZE)
1994 goto get_head;
1995 return NULL;
1da177e4
LT
1996}
1997
1998static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
1999{
a8b690f9
TH
2000 struct tcp_iter_state *st = seq->private;
2001 void *rc;
2002
2003 st->bucket = 0;
2004 st->offset = 0;
2005 rc = listening_get_next(seq, NULL);
1da177e4
LT
2006
2007 while (rc && *pos) {
2008 rc = listening_get_next(seq, rc);
2009 --*pos;
2010 }
2011 return rc;
2012}
2013
05dbc7b5 2014static inline bool empty_bucket(const struct tcp_iter_state *st)
6eac5604 2015{
05dbc7b5 2016 return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
6eac5604
AK
2017}
2018
a8b690f9
TH
2019/*
2020 * Get the first established socket, starting from the bucket given in st->bucket.
2021 * If st->bucket is zero, the very first socket in the hash is returned.
2022 */
1da177e4
LT
2023static void *established_get_first(struct seq_file *seq)
2024{
5799de0b 2025 struct tcp_iter_state *st = seq->private;
a4146b1b 2026 struct net *net = seq_file_net(seq);
1da177e4
LT
2027 void *rc = NULL;
2028
a8b690f9
TH
2029 st->offset = 0;
2030 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
1da177e4 2031 struct sock *sk;
3ab5aee7 2032 struct hlist_nulls_node *node;
9db66bdc 2033 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
1da177e4 2034
6eac5604
AK
2035 /* Lockless fast path for the common case of empty buckets */
2036 if (empty_bucket(st))
2037 continue;
2038
9db66bdc 2039 spin_lock_bh(lock);
3ab5aee7 2040 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
f40c8174 2041 if (sk->sk_family != st->family ||
878628fb 2042 !net_eq(sock_net(sk), net)) {
1da177e4
LT
2043 continue;
2044 }
2045 rc = sk;
2046 goto out;
2047 }
9db66bdc 2048 spin_unlock_bh(lock);
1da177e4
LT
2049 }
2050out:
2051 return rc;
2052}
2053
2054static void *established_get_next(struct seq_file *seq, void *cur)
2055{
2056 struct sock *sk = cur;
3ab5aee7 2057 struct hlist_nulls_node *node;
5799de0b 2058 struct tcp_iter_state *st = seq->private;
a4146b1b 2059 struct net *net = seq_file_net(seq);
1da177e4
LT
2060
2061 ++st->num;
a8b690f9 2062 ++st->offset;
1da177e4 2063
05dbc7b5 2064 sk = sk_nulls_next(sk);
1da177e4 2065
3ab5aee7 2066 sk_nulls_for_each_from(sk, node) {
878628fb 2067 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
05dbc7b5 2068 return sk;
1da177e4
LT
2069 }
2070
05dbc7b5
ED
2071 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2072 ++st->bucket;
2073 return established_get_first(seq);
1da177e4
LT
2074}
2075
2076static void *established_get_idx(struct seq_file *seq, loff_t pos)
2077{
a8b690f9
TH
2078 struct tcp_iter_state *st = seq->private;
2079 void *rc;
2080
2081 st->bucket = 0;
2082 rc = established_get_first(seq);
1da177e4
LT
2083
2084 while (rc && pos) {
2085 rc = established_get_next(seq, rc);
2086 --pos;
7174259e 2087 }
1da177e4
LT
2088 return rc;
2089}
2090
2091static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2092{
2093 void *rc;
5799de0b 2094 struct tcp_iter_state *st = seq->private;
1da177e4 2095
1da177e4
LT
2096 st->state = TCP_SEQ_STATE_LISTENING;
2097 rc = listening_get_idx(seq, &pos);
2098
2099 if (!rc) {
1da177e4
LT
2100 st->state = TCP_SEQ_STATE_ESTABLISHED;
2101 rc = established_get_idx(seq, pos);
2102 }
2103
2104 return rc;
2105}
2106
a8b690f9
TH
2107static void *tcp_seek_last_pos(struct seq_file *seq)
2108{
2109 struct tcp_iter_state *st = seq->private;
2110 int offset = st->offset;
2111 int orig_num = st->num;
2112 void *rc = NULL;
2113
2114 switch (st->state) {
a8b690f9
TH
2115 case TCP_SEQ_STATE_LISTENING:
2116 if (st->bucket >= INET_LHTABLE_SIZE)
2117 break;
2118 st->state = TCP_SEQ_STATE_LISTENING;
2119 rc = listening_get_next(seq, NULL);
2120 while (offset-- && rc)
2121 rc = listening_get_next(seq, rc);
2122 if (rc)
2123 break;
2124 st->bucket = 0;
05dbc7b5 2125 st->state = TCP_SEQ_STATE_ESTABLISHED;
a8b690f9
TH
2126 /* Fallthrough */
2127 case TCP_SEQ_STATE_ESTABLISHED:
a8b690f9
TH
2128 if (st->bucket > tcp_hashinfo.ehash_mask)
2129 break;
2130 rc = established_get_first(seq);
2131 while (offset-- && rc)
2132 rc = established_get_next(seq, rc);
2133 }
2134
2135 st->num = orig_num;
2136
2137 return rc;
2138}
2139
1da177e4
LT
2140static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2141{
5799de0b 2142 struct tcp_iter_state *st = seq->private;
a8b690f9
TH
2143 void *rc;
2144
2145 if (*pos && *pos == st->last_pos) {
2146 rc = tcp_seek_last_pos(seq);
2147 if (rc)
2148 goto out;
2149 }
2150
1da177e4
LT
2151 st->state = TCP_SEQ_STATE_LISTENING;
2152 st->num = 0;
a8b690f9
TH
2153 st->bucket = 0;
2154 st->offset = 0;
2155 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2156
2157out:
2158 st->last_pos = *pos;
2159 return rc;
1da177e4
LT
2160}
2161
2162static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2163{
a8b690f9 2164 struct tcp_iter_state *st = seq->private;
1da177e4 2165 void *rc = NULL;
1da177e4
LT
2166
2167 if (v == SEQ_START_TOKEN) {
2168 rc = tcp_get_idx(seq, 0);
2169 goto out;
2170 }
1da177e4
LT
2171
2172 switch (st->state) {
1da177e4
LT
2173 case TCP_SEQ_STATE_LISTENING:
2174 rc = listening_get_next(seq, v);
2175 if (!rc) {
1da177e4 2176 st->state = TCP_SEQ_STATE_ESTABLISHED;
a8b690f9
TH
2177 st->bucket = 0;
2178 st->offset = 0;
1da177e4
LT
2179 rc = established_get_first(seq);
2180 }
2181 break;
2182 case TCP_SEQ_STATE_ESTABLISHED:
1da177e4
LT
2183 rc = established_get_next(seq, v);
2184 break;
2185 }
2186out:
2187 ++*pos;
a8b690f9 2188 st->last_pos = *pos;
1da177e4
LT
2189 return rc;
2190}
2191
2192static void tcp_seq_stop(struct seq_file *seq, void *v)
2193{
5799de0b 2194 struct tcp_iter_state *st = seq->private;
1da177e4
LT
2195
2196 switch (st->state) {
1da177e4
LT
2197 case TCP_SEQ_STATE_LISTENING:
2198 if (v != SEQ_START_TOKEN)
9652dc2e 2199 spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock);
1da177e4 2200 break;
1da177e4
LT
2201 case TCP_SEQ_STATE_ESTABLISHED:
2202 if (v)
9db66bdc 2203 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
1da177e4
LT
2204 break;
2205 }
2206}
2207
73cb88ec 2208int tcp_seq_open(struct inode *inode, struct file *file)
1da177e4 2209{
d9dda78b 2210 struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
1da177e4 2211 struct tcp_iter_state *s;
52d6f3f1 2212 int err;
1da177e4 2213
52d6f3f1
DL
2214 err = seq_open_net(inode, file, &afinfo->seq_ops,
2215 sizeof(struct tcp_iter_state));
2216 if (err < 0)
2217 return err;
f40c8174 2218
52d6f3f1 2219 s = ((struct seq_file *)file->private_data)->private;
1da177e4 2220 s->family = afinfo->family;
688d1945 2221 s->last_pos = 0;
f40c8174
DL
2222 return 0;
2223}
73cb88ec 2224EXPORT_SYMBOL(tcp_seq_open);
f40c8174 2225
6f8b13bc 2226int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
1da177e4
LT
2227{
2228 int rc = 0;
2229 struct proc_dir_entry *p;
2230
9427c4b3
DL
2231 afinfo->seq_ops.start = tcp_seq_start;
2232 afinfo->seq_ops.next = tcp_seq_next;
2233 afinfo->seq_ops.stop = tcp_seq_stop;
2234
84841c3c 2235 p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
73cb88ec 2236 afinfo->seq_fops, afinfo);
84841c3c 2237 if (!p)
1da177e4
LT
2238 rc = -ENOMEM;
2239 return rc;
2240}
4bc2f18b 2241EXPORT_SYMBOL(tcp_proc_register);
1da177e4 2242
6f8b13bc 2243void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
1da177e4 2244{
ece31ffd 2245 remove_proc_entry(afinfo->name, net->proc_net);
1da177e4 2246}
4bc2f18b 2247EXPORT_SYMBOL(tcp_proc_unregister);
1da177e4 2248
d4f06873 2249static void get_openreq4(const struct request_sock *req,
aa3a0c8c 2250 struct seq_file *f, int i)
1da177e4 2251{
2e6599cb 2252 const struct inet_request_sock *ireq = inet_rsk(req);
fa76ce73 2253 long delta = req->rsk_timer.expires - jiffies;
1da177e4 2254
5e659e4c 2255 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
652586df 2256 " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
1da177e4 2257 i,
634fb979 2258 ireq->ir_loc_addr,
d4f06873 2259 ireq->ir_num,
634fb979
ED
2260 ireq->ir_rmt_addr,
2261 ntohs(ireq->ir_rmt_port),
1da177e4
LT
2262 TCP_SYN_RECV,
2263 0, 0, /* could print option size, but that is af dependent. */
2264 1, /* timers active (only the expire timer) */
a399a805 2265 jiffies_delta_to_clock_t(delta),
e6c022a4 2266 req->num_timeout,
aa3a0c8c
ED
2267 from_kuid_munged(seq_user_ns(f),
2268 sock_i_uid(req->rsk_listener)),
1da177e4
LT
2269 0, /* non standard timer */
2270 0, /* open_requests have no inode */
d4f06873 2271 0,
652586df 2272 req);
1da177e4
LT
2273}
2274
652586df 2275static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
1da177e4
LT
2276{
2277 int timer_active;
2278 unsigned long timer_expires;
cf533ea5 2279 const struct tcp_sock *tp = tcp_sk(sk);
cf4c6bf8 2280 const struct inet_connection_sock *icsk = inet_csk(sk);
cf533ea5 2281 const struct inet_sock *inet = inet_sk(sk);
0536fcc0 2282 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
c720c7e8
ED
2283 __be32 dest = inet->inet_daddr;
2284 __be32 src = inet->inet_rcv_saddr;
2285 __u16 destp = ntohs(inet->inet_dport);
2286 __u16 srcp = ntohs(inet->inet_sport);
49d09007 2287 int rx_queue;
00fd38d9 2288 int state;
1da177e4 2289
6ba8a3b1 2290 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
57dde7f7 2291 icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
6ba8a3b1 2292 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1da177e4 2293 timer_active = 1;
463c84b9
ACM
2294 timer_expires = icsk->icsk_timeout;
2295 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1da177e4 2296 timer_active = 4;
463c84b9 2297 timer_expires = icsk->icsk_timeout;
cf4c6bf8 2298 } else if (timer_pending(&sk->sk_timer)) {
1da177e4 2299 timer_active = 2;
cf4c6bf8 2300 timer_expires = sk->sk_timer.expires;
1da177e4
LT
2301 } else {
2302 timer_active = 0;
2303 timer_expires = jiffies;
2304 }
2305
00fd38d9
ED
2306 state = sk_state_load(sk);
2307 if (state == TCP_LISTEN)
49d09007
ED
2308 rx_queue = sk->sk_ack_backlog;
2309 else
00fd38d9
ED
2310 /* Because we don't lock the socket,
2311 * we might find a transient negative value.
49d09007
ED
2312 */
2313 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2314
5e659e4c 2315 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
652586df 2316 "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
00fd38d9 2317 i, src, srcp, dest, destp, state,
47da8ee6 2318 tp->write_seq - tp->snd_una,
49d09007 2319 rx_queue,
1da177e4 2320 timer_active,
a399a805 2321 jiffies_delta_to_clock_t(timer_expires - jiffies),
463c84b9 2322 icsk->icsk_retransmits,
a7cb5a49 2323 from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
6687e988 2324 icsk->icsk_probes_out,
cf4c6bf8 2325 sock_i_ino(sk),
41c6d650 2326 refcount_read(&sk->sk_refcnt), sk,
7be87351
SH
2327 jiffies_to_clock_t(icsk->icsk_rto),
2328 jiffies_to_clock_t(icsk->icsk_ack.ato),
463c84b9 2329 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
1da177e4 2330 tp->snd_cwnd,
00fd38d9
ED
2331 state == TCP_LISTEN ?
2332 fastopenq->max_qlen :
652586df 2333 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
1da177e4
LT
2334}
2335
cf533ea5 2336static void get_timewait4_sock(const struct inet_timewait_sock *tw,
652586df 2337 struct seq_file *f, int i)
1da177e4 2338{
789f558c 2339 long delta = tw->tw_timer.expires - jiffies;
23f33c2d 2340 __be32 dest, src;
1da177e4 2341 __u16 destp, srcp;
1da177e4
LT
2342
2343 dest = tw->tw_daddr;
2344 src = tw->tw_rcv_saddr;
2345 destp = ntohs(tw->tw_dport);
2346 srcp = ntohs(tw->tw_sport);
2347
5e659e4c 2348 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
652586df 2349 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
1da177e4 2350 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
a399a805 2351 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
41c6d650 2352 refcount_read(&tw->tw_refcnt), tw);
1da177e4
LT
2353}
2354
2355#define TMPSZ 150
2356
2357static int tcp4_seq_show(struct seq_file *seq, void *v)
2358{
5799de0b 2359 struct tcp_iter_state *st;
05dbc7b5 2360 struct sock *sk = v;
1da177e4 2361
652586df 2362 seq_setwidth(seq, TMPSZ - 1);
1da177e4 2363 if (v == SEQ_START_TOKEN) {
652586df 2364 seq_puts(seq, " sl local_address rem_address st tx_queue "
1da177e4
LT
2365 "rx_queue tr tm->when retrnsmt uid timeout "
2366 "inode");
2367 goto out;
2368 }
2369 st = seq->private;
2370
079096f1
ED
2371 if (sk->sk_state == TCP_TIME_WAIT)
2372 get_timewait4_sock(v, seq, st->num);
2373 else if (sk->sk_state == TCP_NEW_SYN_RECV)
aa3a0c8c 2374 get_openreq4(v, seq, st->num);
079096f1
ED
2375 else
2376 get_tcp4_sock(v, seq, st->num);
1da177e4 2377out:
652586df 2378 seq_pad(seq, '\n');
1da177e4
LT
2379 return 0;
2380}
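
/* An illustrative /proc/net/tcp entry in the format produced by
 * get_tcp4_sock() above (every value below is invented):
 *
 *   0: 0100007F:0CEA 0100007F:9C4E 01 00000000:00000000 02:00000F9C 00000000  1000        0 54321 1 0000000000000000 20 4 30 10 -1
 *
 * Addresses and ports are hex, with the address words in host byte
 * order: 0100007F:0CEA is 127.0.0.1:3306. State 01 is TCP_ESTABLISHED,
 * timer code 02 means the keepalive timer is pending, and the trailing
 * fields are rto, ato, quick/pingpong, snd_cwnd and ssthresh (-1 while
 * still in initial slow start).
 */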
2381
73cb88ec
AV
2382static const struct file_operations tcp_afinfo_seq_fops = {
2383 .owner = THIS_MODULE,
2384 .open = tcp_seq_open,
2385 .read = seq_read,
2386 .llseek = seq_lseek,
2387 .release = seq_release_net
2388};
2389
1da177e4 2390static struct tcp_seq_afinfo tcp4_seq_afinfo = {
1da177e4
LT
2391 .name = "tcp",
2392 .family = AF_INET,
73cb88ec 2393 .seq_fops = &tcp_afinfo_seq_fops,
9427c4b3
DL
2394 .seq_ops = {
2395 .show = tcp4_seq_show,
2396 },
1da177e4
LT
2397};
2398
2c8c1e72 2399static int __net_init tcp4_proc_init_net(struct net *net)
757764f6
PE
2400{
2401 return tcp_proc_register(net, &tcp4_seq_afinfo);
2402}
2403
2c8c1e72 2404static void __net_exit tcp4_proc_exit_net(struct net *net)
757764f6
PE
2405{
2406 tcp_proc_unregister(net, &tcp4_seq_afinfo);
2407}
2408
2409static struct pernet_operations tcp4_net_ops = {
2410 .init = tcp4_proc_init_net,
2411 .exit = tcp4_proc_exit_net,
2412};
2413
1da177e4
LT
2414int __init tcp4_proc_init(void)
2415{
757764f6 2416 return register_pernet_subsys(&tcp4_net_ops);
1da177e4
LT
2417}
2418
2419void tcp4_proc_exit(void)
2420{
757764f6 2421 unregister_pernet_subsys(&tcp4_net_ops);
1da177e4
LT
2422}
2423#endif /* CONFIG_PROC_FS */
2424
2425struct proto tcp_prot = {
2426 .name = "TCP",
2427 .owner = THIS_MODULE,
2428 .close = tcp_close,
2429 .connect = tcp_v4_connect,
2430 .disconnect = tcp_disconnect,
463c84b9 2431 .accept = inet_csk_accept,
1da177e4
LT
2432 .ioctl = tcp_ioctl,
2433 .init = tcp_v4_init_sock,
2434 .destroy = tcp_v4_destroy_sock,
2435 .shutdown = tcp_shutdown,
2436 .setsockopt = tcp_setsockopt,
2437 .getsockopt = tcp_getsockopt,
4b9d07a4 2438 .keepalive = tcp_set_keepalive,
1da177e4 2439 .recvmsg = tcp_recvmsg,
7ba42910
CG
2440 .sendmsg = tcp_sendmsg,
2441 .sendpage = tcp_sendpage,
1da177e4 2442 .backlog_rcv = tcp_v4_do_rcv,
46d3ceab 2443 .release_cb = tcp_release_cb,
ab1e0a13
ACM
2444 .hash = inet_hash,
2445 .unhash = inet_unhash,
2446 .get_port = inet_csk_get_port,
1da177e4 2447 .enter_memory_pressure = tcp_enter_memory_pressure,
06044751 2448 .leave_memory_pressure = tcp_leave_memory_pressure,
c9bee3b7 2449 .stream_memory_free = tcp_stream_memory_free,
1da177e4 2450 .sockets_allocated = &tcp_sockets_allocated,
0a5578cf 2451 .orphan_count = &tcp_orphan_count,
1da177e4
LT
2452 .memory_allocated = &tcp_memory_allocated,
2453 .memory_pressure = &tcp_memory_pressure,
a4fe34bf 2454 .sysctl_mem = sysctl_tcp_mem,
1da177e4
LT
2455 .sysctl_wmem = sysctl_tcp_wmem,
2456 .sysctl_rmem = sysctl_tcp_rmem,
2457 .max_header = MAX_TCP_HEADER,
2458 .obj_size = sizeof(struct tcp_sock),
5f0d5a3a 2459 .slab_flags = SLAB_TYPESAFE_BY_RCU,
6d6ee43e 2460 .twsk_prot = &tcp_timewait_sock_ops,
60236fdd 2461 .rsk_prot = &tcp_request_sock_ops,
39d8cda7 2462 .h.hashinfo = &tcp_hashinfo,
7ba42910 2463 .no_autobind = true,
543d9cfe
ACM
2464#ifdef CONFIG_COMPAT
2465 .compat_setsockopt = compat_tcp_setsockopt,
2466 .compat_getsockopt = compat_tcp_getsockopt,
d1a4c0b3 2467#endif
c1e64e29 2468 .diag_destroy = tcp_abort,
1da177e4 2469};
4bc2f18b 2470EXPORT_SYMBOL(tcp_prot);
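
/* tcp_prot is what a plain IPv4 stream socket resolves to. A minimal
 * userspace sketch (hypothetical helper) whose socket() call ends up
 * backed by the ops table above:
 */
#if 0	/* sketch only, userspace */
#include <sys/socket.h>
#include <netinet/in.h>

int open_tcp_socket(void)
{
	/* AF_INET + SOCK_STREAM + IPPROTO_TCP selects tcp_prot:
	 * connect() maps to tcp_v4_connect, close() to tcp_close, etc.
	 */
	return socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
}
#endif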
1da177e4 2471
bdbbb852
ED
2472static void __net_exit tcp_sk_exit(struct net *net)
2473{
2474 int cpu;
2475
2476 for_each_possible_cpu(cpu)
2477 inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
2478 free_percpu(net->ipv4.tcp_sk);
2479}
2480
046ee902
DL
2481static int __net_init tcp_sk_init(struct net *net)
2482{
fee83d09 2483 int res, cpu, cnt;
bdbbb852
ED
2484
2485 net->ipv4.tcp_sk = alloc_percpu(struct sock *);
2486 if (!net->ipv4.tcp_sk)
2487 return -ENOMEM;
2488
2489 for_each_possible_cpu(cpu) {
2490 struct sock *sk;
2491
2492 res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
2493 IPPROTO_TCP, net);
2494 if (res)
2495 goto fail;
a9d6532b 2496 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
bdbbb852
ED
2497 *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
2498 }
49213555 2499
5d134f1c 2500 net->ipv4.sysctl_tcp_ecn = 2;
49213555
DB
2501 net->ipv4.sysctl_tcp_ecn_fallback = 1;
2502
b0f9ca53 2503 net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
6b58e0a5 2504 net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
05cbc0db 2505 net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
046ee902 2506
13b287e8 2507 net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
9bd6861b 2508 net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
b840d15d 2509 net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
13b287e8 2510
6fa25166 2511 net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
7c083ecb 2512 net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
0aca737d 2513 net->ipv4.sysctl_tcp_syncookies = 1;
1043e25f 2514 net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
ae5c3f40 2515 net->ipv4.sysctl_tcp_retries1 = TCP_RETR1;
c6214a97 2516 net->ipv4.sysctl_tcp_retries2 = TCP_RETR2;
c402d9be 2517 net->ipv4.sysctl_tcp_orphan_retries = 0;
1e579caa 2518 net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
4979f2d9 2519 net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
56ab6b93 2520 net->ipv4.sysctl_tcp_tw_reuse = 0;
12ed8244 2521
fee83d09 2522 cnt = tcp_hashinfo.ehash_mask + 1;
fee83d09 2523 net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (cnt + 1) / 2;
1946e672
HY
2524 net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;
2525
fee83d09 2526 net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 256);
f9301034 2527 net->ipv4.sysctl_tcp_sack = 1;
9bb37ef0 2528 net->ipv4.sysctl_tcp_window_scaling = 1;
5d2ed052 2529 net->ipv4.sysctl_tcp_timestamps = 1;
fee83d09 2530
49213555 2531 return 0;
bdbbb852
ED
2532fail:
2533 tcp_sk_exit(net);
2534
2535 return res;
b099ce26
EB
2536}
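
/* The per-netns defaults initialised above surface under
 * /proc/sys/net/ipv4/. A hedged userspace sketch (hypothetical helper)
 * reading one of them back; TCP_SYN_RETRIES defaults to 6:
 */
#if 0	/* sketch only, userspace */
#include <stdio.h>

int read_syn_retries(void)
{
	int val = -1;
	FILE *f = fopen("/proc/sys/net/ipv4/tcp_syn_retries", "r");

	if (f) {
		if (fscanf(f, "%d", &val) != 1)
			val = -1;
		fclose(f);
	}
	return val;
}
#endif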
2537
2538static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2539{
1946e672 2540 inet_twsk_purge(&tcp_hashinfo, AF_INET);
046ee902
DL
2541}
2542
2543static struct pernet_operations __net_initdata tcp_sk_ops = {
b099ce26
EB
2544 .init = tcp_sk_init,
2545 .exit = tcp_sk_exit,
2546 .exit_batch = tcp_sk_exit_batch,
046ee902
DL
2547};
2548
9b0f976f 2549void __init tcp_v4_init(void)
1da177e4 2550{
6a1b3054 2551 if (register_pernet_subsys(&tcp_sk_ops))
1da177e4 2552 panic("Failed to create the TCP control socket.\n");
1da177e4 2553}