// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *		See tcp.c for author information
 */

/*
 * Changes:
 *		David S. Miller	:	New socket lookup architecture.
 *					This code is dedicated to John Dyson.
 *		David S. Miller :	Change semantics of established hash,
 *					half is devoted to TIME_WAIT sockets
 *					and the rest go in the other half.
 *		Andi Kleen :		Add support for syncookies and fixed
 *					some bugs: ip options weren't passed to
 *					the TCP layer, missed a check for an
 *					ACK bit.
 *		Andi Kleen :		Implemented fast path mtu discovery.
 *					Fixed many serious bugs in the
 *					request_sock handling and moved
 *					most of it into the af independent code.
 *					Added tail drop and some other bugfixes.
 *					Added new listen semantics.
 *		Mike McLagan	:	Routing by source
 *	Juan Jose Ciarlante:		ip_dynaddr bits
 *		Andi Kleen:		various fixes.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year
 *					coma.
 *	Andi Kleen		:	Fix new listen.
 *	Andi Kleen		:	Fix accept error reporting.
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 */
#define pr_fmt(fmt) "TCP: " fmt

#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/inetdevice.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>
#ifdef CONFIG_TCP_MD5SIG
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
#endif

struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);
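
/* Per-flow initial state generation: the ISN is derived from the full
 * 4-tuple and the timestamp offset from the address pair, both via keyed
 * hashes, so the values are unpredictable off-path yet stable per flow.
 */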
static u32 tcp_v4_init_seq(const struct sk_buff *skb)
{
	return secure_tcp_seq(ip_hdr(skb)->daddr,
			      ip_hdr(skb)->saddr,
			      tcp_hdr(skb)->dest,
			      tcp_hdr(skb)->source);
}

static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr);
}
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct inet_timewait_sock *tw = inet_twsk(sktw);
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);
	int reuse = sock_net(sk)->ipv4.sysctl_tcp_tw_reuse;

	if (reuse == 2) {
		/* Still does not detect *everything* that goes through
		 * lo, since we require a loopback src or dst address
		 * or direct binding to 'lo' interface.
		 */
		bool loopback = false;

		if (tw->tw_bound_dev_if == LOOPBACK_IFINDEX)
			loopback = true;
#if IS_ENABLED(CONFIG_IPV6)
		if (tw->tw_family == AF_INET6) {
			if (ipv6_addr_loopback(&tw->tw_v6_daddr) ||
			    ipv6_addr_v4mapped_loopback(&tw->tw_v6_daddr) ||
			    ipv6_addr_loopback(&tw->tw_v6_rcv_saddr) ||
			    ipv6_addr_v4mapped_loopback(&tw->tw_v6_rcv_saddr))
				loopback = true;
		} else
#endif
		{
			if (ipv4_is_loopback(tw->tw_daddr) ||
			    ipv4_is_loopback(tw->tw_rcv_saddr))
				loopback = true;
		}
		if (!loopback)
			reuse = 0;
	}

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's one, only timestamp cache is
	   held not per host, but per port pair and TW bucket is used as state
	   holder.

	   If TW bucket has been already destroyed we fall back to VJ's scheme
	   and use initial timestamp retrieved from peer table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (!twp || (reuse && time_after32(ktime_get_seconds(),
					    tcptw->tw_ts_recent_stamp)))) {
		/* In case of repair and re-using TIME-WAIT sockets we still
		 * want to be sure that it is safe as above but honor the
		 * sequence numbers and time stamps set as part of the repair
		 * process.
		 *
		 * Without this check re-using a TIME-WAIT socket with TCP
		 * repair would accumulate a -1 on the repair assigned
		 * sequence number. The first time it is reused the sequence
		 * is -1, the second time -2, etc. This fixes that issue
		 * without appearing to create any others.
		 */
		if (likely(!tp->repair)) {
			u32 seq = tcptw->tw_snd_nxt + 65535 + 2;

			if (!seq)
				seq = 1;
			WRITE_ONCE(tp->write_seq, seq);
			tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
			tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		}
		sock_hold(sktw);
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);
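
/* Validate the destination address length and give BPF cgroup programs a
 * chance to inspect or rewrite the address before connect() proceeds.
 */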
static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			      int addr_len)
{
	/* This check is replicated from tcp_v4_connect() and intended to
	 * prevent BPF program called below from accessing bytes that are out
	 * of the bound specified by user in addr_len.
	 */
	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	sock_owned_by_me(sk);

	return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr);
}
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__be16 orig_sport, orig_dport;
	__be32 daddr, nexthop;
	struct flowi4 *fl4;
	struct rtable *rt;
	int err;
	struct ip_options_rcu *inet_opt;
	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     lockdep_sock_is_held(sk));
	if (inet_opt && inet_opt->opt.srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet_opt->opt.faddr;
	}

	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_TCP,
			      orig_sport, orig_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return err;
	}

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet_opt || !inet_opt->opt.srr)
		daddr = fl4->daddr;

	if (!inet->inet_saddr)
		inet->inet_saddr = fl4->saddr;
	sk_rcv_saddr_set(sk, inet->inet_saddr);

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		if (likely(!tp->repair))
			WRITE_ONCE(tp->write_seq, 0);
	}

	inet->inet_dport = usin->sin_port;
	sk_daddr_set(sk, daddr);

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and not releasing socket
	 * lock select source port, enter ourselves into the hash tables and
	 * complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(tcp_death_row, sk);
	if (err)
		goto failure;

	sk_set_txhash(sk);

	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto failure;
	}
	/* OK, now commit destination to socket. */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);
	rt = NULL;

	if (likely(!tp->repair)) {
		if (!tp->write_seq)
			WRITE_ONCE(tp->write_seq,
				   secure_tcp_seq(inet->inet_saddr,
						  inet->inet_daddr,
						  inet->inet_sport,
						  usin->sin_port));
		tp->tsoffset = secure_tcp_ts_off(sock_net(sk),
						 inet->inet_saddr,
						 inet->inet_daddr);
	}

	inet->inet_id = prandom_u32();

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto failure;

	err = tcp_connect(sk);

	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}
EXPORT_SYMBOL(tcp_v4_connect);
/*
 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
 * It can be called through tcp_release_cb() if socket was owned by user
 * at the time tcp_v4_err() was called to handle ICMP message.
 */
void tcp_v4_mtu_reduced(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct dst_entry *dst;
	u32 mtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;
	mtu = tcp_sk(sk)->mtu_info;
	dst = inet_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection will not be able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    ip_sk_accept_pmtu(sk) &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}
EXPORT_SYMBOL(tcp_v4_mtu_reduced);
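
/* Propagate an ICMP redirect to the route attached to the socket, provided
 * the cached dst is still valid.
 */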
static void do_redirect(struct sk_buff *skb, struct sock *sk)
{
	struct dst_entry *dst = __sk_dst_check(sk, 0);

	if (dst)
		dst->ops->redirect(dst, sk, skb);
}
/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
void tcp_req_err(struct sock *sk, u32 seq, bool abort)
{
	struct request_sock *req = inet_reqsk(sk);
	struct net *net = sock_net(sk);

	/* ICMPs are not backlogged, hence we cannot get
	 * an established socket here.
	 */
	if (seq != tcp_rsk(req)->snt_isn) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
	} else if (abort) {
		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(req->rsk_listener, req);
		tcp_listendrop(req->rsk_listener);
	}
	reqsk_put(req);
}
EXPORT_SYMBOL(tcp_req_err);
/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 */

int tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
	struct inet_connection_sock *icsk;
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(icmp_skb)->type;
	const int code = icmp_hdr(icmp_skb)->code;
	struct sock *sk;
	struct sk_buff *skb;
	struct request_sock *fastopen;
	u32 seq, snd_una;
	s32 remaining;
	u32 delta_us;
	int err;
	struct net *net = dev_net(icmp_skb->dev);

	sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
				       th->dest, iph->saddr, ntohs(th->source),
				       inet_iif(icmp_skb), 0);
	if (!sk) {
		__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
		return -ENOENT;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, type == ICMP_PARAMETERPROB ||
				     type == ICMP_TIME_EXCEEDED ||
				     (type == ICMP_DEST_UNREACH &&
				      (code == ICMP_NET_UNREACH ||
				       code == ICMP_HOST_UNREACH)));
		return 0;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 * We do take care of PMTU discovery (RFC1191) special case :
	 * we can receive locally generated ICMP messages while socket is held.
	 */
	if (sock_owned_by_user(sk)) {
		if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
			__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
	}
	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	icsk = inet_csk(sk);
	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = rcu_dereference(tp->fastopen_rsk);
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_REDIRECT:
		if (!sock_owned_by_user(sk))
			do_redirect(icmp_skb, sk);
		goto out;
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			/* We are not interested in TCP_LISTEN and open_requests
			 * (SYN-ACKs sent out by Linux are always <576bytes so
			 * they should go through unfragmented).
			 */
			if (sk->sk_state == TCP_LISTEN)
				goto out;

			tp->mtu_info = info;
			if (!sock_owned_by_user(sk)) {
				tcp_v4_mtu_reduced(sk);
			} else {
				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags))
					sock_hold(sk);
			}
			goto out;
		}

		err = icmp_err_convert[code].errno;
		/* check if icmp_skb allows revert of backoff
		 * (see draft-zimmermann-tcp-lcd) */
		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
			break;
		if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
		    !icsk->icsk_backoff || fastopen)
			break;

		if (sock_owned_by_user(sk))
			break;

		skb = tcp_rtx_queue_head(sk);
		if (WARN_ON_ONCE(!skb))
			break;

		icsk->icsk_backoff--;
		icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
					       TCP_TIMEOUT_INIT;
		icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);

		tcp_mstamp_refresh(tp);
		delta_us = (u32)(tp->tcp_mstamp - tcp_skb_timestamp_us(skb));
		remaining = icsk->icsk_rto -
			    usecs_to_jiffies(delta_us);

		if (remaining > 0) {
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  remaining, TCP_RTO_MAX);
		} else {
			/* RTO revert clocked out retransmission.
			 * Will retransmit now */
			tcp_retransmit_timer(sk);
		}

		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket is
		 * already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters even these two messages finally lose
	 * their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}
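
/* Checksum helpers: only the pseudo-header sum is stored in th->check here;
 * the device (or the software fallback) completes the checksum starting at
 * csum_start / csum_offset.
 */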
void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct tcphdr *th = tcp_hdr(skb);

	th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
	skb->csum_start = skb_transport_header(skb) - skb->head;
	skb->csum_offset = offsetof(struct tcphdr, check);
}

/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
{
	const struct inet_sock *inet = inet_sk(sk);

	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_send_check);
/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 *	Answer: if a packet caused RST, it is not for a socket
 *		existing in our system, if it is matched to a socket,
 *		it is just duplicate segment or bug in other side's TCP.
 *		So we build the reply based only on parameters
 *		arrived with the segment.
 *	Exception: precedence violation. We do not implement it in any case.
 */

static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key = NULL;
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	u64 transmit_time = 0;
	struct sock *ctl_sk;
	struct net *net;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	/* If sk not NULL, it means we did a successful lookup and incoming
	 * route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
	rcu_read_lock();
	hash_location = tcp_parse_md5sig_option(th);
	if (sk && sk_fullsock(sk)) {
		const union tcp_md5_addr *addr;

		addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr;
		key = tcp_md5_do_lookup(sk, addr, AF_INET);
	} else if (hash_location) {
		const union tcp_md5_addr *addr;
		int sdif = tcp_v4_sdif(skb);
		int dif = inet_iif(skb);

		/*
		 * active side is lost. Try to find listening socket through
		 * source port, and then find md5 key through listening socket.
		 * we do not loosen security here:
		 * Incoming packet is checked with md5 hash with finding key,
		 * no RST generated if md5 hash doesn't match.
		 */
		sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
					     ip_hdr(skb)->saddr,
					     th->source, ip_hdr(skb)->daddr,
					     ntohs(th->source), dif, sdif);
		/* don't send rst if it can't find key */
		if (!sk1)
			goto out;

		addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr;
		key = tcp_md5_do_lookup(sk1, addr, AF_INET);
		if (!key)
			goto out;

		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto out;
	}

	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk_transparent(sk)) ? IP_REPLY_ARG_NOSRCCHECK : 0;

	/* When socket is gone, all binding information is lost.
	 * routing might fail in this case. No choice here, if we choose to force
	 * input interface, we will misroute in case of asymmetric route.
	 */
	if (sk) {
		arg.bound_dev_if = sk->sk_bound_dev_if;
		if (sk_fullsock(sk))
			trace_tcp_send_reset(sk, skb);
	}

	BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
		     offsetof(struct inet_timewait_sock, tw_bound_dev_if));

	arg.tos = ip_hdr(skb)->tos;
	arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	local_bh_disable();
	ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
	if (sk) {
		ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
				   inet_twsk(sk)->tw_mark : sk->sk_mark;
		ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
				   inet_twsk(sk)->tw_priority : sk->sk_priority;
		transmit_time = tcp_transmit_time(sk);
	}
	ip_send_unicast_reply(ctl_sk,
			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
			      &arg, arg.iov[0].iov_len,
			      transmit_time);

	ctl_sk->sk_mark = 0;
	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
	__TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
	local_bh_enable();

#ifdef CONFIG_TCP_MD5SIG
out:
	rcu_read_unlock();
#endif
}
801 /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
802 outside socket context is ugly, certainly. What can I do?
805 static void tcp_v4_send_ack(const struct sock
*sk
,
806 struct sk_buff
*skb
, u32 seq
, u32 ack
,
807 u32 win
, u32 tsval
, u32 tsecr
, int oif
,
808 struct tcp_md5sig_key
*key
,
809 int reply_flags
, u8 tos
)
811 const struct tcphdr
*th
= tcp_hdr(skb
);
814 __be32 opt
[(TCPOLEN_TSTAMP_ALIGNED
>> 2)
815 #ifdef CONFIG_TCP_MD5SIG
816 + (TCPOLEN_MD5SIG_ALIGNED
>> 2)
820 struct net
*net
= sock_net(sk
);
821 struct ip_reply_arg arg
;
825 memset(&rep
.th
, 0, sizeof(struct tcphdr
));
826 memset(&arg
, 0, sizeof(arg
));
828 arg
.iov
[0].iov_base
= (unsigned char *)&rep
;
829 arg
.iov
[0].iov_len
= sizeof(rep
.th
);
831 rep
.opt
[0] = htonl((TCPOPT_NOP
<< 24) | (TCPOPT_NOP
<< 16) |
832 (TCPOPT_TIMESTAMP
<< 8) |
834 rep
.opt
[1] = htonl(tsval
);
835 rep
.opt
[2] = htonl(tsecr
);
836 arg
.iov
[0].iov_len
+= TCPOLEN_TSTAMP_ALIGNED
;
839 /* Swap the send and the receive. */
840 rep
.th
.dest
= th
->source
;
841 rep
.th
.source
= th
->dest
;
842 rep
.th
.doff
= arg
.iov
[0].iov_len
/ 4;
843 rep
.th
.seq
= htonl(seq
);
844 rep
.th
.ack_seq
= htonl(ack
);
846 rep
.th
.window
= htons(win
);
848 #ifdef CONFIG_TCP_MD5SIG
850 int offset
= (tsecr
) ? 3 : 0;
852 rep
.opt
[offset
++] = htonl((TCPOPT_NOP
<< 24) |
854 (TCPOPT_MD5SIG
<< 8) |
856 arg
.iov
[0].iov_len
+= TCPOLEN_MD5SIG_ALIGNED
;
857 rep
.th
.doff
= arg
.iov
[0].iov_len
/4;
859 tcp_v4_md5_hash_hdr((__u8
*) &rep
.opt
[offset
],
860 key
, ip_hdr(skb
)->saddr
,
861 ip_hdr(skb
)->daddr
, &rep
.th
);
864 arg
.flags
= reply_flags
;
865 arg
.csum
= csum_tcpudp_nofold(ip_hdr(skb
)->daddr
,
866 ip_hdr(skb
)->saddr
, /* XXX */
867 arg
.iov
[0].iov_len
, IPPROTO_TCP
, 0);
868 arg
.csumoffset
= offsetof(struct tcphdr
, check
) / 2;
870 arg
.bound_dev_if
= oif
;
872 arg
.uid
= sock_net_uid(net
, sk_fullsock(sk
) ? sk
: NULL
);
874 ctl_sk
= this_cpu_read(*net
->ipv4
.tcp_sk
);
875 ctl_sk
->sk_mark
= (sk
->sk_state
== TCP_TIME_WAIT
) ?
876 inet_twsk(sk
)->tw_mark
: sk
->sk_mark
;
877 ctl_sk
->sk_priority
= (sk
->sk_state
== TCP_TIME_WAIT
) ?
878 inet_twsk(sk
)->tw_priority
: sk
->sk_priority
;
879 transmit_time
= tcp_transmit_time(sk
);
880 ip_send_unicast_reply(ctl_sk
,
881 skb
, &TCP_SKB_CB(skb
)->header
.h4
.opt
,
882 ip_hdr(skb
)->saddr
, ip_hdr(skb
)->daddr
,
883 &arg
, arg
.iov
[0].iov_len
,
887 __TCP_INC_STATS(net
, TCP_MIB_OUTSEGS
);
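
/* Acknowledge a segment received by a TIME-WAIT socket, echoing the
 * remembered receive window, timestamp state and (if configured) MD5 key.
 */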
891 static void tcp_v4_timewait_ack(struct sock
*sk
, struct sk_buff
*skb
)
893 struct inet_timewait_sock
*tw
= inet_twsk(sk
);
894 struct tcp_timewait_sock
*tcptw
= tcp_twsk(sk
);
896 tcp_v4_send_ack(sk
, skb
,
897 tcptw
->tw_snd_nxt
, tcptw
->tw_rcv_nxt
,
898 tcptw
->tw_rcv_wnd
>> tw
->tw_rcv_wscale
,
899 tcp_time_stamp_raw() + tcptw
->tw_ts_offset
,
902 tcp_twsk_md5_key(tcptw
),
903 tw
->tw_transparent
? IP_REPLY_ARG_NOSRCCHECK
: 0,
910 static void tcp_v4_reqsk_send_ack(const struct sock
*sk
, struct sk_buff
*skb
,
911 struct request_sock
*req
)
913 const union tcp_md5_addr
*addr
;
915 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
916 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
918 u32 seq
= (sk
->sk_state
== TCP_LISTEN
) ? tcp_rsk(req
)->snt_isn
+ 1 :
922 * The window field (SEG.WND) of every outgoing segment, with the
923 * exception of <SYN> segments, MUST be right-shifted by
924 * Rcv.Wind.Shift bits:
926 addr
= (union tcp_md5_addr
*)&ip_hdr(skb
)->saddr
;
927 tcp_v4_send_ack(sk
, skb
, seq
,
928 tcp_rsk(req
)->rcv_nxt
,
929 req
->rsk_rcv_wnd
>> inet_rsk(req
)->rcv_wscale
,
930 tcp_time_stamp_raw() + tcp_rsk(req
)->ts_off
,
933 tcp_md5_do_lookup(sk
, addr
, AF_INET
),
934 inet_rsk(req
)->no_srccheck
? IP_REPLY_ARG_NOSRCCHECK
: 0,
939 * Send a SYN-ACK after having received a SYN.
940 * This still operates on a request_sock only, not on a big
943 static int tcp_v4_send_synack(const struct sock
*sk
, struct dst_entry
*dst
,
945 struct request_sock
*req
,
946 struct tcp_fastopen_cookie
*foc
,
947 enum tcp_synack_type synack_type
)
949 const struct inet_request_sock
*ireq
= inet_rsk(req
);
954 /* First, grab a route. */
955 if (!dst
&& (dst
= inet_csk_route_req(sk
, &fl4
, req
)) == NULL
)
958 skb
= tcp_make_synack(sk
, dst
, req
, foc
, synack_type
);
961 __tcp_v4_send_check(skb
, ireq
->ir_loc_addr
, ireq
->ir_rmt_addr
);
964 err
= ip_build_and_send_pkt(skb
, sk
, ireq
->ir_loc_addr
,
966 rcu_dereference(ireq
->ireq_opt
));
968 err
= net_xmit_eval(err
);
975 * IPv4 request_sock destructor.
977 static void tcp_v4_reqsk_destructor(struct request_sock
*req
)
979 kfree(rcu_dereference_protected(inet_rsk(req
)->ireq_opt
, 1));
982 #ifdef CONFIG_TCP_MD5SIG
984 * RFC2385 MD5 checksumming requires a mapping of
985 * IP address->MD5 Key.
986 * We need to maintain these in the sk structure.
989 DEFINE_STATIC_KEY_FALSE(tcp_md5_needed
);
990 EXPORT_SYMBOL(tcp_md5_needed
);
992 /* Find the Key structure for an address. */
993 struct tcp_md5sig_key
*__tcp_md5_do_lookup(const struct sock
*sk
,
994 const union tcp_md5_addr
*addr
,
997 const struct tcp_sock
*tp
= tcp_sk(sk
);
998 struct tcp_md5sig_key
*key
;
999 const struct tcp_md5sig_info
*md5sig
;
1001 struct tcp_md5sig_key
*best_match
= NULL
;
1004 /* caller either holds rcu_read_lock() or socket lock */
1005 md5sig
= rcu_dereference_check(tp
->md5sig_info
,
1006 lockdep_sock_is_held(sk
));
1010 hlist_for_each_entry_rcu(key
, &md5sig
->head
, node
) {
1011 if (key
->family
!= family
)
1014 if (family
== AF_INET
) {
1015 mask
= inet_make_mask(key
->prefixlen
);
1016 match
= (key
->addr
.a4
.s_addr
& mask
) ==
1017 (addr
->a4
.s_addr
& mask
);
1018 #if IS_ENABLED(CONFIG_IPV6)
1019 } else if (family
== AF_INET6
) {
1020 match
= ipv6_prefix_equal(&key
->addr
.a6
, &addr
->a6
,
1027 if (match
&& (!best_match
||
1028 key
->prefixlen
> best_match
->prefixlen
))
1033 EXPORT_SYMBOL(__tcp_md5_do_lookup
);
1035 static struct tcp_md5sig_key
*tcp_md5_do_lookup_exact(const struct sock
*sk
,
1036 const union tcp_md5_addr
*addr
,
1037 int family
, u8 prefixlen
)
1039 const struct tcp_sock
*tp
= tcp_sk(sk
);
1040 struct tcp_md5sig_key
*key
;
1041 unsigned int size
= sizeof(struct in_addr
);
1042 const struct tcp_md5sig_info
*md5sig
;
1044 /* caller either holds rcu_read_lock() or socket lock */
1045 md5sig
= rcu_dereference_check(tp
->md5sig_info
,
1046 lockdep_sock_is_held(sk
));
1049 #if IS_ENABLED(CONFIG_IPV6)
1050 if (family
== AF_INET6
)
1051 size
= sizeof(struct in6_addr
);
1053 hlist_for_each_entry_rcu(key
, &md5sig
->head
, node
) {
1054 if (key
->family
!= family
)
1056 if (!memcmp(&key
->addr
, addr
, size
) &&
1057 key
->prefixlen
== prefixlen
)
1063 struct tcp_md5sig_key
*tcp_v4_md5_lookup(const struct sock
*sk
,
1064 const struct sock
*addr_sk
)
1066 const union tcp_md5_addr
*addr
;
1068 addr
= (const union tcp_md5_addr
*)&addr_sk
->sk_daddr
;
1069 return tcp_md5_do_lookup(sk
, addr
, AF_INET
);
1071 EXPORT_SYMBOL(tcp_v4_md5_lookup
);
1073 /* This can be called on a newly created socket, from other files */
1074 int tcp_md5_do_add(struct sock
*sk
, const union tcp_md5_addr
*addr
,
1075 int family
, u8 prefixlen
, const u8
*newkey
, u8 newkeylen
,
1078 /* Add Key to the list */
1079 struct tcp_md5sig_key
*key
;
1080 struct tcp_sock
*tp
= tcp_sk(sk
);
1081 struct tcp_md5sig_info
*md5sig
;
1083 key
= tcp_md5_do_lookup_exact(sk
, addr
, family
, prefixlen
);
1085 /* Pre-existing entry - just update that one. */
1086 memcpy(key
->key
, newkey
, newkeylen
);
1087 key
->keylen
= newkeylen
;
1091 md5sig
= rcu_dereference_protected(tp
->md5sig_info
,
1092 lockdep_sock_is_held(sk
));
1094 md5sig
= kmalloc(sizeof(*md5sig
), gfp
);
1098 sk_nocaps_add(sk
, NETIF_F_GSO_MASK
);
1099 INIT_HLIST_HEAD(&md5sig
->head
);
1100 rcu_assign_pointer(tp
->md5sig_info
, md5sig
);
1103 key
= sock_kmalloc(sk
, sizeof(*key
), gfp
);
1106 if (!tcp_alloc_md5sig_pool()) {
1107 sock_kfree_s(sk
, key
, sizeof(*key
));
1111 memcpy(key
->key
, newkey
, newkeylen
);
1112 key
->keylen
= newkeylen
;
1113 key
->family
= family
;
1114 key
->prefixlen
= prefixlen
;
1115 memcpy(&key
->addr
, addr
,
1116 (family
== AF_INET6
) ? sizeof(struct in6_addr
) :
1117 sizeof(struct in_addr
));
1118 hlist_add_head_rcu(&key
->node
, &md5sig
->head
);
1121 EXPORT_SYMBOL(tcp_md5_do_add
);
1123 int tcp_md5_do_del(struct sock
*sk
, const union tcp_md5_addr
*addr
, int family
,
1126 struct tcp_md5sig_key
*key
;
1128 key
= tcp_md5_do_lookup_exact(sk
, addr
, family
, prefixlen
);
1131 hlist_del_rcu(&key
->node
);
1132 atomic_sub(sizeof(*key
), &sk
->sk_omem_alloc
);
1133 kfree_rcu(key
, rcu
);
1136 EXPORT_SYMBOL(tcp_md5_do_del
);
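
/* Drop every MD5 key attached to this socket; used when the socket is torn
 * down.
 */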
1138 static void tcp_clear_md5_list(struct sock
*sk
)
1140 struct tcp_sock
*tp
= tcp_sk(sk
);
1141 struct tcp_md5sig_key
*key
;
1142 struct hlist_node
*n
;
1143 struct tcp_md5sig_info
*md5sig
;
1145 md5sig
= rcu_dereference_protected(tp
->md5sig_info
, 1);
1147 hlist_for_each_entry_safe(key
, n
, &md5sig
->head
, node
) {
1148 hlist_del_rcu(&key
->node
);
1149 atomic_sub(sizeof(*key
), &sk
->sk_omem_alloc
);
1150 kfree_rcu(key
, rcu
);
1154 static int tcp_v4_parse_md5_keys(struct sock
*sk
, int optname
,
1155 char __user
*optval
, int optlen
)
1157 struct tcp_md5sig cmd
;
1158 struct sockaddr_in
*sin
= (struct sockaddr_in
*)&cmd
.tcpm_addr
;
1159 const union tcp_md5_addr
*addr
;
1162 if (optlen
< sizeof(cmd
))
1165 if (copy_from_user(&cmd
, optval
, sizeof(cmd
)))
1168 if (sin
->sin_family
!= AF_INET
)
1171 if (optname
== TCP_MD5SIG_EXT
&&
1172 cmd
.tcpm_flags
& TCP_MD5SIG_FLAG_PREFIX
) {
1173 prefixlen
= cmd
.tcpm_prefixlen
;
1178 addr
= (union tcp_md5_addr
*)&sin
->sin_addr
.s_addr
;
1180 if (!cmd
.tcpm_keylen
)
1181 return tcp_md5_do_del(sk
, addr
, AF_INET
, prefixlen
);
1183 if (cmd
.tcpm_keylen
> TCP_MD5SIG_MAXKEYLEN
)
1186 return tcp_md5_do_add(sk
, addr
, AF_INET
, prefixlen
,
1187 cmd
.tcpm_key
, cmd
.tcpm_keylen
, GFP_KERNEL
);
1190 static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool
*hp
,
1191 __be32 daddr
, __be32 saddr
,
1192 const struct tcphdr
*th
, int nbytes
)
1194 struct tcp4_pseudohdr
*bp
;
1195 struct scatterlist sg
;
1202 bp
->protocol
= IPPROTO_TCP
;
1203 bp
->len
= cpu_to_be16(nbytes
);
1205 _th
= (struct tcphdr
*)(bp
+ 1);
1206 memcpy(_th
, th
, sizeof(*th
));
1209 sg_init_one(&sg
, bp
, sizeof(*bp
) + sizeof(*th
));
1210 ahash_request_set_crypt(hp
->md5_req
, &sg
, NULL
,
1211 sizeof(*bp
) + sizeof(*th
));
1212 return crypto_ahash_update(hp
->md5_req
);
1215 static int tcp_v4_md5_hash_hdr(char *md5_hash
, const struct tcp_md5sig_key
*key
,
1216 __be32 daddr
, __be32 saddr
, const struct tcphdr
*th
)
1218 struct tcp_md5sig_pool
*hp
;
1219 struct ahash_request
*req
;
1221 hp
= tcp_get_md5sig_pool();
1223 goto clear_hash_noput
;
1226 if (crypto_ahash_init(req
))
1228 if (tcp_v4_md5_hash_headers(hp
, daddr
, saddr
, th
, th
->doff
<< 2))
1230 if (tcp_md5_hash_key(hp
, key
))
1232 ahash_request_set_crypt(req
, NULL
, md5_hash
, 0);
1233 if (crypto_ahash_final(req
))
1236 tcp_put_md5sig_pool();
1240 tcp_put_md5sig_pool();
1242 memset(md5_hash
, 0, 16);
1246 int tcp_v4_md5_hash_skb(char *md5_hash
, const struct tcp_md5sig_key
*key
,
1247 const struct sock
*sk
,
1248 const struct sk_buff
*skb
)
1250 struct tcp_md5sig_pool
*hp
;
1251 struct ahash_request
*req
;
1252 const struct tcphdr
*th
= tcp_hdr(skb
);
1253 __be32 saddr
, daddr
;
1255 if (sk
) { /* valid for establish/request sockets */
1256 saddr
= sk
->sk_rcv_saddr
;
1257 daddr
= sk
->sk_daddr
;
1259 const struct iphdr
*iph
= ip_hdr(skb
);
1264 hp
= tcp_get_md5sig_pool();
1266 goto clear_hash_noput
;
1269 if (crypto_ahash_init(req
))
1272 if (tcp_v4_md5_hash_headers(hp
, daddr
, saddr
, th
, skb
->len
))
1274 if (tcp_md5_hash_skb_data(hp
, skb
, th
->doff
<< 2))
1276 if (tcp_md5_hash_key(hp
, key
))
1278 ahash_request_set_crypt(req
, NULL
, md5_hash
, 0);
1279 if (crypto_ahash_final(req
))
1282 tcp_put_md5sig_pool();
1286 tcp_put_md5sig_pool();
1288 memset(md5_hash
, 0, 16);
1291 EXPORT_SYMBOL(tcp_v4_md5_hash_skb
);
1295 /* Called with rcu_read_lock() */
1296 static bool tcp_v4_inbound_md5_hash(const struct sock
*sk
,
1297 const struct sk_buff
*skb
,
1300 #ifdef CONFIG_TCP_MD5SIG
1302 * This gets called for each TCP segment that arrives
1303 * so we want to be efficient.
1304 * We have 3 drop cases:
1305 * o No MD5 hash and one expected.
1306 * o MD5 hash and we're not expecting one.
1307 * o MD5 hash and its wrong.
1309 const __u8
*hash_location
= NULL
;
1310 struct tcp_md5sig_key
*hash_expected
;
1311 const struct iphdr
*iph
= ip_hdr(skb
);
1312 const struct tcphdr
*th
= tcp_hdr(skb
);
1313 const union tcp_md5_addr
*addr
;
1315 unsigned char newhash
[16];
1317 addr
= (union tcp_md5_addr
*)&iph
->saddr
;
1318 hash_expected
= tcp_md5_do_lookup(sk
, addr
, AF_INET
);
1319 hash_location
= tcp_parse_md5sig_option(th
);
1321 /* We've parsed the options - do we have a hash? */
1322 if (!hash_expected
&& !hash_location
)
1325 if (hash_expected
&& !hash_location
) {
1326 NET_INC_STATS(sock_net(sk
), LINUX_MIB_TCPMD5NOTFOUND
);
1330 if (!hash_expected
&& hash_location
) {
1331 NET_INC_STATS(sock_net(sk
), LINUX_MIB_TCPMD5UNEXPECTED
);
1335 /* Okay, so this is hash_expected and hash_location -
1336 * so we need to calculate the checksum.
1338 genhash
= tcp_v4_md5_hash_skb(newhash
,
1342 if (genhash
|| memcmp(hash_location
, newhash
, 16) != 0) {
1343 NET_INC_STATS(sock_net(sk
), LINUX_MIB_TCPMD5FAILURE
);
1344 net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1345 &iph
->saddr
, ntohs(th
->source
),
1346 &iph
->daddr
, ntohs(th
->dest
),
1347 genhash
? " tcp_v4_calc_md5_hash failed"
1356 static void tcp_v4_init_req(struct request_sock
*req
,
1357 const struct sock
*sk_listener
,
1358 struct sk_buff
*skb
)
1360 struct inet_request_sock
*ireq
= inet_rsk(req
);
1361 struct net
*net
= sock_net(sk_listener
);
1363 sk_rcv_saddr_set(req_to_sk(req
), ip_hdr(skb
)->daddr
);
1364 sk_daddr_set(req_to_sk(req
), ip_hdr(skb
)->saddr
);
1365 RCU_INIT_POINTER(ireq
->ireq_opt
, tcp_v4_save_options(net
, skb
));
1368 static struct dst_entry
*tcp_v4_route_req(const struct sock
*sk
,
1370 const struct request_sock
*req
)
1372 return inet_csk_route_req(sk
, &fl
->u
.ip4
, req
);
1375 struct request_sock_ops tcp_request_sock_ops __read_mostly
= {
1377 .obj_size
= sizeof(struct tcp_request_sock
),
1378 .rtx_syn_ack
= tcp_rtx_synack
,
1379 .send_ack
= tcp_v4_reqsk_send_ack
,
1380 .destructor
= tcp_v4_reqsk_destructor
,
1381 .send_reset
= tcp_v4_send_reset
,
1382 .syn_ack_timeout
= tcp_syn_ack_timeout
,
1385 static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops
= {
1386 .mss_clamp
= TCP_MSS_DEFAULT
,
1387 #ifdef CONFIG_TCP_MD5SIG
1388 .req_md5_lookup
= tcp_v4_md5_lookup
,
1389 .calc_md5_hash
= tcp_v4_md5_hash_skb
,
1391 .init_req
= tcp_v4_init_req
,
1392 #ifdef CONFIG_SYN_COOKIES
1393 .cookie_init_seq
= cookie_v4_init_sequence
,
1395 .route_req
= tcp_v4_route_req
,
1396 .init_seq
= tcp_v4_init_seq
,
1397 .init_ts_off
= tcp_v4_init_ts_off
,
1398 .send_synack
= tcp_v4_send_synack
,
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	/* Never answer to SYNs sent to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	return tcp_conn_request(&tcp_request_sock_ops,
				&tcp_request_sock_ipv4_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0;
}
EXPORT_SYMBOL(tcp_v4_conn_request);
1418 * The three way handshake has completed - we got a valid synack -
1419 * now create the new socket.
1421 struct sock
*tcp_v4_syn_recv_sock(const struct sock
*sk
, struct sk_buff
*skb
,
1422 struct request_sock
*req
,
1423 struct dst_entry
*dst
,
1424 struct request_sock
*req_unhash
,
1427 struct inet_request_sock
*ireq
;
1428 struct inet_sock
*newinet
;
1429 struct tcp_sock
*newtp
;
1431 #ifdef CONFIG_TCP_MD5SIG
1432 const union tcp_md5_addr
*addr
;
1433 struct tcp_md5sig_key
*key
;
1435 struct ip_options_rcu
*inet_opt
;
1437 if (sk_acceptq_is_full(sk
))
1440 newsk
= tcp_create_openreq_child(sk
, req
, skb
);
1444 newsk
->sk_gso_type
= SKB_GSO_TCPV4
;
1445 inet_sk_rx_dst_set(newsk
, skb
);
1447 newtp
= tcp_sk(newsk
);
1448 newinet
= inet_sk(newsk
);
1449 ireq
= inet_rsk(req
);
1450 sk_daddr_set(newsk
, ireq
->ir_rmt_addr
);
1451 sk_rcv_saddr_set(newsk
, ireq
->ir_loc_addr
);
1452 newsk
->sk_bound_dev_if
= ireq
->ir_iif
;
1453 newinet
->inet_saddr
= ireq
->ir_loc_addr
;
1454 inet_opt
= rcu_dereference(ireq
->ireq_opt
);
1455 RCU_INIT_POINTER(newinet
->inet_opt
, inet_opt
);
1456 newinet
->mc_index
= inet_iif(skb
);
1457 newinet
->mc_ttl
= ip_hdr(skb
)->ttl
;
1458 newinet
->rcv_tos
= ip_hdr(skb
)->tos
;
1459 inet_csk(newsk
)->icsk_ext_hdr_len
= 0;
1461 inet_csk(newsk
)->icsk_ext_hdr_len
= inet_opt
->opt
.optlen
;
1462 newinet
->inet_id
= prandom_u32();
1465 dst
= inet_csk_route_child_sock(sk
, newsk
, req
);
1469 /* syncookie case : see end of cookie_v4_check() */
1471 sk_setup_caps(newsk
, dst
);
1473 tcp_ca_openreq_child(newsk
, dst
);
1475 tcp_sync_mss(newsk
, dst_mtu(dst
));
1476 newtp
->advmss
= tcp_mss_clamp(tcp_sk(sk
), dst_metric_advmss(dst
));
1478 tcp_initialize_rcv_mss(newsk
);
1480 #ifdef CONFIG_TCP_MD5SIG
1481 /* Copy over the MD5 key from the original socket */
1482 addr
= (union tcp_md5_addr
*)&newinet
->inet_daddr
;
1483 key
= tcp_md5_do_lookup(sk
, addr
, AF_INET
);
1486 * We're using one, so create a matching key
1487 * on the newsk structure. If we fail to get
1488 * memory, then we end up not copying the key
1491 tcp_md5_do_add(newsk
, addr
, AF_INET
, 32,
1492 key
->key
, key
->keylen
, GFP_ATOMIC
);
1493 sk_nocaps_add(newsk
, NETIF_F_GSO_MASK
);
1497 if (__inet_inherit_port(sk
, newsk
) < 0)
1499 *own_req
= inet_ehash_nolisten(newsk
, req_to_sk(req_unhash
));
1500 if (likely(*own_req
)) {
1501 tcp_move_syn(newtp
, req
);
1502 ireq
->ireq_opt
= NULL
;
1504 newinet
->inet_opt
= NULL
;
1509 NET_INC_STATS(sock_net(sk
), LINUX_MIB_LISTENOVERFLOWS
);
1516 newinet
->inet_opt
= NULL
;
1517 inet_csk_prepare_forced_close(newsk
);
1521 EXPORT_SYMBOL(tcp_v4_syn_recv_sock
);
1523 static struct sock
*tcp_v4_cookie_check(struct sock
*sk
, struct sk_buff
*skb
)
1525 #ifdef CONFIG_SYN_COOKIES
1526 const struct tcphdr
*th
= tcp_hdr(skb
);
1529 sk
= cookie_v4_check(sk
, skb
);
1534 u16
tcp_v4_get_syncookie(struct sock
*sk
, struct iphdr
*iph
,
1535 struct tcphdr
*th
, u32
*cookie
)
1538 #ifdef CONFIG_SYN_COOKIES
1539 mss
= tcp_get_syncookie_mss(&tcp_request_sock_ops
,
1540 &tcp_request_sock_ipv4_ops
, sk
, th
);
1542 *cookie
= __cookie_v4_init_sequence(iph
, th
, &mss
);
1543 tcp_synq_overflow(sk
);
/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;
1561 if (sk
->sk_state
== TCP_ESTABLISHED
) { /* Fast path */
1562 struct dst_entry
*dst
= sk
->sk_rx_dst
;
1564 sock_rps_save_rxhash(sk
, skb
);
1565 sk_mark_napi_id(sk
, skb
);
1567 if (inet_sk(sk
)->rx_dst_ifindex
!= skb
->skb_iif
||
1568 !dst
->ops
->check(dst
, 0)) {
1570 sk
->sk_rx_dst
= NULL
;
1573 tcp_rcv_established(sk
, skb
);
1577 if (tcp_checksum_complete(skb
))
1580 if (sk
->sk_state
== TCP_LISTEN
) {
1581 struct sock
*nsk
= tcp_v4_cookie_check(sk
, skb
);
1586 if (tcp_child_process(sk
, nsk
, skb
)) {
1593 sock_rps_save_rxhash(sk
, skb
);
1595 if (tcp_rcv_state_process(sk
, skb
)) {
1602 tcp_v4_send_reset(rsk
, skb
);
1605 /* Be careful here. If this function gets more complicated and
1606 * gcc suffers from register pressure on the x86, sk (in %ebx)
1607 * might be destroyed here. This current version compiles correctly,
1608 * but you have been warned.
1613 TCP_INC_STATS(sock_net(sk
), TCP_MIB_CSUMERRORS
);
1614 TCP_INC_STATS(sock_net(sk
), TCP_MIB_INERRS
);
1617 EXPORT_SYMBOL(tcp_v4_do_rcv
);
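
/* Early demux: look up the established socket for an incoming segment before
 * the routing decision, so the socket's cached dst can be reused.
 */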
1619 int tcp_v4_early_demux(struct sk_buff
*skb
)
1621 const struct iphdr
*iph
;
1622 const struct tcphdr
*th
;
1625 if (skb
->pkt_type
!= PACKET_HOST
)
1628 if (!pskb_may_pull(skb
, skb_transport_offset(skb
) + sizeof(struct tcphdr
)))
1634 if (th
->doff
< sizeof(struct tcphdr
) / 4)
1637 sk
= __inet_lookup_established(dev_net(skb
->dev
), &tcp_hashinfo
,
1638 iph
->saddr
, th
->source
,
1639 iph
->daddr
, ntohs(th
->dest
),
1640 skb
->skb_iif
, inet_sdif(skb
));
1643 skb
->destructor
= sock_edemux
;
1644 if (sk_fullsock(sk
)) {
1645 struct dst_entry
*dst
= READ_ONCE(sk
->sk_rx_dst
);
1648 dst
= dst_check(dst
, 0);
1650 inet_sk(sk
)->rx_dst_ifindex
== skb
->skb_iif
)
1651 skb_dst_set_noref(skb
, dst
);
1657 bool tcp_add_backlog(struct sock
*sk
, struct sk_buff
*skb
)
1659 u32 limit
= READ_ONCE(sk
->sk_rcvbuf
) + READ_ONCE(sk
->sk_sndbuf
);
1660 struct skb_shared_info
*shinfo
;
1661 const struct tcphdr
*th
;
1662 struct tcphdr
*thtail
;
1663 struct sk_buff
*tail
;
1664 unsigned int hdrlen
;
1669 /* In case all data was pulled from skb frags (in __pskb_pull_tail()),
1670 * we can fix skb->truesize to its real value to avoid future drops.
1671 * This is valid because skb is not yet charged to the socket.
1672 * It has been noticed pure SACK packets were sometimes dropped
1673 * (if cooked by drivers without copybreak feature).
1679 if (unlikely(tcp_checksum_complete(skb
))) {
1681 __TCP_INC_STATS(sock_net(sk
), TCP_MIB_CSUMERRORS
);
1682 __TCP_INC_STATS(sock_net(sk
), TCP_MIB_INERRS
);
1686 /* Attempt coalescing to last skb in backlog, even if we are
1688 * This is okay because skb capacity is limited to MAX_SKB_FRAGS.
1690 th
= (const struct tcphdr
*)skb
->data
;
1691 hdrlen
= th
->doff
* 4;
1692 shinfo
= skb_shinfo(skb
);
1694 if (!shinfo
->gso_size
)
1695 shinfo
->gso_size
= skb
->len
- hdrlen
;
1697 if (!shinfo
->gso_segs
)
1698 shinfo
->gso_segs
= 1;
1700 tail
= sk
->sk_backlog
.tail
;
1703 thtail
= (struct tcphdr
*)tail
->data
;
1705 if (TCP_SKB_CB(tail
)->end_seq
!= TCP_SKB_CB(skb
)->seq
||
1706 TCP_SKB_CB(tail
)->ip_dsfield
!= TCP_SKB_CB(skb
)->ip_dsfield
||
1707 ((TCP_SKB_CB(tail
)->tcp_flags
|
1708 TCP_SKB_CB(skb
)->tcp_flags
) & (TCPHDR_SYN
| TCPHDR_RST
| TCPHDR_URG
)) ||
1709 !((TCP_SKB_CB(tail
)->tcp_flags
&
1710 TCP_SKB_CB(skb
)->tcp_flags
) & TCPHDR_ACK
) ||
1711 ((TCP_SKB_CB(tail
)->tcp_flags
^
1712 TCP_SKB_CB(skb
)->tcp_flags
) & (TCPHDR_ECE
| TCPHDR_CWR
)) ||
1713 #ifdef CONFIG_TLS_DEVICE
1714 tail
->decrypted
!= skb
->decrypted
||
1716 thtail
->doff
!= th
->doff
||
1717 memcmp(thtail
+ 1, th
+ 1, hdrlen
- sizeof(*th
)))
1720 __skb_pull(skb
, hdrlen
);
1721 if (skb_try_coalesce(tail
, skb
, &fragstolen
, &delta
)) {
1722 thtail
->window
= th
->window
;
1724 TCP_SKB_CB(tail
)->end_seq
= TCP_SKB_CB(skb
)->end_seq
;
1726 if (after(TCP_SKB_CB(skb
)->ack_seq
, TCP_SKB_CB(tail
)->ack_seq
))
1727 TCP_SKB_CB(tail
)->ack_seq
= TCP_SKB_CB(skb
)->ack_seq
;
1729 /* We have to update both TCP_SKB_CB(tail)->tcp_flags and
1730 * thtail->fin, so that the fast path in tcp_rcv_established()
1731 * is not entered if we append a packet with a FIN.
1732 * SYN, RST, URG are not present.
1733 * ACK is set on both packets.
1734 * PSH : we do not really care in TCP stack,
1735 * at least for 'GRO' packets.
1737 thtail
->fin
|= th
->fin
;
1738 TCP_SKB_CB(tail
)->tcp_flags
|= TCP_SKB_CB(skb
)->tcp_flags
;
1740 if (TCP_SKB_CB(skb
)->has_rxtstamp
) {
1741 TCP_SKB_CB(tail
)->has_rxtstamp
= true;
1742 tail
->tstamp
= skb
->tstamp
;
1743 skb_hwtstamps(tail
)->hwtstamp
= skb_hwtstamps(skb
)->hwtstamp
;
1746 /* Not as strict as GRO. We only need to carry mss max value */
1747 skb_shinfo(tail
)->gso_size
= max(shinfo
->gso_size
,
1748 skb_shinfo(tail
)->gso_size
);
1750 gso_segs
= skb_shinfo(tail
)->gso_segs
+ shinfo
->gso_segs
;
1751 skb_shinfo(tail
)->gso_segs
= min_t(u32
, gso_segs
, 0xFFFF);
1753 sk
->sk_backlog
.len
+= delta
;
1754 __NET_INC_STATS(sock_net(sk
),
1755 LINUX_MIB_TCPBACKLOGCOALESCE
);
1756 kfree_skb_partial(skb
, fragstolen
);
1759 __skb_push(skb
, hdrlen
);
1762 /* Only socket owner can try to collapse/prune rx queues
1763 * to reduce memory overhead, so add a little headroom here.
1764 * Few sockets backlog are possibly concurrently non empty.
1768 if (unlikely(sk_add_backlog(sk
, skb
, limit
))) {
1770 __NET_INC_STATS(sock_net(sk
), LINUX_MIB_TCPBACKLOGDROP
);
1775 EXPORT_SYMBOL(tcp_add_backlog
);
1777 int tcp_filter(struct sock
*sk
, struct sk_buff
*skb
)
1779 struct tcphdr
*th
= (struct tcphdr
*)skb
->data
;
1781 return sk_filter_trim_cap(sk
, skb
, th
->doff
* 4);
1783 EXPORT_SYMBOL(tcp_filter
);
1785 static void tcp_v4_restore_cb(struct sk_buff
*skb
)
1787 memmove(IPCB(skb
), &TCP_SKB_CB(skb
)->header
.h4
,
1788 sizeof(struct inet_skb_parm
));
1791 static void tcp_v4_fill_cb(struct sk_buff
*skb
, const struct iphdr
*iph
,
1792 const struct tcphdr
*th
)
1794 /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
1795 * barrier() makes sure compiler wont play fool^Waliasing games.
1797 memmove(&TCP_SKB_CB(skb
)->header
.h4
, IPCB(skb
),
1798 sizeof(struct inet_skb_parm
));
1801 TCP_SKB_CB(skb
)->seq
= ntohl(th
->seq
);
1802 TCP_SKB_CB(skb
)->end_seq
= (TCP_SKB_CB(skb
)->seq
+ th
->syn
+ th
->fin
+
1803 skb
->len
- th
->doff
* 4);
1804 TCP_SKB_CB(skb
)->ack_seq
= ntohl(th
->ack_seq
);
1805 TCP_SKB_CB(skb
)->tcp_flags
= tcp_flag_byte(th
);
1806 TCP_SKB_CB(skb
)->tcp_tw_isn
= 0;
1807 TCP_SKB_CB(skb
)->ip_dsfield
= ipv4_get_dsfield(iph
);
1808 TCP_SKB_CB(skb
)->sacked
= 0;
1809 TCP_SKB_CB(skb
)->has_rxtstamp
=
1810 skb
->tstamp
|| skb_hwtstamps(skb
)->hwtstamp
;
1817 int tcp_v4_rcv(struct sk_buff
*skb
)
1819 struct net
*net
= dev_net(skb
->dev
);
1820 struct sk_buff
*skb_to_free
;
1821 int sdif
= inet_sdif(skb
);
1822 int dif
= inet_iif(skb
);
1823 const struct iphdr
*iph
;
1824 const struct tcphdr
*th
;
1829 if (skb
->pkt_type
!= PACKET_HOST
)
1832 /* Count it even if it's bad */
1833 __TCP_INC_STATS(net
, TCP_MIB_INSEGS
);
1835 if (!pskb_may_pull(skb
, sizeof(struct tcphdr
)))
1838 th
= (const struct tcphdr
*)skb
->data
;
1840 if (unlikely(th
->doff
< sizeof(struct tcphdr
) / 4))
1842 if (!pskb_may_pull(skb
, th
->doff
* 4))
1845 /* An explanation is required here, I think.
1846 * Packet length and doff are validated by header prediction,
1847 * provided case of th->doff==0 is eliminated.
1848 * So, we defer the checks. */
1850 if (skb_checksum_init(skb
, IPPROTO_TCP
, inet_compute_pseudo
))
1853 th
= (const struct tcphdr
*)skb
->data
;
1856 sk
= __inet_lookup_skb(&tcp_hashinfo
, skb
, __tcp_hdrlen(th
), th
->source
,
1857 th
->dest
, sdif
, &refcounted
);
1862 if (sk
->sk_state
== TCP_TIME_WAIT
)
1865 if (sk
->sk_state
== TCP_NEW_SYN_RECV
) {
1866 struct request_sock
*req
= inet_reqsk(sk
);
1867 bool req_stolen
= false;
1870 sk
= req
->rsk_listener
;
1871 if (unlikely(tcp_v4_inbound_md5_hash(sk
, skb
, dif
, sdif
))) {
1872 sk_drops_add(sk
, skb
);
1876 if (tcp_checksum_complete(skb
)) {
1880 if (unlikely(sk
->sk_state
!= TCP_LISTEN
)) {
1881 inet_csk_reqsk_queue_drop_and_put(sk
, req
);
1884 /* We own a reference on the listener, increase it again
1885 * as we might lose it too soon.
1890 if (!tcp_filter(sk
, skb
)) {
1891 th
= (const struct tcphdr
*)skb
->data
;
1893 tcp_v4_fill_cb(skb
, iph
, th
);
1894 nsk
= tcp_check_req(sk
, skb
, req
, false, &req_stolen
);
1899 /* Another cpu got exclusive access to req
1900 * and created a full blown socket.
1901 * Try to feed this packet to this socket
1902 * instead of discarding it.
1904 tcp_v4_restore_cb(skb
);
1908 goto discard_and_relse
;
1912 tcp_v4_restore_cb(skb
);
1913 } else if (tcp_child_process(sk
, nsk
, skb
)) {
1914 tcp_v4_send_reset(nsk
, skb
);
1915 goto discard_and_relse
;
1921 if (unlikely(iph
->ttl
< inet_sk(sk
)->min_ttl
)) {
1922 __NET_INC_STATS(net
, LINUX_MIB_TCPMINTTLDROP
);
1923 goto discard_and_relse
;
1926 if (!xfrm4_policy_check(sk
, XFRM_POLICY_IN
, skb
))
1927 goto discard_and_relse
;
1929 if (tcp_v4_inbound_md5_hash(sk
, skb
, dif
, sdif
))
1930 goto discard_and_relse
;
1934 if (tcp_filter(sk
, skb
))
1935 goto discard_and_relse
;
1936 th
= (const struct tcphdr
*)skb
->data
;
1938 tcp_v4_fill_cb(skb
, iph
, th
);
1942 if (sk
->sk_state
== TCP_LISTEN
) {
1943 ret
= tcp_v4_do_rcv(sk
, skb
);
1944 goto put_and_return
;
1947 sk_incoming_cpu_update(sk
);
1949 bh_lock_sock_nested(sk
);
1950 tcp_segs_in(tcp_sk(sk
), skb
);
1952 if (!sock_owned_by_user(sk
)) {
1953 skb_to_free
= sk
->sk_rx_skb_cache
;
1954 sk
->sk_rx_skb_cache
= NULL
;
1955 ret
= tcp_v4_do_rcv(sk
, skb
);
1957 if (tcp_add_backlog(sk
, skb
))
1958 goto discard_and_relse
;
1963 __kfree_skb(skb_to_free
);
1972 if (!xfrm4_policy_check(NULL
, XFRM_POLICY_IN
, skb
))
1975 tcp_v4_fill_cb(skb
, iph
, th
);
1977 if (tcp_checksum_complete(skb
)) {
1979 __TCP_INC_STATS(net
, TCP_MIB_CSUMERRORS
);
1981 __TCP_INC_STATS(net
, TCP_MIB_INERRS
);
1983 tcp_v4_send_reset(NULL
, skb
);
1987 /* Discard frame. */
1992 sk_drops_add(sk
, skb
);
1998 if (!xfrm4_policy_check(NULL
, XFRM_POLICY_IN
, skb
)) {
1999 inet_twsk_put(inet_twsk(sk
));
2003 tcp_v4_fill_cb(skb
, iph
, th
);
2005 if (tcp_checksum_complete(skb
)) {
2006 inet_twsk_put(inet_twsk(sk
));
2009 switch (tcp_timewait_state_process(inet_twsk(sk
), skb
, th
)) {
2011 struct sock
*sk2
= inet_lookup_listener(dev_net(skb
->dev
),
2014 iph
->saddr
, th
->source
,
2015 iph
->daddr
, th
->dest
,
2019 inet_twsk_deschedule_put(inet_twsk(sk
));
2021 tcp_v4_restore_cb(skb
);
2029 tcp_v4_timewait_ack(sk
, skb
);
2032 tcp_v4_send_reset(sk
, skb
);
2033 inet_twsk_deschedule_put(inet_twsk(sk
));
2035 case TCP_TW_SUCCESS
:;
static struct timewait_sock_ops tcp_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor= tcp_twsk_destructor,
};

void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst_hold_safe(dst)) {
		sk->sk_rx_dst = dst;
		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
	}
}
EXPORT_SYMBOL(inet_sk_rx_dst_set);

const struct inet_connection_sock_af_ops ipv4_specific = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v4_conn_request,
	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ip_setsockopt,
	.getsockopt	   = ip_getsockopt,
	.addr2sockaddr	   = inet_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ip_setsockopt,
	.compat_getsockopt = compat_ip_getsockopt,
#endif
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};
EXPORT_SYMBOL(ipv4_specific);

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
	.md5_parse	= tcp_v4_parse_md5_keys,
};
#endif
/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v4_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv4_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
#endif

	return 0;
}

void tcp_v4_destroy_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	trace_tcp_destroy_sock(sk);

	tcp_clear_xmit_timers(sk);

	tcp_cleanup_congestion_control(sk);

	tcp_cleanup_ulp(sk);

	/* Clean up the write buffer. */
	tcp_write_queue_purge(sk);

	/* Check if we want to disable active TFO */
	tcp_fastopen_active_disable_ofo_check(sk);

	/* Cleans up our, hopefully empty, out_of_order_queue. */
	skb_rbtree_purge(&tp->out_of_order_queue);

#ifdef CONFIG_TCP_MD5SIG
	/* Clean up the MD5 key list, if any */
	if (tp->md5sig_info) {
		tcp_clear_md5_list(sk);
		kfree_rcu(rcu_dereference_protected(tp->md5sig_info, 1), rcu);
		tp->md5sig_info = NULL;
	}
#endif

	/* Clean up a referenced TCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash)
		inet_put_port(sk);

	BUG_ON(rcu_access_pointer(tp->fastopen_rsk));

	/* If socket is aborted during connect operation */
	tcp_free_fastopen_req(tp);
	tcp_fastopen_destroy_cipher(sk);
	tcp_saved_syn_free(tp);

	sk_sockets_allocated_dec(sk);
}
EXPORT_SYMBOL(tcp_v4_destroy_sock);
2148 #ifdef CONFIG_PROC_FS
2149 /* Proc filesystem TCP sock list dumping. */
2152 * Get next listener socket follow cur. If cur is NULL, get first socket
2153 * starting from bucket given in st->bucket; when st->bucket is zero the
2154 * very first socket in the hash table is returned.
2156 static void *listening_get_next(struct seq_file
*seq
, void *cur
)
2158 struct tcp_seq_afinfo
*afinfo
= PDE_DATA(file_inode(seq
->file
));
2159 struct tcp_iter_state
*st
= seq
->private;
2160 struct net
*net
= seq_file_net(seq
);
2161 struct inet_listen_hashbucket
*ilb
;
2162 struct hlist_nulls_node
*node
;
2163 struct sock
*sk
= cur
;
2167 ilb
= &tcp_hashinfo
.listening_hash
[st
->bucket
];
2168 spin_lock(&ilb
->lock
);
2169 sk
= sk_nulls_head(&ilb
->nulls_head
);
2173 ilb
= &tcp_hashinfo
.listening_hash
[st
->bucket
];
2177 sk
= sk_nulls_next(sk
);
2179 sk_nulls_for_each_from(sk
, node
) {
2180 if (!net_eq(sock_net(sk
), net
))
2182 if (sk
->sk_family
== afinfo
->family
)
2185 spin_unlock(&ilb
->lock
);
2187 if (++st
->bucket
< INET_LHTABLE_SIZE
)
2192 static void *listening_get_idx(struct seq_file
*seq
, loff_t
*pos
)
2194 struct tcp_iter_state
*st
= seq
->private;
2199 rc
= listening_get_next(seq
, NULL
);
2201 while (rc
&& *pos
) {
2202 rc
= listening_get_next(seq
, rc
);
2208 static inline bool empty_bucket(const struct tcp_iter_state
*st
)
2210 return hlist_nulls_empty(&tcp_hashinfo
.ehash
[st
->bucket
].chain
);
2214 * Get first established socket starting from bucket given in st->bucket.
2215 * If st->bucket is zero, the very first socket in the hash is returned.
2217 static void *established_get_first(struct seq_file
*seq
)
2219 struct tcp_seq_afinfo
*afinfo
= PDE_DATA(file_inode(seq
->file
));
2220 struct tcp_iter_state
*st
= seq
->private;
2221 struct net
*net
= seq_file_net(seq
);
2225 for (; st
->bucket
<= tcp_hashinfo
.ehash_mask
; ++st
->bucket
) {
2227 struct hlist_nulls_node
*node
;
2228 spinlock_t
*lock
= inet_ehash_lockp(&tcp_hashinfo
, st
->bucket
);
2230 /* Lockless fast path for the common case of empty buckets */
2231 if (empty_bucket(st
))
2235 sk_nulls_for_each(sk
, node
, &tcp_hashinfo
.ehash
[st
->bucket
].chain
) {
2236 if (sk
->sk_family
!= afinfo
->family
||
2237 !net_eq(sock_net(sk
), net
)) {
2243 spin_unlock_bh(lock
);
2249 static void *established_get_next(struct seq_file
*seq
, void *cur
)
2251 struct tcp_seq_afinfo
*afinfo
= PDE_DATA(file_inode(seq
->file
));
2252 struct sock
*sk
= cur
;
2253 struct hlist_nulls_node
*node
;
2254 struct tcp_iter_state
*st
= seq
->private;
2255 struct net
*net
= seq_file_net(seq
);
2260 sk
= sk_nulls_next(sk
);
2262 sk_nulls_for_each_from(sk
, node
) {
2263 if (sk
->sk_family
== afinfo
->family
&&
2264 net_eq(sock_net(sk
), net
))
2268 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo
, st
->bucket
));
2270 return established_get_first(seq
);
2273 static void *established_get_idx(struct seq_file
*seq
, loff_t pos
)
2275 struct tcp_iter_state
*st
= seq
->private;
2279 rc
= established_get_first(seq
);
2282 rc
= established_get_next(seq
, rc
);
2288 static void *tcp_get_idx(struct seq_file
*seq
, loff_t pos
)
2291 struct tcp_iter_state
*st
= seq
->private;
2293 st
->state
= TCP_SEQ_STATE_LISTENING
;
2294 rc
= listening_get_idx(seq
, &pos
);
2297 st
->state
= TCP_SEQ_STATE_ESTABLISHED
;
2298 rc
= established_get_idx(seq
, pos
);
static void *tcp_seek_last_pos(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	int offset = st->offset;
	int orig_num = st->num;
	void *rc = NULL;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
		if (st->bucket >= INET_LHTABLE_SIZE)
			break;
		st->state = TCP_SEQ_STATE_LISTENING;
		rc = listening_get_next(seq, NULL);
		while (offset-- && rc)
			rc = listening_get_next(seq, rc);
		if (rc)
			break;
		st->bucket = 0;
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		/* Fallthrough */
	case TCP_SEQ_STATE_ESTABLISHED:
		if (st->bucket > tcp_hashinfo.ehash_mask)
			break;
		rc = established_get_first(seq);
		while (offset-- && rc)
			rc = established_get_next(seq, rc);
	}

	st->num = orig_num;

	return rc;
}
void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	if (*pos && *pos == st->last_pos) {
		rc = tcp_seek_last_pos(seq);
		if (rc)
			goto out;
	}

	st->state = TCP_SEQ_STATE_LISTENING;
	st->num = 0;
	st->bucket = 0;
	st->offset = 0;
	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;

out:
	st->last_pos = *pos;
	return rc;
}
EXPORT_SYMBOL(tcp_seq_start);
void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc = NULL;

	if (v == SEQ_START_TOKEN) {
		rc = tcp_get_idx(seq, 0);
		goto out;
	}

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
		rc = listening_get_next(seq, v);
		if (!rc) {
			st->state = TCP_SEQ_STATE_ESTABLISHED;
			st->bucket = 0;
			st->offset = 0;
			rc = established_get_first(seq);
		}
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
		rc = established_get_next(seq, v);
		break;
	}
out:
	++*pos;
	st->last_pos = *pos;
	return rc;
}
EXPORT_SYMBOL(tcp_seq_next);
void tcp_seq_stop(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
		if (v != SEQ_START_TOKEN)
			spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock);
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
		if (v)
			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		break;
	}
}
EXPORT_SYMBOL(tcp_seq_stop);
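/* Format one /proc/net/tcp line for a request socket still in SYN_RECV. */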
static void get_openreq4(const struct request_sock *req,
			 struct seq_file *f, int i)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	long delta = req->rsk_timer.expires - jiffies;

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
		i,
		ireq->ir_loc_addr,
		ireq->ir_num,
		ireq->ir_rmt_addr,
		ntohs(ireq->ir_rmt_port),
		TCP_SYN_RECV,
		0, 0, /* could print option size, but that is af dependent. */
		1,    /* timers active (only the expire timer) */
		jiffies_delta_to_clock_t(delta),
		req->num_timeout,
		from_kuid_munged(seq_user_ns(f),
				 sock_i_uid(req->rsk_listener)),
		0,  /* non standard timer */
		0, /* open_requests have no inode */
		0,
		req);
}
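/* Format one /proc/net/tcp line for a full socket (listening, established
 * or otherwise), including queue sizes, timer state and congestion data.
 */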
static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
{
	int timer_active;
	unsigned long timer_expires;
	const struct tcp_sock *tp = tcp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
	const struct inet_sock *inet = inet_sk(sk);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	__be32 dest = inet->inet_daddr;
	__be32 src = inet->inet_rcv_saddr;
	__u16 destp = ntohs(inet->inet_dport);
	__u16 srcp = ntohs(inet->inet_sport);
	int rx_queue;
	int state;

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active = 1;
		timer_expires = icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active = 4;
		timer_expires = icsk->icsk_timeout;
	} else if (timer_pending(&sk->sk_timer)) {
		timer_active = 2;
		timer_expires = sk->sk_timer.expires;
	} else {
		timer_active = 0;
		timer_expires = jiffies;
	}

	state = inet_sk_state_load(sk);
	if (state == TCP_LISTEN)
		rx_queue = READ_ONCE(sk->sk_ack_backlog);
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
				      READ_ONCE(tp->copied_seq), 0);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
			"%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
		i, src, srcp, dest, destp, state,
		READ_ONCE(tp->write_seq) - tp->snd_una,
		rx_queue,
		timer_active,
		jiffies_delta_to_clock_t(timer_expires - jiffies),
		icsk->icsk_retransmits,
		from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
		icsk->icsk_probes_out,
		sock_i_ino(sk),
		refcount_read(&sk->sk_refcnt), sk,
		jiffies_to_clock_t(icsk->icsk_rto),
		jiffies_to_clock_t(icsk->icsk_ack.ato),
		(icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sk),
		tp->snd_cwnd,
		state == TCP_LISTEN ?
		    fastopenq->max_qlen :
		    (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
}
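/* Format one /proc/net/tcp line for a socket in TIME_WAIT. */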
static void get_timewait4_sock(const struct inet_timewait_sock *tw,
			       struct seq_file *f, int i)
{
	__be32 dest, src;
	__u16 destp, srcp;
	long delta = tw->tw_timer.expires - jiffies;

	dest  = tw->tw_daddr;
	src   = tw->tw_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
		3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		refcount_read(&tw->tw_refcnt), tw);
}
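/* Emit one fixed-width line per iterator position: the column header for
 * the start token, otherwise dispatch on the socket's state.  seq_pad()
 * pads every line to the width set by seq_setwidth() before appending
 * the trailing newline.
 */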
#define TMPSZ 150

static int tcp4_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	seq_setwidth(seq, TMPSZ - 1);
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq, "  sl  local_address rem_address   st tx_queue "
			   "rx_queue tr tm->when retrnsmt   uid  timeout "
			   "inode");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait4_sock(v, seq, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq4(v, seq, st->num);
	else
		get_tcp4_sock(v, seq, st->num);
out:
	seq_pad(seq, '\n');
	return 0;
}
static const struct seq_operations tcp4_seq_ops = {
	.show		= tcp4_seq_show,
	.start		= tcp_seq_start,
	.next		= tcp_seq_next,
	.stop		= tcp_seq_stop,
};
static struct tcp_seq_afinfo tcp4_seq_afinfo = {
	.family		= AF_INET,
};
static int __net_init tcp4_proc_init_net(struct net *net)
{
	if (!proc_create_net_data("tcp", 0444, net->proc_net, &tcp4_seq_ops,
			sizeof(struct tcp_iter_state), &tcp4_seq_afinfo))
		return -ENOMEM;
	return 0;
}
static void __net_exit tcp4_proc_exit_net(struct net *net)
{
	remove_proc_entry("tcp", net->proc_net);
}
static struct pernet_operations tcp4_net_ops = {
	.init = tcp4_proc_init_net,
	.exit = tcp4_proc_exit_net,
};
int __init tcp4_proc_init(void)
{
	return register_pernet_subsys(&tcp4_net_ops);
}

void tcp4_proc_exit(void)
{
	unregister_pernet_subsys(&tcp4_net_ops);
}
#endif /* CONFIG_PROC_FS */
struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v4_pre_connect,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v4_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
	.diag_destroy		= tcp_abort,
};
EXPORT_SYMBOL(tcp_prot);
static void __net_exit tcp_sk_exit(struct net *net)
{
	int cpu;

	if (net->ipv4.tcp_congestion_control)
		module_put(net->ipv4.tcp_congestion_control->owner);

	for_each_possible_cpu(cpu)
		inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
	free_percpu(net->ipv4.tcp_sk);
}
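/* Per-namespace setup: create the per-CPU control sockets used to send
 * RSTs and ACKs on behalf of this namespace, then initialise the
 * namespace's TCP sysctl defaults (exposed under /proc/sys/net/ipv4/).
 */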
static int __net_init tcp_sk_init(struct net *net)
{
	int res, cpu, cnt;

	net->ipv4.tcp_sk = alloc_percpu(struct sock *);
	if (!net->ipv4.tcp_sk)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		struct sock *sk;

		res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
					   IPPROTO_TCP, net);
		if (res)
			goto fail;
		sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

		/* Please enforce IP_DF and IPID==0 for RST and
		 * ACK sent in SYN-RECV and TIME-WAIT state.
		 */
		inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;

		*per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
	}

	net->ipv4.sysctl_tcp_ecn = 2;
	net->ipv4.sysctl_tcp_ecn_fallback = 1;

	net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
	net->ipv4.sysctl_tcp_min_snd_mss = TCP_MIN_SND_MSS;
	net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
	net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
	net->ipv4.sysctl_tcp_mtu_probe_floor = TCP_MIN_SND_MSS;

	net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
	net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
	net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;

	net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
	net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
	net->ipv4.sysctl_tcp_syncookies = 1;
	net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
	net->ipv4.sysctl_tcp_retries1 = TCP_RETR1;
	net->ipv4.sysctl_tcp_retries2 = TCP_RETR2;
	net->ipv4.sysctl_tcp_orphan_retries = 0;
	net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
	net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
	net->ipv4.sysctl_tcp_tw_reuse = 2;
	net->ipv4.sysctl_tcp_no_ssthresh_metrics_save = 1;

	cnt = tcp_hashinfo.ehash_mask + 1;
	net->ipv4.tcp_death_row.sysctl_max_tw_buckets = cnt / 2;
	net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;

	net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 128);
	net->ipv4.sysctl_tcp_sack = 1;
	net->ipv4.sysctl_tcp_window_scaling = 1;
	net->ipv4.sysctl_tcp_timestamps = 1;
	net->ipv4.sysctl_tcp_early_retrans = 3;
	net->ipv4.sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION;
	net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 behavior. */
	net->ipv4.sysctl_tcp_retrans_collapse = 1;
	net->ipv4.sysctl_tcp_max_reordering = 300;
	net->ipv4.sysctl_tcp_dsack = 1;
	net->ipv4.sysctl_tcp_app_win = 31;
	net->ipv4.sysctl_tcp_adv_win_scale = 1;
	net->ipv4.sysctl_tcp_frto = 2;
	net->ipv4.sysctl_tcp_moderate_rcvbuf = 1;
	/* This limits the percentage of the congestion window which we
	 * will allow a single TSO frame to consume.  Building TSO frames
	 * which are too large can cause TCP streams to be bursty.
	 */
	net->ipv4.sysctl_tcp_tso_win_divisor = 3;
	/* Default TSQ limit of 16 TSO segments */
	net->ipv4.sysctl_tcp_limit_output_bytes = 16 * 65536;
	/* rfc5961 challenge ack rate limiting */
	net->ipv4.sysctl_tcp_challenge_ack_limit = 1000;
	net->ipv4.sysctl_tcp_min_tso_segs = 2;
	net->ipv4.sysctl_tcp_min_rtt_wlen = 300;
	net->ipv4.sysctl_tcp_autocorking = 1;
	net->ipv4.sysctl_tcp_invalid_ratelimit = HZ/2;
	net->ipv4.sysctl_tcp_pacing_ss_ratio = 200;
	net->ipv4.sysctl_tcp_pacing_ca_ratio = 120;
	if (net != &init_net) {
		memcpy(net->ipv4.sysctl_tcp_rmem,
		       init_net.ipv4.sysctl_tcp_rmem,
		       sizeof(init_net.ipv4.sysctl_tcp_rmem));
		memcpy(net->ipv4.sysctl_tcp_wmem,
		       init_net.ipv4.sysctl_tcp_wmem,
		       sizeof(init_net.ipv4.sysctl_tcp_wmem));
	}
	net->ipv4.sysctl_tcp_comp_sack_delay_ns = NSEC_PER_MSEC;
	net->ipv4.sysctl_tcp_comp_sack_nr = 44;
	net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
	spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
	net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;
	atomic_set(&net->ipv4.tfo_active_disable_times, 0);

	/* Reno is always built in */
	if (!net_eq(net, &init_net) &&
	    try_module_get(init_net.ipv4.tcp_congestion_control->owner))
		net->ipv4.tcp_congestion_control = init_net.ipv4.tcp_congestion_control;
	else
		net->ipv4.tcp_congestion_control = &tcp_reno;

	return 0;
fail:
	tcp_sk_exit(net);

	return res;
}
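/* Batched namespace teardown: flush TIME_WAIT sockets and drop any
 * TCP Fast Open contexts belonging to the exiting namespaces.
 */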
static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
	struct net *net;

	inet_twsk_purge(&tcp_hashinfo, AF_INET);

	list_for_each_entry(net, net_exit_list, exit_list)
		tcp_fastopen_ctx_destroy(net);
}
static struct pernet_operations __net_initdata tcp_sk_ops = {
	.init	    = tcp_sk_init,
	.exit	    = tcp_sk_exit,
	.exit_batch = tcp_sk_exit_batch,
};
void __init tcp_v4_init(void)
{
	if (register_pernet_subsys(&tcp_sk_ops))
		panic("Failed to create the TCP control socket.\n");
}