From: Sasha Levin Date: Fri, 22 Apr 2022 14:30:27 +0000 (-0400) Subject: Fixes for 4.19 X-Git-Tag: v4.9.312~68 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=00b2659b786fae56df2cc760f1ba38561ae0956b;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 4.19 Signed-off-by: Sasha Levin --- diff --git a/queue-4.19/series b/queue-4.19/series index 0e62692c3d8..6c3addf5ee1 100644 --- a/queue-4.19/series +++ b/queue-4.19/series @@ -4,3 +4,4 @@ tracing-dump-stacktrace-trigger-to-the-corresponding-instance.patch can-usb_8dev-usb_8dev_start_xmit-fix-double-dev_kfree_skb-in-error-path.patch dm-integrity-fix-memory-corruption-when-tag_size-is-less-than-digest-size.patch gfs2-assign-rgrp-glock-before-compute_bitstructs.patch +tcp-fix-race-condition-when-creating-child-sockets-f.patch diff --git a/queue-4.19/tcp-fix-race-condition-when-creating-child-sockets-f.patch b/queue-4.19/tcp-fix-race-condition-when-creating-child-sockets-f.patch new file mode 100644 index 00000000000..24722c314bb --- /dev/null +++ b/queue-4.19/tcp-fix-race-condition-when-creating-child-sockets-f.patch @@ -0,0 +1,313 @@ +From 36b9639fc96df9a60cc08401cdeddb5360141b63 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 20 Nov 2020 11:11:33 +0000 +Subject: tcp: fix race condition when creating child sockets from syncookies + +From: Ricardo Dias + +[ Upstream commit 01770a166165738a6e05c3d911fb4609cc4eb416 ] + +When the TCP stack is in SYN flood mode, the server child socket is +created from the SYN cookie received in a TCP packet with the ACK flag +set. + +The child socket is created when the server receives the first TCP +packet with a valid SYN cookie from the client. Usually, this packet +corresponds to the final step of the TCP 3-way handshake, the ACK +packet. But is also possible to receive a valid SYN cookie from the +first TCP data packet sent by the client, and thus create a child socket +from that SYN cookie. + +Since a client socket is ready to send data as soon as it receives the +SYN+ACK packet from the server, the client can send the ACK packet (sent +by the TCP stack code), and the first data packet (sent by the userspace +program) almost at the same time, and thus the server will equally +receive the two TCP packets with valid SYN cookies almost at the same +instant. + +When such event happens, the TCP stack code has a race condition that +occurs between the momement a lookup is done to the established +connections hashtable to check for the existence of a connection for the +same client, and the moment that the child socket is added to the +established connections hashtable. As a consequence, this race condition +can lead to a situation where we add two child sockets to the +established connections hashtable and deliver two sockets to the +userspace program to the same client. + +This patch fixes the race condition by checking if an existing child +socket exists for the same client when we are adding the second child +socket to the established connections socket. If an existing child +socket exists, we drop the packet and discard the second child socket +to the same client. + +Signed-off-by: Ricardo Dias +Signed-off-by: Eric Dumazet +Link: https://lore.kernel.org/r/20201120111133.GA67501@rdias-suse-pc.lan +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + include/net/inet_hashtables.h | 5 ++- + net/dccp/ipv4.c | 2 +- + net/dccp/ipv6.c | 2 +- + net/ipv4/inet_connection_sock.c | 2 +- + net/ipv4/inet_hashtables.c | 68 +++++++++++++++++++++++++++++---- + net/ipv4/tcp_ipv4.c | 15 +++++++- + net/ipv6/tcp_ipv6.c | 13 ++++++- + 7 files changed, 91 insertions(+), 16 deletions(-) + +diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h +index b875dcef173c..fa5fe23ca6aa 100644 +--- a/include/net/inet_hashtables.h ++++ b/include/net/inet_hashtables.h +@@ -232,8 +232,9 @@ void inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, + unsigned long low_limit, + unsigned long high_limit); + +-bool inet_ehash_insert(struct sock *sk, struct sock *osk); +-bool inet_ehash_nolisten(struct sock *sk, struct sock *osk); ++bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk); ++bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, ++ bool *found_dup_sk); + int __inet_hash(struct sock *sk, struct sock *osk); + int inet_hash(struct sock *sk); + void inet_unhash(struct sock *sk); +diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c +index 176bddacc16e..7e93087d1366 100644 +--- a/net/dccp/ipv4.c ++++ b/net/dccp/ipv4.c +@@ -428,7 +428,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk, + + if (__inet_inherit_port(sk, newsk) < 0) + goto put_and_exit; +- *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); ++ *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL); + if (*own_req) + ireq->ireq_opt = NULL; + else +diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c +index 2cd3508a3786..ae4851fdbe9e 100644 +--- a/net/dccp/ipv6.c ++++ b/net/dccp/ipv6.c +@@ -538,7 +538,7 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, + dccp_done(newsk); + goto out; + } +- *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); ++ *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL); + /* Clone pktoptions received with SYN, if we own the req */ + if (*own_req && ireq->pktopts) { + newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC); +diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c +index 439a55d1aa99..0a69f92da71b 100644 +--- a/net/ipv4/inet_connection_sock.c ++++ b/net/ipv4/inet_connection_sock.c +@@ -793,7 +793,7 @@ static void reqsk_queue_hash_req(struct request_sock *req, + timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED); + mod_timer(&req->rsk_timer, jiffies + timeout); + +- inet_ehash_insert(req_to_sk(req), NULL); ++ inet_ehash_insert(req_to_sk(req), NULL, NULL); + /* before letting lookups find us, make sure all req fields + * are committed to memory and refcnt initialized. + */ +diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c +index 3a5f12f011cb..c96a5871b49d 100644 +--- a/net/ipv4/inet_hashtables.c ++++ b/net/ipv4/inet_hashtables.c +@@ -24,6 +24,9 @@ + #include + #include + #include ++#if IS_ENABLED(CONFIG_IPV6) ++#include ++#endif + #include + #include + #include +@@ -513,10 +516,52 @@ static u32 inet_sk_port_offset(const struct sock *sk) + inet->inet_dport); + } + +-/* insert a socket into ehash, and eventually remove another one +- * (The another one can be a SYN_RECV or TIMEWAIT ++/* Searches for an exsiting socket in the ehash bucket list. ++ * Returns true if found, false otherwise. + */ +-bool inet_ehash_insert(struct sock *sk, struct sock *osk) ++static bool inet_ehash_lookup_by_sk(struct sock *sk, ++ struct hlist_nulls_head *list) ++{ ++ const __portpair ports = INET_COMBINED_PORTS(sk->sk_dport, sk->sk_num); ++ const int sdif = sk->sk_bound_dev_if; ++ const int dif = sk->sk_bound_dev_if; ++ const struct hlist_nulls_node *node; ++ struct net *net = sock_net(sk); ++ struct sock *esk; ++ ++ INET_ADDR_COOKIE(acookie, sk->sk_daddr, sk->sk_rcv_saddr); ++ ++ sk_nulls_for_each_rcu(esk, node, list) { ++ if (esk->sk_hash != sk->sk_hash) ++ continue; ++ if (sk->sk_family == AF_INET) { ++ if (unlikely(INET_MATCH(esk, net, acookie, ++ sk->sk_daddr, ++ sk->sk_rcv_saddr, ++ ports, dif, sdif))) { ++ return true; ++ } ++ } ++#if IS_ENABLED(CONFIG_IPV6) ++ else if (sk->sk_family == AF_INET6) { ++ if (unlikely(INET6_MATCH(esk, net, ++ &sk->sk_v6_daddr, ++ &sk->sk_v6_rcv_saddr, ++ ports, dif, sdif))) { ++ return true; ++ } ++ } ++#endif ++ } ++ return false; ++} ++ ++/* Insert a socket into ehash, and eventually remove another one ++ * (The another one can be a SYN_RECV or TIMEWAIT) ++ * If an existing socket already exists, socket sk is not inserted, ++ * and sets found_dup_sk parameter to true. ++ */ ++bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) + { + struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; + struct hlist_nulls_head *list; +@@ -535,16 +580,23 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk) + if (osk) { + WARN_ON_ONCE(sk->sk_hash != osk->sk_hash); + ret = sk_nulls_del_node_init_rcu(osk); ++ } else if (found_dup_sk) { ++ *found_dup_sk = inet_ehash_lookup_by_sk(sk, list); ++ if (*found_dup_sk) ++ ret = false; + } ++ + if (ret) + __sk_nulls_add_node_rcu(sk, list); ++ + spin_unlock(lock); ++ + return ret; + } + +-bool inet_ehash_nolisten(struct sock *sk, struct sock *osk) ++bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, bool *found_dup_sk) + { +- bool ok = inet_ehash_insert(sk, osk); ++ bool ok = inet_ehash_insert(sk, osk, found_dup_sk); + + if (ok) { + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); +@@ -588,7 +640,7 @@ int __inet_hash(struct sock *sk, struct sock *osk) + int err = 0; + + if (sk->sk_state != TCP_LISTEN) { +- inet_ehash_nolisten(sk, osk); ++ inet_ehash_nolisten(sk, osk, NULL); + return 0; + } + WARN_ON(!sk_unhashed(sk)); +@@ -683,7 +735,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, + tb = inet_csk(sk)->icsk_bind_hash; + spin_lock_bh(&head->lock); + if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { +- inet_ehash_nolisten(sk, NULL); ++ inet_ehash_nolisten(sk, NULL, NULL); + spin_unlock_bh(&head->lock); + return 0; + } +@@ -759,7 +811,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, + inet_bind_hash(sk, tb, port); + if (sk_unhashed(sk)) { + inet_sk(sk)->inet_sport = htons(port); +- inet_ehash_nolisten(sk, (struct sock *)tw); ++ inet_ehash_nolisten(sk, (struct sock *)tw, NULL); + } + if (tw) + inet_twsk_bind_unhash(tw, hinfo); +diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c +index de4edfbc9e46..7ebdfab5b24a 100644 +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -1415,6 +1415,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, + bool *own_req) + { + struct inet_request_sock *ireq; ++ bool found_dup_sk = false; + struct inet_sock *newinet; + struct tcp_sock *newtp; + struct sock *newsk; +@@ -1485,12 +1486,22 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, + + if (__inet_inherit_port(sk, newsk) < 0) + goto put_and_exit; +- *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); ++ *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), ++ &found_dup_sk); + if (likely(*own_req)) { + tcp_move_syn(newtp, req); + ireq->ireq_opt = NULL; + } else { +- newinet->inet_opt = NULL; ++ if (!req_unhash && found_dup_sk) { ++ /* This code path should only be executed in the ++ * syncookie case only ++ */ ++ bh_unlock_sock(newsk); ++ sock_put(newsk); ++ newsk = NULL; ++ } else { ++ newinet->inet_opt = NULL; ++ } + } + return newsk; + +diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c +index e8d206725cb7..c332f75f4e9a 100644 +--- a/net/ipv6/tcp_ipv6.c ++++ b/net/ipv6/tcp_ipv6.c +@@ -1090,6 +1090,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * + struct ipv6_txoptions *opt; + struct tcp6_sock *newtcp6sk; + struct inet_sock *newinet; ++ bool found_dup_sk = false; + struct tcp_sock *newtp; + struct sock *newsk; + #ifdef CONFIG_TCP_MD5SIG +@@ -1258,7 +1259,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * + tcp_done(newsk); + goto out; + } +- *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); ++ *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), ++ &found_dup_sk); + if (*own_req) { + tcp_move_syn(newtp, req); + +@@ -1273,6 +1275,15 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * + skb_set_owner_r(newnp->pktoptions, newsk); + } + } ++ } else { ++ if (!req_unhash && found_dup_sk) { ++ /* This code path should only be executed in the ++ * syncookie case only ++ */ ++ bh_unlock_sock(newsk); ++ sock_put(newsk); ++ newsk = NULL; ++ } + } + + return newsk; +-- +2.35.1 +