]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 4.14
authorSasha Levin <sashal@kernel.org>
Fri, 22 Apr 2022 14:30:28 +0000 (10:30 -0400)
committerSasha Levin <sashal@kernel.org>
Fri, 22 Apr 2022 14:30:28 +0000 (10:30 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
queue-4.14/series
queue-4.14/tcp-fix-race-condition-when-creating-child-sockets-f.patch [new file with mode: 0644]

index b11c38595f142d4bf8332d9c290a2bcd3f567b3d..a80f9ecbc32a1d3df73ccb638a84a76484df6211 100644 (file)
@@ -4,3 +4,4 @@ tracing-have-traceon-and-traceoff-trigger-honor-the-instance.patch
 tracing-dump-stacktrace-trigger-to-the-corresponding-instance.patch
 can-usb_8dev-usb_8dev_start_xmit-fix-double-dev_kfree_skb-in-error-path.patch
 gfs2-assign-rgrp-glock-before-compute_bitstructs.patch
+tcp-fix-race-condition-when-creating-child-sockets-f.patch
diff --git a/queue-4.14/tcp-fix-race-condition-when-creating-child-sockets-f.patch b/queue-4.14/tcp-fix-race-condition-when-creating-child-sockets-f.patch
new file mode 100644 (file)
index 0000000..4dc0a52
--- /dev/null
@@ -0,0 +1,313 @@
+From d012a52ff2c057a6f46e52a8422d379ec9a9efc2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 20 Nov 2020 11:11:33 +0000
+Subject: tcp: fix race condition when creating child sockets from syncookies
+
+From: Ricardo Dias <rdias@singlestore.com>
+
+[ Upstream commit 01770a166165738a6e05c3d911fb4609cc4eb416 ]
+
+When the TCP stack is in SYN flood mode, the server child socket is
+created from the SYN cookie received in a TCP packet with the ACK flag
+set.
+
+The child socket is created when the server receives the first TCP
+packet with a valid SYN cookie from the client. Usually, this packet
+corresponds to the final step of the TCP 3-way handshake, the ACK
+packet. But is also possible to receive a valid SYN cookie from the
+first TCP data packet sent by the client, and thus create a child socket
+from that SYN cookie.
+
+Since a client socket is ready to send data as soon as it receives the
+SYN+ACK packet from the server, the client can send the ACK packet (sent
+by the TCP stack code), and the first data packet (sent by the userspace
+program) almost at the same time, and thus the server will equally
+receive the two TCP packets with valid SYN cookies almost at the same
+instant.
+
+When such event happens, the TCP stack code has a race condition that
+occurs between the momement a lookup is done to the established
+connections hashtable to check for the existence of a connection for the
+same client, and the moment that the child socket is added to the
+established connections hashtable. As a consequence, this race condition
+can lead to a situation where we add two child sockets to the
+established connections hashtable and deliver two sockets to the
+userspace program to the same client.
+
+This patch fixes the race condition by checking if an existing child
+socket exists for the same client when we are adding the second child
+socket to the established connections socket. If an existing child
+socket exists, we drop the packet and discard the second child socket
+to the same client.
+
+Signed-off-by: Ricardo Dias <rdias@singlestore.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20201120111133.GA67501@rdias-suse-pc.lan
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/inet_hashtables.h   |  5 ++-
+ net/dccp/ipv4.c                 |  2 +-
+ net/dccp/ipv6.c                 |  2 +-
+ net/ipv4/inet_connection_sock.c |  2 +-
+ net/ipv4/inet_hashtables.c      | 68 +++++++++++++++++++++++++++++----
+ net/ipv4/tcp_ipv4.c             | 15 +++++++-
+ net/ipv6/tcp_ipv6.c             | 13 ++++++-
+ 7 files changed, 91 insertions(+), 16 deletions(-)
+
+diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
+index 573ab110c9ec..e5f4d2711404 100644
+--- a/include/net/inet_hashtables.h
++++ b/include/net/inet_hashtables.h
+@@ -215,8 +215,9 @@ void inet_put_port(struct sock *sk);
+ void inet_hashinfo_init(struct inet_hashinfo *h);
+-bool inet_ehash_insert(struct sock *sk, struct sock *osk);
+-bool inet_ehash_nolisten(struct sock *sk, struct sock *osk);
++bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk);
++bool inet_ehash_nolisten(struct sock *sk, struct sock *osk,
++                       bool *found_dup_sk);
+ int __inet_hash(struct sock *sk, struct sock *osk);
+ int inet_hash(struct sock *sk);
+ void inet_unhash(struct sock *sk);
+diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
+index 176bddacc16e..7e93087d1366 100644
+--- a/net/dccp/ipv4.c
++++ b/net/dccp/ipv4.c
+@@ -428,7 +428,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
+       if (__inet_inherit_port(sk, newsk) < 0)
+               goto put_and_exit;
+-      *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
++      *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL);
+       if (*own_req)
+               ireq->ireq_opt = NULL;
+       else
+diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
+index 2cd3508a3786..ae4851fdbe9e 100644
+--- a/net/dccp/ipv6.c
++++ b/net/dccp/ipv6.c
+@@ -538,7 +538,7 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
+               dccp_done(newsk);
+               goto out;
+       }
+-      *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
++      *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL);
+       /* Clone pktoptions received with SYN, if we own the req */
+       if (*own_req && ireq->pktopts) {
+               newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC);
+diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
+index 08ba0f91f2ab..44b1d630c40b 100644
+--- a/net/ipv4/inet_connection_sock.c
++++ b/net/ipv4/inet_connection_sock.c
+@@ -784,7 +784,7 @@ static void reqsk_queue_hash_req(struct request_sock *req,
+                           (unsigned long)req);
+       mod_timer(&req->rsk_timer, jiffies + timeout);
+-      inet_ehash_insert(req_to_sk(req), NULL);
++      inet_ehash_insert(req_to_sk(req), NULL, NULL);
+       /* before letting lookups find us, make sure all req fields
+        * are committed to memory and refcnt initialized.
+        */
+diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
+index 8a54babf5c90..1346e45cf8d1 100644
+--- a/net/ipv4/inet_hashtables.c
++++ b/net/ipv4/inet_hashtables.c
+@@ -23,6 +23,9 @@
+ #include <net/addrconf.h>
+ #include <net/inet_connection_sock.h>
+ #include <net/inet_hashtables.h>
++#if IS_ENABLED(CONFIG_IPV6)
++#include <net/inet6_hashtables.h>
++#endif
+ #include <net/secure_seq.h>
+ #include <net/ip.h>
+ #include <net/tcp.h>
+@@ -395,10 +398,52 @@ static u32 inet_sk_port_offset(const struct sock *sk)
+                                         inet->inet_dport);
+ }
+-/* insert a socket into ehash, and eventually remove another one
+- * (The another one can be a SYN_RECV or TIMEWAIT
++/* Searches for an exsiting socket in the ehash bucket list.
++ * Returns true if found, false otherwise.
+  */
+-bool inet_ehash_insert(struct sock *sk, struct sock *osk)
++static bool inet_ehash_lookup_by_sk(struct sock *sk,
++                                  struct hlist_nulls_head *list)
++{
++      const __portpair ports = INET_COMBINED_PORTS(sk->sk_dport, sk->sk_num);
++      const int sdif = sk->sk_bound_dev_if;
++      const int dif = sk->sk_bound_dev_if;
++      const struct hlist_nulls_node *node;
++      struct net *net = sock_net(sk);
++      struct sock *esk;
++
++      INET_ADDR_COOKIE(acookie, sk->sk_daddr, sk->sk_rcv_saddr);
++
++      sk_nulls_for_each_rcu(esk, node, list) {
++              if (esk->sk_hash != sk->sk_hash)
++                      continue;
++              if (sk->sk_family == AF_INET) {
++                      if (unlikely(INET_MATCH(esk, net, acookie,
++                                              sk->sk_daddr,
++                                              sk->sk_rcv_saddr,
++                                              ports, dif, sdif))) {
++                              return true;
++                      }
++              }
++#if IS_ENABLED(CONFIG_IPV6)
++              else if (sk->sk_family == AF_INET6) {
++                      if (unlikely(INET6_MATCH(esk, net,
++                                               &sk->sk_v6_daddr,
++                                               &sk->sk_v6_rcv_saddr,
++                                               ports, dif, sdif))) {
++                              return true;
++                      }
++              }
++#endif
++      }
++      return false;
++}
++
++/* Insert a socket into ehash, and eventually remove another one
++ * (The another one can be a SYN_RECV or TIMEWAIT)
++ * If an existing socket already exists, socket sk is not inserted,
++ * and sets found_dup_sk parameter to true.
++ */
++bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
+ {
+       struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
+       struct hlist_nulls_head *list;
+@@ -417,16 +462,23 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk)
+       if (osk) {
+               WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
+               ret = sk_nulls_del_node_init_rcu(osk);
++      } else if (found_dup_sk) {
++              *found_dup_sk = inet_ehash_lookup_by_sk(sk, list);
++              if (*found_dup_sk)
++                      ret = false;
+       }
++
+       if (ret)
+               __sk_nulls_add_node_rcu(sk, list);
++
+       spin_unlock(lock);
++
+       return ret;
+ }
+-bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
++bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, bool *found_dup_sk)
+ {
+-      bool ok = inet_ehash_insert(sk, osk);
++      bool ok = inet_ehash_insert(sk, osk, found_dup_sk);
+       if (ok) {
+               sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+@@ -469,7 +521,7 @@ int __inet_hash(struct sock *sk, struct sock *osk)
+       int err = 0;
+       if (sk->sk_state != TCP_LISTEN) {
+-              inet_ehash_nolisten(sk, osk);
++              inet_ehash_nolisten(sk, osk, NULL);
+               return 0;
+       }
+       WARN_ON(!sk_unhashed(sk));
+@@ -556,7 +608,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
+               tb = inet_csk(sk)->icsk_bind_hash;
+               spin_lock_bh(&head->lock);
+               if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
+-                      inet_ehash_nolisten(sk, NULL);
++                      inet_ehash_nolisten(sk, NULL, NULL);
+                       spin_unlock_bh(&head->lock);
+                       return 0;
+               }
+@@ -632,7 +684,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
+       inet_bind_hash(sk, tb, port);
+       if (sk_unhashed(sk)) {
+               inet_sk(sk)->inet_sport = htons(port);
+-              inet_ehash_nolisten(sk, (struct sock *)tw);
++              inet_ehash_nolisten(sk, (struct sock *)tw, NULL);
+       }
+       if (tw)
+               inet_twsk_bind_unhash(tw, hinfo);
+diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
+index 744479c8b91f..a598c5153434 100644
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -1344,6 +1344,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
+                                 bool *own_req)
+ {
+       struct inet_request_sock *ireq;
++      bool found_dup_sk = false;
+       struct inet_sock *newinet;
+       struct tcp_sock *newtp;
+       struct sock *newsk;
+@@ -1414,12 +1415,22 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
+       if (__inet_inherit_port(sk, newsk) < 0)
+               goto put_and_exit;
+-      *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
++      *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
++                                     &found_dup_sk);
+       if (likely(*own_req)) {
+               tcp_move_syn(newtp, req);
+               ireq->ireq_opt = NULL;
+       } else {
+-              newinet->inet_opt = NULL;
++              if (!req_unhash && found_dup_sk) {
++                      /* This code path should only be executed in the
++                       * syncookie case only
++                       */
++                      bh_unlock_sock(newsk);
++                      sock_put(newsk);
++                      newsk = NULL;
++              } else {
++                      newinet->inet_opt = NULL;
++              }
+       }
+       return newsk;
+diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
+index 0f0772c48bf0..21637031fbab 100644
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -1064,6 +1064,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
+       struct ipv6_txoptions *opt;
+       struct tcp6_sock *newtcp6sk;
+       struct inet_sock *newinet;
++      bool found_dup_sk = false;
+       struct tcp_sock *newtp;
+       struct sock *newsk;
+ #ifdef CONFIG_TCP_MD5SIG
+@@ -1232,7 +1233,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
+               tcp_done(newsk);
+               goto out;
+       }
+-      *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
++      *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
++                                     &found_dup_sk);
+       if (*own_req) {
+               tcp_move_syn(newtp, req);
+@@ -1247,6 +1249,15 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
+                               skb_set_owner_r(newnp->pktoptions, newsk);
+                       }
+               }
++      } else {
++              if (!req_unhash && found_dup_sk) {
++                      /* This code path should only be executed in the
++                       * syncookie case only
++                       */
++                      bh_unlock_sock(newsk);
++                      sock_put(newsk);
++                      newsk = NULL;
++              }
+       }
+       return newsk;
+-- 
+2.35.1
+