git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.15-stable patches
author: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 29 Apr 2024 13:35:33 +0000 (15:35 +0200)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 29 Apr 2024 13:35:33 +0000 (15:35 +0200)
added patches:
tcp-clean-up-kernel-listener-s-reqsk-in-inet_twsk_purge.patch
tcp-fix-new_syn_recv-handling-in-inet_twsk_purge.patch

queue-5.15/series
queue-5.15/tcp-clean-up-kernel-listener-s-reqsk-in-inet_twsk_purge.patch [new file with mode: 0644]
queue-5.15/tcp-fix-new_syn_recv-handling-in-inet_twsk_purge.patch [new file with mode: 0644]

index b5b1899b036a918efdfba15194cbdb54eb122753..895d039f786d2947fa296794727a2eaf7d43a57a 100644 (file)
@@ -65,3 +65,5 @@ net-b44-set-pause-params-only-when-interface-is-up.patch
 stackdepot-respect-__gfp_nolockdep-allocation-flag.patch
 wifi-nl80211-don-t-free-null-coalescing-rule.patch
 mtd-diskonchip-work-around-ubsan-link-failure.patch
+tcp-clean-up-kernel-listener-s-reqsk-in-inet_twsk_purge.patch
+tcp-fix-new_syn_recv-handling-in-inet_twsk_purge.patch
diff --git a/queue-5.15/tcp-clean-up-kernel-listener-s-reqsk-in-inet_twsk_purge.patch b/queue-5.15/tcp-clean-up-kernel-listener-s-reqsk-in-inet_twsk_purge.patch
new file mode 100644 (file)
index 0000000..afc8fc7
--- /dev/null
@@ -0,0 +1,101 @@
+From 740ea3c4a0b2e326b23d7cdf05472a0e92aa39bc Mon Sep 17 00:00:00 2001
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+Date: Wed, 12 Oct 2022 07:50:36 -0700
+Subject: tcp: Clean up kernel listener's reqsk in inet_twsk_purge()
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+commit 740ea3c4a0b2e326b23d7cdf05472a0e92aa39bc upstream.
+
+Eric Dumazet reported a use-after-free related to the per-netns ehash
+series. [0]
+
+When we create a TCP socket from userspace, the socket always holds a
+refcnt of the netns.  This guarantees that a reqsk timer is always fired
+before netns dismantle.  Each reqsk has a refcnt of its listener, so the
+listener is not freed before the reqsk, and the net is not freed before
+the listener as well.
+
+OTOH, when in-kernel users create a TCP socket, it might not hold a refcnt
+of its netns.  Thus, a reqsk timer can be fired after the netns dismantle
+and access freed per-netns ehash.
+
+To avoid the use-after-free, we need to clean up TCP_NEW_SYN_RECV sockets
+in inet_twsk_purge() if the netns uses a per-netns ehash.
+
+[0]: https://lore.kernel.org/netdev/CANn89iLXMup0dRD_Ov79Xt8N9FM0XdhCHEN05sf3eLwxKweM6w@mail.gmail.com/
+
+BUG: KASAN: use-after-free in tcp_or_dccp_get_hashinfo
+include/net/inet_hashtables.h:181 [inline]
+BUG: KASAN: use-after-free in reqsk_queue_unlink+0x320/0x350
+net/ipv4/inet_connection_sock.c:913
+Read of size 8 at addr ffff88807545bd80 by task syz-executor.2/8301
+
+CPU: 1 PID: 8301 Comm: syz-executor.2 Not tainted
+6.0.0-syzkaller-02757-gaf7d23f9d96a #0
+Hardware name: Google Google Compute Engine/Google Compute Engine,
+BIOS Google 09/22/2022
+Call Trace:
+<IRQ>
+__dump_stack lib/dump_stack.c:88 [inline]
+dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
+print_address_description mm/kasan/report.c:317 [inline]
+print_report.cold+0x2ba/0x719 mm/kasan/report.c:433
+kasan_report+0xb1/0x1e0 mm/kasan/report.c:495
+tcp_or_dccp_get_hashinfo include/net/inet_hashtables.h:181 [inline]
+reqsk_queue_unlink+0x320/0x350 net/ipv4/inet_connection_sock.c:913
+inet_csk_reqsk_queue_drop net/ipv4/inet_connection_sock.c:927 [inline]
+inet_csk_reqsk_queue_drop_and_put net/ipv4/inet_connection_sock.c:939 [inline]
+reqsk_timer_handler+0x724/0x1160 net/ipv4/inet_connection_sock.c:1053
+call_timer_fn+0x1a0/0x6b0 kernel/time/timer.c:1474
+expire_timers kernel/time/timer.c:1519 [inline]
+__run_timers.part.0+0x674/0xa80 kernel/time/timer.c:1790
+__run_timers kernel/time/timer.c:1768 [inline]
+run_timer_softirq+0xb3/0x1d0 kernel/time/timer.c:1803
+__do_softirq+0x1d0/0x9c8 kernel/softirq.c:571
+invoke_softirq kernel/softirq.c:445 [inline]
+__irq_exit_rcu+0x123/0x180 kernel/softirq.c:650
+irq_exit_rcu+0x5/0x20 kernel/softirq.c:662
+sysvec_apic_timer_interrupt+0x93/0xc0 arch/x86/kernel/apic/apic.c:1107
+</IRQ>
+
+Fixes: d1e5e6408b30 ("tcp: Introduce optional per-netns ehash.")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Reported-by: Eric Dumazet <edumazet@google.com>
+Suggested-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20221012145036.74960-1-kuniyu@amazon.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+[shaozhengchao: resolved conflicts in 5.15]
+Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/inet_timewait_sock.c |   15 ++++++++++++++-
+ 1 file changed, 14 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/inet_timewait_sock.c
++++ b/net/ipv4/inet_timewait_sock.c
+@@ -268,8 +268,21 @@ restart_rcu:
+               rcu_read_lock();
+ restart:
+               sk_nulls_for_each_rcu(sk, node, &head->chain) {
+-                      if (sk->sk_state != TCP_TIME_WAIT)
++                      if (sk->sk_state != TCP_TIME_WAIT) {
++                              /* A kernel listener socket might not hold refcnt for net,
++                               * so reqsk_timer_handler() could be fired after net is
++                               * freed.  Userspace listener and reqsk never exist here.
++                               */
++                              if (unlikely(sk->sk_state == TCP_NEW_SYN_RECV &&
++                                           hashinfo->pernet)) {
++                                      struct request_sock *req = inet_reqsk(sk);
++
++                                      inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, req);
++                              }
++
+                               continue;
++                      }
++
+                       tw = inet_twsk(sk);
+                       if ((tw->tw_family != family) ||
+                               refcount_read(&twsk_net(tw)->ns.count))
diff --git a/queue-5.15/tcp-fix-new_syn_recv-handling-in-inet_twsk_purge.patch b/queue-5.15/tcp-fix-new_syn_recv-handling-in-inet_twsk_purge.patch
new file mode 100644 (file)
index 0000000..ca3b678
--- /dev/null
@@ -0,0 +1,112 @@
+From 1c4e97dd2d3c9a3e84f7e26346aa39bc426d3249 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 8 Mar 2024 12:01:21 -0800
+Subject: tcp: Fix NEW_SYN_RECV handling in inet_twsk_purge()
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit 1c4e97dd2d3c9a3e84f7e26346aa39bc426d3249 upstream.
+
+inet_twsk_purge() uses rcu to find TIME_WAIT and NEW_SYN_RECV
+objects to purge.
+
+These objects use SLAB_TYPESAFE_BY_RCU semantics and need special
+care. We need to use refcount_inc_not_zero(&sk->sk_refcnt).
+
+Reuse the existing correct logic I wrote for TIME_WAIT,
+because both structures have common locations for
+sk_state, sk_family, and netns pointer.
+
+If after the refcount_inc_not_zero() the object fields no longer match
+the keys, use sock_gen_put(sk) to release the refcount.
+
+Then we can call inet_twsk_deschedule_put() for TIME_WAIT,
+inet_csk_reqsk_queue_drop_and_put() for NEW_SYN_RECV sockets,
+with BH disabled.
+
+Then we need to restart the loop because we had to drop rcu_read_lock().
+
+Fixes: 740ea3c4a0b2 ("tcp: Clean up kernel listener's reqsk in inet_twsk_purge()")
+Link: https://lore.kernel.org/netdev/CANn89iLvFuuihCtt9PME2uS1WJATnf5fKjDToa1WzVnRzHnPfg@mail.gmail.com/T/#u
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20240308200122.64357-2-kuniyu@amazon.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/inet_timewait_sock.c |   41 +++++++++++++++++++----------------------
+ 1 file changed, 19 insertions(+), 22 deletions(-)
+
+--- a/net/ipv4/inet_timewait_sock.c
++++ b/net/ipv4/inet_timewait_sock.c
+@@ -254,12 +254,12 @@ void __inet_twsk_schedule(struct inet_ti
+ }
+ EXPORT_SYMBOL_GPL(__inet_twsk_schedule);
++/* Remove all non full sockets (TIME_WAIT and NEW_SYN_RECV) for dead netns */
+ void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family)
+ {
+-      struct inet_timewait_sock *tw;
+-      struct sock *sk;
+       struct hlist_nulls_node *node;
+       unsigned int slot;
++      struct sock *sk;
+       for (slot = 0; slot <= hashinfo->ehash_mask; slot++) {
+               struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
+@@ -268,38 +268,35 @@ restart_rcu:
+               rcu_read_lock();
+ restart:
+               sk_nulls_for_each_rcu(sk, node, &head->chain) {
+-                      if (sk->sk_state != TCP_TIME_WAIT) {
+-                              /* A kernel listener socket might not hold refcnt for net,
+-                               * so reqsk_timer_handler() could be fired after net is
+-                               * freed.  Userspace listener and reqsk never exist here.
+-                               */
+-                              if (unlikely(sk->sk_state == TCP_NEW_SYN_RECV &&
+-                                           hashinfo->pernet)) {
+-                                      struct request_sock *req = inet_reqsk(sk);
+-
+-                                      inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, req);
+-                              }
++                      int state = inet_sk_state_load(sk);
++                      if ((1 << state) & ~(TCPF_TIME_WAIT |
++                                           TCPF_NEW_SYN_RECV))
+                               continue;
+-                      }
+-                      tw = inet_twsk(sk);
+-                      if ((tw->tw_family != family) ||
+-                              refcount_read(&twsk_net(tw)->ns.count))
++                      if (sk->sk_family != family ||
++                          refcount_read(&sock_net(sk)->ns.count))
+                               continue;
+-                      if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt)))
++                      if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
+                               continue;
+-                      if (unlikely((tw->tw_family != family) ||
+-                                   refcount_read(&twsk_net(tw)->ns.count))) {
+-                              inet_twsk_put(tw);
++                      if (unlikely(sk->sk_family != family ||
++                                   refcount_read(&sock_net(sk)->ns.count))) {
++                              sock_gen_put(sk);
+                               goto restart;
+                       }
+                       rcu_read_unlock();
+                       local_bh_disable();
+-                      inet_twsk_deschedule_put(tw);
++                      if (state == TCP_TIME_WAIT) {
++                              inet_twsk_deschedule_put(inet_twsk(sk));
++                      } else {
++                              struct request_sock *req = inet_reqsk(sk);
++
++                              inet_csk_reqsk_queue_drop_and_put(req->rsk_listener,
++                                                                req);
++                      }
+                       local_bh_enable();
+                       goto restart_rcu;
+               }