--- /dev/null
+From 740ea3c4a0b2e326b23d7cdf05472a0e92aa39bc Mon Sep 17 00:00:00 2001
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+Date: Wed, 12 Oct 2022 07:50:36 -0700
+Subject: tcp: Clean up kernel listener's reqsk in inet_twsk_purge()
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+commit 740ea3c4a0b2e326b23d7cdf05472a0e92aa39bc upstream.
+
+Eric Dumazet reported a use-after-free related to the per-netns ehash
+series. [0]
+
+When we create a TCP socket from userspace, the socket always holds a
+refcnt of the netns. This guarantees that a reqsk timer is always fired
+before netns dismantle. Each reqsk has a refcnt of its listener, so the
+listener is not freed before the reqsk, and the net is not freed before
+the listener as well.
+
+OTOH, when in-kernel users create a TCP socket, it might not hold a refcnt
+of its netns. Thus, a reqsk timer can be fired after the netns dismantle
+and access freed per-netns ehash.
+
+To avoid the use-after-free, we need to clean up TCP_NEW_SYN_RECV sockets
+in inet_twsk_purge() if the netns uses a per-netns ehash.
+
+[0]: https://lore.kernel.org/netdev/CANn89iLXMup0dRD_Ov79Xt8N9FM0XdhCHEN05sf3eLwxKweM6w@mail.gmail.com/
+
+BUG: KASAN: use-after-free in tcp_or_dccp_get_hashinfo
+include/net/inet_hashtables.h:181 [inline]
+BUG: KASAN: use-after-free in reqsk_queue_unlink+0x320/0x350
+net/ipv4/inet_connection_sock.c:913
+Read of size 8 at addr ffff88807545bd80 by task syz-executor.2/8301
+
+CPU: 1 PID: 8301 Comm: syz-executor.2 Not tainted
+6.0.0-syzkaller-02757-gaf7d23f9d96a #0
+Hardware name: Google Google Compute Engine/Google Compute Engine,
+BIOS Google 09/22/2022
+Call Trace:
+<IRQ>
+__dump_stack lib/dump_stack.c:88 [inline]
+dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
+print_address_description mm/kasan/report.c:317 [inline]
+print_report.cold+0x2ba/0x719 mm/kasan/report.c:433
+kasan_report+0xb1/0x1e0 mm/kasan/report.c:495
+tcp_or_dccp_get_hashinfo include/net/inet_hashtables.h:181 [inline]
+reqsk_queue_unlink+0x320/0x350 net/ipv4/inet_connection_sock.c:913
+inet_csk_reqsk_queue_drop net/ipv4/inet_connection_sock.c:927 [inline]
+inet_csk_reqsk_queue_drop_and_put net/ipv4/inet_connection_sock.c:939 [inline]
+reqsk_timer_handler+0x724/0x1160 net/ipv4/inet_connection_sock.c:1053
+call_timer_fn+0x1a0/0x6b0 kernel/time/timer.c:1474
+expire_timers kernel/time/timer.c:1519 [inline]
+__run_timers.part.0+0x674/0xa80 kernel/time/timer.c:1790
+__run_timers kernel/time/timer.c:1768 [inline]
+run_timer_softirq+0xb3/0x1d0 kernel/time/timer.c:1803
+__do_softirq+0x1d0/0x9c8 kernel/softirq.c:571
+invoke_softirq kernel/softirq.c:445 [inline]
+__irq_exit_rcu+0x123/0x180 kernel/softirq.c:650
+irq_exit_rcu+0x5/0x20 kernel/softirq.c:662
+sysvec_apic_timer_interrupt+0x93/0xc0 arch/x86/kernel/apic/apic.c:1107
+</IRQ>
+
+Fixes: d1e5e6408b30 ("tcp: Introduce optional per-netns ehash.")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Reported-by: Eric Dumazet <edumazet@google.com>
+Suggested-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20221012145036.74960-1-kuniyu@amazon.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+[shaozhengchao: resolved conflicts in 5.15]
+Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/inet_timewait_sock.c | 15 ++++++++++++++-
+ 1 file changed, 14 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/inet_timewait_sock.c
++++ b/net/ipv4/inet_timewait_sock.c
+@@ -268,8 +268,21 @@ restart_rcu:
+ rcu_read_lock();
+ restart:
+ sk_nulls_for_each_rcu(sk, node, &head->chain) {
+- if (sk->sk_state != TCP_TIME_WAIT)
++ if (sk->sk_state != TCP_TIME_WAIT) {
++ /* A kernel listener socket might not hold refcnt for net,
++ * so reqsk_timer_handler() could be fired after net is
++ * freed. Userspace listener and reqsk never exist here.
++ */
++ if (unlikely(sk->sk_state == TCP_NEW_SYN_RECV &&
++ hashinfo->pernet)) {
++ struct request_sock *req = inet_reqsk(sk);
++
++ inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, req);
++ }
++
+ continue;
++ }
++
+ tw = inet_twsk(sk);
+ if ((tw->tw_family != family) ||
+ refcount_read(&twsk_net(tw)->ns.count))
--- /dev/null
+From 1c4e97dd2d3c9a3e84f7e26346aa39bc426d3249 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 8 Mar 2024 12:01:21 -0800
+Subject: tcp: Fix NEW_SYN_RECV handling in inet_twsk_purge()
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit 1c4e97dd2d3c9a3e84f7e26346aa39bc426d3249 upstream.
+
+inet_twsk_purge() uses rcu to find TIME_WAIT and NEW_SYN_RECV
+objects to purge.
+
+These objects use SLAB_TYPESAFE_BY_RCU semantics and need special
+care. We need to use refcount_inc_not_zero(&sk->sk_refcnt).
+
+Reuse the existing correct logic I wrote for TIME_WAIT,
+because both structures have common locations for
+sk_state, sk_family, and netns pointer.
+
+If after the refcount_inc_not_zero() the object fields no longer match
+the keys, use sock_gen_put(sk) to release the refcount.
+
+Then we can call inet_twsk_deschedule_put() for TIME_WAIT,
+inet_csk_reqsk_queue_drop_and_put() for NEW_SYN_RECV sockets,
+with BH disabled.
+
+Then we need to restart the loop because we had to drop rcu_read_lock().
+
+Fixes: 740ea3c4a0b2 ("tcp: Clean up kernel listener's reqsk in inet_twsk_purge()")
+Link: https://lore.kernel.org/netdev/CANn89iLvFuuihCtt9PME2uS1WJATnf5fKjDToa1WzVnRzHnPfg@mail.gmail.com/T/#u
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20240308200122.64357-2-kuniyu@amazon.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/inet_timewait_sock.c | 41 +++++++++++++++++++----------------------
+ 1 file changed, 19 insertions(+), 22 deletions(-)
+
+--- a/net/ipv4/inet_timewait_sock.c
++++ b/net/ipv4/inet_timewait_sock.c
+@@ -254,12 +254,12 @@ void __inet_twsk_schedule(struct inet_ti
+ }
+ EXPORT_SYMBOL_GPL(__inet_twsk_schedule);
+
++/* Remove all non full sockets (TIME_WAIT and NEW_SYN_RECV) for dead netns */
+ void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family)
+ {
+- struct inet_timewait_sock *tw;
+- struct sock *sk;
+ struct hlist_nulls_node *node;
+ unsigned int slot;
++ struct sock *sk;
+
+ for (slot = 0; slot <= hashinfo->ehash_mask; slot++) {
+ struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
+@@ -268,38 +268,35 @@ restart_rcu:
+ rcu_read_lock();
+ restart:
+ sk_nulls_for_each_rcu(sk, node, &head->chain) {
+- if (sk->sk_state != TCP_TIME_WAIT) {
+- /* A kernel listener socket might not hold refcnt for net,
+- * so reqsk_timer_handler() could be fired after net is
+- * freed. Userspace listener and reqsk never exist here.
+- */
+- if (unlikely(sk->sk_state == TCP_NEW_SYN_RECV &&
+- hashinfo->pernet)) {
+- struct request_sock *req = inet_reqsk(sk);
+-
+- inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, req);
+- }
++ int state = inet_sk_state_load(sk);
+
++ if ((1 << state) & ~(TCPF_TIME_WAIT |
++ TCPF_NEW_SYN_RECV))
+ continue;
+- }
+
+- tw = inet_twsk(sk);
+- if ((tw->tw_family != family) ||
+- refcount_read(&twsk_net(tw)->ns.count))
++ if (sk->sk_family != family ||
++ refcount_read(&sock_net(sk)->ns.count))
+ continue;
+
+- if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt)))
++ if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
+ continue;
+
+- if (unlikely((tw->tw_family != family) ||
+- refcount_read(&twsk_net(tw)->ns.count))) {
+- inet_twsk_put(tw);
++ if (unlikely(sk->sk_family != family ||
++ refcount_read(&sock_net(sk)->ns.count))) {
++ sock_gen_put(sk);
+ goto restart;
+ }
+
+ rcu_read_unlock();
+ local_bh_disable();
+- inet_twsk_deschedule_put(tw);
++ if (state == TCP_TIME_WAIT) {
++ inet_twsk_deschedule_put(inet_twsk(sk));
++ } else {
++ struct request_sock *req = inet_reqsk(sk);
++
++ inet_csk_reqsk_queue_drop_and_put(req->rsk_listener,
++ req);
++ }
+ local_bh_enable();
+ goto restart_rcu;
+ }