--- /dev/null
+From ed1fc5d76b81a4d681211333c026202cad4d5649 Mon Sep 17 00:00:00 2001
+From: Michal Luczaj <mhal@rbox.co>
+Date: Mon, 2 Dec 2024 12:29:25 +0100
+Subject: bpf, sockmap: Fix race between element replace and close()
+
+From: Michal Luczaj <mhal@rbox.co>
+
+commit ed1fc5d76b81a4d681211333c026202cad4d5649 upstream.
+
+Element replace (with a socket different from the one stored) may race
+with socket's close() link popping & unlinking. __sock_map_delete()
+unconditionally unrefs the (wrong) element:
+
+// set map[0] = s0
+map_update_elem(map, 0, s0)
+
+// drop fd of s0
+close(s0)
+  sock_map_close()
+    lock_sock(sk)               (s0!)
+    sock_map_remove_links(sk)
+      link = sk_psock_link_pop()
+      sock_map_unlink(sk, link)
+        sock_map_delete_from_link
+                                        // replace map[0] with s1
+                                        map_update_elem(map, 0, s1)
+                                          sock_map_update_elem
+                                (s1!)       lock_sock(sk)
+                                            sock_map_update_common
+                                              psock = sk_psock(sk)
+                                              spin_lock(&stab->lock)
+                                              osk = stab->sks[idx]
+                                              sock_map_add_link(..., &stab->sks[idx])
+                                              sock_map_unref(osk, &stab->sks[idx])
+                                                psock = sk_psock(osk)
+                                                sk_psock_put(sk, psock)
+                                                  if (refcount_dec_and_test(&psock))
+                                                    sk_psock_drop(sk, psock)
+                                              spin_unlock(&stab->lock)
+                                            release_sock(sk)
+          __sock_map_delete
+            spin_lock(&stab->lock)
+            sk = *psk                        // s1 replaced s0; sk == s1
+            if (!sk_test || sk_test == sk)   // sk_test (s0) != sk (s1); no branch
+              sk = xchg(psk, NULL)
+            if (sk)
+              sock_map_unref(sk, psk)        // unref s1; sks[idx] will dangle
+                psock = sk_psock(sk)
+                sk_psock_put(sk, psock)
+                  if (refcount_dec_and_test())
+                    sk_psock_drop(sk, psock)
+            spin_unlock(&stab->lock)
+    release_sock(sk)
+
+Then close(map) enqueues bpf_map_free_deferred, which finally calls
+sock_map_free(). This results in some refcount_t warnings along with
+a KASAN splat [1].
+
+Fix __sock_map_delete(), do not allow sock_map_unref() on elements that
+may have been replaced.
+
+[1]:
+BUG: KASAN: slab-use-after-free in sock_map_free+0x10e/0x330
+Write of size 4 at addr ffff88811f5b9100 by task kworker/u64:12/1063
+
+CPU: 14 UID: 0 PID: 1063 Comm: kworker/u64:12 Not tainted 6.12.0+ #125
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.3-1-1 04/01/2014
+Workqueue: events_unbound bpf_map_free_deferred
+Call Trace:
+ <TASK>
+ dump_stack_lvl+0x68/0x90
+ print_report+0x174/0x4f6
+ kasan_report+0xb9/0x190
+ kasan_check_range+0x10f/0x1e0
+ sock_map_free+0x10e/0x330
+ bpf_map_free_deferred+0x173/0x320
+ process_one_work+0x846/0x1420
+ worker_thread+0x5b3/0xf80
+ kthread+0x29e/0x360
+ ret_from_fork+0x2d/0x70
+ ret_from_fork_asm+0x1a/0x30
+ </TASK>
+
+Allocated by task 1202:
+ kasan_save_stack+0x1e/0x40
+ kasan_save_track+0x10/0x30
+ __kasan_slab_alloc+0x85/0x90
+ kmem_cache_alloc_noprof+0x131/0x450
+ sk_prot_alloc+0x5b/0x220
+ sk_alloc+0x2c/0x870
+ unix_create1+0x88/0x8a0
+ unix_create+0xc5/0x180
+ __sock_create+0x241/0x650
+ __sys_socketpair+0x1ce/0x420
+ __x64_sys_socketpair+0x92/0x100
+ do_syscall_64+0x93/0x180
+ entry_SYSCALL_64_after_hwframe+0x76/0x7e
+
+Freed by task 46:
+ kasan_save_stack+0x1e/0x40
+ kasan_save_track+0x10/0x30
+ kasan_save_free_info+0x37/0x60
+ __kasan_slab_free+0x4b/0x70
+ kmem_cache_free+0x1a1/0x590
+ __sk_destruct+0x388/0x5a0
+ sk_psock_destroy+0x73e/0xa50
+ process_one_work+0x846/0x1420
+ worker_thread+0x5b3/0xf80
+ kthread+0x29e/0x360
+ ret_from_fork+0x2d/0x70
+ ret_from_fork_asm+0x1a/0x30
+
+The buggy address belongs to the object at ffff88811f5b9080
+ which belongs to the cache UNIX-STREAM of size 1984
+The buggy address is located 128 bytes inside of
+ freed 1984-byte region [ffff88811f5b9080, ffff88811f5b9840)
+
+The buggy address belongs to the physical page:
+page: refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x11f5b8
+head: order:3 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0
+memcg:ffff888127d49401
+flags: 0x17ffffc0000040(head|node=0|zone=2|lastcpupid=0x1fffff)
+page_type: f5(slab)
+raw: 0017ffffc0000040 ffff8881042e4500 dead000000000122 0000000000000000
+raw: 0000000000000000 00000000800f000f 00000001f5000000 ffff888127d49401
+head: 0017ffffc0000040 ffff8881042e4500 dead000000000122 0000000000000000
+head: 0000000000000000 00000000800f000f 00000001f5000000 ffff888127d49401
+head: 0017ffffc0000003 ffffea00047d6e01 ffffffffffffffff 0000000000000000
+head: 0000000000000008 0000000000000000 00000000ffffffff 0000000000000000
+page dumped because: kasan: bad access detected
+
+Memory state around the buggy address:
+ ffff88811f5b9000: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+ ffff88811f5b9080: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+>ffff88811f5b9100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+                   ^
+ ffff88811f5b9180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ ffff88811f5b9200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+Disabling lock debugging due to kernel taint
+
+refcount_t: addition on 0; use-after-free.
+WARNING: CPU: 14 PID: 1063 at lib/refcount.c:25 refcount_warn_saturate+0xce/0x150
+CPU: 14 UID: 0 PID: 1063 Comm: kworker/u64:12 Tainted: G B 6.12.0+ #125
+Tainted: [B]=BAD_PAGE
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.3-1-1 04/01/2014
+Workqueue: events_unbound bpf_map_free_deferred
+RIP: 0010:refcount_warn_saturate+0xce/0x150
+Code: 34 73 eb 03 01 e8 82 53 ad fe 0f 0b eb b1 80 3d 27 73 eb 03 00 75 a8 48 c7 c7 80 bd 95 84 c6 05 17 73 eb 03 01 e8 62 53 ad fe <0f> 0b eb 91 80 3d 06 73 eb 03 00 75 88 48 c7 c7 e0 bd 95 84 c6 05
+RSP: 0018:ffff88815c49fc70 EFLAGS: 00010282
+RAX: 0000000000000000 RBX: ffff88811f5b9100 RCX: 0000000000000000
+RDX: 0000000000000000 RSI: 0000000000000004 RDI: 0000000000000001
+RBP: 0000000000000002 R08: 0000000000000001 R09: ffffed10bcde6349
+R10: ffff8885e6f31a4b R11: 0000000000000000 R12: ffff88813be0b000
+R13: ffff88811f5b9100 R14: ffff88811f5b9080 R15: ffff88813be0b024
+FS: 0000000000000000(0000) GS:ffff8885e6f00000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 000055dda99b0250 CR3: 000000015dbac000 CR4: 0000000000752ef0
+PKRU: 55555554
+Call Trace:
+ <TASK>
+ ? __warn.cold+0x5f/0x1ff
+ ? refcount_warn_saturate+0xce/0x150
+ ? report_bug+0x1ec/0x390
+ ? handle_bug+0x58/0x90
+ ? exc_invalid_op+0x13/0x40
+ ? asm_exc_invalid_op+0x16/0x20
+ ? refcount_warn_saturate+0xce/0x150
+ sock_map_free+0x2e5/0x330
+ bpf_map_free_deferred+0x173/0x320
+ process_one_work+0x846/0x1420
+ worker_thread+0x5b3/0xf80
+ kthread+0x29e/0x360
+ ret_from_fork+0x2d/0x70
+ ret_from_fork_asm+0x1a/0x30
+ </TASK>
+irq event stamp: 10741
+hardirqs last enabled at (10741): [<ffffffff84400ec6>] asm_sysvec_apic_timer_interrupt+0x16/0x20
+hardirqs last disabled at (10740): [<ffffffff811e532d>] handle_softirqs+0x60d/0x770
+softirqs last enabled at (10506): [<ffffffff811e55a9>] __irq_exit_rcu+0x109/0x210
+softirqs last disabled at (10301): [<ffffffff811e55a9>] __irq_exit_rcu+0x109/0x210
+
+refcount_t: underflow; use-after-free.
+WARNING: CPU: 14 PID: 1063 at lib/refcount.c:28 refcount_warn_saturate+0xee/0x150
+CPU: 14 UID: 0 PID: 1063 Comm: kworker/u64:12 Tainted: G B W 6.12.0+ #125
+Tainted: [B]=BAD_PAGE, [W]=WARN
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.3-1-1 04/01/2014
+Workqueue: events_unbound bpf_map_free_deferred
+RIP: 0010:refcount_warn_saturate+0xee/0x150
+Code: 17 73 eb 03 01 e8 62 53 ad fe 0f 0b eb 91 80 3d 06 73 eb 03 00 75 88 48 c7 c7 e0 bd 95 84 c6 05 f6 72 eb 03 01 e8 42 53 ad fe <0f> 0b e9 6e ff ff ff 80 3d e6 72 eb 03 00 0f 85 61 ff ff ff 48 c7
+RSP: 0018:ffff88815c49fc70 EFLAGS: 00010282
+RAX: 0000000000000000 RBX: ffff88811f5b9100 RCX: 0000000000000000
+RDX: 0000000000000000 RSI: 0000000000000004 RDI: 0000000000000001
+RBP: 0000000000000003 R08: 0000000000000001 R09: ffffed10bcde6349
+R10: ffff8885e6f31a4b R11: 0000000000000000 R12: ffff88813be0b000
+R13: ffff88811f5b9100 R14: ffff88811f5b9080 R15: ffff88813be0b024
+FS: 0000000000000000(0000) GS:ffff8885e6f00000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 000055dda99b0250 CR3: 000000015dbac000 CR4: 0000000000752ef0
+PKRU: 55555554
+Call Trace:
+ <TASK>
+ ? __warn.cold+0x5f/0x1ff
+ ? refcount_warn_saturate+0xee/0x150
+ ? report_bug+0x1ec/0x390
+ ? handle_bug+0x58/0x90
+ ? exc_invalid_op+0x13/0x40
+ ? asm_exc_invalid_op+0x16/0x20
+ ? refcount_warn_saturate+0xee/0x150
+ sock_map_free+0x2d3/0x330
+ bpf_map_free_deferred+0x173/0x320
+ process_one_work+0x846/0x1420
+ worker_thread+0x5b3/0xf80
+ kthread+0x29e/0x360
+ ret_from_fork+0x2d/0x70
+ ret_from_fork_asm+0x1a/0x30
+ </TASK>
+irq event stamp: 10741
+hardirqs last enabled at (10741): [<ffffffff84400ec6>] asm_sysvec_apic_timer_interrupt+0x16/0x20
+hardirqs last disabled at (10740): [<ffffffff811e532d>] handle_softirqs+0x60d/0x770
+softirqs last enabled at (10506): [<ffffffff811e55a9>] __irq_exit_rcu+0x109/0x210
+softirqs last disabled at (10301): [<ffffffff811e55a9>] __irq_exit_rcu+0x109/0x210
+
+Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface")
+Signed-off-by: Michal Luczaj <mhal@rbox.co>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Reviewed-by: John Fastabend <john.fastabend@gmail.com>
+Link: https://lore.kernel.org/bpf/20241202-sockmap-replace-v1-3-1e88579e7bd5@rbox.co
+Signed-off-by: Alva Lan <alvalan9@foxmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/sock_map.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/net/core/sock_map.c
++++ b/net/core/sock_map.c
+@@ -411,15 +411,15 @@ static void *sock_map_lookup_sys(struct
+ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
+ struct sock **psk)
+ {
+- struct sock *sk;
++ struct sock *sk = NULL;
+ int err = 0;
+
+ if (irqs_disabled())
+ return -EOPNOTSUPP; /* locks here are hardirq-unsafe */
+
+ raw_spin_lock_bh(&stab->lock);
+- sk = *psk;
+- if (!sk_test || sk_test == sk)
++
++ if (!sk_test || sk_test == *psk)
+ sk = xchg(psk, NULL);
+
+ if (likely(sk))
--- /dev/null
+From 550f7ca98ee028a606aa75705a7e77b1bd11720f Mon Sep 17 00:00:00 2001
+From: Max Kellermann <max.kellermann@ionos.com>
+Date: Mon, 18 Nov 2024 23:28:28 +0100
+Subject: ceph: give up on paths longer than PATH_MAX
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Max Kellermann <max.kellermann@ionos.com>
+
+commit 550f7ca98ee028a606aa75705a7e77b1bd11720f upstream.
+
+If the full path to be built by ceph_mdsc_build_path() happens to be
+longer than PATH_MAX, then this function will enter an endless (retry)
+loop, effectively blocking the whole task. Most of the machine
+becomes unusable, making this a very simple and effective DoS
+vulnerability.
+
+I cannot imagine why this retry was ever implemented, but it seems
+rather useless and harmful to me. Let's remove it and fail with
+ENAMETOOLONG instead.
+
+Cc: stable@vger.kernel.org
+Reported-by: Dario Weißer <dario@cure53.de>
+Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
+Reviewed-by: Alex Markuze <amarkuze@redhat.com>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+[idryomov@gmail.com: backport to 6.1: pr_warn() is still in use]
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ceph/mds_client.c | 9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+--- a/fs/ceph/mds_client.c
++++ b/fs/ceph/mds_client.c
+@@ -2447,12 +2447,11 @@ retry:
+
+ if (pos < 0) {
+ /*
+- * A rename didn't occur, but somehow we didn't end up where
+- * we thought we would. Throw a warning and try again.
++ * The path is longer than PATH_MAX and this function
++ * cannot ever succeed. Creating paths that long is
++ * possible with Ceph, but Linux cannot use them.
+ */
+- pr_warn("build_path did not end path lookup where "
+- "expected, pos is %d\n", pos);
+- goto retry;
++ return ERR_PTR(-ENAMETOOLONG);
+ }
+
+ *pbase = base;
--- /dev/null
+From fd7b4f9f46d46acbc7af3a439bb0d869efdc5c58 Mon Sep 17 00:00:00 2001
+From: Qun-Wei Lin <qun-wei.lin@mediatek.com>
+Date: Wed, 13 Nov 2024 12:25:43 +0800
+Subject: sched/task_stack: fix object_is_on_stack() for KASAN tagged pointers
+
+From: Qun-Wei Lin <qun-wei.lin@mediatek.com>
+
+commit fd7b4f9f46d46acbc7af3a439bb0d869efdc5c58 upstream.
+
+When CONFIG_KASAN_SW_TAGS and CONFIG_KASAN_STACK are enabled, the
+object_is_on_stack() function may produce incorrect results due to the
+presence of tags in the obj pointer, while the stack pointer does not have
+tags. This discrepancy can lead to incorrect stack object detection and
+subsequently trigger warnings if CONFIG_DEBUG_OBJECTS is also enabled.
+
+Example of the warning:
+
+ODEBUG: object 3eff800082ea7bb0 is NOT on stack ffff800082ea0000, but annotated.
+------------[ cut here ]------------
+WARNING: CPU: 0 PID: 1 at lib/debugobjects.c:557 __debug_object_init+0x330/0x364
+Modules linked in:
+CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.12.0-rc5 #4
+Hardware name: linux,dummy-virt (DT)
+pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
+pc : __debug_object_init+0x330/0x364
+lr : __debug_object_init+0x330/0x364
+sp : ffff800082ea7b40
+x29: ffff800082ea7b40 x28: 98ff0000c0164518 x27: 98ff0000c0164534
+x26: ffff800082d93ec8 x25: 0000000000000001 x24: 1cff0000c00172a0
+x23: 0000000000000000 x22: ffff800082d93ed0 x21: ffff800081a24418
+x20: 3eff800082ea7bb0 x19: efff800000000000 x18: 0000000000000000
+x17: 00000000000000ff x16: 0000000000000047 x15: 206b63617473206e
+x14: 0000000000000018 x13: ffff800082ea7780 x12: 0ffff800082ea78e
+x11: 0ffff800082ea790 x10: 0ffff800082ea79d x9 : 34d77febe173e800
+x8 : 34d77febe173e800 x7 : 0000000000000001 x6 : 0000000000000001
+x5 : feff800082ea74b8 x4 : ffff800082870a90 x3 : ffff80008018d3c4
+x2 : 0000000000000001 x1 : ffff800082858810 x0 : 0000000000000050
+Call trace:
+ __debug_object_init+0x330/0x364
+ debug_object_init_on_stack+0x30/0x3c
+ schedule_hrtimeout_range_clock+0xac/0x26c
+ schedule_hrtimeout+0x1c/0x30
+ wait_task_inactive+0x1d4/0x25c
+ kthread_bind_mask+0x28/0x98
+ init_rescuer+0x1e8/0x280
+ workqueue_init+0x1a0/0x3cc
+ kernel_init_freeable+0x118/0x200
+ kernel_init+0x28/0x1f0
+ ret_from_fork+0x10/0x20
+---[ end trace 0000000000000000 ]---
+ODEBUG: object 3eff800082ea7bb0 is NOT on stack ffff800082ea0000, but annotated.
+------------[ cut here ]------------
+
+Link: https://lkml.kernel.org/r/20241113042544.19095-1-qun-wei.lin@mediatek.com
+Signed-off-by: Qun-Wei Lin <qun-wei.lin@mediatek.com>
+Cc: Andrew Yang <andrew.yang@mediatek.com>
+Cc: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
+Cc: Casper Li <casper.li@mediatek.com>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Chinwen Chang <chinwen.chang@mediatek.com>
+Cc: Kent Overstreet <kent.overstreet@linux.dev>
+Cc: Matthias Brugger <matthias.bgg@gmail.com>
+Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
+Cc: Shakeel Butt <shakeel.butt@linux.dev>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Alva Lan <alvalan9@foxmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/sched/task_stack.h | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/include/linux/sched/task_stack.h
++++ b/include/linux/sched/task_stack.h
+@@ -8,6 +8,7 @@
+
+ #include <linux/sched.h>
+ #include <linux/magic.h>
++#include <linux/kasan.h>
+
+ #ifdef CONFIG_THREAD_INFO_IN_TASK
+
+@@ -88,6 +89,7 @@ static inline int object_is_on_stack(con
+ {
+ void *stack = task_stack_page(current);
+
++ obj = kasan_reset_tag(obj);
+ return (obj >= stack) && (obj < (stack + THREAD_SIZE));
+ }
+