From: Greg Kroah-Hartman Date: Thu, 9 Jan 2025 12:51:21 +0000 (+0100) Subject: 6.1-stable patches X-Git-Tag: v6.6.71~11 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=3d69dbfdc9550b7d4beba74c363b531ea1d88109;p=thirdparty%2Fkernel%2Fstable-queue.git 6.1-stable patches added patches: bpf-sockmap-fix-race-between-element-replace-and-close.patch ceph-give-up-on-paths-longer-than-path_max.patch sched-task_stack-fix-object_is_on_stack-for-kasan-tagged-pointers.patch series --- diff --git a/queue-6.1/bpf-sockmap-fix-race-between-element-replace-and-close.patch b/queue-6.1/bpf-sockmap-fix-race-between-element-replace-and-close.patch new file mode 100644 index 00000000000..3f76a48eb66 --- /dev/null +++ b/queue-6.1/bpf-sockmap-fix-race-between-element-replace-and-close.patch @@ -0,0 +1,253 @@ +From ed1fc5d76b81a4d681211333c026202cad4d5649 Mon Sep 17 00:00:00 2001 +From: Michal Luczaj +Date: Mon, 2 Dec 2024 12:29:25 +0100 +Subject: bpf, sockmap: Fix race between element replace and close() + +From: Michal Luczaj + +commit ed1fc5d76b81a4d681211333c026202cad4d5649 upstream. + +Element replace (with a socket different from the one stored) may race +with socket's close() link popping & unlinking. __sock_map_delete() +unconditionally unrefs the (wrong) element: + +// set map[0] = s0 +map_update_elem(map, 0, s0) + +// drop fd of s0 +close(s0) + sock_map_close() + lock_sock(sk) (s0!) + sock_map_remove_links(sk) + link = sk_psock_link_pop() + sock_map_unlink(sk, link) + sock_map_delete_from_link + // replace map[0] with s1 + map_update_elem(map, 0, s1) + sock_map_update_elem + (s1!) 
lock_sock(sk) + sock_map_update_common + psock = sk_psock(sk) + spin_lock(&stab->lock) + osk = stab->sks[idx] + sock_map_add_link(..., &stab->sks[idx]) + sock_map_unref(osk, &stab->sks[idx]) + psock = sk_psock(osk) + sk_psock_put(sk, psock) + if (refcount_dec_and_test(&psock)) + sk_psock_drop(sk, psock) + spin_unlock(&stab->lock) + unlock_sock(sk) + __sock_map_delete + spin_lock(&stab->lock) + sk = *psk // s1 replaced s0; sk == s1 + if (!sk_test || sk_test == sk) // sk_test (s0) != sk (s1); no branch + sk = xchg(psk, NULL) + if (sk) + sock_map_unref(sk, psk) // unref s1; sks[idx] will dangle + psock = sk_psock(sk) + sk_psock_put(sk, psock) + if (refcount_dec_and_test()) + sk_psock_drop(sk, psock) + spin_unlock(&stab->lock) + release_sock(sk) + +Then close(map) enqueues bpf_map_free_deferred, which finally calls +sock_map_free(). This results in some refcount_t warnings along with +a KASAN splat [1]. + +Fix __sock_map_delete(), do not allow sock_map_unref() on elements that +may have been replaced. 
+ +[1]: +BUG: KASAN: slab-use-after-free in sock_map_free+0x10e/0x330 +Write of size 4 at addr ffff88811f5b9100 by task kworker/u64:12/1063 + +CPU: 14 UID: 0 PID: 1063 Comm: kworker/u64:12 Not tainted 6.12.0+ #125 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.3-1-1 04/01/2014 +Workqueue: events_unbound bpf_map_free_deferred +Call Trace: + + dump_stack_lvl+0x68/0x90 + print_report+0x174/0x4f6 + kasan_report+0xb9/0x190 + kasan_check_range+0x10f/0x1e0 + sock_map_free+0x10e/0x330 + bpf_map_free_deferred+0x173/0x320 + process_one_work+0x846/0x1420 + worker_thread+0x5b3/0xf80 + kthread+0x29e/0x360 + ret_from_fork+0x2d/0x70 + ret_from_fork_asm+0x1a/0x30 + + +Allocated by task 1202: + kasan_save_stack+0x1e/0x40 + kasan_save_track+0x10/0x30 + __kasan_slab_alloc+0x85/0x90 + kmem_cache_alloc_noprof+0x131/0x450 + sk_prot_alloc+0x5b/0x220 + sk_alloc+0x2c/0x870 + unix_create1+0x88/0x8a0 + unix_create+0xc5/0x180 + __sock_create+0x241/0x650 + __sys_socketpair+0x1ce/0x420 + __x64_sys_socketpair+0x92/0x100 + do_syscall_64+0x93/0x180 + entry_SYSCALL_64_after_hwframe+0x76/0x7e + +Freed by task 46: + kasan_save_stack+0x1e/0x40 + kasan_save_track+0x10/0x30 + kasan_save_free_info+0x37/0x60 + __kasan_slab_free+0x4b/0x70 + kmem_cache_free+0x1a1/0x590 + __sk_destruct+0x388/0x5a0 + sk_psock_destroy+0x73e/0xa50 + process_one_work+0x846/0x1420 + worker_thread+0x5b3/0xf80 + kthread+0x29e/0x360 + ret_from_fork+0x2d/0x70 + ret_from_fork_asm+0x1a/0x30 + +The buggy address belongs to the object at ffff88811f5b9080 + which belongs to the cache UNIX-STREAM of size 1984 +The buggy address is located 128 bytes inside of + freed 1984-byte region [ffff88811f5b9080, ffff88811f5b9840) + +The buggy address belongs to the physical page: +page: refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x11f5b8 +head: order:3 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0 +memcg:ffff888127d49401 +flags: 0x17ffffc0000040(head|node=0|zone=2|lastcpupid=0x1fffff) 
+page_type: f5(slab) +raw: 0017ffffc0000040 ffff8881042e4500 dead000000000122 0000000000000000 +raw: 0000000000000000 00000000800f000f 00000001f5000000 ffff888127d49401 +head: 0017ffffc0000040 ffff8881042e4500 dead000000000122 0000000000000000 +head: 0000000000000000 00000000800f000f 00000001f5000000 ffff888127d49401 +head: 0017ffffc0000003 ffffea00047d6e01 ffffffffffffffff 0000000000000000 +head: 0000000000000008 0000000000000000 00000000ffffffff 0000000000000000 +page dumped because: kasan: bad access detected + +Memory state around the buggy address: + ffff88811f5b9000: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc + ffff88811f5b9080: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ^ + ffff88811f5b9180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ffff88811f5b9200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +Disabling lock debugging due to kernel taint + +refcount_t: addition on 0; use-after-free. +WARNING: CPU: 14 PID: 1063 at lib/refcount.c:25 refcount_warn_saturate+0xce/0x150 +CPU: 14 UID: 0 PID: 1063 Comm: kworker/u64:12 Tainted: G B 6.12.0+ #125 +Tainted: [B]=BAD_PAGE +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.3-1-1 04/01/2014 +Workqueue: events_unbound bpf_map_free_deferred +RIP: 0010:refcount_warn_saturate+0xce/0x150 +Code: 34 73 eb 03 01 e8 82 53 ad fe 0f 0b eb b1 80 3d 27 73 eb 03 00 75 a8 48 c7 c7 80 bd 95 84 c6 05 17 73 eb 03 01 e8 62 53 ad fe <0f> 0b eb 91 80 3d 06 73 eb 03 00 75 88 48 c7 c7 e0 bd 95 84 c6 05 +RSP: 0018:ffff88815c49fc70 EFLAGS: 00010282 +RAX: 0000000000000000 RBX: ffff88811f5b9100 RCX: 0000000000000000 +RDX: 0000000000000000 RSI: 0000000000000004 RDI: 0000000000000001 +RBP: 0000000000000002 R08: 0000000000000001 R09: ffffed10bcde6349 +R10: ffff8885e6f31a4b R11: 0000000000000000 R12: ffff88813be0b000 +R13: ffff88811f5b9100 R14: ffff88811f5b9080 R15: ffff88813be0b024 +FS: 0000000000000000(0000) GS:ffff8885e6f00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 
0000000080050033 +CR2: 000055dda99b0250 CR3: 000000015dbac000 CR4: 0000000000752ef0 +PKRU: 55555554 +Call Trace: + + ? __warn.cold+0x5f/0x1ff + ? refcount_warn_saturate+0xce/0x150 + ? report_bug+0x1ec/0x390 + ? handle_bug+0x58/0x90 + ? exc_invalid_op+0x13/0x40 + ? asm_exc_invalid_op+0x16/0x20 + ? refcount_warn_saturate+0xce/0x150 + sock_map_free+0x2e5/0x330 + bpf_map_free_deferred+0x173/0x320 + process_one_work+0x846/0x1420 + worker_thread+0x5b3/0xf80 + kthread+0x29e/0x360 + ret_from_fork+0x2d/0x70 + ret_from_fork_asm+0x1a/0x30 + +irq event stamp: 10741 +hardirqs last enabled at (10741): [] asm_sysvec_apic_timer_interrupt+0x16/0x20 +hardirqs last disabled at (10740): [] handle_softirqs+0x60d/0x770 +softirqs last enabled at (10506): [] __irq_exit_rcu+0x109/0x210 +softirqs last disabled at (10301): [] __irq_exit_rcu+0x109/0x210 + +refcount_t: underflow; use-after-free. +WARNING: CPU: 14 PID: 1063 at lib/refcount.c:28 refcount_warn_saturate+0xee/0x150 +CPU: 14 UID: 0 PID: 1063 Comm: kworker/u64:12 Tainted: G B W 6.12.0+ #125 +Tainted: [B]=BAD_PAGE, [W]=WARN +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.3-1-1 04/01/2014 +Workqueue: events_unbound bpf_map_free_deferred +RIP: 0010:refcount_warn_saturate+0xee/0x150 +Code: 17 73 eb 03 01 e8 62 53 ad fe 0f 0b eb 91 80 3d 06 73 eb 03 00 75 88 48 c7 c7 e0 bd 95 84 c6 05 f6 72 eb 03 01 e8 42 53 ad fe <0f> 0b e9 6e ff ff ff 80 3d e6 72 eb 03 00 0f 85 61 ff ff ff 48 c7 +RSP: 0018:ffff88815c49fc70 EFLAGS: 00010282 +RAX: 0000000000000000 RBX: ffff88811f5b9100 RCX: 0000000000000000 +RDX: 0000000000000000 RSI: 0000000000000004 RDI: 0000000000000001 +RBP: 0000000000000003 R08: 0000000000000001 R09: ffffed10bcde6349 +R10: ffff8885e6f31a4b R11: 0000000000000000 R12: ffff88813be0b000 +R13: ffff88811f5b9100 R14: ffff88811f5b9080 R15: ffff88813be0b024 +FS: 0000000000000000(0000) GS:ffff8885e6f00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 000055dda99b0250 CR3: 
000000015dbac000 CR4: 0000000000752ef0 +PKRU: 55555554 +Call Trace: + + ? __warn.cold+0x5f/0x1ff + ? refcount_warn_saturate+0xee/0x150 + ? report_bug+0x1ec/0x390 + ? handle_bug+0x58/0x90 + ? exc_invalid_op+0x13/0x40 + ? asm_exc_invalid_op+0x16/0x20 + ? refcount_warn_saturate+0xee/0x150 + sock_map_free+0x2d3/0x330 + bpf_map_free_deferred+0x173/0x320 + process_one_work+0x846/0x1420 + worker_thread+0x5b3/0xf80 + kthread+0x29e/0x360 + ret_from_fork+0x2d/0x70 + ret_from_fork_asm+0x1a/0x30 + +irq event stamp: 10741 +hardirqs last enabled at (10741): [] asm_sysvec_apic_timer_interrupt+0x16/0x20 +hardirqs last disabled at (10740): [] handle_softirqs+0x60d/0x770 +softirqs last enabled at (10506): [] __irq_exit_rcu+0x109/0x210 +softirqs last disabled at (10301): [] __irq_exit_rcu+0x109/0x210 + +Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface") +Signed-off-by: Michal Luczaj +Signed-off-by: Daniel Borkmann +Reviewed-by: John Fastabend +Link: https://lore.kernel.org/bpf/20241202-sockmap-replace-v1-3-1e88579e7bd5@rbox.co +Signed-off-by: Alva Lan +Signed-off-by: Greg Kroah-Hartman +--- + net/core/sock_map.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/net/core/sock_map.c ++++ b/net/core/sock_map.c +@@ -411,15 +411,15 @@ static void *sock_map_lookup_sys(struct + static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test, + struct sock **psk) + { +- struct sock *sk; ++ struct sock *sk = NULL; + int err = 0; + + if (irqs_disabled()) + return -EOPNOTSUPP; /* locks here are hardirq-unsafe */ + + raw_spin_lock_bh(&stab->lock); +- sk = *psk; +- if (!sk_test || sk_test == sk) ++ ++ if (!sk_test || sk_test == *psk) + sk = xchg(psk, NULL); + + if (likely(sk)) diff --git a/queue-6.1/ceph-give-up-on-paths-longer-than-path_max.patch b/queue-6.1/ceph-give-up-on-paths-longer-than-path_max.patch new file mode 100644 index 00000000000..bd16ea2d2ca --- /dev/null +++ b/queue-6.1/ceph-give-up-on-paths-longer-than-path_max.patch @@ -0,0 
+1,52 @@ +From 550f7ca98ee028a606aa75705a7e77b1bd11720f Mon Sep 17 00:00:00 2001 +From: Max Kellermann +Date: Mon, 18 Nov 2024 23:28:28 +0100 +Subject: ceph: give up on paths longer than PATH_MAX +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Max Kellermann + +commit 550f7ca98ee028a606aa75705a7e77b1bd11720f upstream. + +If the full path to be built by ceph_mdsc_build_path() happens to be +longer than PATH_MAX, then this function will enter an endless (retry) +loop, effectively blocking the whole task. Most of the machine +becomes unusable, making this a very simple and effective DoS +vulnerability. + +I cannot imagine why this retry was ever implemented, but it seems +rather useless and harmful to me. Let's remove it and fail with +ENAMETOOLONG instead. + +Cc: stable@vger.kernel.org +Reported-by: Dario Weißer +Signed-off-by: Max Kellermann +Reviewed-by: Alex Markuze +Signed-off-by: Ilya Dryomov +[idryomov@gmail.com: backport to 6.1: pr_warn() is still in use] +Signed-off-by: Greg Kroah-Hartman +--- + fs/ceph/mds_client.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +--- a/fs/ceph/mds_client.c ++++ b/fs/ceph/mds_client.c +@@ -2447,12 +2447,11 @@ retry: + + if (pos < 0) { + /* +- * A rename didn't occur, but somehow we didn't end up where +- * we thought we would. Throw a warning and try again. ++ * The path is longer than PATH_MAX and this function ++ * cannot ever succeed. Creating paths that long is ++ * possible with Ceph, but Linux cannot use them. 
+ */ +- pr_warn("build_path did not end path lookup where " +- "expected, pos is %d\n", pos); +- goto retry; ++ return ERR_PTR(-ENAMETOOLONG); + } + + *pbase = base; diff --git a/queue-6.1/sched-task_stack-fix-object_is_on_stack-for-kasan-tagged-pointers.patch b/queue-6.1/sched-task_stack-fix-object_is_on_stack-for-kasan-tagged-pointers.patch new file mode 100644 index 00000000000..3bcc72122a1 --- /dev/null +++ b/queue-6.1/sched-task_stack-fix-object_is_on_stack-for-kasan-tagged-pointers.patch @@ -0,0 +1,90 @@ +From fd7b4f9f46d46acbc7af3a439bb0d869efdc5c58 Mon Sep 17 00:00:00 2001 +From: Qun-Wei Lin +Date: Wed, 13 Nov 2024 12:25:43 +0800 +Subject: sched/task_stack: fix object_is_on_stack() for KASAN tagged pointers + +From: Qun-Wei Lin + +commit fd7b4f9f46d46acbc7af3a439bb0d869efdc5c58 upstream. + +When CONFIG_KASAN_SW_TAGS and CONFIG_KASAN_STACK are enabled, the +object_is_on_stack() function may produce incorrect results due to the +presence of tags in the obj pointer, while the stack pointer does not have +tags. This discrepancy can lead to incorrect stack object detection and +subsequently trigger warnings if CONFIG_DEBUG_OBJECTS is also enabled. + +Example of the warning: + +ODEBUG: object 3eff800082ea7bb0 is NOT on stack ffff800082ea0000, but annotated. 
+------------[ cut here ]------------ +WARNING: CPU: 0 PID: 1 at lib/debugobjects.c:557 __debug_object_init+0x330/0x364 +Modules linked in: +CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.12.0-rc5 #4 +Hardware name: linux,dummy-virt (DT) +pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) +pc : __debug_object_init+0x330/0x364 +lr : __debug_object_init+0x330/0x364 +sp : ffff800082ea7b40 +x29: ffff800082ea7b40 x28: 98ff0000c0164518 x27: 98ff0000c0164534 +x26: ffff800082d93ec8 x25: 0000000000000001 x24: 1cff0000c00172a0 +x23: 0000000000000000 x22: ffff800082d93ed0 x21: ffff800081a24418 +x20: 3eff800082ea7bb0 x19: efff800000000000 x18: 0000000000000000 +x17: 00000000000000ff x16: 0000000000000047 x15: 206b63617473206e +x14: 0000000000000018 x13: ffff800082ea7780 x12: 0ffff800082ea78e +x11: 0ffff800082ea790 x10: 0ffff800082ea79d x9 : 34d77febe173e800 +x8 : 34d77febe173e800 x7 : 0000000000000001 x6 : 0000000000000001 +x5 : feff800082ea74b8 x4 : ffff800082870a90 x3 : ffff80008018d3c4 +x2 : 0000000000000001 x1 : ffff800082858810 x0 : 0000000000000050 +Call trace: + __debug_object_init+0x330/0x364 + debug_object_init_on_stack+0x30/0x3c + schedule_hrtimeout_range_clock+0xac/0x26c + schedule_hrtimeout+0x1c/0x30 + wait_task_inactive+0x1d4/0x25c + kthread_bind_mask+0x28/0x98 + init_rescuer+0x1e8/0x280 + workqueue_init+0x1a0/0x3cc + kernel_init_freeable+0x118/0x200 + kernel_init+0x28/0x1f0 + ret_from_fork+0x10/0x20 +---[ end trace 0000000000000000 ]--- +ODEBUG: object 3eff800082ea7bb0 is NOT on stack ffff800082ea0000, but annotated. 
+------------[ cut here ]------------ + +Link: https://lkml.kernel.org/r/20241113042544.19095-1-qun-wei.lin@mediatek.com +Signed-off-by: Qun-Wei Lin +Cc: Andrew Yang +Cc: AngeloGioacchino Del Regno +Cc: Casper Li +Cc: Catalin Marinas +Cc: Chinwen Chang +Cc: Kent Overstreet +Cc: Matthias Brugger +Cc: Pasha Tatashin +Cc: Shakeel Butt +Cc: <stable@vger.kernel.org> +Signed-off-by: Andrew Morton +Signed-off-by: Alva Lan +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/sched/task_stack.h | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/include/linux/sched/task_stack.h ++++ b/include/linux/sched/task_stack.h +@@ -8,6 +8,7 @@ + + #include <linux/sched.h> + #include <linux/magic.h> ++#include <linux/kasan.h> + + #ifdef CONFIG_THREAD_INFO_IN_TASK + +@@ -88,6 +89,7 @@ static inline int object_is_on_stack(con + { + void *stack = task_stack_page(current); + ++ obj = kasan_reset_tag(obj); + return (obj >= stack) && (obj < (stack + THREAD_SIZE)); + } + diff --git a/queue-6.1/series b/queue-6.1/series new file mode 100644 index 00000000000..45ab75e91df --- /dev/null +++ b/queue-6.1/series @@ -0,0 +1,3 @@ +ceph-give-up-on-paths-longer-than-path_max.patch +bpf-sockmap-fix-race-between-element-replace-and-close.patch +sched-task_stack-fix-object_is_on_stack-for-kasan-tagged-pointers.patch