From: Greg Kroah-Hartman Date: Sun, 15 Dec 2024 09:04:18 +0000 (+0100) Subject: 6.6-stable patches X-Git-Tag: v5.4.288~51 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=c80e8561c27aa7c561dc30784d7a4dd649facc28;p=thirdparty%2Fkernel%2Fstable-queue.git 6.6-stable patches added patches: bpf-check-size-for-btf-based-ctx-access-of-pointer-members.patch bpf-fix-theoretical-prog_array-uaf-in-__uprobe_perf_func.patch bpf-perf-fix-invalid-prog_array-access-in-perf_event_detach_bpf_prog.patch bpf-sockmap-fix-race-between-element-replace-and-close.patch bpf-sockmap-fix-update-element-with-same.patch --- diff --git a/queue-6.6/bpf-check-size-for-btf-based-ctx-access-of-pointer-members.patch b/queue-6.6/bpf-check-size-for-btf-based-ctx-access-of-pointer-members.patch new file mode 100644 index 00000000000..3e38103b16b --- /dev/null +++ b/queue-6.6/bpf-check-size-for-btf-based-ctx-access-of-pointer-members.patch @@ -0,0 +1,106 @@ +From 659b9ba7cb2d7adb64618b87ddfaa528a143766e Mon Sep 17 00:00:00 2001 +From: Kumar Kartikeya Dwivedi +Date: Thu, 12 Dec 2024 01:20:49 -0800 +Subject: bpf: Check size for BTF-based ctx access of pointer members + +From: Kumar Kartikeya Dwivedi + +commit 659b9ba7cb2d7adb64618b87ddfaa528a143766e upstream. + +Robert Morris reported the following program type which passes the +verifier in [0]: + +SEC("struct_ops/bpf_cubic_init") +void BPF_PROG(bpf_cubic_init, struct sock *sk) +{ + asm volatile("r2 = *(u16*)(r1 + 0)"); // verifier should demand u64 + asm volatile("*(u32 *)(r2 +1504) = 0"); // 1280 in some configs +} + +The second line may or may not work, but the first instruction shouldn't +pass, as it's a narrow load into the context structure of the struct ops +callback. The code falls back to btf_ctx_access to ensure correctness +and obtaining the types of pointers. 
Ensure that the size of the access +is correctly checked to be 8 bytes, otherwise the verifier thinks the +narrow load obtained a trusted BTF pointer and will permit loads/stores +as it sees fit. + +Perform the check on size after we've verified that the load is for a +pointer field, as for scalar values narrow loads are fine. Access to +structs passed as arguments to a BPF program are also treated as +scalars, therefore no adjustment is needed in their case. + +Existing verifier selftests are broken by this change, but because they +were incorrect. Verifier tests for d_path were performing narrow load +into context to obtain path pointer, had this program actually run it +would cause a crash. The same holds for verifier_btf_ctx_access tests. + + [0]: https://lore.kernel.org/bpf/51338.1732985814@localhost + +Fixes: 9e15db66136a ("bpf: Implement accurate raw_tp context access via BTF") +Reported-by: Robert Morris +Signed-off-by: Kumar Kartikeya Dwivedi +Link: https://lore.kernel.org/r/20241212092050.3204165-2-memxor@gmail.com +Signed-off-by: Alexei Starovoitov +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/btf.c | 6 ++++++ + tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c | 4 ++-- + tools/testing/selftests/bpf/progs/verifier_d_path.c | 4 ++-- + 3 files changed, 10 insertions(+), 4 deletions(-) + +--- a/kernel/bpf/btf.c ++++ b/kernel/bpf/btf.c +@@ -6024,6 +6024,12 @@ bool btf_ctx_access(int off, int size, e + return false; + } + ++ if (size != sizeof(u64)) { ++ bpf_log(log, "func '%s' size %d must be 8\n", ++ tname, size); ++ return false; ++ } ++ + /* check for PTR_TO_RDONLY_BUF_OR_NULL or PTR_TO_RDWR_BUF_OR_NULL */ + for (i = 0; i < prog->aux->ctx_arg_info_size; i++) { + const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i]; +--- a/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c ++++ b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c +@@ -11,7 +11,7 @@ __success __retval(0) + __naked void 
btf_ctx_access_accept(void) + { + asm volatile (" \ +- r2 = *(u32*)(r1 + 8); /* load 2nd argument value (int pointer) */\ ++ r2 = *(u64 *)(r1 + 8); /* load 2nd argument value (int pointer) */\ + r0 = 0; \ + exit; \ + " ::: __clobber_all); +@@ -23,7 +23,7 @@ __success __retval(0) + __naked void ctx_access_u32_pointer_accept(void) + { + asm volatile (" \ +- r2 = *(u32*)(r1 + 0); /* load 1nd argument value (u32 pointer) */\ ++ r2 = *(u64 *)(r1 + 0); /* load 1nd argument value (u32 pointer) */\ + r0 = 0; \ + exit; \ + " ::: __clobber_all); +--- a/tools/testing/selftests/bpf/progs/verifier_d_path.c ++++ b/tools/testing/selftests/bpf/progs/verifier_d_path.c +@@ -11,7 +11,7 @@ __success __retval(0) + __naked void d_path_accept(void) + { + asm volatile (" \ +- r1 = *(u32*)(r1 + 0); \ ++ r1 = *(u64 *)(r1 + 0); \ + r2 = r10; \ + r2 += -8; \ + r6 = 0; \ +@@ -31,7 +31,7 @@ __failure __msg("helper call is not allo + __naked void d_path_reject(void) + { + asm volatile (" \ +- r1 = *(u32*)(r1 + 0); \ ++ r1 = *(u64 *)(r1 + 0); \ + r2 = r10; \ + r2 += -8; \ + r6 = 0; \ diff --git a/queue-6.6/bpf-fix-theoretical-prog_array-uaf-in-__uprobe_perf_func.patch b/queue-6.6/bpf-fix-theoretical-prog_array-uaf-in-__uprobe_perf_func.patch new file mode 100644 index 00000000000..8817494d3aa --- /dev/null +++ b/queue-6.6/bpf-fix-theoretical-prog_array-uaf-in-__uprobe_perf_func.patch @@ -0,0 +1,99 @@ +From 7d0d673627e20cfa3b21a829a896ce03b58a4f1c Mon Sep 17 00:00:00 2001 +From: Jann Horn +Date: Tue, 10 Dec 2024 20:08:14 +0100 +Subject: bpf: Fix theoretical prog_array UAF in __uprobe_perf_func() + +From: Jann Horn + +commit 7d0d673627e20cfa3b21a829a896ce03b58a4f1c upstream. + +Currently, the pointer stored in call->prog_array is loaded in +__uprobe_perf_func(), with no RCU annotation and no immediately visible +RCU protection, so it looks as if the loaded pointer can immediately be +dangling. +Later, bpf_prog_run_array_uprobe() starts a RCU-trace read-side critical +section, but this is too late. 
It then uses rcu_dereference_check(), but +this use of rcu_dereference_check() does not actually dereference anything. + +Fix it by aligning the semantics to bpf_prog_run_array(): Let the caller +provide rcu_read_lock_trace() protection and then load call->prog_array +with rcu_dereference_check(). + +This issue seems to be theoretical: I don't know of any way to reach this +code without having handle_swbp() further up the stack, which is already +holding a rcu_read_lock_trace() lock, so where we take +rcu_read_lock_trace() in __uprobe_perf_func()/bpf_prog_run_array_uprobe() +doesn't actually have any effect. + +Fixes: 8c7dcb84e3b7 ("bpf: implement sleepable uprobes by chaining gps") +Suggested-by: Andrii Nakryiko +Signed-off-by: Jann Horn +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20241210-bpf-fix-uprobe-uaf-v4-1-5fc8959b2b74@google.com +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/bpf.h | 13 +++++-------- + kernel/trace/trace_uprobe.c | 6 +++++- + 2 files changed, 10 insertions(+), 9 deletions(-) + +--- a/include/linux/bpf.h ++++ b/include/linux/bpf.h +@@ -1988,26 +1988,25 @@ bpf_prog_run_array(const struct bpf_prog + * rcu-protected dynamically sized maps. 
+ */ + static __always_inline u32 +-bpf_prog_run_array_uprobe(const struct bpf_prog_array __rcu *array_rcu, ++bpf_prog_run_array_uprobe(const struct bpf_prog_array *array, + const void *ctx, bpf_prog_run_fn run_prog) + { + const struct bpf_prog_array_item *item; + const struct bpf_prog *prog; +- const struct bpf_prog_array *array; + struct bpf_run_ctx *old_run_ctx; + struct bpf_trace_run_ctx run_ctx; + u32 ret = 1; + + might_fault(); ++ RCU_LOCKDEP_WARN(!rcu_read_lock_trace_held(), "no rcu lock held"); ++ ++ if (unlikely(!array)) ++ return ret; + +- rcu_read_lock_trace(); + migrate_disable(); + + run_ctx.is_uprobe = true; + +- array = rcu_dereference_check(array_rcu, rcu_read_lock_trace_held()); +- if (unlikely(!array)) +- goto out; + old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); + item = &array->items[0]; + while ((prog = READ_ONCE(item->prog))) { +@@ -2022,9 +2021,7 @@ bpf_prog_run_array_uprobe(const struct b + rcu_read_unlock(); + } + bpf_reset_run_ctx(old_run_ctx); +-out: + migrate_enable(); +- rcu_read_unlock_trace(); + return ret; + } + +--- a/kernel/trace/trace_uprobe.c ++++ b/kernel/trace/trace_uprobe.c +@@ -1383,9 +1383,13 @@ static void __uprobe_perf_func(struct tr + + #ifdef CONFIG_BPF_EVENTS + if (bpf_prog_array_valid(call)) { ++ const struct bpf_prog_array *array; + u32 ret; + +- ret = bpf_prog_run_array_uprobe(call->prog_array, regs, bpf_prog_run); ++ rcu_read_lock_trace(); ++ array = rcu_dereference_check(call->prog_array, rcu_read_lock_trace_held()); ++ ret = bpf_prog_run_array_uprobe(array, regs, bpf_prog_run); ++ rcu_read_unlock_trace(); + if (!ret) + return; + } diff --git a/queue-6.6/bpf-perf-fix-invalid-prog_array-access-in-perf_event_detach_bpf_prog.patch b/queue-6.6/bpf-perf-fix-invalid-prog_array-access-in-perf_event_detach_bpf_prog.patch new file mode 100644 index 00000000000..3a94f76be55 --- /dev/null +++ b/queue-6.6/bpf-perf-fix-invalid-prog_array-access-in-perf_event_detach_bpf_prog.patch @@ -0,0 +1,58 @@ +From 
978c4486cca5c7b9253d3ab98a88c8e769cb9bbd Mon Sep 17 00:00:00 2001 +From: Jiri Olsa +Date: Sun, 8 Dec 2024 15:25:07 +0100 +Subject: bpf,perf: Fix invalid prog_array access in perf_event_detach_bpf_prog + +From: Jiri Olsa + +commit 978c4486cca5c7b9253d3ab98a88c8e769cb9bbd upstream. + +Syzbot reported [1] crash that happens for following tracing scenario: + + - create tracepoint perf event with attr.inherit=1, attach it to the + process and set bpf program to it + - attached process forks -> child creates inherited event + + the new child event shares the parent's bpf program and tp_event + (hence prog_array) which is global for tracepoint + + - exit both process and its child -> release both events + - first perf_event_detach_bpf_prog call will release tp_event->prog_array + and second perf_event_detach_bpf_prog will crash, because + tp_event->prog_array is NULL + +The fix makes sure the perf_event_detach_bpf_prog checks prog_array +is valid before it tries to remove the bpf program from it. 
+ +[1] https://lore.kernel.org/bpf/Z1MR6dCIKajNS6nU@krava/T/#m91dbf0688221ec7a7fc95e896a7ef9ff93b0b8ad + +Fixes: 0ee288e69d03 ("bpf,perf: Fix perf_event_detach_bpf_prog error handling") +Reported-by: syzbot+2e0d2840414ce817aaac@syzkaller.appspotmail.com +Signed-off-by: Jiri Olsa +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20241208142507.1207698-1-jolsa@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/bpf_trace.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/kernel/trace/bpf_trace.c ++++ b/kernel/trace/bpf_trace.c +@@ -2216,6 +2216,9 @@ void perf_event_detach_bpf_prog(struct p + goto unlock; + + old_array = bpf_event_rcu_dereference(event->tp_event->prog_array); ++ if (!old_array) ++ goto put; ++ + ret = bpf_prog_array_copy(old_array, event->prog, NULL, 0, &new_array); + if (ret < 0) { + bpf_prog_array_delete_safe(old_array, event->prog); +@@ -2224,6 +2227,7 @@ void perf_event_detach_bpf_prog(struct p + bpf_prog_array_free_sleepable(old_array); + } + ++put: + /* + * It could be that the bpf_prog is not sleepable (and will be freed + * via normal RCU), but is called from a point that supports sleepable diff --git a/queue-6.6/bpf-sockmap-fix-race-between-element-replace-and-close.patch b/queue-6.6/bpf-sockmap-fix-race-between-element-replace-and-close.patch new file mode 100644 index 00000000000..bee387931f9 --- /dev/null +++ b/queue-6.6/bpf-sockmap-fix-race-between-element-replace-and-close.patch @@ -0,0 +1,248 @@ +From ed1fc5d76b81a4d681211333c026202cad4d5649 Mon Sep 17 00:00:00 2001 +From: Michal Luczaj +Date: Mon, 2 Dec 2024 12:29:25 +0100 +Subject: bpf, sockmap: Fix race between element replace and close() + +From: Michal Luczaj + +commit ed1fc5d76b81a4d681211333c026202cad4d5649 upstream. + +Element replace (with a socket different from the one stored) may race +with socket's close() link popping & unlinking. 
__sock_map_delete() +unconditionally unrefs the (wrong) element: + +// set map[0] = s0 +map_update_elem(map, 0, s0) + +// drop fd of s0 +close(s0) + sock_map_close() + lock_sock(sk) (s0!) + sock_map_remove_links(sk) + link = sk_psock_link_pop() + sock_map_unlink(sk, link) + sock_map_delete_from_link + // replace map[0] with s1 + map_update_elem(map, 0, s1) + sock_map_update_elem + (s1!) lock_sock(sk) + sock_map_update_common + psock = sk_psock(sk) + spin_lock(&stab->lock) + osk = stab->sks[idx] + sock_map_add_link(..., &stab->sks[idx]) + sock_map_unref(osk, &stab->sks[idx]) + psock = sk_psock(osk) + sk_psock_put(sk, psock) + if (refcount_dec_and_test(&psock)) + sk_psock_drop(sk, psock) + spin_unlock(&stab->lock) + unlock_sock(sk) + __sock_map_delete + spin_lock(&stab->lock) + sk = *psk // s1 replaced s0; sk == s1 + if (!sk_test || sk_test == sk) // sk_test (s0) != sk (s1); no branch + sk = xchg(psk, NULL) + if (sk) + sock_map_unref(sk, psk) // unref s1; sks[idx] will dangle + psock = sk_psock(sk) + sk_psock_put(sk, psock) + if (refcount_dec_and_test()) + sk_psock_drop(sk, psock) + spin_unlock(&stab->lock) + release_sock(sk) + +Then close(map) enqueues bpf_map_free_deferred, which finally calls +sock_map_free(). This results in some refcount_t warnings along with +a KASAN splat [1]. + +Fix __sock_map_delete(), do not allow sock_map_unref() on elements that +may have been replaced. 
+ +[1]: +BUG: KASAN: slab-use-after-free in sock_map_free+0x10e/0x330 +Write of size 4 at addr ffff88811f5b9100 by task kworker/u64:12/1063 + +CPU: 14 UID: 0 PID: 1063 Comm: kworker/u64:12 Not tainted 6.12.0+ #125 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.3-1-1 04/01/2014 +Workqueue: events_unbound bpf_map_free_deferred +Call Trace: + + dump_stack_lvl+0x68/0x90 + print_report+0x174/0x4f6 + kasan_report+0xb9/0x190 + kasan_check_range+0x10f/0x1e0 + sock_map_free+0x10e/0x330 + bpf_map_free_deferred+0x173/0x320 + process_one_work+0x846/0x1420 + worker_thread+0x5b3/0xf80 + kthread+0x29e/0x360 + ret_from_fork+0x2d/0x70 + ret_from_fork_asm+0x1a/0x30 + + +Allocated by task 1202: + kasan_save_stack+0x1e/0x40 + kasan_save_track+0x10/0x30 + __kasan_slab_alloc+0x85/0x90 + kmem_cache_alloc_noprof+0x131/0x450 + sk_prot_alloc+0x5b/0x220 + sk_alloc+0x2c/0x870 + unix_create1+0x88/0x8a0 + unix_create+0xc5/0x180 + __sock_create+0x241/0x650 + __sys_socketpair+0x1ce/0x420 + __x64_sys_socketpair+0x92/0x100 + do_syscall_64+0x93/0x180 + entry_SYSCALL_64_after_hwframe+0x76/0x7e + +Freed by task 46: + kasan_save_stack+0x1e/0x40 + kasan_save_track+0x10/0x30 + kasan_save_free_info+0x37/0x60 + __kasan_slab_free+0x4b/0x70 + kmem_cache_free+0x1a1/0x590 + __sk_destruct+0x388/0x5a0 + sk_psock_destroy+0x73e/0xa50 + process_one_work+0x846/0x1420 + worker_thread+0x5b3/0xf80 + kthread+0x29e/0x360 + ret_from_fork+0x2d/0x70 + ret_from_fork_asm+0x1a/0x30 + +The buggy address belongs to the object at ffff88811f5b9080 + which belongs to the cache UNIX-STREAM of size 1984 +The buggy address is located 128 bytes inside of + freed 1984-byte region [ffff88811f5b9080, ffff88811f5b9840) + +The buggy address belongs to the physical page: +page: refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x11f5b8 +head: order:3 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0 +memcg:ffff888127d49401 +flags: 0x17ffffc0000040(head|node=0|zone=2|lastcpupid=0x1fffff) 
+page_type: f5(slab) +raw: 0017ffffc0000040 ffff8881042e4500 dead000000000122 0000000000000000 +raw: 0000000000000000 00000000800f000f 00000001f5000000 ffff888127d49401 +head: 0017ffffc0000040 ffff8881042e4500 dead000000000122 0000000000000000 +head: 0000000000000000 00000000800f000f 00000001f5000000 ffff888127d49401 +head: 0017ffffc0000003 ffffea00047d6e01 ffffffffffffffff 0000000000000000 +head: 0000000000000008 0000000000000000 00000000ffffffff 0000000000000000 +page dumped because: kasan: bad access detected + +Memory state around the buggy address: + ffff88811f5b9000: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc + ffff88811f5b9080: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ^ + ffff88811f5b9180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ffff88811f5b9200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +Disabling lock debugging due to kernel taint + +refcount_t: addition on 0; use-after-free. +WARNING: CPU: 14 PID: 1063 at lib/refcount.c:25 refcount_warn_saturate+0xce/0x150 +CPU: 14 UID: 0 PID: 1063 Comm: kworker/u64:12 Tainted: G B 6.12.0+ #125 +Tainted: [B]=BAD_PAGE +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.3-1-1 04/01/2014 +Workqueue: events_unbound bpf_map_free_deferred +RIP: 0010:refcount_warn_saturate+0xce/0x150 +Code: 34 73 eb 03 01 e8 82 53 ad fe 0f 0b eb b1 80 3d 27 73 eb 03 00 75 a8 48 c7 c7 80 bd 95 84 c6 05 17 73 eb 03 01 e8 62 53 ad fe <0f> 0b eb 91 80 3d 06 73 eb 03 00 75 88 48 c7 c7 e0 bd 95 84 c6 05 +RSP: 0018:ffff88815c49fc70 EFLAGS: 00010282 +RAX: 0000000000000000 RBX: ffff88811f5b9100 RCX: 0000000000000000 +RDX: 0000000000000000 RSI: 0000000000000004 RDI: 0000000000000001 +RBP: 0000000000000002 R08: 0000000000000001 R09: ffffed10bcde6349 +R10: ffff8885e6f31a4b R11: 0000000000000000 R12: ffff88813be0b000 +R13: ffff88811f5b9100 R14: ffff88811f5b9080 R15: ffff88813be0b024 +FS: 0000000000000000(0000) GS:ffff8885e6f00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 
0000000080050033 +CR2: 000055dda99b0250 CR3: 000000015dbac000 CR4: 0000000000752ef0 +PKRU: 55555554 +Call Trace: + + ? __warn.cold+0x5f/0x1ff + ? refcount_warn_saturate+0xce/0x150 + ? report_bug+0x1ec/0x390 + ? handle_bug+0x58/0x90 + ? exc_invalid_op+0x13/0x40 + ? asm_exc_invalid_op+0x16/0x20 + ? refcount_warn_saturate+0xce/0x150 + sock_map_free+0x2e5/0x330 + bpf_map_free_deferred+0x173/0x320 + process_one_work+0x846/0x1420 + worker_thread+0x5b3/0xf80 + kthread+0x29e/0x360 + ret_from_fork+0x2d/0x70 + ret_from_fork_asm+0x1a/0x30 + +irq event stamp: 10741 +hardirqs last enabled at (10741): [] asm_sysvec_apic_timer_interrupt+0x16/0x20 +hardirqs last disabled at (10740): [] handle_softirqs+0x60d/0x770 +softirqs last enabled at (10506): [] __irq_exit_rcu+0x109/0x210 +softirqs last disabled at (10301): [] __irq_exit_rcu+0x109/0x210 + +refcount_t: underflow; use-after-free. +WARNING: CPU: 14 PID: 1063 at lib/refcount.c:28 refcount_warn_saturate+0xee/0x150 +CPU: 14 UID: 0 PID: 1063 Comm: kworker/u64:12 Tainted: G B W 6.12.0+ #125 +Tainted: [B]=BAD_PAGE, [W]=WARN +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.3-1-1 04/01/2014 +Workqueue: events_unbound bpf_map_free_deferred +RIP: 0010:refcount_warn_saturate+0xee/0x150 +Code: 17 73 eb 03 01 e8 62 53 ad fe 0f 0b eb 91 80 3d 06 73 eb 03 00 75 88 48 c7 c7 e0 bd 95 84 c6 05 f6 72 eb 03 01 e8 42 53 ad fe <0f> 0b e9 6e ff ff ff 80 3d e6 72 eb 03 00 0f 85 61 ff ff ff 48 c7 +RSP: 0018:ffff88815c49fc70 EFLAGS: 00010282 +RAX: 0000000000000000 RBX: ffff88811f5b9100 RCX: 0000000000000000 +RDX: 0000000000000000 RSI: 0000000000000004 RDI: 0000000000000001 +RBP: 0000000000000003 R08: 0000000000000001 R09: ffffed10bcde6349 +R10: ffff8885e6f31a4b R11: 0000000000000000 R12: ffff88813be0b000 +R13: ffff88811f5b9100 R14: ffff88811f5b9080 R15: ffff88813be0b024 +FS: 0000000000000000(0000) GS:ffff8885e6f00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 000055dda99b0250 CR3: 
000000015dbac000 CR4: 0000000000752ef0 +PKRU: 55555554 +Call Trace: + + ? __warn.cold+0x5f/0x1ff + ? refcount_warn_saturate+0xee/0x150 + ? report_bug+0x1ec/0x390 + ? handle_bug+0x58/0x90 + ? exc_invalid_op+0x13/0x40 + ? asm_exc_invalid_op+0x16/0x20 + ? refcount_warn_saturate+0xee/0x150 + sock_map_free+0x2d3/0x330 + bpf_map_free_deferred+0x173/0x320 + process_one_work+0x846/0x1420 + worker_thread+0x5b3/0xf80 + kthread+0x29e/0x360 + ret_from_fork+0x2d/0x70 + ret_from_fork_asm+0x1a/0x30 + +irq event stamp: 10741 +hardirqs last enabled at (10741): [] asm_sysvec_apic_timer_interrupt+0x16/0x20 +hardirqs last disabled at (10740): [] handle_softirqs+0x60d/0x770 +softirqs last enabled at (10506): [] __irq_exit_rcu+0x109/0x210 +softirqs last disabled at (10301): [] __irq_exit_rcu+0x109/0x210 + +Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface") +Signed-off-by: Michal Luczaj +Signed-off-by: Daniel Borkmann +Reviewed-by: John Fastabend +Link: https://lore.kernel.org/bpf/20241202-sockmap-replace-v1-3-1e88579e7bd5@rbox.co +Signed-off-by: Greg Kroah-Hartman +--- + net/core/sock_map.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/net/core/sock_map.c ++++ b/net/core/sock_map.c +@@ -408,12 +408,11 @@ static void *sock_map_lookup_sys(struct + static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test, + struct sock **psk) + { +- struct sock *sk; ++ struct sock *sk = NULL; + int err = 0; + + spin_lock_bh(&stab->lock); +- sk = *psk; +- if (!sk_test || sk_test == sk) ++ if (!sk_test || sk_test == *psk) + sk = xchg(psk, NULL); + + if (likely(sk)) diff --git a/queue-6.6/bpf-sockmap-fix-update-element-with-same.patch b/queue-6.6/bpf-sockmap-fix-update-element-with-same.patch new file mode 100644 index 00000000000..0e180b6d842 --- /dev/null +++ b/queue-6.6/bpf-sockmap-fix-update-element-with-same.patch @@ -0,0 +1,57 @@ +From 75e072a390da9a22e7ae4a4e8434dfca5da499fb Mon Sep 17 00:00:00 2001 +From: Michal Luczaj +Date: Mon, 2 Dec 
2024 12:29:23 +0100 +Subject: bpf, sockmap: Fix update element with same + +From: Michal Luczaj + +commit 75e072a390da9a22e7ae4a4e8434dfca5da499fb upstream. + +Consider a sockmap entry being updated with the same socket: + + osk = stab->sks[idx]; + sock_map_add_link(psock, link, map, &stab->sks[idx]); + stab->sks[idx] = sk; + if (osk) + sock_map_unref(osk, &stab->sks[idx]); + +Due to sock_map_unref(), which invokes sock_map_del_link(), all the +psock's links for stab->sks[idx] are torn: + + list_for_each_entry_safe(link, tmp, &psock->link, list) { + if (link->link_raw == link_raw) { + ... + list_del(&link->list); + sk_psock_free_link(link); + } + } + +And that includes the new link sock_map_add_link() added just before +the unref. + +This results in a sockmap holding a socket, but without the respective +link. This in turn means that close(sock) won't trigger the cleanup, +i.e. a closed socket will not be automatically removed from the sockmap. + +Stop tearing the links when a matching link_raw is found. 
+ +Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface") +Signed-off-by: Michal Luczaj +Signed-off-by: Daniel Borkmann +Reviewed-by: John Fastabend +Link: https://lore.kernel.org/bpf/20241202-sockmap-replace-v1-1-1e88579e7bd5@rbox.co +Signed-off-by: Greg Kroah-Hartman +--- + net/core/sock_map.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/core/sock_map.c ++++ b/net/core/sock_map.c +@@ -156,6 +156,7 @@ static void sock_map_del_link(struct soc + verdict_stop = true; + list_del(&link->list); + sk_psock_free_link(link); ++ break; + } + } + spin_unlock_bh(&psock->link_lock); diff --git a/queue-6.6/series b/queue-6.6/series index 02a09c848ad..8ae1ffc6056 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -26,3 +26,8 @@ xfs-don-t-drop-errno-values-when-we-fail-to-ficlone-the-entire-range.patch xfs-return-from-xfs_symlink_verify-early-on-v4-filesystems.patch xfs-fix-scrub-tracepoints-when-inode-rooted-btrees-are-involved.patch xfs-only-run-precommits-once-per-transaction-object.patch +bpf-check-size-for-btf-based-ctx-access-of-pointer-members.patch +bpf-fix-theoretical-prog_array-uaf-in-__uprobe_perf_func.patch +bpf-perf-fix-invalid-prog_array-access-in-perf_event_detach_bpf_prog.patch +bpf-sockmap-fix-race-between-element-replace-and-close.patch +bpf-sockmap-fix-update-element-with-same.patch