git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.6-stable patches
author: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 15 Dec 2024 09:04:18 +0000 (10:04 +0100)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 15 Dec 2024 09:04:18 +0000 (10:04 +0100)
added patches:
bpf-check-size-for-btf-based-ctx-access-of-pointer-members.patch
bpf-fix-theoretical-prog_array-uaf-in-__uprobe_perf_func.patch
bpf-perf-fix-invalid-prog_array-access-in-perf_event_detach_bpf_prog.patch
bpf-sockmap-fix-race-between-element-replace-and-close.patch
bpf-sockmap-fix-update-element-with-same.patch

queue-6.6/bpf-check-size-for-btf-based-ctx-access-of-pointer-members.patch [new file with mode: 0644]
queue-6.6/bpf-fix-theoretical-prog_array-uaf-in-__uprobe_perf_func.patch [new file with mode: 0644]
queue-6.6/bpf-perf-fix-invalid-prog_array-access-in-perf_event_detach_bpf_prog.patch [new file with mode: 0644]
queue-6.6/bpf-sockmap-fix-race-between-element-replace-and-close.patch [new file with mode: 0644]
queue-6.6/bpf-sockmap-fix-update-element-with-same.patch [new file with mode: 0644]
queue-6.6/series

diff --git a/queue-6.6/bpf-check-size-for-btf-based-ctx-access-of-pointer-members.patch b/queue-6.6/bpf-check-size-for-btf-based-ctx-access-of-pointer-members.patch
new file mode 100644 (file)
index 0000000..3e38103
--- /dev/null
@@ -0,0 +1,106 @@
+From 659b9ba7cb2d7adb64618b87ddfaa528a143766e Mon Sep 17 00:00:00 2001
+From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
+Date: Thu, 12 Dec 2024 01:20:49 -0800
+Subject: bpf: Check size for BTF-based ctx access of pointer members
+
+From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
+
+commit 659b9ba7cb2d7adb64618b87ddfaa528a143766e upstream.
+
+Robert Morris reported the following program type which passes the
+verifier in [0]:
+
+SEC("struct_ops/bpf_cubic_init")
+void BPF_PROG(bpf_cubic_init, struct sock *sk)
+{
+       asm volatile("r2 = *(u16*)(r1 + 0)");     // verifier should demand u64
+       asm volatile("*(u32 *)(r2 +1504) = 0");   // 1280 in some configs
+}
+
+The second line may or may not work, but the first instruction shouldn't
+pass, as it's a narrow load into the context structure of the struct ops
+callback. The code falls back to btf_ctx_access to ensure correctness
+and to obtain the types of pointers. Ensure that the size of the access
+is correctly checked to be 8 bytes, otherwise the verifier thinks the
+narrow load obtained a trusted BTF pointer and will permit loads/stores
+as it sees fit.
+
+Perform the check on size after we've verified that the load is for a
+pointer field, as for scalar values narrow loads are fine. Accesses to
+structs passed as arguments to a BPF program are also treated as
+scalars; therefore, no adjustment is needed in their case.
+
+Existing verifier selftests are broken by this change, but only because they
+were incorrect. Verifier tests for d_path were performing a narrow load into
+the context to obtain the path pointer; had this program actually run, it
+would have caused a crash. The same holds for verifier_btf_ctx_access tests.
+
+  [0]: https://lore.kernel.org/bpf/51338.1732985814@localhost
+
+Fixes: 9e15db66136a ("bpf: Implement accurate raw_tp context access via BTF")
+Reported-by: Robert Morris <rtm@mit.edu>
+Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
+Link: https://lore.kernel.org/r/20241212092050.3204165-2-memxor@gmail.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/btf.c                                            |    6 ++++++
+ tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c |    4 ++--
+ tools/testing/selftests/bpf/progs/verifier_d_path.c         |    4 ++--
+ 3 files changed, 10 insertions(+), 4 deletions(-)
+
+--- a/kernel/bpf/btf.c
++++ b/kernel/bpf/btf.c
+@@ -6024,6 +6024,12 @@ bool btf_ctx_access(int off, int size, e
+               return false;
+       }
++      if (size != sizeof(u64)) {
++              bpf_log(log, "func '%s' size %d must be 8\n",
++                      tname, size);
++              return false;
++      }
++
+       /* check for PTR_TO_RDONLY_BUF_OR_NULL or PTR_TO_RDWR_BUF_OR_NULL */
+       for (i = 0; i < prog->aux->ctx_arg_info_size; i++) {
+               const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i];
+--- a/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c
++++ b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c
+@@ -11,7 +11,7 @@ __success __retval(0)
+ __naked void btf_ctx_access_accept(void)
+ {
+       asm volatile ("                                 \
+-      r2 = *(u32*)(r1 + 8);           /* load 2nd argument value (int pointer) */\
++      r2 = *(u64 *)(r1 + 8);          /* load 2nd argument value (int pointer) */\
+       r0 = 0;                                         \
+       exit;                                           \
+ "     ::: __clobber_all);
+@@ -23,7 +23,7 @@ __success __retval(0)
+ __naked void ctx_access_u32_pointer_accept(void)
+ {
+       asm volatile ("                                 \
+-      r2 = *(u32*)(r1 + 0);           /* load 1nd argument value (u32 pointer) */\
++      r2 = *(u64 *)(r1 + 0);          /* load 1nd argument value (u32 pointer) */\
+       r0 = 0;                                         \
+       exit;                                           \
+ "     ::: __clobber_all);
+--- a/tools/testing/selftests/bpf/progs/verifier_d_path.c
++++ b/tools/testing/selftests/bpf/progs/verifier_d_path.c
+@@ -11,7 +11,7 @@ __success __retval(0)
+ __naked void d_path_accept(void)
+ {
+       asm volatile ("                                 \
+-      r1 = *(u32*)(r1 + 0);                           \
++      r1 = *(u64 *)(r1 + 0);                          \
+       r2 = r10;                                       \
+       r2 += -8;                                       \
+       r6 = 0;                                         \
+@@ -31,7 +31,7 @@ __failure __msg("helper call is not allo
+ __naked void d_path_reject(void)
+ {
+       asm volatile ("                                 \
+-      r1 = *(u32*)(r1 + 0);                           \
++      r1 = *(u64 *)(r1 + 0);                          \
+       r2 = r10;                                       \
+       r2 += -8;                                       \
+       r6 = 0;                                         \
diff --git a/queue-6.6/bpf-fix-theoretical-prog_array-uaf-in-__uprobe_perf_func.patch b/queue-6.6/bpf-fix-theoretical-prog_array-uaf-in-__uprobe_perf_func.patch
new file mode 100644 (file)
index 0000000..8817494
--- /dev/null
@@ -0,0 +1,99 @@
+From 7d0d673627e20cfa3b21a829a896ce03b58a4f1c Mon Sep 17 00:00:00 2001
+From: Jann Horn <jannh@google.com>
+Date: Tue, 10 Dec 2024 20:08:14 +0100
+Subject: bpf: Fix theoretical prog_array UAF in __uprobe_perf_func()
+
+From: Jann Horn <jannh@google.com>
+
+commit 7d0d673627e20cfa3b21a829a896ce03b58a4f1c upstream.
+
+Currently, the pointer stored in call->prog_array is loaded in
+__uprobe_perf_func(), with no RCU annotation and no immediately visible
+RCU protection, so it looks as if the loaded pointer can immediately be
+dangling.
+Later, bpf_prog_run_array_uprobe() starts an RCU-trace read-side critical
+section, but this is too late. It then uses rcu_dereference_check(), but
+this use of rcu_dereference_check() does not actually dereference anything.
+
+Fix it by aligning the semantics to bpf_prog_run_array(): Let the caller
+provide rcu_read_lock_trace() protection and then load call->prog_array
+with rcu_dereference_check().
+
+This issue seems to be theoretical: I don't know of any way to reach this
+code without having handle_swbp() further up the stack, which is already
+holding a rcu_read_lock_trace() lock, so where we take
+rcu_read_lock_trace() in __uprobe_perf_func()/bpf_prog_run_array_uprobe()
+doesn't actually have any effect.
+
+Fixes: 8c7dcb84e3b7 ("bpf: implement sleepable uprobes by chaining gps")
+Suggested-by: Andrii Nakryiko <andrii@kernel.org>
+Signed-off-by: Jann Horn <jannh@google.com>
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Link: https://lore.kernel.org/bpf/20241210-bpf-fix-uprobe-uaf-v4-1-5fc8959b2b74@google.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/bpf.h         |   13 +++++--------
+ kernel/trace/trace_uprobe.c |    6 +++++-
+ 2 files changed, 10 insertions(+), 9 deletions(-)
+
+--- a/include/linux/bpf.h
++++ b/include/linux/bpf.h
+@@ -1988,26 +1988,25 @@ bpf_prog_run_array(const struct bpf_prog
+  * rcu-protected dynamically sized maps.
+  */
+ static __always_inline u32
+-bpf_prog_run_array_uprobe(const struct bpf_prog_array __rcu *array_rcu,
++bpf_prog_run_array_uprobe(const struct bpf_prog_array *array,
+                         const void *ctx, bpf_prog_run_fn run_prog)
+ {
+       const struct bpf_prog_array_item *item;
+       const struct bpf_prog *prog;
+-      const struct bpf_prog_array *array;
+       struct bpf_run_ctx *old_run_ctx;
+       struct bpf_trace_run_ctx run_ctx;
+       u32 ret = 1;
+       might_fault();
++      RCU_LOCKDEP_WARN(!rcu_read_lock_trace_held(), "no rcu lock held");
++
++      if (unlikely(!array))
++              return ret;
+-      rcu_read_lock_trace();
+       migrate_disable();
+       run_ctx.is_uprobe = true;
+-      array = rcu_dereference_check(array_rcu, rcu_read_lock_trace_held());
+-      if (unlikely(!array))
+-              goto out;
+       old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+       item = &array->items[0];
+       while ((prog = READ_ONCE(item->prog))) {
+@@ -2022,9 +2021,7 @@ bpf_prog_run_array_uprobe(const struct b
+                       rcu_read_unlock();
+       }
+       bpf_reset_run_ctx(old_run_ctx);
+-out:
+       migrate_enable();
+-      rcu_read_unlock_trace();
+       return ret;
+ }
+--- a/kernel/trace/trace_uprobe.c
++++ b/kernel/trace/trace_uprobe.c
+@@ -1383,9 +1383,13 @@ static void __uprobe_perf_func(struct tr
+ #ifdef CONFIG_BPF_EVENTS
+       if (bpf_prog_array_valid(call)) {
++              const struct bpf_prog_array *array;
+               u32 ret;
+-              ret = bpf_prog_run_array_uprobe(call->prog_array, regs, bpf_prog_run);
++              rcu_read_lock_trace();
++              array = rcu_dereference_check(call->prog_array, rcu_read_lock_trace_held());
++              ret = bpf_prog_run_array_uprobe(array, regs, bpf_prog_run);
++              rcu_read_unlock_trace();
+               if (!ret)
+                       return;
+       }
diff --git a/queue-6.6/bpf-perf-fix-invalid-prog_array-access-in-perf_event_detach_bpf_prog.patch b/queue-6.6/bpf-perf-fix-invalid-prog_array-access-in-perf_event_detach_bpf_prog.patch
new file mode 100644 (file)
index 0000000..3a94f76
--- /dev/null
@@ -0,0 +1,58 @@
+From 978c4486cca5c7b9253d3ab98a88c8e769cb9bbd Mon Sep 17 00:00:00 2001
+From: Jiri Olsa <jolsa@kernel.org>
+Date: Sun, 8 Dec 2024 15:25:07 +0100
+Subject: bpf,perf: Fix invalid prog_array access in perf_event_detach_bpf_prog
+
+From: Jiri Olsa <jolsa@kernel.org>
+
+commit 978c4486cca5c7b9253d3ab98a88c8e769cb9bbd upstream.
+
+Syzbot reported [1] a crash that happens in the following tracing scenario:
+
+  - create tracepoint perf event with attr.inherit=1, attach it to the
+    process and set bpf program to it
+  - attached process forks -> child creates inherited event
+
+    the new child event shares the parent's bpf program and tp_event
+    (hence prog_array) which is global for tracepoint
+
+  - exit both process and its child -> release both events
+  - first perf_event_detach_bpf_prog call will release tp_event->prog_array
+    and second perf_event_detach_bpf_prog will crash, because
+    tp_event->prog_array is NULL
+
+The fix makes sure perf_event_detach_bpf_prog checks that prog_array
+is valid before it tries to remove the bpf program from it.
+
+[1] https://lore.kernel.org/bpf/Z1MR6dCIKajNS6nU@krava/T/#m91dbf0688221ec7a7fc95e896a7ef9ff93b0b8ad
+
+Fixes: 0ee288e69d03 ("bpf,perf: Fix perf_event_detach_bpf_prog error handling")
+Reported-by: syzbot+2e0d2840414ce817aaac@syzkaller.appspotmail.com
+Signed-off-by: Jiri Olsa <jolsa@kernel.org>
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Link: https://lore.kernel.org/bpf/20241208142507.1207698-1-jolsa@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/bpf_trace.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/kernel/trace/bpf_trace.c
++++ b/kernel/trace/bpf_trace.c
+@@ -2216,6 +2216,9 @@ void perf_event_detach_bpf_prog(struct p
+               goto unlock;
+       old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
++      if (!old_array)
++              goto put;
++
+       ret = bpf_prog_array_copy(old_array, event->prog, NULL, 0, &new_array);
+       if (ret < 0) {
+               bpf_prog_array_delete_safe(old_array, event->prog);
+@@ -2224,6 +2227,7 @@ void perf_event_detach_bpf_prog(struct p
+               bpf_prog_array_free_sleepable(old_array);
+       }
++put:
+       /*
+        * It could be that the bpf_prog is not sleepable (and will be freed
+        * via normal RCU), but is called from a point that supports sleepable
diff --git a/queue-6.6/bpf-sockmap-fix-race-between-element-replace-and-close.patch b/queue-6.6/bpf-sockmap-fix-race-between-element-replace-and-close.patch
new file mode 100644 (file)
index 0000000..bee3879
--- /dev/null
@@ -0,0 +1,248 @@
+From ed1fc5d76b81a4d681211333c026202cad4d5649 Mon Sep 17 00:00:00 2001
+From: Michal Luczaj <mhal@rbox.co>
+Date: Mon, 2 Dec 2024 12:29:25 +0100
+Subject: bpf, sockmap: Fix race between element replace and close()
+
+From: Michal Luczaj <mhal@rbox.co>
+
+commit ed1fc5d76b81a4d681211333c026202cad4d5649 upstream.
+
+Element replace (with a socket different from the one stored) may race
+with socket's close() link popping & unlinking. __sock_map_delete()
+unconditionally unrefs the (wrong) element:
+
+// set map[0] = s0
+map_update_elem(map, 0, s0)
+
+// drop fd of s0
+close(s0)
+  sock_map_close()
+    lock_sock(sk)               (s0!)
+    sock_map_remove_links(sk)
+      link = sk_psock_link_pop()
+      sock_map_unlink(sk, link)
+        sock_map_delete_from_link
+                                        // replace map[0] with s1
+                                        map_update_elem(map, 0, s1)
+                                          sock_map_update_elem
+                                (s1!)       lock_sock(sk)
+                                            sock_map_update_common
+                                              psock = sk_psock(sk)
+                                              spin_lock(&stab->lock)
+                                              osk = stab->sks[idx]
+                                              sock_map_add_link(..., &stab->sks[idx])
+                                              sock_map_unref(osk, &stab->sks[idx])
+                                                psock = sk_psock(osk)
+                                                sk_psock_put(sk, psock)
+                                                  if (refcount_dec_and_test(&psock))
+                                                    sk_psock_drop(sk, psock)
+                                              spin_unlock(&stab->lock)
+                                            unlock_sock(sk)
+          __sock_map_delete
+            spin_lock(&stab->lock)
+            sk = *psk                        // s1 replaced s0; sk == s1
+            if (!sk_test || sk_test == sk)   // sk_test (s0) != sk (s1); no branch
+              sk = xchg(psk, NULL)
+            if (sk)
+              sock_map_unref(sk, psk)        // unref s1; sks[idx] will dangle
+                psock = sk_psock(sk)
+                sk_psock_put(sk, psock)
+                  if (refcount_dec_and_test())
+                    sk_psock_drop(sk, psock)
+            spin_unlock(&stab->lock)
+    release_sock(sk)
+
+Then close(map) enqueues bpf_map_free_deferred, which finally calls
+sock_map_free(). This results in some refcount_t warnings along with
+a KASAN splat [1].
+
+Fix __sock_map_delete(), do not allow sock_map_unref() on elements that
+may have been replaced.
+
+[1]:
+BUG: KASAN: slab-use-after-free in sock_map_free+0x10e/0x330
+Write of size 4 at addr ffff88811f5b9100 by task kworker/u64:12/1063
+
+CPU: 14 UID: 0 PID: 1063 Comm: kworker/u64:12 Not tainted 6.12.0+ #125
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.3-1-1 04/01/2014
+Workqueue: events_unbound bpf_map_free_deferred
+Call Trace:
+ <TASK>
+ dump_stack_lvl+0x68/0x90
+ print_report+0x174/0x4f6
+ kasan_report+0xb9/0x190
+ kasan_check_range+0x10f/0x1e0
+ sock_map_free+0x10e/0x330
+ bpf_map_free_deferred+0x173/0x320
+ process_one_work+0x846/0x1420
+ worker_thread+0x5b3/0xf80
+ kthread+0x29e/0x360
+ ret_from_fork+0x2d/0x70
+ ret_from_fork_asm+0x1a/0x30
+ </TASK>
+
+Allocated by task 1202:
+ kasan_save_stack+0x1e/0x40
+ kasan_save_track+0x10/0x30
+ __kasan_slab_alloc+0x85/0x90
+ kmem_cache_alloc_noprof+0x131/0x450
+ sk_prot_alloc+0x5b/0x220
+ sk_alloc+0x2c/0x870
+ unix_create1+0x88/0x8a0
+ unix_create+0xc5/0x180
+ __sock_create+0x241/0x650
+ __sys_socketpair+0x1ce/0x420
+ __x64_sys_socketpair+0x92/0x100
+ do_syscall_64+0x93/0x180
+ entry_SYSCALL_64_after_hwframe+0x76/0x7e
+
+Freed by task 46:
+ kasan_save_stack+0x1e/0x40
+ kasan_save_track+0x10/0x30
+ kasan_save_free_info+0x37/0x60
+ __kasan_slab_free+0x4b/0x70
+ kmem_cache_free+0x1a1/0x590
+ __sk_destruct+0x388/0x5a0
+ sk_psock_destroy+0x73e/0xa50
+ process_one_work+0x846/0x1420
+ worker_thread+0x5b3/0xf80
+ kthread+0x29e/0x360
+ ret_from_fork+0x2d/0x70
+ ret_from_fork_asm+0x1a/0x30
+
+The buggy address belongs to the object at ffff88811f5b9080
+ which belongs to the cache UNIX-STREAM of size 1984
+The buggy address is located 128 bytes inside of
+ freed 1984-byte region [ffff88811f5b9080, ffff88811f5b9840)
+
+The buggy address belongs to the physical page:
+page: refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x11f5b8
+head: order:3 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0
+memcg:ffff888127d49401
+flags: 0x17ffffc0000040(head|node=0|zone=2|lastcpupid=0x1fffff)
+page_type: f5(slab)
+raw: 0017ffffc0000040 ffff8881042e4500 dead000000000122 0000000000000000
+raw: 0000000000000000 00000000800f000f 00000001f5000000 ffff888127d49401
+head: 0017ffffc0000040 ffff8881042e4500 dead000000000122 0000000000000000
+head: 0000000000000000 00000000800f000f 00000001f5000000 ffff888127d49401
+head: 0017ffffc0000003 ffffea00047d6e01 ffffffffffffffff 0000000000000000
+head: 0000000000000008 0000000000000000 00000000ffffffff 0000000000000000
+page dumped because: kasan: bad access detected
+
+Memory state around the buggy address:
+ ffff88811f5b9000: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+ ffff88811f5b9080: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+                   ^
+ ffff88811f5b9180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ ffff88811f5b9200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+Disabling lock debugging due to kernel taint
+
+refcount_t: addition on 0; use-after-free.
+WARNING: CPU: 14 PID: 1063 at lib/refcount.c:25 refcount_warn_saturate+0xce/0x150
+CPU: 14 UID: 0 PID: 1063 Comm: kworker/u64:12 Tainted: G    B              6.12.0+ #125
+Tainted: [B]=BAD_PAGE
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.3-1-1 04/01/2014
+Workqueue: events_unbound bpf_map_free_deferred
+RIP: 0010:refcount_warn_saturate+0xce/0x150
+Code: 34 73 eb 03 01 e8 82 53 ad fe 0f 0b eb b1 80 3d 27 73 eb 03 00 75 a8 48 c7 c7 80 bd 95 84 c6 05 17 73 eb 03 01 e8 62 53 ad fe <0f> 0b eb 91 80 3d 06 73 eb 03 00 75 88 48 c7 c7 e0 bd 95 84 c6 05
+RSP: 0018:ffff88815c49fc70 EFLAGS: 00010282
+RAX: 0000000000000000 RBX: ffff88811f5b9100 RCX: 0000000000000000
+RDX: 0000000000000000 RSI: 0000000000000004 RDI: 0000000000000001
+RBP: 0000000000000002 R08: 0000000000000001 R09: ffffed10bcde6349
+R10: ffff8885e6f31a4b R11: 0000000000000000 R12: ffff88813be0b000
+R13: ffff88811f5b9100 R14: ffff88811f5b9080 R15: ffff88813be0b024
+FS:  0000000000000000(0000) GS:ffff8885e6f00000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 000055dda99b0250 CR3: 000000015dbac000 CR4: 0000000000752ef0
+PKRU: 55555554
+Call Trace:
+ <TASK>
+ ? __warn.cold+0x5f/0x1ff
+ ? refcount_warn_saturate+0xce/0x150
+ ? report_bug+0x1ec/0x390
+ ? handle_bug+0x58/0x90
+ ? exc_invalid_op+0x13/0x40
+ ? asm_exc_invalid_op+0x16/0x20
+ ? refcount_warn_saturate+0xce/0x150
+ sock_map_free+0x2e5/0x330
+ bpf_map_free_deferred+0x173/0x320
+ process_one_work+0x846/0x1420
+ worker_thread+0x5b3/0xf80
+ kthread+0x29e/0x360
+ ret_from_fork+0x2d/0x70
+ ret_from_fork_asm+0x1a/0x30
+ </TASK>
+irq event stamp: 10741
+hardirqs last  enabled at (10741): [<ffffffff84400ec6>] asm_sysvec_apic_timer_interrupt+0x16/0x20
+hardirqs last disabled at (10740): [<ffffffff811e532d>] handle_softirqs+0x60d/0x770
+softirqs last  enabled at (10506): [<ffffffff811e55a9>] __irq_exit_rcu+0x109/0x210
+softirqs last disabled at (10301): [<ffffffff811e55a9>] __irq_exit_rcu+0x109/0x210
+
+refcount_t: underflow; use-after-free.
+WARNING: CPU: 14 PID: 1063 at lib/refcount.c:28 refcount_warn_saturate+0xee/0x150
+CPU: 14 UID: 0 PID: 1063 Comm: kworker/u64:12 Tainted: G    B   W          6.12.0+ #125
+Tainted: [B]=BAD_PAGE, [W]=WARN
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.3-1-1 04/01/2014
+Workqueue: events_unbound bpf_map_free_deferred
+RIP: 0010:refcount_warn_saturate+0xee/0x150
+Code: 17 73 eb 03 01 e8 62 53 ad fe 0f 0b eb 91 80 3d 06 73 eb 03 00 75 88 48 c7 c7 e0 bd 95 84 c6 05 f6 72 eb 03 01 e8 42 53 ad fe <0f> 0b e9 6e ff ff ff 80 3d e6 72 eb 03 00 0f 85 61 ff ff ff 48 c7
+RSP: 0018:ffff88815c49fc70 EFLAGS: 00010282
+RAX: 0000000000000000 RBX: ffff88811f5b9100 RCX: 0000000000000000
+RDX: 0000000000000000 RSI: 0000000000000004 RDI: 0000000000000001
+RBP: 0000000000000003 R08: 0000000000000001 R09: ffffed10bcde6349
+R10: ffff8885e6f31a4b R11: 0000000000000000 R12: ffff88813be0b000
+R13: ffff88811f5b9100 R14: ffff88811f5b9080 R15: ffff88813be0b024
+FS:  0000000000000000(0000) GS:ffff8885e6f00000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 000055dda99b0250 CR3: 000000015dbac000 CR4: 0000000000752ef0
+PKRU: 55555554
+Call Trace:
+ <TASK>
+ ? __warn.cold+0x5f/0x1ff
+ ? refcount_warn_saturate+0xee/0x150
+ ? report_bug+0x1ec/0x390
+ ? handle_bug+0x58/0x90
+ ? exc_invalid_op+0x13/0x40
+ ? asm_exc_invalid_op+0x16/0x20
+ ? refcount_warn_saturate+0xee/0x150
+ sock_map_free+0x2d3/0x330
+ bpf_map_free_deferred+0x173/0x320
+ process_one_work+0x846/0x1420
+ worker_thread+0x5b3/0xf80
+ kthread+0x29e/0x360
+ ret_from_fork+0x2d/0x70
+ ret_from_fork_asm+0x1a/0x30
+ </TASK>
+irq event stamp: 10741
+hardirqs last  enabled at (10741): [<ffffffff84400ec6>] asm_sysvec_apic_timer_interrupt+0x16/0x20
+hardirqs last disabled at (10740): [<ffffffff811e532d>] handle_softirqs+0x60d/0x770
+softirqs last  enabled at (10506): [<ffffffff811e55a9>] __irq_exit_rcu+0x109/0x210
+softirqs last disabled at (10301): [<ffffffff811e55a9>] __irq_exit_rcu+0x109/0x210
+
+Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface")
+Signed-off-by: Michal Luczaj <mhal@rbox.co>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Reviewed-by: John Fastabend <john.fastabend@gmail.com>
+Link: https://lore.kernel.org/bpf/20241202-sockmap-replace-v1-3-1e88579e7bd5@rbox.co
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/sock_map.c |    5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/net/core/sock_map.c
++++ b/net/core/sock_map.c
+@@ -408,12 +408,11 @@ static void *sock_map_lookup_sys(struct
+ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
+                            struct sock **psk)
+ {
+-      struct sock *sk;
++      struct sock *sk = NULL;
+       int err = 0;
+       spin_lock_bh(&stab->lock);
+-      sk = *psk;
+-      if (!sk_test || sk_test == sk)
++      if (!sk_test || sk_test == *psk)
+               sk = xchg(psk, NULL);
+       if (likely(sk))
diff --git a/queue-6.6/bpf-sockmap-fix-update-element-with-same.patch b/queue-6.6/bpf-sockmap-fix-update-element-with-same.patch
new file mode 100644 (file)
index 0000000..0e180b6
--- /dev/null
@@ -0,0 +1,57 @@
+From 75e072a390da9a22e7ae4a4e8434dfca5da499fb Mon Sep 17 00:00:00 2001
+From: Michal Luczaj <mhal@rbox.co>
+Date: Mon, 2 Dec 2024 12:29:23 +0100
+Subject: bpf, sockmap: Fix update element with same
+
+From: Michal Luczaj <mhal@rbox.co>
+
+commit 75e072a390da9a22e7ae4a4e8434dfca5da499fb upstream.
+
+Consider a sockmap entry being updated with the same socket:
+
+       osk = stab->sks[idx];
+       sock_map_add_link(psock, link, map, &stab->sks[idx]);
+       stab->sks[idx] = sk;
+       if (osk)
+               sock_map_unref(osk, &stab->sks[idx]);
+
+Due to sock_map_unref(), which invokes sock_map_del_link(), all the
+psock's links for stab->sks[idx] are torn down:
+
+       list_for_each_entry_safe(link, tmp, &psock->link, list) {
+               if (link->link_raw == link_raw) {
+                       ...
+                       list_del(&link->list);
+                       sk_psock_free_link(link);
+               }
+       }
+
+And that includes the new link sock_map_add_link() added just before
+the unref.
+
+This results in a sockmap holding a socket, but without the respective
+link. This in turn means that close(sock) won't trigger the cleanup,
+i.e. a closed socket will not be automatically removed from the sockmap.
+
+Stop tearing the links when a matching link_raw is found.
+
+Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface")
+Signed-off-by: Michal Luczaj <mhal@rbox.co>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Reviewed-by: John Fastabend <john.fastabend@gmail.com>
+Link: https://lore.kernel.org/bpf/20241202-sockmap-replace-v1-1-1e88579e7bd5@rbox.co
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/sock_map.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/core/sock_map.c
++++ b/net/core/sock_map.c
+@@ -156,6 +156,7 @@ static void sock_map_del_link(struct soc
+                               verdict_stop = true;
+                       list_del(&link->list);
+                       sk_psock_free_link(link);
++                      break;
+               }
+       }
+       spin_unlock_bh(&psock->link_lock);
index 02a09c848adc02fa1d979a0400c7b4d324709411..8ae1ffc6056a90aec6d36947605ea1d631836403 100644 (file)
@@ -26,3 +26,8 @@ xfs-don-t-drop-errno-values-when-we-fail-to-ficlone-the-entire-range.patch
 xfs-return-from-xfs_symlink_verify-early-on-v4-filesystems.patch
 xfs-fix-scrub-tracepoints-when-inode-rooted-btrees-are-involved.patch
 xfs-only-run-precommits-once-per-transaction-object.patch
+bpf-check-size-for-btf-based-ctx-access-of-pointer-members.patch
+bpf-fix-theoretical-prog_array-uaf-in-__uprobe_perf_func.patch
+bpf-perf-fix-invalid-prog_array-access-in-perf_event_detach_bpf_prog.patch
+bpf-sockmap-fix-race-between-element-replace-and-close.patch
+bpf-sockmap-fix-update-element-with-same.patch