--- /dev/null
+From 838a10bd2ebfe11a60dd67687533a7cfc220cc86 Mon Sep 17 00:00:00 2001
+From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
+Date: Fri, 13 Dec 2024 14:19:28 -0800
+Subject: bpf: Augment raw_tp arguments with PTR_MAYBE_NULL
+
+From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
+
+commit 838a10bd2ebfe11a60dd67687533a7cfc220cc86 upstream.
+
+Arguments to a raw tracepoint are tagged as trusted, which carries the
+semantics that the pointer will be non-NULL. However, in certain cases,
+a raw tracepoint argument may end up being NULL. More context about this
+issue is available in [0].
+
+Thus, there is a discrepancy between reality (raw_tp arguments can
+actually be NULL) and the verifier's knowledge (that they are never
+NULL), causing explicit NULL check branches to be dead code eliminated.
+
+A previous attempt [1], i.e. the second fixed commit, simulated
+symbolic execution as if the argument were a non-NULL raw_tp in most
+accesses, except for conditional jumps. This tried to suppress the
+verifier's branch prediction (and thus dead code elimination) while
+preserving compatibility, but surfaced issues with production programs
+that were difficult to solve without increasing verifier complexity. A
+more complete discussion of the issues and fixes is available at [2].
+
+Fix this by maintaining an explicit list of tracepoints where the
+arguments are known to be NULL, and mark the positional arguments as
+PTR_MAYBE_NULL. Additionally, capture the tracepoints where arguments
+are known to be ERR_PTR, and mark these arguments as scalar values to
+prevent potential dereference.
+
+Each hex digit encodes NULL-ness (0x1) or ERR_PTR-ness (0x2), shifted
+left by the zero-indexed argument number * 4. This can be represented
+as follows:
+1st arg: 0x1
+2nd arg: 0x10
+3rd arg: 0x100
+... and so on (likewise for ERR_PTR case).
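+
+As a condensed view of how btf_ctx_access() consumes the mask for a
+zero-indexed argument number arg (see the full hunk below):
+
+  if (raw_tp_null_args[i].mask & (0x1 << (arg * 4)))
+          info->reg_type |= PTR_MAYBE_NULL;
+  if (raw_tp_null_args[i].mask & (0x2 << (arg * 4)))
+          ptr_err_raw_tp = true;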
+
+In the future, an automated pass will be used to produce such a list,
+or to insert __nullable annotations for tracepoints automatically. Each
+compilation unit will be analyzed and the results collated to find
+whether a tracepoint pointer is definitely not null, maybe null, or in
+an unknown state, in which case the verifier conservatively marks it
+PTR_MAYBE_NULL. A proof of concept of this tool from Eduard is
+available at [3].
+
+Note that in case we don't find a specification in the raw_tp_null_args
+array and the tracepoint belongs to a kernel module, we will
+conservatively mark the arguments as PTR_MAYBE_NULL. This is because
+unlike for in-tree modules, out-of-tree module tracepoints may pass NULL
+freely to the tracepoint. We don't protect against such tracepoints
+passing ERR_PTR (which is uncommon anyway), lest we mark all such
+arguments as SCALAR_VALUE.
+
+While we are at it, let's adjust the raw_tp_null test to not
+dereference skb->mark, as that won't be allowed anymore, and make it
+more robust by using inline assembly to test the dead code elimination
+behavior, which should still stay the same.
+
+ [0]: https://lore.kernel.org/bpf/ZrCZS6nisraEqehw@jlelli-thinkpadt14gen4.remote.csb
+ [1]: https://lore.kernel.org/all/20241104171959.2938862-1-memxor@gmail.com
+ [2]: https://lore.kernel.org/bpf/20241206161053.809580-1-memxor@gmail.com
+ [3]: https://github.com/eddyz87/llvm-project/tree/nullness-for-tracepoint-params
+
+Reported-by: Juri Lelli <juri.lelli@redhat.com> # original bug
+Reported-by: Manu Bretelle <chantra@meta.com> # bugs in masking fix
+Fixes: 3f00c5239344 ("bpf: Allow trusted pointers to be passed to KF_TRUSTED_ARGS kfuncs")
+Fixes: cb4158ce8ec8 ("bpf: Mark raw_tp arguments with PTR_MAYBE_NULL")
+Reviewed-by: Eduard Zingerman <eddyz87@gmail.com>
+Co-developed-by: Jiri Olsa <jolsa@kernel.org>
+Signed-off-by: Jiri Olsa <jolsa@kernel.org>
+Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
+Link: https://lore.kernel.org/r/20241213221929.3495062-3-memxor@gmail.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/btf.c | 138 ++++++++++++++++++
+ .../testing/selftests/bpf/progs/raw_tp_null.c | 19 ++-
+ 2 files changed, 147 insertions(+), 10 deletions(-)
+
+diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
+index c4aa304028ce..e5a5f023cedd 100644
+--- a/kernel/bpf/btf.c
++++ b/kernel/bpf/btf.c
+@@ -6439,6 +6439,101 @@ int btf_ctx_arg_offset(const struct btf *btf, const struct btf_type *func_proto,
+ return off;
+ }
+
++struct bpf_raw_tp_null_args {
++ const char *func;
++ u64 mask;
++};
++
++static const struct bpf_raw_tp_null_args raw_tp_null_args[] = {
++ /* sched */
++ { "sched_pi_setprio", 0x10 },
++ /* ... from sched_numa_pair_template event class */
++ { "sched_stick_numa", 0x100 },
++ { "sched_swap_numa", 0x100 },
++ /* afs */
++ { "afs_make_fs_call", 0x10 },
++ { "afs_make_fs_calli", 0x10 },
++ { "afs_make_fs_call1", 0x10 },
++ { "afs_make_fs_call2", 0x10 },
++ { "afs_protocol_error", 0x1 },
++ { "afs_flock_ev", 0x10 },
++ /* cachefiles */
++ { "cachefiles_lookup", 0x1 | 0x200 },
++ { "cachefiles_unlink", 0x1 },
++ { "cachefiles_rename", 0x1 },
++ { "cachefiles_prep_read", 0x1 },
++ { "cachefiles_mark_active", 0x1 },
++ { "cachefiles_mark_failed", 0x1 },
++ { "cachefiles_mark_inactive", 0x1 },
++ { "cachefiles_vfs_error", 0x1 },
++ { "cachefiles_io_error", 0x1 },
++ { "cachefiles_ondemand_open", 0x1 },
++ { "cachefiles_ondemand_copen", 0x1 },
++ { "cachefiles_ondemand_close", 0x1 },
++ { "cachefiles_ondemand_read", 0x1 },
++ { "cachefiles_ondemand_cread", 0x1 },
++ { "cachefiles_ondemand_fd_write", 0x1 },
++ { "cachefiles_ondemand_fd_release", 0x1 },
++ /* ext4, from ext4__mballoc event class */
++ { "ext4_mballoc_discard", 0x10 },
++ { "ext4_mballoc_free", 0x10 },
++ /* fib */
++ { "fib_table_lookup", 0x100 },
++ /* filelock */
++ /* ... from filelock_lock event class */
++ { "posix_lock_inode", 0x10 },
++ { "fcntl_setlk", 0x10 },
++ { "locks_remove_posix", 0x10 },
++ { "flock_lock_inode", 0x10 },
++ /* ... from filelock_lease event class */
++ { "break_lease_noblock", 0x10 },
++ { "break_lease_block", 0x10 },
++ { "break_lease_unblock", 0x10 },
++ { "generic_delete_lease", 0x10 },
++ { "time_out_leases", 0x10 },
++ /* host1x */
++ { "host1x_cdma_push_gather", 0x10000 },
++ /* huge_memory */
++ { "mm_khugepaged_scan_pmd", 0x10 },
++ { "mm_collapse_huge_page_isolate", 0x1 },
++ { "mm_khugepaged_scan_file", 0x10 },
++ { "mm_khugepaged_collapse_file", 0x10 },
++ /* kmem */
++ { "mm_page_alloc", 0x1 },
++ { "mm_page_pcpu_drain", 0x1 },
++ /* .. from mm_page event class */
++ { "mm_page_alloc_zone_locked", 0x1 },
++ /* netfs */
++ { "netfs_failure", 0x10 },
++ /* power */
++ { "device_pm_callback_start", 0x10 },
++ /* qdisc */
++ { "qdisc_dequeue", 0x1000 },
++ /* rxrpc */
++ { "rxrpc_recvdata", 0x1 },
++ { "rxrpc_resend", 0x10 },
++ /* sunrpc */
++ { "xs_stream_read_data", 0x1 },
++ /* ... from xprt_cong_event event class */
++ { "xprt_reserve_cong", 0x10 },
++ { "xprt_release_cong", 0x10 },
++ { "xprt_get_cong", 0x10 },
++ { "xprt_put_cong", 0x10 },
++ /* tcp */
++ { "tcp_send_reset", 0x11 },
++ /* tegra_apb_dma */
++ { "tegra_dma_tx_status", 0x100 },
++ /* timer_migration */
++ { "tmigr_update_events", 0x1 },
++ /* writeback, from writeback_folio_template event class */
++ { "writeback_dirty_folio", 0x10 },
++ { "folio_wait_writeback", 0x10 },
++ /* rdma */
++ { "mr_integ_alloc", 0x2000 },
++ /* bpf_testmod */
++ { "bpf_testmod_test_read", 0x0 },
++};
++
+ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info)
+@@ -6449,6 +6544,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
+ const char *tname = prog->aux->attach_func_name;
+ struct bpf_verifier_log *log = info->log;
+ const struct btf_param *args;
++ bool ptr_err_raw_tp = false;
+ const char *tag_value;
+ u32 nr_args, arg;
+ int i, ret;
+@@ -6597,6 +6693,39 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
+ if (btf_param_match_suffix(btf, &args[arg], "__nullable"))
+ info->reg_type |= PTR_MAYBE_NULL;
+
++ if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
++ struct btf *btf = prog->aux->attach_btf;
++ const struct btf_type *t;
++ const char *tname;
++
++ /* BTF lookups cannot fail, return false on error */
++ t = btf_type_by_id(btf, prog->aux->attach_btf_id);
++ if (!t)
++ return false;
++ tname = btf_name_by_offset(btf, t->name_off);
++ if (!tname)
++ return false;
++ /* Checked by bpf_check_attach_target */
++ tname += sizeof("btf_trace_") - 1;
++ for (i = 0; i < ARRAY_SIZE(raw_tp_null_args); i++) {
++ /* Is this a func with potential NULL args? */
++ if (strcmp(tname, raw_tp_null_args[i].func))
++ continue;
++ if (raw_tp_null_args[i].mask & (0x1 << (arg * 4)))
++ info->reg_type |= PTR_MAYBE_NULL;
++ /* Is the current arg IS_ERR? */
++ if (raw_tp_null_args[i].mask & (0x2 << (arg * 4)))
++ ptr_err_raw_tp = true;
++ break;
++ }
++ /* If we don't know NULL-ness specification and the tracepoint
++ * is coming from a loadable module, be conservative and mark
++ * argument as PTR_MAYBE_NULL.
++ */
++ if (i == ARRAY_SIZE(raw_tp_null_args) && btf_is_module(btf))
++ info->reg_type |= PTR_MAYBE_NULL;
++ }
++
+ if (tgt_prog) {
+ enum bpf_prog_type tgt_type;
+
+@@ -6641,6 +6770,15 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
+ bpf_log(log, "func '%s' arg%d has btf_id %d type %s '%s'\n",
+ tname, arg, info->btf_id, btf_type_str(t),
+ __btf_name_by_offset(btf, t->name_off));
++
++ /* Perform all checks on the validity of type for this argument, but if
++ * we know it can be IS_ERR at runtime, scrub pointer type and mark as
++ * scalar.
++ */
++ if (ptr_err_raw_tp) {
++ bpf_log(log, "marking pointer arg%d as scalar as it may encode error", arg);
++ info->reg_type = SCALAR_VALUE;
++ }
+ return true;
+ }
+ EXPORT_SYMBOL_GPL(btf_ctx_access);
+diff --git a/tools/testing/selftests/bpf/progs/raw_tp_null.c b/tools/testing/selftests/bpf/progs/raw_tp_null.c
+index 457f34c151e3..5927054b6dd9 100644
+--- a/tools/testing/selftests/bpf/progs/raw_tp_null.c
++++ b/tools/testing/selftests/bpf/progs/raw_tp_null.c
+@@ -3,6 +3,7 @@
+
+ #include <vmlinux.h>
+ #include <bpf/bpf_tracing.h>
++#include "bpf_misc.h"
+
+ char _license[] SEC("license") = "GPL";
+
+@@ -17,16 +18,14 @@ int BPF_PROG(test_raw_tp_null, struct sk_buff *skb)
+ if (task->pid != tid)
+ return 0;
+
+- i = i + skb->mark + 1;
+- /* The compiler may move the NULL check before this deref, which causes
+- * the load to fail as deref of scalar. Prevent that by using a barrier.
++ /* If dead code elimination kicks in, the increment +=2 will be
++ * removed. For raw_tp programs attaching to tracepoints in kernel
++ * modules, we mark input arguments as PTR_MAYBE_NULL, so branch
++ * prediction should never kick in.
+ */
+- barrier();
+- /* If dead code elimination kicks in, the increment below will
+- * be removed. For raw_tp programs, we mark input arguments as
+- * PTR_MAYBE_NULL, so branch prediction should never kick in.
+- */
+- if (!skb)
+- i += 2;
++ asm volatile ("%[i] += 1; if %[ctx] != 0 goto +1; %[i] += 2;"
++ : [i]"+r"(i)
++ : [ctx]"r"(skb)
++ : "memory");
+ return 0;
+ }
+--
+2.47.1
+
--- /dev/null
+From 659b9ba7cb2d7adb64618b87ddfaa528a143766e Mon Sep 17 00:00:00 2001
+From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
+Date: Thu, 12 Dec 2024 01:20:49 -0800
+Subject: bpf: Check size for BTF-based ctx access of pointer members
+
+From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
+
+commit 659b9ba7cb2d7adb64618b87ddfaa528a143766e upstream.
+
+Robert Morris reported in [0] the following program, which passes the
+verifier:
+
+SEC("struct_ops/bpf_cubic_init")
+void BPF_PROG(bpf_cubic_init, struct sock *sk)
+{
+ asm volatile("r2 = *(u16*)(r1 + 0)"); // verifier should demand u64
+ asm volatile("*(u32 *)(r2 +1504) = 0"); // 1280 in some configs
+}
+
+The second line may or may not work, but the first instruction shouldn't
+pass, as it's a narrow load into the context structure of the struct ops
+callback. The code falls back to btf_ctx_access to ensure correctness
+and to obtain the types of pointers. Ensure that the size of the access
+is correctly checked to be 8 bytes, otherwise the verifier thinks the
+narrow load obtained a trusted BTF pointer and will permit loads/stores
+as it sees fit.
+
+Perform the check on size after we've verified that the load is for a
+pointer field, as for scalar values narrow loads are fine. Access to
+structs passed as arguments to a BPF program are also treated as
+scalars, therefore no adjustment is needed in their case.
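+
+For pointer arguments, the only accepted access is thus a full 8-byte
+load; condensed from the selftest adjustments below:
+
+  asm volatile("r2 = *(u64 *)(r1 + 0)"); // accepted: full-width pointer load
+  asm volatile("r2 = *(u16 *)(r1 + 0)"); // rejected: size must be 8 for pointers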
+
+Existing verifier selftests are broken by this change, but only because
+they were incorrect. The verifier tests for d_path were performing a
+narrow load into the context to obtain the path pointer; had these
+programs actually run, they would have crashed. The same holds for the
+verifier_btf_ctx_access tests.
+
+ [0]: https://lore.kernel.org/bpf/51338.1732985814@localhost
+
+Fixes: 9e15db66136a ("bpf: Implement accurate raw_tp context access via BTF")
+Reported-by: Robert Morris <rtm@mit.edu>
+Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
+Link: https://lore.kernel.org/r/20241212092050.3204165-2-memxor@gmail.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/btf.c | 6 ++++++
+ tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c | 4 ++--
+ tools/testing/selftests/bpf/progs/verifier_d_path.c | 4 ++--
+ 3 files changed, 10 insertions(+), 4 deletions(-)
+
+--- a/kernel/bpf/btf.c
++++ b/kernel/bpf/btf.c
+@@ -6519,6 +6519,12 @@ bool btf_ctx_access(int off, int size, e
+ return false;
+ }
+
++ if (size != sizeof(u64)) {
++ bpf_log(log, "func '%s' size %d must be 8\n",
++ tname, size);
++ return false;
++ }
++
+ /* check for PTR_TO_RDONLY_BUF_OR_NULL or PTR_TO_RDWR_BUF_OR_NULL */
+ for (i = 0; i < prog->aux->ctx_arg_info_size; i++) {
+ const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i];
+--- a/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c
++++ b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c
+@@ -11,7 +11,7 @@ __success __retval(0)
+ __naked void btf_ctx_access_accept(void)
+ {
+ asm volatile (" \
+- r2 = *(u32*)(r1 + 8); /* load 2nd argument value (int pointer) */\
++ r2 = *(u64 *)(r1 + 8); /* load 2nd argument value (int pointer) */\
+ r0 = 0; \
+ exit; \
+ " ::: __clobber_all);
+@@ -23,7 +23,7 @@ __success __retval(0)
+ __naked void ctx_access_u32_pointer_accept(void)
+ {
+ asm volatile (" \
+- r2 = *(u32*)(r1 + 0); /* load 1nd argument value (u32 pointer) */\
++ r2 = *(u64 *)(r1 + 0); /* load 1nd argument value (u32 pointer) */\
+ r0 = 0; \
+ exit; \
+ " ::: __clobber_all);
+--- a/tools/testing/selftests/bpf/progs/verifier_d_path.c
++++ b/tools/testing/selftests/bpf/progs/verifier_d_path.c
+@@ -11,7 +11,7 @@ __success __retval(0)
+ __naked void d_path_accept(void)
+ {
+ asm volatile (" \
+- r1 = *(u32*)(r1 + 0); \
++ r1 = *(u64 *)(r1 + 0); \
+ r2 = r10; \
+ r2 += -8; \
+ r6 = 0; \
+@@ -31,7 +31,7 @@ __failure __msg("helper call is not allo
+ __naked void d_path_reject(void)
+ {
+ asm volatile (" \
+- r1 = *(u32*)(r1 + 0); \
++ r1 = *(u64 *)(r1 + 0); \
+ r2 = r10; \
+ r2 += -8; \
+ r6 = 0; \
--- /dev/null
+From 7d0d673627e20cfa3b21a829a896ce03b58a4f1c Mon Sep 17 00:00:00 2001
+From: Jann Horn <jannh@google.com>
+Date: Tue, 10 Dec 2024 20:08:14 +0100
+Subject: bpf: Fix theoretical prog_array UAF in __uprobe_perf_func()
+
+From: Jann Horn <jannh@google.com>
+
+commit 7d0d673627e20cfa3b21a829a896ce03b58a4f1c upstream.
+
+Currently, the pointer stored in call->prog_array is loaded in
+__uprobe_perf_func(), with no RCU annotation and no immediately visible
+RCU protection, so it looks as if the loaded pointer can immediately be
+dangling.
+Later, bpf_prog_run_array_uprobe() starts a RCU-trace read-side critical
+section, but this is too late. It then uses rcu_dereference_check(), but
+this use of rcu_dereference_check() does not actually dereference anything.
+
+Fix it by aligning the semantics to bpf_prog_run_array(): Let the caller
+provide rcu_read_lock_trace() protection and then load call->prog_array
+with rcu_dereference_check().
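+
+The resulting caller pattern in __uprobe_perf_func() (condensed from
+the hunk below):
+
+  rcu_read_lock_trace();
+  array = rcu_dereference_check(call->prog_array, rcu_read_lock_trace_held());
+  ret = bpf_prog_run_array_uprobe(array, regs, bpf_prog_run);
+  rcu_read_unlock_trace();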
+
+This issue seems to be theoretical: I don't know of any way to reach this
+code without having handle_swbp() further up the stack, which already
+holds an rcu_read_lock_trace() lock, so the rcu_read_lock_trace() taken
+in __uprobe_perf_func()/bpf_prog_run_array_uprobe() doesn't actually
+have any effect.
+
+Fixes: 8c7dcb84e3b7 ("bpf: implement sleepable uprobes by chaining gps")
+Suggested-by: Andrii Nakryiko <andrii@kernel.org>
+Signed-off-by: Jann Horn <jannh@google.com>
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Link: https://lore.kernel.org/bpf/20241210-bpf-fix-uprobe-uaf-v4-1-5fc8959b2b74@google.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/bpf.h | 13 +++++--------
+ kernel/trace/trace_uprobe.c | 6 +++++-
+ 2 files changed, 10 insertions(+), 9 deletions(-)
+
+--- a/include/linux/bpf.h
++++ b/include/linux/bpf.h
+@@ -2157,26 +2157,25 @@ bpf_prog_run_array(const struct bpf_prog
+ * rcu-protected dynamically sized maps.
+ */
+ static __always_inline u32
+-bpf_prog_run_array_uprobe(const struct bpf_prog_array __rcu *array_rcu,
++bpf_prog_run_array_uprobe(const struct bpf_prog_array *array,
+ const void *ctx, bpf_prog_run_fn run_prog)
+ {
+ const struct bpf_prog_array_item *item;
+ const struct bpf_prog *prog;
+- const struct bpf_prog_array *array;
+ struct bpf_run_ctx *old_run_ctx;
+ struct bpf_trace_run_ctx run_ctx;
+ u32 ret = 1;
+
+ might_fault();
++ RCU_LOCKDEP_WARN(!rcu_read_lock_trace_held(), "no rcu lock held");
++
++ if (unlikely(!array))
++ return ret;
+
+- rcu_read_lock_trace();
+ migrate_disable();
+
+ run_ctx.is_uprobe = true;
+
+- array = rcu_dereference_check(array_rcu, rcu_read_lock_trace_held());
+- if (unlikely(!array))
+- goto out;
+ old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+ item = &array->items[0];
+ while ((prog = READ_ONCE(item->prog))) {
+@@ -2191,9 +2190,7 @@ bpf_prog_run_array_uprobe(const struct b
+ rcu_read_unlock();
+ }
+ bpf_reset_run_ctx(old_run_ctx);
+-out:
+ migrate_enable();
+- rcu_read_unlock_trace();
+ return ret;
+ }
+
+--- a/kernel/trace/trace_uprobe.c
++++ b/kernel/trace/trace_uprobe.c
+@@ -1400,9 +1400,13 @@ static void __uprobe_perf_func(struct tr
+
+ #ifdef CONFIG_BPF_EVENTS
+ if (bpf_prog_array_valid(call)) {
++ const struct bpf_prog_array *array;
+ u32 ret;
+
+- ret = bpf_prog_run_array_uprobe(call->prog_array, regs, bpf_prog_run);
++ rcu_read_lock_trace();
++ array = rcu_dereference_check(call->prog_array, rcu_read_lock_trace_held());
++ ret = bpf_prog_run_array_uprobe(array, regs, bpf_prog_run);
++ rcu_read_unlock_trace();
+ if (!ret)
+ return;
+ }
--- /dev/null
+From 978c4486cca5c7b9253d3ab98a88c8e769cb9bbd Mon Sep 17 00:00:00 2001
+From: Jiri Olsa <jolsa@kernel.org>
+Date: Sun, 8 Dec 2024 15:25:07 +0100
+Subject: bpf,perf: Fix invalid prog_array access in perf_event_detach_bpf_prog
+
+From: Jiri Olsa <jolsa@kernel.org>
+
+commit 978c4486cca5c7b9253d3ab98a88c8e769cb9bbd upstream.
+
+Syzbot reported [1] a crash that happens in the following tracing scenario:
+
+ - create a tracepoint perf event with attr.inherit=1, attach it to the
+   process and set a bpf program on it
+ - the attached process forks -> the child creates an inherited event
+
+   the new child event shares the parent's bpf program and tp_event
+   (hence prog_array), which is global for the tracepoint
+
+ - both the process and its child exit -> both events are released
+ - the first perf_event_detach_bpf_prog call releases tp_event->prog_array
+   and the second perf_event_detach_bpf_prog crashes, because
+   tp_event->prog_array is NULL
+
+The fix makes sure perf_event_detach_bpf_prog checks that prog_array
+is valid before it tries to remove the bpf program from it.
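+
+In code terms, the detach path now bails out early (condensed from the
+hunk below):
+
+  old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
+  if (!old_array)
+          goto put;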
+
+[1] https://lore.kernel.org/bpf/Z1MR6dCIKajNS6nU@krava/T/#m91dbf0688221ec7a7fc95e896a7ef9ff93b0b8ad
+
+Fixes: 0ee288e69d03 ("bpf,perf: Fix perf_event_detach_bpf_prog error handling")
+Reported-by: syzbot+2e0d2840414ce817aaac@syzkaller.appspotmail.com
+Signed-off-by: Jiri Olsa <jolsa@kernel.org>
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Link: https://lore.kernel.org/bpf/20241208142507.1207698-1-jolsa@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/bpf_trace.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/kernel/trace/bpf_trace.c
++++ b/kernel/trace/bpf_trace.c
+@@ -2215,6 +2215,9 @@ void perf_event_detach_bpf_prog(struct p
+ goto unlock;
+
+ old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
++ if (!old_array)
++ goto put;
++
+ ret = bpf_prog_array_copy(old_array, event->prog, NULL, 0, &new_array);
+ if (ret < 0) {
+ bpf_prog_array_delete_safe(old_array, event->prog);
+@@ -2223,6 +2226,7 @@ void perf_event_detach_bpf_prog(struct p
+ bpf_prog_array_free_sleepable(old_array);
+ }
+
++put:
+ /*
+ * It could be that the bpf_prog is not sleepable (and will be freed
+ * via normal RCU), but is called from a point that supports sleepable
--- /dev/null
+From ed1fc5d76b81a4d681211333c026202cad4d5649 Mon Sep 17 00:00:00 2001
+From: Michal Luczaj <mhal@rbox.co>
+Date: Mon, 2 Dec 2024 12:29:25 +0100
+Subject: bpf, sockmap: Fix race between element replace and close()
+
+From: Michal Luczaj <mhal@rbox.co>
+
+commit ed1fc5d76b81a4d681211333c026202cad4d5649 upstream.
+
+Element replace (with a socket different from the one stored) may race
+with the socket's close() popping and unlinking its links.
+__sock_map_delete() unconditionally unrefs the (wrong) element:
+
+// set map[0] = s0
+map_update_elem(map, 0, s0)
+
+// drop fd of s0
+close(s0)
+ sock_map_close()
+ lock_sock(sk) (s0!)
+ sock_map_remove_links(sk)
+ link = sk_psock_link_pop()
+ sock_map_unlink(sk, link)
+ sock_map_delete_from_link
+ // replace map[0] with s1
+ map_update_elem(map, 0, s1)
+ sock_map_update_elem
+ (s1!) lock_sock(sk)
+ sock_map_update_common
+ psock = sk_psock(sk)
+ spin_lock(&stab->lock)
+ osk = stab->sks[idx]
+ sock_map_add_link(..., &stab->sks[idx])
+ sock_map_unref(osk, &stab->sks[idx])
+ psock = sk_psock(osk)
+ sk_psock_put(sk, psock)
+ if (refcount_dec_and_test(&psock))
+ sk_psock_drop(sk, psock)
+ spin_unlock(&stab->lock)
+ unlock_sock(sk)
+ __sock_map_delete
+ spin_lock(&stab->lock)
+ sk = *psk // s1 replaced s0; sk == s1
+ if (!sk_test || sk_test == sk) // sk_test (s0) != sk (s1); no branch
+ sk = xchg(psk, NULL)
+ if (sk)
+ sock_map_unref(sk, psk) // unref s1; sks[idx] will dangle
+ psock = sk_psock(sk)
+ sk_psock_put(sk, psock)
+ if (refcount_dec_and_test())
+ sk_psock_drop(sk, psock)
+ spin_unlock(&stab->lock)
+ release_sock(sk)
+
+Then close(map) enqueues bpf_map_free_deferred, which finally calls
+sock_map_free(). This results in some refcount_t warnings along with
+a KASAN splat [1].
+
+Fix __sock_map_delete(): do not allow sock_map_unref() on elements
+that may have been replaced.
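+
+The essence of the fix (condensed from the diff below): take a
+reference only when the element actually matched, so a replaced
+element is left alone:
+
+  struct sock *sk = NULL;
+
+  if (!sk_test || sk_test == *psk)
+          sk = xchg(psk, NULL);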
+
+[1]:
+BUG: KASAN: slab-use-after-free in sock_map_free+0x10e/0x330
+Write of size 4 at addr ffff88811f5b9100 by task kworker/u64:12/1063
+
+CPU: 14 UID: 0 PID: 1063 Comm: kworker/u64:12 Not tainted 6.12.0+ #125
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.3-1-1 04/01/2014
+Workqueue: events_unbound bpf_map_free_deferred
+Call Trace:
+ <TASK>
+ dump_stack_lvl+0x68/0x90
+ print_report+0x174/0x4f6
+ kasan_report+0xb9/0x190
+ kasan_check_range+0x10f/0x1e0
+ sock_map_free+0x10e/0x330
+ bpf_map_free_deferred+0x173/0x320
+ process_one_work+0x846/0x1420
+ worker_thread+0x5b3/0xf80
+ kthread+0x29e/0x360
+ ret_from_fork+0x2d/0x70
+ ret_from_fork_asm+0x1a/0x30
+ </TASK>
+
+Allocated by task 1202:
+ kasan_save_stack+0x1e/0x40
+ kasan_save_track+0x10/0x30
+ __kasan_slab_alloc+0x85/0x90
+ kmem_cache_alloc_noprof+0x131/0x450
+ sk_prot_alloc+0x5b/0x220
+ sk_alloc+0x2c/0x870
+ unix_create1+0x88/0x8a0
+ unix_create+0xc5/0x180
+ __sock_create+0x241/0x650
+ __sys_socketpair+0x1ce/0x420
+ __x64_sys_socketpair+0x92/0x100
+ do_syscall_64+0x93/0x180
+ entry_SYSCALL_64_after_hwframe+0x76/0x7e
+
+Freed by task 46:
+ kasan_save_stack+0x1e/0x40
+ kasan_save_track+0x10/0x30
+ kasan_save_free_info+0x37/0x60
+ __kasan_slab_free+0x4b/0x70
+ kmem_cache_free+0x1a1/0x590
+ __sk_destruct+0x388/0x5a0
+ sk_psock_destroy+0x73e/0xa50
+ process_one_work+0x846/0x1420
+ worker_thread+0x5b3/0xf80
+ kthread+0x29e/0x360
+ ret_from_fork+0x2d/0x70
+ ret_from_fork_asm+0x1a/0x30
+
+The buggy address belongs to the object at ffff88811f5b9080
+ which belongs to the cache UNIX-STREAM of size 1984
+The buggy address is located 128 bytes inside of
+ freed 1984-byte region [ffff88811f5b9080, ffff88811f5b9840)
+
+The buggy address belongs to the physical page:
+page: refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x11f5b8
+head: order:3 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0
+memcg:ffff888127d49401
+flags: 0x17ffffc0000040(head|node=0|zone=2|lastcpupid=0x1fffff)
+page_type: f5(slab)
+raw: 0017ffffc0000040 ffff8881042e4500 dead000000000122 0000000000000000
+raw: 0000000000000000 00000000800f000f 00000001f5000000 ffff888127d49401
+head: 0017ffffc0000040 ffff8881042e4500 dead000000000122 0000000000000000
+head: 0000000000000000 00000000800f000f 00000001f5000000 ffff888127d49401
+head: 0017ffffc0000003 ffffea00047d6e01 ffffffffffffffff 0000000000000000
+head: 0000000000000008 0000000000000000 00000000ffffffff 0000000000000000
+page dumped because: kasan: bad access detected
+
+Memory state around the buggy address:
+ ffff88811f5b9000: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+ ffff88811f5b9080: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ ^
+ ffff88811f5b9180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ ffff88811f5b9200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+Disabling lock debugging due to kernel taint
+
+refcount_t: addition on 0; use-after-free.
+WARNING: CPU: 14 PID: 1063 at lib/refcount.c:25 refcount_warn_saturate+0xce/0x150
+CPU: 14 UID: 0 PID: 1063 Comm: kworker/u64:12 Tainted: G B 6.12.0+ #125
+Tainted: [B]=BAD_PAGE
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.3-1-1 04/01/2014
+Workqueue: events_unbound bpf_map_free_deferred
+RIP: 0010:refcount_warn_saturate+0xce/0x150
+Code: 34 73 eb 03 01 e8 82 53 ad fe 0f 0b eb b1 80 3d 27 73 eb 03 00 75 a8 48 c7 c7 80 bd 95 84 c6 05 17 73 eb 03 01 e8 62 53 ad fe <0f> 0b eb 91 80 3d 06 73 eb 03 00 75 88 48 c7 c7 e0 bd 95 84 c6 05
+RSP: 0018:ffff88815c49fc70 EFLAGS: 00010282
+RAX: 0000000000000000 RBX: ffff88811f5b9100 RCX: 0000000000000000
+RDX: 0000000000000000 RSI: 0000000000000004 RDI: 0000000000000001
+RBP: 0000000000000002 R08: 0000000000000001 R09: ffffed10bcde6349
+R10: ffff8885e6f31a4b R11: 0000000000000000 R12: ffff88813be0b000
+R13: ffff88811f5b9100 R14: ffff88811f5b9080 R15: ffff88813be0b024
+FS: 0000000000000000(0000) GS:ffff8885e6f00000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 000055dda99b0250 CR3: 000000015dbac000 CR4: 0000000000752ef0
+PKRU: 55555554
+Call Trace:
+ <TASK>
+ ? __warn.cold+0x5f/0x1ff
+ ? refcount_warn_saturate+0xce/0x150
+ ? report_bug+0x1ec/0x390
+ ? handle_bug+0x58/0x90
+ ? exc_invalid_op+0x13/0x40
+ ? asm_exc_invalid_op+0x16/0x20
+ ? refcount_warn_saturate+0xce/0x150
+ sock_map_free+0x2e5/0x330
+ bpf_map_free_deferred+0x173/0x320
+ process_one_work+0x846/0x1420
+ worker_thread+0x5b3/0xf80
+ kthread+0x29e/0x360
+ ret_from_fork+0x2d/0x70
+ ret_from_fork_asm+0x1a/0x30
+ </TASK>
+irq event stamp: 10741
+hardirqs last enabled at (10741): [<ffffffff84400ec6>] asm_sysvec_apic_timer_interrupt+0x16/0x20
+hardirqs last disabled at (10740): [<ffffffff811e532d>] handle_softirqs+0x60d/0x770
+softirqs last enabled at (10506): [<ffffffff811e55a9>] __irq_exit_rcu+0x109/0x210
+softirqs last disabled at (10301): [<ffffffff811e55a9>] __irq_exit_rcu+0x109/0x210
+
+refcount_t: underflow; use-after-free.
+WARNING: CPU: 14 PID: 1063 at lib/refcount.c:28 refcount_warn_saturate+0xee/0x150
+CPU: 14 UID: 0 PID: 1063 Comm: kworker/u64:12 Tainted: G B W 6.12.0+ #125
+Tainted: [B]=BAD_PAGE, [W]=WARN
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.3-1-1 04/01/2014
+Workqueue: events_unbound bpf_map_free_deferred
+RIP: 0010:refcount_warn_saturate+0xee/0x150
+Code: 17 73 eb 03 01 e8 62 53 ad fe 0f 0b eb 91 80 3d 06 73 eb 03 00 75 88 48 c7 c7 e0 bd 95 84 c6 05 f6 72 eb 03 01 e8 42 53 ad fe <0f> 0b e9 6e ff ff ff 80 3d e6 72 eb 03 00 0f 85 61 ff ff ff 48 c7
+RSP: 0018:ffff88815c49fc70 EFLAGS: 00010282
+RAX: 0000000000000000 RBX: ffff88811f5b9100 RCX: 0000000000000000
+RDX: 0000000000000000 RSI: 0000000000000004 RDI: 0000000000000001
+RBP: 0000000000000003 R08: 0000000000000001 R09: ffffed10bcde6349
+R10: ffff8885e6f31a4b R11: 0000000000000000 R12: ffff88813be0b000
+R13: ffff88811f5b9100 R14: ffff88811f5b9080 R15: ffff88813be0b024
+FS: 0000000000000000(0000) GS:ffff8885e6f00000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 000055dda99b0250 CR3: 000000015dbac000 CR4: 0000000000752ef0
+PKRU: 55555554
+Call Trace:
+ <TASK>
+ ? __warn.cold+0x5f/0x1ff
+ ? refcount_warn_saturate+0xee/0x150
+ ? report_bug+0x1ec/0x390
+ ? handle_bug+0x58/0x90
+ ? exc_invalid_op+0x13/0x40
+ ? asm_exc_invalid_op+0x16/0x20
+ ? refcount_warn_saturate+0xee/0x150
+ sock_map_free+0x2d3/0x330
+ bpf_map_free_deferred+0x173/0x320
+ process_one_work+0x846/0x1420
+ worker_thread+0x5b3/0xf80
+ kthread+0x29e/0x360
+ ret_from_fork+0x2d/0x70
+ ret_from_fork_asm+0x1a/0x30
+ </TASK>
+irq event stamp: 10741
+hardirqs last enabled at (10741): [<ffffffff84400ec6>] asm_sysvec_apic_timer_interrupt+0x16/0x20
+hardirqs last disabled at (10740): [<ffffffff811e532d>] handle_softirqs+0x60d/0x770
+softirqs last enabled at (10506): [<ffffffff811e55a9>] __irq_exit_rcu+0x109/0x210
+softirqs last disabled at (10301): [<ffffffff811e55a9>] __irq_exit_rcu+0x109/0x210
+
+Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface")
+Signed-off-by: Michal Luczaj <mhal@rbox.co>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Reviewed-by: John Fastabend <john.fastabend@gmail.com>
+Link: https://lore.kernel.org/bpf/20241202-sockmap-replace-v1-3-1e88579e7bd5@rbox.co
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/sock_map.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/net/core/sock_map.c
++++ b/net/core/sock_map.c
+@@ -411,12 +411,11 @@ static void *sock_map_lookup_sys(struct
+ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
+ struct sock **psk)
+ {
+- struct sock *sk;
++ struct sock *sk = NULL;
+ int err = 0;
+
+ spin_lock_bh(&stab->lock);
+- sk = *psk;
+- if (!sk_test || sk_test == sk)
++ if (!sk_test || sk_test == *psk)
+ sk = xchg(psk, NULL);
+
+ if (likely(sk))
--- /dev/null
+From 75e072a390da9a22e7ae4a4e8434dfca5da499fb Mon Sep 17 00:00:00 2001
+From: Michal Luczaj <mhal@rbox.co>
+Date: Mon, 2 Dec 2024 12:29:23 +0100
+Subject: bpf, sockmap: Fix update element with same
+
+From: Michal Luczaj <mhal@rbox.co>
+
+commit 75e072a390da9a22e7ae4a4e8434dfca5da499fb upstream.
+
+Consider a sockmap entry being updated with the same socket:
+
+ osk = stab->sks[idx];
+ sock_map_add_link(psock, link, map, &stab->sks[idx]);
+ stab->sks[idx] = sk;
+ if (osk)
+ sock_map_unref(osk, &stab->sks[idx]);
+
+Due to sock_map_unref(), which invokes sock_map_del_link(), all of the
+psock's links for stab->sks[idx] are torn down:
+
+ list_for_each_entry_safe(link, tmp, &psock->link, list) {
+ if (link->link_raw == link_raw) {
+ ...
+ list_del(&link->list);
+ sk_psock_free_link(link);
+ }
+ }
+
+And that includes the new link sock_map_add_link() added just before
+the unref.
+
+This results in a sockmap holding a socket, but without the respective
+link. This in turn means that close(sock) won't trigger the cleanup,
+i.e. a closed socket will not be automatically removed from the sockmap.
+
+Stop tearing down links once a matching link_raw has been found.
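+
+A hypothetical user-space reproducer sketch (map_fd is a sockmap fd,
+zero the index, sock_fd an established socket):
+
+  bpf_map_update_elem(map_fd, &zero, &sock_fd, BPF_ANY);
+  /* update with the same socket: the fresh link is torn down too */
+  bpf_map_update_elem(map_fd, &zero, &sock_fd, BPF_ANY);
+  /* close() no longer removes the entry; sks[0] keeps the stale socket */
+  close(sock_fd);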
+
+Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface")
+Signed-off-by: Michal Luczaj <mhal@rbox.co>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Reviewed-by: John Fastabend <john.fastabend@gmail.com>
+Link: https://lore.kernel.org/bpf/20241202-sockmap-replace-v1-1-1e88579e7bd5@rbox.co
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/sock_map.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/core/sock_map.c
++++ b/net/core/sock_map.c
+@@ -159,6 +159,7 @@ static void sock_map_del_link(struct soc
+ verdict_stop = true;
+ list_del(&link->list);
+ sk_psock_free_link(link);
++ break;
+ }
+ }
+ spin_unlock_bh(&psock->link_lock);
xfs-fix-scrub-tracepoints-when-inode-rooted-btrees-are-involved.patch
xfs-only-run-precommits-once-per-transaction-object.patch
xfs-unlock-inodes-when-erroring-out-of-xfs_trans_alloc_dir.patch
+bpf-check-size-for-btf-based-ctx-access-of-pointer-members.patch
+bpf-fix-theoretical-prog_array-uaf-in-__uprobe_perf_func.patch
+bpf-perf-fix-invalid-prog_array-access-in-perf_event_detach_bpf_prog.patch
+bpf-sockmap-fix-race-between-element-replace-and-close.patch
+bpf-sockmap-fix-update-element-with-same.patch
+bpf-augment-raw_tp-arguments-with-ptr_maybe_null.patch