From: Greg Kroah-Hartman Date: Mon, 23 Dec 2024 11:32:31 +0000 (+0100) Subject: 6.6-stable patches X-Git-Tag: v6.1.122~22 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=89bddeb3c04dd4e818f6879c920bb7d55de0a0e8;p=thirdparty%2Fkernel%2Fstable-queue.git 6.6-stable patches added patches: btrfs-tree-checker-reject-inline-extent-items-with-0-ref-count.patch drivers-hv-util-avoid-accessing-a-ringbuffer-not-initialized-yet.patch io_uring-check-if-iowq-is-killed-before-queuing.patch io_uring-fix-registered-ring-file-refcount-leak.patch kvm-x86-play-nice-with-protected-guests-in-complete_hypercall_exit.patch selftests-bpf-use-asm-constraint-m-for-loongarch.patch selftests-memfd-run-sysctl-tests-when-pid-namespace-support-is-enabled.patch smb-client-fix-tcp-timers-deadlock-after-rmmod.patch tracing-add-missing-helper-functions-in-event-pointer-dereference-check.patch tracing-add-s-check-in-test_event_printk.patch tracing-fix-test_event_printk-to-process-entire-print-argument.patch --- diff --git a/queue-6.6/btrfs-tree-checker-reject-inline-extent-items-with-0-ref-count.patch b/queue-6.6/btrfs-tree-checker-reject-inline-extent-items-with-0-ref-count.patch new file mode 100644 index 00000000000..c251bbc611d --- /dev/null +++ b/queue-6.6/btrfs-tree-checker-reject-inline-extent-items-with-0-ref-count.patch @@ -0,0 +1,104 @@ +From dfb92681a19e1d5172420baa242806414b3eff6f Mon Sep 17 00:00:00 2001 +From: Qu Wenruo +Date: Wed, 4 Dec 2024 13:30:46 +1030 +Subject: btrfs: tree-checker: reject inline extent items with 0 ref count + +From: Qu Wenruo + +commit dfb92681a19e1d5172420baa242806414b3eff6f upstream. + +[BUG] +There is a bug report in the mailing list where btrfs_run_delayed_refs() +failed to drop the ref count for logical 25870311358464 num_bytes +2113536. 
+ +The involved leaf dump looks like this: + + item 166 key (25870311358464 168 2113536) itemoff 10091 itemsize 50 + extent refs 1 gen 84178 flags 1 + ref#0: shared data backref parent 32399126528000 count 0 <<< + ref#1: shared data backref parent 31808973717504 count 1 + +Notice the count number is 0. + +[CAUSE] +There is no concrete evidence yet, but considering 0 -> 1 is also a +single bit flipped, it's possible that hardware memory bitflip is +involved, causing the on-disk extent tree to be corrupted. + +[FIX] +To prevent us reading such corrupted extent item, or writing such +damaged extent item back to disk, enhance the handling of +BTRFS_EXTENT_DATA_REF_KEY and BTRFS_SHARED_DATA_REF_KEY keys for both +inlined and key items, to detect such 0 ref count and reject them. + +CC: stable@vger.kernel.org # 5.4+ +Link: https://lore.kernel.org/linux-btrfs/7c69dd49-c346-4806-86e7-e6f863a66f48@app.fastmail.com/ +Reported-by: Frankie Fisher +Reviewed-by: Filipe Manana +Signed-off-by: Qu Wenruo +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/tree-checker.c | 27 ++++++++++++++++++++++++++- + 1 file changed, 26 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/tree-checker.c ++++ b/fs/btrfs/tree-checker.c +@@ -1503,6 +1503,11 @@ static int check_extent_item(struct exte + dref_offset, fs_info->sectorsize); + return -EUCLEAN; + } ++ if (unlikely(btrfs_extent_data_ref_count(leaf, dref) == 0)) { ++ extent_err(leaf, slot, ++ "invalid data ref count, should have non-zero value"); ++ return -EUCLEAN; ++ } + inline_refs += btrfs_extent_data_ref_count(leaf, dref); + break; + /* Contains parent bytenr and ref count */ +@@ -1515,6 +1520,11 @@ static int check_extent_item(struct exte + inline_offset, fs_info->sectorsize); + return -EUCLEAN; + } ++ if (unlikely(btrfs_shared_data_ref_count(leaf, sref) == 0)) { ++ extent_err(leaf, slot, ++ "invalid shared data ref count, should have non-zero value"); ++ return -EUCLEAN; ++ } + 
inline_refs += btrfs_shared_data_ref_count(leaf, sref); + break; + default: +@@ -1584,8 +1594,18 @@ static int check_simple_keyed_refs(struc + { + u32 expect_item_size = 0; + +- if (key->type == BTRFS_SHARED_DATA_REF_KEY) ++ if (key->type == BTRFS_SHARED_DATA_REF_KEY) { ++ struct btrfs_shared_data_ref *sref; ++ ++ sref = btrfs_item_ptr(leaf, slot, struct btrfs_shared_data_ref); ++ if (unlikely(btrfs_shared_data_ref_count(leaf, sref) == 0)) { ++ extent_err(leaf, slot, ++ "invalid shared data backref count, should have non-zero value"); ++ return -EUCLEAN; ++ } ++ + expect_item_size = sizeof(struct btrfs_shared_data_ref); ++ } + + if (unlikely(btrfs_item_size(leaf, slot) != expect_item_size)) { + generic_err(leaf, slot, +@@ -1662,6 +1682,11 @@ static int check_extent_data_ref(struct + offset, leaf->fs_info->sectorsize); + return -EUCLEAN; + } ++ if (unlikely(btrfs_extent_data_ref_count(leaf, dref) == 0)) { ++ extent_err(leaf, slot, ++ "invalid extent data backref count, should have non-zero value"); ++ return -EUCLEAN; ++ } + } + return 0; + } diff --git a/queue-6.6/drivers-hv-util-avoid-accessing-a-ringbuffer-not-initialized-yet.patch b/queue-6.6/drivers-hv-util-avoid-accessing-a-ringbuffer-not-initialized-yet.patch new file mode 100644 index 00000000000..7fc3bb17dea --- /dev/null +++ b/queue-6.6/drivers-hv-util-avoid-accessing-a-ringbuffer-not-initialized-yet.patch @@ -0,0 +1,169 @@ +From 07a756a49f4b4290b49ea46e089cbe6f79ff8d26 Mon Sep 17 00:00:00 2001 +From: Michael Kelley +Date: Wed, 6 Nov 2024 07:42:47 -0800 +Subject: Drivers: hv: util: Avoid accessing a ringbuffer not initialized yet + +From: Michael Kelley + +commit 07a756a49f4b4290b49ea46e089cbe6f79ff8d26 upstream. + +If the KVP (or VSS) daemon starts before the VMBus channel's ringbuffer is +fully initialized, we can hit the panic below: + +hv_utils: Registering HyperV Utility Driver +hv_vmbus: registering driver hv_utils +... 
+BUG: kernel NULL pointer dereference, address: 0000000000000000 +CPU: 44 UID: 0 PID: 2552 Comm: hv_kvp_daemon Tainted: G E 6.11.0-rc3+ #1 +RIP: 0010:hv_pkt_iter_first+0x12/0xd0 +Call Trace: +... + vmbus_recvpacket + hv_kvp_onchannelcallback + vmbus_on_event + tasklet_action_common + tasklet_action + handle_softirqs + irq_exit_rcu + sysvec_hyperv_stimer0 + </IRQ> + <TASK> + asm_sysvec_hyperv_stimer0 +... + kvp_register_done + hvt_op_read + vfs_read + ksys_read + __x64_sys_read + +This can happen because the KVP/VSS channel callback can be invoked +even before the channel is fully opened: +1) as soon as hv_kvp_init() -> hvutil_transport_init() creates +/dev/vmbus/hv_kvp, the kvp daemon can open the device file immediately and +register itself to the driver by writing a message KVP_OP_REGISTER1 to the +file (which is handled by kvp_on_msg() ->kvp_handle_handshake()) and +reading the file for the driver's response, which is handled by +hvt_op_read(), which calls hvt->on_read(), i.e. kvp_register_done(). + +2) the problem with kvp_register_done() is that it can cause the +channel callback to be called even before the channel is fully opened, +and when the channel callback is starting to run, util_probe()-> +vmbus_open() may not have initialized the ringbuffer yet, so the +callback can hit the panic of NULL pointer dereference. + +To reproduce the panic consistently, we can add a "ssleep(10)" for KVP in +__vmbus_open(), just before the first hv_ringbuffer_init(), and then we +unload and reload the driver hv_utils, and run the daemon manually within +the 10 seconds. + +Fix the panic by reordering the steps in util_probe() so the char dev +entry used by the KVP or VSS daemon is not created until after +vmbus_open() has completed. This reordering prevents the race condition +from happening.
+ +Reported-by: Dexuan Cui +Fixes: e0fa3e5e7df6 ("Drivers: hv: utils: fix a race on userspace daemons registration") +Cc: stable@vger.kernel.org +Signed-off-by: Michael Kelley +Acked-by: Wei Liu +Link: https://lore.kernel.org/r/20241106154247.2271-3-mhklinux@outlook.com +Signed-off-by: Wei Liu +Message-ID: <20241106154247.2271-3-mhklinux@outlook.com> +Signed-off-by: Greg Kroah-Hartman +--- + drivers/hv/hv_kvp.c | 6 ++++++ + drivers/hv/hv_snapshot.c | 6 ++++++ + drivers/hv/hv_util.c | 9 +++++++++ + drivers/hv/hyperv_vmbus.h | 2 ++ + include/linux/hyperv.h | 1 + + 5 files changed, 24 insertions(+) + +--- a/drivers/hv/hv_kvp.c ++++ b/drivers/hv/hv_kvp.c +@@ -767,6 +767,12 @@ hv_kvp_init(struct hv_util_service *srv) + */ + kvp_transaction.state = HVUTIL_DEVICE_INIT; + ++ return 0; ++} ++ ++int ++hv_kvp_init_transport(void) ++{ + hvt = hvutil_transport_init(kvp_devname, CN_KVP_IDX, CN_KVP_VAL, + kvp_on_msg, kvp_on_reset); + if (!hvt) +--- a/drivers/hv/hv_snapshot.c ++++ b/drivers/hv/hv_snapshot.c +@@ -388,6 +388,12 @@ hv_vss_init(struct hv_util_service *srv) + */ + vss_transaction.state = HVUTIL_DEVICE_INIT; + ++ return 0; ++} ++ ++int ++hv_vss_init_transport(void) ++{ + hvt = hvutil_transport_init(vss_devname, CN_VSS_IDX, CN_VSS_VAL, + vss_on_msg, vss_on_reset); + if (!hvt) { +--- a/drivers/hv/hv_util.c ++++ b/drivers/hv/hv_util.c +@@ -141,6 +141,7 @@ static struct hv_util_service util_heart + static struct hv_util_service util_kvp = { + .util_cb = hv_kvp_onchannelcallback, + .util_init = hv_kvp_init, ++ .util_init_transport = hv_kvp_init_transport, + .util_pre_suspend = hv_kvp_pre_suspend, + .util_pre_resume = hv_kvp_pre_resume, + .util_deinit = hv_kvp_deinit, +@@ -149,6 +150,7 @@ static struct hv_util_service util_kvp = + static struct hv_util_service util_vss = { + .util_cb = hv_vss_onchannelcallback, + .util_init = hv_vss_init, ++ .util_init_transport = hv_vss_init_transport, + .util_pre_suspend = hv_vss_pre_suspend, + .util_pre_resume = hv_vss_pre_resume, + 
.util_deinit = hv_vss_deinit, +@@ -592,6 +594,13 @@ static int util_probe(struct hv_device * + if (ret) + goto error; + ++ if (srv->util_init_transport) { ++ ret = srv->util_init_transport(); ++ if (ret) { ++ vmbus_close(dev->channel); ++ goto error; ++ } ++ } + return 0; + + error: +--- a/drivers/hv/hyperv_vmbus.h ++++ b/drivers/hv/hyperv_vmbus.h +@@ -370,12 +370,14 @@ void vmbus_on_event(unsigned long data); + void vmbus_on_msg_dpc(unsigned long data); + + int hv_kvp_init(struct hv_util_service *srv); ++int hv_kvp_init_transport(void); + void hv_kvp_deinit(void); + int hv_kvp_pre_suspend(void); + int hv_kvp_pre_resume(void); + void hv_kvp_onchannelcallback(void *context); + + int hv_vss_init(struct hv_util_service *srv); ++int hv_vss_init_transport(void); + void hv_vss_deinit(void); + int hv_vss_pre_suspend(void); + int hv_vss_pre_resume(void); +--- a/include/linux/hyperv.h ++++ b/include/linux/hyperv.h +@@ -1561,6 +1561,7 @@ struct hv_util_service { + void *channel; + void (*util_cb)(void *); + int (*util_init)(struct hv_util_service *); ++ int (*util_init_transport)(void); + void (*util_deinit)(void); + int (*util_pre_suspend)(void); + int (*util_pre_resume)(void); diff --git a/queue-6.6/io_uring-check-if-iowq-is-killed-before-queuing.patch b/queue-6.6/io_uring-check-if-iowq-is-killed-before-queuing.patch new file mode 100644 index 00000000000..c6534ac86ea --- /dev/null +++ b/queue-6.6/io_uring-check-if-iowq-is-killed-before-queuing.patch @@ -0,0 +1,46 @@ +From dbd2ca9367eb19bc5e269b8c58b0b1514ada9156 Mon Sep 17 00:00:00 2001 +From: Pavel Begunkov +Date: Thu, 19 Dec 2024 19:52:58 +0000 +Subject: io_uring: check if iowq is killed before queuing + +From: Pavel Begunkov + +commit dbd2ca9367eb19bc5e269b8c58b0b1514ada9156 upstream. + +task work can be executed after the task has gone through io_uring +termination, whether it's the final task_work run or the fallback path. 
+In this case, task work will find ->io_wq being already killed and +null'ed, which is a problem if it then tries to forward the request to +io_queue_iowq(). Make io_queue_iowq() fail requests in this case. + +Note that it also checks PF_KTHREAD, because the user can first close +a DEFER_TASKRUN ring and shortly after kill the task, in which case +->iowq check would race. + +Cc: stable@vger.kernel.org +Fixes: 50c52250e2d74 ("block: implement async io_uring discard cmd") +Fixes: 773af69121ecc ("io_uring: always reissue from task_work context") +Reported-by: Will +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/63312b4a2c2bb67ad67b857d17a300e1d3b078e8.1734637909.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/io_uring.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -498,7 +498,11 @@ void io_queue_iowq(struct io_kiocb *req, + struct io_uring_task *tctx = req->task->io_uring; + + BUG_ON(!tctx); +- BUG_ON(!tctx->io_wq); ++ ++ if ((current->flags & PF_KTHREAD) || !tctx->io_wq) { ++ io_req_task_queue_fail(req, -ECANCELED); ++ return; ++ } + + /* init ->work of the whole link before punting */ + io_prep_async_link(req); diff --git a/queue-6.6/io_uring-fix-registered-ring-file-refcount-leak.patch b/queue-6.6/io_uring-fix-registered-ring-file-refcount-leak.patch new file mode 100644 index 00000000000..b15af2a6d6a --- /dev/null +++ b/queue-6.6/io_uring-fix-registered-ring-file-refcount-leak.patch @@ -0,0 +1,64 @@ +From 12d908116f7efd34f255a482b9afc729d7a5fb78 Mon Sep 17 00:00:00 2001 +From: Jann Horn +Date: Wed, 18 Dec 2024 17:56:25 +0100 +Subject: io_uring: Fix registered ring file refcount leak + +From: Jann Horn + +commit 12d908116f7efd34f255a482b9afc729d7a5fb78 upstream. 
+ +Currently, io_uring_unreg_ringfd() (which cleans up registered rings) is +only called on exit, but __io_uring_free (which frees the tctx in which the +registered ring pointers are stored) is also called on execve (via +begin_new_exec -> io_uring_task_cancel -> __io_uring_cancel -> +io_uring_cancel_generic -> __io_uring_free). + +This means: A process going through execve while having registered rings +will leak references to the rings' `struct file`. + +Fix it by zapping registered rings on execve(). This is implemented by +moving the io_uring_unreg_ringfd() from io_uring_files_cancel() into its +callee __io_uring_cancel(), which is called from io_uring_task_cancel() on +execve. + +This could probably be exploited *on 32-bit kernels* by leaking 2^32 +references to the same ring, because the file refcount is stored in a +pointer-sized field and get_file() doesn't have protection against +refcount overflow, just a WARN_ONCE(); but on 64-bit it should have no +impact beyond a memory leak. 
+ +Cc: stable@vger.kernel.org +Fixes: e7a6c00dc77a ("io_uring: add support for registering ring file descriptors") +Signed-off-by: Jann Horn +Link: https://lore.kernel.org/r/20241218-uring-reg-ring-cleanup-v1-1-8f63e999045b@google.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/io_uring.h | 4 +--- + io_uring/io_uring.c | 1 + + 2 files changed, 2 insertions(+), 3 deletions(-) + +--- a/include/linux/io_uring.h ++++ b/include/linux/io_uring.h +@@ -65,10 +65,8 @@ static inline void io_uring_cmd_complete + + static inline void io_uring_files_cancel(void) + { +- if (current->io_uring) { +- io_uring_unreg_ringfd(); ++ if (current->io_uring) + __io_uring_cancel(false); +- } + } + static inline void io_uring_task_cancel(void) + { +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -3431,6 +3431,7 @@ end_wait: + + void __io_uring_cancel(bool cancel_all) + { ++ io_uring_unreg_ringfd(); + io_uring_cancel_generic(cancel_all, NULL); + } + diff --git a/queue-6.6/kvm-x86-play-nice-with-protected-guests-in-complete_hypercall_exit.patch b/queue-6.6/kvm-x86-play-nice-with-protected-guests-in-complete_hypercall_exit.patch new file mode 100644 index 00000000000..49c465619b1 --- /dev/null +++ b/queue-6.6/kvm-x86-play-nice-with-protected-guests-in-complete_hypercall_exit.patch @@ -0,0 +1,59 @@ +From 9b42d1e8e4fe9dc631162c04caa69b0d1860b0f0 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Wed, 27 Nov 2024 16:43:39 -0800 +Subject: KVM: x86: Play nice with protected guests in complete_hypercall_exit() + +From: Sean Christopherson + +commit 9b42d1e8e4fe9dc631162c04caa69b0d1860b0f0 upstream. + +Use is_64_bit_hypercall() instead of is_64_bit_mode() to detect a 64-bit +hypercall when completing said hypercall. For guests with protected state, +e.g. SEV-ES and SEV-SNP, KVM must assume the hypercall was made in 64-bit +mode as the vCPU state needed to detect 64-bit mode is unavailable. 
+ +Hacking the sev_smoke_test selftest to generate a KVM_HC_MAP_GPA_RANGE +hypercall via VMGEXIT trips the WARN: + + ------------[ cut here ]------------ + WARNING: CPU: 273 PID: 326626 at arch/x86/kvm/x86.h:180 complete_hypercall_exit+0x44/0xe0 [kvm] + Modules linked in: kvm_amd kvm ... [last unloaded: kvm] + CPU: 273 UID: 0 PID: 326626 Comm: sev_smoke_test Not tainted 6.12.0-smp--392e932fa0f3-feat #470 + Hardware name: Google Astoria/astoria, BIOS 0.20240617.0-0 06/17/2024 + RIP: 0010:complete_hypercall_exit+0x44/0xe0 [kvm] + Call Trace: + <TASK> + kvm_arch_vcpu_ioctl_run+0x2400/0x2720 [kvm] + kvm_vcpu_ioctl+0x54f/0x630 [kvm] + __se_sys_ioctl+0x6b/0xc0 + do_syscall_64+0x83/0x160 + entry_SYSCALL_64_after_hwframe+0x76/0x7e + </TASK> + ---[ end trace 0000000000000000 ]--- + +Fixes: b5aead0064f3 ("KVM: x86: Assume a 64-bit hypercall for guests with protected state") +Cc: stable@vger.kernel.org +Cc: Tom Lendacky +Reviewed-by: Xiaoyao Li +Reviewed-by: Nikunj A Dadhania +Reviewed-by: Tom Lendacky +Reviewed-by: Binbin Wu +Reviewed-by: Kai Huang +Link: https://lore.kernel.org/r/20241128004344.4072099-2-seanjc@google.com +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/x86.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -9825,7 +9825,7 @@ static int complete_hypercall_exit(struc + { + u64 ret = vcpu->run->hypercall.ret; + +- if (!is_64_bit_mode(vcpu)) ++ if (!is_64_bit_hypercall(vcpu)) + ret = (u32)ret; + kvm_rax_write(vcpu, ret); + ++vcpu->stat.hypercalls; diff --git a/queue-6.6/selftests-bpf-use-asm-constraint-m-for-loongarch.patch b/queue-6.6/selftests-bpf-use-asm-constraint-m-for-loongarch.patch new file mode 100644 index 00000000000..a491f2993a5 --- /dev/null +++ b/queue-6.6/selftests-bpf-use-asm-constraint-m-for-loongarch.patch @@ -0,0 +1,40 @@ +From 29d44cce324dab2bd86c447071a596262e7109b6 Mon Sep 17 00:00:00 2001 +From: Tiezhu Yang +Date: Thu, 19 Dec 2024 19:15:06 +0800
+Subject: selftests/bpf: Use asm constraint "m" for LoongArch + +From: Tiezhu Yang + +commit 29d44cce324dab2bd86c447071a596262e7109b6 upstream. + +Currently, LoongArch LLVM does not support the constraint "o" and has no plan +to support it; it only supports the similar constraint "m", so change the +constraints from "nor" in the "else" case to arch-specific "nmr" to avoid +the build error such as "unexpected asm memory constraint" for LoongArch. + +Fixes: 630301b0d59d ("selftests/bpf: Add basic USDT selftests") +Suggested-by: Weining Lu +Suggested-by: Li Chen +Signed-off-by: Tiezhu Yang +Signed-off-by: Daniel Borkmann +Reviewed-by: Huacai Chen +Cc: stable@vger.kernel.org +Link: https://llvm.org/docs/LangRef.html#supported-constraint-code-list +Link: https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp#L172 +Link: https://lore.kernel.org/bpf/20241219111506.20643-1-yangtiezhu@loongson.cn +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/bpf/sdt.h | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/tools/testing/selftests/bpf/sdt.h ++++ b/tools/testing/selftests/bpf/sdt.h +@@ -102,6 +102,8 @@ + # define STAP_SDT_ARG_CONSTRAINT nZr + # elif defined __arm__ + # define STAP_SDT_ARG_CONSTRAINT g ++# elif defined __loongarch__ ++# define STAP_SDT_ARG_CONSTRAINT nmr + # else + # define STAP_SDT_ARG_CONSTRAINT nor + # endif diff --git a/queue-6.6/selftests-memfd-run-sysctl-tests-when-pid-namespace-support-is-enabled.patch b/queue-6.6/selftests-memfd-run-sysctl-tests-when-pid-namespace-support-is-enabled.patch new file mode 100644 index 00000000000..8fe7969f3f6 --- /dev/null +++ b/queue-6.6/selftests-memfd-run-sysctl-tests-when-pid-namespace-support-is-enabled.patch @@ -0,0 +1,71 @@ +From 6a75f19af16ff482cfd6085c77123aa0f464f8dd Mon Sep 17 00:00:00 2001 +From: "Isaac J. Manjarres" +Date: Thu, 5 Dec 2024 11:29:41 -0800 +Subject: selftests/memfd: run sysctl tests when PID namespace support is enabled + +From: Isaac J.
Manjarres + +commit 6a75f19af16ff482cfd6085c77123aa0f464f8dd upstream. + +The sysctl tests for vm.memfd_noexec rely on the kernel to support PID +namespaces (i.e. the kernel is built with CONFIG_PID_NS=y). If the +kernel the test runs on does not support PID namespaces, the first sysctl +test will fail when attempting to spawn a new thread in a new PID +namespace, abort the test, preventing the remaining tests from being run. + +This is not desirable, as not all kernels need PID namespaces, but can +still use the other features provided by memfd. Therefore, only run the +sysctl tests if the kernel supports PID namespaces. Otherwise, skip those +tests and emit an informative message to let the user know why the sysctl +tests are not being run. + +Link: https://lkml.kernel.org/r/20241205192943.3228757-1-isaacmanjarres@google.com +Fixes: 11f75a01448f ("selftests/memfd: add tests for MFD_NOEXEC_SEAL MFD_EXEC") +Signed-off-by: Isaac J. Manjarres +Reviewed-by: Jeff Xu +Cc: Suren Baghdasaryan +Cc: Kalesh Singh +Cc: <stable@vger.kernel.org> [6.6+] +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/memfd/memfd_test.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +--- a/tools/testing/selftests/memfd/memfd_test.c ++++ b/tools/testing/selftests/memfd/memfd_test.c +@@ -9,6 +9,7 @@ + #include <fcntl.h> + #include <linux/memfd.h> + #include <sched.h> ++#include <stdbool.h> + #include <stdio.h> + #include <stdlib.h> + #include <signal.h> +@@ -1567,6 +1568,11 @@ static void test_share_fork(char *banner + close(fd); + } + ++static bool pid_ns_supported(void) ++{ ++ return access("/proc/self/ns/pid", F_OK) == 0; ++} ++ + int main(int argc, char **argv) + { + pid_t pid; +@@ -1601,8 +1607,12 @@ int main(int argc, char **argv) + test_seal_grow(); + test_seal_resize(); + +- test_sysctl_simple(); +- test_sysctl_nested(); ++ if (pid_ns_supported()) { ++ test_sysctl_simple(); ++ test_sysctl_nested(); ++ } else { ++ printf("PID namespaces are not supported; skipping sysctl tests\n"); ++ } + + test_share_dup("SHARE-DUP",
""); + test_share_mmap("SHARE-MMAP", ""); diff --git a/queue-6.6/series b/queue-6.6/series index 350dd44cf36..d6bc2b86e54 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -85,3 +85,14 @@ hwmon-tmp513-fix-interpretation-of-values-of-tempera.patch zram-refuse-to-use-zero-sized-block-device-as-backing-device.patch zram-fix-uninitialized-zram-not-releasing-backing-device.patch vmalloc-fix-accounting-with-i915.patch +btrfs-tree-checker-reject-inline-extent-items-with-0-ref-count.patch +drivers-hv-util-avoid-accessing-a-ringbuffer-not-initialized-yet.patch +kvm-x86-play-nice-with-protected-guests-in-complete_hypercall_exit.patch +smb-client-fix-tcp-timers-deadlock-after-rmmod.patch +tracing-fix-test_event_printk-to-process-entire-print-argument.patch +tracing-add-missing-helper-functions-in-event-pointer-dereference-check.patch +tracing-add-s-check-in-test_event_printk.patch +selftests-memfd-run-sysctl-tests-when-pid-namespace-support-is-enabled.patch +selftests-bpf-use-asm-constraint-m-for-loongarch.patch +io_uring-fix-registered-ring-file-refcount-leak.patch +io_uring-check-if-iowq-is-killed-before-queuing.patch diff --git a/queue-6.6/smb-client-fix-tcp-timers-deadlock-after-rmmod.patch b/queue-6.6/smb-client-fix-tcp-timers-deadlock-after-rmmod.patch new file mode 100644 index 00000000000..3274f9a5a1e --- /dev/null +++ b/queue-6.6/smb-client-fix-tcp-timers-deadlock-after-rmmod.patch @@ -0,0 +1,182 @@ +From e9f2517a3e18a54a3943c098d2226b245d488801 Mon Sep 17 00:00:00 2001 +From: Enzo Matsumiya +Date: Tue, 10 Dec 2024 18:15:12 -0300 +Subject: smb: client: fix TCP timers deadlock after rmmod + +From: Enzo Matsumiya + +commit e9f2517a3e18a54a3943c098d2226b245d488801 upstream. + +Commit ef7134c7fc48 ("smb: client: Fix use-after-free of network namespace.") +fixed a netns UAF by manually enabled socket refcounting +(sk->sk_net_refcnt=1 and sock_inuse_add(net, 1)). 
+ +The reason the patch worked for that bug was because we now hold +references to the netns (get_net_track() gets a ref internally) +and they're properly released (internally, on __sk_destruct()), +but only because sk->sk_net_refcnt was set. + +Problem: +(this happens regardless of CONFIG_NET_NS_REFCNT_TRACKER and regardless +if init_net or other) + +Setting sk->sk_net_refcnt=1 *manually* and *after* socket creation is not +only out of cifs scope, but also technically wrong -- it's set conditionally +based on user (=1) vs kernel (=0) sockets. And net/ implementations +seem to base their user vs kernel space operations on it. + +e.g. upon TCP socket close, the TCP timers are not cleared because +sk->sk_net_refcnt=1: +(cf. commit 151c9c724d05 ("tcp: properly terminate timers for kernel sockets")) + +net/ipv4/tcp.c: + void tcp_close(struct sock *sk, long timeout) + { + lock_sock(sk); + __tcp_close(sk, timeout); + release_sock(sk); + if (!sk->sk_net_refcnt) + inet_csk_clear_xmit_timers_sync(sk); + sock_put(sk); + } + +Which will throw a lockdep warning and then, as expected, deadlock on +tcp_write_timer(). + +A way to reproduce this is by running the reproducer from ef7134c7fc48 +and then 'rmmod cifs'. A few seconds later, the deadlock/lockdep +warning shows up. + +Fix: +We shouldn't mess with socket internals ourselves, so do not set +sk_net_refcnt manually. + +Also change __sock_create() to sock_create_kern() for explicitness. + +As for non-init_net network namespaces, we deal with it the best way +we can -- hold an extra netns reference for server->ssocket and drop it +when it's released. This ensures that the netns still exists whenever +we need to create/destroy server->ssocket, but is not directly tied to +it. 
+ +Fixes: ef7134c7fc48 ("smb: client: Fix use-after-free of network namespace.") +Cc: stable@vger.kernel.org +Signed-off-by: Enzo Matsumiya +Signed-off-by: Steve French +Signed-off-by: Greg Kroah-Hartman +--- + fs/smb/client/connect.c | 36 ++++++++++++++++++++++++++---------- + 1 file changed, 26 insertions(+), 10 deletions(-) + +--- a/fs/smb/client/connect.c ++++ b/fs/smb/client/connect.c +@@ -1003,9 +1003,13 @@ clean_demultiplex_info(struct TCP_Server + msleep(125); + if (cifs_rdma_enabled(server)) + smbd_destroy(server); ++ + if (server->ssocket) { + sock_release(server->ssocket); + server->ssocket = NULL; ++ ++ /* Release netns reference for the socket. */ ++ put_net(cifs_net_ns(server)); + } + + if (!list_empty(&server->pending_mid_q)) { +@@ -1054,6 +1058,7 @@ clean_demultiplex_info(struct TCP_Server + */ + } + ++ /* Release netns reference for this server. */ + put_net(cifs_net_ns(server)); + kfree(server->leaf_fullpath); + kfree(server); +@@ -1726,6 +1731,8 @@ cifs_get_tcp_session(struct smb3_fs_cont + + tcp_ses->ops = ctx->ops; + tcp_ses->vals = ctx->vals; ++ ++ /* Grab netns reference for this server. */ + cifs_set_net_ns(tcp_ses, get_net(current->nsproxy->net_ns)); + + tcp_ses->conn_id = atomic_inc_return(&tcpSesNextId); +@@ -1857,6 +1864,7 @@ smbd_connected: + out_err_crypto_release: + cifs_crypto_secmech_release(tcp_ses); + ++ /* Release netns reference for this server. 
*/ + put_net(cifs_net_ns(tcp_ses)); + + out_err: +@@ -1865,8 +1873,10 @@ out_err: + cifs_put_tcp_session(tcp_ses->primary_server, false); + kfree(tcp_ses->hostname); + kfree(tcp_ses->leaf_fullpath); +- if (tcp_ses->ssocket) ++ if (tcp_ses->ssocket) { + sock_release(tcp_ses->ssocket); ++ put_net(cifs_net_ns(tcp_ses)); ++ } + kfree(tcp_ses); + } + return ERR_PTR(rc); +@@ -3120,20 +3130,20 @@ generic_ip_connect(struct TCP_Server_Inf + socket = server->ssocket; + } else { + struct net *net = cifs_net_ns(server); +- struct sock *sk; + +- rc = __sock_create(net, sfamily, SOCK_STREAM, +- IPPROTO_TCP, &server->ssocket, 1); ++ rc = sock_create_kern(net, sfamily, SOCK_STREAM, IPPROTO_TCP, &server->ssocket); + if (rc < 0) { + cifs_server_dbg(VFS, "Error %d creating socket\n", rc); + return rc; + } + +- sk = server->ssocket->sk; +- __netns_tracker_free(net, &sk->ns_tracker, false); +- sk->sk_net_refcnt = 1; +- get_net_track(net, &sk->ns_tracker, GFP_KERNEL); +- sock_inuse_add(net, 1); ++ /* ++ * Grab netns reference for the socket. ++ * ++ * It'll be released here, on error, or in clean_demultiplex_info() upon server ++ * teardown. ++ */ ++ get_net(net); + + /* BB other socket options to set KEEPALIVE, NODELAY? 
*/ + cifs_dbg(FYI, "Socket created\n"); +@@ -3147,8 +3157,10 @@ generic_ip_connect(struct TCP_Server_Inf + } + + rc = bind_socket(server); +- if (rc < 0) ++ if (rc < 0) { ++ put_net(cifs_net_ns(server)); + return rc; ++ } + + /* + * Eventually check for other socket options to change from +@@ -3185,6 +3197,7 @@ generic_ip_connect(struct TCP_Server_Inf + if (rc < 0) { + cifs_dbg(FYI, "Error %d connecting to server\n", rc); + trace_smb3_connect_err(server->hostname, server->conn_id, &server->dstaddr, rc); ++ put_net(cifs_net_ns(server)); + sock_release(socket); + server->ssocket = NULL; + return rc; +@@ -3193,6 +3206,9 @@ generic_ip_connect(struct TCP_Server_Inf + if (sport == htons(RFC1001_PORT)) + rc = ip_rfc1001_connect(server); + ++ if (rc < 0) ++ put_net(cifs_net_ns(server)); ++ + return rc; + } + diff --git a/queue-6.6/tracing-add-missing-helper-functions-in-event-pointer-dereference-check.patch b/queue-6.6/tracing-add-missing-helper-functions-in-event-pointer-dereference-check.patch new file mode 100644 index 00000000000..5ea51045135 --- /dev/null +++ b/queue-6.6/tracing-add-missing-helper-functions-in-event-pointer-dereference-check.patch @@ -0,0 +1,78 @@ +From 917110481f6bc1c96b1e54b62bb114137fbc6d17 Mon Sep 17 00:00:00 2001 +From: Steven Rostedt +Date: Mon, 16 Dec 2024 21:41:20 -0500 +Subject: tracing: Add missing helper functions in event pointer dereference check + +From: Steven Rostedt + +commit 917110481f6bc1c96b1e54b62bb114137fbc6d17 upstream. + +The process_pointer() helper function looks to see if various trace event +macros are used. These macros are for storing data in the event. This +makes it safe to dereference as the dereference will then point into the +event on the ring buffer where the content of the data stays with the +event itself. + +A few helper functions were missing. 
Those were: + + __get_rel_dynamic_array() + __get_dynamic_array_len() + __get_rel_dynamic_array_len() + __get_rel_sockaddr() + +Also add a helper function find_print_string() to not need to use a middle +man variable to test if the string exists. + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Cc: Andrew Morton +Cc: Al Viro +Cc: Linus Torvalds +Link: https://lore.kernel.org/20241217024720.521836792@goodmis.org +Fixes: 5013f454a352c ("tracing: Add check of trace event print fmts for dereferencing pointers") +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/trace_events.c | 21 +++++++++++++++++++-- + 1 file changed, 19 insertions(+), 2 deletions(-) + +--- a/kernel/trace/trace_events.c ++++ b/kernel/trace/trace_events.c +@@ -274,6 +274,15 @@ static bool test_field(const char *fmt, + return false; + } + ++/* Look for a string within an argument */ ++static bool find_print_string(const char *arg, const char *str, const char *end) ++{ ++ const char *r; ++ ++ r = strstr(arg, str); ++ return r && r < end; ++} ++ + /* Return true if the argument pointer is safe */ + static bool process_pointer(const char *fmt, int len, struct trace_event_call *call) + { +@@ -292,9 +301,17 @@ static bool process_pointer(const char * + a = strchr(fmt, '&'); + if ((a && (a < r)) || test_field(r, call)) + return true; +- } else if ((r = strstr(fmt, "__get_dynamic_array(")) && r < e) { ++ } else if (find_print_string(fmt, "__get_dynamic_array(", e)) { ++ return true; ++ } else if (find_print_string(fmt, "__get_rel_dynamic_array(", e)) { ++ return true; ++ } else if (find_print_string(fmt, "__get_dynamic_array_len(", e)) { ++ return true; ++ } else if (find_print_string(fmt, "__get_rel_dynamic_array_len(", e)) { ++ return true; ++ } else if (find_print_string(fmt, "__get_sockaddr(", e)) { + return true; +- } else if ((r = strstr(fmt, "__get_sockaddr(")) && r < e) { ++ } else if (find_print_string(fmt, 
"__get_rel_sockaddr(", e)) { + return true; + } + return false; diff --git a/queue-6.6/tracing-add-s-check-in-test_event_printk.patch b/queue-6.6/tracing-add-s-check-in-test_event_printk.patch new file mode 100644 index 00000000000..5c30d998837 --- /dev/null +++ b/queue-6.6/tracing-add-s-check-in-test_event_printk.patch @@ -0,0 +1,206 @@ +From 65a25d9f7ac02e0cf361356e834d1c71d36acca9 Mon Sep 17 00:00:00 2001 +From: Steven Rostedt +Date: Mon, 16 Dec 2024 21:41:21 -0500 +Subject: tracing: Add "%s" check in test_event_printk() + +From: Steven Rostedt + +commit 65a25d9f7ac02e0cf361356e834d1c71d36acca9 upstream. + +The test_event_printk() code makes sure that when a trace event is +registered, any dereferenced pointers from the event's TP_printk() are +pointing to content in the ring buffer. But currently it does not handle +"%s", as there are cases where the string pointer saved in the ring buffer +points to a static string in the kernel that will never be freed. As that +is a valid case, the pointer needs to be checked at runtime. + +Currently the runtime check is done via trace_check_vprintf(), but to not +have to replicate everything in vsnprintf() it does some logic with the +va_list that may not be reliable across architectures. In order to get rid +of that logic, more work in the test_event_printk() needs to be done. Some +of the strings can be validated at this time when it is obvious the string +is valid because the string will be saved in the ring buffer content. + +Do all the validation of strings in the ring buffer at boot in +test_event_printk(), and make sure that the field of the strings that +point into the kernel are accessible. This will allow adding checks at +runtime that will validate the fields themselves and not rely on parsing +the TP_printk() format at runtime. 
+ +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Cc: Andrew Morton +Cc: Al Viro +Cc: Linus Torvalds +Link: https://lore.kernel.org/20241217024720.685917008@goodmis.org +Fixes: 5013f454a352c ("tracing: Add check of trace event print fmts for dereferencing pointers") +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/trace_events.c | 104 +++++++++++++++++++++++++++++++++++++------- + 1 file changed, 89 insertions(+), 15 deletions(-) + +--- a/kernel/trace/trace_events.c ++++ b/kernel/trace/trace_events.c +@@ -244,19 +244,16 @@ int trace_event_get_offsets(struct trace + return tail->offset + tail->size; + } + +-/* +- * Check if the referenced field is an array and return true, +- * as arrays are OK to dereference. +- */ +-static bool test_field(const char *fmt, struct trace_event_call *call) ++ ++static struct trace_event_fields *find_event_field(const char *fmt, ++ struct trace_event_call *call) + { + struct trace_event_fields *field = call->class->fields_array; +- const char *array_descriptor; + const char *p = fmt; + int len; + + if (!(len = str_has_prefix(fmt, "REC->"))) +- return false; ++ return NULL; + fmt += len; + for (p = fmt; *p; p++) { + if (!isalnum(*p) && *p != '_') +@@ -267,11 +264,26 @@ static bool test_field(const char *fmt, + for (; field->type; field++) { + if (strncmp(field->name, fmt, len) || field->name[len]) + continue; +- array_descriptor = strchr(field->type, '['); +- /* This is an array and is OK to dereference. */ +- return array_descriptor != NULL; ++ ++ return field; + } +- return false; ++ return NULL; ++} ++ ++/* ++ * Check if the referenced field is an array and return true, ++ * as arrays are OK to dereference. 
++ */ ++static bool test_field(const char *fmt, struct trace_event_call *call) ++{ ++ struct trace_event_fields *field; ++ ++ field = find_event_field(fmt, call); ++ if (!field) ++ return false; ++ ++ /* This is an array and is OK to dereference. */ ++ return strchr(field->type, '[') != NULL; + } + + /* Look for a string within an argument */ +@@ -317,6 +329,53 @@ static bool process_pointer(const char * + return false; + } + ++/* Return true if the string is safe */ ++static bool process_string(const char *fmt, int len, struct trace_event_call *call) ++{ ++ const char *r, *e, *s; ++ ++ e = fmt + len; ++ ++ /* ++ * There are several helper functions that return strings. ++ * If the argument contains a function, then assume its field is valid. ++ * It is considered that the argument has a function if it has: ++ * alphanumeric or '_' before a parenthesis. ++ */ ++ s = fmt; ++ do { ++ r = strstr(s, "("); ++ if (!r || r >= e) ++ break; ++ for (int i = 1; r - i >= s; i++) { ++ char ch = *(r - i); ++ if (isspace(ch)) ++ continue; ++ if (isalnum(ch) || ch == '_') ++ return true; ++ /* Anything else, this isn't a function */ ++ break; ++ } ++ /* A function could be wrapped in parethesis, try the next one */ ++ s = r + 1; ++ } while (s < e); ++ ++ /* ++ * If there's any strings in the argument consider this arg OK as it ++ * could be: REC->field ? "foo" : "bar" and we don't want to get into ++ * verifying that logic here. 
++ */ ++ if (find_print_string(fmt, "\"", e)) ++ return true; ++ ++ /* Dereferenced strings are also valid like any other pointer */ ++ if (process_pointer(fmt, len, call)) ++ return true; ++ ++ /* Make sure the field is found, and consider it OK for now if it is */ ++ return find_event_field(fmt, call) != NULL; ++} ++ + /* + * Examine the print fmt of the event looking for unsafe dereference + * pointers using %p* that could be recorded in the trace event and +@@ -326,6 +385,7 @@ static bool process_pointer(const char * + static void test_event_printk(struct trace_event_call *call) + { + u64 dereference_flags = 0; ++ u64 string_flags = 0; + bool first = true; + const char *fmt; + int parens = 0; +@@ -416,8 +476,16 @@ static void test_event_printk(struct tra + star = true; + continue; + } +- if ((fmt[i + j] == 's') && star) +- arg++; ++ if ((fmt[i + j] == 's')) { ++ if (star) ++ arg++; ++ if (WARN_ONCE(arg == 63, ++ "Too many args for event: %s", ++ trace_event_name(call))) ++ return; ++ dereference_flags |= 1ULL << arg; ++ string_flags |= 1ULL << arg; ++ } + break; + } + break; +@@ -464,7 +532,10 @@ static void test_event_printk(struct tra + } + + if (dereference_flags & (1ULL << arg)) { +- if (process_pointer(fmt + start_arg, e - start_arg, call)) ++ if (string_flags & (1ULL << arg)) { ++ if (process_string(fmt + start_arg, e - start_arg, call)) ++ dereference_flags &= ~(1ULL << arg); ++ } else if (process_pointer(fmt + start_arg, e - start_arg, call)) + dereference_flags &= ~(1ULL << arg); + } + +@@ -476,7 +547,10 @@ static void test_event_printk(struct tra + } + + if (dereference_flags & (1ULL << arg)) { +- if (process_pointer(fmt + start_arg, i - start_arg, call)) ++ if (string_flags & (1ULL << arg)) { ++ if (process_string(fmt + start_arg, i - start_arg, call)) ++ dereference_flags &= ~(1ULL << arg); ++ } else if (process_pointer(fmt + start_arg, i - start_arg, call)) + dereference_flags &= ~(1ULL << arg); + } + diff --git 
a/queue-6.6/tracing-fix-test_event_printk-to-process-entire-print-argument.patch b/queue-6.6/tracing-fix-test_event_printk-to-process-entire-print-argument.patch new file mode 100644 index 00000000000..ebd2328bc08 --- /dev/null +++ b/queue-6.6/tracing-fix-test_event_printk-to-process-entire-print-argument.patch @@ -0,0 +1,184 @@ +From a6629626c584200daf495cc9a740048b455addcd Mon Sep 17 00:00:00 2001 +From: Steven Rostedt +Date: Mon, 16 Dec 2024 21:41:19 -0500 +Subject: tracing: Fix test_event_printk() to process entire print argument + +From: Steven Rostedt + +commit a6629626c584200daf495cc9a740048b455addcd upstream. + +The test_event_printk() analyzes print formats of trace events looking for +cases where it may dereference a pointer that is not in the ring buffer +which can possibly be a bug when the trace event is read from the ring +buffer and the content of that pointer no longer exists. + +The function needs to accurately go from one print format argument to the +next. It handles quotes and parentheses that may be included in an +argument. When it finds the start of the next argument, it uses a simple +"c = strchr(fmt + i, ',')" to find the end of that argument! + +In order to include "%s" dereferencing, it needs to process the entire +content of the print format argument and not just the content of the first +',' it finds. As there may be content like: + + ({ const char *saved_ptr = trace_seq_buffer_ptr(p); static const char + *access_str[] = { "---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux" + }; union kvm_mmu_page_role role; role.word = REC->role; + trace_seq_printf(p, "sp gen %u gfn %llx l%u %u-byte q%u%s %s%s" " %snxe + %sad root %u %s%c", REC->mmu_valid_gen, REC->gfn, role.level, + role.has_4_byte_gpte ? 4 : 8, role.quadrant, role.direct ? " direct" : "", + access_str[role.access], role.invalid ? " invalid" : "", role.efer_nx ? "" + : "!", role.ad_disabled ? "!" : "", REC->root_count, REC->unsync ? 
+ "unsync" : "sync", 0); saved_ptr; }) + +Which is an example of a full argument of an existing event. As the code +already handles finding the next print format argument, process the +argument at the end of it and not the start of it. This way it has both +the start of the argument as well as the end of it. + +Add a helper function "process_pointer()" that will do the processing during +the loop as well as at the end. It also makes the code cleaner and easier +to read. + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Cc: Andrew Morton +Cc: Al Viro +Cc: Linus Torvalds +Link: https://lore.kernel.org/20241217024720.362271189@goodmis.org +Fixes: 5013f454a352c ("tracing: Add check of trace event print fmts for dereferencing pointers") +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/trace_events.c | 82 ++++++++++++++++++++++++++++---------------- + 1 file changed, 53 insertions(+), 29 deletions(-) + +--- a/kernel/trace/trace_events.c ++++ b/kernel/trace/trace_events.c +@@ -265,8 +265,7 @@ static bool test_field(const char *fmt, + len = p - fmt; + + for (; field->type; field++) { +- if (strncmp(field->name, fmt, len) || +- field->name[len]) ++ if (strncmp(field->name, fmt, len) || field->name[len]) + continue; + array_descriptor = strchr(field->type, '['); + /* This is an array and is OK to dereference. */ +@@ -275,6 +274,32 @@ static bool test_field(const char *fmt, + return false; + } + ++/* Return true if the argument pointer is safe */ ++static bool process_pointer(const char *fmt, int len, struct trace_event_call *call) ++{ ++ const char *r, *e, *a; ++ ++ e = fmt + len; ++ ++ /* Find the REC-> in the argument */ ++ r = strstr(fmt, "REC->"); ++ if (r && r < e) { ++ /* ++ * Addresses of events on the buffer, or an array on the buffer is ++ * OK to dereference. There's ways to fool this, but ++ * this is to catch common mistakes, not malicious code. 
++ */ ++ a = strchr(fmt, '&'); ++ if ((a && (a < r)) || test_field(r, call)) ++ return true; ++ } else if ((r = strstr(fmt, "__get_dynamic_array(")) && r < e) { ++ return true; ++ } else if ((r = strstr(fmt, "__get_sockaddr(")) && r < e) { ++ return true; ++ } ++ return false; ++} ++ + /* + * Examine the print fmt of the event looking for unsafe dereference + * pointers using %p* that could be recorded in the trace event and +@@ -285,12 +310,12 @@ static void test_event_printk(struct tra + { + u64 dereference_flags = 0; + bool first = true; +- const char *fmt, *c, *r, *a; ++ const char *fmt; + int parens = 0; + char in_quote = 0; + int start_arg = 0; + int arg = 0; +- int i; ++ int i, e; + + fmt = call->print_fmt; + +@@ -403,42 +428,41 @@ static void test_event_printk(struct tra + case ',': + if (in_quote || parens) + continue; ++ e = i; + i++; + while (isspace(fmt[i])) + i++; +- start_arg = i; +- if (!(dereference_flags & (1ULL << arg))) +- goto next_arg; + +- /* Find the REC-> in the argument */ +- c = strchr(fmt + i, ','); +- r = strstr(fmt + i, "REC->"); +- if (r && (!c || r < c)) { +- /* +- * Addresses of events on the buffer, +- * or an array on the buffer is +- * OK to dereference. +- * There's ways to fool this, but +- * this is to catch common mistakes, +- * not malicious code. +- */ +- a = strchr(fmt + i, '&'); +- if ((a && (a < r)) || test_field(r, call)) ++ /* ++ * If start_arg is zero, then this is the start of the ++ * first argument. The processing of the argument happens ++ * when the end of the argument is found, as it needs to ++ * handle paranthesis and such. 
++ */ ++ if (!start_arg) { ++ start_arg = i; ++ /* Balance out the i++ in the for loop */ ++ i--; ++ continue; ++ } ++ ++ if (dereference_flags & (1ULL << arg)) { ++ if (process_pointer(fmt + start_arg, e - start_arg, call)) + dereference_flags &= ~(1ULL << arg); +- } else if ((r = strstr(fmt + i, "__get_dynamic_array(")) && +- (!c || r < c)) { +- dereference_flags &= ~(1ULL << arg); +- } else if ((r = strstr(fmt + i, "__get_sockaddr(")) && +- (!c || r < c)) { +- dereference_flags &= ~(1ULL << arg); + } + +- next_arg: +- i--; ++ start_arg = i; + arg++; ++ /* Balance out the i++ in the for loop */ ++ i--; + } + } + ++ if (dereference_flags & (1ULL << arg)) { ++ if (process_pointer(fmt + start_arg, i - start_arg, call)) ++ dereference_flags &= ~(1ULL << arg); ++ } ++ + /* + * If you triggered the below warning, the trace event reported + * uses an unsafe dereference pointer %p*. As the data stored