From: Greg Kroah-Hartman Date: Mon, 23 Dec 2024 11:28:37 +0000 (+0100) Subject: 6.1-stable patches X-Git-Tag: v6.1.122~24 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=c3124874597610c43c81a4d081562a91eaf38290;p=thirdparty%2Fkernel%2Fstable-queue.git 6.1-stable patches added patches: btrfs-tree-checker-reject-inline-extent-items-with-0-ref-count.patch drivers-hv-util-avoid-accessing-a-ringbuffer-not-initialized-yet.patch io_uring-check-if-iowq-is-killed-before-queuing.patch io_uring-fix-registered-ring-file-refcount-leak.patch kvm-x86-play-nice-with-protected-guests-in-complete_hypercall_exit.patch selftests-bpf-use-asm-constraint-m-for-loongarch.patch tracing-add-missing-helper-functions-in-event-pointer-dereference-check.patch tracing-add-s-check-in-test_event_printk.patch tracing-fix-test_event_printk-to-process-entire-print-argument.patch --- diff --git a/queue-6.1/btrfs-tree-checker-reject-inline-extent-items-with-0-ref-count.patch b/queue-6.1/btrfs-tree-checker-reject-inline-extent-items-with-0-ref-count.patch new file mode 100644 index 00000000000..0cab2c70b82 --- /dev/null +++ b/queue-6.1/btrfs-tree-checker-reject-inline-extent-items-with-0-ref-count.patch @@ -0,0 +1,104 @@ +From dfb92681a19e1d5172420baa242806414b3eff6f Mon Sep 17 00:00:00 2001 +From: Qu Wenruo +Date: Wed, 4 Dec 2024 13:30:46 +1030 +Subject: btrfs: tree-checker: reject inline extent items with 0 ref count + +From: Qu Wenruo + +commit dfb92681a19e1d5172420baa242806414b3eff6f upstream. + +[BUG] +There is a bug report in the mailing list where btrfs_run_delayed_refs() +failed to drop the ref count for logical 25870311358464 num_bytes +2113536. + +The involved leaf dump looks like this: + + item 166 key (25870311358464 168 2113536) itemoff 10091 itemsize 50 + extent refs 1 gen 84178 flags 1 + ref#0: shared data backref parent 32399126528000 count 0 <<< + ref#1: shared data backref parent 31808973717504 count 1 + +Notice the count number is 0. + +[CAUSE] +There is no concrete evidence yet, but considering 0 -> 1 is also a +single bit flipped, it's possible that hardware memory bitflip is +involved, causing the on-disk extent tree to be corrupted. + +[FIX] +To prevent us reading such corrupted extent item, or writing such +damaged extent item back to disk, enhance the handling of +BTRFS_EXTENT_DATA_REF_KEY and BTRFS_SHARED_DATA_REF_KEY keys for both +inlined and key items, to detect such 0 ref count and reject them. 
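+
+The checks added below all follow the same shape; roughly (a simplified
+sketch taken from the diff that follows, not an extra hunk):
+
+	if (unlikely(btrfs_extent_data_ref_count(leaf, dref) == 0)) {
+		extent_err(leaf, slot,
+			   "invalid data ref count, should have non-zero value");
+		return -EUCLEAN;
+	}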
+ +CC: stable@vger.kernel.org # 5.4+ +Link: https://lore.kernel.org/linux-btrfs/7c69dd49-c346-4806-86e7-e6f863a66f48@app.fastmail.com/ +Reported-by: Frankie Fisher +Reviewed-by: Filipe Manana +Signed-off-by: Qu Wenruo +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/tree-checker.c | 27 ++++++++++++++++++++++++++- + 1 file changed, 26 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/tree-checker.c ++++ b/fs/btrfs/tree-checker.c +@@ -1433,6 +1433,11 @@ static int check_extent_item(struct exte + dref_offset, fs_info->sectorsize); + return -EUCLEAN; + } ++ if (unlikely(btrfs_extent_data_ref_count(leaf, dref) == 0)) { ++ extent_err(leaf, slot, ++ "invalid data ref count, should have non-zero value"); ++ return -EUCLEAN; ++ } + inline_refs += btrfs_extent_data_ref_count(leaf, dref); + break; + /* Contains parent bytenr and ref count */ +@@ -1445,6 +1450,11 @@ static int check_extent_item(struct exte + inline_offset, fs_info->sectorsize); + return -EUCLEAN; + } ++ if (unlikely(btrfs_shared_data_ref_count(leaf, sref) == 0)) { ++ extent_err(leaf, slot, ++ "invalid shared data ref count, should have non-zero value"); ++ return -EUCLEAN; ++ } + inline_refs += btrfs_shared_data_ref_count(leaf, sref); + break; + default: +@@ -1496,8 +1506,18 @@ static int check_simple_keyed_refs(struc + { + u32 expect_item_size = 0; + +- if (key->type == BTRFS_SHARED_DATA_REF_KEY) ++ if (key->type == BTRFS_SHARED_DATA_REF_KEY) { ++ struct btrfs_shared_data_ref *sref; ++ ++ sref = btrfs_item_ptr(leaf, slot, struct btrfs_shared_data_ref); ++ if (unlikely(btrfs_shared_data_ref_count(leaf, sref) == 0)) { ++ extent_err(leaf, slot, ++ "invalid shared data backref count, should have non-zero value"); ++ return -EUCLEAN; ++ } ++ + expect_item_size = sizeof(struct btrfs_shared_data_ref); ++ } + + if (unlikely(btrfs_item_size(leaf, slot) != expect_item_size)) { + generic_err(leaf, slot, +@@ -1557,6 +1577,11 @@ static int check_extent_data_ref(struct + offset, leaf->fs_info->sectorsize); + return -EUCLEAN; + } ++ if (unlikely(btrfs_extent_data_ref_count(leaf, dref) == 0)) { ++ extent_err(leaf, slot, ++ "invalid extent data backref count, should have non-zero value"); ++ return -EUCLEAN; ++ } + } + return 0; + } diff --git a/queue-6.1/drivers-hv-util-avoid-accessing-a-ringbuffer-not-initialized-yet.patch b/queue-6.1/drivers-hv-util-avoid-accessing-a-ringbuffer-not-initialized-yet.patch new file mode 100644 index 00000000000..bd5390af932 --- /dev/null +++ b/queue-6.1/drivers-hv-util-avoid-accessing-a-ringbuffer-not-initialized-yet.patch @@ -0,0 +1,169 @@ +From 07a756a49f4b4290b49ea46e089cbe6f79ff8d26 Mon Sep 17 00:00:00 2001 +From: Michael Kelley +Date: Wed, 6 Nov 2024 07:42:47 -0800 +Subject: Drivers: hv: util: Avoid accessing a ringbuffer not initialized yet + +From: Michael Kelley + +commit 07a756a49f4b4290b49ea46e089cbe6f79ff8d26 upstream. + +If the KVP (or VSS) daemon starts before the VMBus channel's ringbuffer is +fully initialized, we can hit the panic below: + +hv_utils: Registering HyperV Utility Driver +hv_vmbus: registering driver hv_utils +... +BUG: kernel NULL pointer dereference, address: 0000000000000000 +CPU: 44 UID: 0 PID: 2552 Comm: hv_kvp_daemon Tainted: G E 6.11.0-rc3+ #1 +RIP: 0010:hv_pkt_iter_first+0x12/0xd0 +Call Trace: +... + vmbus_recvpacket + hv_kvp_onchannelcallback + vmbus_on_event + tasklet_action_common + tasklet_action + handle_softirqs + irq_exit_rcu + sysvec_hyperv_stimer0 + + + asm_sysvec_hyperv_stimer0 +... 
+ kvp_register_done + hvt_op_read + vfs_read + ksys_read + __x64_sys_read + +This can happen because the KVP/VSS channel callback can be invoked +even before the channel is fully opened: +1) as soon as hv_kvp_init() -> hvutil_transport_init() creates +/dev/vmbus/hv_kvp, the kvp daemon can open the device file immediately and +register itself to the driver by writing a message KVP_OP_REGISTER1 to the +file (which is handled by kvp_on_msg() ->kvp_handle_handshake()) and +reading the file for the driver's response, which is handled by +hvt_op_read(), which calls hvt->on_read(), i.e. kvp_register_done(). + +2) the problem with kvp_register_done() is that it can cause the +channel callback to be called even before the channel is fully opened, +and when the channel callback is starting to run, util_probe()-> +vmbus_open() may have not initialized the ringbuffer yet, so the +callback can hit the panic of NULL pointer dereference. + +To reproduce the panic consistently, we can add a "ssleep(10)" for KVP in +__vmbus_open(), just before the first hv_ringbuffer_init(), and then we +unload and reload the driver hv_utils, and run the daemon manually within +the 10 seconds. + +Fix the panic by reordering the steps in util_probe() so the char dev +entry used by the KVP or VSS daemon is not created until after +vmbus_open() has completed. This reordering prevents the race condition +from happening. + +Reported-by: Dexuan Cui +Fixes: e0fa3e5e7df6 ("Drivers: hv: utils: fix a race on userspace daemons registration") +Cc: stable@vger.kernel.org +Signed-off-by: Michael Kelley +Acked-by: Wei Liu +Link: https://lore.kernel.org/r/20241106154247.2271-3-mhklinux@outlook.com +Signed-off-by: Wei Liu +Message-ID: <20241106154247.2271-3-mhklinux@outlook.com> +Signed-off-by: Greg Kroah-Hartman +--- + drivers/hv/hv_kvp.c | 6 ++++++ + drivers/hv/hv_snapshot.c | 6 ++++++ + drivers/hv/hv_util.c | 9 +++++++++ + drivers/hv/hyperv_vmbus.h | 2 ++ + include/linux/hyperv.h | 1 + + 5 files changed, 24 insertions(+) + +--- a/drivers/hv/hv_kvp.c ++++ b/drivers/hv/hv_kvp.c +@@ -767,6 +767,12 @@ hv_kvp_init(struct hv_util_service *srv) + */ + kvp_transaction.state = HVUTIL_DEVICE_INIT; + ++ return 0; ++} ++ ++int ++hv_kvp_init_transport(void) ++{ + hvt = hvutil_transport_init(kvp_devname, CN_KVP_IDX, CN_KVP_VAL, + kvp_on_msg, kvp_on_reset); + if (!hvt) +--- a/drivers/hv/hv_snapshot.c ++++ b/drivers/hv/hv_snapshot.c +@@ -388,6 +388,12 @@ hv_vss_init(struct hv_util_service *srv) + */ + vss_transaction.state = HVUTIL_DEVICE_INIT; + ++ return 0; ++} ++ ++int ++hv_vss_init_transport(void) ++{ + hvt = hvutil_transport_init(vss_devname, CN_VSS_IDX, CN_VSS_VAL, + vss_on_msg, vss_on_reset); + if (!hvt) { +--- a/drivers/hv/hv_util.c ++++ b/drivers/hv/hv_util.c +@@ -141,6 +141,7 @@ static struct hv_util_service util_heart + static struct hv_util_service util_kvp = { + .util_cb = hv_kvp_onchannelcallback, + .util_init = hv_kvp_init, ++ .util_init_transport = hv_kvp_init_transport, + .util_pre_suspend = hv_kvp_pre_suspend, + .util_pre_resume = hv_kvp_pre_resume, + .util_deinit = hv_kvp_deinit, +@@ -149,6 +150,7 @@ static struct hv_util_service util_kvp = + static struct hv_util_service util_vss = { + .util_cb = hv_vss_onchannelcallback, + .util_init = hv_vss_init, ++ .util_init_transport = hv_vss_init_transport, + .util_pre_suspend = hv_vss_pre_suspend, + .util_pre_resume = hv_vss_pre_resume, + .util_deinit = hv_vss_deinit, +@@ -592,6 +594,13 @@ static int util_probe(struct hv_device * + if (ret) + goto error; + ++ if (srv->util_init_transport) { 
++ ret = srv->util_init_transport(); ++ if (ret) { ++ vmbus_close(dev->channel); ++ goto error; ++ } ++ } + return 0; + + error: +--- a/drivers/hv/hyperv_vmbus.h ++++ b/drivers/hv/hyperv_vmbus.h +@@ -365,12 +365,14 @@ void vmbus_on_event(unsigned long data); + void vmbus_on_msg_dpc(unsigned long data); + + int hv_kvp_init(struct hv_util_service *srv); ++int hv_kvp_init_transport(void); + void hv_kvp_deinit(void); + int hv_kvp_pre_suspend(void); + int hv_kvp_pre_resume(void); + void hv_kvp_onchannelcallback(void *context); + + int hv_vss_init(struct hv_util_service *srv); ++int hv_vss_init_transport(void); + void hv_vss_deinit(void); + int hv_vss_pre_suspend(void); + int hv_vss_pre_resume(void); +--- a/include/linux/hyperv.h ++++ b/include/linux/hyperv.h +@@ -1567,6 +1567,7 @@ struct hv_util_service { + void *channel; + void (*util_cb)(void *); + int (*util_init)(struct hv_util_service *); ++ int (*util_init_transport)(void); + void (*util_deinit)(void); + int (*util_pre_suspend)(void); + int (*util_pre_resume)(void); diff --git a/queue-6.1/io_uring-check-if-iowq-is-killed-before-queuing.patch b/queue-6.1/io_uring-check-if-iowq-is-killed-before-queuing.patch new file mode 100644 index 00000000000..f13278c23f8 --- /dev/null +++ b/queue-6.1/io_uring-check-if-iowq-is-killed-before-queuing.patch @@ -0,0 +1,46 @@ +From dbd2ca9367eb19bc5e269b8c58b0b1514ada9156 Mon Sep 17 00:00:00 2001 +From: Pavel Begunkov +Date: Thu, 19 Dec 2024 19:52:58 +0000 +Subject: io_uring: check if iowq is killed before queuing + +From: Pavel Begunkov + +commit dbd2ca9367eb19bc5e269b8c58b0b1514ada9156 upstream. + +task work can be executed after the task has gone through io_uring +termination, whether it's the final task_work run or the fallback path. +In this case, task work will find ->io_wq being already killed and +null'ed, which is a problem if it then tries to forward the request to +io_queue_iowq(). Make io_queue_iowq() fail requests in this case. + +Note that it also checks PF_KTHREAD, because the user can first close +a DEFER_TASKRUN ring and shortly after kill the task, in which case +->iowq check would race. 
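+
+Concretely, the BUG_ON(!tctx->io_wq) in io_queue_iowq() becomes a graceful
+failure; a sketch of the resulting check (see the diff below):
+
+	if ((current->flags & PF_KTHREAD) || !tctx->io_wq) {
+		io_req_task_queue_fail(req, -ECANCELED);
+		return;
+	}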
+ +Cc: stable@vger.kernel.org +Fixes: 50c52250e2d74 ("block: implement async io_uring discard cmd") +Fixes: 773af69121ecc ("io_uring: always reissue from task_work context") +Reported-by: Will +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/63312b4a2c2bb67ad67b857d17a300e1d3b078e8.1734637909.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/io_uring.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -440,7 +440,11 @@ void io_queue_iowq(struct io_kiocb *req, + struct io_uring_task *tctx = req->task->io_uring; + + BUG_ON(!tctx); +- BUG_ON(!tctx->io_wq); ++ ++ if ((current->flags & PF_KTHREAD) || !tctx->io_wq) { ++ io_req_task_queue_fail(req, -ECANCELED); ++ return; ++ } + + /* init ->work of the whole link before punting */ + io_prep_async_link(req); diff --git a/queue-6.1/io_uring-fix-registered-ring-file-refcount-leak.patch b/queue-6.1/io_uring-fix-registered-ring-file-refcount-leak.patch new file mode 100644 index 00000000000..6ba35725dfb --- /dev/null +++ b/queue-6.1/io_uring-fix-registered-ring-file-refcount-leak.patch @@ -0,0 +1,64 @@ +From 12d908116f7efd34f255a482b9afc729d7a5fb78 Mon Sep 17 00:00:00 2001 +From: Jann Horn +Date: Wed, 18 Dec 2024 17:56:25 +0100 +Subject: io_uring: Fix registered ring file refcount leak + +From: Jann Horn + +commit 12d908116f7efd34f255a482b9afc729d7a5fb78 upstream. + +Currently, io_uring_unreg_ringfd() (which cleans up registered rings) is +only called on exit, but __io_uring_free (which frees the tctx in which the +registered ring pointers are stored) is also called on execve (via +begin_new_exec -> io_uring_task_cancel -> __io_uring_cancel -> +io_uring_cancel_generic -> __io_uring_free). + +This means: A process going through execve while having registered rings +will leak references to the rings' `struct file`. + +Fix it by zapping registered rings on execve(). This is implemented by +moving the io_uring_unreg_ringfd() from io_uring_files_cancel() into its +callee __io_uring_cancel(), which is called from io_uring_task_cancel() on +execve. + +This could probably be exploited *on 32-bit kernels* by leaking 2^32 +references to the same ring, because the file refcount is stored in a +pointer-sized field and get_file() doesn't have protection against +refcount overflow, just a WARN_ONCE(); but on 64-bit it should have no +impact beyond a memory leak. 
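+
+With the change below, __io_uring_cancel() does the unregistration itself;
+a sketch of the resulting function (taken from the diff that follows):
+
+	void __io_uring_cancel(bool cancel_all)
+	{
+		io_uring_unreg_ringfd();
+		io_uring_cancel_generic(cancel_all, NULL);
+	}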
+ +Cc: stable@vger.kernel.org +Fixes: e7a6c00dc77a ("io_uring: add support for registering ring file descriptors") +Signed-off-by: Jann Horn +Link: https://lore.kernel.org/r/20241218-uring-reg-ring-cleanup-v1-1-8f63e999045b@google.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/io_uring.h | 4 +--- + io_uring/io_uring.c | 1 + + 2 files changed, 2 insertions(+), 3 deletions(-) + +--- a/include/linux/io_uring.h ++++ b/include/linux/io_uring.h +@@ -50,10 +50,8 @@ bool io_is_uring_fops(struct file *file) + + static inline void io_uring_files_cancel(void) + { +- if (current->io_uring) { +- io_uring_unreg_ringfd(); ++ if (current->io_uring) + __io_uring_cancel(false); +- } + } + static inline void io_uring_task_cancel(void) + { +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -3085,6 +3085,7 @@ __cold void io_uring_cancel_generic(bool + + void __io_uring_cancel(bool cancel_all) + { ++ io_uring_unreg_ringfd(); + io_uring_cancel_generic(cancel_all, NULL); + } + diff --git a/queue-6.1/kvm-x86-play-nice-with-protected-guests-in-complete_hypercall_exit.patch b/queue-6.1/kvm-x86-play-nice-with-protected-guests-in-complete_hypercall_exit.patch new file mode 100644 index 00000000000..a718db921fe --- /dev/null +++ b/queue-6.1/kvm-x86-play-nice-with-protected-guests-in-complete_hypercall_exit.patch @@ -0,0 +1,59 @@ +From 9b42d1e8e4fe9dc631162c04caa69b0d1860b0f0 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Wed, 27 Nov 2024 16:43:39 -0800 +Subject: KVM: x86: Play nice with protected guests in complete_hypercall_exit() + +From: Sean Christopherson + +commit 9b42d1e8e4fe9dc631162c04caa69b0d1860b0f0 upstream. + +Use is_64_bit_hypercall() instead of is_64_bit_mode() to detect a 64-bit +hypercall when completing said hypercall. For guests with protected state, +e.g. SEV-ES and SEV-SNP, KVM must assume the hypercall was made in 64-bit +mode as the vCPU state needed to detect 64-bit mode is unavailable. + +Hacking the sev_smoke_test selftest to generate a KVM_HC_MAP_GPA_RANGE +hypercall via VMGEXIT trips the WARN: + + ------------[ cut here ]------------ + WARNING: CPU: 273 PID: 326626 at arch/x86/kvm/x86.h:180 complete_hypercall_exit+0x44/0xe0 [kvm] + Modules linked in: kvm_amd kvm ... 
[last unloaded: kvm] + CPU: 273 UID: 0 PID: 326626 Comm: sev_smoke_test Not tainted 6.12.0-smp--392e932fa0f3-feat #470 + Hardware name: Google Astoria/astoria, BIOS 0.20240617.0-0 06/17/2024 + RIP: 0010:complete_hypercall_exit+0x44/0xe0 [kvm] + Call Trace: + + kvm_arch_vcpu_ioctl_run+0x2400/0x2720 [kvm] + kvm_vcpu_ioctl+0x54f/0x630 [kvm] + __se_sys_ioctl+0x6b/0xc0 + do_syscall_64+0x83/0x160 + entry_SYSCALL_64_after_hwframe+0x76/0x7e + + ---[ end trace 0000000000000000 ]--- + +Fixes: b5aead0064f3 ("KVM: x86: Assume a 64-bit hypercall for guests with protected state") +Cc: stable@vger.kernel.org +Cc: Tom Lendacky +Reviewed-by: Xiaoyao Li +Reviewed-by: Nikunj A Dadhania +Reviewed-by: Tom Lendacky +Reviewed-by: Binbin Wu +Reviewed-by: Kai Huang +Link: https://lore.kernel.org/r/20241128004344.4072099-2-seanjc@google.com +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/x86.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -9712,7 +9712,7 @@ static int complete_hypercall_exit(struc + { + u64 ret = vcpu->run->hypercall.ret; + +- if (!is_64_bit_mode(vcpu)) ++ if (!is_64_bit_hypercall(vcpu)) + ret = (u32)ret; + kvm_rax_write(vcpu, ret); + ++vcpu->stat.hypercalls; diff --git a/queue-6.1/selftests-bpf-use-asm-constraint-m-for-loongarch.patch b/queue-6.1/selftests-bpf-use-asm-constraint-m-for-loongarch.patch new file mode 100644 index 00000000000..a491f2993a5 --- /dev/null +++ b/queue-6.1/selftests-bpf-use-asm-constraint-m-for-loongarch.patch @@ -0,0 +1,40 @@ +From 29d44cce324dab2bd86c447071a596262e7109b6 Mon Sep 17 00:00:00 2001 +From: Tiezhu Yang +Date: Thu, 19 Dec 2024 19:15:06 +0800 +Subject: selftests/bpf: Use asm constraint "m" for LoongArch + +From: Tiezhu Yang + +commit 29d44cce324dab2bd86c447071a596262e7109b6 upstream. + +Currently, LoongArch LLVM does not support the constraint "o" and no plan +to support it, it only supports the similar constraint "m", so change the +constraints from "nor" in the "else" case to arch-specific "nmr" to avoid +the build error such as "unexpected asm memory constraint" for LoongArch. 
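+
+The resulting constraint selection in sdt.h reads (abridged sketch of the
+file after the diff below is applied):
+
+	# elif defined __loongarch__
+	# define STAP_SDT_ARG_CONSTRAINT nmr
+	# else
+	# define STAP_SDT_ARG_CONSTRAINT nor
+	# endif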
+ +Fixes: 630301b0d59d ("selftests/bpf: Add basic USDT selftests") +Suggested-by: Weining Lu +Suggested-by: Li Chen +Signed-off-by: Tiezhu Yang +Signed-off-by: Daniel Borkmann +Reviewed-by: Huacai Chen +Cc: stable@vger.kernel.org +Link: https://llvm.org/docs/LangRef.html#supported-constraint-code-list +Link: https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp#L172 +Link: https://lore.kernel.org/bpf/20241219111506.20643-1-yangtiezhu@loongson.cn +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/bpf/sdt.h | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/tools/testing/selftests/bpf/sdt.h ++++ b/tools/testing/selftests/bpf/sdt.h +@@ -102,6 +102,8 @@ + # define STAP_SDT_ARG_CONSTRAINT nZr + # elif defined __arm__ + # define STAP_SDT_ARG_CONSTRAINT g ++# elif defined __loongarch__ ++# define STAP_SDT_ARG_CONSTRAINT nmr + # else + # define STAP_SDT_ARG_CONSTRAINT nor + # endif diff --git a/queue-6.1/series b/queue-6.1/series index 550243506b5..258bfbf38ab 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -56,3 +56,12 @@ hwmon-tmp513-fix-interpretation-of-values-of-tempera.patch sh-clk-fix-clk_enable-to-return-0-on-null-clk.patch zram-refuse-to-use-zero-sized-block-device-as-backing-device.patch zram-fix-uninitialized-zram-not-releasing-backing-device.patch +btrfs-tree-checker-reject-inline-extent-items-with-0-ref-count.patch +drivers-hv-util-avoid-accessing-a-ringbuffer-not-initialized-yet.patch +kvm-x86-play-nice-with-protected-guests-in-complete_hypercall_exit.patch +tracing-fix-test_event_printk-to-process-entire-print-argument.patch +tracing-add-missing-helper-functions-in-event-pointer-dereference-check.patch +tracing-add-s-check-in-test_event_printk.patch +selftests-bpf-use-asm-constraint-m-for-loongarch.patch +io_uring-fix-registered-ring-file-refcount-leak.patch +io_uring-check-if-iowq-is-killed-before-queuing.patch diff --git a/queue-6.1/tracing-add-missing-helper-functions-in-event-pointer-dereference-check.patch b/queue-6.1/tracing-add-missing-helper-functions-in-event-pointer-dereference-check.patch new file mode 100644 index 00000000000..a51dce36643 --- /dev/null +++ b/queue-6.1/tracing-add-missing-helper-functions-in-event-pointer-dereference-check.patch @@ -0,0 +1,78 @@ +From 917110481f6bc1c96b1e54b62bb114137fbc6d17 Mon Sep 17 00:00:00 2001 +From: Steven Rostedt +Date: Mon, 16 Dec 2024 21:41:20 -0500 +Subject: tracing: Add missing helper functions in event pointer dereference check + +From: Steven Rostedt + +commit 917110481f6bc1c96b1e54b62bb114137fbc6d17 upstream. + +The process_pointer() helper function looks to see if various trace event +macros are used. These macros are for storing data in the event. This +makes it safe to dereference as the dereference will then point into the +event on the ring buffer where the content of the data stays with the +event itself. + +A few helper functions were missing. Those were: + + __get_rel_dynamic_array() + __get_dynamic_array_len() + __get_rel_dynamic_array_len() + __get_rel_sockaddr() + +Also add a helper function find_print_string() to not need to use a middle +man variable to test if the string exists. 
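+
+With the new helper, each check in process_pointer() collapses from
+
+	} else if ((r = strstr(fmt, "__get_dynamic_array(")) && r < e) {
+
+to
+
+	} else if (find_print_string(fmt, "__get_dynamic_array(", e)) {
+
+and the missing __get_rel_* and *_len variants are added alongside it
+(see the diff below).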
+ +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Cc: Andrew Morton +Cc: Al Viro +Cc: Linus Torvalds +Link: https://lore.kernel.org/20241217024720.521836792@goodmis.org +Fixes: 5013f454a352c ("tracing: Add check of trace event print fmts for dereferencing pointers") +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/trace_events.c | 21 +++++++++++++++++++-- + 1 file changed, 19 insertions(+), 2 deletions(-) + +--- a/kernel/trace/trace_events.c ++++ b/kernel/trace/trace_events.c +@@ -272,6 +272,15 @@ static bool test_field(const char *fmt, + return false; + } + ++/* Look for a string within an argument */ ++static bool find_print_string(const char *arg, const char *str, const char *end) ++{ ++ const char *r; ++ ++ r = strstr(arg, str); ++ return r && r < end; ++} ++ + /* Return true if the argument pointer is safe */ + static bool process_pointer(const char *fmt, int len, struct trace_event_call *call) + { +@@ -290,9 +299,17 @@ static bool process_pointer(const char * + a = strchr(fmt, '&'); + if ((a && (a < r)) || test_field(r, call)) + return true; +- } else if ((r = strstr(fmt, "__get_dynamic_array(")) && r < e) { ++ } else if (find_print_string(fmt, "__get_dynamic_array(", e)) { ++ return true; ++ } else if (find_print_string(fmt, "__get_rel_dynamic_array(", e)) { ++ return true; ++ } else if (find_print_string(fmt, "__get_dynamic_array_len(", e)) { ++ return true; ++ } else if (find_print_string(fmt, "__get_rel_dynamic_array_len(", e)) { ++ return true; ++ } else if (find_print_string(fmt, "__get_sockaddr(", e)) { + return true; +- } else if ((r = strstr(fmt, "__get_sockaddr(")) && r < e) { ++ } else if (find_print_string(fmt, "__get_rel_sockaddr(", e)) { + return true; + } + return false; diff --git a/queue-6.1/tracing-add-s-check-in-test_event_printk.patch b/queue-6.1/tracing-add-s-check-in-test_event_printk.patch new file mode 100644 index 00000000000..a8e7186d5e5 --- /dev/null +++ b/queue-6.1/tracing-add-s-check-in-test_event_printk.patch @@ -0,0 +1,206 @@ +From 65a25d9f7ac02e0cf361356e834d1c71d36acca9 Mon Sep 17 00:00:00 2001 +From: Steven Rostedt +Date: Mon, 16 Dec 2024 21:41:21 -0500 +Subject: tracing: Add "%s" check in test_event_printk() + +From: Steven Rostedt + +commit 65a25d9f7ac02e0cf361356e834d1c71d36acca9 upstream. + +The test_event_printk() code makes sure that when a trace event is +registered, any dereferenced pointers in from the event's TP_printk() are +pointing to content in the ring buffer. But currently it does not handle +"%s", as there's cases where the string pointer saved in the ring buffer +points to a static string in the kernel that will never be freed. As that +is a valid case, the pointer needs to be checked at runtime. + +Currently the runtime check is done via trace_check_vprintf(), but to not +have to replicate everything in vsnprintf() it does some logic with the +va_list that may not be reliable across architectures. In order to get rid +of that logic, more work in the test_event_printk() needs to be done. Some +of the strings can be validated at this time when it is obvious the string +is valid because the string will be saved in the ring buffer content. + +Do all the validation of strings in the ring buffer at boot in +test_event_printk(), and make sure that the field of the strings that +point into the kernel are accessible. 
This will allow adding checks at +runtime that will validate the fields themselves and not rely on paring +the TP_printk() format at runtime. + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Cc: Andrew Morton +Cc: Al Viro +Cc: Linus Torvalds +Link: https://lore.kernel.org/20241217024720.685917008@goodmis.org +Fixes: 5013f454a352c ("tracing: Add check of trace event print fmts for dereferencing pointers") +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/trace_events.c | 104 +++++++++++++++++++++++++++++++++++++------- + 1 file changed, 89 insertions(+), 15 deletions(-) + +--- a/kernel/trace/trace_events.c ++++ b/kernel/trace/trace_events.c +@@ -242,19 +242,16 @@ int trace_event_get_offsets(struct trace + return tail->offset + tail->size; + } + +-/* +- * Check if the referenced field is an array and return true, +- * as arrays are OK to dereference. +- */ +-static bool test_field(const char *fmt, struct trace_event_call *call) ++ ++static struct trace_event_fields *find_event_field(const char *fmt, ++ struct trace_event_call *call) + { + struct trace_event_fields *field = call->class->fields_array; +- const char *array_descriptor; + const char *p = fmt; + int len; + + if (!(len = str_has_prefix(fmt, "REC->"))) +- return false; ++ return NULL; + fmt += len; + for (p = fmt; *p; p++) { + if (!isalnum(*p) && *p != '_') +@@ -265,11 +262,26 @@ static bool test_field(const char *fmt, + for (; field->type; field++) { + if (strncmp(field->name, fmt, len) || field->name[len]) + continue; +- array_descriptor = strchr(field->type, '['); +- /* This is an array and is OK to dereference. */ +- return array_descriptor != NULL; ++ ++ return field; + } +- return false; ++ return NULL; ++} ++ ++/* ++ * Check if the referenced field is an array and return true, ++ * as arrays are OK to dereference. ++ */ ++static bool test_field(const char *fmt, struct trace_event_call *call) ++{ ++ struct trace_event_fields *field; ++ ++ field = find_event_field(fmt, call); ++ if (!field) ++ return false; ++ ++ /* This is an array and is OK to dereference. */ ++ return strchr(field->type, '[') != NULL; + } + + /* Look for a string within an argument */ +@@ -315,6 +327,53 @@ static bool process_pointer(const char * + return false; + } + ++/* Return true if the string is safe */ ++static bool process_string(const char *fmt, int len, struct trace_event_call *call) ++{ ++ const char *r, *e, *s; ++ ++ e = fmt + len; ++ ++ /* ++ * There are several helper functions that return strings. ++ * If the argument contains a function, then assume its field is valid. ++ * It is considered that the argument has a function if it has: ++ * alphanumeric or '_' before a parenthesis. ++ */ ++ s = fmt; ++ do { ++ r = strstr(s, "("); ++ if (!r || r >= e) ++ break; ++ for (int i = 1; r - i >= s; i++) { ++ char ch = *(r - i); ++ if (isspace(ch)) ++ continue; ++ if (isalnum(ch) || ch == '_') ++ return true; ++ /* Anything else, this isn't a function */ ++ break; ++ } ++ /* A function could be wrapped in parethesis, try the next one */ ++ s = r + 1; ++ } while (s < e); ++ ++ /* ++ * If there's any strings in the argument consider this arg OK as it ++ * could be: REC->field ? "foo" : "bar" and we don't want to get into ++ * verifying that logic here. 
++ */ ++ if (find_print_string(fmt, "\"", e)) ++ return true; ++ ++ /* Dereferenced strings are also valid like any other pointer */ ++ if (process_pointer(fmt, len, call)) ++ return true; ++ ++ /* Make sure the field is found, and consider it OK for now if it is */ ++ return find_event_field(fmt, call) != NULL; ++} ++ + /* + * Examine the print fmt of the event looking for unsafe dereference + * pointers using %p* that could be recorded in the trace event and +@@ -324,6 +383,7 @@ static bool process_pointer(const char * + static void test_event_printk(struct trace_event_call *call) + { + u64 dereference_flags = 0; ++ u64 string_flags = 0; + bool first = true; + const char *fmt; + int parens = 0; +@@ -414,8 +474,16 @@ static void test_event_printk(struct tra + star = true; + continue; + } +- if ((fmt[i + j] == 's') && star) +- arg++; ++ if ((fmt[i + j] == 's')) { ++ if (star) ++ arg++; ++ if (WARN_ONCE(arg == 63, ++ "Too many args for event: %s", ++ trace_event_name(call))) ++ return; ++ dereference_flags |= 1ULL << arg; ++ string_flags |= 1ULL << arg; ++ } + break; + } + break; +@@ -462,7 +530,10 @@ static void test_event_printk(struct tra + } + + if (dereference_flags & (1ULL << arg)) { +- if (process_pointer(fmt + start_arg, e - start_arg, call)) ++ if (string_flags & (1ULL << arg)) { ++ if (process_string(fmt + start_arg, e - start_arg, call)) ++ dereference_flags &= ~(1ULL << arg); ++ } else if (process_pointer(fmt + start_arg, e - start_arg, call)) + dereference_flags &= ~(1ULL << arg); + } + +@@ -474,7 +545,10 @@ static void test_event_printk(struct tra + } + + if (dereference_flags & (1ULL << arg)) { +- if (process_pointer(fmt + start_arg, i - start_arg, call)) ++ if (string_flags & (1ULL << arg)) { ++ if (process_string(fmt + start_arg, i - start_arg, call)) ++ dereference_flags &= ~(1ULL << arg); ++ } else if (process_pointer(fmt + start_arg, i - start_arg, call)) + dereference_flags &= ~(1ULL << arg); + } + diff --git a/queue-6.1/tracing-fix-test_event_printk-to-process-entire-print-argument.patch b/queue-6.1/tracing-fix-test_event_printk-to-process-entire-print-argument.patch new file mode 100644 index 00000000000..def6642ab76 --- /dev/null +++ b/queue-6.1/tracing-fix-test_event_printk-to-process-entire-print-argument.patch @@ -0,0 +1,184 @@ +From a6629626c584200daf495cc9a740048b455addcd Mon Sep 17 00:00:00 2001 +From: Steven Rostedt +Date: Mon, 16 Dec 2024 21:41:19 -0500 +Subject: tracing: Fix test_event_printk() to process entire print argument + +From: Steven Rostedt + +commit a6629626c584200daf495cc9a740048b455addcd upstream. + +The test_event_printk() analyzes print formats of trace events looking for +cases where it may dereference a pointer that is not in the ring buffer +which can possibly be a bug when the trace event is read from the ring +buffer and the content of that pointer no longer exists. + +The function needs to accurately go from one print format argument to the +next. It handles quotes and parenthesis that may be included in an +argument. When it finds the start of the next argument, it uses a simple +"c = strstr(fmt + i, ',')" to find the end of that argument! + +In order to include "%s" dereferencing, it needs to process the entire +content of the print format argument and not just the content of the first +',' it finds. 
As there may be content like: + + ({ const char *saved_ptr = trace_seq_buffer_ptr(p); static const char + *access_str[] = { "---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux" + }; union kvm_mmu_page_role role; role.word = REC->role; + trace_seq_printf(p, "sp gen %u gfn %llx l%u %u-byte q%u%s %s%s" " %snxe + %sad root %u %s%c", REC->mmu_valid_gen, REC->gfn, role.level, + role.has_4_byte_gpte ? 4 : 8, role.quadrant, role.direct ? " direct" : "", + access_str[role.access], role.invalid ? " invalid" : "", role.efer_nx ? "" + : "!", role.ad_disabled ? "!" : "", REC->root_count, REC->unsync ? + "unsync" : "sync", 0); saved_ptr; }) + +Which is an example of a full argument of an existing event. As the code +already handles finding the next print format argument, process the +argument at the end of it and not the start of it. This way it has both +the start of the argument as well as the end of it. + +Add a helper function "process_pointer()" that will do the processing during +the loop as well as at the end. It also makes the code cleaner and easier +to read. + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Cc: Andrew Morton +Cc: Al Viro +Cc: Linus Torvalds +Link: https://lore.kernel.org/20241217024720.362271189@goodmis.org +Fixes: 5013f454a352c ("tracing: Add check of trace event print fmts for dereferencing pointers") +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/trace_events.c | 82 ++++++++++++++++++++++++++++---------------- + 1 file changed, 53 insertions(+), 29 deletions(-) + +--- a/kernel/trace/trace_events.c ++++ b/kernel/trace/trace_events.c +@@ -263,8 +263,7 @@ static bool test_field(const char *fmt, + len = p - fmt; + + for (; field->type; field++) { +- if (strncmp(field->name, fmt, len) || +- field->name[len]) ++ if (strncmp(field->name, fmt, len) || field->name[len]) + continue; + array_descriptor = strchr(field->type, '['); + /* This is an array and is OK to dereference. */ +@@ -273,6 +272,32 @@ static bool test_field(const char *fmt, + return false; + } + ++/* Return true if the argument pointer is safe */ ++static bool process_pointer(const char *fmt, int len, struct trace_event_call *call) ++{ ++ const char *r, *e, *a; ++ ++ e = fmt + len; ++ ++ /* Find the REC-> in the argument */ ++ r = strstr(fmt, "REC->"); ++ if (r && r < e) { ++ /* ++ * Addresses of events on the buffer, or an array on the buffer is ++ * OK to dereference. There's ways to fool this, but ++ * this is to catch common mistakes, not malicious code. 
++ */ ++ a = strchr(fmt, '&'); ++ if ((a && (a < r)) || test_field(r, call)) ++ return true; ++ } else if ((r = strstr(fmt, "__get_dynamic_array(")) && r < e) { ++ return true; ++ } else if ((r = strstr(fmt, "__get_sockaddr(")) && r < e) { ++ return true; ++ } ++ return false; ++} ++ + /* + * Examine the print fmt of the event looking for unsafe dereference + * pointers using %p* that could be recorded in the trace event and +@@ -283,12 +308,12 @@ static void test_event_printk(struct tra + { + u64 dereference_flags = 0; + bool first = true; +- const char *fmt, *c, *r, *a; ++ const char *fmt; + int parens = 0; + char in_quote = 0; + int start_arg = 0; + int arg = 0; +- int i; ++ int i, e; + + fmt = call->print_fmt; + +@@ -401,42 +426,41 @@ static void test_event_printk(struct tra + case ',': + if (in_quote || parens) + continue; ++ e = i; + i++; + while (isspace(fmt[i])) + i++; +- start_arg = i; +- if (!(dereference_flags & (1ULL << arg))) +- goto next_arg; + +- /* Find the REC-> in the argument */ +- c = strchr(fmt + i, ','); +- r = strstr(fmt + i, "REC->"); +- if (r && (!c || r < c)) { +- /* +- * Addresses of events on the buffer, +- * or an array on the buffer is +- * OK to dereference. +- * There's ways to fool this, but +- * this is to catch common mistakes, +- * not malicious code. +- */ +- a = strchr(fmt + i, '&'); +- if ((a && (a < r)) || test_field(r, call)) ++ /* ++ * If start_arg is zero, then this is the start of the ++ * first argument. The processing of the argument happens ++ * when the end of the argument is found, as it needs to ++ * handle paranthesis and such. ++ */ ++ if (!start_arg) { ++ start_arg = i; ++ /* Balance out the i++ in the for loop */ ++ i--; ++ continue; ++ } ++ ++ if (dereference_flags & (1ULL << arg)) { ++ if (process_pointer(fmt + start_arg, e - start_arg, call)) + dereference_flags &= ~(1ULL << arg); +- } else if ((r = strstr(fmt + i, "__get_dynamic_array(")) && +- (!c || r < c)) { +- dereference_flags &= ~(1ULL << arg); +- } else if ((r = strstr(fmt + i, "__get_sockaddr(")) && +- (!c || r < c)) { +- dereference_flags &= ~(1ULL << arg); + } + +- next_arg: +- i--; ++ start_arg = i; + arg++; ++ /* Balance out the i++ in the for loop */ ++ i--; + } + } + ++ if (dereference_flags & (1ULL << arg)) { ++ if (process_pointer(fmt + start_arg, i - start_arg, call)) ++ dereference_flags &= ~(1ULL << arg); ++ } ++ + /* + * If you triggered the below warning, the trace event reported + * uses an unsafe dereference pointer %p*. As the data stored