From 9df68376d9065bab3bdbde3061e1c3e762d4b756 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 23 Dec 2024 12:32:11 +0100 Subject: [PATCH] 5.15-stable patches added patches: btrfs-tree-checker-reject-inline-extent-items-with-0-ref-count.patch drivers-hv-util-avoid-accessing-a-ringbuffer-not-initialized-yet.patch kvm-x86-play-nice-with-protected-guests-in-complete_hypercall_exit.patch tracing-add-missing-helper-functions-in-event-pointer-dereference-check.patch tracing-add-s-check-in-test_event_printk.patch tracing-fix-test_event_printk-to-process-entire-print-argument.patch --- ...inline-extent-items-with-0-ref-count.patch | 104 +++++++++ ...ing-a-ringbuffer-not-initialized-yet.patch | 169 ++++++++++++++ ...ed-guests-in-complete_hypercall_exit.patch | 59 +++++ queue-5.15/series | 6 + ...s-in-event-pointer-dereference-check.patch | 78 +++++++ ...ing-add-s-check-in-test_event_printk.patch | 206 ++++++++++++++++++ ...ntk-to-process-entire-print-argument.patch | 184 ++++++++++++++++ 7 files changed, 806 insertions(+) create mode 100644 queue-5.15/btrfs-tree-checker-reject-inline-extent-items-with-0-ref-count.patch create mode 100644 queue-5.15/drivers-hv-util-avoid-accessing-a-ringbuffer-not-initialized-yet.patch create mode 100644 queue-5.15/kvm-x86-play-nice-with-protected-guests-in-complete_hypercall_exit.patch create mode 100644 queue-5.15/tracing-add-missing-helper-functions-in-event-pointer-dereference-check.patch create mode 100644 queue-5.15/tracing-add-s-check-in-test_event_printk.patch create mode 100644 queue-5.15/tracing-fix-test_event_printk-to-process-entire-print-argument.patch diff --git a/queue-5.15/btrfs-tree-checker-reject-inline-extent-items-with-0-ref-count.patch b/queue-5.15/btrfs-tree-checker-reject-inline-extent-items-with-0-ref-count.patch new file mode 100644 index 00000000000..5b5514b785f --- /dev/null +++ b/queue-5.15/btrfs-tree-checker-reject-inline-extent-items-with-0-ref-count.patch @@ -0,0 +1,104 @@ +From dfb92681a19e1d5172420baa242806414b3eff6f Mon Sep 17 00:00:00 2001 +From: Qu Wenruo +Date: Wed, 4 Dec 2024 13:30:46 +1030 +Subject: btrfs: tree-checker: reject inline extent items with 0 ref count + +From: Qu Wenruo + +commit dfb92681a19e1d5172420baa242806414b3eff6f upstream. + +[BUG] +There is a bug report in the mailing list where btrfs_run_delayed_refs() +failed to drop the ref count for logical 25870311358464 num_bytes +2113536. + +The involved leaf dump looks like this: + + item 166 key (25870311358464 168 2113536) itemoff 10091 itemsize 50 + extent refs 1 gen 84178 flags 1 + ref#0: shared data backref parent 32399126528000 count 0 <<< + ref#1: shared data backref parent 31808973717504 count 1 + +Notice the count number is 0. + +[CAUSE] +There is no concrete evidence yet, but considering 0 -> 1 is also a +single bit flipped, it's possible that hardware memory bitflip is +involved, causing the on-disk extent tree to be corrupted. + +[FIX] +To prevent us reading such corrupted extent item, or writing such +damaged extent item back to disk, enhance the handling of +BTRFS_EXTENT_DATA_REF_KEY and BTRFS_SHARED_DATA_REF_KEY keys for both +inlined and key items, to detect such 0 ref count and reject them. + +CC: stable@vger.kernel.org # 5.4+ +Link: https://lore.kernel.org/linux-btrfs/7c69dd49-c346-4806-86e7-e6f863a66f48@app.fastmail.com/ +Reported-by: Frankie Fisher +Reviewed-by: Filipe Manana +Signed-off-by: Qu Wenruo +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/tree-checker.c | 27 ++++++++++++++++++++++++++- + 1 file changed, 26 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/tree-checker.c ++++ b/fs/btrfs/tree-checker.c +@@ -1416,6 +1416,11 @@ static int check_extent_item(struct exte + dref_offset, fs_info->sectorsize); + return -EUCLEAN; + } ++ if (unlikely(btrfs_extent_data_ref_count(leaf, dref) == 0)) { ++ extent_err(leaf, slot, ++ "invalid data ref count, should have non-zero value"); ++ return -EUCLEAN; ++ } + inline_refs += btrfs_extent_data_ref_count(leaf, dref); + break; + /* Contains parent bytenr and ref count */ +@@ -1428,6 +1433,11 @@ static int check_extent_item(struct exte + inline_offset, fs_info->sectorsize); + return -EUCLEAN; + } ++ if (unlikely(btrfs_shared_data_ref_count(leaf, sref) == 0)) { ++ extent_err(leaf, slot, ++ "invalid shared data ref count, should have non-zero value"); ++ return -EUCLEAN; ++ } + inline_refs += btrfs_shared_data_ref_count(leaf, sref); + break; + default: +@@ -1479,8 +1489,18 @@ static int check_simple_keyed_refs(struc + { + u32 expect_item_size = 0; + +- if (key->type == BTRFS_SHARED_DATA_REF_KEY) ++ if (key->type == BTRFS_SHARED_DATA_REF_KEY) { ++ struct btrfs_shared_data_ref *sref; ++ ++ sref = btrfs_item_ptr(leaf, slot, struct btrfs_shared_data_ref); ++ if (unlikely(btrfs_shared_data_ref_count(leaf, sref) == 0)) { ++ extent_err(leaf, slot, ++ "invalid shared data backref count, should have non-zero value"); ++ return -EUCLEAN; ++ } ++ + expect_item_size = sizeof(struct btrfs_shared_data_ref); ++ } + + if (unlikely(btrfs_item_size_nr(leaf, slot) != expect_item_size)) { + generic_err(leaf, slot, +@@ -1540,6 +1560,11 @@ static int check_extent_data_ref(struct + offset, leaf->fs_info->sectorsize); + return -EUCLEAN; + } ++ if (unlikely(btrfs_extent_data_ref_count(leaf, dref) == 0)) { ++ extent_err(leaf, slot, ++ "invalid extent data backref count, should have non-zero value"); ++ return -EUCLEAN; ++ } + } + return 0; + } diff --git a/queue-5.15/drivers-hv-util-avoid-accessing-a-ringbuffer-not-initialized-yet.patch b/queue-5.15/drivers-hv-util-avoid-accessing-a-ringbuffer-not-initialized-yet.patch new file mode 100644 index 00000000000..40df3e8682e --- /dev/null +++ b/queue-5.15/drivers-hv-util-avoid-accessing-a-ringbuffer-not-initialized-yet.patch @@ -0,0 +1,169 @@ +From 07a756a49f4b4290b49ea46e089cbe6f79ff8d26 Mon Sep 17 00:00:00 2001 +From: Michael Kelley +Date: Wed, 6 Nov 2024 07:42:47 -0800 +Subject: Drivers: hv: util: Avoid accessing a ringbuffer not initialized yet + +From: Michael Kelley + +commit 07a756a49f4b4290b49ea46e089cbe6f79ff8d26 upstream. + +If the KVP (or VSS) daemon starts before the VMBus channel's ringbuffer is +fully initialized, we can hit the panic below: + +hv_utils: Registering HyperV Utility Driver +hv_vmbus: registering driver hv_utils +... +BUG: kernel NULL pointer dereference, address: 0000000000000000 +CPU: 44 UID: 0 PID: 2552 Comm: hv_kvp_daemon Tainted: G E 6.11.0-rc3+ #1 +RIP: 0010:hv_pkt_iter_first+0x12/0xd0 +Call Trace: +... + vmbus_recvpacket + hv_kvp_onchannelcallback + vmbus_on_event + tasklet_action_common + tasklet_action + handle_softirqs + irq_exit_rcu + sysvec_hyperv_stimer0 + + + asm_sysvec_hyperv_stimer0 +... + kvp_register_done + hvt_op_read + vfs_read + ksys_read + __x64_sys_read + +This can happen because the KVP/VSS channel callback can be invoked +even before the channel is fully opened: +1) as soon as hv_kvp_init() -> hvutil_transport_init() creates +/dev/vmbus/hv_kvp, the kvp daemon can open the device file immediately and +register itself to the driver by writing a message KVP_OP_REGISTER1 to the +file (which is handled by kvp_on_msg() ->kvp_handle_handshake()) and +reading the file for the driver's response, which is handled by +hvt_op_read(), which calls hvt->on_read(), i.e. kvp_register_done(). + +2) the problem with kvp_register_done() is that it can cause the +channel callback to be called even before the channel is fully opened, +and when the channel callback is starting to run, util_probe()-> +vmbus_open() may have not initialized the ringbuffer yet, so the +callback can hit the panic of NULL pointer dereference. + +To reproduce the panic consistently, we can add a "ssleep(10)" for KVP in +__vmbus_open(), just before the first hv_ringbuffer_init(), and then we +unload and reload the driver hv_utils, and run the daemon manually within +the 10 seconds. + +Fix the panic by reordering the steps in util_probe() so the char dev +entry used by the KVP or VSS daemon is not created until after +vmbus_open() has completed. This reordering prevents the race condition +from happening. + +Reported-by: Dexuan Cui +Fixes: e0fa3e5e7df6 ("Drivers: hv: utils: fix a race on userspace daemons registration") +Cc: stable@vger.kernel.org +Signed-off-by: Michael Kelley +Acked-by: Wei Liu +Link: https://lore.kernel.org/r/20241106154247.2271-3-mhklinux@outlook.com +Signed-off-by: Wei Liu +Message-ID: <20241106154247.2271-3-mhklinux@outlook.com> +Signed-off-by: Greg Kroah-Hartman +--- + drivers/hv/hv_kvp.c | 6 ++++++ + drivers/hv/hv_snapshot.c | 6 ++++++ + drivers/hv/hv_util.c | 9 +++++++++ + drivers/hv/hyperv_vmbus.h | 2 ++ + include/linux/hyperv.h | 1 + + 5 files changed, 24 insertions(+) + +--- a/drivers/hv/hv_kvp.c ++++ b/drivers/hv/hv_kvp.c +@@ -767,6 +767,12 @@ hv_kvp_init(struct hv_util_service *srv) + */ + kvp_transaction.state = HVUTIL_DEVICE_INIT; + ++ return 0; ++} ++ ++int ++hv_kvp_init_transport(void) ++{ + hvt = hvutil_transport_init(kvp_devname, CN_KVP_IDX, CN_KVP_VAL, + kvp_on_msg, kvp_on_reset); + if (!hvt) +--- a/drivers/hv/hv_snapshot.c ++++ b/drivers/hv/hv_snapshot.c +@@ -385,6 +385,12 @@ hv_vss_init(struct hv_util_service *srv) + */ + vss_transaction.state = HVUTIL_DEVICE_INIT; + ++ return 0; ++} ++ ++int ++hv_vss_init_transport(void) ++{ + hvt = hvutil_transport_init(vss_devname, CN_VSS_IDX, CN_VSS_VAL, + vss_on_msg, vss_on_reset); + if (!hvt) { +--- a/drivers/hv/hv_util.c ++++ b/drivers/hv/hv_util.c +@@ -141,6 +141,7 @@ static struct hv_util_service util_heart + static struct hv_util_service util_kvp = { + .util_cb = hv_kvp_onchannelcallback, + .util_init = hv_kvp_init, ++ .util_init_transport = hv_kvp_init_transport, + .util_pre_suspend = hv_kvp_pre_suspend, + .util_pre_resume = hv_kvp_pre_resume, + .util_deinit = hv_kvp_deinit, +@@ -149,6 +150,7 @@ static struct hv_util_service util_kvp = + static struct hv_util_service util_vss = { + .util_cb = hv_vss_onchannelcallback, + .util_init = hv_vss_init, ++ .util_init_transport = hv_vss_init_transport, + .util_pre_suspend = hv_vss_pre_suspend, + .util_pre_resume = hv_vss_pre_resume, + .util_deinit = hv_vss_deinit, +@@ -592,6 +594,13 @@ static int util_probe(struct hv_device * + if (ret) + goto error; + ++ if (srv->util_init_transport) { ++ ret = srv->util_init_transport(); ++ if (ret) { ++ vmbus_close(dev->channel); ++ goto error; ++ } ++ } + return 0; + + error: +--- a/drivers/hv/hyperv_vmbus.h ++++ b/drivers/hv/hyperv_vmbus.h +@@ -356,12 +356,14 @@ void vmbus_on_event(unsigned long data); + void vmbus_on_msg_dpc(unsigned long data); + + int hv_kvp_init(struct hv_util_service *srv); ++int hv_kvp_init_transport(void); + void hv_kvp_deinit(void); + int hv_kvp_pre_suspend(void); + int hv_kvp_pre_resume(void); + void hv_kvp_onchannelcallback(void *context); + + int hv_vss_init(struct hv_util_service *srv); ++int hv_vss_init_transport(void); + void hv_vss_deinit(void); + int hv_vss_pre_suspend(void); + int hv_vss_pre_resume(void); +--- a/include/linux/hyperv.h ++++ b/include/linux/hyperv.h +@@ -1526,6 +1526,7 @@ struct hv_util_service { + void *channel; + void (*util_cb)(void *); + int (*util_init)(struct hv_util_service *); ++ int (*util_init_transport)(void); + void (*util_deinit)(void); + int (*util_pre_suspend)(void); + int (*util_pre_resume)(void); diff --git a/queue-5.15/kvm-x86-play-nice-with-protected-guests-in-complete_hypercall_exit.patch b/queue-5.15/kvm-x86-play-nice-with-protected-guests-in-complete_hypercall_exit.patch new file mode 100644 index 00000000000..4dfbcc471c7 --- /dev/null +++ b/queue-5.15/kvm-x86-play-nice-with-protected-guests-in-complete_hypercall_exit.patch @@ -0,0 +1,59 @@ +From 9b42d1e8e4fe9dc631162c04caa69b0d1860b0f0 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Wed, 27 Nov 2024 16:43:39 -0800 +Subject: KVM: x86: Play nice with protected guests in complete_hypercall_exit() + +From: Sean Christopherson + +commit 9b42d1e8e4fe9dc631162c04caa69b0d1860b0f0 upstream. + +Use is_64_bit_hypercall() instead of is_64_bit_mode() to detect a 64-bit +hypercall when completing said hypercall. For guests with protected state, +e.g. SEV-ES and SEV-SNP, KVM must assume the hypercall was made in 64-bit +mode as the vCPU state needed to detect 64-bit mode is unavailable. + +Hacking the sev_smoke_test selftest to generate a KVM_HC_MAP_GPA_RANGE +hypercall via VMGEXIT trips the WARN: + + ------------[ cut here ]------------ + WARNING: CPU: 273 PID: 326626 at arch/x86/kvm/x86.h:180 complete_hypercall_exit+0x44/0xe0 [kvm] + Modules linked in: kvm_amd kvm ... [last unloaded: kvm] + CPU: 273 UID: 0 PID: 326626 Comm: sev_smoke_test Not tainted 6.12.0-smp--392e932fa0f3-feat #470 + Hardware name: Google Astoria/astoria, BIOS 0.20240617.0-0 06/17/2024 + RIP: 0010:complete_hypercall_exit+0x44/0xe0 [kvm] + Call Trace: + + kvm_arch_vcpu_ioctl_run+0x2400/0x2720 [kvm] + kvm_vcpu_ioctl+0x54f/0x630 [kvm] + __se_sys_ioctl+0x6b/0xc0 + do_syscall_64+0x83/0x160 + entry_SYSCALL_64_after_hwframe+0x76/0x7e + + ---[ end trace 0000000000000000 ]--- + +Fixes: b5aead0064f3 ("KVM: x86: Assume a 64-bit hypercall for guests with protected state") +Cc: stable@vger.kernel.org +Cc: Tom Lendacky +Reviewed-by: Xiaoyao Li +Reviewed-by: Nikunj A Dadhania +Reviewed-by: Tom Lendacky +Reviewed-by: Binbin Wu +Reviewed-by: Kai Huang +Link: https://lore.kernel.org/r/20241128004344.4072099-2-seanjc@google.com +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/x86.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -8946,7 +8946,7 @@ static int complete_hypercall_exit(struc + { + u64 ret = vcpu->run->hypercall.ret; + +- if (!is_64_bit_mode(vcpu)) ++ if (!is_64_bit_hypercall(vcpu)) + ret = (u32)ret; + kvm_rax_write(vcpu, ret); + ++vcpu->stat.hypercalls; diff --git a/queue-5.15/series b/queue-5.15/series index 00df4d08653..4d185080f0c 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -39,3 +39,9 @@ hwmon-tmp513-fix-current-register-value-interpretati.patch hwmon-tmp513-fix-interpretation-of-values-of-tempera.patch sh-clk-fix-clk_enable-to-return-0-on-null-clk.patch zram-refuse-to-use-zero-sized-block-device-as-backing-device.patch +btrfs-tree-checker-reject-inline-extent-items-with-0-ref-count.patch +drivers-hv-util-avoid-accessing-a-ringbuffer-not-initialized-yet.patch +kvm-x86-play-nice-with-protected-guests-in-complete_hypercall_exit.patch +tracing-fix-test_event_printk-to-process-entire-print-argument.patch +tracing-add-missing-helper-functions-in-event-pointer-dereference-check.patch +tracing-add-s-check-in-test_event_printk.patch diff --git a/queue-5.15/tracing-add-missing-helper-functions-in-event-pointer-dereference-check.patch b/queue-5.15/tracing-add-missing-helper-functions-in-event-pointer-dereference-check.patch new file mode 100644 index 00000000000..0c77395fdd1 --- /dev/null +++ b/queue-5.15/tracing-add-missing-helper-functions-in-event-pointer-dereference-check.patch @@ -0,0 +1,78 @@ +From 917110481f6bc1c96b1e54b62bb114137fbc6d17 Mon Sep 17 00:00:00 2001 +From: Steven Rostedt +Date: Mon, 16 Dec 2024 21:41:20 -0500 +Subject: tracing: Add missing helper functions in event pointer dereference check + +From: Steven Rostedt + +commit 917110481f6bc1c96b1e54b62bb114137fbc6d17 upstream. + +The process_pointer() helper function looks to see if various trace event +macros are used. These macros are for storing data in the event. This +makes it safe to dereference as the dereference will then point into the +event on the ring buffer where the content of the data stays with the +event itself. + +A few helper functions were missing. Those were: + + __get_rel_dynamic_array() + __get_dynamic_array_len() + __get_rel_dynamic_array_len() + __get_rel_sockaddr() + +Also add a helper function find_print_string() to not need to use a middle +man variable to test if the string exists. + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Cc: Andrew Morton +Cc: Al Viro +Cc: Linus Torvalds +Link: https://lore.kernel.org/20241217024720.521836792@goodmis.org +Fixes: 5013f454a352c ("tracing: Add check of trace event print fmts for dereferencing pointers") +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/trace_events.c | 21 +++++++++++++++++++-- + 1 file changed, 19 insertions(+), 2 deletions(-) + +--- a/kernel/trace/trace_events.c ++++ b/kernel/trace/trace_events.c +@@ -257,6 +257,15 @@ static bool test_field(const char *fmt, + return false; + } + ++/* Look for a string within an argument */ ++static bool find_print_string(const char *arg, const char *str, const char *end) ++{ ++ const char *r; ++ ++ r = strstr(arg, str); ++ return r && r < end; ++} ++ + /* Return true if the argument pointer is safe */ + static bool process_pointer(const char *fmt, int len, struct trace_event_call *call) + { +@@ -275,9 +284,17 @@ static bool process_pointer(const char * + a = strchr(fmt, '&'); + if ((a && (a < r)) || test_field(r, call)) + return true; +- } else if ((r = strstr(fmt, "__get_dynamic_array(")) && r < e) { ++ } else if (find_print_string(fmt, "__get_dynamic_array(", e)) { ++ return true; ++ } else if (find_print_string(fmt, "__get_rel_dynamic_array(", e)) { ++ return true; ++ } else if (find_print_string(fmt, "__get_dynamic_array_len(", e)) { ++ return true; ++ } else if (find_print_string(fmt, "__get_rel_dynamic_array_len(", e)) { ++ return true; ++ } else if (find_print_string(fmt, "__get_sockaddr(", e)) { + return true; +- } else if ((r = strstr(fmt, "__get_sockaddr(")) && r < e) { ++ } else if (find_print_string(fmt, "__get_rel_sockaddr(", e)) { + return true; + } + return false; diff --git a/queue-5.15/tracing-add-s-check-in-test_event_printk.patch b/queue-5.15/tracing-add-s-check-in-test_event_printk.patch new file mode 100644 index 00000000000..9d0199576a3 --- /dev/null +++ b/queue-5.15/tracing-add-s-check-in-test_event_printk.patch @@ -0,0 +1,206 @@ +From 65a25d9f7ac02e0cf361356e834d1c71d36acca9 Mon Sep 17 00:00:00 2001 +From: Steven Rostedt +Date: Mon, 16 Dec 2024 21:41:21 -0500 +Subject: tracing: Add "%s" check in test_event_printk() + +From: Steven Rostedt + +commit 65a25d9f7ac02e0cf361356e834d1c71d36acca9 upstream. + +The test_event_printk() code makes sure that when a trace event is +registered, any dereferenced pointers in from the event's TP_printk() are +pointing to content in the ring buffer. But currently it does not handle +"%s", as there's cases where the string pointer saved in the ring buffer +points to a static string in the kernel that will never be freed. As that +is a valid case, the pointer needs to be checked at runtime. + +Currently the runtime check is done via trace_check_vprintf(), but to not +have to replicate everything in vsnprintf() it does some logic with the +va_list that may not be reliable across architectures. In order to get rid +of that logic, more work in the test_event_printk() needs to be done. Some +of the strings can be validated at this time when it is obvious the string +is valid because the string will be saved in the ring buffer content. + +Do all the validation of strings in the ring buffer at boot in +test_event_printk(), and make sure that the field of the strings that +point into the kernel are accessible. This will allow adding checks at +runtime that will validate the fields themselves and not rely on paring +the TP_printk() format at runtime. + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Cc: Andrew Morton +Cc: Al Viro +Cc: Linus Torvalds +Link: https://lore.kernel.org/20241217024720.685917008@goodmis.org +Fixes: 5013f454a352c ("tracing: Add check of trace event print fmts for dereferencing pointers") +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/trace_events.c | 104 +++++++++++++++++++++++++++++++++++++------- + 1 file changed, 89 insertions(+), 15 deletions(-) + +--- a/kernel/trace/trace_events.c ++++ b/kernel/trace/trace_events.c +@@ -227,19 +227,16 @@ int trace_event_get_offsets(struct trace + return tail->offset + tail->size; + } + +-/* +- * Check if the referenced field is an array and return true, +- * as arrays are OK to dereference. +- */ +-static bool test_field(const char *fmt, struct trace_event_call *call) ++ ++static struct trace_event_fields *find_event_field(const char *fmt, ++ struct trace_event_call *call) + { + struct trace_event_fields *field = call->class->fields_array; +- const char *array_descriptor; + const char *p = fmt; + int len; + + if (!(len = str_has_prefix(fmt, "REC->"))) +- return false; ++ return NULL; + fmt += len; + for (p = fmt; *p; p++) { + if (!isalnum(*p) && *p != '_') +@@ -250,11 +247,26 @@ static bool test_field(const char *fmt, + for (; field->type; field++) { + if (strncmp(field->name, fmt, len) || field->name[len]) + continue; +- array_descriptor = strchr(field->type, '['); +- /* This is an array and is OK to dereference. */ +- return array_descriptor != NULL; ++ ++ return field; + } +- return false; ++ return NULL; ++} ++ ++/* ++ * Check if the referenced field is an array and return true, ++ * as arrays are OK to dereference. ++ */ ++static bool test_field(const char *fmt, struct trace_event_call *call) ++{ ++ struct trace_event_fields *field; ++ ++ field = find_event_field(fmt, call); ++ if (!field) ++ return false; ++ ++ /* This is an array and is OK to dereference. */ ++ return strchr(field->type, '[') != NULL; + } + + /* Look for a string within an argument */ +@@ -300,6 +312,53 @@ static bool process_pointer(const char * + return false; + } + ++/* Return true if the string is safe */ ++static bool process_string(const char *fmt, int len, struct trace_event_call *call) ++{ ++ const char *r, *e, *s; ++ ++ e = fmt + len; ++ ++ /* ++ * There are several helper functions that return strings. ++ * If the argument contains a function, then assume its field is valid. ++ * It is considered that the argument has a function if it has: ++ * alphanumeric or '_' before a parenthesis. ++ */ ++ s = fmt; ++ do { ++ r = strstr(s, "("); ++ if (!r || r >= e) ++ break; ++ for (int i = 1; r - i >= s; i++) { ++ char ch = *(r - i); ++ if (isspace(ch)) ++ continue; ++ if (isalnum(ch) || ch == '_') ++ return true; ++ /* Anything else, this isn't a function */ ++ break; ++ } ++ /* A function could be wrapped in parethesis, try the next one */ ++ s = r + 1; ++ } while (s < e); ++ ++ /* ++ * If there's any strings in the argument consider this arg OK as it ++ * could be: REC->field ? "foo" : "bar" and we don't want to get into ++ * verifying that logic here. ++ */ ++ if (find_print_string(fmt, "\"", e)) ++ return true; ++ ++ /* Dereferenced strings are also valid like any other pointer */ ++ if (process_pointer(fmt, len, call)) ++ return true; ++ ++ /* Make sure the field is found, and consider it OK for now if it is */ ++ return find_event_field(fmt, call) != NULL; ++} ++ + /* + * Examine the print fmt of the event looking for unsafe dereference + * pointers using %p* that could be recorded in the trace event and +@@ -309,6 +368,7 @@ static bool process_pointer(const char * + static void test_event_printk(struct trace_event_call *call) + { + u64 dereference_flags = 0; ++ u64 string_flags = 0; + bool first = true; + const char *fmt; + int parens = 0; +@@ -399,8 +459,16 @@ static void test_event_printk(struct tra + star = true; + continue; + } +- if ((fmt[i + j] == 's') && star) +- arg++; ++ if ((fmt[i + j] == 's')) { ++ if (star) ++ arg++; ++ if (WARN_ONCE(arg == 63, ++ "Too many args for event: %s", ++ trace_event_name(call))) ++ return; ++ dereference_flags |= 1ULL << arg; ++ string_flags |= 1ULL << arg; ++ } + break; + } + break; +@@ -447,7 +515,10 @@ static void test_event_printk(struct tra + } + + if (dereference_flags & (1ULL << arg)) { +- if (process_pointer(fmt + start_arg, e - start_arg, call)) ++ if (string_flags & (1ULL << arg)) { ++ if (process_string(fmt + start_arg, e - start_arg, call)) ++ dereference_flags &= ~(1ULL << arg); ++ } else if (process_pointer(fmt + start_arg, e - start_arg, call)) + dereference_flags &= ~(1ULL << arg); + } + +@@ -459,7 +530,10 @@ static void test_event_printk(struct tra + } + + if (dereference_flags & (1ULL << arg)) { +- if (process_pointer(fmt + start_arg, i - start_arg, call)) ++ if (string_flags & (1ULL << arg)) { ++ if (process_string(fmt + start_arg, i - start_arg, call)) ++ dereference_flags &= ~(1ULL << arg); ++ } else if (process_pointer(fmt + start_arg, i - start_arg, call)) + dereference_flags &= ~(1ULL << arg); + } + diff --git a/queue-5.15/tracing-fix-test_event_printk-to-process-entire-print-argument.patch b/queue-5.15/tracing-fix-test_event_printk-to-process-entire-print-argument.patch new file mode 100644 index 00000000000..19b51a265be --- /dev/null +++ b/queue-5.15/tracing-fix-test_event_printk-to-process-entire-print-argument.patch @@ -0,0 +1,184 @@ +From a6629626c584200daf495cc9a740048b455addcd Mon Sep 17 00:00:00 2001 +From: Steven Rostedt +Date: Mon, 16 Dec 2024 21:41:19 -0500 +Subject: tracing: Fix test_event_printk() to process entire print argument + +From: Steven Rostedt + +commit a6629626c584200daf495cc9a740048b455addcd upstream. + +The test_event_printk() analyzes print formats of trace events looking for +cases where it may dereference a pointer that is not in the ring buffer +which can possibly be a bug when the trace event is read from the ring +buffer and the content of that pointer no longer exists. + +The function needs to accurately go from one print format argument to the +next. It handles quotes and parenthesis that may be included in an +argument. When it finds the start of the next argument, it uses a simple +"c = strstr(fmt + i, ',')" to find the end of that argument! + +In order to include "%s" dereferencing, it needs to process the entire +content of the print format argument and not just the content of the first +',' it finds. As there may be content like: + + ({ const char *saved_ptr = trace_seq_buffer_ptr(p); static const char + *access_str[] = { "---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux" + }; union kvm_mmu_page_role role; role.word = REC->role; + trace_seq_printf(p, "sp gen %u gfn %llx l%u %u-byte q%u%s %s%s" " %snxe + %sad root %u %s%c", REC->mmu_valid_gen, REC->gfn, role.level, + role.has_4_byte_gpte ? 4 : 8, role.quadrant, role.direct ? " direct" : "", + access_str[role.access], role.invalid ? " invalid" : "", role.efer_nx ? "" + : "!", role.ad_disabled ? "!" : "", REC->root_count, REC->unsync ? + "unsync" : "sync", 0); saved_ptr; }) + +Which is an example of a full argument of an existing event. As the code +already handles finding the next print format argument, process the +argument at the end of it and not the start of it. This way it has both +the start of the argument as well as the end of it. + +Add a helper function "process_pointer()" that will do the processing during +the loop as well as at the end. It also makes the code cleaner and easier +to read. + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Cc: Andrew Morton +Cc: Al Viro +Cc: Linus Torvalds +Link: https://lore.kernel.org/20241217024720.362271189@goodmis.org +Fixes: 5013f454a352c ("tracing: Add check of trace event print fmts for dereferencing pointers") +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/trace_events.c | 82 ++++++++++++++++++++++++++++---------------- + 1 file changed, 53 insertions(+), 29 deletions(-) + +--- a/kernel/trace/trace_events.c ++++ b/kernel/trace/trace_events.c +@@ -248,8 +248,7 @@ static bool test_field(const char *fmt, + len = p - fmt; + + for (; field->type; field++) { +- if (strncmp(field->name, fmt, len) || +- field->name[len]) ++ if (strncmp(field->name, fmt, len) || field->name[len]) + continue; + array_descriptor = strchr(field->type, '['); + /* This is an array and is OK to dereference. */ +@@ -258,6 +257,32 @@ static bool test_field(const char *fmt, + return false; + } + ++/* Return true if the argument pointer is safe */ ++static bool process_pointer(const char *fmt, int len, struct trace_event_call *call) ++{ ++ const char *r, *e, *a; ++ ++ e = fmt + len; ++ ++ /* Find the REC-> in the argument */ ++ r = strstr(fmt, "REC->"); ++ if (r && r < e) { ++ /* ++ * Addresses of events on the buffer, or an array on the buffer is ++ * OK to dereference. There's ways to fool this, but ++ * this is to catch common mistakes, not malicious code. ++ */ ++ a = strchr(fmt, '&'); ++ if ((a && (a < r)) || test_field(r, call)) ++ return true; ++ } else if ((r = strstr(fmt, "__get_dynamic_array(")) && r < e) { ++ return true; ++ } else if ((r = strstr(fmt, "__get_sockaddr(")) && r < e) { ++ return true; ++ } ++ return false; ++} ++ + /* + * Examine the print fmt of the event looking for unsafe dereference + * pointers using %p* that could be recorded in the trace event and +@@ -268,12 +293,12 @@ static void test_event_printk(struct tra + { + u64 dereference_flags = 0; + bool first = true; +- const char *fmt, *c, *r, *a; ++ const char *fmt; + int parens = 0; + char in_quote = 0; + int start_arg = 0; + int arg = 0; +- int i; ++ int i, e; + + fmt = call->print_fmt; + +@@ -386,42 +411,41 @@ static void test_event_printk(struct tra + case ',': + if (in_quote || parens) + continue; ++ e = i; + i++; + while (isspace(fmt[i])) + i++; +- start_arg = i; +- if (!(dereference_flags & (1ULL << arg))) +- goto next_arg; + +- /* Find the REC-> in the argument */ +- c = strchr(fmt + i, ','); +- r = strstr(fmt + i, "REC->"); +- if (r && (!c || r < c)) { +- /* +- * Addresses of events on the buffer, +- * or an array on the buffer is +- * OK to dereference. +- * There's ways to fool this, but +- * this is to catch common mistakes, +- * not malicious code. +- */ +- a = strchr(fmt + i, '&'); +- if ((a && (a < r)) || test_field(r, call)) ++ /* ++ * If start_arg is zero, then this is the start of the ++ * first argument. The processing of the argument happens ++ * when the end of the argument is found, as it needs to ++ * handle paranthesis and such. ++ */ ++ if (!start_arg) { ++ start_arg = i; ++ /* Balance out the i++ in the for loop */ ++ i--; ++ continue; ++ } ++ ++ if (dereference_flags & (1ULL << arg)) { ++ if (process_pointer(fmt + start_arg, e - start_arg, call)) + dereference_flags &= ~(1ULL << arg); +- } else if ((r = strstr(fmt + i, "__get_dynamic_array(")) && +- (!c || r < c)) { +- dereference_flags &= ~(1ULL << arg); +- } else if ((r = strstr(fmt + i, "__get_sockaddr(")) && +- (!c || r < c)) { +- dereference_flags &= ~(1ULL << arg); + } + +- next_arg: +- i--; ++ start_arg = i; + arg++; ++ /* Balance out the i++ in the for loop */ ++ i--; + } + } + ++ if (dereference_flags & (1ULL << arg)) { ++ if (process_pointer(fmt + start_arg, i - start_arg, call)) ++ dereference_flags &= ~(1ULL << arg); ++ } ++ + /* + * If you triggered the below warning, the trace event reported + * uses an unsafe dereference pointer %p*. As the data stored -- 2.47.2