git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.6-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 23 Dec 2024 11:32:31 +0000 (12:32 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 23 Dec 2024 11:32:31 +0000 (12:32 +0100)
added patches:
btrfs-tree-checker-reject-inline-extent-items-with-0-ref-count.patch
drivers-hv-util-avoid-accessing-a-ringbuffer-not-initialized-yet.patch
io_uring-check-if-iowq-is-killed-before-queuing.patch
io_uring-fix-registered-ring-file-refcount-leak.patch
kvm-x86-play-nice-with-protected-guests-in-complete_hypercall_exit.patch
selftests-bpf-use-asm-constraint-m-for-loongarch.patch
selftests-memfd-run-sysctl-tests-when-pid-namespace-support-is-enabled.patch
smb-client-fix-tcp-timers-deadlock-after-rmmod.patch
tracing-add-missing-helper-functions-in-event-pointer-dereference-check.patch
tracing-add-s-check-in-test_event_printk.patch
tracing-fix-test_event_printk-to-process-entire-print-argument.patch

12 files changed:
queue-6.6/btrfs-tree-checker-reject-inline-extent-items-with-0-ref-count.patch [new file with mode: 0644]
queue-6.6/drivers-hv-util-avoid-accessing-a-ringbuffer-not-initialized-yet.patch [new file with mode: 0644]
queue-6.6/io_uring-check-if-iowq-is-killed-before-queuing.patch [new file with mode: 0644]
queue-6.6/io_uring-fix-registered-ring-file-refcount-leak.patch [new file with mode: 0644]
queue-6.6/kvm-x86-play-nice-with-protected-guests-in-complete_hypercall_exit.patch [new file with mode: 0644]
queue-6.6/selftests-bpf-use-asm-constraint-m-for-loongarch.patch [new file with mode: 0644]
queue-6.6/selftests-memfd-run-sysctl-tests-when-pid-namespace-support-is-enabled.patch [new file with mode: 0644]
queue-6.6/series
queue-6.6/smb-client-fix-tcp-timers-deadlock-after-rmmod.patch [new file with mode: 0644]
queue-6.6/tracing-add-missing-helper-functions-in-event-pointer-dereference-check.patch [new file with mode: 0644]
queue-6.6/tracing-add-s-check-in-test_event_printk.patch [new file with mode: 0644]
queue-6.6/tracing-fix-test_event_printk-to-process-entire-print-argument.patch [new file with mode: 0644]

diff --git a/queue-6.6/btrfs-tree-checker-reject-inline-extent-items-with-0-ref-count.patch b/queue-6.6/btrfs-tree-checker-reject-inline-extent-items-with-0-ref-count.patch
new file mode 100644 (file)
index 0000000..c251bbc
--- /dev/null
@@ -0,0 +1,104 @@
+From dfb92681a19e1d5172420baa242806414b3eff6f Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Wed, 4 Dec 2024 13:30:46 +1030
+Subject: btrfs: tree-checker: reject inline extent items with 0 ref count
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit dfb92681a19e1d5172420baa242806414b3eff6f upstream.
+
+[BUG]
+There is a bug report in the mailing list where btrfs_run_delayed_refs()
+failed to drop the ref count for logical 25870311358464 num_bytes
+2113536.
+
+The involved leaf dump looks like this:
+
+  item 166 key (25870311358464 168 2113536) itemoff 10091 itemsize 50
+    extent refs 1 gen 84178 flags 1
+    ref#0: shared data backref parent 32399126528000 count 0 <<<
+    ref#1: shared data backref parent 31808973717504 count 1
+
+Notice the count number is 0.
+
+[CAUSE]
+There is no concrete evidence yet, but considering 0 -> 1 is also a
+single bit flipped, it's possible that hardware memory bitflip is
+involved, causing the on-disk extent tree to be corrupted.
+
+[FIX]
+To prevent us reading such corrupted extent item, or writing such
+damaged extent item back to disk, enhance the handling of
+BTRFS_EXTENT_DATA_REF_KEY and BTRFS_SHARED_DATA_REF_KEY keys for both
+inlined and key items, to detect such 0 ref count and reject them.
+
+CC: stable@vger.kernel.org # 5.4+
+Link: https://lore.kernel.org/linux-btrfs/7c69dd49-c346-4806-86e7-e6f863a66f48@app.fastmail.com/
+Reported-by: Frankie Fisher <frankie@terrorise.me.uk>
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/tree-checker.c |   27 ++++++++++++++++++++++++++-
+ 1 file changed, 26 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/tree-checker.c
++++ b/fs/btrfs/tree-checker.c
+@@ -1503,6 +1503,11 @@ static int check_extent_item(struct exte
+                                          dref_offset, fs_info->sectorsize);
+                               return -EUCLEAN;
+                       }
++                      if (unlikely(btrfs_extent_data_ref_count(leaf, dref) == 0)) {
++                              extent_err(leaf, slot,
++                      "invalid data ref count, should have non-zero value");
++                              return -EUCLEAN;
++                      }
+                       inline_refs += btrfs_extent_data_ref_count(leaf, dref);
+                       break;
+               /* Contains parent bytenr and ref count */
+@@ -1515,6 +1520,11 @@ static int check_extent_item(struct exte
+                                          inline_offset, fs_info->sectorsize);
+                               return -EUCLEAN;
+                       }
++                      if (unlikely(btrfs_shared_data_ref_count(leaf, sref) == 0)) {
++                              extent_err(leaf, slot,
++                      "invalid shared data ref count, should have non-zero value");
++                              return -EUCLEAN;
++                      }
+                       inline_refs += btrfs_shared_data_ref_count(leaf, sref);
+                       break;
+               default:
+@@ -1584,8 +1594,18 @@ static int check_simple_keyed_refs(struc
+ {
+       u32 expect_item_size = 0;
+-      if (key->type == BTRFS_SHARED_DATA_REF_KEY)
++      if (key->type == BTRFS_SHARED_DATA_REF_KEY) {
++              struct btrfs_shared_data_ref *sref;
++
++              sref = btrfs_item_ptr(leaf, slot, struct btrfs_shared_data_ref);
++              if (unlikely(btrfs_shared_data_ref_count(leaf, sref) == 0)) {
++                      extent_err(leaf, slot,
++              "invalid shared data backref count, should have non-zero value");
++                      return -EUCLEAN;
++              }
++
+               expect_item_size = sizeof(struct btrfs_shared_data_ref);
++      }
+       if (unlikely(btrfs_item_size(leaf, slot) != expect_item_size)) {
+               generic_err(leaf, slot,
+@@ -1662,6 +1682,11 @@ static int check_extent_data_ref(struct
+                                  offset, leaf->fs_info->sectorsize);
+                       return -EUCLEAN;
+               }
++              if (unlikely(btrfs_extent_data_ref_count(leaf, dref) == 0)) {
++                      extent_err(leaf, slot,
++      "invalid extent data backref count, should have non-zero value");
++                      return -EUCLEAN;
++              }
+       }
+       return 0;
+ }
diff --git a/queue-6.6/drivers-hv-util-avoid-accessing-a-ringbuffer-not-initialized-yet.patch b/queue-6.6/drivers-hv-util-avoid-accessing-a-ringbuffer-not-initialized-yet.patch
new file mode 100644 (file)
index 0000000..7fc3bb1
--- /dev/null
@@ -0,0 +1,169 @@
+From 07a756a49f4b4290b49ea46e089cbe6f79ff8d26 Mon Sep 17 00:00:00 2001
+From: Michael Kelley <mhklinux@outlook.com>
+Date: Wed, 6 Nov 2024 07:42:47 -0800
+Subject: Drivers: hv: util: Avoid accessing a ringbuffer not initialized yet
+
+From: Michael Kelley <mhklinux@outlook.com>
+
+commit 07a756a49f4b4290b49ea46e089cbe6f79ff8d26 upstream.
+
+If the KVP (or VSS) daemon starts before the VMBus channel's ringbuffer is
+fully initialized, we can hit the panic below:
+
+hv_utils: Registering HyperV Utility Driver
+hv_vmbus: registering driver hv_utils
+...
+BUG: kernel NULL pointer dereference, address: 0000000000000000
+CPU: 44 UID: 0 PID: 2552 Comm: hv_kvp_daemon Tainted: G E 6.11.0-rc3+ #1
+RIP: 0010:hv_pkt_iter_first+0x12/0xd0
+Call Trace:
+...
+ vmbus_recvpacket
+ hv_kvp_onchannelcallback
+ vmbus_on_event
+ tasklet_action_common
+ tasklet_action
+ handle_softirqs
+ irq_exit_rcu
+ sysvec_hyperv_stimer0
+ </IRQ>
+ <TASK>
+ asm_sysvec_hyperv_stimer0
+...
+ kvp_register_done
+ hvt_op_read
+ vfs_read
+ ksys_read
+ __x64_sys_read
+
+This can happen because the KVP/VSS channel callback can be invoked
+even before the channel is fully opened:
+1) as soon as hv_kvp_init() -> hvutil_transport_init() creates
+/dev/vmbus/hv_kvp, the kvp daemon can open the device file immediately and
+register itself to the driver by writing a message KVP_OP_REGISTER1 to the
+file (which is handled by kvp_on_msg() -> kvp_handle_handshake()) and
+reading the file for the driver's response, which is handled by
+hvt_op_read(), which calls hvt->on_read(), i.e. kvp_register_done().
+
+2) the problem with kvp_register_done() is that it can cause the
+channel callback to be called even before the channel is fully opened,
+and when the channel callback is starting to run, util_probe()->
+vmbus_open() may not have initialized the ringbuffer yet, so the
+callback can hit the panic of NULL pointer dereference.
+
+To reproduce the panic consistently, we can add a "ssleep(10)" for KVP in
+__vmbus_open(), just before the first hv_ringbuffer_init(), and then we
+unload and reload the driver hv_utils, and run the daemon manually within
+the 10 seconds.
+
+Fix the panic by reordering the steps in util_probe() so the char dev
+entry used by the KVP or VSS daemon is not created until after
+vmbus_open() has completed. This reordering prevents the race condition
+from happening.
+
+Reported-by: Dexuan Cui <decui@microsoft.com>
+Fixes: e0fa3e5e7df6 ("Drivers: hv: utils: fix a race on userspace daemons registration")
+Cc: stable@vger.kernel.org
+Signed-off-by: Michael Kelley <mhklinux@outlook.com>
+Acked-by: Wei Liu <wei.liu@kernel.org>
+Link: https://lore.kernel.org/r/20241106154247.2271-3-mhklinux@outlook.com
+Signed-off-by: Wei Liu <wei.liu@kernel.org>
+Message-ID: <20241106154247.2271-3-mhklinux@outlook.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/hv/hv_kvp.c       |    6 ++++++
+ drivers/hv/hv_snapshot.c  |    6 ++++++
+ drivers/hv/hv_util.c      |    9 +++++++++
+ drivers/hv/hyperv_vmbus.h |    2 ++
+ include/linux/hyperv.h    |    1 +
+ 5 files changed, 24 insertions(+)
+
+--- a/drivers/hv/hv_kvp.c
++++ b/drivers/hv/hv_kvp.c
+@@ -767,6 +767,12 @@ hv_kvp_init(struct hv_util_service *srv)
+        */
+       kvp_transaction.state = HVUTIL_DEVICE_INIT;
++      return 0;
++}
++
++int
++hv_kvp_init_transport(void)
++{
+       hvt = hvutil_transport_init(kvp_devname, CN_KVP_IDX, CN_KVP_VAL,
+                                   kvp_on_msg, kvp_on_reset);
+       if (!hvt)
+--- a/drivers/hv/hv_snapshot.c
++++ b/drivers/hv/hv_snapshot.c
+@@ -388,6 +388,12 @@ hv_vss_init(struct hv_util_service *srv)
+        */
+       vss_transaction.state = HVUTIL_DEVICE_INIT;
++      return 0;
++}
++
++int
++hv_vss_init_transport(void)
++{
+       hvt = hvutil_transport_init(vss_devname, CN_VSS_IDX, CN_VSS_VAL,
+                                   vss_on_msg, vss_on_reset);
+       if (!hvt) {
+--- a/drivers/hv/hv_util.c
++++ b/drivers/hv/hv_util.c
+@@ -141,6 +141,7 @@ static struct hv_util_service util_heart
+ static struct hv_util_service util_kvp = {
+       .util_cb = hv_kvp_onchannelcallback,
+       .util_init = hv_kvp_init,
++      .util_init_transport = hv_kvp_init_transport,
+       .util_pre_suspend = hv_kvp_pre_suspend,
+       .util_pre_resume = hv_kvp_pre_resume,
+       .util_deinit = hv_kvp_deinit,
+@@ -149,6 +150,7 @@ static struct hv_util_service util_kvp =
+ static struct hv_util_service util_vss = {
+       .util_cb = hv_vss_onchannelcallback,
+       .util_init = hv_vss_init,
++      .util_init_transport = hv_vss_init_transport,
+       .util_pre_suspend = hv_vss_pre_suspend,
+       .util_pre_resume = hv_vss_pre_resume,
+       .util_deinit = hv_vss_deinit,
+@@ -592,6 +594,13 @@ static int util_probe(struct hv_device *
+       if (ret)
+               goto error;
++      if (srv->util_init_transport) {
++              ret = srv->util_init_transport();
++              if (ret) {
++                      vmbus_close(dev->channel);
++                      goto error;
++              }
++      }
+       return 0;
+ error:
+--- a/drivers/hv/hyperv_vmbus.h
++++ b/drivers/hv/hyperv_vmbus.h
+@@ -370,12 +370,14 @@ void vmbus_on_event(unsigned long data);
+ void vmbus_on_msg_dpc(unsigned long data);
+ int hv_kvp_init(struct hv_util_service *srv);
++int hv_kvp_init_transport(void);
+ void hv_kvp_deinit(void);
+ int hv_kvp_pre_suspend(void);
+ int hv_kvp_pre_resume(void);
+ void hv_kvp_onchannelcallback(void *context);
+ int hv_vss_init(struct hv_util_service *srv);
++int hv_vss_init_transport(void);
+ void hv_vss_deinit(void);
+ int hv_vss_pre_suspend(void);
+ int hv_vss_pre_resume(void);
+--- a/include/linux/hyperv.h
++++ b/include/linux/hyperv.h
+@@ -1561,6 +1561,7 @@ struct hv_util_service {
+       void *channel;
+       void (*util_cb)(void *);
+       int (*util_init)(struct hv_util_service *);
++      int (*util_init_transport)(void);
+       void (*util_deinit)(void);
+       int (*util_pre_suspend)(void);
+       int (*util_pre_resume)(void);
diff --git a/queue-6.6/io_uring-check-if-iowq-is-killed-before-queuing.patch b/queue-6.6/io_uring-check-if-iowq-is-killed-before-queuing.patch
new file mode 100644 (file)
index 0000000..c6534ac
--- /dev/null
@@ -0,0 +1,46 @@
+From dbd2ca9367eb19bc5e269b8c58b0b1514ada9156 Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Thu, 19 Dec 2024 19:52:58 +0000
+Subject: io_uring: check if iowq is killed before queuing
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit dbd2ca9367eb19bc5e269b8c58b0b1514ada9156 upstream.
+
+task work can be executed after the task has gone through io_uring
+termination, whether it's the final task_work run or the fallback path.
+In this case, task work will find ->io_wq being already killed and
+null'ed, which is a problem if it then tries to forward the request to
+io_queue_iowq(). Make io_queue_iowq() fail requests in this case.
+
+Note that it also checks PF_KTHREAD, because the user can first close
+a DEFER_TASKRUN ring and shortly after kill the task, in which case
+->iowq check would race.
+
+Cc: stable@vger.kernel.org
+Fixes: 50c52250e2d74 ("block: implement async io_uring discard cmd")
+Fixes: 773af69121ecc ("io_uring: always reissue from task_work context")
+Reported-by: Will <willsroot@protonmail.com>
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/63312b4a2c2bb67ad67b857d17a300e1d3b078e8.1734637909.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io_uring.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -498,7 +498,11 @@ void io_queue_iowq(struct io_kiocb *req,
+       struct io_uring_task *tctx = req->task->io_uring;
+       BUG_ON(!tctx);
+-      BUG_ON(!tctx->io_wq);
++
++      if ((current->flags & PF_KTHREAD) || !tctx->io_wq) {
++              io_req_task_queue_fail(req, -ECANCELED);
++              return;
++      }
+       /* init ->work of the whole link before punting */
+       io_prep_async_link(req);
diff --git a/queue-6.6/io_uring-fix-registered-ring-file-refcount-leak.patch b/queue-6.6/io_uring-fix-registered-ring-file-refcount-leak.patch
new file mode 100644 (file)
index 0000000..b15af2a
--- /dev/null
@@ -0,0 +1,64 @@
+From 12d908116f7efd34f255a482b9afc729d7a5fb78 Mon Sep 17 00:00:00 2001
+From: Jann Horn <jannh@google.com>
+Date: Wed, 18 Dec 2024 17:56:25 +0100
+Subject: io_uring: Fix registered ring file refcount leak
+
+From: Jann Horn <jannh@google.com>
+
+commit 12d908116f7efd34f255a482b9afc729d7a5fb78 upstream.
+
+Currently, io_uring_unreg_ringfd() (which cleans up registered rings) is
+only called on exit, but __io_uring_free (which frees the tctx in which the
+registered ring pointers are stored) is also called on execve (via
+begin_new_exec -> io_uring_task_cancel -> __io_uring_cancel ->
+io_uring_cancel_generic -> __io_uring_free).
+
+This means: A process going through execve while having registered rings
+will leak references to the rings' `struct file`.
+
+Fix it by zapping registered rings on execve(). This is implemented by
+moving the io_uring_unreg_ringfd() from io_uring_files_cancel() into its
+callee __io_uring_cancel(), which is called from io_uring_task_cancel() on
+execve.
+
+This could probably be exploited *on 32-bit kernels* by leaking 2^32
+references to the same ring, because the file refcount is stored in a
+pointer-sized field and get_file() doesn't have protection against
+refcount overflow, just a WARN_ONCE(); but on 64-bit it should have no
+impact beyond a memory leak.
+
+Cc: stable@vger.kernel.org
+Fixes: e7a6c00dc77a ("io_uring: add support for registering ring file descriptors")
+Signed-off-by: Jann Horn <jannh@google.com>
+Link: https://lore.kernel.org/r/20241218-uring-reg-ring-cleanup-v1-1-8f63e999045b@google.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/io_uring.h |    4 +---
+ io_uring/io_uring.c      |    1 +
+ 2 files changed, 2 insertions(+), 3 deletions(-)
+
+--- a/include/linux/io_uring.h
++++ b/include/linux/io_uring.h
+@@ -65,10 +65,8 @@ static inline void io_uring_cmd_complete
+ static inline void io_uring_files_cancel(void)
+ {
+-      if (current->io_uring) {
+-              io_uring_unreg_ringfd();
++      if (current->io_uring)
+               __io_uring_cancel(false);
+-      }
+ }
+ static inline void io_uring_task_cancel(void)
+ {
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -3431,6 +3431,7 @@ end_wait:
+ void __io_uring_cancel(bool cancel_all)
+ {
++      io_uring_unreg_ringfd();
+       io_uring_cancel_generic(cancel_all, NULL);
+ }
diff --git a/queue-6.6/kvm-x86-play-nice-with-protected-guests-in-complete_hypercall_exit.patch b/queue-6.6/kvm-x86-play-nice-with-protected-guests-in-complete_hypercall_exit.patch
new file mode 100644 (file)
index 0000000..49c4656
--- /dev/null
@@ -0,0 +1,59 @@
+From 9b42d1e8e4fe9dc631162c04caa69b0d1860b0f0 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 27 Nov 2024 16:43:39 -0800
+Subject: KVM: x86: Play nice with protected guests in complete_hypercall_exit()
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 9b42d1e8e4fe9dc631162c04caa69b0d1860b0f0 upstream.
+
+Use is_64_bit_hypercall() instead of is_64_bit_mode() to detect a 64-bit
+hypercall when completing said hypercall.  For guests with protected state,
+e.g. SEV-ES and SEV-SNP, KVM must assume the hypercall was made in 64-bit
+mode as the vCPU state needed to detect 64-bit mode is unavailable.
+
+Hacking the sev_smoke_test selftest to generate a KVM_HC_MAP_GPA_RANGE
+hypercall via VMGEXIT trips the WARN:
+
+  ------------[ cut here ]------------
+  WARNING: CPU: 273 PID: 326626 at arch/x86/kvm/x86.h:180 complete_hypercall_exit+0x44/0xe0 [kvm]
+  Modules linked in: kvm_amd kvm ... [last unloaded: kvm]
+  CPU: 273 UID: 0 PID: 326626 Comm: sev_smoke_test Not tainted 6.12.0-smp--392e932fa0f3-feat #470
+  Hardware name: Google Astoria/astoria, BIOS 0.20240617.0-0 06/17/2024
+  RIP: 0010:complete_hypercall_exit+0x44/0xe0 [kvm]
+  Call Trace:
+   <TASK>
+   kvm_arch_vcpu_ioctl_run+0x2400/0x2720 [kvm]
+   kvm_vcpu_ioctl+0x54f/0x630 [kvm]
+   __se_sys_ioctl+0x6b/0xc0
+   do_syscall_64+0x83/0x160
+   entry_SYSCALL_64_after_hwframe+0x76/0x7e
+   </TASK>
+  ---[ end trace 0000000000000000 ]---
+
+Fixes: b5aead0064f3 ("KVM: x86: Assume a 64-bit hypercall for guests with protected state")
+Cc: stable@vger.kernel.org
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
+Reviewed-by: Nikunj A Dadhania <nikunj@amd.com>
+Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>
+Reviewed-by: Binbin Wu <binbin.wu@linux.intel.com>
+Reviewed-by: Kai Huang <kai.huang@intel.com>
+Link: https://lore.kernel.org/r/20241128004344.4072099-2-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -9825,7 +9825,7 @@ static int complete_hypercall_exit(struc
+ {
+       u64 ret = vcpu->run->hypercall.ret;
+-      if (!is_64_bit_mode(vcpu))
++      if (!is_64_bit_hypercall(vcpu))
+               ret = (u32)ret;
+       kvm_rax_write(vcpu, ret);
+       ++vcpu->stat.hypercalls;
diff --git a/queue-6.6/selftests-bpf-use-asm-constraint-m-for-loongarch.patch b/queue-6.6/selftests-bpf-use-asm-constraint-m-for-loongarch.patch
new file mode 100644 (file)
index 0000000..a491f29
--- /dev/null
@@ -0,0 +1,40 @@
+From 29d44cce324dab2bd86c447071a596262e7109b6 Mon Sep 17 00:00:00 2001
+From: Tiezhu Yang <yangtiezhu@loongson.cn>
+Date: Thu, 19 Dec 2024 19:15:06 +0800
+Subject: selftests/bpf: Use asm constraint "m" for LoongArch
+
+From: Tiezhu Yang <yangtiezhu@loongson.cn>
+
+commit 29d44cce324dab2bd86c447071a596262e7109b6 upstream.
+
+Currently, LoongArch LLVM does not support the constraint "o" and there is no
+plan to support it; it only supports the similar constraint "m", so change the
+constraints from "nor" in the "else" case to arch-specific "nmr" to avoid
+the build error such as "unexpected asm memory constraint" for LoongArch.
+
+Fixes: 630301b0d59d ("selftests/bpf: Add basic USDT selftests")
+Suggested-by: Weining Lu <luweining@loongson.cn>
+Suggested-by: Li Chen <chenli@loongson.cn>
+Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Reviewed-by: Huacai Chen <chenhuacai@loongson.cn>
+Cc: stable@vger.kernel.org
+Link: https://llvm.org/docs/LangRef.html#supported-constraint-code-list
+Link: https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp#L172
+Link: https://lore.kernel.org/bpf/20241219111506.20643-1-yangtiezhu@loongson.cn
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/bpf/sdt.h |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/tools/testing/selftests/bpf/sdt.h
++++ b/tools/testing/selftests/bpf/sdt.h
+@@ -102,6 +102,8 @@
+ # define STAP_SDT_ARG_CONSTRAINT        nZr
+ # elif defined __arm__
+ # define STAP_SDT_ARG_CONSTRAINT        g
++# elif defined __loongarch__
++# define STAP_SDT_ARG_CONSTRAINT        nmr
+ # else
+ # define STAP_SDT_ARG_CONSTRAINT        nor
+ # endif
diff --git a/queue-6.6/selftests-memfd-run-sysctl-tests-when-pid-namespace-support-is-enabled.patch b/queue-6.6/selftests-memfd-run-sysctl-tests-when-pid-namespace-support-is-enabled.patch
new file mode 100644 (file)
index 0000000..8fe7969
--- /dev/null
@@ -0,0 +1,71 @@
+From 6a75f19af16ff482cfd6085c77123aa0f464f8dd Mon Sep 17 00:00:00 2001
+From: "Isaac J. Manjarres" <isaacmanjarres@google.com>
+Date: Thu, 5 Dec 2024 11:29:41 -0800
+Subject: selftests/memfd: run sysctl tests when PID namespace support is enabled
+
+From: Isaac J. Manjarres <isaacmanjarres@google.com>
+
+commit 6a75f19af16ff482cfd6085c77123aa0f464f8dd upstream.
+
+The sysctl tests for vm.memfd_noexec rely on the kernel to support PID
+namespaces (i.e.  the kernel is built with CONFIG_PID_NS=y).  If the
+kernel the test runs on does not support PID namespaces, the first sysctl
+test will fail when attempting to spawn a new thread in a new PID
+namespace, abort the test, preventing the remaining tests from being run.
+
+This is not desirable, as not all kernels need PID namespaces, but can
+still use the other features provided by memfd.  Therefore, only run the
+sysctl tests if the kernel supports PID namespaces.  Otherwise, skip those
+tests and emit an informative message to let the user know why the sysctl
+tests are not being run.
+
+Link: https://lkml.kernel.org/r/20241205192943.3228757-1-isaacmanjarres@google.com
+Fixes: 11f75a01448f ("selftests/memfd: add tests for MFD_NOEXEC_SEAL MFD_EXEC")
+Signed-off-by: Isaac J. Manjarres <isaacmanjarres@google.com>
+Reviewed-by: Jeff Xu <jeffxu@google.com>
+Cc: Suren Baghdasaryan <surenb@google.com>
+Cc: Kalesh Singh <kaleshsingh@google.com>
+Cc: <stable@vger.kernel.org>   [6.6+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/memfd/memfd_test.c |   14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+--- a/tools/testing/selftests/memfd/memfd_test.c
++++ b/tools/testing/selftests/memfd/memfd_test.c
+@@ -9,6 +9,7 @@
+ #include <fcntl.h>
+ #include <linux/memfd.h>
+ #include <sched.h>
++#include <stdbool.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <signal.h>
+@@ -1567,6 +1568,11 @@ static void test_share_fork(char *banner
+       close(fd);
+ }
++static bool pid_ns_supported(void)
++{
++      return access("/proc/self/ns/pid", F_OK) == 0;
++}
++
+ int main(int argc, char **argv)
+ {
+       pid_t pid;
+@@ -1601,8 +1607,12 @@ int main(int argc, char **argv)
+       test_seal_grow();
+       test_seal_resize();
+-      test_sysctl_simple();
+-      test_sysctl_nested();
++      if (pid_ns_supported()) {
++              test_sysctl_simple();
++              test_sysctl_nested();
++      } else {
++              printf("PID namespaces are not supported; skipping sysctl tests\n");
++      }
+       test_share_dup("SHARE-DUP", "");
+       test_share_mmap("SHARE-MMAP", "");
diff --git a/queue-6.6/series b/queue-6.6/series
index 350dd44cf36a97ba33d89e47c25958b7aa8c75b9..d6bc2b86e54ae4c823d0989d376776c1d16496d7 100644 (file)
@@ -85,3 +85,14 @@ hwmon-tmp513-fix-interpretation-of-values-of-tempera.patch
 zram-refuse-to-use-zero-sized-block-device-as-backing-device.patch
 zram-fix-uninitialized-zram-not-releasing-backing-device.patch
 vmalloc-fix-accounting-with-i915.patch
+btrfs-tree-checker-reject-inline-extent-items-with-0-ref-count.patch
+drivers-hv-util-avoid-accessing-a-ringbuffer-not-initialized-yet.patch
+kvm-x86-play-nice-with-protected-guests-in-complete_hypercall_exit.patch
+smb-client-fix-tcp-timers-deadlock-after-rmmod.patch
+tracing-fix-test_event_printk-to-process-entire-print-argument.patch
+tracing-add-missing-helper-functions-in-event-pointer-dereference-check.patch
+tracing-add-s-check-in-test_event_printk.patch
+selftests-memfd-run-sysctl-tests-when-pid-namespace-support-is-enabled.patch
+selftests-bpf-use-asm-constraint-m-for-loongarch.patch
+io_uring-fix-registered-ring-file-refcount-leak.patch
+io_uring-check-if-iowq-is-killed-before-queuing.patch
diff --git a/queue-6.6/smb-client-fix-tcp-timers-deadlock-after-rmmod.patch b/queue-6.6/smb-client-fix-tcp-timers-deadlock-after-rmmod.patch
new file mode 100644 (file)
index 0000000..3274f9a
--- /dev/null
@@ -0,0 +1,182 @@
+From e9f2517a3e18a54a3943c098d2226b245d488801 Mon Sep 17 00:00:00 2001
+From: Enzo Matsumiya <ematsumiya@suse.de>
+Date: Tue, 10 Dec 2024 18:15:12 -0300
+Subject: smb: client: fix TCP timers deadlock after rmmod
+
+From: Enzo Matsumiya <ematsumiya@suse.de>
+
+commit e9f2517a3e18a54a3943c098d2226b245d488801 upstream.
+
+Commit ef7134c7fc48 ("smb: client: Fix use-after-free of network namespace.")
+fixed a netns UAF by manually enabling socket refcounting
+(sk->sk_net_refcnt=1 and sock_inuse_add(net, 1)).
+
+The reason the patch worked for that bug was because we now hold
+references to the netns (get_net_track() gets a ref internally)
+and they're properly released (internally, on __sk_destruct()),
+but only because sk->sk_net_refcnt was set.
+
+Problem:
+(this happens regardless of CONFIG_NET_NS_REFCNT_TRACKER and regardless
+of whether the netns is init_net or another one)
+
+Setting sk->sk_net_refcnt=1 *manually* and *after* socket creation is not
+only out of cifs scope, but also technically wrong -- it's set conditionally
+based on user (=1) vs kernel (=0) sockets.  And net/ implementations
+seem to base their user vs kernel space operations on it.
+
+e.g. upon TCP socket close, the TCP timers are not cleared because
+sk->sk_net_refcnt=1:
+(cf. commit 151c9c724d05 ("tcp: properly terminate timers for kernel sockets"))
+
+net/ipv4/tcp.c:
+    void tcp_close(struct sock *sk, long timeout)
+    {
+       lock_sock(sk);
+       __tcp_close(sk, timeout);
+       release_sock(sk);
+       if (!sk->sk_net_refcnt)
+               inet_csk_clear_xmit_timers_sync(sk);
+       sock_put(sk);
+    }
+
+Which will throw a lockdep warning and then, as expected, deadlock on
+tcp_write_timer().
+
+A way to reproduce this is by running the reproducer from ef7134c7fc48
+and then 'rmmod cifs'.  A few seconds later, the deadlock/lockdep
+warning shows up.
+
+Fix:
+We shouldn't mess with socket internals ourselves, so do not set
+sk_net_refcnt manually.
+
+Also change __sock_create() to sock_create_kern() for explicitness.
+
+As for non-init_net network namespaces, we deal with it the best way
+we can -- hold an extra netns reference for server->ssocket and drop it
+when it's released.  This ensures that the netns still exists whenever
+we need to create/destroy server->ssocket, but is not directly tied to
+it.
+
+Fixes: ef7134c7fc48 ("smb: client: Fix use-after-free of network namespace.")
+Cc: stable@vger.kernel.org
+Signed-off-by: Enzo Matsumiya <ematsumiya@suse.de>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/smb/client/connect.c |   36 ++++++++++++++++++++++++++----------
+ 1 file changed, 26 insertions(+), 10 deletions(-)
+
+--- a/fs/smb/client/connect.c
++++ b/fs/smb/client/connect.c
+@@ -1003,9 +1003,13 @@ clean_demultiplex_info(struct TCP_Server
+       msleep(125);
+       if (cifs_rdma_enabled(server))
+               smbd_destroy(server);
++
+       if (server->ssocket) {
+               sock_release(server->ssocket);
+               server->ssocket = NULL;
++
++              /* Release netns reference for the socket. */
++              put_net(cifs_net_ns(server));
+       }
+       if (!list_empty(&server->pending_mid_q)) {
+@@ -1054,6 +1058,7 @@ clean_demultiplex_info(struct TCP_Server
+                */
+       }
++      /* Release netns reference for this server. */
+       put_net(cifs_net_ns(server));
+       kfree(server->leaf_fullpath);
+       kfree(server);
+@@ -1726,6 +1731,8 @@ cifs_get_tcp_session(struct smb3_fs_cont
+       tcp_ses->ops = ctx->ops;
+       tcp_ses->vals = ctx->vals;
++
++      /* Grab netns reference for this server. */
+       cifs_set_net_ns(tcp_ses, get_net(current->nsproxy->net_ns));
+       tcp_ses->conn_id = atomic_inc_return(&tcpSesNextId);
+@@ -1857,6 +1864,7 @@ smbd_connected:
+ out_err_crypto_release:
+       cifs_crypto_secmech_release(tcp_ses);
++      /* Release netns reference for this server. */
+       put_net(cifs_net_ns(tcp_ses));
+ out_err:
+@@ -1865,8 +1873,10 @@ out_err:
+                       cifs_put_tcp_session(tcp_ses->primary_server, false);
+               kfree(tcp_ses->hostname);
+               kfree(tcp_ses->leaf_fullpath);
+-              if (tcp_ses->ssocket)
++              if (tcp_ses->ssocket) {
+                       sock_release(tcp_ses->ssocket);
++                      put_net(cifs_net_ns(tcp_ses));
++              }
+               kfree(tcp_ses);
+       }
+       return ERR_PTR(rc);
+@@ -3120,20 +3130,20 @@ generic_ip_connect(struct TCP_Server_Inf
+               socket = server->ssocket;
+       } else {
+               struct net *net = cifs_net_ns(server);
+-              struct sock *sk;
+-              rc = __sock_create(net, sfamily, SOCK_STREAM,
+-                                 IPPROTO_TCP, &server->ssocket, 1);
++              rc = sock_create_kern(net, sfamily, SOCK_STREAM, IPPROTO_TCP, &server->ssocket);
+               if (rc < 0) {
+                       cifs_server_dbg(VFS, "Error %d creating socket\n", rc);
+                       return rc;
+               }
+-              sk = server->ssocket->sk;
+-              __netns_tracker_free(net, &sk->ns_tracker, false);
+-              sk->sk_net_refcnt = 1;
+-              get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
+-              sock_inuse_add(net, 1);
++              /*
++               * Grab netns reference for the socket.
++               *
++               * It'll be released here, on error, or in clean_demultiplex_info() upon server
++               * teardown.
++               */
++              get_net(net);
+               /* BB other socket options to set KEEPALIVE, NODELAY? */
+               cifs_dbg(FYI, "Socket created\n");
+@@ -3147,8 +3157,10 @@ generic_ip_connect(struct TCP_Server_Inf
+       }
+       rc = bind_socket(server);
+-      if (rc < 0)
++      if (rc < 0) {
++              put_net(cifs_net_ns(server));
+               return rc;
++      }
+       /*
+        * Eventually check for other socket options to change from
+@@ -3185,6 +3197,7 @@ generic_ip_connect(struct TCP_Server_Inf
+       if (rc < 0) {
+               cifs_dbg(FYI, "Error %d connecting to server\n", rc);
+               trace_smb3_connect_err(server->hostname, server->conn_id, &server->dstaddr, rc);
++              put_net(cifs_net_ns(server));
+               sock_release(socket);
+               server->ssocket = NULL;
+               return rc;
+@@ -3193,6 +3206,9 @@ generic_ip_connect(struct TCP_Server_Inf
+       if (sport == htons(RFC1001_PORT))
+               rc = ip_rfc1001_connect(server);
++      if (rc < 0)
++              put_net(cifs_net_ns(server));
++
+       return rc;
+ }
diff --git a/queue-6.6/tracing-add-missing-helper-functions-in-event-pointer-dereference-check.patch b/queue-6.6/tracing-add-missing-helper-functions-in-event-pointer-dereference-check.patch
new file mode 100644 (file)
index 0000000..5ea5104
--- /dev/null
@@ -0,0 +1,78 @@
+From 917110481f6bc1c96b1e54b62bb114137fbc6d17 Mon Sep 17 00:00:00 2001
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Mon, 16 Dec 2024 21:41:20 -0500
+Subject: tracing: Add missing helper functions in event pointer dereference check
+
+From: Steven Rostedt <rostedt@goodmis.org>
+
+commit 917110481f6bc1c96b1e54b62bb114137fbc6d17 upstream.
+
+The process_pointer() helper function looks to see if various trace event
+macros are used. These macros are for storing data in the event. This
+makes it safe to dereference as the dereference will then point into the
+event on the ring buffer where the content of the data stays with the
+event itself.
+
+A few helper functions were missing. Those were:
+
+  __get_rel_dynamic_array()
+  __get_dynamic_array_len()
+  __get_rel_dynamic_array_len()
+  __get_rel_sockaddr()
+
+Also add a helper function find_print_string() so that no intermediate
+variable is needed to test whether the string exists.
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Link: https://lore.kernel.org/20241217024720.521836792@goodmis.org
+Fixes: 5013f454a352c ("tracing: Add check of trace event print fmts for dereferencing pointers")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/trace_events.c |   21 +++++++++++++++++++--
+ 1 file changed, 19 insertions(+), 2 deletions(-)
+
+--- a/kernel/trace/trace_events.c
++++ b/kernel/trace/trace_events.c
+@@ -274,6 +274,15 @@ static bool test_field(const char *fmt,
+       return false;
+ }
++/* Look for a string within an argument */
++static bool find_print_string(const char *arg, const char *str, const char *end)
++{
++      const char *r;
++
++      r = strstr(arg, str);
++      return r && r < end;
++}
++
+ /* Return true if the argument pointer is safe */
+ static bool process_pointer(const char *fmt, int len, struct trace_event_call *call)
+ {
+@@ -292,9 +301,17 @@ static bool process_pointer(const char *
+               a = strchr(fmt, '&');
+               if ((a && (a < r)) || test_field(r, call))
+                       return true;
+-      } else if ((r = strstr(fmt, "__get_dynamic_array(")) && r < e) {
++      } else if (find_print_string(fmt, "__get_dynamic_array(", e)) {
++              return true;
++      } else if (find_print_string(fmt, "__get_rel_dynamic_array(", e)) {
++              return true;
++      } else if (find_print_string(fmt, "__get_dynamic_array_len(", e)) {
++              return true;
++      } else if (find_print_string(fmt, "__get_rel_dynamic_array_len(", e)) {
++              return true;
++      } else if (find_print_string(fmt, "__get_sockaddr(", e)) {
+               return true;
+-      } else if ((r = strstr(fmt, "__get_sockaddr(")) && r < e) {
++      } else if (find_print_string(fmt, "__get_rel_sockaddr(", e)) {
+               return true;
+       }
+       return false;
diff --git a/queue-6.6/tracing-add-s-check-in-test_event_printk.patch b/queue-6.6/tracing-add-s-check-in-test_event_printk.patch
new file mode 100644 (file)
index 0000000..5c30d99
--- /dev/null
@@ -0,0 +1,206 @@
+From 65a25d9f7ac02e0cf361356e834d1c71d36acca9 Mon Sep 17 00:00:00 2001
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Mon, 16 Dec 2024 21:41:21 -0500
+Subject: tracing: Add "%s" check in test_event_printk()
+
+From: Steven Rostedt <rostedt@goodmis.org>
+
+commit 65a25d9f7ac02e0cf361356e834d1c71d36acca9 upstream.
+
+The test_event_printk() code makes sure that when a trace event is
+registered, any dereferenced pointers from the event's TP_printk() are
+pointing to content in the ring buffer. But currently it does not handle
+"%s", as there are cases where the string pointer saved in the ring buffer
+points to a static string in the kernel that will never be freed. As that
+is a valid case, the pointer needs to be checked at runtime.
+
+Currently the runtime check is done via trace_check_vprintf(), but to not
+have to replicate everything in vsnprintf() it does some logic with the
+va_list that may not be reliable across architectures. In order to get rid
+of that logic, more work in the test_event_printk() needs to be done. Some
+of the strings can be validated at this time when it is obvious the string
+is valid because the string will be saved in the ring buffer content.
+
+Do all the validation of strings in the ring buffer at boot in
+test_event_printk(), and make sure that the field of the strings that
+point into the kernel are accessible. This will allow adding checks at
+runtime that will validate the fields themselves and not rely on parsing
+the TP_printk() format at runtime.
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Link: https://lore.kernel.org/20241217024720.685917008@goodmis.org
+Fixes: 5013f454a352c ("tracing: Add check of trace event print fmts for dereferencing pointers")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/trace_events.c |  104 +++++++++++++++++++++++++++++++++++++-------
+ 1 file changed, 89 insertions(+), 15 deletions(-)
+
+--- a/kernel/trace/trace_events.c
++++ b/kernel/trace/trace_events.c
+@@ -244,19 +244,16 @@ int trace_event_get_offsets(struct trace
+       return tail->offset + tail->size;
+ }
+-/*
+- * Check if the referenced field is an array and return true,
+- * as arrays are OK to dereference.
+- */
+-static bool test_field(const char *fmt, struct trace_event_call *call)
++
++static struct trace_event_fields *find_event_field(const char *fmt,
++                                                 struct trace_event_call *call)
+ {
+       struct trace_event_fields *field = call->class->fields_array;
+-      const char *array_descriptor;
+       const char *p = fmt;
+       int len;
+       if (!(len = str_has_prefix(fmt, "REC->")))
+-              return false;
++              return NULL;
+       fmt += len;
+       for (p = fmt; *p; p++) {
+               if (!isalnum(*p) && *p != '_')
+@@ -267,11 +264,26 @@ static bool test_field(const char *fmt,
+       for (; field->type; field++) {
+               if (strncmp(field->name, fmt, len) || field->name[len])
+                       continue;
+-              array_descriptor = strchr(field->type, '[');
+-              /* This is an array and is OK to dereference. */
+-              return array_descriptor != NULL;
++
++              return field;
+       }
+-      return false;
++      return NULL;
++}
++
++/*
++ * Check if the referenced field is an array and return true,
++ * as arrays are OK to dereference.
++ */
++static bool test_field(const char *fmt, struct trace_event_call *call)
++{
++      struct trace_event_fields *field;
++
++      field = find_event_field(fmt, call);
++      if (!field)
++              return false;
++
++      /* This is an array and is OK to dereference. */
++      return strchr(field->type, '[') != NULL;
+ }
+ /* Look for a string within an argument */
+@@ -317,6 +329,53 @@ static bool process_pointer(const char *
+       return false;
+ }
++/* Return true if the string is safe */
++static bool process_string(const char *fmt, int len, struct trace_event_call *call)
++{
++      const char *r, *e, *s;
++
++      e = fmt + len;
++
++      /*
++       * There are several helper functions that return strings.
++       * If the argument contains a function, then assume its field is valid.
++       * It is considered that the argument has a function if it has:
++       *   alphanumeric or '_' before a parenthesis.
++       */
++      s = fmt;
++      do {
++              r = strstr(s, "(");
++              if (!r || r >= e)
++                      break;
++              for (int i = 1; r - i >= s; i++) {
++                      char ch = *(r - i);
++                      if (isspace(ch))
++                              continue;
++                      if (isalnum(ch) || ch == '_')
++                              return true;
++                      /* Anything else, this isn't a function */
++                      break;
++              }
++              /* A function could be wrapped in parentheses, try the next one */
++              s = r + 1;
++      } while (s < e);
++
++      /*
++       * If there's any strings in the argument consider this arg OK as it
++       * could be: REC->field ? "foo" : "bar" and we don't want to get into
++       * verifying that logic here.
++       */
++      if (find_print_string(fmt, "\"", e))
++              return true;
++
++      /* Dereferenced strings are also valid like any other pointer */
++      if (process_pointer(fmt, len, call))
++              return true;
++
++      /* Make sure the field is found, and consider it OK for now if it is */
++      return find_event_field(fmt, call) != NULL;
++}
++
+ /*
+  * Examine the print fmt of the event looking for unsafe dereference
+  * pointers using %p* that could be recorded in the trace event and
+@@ -326,6 +385,7 @@ static bool process_pointer(const char *
+ static void test_event_printk(struct trace_event_call *call)
+ {
+       u64 dereference_flags = 0;
++      u64 string_flags = 0;
+       bool first = true;
+       const char *fmt;
+       int parens = 0;
+@@ -416,8 +476,16 @@ static void test_event_printk(struct tra
+                                               star = true;
+                                               continue;
+                                       }
+-                                      if ((fmt[i + j] == 's') && star)
+-                                              arg++;
++                                      if ((fmt[i + j] == 's')) {
++                                              if (star)
++                                                      arg++;
++                                              if (WARN_ONCE(arg == 63,
++                                                            "Too many args for event: %s",
++                                                            trace_event_name(call)))
++                                                      return;
++                                              dereference_flags |= 1ULL << arg;
++                                              string_flags |= 1ULL << arg;
++                                      }
+                                       break;
+                               }
+                               break;
+@@ -464,7 +532,10 @@ static void test_event_printk(struct tra
+                       }
+                       if (dereference_flags & (1ULL << arg)) {
+-                              if (process_pointer(fmt + start_arg, e - start_arg, call))
++                              if (string_flags & (1ULL << arg)) {
++                                      if (process_string(fmt + start_arg, e - start_arg, call))
++                                              dereference_flags &= ~(1ULL << arg);
++                              } else if (process_pointer(fmt + start_arg, e - start_arg, call))
+                                       dereference_flags &= ~(1ULL << arg);
+                       }
+@@ -476,7 +547,10 @@ static void test_event_printk(struct tra
+       }
+       if (dereference_flags & (1ULL << arg)) {
+-              if (process_pointer(fmt + start_arg, i - start_arg, call))
++              if (string_flags & (1ULL << arg)) {
++                      if (process_string(fmt + start_arg, i - start_arg, call))
++                              dereference_flags &= ~(1ULL << arg);
++              } else if (process_pointer(fmt + start_arg, i - start_arg, call))
+                       dereference_flags &= ~(1ULL << arg);
+       }
diff --git a/queue-6.6/tracing-fix-test_event_printk-to-process-entire-print-argument.patch b/queue-6.6/tracing-fix-test_event_printk-to-process-entire-print-argument.patch
new file mode 100644 (file)
index 0000000..ebd2328
--- /dev/null
@@ -0,0 +1,184 @@
+From a6629626c584200daf495cc9a740048b455addcd Mon Sep 17 00:00:00 2001
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Mon, 16 Dec 2024 21:41:19 -0500
+Subject: tracing: Fix test_event_printk() to process entire print argument
+
+From: Steven Rostedt <rostedt@goodmis.org>
+
+commit a6629626c584200daf495cc9a740048b455addcd upstream.
+
+The test_event_printk() analyzes print formats of trace events looking for
+cases where it may dereference a pointer that is not in the ring buffer
+which can possibly be a bug when the trace event is read from the ring
+buffer and the content of that pointer no longer exists.
+
+The function needs to accurately go from one print format argument to the
+next. It handles quotes and parentheses that may be included in an
+argument. When it finds the start of the next argument, it uses a simple
+"c = strchr(fmt + i, ',')" to find the end of that argument!
+
+In order to include "%s" dereferencing, it needs to process the entire
+content of the print format argument and not just the content of the first
+',' it finds. As there may be content like:
+
+ ({ const char *saved_ptr = trace_seq_buffer_ptr(p); static const char
+   *access_str[] = { "---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux"
+   }; union kvm_mmu_page_role role; role.word = REC->role;
+   trace_seq_printf(p, "sp gen %u gfn %llx l%u %u-byte q%u%s %s%s" " %snxe
+   %sad root %u %s%c", REC->mmu_valid_gen, REC->gfn, role.level,
+   role.has_4_byte_gpte ? 4 : 8, role.quadrant, role.direct ? " direct" : "",
+   access_str[role.access], role.invalid ? " invalid" : "", role.efer_nx ? ""
+   : "!", role.ad_disabled ? "!" : "", REC->root_count, REC->unsync ?
+   "unsync" : "sync", 0); saved_ptr; })
+
+Which is an example of a full argument of an existing event. As the code
+already handles finding the next print format argument, process the
+argument at the end of it and not the start of it. This way it has both
+the start of the argument as well as the end of it.
+
+Add a helper function "process_pointer()" that will do the processing during
+the loop as well as at the end. It also makes the code cleaner and easier
+to read.
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Link: https://lore.kernel.org/20241217024720.362271189@goodmis.org
+Fixes: 5013f454a352c ("tracing: Add check of trace event print fmts for dereferencing pointers")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/trace_events.c |   82 ++++++++++++++++++++++++++++----------------
+ 1 file changed, 53 insertions(+), 29 deletions(-)
+
+--- a/kernel/trace/trace_events.c
++++ b/kernel/trace/trace_events.c
+@@ -265,8 +265,7 @@ static bool test_field(const char *fmt,
+       len = p - fmt;
+       for (; field->type; field++) {
+-              if (strncmp(field->name, fmt, len) ||
+-                  field->name[len])
++              if (strncmp(field->name, fmt, len) || field->name[len])
+                       continue;
+               array_descriptor = strchr(field->type, '[');
+               /* This is an array and is OK to dereference. */
+@@ -275,6 +274,32 @@ static bool test_field(const char *fmt,
+       return false;
+ }
++/* Return true if the argument pointer is safe */
++static bool process_pointer(const char *fmt, int len, struct trace_event_call *call)
++{
++      const char *r, *e, *a;
++
++      e = fmt + len;
++
++      /* Find the REC-> in the argument */
++      r = strstr(fmt, "REC->");
++      if (r && r < e) {
++              /*
++               * Addresses of events on the buffer, or an array on the buffer is
++               * OK to dereference. There's ways to fool this, but
++               * this is to catch common mistakes, not malicious code.
++               */
++              a = strchr(fmt, '&');
++              if ((a && (a < r)) || test_field(r, call))
++                      return true;
++      } else if ((r = strstr(fmt, "__get_dynamic_array(")) && r < e) {
++              return true;
++      } else if ((r = strstr(fmt, "__get_sockaddr(")) && r < e) {
++              return true;
++      }
++      return false;
++}
++
+ /*
+  * Examine the print fmt of the event looking for unsafe dereference
+  * pointers using %p* that could be recorded in the trace event and
+@@ -285,12 +310,12 @@ static void test_event_printk(struct tra
+ {
+       u64 dereference_flags = 0;
+       bool first = true;
+-      const char *fmt, *c, *r, *a;
++      const char *fmt;
+       int parens = 0;
+       char in_quote = 0;
+       int start_arg = 0;
+       int arg = 0;
+-      int i;
++      int i, e;
+       fmt = call->print_fmt;
+@@ -403,42 +428,41 @@ static void test_event_printk(struct tra
+               case ',':
+                       if (in_quote || parens)
+                               continue;
++                      e = i;
+                       i++;
+                       while (isspace(fmt[i]))
+                               i++;
+-                      start_arg = i;
+-                      if (!(dereference_flags & (1ULL << arg)))
+-                              goto next_arg;
+-                      /* Find the REC-> in the argument */
+-                      c = strchr(fmt + i, ',');
+-                      r = strstr(fmt + i, "REC->");
+-                      if (r && (!c || r < c)) {
+-                              /*
+-                               * Addresses of events on the buffer,
+-                               * or an array on the buffer is
+-                               * OK to dereference.
+-                               * There's ways to fool this, but
+-                               * this is to catch common mistakes,
+-                               * not malicious code.
+-                               */
+-                              a = strchr(fmt + i, '&');
+-                              if ((a && (a < r)) || test_field(r, call))
++                      /*
++                       * If start_arg is zero, then this is the start of the
++                       * first argument. The processing of the argument happens
++                       * when the end of the argument is found, as it needs to
++                       * handle paranthesis and such.
++                       */
++                      if (!start_arg) {
++                              start_arg = i;
++                              /* Balance out the i++ in the for loop */
++                              i--;
++                              continue;
++                      }
++
++                      if (dereference_flags & (1ULL << arg)) {
++                              if (process_pointer(fmt + start_arg, e - start_arg, call))
+                                       dereference_flags &= ~(1ULL << arg);
+-                      } else if ((r = strstr(fmt + i, "__get_dynamic_array(")) &&
+-                                 (!c || r < c)) {
+-                              dereference_flags &= ~(1ULL << arg);
+-                      } else if ((r = strstr(fmt + i, "__get_sockaddr(")) &&
+-                                 (!c || r < c)) {
+-                              dereference_flags &= ~(1ULL << arg);
+                       }
+-              next_arg:
+-                      i--;
++                      start_arg = i;
+                       arg++;
++                      /* Balance out the i++ in the for loop */
++                      i--;
+               }
+       }
++      if (dereference_flags & (1ULL << arg)) {
++              if (process_pointer(fmt + start_arg, i - start_arg, call))
++                      dereference_flags &= ~(1ULL << arg);
++      }
++
+       /*
+        * If you triggered the below warning, the trace event reported
+        * uses an unsafe dereference pointer %p*. As the data stored