git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.1-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 23 Dec 2024 11:28:37 +0000 (12:28 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 23 Dec 2024 11:28:37 +0000 (12:28 +0100)
added patches:
btrfs-tree-checker-reject-inline-extent-items-with-0-ref-count.patch
drivers-hv-util-avoid-accessing-a-ringbuffer-not-initialized-yet.patch
io_uring-check-if-iowq-is-killed-before-queuing.patch
io_uring-fix-registered-ring-file-refcount-leak.patch
kvm-x86-play-nice-with-protected-guests-in-complete_hypercall_exit.patch
selftests-bpf-use-asm-constraint-m-for-loongarch.patch
tracing-add-missing-helper-functions-in-event-pointer-dereference-check.patch
tracing-add-s-check-in-test_event_printk.patch
tracing-fix-test_event_printk-to-process-entire-print-argument.patch

queue-6.1/btrfs-tree-checker-reject-inline-extent-items-with-0-ref-count.patch [new file with mode: 0644]
queue-6.1/drivers-hv-util-avoid-accessing-a-ringbuffer-not-initialized-yet.patch [new file with mode: 0644]
queue-6.1/io_uring-check-if-iowq-is-killed-before-queuing.patch [new file with mode: 0644]
queue-6.1/io_uring-fix-registered-ring-file-refcount-leak.patch [new file with mode: 0644]
queue-6.1/kvm-x86-play-nice-with-protected-guests-in-complete_hypercall_exit.patch [new file with mode: 0644]
queue-6.1/selftests-bpf-use-asm-constraint-m-for-loongarch.patch [new file with mode: 0644]
queue-6.1/series
queue-6.1/tracing-add-missing-helper-functions-in-event-pointer-dereference-check.patch [new file with mode: 0644]
queue-6.1/tracing-add-s-check-in-test_event_printk.patch [new file with mode: 0644]
queue-6.1/tracing-fix-test_event_printk-to-process-entire-print-argument.patch [new file with mode: 0644]

diff --git a/queue-6.1/btrfs-tree-checker-reject-inline-extent-items-with-0-ref-count.patch b/queue-6.1/btrfs-tree-checker-reject-inline-extent-items-with-0-ref-count.patch
new file mode 100644 (file)
index 0000000..0cab2c7
--- /dev/null
@@ -0,0 +1,104 @@
+From dfb92681a19e1d5172420baa242806414b3eff6f Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Wed, 4 Dec 2024 13:30:46 +1030
+Subject: btrfs: tree-checker: reject inline extent items with 0 ref count
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit dfb92681a19e1d5172420baa242806414b3eff6f upstream.
+
+[BUG]
+There is a bug report on the mailing list where btrfs_run_delayed_refs()
+failed to drop the ref count for logical 25870311358464 num_bytes
+2113536.
+
+The involved leaf dump looks like this:
+
+  item 166 key (25870311358464 168 2113536) itemoff 10091 itemsize 50
+    extent refs 1 gen 84178 flags 1
+    ref#0: shared data backref parent 32399126528000 count 0 <<<
+    ref#1: shared data backref parent 31808973717504 count 1
+
+Notice the count number is 0.
+
+[CAUSE]
+There is no concrete evidence yet, but considering that 0 -> 1 is also a
+single bit flip, it's possible that a hardware memory bitflip is
+involved, causing the on-disk extent tree to be corrupted.
+
+[FIX]
+To prevent us from reading such a corrupted extent item, or writing such
+a damaged extent item back to disk, enhance the handling of
+BTRFS_EXTENT_DATA_REF_KEY and BTRFS_SHARED_DATA_REF_KEY keys for both
+inlined and keyed items, to detect such a 0 ref count and reject them.
+
+CC: stable@vger.kernel.org # 5.4+
+Link: https://lore.kernel.org/linux-btrfs/7c69dd49-c346-4806-86e7-e6f863a66f48@app.fastmail.com/
+Reported-by: Frankie Fisher <frankie@terrorise.me.uk>
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/tree-checker.c |   27 ++++++++++++++++++++++++++-
+ 1 file changed, 26 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/tree-checker.c
++++ b/fs/btrfs/tree-checker.c
+@@ -1433,6 +1433,11 @@ static int check_extent_item(struct exte
+                                          dref_offset, fs_info->sectorsize);
+                               return -EUCLEAN;
+                       }
++                      if (unlikely(btrfs_extent_data_ref_count(leaf, dref) == 0)) {
++                              extent_err(leaf, slot,
++                      "invalid data ref count, should have non-zero value");
++                              return -EUCLEAN;
++                      }
+                       inline_refs += btrfs_extent_data_ref_count(leaf, dref);
+                       break;
+               /* Contains parent bytenr and ref count */
+@@ -1445,6 +1450,11 @@ static int check_extent_item(struct exte
+                                          inline_offset, fs_info->sectorsize);
+                               return -EUCLEAN;
+                       }
++                      if (unlikely(btrfs_shared_data_ref_count(leaf, sref) == 0)) {
++                              extent_err(leaf, slot,
++                      "invalid shared data ref count, should have non-zero value");
++                              return -EUCLEAN;
++                      }
+                       inline_refs += btrfs_shared_data_ref_count(leaf, sref);
+                       break;
+               default:
+@@ -1496,8 +1506,18 @@ static int check_simple_keyed_refs(struc
+ {
+       u32 expect_item_size = 0;
+-      if (key->type == BTRFS_SHARED_DATA_REF_KEY)
++      if (key->type == BTRFS_SHARED_DATA_REF_KEY) {
++              struct btrfs_shared_data_ref *sref;
++
++              sref = btrfs_item_ptr(leaf, slot, struct btrfs_shared_data_ref);
++              if (unlikely(btrfs_shared_data_ref_count(leaf, sref) == 0)) {
++                      extent_err(leaf, slot,
++              "invalid shared data backref count, should have non-zero value");
++                      return -EUCLEAN;
++              }
++
+               expect_item_size = sizeof(struct btrfs_shared_data_ref);
++      }
+       if (unlikely(btrfs_item_size(leaf, slot) != expect_item_size)) {
+               generic_err(leaf, slot,
+@@ -1557,6 +1577,11 @@ static int check_extent_data_ref(struct
+                                  offset, leaf->fs_info->sectorsize);
+                       return -EUCLEAN;
+               }
++              if (unlikely(btrfs_extent_data_ref_count(leaf, dref) == 0)) {
++                      extent_err(leaf, slot,
++      "invalid extent data backref count, should have non-zero value");
++                      return -EUCLEAN;
++              }
+       }
+       return 0;
+ }
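
As an illustrative sketch only (not code from the patch), the shape of the added
validation is simple: a data backref whose count field reads back as zero cannot
be valid, so it is treated as corruption and rejected with -EUCLEAN:

  #include <errno.h>      /* EUCLEAN */

  /* Hypothetical stand-in for an on-disk backref; the kernel reads the real
   * count with btrfs_extent_data_ref_count()/btrfs_shared_data_ref_count(). */
  struct data_backref { unsigned int count; };

  static int check_backref_count(const struct data_backref *ref)
  {
          /* A backref that still exists must hold at least one reference. */
          return ref->count == 0 ? -EUCLEAN : 0;
  }
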
diff --git a/queue-6.1/drivers-hv-util-avoid-accessing-a-ringbuffer-not-initialized-yet.patch b/queue-6.1/drivers-hv-util-avoid-accessing-a-ringbuffer-not-initialized-yet.patch
new file mode 100644 (file)
index 0000000..bd5390a
--- /dev/null
@@ -0,0 +1,169 @@
+From 07a756a49f4b4290b49ea46e089cbe6f79ff8d26 Mon Sep 17 00:00:00 2001
+From: Michael Kelley <mhklinux@outlook.com>
+Date: Wed, 6 Nov 2024 07:42:47 -0800
+Subject: Drivers: hv: util: Avoid accessing a ringbuffer not initialized yet
+
+From: Michael Kelley <mhklinux@outlook.com>
+
+commit 07a756a49f4b4290b49ea46e089cbe6f79ff8d26 upstream.
+
+If the KVP (or VSS) daemon starts before the VMBus channel's ringbuffer is
+fully initialized, we can hit the panic below:
+
+hv_utils: Registering HyperV Utility Driver
+hv_vmbus: registering driver hv_utils
+...
+BUG: kernel NULL pointer dereference, address: 0000000000000000
+CPU: 44 UID: 0 PID: 2552 Comm: hv_kvp_daemon Tainted: G E 6.11.0-rc3+ #1
+RIP: 0010:hv_pkt_iter_first+0x12/0xd0
+Call Trace:
+...
+ vmbus_recvpacket
+ hv_kvp_onchannelcallback
+ vmbus_on_event
+ tasklet_action_common
+ tasklet_action
+ handle_softirqs
+ irq_exit_rcu
+ sysvec_hyperv_stimer0
+ </IRQ>
+ <TASK>
+ asm_sysvec_hyperv_stimer0
+...
+ kvp_register_done
+ hvt_op_read
+ vfs_read
+ ksys_read
+ __x64_sys_read
+
+This can happen because the KVP/VSS channel callback can be invoked
+even before the channel is fully opened:
+1) as soon as hv_kvp_init() -> hvutil_transport_init() creates
+/dev/vmbus/hv_kvp, the kvp daemon can open the device file immediately and
+register itself with the driver by writing a KVP_OP_REGISTER1 message to the
+file (which is handled by kvp_on_msg() -> kvp_handle_handshake()) and
+reading the file for the driver's response, which is handled by
+hvt_op_read(), which calls hvt->on_read(), i.e. kvp_register_done().
+
+2) the problem with kvp_register_done() is that it can cause the
+channel callback to be called even before the channel is fully opened,
+and when the channel callback starts to run, util_probe()->
+vmbus_open() may not have initialized the ringbuffer yet, so the
+callback can hit the NULL pointer dereference panic.
+
+To reproduce the panic consistently, we can add a "ssleep(10)" for KVP in
+__vmbus_open(), just before the first hv_ringbuffer_init(), and then we
+unload and reload the driver hv_utils, and run the daemon manually within
+the 10 seconds.
+
+Fix the panic by reordering the steps in util_probe() so the char dev
+entry used by the KVP or VSS daemon is not created until after
+vmbus_open() has completed. This reordering prevents the race condition
+from happening.
+
+Reported-by: Dexuan Cui <decui@microsoft.com>
+Fixes: e0fa3e5e7df6 ("Drivers: hv: utils: fix a race on userspace daemons registration")
+Cc: stable@vger.kernel.org
+Signed-off-by: Michael Kelley <mhklinux@outlook.com>
+Acked-by: Wei Liu <wei.liu@kernel.org>
+Link: https://lore.kernel.org/r/20241106154247.2271-3-mhklinux@outlook.com
+Signed-off-by: Wei Liu <wei.liu@kernel.org>
+Message-ID: <20241106154247.2271-3-mhklinux@outlook.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/hv/hv_kvp.c       |    6 ++++++
+ drivers/hv/hv_snapshot.c  |    6 ++++++
+ drivers/hv/hv_util.c      |    9 +++++++++
+ drivers/hv/hyperv_vmbus.h |    2 ++
+ include/linux/hyperv.h    |    1 +
+ 5 files changed, 24 insertions(+)
+
+--- a/drivers/hv/hv_kvp.c
++++ b/drivers/hv/hv_kvp.c
+@@ -767,6 +767,12 @@ hv_kvp_init(struct hv_util_service *srv)
+        */
+       kvp_transaction.state = HVUTIL_DEVICE_INIT;
++      return 0;
++}
++
++int
++hv_kvp_init_transport(void)
++{
+       hvt = hvutil_transport_init(kvp_devname, CN_KVP_IDX, CN_KVP_VAL,
+                                   kvp_on_msg, kvp_on_reset);
+       if (!hvt)
+--- a/drivers/hv/hv_snapshot.c
++++ b/drivers/hv/hv_snapshot.c
+@@ -388,6 +388,12 @@ hv_vss_init(struct hv_util_service *srv)
+        */
+       vss_transaction.state = HVUTIL_DEVICE_INIT;
++      return 0;
++}
++
++int
++hv_vss_init_transport(void)
++{
+       hvt = hvutil_transport_init(vss_devname, CN_VSS_IDX, CN_VSS_VAL,
+                                   vss_on_msg, vss_on_reset);
+       if (!hvt) {
+--- a/drivers/hv/hv_util.c
++++ b/drivers/hv/hv_util.c
+@@ -141,6 +141,7 @@ static struct hv_util_service util_heart
+ static struct hv_util_service util_kvp = {
+       .util_cb = hv_kvp_onchannelcallback,
+       .util_init = hv_kvp_init,
++      .util_init_transport = hv_kvp_init_transport,
+       .util_pre_suspend = hv_kvp_pre_suspend,
+       .util_pre_resume = hv_kvp_pre_resume,
+       .util_deinit = hv_kvp_deinit,
+@@ -149,6 +150,7 @@ static struct hv_util_service util_kvp =
+ static struct hv_util_service util_vss = {
+       .util_cb = hv_vss_onchannelcallback,
+       .util_init = hv_vss_init,
++      .util_init_transport = hv_vss_init_transport,
+       .util_pre_suspend = hv_vss_pre_suspend,
+       .util_pre_resume = hv_vss_pre_resume,
+       .util_deinit = hv_vss_deinit,
+@@ -592,6 +594,13 @@ static int util_probe(struct hv_device *
+       if (ret)
+               goto error;
++      if (srv->util_init_transport) {
++              ret = srv->util_init_transport();
++              if (ret) {
++                      vmbus_close(dev->channel);
++                      goto error;
++              }
++      }
+       return 0;
+ error:
+--- a/drivers/hv/hyperv_vmbus.h
++++ b/drivers/hv/hyperv_vmbus.h
+@@ -365,12 +365,14 @@ void vmbus_on_event(unsigned long data);
+ void vmbus_on_msg_dpc(unsigned long data);
+ int hv_kvp_init(struct hv_util_service *srv);
++int hv_kvp_init_transport(void);
+ void hv_kvp_deinit(void);
+ int hv_kvp_pre_suspend(void);
+ int hv_kvp_pre_resume(void);
+ void hv_kvp_onchannelcallback(void *context);
+ int hv_vss_init(struct hv_util_service *srv);
++int hv_vss_init_transport(void);
+ void hv_vss_deinit(void);
+ int hv_vss_pre_suspend(void);
+ int hv_vss_pre_resume(void);
+--- a/include/linux/hyperv.h
++++ b/include/linux/hyperv.h
+@@ -1567,6 +1567,7 @@ struct hv_util_service {
+       void *channel;
+       void (*util_cb)(void *);
+       int (*util_init)(struct hv_util_service *);
++      int (*util_init_transport)(void);
+       void (*util_deinit)(void);
+       int (*util_pre_suspend)(void);
+       int (*util_pre_resume)(void);
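
A minimal userspace-style sketch (hypothetical names, not the hv_utils code) of
the ordering that the reordered util_probe() enforces: the character device the
daemon opens is only created once the channel ring buffer has been set up, so
the callback can no longer run against an uninitialized ring buffer.

  #include <stdio.h>

  static int ring_ready;

  static int open_channel(void)   { ring_ready = 1; return 0; }  /* stands in for vmbus_open() */
  static void close_channel(void) { ring_ready = 0; }
  static int init_state(void)     { return 0; }                  /* stands in for hv_kvp_init() */

  static int init_transport(void)   /* stands in for hv_kvp_init_transport() */
  {
          /* Creating the device node here means the daemon cannot trigger the
           * channel callback before ring_ready is set. */
          printf("char dev created, ring_ready=%d\n", ring_ready);
          return 0;
  }

  int main(void)
  {
          if (init_state())
                  return 1;
          if (open_channel())
                  return 1;
          if (init_transport()) {
                  close_channel();
                  return 1;
          }
          return 0;
  }
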
diff --git a/queue-6.1/io_uring-check-if-iowq-is-killed-before-queuing.patch b/queue-6.1/io_uring-check-if-iowq-is-killed-before-queuing.patch
new file mode 100644 (file)
index 0000000..f13278c
--- /dev/null
@@ -0,0 +1,46 @@
+From dbd2ca9367eb19bc5e269b8c58b0b1514ada9156 Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Thu, 19 Dec 2024 19:52:58 +0000
+Subject: io_uring: check if iowq is killed before queuing
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit dbd2ca9367eb19bc5e269b8c58b0b1514ada9156 upstream.
+
+task work can be executed after the task has gone through io_uring
+termination, whether it's the final task_work run or the fallback path.
+In this case, task work will find ->io_wq being already killed and
+null'ed, which is a problem if it then tries to forward the request to
+io_queue_iowq(). Make io_queue_iowq() fail requests in this case.
+
+Note that it also checks PF_KTHREAD, because the user can first close
+a DEFER_TASKRUN ring and shortly after kill the task, in which case
+the ->io_wq check would race.
+
+Cc: stable@vger.kernel.org
+Fixes: 50c52250e2d74 ("block: implement async io_uring discard cmd")
+Fixes: 773af69121ecc ("io_uring: always reissue from task_work context")
+Reported-by: Will <willsroot@protonmail.com>
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/63312b4a2c2bb67ad67b857d17a300e1d3b078e8.1734637909.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io_uring.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -440,7 +440,11 @@ void io_queue_iowq(struct io_kiocb *req,
+       struct io_uring_task *tctx = req->task->io_uring;
+       BUG_ON(!tctx);
+-      BUG_ON(!tctx->io_wq);
++
++      if ((current->flags & PF_KTHREAD) || !tctx->io_wq) {
++              io_req_task_queue_fail(req, -ECANCELED);
++              return;
++      }
+       /* init ->work of the whole link before punting */
+       io_prep_async_link(req);
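
As a standalone illustration (hypothetical types and names, not the io_uring
code), this is the general pattern the fix applies: when deferred work may run
after teardown, a missing worker context has to fail the request with
-ECANCELED instead of being asserted against with BUG_ON().

  #include <errno.h>

  struct task_ctx {
          void *io_wq;     /* NULL once the ring's worker pool has been killed */
          int   exiting;   /* stands in for the PF_KTHREAD check on the task */
  };

  static void fail_req(int err) { (void)err; /* complete the request with the error */ }

  int queue_to_iowq(struct task_ctx *ctx)
  {
          if (ctx->exiting || !ctx->io_wq) {
                  fail_req(-ECANCELED);
                  return -ECANCELED;
          }
          /* otherwise punt the request to the worker pool */
          return 0;
  }
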
diff --git a/queue-6.1/io_uring-fix-registered-ring-file-refcount-leak.patch b/queue-6.1/io_uring-fix-registered-ring-file-refcount-leak.patch
new file mode 100644 (file)
index 0000000..6ba3572
--- /dev/null
@@ -0,0 +1,64 @@
+From 12d908116f7efd34f255a482b9afc729d7a5fb78 Mon Sep 17 00:00:00 2001
+From: Jann Horn <jannh@google.com>
+Date: Wed, 18 Dec 2024 17:56:25 +0100
+Subject: io_uring: Fix registered ring file refcount leak
+
+From: Jann Horn <jannh@google.com>
+
+commit 12d908116f7efd34f255a482b9afc729d7a5fb78 upstream.
+
+Currently, io_uring_unreg_ringfd() (which cleans up registered rings) is
+only called on exit, but __io_uring_free (which frees the tctx in which the
+registered ring pointers are stored) is also called on execve (via
+begin_new_exec -> io_uring_task_cancel -> __io_uring_cancel ->
+io_uring_cancel_generic -> __io_uring_free).
+
+This means: A process going through execve while having registered rings
+will leak references to the rings' `struct file`.
+
+Fix it by zapping registered rings on execve(). This is implemented by
+moving the io_uring_unreg_ringfd() from io_uring_files_cancel() into its
+callee __io_uring_cancel(), which is called from io_uring_task_cancel() on
+execve.
+
+This could probably be exploited *on 32-bit kernels* by leaking 2^32
+references to the same ring, because the file refcount is stored in a
+pointer-sized field and get_file() doesn't have protection against
+refcount overflow, just a WARN_ONCE(); but on 64-bit it should have no
+impact beyond a memory leak.
+
+Cc: stable@vger.kernel.org
+Fixes: e7a6c00dc77a ("io_uring: add support for registering ring file descriptors")
+Signed-off-by: Jann Horn <jannh@google.com>
+Link: https://lore.kernel.org/r/20241218-uring-reg-ring-cleanup-v1-1-8f63e999045b@google.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/io_uring.h |    4 +---
+ io_uring/io_uring.c      |    1 +
+ 2 files changed, 2 insertions(+), 3 deletions(-)
+
+--- a/include/linux/io_uring.h
++++ b/include/linux/io_uring.h
+@@ -50,10 +50,8 @@ bool io_is_uring_fops(struct file *file)
+ static inline void io_uring_files_cancel(void)
+ {
+-      if (current->io_uring) {
+-              io_uring_unreg_ringfd();
++      if (current->io_uring)
+               __io_uring_cancel(false);
+-      }
+ }
+ static inline void io_uring_task_cancel(void)
+ {
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -3085,6 +3085,7 @@ __cold void io_uring_cancel_generic(bool
+ void __io_uring_cancel(bool cancel_all)
+ {
++      io_uring_unreg_ringfd();
+       io_uring_cancel_generic(cancel_all, NULL);
+ }
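
A rough sketch of the two code paths involved (paraphrased from the diff above
and from include/linux/io_uring.h; simplified, not a verbatim quote):

  /*
   *   exit:    do_exit() -> io_uring_files_cancel() -> __io_uring_cancel(false)
   *   execve:  begin_new_exec() -> io_uring_task_cancel() -> __io_uring_cancel(true)
   */
  void __io_uring_cancel(bool cancel_all)
  {
          io_uring_unreg_ringfd();                  /* now runs on both paths */
          io_uring_cancel_generic(cancel_all, NULL);
  }

Because io_uring_unreg_ringfd() now sits inside __io_uring_cancel(), the
registered ring files are dropped on execve as well, closing the refcount leak.
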
diff --git a/queue-6.1/kvm-x86-play-nice-with-protected-guests-in-complete_hypercall_exit.patch b/queue-6.1/kvm-x86-play-nice-with-protected-guests-in-complete_hypercall_exit.patch
new file mode 100644 (file)
index 0000000..a718db9
--- /dev/null
@@ -0,0 +1,59 @@
+From 9b42d1e8e4fe9dc631162c04caa69b0d1860b0f0 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 27 Nov 2024 16:43:39 -0800
+Subject: KVM: x86: Play nice with protected guests in complete_hypercall_exit()
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 9b42d1e8e4fe9dc631162c04caa69b0d1860b0f0 upstream.
+
+Use is_64_bit_hypercall() instead of is_64_bit_mode() to detect a 64-bit
+hypercall when completing said hypercall.  For guests with protected state,
+e.g. SEV-ES and SEV-SNP, KVM must assume the hypercall was made in 64-bit
+mode as the vCPU state needed to detect 64-bit mode is unavailable.
+
+Hacking the sev_smoke_test selftest to generate a KVM_HC_MAP_GPA_RANGE
+hypercall via VMGEXIT trips the WARN:
+
+  ------------[ cut here ]------------
+  WARNING: CPU: 273 PID: 326626 at arch/x86/kvm/x86.h:180 complete_hypercall_exit+0x44/0xe0 [kvm]
+  Modules linked in: kvm_amd kvm ... [last unloaded: kvm]
+  CPU: 273 UID: 0 PID: 326626 Comm: sev_smoke_test Not tainted 6.12.0-smp--392e932fa0f3-feat #470
+  Hardware name: Google Astoria/astoria, BIOS 0.20240617.0-0 06/17/2024
+  RIP: 0010:complete_hypercall_exit+0x44/0xe0 [kvm]
+  Call Trace:
+   <TASK>
+   kvm_arch_vcpu_ioctl_run+0x2400/0x2720 [kvm]
+   kvm_vcpu_ioctl+0x54f/0x630 [kvm]
+   __se_sys_ioctl+0x6b/0xc0
+   do_syscall_64+0x83/0x160
+   entry_SYSCALL_64_after_hwframe+0x76/0x7e
+   </TASK>
+  ---[ end trace 0000000000000000 ]---
+
+Fixes: b5aead0064f3 ("KVM: x86: Assume a 64-bit hypercall for guests with protected state")
+Cc: stable@vger.kernel.org
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
+Reviewed-by: Nikunj A Dadhania <nikunj@amd.com>
+Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>
+Reviewed-by: Binbin Wu <binbin.wu@linux.intel.com>
+Reviewed-by: Kai Huang <kai.huang@intel.com>
+Link: https://lore.kernel.org/r/20241128004344.4072099-2-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -9712,7 +9712,7 @@ static int complete_hypercall_exit(struc
+ {
+       u64 ret = vcpu->run->hypercall.ret;
+-      if (!is_64_bit_mode(vcpu))
++      if (!is_64_bit_hypercall(vcpu))
+               ret = (u32)ret;
+       kvm_rax_write(vcpu, ret);
+       ++vcpu->stat.hypercalls;
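
For context, a paraphrased sketch of the helper being switched to (see
arch/x86/kvm/x86.h for the real definition; the comment wording here is ours):

  static inline bool is_64_bit_hypercall(struct kvm_vcpu *vcpu)
  {
          /* Protected guests (e.g. SEV-ES/SEV-SNP) hide the vCPU state that
           * is_64_bit_mode() needs, so assume the hypercall was made in
           * 64-bit mode rather than tripping the WARN. */
          return vcpu->arch.guest_state_protected || is_64_bit_mode(vcpu);
  }
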
diff --git a/queue-6.1/selftests-bpf-use-asm-constraint-m-for-loongarch.patch b/queue-6.1/selftests-bpf-use-asm-constraint-m-for-loongarch.patch
new file mode 100644 (file)
index 0000000..a491f29
--- /dev/null
@@ -0,0 +1,40 @@
+From 29d44cce324dab2bd86c447071a596262e7109b6 Mon Sep 17 00:00:00 2001
+From: Tiezhu Yang <yangtiezhu@loongson.cn>
+Date: Thu, 19 Dec 2024 19:15:06 +0800
+Subject: selftests/bpf: Use asm constraint "m" for LoongArch
+
+From: Tiezhu Yang <yangtiezhu@loongson.cn>
+
+commit 29d44cce324dab2bd86c447071a596262e7109b6 upstream.
+
+Currently, LoongArch LLVM does not support the constraint "o" and has no
+plan to support it; it only supports the similar constraint "m". So change
+the constraint string from "nor" in the "else" case to the arch-specific
+"nmr" to avoid build errors such as "unexpected asm memory constraint" on
+LoongArch.
+
+Fixes: 630301b0d59d ("selftests/bpf: Add basic USDT selftests")
+Suggested-by: Weining Lu <luweining@loongson.cn>
+Suggested-by: Li Chen <chenli@loongson.cn>
+Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Reviewed-by: Huacai Chen <chenhuacai@loongson.cn>
+Cc: stable@vger.kernel.org
+Link: https://llvm.org/docs/LangRef.html#supported-constraint-code-list
+Link: https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp#L172
+Link: https://lore.kernel.org/bpf/20241219111506.20643-1-yangtiezhu@loongson.cn
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/bpf/sdt.h |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/tools/testing/selftests/bpf/sdt.h
++++ b/tools/testing/selftests/bpf/sdt.h
+@@ -102,6 +102,8 @@
+ # define STAP_SDT_ARG_CONSTRAINT        nZr
+ # elif defined __arm__
+ # define STAP_SDT_ARG_CONSTRAINT        g
++# elif defined __loongarch__
++# define STAP_SDT_ARG_CONSTRAINT        nmr
+ # else
+ # define STAP_SDT_ARG_CONSTRAINT        nor
+ # endif
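
A standalone illustration (hypothetical test file, not part of the patch) of the
difference the constraint makes: LoongArch clang accepts a plain memory operand
"m" but rejects the offsettable-memory constraint "o" with an error like
"unexpected asm memory constraint".

  int v;

  void ok(void)  { __asm__ __volatile__ ("" :: "m"(v)); }  /* accepted */
  void bad(void) { __asm__ __volatile__ ("" :: "o"(v)); }  /* rejected by LoongArch LLVM */
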
diff --git a/queue-6.1/series b/queue-6.1/series
index 550243506b5a4d9cac6ddc8b3003bbf43503fe6c..258bfbf38ab88c9b1a6ef67c0313459d608962d2 100644 (file)
@@ -56,3 +56,12 @@ hwmon-tmp513-fix-interpretation-of-values-of-tempera.patch
 sh-clk-fix-clk_enable-to-return-0-on-null-clk.patch
 zram-refuse-to-use-zero-sized-block-device-as-backing-device.patch
 zram-fix-uninitialized-zram-not-releasing-backing-device.patch
+btrfs-tree-checker-reject-inline-extent-items-with-0-ref-count.patch
+drivers-hv-util-avoid-accessing-a-ringbuffer-not-initialized-yet.patch
+kvm-x86-play-nice-with-protected-guests-in-complete_hypercall_exit.patch
+tracing-fix-test_event_printk-to-process-entire-print-argument.patch
+tracing-add-missing-helper-functions-in-event-pointer-dereference-check.patch
+tracing-add-s-check-in-test_event_printk.patch
+selftests-bpf-use-asm-constraint-m-for-loongarch.patch
+io_uring-fix-registered-ring-file-refcount-leak.patch
+io_uring-check-if-iowq-is-killed-before-queuing.patch
diff --git a/queue-6.1/tracing-add-missing-helper-functions-in-event-pointer-dereference-check.patch b/queue-6.1/tracing-add-missing-helper-functions-in-event-pointer-dereference-check.patch
new file mode 100644 (file)
index 0000000..a51dce3
--- /dev/null
@@ -0,0 +1,78 @@
+From 917110481f6bc1c96b1e54b62bb114137fbc6d17 Mon Sep 17 00:00:00 2001
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Mon, 16 Dec 2024 21:41:20 -0500
+Subject: tracing: Add missing helper functions in event pointer dereference check
+
+From: Steven Rostedt <rostedt@goodmis.org>
+
+commit 917110481f6bc1c96b1e54b62bb114137fbc6d17 upstream.
+
+The process_pointer() helper function looks to see if various trace event
+macros are used. These macros are for storing data in the event. This
+makes the pointer safe to dereference, as the dereference will then point
+into the event on the ring buffer, where the content of the data stays with the
+event itself.
+
+A few helper functions were missing. Those were:
+
+  __get_rel_dynamic_array()
+  __get_dynamic_array_len()
+  __get_rel_dynamic_array_len()
+  __get_rel_sockaddr()
+
+Also add a helper function find_print_string() so that a middle-man
+variable is not needed to test whether the string exists.
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Link: https://lore.kernel.org/20241217024720.521836792@goodmis.org
+Fixes: 5013f454a352c ("tracing: Add check of trace event print fmts for dereferencing pointers")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/trace_events.c |   21 +++++++++++++++++++--
+ 1 file changed, 19 insertions(+), 2 deletions(-)
+
+--- a/kernel/trace/trace_events.c
++++ b/kernel/trace/trace_events.c
+@@ -272,6 +272,15 @@ static bool test_field(const char *fmt,
+       return false;
+ }
++/* Look for a string within an argument */
++static bool find_print_string(const char *arg, const char *str, const char *end)
++{
++      const char *r;
++
++      r = strstr(arg, str);
++      return r && r < end;
++}
++
+ /* Return true if the argument pointer is safe */
+ static bool process_pointer(const char *fmt, int len, struct trace_event_call *call)
+ {
+@@ -290,9 +299,17 @@ static bool process_pointer(const char *
+               a = strchr(fmt, '&');
+               if ((a && (a < r)) || test_field(r, call))
+                       return true;
+-      } else if ((r = strstr(fmt, "__get_dynamic_array(")) && r < e) {
++      } else if (find_print_string(fmt, "__get_dynamic_array(", e)) {
++              return true;
++      } else if (find_print_string(fmt, "__get_rel_dynamic_array(", e)) {
++              return true;
++      } else if (find_print_string(fmt, "__get_dynamic_array_len(", e)) {
++              return true;
++      } else if (find_print_string(fmt, "__get_rel_dynamic_array_len(", e)) {
++              return true;
++      } else if (find_print_string(fmt, "__get_sockaddr(", e)) {
+               return true;
+-      } else if ((r = strstr(fmt, "__get_sockaddr(")) && r < e) {
++      } else if (find_print_string(fmt, "__get_rel_sockaddr(", e)) {
+               return true;
+       }
+       return false;
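
For illustration (a hypothetical event fragment, not from the patch), these are
the kinds of print-format arguments that the expanded check now recognizes as
dereferencing data stored in the event itself:

  TP_printk("mac=%pM addr=%pISpc",
            __get_rel_dynamic_array(mac),   /* newly recognized by process_pointer() */
            __get_rel_sockaddr(addr))       /* newly recognized by process_pointer() */

__get_dynamic_array() and __get_sockaddr() were already accepted; the length
variants __get_dynamic_array_len() and __get_rel_dynamic_array_len() are
likewise accepted when they appear inside such an argument.
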
diff --git a/queue-6.1/tracing-add-s-check-in-test_event_printk.patch b/queue-6.1/tracing-add-s-check-in-test_event_printk.patch
new file mode 100644 (file)
index 0000000..a8e7186
--- /dev/null
@@ -0,0 +1,206 @@
+From 65a25d9f7ac02e0cf361356e834d1c71d36acca9 Mon Sep 17 00:00:00 2001
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Mon, 16 Dec 2024 21:41:21 -0500
+Subject: tracing: Add "%s" check in test_event_printk()
+
+From: Steven Rostedt <rostedt@goodmis.org>
+
+commit 65a25d9f7ac02e0cf361356e834d1c71d36acca9 upstream.
+
+The test_event_printk() code makes sure that when a trace event is
+registered, any dereferenced pointers from the event's TP_printk() are
+pointing to content in the ring buffer. But currently it does not handle
+"%s", as there are cases where the string pointer saved in the ring buffer
+points to a static string in the kernel that will never be freed. As that
+is a valid case, the pointer needs to be checked at runtime.
+
+Currently the runtime check is done via trace_check_vprintf(), but to not
+have to replicate everything in vsnprintf() it does some logic with the
+va_list that may not be reliable across architectures. In order to get rid
+of that logic, more work needs to be done in test_event_printk(). Some
+of the strings can be validated at this time when it is obvious the string
+is valid because the string will be saved in the ring buffer content.
+
+Do all the validation of strings in the ring buffer at boot in
+test_event_printk(), and make sure that the fields of the strings that
+point into the kernel are accessible. This will allow adding checks at
+runtime that will validate the fields themselves and not rely on parsing
+the TP_printk() format at runtime.
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Link: https://lore.kernel.org/20241217024720.685917008@goodmis.org
+Fixes: 5013f454a352c ("tracing: Add check of trace event print fmts for dereferencing pointers")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/trace_events.c |  104 +++++++++++++++++++++++++++++++++++++-------
+ 1 file changed, 89 insertions(+), 15 deletions(-)
+
+--- a/kernel/trace/trace_events.c
++++ b/kernel/trace/trace_events.c
+@@ -242,19 +242,16 @@ int trace_event_get_offsets(struct trace
+       return tail->offset + tail->size;
+ }
+-/*
+- * Check if the referenced field is an array and return true,
+- * as arrays are OK to dereference.
+- */
+-static bool test_field(const char *fmt, struct trace_event_call *call)
++
++static struct trace_event_fields *find_event_field(const char *fmt,
++                                                 struct trace_event_call *call)
+ {
+       struct trace_event_fields *field = call->class->fields_array;
+-      const char *array_descriptor;
+       const char *p = fmt;
+       int len;
+       if (!(len = str_has_prefix(fmt, "REC->")))
+-              return false;
++              return NULL;
+       fmt += len;
+       for (p = fmt; *p; p++) {
+               if (!isalnum(*p) && *p != '_')
+@@ -265,11 +262,26 @@ static bool test_field(const char *fmt,
+       for (; field->type; field++) {
+               if (strncmp(field->name, fmt, len) || field->name[len])
+                       continue;
+-              array_descriptor = strchr(field->type, '[');
+-              /* This is an array and is OK to dereference. */
+-              return array_descriptor != NULL;
++
++              return field;
+       }
+-      return false;
++      return NULL;
++}
++
++/*
++ * Check if the referenced field is an array and return true,
++ * as arrays are OK to dereference.
++ */
++static bool test_field(const char *fmt, struct trace_event_call *call)
++{
++      struct trace_event_fields *field;
++
++      field = find_event_field(fmt, call);
++      if (!field)
++              return false;
++
++      /* This is an array and is OK to dereference. */
++      return strchr(field->type, '[') != NULL;
+ }
+ /* Look for a string within an argument */
+@@ -315,6 +327,53 @@ static bool process_pointer(const char *
+       return false;
+ }
++/* Return true if the string is safe */
++static bool process_string(const char *fmt, int len, struct trace_event_call *call)
++{
++      const char *r, *e, *s;
++
++      e = fmt + len;
++
++      /*
++       * There are several helper functions that return strings.
++       * If the argument contains a function, then assume its field is valid.
++       * It is considered that the argument has a function if it has:
++       *   alphanumeric or '_' before a parenthesis.
++       */
++      s = fmt;
++      do {
++              r = strstr(s, "(");
++              if (!r || r >= e)
++                      break;
++              for (int i = 1; r - i >= s; i++) {
++                      char ch = *(r - i);
++                      if (isspace(ch))
++                              continue;
++                      if (isalnum(ch) || ch == '_')
++                              return true;
++                      /* Anything else, this isn't a function */
++                      break;
++              }
++              /* A function could be wrapped in parethesis, try the next one */
++              s = r + 1;
++      } while (s < e);
++
++      /*
++       * If there's any strings in the argument consider this arg OK as it
++       * could be: REC->field ? "foo" : "bar" and we don't want to get into
++       * verifying that logic here.
++       */
++      if (find_print_string(fmt, "\"", e))
++              return true;
++
++      /* Dereferenced strings are also valid like any other pointer */
++      if (process_pointer(fmt, len, call))
++              return true;
++
++      /* Make sure the field is found, and consider it OK for now if it is */
++      return find_event_field(fmt, call) != NULL;
++}
++
+ /*
+  * Examine the print fmt of the event looking for unsafe dereference
+  * pointers using %p* that could be recorded in the trace event and
+@@ -324,6 +383,7 @@ static bool process_pointer(const char *
+ static void test_event_printk(struct trace_event_call *call)
+ {
+       u64 dereference_flags = 0;
++      u64 string_flags = 0;
+       bool first = true;
+       const char *fmt;
+       int parens = 0;
+@@ -414,8 +474,16 @@ static void test_event_printk(struct tra
+                                               star = true;
+                                               continue;
+                                       }
+-                                      if ((fmt[i + j] == 's') && star)
+-                                              arg++;
++                                      if ((fmt[i + j] == 's')) {
++                                              if (star)
++                                                      arg++;
++                                              if (WARN_ONCE(arg == 63,
++                                                            "Too many args for event: %s",
++                                                            trace_event_name(call)))
++                                                      return;
++                                              dereference_flags |= 1ULL << arg;
++                                              string_flags |= 1ULL << arg;
++                                      }
+                                       break;
+                               }
+                               break;
+@@ -462,7 +530,10 @@ static void test_event_printk(struct tra
+                       }
+                       if (dereference_flags & (1ULL << arg)) {
+-                              if (process_pointer(fmt + start_arg, e - start_arg, call))
++                              if (string_flags & (1ULL << arg)) {
++                                      if (process_string(fmt + start_arg, e - start_arg, call))
++                                              dereference_flags &= ~(1ULL << arg);
++                              } else if (process_pointer(fmt + start_arg, e - start_arg, call))
+                                       dereference_flags &= ~(1ULL << arg);
+                       }
+@@ -474,7 +545,10 @@ static void test_event_printk(struct tra
+       }
+       if (dereference_flags & (1ULL << arg)) {
+-              if (process_pointer(fmt + start_arg, i - start_arg, call))
++              if (string_flags & (1ULL << arg)) {
++                      if (process_string(fmt + start_arg, i - start_arg, call))
++                              dereference_flags &= ~(1ULL << arg);
++              } else if (process_pointer(fmt + start_arg, i - start_arg, call))
+                       dereference_flags &= ~(1ULL << arg);
+       }
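
For illustration (hypothetical arguments in the saved print_fmt form, where
TP_printk()'s __entry-> appears as REC->), the three string cases the new
process_string() distinguishes:

  "%s", __get_str(msg)                      /* data copied into the ring buffer: provably safe at boot */
  "%s", REC->online ? "online" : "offline"  /* argument contains literal strings: accepted */
  "%s", REC->name                           /* may point at a static kernel string: only the field's
                                               existence is verified here, full validation is left
                                               to runtime */
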
diff --git a/queue-6.1/tracing-fix-test_event_printk-to-process-entire-print-argument.patch b/queue-6.1/tracing-fix-test_event_printk-to-process-entire-print-argument.patch
new file mode 100644 (file)
index 0000000..def6642
--- /dev/null
@@ -0,0 +1,184 @@
+From a6629626c584200daf495cc9a740048b455addcd Mon Sep 17 00:00:00 2001
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Mon, 16 Dec 2024 21:41:19 -0500
+Subject: tracing: Fix test_event_printk() to process entire print argument
+
+From: Steven Rostedt <rostedt@goodmis.org>
+
+commit a6629626c584200daf495cc9a740048b455addcd upstream.
+
+The test_event_printk() function analyzes print formats of trace events,
+looking for cases where it may dereference a pointer that is not in the
+ring buffer, which can possibly be a bug when the trace event is read from
+the ring buffer and the content of that pointer no longer exists.
+
+The function needs to accurately go from one print format argument to the
+next. It handles quotes and parentheses that may be included in an
+argument. When it finds the start of the next argument, it uses a simple
+"c = strchr(fmt + i, ',')" to find the end of that argument!
+
+In order to include "%s" dereferencing, it needs to process the entire
+content of the print format argument and not just the content up to the
+first ',' it finds. As there may be content like:
+
+ ({ const char *saved_ptr = trace_seq_buffer_ptr(p); static const char
+   *access_str[] = { "---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux"
+   }; union kvm_mmu_page_role role; role.word = REC->role;
+   trace_seq_printf(p, "sp gen %u gfn %llx l%u %u-byte q%u%s %s%s" " %snxe
+   %sad root %u %s%c", REC->mmu_valid_gen, REC->gfn, role.level,
+   role.has_4_byte_gpte ? 4 : 8, role.quadrant, role.direct ? " direct" : "",
+   access_str[role.access], role.invalid ? " invalid" : "", role.efer_nx ? ""
+   : "!", role.ad_disabled ? "!" : "", REC->root_count, REC->unsync ?
+   "unsync" : "sync", 0); saved_ptr; })
+
+Which is an example of a full argument of an existing event. As the code
+already handles finding the next print format argument, process the
+argument when its end is found rather than at its start. This way it has
+both the start of the argument as well as the end of it.
+
+Add a helper function "process_pointer()" that will do the processing during
+the loop as well as at the end. It also makes the code cleaner and easier
+to read.
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Link: https://lore.kernel.org/20241217024720.362271189@goodmis.org
+Fixes: 5013f454a352c ("tracing: Add check of trace event print fmts for dereferencing pointers")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/trace_events.c |   82 ++++++++++++++++++++++++++++----------------
+ 1 file changed, 53 insertions(+), 29 deletions(-)
+
+--- a/kernel/trace/trace_events.c
++++ b/kernel/trace/trace_events.c
+@@ -263,8 +263,7 @@ static bool test_field(const char *fmt,
+       len = p - fmt;
+       for (; field->type; field++) {
+-              if (strncmp(field->name, fmt, len) ||
+-                  field->name[len])
++              if (strncmp(field->name, fmt, len) || field->name[len])
+                       continue;
+               array_descriptor = strchr(field->type, '[');
+               /* This is an array and is OK to dereference. */
+@@ -273,6 +272,32 @@ static bool test_field(const char *fmt,
+       return false;
+ }
++/* Return true if the argument pointer is safe */
++static bool process_pointer(const char *fmt, int len, struct trace_event_call *call)
++{
++      const char *r, *e, *a;
++
++      e = fmt + len;
++
++      /* Find the REC-> in the argument */
++      r = strstr(fmt, "REC->");
++      if (r && r < e) {
++              /*
++               * Addresses of events on the buffer, or an array on the buffer is
++               * OK to dereference. There's ways to fool this, but
++               * this is to catch common mistakes, not malicious code.
++               */
++              a = strchr(fmt, '&');
++              if ((a && (a < r)) || test_field(r, call))
++                      return true;
++      } else if ((r = strstr(fmt, "__get_dynamic_array(")) && r < e) {
++              return true;
++      } else if ((r = strstr(fmt, "__get_sockaddr(")) && r < e) {
++              return true;
++      }
++      return false;
++}
++
+ /*
+  * Examine the print fmt of the event looking for unsafe dereference
+  * pointers using %p* that could be recorded in the trace event and
+@@ -283,12 +308,12 @@ static void test_event_printk(struct tra
+ {
+       u64 dereference_flags = 0;
+       bool first = true;
+-      const char *fmt, *c, *r, *a;
++      const char *fmt;
+       int parens = 0;
+       char in_quote = 0;
+       int start_arg = 0;
+       int arg = 0;
+-      int i;
++      int i, e;
+       fmt = call->print_fmt;
+@@ -401,42 +426,41 @@ static void test_event_printk(struct tra
+               case ',':
+                       if (in_quote || parens)
+                               continue;
++                      e = i;
+                       i++;
+                       while (isspace(fmt[i]))
+                               i++;
+-                      start_arg = i;
+-                      if (!(dereference_flags & (1ULL << arg)))
+-                              goto next_arg;
+-                      /* Find the REC-> in the argument */
+-                      c = strchr(fmt + i, ',');
+-                      r = strstr(fmt + i, "REC->");
+-                      if (r && (!c || r < c)) {
+-                              /*
+-                               * Addresses of events on the buffer,
+-                               * or an array on the buffer is
+-                               * OK to dereference.
+-                               * There's ways to fool this, but
+-                               * this is to catch common mistakes,
+-                               * not malicious code.
+-                               */
+-                              a = strchr(fmt + i, '&');
+-                              if ((a && (a < r)) || test_field(r, call))
++                      /*
++                       * If start_arg is zero, then this is the start of the
++                       * first argument. The processing of the argument happens
++                       * when the end of the argument is found, as it needs to
++                       * handle paranthesis and such.
++                       */
++                      if (!start_arg) {
++                              start_arg = i;
++                              /* Balance out the i++ in the for loop */
++                              i--;
++                              continue;
++                      }
++
++                      if (dereference_flags & (1ULL << arg)) {
++                              if (process_pointer(fmt + start_arg, e - start_arg, call))
+                                       dereference_flags &= ~(1ULL << arg);
+-                      } else if ((r = strstr(fmt + i, "__get_dynamic_array(")) &&
+-                                 (!c || r < c)) {
+-                              dereference_flags &= ~(1ULL << arg);
+-                      } else if ((r = strstr(fmt + i, "__get_sockaddr(")) &&
+-                                 (!c || r < c)) {
+-                              dereference_flags &= ~(1ULL << arg);
+                       }
+-              next_arg:
+-                      i--;
++                      start_arg = i;
+                       arg++;
++                      /* Balance out the i++ in the for loop */
++                      i--;
+               }
+       }
++      if (dereference_flags & (1ULL << arg)) {
++              if (process_pointer(fmt + start_arg, i - start_arg, call))
++                      dereference_flags &= ~(1ULL << arg);
++      }
++
+       /*
+        * If you triggered the below warning, the trace event reported
+        * uses an unsafe dereference pointer %p*. As the data stored