git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.10-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 18 Oct 2024 10:40:09 +0000 (12:40 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 18 Oct 2024 10:40:09 +0000 (12:40 +0200)
added patches:
io_uring-sqpoll-do-not-allow-pinning-outside-of-cpuset.patch
io_uring-sqpoll-do-not-put-cpumask-on-stack.patch
io_uring-sqpoll-retain-test-for-whether-the-cpu-is-valid.patch
kvm-fix-a-data-race-on-last_boosted_vcpu-in-kvm_vcpu_on_spin.patch

queue-5.10/io_uring-sqpoll-do-not-allow-pinning-outside-of-cpuset.patch [new file with mode: 0644]
queue-5.10/io_uring-sqpoll-do-not-put-cpumask-on-stack.patch [new file with mode: 0644]
queue-5.10/io_uring-sqpoll-retain-test-for-whether-the-cpu-is-valid.patch [new file with mode: 0644]
queue-5.10/kvm-fix-a-data-race-on-last_boosted_vcpu-in-kvm_vcpu_on_spin.patch [new file with mode: 0644]
queue-5.10/series

diff --git a/queue-5.10/io_uring-sqpoll-do-not-allow-pinning-outside-of-cpuset.patch b/queue-5.10/io_uring-sqpoll-do-not-allow-pinning-outside-of-cpuset.patch
new file mode 100644 (file)
index 0000000..7dae578
--- /dev/null
@@ -0,0 +1,59 @@
+From fm-1321639-2024101711510000f421e7dbc03e184b-hkQGWx@rts-flowmailer.siemens.com Thu Oct 17 13:51:02 2024
+From: Felix Moessbauer <felix.moessbauer@siemens.com>
+Date: Thu, 17 Oct 2024 13:50:27 +0200
+Subject: io_uring/sqpoll: do not allow pinning outside of cpuset
+To: stable@vger.kernel.org
+Cc: io-uring@vger.kernel.org, axboe@kernel.dk, gregkh@linuxfoundation.org, Felix Moessbauer <felix.moessbauer@siemens.com>
+Message-ID: <20241017115029.178246-1-felix.moessbauer@siemens.com>
+
+From: Felix Moessbauer <felix.moessbauer@siemens.com>
+
+commit f011c9cf04c06f16b24f583d313d3c012e589e50 upstream.
+
+The submit queue polling threads are userland threads that just never
+exit to the userland. When creating the thread with IORING_SETUP_SQ_AFF,
+the affinity of the poller thread is set to the cpu specified in
+sq_thread_cpu. However, this CPU can be outside of the cpuset defined
+by the cgroup cpuset controller. This violates the rules defined by the
+cpuset controller and is a potential issue for realtime applications.
+
+In b7ed6d8ffd6 we fixed the default affinity of the poller thread for
+the case where no explicit pinning is required, by inheriting the
+affinity of the creating task. With explicit pinning, the check is more
+complicated, as a cpu outside of the parent cpumask is also allowed.
+We implemented this by using cpuset_cpus_allowed() (which supports
+cgroup cpusets) and testing whether the requested cpu is in the set.
+
+Fixes: 37d1e2e3642e ("io_uring: move SQPOLL thread io-wq forked worker")
+Signed-off-by: Felix Moessbauer <felix.moessbauer@siemens.com>
+Link: https://lore.kernel.org/r/20240909150036.55921-1-felix.moessbauer@siemens.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io_uring.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -56,6 +56,7 @@
+ #include <linux/mm.h>
+ #include <linux/mman.h>
+ #include <linux/percpu.h>
++#include <linux/cpuset.h>
+ #include <linux/slab.h>
+ #include <linux/blkdev.h>
+ #include <linux/bvec.h>
+@@ -8571,10 +8572,12 @@ static int io_sq_offload_create(struct i
+                       return 0;
+               if (p->flags & IORING_SETUP_SQ_AFF) {
++                      struct cpumask allowed_mask;
+                       int cpu = p->sq_thread_cpu;
+                       ret = -EINVAL;
+-                      if (cpu >= nr_cpu_ids || !cpu_online(cpu))
++                      cpuset_cpus_allowed(current, &allowed_mask);
++                      if (!cpumask_test_cpu(cpu, &allowed_mask))
+                               goto err_sqpoll;
+                       sqd->sq_cpu = cpu;
+               } else {
diff --git a/queue-5.10/io_uring-sqpoll-do-not-put-cpumask-on-stack.patch b/queue-5.10/io_uring-sqpoll-do-not-put-cpumask-on-stack.patch
new file mode 100644 (file)
index 0000000..ac32860
--- /dev/null
@@ -0,0 +1,53 @@
+From fm-1321639-20241017115101ba16fac9d1bf4e632e-TAlFlY@rts-flowmailer.siemens.com Thu Oct 17 13:51:03 2024
+From: Felix Moessbauer <felix.moessbauer@siemens.com>
+Date: Thu, 17 Oct 2024 13:50:29 +0200
+Subject: io_uring/sqpoll: do not put cpumask on stack
+To: stable@vger.kernel.org
+Cc: io-uring@vger.kernel.org, axboe@kernel.dk, gregkh@linuxfoundation.org, Felix Moessbauer <felix.moessbauer@siemens.com>
+Message-ID: <20241017115029.178246-3-felix.moessbauer@siemens.com>
+
+From: Felix Moessbauer <felix.moessbauer@siemens.com>
+
+commit 7f44beadcc11adb98220556d2ddbe9c97aa6d42d upstream.
+
+Putting a cpumask on the stack has been deprecated for a long time (since
+2d3854a37e8), as cpumasks can be big. Given that, change the on-stack
+allocation of allowed_mask to a dynamic allocation.
+
+Fixes: f011c9cf04c0 ("io_uring/sqpoll: do not allow pinning outside of cpuset")
+Signed-off-by: Felix Moessbauer <felix.moessbauer@siemens.com>
+Link: https://lore.kernel.org/r/20240916111150.1266191-1-felix.moessbauer@siemens.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io_uring.c |   13 ++++++++++---
+ 1 file changed, 10 insertions(+), 3 deletions(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -8572,15 +8572,22 @@ static int io_sq_offload_create(struct i
+                       return 0;
+               if (p->flags & IORING_SETUP_SQ_AFF) {
+-                      struct cpumask allowed_mask;
++                      cpumask_var_t allowed_mask;
+                       int cpu = p->sq_thread_cpu;
+                       ret = -EINVAL;
+                       if (cpu >= nr_cpu_ids || !cpu_online(cpu))
+                               goto err_sqpoll;
+-                      cpuset_cpus_allowed(current, &allowed_mask);
+-                      if (!cpumask_test_cpu(cpu, &allowed_mask))
++                      ret = -ENOMEM;
++                      if (!alloc_cpumask_var(&allowed_mask, GFP_KERNEL))
+                               goto err_sqpoll;
++                      ret = -EINVAL;
++                      cpuset_cpus_allowed(current, allowed_mask);
++                      if (!cpumask_test_cpu(cpu, allowed_mask)) {
++                              free_cpumask_var(allowed_mask);
++                              goto err_sqpoll;
++                      }
++                      free_cpumask_var(allowed_mask);
+                       sqd->sq_cpu = cpu;
+               } else {
+                       sqd->sq_cpu = -1;
diff --git a/queue-5.10/io_uring-sqpoll-retain-test-for-whether-the-cpu-is-valid.patch b/queue-5.10/io_uring-sqpoll-retain-test-for-whether-the-cpu-is-valid.patch
new file mode 100644 (file)
index 0000000..8cb3d53
--- /dev/null
@@ -0,0 +1,89 @@
+From fm-1321639-20241017115100e621ba6a24048a45b8-Een3Mc@rts-flowmailer.siemens.com Thu Oct 17 13:51:02 2024
+From: Felix Moessbauer <felix.moessbauer@siemens.com>
+Date: Thu, 17 Oct 2024 13:50:28 +0200
+Subject: io_uring/sqpoll: retain test for whether the CPU is valid
+To: stable@vger.kernel.org
+Cc: io-uring@vger.kernel.org, axboe@kernel.dk, gregkh@linuxfoundation.org, kernel test robot <oliver.sang@intel.com>, Felix Moessbauer <felix.moessbauer@siemens.com>
+Message-ID: <20241017115029.178246-2-felix.moessbauer@siemens.com>
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit a09c17240bdf2e9fa6d0591afa9448b59785f7d4 upstream.
+
+A recent commit ensured that SQPOLL cannot be set up with a CPU that
+isn't in the current task's cpuset, but it also dropped testing whether
+the CPU is valid in the first place. Without that, if a task passes in
+a CPU value that is too high, the following KASAN splat can get
+triggered:
+
+BUG: KASAN: stack-out-of-bounds in io_sq_offload_create+0x858/0xaa4
+Read of size 8 at addr ffff800089bc7b90 by task wq-aff.t/1391
+
+CPU: 4 UID: 1000 PID: 1391 Comm: wq-aff.t Not tainted 6.11.0-rc7-00227-g371c468f4db6 #7080
+Hardware name: linux,dummy-virt (DT)
+Call trace:
+ dump_backtrace.part.0+0xcc/0xe0
+ show_stack+0x14/0x1c
+ dump_stack_lvl+0x58/0x74
+ print_report+0x16c/0x4c8
+ kasan_report+0x9c/0xe4
+ __asan_report_load8_noabort+0x1c/0x24
+ io_sq_offload_create+0x858/0xaa4
+ io_uring_setup+0x1394/0x17c4
+ __arm64_sys_io_uring_setup+0x6c/0x180
+ invoke_syscall+0x6c/0x260
+ el0_svc_common.constprop.0+0x158/0x224
+ do_el0_svc+0x3c/0x5c
+ el0_svc+0x34/0x70
+ el0t_64_sync_handler+0x118/0x124
+ el0t_64_sync+0x168/0x16c
+
+The buggy address belongs to stack of task wq-aff.t/1391
+ and is located at offset 48 in frame:
+ io_sq_offload_create+0x0/0xaa4
+
+This frame has 1 object:
+ [32, 40) 'allowed_mask'
+
+The buggy address belongs to the virtual mapping at
+ [ffff800089bc0000, ffff800089bc9000) created by:
+ kernel_clone+0x124/0x7e0
+
+The buggy address belongs to the physical page:
+page: refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff0000d740af80 pfn:0x11740a
+memcg:ffff0000c2706f02
+flags: 0xbffe00000000000(node=0|zone=2|lastcpupid=0x1fff)
+raw: 0bffe00000000000 0000000000000000 dead000000000122 0000000000000000
+raw: ffff0000d740af80 0000000000000000 00000001ffffffff ffff0000c2706f02
+page dumped because: kasan: bad access detected
+
+Memory state around the buggy address:
+ ffff800089bc7a80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+ ffff800089bc7b00: 00 00 00 00 00 00 00 00 00 00 00 00 f1 f1 f1 f1
+>ffff800089bc7b80: 00 f3 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00
+                         ^
+ ffff800089bc7c00: 00 00 00 00 00 00 00 00 00 00 00 00 f1 f1 f1 f1
+ ffff800089bc7c80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 f3
+
+Reported-by: kernel test robot <oliver.sang@intel.com>
+Closes: https://lore.kernel.org/oe-lkp/202409161632.cbeeca0d-lkp@intel.com
+Fixes: f011c9cf04c0 ("io_uring/sqpoll: do not allow pinning outside of cpuset")
+Tested-by: Felix Moessbauer <felix.moessbauer@siemens.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Felix Moessbauer <felix.moessbauer@siemens.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io_uring.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -8576,6 +8576,8 @@ static int io_sq_offload_create(struct i
+                       int cpu = p->sq_thread_cpu;
+                       ret = -EINVAL;
++                      if (cpu >= nr_cpu_ids || !cpu_online(cpu))
++                              goto err_sqpoll;
+                       cpuset_cpus_allowed(current, &allowed_mask);
+                       if (!cpumask_test_cpu(cpu, &allowed_mask))
+                               goto err_sqpoll;
diff --git a/queue-5.10/kvm-fix-a-data-race-on-last_boosted_vcpu-in-kvm_vcpu_on_spin.patch b/queue-5.10/kvm-fix-a-data-race-on-last_boosted_vcpu-in-kvm_vcpu_on_spin.patch
new file mode 100644 (file)
index 0000000..6905282
--- /dev/null
@@ -0,0 +1,97 @@
+From 49f683b41f28918df3e51ddc0d928cb2e934ccdb Mon Sep 17 00:00:00 2001
+From: Breno Leitao <leitao@debian.org>
+Date: Fri, 10 May 2024 02:23:52 -0700
+Subject: KVM: Fix a data race on last_boosted_vcpu in kvm_vcpu_on_spin()
+
+From: Breno Leitao <leitao@debian.org>
+
+commit 49f683b41f28918df3e51ddc0d928cb2e934ccdb upstream.
+
+Use {READ,WRITE}_ONCE() to access kvm->last_boosted_vcpu to ensure the
+loads and stores are atomic.  In the extremely unlikely scenario the
+compiler tears the stores, it's theoretically possible for KVM to attempt
+to get a vCPU using an out-of-bounds index, e.g. if the write is split
+into multiple 8-bit stores, and is paired with a 32-bit load on a VM with
+257 vCPUs:
+
+  CPU0                              CPU1
+  last_boosted_vcpu = 0xff;
+
+                                    (last_boosted_vcpu = 0x100)
+                                    last_boosted_vcpu[15:8] = 0x01;
+  i = (last_boosted_vcpu = 0x1ff)
+                                    last_boosted_vcpu[7:0] = 0x00;
+
+  vcpu = kvm->vcpu_array[0x1ff];
+
+As detected by KCSAN:
+
+  BUG: KCSAN: data-race in kvm_vcpu_on_spin [kvm] / kvm_vcpu_on_spin [kvm]
+
+  write to 0xffffc90025a92344 of 4 bytes by task 4340 on cpu 16:
+  kvm_vcpu_on_spin (arch/x86/kvm/../../../virt/kvm/kvm_main.c:4112) kvm
+  handle_pause (arch/x86/kvm/vmx/vmx.c:5929) kvm_intel
+  vmx_handle_exit (arch/x86/kvm/vmx/vmx.c:?
+                arch/x86/kvm/vmx/vmx.c:6606) kvm_intel
+  vcpu_run (arch/x86/kvm/x86.c:11107 arch/x86/kvm/x86.c:11211) kvm
+  kvm_arch_vcpu_ioctl_run (arch/x86/kvm/x86.c:?) kvm
+  kvm_vcpu_ioctl (arch/x86/kvm/../../../virt/kvm/kvm_main.c:?) kvm
+  __se_sys_ioctl (fs/ioctl.c:52 fs/ioctl.c:904 fs/ioctl.c:890)
+  __x64_sys_ioctl (fs/ioctl.c:890)
+  x64_sys_call (arch/x86/entry/syscall_64.c:33)
+  do_syscall_64 (arch/x86/entry/common.c:?)
+  entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
+
+  read to 0xffffc90025a92344 of 4 bytes by task 4342 on cpu 4:
+  kvm_vcpu_on_spin (arch/x86/kvm/../../../virt/kvm/kvm_main.c:4069) kvm
+  handle_pause (arch/x86/kvm/vmx/vmx.c:5929) kvm_intel
+  vmx_handle_exit (arch/x86/kvm/vmx/vmx.c:?
+                       arch/x86/kvm/vmx/vmx.c:6606) kvm_intel
+  vcpu_run (arch/x86/kvm/x86.c:11107 arch/x86/kvm/x86.c:11211) kvm
+  kvm_arch_vcpu_ioctl_run (arch/x86/kvm/x86.c:?) kvm
+  kvm_vcpu_ioctl (arch/x86/kvm/../../../virt/kvm/kvm_main.c:?) kvm
+  __se_sys_ioctl (fs/ioctl.c:52 fs/ioctl.c:904 fs/ioctl.c:890)
+  __x64_sys_ioctl (fs/ioctl.c:890)
+  x64_sys_call (arch/x86/entry/syscall_64.c:33)
+  do_syscall_64 (arch/x86/entry/common.c:?)
+  entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
+
+  value changed: 0x00000012 -> 0x00000000
+
+Fixes: 217ece6129f2 ("KVM: use yield_to instead of sleep in kvm_vcpu_on_spin")
+Cc: stable@vger.kernel.org
+Signed-off-by: Breno Leitao <leitao@debian.org>
+Link: https://lore.kernel.org/r/20240510092353.2261824-1-leitao@debian.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Saeed Mirzamohammadi <saeed.mirzamohammadi@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ virt/kvm/kvm_main.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -3090,12 +3090,13 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *m
+ {
+       struct kvm *kvm = me->kvm;
+       struct kvm_vcpu *vcpu;
+-      int last_boosted_vcpu = me->kvm->last_boosted_vcpu;
++      int last_boosted_vcpu;
+       int yielded = 0;
+       int try = 3;
+       int pass;
+       int i;
++      last_boosted_vcpu = READ_ONCE(kvm->last_boosted_vcpu);
+       kvm_vcpu_set_in_spin_loop(me, true);
+       /*
+        * We boost the priority of a VCPU that is runnable but not
+@@ -3126,7 +3127,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *m
+                       yielded = kvm_vcpu_yield_to(vcpu);
+                       if (yielded > 0) {
+-                              kvm->last_boosted_vcpu = i;
++                              WRITE_ONCE(kvm->last_boosted_vcpu, i);
+                               break;
+                       } else if (yielded < 0) {
+                               try--;
index 9320436755652e2ddfb826c38f8f97487599aaf6..3708ce9a07afb787437f28b71748bdee6a746a8f 100644 (file)
@@ -8,3 +8,7 @@ irqchip-gic-v3-its-fix-vsync-referencing-an-unmapped-vpe-on-gic-v4.1.patch
 fat-fix-uninitialized-variable.patch
 mm-swapfile-skip-hugetlb-pages-for-unuse_vma.patch
 wifi-mac80211-fix-potential-key-use-after-free.patch
+kvm-fix-a-data-race-on-last_boosted_vcpu-in-kvm_vcpu_on_spin.patch
+io_uring-sqpoll-do-not-allow-pinning-outside-of-cpuset.patch
+io_uring-sqpoll-retain-test-for-whether-the-cpu-is-valid.patch
+io_uring-sqpoll-do-not-put-cpumask-on-stack.patch