From: Greg Kroah-Hartman Date: Fri, 18 Oct 2024 10:40:09 +0000 (+0200) Subject: 5.10-stable patches X-Git-Tag: v5.10.228~55 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=d2894a00ee679227ca244c983e1faa145f65fd46;p=thirdparty%2Fkernel%2Fstable-queue.git 5.10-stable patches added patches: io_uring-sqpoll-do-not-allow-pinning-outside-of-cpuset.patch io_uring-sqpoll-do-not-put-cpumask-on-stack.patch io_uring-sqpoll-retain-test-for-whether-the-cpu-is-valid.patch kvm-fix-a-data-race-on-last_boosted_vcpu-in-kvm_vcpu_on_spin.patch --- diff --git a/queue-5.10/io_uring-sqpoll-do-not-allow-pinning-outside-of-cpuset.patch b/queue-5.10/io_uring-sqpoll-do-not-allow-pinning-outside-of-cpuset.patch new file mode 100644 index 00000000000..7dae57887c8 --- /dev/null +++ b/queue-5.10/io_uring-sqpoll-do-not-allow-pinning-outside-of-cpuset.patch @@ -0,0 +1,59 @@ +From fm-1321639-2024101711510000f421e7dbc03e184b-hkQGWx@rts-flowmailer.siemens.com Thu Oct 17 13:51:02 2024 +From: Felix Moessbauer +Date: Thu, 17 Oct 2024 13:50:27 +0200 +Subject: io_uring/sqpoll: do not allow pinning outside of cpuset +To: stable@vger.kernel.org +Cc: io-uring@vger.kernel.org, axboe@kernel.dk, gregkh@linuxfoundation.org, Felix Moessbauer +Message-ID: <20241017115029.178246-1-felix.moessbauer@siemens.com> + +From: Felix Moessbauer + +commit f011c9cf04c06f16b24f583d313d3c012e589e50 upstream. + +The submit queue polling threads are userland threads that just never +exit to the userland. When creating the thread with IORING_SETUP_SQ_AFF, +the affinity of the poller thread is set to the cpu specified in +sq_thread_cpu. However, this CPU can be outside of the cpuset defined +by the cgroup cpuset controller. This violates the rules defined by the +cpuset controller and is a potential issue for realtime applications. + +In b7ed6d8ffd6 we fixed the default affinity of the poller thread, in +case no explicit pinning is required by inheriting the one of the +creating task. 
In case of explicit pinning, the check is more +complicated, as a cpu outside of the parent cpumask is also allowed. +We implemented this by using cpuset_cpus_allowed (that has support for +cgroup cpusets) and testing if the requested cpu is in the set. + +Fixes: 37d1e2e3642e ("io_uring: move SQPOLL thread io-wq forked worker") +Signed-off-by: Felix Moessbauer +Link: https://lore.kernel.org/r/20240909150036.55921-1-felix.moessbauer@siemens.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/io_uring.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -56,6 +56,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -8571,10 +8572,12 @@ static int io_sq_offload_create(struct i + return 0; + + if (p->flags & IORING_SETUP_SQ_AFF) { ++ struct cpumask allowed_mask; + int cpu = p->sq_thread_cpu; + + ret = -EINVAL; +- if (cpu >= nr_cpu_ids || !cpu_online(cpu)) ++ cpuset_cpus_allowed(current, &allowed_mask); ++ if (!cpumask_test_cpu(cpu, &allowed_mask)) + goto err_sqpoll; + sqd->sq_cpu = cpu; + } else { diff --git a/queue-5.10/io_uring-sqpoll-do-not-put-cpumask-on-stack.patch b/queue-5.10/io_uring-sqpoll-do-not-put-cpumask-on-stack.patch new file mode 100644 index 00000000000..ac328608256 --- /dev/null +++ b/queue-5.10/io_uring-sqpoll-do-not-put-cpumask-on-stack.patch @@ -0,0 +1,53 @@ +From fm-1321639-20241017115101ba16fac9d1bf4e632e-TAlFlY@rts-flowmailer.siemens.com Thu Oct 17 13:51:03 2024 +From: Felix Moessbauer +Date: Thu, 17 Oct 2024 13:50:29 +0200 +Subject: io_uring/sqpoll: do not put cpumask on stack +To: stable@vger.kernel.org +Cc: io-uring@vger.kernel.org, axboe@kernel.dk, gregkh@linuxfoundation.org, Felix Moessbauer +Message-ID: <20241017115029.178246-3-felix.moessbauer@siemens.com> + +From: Felix Moessbauer + +commit 7f44beadcc11adb98220556d2ddbe9c97aa6d42d upstream. 
+ +Putting the cpumask on the stack has been deprecated for a long time (since +2d3854a37e8), as these can be big. Given that, change the on-stack +allocation of allowed_mask to be dynamically allocated. + +Fixes: f011c9cf04c0 ("io_uring/sqpoll: do not allow pinning outside of cpuset") +Signed-off-by: Felix Moessbauer +Link: https://lore.kernel.org/r/20240916111150.1266191-1-felix.moessbauer@siemens.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/io_uring.c | 13 ++++++++++--- + 1 file changed, 10 insertions(+), 3 deletions(-) + +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -8572,15 +8572,22 @@ static int io_sq_offload_create(struct i + return 0; + + if (p->flags & IORING_SETUP_SQ_AFF) { +- struct cpumask allowed_mask; ++ cpumask_var_t allowed_mask; + int cpu = p->sq_thread_cpu; + + ret = -EINVAL; + if (cpu >= nr_cpu_ids || !cpu_online(cpu)) + goto err_sqpoll; +- cpuset_cpus_allowed(current, &allowed_mask); +- if (!cpumask_test_cpu(cpu, &allowed_mask)) ++ ret = -ENOMEM; ++ if (!alloc_cpumask_var(&allowed_mask, GFP_KERNEL)) + goto err_sqpoll; ++ ret = -EINVAL; ++ cpuset_cpus_allowed(current, allowed_mask); ++ if (!cpumask_test_cpu(cpu, allowed_mask)) { ++ free_cpumask_var(allowed_mask); ++ goto err_sqpoll; ++ } ++ free_cpumask_var(allowed_mask); + sqd->sq_cpu = cpu; + } else { + sqd->sq_cpu = -1; diff --git a/queue-5.10/io_uring-sqpoll-retain-test-for-whether-the-cpu-is-valid.patch b/queue-5.10/io_uring-sqpoll-retain-test-for-whether-the-cpu-is-valid.patch new file mode 100644 index 00000000000..8cb3d534315 --- /dev/null +++ b/queue-5.10/io_uring-sqpoll-retain-test-for-whether-the-cpu-is-valid.patch @@ -0,0 +1,89 @@ +From fm-1321639-20241017115100e621ba6a24048a45b8-Een3Mc@rts-flowmailer.siemens.com Thu Oct 17 13:51:02 2024 +From: Felix Moessbauer +Date: Thu, 17 Oct 2024 13:50:28 +0200 +Subject: io_uring/sqpoll: retain test for whether the CPU is valid +To: stable@vger.kernel.org +Cc: io-uring@vger.kernel.org, axboe@kernel.dk, 
gregkh@linuxfoundation.org, kernel test robot , Felix Moessbauer +Message-ID: <20241017115029.178246-2-felix.moessbauer@siemens.com> + +From: Jens Axboe + +commit a09c17240bdf2e9fa6d0591afa9448b59785f7d4 upstream. + +A recent commit ensured that SQPOLL cannot be set up with a CPU that +isn't in the current task's cpuset, but it also dropped testing whether +the CPU is valid in the first place. Without that, if a task passes in +a CPU value that is too high, the following KASAN splat can get +triggered: + +BUG: KASAN: stack-out-of-bounds in io_sq_offload_create+0x858/0xaa4 +Read of size 8 at addr ffff800089bc7b90 by task wq-aff.t/1391 + +CPU: 4 UID: 1000 PID: 1391 Comm: wq-aff.t Not tainted 6.11.0-rc7-00227-g371c468f4db6 #7080 +Hardware name: linux,dummy-virt (DT) +Call trace: + dump_backtrace.part.0+0xcc/0xe0 + show_stack+0x14/0x1c + dump_stack_lvl+0x58/0x74 + print_report+0x16c/0x4c8 + kasan_report+0x9c/0xe4 + __asan_report_load8_noabort+0x1c/0x24 + io_sq_offload_create+0x858/0xaa4 + io_uring_setup+0x1394/0x17c4 + __arm64_sys_io_uring_setup+0x6c/0x180 + invoke_syscall+0x6c/0x260 + el0_svc_common.constprop.0+0x158/0x224 + do_el0_svc+0x3c/0x5c + el0_svc+0x34/0x70 + el0t_64_sync_handler+0x118/0x124 + el0t_64_sync+0x168/0x16c + +The buggy address belongs to stack of task wq-aff.t/1391 + and is located at offset 48 in frame: + io_sq_offload_create+0x0/0xaa4 + +This frame has 1 object: + [32, 40) 'allowed_mask' + +The buggy address belongs to the virtual mapping at + [ffff800089bc0000, ffff800089bc9000) created by: + kernel_clone+0x124/0x7e0 + +The buggy address belongs to the physical page: +page: refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff0000d740af80 pfn:0x11740a +memcg:ffff0000c2706f02 +flags: 0xbffe00000000000(node=0|zone=2|lastcpupid=0x1fff) +raw: 0bffe00000000000 0000000000000000 dead000000000122 0000000000000000 +raw: ffff0000d740af80 0000000000000000 00000001ffffffff ffff0000c2706f02 +page dumped because: kasan: bad access detected + +Memory state 
around the buggy address: + ffff800089bc7a80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + ffff800089bc7b00: 00 00 00 00 00 00 00 00 00 00 00 00 f1 f1 f1 f1 +>ffff800089bc7b80: 00 f3 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00 + ^ + ffff800089bc7c00: 00 00 00 00 00 00 00 00 00 00 00 00 f1 f1 f1 f1 + ffff800089bc7c80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 f3 + +Reported-by: kernel test robot +Closes: https://lore.kernel.org/oe-lkp/202409161632.cbeeca0d-lkp@intel.com +Fixes: f011c9cf04c0 ("io_uring/sqpoll: do not allow pinning outside of cpuset") +Tested-by: Felix Moessbauer +Signed-off-by: Jens Axboe +Signed-off-by: Felix Moessbauer +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/io_uring.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -8576,6 +8576,8 @@ static int io_sq_offload_create(struct i + int cpu = p->sq_thread_cpu; + + ret = -EINVAL; ++ if (cpu >= nr_cpu_ids || !cpu_online(cpu)) ++ goto err_sqpoll; + cpuset_cpus_allowed(current, &allowed_mask); + if (!cpumask_test_cpu(cpu, &allowed_mask)) + goto err_sqpoll; diff --git a/queue-5.10/kvm-fix-a-data-race-on-last_boosted_vcpu-in-kvm_vcpu_on_spin.patch b/queue-5.10/kvm-fix-a-data-race-on-last_boosted_vcpu-in-kvm_vcpu_on_spin.patch new file mode 100644 index 00000000000..6905282681b --- /dev/null +++ b/queue-5.10/kvm-fix-a-data-race-on-last_boosted_vcpu-in-kvm_vcpu_on_spin.patch @@ -0,0 +1,97 @@ +From 49f683b41f28918df3e51ddc0d928cb2e934ccdb Mon Sep 17 00:00:00 2001 +From: Breno Leitao +Date: Fri, 10 May 2024 02:23:52 -0700 +Subject: KVM: Fix a data race on last_boosted_vcpu in kvm_vcpu_on_spin() + +From: Breno Leitao + +commit 49f683b41f28918df3e51ddc0d928cb2e934ccdb upstream. + +Use {READ,WRITE}_ONCE() to access kvm->last_boosted_vcpu to ensure the +loads and stores are atomic. In the extremely unlikely scenario the +compiler tears the stores, it's theoretically possible for KVM to attempt +to get a vCPU using an out-of-bounds index, e.g. 
if the write is split +into multiple 8-bit stores, and is paired with a 32-bit load on a VM with +257 vCPUs: + + CPU0 CPU1 + last_boosted_vcpu = 0xff; + + (last_boosted_vcpu = 0x100) + last_boosted_vcpu[15:8] = 0x01; + i = (last_boosted_vcpu = 0x1ff) + last_boosted_vcpu[7:0] = 0x00; + + vcpu = kvm->vcpu_array[0x1ff]; + +As detected by KCSAN: + + BUG: KCSAN: data-race in kvm_vcpu_on_spin [kvm] / kvm_vcpu_on_spin [kvm] + + write to 0xffffc90025a92344 of 4 bytes by task 4340 on cpu 16: + kvm_vcpu_on_spin (arch/x86/kvm/../../../virt/kvm/kvm_main.c:4112) kvm + handle_pause (arch/x86/kvm/vmx/vmx.c:5929) kvm_intel + vmx_handle_exit (arch/x86/kvm/vmx/vmx.c:? + arch/x86/kvm/vmx/vmx.c:6606) kvm_intel + vcpu_run (arch/x86/kvm/x86.c:11107 arch/x86/kvm/x86.c:11211) kvm + kvm_arch_vcpu_ioctl_run (arch/x86/kvm/x86.c:?) kvm + kvm_vcpu_ioctl (arch/x86/kvm/../../../virt/kvm/kvm_main.c:?) kvm + __se_sys_ioctl (fs/ioctl.c:52 fs/ioctl.c:904 fs/ioctl.c:890) + __x64_sys_ioctl (fs/ioctl.c:890) + x64_sys_call (arch/x86/entry/syscall_64.c:33) + do_syscall_64 (arch/x86/entry/common.c:?) + entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) + + read to 0xffffc90025a92344 of 4 bytes by task 4342 on cpu 4: + kvm_vcpu_on_spin (arch/x86/kvm/../../../virt/kvm/kvm_main.c:4069) kvm + handle_pause (arch/x86/kvm/vmx/vmx.c:5929) kvm_intel + vmx_handle_exit (arch/x86/kvm/vmx/vmx.c:? + arch/x86/kvm/vmx/vmx.c:6606) kvm_intel + vcpu_run (arch/x86/kvm/x86.c:11107 arch/x86/kvm/x86.c:11211) kvm + kvm_arch_vcpu_ioctl_run (arch/x86/kvm/x86.c:?) kvm + kvm_vcpu_ioctl (arch/x86/kvm/../../../virt/kvm/kvm_main.c:?) kvm + __se_sys_ioctl (fs/ioctl.c:52 fs/ioctl.c:904 fs/ioctl.c:890) + __x64_sys_ioctl (fs/ioctl.c:890) + x64_sys_call (arch/x86/entry/syscall_64.c:33) + do_syscall_64 (arch/x86/entry/common.c:?) 
+ entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) + + value changed: 0x00000012 -> 0x00000000 + +Fixes: 217ece6129f2 ("KVM: use yield_to instead of sleep in kvm_vcpu_on_spin") +Cc: stable@vger.kernel.org +Signed-off-by: Breno Leitao +Link: https://lore.kernel.org/r/20240510092353.2261824-1-leitao@debian.org +Signed-off-by: Sean Christopherson +Signed-off-by: Saeed Mirzamohammadi +Signed-off-by: Greg Kroah-Hartman +--- + virt/kvm/kvm_main.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -3090,12 +3090,13 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *m + { + struct kvm *kvm = me->kvm; + struct kvm_vcpu *vcpu; +- int last_boosted_vcpu = me->kvm->last_boosted_vcpu; ++ int last_boosted_vcpu; + int yielded = 0; + int try = 3; + int pass; + int i; + ++ last_boosted_vcpu = READ_ONCE(kvm->last_boosted_vcpu); + kvm_vcpu_set_in_spin_loop(me, true); + /* + * We boost the priority of a VCPU that is runnable but not +@@ -3126,7 +3127,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *m + + yielded = kvm_vcpu_yield_to(vcpu); + if (yielded > 0) { +- kvm->last_boosted_vcpu = i; ++ WRITE_ONCE(kvm->last_boosted_vcpu, i); + break; + } else if (yielded < 0) { + try--; diff --git a/queue-5.10/series b/queue-5.10/series index 93204367556..3708ce9a07a 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -8,3 +8,7 @@ irqchip-gic-v3-its-fix-vsync-referencing-an-unmapped-vpe-on-gic-v4.1.patch fat-fix-uninitialized-variable.patch mm-swapfile-skip-hugetlb-pages-for-unuse_vma.patch wifi-mac80211-fix-potential-key-use-after-free.patch +kvm-fix-a-data-race-on-last_boosted_vcpu-in-kvm_vcpu_on_spin.patch +io_uring-sqpoll-do-not-allow-pinning-outside-of-cpuset.patch +io_uring-sqpoll-retain-test-for-whether-the-cpu-is-valid.patch +io_uring-sqpoll-do-not-put-cpumask-on-stack.patch