git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.12-stable patches
author: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 30 Jul 2025 08:55:36 +0000 (10:55 +0200)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 30 Jul 2025 08:55:36 +0000 (10:55 +0200)
added patches:
kvm-x86-free-vcpus-before-freeing-vm-state.patch
mm-khugepaged-fix-call-hpage_collapse_scan_file-for-anonymous-vma.patch

queue-6.12/kvm-x86-free-vcpus-before-freeing-vm-state.patch [new file with mode: 0644]
queue-6.12/mm-khugepaged-fix-call-hpage_collapse_scan_file-for-anonymous-vma.patch [new file with mode: 0644]
queue-6.12/series

diff --git a/queue-6.12/kvm-x86-free-vcpus-before-freeing-vm-state.patch b/queue-6.12/kvm-x86-free-vcpus-before-freeing-vm-state.patch
new file mode 100644 (file)
index 0000000..d22560e
--- /dev/null
@@ -0,0 +1,91 @@
+From 17bcd714426386fda741a4bccd96a2870179344b Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Mon, 24 Feb 2025 15:55:36 -0800
+Subject: KVM: x86: Free vCPUs before freeing VM state
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 17bcd714426386fda741a4bccd96a2870179344b upstream.
+
+Free vCPUs before freeing any VM state, as both SVM and VMX may access
+VM state when "freeing" a vCPU that is currently "in" L2, i.e. that needs
+to be kicked out of nested guest mode.
+
+Commit 6fcee03df6a1 ("KVM: x86: avoid loading a vCPU after .vm_destroy was
+called") partially fixed the issue, but for unknown reasons only moved the
+MMU unloading before VM destruction.  Complete the change, and free all
+vCPU state prior to destroying VM state, as nVMX accesses even more state
+than nSVM.
+
+In addition to the AVIC, KVM can hit a use-after-free on MSR filters:
+
+  kvm_msr_allowed+0x4c/0xd0
+  __kvm_set_msr+0x12d/0x1e0
+  kvm_set_msr+0x19/0x40
+  load_vmcs12_host_state+0x2d8/0x6e0 [kvm_intel]
+  nested_vmx_vmexit+0x715/0xbd0 [kvm_intel]
+  nested_vmx_free_vcpu+0x33/0x50 [kvm_intel]
+  vmx_free_vcpu+0x54/0xc0 [kvm_intel]
+  kvm_arch_vcpu_destroy+0x28/0xf0
+  kvm_vcpu_destroy+0x12/0x50
+  kvm_arch_destroy_vm+0x12c/0x1c0
+  kvm_put_kvm+0x263/0x3c0
+  kvm_vm_release+0x21/0x30
+
+and an upcoming fix to process injectable interrupts on nested VM-Exit
+will access the PIC:
+
+  BUG: kernel NULL pointer dereference, address: 0000000000000090
+  #PF: supervisor read access in kernel mode
+  #PF: error_code(0x0000) - not-present page
+  CPU: 23 UID: 1000 PID: 2658 Comm: kvm-nx-lpage-re
+  RIP: 0010:kvm_cpu_has_extint+0x2f/0x60 [kvm]
+  Call Trace:
+   <TASK>
+   kvm_cpu_has_injectable_intr+0xe/0x60 [kvm]
+   nested_vmx_vmexit+0x2d7/0xdf0 [kvm_intel]
+   nested_vmx_free_vcpu+0x40/0x50 [kvm_intel]
+   vmx_vcpu_free+0x2d/0x80 [kvm_intel]
+   kvm_arch_vcpu_destroy+0x2d/0x130 [kvm]
+   kvm_destroy_vcpus+0x8a/0x100 [kvm]
+   kvm_arch_destroy_vm+0xa7/0x1d0 [kvm]
+   kvm_destroy_vm+0x172/0x300 [kvm]
+   kvm_vcpu_release+0x31/0x50 [kvm]
+
+Inarguably, both nSVM and nVMX need to be fixed, but punt on those
+cleanups for the moment.  Conceptually, vCPUs should be freed before VM
+state.  Assets like the I/O APIC and PIC _must_ be allocated before vCPUs
+are created, so it stands to reason that they must be freed _after_ vCPUs
+are destroyed.
+
+Reported-by: Aaron Lewis <aaronlewis@google.com>
+Closes: https://lore.kernel.org/all/20240703175618.2304869-2-aaronlewis@google.com
+Cc: Jim Mattson <jmattson@google.com>
+Cc: Yan Zhao <yan.y.zhao@intel.com>
+Cc: Rick P Edgecombe <rick.p.edgecombe@intel.com>
+Cc: Kai Huang <kai.huang@intel.com>
+Cc: Isaku Yamahata <isaku.yamahata@intel.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-ID: <20250224235542.2562848-2-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Kevin Cheng <chengkev@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -12895,11 +12895,11 @@ void kvm_arch_destroy_vm(struct kvm *kvm
+               mutex_unlock(&kvm->slots_lock);
+       }
+       kvm_unload_vcpu_mmus(kvm);
++      kvm_destroy_vcpus(kvm);
+       kvm_x86_call(vm_destroy)(kvm);
+       kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1));
+       kvm_pic_destroy(kvm);
+       kvm_ioapic_destroy(kvm);
+-      kvm_destroy_vcpus(kvm);
+       kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
+       kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1));
+       kvm_mmu_uninit_vm(kvm);
diff --git a/queue-6.12/mm-khugepaged-fix-call-hpage_collapse_scan_file-for-anonymous-vma.patch b/queue-6.12/mm-khugepaged-fix-call-hpage_collapse_scan_file-for-anonymous-vma.patch
new file mode 100644 (file)
index 0000000..d3af1c5
--- /dev/null
@@ -0,0 +1,100 @@
+From f1897f2f08b28ae59476d8b73374b08f856973af Mon Sep 17 00:00:00 2001
+From: Liu Shixin <liushixin2@huawei.com>
+Date: Sat, 11 Jan 2025 11:45:11 +0800
+Subject: mm: khugepaged: fix call hpage_collapse_scan_file() for anonymous vma
+
+From: Liu Shixin <liushixin2@huawei.com>
+
+commit f1897f2f08b28ae59476d8b73374b08f856973af upstream.
+
+syzkaller reported the following BUG_ON():
+
+ ------------[ cut here ]------------
+ kernel BUG at mm/khugepaged.c:1835!
+ Internal error: Oops - BUG: 00000000f2000800 [#1] SMP
+ ...
+ CPU: 6 UID: 0 PID: 8009 Comm: syz.15.106 Kdump: loaded Tainted: G        W          6.13.0-rc6 #22
+ Tainted: [W]=WARN
+ Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015
+ pstate: 00400005 (nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
+ pc : collapse_file+0xa44/0x1400
+ lr : collapse_file+0x88/0x1400
+ sp : ffff80008afe3a60
+ ...
+ Call trace:
+  collapse_file+0xa44/0x1400 (P)
+  hpage_collapse_scan_file+0x278/0x400
+  madvise_collapse+0x1bc/0x678
+  madvise_vma_behavior+0x32c/0x448
+  madvise_walk_vmas.constprop.0+0xbc/0x140
+  do_madvise.part.0+0xdc/0x2c8
+  __arm64_sys_madvise+0x68/0x88
+  invoke_syscall+0x50/0x120
+  el0_svc_common.constprop.0+0xc8/0xf0
+  do_el0_svc+0x24/0x38
+  el0_svc+0x34/0x128
+  el0t_64_sync_handler+0xc8/0xd0
+  el0t_64_sync+0x190/0x198
+
+This indicates that the pgoff is unaligned.  After analysis, I confirmed
+that the vma is mapped to /dev/zero.  Such a vma does have a vm_file, but
+it is set to anonymous by mmap_zero().  So even if it is mmapped with a
+2M-unaligned pgoff, it passes the check in thp_vma_allowable_order() as
+an anonymous mapping, but is then collapsed as a file mapping.
+
+The problem appears to have existed for a long time, but it was masked:
+because of the earlier khugepaged_max_ptes_none check, the collapse was
+skipped, since a /dev/zero mapping has no present pages.  Commit
+d8ea7cc8547c limited that check to khugepaged only, so the BUG_ON() can now
+be triggered by madvise_collapse().
+
+Add a vma_is_anonymous() check so that such a vma is processed by
+hpage_collapse_scan_pmd().
+
+Link: https://lkml.kernel.org/r/20250111034511.2223353-1-liushixin2@huawei.com
+Fixes: d8ea7cc8547c ("mm/khugepaged: add flag to predicate khugepaged-only behavior")
+Signed-off-by: Liu Shixin <liushixin2@huawei.com>
+Reviewed-by: Yang Shi <yang@os.amperecomputing.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: Chengming Zhou <chengming.zhou@linux.dev>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Muchun Song <muchun.song@linux.dev>
+Cc: Nanyong Sun <sunnanyong@huawei.com>
+Cc: Qi Zheng <zhengqi.arch@bytedance.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+[acsjakub: backport, clean apply]
+Signed-off-by: Jakub Acs <acsjakub@amazon.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+v1 -> v2: fix missing sign-off
+
+Ran into the crash with syzkaller, backporting this patch works - the
+reproducer no longer crashes.
+
+Please let me know if there was a reason not to backport.
+
+ mm/khugepaged.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/mm/khugepaged.c
++++ b/mm/khugepaged.c
+@@ -2404,7 +2404,7 @@ skip:
+                       VM_BUG_ON(khugepaged_scan.address < hstart ||
+                                 khugepaged_scan.address + HPAGE_PMD_SIZE >
+                                 hend);
+-                      if (IS_ENABLED(CONFIG_SHMEM) && vma->vm_file) {
++                      if (IS_ENABLED(CONFIG_SHMEM) && !vma_is_anonymous(vma)) {
+                               struct file *file = get_file(vma->vm_file);
+                               pgoff_t pgoff = linear_page_index(vma,
+                                               khugepaged_scan.address);
+@@ -2750,7 +2750,7 @@ int madvise_collapse(struct vm_area_stru
+               mmap_assert_locked(mm);
+               memset(cc->node_load, 0, sizeof(cc->node_load));
+               nodes_clear(cc->alloc_nmask);
+-              if (IS_ENABLED(CONFIG_SHMEM) && vma->vm_file) {
++              if (IS_ENABLED(CONFIG_SHMEM) && !vma_is_anonymous(vma)) {
+                       struct file *file = get_file(vma->vm_file);
+                       pgoff_t pgoff = linear_page_index(vma, addr);
index a16a82dcb7475bf3389fd9a351003790c19038e3..350fd9979ea1b1545918f47851b18286d8093ccd 100644 (file)
@@ -113,3 +113,5 @@ revert-drm-xe-gt-update-handling-of-xe_force_wake_get-return.patch
 revert-drm-xe-tests-mocs-update-xe_force_wake_get-return-handling.patch
 revert-drm-xe-devcoredump-update-handling-of-xe_force_wake_get-return.patch
 revert-drm-xe-forcewake-add-a-helper-xe_force_wake_ref_has_domain.patch
+kvm-x86-free-vcpus-before-freeing-vm-state.patch
+mm-khugepaged-fix-call-hpage_collapse_scan_file-for-anonymous-vma.patch