]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.6-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 12 Dec 2024 08:28:37 +0000 (09:28 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 12 Dec 2024 08:28:37 +0000 (09:28 +0100)
added patches:
kvm-x86-mmu-ensure-that-kvm_release_pfn_clean-takes-exact-pfn-from-kvm_faultin_pfn.patch

queue-6.6/kvm-x86-mmu-ensure-that-kvm_release_pfn_clean-takes-exact-pfn-from-kvm_faultin_pfn.patch [new file with mode: 0644]
queue-6.6/selftests-ftrace-adjust-offset-for-kprobe-syntax-err.patch
queue-6.6/series

diff --git a/queue-6.6/kvm-x86-mmu-ensure-that-kvm_release_pfn_clean-takes-exact-pfn-from-kvm_faultin_pfn.patch b/queue-6.6/kvm-x86-mmu-ensure-that-kvm_release_pfn_clean-takes-exact-pfn-from-kvm_faultin_pfn.patch
new file mode 100644 (file)
index 0000000..0fd89e0
--- /dev/null
@@ -0,0 +1,136 @@
+From stable+bounces-100060-greg=kroah.com@vger.kernel.org Sun Dec  8 09:39:54 2024
+From: Nikolay Kuratov <kniv@yandex-team.ru>
+Date: Sun,  8 Dec 2024 11:38:30 +0300
+Subject: KVM: x86/mmu: Ensure that kvm_release_pfn_clean() takes exact pfn from kvm_faultin_pfn()
+To: stable@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org, kvm@vger.kernel.org, x86@kernel.org, Sean Christopherson <seanjc@google.com>, Paolo Bonzini <pbonzini@redhat.com>, Thomas Gleixner <tglx@linutronix.de>, Matthew Wilcox <willy@infradead.org>, Christoph Hellwig <hch@lst.de>, Nikolay Kuratov <kniv@yandex-team.ru>
+Message-ID: <20241208083830.77587-1-kniv@yandex-team.ru>
+
+From: Nikolay Kuratov <kniv@yandex-team.ru>
+
+Since 5.16 and prior to 6.13 KVM can't be used with FSDAX
+guest memory (PMD pages). To reproduce the issue you need to reserve
+guest memory with `memmap=` cmdline, create and mount FS in DAX mode
+(tested both XFS and ext4), see doc link below. ndctl command for test:
+ndctl create-namespace -v -e namespace1.0 --map=dev --mode=fsdax -a 2M
+Then pass memory object to qemu like:
+-m 8G -object memory-backend-file,id=ram0,size=8G,\
+mem-path=/mnt/pmem/guestmem,share=on,prealloc=on,dump=off,align=2097152 \
+-numa node,memdev=ram0,cpus=0-1
+QEMU fails to run guest with error: kvm run failed Bad address
+and there are two warnings in dmesg:
+WARN_ON_ONCE(!page_count(page)) in kvm_is_zone_device_page() and
+WARN_ON_ONCE(folio_ref_count(folio) <= 0) in try_grab_folio() (v6.6.63)
+
+It looks like in the past assumption was made that pfn won't change from
+faultin_pfn() to release_pfn_clean(), e.g. see
+commit 4cd071d13c5c ("KVM: x86/mmu: Move calls to thp_adjust() down a level")
+But kvm_page_fault structure made pfn part of mutable state, so
+now release_pfn_clean() can take hugepage-adjusted pfn.
+And it works for all cases (/dev/shm, hugetlb, devdax) except fsdax.
+Apparently in fsdax mode faultin-pfn and adjusted-pfn may refer to
+different folios, so we're getting get_page/put_page imbalance.
+
+To solve this preserve faultin pfn in separate local variable
+and pass it in kvm_release_pfn_clean().
+
+Patch tested for all mentioned guest memory backends with tdp_mmu={0,1}.
+
+No bug in upstream as it was solved fundamentally by
+commit 8dd861cc07e2 ("KVM: x86/mmu: Put refcounted pages instead of blindly releasing pfns")
+and related patch series.
+
+Link: https://nvdimm.docs.kernel.org/2mib_fs_dax.html
+Fixes: 2f6305dd5676 ("KVM: MMU: change kvm_tdp_mmu_map() arguments to kvm_page_fault")
+Co-developed-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Reviewed-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Nikolay Kuratov <kniv@yandex-team.ru>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/mmu/mmu.c         |   10 ++++++++--
+ arch/x86/kvm/mmu/paging_tmpl.h |    5 ++++-
+ 2 files changed, 12 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -4363,6 +4363,7 @@ static bool is_page_fault_stale(struct k
+ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
+ {
++      kvm_pfn_t orig_pfn;
+       int r;
+       /* Dummy roots are used only for shadowing bad guest roots. */
+@@ -4384,6 +4385,8 @@ static int direct_page_fault(struct kvm_
+       if (r != RET_PF_CONTINUE)
+               return r;
++      orig_pfn = fault->pfn;
++
+       r = RET_PF_RETRY;
+       write_lock(&vcpu->kvm->mmu_lock);
+@@ -4398,7 +4401,7 @@ static int direct_page_fault(struct kvm_
+ out_unlock:
+       write_unlock(&vcpu->kvm->mmu_lock);
+-      kvm_release_pfn_clean(fault->pfn);
++      kvm_release_pfn_clean(orig_pfn);
+       return r;
+ }
+@@ -4447,6 +4450,7 @@ EXPORT_SYMBOL_GPL(kvm_handle_page_fault)
+ static int kvm_tdp_mmu_page_fault(struct kvm_vcpu *vcpu,
+                                 struct kvm_page_fault *fault)
+ {
++      kvm_pfn_t orig_pfn;
+       int r;
+       if (page_fault_handle_page_track(vcpu, fault))
+@@ -4464,6 +4468,8 @@ static int kvm_tdp_mmu_page_fault(struct
+       if (r != RET_PF_CONTINUE)
+               return r;
++      orig_pfn = fault->pfn;
++
+       r = RET_PF_RETRY;
+       read_lock(&vcpu->kvm->mmu_lock);
+@@ -4474,7 +4480,7 @@ static int kvm_tdp_mmu_page_fault(struct
+ out_unlock:
+       read_unlock(&vcpu->kvm->mmu_lock);
+-      kvm_release_pfn_clean(fault->pfn);
++      kvm_release_pfn_clean(orig_pfn);
+       return r;
+ }
+ #endif
+--- a/arch/x86/kvm/mmu/paging_tmpl.h
++++ b/arch/x86/kvm/mmu/paging_tmpl.h
+@@ -777,6 +777,7 @@ out_gpte_changed:
+ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
+ {
+       struct guest_walker walker;
++      kvm_pfn_t orig_pfn;
+       int r;
+       WARN_ON_ONCE(fault->is_tdp);
+@@ -835,6 +836,8 @@ static int FNAME(page_fault)(struct kvm_
+                       walker.pte_access &= ~ACC_EXEC_MASK;
+       }
++      orig_pfn = fault->pfn;
++
+       r = RET_PF_RETRY;
+       write_lock(&vcpu->kvm->mmu_lock);
+@@ -848,7 +851,7 @@ static int FNAME(page_fault)(struct kvm_
+ out_unlock:
+       write_unlock(&vcpu->kvm->mmu_lock);
+-      kvm_release_pfn_clean(fault->pfn);
++      kvm_release_pfn_clean(orig_pfn);
+       return r;
+ }
index 330db5fa4aaee6861506539499cbea137fdfdefa..cf627face9dfa3e824ae02b10697c4b2f3936cfa 100644 (file)
@@ -25,14 +25,12 @@ Acked-by: Steven Rostedt (Google) <rostedt@goodmis.org>
 Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
 Signed-off-by: Sasha Levin <sashal@kernel.org>
 ---
.../selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc      | 2 +-
tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc |    2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)
 
-diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
-index a16c6a6f6055c..8f1c58f0c2397 100644
 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
 +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
-@@ -111,7 +111,7 @@ check_error 'p vfs_read $arg* ^$arg*'              # DOUBLE_ARGS
+@@ -111,7 +111,7 @@ check_error 'p vfs_read $arg* ^$arg*'              #
  if !grep -q 'kernel return probes support:' README; then
  check_error 'r vfs_read ^$arg*'                       # NOFENTRY_ARGS
  fi
@@ -41,6 +39,3 @@ index a16c6a6f6055c..8f1c58f0c2397 100644
  check_error 'p vfs_read ^hoge'                        # NO_BTFARG
  check_error 'p kfree ^$arg10'                 # NO_BTFARG (exceed the number of parameters)
  check_error 'r kfree ^$retval'                        # NO_RETVAL
--- 
-2.43.0
-
index 3a2b482e1c838ecc9342082cea34259143328f0f..0928f7945c3a1363248ecabb90fdb560a57ed7d0 100644 (file)
@@ -340,3 +340,4 @@ x86-fix-build-regression-with-config_kexec_jump-enab.patch
 revert-unicode-don-t-special-case-ignorable-code-poi.patch
 vfio-mlx5-align-the-page-tracking-max-message-size-w.patch
 selftests-ftrace-adjust-offset-for-kprobe-syntax-err.patch
+kvm-x86-mmu-ensure-that-kvm_release_pfn_clean-takes-exact-pfn-from-kvm_faultin_pfn.patch