6.17-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 20 Nov 2025 15:59:03 +0000 (16:59 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 20 Nov 2025 15:59:03 +0000 (16:59 +0100)
added patches:
dma-mapping-benchmark-restore-padding-to-ensure-uabi-remained-consistent.patch
gcov-add-support-for-gcc-15.patch
kho-allocate-metadata-directly-from-the-buddy-allocator.patch
kho-increase-metadata-bitmap-size-to-page_size.patch
kho-warn-and-exit-when-unpreserved-page-wasn-t-preserved.patch
kho-warn-and-fail-on-metadata-or-preserved-memory-in-scratch-area.patch
ksm-use-range-walk-function-to-jump-over-holes-in-scan_get_next_rmap_item.patch
ksmbd-close-accepted-socket-when-per-ip-limit-rejects-connection.patch
kvm-arm64-make-all-32bit-id-registers-fully-writable.patch
kvm-guest_memfd-remove-bindings-on-memslot-deletion-when-gmem-is-dying.patch
kvm-nsvm-always-recalculate-lbr-msr-intercepts-in-svm_update_lbrv.patch
kvm-nsvm-fix-and-simplify-lbr-virtualization-handling-with-nested.patch
kvm-svm-mark-vmcb_lbr-dirty-when-msr_ia32_debugctlmsr-is-updated.patch
kvm-vmx-fix-check-for-valid-gva-on-an-ept-violation.patch
loongarch-consolidate-early_ioremap-ioremap_prot.patch
loongarch-kvm-add-delay-until-timer-interrupt-injected.patch
loongarch-kvm-fix-max-supported-vcpus-set-with-eiointc.patch
loongarch-kvm-restore-guest-pmu-if-it-is-enabled.patch
loongarch-let-pte-pmd-_modify-record-the-status-of-_page_dirty.patch
loongarch-use-correct-accessor-to-read-fwpc-mwpc.patch
maple_tree-fix-tracepoint-string-pointers.patch
nfsd-add-missing-fattr4_word2_clone_blksize-from-supported-attributes.patch
nfsd-fix-refcount-leak-in-nfsd_set_fh_dentry.patch
nfsd-free-copynotify-stateid-in-nfs4_free_ol_stateid.patch
strparser-fix-signed-unsigned-mismatch-bug.patch

26 files changed:
queue-6.17/dma-mapping-benchmark-restore-padding-to-ensure-uabi-remained-consistent.patch [new file with mode: 0644]
queue-6.17/gcov-add-support-for-gcc-15.patch [new file with mode: 0644]
queue-6.17/kho-allocate-metadata-directly-from-the-buddy-allocator.patch [new file with mode: 0644]
queue-6.17/kho-increase-metadata-bitmap-size-to-page_size.patch [new file with mode: 0644]
queue-6.17/kho-warn-and-exit-when-unpreserved-page-wasn-t-preserved.patch [new file with mode: 0644]
queue-6.17/kho-warn-and-fail-on-metadata-or-preserved-memory-in-scratch-area.patch [new file with mode: 0644]
queue-6.17/ksm-use-range-walk-function-to-jump-over-holes-in-scan_get_next_rmap_item.patch [new file with mode: 0644]
queue-6.17/ksmbd-close-accepted-socket-when-per-ip-limit-rejects-connection.patch [new file with mode: 0644]
queue-6.17/kvm-arm64-make-all-32bit-id-registers-fully-writable.patch [new file with mode: 0644]
queue-6.17/kvm-guest_memfd-remove-bindings-on-memslot-deletion-when-gmem-is-dying.patch [new file with mode: 0644]
queue-6.17/kvm-nsvm-always-recalculate-lbr-msr-intercepts-in-svm_update_lbrv.patch [new file with mode: 0644]
queue-6.17/kvm-nsvm-fix-and-simplify-lbr-virtualization-handling-with-nested.patch [new file with mode: 0644]
queue-6.17/kvm-svm-mark-vmcb_lbr-dirty-when-msr_ia32_debugctlmsr-is-updated.patch [new file with mode: 0644]
queue-6.17/kvm-vmx-fix-check-for-valid-gva-on-an-ept-violation.patch [new file with mode: 0644]
queue-6.17/loongarch-consolidate-early_ioremap-ioremap_prot.patch [new file with mode: 0644]
queue-6.17/loongarch-kvm-add-delay-until-timer-interrupt-injected.patch [new file with mode: 0644]
queue-6.17/loongarch-kvm-fix-max-supported-vcpus-set-with-eiointc.patch [new file with mode: 0644]
queue-6.17/loongarch-kvm-restore-guest-pmu-if-it-is-enabled.patch [new file with mode: 0644]
queue-6.17/loongarch-let-pte-pmd-_modify-record-the-status-of-_page_dirty.patch [new file with mode: 0644]
queue-6.17/loongarch-use-correct-accessor-to-read-fwpc-mwpc.patch [new file with mode: 0644]
queue-6.17/maple_tree-fix-tracepoint-string-pointers.patch [new file with mode: 0644]
queue-6.17/nfsd-add-missing-fattr4_word2_clone_blksize-from-supported-attributes.patch [new file with mode: 0644]
queue-6.17/nfsd-fix-refcount-leak-in-nfsd_set_fh_dentry.patch [new file with mode: 0644]
queue-6.17/nfsd-free-copynotify-stateid-in-nfs4_free_ol_stateid.patch [new file with mode: 0644]
queue-6.17/series
queue-6.17/strparser-fix-signed-unsigned-mismatch-bug.patch [new file with mode: 0644]

diff --git a/queue-6.17/dma-mapping-benchmark-restore-padding-to-ensure-uabi-remained-consistent.patch b/queue-6.17/dma-mapping-benchmark-restore-padding-to-ensure-uabi-remained-consistent.patch
new file mode 100644 (file)
index 0000000..e480e83
--- /dev/null
@@ -0,0 +1,41 @@
+From 23ee8a2563a0f24cf4964685ced23c32be444ab8 Mon Sep 17 00:00:00 2001
+From: Qinxin Xia <xiaqinxin@huawei.com>
+Date: Tue, 28 Oct 2025 20:08:59 +0800
+Subject: dma-mapping: benchmark: Restore padding to ensure uABI remained consistent
+
+From: Qinxin Xia <xiaqinxin@huawei.com>
+
+commit 23ee8a2563a0f24cf4964685ced23c32be444ab8 upstream.
+
+The padding field in the structure was previously reserved to
+maintain a stable interface for potential new fields, ensuring
+compatibility with user-space shared data structures.
+However, it was accidentally removed by tiantao in a prior commit,
+which may lead to incompatibility between user space and the kernel.
+
+This patch reinstates the padding to restore the original structure
+layout and preserve compatibility.
+
+Fixes: 8ddde07a3d28 ("dma-mapping: benchmark: extract a common header file for map_benchmark definition")
+Cc: stable@vger.kernel.org
+Acked-by: Barry Song <baohua@kernel.org>
+Signed-off-by: Qinxin Xia <xiaqinxin@huawei.com>
+Reported-by: Barry Song <baohua@kernel.org>
+Closes: https://lore.kernel.org/lkml/CAGsJ_4waiZ2+NBJG+SCnbNk+nQ_ZF13_Q5FHJqZyxyJTcEop2A@mail.gmail.com/
+Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
+Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
+Link: https://lore.kernel.org/r/20251028120900.2265511-2-xiaqinxin@huawei.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/map_benchmark.h |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/include/linux/map_benchmark.h
++++ b/include/linux/map_benchmark.h
+@@ -27,5 +27,6 @@ struct map_benchmark {
+       __u32 dma_dir; /* DMA data direction */
+       __u32 dma_trans_ns; /* time for DMA transmission in ns */
+       __u32 granule;  /* how many PAGE_SIZE will do map/unmap once a time */
++      __u8 expansion[76]; /* For future use */
+ };
+ #endif /* _KERNEL_DMA_BENCHMARK_H */
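
The patch above is an instance of a general uAPI rule: a structure shared with user space keeps a fixed size by carrying explicit reserved padding, which later kernels can carve new fields out of without breaking existing binaries. A minimal sketch of the idea in C, using a hypothetical structure and size (not the real map_benchmark layout):

#include <linux/build_bug.h>
#include <linux/types.h>

/* Hypothetical user/kernel shared structure. */
struct example_uapi {
	__u64 result_ns;	/* output: measured time in ns */
	__u32 flags;		/* input: behaviour flags */
	__u8  expansion[52];	/* reserved for future fields, must stay zero */
};

/*
 * Pin the ABI: a new field must replace padding bytes rather than grow the
 * structure, or this check fails at build time.
 */
static_assert(sizeof(struct example_uapi) == 64, "uAPI layout must not change");
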
diff --git a/queue-6.17/gcov-add-support-for-gcc-15.patch b/queue-6.17/gcov-add-support-for-gcc-15.patch
new file mode 100644 (file)
index 0000000..a5b974a
--- /dev/null
@@ -0,0 +1,40 @@
+From ec4d11fc4b2dd4a2fa8c9d801ee9753b74623554 Mon Sep 17 00:00:00 2001
+From: Peter Oberparleiter <oberpar@linux.ibm.com>
+Date: Tue, 28 Oct 2025 12:51:25 +0100
+Subject: gcov: add support for GCC 15
+
+From: Peter Oberparleiter <oberpar@linux.ibm.com>
+
+commit ec4d11fc4b2dd4a2fa8c9d801ee9753b74623554 upstream.
+
+Using gcov on kernels compiled with GCC 15 results in truncated 16-byte
+long .gcda files with no usable data.  To fix this, update GCOV_COUNTERS
+to match the value defined by GCC 15.
+
+Tested with GCC 14.3.0 and GCC 15.2.0.
+
+Link: https://lkml.kernel.org/r/20251028115125.1319410-1-oberpar@linux.ibm.com
+Signed-off-by: Peter Oberparleiter <oberpar@linux.ibm.com>
+Reported-by: Matthieu Baerts <matttbe@kernel.org>
+Closes: https://github.com/linux-test-project/lcov/issues/445
+Tested-by: Matthieu Baerts <matttbe@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/gcov/gcc_4_7.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/kernel/gcov/gcc_4_7.c
++++ b/kernel/gcov/gcc_4_7.c
+@@ -18,7 +18,9 @@
+ #include <linux/mm.h>
+ #include "gcov.h"
+-#if (__GNUC__ >= 14)
++#if (__GNUC__ >= 15)
++#define GCOV_COUNTERS                 10
++#elif (__GNUC__ >= 14)
+ #define GCOV_COUNTERS                 9
+ #elif (__GNUC__ >= 10)
+ #define GCOV_COUNTERS                 8
diff --git a/queue-6.17/kho-allocate-metadata-directly-from-the-buddy-allocator.patch b/queue-6.17/kho-allocate-metadata-directly-from-the-buddy-allocator.patch
new file mode 100644 (file)
index 0000000..fc5c917
--- /dev/null
@@ -0,0 +1,90 @@
+From fa759cd75bce5489eed34596daa53f721849a86f Mon Sep 17 00:00:00 2001
+From: Pasha Tatashin <pasha.tatashin@soleen.com>
+Date: Mon, 20 Oct 2025 20:08:52 -0400
+Subject: kho: allocate metadata directly from the buddy allocator
+
+From: Pasha Tatashin <pasha.tatashin@soleen.com>
+
+commit fa759cd75bce5489eed34596daa53f721849a86f upstream.
+
+KHO allocates metadata for its preserved memory map using the slab
+allocator via kzalloc().  This metadata is temporary and is used by the
+next kernel during early boot to find preserved memory.
+
+A problem arises when KFENCE is enabled.  kzalloc() calls can be randomly
+intercepted by kfence_alloc(), which services the allocation from a
+dedicated KFENCE memory pool.  This pool is allocated early in boot via
+memblock.
+
+When booting via KHO, the memblock allocator is restricted to a "scratch
+area", forcing the KFENCE pool to be allocated within it.  This creates a
+conflict, as the scratch area is expected to be ephemeral and
+overwriteable by a subsequent kexec.  If KHO metadata is placed in this
+KFENCE pool, it leads to memory corruption when the next kernel is loaded.
+
+To fix this, modify KHO to allocate its metadata directly from the buddy
+allocator instead of slab.
+
+Link: https://lkml.kernel.org/r/20251021000852.2924827-4-pasha.tatashin@soleen.com
+Fixes: fc33e4b44b27 ("kexec: enable KHO support for memory preservation")
+Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
+Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
+Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
+Reviewed-by: David Matlack <dmatlack@google.com>
+Cc: Alexander Graf <graf@amazon.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Jason Gunthorpe <jgg@ziepe.ca>
+Cc: Jonathan Corbet <corbet@lwn.net>
+Cc: Masahiro Yamada <masahiroy@kernel.org>
+Cc: Miguel Ojeda <ojeda@kernel.org>
+Cc: Randy Dunlap <rdunlap@infradead.org>
+Cc: Samiullah Khawaja <skhawaja@google.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/gfp.h     |    3 +++
+ kernel/kexec_handover.c |    6 +++---
+ 2 files changed, 6 insertions(+), 3 deletions(-)
+
+--- a/include/linux/gfp.h
++++ b/include/linux/gfp.h
+@@ -7,6 +7,7 @@
+ #include <linux/mmzone.h>
+ #include <linux/topology.h>
+ #include <linux/alloc_tag.h>
++#include <linux/cleanup.h>
+ #include <linux/sched.h>
+ struct vm_area_struct;
+@@ -463,4 +464,6 @@ static inline struct folio *folio_alloc_
+ /* This should be paired with folio_put() rather than free_contig_range(). */
+ #define folio_alloc_gigantic(...) alloc_hooks(folio_alloc_gigantic_noprof(__VA_ARGS__))
++DEFINE_FREE(free_page, void *, free_page((unsigned long)_T))
++
+ #endif /* __LINUX_GFP_H */
+--- a/kernel/kexec_handover.c
++++ b/kernel/kexec_handover.c
+@@ -102,7 +102,7 @@ static void *xa_load_or_alloc(struct xar
+       if (res)
+               return res;
+-      void *elm __free(kfree) = kzalloc(PAGE_SIZE, GFP_KERNEL);
++      void *elm __free(free_page) = (void *)get_zeroed_page(GFP_KERNEL);
+       if (!elm)
+               return ERR_PTR(-ENOMEM);
+@@ -266,9 +266,9 @@ static_assert(sizeof(struct khoser_mem_c
+ static struct khoser_mem_chunk *new_chunk(struct khoser_mem_chunk *cur_chunk,
+                                         unsigned long order)
+ {
+-      struct khoser_mem_chunk *chunk __free(kfree) = NULL;
++      struct khoser_mem_chunk *chunk __free(free_page) = NULL;
+-      chunk = kzalloc(PAGE_SIZE, GFP_KERNEL);
++      chunk = (void *)get_zeroed_page(GFP_KERNEL);
+       if (!chunk)
+               return ERR_PTR(-ENOMEM);
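
The conversion above leans on the kernel's scoped-cleanup helpers from <linux/cleanup.h>: DEFINE_FREE() (added to gfp.h by this patch for free_page) registers a cleanup routine, __free() ties it to a local variable, and no_free_ptr() hands ownership out on the success path. A hedged sketch of the pattern with a hypothetical helper, not code taken from the patch:

#include <linux/cleanup.h>
#include <linux/err.h>
#include <linux/gfp.h>

/* Hypothetical helper: allocate one zeroed page of metadata from the buddy allocator. */
static void *example_alloc_metadata(void)
{
	void *page __free(free_page) = (void *)get_zeroed_page(GFP_KERNEL);

	if (!page)
		return ERR_PTR(-ENOMEM);

	/* Any early return on an error path frees the page automatically. */

	return no_free_ptr(page);	/* success: ownership moves to the caller */
}
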
diff --git a/queue-6.17/kho-increase-metadata-bitmap-size-to-page_size.patch b/queue-6.17/kho-increase-metadata-bitmap-size-to-page_size.patch
new file mode 100644 (file)
index 0000000..26489be
--- /dev/null
@@ -0,0 +1,113 @@
+From a2fff99f92dae9c0eaf0d75de3def70ec68dad92 Mon Sep 17 00:00:00 2001
+From: Pasha Tatashin <pasha.tatashin@soleen.com>
+Date: Mon, 20 Oct 2025 20:08:51 -0400
+Subject: kho: increase metadata bitmap size to PAGE_SIZE
+
+From: Pasha Tatashin <pasha.tatashin@soleen.com>
+
+commit a2fff99f92dae9c0eaf0d75de3def70ec68dad92 upstream.
+
+KHO memory preservation metadata is preserved in 512-byte chunks, which
+requires allocating them from the slab allocator.  Slabs are not safe to
+use with KHO because of kfence, and because partial slabs may leak data
+to the next kernel.  Change the chunk size to PAGE_SIZE.
+
+The kfence allocator specifically may cause memory corruption, because it
+randomly provides slab objects that can lie within the scratch area.  The
+reason is that kfence allocates its objects before the KHO scratch area
+is marked as a CMA region.
+
+While this change could potentially increase metadata overhead on systems
+with sparsely preserved memory, this is being mitigated by ongoing work to
+reduce sparseness during preservation via 1G guest pages.  Furthermore,
+this change aligns with future work on a stateless KHO, which will also
+use page-sized bitmaps for its radix tree metadata.
+
+Link: https://lkml.kernel.org/r/20251021000852.2924827-3-pasha.tatashin@soleen.com
+Fixes: fc33e4b44b27 ("kexec: enable KHO support for memory preservation")
+Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
+Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
+Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
+Cc: Alexander Graf <graf@amazon.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: David Matlack <dmatlack@google.com>
+Cc: Jason Gunthorpe <jgg@ziepe.ca>
+Cc: Jonathan Corbet <corbet@lwn.net>
+Cc: Masahiro Yamada <masahiroy@kernel.org>
+Cc: Miguel Ojeda <ojeda@kernel.org>
+Cc: Randy Dunlap <rdunlap@infradead.org>
+Cc: Samiullah Khawaja <skhawaja@google.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/kexec_handover.c |   21 +++++++++++----------
+ 1 file changed, 11 insertions(+), 10 deletions(-)
+
+--- a/kernel/kexec_handover.c
++++ b/kernel/kexec_handover.c
+@@ -52,10 +52,10 @@ early_param("kho", kho_parse_enable);
+  * Keep track of memory that is to be preserved across KHO.
+  *
+  * The serializing side uses two levels of xarrays to manage chunks of per-order
+- * 512 byte bitmaps. For instance if PAGE_SIZE = 4096, the entire 1G order of a
+- * 1TB system would fit inside a single 512 byte bitmap. For order 0 allocations
+- * each bitmap will cover 16M of address space. Thus, for 16G of memory at most
+- * 512K of bitmap memory will be needed for order 0.
++ * PAGE_SIZE byte bitmaps. For instance if PAGE_SIZE = 4096, the entire 1G order
++ * of a 8TB system would fit inside a single 4096 byte bitmap. For order 0
++ * allocations each bitmap will cover 128M of address space. Thus, for 16G of
++ * memory at most 512K of bitmap memory will be needed for order 0.
+  *
+  * This approach is fully incremental, as the serialization progresses folios
+  * can continue be aggregated to the tracker. The final step, immediately prior
+@@ -63,12 +63,14 @@ early_param("kho", kho_parse_enable);
+  * successor kernel to parse.
+  */
+-#define PRESERVE_BITS (512 * 8)
++#define PRESERVE_BITS (PAGE_SIZE * 8)
+ struct kho_mem_phys_bits {
+       DECLARE_BITMAP(preserve, PRESERVE_BITS);
+ };
++static_assert(sizeof(struct kho_mem_phys_bits) == PAGE_SIZE);
++
+ struct kho_mem_phys {
+       /*
+        * Points to kho_mem_phys_bits, a sparse bitmap array. Each bit is sized
+@@ -93,19 +95,19 @@ struct kho_serialization {
+       struct khoser_mem_chunk *preserved_mem_map;
+ };
+-static void *xa_load_or_alloc(struct xarray *xa, unsigned long index, size_t sz)
++static void *xa_load_or_alloc(struct xarray *xa, unsigned long index)
+ {
+       void *res = xa_load(xa, index);
+       if (res)
+               return res;
+-      void *elm __free(kfree) = kzalloc(sz, GFP_KERNEL);
++      void *elm __free(kfree) = kzalloc(PAGE_SIZE, GFP_KERNEL);
+       if (!elm)
+               return ERR_PTR(-ENOMEM);
+-      if (WARN_ON(kho_scratch_overlap(virt_to_phys(elm), sz)))
++      if (WARN_ON(kho_scratch_overlap(virt_to_phys(elm), PAGE_SIZE)))
+               return ERR_PTR(-EINVAL);
+       res = xa_cmpxchg(xa, index, NULL, elm, GFP_KERNEL);
+@@ -175,8 +177,7 @@ static int __kho_preserve_order(struct k
+               }
+       }
+-      bits = xa_load_or_alloc(&physxa->phys_bits, pfn_high / PRESERVE_BITS,
+-                              sizeof(*bits));
++      bits = xa_load_or_alloc(&physxa->phys_bits, pfn_high / PRESERVE_BITS);
+       if (IS_ERR(bits))
+               return PTR_ERR(bits);
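
The coverage numbers quoted in the updated comment follow directly from the bitmap size. A small standalone C check of that arithmetic, assuming PAGE_SIZE is 4096 (a sketch, not kernel code):

#include <assert.h>

int main(void)
{
	const unsigned long long page_size     = 4096;
	const unsigned long long preserve_bits = page_size * 8;	/* 32768 bits per bitmap */

	/* Order 0: one bit per 4 KiB page, so one bitmap spans 32768 * 4 KiB = 128 MiB. */
	const unsigned long long order0_span = preserve_bits * page_size;
	assert(order0_span == 128ULL << 20);

	/* 16 GiB of order-0 memory needs 128 such bitmaps, i.e. 512 KiB of metadata. */
	const unsigned long long nr_bitmaps = (16ULL << 30) / order0_span;
	assert(nr_bitmaps * page_size == 512ULL << 10);

	return 0;
}
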
diff --git a/queue-6.17/kho-warn-and-exit-when-unpreserved-page-wasn-t-preserved.patch b/queue-6.17/kho-warn-and-exit-when-unpreserved-page-wasn-t-preserved.patch
new file mode 100644 (file)
index 0000000..cacb512
--- /dev/null
@@ -0,0 +1,50 @@
+From b05addf6f0596edb1f82ab4059438c7ef2d2686d Mon Sep 17 00:00:00 2001
+From: Pratyush Yadav <pratyush@kernel.org>
+Date: Mon, 3 Nov 2025 19:02:32 +0100
+Subject: kho: warn and exit when unpreserved page wasn't preserved
+
+From: Pratyush Yadav <pratyush@kernel.org>
+
+commit b05addf6f0596edb1f82ab4059438c7ef2d2686d upstream.
+
+Calling __kho_unpreserve() on a pair of (pfn, end_pfn) that wasn't
+preserved is a bug.  Currently, if that is done, the physxa or bits can be
+NULL.  This results in a soft lockup since a NULL physxa or bits results
+in redoing the loop without ever making any progress.
+
+Return when physxa or bits are not found, but WARN first to loudly
+indicate invalid behaviour.
+
+Link: https://lkml.kernel.org/r/20251103180235.71409-3-pratyush@kernel.org
+Fixes: fc33e4b44b27 ("kexec: enable KHO support for memory preservation")
+Signed-off-by: Pratyush Yadav <pratyush@kernel.org>
+Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
+Cc: Alexander Graf <graf@amazon.com>
+Cc: Baoquan He <bhe@redhat.com>
+Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/kexec_handover.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/kernel/kexec_handover.c
++++ b/kernel/kexec_handover.c
+@@ -131,12 +131,12 @@ static void __kho_unpreserve(struct kho_
+               const unsigned long pfn_high = pfn >> order;
+               physxa = xa_load(&track->orders, order);
+-              if (!physxa)
+-                      continue;
++              if (WARN_ON_ONCE(!physxa))
++                      return;
+               bits = xa_load(&physxa->phys_bits, pfn_high / PRESERVE_BITS);
+-              if (!bits)
+-                      continue;
++              if (WARN_ON_ONCE(!bits))
++                      return;
+               clear_bit(pfn_high % PRESERVE_BITS, bits->preserve);
diff --git a/queue-6.17/kho-warn-and-fail-on-metadata-or-preserved-memory-in-scratch-area.patch b/queue-6.17/kho-warn-and-fail-on-metadata-or-preserved-memory-in-scratch-area.patch
new file mode 100644 (file)
index 0000000..dd70c6e
--- /dev/null
@@ -0,0 +1,318 @@
+From e38f65d317df1fd2dcafe614d9c537475ecf9992 Mon Sep 17 00:00:00 2001
+From: Pasha Tatashin <pasha.tatashin@soleen.com>
+Date: Mon, 20 Oct 2025 20:08:50 -0400
+Subject: kho: warn and fail on metadata or preserved memory in scratch area
+
+From: Pasha Tatashin <pasha.tatashin@soleen.com>
+
+commit e38f65d317df1fd2dcafe614d9c537475ecf9992 upstream.
+
+Patch series "KHO: kfence + KHO memory corruption fix", v3.
+
+This series fixes a memory corruption bug in KHO that occurs when KFENCE
+is enabled.
+
+The root cause is that KHO metadata, allocated via kzalloc(), can be
+randomly serviced by kfence_alloc().  When a kernel boots via KHO, the
+early memblock allocator is restricted to a "scratch area".  This forces
+the KFENCE pool to be allocated within this scratch area, creating a
+conflict.  If KHO metadata is subsequently placed in this pool, it gets
+corrupted during the next kexec operation.
+
+Google is using KHO and has had obscure crashes due to this memory
+corruption, with stacks all over the place.  I would prefer this fix to be
+properly backported to stable so we can also automatically consume it once
+we switch to the upstream KHO.
+
+Patch 1/3 introduces a debug-only feature (CONFIG_KEXEC_HANDOVER_DEBUG)
+that adds checks to detect and fail any operation that attempts to place
+KHO metadata or preserved memory within the scratch area.  This serves as
+a validation and diagnostic tool to confirm the problem without affecting
+production builds.
+
+Patch 2/3 increases the bitmap size to PAGE_SIZE, so the buddy allocator can be used.
+
+Patch 3/3 provides the fix by modifying KHO to allocate its metadata
+directly from the buddy allocator instead of slab.  This bypasses the
+KFENCE interception entirely.
+
+
+This patch (of 3):
+
+It is invalid for KHO metadata or preserved memory regions to be located
+within the KHO scratch area, as this area is overwritten when the next
+kernel is loaded, and used early in boot by the next kernel.  This can
+lead to memory corruption.
+
+Add checks to kho_preserve_* and KHO's internal metadata allocators
+(xa_load_or_alloc, new_chunk) to verify that the physical address of the
+memory does not overlap with any defined scratch region.  If an overlap is
+detected, the operation will fail and a WARN_ON is triggered.  To avoid
+performance overhead in production kernels, these checks are enabled only
+when CONFIG_KEXEC_HANDOVER_DEBUG is selected.
+
+[rppt@kernel.org: fix KEXEC_HANDOVER_DEBUG Kconfig dependency]
+  Link: https://lkml.kernel.org/r/aQHUyyFtiNZhx8jo@kernel.org
+[pasha.tatashin@soleen.com: build fix]
+  Link: https://lkml.kernel.org/r/CA+CK2bBnorfsTymKtv4rKvqGBHs=y=MjEMMRg_tE-RME6n-zUw@mail.gmail.com
+Link: https://lkml.kernel.org/r/20251021000852.2924827-1-pasha.tatashin@soleen.com
+Link: https://lkml.kernel.org/r/20251021000852.2924827-2-pasha.tatashin@soleen.com
+Fixes: fc33e4b44b27 ("kexec: enable KHO support for memory preservation")
+Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
+Signed-off-by: Mike Rapoport <rppt@kernel.org>
+Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
+Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
+Cc: Alexander Graf <graf@amazon.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: David Matlack <dmatlack@google.com>
+Cc: Jason Gunthorpe <jgg@ziepe.ca>
+Cc: Jonathan Corbet <corbet@lwn.net>
+Cc: Masahiro Yamada <masahiroy@kernel.org>
+Cc: Miguel Ojeda <ojeda@kernel.org>
+Cc: Randy Dunlap <rdunlap@infradead.org>
+Cc: Samiullah Khawaja <skhawaja@google.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/Kconfig.kexec             |    9 ++++++
+ kernel/Makefile                  |    1 
+ kernel/kexec_handover.c          |   57 ++++++++++++++++++++++++++-------------
+ kernel/kexec_handover_debug.c    |   25 +++++++++++++++++
+ kernel/kexec_handover_internal.h |   20 +++++++++++++
+ 5 files changed, 93 insertions(+), 19 deletions(-)
+ create mode 100644 kernel/kexec_handover_debug.c
+ create mode 100644 kernel/kexec_handover_internal.h
+
+--- a/kernel/Kconfig.kexec
++++ b/kernel/Kconfig.kexec
+@@ -109,6 +109,15 @@ config KEXEC_HANDOVER
+         to keep data or state alive across the kexec. For this to work,
+         both source and target kernels need to have this option enabled.
++config KEXEC_HANDOVER_DEBUG
++      bool "Enable Kexec Handover debug checks"
++      depends on KEXEC_HANDOVER
++      help
++        This option enables extra sanity checks for the Kexec Handover
++        subsystem. Since KHO performance is crucial in live update
++        scenarios and the extra code might add overhead, it is
++        only optionally enabled.
++
+ config CRASH_DUMP
+       bool "kernel crash dumps"
+       default ARCH_DEFAULT_CRASH_DUMP
+--- a/kernel/Makefile
++++ b/kernel/Makefile
+@@ -82,6 +82,7 @@ obj-$(CONFIG_KEXEC) += kexec.o
+ obj-$(CONFIG_KEXEC_FILE) += kexec_file.o
+ obj-$(CONFIG_KEXEC_ELF) += kexec_elf.o
+ obj-$(CONFIG_KEXEC_HANDOVER) += kexec_handover.o
++obj-$(CONFIG_KEXEC_HANDOVER_DEBUG) += kexec_handover_debug.o
+ obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
+ obj-$(CONFIG_COMPAT) += compat.o
+ obj-$(CONFIG_CGROUPS) += cgroup/
+--- a/kernel/kexec_handover.c
++++ b/kernel/kexec_handover.c
+@@ -8,6 +8,7 @@
+ #define pr_fmt(fmt) "KHO: " fmt
++#include <linux/cleanup.h>
+ #include <linux/cma.h>
+ #include <linux/count_zeros.h>
+ #include <linux/debugfs.h>
+@@ -21,6 +22,7 @@
+ #include <asm/early_ioremap.h>
++#include "kexec_handover_internal.h"
+ /*
+  * KHO is tightly coupled with mm init and needs access to some of mm
+  * internal APIs.
+@@ -93,26 +95,26 @@ struct kho_serialization {
+ static void *xa_load_or_alloc(struct xarray *xa, unsigned long index, size_t sz)
+ {
+-      void *elm, *res;
++      void *res = xa_load(xa, index);
+-      elm = xa_load(xa, index);
+-      if (elm)
+-              return elm;
++      if (res)
++              return res;
++
++      void *elm __free(kfree) = kzalloc(sz, GFP_KERNEL);
+-      elm = kzalloc(sz, GFP_KERNEL);
+       if (!elm)
+               return ERR_PTR(-ENOMEM);
++      if (WARN_ON(kho_scratch_overlap(virt_to_phys(elm), sz)))
++              return ERR_PTR(-EINVAL);
++
+       res = xa_cmpxchg(xa, index, NULL, elm, GFP_KERNEL);
+       if (xa_is_err(res))
+-              res = ERR_PTR(xa_err(res));
+-
+-      if (res) {
+-              kfree(elm);
++              return ERR_PTR(xa_err(res));
++      else if (res)
+               return res;
+-      }
+-      return elm;
++      return no_free_ptr(elm);
+ }
+ static void __kho_unpreserve(struct kho_mem_track *track, unsigned long pfn,
+@@ -263,15 +265,19 @@ static_assert(sizeof(struct khoser_mem_c
+ static struct khoser_mem_chunk *new_chunk(struct khoser_mem_chunk *cur_chunk,
+                                         unsigned long order)
+ {
+-      struct khoser_mem_chunk *chunk;
++      struct khoser_mem_chunk *chunk __free(kfree) = NULL;
+       chunk = kzalloc(PAGE_SIZE, GFP_KERNEL);
+       if (!chunk)
+-              return NULL;
++              return ERR_PTR(-ENOMEM);
++
++      if (WARN_ON(kho_scratch_overlap(virt_to_phys(chunk), PAGE_SIZE)))
++              return ERR_PTR(-EINVAL);
++
+       chunk->hdr.order = order;
+       if (cur_chunk)
+               KHOSER_STORE_PTR(cur_chunk->hdr.next, chunk);
+-      return chunk;
++      return no_free_ptr(chunk);
+ }
+ static void kho_mem_ser_free(struct khoser_mem_chunk *first_chunk)
+@@ -292,14 +298,17 @@ static int kho_mem_serialize(struct kho_
+       struct khoser_mem_chunk *chunk = NULL;
+       struct kho_mem_phys *physxa;
+       unsigned long order;
++      int err = -ENOMEM;
+       xa_for_each(&ser->track.orders, order, physxa) {
+               struct kho_mem_phys_bits *bits;
+               unsigned long phys;
+               chunk = new_chunk(chunk, order);
+-              if (!chunk)
++              if (IS_ERR(chunk)) {
++                      err = PTR_ERR(chunk);
+                       goto err_free;
++              }
+               if (!first_chunk)
+                       first_chunk = chunk;
+@@ -309,8 +318,10 @@ static int kho_mem_serialize(struct kho_
+                       if (chunk->hdr.num_elms == ARRAY_SIZE(chunk->bitmaps)) {
+                               chunk = new_chunk(chunk, order);
+-                              if (!chunk)
++                              if (IS_ERR(chunk)) {
++                                      err = PTR_ERR(chunk);
+                                       goto err_free;
++                              }
+                       }
+                       elm = &chunk->bitmaps[chunk->hdr.num_elms];
+@@ -327,7 +338,7 @@ static int kho_mem_serialize(struct kho_
+ err_free:
+       kho_mem_ser_free(first_chunk);
+-      return -ENOMEM;
++      return err;
+ }
+ static void __init deserialize_bitmap(unsigned int order,
+@@ -380,8 +391,8 @@ static void __init kho_mem_deserialize(c
+  * area for early allocations that happen before page allocator is
+  * initialized.
+  */
+-static struct kho_scratch *kho_scratch;
+-static unsigned int kho_scratch_cnt;
++struct kho_scratch *kho_scratch;
++unsigned int kho_scratch_cnt;
+ /*
+  * The scratch areas are scaled by default as percent of memory allocated from
+@@ -684,6 +695,9 @@ int kho_preserve_folio(struct folio *fol
+       if (kho_out.finalized)
+               return -EBUSY;
++      if (WARN_ON(kho_scratch_overlap(pfn << PAGE_SHIFT, PAGE_SIZE << order)))
++              return -EINVAL;
++
+       return __kho_preserve_order(track, pfn, order);
+ }
+ EXPORT_SYMBOL_GPL(kho_preserve_folio);
+@@ -713,6 +727,11 @@ int kho_preserve_phys(phys_addr_t phys,
+       if (!PAGE_ALIGNED(phys) || !PAGE_ALIGNED(size))
+               return -EINVAL;
++      if (WARN_ON(kho_scratch_overlap(start_pfn << PAGE_SHIFT,
++                                      nr_pages << PAGE_SHIFT))) {
++              return -EINVAL;
++      }
++
+       while (pfn < end_pfn) {
+               const unsigned int order =
+                       min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn));
+--- /dev/null
++++ b/kernel/kexec_handover_debug.c
+@@ -0,0 +1,25 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/*
++ * kexec_handover_debug.c - kexec handover optional debug functionality
++ * Copyright (C) 2025 Google LLC, Pasha Tatashin <pasha.tatashin@soleen.com>
++ */
++
++#define pr_fmt(fmt) "KHO: " fmt
++
++#include "kexec_handover_internal.h"
++
++bool kho_scratch_overlap(phys_addr_t phys, size_t size)
++{
++      phys_addr_t scratch_start, scratch_end;
++      unsigned int i;
++
++      for (i = 0; i < kho_scratch_cnt; i++) {
++              scratch_start = kho_scratch[i].addr;
++              scratch_end = kho_scratch[i].addr + kho_scratch[i].size;
++
++              if (phys < scratch_end && (phys + size) > scratch_start)
++                      return true;
++      }
++
++      return false;
++}
+--- /dev/null
++++ b/kernel/kexec_handover_internal.h
+@@ -0,0 +1,20 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef LINUX_KEXEC_HANDOVER_INTERNAL_H
++#define LINUX_KEXEC_HANDOVER_INTERNAL_H
++
++#include <linux/kexec_handover.h>
++#include <linux/types.h>
++
++extern struct kho_scratch *kho_scratch;
++extern unsigned int kho_scratch_cnt;
++
++#ifdef CONFIG_KEXEC_HANDOVER_DEBUG
++bool kho_scratch_overlap(phys_addr_t phys, size_t size);
++#else
++static inline bool kho_scratch_overlap(phys_addr_t phys, size_t size)
++{
++      return false;
++}
++#endif /* CONFIG_KEXEC_HANDOVER_DEBUG */
++
++#endif /* LINUX_KEXEC_HANDOVER_INTERNAL_H */
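
The kho_scratch_overlap() helper added above is the usual half-open interval test: [phys, phys + size) intersects a scratch range [start, end) exactly when each range begins before the other ends. A standalone C sketch with made-up addresses, just to show the boundary behaviour:

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

/* Same predicate as kho_scratch_overlap(), for a single scratch range. */
static bool ranges_overlap(unsigned long long phys, size_t size,
			   unsigned long long start, unsigned long long end)
{
	return phys < end && (phys + size) > start;
}

int main(void)
{
	/* Hypothetical scratch area at [0x1000, 0x3000). */
	assert( ranges_overlap(0x0800, 0x1000, 0x1000, 0x3000));	/* straddles the start  */
	assert( ranges_overlap(0x2000, 0x0100, 0x1000, 0x3000));	/* lies fully inside    */
	assert(!ranges_overlap(0x3000, 0x1000, 0x1000, 0x3000));	/* starts where it ends */
	assert(!ranges_overlap(0x0000, 0x1000, 0x1000, 0x3000));	/* ends where it starts */
	return 0;
}
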
diff --git a/queue-6.17/ksm-use-range-walk-function-to-jump-over-holes-in-scan_get_next_rmap_item.patch b/queue-6.17/ksm-use-range-walk-function-to-jump-over-holes-in-scan_get_next_rmap_item.patch
new file mode 100644 (file)
index 0000000..31d69e0
--- /dev/null
@@ -0,0 +1,212 @@
+From f5548c318d6520d4fa3c5ed6003eeb710763cbc5 Mon Sep 17 00:00:00 2001
+From: Pedro Demarchi Gomes <pedrodemargomes@gmail.com>
+Date: Wed, 22 Oct 2025 12:30:59 -0300
+Subject: ksm: use range-walk function to jump over holes in scan_get_next_rmap_item
+
+From: Pedro Demarchi Gomes <pedrodemargomes@gmail.com>
+
+commit f5548c318d6520d4fa3c5ed6003eeb710763cbc5 upstream.
+
+Currently, scan_get_next_rmap_item() walks every page address in a VMA to
+locate mergeable pages.  This becomes highly inefficient when scanning
+large virtual memory areas that contain mostly unmapped regions, causing
+ksmd to use a large amount of cpu without deduplicating many pages.
+
+This patch replaces the per-address lookup with a range walk using
+walk_page_range().  The range walker allows KSM to skip over entire
+unmapped holes in a VMA, avoiding unnecessary lookups.  This problem was
+previously discussed in [1].
+
+Consider the following test program which creates a 32 TiB mapping in the
+virtual address space but only populates a single page:
+
+#include <unistd.h>
+#include <stdio.h>
+#include <sys/mman.h>
+
+/* 32 TiB */
+const size_t size = 32ul * 1024 * 1024 * 1024 * 1024;
+
+int main() {
+        char *area = mmap(NULL, size, PROT_READ | PROT_WRITE,
+                          MAP_NORESERVE | MAP_PRIVATE | MAP_ANON, -1, 0);
+
+        if (area == MAP_FAILED) {
+                perror("mmap() failed\n");
+                return -1;
+        }
+
+        /* Populate a single page such that we get an anon_vma. */
+        *area = 0;
+
+        /* Enable KSM. */
+        madvise(area, size, MADV_MERGEABLE);
+        pause();
+        return 0;
+}
+
+$ ./ksm-sparse  &
+$ echo 1 > /sys/kernel/mm/ksm/run
+
+Without this patch ksmd uses 100% of the cpu for a long time (more than 1
+hour on my test machine) scanning all of the 32 TiB virtual address space
+that contains only one mapped page.  This leaves ksmd essentially
+deadlocked, unable to deduplicate anything of value.  With this patch ksmd
+walks only the one mapped page and skips the rest of the 32 TiB virtual
+address space, making the scan fast while using little cpu.
+
+Link: https://lkml.kernel.org/r/20251023035841.41406-1-pedrodemargomes@gmail.com
+Link: https://lkml.kernel.org/r/20251022153059.22763-1-pedrodemargomes@gmail.com
+Link: https://lore.kernel.org/linux-mm/423de7a3-1c62-4e72-8e79-19a6413e420c@redhat.com/ [1]
+Fixes: 31dbd01f3143 ("ksm: Kernel SamePage Merging")
+Signed-off-by: Pedro Demarchi Gomes <pedrodemargomes@gmail.com>
+Co-developed-by: David Hildenbrand <david@redhat.com>
+Signed-off-by: David Hildenbrand <david@redhat.com>
+Reported-by: craftfever <craftfever@airmail.cc>
+Closes: https://lkml.kernel.org/r/020cf8de6e773bb78ba7614ef250129f11a63781@murena.io
+Suggested-by: David Hildenbrand <david@redhat.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: Chengming Zhou <chengming.zhou@linux.dev>
+Cc: xu xin <xu.xin16@zte.com.cn>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/ksm.c |  113 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
+ 1 file changed, 104 insertions(+), 9 deletions(-)
+
+--- a/mm/ksm.c
++++ b/mm/ksm.c
+@@ -2458,6 +2458,95 @@ static bool should_skip_rmap_item(struct
+       return true;
+ }
++struct ksm_next_page_arg {
++      struct folio *folio;
++      struct page *page;
++      unsigned long addr;
++};
++
++static int ksm_next_page_pmd_entry(pmd_t *pmdp, unsigned long addr, unsigned long end,
++              struct mm_walk *walk)
++{
++      struct ksm_next_page_arg *private = walk->private;
++      struct vm_area_struct *vma = walk->vma;
++      pte_t *start_ptep = NULL, *ptep, pte;
++      struct mm_struct *mm = walk->mm;
++      struct folio *folio;
++      struct page *page;
++      spinlock_t *ptl;
++      pmd_t pmd;
++
++      if (ksm_test_exit(mm))
++              return 0;
++
++      cond_resched();
++
++      pmd = pmdp_get_lockless(pmdp);
++      if (!pmd_present(pmd))
++              return 0;
++
++      if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && pmd_leaf(pmd)) {
++              ptl = pmd_lock(mm, pmdp);
++              pmd = pmdp_get(pmdp);
++
++              if (!pmd_present(pmd)) {
++                      goto not_found_unlock;
++              } else if (pmd_leaf(pmd)) {
++                      page = vm_normal_page_pmd(vma, addr, pmd);
++                      if (!page)
++                              goto not_found_unlock;
++                      folio = page_folio(page);
++
++                      if (folio_is_zone_device(folio) || !folio_test_anon(folio))
++                              goto not_found_unlock;
++
++                      page += ((addr & (PMD_SIZE - 1)) >> PAGE_SHIFT);
++                      goto found_unlock;
++              }
++              spin_unlock(ptl);
++      }
++
++      start_ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
++      if (!start_ptep)
++              return 0;
++
++      for (ptep = start_ptep; addr < end; ptep++, addr += PAGE_SIZE) {
++              pte = ptep_get(ptep);
++
++              if (!pte_present(pte))
++                      continue;
++
++              page = vm_normal_page(vma, addr, pte);
++              if (!page)
++                      continue;
++              folio = page_folio(page);
++
++              if (folio_is_zone_device(folio) || !folio_test_anon(folio))
++                      continue;
++              goto found_unlock;
++      }
++
++not_found_unlock:
++      spin_unlock(ptl);
++      if (start_ptep)
++              pte_unmap(start_ptep);
++      return 0;
++found_unlock:
++      folio_get(folio);
++      spin_unlock(ptl);
++      if (start_ptep)
++              pte_unmap(start_ptep);
++      private->page = page;
++      private->folio = folio;
++      private->addr = addr;
++      return 1;
++}
++
++static struct mm_walk_ops ksm_next_page_ops = {
++      .pmd_entry = ksm_next_page_pmd_entry,
++      .walk_lock = PGWALK_RDLOCK,
++};
++
+ static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page)
+ {
+       struct mm_struct *mm;
+@@ -2545,21 +2634,27 @@ next_mm:
+                       ksm_scan.address = vma->vm_end;
+               while (ksm_scan.address < vma->vm_end) {
++                      struct ksm_next_page_arg ksm_next_page_arg;
+                       struct page *tmp_page = NULL;
+-                      struct folio_walk fw;
+                       struct folio *folio;
+                       if (ksm_test_exit(mm))
+                               break;
+-                      folio = folio_walk_start(&fw, vma, ksm_scan.address, 0);
+-                      if (folio) {
+-                              if (!folio_is_zone_device(folio) &&
+-                                   folio_test_anon(folio)) {
+-                                      folio_get(folio);
+-                                      tmp_page = fw.page;
+-                              }
+-                              folio_walk_end(&fw, vma);
++                      int found;
++
++                      found = walk_page_range_vma(vma, ksm_scan.address,
++                                                  vma->vm_end,
++                                                  &ksm_next_page_ops,
++                                                  &ksm_next_page_arg);
++
++                      if (found > 0) {
++                              folio = ksm_next_page_arg.folio;
++                              tmp_page = ksm_next_page_arg.page;
++                              ksm_scan.address = ksm_next_page_arg.addr;
++                      } else {
++                              VM_WARN_ON_ONCE(found < 0);
++                              ksm_scan.address = vma->vm_end - PAGE_SIZE;
+                       }
+                       if (tmp_page) {
diff --git a/queue-6.17/ksmbd-close-accepted-socket-when-per-ip-limit-rejects-connection.patch b/queue-6.17/ksmbd-close-accepted-socket-when-per-ip-limit-rejects-connection.patch
new file mode 100644 (file)
index 0000000..b38f7eb
--- /dev/null
@@ -0,0 +1,42 @@
+From 98a5fd31cbf72d46bf18e50b3ab0ce86d5f319a9 Mon Sep 17 00:00:00 2001
+From: Joshua Rogers <linux@joshua.hu>
+Date: Sat, 8 Nov 2025 22:59:23 +0800
+Subject: ksmbd: close accepted socket when per-IP limit rejects connection
+
+From: Joshua Rogers <linux@joshua.hu>
+
+commit 98a5fd31cbf72d46bf18e50b3ab0ce86d5f319a9 upstream.
+
+When the per-IP connection limit is exceeded in ksmbd_kthread_fn(),
+the code sets ret = -EAGAIN and continues the accept loop without
+closing the just-accepted socket. That leaks one socket per rejected
+attempt from a single IP and enables a trivial remote DoS.
+
+Release client_sk before continuing.
+
+This bug was found with ZeroPath.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Joshua Rogers <linux@joshua.hu>
+Acked-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/smb/server/transport_tcp.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/fs/smb/server/transport_tcp.c
++++ b/fs/smb/server/transport_tcp.c
+@@ -284,8 +284,11 @@ static int ksmbd_kthread_fn(void *p)
+                       }
+               }
+               up_read(&conn_list_lock);
+-              if (ret == -EAGAIN)
++              if (ret == -EAGAIN) {
++                      /* Per-IP limit hit: release the just-accepted socket. */
++                      sock_release(client_sk);
+                       continue;
++              }
+ skip_max_ip_conns_limit:
+               if (server_conf.max_connections &&
diff --git a/queue-6.17/kvm-arm64-make-all-32bit-id-registers-fully-writable.patch b/queue-6.17/kvm-arm64-make-all-32bit-id-registers-fully-writable.patch
new file mode 100644 (file)
index 0000000..4765abb
--- /dev/null
@@ -0,0 +1,123 @@
+From 3f9eacf4f0705876a5d6526d7d320ca91d7d7a16 Mon Sep 17 00:00:00 2001
+From: Marc Zyngier <maz@kernel.org>
+Date: Thu, 30 Oct 2025 12:27:05 +0000
+Subject: KVM: arm64: Make all 32bit ID registers fully writable
+
+From: Marc Zyngier <maz@kernel.org>
+
+commit 3f9eacf4f0705876a5d6526d7d320ca91d7d7a16 upstream.
+
+32bit ID registers aren't getting much love these days, and are
+often missed in updates. One of these updates broke restoring
+a GICv2 guest on a GICv3 machine.
+
+Instead of performing a piecemeal fix, just bite the bullet
+and make all 32bit ID regs fully writable. KVM itself never
+relies on them for anything, and if the VMM wants to mess up
+the guest, so be it.
+
+Fixes: 5cb57a1aff755 ("KVM: arm64: Zero ID_AA64PFR0_EL1.GIC when no GICv3 is presented to the guest")
+Reported-by: Peter Maydell <peter.maydell@linaro.org>
+Cc: stable@vger.kernel.org
+Reviewed-by: Oliver Upton <oupton@kernel.org>
+Link: https://patch.msgid.link/20251030122707.2033690-2-maz@kernel.org
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kvm/sys_regs.c |   59 ++++++++++++++++++++++++----------------------
+ 1 file changed, 31 insertions(+), 28 deletions(-)
+
+--- a/arch/arm64/kvm/sys_regs.c
++++ b/arch/arm64/kvm/sys_regs.c
+@@ -2515,19 +2515,23 @@ static bool bad_redir_trap(struct kvm_vc
+       .val = 0,                               \
+ }
+-/* sys_reg_desc initialiser for known cpufeature ID registers */
+-#define AA32_ID_SANITISED(name) {             \
+-      ID_DESC(name),                          \
+-      .visibility = aa32_id_visibility,       \
+-      .val = 0,                               \
+-}
+-
+ /* sys_reg_desc initialiser for writable ID registers */
+ #define ID_WRITABLE(name, mask) {             \
+       ID_DESC(name),                          \
+       .val = mask,                            \
+ }
++/*
++ * 32bit ID regs are fully writable when the guest is 32bit
++ * capable. Nothing in the KVM code should rely on 32bit features
++ * anyway, only 64bit, so let the VMM do its worst.
++ */
++#define AA32_ID_WRITABLE(name) {              \
++      ID_DESC(name),                          \
++      .visibility = aa32_id_visibility,       \
++      .val = GENMASK(31, 0),                  \
++}
++
+ /* sys_reg_desc initialiser for cpufeature ID registers that need filtering */
+ #define ID_FILTERED(sysreg, name, mask) {     \
+       ID_DESC(sysreg),                                \
+@@ -3039,40 +3043,39 @@ static const struct sys_reg_desc sys_reg
+       /* AArch64 mappings of the AArch32 ID registers */
+       /* CRm=1 */
+-      AA32_ID_SANITISED(ID_PFR0_EL1),
+-      AA32_ID_SANITISED(ID_PFR1_EL1),
++      AA32_ID_WRITABLE(ID_PFR0_EL1),
++      AA32_ID_WRITABLE(ID_PFR1_EL1),
+       { SYS_DESC(SYS_ID_DFR0_EL1),
+         .access = access_id_reg,
+         .get_user = get_id_reg,
+         .set_user = set_id_dfr0_el1,
+         .visibility = aa32_id_visibility,
+         .reset = read_sanitised_id_dfr0_el1,
+-        .val = ID_DFR0_EL1_PerfMon_MASK |
+-               ID_DFR0_EL1_CopDbg_MASK, },
++        .val = GENMASK(31, 0) },
+       ID_HIDDEN(ID_AFR0_EL1),
+-      AA32_ID_SANITISED(ID_MMFR0_EL1),
+-      AA32_ID_SANITISED(ID_MMFR1_EL1),
+-      AA32_ID_SANITISED(ID_MMFR2_EL1),
+-      AA32_ID_SANITISED(ID_MMFR3_EL1),
++      AA32_ID_WRITABLE(ID_MMFR0_EL1),
++      AA32_ID_WRITABLE(ID_MMFR1_EL1),
++      AA32_ID_WRITABLE(ID_MMFR2_EL1),
++      AA32_ID_WRITABLE(ID_MMFR3_EL1),
+       /* CRm=2 */
+-      AA32_ID_SANITISED(ID_ISAR0_EL1),
+-      AA32_ID_SANITISED(ID_ISAR1_EL1),
+-      AA32_ID_SANITISED(ID_ISAR2_EL1),
+-      AA32_ID_SANITISED(ID_ISAR3_EL1),
+-      AA32_ID_SANITISED(ID_ISAR4_EL1),
+-      AA32_ID_SANITISED(ID_ISAR5_EL1),
+-      AA32_ID_SANITISED(ID_MMFR4_EL1),
+-      AA32_ID_SANITISED(ID_ISAR6_EL1),
++      AA32_ID_WRITABLE(ID_ISAR0_EL1),
++      AA32_ID_WRITABLE(ID_ISAR1_EL1),
++      AA32_ID_WRITABLE(ID_ISAR2_EL1),
++      AA32_ID_WRITABLE(ID_ISAR3_EL1),
++      AA32_ID_WRITABLE(ID_ISAR4_EL1),
++      AA32_ID_WRITABLE(ID_ISAR5_EL1),
++      AA32_ID_WRITABLE(ID_MMFR4_EL1),
++      AA32_ID_WRITABLE(ID_ISAR6_EL1),
+       /* CRm=3 */
+-      AA32_ID_SANITISED(MVFR0_EL1),
+-      AA32_ID_SANITISED(MVFR1_EL1),
+-      AA32_ID_SANITISED(MVFR2_EL1),
++      AA32_ID_WRITABLE(MVFR0_EL1),
++      AA32_ID_WRITABLE(MVFR1_EL1),
++      AA32_ID_WRITABLE(MVFR2_EL1),
+       ID_UNALLOCATED(3,3),
+-      AA32_ID_SANITISED(ID_PFR2_EL1),
++      AA32_ID_WRITABLE(ID_PFR2_EL1),
+       ID_HIDDEN(ID_DFR1_EL1),
+-      AA32_ID_SANITISED(ID_MMFR5_EL1),
++      AA32_ID_WRITABLE(ID_MMFR5_EL1),
+       ID_UNALLOCATED(3,7),
+       /* AArch64 ID registers */
diff --git a/queue-6.17/kvm-guest_memfd-remove-bindings-on-memslot-deletion-when-gmem-is-dying.patch b/queue-6.17/kvm-guest_memfd-remove-bindings-on-memslot-deletion-when-gmem-is-dying.patch
new file mode 100644 (file)
index 0000000..6033c8e
--- /dev/null
@@ -0,0 +1,172 @@
+From ae431059e75d36170a5ae6b44cc4d06d43613215 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Mon, 3 Nov 2025 17:12:05 -0800
+Subject: KVM: guest_memfd: Remove bindings on memslot deletion when gmem is dying
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit ae431059e75d36170a5ae6b44cc4d06d43613215 upstream.
+
+When unbinding a memslot from a guest_memfd instance, remove the bindings
+even if the guest_memfd file is dying, i.e. even if its file refcount has
+gone to zero.  If the memslot is freed before the file is fully released,
+nullifying the memslot side of the binding in kvm_gmem_release() will
+write to freed memory, as detected by syzbot+KASAN:
+
+  ==================================================================
+  BUG: KASAN: slab-use-after-free in kvm_gmem_release+0x176/0x440 virt/kvm/guest_memfd.c:353
+  Write of size 8 at addr ffff88807befa508 by task syz.0.17/6022
+
+  CPU: 0 UID: 0 PID: 6022 Comm: syz.0.17 Not tainted syzkaller #0 PREEMPT(full)
+  Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/02/2025
+  Call Trace:
+   <TASK>
+   dump_stack_lvl+0x189/0x250 lib/dump_stack.c:120
+   print_address_description mm/kasan/report.c:378 [inline]
+   print_report+0xca/0x240 mm/kasan/report.c:482
+   kasan_report+0x118/0x150 mm/kasan/report.c:595
+   kvm_gmem_release+0x176/0x440 virt/kvm/guest_memfd.c:353
+   __fput+0x44c/0xa70 fs/file_table.c:468
+   task_work_run+0x1d4/0x260 kernel/task_work.c:227
+   resume_user_mode_work include/linux/resume_user_mode.h:50 [inline]
+   exit_to_user_mode_loop+0xe9/0x130 kernel/entry/common.c:43
+   exit_to_user_mode_prepare include/linux/irq-entry-common.h:225 [inline]
+   syscall_exit_to_user_mode_work include/linux/entry-common.h:175 [inline]
+   syscall_exit_to_user_mode include/linux/entry-common.h:210 [inline]
+   do_syscall_64+0x2bd/0xfa0 arch/x86/entry/syscall_64.c:100
+   entry_SYSCALL_64_after_hwframe+0x77/0x7f
+  RIP: 0033:0x7fbeeff8efc9
+   </TASK>
+
+  Allocated by task 6023:
+   kasan_save_stack mm/kasan/common.c:56 [inline]
+   kasan_save_track+0x3e/0x80 mm/kasan/common.c:77
+   poison_kmalloc_redzone mm/kasan/common.c:397 [inline]
+   __kasan_kmalloc+0x93/0xb0 mm/kasan/common.c:414
+   kasan_kmalloc include/linux/kasan.h:262 [inline]
+   __kmalloc_cache_noprof+0x3e2/0x700 mm/slub.c:5758
+   kmalloc_noprof include/linux/slab.h:957 [inline]
+   kzalloc_noprof include/linux/slab.h:1094 [inline]
+   kvm_set_memory_region+0x747/0xb90 virt/kvm/kvm_main.c:2104
+   kvm_vm_ioctl_set_memory_region+0x6f/0xd0 virt/kvm/kvm_main.c:2154
+   kvm_vm_ioctl+0x957/0xc60 virt/kvm/kvm_main.c:5201
+   vfs_ioctl fs/ioctl.c:51 [inline]
+   __do_sys_ioctl fs/ioctl.c:597 [inline]
+   __se_sys_ioctl+0xfc/0x170 fs/ioctl.c:583
+   do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
+   do_syscall_64+0xfa/0xfa0 arch/x86/entry/syscall_64.c:94
+   entry_SYSCALL_64_after_hwframe+0x77/0x7f
+
+  Freed by task 6023:
+   kasan_save_stack mm/kasan/common.c:56 [inline]
+   kasan_save_track+0x3e/0x80 mm/kasan/common.c:77
+   kasan_save_free_info+0x46/0x50 mm/kasan/generic.c:584
+   poison_slab_object mm/kasan/common.c:252 [inline]
+   __kasan_slab_free+0x5c/0x80 mm/kasan/common.c:284
+   kasan_slab_free include/linux/kasan.h:234 [inline]
+   slab_free_hook mm/slub.c:2533 [inline]
+   slab_free mm/slub.c:6622 [inline]
+   kfree+0x19a/0x6d0 mm/slub.c:6829
+   kvm_set_memory_region+0x9c4/0xb90 virt/kvm/kvm_main.c:2130
+   kvm_vm_ioctl_set_memory_region+0x6f/0xd0 virt/kvm/kvm_main.c:2154
+   kvm_vm_ioctl+0x957/0xc60 virt/kvm/kvm_main.c:5201
+   vfs_ioctl fs/ioctl.c:51 [inline]
+   __do_sys_ioctl fs/ioctl.c:597 [inline]
+   __se_sys_ioctl+0xfc/0x170 fs/ioctl.c:583
+   do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
+   do_syscall_64+0xfa/0xfa0 arch/x86/entry/syscall_64.c:94
+   entry_SYSCALL_64_after_hwframe+0x77/0x7f
+
+Deliberately don't acquire filemap invalid lock when the file is dying as
+the lifecycle of f_mapping is outside the purview of KVM.  Dereferencing
+the mapping is *probably* fine, but there's no need to invalidate anything
+as memslot deletion is responsible for zapping SPTEs, and the only code
+that can access the dying file is kvm_gmem_release(), whose core code is
+mutually exclusive with unbinding.
+
+Note, the mutual exclusivity is also what makes it safe to access the
+bindings on a dying gmem instance.  Unbinding either runs with slots_lock
+held, or after the last reference to the owning "struct kvm" is put, and
+kvm_gmem_release() nullifies the slot pointer under slots_lock, and puts
+its reference to the VM after that is done.
+
+Reported-by: syzbot+2479e53d0db9b32ae2aa@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/all/68fa7a22.a70a0220.3bf6c6.008b.GAE@google.com
+Tested-by: syzbot+2479e53d0db9b32ae2aa@syzkaller.appspotmail.com
+Fixes: a7800aa80ea4 ("KVM: Add KVM_CREATE_GUEST_MEMFD ioctl() for guest-specific backing memory")
+Cc: stable@vger.kernel.org
+Cc: Hillf Danton <hdanton@sina.com>
+Reviewed-By: Vishal Annapurve <vannapurve@google.com>
+Link: https://patch.msgid.link/20251104011205.3853541-1-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ virt/kvm/guest_memfd.c |   45 ++++++++++++++++++++++++++++++++-------------
+ 1 file changed, 32 insertions(+), 13 deletions(-)
+
+--- a/virt/kvm/guest_memfd.c
++++ b/virt/kvm/guest_memfd.c
+@@ -523,31 +523,50 @@ err:
+       return r;
+ }
+-void kvm_gmem_unbind(struct kvm_memory_slot *slot)
++static void __kvm_gmem_unbind(struct kvm_memory_slot *slot, struct kvm_gmem *gmem)
+ {
+       unsigned long start = slot->gmem.pgoff;
+       unsigned long end = start + slot->npages;
+-      struct kvm_gmem *gmem;
++
++      xa_store_range(&gmem->bindings, start, end - 1, NULL, GFP_KERNEL);
++
++      /*
++       * synchronize_srcu(&kvm->srcu) ensured that kvm_gmem_get_pfn()
++       * cannot see this memslot.
++       */
++      WRITE_ONCE(slot->gmem.file, NULL);
++}
++
++void kvm_gmem_unbind(struct kvm_memory_slot *slot)
++{
+       struct file *file;
+       /*
+-       * Nothing to do if the underlying file was already closed (or is being
+-       * closed right now), kvm_gmem_release() invalidates all bindings.
++       * Nothing to do if the underlying file was _already_ closed, as
++       * kvm_gmem_release() invalidates and nullifies all bindings.
+        */
+-      file = kvm_gmem_get_file(slot);
+-      if (!file)
++      if (!slot->gmem.file)
+               return;
+-      gmem = file->private_data;
+-
+-      filemap_invalidate_lock(file->f_mapping);
+-      xa_store_range(&gmem->bindings, start, end - 1, NULL, GFP_KERNEL);
++      file = kvm_gmem_get_file(slot);
+       /*
+-       * synchronize_srcu(&kvm->srcu) ensured that kvm_gmem_get_pfn()
+-       * cannot see this memslot.
++       * However, if the file is _being_ closed, then the bindings need to be
++       * removed as kvm_gmem_release() might not run until after the memslot
++       * is freed.  Note, modifying the bindings is safe even though the file
++       * is dying as kvm_gmem_release() nullifies slot->gmem.file under
++       * slots_lock, and only puts its reference to KVM after destroying all
++       * bindings.  I.e. reaching this point means kvm_gmem_release() hasn't
++       * yet destroyed the bindings or freed the gmem_file, and can't do so
++       * until the caller drops slots_lock.
+        */
+-      WRITE_ONCE(slot->gmem.file, NULL);
++      if (!file) {
++              __kvm_gmem_unbind(slot, slot->gmem.file->private_data);
++              return;
++      }
++
++      filemap_invalidate_lock(file->f_mapping);
++      __kvm_gmem_unbind(slot, file->private_data);
+       filemap_invalidate_unlock(file->f_mapping);
+       fput(file);
diff --git a/queue-6.17/kvm-nsvm-always-recalculate-lbr-msr-intercepts-in-svm_update_lbrv.patch b/queue-6.17/kvm-nsvm-always-recalculate-lbr-msr-intercepts-in-svm_update_lbrv.patch
new file mode 100644 (file)
index 0000000..50dbb3b
--- /dev/null
@@ -0,0 +1,98 @@
+From fbe5e5f030c22ae717ee422aaab0e00ea84fab5e Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry.ahmed@linux.dev>
+Date: Sat, 8 Nov 2025 00:45:20 +0000
+Subject: KVM: nSVM: Always recalculate LBR MSR intercepts in svm_update_lbrv()
+
+From: Yosry Ahmed <yosry.ahmed@linux.dev>
+
+commit fbe5e5f030c22ae717ee422aaab0e00ea84fab5e upstream.
+
+svm_update_lbrv() is called when MSR_IA32_DEBUGCTLMSR is updated, and on
+nested transitions where LBRV is used. It checks whether LBRV enablement
+needs to be changed in the current VMCB, and if it does, it also
+recalculates intercepts to LBR MSRs.
+
+However, there are cases where intercepts need to be updated even when
+LBRV enablement doesn't. Example scenario:
+- L1 has MSR_IA32_DEBUGCTLMSR cleared.
+- L1 runs L2 without LBR_CTL_ENABLE (no LBRV).
+- L2 sets DEBUGCTLMSR_LBR in MSR_IA32_DEBUGCTLMSR, svm_update_lbrv()
+  sets LBR_CTL_ENABLE in VMCB02 and disables intercepts to LBR MSRs.
+- L2 exits to L1, svm_update_lbrv() is not called on this transition.
+- L1 clears MSR_IA32_DEBUGCTLMSR, svm_update_lbrv() finds that
+  LBR_CTL_ENABLE is already cleared in VMCB01 and does nothing.
+- Intercepts remain disabled, so L1 reads of the LBR MSRs return the host values.
+
+Fix it by always recalculating intercepts in svm_update_lbrv().
+
+Fixes: 1d5a1b5860ed ("KVM: x86: nSVM: correctly virtualize LBR msrs when L2 is running")
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry.ahmed@linux.dev>
+Link: https://patch.msgid.link/20251108004524.1600006-3-yosry.ahmed@linux.dev
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/svm.c |   29 +++++++++++++++++++----------
+ 1 file changed, 19 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -852,25 +852,29 @@ void svm_copy_lbrs(struct vmcb *to_vmcb,
+       vmcb_mark_dirty(to_vmcb, VMCB_LBR);
+ }
+-void svm_enable_lbrv(struct kvm_vcpu *vcpu)
++static void __svm_enable_lbrv(struct kvm_vcpu *vcpu)
+ {
+       struct vcpu_svm *svm = to_svm(vcpu);
+       svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
+-      svm_recalc_lbr_msr_intercepts(vcpu);
+       /* Move the LBR msrs to the vmcb02 so that the guest can see them. */
+       if (is_guest_mode(vcpu))
+               svm_copy_lbrs(svm->vmcb, svm->vmcb01.ptr);
+ }
+-static void svm_disable_lbrv(struct kvm_vcpu *vcpu)
++void svm_enable_lbrv(struct kvm_vcpu *vcpu)
++{
++      __svm_enable_lbrv(vcpu);
++      svm_recalc_lbr_msr_intercepts(vcpu);
++}
++
++static void __svm_disable_lbrv(struct kvm_vcpu *vcpu)
+ {
+       struct vcpu_svm *svm = to_svm(vcpu);
+       KVM_BUG_ON(sev_es_guest(vcpu->kvm), vcpu->kvm);
+       svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
+-      svm_recalc_lbr_msr_intercepts(vcpu);
+       /*
+        * Move the LBR msrs back to the vmcb01 to avoid copying them
+@@ -899,13 +903,18 @@ void svm_update_lbrv(struct kvm_vcpu *vc
+                           (is_guest_mode(vcpu) && guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
+                           (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK));
+-      if (enable_lbrv == current_enable_lbrv)
+-              return;
++      if (enable_lbrv && !current_enable_lbrv)
++              __svm_enable_lbrv(vcpu);
++      else if (!enable_lbrv && current_enable_lbrv)
++              __svm_disable_lbrv(vcpu);
+-      if (enable_lbrv)
+-              svm_enable_lbrv(vcpu);
+-      else
+-              svm_disable_lbrv(vcpu);
++      /*
++       * During nested transitions, it is possible that the current VMCB has
++       * LBR_CTL set, but the previous LBR_CTL had it cleared (or vice versa).
++       * In this case, even though LBR_CTL does not need an update, intercepts
++       * do, so always recalculate the intercepts here.
++       */
++      svm_recalc_lbr_msr_intercepts(vcpu);
+ }
+ void disable_nmi_singlestep(struct vcpu_svm *svm)
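
The scenario above boils down to a derived state (the LBR MSR intercepts) that can go stale even when the primary flag (LBR_CTL_ENABLE) does not change across a nested transition. A minimal standalone sketch of the resulting control flow, with made-up names standing in for the real KVM/SVM helpers, not the kernel implementation itself:

  #include <stdbool.h>
  #include <stdio.h>

  static bool lbr_ctl_enabled;    /* models LBR_CTL_ENABLE in the current VMCB */

  static void recalc_lbr_intercepts(void)
  {
          /* Stand-in for reprogramming the LBR MSR intercept bitmap. */
          printf("intercepts recalculated (lbr %s)\n", lbr_ctl_enabled ? "on" : "off");
  }

  static void update_lbrv(bool want_lbr)
  {
          if (want_lbr && !lbr_ctl_enabled)
                  lbr_ctl_enabled = true;
          else if (!want_lbr && lbr_ctl_enabled)
                  lbr_ctl_enabled = false;

          /*
           * Always recompute the derived intercept state: after a nested
           * transition the current VMCB may already hold the desired enable
           * bit while the intercepts still reflect the other context.
           */
          recalc_lbr_intercepts();
  }

  int main(void)
  {
          update_lbrv(true);      /* enable bit flips, intercepts reprogrammed     */
          update_lbrv(true);      /* enable bit unchanged, intercepts still redone */
          update_lbrv(false);     /* enable bit flips back, intercepts redone      */
          return 0;
  }
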
diff --git a/queue-6.17/kvm-nsvm-fix-and-simplify-lbr-virtualization-handling-with-nested.patch b/queue-6.17/kvm-nsvm-fix-and-simplify-lbr-virtualization-handling-with-nested.patch
new file mode 100644 (file)
index 0000000..8ab5ee7
--- /dev/null
@@ -0,0 +1,191 @@
+From 8a4821412cf2c1429fffa07c012dd150f2edf78c Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry.ahmed@linux.dev>
+Date: Sat, 8 Nov 2025 00:45:21 +0000
+Subject: KVM: nSVM: Fix and simplify LBR virtualization handling with nested
+
+From: Yosry Ahmed <yosry.ahmed@linux.dev>
+
+commit 8a4821412cf2c1429fffa07c012dd150f2edf78c upstream.
+
+The current scheme for handling LBRV when nesting is used is very
+complicated, especially when L1 does not enable LBRV (i.e. does not set
+LBR_CTL_ENABLE_MASK).
+
+To avoid copying LBRs between VMCB01 and VMCB02 on every nested
+transition, the current implementation switches between using VMCB01 or
+VMCB02 as the source of truth for the LBRs while L2 is running. If L2
+enables LBR, VMCB02 is used as the source of truth. When L2 disables
+LBR, the LBRs are copied to VMCB01 and VMCB01 is used as the source of
+truth. This introduces significant complexity, and incorrect behavior in
+some cases.
+
+For example, on a nested #VMEXIT, the LBRs are only copied from VMCB02
+to VMCB01 if LBRV is enabled in VMCB01. This is because L2's writes to
+MSR_IA32_DEBUGCTLMSR to enable LBR are intercepted and propagated to
+VMCB01 instead of VMCB02. However, LBRV is only enabled in VMCB02 when
+L2 is running.
+
+This means that if L2 enables LBR and exits to L1, the LBRs will not be
+propagated from VMCB02 to VMCB01, because LBRV is disabled in VMCB01.
+
+There is no meaningful difference in CPUID rate in L2 when copying LBRs
+on every nested transition vs. the current approach, so do the simple
+and correct thing and always copy LBRs between VMCB01 and VMCB02 on
+nested transitions (when LBRV is disabled by L1). Drop the conditional
+LBR copying in __svm_{enable/disable}_lbrv() as it is now unnecessary.
+
+VMCB02 becomes the only source of truth for LBRs when L2 is running,
+regardless of whether LBRV is enabled by L1, so drop svm_get_lbr_vmcb()
+and use svm->vmcb directly in its place.
+
+Fixes: 1d5a1b5860ed ("KVM: x86: nSVM: correctly virtualize LBR msrs when L2 is running")
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry.ahmed@linux.dev>
+Link: https://patch.msgid.link/20251108004524.1600006-4-yosry.ahmed@linux.dev
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |   20 +++++++-------------
+ arch/x86/kvm/svm/svm.c    |   46 ++++++++++------------------------------------
+ 2 files changed, 17 insertions(+), 49 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -669,11 +669,10 @@ static void nested_vmcb02_prepare_save(s
+                */
+               svm_copy_lbrs(vmcb02, vmcb12);
+               vmcb02->save.dbgctl &= ~DEBUGCTL_RESERVED_BITS;
+-              svm_update_lbrv(&svm->vcpu);
+-
+-      } else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) {
++      } else {
+               svm_copy_lbrs(vmcb02, vmcb01);
+       }
++      svm_update_lbrv(&svm->vcpu);
+ }
+ static inline bool is_evtinj_soft(u32 evtinj)
+@@ -825,11 +824,7 @@ static void nested_vmcb02_prepare_contro
+                       svm->soft_int_next_rip = vmcb12_rip;
+       }
+-      vmcb02->control.virt_ext            = vmcb01->control.virt_ext &
+-                                            LBR_CTL_ENABLE_MASK;
+-      if (guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV))
+-              vmcb02->control.virt_ext  |=
+-                      (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK);
++      /* LBR_CTL_ENABLE_MASK is controlled by svm_update_lbrv() */
+       if (!nested_vmcb_needs_vls_intercept(svm))
+               vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
+@@ -1169,13 +1164,12 @@ int nested_svm_vmexit(struct vcpu_svm *s
+               kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
+       if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
+-                   (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
++                   (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK)))
+               svm_copy_lbrs(vmcb12, vmcb02);
+-              svm_update_lbrv(vcpu);
+-      } else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) {
++      else
+               svm_copy_lbrs(vmcb01, vmcb02);
+-              svm_update_lbrv(vcpu);
+-      }
++
++      svm_update_lbrv(vcpu);
+       if (vnmi) {
+               if (vmcb02->control.int_ctl & V_NMI_BLOCKING_MASK)
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -854,13 +854,7 @@ void svm_copy_lbrs(struct vmcb *to_vmcb,
+ static void __svm_enable_lbrv(struct kvm_vcpu *vcpu)
+ {
+-      struct vcpu_svm *svm = to_svm(vcpu);
+-
+-      svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
+-
+-      /* Move the LBR msrs to the vmcb02 so that the guest can see them. */
+-      if (is_guest_mode(vcpu))
+-              svm_copy_lbrs(svm->vmcb, svm->vmcb01.ptr);
++      to_svm(vcpu)->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
+ }
+ void svm_enable_lbrv(struct kvm_vcpu *vcpu)
+@@ -871,35 +865,15 @@ void svm_enable_lbrv(struct kvm_vcpu *vc
+ static void __svm_disable_lbrv(struct kvm_vcpu *vcpu)
+ {
+-      struct vcpu_svm *svm = to_svm(vcpu);
+-
+       KVM_BUG_ON(sev_es_guest(vcpu->kvm), vcpu->kvm);
+-      svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
+-
+-      /*
+-       * Move the LBR msrs back to the vmcb01 to avoid copying them
+-       * on nested guest entries.
+-       */
+-      if (is_guest_mode(vcpu))
+-              svm_copy_lbrs(svm->vmcb01.ptr, svm->vmcb);
+-}
+-
+-static struct vmcb *svm_get_lbr_vmcb(struct vcpu_svm *svm)
+-{
+-      /*
+-       * If LBR virtualization is disabled, the LBR MSRs are always kept in
+-       * vmcb01.  If LBR virtualization is enabled and L1 is running VMs of
+-       * its own, the MSRs are moved between vmcb01 and vmcb02 as needed.
+-       */
+-      return svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK ? svm->vmcb :
+-                                                                 svm->vmcb01.ptr;
++      to_svm(vcpu)->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
+ }
+ void svm_update_lbrv(struct kvm_vcpu *vcpu)
+ {
+       struct vcpu_svm *svm = to_svm(vcpu);
+       bool current_enable_lbrv = svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK;
+-      bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & DEBUGCTLMSR_LBR) ||
++      bool enable_lbrv = (svm->vmcb->save.dbgctl & DEBUGCTLMSR_LBR) ||
+                           (is_guest_mode(vcpu) && guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
+                           (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK));
+@@ -2785,19 +2759,19 @@ static int svm_get_msr(struct kvm_vcpu *
+               msr_info->data = svm->tsc_aux;
+               break;
+       case MSR_IA32_DEBUGCTLMSR:
+-              msr_info->data = svm_get_lbr_vmcb(svm)->save.dbgctl;
++              msr_info->data = svm->vmcb->save.dbgctl;
+               break;
+       case MSR_IA32_LASTBRANCHFROMIP:
+-              msr_info->data = svm_get_lbr_vmcb(svm)->save.br_from;
++              msr_info->data = svm->vmcb->save.br_from;
+               break;
+       case MSR_IA32_LASTBRANCHTOIP:
+-              msr_info->data = svm_get_lbr_vmcb(svm)->save.br_to;
++              msr_info->data = svm->vmcb->save.br_to;
+               break;
+       case MSR_IA32_LASTINTFROMIP:
+-              msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_from;
++              msr_info->data = svm->vmcb->save.last_excp_from;
+               break;
+       case MSR_IA32_LASTINTTOIP:
+-              msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_to;
++              msr_info->data = svm->vmcb->save.last_excp_to;
+               break;
+       case MSR_VM_HSAVE_PA:
+               msr_info->data = svm->nested.hsave_msr;
+@@ -3053,10 +3027,10 @@ static int svm_set_msr(struct kvm_vcpu *
+               if (data & DEBUGCTL_RESERVED_BITS)
+                       return 1;
+-              if (svm_get_lbr_vmcb(svm)->save.dbgctl == data)
++              if (svm->vmcb->save.dbgctl == data)
+                       break;
+-              svm_get_lbr_vmcb(svm)->save.dbgctl = data;
++              svm->vmcb->save.dbgctl = data;
+               vmcb_mark_dirty(svm->vmcb, VMCB_LBR);
+               svm_update_lbrv(vcpu);
+               break;
diff --git a/queue-6.17/kvm-svm-mark-vmcb_lbr-dirty-when-msr_ia32_debugctlmsr-is-updated.patch b/queue-6.17/kvm-svm-mark-vmcb_lbr-dirty-when-msr_ia32_debugctlmsr-is-updated.patch
new file mode 100644 (file)
index 0000000..3fa9732
--- /dev/null
@@ -0,0 +1,47 @@
+From dc55b3c3f61246e483e50c85d8d5366f9567e188 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry.ahmed@linux.dev>
+Date: Sat, 8 Nov 2025 00:45:19 +0000
+Subject: KVM: SVM: Mark VMCB_LBR dirty when MSR_IA32_DEBUGCTLMSR is updated
+
+From: Yosry Ahmed <yosry.ahmed@linux.dev>
+
+commit dc55b3c3f61246e483e50c85d8d5366f9567e188 upstream.
+
+The APM lists the DbgCtlMsr field as being tracked by the VMCB_LBR clean
+bit.  Always clear the bit when MSR_IA32_DEBUGCTLMSR is updated.
+
+The history is complicated: it was correctly cleared for L1 before
+commit 1d5a1b5860ed ("KVM: x86: nSVM: correctly virtualize LBR msrs when
+L2 is running").  At that point svm_set_msr() started to rely on
+svm_update_lbrv() to clear the bit, but when nested virtualization
+is enabled the latter does not always clear it even if MSR_IA32_DEBUGCTLMSR
+changed. Go back to clearing it directly in svm_set_msr().
+
+Fixes: 1d5a1b5860ed ("KVM: x86: nSVM: correctly virtualize LBR msrs when L2 is running")
+Reported-by: Matteo Rizzo <matteorizzo@google.com>
+Reported-by: evn@google.com
+Co-developed-by: Jim Mattson <jmattson@google.com>
+Signed-off-by: Jim Mattson <jmattson@google.com>
+Signed-off-by: Yosry Ahmed <yosry.ahmed@linux.dev>
+Link: https://patch.msgid.link/20251108004524.1600006-2-yosry.ahmed@linux.dev
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/svm.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -3044,7 +3044,11 @@ static int svm_set_msr(struct kvm_vcpu *
+               if (data & DEBUGCTL_RESERVED_BITS)
+                       return 1;
++              if (svm_get_lbr_vmcb(svm)->save.dbgctl == data)
++                      break;
++
+               svm_get_lbr_vmcb(svm)->save.dbgctl = data;
++              vmcb_mark_dirty(svm->vmcb, VMCB_LBR);
+               svm_update_lbrv(vcpu);
+               break;
+       case MSR_VM_HSAVE_PA:
diff --git a/queue-6.17/kvm-vmx-fix-check-for-valid-gva-on-an-ept-violation.patch b/queue-6.17/kvm-vmx-fix-check-for-valid-gva-on-an-ept-violation.patch
new file mode 100644 (file)
index 0000000..b128fe9
--- /dev/null
@@ -0,0 +1,35 @@
+From d0164c161923ac303bd843e04ebe95cfd03c6e19 Mon Sep 17 00:00:00 2001
+From: Sukrit Bhatnagar <Sukrit.Bhatnagar@sony.com>
+Date: Thu, 6 Nov 2025 14:28:51 +0900
+Subject: KVM: VMX: Fix check for valid GVA on an EPT violation
+
+From: Sukrit Bhatnagar <Sukrit.Bhatnagar@sony.com>
+
+commit d0164c161923ac303bd843e04ebe95cfd03c6e19 upstream.
+
+On an EPT violation, bit 7 of the exit qualification is set if the
+guest linear-address is valid. The derived page fault error code
+should not be checked for this bit.
+
+Fixes: f3009482512e ("KVM: VMX: Set PFERR_GUEST_{FINAL,PAGE}_MASK if and only if the GVA is valid")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sukrit Bhatnagar <Sukrit.Bhatnagar@sony.com>
+Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
+Link: https://patch.msgid.link/20251106052853.3071088-1-Sukrit.Bhatnagar@sony.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/common.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/vmx/common.h
++++ b/arch/x86/kvm/vmx/common.h
+@@ -98,7 +98,7 @@ static inline int __vmx_handle_ept_viola
+       error_code |= (exit_qualification & EPT_VIOLATION_PROT_MASK)
+                     ? PFERR_PRESENT_MASK : 0;
+-      if (error_code & EPT_VIOLATION_GVA_IS_VALID)
++      if (exit_qualification & EPT_VIOLATION_GVA_IS_VALID)
+               error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) ?
+                             PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;
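
The one-line fix above hinges on which word the GVA-valid bit lives in: it is a bit of the VMX exit qualification, not of the page-fault error code being built, where the same position has a different meaning. A standalone sketch of that distinction, using illustrative bit positions rather than the real EPT_VIOLATION_* / PFERR_* values:

  #include <stdint.h>
  #include <stdio.h>

  #define QUAL_GVA_IS_VALID   (1ull << 7)    /* illustrative exit-qualification bit */
  #define ERR_GVA_INFO        (1ull << 32)   /* illustrative error-code bit         */

  static uint64_t derive_error_code(uint64_t exit_qualification)
  {
          uint64_t error_code = 0;

          /*
           * Test the bit in the exit qualification, the word it is defined
           * for.  Testing "error_code & QUAL_GVA_IS_VALID" instead would
           * inspect a position that means something else (or nothing) in
           * the error-code layout and could silently mis-handle the fault.
           */
          if (exit_qualification & QUAL_GVA_IS_VALID)
                  error_code |= ERR_GVA_INFO;

          return error_code;
  }

  int main(void)
  {
          printf("error code: %#llx\n",
                 (unsigned long long)derive_error_code(QUAL_GVA_IS_VALID));
          return 0;
  }
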
diff --git a/queue-6.17/loongarch-consolidate-early_ioremap-ioremap_prot.patch b/queue-6.17/loongarch-consolidate-early_ioremap-ioremap_prot.patch
new file mode 100644 (file)
index 0000000..d37c12f
--- /dev/null
@@ -0,0 +1,57 @@
+From 43a9e6a10bdde32445ad2725f568e08a94e51dc9 Mon Sep 17 00:00:00 2001
+From: Huacai Chen <chenhuacai@loongson.cn>
+Date: Sun, 9 Nov 2025 16:02:00 +0800
+Subject: LoongArch: Consolidate early_ioremap()/ioremap_prot()
+
+From: Huacai Chen <chenhuacai@loongson.cn>
+
+commit 43a9e6a10bdde32445ad2725f568e08a94e51dc9 upstream.
+
+1. Use phys_addr_t instead of u64, which can work for both 32/64 bits.
+2. Check whether the input physical address is above TO_PHYS_MASK (and
+   return NULL if yes) for the DMW version.
+
+Note: In theory early_ioremap() also needs the TO_PHYS_MASK check, but
+the UEFI BIOS passes some DMW virtual addresses.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/include/asm/io.h |    5 ++++-
+ arch/loongarch/mm/ioremap.c     |    2 +-
+ 2 files changed, 5 insertions(+), 2 deletions(-)
+
+--- a/arch/loongarch/include/asm/io.h
++++ b/arch/loongarch/include/asm/io.h
+@@ -14,7 +14,7 @@
+ #include <asm/pgtable-bits.h>
+ #include <asm/string.h>
+-extern void __init __iomem *early_ioremap(u64 phys_addr, unsigned long size);
++extern void __init __iomem *early_ioremap(phys_addr_t phys_addr, unsigned long size);
+ extern void __init early_iounmap(void __iomem *addr, unsigned long size);
+ #define early_memremap early_ioremap
+@@ -25,6 +25,9 @@ extern void __init early_iounmap(void __
+ static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size,
+                                        pgprot_t prot)
+ {
++      if (offset > TO_PHYS_MASK)
++              return NULL;
++
+       switch (pgprot_val(prot) & _CACHE_MASK) {
+       case _CACHE_CC:
+               return (void __iomem *)(unsigned long)(CACHE_BASE + offset);
+--- a/arch/loongarch/mm/ioremap.c
++++ b/arch/loongarch/mm/ioremap.c
+@@ -6,7 +6,7 @@
+ #include <asm/io.h>
+ #include <asm-generic/early_ioremap.h>
+-void __init __iomem *early_ioremap(u64 phys_addr, unsigned long size)
++void __init __iomem *early_ioremap(phys_addr_t phys_addr, unsigned long size)
+ {
+       return ((void __iomem *)TO_CACHE(phys_addr));
+ }
diff --git a/queue-6.17/loongarch-kvm-add-delay-until-timer-interrupt-injected.patch b/queue-6.17/loongarch-kvm-add-delay-until-timer-interrupt-injected.patch
new file mode 100644 (file)
index 0000000..269b5a9
--- /dev/null
@@ -0,0 +1,46 @@
+From d3c9515e4f9d10ccb113adb4809db5cc31e7ef65 Mon Sep 17 00:00:00 2001
+From: Bibo Mao <maobibo@loongson.cn>
+Date: Sun, 9 Nov 2025 16:02:09 +0800
+Subject: LoongArch: KVM: Add delay until timer interrupt injected
+
+From: Bibo Mao <maobibo@loongson.cn>
+
+commit d3c9515e4f9d10ccb113adb4809db5cc31e7ef65 upstream.
+
+When the timer fires in oneshot mode, CSR.TVAL stops at -1 rather than
+0. However, when the register CSR.TVAL is restored, it continues to
+count down rather than stopping there.
+
+The method is to write 0 to CSR.TVAL, wait at least one cycle for it to
+count down (10ns with a 100MHz timer frequency), and then restore the
+timer interrupt status. Add a 2-cycle delay here to ensure that the
+timer interrupt is injected.
+
+With this patch, the timer selftest case always passes.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Bibo Mao <maobibo@loongson.cn>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/kvm/timer.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/arch/loongarch/kvm/timer.c
++++ b/arch/loongarch/kvm/timer.c
+@@ -4,6 +4,7 @@
+  */
+ #include <linux/kvm_host.h>
++#include <asm/delay.h>
+ #include <asm/kvm_csr.h>
+ #include <asm/kvm_vcpu.h>
+@@ -95,6 +96,7 @@ void kvm_restore_timer(struct kvm_vcpu *
+                * and set CSR TVAL with -1
+                */
+               write_gcsr_timertick(0);
++              __delay(2); /* Wait cycles until timer interrupt injected */
+               /*
+                * Writing CSR_TINTCLR_TI to LOONGARCH_CSR_TINTCLR will clear
diff --git a/queue-6.17/loongarch-kvm-fix-max-supported-vcpus-set-with-eiointc.patch b/queue-6.17/loongarch-kvm-fix-max-supported-vcpus-set-with-eiointc.patch
new file mode 100644 (file)
index 0000000..45fc68a
--- /dev/null
@@ -0,0 +1,40 @@
+From 237e74bfa261fb0cf75bd08c9be0c5094018ee20 Mon Sep 17 00:00:00 2001
+From: Bibo Mao <maobibo@loongson.cn>
+Date: Sun, 9 Nov 2025 16:02:09 +0800
+Subject: LoongArch: KVM: Fix max supported vCPUs set with EIOINTC
+
+From: Bibo Mao <maobibo@loongson.cn>
+
+commit 237e74bfa261fb0cf75bd08c9be0c5094018ee20 upstream.
+
+A VM fails to boot with 256 vCPUs; the detailed command is
+
+  qemu-system-loongarch64 -smp 256
+
+and there is an error reported as follows:
+
+  KVM_LOONGARCH_EXTIOI_INIT_NUM_CPU failed: Invalid argument
+
+There is a typo in function kvm_eiointc_ctrl_access() when setting the
+max supported vCPUs.
+
+Cc: stable@vger.kernel.org
+Fixes: 47256c4c8b1b ("LoongArch: KVM: Avoid copy_*_user() with lock hold in kvm_eiointc_ctrl_access()")
+Signed-off-by: Bibo Mao <maobibo@loongson.cn>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/kvm/intc/eiointc.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/loongarch/kvm/intc/eiointc.c
++++ b/arch/loongarch/kvm/intc/eiointc.c
+@@ -439,7 +439,7 @@ static int kvm_eiointc_ctrl_access(struc
+       spin_lock_irqsave(&s->lock, flags);
+       switch (type) {
+       case KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_NUM_CPU:
+-              if (val >= EIOINTC_ROUTE_MAX_VCPUS)
++              if (val > EIOINTC_ROUTE_MAX_VCPUS)
+                       ret = -EINVAL;
+               else
+                       s->num_cpu = val;
diff --git a/queue-6.17/loongarch-kvm-restore-guest-pmu-if-it-is-enabled.patch b/queue-6.17/loongarch-kvm-restore-guest-pmu-if-it-is-enabled.patch
new file mode 100644 (file)
index 0000000..c0c2037
--- /dev/null
@@ -0,0 +1,49 @@
+From 5001bcf86edf2de02f025a0f789bcac37fa040e6 Mon Sep 17 00:00:00 2001
+From: Bibo Mao <maobibo@loongson.cn>
+Date: Sun, 9 Nov 2025 16:02:09 +0800
+Subject: LoongArch: KVM: Restore guest PMU if it is enabled
+
+From: Bibo Mao <maobibo@loongson.cn>
+
+commit 5001bcf86edf2de02f025a0f789bcac37fa040e6 upstream.
+
+On LoongArch systems, the guest PMU hardware is shared by guest and host
+but the PMU interrupt is separate. The PMU is passed through to the VM,
+and there is a PMU context switch when exiting to the host and returning
+to the guest.
+
+There is an optimization to check whether the PMU is enabled by the
+guest. If not, no PMU context needs to be restored when returning to the
+guest. However, if it is enabled, the guest PMU context needs to be
+switched on. Currently the KVM_REQ_PMU notification is set on vCPU
+context switch, but it is missing if there is no vCPU context switch
+while the PMU is used by the guest VM, so fix it.
+
+Cc: <stable@vger.kernel.org>
+Fixes: f4e40ea9f78f ("LoongArch: KVM: Add PMU support for guest")
+Signed-off-by: Bibo Mao <maobibo@loongson.cn>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/kvm/vcpu.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/arch/loongarch/kvm/vcpu.c
++++ b/arch/loongarch/kvm/vcpu.c
+@@ -133,6 +133,9 @@ static void kvm_lose_pmu(struct kvm_vcpu
+        * Clear KVM_LARCH_PMU if the guest is not using PMU CSRs when
+        * exiting the guest, so that the next time trap into the guest.
+        * We don't need to deal with PMU CSRs contexts.
++       *
++       * Otherwise set the request bit KVM_REQ_PMU to restore guest PMU
++       * before entering guest VM
+        */
+       val = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0);
+       val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1);
+@@ -140,6 +143,8 @@ static void kvm_lose_pmu(struct kvm_vcpu
+       val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3);
+       if (!(val & KVM_PMU_EVENT_ENABLED))
+               vcpu->arch.aux_inuse &= ~KVM_LARCH_PMU;
++      else
++              kvm_make_request(KVM_REQ_PMU, vcpu);
+       kvm_restore_host_pmu(vcpu);
+ }
diff --git a/queue-6.17/loongarch-let-pte-pmd-_modify-record-the-status-of-_page_dirty.patch b/queue-6.17/loongarch-let-pte-pmd-_modify-record-the-status-of-_page_dirty.patch
new file mode 100644 (file)
index 0000000..e74f203
--- /dev/null
@@ -0,0 +1,56 @@
+From a073d637c8cfbfbab39b7272226a3fbf3b887580 Mon Sep 17 00:00:00 2001
+From: Tianyang Zhang <zhangtianyang@loongson.cn>
+Date: Sun, 9 Nov 2025 16:02:01 +0800
+Subject: LoongArch: Let {pte,pmd}_modify() record the status of _PAGE_DIRTY
+
+From: Tianyang Zhang <zhangtianyang@loongson.cn>
+
+commit a073d637c8cfbfbab39b7272226a3fbf3b887580 upstream.
+
+Now if the PTE/PMD is dirty with _PAGE_DIRTY but without _PAGE_MODIFIED,
+after {pte,pmd}_modify() we lose _PAGE_DIRTY, then {pte,pmd}_dirty()
+returns false and leads to data loss. This can happen in certain
+scenarios, such as when the HW PTW doesn't set _PAGE_MODIFIED
+automatically, so here we need _PAGE_MODIFIED to record the dirty
+status (_PAGE_DIRTY).
+
+The new modification involves checking whether the original PTE/PMD has
+the _PAGE_DIRTY flag. If it exists, the _PAGE_MODIFIED bit is also set,
+ensuring that the {pte,pmd}_dirty() interface can always return accurate
+information.
+
+Cc: stable@vger.kernel.org
+Co-developed-by: Liupu Wang <wangliupu@loongson.cn>
+Signed-off-by: Liupu Wang <wangliupu@loongson.cn>
+Signed-off-by: Tianyang Zhang <zhangtianyang@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/include/asm/pgtable.h |   11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+--- a/arch/loongarch/include/asm/pgtable.h
++++ b/arch/loongarch/include/asm/pgtable.h
+@@ -424,6 +424,9 @@ static inline unsigned long pte_accessib
+ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+ {
++      if (pte_val(pte) & _PAGE_DIRTY)
++              pte_val(pte) |= _PAGE_MODIFIED;
++
+       return __pte((pte_val(pte) & _PAGE_CHG_MASK) |
+                    (pgprot_val(newprot) & ~_PAGE_CHG_MASK));
+ }
+@@ -547,9 +550,11 @@ static inline struct page *pmd_page(pmd_
+ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+ {
+-      pmd_val(pmd) = (pmd_val(pmd) & _HPAGE_CHG_MASK) |
+-                              (pgprot_val(newprot) & ~_HPAGE_CHG_MASK);
+-      return pmd;
++      if (pmd_val(pmd) & _PAGE_DIRTY)
++              pmd_val(pmd) |= _PAGE_MODIFIED;
++
++      return __pmd((pmd_val(pmd) & _HPAGE_CHG_MASK) |
++                   (pgprot_val(newprot) & ~_HPAGE_CHG_MASK));
+ }
+ static inline pmd_t pmd_mkinvalid(pmd_t pmd)
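
The change above follows a simple pattern: before the protection-change mask drops the hardware dirty bit, latch its value into the software dirty bit so {pte,pmd}_dirty() keeps reporting the page as dirty. A toy model of that pattern, with invented bit values and mask, not the LoongArch definitions:

  #include <stdint.h>
  #include <stdio.h>

  #define PAGE_DIRTY      (1ull << 1)     /* hardware dirty bit, dropped on modify (assumed)   */
  #define PAGE_MODIFIED   (1ull << 2)     /* software dirty bit, preserved on modify (assumed) */
  #define CHG_MASK        (PAGE_MODIFIED | 1ull)  /* bits kept across a protection change      */

  static uint64_t pte_modify_fixed(uint64_t pte, uint64_t newprot)
  {
          /* Latch the hardware dirty state into the preserved software bit
           * before the hardware bit is masked away. */
          if (pte & PAGE_DIRTY)
                  pte |= PAGE_MODIFIED;

          return (pte & CHG_MASK) | (newprot & ~CHG_MASK);
  }

  int main(void)
  {
          uint64_t pte = PAGE_DIRTY;      /* dirty, but only via the hardware bit */
          uint64_t newpte = pte_modify_fixed(pte, 0);

          printf("still dirty after modify: %s\n",
                 (newpte & (PAGE_DIRTY | PAGE_MODIFIED)) ? "yes" : "no");
          return 0;
  }
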
diff --git a/queue-6.17/loongarch-use-correct-accessor-to-read-fwpc-mwpc.patch b/queue-6.17/loongarch-use-correct-accessor-to-read-fwpc-mwpc.patch
new file mode 100644 (file)
index 0000000..c5e3504
--- /dev/null
@@ -0,0 +1,38 @@
+From eeeeaafa62ea0cd4b86390f657dc0aea73bff4f5 Mon Sep 17 00:00:00 2001
+From: Huacai Chen <chenhuacai@loongson.cn>
+Date: Sun, 9 Nov 2025 16:02:01 +0800
+Subject: LoongArch: Use correct accessor to read FWPC/MWPC
+
+From: Huacai Chen <chenhuacai@loongson.cn>
+
+commit eeeeaafa62ea0cd4b86390f657dc0aea73bff4f5 upstream.
+
+CSR.FWPC and CSR.MWPC are 32bit registers, so use csr_read32() rather
+than csr_read64() to read the values of FWPC/MWPC.
+
+Cc: stable@vger.kernel.org
+Fixes: edffa33c7bb5a73 ("LoongArch: Add hardware breakpoints/watchpoints support")
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/include/asm/hw_breakpoint.h |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/loongarch/include/asm/hw_breakpoint.h
++++ b/arch/loongarch/include/asm/hw_breakpoint.h
+@@ -134,13 +134,13 @@ static inline void hw_breakpoint_thread_
+ /* Determine number of BRP registers available. */
+ static inline int get_num_brps(void)
+ {
+-      return csr_read64(LOONGARCH_CSR_FWPC) & CSR_FWPC_NUM;
++      return csr_read32(LOONGARCH_CSR_FWPC) & CSR_FWPC_NUM;
+ }
+ /* Determine number of WRP registers available. */
+ static inline int get_num_wrps(void)
+ {
+-      return csr_read64(LOONGARCH_CSR_MWPC) & CSR_MWPC_NUM;
++      return csr_read32(LOONGARCH_CSR_MWPC) & CSR_MWPC_NUM;
+ }
+ #endif        /* __KERNEL__ */
diff --git a/queue-6.17/maple_tree-fix-tracepoint-string-pointers.patch b/queue-6.17/maple_tree-fix-tracepoint-string-pointers.patch
new file mode 100644 (file)
index 0000000..9c1da76
--- /dev/null
@@ -0,0 +1,174 @@
+From 91a54090026f84ceffaa12ac53c99b9f162946f6 Mon Sep 17 00:00:00 2001
+From: Martin Kaiser <martin@kaiser.cx>
+Date: Thu, 30 Oct 2025 16:55:05 +0100
+Subject: maple_tree: fix tracepoint string pointers
+
+From: Martin Kaiser <martin@kaiser.cx>
+
+commit 91a54090026f84ceffaa12ac53c99b9f162946f6 upstream.
+
+maple_tree tracepoints contain pointers to function names. Such a pointer
+is saved when a tracepoint logs an event. There's no guarantee that it's
+still valid when the event is parsed later and the pointer is dereferenced.
+
+The kernel warns about these unsafe pointers.
+
+       event 'ma_read' has unsafe pointer field 'fn'
+       WARNING: kernel/trace/trace.c:3779 at ignore_event+0x1da/0x1e4
+
+Mark the function names as tracepoint_string() to fix the events.
+
+One case that doesn't work without my patch is using trace-cmd record
+to save the binary ring buffer and trace-cmd report to parse it in
+userspace.  The address of __func__ can't be dereferenced from
+userspace, but tracepoint_string() adds an entry to
+/sys/kernel/tracing/printk_formats.
+
+Link: https://lkml.kernel.org/r/20251030155537.87972-1-martin@kaiser.cx
+Fixes: 54a611b60590 ("Maple Tree: add new data structure")
+Signed-off-by: Martin Kaiser <martin@kaiser.cx>
+Acked-by: Liam R. Howlett <Liam.Howlett@oracle.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ lib/maple_tree.c |   30 ++++++++++++++++--------------
+ 1 file changed, 16 insertions(+), 14 deletions(-)
+
+--- a/lib/maple_tree.c
++++ b/lib/maple_tree.c
+@@ -64,6 +64,8 @@
+ #define CREATE_TRACE_POINTS
+ #include <trace/events/maple_tree.h>
++#define TP_FCT tracepoint_string(__func__)
++
+ /*
+  * Kernel pointer hashing renders much of the maple tree dump useless as tagged
+  * pointers get hashed to arbitrary values.
+@@ -2976,7 +2978,7 @@ static inline void mas_rebalance(struct
+       MA_STATE(l_mas, mas->tree, mas->index, mas->last);
+       MA_STATE(r_mas, mas->tree, mas->index, mas->last);
+-      trace_ma_op(__func__, mas);
++      trace_ma_op(TP_FCT, mas);
+       /*
+        * Rebalancing occurs if a node is insufficient.  Data is rebalanced
+@@ -3337,7 +3339,7 @@ static void mas_split(struct ma_state *m
+       MA_STATE(prev_l_mas, mas->tree, mas->index, mas->last);
+       MA_STATE(prev_r_mas, mas->tree, mas->index, mas->last);
+-      trace_ma_op(__func__, mas);
++      trace_ma_op(TP_FCT, mas);
+       mast.l = &l_mas;
+       mast.r = &r_mas;
+@@ -3512,7 +3514,7 @@ static bool mas_is_span_wr(struct ma_wr_
+                       return false;
+       }
+-      trace_ma_write(__func__, wr_mas->mas, wr_mas->r_max, entry);
++      trace_ma_write(TP_FCT, wr_mas->mas, wr_mas->r_max, entry);
+       return true;
+ }
+@@ -3756,7 +3758,7 @@ static noinline void mas_wr_spanning_sto
+        * of data may happen.
+        */
+       mas = wr_mas->mas;
+-      trace_ma_op(__func__, mas);
++      trace_ma_op(TP_FCT, mas);
+       if (unlikely(!mas->index && mas->last == ULONG_MAX))
+               return mas_new_root(mas, wr_mas->entry);
+@@ -3894,7 +3896,7 @@ done:
+       } else {
+               memcpy(wr_mas->node, newnode, sizeof(struct maple_node));
+       }
+-      trace_ma_write(__func__, mas, 0, wr_mas->entry);
++      trace_ma_write(TP_FCT, mas, 0, wr_mas->entry);
+       mas_update_gap(mas);
+       mas->end = new_end;
+       return;
+@@ -3938,7 +3940,7 @@ static inline void mas_wr_slot_store(str
+               mas->offset++; /* Keep mas accurate. */
+       }
+-      trace_ma_write(__func__, mas, 0, wr_mas->entry);
++      trace_ma_write(TP_FCT, mas, 0, wr_mas->entry);
+       /*
+        * Only update gap when the new entry is empty or there is an empty
+        * entry in the original two ranges.
+@@ -4059,7 +4061,7 @@ static inline void mas_wr_append(struct
+               mas_update_gap(mas);
+       mas->end = new_end;
+-      trace_ma_write(__func__, mas, new_end, wr_mas->entry);
++      trace_ma_write(TP_FCT, mas, new_end, wr_mas->entry);
+       return;
+ }
+@@ -4073,7 +4075,7 @@ static void mas_wr_bnode(struct ma_wr_st
+ {
+       struct maple_big_node b_node;
+-      trace_ma_write(__func__, wr_mas->mas, 0, wr_mas->entry);
++      trace_ma_write(TP_FCT, wr_mas->mas, 0, wr_mas->entry);
+       memset(&b_node, 0, sizeof(struct maple_big_node));
+       mas_store_b_node(wr_mas, &b_node, wr_mas->offset_end);
+       mas_commit_b_node(wr_mas, &b_node);
+@@ -5405,7 +5407,7 @@ void *mas_store(struct ma_state *mas, vo
+       int request;
+       MA_WR_STATE(wr_mas, mas, entry);
+-      trace_ma_write(__func__, mas, 0, entry);
++      trace_ma_write(TP_FCT, mas, 0, entry);
+ #ifdef CONFIG_DEBUG_MAPLE_TREE
+       if (MAS_WARN_ON(mas, mas->index > mas->last))
+               pr_err("Error %lX > %lX " PTR_FMT "\n", mas->index, mas->last,
+@@ -5506,7 +5508,7 @@ void mas_store_prealloc(struct ma_state
+       }
+ store:
+-      trace_ma_write(__func__, mas, 0, entry);
++      trace_ma_write(TP_FCT, mas, 0, entry);
+       mas_wr_store_entry(&wr_mas);
+       MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas));
+       mas_destroy(mas);
+@@ -6319,7 +6321,7 @@ void *mtree_load(struct maple_tree *mt,
+       MA_STATE(mas, mt, index, index);
+       void *entry;
+-      trace_ma_read(__func__, &mas);
++      trace_ma_read(TP_FCT, &mas);
+       rcu_read_lock();
+ retry:
+       entry = mas_start(&mas);
+@@ -6362,7 +6364,7 @@ int mtree_store_range(struct maple_tree
+       MA_STATE(mas, mt, index, last);
+       int ret = 0;
+-      trace_ma_write(__func__, &mas, 0, entry);
++      trace_ma_write(TP_FCT, &mas, 0, entry);
+       if (WARN_ON_ONCE(xa_is_advanced(entry)))
+               return -EINVAL;
+@@ -6585,7 +6587,7 @@ void *mtree_erase(struct maple_tree *mt,
+       void *entry = NULL;
+       MA_STATE(mas, mt, index, index);
+-      trace_ma_op(__func__, &mas);
++      trace_ma_op(TP_FCT, &mas);
+       mtree_lock(mt);
+       entry = mas_erase(&mas);
+@@ -6923,7 +6925,7 @@ void *mt_find(struct maple_tree *mt, uns
+       unsigned long copy = *index;
+ #endif
+-      trace_ma_read(__func__, &mas);
++      trace_ma_read(TP_FCT, &mas);
+       if ((*index) > max)
+               return NULL;
diff --git a/queue-6.17/nfsd-add-missing-fattr4_word2_clone_blksize-from-supported-attributes.patch b/queue-6.17/nfsd-add-missing-fattr4_word2_clone_blksize-from-supported-attributes.patch
new file mode 100644 (file)
index 0000000..08f12de
--- /dev/null
@@ -0,0 +1,32 @@
+From 4d3dbc2386fe051e44efad663e0ec828b98ab53f Mon Sep 17 00:00:00 2001
+From: Olga Kornievskaia <okorniev@redhat.com>
+Date: Thu, 9 Oct 2025 16:37:59 -0400
+Subject: nfsd: add missing FATTR4_WORD2_CLONE_BLKSIZE from supported attributes
+
+From: Olga Kornievskaia <okorniev@redhat.com>
+
+commit 4d3dbc2386fe051e44efad663e0ec828b98ab53f upstream.
+
+RFC 7862 Section 4.1.2 says that if the server supports CLONE it MUST
+support clone_blksize attribute.
+
+Fixes: d6ca7d2643ee ("NFSD: Implement FATTR4_CLONE_BLKSIZE attribute")
+Cc: stable@vger.kernel.org
+Signed-off-by: Olga Kornievskaia <okorniev@redhat.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/nfsd.h |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/nfsd/nfsd.h
++++ b/fs/nfsd/nfsd.h
+@@ -455,6 +455,7 @@ enum {
+ #define NFSD4_2_SUPPORTED_ATTRS_WORD2 \
+       (NFSD4_1_SUPPORTED_ATTRS_WORD2 | \
+       FATTR4_WORD2_MODE_UMASK | \
++      FATTR4_WORD2_CLONE_BLKSIZE | \
+       NFSD4_2_SECURITY_ATTRS | \
+       FATTR4_WORD2_XATTR_SUPPORT | \
+       FATTR4_WORD2_TIME_DELEG_ACCESS | \
diff --git a/queue-6.17/nfsd-fix-refcount-leak-in-nfsd_set_fh_dentry.patch b/queue-6.17/nfsd-fix-refcount-leak-in-nfsd_set_fh_dentry.patch
new file mode 100644 (file)
index 0000000..8d29c09
--- /dev/null
@@ -0,0 +1,60 @@
+From 8a7348a9ed70bda1c1f51d3f1815bcbdf9f3b38c Mon Sep 17 00:00:00 2001
+From: NeilBrown <neil@brown.name>
+Date: Wed, 8 Oct 2025 09:52:25 -0400
+Subject: nfsd: fix refcount leak in nfsd_set_fh_dentry()
+
+From: NeilBrown <neil@brown.name>
+
+commit 8a7348a9ed70bda1c1f51d3f1815bcbdf9f3b38c upstream.
+
+nfsd exports a "pseudo root filesystem" which is used by NFSv4 to find
+the various exported filesystems using LOOKUP requests from a known root
+filehandle.  NFSv3 uses the MOUNT protocol to find those exported
+filesystems and so is not given access to the pseudo root filesystem.
+
+If a v3 (or v2) client uses a filehandle from that filesystem,
+nfsd_set_fh_dentry() will report an error but still store the export
+in "struct svc_fh" even though it also drops the reference (exp_put()).
+This means that when fh_put() is called, an extra reference will be
+dropped, which can lead to use-after-free and possible denial of service.
+
+Normal NFS usage will not provide a pseudo-root filehandle to a v3
+client.  This bug can only be triggered by the client synthesising an
+incorrect filehandle.
+
+To fix this we move the assignments to the svc_fh later, after all
+possible error cases have been detected.
+
+Reported-and-tested-by: tianshuo han <hantianshuo233@gmail.com>
+Fixes: ef7f6c4904d0 ("nfsd: move V4ROOT version check to nfsd_set_fh_dentry()")
+Signed-off-by: NeilBrown <neil@brown.name>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Cc: stable@vger.kernel.org
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/nfsfh.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/fs/nfsd/nfsfh.c
++++ b/fs/nfsd/nfsfh.c
+@@ -269,9 +269,6 @@ static __be32 nfsd_set_fh_dentry(struct
+                               dentry);
+       }
+-      fhp->fh_dentry = dentry;
+-      fhp->fh_export = exp;
+-
+       switch (fhp->fh_maxsize) {
+       case NFS4_FHSIZE:
+               if (dentry->d_sb->s_export_op->flags & EXPORT_OP_NOATOMIC_ATTR)
+@@ -293,6 +290,9 @@ static __be32 nfsd_set_fh_dentry(struct
+                       goto out;
+       }
++      fhp->fh_dentry = dentry;
++      fhp->fh_export = exp;
++
+       return 0;
+ out:
+       exp_put(exp);
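
The fix above is an instance of a general ordering rule: don't publish a counted reference into a long-lived structure until every error path that also drops that reference has been ruled out, otherwise the caller's generic cleanup drops it a second time. A small illustrative sketch of the rule, with made-up types rather than the nfsd ones:

  #include <stdio.h>

  struct export { int refcount; };
  struct handle { struct export *exp; };

  static void export_put(struct export *exp)
  {
          exp->refcount--;
  }

  static int set_handle(struct handle *fh, struct export *exp, int ok)
  {
          if (!ok) {
                  /* The error path drops the reference it holds, so exp must
                   * not already be published in fh: a later generic cleanup
                   * of fh would drop the same reference a second time. */
                  export_put(exp);
                  return -1;
          }

          /* Publish into the handle only after all error cases are handled. */
          fh->exp = exp;
          return 0;
  }

  int main(void)
  {
          struct export e = { .refcount = 1 };
          struct handle fh = { 0 };

          if (set_handle(&fh, &e, 0))
                  printf("error path: refcount %d, fh.exp %s\n",
                         e.refcount, fh.exp ? "set" : "NULL");
          return 0;
  }
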
diff --git a/queue-6.17/nfsd-free-copynotify-stateid-in-nfs4_free_ol_stateid.patch b/queue-6.17/nfsd-free-copynotify-stateid-in-nfs4_free_ol_stateid.patch
new file mode 100644 (file)
index 0000000..5b51340
--- /dev/null
@@ -0,0 +1,85 @@
+From 4aa17144d5abc3c756883e3a010246f0dba8b468 Mon Sep 17 00:00:00 2001
+From: Olga Kornievskaia <okorniev@redhat.com>
+Date: Tue, 14 Oct 2025 13:59:59 -0400
+Subject: NFSD: free copynotify stateid in nfs4_free_ol_stateid()
+
+From: Olga Kornievskaia <okorniev@redhat.com>
+
+commit 4aa17144d5abc3c756883e3a010246f0dba8b468 upstream.
+
+Typically a copynotify stateid is freed either when the parent stateid
+is being closed/freed or in nfsd4_laundromat() if the stateid hasn't
+been used within a lease period.
+
+However, consider the case where the server got an OPEN (which created
+a parent stateid), followed by a COPY_NOTIFY using that stateid,
+followed by a client reboot. The new client instance, while doing
+CREATE_SESSION, force-expires the previous state of this client.
+This leads to the open state being freed through release_openowner->
+nfs4_free_ol_stateid(), which finds that it still has a copynotify
+stateid associated with it. We currently print a warning, and the
+following is triggered:
+
+WARNING: CPU: 1 PID: 8858 at fs/nfsd/nfs4state.c:1550 nfs4_free_ol_stateid+0xb0/0x100 [nfsd]
+
+This patch, instead, frees the associated copynotify stateid here.
+
+If the parent stateid is freed (without freeing the copynotify
+stateids associated with it), it leads to list corruption when the
+laundromat ends up freeing the copynotify state later.
+
+[ 1626.839430] Internal error: Oops - BUG: 00000000f2000800 [#1]  SMP
+[ 1626.842828] Modules linked in: nfnetlink_queue nfnetlink_log bluetooth cfg80211 rpcrdma rdma_cm iw_cm ib_cm ib_core nfsd nfs_acl lockd grace nfs_localio ext4 crc16 mbcache jbd2 overlay uinput snd_seq_dummy snd_hrtimer qrtr rfkill vfat fat uvcvideo snd_hda_codec_generic videobuf2_vmalloc videobuf2_memops snd_hda_intel uvc snd_intel_dspcfg videobuf2_v4l2 videobuf2_common snd_hda_codec snd_hda_core videodev snd_hwdep snd_seq mc snd_seq_device snd_pcm snd_timer snd soundcore sg loop auth_rpcgss vsock_loopback vmw_vsock_virtio_transport_common vmw_vsock_vmci_transport vmw_vmci vsock xfs 8021q garp stp llc mrp nvme ghash_ce e1000e nvme_core sr_mod nvme_keyring nvme_auth cdrom vmwgfx drm_ttm_helper ttm sunrpc dm_mirror dm_region_hash dm_log iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi fuse dm_multipath dm_mod nfnetlink
+[ 1626.855594] CPU: 2 UID: 0 PID: 199 Comm: kworker/u24:33 Kdump: loaded Tainted: G    B   W           6.17.0-rc7+ #22 PREEMPT(voluntary)
+[ 1626.857075] Tainted: [B]=BAD_PAGE, [W]=WARN
+[ 1626.857573] Hardware name: VMware, Inc. VMware20,1/VBSA, BIOS VMW201.00V.24006586.BA64.2406042154 06/04/2024
+[ 1626.858724] Workqueue: nfsd4 laundromat_main [nfsd]
+[ 1626.859304] pstate: 61400005 (nZCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--)
+[ 1626.860010] pc : __list_del_entry_valid_or_report+0x148/0x200
+[ 1626.860601] lr : __list_del_entry_valid_or_report+0x148/0x200
+[ 1626.861182] sp : ffff8000881d7a40
+[ 1626.861521] x29: ffff8000881d7a40 x28: 0000000000000018 x27: ffff0000c2a98200
+[ 1626.862260] x26: 0000000000000600 x25: 0000000000000000 x24: ffff8000881d7b20
+[ 1626.862986] x23: ffff0000c2a981e8 x22: 1fffe00012410e7d x21: ffff0000920873e8
+[ 1626.863701] x20: ffff0000920873e8 x19: ffff000086f22998 x18: 0000000000000000
+[ 1626.864421] x17: 20747562202c3839 x16: 3932326636383030 x15: 3030666666662065
+[ 1626.865092] x14: 6220646c756f6873 x13: 0000000000000001 x12: ffff60004fd9e4a3
+[ 1626.865713] x11: 1fffe0004fd9e4a2 x10: ffff60004fd9e4a2 x9 : dfff800000000000
+[ 1626.866320] x8 : 00009fffb0261b5e x7 : ffff00027ecf2513 x6 : 0000000000000001
+[ 1626.866938] x5 : ffff00027ecf2510 x4 : ffff60004fd9e4a3 x3 : 0000000000000000
+[ 1626.867553] x2 : 0000000000000000 x1 : ffff000096069640 x0 : 000000000000006d
+[ 1626.868167] Call trace:
+[ 1626.868382]  __list_del_entry_valid_or_report+0x148/0x200 (P)
+[ 1626.868876]  _free_cpntf_state_locked+0xd0/0x268 [nfsd]
+[ 1626.869368]  nfs4_laundromat+0x6f8/0x1058 [nfsd]
+[ 1626.869813]  laundromat_main+0x24/0x60 [nfsd]
+[ 1626.870231]  process_one_work+0x584/0x1050
+[ 1626.870595]  worker_thread+0x4c4/0xc60
+[ 1626.870893]  kthread+0x2f8/0x398
+[ 1626.871146]  ret_from_fork+0x10/0x20
+[ 1626.871422] Code: aa1303e1 aa1403e3 910e8000 97bc55d7 (d4210000)
+[ 1626.871892] SMP: stopping secondary CPUs
+
+Reported-by: rtm@csail.mit.edu
+Closes: https://lore.kernel.org/linux-nfs/d8f064c1-a26f-4eed-b4f0-1f7f608f415f@oracle.com/T/#t
+Fixes: 624322f1adc5 ("NFSD add COPY_NOTIFY operation")
+Cc: stable@vger.kernel.org
+Signed-off-by: Olga Kornievskaia <okorniev@redhat.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/nfs4state.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -1505,7 +1505,8 @@ static void nfs4_free_ol_stateid(struct
+       release_all_access(stp);
+       if (stp->st_stateowner)
+               nfs4_put_stateowner(stp->st_stateowner);
+-      WARN_ON(!list_empty(&stid->sc_cp_list));
++      if (!list_empty(&stid->sc_cp_list))
++              nfs4_free_cpntf_statelist(stid->sc_client->net, stid);
+       kmem_cache_free(stateid_slab, stid);
+ }
index 9a49d0e73063c698bc80806f9e77faa625ad1e46..49ea7ee5ede6713bb8132b3ed96e8f20fcc37f43 100644 (file)
@@ -146,3 +146,28 @@ arm64-dts-rockchip-drop-reset-from-rk3576-i2c9-node.patch
 pwm-adp5585-correct-mismatched-pwm-chip-info.patch
 hid-playstation-fix-memory-leak-in-dualshock4_get_ca.patch
 hid-uclogic-fix-potential-memory-leak-in-error-path.patch
+loongarch-kvm-restore-guest-pmu-if-it-is-enabled.patch
+loongarch-kvm-add-delay-until-timer-interrupt-injected.patch
+loongarch-kvm-fix-max-supported-vcpus-set-with-eiointc.patch
+kvm-guest_memfd-remove-bindings-on-memslot-deletion-when-gmem-is-dying.patch
+kvm-arm64-make-all-32bit-id-registers-fully-writable.patch
+kvm-svm-mark-vmcb_lbr-dirty-when-msr_ia32_debugctlmsr-is-updated.patch
+kvm-nsvm-always-recalculate-lbr-msr-intercepts-in-svm_update_lbrv.patch
+kvm-nsvm-fix-and-simplify-lbr-virtualization-handling-with-nested.patch
+kvm-vmx-fix-check-for-valid-gva-on-an-ept-violation.patch
+nfsd-fix-refcount-leak-in-nfsd_set_fh_dentry.patch
+nfsd-add-missing-fattr4_word2_clone_blksize-from-supported-attributes.patch
+nfsd-free-copynotify-stateid-in-nfs4_free_ol_stateid.patch
+gcov-add-support-for-gcc-15.patch
+ksmbd-close-accepted-socket-when-per-ip-limit-rejects-connection.patch
+ksm-use-range-walk-function-to-jump-over-holes-in-scan_get_next_rmap_item.patch
+kho-warn-and-fail-on-metadata-or-preserved-memory-in-scratch-area.patch
+kho-increase-metadata-bitmap-size-to-page_size.patch
+kho-allocate-metadata-directly-from-the-buddy-allocator.patch
+kho-warn-and-exit-when-unpreserved-page-wasn-t-preserved.patch
+strparser-fix-signed-unsigned-mismatch-bug.patch
+dma-mapping-benchmark-restore-padding-to-ensure-uabi-remained-consistent.patch
+maple_tree-fix-tracepoint-string-pointers.patch
+loongarch-consolidate-early_ioremap-ioremap_prot.patch
+loongarch-use-correct-accessor-to-read-fwpc-mwpc.patch
+loongarch-let-pte-pmd-_modify-record-the-status-of-_page_dirty.patch
diff --git a/queue-6.17/strparser-fix-signed-unsigned-mismatch-bug.patch b/queue-6.17/strparser-fix-signed-unsigned-mismatch-bug.patch
new file mode 100644 (file)
index 0000000..036a265
--- /dev/null
@@ -0,0 +1,47 @@
+From 4da4e4bde1c453ac5cc2dce5def81d504ae257ee Mon Sep 17 00:00:00 2001
+From: Nate Karstens <nate.karstens@garmin.com>
+Date: Thu, 6 Nov 2025 16:28:33 -0600
+Subject: strparser: Fix signed/unsigned mismatch bug
+
+From: Nate Karstens <nate.karstens@garmin.com>
+
+commit 4da4e4bde1c453ac5cc2dce5def81d504ae257ee upstream.
+
+The `len` member of the sk_buff is an unsigned int. This is cast to
+`ssize_t` (a signed type) for the first sk_buff in the comparison,
+but not the second sk_buff. On 32-bit systems, this can result in
+an integer underflow for certain values because unsigned arithmetic
+is being used.
+
+This appears to be an oversight: if the intention was to use unsigned
+arithmetic, then the first cast would have been omitted. The change
+ensures both len values are cast to `ssize_t`.
+
+The underflow causes an issue with ktls when multiple TLS PDUs are
+included in a single TCP segment. The mainline kernel does not use
+strparser for ktls anymore, but this is still useful for other
+features that still use strparser, and for backporting.
+
+Signed-off-by: Nate Karstens <nate.karstens@garmin.com>
+Cc: stable@vger.kernel.org
+Fixes: 43a0c6751a32 ("strparser: Stream parser for messages")
+Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
+Reviewed-by: Sabrina Dubroca <sd@queasysnail.net>
+Link: https://patch.msgid.link/20251106222835.1871628-1-nate.karstens@garmin.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/strparser/strparser.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/strparser/strparser.c
++++ b/net/strparser/strparser.c
+@@ -238,7 +238,7 @@ static int __strp_recv(read_descriptor_t
+                               strp_parser_err(strp, -EMSGSIZE, desc);
+                               break;
+                       } else if (len <= (ssize_t)head->len -
+-                                        skb->len - stm->strp.offset) {
++                                        (ssize_t)skb->len - stm->strp.offset) {
+                               /* Length must be into new skb (and also
+                                * greater than zero)
+                                */
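
The underflow described above is easy to reproduce outside the kernel. A standalone illustration of the bug class, with made-up lengths and none of the strparser types; on a 32-bit build the mixed-signedness comparison wrongly succeeds:

  #include <stdio.h>
  #include <sys/types.h>

  int main(void)
  {
          unsigned int head_len = 100, skb_len = 300;     /* hypothetical lengths */
          ssize_t len = 50;

          /*
           * Only the first operand is cast.  On a 32-bit build ssize_t has
           * the same rank as unsigned int, so the subtraction is performed
           * unsigned and 100 - 300 wraps to a huge positive value.
           */
          if (len <= (ssize_t)head_len - skb_len)
                  puts("mixed arithmetic: bound check passes incorrectly (32-bit)");

          /* Both operands cast: the difference is an ordinary signed -200. */
          if (len <= (ssize_t)head_len - (ssize_t)skb_len)
                  puts("signed arithmetic: bound check passes");
          else
                  puts("signed arithmetic: bound check fails, as expected");

          return 0;
  }
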