--- /dev/null
+From 23ee8a2563a0f24cf4964685ced23c32be444ab8 Mon Sep 17 00:00:00 2001
+From: Qinxin Xia <xiaqinxin@huawei.com>
+Date: Tue, 28 Oct 2025 20:08:59 +0800
+Subject: dma-mapping: benchmark: Restore padding to ensure uABI remained consistent
+
+From: Qinxin Xia <xiaqinxin@huawei.com>
+
+commit 23ee8a2563a0f24cf4964685ced23c32be444ab8 upstream.
+
+The padding field in the structure was previously reserved to
+maintain a stable interface for potential new fields, ensuring
+compatibility with user-space shared data structures.
+However, it was accidentally removed by tiantao in a prior commit,
+which may lead to incompatibility between user space and the kernel.
+
+This patch reinstates the padding to restore the original structure
+layout and preserve compatibility.
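+
+As an aside, the reserved-tail idiom this restores can be sketched as
+follows (hypothetical struct and sizes, not this exact uapi header):
+
+#include <linux/types.h>
+
+struct example_uabi {
+	__u64 value;
+	__u32 flags;
+	__u8 expansion[84];	/* reserved for future fields, keep zeroed */
+};
+
+/* New fields later consume expansion[] bytes; the size never changes. */
+_Static_assert(sizeof(struct example_uabi) == 96, "uABI size must stay fixed");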
+
+Fixes: 8ddde07a3d28 ("dma-mapping: benchmark: extract a common header file for map_benchmark definition")
+Cc: stable@vger.kernel.org
+Acked-by: Barry Song <baohua@kernel.org>
+Signed-off-by: Qinxin Xia <xiaqinxin@huawei.com>
+Reported-by: Barry Song <baohua@kernel.org>
+Closes: https://lore.kernel.org/lkml/CAGsJ_4waiZ2+NBJG+SCnbNk+nQ_ZF13_Q5FHJqZyxyJTcEop2A@mail.gmail.com/
+Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
+Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
+Link: https://lore.kernel.org/r/20251028120900.2265511-2-xiaqinxin@huawei.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/map_benchmark.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/include/linux/map_benchmark.h
++++ b/include/linux/map_benchmark.h
+@@ -27,5 +27,6 @@ struct map_benchmark {
+ __u32 dma_dir; /* DMA data direction */
+ __u32 dma_trans_ns; /* time for DMA transmission in ns */
+ __u32 granule; /* how many PAGE_SIZE will do map/unmap once a time */
++ __u8 expansion[76]; /* For future use */
+ };
+ #endif /* _KERNEL_DMA_BENCHMARK_H */
--- /dev/null
+From ec4d11fc4b2dd4a2fa8c9d801ee9753b74623554 Mon Sep 17 00:00:00 2001
+From: Peter Oberparleiter <oberpar@linux.ibm.com>
+Date: Tue, 28 Oct 2025 12:51:25 +0100
+Subject: gcov: add support for GCC 15
+
+From: Peter Oberparleiter <oberpar@linux.ibm.com>
+
+commit ec4d11fc4b2dd4a2fa8c9d801ee9753b74623554 upstream.
+
+Using gcov on kernels compiled with GCC 15 results in truncated 16-byte
+long .gcda files with no usable data. To fix this, update GCOV_COUNTERS
+to match the value defined by GCC 15.
+
+Tested with GCC 14.3.0 and GCC 15.2.0.
+
+Link: https://lkml.kernel.org/r/20251028115125.1319410-1-oberpar@linux.ibm.com
+Signed-off-by: Peter Oberparleiter <oberpar@linux.ibm.com>
+Reported-by: Matthieu Baerts <matttbe@kernel.org>
+Closes: https://github.com/linux-test-project/lcov/issues/445
+Tested-by: Matthieu Baerts <matttbe@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/gcov/gcc_4_7.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/kernel/gcov/gcc_4_7.c
++++ b/kernel/gcov/gcc_4_7.c
+@@ -18,7 +18,9 @@
+ #include <linux/mm.h>
+ #include "gcov.h"
+
+-#if (__GNUC__ >= 14)
++#if (__GNUC__ >= 15)
++#define GCOV_COUNTERS 10
++#elif (__GNUC__ >= 14)
+ #define GCOV_COUNTERS 9
+ #elif (__GNUC__ >= 10)
+ #define GCOV_COUNTERS 8
--- /dev/null
+From fa759cd75bce5489eed34596daa53f721849a86f Mon Sep 17 00:00:00 2001
+From: Pasha Tatashin <pasha.tatashin@soleen.com>
+Date: Mon, 20 Oct 2025 20:08:52 -0400
+Subject: kho: allocate metadata directly from the buddy allocator
+
+From: Pasha Tatashin <pasha.tatashin@soleen.com>
+
+commit fa759cd75bce5489eed34596daa53f721849a86f upstream.
+
+KHO allocates metadata for its preserved memory map using the slab
+allocator via kzalloc(). This metadata is temporary and is used by the
+next kernel during early boot to find preserved memory.
+
+A problem arises when KFENCE is enabled. kzalloc() calls can be randomly
+intercepted by kfence_alloc(), which services the allocation from a
+dedicated KFENCE memory pool. This pool is allocated early in boot via
+memblock.
+
+When booting via KHO, the memblock allocator is restricted to a "scratch
+area", forcing the KFENCE pool to be allocated within it. This creates a
+conflict, as the scratch area is expected to be ephemeral and
+overwriteable by a subsequent kexec. If KHO metadata is placed in this
+KFENCE pool, it leads to memory corruption when the next kernel is loaded.
+
+To fix this, modify KHO to allocate its metadata directly from the buddy
+allocator instead of slab.
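+
+For reference, the scoped-cleanup pattern the fix builds on works
+roughly as below (an illustrative sketch, not part of the diff):
+
+/* gfp.h now defines DEFINE_FREE(free_page, ...), enabling __free() */
+static void *alloc_meta_page(void)
+{
+	void *page __free(free_page) = (void *)get_zeroed_page(GFP_KERNEL);
+
+	if (!page)
+		return ERR_PTR(-ENOMEM);	/* free_page(0) is a no-op */
+
+	/* Success: hand ownership to the caller and suppress the free. */
+	return no_free_ptr(page);
+}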
+
+Link: https://lkml.kernel.org/r/20251021000852.2924827-4-pasha.tatashin@soleen.com
+Fixes: fc33e4b44b27 ("kexec: enable KHO support for memory preservation")
+Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
+Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
+Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
+Reviewed-by: David Matlack <dmatlack@google.com>
+Cc: Alexander Graf <graf@amazon.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Jason Gunthorpe <jgg@ziepe.ca>
+Cc: Jonathan Corbet <corbet@lwn.net>
+Cc: Masahiro Yamada <masahiroy@kernel.org>
+Cc: Miguel Ojeda <ojeda@kernel.org>
+Cc: Randy Dunlap <rdunlap@infradead.org>
+Cc: Samiullah Khawaja <skhawaja@google.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/gfp.h | 3 +++
+ kernel/kexec_handover.c | 6 +++---
+ 2 files changed, 6 insertions(+), 3 deletions(-)
+
+--- a/include/linux/gfp.h
++++ b/include/linux/gfp.h
+@@ -7,6 +7,7 @@
+ #include <linux/mmzone.h>
+ #include <linux/topology.h>
+ #include <linux/alloc_tag.h>
++#include <linux/cleanup.h>
+ #include <linux/sched.h>
+
+ struct vm_area_struct;
+@@ -463,4 +464,6 @@ static inline struct folio *folio_alloc_
+ /* This should be paired with folio_put() rather than free_contig_range(). */
+ #define folio_alloc_gigantic(...) alloc_hooks(folio_alloc_gigantic_noprof(__VA_ARGS__))
+
++DEFINE_FREE(free_page, void *, free_page((unsigned long)_T))
++
+ #endif /* __LINUX_GFP_H */
+--- a/kernel/kexec_handover.c
++++ b/kernel/kexec_handover.c
+@@ -102,7 +102,7 @@ static void *xa_load_or_alloc(struct xar
+ if (res)
+ return res;
+
+- void *elm __free(kfree) = kzalloc(PAGE_SIZE, GFP_KERNEL);
++ void *elm __free(free_page) = (void *)get_zeroed_page(GFP_KERNEL);
+
+ if (!elm)
+ return ERR_PTR(-ENOMEM);
+@@ -266,9 +266,9 @@ static_assert(sizeof(struct khoser_mem_c
+ static struct khoser_mem_chunk *new_chunk(struct khoser_mem_chunk *cur_chunk,
+ unsigned long order)
+ {
+- struct khoser_mem_chunk *chunk __free(kfree) = NULL;
++ struct khoser_mem_chunk *chunk __free(free_page) = NULL;
+
+- chunk = kzalloc(PAGE_SIZE, GFP_KERNEL);
++ chunk = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!chunk)
+ return ERR_PTR(-ENOMEM);
+
--- /dev/null
+From a2fff99f92dae9c0eaf0d75de3def70ec68dad92 Mon Sep 17 00:00:00 2001
+From: Pasha Tatashin <pasha.tatashin@soleen.com>
+Date: Mon, 20 Oct 2025 20:08:51 -0400
+Subject: kho: increase metadata bitmap size to PAGE_SIZE
+
+From: Pasha Tatashin <pasha.tatashin@soleen.com>
+
+commit a2fff99f92dae9c0eaf0d75de3def70ec68dad92 upstream.
+
+KHO memory preservation metadata is kept in 512-byte chunks, which
+requires allocating them from the slab allocator. Slabs are not safe to
+use with KHO because of kfence, and because partial slabs may leak data
+to the next kernel. Change the chunk size to PAGE_SIZE.
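+
+Spelling out the arithmetic: a 4096-byte bitmap holds 4096 * 8 = 32768
+bits, so one order-0 bitmap covers 32768 * 4 KiB = 128 MiB of address
+space, and 16 GiB of memory needs at most 16 GiB / 128 MiB = 128 such
+bitmaps, i.e. 128 * 4 KiB = 512 KiB of bitmap memory.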
+
+kfence specifically may cause memory corruption, because it randomly
+services slab allocations from a dedicated pool that can lie within the
+scratch area. The reason is that kfence allocates its pool before the
+KHO scratch area is marked as a CMA region.
+
+While this change could potentially increase metadata overhead on systems
+with sparsely preserved memory, this is being mitigated by ongoing work to
+reduce sparseness during preservation via 1G guest pages. Furthermore,
+this change aligns with future work on a stateless KHO, which will also
+use page-sized bitmaps for its radix tree metadata.
+
+Link: https://lkml.kernel.org/r/20251021000852.2924827-3-pasha.tatashin@soleen.com
+Fixes: fc33e4b44b27 ("kexec: enable KHO support for memory preservation")
+Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
+Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
+Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
+Cc: Alexander Graf <graf@amazon.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: David Matlack <dmatlack@google.com>
+Cc: Jason Gunthorpe <jgg@ziepe.ca>
+Cc: Jonathan Corbet <corbet@lwn.net>
+Cc: Masahiro Yamada <masahiroy@kernel.org>
+Cc: Miguel Ojeda <ojeda@kernel.org>
+Cc: Randy Dunlap <rdunlap@infradead.org>
+Cc: Samiullah Khawaja <skhawaja@google.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/kexec_handover.c | 21 +++++++++++----------
+ 1 file changed, 11 insertions(+), 10 deletions(-)
+
+--- a/kernel/kexec_handover.c
++++ b/kernel/kexec_handover.c
+@@ -52,10 +52,10 @@ early_param("kho", kho_parse_enable);
+ * Keep track of memory that is to be preserved across KHO.
+ *
+ * The serializing side uses two levels of xarrays to manage chunks of per-order
+- * 512 byte bitmaps. For instance if PAGE_SIZE = 4096, the entire 1G order of a
+- * 1TB system would fit inside a single 512 byte bitmap. For order 0 allocations
+- * each bitmap will cover 16M of address space. Thus, for 16G of memory at most
+- * 512K of bitmap memory will be needed for order 0.
++ * PAGE_SIZE byte bitmaps. For instance if PAGE_SIZE = 4096, the entire 1G order
++ * of a 8TB system would fit inside a single 4096 byte bitmap. For order 0
++ * allocations each bitmap will cover 128M of address space. Thus, for 16G of
++ * memory at most 512K of bitmap memory will be needed for order 0.
+ *
+ * This approach is fully incremental, as the serialization progresses folios
+ * can continue be aggregated to the tracker. The final step, immediately prior
+@@ -63,12 +63,14 @@ early_param("kho", kho_parse_enable);
+ * successor kernel to parse.
+ */
+
+-#define PRESERVE_BITS (512 * 8)
++#define PRESERVE_BITS (PAGE_SIZE * 8)
+
+ struct kho_mem_phys_bits {
+ DECLARE_BITMAP(preserve, PRESERVE_BITS);
+ };
+
++static_assert(sizeof(struct kho_mem_phys_bits) == PAGE_SIZE);
++
+ struct kho_mem_phys {
+ /*
+ * Points to kho_mem_phys_bits, a sparse bitmap array. Each bit is sized
+@@ -93,19 +95,19 @@ struct kho_serialization {
+ struct khoser_mem_chunk *preserved_mem_map;
+ };
+
+-static void *xa_load_or_alloc(struct xarray *xa, unsigned long index, size_t sz)
++static void *xa_load_or_alloc(struct xarray *xa, unsigned long index)
+ {
+ void *res = xa_load(xa, index);
+
+ if (res)
+ return res;
+
+- void *elm __free(kfree) = kzalloc(sz, GFP_KERNEL);
++ void *elm __free(kfree) = kzalloc(PAGE_SIZE, GFP_KERNEL);
+
+ if (!elm)
+ return ERR_PTR(-ENOMEM);
+
+- if (WARN_ON(kho_scratch_overlap(virt_to_phys(elm), sz)))
++ if (WARN_ON(kho_scratch_overlap(virt_to_phys(elm), PAGE_SIZE)))
+ return ERR_PTR(-EINVAL);
+
+ res = xa_cmpxchg(xa, index, NULL, elm, GFP_KERNEL);
+@@ -175,8 +177,7 @@ static int __kho_preserve_order(struct k
+ }
+ }
+
+- bits = xa_load_or_alloc(&physxa->phys_bits, pfn_high / PRESERVE_BITS,
+- sizeof(*bits));
++ bits = xa_load_or_alloc(&physxa->phys_bits, pfn_high / PRESERVE_BITS);
+ if (IS_ERR(bits))
+ return PTR_ERR(bits);
+
--- /dev/null
+From b05addf6f0596edb1f82ab4059438c7ef2d2686d Mon Sep 17 00:00:00 2001
+From: Pratyush Yadav <pratyush@kernel.org>
+Date: Mon, 3 Nov 2025 19:02:32 +0100
+Subject: kho: warn and exit when unpreserved page wasn't preserved
+
+From: Pratyush Yadav <pratyush@kernel.org>
+
+commit b05addf6f0596edb1f82ab4059438c7ef2d2686d upstream.
+
+Calling __kho_unpreserve() on a pair of (pfn, end_pfn) that wasn't
+preserved is a bug. Currently, if that is done, the physxa or bits can
+be NULL. This results in a soft lockup, since a NULL physxa or bits
+causes the loop to be redone without ever making any progress.
+
+Return when physxa or bits are not found, but WARN first to loudly
+indicate invalid behaviour.
+
+Link: https://lkml.kernel.org/r/20251103180235.71409-3-pratyush@kernel.org
+Fixes: fc33e4b44b27 ("kexec: enable KHO support for memory preservation")
+Signed-off-by: Pratyush Yadav <pratyush@kernel.org>
+Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
+Cc: Alexander Graf <graf@amazon.com>
+Cc: Baoquan He <bhe@redhat.com>
+Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/kexec_handover.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/kernel/kexec_handover.c
++++ b/kernel/kexec_handover.c
+@@ -131,12 +131,12 @@ static void __kho_unpreserve(struct kho_
+ const unsigned long pfn_high = pfn >> order;
+
+ physxa = xa_load(&track->orders, order);
+- if (!physxa)
+- continue;
++ if (WARN_ON_ONCE(!physxa))
++ return;
+
+ bits = xa_load(&physxa->phys_bits, pfn_high / PRESERVE_BITS);
+- if (!bits)
+- continue;
++ if (WARN_ON_ONCE(!bits))
++ return;
+
+ clear_bit(pfn_high % PRESERVE_BITS, bits->preserve);
+
--- /dev/null
+From e38f65d317df1fd2dcafe614d9c537475ecf9992 Mon Sep 17 00:00:00 2001
+From: Pasha Tatashin <pasha.tatashin@soleen.com>
+Date: Mon, 20 Oct 2025 20:08:50 -0400
+Subject: kho: warn and fail on metadata or preserved memory in scratch area
+
+From: Pasha Tatashin <pasha.tatashin@soleen.com>
+
+commit e38f65d317df1fd2dcafe614d9c537475ecf9992 upstream.
+
+Patch series "KHO: kfence + KHO memory corruption fix", v3.
+
+This series fixes a memory corruption bug in KHO that occurs when KFENCE
+is enabled.
+
+The root cause is that KHO metadata, allocated via kzalloc(), can be
+randomly serviced by kfence_alloc(). When a kernel boots via KHO, the
+early memblock allocator is restricted to a "scratch area". This forces
+the KFENCE pool to be allocated within this scratch area, creating a
+conflict. If KHO metadata is subsequently placed in this pool, it gets
+corrupted during the next kexec operation.
+
+Google is using KHO and has had obscure crashes due to this memory
+corruption, with stack traces all over the place. I would prefer this
+fix to be properly backported to stable so we can also automatically
+consume it once we switch to the upstream KHO.
+
+Patch 1/3 introduces a debug-only feature (CONFIG_KEXEC_HANDOVER_DEBUG)
+that adds checks to detect and fail any operation that attempts to place
+KHO metadata or preserved memory within the scratch area. This serves as
+a validation and diagnostic tool to confirm the problem without affecting
+production builds.
+
+Patch 2/3 increases the bitmap size to PAGE_SIZE so that the buddy
+allocator can be used.
+
+Patch 3/3 provides the fix by modifying KHO to allocate its metadata
+directly from the buddy allocator instead of slab. This bypasses the
+KFENCE interception entirely.
+
+
+This patch (of 3):
+
+It is invalid for KHO metadata or preserved memory regions to be located
+within the KHO scratch area, as this area is overwritten when the next
+kernel is loaded, and used early in boot by the next kernel. This can
+lead to memory corruption.
+
+Add checks to kho_preserve_* and KHO's internal metadata allocators
+(xa_load_or_alloc, new_chunk) to verify that the physical address of the
+memory does not overlap with any defined scratch region. If an overlap is
+detected, the operation will fail and a WARN_ON is triggered. To avoid
+performance overhead in production kernels, these checks are enabled only
+when CONFIG_KEXEC_HANDOVER_DEBUG is selected.
+
+[rppt@kernel.org: fix KEXEC_HANDOVER_DEBUG Kconfig dependency]
+ Link: https://lkml.kernel.org/r/aQHUyyFtiNZhx8jo@kernel.org
+[pasha.tatashin@soleen.com: build fix]
+ Link: https://lkml.kernel.org/r/CA+CK2bBnorfsTymKtv4rKvqGBHs=y=MjEMMRg_tE-RME6n-zUw@mail.gmail.com
+Link: https://lkml.kernel.org/r/20251021000852.2924827-1-pasha.tatashin@soleen.com
+Link: https://lkml.kernel.org/r/20251021000852.2924827-2-pasha.tatashin@soleen.com
+Fixes: fc33e4b44b27 ("kexec: enable KHO support for memory preservation")
+Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
+Signed-off-by: Mike Rapoport <rppt@kernel.org>
+Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
+Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
+Cc: Alexander Graf <graf@amazon.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: David Matlack <dmatlack@google.com>
+Cc: Jason Gunthorpe <jgg@ziepe.ca>
+Cc: Jonathan Corbet <corbet@lwn.net>
+Cc: Masahiro Yamada <masahiroy@kernel.org>
+Cc: Miguel Ojeda <ojeda@kernel.org>
+Cc: Randy Dunlap <rdunlap@infradead.org>
+Cc: Samiullah Khawaja <skhawaja@google.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/Kconfig.kexec | 9 ++++++
+ kernel/Makefile | 1
+ kernel/kexec_handover.c | 57 ++++++++++++++++++++++++++-------------
+ kernel/kexec_handover_debug.c | 25 +++++++++++++++++
+ kernel/kexec_handover_internal.h | 20 +++++++++++++
+ 5 files changed, 93 insertions(+), 19 deletions(-)
+ create mode 100644 kernel/kexec_handover_debug.c
+ create mode 100644 kernel/kexec_handover_internal.h
+
+--- a/kernel/Kconfig.kexec
++++ b/kernel/Kconfig.kexec
+@@ -109,6 +109,15 @@ config KEXEC_HANDOVER
+ to keep data or state alive across the kexec. For this to work,
+ both source and target kernels need to have this option enabled.
+
++config KEXEC_HANDOVER_DEBUG
++ bool "Enable Kexec Handover debug checks"
++ depends on KEXEC_HANDOVER
++ help
++ This option enables extra sanity checks for the Kexec Handover
++ subsystem. Since, KHO performance is crucial in live update
++ scenarios and the extra code might be adding overhead it is
++ only optionally enabled.
++
+ config CRASH_DUMP
+ bool "kernel crash dumps"
+ default ARCH_DEFAULT_CRASH_DUMP
+--- a/kernel/Makefile
++++ b/kernel/Makefile
+@@ -82,6 +82,7 @@ obj-$(CONFIG_KEXEC) += kexec.o
+ obj-$(CONFIG_KEXEC_FILE) += kexec_file.o
+ obj-$(CONFIG_KEXEC_ELF) += kexec_elf.o
+ obj-$(CONFIG_KEXEC_HANDOVER) += kexec_handover.o
++obj-$(CONFIG_KEXEC_HANDOVER_DEBUG) += kexec_handover_debug.o
+ obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
+ obj-$(CONFIG_COMPAT) += compat.o
+ obj-$(CONFIG_CGROUPS) += cgroup/
+--- a/kernel/kexec_handover.c
++++ b/kernel/kexec_handover.c
+@@ -8,6 +8,7 @@
+
+ #define pr_fmt(fmt) "KHO: " fmt
+
++#include <linux/cleanup.h>
+ #include <linux/cma.h>
+ #include <linux/count_zeros.h>
+ #include <linux/debugfs.h>
+@@ -21,6 +22,7 @@
+
+ #include <asm/early_ioremap.h>
+
++#include "kexec_handover_internal.h"
+ /*
+ * KHO is tightly coupled with mm init and needs access to some of mm
+ * internal APIs.
+@@ -93,26 +95,26 @@ struct kho_serialization {
+
+ static void *xa_load_or_alloc(struct xarray *xa, unsigned long index, size_t sz)
+ {
+- void *elm, *res;
++ void *res = xa_load(xa, index);
+
+- elm = xa_load(xa, index);
+- if (elm)
+- return elm;
++ if (res)
++ return res;
++
++ void *elm __free(kfree) = kzalloc(sz, GFP_KERNEL);
+
+- elm = kzalloc(sz, GFP_KERNEL);
+ if (!elm)
+ return ERR_PTR(-ENOMEM);
+
++ if (WARN_ON(kho_scratch_overlap(virt_to_phys(elm), sz)))
++ return ERR_PTR(-EINVAL);
++
+ res = xa_cmpxchg(xa, index, NULL, elm, GFP_KERNEL);
+ if (xa_is_err(res))
+- res = ERR_PTR(xa_err(res));
+-
+- if (res) {
+- kfree(elm);
++ return ERR_PTR(xa_err(res));
++ else if (res)
+ return res;
+- }
+
+- return elm;
++ return no_free_ptr(elm);
+ }
+
+ static void __kho_unpreserve(struct kho_mem_track *track, unsigned long pfn,
+@@ -263,15 +265,19 @@ static_assert(sizeof(struct khoser_mem_c
+ static struct khoser_mem_chunk *new_chunk(struct khoser_mem_chunk *cur_chunk,
+ unsigned long order)
+ {
+- struct khoser_mem_chunk *chunk;
++ struct khoser_mem_chunk *chunk __free(kfree) = NULL;
+
+ chunk = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!chunk)
+- return NULL;
++ return ERR_PTR(-ENOMEM);
++
++ if (WARN_ON(kho_scratch_overlap(virt_to_phys(chunk), PAGE_SIZE)))
++ return ERR_PTR(-EINVAL);
++
+ chunk->hdr.order = order;
+ if (cur_chunk)
+ KHOSER_STORE_PTR(cur_chunk->hdr.next, chunk);
+- return chunk;
++ return no_free_ptr(chunk);
+ }
+
+ static void kho_mem_ser_free(struct khoser_mem_chunk *first_chunk)
+@@ -292,14 +298,17 @@ static int kho_mem_serialize(struct kho_
+ struct khoser_mem_chunk *chunk = NULL;
+ struct kho_mem_phys *physxa;
+ unsigned long order;
++ int err = -ENOMEM;
+
+ xa_for_each(&ser->track.orders, order, physxa) {
+ struct kho_mem_phys_bits *bits;
+ unsigned long phys;
+
+ chunk = new_chunk(chunk, order);
+- if (!chunk)
++ if (IS_ERR(chunk)) {
++ err = PTR_ERR(chunk);
+ goto err_free;
++ }
+
+ if (!first_chunk)
+ first_chunk = chunk;
+@@ -309,8 +318,10 @@ static int kho_mem_serialize(struct kho_
+
+ if (chunk->hdr.num_elms == ARRAY_SIZE(chunk->bitmaps)) {
+ chunk = new_chunk(chunk, order);
+- if (!chunk)
++ if (IS_ERR(chunk)) {
++ err = PTR_ERR(chunk);
+ goto err_free;
++ }
+ }
+
+ elm = &chunk->bitmaps[chunk->hdr.num_elms];
+@@ -327,7 +338,7 @@ static int kho_mem_serialize(struct kho_
+
+ err_free:
+ kho_mem_ser_free(first_chunk);
+- return -ENOMEM;
++ return err;
+ }
+
+ static void __init deserialize_bitmap(unsigned int order,
+@@ -380,8 +391,8 @@ static void __init kho_mem_deserialize(c
+ * area for early allocations that happen before page allocator is
+ * initialized.
+ */
+-static struct kho_scratch *kho_scratch;
+-static unsigned int kho_scratch_cnt;
++struct kho_scratch *kho_scratch;
++unsigned int kho_scratch_cnt;
+
+ /*
+ * The scratch areas are scaled by default as percent of memory allocated from
+@@ -684,6 +695,9 @@ int kho_preserve_folio(struct folio *fol
+ if (kho_out.finalized)
+ return -EBUSY;
+
++ if (WARN_ON(kho_scratch_overlap(pfn << PAGE_SHIFT, PAGE_SIZE << order)))
++ return -EINVAL;
++
+ return __kho_preserve_order(track, pfn, order);
+ }
+ EXPORT_SYMBOL_GPL(kho_preserve_folio);
+@@ -713,6 +727,11 @@ int kho_preserve_phys(phys_addr_t phys,
+ if (!PAGE_ALIGNED(phys) || !PAGE_ALIGNED(size))
+ return -EINVAL;
+
++ if (WARN_ON(kho_scratch_overlap(start_pfn << PAGE_SHIFT,
++ nr_pages << PAGE_SHIFT))) {
++ return -EINVAL;
++ }
++
+ while (pfn < end_pfn) {
+ const unsigned int order =
+ min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn));
+--- /dev/null
++++ b/kernel/kexec_handover_debug.c
+@@ -0,0 +1,25 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/*
++ * kexec_handover_debug.c - kexec handover optional debug functionality
++ * Copyright (C) 2025 Google LLC, Pasha Tatashin <pasha.tatashin@soleen.com>
++ */
++
++#define pr_fmt(fmt) "KHO: " fmt
++
++#include "kexec_handover_internal.h"
++
++bool kho_scratch_overlap(phys_addr_t phys, size_t size)
++{
++ phys_addr_t scratch_start, scratch_end;
++ unsigned int i;
++
++ for (i = 0; i < kho_scratch_cnt; i++) {
++ scratch_start = kho_scratch[i].addr;
++ scratch_end = kho_scratch[i].addr + kho_scratch[i].size;
++
++ if (phys < scratch_end && (phys + size) > scratch_start)
++ return true;
++ }
++
++ return false;
++}
+--- /dev/null
++++ b/kernel/kexec_handover_internal.h
+@@ -0,0 +1,20 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef LINUX_KEXEC_HANDOVER_INTERNAL_H
++#define LINUX_KEXEC_HANDOVER_INTERNAL_H
++
++#include <linux/kexec_handover.h>
++#include <linux/types.h>
++
++extern struct kho_scratch *kho_scratch;
++extern unsigned int kho_scratch_cnt;
++
++#ifdef CONFIG_KEXEC_HANDOVER_DEBUG
++bool kho_scratch_overlap(phys_addr_t phys, size_t size);
++#else
++static inline bool kho_scratch_overlap(phys_addr_t phys, size_t size)
++{
++ return false;
++}
++#endif /* CONFIG_KEXEC_HANDOVER_DEBUG */
++
++#endif /* LINUX_KEXEC_HANDOVER_INTERNAL_H */
--- /dev/null
+From f5548c318d6520d4fa3c5ed6003eeb710763cbc5 Mon Sep 17 00:00:00 2001
+From: Pedro Demarchi Gomes <pedrodemargomes@gmail.com>
+Date: Wed, 22 Oct 2025 12:30:59 -0300
+Subject: ksm: use range-walk function to jump over holes in scan_get_next_rmap_item
+
+From: Pedro Demarchi Gomes <pedrodemargomes@gmail.com>
+
+commit f5548c318d6520d4fa3c5ed6003eeb710763cbc5 upstream.
+
+Currently, scan_get_next_rmap_item() walks every page address in a VMA to
+locate mergeable pages. This becomes highly inefficient when scanning
+large virtual memory areas that contain mostly unmapped regions, causing
+ksmd to use a large amount of CPU without deduplicating many pages.
+
+This patch replaces the per-address lookup with a range walk using
+walk_page_range(). The range walker allows KSM to skip over entire
+unmapped holes in a VMA, avoiding unnecessary lookups. This problem was
+previously discussed in [1].
+
+Consider the following test program which creates a 32 TiB mapping in the
+virtual address space but only populates a single page:
+
+#include <unistd.h>
+#include <stdio.h>
+#include <sys/mman.h>
+
+/* 32 TiB */
+const size_t size = 32ul * 1024 * 1024 * 1024 * 1024;
+
+int main() {
+ char *area = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ MAP_NORESERVE | MAP_PRIVATE | MAP_ANON, -1, 0);
+
+ if (area == MAP_FAILED) {
+ perror("mmap() failed\n");
+ return -1;
+ }
+
+ /* Populate a single page such that we get an anon_vma. */
+ *area = 0;
+
+ /* Enable KSM. */
+ madvise(area, size, MADV_MERGEABLE);
+ pause();
+ return 0;
+}
+
+$ ./ksm-sparse &
+$ echo 1 > /sys/kernel/mm/ksm/run
+
+Without this patch ksmd uses 100% of the CPU for a long time (more than 1
+hour on my test machine) scanning all of the 32 TiB virtual address space
+that contains only one mapped page. This leaves ksmd essentially
+deadlocked, unable to deduplicate anything of value. With this patch ksmd
+walks only the one mapped page and skips the rest of the 32 TiB virtual
+address space, making the scan fast and using little CPU.
+
+Link: https://lkml.kernel.org/r/20251023035841.41406-1-pedrodemargomes@gmail.com
+Link: https://lkml.kernel.org/r/20251022153059.22763-1-pedrodemargomes@gmail.com
+Link: https://lore.kernel.org/linux-mm/423de7a3-1c62-4e72-8e79-19a6413e420c@redhat.com/ [1]
+Fixes: 31dbd01f3143 ("ksm: Kernel SamePage Merging")
+Signed-off-by: Pedro Demarchi Gomes <pedrodemargomes@gmail.com>
+Co-developed-by: David Hildenbrand <david@redhat.com>
+Signed-off-by: David Hildenbrand <david@redhat.com>
+Reported-by: craftfever <craftfever@airmail.cc>
+Closes: https://lkml.kernel.org/r/020cf8de6e773bb78ba7614ef250129f11a63781@murena.io
+Suggested-by: David Hildenbrand <david@redhat.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: Chengming Zhou <chengming.zhou@linux.dev>
+Cc: xu xin <xu.xin16@zte.com.cn>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/ksm.c | 113 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
+ 1 file changed, 104 insertions(+), 9 deletions(-)
+
+--- a/mm/ksm.c
++++ b/mm/ksm.c
+@@ -2458,6 +2458,95 @@ static bool should_skip_rmap_item(struct
+ return true;
+ }
+
++struct ksm_next_page_arg {
++ struct folio *folio;
++ struct page *page;
++ unsigned long addr;
++};
++
++static int ksm_next_page_pmd_entry(pmd_t *pmdp, unsigned long addr, unsigned long end,
++ struct mm_walk *walk)
++{
++ struct ksm_next_page_arg *private = walk->private;
++ struct vm_area_struct *vma = walk->vma;
++ pte_t *start_ptep = NULL, *ptep, pte;
++ struct mm_struct *mm = walk->mm;
++ struct folio *folio;
++ struct page *page;
++ spinlock_t *ptl;
++ pmd_t pmd;
++
++ if (ksm_test_exit(mm))
++ return 0;
++
++ cond_resched();
++
++ pmd = pmdp_get_lockless(pmdp);
++ if (!pmd_present(pmd))
++ return 0;
++
++ if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && pmd_leaf(pmd)) {
++ ptl = pmd_lock(mm, pmdp);
++ pmd = pmdp_get(pmdp);
++
++ if (!pmd_present(pmd)) {
++ goto not_found_unlock;
++ } else if (pmd_leaf(pmd)) {
++ page = vm_normal_page_pmd(vma, addr, pmd);
++ if (!page)
++ goto not_found_unlock;
++ folio = page_folio(page);
++
++ if (folio_is_zone_device(folio) || !folio_test_anon(folio))
++ goto not_found_unlock;
++
++ page += ((addr & (PMD_SIZE - 1)) >> PAGE_SHIFT);
++ goto found_unlock;
++ }
++ spin_unlock(ptl);
++ }
++
++ start_ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
++ if (!start_ptep)
++ return 0;
++
++ for (ptep = start_ptep; addr < end; ptep++, addr += PAGE_SIZE) {
++ pte = ptep_get(ptep);
++
++ if (!pte_present(pte))
++ continue;
++
++ page = vm_normal_page(vma, addr, pte);
++ if (!page)
++ continue;
++ folio = page_folio(page);
++
++ if (folio_is_zone_device(folio) || !folio_test_anon(folio))
++ continue;
++ goto found_unlock;
++ }
++
++not_found_unlock:
++ spin_unlock(ptl);
++ if (start_ptep)
++ pte_unmap(start_ptep);
++ return 0;
++found_unlock:
++ folio_get(folio);
++ spin_unlock(ptl);
++ if (start_ptep)
++ pte_unmap(start_ptep);
++ private->page = page;
++ private->folio = folio;
++ private->addr = addr;
++ return 1;
++}
++
++static struct mm_walk_ops ksm_next_page_ops = {
++ .pmd_entry = ksm_next_page_pmd_entry,
++ .walk_lock = PGWALK_RDLOCK,
++};
++
+ static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page)
+ {
+ struct mm_struct *mm;
+@@ -2545,21 +2634,27 @@ next_mm:
+ ksm_scan.address = vma->vm_end;
+
+ while (ksm_scan.address < vma->vm_end) {
++ struct ksm_next_page_arg ksm_next_page_arg;
+ struct page *tmp_page = NULL;
+- struct folio_walk fw;
+ struct folio *folio;
+
+ if (ksm_test_exit(mm))
+ break;
+
+- folio = folio_walk_start(&fw, vma, ksm_scan.address, 0);
+- if (folio) {
+- if (!folio_is_zone_device(folio) &&
+- folio_test_anon(folio)) {
+- folio_get(folio);
+- tmp_page = fw.page;
+- }
+- folio_walk_end(&fw, vma);
++ int found;
++
++ found = walk_page_range_vma(vma, ksm_scan.address,
++ vma->vm_end,
++ &ksm_next_page_ops,
++ &ksm_next_page_arg);
++
++ if (found > 0) {
++ folio = ksm_next_page_arg.folio;
++ tmp_page = ksm_next_page_arg.page;
++ ksm_scan.address = ksm_next_page_arg.addr;
++ } else {
++ VM_WARN_ON_ONCE(found < 0);
++ ksm_scan.address = vma->vm_end - PAGE_SIZE;
+ }
+
+ if (tmp_page) {
--- /dev/null
+From 98a5fd31cbf72d46bf18e50b3ab0ce86d5f319a9 Mon Sep 17 00:00:00 2001
+From: Joshua Rogers <linux@joshua.hu>
+Date: Sat, 8 Nov 2025 22:59:23 +0800
+Subject: ksmbd: close accepted socket when per-IP limit rejects connection
+
+From: Joshua Rogers <linux@joshua.hu>
+
+commit 98a5fd31cbf72d46bf18e50b3ab0ce86d5f319a9 upstream.
+
+When the per-IP connection limit is exceeded in ksmbd_kthread_fn(),
+the code sets ret = -EAGAIN and continues the accept loop without
+closing the just-accepted socket. That leaks one socket per rejected
+attempt from a single IP and enables a trivial remote DoS.
+
+Release client_sk before continuing.
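+
+A simplified sketch of the corrected accept loop (the helper names here
+are hypothetical; the real logic lives in ksmbd_kthread_fn()):
+
+	struct socket *client_sk;
+
+	while (!kthread_should_stop()) {
+		if (kernel_accept(listen_sk, &client_sk, O_NONBLOCK))
+			continue;
+		if (over_per_ip_limit(client_sk)) {
+			/* Reject: release the socket instead of leaking it. */
+			sock_release(client_sk);
+			continue;
+		}
+		queue_connection(client_sk);
+	}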
+
+This bug was found with ZeroPath.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Joshua Rogers <linux@joshua.hu>
+Acked-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/smb/server/transport_tcp.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/fs/smb/server/transport_tcp.c
++++ b/fs/smb/server/transport_tcp.c
+@@ -284,8 +284,11 @@ static int ksmbd_kthread_fn(void *p)
+ }
+ }
+ up_read(&conn_list_lock);
+- if (ret == -EAGAIN)
++ if (ret == -EAGAIN) {
++ /* Per-IP limit hit: release the just-accepted socket. */
++ sock_release(client_sk);
+ continue;
++ }
+
+ skip_max_ip_conns_limit:
+ if (server_conf.max_connections &&
--- /dev/null
+From 3f9eacf4f0705876a5d6526d7d320ca91d7d7a16 Mon Sep 17 00:00:00 2001
+From: Marc Zyngier <maz@kernel.org>
+Date: Thu, 30 Oct 2025 12:27:05 +0000
+Subject: KVM: arm64: Make all 32bit ID registers fully writable
+
+From: Marc Zyngier <maz@kernel.org>
+
+commit 3f9eacf4f0705876a5d6526d7d320ca91d7d7a16 upstream.
+
+32bit ID registers aren't getting much love these days, and are
+often missed in updates. One of these updates broke restoring
+a GICv2 guest on a GICv3 machine.
+
+Instead of performing a piecemeal fix, just bite the bullet
+and make all 32bit ID regs fully writable. KVM itself never
+relies on them for anything, and if the VMM wants to mess up
+the guest, so be it.
+
+Fixes: 5cb57a1aff755 ("KVM: arm64: Zero ID_AA64PFR0_EL1.GIC when no GICv3 is presented to the guest")
+Reported-by: Peter Maydell <peter.maydell@linaro.org>
+Cc: stable@vger.kernel.org
+Reviewed-by: Oliver Upton <oupton@kernel.org>
+Link: https://patch.msgid.link/20251030122707.2033690-2-maz@kernel.org
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kvm/sys_regs.c | 59 ++++++++++++++++++++++++----------------------
+ 1 file changed, 31 insertions(+), 28 deletions(-)
+
+--- a/arch/arm64/kvm/sys_regs.c
++++ b/arch/arm64/kvm/sys_regs.c
+@@ -2515,19 +2515,23 @@ static bool bad_redir_trap(struct kvm_vc
+ .val = 0, \
+ }
+
+-/* sys_reg_desc initialiser for known cpufeature ID registers */
+-#define AA32_ID_SANITISED(name) { \
+- ID_DESC(name), \
+- .visibility = aa32_id_visibility, \
+- .val = 0, \
+-}
+-
+ /* sys_reg_desc initialiser for writable ID registers */
+ #define ID_WRITABLE(name, mask) { \
+ ID_DESC(name), \
+ .val = mask, \
+ }
+
++/*
++ * 32bit ID regs are fully writable when the guest is 32bit
++ * capable. Nothing in the KVM code should rely on 32bit features
++ * anyway, only 64bit, so let the VMM do its worse.
++ */
++#define AA32_ID_WRITABLE(name) { \
++ ID_DESC(name), \
++ .visibility = aa32_id_visibility, \
++ .val = GENMASK(31, 0), \
++}
++
+ /* sys_reg_desc initialiser for cpufeature ID registers that need filtering */
+ #define ID_FILTERED(sysreg, name, mask) { \
+ ID_DESC(sysreg), \
+@@ -3039,40 +3043,39 @@ static const struct sys_reg_desc sys_reg
+
+ /* AArch64 mappings of the AArch32 ID registers */
+ /* CRm=1 */
+- AA32_ID_SANITISED(ID_PFR0_EL1),
+- AA32_ID_SANITISED(ID_PFR1_EL1),
++ AA32_ID_WRITABLE(ID_PFR0_EL1),
++ AA32_ID_WRITABLE(ID_PFR1_EL1),
+ { SYS_DESC(SYS_ID_DFR0_EL1),
+ .access = access_id_reg,
+ .get_user = get_id_reg,
+ .set_user = set_id_dfr0_el1,
+ .visibility = aa32_id_visibility,
+ .reset = read_sanitised_id_dfr0_el1,
+- .val = ID_DFR0_EL1_PerfMon_MASK |
+- ID_DFR0_EL1_CopDbg_MASK, },
++ .val = GENMASK(31, 0) },
+ ID_HIDDEN(ID_AFR0_EL1),
+- AA32_ID_SANITISED(ID_MMFR0_EL1),
+- AA32_ID_SANITISED(ID_MMFR1_EL1),
+- AA32_ID_SANITISED(ID_MMFR2_EL1),
+- AA32_ID_SANITISED(ID_MMFR3_EL1),
++ AA32_ID_WRITABLE(ID_MMFR0_EL1),
++ AA32_ID_WRITABLE(ID_MMFR1_EL1),
++ AA32_ID_WRITABLE(ID_MMFR2_EL1),
++ AA32_ID_WRITABLE(ID_MMFR3_EL1),
+
+ /* CRm=2 */
+- AA32_ID_SANITISED(ID_ISAR0_EL1),
+- AA32_ID_SANITISED(ID_ISAR1_EL1),
+- AA32_ID_SANITISED(ID_ISAR2_EL1),
+- AA32_ID_SANITISED(ID_ISAR3_EL1),
+- AA32_ID_SANITISED(ID_ISAR4_EL1),
+- AA32_ID_SANITISED(ID_ISAR5_EL1),
+- AA32_ID_SANITISED(ID_MMFR4_EL1),
+- AA32_ID_SANITISED(ID_ISAR6_EL1),
++ AA32_ID_WRITABLE(ID_ISAR0_EL1),
++ AA32_ID_WRITABLE(ID_ISAR1_EL1),
++ AA32_ID_WRITABLE(ID_ISAR2_EL1),
++ AA32_ID_WRITABLE(ID_ISAR3_EL1),
++ AA32_ID_WRITABLE(ID_ISAR4_EL1),
++ AA32_ID_WRITABLE(ID_ISAR5_EL1),
++ AA32_ID_WRITABLE(ID_MMFR4_EL1),
++ AA32_ID_WRITABLE(ID_ISAR6_EL1),
+
+ /* CRm=3 */
+- AA32_ID_SANITISED(MVFR0_EL1),
+- AA32_ID_SANITISED(MVFR1_EL1),
+- AA32_ID_SANITISED(MVFR2_EL1),
++ AA32_ID_WRITABLE(MVFR0_EL1),
++ AA32_ID_WRITABLE(MVFR1_EL1),
++ AA32_ID_WRITABLE(MVFR2_EL1),
+ ID_UNALLOCATED(3,3),
+- AA32_ID_SANITISED(ID_PFR2_EL1),
++ AA32_ID_WRITABLE(ID_PFR2_EL1),
+ ID_HIDDEN(ID_DFR1_EL1),
+- AA32_ID_SANITISED(ID_MMFR5_EL1),
++ AA32_ID_WRITABLE(ID_MMFR5_EL1),
+ ID_UNALLOCATED(3,7),
+
+ /* AArch64 ID registers */
--- /dev/null
+From ae431059e75d36170a5ae6b44cc4d06d43613215 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Mon, 3 Nov 2025 17:12:05 -0800
+Subject: KVM: guest_memfd: Remove bindings on memslot deletion when gmem is dying
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit ae431059e75d36170a5ae6b44cc4d06d43613215 upstream.
+
+When unbinding a memslot from a guest_memfd instance, remove the bindings
+even if the guest_memfd file is dying, i.e. even if its file refcount has
+gone to zero. If the memslot is freed before the file is fully released,
+nullifying the memslot side of the binding in kvm_gmem_release() will
+write to freed memory, as detected by syzbot+KASAN:
+
+ ==================================================================
+ BUG: KASAN: slab-use-after-free in kvm_gmem_release+0x176/0x440 virt/kvm/guest_memfd.c:353
+ Write of size 8 at addr ffff88807befa508 by task syz.0.17/6022
+
+ CPU: 0 UID: 0 PID: 6022 Comm: syz.0.17 Not tainted syzkaller #0 PREEMPT(full)
+ Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/02/2025
+ Call Trace:
+ <TASK>
+ dump_stack_lvl+0x189/0x250 lib/dump_stack.c:120
+ print_address_description mm/kasan/report.c:378 [inline]
+ print_report+0xca/0x240 mm/kasan/report.c:482
+ kasan_report+0x118/0x150 mm/kasan/report.c:595
+ kvm_gmem_release+0x176/0x440 virt/kvm/guest_memfd.c:353
+ __fput+0x44c/0xa70 fs/file_table.c:468
+ task_work_run+0x1d4/0x260 kernel/task_work.c:227
+ resume_user_mode_work include/linux/resume_user_mode.h:50 [inline]
+ exit_to_user_mode_loop+0xe9/0x130 kernel/entry/common.c:43
+ exit_to_user_mode_prepare include/linux/irq-entry-common.h:225 [inline]
+ syscall_exit_to_user_mode_work include/linux/entry-common.h:175 [inline]
+ syscall_exit_to_user_mode include/linux/entry-common.h:210 [inline]
+ do_syscall_64+0x2bd/0xfa0 arch/x86/entry/syscall_64.c:100
+ entry_SYSCALL_64_after_hwframe+0x77/0x7f
+ RIP: 0033:0x7fbeeff8efc9
+ </TASK>
+
+ Allocated by task 6023:
+ kasan_save_stack mm/kasan/common.c:56 [inline]
+ kasan_save_track+0x3e/0x80 mm/kasan/common.c:77
+ poison_kmalloc_redzone mm/kasan/common.c:397 [inline]
+ __kasan_kmalloc+0x93/0xb0 mm/kasan/common.c:414
+ kasan_kmalloc include/linux/kasan.h:262 [inline]
+ __kmalloc_cache_noprof+0x3e2/0x700 mm/slub.c:5758
+ kmalloc_noprof include/linux/slab.h:957 [inline]
+ kzalloc_noprof include/linux/slab.h:1094 [inline]
+ kvm_set_memory_region+0x747/0xb90 virt/kvm/kvm_main.c:2104
+ kvm_vm_ioctl_set_memory_region+0x6f/0xd0 virt/kvm/kvm_main.c:2154
+ kvm_vm_ioctl+0x957/0xc60 virt/kvm/kvm_main.c:5201
+ vfs_ioctl fs/ioctl.c:51 [inline]
+ __do_sys_ioctl fs/ioctl.c:597 [inline]
+ __se_sys_ioctl+0xfc/0x170 fs/ioctl.c:583
+ do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
+ do_syscall_64+0xfa/0xfa0 arch/x86/entry/syscall_64.c:94
+ entry_SYSCALL_64_after_hwframe+0x77/0x7f
+
+ Freed by task 6023:
+ kasan_save_stack mm/kasan/common.c:56 [inline]
+ kasan_save_track+0x3e/0x80 mm/kasan/common.c:77
+ kasan_save_free_info+0x46/0x50 mm/kasan/generic.c:584
+ poison_slab_object mm/kasan/common.c:252 [inline]
+ __kasan_slab_free+0x5c/0x80 mm/kasan/common.c:284
+ kasan_slab_free include/linux/kasan.h:234 [inline]
+ slab_free_hook mm/slub.c:2533 [inline]
+ slab_free mm/slub.c:6622 [inline]
+ kfree+0x19a/0x6d0 mm/slub.c:6829
+ kvm_set_memory_region+0x9c4/0xb90 virt/kvm/kvm_main.c:2130
+ kvm_vm_ioctl_set_memory_region+0x6f/0xd0 virt/kvm/kvm_main.c:2154
+ kvm_vm_ioctl+0x957/0xc60 virt/kvm/kvm_main.c:5201
+ vfs_ioctl fs/ioctl.c:51 [inline]
+ __do_sys_ioctl fs/ioctl.c:597 [inline]
+ __se_sys_ioctl+0xfc/0x170 fs/ioctl.c:583
+ do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
+ do_syscall_64+0xfa/0xfa0 arch/x86/entry/syscall_64.c:94
+ entry_SYSCALL_64_after_hwframe+0x77/0x7f
+
+Deliberately don't acquire the filemap invalidate lock when the file is
+dying, as the lifecycle of f_mapping is outside the purview of KVM.
+Dereferencing the mapping is *probably* fine, but there's no need to
+invalidate anything as memslot deletion is responsible for zapping
+SPTEs, and the only code that can access the dying file is
+kvm_gmem_release(), whose core code is mutually exclusive with unbinding.
+
+Note, the mutual exclusivity is also what makes it safe to access the
+bindings on a dying gmem instance. Unbinding either runs with slots_lock
+held, or after the last reference to the owning "struct kvm" is put, and
+kvm_gmem_release() nullifies the slot pointer under slots_lock, and puts
+its reference to the VM after that is done.
+
+Reported-by: syzbot+2479e53d0db9b32ae2aa@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/all/68fa7a22.a70a0220.3bf6c6.008b.GAE@google.com
+Tested-by: syzbot+2479e53d0db9b32ae2aa@syzkaller.appspotmail.com
+Fixes: a7800aa80ea4 ("KVM: Add KVM_CREATE_GUEST_MEMFD ioctl() for guest-specific backing memory")
+Cc: stable@vger.kernel.org
+Cc: Hillf Danton <hdanton@sina.com>
+Reviewed-By: Vishal Annapurve <vannapurve@google.com>
+Link: https://patch.msgid.link/20251104011205.3853541-1-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ virt/kvm/guest_memfd.c | 45 ++++++++++++++++++++++++++++++++-------------
+ 1 file changed, 32 insertions(+), 13 deletions(-)
+
+--- a/virt/kvm/guest_memfd.c
++++ b/virt/kvm/guest_memfd.c
+@@ -523,31 +523,50 @@ err:
+ return r;
+ }
+
+-void kvm_gmem_unbind(struct kvm_memory_slot *slot)
++static void __kvm_gmem_unbind(struct kvm_memory_slot *slot, struct kvm_gmem *gmem)
+ {
+ unsigned long start = slot->gmem.pgoff;
+ unsigned long end = start + slot->npages;
+- struct kvm_gmem *gmem;
++
++ xa_store_range(&gmem->bindings, start, end - 1, NULL, GFP_KERNEL);
++
++ /*
++ * synchronize_srcu(&kvm->srcu) ensured that kvm_gmem_get_pfn()
++ * cannot see this memslot.
++ */
++ WRITE_ONCE(slot->gmem.file, NULL);
++}
++
++void kvm_gmem_unbind(struct kvm_memory_slot *slot)
++{
+ struct file *file;
+
+ /*
+- * Nothing to do if the underlying file was already closed (or is being
+- * closed right now), kvm_gmem_release() invalidates all bindings.
++ * Nothing to do if the underlying file was _already_ closed, as
++ * kvm_gmem_release() invalidates and nullifies all bindings.
+ */
+- file = kvm_gmem_get_file(slot);
+- if (!file)
++ if (!slot->gmem.file)
+ return;
+
+- gmem = file->private_data;
+-
+- filemap_invalidate_lock(file->f_mapping);
+- xa_store_range(&gmem->bindings, start, end - 1, NULL, GFP_KERNEL);
++ file = kvm_gmem_get_file(slot);
+
+ /*
+- * synchronize_srcu(&kvm->srcu) ensured that kvm_gmem_get_pfn()
+- * cannot see this memslot.
++ * However, if the file is _being_ closed, then the bindings need to be
++ * removed as kvm_gmem_release() might not run until after the memslot
++ * is freed. Note, modifying the bindings is safe even though the file
++ * is dying as kvm_gmem_release() nullifies slot->gmem.file under
++ * slots_lock, and only puts its reference to KVM after destroying all
++ * bindings. I.e. reaching this point means kvm_gmem_release() hasn't
++ * yet destroyed the bindings or freed the gmem_file, and can't do so
++ * until the caller drops slots_lock.
+ */
+- WRITE_ONCE(slot->gmem.file, NULL);
++ if (!file) {
++ __kvm_gmem_unbind(slot, slot->gmem.file->private_data);
++ return;
++ }
++
++ filemap_invalidate_lock(file->f_mapping);
++ __kvm_gmem_unbind(slot, file->private_data);
+ filemap_invalidate_unlock(file->f_mapping);
+
+ fput(file);
--- /dev/null
+From fbe5e5f030c22ae717ee422aaab0e00ea84fab5e Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry.ahmed@linux.dev>
+Date: Sat, 8 Nov 2025 00:45:20 +0000
+Subject: KVM: nSVM: Always recalculate LBR MSR intercepts in svm_update_lbrv()
+
+From: Yosry Ahmed <yosry.ahmed@linux.dev>
+
+commit fbe5e5f030c22ae717ee422aaab0e00ea84fab5e upstream.
+
+svm_update_lbrv() is called when MSR_IA32_DEBUGCTLMSR is updated, and on
+nested transitions where LBRV is used. It checks whether LBRV enablement
+needs to be changed in the current VMCB, and if it does, it also
+recalculates the intercepts for the LBR MSRs.
+
+However, there are cases where intercepts need to be updated even when
+LBRV enablement doesn't. Example scenario:
+- L1 has MSR_IA32_DEBUGCTLMSR cleared.
+- L1 runs L2 without LBR_CTL_ENABLE (no LBRV).
+- L2 sets DEBUGCTLMSR_LBR in MSR_IA32_DEBUGCTLMSR, svm_update_lbrv()
+ sets LBR_CTL_ENABLE in VMCB02 and disables intercepts to LBR MSRs.
+- L2 exits to L1, svm_update_lbrv() is not called on this transition.
+- L1 clears MSR_IA32_DEBUGCTLMSR, svm_update_lbrv() finds that
+ LBR_CTL_ENABLE is already cleared in VMCB01 and does nothing.
+- Intercepts remain disabled, L1 reads to LBR MSRs read the host MSRs.
+
+Fix it by always recalculating intercepts in svm_update_lbrv().
+
+Fixes: 1d5a1b5860ed ("KVM: x86: nSVM: correctly virtualize LBR msrs when L2 is running")
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry.ahmed@linux.dev>
+Link: https://patch.msgid.link/20251108004524.1600006-3-yosry.ahmed@linux.dev
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/svm.c | 29 +++++++++++++++++++----------
+ 1 file changed, 19 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -852,25 +852,29 @@ void svm_copy_lbrs(struct vmcb *to_vmcb,
+ vmcb_mark_dirty(to_vmcb, VMCB_LBR);
+ }
+
+-void svm_enable_lbrv(struct kvm_vcpu *vcpu)
++static void __svm_enable_lbrv(struct kvm_vcpu *vcpu)
+ {
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
+- svm_recalc_lbr_msr_intercepts(vcpu);
+
+ /* Move the LBR msrs to the vmcb02 so that the guest can see them. */
+ if (is_guest_mode(vcpu))
+ svm_copy_lbrs(svm->vmcb, svm->vmcb01.ptr);
+ }
+
+-static void svm_disable_lbrv(struct kvm_vcpu *vcpu)
++void svm_enable_lbrv(struct kvm_vcpu *vcpu)
++{
++ __svm_enable_lbrv(vcpu);
++ svm_recalc_lbr_msr_intercepts(vcpu);
++}
++
++static void __svm_disable_lbrv(struct kvm_vcpu *vcpu)
+ {
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ KVM_BUG_ON(sev_es_guest(vcpu->kvm), vcpu->kvm);
+ svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
+- svm_recalc_lbr_msr_intercepts(vcpu);
+
+ /*
+ * Move the LBR msrs back to the vmcb01 to avoid copying them
+@@ -899,13 +903,18 @@ void svm_update_lbrv(struct kvm_vcpu *vc
+ (is_guest_mode(vcpu) && guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
+ (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK));
+
+- if (enable_lbrv == current_enable_lbrv)
+- return;
++ if (enable_lbrv && !current_enable_lbrv)
++ __svm_enable_lbrv(vcpu);
++ else if (!enable_lbrv && current_enable_lbrv)
++ __svm_disable_lbrv(vcpu);
+
+- if (enable_lbrv)
+- svm_enable_lbrv(vcpu);
+- else
+- svm_disable_lbrv(vcpu);
++ /*
++ * During nested transitions, it is possible that the current VMCB has
++ * LBR_CTL set, but the previous LBR_CTL had it cleared (or vice versa).
++ * In this case, even though LBR_CTL does not need an update, intercepts
++ * do, so always recalculate the intercepts here.
++ */
++ svm_recalc_lbr_msr_intercepts(vcpu);
+ }
+
+ void disable_nmi_singlestep(struct vcpu_svm *svm)
--- /dev/null
+From 8a4821412cf2c1429fffa07c012dd150f2edf78c Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry.ahmed@linux.dev>
+Date: Sat, 8 Nov 2025 00:45:21 +0000
+Subject: KVM: nSVM: Fix and simplify LBR virtualization handling with nested
+
+From: Yosry Ahmed <yosry.ahmed@linux.dev>
+
+commit 8a4821412cf2c1429fffa07c012dd150f2edf78c upstream.
+
+The current scheme for handling LBRV when nested virtualization is in
+use is very complicated, especially when L1 does not enable LBRV (i.e.
+does not set LBR_CTL_ENABLE_MASK).
+
+To avoid copying LBRs between VMCB01 and VMCB02 on every nested
+transition, the current implementation switches between using VMCB01 or
+VMCB02 as the source of truth for the LBRs while L2 is running. If L2
+enables LBR, VMCB02 is used as the source of truth. When L2 disables
+LBR, the LBRs are copied to VMCB01 and VMCB01 is used as the source of
+truth. This introduces significant complexity, and incorrect behavior in
+some cases.
+
+For example, on a nested #VMEXIT, the LBRs are only copied from VMCB02
+to VMCB01 if LBRV is enabled in VMCB01. This is because L2's writes to
+MSR_IA32_DEBUGCTLMSR to enable LBR are intercepted and propagated to
+VMCB01 instead of VMCB02. However, LBRV is only enabled in VMCB02 when
+L2 is running.
+
+This means that if L2 enables LBR and exits to L1, the LBRs will not be
+propagated from VMCB02 to VMCB01, because LBRV is disabled in VMCB01.
+
+There is no meaningful difference in CPUID rate in L2 when copying LBRs
+on every nested transition vs. the current approach, so do the simple
+and correct thing and always copy LBRs between VMCB01 and VMCB02 on
+nested transitions (when LBRV is disabled by L1). Drop the conditional
+LBRs copying in __svm_{enable/disable}_lbrv() as it is now unnecessary.
+
+VMCB02 becomes the only source of truth for LBRs when L2 is running,
+regardless of whether LBRV is enabled by L1, so drop svm_get_lbr_vmcb()
+and use svm->vmcb directly in its place.
+
+Fixes: 1d5a1b5860ed ("KVM: x86: nSVM: correctly virtualize LBR msrs when L2 is running")
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry.ahmed@linux.dev>
+Link: https://patch.msgid.link/20251108004524.1600006-4-yosry.ahmed@linux.dev
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c | 20 +++++++-------------
+ arch/x86/kvm/svm/svm.c | 46 ++++++++++------------------------------------
+ 2 files changed, 17 insertions(+), 49 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -669,11 +669,10 @@ static void nested_vmcb02_prepare_save(s
+ */
+ svm_copy_lbrs(vmcb02, vmcb12);
+ vmcb02->save.dbgctl &= ~DEBUGCTL_RESERVED_BITS;
+- svm_update_lbrv(&svm->vcpu);
+-
+- } else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) {
++ } else {
+ svm_copy_lbrs(vmcb02, vmcb01);
+ }
++ svm_update_lbrv(&svm->vcpu);
+ }
+
+ static inline bool is_evtinj_soft(u32 evtinj)
+@@ -825,11 +824,7 @@ static void nested_vmcb02_prepare_contro
+ svm->soft_int_next_rip = vmcb12_rip;
+ }
+
+- vmcb02->control.virt_ext = vmcb01->control.virt_ext &
+- LBR_CTL_ENABLE_MASK;
+- if (guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV))
+- vmcb02->control.virt_ext |=
+- (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK);
++ /* LBR_CTL_ENABLE_MASK is controlled by svm_update_lbrv() */
+
+ if (!nested_vmcb_needs_vls_intercept(svm))
+ vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
+@@ -1169,13 +1164,12 @@ int nested_svm_vmexit(struct vcpu_svm *s
+ kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
+
+ if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
+- (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
++ (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK)))
+ svm_copy_lbrs(vmcb12, vmcb02);
+- svm_update_lbrv(vcpu);
+- } else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) {
++ else
+ svm_copy_lbrs(vmcb01, vmcb02);
+- svm_update_lbrv(vcpu);
+- }
++
++ svm_update_lbrv(vcpu);
+
+ if (vnmi) {
+ if (vmcb02->control.int_ctl & V_NMI_BLOCKING_MASK)
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -854,13 +854,7 @@ void svm_copy_lbrs(struct vmcb *to_vmcb,
+
+ static void __svm_enable_lbrv(struct kvm_vcpu *vcpu)
+ {
+- struct vcpu_svm *svm = to_svm(vcpu);
+-
+- svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
+-
+- /* Move the LBR msrs to the vmcb02 so that the guest can see them. */
+- if (is_guest_mode(vcpu))
+- svm_copy_lbrs(svm->vmcb, svm->vmcb01.ptr);
++ to_svm(vcpu)->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
+ }
+
+ void svm_enable_lbrv(struct kvm_vcpu *vcpu)
+@@ -871,35 +865,15 @@ void svm_enable_lbrv(struct kvm_vcpu *vc
+
+ static void __svm_disable_lbrv(struct kvm_vcpu *vcpu)
+ {
+- struct vcpu_svm *svm = to_svm(vcpu);
+-
+ KVM_BUG_ON(sev_es_guest(vcpu->kvm), vcpu->kvm);
+- svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
+-
+- /*
+- * Move the LBR msrs back to the vmcb01 to avoid copying them
+- * on nested guest entries.
+- */
+- if (is_guest_mode(vcpu))
+- svm_copy_lbrs(svm->vmcb01.ptr, svm->vmcb);
+-}
+-
+-static struct vmcb *svm_get_lbr_vmcb(struct vcpu_svm *svm)
+-{
+- /*
+- * If LBR virtualization is disabled, the LBR MSRs are always kept in
+- * vmcb01. If LBR virtualization is enabled and L1 is running VMs of
+- * its own, the MSRs are moved between vmcb01 and vmcb02 as needed.
+- */
+- return svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK ? svm->vmcb :
+- svm->vmcb01.ptr;
++ to_svm(vcpu)->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
+ }
+
+ void svm_update_lbrv(struct kvm_vcpu *vcpu)
+ {
+ struct vcpu_svm *svm = to_svm(vcpu);
+ bool current_enable_lbrv = svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK;
+- bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & DEBUGCTLMSR_LBR) ||
++ bool enable_lbrv = (svm->vmcb->save.dbgctl & DEBUGCTLMSR_LBR) ||
+ (is_guest_mode(vcpu) && guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
+ (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK));
+
+@@ -2785,19 +2759,19 @@ static int svm_get_msr(struct kvm_vcpu *
+ msr_info->data = svm->tsc_aux;
+ break;
+ case MSR_IA32_DEBUGCTLMSR:
+- msr_info->data = svm_get_lbr_vmcb(svm)->save.dbgctl;
++ msr_info->data = svm->vmcb->save.dbgctl;
+ break;
+ case MSR_IA32_LASTBRANCHFROMIP:
+- msr_info->data = svm_get_lbr_vmcb(svm)->save.br_from;
++ msr_info->data = svm->vmcb->save.br_from;
+ break;
+ case MSR_IA32_LASTBRANCHTOIP:
+- msr_info->data = svm_get_lbr_vmcb(svm)->save.br_to;
++ msr_info->data = svm->vmcb->save.br_to;
+ break;
+ case MSR_IA32_LASTINTFROMIP:
+- msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_from;
++ msr_info->data = svm->vmcb->save.last_excp_from;
+ break;
+ case MSR_IA32_LASTINTTOIP:
+- msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_to;
++ msr_info->data = svm->vmcb->save.last_excp_to;
+ break;
+ case MSR_VM_HSAVE_PA:
+ msr_info->data = svm->nested.hsave_msr;
+@@ -3053,10 +3027,10 @@ static int svm_set_msr(struct kvm_vcpu *
+ if (data & DEBUGCTL_RESERVED_BITS)
+ return 1;
+
+- if (svm_get_lbr_vmcb(svm)->save.dbgctl == data)
++ if (svm->vmcb->save.dbgctl == data)
+ break;
+
+- svm_get_lbr_vmcb(svm)->save.dbgctl = data;
++ svm->vmcb->save.dbgctl = data;
+ vmcb_mark_dirty(svm->vmcb, VMCB_LBR);
+ svm_update_lbrv(vcpu);
+ break;
--- /dev/null
+From dc55b3c3f61246e483e50c85d8d5366f9567e188 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry.ahmed@linux.dev>
+Date: Sat, 8 Nov 2025 00:45:19 +0000
+Subject: KVM: SVM: Mark VMCB_LBR dirty when MSR_IA32_DEBUGCTLMSR is updated
+
+From: Yosry Ahmed <yosry.ahmed@linux.dev>
+
+commit dc55b3c3f61246e483e50c85d8d5366f9567e188 upstream.
+
+The APM lists the DbgCtlMsr field as being tracked by the VMCB_LBR clean
+bit. Always clear the bit when MSR_IA32_DEBUGCTLMSR is updated.
+
+The history is complicated: the bit was correctly cleared for L1 before
+commit 1d5a1b5860ed ("KVM: x86: nSVM: correctly virtualize LBR msrs when
+L2 is running"). At that point svm_set_msr() started to rely on
+svm_update_lbrv() to clear the bit, but when nested virtualization is
+enabled the latter does not always clear it even if MSR_IA32_DEBUGCTLMSR
+changed. Go back to clearing it directly in svm_set_msr().
+
+Fixes: 1d5a1b5860ed ("KVM: x86: nSVM: correctly virtualize LBR msrs when L2 is running")
+Reported-by: Matteo Rizzo <matteorizzo@google.com>
+Reported-by: evn@google.com
+Co-developed-by: Jim Mattson <jmattson@google.com>
+Signed-off-by: Jim Mattson <jmattson@google.com>
+Signed-off-by: Yosry Ahmed <yosry.ahmed@linux.dev>
+Link: https://patch.msgid.link/20251108004524.1600006-2-yosry.ahmed@linux.dev
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/svm.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -3044,7 +3044,11 @@ static int svm_set_msr(struct kvm_vcpu *
+ if (data & DEBUGCTL_RESERVED_BITS)
+ return 1;
+
++ if (svm_get_lbr_vmcb(svm)->save.dbgctl == data)
++ break;
++
+ svm_get_lbr_vmcb(svm)->save.dbgctl = data;
++ vmcb_mark_dirty(svm->vmcb, VMCB_LBR);
+ svm_update_lbrv(vcpu);
+ break;
+ case MSR_VM_HSAVE_PA:
--- /dev/null
+From d0164c161923ac303bd843e04ebe95cfd03c6e19 Mon Sep 17 00:00:00 2001
+From: Sukrit Bhatnagar <Sukrit.Bhatnagar@sony.com>
+Date: Thu, 6 Nov 2025 14:28:51 +0900
+Subject: KVM: VMX: Fix check for valid GVA on an EPT violation
+
+From: Sukrit Bhatnagar <Sukrit.Bhatnagar@sony.com>
+
+commit d0164c161923ac303bd843e04ebe95cfd03c6e19 upstream.
+
+On an EPT violation, bit 7 of the exit qualification is set if the
+guest linear-address is valid. The derived page fault error code
+should not be checked for this bit.
+
+Fixes: f3009482512e ("KVM: VMX: Set PFERR_GUEST_{FINAL,PAGE}_MASK if and only if the GVA is valid")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sukrit Bhatnagar <Sukrit.Bhatnagar@sony.com>
+Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
+Link: https://patch.msgid.link/20251106052853.3071088-1-Sukrit.Bhatnagar@sony.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/common.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/vmx/common.h
++++ b/arch/x86/kvm/vmx/common.h
+@@ -98,7 +98,7 @@ static inline int __vmx_handle_ept_viola
+ error_code |= (exit_qualification & EPT_VIOLATION_PROT_MASK)
+ ? PFERR_PRESENT_MASK : 0;
+
+- if (error_code & EPT_VIOLATION_GVA_IS_VALID)
++ if (exit_qualification & EPT_VIOLATION_GVA_IS_VALID)
+ error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) ?
+ PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;
+
--- /dev/null
+From 43a9e6a10bdde32445ad2725f568e08a94e51dc9 Mon Sep 17 00:00:00 2001
+From: Huacai Chen <chenhuacai@loongson.cn>
+Date: Sun, 9 Nov 2025 16:02:00 +0800
+Subject: LoongArch: Consolidate early_ioremap()/ioremap_prot()
+
+From: Huacai Chen <chenhuacai@loongson.cn>
+
+commit 43a9e6a10bdde32445ad2725f568e08a94e51dc9 upstream.
+
+1. Use phys_addr_t instead of u64, which works for both 32-bit and
+   64-bit.
+2. Check whether the input physical address is above TO_PHYS_MASK (and
+   return NULL if so) for the DMW version.
+
+Note: In theory early_ioremap() also needs the TO_PHYS_MASK check, but
+the UEFI BIOS passes some DMW virtual addresses.
+
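+With the check in place, an out-of-range address fails like any other
+ioremap error (a sketch; the caller and protection value are
+illustrative only):
+
+    void __iomem *regs = ioremap_prot(phys, size, PAGE_KERNEL_SUC);
+    if (!regs)
+        return -ENOMEM;    /* phys > TO_PHYS_MASK: not mappable via DMW */
+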
+Cc: stable@vger.kernel.org
+Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/include/asm/io.h | 5 ++++-
+ arch/loongarch/mm/ioremap.c | 2 +-
+ 2 files changed, 5 insertions(+), 2 deletions(-)
+
+--- a/arch/loongarch/include/asm/io.h
++++ b/arch/loongarch/include/asm/io.h
+@@ -14,7 +14,7 @@
+ #include <asm/pgtable-bits.h>
+ #include <asm/string.h>
+
+-extern void __init __iomem *early_ioremap(u64 phys_addr, unsigned long size);
++extern void __init __iomem *early_ioremap(phys_addr_t phys_addr, unsigned long size);
+ extern void __init early_iounmap(void __iomem *addr, unsigned long size);
+
+ #define early_memremap early_ioremap
+@@ -25,6 +25,9 @@ extern void __init early_iounmap(void __
+ static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size,
+ pgprot_t prot)
+ {
++ if (offset > TO_PHYS_MASK)
++ return NULL;
++
+ switch (pgprot_val(prot) & _CACHE_MASK) {
+ case _CACHE_CC:
+ return (void __iomem *)(unsigned long)(CACHE_BASE + offset);
+--- a/arch/loongarch/mm/ioremap.c
++++ b/arch/loongarch/mm/ioremap.c
+@@ -6,7 +6,7 @@
+ #include <asm/io.h>
+ #include <asm-generic/early_ioremap.h>
+
+-void __init __iomem *early_ioremap(u64 phys_addr, unsigned long size)
++void __init __iomem *early_ioremap(phys_addr_t phys_addr, unsigned long size)
+ {
+ return ((void __iomem *)TO_CACHE(phys_addr));
+ }
--- /dev/null
+From d3c9515e4f9d10ccb113adb4809db5cc31e7ef65 Mon Sep 17 00:00:00 2001
+From: Bibo Mao <maobibo@loongson.cn>
+Date: Sun, 9 Nov 2025 16:02:09 +0800
+Subject: LoongArch: KVM: Add delay until timer interrupt injected
+
+From: Bibo Mao <maobibo@loongson.cn>
+
+commit d3c9515e4f9d10ccb113adb4809db5cc31e7ef65 upstream.
+
+When the timer fires in oneshot mode, CSR.TVAL stops with value -1
+rather than 0. However, when the register CSR.TVAL is restored, it
+continues to count down rather than stopping there.
+
+Now the method is to write 0 to CSR.TVAL, wait for at least one cycle
+of the count-down (10ns with a 100MHz timer frequency), and then
+restore the timer interrupt status. Add a 2-cycle delay here to ensure
+that the timer interrupt is injected.
+
+With this patch, the timer selftest case passes consistently.
+
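+The arithmetic behind the delay (a sketch; this assumes __delay()
+counts stable-counter cycles, which run at the constant timer
+frequency):
+
+    /* One timer cycle = 1s / 100MHz = 10ns, so __delay(2) waits
+     * ~20ns: enough for TVAL to step from 0 to -1 and latch the
+     * timer interrupt before the pending state is restored. */
+    write_gcsr_timertick(0);    /* CSR.TVAL counts down from 0 */
+    __delay(2);                 /* wait for the injection */
+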
+Cc: stable@vger.kernel.org
+Signed-off-by: Bibo Mao <maobibo@loongson.cn>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/kvm/timer.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/arch/loongarch/kvm/timer.c
++++ b/arch/loongarch/kvm/timer.c
+@@ -4,6 +4,7 @@
+ */
+
+ #include <linux/kvm_host.h>
++#include <asm/delay.h>
+ #include <asm/kvm_csr.h>
+ #include <asm/kvm_vcpu.h>
+
+@@ -95,6 +96,7 @@ void kvm_restore_timer(struct kvm_vcpu *
+ * and set CSR TVAL with -1
+ */
+ write_gcsr_timertick(0);
++ __delay(2); /* Wait cycles until timer interrupt injected */
+
+ /*
+ * Writing CSR_TINTCLR_TI to LOONGARCH_CSR_TINTCLR will clear
--- /dev/null
+From 237e74bfa261fb0cf75bd08c9be0c5094018ee20 Mon Sep 17 00:00:00 2001
+From: Bibo Mao <maobibo@loongson.cn>
+Date: Sun, 9 Nov 2025 16:02:09 +0800
+Subject: LoongArch: KVM: Fix max supported vCPUs set with EIOINTC
+
+From: Bibo Mao <maobibo@loongson.cn>
+
+commit 237e74bfa261fb0cf75bd08c9be0c5094018ee20 upstream.
+
+A VM fails to boot with 256 vCPUs; the detailed command is
+
+ qemu-system-loongarch64 -smp 256
+
+and there is an error reported as follows:
+
+ KVM_LOONGARCH_EXTIOI_INIT_NUM_CPU failed: Invalid argument
+
+There is a typo in function kvm_eiointc_ctrl_access() when setting the
+max supported number of vCPUs.
+
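+The boundary case in short (a sketch; EIOINTC_ROUTE_MAX_VCPUS is
+assumed to be 256, matching the failing -smp 256 setup):
+
+    /* val == EIOINTC_ROUTE_MAX_VCPUS is a valid configuration, so
+     * only strictly larger values may be rejected: */
+    if (val > EIOINTC_ROUTE_MAX_VCPUS)    /* was: >=, rejecting 256 */
+        ret = -EINVAL;
+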
+Cc: stable@vger.kernel.org
+Fixes: 47256c4c8b1b ("LoongArch: KVM: Avoid copy_*_user() with lock hold in kvm_eiointc_ctrl_access()")
+Signed-off-by: Bibo Mao <maobibo@loongson.cn>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/kvm/intc/eiointc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/loongarch/kvm/intc/eiointc.c
++++ b/arch/loongarch/kvm/intc/eiointc.c
+@@ -439,7 +439,7 @@ static int kvm_eiointc_ctrl_access(struc
+ spin_lock_irqsave(&s->lock, flags);
+ switch (type) {
+ case KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_NUM_CPU:
+- if (val >= EIOINTC_ROUTE_MAX_VCPUS)
++ if (val > EIOINTC_ROUTE_MAX_VCPUS)
+ ret = -EINVAL;
+ else
+ s->num_cpu = val;
--- /dev/null
+From 5001bcf86edf2de02f025a0f789bcac37fa040e6 Mon Sep 17 00:00:00 2001
+From: Bibo Mao <maobibo@loongson.cn>
+Date: Sun, 9 Nov 2025 16:02:09 +0800
+Subject: LoongArch: KVM: Restore guest PMU if it is enabled
+
+From: Bibo Mao <maobibo@loongson.cn>
+
+commit 5001bcf86edf2de02f025a0f789bcac37fa040e6 upstream.
+
+On LoongArch systems, the PMU hardware is shared by guest and host,
+but the PMU interrupts are separate. The PMU is passed through to the
+VM, and there is a PMU context switch when exiting to the host and
+returning to the guest.
+
+As an optimization, we check whether the PMU is enabled by the guest;
+if not, no PMU context needs to be restored when returning to the
+guest. However, if it is enabled, the guest PMU context needs to be
+switched on. Now the KVM_REQ_PMU notification is set on vCPU context
+switch, but it is missing if there is no vCPU context switch while the
+PMU is used by the guest VM, so fix it.
+
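+The request is consumed with the usual KVM request idiom on the next
+VM-entry (a sketch; kvm_own_pmu() is a stand-in for whatever helper
+restores the guest PMU context):
+
+    if (kvm_check_request(KVM_REQ_PMU, vcpu))
+        kvm_own_pmu(vcpu);    /* switch guest PMU CSRs back in */
+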
+Cc: <stable@vger.kernel.org>
+Fixes: f4e40ea9f78f ("LoongArch: KVM: Add PMU support for guest")
+Signed-off-by: Bibo Mao <maobibo@loongson.cn>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/kvm/vcpu.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/arch/loongarch/kvm/vcpu.c
++++ b/arch/loongarch/kvm/vcpu.c
+@@ -133,6 +133,9 @@ static void kvm_lose_pmu(struct kvm_vcpu
+ * Clear KVM_LARCH_PMU if the guest is not using PMU CSRs when
+ * exiting the guest, so that the next time trap into the guest.
+ * We don't need to deal with PMU CSRs contexts.
++ *
++ * Otherwise set the request bit KVM_REQ_PMU to restore guest PMU
++ * before entering guest VM
+ */
+ val = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0);
+ val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1);
+@@ -140,6 +143,8 @@ static void kvm_lose_pmu(struct kvm_vcpu
+ val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3);
+ if (!(val & KVM_PMU_EVENT_ENABLED))
+ vcpu->arch.aux_inuse &= ~KVM_LARCH_PMU;
++ else
++ kvm_make_request(KVM_REQ_PMU, vcpu);
+
+ kvm_restore_host_pmu(vcpu);
+ }
--- /dev/null
+From a073d637c8cfbfbab39b7272226a3fbf3b887580 Mon Sep 17 00:00:00 2001
+From: Tianyang Zhang <zhangtianyang@loongson.cn>
+Date: Sun, 9 Nov 2025 16:02:01 +0800
+Subject: LoongArch: Let {pte,pmd}_modify() record the status of _PAGE_DIRTY
+
+From: Tianyang Zhang <zhangtianyang@loongson.cn>
+
+commit a073d637c8cfbfbab39b7272226a3fbf3b887580 upstream.
+
+Now if the PTE/PMD is dirty with _PAGE_DIRTY but without _PAGE_MODIFIED,
+after {pte,pmd}_modify() we lose _PAGE_DIRTY, then {pte,pmd}_dirty()
+returns false, which leads to data loss. This can happen in certain
+scenarios, e.g. when the HW PTW doesn't set _PAGE_MODIFIED
+automatically, so here we need _PAGE_MODIFIED to record the dirty
+status (_PAGE_DIRTY).
+
+The new modification involves checking whether the original PTE/PMD has
+the _PAGE_DIRTY flag. If it is set, the _PAGE_MODIFIED bit is also set,
+ensuring that the {pte,pmd}_dirty() interface can always return accurate
+information.
+
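+The failure mode in miniature (a sketch; this assumes _PAGE_MODIFIED
+survives _PAGE_CHG_MASK while the HW-written _PAGE_DIRTY does not):
+
+    /* HW PTW set _PAGE_DIRTY; software never set _PAGE_MODIFIED. */
+    pte = pte_modify(pte, newprot);  /* old code: _PAGE_DIRTY masked out */
+    WARN_ON(!pte_dirty(pte));        /* fires -> page never written back */
+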
+Cc: stable@vger.kernel.org
+Co-developed-by: Liupu Wang <wangliupu@loongson.cn>
+Signed-off-by: Liupu Wang <wangliupu@loongson.cn>
+Signed-off-by: Tianyang Zhang <zhangtianyang@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/include/asm/pgtable.h | 11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+--- a/arch/loongarch/include/asm/pgtable.h
++++ b/arch/loongarch/include/asm/pgtable.h
+@@ -424,6 +424,9 @@ static inline unsigned long pte_accessib
+
+ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+ {
++ if (pte_val(pte) & _PAGE_DIRTY)
++ pte_val(pte) |= _PAGE_MODIFIED;
++
+ return __pte((pte_val(pte) & _PAGE_CHG_MASK) |
+ (pgprot_val(newprot) & ~_PAGE_CHG_MASK));
+ }
+@@ -547,9 +550,11 @@ static inline struct page *pmd_page(pmd_
+
+ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+ {
+- pmd_val(pmd) = (pmd_val(pmd) & _HPAGE_CHG_MASK) |
+- (pgprot_val(newprot) & ~_HPAGE_CHG_MASK);
+- return pmd;
++ if (pmd_val(pmd) & _PAGE_DIRTY)
++ pmd_val(pmd) |= _PAGE_MODIFIED;
++
++ return __pmd((pmd_val(pmd) & _HPAGE_CHG_MASK) |
++ (pgprot_val(newprot) & ~_HPAGE_CHG_MASK));
+ }
+
+ static inline pmd_t pmd_mkinvalid(pmd_t pmd)
--- /dev/null
+From eeeeaafa62ea0cd4b86390f657dc0aea73bff4f5 Mon Sep 17 00:00:00 2001
+From: Huacai Chen <chenhuacai@loongson.cn>
+Date: Sun, 9 Nov 2025 16:02:01 +0800
+Subject: LoongArch: Use correct accessor to read FWPC/MWPC
+
+From: Huacai Chen <chenhuacai@loongson.cn>
+
+commit eeeeaafa62ea0cd4b86390f657dc0aea73bff4f5 upstream.
+
+CSR.FWPC and CSR.MWPC are 32-bit registers, so use csr_read32() rather
+than csr_read64() to read the values of FWPC/MWPC.
+
+Cc: stable@vger.kernel.org
+Fixes: edffa33c7bb5a73 ("LoongArch: Add hardware breakpoints/watchpoints support")
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/include/asm/hw_breakpoint.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/loongarch/include/asm/hw_breakpoint.h
++++ b/arch/loongarch/include/asm/hw_breakpoint.h
+@@ -134,13 +134,13 @@ static inline void hw_breakpoint_thread_
+ /* Determine number of BRP registers available. */
+ static inline int get_num_brps(void)
+ {
+- return csr_read64(LOONGARCH_CSR_FWPC) & CSR_FWPC_NUM;
++ return csr_read32(LOONGARCH_CSR_FWPC) & CSR_FWPC_NUM;
+ }
+
+ /* Determine number of WRP registers available. */
+ static inline int get_num_wrps(void)
+ {
+- return csr_read64(LOONGARCH_CSR_MWPC) & CSR_MWPC_NUM;
++ return csr_read32(LOONGARCH_CSR_MWPC) & CSR_MWPC_NUM;
+ }
+
+ #endif /* __KERNEL__ */
--- /dev/null
+From 91a54090026f84ceffaa12ac53c99b9f162946f6 Mon Sep 17 00:00:00 2001
+From: Martin Kaiser <martin@kaiser.cx>
+Date: Thu, 30 Oct 2025 16:55:05 +0100
+Subject: maple_tree: fix tracepoint string pointers
+
+From: Martin Kaiser <martin@kaiser.cx>
+
+commit 91a54090026f84ceffaa12ac53c99b9f162946f6 upstream.
+
+maple_tree tracepoints contain pointers to function names. Such a pointer
+is saved when a tracepoint logs an event. There's no guarantee that it's
+still valid when the event is parsed later and the pointer is dereferenced.
+
+The kernel warns about these unsafe pointers.
+
+ event 'ma_read' has unsafe pointer field 'fn'
+ WARNING: kernel/trace/trace.c:3779 at ignore_event+0x1da/0x1e4
+
+Mark the function names as tracepoint_string() to fix the events.
+
+One case that doesn't work without my patch is using trace-cmd record
+to save the binary ring buffer and trace-cmd report to parse it in
+userspace. The address of __func__ can't be dereferenced from
+userspace, but tracepoint_string() adds an entry to
+/sys/kernel/tracing/printk_formats.
+
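+The conversion pattern used throughout the file (a sketch; the real
+call sites pass the same maple-state arguments as before):
+
+    #define TP_FCT tracepoint_string(__func__)
+
+    /* tracepoint_string() registers the literal with the tracing
+     * core, so userspace tools can resolve the recorded pointer
+     * via printk_formats instead of dereferencing it. */
+    trace_ma_op(TP_FCT, mas);
+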
+Link: https://lkml.kernel.org/r/20251030155537.87972-1-martin@kaiser.cx
+Fixes: 54a611b60590 ("Maple Tree: add new data structure")
+Signed-off-by: Martin Kaiser <martin@kaiser.cx>
+Acked-by: Liam R. Howlett <Liam.Howlett@oracle.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ lib/maple_tree.c | 30 ++++++++++++++++--------------
+ 1 file changed, 16 insertions(+), 14 deletions(-)
+
+--- a/lib/maple_tree.c
++++ b/lib/maple_tree.c
+@@ -64,6 +64,8 @@
+ #define CREATE_TRACE_POINTS
+ #include <trace/events/maple_tree.h>
+
++#define TP_FCT tracepoint_string(__func__)
++
+ /*
+ * Kernel pointer hashing renders much of the maple tree dump useless as tagged
+ * pointers get hashed to arbitrary values.
+@@ -2976,7 +2978,7 @@ static inline void mas_rebalance(struct
+ MA_STATE(l_mas, mas->tree, mas->index, mas->last);
+ MA_STATE(r_mas, mas->tree, mas->index, mas->last);
+
+- trace_ma_op(__func__, mas);
++ trace_ma_op(TP_FCT, mas);
+
+ /*
+ * Rebalancing occurs if a node is insufficient. Data is rebalanced
+@@ -3337,7 +3339,7 @@ static void mas_split(struct ma_state *m
+ MA_STATE(prev_l_mas, mas->tree, mas->index, mas->last);
+ MA_STATE(prev_r_mas, mas->tree, mas->index, mas->last);
+
+- trace_ma_op(__func__, mas);
++ trace_ma_op(TP_FCT, mas);
+
+ mast.l = &l_mas;
+ mast.r = &r_mas;
+@@ -3512,7 +3514,7 @@ static bool mas_is_span_wr(struct ma_wr_
+ return false;
+ }
+
+- trace_ma_write(__func__, wr_mas->mas, wr_mas->r_max, entry);
++ trace_ma_write(TP_FCT, wr_mas->mas, wr_mas->r_max, entry);
+ return true;
+ }
+
+@@ -3756,7 +3758,7 @@ static noinline void mas_wr_spanning_sto
+ * of data may happen.
+ */
+ mas = wr_mas->mas;
+- trace_ma_op(__func__, mas);
++ trace_ma_op(TP_FCT, mas);
+
+ if (unlikely(!mas->index && mas->last == ULONG_MAX))
+ return mas_new_root(mas, wr_mas->entry);
+@@ -3894,7 +3896,7 @@ done:
+ } else {
+ memcpy(wr_mas->node, newnode, sizeof(struct maple_node));
+ }
+- trace_ma_write(__func__, mas, 0, wr_mas->entry);
++ trace_ma_write(TP_FCT, mas, 0, wr_mas->entry);
+ mas_update_gap(mas);
+ mas->end = new_end;
+ return;
+@@ -3938,7 +3940,7 @@ static inline void mas_wr_slot_store(str
+ mas->offset++; /* Keep mas accurate. */
+ }
+
+- trace_ma_write(__func__, mas, 0, wr_mas->entry);
++ trace_ma_write(TP_FCT, mas, 0, wr_mas->entry);
+ /*
+ * Only update gap when the new entry is empty or there is an empty
+ * entry in the original two ranges.
+@@ -4059,7 +4061,7 @@ static inline void mas_wr_append(struct
+ mas_update_gap(mas);
+
+ mas->end = new_end;
+- trace_ma_write(__func__, mas, new_end, wr_mas->entry);
++ trace_ma_write(TP_FCT, mas, new_end, wr_mas->entry);
+ return;
+ }
+
+@@ -4073,7 +4075,7 @@ static void mas_wr_bnode(struct ma_wr_st
+ {
+ struct maple_big_node b_node;
+
+- trace_ma_write(__func__, wr_mas->mas, 0, wr_mas->entry);
++ trace_ma_write(TP_FCT, wr_mas->mas, 0, wr_mas->entry);
+ memset(&b_node, 0, sizeof(struct maple_big_node));
+ mas_store_b_node(wr_mas, &b_node, wr_mas->offset_end);
+ mas_commit_b_node(wr_mas, &b_node);
+@@ -5405,7 +5407,7 @@ void *mas_store(struct ma_state *mas, vo
+ int request;
+ MA_WR_STATE(wr_mas, mas, entry);
+
+- trace_ma_write(__func__, mas, 0, entry);
++ trace_ma_write(TP_FCT, mas, 0, entry);
+ #ifdef CONFIG_DEBUG_MAPLE_TREE
+ if (MAS_WARN_ON(mas, mas->index > mas->last))
+ pr_err("Error %lX > %lX " PTR_FMT "\n", mas->index, mas->last,
+@@ -5506,7 +5508,7 @@ void mas_store_prealloc(struct ma_state
+ }
+
+ store:
+- trace_ma_write(__func__, mas, 0, entry);
++ trace_ma_write(TP_FCT, mas, 0, entry);
+ mas_wr_store_entry(&wr_mas);
+ MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas));
+ mas_destroy(mas);
+@@ -6319,7 +6321,7 @@ void *mtree_load(struct maple_tree *mt,
+ MA_STATE(mas, mt, index, index);
+ void *entry;
+
+- trace_ma_read(__func__, &mas);
++ trace_ma_read(TP_FCT, &mas);
+ rcu_read_lock();
+ retry:
+ entry = mas_start(&mas);
+@@ -6362,7 +6364,7 @@ int mtree_store_range(struct maple_tree
+ MA_STATE(mas, mt, index, last);
+ int ret = 0;
+
+- trace_ma_write(__func__, &mas, 0, entry);
++ trace_ma_write(TP_FCT, &mas, 0, entry);
+ if (WARN_ON_ONCE(xa_is_advanced(entry)))
+ return -EINVAL;
+
+@@ -6585,7 +6587,7 @@ void *mtree_erase(struct maple_tree *mt,
+ void *entry = NULL;
+
+ MA_STATE(mas, mt, index, index);
+- trace_ma_op(__func__, &mas);
++ trace_ma_op(TP_FCT, &mas);
+
+ mtree_lock(mt);
+ entry = mas_erase(&mas);
+@@ -6923,7 +6925,7 @@ void *mt_find(struct maple_tree *mt, uns
+ unsigned long copy = *index;
+ #endif
+
+- trace_ma_read(__func__, &mas);
++ trace_ma_read(TP_FCT, &mas);
+
+ if ((*index) > max)
+ return NULL;
--- /dev/null
+From 4d3dbc2386fe051e44efad663e0ec828b98ab53f Mon Sep 17 00:00:00 2001
+From: Olga Kornievskaia <okorniev@redhat.com>
+Date: Thu, 9 Oct 2025 16:37:59 -0400
+Subject: nfsd: add missing FATTR4_WORD2_CLONE_BLKSIZE from supported attributes
+
+From: Olga Kornievskaia <okorniev@redhat.com>
+
+commit 4d3dbc2386fe051e44efad663e0ec828b98ab53f upstream.
+
+RFC 7862 Section 4.1.2 says that if the server supports CLONE it MUST
+support the clone_blksize attribute.
+
+Fixes: d6ca7d2643ee ("NFSD: Implement FATTR4_CLONE_BLKSIZE attribute")
+Cc: stable@vger.kernel.org
+Signed-off-by: Olga Kornievskaia <okorniev@redhat.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/nfsd.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/nfsd/nfsd.h
++++ b/fs/nfsd/nfsd.h
+@@ -455,6 +455,7 @@ enum {
+ #define NFSD4_2_SUPPORTED_ATTRS_WORD2 \
+ (NFSD4_1_SUPPORTED_ATTRS_WORD2 | \
+ FATTR4_WORD2_MODE_UMASK | \
++ FATTR4_WORD2_CLONE_BLKSIZE | \
+ NFSD4_2_SECURITY_ATTRS | \
+ FATTR4_WORD2_XATTR_SUPPORT | \
+ FATTR4_WORD2_TIME_DELEG_ACCESS | \
--- /dev/null
+From 8a7348a9ed70bda1c1f51d3f1815bcbdf9f3b38c Mon Sep 17 00:00:00 2001
+From: NeilBrown <neil@brown.name>
+Date: Wed, 8 Oct 2025 09:52:25 -0400
+Subject: nfsd: fix refcount leak in nfsd_set_fh_dentry()
+
+From: NeilBrown <neil@brown.name>
+
+commit 8a7348a9ed70bda1c1f51d3f1815bcbdf9f3b38c upstream.
+
+nfsd exports a "pseudo root filesystem" which is used by NFSv4 to find
+the various exported filesystems using LOOKUP requests from a known root
+filehandle. NFSv3 uses the MOUNT protocol to find those exported
+filesystems and so is not given access to the pseudo root filesystem.
+
+If a v3 (or v2) client uses a filehandle from that filesystem,
+nfsd_set_fh_dentry() will report an error but still store the export
+in "struct svc_fh" even though it also drops the reference (exp_put()).
+This means that when fh_put() is called an extra reference will be
+dropped, which can lead to a use-after-free and possible denial of
+service.
+
+Normal NFS usage will not provide a pseudo-root filehandle to a v3
+client. This bug can only be triggered by the client synthesising an
+incorrect filehandle.
+
+To fix this we move the assignments to the svc_fh later, after all
+possible error cases have been detected.
+
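+The ordering fix in miniature (a sketch; version_check() stands in for
+the real filehandle-version validation):
+
+    /* Run everything that can still fail first ... */
+    if (version_check(fhp, dentry))
+        goto out;          /* nothing published, exp_put() is safe */
+
+    /* ... and only then publish the reference-holding pointers, so
+     * a later fh_put() never drops a reference twice. */
+    fhp->fh_dentry = dentry;
+    fhp->fh_export = exp;
+    return 0;
+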
+Reported-and-tested-by: tianshuo han <hantianshuo233@gmail.com>
+Fixes: ef7f6c4904d0 ("nfsd: move V4ROOT version check to nfsd_set_fh_dentry()")
+Signed-off-by: NeilBrown <neil@brown.name>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Cc: stable@vger.kernel.org
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/nfsfh.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/fs/nfsd/nfsfh.c
++++ b/fs/nfsd/nfsfh.c
+@@ -269,9 +269,6 @@ static __be32 nfsd_set_fh_dentry(struct
+ dentry);
+ }
+
+- fhp->fh_dentry = dentry;
+- fhp->fh_export = exp;
+-
+ switch (fhp->fh_maxsize) {
+ case NFS4_FHSIZE:
+ if (dentry->d_sb->s_export_op->flags & EXPORT_OP_NOATOMIC_ATTR)
+@@ -293,6 +290,9 @@ static __be32 nfsd_set_fh_dentry(struct
+ goto out;
+ }
+
++ fhp->fh_dentry = dentry;
++ fhp->fh_export = exp;
++
+ return 0;
+ out:
+ exp_put(exp);
--- /dev/null
+From 4aa17144d5abc3c756883e3a010246f0dba8b468 Mon Sep 17 00:00:00 2001
+From: Olga Kornievskaia <okorniev@redhat.com>
+Date: Tue, 14 Oct 2025 13:59:59 -0400
+Subject: NFSD: free copynotify stateid in nfs4_free_ol_stateid()
+
+From: Olga Kornievskaia <okorniev@redhat.com>
+
+commit 4aa17144d5abc3c756883e3a010246f0dba8b468 upstream.
+
+Typically a copynotify stateid is freed either when its parent
+stateid is being closed/freed, or in nfsd4_laundromat() if the
+stateid hasn't been used for a lease period.
+
+However, consider the case when the server gets an OPEN (which
+creates a parent stateid), followed by a COPY_NOTIFY using that
+stateid, followed by a client reboot. The new client instance,
+while doing CREATE_SESSION, force-expires the previous state of
+this client. This leads to the open state being freed through
+release_openowner->nfs4_free_ol_stateid(), which finds that it
+still has a copynotify stateid associated with it. We currently
+print a warning, and the following is triggered:
+
+WARNING: CPU: 1 PID: 8858 at fs/nfsd/nfs4state.c:1550 nfs4_free_ol_stateid+0xb0/0x100 [nfsd]
+
+This patch, instead, frees the associated copynotify stateid here.
+
+If the parent stateid is freed without freeing the copynotify
+stateids associated with it, it leads to list corruption when the
+laundromat ends up freeing the copynotify state later.
+
+[ 1626.839430] Internal error: Oops - BUG: 00000000f2000800 [#1] SMP
+[ 1626.842828] Modules linked in: nfnetlink_queue nfnetlink_log bluetooth cfg80211 rpcrdma rdma_cm iw_cm ib_cm ib_core nfsd nfs_acl lockd grace nfs_localio ext4 crc16 mbcache jbd2 overlay uinput snd_seq_dummy snd_hrtimer qrtr rfkill vfat fat uvcvideo snd_hda_codec_generic videobuf2_vmalloc videobuf2_memops snd_hda_intel uvc snd_intel_dspcfg videobuf2_v4l2 videobuf2_common snd_hda_codec snd_hda_core videodev snd_hwdep snd_seq mc snd_seq_device snd_pcm snd_timer snd soundcore sg loop auth_rpcgss vsock_loopback vmw_vsock_virtio_transport_common vmw_vsock_vmci_transport vmw_vmci vsock xfs 8021q garp stp llc mrp nvme ghash_ce e1000e nvme_core sr_mod nvme_keyring nvme_auth cdrom vmwgfx drm_ttm_helper ttm sunrpc dm_mirror dm_region_hash dm_log iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi fuse dm_multipath dm_mod nfnetlink
+[ 1626.855594] CPU: 2 UID: 0 PID: 199 Comm: kworker/u24:33 Kdump: loaded Tainted: G B W 6.17.0-rc7+ #22 PREEMPT(voluntary)
+[ 1626.857075] Tainted: [B]=BAD_PAGE, [W]=WARN
+[ 1626.857573] Hardware name: VMware, Inc. VMware20,1/VBSA, BIOS VMW201.00V.24006586.BA64.2406042154 06/04/2024
+[ 1626.858724] Workqueue: nfsd4 laundromat_main [nfsd]
+[ 1626.859304] pstate: 61400005 (nZCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--)
+[ 1626.860010] pc : __list_del_entry_valid_or_report+0x148/0x200
+[ 1626.860601] lr : __list_del_entry_valid_or_report+0x148/0x200
+[ 1626.861182] sp : ffff8000881d7a40
+[ 1626.861521] x29: ffff8000881d7a40 x28: 0000000000000018 x27: ffff0000c2a98200
+[ 1626.862260] x26: 0000000000000600 x25: 0000000000000000 x24: ffff8000881d7b20
+[ 1626.862986] x23: ffff0000c2a981e8 x22: 1fffe00012410e7d x21: ffff0000920873e8
+[ 1626.863701] x20: ffff0000920873e8 x19: ffff000086f22998 x18: 0000000000000000
+[ 1626.864421] x17: 20747562202c3839 x16: 3932326636383030 x15: 3030666666662065
+[ 1626.865092] x14: 6220646c756f6873 x13: 0000000000000001 x12: ffff60004fd9e4a3
+[ 1626.865713] x11: 1fffe0004fd9e4a2 x10: ffff60004fd9e4a2 x9 : dfff800000000000
+[ 1626.866320] x8 : 00009fffb0261b5e x7 : ffff00027ecf2513 x6 : 0000000000000001
+[ 1626.866938] x5 : ffff00027ecf2510 x4 : ffff60004fd9e4a3 x3 : 0000000000000000
+[ 1626.867553] x2 : 0000000000000000 x1 : ffff000096069640 x0 : 000000000000006d
+[ 1626.868167] Call trace:
+[ 1626.868382] __list_del_entry_valid_or_report+0x148/0x200 (P)
+[ 1626.868876] _free_cpntf_state_locked+0xd0/0x268 [nfsd]
+[ 1626.869368] nfs4_laundromat+0x6f8/0x1058 [nfsd]
+[ 1626.869813] laundromat_main+0x24/0x60 [nfsd]
+[ 1626.870231] process_one_work+0x584/0x1050
+[ 1626.870595] worker_thread+0x4c4/0xc60
+[ 1626.870893] kthread+0x2f8/0x398
+[ 1626.871146] ret_from_fork+0x10/0x20
+[ 1626.871422] Code: aa1303e1 aa1403e3 910e8000 97bc55d7 (d4210000)
+[ 1626.871892] SMP: stopping secondary CPUs
+
+Reported-by: rtm@csail.mit.edu
+Closes: https://lore.kernel.org/linux-nfs/d8f064c1-a26f-4eed-b4f0-1f7f608f415f@oracle.com/T/#t
+Fixes: 624322f1adc5 ("NFSD add COPY_NOTIFY operation")
+Cc: stable@vger.kernel.org
+Signed-off-by: Olga Kornievskaia <okorniev@redhat.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/nfs4state.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -1505,7 +1505,8 @@ static void nfs4_free_ol_stateid(struct
+ release_all_access(stp);
+ if (stp->st_stateowner)
+ nfs4_put_stateowner(stp->st_stateowner);
+- WARN_ON(!list_empty(&stid->sc_cp_list));
++ if (!list_empty(&stid->sc_cp_list))
++ nfs4_free_cpntf_statelist(stid->sc_client->net, stid);
+ kmem_cache_free(stateid_slab, stid);
+ }
+
pwm-adp5585-correct-mismatched-pwm-chip-info.patch
hid-playstation-fix-memory-leak-in-dualshock4_get_ca.patch
hid-uclogic-fix-potential-memory-leak-in-error-path.patch
+loongarch-kvm-restore-guest-pmu-if-it-is-enabled.patch
+loongarch-kvm-add-delay-until-timer-interrupt-injected.patch
+loongarch-kvm-fix-max-supported-vcpus-set-with-eiointc.patch
+kvm-guest_memfd-remove-bindings-on-memslot-deletion-when-gmem-is-dying.patch
+kvm-arm64-make-all-32bit-id-registers-fully-writable.patch
+kvm-svm-mark-vmcb_lbr-dirty-when-msr_ia32_debugctlmsr-is-updated.patch
+kvm-nsvm-always-recalculate-lbr-msr-intercepts-in-svm_update_lbrv.patch
+kvm-nsvm-fix-and-simplify-lbr-virtualization-handling-with-nested.patch
+kvm-vmx-fix-check-for-valid-gva-on-an-ept-violation.patch
+nfsd-fix-refcount-leak-in-nfsd_set_fh_dentry.patch
+nfsd-add-missing-fattr4_word2_clone_blksize-from-supported-attributes.patch
+nfsd-free-copynotify-stateid-in-nfs4_free_ol_stateid.patch
+gcov-add-support-for-gcc-15.patch
+ksmbd-close-accepted-socket-when-per-ip-limit-rejects-connection.patch
+ksm-use-range-walk-function-to-jump-over-holes-in-scan_get_next_rmap_item.patch
+kho-warn-and-fail-on-metadata-or-preserved-memory-in-scratch-area.patch
+kho-increase-metadata-bitmap-size-to-page_size.patch
+kho-allocate-metadata-directly-from-the-buddy-allocator.patch
+kho-warn-and-exit-when-unpreserved-page-wasn-t-preserved.patch
+strparser-fix-signed-unsigned-mismatch-bug.patch
+dma-mapping-benchmark-restore-padding-to-ensure-uabi-remained-consistent.patch
+maple_tree-fix-tracepoint-string-pointers.patch
+loongarch-consolidate-early_ioremap-ioremap_prot.patch
+loongarch-use-correct-accessor-to-read-fwpc-mwpc.patch
+loongarch-let-pte-pmd-_modify-record-the-status-of-_page_dirty.patch
--- /dev/null
+From 4da4e4bde1c453ac5cc2dce5def81d504ae257ee Mon Sep 17 00:00:00 2001
+From: Nate Karstens <nate.karstens@garmin.com>
+Date: Thu, 6 Nov 2025 16:28:33 -0600
+Subject: strparser: Fix signed/unsigned mismatch bug
+
+From: Nate Karstens <nate.karstens@garmin.com>
+
+commit 4da4e4bde1c453ac5cc2dce5def81d504ae257ee upstream.
+
+The `len` member of the sk_buff is an unsigned int. This is cast to
+`ssize_t` (a signed type) for the first sk_buff in the comparison,
+but not the second sk_buff. On 32-bit systems, this can result in
+an integer underflow for certain values because unsigned arithmetic
+is being used.
+
+This appears to be an oversight: if the intention was to use unsigned
+arithmetic, then the first cast would have been omitted. The change
+ensures both len values are cast to `ssize_t`.
+
+The underflow causes an issue with ktls when multiple TLS PDUs are
+included in a single TCP segment. The mainline kernel does not use
+strparser for ktls anymore, but this is still useful for other
+features that still use strparser, and for backporting.
+
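+A userspace demonstration of the pitfall (a sketch, not kernel code;
+build with -m32, where ssize_t is 32 bits, to reproduce):
+
+    #include <stdio.h>
+    #include <sys/types.h>
+
+    int main(void)
+    {
+        unsigned int head_len = 10, skb_len = 20;  /* like skb->len */
+        int offset = 5;
+        ssize_t len = 1;
+
+        /* Only the first length cast: skb_len drags the subtraction
+         * back to unsigned, 10 - 20 - 5 wraps to a huge positive
+         * value and the comparison wrongly succeeds (prints 1). */
+        printf("%d\n", len <= (ssize_t)head_len - skb_len - offset);
+
+        /* Both lengths cast: arithmetic stays signed, the result is
+         * -15 and the comparison correctly fails (prints 0). */
+        printf("%d\n", len <= (ssize_t)head_len - (ssize_t)skb_len - offset);
+        return 0;
+    }
+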
+Signed-off-by: Nate Karstens <nate.karstens@garmin.com>
+Cc: stable@vger.kernel.org
+Fixes: 43a0c6751a32 ("strparser: Stream parser for messages")
+Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
+Reviewed-by: Sabrina Dubroca <sd@queasysnail.net>
+Link: https://patch.msgid.link/20251106222835.1871628-1-nate.karstens@garmin.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/strparser/strparser.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/strparser/strparser.c
++++ b/net/strparser/strparser.c
+@@ -238,7 +238,7 @@ static int __strp_recv(read_descriptor_t
+ strp_parser_err(strp, -EMSGSIZE, desc);
+ break;
+ } else if (len <= (ssize_t)head->len -
+- skb->len - stm->strp.offset) {
++ (ssize_t)skb->len - stm->strp.offset) {
+ /* Length must be into new skb (and also
+ * greater than zero)
+ */