+++ /dev/null
-From fa759cd75bce5489eed34596daa53f721849a86f Mon Sep 17 00:00:00 2001
-From: Pasha Tatashin <pasha.tatashin@soleen.com>
-Date: Mon, 20 Oct 2025 20:08:52 -0400
-Subject: kho: allocate metadata directly from the buddy allocator
-
-From: Pasha Tatashin <pasha.tatashin@soleen.com>
-
-commit fa759cd75bce5489eed34596daa53f721849a86f upstream.
-
-KHO allocates metadata for its preserved memory map using the slab
-allocator via kzalloc(). This metadata is temporary and is used by the
-next kernel during early boot to find preserved memory.
-
-A problem arises when KFENCE is enabled. kzalloc() calls can be randomly
-intercepted by kfence_alloc(), which services the allocation from a
-dedicated KFENCE memory pool. This pool is allocated early in boot via
-memblock.
-
-When booting via KHO, the memblock allocator is restricted to a "scratch
-area", forcing the KFENCE pool to be allocated within it. This creates a
-conflict, as the scratch area is expected to be ephemeral and
-overwritable by a subsequent kexec. If KHO metadata is placed in this
-KFENCE pool, it leads to memory corruption when the next kernel is loaded.
-
-To fix this, modify KHO to allocate its metadata directly from the buddy
-allocator instead of slab.
-
-Link: https://lkml.kernel.org/r/20251021000852.2924827-4-pasha.tatashin@soleen.com
-Fixes: fc33e4b44b27 ("kexec: enable KHO support for memory preservation")
-Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
-Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
-Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
-Reviewed-by: David Matlack <dmatlack@google.com>
-Cc: Alexander Graf <graf@amazon.com>
-Cc: Christian Brauner <brauner@kernel.org>
-Cc: Jason Gunthorpe <jgg@ziepe.ca>
-Cc: Jonathan Corbet <corbet@lwn.net>
-Cc: Masahiro Yamada <masahiroy@kernel.org>
-Cc: Miguel Ojeda <ojeda@kernel.org>
-Cc: Randy Dunlap <rdunlap@infradead.org>
-Cc: Samiullah Khawaja <skhawaja@google.com>
-Cc: Tejun Heo <tj@kernel.org>
-Cc: <stable@vger.kernel.org>
-Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- include/linux/gfp.h | 3 +++
- kernel/kexec_handover.c | 6 +++---
- 2 files changed, 6 insertions(+), 3 deletions(-)
-
---- a/include/linux/gfp.h
-+++ b/include/linux/gfp.h
-@@ -7,6 +7,7 @@
- #include <linux/mmzone.h>
- #include <linux/topology.h>
- #include <linux/alloc_tag.h>
-+#include <linux/cleanup.h>
- #include <linux/sched.h>
-
- struct vm_area_struct;
-@@ -463,4 +464,6 @@ static inline struct folio *folio_alloc_
- /* This should be paired with folio_put() rather than free_contig_range(). */
- #define folio_alloc_gigantic(...) alloc_hooks(folio_alloc_gigantic_noprof(__VA_ARGS__))
-
-+DEFINE_FREE(free_page, void *, free_page((unsigned long)_T))
-+
- #endif /* __LINUX_GFP_H */
---- a/kernel/kexec_handover.c
-+++ b/kernel/kexec_handover.c
-@@ -102,7 +102,7 @@ static void *xa_load_or_alloc(struct xar
- if (res)
- return res;
-
-- void *elm __free(kfree) = kzalloc(PAGE_SIZE, GFP_KERNEL);
-+ void *elm __free(free_page) = (void *)get_zeroed_page(GFP_KERNEL);
-
- if (!elm)
- return ERR_PTR(-ENOMEM);
-@@ -266,9 +266,9 @@ static_assert(sizeof(struct khoser_mem_c
- static struct khoser_mem_chunk *new_chunk(struct khoser_mem_chunk *cur_chunk,
- unsigned long order)
- {
-- struct khoser_mem_chunk *chunk __free(kfree) = NULL;
-+ struct khoser_mem_chunk *chunk __free(free_page) = NULL;
-
-- chunk = kzalloc(PAGE_SIZE, GFP_KERNEL);
-+ chunk = (void *)get_zeroed_page(GFP_KERNEL);
- if (!chunk)
- return ERR_PTR(-ENOMEM);
-
+++ /dev/null
-From a2fff99f92dae9c0eaf0d75de3def70ec68dad92 Mon Sep 17 00:00:00 2001
-From: Pasha Tatashin <pasha.tatashin@soleen.com>
-Date: Mon, 20 Oct 2025 20:08:51 -0400
-Subject: kho: increase metadata bitmap size to PAGE_SIZE
-
-From: Pasha Tatashin <pasha.tatashin@soleen.com>
-
-commit a2fff99f92dae9c0eaf0d75de3def70ec68dad92 upstream.
-
-KHO memory preservation metadata is kept in 512 byte chunks, which
-requires allocating them from the slab allocator. Slabs are not safe to
-use with KHO because of KFENCE, and because partial slabs may leak data
-into the next kernel. Change the chunk size to PAGE_SIZE.
-
-KFENCE in particular may cause memory corruption, since it can randomly
-provide slab objects that lie within the scratch area. The reason is
-that KFENCE allocates its objects before the KHO scratch area is marked
-as a CMA region.
-
-While this change could potentially increase metadata overhead on systems
-with sparsely preserved memory, this is being mitigated by ongoing work to
-reduce sparseness during preservation via 1G guest pages. Furthermore,
-this change aligns with future work on a stateless KHO, which will also
-use page-sized bitmaps for its radix tree metadata.
-
-Link: https://lkml.kernel.org/r/20251021000852.2924827-3-pasha.tatashin@soleen.com
-Fixes: fc33e4b44b27 ("kexec: enable KHO support for memory preservation")
-Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
-Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
-Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
-Cc: Alexander Graf <graf@amazon.com>
-Cc: Christian Brauner <brauner@kernel.org>
-Cc: David Matlack <dmatlack@google.com>
-Cc: Jason Gunthorpe <jgg@ziepe.ca>
-Cc: Jonathan Corbet <corbet@lwn.net>
-Cc: Masahiro Yamada <masahiroy@kernel.org>
-Cc: Miguel Ojeda <ojeda@kernel.org>
-Cc: Randy Dunlap <rdunlap@infradead.org>
-Cc: Samiullah Khawaja <skhawaja@google.com>
-Cc: Tejun Heo <tj@kernel.org>
-Cc: <stable@vger.kernel.org>
-Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- kernel/kexec_handover.c | 21 +++++++++++----------
- 1 file changed, 11 insertions(+), 10 deletions(-)
-
---- a/kernel/kexec_handover.c
-+++ b/kernel/kexec_handover.c
-@@ -52,10 +52,10 @@ early_param("kho", kho_parse_enable);
- * Keep track of memory that is to be preserved across KHO.
- *
- * The serializing side uses two levels of xarrays to manage chunks of per-order
-- * 512 byte bitmaps. For instance if PAGE_SIZE = 4096, the entire 1G order of a
-- * 1TB system would fit inside a single 512 byte bitmap. For order 0 allocations
-- * each bitmap will cover 16M of address space. Thus, for 16G of memory at most
-- * 512K of bitmap memory will be needed for order 0.
-+ * PAGE_SIZE byte bitmaps. For instance if PAGE_SIZE = 4096, the entire 1G order
-+ * of a 8TB system would fit inside a single 4096 byte bitmap. For order 0
-+ * allocations each bitmap will cover 128M of address space. Thus, for 16G of
-+ * memory at most 512K of bitmap memory will be needed for order 0.
- *
- * This approach is fully incremental, as the serialization progresses folios
- * can continue be aggregated to the tracker. The final step, immediately prior
-@@ -63,12 +63,14 @@ early_param("kho", kho_parse_enable);
- * successor kernel to parse.
- */
-
--#define PRESERVE_BITS (512 * 8)
-+#define PRESERVE_BITS (PAGE_SIZE * 8)
-
- struct kho_mem_phys_bits {
- DECLARE_BITMAP(preserve, PRESERVE_BITS);
- };
-
-+static_assert(sizeof(struct kho_mem_phys_bits) == PAGE_SIZE);
-+
- struct kho_mem_phys {
- /*
- * Points to kho_mem_phys_bits, a sparse bitmap array. Each bit is sized
-@@ -93,19 +95,19 @@ struct kho_serialization {
- struct khoser_mem_chunk *preserved_mem_map;
- };
-
--static void *xa_load_or_alloc(struct xarray *xa, unsigned long index, size_t sz)
-+static void *xa_load_or_alloc(struct xarray *xa, unsigned long index)
- {
- void *res = xa_load(xa, index);
-
- if (res)
- return res;
-
-- void *elm __free(kfree) = kzalloc(sz, GFP_KERNEL);
-+ void *elm __free(kfree) = kzalloc(PAGE_SIZE, GFP_KERNEL);
-
- if (!elm)
- return ERR_PTR(-ENOMEM);
-
-- if (WARN_ON(kho_scratch_overlap(virt_to_phys(elm), sz)))
-+ if (WARN_ON(kho_scratch_overlap(virt_to_phys(elm), PAGE_SIZE)))
- return ERR_PTR(-EINVAL);
-
- res = xa_cmpxchg(xa, index, NULL, elm, GFP_KERNEL);
-@@ -175,8 +177,7 @@ static int __kho_preserve_order(struct k
- }
- }
-
-- bits = xa_load_or_alloc(&physxa->phys_bits, pfn_high / PRESERVE_BITS,
-- sizeof(*bits));
-+ bits = xa_load_or_alloc(&physxa->phys_bits, pfn_high / PRESERVE_BITS);
- if (IS_ERR(bits))
- return PTR_ERR(bits);
-
+++ /dev/null
-From e38f65d317df1fd2dcafe614d9c537475ecf9992 Mon Sep 17 00:00:00 2001
-From: Pasha Tatashin <pasha.tatashin@soleen.com>
-Date: Mon, 20 Oct 2025 20:08:50 -0400
-Subject: kho: warn and fail on metadata or preserved memory in scratch area
-
-From: Pasha Tatashin <pasha.tatashin@soleen.com>
-
-commit e38f65d317df1fd2dcafe614d9c537475ecf9992 upstream.
-
-Patch series "KHO: kfence + KHO memory corruption fix", v3.
-
-This series fixes a memory corruption bug in KHO that occurs when KFENCE
-is enabled.
-
-The root cause is that KHO metadata, allocated via kzalloc(), can be
-randomly serviced by kfence_alloc(). When a kernel boots via KHO, the
-early memblock allocator is restricted to a "scratch area". This forces
-the KFENCE pool to be allocated within this scratch area, creating a
-conflict. If KHO metadata is subsequently placed in this pool, it gets
-corrupted during the next kexec operation.
-
-Google is using KHO and has had obscure crashes due to this memory
-corruption, with stacks all over the place. I would prefer this fix to be
-properly backported to stable so we can also automatically consume it once
-we switch to the upstream KHO.
-
-Patch 1/3 introduces a debug-only feature (CONFIG_KEXEC_HANDOVER_DEBUG)
-that adds checks to detect and fail any operation that attempts to place
-KHO metadata or preserved memory within the scratch area. This serves as
-a validation and diagnostic tool to confirm the problem without affecting
-production builds.
-
-Patch 2/3 increases the bitmap size to PAGE_SIZE, so the buddy allocator
-can be used.
-
-Patch 3/3 provides the fix by modifying KHO to allocate its metadata
-directly from the buddy allocator instead of slab. This bypasses the
-KFENCE interception entirely.
-
-
-This patch (of 3):
-
-It is invalid for KHO metadata or preserved memory regions to be located
-within the KHO scratch area, as this area is overwritten when the next
-kernel is loaded and is used by that kernel early in boot. This can lead
-to memory corruption.
-
-Add checks to kho_preserve_* and KHO's internal metadata allocators
-(xa_load_or_alloc, new_chunk) to verify that the physical address of the
-memory does not overlap with any defined scratch region. If an overlap is
-detected, the operation will fail and a WARN_ON is triggered. To avoid
-performance overhead in production kernels, these checks are enabled only
-when CONFIG_KEXEC_HANDOVER_DEBUG is selected.
-
-[rppt@kernel.org: fix KEXEC_HANDOVER_DEBUG Kconfig dependency]
- Link: https://lkml.kernel.org/r/aQHUyyFtiNZhx8jo@kernel.org
-[pasha.tatashin@soleen.com: build fix]
- Link: https://lkml.kernel.org/r/CA+CK2bBnorfsTymKtv4rKvqGBHs=y=MjEMMRg_tE-RME6n-zUw@mail.gmail.com
-Link: https://lkml.kernel.org/r/20251021000852.2924827-1-pasha.tatashin@soleen.com
-Link: https://lkml.kernel.org/r/20251021000852.2924827-2-pasha.tatashin@soleen.com
-Fixes: fc33e4b44b27 ("kexec: enable KHO support for memory preservation")
-Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
-Signed-off-by: Mike Rapoport <rppt@kernel.org>
-Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
-Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
-Cc: Alexander Graf <graf@amazon.com>
-Cc: Christian Brauner <brauner@kernel.org>
-Cc: David Matlack <dmatlack@google.com>
-Cc: Jason Gunthorpe <jgg@ziepe.ca>
-Cc: Jonathan Corbet <corbet@lwn.net>
-Cc: Masahiro Yamada <masahiroy@kernel.org>
-Cc: Miguel Ojeda <ojeda@kernel.org>
-Cc: Randy Dunlap <rdunlap@infradead.org>
-Cc: Samiullah Khawaja <skhawaja@google.com>
-Cc: Tejun Heo <tj@kernel.org>
-Cc: <stable@vger.kernel.org>
-Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- kernel/Kconfig.kexec | 9 ++++++
- kernel/Makefile | 1
- kernel/kexec_handover.c | 57 ++++++++++++++++++++++++++-------------
- kernel/kexec_handover_debug.c | 25 +++++++++++++++++
- kernel/kexec_handover_internal.h | 20 +++++++++++++
- 5 files changed, 93 insertions(+), 19 deletions(-)
- create mode 100644 kernel/kexec_handover_debug.c
- create mode 100644 kernel/kexec_handover_internal.h
-
---- a/kernel/Kconfig.kexec
-+++ b/kernel/Kconfig.kexec
-@@ -109,6 +109,15 @@ config KEXEC_HANDOVER
- to keep data or state alive across the kexec. For this to work,
- both source and target kernels need to have this option enabled.
-
-+config KEXEC_HANDOVER_DEBUG
-+ bool "Enable Kexec Handover debug checks"
-+ depends on KEXEC_HANDOVER
-+ help
-+ This option enables extra sanity checks for the Kexec Handover
-+ subsystem. Since, KHO performance is crucial in live update
-+ scenarios and the extra code might be adding overhead it is
-+ only optionally enabled.
-+
- config CRASH_DUMP
- bool "kernel crash dumps"
- default ARCH_DEFAULT_CRASH_DUMP
---- a/kernel/Makefile
-+++ b/kernel/Makefile
-@@ -82,6 +82,7 @@ obj-$(CONFIG_KEXEC) += kexec.o
- obj-$(CONFIG_KEXEC_FILE) += kexec_file.o
- obj-$(CONFIG_KEXEC_ELF) += kexec_elf.o
- obj-$(CONFIG_KEXEC_HANDOVER) += kexec_handover.o
-+obj-$(CONFIG_KEXEC_HANDOVER_DEBUG) += kexec_handover_debug.o
- obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
- obj-$(CONFIG_COMPAT) += compat.o
- obj-$(CONFIG_CGROUPS) += cgroup/
---- a/kernel/kexec_handover.c
-+++ b/kernel/kexec_handover.c
-@@ -8,6 +8,7 @@
-
- #define pr_fmt(fmt) "KHO: " fmt
-
-+#include <linux/cleanup.h>
- #include <linux/cma.h>
- #include <linux/count_zeros.h>
- #include <linux/debugfs.h>
-@@ -21,6 +22,7 @@
-
- #include <asm/early_ioremap.h>
-
-+#include "kexec_handover_internal.h"
- /*
- * KHO is tightly coupled with mm init and needs access to some of mm
- * internal APIs.
-@@ -93,26 +95,26 @@ struct kho_serialization {
-
- static void *xa_load_or_alloc(struct xarray *xa, unsigned long index, size_t sz)
- {
-- void *elm, *res;
-+ void *res = xa_load(xa, index);
-
-- elm = xa_load(xa, index);
-- if (elm)
-- return elm;
-+ if (res)
-+ return res;
-+
-+ void *elm __free(kfree) = kzalloc(sz, GFP_KERNEL);
-
-- elm = kzalloc(sz, GFP_KERNEL);
- if (!elm)
- return ERR_PTR(-ENOMEM);
-
-+ if (WARN_ON(kho_scratch_overlap(virt_to_phys(elm), sz)))
-+ return ERR_PTR(-EINVAL);
-+
- res = xa_cmpxchg(xa, index, NULL, elm, GFP_KERNEL);
- if (xa_is_err(res))
-- res = ERR_PTR(xa_err(res));
--
-- if (res) {
-- kfree(elm);
-+ return ERR_PTR(xa_err(res));
-+ else if (res)
- return res;
-- }
-
-- return elm;
-+ return no_free_ptr(elm);
- }
-
- static void __kho_unpreserve(struct kho_mem_track *track, unsigned long pfn,
-@@ -263,15 +265,19 @@ static_assert(sizeof(struct khoser_mem_c
- static struct khoser_mem_chunk *new_chunk(struct khoser_mem_chunk *cur_chunk,
- unsigned long order)
- {
-- struct khoser_mem_chunk *chunk;
-+ struct khoser_mem_chunk *chunk __free(kfree) = NULL;
-
- chunk = kzalloc(PAGE_SIZE, GFP_KERNEL);
- if (!chunk)
-- return NULL;
-+ return ERR_PTR(-ENOMEM);
-+
-+ if (WARN_ON(kho_scratch_overlap(virt_to_phys(chunk), PAGE_SIZE)))
-+ return ERR_PTR(-EINVAL);
-+
- chunk->hdr.order = order;
- if (cur_chunk)
- KHOSER_STORE_PTR(cur_chunk->hdr.next, chunk);
-- return chunk;
-+ return no_free_ptr(chunk);
- }
-
- static void kho_mem_ser_free(struct khoser_mem_chunk *first_chunk)
-@@ -292,14 +298,17 @@ static int kho_mem_serialize(struct kho_
- struct khoser_mem_chunk *chunk = NULL;
- struct kho_mem_phys *physxa;
- unsigned long order;
-+ int err = -ENOMEM;
-
- xa_for_each(&ser->track.orders, order, physxa) {
- struct kho_mem_phys_bits *bits;
- unsigned long phys;
-
- chunk = new_chunk(chunk, order);
-- if (!chunk)
-+ if (IS_ERR(chunk)) {
-+ err = PTR_ERR(chunk);
- goto err_free;
-+ }
-
- if (!first_chunk)
- first_chunk = chunk;
-@@ -309,8 +318,10 @@ static int kho_mem_serialize(struct kho_
-
- if (chunk->hdr.num_elms == ARRAY_SIZE(chunk->bitmaps)) {
- chunk = new_chunk(chunk, order);
-- if (!chunk)
-+ if (IS_ERR(chunk)) {
-+ err = PTR_ERR(chunk);
- goto err_free;
-+ }
- }
-
- elm = &chunk->bitmaps[chunk->hdr.num_elms];
-@@ -327,7 +338,7 @@ static int kho_mem_serialize(struct kho_
-
- err_free:
- kho_mem_ser_free(first_chunk);
-- return -ENOMEM;
-+ return err;
- }
-
- static void __init deserialize_bitmap(unsigned int order,
-@@ -380,8 +391,8 @@ static void __init kho_mem_deserialize(c
- * area for early allocations that happen before page allocator is
- * initialized.
- */
--static struct kho_scratch *kho_scratch;
--static unsigned int kho_scratch_cnt;
-+struct kho_scratch *kho_scratch;
-+unsigned int kho_scratch_cnt;
-
- /*
- * The scratch areas are scaled by default as percent of memory allocated from
-@@ -684,6 +695,9 @@ int kho_preserve_folio(struct folio *fol
- if (kho_out.finalized)
- return -EBUSY;
-
-+ if (WARN_ON(kho_scratch_overlap(pfn << PAGE_SHIFT, PAGE_SIZE << order)))
-+ return -EINVAL;
-+
- return __kho_preserve_order(track, pfn, order);
- }
- EXPORT_SYMBOL_GPL(kho_preserve_folio);
-@@ -713,6 +727,11 @@ int kho_preserve_phys(phys_addr_t phys,
- if (!PAGE_ALIGNED(phys) || !PAGE_ALIGNED(size))
- return -EINVAL;
-
-+ if (WARN_ON(kho_scratch_overlap(start_pfn << PAGE_SHIFT,
-+ nr_pages << PAGE_SHIFT))) {
-+ return -EINVAL;
-+ }
-+
- while (pfn < end_pfn) {
- const unsigned int order =
- min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn));
---- /dev/null
-+++ b/kernel/kexec_handover_debug.c
-@@ -0,0 +1,25 @@
-+// SPDX-License-Identifier: GPL-2.0-only
-+/*
-+ * kexec_handover_debug.c - kexec handover optional debug functionality
-+ * Copyright (C) 2025 Google LLC, Pasha Tatashin <pasha.tatashin@soleen.com>
-+ */
-+
-+#define pr_fmt(fmt) "KHO: " fmt
-+
-+#include "kexec_handover_internal.h"
-+
-+bool kho_scratch_overlap(phys_addr_t phys, size_t size)
-+{
-+ phys_addr_t scratch_start, scratch_end;
-+ unsigned int i;
-+
-+ for (i = 0; i < kho_scratch_cnt; i++) {
-+ scratch_start = kho_scratch[i].addr;
-+ scratch_end = kho_scratch[i].addr + kho_scratch[i].size;
-+
-+ if (phys < scratch_end && (phys + size) > scratch_start)
-+ return true;
-+ }
-+
-+ return false;
-+}
---- /dev/null
-+++ b/kernel/kexec_handover_internal.h
-@@ -0,0 +1,20 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef LINUX_KEXEC_HANDOVER_INTERNAL_H
-+#define LINUX_KEXEC_HANDOVER_INTERNAL_H
-+
-+#include <linux/kexec_handover.h>
-+#include <linux/types.h>
-+
-+extern struct kho_scratch *kho_scratch;
-+extern unsigned int kho_scratch_cnt;
-+
-+#ifdef CONFIG_KEXEC_HANDOVER_DEBUG
-+bool kho_scratch_overlap(phys_addr_t phys, size_t size);
-+#else
-+static inline bool kho_scratch_overlap(phys_addr_t phys, size_t size)
-+{
-+ return false;
-+}
-+#endif /* CONFIG_KEXEC_HANDOVER_DEBUG */
-+
-+#endif /* LINUX_KEXEC_HANDOVER_INTERNAL_H */