#ifndef _LINUX_KHO_ABI_KEXEC_HANDOVER_H
#define _LINUX_KHO_ABI_KEXEC_HANDOVER_H
+#include <linux/bits.h>
+#include <linux/log2.h>
+#include <linux/math.h>
#include <linux/types.h>
+#include <asm/page.h>
+
/**
* DOC: Kexec Handover ABI
*
* compatibility is only guaranteed for kernels supporting the same ABI version.
*
* FDT Structure Overview:
- * The FDT serves as a central registry for physical
- * addresses of preserved data structures and sub-FDTs. The first kernel
- * populates this FDT with references to memory regions and other FDTs that
- * need to persist across the kexec transition. The subsequent kernel then
- * parses this FDT to locate and restore the preserved data.::
+ * The FDT serves as a central registry for physical addresses of preserved
+ * data structures. The first kernel populates this FDT with references to
+ * memory regions and other metadata that need to persist across the kexec
+ * transition. The subsequent kernel then parses this FDT to locate and
+ * restore the preserved data.::
*
* / {
- * compatible = "kho-v1";
+ * compatible = "kho-v2";
*
* preserved-memory-map = <0x...>;
*
* <subnode-name-1> {
- * fdt = <0x...>;
+ * preserved-data = <0x...>;
* };
*
* <subnode-name-2> {
- * fdt = <0x...>;
+ * preserved-data = <0x...>;
* };
* ... ...
* <subnode-name-N> {
- * fdt = <0x...>;
+ * preserved-data = <0x...>;
* };
* };
*
* Root KHO Node (/):
- * - compatible: "kho-v1"
+ * - compatible: "kho-v2"
*
* Identifies the overall KHO ABI version.
*
* is provided by the subsystem that uses KHO for preserving its
* data.
*
- * - fdt: u64
+ * - preserved-data: u64
*
- * Physical address pointing to a subnode FDT blob that is also
+ * Physical address pointing to a subnode data blob that is also
* being preserved.
*/
/* The compatible string for the KHO FDT root node. */
-#define KHO_FDT_COMPATIBLE "kho-v1"
+#define KHO_FDT_COMPATIBLE "kho-v2"
/* The FDT property for the preserved memory map. */
#define KHO_FDT_MEMORY_MAP_PROP_NAME "preserved-memory-map"
-/* The FDT property for sub-FDTs. */
-#define KHO_FDT_SUB_TREE_PROP_NAME "fdt"
+/* The FDT property for preserved data blobs. */
+#define KHO_FDT_SUB_TREE_PROP_NAME "preserved-data"
/**
* DOC: Kexec Handover ABI for vmalloc Preservation
unsigned short order;
};
+/**
+ * DOC: KHO persistent memory tracker
+ *
+ * KHO tracks preserved memory using a radix tree data structure. Each node of
+ * the tree is exactly a single page. The leaf nodes are bitmaps where each set
+ * bit is a preserved page of any order. The intermediate nodes are tables of
+ * physical addresses that point to a lower level node.
+ *
+ * The tree hierarchy is shown below::
+ *
+ * root
+ * +-------------------+
+ * | Level 5 | (struct kho_radix_node)
+ * +-------------------+
+ * |
+ * v
+ * +-------------------+
+ * | Level 4 | (struct kho_radix_node)
+ * +-------------------+
+ * |
+ * | ... (intermediate levels)
+ * |
+ * v
+ * +-------------------+
+ * | Level 0 | (struct kho_radix_leaf)
+ * +-------------------+
+ *
+ * The tree is traversed using a key that encodes the page's physical address
+ * (pa) and its order into a single unsigned long value. The encoded key value
+ * is composed of two parts: the 'order bit' in the upper part and the
+ * 'shifted physical address' in the lower part.::
+ *
+ * +------------+-----------------------------+--------------------------+
+ * | Page Order | Order Bit                   | Shifted Physical Address |
+ * +------------+-----------------------------+--------------------------+
+ * | 0          | ...000100 ... (at bit 52)   | pa >> (PAGE_SHIFT + 0)   |
+ * | 1          | ...000010 ... (at bit 51)   | pa >> (PAGE_SHIFT + 1)   |
+ * | 2          | ...000001 ... (at bit 50)   | pa >> (PAGE_SHIFT + 2)   |
+ * | ...        | ...                         | ...                      |
+ * +------------+-----------------------------+--------------------------+
+ *
+ * Shifted Physical Address:
+ * The 'shifted physical address' is the physical address normalized for its
+ * order. It effectively represents the PFN shifted right by the order.
+ *
+ * Order Bit:
+ * The 'order bit' encodes the page order by setting a single bit at a
+ * specific position. The position of this bit itself represents the order.
+ *
+ * For instance, on a 64-bit system with 4KB pages (PAGE_SHIFT = 12), the
+ * maximum range for the shifted physical address (for order 0) is 52 bits
+ * (64 - 12). This address occupies bits [0-51]. For order 0, the order bit is
+ * set at position 52.
+ *
+ * The following diagram illustrates how the encoded key value is split into
+ * indices for the tree levels, with PAGE_SIZE of 4KB::
+ *
+ *    63:60     59:51    50:42    41:33    32:24    23:15         14:0
+ * +---------+--------+--------+--------+--------+--------+-----------------+
+ * |    0    |  Lv 5  |  Lv 4  |  Lv 3  |  Lv 2  |  Lv 1  |  Lv 0 (bitmap)  |
+ * +---------+--------+--------+--------+--------+--------+-----------------+
+ *
+ * The radix tree stores pages of all orders in a single 6-level hierarchy. It
+ * efficiently shares higher tree levels, especially due to common zero top
+ * address bits, allowing a single, efficient algorithm to manage all
+ * pages. This bitmap approach also offers memory efficiency; for example, a
+ * 512KB bitmap can cover a 16GB memory range for 0-order pages with PAGE_SIZE =
+ * 4KB.
+ *
+ * The data structures defined here are part of the KHO ABI. Any modification
+ * to these structures that breaks backward compatibility must be accompanied by
+ * an update to the "compatible" string. This ensures that a newer kernel can
+ * correctly interpret the data passed by an older kernel.
+ */
+
+/*
+ * Defines constants for the KHO radix tree structure, used to track preserved
+ * memory. These constants govern the indexing, sizing, and depth of the tree.
+ */
+enum kho_radix_consts {
+ /*
+ * The bit position of the order bit (and also the length of the
+ * shifted physical address) for an order-0 page.
+ */
+ KHO_ORDER_0_LOG2 = 64 - PAGE_SHIFT,
+
+ /*
+ * Size of the table in kho_radix_node, in log2. Derived from the u64
+ * table entries (not phys_addr_t) so that a node is exactly one page
+ * whatever the width of phys_addr_t is.
+ */
+ KHO_TABLE_SIZE_LOG2 = const_ilog2(PAGE_SIZE / sizeof(u64)),
+
+ /* Number of bits in the kho_radix_leaf bitmap, in log2 */
+ KHO_BITMAP_SIZE_LOG2 = PAGE_SHIFT + const_ilog2(BITS_PER_BYTE),
+
+ /*
+ * The total tree depth is the number of intermediate levels
+ * and 1 bitmap level.
+ *
+ * The widest key (order 0) has its order bit at position
+ * KHO_ORDER_0_LOG2, so it is KHO_ORDER_0_LOG2 + 1 bits long and all of
+ * those bits must be indexed by some level. Sizing the tree for only
+ * KHO_ORDER_0_LOG2 bits loses the order bit whenever
+ * KHO_ORDER_0_LOG2 - KHO_BITMAP_SIZE_LOG2 is an exact multiple of
+ * KHO_TABLE_SIZE_LOG2 (e.g. PAGE_SHIFT = 14: 50 - 17 = 3 * 11).
+ */
+ KHO_TREE_MAX_DEPTH =
+ DIV_ROUND_UP(KHO_ORDER_0_LOG2 + 1 - KHO_BITMAP_SIZE_LOG2,
+ KHO_TABLE_SIZE_LOG2) + 1,
+};
+
+struct kho_radix_node {
+ u64 table[1 << KHO_TABLE_SIZE_LOG2]; /* physical addresses of lower level nodes; 0 means no child */
+};
+
+struct kho_radix_leaf {
+ DECLARE_BITMAP(bitmap, 1 << KHO_BITMAP_SIZE_LOG2); /* PAGE_SIZE * BITS_PER_BYTE bits; a set bit is one preserved page */
+};
+
#endif /* _LINUX_KHO_ABI_KEXEC_HANDOVER_H */
* Copyright (C) 2025 Microsoft Corporation, Mike Rapoport <rppt@kernel.org>
* Copyright (C) 2025 Google LLC, Changyuan Lyu <changyuanl@google.com>
* Copyright (C) 2025 Pasha Tatashin <pasha.tatashin@soleen.com>
+ * Copyright (C) 2026 Google LLC, Jason Miu <jasonmiu@google.com>
*/
#define pr_fmt(fmt) "KHO: " fmt
#include <linux/count_zeros.h>
#include <linux/kexec.h>
#include <linux/kexec_handover.h>
+#include <linux/kho_radix_tree.h>
#include <linux/kho/abi/kexec_handover.h>
#include <linux/libfdt.h>
#include <linux/list.h>
}
early_param("kho", kho_parse_enable);
-/*
- * Keep track of memory that is to be preserved across KHO.
- *
- * The serializing side uses two levels of xarrays to manage chunks of per-order
- * PAGE_SIZE byte bitmaps. For instance if PAGE_SIZE = 4096, the entire 1G order
- * of a 8TB system would fit inside a single 4096 byte bitmap. For order 0
- * allocations each bitmap will cover 128M of address space. Thus, for 16G of
- * memory at most 512K of bitmap memory will be needed for order 0.
- *
- * This approach is fully incremental, as the serialization progresses folios
- * can continue be aggregated to the tracker. The final step, immediately prior
- * to kexec would serialize the xarray information into a linked list for the
- * successor kernel to parse.
- */
-
-#define PRESERVE_BITS (PAGE_SIZE * 8)
-
-struct kho_mem_phys_bits {
- DECLARE_BITMAP(preserve, PRESERVE_BITS);
-};
-
-static_assert(sizeof(struct kho_mem_phys_bits) == PAGE_SIZE);
-
-struct kho_mem_phys {
- /*
- * Points to kho_mem_phys_bits, a sparse bitmap array. Each bit is sized
- * to order.
- */
- struct xarray phys_bits;
-};
-
-struct kho_mem_track {
- /* Points to kho_mem_phys, each order gets its own bitmap tree */
- struct xarray orders;
-};
-
-struct khoser_mem_chunk;
-
struct kho_out {
void *fdt;
bool finalized;
struct mutex lock; /* protects KHO FDT finalization */
- struct kho_mem_track track;
+ struct kho_radix_tree radix_tree; /* records the preserved pages, see kho_radix_add_page() */
struct kho_debugfs dbg;
};
static struct kho_out kho_out = {
.lock = __MUTEX_INITIALIZER(kho_out.lock),
- .track = {
- .orders = XARRAY_INIT(kho_out.track.orders, 0),
+ .radix_tree = {
+ .lock = __MUTEX_INITIALIZER(kho_out.radix_tree.lock), /* .root is not set here; kho_init() allocates it */
},
.finalized = false,
};
-static void *xa_load_or_alloc(struct xarray *xa, unsigned long index)
+/**
+ * kho_radix_encode_key - Encodes a physical address and order into a radix key.
+ * @phys: The physical address of the page.
+ * @order: The order of the page.
+ *
+ * This function combines a page's physical address and its order into a
+ * single unsigned long, which is used as a key for all radix tree
+ * operations.
+ *
+ * The key has exactly one "order bit" set at position
+ * KHO_ORDER_0_LOG2 - @order, and @phys >> (PAGE_SHIFT + @order) in the bits
+ * below it. Address bits below PAGE_SHIFT + @order are dropped by the shift.
+ *
+ * NOTE(review): @order is not range checked here; it presumably never exceeds
+ * KHO_ORDER_0_LOG2 - confirm against callers.
+ *
+ * Return: The encoded unsigned long radix key.
+ */
+static unsigned long kho_radix_encode_key(phys_addr_t phys, unsigned int order)
{
- void *res = xa_load(xa, index);
+ /* Order bits part: a single bit whose position encodes the order */
+ unsigned long h = 1UL << (KHO_ORDER_0_LOG2 - order);
+ /* Shifted physical address part: the PFN shifted right by the order */
+ unsigned long l = phys >> (PAGE_SHIFT + order);
- if (res)
- return res;
+ return h | l;
+}
- void *elm __free(free_page) = (void *)get_zeroed_page(GFP_KERNEL);
+/**
+ * kho_radix_decode_key - Decodes a radix key back into a physical address and order.
+ * @key: The unsigned long key to decode. Its highest set bit is taken to be
+ * the order bit.
+ * @order: An output parameter, a pointer to an unsigned int where the decoded
+ * page order will be stored.
+ *
+ * This function reverses the encoding performed by kho_radix_encode_key(),
+ * extracting the original physical address and page order from a given key.
+ *
+ * Return: The decoded physical address.
+ */
+static phys_addr_t kho_radix_decode_key(unsigned long key, unsigned int *order)
+{
+ unsigned int order_bit = fls64(key);
+ phys_addr_t phys;
- if (!elm)
- return ERR_PTR(-ENOMEM);
+ /*
+ * order_bit is numbered starting at 1 from fls64, hence the + 1.
+ * NOTE(review): a zero @key has no order bit; presumably fls64(0) is 0,
+ * which would yield an out-of-range order - confirm callers never
+ * pass 0.
+ */
+ *order = KHO_ORDER_0_LOG2 - order_bit + 1;
+ /*
+ * The order bit sits at position KHO_ORDER_0_LOG2 - *order, so this
+ * shift moves it to bit 64 (KHO_ORDER_0_LOG2 is 64 - PAGE_SHIFT). With
+ * a 64-bit unsigned long it is discarded, leaving only the address.
+ */
+ phys = key << (PAGE_SHIFT + *order);
- if (WARN_ON(kho_scratch_overlap(virt_to_phys(elm), PAGE_SIZE)))
- return ERR_PTR(-EINVAL);
+ return phys;
+}
- res = xa_cmpxchg(xa, index, NULL, elm, GFP_KERNEL);
- if (xa_is_err(res))
- return ERR_PTR(xa_err(res));
- else if (res)
- return res;
+/*
+ * Returns the position of @key's bit inside a leaf bitmap, i.e. the low
+ * KHO_BITMAP_SIZE_LOG2 bits of the key.
+ */
+static unsigned long kho_radix_get_bitmap_index(unsigned long key)
+{
+ const unsigned long nr_bits = 1 << KHO_BITMAP_SIZE_LOG2;
+
+ return key & (nr_bits - 1);
+}
+
+/*
+ * Returns the slot that @key selects in a table at @level. The index field of
+ * a level lies above the leaf bitmap bits and above the index fields of all
+ * lower table levels. Callers in this file pass levels >= 1; level 0 is the
+ * leaf bitmap and is indexed by kho_radix_get_bitmap_index() instead.
+ */
+static unsigned long kho_radix_get_table_index(unsigned long key,
+ unsigned int level)
+{
+ const int lower_bits = ((level - 1) * KHO_TABLE_SIZE_LOG2) + KHO_BITMAP_SIZE_LOG2;
+ const unsigned long slot_mask = (1UL << KHO_TABLE_SIZE_LOG2) - 1;
- return no_free_ptr(elm);
+
+ return (key >> lower_bits) & slot_mask;
}
-static void __kho_unpreserve_order(struct kho_mem_track *track, unsigned long pfn,
- unsigned int order)
+/**
+ * kho_radix_add_page - Marks a page as preserved in the radix tree.
+ * @tree: The KHO radix tree.
+ * @pfn: The page frame number of the page to preserve.
+ * @order: The order of the page.
+ *
+ * This function traverses the radix tree based on the key derived from @pfn
+ * and @order. It sets the corresponding bit in the leaf bitmap to mark the
+ * page for preservation. If intermediate nodes do not exist along the path,
+ * they are allocated and added to the tree.
+ *
+ * If an allocation fails, only the nodes allocated by this call are freed and
+ * unlinked again; nodes that already existed in the tree are left in place.
+ *
+ * Context: Calls might_sleep() and holds @tree->lock while updating the tree.
+ *
+ * Return: 0 on success, -EINVAL if @tree has no root, or -ENOMEM if a new
+ * node cannot be allocated.
+ */
+int kho_radix_add_page(struct kho_radix_tree *tree,
+ unsigned long pfn, unsigned int order)
{
- struct kho_mem_phys_bits *bits;
- struct kho_mem_phys *physxa;
- const unsigned long pfn_high = pfn >> order;
+ /* Nodes newly allocated by this call, indexed by level, for error cleanup */
+ struct kho_radix_node *intermediate_nodes[KHO_TREE_MAX_DEPTH] = { 0 };
+ unsigned long key = kho_radix_encode_key(PFN_PHYS(pfn), order);
+ struct kho_radix_node *anchor_node = NULL;
+ struct kho_radix_node *node = tree->root;
+ struct kho_radix_node *new_node;
+ unsigned int i, idx, anchor_idx;
+ struct kho_radix_leaf *leaf;
+ int err = 0;
- physxa = xa_load(&track->orders, order);
- if (WARN_ON_ONCE(!physxa))
- return;
+ if (WARN_ON_ONCE(!tree->root))
+ return -EINVAL;
- bits = xa_load(&physxa->phys_bits, pfn_high / PRESERVE_BITS);
- if (WARN_ON_ONCE(!bits))
- return;
+ might_sleep();
+
+ guard(mutex)(&tree->lock);
+
+ /*
+ * Go from high levels to low levels. The loop stops before level 0, so
+ * on exit @node is the level 0 node, which is used as the leaf bitmap.
+ */
+ for (i = KHO_TREE_MAX_DEPTH - 1; i > 0; i--) {
+ idx = kho_radix_get_table_index(key, i);
+
+ if (node->table[idx]) {
+ node = phys_to_virt(node->table[idx]);
+ continue;
+ }
+
+ /* Next node is empty, create a new (zeroed) node for it */
+ new_node = (struct kho_radix_node *)get_zeroed_page(GFP_KERNEL);
+ if (!new_node) {
+ err = -ENOMEM;
+ goto err_free_nodes;
+ }
+
+ node->table[idx] = virt_to_phys(new_node);
- clear_bit(pfn_high % PRESERVE_BITS, bits->preserve);
+ /*
+ * Capture the node where the new branch starts for cleanup
+ * if allocation fails. A new node is zeroed, so every node
+ * below the anchor is new as well and clearing the anchor
+ * slot detaches the whole new branch.
+ */
+ if (!anchor_node) {
+ anchor_node = node;
+ anchor_idx = idx;
+ }
+ intermediate_nodes[i] = new_node;
+
+ node = new_node;
+ }
+
+ /* Handle the leaf level bitmap (level 0) */
+ idx = kho_radix_get_bitmap_index(key);
+ leaf = (struct kho_radix_leaf *)node;
+ __set_bit(idx, leaf->bitmap);
+
+ return 0;
+
+err_free_nodes:
+ for (i = KHO_TREE_MAX_DEPTH - 1; i > 0; i--) {
+ if (intermediate_nodes[i])
+ free_page((unsigned long)intermediate_nodes[i]);
+ }
+ if (anchor_node)
+ anchor_node->table[anchor_idx] = 0;
+
+ return err;
}
+EXPORT_SYMBOL_GPL(kho_radix_add_page);
-static void __kho_unpreserve(struct kho_mem_track *track, unsigned long pfn,
- unsigned long end_pfn)
+/**
+ * kho_radix_del_page - Removes a page's preservation status from the radix tree.
+ * @tree: The KHO radix tree.
+ * @pfn: The page frame number of the page to unpreserve.
+ * @order: The order of the page.
+ *
+ * This function traverses the radix tree and clears the bit corresponding to
+ * the page, effectively removing its "preserved" status. It does not free
+ * the tree's intermediate nodes, even if they become empty.
+ *
+ * If @tree has no root, or the path to the page's leaf does not exist, a
+ * warning is emitted and nothing is changed.
+ *
+ * Context: Calls might_sleep() and holds @tree->lock while updating the tree.
+ */
+void kho_radix_del_page(struct kho_radix_tree *tree, unsigned long pfn,
+ unsigned int order)
{
- unsigned int order;
+ unsigned long key = kho_radix_encode_key(PFN_PHYS(pfn), order);
+ struct kho_radix_node *node = tree->root;
+ struct kho_radix_leaf *leaf;
+ unsigned int i, idx;
- while (pfn < end_pfn) {
- order = min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn));
+ if (WARN_ON_ONCE(!tree->root))
+ return;
+
+ might_sleep();
- __kho_unpreserve_order(track, pfn, order);
+ guard(mutex)(&tree->lock);
- pfn += 1 << order;
+ /*
+ * Go from high levels to low levels. The loop stops before level 0, so
+ * on exit @node is the level 0 node, which is used as the leaf bitmap.
+ */
+ for (i = KHO_TREE_MAX_DEPTH - 1; i > 0; i--) {
+ idx = kho_radix_get_table_index(key, i);
+
+ /*
+ * Attempting to delete a page that has not been preserved,
+ * return with a warning. Only a missing path is detected here;
+ * a bit that is already clear in an existing leaf is cleared
+ * again silently.
+ */
+ if (WARN_ON(!node->table[idx]))
+ return;
+
+ node = phys_to_virt(node->table[idx]);
}
+
+ /* Handle the leaf level bitmap (level 0) */
+ leaf = (struct kho_radix_leaf *)node;
+ idx = kho_radix_get_bitmap_index(key);
+ __clear_bit(idx, leaf->bitmap);
}
+EXPORT_SYMBOL_GPL(kho_radix_del_page);
-static int __kho_preserve_order(struct kho_mem_track *track, unsigned long pfn,
- unsigned int order)
+/*
+ * Calls @cb for every preserved page recorded in @leaf. @key holds the key
+ * bits gathered from the table levels above; the index of a set bit provides
+ * the remaining low bits. The walk stops at the first non-zero @cb result,
+ * which is returned; otherwise 0 is returned.
+ */
+static int kho_radix_walk_leaf(struct kho_radix_leaf *leaf,
+ unsigned long key,
+ kho_radix_tree_walk_callback_t cb)
{
- struct kho_mem_phys_bits *bits;
- struct kho_mem_phys *physxa, *new_physxa;
- const unsigned long pfn_high = pfn >> order;
+ unsigned int page_order;
+ unsigned int bit;
- might_sleep();
- physxa = xa_load(&track->orders, order);
- if (!physxa) {
- int err;
+ for_each_set_bit(bit, leaf->bitmap, PAGE_SIZE * BITS_PER_BYTE) {
+ phys_addr_t pa = kho_radix_decode_key(key | bit, &page_order);
+ int ret = cb(pa, page_order);
+
+ if (ret)
+ return ret;
+ }
- new_physxa = kzalloc_obj(*physxa);
- if (!new_physxa)
- return -ENOMEM;
+ return 0;
+}
+
+/*
+ * Recursively walks the subtree below @root, a table at @level (>= 1).
+ * @start carries the key bits contributed by the levels above. For every
+ * populated slot, the slot index is merged into the key at this level's bit
+ * offset; at level 1 the slots point at leaf bitmaps, at higher levels they
+ * point at further tables. Returns 0, or the first non-zero value produced by
+ * @cb, which also ends the walk.
+ */
+static int __kho_radix_walk_tree(struct kho_radix_node *root,
+ unsigned int level, unsigned long start,
+ kho_radix_tree_walk_callback_t cb)
+{
+ struct kho_radix_node *node;
+ struct kho_radix_leaf *leaf;
+ unsigned long key, i;
+ unsigned int shift;
+ int err;
+
+ /*
+ * Bound the scan by the table itself rather than by
+ * PAGE_SIZE / sizeof(phys_addr_t): the entries are u64, so the two
+ * only agree when phys_addr_t is 64 bits wide.
+ */
+ for (i = 0; i < ARRAY_SIZE(root->table); i++) {
+ if (!root->table[i])
+ continue;
- xa_init(&new_physxa->phys_bits);
- physxa = xa_cmpxchg(&track->orders, order, NULL, new_physxa,
- GFP_KERNEL);
+ shift = ((level - 1) * KHO_TABLE_SIZE_LOG2) +
+ KHO_BITMAP_SIZE_LOG2;
+ key = start | (i << shift);
- err = xa_err(physxa);
- if (err || physxa) {
- xa_destroy(&new_physxa->phys_bits);
- kfree(new_physxa);
+ node = phys_to_virt(root->table[i]);
- if (err)
- return err;
+ if (level == 1) {
+ /*
+ * we are at level 1,
+ * node is pointing to the level 0 bitmap.
+ */
+ leaf = (struct kho_radix_leaf *)node;
+ err = kho_radix_walk_leaf(leaf, key, cb);
} else {
- physxa = new_physxa;
+ err = __kho_radix_walk_tree(node, level - 1,
+ key, cb);
}
+
+ if (err)
+ return err;
}
- bits = xa_load_or_alloc(&physxa->phys_bits, pfn_high / PRESERVE_BITS);
- if (IS_ERR(bits))
- return PTR_ERR(bits);
+ return 0;
+}
+
+/**
+ * kho_radix_walk_tree - Traverses the radix tree and calls a callback for each preserved page.
+ * @tree: A pointer to the KHO radix tree to walk.
+ * @cb: A callback function of type kho_radix_tree_walk_callback_t that will be
+ * invoked for each preserved page found in the tree. The callback receives
+ * the physical address and order of the preserved page.
+ *
+ * This function walks the whole radix tree, from the root table at level
+ * KHO_TREE_MAX_DEPTH - 1 down to the leaf bitmaps (level 0), while holding
+ * @tree->lock. For each preserved page found, it invokes the provided
+ * callback, passing the page's physical address and order.
+ *
+ * Return: 0 if the whole tree was walked, -EINVAL if @tree has no root, or
+ * the non-zero return value from the callback that stopped the walk.
+ */
+int kho_radix_walk_tree(struct kho_radix_tree *tree,
+ kho_radix_tree_walk_callback_t cb)
+{
+ if (WARN_ON_ONCE(!tree->root))
+ return -EINVAL;
- set_bit(pfn_high % PRESERVE_BITS, bits->preserve);
+ guard(mutex)(&tree->lock);
- return 0;
+ return __kho_radix_walk_tree(tree->root, KHO_TREE_MAX_DEPTH - 1, 0, cb);
+}
+EXPORT_SYMBOL_GPL(kho_radix_walk_tree);
+
+/*
+ * Unpreserves the PFN range [@pfn, @end_pfn). The range is split into chunks
+ * that are each as large as the alignment of the current pfn and the number
+ * of remaining pages allow, and every chunk is removed from @tree with its
+ * own order.
+ */
+static void __kho_unpreserve(struct kho_radix_tree *tree,
+ unsigned long pfn, unsigned long end_pfn)
+{
+ unsigned int order;
+
+ while (pfn < end_pfn) {
+ order = min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn));
+
+ kho_radix_del_page(tree, pfn, order);
+
+ /* Shift in unsigned long: an int shift overflows for order >= 31 */
+ pfn += 1UL << order;
+ }
}
/* For physically contiguous 0-order pages. */
}
EXPORT_SYMBOL_GPL(kho_restore_pages);
-/* Serialize and deserialize struct kho_mem_phys across kexec
- *
- * Record all the bitmaps in a linked list of pages for the next kernel to
- * process. Each chunk holds bitmaps of the same order and each block of bitmaps
- * starts at a given physical address. This allows the bitmaps to be sparse. The
- * xarray is used to store them in a tree while building up the data structure,
- * but the KHO successor kernel only needs to process them once in order.
- *
- * All of this memory is normal kmalloc() memory and is not marked for
- * preservation. The successor kernel will remain isolated to the scratch space
- * until it completes processing this list. Once processed all the memory
- * storing these ranges will be marked as free.
- */
-
-struct khoser_mem_bitmap_ptr {
- phys_addr_t phys_start;
- DECLARE_KHOSER_PTR(bitmap, struct kho_mem_phys_bits *);
-};
-
-struct khoser_mem_chunk_hdr {
- DECLARE_KHOSER_PTR(next, struct khoser_mem_chunk *);
- unsigned int order;
- unsigned int num_elms;
-};
-
-#define KHOSER_BITMAP_SIZE \
- ((PAGE_SIZE - sizeof(struct khoser_mem_chunk_hdr)) / \
- sizeof(struct khoser_mem_bitmap_ptr))
-
-struct khoser_mem_chunk {
- struct khoser_mem_chunk_hdr hdr;
- struct khoser_mem_bitmap_ptr bitmaps[KHOSER_BITMAP_SIZE];
-};
-
-static_assert(sizeof(struct khoser_mem_chunk) == PAGE_SIZE);
-
-static struct khoser_mem_chunk *new_chunk(struct khoser_mem_chunk *cur_chunk,
- unsigned long order)
-{
- struct khoser_mem_chunk *chunk __free(free_page) = NULL;
-
- chunk = (void *)get_zeroed_page(GFP_KERNEL);
- if (!chunk)
- return ERR_PTR(-ENOMEM);
-
- if (WARN_ON(kho_scratch_overlap(virt_to_phys(chunk), PAGE_SIZE)))
- return ERR_PTR(-EINVAL);
-
- chunk->hdr.order = order;
- if (cur_chunk)
- KHOSER_STORE_PTR(cur_chunk->hdr.next, chunk);
- return no_free_ptr(chunk);
-}
-
-static void kho_mem_ser_free(struct khoser_mem_chunk *first_chunk)
-{
- struct khoser_mem_chunk *chunk = first_chunk;
-
- while (chunk) {
- struct khoser_mem_chunk *tmp = chunk;
-
- chunk = KHOSER_LOAD_PTR(chunk->hdr.next);
- free_page((unsigned long)tmp);
- }
-}
-
-/*
- * Update memory map property, if old one is found discard it via
- * kho_mem_ser_free().
- */
-static void kho_update_memory_map(struct khoser_mem_chunk *first_chunk)
-{
- void *ptr;
- u64 phys;
-
- ptr = fdt_getprop_w(kho_out.fdt, 0, KHO_FDT_MEMORY_MAP_PROP_NAME, NULL);
-
- /* Check and discard previous memory map */
- phys = get_unaligned((u64 *)ptr);
- if (phys)
- kho_mem_ser_free((struct khoser_mem_chunk *)phys_to_virt(phys));
-
- /* Update with the new value */
- phys = first_chunk ? (u64)virt_to_phys(first_chunk) : 0;
- put_unaligned(phys, (u64 *)ptr);
-}
-
-static int kho_mem_serialize(struct kho_out *kho_out)
+/*
+ * Radix tree walk callback used when retrieving the preserved memory map:
+ * reserves the preserved page of the given @order at @phys in memblock, marks
+ * the range noinit, and records KHO_PAGE_MAGIC together with the order in the
+ * private field of its first struct page. Always returns 0.
+ */
+static int __init kho_preserved_memory_reserve(phys_addr_t phys,
+ unsigned int order)
{
- struct khoser_mem_chunk *first_chunk = NULL;
- struct khoser_mem_chunk *chunk = NULL;
- struct kho_mem_phys *physxa;
- unsigned long order;
- int err = -ENOMEM;
-
- xa_for_each(&kho_out->track.orders, order, physxa) {
- struct kho_mem_phys_bits *bits;
- unsigned long phys;
-
- chunk = new_chunk(chunk, order);
- if (IS_ERR(chunk)) {
- err = PTR_ERR(chunk);
- goto err_free;
- }
-
- if (!first_chunk)
- first_chunk = chunk;
-
- xa_for_each(&physxa->phys_bits, phys, bits) {
- struct khoser_mem_bitmap_ptr *elm;
-
- if (chunk->hdr.num_elms == ARRAY_SIZE(chunk->bitmaps)) {
- chunk = new_chunk(chunk, order);
- if (IS_ERR(chunk)) {
- err = PTR_ERR(chunk);
- goto err_free;
- }
- }
+ union kho_page_info info;
+ struct page *page;
+ u64 sz;
- elm = &chunk->bitmaps[chunk->hdr.num_elms];
- chunk->hdr.num_elms++;
- elm->phys_start = (phys * PRESERVE_BITS)
- << (order + PAGE_SHIFT);
- KHOSER_STORE_PTR(elm->bitmap, bits);
- }
- }
+ /*
+ * Shift a 64-bit one: "1 << n" is evaluated as int and overflows once
+ * order + PAGE_SHIFT reaches 31, before the result is widened to u64.
+ */
+ sz = 1ULL << (order + PAGE_SHIFT);
+ page = phys_to_page(phys);
- kho_update_memory_map(first_chunk);
+ /* Reserve the memory preserved in KHO in memblock */
+ memblock_reserve(phys, sz);
+ memblock_reserved_mark_noinit(phys, sz);
+ info.magic = KHO_PAGE_MAGIC;
+ info.order = order;
+ page->private = info.page_private;
return 0;
-
-err_free:
- kho_mem_ser_free(first_chunk);
- return err;
-}
-
-static void __init deserialize_bitmap(unsigned int order,
- struct khoser_mem_bitmap_ptr *elm)
-{
- struct kho_mem_phys_bits *bitmap = KHOSER_LOAD_PTR(elm->bitmap);
- unsigned long bit;
-
- for_each_set_bit(bit, bitmap->preserve, PRESERVE_BITS) {
- int sz = 1 << (order + PAGE_SHIFT);
- phys_addr_t phys =
- elm->phys_start + (bit << (order + PAGE_SHIFT));
- struct page *page = phys_to_page(phys);
- union kho_page_info info;
-
- memblock_reserve(phys, sz);
- memblock_reserved_mark_noinit(phys, sz);
- info.magic = KHO_PAGE_MAGIC;
- info.order = order;
- page->private = info.page_private;
- }
}
/* Returns physical address of the preserved memory map from FDT */
mem_ptr = fdt_getprop(fdt, 0, KHO_FDT_MEMORY_MAP_PROP_NAME, &len);
if (!mem_ptr || len != sizeof(u64)) {
- pr_err("failed to get preserved memory bitmaps\n");
+ pr_err("failed to get preserved memory map\n");
return 0;
}
return get_unaligned((const u64 *)mem_ptr);
}
-static void __init kho_mem_deserialize(struct khoser_mem_chunk *chunk)
-{
- while (chunk) {
- unsigned int i;
-
- for (i = 0; i != chunk->hdr.num_elms; i++)
- deserialize_bitmap(chunk->hdr.order,
- &chunk->bitmaps[i]);
- chunk = KHOSER_LOAD_PTR(chunk->hdr.next);
- }
-}
-
/*
* With KHO enabled, memory can become fragmented because KHO regions may
* be anywhere in physical address space. The scratch regions give us a
*/
int kho_preserve_folio(struct folio *folio)
{
+ struct kho_radix_tree *tree = &kho_out.radix_tree;
const unsigned long pfn = folio_pfn(folio);
const unsigned int order = folio_order(folio);
- struct kho_mem_track *track = &kho_out.track;
if (WARN_ON(kho_scratch_overlap(pfn << PAGE_SHIFT, PAGE_SIZE << order)))
return -EINVAL;
- return __kho_preserve_order(track, pfn, order);
+ return kho_radix_add_page(tree, pfn, order);
}
EXPORT_SYMBOL_GPL(kho_preserve_folio);
*/
void kho_unpreserve_folio(struct folio *folio)
{
+ struct kho_radix_tree *tree = &kho_out.radix_tree;
const unsigned long pfn = folio_pfn(folio);
const unsigned int order = folio_order(folio);
- struct kho_mem_track *track = &kho_out.track;
- __kho_unpreserve_order(track, pfn, order);
+ kho_radix_del_page(tree, pfn, order);
}
EXPORT_SYMBOL_GPL(kho_unpreserve_folio);
*/
int kho_preserve_pages(struct page *page, unsigned long nr_pages)
{
- struct kho_mem_track *track = &kho_out.track;
+ struct kho_radix_tree *tree = &kho_out.radix_tree;
const unsigned long start_pfn = page_to_pfn(page);
const unsigned long end_pfn = start_pfn + nr_pages;
unsigned long pfn = start_pfn;
const unsigned int order =
min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn));
- err = __kho_preserve_order(track, pfn, order);
+ err = kho_radix_add_page(tree, pfn, order);
if (err) {
failed_pfn = pfn;
break;
}
if (err)
- __kho_unpreserve(track, start_pfn, failed_pfn);
+ __kho_unpreserve(tree, start_pfn, failed_pfn);
return err;
}
*/
void kho_unpreserve_pages(struct page *page, unsigned long nr_pages)
{
- struct kho_mem_track *track = &kho_out.track;
+ struct kho_radix_tree *tree = &kho_out.radix_tree;
const unsigned long start_pfn = page_to_pfn(page);
const unsigned long end_pfn = start_pfn + nr_pages;
- __kho_unpreserve(track, start_pfn, end_pfn);
+ __kho_unpreserve(tree, start_pfn, end_pfn);
}
EXPORT_SYMBOL_GPL(kho_unpreserve_pages);
static void kho_vmalloc_unpreserve_chunk(struct kho_vmalloc_chunk *chunk,
unsigned short order)
{
- struct kho_mem_track *track = &kho_out.track;
+ struct kho_radix_tree *tree = &kho_out.radix_tree;
unsigned long pfn = PHYS_PFN(virt_to_phys(chunk));
- __kho_unpreserve(track, pfn, pfn + 1);
+ __kho_unpreserve(tree, pfn, pfn + 1);
for (int i = 0; i < ARRAY_SIZE(chunk->phys) && chunk->phys[i]; i++) {
pfn = PHYS_PFN(chunk->phys[i]);
- __kho_unpreserve(track, pfn, pfn + (1 << order));
+ __kho_unpreserve(tree, pfn, pfn + (1 << order));
}
}
int kho_finalize(void)
{
- int ret;
-
if (!kho_enable)
return -EOPNOTSUPP;
guard(mutex)(&kho_out.lock);
- ret = kho_mem_serialize(&kho_out);
- if (ret)
- return ret;
-
kho_out.finalized = true;
return 0;
struct kho_in {
phys_addr_t fdt_phys;
phys_addr_t scratch_phys;
- phys_addr_t mem_map_phys;
struct kho_debugfs dbg;
};
}
EXPORT_SYMBOL_GPL(kho_retrieve_subtree);
+/*
+ * Reads the physical address of the preserved-memory radix tree root from the
+ * KHO_FDT_MEMORY_MAP_PROP_NAME property of the root node of @fdt and walks
+ * the tree, calling kho_preserved_memory_reserve() for every preserved page.
+ *
+ * Return: 0 on success, -ENOENT if the property is missing or is not a u64,
+ * -EINVAL if it holds a zero address, or the error returned by the walk.
+ */
+static int __init kho_mem_retrieve(const void *fdt)
+{
+ struct kho_radix_tree tree;
+ const void *prop;
+ u64 root_phys;
+ int len;
+
+ /* Retrieve the KHO radix tree from passed-in FDT. */
+ prop = fdt_getprop(fdt, 0, KHO_FDT_MEMORY_MAP_PROP_NAME, &len);
+
+ /* The property is written as a u64, whatever phys_addr_t is */
+ if (!prop || len != sizeof(u64)) {
+ pr_err("failed to get preserved KHO memory tree\n");
+ return -ENOENT;
+ }
+
+ /* FDT property values are not guaranteed to be 8-byte aligned */
+ root_phys = get_unaligned((const u64 *)prop);
+ if (!root_phys)
+ return -EINVAL;
+
+ tree.root = phys_to_virt(root_phys);
+ mutex_init(&tree.lock);
+ return kho_radix_walk_tree(&tree, kho_preserved_memory_reserve);
+}
+
static __init int kho_out_fdt_setup(void)
{
+ struct kho_radix_tree *tree = &kho_out.radix_tree;
void *root = kho_out.fdt;
- u64 empty_mem_map = 0;
+ u64 preserved_mem_tree_pa;
int err;
err = fdt_create(root, PAGE_SIZE);
err |= fdt_finish_reservemap(root);
err |= fdt_begin_node(root, "");
err |= fdt_property_string(root, "compatible", KHO_FDT_COMPATIBLE);
- err |= fdt_property(root, KHO_FDT_MEMORY_MAP_PROP_NAME, &empty_mem_map,
- sizeof(empty_mem_map));
+
+ preserved_mem_tree_pa = virt_to_phys(tree->root);
+
+ err |= fdt_property(root, KHO_FDT_MEMORY_MAP_PROP_NAME,
+ &preserved_mem_tree_pa,
+ sizeof(preserved_mem_tree_pa));
+
err |= fdt_end_node(root);
err |= fdt_finish(root);
static __init int kho_init(void)
{
+ struct kho_radix_tree *tree = &kho_out.radix_tree;
const void *fdt = kho_get_fdt();
int err = 0;
if (!kho_enable)
return 0;
+ tree->root = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!tree->root) {
+ err = -ENOMEM;
+ goto err_free_scratch;
+ }
+
kho_out.fdt = kho_alloc_preserve(PAGE_SIZE);
if (IS_ERR(kho_out.fdt)) {
err = PTR_ERR(kho_out.fdt);
- goto err_free_scratch;
+ goto err_free_kho_radix_tree_root;
}
err = kho_debugfs_init();
err_free_fdt:
kho_unpreserve_free(kho_out.fdt);
+err_free_kho_radix_tree_root:
+ kfree(tree->root);
+ tree->root = NULL;
err_free_scratch:
kho_out.fdt = NULL;
for (int i = 0; i < kho_scratch_cnt; i++) {
void __init kho_memory_init(void)
{
- if (kho_in.mem_map_phys) {
+ if (kho_in.scratch_phys) {
kho_scratch = phys_to_virt(kho_in.scratch_phys);
kho_release_scratch();
- kho_mem_deserialize(phys_to_virt(kho_in.mem_map_phys));
+
+ if (kho_mem_retrieve(kho_get_fdt()))
+ kho_in.fdt_phys = 0;
} else {
kho_reserve_scratch();
}
kho_in.fdt_phys = fdt_phys;
kho_in.scratch_phys = scratch_phys;
- kho_in.mem_map_phys = mem_map_phys;
kho_scratch_cnt = scratch_cnt;
populated = true;