--- /dev/null
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include <linux/bitfield.h>
+#include <linux/kref.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+#include "xe_page_reclaim.h"
+
+#include "regs/xe_gt_regs.h"
+#include "xe_assert.h"
+#include "xe_macros.h"
+
+/**
+ * xe_page_reclaim_list_invalidate() - Mark a PRL as invalid
+ * @prl: Page reclaim list to reset
+ *
+ * Drops the reference on the backing entries page, clears the entries
+ * pointer and marks the list as invalid so future users know the PRL
+ * is unusable.
+ */
+void xe_page_reclaim_list_invalidate(struct xe_page_reclaim_list *prl)
+{
+ xe_page_reclaim_entries_put(prl->entries);
+ prl->entries = NULL;
+ prl->num_entries = XE_PAGE_RECLAIM_INVALID_LIST;
+}
+
+/**
+ * xe_page_reclaim_list_init() - Initialize a page reclaim list
+ * @prl: Page reclaim list to initialize
+ *
+ * Resets both fields so the list starts out unallocated and empty.
+ */
+void xe_page_reclaim_list_init(struct xe_page_reclaim_list *prl)
+{
+ prl->entries = NULL;
+ prl->num_entries = 0;
+}
+
+/**
+ * xe_page_reclaim_list_alloc_entries() - Allocate page reclaim list entries
+ * @prl: Page reclaim list to allocate entries for
+ *
+ * Allocates one zeroed 4K page to back the PRL entries and resets the
+ * entry count.
+ *
+ * Return: 0 on success or if entries are already allocated, -ENOMEM if
+ * the page allocation fails.
+ */
+int xe_page_reclaim_list_alloc_entries(struct xe_page_reclaim_list *prl)
+{
+ struct page *page;
+
+ if (XE_WARN_ON(prl->entries))
+ return 0;
+
+ page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (page) {
+ prl->entries = page_address(page);
+ prl->num_entries = 0;
+ }
+
+ return page ? 0 : -ENOMEM;
+}
--- /dev/null
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_PAGE_RECLAIM_H_
+#define _XE_PAGE_RECLAIM_H_
+
+#include <linux/bits.h>
+#include <linux/kref.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
+#define XE_PAGE_RECLAIM_MAX_ENTRIES 512
+#define XE_PAGE_RECLAIM_LIST_MAX_SIZE SZ_4K
+
+struct xe_guc_page_reclaim_entry {
+ u64 qw;
+/* valid reclaim entry bit */
+#define XE_PAGE_RECLAIM_VALID BIT_ULL(0)
+/*
+ * offset order of page size to be reclaimed
+ * page_size = 1 << (XE_PTE_SHIFT + reclamation_size)
+ */
+#define XE_PAGE_RECLAIM_SIZE GENMASK_ULL(6, 1)
+#define XE_PAGE_RECLAIM_RSVD_0 GENMASK_ULL(11, 7)
+/* lower 20 bits of the physical address */
+#define XE_PAGE_RECLAIM_ADDR_LO GENMASK_ULL(31, 12)
+/* upper 20 bits of the physical address */
+#define XE_PAGE_RECLAIM_ADDR_HI GENMASK_ULL(51, 32)
+#define XE_PAGE_RECLAIM_RSVD_1 GENMASK_ULL(63, 52)
+} __packed;
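+
+/*
+ * Example encoding (illustrative; assumes XE_PTE_SHIFT == 12): reclaiming a
+ * 64K page at physical address 0x1_2345_0000 gives
+ *   reclamation_size = ilog2(SZ_64K) - XE_PTE_SHIFT = 4
+ *   XE_PAGE_RECLAIM_ADDR_LO = physical address bits 31:12 = 0x23450
+ *   XE_PAGE_RECLAIM_ADDR_HI = physical address bits 51:32 = 0x1
+ * so qw == 0x123450009 with the valid bit set.
+ */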
+
+/**
+ * struct xe_page_reclaim_list - Page reclaim list (PRL)
+ */
+struct xe_page_reclaim_list {
+ /** @entries: array of page reclaim entries, backed by one 4K page */
+ struct xe_guc_page_reclaim_entry *entries;
+ /** @num_entries: number of entries */
+ int num_entries;
+#define XE_PAGE_RECLAIM_INVALID_LIST -1
+};
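+
+/*
+ * A PRL is in one of three states, encoded by (@entries, @num_entries):
+ *   new:     (NULL, 0)                            - never allocated
+ *   valid:   (page address, >= 0)                 - usable, may hold entries
+ *   invalid: (NULL, XE_PAGE_RECLAIM_INVALID_LIST) - unusable, reclaim skipped
+ */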
+
+/**
+ * xe_page_reclaim_list_is_new() - Check whether a PRL is still unallocated
+ * @prl: Pointer to page reclaim list
+ *
+ * Return: true if the PRL has never been allocated, i.e. @entries is NULL
+ * and @num_entries is zero.
+ */
+static inline bool xe_page_reclaim_list_is_new(struct xe_page_reclaim_list *prl)
+{
+ return !prl->entries && prl->num_entries == 0;
+}
+
+/**
+ * xe_page_reclaim_list_valid() - Check if the page reclaim list is valid
+ * @prl: Pointer to page reclaim list
+ *
+ * A PRL is valid once it has been allocated and has not been marked with
+ * XE_PAGE_RECLAIM_INVALID_LIST.
+ *
+ * Return: true if the PRL is usable.
+ */
+static inline bool xe_page_reclaim_list_valid(struct xe_page_reclaim_list *prl)
+{
+ return !xe_page_reclaim_list_is_new(prl) &&
+ prl->num_entries != XE_PAGE_RECLAIM_INVALID_LIST;
+}
+
+void xe_page_reclaim_list_invalidate(struct xe_page_reclaim_list *prl);
+void xe_page_reclaim_list_init(struct xe_page_reclaim_list *prl);
+int xe_page_reclaim_list_alloc_entries(struct xe_page_reclaim_list *prl);
+/**
+ * xe_page_reclaim_entries_get() - Increment the reference count of page reclaim entries.
+ * @entries: Pointer to the array of page reclaim entries.
+ *
+ * This function increments the reference count of the backing page.
+ */
+static inline void xe_page_reclaim_entries_get(struct xe_guc_page_reclaim_entry *entries)
+{
+ if (entries)
+ get_page(virt_to_page(entries));
+}
+
+/**
+ * xe_page_reclaim_entries_put() - Decrement the reference count of page reclaim entries.
+ * @entries: Pointer to the array of page reclaim entries.
+ *
+ * This function decrements the reference count of the backing page
+ * and frees it if the count reaches zero.
+ */
+static inline void xe_page_reclaim_entries_put(struct xe_guc_page_reclaim_entry *entries)
+{
+ if (entries)
+ put_page(virt_to_page(entries));
+}
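+
+/*
+ * Sketch of the expected lifetime (the extra get/put pair is illustrative):
+ *
+ *   xe_page_reclaim_list_init(&prl);           - entries == NULL, count == 0
+ *   xe_page_reclaim_list_alloc_entries(&prl);  - one zeroed 4K page, refcount 1
+ *   xe_page_reclaim_entries_get(prl.entries);  - extra reference for a second user
+ *   xe_page_reclaim_entries_put(prl.entries);  - drop the extra reference
+ *   xe_page_reclaim_list_invalidate(&prl);     - drop the last reference, mark invalid
+ */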
+
+#endif /* _XE_PAGE_RECLAIM_H_ */
#include "xe_exec_queue.h"
#include "xe_gt.h"
#include "xe_migrate.h"
+#include "xe_page_reclaim.h"
#include "xe_pt_types.h"
#include "xe_pt_walk.h"
#include "xe_res_cursor.h"
/** @modified_end: Walk range start, modified like @modified_start. */
u64 modified_end;
+ /** @prl: Pointer to the page reclaim list owned by pt_update_ops */
+ struct xe_page_reclaim_list *prl;
+
/* Output */
/* @wupd: Structure to track the page-table updates we're building */
struct xe_walk_update wupd;
return false;
}
+/* A level-1 table with no child tables holds huge 2MB leaf entries */
+static bool is_2m_pte(struct xe_pt *pte)
+{
+ return pte->level == 1 && !pte->base.children;
+}
+
+/* page_size = 2^(reclamation_size + XE_PTE_SHIFT) */
+#define COMPUTE_RECLAMATION_SIZE(page_size) \
+({ \
+ BUILD_BUG_ON(!__builtin_constant_p(page_size)); \
+ ilog2(page_size) - XE_PTE_SHIFT; \
+})
+
+static int generate_reclaim_entry(struct xe_tile *tile,
+ struct xe_page_reclaim_list *prl,
+ u64 pte, struct xe_pt *xe_child)
+{
+ struct xe_guc_page_reclaim_entry *reclaim_entries = prl->entries;
+ u64 phys_page = (pte & XE_PTE_ADDR_MASK) >> XE_PTE_SHIFT;
+ int num_entries = prl->num_entries;
+ u32 reclamation_size;
+
+ xe_tile_assert(tile, xe_child->level <= MAX_HUGEPTE_LEVEL);
+ xe_tile_assert(tile, reclaim_entries);
+ xe_tile_assert(tile, num_entries < XE_PAGE_RECLAIM_MAX_ENTRIES - 1);
+
+ if (!xe_page_reclaim_list_valid(prl))
+ return -EINVAL;
+
+ /*
+ * reclamation_size indicates the size of the page to be
+ * invalidated and flushed from the non-coherent cache.
+ * Page size is computed as 2^(reclamation_size + XE_PTE_SHIFT) bytes.
+ * Only 4K, 64K (level 0) and 2M pages are supported by hardware for
+ * page reclaim.
+ */
+ if (xe_child->level == 0 && !(pte & XE_PTE_PS64)) {
+ reclamation_size = COMPUTE_RECLAMATION_SIZE(SZ_4K); /* reclamation_size = 0 */
+ } else if (xe_child->level == 0) {
+ reclamation_size = COMPUTE_RECLAMATION_SIZE(SZ_64K); /* reclamation_size = 4 */
+ } else if (is_2m_pte(xe_child)) {
+ reclamation_size = COMPUTE_RECLAMATION_SIZE(SZ_2M); /* reclamation_size = 9 */
+ } else {
+ xe_page_reclaim_list_invalidate(prl);
+ vm_dbg(&tile_to_xe(tile)->drm,
+ "PRL invalidate: unsupported PTE level=%u pte=%#llx\n",
+ xe_child->level, pte);
+ return -EINVAL;
+ }
+
+ reclaim_entries[num_entries].qw =
+ FIELD_PREP(XE_PAGE_RECLAIM_VALID, 1) |
+ FIELD_PREP(XE_PAGE_RECLAIM_SIZE, reclamation_size) |
+ FIELD_PREP(XE_PAGE_RECLAIM_ADDR_LO, phys_page) |
+ FIELD_PREP(XE_PAGE_RECLAIM_ADDR_HI, phys_page >> 20);
+ prl->num_entries++;
+ vm_dbg(&tile_to_xe(tile)->drm,
+ "PRL add entry: level=%u pte=%#llx reclamation_size=%u prl_idx=%d\n",
+ xe_child->level, pte, reclamation_size, num_entries);
+
+ return 0;
+}
+
static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset,
unsigned int level, u64 addr, u64 next,
struct xe_ptw **child,
struct xe_pt_walk *walk)
{
struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base);
+ struct xe_pt_stage_unbind_walk *xe_walk =
+ container_of(walk, typeof(*xe_walk), base);
+ struct xe_device *xe = tile_to_xe(xe_walk->tile);
XE_WARN_ON(!*child);
XE_WARN_ON(!level);
+ /* For leaf nodes, add the PTEs covered by this walk to the PRL while it is still valid */
+ if (xe_walk->prl && xe_page_reclaim_list_valid(xe_walk->prl) &&
+ !xe_child->base.children) {
+ struct iosys_map *leaf_map = &xe_child->bo->vmap;
+ pgoff_t first = xe_pt_offset(addr, 0, walk);
+ pgoff_t count = xe_pt_num_entries(addr, next, 0, walk);
+
+ for (pgoff_t i = 0; i < count; i++) {
+ u64 pte = xe_map_rd(xe, leaf_map, (first + i) * sizeof(u64), u64);
+ int ret;
+
+ /* Reserve the last slot for the terminating entry of the list */
+ if (xe_walk->prl->num_entries < XE_PAGE_RECLAIM_MAX_ENTRIES - 1) {
+ ret = generate_reclaim_entry(xe_walk->tile, xe_walk->prl,
+ pte, xe_child);
+ if (ret)
+ break;
+ } else {
+ /* overflow, mark as invalid */
+ xe_page_reclaim_list_invalidate(xe_walk->prl);
+ vm_dbg(&xe->drm,
+ "PRL invalidate: overflow while adding pte=%#llx",
+ pte);
+ break;
+ }
+ }
+ }
- xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk);
+ /*
+ * If the walk is killed at this level, lower-level PTEs may be dropped
+ * without being visited, so the PRL can no longer be trusted; invalidate it.
+ */
+ if (xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk) &&
+ xe_walk->prl && level > 1 && xe_child->base.children && xe_child->num_live != 0) {
+ xe_page_reclaim_list_invalidate(xe_walk->prl);
+ vm_dbg(&xe->drm,
+ "PRL invalidate: kill at level=%u addr=%#llx next=%#llx num_live=%u\n",
+ level, addr, next, xe_child->num_live);
+ }
return 0;
}
{
u64 start = range ? xe_svm_range_start(range) : xe_vma_start(vma);
u64 end = range ? xe_svm_range_end(range) : xe_vma_end(vma);
+ struct xe_vm_pgtable_update_op *pt_update_op =
+ container_of(entries, struct xe_vm_pgtable_update_op, entries[0]);
struct xe_pt_stage_unbind_walk xe_walk = {
.base = {
.ops = &xe_pt_stage_unbind_ops,
.modified_start = start,
.modified_end = end,
.wupd.entries = entries,
+ .prl = pt_update_op->prl,
};
struct xe_pt *pt = vm->pt_root[tile->id];
struct xe_vm_pgtable_update_ops *pt_update_ops,
struct xe_vma *vma)
{
+ struct xe_device *xe = tile_to_xe(tile);
u32 current_op = pt_update_ops->current_op;
struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op];
int err;
pt_op->vma = vma;
pt_op->bind = false;
pt_op->rebind = false;
+ /*
+ * Maintain a single PRL in pt_update_ops that every unbind op references.
+ * Allocate it only once; once invalidated it stays invalid. Allocation
+ * failure is tolerated: the PRL then simply remains unused.
+ */
+ if (xe->info.has_page_reclaim_hw_assist &&
+ xe_page_reclaim_list_is_new(&pt_update_ops->prl))
+ xe_page_reclaim_list_alloc_entries(&pt_update_ops->prl);
+
+ /*
+ * The PRL may be unusable (reclaim not supported, allocation failed or the
+ * list was already invalidated); in that case skip page reclaim for this VMA.
+ */
+ pt_op->prl = xe_page_reclaim_list_valid(&pt_update_ops->prl) ? &pt_update_ops->prl : NULL;
err = vma_reserve_fences(tile_to_xe(tile), vma);
if (err)
pt_op->vma = XE_INVALID_VMA;
pt_op->bind = false;
pt_op->rebind = false;
+ pt_op->prl = NULL;
pt_op->num_entries = xe_pt_stage_unbind(tile, vm, NULL, range,
pt_op->entries);
init_llist_head(&pt_update_ops->deferred);
pt_update_ops->start = ~0x0ull;
pt_update_ops->last = 0x0ull;
+ xe_page_reclaim_list_init(&pt_update_ops->prl);
}
/**
&vops->pt_update_ops[tile->id];
int i;
+ xe_page_reclaim_entries_put(pt_update_ops->prl.entries);
+
lockdep_assert_held(&vops->vm->lock);
xe_vm_assert_held(vops->vm);