From: Matthew Brost Date: Fri, 31 Oct 2025 16:54:10 +0000 (-0700) Subject: drm/xe: Stub out new pagefault layer X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=620a09fb0bddf387f418663478b48ca4ba62b6d6;p=thirdparty%2Fkernel%2Flinux.git drm/xe: Stub out new pagefault layer Stub out the new page fault layer and add kernel documentation. This is intended as a replacement for the GT page fault layer, enabling multiple producers to hook into a shared page fault consumer interface. v2: - Fix kernel doc typo (checkpatch) - Remove comment around GT (Stuart) - Add explaination around reclaim (Francois) - Add comment around u8 vs enum (Francois) - Include engine instance (Stuart) v3: - Fix XE_PAGEFAULT_TYPE_ATOMIC_ACCESS_VIOLATION kernel doc (Stuart) Signed-off-by: Matthew Brost Reviewed-by: Lucas De Marchi Tested-by: Francois Dugast Link: https://patch.msgid.link/20251031165416.2871503-2-matthew.brost@intel.com --- diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 4c7f7fd58bcc8..cdf9b08b809e4 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -94,6 +94,7 @@ xe-y += xe_bb.o \ xe_nvm.o \ xe_oa.o \ xe_observation.o \ + xe_pagefault.o \ xe_pat.o \ xe_pci.o \ xe_pcode.o \ diff --git a/drivers/gpu/drm/xe/xe_pagefault.c b/drivers/gpu/drm/xe/xe_pagefault.c new file mode 100644 index 0000000000000..d509a80cb1f31 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pagefault.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include "xe_pagefault.h" +#include "xe_pagefault_types.h" + +/** + * DOC: Xe page faults + * + * Xe page faults are handled in two layers. The producer layer interacts with + * hardware or firmware to receive and parse faults into struct xe_pagefault, + * then forwards them to the consumer. The consumer layer services the faults + * (e.g., memory migration, page table updates) and acknowledges the result back + * to the producer, which then forwards the results to the hardware or firmware. + * The consumer uses a page fault queue sized to absorb all potential faults and + * a multi-threaded worker to process them. Multiple producers are supported, + * with a single shared consumer. + * + * xe_pagefault.c implements the consumer layer. + */ + +/** + * xe_pagefault_init() - Page fault init + * @xe: xe device instance + * + * Initialize Xe page fault state. Must be done after reading fuses. + * + * Return: 0 on Success, errno on failure + */ +int xe_pagefault_init(struct xe_device *xe) +{ + /* TODO - implement */ + return 0; +} + +/** + * xe_pagefault_reset() - Page fault reset for a GT + * @xe: xe device instance + * @gt: GT being reset + * + * Reset the Xe page fault state for a GT; that is, squash any pending faults on + * the GT. + */ +void xe_pagefault_reset(struct xe_device *xe, struct xe_gt *gt) +{ + /* TODO - implement */ +} + +/** + * xe_pagefault_handler() - Page fault handler + * @xe: xe device instance + * @pf: Page fault + * + * Sink the page fault to a queue (i.e., a memory buffer) and queue a worker to + * service it. Safe to be called from IRQ or process context. Reclaim safe. + * + * Return: 0 on success, errno on failure + */ +int xe_pagefault_handler(struct xe_device *xe, struct xe_pagefault *pf) +{ + /* TODO - implement */ + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_pagefault.h b/drivers/gpu/drm/xe/xe_pagefault.h new file mode 100644 index 0000000000000..bd0cdf9ed37f1 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pagefault.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_PAGEFAULT_H_ +#define _XE_PAGEFAULT_H_ + +struct xe_device; +struct xe_gt; +struct xe_pagefault; + +int xe_pagefault_init(struct xe_device *xe); + +void xe_pagefault_reset(struct xe_device *xe, struct xe_gt *gt); + +int xe_pagefault_handler(struct xe_device *xe, struct xe_pagefault *pf); + +#endif diff --git a/drivers/gpu/drm/xe/xe_pagefault_types.h b/drivers/gpu/drm/xe/xe_pagefault_types.h new file mode 100644 index 0000000000000..d3b516407d600 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pagefault_types.h @@ -0,0 +1,136 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_PAGEFAULT_TYPES_H_ +#define _XE_PAGEFAULT_TYPES_H_ + +#include + +struct xe_gt; +struct xe_pagefault; + +/** enum xe_pagefault_access_type - Xe page fault access type */ +enum xe_pagefault_access_type { + /** @XE_PAGEFAULT_ACCESS_TYPE_READ: Read access type */ + XE_PAGEFAULT_ACCESS_TYPE_READ = 0, + /** @XE_PAGEFAULT_ACCESS_TYPE_WRITE: Write access type */ + XE_PAGEFAULT_ACCESS_TYPE_WRITE = 1, + /** @XE_PAGEFAULT_ACCESS_TYPE_ATOMIC: Atomic access type */ + XE_PAGEFAULT_ACCESS_TYPE_ATOMIC = 2, +}; + +/** enum xe_pagefault_type - Xe page fault type */ +enum xe_pagefault_type { + /** @XE_PAGEFAULT_TYPE_NOT_PRESENT: Not present */ + XE_PAGEFAULT_TYPE_NOT_PRESENT = 0, + /** @XE_PAGEFAULT_TYPE_WRITE_ACCESS_VIOLATION: Write access violation */ + XE_PAGEFAULT_TYPE_WRITE_ACCESS_VIOLATION = 1, + /** @XE_PAGEFAULT_TYPE_ATOMIC_ACCESS_VIOLATION: Atomic access violation */ + XE_PAGEFAULT_TYPE_ATOMIC_ACCESS_VIOLATION = 2, +}; + +/** struct xe_pagefault_ops - Xe pagefault ops (producer) */ +struct xe_pagefault_ops { + /** + * @ack_fault: Ack fault + * @pf: Page fault + * @err: Error state of fault + * + * Page fault producer receives acknowledgment from the consumer and + * sends the result to the HW/FW interface. + */ + void (*ack_fault)(struct xe_pagefault *pf, int err); +}; + +/** + * struct xe_pagefault - Xe page fault + * + * Generic page fault structure for communication between producer and consumer. + * Carefully sized to be 64 bytes. Upon a device page fault, the producer + * populates this structure, and the consumer copies it into the page-fault + * queue for deferred handling. + */ +struct xe_pagefault { + /** + * @gt: GT of fault + */ + struct xe_gt *gt; + /** + * @consumer: State for the software handling the fault. Populated by + * the producer and may be modified by the consumer to communicate + * information back to the producer upon fault acknowledgment. + */ + struct { + /** @consumer.page_addr: address of page fault */ + u64 page_addr; + /** @consumer.asid: address space ID */ + u32 asid; + /** + * @consumer.access_type: access type, u8 rather than enum to + * keep size compact + */ + u8 access_type; + /** + * @consumer.fault_type: fault type, u8 rather than enum to + * keep size compact + */ + u8 fault_type; +#define XE_PAGEFAULT_LEVEL_NACK 0xff /* Producer indicates nack fault */ + /** @consumer.fault_level: fault level */ + u8 fault_level; + /** @consumer.engine_class: engine class */ + u8 engine_class; + /** @consumer.engine_instance: engine instance */ + u8 engine_instance; + /** consumer.reserved: reserved bits for future expansion */ + u8 reserved[7]; + } consumer; + /** + * @producer: State for the producer (i.e., HW/FW interface). Populated + * by the producer and should not be modified—or even inspected—by the + * consumer, except for calling operations. + */ + struct { + /** @producer.private: private pointer */ + void *private; + /** @producer.ops: operations */ + const struct xe_pagefault_ops *ops; +#define XE_PAGEFAULT_PRODUCER_MSG_LEN_DW 4 + /** + * @producer.msg: page fault message, used by producer in fault + * acknowledgment to formulate response to HW/FW interface. + * Included in the page-fault message because the producer + * typically receives the fault in a context where memory cannot + * be allocated (e.g., atomic context or the reclaim path). + */ + u32 msg[XE_PAGEFAULT_PRODUCER_MSG_LEN_DW]; + } producer; +}; + +/** + * struct xe_pagefault_queue: Xe pagefault queue (consumer) + * + * Used to capture all device page faults for deferred processing. Size this + * queue to absorb the device’s worst-case number of outstanding faults. + */ +struct xe_pagefault_queue { + /** + * @data: Data in queue containing struct xe_pagefault, protected by + * @lock + */ + void *data; + /** @size: Size of queue in bytes */ + u32 size; + /** @head: Head pointer in bytes, moved by producer, protected by @lock */ + u32 head; + /** @tail: Tail pointer in bytes, moved by consumer, protected by @lock */ + u32 tail; + /** @lock: protects page fault queue */ + spinlock_t lock; + /** @worker: to process page faults */ + struct work_struct worker; +}; + +#endif