From: Matthew Brost Date: Wed, 26 Nov 2025 18:59:52 +0000 (-0800) Subject: drm/xe: Implement DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=1026c1a73a9686ff35ac100039f94f0725622447;p=thirdparty%2Flinux.git drm/xe: Implement DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE Implement DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE which sets the exec queue default state to user data passed in. The intent is for a Mesa tool to use this to replay GPU hangs. v2: - Enable the flag DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE - Fix the page size math calculation to avoid a crash v4: - Use vmemdup_user (Maarten) - Copy default state first into LRC, then replay state (Testing, Carlos) Cc: José Roberto de Souza Signed-off-by: Matthew Brost Reviewed-by: Maarten Lankhorst Reviewed-by: Jonathan Cavitt Link: https://patch.msgid.link/20251126185952.546277-10-matthew.brost@intel.com --- diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 8724f8de67e2..226d07a3d852 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -79,6 +79,7 @@ static void __xe_exec_queue_free(struct xe_exec_queue *q) if (q->xef) xe_file_put(q->xef); + kvfree(q->replay_state); kfree(q); } @@ -225,8 +226,8 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q, u32 exec_queue_flags) struct xe_lrc *lrc; xe_gt_sriov_vf_wait_valid_ggtt(q->gt); - lrc = xe_lrc_create(q->hwe, q->vm, xe_lrc_ring_size(), - q->msix_vec, flags); + lrc = xe_lrc_create(q->hwe, q->vm, q->replay_state, + xe_lrc_ring_size(), q->msix_vec, flags); if (IS_ERR(lrc)) { err = PTR_ERR(lrc); goto err_lrc; } @@ -567,6 +568,23 @@ exec_queue_set_pxp_type(struct xe_device *xe, struct xe_exec_queue *q, u64 value return xe_pxp_exec_queue_set_type(xe->pxp, q, DRM_XE_PXP_TYPE_HWDRM); } +static int exec_queue_set_hang_replay_state(struct xe_device *xe, + struct xe_exec_queue *q, + u64 value) +{ + size_t size = 
xe_gt_lrc_hang_replay_size(q->gt, q->class); + u64 __user *address = u64_to_user_ptr(value); + void *ptr; + + ptr = vmemdup_user(address, size); + if (XE_IOCTL_DBG(xe, IS_ERR(ptr))) + return PTR_ERR(ptr); + + q->replay_state = ptr; + + return 0; +} + typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe, struct xe_exec_queue *q, u64 value); @@ -575,6 +593,7 @@ static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = { [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority, [DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice, [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE] = exec_queue_set_pxp_type, + [DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE] = exec_queue_set_hang_replay_state, }; static int exec_queue_user_ext_set_property(struct xe_device *xe, @@ -595,7 +614,8 @@ static int exec_queue_user_ext_set_property(struct xe_device *xe, XE_IOCTL_DBG(xe, ext.pad) || XE_IOCTL_DBG(xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY && ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE && - ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE)) + ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE && + ext.property != DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE)) return -EINVAL; idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs)); diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 771ffe35cd0c..3ba10632dcd6 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -167,6 +167,9 @@ struct xe_exec_queue { /** @ufence_timeline_value: User fence timeline value */ u64 ufence_timeline_value; + /** @replay_state: GPU hang replay state */ + void *replay_state; + /** @ops: submission backend exec queue operations */ const struct xe_exec_queue_ops *ops; diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index 769d05517f93..46c17a18a3f4 100644 --- 
a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -269,7 +269,7 @@ struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe, port->hwe = hwe; - port->lrc = xe_lrc_create(hwe, NULL, SZ_16K, XE_IRQ_DEFAULT_MSIX, 0); + port->lrc = xe_lrc_create(hwe, NULL, NULL, SZ_16K, XE_IRQ_DEFAULT_MSIX, 0); if (IS_ERR(port->lrc)) { err = PTR_ERR(port->lrc); goto err; diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 2deca095607c..a05060f75e7e 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -91,13 +91,19 @@ gt_engine_needs_indirect_ctx(struct xe_gt *gt, enum xe_engine_class class) return false; } -size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class) +/** + * xe_gt_lrc_hang_replay_size() - Hang replay size + * @gt: The GT + * @class: Hardware engine class + * + * Determine size of GPU hang replay state for a GT and hardware engine class. + * + * Return: Size of GPU hang replay size + */ +size_t xe_gt_lrc_hang_replay_size(struct xe_gt *gt, enum xe_engine_class class) { struct xe_device *xe = gt_to_xe(gt); - size_t size; - - /* Per-process HW status page (PPHWSP) */ - size = LRC_PPHWSP_SIZE; + size_t size = 0; /* Engine context image */ switch (class) { @@ -123,11 +129,18 @@ size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class) size += 1 * SZ_4K; } + return size; +} + +size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class) +{ + size_t size = xe_gt_lrc_hang_replay_size(gt, class); + /* Add indirect ring state page */ if (xe_gt_has_indirect_ring_state(gt)) size += LRC_INDIRECT_RING_STATE_SIZE; - return size; + return size + LRC_PPHWSP_SIZE; } /* @@ -1387,7 +1400,8 @@ setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe) } static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, - struct xe_vm *vm, u32 ring_size, u16 msix_vec, + struct xe_vm *vm, void *replay_state, u32 ring_size, + u16 msix_vec, u32 init_flags) { struct xe_gt 
*gt = hwe->gt; @@ -1402,9 +1416,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, kref_init(&lrc->refcount); lrc->gt = gt; - lrc->replay_size = xe_gt_lrc_size(gt, hwe->class); - if (xe_gt_has_indirect_ring_state(gt)) - lrc->replay_size -= LRC_INDIRECT_RING_STATE_SIZE; + lrc->replay_size = xe_gt_lrc_hang_replay_size(gt, hwe->class); lrc->size = lrc_size; lrc->flags = 0; lrc->ring.size = ring_size; @@ -1441,11 +1453,14 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, * scratch. */ map = __xe_lrc_pphwsp_map(lrc); - if (gt->default_lrc[hwe->class]) { + if (gt->default_lrc[hwe->class] || replay_state) { xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */ xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE, gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE, lrc_size - LRC_PPHWSP_SIZE); + if (replay_state) + xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE, + replay_state, lrc->replay_size); } else { void *init_data = empty_lrc_data(hwe); @@ -1553,6 +1568,7 @@ err_lrc_finish: * xe_lrc_create - Create a LRC * @hwe: Hardware Engine * @vm: The VM (address space) + * @replay_state: GPU hang replay state * @ring_size: LRC ring size * @msix_vec: MSI-X interrupt vector (for platforms that support it) * @flags: LRC initialization flags @@ -1563,7 +1579,7 @@ err_lrc_finish: * upon failure. 
*/ struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm, - u32 ring_size, u16 msix_vec, u32 flags) + void *replay_state, u32 ring_size, u16 msix_vec, u32 flags) { struct xe_lrc *lrc; int err; @@ -1572,7 +1588,7 @@ struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm, if (!lrc) return ERR_PTR(-ENOMEM); - err = xe_lrc_init(lrc, hwe, vm, ring_size, msix_vec, flags); + err = xe_lrc_init(lrc, hwe, vm, replay_state, ring_size, msix_vec, flags); if (err) { kfree(lrc); return ERR_PTR(err); diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index c3288625d0c7..a32472b92242 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -50,7 +50,7 @@ struct xe_lrc_snapshot { #define XE_LRC_CREATE_USER_CTX BIT(2) struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm, - u32 ring_size, u16 msix_vec, u32 flags); + void *replay_state, u32 ring_size, u16 msix_vec, u32 flags); void xe_lrc_destroy(struct kref *ref); /** @@ -87,6 +87,7 @@ static inline size_t xe_lrc_ring_size(void) return SZ_16K; } +size_t xe_gt_lrc_hang_replay_size(struct xe_gt *gt, enum xe_engine_class class); size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class); u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc); u32 xe_lrc_regs_offset(struct xe_lrc *lrc);