From: Satyanarayana K V P Date: Tue, 18 Nov 2025 12:07:45 +0000 (+0000) Subject: drm/xe/vf: Shadow buffer management for CCS read/write operations X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=fa18290bf0723b02bfa8d30d2e14722f0d096c2c;p=thirdparty%2Flinux.git drm/xe/vf: Shadow buffer management for CCS read/write operations The CCS copy command consists of a 5-dword sequence. If the vCPU halts during save/restore operations while these sequences are being programmed, incomplete writes can cause page faults during IGPU CCS metadata saving. Use shadow buffer management to prevent partial write issues during CCS operations. Signed-off-by: Satyanarayana K V P Suggested-by: Matthew Brost Cc: Michal Wajdeczko Cc: Matthew Auld Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20251118120745.3460172-3-satyanarayana.k.v.p@intel.com --- diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 2184af413b912..f3b66b55acfb3 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -34,6 +34,7 @@ #include "xe_res_cursor.h" #include "xe_sa.h" #include "xe_sched_job.h" +#include "xe_sriov_vf_ccs.h" #include "xe_sync.h" #include "xe_trace_bo.h" #include "xe_validation.h" @@ -1103,12 +1104,16 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q, u32 batch_size, batch_size_allocated; struct xe_device *xe = gt_to_xe(gt); struct xe_res_cursor src_it, ccs_it; + struct xe_sriov_vf_ccs_ctx *ctx; + struct xe_sa_manager *bb_pool; u64 size = xe_bo_size(src_bo); struct xe_bb *bb = NULL; u64 src_L0, src_L0_ofs; u32 src_L0_pt; int err; + ctx = &xe->sriov.vf.ccs.contexts[read_write]; + xe_res_first_sg(xe_bo_sg(src_bo), 0, size, &src_it); xe_res_first_sg(xe_bo_sg(src_bo), xe_bo_ccs_pages_start(src_bo), @@ -1141,11 +1146,15 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q, size -= src_L0; } + bb_pool = ctx->mem.ccs_bb_pool; + guard(mutex) 
(xe_sa_bo_swap_guard(bb_pool)); + xe_sa_bo_swap_shadow(bb_pool); + bb = xe_bb_ccs_new(gt, batch_size, read_write); if (IS_ERR(bb)) { drm_err(&xe->drm, "BB allocation failed.\n"); err = PTR_ERR(bb); - goto err_ret; + return err; } batch_size_allocated = batch_size; @@ -1194,10 +1203,52 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q, xe_assert(xe, (batch_size_allocated == bb->len)); src_bo->bb_ccs[read_write] = bb; + xe_sriov_vf_ccs_rw_update_bb_addr(ctx); + xe_sa_bo_sync_shadow(bb->bo); return 0; +} -err_ret: - return err; +/** + * xe_migrate_ccs_rw_copy_clear() - Clear the CCS read/write batch buffer + * content. + * @src_bo: The buffer object whose CCS batch buffer is cleared and freed. + * @read_write: Selects the CCS read or write context whose BB is cleared. + * + * Directly clearing the BB lacks atomicity and can lead to undefined + * behavior if the vCPU is halted mid-operation during the clearing + * process. To avoid this issue, we use a shadow buffer object approach. + * + * First swap the SA BO address with the shadow BO, perform the clearing + * operation on the BB, update the shadow BO in the ring buffer, then + * sync the shadow and the actual buffer to maintain consistency. + * + * Returns: None. 
+ */ +void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo, + enum xe_sriov_vf_ccs_rw_ctxs read_write) +{ + struct xe_bb *bb = src_bo->bb_ccs[read_write]; + struct xe_device *xe = xe_bo_device(src_bo); + struct xe_sriov_vf_ccs_ctx *ctx; + struct xe_sa_manager *bb_pool; + u32 *cs; + + xe_assert(xe, IS_SRIOV_VF(xe)); + + ctx = &xe->sriov.vf.ccs.contexts[read_write]; + bb_pool = ctx->mem.ccs_bb_pool; + + guard(mutex) (xe_sa_bo_swap_guard(bb_pool)); + xe_sa_bo_swap_shadow(bb_pool); + + cs = xe_sa_bo_cpu_addr(bb->bo); + memset(cs, MI_NOOP, bb->len * sizeof(u32)); + xe_sriov_vf_ccs_rw_update_bb_addr(ctx); + + xe_sa_bo_sync_shadow(bb->bo); + + xe_bb_free(bb, NULL); + src_bo->bb_ccs[read_write] = NULL; } /** diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h index 260e298e5dd7f..464c05dde1baf 100644 --- a/drivers/gpu/drm/xe/xe_migrate.h +++ b/drivers/gpu/drm/xe/xe_migrate.h @@ -134,6 +134,9 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q, struct xe_bo *src_bo, enum xe_sriov_vf_ccs_rw_ctxs read_write); +void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo, + enum xe_sriov_vf_ccs_rw_ctxs read_write); + struct xe_lrc *xe_migrate_lrc(struct xe_migrate *migrate); struct xe_exec_queue *xe_migrate_exec_queue(struct xe_migrate *migrate); struct dma_fence *xe_migrate_vram_copy_chunk(struct xe_bo *vram_bo, u64 vram_offset, diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c index 9959d619addcf..33f4238604e11 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c @@ -150,7 +150,8 @@ static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx) xe_sriov_info(xe, "Allocating %s CCS BB pool size = %lldMB\n", ctx->ctx_id ? 
"Restore" : "Save", bb_pool_size / SZ_1M); - sa_manager = xe_sa_bo_manager_init(tile, bb_pool_size, SZ_16); + sa_manager = __xe_sa_bo_manager_init(tile, bb_pool_size, SZ_4K, SZ_16, + XE_SA_BO_MANAGER_FLAG_SHADOW); if (IS_ERR(sa_manager)) { xe_sriov_err(xe, "Suballocator init failed with error: %pe\n", @@ -384,6 +385,18 @@ err_ret: return err; } +#define XE_SRIOV_VF_CCS_RW_BB_ADDR_OFFSET (2 * sizeof(u32)) +void xe_sriov_vf_ccs_rw_update_bb_addr(struct xe_sriov_vf_ccs_ctx *ctx) +{ + u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool); + struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q); + struct xe_device *xe = gt_to_xe(ctx->mig_q->gt); + + xe_device_wmb(xe); + xe_map_wr(xe, &lrc->bo->vmap, XE_SRIOV_VF_CCS_RW_BB_ADDR_OFFSET, u32, addr); + xe_device_wmb(xe); +} + /** * xe_sriov_vf_ccs_attach_bo - Insert CCS read write commands in the BO. * @bo: the &buffer object to which batch buffer commands will be added. @@ -444,9 +457,7 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo) if (!bb) continue; - memset(bb->cs, MI_NOOP, bb->len * sizeof(u32)); - xe_bb_free(bb, NULL); - bo->bb_ccs[ctx_id] = NULL; + xe_migrate_ccs_rw_copy_clear(bo, ctx_id); } return 0; } diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h index f8ca6efce9ee8..00e58b36c510a 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h @@ -20,6 +20,7 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo); int xe_sriov_vf_ccs_register_context(struct xe_device *xe); void xe_sriov_vf_ccs_rebase(struct xe_device *xe); void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p); +void xe_sriov_vf_ccs_rw_update_bb_addr(struct xe_sriov_vf_ccs_ctx *ctx); static inline bool xe_sriov_vf_ccs_ready(struct xe_device *xe) {