From 1b5d39e6672fdee158c3306f5cb2df8975c77e5a Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Wed, 10 Dec 2025 17:02:58 -0800 Subject: [PATCH] drm/xe/multi_queue: Set QUEUE_DRAIN_MODE for Multi Queue batches To properly support soft light restore between batches being arbitrated at the CFEG, PIPE_CONTROL instructions have a new bit in the first DW, QUEUE_DRAIN_MODE. When set, this indicates to the CFEG that it should only drain the current queue. Additionally we no longer want to set the CS_STALL bit for these multi queue queues as this causes the entire pipeline to stall waiting for completion of the prior batch, preventing this soft light restore from occurring between queues in a queue group. v4: Assert !multi_queue where applicable (Matt Roper) Bspec: 56551 Signed-off-by: Stuart Summers Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Matt Roper Link: https://patch.msgid.link/20251211010249.1647839-29-niranjana.vishwanathapura@intel.com --- .../gpu/drm/xe/instructions/xe_gpu_commands.h | 1 + drivers/gpu/drm/xe/xe_ring_ops.c | 64 ++++++++++++------- 2 files changed, 42 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h b/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h index 5d41ca2974471..885fcf211e6d0 100644 --- a/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h @@ -47,6 +47,7 @@ #define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2)) +#define PIPE_CONTROL0_QUEUE_DRAIN_MODE BIT(12) #define PIPE_CONTROL0_L3_READ_ONLY_CACHE_INVALIDATE BIT(10) /* gen12 */ #define PIPE_CONTROL0_HDC_PIPELINE_FLUSH BIT(9) /* gen12 */ diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index ac0c6dcffe156..96a14fb745071 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -12,7 +12,7 @@ #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" #include "regs/xe_lrc_layout.h" -#include "xe_exec_queue_types.h" +#include "xe_exec_queue.h" #include "xe_gt.h" #include "xe_lrc.h" #include "xe_macros.h" @@ -135,12 +135,11 @@ emit_pipe_control(u32 *dw, int i, u32 bit_group_0, u32 bit_group_1, u32 offset, return i; } -static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw, - int i) +static int emit_pipe_invalidate(struct xe_exec_queue *q, u32 mask_flags, + bool invalidate_tlb, u32 *dw, int i) { u32 flags0 = 0; - u32 flags1 = PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_COMMAND_CACHE_INVALIDATE | + u32 flags1 = PIPE_CONTROL_COMMAND_CACHE_INVALIDATE | PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE | PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | PIPE_CONTROL_VF_CACHE_INVALIDATE | @@ -152,6 +151,11 @@ static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw, if (invalidate_tlb) flags1 |= PIPE_CONTROL_TLB_INVALIDATE; + if (xe_exec_queue_is_multi_queue(q)) + flags0 |= PIPE_CONTROL0_QUEUE_DRAIN_MODE; + else + flags1 |= PIPE_CONTROL_CS_STALL; + flags1 &= ~mask_flags; if (flags1 & PIPE_CONTROL_VF_CACHE_INVALIDATE) @@ -175,37 +179,47 @@ static int emit_store_imm_ppgtt_posted(u64 addr, u64 value, static int emit_render_cache_flush(struct xe_sched_job *job, u32 *dw, int i) { - struct xe_gt *gt = job->q->gt; + struct xe_exec_queue *q = job->q; + struct xe_gt *gt = q->gt; bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK); - u32 flags; + u32 flags0, flags1; if (XE_GT_WA(gt, 14016712196)) i = emit_pipe_control(dw, i, 0, PIPE_CONTROL_DEPTH_CACHE_FLUSH, LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR, 0); - flags = (PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_TILE_CACHE_FLUSH | + flags0 = PIPE_CONTROL0_HDC_PIPELINE_FLUSH; + flags1 = (PIPE_CONTROL_TILE_CACHE_FLUSH | PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_FLUSH_ENABLE); if (XE_GT_WA(gt, 1409600907)) - flags |= PIPE_CONTROL_DEPTH_STALL; + flags1 |= PIPE_CONTROL_DEPTH_STALL; if (lacks_render) - flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS; + flags1 &= ~PIPE_CONTROL_3D_ARCH_FLAGS; else if (job->q->class == XE_ENGINE_CLASS_COMPUTE) - flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS; + flags1 &= ~PIPE_CONTROL_3D_ENGINE_FLAGS; + + if (xe_exec_queue_is_multi_queue(q)) + flags0 |= PIPE_CONTROL0_QUEUE_DRAIN_MODE; + else + flags1 |= PIPE_CONTROL_CS_STALL; - return emit_pipe_control(dw, i, PIPE_CONTROL0_HDC_PIPELINE_FLUSH, flags, 0, 0); + return emit_pipe_control(dw, i, flags0, flags1, 0, 0); } -static int emit_pipe_control_to_ring_end(struct xe_hw_engine *hwe, u32 *dw, int i) +static int emit_pipe_control_to_ring_end(struct xe_exec_queue *q, u32 *dw, int i) { + struct xe_hw_engine *hwe = q->hwe; + if (hwe->class != XE_ENGINE_CLASS_RENDER) return i; + xe_gt_assert(q->gt, !xe_exec_queue_is_multi_queue(q)); + if (XE_GT_WA(hwe->gt, 16020292621)) i = emit_pipe_control(dw, i, 0, PIPE_CONTROL_LRI_POST_SYNC, RING_NOPID(hwe->mmio_base).addr, 0); @@ -213,16 +227,20 @@ static int emit_pipe_control_to_ring_end(struct xe_hw_engine *hwe, u32 *dw, int return i; } -static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw, - int i) +static int emit_pipe_imm_ggtt(struct xe_exec_queue *q, u32 addr, u32 value, + bool stall_only, u32 *dw, int i) { - u32 flags = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_QW_WRITE; + u32 flags0 = 0, flags1 = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE; if (!stall_only) - flags |= PIPE_CONTROL_FLUSH_ENABLE; + flags1 |= PIPE_CONTROL_FLUSH_ENABLE; + + if (xe_exec_queue_is_multi_queue(q)) + flags0 |= PIPE_CONTROL0_QUEUE_DRAIN_MODE; + else + flags1 |= PIPE_CONTROL_CS_STALL; - return emit_pipe_control(dw, i, 0, flags, addr, value); + return emit_pipe_control(dw, i, flags0, flags1, addr, value); } static u32 get_ppgtt_flag(struct xe_sched_job *job) @@ -371,7 +389,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS; /* See __xe_pt_bind_vma() for a discussion on TLB invalidations. */ - i = emit_pipe_invalidate(mask_flags, job->ring_ops_flush_tlb, dw, i); + i = emit_pipe_invalidate(job->q, mask_flags, job->ring_ops_flush_tlb, dw, i); /* hsdes: 1809175790 */ if (has_aux_ccs(xe)) @@ -391,11 +409,11 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, job->user_fence.value, dw, i); - i = emit_pipe_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, lacks_render, dw, i); + i = emit_pipe_imm_ggtt(job->q, xe_lrc_seqno_ggtt_addr(lrc), seqno, lacks_render, dw, i); i = emit_user_interrupt(dw, i); - i = emit_pipe_control_to_ring_end(job->q->hwe, dw, i); + i = emit_pipe_control_to_ring_end(job->q, dw, i); xe_gt_assert(gt, i <= MAX_JOB_SIZE_DW); -- 2.47.3