]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
drm/xe: Avoid reading RMW registers in emit_wa_job
author: Michal Wajdeczko <michal.wajdeczko@intel.com>
Mon, 3 Mar 2025 17:35:20 +0000 (18:35 +0100)
committer: Michal Wajdeczko <michal.wajdeczko@intel.com>
Wed, 12 Mar 2025 10:37:51 +0000 (11:37 +0100)
To allow VFs to properly handle LRC WAs, we should postpone doing
all RMW register operations and let them be run by the engine
itself, since any attempt to read registers from within the
driver will fail on the VF. Use MI_MATH and ALU for that.

Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Michał Winiarski <michal.winiarski@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250303173522.1822-4-michal.wajdeczko@intel.com
drivers/gpu/drm/xe/xe_gt.c

index 10a9e3c72b3604b0276f6e42985c9737ab36638f..8068b4bc0a09ca0ad4f104a7202bb72f2ef12639 100644 (file)
 
 #include <generated/xe_wa_oob.h>
 
+#include "instructions/xe_alu_commands.h"
 #include "instructions/xe_gfxpipe_commands.h"
 #include "instructions/xe_mi_commands.h"
+#include "regs/xe_engine_regs.h"
 #include "regs/xe_gt_regs.h"
 #include "xe_assert.h"
 #include "xe_bb.h"
@@ -176,15 +178,6 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q)
        return 0;
 }
 
-/*
- * Convert back from encoded value to type-safe, only to be used when reg.mcr
- * is true
- */
-static struct xe_reg_mcr to_xe_reg_mcr(const struct xe_reg reg)
-{
-       return (const struct xe_reg_mcr){.__reg.raw = reg.raw };
-}
-
 static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
 {
        struct xe_reg_sr *sr = &q->hwe->reg_lrc;
@@ -194,6 +187,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
        struct xe_bb *bb;
        struct dma_fence *fence;
        long timeout;
+       int count_rmw = 0;
        int count = 0;
 
        if (q->hwe->class == XE_ENGINE_CLASS_RENDER)
@@ -206,30 +200,32 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
        if (IS_ERR(bb))
                return PTR_ERR(bb);
 
-       xa_for_each(&sr->xa, idx, entry)
-               ++count;
+       /* count RMW registers as those will be handled separately */
+       xa_for_each(&sr->xa, idx, entry) {
+               if (entry->reg.masked || entry->clr_bits == ~0)
+                       ++count;
+               else
+                       ++count_rmw;
+       }
 
-       if (count) {
+       if (count || count_rmw)
                xe_gt_dbg(gt, "LRC WA %s save-restore batch\n", sr->name);
 
+       if (count) {
+               /* emit single LRI with all non RMW regs */
+
                bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
 
                xa_for_each(&sr->xa, idx, entry) {
                        struct xe_reg reg = entry->reg;
-                       struct xe_reg_mcr reg_mcr = to_xe_reg_mcr(reg);
                        u32 val;
 
-                       /*
-                        * Skip reading the register if it's not really needed
-                        */
                        if (reg.masked)
                                val = entry->clr_bits << 16;
-                       else if (entry->clr_bits + 1)
-                               val = (reg.mcr ?
-                                      xe_gt_mcr_unicast_read_any(gt, reg_mcr) :
-                                      xe_mmio_read32(&gt->mmio, reg)) & (~entry->clr_bits);
-                       else
+                       else if (entry->clr_bits == ~0)
                                val = 0;
+                       else
+                               continue;
 
                        val |= entry->set_bits;
 
@@ -239,6 +235,52 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
                }
        }
 
+       if (count_rmw) {
+               /* emit MI_MATH for each RMW reg */
+
+               xa_for_each(&sr->xa, idx, entry) {
+                       if (entry->reg.masked || entry->clr_bits == ~0)
+                               continue;
+
+                       bb->cs[bb->len++] = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO;
+                       bb->cs[bb->len++] = entry->reg.addr;
+                       bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr;
+
+                       bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
+                                           MI_LRI_LRM_CS_MMIO;
+                       bb->cs[bb->len++] = CS_GPR_REG(0, 1).addr;
+                       bb->cs[bb->len++] = entry->clr_bits;
+                       bb->cs[bb->len++] = CS_GPR_REG(0, 2).addr;
+                       bb->cs[bb->len++] = entry->set_bits;
+
+                       bb->cs[bb->len++] = MI_MATH(8);
+                       bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCA, REG0);
+                       bb->cs[bb->len++] = CS_ALU_INSTR_LOADINV(SRCB, REG1);
+                       bb->cs[bb->len++] = CS_ALU_INSTR_AND;
+                       bb->cs[bb->len++] = CS_ALU_INSTR_STORE(REG0, ACCU);
+                       bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCA, REG0);
+                       bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCB, REG2);
+                       bb->cs[bb->len++] = CS_ALU_INSTR_OR;
+                       bb->cs[bb->len++] = CS_ALU_INSTR_STORE(REG0, ACCU);
+
+                       bb->cs[bb->len++] = MI_LOAD_REGISTER_REG | MI_LRR_SRC_CS_MMIO;
+                       bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr;
+                       bb->cs[bb->len++] = entry->reg.addr;
+
+                       xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x\n",
+                                 entry->reg.addr, entry->clr_bits, entry->set_bits);
+               }
+
+               /* reset used GPR */
+               bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(3) | MI_LRI_LRM_CS_MMIO;
+               bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr;
+               bb->cs[bb->len++] = 0;
+               bb->cs[bb->len++] = CS_GPR_REG(0, 1).addr;
+               bb->cs[bb->len++] = 0;
+               bb->cs[bb->len++] = CS_GPR_REG(0, 2).addr;
+               bb->cs[bb->len++] = 0;
+       }
+
        xe_lrc_emit_hwe_state_instructions(q, bb);
 
        job = xe_bb_create_job(q, bb);