From: Michal Wajdeczko Date: Mon, 12 Jan 2026 18:37:16 +0000 (+0100) Subject: drm/xe/mert: Improve handling of MERT CAT errors X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=def675cf3f107ba8da78ca0b8650997fdf667538;p=thirdparty%2Fkernel%2Flinux.git drm/xe/mert: Improve handling of MERT CAT errors All MERT catastrophic errors but VF's LMTT fault are serious, so we shouldn't limit our handling only to print debug messages. Change CATERR message to error level and then declare the device as wedged to match expectation from the design document. For the LMTT faults, add a note about adding tracking of this unexpected VF activity. While at it, rename register fields definitions to match the BSpec. Also drop trailing include guard name from the regs.h file. BSpec: 74625 Signed-off-by: Michal Wajdeczko Cc: Lukasz Laguna Reviewed-by: Lukasz Laguna Link: https://patch.msgid.link/20260112183716.28700-1-michal.wajdeczko@intel.com --- diff --git a/drivers/gpu/drm/xe/regs/xe_mert_regs.h b/drivers/gpu/drm/xe/regs/xe_mert_regs.h index c345e11ceea8..99e5a26da657 100644 --- a/drivers/gpu/drm/xe/regs/xe_mert_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_mert_regs.h @@ -11,11 +11,13 @@ #define MERT_LMEM_CFG XE_REG(0x1448b0) #define MERT_TLB_CT_INTR_ERR_ID_PORT XE_REG(0x145190) -#define MERT_TLB_CT_VFID_MASK REG_GENMASK(16, 9) -#define MERT_TLB_CT_ERROR_MASK REG_GENMASK(5, 0) -#define MERT_TLB_CT_LMTT_FAULT 0x05 +#define CATERR_VFID REG_GENMASK(16, 9) +#define CATERR_CODES REG_GENMASK(5, 0) +#define CATERR_NO_ERROR 0x00 +#define CATERR_UNMAPPED_GGTT 0x01 +#define CATERR_LMTT_FAULT 0x05 #define MERT_TLB_INV_DESC_A XE_REG(0x14cf7c) #define MERT_TLB_INV_DESC_A_VALID REG_BIT(0) -#endif /* _XE_MERT_REGS_H_ */ +#endif diff --git a/drivers/gpu/drm/xe/xe_mert.c b/drivers/gpu/drm/xe/xe_mert.c index fc027d2d7a5e..f637df95418b 100644 --- a/drivers/gpu/drm/xe/xe_mert.c +++ b/drivers/gpu/drm/xe/xe_mert.c @@ -9,6 +9,7 @@ #include "xe_device.h" #include "xe_mert.h" #include 
"xe_mmio.h" +#include "xe_sriov_printk.h" #include "xe_tile.h" /** @@ -55,6 +56,37 @@ int xe_mert_invalidate_lmtt(struct xe_device *xe) return 0; } +static void mert_handle_cat_error(struct xe_device *xe) +{ + struct xe_tile *tile = xe_device_get_root_tile(xe); + u32 reg_val, vfid, code; + + reg_val = xe_mmio_read32(&tile->mmio, MERT_TLB_CT_INTR_ERR_ID_PORT); + if (!reg_val) + return; + xe_mmio_write32(&tile->mmio, MERT_TLB_CT_INTR_ERR_ID_PORT, 0); + + vfid = FIELD_GET(CATERR_VFID, reg_val); + code = FIELD_GET(CATERR_CODES, reg_val); + + switch (code) { + case CATERR_NO_ERROR: + break; + case CATERR_UNMAPPED_GGTT: + xe_sriov_err(xe, "MERT: CAT_ERR: Access to an unmapped GGTT!\n"); + xe_device_declare_wedged(xe); + break; + case CATERR_LMTT_FAULT: + xe_sriov_dbg(xe, "MERT: CAT_ERR: VF%u LMTT fault!\n", vfid); + /* XXX: track/report malicious VF activity */ + break; + default: + xe_sriov_err(xe, "MERT: Unexpected CAT_ERR code=%#x!\n", code); + xe_device_declare_wedged(xe); + break; + } +} + /** * xe_mert_irq_handler - Handler for MERT interrupts * @xe: the &xe_device @@ -68,20 +100,11 @@ void xe_mert_irq_handler(struct xe_device *xe, u32 master_ctl) struct xe_mert *mert = &tile->mert; unsigned long flags; u32 reg_val; - u8 err; if (!(master_ctl & SOC_H2DMEMINT_IRQ)) return; - reg_val = xe_mmio_read32(&tile->mmio, MERT_TLB_CT_INTR_ERR_ID_PORT); - xe_mmio_write32(&tile->mmio, MERT_TLB_CT_INTR_ERR_ID_PORT, 0); - - err = FIELD_GET(MERT_TLB_CT_ERROR_MASK, reg_val); - if (err == MERT_TLB_CT_LMTT_FAULT) - drm_dbg(&xe->drm, "MERT catastrophic error: LMTT fault (VF%u)\n", - FIELD_GET(MERT_TLB_CT_VFID_MASK, reg_val)); - else if (err) - drm_dbg(&xe->drm, "MERT catastrophic error: Unexpected fault (0x%x)\n", err); + mert_handle_cat_error(xe); spin_lock_irqsave(&mert->lock, flags); if (mert->tlb_inv_triggered) {