git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/xe/mert: Improve handling of MERT CAT errors
author Michal Wajdeczko <michal.wajdeczko@intel.com>
Mon, 12 Jan 2026 18:37:16 +0000 (19:37 +0100)
committer Michal Wajdeczko <michal.wajdeczko@intel.com>
Wed, 14 Jan 2026 15:02:50 +0000 (16:02 +0100)
All MERT catastrophic errors except a VF's LMTT fault are serious, so
we shouldn't limit our handling to printing debug messages.

Change CATERR message to error level and then declare the device
as wedged to match expectation from the design document. For the
LMTT faults, add a note about adding tracking of this unexpected
VF activity.

While at it, rename register field definitions to match the BSpec.
Also drop trailing include guard name from the regs.h file.

BSpec: 74625
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Lukasz Laguna <lukasz.laguna@intel.com>
Reviewed-by: Lukasz Laguna <lukasz.laguna@intel.com>
Link: https://patch.msgid.link/20260112183716.28700-1-michal.wajdeczko@intel.com
drivers/gpu/drm/xe/regs/xe_mert_regs.h
drivers/gpu/drm/xe/xe_mert.c

index c345e11ceea84f02d7d847043a17bf01fdba2ce1..99e5a26da65725b47aa2277a53109f11f98888ad 100644 (file)
 #define MERT_LMEM_CFG                          XE_REG(0x1448b0)
 
 #define MERT_TLB_CT_INTR_ERR_ID_PORT           XE_REG(0x145190)
-#define   MERT_TLB_CT_VFID_MASK                        REG_GENMASK(16, 9)
-#define   MERT_TLB_CT_ERROR_MASK               REG_GENMASK(5, 0)
-#define     MERT_TLB_CT_LMTT_FAULT             0x05
+#define   CATERR_VFID                          REG_GENMASK(16, 9)
+#define   CATERR_CODES                         REG_GENMASK(5, 0)
+#define     CATERR_NO_ERROR                    0x00
+#define     CATERR_UNMAPPED_GGTT               0x01
+#define     CATERR_LMTT_FAULT                  0x05
 
 #define MERT_TLB_INV_DESC_A                    XE_REG(0x14cf7c)
 #define   MERT_TLB_INV_DESC_A_VALID            REG_BIT(0)
 
-#endif /* _XE_MERT_REGS_H_ */
+#endif
index fc027d2d7a5eb4499fb13ccb0485ba3c382b6602..f637df95418b031ef17d3c486f9bcdb02e91c64e 100644 (file)
@@ -9,6 +9,7 @@
 #include "xe_device.h"
 #include "xe_mert.h"
 #include "xe_mmio.h"
+#include "xe_sriov_printk.h"
 #include "xe_tile.h"
 
 /**
@@ -55,6 +56,37 @@ int xe_mert_invalidate_lmtt(struct xe_device *xe)
        return 0;
 }
 
+static void mert_handle_cat_error(struct xe_device *xe)
+{
+       struct xe_tile *tile = xe_device_get_root_tile(xe);
+       u32 reg_val, vfid, code;
+
+       reg_val = xe_mmio_read32(&tile->mmio, MERT_TLB_CT_INTR_ERR_ID_PORT);
+       if (!reg_val)
+               return;
+       xe_mmio_write32(&tile->mmio, MERT_TLB_CT_INTR_ERR_ID_PORT, 0);
+
+       vfid = FIELD_GET(CATERR_VFID, reg_val);
+       code = FIELD_GET(CATERR_CODES, reg_val);
+
+       switch (code) {
+       case CATERR_NO_ERROR:
+               break;
+       case CATERR_UNMAPPED_GGTT:
+               xe_sriov_err(xe, "MERT: CAT_ERR: Access to an unmapped GGTT!\n");
+               xe_device_declare_wedged(xe);
+               break;
+       case CATERR_LMTT_FAULT:
+               xe_sriov_dbg(xe, "MERT: CAT_ERR: VF%u LMTT fault!\n", vfid);
+               /* XXX: track/report malicious VF activity */
+               break;
+       default:
+               xe_sriov_err(xe, "MERT: Unexpected CAT_ERR code=%#x!\n", code);
+               xe_device_declare_wedged(xe);
+               break;
+       }
+}
+
 /**
  * xe_mert_irq_handler - Handler for MERT interrupts
  * @xe: the &xe_device
@@ -68,20 +100,11 @@ void xe_mert_irq_handler(struct xe_device *xe, u32 master_ctl)
        struct xe_mert *mert = &tile->mert;
        unsigned long flags;
        u32 reg_val;
-       u8 err;
 
        if (!(master_ctl & SOC_H2DMEMINT_IRQ))
                return;
 
-       reg_val = xe_mmio_read32(&tile->mmio, MERT_TLB_CT_INTR_ERR_ID_PORT);
-       xe_mmio_write32(&tile->mmio, MERT_TLB_CT_INTR_ERR_ID_PORT, 0);
-
-       err = FIELD_GET(MERT_TLB_CT_ERROR_MASK, reg_val);
-       if (err == MERT_TLB_CT_LMTT_FAULT)
-               drm_dbg(&xe->drm, "MERT catastrophic error: LMTT fault (VF%u)\n",
-                       FIELD_GET(MERT_TLB_CT_VFID_MASK, reg_val));
-       else if (err)
-               drm_dbg(&xe->drm, "MERT catastrophic error: Unexpected fault (0x%x)\n", err);
+       mert_handle_cat_error(xe);
 
        spin_lock_irqsave(&mert->lock, flags);
        if (mert->tlb_inv_triggered) {