git.ipfire.org Git - thirdparty/linux.git/commitdiff
drm/xe: Apply Wa_16023105232
author Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Thu, 20 Mar 2025 17:51:23 +0000 (10:51 -0700)
committer John Harrison <John.C.Harrison@Intel.com>
Thu, 20 Mar 2025 22:59:03 +0000 (15:59 -0700)
The WA requires KMD to disable DOP clock gating during a semaphore
wait and also ensure that idle delay for every CS is lower than the
idle wait time in the PWRCTX_MAXCNT register. Default values for these
registers already comply with this restriction.

v2: Store timestamp_base in gt info and other comments (Daniele)
v3: Skip WA check for VF
v4: Review comments (Matt Roper)
v5: Cleanup the clock functions and use reg_field_get (Matt Roper)
v6: Fix checkpatch issue
v7: Fix CI issue

Cc: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250320175123.3026754-1-vinay.belgaumkar@intel.com
drivers/gpu/drm/xe/regs/xe_engine_regs.h
drivers/gpu/drm/xe/xe_gt_clock.c
drivers/gpu/drm/xe/xe_gt_types.h
drivers/gpu/drm/xe/xe_hw_engine.c
drivers/gpu/drm/xe/xe_wa.c
drivers/gpu/drm/xe/xe_wa_oob.rules

index 659cf85fa3d67d5fb20476f24735a536fb63102f..da713634d6a0c40a67bf6feeb5b60bc491b8f4ae 100644 (file)
 #define RING_EXECLIST_STATUS_LO(base)          XE_REG((base) + 0x234)
 #define RING_EXECLIST_STATUS_HI(base)          XE_REG((base) + 0x234 + 4)
 
+#define RING_IDLEDLY(base)                     XE_REG((base) + 0x23c) /* offset from the engine MMIO base */
+#define   INHIBIT_SWITCH_UNTIL_PREEMPTED       REG_BIT(31) /* adjust_idledly() warns if set */
+#define   IDLE_DELAY                           REG_GENMASK(20, 0) /* tick count; adjust_idledly() scales it by 8 * timestamp_base */
+
 #define RING_CONTEXT_CONTROL(base)             XE_REG((base) + 0x244, XE_REG_OPTION_MASKED)
 #define          CTX_CTRL_PXP_ENABLE                   REG_BIT(10)
 #define          CTX_CTRL_OAC_CONTEXT_ENABLE           REG_BIT(8)
index fca38738e610c0a4e4627c8faaafb48fb51b7e96..4f011d1573c65979b9058686d9346b90364f526e 100644 (file)
 #include "xe_macros.h"
 #include "xe_mmio.h"
 
-static u32 get_crystal_clock_freq(u32 rpm_config_reg)
+#define f19_2_mhz      19200000
+#define f24_mhz                24000000
+#define f25_mhz                25000000
+#define f38_4_mhz      38400000
+#define ts_base_83     83333
+#define ts_base_52     52083
+#define ts_base_80     80000
+/*
+ * Decode the crystal clock selection held in the
+ * RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK field of @rpm_config_reg.
+ *
+ * For a recognised encoding, *@freq receives the matching f*_mhz constant and
+ * *@timestamp_base the matching ts_base_* constant. Any other encoding logs a
+ * warning against @gt and zeroes both outputs.
+ *
+ * NOTE(review): the f*_mhz values look like Hz and the ts_base_* values like
+ * picoseconds (adjust_idledly() scales timestamp_base into a *_ps variable);
+ * confirm the units.
+ */
+static void read_crystal_clock(struct xe_gt *gt, u32 rpm_config_reg, u32 *freq,
+                              u32 *timestamp_base)
 {
-       const u32 f19_2_mhz = 19200000;
-       const u32 f24_mhz = 24000000;
-       const u32 f25_mhz = 25000000;
-       const u32 f38_4_mhz = 38400000;
        u32 crystal_clock = REG_FIELD_GET(RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK,
                                          rpm_config_reg);
 
        switch (crystal_clock) {
        case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ:
-               return f24_mhz;
+               *freq = f24_mhz;
+               *timestamp_base = ts_base_83;
+               return;
        case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ:
-               return f19_2_mhz;
+               *freq = f19_2_mhz;
+               *timestamp_base = ts_base_52;
+               return;
        case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ:
-               return f38_4_mhz;
+               *freq = f38_4_mhz;
+               *timestamp_base = ts_base_52;
+               return;
        case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ:
-               return f25_mhz;
+               *freq = f25_mhz;
+               *timestamp_base = ts_base_80;
+               return;
        default:
-               XE_WARN_ON("NOT_POSSIBLE");
-               return 0;
+               xe_gt_warn(gt, "Invalid crystal clock frequency: %u", crystal_clock);
+               *freq = 0;
+               *timestamp_base = 0;
+               return;
        }
 }
 
@@ -65,7 +80,7 @@ int xe_gt_clock_init(struct xe_gt *gt)
                check_ctc_mode(gt);
 
        c0 = xe_mmio_read32(&gt->mmio, RPM_CONFIG0);
-       freq = get_crystal_clock_freq(c0);
+       read_crystal_clock(gt, c0, &freq, &gt->info.timestamp_base);
 
        /*
         * Now figure out how the command stream's timestamp
index e3cfb026ac88ed1a152b4d6afa326885e841bd43..7def0959da35d86f516eb8ae403568124ab1477f 100644 (file)
@@ -121,6 +121,8 @@ struct xe_gt {
                enum xe_gt_type type;
                /** @info.reference_clock: clock frequency */
                u32 reference_clock;
+               /** @info.timestamp_base: GT timestamp base */
+               u32 timestamp_base;
                /**
                 * @info.engine_mask: mask of engines present on GT. Some of
                 * them may be reserved in runtime and not available for user.
index 223b95de388cb8919a7d1ab52e1305be990672ad..8c05fd30b7df6178d9806bd84e7ec51e99bd7bfb 100644 (file)
@@ -8,7 +8,9 @@
 #include <linux/nospec.h>
 
 #include <drm/drm_managed.h>
+#include <drm/drm_print.h>
 #include <uapi/drm/xe_drm.h>
+#include <generated/xe_wa_oob.h>
 
 #include "regs/xe_engine_regs.h"
 #include "regs/xe_gt_regs.h"
@@ -21,6 +23,7 @@
 #include "xe_gsc.h"
 #include "xe_gt.h"
 #include "xe_gt_ccs_mode.h"
+#include "xe_gt_clock.h"
 #include "xe_gt_printk.h"
 #include "xe_gt_mcr.h"
 #include "xe_gt_topology.h"
@@ -564,6 +567,33 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
        xe_reg_whitelist_process_engine(hwe);
 }
 
+/*
+ * Wa_16023105232: the idle delay programmed in RING_IDLEDLY must stay below
+ * the idle wait time programmed in RING_PWRCTX_MAXCNT.
+ *
+ * Nothing is done on an SR-IOV VF or when the workaround does not apply to the
+ * GT. Otherwise both fields are converted to nanoseconds and compared. If the
+ * idle delay is not strictly lower, or INHIBIT_SWITCH_UNTIL_PREEMPTED is set,
+ * a warning is raised and RING_IDLEDLY is rewritten with the largest delay
+ * that is still one MAXCNT tick below the idle wait time. Only the delay
+ * value is written back, so the inhibit bit ends up cleared.
+ */
+static void adjust_idledly(struct xe_hw_engine *hwe)
+{
+       struct xe_gt *gt = hwe->gt;
+       /* One IDLE_DELAY tick is 8 timestamp periods; 0 if the crystal clock was unknown */
+       u32 idledly_units_ps = 8 * gt->info.timestamp_base;
+       u32 maxcnt_units_ns = 640;
+       u32 idledly, maxcnt;
+       u64 idledly_ns, maxcnt_ns, target_ps;
+       bool inhibit_switch;
+
+       if (IS_SRIOV_VF(gt_to_xe(gt)) || !XE_WA(gt, 16023105232))
+               return;
+
+       /* No valid tick length: nothing can be converted and the fix-up would divide by 0 */
+       if (!idledly_units_ps)
+               return;
+
+       idledly = xe_mmio_read32(&gt->mmio, RING_IDLEDLY(hwe->mmio_base));
+       maxcnt = xe_mmio_read32(&gt->mmio, RING_PWRCTX_MAXCNT(hwe->mmio_base));
+
+       inhibit_switch = idledly & INHIBIT_SWITCH_UNTIL_PREEMPTED;
+       idledly = REG_FIELD_GET(IDLE_DELAY, idledly);
+       maxcnt = REG_FIELD_GET(IDLE_WAIT_TIME, maxcnt);
+
+       /* 64-bit math: a 21-bit tick count times the tick length overflows 32 bits */
+       idledly_ns = DIV_ROUND_CLOSEST_ULL((u64)idledly * idledly_units_ps, 1000);
+       maxcnt_ns = (u64)maxcnt * maxcnt_units_ns;
+
+       if (!xe_gt_WARN_ON(gt, idledly_ns >= maxcnt_ns || inhibit_switch))
+               return;
+
+       /* Aim one MAXCNT tick below the idle wait time; never let maxcnt - 1 wrap */
+       target_ps = (u64)(maxcnt ? maxcnt - 1 : 0) * maxcnt_units_ns * 1000;
+       /* Round down to whole IDLE_DELAY ticks so the new delay cannot exceed the target */
+       idledly = DIV_ROUND_DOWN_ULL(target_ps, idledly_units_ps);
+       xe_mmio_write32(&gt->mmio, RING_IDLEDLY(hwe->mmio_base), idledly);
+}
+
 static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
                          enum xe_hw_engine_id id)
 {
@@ -604,6 +634,9 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
        if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY)
                gt->usm.reserved_bcs_instance = hwe->instance;
 
+       /* Ensure IDLEDLY is lower than MAXCNT */
+       adjust_idledly(hwe);
+
        return devm_add_action_or_reset(xe->drm.dev, hw_engine_fini, hwe);
 
 err_hwsp:
index a25afb757f7088c7f831867717b42982e9ea754c..24f644c0a67365e5362e83b2b575f1408382c163 100644 (file)
@@ -622,6 +622,12 @@ static const struct xe_rtp_entry_sr engine_was[] = {
                       FUNC(xe_rtp_match_first_render_or_compute)),
          XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS))
        },
+       { XE_RTP_NAME("16023105232"),
+         XE_RTP_RULES(MEDIA_VERSION_RANGE(1301, 3000), OR,
+                      GRAPHICS_VERSION_RANGE(2001, 3001)),
+         XE_RTP_ACTIONS(SET(RING_PSMI_CTL(0), RC_SEMA_IDLE_MSG_DISABLE,
+                            XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+       },
 };
 
 static const struct xe_rtp_entry_sr lrc_was[] = {
index e0c5fa460487899d81321e8edace24c5432defd3..0c738af24f7c53d7dc828b4ddc27e2edc36a1445 100644 (file)
@@ -53,3 +53,5 @@ no_media_l3   MEDIA_VERSION(3000)
                GRAPHICS_VERSION_RANGE(1270, 1274)
 1508761755     GRAPHICS_VERSION(1255)
                GRAPHICS_VERSION(1260), GRAPHICS_STEP(A0, B0)
+16023105232    GRAPHICS_VERSION_RANGE(2001, 3001)
+               MEDIA_VERSION_RANGE(1301, 3000)