1 // SPDX-License-Identifier: MIT
3 * Copyright © 2014-2018 Intel Corporation
8 #include "intel_context.h"
9 #include "intel_engine_pm.h"
10 #include "intel_engine_regs.h"
11 #include "intel_gpu_commands.h"
13 #include "intel_gt_mcr.h"
14 #include "intel_gt_regs.h"
15 #include "intel_ring.h"
16 #include "intel_workarounds.h"
19 * DOC: Hardware workarounds
21 * Hardware workarounds are register programming documented to be executed in
22 * the driver that fall outside of the normal programming sequences for a
23 * platform. There are some basic categories of workarounds, depending on
24 * how/when they are applied:
26 * - Context workarounds: workarounds that touch registers that are
27 * saved/restored to/from the HW context image. The list is emitted (via Load
28 * Register Immediate commands) once when initializing the device and saved in
29 * the default context. That default context is then used on every context
30 * creation to have a "primed golden context", i.e. a context image that
31 * already contains the changes needed to all the registers.
33 * Context workarounds should be implemented in the \*_ctx_workarounds_init()
34 * variants respective to the targeted platforms.
36 * - Engine workarounds: the list of these WAs is applied whenever the specific
37 * engine is reset. It's also possible that a set of engine classes share a
38 * common power domain and they are reset together. This happens on some
39 * platforms with render and compute engines. In this case (at least) one of
40 * them needs to keep the workaround programming: the approach taken in the
41 * driver is to tie those workarounds to the first compute/render engine that
42 * is registered. When executing with GuC submission, engine resets are
43 * outside of kernel driver control, hence the list of registers involved is
44 * written once, on engine initialization, and then passed to GuC, that
45 * saves/restores their values before/after the reset takes place. See
46 * ``drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c`` for reference.
48 * Workarounds for registers specific to RCS and CCS should be implemented in
49 * rcs_engine_wa_init() and ccs_engine_wa_init(), respectively; those for
50 * registers belonging to BCS, VCS or VECS should be implemented in
51 * xcs_engine_wa_init(). Workarounds for registers not belonging to a specific
52 * engine's MMIO range but that are part of the common RCS/CCS reset domain
53 * should be implemented in general_render_compute_wa_init().
55 * - GT workarounds: the list of these WAs is applied whenever these registers
56 * revert to their default values: on GPU reset, suspend/resume [1]_, etc.
58 * GT workarounds should be implemented in the \*_gt_workarounds_init()
59 * variants respective to the targeted platforms.
61 * - Register whitelist: some workarounds need to be implemented in userspace,
62 * but need to touch privileged registers. The whitelist in the kernel
63 * instructs the hardware to allow the access to happen. From the kernel side,
64 * this is just a special case of a MMIO workaround (as we write the list of
65 * these to-be-whitelisted registers to some special HW registers).
67 * Register whitelisting should be done in the \*_whitelist_build() variants
68 * respective to the targeted platforms.
70 * - Workaround batchbuffers: buffers that get executed automatically by the
71 * hardware on every HW context restore. These buffers are created and
72 * programmed in the default context so the hardware always goes through those
73 * programming sequences when switching contexts. The support for workaround
74 * batchbuffers is enabled by these hardware mechanisms:
76 * #. INDIRECT_CTX: A batchbuffer and an offset are provided in the default
77 * context, pointing the hardware to jump to that location when that offset
78 * is reached in the context restore. Workaround batchbuffer in the driver
79 * currently uses this mechanism for all platforms.
81 * #. BB_PER_CTX_PTR: A batchbuffer is provided in the default context,
82 * pointing the hardware to a buffer to continue executing after the
83 * engine registers are restored in a context restore sequence. This is
84 * currently not used in the driver.
86 * - Other: There are WAs that, due to their nature, cannot be applied from a
87 * central place. Those are peppered around the rest of the code, as needed.
88 * Workarounds related to the display IP are the main example.
90 * .. [1] Technically, some registers are powercontext saved & restored, so they
91 * survive a suspend/resume. In practice, writing them again is not too
92 * costly and simplifies things, so it's the approach taken in the driver.
95 static void wa_init_start(struct i915_wa_list
*wal
, struct intel_gt
*gt
,
96 const char *name
, const char *engine_name
)
100 wal
->engine_name
= engine_name
;
103 #define WA_LIST_CHUNK (1 << 4)
105 static void wa_init_finish(struct i915_wa_list
*wal
)
107 /* Trim unused entries. */
108 if (!IS_ALIGNED(wal
->count
, WA_LIST_CHUNK
)) {
109 struct i915_wa
*list
= kmemdup(wal
->list
,
110 wal
->count
* sizeof(*list
),
122 drm_dbg(&wal
->gt
->i915
->drm
, "Initialized %u %s workarounds on %s\n",
123 wal
->wa_count
, wal
->name
, wal
->engine_name
);
126 static void _wa_add(struct i915_wa_list
*wal
, const struct i915_wa
*wa
)
128 unsigned int addr
= i915_mmio_reg_offset(wa
->reg
);
129 struct drm_i915_private
*i915
= wal
->gt
->i915
;
130 unsigned int start
= 0, end
= wal
->count
;
131 const unsigned int grow
= WA_LIST_CHUNK
;
134 GEM_BUG_ON(!is_power_of_2(grow
));
136 if (IS_ALIGNED(wal
->count
, grow
)) { /* Either uninitialized or full. */
137 struct i915_wa
*list
;
139 list
= kmalloc_array(ALIGN(wal
->count
+ 1, grow
), sizeof(*wa
),
142 drm_err(&i915
->drm
, "No space for workaround init!\n");
147 memcpy(list
, wal
->list
, sizeof(*wa
) * wal
->count
);
154 while (start
< end
) {
155 unsigned int mid
= start
+ (end
- start
) / 2;
157 if (i915_mmio_reg_offset(wal
->list
[mid
].reg
) < addr
) {
159 } else if (i915_mmio_reg_offset(wal
->list
[mid
].reg
) > addr
) {
162 wa_
= &wal
->list
[mid
];
164 if ((wa
->clr
| wa_
->clr
) && !(wa
->clr
& ~wa_
->clr
)) {
166 "Discarding overwritten w/a for reg %04x (clear: %08x, set: %08x)\n",
167 i915_mmio_reg_offset(wa_
->reg
),
170 wa_
->set
&= ~wa
->clr
;
176 wa_
->read
|= wa
->read
;
182 wa_
= &wal
->list
[wal
->count
++];
185 while (wa_
-- > wal
->list
) {
186 GEM_BUG_ON(i915_mmio_reg_offset(wa_
[0].reg
) ==
187 i915_mmio_reg_offset(wa_
[1].reg
));
188 if (i915_mmio_reg_offset(wa_
[1].reg
) >
189 i915_mmio_reg_offset(wa_
[0].reg
))
192 swap(wa_
[1], wa_
[0]);
196 static void wa_add(struct i915_wa_list
*wal
, i915_reg_t reg
,
197 u32 clear
, u32 set
, u32 read_mask
, bool masked_reg
)
199 struct i915_wa wa
= {
204 .masked_reg
= masked_reg
,
210 static void wa_mcr_add(struct i915_wa_list
*wal
, i915_mcr_reg_t reg
,
211 u32 clear
, u32 set
, u32 read_mask
, bool masked_reg
)
213 struct i915_wa wa
= {
218 .masked_reg
= masked_reg
,
226 wa_write_clr_set(struct i915_wa_list
*wal
, i915_reg_t reg
, u32 clear
, u32 set
)
228 wa_add(wal
, reg
, clear
, set
, clear
, false);
232 wa_mcr_write_clr_set(struct i915_wa_list
*wal
, i915_mcr_reg_t reg
, u32 clear
, u32 set
)
234 wa_mcr_add(wal
, reg
, clear
, set
, clear
, false);
238 wa_write(struct i915_wa_list
*wal
, i915_reg_t reg
, u32 set
)
240 wa_write_clr_set(wal
, reg
, ~0, set
);
244 wa_mcr_write(struct i915_wa_list
*wal
, i915_mcr_reg_t reg
, u32 set
)
246 wa_mcr_write_clr_set(wal
, reg
, ~0, set
);
250 wa_write_or(struct i915_wa_list
*wal
, i915_reg_t reg
, u32 set
)
252 wa_write_clr_set(wal
, reg
, set
, set
);
256 wa_mcr_write_or(struct i915_wa_list
*wal
, i915_mcr_reg_t reg
, u32 set
)
258 wa_mcr_write_clr_set(wal
, reg
, set
, set
);
262 wa_write_clr(struct i915_wa_list
*wal
, i915_reg_t reg
, u32 clr
)
264 wa_write_clr_set(wal
, reg
, clr
, 0);
268 wa_mcr_write_clr(struct i915_wa_list
*wal
, i915_mcr_reg_t reg
, u32 clr
)
270 wa_mcr_write_clr_set(wal
, reg
, clr
, 0);
274 * WA operations on "masked register". A masked register has the upper 16 bits
275 * documented as "masked" in b-spec. Its purpose is to allow writing to just a
276 * portion of the register without a rmw: you simply write in the upper 16 bits
277 * the mask of bits you are going to modify.
279 * The wa_masked_* family of functions already does the necessary operations to
280 * calculate the mask based on the parameters passed, so user only has to
281 * provide the lower 16 bits of that register.
285 wa_masked_en(struct i915_wa_list
*wal
, i915_reg_t reg
, u32 val
)
287 wa_add(wal
, reg
, 0, _MASKED_BIT_ENABLE(val
), val
, true);
291 wa_mcr_masked_en(struct i915_wa_list
*wal
, i915_mcr_reg_t reg
, u32 val
)
293 wa_mcr_add(wal
, reg
, 0, _MASKED_BIT_ENABLE(val
), val
, true);
297 wa_masked_dis(struct i915_wa_list
*wal
, i915_reg_t reg
, u32 val
)
299 wa_add(wal
, reg
, 0, _MASKED_BIT_DISABLE(val
), val
, true);
303 wa_mcr_masked_dis(struct i915_wa_list
*wal
, i915_mcr_reg_t reg
, u32 val
)
305 wa_mcr_add(wal
, reg
, 0, _MASKED_BIT_DISABLE(val
), val
, true);
309 wa_masked_field_set(struct i915_wa_list
*wal
, i915_reg_t reg
,
312 wa_add(wal
, reg
, 0, _MASKED_FIELD(mask
, val
), mask
, true);
316 wa_mcr_masked_field_set(struct i915_wa_list
*wal
, i915_mcr_reg_t reg
,
319 wa_mcr_add(wal
, reg
, 0, _MASKED_FIELD(mask
, val
), mask
, true);
322 static void gen6_ctx_workarounds_init(struct intel_engine_cs
*engine
,
323 struct i915_wa_list
*wal
)
325 wa_masked_en(wal
, INSTPM
, INSTPM_FORCE_ORDERING
);
328 static void gen7_ctx_workarounds_init(struct intel_engine_cs
*engine
,
329 struct i915_wa_list
*wal
)
331 wa_masked_en(wal
, INSTPM
, INSTPM_FORCE_ORDERING
);
334 static void gen8_ctx_workarounds_init(struct intel_engine_cs
*engine
,
335 struct i915_wa_list
*wal
)
337 wa_masked_en(wal
, INSTPM
, INSTPM_FORCE_ORDERING
);
339 /* WaDisableAsyncFlipPerfMode:bdw,chv */
340 wa_masked_en(wal
, RING_MI_MODE(RENDER_RING_BASE
), ASYNC_FLIP_PERF_DISABLE
);
342 /* WaDisablePartialInstShootdown:bdw,chv */
343 wa_mcr_masked_en(wal
, GEN8_ROW_CHICKEN
,
344 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE
);
346 /* Use Force Non-Coherent whenever executing a 3D context. This is a
347 * workaround for a possible hang in the unlikely event a TLB
348 * invalidation occurs during a PSD flush.
350 /* WaForceEnableNonCoherent:bdw,chv */
351 /* WaHdcDisableFetchWhenMasked:bdw,chv */
352 wa_masked_en(wal
, HDC_CHICKEN0
,
353 HDC_DONOT_FETCH_MEM_WHEN_MASKED
|
354 HDC_FORCE_NON_COHERENT
);
356 /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
357 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
358 * polygons in the same 8x4 pixel/sample area to be processed without
359 * stalling waiting for the earlier ones to write to Hierarchical Z
362 * This optimization is off by default for BDW and CHV; turn it on.
364 wa_masked_dis(wal
, CACHE_MODE_0_GEN7
, HIZ_RAW_STALL_OPT_DISABLE
);
366 /* Wa4x4STCOptimizationDisable:bdw,chv */
367 wa_masked_en(wal
, CACHE_MODE_1
, GEN8_4x4_STC_OPTIMIZATION_DISABLE
);
370 * BSpec recommends 8x4 when MSAA is used,
371 * however in practice 16x4 seems fastest.
373 * Note that PS/WM thread counts depend on the WIZ hashing
374 * disable bit, which we don't touch here, but it's good
375 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
377 wa_masked_field_set(wal
, GEN7_GT_MODE
,
378 GEN6_WIZ_HASHING_MASK
,
379 GEN6_WIZ_HASHING_16x4
);
382 static void bdw_ctx_workarounds_init(struct intel_engine_cs
*engine
,
383 struct i915_wa_list
*wal
)
385 struct drm_i915_private
*i915
= engine
->i915
;
387 gen8_ctx_workarounds_init(engine
, wal
);
389 /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
390 wa_mcr_masked_en(wal
, GEN8_ROW_CHICKEN
, STALL_DOP_GATING_DISABLE
);
392 /* WaDisableDopClockGating:bdw
394 * Also see the related UCGTCL1 write in bdw_init_clock_gating()
395 * to disable EUTC clock gating.
397 wa_mcr_masked_en(wal
, GEN8_ROW_CHICKEN2
,
398 DOP_CLOCK_GATING_DISABLE
);
400 wa_mcr_masked_en(wal
, GEN8_HALF_SLICE_CHICKEN3
,
401 GEN8_SAMPLER_POWER_BYPASS_DIS
);
403 wa_masked_en(wal
, HDC_CHICKEN0
,
404 /* WaForceContextSaveRestoreNonCoherent:bdw */
405 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT
|
406 /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
407 (IS_BDW_GT3(i915
) ? HDC_FENCE_DEST_SLM_DISABLE
: 0));
410 static void chv_ctx_workarounds_init(struct intel_engine_cs
*engine
,
411 struct i915_wa_list
*wal
)
413 gen8_ctx_workarounds_init(engine
, wal
);
415 /* WaDisableThreadStallDopClockGating:chv */
416 wa_mcr_masked_en(wal
, GEN8_ROW_CHICKEN
, STALL_DOP_GATING_DISABLE
);
418 /* Improve HiZ throughput on CHV. */
419 wa_masked_en(wal
, HIZ_CHICKEN
, CHV_HZ_8X8_MODE_IN_1X
);
422 static void gen9_ctx_workarounds_init(struct intel_engine_cs
*engine
,
423 struct i915_wa_list
*wal
)
425 struct drm_i915_private
*i915
= engine
->i915
;
428 /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
430 * Must match Display Engine. See
431 * WaCompressedResourceDisplayNewHashMode.
433 wa_masked_en(wal
, COMMON_SLICE_CHICKEN2
,
434 GEN9_PBE_COMPRESSED_HASH_SELECTION
);
435 wa_mcr_masked_en(wal
, GEN9_HALF_SLICE_CHICKEN7
,
436 GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR
);
439 /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
440 /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
441 wa_mcr_masked_en(wal
, GEN8_ROW_CHICKEN
,
442 FLOW_CONTROL_ENABLE
|
443 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE
);
445 /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
446 /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
447 wa_mcr_masked_en(wal
, GEN9_HALF_SLICE_CHICKEN7
,
448 GEN9_ENABLE_YV12_BUGFIX
|
449 GEN9_ENABLE_GPGPU_PREEMPTION
);
451 /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
452 /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
453 wa_masked_en(wal
, CACHE_MODE_1
,
454 GEN8_4x4_STC_OPTIMIZATION_DISABLE
|
455 GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE
);
457 /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
458 wa_mcr_masked_dis(wal
, GEN9_HALF_SLICE_CHICKEN5
,
459 GEN9_CCS_TLB_PREFETCH_ENABLE
);
461 /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
462 wa_masked_en(wal
, HDC_CHICKEN0
,
463 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT
|
464 HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE
);
466 /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
467 * both tied to WaForceContextSaveRestoreNonCoherent
468 * in some hsds for skl. We keep the tie for all gen9. The
469 * documentation is a bit hazy and so we want to get common behaviour,
470 * even though there is no clear evidence we would need both on kbl/bxt.
471 * This area has been source of system hangs so we play it safe
472 * and mimic the skl regardless of what bspec says.
474 * Use Force Non-Coherent whenever executing a 3D context. This
475 * is a workaround for a possible hang in the unlikely event
476 * a TLB invalidation occurs during a PSD flush.
479 /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
480 wa_masked_en(wal
, HDC_CHICKEN0
,
481 HDC_FORCE_NON_COHERENT
);
483 /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
484 if (IS_SKYLAKE(i915
) ||
486 IS_COFFEELAKE(i915
) ||
488 wa_mcr_masked_en(wal
, GEN8_HALF_SLICE_CHICKEN3
,
489 GEN8_SAMPLER_POWER_BYPASS_DIS
);
491 /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
492 wa_mcr_masked_en(wal
, HALF_SLICE_CHICKEN2
, GEN8_ST_PO_DISABLE
);
495 * Supporting preemption with fine-granularity requires changes in the
496 * batch buffer programming. Since we can't break old userspace, we
497 * need to set our default preemption level to safe value. Userspace is
498 * still able to use more fine-grained preemption levels, since in
499 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
500 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
501 * not real HW workarounds, but merely a way to start using preemption
502 * while maintaining old contract with userspace.
505 /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
506 wa_masked_dis(wal
, GEN8_CS_CHICKEN1
, GEN9_PREEMPT_3D_OBJECT_LEVEL
);
508 /* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
509 wa_masked_field_set(wal
, GEN8_CS_CHICKEN1
,
510 GEN9_PREEMPT_GPGPU_LEVEL_MASK
,
511 GEN9_PREEMPT_GPGPU_COMMAND_LEVEL
);
513 /* WaClearHIZ_WM_CHICKEN3:bxt,glk */
514 if (IS_GEN9_LP(i915
))
515 wa_masked_en(wal
, GEN9_WM_CHICKEN3
, GEN9_FACTOR_IN_CLR_VAL_HIZ
);
518 static void skl_tune_iz_hashing(struct intel_engine_cs
*engine
,
519 struct i915_wa_list
*wal
)
521 struct intel_gt
*gt
= engine
->gt
;
522 u8 vals
[3] = { 0, 0, 0 };
525 for (i
= 0; i
< 3; i
++) {
529 * Only consider slices where one, and only one, subslice has 7
532 if (!is_power_of_2(gt
->info
.sseu
.subslice_7eu
[i
]))
536 * subslice_7eu[i] != 0 (because of the check above) and
537 * ss_max == 4 (maximum number of subslices possible per slice)
541 ss
= ffs(gt
->info
.sseu
.subslice_7eu
[i
]) - 1;
545 if (vals
[0] == 0 && vals
[1] == 0 && vals
[2] == 0)
548 /* Tune IZ hashing. See intel_device_info_runtime_init() */
549 wa_masked_field_set(wal
, GEN7_GT_MODE
,
550 GEN9_IZ_HASHING_MASK(2) |
551 GEN9_IZ_HASHING_MASK(1) |
552 GEN9_IZ_HASHING_MASK(0),
553 GEN9_IZ_HASHING(2, vals
[2]) |
554 GEN9_IZ_HASHING(1, vals
[1]) |
555 GEN9_IZ_HASHING(0, vals
[0]));
558 static void skl_ctx_workarounds_init(struct intel_engine_cs
*engine
,
559 struct i915_wa_list
*wal
)
561 gen9_ctx_workarounds_init(engine
, wal
);
562 skl_tune_iz_hashing(engine
, wal
);
565 static void bxt_ctx_workarounds_init(struct intel_engine_cs
*engine
,
566 struct i915_wa_list
*wal
)
568 gen9_ctx_workarounds_init(engine
, wal
);
570 /* WaDisableThreadStallDopClockGating:bxt */
571 wa_mcr_masked_en(wal
, GEN8_ROW_CHICKEN
,
572 STALL_DOP_GATING_DISABLE
);
574 /* WaToEnableHwFixForPushConstHWBug:bxt */
575 wa_masked_en(wal
, COMMON_SLICE_CHICKEN2
,
576 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION
);
579 static void kbl_ctx_workarounds_init(struct intel_engine_cs
*engine
,
580 struct i915_wa_list
*wal
)
582 struct drm_i915_private
*i915
= engine
->i915
;
584 gen9_ctx_workarounds_init(engine
, wal
);
586 /* WaToEnableHwFixForPushConstHWBug:kbl */
587 if (IS_KBL_GRAPHICS_STEP(i915
, STEP_C0
, STEP_FOREVER
))
588 wa_masked_en(wal
, COMMON_SLICE_CHICKEN2
,
589 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION
);
591 /* WaDisableSbeCacheDispatchPortSharing:kbl */
592 wa_mcr_masked_en(wal
, GEN8_HALF_SLICE_CHICKEN1
,
593 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE
);
596 static void glk_ctx_workarounds_init(struct intel_engine_cs
*engine
,
597 struct i915_wa_list
*wal
)
599 gen9_ctx_workarounds_init(engine
, wal
);
601 /* WaToEnableHwFixForPushConstHWBug:glk */
602 wa_masked_en(wal
, COMMON_SLICE_CHICKEN2
,
603 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION
);
606 static void cfl_ctx_workarounds_init(struct intel_engine_cs
*engine
,
607 struct i915_wa_list
*wal
)
609 gen9_ctx_workarounds_init(engine
, wal
);
611 /* WaToEnableHwFixForPushConstHWBug:cfl */
612 wa_masked_en(wal
, COMMON_SLICE_CHICKEN2
,
613 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION
);
615 /* WaDisableSbeCacheDispatchPortSharing:cfl */
616 wa_mcr_masked_en(wal
, GEN8_HALF_SLICE_CHICKEN1
,
617 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE
);
620 static void icl_ctx_workarounds_init(struct intel_engine_cs
*engine
,
621 struct i915_wa_list
*wal
)
623 /* Wa_1406697149 (WaDisableBankHangMode:icl) */
626 intel_uncore_read(engine
->uncore
, GEN8_L3CNTLREG
) |
629 /* WaForceEnableNonCoherent:icl
630 * This is not the same workaround as in early Gen9 platforms, where
631 * lacking this could cause system hangs, but coherency performance
632 * overhead is high and only a few compute workloads really need it
633 * (the register is whitelisted in hardware now, so UMDs can opt in
634 * for coherency if they have a good reason).
636 wa_mcr_masked_en(wal
, ICL_HDC_MODE
, HDC_FORCE_NON_COHERENT
);
638 /* WaEnableFloatBlendOptimization:icl */
639 wa_mcr_add(wal
, GEN10_CACHE_MODE_SS
, 0,
640 _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE
),
641 0 /* write-only, so skip validation */,
644 /* WaDisableGPGPUMidThreadPreemption:icl */
645 wa_masked_field_set(wal
, GEN8_CS_CHICKEN1
,
646 GEN9_PREEMPT_GPGPU_LEVEL_MASK
,
647 GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL
);
649 /* allow headerless messages for preemptible GPGPU context */
650 wa_mcr_masked_en(wal
, GEN10_SAMPLER_MODE
,
651 GEN11_SAMPLER_ENABLE_HEADLESS_MSG
);
653 /* Wa_1604278689:icl,ehl */
654 wa_write(wal
, IVB_FBC_RT_BASE
, 0xFFFFFFFF & ~ILK_FBC_RT_VALID
);
655 wa_write_clr_set(wal
, IVB_FBC_RT_BASE_UPPER
,
656 0, /* write-only register; skip validation */
659 /* Wa_1406306137:icl,ehl */
660 wa_mcr_masked_en(wal
, GEN9_ROW_CHICKEN4
, GEN11_DIS_PICK_2ND_EU
);
664 * These settings aren't actually workarounds, but general tuning settings that
665 * need to be programmed on dg2 platform.
667 static void dg2_ctx_gt_tuning_init(struct intel_engine_cs
*engine
,
668 struct i915_wa_list
*wal
)
670 wa_mcr_masked_en(wal
, CHICKEN_RASTER_2
, TBIMR_FAST_CLIP
);
671 wa_mcr_write_clr_set(wal
, XEHP_L3SQCREG5
, L3_PWM_TIMER_INIT_VAL_MASK
,
672 REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK
, 0x7f));
675 FF_MODE2_TDS_TIMER_MASK
,
676 FF_MODE2_TDS_TIMER_128
,
681 * These settings aren't actually workarounds, but general tuning settings that
682 * need to be programmed on several platforms.
684 static void gen12_ctx_gt_tuning_init(struct intel_engine_cs
*engine
,
685 struct i915_wa_list
*wal
)
688 * Although some platforms refer to it as Wa_1604555607, we need to
689 * program it even on those that don't explicitly list that
692 * Note that the programming of this register is further modified
693 * according to the FF_MODE2 guidance given by Wa_1608008084:gen12.
694 * Wa_1608008084 tells us the FF_MODE2 register will return the wrong
695 * value when read. The default value for this register is zero for all
696 * fields and there are no bit masks. So instead of doing a RMW we
697 * should just write TDS timer value. For the same reason read
698 * verification is ignored.
702 FF_MODE2_TDS_TIMER_MASK
,
703 FF_MODE2_TDS_TIMER_128
,
707 static void gen12_ctx_workarounds_init(struct intel_engine_cs
*engine
,
708 struct i915_wa_list
*wal
)
710 struct drm_i915_private
*i915
= engine
->i915
;
712 gen12_ctx_gt_tuning_init(engine
, wal
);
715 * Wa_1409142259:tgl,dg1,adl-p
716 * Wa_1409347922:tgl,dg1,adl-p
717 * Wa_1409252684:tgl,dg1,adl-p
718 * Wa_1409217633:tgl,dg1,adl-p
719 * Wa_1409207793:tgl,dg1,adl-p
720 * Wa_1409178076:tgl,dg1,adl-p
721 * Wa_1408979724:tgl,dg1,adl-p
722 * Wa_14010443199:tgl,rkl,dg1,adl-p
723 * Wa_14010698770:tgl,rkl,dg1,adl-s,adl-p
724 * Wa_1409342910:tgl,rkl,dg1,adl-s,adl-p
726 wa_masked_en(wal
, GEN11_COMMON_SLICE_CHICKEN3
,
727 GEN12_DISABLE_CPS_AWARE_COLOR_PIPE
);
729 /* WaDisableGPGPUMidThreadPreemption:gen12 */
730 wa_masked_field_set(wal
, GEN8_CS_CHICKEN1
,
731 GEN9_PREEMPT_GPGPU_LEVEL_MASK
,
732 GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL
);
737 * Like in gen12_ctx_gt_tuning_init(), read verification is ignored due
742 FF_MODE2_GS_TIMER_MASK
,
743 FF_MODE2_GS_TIMER_224
,
748 wa_masked_en(wal
, HIZ_CHICKEN
, HZ_DEPTH_TEST_LE_GE_OPT_DISABLE
);
751 wa_masked_en(wal
, COMMON_SLICE_CHICKEN4
, DISABLE_TDC_LOAD_BALANCING_CALC
);
755 static void dg1_ctx_workarounds_init(struct intel_engine_cs
*engine
,
756 struct i915_wa_list
*wal
)
758 gen12_ctx_workarounds_init(engine
, wal
);
761 wa_masked_dis(wal
, GEN11_COMMON_SLICE_CHICKEN3
,
762 DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN
);
765 wa_masked_en(wal
, HIZ_CHICKEN
,
766 DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE
);
769 static void dg2_ctx_workarounds_init(struct intel_engine_cs
*engine
,
770 struct i915_wa_list
*wal
)
772 dg2_ctx_gt_tuning_init(engine
, wal
);
774 /* Wa_16011186671:dg2_g11 */
775 if (IS_DG2_GRAPHICS_STEP(engine
->i915
, G11
, STEP_A0
, STEP_B0
)) {
776 wa_mcr_masked_dis(wal
, VFLSKPD
, DIS_MULT_MISS_RD_SQUASH
);
777 wa_mcr_masked_en(wal
, VFLSKPD
, DIS_OVER_FETCH_CACHE
);
780 if (IS_DG2_GRAPHICS_STEP(engine
->i915
, G10
, STEP_A0
, STEP_B0
)) {
781 /* Wa_14010469329:dg2_g10 */
782 wa_mcr_masked_en(wal
, XEHP_COMMON_SLICE_CHICKEN3
,
783 XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE
);
786 * Wa_22010465075:dg2_g10
787 * Wa_22010613112:dg2_g10
788 * Wa_14010698770:dg2_g10
790 wa_mcr_masked_en(wal
, XEHP_COMMON_SLICE_CHICKEN3
,
791 GEN12_DISABLE_CPS_AWARE_COLOR_PIPE
);
794 /* Wa_16013271637:dg2 */
795 wa_mcr_masked_en(wal
, XEHP_SLICE_COMMON_ECO_CHICKEN1
,
796 MSC_MSAA_REODER_BUF_BYPASS_DISABLE
);
798 /* Wa_14014947963:dg2 */
799 if (IS_DG2_GRAPHICS_STEP(engine
->i915
, G10
, STEP_B0
, STEP_FOREVER
) ||
800 IS_DG2_G11(engine
->i915
) || IS_DG2_G12(engine
->i915
))
801 wa_masked_field_set(wal
, VF_PREEMPTION
, PREEMPTION_VERTEX_COUNT
, 0x4000);
803 /* Wa_18018764978:dg2 */
804 if (IS_DG2_GRAPHICS_STEP(engine
->i915
, G10
, STEP_C0
, STEP_FOREVER
) ||
805 IS_DG2_G11(engine
->i915
) || IS_DG2_G12(engine
->i915
))
806 wa_mcr_masked_en(wal
, XEHP_PSS_MODE2
, SCOREBOARD_STALL_FLUSH_CONTROL
);
808 /* Wa_15010599737:dg2 */
809 wa_mcr_masked_en(wal
, CHICKEN_RASTER_1
, DIS_SF_ROUND_NEAREST_EVEN
);
811 /* Wa_18019271663:dg2 */
812 wa_masked_en(wal
, CACHE_MODE_1
, MSAA_OPTIMIZATION_REDUC_DISABLE
);
815 static void mtl_ctx_workarounds_init(struct intel_engine_cs
*engine
,
816 struct i915_wa_list
*wal
)
818 struct drm_i915_private
*i915
= engine
->i915
;
820 if (IS_MTL_GRAPHICS_STEP(i915
, M
, STEP_A0
, STEP_B0
) ||
821 IS_MTL_GRAPHICS_STEP(i915
, P
, STEP_A0
, STEP_B0
)) {
823 wa_masked_field_set(wal
, VF_PREEMPTION
,
824 PREEMPTION_VERTEX_COUNT
, 0x4000);
827 wa_mcr_masked_en(wal
, XEHP_SLICE_COMMON_ECO_CHICKEN1
,
828 MSC_MSAA_REODER_BUF_BYPASS_DISABLE
);
831 wa_mcr_masked_en(wal
, VFLSKPD
, VF_PREFETCH_TLB_DIS
);
834 wa_mcr_masked_en(wal
, XEHP_PSS_MODE2
, SCOREBOARD_STALL_FLUSH_CONTROL
);
838 wa_masked_en(wal
, CACHE_MODE_1
, MSAA_OPTIMIZATION_REDUC_DISABLE
);
841 static void fakewa_disable_nestedbb_mode(struct intel_engine_cs
*engine
,
842 struct i915_wa_list
*wal
)
845 * This is a "fake" workaround defined by software to ensure we
846 * maintain reliable, backward-compatible behavior for userspace with
847 * regards to how nested MI_BATCH_BUFFER_START commands are handled.
849 * The per-context setting of MI_MODE[12] determines whether the bits
850 * of a nested MI_BATCH_BUFFER_START instruction should be interpreted
851 * in the traditional manner or whether they should instead use a new
852 * tgl+ meaning that breaks backward compatibility, but allows nesting
853 * into 3rd-level batchbuffers. When this new capability was first
854 * added in TGL, it remained off by default unless a context
855 * intentionally opted in to the new behavior. However Xe_HPG now
856 * flips this on by default and requires that we explicitly opt out if
857 * we don't want the new behavior.
859 * From a SW perspective, we want to maintain the backward-compatible
860 * behavior for userspace, so we'll apply a fake workaround to set it
861 * back to the legacy behavior on platforms where the hardware default
862 * is to break compatibility. At the moment there is no Linux
863 * userspace that utilizes third-level batchbuffers, so this will avoid
864 * userspace from needing to make any changes. Using the legacy
865 * meaning is the correct thing to do. If/when we have userspace
866 * consumers that want to utilize third-level batch nesting, we can
867 * provide a context parameter to allow them to opt-in.
869 wa_masked_dis(wal
, RING_MI_MODE(engine
->mmio_base
), TGL_NESTED_BB_EN
);
872 static void gen12_ctx_gt_mocs_init(struct intel_engine_cs
*engine
,
873 struct i915_wa_list
*wal
)
878 * Some blitter commands do not have a field for MOCS; those
879 * commands will use the MOCS index pointed to by BLIT_CCTL.
880 * BLIT_CCTL registers need to be programmed to un-cached.
882 if (engine
->class == COPY_ENGINE_CLASS
) {
883 mocs
= engine
->gt
->mocs
.uc_index
;
884 wa_write_clr_set(wal
,
885 BLIT_CCTL(engine
->mmio_base
),
887 BLIT_CCTL_MOCS(mocs
, mocs
));
892 * gen12_ctx_gt_fake_wa_init() isn't programming an official workaround
893 * defined by the hardware team, but it is programming general context registers.
894 * Adding that context register programming to the context workarounds
895 * allows us to use the wa framework for proper application and validation.
898 gen12_ctx_gt_fake_wa_init(struct intel_engine_cs
*engine
,
899 struct i915_wa_list
*wal
)
901 if (GRAPHICS_VER_FULL(engine
->i915
) >= IP_VER(12, 55))
902 fakewa_disable_nestedbb_mode(engine
, wal
);
904 gen12_ctx_gt_mocs_init(engine
, wal
);
908 __intel_engine_init_ctx_wa(struct intel_engine_cs
*engine
,
909 struct i915_wa_list
*wal
,
912 struct drm_i915_private
*i915
= engine
->i915
;
914 wa_init_start(wal
, engine
->gt
, name
, engine
->name
);
916 /* Applies to all engines */
918 * Fake workarounds are not actual workarounds, but
919 * programming of context registers using the workaround framework.
921 if (GRAPHICS_VER(i915
) >= 12)
922 gen12_ctx_gt_fake_wa_init(engine
, wal
);
924 if (engine
->class != RENDER_CLASS
)
927 if (IS_METEORLAKE(i915
))
928 mtl_ctx_workarounds_init(engine
, wal
);
929 else if (IS_PONTEVECCHIO(i915
))
930 ; /* noop; none at this time */
931 else if (IS_DG2(i915
))
932 dg2_ctx_workarounds_init(engine
, wal
);
933 else if (IS_XEHPSDV(i915
))
934 ; /* noop; none at this time */
935 else if (IS_DG1(i915
))
936 dg1_ctx_workarounds_init(engine
, wal
);
937 else if (GRAPHICS_VER(i915
) == 12)
938 gen12_ctx_workarounds_init(engine
, wal
);
939 else if (GRAPHICS_VER(i915
) == 11)
940 icl_ctx_workarounds_init(engine
, wal
);
941 else if (IS_COFFEELAKE(i915
) || IS_COMETLAKE(i915
))
942 cfl_ctx_workarounds_init(engine
, wal
);
943 else if (IS_GEMINILAKE(i915
))
944 glk_ctx_workarounds_init(engine
, wal
);
945 else if (IS_KABYLAKE(i915
))
946 kbl_ctx_workarounds_init(engine
, wal
);
947 else if (IS_BROXTON(i915
))
948 bxt_ctx_workarounds_init(engine
, wal
);
949 else if (IS_SKYLAKE(i915
))
950 skl_ctx_workarounds_init(engine
, wal
);
951 else if (IS_CHERRYVIEW(i915
))
952 chv_ctx_workarounds_init(engine
, wal
);
953 else if (IS_BROADWELL(i915
))
954 bdw_ctx_workarounds_init(engine
, wal
);
955 else if (GRAPHICS_VER(i915
) == 7)
956 gen7_ctx_workarounds_init(engine
, wal
);
957 else if (GRAPHICS_VER(i915
) == 6)
958 gen6_ctx_workarounds_init(engine
, wal
);
959 else if (GRAPHICS_VER(i915
) < 8)
962 MISSING_CASE(GRAPHICS_VER(i915
));
968 void intel_engine_init_ctx_wa(struct intel_engine_cs
*engine
)
970 __intel_engine_init_ctx_wa(engine
, &engine
->ctx_wa_list
, "context");
973 int intel_engine_emit_ctx_wa(struct i915_request
*rq
)
975 struct i915_wa_list
*wal
= &rq
->engine
->ctx_wa_list
;
984 ret
= rq
->engine
->emit_flush(rq
, EMIT_BARRIER
);
988 cs
= intel_ring_begin(rq
, (wal
->count
* 2 + 2));
992 *cs
++ = MI_LOAD_REGISTER_IMM(wal
->count
);
993 for (i
= 0, wa
= wal
->list
; i
< wal
->count
; i
++, wa
++) {
994 *cs
++ = i915_mmio_reg_offset(wa
->reg
);
999 intel_ring_advance(rq
, cs
);
1001 ret
= rq
->engine
->emit_flush(rq
, EMIT_BARRIER
);
1009 gen4_gt_workarounds_init(struct intel_gt
*gt
,
1010 struct i915_wa_list
*wal
)
1012 /* WaDisable_RenderCache_OperationalFlush:gen4,ilk */
1013 wa_masked_dis(wal
, CACHE_MODE_0
, RC_OP_FLUSH_ENABLE
);
1017 g4x_gt_workarounds_init(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1019 gen4_gt_workarounds_init(gt
, wal
);
1021 /* WaDisableRenderCachePipelinedFlush:g4x,ilk */
1022 wa_masked_en(wal
, CACHE_MODE_0
, CM0_PIPELINED_RENDER_FLUSH_DISABLE
);
1026 ilk_gt_workarounds_init(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1028 g4x_gt_workarounds_init(gt
, wal
);
1030 wa_masked_en(wal
, _3D_CHICKEN2
, _3D_CHICKEN2_WM_READ_PIPELINED
);
1034 snb_gt_workarounds_init(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1039 ivb_gt_workarounds_init(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1041 /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
1043 GEN7_COMMON_SLICE_CHICKEN1
,
1044 GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC
);
1046 /* WaApplyL3ControlAndL3ChickenMode:ivb */
1047 wa_write(wal
, GEN7_L3CNTLREG1
, GEN7_WA_FOR_GEN7_L3_CONTROL
);
1048 wa_write(wal
, GEN7_L3_CHICKEN_MODE_REGISTER
, GEN7_WA_L3_CHICKEN_MODE
);
1050 /* WaForceL3Serialization:ivb */
1051 wa_write_clr(wal
, GEN7_L3SQCREG4
, L3SQ_URB_READ_CAM_MATCH_DISABLE
);
1055 vlv_gt_workarounds_init(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1057 /* WaForceL3Serialization:vlv */
1058 wa_write_clr(wal
, GEN7_L3SQCREG4
, L3SQ_URB_READ_CAM_MATCH_DISABLE
);
1061 * WaIncreaseL3CreditsForVLVB0:vlv
1062 * This is the hardware default actually.
1064 wa_write(wal
, GEN7_L3SQCREG1
, VLV_B0_WA_L3SQCREG1_VALUE
);
1068 hsw_gt_workarounds_init(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1070 /* L3 caching of data atomics doesn't work -- disable it. */
1071 wa_write(wal
, HSW_SCRATCH1
, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE
);
1074 HSW_ROW_CHICKEN3
, 0,
1075 _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE
),
1076 0 /* XXX does this reg exist? */, true);
1078 /* WaVSRefCountFullforceMissDisable:hsw */
1079 wa_write_clr(wal
, GEN7_FF_THREAD_MODE
, GEN7_FF_VS_REF_CNT_FFME
);
1083 gen9_wa_init_mcr(struct drm_i915_private
*i915
, struct i915_wa_list
*wal
)
1085 const struct sseu_dev_info
*sseu
= &to_gt(i915
)->info
.sseu
;
1086 unsigned int slice
, subslice
;
1089 GEM_BUG_ON(GRAPHICS_VER(i915
) != 9);
1092 * WaProgramMgsrForCorrectSliceSpecificMmioReads:gen9,glk,kbl,cml
1093 * Before any MMIO read into slice/subslice specific registers, MCR
1094 * packet control register needs to be programmed to point to any
1095 * enabled s/ss pair. Otherwise, incorrect values will be returned.
1096 * This means each subsequent MMIO read will be forwarded to an
1097 * specific s/ss combination, but this is OK since these registers
1098 * are consistent across s/ss in almost all cases. In the rare
1099 * occasions, such as INSTDONE, where this value is dependent
1100 * on s/ss combo, the read should be done with read_subslice_reg.
1102 slice
= ffs(sseu
->slice_mask
) - 1;
1103 GEM_BUG_ON(slice
>= ARRAY_SIZE(sseu
->subslice_mask
.hsw
));
1104 subslice
= ffs(intel_sseu_get_hsw_subslices(sseu
, slice
));
1105 GEM_BUG_ON(!subslice
);
1109 * We use GEN8_MCR..() macros to calculate the |mcr| value for
1110 * Gen9 to address WaProgramMgsrForCorrectSliceSpecificMmioReads
1112 mcr
= GEN8_MCR_SLICE(slice
) | GEN8_MCR_SUBSLICE(subslice
);
1113 mcr_mask
= GEN8_MCR_SLICE_MASK
| GEN8_MCR_SUBSLICE_MASK
;
1115 drm_dbg(&i915
->drm
, "MCR slice:%d/subslice:%d = %x\n", slice
, subslice
, mcr
);
1117 wa_write_clr_set(wal
, GEN8_MCR_SELECTOR
, mcr_mask
, mcr
);
1121 gen9_gt_workarounds_init(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1123 struct drm_i915_private
*i915
= gt
->i915
;
1125 /* WaProgramMgsrForCorrectSliceSpecificMmioReads:glk,kbl,cml,gen9 */
1126 gen9_wa_init_mcr(i915
, wal
);
1128 /* WaDisableKillLogic:bxt,skl,kbl */
1129 if (!IS_COFFEELAKE(i915
) && !IS_COMETLAKE(i915
))
1134 if (HAS_LLC(i915
)) {
1135 /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
1137 * Must match Display Engine. See
1138 * WaCompressedResourceDisplayNewHashMode.
1142 MMCD_PCLA
| MMCD_HOTSPOT_EN
);
1145 /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
1148 BDW_DISABLE_HDC_INVALIDATION
);
1152 skl_gt_workarounds_init(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1154 gen9_gt_workarounds_init(gt
, wal
);
1156 /* WaDisableGafsUnitClkGating:skl */
1159 GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE
);
1161 /* WaInPlaceDecompressionHang:skl */
1162 if (IS_SKL_GRAPHICS_STEP(gt
->i915
, STEP_A0
, STEP_H0
))
1164 GEN9_GAMT_ECO_REG_RW_IA
,
1165 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS
);
1169 kbl_gt_workarounds_init(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1171 gen9_gt_workarounds_init(gt
, wal
);
1173 /* WaDisableDynamicCreditSharing:kbl */
1174 if (IS_KBL_GRAPHICS_STEP(gt
->i915
, 0, STEP_C0
))
1177 GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING
);
1179 /* WaDisableGafsUnitClkGating:kbl */
1182 GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE
);
1184 /* WaInPlaceDecompressionHang:kbl */
1186 GEN9_GAMT_ECO_REG_RW_IA
,
1187 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS
);
/* GT workarounds for glk: only the common gen9 list. */
static void
glk_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(gt, wal);
}
1197 cfl_gt_workarounds_init(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1199 gen9_gt_workarounds_init(gt
, wal
);
1201 /* WaDisableGafsUnitClkGating:cfl */
1204 GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE
);
1206 /* WaInPlaceDecompressionHang:cfl */
1208 GEN9_GAMT_ECO_REG_RW_IA
,
1209 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS
);
1212 static void __set_mcr_steering(struct i915_wa_list
*wal
,
1213 i915_reg_t steering_reg
,
1214 unsigned int slice
, unsigned int subslice
)
1218 mcr
= GEN11_MCR_SLICE(slice
) | GEN11_MCR_SUBSLICE(subslice
);
1219 mcr_mask
= GEN11_MCR_SLICE_MASK
| GEN11_MCR_SUBSLICE_MASK
;
1221 wa_write_clr_set(wal
, steering_reg
, mcr_mask
, mcr
);
1224 static void debug_dump_steering(struct intel_gt
*gt
)
1226 struct drm_printer p
= drm_debug_printer("MCR Steering:");
1228 if (drm_debug_enabled(DRM_UT_DRIVER
))
1229 intel_gt_mcr_report_steering(&p
, gt
, false);
1232 static void __add_mcr_wa(struct intel_gt
*gt
, struct i915_wa_list
*wal
,
1233 unsigned int slice
, unsigned int subslice
)
1235 __set_mcr_steering(wal
, GEN8_MCR_SELECTOR
, slice
, subslice
);
1237 gt
->default_steering
.groupid
= slice
;
1238 gt
->default_steering
.instanceid
= subslice
;
1240 debug_dump_steering(gt
);
1244 icl_wa_init_mcr(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1246 const struct sseu_dev_info
*sseu
= >
->info
.sseu
;
1247 unsigned int subslice
;
1249 GEM_BUG_ON(GRAPHICS_VER(gt
->i915
) < 11);
1250 GEM_BUG_ON(hweight8(sseu
->slice_mask
) > 1);
1253 * Although a platform may have subslices, we need to always steer
1254 * reads to the lowest instance that isn't fused off. When Render
1255 * Power Gating is enabled, grabbing forcewake will only power up a
1256 * single subslice (the "minconfig") if there isn't a real workload
1257 * that needs to be run; this means that if we steer register reads to
1258 * one of the higher subslices, we run the risk of reading back 0's or
1261 subslice
= __ffs(intel_sseu_get_hsw_subslices(sseu
, 0));
1264 * If the subslice we picked above also steers us to a valid L3 bank,
1265 * then we can just rely on the default steering and won't need to
1266 * worry about explicitly re-steering L3BANK reads later.
1268 if (gt
->info
.l3bank_mask
& BIT(subslice
))
1269 gt
->steering_table
[L3BANK
] = NULL
;
1271 __add_mcr_wa(gt
, wal
, 0, subslice
);
1275 xehp_init_mcr(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1277 const struct sseu_dev_info
*sseu
= >
->info
.sseu
;
1278 unsigned long slice
, subslice
= 0, slice_mask
= 0;
1283 * On Xe_HP the steering increases in complexity. There are now several
1284 * more units that require steering and we're not guaranteed to be able
1285 * to find a common setting for all of them. These are:
1286 * - GSLICE (fusable)
1287 * - DSS (sub-unit within gslice; fusable)
1288 * - L3 Bank (fusable)
1289 * - MSLICE (fusable)
1290 * - LNCF (sub-unit within mslice; always present if mslice is present)
1292 * We'll do our default/implicit steering based on GSLICE (in the
1293 * sliceid field) and DSS (in the subsliceid field). If we can
1294 * find overlap between the valid MSLICE and/or LNCF values with
1295 * a suitable GSLICE, then we can just re-use the default value and
1296 * skip and explicit steering at runtime.
1298 * We only need to look for overlap between GSLICE/MSLICE/LNCF to find
1299 * a valid sliceid value. DSS steering is the only type of steering
1300 * that utilizes the 'subsliceid' bits.
1302 * Also note that, even though the steering domain is called "GSlice"
1303 * and it is encoded in the register using the gslice format, the spec
1304 * says that the combined (geometry | compute) fuse should be used to
1305 * select the steering.
1308 /* Find the potential gslice candidates */
1309 slice_mask
= intel_slicemask_from_xehp_dssmask(sseu
->subslice_mask
,
1310 GEN_DSS_PER_GSLICE
);
1313 * Find the potential LNCF candidates. Either LNCF within a valid
1316 for_each_set_bit(i
, >
->info
.mslice_mask
, GEN12_MAX_MSLICES
)
1317 lncf_mask
|= (0x3 << (i
* 2));
1320 * Are there any sliceid values that work for both GSLICE and LNCF
1323 if (slice_mask
& lncf_mask
) {
1324 slice_mask
&= lncf_mask
;
1325 gt
->steering_table
[LNCF
] = NULL
;
1328 /* How about sliceid values that also work for MSLICE steering? */
1329 if (slice_mask
& gt
->info
.mslice_mask
) {
1330 slice_mask
&= gt
->info
.mslice_mask
;
1331 gt
->steering_table
[MSLICE
] = NULL
;
1334 if (IS_XEHPSDV(gt
->i915
) && slice_mask
& BIT(0))
1335 gt
->steering_table
[GAM
] = NULL
;
1337 slice
= __ffs(slice_mask
);
1338 subslice
= intel_sseu_find_first_xehp_dss(sseu
, GEN_DSS_PER_GSLICE
, slice
) %
1341 __add_mcr_wa(gt
, wal
, slice
, subslice
);
1344 * SQIDI ranges are special because they use different steering
1345 * registers than everything else we work with. On XeHP SDV and
1346 * DG2-G10, any value in the steering registers will work fine since
1347 * all instances are present, but DG2-G11 only has SQIDI instances at
1348 * ID's 2 and 3, so we need to steer to one of those. For simplicity
1349 * we'll just steer to a hardcoded "2" since that value will work
1352 __set_mcr_steering(wal
, MCFG_MCR_SELECTOR
, 0, 2);
1353 __set_mcr_steering(wal
, SF_MCR_SELECTOR
, 0, 2);
1356 * On DG2, GAM registers have a dedicated steering control register
1357 * and must always be programmed to a hardcoded groupid of "1."
1359 if (IS_DG2(gt
->i915
))
1360 __set_mcr_steering(wal
, GAM_MCR_SELECTOR
, 1, 0);
1364 pvc_init_mcr(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1369 * Setup implicit steering for COMPUTE and DSS ranges to the first
1370 * non-fused-off DSS. All other types of MCR registers will be
1371 * explicitly steered.
1373 dss
= intel_sseu_find_first_xehp_dss(>
->info
.sseu
, 0, 0);
1374 __add_mcr_wa(gt
, wal
, dss
/ GEN_DSS_PER_CSLICE
, dss
% GEN_DSS_PER_CSLICE
);
1378 icl_gt_workarounds_init(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1380 struct drm_i915_private
*i915
= gt
->i915
;
1382 icl_wa_init_mcr(gt
, wal
);
1384 /* WaModifyGamTlbPartitioning:icl */
1385 wa_write_clr_set(wal
,
1386 GEN11_GACB_PERF_CTRL
,
1387 GEN11_HASH_CTRL_MASK
,
1388 GEN11_HASH_CTRL_BIT0
| GEN11_HASH_CTRL_BIT4
);
1390 /* Wa_1405766107:icl
1391 * Formerly known as WaCL2SFHalfMaxAlloc
1395 GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC
|
1396 GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC
);
1399 * Formerly known as WaDisCtxReload
1402 GEN8_GAMW_ECO_DEV_RW_IA
,
1403 GAMW_ECO_DEV_CTX_RELOAD_DISABLE
);
1405 /* Wa_1406463099:icl
1406 * Formerly known as WaGamTlbPendError
1410 GAMT_CHKN_DISABLE_L3_COH_PIPE
);
1413 * Wa_1408615072:icl,ehl (vsunit)
1414 * Wa_1407596294:icl,ehl (hsunit)
1416 wa_write_or(wal
, UNSLICE_UNIT_LEVEL_CLKGATE
,
1417 VSUNIT_CLKGATE_DIS
| HSUNIT_CLKGATE_DIS
);
1419 /* Wa_1407352427:icl,ehl */
1420 wa_write_or(wal
, UNSLICE_UNIT_LEVEL_CLKGATE2
,
1421 PSDUNIT_CLKGATE_DIS
);
1423 /* Wa_1406680159:icl,ehl */
1424 wa_mcr_write_or(wal
,
1425 GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE
,
1426 GWUNIT_CLKGATE_DIS
);
1428 /* Wa_1607087056:icl,ehl,jsl */
1429 if (IS_ICELAKE(i915
) ||
1430 IS_JSL_EHL_GRAPHICS_STEP(i915
, STEP_A0
, STEP_B0
))
1432 GEN11_SLICE_UNIT_LEVEL_CLKGATE
,
1433 L3_CLKGATE_DIS
| L3_CR2X_CLKGATE_DIS
);
1436 * This is not a documented workaround, but rather an optimization
1437 * to reduce sampler power.
1439 wa_mcr_write_clr(wal
, GEN10_DFR_RATIO_EN_AND_CHICKEN
, DFR_DISABLE
);
1443 * Though there are per-engine instances of these registers,
1444 * they retain their value through engine resets and should
1445 * only be provided on the GT workaround list rather than
1446 * the engine-specific workaround list.
1449 wa_14011060649(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1451 struct intel_engine_cs
*engine
;
1454 for_each_engine(engine
, gt
, id
) {
1455 if (engine
->class != VIDEO_DECODE_CLASS
||
1456 (engine
->instance
% 2))
1459 wa_write_or(wal
, VDBOX_CGCTL3F10(engine
->mmio_base
),
1460 IECPUNIT_CLKGATE_DIS
);
1465 gen12_gt_workarounds_init(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1467 icl_wa_init_mcr(gt
, wal
);
1469 /* Wa_14011060649:tgl,rkl,dg1,adl-s,adl-p */
1470 wa_14011060649(gt
, wal
);
1472 /* Wa_14011059788:tgl,rkl,adl-s,dg1,adl-p */
1473 wa_mcr_write_or(wal
, GEN10_DFR_RATIO_EN_AND_CHICKEN
, DFR_DISABLE
);
1477 dg1_gt_workarounds_init(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1479 gen12_gt_workarounds_init(gt
, wal
);
1481 /* Wa_1409420604:dg1 */
1482 wa_mcr_write_or(wal
, SUBSLICE_UNIT_LEVEL_CLKGATE2
,
1483 CPSSUNIT_CLKGATE_DIS
);
1485 /* Wa_1408615072:dg1 */
1486 /* Empirical testing shows this register is unaffected by engine reset. */
1487 wa_write_or(wal
, UNSLICE_UNIT_LEVEL_CLKGATE2
, VSUNIT_CLKGATE_DIS_TGL
);
1491 xehpsdv_gt_workarounds_init(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1493 struct drm_i915_private
*i915
= gt
->i915
;
1495 xehp_init_mcr(gt
, wal
);
1497 /* Wa_1409757795:xehpsdv */
1498 wa_mcr_write_or(wal
, SCCGCTL94DC
, CG3DDISURB
);
1500 /* Wa_18011725039:xehpsdv */
1501 if (IS_XEHPSDV_GRAPHICS_STEP(i915
, STEP_A1
, STEP_B0
)) {
1502 wa_mcr_masked_dis(wal
, MLTICTXCTL
, TDONRENDER
);
1503 wa_mcr_write_or(wal
, L3SQCREG1_CCS0
, FLUSHALLNONCOH
);
1506 /* Wa_16011155590:xehpsdv */
1507 if (IS_XEHPSDV_GRAPHICS_STEP(i915
, STEP_A0
, STEP_B0
))
1508 wa_write_or(wal
, UNSLICE_UNIT_LEVEL_CLKGATE
,
1509 TSGUNIT_CLKGATE_DIS
);
1511 /* Wa_14011780169:xehpsdv */
1512 if (IS_XEHPSDV_GRAPHICS_STEP(i915
, STEP_B0
, STEP_FOREVER
)) {
1513 wa_write_or(wal
, UNSLCGCTL9440
, GAMTLBOACS_CLKGATE_DIS
|
1514 GAMTLBVDBOX7_CLKGATE_DIS
|
1515 GAMTLBVDBOX6_CLKGATE_DIS
|
1516 GAMTLBVDBOX5_CLKGATE_DIS
|
1517 GAMTLBVDBOX4_CLKGATE_DIS
|
1518 GAMTLBVDBOX3_CLKGATE_DIS
|
1519 GAMTLBVDBOX2_CLKGATE_DIS
|
1520 GAMTLBVDBOX1_CLKGATE_DIS
|
1521 GAMTLBVDBOX0_CLKGATE_DIS
|
1522 GAMTLBKCR_CLKGATE_DIS
|
1523 GAMTLBGUC_CLKGATE_DIS
|
1524 GAMTLBBLT_CLKGATE_DIS
);
1525 wa_write_or(wal
, UNSLCGCTL9444
, GAMTLBGFXA0_CLKGATE_DIS
|
1526 GAMTLBGFXA1_CLKGATE_DIS
|
1527 GAMTLBCOMPA0_CLKGATE_DIS
|
1528 GAMTLBCOMPA1_CLKGATE_DIS
|
1529 GAMTLBCOMPB0_CLKGATE_DIS
|
1530 GAMTLBCOMPB1_CLKGATE_DIS
|
1531 GAMTLBCOMPC0_CLKGATE_DIS
|
1532 GAMTLBCOMPC1_CLKGATE_DIS
|
1533 GAMTLBCOMPD0_CLKGATE_DIS
|
1534 GAMTLBCOMPD1_CLKGATE_DIS
|
1535 GAMTLBMERT_CLKGATE_DIS
|
1536 GAMTLBVEBOX3_CLKGATE_DIS
|
1537 GAMTLBVEBOX2_CLKGATE_DIS
|
1538 GAMTLBVEBOX1_CLKGATE_DIS
|
1539 GAMTLBVEBOX0_CLKGATE_DIS
);
1542 /* Wa_16012725990:xehpsdv */
1543 if (IS_XEHPSDV_GRAPHICS_STEP(i915
, STEP_A1
, STEP_FOREVER
))
1544 wa_write_or(wal
, UNSLICE_UNIT_LEVEL_CLKGATE
, VFUNIT_CLKGATE_DIS
);
1546 /* Wa_14011060649:xehpsdv */
1547 wa_14011060649(gt
, wal
);
1549 /* Wa_14012362059:xehpsdv */
1550 wa_mcr_write_or(wal
, XEHP_MERT_MOD_CTRL
, FORCE_MISS_FTLB
);
1552 /* Wa_14014368820:xehpsdv */
1553 wa_mcr_write_or(wal
, XEHP_GAMCNTRL_CTRL
,
1554 INVALIDATION_BROADCAST_MODE_DIS
| GLOBAL_INVALIDATION_MODE
);
1556 /* Wa_14010670810:xehpsdv */
1557 wa_mcr_write_or(wal
, XEHP_L3NODEARBCFG
, XEHP_LNESPARE
);
1561 dg2_gt_workarounds_init(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1563 struct intel_engine_cs
*engine
;
1566 xehp_init_mcr(gt
, wal
);
1568 /* Wa_14011060649:dg2 */
1569 wa_14011060649(gt
, wal
);
1572 * Although there are per-engine instances of these registers,
1573 * they technically exist outside the engine itself and are not
1574 * impacted by engine resets. Furthermore, they're part of the
1575 * GuC blacklist so trying to treat them as engine workarounds
1576 * will result in GuC initialization failure and a wedged GPU.
1578 for_each_engine(engine
, gt
, id
) {
1579 if (engine
->class != VIDEO_DECODE_CLASS
)
1582 /* Wa_16010515920:dg2_g10 */
1583 if (IS_DG2_GRAPHICS_STEP(gt
->i915
, G10
, STEP_A0
, STEP_B0
))
1584 wa_write_or(wal
, VDBOX_CGCTL3F18(engine
->mmio_base
),
1585 ALNUNIT_CLKGATE_DIS
);
1588 if (IS_DG2_G10(gt
->i915
)) {
1589 /* Wa_22010523718:dg2 */
1590 wa_write_or(wal
, UNSLICE_UNIT_LEVEL_CLKGATE
,
1591 CG3DDISCFEG_CLKGATE_DIS
);
1593 /* Wa_14011006942:dg2 */
1594 wa_mcr_write_or(wal
, GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE
,
1595 DSS_ROUTER_CLKGATE_DIS
);
1598 if (IS_DG2_GRAPHICS_STEP(gt
->i915
, G10
, STEP_A0
, STEP_B0
) ||
1599 IS_DG2_GRAPHICS_STEP(gt
->i915
, G11
, STEP_A0
, STEP_B0
)) {
1600 /* Wa_14012362059:dg2 */
1601 wa_mcr_write_or(wal
, XEHP_MERT_MOD_CTRL
, FORCE_MISS_FTLB
);
1604 if (IS_DG2_GRAPHICS_STEP(gt
->i915
, G10
, STEP_A0
, STEP_B0
)) {
1605 /* Wa_14010948348:dg2_g10 */
1606 wa_write_or(wal
, UNSLCGCTL9430
, MSQDUNIT_CLKGATE_DIS
);
1608 /* Wa_14011037102:dg2_g10 */
1609 wa_write_or(wal
, UNSLCGCTL9444
, LTCDD_CLKGATE_DIS
);
1611 /* Wa_14011371254:dg2_g10 */
1612 wa_mcr_write_or(wal
, XEHP_SLICE_UNIT_LEVEL_CLKGATE
, NODEDSS_CLKGATE_DIS
);
1614 /* Wa_14011431319:dg2_g10 */
1615 wa_write_or(wal
, UNSLCGCTL9440
, GAMTLBOACS_CLKGATE_DIS
|
1616 GAMTLBVDBOX7_CLKGATE_DIS
|
1617 GAMTLBVDBOX6_CLKGATE_DIS
|
1618 GAMTLBVDBOX5_CLKGATE_DIS
|
1619 GAMTLBVDBOX4_CLKGATE_DIS
|
1620 GAMTLBVDBOX3_CLKGATE_DIS
|
1621 GAMTLBVDBOX2_CLKGATE_DIS
|
1622 GAMTLBVDBOX1_CLKGATE_DIS
|
1623 GAMTLBVDBOX0_CLKGATE_DIS
|
1624 GAMTLBKCR_CLKGATE_DIS
|
1625 GAMTLBGUC_CLKGATE_DIS
|
1626 GAMTLBBLT_CLKGATE_DIS
);
1627 wa_write_or(wal
, UNSLCGCTL9444
, GAMTLBGFXA0_CLKGATE_DIS
|
1628 GAMTLBGFXA1_CLKGATE_DIS
|
1629 GAMTLBCOMPA0_CLKGATE_DIS
|
1630 GAMTLBCOMPA1_CLKGATE_DIS
|
1631 GAMTLBCOMPB0_CLKGATE_DIS
|
1632 GAMTLBCOMPB1_CLKGATE_DIS
|
1633 GAMTLBCOMPC0_CLKGATE_DIS
|
1634 GAMTLBCOMPC1_CLKGATE_DIS
|
1635 GAMTLBCOMPD0_CLKGATE_DIS
|
1636 GAMTLBCOMPD1_CLKGATE_DIS
|
1637 GAMTLBMERT_CLKGATE_DIS
|
1638 GAMTLBVEBOX3_CLKGATE_DIS
|
1639 GAMTLBVEBOX2_CLKGATE_DIS
|
1640 GAMTLBVEBOX1_CLKGATE_DIS
|
1641 GAMTLBVEBOX0_CLKGATE_DIS
);
1643 /* Wa_14010569222:dg2_g10 */
1644 wa_write_or(wal
, UNSLICE_UNIT_LEVEL_CLKGATE
,
1645 GAMEDIA_CLKGATE_DIS
);
1647 /* Wa_14011028019:dg2_g10 */
1648 wa_mcr_write_or(wal
, SSMCGCTL9530
, RTFUNIT_CLKGATE_DIS
);
1650 /* Wa_14010680813:dg2_g10 */
1651 wa_mcr_write_or(wal
, XEHP_GAMSTLB_CTRL
,
1652 CONTROL_BLOCK_CLKGATE_DIS
|
1653 EGRESS_BLOCK_CLKGATE_DIS
|
1654 TAG_BLOCK_CLKGATE_DIS
);
1657 /* Wa_14014830051:dg2 */
1658 wa_mcr_write_clr(wal
, SARB_CHICKEN1
, COMP_CKN_IN
);
1660 /* Wa_14015795083 */
1661 wa_write_clr(wal
, GEN7_MISCCPCTL
, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE
);
1663 /* Wa_18018781329 */
1664 wa_mcr_write_or(wal
, RENDER_MOD_CTRL
, FORCE_MISS_FTLB
);
1665 wa_mcr_write_or(wal
, COMP_MOD_CTRL
, FORCE_MISS_FTLB
);
1666 wa_mcr_write_or(wal
, XEHP_VDBX_MOD_CTRL
, FORCE_MISS_FTLB
);
1667 wa_mcr_write_or(wal
, XEHP_VEBX_MOD_CTRL
, FORCE_MISS_FTLB
);
1669 /* Wa_1509235366:dg2 */
1670 wa_mcr_write_or(wal
, XEHP_GAMCNTRL_CTRL
,
1671 INVALIDATION_BROADCAST_MODE_DIS
| GLOBAL_INVALIDATION_MODE
);
1673 /* Wa_14010648519:dg2 */
1674 wa_mcr_write_or(wal
, XEHP_L3NODEARBCFG
, XEHP_LNESPARE
);
1678 pvc_gt_workarounds_init(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1680 pvc_init_mcr(gt
, wal
);
1682 /* Wa_14015795083 */
1683 wa_write_clr(wal
, GEN7_MISCCPCTL
, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE
);
1685 /* Wa_18018781329 */
1686 wa_mcr_write_or(wal
, RENDER_MOD_CTRL
, FORCE_MISS_FTLB
);
1687 wa_mcr_write_or(wal
, COMP_MOD_CTRL
, FORCE_MISS_FTLB
);
1688 wa_mcr_write_or(wal
, XEHP_VDBX_MOD_CTRL
, FORCE_MISS_FTLB
);
1689 wa_mcr_write_or(wal
, XEHP_VEBX_MOD_CTRL
, FORCE_MISS_FTLB
);
1691 /* Wa_16016694945 */
1692 wa_mcr_masked_en(wal
, XEHPC_LNCFMISCCFGREG0
, XEHPC_OVRLSCCC
);
1696 xelpg_gt_workarounds_init(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1698 if (IS_MTL_GRAPHICS_STEP(gt
->i915
, M
, STEP_A0
, STEP_B0
) ||
1699 IS_MTL_GRAPHICS_STEP(gt
->i915
, P
, STEP_A0
, STEP_B0
)) {
1700 /* Wa_14014830051 */
1701 wa_mcr_write_clr(wal
, SARB_CHICKEN1
, COMP_CKN_IN
);
1703 /* Wa_18018781329 */
1704 wa_mcr_write_or(wal
, RENDER_MOD_CTRL
, FORCE_MISS_FTLB
);
1705 wa_mcr_write_or(wal
, COMP_MOD_CTRL
, FORCE_MISS_FTLB
);
1709 * Unlike older platforms, we no longer setup implicit steering here;
1710 * all MCR accesses are explicitly steered.
1712 debug_dump_steering(gt
);
1716 xelpmp_gt_workarounds_init(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1718 if (IS_MTL_MEDIA_STEP(gt
->i915
, STEP_A0
, STEP_B0
)) {
1722 * Note that although these registers are MCR on the primary
1723 * GT, the media GT's versions are regular singleton registers.
1725 wa_write_or(wal
, XELPMP_GSC_MOD_CTRL
, FORCE_MISS_FTLB
);
1726 wa_write_or(wal
, XELPMP_VDBX_MOD_CTRL
, FORCE_MISS_FTLB
);
1727 wa_write_or(wal
, XELPMP_VEBX_MOD_CTRL
, FORCE_MISS_FTLB
);
1730 debug_dump_steering(gt
);
1734 * The bspec performance guide has recommended MMIO tuning settings. These
1735 * aren't truly "workarounds" but we want to program them through the
1736 * workaround infrastructure to make sure they're (re)applied at the proper
1739 * The programming in this function is for settings that persist through
1740 * engine resets and also are not part of any engine's register state context.
1741 * I.e., settings that only need to be re-applied in the event of a full GT
1744 static void gt_tuning_settings(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1746 if (IS_PONTEVECCHIO(gt
->i915
)) {
1747 wa_mcr_write(wal
, XEHPC_L3SCRUB
,
1748 SCRUB_CL_DWNGRADE_SHARED
| SCRUB_RATE_4B_PER_CLK
);
1749 wa_mcr_masked_en(wal
, XEHPC_LNCFMISCCFGREG0
, XEHPC_HOSTCACHEEN
);
1752 if (IS_DG2(gt
->i915
)) {
1753 wa_mcr_write_or(wal
, XEHP_L3SCQREG7
, BLEND_FILL_CACHING_OPT_DIS
);
1754 wa_mcr_write_or(wal
, XEHP_SQCM
, EN_32B_ACCESS
);
1759 gt_init_workarounds(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1761 struct drm_i915_private
*i915
= gt
->i915
;
1763 gt_tuning_settings(gt
, wal
);
1765 if (gt
->type
== GT_MEDIA
) {
1766 if (MEDIA_VER(i915
) >= 13)
1767 xelpmp_gt_workarounds_init(gt
, wal
);
1769 MISSING_CASE(MEDIA_VER(i915
));
1774 if (GRAPHICS_VER_FULL(i915
) >= IP_VER(12, 70))
1775 xelpg_gt_workarounds_init(gt
, wal
);
1776 else if (IS_PONTEVECCHIO(i915
))
1777 pvc_gt_workarounds_init(gt
, wal
);
1778 else if (IS_DG2(i915
))
1779 dg2_gt_workarounds_init(gt
, wal
);
1780 else if (IS_XEHPSDV(i915
))
1781 xehpsdv_gt_workarounds_init(gt
, wal
);
1782 else if (IS_DG1(i915
))
1783 dg1_gt_workarounds_init(gt
, wal
);
1784 else if (GRAPHICS_VER(i915
) == 12)
1785 gen12_gt_workarounds_init(gt
, wal
);
1786 else if (GRAPHICS_VER(i915
) == 11)
1787 icl_gt_workarounds_init(gt
, wal
);
1788 else if (IS_COFFEELAKE(i915
) || IS_COMETLAKE(i915
))
1789 cfl_gt_workarounds_init(gt
, wal
);
1790 else if (IS_GEMINILAKE(i915
))
1791 glk_gt_workarounds_init(gt
, wal
);
1792 else if (IS_KABYLAKE(i915
))
1793 kbl_gt_workarounds_init(gt
, wal
);
1794 else if (IS_BROXTON(i915
))
1795 gen9_gt_workarounds_init(gt
, wal
);
1796 else if (IS_SKYLAKE(i915
))
1797 skl_gt_workarounds_init(gt
, wal
);
1798 else if (IS_HASWELL(i915
))
1799 hsw_gt_workarounds_init(gt
, wal
);
1800 else if (IS_VALLEYVIEW(i915
))
1801 vlv_gt_workarounds_init(gt
, wal
);
1802 else if (IS_IVYBRIDGE(i915
))
1803 ivb_gt_workarounds_init(gt
, wal
);
1804 else if (GRAPHICS_VER(i915
) == 6)
1805 snb_gt_workarounds_init(gt
, wal
);
1806 else if (GRAPHICS_VER(i915
) == 5)
1807 ilk_gt_workarounds_init(gt
, wal
);
1808 else if (IS_G4X(i915
))
1809 g4x_gt_workarounds_init(gt
, wal
);
1810 else if (GRAPHICS_VER(i915
) == 4)
1811 gen4_gt_workarounds_init(gt
, wal
);
1812 else if (GRAPHICS_VER(i915
) <= 8)
1815 MISSING_CASE(GRAPHICS_VER(i915
));
1818 void intel_gt_init_workarounds(struct intel_gt
*gt
)
1820 struct i915_wa_list
*wal
= >
->wa_list
;
1822 wa_init_start(wal
, gt
, "GT", "global");
1823 gt_init_workarounds(gt
, wal
);
1824 wa_init_finish(wal
);
1827 static enum forcewake_domains
1828 wal_get_fw_for_rmw(struct intel_uncore
*uncore
, const struct i915_wa_list
*wal
)
1830 enum forcewake_domains fw
= 0;
1834 for (i
= 0, wa
= wal
->list
; i
< wal
->count
; i
++, wa
++)
1835 fw
|= intel_uncore_forcewake_for_reg(uncore
,
1844 wa_verify(struct intel_gt
*gt
, const struct i915_wa
*wa
, u32 cur
,
1845 const char *name
, const char *from
)
1847 if ((cur
^ wa
->set
) & wa
->read
) {
1848 drm_err(>
->i915
->drm
,
1849 "%s workaround lost on %s! (reg[%x]=0x%x, relevant bits were 0x%x vs expected 0x%x)\n",
1850 name
, from
, i915_mmio_reg_offset(wa
->reg
),
1851 cur
, cur
& wa
->read
, wa
->set
& wa
->read
);
1859 static void wa_list_apply(const struct i915_wa_list
*wal
)
1861 struct intel_gt
*gt
= wal
->gt
;
1862 struct intel_uncore
*uncore
= gt
->uncore
;
1863 enum forcewake_domains fw
;
1864 unsigned long flags
;
1871 fw
= wal_get_fw_for_rmw(uncore
, wal
);
1873 intel_gt_mcr_lock(gt
, &flags
);
1874 spin_lock(&uncore
->lock
);
1875 intel_uncore_forcewake_get__locked(uncore
, fw
);
1877 for (i
= 0, wa
= wal
->list
; i
< wal
->count
; i
++, wa
++) {
1880 /* open-coded rmw due to steering */
1883 intel_gt_mcr_read_any_fw(gt
, wa
->mcr_reg
) :
1884 intel_uncore_read_fw(uncore
, wa
->reg
);
1885 val
= (old
& ~wa
->clr
) | wa
->set
;
1886 if (val
!= old
|| !wa
->clr
) {
1888 intel_gt_mcr_multicast_write_fw(gt
, wa
->mcr_reg
, val
);
1890 intel_uncore_write_fw(uncore
, wa
->reg
, val
);
1893 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM
)) {
1894 u32 val
= wa
->is_mcr
?
1895 intel_gt_mcr_read_any_fw(gt
, wa
->mcr_reg
) :
1896 intel_uncore_read_fw(uncore
, wa
->reg
);
1898 wa_verify(gt
, wa
, val
, wal
->name
, "application");
1902 intel_uncore_forcewake_put__locked(uncore
, fw
);
1903 spin_unlock(&uncore
->lock
);
1904 intel_gt_mcr_unlock(gt
, flags
);
1907 void intel_gt_apply_workarounds(struct intel_gt
*gt
)
1909 wa_list_apply(>
->wa_list
);
1912 static bool wa_list_verify(struct intel_gt
*gt
,
1913 const struct i915_wa_list
*wal
,
1916 struct intel_uncore
*uncore
= gt
->uncore
;
1918 enum forcewake_domains fw
;
1919 unsigned long flags
;
1923 fw
= wal_get_fw_for_rmw(uncore
, wal
);
1925 intel_gt_mcr_lock(gt
, &flags
);
1926 spin_lock(&uncore
->lock
);
1927 intel_uncore_forcewake_get__locked(uncore
, fw
);
1929 for (i
= 0, wa
= wal
->list
; i
< wal
->count
; i
++, wa
++)
1930 ok
&= wa_verify(wal
->gt
, wa
, wa
->is_mcr
?
1931 intel_gt_mcr_read_any_fw(gt
, wa
->mcr_reg
) :
1932 intel_uncore_read_fw(uncore
, wa
->reg
),
1935 intel_uncore_forcewake_put__locked(uncore
, fw
);
1936 spin_unlock(&uncore
->lock
);
1937 intel_gt_mcr_unlock(gt
, flags
);
1942 bool intel_gt_verify_workarounds(struct intel_gt
*gt
, const char *from
)
1944 return wa_list_verify(gt
, >
->wa_list
, from
);
1948 static bool is_nonpriv_flags_valid(u32 flags
)
1950 /* Check only valid flag bits are set */
1951 if (flags
& ~RING_FORCE_TO_NONPRIV_MASK_VALID
)
1954 /* NB: Only 3 out of 4 enum values are valid for access field */
1955 if ((flags
& RING_FORCE_TO_NONPRIV_ACCESS_MASK
) ==
1956 RING_FORCE_TO_NONPRIV_ACCESS_INVALID
)
1963 whitelist_reg_ext(struct i915_wa_list
*wal
, i915_reg_t reg
, u32 flags
)
1965 struct i915_wa wa
= {
1969 if (GEM_DEBUG_WARN_ON(wal
->count
>= RING_MAX_NONPRIV_SLOTS
))
1972 if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags
)))
1975 wa
.reg
.reg
|= flags
;
1980 whitelist_mcr_reg_ext(struct i915_wa_list
*wal
, i915_mcr_reg_t reg
, u32 flags
)
1982 struct i915_wa wa
= {
1987 if (GEM_DEBUG_WARN_ON(wal
->count
>= RING_MAX_NONPRIV_SLOTS
))
1990 if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags
)))
1993 wa
.mcr_reg
.reg
|= flags
;
1998 whitelist_reg(struct i915_wa_list
*wal
, i915_reg_t reg
)
2000 whitelist_reg_ext(wal
, reg
, RING_FORCE_TO_NONPRIV_ACCESS_RW
);
2004 whitelist_mcr_reg(struct i915_wa_list
*wal
, i915_mcr_reg_t reg
)
2006 whitelist_mcr_reg_ext(wal
, reg
, RING_FORCE_TO_NONPRIV_ACCESS_RW
);
2009 static void gen9_whitelist_build(struct i915_wa_list
*w
)
2011 /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
2012 whitelist_reg(w
, GEN9_CTX_PREEMPT_REG
);
2014 /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
2015 whitelist_reg(w
, GEN8_CS_CHICKEN1
);
2017 /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
2018 whitelist_reg(w
, GEN8_HDC_CHICKEN1
);
2020 /* WaSendPushConstantsFromMMIO:skl,bxt */
2021 whitelist_reg(w
, COMMON_SLICE_CHICKEN2
);
2024 static void skl_whitelist_build(struct intel_engine_cs
*engine
)
2026 struct i915_wa_list
*w
= &engine
->whitelist
;
2028 if (engine
->class != RENDER_CLASS
)
2031 gen9_whitelist_build(w
);
2033 /* WaDisableLSQCROPERFforOCL:skl */
2034 whitelist_mcr_reg(w
, GEN8_L3SQCREG4
);
2037 static void bxt_whitelist_build(struct intel_engine_cs
*engine
)
2039 if (engine
->class != RENDER_CLASS
)
2042 gen9_whitelist_build(&engine
->whitelist
);
2045 static void kbl_whitelist_build(struct intel_engine_cs
*engine
)
2047 struct i915_wa_list
*w
= &engine
->whitelist
;
2049 if (engine
->class != RENDER_CLASS
)
2052 gen9_whitelist_build(w
);
2054 /* WaDisableLSQCROPERFforOCL:kbl */
2055 whitelist_mcr_reg(w
, GEN8_L3SQCREG4
);
2058 static void glk_whitelist_build(struct intel_engine_cs
*engine
)
2060 struct i915_wa_list
*w
= &engine
->whitelist
;
2062 if (engine
->class != RENDER_CLASS
)
2065 gen9_whitelist_build(w
);
2067 /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
2068 whitelist_reg(w
, GEN9_SLICE_COMMON_ECO_CHICKEN1
);
2071 static void cfl_whitelist_build(struct intel_engine_cs
*engine
)
2073 struct i915_wa_list
*w
= &engine
->whitelist
;
2075 if (engine
->class != RENDER_CLASS
)
2078 gen9_whitelist_build(w
);
2081 * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml
2083 * This covers 4 register which are next to one another :
2084 * - PS_INVOCATION_COUNT
2085 * - PS_INVOCATION_COUNT_UDW
2087 * - PS_DEPTH_COUNT_UDW
2089 whitelist_reg_ext(w
, PS_INVOCATION_COUNT
,
2090 RING_FORCE_TO_NONPRIV_ACCESS_RD
|
2091 RING_FORCE_TO_NONPRIV_RANGE_4
);
2094 static void allow_read_ctx_timestamp(struct intel_engine_cs
*engine
)
2096 struct i915_wa_list
*w
= &engine
->whitelist
;
2098 if (engine
->class != RENDER_CLASS
)
2099 whitelist_reg_ext(w
,
2100 RING_CTX_TIMESTAMP(engine
->mmio_base
),
2101 RING_FORCE_TO_NONPRIV_ACCESS_RD
);
/* Build the cml whitelist: the context timestamp entry plus the cfl list. */
static void cml_whitelist_build(struct intel_engine_cs *engine)
{
	allow_read_ctx_timestamp(engine);

	cfl_whitelist_build(engine);
}
2111 static void icl_whitelist_build(struct intel_engine_cs
*engine
)
2113 struct i915_wa_list
*w
= &engine
->whitelist
;
2115 allow_read_ctx_timestamp(engine
);
2117 switch (engine
->class) {
2119 /* WaAllowUMDToModifyHalfSliceChicken7:icl */
2120 whitelist_mcr_reg(w
, GEN9_HALF_SLICE_CHICKEN7
);
2122 /* WaAllowUMDToModifySamplerMode:icl */
2123 whitelist_mcr_reg(w
, GEN10_SAMPLER_MODE
);
2125 /* WaEnableStateCacheRedirectToCS:icl */
2126 whitelist_reg(w
, GEN9_SLICE_COMMON_ECO_CHICKEN1
);
2129 * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl
2131 * This covers 4 register which are next to one another :
2132 * - PS_INVOCATION_COUNT
2133 * - PS_INVOCATION_COUNT_UDW
2135 * - PS_DEPTH_COUNT_UDW
2137 whitelist_reg_ext(w
, PS_INVOCATION_COUNT
,
2138 RING_FORCE_TO_NONPRIV_ACCESS_RD
|
2139 RING_FORCE_TO_NONPRIV_RANGE_4
);
2142 case VIDEO_DECODE_CLASS
:
2143 /* hucStatusRegOffset */
2144 whitelist_reg_ext(w
, _MMIO(0x2000 + engine
->mmio_base
),
2145 RING_FORCE_TO_NONPRIV_ACCESS_RD
);
2146 /* hucUKernelHdrInfoRegOffset */
2147 whitelist_reg_ext(w
, _MMIO(0x2014 + engine
->mmio_base
),
2148 RING_FORCE_TO_NONPRIV_ACCESS_RD
);
2149 /* hucStatus2RegOffset */
2150 whitelist_reg_ext(w
, _MMIO(0x23B0 + engine
->mmio_base
),
2151 RING_FORCE_TO_NONPRIV_ACCESS_RD
);
2159 static void tgl_whitelist_build(struct intel_engine_cs
*engine
)
2161 struct i915_wa_list
*w
= &engine
->whitelist
;
2163 allow_read_ctx_timestamp(engine
);
2165 switch (engine
->class) {
2168 * WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl
2171 * This covers 4 registers which are next to one another :
2172 * - PS_INVOCATION_COUNT
2173 * - PS_INVOCATION_COUNT_UDW
2175 * - PS_DEPTH_COUNT_UDW
2177 whitelist_reg_ext(w
, PS_INVOCATION_COUNT
,
2178 RING_FORCE_TO_NONPRIV_ACCESS_RD
|
2179 RING_FORCE_TO_NONPRIV_RANGE_4
);
2183 * Wa_14012131227:dg1
2184 * Wa_1508744258:tgl,rkl,dg1,adl-s,adl-p
2186 whitelist_reg(w
, GEN7_COMMON_SLICE_CHICKEN1
);
2188 /* Wa_1806527549:tgl */
2189 whitelist_reg(w
, HIZ_CHICKEN
);
2191 /* Required by recommended tuning setting (not a workaround) */
2192 whitelist_reg(w
, GEN11_COMMON_SLICE_CHICKEN3
);
2200 static void dg2_whitelist_build(struct intel_engine_cs
*engine
)
2202 struct i915_wa_list
*w
= &engine
->whitelist
;
2204 switch (engine
->class) {
2207 * Wa_1507100340:dg2_g10
2209 * This covers 4 registers which are next to one another :
2210 * - PS_INVOCATION_COUNT
2211 * - PS_INVOCATION_COUNT_UDW
2213 * - PS_DEPTH_COUNT_UDW
2215 if (IS_DG2_GRAPHICS_STEP(engine
->i915
, G10
, STEP_A0
, STEP_B0
))
2216 whitelist_reg_ext(w
, PS_INVOCATION_COUNT
,
2217 RING_FORCE_TO_NONPRIV_ACCESS_RD
|
2218 RING_FORCE_TO_NONPRIV_RANGE_4
);
2220 /* Required by recommended tuning setting (not a workaround) */
2221 whitelist_mcr_reg(w
, XEHP_COMMON_SLICE_CHICKEN3
);
2225 /* Wa_16011157294:dg2_g10 */
2226 if (IS_DG2_GRAPHICS_STEP(engine
->i915
, G10
, STEP_A0
, STEP_B0
))
2227 whitelist_reg(w
, GEN9_CTX_PREEMPT_REG
);
2234 static void blacklist_trtt(struct intel_engine_cs
*engine
)
2236 struct i915_wa_list
*w
= &engine
->whitelist
;
2239 * Prevent read/write access to [0x4400, 0x4600) which covers
2240 * the TRTT range across all engines. Note that normally userspace
2241 * cannot access the other engines' trtt control, but for simplicity
2242 * we cover the entire range on each engine.
2244 whitelist_reg_ext(w
, _MMIO(0x4400),
2245 RING_FORCE_TO_NONPRIV_DENY
|
2246 RING_FORCE_TO_NONPRIV_RANGE_64
);
2247 whitelist_reg_ext(w
, _MMIO(0x4500),
2248 RING_FORCE_TO_NONPRIV_DENY
|
2249 RING_FORCE_TO_NONPRIV_RANGE_64
);
/*
 * Build the whitelist for @engine on PVC; the only entries added are the
 * TRTT deny ranges from blacklist_trtt().
 */
static void pvc_whitelist_build(struct intel_engine_cs *engine)
{
	/* Wa_16014440446:pvc */
	blacklist_trtt(engine);
}
2258 static void mtl_whitelist_build(struct intel_engine_cs
*engine
)
2260 struct i915_wa_list
*w
= &engine
->whitelist
;
2262 switch (engine
->class) {
2264 /* Required by recommended tuning setting (not a workaround) */
2265 whitelist_mcr_reg(w
, XEHP_COMMON_SLICE_CHICKEN3
);
2273 void intel_engine_init_whitelist(struct intel_engine_cs
*engine
)
2275 struct drm_i915_private
*i915
= engine
->i915
;
2276 struct i915_wa_list
*w
= &engine
->whitelist
;
2278 wa_init_start(w
, engine
->gt
, "whitelist", engine
->name
);
2280 if (IS_METEORLAKE(i915
))
2281 mtl_whitelist_build(engine
);
2282 else if (IS_PONTEVECCHIO(i915
))
2283 pvc_whitelist_build(engine
);
2284 else if (IS_DG2(i915
))
2285 dg2_whitelist_build(engine
);
2286 else if (IS_XEHPSDV(i915
))
2288 else if (GRAPHICS_VER(i915
) == 12)
2289 tgl_whitelist_build(engine
);
2290 else if (GRAPHICS_VER(i915
) == 11)
2291 icl_whitelist_build(engine
);
2292 else if (IS_COMETLAKE(i915
))
2293 cml_whitelist_build(engine
);
2294 else if (IS_COFFEELAKE(i915
))
2295 cfl_whitelist_build(engine
);
2296 else if (IS_GEMINILAKE(i915
))
2297 glk_whitelist_build(engine
);
2298 else if (IS_KABYLAKE(i915
))
2299 kbl_whitelist_build(engine
);
2300 else if (IS_BROXTON(i915
))
2301 bxt_whitelist_build(engine
);
2302 else if (IS_SKYLAKE(i915
))
2303 skl_whitelist_build(engine
);
2304 else if (GRAPHICS_VER(i915
) <= 8)
2307 MISSING_CASE(GRAPHICS_VER(i915
));
2312 void intel_engine_apply_whitelist(struct intel_engine_cs
*engine
)
2314 const struct i915_wa_list
*wal
= &engine
->whitelist
;
2315 struct intel_uncore
*uncore
= engine
->uncore
;
2316 const u32 base
= engine
->mmio_base
;
2323 for (i
= 0, wa
= wal
->list
; i
< wal
->count
; i
++, wa
++)
2324 intel_uncore_write(uncore
,
2325 RING_FORCE_TO_NONPRIV(base
, i
),
2326 i915_mmio_reg_offset(wa
->reg
));
2328 /* And clear the rest just in case of garbage */
2329 for (; i
< RING_MAX_NONPRIV_SLOTS
; i
++)
2330 intel_uncore_write(uncore
,
2331 RING_FORCE_TO_NONPRIV(base
, i
),
2332 i915_mmio_reg_offset(RING_NOPID(base
)));
2336 * engine_fake_wa_init(), a place holder to program the registers
2337 * which are not part of an official workaround defined by the
2339 * Adding the programming of those registers inside a workaround will
2340 * allow utilizing the wa framework for proper application and verification.
2343 engine_fake_wa_init(struct intel_engine_cs
*engine
, struct i915_wa_list
*wal
)
2348 * RING_CMD_CCTL specifies the default MOCS entry that will be used
2349 * by the command streamer when executing commands that don't have
2350 * a way to explicitly specify a MOCS setting. The default should
2351 * usually reference whichever MOCS entry corresponds to uncached
2352 * behavior, although use of a WB cached entry is recommended by the
2353 * spec in certain circumstances on specific platforms.
2355 if (GRAPHICS_VER(engine
->i915
) >= 12) {
2356 mocs_r
= engine
->gt
->mocs
.uc_index
;
2357 mocs_w
= engine
->gt
->mocs
.uc_index
;
2359 if (HAS_L3_CCS_READ(engine
->i915
) &&
2360 engine
->class == COMPUTE_CLASS
) {
2361 mocs_r
= engine
->gt
->mocs
.wb_index
;
2364 * Even on the few platforms where MOCS 0 is a
2365 * legitimate table entry, it's never the correct
2366 * setting to use here; we can assume the MOCS init
2367 * just forgot to initialize wb_index.
2369 drm_WARN_ON(&engine
->i915
->drm
, mocs_r
== 0);
2372 wa_masked_field_set(wal
,
2373 RING_CMD_CCTL(engine
->mmio_base
),
2375 CMD_CCTL_MOCS_OVERRIDE(mocs_w
, mocs_r
));
2379 static bool needs_wa_1308578152(struct intel_engine_cs
*engine
)
2381 return intel_sseu_find_first_xehp_dss(&engine
->gt
->info
.sseu
, 0, 0) >=
2386 rcs_engine_wa_init(struct intel_engine_cs
*engine
, struct i915_wa_list
*wal
)
2388 struct drm_i915_private
*i915
= engine
->i915
;
2390 if (IS_MTL_GRAPHICS_STEP(i915
, M
, STEP_A0
, STEP_B0
) ||
2391 IS_MTL_GRAPHICS_STEP(i915
, P
, STEP_A0
, STEP_B0
)) {
2392 /* Wa_22014600077 */
2393 wa_mcr_masked_en(wal
, GEN10_CACHE_MODE_SS
,
2394 ENABLE_EU_COUNT_FOR_TDL_FLUSH
);
2397 if (IS_MTL_GRAPHICS_STEP(i915
, M
, STEP_A0
, STEP_B0
) ||
2398 IS_MTL_GRAPHICS_STEP(i915
, P
, STEP_A0
, STEP_B0
) ||
2399 IS_DG2_GRAPHICS_STEP(i915
, G10
, STEP_B0
, STEP_FOREVER
) ||
2400 IS_DG2_G11(i915
) || IS_DG2_G12(i915
)) {
2402 wa_mcr_masked_en(wal
, GEN10_SAMPLER_MODE
,
2403 SC_DISABLE_POWER_OPTIMIZATION_EBB
);
2406 if (IS_DG2_GRAPHICS_STEP(i915
, G10
, STEP_B0
, STEP_FOREVER
) ||
2407 IS_DG2_G11(i915
) || IS_DG2_G12(i915
) ||
2408 IS_MTL_GRAPHICS_STEP(i915
, M
, STEP_A0
, STEP_B0
)) {
2409 /* Wa_22012856258 */
2410 wa_mcr_masked_en(wal
, GEN8_ROW_CHICKEN2
,
2411 GEN12_DISABLE_READ_SUPPRESSION
);
2414 if (IS_DG2_GRAPHICS_STEP(i915
, G11
, STEP_A0
, STEP_B0
)) {
2415 /* Wa_14013392000:dg2_g11 */
2416 wa_mcr_masked_en(wal
, GEN8_ROW_CHICKEN2
, GEN12_ENABLE_LARGE_GRF_MODE
);
2419 if (IS_DG2_GRAPHICS_STEP(i915
, G10
, STEP_A0
, STEP_B0
) ||
2420 IS_DG2_GRAPHICS_STEP(i915
, G11
, STEP_A0
, STEP_B0
)) {
2421 /* Wa_14012419201:dg2 */
2422 wa_mcr_masked_en(wal
, GEN9_ROW_CHICKEN4
,
2423 GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX
);
2426 /* Wa_1308578152:dg2_g10 when first gslice is fused off */
2427 if (IS_DG2_GRAPHICS_STEP(i915
, G10
, STEP_B0
, STEP_C0
) &&
2428 needs_wa_1308578152(engine
)) {
2429 wa_masked_dis(wal
, GEN12_CS_DEBUG_MODE1_CCCSUNIT_BE_COMMON
,
2430 GEN12_REPLAY_MODE_GRANULARITY
);
2433 if (IS_DG2_GRAPHICS_STEP(i915
, G10
, STEP_B0
, STEP_FOREVER
) ||
2434 IS_DG2_G11(i915
) || IS_DG2_G12(i915
)) {
2436 * Wa_22010960976:dg2
2437 * Wa_14013347512:dg2
2439 wa_mcr_masked_dis(wal
, XEHP_HDC_CHICKEN0
,
2440 LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK
);
2443 if (IS_DG2_GRAPHICS_STEP(i915
, G10
, STEP_A0
, STEP_B0
)) {
2445 * Wa_1608949956:dg2_g10
2446 * Wa_14010198302:dg2_g10
2448 wa_mcr_masked_en(wal
, GEN8_ROW_CHICKEN
,
2449 MDQ_ARBITRATION_MODE
| UGM_BACKUP_MODE
);
2452 if (IS_DG2_GRAPHICS_STEP(i915
, G10
, STEP_A0
, STEP_B0
))
2453 /* Wa_22010430635:dg2 */
2454 wa_mcr_masked_en(wal
,
2456 GEN12_DISABLE_GRF_CLEAR
);
2458 /* Wa_14013202645:dg2 */
2459 if (IS_DG2_GRAPHICS_STEP(i915
, G10
, STEP_B0
, STEP_C0
) ||
2460 IS_DG2_GRAPHICS_STEP(i915
, G11
, STEP_A0
, STEP_B0
))
2461 wa_mcr_write_or(wal
, RT_CTRL
, DIS_NULL_QUERY
);
2463 /* Wa_22012532006:dg2 */
2464 if (IS_DG2_GRAPHICS_STEP(engine
->i915
, G10
, STEP_A0
, STEP_C0
) ||
2465 IS_DG2_GRAPHICS_STEP(engine
->i915
, G11
, STEP_A0
, STEP_B0
))
2466 wa_mcr_masked_en(wal
, GEN9_HALF_SLICE_CHICKEN7
,
2467 DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA
);
2469 if (IS_DG2_GRAPHICS_STEP(i915
, G11
, STEP_B0
, STEP_FOREVER
) ||
2471 /* Wa_22014600077:dg2 */
2472 wa_mcr_add(wal
, GEN10_CACHE_MODE_SS
, 0,
2473 _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH
),
2474 0 /* Wa_14012342262 write-only reg, so skip verification */,
2478 if (IS_ALDERLAKE_P(i915
) || IS_ALDERLAKE_S(i915
) || IS_DG1(i915
) ||
2479 IS_ROCKETLAKE(i915
) || IS_TIGERLAKE(i915
)) {
2480 /* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */
2481 wa_mcr_masked_en(wal
, GEN8_ROW_CHICKEN2
, GEN12_DISABLE_EARLY_READ
);
2484 * Wa_1407928979:tgl A*
2485 * Wa_18011464164:tgl[B0+],dg1[B0+]
2486 * Wa_22010931296:tgl[B0+],dg1[B0+]
2487 * Wa_14010919138:rkl,dg1,adl-s,adl-p
2489 wa_write_or(wal
, GEN7_FF_THREAD_MODE
,
2490 GEN12_FF_TESSELATION_DOP_GATE_DISABLE
);
2493 if (IS_ALDERLAKE_P(i915
) || IS_DG2(i915
) || IS_ALDERLAKE_S(i915
) ||
2494 IS_DG1(i915
) || IS_ROCKETLAKE(i915
) || IS_TIGERLAKE(i915
)) {
2496 * Wa_1606700617:tgl,dg1,adl-p
2497 * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p
2498 * Wa_14010826681:tgl,dg1,rkl,adl-p
2499 * Wa_18019627453:dg2
2502 GEN9_CS_DEBUG_MODE1
,
2503 FF_DOP_CLOCK_GATE_DISABLE
);
2506 if (IS_ALDERLAKE_P(i915
) || IS_ALDERLAKE_S(i915
) ||
2507 IS_ROCKETLAKE(i915
) || IS_TIGERLAKE(i915
)) {
2509 wa_mcr_masked_en(wal
, GEN8_ROW_CHICKEN2
,
2510 GEN12_PUSH_CONST_DEREF_HOLD_DIS
);
2512 /* Wa_14010229206 */
2513 wa_mcr_masked_en(wal
, GEN9_ROW_CHICKEN4
, GEN12_DISABLE_TDL_PUSH
);
2516 if (IS_ROCKETLAKE(i915
) || IS_TIGERLAKE(i915
) || IS_ALDERLAKE_P(i915
)) {
2520 * On TGL and RKL there are multiple entries for this WA in the
2521 * BSpec; some indicate this is an A0-only WA, others indicate
2522 * it applies to all steppings so we trust the "all steppings."
2525 RING_PSMI_CTL(RENDER_RING_BASE
),
2526 GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE
|
2527 GEN8_RC_SEMA_IDLE_MSG_DISABLE
);
2530 if (IS_DG1(i915
) || IS_ROCKETLAKE(i915
) || IS_TIGERLAKE(i915
) ||
2531 IS_ALDERLAKE_S(i915
) || IS_ALDERLAKE_P(i915
)) {
2532 /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */
2533 wa_mcr_masked_en(wal
,
2538 if (GRAPHICS_VER(i915
) == 11) {
2539 /* This is not a Wa. Enable for better image quality */
2542 _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE
);
2546 * Formerly known as WaGAPZPriorityScheme
2550 GEN11_ARBITRATION_PRIO_ORDER_MASK
);
2554 * Formerly known as WaL3BankAddressHashing
2556 wa_write_clr_set(wal
,
2558 GEN11_HASH_CTRL_EXCL_MASK
,
2559 GEN11_HASH_CTRL_EXCL_BIT0
);
2560 wa_write_clr_set(wal
,
2562 GEN11_BANK_HASH_ADDR_EXCL_MASK
,
2563 GEN11_BANK_HASH_ADDR_EXCL_BIT0
);
2567 * Formerly known as WaDisableCleanEvicts
2569 wa_mcr_write_or(wal
,
2571 GEN11_LQSC_CLEAN_EVICT_DISABLE
);
2573 /* Wa_1606682166:icl */
2576 GEN7_DISABLE_SAMPLER_PREFETCH
);
2578 /* Wa_1409178092:icl */
2579 wa_mcr_write_clr_set(wal
,
2581 GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE
,
2584 /* WaEnable32PlaneMode:icl */
2585 wa_masked_en(wal
, GEN9_CSFE_CHICKEN1_RCS
,
2586 GEN11_ENABLE_32_PLANE_MODE
);
2589 * Wa_1408767742:icl[a2..forever],ehl[all]
2590 * Wa_1605460711:icl[a0..c0]
2593 GEN7_FF_THREAD_MODE
,
2594 GEN12_FF_TESSELATION_DOP_GATE_DISABLE
);
2596 /* Wa_22010271021 */
2598 GEN9_CS_DEBUG_MODE1
,
2599 FF_DOP_CLOCK_GATE_DISABLE
);
2603 * Intel platforms that support fine-grained preemption (i.e., gen9 and
2604 * beyond) allow the kernel-mode driver to choose between two different
2605 * options for controlling preemption granularity and behavior.
2607 * Option 1 (hardware default):
2608 * Preemption settings are controlled in a global manner via
2609 * kernel-only register CS_DEBUG_MODE1 (0x20EC). Any granularity
2610 * and settings chosen by the kernel-mode driver will apply to all
2611 * userspace clients.
2614 * Preemption settings are controlled on a per-context basis via
2615 * register CS_CHICKEN1 (0x2580). CS_CHICKEN1 is saved/restored on
2616 * context switch and is writable by userspace (e.g., via
2617 * MI_LOAD_REGISTER_IMMEDIATE instructions placed in a batch buffer)
2618 * which allows different userspace drivers/clients to select
2619 * different settings, or to change those settings on the fly in
2620 * response to runtime needs. This option was known by name
2621 * "FtrPerCtxtPreemptionGranularityControl" at one time, although
2622 * that name is somewhat misleading as other non-granularity
2623 * preemption settings are also impacted by this decision.
2625 * On Linux, our policy has always been to let userspace drivers
2626 * control preemption granularity/settings (Option 2). This was
2627 * originally mandatory on gen9 to prevent ABI breakage (old gen9
2628 * userspace developed before object-level preemption was enabled would
2629 * not behave well if i915 were to go with Option 1 and enable that
2630 * preemption in a global manner). On gen9 each context would have
2631 * object-level preemption disabled by default (see
2632 * WaDisable3DMidCmdPreemption in gen9_ctx_workarounds_init), but
2633 * userspace drivers could opt-in to object-level preemption as they
2634 * saw fit. For post-gen9 platforms, we continue to utilize Option 2;
2635 * even though it is no longer necessary for ABI compatibility when
2636 * enabling a new platform, it does ensure that userspace will be able
2637 * to implement any workarounds that show up requiring temporary
2638 * adjustments to preemption behavior at runtime.
2640 * Notes/Workarounds:
2641 * - Wa_14015141709: On DG2 and early steppings of MTL,
2642 * CS_CHICKEN1[0] does not disable object-level preemption as
2643 * it is supposed to (nor does CS_DEBUG_MODE1[0] if we had been
2644 * using Option 1). Effectively this means userspace is unable
2645 * to disable object-level preemption on these platforms/steppings
2646 * despite the setting here.
2648 * - Wa_16013994831: May require that userspace program
2649 * CS_CHICKEN1[10] when certain runtime conditions are true.
2650 * Userspace requires Option 2 to be in effect for their update of
2651 * CS_CHICKEN1[10] to be effective.
2653 * Other workarounds may appear in the future that will also require
2654 * Option 2 behavior to allow proper userspace implementation.
2656 if (GRAPHICS_VER(i915
) >= 9)
2658 GEN7_FF_SLICE_CS_CHICKEN1
,
2659 GEN9_FFSC_PERCTX_PREEMPT_CTRL
);
2661 if (IS_SKYLAKE(i915
) ||
2662 IS_KABYLAKE(i915
) ||
2663 IS_COFFEELAKE(i915
) ||
2664 IS_COMETLAKE(i915
)) {
2665 /* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
2668 GEN9_GAPS_TSV_CREDIT_DISABLE
);
2671 if (IS_BROXTON(i915
)) {
2672 /* WaDisablePooledEuLoadBalancingFix:bxt */
2674 FF_SLICE_CS_CHICKEN2
,
2675 GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE
);
2678 if (GRAPHICS_VER(i915
) == 9) {
2679 /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
2681 GEN9_CSFE_CHICKEN1_RCS
,
2682 GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE
);
2684 /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
2685 wa_mcr_write_or(wal
,
2687 GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE
);
2689 /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
2690 if (IS_GEN9_LP(i915
))
2691 wa_mcr_write_clr_set(wal
,
2693 L3_PRIO_CREDITS_MASK
,
2694 L3_GENERAL_PRIO_CREDITS(62) |
2695 L3_HIGH_PRIO_CREDITS(2));
2697 /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
2698 wa_mcr_write_or(wal
,
2700 GEN8_LQSC_FLUSH_COHERENT_LINES
);
2702 /* Disable atomics in L3 to prevent unrecoverable hangs */
2703 wa_write_clr_set(wal
, GEN9_SCRATCH_LNCF1
,
2704 GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE
, 0);
2705 wa_mcr_write_clr_set(wal
, GEN8_L3SQCREG4
,
2706 GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE
, 0);
2707 wa_mcr_write_clr_set(wal
, GEN9_SCRATCH1
,
2708 EVICTION_PERF_FIX_ENABLE
, 0);
2711 if (IS_HASWELL(i915
)) {
2712 /* WaSampleCChickenBitEnable:hsw */
2714 HSW_HALF_SLICE_CHICKEN3
, HSW_SAMPLE_C_PERFORMANCE
);
2718 /* enable HiZ Raw Stall Optimization */
2719 HIZ_RAW_STALL_OPT_DISABLE
);
2722 if (IS_VALLEYVIEW(i915
)) {
2723 /* WaDisableEarlyCull:vlv */
2726 _3D_CHICKEN_SF_DISABLE_OBJEND_CULL
);
2729 * WaVSThreadDispatchOverride:ivb,vlv
2731 * This actually overrides the dispatch
2732 * mode for all thread types.
2734 wa_write_clr_set(wal
,
2735 GEN7_FF_THREAD_MODE
,
2737 GEN7_FF_TS_SCHED_HW
|
2738 GEN7_FF_VS_SCHED_HW
|
2739 GEN7_FF_DS_SCHED_HW
);
2741 /* WaPsdDispatchEnable:vlv */
2742 /* WaDisablePSDDualDispatchEnable:vlv */
2744 GEN7_HALF_SLICE_CHICKEN1
,
2745 GEN7_MAX_PS_THREAD_DEP
|
2746 GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE
);
2749 if (IS_IVYBRIDGE(i915
)) {
2750 /* WaDisableEarlyCull:ivb */
2753 _3D_CHICKEN_SF_DISABLE_OBJEND_CULL
);
2755 if (0) { /* causes HiZ corruption on ivb:gt1 */
2756 /* enable HiZ Raw Stall Optimization */
2759 HIZ_RAW_STALL_OPT_DISABLE
);
2763 * WaVSThreadDispatchOverride:ivb,vlv
2765 * This actually overrides the dispatch
2766 * mode for all thread types.
2768 wa_write_clr_set(wal
,
2769 GEN7_FF_THREAD_MODE
,
2771 GEN7_FF_TS_SCHED_HW
|
2772 GEN7_FF_VS_SCHED_HW
|
2773 GEN7_FF_DS_SCHED_HW
);
2775 /* WaDisablePSDDualDispatchEnable:ivb */
2776 if (IS_IVB_GT1(i915
))
2778 GEN7_HALF_SLICE_CHICKEN1
,
2779 GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE
);
2782 if (GRAPHICS_VER(i915
) == 7) {
2783 /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
2785 RING_MODE_GEN7(RENDER_RING_BASE
),
2786 GFX_TLB_INVALIDATE_EXPLICIT
| GFX_REPLAY_MODE
);
2788 /* WaDisable_RenderCache_OperationalFlush:ivb,vlv,hsw */
2789 wa_masked_dis(wal
, CACHE_MODE_0_GEN7
, RC_OP_FLUSH_ENABLE
);
2792 * BSpec says this must be set, even though
2793 * WaDisable4x2SubspanOptimization:ivb,hsw
2794 * WaDisable4x2SubspanOptimization isn't listed for VLV.
2798 PIXEL_SUBSPAN_COLLECT_OPT_DISABLE
);
2801 * BSpec recommends 8x4 when MSAA is used,
2802 * however in practice 16x4 seems fastest.
2804 * Note that PS/WM thread counts depend on the WIZ hashing
2805 * disable bit, which we don't touch here, but it's good
2806 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
2808 wa_masked_field_set(wal
,
2810 GEN6_WIZ_HASHING_MASK
,
2811 GEN6_WIZ_HASHING_16x4
);
2814 if (IS_GRAPHICS_VER(i915
, 6, 7))
2816 * We need to disable the AsyncFlip performance optimisations in
2817 * order to use MI_WAIT_FOR_EVENT within the CS. It should
2818 * already be programmed to '1' on all products.
2820 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
2823 RING_MI_MODE(RENDER_RING_BASE
),
2824 ASYNC_FLIP_PERF_DISABLE
);
2826 if (GRAPHICS_VER(i915
) == 6) {
2828 * Required for the hardware to program scanline values for
2830 * WaEnableFlushTlbInvalidationMode:snb
2834 GFX_TLB_INVALIDATE_EXPLICIT
);
2836 /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
2839 _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB
);
2843 /* WaStripsFansDisableFastClipPerformanceFix:snb */
2844 _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL
|
2847 * "This bit must be set if 3DSTATE_CLIP clip mode is set
2848 * to normal and 3DSTATE_SF number of SF output attributes
2851 _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH
);
2854 * BSpec recommends 8x4 when MSAA is used,
2855 * however in practice 16x4 seems fastest.
2857 * Note that PS/WM thread counts depend on the WIZ hashing
2858 * disable bit, which we don't touch here, but it's good
2859 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
2861 wa_masked_field_set(wal
,
2863 GEN6_WIZ_HASHING_MASK
,
2864 GEN6_WIZ_HASHING_16x4
);
2866 /* WaDisable_RenderCache_OperationalFlush:snb */
2867 wa_masked_dis(wal
, CACHE_MODE_0
, RC_OP_FLUSH_ENABLE
);
2870 * From the Sandybridge PRM, volume 1 part 3, page 24:
2871 * "If this bit is set, STCunit will have LRA as replacement
2872 * policy. [...] This bit must be reset. LRA replacement
2873 * policy is not supported."
2877 CM0_STC_EVICT_DISABLE_LRA_SNB
);
2880 if (IS_GRAPHICS_VER(i915
, 4, 6))
2881 /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
2882 wa_add(wal
, RING_MI_MODE(RENDER_RING_BASE
),
2883 0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH
),
2884 /* XXX bit doesn't stick on Broadwater */
2885 IS_I965G(i915
) ? 0 : VS_TIMER_DISPATCH
, true);
2887 if (GRAPHICS_VER(i915
) == 4)
2889 * Disable CONSTANT_BUFFER before it is loaded from the context
2890 * image. For as it is loaded, it is executed and the stored
2891 * address may no longer be valid, leading to a GPU hang.
2893 * This imposes the requirement that userspace reload their
2894 * CONSTANT_BUFFER on every batch, fortunately a requirement
2895 * they are already accustomed to from before contexts were
2898 wa_add(wal
, ECOSKPD(RENDER_RING_BASE
),
2899 0, _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE
),
2900 0 /* XXX bit doesn't stick on Broadwater */,
2905 xcs_engine_wa_init(struct intel_engine_cs
*engine
, struct i915_wa_list
*wal
)
2907 struct drm_i915_private
*i915
= engine
->i915
;
2909 /* WaKBLVECSSemaphoreWaitPoll:kbl */
2910 if (IS_KBL_GRAPHICS_STEP(i915
, STEP_A0
, STEP_F0
)) {
2912 RING_SEMA_WAIT_POLL(engine
->mmio_base
),
2918 ccs_engine_wa_init(struct intel_engine_cs
*engine
, struct i915_wa_list
*wal
)
2920 if (IS_PVC_CT_STEP(engine
->i915
, STEP_A0
, STEP_C0
)) {
2921 /* Wa_14014999345:pvc */
2922 wa_mcr_masked_en(wal
, GEN10_CACHE_MODE_SS
, DISABLE_ECC
);
2927 * The bspec performance guide has recommended MMIO tuning settings. These
2928 * aren't truly "workarounds" but we want to program them with the same
2929 * workaround infrastructure to ensure that they're automatically added to
2930 * the GuC save/restore lists, re-applied at the right times, and checked for
2931 * any conflicting programming requested by real workarounds.
2933 * Programming settings should be added here only if their registers are not
2934 * part of an engine's register state context. If a register is part of a
2935 * context, then any tuning settings should be programmed in an appropriate
2936 * function invoked by __intel_engine_init_ctx_wa().
2939 add_render_compute_tuning_settings(struct drm_i915_private
*i915
,
2940 struct i915_wa_list
*wal
)
2943 wa_mcr_write_clr_set(wal
, RT_CTRL
, STACKID_CTRL
, STACKID_CTRL_512
);
2946 * This tuning setting proves beneficial only on ATS-M designs; the
2947 * default "age based" setting is optimal on regular DG2 and other
2950 if (INTEL_INFO(i915
)->tuning_thread_rr_after_dep
)
2951 wa_mcr_masked_field_set(wal
, GEN9_ROW_CHICKEN4
, THREAD_EX_ARB_MODE
,
2952 THREAD_EX_ARB_MODE_RR_AFTER_DEP
);
2954 if (GRAPHICS_VER(i915
) == 12 && GRAPHICS_VER_FULL(i915
) < IP_VER(12, 50))
2955 wa_write_clr(wal
, GEN8_GARBCNTL
, GEN12_BUS_HASH_CTL_BIT_EXC
);
2959 * The workarounds in this function apply to shared registers in
2960 * the general render reset domain that aren't tied to a
2961 * specific engine. Since all render+compute engines get reset
2962 * together, and the contents of these registers are lost during
2963 * the shared render domain reset, we'll define such workarounds
2964 * here and then add them to just a single RCS or CCS engine's
2965 * workaround list (whichever engine has the I915_ENGINE_FIRST_RENDER_COMPUTE flag).
2968 general_render_compute_wa_init(struct intel_engine_cs
*engine
, struct i915_wa_list
*wal
)
2970 struct drm_i915_private
*i915
= engine
->i915
;
2972 add_render_compute_tuning_settings(i915
, wal
);
2974 if (GRAPHICS_VER(i915
) >= 11) {
2975 /* This is not a Wa (although referred to as
2976 * WaSetIndirectStateOverride in places), this allows
2977 * applications that reference sampler states through
2978 * the BindlessSamplerStateBaseAddress to have their
2979 * border color relative to DynamicStateBaseAddress
2980 * rather than BindlessSamplerStateBaseAddress.
2982 * Otherwise SAMPLER_STATE border colors have to be
2983 * copied in multiple heaps (DynamicStateBaseAddress &
2984 * BindlessSamplerStateBaseAddress)
2988 wa_mcr_masked_en(wal
,
2990 GEN11_INDIRECT_STATE_BASE_ADDR_OVERRIDE
);
2993 if (IS_MTL_GRAPHICS_STEP(i915
, M
, STEP_B0
, STEP_FOREVER
) ||
2994 IS_MTL_GRAPHICS_STEP(i915
, P
, STEP_B0
, STEP_FOREVER
))
2995 /* Wa_14017856879 */
2996 wa_mcr_masked_en(wal
, GEN9_ROW_CHICKEN3
, MTL_DISABLE_FIX_FOR_EOT_FLUSH
);
2998 if (IS_MTL_GRAPHICS_STEP(i915
, M
, STEP_A0
, STEP_B0
) ||
2999 IS_MTL_GRAPHICS_STEP(i915
, P
, STEP_A0
, STEP_B0
))
3004 wa_mcr_masked_en(wal
, GEN10_SAMPLER_MODE
,
3005 MTL_DISABLE_SAMPLER_SC_OOO
);
3007 if (IS_MTL_GRAPHICS_STEP(i915
, P
, STEP_A0
, STEP_B0
))
3008 /* Wa_22015279794 */
3009 wa_mcr_masked_en(wal
, GEN10_CACHE_MODE_SS
,
3010 DISABLE_PREFETCH_INTO_IC
);
3012 if (IS_MTL_GRAPHICS_STEP(i915
, M
, STEP_A0
, STEP_B0
) ||
3013 IS_MTL_GRAPHICS_STEP(i915
, P
, STEP_A0
, STEP_B0
) ||
3014 IS_DG2_GRAPHICS_STEP(i915
, G10
, STEP_B0
, STEP_FOREVER
) ||
3015 IS_DG2_G11(i915
) || IS_DG2_G12(i915
)) {
3016 /* Wa_22013037850 */
3017 wa_mcr_write_or(wal
, LSC_CHICKEN_BIT_0_UDW
,
3018 DISABLE_128B_EVICTION_COMMAND_UDW
);
3021 if (IS_MTL_GRAPHICS_STEP(i915
, M
, STEP_A0
, STEP_B0
) ||
3022 IS_MTL_GRAPHICS_STEP(i915
, P
, STEP_A0
, STEP_B0
) ||
3023 IS_PONTEVECCHIO(i915
) ||
3025 /* Wa_22014226127 */
3026 wa_mcr_write_or(wal
, LSC_CHICKEN_BIT_0
, DISABLE_D8_D16_COASLESCE
);
3029 if (IS_MTL_GRAPHICS_STEP(i915
, M
, STEP_A0
, STEP_B0
) ||
3030 IS_MTL_GRAPHICS_STEP(i915
, P
, STEP_A0
, STEP_B0
) ||
3032 /* Wa_18017747507 */
3033 wa_masked_en(wal
, VFG_PREEMPTION_CHICKEN
, POLYGON_TRIFAN_LINELOOP_DISABLE
);
3036 if (IS_DG2_GRAPHICS_STEP(i915
, G10
, STEP_B0
, STEP_C0
) ||
3039 * Wa_22012826095:dg2
3040 * Wa_22013059131:dg2
3042 wa_mcr_write_clr_set(wal
, LSC_CHICKEN_BIT_0_UDW
,
3044 REG_FIELD_PREP(MAXREQS_PER_BANK
, 2));
3046 /* Wa_22013059131:dg2 */
3047 wa_mcr_write_or(wal
, LSC_CHICKEN_BIT_0
,
3048 FORCE_1_SUB_MESSAGE_PER_FRAGMENT
);
3051 if (IS_DG2_GRAPHICS_STEP(i915
, G10
, STEP_A0
, STEP_B0
)) {
3053 * Wa_14010918519:dg2_g10
3055 * LSC_CHICKEN_BIT_0 always reads back as 0 in this stepping,
3056 * so ignoring verification.
3058 wa_mcr_add(wal
, LSC_CHICKEN_BIT_0_UDW
, 0,
3059 FORCE_SLM_FENCE_SCOPE_TO_TILE
| FORCE_UGM_FENCE_SCOPE_TO_TILE
,
3063 if (IS_XEHPSDV(i915
)) {
3065 wa_mcr_masked_en(wal
,
3067 SYSTOLIC_DOP_CLOCK_GATING_DIS
);
3070 wa_mcr_masked_en(wal
,
3072 GEN12_DISABLE_GRF_CLEAR
);
3074 /* Wa_14010449647:xehpsdv */
3075 wa_mcr_masked_en(wal
, GEN8_HALF_SLICE_CHICKEN1
,
3076 GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE
);
3079 if (IS_DG2(i915
) || IS_PONTEVECCHIO(i915
)) {
3080 /* Wa_14015227452:dg2,pvc */
3081 wa_mcr_masked_en(wal
, GEN9_ROW_CHICKEN4
, XEHP_DIS_BBL_SYSPIPE
);
3083 /* Wa_16015675438:dg2,pvc */
3084 wa_masked_en(wal
, FF_SLICE_CS_CHICKEN2
, GEN12_PERF_FIX_BALANCING_CFE_DISABLE
);
3089 * Wa_16011620976:dg2_g11
3090 * Wa_22015475538:dg2
3092 wa_mcr_write_or(wal
, LSC_CHICKEN_BIT_0_UDW
, DIS_CHAIN_2XSIMD8
);
3095 if (IS_DG2_GRAPHICS_STEP(i915
, G10
, STEP_A0
, STEP_C0
) || IS_DG2_G11(i915
))
3099 * Note that register 0xE420 is write-only and cannot be read
3100 * back for verification on DG2 (due to Wa_14012342262), so
3101 * we need to explicitly skip the readback.
3103 wa_mcr_add(wal
, GEN10_CACHE_MODE_SS
, 0,
3104 _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC
),
3105 0 /* write-only, so skip validation */,
3110 engine_init_workarounds(struct intel_engine_cs
*engine
, struct i915_wa_list
*wal
)
3112 if (GRAPHICS_VER(engine
->i915
) < 4)
3115 engine_fake_wa_init(engine
, wal
);
3118 * These are common workarounds that just need to applied
3119 * to a single RCS/CCS engine's workaround list since
3120 * they're reset as part of the general render domain reset.
3122 if (engine
->flags
& I915_ENGINE_FIRST_RENDER_COMPUTE
)
3123 general_render_compute_wa_init(engine
, wal
);
3125 if (engine
->class == COMPUTE_CLASS
)
3126 ccs_engine_wa_init(engine
, wal
);
3127 else if (engine
->class == RENDER_CLASS
)
3128 rcs_engine_wa_init(engine
, wal
);
3130 xcs_engine_wa_init(engine
, wal
);
3133 void intel_engine_init_workarounds(struct intel_engine_cs
*engine
)
3135 struct i915_wa_list
*wal
= &engine
->wa_list
;
3137 wa_init_start(wal
, engine
->gt
, "engine", engine
->name
);
3138 engine_init_workarounds(engine
, wal
);
3139 wa_init_finish(wal
);
3142 void intel_engine_apply_workarounds(struct intel_engine_cs
*engine
)
3144 wa_list_apply(&engine
->wa_list
);
3147 static const struct i915_range mcr_ranges_gen8
[] = {
3148 { .start
= 0x5500, .end
= 0x55ff },
3149 { .start
= 0x7000, .end
= 0x7fff },
3150 { .start
= 0x9400, .end
= 0x97ff },
3151 { .start
= 0xb000, .end
= 0xb3ff },
3152 { .start
= 0xe000, .end
= 0xe7ff },
3156 static const struct i915_range mcr_ranges_gen12
[] = {
3157 { .start
= 0x8150, .end
= 0x815f },
3158 { .start
= 0x9520, .end
= 0x955f },
3159 { .start
= 0xb100, .end
= 0xb3ff },
3160 { .start
= 0xde80, .end
= 0xe8ff },
3161 { .start
= 0x24a00, .end
= 0x24a7f },
3165 static const struct i915_range mcr_ranges_xehp
[] = {
3166 { .start
= 0x4000, .end
= 0x4aff },
3167 { .start
= 0x5200, .end
= 0x52ff },
3168 { .start
= 0x5400, .end
= 0x7fff },
3169 { .start
= 0x8140, .end
= 0x815f },
3170 { .start
= 0x8c80, .end
= 0x8dff },
3171 { .start
= 0x94d0, .end
= 0x955f },
3172 { .start
= 0x9680, .end
= 0x96ff },
3173 { .start
= 0xb000, .end
= 0xb3ff },
3174 { .start
= 0xc800, .end
= 0xcfff },
3175 { .start
= 0xd800, .end
= 0xd8ff },
3176 { .start
= 0xdc00, .end
= 0xffff },
3177 { .start
= 0x17000, .end
= 0x17fff },
3178 { .start
= 0x24a00, .end
= 0x24a7f },
3182 static bool mcr_range(struct drm_i915_private
*i915
, u32 offset
)
3184 const struct i915_range
*mcr_ranges
;
3187 if (GRAPHICS_VER_FULL(i915
) >= IP_VER(12, 50))
3188 mcr_ranges
= mcr_ranges_xehp
;
3189 else if (GRAPHICS_VER(i915
) >= 12)
3190 mcr_ranges
= mcr_ranges_gen12
;
3191 else if (GRAPHICS_VER(i915
) >= 8)
3192 mcr_ranges
= mcr_ranges_gen8
;
3197 * Registers in these ranges are affected by the MCR selector
3198 * which only controls CPU initiated MMIO. Routing does not
3199 * work for CS access so we cannot verify them on this path.
3201 for (i
= 0; mcr_ranges
[i
].start
; i
++)
3202 if (offset
>= mcr_ranges
[i
].start
&&
3203 offset
<= mcr_ranges
[i
].end
)
3210 wa_list_srm(struct i915_request
*rq
,
3211 const struct i915_wa_list
*wal
,
3212 struct i915_vma
*vma
)
3214 struct drm_i915_private
*i915
= rq
->engine
->i915
;
3215 unsigned int i
, count
= 0;
3216 const struct i915_wa
*wa
;
3219 srm
= MI_STORE_REGISTER_MEM
| MI_SRM_LRM_GLOBAL_GTT
;
3220 if (GRAPHICS_VER(i915
) >= 8)
3223 for (i
= 0, wa
= wal
->list
; i
< wal
->count
; i
++, wa
++) {
3224 if (!mcr_range(i915
, i915_mmio_reg_offset(wa
->reg
)))
3228 cs
= intel_ring_begin(rq
, 4 * count
);
3232 for (i
= 0, wa
= wal
->list
; i
< wal
->count
; i
++, wa
++) {
3233 u32 offset
= i915_mmio_reg_offset(wa
->reg
);
3235 if (mcr_range(i915
, offset
))
3240 *cs
++ = i915_ggtt_offset(vma
) + sizeof(u32
) * i
;
3243 intel_ring_advance(rq
, cs
);
3248 static int engine_wa_list_verify(struct intel_context
*ce
,
3249 const struct i915_wa_list
* const wal
,
3252 const struct i915_wa
*wa
;
3253 struct i915_request
*rq
;
3254 struct i915_vma
*vma
;
3255 struct i915_gem_ww_ctx ww
;
3263 vma
= __vm_create_scratch_for_read(&ce
->engine
->gt
->ggtt
->vm
,
3264 wal
->count
* sizeof(u32
));
3266 return PTR_ERR(vma
);
3268 intel_engine_pm_get(ce
->engine
);
3269 i915_gem_ww_ctx_init(&ww
, false);
3271 err
= i915_gem_object_lock(vma
->obj
, &ww
);
3273 err
= intel_context_pin_ww(ce
, &ww
);
3277 err
= i915_vma_pin_ww(vma
, &ww
, 0, 0,
3278 i915_vma_is_ggtt(vma
) ? PIN_GLOBAL
: PIN_USER
);
3282 rq
= i915_request_create(ce
);
3288 err
= i915_vma_move_to_active(vma
, rq
, EXEC_OBJECT_WRITE
);
3290 err
= wa_list_srm(rq
, wal
, vma
);
3292 i915_request_get(rq
);
3294 i915_request_set_error_once(rq
, err
);
3295 i915_request_add(rq
);
3300 if (i915_request_wait(rq
, 0, HZ
/ 5) < 0) {
3305 results
= i915_gem_object_pin_map(vma
->obj
, I915_MAP_WB
);
3306 if (IS_ERR(results
)) {
3307 err
= PTR_ERR(results
);
3312 for (i
= 0, wa
= wal
->list
; i
< wal
->count
; i
++, wa
++) {
3313 if (mcr_range(rq
->engine
->i915
, i915_mmio_reg_offset(wa
->reg
)))
3316 if (!wa_verify(wal
->gt
, wa
, results
[i
], wal
->name
, from
))
3320 i915_gem_object_unpin_map(vma
->obj
);
3323 i915_request_put(rq
);
3325 i915_vma_unpin(vma
);
3327 intel_context_unpin(ce
);
3329 if (err
== -EDEADLK
) {
3330 err
= i915_gem_ww_ctx_backoff(&ww
);
3334 i915_gem_ww_ctx_fini(&ww
);
3335 intel_engine_pm_put(ce
->engine
);
3340 int intel_engine_verify_workarounds(struct intel_engine_cs
*engine
,
3343 return engine_wa_list_verify(engine
->kernel_context
,
3348 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
3349 #include "selftest_workarounds.c"