1 // SPDX-License-Identifier: MIT
3 * Copyright © 2014-2018 Intel Corporation
7 #include "intel_context.h"
8 #include "intel_engine_pm.h"
9 #include "intel_engine_regs.h"
10 #include "intel_gpu_commands.h"
12 #include "intel_gt_mcr.h"
13 #include "intel_gt_regs.h"
14 #include "intel_ring.h"
15 #include "intel_workarounds.h"
18 * DOC: Hardware workarounds
20 * This file is intended as a central place to implement most [1]_ of the
21 * required workarounds for hardware to work as originally intended. They fall
22 * in five basic categories depending on how/when they are applied:
24 * - Workarounds that touch registers that are saved/restored to/from the HW
25 * context image. The list is emitted (via Load Register Immediate commands)
26 * every time a new context is created.
27 * - GT workarounds. The list of these WAs is applied whenever these registers
28 * revert to default values (on GPU reset, suspend/resume [2]_, etc.).
29 * - Display workarounds. The list is applied during display clock-gating
31 * - Workarounds that whitelist a privileged register, so that UMDs can manage
32 * them directly. This is just a special case of an MMIO workaround (as we
33 * write the list of these to-be-whitelisted registers to some special HW
35 * - Workaround batchbuffers, that get executed automatically by the hardware
36 * on every HW context restore.
38 * .. [1] Please notice that there are other WAs that, due to their nature,
39 * cannot be applied from a central place. Those are peppered around the rest
40 * of the code, as needed.
42 * .. [2] Technically, some registers are powercontext saved & restored, so they
43 * survive a suspend/resume. In practice, writing them again is not too
44 * costly and simplifies things. We can revisit this in the future.
49 * Keep things in this file ordered by WA type, as per the above (context, GT,
50 * display, register whitelist, batchbuffer). Then, inside each type, keep the
53 * - Infrastructure functions and macros
54 * - WAs per platform in standard gen/chrono order
55 * - Public functions to init or apply the given workaround type.
58 static void wa_init_start(struct i915_wa_list
*wal
, const char *name
, const char *engine_name
)
61 wal
->engine_name
= engine_name
;
64 #define WA_LIST_CHUNK (1 << 4)
66 static void wa_init_finish(struct i915_wa_list
*wal
)
68 /* Trim unused entries. */
69 if (!IS_ALIGNED(wal
->count
, WA_LIST_CHUNK
)) {
70 struct i915_wa
*list
= kmemdup(wal
->list
,
71 wal
->count
* sizeof(*list
),
83 DRM_DEBUG_DRIVER("Initialized %u %s workarounds on %s\n",
84 wal
->wa_count
, wal
->name
, wal
->engine_name
);
87 static void _wa_add(struct i915_wa_list
*wal
, const struct i915_wa
*wa
)
89 unsigned int addr
= i915_mmio_reg_offset(wa
->reg
);
90 unsigned int start
= 0, end
= wal
->count
;
91 const unsigned int grow
= WA_LIST_CHUNK
;
94 GEM_BUG_ON(!is_power_of_2(grow
));
96 if (IS_ALIGNED(wal
->count
, grow
)) { /* Either uninitialized or full. */
99 list
= kmalloc_array(ALIGN(wal
->count
+ 1, grow
), sizeof(*wa
),
102 DRM_ERROR("No space for workaround init!\n");
107 memcpy(list
, wal
->list
, sizeof(*wa
) * wal
->count
);
114 while (start
< end
) {
115 unsigned int mid
= start
+ (end
- start
) / 2;
117 if (i915_mmio_reg_offset(wal
->list
[mid
].reg
) < addr
) {
119 } else if (i915_mmio_reg_offset(wal
->list
[mid
].reg
) > addr
) {
122 wa_
= &wal
->list
[mid
];
124 if ((wa
->clr
| wa_
->clr
) && !(wa
->clr
& ~wa_
->clr
)) {
125 DRM_ERROR("Discarding overwritten w/a for reg %04x (clear: %08x, set: %08x)\n",
126 i915_mmio_reg_offset(wa_
->reg
),
129 wa_
->set
&= ~wa
->clr
;
135 wa_
->read
|= wa
->read
;
141 wa_
= &wal
->list
[wal
->count
++];
144 while (wa_
-- > wal
->list
) {
145 GEM_BUG_ON(i915_mmio_reg_offset(wa_
[0].reg
) ==
146 i915_mmio_reg_offset(wa_
[1].reg
));
147 if (i915_mmio_reg_offset(wa_
[1].reg
) >
148 i915_mmio_reg_offset(wa_
[0].reg
))
151 swap(wa_
[1], wa_
[0]);
155 static void wa_add(struct i915_wa_list
*wal
, i915_reg_t reg
,
156 u32 clear
, u32 set
, u32 read_mask
, bool masked_reg
)
158 struct i915_wa wa
= {
163 .masked_reg
= masked_reg
,
/*
 * Queue a clear/set workaround on a non-masked register.
 *
 * Forwards to wa_add() with @clear and @set unchanged, reuses @clear as the
 * read mask argument and passes false for masked_reg.
 */
170 wa_write_clr_set(struct i915_wa_list
*wal
, i915_reg_t reg
, u32 clear
, u32 set
)
/* Argument order: wal, reg, clear, set, read_mask (= clear), masked_reg. */
172 wa_add(wal
, reg
, clear
, set
, clear
, false);
/*
 * Queue a workaround that supplies @set as the new value of @reg.
 *
 * Implemented as wa_write_clr_set() with an all-ones clear mask (~0), so
 * every bit of the register is covered by the clear mask.
 */
176 wa_write(struct i915_wa_list
*wal
, i915_reg_t reg
, u32 set
)
178 wa_write_clr_set(wal
, reg
, ~0, set
);
/*
 * Queue a workaround that ORs the bits of @set into @reg.
 *
 * The same mask is handed to wa_write_clr_set() as both the clear and the
 * set value, so only the bits in @set are touched.
 */
182 wa_write_or(struct i915_wa_list
*wal
, i915_reg_t reg
, u32 set
)
184 wa_write_clr_set(wal
, reg
, set
, set
);
/*
 * Queue a workaround that clears the bits of @clr in @reg.
 *
 * Calls wa_write_clr_set() with @clr as the clear mask and 0 as the set
 * value.
 */
188 wa_write_clr(struct i915_wa_list
*wal
, i915_reg_t reg
, u32 clr
)
190 wa_write_clr_set(wal
, reg
, clr
, 0);
194 * WA operations on "masked register". A masked register has the upper 16 bits
195 * documented as "masked" in b-spec. Its purpose is to allow writing to just a
196 * portion of the register without a rmw: you simply write in the upper 16 bits
197 * the mask of bits you are going to modify.
199 * The wa_masked_* family of functions already does the necessary operations to
200 * calculate the mask based on the parameters passed, so user only has to
201 * provide the lower 16 bits of that register.
/*
 * Queue a masked-register workaround that enables the bits in @val.
 *
 * Calls wa_add() with a zero clear mask, _MASKED_BIT_ENABLE(@val) as the
 * value to set, @val as the read mask and masked_reg = true.
 */
205 wa_masked_en(struct i915_wa_list
*wal
, i915_reg_t reg
, u32 val
)
207 wa_add(wal
, reg
, 0, _MASKED_BIT_ENABLE(val
), val
, true);
/*
 * Queue a masked-register workaround that disables the bits in @val.
 *
 * Calls wa_add() with a zero clear mask, _MASKED_BIT_DISABLE(@val) as the
 * value to set, @val as the read mask and masked_reg = true.
 */
211 wa_masked_dis(struct i915_wa_list
*wal
, i915_reg_t reg
, u32 val
)
213 wa_add(wal
, reg
, 0, _MASKED_BIT_DISABLE(val
), val
, true);
217 wa_masked_field_set(struct i915_wa_list
*wal
, i915_reg_t reg
,
220 wa_add(wal
, reg
, 0, _MASKED_FIELD(mask
, val
), mask
, true);
/*
 * Context workarounds for graphics version 6 (selected by the
 * GRAPHICS_VER(i915) == 6 branch of __intel_engine_init_ctx_wa()).
 *
 * Adds a single masked enable of INSTPM_FORCE_ORDERING in INSTPM to @wal.
 * @engine is not referenced by the visible body.
 */
223 static void gen6_ctx_workarounds_init(struct intel_engine_cs
*engine
,
224 struct i915_wa_list
*wal
)
226 wa_masked_en(wal
, INSTPM
, INSTPM_FORCE_ORDERING
);
/*
 * Context workarounds for graphics version 7 (selected by the
 * GRAPHICS_VER(i915) == 7 branch of __intel_engine_init_ctx_wa()).
 *
 * Adds a single masked enable of INSTPM_FORCE_ORDERING in INSTPM to @wal.
 * @engine is not referenced by the visible body.
 */
229 static void gen7_ctx_workarounds_init(struct intel_engine_cs
*engine
,
230 struct i915_wa_list
*wal
)
232 wa_masked_en(wal
, INSTPM
, INSTPM_FORCE_ORDERING
);
235 static void gen8_ctx_workarounds_init(struct intel_engine_cs
*engine
,
236 struct i915_wa_list
*wal
)
238 wa_masked_en(wal
, INSTPM
, INSTPM_FORCE_ORDERING
);
240 /* WaDisableAsyncFlipPerfMode:bdw,chv */
241 wa_masked_en(wal
, RING_MI_MODE(RENDER_RING_BASE
), ASYNC_FLIP_PERF_DISABLE
);
243 /* WaDisablePartialInstShootdown:bdw,chv */
244 wa_masked_en(wal
, GEN8_ROW_CHICKEN
,
245 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE
);
247 /* Use Force Non-Coherent whenever executing a 3D context. This is a
248 * workaround for a possible hang in the unlikely event a TLB
249 * invalidation occurs during a PSD flush.
251 /* WaForceEnableNonCoherent:bdw,chv */
252 /* WaHdcDisableFetchWhenMasked:bdw,chv */
253 wa_masked_en(wal
, HDC_CHICKEN0
,
254 HDC_DONOT_FETCH_MEM_WHEN_MASKED
|
255 HDC_FORCE_NON_COHERENT
);
257 /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
258 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
259 * polygons in the same 8x4 pixel/sample area to be processed without
260 * stalling waiting for the earlier ones to write to Hierarchical Z
263 * This optimization is off by default for BDW and CHV; turn it on.
265 wa_masked_dis(wal
, CACHE_MODE_0_GEN7
, HIZ_RAW_STALL_OPT_DISABLE
);
267 /* Wa4x4STCOptimizationDisable:bdw,chv */
268 wa_masked_en(wal
, CACHE_MODE_1
, GEN8_4x4_STC_OPTIMIZATION_DISABLE
);
271 * BSpec recommends 8x4 when MSAA is used,
272 * however in practice 16x4 seems fastest.
274 * Note that PS/WM thread counts depend on the WIZ hashing
275 * disable bit, which we don't touch here, but it's good
276 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
278 wa_masked_field_set(wal
, GEN7_GT_MODE
,
279 GEN6_WIZ_HASHING_MASK
,
280 GEN6_WIZ_HASHING_16x4
);
283 static void bdw_ctx_workarounds_init(struct intel_engine_cs
*engine
,
284 struct i915_wa_list
*wal
)
286 struct drm_i915_private
*i915
= engine
->i915
;
288 gen8_ctx_workarounds_init(engine
, wal
);
290 /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
291 wa_masked_en(wal
, GEN8_ROW_CHICKEN
, STALL_DOP_GATING_DISABLE
);
293 /* WaDisableDopClockGating:bdw
295 * Also see the related UCGTCL1 write in bdw_init_clock_gating()
296 * to disable EUTC clock gating.
298 wa_masked_en(wal
, GEN7_ROW_CHICKEN2
,
299 DOP_CLOCK_GATING_DISABLE
);
301 wa_masked_en(wal
, HALF_SLICE_CHICKEN3
,
302 GEN8_SAMPLER_POWER_BYPASS_DIS
);
304 wa_masked_en(wal
, HDC_CHICKEN0
,
305 /* WaForceContextSaveRestoreNonCoherent:bdw */
306 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT
|
307 /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
308 (IS_BDW_GT3(i915
) ? HDC_FENCE_DEST_SLM_DISABLE
: 0));
/*
 * Context workarounds for Cherryview (selected by the IS_CHERRYVIEW()
 * branch of __intel_engine_init_ctx_wa()).
 *
 * Starts from the shared gen8 context workaround list and then adds two
 * CHV-specific masked enables to @wal.
 */
311 static void chv_ctx_workarounds_init(struct intel_engine_cs
*engine
,
312 struct i915_wa_list
*wal
)
/* Common gen8 entries first; the CHV-specific ones are added after them. */
314 gen8_ctx_workarounds_init(engine
, wal
);
316 /* WaDisableThreadStallDopClockGating:chv */
317 wa_masked_en(wal
, GEN8_ROW_CHICKEN
, STALL_DOP_GATING_DISABLE
);
319 /* Improve HiZ throughput on CHV. */
320 wa_masked_en(wal
, HIZ_CHICKEN
, CHV_HZ_8X8_MODE_IN_1X
);
323 static void gen9_ctx_workarounds_init(struct intel_engine_cs
*engine
,
324 struct i915_wa_list
*wal
)
326 struct drm_i915_private
*i915
= engine
->i915
;
329 /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
331 * Must match Display Engine. See
332 * WaCompressedResourceDisplayNewHashMode.
334 wa_masked_en(wal
, COMMON_SLICE_CHICKEN2
,
335 GEN9_PBE_COMPRESSED_HASH_SELECTION
);
336 wa_masked_en(wal
, GEN9_HALF_SLICE_CHICKEN7
,
337 GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR
);
340 /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
341 /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
342 wa_masked_en(wal
, GEN8_ROW_CHICKEN
,
343 FLOW_CONTROL_ENABLE
|
344 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE
);
346 /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
347 /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
348 wa_masked_en(wal
, GEN9_HALF_SLICE_CHICKEN7
,
349 GEN9_ENABLE_YV12_BUGFIX
|
350 GEN9_ENABLE_GPGPU_PREEMPTION
);
352 /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
353 /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
354 wa_masked_en(wal
, CACHE_MODE_1
,
355 GEN8_4x4_STC_OPTIMIZATION_DISABLE
|
356 GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE
);
358 /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
359 wa_masked_dis(wal
, GEN9_HALF_SLICE_CHICKEN5
,
360 GEN9_CCS_TLB_PREFETCH_ENABLE
);
362 /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
363 wa_masked_en(wal
, HDC_CHICKEN0
,
364 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT
|
365 HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE
);
367 /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
368 * both tied to WaForceContextSaveRestoreNonCoherent
369 * in some hsds for skl. We keep the tie for all gen9. The
370 * documentation is a bit hazy and so we want to get common behaviour,
371 * even though there is no clear evidence we would need both on kbl/bxt.
372 * This area has been source of system hangs so we play it safe
373 * and mimic the skl regardless of what bspec says.
375 * Use Force Non-Coherent whenever executing a 3D context. This
376 * is a workaround for a possible hang in the unlikely event
377 * a TLB invalidation occurs during a PSD flush.
380 /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
381 wa_masked_en(wal
, HDC_CHICKEN0
,
382 HDC_FORCE_NON_COHERENT
);
384 /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
385 if (IS_SKYLAKE(i915
) ||
387 IS_COFFEELAKE(i915
) ||
389 wa_masked_en(wal
, HALF_SLICE_CHICKEN3
,
390 GEN8_SAMPLER_POWER_BYPASS_DIS
);
392 /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
393 wa_masked_en(wal
, HALF_SLICE_CHICKEN2
, GEN8_ST_PO_DISABLE
);
396 * Supporting preemption with fine-granularity requires changes in the
397 * batch buffer programming. Since we can't break old userspace, we
398 * need to set our default preemption level to safe value. Userspace is
399 * still able to use more fine-grained preemption levels, since in
400 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
401 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
402 * not real HW workarounds, but merely a way to start using preemption
403 * while maintaining old contract with userspace.
406 /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
407 wa_masked_dis(wal
, GEN8_CS_CHICKEN1
, GEN9_PREEMPT_3D_OBJECT_LEVEL
);
409 /* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
410 wa_masked_field_set(wal
, GEN8_CS_CHICKEN1
,
411 GEN9_PREEMPT_GPGPU_LEVEL_MASK
,
412 GEN9_PREEMPT_GPGPU_COMMAND_LEVEL
);
414 /* WaClearHIZ_WM_CHICKEN3:bxt,glk */
415 if (IS_GEN9_LP(i915
))
416 wa_masked_en(wal
, GEN9_WM_CHICKEN3
, GEN9_FACTOR_IN_CLR_VAL_HIZ
);
419 static void skl_tune_iz_hashing(struct intel_engine_cs
*engine
,
420 struct i915_wa_list
*wal
)
422 struct intel_gt
*gt
= engine
->gt
;
423 u8 vals
[3] = { 0, 0, 0 };
426 for (i
= 0; i
< 3; i
++) {
430 * Only consider slices where one, and only one, subslice has 7
433 if (!is_power_of_2(gt
->info
.sseu
.subslice_7eu
[i
]))
437 * subslice_7eu[i] != 0 (because of the check above) and
438 * ss_max == 4 (maximum number of subslices possible per slice)
442 ss
= ffs(gt
->info
.sseu
.subslice_7eu
[i
]) - 1;
446 if (vals
[0] == 0 && vals
[1] == 0 && vals
[2] == 0)
449 /* Tune IZ hashing. See intel_device_info_runtime_init() */
450 wa_masked_field_set(wal
, GEN7_GT_MODE
,
451 GEN9_IZ_HASHING_MASK(2) |
452 GEN9_IZ_HASHING_MASK(1) |
453 GEN9_IZ_HASHING_MASK(0),
454 GEN9_IZ_HASHING(2, vals
[2]) |
455 GEN9_IZ_HASHING(1, vals
[1]) |
456 GEN9_IZ_HASHING(0, vals
[0]));
/*
 * Context workarounds for Skylake (selected by the IS_SKYLAKE() branch of
 * __intel_engine_init_ctx_wa()).
 *
 * Applies the shared gen9 context workaround list and then the IZ hashing
 * tuning from skl_tune_iz_hashing().
 */
459 static void skl_ctx_workarounds_init(struct intel_engine_cs
*engine
,
460 struct i915_wa_list
*wal
)
462 gen9_ctx_workarounds_init(engine
, wal
);
463 skl_tune_iz_hashing(engine
, wal
);
/*
 * Context workarounds for Broxton (selected by the IS_BROXTON() branch of
 * __intel_engine_init_ctx_wa()).
 *
 * Starts from the shared gen9 context workaround list and then adds two
 * BXT-specific masked enables to @wal.
 */
466 static void bxt_ctx_workarounds_init(struct intel_engine_cs
*engine
,
467 struct i915_wa_list
*wal
)
/* Common gen9 entries first; the BXT-specific ones are added after them. */
469 gen9_ctx_workarounds_init(engine
, wal
);
471 /* WaDisableThreadStallDopClockGating:bxt */
472 wa_masked_en(wal
, GEN8_ROW_CHICKEN
,
473 STALL_DOP_GATING_DISABLE
);
475 /* WaToEnableHwFixForPushConstHWBug:bxt */
476 wa_masked_en(wal
, COMMON_SLICE_CHICKEN2
,
477 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION
);
/*
 * Context workarounds for Kabylake (selected by the IS_KABYLAKE() branch of
 * __intel_engine_init_ctx_wa()).
 *
 * Starts from the shared gen9 context workaround list. One KBL entry is
 * added only for graphics steppings in the STEP_C0..STEP_FOREVER range; the
 * other is added unconditionally.
 */
480 static void kbl_ctx_workarounds_init(struct intel_engine_cs
*engine
,
481 struct i915_wa_list
*wal
)
483 struct drm_i915_private
*i915
= engine
->i915
;
485 gen9_ctx_workarounds_init(engine
, wal
);
487 /* WaToEnableHwFixForPushConstHWBug:kbl */
/* Gated on the graphics stepping check below. */
488 if (IS_KBL_GRAPHICS_STEP(i915
, STEP_C0
, STEP_FOREVER
))
489 wa_masked_en(wal
, COMMON_SLICE_CHICKEN2
,
490 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION
);
492 /* WaDisableSbeCacheDispatchPortSharing:kbl */
493 wa_masked_en(wal
, GEN7_HALF_SLICE_CHICKEN1
,
494 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE
);
/*
 * Context workarounds for Geminilake (selected by the IS_GEMINILAKE() branch
 * of __intel_engine_init_ctx_wa()).
 *
 * Starts from the shared gen9 context workaround list and adds one
 * GLK-specific masked enable to @wal.
 */
497 static void glk_ctx_workarounds_init(struct intel_engine_cs
*engine
,
498 struct i915_wa_list
*wal
)
500 gen9_ctx_workarounds_init(engine
, wal
);
502 /* WaToEnableHwFixForPushConstHWBug:glk */
503 wa_masked_en(wal
, COMMON_SLICE_CHICKEN2
,
504 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION
);
/*
 * Context workarounds for Coffeelake; the IS_COFFEELAKE() || IS_COMETLAKE()
 * branch of __intel_engine_init_ctx_wa() routes both platforms here.
 *
 * Starts from the shared gen9 context workaround list and adds two further
 * masked enables to @wal.
 */
507 static void cfl_ctx_workarounds_init(struct intel_engine_cs
*engine
,
508 struct i915_wa_list
*wal
)
510 gen9_ctx_workarounds_init(engine
, wal
);
512 /* WaToEnableHwFixForPushConstHWBug:cfl */
513 wa_masked_en(wal
, COMMON_SLICE_CHICKEN2
,
514 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION
);
516 /* WaDisableSbeCacheDispatchPortSharing:cfl */
517 wa_masked_en(wal
, GEN7_HALF_SLICE_CHICKEN1
,
518 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE
);
521 static void icl_ctx_workarounds_init(struct intel_engine_cs
*engine
,
522 struct i915_wa_list
*wal
)
524 /* Wa_1406697149 (WaDisableBankHangMode:icl) */
527 intel_uncore_read(engine
->uncore
, GEN8_L3CNTLREG
) |
530 /* WaForceEnableNonCoherent:icl
531 * This is not the same workaround as in early Gen9 platforms, where
532 * lacking this could cause system hangs, but coherency performance
533 * overhead is high and only a few compute workloads really need it
534 * (the register is whitelisted in hardware now, so UMDs can opt in
535 * for coherency if they have a good reason).
537 wa_masked_en(wal
, ICL_HDC_MODE
, HDC_FORCE_NON_COHERENT
);
539 /* WaEnableFloatBlendOptimization:icl */
540 wa_add(wal
, GEN10_CACHE_MODE_SS
, 0,
541 _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE
),
542 0 /* write-only, so skip validation */,
545 /* WaDisableGPGPUMidThreadPreemption:icl */
546 wa_masked_field_set(wal
, GEN8_CS_CHICKEN1
,
547 GEN9_PREEMPT_GPGPU_LEVEL_MASK
,
548 GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL
);
550 /* allow headerless messages for preemptible GPGPU context */
551 wa_masked_en(wal
, GEN10_SAMPLER_MODE
,
552 GEN11_SAMPLER_ENABLE_HEADLESS_MSG
);
554 /* Wa_1604278689:icl,ehl */
555 wa_write(wal
, IVB_FBC_RT_BASE
, 0xFFFFFFFF & ~ILK_FBC_RT_VALID
);
556 wa_write_clr_set(wal
, IVB_FBC_RT_BASE_UPPER
,
557 0, /* write-only register; skip validation */
560 /* Wa_1406306137:icl,ehl */
561 wa_masked_en(wal
, GEN9_ROW_CHICKEN4
, GEN11_DIS_PICK_2ND_EU
);
565 * These settings aren't actually workarounds, but general tuning settings that
566 * need to be programmed on dg2 platform.
568 static void dg2_ctx_gt_tuning_init(struct intel_engine_cs
*engine
,
569 struct i915_wa_list
*wal
)
571 wa_masked_en(wal
, CHICKEN_RASTER_2
, TBIMR_FAST_CLIP
);
572 wa_write_clr_set(wal
, GEN11_L3SQCREG5
, L3_PWM_TIMER_INIT_VAL_MASK
,
573 REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK
, 0x7f));
576 FF_MODE2_TDS_TIMER_MASK
,
577 FF_MODE2_TDS_TIMER_128
,
582 * These settings aren't actually workarounds, but general tuning settings that
583 * need to be programmed on several platforms.
585 static void gen12_ctx_gt_tuning_init(struct intel_engine_cs
*engine
,
586 struct i915_wa_list
*wal
)
589 * Although some platforms refer to it as Wa_1604555607, we need to
590 * program it even on those that don't explicitly list that
593 * Note that the programming of this register is further modified
594 * according to the FF_MODE2 guidance given by Wa_1608008084:gen12.
595 * Wa_1608008084 tells us the FF_MODE2 register will return the wrong
596 * value when read. The default value for this register is zero for all
597 * fields and there are no bit masks. So instead of doing a RMW we
598 * should just write TDS timer value. For the same reason read
599 * verification is ignored.
603 FF_MODE2_TDS_TIMER_MASK
,
604 FF_MODE2_TDS_TIMER_128
,
608 static void gen12_ctx_workarounds_init(struct intel_engine_cs
*engine
,
609 struct i915_wa_list
*wal
)
611 gen12_ctx_gt_tuning_init(engine
, wal
);
614 * Wa_1409142259:tgl,dg1,adl-p
615 * Wa_1409347922:tgl,dg1,adl-p
616 * Wa_1409252684:tgl,dg1,adl-p
617 * Wa_1409217633:tgl,dg1,adl-p
618 * Wa_1409207793:tgl,dg1,adl-p
619 * Wa_1409178076:tgl,dg1,adl-p
620 * Wa_1408979724:tgl,dg1,adl-p
621 * Wa_14010443199:tgl,rkl,dg1,adl-p
622 * Wa_14010698770:tgl,rkl,dg1,adl-s,adl-p
623 * Wa_1409342910:tgl,rkl,dg1,adl-s,adl-p
625 wa_masked_en(wal
, GEN11_COMMON_SLICE_CHICKEN3
,
626 GEN12_DISABLE_CPS_AWARE_COLOR_PIPE
);
628 /* WaDisableGPGPUMidThreadPreemption:gen12 */
629 wa_masked_field_set(wal
, GEN8_CS_CHICKEN1
,
630 GEN9_PREEMPT_GPGPU_LEVEL_MASK
,
631 GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL
);
636 * Like in gen12_ctx_gt_tuning_init(), read verification is ignored due
641 FF_MODE2_GS_TIMER_MASK
,
642 FF_MODE2_GS_TIMER_224
,
646 static void dg1_ctx_workarounds_init(struct intel_engine_cs
*engine
,
647 struct i915_wa_list
*wal
)
649 gen12_ctx_workarounds_init(engine
, wal
);
652 wa_masked_dis(wal
, GEN11_COMMON_SLICE_CHICKEN3
,
653 DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN
);
656 wa_masked_en(wal
, HIZ_CHICKEN
,
657 DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE
);
660 static void dg2_ctx_workarounds_init(struct intel_engine_cs
*engine
,
661 struct i915_wa_list
*wal
)
663 dg2_ctx_gt_tuning_init(engine
, wal
);
665 /* Wa_16011186671:dg2_g11 */
666 if (IS_DG2_GRAPHICS_STEP(engine
->i915
, G11
, STEP_A0
, STEP_B0
)) {
667 wa_masked_dis(wal
, VFLSKPD
, DIS_MULT_MISS_RD_SQUASH
);
668 wa_masked_en(wal
, VFLSKPD
, DIS_OVER_FETCH_CACHE
);
671 if (IS_DG2_GRAPHICS_STEP(engine
->i915
, G10
, STEP_A0
, STEP_B0
)) {
672 /* Wa_14010469329:dg2_g10 */
673 wa_masked_en(wal
, GEN11_COMMON_SLICE_CHICKEN3
,
674 XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE
);
677 * Wa_22010465075:dg2_g10
678 * Wa_22010613112:dg2_g10
679 * Wa_14010698770:dg2_g10
681 wa_masked_en(wal
, GEN11_COMMON_SLICE_CHICKEN3
,
682 GEN12_DISABLE_CPS_AWARE_COLOR_PIPE
);
685 /* Wa_16013271637:dg2 */
686 wa_masked_en(wal
, SLICE_COMMON_ECO_CHICKEN1
,
687 MSC_MSAA_REODER_BUF_BYPASS_DISABLE
);
689 /* Wa_14014947963:dg2 */
690 if (IS_DG2_GRAPHICS_STEP(engine
->i915
, G10
, STEP_B0
, STEP_FOREVER
) ||
691 IS_DG2_G11(engine
->i915
) || IS_DG2_G12(engine
->i915
))
692 wa_masked_field_set(wal
, VF_PREEMPTION
, PREEMPTION_VERTEX_COUNT
, 0x4000);
694 /* Wa_15010599737:dg2 */
695 wa_masked_en(wal
, CHICKEN_RASTER_1
, DIS_SF_ROUND_NEAREST_EVEN
);
698 static void fakewa_disable_nestedbb_mode(struct intel_engine_cs
*engine
,
699 struct i915_wa_list
*wal
)
702 * This is a "fake" workaround defined by software to ensure we
703 * maintain reliable, backward-compatible behavior for userspace with
704 * regards to how nested MI_BATCH_BUFFER_START commands are handled.
706 * The per-context setting of MI_MODE[12] determines whether the bits
707 * of a nested MI_BATCH_BUFFER_START instruction should be interpreted
708 * in the traditional manner or whether they should instead use a new
709 * tgl+ meaning that breaks backward compatibility, but allows nesting
710 * into 3rd-level batchbuffers. When this new capability was first
711 * added in TGL, it remained off by default unless a context
712 * intentionally opted in to the new behavior. However Xe_HPG now
713 * flips this on by default and requires that we explicitly opt out if
714 * we don't want the new behavior.
716 * From a SW perspective, we want to maintain the backward-compatible
717 * behavior for userspace, so we'll apply a fake workaround to set it
718 * back to the legacy behavior on platforms where the hardware default
719 * is to break compatibility. At the moment there is no Linux
720 * userspace that utilizes third-level batchbuffers, so this will avoid
721 * userspace from needing to make any changes. Using the legacy
722 * meaning is the correct thing to do. If/when we have userspace
723 * consumers that want to utilize third-level batch nesting, we can
724 * provide a context parameter to allow them to opt-in.
726 wa_masked_dis(wal
, RING_MI_MODE(engine
->mmio_base
), TGL_NESTED_BB_EN
);
729 static void gen12_ctx_gt_mocs_init(struct intel_engine_cs
*engine
,
730 struct i915_wa_list
*wal
)
735 * Some blitter commands do not have a field for MOCS, those
736 * commands will use MOCS index pointed by BLIT_CCTL.
737 * BLIT_CCTL registers are needed to be programmed to un-cached.
739 if (engine
->class == COPY_ENGINE_CLASS
) {
740 mocs
= engine
->gt
->mocs
.uc_index
;
741 wa_write_clr_set(wal
,
742 BLIT_CCTL(engine
->mmio_base
),
744 BLIT_CCTL_MOCS(mocs
, mocs
));
749 * gen12_ctx_gt_fake_wa_init() isn't programming an official workaround
750 * defined by the hardware team, but it is programming general context registers.
751 * Adding that context register programming to the context workarounds
752 * allows us to use the wa framework for proper application and validation.
/*
 * Add the software-defined ("fake") context register programming for @engine
 * to @wal.
 *
 * The nested batch buffer opt-out is added only when the full graphics
 * version is at least 12.55; the MOCS programming helper is always called.
 */
755 gen12_ctx_gt_fake_wa_init(struct intel_engine_cs
*engine
,
756 struct i915_wa_list
*wal
)
/* Only IP versions >= 12.55 get the nested batch buffer opt-out. */
758 if (GRAPHICS_VER_FULL(engine
->i915
) >= IP_VER(12, 55))
759 fakewa_disable_nestedbb_mode(engine
, wal
);
761 gen12_ctx_gt_mocs_init(engine
, wal
);
765 __intel_engine_init_ctx_wa(struct intel_engine_cs
*engine
,
766 struct i915_wa_list
*wal
,
769 struct drm_i915_private
*i915
= engine
->i915
;
771 wa_init_start(wal
, name
, engine
->name
);
773 /* Applies to all engines */
775 * Fake workarounds are not the actual workaround but
776 * programming of context registers using workaround framework.
778 if (GRAPHICS_VER(i915
) >= 12)
779 gen12_ctx_gt_fake_wa_init(engine
, wal
);
781 if (engine
->class != RENDER_CLASS
)
784 if (IS_PONTEVECCHIO(i915
))
785 ; /* noop; none at this time */
786 else if (IS_DG2(i915
))
787 dg2_ctx_workarounds_init(engine
, wal
);
788 else if (IS_XEHPSDV(i915
))
789 ; /* noop; none at this time */
790 else if (IS_DG1(i915
))
791 dg1_ctx_workarounds_init(engine
, wal
);
792 else if (GRAPHICS_VER(i915
) == 12)
793 gen12_ctx_workarounds_init(engine
, wal
);
794 else if (GRAPHICS_VER(i915
) == 11)
795 icl_ctx_workarounds_init(engine
, wal
);
796 else if (IS_COFFEELAKE(i915
) || IS_COMETLAKE(i915
))
797 cfl_ctx_workarounds_init(engine
, wal
);
798 else if (IS_GEMINILAKE(i915
))
799 glk_ctx_workarounds_init(engine
, wal
);
800 else if (IS_KABYLAKE(i915
))
801 kbl_ctx_workarounds_init(engine
, wal
);
802 else if (IS_BROXTON(i915
))
803 bxt_ctx_workarounds_init(engine
, wal
);
804 else if (IS_SKYLAKE(i915
))
805 skl_ctx_workarounds_init(engine
, wal
);
806 else if (IS_CHERRYVIEW(i915
))
807 chv_ctx_workarounds_init(engine
, wal
);
808 else if (IS_BROADWELL(i915
))
809 bdw_ctx_workarounds_init(engine
, wal
);
810 else if (GRAPHICS_VER(i915
) == 7)
811 gen7_ctx_workarounds_init(engine
, wal
);
812 else if (GRAPHICS_VER(i915
) == 6)
813 gen6_ctx_workarounds_init(engine
, wal
);
814 else if (GRAPHICS_VER(i915
) < 8)
817 MISSING_CASE(GRAPHICS_VER(i915
));
/*
 * Public entry point: build the context workaround list of @engine.
 *
 * Thin wrapper that runs __intel_engine_init_ctx_wa() on the engine's own
 * ctx_wa_list, labelling the list "context".
 */
823 void intel_engine_init_ctx_wa(struct intel_engine_cs
*engine
)
825 __intel_engine_init_ctx_wa(engine
, &engine
->ctx_wa_list
, "context");
828 int intel_engine_emit_ctx_wa(struct i915_request
*rq
)
830 struct i915_wa_list
*wal
= &rq
->engine
->ctx_wa_list
;
839 ret
= rq
->engine
->emit_flush(rq
, EMIT_BARRIER
);
843 cs
= intel_ring_begin(rq
, (wal
->count
* 2 + 2));
847 *cs
++ = MI_LOAD_REGISTER_IMM(wal
->count
);
848 for (i
= 0, wa
= wal
->list
; i
< wal
->count
; i
++, wa
++) {
849 *cs
++ = i915_mmio_reg_offset(wa
->reg
);
854 intel_ring_advance(rq
, cs
);
856 ret
= rq
->engine
->emit_flush(rq
, EMIT_BARRIER
);
/*
 * GT workarounds for gen4: adds one masked disable of RC_OP_FLUSH_ENABLE in
 * CACHE_MODE_0 to @wal. @gt is not referenced by the visible body.
 */
864 gen4_gt_workarounds_init(struct intel_gt
*gt
,
865 struct i915_wa_list
*wal
)
867 /* WaDisable_RenderCache_OperationalFlush:gen4,ilk */
868 wa_masked_dis(wal
, CACHE_MODE_0
, RC_OP_FLUSH_ENABLE
);
/*
 * GT workarounds for g4x: the gen4 GT list plus one masked enable of
 * CM0_PIPELINED_RENDER_FLUSH_DISABLE in CACHE_MODE_0.
 */
872 g4x_gt_workarounds_init(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
/* Inherit everything gen4 applies before adding the g4x entry. */
874 gen4_gt_workarounds_init(gt
, wal
);
876 /* WaDisableRenderCachePipelinedFlush:g4x,ilk */
877 wa_masked_en(wal
, CACHE_MODE_0
, CM0_PIPELINED_RENDER_FLUSH_DISABLE
);
/*
 * GT workarounds for ilk: the g4x GT list (which itself includes the gen4
 * one) plus a masked enable of _3D_CHICKEN2_WM_READ_PIPELINED in
 * _3D_CHICKEN2.
 */
881 ilk_gt_workarounds_init(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
883 g4x_gt_workarounds_init(gt
, wal
);
885 wa_masked_en(wal
, _3D_CHICKEN2
, _3D_CHICKEN2_WM_READ_PIPELINED
);
889 snb_gt_workarounds_init(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
894 ivb_gt_workarounds_init(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
896 /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
898 GEN7_COMMON_SLICE_CHICKEN1
,
899 GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC
);
901 /* WaApplyL3ControlAndL3ChickenMode:ivb */
902 wa_write(wal
, GEN7_L3CNTLREG1
, GEN7_WA_FOR_GEN7_L3_CONTROL
);
903 wa_write(wal
, GEN7_L3_CHICKEN_MODE_REGISTER
, GEN7_WA_L3_CHICKEN_MODE
);
905 /* WaForceL3Serialization:ivb */
906 wa_write_clr(wal
, GEN7_L3SQCREG4
, L3SQ_URB_READ_CAM_MATCH_DISABLE
);
910 vlv_gt_workarounds_init(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
912 /* WaForceL3Serialization:vlv */
913 wa_write_clr(wal
, GEN7_L3SQCREG4
, L3SQ_URB_READ_CAM_MATCH_DISABLE
);
916 * WaIncreaseL3CreditsForVLVB0:vlv
917 * This is the hardware default actually.
919 wa_write(wal
, GEN7_L3SQCREG1
, VLV_B0_WA_L3SQCREG1_VALUE
);
923 hsw_gt_workarounds_init(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
925 /* L3 caching of data atomics doesn't work -- disable it. */
926 wa_write(wal
, HSW_SCRATCH1
, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE
);
930 _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE
),
931 0 /* XXX does this reg exist? */, true);
933 /* WaVSRefCountFullforceMissDisable:hsw */
934 wa_write_clr(wal
, GEN7_FF_THREAD_MODE
, GEN7_FF_VS_REF_CNT_FFME
);
938 gen9_wa_init_mcr(struct drm_i915_private
*i915
, struct i915_wa_list
*wal
)
940 const struct sseu_dev_info
*sseu
= &to_gt(i915
)->info
.sseu
;
941 unsigned int slice
, subslice
;
944 GEM_BUG_ON(GRAPHICS_VER(i915
) != 9);
947 * WaProgramMgsrForCorrectSliceSpecificMmioReads:gen9,glk,kbl,cml
948 * Before any MMIO read into slice/subslice specific registers, MCR
949 * packet control register needs to be programmed to point to any
950 * enabled s/ss pair. Otherwise, incorrect values will be returned.
951 * This means each subsequent MMIO read will be forwarded to a
952 * specific s/ss combination, but this is OK since these registers
953 * are consistent across s/ss in almost all cases. In the rare
954 * occasions, such as INSTDONE, where this value is dependent
955 * on s/ss combo, the read should be done with read_subslice_reg.
957 slice
= ffs(sseu
->slice_mask
) - 1;
958 GEM_BUG_ON(slice
>= ARRAY_SIZE(sseu
->subslice_mask
.hsw
));
959 subslice
= ffs(intel_sseu_get_hsw_subslices(sseu
, slice
));
960 GEM_BUG_ON(!subslice
);
964 * We use GEN8_MCR..() macros to calculate the |mcr| value for
965 * Gen9 to address WaProgramMgsrForCorrectSliceSpecificMmioReads
967 mcr
= GEN8_MCR_SLICE(slice
) | GEN8_MCR_SUBSLICE(subslice
);
968 mcr_mask
= GEN8_MCR_SLICE_MASK
| GEN8_MCR_SUBSLICE_MASK
;
970 drm_dbg(&i915
->drm
, "MCR slice:%d/subslice:%d = %x\n", slice
, subslice
, mcr
);
972 wa_write_clr_set(wal
, GEN8_MCR_SELECTOR
, mcr_mask
, mcr
);
976 gen9_gt_workarounds_init(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
978 struct drm_i915_private
*i915
= gt
->i915
;
980 /* WaProgramMgsrForCorrectSliceSpecificMmioReads:glk,kbl,cml,gen9 */
981 gen9_wa_init_mcr(i915
, wal
);
983 /* WaDisableKillLogic:bxt,skl,kbl */
984 if (!IS_COFFEELAKE(i915
) && !IS_COMETLAKE(i915
))
990 /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
992 * Must match Display Engine. See
993 * WaCompressedResourceDisplayNewHashMode.
997 MMCD_PCLA
| MMCD_HOTSPOT_EN
);
1000 /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
1003 BDW_DISABLE_HDC_INVALIDATION
);
1007 skl_gt_workarounds_init(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1009 gen9_gt_workarounds_init(gt
, wal
);
1011 /* WaDisableGafsUnitClkGating:skl */
1014 GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE
);
1016 /* WaInPlaceDecompressionHang:skl */
1017 if (IS_SKL_GRAPHICS_STEP(gt
->i915
, STEP_A0
, STEP_H0
))
1019 GEN9_GAMT_ECO_REG_RW_IA
,
1020 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS
);
1024 kbl_gt_workarounds_init(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1026 gen9_gt_workarounds_init(gt
, wal
);
1028 /* WaDisableDynamicCreditSharing:kbl */
1029 if (IS_KBL_GRAPHICS_STEP(gt
->i915
, 0, STEP_C0
))
1032 GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING
);
1034 /* WaDisableGafsUnitClkGating:kbl */
1037 GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE
);
1039 /* WaInPlaceDecompressionHang:kbl */
1041 GEN9_GAMT_ECO_REG_RW_IA
,
1042 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS
);
/*
 * GT workarounds for glk: nothing platform-specific is added in the visible
 * body, only the shared gen9 GT workaround list.
 */
1046 glk_gt_workarounds_init(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1048 gen9_gt_workarounds_init(gt
, wal
);
1052 cfl_gt_workarounds_init(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1054 gen9_gt_workarounds_init(gt
, wal
);
1056 /* WaDisableGafsUnitClkGating:cfl */
1059 GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE
);
1061 /* WaInPlaceDecompressionHang:cfl */
1063 GEN9_GAMT_ECO_REG_RW_IA
,
1064 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS
);
1067 static void __set_mcr_steering(struct i915_wa_list
*wal
,
1068 i915_reg_t steering_reg
,
1069 unsigned int slice
, unsigned int subslice
)
1073 mcr
= GEN11_MCR_SLICE(slice
) | GEN11_MCR_SUBSLICE(subslice
);
1074 mcr_mask
= GEN11_MCR_SLICE_MASK
| GEN11_MCR_SUBSLICE_MASK
;
1076 wa_write_clr_set(wal
, steering_reg
, mcr_mask
, mcr
);
/*
 * Queue the steering workaround for @slice/@subslice and record that choice
 * as the GT's default steering.
 *
 * Adds a GEN8_MCR_SELECTOR entry to @wal via __set_mcr_steering(), stores
 * @slice and @subslice in gt->default_steering (groupid / instanceid), and
 * reports the steering through a debug printer when DRM_UT_DRIVER debugging
 * is enabled.
 */
1079 static void __add_mcr_wa(struct intel_gt
*gt
, struct i915_wa_list
*wal
,
1080 unsigned int slice
, unsigned int subslice
)
1082 struct drm_printer p
= drm_debug_printer("MCR Steering:");
1084 __set_mcr_steering(wal
, GEN8_MCR_SELECTOR
, slice
, subslice
);
/* Record the selection: slice -> groupid, subslice -> instanceid. */
1086 gt
->default_steering
.groupid
= slice
;
1087 gt
->default_steering
.instanceid
= subslice
;
/* The report is only produced when driver debug output is enabled. */
1089 if (drm_debug_enabled(DRM_UT_DRIVER
))
1090 intel_gt_mcr_report_steering(&p
, gt
, false);
1094 icl_wa_init_mcr(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1096 const struct sseu_dev_info
*sseu
= >
->info
.sseu
;
1097 unsigned int subslice
;
1099 GEM_BUG_ON(GRAPHICS_VER(gt
->i915
) < 11);
1100 GEM_BUG_ON(hweight8(sseu
->slice_mask
) > 1);
1103 * Although a platform may have subslices, we need to always steer
1104 * reads to the lowest instance that isn't fused off. When Render
1105 * Power Gating is enabled, grabbing forcewake will only power up a
1106 * single subslice (the "minconfig") if there isn't a real workload
1107 * that needs to be run; this means that if we steer register reads to
1108 * one of the higher subslices, we run the risk of reading back 0's or
1111 subslice
= __ffs(intel_sseu_get_hsw_subslices(sseu
, 0));
1114 * If the subslice we picked above also steers us to a valid L3 bank,
1115 * then we can just rely on the default steering and won't need to
1116 * worry about explicitly re-steering L3BANK reads later.
1118 if (gt
->info
.l3bank_mask
& BIT(subslice
))
1119 gt
->steering_table
[L3BANK
] = NULL
;
1121 __add_mcr_wa(gt
, wal
, 0, subslice
);
1125 xehp_init_mcr(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1127 const struct sseu_dev_info
*sseu
= >
->info
.sseu
;
1128 unsigned long slice
, subslice
= 0, slice_mask
= 0;
1133 * On Xe_HP the steering increases in complexity. There are now several
1134 * more units that require steering and we're not guaranteed to be able
1135 * to find a common setting for all of them. These are:
1136 * - GSLICE (fusable)
1137 * - DSS (sub-unit within gslice; fusable)
1138 * - L3 Bank (fusable)
1139 * - MSLICE (fusable)
1140 * - LNCF (sub-unit within mslice; always present if mslice is present)
1142 * We'll do our default/implicit steering based on GSLICE (in the
1143 * sliceid field) and DSS (in the subsliceid field). If we can
1144 * find overlap between the valid MSLICE and/or LNCF values with
1145 * a suitable GSLICE, then we can just re-use the default value and
1146 * skip and explicit steering at runtime.
1148 * We only need to look for overlap between GSLICE/MSLICE/LNCF to find
1149 * a valid sliceid value. DSS steering is the only type of steering
1150 * that utilizes the 'subsliceid' bits.
1152 * Also note that, even though the steering domain is called "GSlice"
1153 * and it is encoded in the register using the gslice format, the spec
1154 * says that the combined (geometry | compute) fuse should be used to
1155 * select the steering.
1158 /* Find the potential gslice candidates */
1159 slice_mask
= intel_slicemask_from_xehp_dssmask(sseu
->subslice_mask
,
1160 GEN_DSS_PER_GSLICE
);
1163 * Find the potential LNCF candidates. Either LNCF within a valid
1166 for_each_set_bit(i
, >
->info
.mslice_mask
, GEN12_MAX_MSLICES
)
1167 lncf_mask
|= (0x3 << (i
* 2));
1170 * Are there any sliceid values that work for both GSLICE and LNCF
1173 if (slice_mask
& lncf_mask
) {
1174 slice_mask
&= lncf_mask
;
1175 gt
->steering_table
[LNCF
] = NULL
;
1178 /* How about sliceid values that also work for MSLICE steering? */
1179 if (slice_mask
& gt
->info
.mslice_mask
) {
1180 slice_mask
&= gt
->info
.mslice_mask
;
1181 gt
->steering_table
[MSLICE
] = NULL
;
1184 slice
= __ffs(slice_mask
);
1185 subslice
= intel_sseu_find_first_xehp_dss(sseu
, GEN_DSS_PER_GSLICE
, slice
) %
1188 __add_mcr_wa(gt
, wal
, slice
, subslice
);
1191 * SQIDI ranges are special because they use different steering
1192 * registers than everything else we work with. On XeHP SDV and
1193 * DG2-G10, any value in the steering registers will work fine since
1194 * all instances are present, but DG2-G11 only has SQIDI instances at
1195 * ID's 2 and 3, so we need to steer to one of those. For simplicity
1196 * we'll just steer to a hardcoded "2" since that value will work
1199 __set_mcr_steering(wal
, MCFG_MCR_SELECTOR
, 0, 2);
1200 __set_mcr_steering(wal
, SF_MCR_SELECTOR
, 0, 2);
1204 pvc_init_mcr(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1209 * Setup implicit steering for COMPUTE and DSS ranges to the first
1210 * non-fused-off DSS. All other types of MCR registers will be
1211 * explicitly steered.
1213 dss
= intel_sseu_find_first_xehp_dss(>
->info
.sseu
, 0, 0);
1214 __add_mcr_wa(gt
, wal
, dss
/ GEN_DSS_PER_CSLICE
, dss
% GEN_DSS_PER_CSLICE
);
1218 icl_gt_workarounds_init(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1220 struct drm_i915_private
*i915
= gt
->i915
;
1222 icl_wa_init_mcr(gt
, wal
);
1224 /* WaModifyGamTlbPartitioning:icl */
1225 wa_write_clr_set(wal
,
1226 GEN11_GACB_PERF_CTRL
,
1227 GEN11_HASH_CTRL_MASK
,
1228 GEN11_HASH_CTRL_BIT0
| GEN11_HASH_CTRL_BIT4
);
1230 /* Wa_1405766107:icl
1231 * Formerly known as WaCL2SFHalfMaxAlloc
1235 GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC
|
1236 GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC
);
1239 * Formerly known as WaDisCtxReload
1242 GEN8_GAMW_ECO_DEV_RW_IA
,
1243 GAMW_ECO_DEV_CTX_RELOAD_DISABLE
);
1245 /* Wa_1406463099:icl
1246 * Formerly known as WaGamTlbPendError
1250 GAMT_CHKN_DISABLE_L3_COH_PIPE
);
1252 /* Wa_1407352427:icl,ehl */
1253 wa_write_or(wal
, UNSLICE_UNIT_LEVEL_CLKGATE2
,
1254 PSDUNIT_CLKGATE_DIS
);
1256 /* Wa_1406680159:icl,ehl */
1258 SUBSLICE_UNIT_LEVEL_CLKGATE
,
1259 GWUNIT_CLKGATE_DIS
);
1261 /* Wa_1607087056:icl,ehl,jsl */
1262 if (IS_ICELAKE(i915
) ||
1263 IS_JSL_EHL_GRAPHICS_STEP(i915
, STEP_A0
, STEP_B0
))
1265 SLICE_UNIT_LEVEL_CLKGATE
,
1266 L3_CLKGATE_DIS
| L3_CR2X_CLKGATE_DIS
);
1269 * This is not a documented workaround, but rather an optimization
1270 * to reduce sampler power.
1272 wa_write_clr(wal
, GEN10_DFR_RATIO_EN_AND_CHICKEN
, DFR_DISABLE
);
1276 * Though there are per-engine instances of these registers,
1277 * they retain their value through engine resets and should
1278 * only be provided on the GT workaround list rather than
1279 * the engine-specific workaround list.
1282 wa_14011060649(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1284 struct intel_engine_cs
*engine
;
1287 for_each_engine(engine
, gt
, id
) {
1288 if (engine
->class != VIDEO_DECODE_CLASS
||
1289 (engine
->instance
% 2))
1292 wa_write_or(wal
, VDBOX_CGCTL3F10(engine
->mmio_base
),
1293 IECPUNIT_CLKGATE_DIS
);
1298 gen12_gt_workarounds_init(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1300 icl_wa_init_mcr(gt
, wal
);
1302 /* Wa_14011060649:tgl,rkl,dg1,adl-s,adl-p */
1303 wa_14011060649(gt
, wal
);
1305 /* Wa_14011059788:tgl,rkl,adl-s,dg1,adl-p */
1306 wa_write_or(wal
, GEN10_DFR_RATIO_EN_AND_CHICKEN
, DFR_DISABLE
);
1310 tgl_gt_workarounds_init(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1312 struct drm_i915_private
*i915
= gt
->i915
;
1314 gen12_gt_workarounds_init(gt
, wal
);
1316 /* Wa_1409420604:tgl */
1317 if (IS_TGL_UY_GRAPHICS_STEP(i915
, STEP_A0
, STEP_B0
))
1319 SUBSLICE_UNIT_LEVEL_CLKGATE2
,
1320 CPSSUNIT_CLKGATE_DIS
);
1322 /* Wa_1607087056:tgl also know as BUG:1409180338 */
1323 if (IS_TGL_UY_GRAPHICS_STEP(i915
, STEP_A0
, STEP_B0
))
1325 SLICE_UNIT_LEVEL_CLKGATE
,
1326 L3_CLKGATE_DIS
| L3_CR2X_CLKGATE_DIS
);
1328 /* Wa_1408615072:tgl[a0] */
1329 if (IS_TGL_UY_GRAPHICS_STEP(i915
, STEP_A0
, STEP_B0
))
1330 wa_write_or(wal
, UNSLICE_UNIT_LEVEL_CLKGATE2
,
1331 VSUNIT_CLKGATE_DIS_TGL
);
1335 dg1_gt_workarounds_init(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1337 struct drm_i915_private
*i915
= gt
->i915
;
1339 gen12_gt_workarounds_init(gt
, wal
);
1341 /* Wa_1607087056:dg1 */
1342 if (IS_DG1_GRAPHICS_STEP(i915
, STEP_A0
, STEP_B0
))
1344 SLICE_UNIT_LEVEL_CLKGATE
,
1345 L3_CLKGATE_DIS
| L3_CR2X_CLKGATE_DIS
);
1347 /* Wa_1409420604:dg1 */
1350 SUBSLICE_UNIT_LEVEL_CLKGATE2
,
1351 CPSSUNIT_CLKGATE_DIS
);
1353 /* Wa_1408615072:dg1 */
1354 /* Empirical testing shows this register is unaffected by engine reset. */
1356 wa_write_or(wal
, UNSLICE_UNIT_LEVEL_CLKGATE2
,
1357 VSUNIT_CLKGATE_DIS_TGL
);
1361 xehpsdv_gt_workarounds_init(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1363 struct drm_i915_private
*i915
= gt
->i915
;
1365 xehp_init_mcr(gt
, wal
);
1367 /* Wa_1409757795:xehpsdv */
1368 wa_write_or(wal
, SCCGCTL94DC
, CG3DDISURB
);
1370 /* Wa_16011155590:xehpsdv */
1371 if (IS_XEHPSDV_GRAPHICS_STEP(i915
, STEP_A0
, STEP_B0
))
1372 wa_write_or(wal
, UNSLICE_UNIT_LEVEL_CLKGATE
,
1373 TSGUNIT_CLKGATE_DIS
);
1375 /* Wa_14011780169:xehpsdv */
1376 if (IS_XEHPSDV_GRAPHICS_STEP(i915
, STEP_B0
, STEP_FOREVER
)) {
1377 wa_write_or(wal
, UNSLCGCTL9440
, GAMTLBOACS_CLKGATE_DIS
|
1378 GAMTLBVDBOX7_CLKGATE_DIS
|
1379 GAMTLBVDBOX6_CLKGATE_DIS
|
1380 GAMTLBVDBOX5_CLKGATE_DIS
|
1381 GAMTLBVDBOX4_CLKGATE_DIS
|
1382 GAMTLBVDBOX3_CLKGATE_DIS
|
1383 GAMTLBVDBOX2_CLKGATE_DIS
|
1384 GAMTLBVDBOX1_CLKGATE_DIS
|
1385 GAMTLBVDBOX0_CLKGATE_DIS
|
1386 GAMTLBKCR_CLKGATE_DIS
|
1387 GAMTLBGUC_CLKGATE_DIS
|
1388 GAMTLBBLT_CLKGATE_DIS
);
1389 wa_write_or(wal
, UNSLCGCTL9444
, GAMTLBGFXA0_CLKGATE_DIS
|
1390 GAMTLBGFXA1_CLKGATE_DIS
|
1391 GAMTLBCOMPA0_CLKGATE_DIS
|
1392 GAMTLBCOMPA1_CLKGATE_DIS
|
1393 GAMTLBCOMPB0_CLKGATE_DIS
|
1394 GAMTLBCOMPB1_CLKGATE_DIS
|
1395 GAMTLBCOMPC0_CLKGATE_DIS
|
1396 GAMTLBCOMPC1_CLKGATE_DIS
|
1397 GAMTLBCOMPD0_CLKGATE_DIS
|
1398 GAMTLBCOMPD1_CLKGATE_DIS
|
1399 GAMTLBMERT_CLKGATE_DIS
|
1400 GAMTLBVEBOX3_CLKGATE_DIS
|
1401 GAMTLBVEBOX2_CLKGATE_DIS
|
1402 GAMTLBVEBOX1_CLKGATE_DIS
|
1403 GAMTLBVEBOX0_CLKGATE_DIS
);
1406 /* Wa_16012725990:xehpsdv */
1407 if (IS_XEHPSDV_GRAPHICS_STEP(i915
, STEP_A1
, STEP_FOREVER
))
1408 wa_write_or(wal
, UNSLICE_UNIT_LEVEL_CLKGATE
, VFUNIT_CLKGATE_DIS
);
1410 /* Wa_14011060649:xehpsdv */
1411 wa_14011060649(gt
, wal
);
1415 dg2_gt_workarounds_init(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1417 struct intel_engine_cs
*engine
;
1420 xehp_init_mcr(gt
, wal
);
1422 /* Wa_14011060649:dg2 */
1423 wa_14011060649(gt
, wal
);
1426 * Although there are per-engine instances of these registers,
1427 * they technically exist outside the engine itself and are not
1428 * impacted by engine resets. Furthermore, they're part of the
1429 * GuC blacklist so trying to treat them as engine workarounds
1430 * will result in GuC initialization failure and a wedged GPU.
1432 for_each_engine(engine
, gt
, id
) {
1433 if (engine
->class != VIDEO_DECODE_CLASS
)
1436 /* Wa_16010515920:dg2_g10 */
1437 if (IS_DG2_GRAPHICS_STEP(gt
->i915
, G10
, STEP_A0
, STEP_B0
))
1438 wa_write_or(wal
, VDBOX_CGCTL3F18(engine
->mmio_base
),
1439 ALNUNIT_CLKGATE_DIS
);
1442 if (IS_DG2_G10(gt
->i915
)) {
1443 /* Wa_22010523718:dg2 */
1444 wa_write_or(wal
, UNSLICE_UNIT_LEVEL_CLKGATE
,
1445 CG3DDISCFEG_CLKGATE_DIS
);
1447 /* Wa_14011006942:dg2 */
1448 wa_write_or(wal
, SUBSLICE_UNIT_LEVEL_CLKGATE
,
1449 DSS_ROUTER_CLKGATE_DIS
);
1452 if (IS_DG2_GRAPHICS_STEP(gt
->i915
, G10
, STEP_A0
, STEP_B0
)) {
1453 /* Wa_14010948348:dg2_g10 */
1454 wa_write_or(wal
, UNSLCGCTL9430
, MSQDUNIT_CLKGATE_DIS
);
1456 /* Wa_14011037102:dg2_g10 */
1457 wa_write_or(wal
, UNSLCGCTL9444
, LTCDD_CLKGATE_DIS
);
1459 /* Wa_14011371254:dg2_g10 */
1460 wa_write_or(wal
, SLICE_UNIT_LEVEL_CLKGATE
, NODEDSS_CLKGATE_DIS
);
1462 /* Wa_14011431319:dg2_g10 */
1463 wa_write_or(wal
, UNSLCGCTL9440
, GAMTLBOACS_CLKGATE_DIS
|
1464 GAMTLBVDBOX7_CLKGATE_DIS
|
1465 GAMTLBVDBOX6_CLKGATE_DIS
|
1466 GAMTLBVDBOX5_CLKGATE_DIS
|
1467 GAMTLBVDBOX4_CLKGATE_DIS
|
1468 GAMTLBVDBOX3_CLKGATE_DIS
|
1469 GAMTLBVDBOX2_CLKGATE_DIS
|
1470 GAMTLBVDBOX1_CLKGATE_DIS
|
1471 GAMTLBVDBOX0_CLKGATE_DIS
|
1472 GAMTLBKCR_CLKGATE_DIS
|
1473 GAMTLBGUC_CLKGATE_DIS
|
1474 GAMTLBBLT_CLKGATE_DIS
);
1475 wa_write_or(wal
, UNSLCGCTL9444
, GAMTLBGFXA0_CLKGATE_DIS
|
1476 GAMTLBGFXA1_CLKGATE_DIS
|
1477 GAMTLBCOMPA0_CLKGATE_DIS
|
1478 GAMTLBCOMPA1_CLKGATE_DIS
|
1479 GAMTLBCOMPB0_CLKGATE_DIS
|
1480 GAMTLBCOMPB1_CLKGATE_DIS
|
1481 GAMTLBCOMPC0_CLKGATE_DIS
|
1482 GAMTLBCOMPC1_CLKGATE_DIS
|
1483 GAMTLBCOMPD0_CLKGATE_DIS
|
1484 GAMTLBCOMPD1_CLKGATE_DIS
|
1485 GAMTLBMERT_CLKGATE_DIS
|
1486 GAMTLBVEBOX3_CLKGATE_DIS
|
1487 GAMTLBVEBOX2_CLKGATE_DIS
|
1488 GAMTLBVEBOX1_CLKGATE_DIS
|
1489 GAMTLBVEBOX0_CLKGATE_DIS
);
1491 /* Wa_14010569222:dg2_g10 */
1492 wa_write_or(wal
, UNSLICE_UNIT_LEVEL_CLKGATE
,
1493 GAMEDIA_CLKGATE_DIS
);
1495 /* Wa_14011028019:dg2_g10 */
1496 wa_write_or(wal
, SSMCGCTL9530
, RTFUNIT_CLKGATE_DIS
);
1499 /* Wa_14014830051:dg2 */
1500 wa_write_clr(wal
, SARB_CHICKEN1
, COMP_CKN_IN
);
1503 * The following are not actually "workarounds" but rather
1504 * recommended tuning settings documented in the bspec's
1505 * performance guide section.
1507 wa_write_or(wal
, GEN12_SQCM
, EN_32B_ACCESS
);
1509 /* Wa_14015795083 */
1510 wa_write_clr(wal
, GEN7_MISCCPCTL
, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE
);
1514 pvc_gt_workarounds_init(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1516 pvc_init_mcr(gt
, wal
);
1518 /* Wa_14015795083 */
1519 wa_write_clr(wal
, GEN7_MISCCPCTL
, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE
);
1523 gt_init_workarounds(struct intel_gt
*gt
, struct i915_wa_list
*wal
)
1525 struct drm_i915_private
*i915
= gt
->i915
;
1527 if (IS_PONTEVECCHIO(i915
))
1528 pvc_gt_workarounds_init(gt
, wal
);
1529 else if (IS_DG2(i915
))
1530 dg2_gt_workarounds_init(gt
, wal
);
1531 else if (IS_XEHPSDV(i915
))
1532 xehpsdv_gt_workarounds_init(gt
, wal
);
1533 else if (IS_DG1(i915
))
1534 dg1_gt_workarounds_init(gt
, wal
);
1535 else if (IS_TIGERLAKE(i915
))
1536 tgl_gt_workarounds_init(gt
, wal
);
1537 else if (GRAPHICS_VER(i915
) == 12)
1538 gen12_gt_workarounds_init(gt
, wal
);
1539 else if (GRAPHICS_VER(i915
) == 11)
1540 icl_gt_workarounds_init(gt
, wal
);
1541 else if (IS_COFFEELAKE(i915
) || IS_COMETLAKE(i915
))
1542 cfl_gt_workarounds_init(gt
, wal
);
1543 else if (IS_GEMINILAKE(i915
))
1544 glk_gt_workarounds_init(gt
, wal
);
1545 else if (IS_KABYLAKE(i915
))
1546 kbl_gt_workarounds_init(gt
, wal
);
1547 else if (IS_BROXTON(i915
))
1548 gen9_gt_workarounds_init(gt
, wal
);
1549 else if (IS_SKYLAKE(i915
))
1550 skl_gt_workarounds_init(gt
, wal
);
1551 else if (IS_HASWELL(i915
))
1552 hsw_gt_workarounds_init(gt
, wal
);
1553 else if (IS_VALLEYVIEW(i915
))
1554 vlv_gt_workarounds_init(gt
, wal
);
1555 else if (IS_IVYBRIDGE(i915
))
1556 ivb_gt_workarounds_init(gt
, wal
);
1557 else if (GRAPHICS_VER(i915
) == 6)
1558 snb_gt_workarounds_init(gt
, wal
);
1559 else if (GRAPHICS_VER(i915
) == 5)
1560 ilk_gt_workarounds_init(gt
, wal
);
1561 else if (IS_G4X(i915
))
1562 g4x_gt_workarounds_init(gt
, wal
);
1563 else if (GRAPHICS_VER(i915
) == 4)
1564 gen4_gt_workarounds_init(gt
, wal
);
1565 else if (GRAPHICS_VER(i915
) <= 8)
1568 MISSING_CASE(GRAPHICS_VER(i915
));
1571 void intel_gt_init_workarounds(struct intel_gt
*gt
)
1573 struct i915_wa_list
*wal
= >
->wa_list
;
1575 wa_init_start(wal
, "GT", "global");
1576 gt_init_workarounds(gt
, wal
);
1577 wa_init_finish(wal
);
1580 static enum forcewake_domains
1581 wal_get_fw_for_rmw(struct intel_uncore
*uncore
, const struct i915_wa_list
*wal
)
1583 enum forcewake_domains fw
= 0;
1587 for (i
= 0, wa
= wal
->list
; i
< wal
->count
; i
++, wa
++)
1588 fw
|= intel_uncore_forcewake_for_reg(uncore
,
1597 wa_verify(const struct i915_wa
*wa
, u32 cur
, const char *name
, const char *from
)
1599 if ((cur
^ wa
->set
) & wa
->read
) {
1600 DRM_ERROR("%s workaround lost on %s! (reg[%x]=0x%x, relevant bits were 0x%x vs expected 0x%x)\n",
1601 name
, from
, i915_mmio_reg_offset(wa
->reg
),
1602 cur
, cur
& wa
->read
, wa
->set
& wa
->read
);
1611 wa_list_apply(struct intel_gt
*gt
, const struct i915_wa_list
*wal
)
1613 struct intel_uncore
*uncore
= gt
->uncore
;
1614 enum forcewake_domains fw
;
1615 unsigned long flags
;
1622 fw
= wal_get_fw_for_rmw(uncore
, wal
);
1624 spin_lock_irqsave(&uncore
->lock
, flags
);
1625 intel_uncore_forcewake_get__locked(uncore
, fw
);
1627 for (i
= 0, wa
= wal
->list
; i
< wal
->count
; i
++, wa
++) {
1630 /* open-coded rmw due to steering */
1631 old
= wa
->clr
? intel_gt_mcr_read_any_fw(gt
, wa
->reg
) : 0;
1632 val
= (old
& ~wa
->clr
) | wa
->set
;
1633 if (val
!= old
|| !wa
->clr
)
1634 intel_uncore_write_fw(uncore
, wa
->reg
, val
);
1636 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM
))
1637 wa_verify(wa
, intel_gt_mcr_read_any_fw(gt
, wa
->reg
),
1638 wal
->name
, "application");
1641 intel_uncore_forcewake_put__locked(uncore
, fw
);
1642 spin_unlock_irqrestore(&uncore
->lock
, flags
);
1645 void intel_gt_apply_workarounds(struct intel_gt
*gt
)
1647 wa_list_apply(gt
, >
->wa_list
);
1650 static bool wa_list_verify(struct intel_gt
*gt
,
1651 const struct i915_wa_list
*wal
,
1654 struct intel_uncore
*uncore
= gt
->uncore
;
1656 enum forcewake_domains fw
;
1657 unsigned long flags
;
1661 fw
= wal_get_fw_for_rmw(uncore
, wal
);
1663 spin_lock_irqsave(&uncore
->lock
, flags
);
1664 intel_uncore_forcewake_get__locked(uncore
, fw
);
1666 for (i
= 0, wa
= wal
->list
; i
< wal
->count
; i
++, wa
++)
1668 intel_gt_mcr_read_any_fw(gt
, wa
->reg
),
1671 intel_uncore_forcewake_put__locked(uncore
, fw
);
1672 spin_unlock_irqrestore(&uncore
->lock
, flags
);
1677 bool intel_gt_verify_workarounds(struct intel_gt
*gt
, const char *from
)
1679 return wa_list_verify(gt
, >
->wa_list
, from
);
1683 static bool is_nonpriv_flags_valid(u32 flags
)
1685 /* Check only valid flag bits are set */
1686 if (flags
& ~RING_FORCE_TO_NONPRIV_MASK_VALID
)
1689 /* NB: Only 3 out of 4 enum values are valid for access field */
1690 if ((flags
& RING_FORCE_TO_NONPRIV_ACCESS_MASK
) ==
1691 RING_FORCE_TO_NONPRIV_ACCESS_INVALID
)
1698 whitelist_reg_ext(struct i915_wa_list
*wal
, i915_reg_t reg
, u32 flags
)
1700 struct i915_wa wa
= {
1704 if (GEM_DEBUG_WARN_ON(wal
->count
>= RING_MAX_NONPRIV_SLOTS
))
1707 if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags
)))
1710 wa
.reg
.reg
|= flags
;
1715 whitelist_reg(struct i915_wa_list
*wal
, i915_reg_t reg
)
1717 whitelist_reg_ext(wal
, reg
, RING_FORCE_TO_NONPRIV_ACCESS_RW
);
1720 static void gen9_whitelist_build(struct i915_wa_list
*w
)
1722 /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
1723 whitelist_reg(w
, GEN9_CTX_PREEMPT_REG
);
1725 /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
1726 whitelist_reg(w
, GEN8_CS_CHICKEN1
);
1728 /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
1729 whitelist_reg(w
, GEN8_HDC_CHICKEN1
);
1731 /* WaSendPushConstantsFromMMIO:skl,bxt */
1732 whitelist_reg(w
, COMMON_SLICE_CHICKEN2
);
1735 static void skl_whitelist_build(struct intel_engine_cs
*engine
)
1737 struct i915_wa_list
*w
= &engine
->whitelist
;
1739 if (engine
->class != RENDER_CLASS
)
1742 gen9_whitelist_build(w
);
1744 /* WaDisableLSQCROPERFforOCL:skl */
1745 whitelist_reg(w
, GEN8_L3SQCREG4
);
1748 static void bxt_whitelist_build(struct intel_engine_cs
*engine
)
1750 if (engine
->class != RENDER_CLASS
)
1753 gen9_whitelist_build(&engine
->whitelist
);
1756 static void kbl_whitelist_build(struct intel_engine_cs
*engine
)
1758 struct i915_wa_list
*w
= &engine
->whitelist
;
1760 if (engine
->class != RENDER_CLASS
)
1763 gen9_whitelist_build(w
);
1765 /* WaDisableLSQCROPERFforOCL:kbl */
1766 whitelist_reg(w
, GEN8_L3SQCREG4
);
1769 static void glk_whitelist_build(struct intel_engine_cs
*engine
)
1771 struct i915_wa_list
*w
= &engine
->whitelist
;
1773 if (engine
->class != RENDER_CLASS
)
1776 gen9_whitelist_build(w
);
1778 /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
1779 whitelist_reg(w
, GEN9_SLICE_COMMON_ECO_CHICKEN1
);
1782 static void cfl_whitelist_build(struct intel_engine_cs
*engine
)
1784 struct i915_wa_list
*w
= &engine
->whitelist
;
1786 if (engine
->class != RENDER_CLASS
)
1789 gen9_whitelist_build(w
);
1792 * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml
1794 * This covers 4 register which are next to one another :
1795 * - PS_INVOCATION_COUNT
1796 * - PS_INVOCATION_COUNT_UDW
1798 * - PS_DEPTH_COUNT_UDW
1800 whitelist_reg_ext(w
, PS_INVOCATION_COUNT
,
1801 RING_FORCE_TO_NONPRIV_ACCESS_RD
|
1802 RING_FORCE_TO_NONPRIV_RANGE_4
);
1805 static void allow_read_ctx_timestamp(struct intel_engine_cs
*engine
)
1807 struct i915_wa_list
*w
= &engine
->whitelist
;
1809 if (engine
->class != RENDER_CLASS
)
1810 whitelist_reg_ext(w
,
1811 RING_CTX_TIMESTAMP(engine
->mmio_base
),
1812 RING_FORCE_TO_NONPRIV_ACCESS_RD
);
1815 static void cml_whitelist_build(struct intel_engine_cs
*engine
)
1817 allow_read_ctx_timestamp(engine
);
1819 cfl_whitelist_build(engine
);
1822 static void icl_whitelist_build(struct intel_engine_cs
*engine
)
1824 struct i915_wa_list
*w
= &engine
->whitelist
;
1826 allow_read_ctx_timestamp(engine
);
1828 switch (engine
->class) {
1830 /* WaAllowUMDToModifyHalfSliceChicken7:icl */
1831 whitelist_reg(w
, GEN9_HALF_SLICE_CHICKEN7
);
1833 /* WaAllowUMDToModifySamplerMode:icl */
1834 whitelist_reg(w
, GEN10_SAMPLER_MODE
);
1836 /* WaEnableStateCacheRedirectToCS:icl */
1837 whitelist_reg(w
, GEN9_SLICE_COMMON_ECO_CHICKEN1
);
1840 * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl
1842 * This covers 4 register which are next to one another :
1843 * - PS_INVOCATION_COUNT
1844 * - PS_INVOCATION_COUNT_UDW
1846 * - PS_DEPTH_COUNT_UDW
1848 whitelist_reg_ext(w
, PS_INVOCATION_COUNT
,
1849 RING_FORCE_TO_NONPRIV_ACCESS_RD
|
1850 RING_FORCE_TO_NONPRIV_RANGE_4
);
1853 case VIDEO_DECODE_CLASS
:
1854 /* hucStatusRegOffset */
1855 whitelist_reg_ext(w
, _MMIO(0x2000 + engine
->mmio_base
),
1856 RING_FORCE_TO_NONPRIV_ACCESS_RD
);
1857 /* hucUKernelHdrInfoRegOffset */
1858 whitelist_reg_ext(w
, _MMIO(0x2014 + engine
->mmio_base
),
1859 RING_FORCE_TO_NONPRIV_ACCESS_RD
);
1860 /* hucStatus2RegOffset */
1861 whitelist_reg_ext(w
, _MMIO(0x23B0 + engine
->mmio_base
),
1862 RING_FORCE_TO_NONPRIV_ACCESS_RD
);
1870 static void tgl_whitelist_build(struct intel_engine_cs
*engine
)
1872 struct i915_wa_list
*w
= &engine
->whitelist
;
1874 allow_read_ctx_timestamp(engine
);
1876 switch (engine
->class) {
1879 * WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl
1882 * This covers 4 registers which are next to one another :
1883 * - PS_INVOCATION_COUNT
1884 * - PS_INVOCATION_COUNT_UDW
1886 * - PS_DEPTH_COUNT_UDW
1888 whitelist_reg_ext(w
, PS_INVOCATION_COUNT
,
1889 RING_FORCE_TO_NONPRIV_ACCESS_RD
|
1890 RING_FORCE_TO_NONPRIV_RANGE_4
);
1894 * Wa_14012131227:dg1
1895 * Wa_1508744258:tgl,rkl,dg1,adl-s,adl-p
1897 whitelist_reg(w
, GEN7_COMMON_SLICE_CHICKEN1
);
1899 /* Wa_1806527549:tgl */
1900 whitelist_reg(w
, HIZ_CHICKEN
);
1907 static void dg1_whitelist_build(struct intel_engine_cs
*engine
)
1909 struct i915_wa_list
*w
= &engine
->whitelist
;
1911 tgl_whitelist_build(engine
);
1913 /* GEN:BUG:1409280441:dg1 */
1914 if (IS_DG1_GRAPHICS_STEP(engine
->i915
, STEP_A0
, STEP_B0
) &&
1915 (engine
->class == RENDER_CLASS
||
1916 engine
->class == COPY_ENGINE_CLASS
))
1917 whitelist_reg_ext(w
, RING_ID(engine
->mmio_base
),
1918 RING_FORCE_TO_NONPRIV_ACCESS_RD
);
1921 static void xehpsdv_whitelist_build(struct intel_engine_cs
*engine
)
1923 allow_read_ctx_timestamp(engine
);
1926 static void dg2_whitelist_build(struct intel_engine_cs
*engine
)
1928 struct i915_wa_list
*w
= &engine
->whitelist
;
1930 allow_read_ctx_timestamp(engine
);
1932 switch (engine
->class) {
1935 * Wa_1507100340:dg2_g10
1937 * This covers 4 registers which are next to one another :
1938 * - PS_INVOCATION_COUNT
1939 * - PS_INVOCATION_COUNT_UDW
1941 * - PS_DEPTH_COUNT_UDW
1943 if (IS_DG2_GRAPHICS_STEP(engine
->i915
, G10
, STEP_A0
, STEP_B0
))
1944 whitelist_reg_ext(w
, PS_INVOCATION_COUNT
,
1945 RING_FORCE_TO_NONPRIV_ACCESS_RD
|
1946 RING_FORCE_TO_NONPRIV_RANGE_4
);
1950 /* Wa_16011157294:dg2_g10 */
1951 if (IS_DG2_GRAPHICS_STEP(engine
->i915
, G10
, STEP_A0
, STEP_B0
))
1952 whitelist_reg(w
, GEN9_CTX_PREEMPT_REG
);
1959 static void blacklist_trtt(struct intel_engine_cs
*engine
)
1961 struct i915_wa_list
*w
= &engine
->whitelist
;
1964 * Prevent read/write access to [0x4400, 0x4600) which covers
1965 * the TRTT range across all engines. Note that normally userspace
1966 * cannot access the other engines' trtt control, but for simplicity
1967 * we cover the entire range on each engine.
1969 whitelist_reg_ext(w
, _MMIO(0x4400),
1970 RING_FORCE_TO_NONPRIV_DENY
|
1971 RING_FORCE_TO_NONPRIV_RANGE_64
);
1972 whitelist_reg_ext(w
, _MMIO(0x4500),
1973 RING_FORCE_TO_NONPRIV_DENY
|
1974 RING_FORCE_TO_NONPRIV_RANGE_64
);
1977 static void pvc_whitelist_build(struct intel_engine_cs
*engine
)
1979 allow_read_ctx_timestamp(engine
);
1981 /* Wa_16014440446:pvc */
1982 blacklist_trtt(engine
);
1985 void intel_engine_init_whitelist(struct intel_engine_cs
*engine
)
1987 struct drm_i915_private
*i915
= engine
->i915
;
1988 struct i915_wa_list
*w
= &engine
->whitelist
;
1990 wa_init_start(w
, "whitelist", engine
->name
);
1992 if (IS_PONTEVECCHIO(i915
))
1993 pvc_whitelist_build(engine
);
1994 else if (IS_DG2(i915
))
1995 dg2_whitelist_build(engine
);
1996 else if (IS_XEHPSDV(i915
))
1997 xehpsdv_whitelist_build(engine
);
1998 else if (IS_DG1(i915
))
1999 dg1_whitelist_build(engine
);
2000 else if (GRAPHICS_VER(i915
) == 12)
2001 tgl_whitelist_build(engine
);
2002 else if (GRAPHICS_VER(i915
) == 11)
2003 icl_whitelist_build(engine
);
2004 else if (IS_COMETLAKE(i915
))
2005 cml_whitelist_build(engine
);
2006 else if (IS_COFFEELAKE(i915
))
2007 cfl_whitelist_build(engine
);
2008 else if (IS_GEMINILAKE(i915
))
2009 glk_whitelist_build(engine
);
2010 else if (IS_KABYLAKE(i915
))
2011 kbl_whitelist_build(engine
);
2012 else if (IS_BROXTON(i915
))
2013 bxt_whitelist_build(engine
);
2014 else if (IS_SKYLAKE(i915
))
2015 skl_whitelist_build(engine
);
2016 else if (GRAPHICS_VER(i915
) <= 8)
2019 MISSING_CASE(GRAPHICS_VER(i915
));
2024 void intel_engine_apply_whitelist(struct intel_engine_cs
*engine
)
2026 const struct i915_wa_list
*wal
= &engine
->whitelist
;
2027 struct intel_uncore
*uncore
= engine
->uncore
;
2028 const u32 base
= engine
->mmio_base
;
2035 for (i
= 0, wa
= wal
->list
; i
< wal
->count
; i
++, wa
++)
2036 intel_uncore_write(uncore
,
2037 RING_FORCE_TO_NONPRIV(base
, i
),
2038 i915_mmio_reg_offset(wa
->reg
));
2040 /* And clear the rest just in case of garbage */
2041 for (; i
< RING_MAX_NONPRIV_SLOTS
; i
++)
2042 intel_uncore_write(uncore
,
2043 RING_FORCE_TO_NONPRIV(base
, i
),
2044 i915_mmio_reg_offset(RING_NOPID(base
)));
2048 * engine_fake_wa_init(), a place holder to program the registers
2049 * which are not part of an official workaround defined by the
2051 * Adding programming of those register inside workaround will
2052 * allow utilizing wa framework to proper application and verification.
2055 engine_fake_wa_init(struct intel_engine_cs
*engine
, struct i915_wa_list
*wal
)
2060 * RING_CMD_CCTL specifies the default MOCS entry that will be used
2061 * by the command streamer when executing commands that don't have
2062 * a way to explicitly specify a MOCS setting. The default should
2063 * usually reference whichever MOCS entry corresponds to uncached
2064 * behavior, although use of a WB cached entry is recommended by the
2065 * spec in certain circumstances on specific platforms.
2067 if (GRAPHICS_VER(engine
->i915
) >= 12) {
2068 mocs_r
= engine
->gt
->mocs
.uc_index
;
2069 mocs_w
= engine
->gt
->mocs
.uc_index
;
2071 if (HAS_L3_CCS_READ(engine
->i915
) &&
2072 engine
->class == COMPUTE_CLASS
) {
2073 mocs_r
= engine
->gt
->mocs
.wb_index
;
2076 * Even on the few platforms where MOCS 0 is a
2077 * legitimate table entry, it's never the correct
2078 * setting to use here; we can assume the MOCS init
2079 * just forgot to initialize wb_index.
2081 drm_WARN_ON(&engine
->i915
->drm
, mocs_r
== 0);
2084 wa_masked_field_set(wal
,
2085 RING_CMD_CCTL(engine
->mmio_base
),
2087 CMD_CCTL_MOCS_OVERRIDE(mocs_w
, mocs_r
));
2091 static bool needs_wa_1308578152(struct intel_engine_cs
*engine
)
2093 return intel_sseu_find_first_xehp_dss(&engine
->gt
->info
.sseu
, 0, 0) >=
2098 rcs_engine_wa_init(struct intel_engine_cs
*engine
, struct i915_wa_list
*wal
)
2100 struct drm_i915_private
*i915
= engine
->i915
;
2103 /* Wa_1509235366:dg2 */
2104 wa_write_or(wal
, GEN12_GAMCNTRL_CTRL
, INVALIDATION_BROADCAST_MODE_DIS
|
2105 GLOBAL_INVALIDATION_MODE
);
2108 if (IS_DG2_GRAPHICS_STEP(i915
, G11
, STEP_A0
, STEP_B0
)) {
2109 /* Wa_14013392000:dg2_g11 */
2110 wa_masked_en(wal
, GEN7_ROW_CHICKEN2
, GEN12_ENABLE_LARGE_GRF_MODE
);
2112 /* Wa_16011620976:dg2_g11 */
2113 wa_write_or(wal
, LSC_CHICKEN_BIT_0_UDW
, DIS_CHAIN_2XSIMD8
);
2116 if (IS_DG2_GRAPHICS_STEP(i915
, G10
, STEP_B0
, STEP_FOREVER
) ||
2117 IS_DG2_G11(i915
) || IS_DG2_G12(i915
)) {
2118 /* Wa_1509727124:dg2 */
2119 wa_masked_en(wal
, GEN10_SAMPLER_MODE
,
2120 SC_DISABLE_POWER_OPTIMIZATION_EBB
);
2123 if (IS_DG2_GRAPHICS_STEP(i915
, G10
, STEP_A0
, STEP_B0
) ||
2124 IS_DG2_GRAPHICS_STEP(i915
, G11
, STEP_A0
, STEP_B0
)) {
2125 /* Wa_14012419201:dg2 */
2126 wa_masked_en(wal
, GEN9_ROW_CHICKEN4
,
2127 GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX
);
2130 if (IS_DG2_GRAPHICS_STEP(i915
, G10
, STEP_B0
, STEP_C0
) ||
2133 * Wa_22012826095:dg2
2134 * Wa_22013059131:dg2
2136 wa_write_clr_set(wal
, LSC_CHICKEN_BIT_0_UDW
,
2138 REG_FIELD_PREP(MAXREQS_PER_BANK
, 2));
2140 /* Wa_22013059131:dg2 */
2141 wa_write_or(wal
, LSC_CHICKEN_BIT_0
,
2142 FORCE_1_SUB_MESSAGE_PER_FRAGMENT
);
2145 /* Wa_1308578152:dg2_g10 when first gslice is fused off */
2146 if (IS_DG2_GRAPHICS_STEP(i915
, G10
, STEP_B0
, STEP_C0
) &&
2147 needs_wa_1308578152(engine
)) {
2148 wa_masked_dis(wal
, GEN12_CS_DEBUG_MODE1_CCCSUNIT_BE_COMMON
,
2149 GEN12_REPLAY_MODE_GRANULARITY
);
2152 if (IS_DG2_GRAPHICS_STEP(i915
, G10
, STEP_B0
, STEP_FOREVER
) ||
2153 IS_DG2_G11(i915
) || IS_DG2_G12(i915
)) {
2154 /* Wa_22013037850:dg2 */
2155 wa_write_or(wal
, LSC_CHICKEN_BIT_0_UDW
,
2156 DISABLE_128B_EVICTION_COMMAND_UDW
);
2158 /* Wa_22012856258:dg2 */
2159 wa_masked_en(wal
, GEN7_ROW_CHICKEN2
,
2160 GEN12_DISABLE_READ_SUPPRESSION
);
2163 * Wa_22010960976:dg2
2164 * Wa_14013347512:dg2
2166 wa_masked_dis(wal
, GEN12_HDC_CHICKEN0
,
2167 LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK
);
2170 if (IS_DG2_GRAPHICS_STEP(i915
, G10
, STEP_A0
, STEP_B0
)) {
2172 * Wa_1608949956:dg2_g10
2173 * Wa_14010198302:dg2_g10
2175 wa_masked_en(wal
, GEN8_ROW_CHICKEN
,
2176 MDQ_ARBITRATION_MODE
| UGM_BACKUP_MODE
);
2179 * Wa_14010918519:dg2_g10
2181 * LSC_CHICKEN_BIT_0 always reads back as 0 is this stepping,
2182 * so ignoring verification.
2184 wa_add(wal
, LSC_CHICKEN_BIT_0_UDW
, 0,
2185 FORCE_SLM_FENCE_SCOPE_TO_TILE
| FORCE_UGM_FENCE_SCOPE_TO_TILE
,
2189 if (IS_DG2_GRAPHICS_STEP(i915
, G10
, STEP_A0
, STEP_B0
)) {
2190 /* Wa_22010430635:dg2 */
2193 GEN12_DISABLE_GRF_CLEAR
);
2195 /* Wa_14010648519:dg2 */
2196 wa_write_or(wal
, XEHP_L3NODEARBCFG
, XEHP_LNESPARE
);
2199 /* Wa_14013202645:dg2 */
2200 if (IS_DG2_GRAPHICS_STEP(i915
, G10
, STEP_B0
, STEP_C0
) ||
2201 IS_DG2_GRAPHICS_STEP(i915
, G11
, STEP_A0
, STEP_B0
))
2202 wa_write_or(wal
, RT_CTRL
, DIS_NULL_QUERY
);
2204 /* Wa_22012532006:dg2 */
2205 if (IS_DG2_GRAPHICS_STEP(engine
->i915
, G10
, STEP_A0
, STEP_C0
) ||
2206 IS_DG2_GRAPHICS_STEP(engine
->i915
, G11
, STEP_A0
, STEP_B0
))
2207 wa_masked_en(wal
, GEN9_HALF_SLICE_CHICKEN7
,
2208 DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA
);
2210 if (IS_DG2_GRAPHICS_STEP(engine
->i915
, G10
, STEP_A0
, STEP_B0
)) {
2211 /* Wa_14010680813:dg2_g10 */
2212 wa_write_or(wal
, GEN12_GAMSTLB_CTRL
, CONTROL_BLOCK_CLKGATE_DIS
|
2213 EGRESS_BLOCK_CLKGATE_DIS
| TAG_BLOCK_CLKGATE_DIS
);
2216 if (IS_DG2_GRAPHICS_STEP(engine
->i915
, G10
, STEP_A0
, STEP_B0
) ||
2217 IS_DG2_GRAPHICS_STEP(engine
->i915
, G11
, STEP_A0
, STEP_B0
)) {
2218 /* Wa_14012362059:dg2 */
2219 wa_write_or(wal
, GEN12_MERT_MOD_CTRL
, FORCE_MISS_FTLB
);
2222 if (IS_DG2_GRAPHICS_STEP(i915
, G11
, STEP_B0
, STEP_FOREVER
) ||
2224 /* Wa_22014600077:dg2 */
2225 wa_add(wal
, GEN10_CACHE_MODE_SS
, 0,
2226 _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH
),
2227 0 /* Wa_14012342262 :write-only reg, so skip
2232 if (IS_DG1_GRAPHICS_STEP(i915
, STEP_A0
, STEP_B0
) ||
2233 IS_TGL_UY_GRAPHICS_STEP(i915
, STEP_A0
, STEP_B0
)) {
2235 * Wa_1607138336:tgl[a0],dg1[a0]
2236 * Wa_1607063988:tgl[a0],dg1[a0]
2239 GEN9_CTX_PREEMPT_REG
,
2240 GEN12_DISABLE_POSH_BUSY_FF_DOP_CG
);
2243 if (IS_TGL_UY_GRAPHICS_STEP(i915
, STEP_A0
, STEP_B0
)) {
2246 * (see also Wa_1606682166:icl)
2250 GEN7_DISABLE_SAMPLER_PREFETCH
);
2253 if (IS_ALDERLAKE_P(i915
) || IS_ALDERLAKE_S(i915
) || IS_DG1(i915
) ||
2254 IS_ROCKETLAKE(i915
) || IS_TIGERLAKE(i915
)) {
2255 /* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */
2256 wa_masked_en(wal
, GEN7_ROW_CHICKEN2
, GEN12_DISABLE_EARLY_READ
);
2259 * Wa_1407928979:tgl A*
2260 * Wa_18011464164:tgl[B0+],dg1[B0+]
2261 * Wa_22010931296:tgl[B0+],dg1[B0+]
2262 * Wa_14010919138:rkl,dg1,adl-s,adl-p
2264 wa_write_or(wal
, GEN7_FF_THREAD_MODE
,
2265 GEN12_FF_TESSELATION_DOP_GATE_DISABLE
);
2268 if (IS_ALDERLAKE_P(i915
) || IS_DG2(i915
) || IS_ALDERLAKE_S(i915
) ||
2269 IS_DG1(i915
) || IS_ROCKETLAKE(i915
) || IS_TIGERLAKE(i915
)) {
2271 * Wa_1606700617:tgl,dg1,adl-p
2272 * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p
2273 * Wa_14010826681:tgl,dg1,rkl,adl-p
2274 * Wa_18019627453:dg2
2277 GEN9_CS_DEBUG_MODE1
,
2278 FF_DOP_CLOCK_GATE_DISABLE
);
2281 if (IS_ALDERLAKE_P(i915
) || IS_ALDERLAKE_S(i915
) ||
2282 IS_DG1_GRAPHICS_STEP(i915
, STEP_A0
, STEP_B0
) ||
2283 IS_ROCKETLAKE(i915
) || IS_TIGERLAKE(i915
)) {
2284 /* Wa_1409804808:tgl,rkl,dg1[a0],adl-s,adl-p */
2285 wa_masked_en(wal
, GEN7_ROW_CHICKEN2
,
2286 GEN12_PUSH_CONST_DEREF_HOLD_DIS
);
2290 * Wa_14010229206:tgl,rkl,dg1[a0],adl-s,adl-p
2292 wa_masked_en(wal
, GEN9_ROW_CHICKEN4
, GEN12_DISABLE_TDL_PUSH
);
2295 if (IS_DG1_GRAPHICS_STEP(i915
, STEP_A0
, STEP_B0
) ||
2296 IS_ROCKETLAKE(i915
) || IS_TIGERLAKE(i915
)) {
2300 * Wa_1607297627:tgl,rkl,dg1[a0]
2302 * On TGL and RKL there are multiple entries for this WA in the
2303 * BSpec; some indicate this is an A0-only WA, others indicate
2304 * it applies to all steppings so we trust the "all steppings."
2305 * For DG1 this only applies to A0.
2308 RING_PSMI_CTL(RENDER_RING_BASE
),
2309 GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE
|
2310 GEN8_RC_SEMA_IDLE_MSG_DISABLE
);
2313 if (IS_DG1(i915
) || IS_ROCKETLAKE(i915
) || IS_TIGERLAKE(i915
) ||
2314 IS_ALDERLAKE_S(i915
) || IS_ALDERLAKE_P(i915
)) {
2315 /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */
2321 if (GRAPHICS_VER(i915
) == 11) {
2322 /* This is not a workaround. Enable for better image quality */
2325 _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE
);
2329 * Formerly known as WaGAPZPriorityScheme
2333 GEN11_ARBITRATION_PRIO_ORDER_MASK
);
2337 * Formerly known as WaL3BankAddressHashing
2339 wa_write_clr_set(wal
,
2341 GEN11_HASH_CTRL_EXCL_MASK
,
2342 GEN11_HASH_CTRL_EXCL_BIT0
);
2343 wa_write_clr_set(wal
,
2345 GEN11_BANK_HASH_ADDR_EXCL_MASK
,
2346 GEN11_BANK_HASH_ADDR_EXCL_BIT0
);
2350 * Formerly known as WaDisableCleanEvicts
2354 GEN11_LQSC_CLEAN_EVICT_DISABLE
);
2356 /* Wa_1606682166:icl */
2359 GEN7_DISABLE_SAMPLER_PREFETCH
);
2361 /* Wa_1409178092:icl */
2362 wa_write_clr_set(wal
,
2364 GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE
,
2367 /* WaEnable32PlaneMode:icl */
2368 wa_masked_en(wal
, GEN9_CSFE_CHICKEN1_RCS
,
2369 GEN11_ENABLE_32_PLANE_MODE
);
2372 * Wa_1408615072:icl,ehl (vsunit)
2373 * Wa_1407596294:icl,ehl (hsunit)
2375 wa_write_or(wal
, UNSLICE_UNIT_LEVEL_CLKGATE
,
2376 VSUNIT_CLKGATE_DIS
| HSUNIT_CLKGATE_DIS
);
2379 * Wa_1408767742:icl[a2..forever],ehl[all]
2380 * Wa_1605460711:icl[a0..c0]
2383 GEN7_FF_THREAD_MODE
,
2384 GEN12_FF_TESSELATION_DOP_GATE_DISABLE
);
2386 /* Wa_22010271021 */
2388 GEN9_CS_DEBUG_MODE1
,
2389 FF_DOP_CLOCK_GATE_DISABLE
);
2392 if (IS_GRAPHICS_VER(i915
, 9, 12)) {
2393 /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl,tgl */
2395 GEN7_FF_SLICE_CS_CHICKEN1
,
2396 GEN9_FFSC_PERCTX_PREEMPT_CTRL
);
2399 if (IS_SKYLAKE(i915
) ||
2400 IS_KABYLAKE(i915
) ||
2401 IS_COFFEELAKE(i915
) ||
2402 IS_COMETLAKE(i915
)) {
2403 /* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
2406 GEN9_GAPS_TSV_CREDIT_DISABLE
);
2409 if (IS_BROXTON(i915
)) {
2410 /* WaDisablePooledEuLoadBalancingFix:bxt */
2412 FF_SLICE_CS_CHICKEN2
,
2413 GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE
);
2416 if (GRAPHICS_VER(i915
) == 9) {
2417 /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
2419 GEN9_CSFE_CHICKEN1_RCS
,
2420 GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE
);
2422 /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
2425 GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE
);
2427 /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
2428 if (IS_GEN9_LP(i915
))
2429 wa_write_clr_set(wal
,
2431 L3_PRIO_CREDITS_MASK
,
2432 L3_GENERAL_PRIO_CREDITS(62) |
2433 L3_HIGH_PRIO_CREDITS(2));
2435 /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
2438 GEN8_LQSC_FLUSH_COHERENT_LINES
);
2440 /* Disable atomics in L3 to prevent unrecoverable hangs */
2441 wa_write_clr_set(wal
, GEN9_SCRATCH_LNCF1
,
2442 GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE
, 0);
2443 wa_write_clr_set(wal
, GEN8_L3SQCREG4
,
2444 GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE
, 0);
2445 wa_write_clr_set(wal
, GEN9_SCRATCH1
,
2446 EVICTION_PERF_FIX_ENABLE
, 0);
2449 if (IS_HASWELL(i915
)) {
2450 /* WaSampleCChickenBitEnable:hsw */
2452 HALF_SLICE_CHICKEN3
, HSW_SAMPLE_C_PERFORMANCE
);
2456 /* enable HiZ Raw Stall Optimization */
2457 HIZ_RAW_STALL_OPT_DISABLE
);
2460 if (IS_VALLEYVIEW(i915
)) {
2461 /* WaDisableEarlyCull:vlv */
2464 _3D_CHICKEN_SF_DISABLE_OBJEND_CULL
);
2467 * WaVSThreadDispatchOverride:ivb,vlv
2469 * This actually overrides the dispatch
2470 * mode for all thread types.
2472 wa_write_clr_set(wal
,
2473 GEN7_FF_THREAD_MODE
,
2475 GEN7_FF_TS_SCHED_HW
|
2476 GEN7_FF_VS_SCHED_HW
|
2477 GEN7_FF_DS_SCHED_HW
);
2479 /* WaPsdDispatchEnable:vlv */
2480 /* WaDisablePSDDualDispatchEnable:vlv */
2482 GEN7_HALF_SLICE_CHICKEN1
,
2483 GEN7_MAX_PS_THREAD_DEP
|
2484 GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE
);
2487 if (IS_IVYBRIDGE(i915
)) {
2488 /* WaDisableEarlyCull:ivb */
2491 _3D_CHICKEN_SF_DISABLE_OBJEND_CULL
);
2493 if (0) { /* causes HiZ corruption on ivb:gt1 */
2494 /* enable HiZ Raw Stall Optimization */
2497 HIZ_RAW_STALL_OPT_DISABLE
);
2501 * WaVSThreadDispatchOverride:ivb,vlv
2503 * This actually overrides the dispatch
2504 * mode for all thread types.
2506 wa_write_clr_set(wal
,
2507 GEN7_FF_THREAD_MODE
,
2509 GEN7_FF_TS_SCHED_HW
|
2510 GEN7_FF_VS_SCHED_HW
|
2511 GEN7_FF_DS_SCHED_HW
);
2513 /* WaDisablePSDDualDispatchEnable:ivb */
2514 if (IS_IVB_GT1(i915
))
2516 GEN7_HALF_SLICE_CHICKEN1
,
2517 GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE
);
2520 if (GRAPHICS_VER(i915
) == 7) {
2521 /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
2523 RING_MODE_GEN7(RENDER_RING_BASE
),
2524 GFX_TLB_INVALIDATE_EXPLICIT
| GFX_REPLAY_MODE
);
2526 /* WaDisable_RenderCache_OperationalFlush:ivb,vlv,hsw */
2527 wa_masked_dis(wal
, CACHE_MODE_0_GEN7
, RC_OP_FLUSH_ENABLE
);
2530 * BSpec says this must be set, even though
2531 * WaDisable4x2SubspanOptimization:ivb,hsw
2532 * WaDisable4x2SubspanOptimization isn't listed for VLV.
2536 PIXEL_SUBSPAN_COLLECT_OPT_DISABLE
);
2539 * BSpec recommends 8x4 when MSAA is used,
2540 * however in practice 16x4 seems fastest.
2542 * Note that PS/WM thread counts depend on the WIZ hashing
2543 * disable bit, which we don't touch here, but it's good
2544 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
2546 wa_masked_field_set(wal
,
2548 GEN6_WIZ_HASHING_MASK
,
2549 GEN6_WIZ_HASHING_16x4
);
2552 if (IS_GRAPHICS_VER(i915
, 6, 7))
2554 * We need to disable the AsyncFlip performance optimisations in
2555 * order to use MI_WAIT_FOR_EVENT within the CS. It should
2556 * already be programmed to '1' on all products.
2558 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
2561 RING_MI_MODE(RENDER_RING_BASE
),
2562 ASYNC_FLIP_PERF_DISABLE
);
2564 if (GRAPHICS_VER(i915
) == 6) {
2566 * Required for the hardware to program scanline values for
2568 * WaEnableFlushTlbInvalidationMode:snb
2572 GFX_TLB_INVALIDATE_EXPLICIT
);
2574 /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
2577 _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB
);
2581 /* WaStripsFansDisableFastClipPerformanceFix:snb */
2582 _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL
|
2585 * "This bit must be set if 3DSTATE_CLIP clip mode is set
2586 * to normal and 3DSTATE_SF number of SF output attributes
2589 _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH
);
2592 * BSpec recommends 8x4 when MSAA is used,
2593 * however in practice 16x4 seems fastest.
2595 * Note that PS/WM thread counts depend on the WIZ hashing
2596 * disable bit, which we don't touch here, but it's good
2597 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
2599 wa_masked_field_set(wal
,
2601 GEN6_WIZ_HASHING_MASK
,
2602 GEN6_WIZ_HASHING_16x4
);
2604 /* WaDisable_RenderCache_OperationalFlush:snb */
2605 wa_masked_dis(wal
, CACHE_MODE_0
, RC_OP_FLUSH_ENABLE
);
2608 * From the Sandybridge PRM, volume 1 part 3, page 24:
2609 * "If this bit is set, STCunit will have LRA as replacement
2610 * policy. [...] This bit must be reset. LRA replacement
2611 * policy is not supported."
2615 CM0_STC_EVICT_DISABLE_LRA_SNB
);
2618 if (IS_GRAPHICS_VER(i915
, 4, 6))
2619 /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
2620 wa_add(wal
, RING_MI_MODE(RENDER_RING_BASE
),
2621 0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH
),
2622 /* XXX bit doesn't stick on Broadwater */
2623 IS_I965G(i915
) ? 0 : VS_TIMER_DISPATCH
, true);
2625 if (GRAPHICS_VER(i915
) == 4)
2627 * Disable CONSTANT_BUFFER before it is loaded from the context
2628 * image. For as it is loaded, it is executed and the stored
2629 * address may no longer be valid, leading to a GPU hang.
2631 * This imposes the requirement that userspace reload their
2632 * CONSTANT_BUFFER on every batch, fortunately a requirement
2633 * they are already accustomed to from before contexts were
2636 wa_add(wal
, ECOSKPD(RENDER_RING_BASE
),
2637 0, _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE
),
2638 0 /* XXX bit doesn't stick on Broadwater */,
2643 xcs_engine_wa_init(struct intel_engine_cs
*engine
, struct i915_wa_list
*wal
)
2645 struct drm_i915_private
*i915
= engine
->i915
;
2647 /* WaKBLVECSSemaphoreWaitPoll:kbl */
2648 if (IS_KBL_GRAPHICS_STEP(i915
, STEP_A0
, STEP_F0
)) {
2650 RING_SEMA_WAIT_POLL(engine
->mmio_base
),
2656 ccs_engine_wa_init(struct intel_engine_cs
*engine
, struct i915_wa_list
*wal
)
2658 if (IS_PVC_CT_STEP(engine
->i915
, STEP_A0
, STEP_C0
)) {
2659 /* Wa_14014999345:pvc */
2660 wa_masked_en(wal
, GEN10_CACHE_MODE_SS
, DISABLE_ECC
);
2665 * The bspec performance guide has recommended MMIO tuning settings. These
2666 * aren't truly "workarounds" but we want to program them with the same
2667 * workaround infrastructure to ensure that they're automatically added to
2668 * the GuC save/restore lists, re-applied at the right times, and checked for
2669 * any conflicting programming requested by real workarounds.
2671 * Programming settings should be added here only if their registers are not
2672 * part of an engine's register state context. If a register is part of a
2673 * context, then any tuning settings should be programmed in an appropriate
2674 * function invoked by __intel_engine_init_ctx_wa().
2677 add_render_compute_tuning_settings(struct drm_i915_private
*i915
,
2678 struct i915_wa_list
*wal
)
2680 if (IS_PONTEVECCHIO(i915
)) {
2681 wa_write(wal
, XEHPC_L3SCRUB
,
2682 SCRUB_CL_DWNGRADE_SHARED
| SCRUB_RATE_4B_PER_CLK
);
2686 wa_write_or(wal
, XEHP_L3SCQREG7
, BLEND_FILL_CACHING_OPT_DIS
);
2687 wa_write_clr_set(wal
, RT_CTRL
, STACKID_CTRL
, STACKID_CTRL_512
);
2690 * This is also listed as Wa_22012654132 for certain DG2
2691 * steppings, but the tuning setting programming is a superset
2692 * since it applies to all DG2 variants and steppings.
2694 * Note that register 0xE420 is write-only and cannot be read
2695 * back for verification on DG2 (due to Wa_14012342262), so
2696 * we need to explicitly skip the readback.
2698 wa_add(wal
, GEN10_CACHE_MODE_SS
, 0,
2699 _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC
),
2700 0 /* write-only, so skip validation */,
2705 * This tuning setting proves beneficial only on ATS-M designs; the
2706 * default "age based" setting is optimal on regular DG2 and other
2709 if (INTEL_INFO(i915
)->tuning_thread_rr_after_dep
)
2710 wa_masked_field_set(wal
, GEN9_ROW_CHICKEN4
, THREAD_EX_ARB_MODE
,
2711 THREAD_EX_ARB_MODE_RR_AFTER_DEP
);
2715 * The workarounds in this function apply to shared registers in
2716 * the general render reset domain that aren't tied to a
2717 * specific engine. Since all render+compute engines get reset
2718 * together, and the contents of these registers are lost during
2719 * the shared render domain reset, we'll define such workarounds
2720 * here and then add them to just a single RCS or CCS engine's
2721 * workaround list (whichever engine has the XXXX flag).
2724 general_render_compute_wa_init(struct intel_engine_cs
*engine
, struct i915_wa_list
*wal
)
2726 struct drm_i915_private
*i915
= engine
->i915
;
2728 add_render_compute_tuning_settings(i915
, wal
);
2730 if (IS_PONTEVECCHIO(i915
)) {
2731 /* Wa_16016694945 */
2732 wa_masked_en(wal
, XEHPC_LNCFMISCCFGREG0
, XEHPC_OVRLSCCC
);
2735 if (IS_XEHPSDV(i915
)) {
2739 SYSTOLIC_DOP_CLOCK_GATING_DIS
);
2744 GEN12_DISABLE_GRF_CLEAR
);
2746 /* Wa_14010670810:xehpsdv */
2747 wa_write_or(wal
, XEHP_L3NODEARBCFG
, XEHP_LNESPARE
);
2749 /* Wa_14010449647:xehpsdv */
2750 wa_masked_en(wal
, GEN7_HALF_SLICE_CHICKEN1
,
2751 GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE
);
2753 /* Wa_18011725039:xehpsdv */
2754 if (IS_XEHPSDV_GRAPHICS_STEP(i915
, STEP_A1
, STEP_B0
)) {
2755 wa_masked_dis(wal
, MLTICTXCTL
, TDONRENDER
);
2756 wa_write_or(wal
, L3SQCREG1_CCS0
, FLUSHALLNONCOH
);
2759 /* Wa_14012362059:xehpsdv */
2760 wa_write_or(wal
, GEN12_MERT_MOD_CTRL
, FORCE_MISS_FTLB
);
2762 /* Wa_14014368820:xehpsdv */
2763 wa_write_or(wal
, GEN12_GAMCNTRL_CTRL
, INVALIDATION_BROADCAST_MODE_DIS
|
2764 GLOBAL_INVALIDATION_MODE
);
2767 if (IS_DG2(i915
) || IS_PONTEVECCHIO(i915
)) {
2768 /* Wa_14015227452:dg2,pvc */
2769 wa_masked_en(wal
, GEN9_ROW_CHICKEN4
, XEHP_DIS_BBL_SYSPIPE
);
2771 /* Wa_22014226127:dg2,pvc */
2772 wa_write_or(wal
, LSC_CHICKEN_BIT_0
, DISABLE_D8_D16_COASLESCE
);
2774 /* Wa_16015675438:dg2,pvc */
2775 wa_masked_en(wal
, FF_SLICE_CS_CHICKEN2
, GEN12_PERF_FIX_BALANCING_CFE_DISABLE
);
2777 /* Wa_18018781329:dg2,pvc */
2778 wa_write_or(wal
, RENDER_MOD_CTRL
, FORCE_MISS_FTLB
);
2779 wa_write_or(wal
, COMP_MOD_CTRL
, FORCE_MISS_FTLB
);
2780 wa_write_or(wal
, VDBX_MOD_CTRL
, FORCE_MISS_FTLB
);
2781 wa_write_or(wal
, VEBX_MOD_CTRL
, FORCE_MISS_FTLB
);
2786 engine_init_workarounds(struct intel_engine_cs
*engine
, struct i915_wa_list
*wal
)
2788 if (I915_SELFTEST_ONLY(GRAPHICS_VER(engine
->i915
) < 4))
2791 engine_fake_wa_init(engine
, wal
);
2794 * These are common workarounds that just need to applied
2795 * to a single RCS/CCS engine's workaround list since
2796 * they're reset as part of the general render domain reset.
2798 if (engine
->flags
& I915_ENGINE_FIRST_RENDER_COMPUTE
)
2799 general_render_compute_wa_init(engine
, wal
);
2801 if (engine
->class == COMPUTE_CLASS
)
2802 ccs_engine_wa_init(engine
, wal
);
2803 else if (engine
->class == RENDER_CLASS
)
2804 rcs_engine_wa_init(engine
, wal
);
2806 xcs_engine_wa_init(engine
, wal
);
2809 void intel_engine_init_workarounds(struct intel_engine_cs
*engine
)
2811 struct i915_wa_list
*wal
= &engine
->wa_list
;
2813 if (GRAPHICS_VER(engine
->i915
) < 4)
2816 wa_init_start(wal
, "engine", engine
->name
);
2817 engine_init_workarounds(engine
, wal
);
2818 wa_init_finish(wal
);
2821 void intel_engine_apply_workarounds(struct intel_engine_cs
*engine
)
2823 wa_list_apply(engine
->gt
, &engine
->wa_list
);
2826 static const struct i915_range mcr_ranges_gen8
[] = {
2827 { .start
= 0x5500, .end
= 0x55ff },
2828 { .start
= 0x7000, .end
= 0x7fff },
2829 { .start
= 0x9400, .end
= 0x97ff },
2830 { .start
= 0xb000, .end
= 0xb3ff },
2831 { .start
= 0xe000, .end
= 0xe7ff },
2835 static const struct i915_range mcr_ranges_gen12
[] = {
2836 { .start
= 0x8150, .end
= 0x815f },
2837 { .start
= 0x9520, .end
= 0x955f },
2838 { .start
= 0xb100, .end
= 0xb3ff },
2839 { .start
= 0xde80, .end
= 0xe8ff },
2840 { .start
= 0x24a00, .end
= 0x24a7f },
2844 static const struct i915_range mcr_ranges_xehp
[] = {
2845 { .start
= 0x4000, .end
= 0x4aff },
2846 { .start
= 0x5200, .end
= 0x52ff },
2847 { .start
= 0x5400, .end
= 0x7fff },
2848 { .start
= 0x8140, .end
= 0x815f },
2849 { .start
= 0x8c80, .end
= 0x8dff },
2850 { .start
= 0x94d0, .end
= 0x955f },
2851 { .start
= 0x9680, .end
= 0x96ff },
2852 { .start
= 0xb000, .end
= 0xb3ff },
2853 { .start
= 0xc800, .end
= 0xcfff },
2854 { .start
= 0xd800, .end
= 0xd8ff },
2855 { .start
= 0xdc00, .end
= 0xffff },
2856 { .start
= 0x17000, .end
= 0x17fff },
2857 { .start
= 0x24a00, .end
= 0x24a7f },
2861 static bool mcr_range(struct drm_i915_private
*i915
, u32 offset
)
2863 const struct i915_range
*mcr_ranges
;
2866 if (GRAPHICS_VER_FULL(i915
) >= IP_VER(12, 50))
2867 mcr_ranges
= mcr_ranges_xehp
;
2868 else if (GRAPHICS_VER(i915
) >= 12)
2869 mcr_ranges
= mcr_ranges_gen12
;
2870 else if (GRAPHICS_VER(i915
) >= 8)
2871 mcr_ranges
= mcr_ranges_gen8
;
2876 * Registers in these ranges are affected by the MCR selector
2877 * which only controls CPU initiated MMIO. Routing does not
2878 * work for CS access so we cannot verify them on this path.
2880 for (i
= 0; mcr_ranges
[i
].start
; i
++)
2881 if (offset
>= mcr_ranges
[i
].start
&&
2882 offset
<= mcr_ranges
[i
].end
)
2889 wa_list_srm(struct i915_request
*rq
,
2890 const struct i915_wa_list
*wal
,
2891 struct i915_vma
*vma
)
2893 struct drm_i915_private
*i915
= rq
->engine
->i915
;
2894 unsigned int i
, count
= 0;
2895 const struct i915_wa
*wa
;
2898 srm
= MI_STORE_REGISTER_MEM
| MI_SRM_LRM_GLOBAL_GTT
;
2899 if (GRAPHICS_VER(i915
) >= 8)
2902 for (i
= 0, wa
= wal
->list
; i
< wal
->count
; i
++, wa
++) {
2903 if (!mcr_range(i915
, i915_mmio_reg_offset(wa
->reg
)))
2907 cs
= intel_ring_begin(rq
, 4 * count
);
2911 for (i
= 0, wa
= wal
->list
; i
< wal
->count
; i
++, wa
++) {
2912 u32 offset
= i915_mmio_reg_offset(wa
->reg
);
2914 if (mcr_range(i915
, offset
))
2919 *cs
++ = i915_ggtt_offset(vma
) + sizeof(u32
) * i
;
2922 intel_ring_advance(rq
, cs
);
2927 static int engine_wa_list_verify(struct intel_context
*ce
,
2928 const struct i915_wa_list
* const wal
,
2931 const struct i915_wa
*wa
;
2932 struct i915_request
*rq
;
2933 struct i915_vma
*vma
;
2934 struct i915_gem_ww_ctx ww
;
2942 vma
= __vm_create_scratch_for_read(&ce
->engine
->gt
->ggtt
->vm
,
2943 wal
->count
* sizeof(u32
));
2945 return PTR_ERR(vma
);
2947 intel_engine_pm_get(ce
->engine
);
2948 i915_gem_ww_ctx_init(&ww
, false);
2950 err
= i915_gem_object_lock(vma
->obj
, &ww
);
2952 err
= intel_context_pin_ww(ce
, &ww
);
2956 err
= i915_vma_pin_ww(vma
, &ww
, 0, 0,
2957 i915_vma_is_ggtt(vma
) ? PIN_GLOBAL
: PIN_USER
);
2961 rq
= i915_request_create(ce
);
2967 err
= i915_request_await_object(rq
, vma
->obj
, true);
2969 err
= i915_vma_move_to_active(vma
, rq
, EXEC_OBJECT_WRITE
);
2971 err
= wa_list_srm(rq
, wal
, vma
);
2973 i915_request_get(rq
);
2975 i915_request_set_error_once(rq
, err
);
2976 i915_request_add(rq
);
2981 if (i915_request_wait(rq
, 0, HZ
/ 5) < 0) {
2986 results
= i915_gem_object_pin_map(vma
->obj
, I915_MAP_WB
);
2987 if (IS_ERR(results
)) {
2988 err
= PTR_ERR(results
);
2993 for (i
= 0, wa
= wal
->list
; i
< wal
->count
; i
++, wa
++) {
2994 if (mcr_range(rq
->engine
->i915
, i915_mmio_reg_offset(wa
->reg
)))
2997 if (!wa_verify(wa
, results
[i
], wal
->name
, from
))
3001 i915_gem_object_unpin_map(vma
->obj
);
3004 i915_request_put(rq
);
3006 i915_vma_unpin(vma
);
3008 intel_context_unpin(ce
);
3010 if (err
== -EDEADLK
) {
3011 err
= i915_gem_ww_ctx_backoff(&ww
);
3015 i915_gem_ww_ctx_fini(&ww
);
3016 intel_engine_pm_put(ce
->engine
);
3021 int intel_engine_verify_workarounds(struct intel_engine_cs
*engine
,
3024 return engine_wa_list_verify(engine
->kernel_context
,
3029 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
3030 #include "selftest_workarounds.c"