// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include "gem/i915_gem_lmem.h"

#include "gen8_engine_cs.h"
#include "i915_drv.h"
#include "i915_perf.h"
#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_gt_regs.h"
#include "intel_lrc.h"
#include "intel_lrc_reg.h"
#include "intel_ring.h"
#include "shmem_utils.h"

/*
 * The per-platform tables are u8-encoded in @data. Decode @data and set the
 * addresses' offset and commands in @regs. The following encoding is used
 * for each byte. There are 2 steps: decoding commands and decoding addresses.
 *
 * Commands:
 * [7]: create NOPs - the number of NOPs is given in the lower bits
 * [6]: when creating an MI_LOAD_REGISTER_IMM command, allow setting
 *      MI_LRI_FORCE_POSTED
 * [5:0]: number of NOPs, or number of registers to set values for in the
 *        case of MI_LOAD_REGISTER_IMM
 *
 * Addresses: these are decoded after an MI_LOAD_REGISTER_IMM command by
 * "count" number of registers. They are set by using the REG/REG16 macros:
 * the former is used for offsets smaller than 0x200 while the latter is for
 * values bigger than that. Those macros already set all the bits documented
 * below correctly:
 *
 * [7]: when a register offset needs more than 6 bits, use additional bytes
 *      (which follow) for the lower bits
 * [6:0]: register offset, without considering the engine base.
 *
 * This function only tweaks the commands and register offsets. Values are not
 * filled out.
 */
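/*
 * Illustrative example of the encoding (hypothetical entries, not taken from
 * the per-platform tables below): a sequence such as
 *
 *      NOP(1),
 *      LRI(2, POSTED),
 *      REG16(0x244),
 *      REG(0x034),
 *
 * leaves one dword of the context image untouched (MI_NOOP), then emits
 * MI_LOAD_REGISTER_IMM(2) | MI_LRI_FORCE_POSTED (plus MI_LRI_LRM_CS_MMIO on
 * gen11+) followed by two offset/value pairs whose offsets decode to
 * engine->mmio_base + 0x244 and engine->mmio_base + 0x34; the value slots are
 * left for the hardware to fill on context save.
 */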
static void set_offsets(u32 *regs,
                        const u8 *data,
                        const struct intel_engine_cs *engine,
                        bool close)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
        (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
        (((x) >> 2) & 0x7f)
#define END 0
{
        const u32 base = engine->mmio_base;

        while (*data) {
                u8 count, flags;

                if (*data & BIT(7)) { /* skip */
                        count = *data++ & ~BIT(7);
                        regs += count;
                        continue;
                }

                count = *data & 0x3f;
                flags = *data >> 6;
                data++;

                *regs = MI_LOAD_REGISTER_IMM(count);
                if (flags & POSTED)
                        *regs |= MI_LRI_FORCE_POSTED;
                if (GRAPHICS_VER(engine->i915) >= 11)
                        *regs |= MI_LRI_LRM_CS_MMIO;
                regs++;

                GEM_BUG_ON(!count);
                do {
                        u32 offset = 0;
                        u8 v;

                        do {
                                v = *data++;
                                offset <<= 7;
                                offset |= v & ~BIT(7);
                        } while (v & BIT(7));

                        regs[0] = base + (offset << 2);
                        regs += 2;
                } while (--count);
        }

        if (close) {
                /* Close the batch; used mainly by live_lrc_layout() */
                *regs = MI_BATCH_BUFFER_END;
                if (GRAPHICS_VER(engine->i915) >= 11)
                        *regs |= BIT(0);
        }
}
static const u8 gen8_xcs_offsets[] = {

static const u8 gen9_xcs_offsets[] = {

static const u8 gen12_xcs_offsets[] = {

static const u8 dg2_xcs_offsets[] = {

static const u8 gen8_rcs_offsets[] = {

static const u8 gen9_rcs_offsets[] = {

static const u8 gen11_rcs_offsets[] = {

static const u8 gen12_rcs_offsets[] = {

static const u8 xehp_rcs_offsets[] = {

static const u8 dg2_rcs_offsets[] = {

static const u8 mtl_rcs_offsets[] = {
static const u8 *reg_offsets(const struct intel_engine_cs *engine)
{
        /*
         * The gen12+ lists only have the registers we program in the basic
         * default state. We rely on the context image using relative
         * addressing to automatically fix up the register state between the
         * physical engines for the virtual engine.
         */
        GEM_BUG_ON(GRAPHICS_VER(engine->i915) >= 12 &&
                   !intel_engine_has_relative_mmio(engine));

        if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) {
                if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70))
                        return mtl_rcs_offsets;
                else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
                        return dg2_rcs_offsets;
                else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
                        return xehp_rcs_offsets;
                else if (GRAPHICS_VER(engine->i915) >= 12)
                        return gen12_rcs_offsets;
                else if (GRAPHICS_VER(engine->i915) >= 11)
                        return gen11_rcs_offsets;
                else if (GRAPHICS_VER(engine->i915) >= 9)
                        return gen9_rcs_offsets;
                else
                        return gen8_rcs_offsets;
        } else {
                if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
                        return dg2_xcs_offsets;
                else if (GRAPHICS_VER(engine->i915) >= 12)
                        return gen12_xcs_offsets;
                else if (GRAPHICS_VER(engine->i915) >= 9)
                        return gen9_xcs_offsets;
                else
                        return gen8_xcs_offsets;
        }
}
static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
{
        if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
        else if (GRAPHICS_VER(engine->i915) >= 12)
        else if (GRAPHICS_VER(engine->i915) >= 9)
        else if (engine->class == RENDER_CLASS)
}

static int lrc_ring_bb_offset(const struct intel_engine_cs *engine)
{
        if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
        else if (GRAPHICS_VER(engine->i915) >= 12)
        else if (GRAPHICS_VER(engine->i915) >= 9)
        else if (GRAPHICS_VER(engine->i915) >= 8 &&
                 engine->class == RENDER_CLASS)
}

static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
{
        if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
        else if (GRAPHICS_VER(engine->i915) >= 12)
        else if (GRAPHICS_VER(engine->i915) >= 9)
        else if (engine->class == RENDER_CLASS)
}

static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
{
        if (GRAPHICS_VER(engine->i915) >= 12)
        else if (GRAPHICS_VER(engine->i915) >= 9 || engine->class == RENDER_CLASS)
}

static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
{
        int x;

        x = lrc_ring_wa_bb_per_ctx(engine);
}

static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
{
        int x;

        x = lrc_ring_indirect_ptr(engine);
}

static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
{
        if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
                /*
                 * Note that the CSFE context has a dummy slot for CMD_BUF_CCTL
                 * simply to match the RCS context image layout.
                 */
        else if (engine->class != RENDER_CLASS)
        else if (GRAPHICS_VER(engine->i915) >= 12)
        else if (GRAPHICS_VER(engine->i915) >= 11)
}

static u32
lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
{
        if (GRAPHICS_VER(engine->i915) >= 12)
                return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
        else if (GRAPHICS_VER(engine->i915) >= 11)
                return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
        else if (GRAPHICS_VER(engine->i915) >= 9)
                return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
        else if (GRAPHICS_VER(engine->i915) >= 8)
                return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;

        GEM_BUG_ON(GRAPHICS_VER(engine->i915) < 8);
        return 0;
}
static void
lrc_setup_indirect_ctx(u32 *regs,
                       const struct intel_engine_cs *engine,
                       u32 ctx_bb_ggtt_addr,
                       u32 size)
{
        GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
        GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
        regs[lrc_ring_indirect_ptr(engine) + 1] =
                ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);

        GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
        regs[lrc_ring_indirect_offset(engine) + 1] =
                lrc_ring_indirect_offset_default(engine) << 6;
}
static void init_common_regs(u32 * const regs,
                             const struct intel_context *ce,
                             const struct intel_engine_cs *engine,
                             bool inhibit)
{
        u32 ctl;
        int loc;

        ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
        ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
        if (inhibit)
                ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
        if (GRAPHICS_VER(engine->i915) < 11)
                ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
                                           CTX_CTRL_RS_CTX_ENABLE);
        regs[CTX_CONTEXT_CONTROL] = ctl;

        regs[CTX_TIMESTAMP] = ce->stats.runtime.last;

        loc = lrc_ring_bb_offset(engine);
        if (loc != -1)
                regs[loc + 1] = 0;
}
static void init_wa_bb_regs(u32 * const regs,
                            const struct intel_engine_cs *engine)
{
        const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;

        if (wa_ctx->per_ctx.size) {
                const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);

                GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
                regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
                        (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
        }

        if (wa_ctx->indirect_ctx.size) {
                lrc_setup_indirect_ctx(regs, engine,
                                       i915_ggtt_offset(wa_ctx->vma) +
                                       wa_ctx->indirect_ctx.offset,
                                       wa_ctx->indirect_ctx.size);
        }
}
static void init_ppgtt_regs(u32 *regs, const struct i915_ppgtt *ppgtt)
{
        if (i915_vm_is_4lvl(&ppgtt->vm)) {
                /* 64b PPGTT (48bit canonical)
                 * PDP0_DESCRIPTOR contains the base address to PML4 and
                 * other PDP Descriptors are ignored.
                 */
                ASSIGN_CTX_PML4(ppgtt, regs);
        } else {
                ASSIGN_CTX_PDP(ppgtt, regs, 3);
                ASSIGN_CTX_PDP(ppgtt, regs, 2);
                ASSIGN_CTX_PDP(ppgtt, regs, 1);
                ASSIGN_CTX_PDP(ppgtt, regs, 0);
        }
}

static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
{
        if (i915_is_ggtt(vm))
                return i915_vm_to_ggtt(vm)->alias;
        else
                return i915_vm_to_ppgtt(vm);
}
static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
{
        int x;

        x = lrc_ring_mi_mode(engine);
        if (x != -1) {
                regs[x + 1] &= ~STOP_RING;
                regs[x + 1] |= STOP_RING << 16;
        }
}
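/*
 * Note: RING_MI_MODE is a masked register, so the value built above (mask bit
 * STOP_RING << 16 set, data bit STOP_RING cleared) makes the context restore
 * explicitly clear STOP_RING rather than leave it untouched.
 */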
static void __lrc_init_regs(u32 *regs,
                            const struct intel_context *ce,
                            const struct intel_engine_cs *engine,
                            bool inhibit)
{
        /*
         * A context is actually a big batch buffer with several
         * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
         * values we are setting here are only for the first context restore:
         * on a subsequent save, the GPU will recreate this batchbuffer with new
         * values (including all the missing MI_LOAD_REGISTER_IMM commands that
         * we are not initializing here).
         *
         * Must keep consistent with virtual_update_register_offsets().
         */

        if (inhibit)
                memset(regs, 0, PAGE_SIZE);

        set_offsets(regs, reg_offsets(engine), engine, inhibit);

        init_common_regs(regs, ce, engine, inhibit);
        init_ppgtt_regs(regs, vm_alias(ce->vm));

        init_wa_bb_regs(regs, engine);

        __reset_stop_ring(regs, engine);
}

void lrc_init_regs(const struct intel_context *ce,
                   const struct intel_engine_cs *engine,
                   bool inhibit)
{
        __lrc_init_regs(ce->lrc_reg_state, ce, engine, inhibit);
}

void lrc_reset_regs(const struct intel_context *ce,
                    const struct intel_engine_cs *engine)
{
        __reset_stop_ring(ce->lrc_reg_state, engine);
}
static void
set_redzone(void *vaddr, const struct intel_engine_cs *engine)
{
        if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
                return;

        vaddr += engine->context_size;

        memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
}

static void
check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
{
        if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
                return;

        vaddr += engine->context_size;

        if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
                drm_err_once(&engine->i915->drm,
                             "%s context redzone overwritten!\n",
                             engine->name);
}
static u32 context_wa_bb_offset(const struct intel_context *ce)
{
        return PAGE_SIZE * ce->wa_bb_page;
}

static u32 *context_indirect_bb(const struct intel_context *ce)
{
        void *ptr;

        GEM_BUG_ON(!ce->wa_bb_page);

        ptr = ce->lrc_reg_state;
        ptr -= LRC_STATE_OFFSET; /* back to start of context image */
        ptr += context_wa_bb_offset(ce);

        return ptr;
}
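/*
 * Rough sketch of the context object layout implied by the helpers above and
 * by __lrc_alloc_state()/lrc_init_state() below (exact offsets vary per
 * platform):
 *
 *      [ ppHWSP page | register state (lrc_reg_state at LRC_STATE_OFFSET) |
 *        rest of engine context | redzone page (CONFIG_DRM_I915_DEBUG_GEM
 *        only) | wa_bb page (gen12+) | GuC parent scratch (parallel contexts) ]
 *
 * context_indirect_bb() walks back from lrc_reg_state to the start of the
 * image and then forward to the wa_bb page.
 */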
void lrc_init_state(struct intel_context *ce,
                    struct intel_engine_cs *engine,
                    void *state)
{
        bool inhibit = true;

        set_redzone(state, engine);

        if (engine->default_state) {
                shmem_read(engine->default_state, 0,
                           state, engine->context_size);
                __set_bit(CONTEXT_VALID_BIT, &ce->flags);
                inhibit = false;
        }

        /* Clear the ppHWSP (inc. per-context counters) */
        memset(state, 0, PAGE_SIZE);

        /* Clear the indirect wa and storage */
        if (ce->wa_bb_page)
                memset(state + context_wa_bb_offset(ce), 0, PAGE_SIZE);

        /*
         * The second page of the context object contains some registers which
         * must be set up prior to the first execution.
         */
        __lrc_init_regs(state + LRC_STATE_OFFSET, ce, engine, inhibit);
}
u32 lrc_indirect_bb(const struct intel_context *ce)
{
        return i915_ggtt_offset(ce->state) + context_wa_bb_offset(ce);
}

static u32 *setup_predicate_disable_wa(const struct intel_context *ce, u32 *cs)
{
        /* If predication is active, this will be noop'ed */
        *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT | (4 - 2);
        *cs++ = lrc_indirect_bb(ce) + DG2_PREDICATE_RESULT_WA;
        *cs++ = 0;
        *cs++ = 0; /* No predication */

        /* predicated end, only terminates if SET_PREDICATE_RESULT:0 is clear */
        *cs++ = MI_BATCH_BUFFER_END | BIT(15);
        *cs++ = MI_SET_PREDICATE | MI_SET_PREDICATE_DISABLE;

        /* Instructions are no longer predicated (disabled), we can proceed */
        *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT | (4 - 2);
        *cs++ = lrc_indirect_bb(ce) + DG2_PREDICATE_RESULT_WA;
        *cs++ = 0;
        *cs++ = 1; /* enable predication before the next BB */

        *cs++ = MI_BATCH_BUFFER_END;
        GEM_BUG_ON(offset_in_page(cs) > DG2_PREDICATE_RESULT_WA);

        return cs;
}
static struct i915_vma *
__lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
{
        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;
        u32 context_size;

        context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);

        if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
                context_size += I915_GTT_PAGE_SIZE; /* for redzone */

        if (GRAPHICS_VER(engine->i915) >= 12) {
                ce->wa_bb_page = context_size / PAGE_SIZE;
                context_size += PAGE_SIZE;
        }

        if (intel_context_is_parent(ce) && intel_engine_uses_guc(engine)) {
                ce->parallel.guc.parent_page = context_size / PAGE_SIZE;
                context_size += PARENT_SCRATCH_SIZE;
        }

        obj = i915_gem_object_create_lmem(engine->i915, context_size,
                                          I915_BO_ALLOC_PM_VOLATILE);
        if (IS_ERR(obj)) {
                obj = i915_gem_object_create_shmem(engine->i915, context_size);
                if (IS_ERR(obj))
                        return ERR_CAST(obj);

                /*
                 * Wa_22016122933: For Media version 13.0, all Media GT shared
                 * memory needs to be mapped as WC on CPU side and UC (PAT
                 * index 2) on GPU side.
                 */
                if (intel_gt_needs_wa_22016122933(engine->gt))
                        i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE);
        }

        vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
        if (IS_ERR(vma)) {
                i915_gem_object_put(obj);
                return ERR_CAST(vma);
        }

        return vma;
}
static struct intel_timeline *
pinned_timeline(struct intel_context *ce, struct intel_engine_cs *engine)
{
        struct intel_timeline *tl = fetch_and_zero(&ce->timeline);

        return intel_timeline_create_from_engine(engine, page_unmask_bits(tl));
}

int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine)
{
        struct intel_ring *ring;
        struct i915_vma *vma;
        int err;

        GEM_BUG_ON(ce->state);

        vma = __lrc_alloc_state(ce, engine);
        if (IS_ERR(vma))
                return PTR_ERR(vma);

        ring = intel_engine_create_ring(engine, ce->ring_size);
        if (IS_ERR(ring)) {
                err = PTR_ERR(ring);
                goto err_vma;
        }

        if (!page_mask_bits(ce->timeline)) {
                struct intel_timeline *tl;

                /*
                 * Use the static global HWSP for the kernel context, and
                 * a dynamically allocated cacheline for everyone else.
                 */
                if (unlikely(ce->timeline))
                        tl = pinned_timeline(ce, engine);
                else
                        tl = intel_timeline_create(engine->gt);
                if (IS_ERR(tl)) {
                        err = PTR_ERR(tl);
                        goto err_ring;
                }

                ce->timeline = tl;
        }

        ce->ring = ring;
        ce->state = vma;

        return 0;

err_ring:
        intel_ring_put(ring);
err_vma:
        i915_vma_put(vma);
        return err;
}
void lrc_reset(struct intel_context *ce)
{
        GEM_BUG_ON(!intel_context_is_pinned(ce));

        intel_ring_reset(ce->ring, ce->ring->emit);

        /* Scrub away the garbage */
        lrc_init_regs(ce, ce->engine, true);
        ce->lrc.lrca = lrc_update_regs(ce, ce->engine, ce->ring->tail);
}

int
lrc_pre_pin(struct intel_context *ce,
            struct intel_engine_cs *engine,
            struct i915_gem_ww_ctx *ww,
            void **vaddr)
{
        GEM_BUG_ON(!ce->state);
        GEM_BUG_ON(!i915_vma_is_pinned(ce->state));

        *vaddr = i915_gem_object_pin_map(ce->state->obj,
                                         intel_gt_coherent_map_type(ce->engine->gt,
                                                                    ce->state->obj, false));

        return PTR_ERR_OR_ZERO(*vaddr);
}

int
lrc_pin(struct intel_context *ce,
        struct intel_engine_cs *engine,
        void *vaddr)
{
        ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;

        if (!__test_and_set_bit(CONTEXT_INIT_BIT, &ce->flags))
                lrc_init_state(ce, engine, vaddr);

        ce->lrc.lrca = lrc_update_regs(ce, engine, ce->ring->tail);
        return 0;
}
void lrc_unpin(struct intel_context *ce)
{
        if (unlikely(ce->parallel.last_rq)) {
                i915_request_put(ce->parallel.last_rq);
                ce->parallel.last_rq = NULL;
        }

        check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
                      ce->engine);
}

void lrc_post_unpin(struct intel_context *ce)
{
        i915_gem_object_unpin_map(ce->state->obj);
}

void lrc_fini(struct intel_context *ce)
{
        if (!ce->state)
                return;

        intel_ring_put(fetch_and_zero(&ce->ring));
        i915_vma_put(fetch_and_zero(&ce->state));
}

void lrc_destroy(struct kref *kref)
{
        struct intel_context *ce = container_of(kref, typeof(*ce), ref);

        GEM_BUG_ON(!i915_active_is_idle(&ce->active));
        GEM_BUG_ON(intel_context_is_pinned(ce));

        lrc_fini(ce);

        intel_context_fini(ce);
        intel_context_free(ce);
}
static u32 *
gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
{
        *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
                MI_SRM_LRM_GLOBAL_GTT |
                MI_LRI_LRM_CS_MMIO;
        *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
        *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
                CTX_TIMESTAMP * sizeof(u32);
        *cs++ = 0;

        *cs++ = MI_LOAD_REGISTER_REG |
                MI_LRR_SOURCE_CS_MMIO |
                MI_LRI_LRM_CS_MMIO;
        *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
        *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));

        *cs++ = MI_LOAD_REGISTER_REG |
                MI_LRR_SOURCE_CS_MMIO |
                MI_LRI_LRM_CS_MMIO;
        *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
        *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));

        return cs;
}

static u32 *
gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
{
        GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);

        *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
                MI_SRM_LRM_GLOBAL_GTT |
                MI_LRI_LRM_CS_MMIO;
        *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
        *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
                (lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
        *cs++ = 0;

        return cs;
}

static u32 *
gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
{
        GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);

        *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
                MI_SRM_LRM_GLOBAL_GTT |
                MI_LRI_LRM_CS_MMIO;
        *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
        *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
                (lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
        *cs++ = 0;

        *cs++ = MI_LOAD_REGISTER_REG |
                MI_LRR_SOURCE_CS_MMIO |
                MI_LRI_LRM_CS_MMIO;
        *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
        *cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));

        return cs;
}
/*
 * On DG2, an RCS restore hang can be detected during context restore of a
 * preempted context in GPGPU mode. This is extremely timing dependent.
 * To address this, the software workaround batch buffer below is applied on
 * DG2 A steppings.
 */
static u32 *
dg2_emit_rcs_hang_wabb(const struct intel_context *ce, u32 *cs)
{
        *cs++ = MI_LOAD_REGISTER_IMM(1);
        *cs++ = i915_mmio_reg_offset(GEN12_STATE_ACK_DEBUG(ce->engine->mmio_base));

        *cs++ = MI_LOAD_REGISTER_REG;
        *cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base));
        *cs++ = i915_mmio_reg_offset(XEHP_CULLBIT1);

        *cs++ = MI_LOAD_REGISTER_REG;
        *cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base));
        *cs++ = i915_mmio_reg_offset(XEHP_CULLBIT2);

        return cs;
}
/*
 * The bspec's tuning guide asks us to program a vertical watermark value of
 * 0x3FF. However this register is not saved/restored properly by the
 * hardware, so we're required to apply the desired value via INDIRECT_CTX
 * batch buffer to ensure the value takes effect properly. All other bits
 * in this register should remain at 0 (the hardware default).
 */
static u32 *
dg2_emit_draw_watermark_setting(u32 *cs)
{
        *cs++ = MI_LOAD_REGISTER_IMM(1);
        *cs++ = i915_mmio_reg_offset(DRAW_WATERMARK);
        *cs++ = REG_FIELD_PREP(VERT_WM_VAL, 0x3FF);

        return cs;
}
static u32 *
gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
{
        cs = gen12_emit_timestamp_wa(ce, cs);
        cs = gen12_emit_cmd_buf_wa(ce, cs);
        cs = gen12_emit_restore_scratch(ce, cs);

        /* Wa_22011450934:dg2 */
        if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_A0, STEP_B0) ||
            IS_DG2_GRAPHICS_STEP(ce->engine->i915, G11, STEP_A0, STEP_B0))
                cs = dg2_emit_rcs_hang_wabb(ce, cs);

        /* Wa_16013000631:dg2 */
        if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) ||
            IS_DG2_G11(ce->engine->i915))
                cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0);

        cs = gen12_emit_aux_table_inv(ce->engine, cs);

        /* Wa_16014892111 */
        if (IS_MTL_GRAPHICS_STEP(ce->engine->i915, M, STEP_A0, STEP_B0) ||
            IS_MTL_GRAPHICS_STEP(ce->engine->i915, P, STEP_A0, STEP_B0) ||
            IS_DG2(ce->engine->i915))
                cs = dg2_emit_draw_watermark_setting(cs);

        return cs;
}

static u32 *
gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
{
        cs = gen12_emit_timestamp_wa(ce, cs);
        cs = gen12_emit_restore_scratch(ce, cs);

        /* Wa_16013000631:dg2 */
        if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) ||
            IS_DG2_G11(ce->engine->i915))
                if (ce->engine->class == COMPUTE_CLASS)
                        cs = gen8_emit_pipe_control(cs,
                                                    PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE,
                                                    0);

        return gen12_emit_aux_table_inv(ce->engine, cs);
}
static void
setup_indirect_ctx_bb(const struct intel_context *ce,
                      const struct intel_engine_cs *engine,
                      u32 *(*emit)(const struct intel_context *, u32 *))
{
        u32 * const start = context_indirect_bb(ce);
        u32 *cs;

        cs = emit(ce, start);
        GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
        while ((unsigned long)cs % CACHELINE_BYTES)
                *cs++ = MI_NOOP;

        GEM_BUG_ON(cs - start > DG2_PREDICATE_RESULT_BB / sizeof(*start));
        setup_predicate_disable_wa(ce, start + DG2_PREDICATE_RESULT_BB / sizeof(*start));

        lrc_setup_indirect_ctx(ce->lrc_reg_state, engine,
                               lrc_indirect_bb(ce),
                               (cs - start) * sizeof(*cs));
}
/*
 * The context descriptor encodes various attributes of a context,
 * including its GTT address and some flags. Because it's fairly
 * expensive to calculate, we'll just do it once and cache the result,
 * which remains valid until the context is unpinned.
 *
 * This is what a descriptor looks like, from LSB to MSB::
 *
 *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
 *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
 *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
 *      bits 53-54:    mbz, reserved for use by hardware
 *      bits 55-63:    group ID, currently unused and set to 0
 *
 * Starting from Gen11, the upper dword of the descriptor has a new format:
 *
 *      bits 32-36:    reserved
 *      bits 37-47:    SW context ID
 *      bits 48-53:    engine instance
 *      bit 54:        mbz, reserved for use by hardware
 *      bits 55-60:    SW counter
 *      bits 61-63:    engine class
 *
 * On Xe_HP, the upper dword of the descriptor has a new format:
 *
 *      bits 32-37:    virtual function number
 *      bit 38:        mbz, reserved for use by hardware
 *      bits 39-54:    SW context ID
 *      bits 55-57:    reserved
 *      bits 58-63:    SW counter
 *
 * engine info, SW context ID and SW counter need to form a unique number
 * (Context ID) per lrc.
 */
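/*
 * Worked example for the legacy lower dword, matching lrc_descriptor() below
 * and using made-up numbers: with a 4-level ppGTT and the context image at
 * GGTT offset 0x00345000, the cached value is
 *
 *      (INTEL_LEGACY_64B_CONTEXT << GEN8_CTX_ADDRESSING_MODE_SHIFT) |
 *      GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE | 0x00345000
 *
 * i.e. the flag bits live in [11:0] and the page-aligned LRCA occupies the
 * upper bits. The upper dword (ctx ID etc.) is not filled in here but by the
 * submission backend.
 */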
static u32 lrc_descriptor(const struct intel_context *ce)
{
        u32 desc;

        desc = INTEL_LEGACY_32B_CONTEXT;
        if (i915_vm_is_4lvl(ce->vm))
                desc = INTEL_LEGACY_64B_CONTEXT;
        desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;

        desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
        if (GRAPHICS_VER(ce->vm->i915) == 8)
                desc |= GEN8_CTX_L3LLC_COHERENT;

        return i915_ggtt_offset(ce->state) | desc;
}
u32 lrc_update_regs(const struct intel_context *ce,
                    const struct intel_engine_cs *engine,
                    u32 head)
{
        struct intel_ring *ring = ce->ring;
        u32 *regs = ce->lrc_reg_state;

        GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
        GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));

        regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
        regs[CTX_RING_HEAD] = head;
        regs[CTX_RING_TAIL] = ring->tail;
        regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;

        if (engine->class == RENDER_CLASS) {
                regs[CTX_R_PWR_CLK_STATE] =
                        intel_sseu_make_rpcs(engine->gt, &ce->sseu);

                i915_oa_init_reg_state(ce, engine);
        }

        if (ce->wa_bb_page) {
                u32 *(*fn)(const struct intel_context *ce, u32 *cs);

                fn = gen12_emit_indirect_ctx_xcs;
                if (ce->engine->class == RENDER_CLASS)
                        fn = gen12_emit_indirect_ctx_rcs;

                /* Mutually exclusive wrt to global indirect bb */
                GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
                setup_indirect_ctx_bb(ce, engine, fn);
        }

        return lrc_descriptor(ce) | CTX_DESC_FORCE_RESTORE;
}

void lrc_update_offsets(struct intel_context *ce,
                        struct intel_engine_cs *engine)
{
        set_offsets(ce->lrc_reg_state, reg_offsets(engine), engine, false);
}
void lrc_check_regs(const struct intel_context *ce,
                    const struct intel_engine_cs *engine,
                    const char *when)
{
        const struct intel_ring *ring = ce->ring;
        u32 *regs = ce->lrc_reg_state;
        bool valid = true;
        int x;

        if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
                pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
                       engine->name,
                       regs[CTX_RING_START],
                       i915_ggtt_offset(ring->vma));
                regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
                valid = false;
        }

        if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
            (RING_CTL_SIZE(ring->size) | RING_VALID)) {
                pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
                       engine->name,
                       regs[CTX_RING_CTL],
                       (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
                regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
                valid = false;
        }

        x = lrc_ring_mi_mode(engine);
        if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
                pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
                       engine->name, regs[x + 1]);
                regs[x + 1] &= ~STOP_RING;
                regs[x + 1] |= STOP_RING << 16;
                valid = false;
        }

        WARN_ONCE(!valid, "Invalid lrc state found %s submission\n", when);
}
/*
 * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after the
 * PIPE_CONTROL instruction. This is required for the flush to happen
 * correctly, but there is a slight complication as this is applied in a WA
 * batch where the values are only initialized once, so we cannot read the
 * register value at the beginning and reuse it further; hence we save its
 * value to memory, upload a constant value with bit21 set and then restore
 * it back with the saved value. To simplify the WA, a constant value is
 * formed by using the default value of this register. This shouldn't be a
 * problem because we are only modifying it for a short period and this batch
 * is non-preemptible. We could of course use additional instructions that
 * read the actual value of the register at that time and set our bit of
 * interest, but that makes the WA more complicated.
 *
 * This WA is also required for Gen9, so extracting it as a function avoids
 * code duplication.
 */
static u32 *
gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
{
        /* NB no one else is allowed to scribble over scratch + 256! */
        *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
        *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
        *batch++ = intel_gt_scratch_offset(engine->gt,
                                           INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
        *batch++ = 0;

        *batch++ = MI_LOAD_REGISTER_IMM(1);
        *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
        *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;

        batch = gen8_emit_pipe_control(batch,
                                       PIPE_CONTROL_CS_STALL |
                                       PIPE_CONTROL_DC_FLUSH_ENABLE,
                                       0);

        *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
        *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
        *batch++ = intel_gt_scratch_offset(engine->gt,
                                           INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
        *batch++ = 0;

        return batch;
}
/*
 * Typically we only have one indirect_ctx and per_ctx batch buffer which are
 * initialized at the beginning and shared across all contexts, but this field
 * helps us to have multiple batches at different offsets and select them based
 * on some criteria. At the moment this batch always starts at the beginning of
 * the page and at this point we don't have multiple wa_ctx batch buffers.
 *
 * The number of WAs applied is not known at the beginning; we use this field
 * to return the number of DWORDs written.
 *
 * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END,
 * so it adds NOOPs as padding to make it cacheline aligned.
 * MI_BATCH_BUFFER_END will be added to the per-ctx batch and both of them
 * together make a complete batch buffer.
 */
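/*
 * Rough layout of the single CTX_WA_BB_SIZE page built by lrc_init_wa_ctx()
 * below (shown only as an illustration; the actual offsets are whatever the
 * emit functions produce):
 *
 *      [ indirect_ctx batch | MI_NOOP padding up to a cacheline |
 *        per_ctx batch ending in MI_BATCH_BUFFER_END ]
 *
 * wa_bb[i]->offset and wa_bb[i]->size record where each batch landed.
 */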
static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
        /* WaDisableCtxRestoreArbitration:bdw,chv */
        *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

        /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
        if (IS_BROADWELL(engine->i915))
                batch = gen8_emit_flush_coherentl3_wa(engine, batch);

        /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
        /* Actual scratch location is at 128 bytes offset */
        batch = gen8_emit_pipe_control(batch,
                                       PIPE_CONTROL_FLUSH_L3 |
                                       PIPE_CONTROL_STORE_DATA_INDEX |
                                       PIPE_CONTROL_CS_STALL |
                                       PIPE_CONTROL_QW_WRITE,
                                       LRC_PPHWSP_SCRATCH_ADDR);

        *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

        /* Pad to end of cacheline */
        while ((unsigned long)batch % CACHELINE_BYTES)
                *batch++ = MI_NOOP;

        /*
         * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
         * execution depends on the length specified in terms of cache lines
         * in the register CTX_RCS_INDIRECT_CTX
         */

        return batch;
}

struct lri {
        i915_reg_t reg;
        u32 value;
};

static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
{
        GEM_BUG_ON(!count || count > 63);

        *batch++ = MI_LOAD_REGISTER_IMM(count);
        do {
                *batch++ = i915_mmio_reg_offset(lri->reg);
                *batch++ = lri->value;
        } while (lri++, --count);

        return batch;
}
static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
        static const struct lri lri[] = {
                /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
                {
                        COMMON_SLICE_CHICKEN2,
                        __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
                },
                {
                        __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
                                       FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
                },
                {
                        __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
                                       _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
                }
        };

        *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

        /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
        batch = gen8_emit_flush_coherentl3_wa(engine, batch);

        /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
        batch = gen8_emit_pipe_control(batch,
                                       PIPE_CONTROL_FLUSH_L3 |
                                       PIPE_CONTROL_STORE_DATA_INDEX |
                                       PIPE_CONTROL_CS_STALL |
                                       PIPE_CONTROL_QW_WRITE,
                                       LRC_PPHWSP_SCRATCH_ADDR);

        batch = emit_lri(batch, lri, ARRAY_SIZE(lri));

        /* WaMediaPoolStateCmdInWABB:bxt,glk */
        if (HAS_POOLED_EU(engine->i915)) {
                /*
                 * EU pool configuration is set up along with the golden context
                 * during context initialization. This value depends on the
                 * device type (2x6 or 3x6) and needs to be updated based on
                 * which subslice is disabled, especially for 2x6 devices.
                 * However, it is safe to load the default configuration of a
                 * 3x6 device instead of masking off the corresponding bits,
                 * because HW ignores bits of a disabled subslice and drops
                 * down to the appropriate config. Please see
                 * render_state_setup() in i915_gem_render_state.c for possible
                 * configurations; to avoid duplication they are not shown here
                 * again.
                 */
                *batch++ = GEN9_MEDIA_POOL_STATE;
                *batch++ = GEN9_MEDIA_POOL_ENABLE;
                *batch++ = 0x00777000;
        }

        *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

        /* Pad to end of cacheline */
        while ((unsigned long)batch % CACHELINE_BYTES)
                *batch++ = MI_NOOP;

        return batch;
}
#define CTX_WA_BB_SIZE (PAGE_SIZE)

static int lrc_create_wa_ctx(struct intel_engine_cs *engine)
{
        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;
        int err;

        obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_SIZE);
        if (IS_ERR(obj))
                return PTR_ERR(obj);

        vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
        if (IS_ERR(vma)) {
                err = PTR_ERR(vma);
                goto err;
        }

        engine->wa_ctx.vma = vma;
        return 0;

err:
        i915_gem_object_put(obj);
        return err;
}

void lrc_fini_wa_ctx(struct intel_engine_cs *engine)
{
        i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
}
typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);

void lrc_init_wa_ctx(struct intel_engine_cs *engine)
{
        struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
        struct i915_wa_ctx_bb *wa_bb[] = {
                &wa_ctx->indirect_ctx, &wa_ctx->per_ctx
        };
        wa_bb_func_t wa_bb_fn[ARRAY_SIZE(wa_bb)];
        struct i915_gem_ww_ctx ww;
        void *batch, *batch_ptr;
        unsigned int i;
        int err;

        if (GRAPHICS_VER(engine->i915) >= 11 ||
            !(engine->flags & I915_ENGINE_HAS_RCS_REG_STATE))
                return;

        if (GRAPHICS_VER(engine->i915) == 9) {
                wa_bb_fn[0] = gen9_init_indirectctx_bb;
                wa_bb_fn[1] = NULL;
        } else if (GRAPHICS_VER(engine->i915) == 8) {
                wa_bb_fn[0] = gen8_init_indirectctx_bb;
                wa_bb_fn[1] = NULL;
        }

        err = lrc_create_wa_ctx(engine);
        if (err) {
                /*
                 * We continue even if we fail to initialize the WA batch
                 * because we only expect rare glitches, nothing critical
                 * enough to prevent us from using the GPU.
                 */
                drm_err(&engine->i915->drm,
                        "Ignoring context switch w/a allocation error:%d\n",
                        err);
                return;
        }

        if (!engine->wa_ctx.vma)
                return;

        i915_gem_ww_ctx_init(&ww, true);
retry:
        err = i915_gem_object_lock(wa_ctx->vma->obj, &ww);
        if (!err)
                err = i915_ggtt_pin(wa_ctx->vma, &ww, 0, PIN_HIGH);
        if (err)
                goto err;

        batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB);
        if (IS_ERR(batch)) {
                err = PTR_ERR(batch);
                goto err_unpin;
        }

        /*
         * Emit the two workaround batch buffers, recording the offset from the
         * start of the workaround batch buffer object for each and their
         * sizes.
         */
        batch_ptr = batch;
        for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
                wa_bb[i]->offset = batch_ptr - batch;
                if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
                                                  CACHELINE_BYTES))) {
                        err = -EINVAL;
                        break;
                }
                if (wa_bb_fn[i])
                        batch_ptr = wa_bb_fn[i](engine, batch_ptr);
                wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
        }
        GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_SIZE);

        __i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch);
        __i915_gem_object_release_map(wa_ctx->vma->obj);

        /* Verify that we can handle failure to setup the wa_ctx */
        if (!err)
                err = i915_inject_probe_error(engine->i915, -ENODEV);

err_unpin:
        if (err)
                i915_vma_unpin(wa_ctx->vma);
err:
        if (err == -EDEADLK) {
                err = i915_gem_ww_ctx_backoff(&ww);
                if (!err)
                        goto retry;
        }
        i915_gem_ww_ctx_fini(&ww);

        if (err) {
                i915_vma_put(engine->wa_ctx.vma);

                /* Clear all flags to prevent further use */
                memset(wa_ctx, 0, sizeof(*wa_ctx));
        }
}
static void st_runtime_underflow(struct intel_context_stats *stats, s32 dt)
{
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
        stats->runtime.num_underflow++;
        stats->runtime.max_underflow =
                max_t(u32, stats->runtime.max_underflow, -dt);
#endif
}

static u32 lrc_get_runtime(const struct intel_context *ce)
{
        /*
         * We can use either ppHWSP[16] which is recorded before the context
         * switch (and so excludes the cost of context switches) or use the
         * value from the context image itself, which is saved/restored earlier
         * and so includes the cost of the save.
         */
        return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
}
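/*
 * Note on the arithmetic in lrc_update_runtime() below: CTX_TIMESTAMP is a
 * free-running 32-bit counter, so the delta is computed modulo 2^32. For
 * example (made-up values), old = 0xffffff00 and last = 0x00000100 give
 * dt = 0x200 despite the wrap. A genuinely negative dt is therefore treated
 * as an underflow and only reported (CE_TRACE/st_runtime_underflow()) rather
 * than accumulated.
 */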
void lrc_update_runtime(struct intel_context *ce)
{
        struct intel_context_stats *stats = &ce->stats;
        u32 old;
        s32 dt;

        old = stats->runtime.last;
        stats->runtime.last = lrc_get_runtime(ce);
        dt = stats->runtime.last - old;

        if (unlikely(dt < 0)) {
                CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
                         old, stats->runtime.last, dt);
                st_runtime_underflow(stats, dt);
                return;
        }

        ewma_runtime_add(&stats->runtime.avg, dt);
        stats->runtime.total += dt;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_lrc.c"
#endif