/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _INTEL_RINGBUFFER_H_
#define _INTEL_RINGBUFFER_H_

#include <drm/drm_util.h>

#include <linux/hashtable.h>
#include <linux/seqlock.h>

#include "i915_gem_batch_pool.h"

#include "i915_reg.h"
#include "i915_pmu.h"
#include "i915_request.h"
#include "i915_selftest.h"
#include "i915_timeline.h"
#include "intel_gpu_commands.h"
#include "intel_workarounds.h"

struct drm_printer;
struct i915_sched_attr;

#define I915_CMD_HASH_ORDER 9

/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
 * but keeps the logic simple. Indeed, the whole purpose of this macro is just
 * to give some indication as to some of the magic values used in the various
 * workarounds!
 */
#define CACHELINE_BYTES 64
#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(uint32_t))

struct intel_hw_status_page {
	struct i915_vma *vma;
	u32 *page_addr;
	u32 ggtt_offset;
};

#define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base))
#define I915_WRITE_TAIL(engine, val) I915_WRITE(RING_TAIL((engine)->mmio_base), val)

#define I915_READ_START(engine) I915_READ(RING_START((engine)->mmio_base))
#define I915_WRITE_START(engine, val) I915_WRITE(RING_START((engine)->mmio_base), val)

#define I915_READ_HEAD(engine) I915_READ(RING_HEAD((engine)->mmio_base))
#define I915_WRITE_HEAD(engine, val) I915_WRITE(RING_HEAD((engine)->mmio_base), val)

#define I915_READ_CTL(engine) I915_READ(RING_CTL((engine)->mmio_base))
#define I915_WRITE_CTL(engine, val) I915_WRITE(RING_CTL((engine)->mmio_base), val)

#define I915_READ_IMR(engine) I915_READ(RING_IMR((engine)->mmio_base))
#define I915_WRITE_IMR(engine, val) I915_WRITE(RING_IMR((engine)->mmio_base), val)

#define I915_READ_MODE(engine) I915_READ(RING_MI_MODE((engine)->mmio_base))
#define I915_WRITE_MODE(engine, val) I915_WRITE(RING_MI_MODE((engine)->mmio_base), val)

/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
 * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
 */
enum intel_engine_hangcheck_action {
	ENGINE_IDLE = 0,
	ENGINE_WAIT,
	ENGINE_ACTIVE_SEQNO,
	ENGINE_ACTIVE_HEAD,
	ENGINE_ACTIVE_SUBUNITS,
	ENGINE_WAIT_KICK,
	ENGINE_DEAD,
};

static inline const char *
hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
{
	switch (a) {
	case ENGINE_IDLE:
		return "idle";
	case ENGINE_WAIT:
		return "wait";
	case ENGINE_ACTIVE_SEQNO:
		return "active seqno";
	case ENGINE_ACTIVE_HEAD:
		return "active head";
	case ENGINE_ACTIVE_SUBUNITS:
		return "active subunits";
	case ENGINE_WAIT_KICK:
		return "wait kick";
	case ENGINE_DEAD:
		return "dead";
	}

	return "unknown";
}

#define I915_MAX_SLICES	3
#define I915_MAX_SUBSLICES 8

#define instdone_slice_mask(dev_priv__) \
	(INTEL_GEN(dev_priv__) == 7 ? \
	 1 : INTEL_INFO(dev_priv__)->sseu.slice_mask)

#define instdone_subslice_mask(dev_priv__) \
	(INTEL_GEN(dev_priv__) == 7 ? \
	 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask[0])

#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
	for ((slice__) = 0, (subslice__) = 0; \
	     (slice__) < I915_MAX_SLICES; \
	     (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \
	       (slice__) += ((subslice__) == 0)) \
		for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \
			    (BIT(subslice__) & instdone_subslice_mask(dev_priv__)))

struct intel_instdone {
	u32 instdone;
	/* The following exist only in the RCS engine */
	u32 slice_common;
	u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES];
	u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
};

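#if 0
/* Illustrative sketch (example only, not from the original header):
 * walking the populated slice/subslice pairs of a struct intel_instdone
 * with for_each_instdone_slice_subslice(). The function name is an
 * assumption; "dev_priv" and "instdone" are assumed to come from the
 * caller (e.g. via intel_engine_get_instdone(), declared below).
 */
static void example_dump_instdone(struct drm_i915_private *dev_priv,
				  const struct intel_instdone *instdone)
{
	int slice, subslice;

	for_each_instdone_slice_subslice(dev_priv, slice, subslice)
		DRM_DEBUG_DRIVER("sampler[%d][%d]: 0x%08x\n",
				 slice, subslice,
				 instdone->sampler[slice][subslice]);
}
#endif
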
struct intel_engine_hangcheck {
	u64 acthd;
	u32 seqno;
	enum intel_engine_hangcheck_action action;
	unsigned long action_timestamp;
	int deadlock;
	struct intel_instdone instdone;
	struct i915_request *active_request;
	bool stalled:1;
	bool wedged:1;
};

struct intel_ring {
	struct i915_vma *vma;
	void *vaddr;

	struct i915_timeline *timeline;
	struct list_head request_list;
	struct list_head active_link;

	u32 head;
	u32 tail;
	u32 emit;

	u32 space;
	u32 size;
	u32 effective_size;
};

struct i915_gem_context;
struct drm_i915_reg_table;

/*
 * We use a single page to load ctx workarounds, so all of these
 * values are referred to in terms of dwords.
 *
 * struct i915_wa_ctx_bb:
 *  offset: specifies batch starting position, also helpful in case
 *    we want to have multiple batches at different offsets based on
 *    some criteria. It is not a requirement at the moment but provides
 *    an option for future use.
 *  size: size of the batch in DWORDS
 */
struct i915_ctx_workarounds {
	struct i915_wa_ctx_bb {
		u32 offset;
		u32 size;
	} indirect_ctx, per_ctx;
	struct i915_vma *vma;
};

struct i915_request;

#define I915_MAX_VCS	4
#define I915_MAX_VECS	2

/*
 * Engine ID definitions.
 * Keep instances of the same engine type together.
 */
enum intel_engine_id {
	RCS = 0,
	BCS,
	VCS,
	VCS2,
	VCS3,
	VCS4,
#define _VCS(n) (VCS + (n))
	VECS,
	VECS2
#define _VECS(n) (VECS + (n))
};

struct i915_priolist {
	struct rb_node node;
	struct list_head requests;
	int priority;
};

struct st_preempt_hang {
	struct completion completion;
	bool inject_hang;
};

/**
 * struct intel_engine_execlists - execlist submission queue and port state
 *
 * The struct intel_engine_execlists represents the combined logical state of
 * the driver and the hardware state for execlist mode of submission.
 */
struct intel_engine_execlists {
	/**
	 * @tasklet: softirq tasklet for bottom handler
	 */
	struct tasklet_struct tasklet;

	/**
	 * @default_priolist: priority list for I915_PRIORITY_NORMAL
	 */
	struct i915_priolist default_priolist;

	/**
	 * @no_priolist: priority lists disabled
	 */
	bool no_priolist;

	/**
	 * @submit_reg: gen-specific execlist submission register
	 * set to the ExecList Submission Port (elsp) register pre-Gen11 and to
	 * the ExecList Submission Queue Contents register array for Gen11+
	 */
	u32 __iomem *submit_reg;

	/**
	 * @ctrl_reg: the enhanced execlists control register, used to load the
	 * submit queue on the HW and to request preemptions to idle
	 */
	u32 __iomem *ctrl_reg;

	/**
	 * @port: execlist port states
	 *
	 * For each hardware ELSP (ExecList Submission Port) we keep
	 * track of the last request and the number of times we submitted
	 * that port to hw. We then count the number of times the hw reports
	 * a context completion or preemption. As only one context can
	 * be active on hw, we limit resubmission of a context to port[0].
	 * This is called a Lite Restore of the context.
	 */
	struct execlist_port {
		/**
		 * @request_count: combined request and submission count
		 */
		struct i915_request *request_count;
#define EXECLIST_COUNT_BITS 2
#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS)
#define port_set(p, packed) ((p)->request_count = (packed))
#define port_isset(p) ((p)->request_count)
#define port_index(p, execlists) ((p) - (execlists)->port)

		/**
		 * @context_id: context ID for port
		 */
		GEM_DEBUG_DECL(u32 context_id);

#define EXECLIST_MAX_PORTS 2
	} port[EXECLIST_MAX_PORTS];

	/**
	 * @active: is the HW active? We consider the HW as active after
	 * submitting any context for execution and until we have seen the
	 * last context completion event. After that, we do not expect any
	 * more events until we submit, and so can park the HW.
	 *
	 * As we have a small number of different sources from which we feed
	 * the HW, we track the state of each inside a single bitfield.
	 */
	unsigned int active;
#define EXECLISTS_ACTIVE_USER 0
#define EXECLISTS_ACTIVE_PREEMPT 1
#define EXECLISTS_ACTIVE_HWACK 2

	/**
	 * @port_mask: number of execlist ports - 1
	 */
	unsigned int port_mask;

	/**
	 * @queue_priority: Highest pending priority.
	 *
	 * When we add requests into the queue, or adjust the priority of
	 * executing requests, we compute the maximum priority of those
	 * pending requests. We can then use this value to determine if
	 * we need to preempt the executing requests to service the queue.
	 */
	int queue_priority;

	/**
	 * @queue: queue of requests, in priority lists
	 */
	struct rb_root_cached queue;

	/**
	 * @csb_read: control register for Context Switch buffer
	 *
	 * Note this register is always in mmio.
	 */
	u32 __iomem *csb_read;

	/**
	 * @csb_write: control register for Context Switch buffer
	 *
	 * Note this register may be either mmio or HWSP shadow.
	 */
	u32 *csb_write;

	/**
	 * @csb_status: status array for Context Switch buffer
	 *
	 * Note these registers may be either mmio or HWSP shadow.
	 */
	u32 *csb_status;

	/**
	 * @preempt_complete_status: expected CSB upon completing preemption
	 */
	u32 preempt_complete_status;

	/**
	 * @csb_write_reset: reset value for CSB write pointer
	 *
	 * As the CSB write pointer may be either in HWSP or a field inside
	 * an mmio register, we want to reprogram it slightly differently
	 * to avoid later confusion.
	 */
	u32 csb_write_reset;

	/**
	 * @csb_head: context status buffer head
	 */
	u8 csb_head;

	I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;)
};

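#if 0
/* Illustrative sketch (example only, not from the original header): how
 * a request pointer and its submission count share @request_count via
 * the port_*() helpers above. The function name is an assumption; "rq"
 * is assumed to be sufficiently aligned for its low bits to be free.
 */
static void example_port_accounting(struct execlist_port *port,
				    struct i915_request *rq)
{
	unsigned int count;

	/* First submission of rq to this port: count = 1. */
	port_set(port, port_pack(rq, 1));

	/* Recover both fields; here count == 1 and rq is unchanged. */
	rq = port_unpack(port, &count);

	/* Resubmission of port[0] (Lite Restore) bumps the count. */
	port_set(port, port_pack(rq, count + 1));
}
#endif
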
#define INTEL_ENGINE_CS_MAX_NAME 8

struct intel_engine_cs {
	struct drm_i915_private *i915;
	char name[INTEL_ENGINE_CS_MAX_NAME];

	enum intel_engine_id id;
	unsigned int hw_id;
	unsigned int guc_id;

	u8 uabi_id;
	u8 uabi_class;

	u8 class;
	u8 instance;
	u32 context_size;
	u32 mmio_base;

	struct intel_ring *buffer;

	struct i915_timeline timeline;

	struct drm_i915_gem_object *default_state;
	void *pinned_default_state;

	unsigned long irq_posted;
#define ENGINE_IRQ_BREADCRUMB 0

	/* Rather than have every client wait upon all user interrupts,
	 * with the herd waking after every interrupt and each doing the
	 * heavyweight seqno dance, we delegate the task (of being the
	 * bottom-half of the user interrupt) to the first client. After
	 * every interrupt, we wake up one client, who does the heavyweight
	 * coherent seqno read and either goes back to sleep (if incomplete),
	 * or wakes up all the completed clients in parallel, before then
	 * transferring the bottom-half status to the next client in the queue.
	 *
	 * Compared to walking the entire list of waiters in a single dedicated
	 * bottom-half, we reduce the latency of the first waiter by avoiding
	 * a context switch, but incur additional coherent seqno reads when
	 * following the chain of request breadcrumbs. Since it is most likely
	 * that we have a single client waiting on each seqno, then reducing
	 * the overhead of waking that client is much preferred.
	 */
	struct intel_breadcrumbs {
		spinlock_t irq_lock; /* protects irq_*; irqsafe */
		struct intel_wait *irq_wait; /* oldest waiter by retirement */

		spinlock_t rb_lock; /* protects the rb and wraps irq_lock */
		struct rb_root waiters; /* sorted by retirement, priority */
		struct list_head signals; /* sorted by retirement */
		struct task_struct *signaler; /* used for fence signalling */

		struct timer_list fake_irq; /* used after a missed interrupt */
		struct timer_list hangcheck; /* detect missed interrupts */

		unsigned int hangcheck_interrupts;
		unsigned int irq_enabled;
		unsigned int irq_count;

		bool irq_armed : 1;
		I915_SELFTEST_DECLARE(bool mock : 1);
	} breadcrumbs;

	struct {
		/**
		 * @enable: Bitmask of enable sample events on this engine.
		 *
		 * Bits correspond to sample event types, for instance
		 * I915_SAMPLE_QUEUED is bit 0 etc.
		 */
		u32 enable;
		/**
		 * @enable_count: Reference count for the enabled samplers.
		 *
		 * Index number corresponds to the bit number from @enable.
		 */
		unsigned int enable_count[I915_PMU_SAMPLE_BITS];
		/**
		 * @sample: Counter values for sampling events.
		 *
		 * Our internal timer stores the current counters in this field.
		 */
#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1)
		struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX];
	} pmu;

	/*
	 * A pool of objects to use as shadow copies of client batch buffers
	 * when the command parser is enabled. Prevents the client from
	 * modifying the batch contents after software parsing.
	 */
	struct i915_gem_batch_pool batch_pool;

	struct intel_hw_status_page status_page;
	struct i915_ctx_workarounds wa_ctx;
	struct i915_wa_list wa_list;

	u32 irq_keep_mask; /* always keep these interrupts */
	u32 irq_enable_mask; /* bitmask to enable ring interrupt */
	void (*irq_enable)(struct intel_engine_cs *engine);
	void (*irq_disable)(struct intel_engine_cs *engine);

	int (*init_hw)(struct intel_engine_cs *engine);

	struct {
		struct i915_request *(*prepare)(struct intel_engine_cs *engine);
		void (*reset)(struct intel_engine_cs *engine,
			      struct i915_request *rq);
		void (*finish)(struct intel_engine_cs *engine);
	} reset;

	void (*park)(struct intel_engine_cs *engine);
	void (*unpark)(struct intel_engine_cs *engine);

	void (*set_default_submission)(struct intel_engine_cs *engine);

	struct intel_context *(*context_pin)(struct intel_engine_cs *engine,
					     struct i915_gem_context *ctx);

	int (*request_alloc)(struct i915_request *rq);
	int (*init_context)(struct i915_request *rq);

	int (*emit_flush)(struct i915_request *request, u32 mode);
#define EMIT_INVALIDATE	BIT(0)
#define EMIT_FLUSH	BIT(1)
#define EMIT_BARRIER	(EMIT_INVALIDATE | EMIT_FLUSH)
	int (*emit_bb_start)(struct i915_request *rq,
			     u64 offset, u32 length,
			     unsigned int dispatch_flags);
#define I915_DISPATCH_SECURE BIT(0)
#define I915_DISPATCH_PINNED BIT(1)
	void (*emit_breadcrumb)(struct i915_request *rq, u32 *cs);
	int emit_breadcrumb_sz;

	/* Pass the request to the hardware queue (e.g. directly into
	 * the legacy ringbuffer or to the end of an execlist).
	 *
	 * This is called from an atomic context with irqs disabled; must
	 * be irq safe.
	 */
	void (*submit_request)(struct i915_request *rq);

	/* Call when the priority on a request has changed and it and its
	 * dependencies may need rescheduling. Note the request itself may
	 * not be ready to run!
	 *
	 * Called under the struct_mutex.
	 */
	void (*schedule)(struct i915_request *request,
			 const struct i915_sched_attr *attr);

	/*
	 * Cancel all requests on the hardware, or queued for execution.
	 * This should only cancel the ready requests that have been
	 * submitted to the engine (via the engine->submit_request callback).
	 * This is called when marking the device as wedged.
	 */
	void (*cancel_requests)(struct intel_engine_cs *engine);

	/* Some chipsets are not quite as coherent as advertised and need
	 * an expensive kick to force a true read of the up-to-date seqno.
	 * However, the up-to-date seqno is not always required and the last
	 * seen value is good enough. Note that the seqno will always be
	 * monotonic, even if not coherent.
	 */
	void (*irq_seqno_barrier)(struct intel_engine_cs *engine);
	void (*cleanup)(struct intel_engine_cs *engine);

	/* GEN8 signal/wait table - never trust comments!
	 *	  signal to     signal to    signal to    signal to   signal to
	 *	    RCS            VCS          BCS          VECS        VCS2
	 *      --------------------------------------------------------------------
	 *  RCS | NOP (0x00) | VCS (0x08) | BCS (0x10) | VECS (0x18) | VCS2 (0x20) |
	 *	|-------------------------------------------------------------------
	 *  VCS | RCS (0x28) | NOP (0x30) | BCS (0x38) | VECS (0x40) | VCS2 (0x48) |
	 *	|-------------------------------------------------------------------
	 *  BCS | RCS (0x50) | VCS (0x58) | NOP (0x60) | VECS (0x68) | VCS2 (0x70) |
	 *	|-------------------------------------------------------------------
	 * VECS | RCS (0x78) | VCS (0x80) | BCS (0x88) | NOP  (0x90) | VCS2 (0x98) |
	 *	|-------------------------------------------------------------------
	 * VCS2 | RCS (0xa0) | VCS (0xa8) | BCS (0xb0) | VECS (0xb8) | NOP  (0xc0) |
	 *	|-------------------------------------------------------------------
	 *
	 * Generalization:
	 *  f(x, y) := (x->id * NUM_RINGS * seqno_size) + (seqno_size * y->id)
	 *  ie. transpose of g(x, y)
	 *
	 *	 sync from      sync from    sync from    sync from   sync from
	 *	    RCS            VCS          BCS          VECS        VCS2
	 *      --------------------------------------------------------------------
	 *  RCS | NOP (0x00) | VCS (0x28) | BCS (0x50) | VECS (0x78) | VCS2 (0xa0) |
	 *	|-------------------------------------------------------------------
	 *  VCS | RCS (0x08) | NOP (0x30) | BCS (0x58) | VECS (0x80) | VCS2 (0xa8) |
	 *	|-------------------------------------------------------------------
	 *  BCS | RCS (0x10) | VCS (0x38) | NOP (0x60) | VECS (0x88) | VCS2 (0xb0) |
	 *	|-------------------------------------------------------------------
	 * VECS | RCS (0x18) | VCS (0x40) | BCS (0x68) | NOP  (0x90) | VCS2 (0xb8) |
	 *	|-------------------------------------------------------------------
	 * VCS2 | RCS (0x20) | VCS (0x48) | BCS (0x70) | VECS (0x98) | NOP  (0xc0) |
	 *	|-------------------------------------------------------------------
	 *
	 * Generalization:
	 *  g(x, y) := (y->id * NUM_RINGS * seqno_size) + (seqno_size * x->id)
	 *  ie. transpose of f(x, y)
	 */
	struct {
#define GEN6_SEMAPHORE_LAST	VECS_HW
#define GEN6_NUM_SEMAPHORES	(GEN6_SEMAPHORE_LAST + 1)
#define GEN6_SEMAPHORES_MASK	GENMASK(GEN6_SEMAPHORE_LAST, 0)
		struct {
			/* our mbox written by others */
			u32		wait[GEN6_NUM_SEMAPHORES];
			/* mboxes this ring signals to */
			i915_reg_t	signal[GEN6_NUM_SEMAPHORES];
		} mbox;

		/* AKA wait() */
		int	(*sync_to)(struct i915_request *rq,
				   struct i915_request *signal);
		u32	*(*signal)(struct i915_request *rq, u32 *cs);
	} semaphore;

	struct intel_engine_execlists execlists;

	/* Contexts are pinned whilst they are active on the GPU. The last
	 * context executed remains active whilst the GPU is idle - the
	 * switch away and write to the context object only occurs on the
	 * next execution. Contexts are only unpinned on retirement of the
	 * following request ensuring that we can always write to the object
	 * on the context switch even after idling. Across suspend, we switch
	 * to the kernel context and trash it as the save may not happen
	 * before the hardware is powered down.
	 */
	struct intel_context *last_retired_context;

	/* status_notifier: list of callbacks for context-switch changes */
	struct atomic_notifier_head context_status_notifier;

	struct intel_engine_hangcheck hangcheck;

#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
#define I915_ENGINE_SUPPORTS_STATS   BIT(1)
#define I915_ENGINE_HAS_PREEMPTION   BIT(2)
	unsigned int flags;

	/*
	 * Table of commands the command parser needs to know about
	 * for this engine.
	 */
	DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER);

	/*
	 * Table of registers allowed in commands that read/write registers.
	 */
	const struct drm_i915_reg_table *reg_tables;
	int reg_table_count;

	/*
	 * Returns the bitmask for the length field of the specified command.
	 * Return 0 for an unrecognized/invalid command.
	 *
	 * If the command parser finds an entry for a command in the engine's
	 * cmd_tables, it gets the command's length based on the table entry.
	 * If not, it calls this function to determine the per-engine length
	 * field encoding for the command (i.e. different opcode ranges use
	 * certain bits to encode the command length in the header).
	 */
	u32 (*get_cmd_length_mask)(u32 cmd_header);

	struct {
		/**
		 * @lock: Lock protecting the below fields.
		 */
		seqlock_t lock;
		/**
		 * @enabled: Reference count indicating number of listeners.
		 */
		unsigned int enabled;
		/**
		 * @active: Number of contexts currently scheduled in.
		 */
		unsigned int active;
		/**
		 * @enabled_at: Timestamp when busy stats were enabled.
		 */
		ktime_t enabled_at;
		/**
		 * @start: Timestamp of the last idle to active transition.
		 *
		 * Idle is defined as active == 0, active is active > 0.
		 */
		ktime_t start;
		/**
		 * @total: Total time this engine was busy.
		 *
		 * Accumulated time not counting the most recent block in cases
		 * where engine is currently busy (active > 0).
		 */
		ktime_t total;
	} stats;
};

static inline bool
intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER;
}

static inline bool
intel_engine_supports_stats(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_SUPPORTS_STATS;
}

static inline bool
intel_engine_has_preemption(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_HAS_PREEMPTION;
}

static inline bool __execlists_need_preempt(int prio, int last)
{
	return prio > max(0, last);
}

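/*
 * The max() clamp above means a negative "last" priority is treated as
 * 0: for example, prio == 1 is enough to preempt work running at
 * priority <= 0, while preempting work at priority 2 requires
 * prio >= 3.
 */
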
static inline void
execlists_set_active(struct intel_engine_execlists *execlists,
		     unsigned int bit)
{
	__set_bit(bit, (unsigned long *)&execlists->active);
}

static inline bool
execlists_set_active_once(struct intel_engine_execlists *execlists,
			  unsigned int bit)
{
	return !__test_and_set_bit(bit, (unsigned long *)&execlists->active);
}

static inline void
execlists_clear_active(struct intel_engine_execlists *execlists,
		       unsigned int bit)
{
	__clear_bit(bit, (unsigned long *)&execlists->active);
}

static inline void
execlists_clear_all_active(struct intel_engine_execlists *execlists)
{
	execlists->active = 0;
}

static inline bool
execlists_is_active(const struct intel_engine_execlists *execlists,
		    unsigned int bit)
{
	return test_bit(bit, (unsigned long *)&execlists->active);
}

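#if 0
/* Illustrative sketch (example only, not from the original header): the
 * intended pairing of the EXECLISTS_ACTIVE_* helpers around user
 * submissions. In the driver proper this bookkeeping is wrapped by
 * execlists_user_begin()/execlists_user_end(), declared below.
 */
static void example_track_user_activity(struct intel_engine_execlists *el)
{
	/* First context submitted: mark the HW as busy with user work. */
	execlists_set_active(el, EXECLISTS_ACTIVE_USER);

	if (execlists_is_active(el, EXECLISTS_ACTIVE_USER)) {
		/* HW is still processing user contexts; do not park. */
	}

	/* Last context completion event seen: the HW may now be parked. */
	execlists_clear_active(el, EXECLISTS_ACTIVE_USER);
}
#endif
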
void execlists_user_begin(struct intel_engine_execlists *execlists,
			  const struct execlist_port *port);
void execlists_user_end(struct intel_engine_execlists *execlists);

void
execlists_cancel_port_requests(struct intel_engine_execlists * const execlists);

void
execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);

static inline unsigned int
execlists_num_ports(const struct intel_engine_execlists * const execlists)
{
	return execlists->port_mask + 1;
}

static inline struct execlist_port *
execlists_port_complete(struct intel_engine_execlists * const execlists,
			struct execlist_port * const port)
{
	const unsigned int m = execlists->port_mask;

	GEM_BUG_ON(port_index(port, execlists) != 0);
	GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));

	memmove(port, port + 1, m * sizeof(struct execlist_port));
	memset(port + m, 0, sizeof(struct execlist_port));

	return port;
}

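/*
 * With the common two-port setup, completing port[0] above shifts
 * port[1] down into port[0] and zeroes the vacated slot, so port[0]
 * always describes the context currently active on the HW.
 */
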
static inline unsigned int
intel_engine_flag(const struct intel_engine_cs *engine)
{
	return BIT(engine->id);
}

static inline u32
intel_read_status_page(const struct intel_engine_cs *engine, int reg)
{
	/* Ensure that the compiler doesn't optimize away the load. */
	return READ_ONCE(engine->status_page.page_addr[reg]);
}

static inline void
intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
{
	/* Writing into the status page should be done sparingly. Since
	 * we do so when we are uncertain of the device state, we take a bit
	 * of extra paranoia to try and ensure that the HWS takes the value
	 * we give and that it doesn't end up trapped inside the CPU!
	 */
	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
		mb();
		clflush(&engine->status_page.page_addr[reg]);
		engine->status_page.page_addr[reg] = value;
		clflush(&engine->status_page.page_addr[reg]);
		mb();
	} else {
		WRITE_ONCE(engine->status_page.page_addr[reg], value);
	}
}

/*
 * Reads a dword out of the status page, which is written to from the command
 * queue by automatic updates, MI_REPORT_HEAD, MI_STORE_DATA_INDEX, or
 * MI_STORE_DATA_IMM.
 *
 * The following dwords have a reserved meaning:
 * 0x00: ISR copy, updated when an ISR bit not set in the HWSTAM changes.
 * 0x04: ring 0 head pointer
 * 0x05: ring 1 head pointer (915-class)
 * 0x06: ring 2 head pointer (915-class)
 * 0x10-0x1b: Context status DWords (GM45)
 * 0x1f: Last written status offset. (GM45)
 * 0x20-0x2f: Reserved (Gen6+)
 *
 * The area from dword 0x30 to 0x3ff is available for driver usage.
 */
#define I915_GEM_HWS_INDEX		0x30
#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
#define I915_GEM_HWS_PREEMPT_INDEX	0x32
#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
#define I915_GEM_HWS_SCRATCH_INDEX	0x40
#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)

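/*
 * Worked example, assuming MI_STORE_DWORD_INDEX_SHIFT == 2 (the shift
 * from a dword index to a byte offset): I915_GEM_HWS_INDEX_ADDR is
 * byte offset 0x30 << 2 = 0xc0 into the status page, and
 * I915_GEM_HWS_SCRATCH_ADDR is 0x40 << 2 = 0x100.
 */
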
#define I915_HWS_CSB_BUF0_INDEX		0x10
#define I915_HWS_CSB_WRITE_INDEX	0x1f
#define CNL_HWS_CSB_WRITE_INDEX		0x2f

struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine,
			 struct i915_timeline *timeline,
			 int size);
int intel_ring_pin(struct intel_ring *ring);
void intel_ring_reset(struct intel_ring *ring, u32 tail);
unsigned int intel_ring_update_space(struct intel_ring *ring);
void intel_ring_unpin(struct intel_ring *ring);
void intel_ring_free(struct intel_ring *ring);

void intel_engine_stop(struct intel_engine_cs *engine);
void intel_engine_cleanup(struct intel_engine_cs *engine);

void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);

int __must_check intel_ring_cacheline_align(struct i915_request *rq);

int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes);
u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);

static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
{
	/* Dummy function.
	 *
	 * This serves as a placeholder in the code so that the reader
	 * can compare against the preceding intel_ring_begin() and
	 * check that the number of dwords emitted matches the space
	 * reserved for the command packet (i.e. the value passed to
	 * intel_ring_begin()).
	 */
	GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
}

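#if 0
/* Illustrative sketch (example only, not from the original header): the
 * expected pairing of intel_ring_begin() and intel_ring_advance(). The
 * function name and the use of MI_NOOP filler are assumptions for the
 * example; the contract is that exactly as many dwords are emitted as
 * were reserved.
 */
static int example_emit_noops(struct i915_request *rq)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 4);	/* reserve 4 dwords */
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_NOOP;
	*cs++ = MI_NOOP;
	*cs++ = MI_NOOP;
	*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);	/* asserts all 4 dwords were used */
	return 0;
}
#endif
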
static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
{
	return pos & (ring->size - 1);
}

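/*
 * The masking above assumes ring->size is a power of two, making it
 * equivalent to pos % ring->size without a division.
 */
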
static inline bool
intel_ring_offset_valid(const struct intel_ring *ring,
			unsigned int pos)
{
	if (pos & -ring->size) /* must be strictly within the ring */
		return false;

	if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */
		return false;

	return true;
}

static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
{
	/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
	u32 offset = addr - rq->ring->vaddr;
	GEM_BUG_ON(offset > rq->ring->size);
	return intel_ring_wrap(rq->ring, offset);
}

static inline void
assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
{
	GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));

	/*
	 * "Ring Buffer Use"
	 *	Gen2 BSpec "1. Programming Environment" / 1.4.4.6
	 *	Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
	 *	Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
	 * same cacheline, the Head Pointer must not be greater than the Tail
	 * Pointer."
	 *
	 * We use ring->head as the last known location of the actual RING_HEAD,
	 * it may have advanced but in the worst case it is equally the same
	 * as ring->head and so we should never program RING_TAIL to advance
	 * into the same cacheline as ring->head.
	 */
#define cacheline(a) round_down(a, CACHELINE_BYTES)
	GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
		   tail < ring->head);
#undef cacheline
}

static inline unsigned int
intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
{
	/* Whilst writes to the tail are strictly ordered, there is no
	 * serialisation between readers and the writers. The tail may be
	 * read by i915_request_retire() just as it is being updated
	 * by execlists, as although the breadcrumb is complete, the context
	 * switch hasn't been seen.
	 */
	assert_ring_tail_valid(ring, tail);
	ring->tail = tail;
	return tail;
}

void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno);

void intel_engine_setup_common(struct intel_engine_cs *engine);
int intel_engine_init_common(struct intel_engine_cs *engine);
void intel_engine_cleanup_common(struct intel_engine_cs *engine);

int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine);

int intel_engine_stop_cs(struct intel_engine_cs *engine);
void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine);

u64 intel_engine_get_active_head(const struct intel_engine_cs *engine);
u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine);

static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine)
{
	/*
	 * We are only peeking at the tail of the submit queue (and not the
	 * queue itself) in order to gain a hint as to the current active
	 * state of the engine. Callers are not expected to be taking
	 * engine->timeline->lock, nor are they expected to be concerned
	 * with serialising this hint with anything, so document it as
	 * a hint and nothing more.
	 */
	return READ_ONCE(engine->timeline.seqno);
}

static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine)
{
	return intel_read_status_page(engine, I915_GEM_HWS_INDEX);
}

static inline bool intel_engine_signaled(struct intel_engine_cs *engine,
					 u32 seqno)
{
	return i915_seqno_passed(intel_engine_get_seqno(engine), seqno);
}

static inline bool intel_engine_has_completed(struct intel_engine_cs *engine,
					      u32 seqno)
{
	GEM_BUG_ON(!seqno);
	return intel_engine_signaled(engine, seqno);
}

static inline bool intel_engine_has_started(struct intel_engine_cs *engine,
					    u32 seqno)
{
	GEM_BUG_ON(!seqno);
	return intel_engine_signaled(engine, seqno - 1);
}

void intel_engine_get_instdone(struct intel_engine_cs *engine,
			       struct intel_instdone *instdone);

/*
 * Arbitrary size for largest possible 'add request' sequence. The code paths
 * are complex and variable. Empirical measurement shows that the worst case
 * is BDW at 192 bytes (6 + 6 + 36 dwords), then ILK at 136 bytes. However,
 * we need to allocate double the largest single packet within that emission
 * to account for tail wraparound (so 6 + 6 + 72 dwords for BDW).
 */
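/* For reference: 6 + 6 + 72 dwords = 84 dwords * 4 bytes = 336 bytes. */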
#define MIN_SPACE_FOR_ADD_REQUEST 336

static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
{
	return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR;
}

static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
{
	return engine->status_page.ggtt_offset + I915_GEM_HWS_PREEMPT_ADDR;
}

/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);

static inline void intel_wait_init(struct intel_wait *wait)
{
	wait->tsk = current;
	wait->request = NULL;
}

static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno)
{
	wait->tsk = current;
	wait->seqno = seqno;
}

static inline bool intel_wait_has_seqno(const struct intel_wait *wait)
{
	return wait->seqno;
}

static inline bool
intel_wait_update_seqno(struct intel_wait *wait, u32 seqno)
{
	wait->seqno = seqno;
	return intel_wait_has_seqno(wait);
}

static inline bool
intel_wait_update_request(struct intel_wait *wait,
			  const struct i915_request *rq)
{
	return intel_wait_update_seqno(wait, i915_request_global_seqno(rq));
}

static inline bool
intel_wait_check_seqno(const struct intel_wait *wait, u32 seqno)
{
	return wait->seqno == seqno;
}

static inline bool
intel_wait_check_request(const struct intel_wait *wait,
			 const struct i915_request *rq)
{
	return intel_wait_check_seqno(wait, i915_request_global_seqno(rq));
}

static inline bool intel_wait_complete(const struct intel_wait *wait)
{
	return RB_EMPTY_NODE(&wait->node);
}

bool intel_engine_add_wait(struct intel_engine_cs *engine,
			   struct intel_wait *wait);
void intel_engine_remove_wait(struct intel_engine_cs *engine,
			      struct intel_wait *wait);
bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup);
void intel_engine_cancel_signaling(struct i915_request *request);

static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
{
	return READ_ONCE(engine->breadcrumbs.irq_wait);
}

unsigned int intel_engine_wakeup(struct intel_engine_cs *engine);
#define ENGINE_WAKEUP_WAITER BIT(0)
#define ENGINE_WAKEUP_ASLEEP BIT(1)

void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine);
void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine);

void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);

void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);

static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
{
	memset(batch, 0, 6 * sizeof(u32));

	batch[0] = GFX_OP_PIPE_CONTROL(6);
	batch[1] = flags;
	batch[2] = offset;

	return batch + 6;
}

static inline u32 *
gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset)
{
	/* We're using qword write, offset should be aligned to 8 bytes. */
	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));

	/* w/a: for post sync ops following a GPGPU operation we need a
	 * prior CS_STALL, which is emitted by the flush following the
	 * batch.
	 */
	*cs++ = GFX_OP_PIPE_CONTROL(6);
	*cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL |
		PIPE_CONTROL_QW_WRITE;
	*cs++ = gtt_offset;
	*cs++ = 0;
	*cs++ = value;
	/* We're thrashing one dword of HWS. */
	*cs++ = 0;

	return cs;
}

static inline u32 *
gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset)
{
	/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
	GEM_BUG_ON(gtt_offset & (1 << 5));
	/* Offset should be aligned to 8 bytes for both (QW/DW) write types */
	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));

	*cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
	*cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT;
	*cs++ = 0;
	*cs++ = value;

	return cs;
}

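#if 0
/* Illustrative sketch (example only, not from the original header):
 * emitting a dword write into the engine's HWS with
 * gen8_emit_ggtt_write(), which adds exactly 4 dwords. The function
 * name and seqno argument are assumptions for the example.
 */
static int example_emit_hws_write(struct i915_request *rq, u32 seqno)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	cs = gen8_emit_ggtt_write(cs, seqno,
				  intel_hws_seqno_address(rq->engine));

	intel_ring_advance(rq, cs);
	return 0;
}
#endif
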
void intel_engines_sanitize(struct drm_i915_private *i915);

bool intel_engine_is_idle(struct intel_engine_cs *engine);
bool intel_engines_are_idle(struct drm_i915_private *dev_priv);

bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine);
void intel_engine_lost_context(struct intel_engine_cs *engine);

void intel_engines_park(struct drm_i915_private *i915);
void intel_engines_unpark(struct drm_i915_private *i915);

void intel_engines_reset_default_submission(struct drm_i915_private *i915);
unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);

bool intel_engine_can_store_dword(struct intel_engine_cs *engine);

__printf(3, 4)
void intel_engine_dump(struct intel_engine_cs *engine,
		       struct drm_printer *m,
		       const char *header, ...);

struct intel_engine_cs *
intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);

static inline void intel_engine_context_in(struct intel_engine_cs *engine)
{
	unsigned long flags;

	if (READ_ONCE(engine->stats.enabled) == 0)
		return;

	write_seqlock_irqsave(&engine->stats.lock, flags);

	if (engine->stats.enabled > 0) {
		if (engine->stats.active++ == 0)
			engine->stats.start = ktime_get();
		GEM_BUG_ON(engine->stats.active == 0);
	}

	write_sequnlock_irqrestore(&engine->stats.lock, flags);
}

static inline void intel_engine_context_out(struct intel_engine_cs *engine)
{
	unsigned long flags;

	if (READ_ONCE(engine->stats.enabled) == 0)
		return;

	write_seqlock_irqsave(&engine->stats.lock, flags);

	if (engine->stats.enabled > 0) {
		ktime_t last;

		if (engine->stats.active && --engine->stats.active == 0) {
			/*
			 * Decrement the active context count and in case GPU
			 * is now idle add up to the running total.
			 */
			last = ktime_sub(ktime_get(), engine->stats.start);

			engine->stats.total = ktime_add(engine->stats.total,
							last);
		} else if (engine->stats.active == 0) {
			/*
			 * After turning on engine stats, context out might be
			 * the first event in which case we account from the
			 * time stats gathering was turned on.
			 */
			last = ktime_sub(ktime_get(), engine->stats.enabled_at);

			engine->stats.total = ktime_add(engine->stats.total,
							last);
		}
	}

	write_sequnlock_irqrestore(&engine->stats.lock, flags);
}

int intel_enable_engine_stats(struct intel_engine_cs *engine);
void intel_disable_engine_stats(struct intel_engine_cs *engine);

ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine);

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)

static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
{
	if (!execlists->preempt_hang.inject_hang)
		return false;

	complete(&execlists->preempt_hang.completion);
	return true;
}

#else

static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
{
	return false;
}

#endif

#endif /* _INTEL_RINGBUFFER_H_ */