]>
Commit | Line | Data |
---|---|---|
b20385f1 OM |
1 | /* |
2 | * Copyright © 2014 Intel Corporation | |
3 | * | |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | |
5 | * copy of this software and associated documentation files (the "Software"), | |
6 | * to deal in the Software without restriction, including without limitation | |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
8 | * and/or sell copies of the Software, and to permit persons to whom the | |
9 | * Software is furnished to do so, subject to the following conditions: | |
10 | * | |
11 | * The above copyright notice and this permission notice (including the next | |
12 | * paragraph) shall be included in all copies or substantial portions of the | |
13 | * Software. | |
14 | * | |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
21 | * IN THE SOFTWARE. | |
22 | * | |
23 | * Authors: | |
24 | * Ben Widawsky <ben@bwidawsk.net> | |
25 | * Michel Thierry <michel.thierry@intel.com> | |
26 | * Thomas Daniel <thomas.daniel@intel.com> | |
27 | * Oscar Mateo <oscar.mateo@intel.com> | |
28 | * | |
29 | */ | |
30 | ||
73e4d07f OM |
31 | /** |
32 | * DOC: Logical Rings, Logical Ring Contexts and Execlists | |
33 | * | |
34 | * Motivation: | |
b20385f1 OM |
35 | * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts". |
36 | * These expanded contexts enable a number of new abilities, especially | |
37 | * "Execlists" (also implemented in this file). | |
38 | * | |
73e4d07f OM |
39 | * One of the main differences with the legacy HW contexts is that logical |
40 | * ring contexts incorporate many more things to the context's state, like | |
41 | * PDPs or ringbuffer control registers: | |
42 | * | |
43 | * The reason why PDPs are included in the context is straightforward: as | |
44 | * PPGTTs (per-process GTTs) are actually per-context, having the PDPs | |
45 | * contained there mean you don't need to do a ppgtt->switch_mm yourself, | |
46 | * instead, the GPU will do it for you on the context switch. | |
47 | * | |
48 | * But, what about the ringbuffer control registers (head, tail, etc..)? | |
49 | * shouldn't we just need a set of those per engine command streamer? This is | |
50 | * where the name "Logical Rings" starts to make sense: by virtualizing the | |
51 | * rings, the engine cs shifts to a new "ring buffer" with every context | |
52 | * switch. When you want to submit a workload to the GPU you: A) choose your | |
53 | * context, B) find its appropriate virtualized ring, C) write commands to it | |
54 | * and then, finally, D) tell the GPU to switch to that context. | |
55 | * | |
56 | * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch | |
57 | * to a contexts is via a context execution list, ergo "Execlists". | |
58 | * | |
59 | * LRC implementation: | |
60 | * Regarding the creation of contexts, we have: | |
61 | * | |
62 | * - One global default context. | |
63 | * - One local default context for each opened fd. | |
64 | * - One local extra context for each context create ioctl call. | |
65 | * | |
66 | * Now that ringbuffers belong per-context (and not per-engine, like before) | |
67 | * and that contexts are uniquely tied to a given engine (and not reusable, | |
68 | * like before) we need: | |
69 | * | |
70 | * - One ringbuffer per-engine inside each context. | |
71 | * - One backing object per-engine inside each context. | |
72 | * | |
73 | * The global default context starts its life with these new objects fully | |
74 | * allocated and populated. The local default context for each opened fd is | |
75 | * more complex, because we don't know at creation time which engine is going | |
76 | * to use them. To handle this, we have implemented a deferred creation of LR | |
77 | * contexts: | |
78 | * | |
79 | * The local context starts its life as a hollow or blank holder, that only | |
80 | * gets populated for a given engine once we receive an execbuffer. If later | |
81 | * on we receive another execbuffer ioctl for the same context but a different | |
82 | * engine, we allocate/populate a new ringbuffer and context backing object and | |
83 | * so on. | |
84 | * | |
85 | * Finally, regarding local contexts created using the ioctl call: as they are | |
86 | * only allowed with the render ring, we can allocate & populate them right | |
87 | * away (no need to defer anything, at least for now). | |
88 | * | |
89 | * Execlists implementation: | |
b20385f1 OM |
90 | * Execlists are the new method by which, on gen8+ hardware, workloads are |
91 | * submitted for execution (as opposed to the legacy, ringbuffer-based, method). | |
73e4d07f OM |
92 | * This method works as follows: |
93 | * | |
94 | * When a request is committed, its commands (the BB start and any leading or | |
95 | * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer | |
96 | * for the appropriate context. The tail pointer in the hardware context is not | |
97 | * updated at this time, but instead, kept by the driver in the ringbuffer | |
98 | * structure. A structure representing this request is added to a request queue | |
99 | * for the appropriate engine: this structure contains a copy of the context's | |
100 | * tail after the request was written to the ring buffer and a pointer to the | |
101 | * context itself. | |
102 | * | |
103 | * If the engine's request queue was empty before the request was added, the | |
104 | * queue is processed immediately. Otherwise the queue will be processed during | |
105 | * a context switch interrupt. In any case, elements on the queue will get sent | |
106 | * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a | |
107 | * globally unique 20-bits submission ID. | |
108 | * | |
109 | * When execution of a request completes, the GPU updates the context status | |
110 | * buffer with a context complete event and generates a context switch interrupt. | |
111 | * During the interrupt handling, the driver examines the events in the buffer: | |
112 | * for each context complete event, if the announced ID matches that on the head | |
113 | * of the request queue, then that request is retired and removed from the queue. | |
114 | * | |
115 | * After processing, if any requests were retired and the queue is not empty | |
116 | * then a new execution list can be submitted. The two requests at the front of | |
117 | * the queue are next to be submitted but since a context may not occur twice in | |
118 | * an execution list, if subsequent requests have the same ID as the first then | |
119 | * the two requests must be combined. This is done simply by discarding requests | |
120 | * at the head of the queue until either only one requests is left (in which case | |
121 | * we use a NULL second context) or the first two requests have unique IDs. | |
122 | * | |
123 | * By always executing the first two requests in the queue the driver ensures | |
124 | * that the GPU is kept as busy as possible. In the case where a single context | |
125 | * completes but a second context is still executing, the request for this second | |
126 | * context will be at the head of the queue when we remove the first one. This | |
127 | * request will then be resubmitted along with a new request for a different context, | |
128 | * which will cause the hardware to continue executing the second request and queue | |
129 | * the new request (the GPU detects the condition of a context getting preempted | |
130 | * with the same context and optimizes the context switch flow by not doing | |
131 | * preemption, but just sampling the new tail pointer). | |
132 | * | |
b20385f1 | 133 | */ |
27af5eea | 134 | #include <linux/interrupt.h> |
b20385f1 | 135 | |
b20385f1 | 136 | #include "i915_drv.h" |
db94e9f1 | 137 | #include "i915_perf.h" |
a09d9a80 | 138 | #include "i915_trace.h" |
bc4237ec | 139 | #include "i915_vgpu.h" |
e6ba7648 | 140 | #include "intel_context.h" |
c34c5bca | 141 | #include "intel_engine_pm.h" |
2006058e | 142 | #include "intel_gt.h" |
c7302f20 | 143 | #include "intel_gt_pm.h" |
4f88f874 | 144 | #include "intel_gt_requests.h" |
578f1ac6 | 145 | #include "intel_lrc_reg.h" |
3bbaba0c | 146 | #include "intel_mocs.h" |
112ed2d3 | 147 | #include "intel_reset.h" |
2871ea85 | 148 | #include "intel_ring.h" |
7d3c425f | 149 | #include "intel_workarounds.h" |
127f1003 | 150 | |
e981e7b1 TD |
151 | #define RING_EXECLIST_QFULL (1 << 0x2) |
152 | #define RING_EXECLIST1_VALID (1 << 0x3) | |
153 | #define RING_EXECLIST0_VALID (1 << 0x4) | |
154 | #define RING_EXECLIST_ACTIVE_STATUS (3 << 0xE) | |
155 | #define RING_EXECLIST1_ACTIVE (1 << 0x11) | |
156 | #define RING_EXECLIST0_ACTIVE (1 << 0x12) | |
157 | ||
158 | #define GEN8_CTX_STATUS_IDLE_ACTIVE (1 << 0) | |
159 | #define GEN8_CTX_STATUS_PREEMPTED (1 << 1) | |
160 | #define GEN8_CTX_STATUS_ELEMENT_SWITCH (1 << 2) | |
161 | #define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3) | |
162 | #define GEN8_CTX_STATUS_COMPLETE (1 << 4) | |
163 | #define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15) | |
8670d6f9 | 164 | |
70c2a24d | 165 | #define GEN8_CTX_STATUS_COMPLETED_MASK \ |
d8747afb | 166 | (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED) |
70c2a24d | 167 | |
22b7a426 CW |
168 | #define CTX_DESC_FORCE_RESTORE BIT_ULL(2) |
169 | ||
f4785682 DCS |
170 | #define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE (0x1) /* lower csb dword */ |
171 | #define GEN12_CTX_SWITCH_DETAIL(csb_dw) ((csb_dw) & 0xF) /* upper csb dword */ | |
172 | #define GEN12_CSB_SW_CTX_ID_MASK GENMASK(25, 15) | |
173 | #define GEN12_IDLE_CTX_ID 0x7FF | |
174 | #define GEN12_CSB_CTX_VALID(csb_dw) \ | |
175 | (FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID) | |
176 | ||
0e93cdd4 CW |
177 | /* Typical size of the average request (2 pipecontrols and a MI_BB) */ |
178 | #define EXECLISTS_REQUEST_SIZE 64 /* bytes */ | |
a3aabe86 | 179 | |
6d06779e CW |
180 | struct virtual_engine { |
181 | struct intel_engine_cs base; | |
182 | struct intel_context context; | |
183 | ||
184 | /* | |
185 | * We allow only a single request through the virtual engine at a time | |
186 | * (each request in the timeline waits for the completion fence of | |
187 | * the previous before being submitted). By restricting ourselves to | |
188 | * only submitting a single request, each request is placed on to a | |
189 | * physical to maximise load spreading (by virtue of the late greedy | |
190 | * scheduling -- each real engine takes the next available request | |
191 | * upon idling). | |
192 | */ | |
193 | struct i915_request *request; | |
194 | ||
195 | /* | |
196 | * We keep a rbtree of available virtual engines inside each physical | |
197 | * engine, sorted by priority. Here we preallocate the nodes we need | |
198 | * for the virtual engine, indexed by physical_engine->id. | |
199 | */ | |
200 | struct ve_node { | |
201 | struct rb_node rb; | |
202 | int prio; | |
203 | } nodes[I915_NUM_ENGINES]; | |
204 | ||
ee113690 CW |
205 | /* |
206 | * Keep track of bonded pairs -- restrictions upon on our selection | |
207 | * of physical engines any particular request may be submitted to. | |
208 | * If we receive a submit-fence from a master engine, we will only | |
209 | * use one of sibling_mask physical engines. | |
210 | */ | |
211 | struct ve_bond { | |
212 | const struct intel_engine_cs *master; | |
213 | intel_engine_mask_t sibling_mask; | |
214 | } *bonds; | |
215 | unsigned int num_bonds; | |
216 | ||
6d06779e CW |
217 | /* And finally, which physical engines this virtual engine maps onto. */ |
218 | unsigned int num_siblings; | |
219 | struct intel_engine_cs *siblings[0]; | |
220 | }; | |
221 | ||
222 | static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine) | |
223 | { | |
224 | GEM_BUG_ON(!intel_engine_is_virtual(engine)); | |
225 | return container_of(engine, struct virtual_engine, base); | |
226 | } | |
227 | ||
4c60b1aa CW |
228 | static int __execlists_context_alloc(struct intel_context *ce, |
229 | struct intel_engine_cs *engine); | |
230 | ||
a3aabe86 | 231 | static void execlists_init_reg_state(u32 *reg_state, |
7dc56af5 CW |
232 | const struct intel_context *ce, |
233 | const struct intel_engine_cs *engine, | |
234 | const struct intel_ring *ring, | |
235 | bool close); | |
d12acee8 CW |
236 | static void |
237 | __execlists_update_reg_state(const struct intel_context *ce, | |
42827350 CW |
238 | const struct intel_engine_cs *engine, |
239 | u32 head); | |
7ba717cf | 240 | |
0d7cf7bc CW |
241 | static void mark_eio(struct i915_request *rq) |
242 | { | |
cbbf2787 CW |
243 | if (i915_request_completed(rq)) |
244 | return; | |
245 | ||
246 | GEM_BUG_ON(i915_request_signaled(rq)); | |
247 | ||
36e191f0 | 248 | i915_request_set_error_once(rq, -EIO); |
0d7cf7bc CW |
249 | i915_request_mark_complete(rq); |
250 | } | |
251 | ||
a7f328fc CW |
252 | static struct i915_request * |
253 | active_request(const struct intel_timeline * const tl, struct i915_request *rq) | |
d12acee8 | 254 | { |
a7f328fc | 255 | struct i915_request *active = rq; |
d12acee8 CW |
256 | |
257 | rcu_read_lock(); | |
a7f328fc | 258 | list_for_each_entry_continue_reverse(rq, &tl->requests, link) { |
d12acee8 CW |
259 | if (i915_request_completed(rq)) |
260 | break; | |
261 | ||
d12acee8 CW |
262 | active = rq; |
263 | } | |
264 | rcu_read_unlock(); | |
265 | ||
266 | return active; | |
267 | } | |
268 | ||
22b7a426 CW |
269 | static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine) |
270 | { | |
271 | return (i915_ggtt_offset(engine->status_page.vma) + | |
272 | I915_GEM_HWS_PREEMPT_ADDR); | |
273 | } | |
274 | ||
275 | static inline void | |
276 | ring_set_paused(const struct intel_engine_cs *engine, int state) | |
277 | { | |
278 | /* | |
279 | * We inspect HWS_PREEMPT with a semaphore inside | |
280 | * engine->emit_fini_breadcrumb. If the dword is true, | |
281 | * the ring is paused as the semaphore will busywait | |
282 | * until the dword is false. | |
283 | */ | |
284 | engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state; | |
8db7933e CW |
285 | if (state) |
286 | wmb(); | |
22b7a426 CW |
287 | } |
288 | ||
f6322edd CW |
289 | static inline struct i915_priolist *to_priolist(struct rb_node *rb) |
290 | { | |
291 | return rb_entry(rb, struct i915_priolist, node); | |
292 | } | |
293 | ||
294 | static inline int rq_prio(const struct i915_request *rq) | |
295 | { | |
a4e648a0 | 296 | return READ_ONCE(rq->sched.attr.priority); |
f6322edd CW |
297 | } |
298 | ||
b5773a36 CW |
299 | static int effective_prio(const struct i915_request *rq) |
300 | { | |
1e3f697e CW |
301 | int prio = rq_prio(rq); |
302 | ||
2a98f4e6 LL |
303 | /* |
304 | * If this request is special and must not be interrupted at any | |
305 | * cost, so be it. Note we are only checking the most recent request | |
306 | * in the context and so may be masking an earlier vip request. It | |
307 | * is hoped that under the conditions where nopreempt is used, this | |
308 | * will not matter (i.e. all requests to that context will be | |
309 | * nopreempt for as long as desired). | |
310 | */ | |
311 | if (i915_request_has_nopreempt(rq)) | |
312 | prio = I915_PRIORITY_UNPREEMPTABLE; | |
313 | ||
1e3f697e CW |
314 | /* |
315 | * On unwinding the active request, we give it a priority bump | |
4cc79cbb CW |
316 | * if it has completed waiting on any semaphore. If we know that |
317 | * the request has already started, we can prevent an unwanted | |
318 | * preempt-to-idle cycle by taking that into account now. | |
1e3f697e | 319 | */ |
4cc79cbb CW |
320 | if (__i915_request_has_started(rq)) |
321 | prio |= I915_PRIORITY_NOSEMAPHORE; | |
1e3f697e | 322 | |
b5773a36 | 323 | /* Restrict mere WAIT boosts from triggering preemption */ |
8ee36e04 | 324 | BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */ |
1e3f697e | 325 | return prio | __NO_PREEMPTION; |
b5773a36 CW |
326 | } |
327 | ||
c9a64622 CW |
328 | static int queue_prio(const struct intel_engine_execlists *execlists) |
329 | { | |
330 | struct i915_priolist *p; | |
331 | struct rb_node *rb; | |
332 | ||
333 | rb = rb_first_cached(&execlists->queue); | |
334 | if (!rb) | |
335 | return INT_MIN; | |
336 | ||
337 | /* | |
338 | * As the priolist[] are inverted, with the highest priority in [0], | |
339 | * we have to flip the index value to become priority. | |
340 | */ | |
341 | p = to_priolist(rb); | |
342 | return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used); | |
343 | } | |
344 | ||
f6322edd | 345 | static inline bool need_preempt(const struct intel_engine_cs *engine, |
6d06779e CW |
346 | const struct i915_request *rq, |
347 | struct rb_node *rb) | |
c9a64622 | 348 | { |
b5773a36 | 349 | int last_prio; |
c9a64622 | 350 | |
09975b86 CW |
351 | if (!intel_engine_has_semaphores(engine)) |
352 | return false; | |
353 | ||
c9a64622 CW |
354 | /* |
355 | * Check if the current priority hint merits a preemption attempt. | |
356 | * | |
357 | * We record the highest value priority we saw during rescheduling | |
358 | * prior to this dequeue, therefore we know that if it is strictly | |
359 | * less than the current tail of ESLP[0], we do not need to force | |
360 | * a preempt-to-idle cycle. | |
361 | * | |
362 | * However, the priority hint is a mere hint that we may need to | |
363 | * preempt. If that hint is stale or we may be trying to preempt | |
364 | * ourselves, ignore the request. | |
253a774b CW |
365 | * |
366 | * More naturally we would write | |
367 | * prio >= max(0, last); | |
368 | * except that we wish to prevent triggering preemption at the same | |
369 | * priority level: the task that is running should remain running | |
370 | * to preserve FIFO ordering of dependencies. | |
c9a64622 | 371 | */ |
253a774b CW |
372 | last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1); |
373 | if (engine->execlists.queue_priority_hint <= last_prio) | |
c9a64622 CW |
374 | return false; |
375 | ||
376 | /* | |
377 | * Check against the first request in ELSP[1], it will, thanks to the | |
378 | * power of PI, be the highest priority of that context. | |
379 | */ | |
422d7df4 CW |
380 | if (!list_is_last(&rq->sched.link, &engine->active.requests) && |
381 | rq_prio(list_next_entry(rq, sched.link)) > last_prio) | |
c9a64622 CW |
382 | return true; |
383 | ||
6d06779e CW |
384 | if (rb) { |
385 | struct virtual_engine *ve = | |
386 | rb_entry(rb, typeof(*ve), nodes[engine->id].rb); | |
387 | bool preempt = false; | |
388 | ||
389 | if (engine == ve->siblings[0]) { /* only preempt one sibling */ | |
390 | struct i915_request *next; | |
391 | ||
392 | rcu_read_lock(); | |
393 | next = READ_ONCE(ve->request); | |
394 | if (next) | |
395 | preempt = rq_prio(next) > last_prio; | |
396 | rcu_read_unlock(); | |
397 | } | |
398 | ||
399 | if (preempt) | |
400 | return preempt; | |
401 | } | |
402 | ||
c9a64622 CW |
403 | /* |
404 | * If the inflight context did not trigger the preemption, then maybe | |
405 | * it was the set of queued requests? Pick the highest priority in | |
406 | * the queue (the first active priolist) and see if it deserves to be | |
407 | * running instead of ELSP[0]. | |
408 | * | |
409 | * The highest priority request in the queue can not be either | |
410 | * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same | |
411 | * context, it's priority would not exceed ELSP[0] aka last_prio. | |
412 | */ | |
413 | return queue_prio(&engine->execlists) > last_prio; | |
414 | } | |
415 | ||
416 | __maybe_unused static inline bool | |
c10c78ad | 417 | assert_priority_queue(const struct i915_request *prev, |
c9a64622 | 418 | const struct i915_request *next) |
f6322edd | 419 | { |
c9a64622 CW |
420 | /* |
421 | * Without preemption, the prev may refer to the still active element | |
422 | * which we refuse to let go. | |
423 | * | |
424 | * Even with preemption, there are times when we think it is better not | |
425 | * to preempt and leave an ostensibly lower priority request in flight. | |
426 | */ | |
22b7a426 | 427 | if (i915_request_is_active(prev)) |
c9a64622 CW |
428 | return true; |
429 | ||
430 | return rq_prio(prev) >= rq_prio(next); | |
f6322edd CW |
431 | } |
432 | ||
1fc44d9b | 433 | /* |
ca82580c TU |
434 | * The context descriptor encodes various attributes of a context, |
435 | * including its GTT address and some flags. Because it's fairly | |
436 | * expensive to calculate, we'll just do it once and cache the result, | |
437 | * which remains valid until the context is unpinned. | |
438 | * | |
6e5248b5 DV |
439 | * This is what a descriptor looks like, from LSB to MSB:: |
440 | * | |
2355cf08 | 441 | * bits 0-11: flags, GEN8_CTX_* (cached in ctx->desc_template) |
6e5248b5 | 442 | * bits 12-31: LRCA, GTT address of (the HWSP of) this context |
218b5000 | 443 | * bits 32-52: ctx ID, a globally unique tag (highest bit used by GuC) |
6e5248b5 DV |
444 | * bits 53-54: mbz, reserved for use by hardware |
445 | * bits 55-63: group ID, currently unused and set to 0 | |
ac52da6a DCS |
446 | * |
447 | * Starting from Gen11, the upper dword of the descriptor has a new format: | |
448 | * | |
449 | * bits 32-36: reserved | |
450 | * bits 37-47: SW context ID | |
451 | * bits 48:53: engine instance | |
452 | * bit 54: mbz, reserved for use by hardware | |
453 | * bits 55-60: SW counter | |
454 | * bits 61-63: engine class | |
455 | * | |
456 | * engine info, SW context ID and SW counter need to form a unique number | |
457 | * (Context ID) per lrc. | |
73e4d07f | 458 | */ |
53b2622e | 459 | static u32 |
95f697eb | 460 | lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) |
84b790f8 | 461 | { |
53b2622e | 462 | u32 desc; |
84b790f8 | 463 | |
a1c9ca22 CW |
464 | desc = INTEL_LEGACY_32B_CONTEXT; |
465 | if (i915_vm_is_4lvl(ce->vm)) | |
466 | desc = INTEL_LEGACY_64B_CONTEXT; | |
467 | desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT; | |
468 | ||
469 | desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE; | |
470 | if (IS_GEN(engine->i915, 8)) | |
471 | desc |= GEN8_CTX_L3LLC_COHERENT; | |
ac52da6a | 472 | |
53b2622e | 473 | return i915_ggtt_offset(ce->state) | desc; |
5af05fef MT |
474 | } |
475 | ||
d1813ca2 CW |
476 | static inline unsigned int dword_in_page(void *addr) |
477 | { | |
478 | return offset_in_page(addr) / sizeof(u32); | |
479 | } | |
480 | ||
481 | static void set_offsets(u32 *regs, | |
7dc56af5 | 482 | const u8 *data, |
d1813ca2 CW |
483 | const struct intel_engine_cs *engine, |
484 | bool clear) | |
7dc56af5 | 485 | #define NOP(x) (BIT(7) | (x)) |
6a505e64 | 486 | #define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6))) |
7dc56af5 CW |
487 | #define POSTED BIT(0) |
488 | #define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200)) | |
489 | #define REG16(x) \ | |
490 | (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \ | |
491 | (((x) >> 2) & 0x7f) | |
d1813ca2 | 492 | #define END(x) 0, (x) |
7dc56af5 CW |
493 | { |
494 | const u32 base = engine->mmio_base; | |
495 | ||
496 | while (*data) { | |
497 | u8 count, flags; | |
498 | ||
499 | if (*data & BIT(7)) { /* skip */ | |
d1813ca2 CW |
500 | count = *data++ & ~BIT(7); |
501 | if (clear) | |
502 | memset32(regs, MI_NOOP, count); | |
503 | regs += count; | |
7dc56af5 CW |
504 | continue; |
505 | } | |
506 | ||
507 | count = *data & 0x3f; | |
508 | flags = *data >> 6; | |
509 | data++; | |
510 | ||
511 | *regs = MI_LOAD_REGISTER_IMM(count); | |
512 | if (flags & POSTED) | |
513 | *regs |= MI_LRI_FORCE_POSTED; | |
514 | if (INTEL_GEN(engine->i915) >= 11) | |
515 | *regs |= MI_LRI_CS_MMIO; | |
516 | regs++; | |
517 | ||
518 | GEM_BUG_ON(!count); | |
519 | do { | |
520 | u32 offset = 0; | |
521 | u8 v; | |
522 | ||
523 | do { | |
524 | v = *data++; | |
525 | offset <<= 7; | |
526 | offset |= v & ~BIT(7); | |
527 | } while (v & BIT(7)); | |
528 | ||
d1813ca2 CW |
529 | regs[0] = base + (offset << 2); |
530 | if (clear) | |
531 | regs[1] = 0; | |
7dc56af5 CW |
532 | regs += 2; |
533 | } while (--count); | |
534 | } | |
535 | ||
d1813ca2 CW |
536 | if (clear) { |
537 | u8 count = *++data; | |
538 | ||
539 | /* Clear past the tail for HW access */ | |
540 | GEM_BUG_ON(dword_in_page(regs) > count); | |
541 | memset32(regs, MI_NOOP, count - dword_in_page(regs)); | |
542 | ||
543 | /* Close the batch; used mainly by live_lrc_layout() */ | |
544 | *regs = MI_BATCH_BUFFER_END; | |
545 | if (INTEL_GEN(engine->i915) >= 10) | |
546 | *regs |= BIT(0); | |
547 | } | |
7dc56af5 CW |
548 | } |
549 | ||
550 | static const u8 gen8_xcs_offsets[] = { | |
551 | NOP(1), | |
552 | LRI(11, 0), | |
553 | REG16(0x244), | |
554 | REG(0x034), | |
555 | REG(0x030), | |
556 | REG(0x038), | |
557 | REG(0x03c), | |
558 | REG(0x168), | |
559 | REG(0x140), | |
560 | REG(0x110), | |
561 | REG(0x11c), | |
562 | REG(0x114), | |
563 | REG(0x118), | |
564 | ||
565 | NOP(9), | |
566 | LRI(9, 0), | |
567 | REG16(0x3a8), | |
568 | REG16(0x28c), | |
569 | REG16(0x288), | |
570 | REG16(0x284), | |
571 | REG16(0x280), | |
572 | REG16(0x27c), | |
573 | REG16(0x278), | |
574 | REG16(0x274), | |
575 | REG16(0x270), | |
576 | ||
577 | NOP(13), | |
578 | LRI(2, 0), | |
579 | REG16(0x200), | |
580 | REG(0x028), | |
581 | ||
d1813ca2 | 582 | END(80) |
7dc56af5 CW |
583 | }; |
584 | ||
585 | static const u8 gen9_xcs_offsets[] = { | |
586 | NOP(1), | |
587 | LRI(14, POSTED), | |
588 | REG16(0x244), | |
589 | REG(0x034), | |
590 | REG(0x030), | |
591 | REG(0x038), | |
592 | REG(0x03c), | |
593 | REG(0x168), | |
594 | REG(0x140), | |
595 | REG(0x110), | |
596 | REG(0x11c), | |
597 | REG(0x114), | |
598 | REG(0x118), | |
599 | REG(0x1c0), | |
600 | REG(0x1c4), | |
601 | REG(0x1c8), | |
602 | ||
603 | NOP(3), | |
604 | LRI(9, POSTED), | |
605 | REG16(0x3a8), | |
606 | REG16(0x28c), | |
607 | REG16(0x288), | |
608 | REG16(0x284), | |
609 | REG16(0x280), | |
610 | REG16(0x27c), | |
611 | REG16(0x278), | |
612 | REG16(0x274), | |
613 | REG16(0x270), | |
614 | ||
615 | NOP(13), | |
616 | LRI(1, POSTED), | |
617 | REG16(0x200), | |
618 | ||
619 | NOP(13), | |
620 | LRI(44, POSTED), | |
621 | REG(0x028), | |
622 | REG(0x09c), | |
623 | REG(0x0c0), | |
624 | REG(0x178), | |
625 | REG(0x17c), | |
626 | REG16(0x358), | |
627 | REG(0x170), | |
628 | REG(0x150), | |
629 | REG(0x154), | |
630 | REG(0x158), | |
631 | REG16(0x41c), | |
632 | REG16(0x600), | |
633 | REG16(0x604), | |
634 | REG16(0x608), | |
635 | REG16(0x60c), | |
636 | REG16(0x610), | |
637 | REG16(0x614), | |
638 | REG16(0x618), | |
639 | REG16(0x61c), | |
640 | REG16(0x620), | |
641 | REG16(0x624), | |
642 | REG16(0x628), | |
643 | REG16(0x62c), | |
644 | REG16(0x630), | |
645 | REG16(0x634), | |
646 | REG16(0x638), | |
647 | REG16(0x63c), | |
648 | REG16(0x640), | |
649 | REG16(0x644), | |
650 | REG16(0x648), | |
651 | REG16(0x64c), | |
652 | REG16(0x650), | |
653 | REG16(0x654), | |
654 | REG16(0x658), | |
655 | REG16(0x65c), | |
656 | REG16(0x660), | |
657 | REG16(0x664), | |
658 | REG16(0x668), | |
659 | REG16(0x66c), | |
660 | REG16(0x670), | |
661 | REG16(0x674), | |
662 | REG16(0x678), | |
663 | REG16(0x67c), | |
664 | REG(0x068), | |
665 | ||
d1813ca2 | 666 | END(176) |
7dc56af5 CW |
667 | }; |
668 | ||
669 | static const u8 gen12_xcs_offsets[] = { | |
670 | NOP(1), | |
671 | LRI(13, POSTED), | |
672 | REG16(0x244), | |
673 | REG(0x034), | |
674 | REG(0x030), | |
675 | REG(0x038), | |
676 | REG(0x03c), | |
677 | REG(0x168), | |
678 | REG(0x140), | |
679 | REG(0x110), | |
680 | REG(0x1c0), | |
681 | REG(0x1c4), | |
682 | REG(0x1c8), | |
683 | REG(0x180), | |
684 | REG16(0x2b4), | |
685 | ||
686 | NOP(5), | |
687 | LRI(9, POSTED), | |
688 | REG16(0x3a8), | |
689 | REG16(0x28c), | |
690 | REG16(0x288), | |
691 | REG16(0x284), | |
692 | REG16(0x280), | |
693 | REG16(0x27c), | |
694 | REG16(0x278), | |
695 | REG16(0x274), | |
696 | REG16(0x270), | |
697 | ||
d1813ca2 | 698 | END(80) |
7dc56af5 CW |
699 | }; |
700 | ||
701 | static const u8 gen8_rcs_offsets[] = { | |
702 | NOP(1), | |
703 | LRI(14, POSTED), | |
704 | REG16(0x244), | |
705 | REG(0x034), | |
706 | REG(0x030), | |
707 | REG(0x038), | |
708 | REG(0x03c), | |
709 | REG(0x168), | |
710 | REG(0x140), | |
711 | REG(0x110), | |
712 | REG(0x11c), | |
713 | REG(0x114), | |
714 | REG(0x118), | |
715 | REG(0x1c0), | |
716 | REG(0x1c4), | |
717 | REG(0x1c8), | |
718 | ||
719 | NOP(3), | |
720 | LRI(9, POSTED), | |
721 | REG16(0x3a8), | |
722 | REG16(0x28c), | |
723 | REG16(0x288), | |
724 | REG16(0x284), | |
725 | REG16(0x280), | |
726 | REG16(0x27c), | |
727 | REG16(0x278), | |
728 | REG16(0x274), | |
729 | REG16(0x270), | |
730 | ||
731 | NOP(13), | |
732 | LRI(1, 0), | |
733 | REG(0x0c8), | |
734 | ||
d1813ca2 | 735 | END(80) |
7dc56af5 CW |
736 | }; |
737 | ||
6a505e64 CW |
738 | static const u8 gen9_rcs_offsets[] = { |
739 | NOP(1), | |
740 | LRI(14, POSTED), | |
741 | REG16(0x244), | |
742 | REG(0x34), | |
743 | REG(0x30), | |
744 | REG(0x38), | |
745 | REG(0x3c), | |
746 | REG(0x168), | |
747 | REG(0x140), | |
748 | REG(0x110), | |
749 | REG(0x11c), | |
750 | REG(0x114), | |
751 | REG(0x118), | |
752 | REG(0x1c0), | |
753 | REG(0x1c4), | |
754 | REG(0x1c8), | |
755 | ||
756 | NOP(3), | |
757 | LRI(9, POSTED), | |
758 | REG16(0x3a8), | |
759 | REG16(0x28c), | |
760 | REG16(0x288), | |
761 | REG16(0x284), | |
762 | REG16(0x280), | |
763 | REG16(0x27c), | |
764 | REG16(0x278), | |
765 | REG16(0x274), | |
766 | REG16(0x270), | |
767 | ||
768 | NOP(13), | |
769 | LRI(1, 0), | |
770 | REG(0xc8), | |
771 | ||
772 | NOP(13), | |
773 | LRI(44, POSTED), | |
774 | REG(0x28), | |
775 | REG(0x9c), | |
776 | REG(0xc0), | |
777 | REG(0x178), | |
778 | REG(0x17c), | |
779 | REG16(0x358), | |
780 | REG(0x170), | |
781 | REG(0x150), | |
782 | REG(0x154), | |
783 | REG(0x158), | |
784 | REG16(0x41c), | |
785 | REG16(0x600), | |
786 | REG16(0x604), | |
787 | REG16(0x608), | |
788 | REG16(0x60c), | |
789 | REG16(0x610), | |
790 | REG16(0x614), | |
791 | REG16(0x618), | |
792 | REG16(0x61c), | |
793 | REG16(0x620), | |
794 | REG16(0x624), | |
795 | REG16(0x628), | |
796 | REG16(0x62c), | |
797 | REG16(0x630), | |
798 | REG16(0x634), | |
799 | REG16(0x638), | |
800 | REG16(0x63c), | |
801 | REG16(0x640), | |
802 | REG16(0x644), | |
803 | REG16(0x648), | |
804 | REG16(0x64c), | |
805 | REG16(0x650), | |
806 | REG16(0x654), | |
807 | REG16(0x658), | |
808 | REG16(0x65c), | |
809 | REG16(0x660), | |
810 | REG16(0x664), | |
811 | REG16(0x668), | |
812 | REG16(0x66c), | |
813 | REG16(0x670), | |
814 | REG16(0x674), | |
815 | REG16(0x678), | |
816 | REG16(0x67c), | |
817 | REG(0x68), | |
818 | ||
d1813ca2 | 819 | END(176) |
6a505e64 CW |
820 | }; |
821 | ||
7dc56af5 CW |
822 | static const u8 gen11_rcs_offsets[] = { |
823 | NOP(1), | |
824 | LRI(15, POSTED), | |
825 | REG16(0x244), | |
826 | REG(0x034), | |
827 | REG(0x030), | |
828 | REG(0x038), | |
829 | REG(0x03c), | |
830 | REG(0x168), | |
831 | REG(0x140), | |
832 | REG(0x110), | |
833 | REG(0x11c), | |
834 | REG(0x114), | |
835 | REG(0x118), | |
836 | REG(0x1c0), | |
837 | REG(0x1c4), | |
838 | REG(0x1c8), | |
839 | REG(0x180), | |
840 | ||
841 | NOP(1), | |
842 | LRI(9, POSTED), | |
843 | REG16(0x3a8), | |
844 | REG16(0x28c), | |
845 | REG16(0x288), | |
846 | REG16(0x284), | |
847 | REG16(0x280), | |
848 | REG16(0x27c), | |
849 | REG16(0x278), | |
850 | REG16(0x274), | |
851 | REG16(0x270), | |
852 | ||
853 | LRI(1, POSTED), | |
854 | REG(0x1b0), | |
855 | ||
856 | NOP(10), | |
857 | LRI(1, 0), | |
858 | REG(0x0c8), | |
859 | ||
d1813ca2 | 860 | END(80) |
7dc56af5 CW |
861 | }; |
862 | ||
863 | static const u8 gen12_rcs_offsets[] = { | |
864 | NOP(1), | |
865 | LRI(13, POSTED), | |
866 | REG16(0x244), | |
867 | REG(0x034), | |
868 | REG(0x030), | |
869 | REG(0x038), | |
870 | REG(0x03c), | |
871 | REG(0x168), | |
872 | REG(0x140), | |
873 | REG(0x110), | |
874 | REG(0x1c0), | |
875 | REG(0x1c4), | |
876 | REG(0x1c8), | |
877 | REG(0x180), | |
878 | REG16(0x2b4), | |
879 | ||
880 | NOP(5), | |
881 | LRI(9, POSTED), | |
882 | REG16(0x3a8), | |
883 | REG16(0x28c), | |
884 | REG16(0x288), | |
885 | REG16(0x284), | |
886 | REG16(0x280), | |
887 | REG16(0x27c), | |
888 | REG16(0x278), | |
889 | REG16(0x274), | |
890 | REG16(0x270), | |
891 | ||
892 | LRI(3, POSTED), | |
893 | REG(0x1b0), | |
894 | REG16(0x5a8), | |
895 | REG16(0x5ac), | |
896 | ||
897 | NOP(6), | |
898 | LRI(1, 0), | |
899 | REG(0x0c8), | |
900 | ||
d1813ca2 | 901 | END(80) |
7dc56af5 CW |
902 | }; |
903 | ||
904 | #undef END | |
905 | #undef REG16 | |
906 | #undef REG | |
907 | #undef LRI | |
908 | #undef NOP | |
909 | ||
910 | static const u8 *reg_offsets(const struct intel_engine_cs *engine) | |
911 | { | |
9d41318c DCS |
912 | /* |
913 | * The gen12+ lists only have the registers we program in the basic | |
914 | * default state. We rely on the context image using relative | |
915 | * addressing to automatic fixup the register state between the | |
916 | * physical engines for virtual engine. | |
917 | */ | |
918 | GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 && | |
919 | !intel_engine_has_relative_mmio(engine)); | |
920 | ||
7dc56af5 CW |
921 | if (engine->class == RENDER_CLASS) { |
922 | if (INTEL_GEN(engine->i915) >= 12) | |
923 | return gen12_rcs_offsets; | |
924 | else if (INTEL_GEN(engine->i915) >= 11) | |
925 | return gen11_rcs_offsets; | |
6a505e64 CW |
926 | else if (INTEL_GEN(engine->i915) >= 9) |
927 | return gen9_rcs_offsets; | |
7dc56af5 CW |
928 | else |
929 | return gen8_rcs_offsets; | |
930 | } else { | |
931 | if (INTEL_GEN(engine->i915) >= 12) | |
932 | return gen12_xcs_offsets; | |
933 | else if (INTEL_GEN(engine->i915) >= 9) | |
934 | return gen9_xcs_offsets; | |
935 | else | |
936 | return gen8_xcs_offsets; | |
937 | } | |
938 | } | |
939 | ||
eb8d0f5a | 940 | static struct i915_request * |
4cc79cbb | 941 | __unwind_incomplete_requests(struct intel_engine_cs *engine) |
7e4992ac | 942 | { |
b16c7651 | 943 | struct i915_request *rq, *rn, *active = NULL; |
85f5e1f3 | 944 | struct list_head *uninitialized_var(pl); |
4cc79cbb | 945 | int prio = I915_PRIORITY_INVALID; |
7e4992ac | 946 | |
422d7df4 | 947 | lockdep_assert_held(&engine->active.lock); |
7e4992ac CW |
948 | |
949 | list_for_each_entry_safe_reverse(rq, rn, | |
422d7df4 CW |
950 | &engine->active.requests, |
951 | sched.link) { | |
e61e0f51 | 952 | if (i915_request_completed(rq)) |
22b7a426 | 953 | continue; /* XXX */ |
7e4992ac | 954 | |
e61e0f51 | 955 | __i915_request_unsubmit(rq); |
7e4992ac | 956 | |
6d06779e CW |
957 | /* |
958 | * Push the request back into the queue for later resubmission. | |
959 | * If this request is not native to this physical engine (i.e. | |
960 | * it came from a virtual source), push it back onto the virtual | |
961 | * engine so that it can be moved across onto another physical | |
962 | * engine as load dictates. | |
963 | */ | |
89b6d183 | 964 | if (likely(rq->execution_mask == engine->mask)) { |
6d06779e CW |
965 | GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); |
966 | if (rq_prio(rq) != prio) { | |
967 | prio = rq_prio(rq); | |
968 | pl = i915_sched_lookup_priolist(engine, prio); | |
969 | } | |
970 | GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); | |
b16c7651 | 971 | |
422d7df4 | 972 | list_move(&rq->sched.link, pl); |
672c368f CW |
973 | set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); |
974 | ||
6d06779e CW |
975 | active = rq; |
976 | } else { | |
9f3ccd40 | 977 | struct intel_engine_cs *owner = rq->context->engine; |
89b6d183 | 978 | |
7d6b60db CW |
979 | /* |
980 | * Decouple the virtual breadcrumb before moving it | |
981 | * back to the virtual engine -- we don't want the | |
982 | * request to complete in the background and try | |
983 | * and cancel the breadcrumb on the virtual engine | |
984 | * (instead of the old engine where it is linked)! | |
985 | */ | |
986 | if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, | |
987 | &rq->fence.flags)) { | |
08ad9a38 CW |
988 | spin_lock_nested(&rq->lock, |
989 | SINGLE_DEPTH_NESTING); | |
7d6b60db CW |
990 | i915_request_cancel_breadcrumb(rq); |
991 | spin_unlock(&rq->lock); | |
992 | } | |
3a55dc89 | 993 | WRITE_ONCE(rq->engine, owner); |
6d06779e CW |
994 | owner->submit_request(rq); |
995 | active = NULL; | |
996 | } | |
b16c7651 CW |
997 | } |
998 | ||
eb8d0f5a | 999 | return active; |
7e4992ac CW |
1000 | } |
1001 | ||
292ad25c | 1002 | struct i915_request * |
a4598d17 MW |
1003 | execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists) |
1004 | { | |
1005 | struct intel_engine_cs *engine = | |
1006 | container_of(execlists, typeof(*engine), execlists); | |
1007 | ||
4cc79cbb | 1008 | return __unwind_incomplete_requests(engine); |
a4598d17 MW |
1009 | } |
1010 | ||
bbd6c47e | 1011 | static inline void |
e61e0f51 | 1012 | execlists_context_status_change(struct i915_request *rq, unsigned long status) |
84b790f8 | 1013 | { |
bbd6c47e CW |
1014 | /* |
1015 | * Only used when GVT-g is enabled now. When GVT-g is disabled, | |
1016 | * The compiler should eliminate this function as dead-code. | |
1017 | */ | |
1018 | if (!IS_ENABLED(CONFIG_DRM_I915_GVT)) | |
1019 | return; | |
6daccb0b | 1020 | |
3fc03069 CD |
1021 | atomic_notifier_call_chain(&rq->engine->context_status_notifier, |
1022 | status, rq); | |
84b790f8 BW |
1023 | } |
1024 | ||
5932925a TU |
1025 | static void intel_engine_context_in(struct intel_engine_cs *engine) |
1026 | { | |
1027 | unsigned long flags; | |
1028 | ||
1029 | if (READ_ONCE(engine->stats.enabled) == 0) | |
1030 | return; | |
1031 | ||
1032 | write_seqlock_irqsave(&engine->stats.lock, flags); | |
1033 | ||
1034 | if (engine->stats.enabled > 0) { | |
1035 | if (engine->stats.active++ == 0) | |
1036 | engine->stats.start = ktime_get(); | |
1037 | GEM_BUG_ON(engine->stats.active == 0); | |
1038 | } | |
1039 | ||
1040 | write_sequnlock_irqrestore(&engine->stats.lock, flags); | |
1041 | } | |
1042 | ||
1043 | static void intel_engine_context_out(struct intel_engine_cs *engine) | |
1044 | { | |
1045 | unsigned long flags; | |
1046 | ||
1047 | if (READ_ONCE(engine->stats.enabled) == 0) | |
1048 | return; | |
1049 | ||
1050 | write_seqlock_irqsave(&engine->stats.lock, flags); | |
1051 | ||
1052 | if (engine->stats.enabled > 0) { | |
1053 | ktime_t last; | |
1054 | ||
1055 | if (engine->stats.active && --engine->stats.active == 0) { | |
1056 | /* | |
1057 | * Decrement the active context count and in case GPU | |
1058 | * is now idle add up to the running total. | |
1059 | */ | |
1060 | last = ktime_sub(ktime_get(), engine->stats.start); | |
1061 | ||
1062 | engine->stats.total = ktime_add(engine->stats.total, | |
1063 | last); | |
1064 | } else if (engine->stats.active == 0) { | |
1065 | /* | |
1066 | * After turning on engine stats, context out might be | |
1067 | * the first event in which case we account from the | |
1068 | * time stats gathering was turned on. | |
1069 | */ | |
1070 | last = ktime_sub(ktime_get(), engine->stats.enabled_at); | |
1071 | ||
1072 | engine->stats.total = ktime_add(engine->stats.total, | |
1073 | last); | |
1074 | } | |
1075 | } | |
1076 | ||
1077 | write_sequnlock_irqrestore(&engine->stats.lock, flags); | |
1078 | } | |
1079 | ||
b0b10248 CW |
1080 | static int lrc_ring_mi_mode(const struct intel_engine_cs *engine) |
1081 | { | |
1082 | if (INTEL_GEN(engine->i915) >= 12) | |
1083 | return 0x60; | |
1084 | else if (INTEL_GEN(engine->i915) >= 9) | |
1085 | return 0x54; | |
1086 | else if (engine->class == RENDER_CLASS) | |
1087 | return 0x58; | |
1088 | else | |
1089 | return -1; | |
1090 | } | |
1091 | ||
1092 | static void | |
1093 | execlists_check_context(const struct intel_context *ce, | |
1094 | const struct intel_engine_cs *engine) | |
1095 | { | |
1096 | const struct intel_ring *ring = ce->ring; | |
1097 | u32 *regs = ce->lrc_reg_state; | |
1098 | bool valid = true; | |
1099 | int x; | |
1100 | ||
1101 | if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) { | |
1102 | pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n", | |
1103 | engine->name, | |
1104 | regs[CTX_RING_START], | |
1105 | i915_ggtt_offset(ring->vma)); | |
1106 | regs[CTX_RING_START] = i915_ggtt_offset(ring->vma); | |
1107 | valid = false; | |
1108 | } | |
1109 | ||
1110 | if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) != | |
1111 | (RING_CTL_SIZE(ring->size) | RING_VALID)) { | |
1112 | pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n", | |
1113 | engine->name, | |
1114 | regs[CTX_RING_CTL], | |
1115 | (u32)(RING_CTL_SIZE(ring->size) | RING_VALID)); | |
1116 | regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID; | |
1117 | valid = false; | |
1118 | } | |
1119 | ||
1120 | x = lrc_ring_mi_mode(engine); | |
1121 | if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) { | |
1122 | pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n", | |
1123 | engine->name, regs[x + 1]); | |
1124 | regs[x + 1] &= ~STOP_RING; | |
1125 | regs[x + 1] |= STOP_RING << 16; | |
1126 | valid = false; | |
1127 | } | |
1128 | ||
1129 | WARN_ONCE(!valid, "Invalid lrc state found before submission\n"); | |
1130 | } | |
1131 | ||
31b61f0e CW |
1132 | static void restore_default_state(struct intel_context *ce, |
1133 | struct intel_engine_cs *engine) | |
1134 | { | |
1135 | u32 *regs = ce->lrc_reg_state; | |
1136 | ||
1137 | if (engine->pinned_default_state) | |
1138 | memcpy(regs, /* skip restoring the vanilla PPHWSP */ | |
1139 | engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, | |
1140 | engine->context_size - PAGE_SIZE); | |
1141 | ||
1142 | execlists_init_reg_state(regs, ce, engine, ce->ring, false); | |
1143 | } | |
1144 | ||
1145 | static void reset_active(struct i915_request *rq, | |
1146 | struct intel_engine_cs *engine) | |
1147 | { | |
9f3ccd40 | 1148 | struct intel_context * const ce = rq->context; |
31b61f0e CW |
1149 | u32 head; |
1150 | ||
1151 | /* | |
1152 | * The executing context has been cancelled. We want to prevent | |
1153 | * further execution along this context and propagate the error on | |
1154 | * to anything depending on its results. | |
1155 | * | |
1156 | * In __i915_request_submit(), we apply the -EIO and remove the | |
1157 | * requests' payloads for any banned requests. But first, we must | |
1158 | * rewind the context back to the start of the incomplete request so | |
1159 | * that we do not jump back into the middle of the batch. | |
1160 | * | |
1161 | * We preserve the breadcrumbs and semaphores of the incomplete | |
1162 | * requests so that inter-timeline dependencies (i.e other timelines) | |
1163 | * remain correctly ordered. And we defer to __i915_request_submit() | |
1164 | * so that all asynchronous waits are correctly handled. | |
1165 | */ | |
639f2f24 VSD |
1166 | ENGINE_TRACE(engine, "{ rq=%llx:%lld }\n", |
1167 | rq->fence.context, rq->fence.seqno); | |
31b61f0e CW |
1168 | |
1169 | /* On resubmission of the active request, payload will be scrubbed */ | |
1170 | if (i915_request_completed(rq)) | |
1171 | head = rq->tail; | |
1172 | else | |
1173 | head = active_request(ce->timeline, rq)->head; | |
42827350 | 1174 | head = intel_ring_wrap(ce->ring, head); |
31b61f0e CW |
1175 | |
1176 | /* Scrub the context image to prevent replaying the previous batch */ | |
1177 | restore_default_state(ce, engine); | |
42827350 | 1178 | __execlists_update_reg_state(ce, engine, head); |
31b61f0e CW |
1179 | |
1180 | /* We've switched away, so this should be a no-op, but intent matters */ | |
53b2622e | 1181 | ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; |
31b61f0e CW |
1182 | } |
1183 | ||
1883a0a4 TU |
1184 | static u32 intel_context_get_runtime(const struct intel_context *ce) |
1185 | { | |
1186 | /* | |
1187 | * We can use either ppHWSP[16] which is recorded before the context | |
1188 | * switch (and so excludes the cost of context switches) or use the | |
1189 | * value from the context image itself, which is saved/restored earlier | |
1190 | * and so includes the cost of the save. | |
1191 | */ | |
1192 | return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]); | |
1193 | } | |
1194 | ||
cf274daa CW |
1195 | static void st_update_runtime_underflow(struct intel_context *ce, s32 dt) |
1196 | { | |
1197 | #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) | |
1198 | ce->runtime.num_underflow += dt < 0; | |
1199 | ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt); | |
1200 | #endif | |
1201 | } | |
1202 | ||
1883a0a4 TU |
1203 | static void intel_context_update_runtime(struct intel_context *ce) |
1204 | { | |
1205 | u32 old; | |
1206 | s32 dt; | |
1207 | ||
1208 | if (intel_context_is_barrier(ce)) | |
1209 | return; | |
1210 | ||
1211 | old = ce->runtime.last; | |
1212 | ce->runtime.last = intel_context_get_runtime(ce); | |
1213 | dt = ce->runtime.last - old; | |
1214 | ||
1215 | if (unlikely(dt <= 0)) { | |
1216 | CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n", | |
1217 | old, ce->runtime.last, dt); | |
cf274daa | 1218 | st_update_runtime_underflow(ce, dt); |
1883a0a4 TU |
1219 | return; |
1220 | } | |
1221 | ||
1222 | ewma_runtime_add(&ce->runtime.avg, dt); | |
1223 | ce->runtime.total += dt; | |
1224 | } | |
1225 | ||
df403069 CW |
1226 | static inline struct intel_engine_cs * |
1227 | __execlists_schedule_in(struct i915_request *rq) | |
1228 | { | |
1229 | struct intel_engine_cs * const engine = rq->engine; | |
9f3ccd40 | 1230 | struct intel_context * const ce = rq->context; |
df403069 CW |
1231 | |
1232 | intel_context_get(ce); | |
1233 | ||
9f3ccd40 | 1234 | if (unlikely(intel_context_is_banned(ce))) |
31b61f0e CW |
1235 | reset_active(rq, engine); |
1236 | ||
b0b10248 | 1237 | if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) |
31b61f0e | 1238 | execlists_check_context(ce, engine); |
b0b10248 | 1239 | |
2935ed53 CW |
1240 | if (ce->tag) { |
1241 | /* Use a fixed tag for OA and friends */ | |
1bc6a601 | 1242 | GEM_BUG_ON(ce->tag <= BITS_PER_LONG); |
53b2622e | 1243 | ce->lrc.ccid = ce->tag; |
2935ed53 CW |
1244 | } else { |
1245 | /* We don't need a strict matching tag, just different values */ | |
1bc6a601 CW |
1246 | unsigned int tag = ffs(engine->context_tag); |
1247 | ||
1248 | GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG); | |
1249 | clear_bit(tag - 1, &engine->context_tag); | |
1250 | ce->lrc.ccid = tag << (GEN11_SW_CTX_ID_SHIFT - 32); | |
1251 | ||
1252 | BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID); | |
2935ed53 CW |
1253 | } |
1254 | ||
53b2622e CW |
1255 | ce->lrc.ccid |= engine->execlists.ccid; |
1256 | ||
93b0e8fe | 1257 | __intel_gt_pm_get(engine->gt); |
df403069 CW |
1258 | execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); |
1259 | intel_engine_context_in(engine); | |
1260 | ||
1261 | return engine; | |
1262 | } | |
1263 | ||
22b7a426 CW |
1264 | static inline struct i915_request * |
1265 | execlists_schedule_in(struct i915_request *rq, int idx) | |
f2605207 | 1266 | { |
9f3ccd40 | 1267 | struct intel_context * const ce = rq->context; |
df403069 | 1268 | struct intel_engine_cs *old; |
f2605207 | 1269 | |
df403069 | 1270 | GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine)); |
22b7a426 | 1271 | trace_i915_request_in(rq, idx); |
f2605207 | 1272 | |
df403069 CW |
1273 | old = READ_ONCE(ce->inflight); |
1274 | do { | |
1275 | if (!old) { | |
1276 | WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq)); | |
1277 | break; | |
1278 | } | |
1279 | } while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old))); | |
22b7a426 | 1280 | |
22b7a426 | 1281 | GEM_BUG_ON(intel_context_inflight(ce) != rq->engine); |
22b7a426 | 1282 | return i915_request_get(rq); |
73fd9d38 TU |
1283 | } |
1284 | ||
22b7a426 | 1285 | static void kick_siblings(struct i915_request *rq, struct intel_context *ce) |
78e41ddd | 1286 | { |
22b7a426 | 1287 | struct virtual_engine *ve = container_of(ce, typeof(*ve), context); |
78e41ddd CW |
1288 | struct i915_request *next = READ_ONCE(ve->request); |
1289 | ||
1290 | if (next && next->execution_mask & ~rq->execution_mask) | |
1291 | tasklet_schedule(&ve->base.execlists.tasklet); | |
1292 | } | |
1293 | ||
73fd9d38 | 1294 | static inline void |
df403069 | 1295 | __execlists_schedule_out(struct i915_request *rq, |
1bc6a601 CW |
1296 | struct intel_engine_cs * const engine, |
1297 | unsigned int ccid) | |
73fd9d38 | 1298 | { |
9f3ccd40 | 1299 | struct intel_context * const ce = rq->context; |
22b7a426 | 1300 | |
31b61f0e CW |
1301 | /* |
1302 | * NB process_csb() is not under the engine->active.lock and hence | |
1303 | * schedule_out can race with schedule_in meaning that we should | |
1304 | * refrain from doing non-trivial work here. | |
1305 | */ | |
1306 | ||
4f88f874 CW |
1307 | /* |
1308 | * If we have just completed this context, the engine may now be | |
1309 | * idle and we want to re-enter powersaving. | |
1310 | */ | |
875c3b4b | 1311 | if (list_is_last_rcu(&rq->link, &ce->timeline->requests) && |
4f88f874 CW |
1312 | i915_request_completed(rq)) |
1313 | intel_engine_add_retire(engine, ce->timeline); | |
1314 | ||
1bc6a601 CW |
1315 | ccid >>= GEN11_SW_CTX_ID_SHIFT - 32; |
1316 | ccid &= GEN12_MAX_CONTEXT_HW_ID; | |
1317 | if (ccid < BITS_PER_LONG) { | |
1318 | GEM_BUG_ON(ccid == 0); | |
1319 | GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag)); | |
1320 | set_bit(ccid - 1, &engine->context_tag); | |
1321 | } | |
1322 | ||
1883a0a4 | 1323 | intel_context_update_runtime(ce); |
df403069 CW |
1324 | intel_engine_context_out(engine); |
1325 | execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); | |
07779a76 | 1326 | intel_gt_pm_put_async(engine->gt); |
22b7a426 | 1327 | |
df403069 CW |
1328 | /* |
1329 | * If this is part of a virtual engine, its next request may | |
1330 | * have been blocked waiting for access to the active context. | |
1331 | * We have to kick all the siblings again in case we need to | |
1332 | * switch (e.g. the next request is not runnable on this | |
1333 | * engine). Hopefully, we will already have submitted the next | |
1334 | * request before the tasklet runs and do not need to rebuild | |
1335 | * each virtual tree and kick everyone again. | |
1336 | */ | |
1337 | if (ce->engine != engine) | |
1338 | kick_siblings(rq, ce); | |
78e41ddd | 1339 | |
df403069 CW |
1340 | intel_context_put(ce); |
1341 | } | |
22b7a426 | 1342 | |
df403069 CW |
1343 | static inline void |
1344 | execlists_schedule_out(struct i915_request *rq) | |
1345 | { | |
9f3ccd40 | 1346 | struct intel_context * const ce = rq->context; |
df403069 | 1347 | struct intel_engine_cs *cur, *old; |
1bc6a601 | 1348 | u32 ccid; |
12fdaf19 | 1349 | |
df403069 | 1350 | trace_i915_request_out(rq); |
df403069 | 1351 | |
1bc6a601 | 1352 | ccid = rq->context->lrc.ccid; |
df403069 CW |
1353 | old = READ_ONCE(ce->inflight); |
1354 | do | |
1355 | cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL; | |
1356 | while (!try_cmpxchg(&ce->inflight, &old, cur)); | |
1357 | if (!cur) | |
1bc6a601 | 1358 | __execlists_schedule_out(rq, old, ccid); |
22b7a426 CW |
1359 | |
1360 | i915_request_put(rq); | |
73fd9d38 TU |
1361 | } |
1362 | ||
82c69bf5 | 1363 | static u64 execlists_update_context(struct i915_request *rq) |
ae1250b9 | 1364 | { |
9f3ccd40 | 1365 | struct intel_context *ce = rq->context; |
53b2622e | 1366 | u64 desc = ce->lrc.desc; |
5ba32c7b | 1367 | u32 tail, prev; |
ae1250b9 | 1368 | |
82c69bf5 CW |
1369 | /* |
1370 | * WaIdleLiteRestore:bdw,skl | |
1371 | * | |
1372 | * We should never submit the context with the same RING_TAIL twice | |
1373 | * just in case we submit an empty ring, which confuses the HW. | |
1374 | * | |
1375 | * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of | |
1376 | * the normal request to be able to always advance the RING_TAIL on | |
1377 | * subsequent resubmissions (for lite restore). Should that fail us, | |
1378 | * and we try and submit the same tail again, force the context | |
1379 | * reload. | |
5ba32c7b CW |
1380 | * |
1381 | * If we need to return to a preempted context, we need to skip the | |
1382 | * lite-restore and force it to reload the RING_TAIL. Otherwise, the | |
1383 | * HW has a tendency to ignore us rewinding the TAIL to the end of | |
1384 | * an earlier request. | |
82c69bf5 CW |
1385 | */ |
1386 | tail = intel_ring_set_tail(rq->ring, rq->tail); | |
5ba32c7b CW |
1387 | prev = ce->lrc_reg_state[CTX_RING_TAIL]; |
1388 | if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0)) | |
82c69bf5 CW |
1389 | desc |= CTX_DESC_FORCE_RESTORE; |
1390 | ce->lrc_reg_state[CTX_RING_TAIL] = tail; | |
1391 | rq->tail = rq->wa_tail; | |
70c2a24d | 1392 | |
987abd5c CW |
1393 | /* |
1394 | * Make sure the context image is complete before we submit it to HW. | |
1395 | * | |
1396 | * Ostensibly, writes (including the WCB) should be flushed prior to | |
1397 | * an uncached write such as our mmio register access, the empirical | |
1398 | * evidence (esp. on Braswell) suggests that the WC write into memory | |
1399 | * may not be visible to the HW prior to the completion of the UC | |
1400 | * register write and that we may begin execution from the context | |
1401 | * before its image is complete leading to invalid PD chasing. | |
1402 | */ | |
69a48c1d | 1403 | wmb(); |
22b7a426 | 1404 | |
53b2622e | 1405 | ce->lrc.desc &= ~CTX_DESC_FORCE_RESTORE; |
22b7a426 | 1406 | return desc; |
ae1250b9 OM |
1407 | } |
1408 | ||
05f0addd | 1409 | static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port) |
beecec90 | 1410 | { |
05f0addd TD |
1411 | if (execlists->ctrl_reg) { |
1412 | writel(lower_32_bits(desc), execlists->submit_reg + port * 2); | |
1413 | writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1); | |
1414 | } else { | |
1415 | writel(upper_32_bits(desc), execlists->submit_reg); | |
1416 | writel(lower_32_bits(desc), execlists->submit_reg); | |
1417 | } | |
beecec90 CW |
1418 | } |
1419 | ||
22b7a426 CW |
1420 | static __maybe_unused void |
1421 | trace_ports(const struct intel_engine_execlists *execlists, | |
1422 | const char *msg, | |
1423 | struct i915_request * const *ports) | |
1424 | { | |
1425 | const struct intel_engine_cs *engine = | |
1426 | container_of(execlists, typeof(*engine), execlists); | |
1427 | ||
198d2533 CW |
1428 | if (!ports[0]) |
1429 | return; | |
1430 | ||
639f2f24 VSD |
1431 | ENGINE_TRACE(engine, "%s { %llx:%lld%s, %llx:%lld }\n", msg, |
1432 | ports[0]->fence.context, | |
1433 | ports[0]->fence.seqno, | |
1434 | i915_request_completed(ports[0]) ? "!" : | |
1435 | i915_request_started(ports[0]) ? "*" : | |
1436 | "", | |
1437 | ports[1] ? ports[1]->fence.context : 0, | |
1438 | ports[1] ? ports[1]->fence.seqno : 0); | |
22b7a426 CW |
1439 | } |
1440 | ||
f1042cc8 CW |
1441 | static inline bool |
1442 | reset_in_progress(const struct intel_engine_execlists *execlists) | |
1443 | { | |
1444 | return unlikely(!__tasklet_is_enabled(&execlists->tasklet)); | |
1445 | } | |
1446 | ||
22b7a426 CW |
1447 | static __maybe_unused bool |
1448 | assert_pending_valid(const struct intel_engine_execlists *execlists, | |
1449 | const char *msg) | |
1450 | { | |
1451 | struct i915_request * const *port, *rq; | |
1452 | struct intel_context *ce = NULL; | |
15db5fcc | 1453 | bool sentinel = false; |
22b7a426 CW |
1454 | |
1455 | trace_ports(execlists, msg, execlists->pending); | |
1456 | ||
f1042cc8 CW |
1457 | /* We may be messing around with the lists during reset, lalala */ |
1458 | if (reset_in_progress(execlists)) | |
1459 | return true; | |
1460 | ||
c97fb526 CW |
1461 | if (!execlists->pending[0]) { |
1462 | GEM_TRACE_ERR("Nothing pending for promotion!\n"); | |
df403069 | 1463 | return false; |
c97fb526 | 1464 | } |
df403069 | 1465 | |
c97fb526 CW |
1466 | if (execlists->pending[execlists_num_ports(execlists)]) { |
1467 | GEM_TRACE_ERR("Excess pending[%d] for promotion!\n", | |
1468 | execlists_num_ports(execlists)); | |
22b7a426 | 1469 | return false; |
c97fb526 | 1470 | } |
22b7a426 CW |
1471 | |
1472 | for (port = execlists->pending; (rq = *port); port++) { | |
c95d31c3 CW |
1473 | unsigned long flags; |
1474 | bool ok = true; | |
1475 | ||
80aac91b CW |
1476 | GEM_BUG_ON(!kref_read(&rq->fence.refcount)); |
1477 | GEM_BUG_ON(!i915_request_is_active(rq)); | |
1478 | ||
9f3ccd40 | 1479 | if (ce == rq->context) { |
38098750 CW |
1480 | GEM_TRACE_ERR("Dup context:%llx in pending[%zd]\n", |
1481 | ce->timeline->fence_context, | |
c97fb526 | 1482 | port - execlists->pending); |
22b7a426 | 1483 | return false; |
c97fb526 | 1484 | } |
9f3ccd40 | 1485 | ce = rq->context; |
c95d31c3 | 1486 | |
15db5fcc CW |
1487 | /* |
1488 | * Sentinels are supposed to be lonely so they flush the | |
1489 | * current exection off the HW. Check that they are the | |
1490 | * only request in the pending submission. | |
1491 | */ | |
1492 | if (sentinel) { | |
1493 | GEM_TRACE_ERR("context:%llx after sentinel in pending[%zd]\n", | |
1494 | ce->timeline->fence_context, | |
1495 | port - execlists->pending); | |
1496 | return false; | |
1497 | } | |
1498 | ||
1499 | sentinel = i915_request_has_sentinel(rq); | |
1500 | if (sentinel && port != execlists->pending) { | |
1501 | GEM_TRACE_ERR("sentinel context:%llx not in prime position[%zd]\n", | |
1502 | ce->timeline->fence_context, | |
1503 | port - execlists->pending); | |
1504 | return false; | |
1505 | } | |
1506 | ||
c95d31c3 | 1507 | /* Hold tightly onto the lock to prevent concurrent retires! */ |
49e74c8f CW |
1508 | if (!spin_trylock_irqsave(&rq->lock, flags)) |
1509 | continue; | |
c95d31c3 | 1510 | |
22b7a426 | 1511 | if (i915_request_completed(rq)) |
c95d31c3 | 1512 | goto unlock; |
22b7a426 | 1513 | |
e6ba7648 CW |
1514 | if (i915_active_is_idle(&ce->active) && |
1515 | !intel_context_is_barrier(ce)) { | |
38098750 CW |
1516 | GEM_TRACE_ERR("Inactive context:%llx in pending[%zd]\n", |
1517 | ce->timeline->fence_context, | |
c97fb526 | 1518 | port - execlists->pending); |
c95d31c3 CW |
1519 | ok = false; |
1520 | goto unlock; | |
c97fb526 CW |
1521 | } |
1522 | ||
1523 | if (!i915_vma_is_pinned(ce->state)) { | |
38098750 CW |
1524 | GEM_TRACE_ERR("Unpinned context:%llx in pending[%zd]\n", |
1525 | ce->timeline->fence_context, | |
c97fb526 | 1526 | port - execlists->pending); |
c95d31c3 CW |
1527 | ok = false; |
1528 | goto unlock; | |
c97fb526 | 1529 | } |
22b7a426 | 1530 | |
c97fb526 | 1531 | if (!i915_vma_is_pinned(ce->ring->vma)) { |
38098750 CW |
1532 | GEM_TRACE_ERR("Unpinned ring:%llx in pending[%zd]\n", |
1533 | ce->timeline->fence_context, | |
c97fb526 | 1534 | port - execlists->pending); |
c95d31c3 CW |
1535 | ok = false; |
1536 | goto unlock; | |
c97fb526 | 1537 | } |
c95d31c3 CW |
1538 | |
1539 | unlock: | |
1540 | spin_unlock_irqrestore(&rq->lock, flags); | |
1541 | if (!ok) | |
1542 | return false; | |
22b7a426 CW |
1543 | } |
1544 | ||
1545 | return ce; | |
1546 | } | |
1547 | ||
70c2a24d | 1548 | static void execlists_submit_ports(struct intel_engine_cs *engine) |
bbd6c47e | 1549 | { |
05f0addd | 1550 | struct intel_engine_execlists *execlists = &engine->execlists; |
77f0d0e9 | 1551 | unsigned int n; |
bbd6c47e | 1552 | |
22b7a426 CW |
1553 | GEM_BUG_ON(!assert_pending_valid(execlists, "submit")); |
1554 | ||
d78d3343 CW |
1555 | /* |
1556 | * We can skip acquiring intel_runtime_pm_get() here as it was taken | |
1557 | * on our behalf by the request (see i915_gem_mark_busy()) and it will | |
1558 | * not be relinquished until the device is idle (see | |
1559 | * i915_gem_idle_work_handler()). As a precaution, we make sure | |
1560 | * that all ELSP are drained i.e. we have processed the CSB, | |
1561 | * before allowing ourselves to idle and calling intel_runtime_pm_put(). | |
1562 | */ | |
5f22e5b3 | 1563 | GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); |
d78d3343 | 1564 | |
05f0addd TD |
1565 | /* |
1566 | * ELSQ note: the submit queue is not cleared after being submitted | |
1567 | * to the HW so we need to make sure we always clean it up. This is | |
1568 | * currently ensured by the fact that we always write the same number | |
1569 | * of elsq entries, keep this in mind before changing the loop below. | |
1570 | */ | |
1571 | for (n = execlists_num_ports(execlists); n--; ) { | |
22b7a426 | 1572 | struct i915_request *rq = execlists->pending[n]; |
bbd6c47e | 1573 | |
22b7a426 CW |
1574 | write_desc(execlists, |
1575 | rq ? execlists_update_context(rq) : 0, | |
1576 | n); | |
77f0d0e9 | 1577 | } |
05f0addd TD |
1578 | |
1579 | /* we need to manually load the submit queue */ | |
1580 | if (execlists->ctrl_reg) | |
1581 | writel(EL_CTRL_LOAD, execlists->ctrl_reg); | |
bbd6c47e CW |
1582 | } |
1583 | ||
1fc44d9b | 1584 | static bool ctx_single_port_submission(const struct intel_context *ce) |
84b790f8 | 1585 | { |
70c2a24d | 1586 | return (IS_ENABLED(CONFIG_DRM_I915_GVT) && |
9f3ccd40 | 1587 | intel_context_force_single_submission(ce)); |
70c2a24d | 1588 | } |
84b790f8 | 1589 | |
1fc44d9b CW |
1590 | static bool can_merge_ctx(const struct intel_context *prev, |
1591 | const struct intel_context *next) | |
70c2a24d CW |
1592 | { |
1593 | if (prev != next) | |
1594 | return false; | |
26720ab9 | 1595 | |
70c2a24d CW |
1596 | if (ctx_single_port_submission(prev)) |
1597 | return false; | |
26720ab9 | 1598 | |
70c2a24d | 1599 | return true; |
84b790f8 BW |
1600 | } |
1601 | ||
fa192d90 CW |
1602 | static unsigned long i915_request_flags(const struct i915_request *rq) |
1603 | { | |
1604 | return READ_ONCE(rq->fence.flags); | |
1605 | } | |
1606 | ||
c10c78ad CW |
1607 | static bool can_merge_rq(const struct i915_request *prev, |
1608 | const struct i915_request *next) | |
1609 | { | |
22b7a426 | 1610 | GEM_BUG_ON(prev == next); |
c10c78ad CW |
1611 | GEM_BUG_ON(!assert_priority_queue(prev, next)); |
1612 | ||
c0bb487d CW |
1613 | /* |
1614 | * We do not submit known completed requests. Therefore if the next | |
1615 | * request is already completed, we can pretend to merge it in | |
1616 | * with the previous context (and we will skip updating the ELSP | |
1617 | * and tracking). Thus hopefully keeping the ELSP full with active | |
1618 | * contexts, despite the best efforts of preempt-to-busy to confuse | |
1619 | * us. | |
1620 | */ | |
1621 | if (i915_request_completed(next)) | |
1622 | return true; | |
1623 | ||
fa192d90 | 1624 | if (unlikely((i915_request_flags(prev) ^ i915_request_flags(next)) & |
72ff2b8d CW |
1625 | (BIT(I915_FENCE_FLAG_NOPREEMPT) | |
1626 | BIT(I915_FENCE_FLAG_SENTINEL)))) | |
d8ad5f52 CW |
1627 | return false; |
1628 | ||
9f3ccd40 | 1629 | if (!can_merge_ctx(prev->context, next->context)) |
c10c78ad CW |
1630 | return false; |
1631 | ||
1eaa251b | 1632 | GEM_BUG_ON(i915_seqno_passed(prev->fence.seqno, next->fence.seqno)); |
c10c78ad CW |
1633 | return true; |
1634 | } | |
1635 | ||
6d06779e CW |
1636 | static void virtual_update_register_offsets(u32 *regs, |
1637 | struct intel_engine_cs *engine) | |
1638 | { | |
d1813ca2 | 1639 | set_offsets(regs, reg_offsets(engine), engine, false); |
6d06779e CW |
1640 | } |
1641 | ||
1642 | static bool virtual_matches(const struct virtual_engine *ve, | |
1643 | const struct i915_request *rq, | |
1644 | const struct intel_engine_cs *engine) | |
1645 | { | |
754f7a0b | 1646 | const struct intel_engine_cs *inflight; |
6d06779e | 1647 | |
78e41ddd CW |
1648 | if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */ |
1649 | return false; | |
1650 | ||
6d06779e CW |
1651 | /* |
1652 | * We track when the HW has completed saving the context image | |
1653 | * (i.e. when we have seen the final CS event switching out of | |
1654 | * the context) and must not overwrite the context image before | |
1655 | * then. This restricts us to only using the active engine | |
1656 | * while the previous virtualized request is inflight (so | |
1657 | * we reuse the register offsets). This is a very small | |
1658 | * hystersis on the greedy seelction algorithm. | |
1659 | */ | |
22b7a426 | 1660 | inflight = intel_context_inflight(&ve->context); |
754f7a0b | 1661 | if (inflight && inflight != engine) |
6d06779e CW |
1662 | return false; |
1663 | ||
1664 | return true; | |
1665 | } | |
1666 | ||
1667 | static void virtual_xfer_breadcrumbs(struct virtual_engine *ve, | |
a97b786b | 1668 | struct i915_request *rq) |
6d06779e CW |
1669 | { |
1670 | struct intel_engine_cs *old = ve->siblings[0]; | |
1671 | ||
1672 | /* All unattached (rq->engine == old) must already be completed */ | |
1673 | ||
1674 | spin_lock(&old->breadcrumbs.irq_lock); | |
1675 | if (!list_empty(&ve->context.signal_link)) { | |
a97b786b CW |
1676 | list_del_init(&ve->context.signal_link); |
1677 | ||
1678 | /* | |
1679 | * We cannot acquire the new engine->breadcrumbs.irq_lock | |
1680 | * (as we are holding a breadcrumbs.irq_lock already), | |
1681 | * so attach this request to the signaler on submission. | |
1682 | * The queued irq_work will occur when we finally drop | |
1683 | * the engine->active.lock after dequeue. | |
1684 | */ | |
1685 | set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags); | |
1686 | ||
1687 | /* Also transfer the pending irq_work for the old breadcrumb. */ | |
1688 | intel_engine_signal_breadcrumbs(rq->engine); | |
6d06779e CW |
1689 | } |
1690 | spin_unlock(&old->breadcrumbs.irq_lock); | |
1691 | } | |
1692 | ||
f14f27b1 CW |
1693 | #define for_each_waiter(p__, rq__) \ |
1694 | list_for_each_entry_lockless(p__, \ | |
1695 | &(rq__)->sched.waiters_list, \ | |
1696 | wait_link) | |
1697 | ||
793c2261 | 1698 | #define for_each_signaler(p__, rq__) \ |
66940061 CW |
1699 | list_for_each_entry_rcu(p__, \ |
1700 | &(rq__)->sched.signalers_list, \ | |
1701 | signal_link) | |
793c2261 | 1702 | |
07bfe6bf | 1703 | static void defer_request(struct i915_request *rq, struct list_head * const pl) |
8ee36e04 | 1704 | { |
07bfe6bf | 1705 | LIST_HEAD(list); |
8ee36e04 CW |
1706 | |
1707 | /* | |
1708 | * We want to move the interrupted request to the back of | |
1709 | * the round-robin list (i.e. its priority level), but | |
1710 | * in doing so, we must then move all requests that were in | |
1711 | * flight and were waiting for the interrupted request to | |
1712 | * be run after it again. | |
1713 | */ | |
07bfe6bf CW |
1714 | do { |
1715 | struct i915_dependency *p; | |
8ee36e04 | 1716 | |
07bfe6bf CW |
1717 | GEM_BUG_ON(i915_request_is_active(rq)); |
1718 | list_move_tail(&rq->sched.link, pl); | |
8ee36e04 | 1719 | |
f14f27b1 | 1720 | for_each_waiter(p, rq) { |
07bfe6bf CW |
1721 | struct i915_request *w = |
1722 | container_of(p->waiter, typeof(*w), sched); | |
8ee36e04 | 1723 | |
a9d094dc CW |
1724 | if (p->flags & I915_DEPENDENCY_WEAK) |
1725 | continue; | |
1726 | ||
07bfe6bf CW |
1727 | /* Leave semaphores spinning on the other engines */ |
1728 | if (w->engine != rq->engine) | |
1729 | continue; | |
8ee36e04 | 1730 | |
07bfe6bf CW |
1731 | /* No waiter should start before its signaler */ |
1732 | GEM_BUG_ON(i915_request_started(w) && | |
1733 | !i915_request_completed(rq)); | |
8ee36e04 | 1734 | |
07bfe6bf | 1735 | GEM_BUG_ON(i915_request_is_active(w)); |
32ff621f CW |
1736 | if (!i915_request_is_ready(w)) |
1737 | continue; | |
8ee36e04 | 1738 | |
07bfe6bf CW |
1739 | if (rq_prio(w) < rq_prio(rq)) |
1740 | continue; | |
1741 | ||
1742 | GEM_BUG_ON(rq_prio(w) > rq_prio(rq)); | |
1743 | list_move_tail(&w->sched.link, &list); | |
1744 | } | |
1745 | ||
1746 | rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); | |
1747 | } while (rq); | |
8ee36e04 CW |
1748 | } |
1749 | ||
1750 | static void defer_active(struct intel_engine_cs *engine) | |
1751 | { | |
1752 | struct i915_request *rq; | |
1753 | ||
1754 | rq = __unwind_incomplete_requests(engine); | |
1755 | if (!rq) | |
1756 | return; | |
1757 | ||
1758 | defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq))); | |
1759 | } | |
1760 | ||
1761 | static bool | |
220dcfc1 CW |
1762 | need_timeslice(const struct intel_engine_cs *engine, |
1763 | const struct i915_request *rq) | |
8ee36e04 CW |
1764 | { |
1765 | int hint; | |
1766 | ||
b79029b2 | 1767 | if (!intel_engine_has_timeslices(engine)) |
09975b86 CW |
1768 | return false; |
1769 | ||
3df2deed CW |
1770 | hint = engine->execlists.queue_priority_hint; |
1771 | if (!list_is_last(&rq->sched.link, &engine->active.requests)) | |
1772 | hint = max(hint, rq_prio(list_next_entry(rq, sched.link))); | |
8ee36e04 | 1773 | |
ad9e3792 | 1774 | return hint >= effective_prio(rq); |
8ee36e04 CW |
1775 | } |
1776 | ||
220dcfc1 CW |
1777 | static bool |
1778 | timeslice_yield(const struct intel_engine_execlists *el, | |
1779 | const struct i915_request *rq) | |
1780 | { | |
1781 | /* | |
1782 | * Once bitten, forever smitten! | |
1783 | * | |
1784 | * If the active context ever busy-waited on a semaphore, | |
1785 | * it will be treated as a hog until the end of its timeslice (i.e. | |
1786 | * until it is scheduled out and replaced by a new submission, | |
1787 | * possibly even its own lite-restore). The HW only sends an interrupt | |
1788 | * on the first miss, and we do know if that semaphore has been | |
1789 | * signaled, or even if it is now stuck on another semaphore. Play | |
1790 | * safe, yield if it might be stuck -- it will be given a fresh | |
1791 | * timeslice in the near future. | |
1792 | */ | |
53b2622e | 1793 | return rq->context->lrc.ccid == READ_ONCE(el->yield); |
220dcfc1 CW |
1794 | } |
1795 | ||
1796 | static bool | |
1797 | timeslice_expired(const struct intel_engine_execlists *el, | |
1798 | const struct i915_request *rq) | |
1799 | { | |
1800 | return timer_expired(&el->timer) || timeslice_yield(el, rq); | |
1801 | } | |
1802 | ||
df403069 CW |
1803 | static int |
1804 | switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq) | |
1805 | { | |
1806 | if (list_is_last(&rq->sched.link, &engine->active.requests)) | |
1807 | return INT_MIN; | |
1808 | ||
1809 | return rq_prio(list_next_entry(rq, sched.link)); | |
1810 | } | |
1811 | ||
b79029b2 CW |
1812 | static inline unsigned long |
1813 | timeslice(const struct intel_engine_cs *engine) | |
1814 | { | |
1815 | return READ_ONCE(engine->props.timeslice_duration_ms); | |
1816 | } | |
1817 | ||
220dcfc1 | 1818 | static unsigned long active_timeslice(const struct intel_engine_cs *engine) |
8ee36e04 | 1819 | { |
23a44ae9 CW |
1820 | const struct intel_engine_execlists *execlists = &engine->execlists; |
1821 | const struct i915_request *rq = *execlists->active; | |
df403069 | 1822 | |
6b7133b6 | 1823 | if (!rq || i915_request_completed(rq)) |
b79029b2 | 1824 | return 0; |
8ee36e04 | 1825 | |
23a44ae9 | 1826 | if (READ_ONCE(execlists->switch_priority_hint) < effective_prio(rq)) |
b79029b2 CW |
1827 | return 0; |
1828 | ||
1829 | return timeslice(engine); | |
1830 | } | |
1831 | ||
1832 | static void set_timeslice(struct intel_engine_cs *engine) | |
1833 | { | |
1834 | if (!intel_engine_has_timeslices(engine)) | |
1835 | return; | |
1836 | ||
1837 | set_timer_ms(&engine->execlists.timer, active_timeslice(engine)); | |
8ee36e04 CW |
1838 | } |
1839 | ||
3df2deed CW |
1840 | static void start_timeslice(struct intel_engine_cs *engine) |
1841 | { | |
1842 | struct intel_engine_execlists *execlists = &engine->execlists; | |
23a44ae9 | 1843 | int prio = queue_prio(execlists); |
3df2deed | 1844 | |
23a44ae9 CW |
1845 | WRITE_ONCE(execlists->switch_priority_hint, prio); |
1846 | if (prio == INT_MIN) | |
1847 | return; | |
3df2deed CW |
1848 | |
1849 | if (timer_pending(&execlists->timer)) | |
1850 | return; | |
1851 | ||
1852 | set_timer_ms(&execlists->timer, timeslice(engine)); | |
1853 | } | |
1854 | ||
58d1b427 CW |
1855 | static void record_preemption(struct intel_engine_execlists *execlists) |
1856 | { | |
1857 | (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++); | |
1858 | } | |
1859 | ||
60ef5b7a CW |
1860 | static unsigned long active_preempt_timeout(struct intel_engine_cs *engine, |
1861 | const struct i915_request *rq) | |
3a7a92ab | 1862 | { |
3a7a92ab CW |
1863 | if (!rq) |
1864 | return 0; | |
1865 | ||
d12acee8 | 1866 | /* Force a fast reset for terminated contexts (ignoring sysfs!) */ |
9f3ccd40 | 1867 | if (unlikely(intel_context_is_banned(rq->context))) |
d12acee8 CW |
1868 | return 1; |
1869 | ||
3a7a92ab CW |
1870 | return READ_ONCE(engine->props.preempt_timeout_ms); |
1871 | } | |
1872 | ||
60ef5b7a CW |
1873 | static void set_preempt_timeout(struct intel_engine_cs *engine, |
1874 | const struct i915_request *rq) | |
3a7a92ab CW |
1875 | { |
1876 | if (!intel_engine_has_preempt_reset(engine)) | |
1877 | return; | |
1878 | ||
1879 | set_timer_ms(&engine->execlists.preempt, | |
60ef5b7a | 1880 | active_preempt_timeout(engine, rq)); |
3a7a92ab CW |
1881 | } |
1882 | ||
ab17e6ca CW |
1883 | static inline void clear_ports(struct i915_request **ports, int count) |
1884 | { | |
1885 | memset_p((void **)ports, NULL, count); | |
1886 | } | |
1887 | ||
9512f985 | 1888 | static void execlists_dequeue(struct intel_engine_cs *engine) |
acdd884a | 1889 | { |
7a62cc61 | 1890 | struct intel_engine_execlists * const execlists = &engine->execlists; |
22b7a426 CW |
1891 | struct i915_request **port = execlists->pending; |
1892 | struct i915_request ** const last_port = port + execlists->port_mask; | |
60ef5b7a | 1893 | struct i915_request * const *active; |
22b7a426 | 1894 | struct i915_request *last; |
20311bd3 | 1895 | struct rb_node *rb; |
70c2a24d CW |
1896 | bool submit = false; |
1897 | ||
9512f985 CW |
1898 | /* |
1899 | * Hardware submission is through 2 ports. Conceptually each port | |
70c2a24d CW |
1900 | * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is |
1901 | * static for a context, and unique to each, so we only execute | |
1902 | * requests belonging to a single context from each ring. RING_HEAD | |
1903 | * is maintained by the CS in the context image, it marks the place | |
1904 | * where it got up to last time, and through RING_TAIL we tell the CS | |
1905 | * where we want to execute up to this time. | |
1906 | * | |
1907 | * In this list the requests are in order of execution. Consecutive | |
1908 | * requests from the same context are adjacent in the ringbuffer. We | |
1909 | * can combine these requests into a single RING_TAIL update: | |
1910 | * | |
1911 | * RING_HEAD...req1...req2 | |
1912 | * ^- RING_TAIL | |
1913 | * since to execute req2 the CS must first execute req1. | |
1914 | * | |
1915 | * Our goal then is to point each port to the end of a consecutive | |
1916 | * sequence of requests as being the most optimal (fewest wake ups | |
1917 | * and context switches) submission. | |
779949f4 | 1918 | */ |
acdd884a | 1919 | |
6d06779e CW |
1920 | for (rb = rb_first_cached(&execlists->virtual); rb; ) { |
1921 | struct virtual_engine *ve = | |
1922 | rb_entry(rb, typeof(*ve), nodes[engine->id].rb); | |
1923 | struct i915_request *rq = READ_ONCE(ve->request); | |
1924 | ||
1925 | if (!rq) { /* lazily cleanup after another engine handled rq */ | |
1926 | rb_erase_cached(rb, &execlists->virtual); | |
1927 | RB_CLEAR_NODE(rb); | |
1928 | rb = rb_first_cached(&execlists->virtual); | |
1929 | continue; | |
1930 | } | |
1931 | ||
1932 | if (!virtual_matches(ve, rq, engine)) { | |
1933 | rb = rb_next(rb); | |
1934 | continue; | |
1935 | } | |
1936 | ||
1937 | break; | |
1938 | } | |
1939 | ||
22b7a426 CW |
1940 | /* |
1941 | * If the queue is higher priority than the last | |
1942 | * request in the currently active context, submit afresh. | |
1943 | * We will resubmit again afterwards in case we need to split | |
1944 | * the active context to interject the preemption request, | |
1945 | * i.e. we will retrigger preemption following the ack in case | |
1946 | * of trouble. | |
1947 | */ | |
60ef5b7a CW |
1948 | active = READ_ONCE(execlists->active); |
1949 | while ((last = *active) && i915_request_completed(last)) | |
1950 | active++; | |
1951 | ||
beecec90 | 1952 | if (last) { |
6d06779e | 1953 | if (need_preempt(engine, last, rb)) { |
639f2f24 VSD |
1954 | ENGINE_TRACE(engine, |
1955 | "preempting last=%llx:%lld, prio=%d, hint=%d\n", | |
1956 | last->fence.context, | |
1957 | last->fence.seqno, | |
1958 | last->sched.attr.priority, | |
1959 | execlists->queue_priority_hint); | |
58d1b427 CW |
1960 | record_preemption(execlists); |
1961 | ||
22b7a426 CW |
1962 | /* |
1963 | * Don't let the RING_HEAD advance past the breadcrumb | |
1964 | * as we unwind (and until we resubmit) so that we do | |
1965 | * not accidentally tell it to go backwards. | |
1966 | */ | |
1967 | ring_set_paused(engine, 1); | |
f6322edd | 1968 | |
22b7a426 CW |
1969 | /* |
1970 | * Note that we have not stopped the GPU at this point, | |
1971 | * so we are unwinding the incomplete requests as they | |
1972 | * remain inflight and so by the time we do complete | |
1973 | * the preemption, some of the unwound requests may | |
1974 | * complete! | |
1975 | */ | |
1976 | __unwind_incomplete_requests(engine); | |
f6322edd | 1977 | |
22b7a426 | 1978 | last = NULL; |
8ee36e04 | 1979 | } else if (need_timeslice(engine, last) && |
220dcfc1 | 1980 | timeslice_expired(execlists, last)) { |
639f2f24 | 1981 | ENGINE_TRACE(engine, |
220dcfc1 | 1982 | "expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n", |
639f2f24 VSD |
1983 | last->fence.context, |
1984 | last->fence.seqno, | |
1985 | last->sched.attr.priority, | |
220dcfc1 CW |
1986 | execlists->queue_priority_hint, |
1987 | yesno(timeslice_yield(execlists, last))); | |
8ee36e04 CW |
1988 | |
1989 | ring_set_paused(engine, 1); | |
1990 | defer_active(engine); | |
1991 | ||
1992 | /* | |
1993 | * Unlike for preemption, if we rewind and continue | |
1994 | * executing the same context as previously active, | |
1995 | * the order of execution will remain the same and | |
1996 | * the tail will only advance. We do not need to | |
1997 | * force a full context restore, as a lite-restore | |
1998 | * is sufficient to resample the monotonic TAIL. | |
1999 | * | |
2000 | * If we switch to any other context, similarly we | |
2001 | * will not rewind TAIL of current context, and | |
2002 | * normal save/restore will preserve state and allow | |
2003 | * us to later continue executing the same request. | |
2004 | */ | |
2005 | last = NULL; | |
22b7a426 CW |
2006 | } else { |
2007 | /* | |
2008 | * Otherwise if we already have a request pending | |
2009 | * for execution after the current one, we can | |
2010 | * just wait until the next CS event before | |
2011 | * queuing more. In either case we will force a | |
2012 | * lite-restore preemption event, but if we wait | |
2013 | * we hopefully coalesce several updates into a single | |
2014 | * submission. | |
2015 | */ | |
2016 | if (!list_is_last(&last->sched.link, | |
253a774b CW |
2017 | &engine->active.requests)) { |
2018 | /* | |
2019 | * Even if ELSP[1] is occupied and not worthy | |
2020 | * of timeslices, our queue might be. | |
2021 | */ | |
3df2deed | 2022 | start_timeslice(engine); |
22b7a426 | 2023 | return; |
253a774b | 2024 | } |
22b7a426 | 2025 | } |
beecec90 CW |
2026 | } |
2027 | ||
6d06779e CW |
2028 | while (rb) { /* XXX virtual is always taking precedence */ |
2029 | struct virtual_engine *ve = | |
2030 | rb_entry(rb, typeof(*ve), nodes[engine->id].rb); | |
2031 | struct i915_request *rq; | |
2032 | ||
422d7df4 | 2033 | spin_lock(&ve->base.active.lock); |
6d06779e CW |
2034 | |
2035 | rq = ve->request; | |
2036 | if (unlikely(!rq)) { /* lost the race to a sibling */ | |
422d7df4 | 2037 | spin_unlock(&ve->base.active.lock); |
6d06779e CW |
2038 | rb_erase_cached(rb, &execlists->virtual); |
2039 | RB_CLEAR_NODE(rb); | |
2040 | rb = rb_first_cached(&execlists->virtual); | |
2041 | continue; | |
2042 | } | |
2043 | ||
2044 | GEM_BUG_ON(rq != ve->request); | |
2045 | GEM_BUG_ON(rq->engine != &ve->base); | |
9f3ccd40 | 2046 | GEM_BUG_ON(rq->context != &ve->context); |
6d06779e CW |
2047 | |
2048 | if (rq_prio(rq) >= queue_prio(execlists)) { | |
2049 | if (!virtual_matches(ve, rq, engine)) { | |
422d7df4 | 2050 | spin_unlock(&ve->base.active.lock); |
6d06779e CW |
2051 | rb = rb_next(rb); |
2052 | continue; | |
2053 | } | |
2054 | ||
2055 | if (last && !can_merge_rq(last, rq)) { | |
422d7df4 | 2056 | spin_unlock(&ve->base.active.lock); |
3df2deed CW |
2057 | start_timeslice(engine); |
2058 | return; /* leave this for another sibling */ | |
6d06779e CW |
2059 | } |
2060 | ||
639f2f24 VSD |
2061 | ENGINE_TRACE(engine, |
2062 | "virtual rq=%llx:%lld%s, new engine? %s\n", | |
2063 | rq->fence.context, | |
2064 | rq->fence.seqno, | |
2065 | i915_request_completed(rq) ? "!" : | |
2066 | i915_request_started(rq) ? "*" : | |
2067 | "", | |
2068 | yesno(engine != ve->siblings[0])); | |
6d06779e | 2069 | |
3a55dc89 CW |
2070 | WRITE_ONCE(ve->request, NULL); |
2071 | WRITE_ONCE(ve->base.execlists.queue_priority_hint, | |
2072 | INT_MIN); | |
6d06779e CW |
2073 | rb_erase_cached(rb, &execlists->virtual); |
2074 | RB_CLEAR_NODE(rb); | |
2075 | ||
ee113690 | 2076 | GEM_BUG_ON(!(rq->execution_mask & engine->mask)); |
3a55dc89 | 2077 | WRITE_ONCE(rq->engine, engine); |
6d06779e CW |
2078 | |
2079 | if (engine != ve->siblings[0]) { | |
2080 | u32 *regs = ve->context.lrc_reg_state; | |
2081 | unsigned int n; | |
2082 | ||
754f7a0b | 2083 | GEM_BUG_ON(READ_ONCE(ve->context.inflight)); |
cdb736fa MK |
2084 | |
2085 | if (!intel_engine_has_relative_mmio(engine)) | |
2086 | virtual_update_register_offsets(regs, | |
2087 | engine); | |
6d06779e CW |
2088 | |
2089 | if (!list_empty(&ve->context.signals)) | |
a97b786b | 2090 | virtual_xfer_breadcrumbs(ve, rq); |
6d06779e CW |
2091 | |
2092 | /* | |
2093 | * Move the bound engine to the top of the list | |
2094 | * for future execution. We then kick this | |
2095 | * tasklet first before checking others, so that | |
2096 | * we preferentially reuse this set of bound | |
2097 | * registers. | |
2098 | */ | |
2099 | for (n = 1; n < ve->num_siblings; n++) { | |
2100 | if (ve->siblings[n] == engine) { | |
2101 | swap(ve->siblings[n], | |
2102 | ve->siblings[0]); | |
2103 | break; | |
2104 | } | |
2105 | } | |
2106 | ||
2107 | GEM_BUG_ON(ve->siblings[0] != engine); | |
2108 | } | |
2109 | ||
c0bb487d | 2110 | if (__i915_request_submit(rq)) { |
22b7a426 CW |
2111 | submit = true; |
2112 | last = rq; | |
2113 | } | |
b647c7df | 2114 | i915_request_put(rq); |
c0bb487d CW |
2115 | |
2116 | /* | |
2117 | * Hmm, we have a bunch of virtual engine requests, | |
2118 | * but the first one was already completed (thanks | |
2119 | * preempt-to-busy!). Keep looking at the veng queue | |
2120 | * until we have no more relevant requests (i.e. | |
2121 | * the normal submit queue has higher priority). | |
2122 | */ | |
2123 | if (!submit) { | |
2124 | spin_unlock(&ve->base.active.lock); | |
2125 | rb = rb_first_cached(&execlists->virtual); | |
2126 | continue; | |
2127 | } | |
6d06779e CW |
2128 | } |
2129 | ||
422d7df4 | 2130 | spin_unlock(&ve->base.active.lock); |
6d06779e CW |
2131 | break; |
2132 | } | |
2133 | ||
655250a8 | 2134 | while ((rb = rb_first_cached(&execlists->queue))) { |
f6322edd | 2135 | struct i915_priolist *p = to_priolist(rb); |
e61e0f51 | 2136 | struct i915_request *rq, *rn; |
85f5e1f3 | 2137 | int i; |
6c067579 | 2138 | |
85f5e1f3 | 2139 | priolist_for_each_request_consume(rq, rn, p, i) { |
c0bb487d | 2140 | bool merge = true; |
22b7a426 | 2141 | |
6c067579 CW |
2142 | /* |
2143 | * Can we combine this request with the current port? | |
2144 | * It has to be the same context/ringbuffer and not | |
2145 | * have any exceptions (e.g. GVT saying never to | |
2146 | * combine contexts). | |
2147 | * | |
2148 | * If we can combine the requests, we can execute both | |
2149 | * by updating the RING_TAIL to point to the end of the | |
2150 | * second request, and so we never need to tell the | |
2151 | * hardware about the first. | |
70c2a24d | 2152 | */ |
c10c78ad | 2153 | if (last && !can_merge_rq(last, rq)) { |
6c067579 CW |
2154 | /* |
2155 | * If we are on the second port and cannot | |
2156 | * combine this request with the last, then we | |
2157 | * are done. | |
2158 | */ | |
85f5e1f3 | 2159 | if (port == last_port) |
6c067579 | 2160 | goto done; |
6c067579 | 2161 | |
c10c78ad CW |
2162 | /* |
2163 | * We must not populate both ELSP[] with the | |
2164 | * same LRCA, i.e. we must submit 2 different | |
2165 | * contexts if we submit 2 ELSP. | |
2166 | */ | |
9f3ccd40 | 2167 | if (last->context == rq->context) |
c10c78ad CW |
2168 | goto done; |
2169 | ||
c3eb54aa CW |
2170 | if (i915_request_has_sentinel(last)) |
2171 | goto done; | |
2172 | ||
6c067579 CW |
2173 | /* |
2174 | * If GVT overrides us we only ever submit | |
2175 | * port[0], leaving port[1] empty. Note that we | |
2176 | * also have to be careful that we don't queue | |
2177 | * the same context (even though a different | |
2178 | * request) to the second port. | |
2179 | */ | |
9f3ccd40 CW |
2180 | if (ctx_single_port_submission(last->context) || |
2181 | ctx_single_port_submission(rq->context)) | |
6c067579 | 2182 | goto done; |
6c067579 | 2183 | |
c0bb487d | 2184 | merge = false; |
6c067579 | 2185 | } |
70c2a24d | 2186 | |
c0bb487d CW |
2187 | if (__i915_request_submit(rq)) { |
2188 | if (!merge) { | |
2189 | *port = execlists_schedule_in(last, port - execlists->pending); | |
2190 | port++; | |
2191 | last = NULL; | |
2192 | } | |
2193 | ||
2194 | GEM_BUG_ON(last && | |
9f3ccd40 CW |
2195 | !can_merge_ctx(last->context, |
2196 | rq->context)); | |
1eaa251b CW |
2197 | GEM_BUG_ON(last && |
2198 | i915_seqno_passed(last->fence.seqno, | |
2199 | rq->fence.seqno)); | |
c0bb487d CW |
2200 | |
2201 | submit = true; | |
2202 | last = rq; | |
2203 | } | |
70c2a24d | 2204 | } |
d55ac5bf | 2205 | |
655250a8 | 2206 | rb_erase_cached(&p->node, &execlists->queue); |
32eb6bcf | 2207 | i915_priolist_free(p); |
f6322edd | 2208 | } |
15c83c43 | 2209 | |
6c067579 | 2210 | done: |
15c83c43 CW |
2211 | /* |
2212 | * Here be a bit of magic! Or sleight-of-hand, whichever you prefer. | |
2213 | * | |
4d97cbe0 | 2214 | * We choose the priority hint such that if we add a request of greater |
15c83c43 CW |
2215 | * priority than this, we kick the submission tasklet to decide on |
2216 | * the right order of submitting the requests to hardware. We must | |
2217 | * also be prepared to reorder requests as they are in-flight on the | |
4d97cbe0 | 2218 | * HW. We derive the priority hint then as the first "hole" in |
15c83c43 CW |
2219 | * the HW submission ports and if there are no available slots, |
2220 | * the priority of the lowest executing request, i.e. last. | |
2221 | * | |
2222 | * When we do receive a higher priority request ready to run from the | |
4d97cbe0 | 2223 | * user, see queue_request(), the priority hint is bumped to that |
15c83c43 CW |
2224 | * request triggering preemption on the next dequeue (or subsequent |
2225 | * interrupt for secondary ports). | |
2226 | */ | |
c10c78ad | 2227 | execlists->queue_priority_hint = queue_prio(execlists); |
15c83c43 | 2228 | |
0b02befa | 2229 | if (submit) { |
22b7a426 | 2230 | *port = execlists_schedule_in(last, port - execlists->pending); |
df403069 CW |
2231 | execlists->switch_priority_hint = |
2232 | switch_prio(engine, *execlists->pending); | |
44d0a9c0 CW |
2233 | |
2234 | /* | |
2235 | * Skip if we ended up with exactly the same set of requests, | |
2236 | * e.g. trying to timeslice a pair of ordered contexts | |
2237 | */ | |
60ef5b7a | 2238 | if (!memcmp(active, execlists->pending, |
44d0a9c0 CW |
2239 | (port - execlists->pending + 1) * sizeof(*port))) { |
2240 | do | |
2241 | execlists_schedule_out(fetch_and_zero(port)); | |
2242 | while (port-- != execlists->pending); | |
2243 | ||
2244 | goto skip_submit; | |
2245 | } | |
ab17e6ca | 2246 | clear_ports(port + 1, last_port - port); |
44d0a9c0 | 2247 | |
220dcfc1 | 2248 | WRITE_ONCE(execlists->yield, -1); |
a20ab592 | 2249 | execlists_submit_ports(engine); |
60ef5b7a | 2250 | set_preempt_timeout(engine, *active); |
8db7933e | 2251 | } else { |
44d0a9c0 | 2252 | skip_submit: |
8db7933e | 2253 | ring_set_paused(engine, 0); |
0b02befa | 2254 | } |
4413c474 CW |
2255 | } |
2256 | ||
5f15c1e6 CW |
2257 | static void |
2258 | cancel_port_requests(struct intel_engine_execlists * const execlists) | |
cf4591d1 | 2259 | { |
da0ef77e | 2260 | struct i915_request * const *port; |
702791f7 | 2261 | |
da0ef77e CW |
2262 | for (port = execlists->pending; *port; port++) |
2263 | execlists_schedule_out(*port); | |
ab17e6ca | 2264 | clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending)); |
7e44fc28 | 2265 | |
331bf905 | 2266 | /* Mark the end of active before we overwrite *active */ |
da0ef77e CW |
2267 | for (port = xchg(&execlists->active, execlists->pending); *port; port++) |
2268 | execlists_schedule_out(*port); | |
ab17e6ca CW |
2269 | clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight)); |
2270 | ||
f494960d | 2271 | smp_wmb(); /* complete the seqlock for execlists_active() */ |
ab17e6ca | 2272 | WRITE_ONCE(execlists->active, execlists->inflight); |
cf4591d1 MK |
2273 | } |
2274 | ||
d8f50531 MK |
2275 | static inline void |
2276 | invalidate_csb_entries(const u32 *first, const u32 *last) | |
2277 | { | |
2278 | clflush((void *)first); | |
2279 | clflush((void *)last); | |
2280 | } | |
2281 | ||
f4785682 DCS |
2282 | /* |
2283 | * Starting with Gen12, the status has a new format: | |
2284 | * | |
2285 | * bit 0: switched to new queue | |
2286 | * bit 1: reserved | |
2287 | * bit 2: semaphore wait mode (poll or signal), only valid when | |
2288 | * switch detail is set to "wait on semaphore" | |
2289 | * bits 3-5: engine class | |
2290 | * bits 6-11: engine instance | |
2291 | * bits 12-14: reserved | |
2292 | * bits 15-25: sw context id of the lrc the GT switched to | |
2293 | * bits 26-31: sw counter of the lrc the GT switched to | |
2294 | * bits 32-35: context switch detail | |
2295 | * - 0: ctx complete | |
2296 | * - 1: wait on sync flip | |
2297 | * - 2: wait on vblank | |
2298 | * - 3: wait on scanline | |
2299 | * - 4: wait on semaphore | |
2300 | * - 5: context preempted (not on SEMAPHORE_WAIT or | |
2301 | * WAIT_FOR_EVENT) | |
2302 | * bit 36: reserved | |
2303 | * bits 37-43: wait detail (for switch detail 1 to 4) | |
2304 | * bits 44-46: reserved | |
2305 | * bits 47-57: sw context id of the lrc the GT switched away from | |
2306 | * bits 58-63: sw counter of the lrc the GT switched away from | |
2307 | */ | |
198d2533 | 2308 | static inline bool |
f4785682 DCS |
2309 | gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) |
2310 | { | |
2311 | u32 lower_dw = csb[0]; | |
2312 | u32 upper_dw = csb[1]; | |
2313 | bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw); | |
2314 | bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw); | |
2315 | bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE; | |
2316 | ||
f4785682 DCS |
2317 | /* |
2318 | * The context switch detail is not guaranteed to be 5 when a preemption | |
2319 | * occurs, so we can't just check for that. The check below works for | |
2320 | * all the cases we care about, including preemptions of WAIT | |
2321 | * instructions and lite-restore. Preempt-to-idle via the CTRL register | |
2322 | * would require some extra handling, but we don't support that. | |
2323 | */ | |
f9d4eae2 CW |
2324 | if (!ctx_away_valid || new_queue) { |
2325 | GEM_BUG_ON(!ctx_to_valid); | |
198d2533 | 2326 | return true; |
f9d4eae2 | 2327 | } |
f4785682 DCS |
2328 | |
2329 | /* | |
2330 | * switch detail = 5 is covered by the case above and we do not expect a | |
2331 | * context switch on an unsuccessful wait instruction since we always | |
2332 | * use polling mode. | |
2333 | */ | |
2334 | GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw)); | |
198d2533 | 2335 | return false; |
f4785682 DCS |
2336 | } |
2337 | ||
198d2533 | 2338 | static inline bool |
f4785682 | 2339 | gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) |
8759aa4c | 2340 | { |
198d2533 | 2341 | return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED); |
8759aa4c CW |
2342 | } |
2343 | ||
73377dbc | 2344 | static void process_csb(struct intel_engine_cs *engine) |
e981e7b1 | 2345 | { |
b620e870 | 2346 | struct intel_engine_execlists * const execlists = &engine->execlists; |
bc4237ec | 2347 | const u32 * const buf = execlists->csb_status; |
7d4c75d9 | 2348 | const u8 num_entries = execlists->csb_size; |
bc4237ec | 2349 | u8 head, tail; |
c6a2ac71 | 2350 | |
3c00660d CW |
2351 | /* |
2352 | * As we modify our execlists state tracking we require exclusive | |
2353 | * access. Either we are inside the tasklet, or the tasklet is disabled | |
2354 | * and we assume that is only inside the reset paths and so serialised. | |
2355 | */ | |
2356 | GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) && | |
2357 | !reset_in_progress(execlists)); | |
19c17b76 | 2358 | GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine)); |
c9a64622 | 2359 | |
bc4237ec CW |
2360 | /* |
2361 | * Note that csb_write, csb_status may be either in HWSP or mmio. | |
2362 | * When reading from the csb_write mmio register, we have to be | |
2363 | * careful to only use the GEN8_CSB_WRITE_PTR portion, which is | |
2364 | * the low 4bits. As it happens we know the next 4bits are always | |
2365 | * zero and so we can simply masked off the low u8 of the register | |
2366 | * and treat it identically to reading from the HWSP (without having | |
2367 | * to use explicit shifting and masking, and probably bifurcating | |
2368 | * the code to handle the legacy mmio read). | |
2369 | */ | |
2370 | head = execlists->csb_head; | |
2371 | tail = READ_ONCE(*execlists->csb_write); | |
bc4237ec CW |
2372 | if (unlikely(head == tail)) |
2373 | return; | |
b2209e62 | 2374 | |
bc4237ec CW |
2375 | /* |
2376 | * Hopefully paired with a wmb() in HW! | |
2377 | * | |
2378 | * We must complete the read of the write pointer before any reads | |
2379 | * from the CSB, so that we do not see stale values. Without an rmb | |
2380 | * (lfence) the HW may speculatively perform the CSB[] reads *before* | |
2381 | * we perform the READ_ONCE(*csb_write). | |
2382 | */ | |
2383 | rmb(); | |
767a983a | 2384 | |
7d7569ac | 2385 | ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail); |
bc4237ec | 2386 | do { |
198d2533 | 2387 | bool promote; |
f4785682 | 2388 | |
7d4c75d9 | 2389 | if (++head == num_entries) |
8ea397fa CW |
2390 | head = 0; |
2391 | ||
2392 | /* | |
2393 | * We are flying near dragons again. | |
2394 | * | |
2395 | * We hold a reference to the request in execlist_port[] | |
2396 | * but no more than that. We are operating in softirq | |
2397 | * context and so cannot hold any mutex or sleep. That | |
2398 | * prevents us stopping the requests we are processing | |
2399 | * in port[] from being retired simultaneously (the | |
2400 | * breadcrumb will be complete before we see the | |
2401 | * context-switch). As we only hold the reference to the | |
2402 | * request, any pointer chasing underneath the request | |
2403 | * is subject to a potential use-after-free. Thus we | |
2404 | * store all of the bookkeeping within port[] as | |
2405 | * required, and avoid using unguarded pointers beneath | |
2406 | * request itself. The same applies to the atomic | |
2407 | * status notifier. | |
2408 | */ | |
2409 | ||
639f2f24 VSD |
2410 | ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n", |
2411 | head, buf[2 * head + 0], buf[2 * head + 1]); | |
8ea397fa | 2412 | |
f4785682 | 2413 | if (INTEL_GEN(engine->i915) >= 12) |
198d2533 | 2414 | promote = gen12_csb_parse(execlists, buf + 2 * head); |
f4785682 | 2415 | else |
198d2533 CW |
2416 | promote = gen8_csb_parse(execlists, buf + 2 * head); |
2417 | if (promote) { | |
331bf905 CW |
2418 | struct i915_request * const *old = execlists->active; |
2419 | ||
b6560007 CW |
2420 | GEM_BUG_ON(!assert_pending_valid(execlists, "promote")); |
2421 | ||
2422 | ring_set_paused(engine, 0); | |
2423 | ||
331bf905 CW |
2424 | /* Point active to the new ELSP; prevent overwriting */ |
2425 | WRITE_ONCE(execlists->active, execlists->pending); | |
f494960d | 2426 | smp_wmb(); /* notify execlists_active() */ |
331bf905 | 2427 | |
198d2533 | 2428 | /* cancel old inflight, prepare for switch */ |
331bf905 CW |
2429 | trace_ports(execlists, "preempted", old); |
2430 | while (*old) | |
2431 | execlists_schedule_out(*old++); | |
8759aa4c | 2432 | |
198d2533 | 2433 | /* switch pending to inflight */ |
f494960d CW |
2434 | memcpy(execlists->inflight, |
2435 | execlists->pending, | |
2436 | execlists_num_ports(execlists) * | |
2437 | sizeof(*execlists->pending)); | |
2438 | smp_wmb(); /* complete the seqlock */ | |
2439 | WRITE_ONCE(execlists->active, execlists->inflight); | |
8ee36e04 | 2440 | |
df403069 | 2441 | WRITE_ONCE(execlists->pending[0], NULL); |
198d2533 CW |
2442 | } else { |
2443 | GEM_BUG_ON(!*execlists->active); | |
22b7a426 | 2444 | |
198d2533 | 2445 | /* port0 completed, advanced to port1 */ |
8759aa4c | 2446 | trace_ports(execlists, "completed", execlists->active); |
2ffe80aa | 2447 | |
8ea397fa CW |
2448 | /* |
2449 | * We rely on the hardware being strongly | |
2450 | * ordered, that the breadcrumb write is | |
2451 | * coherent (visible from the CPU) before the | |
2452 | * user interrupt and CSB is processed. | |
2453 | */ | |
c616d238 CW |
2454 | if (GEM_SHOW_DEBUG() && |
2455 | !i915_request_completed(*execlists->active) && | |
2456 | !reset_in_progress(execlists)) { | |
e06b8524 CW |
2457 | struct i915_request *rq __maybe_unused = |
2458 | *execlists->active; | |
2459 | const u32 *regs __maybe_unused = | |
2460 | rq->context->lrc_reg_state; | |
c616d238 CW |
2461 | |
2462 | ENGINE_TRACE(engine, | |
2463 | "ring:{start:0x%08x, head:%04x, tail:%04x, ctl:%08x, mode:%08x}\n", | |
2464 | ENGINE_READ(engine, RING_START), | |
2465 | ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR, | |
2466 | ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR, | |
2467 | ENGINE_READ(engine, RING_CTL), | |
2468 | ENGINE_READ(engine, RING_MI_MODE)); | |
2469 | ENGINE_TRACE(engine, | |
2470 | "rq:{start:%08x, head:%04x, tail:%04x, seqno:%llx:%d, hwsp:%d}, ", | |
2471 | i915_ggtt_offset(rq->ring->vma), | |
2472 | rq->head, rq->tail, | |
2473 | rq->fence.context, | |
2474 | lower_32_bits(rq->fence.seqno), | |
2475 | hwsp_seqno(rq)); | |
2476 | ENGINE_TRACE(engine, | |
2477 | "ctx:{start:%08x, head:%04x, tail:%04x}, ", | |
2478 | regs[CTX_RING_START], | |
2479 | regs[CTX_RING_HEAD], | |
2480 | regs[CTX_RING_TAIL]); | |
2481 | ||
2482 | GEM_BUG_ON("context completed before request"); | |
2483 | } | |
2484 | ||
8759aa4c | 2485 | execlists_schedule_out(*execlists->active++); |
beecec90 | 2486 | |
22b7a426 CW |
2487 | GEM_BUG_ON(execlists->active - execlists->inflight > |
2488 | execlists_num_ports(execlists)); | |
4af0d727 | 2489 | } |
bc4237ec | 2490 | } while (head != tail); |
e981e7b1 | 2491 | |
bc4237ec | 2492 | execlists->csb_head = head; |
6b7133b6 | 2493 | set_timeslice(engine); |
d8f50531 MK |
2494 | |
2495 | /* | |
2496 | * Gen11 has proven to fail wrt global observation point between | |
2497 | * entry and tail update, failing on the ordering and thus | |
2498 | * we see an old entry in the context status buffer. | |
2499 | * | |
2500 | * Forcibly evict out entries for the next gpu csb update, | |
2501 | * to increase the odds that we get a fresh entries with non | |
2502 | * working hardware. The cost for doing so comes out mostly with | |
2503 | * the wash as hardware, working or not, will need to do the | |
2504 | * invalidation before. | |
2505 | */ | |
1863e302 | 2506 | invalidate_csb_entries(&buf[0], &buf[num_entries - 1]); |
73377dbc | 2507 | } |
c6a2ac71 | 2508 | |
9512f985 | 2509 | static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) |
73377dbc | 2510 | { |
422d7df4 | 2511 | lockdep_assert_held(&engine->active.lock); |
8a574698 | 2512 | if (!READ_ONCE(engine->execlists.pending[0])) { |
c949ae43 | 2513 | rcu_read_lock(); /* protect peeking at execlists->active */ |
73377dbc | 2514 | execlists_dequeue(engine); |
c949ae43 CW |
2515 | rcu_read_unlock(); |
2516 | } | |
e981e7b1 TD |
2517 | } |
2518 | ||
32ff621f CW |
2519 | static void __execlists_hold(struct i915_request *rq) |
2520 | { | |
2521 | LIST_HEAD(list); | |
2522 | ||
2523 | do { | |
2524 | struct i915_dependency *p; | |
2525 | ||
2526 | if (i915_request_is_active(rq)) | |
2527 | __i915_request_unsubmit(rq); | |
2528 | ||
32ff621f CW |
2529 | clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); |
2530 | list_move_tail(&rq->sched.link, &rq->engine->active.hold); | |
2531 | i915_request_set_hold(rq); | |
26208d87 | 2532 | RQ_TRACE(rq, "on hold\n"); |
32ff621f | 2533 | |
793c2261 | 2534 | for_each_waiter(p, rq) { |
32ff621f CW |
2535 | struct i915_request *w = |
2536 | container_of(p->waiter, typeof(*w), sched); | |
2537 | ||
2538 | /* Leave semaphores spinning on the other engines */ | |
2539 | if (w->engine != rq->engine) | |
2540 | continue; | |
2541 | ||
2542 | if (!i915_request_is_ready(w)) | |
2543 | continue; | |
2544 | ||
2545 | if (i915_request_completed(w)) | |
2546 | continue; | |
2547 | ||
26208d87 | 2548 | if (i915_request_on_hold(w)) |
32ff621f CW |
2549 | continue; |
2550 | ||
2551 | list_move_tail(&w->sched.link, &list); | |
2552 | } | |
2553 | ||
2554 | rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); | |
2555 | } while (rq); | |
2556 | } | |
2557 | ||
4ba5c086 | 2558 | static bool execlists_hold(struct intel_engine_cs *engine, |
32ff621f CW |
2559 | struct i915_request *rq) |
2560 | { | |
2561 | spin_lock_irq(&engine->active.lock); | |
2562 | ||
4ba5c086 CW |
2563 | if (i915_request_completed(rq)) { /* too late! */ |
2564 | rq = NULL; | |
2565 | goto unlock; | |
2566 | } | |
2567 | ||
989df3a7 CW |
2568 | if (rq->engine != engine) { /* preempted virtual engine */ |
2569 | struct virtual_engine *ve = to_virtual_engine(rq->engine); | |
2570 | ||
2571 | /* | |
2572 | * intel_context_inflight() is only protected by virtue | |
2573 | * of process_csb() being called only by the tasklet (or | |
2574 | * directly from inside reset while the tasklet is suspended). | |
2575 | * Assert that neither of those are allowed to run while we | |
2576 | * poke at the request queues. | |
2577 | */ | |
2578 | GEM_BUG_ON(!reset_in_progress(&engine->execlists)); | |
2579 | ||
2580 | /* | |
2581 | * An unsubmitted request along a virtual engine will | |
2582 | * remain on the active (this) engine until we are able | |
2583 | * to process the context switch away (and so mark the | |
2584 | * context as no longer in flight). That cannot have happened | |
2585 | * yet, otherwise we would not be hanging! | |
2586 | */ | |
2587 | spin_lock(&ve->base.active.lock); | |
2588 | GEM_BUG_ON(intel_context_inflight(rq->context) != engine); | |
2589 | GEM_BUG_ON(ve->request != rq); | |
2590 | ve->request = NULL; | |
2591 | spin_unlock(&ve->base.active.lock); | |
2592 | i915_request_put(rq); | |
2593 | ||
2594 | rq->engine = engine; | |
2595 | } | |
2596 | ||
32ff621f CW |
2597 | /* |
2598 | * Transfer this request onto the hold queue to prevent it | |
2599 | * being resumbitted to HW (and potentially completed) before we have | |
2600 | * released it. Since we may have already submitted following | |
2601 | * requests, we need to remove those as well. | |
2602 | */ | |
2603 | GEM_BUG_ON(i915_request_on_hold(rq)); | |
2604 | GEM_BUG_ON(rq->engine != engine); | |
2605 | __execlists_hold(rq); | |
26208d87 | 2606 | GEM_BUG_ON(list_empty(&engine->active.hold)); |
32ff621f | 2607 | |
4ba5c086 | 2608 | unlock: |
32ff621f | 2609 | spin_unlock_irq(&engine->active.lock); |
4ba5c086 | 2610 | return rq; |
32ff621f CW |
2611 | } |
2612 | ||
2613 | static bool hold_request(const struct i915_request *rq) | |
2614 | { | |
2615 | struct i915_dependency *p; | |
66940061 | 2616 | bool result = false; |
32ff621f CW |
2617 | |
2618 | /* | |
2619 | * If one of our ancestors is on hold, we must also be on hold, | |
2620 | * otherwise we will bypass it and execute before it. | |
2621 | */ | |
66940061 | 2622 | rcu_read_lock(); |
793c2261 | 2623 | for_each_signaler(p, rq) { |
32ff621f CW |
2624 | const struct i915_request *s = |
2625 | container_of(p->signaler, typeof(*s), sched); | |
2626 | ||
2627 | if (s->engine != rq->engine) | |
2628 | continue; | |
2629 | ||
66940061 CW |
2630 | result = i915_request_on_hold(s); |
2631 | if (result) | |
2632 | break; | |
32ff621f | 2633 | } |
66940061 | 2634 | rcu_read_unlock(); |
32ff621f | 2635 | |
66940061 | 2636 | return result; |
32ff621f CW |
2637 | } |
2638 | ||
2639 | static void __execlists_unhold(struct i915_request *rq) | |
2640 | { | |
2641 | LIST_HEAD(list); | |
2642 | ||
2643 | do { | |
2644 | struct i915_dependency *p; | |
2645 | ||
26208d87 CW |
2646 | RQ_TRACE(rq, "hold release\n"); |
2647 | ||
32ff621f CW |
2648 | GEM_BUG_ON(!i915_request_on_hold(rq)); |
2649 | GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); | |
2650 | ||
2651 | i915_request_clear_hold(rq); | |
2652 | list_move_tail(&rq->sched.link, | |
2653 | i915_sched_lookup_priolist(rq->engine, | |
2654 | rq_prio(rq))); | |
2655 | set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); | |
32ff621f CW |
2656 | |
2657 | /* Also release any children on this engine that are ready */ | |
793c2261 | 2658 | for_each_waiter(p, rq) { |
32ff621f CW |
2659 | struct i915_request *w = |
2660 | container_of(p->waiter, typeof(*w), sched); | |
2661 | ||
8e9f84cf CW |
2662 | /* Propagate any change in error status */ |
2663 | if (rq->fence.error) | |
2664 | i915_request_set_error_once(w, rq->fence.error); | |
2665 | ||
32ff621f CW |
2666 | if (w->engine != rq->engine) |
2667 | continue; | |
2668 | ||
26208d87 | 2669 | if (!i915_request_on_hold(w)) |
32ff621f CW |
2670 | continue; |
2671 | ||
2672 | /* Check that no other parents are also on hold */ | |
26208d87 | 2673 | if (hold_request(w)) |
32ff621f CW |
2674 | continue; |
2675 | ||
2676 | list_move_tail(&w->sched.link, &list); | |
2677 | } | |
2678 | ||
2679 | rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); | |
2680 | } while (rq); | |
2681 | } | |
2682 | ||
32ff621f CW |
2683 | static void execlists_unhold(struct intel_engine_cs *engine, |
2684 | struct i915_request *rq) | |
2685 | { | |
2686 | spin_lock_irq(&engine->active.lock); | |
2687 | ||
2688 | /* | |
2689 | * Move this request back to the priority queue, and all of its | |
2690 | * children and grandchildren that were suspended along with it. | |
2691 | */ | |
2692 | __execlists_unhold(rq); | |
2693 | ||
2694 | if (rq_prio(rq) > engine->execlists.queue_priority_hint) { | |
2695 | engine->execlists.queue_priority_hint = rq_prio(rq); | |
2696 | tasklet_hi_schedule(&engine->execlists.tasklet); | |
2697 | } | |
2698 | ||
2699 | spin_unlock_irq(&engine->active.lock); | |
2700 | } | |
2701 | ||
74831738 CW |
2702 | struct execlists_capture { |
2703 | struct work_struct work; | |
2704 | struct i915_request *rq; | |
2705 | struct i915_gpu_coredump *error; | |
2706 | }; | |
2707 | ||
2708 | static void execlists_capture_work(struct work_struct *work) | |
2709 | { | |
2710 | struct execlists_capture *cap = container_of(work, typeof(*cap), work); | |
2711 | const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN; | |
2712 | struct intel_engine_cs *engine = cap->rq->engine; | |
2713 | struct intel_gt_coredump *gt = cap->error->gt; | |
2714 | struct intel_engine_capture_vma *vma; | |
2715 | ||
2716 | /* Compress all the objects attached to the request, slow! */ | |
2717 | vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp); | |
2718 | if (vma) { | |
2719 | struct i915_vma_compress *compress = | |
2720 | i915_vma_capture_prepare(gt); | |
2721 | ||
2722 | intel_engine_coredump_add_vma(gt->engine, vma, compress); | |
2723 | i915_vma_capture_finish(gt, compress); | |
2724 | } | |
2725 | ||
2726 | gt->simulated = gt->engine->simulated; | |
2727 | cap->error->simulated = gt->simulated; | |
2728 | ||
2729 | /* Publish the error state, and announce it to the world */ | |
2730 | i915_error_state_store(cap->error); | |
2731 | i915_gpu_coredump_put(cap->error); | |
2732 | ||
2733 | /* Return this request and all that depend upon it for signaling */ | |
2734 | execlists_unhold(engine, cap->rq); | |
4ba5c086 | 2735 | i915_request_put(cap->rq); |
74831738 CW |
2736 | |
2737 | kfree(cap); | |
2738 | } | |
2739 | ||
2740 | static struct execlists_capture *capture_regs(struct intel_engine_cs *engine) | |
2741 | { | |
2742 | const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN; | |
2743 | struct execlists_capture *cap; | |
2744 | ||
2745 | cap = kmalloc(sizeof(*cap), gfp); | |
2746 | if (!cap) | |
2747 | return NULL; | |
2748 | ||
2749 | cap->error = i915_gpu_coredump_alloc(engine->i915, gfp); | |
2750 | if (!cap->error) | |
2751 | goto err_cap; | |
2752 | ||
2753 | cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp); | |
2754 | if (!cap->error->gt) | |
2755 | goto err_gpu; | |
2756 | ||
2757 | cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp); | |
2758 | if (!cap->error->gt->engine) | |
2759 | goto err_gt; | |
2760 | ||
2761 | return cap; | |
2762 | ||
2763 | err_gt: | |
2764 | kfree(cap->error->gt); | |
2765 | err_gpu: | |
2766 | kfree(cap->error); | |
2767 | err_cap: | |
2768 | kfree(cap); | |
2769 | return NULL; | |
2770 | } | |
2771 | ||
4ba5c086 | 2772 | static bool execlists_capture(struct intel_engine_cs *engine) |
74831738 CW |
2773 | { |
2774 | struct execlists_capture *cap; | |
2775 | ||
2776 | if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)) | |
4ba5c086 | 2777 | return true; |
74831738 CW |
2778 | |
2779 | /* | |
2780 | * We need to _quickly_ capture the engine state before we reset. | |
2781 | * We are inside an atomic section (softirq) here and we are delaying | |
2782 | * the forced preemption event. | |
2783 | */ | |
2784 | cap = capture_regs(engine); | |
2785 | if (!cap) | |
4ba5c086 | 2786 | return true; |
74831738 | 2787 | |
70a76a9b | 2788 | spin_lock_irq(&engine->active.lock); |
74831738 | 2789 | cap->rq = execlists_active(&engine->execlists); |
70a76a9b CW |
2790 | if (cap->rq) { |
2791 | cap->rq = active_request(cap->rq->context->timeline, cap->rq); | |
2792 | cap->rq = i915_request_get_rcu(cap->rq); | |
2793 | } | |
2794 | spin_unlock_irq(&engine->active.lock); | |
4ba5c086 CW |
2795 | if (!cap->rq) |
2796 | goto err_free; | |
74831738 CW |
2797 | |
2798 | /* | |
2799 | * Remove the request from the execlists queue, and take ownership | |
2800 | * of the request. We pass it to our worker who will _slowly_ compress | |
2801 | * all the pages the _user_ requested for debugging their batch, after | |
2802 | * which we return it to the queue for signaling. | |
2803 | * | |
2804 | * By removing them from the execlists queue, we also remove the | |
2805 | * requests from being processed by __unwind_incomplete_requests() | |
2806 | * during the intel_engine_reset(), and so they will *not* be replayed | |
2807 | * afterwards. | |
2808 | * | |
2809 | * Note that because we have not yet reset the engine at this point, | |
2810 | * it is possible for the request that we have identified as being | |
2811 | * guilty, did in fact complete and we will then hit an arbitration | |
2812 | * point allowing the outstanding preemption to succeed. The likelihood | |
2813 | * of that is very low (as capturing of the engine registers should be | |
2814 | * fast enough to run inside an irq-off atomic section!), so we will | |
2815 | * simply hold that request accountable for being non-preemptible | |
2816 | * long enough to force the reset. | |
2817 | */ | |
4ba5c086 CW |
2818 | if (!execlists_hold(engine, cap->rq)) |
2819 | goto err_rq; | |
74831738 CW |
2820 | |
2821 | INIT_WORK(&cap->work, execlists_capture_work); | |
2822 | schedule_work(&cap->work); | |
4ba5c086 CW |
2823 | return true; |
2824 | ||
2825 | err_rq: | |
2826 | i915_request_put(cap->rq); | |
2827 | err_free: | |
2828 | i915_gpu_coredump_put(cap->error); | |
2829 | kfree(cap); | |
2830 | return false; | |
74831738 CW |
2831 | } |
2832 | ||
70a76a9b | 2833 | static void execlists_reset(struct intel_engine_cs *engine, const char *msg) |
3a7a92ab CW |
2834 | { |
2835 | const unsigned int bit = I915_RESET_ENGINE + engine->id; | |
2836 | unsigned long *lock = &engine->gt->reset.flags; | |
2837 | ||
70a76a9b | 2838 | if (!intel_has_reset_engine(engine->gt)) |
3a7a92ab CW |
2839 | return; |
2840 | ||
2841 | if (test_and_set_bit(bit, lock)) | |
2842 | return; | |
2843 | ||
70a76a9b CW |
2844 | ENGINE_TRACE(engine, "reset for %s\n", msg); |
2845 | ||
3a7a92ab CW |
2846 | /* Mark this tasklet as disabled to avoid waiting for it to complete */ |
2847 | tasklet_disable_nosync(&engine->execlists.tasklet); | |
2848 | ||
74831738 | 2849 | ring_set_paused(engine, 1); /* Freeze the current request in place */ |
4ba5c086 | 2850 | if (execlists_capture(engine)) |
70a76a9b | 2851 | intel_engine_reset(engine, msg); |
4ba5c086 CW |
2852 | else |
2853 | ring_set_paused(engine, 0); | |
3a7a92ab CW |
2854 | |
2855 | tasklet_enable(&engine->execlists.tasklet); | |
2856 | clear_and_wake_up_bit(bit, lock); | |
2857 | } | |
2858 | ||
2859 | static bool preempt_timeout(const struct intel_engine_cs *const engine) | |
2860 | { | |
2861 | const struct timer_list *t = &engine->execlists.preempt; | |
2862 | ||
2863 | if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT) | |
2864 | return false; | |
2865 | ||
2866 | if (!timer_expired(t)) | |
2867 | return false; | |
2868 | ||
2869 | return READ_ONCE(engine->execlists.pending[0]); | |
2870 | } | |
2871 | ||
9512f985 CW |
2872 | /* |
2873 | * Check the unread Context Status Buffers and manage the submission of new | |
2874 | * contexts to the ELSP accordingly. | |
2875 | */ | |
2876 | static void execlists_submission_tasklet(unsigned long data) | |
2877 | { | |
2878 | struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; | |
3a7a92ab | 2879 | bool timeout = preempt_timeout(engine); |
9512f985 | 2880 | |
df403069 | 2881 | process_csb(engine); |
70a76a9b CW |
2882 | |
2883 | if (unlikely(READ_ONCE(engine->execlists.error_interrupt))) { | |
2884 | engine->execlists.error_interrupt = 0; | |
2885 | if (ENGINE_READ(engine, RING_ESR)) /* confirm the error */ | |
2886 | execlists_reset(engine, "CS error"); | |
2887 | } | |
2888 | ||
3a7a92ab CW |
2889 | if (!READ_ONCE(engine->execlists.pending[0]) || timeout) { |
2890 | unsigned long flags; | |
2891 | ||
df403069 CW |
2892 | spin_lock_irqsave(&engine->active.lock, flags); |
2893 | __execlists_submission_tasklet(engine); | |
2894 | spin_unlock_irqrestore(&engine->active.lock, flags); | |
3a7a92ab CW |
2895 | |
2896 | /* Recheck after serialising with direct-submission */ | |
70a76a9b CW |
2897 | if (unlikely(timeout && preempt_timeout(engine))) |
2898 | execlists_reset(engine, "preemption time out"); | |
df403069 | 2899 | } |
9512f985 CW |
2900 | } |
2901 | ||
3a7a92ab | 2902 | static void __execlists_kick(struct intel_engine_execlists *execlists) |
8ee36e04 | 2903 | { |
8ee36e04 | 2904 | /* Kick the tasklet for some interrupt coalescing and reset handling */ |
3a7a92ab CW |
2905 | tasklet_hi_schedule(&execlists->tasklet); |
2906 | } | |
2907 | ||
2908 | #define execlists_kick(t, member) \ | |
2909 | __execlists_kick(container_of(t, struct intel_engine_execlists, member)) | |
2910 | ||
2911 | static void execlists_timeslice(struct timer_list *timer) | |
2912 | { | |
2913 | execlists_kick(timer, timer); | |
2914 | } | |
2915 | ||
2916 | static void execlists_preempt(struct timer_list *timer) | |
2917 | { | |
2918 | execlists_kick(timer, preempt); | |
8ee36e04 CW |
2919 | } |
2920 | ||
f6322edd | 2921 | static void queue_request(struct intel_engine_cs *engine, |
672c368f | 2922 | struct i915_request *rq) |
27606fd8 | 2923 | { |
672c368f CW |
2924 | GEM_BUG_ON(!list_empty(&rq->sched.link)); |
2925 | list_add_tail(&rq->sched.link, | |
2926 | i915_sched_lookup_priolist(engine, rq_prio(rq))); | |
2927 | set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); | |
9512f985 CW |
2928 | } |
2929 | ||
2930 | static void __submit_queue_imm(struct intel_engine_cs *engine) | |
2931 | { | |
2932 | struct intel_engine_execlists * const execlists = &engine->execlists; | |
2933 | ||
2934 | if (reset_in_progress(execlists)) | |
2935 | return; /* defer until we restart the engine following reset */ | |
2936 | ||
2937 | if (execlists->tasklet.func == execlists_submission_tasklet) | |
2938 | __execlists_submission_tasklet(engine); | |
2939 | else | |
2940 | tasklet_hi_schedule(&execlists->tasklet); | |
ae2f5c00 CW |
2941 | } |
2942 | ||
22b7a426 CW |
2943 | static void submit_queue(struct intel_engine_cs *engine, |
2944 | const struct i915_request *rq) | |
f6322edd | 2945 | { |
22b7a426 CW |
2946 | struct intel_engine_execlists *execlists = &engine->execlists; |
2947 | ||
2948 | if (rq_prio(rq) <= execlists->queue_priority_hint) | |
2949 | return; | |
2950 | ||
2951 | execlists->queue_priority_hint = rq_prio(rq); | |
2952 | __submit_queue_imm(engine); | |
27606fd8 CW |
2953 | } |
2954 | ||
32ff621f CW |
2955 | static bool ancestor_on_hold(const struct intel_engine_cs *engine, |
2956 | const struct i915_request *rq) | |
2957 | { | |
2958 | GEM_BUG_ON(i915_request_on_hold(rq)); | |
2959 | return !list_empty(&engine->active.hold) && hold_request(rq); | |
2960 | } | |
2961 | ||
e61e0f51 | 2962 | static void execlists_submit_request(struct i915_request *request) |
acdd884a | 2963 | { |
4a570db5 | 2964 | struct intel_engine_cs *engine = request->engine; |
5590af3e | 2965 | unsigned long flags; |
acdd884a | 2966 | |
663f71e7 | 2967 | /* Will be called from irq-context when using foreign fences. */ |
422d7df4 | 2968 | spin_lock_irqsave(&engine->active.lock, flags); |
acdd884a | 2969 | |
32ff621f | 2970 | if (unlikely(ancestor_on_hold(engine, request))) { |
26208d87 | 2971 | RQ_TRACE(request, "ancestor on hold\n"); |
32ff621f CW |
2972 | list_add_tail(&request->sched.link, &engine->active.hold); |
2973 | i915_request_set_hold(request); | |
2974 | } else { | |
2975 | queue_request(engine, request); | |
acdd884a | 2976 | |
32ff621f CW |
2977 | GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); |
2978 | GEM_BUG_ON(list_empty(&request->sched.link)); | |
6c067579 | 2979 | |
32ff621f CW |
2980 | submit_queue(engine, request); |
2981 | } | |
9512f985 | 2982 | |
422d7df4 | 2983 | spin_unlock_irqrestore(&engine->active.lock, flags); |
acdd884a MT |
2984 | } |
2985 | ||
c4d52feb | 2986 | static void __execlists_context_fini(struct intel_context *ce) |
1fc44d9b | 2987 | { |
65baf0ef | 2988 | intel_ring_put(ce->ring); |
a93615f9 | 2989 | i915_vma_put(ce->state); |
1fc44d9b CW |
2990 | } |
2991 | ||
4c5896dc | 2992 | static void execlists_context_destroy(struct kref *kref) |
c4d52feb | 2993 | { |
4c5896dc CW |
2994 | struct intel_context *ce = container_of(kref, typeof(*ce), ref); |
2995 | ||
09c5ab38 | 2996 | GEM_BUG_ON(!i915_active_is_idle(&ce->active)); |
08819549 | 2997 | GEM_BUG_ON(intel_context_is_pinned(ce)); |
c4d52feb CW |
2998 | |
2999 | if (ce->state) | |
3000 | __execlists_context_fini(ce); | |
3001 | ||
df8cf31e | 3002 | intel_context_fini(ce); |
c4d52feb CW |
3003 | intel_context_free(ce); |
3004 | } | |
3005 | ||
9559c875 CW |
3006 | static void |
3007 | set_redzone(void *vaddr, const struct intel_engine_cs *engine) | |
3008 | { | |
3009 | if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) | |
3010 | return; | |
3011 | ||
9559c875 CW |
3012 | vaddr += engine->context_size; |
3013 | ||
1d0e2c93 | 3014 | memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE); |
9559c875 CW |
3015 | } |
3016 | ||
3017 | static void | |
3018 | check_redzone(const void *vaddr, const struct intel_engine_cs *engine) | |
3019 | { | |
3020 | if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) | |
3021 | return; | |
3022 | ||
9559c875 CW |
3023 | vaddr += engine->context_size; |
3024 | ||
1d0e2c93 | 3025 | if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE)) |
9559c875 CW |
3026 | dev_err_once(engine->i915->drm.dev, |
3027 | "%s context redzone overwritten!\n", | |
3028 | engine->name); | |
3029 | } | |
3030 | ||
867985d4 | 3031 | static void execlists_context_unpin(struct intel_context *ce) |
1fc44d9b | 3032 | { |
9559c875 CW |
3033 | check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE, |
3034 | ce->engine); | |
3035 | ||
1fc44d9b | 3036 | i915_gem_object_unpin_map(ce->state->obj); |
f4e15af7 CW |
3037 | } |
3038 | ||
8e525cb4 | 3039 | static void |
7dc56af5 | 3040 | __execlists_update_reg_state(const struct intel_context *ce, |
42827350 CW |
3041 | const struct intel_engine_cs *engine, |
3042 | u32 head) | |
8e525cb4 | 3043 | { |
8e525cb4 | 3044 | struct intel_ring *ring = ce->ring; |
95f697eb CW |
3045 | u32 *regs = ce->lrc_reg_state; |
3046 | ||
42827350 | 3047 | GEM_BUG_ON(!intel_ring_offset_valid(ring, head)); |
95f697eb | 3048 | GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail)); |
8e525cb4 | 3049 | |
b0b10248 | 3050 | regs[CTX_RING_START] = i915_ggtt_offset(ring->vma); |
42827350 | 3051 | regs[CTX_RING_HEAD] = head; |
7dc56af5 | 3052 | regs[CTX_RING_TAIL] = ring->tail; |
88be76cd | 3053 | regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID; |
8e525cb4 TU |
3054 | |
3055 | /* RPCS */ | |
a9877da2 | 3056 | if (engine->class == RENDER_CLASS) { |
7dc56af5 | 3057 | regs[CTX_R_PWR_CLK_STATE] = |
09407579 | 3058 | intel_sseu_make_rpcs(engine->i915, &ce->sseu); |
a9877da2 | 3059 | |
7dc56af5 | 3060 | i915_oa_init_reg_state(ce, engine); |
a9877da2 | 3061 | } |
8e525cb4 TU |
3062 | } |
3063 | ||
95f697eb CW |
3064 | static int |
3065 | __execlists_context_pin(struct intel_context *ce, | |
3066 | struct intel_engine_cs *engine) | |
dcb4c12a | 3067 | { |
7d774cac | 3068 | void *vaddr; |
dcb4c12a | 3069 | |
56f6e0a7 | 3070 | GEM_BUG_ON(!ce->state); |
12c255b5 | 3071 | GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); |
7ba717cf | 3072 | |
666424ab | 3073 | vaddr = i915_gem_object_pin_map(ce->state->obj, |
95f697eb | 3074 | i915_coherent_map_type(engine->i915) | |
666424ab | 3075 | I915_MAP_OVERRIDE); |
b11b28ea CW |
3076 | if (IS_ERR(vaddr)) |
3077 | return PTR_ERR(vaddr); | |
82352e90 | 3078 | |
53b2622e | 3079 | ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE; |
a3aabe86 | 3080 | ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; |
42827350 | 3081 | __execlists_update_reg_state(ce, engine, ce->ring->tail); |
a3aabe86 | 3082 | |
95f697eb | 3083 | return 0; |
e84fe803 NH |
3084 | } |
3085 | ||
95f697eb | 3086 | static int execlists_context_pin(struct intel_context *ce) |
e84fe803 | 3087 | { |
95f697eb | 3088 | return __execlists_context_pin(ce, ce->engine); |
dcb4c12a OM |
3089 | } |
3090 | ||
4c60b1aa CW |
3091 | static int execlists_context_alloc(struct intel_context *ce) |
3092 | { | |
3093 | return __execlists_context_alloc(ce, ce->engine); | |
3094 | } | |
3095 | ||
9726920b CW |
3096 | static void execlists_context_reset(struct intel_context *ce) |
3097 | { | |
49a24e71 CW |
3098 | CE_TRACE(ce, "reset\n"); |
3099 | GEM_BUG_ON(!intel_context_is_pinned(ce)); | |
3100 | ||
49a24e71 CW |
3101 | intel_ring_reset(ce->ring, ce->ring->emit); |
3102 | ||
3103 | /* Scrub away the garbage */ | |
3104 | execlists_init_reg_state(ce->lrc_reg_state, | |
3105 | ce, ce->engine, ce->ring, true); | |
42827350 | 3106 | __execlists_update_reg_state(ce, ce->engine, ce->ring->tail); |
49a24e71 | 3107 | |
53b2622e | 3108 | ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; |
9726920b CW |
3109 | } |
3110 | ||
4dc84b77 | 3111 | static const struct intel_context_ops execlists_context_ops = { |
4c60b1aa CW |
3112 | .alloc = execlists_context_alloc, |
3113 | ||
95f697eb | 3114 | .pin = execlists_context_pin, |
4dc84b77 | 3115 | .unpin = execlists_context_unpin, |
9726920b | 3116 | |
6eee33e8 CW |
3117 | .enter = intel_context_enter_engine, |
3118 | .exit = intel_context_exit_engine, | |
3119 | ||
9726920b | 3120 | .reset = execlists_context_reset, |
4dc84b77 CW |
3121 | .destroy = execlists_context_destroy, |
3122 | }; | |
3123 | ||
85474441 CW |
3124 | static int gen8_emit_init_breadcrumb(struct i915_request *rq) |
3125 | { | |
3126 | u32 *cs; | |
3127 | ||
f16ccb64 CW |
3128 | if (!i915_request_timeline(rq)->has_initial_breadcrumb) |
3129 | return 0; | |
85474441 CW |
3130 | |
3131 | cs = intel_ring_begin(rq, 6); | |
3132 | if (IS_ERR(cs)) | |
3133 | return PTR_ERR(cs); | |
3134 | ||
3135 | /* | |
3136 | * Check if we have been preempted before we even get started. | |
3137 | * | |
3138 | * After this point i915_request_started() reports true, even if | |
3139 | * we get preempted and so are no longer running. | |
3140 | */ | |
3141 | *cs++ = MI_ARB_CHECK; | |
3142 | *cs++ = MI_NOOP; | |
3143 | ||
3144 | *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; | |
d19d71fc | 3145 | *cs++ = i915_request_timeline(rq)->hwsp_offset; |
85474441 CW |
3146 | *cs++ = 0; |
3147 | *cs++ = rq->fence.seqno - 1; | |
3148 | ||
3149 | intel_ring_advance(rq, cs); | |
21182b3c CW |
3150 | |
3151 | /* Record the updated position of the request's payload */ | |
3152 | rq->infix = intel_ring_offset(rq, cs); | |
3153 | ||
85474441 CW |
3154 | return 0; |
3155 | } | |
3156 | ||
e61e0f51 | 3157 | static int execlists_request_alloc(struct i915_request *request) |
ef11c01d | 3158 | { |
fd138212 | 3159 | int ret; |
ef11c01d | 3160 | |
9f3ccd40 | 3161 | GEM_BUG_ON(!intel_context_is_pinned(request->context)); |
e8a9c58f | 3162 | |
5f5800a7 CW |
3163 | /* |
3164 | * Flush enough space to reduce the likelihood of waiting after | |
ef11c01d CW |
3165 | * we start building the request - in which case we will just |
3166 | * have to repeat work. | |
3167 | */ | |
3168 | request->reserved_space += EXECLISTS_REQUEST_SIZE; | |
3169 | ||
5f5800a7 CW |
3170 | /* |
3171 | * Note that after this point, we have committed to using | |
ef11c01d CW |
3172 | * this request as it is being used to both track the |
3173 | * state of engine initialisation and liveness of the | |
3174 | * golden renderstate above. Think twice before you try | |
3175 | * to cancel/unwind this request now. | |
3176 | */ | |
3177 | ||
e8894267 | 3178 | /* Unconditionally invalidate GPU caches and TLBs. */ |
0b718ba1 | 3179 | ret = request->engine->emit_flush(request, EMIT_INVALIDATE); |
e8894267 CW |
3180 | if (ret) |
3181 | return ret; | |
3182 | ||
ef11c01d CW |
3183 | request->reserved_space -= EXECLISTS_REQUEST_SIZE; |
3184 | return 0; | |
ef11c01d CW |
3185 | } |
3186 | ||
9e000847 AS |
3187 | /* |
3188 | * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after | |
3189 | * PIPE_CONTROL instruction. This is required for the flush to happen correctly | |
3190 | * but there is a slight complication as this is applied in WA batch where the | |
3191 | * values are only initialized once so we cannot take register value at the | |
3192 | * beginning and reuse it further; hence we save its value to memory, upload a | |
3193 | * constant value with bit21 set and then we restore it back with the saved value. | |
3194 | * To simplify the WA, a constant value is formed by using the default value | |
3195 | * of this register. This shouldn't be a problem because we are only modifying | |
3196 | * it for a short period and this batch in non-premptible. We can ofcourse | |
3197 | * use additional instructions that read the actual value of the register | |
3198 | * at that time and set our bit of interest but it makes the WA complicated. | |
3199 | * | |
3200 | * This WA is also required for Gen9 so extracting as a function avoids | |
3201 | * code duplication. | |
3202 | */ | |
097d4f1c TU |
3203 | static u32 * |
3204 | gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch) | |
17ee950d | 3205 | { |
51797499 | 3206 | /* NB no one else is allowed to scribble over scratch + 256! */ |
097d4f1c TU |
3207 | *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; |
3208 | *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); | |
46c5847e LL |
3209 | *batch++ = intel_gt_scratch_offset(engine->gt, |
3210 | INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA); | |
097d4f1c TU |
3211 | *batch++ = 0; |
3212 | ||
3213 | *batch++ = MI_LOAD_REGISTER_IMM(1); | |
3214 | *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); | |
3215 | *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES; | |
3216 | ||
9f235dfa TU |
3217 | batch = gen8_emit_pipe_control(batch, |
3218 | PIPE_CONTROL_CS_STALL | | |
3219 | PIPE_CONTROL_DC_FLUSH_ENABLE, | |
3220 | 0); | |
097d4f1c TU |
3221 | |
3222 | *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; | |
3223 | *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); | |
46c5847e LL |
3224 | *batch++ = intel_gt_scratch_offset(engine->gt, |
3225 | INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA); | |
097d4f1c TU |
3226 | *batch++ = 0; |
3227 | ||
3228 | return batch; | |
17ee950d AS |
3229 | } |
3230 | ||
6e5248b5 DV |
3231 | /* |
3232 | * Typically we only have one indirect_ctx and per_ctx batch buffer which are | |
3233 | * initialized at the beginning and shared across all contexts but this field | |
3234 | * helps us to have multiple batches at different offsets and select them based | |
3235 | * on a criteria. At the moment this batch always start at the beginning of the page | |
3236 | * and at this point we don't have multiple wa_ctx batch buffers. | |
4d78c8dc | 3237 | * |
6e5248b5 DV |
3238 | * The number of WA applied are not known at the beginning; we use this field |
3239 | * to return the no of DWORDS written. | |
17ee950d | 3240 | * |
6e5248b5 DV |
3241 | * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END |
3242 | * so it adds NOOPs as padding to make it cacheline aligned. | |
3243 | * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together | |
3244 | * makes a complete batch buffer. | |
17ee950d | 3245 | */ |
097d4f1c | 3246 | static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) |
17ee950d | 3247 | { |
7ad00d1a | 3248 | /* WaDisableCtxRestoreArbitration:bdw,chv */ |
097d4f1c | 3249 | *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; |
17ee950d | 3250 | |
c82435bb | 3251 | /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */ |
097d4f1c TU |
3252 | if (IS_BROADWELL(engine->i915)) |
3253 | batch = gen8_emit_flush_coherentl3_wa(engine, batch); | |
c82435bb | 3254 | |
0160f055 AS |
3255 | /* WaClearSlmSpaceAtContextSwitch:bdw,chv */ |
3256 | /* Actual scratch location is at 128 bytes offset */ | |
9f235dfa TU |
3257 | batch = gen8_emit_pipe_control(batch, |
3258 | PIPE_CONTROL_FLUSH_L3 | | |
e1237523 | 3259 | PIPE_CONTROL_STORE_DATA_INDEX | |
9f235dfa TU |
3260 | PIPE_CONTROL_CS_STALL | |
3261 | PIPE_CONTROL_QW_WRITE, | |
e1237523 | 3262 | LRC_PPHWSP_SCRATCH_ADDR); |
0160f055 | 3263 | |
beecec90 CW |
3264 | *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; |
3265 | ||
17ee950d | 3266 | /* Pad to end of cacheline */ |
097d4f1c TU |
3267 | while ((unsigned long)batch % CACHELINE_BYTES) |
3268 | *batch++ = MI_NOOP; | |
17ee950d AS |
3269 | |
3270 | /* | |
3271 | * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because | |
3272 | * execution depends on the length specified in terms of cache lines | |
3273 | * in the register CTX_RCS_INDIRECT_CTX | |
3274 | */ | |
3275 | ||
097d4f1c | 3276 | return batch; |
17ee950d AS |
3277 | } |
3278 | ||
5ee4a7a6 CW |
3279 | struct lri { |
3280 | i915_reg_t reg; | |
3281 | u32 value; | |
3282 | }; | |
3283 | ||
3284 | static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count) | |
0504cffc | 3285 | { |
5ee4a7a6 | 3286 | GEM_BUG_ON(!count || count > 63); |
beecec90 | 3287 | |
5ee4a7a6 CW |
3288 | *batch++ = MI_LOAD_REGISTER_IMM(count); |
3289 | do { | |
3290 | *batch++ = i915_mmio_reg_offset(lri->reg); | |
3291 | *batch++ = lri->value; | |
3292 | } while (lri++, --count); | |
3293 | *batch++ = MI_NOOP; | |
a4106a78 | 3294 | |
5ee4a7a6 CW |
3295 | return batch; |
3296 | } | |
b77422f8 | 3297 | |
5ee4a7a6 CW |
3298 | static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) |
3299 | { | |
3300 | static const struct lri lri[] = { | |
3301 | /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */ | |
3302 | { | |
3303 | COMMON_SLICE_CHICKEN2, | |
3304 | __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE, | |
3305 | 0), | |
3306 | }, | |
3307 | ||
3308 | /* BSpec: 11391 */ | |
3309 | { | |
3310 | FF_SLICE_CHICKEN, | |
3311 | __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX, | |
3312 | FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX), | |
3313 | }, | |
3314 | ||
3315 | /* BSpec: 11299 */ | |
3316 | { | |
3317 | _3D_CHICKEN3, | |
3318 | __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX, | |
3319 | _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX), | |
3320 | } | |
3321 | }; | |
b77422f8 | 3322 | |
5ee4a7a6 | 3323 | *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; |
b77422f8 | 3324 | |
5ee4a7a6 CW |
3325 | /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */ |
3326 | batch = gen8_emit_flush_coherentl3_wa(engine, batch); | |
b77422f8 | 3327 | |
bc8a76a1 AA |
3328 | /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */ |
3329 | batch = gen8_emit_pipe_control(batch, | |
3330 | PIPE_CONTROL_FLUSH_L3 | | |
3331 | PIPE_CONTROL_STORE_DATA_INDEX | | |
3332 | PIPE_CONTROL_CS_STALL | | |
3333 | PIPE_CONTROL_QW_WRITE, | |
3334 | LRC_PPHWSP_SCRATCH_ADDR); | |
3335 | ||
5ee4a7a6 | 3336 | batch = emit_lri(batch, lri, ARRAY_SIZE(lri)); |
873e8171 | 3337 | |
9fb5026f | 3338 | /* WaMediaPoolStateCmdInWABB:bxt,glk */ |
3485d99e TG |
3339 | if (HAS_POOLED_EU(engine->i915)) { |
3340 | /* | |
3341 | * EU pool configuration is setup along with golden context | |
3342 | * during context initialization. This value depends on | |
3343 | * device type (2x6 or 3x6) and needs to be updated based | |
3344 | * on which subslice is disabled especially for 2x6 | |
3345 | * devices, however it is safe to load default | |
3346 | * configuration of 3x6 device instead of masking off | |
3347 | * corresponding bits because HW ignores bits of a disabled | |
3348 | * subslice and drops down to appropriate config. Please | |
3349 | * see render_state_setup() in i915_gem_render_state.c for | |
3350 | * possible configurations, to avoid duplication they are | |
3351 | * not shown here again. | |
3352 | */ | |
097d4f1c TU |
3353 | *batch++ = GEN9_MEDIA_POOL_STATE; |
3354 | *batch++ = GEN9_MEDIA_POOL_ENABLE; | |
3355 | *batch++ = 0x00777000; | |
3356 | *batch++ = 0; | |
3357 | *batch++ = 0; | |
3358 | *batch++ = 0; | |
3485d99e TG |
3359 | } |
3360 | ||
beecec90 CW |
3361 | *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; |
3362 | ||
0504cffc | 3363 | /* Pad to end of cacheline */ |
097d4f1c TU |
3364 | while ((unsigned long)batch % CACHELINE_BYTES) |
3365 | *batch++ = MI_NOOP; | |
0504cffc | 3366 | |
097d4f1c | 3367 | return batch; |
0504cffc AS |
3368 | } |
3369 | ||
4b6ce681 RA |
3370 | static u32 * |
3371 | gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) | |
3372 | { | |
3373 | int i; | |
3374 | ||
3375 | /* | |
3376 | * WaPipeControlBefore3DStateSamplePattern: cnl | |
3377 | * | |
3378 | * Ensure the engine is idle prior to programming a | |
3379 | * 3DSTATE_SAMPLE_PATTERN during a context restore. | |
3380 | */ | |
3381 | batch = gen8_emit_pipe_control(batch, | |
3382 | PIPE_CONTROL_CS_STALL, | |
3383 | 0); | |
3384 | /* | |
3385 | * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for | |
3386 | * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in | |
3387 | * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is | |
3388 | * confusing. Since gen8_emit_pipe_control() already advances the | |
3389 | * batch by 6 dwords, we advance the other 10 here, completing a | |
3390 | * cacheline. It's not clear if the workaround requires this padding | |
3391 | * before other commands, or if it's just the regular padding we would | |
3392 | * already have for the workaround bb, so leave it here for now. | |
3393 | */ | |
3394 | for (i = 0; i < 10; i++) | |
3395 | *batch++ = MI_NOOP; | |
3396 | ||
3397 | /* Pad to end of cacheline */ | |
3398 | while ((unsigned long)batch % CACHELINE_BYTES) | |
3399 | *batch++ = MI_NOOP; | |
3400 | ||
3401 | return batch; | |
3402 | } | |
3403 | ||
097d4f1c TU |
3404 | #define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE) |
3405 | ||
3406 | static int lrc_setup_wa_ctx(struct intel_engine_cs *engine) | |
17ee950d | 3407 | { |
48bb74e4 CW |
3408 | struct drm_i915_gem_object *obj; |
3409 | struct i915_vma *vma; | |
3410 | int err; | |
17ee950d | 3411 | |
8475355f | 3412 | obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE); |
48bb74e4 CW |
3413 | if (IS_ERR(obj)) |
3414 | return PTR_ERR(obj); | |
17ee950d | 3415 | |
ba4134a4 | 3416 | vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); |
48bb74e4 CW |
3417 | if (IS_ERR(vma)) { |
3418 | err = PTR_ERR(vma); | |
3419 | goto err; | |
17ee950d AS |
3420 | } |
3421 | ||
e3793468 | 3422 | err = i915_ggtt_pin(vma, 0, PIN_HIGH); |
48bb74e4 CW |
3423 | if (err) |
3424 | goto err; | |
3425 | ||
3426 | engine->wa_ctx.vma = vma; | |
17ee950d | 3427 | return 0; |
48bb74e4 CW |
3428 | |
3429 | err: | |
3430 | i915_gem_object_put(obj); | |
3431 | return err; | |
17ee950d AS |
3432 | } |
3433 | ||
097d4f1c | 3434 | static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine) |
17ee950d | 3435 | { |
6a2f59e4 | 3436 | i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0); |
17ee950d AS |
3437 | } |
3438 | ||
097d4f1c TU |
3439 | typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch); |
3440 | ||
0bc40be8 | 3441 | static int intel_init_workaround_bb(struct intel_engine_cs *engine) |
17ee950d | 3442 | { |
48bb74e4 | 3443 | struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; |
097d4f1c TU |
3444 | struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx, |
3445 | &wa_ctx->per_ctx }; | |
3446 | wa_bb_func_t wa_bb_fn[2]; | |
17ee950d | 3447 | struct page *page; |
097d4f1c TU |
3448 | void *batch, *batch_ptr; |
3449 | unsigned int i; | |
48bb74e4 | 3450 | int ret; |
17ee950d | 3451 | |
11334c6a CW |
3452 | if (engine->class != RENDER_CLASS) |
3453 | return 0; | |
17ee950d | 3454 | |
097d4f1c | 3455 | switch (INTEL_GEN(engine->i915)) { |
13e53c5c | 3456 | case 12: |
cc38cae7 OM |
3457 | case 11: |
3458 | return 0; | |
90007bca | 3459 | case 10: |
4b6ce681 RA |
3460 | wa_bb_fn[0] = gen10_init_indirectctx_bb; |
3461 | wa_bb_fn[1] = NULL; | |
3462 | break; | |
097d4f1c TU |
3463 | case 9: |
3464 | wa_bb_fn[0] = gen9_init_indirectctx_bb; | |
b8aa2233 | 3465 | wa_bb_fn[1] = NULL; |
097d4f1c TU |
3466 | break; |
3467 | case 8: | |
3468 | wa_bb_fn[0] = gen8_init_indirectctx_bb; | |
3ad7b52d | 3469 | wa_bb_fn[1] = NULL; |
097d4f1c TU |
3470 | break; |
3471 | default: | |
3472 | MISSING_CASE(INTEL_GEN(engine->i915)); | |
5e60d790 | 3473 | return 0; |
0504cffc | 3474 | } |
5e60d790 | 3475 | |
097d4f1c | 3476 | ret = lrc_setup_wa_ctx(engine); |
17ee950d AS |
3477 | if (ret) { |
3478 | DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret); | |
3479 | return ret; | |
3480 | } | |
3481 | ||
48bb74e4 | 3482 | page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0); |
097d4f1c | 3483 | batch = batch_ptr = kmap_atomic(page); |
17ee950d | 3484 | |
097d4f1c TU |
3485 | /* |
3486 | * Emit the two workaround batch buffers, recording the offset from the | |
3487 | * start of the workaround batch buffer object for each and their | |
3488 | * respective sizes. | |
3489 | */ | |
3490 | for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) { | |
3491 | wa_bb[i]->offset = batch_ptr - batch; | |
bbb8a9d7 TU |
3492 | if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset, |
3493 | CACHELINE_BYTES))) { | |
097d4f1c TU |
3494 | ret = -EINVAL; |
3495 | break; | |
3496 | } | |
604a8f6f CW |
3497 | if (wa_bb_fn[i]) |
3498 | batch_ptr = wa_bb_fn[i](engine, batch_ptr); | |
097d4f1c | 3499 | wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset); |
17ee950d AS |
3500 | } |
3501 | ||
097d4f1c TU |
3502 | BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE); |
3503 | ||
17ee950d AS |
3504 | kunmap_atomic(batch); |
3505 | if (ret) | |
097d4f1c | 3506 | lrc_destroy_wa_ctx(engine); |
17ee950d AS |
3507 | |
3508 | return ret; | |
3509 | } | |
3510 | ||
70a76a9b CW |
3511 | static void enable_error_interrupt(struct intel_engine_cs *engine) |
3512 | { | |
3513 | u32 status; | |
3514 | ||
3515 | engine->execlists.error_interrupt = 0; | |
3516 | ENGINE_WRITE(engine, RING_EMR, ~0u); | |
3517 | ENGINE_WRITE(engine, RING_EIR, ~0u); /* clear all existing errors */ | |
3518 | ||
3519 | status = ENGINE_READ(engine, RING_ESR); | |
3520 | if (unlikely(status)) { | |
3521 | dev_err(engine->i915->drm.dev, | |
3522 | "engine '%s' resumed still in error: %08x\n", | |
3523 | engine->name, status); | |
3524 | __intel_gt_reset(engine->gt, engine->mask); | |
3525 | } | |
3526 | ||
3527 | /* | |
3528 | * On current gen8+, we have 2 signals to play with | |
3529 | * | |
3530 | * - I915_ERROR_INSTUCTION (bit 0) | |
3531 | * | |
3532 | * Generate an error if the command parser encounters an invalid | |
3533 | * instruction | |
3534 | * | |
3535 | * This is a fatal error. | |
3536 | * | |
3537 | * - CP_PRIV (bit 2) | |
3538 | * | |
3539 | * Generate an error on privilege violation (where the CP replaces | |
3540 | * the instruction with a no-op). This also fires for writes into | |
3541 | * read-only scratch pages. | |
3542 | * | |
3543 | * This is a non-fatal error, parsing continues. | |
3544 | * | |
3545 | * * there are a few others defined for odd HW that we do not use | |
3546 | * | |
3547 | * Since CP_PRIV fires for cases where we have chosen to ignore the | |
3548 | * error (as the HW is validating and suppressing the mistakes), we | |
3549 | * only unmask the instruction error bit. | |
3550 | */ | |
3551 | ENGINE_WRITE(engine, RING_EMR, ~I915_ERROR_INSTRUCTION); | |
3552 | } | |
3553 | ||
f3c9d407 | 3554 | static void enable_execlists(struct intel_engine_cs *engine) |
9b1136d5 | 3555 | { |
313443b1 CW |
3556 | u32 mode; |
3557 | ||
3558 | assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL); | |
3559 | ||
060f2322 | 3560 | intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */ |
225701fc | 3561 | |
dbc65183 | 3562 | if (INTEL_GEN(engine->i915) >= 11) |
313443b1 | 3563 | mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE); |
225701fc | 3564 | else |
313443b1 CW |
3565 | mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE); |
3566 | ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode); | |
225701fc | 3567 | |
313443b1 | 3568 | ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); |
9a4dc803 | 3569 | |
313443b1 CW |
3570 | ENGINE_WRITE_FW(engine, |
3571 | RING_HWS_PGA, | |
3572 | i915_ggtt_offset(engine->status_page.vma)); | |
dbc65183 | 3573 | ENGINE_POSTING_READ(engine, RING_HWS_PGA); |
7b02b23e | 3574 | |
70a76a9b CW |
3575 | enable_error_interrupt(engine); |
3576 | ||
1bc6a601 | 3577 | engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0); |
f3c9d407 CW |
3578 | } |
3579 | ||
9a4dc803 CW |
3580 | static bool unexpected_starting_state(struct intel_engine_cs *engine) |
3581 | { | |
9a4dc803 CW |
3582 | bool unexpected = false; |
3583 | ||
313443b1 | 3584 | if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) { |
9a4dc803 CW |
3585 | DRM_DEBUG_DRIVER("STOP_RING still set in RING_MI_MODE\n"); |
3586 | unexpected = true; | |
3587 | } | |
3588 | ||
3589 | return unexpected; | |
3590 | } | |
3591 | ||
79ffac85 | 3592 | static int execlists_resume(struct intel_engine_cs *engine) |
f3c9d407 | 3593 | { |
805615da | 3594 | intel_mocs_init_engine(engine); |
9b1136d5 | 3595 | |
ad07dfcd | 3596 | intel_engine_reset_breadcrumbs(engine); |
821ed7df | 3597 | |
9a4dc803 CW |
3598 | if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) { |
3599 | struct drm_printer p = drm_debug_printer(__func__); | |
3600 | ||
3601 | intel_engine_dump(engine, &p, NULL); | |
3602 | } | |
3603 | ||
f3c9d407 | 3604 | enable_execlists(engine); |
9b1136d5 | 3605 | |
821ed7df | 3606 | return 0; |
9b1136d5 OM |
3607 | } |
3608 | ||
eb8d0f5a | 3609 | static void execlists_reset_prepare(struct intel_engine_cs *engine) |
5adfb772 CW |
3610 | { |
3611 | struct intel_engine_execlists * const execlists = &engine->execlists; | |
9512f985 | 3612 | unsigned long flags; |
5adfb772 | 3613 | |
639f2f24 VSD |
3614 | ENGINE_TRACE(engine, "depth<-%d\n", |
3615 | atomic_read(&execlists->tasklet.count)); | |
5adfb772 CW |
3616 | |
3617 | /* | |
3618 | * Prevent request submission to the hardware until we have | |
3619 | * completed the reset in i915_gem_reset_finish(). If a request | |
3620 | * is completed by one engine, it may then queue a request | |
3621 | * to a second via its execlists->tasklet *just* as we are | |
79ffac85 | 3622 | * calling engine->resume() and also writing the ELSP. |
5adfb772 CW |
3623 | * Turning off the execlists->tasklet until the reset is over |
3624 | * prevents the race. | |
3625 | */ | |
3626 | __tasklet_disable_sync_once(&execlists->tasklet); | |
eb8d0f5a | 3627 | GEM_BUG_ON(!reset_in_progress(execlists)); |
5adfb772 | 3628 | |
eb8d0f5a | 3629 | /* And flush any current direct submission. */ |
422d7df4 CW |
3630 | spin_lock_irqsave(&engine->active.lock, flags); |
3631 | spin_unlock_irqrestore(&engine->active.lock, flags); | |
c30d5dc6 CW |
3632 | |
3633 | /* | |
3634 | * We stop engines, otherwise we might get failed reset and a | |
3635 | * dead gpu (on elk). Also as modern gpu as kbl can suffer | |
3636 | * from system hang if batchbuffer is progressing when | |
3637 | * the reset is issued, regardless of READY_TO_RESET ack. | |
3638 | * Thus assume it is best to stop engines on all gens | |
3639 | * where we have a gpu reset. | |
3640 | * | |
3641 | * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES) | |
3642 | * | |
3643 | * FIXME: Wa for more modern gens needs to be validated | |
3644 | */ | |
3645 | intel_engine_stop_cs(engine); | |
5adfb772 CW |
3646 | } |
3647 | ||
22b7a426 | 3648 | static void reset_csb_pointers(struct intel_engine_cs *engine) |
1863e302 | 3649 | { |
22b7a426 | 3650 | struct intel_engine_execlists * const execlists = &engine->execlists; |
1863e302 CW |
3651 | const unsigned int reset_value = execlists->csb_size - 1; |
3652 | ||
22b7a426 CW |
3653 | ring_set_paused(engine, 0); |
3654 | ||
1863e302 CW |
3655 | /* |
3656 | * After a reset, the HW starts writing into CSB entry [0]. We | |
3657 | * therefore have to set our HEAD pointer back one entry so that | |
3658 | * the *first* entry we check is entry 0. To complicate this further, | |
3659 | * as we don't wait for the first interrupt after reset, we have to | |
3660 | * fake the HW write to point back to the last entry so that our | |
3661 | * inline comparison of our cached head position against the last HW | |
3662 | * write works even before the first interrupt. | |
3663 | */ | |
3664 | execlists->csb_head = reset_value; | |
3665 | WRITE_ONCE(*execlists->csb_write, reset_value); | |
0edda1d6 | 3666 | wmb(); /* Make sure this is visible to HW (paranoia?) */ |
1863e302 | 3667 | |
0a1f57b8 CW |
3668 | /* |
3669 | * Sometimes Icelake forgets to reset its pointers on a GPU reset. | |
3670 | * Bludgeon them with a mmio update to be sure. | |
3671 | */ | |
3672 | ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR, | |
3673 | reset_value << 8 | reset_value); | |
3674 | ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); | |
3675 | ||
1863e302 CW |
3676 | invalidate_csb_entries(&execlists->csb_status[0], |
3677 | &execlists->csb_status[reset_value]); | |
3678 | } | |
3679 | ||
987281ab | 3680 | static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine) |
fa9a09f1 | 3681 | { |
35865aef | 3682 | int x; |
fa9a09f1 | 3683 | |
35865aef CW |
3684 | x = lrc_ring_mi_mode(engine); |
3685 | if (x != -1) { | |
3686 | regs[x + 1] &= ~STOP_RING; | |
3687 | regs[x + 1] |= STOP_RING << 16; | |
fa9a09f1 CW |
3688 | } |
3689 | } | |
3690 | ||
987281ab CW |
3691 | static void __execlists_reset_reg_state(const struct intel_context *ce, |
3692 | const struct intel_engine_cs *engine) | |
3693 | { | |
3694 | u32 *regs = ce->lrc_reg_state; | |
3695 | ||
3696 | __reset_stop_ring(regs, engine); | |
3697 | } | |
3698 | ||
1863e302 | 3699 | static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) |
821ed7df | 3700 | { |
b620e870 | 3701 | struct intel_engine_execlists * const execlists = &engine->execlists; |
1863e302 | 3702 | struct intel_context *ce; |
eb8d0f5a | 3703 | struct i915_request *rq; |
42827350 | 3704 | u32 head; |
cdb6ded4 | 3705 | |
582a6f90 CW |
3706 | mb(); /* paranoia: read the CSB pointers from after the reset */ |
3707 | clflush(execlists->csb_write); | |
3708 | mb(); | |
3709 | ||
1863e302 CW |
3710 | process_csb(engine); /* drain preemption events */ |
3711 | ||
3712 | /* Following the reset, we need to reload the CSB read/write pointers */ | |
22b7a426 | 3713 | reset_csb_pointers(engine); |
1863e302 CW |
3714 | |
3715 | /* | |
3716 | * Save the currently executing context, even if we completed | |
3717 | * its request, it was still running at the time of the | |
3718 | * reset and will have been clobbered. | |
3719 | */ | |
22b7a426 CW |
3720 | rq = execlists_active(execlists); |
3721 | if (!rq) | |
fff8102a | 3722 | goto unwind; |
1863e302 | 3723 | |
9f3ccd40 | 3724 | ce = rq->context; |
22b7a426 | 3725 | GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); |
dffa8feb | 3726 | |
a7f328fc | 3727 | if (i915_request_completed(rq)) { |
bd9bec5b | 3728 | /* Idle context; tidy up the ring so we can restart afresh */ |
42827350 | 3729 | head = intel_ring_wrap(ce->ring, rq->tail); |
1863e302 | 3730 | goto out_replay; |
22b7a426 CW |
3731 | } |
3732 | ||
d3b03d8b CW |
3733 | /* We still have requests in-flight; the engine should be active */ |
3734 | GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); | |
3735 | ||
bd9bec5b CW |
3736 | /* Context has requests still in-flight; it should not be idle! */ |
3737 | GEM_BUG_ON(i915_active_is_idle(&ce->active)); | |
d3b03d8b | 3738 | |
a7f328fc | 3739 | rq = active_request(ce->timeline, rq); |
42827350 CW |
3740 | head = intel_ring_wrap(ce->ring, rq->head); |
3741 | GEM_BUG_ON(head == ce->ring->tail); | |
1863e302 | 3742 | |
21182b3c CW |
3743 | /* |
3744 | * If this request hasn't started yet, e.g. it is waiting on a | |
3745 | * semaphore, we need to avoid skipping the request or else we | |
3746 | * break the signaling chain. However, if the context is corrupt | |
3747 | * the request will not restart and we will be stuck with a wedged | |
3748 | * device. It is quite often the case that if we issue a reset | |
3749 | * while the GPU is loading the context image, that the context | |
3750 | * image becomes corrupt. | |
3751 | * | |
3752 | * Otherwise, if we have not started yet, the request should replay | |
3753 | * perfectly and we do not need to flag the result as being erroneous. | |
3754 | */ | |
22b7a426 | 3755 | if (!i915_request_started(rq)) |
1863e302 | 3756 | goto out_replay; |
21182b3c | 3757 | |
a3e38836 CW |
3758 | /* |
3759 | * If the request was innocent, we leave the request in the ELSP | |
c0dcb203 CW |
3760 | * and will try to replay it on restarting. The context image may |
3761 | * have been corrupted by the reset, in which case we may have | |
3762 | * to service a new GPU hang, but more likely we can continue on | |
3763 | * without impact. | |
3764 | * | |
3765 | * If the request was guilty, we presume the context is corrupt | |
3766 | * and have to at least restore the RING register in the context | |
3767 | * image back to the expected values to skip over the guilty request. | |
3768 | */ | |
cb823ed9 | 3769 | __i915_request_reset(rq, stalled); |
22b7a426 | 3770 | if (!stalled) |
1863e302 | 3771 | goto out_replay; |
821ed7df | 3772 | |
a3e38836 CW |
3773 | /* |
3774 | * We want a simple context + ring to execute the breadcrumb update. | |
a3aabe86 CW |
3775 | * We cannot rely on the context being intact across the GPU hang, |
3776 | * so clear it and rebuild just what we need for the breadcrumb. | |
3777 | * All pending requests for this context will be zapped, and any | |
3778 | * future request will be after userspace has had the opportunity | |
3779 | * to recreate its own state. | |
3780 | */ | |
ae911b23 | 3781 | GEM_BUG_ON(!intel_context_is_pinned(ce)); |
d12acee8 | 3782 | restore_default_state(ce, engine); |
a3aabe86 | 3783 | |
1863e302 | 3784 | out_replay: |
639f2f24 | 3785 | ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n", |
42827350 | 3786 | head, ce->ring->tail); |
fa9a09f1 | 3787 | __execlists_reset_reg_state(ce, engine); |
42827350 | 3788 | __execlists_update_reg_state(ce, engine, head); |
53b2622e | 3789 | ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */ |
1863e302 | 3790 | |
fff8102a | 3791 | unwind: |
6d06779e | 3792 | /* Push back any incomplete requests for replay after the reset. */ |
5f15c1e6 | 3793 | cancel_port_requests(execlists); |
6d06779e | 3794 | __unwind_incomplete_requests(engine); |
1863e302 | 3795 | } |
8e525cb4 | 3796 | |
e26b6d43 | 3797 | static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled) |
1863e302 CW |
3798 | { |
3799 | unsigned long flags; | |
3800 | ||
639f2f24 | 3801 | ENGINE_TRACE(engine, "\n"); |
1863e302 | 3802 | |
422d7df4 | 3803 | spin_lock_irqsave(&engine->active.lock, flags); |
1863e302 CW |
3804 | |
3805 | __execlists_reset(engine, stalled); | |
3806 | ||
422d7df4 | 3807 | spin_unlock_irqrestore(&engine->active.lock, flags); |
1863e302 CW |
3808 | } |
3809 | ||
3810 | static void nop_submission_tasklet(unsigned long data) | |
3811 | { | |
3fc28d3e CW |
3812 | struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; |
3813 | ||
1863e302 | 3814 | /* The driver is wedged; don't process any more events. */ |
3fc28d3e | 3815 | WRITE_ONCE(engine->execlists.queue_priority_hint, INT_MIN); |
1863e302 CW |
3816 | } |
3817 | ||
e26b6d43 | 3818 | static void execlists_reset_cancel(struct intel_engine_cs *engine) |
1863e302 CW |
3819 | { |
3820 | struct intel_engine_execlists * const execlists = &engine->execlists; | |
3821 | struct i915_request *rq, *rn; | |
3822 | struct rb_node *rb; | |
3823 | unsigned long flags; | |
3824 | ||
639f2f24 | 3825 | ENGINE_TRACE(engine, "\n"); |
1863e302 CW |
3826 | |
3827 | /* | |
3828 | * Before we call engine->cancel_requests(), we should have exclusive | |
3829 | * access to the submission state. This is arranged for us by the | |
3830 | * caller disabling the interrupt generation, the tasklet and other | |
3831 | * threads that may then access the same state, giving us a free hand | |
3832 | * to reset state. However, we still need to let lockdep be aware that | |
3833 | * we know this state may be accessed in hardirq context, so we | |
3834 | * disable the irq around this manipulation and we want to keep | |
3835 | * the spinlock focused on its duties and not accidentally conflate | |
3836 | * coverage to the submission's irq state. (Similarly, although we | |
3837 | * shouldn't need to disable irq around the manipulation of the | |
3838 | * submission's irq state, we also wish to remind ourselves that | |
3839 | * it is irq state.) | |
3840 | */ | |
422d7df4 | 3841 | spin_lock_irqsave(&engine->active.lock, flags); |
1863e302 CW |
3842 | |
3843 | __execlists_reset(engine, true); | |
3844 | ||
3845 | /* Mark all executing requests as skipped. */ | |
0d7cf7bc CW |
3846 | list_for_each_entry(rq, &engine->active.requests, sched.link) |
3847 | mark_eio(rq); | |
1863e302 CW |
3848 | |
3849 | /* Flush the queued requests to the timeline list (for retiring). */ | |
3850 | while ((rb = rb_first_cached(&execlists->queue))) { | |
3851 | struct i915_priolist *p = to_priolist(rb); | |
3852 | int i; | |
3853 | ||
3854 | priolist_for_each_request_consume(rq, rn, p, i) { | |
0d7cf7bc | 3855 | mark_eio(rq); |
1863e302 | 3856 | __i915_request_submit(rq); |
1863e302 CW |
3857 | } |
3858 | ||
3859 | rb_erase_cached(&p->node, &execlists->queue); | |
3860 | i915_priolist_free(p); | |
3861 | } | |
3862 | ||
32ff621f CW |
3863 | /* On-hold requests will be flushed to timeline upon their release */ |
3864 | list_for_each_entry(rq, &engine->active.hold, sched.link) | |
3865 | mark_eio(rq); | |
3866 | ||
6d06779e CW |
3867 | /* Cancel all attached virtual engines */ |
3868 | while ((rb = rb_first_cached(&execlists->virtual))) { | |
3869 | struct virtual_engine *ve = | |
3870 | rb_entry(rb, typeof(*ve), nodes[engine->id].rb); | |
3871 | ||
3872 | rb_erase_cached(rb, &execlists->virtual); | |
3873 | RB_CLEAR_NODE(rb); | |
3874 | ||
422d7df4 | 3875 | spin_lock(&ve->base.active.lock); |
0d7cf7bc CW |
3876 | rq = fetch_and_zero(&ve->request); |
3877 | if (rq) { | |
3878 | mark_eio(rq); | |
3879 | ||
3880 | rq->engine = engine; | |
3881 | __i915_request_submit(rq); | |
b647c7df | 3882 | i915_request_put(rq); |
0d7cf7bc | 3883 | |
6d06779e | 3884 | ve->base.execlists.queue_priority_hint = INT_MIN; |
6d06779e | 3885 | } |
422d7df4 | 3886 | spin_unlock(&ve->base.active.lock); |
6d06779e CW |
3887 | } |
3888 | ||
1863e302 CW |
3889 | /* Remaining _unready_ requests will be nop'ed when submitted */ |
3890 | ||
3891 | execlists->queue_priority_hint = INT_MIN; | |
3892 | execlists->queue = RB_ROOT_CACHED; | |
1863e302 CW |
3893 | |
3894 | GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet)); | |
3895 | execlists->tasklet.func = nop_submission_tasklet; | |
8e525cb4 | 3896 | |
422d7df4 | 3897 | spin_unlock_irqrestore(&engine->active.lock, flags); |
821ed7df CW |
3898 | } |
3899 | ||
5adfb772 CW |
3900 | static void execlists_reset_finish(struct intel_engine_cs *engine) |
3901 | { | |
5db1d4ea CW |
3902 | struct intel_engine_execlists * const execlists = &engine->execlists; |
3903 | ||
fe25f304 | 3904 | /* |
9e4fa012 CW |
3905 | * After a GPU reset, we may have requests to replay. Do so now while |
3906 | * we still have the forcewake to be sure that the GPU is not allowed | |
3907 | * to sleep before we restart and reload a context. | |
fe25f304 | 3908 | */ |
eb8d0f5a | 3909 | GEM_BUG_ON(!reset_in_progress(execlists)); |
9e4fa012 CW |
3910 | if (!RB_EMPTY_ROOT(&execlists->queue.rb_root)) |
3911 | execlists->tasklet.func(execlists->tasklet.data); | |
5adfb772 | 3912 | |
41a1bde3 CW |
3913 | if (__tasklet_enable(&execlists->tasklet)) |
3914 | /* And kick in case we missed a new request submission. */ | |
3915 | tasklet_hi_schedule(&execlists->tasklet); | |
639f2f24 VSD |
3916 | ENGINE_TRACE(engine, "depth->%d\n", |
3917 | atomic_read(&execlists->tasklet.count)); | |
5adfb772 CW |
3918 | } |
3919 | ||
a5e93b42 CW |
3920 | static int gen8_emit_bb_start_noarb(struct i915_request *rq, |
3921 | u64 offset, u32 len, | |
3922 | const unsigned int flags) | |
15648585 | 3923 | { |
73dec95e | 3924 | u32 *cs; |
7a01a0a2 | 3925 | |
bac24f59 | 3926 | cs = intel_ring_begin(rq, 4); |
73dec95e TU |
3927 | if (IS_ERR(cs)) |
3928 | return PTR_ERR(cs); | |
15648585 | 3929 | |
279f5a00 CW |
3930 | /* |
3931 | * WaDisableCtxRestoreArbitration:bdw,chv | |
3932 | * | |
3933 | * We don't need to perform MI_ARB_ENABLE as often as we do (in | |
3934 | * particular all the gen that do not need the w/a at all!), if we | |
3935 | * took care to make sure that on every switch into this context | |
3936 | * (both ordinary and for preemption) that arbitrartion was enabled | |
bac24f59 CW |
3937 | * we would be fine. However, for gen8 there is another w/a that |
3938 | * requires us to not preempt inside GPGPU execution, so we keep | |
3939 | * arbitration disabled for gen8 batches. Arbitration will be | |
3940 | * re-enabled before we close the request | |
3941 | * (engine->emit_fini_breadcrumb). | |
279f5a00 | 3942 | */ |
bac24f59 CW |
3943 | *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; |
3944 | ||
3945 | /* FIXME(BDW+): Address space and security selectors. */ | |
3946 | *cs++ = MI_BATCH_BUFFER_START_GEN8 | | |
3947 | (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)); | |
3948 | *cs++ = lower_32_bits(offset); | |
3949 | *cs++ = upper_32_bits(offset); | |
3950 | ||
3951 | intel_ring_advance(rq, cs); | |
3952 | ||
3953 | return 0; | |
3954 | } | |
3955 | ||
a5e93b42 | 3956 | static int gen8_emit_bb_start(struct i915_request *rq, |
bac24f59 CW |
3957 | u64 offset, u32 len, |
3958 | const unsigned int flags) | |
3959 | { | |
3960 | u32 *cs; | |
3961 | ||
3962 | cs = intel_ring_begin(rq, 6); | |
3963 | if (IS_ERR(cs)) | |
3964 | return PTR_ERR(cs); | |
3965 | ||
3ad7b52d CW |
3966 | *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; |
3967 | ||
54af56db | 3968 | *cs++ = MI_BATCH_BUFFER_START_GEN8 | |
08e3e21a | 3969 | (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)); |
73dec95e TU |
3970 | *cs++ = lower_32_bits(offset); |
3971 | *cs++ = upper_32_bits(offset); | |
74f94741 CW |
3972 | |
3973 | *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; | |
3974 | *cs++ = MI_NOOP; | |
e8894267 | 3975 | |
e61e0f51 | 3976 | intel_ring_advance(rq, cs); |
15648585 OM |
3977 | |
3978 | return 0; | |
3979 | } | |
3980 | ||
31bb59cc | 3981 | static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine) |
73d477f6 | 3982 | { |
baba6e57 DCS |
3983 | ENGINE_WRITE(engine, RING_IMR, |
3984 | ~(engine->irq_enable_mask | engine->irq_keep_mask)); | |
3985 | ENGINE_POSTING_READ(engine, RING_IMR); | |
73d477f6 OM |
3986 | } |
3987 | ||
31bb59cc | 3988 | static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine) |
73d477f6 | 3989 | { |
baba6e57 | 3990 | ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask); |
73d477f6 OM |
3991 | } |
3992 | ||
e61e0f51 | 3993 | static int gen8_emit_flush(struct i915_request *request, u32 mode) |
4712274c | 3994 | { |
73dec95e | 3995 | u32 cmd, *cs; |
4712274c | 3996 | |
73dec95e TU |
3997 | cs = intel_ring_begin(request, 4); |
3998 | if (IS_ERR(cs)) | |
3999 | return PTR_ERR(cs); | |
4712274c OM |
4000 | |
4001 | cmd = MI_FLUSH_DW + 1; | |
4002 | ||
f0a1fb10 CW |
4003 | /* We always require a command barrier so that subsequent |
4004 | * commands, such as breadcrumb interrupts, are strictly ordered | |
4005 | * wrt the contents of the write cache being flushed to memory | |
4006 | * (and thus being coherent from the CPU). | |
4007 | */ | |
4008 | cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; | |
4009 | ||
7c9cf4e3 | 4010 | if (mode & EMIT_INVALIDATE) { |
f0a1fb10 | 4011 | cmd |= MI_INVALIDATE_TLB; |
5fc2805b | 4012 | if (request->engine->class == VIDEO_DECODE_CLASS) |
f0a1fb10 | 4013 | cmd |= MI_INVALIDATE_BSD; |
4712274c OM |
4014 | } |
4015 | ||
73dec95e | 4016 | *cs++ = cmd; |
e1237523 | 4017 | *cs++ = LRC_PPHWSP_SCRATCH_ADDR; |
73dec95e TU |
4018 | *cs++ = 0; /* upper addr */ |
4019 | *cs++ = 0; /* value */ | |
4020 | intel_ring_advance(request, cs); | |
4712274c OM |
4021 | |
4022 | return 0; | |
4023 | } | |
4024 | ||
e61e0f51 | 4025 | static int gen8_emit_flush_render(struct i915_request *request, |
7c9cf4e3 | 4026 | u32 mode) |
4712274c | 4027 | { |
0b2d0934 | 4028 | bool vf_flush_wa = false, dc_flush_wa = false; |
73dec95e | 4029 | u32 *cs, flags = 0; |
0b2d0934 | 4030 | int len; |
4712274c OM |
4031 | |
4032 | flags |= PIPE_CONTROL_CS_STALL; | |
4033 | ||
7c9cf4e3 | 4034 | if (mode & EMIT_FLUSH) { |
4712274c OM |
4035 | flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; |
4036 | flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; | |
965fd602 | 4037 | flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; |
40a24488 | 4038 | flags |= PIPE_CONTROL_FLUSH_ENABLE; |
4712274c OM |
4039 | } |
4040 | ||
7c9cf4e3 | 4041 | if (mode & EMIT_INVALIDATE) { |
4712274c OM |
4042 | flags |= PIPE_CONTROL_TLB_INVALIDATE; |
4043 | flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; | |
4044 | flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; | |
4045 | flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; | |
4046 | flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; | |
4047 | flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; | |
4048 | flags |= PIPE_CONTROL_QW_WRITE; | |
e1237523 | 4049 | flags |= PIPE_CONTROL_STORE_DATA_INDEX; |
4712274c | 4050 | |
1a5a9ce7 BW |
4051 | /* |
4052 | * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL | |
4053 | * pipe control. | |
4054 | */ | |
cf819eff | 4055 | if (IS_GEN(request->i915, 9)) |
1a5a9ce7 | 4056 | vf_flush_wa = true; |
0b2d0934 MK |
4057 | |
4058 | /* WaForGAMHang:kbl */ | |
4059 | if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0)) | |
4060 | dc_flush_wa = true; | |
1a5a9ce7 | 4061 | } |
9647ff36 | 4062 | |
0b2d0934 MK |
4063 | len = 6; |
4064 | ||
4065 | if (vf_flush_wa) | |
4066 | len += 6; | |
4067 | ||
4068 | if (dc_flush_wa) | |
4069 | len += 12; | |
4070 | ||
73dec95e TU |
4071 | cs = intel_ring_begin(request, len); |
4072 | if (IS_ERR(cs)) | |
4073 | return PTR_ERR(cs); | |
4712274c | 4074 | |
9f235dfa TU |
4075 | if (vf_flush_wa) |
4076 | cs = gen8_emit_pipe_control(cs, 0, 0); | |
9647ff36 | 4077 | |
9f235dfa TU |
4078 | if (dc_flush_wa) |
4079 | cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE, | |
4080 | 0); | |
0b2d0934 | 4081 | |
e1237523 | 4082 | cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); |
0b2d0934 | 4083 | |
9f235dfa TU |
4084 | if (dc_flush_wa) |
4085 | cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0); | |
0b2d0934 | 4086 | |
73dec95e | 4087 | intel_ring_advance(request, cs); |
4712274c OM |
4088 | |
4089 | return 0; | |
4090 | } | |
4091 | ||
cfba6bd8 MK |
4092 | static int gen11_emit_flush_render(struct i915_request *request, |
4093 | u32 mode) | |
4094 | { | |
cfba6bd8 MK |
4095 | if (mode & EMIT_FLUSH) { |
4096 | u32 *cs; | |
4097 | u32 flags = 0; | |
4098 | ||
4099 | flags |= PIPE_CONTROL_CS_STALL; | |
4100 | ||
4101 | flags |= PIPE_CONTROL_TILE_CACHE_FLUSH; | |
4102 | flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; | |
4103 | flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; | |
4104 | flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; | |
4105 | flags |= PIPE_CONTROL_FLUSH_ENABLE; | |
4106 | flags |= PIPE_CONTROL_QW_WRITE; | |
e1237523 | 4107 | flags |= PIPE_CONTROL_STORE_DATA_INDEX; |
cfba6bd8 MK |
4108 | |
4109 | cs = intel_ring_begin(request, 6); | |
4110 | if (IS_ERR(cs)) | |
4111 | return PTR_ERR(cs); | |
4112 | ||
e1237523 | 4113 | cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); |
cfba6bd8 MK |
4114 | intel_ring_advance(request, cs); |
4115 | } | |
4116 | ||
4117 | if (mode & EMIT_INVALIDATE) { | |
4118 | u32 *cs; | |
4119 | u32 flags = 0; | |
4120 | ||
4121 | flags |= PIPE_CONTROL_CS_STALL; | |
4122 | ||
8a8b540a | 4123 | flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE; |
cfba6bd8 MK |
4124 | flags |= PIPE_CONTROL_TLB_INVALIDATE; |
4125 | flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; | |
4126 | flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; | |
4127 | flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; | |
4128 | flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; | |
4129 | flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; | |
4130 | flags |= PIPE_CONTROL_QW_WRITE; | |
e1237523 | 4131 | flags |= PIPE_CONTROL_STORE_DATA_INDEX; |
cfba6bd8 MK |
4132 | |
4133 | cs = intel_ring_begin(request, 6); | |
4134 | if (IS_ERR(cs)) | |
4135 | return PTR_ERR(cs); | |
4136 | ||
e1237523 | 4137 | cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); |
cfba6bd8 MK |
4138 | intel_ring_advance(request, cs); |
4139 | } | |
4140 | ||
4141 | return 0; | |
4142 | } | |
4143 | ||
c45e788d CW |
4144 | static u32 preparser_disable(bool state) |
4145 | { | |
4146 | return MI_ARB_CHECK | 1 << 8 | state; | |
4147 | } | |
4148 | ||
4149 | static int gen12_emit_flush_render(struct i915_request *request, | |
4150 | u32 mode) | |
4151 | { | |
c45e788d CW |
4152 | if (mode & EMIT_FLUSH) { |
4153 | u32 flags = 0; | |
4154 | u32 *cs; | |
4155 | ||
4156 | flags |= PIPE_CONTROL_TILE_CACHE_FLUSH; | |
4157 | flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; | |
4158 | flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; | |
2e19af94 MK |
4159 | /* Wa_1409600907:tgl */ |
4160 | flags |= PIPE_CONTROL_DEPTH_STALL; | |
c45e788d CW |
4161 | flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; |
4162 | flags |= PIPE_CONTROL_FLUSH_ENABLE; | |
4aa0b5d4 | 4163 | flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH; |
c45e788d | 4164 | |
e1237523 | 4165 | flags |= PIPE_CONTROL_STORE_DATA_INDEX; |
c45e788d CW |
4166 | flags |= PIPE_CONTROL_QW_WRITE; |
4167 | ||
4168 | flags |= PIPE_CONTROL_CS_STALL; | |
4169 | ||
4170 | cs = intel_ring_begin(request, 6); | |
4171 | if (IS_ERR(cs)) | |
4172 | return PTR_ERR(cs); | |
4173 | ||
e1237523 | 4174 | cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); |
c45e788d CW |
4175 | intel_ring_advance(request, cs); |
4176 | } | |
4177 | ||
4178 | if (mode & EMIT_INVALIDATE) { | |
4179 | u32 flags = 0; | |
4180 | u32 *cs; | |
4181 | ||
4182 | flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE; | |
4183 | flags |= PIPE_CONTROL_TLB_INVALIDATE; | |
4184 | flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; | |
4185 | flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; | |
4186 | flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; | |
4187 | flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; | |
4188 | flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; | |
62037fff | 4189 | flags |= PIPE_CONTROL_L3_RO_CACHE_INVALIDATE; |
c45e788d | 4190 | |
e1237523 | 4191 | flags |= PIPE_CONTROL_STORE_DATA_INDEX; |
c45e788d CW |
4192 | flags |= PIPE_CONTROL_QW_WRITE; |
4193 | ||
4194 | flags |= PIPE_CONTROL_CS_STALL; | |
4195 | ||
4196 | cs = intel_ring_begin(request, 8); | |
4197 | if (IS_ERR(cs)) | |
4198 | return PTR_ERR(cs); | |
4199 | ||
4200 | /* | |
4201 | * Prevent the pre-parser from skipping past the TLB | |
4202 | * invalidate and loading a stale page for the batch | |
4203 | * buffer / request payload. | |
4204 | */ | |
4205 | *cs++ = preparser_disable(true); | |
4206 | ||
e1237523 | 4207 | cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); |
c45e788d CW |
4208 | |
4209 | *cs++ = preparser_disable(false); | |
4210 | intel_ring_advance(request, cs); | |
4211 | } | |
4212 | ||
4213 | return 0; | |
4214 | } | |
4215 | ||
7c17d377 CW |
4216 | /* |
4217 | * Reserve space for 2 NOOPs at the end of each request to be | |
4218 | * used as a workaround for not being allowed to do lite | |
4219 | * restore with HEAD==TAIL (WaIdleLiteRestore). | |
4220 | */ | |
e1a73a54 | 4221 | static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs) |
4da46e1e | 4222 | { |
beecec90 CW |
4223 | /* Ensure there's always at least one preemption point per-request. */ |
4224 | *cs++ = MI_ARB_CHECK; | |
73dec95e TU |
4225 | *cs++ = MI_NOOP; |
4226 | request->wa_tail = intel_ring_offset(request, cs); | |
e1a73a54 CW |
4227 | |
4228 | return cs; | |
caddfe71 | 4229 | } |
4da46e1e | 4230 | |
22b7a426 CW |
4231 | static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs) |
4232 | { | |
4233 | *cs++ = MI_SEMAPHORE_WAIT | | |
4234 | MI_SEMAPHORE_GLOBAL_GTT | | |
4235 | MI_SEMAPHORE_POLL | | |
4236 | MI_SEMAPHORE_SAD_EQ_SDD; | |
4237 | *cs++ = 0; | |
4238 | *cs++ = intel_hws_preempt_address(request->engine); | |
4239 | *cs++ = 0; | |
4240 | ||
4241 | return cs; | |
4242 | } | |
4243 | ||
845f7f7e MK |
4244 | static __always_inline u32* |
4245 | gen8_emit_fini_breadcrumb_footer(struct i915_request *request, | |
4246 | u32 *cs) | |
caddfe71 | 4247 | { |
73dec95e | 4248 | *cs++ = MI_USER_INTERRUPT; |
22b7a426 | 4249 | |
74f94741 | 4250 | *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; |
09975b86 CW |
4251 | if (intel_engine_has_semaphores(request->engine)) |
4252 | cs = emit_preempt_busywait(request, cs); | |
5013eb8c | 4253 | |
73dec95e | 4254 | request->tail = intel_ring_offset(request, cs); |
ed1501d4 | 4255 | assert_ring_tail_valid(request->ring, request->tail); |
caddfe71 | 4256 | |
e1a73a54 | 4257 | return gen8_emit_wa_tail(request, cs); |
7c17d377 | 4258 | } |
98f29e8d | 4259 | |
845f7f7e MK |
4260 | static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) |
4261 | { | |
4262 | cs = gen8_emit_ggtt_write(cs, | |
4263 | request->fence.seqno, | |
d19d71fc | 4264 | i915_request_active_timeline(request)->hwsp_offset, |
845f7f7e MK |
4265 | 0); |
4266 | ||
4267 | return gen8_emit_fini_breadcrumb_footer(request, cs); | |
4268 | } | |
4269 | ||
85474441 | 4270 | static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) |
7c17d377 | 4271 | { |
11988e39 CW |
4272 | cs = gen8_emit_pipe_control(cs, |
4273 | PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | | |
4274 | PIPE_CONTROL_DEPTH_CACHE_FLUSH | | |
4275 | PIPE_CONTROL_DC_FLUSH_ENABLE, | |
4276 | 0); | |
4277 | ||
4278 | /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */ | |
6a623729 | 4279 | cs = gen8_emit_ggtt_write_rcs(cs, |
5013eb8c | 4280 | request->fence.seqno, |
d19d71fc | 4281 | i915_request_active_timeline(request)->hwsp_offset, |
11988e39 CW |
4282 | PIPE_CONTROL_FLUSH_ENABLE | |
4283 | PIPE_CONTROL_CS_STALL); | |
22b7a426 | 4284 | |
845f7f7e MK |
4285 | return gen8_emit_fini_breadcrumb_footer(request, cs); |
4286 | } | |
6a623729 | 4287 | |
c210e85b CW |
4288 | static u32 * |
4289 | gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) | |
4290 | { | |
4291 | cs = gen8_emit_ggtt_write_rcs(cs, | |
4292 | request->fence.seqno, | |
d19d71fc | 4293 | i915_request_active_timeline(request)->hwsp_offset, |
c210e85b CW |
4294 | PIPE_CONTROL_CS_STALL | |
4295 | PIPE_CONTROL_TILE_CACHE_FLUSH | | |
4296 | PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | | |
4297 | PIPE_CONTROL_DEPTH_CACHE_FLUSH | | |
4298 | PIPE_CONTROL_DC_FLUSH_ENABLE | | |
4299 | PIPE_CONTROL_FLUSH_ENABLE); | |
4300 | ||
4301 | return gen8_emit_fini_breadcrumb_footer(request, cs); | |
4302 | } | |
4303 | ||
8a9a9827 DCS |
4304 | /* |
4305 | * Note that the CS instruction pre-parser will not stall on the breadcrumb | |
4306 | * flush and will continue pre-fetching the instructions after it before the | |
4307 | * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at | |
4308 | * BB_START/END instructions, so, even though we might pre-fetch the pre-amble | |
4309 | * of the next request before the memory has been flushed, we're guaranteed that | |
4310 | * we won't access the batch itself too early. | |
4311 | * However, on gen12+ the parser can pre-fetch across the BB_START/END commands, | |
4312 | * so, if the current request is modifying an instruction in the next request on | |
4313 | * the same intel_context, we might pre-fetch and then execute the pre-update | |
4314 | * instruction. To avoid this, the users of self-modifying code should either | |
4315 | * disable the parser around the code emitting the memory writes, via a new flag | |
4316 | * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For | |
4317 | * the in-kernel use-cases we've opted to use a separate context, see | |
4318 | * reloc_gpu() as an example. | |
4319 | * All the above applies only to the instructions themselves. Non-inline data | |
4320 | * used by the instructions is not pre-fetched. | |
4321 | */ | |
c210e85b CW |
4322 | |
4323 | static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs) | |
4324 | { | |
4325 | *cs++ = MI_SEMAPHORE_WAIT_TOKEN | | |
4326 | MI_SEMAPHORE_GLOBAL_GTT | | |
4327 | MI_SEMAPHORE_POLL | | |
4328 | MI_SEMAPHORE_SAD_EQ_SDD; | |
4329 | *cs++ = 0; | |
4330 | *cs++ = intel_hws_preempt_address(request->engine); | |
4331 | *cs++ = 0; | |
4332 | *cs++ = 0; | |
4333 | *cs++ = MI_NOOP; | |
4334 | ||
4335 | return cs; | |
4336 | } | |
4337 | ||
4338 | static __always_inline u32* | |
4339 | gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs) | |
4340 | { | |
4341 | *cs++ = MI_USER_INTERRUPT; | |
4342 | ||
4343 | *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; | |
4344 | if (intel_engine_has_semaphores(request->engine)) | |
4345 | cs = gen12_emit_preempt_busywait(request, cs); | |
4346 | ||
4347 | request->tail = intel_ring_offset(request, cs); | |
4348 | assert_ring_tail_valid(request->ring, request->tail); | |
4349 | ||
4350 | return gen8_emit_wa_tail(request, cs); | |
4351 | } | |
4352 | ||
4353 | static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) | |
4354 | { | |
4355 | cs = gen8_emit_ggtt_write(cs, | |
4356 | request->fence.seqno, | |
d19d71fc | 4357 | i915_request_active_timeline(request)->hwsp_offset, |
c210e85b CW |
4358 | 0); |
4359 | ||
4360 | return gen12_emit_fini_breadcrumb_footer(request, cs); | |
4361 | } | |
4362 | ||
4363 | static u32 * | |
4364 | gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) | |
845f7f7e MK |
4365 | { |
4366 | cs = gen8_emit_ggtt_write_rcs(cs, | |
4367 | request->fence.seqno, | |
d19d71fc | 4368 | i915_request_active_timeline(request)->hwsp_offset, |
845f7f7e MK |
4369 | PIPE_CONTROL_CS_STALL | |
4370 | PIPE_CONTROL_TILE_CACHE_FLUSH | | |
4371 | PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | | |
4372 | PIPE_CONTROL_DEPTH_CACHE_FLUSH | | |
2e19af94 MK |
4373 | /* Wa_1409600907:tgl */ |
4374 | PIPE_CONTROL_DEPTH_STALL | | |
845f7f7e | 4375 | PIPE_CONTROL_DC_FLUSH_ENABLE | |
4aa0b5d4 MK |
4376 | PIPE_CONTROL_FLUSH_ENABLE | |
4377 | PIPE_CONTROL_HDC_PIPELINE_FLUSH); | |
caddfe71 | 4378 | |
c210e85b | 4379 | return gen12_emit_fini_breadcrumb_footer(request, cs); |
4da46e1e | 4380 | } |
98f29e8d | 4381 | |
c34c5bca CW |
4382 | static void execlists_park(struct intel_engine_cs *engine) |
4383 | { | |
2229adc8 | 4384 | cancel_timer(&engine->execlists.timer); |
3a7a92ab | 4385 | cancel_timer(&engine->execlists.preempt); |
c34c5bca CW |
4386 | } |
4387 | ||
209b7955 | 4388 | void intel_execlists_set_default_submission(struct intel_engine_cs *engine) |
ddd66c51 | 4389 | { |
ff44ad51 | 4390 | engine->submit_request = execlists_submit_request; |
e2f3496e | 4391 | engine->schedule = i915_schedule; |
c6dce8f1 | 4392 | engine->execlists.tasklet.func = execlists_submission_tasklet; |
aba5e278 | 4393 | |
1329115c | 4394 | engine->reset.prepare = execlists_reset_prepare; |
e26b6d43 CW |
4395 | engine->reset.rewind = execlists_reset_rewind; |
4396 | engine->reset.cancel = execlists_reset_cancel; | |
292ad25c | 4397 | engine->reset.finish = execlists_reset_finish; |
1329115c | 4398 | |
c34c5bca | 4399 | engine->park = execlists_park; |
aba5e278 | 4400 | engine->unpark = NULL; |
cf669b4e TU |
4401 | |
4402 | engine->flags |= I915_ENGINE_SUPPORTS_STATS; | |
09975b86 | 4403 | if (!intel_vgpu_active(engine->i915)) { |
a2deb873 | 4404 | engine->flags |= I915_ENGINE_HAS_SEMAPHORES; |
fe5a7082 | 4405 | if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) { |
09975b86 | 4406 | engine->flags |= I915_ENGINE_HAS_PREEMPTION; |
fe5a7082 CW |
4407 | if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) |
4408 | engine->flags |= I915_ENGINE_HAS_TIMESLICES; | |
4409 | } | |
09975b86 | 4410 | } |
cdb736fa | 4411 | |
ba2c74da | 4412 | if (INTEL_GEN(engine->i915) >= 12) |
cdb736fa | 4413 | engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO; |
a5e93b42 CW |
4414 | |
4415 | if (intel_engine_has_preemption(engine)) | |
4416 | engine->emit_bb_start = gen8_emit_bb_start; | |
4417 | else | |
4418 | engine->emit_bb_start = gen8_emit_bb_start_noarb; | |
ddd66c51 CW |
4419 | } |
4420 | ||
97c16353 CW |
4421 | static void execlists_shutdown(struct intel_engine_cs *engine) |
4422 | { | |
4423 | /* Synchronise with residual timers and any softirq they raise */ | |
4424 | del_timer_sync(&engine->execlists.timer); | |
4425 | del_timer_sync(&engine->execlists.preempt); | |
4426 | tasklet_kill(&engine->execlists.tasklet); | |
4427 | } | |
4428 | ||
e26b6d43 | 4429 | static void execlists_release(struct intel_engine_cs *engine) |
45b9c968 | 4430 | { |
97c16353 CW |
4431 | execlists_shutdown(engine); |
4432 | ||
45b9c968 CW |
4433 | intel_engine_cleanup_common(engine); |
4434 | lrc_destroy_wa_ctx(engine); | |
45b9c968 CW |
4435 | } |
4436 | ||
c9cacf93 | 4437 | static void |
e1382efb | 4438 | logical_ring_default_vfuncs(struct intel_engine_cs *engine) |
c9cacf93 TU |
4439 | { |
4440 | /* Default vfuncs which can be overriden by each engine. */ | |
45b9c968 | 4441 | |
79ffac85 | 4442 | engine->resume = execlists_resume; |
5adfb772 | 4443 | |
4dc84b77 | 4444 | engine->cops = &execlists_context_ops; |
f73e7399 CW |
4445 | engine->request_alloc = execlists_request_alloc; |
4446 | ||
0bc40be8 | 4447 | engine->emit_flush = gen8_emit_flush; |
85474441 CW |
4448 | engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; |
4449 | engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb; | |
c210e85b CW |
4450 | if (INTEL_GEN(engine->i915) >= 12) |
4451 | engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb; | |
ff44ad51 | 4452 | |
209b7955 | 4453 | engine->set_default_submission = intel_execlists_set_default_submission; |
ddd66c51 | 4454 | |
d4ccceb0 TU |
4455 | if (INTEL_GEN(engine->i915) < 11) { |
4456 | engine->irq_enable = gen8_logical_ring_enable_irq; | |
4457 | engine->irq_disable = gen8_logical_ring_disable_irq; | |
4458 | } else { | |
4459 | /* | |
4460 | * TODO: On Gen11 interrupt masks need to be clear | |
4461 | * to allow C6 entry. Keep interrupts enabled at | |
4462 | * and take the hit of generating extra interrupts | |
4463 | * until a more refined solution exists. | |
4464 | */ | |
4465 | } | |
c9cacf93 TU |
4466 | } |
4467 | ||
d9f3af96 | 4468 | static inline void |
c2c7f240 | 4469 | logical_ring_default_irqs(struct intel_engine_cs *engine) |
d9f3af96 | 4470 | { |
fa6f071d DCS |
4471 | unsigned int shift = 0; |
4472 | ||
4473 | if (INTEL_GEN(engine->i915) < 11) { | |
4474 | const u8 irq_shifts[] = { | |
8a68d464 CW |
4475 | [RCS0] = GEN8_RCS_IRQ_SHIFT, |
4476 | [BCS0] = GEN8_BCS_IRQ_SHIFT, | |
4477 | [VCS0] = GEN8_VCS0_IRQ_SHIFT, | |
4478 | [VCS1] = GEN8_VCS1_IRQ_SHIFT, | |
4479 | [VECS0] = GEN8_VECS_IRQ_SHIFT, | |
fa6f071d DCS |
4480 | }; |
4481 | ||
4482 | shift = irq_shifts[engine->id]; | |
4483 | } | |
4484 | ||
0bc40be8 TU |
4485 | engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift; |
4486 | engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; | |
70a76a9b | 4487 | engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift; |
220dcfc1 | 4488 | engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift; |
d9f3af96 TU |
4489 | } |
4490 | ||
845f7f7e MK |
4491 | static void rcs_submission_override(struct intel_engine_cs *engine) |
4492 | { | |
4493 | switch (INTEL_GEN(engine->i915)) { | |
4494 | case 12: | |
c45e788d | 4495 | engine->emit_flush = gen12_emit_flush_render; |
c210e85b CW |
4496 | engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs; |
4497 | break; | |
845f7f7e MK |
4498 | case 11: |
4499 | engine->emit_flush = gen11_emit_flush_render; | |
4500 | engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs; | |
4501 | break; | |
4502 | default: | |
4503 | engine->emit_flush = gen8_emit_flush_render; | |
4504 | engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; | |
4505 | break; | |
4506 | } | |
4507 | } | |
4508 | ||
11334c6a | 4509 | int intel_execlists_submission_setup(struct intel_engine_cs *engine) |
bb45438f | 4510 | { |
7d70a123 CW |
4511 | struct intel_engine_execlists * const execlists = &engine->execlists; |
4512 | struct drm_i915_private *i915 = engine->i915; | |
4513 | struct intel_uncore *uncore = engine->uncore; | |
4514 | u32 base = engine->mmio_base; | |
4515 | ||
c6dce8f1 SAK |
4516 | tasklet_init(&engine->execlists.tasklet, |
4517 | execlists_submission_tasklet, (unsigned long)engine); | |
3a7a92ab CW |
4518 | timer_setup(&engine->execlists.timer, execlists_timeslice, 0); |
4519 | timer_setup(&engine->execlists.preempt, execlists_preempt, 0); | |
bb45438f | 4520 | |
bb45438f TU |
4521 | logical_ring_default_vfuncs(engine); |
4522 | logical_ring_default_irqs(engine); | |
52954edd | 4523 | |
845f7f7e MK |
4524 | if (engine->class == RENDER_CLASS) |
4525 | rcs_submission_override(engine); | |
11334c6a | 4526 | |
11334c6a CW |
4527 | if (intel_init_workaround_bb(engine)) |
4528 | /* | |
4529 | * We continue even if we fail to initialize WA batch | |
4530 | * because we only expect rare glitches but nothing | |
4531 | * critical to prevent us from using GPU | |
4532 | */ | |
4533 | DRM_ERROR("WA batch buffer initialization failed\n"); | |
a60acb22 | 4534 | |
bc4237ec | 4535 | if (HAS_LOGICAL_RING_ELSQ(i915)) { |
f6e903db | 4536 | execlists->submit_reg = uncore->regs + |
baba6e57 | 4537 | i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base)); |
f6e903db | 4538 | execlists->ctrl_reg = uncore->regs + |
baba6e57 | 4539 | i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base)); |
05f0addd | 4540 | } else { |
f6e903db | 4541 | execlists->submit_reg = uncore->regs + |
baba6e57 | 4542 | i915_mmio_reg_offset(RING_ELSP(base)); |
05f0addd | 4543 | } |
693cfbf0 | 4544 | |
46592892 | 4545 | execlists->csb_status = |
0ca88ba0 | 4546 | &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX]; |
bc4237ec | 4547 | |
46592892 | 4548 | execlists->csb_write = |
0ca88ba0 | 4549 | &engine->status_page.addr[intel_hws_csb_write_index(i915)]; |
bc4237ec | 4550 | |
f6e903db | 4551 | if (INTEL_GEN(i915) < 11) |
632c7ad6 MK |
4552 | execlists->csb_size = GEN8_CSB_ENTRIES; |
4553 | else | |
4554 | execlists->csb_size = GEN11_CSB_ENTRIES; | |
7d4c75d9 | 4555 | |
53b2622e CW |
4556 | if (INTEL_GEN(engine->i915) >= 11) { |
4557 | execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32); | |
4558 | execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32); | |
4559 | } | |
4560 | ||
22b7a426 | 4561 | reset_csb_pointers(engine); |
c3160da9 | 4562 | |
7807a76b CW |
4563 | /* Finally, take ownership and responsibility for cleanup! */ |
4564 | engine->release = execlists_release; | |
4565 | ||
a19d6ff2 | 4566 | return 0; |
a19d6ff2 TU |
4567 | } |
4568 | ||
7dc56af5 | 4569 | static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine) |
71562919 MT |
4570 | { |
4571 | u32 indirect_ctx_offset; | |
4572 | ||
c033666a | 4573 | switch (INTEL_GEN(engine->i915)) { |
71562919 | 4574 | default: |
c033666a | 4575 | MISSING_CASE(INTEL_GEN(engine->i915)); |
71562919 | 4576 | /* fall through */ |
487f471d DCS |
4577 | case 12: |
4578 | indirect_ctx_offset = | |
4579 | GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; | |
4580 | break; | |
fd034c77 MT |
4581 | case 11: |
4582 | indirect_ctx_offset = | |
4583 | GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; | |
4584 | break; | |
7bd0a2c6 MT |
4585 | case 10: |
4586 | indirect_ctx_offset = | |
4587 | GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; | |
4588 | break; | |
71562919 MT |
4589 | case 9: |
4590 | indirect_ctx_offset = | |
4591 | GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; | |
4592 | break; | |
4593 | case 8: | |
4594 | indirect_ctx_offset = | |
4595 | GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; | |
4596 | break; | |
4597 | } | |
4598 | ||
4599 | return indirect_ctx_offset; | |
4600 | } | |
4601 | ||
0b718ba1 | 4602 | |
5bf05dc5 | 4603 | static void init_common_reg_state(u32 * const regs, |
7dc56af5 | 4604 | const struct intel_engine_cs *engine, |
d1813ca2 CW |
4605 | const struct intel_ring *ring, |
4606 | bool inhibit) | |
8670d6f9 | 4607 | { |
d1813ca2 CW |
4608 | u32 ctl; |
4609 | ||
4610 | ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH); | |
4611 | ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); | |
4612 | if (inhibit) | |
4613 | ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT; | |
7dc56af5 | 4614 | if (INTEL_GEN(engine->i915) < 11) |
d1813ca2 CW |
4615 | ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT | |
4616 | CTX_CTRL_RS_CTX_ENABLE); | |
4617 | regs[CTX_CONTEXT_CONTROL] = ctl; | |
7dc56af5 | 4618 | |
b0b10248 | 4619 | regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID; |
5bf05dc5 | 4620 | } |
17ee950d | 4621 | |
5bf05dc5 | 4622 | static void init_wa_bb_reg_state(u32 * const regs, |
7dc56af5 | 4623 | const struct intel_engine_cs *engine, |
5bf05dc5 MT |
4624 | u32 pos_bb_per_ctx) |
4625 | { | |
7dc56af5 CW |
4626 | const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx; |
4627 | ||
4628 | if (wa_ctx->per_ctx.size) { | |
4629 | const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); | |
4630 | ||
4631 | regs[pos_bb_per_ctx] = | |
4632 | (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01; | |
4633 | } | |
17ee950d | 4634 | |
5bf05dc5 MT |
4635 | if (wa_ctx->indirect_ctx.size) { |
4636 | const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); | |
604a8f6f | 4637 | |
7dc56af5 | 4638 | regs[pos_bb_per_ctx + 2] = |
5bf05dc5 MT |
4639 | (ggtt_offset + wa_ctx->indirect_ctx.offset) | |
4640 | (wa_ctx->indirect_ctx.size / CACHELINE_BYTES); | |
17ee950d | 4641 | |
7dc56af5 | 4642 | regs[pos_bb_per_ctx + 4] = |
5bf05dc5 | 4643 | intel_lr_indirect_ctx_offset(engine) << 6; |
8670d6f9 | 4644 | } |
5bf05dc5 MT |
4645 | } |
4646 | ||
7dc56af5 | 4647 | static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt) |
5bf05dc5 | 4648 | { |
a9fe9ca4 | 4649 | if (i915_vm_is_4lvl(&ppgtt->vm)) { |
2dba3239 MT |
4650 | /* 64b PPGTT (48bit canonical) |
4651 | * PDP0_DESCRIPTOR contains the base address to PML4 and | |
4652 | * other PDP Descriptors are ignored. | |
4653 | */ | |
b146e5ef | 4654 | ASSIGN_CTX_PML4(ppgtt, regs); |
e8894267 | 4655 | } else { |
b146e5ef CW |
4656 | ASSIGN_CTX_PDP(ppgtt, regs, 3); |
4657 | ASSIGN_CTX_PDP(ppgtt, regs, 2); | |
4658 | ASSIGN_CTX_PDP(ppgtt, regs, 1); | |
4659 | ASSIGN_CTX_PDP(ppgtt, regs, 0); | |
2dba3239 | 4660 | } |
5bf05dc5 MT |
4661 | } |
4662 | ||
4663 | static struct i915_ppgtt *vm_alias(struct i915_address_space *vm) | |
4664 | { | |
4665 | if (i915_is_ggtt(vm)) | |
4666 | return i915_vm_to_ggtt(vm)->alias; | |
4667 | else | |
4668 | return i915_vm_to_ppgtt(vm); | |
4669 | } | |
4670 | ||
5bf05dc5 | 4671 | static void execlists_init_reg_state(u32 *regs, |
7dc56af5 CW |
4672 | const struct intel_context *ce, |
4673 | const struct intel_engine_cs *engine, | |
4674 | const struct intel_ring *ring, | |
d1813ca2 | 4675 | bool inhibit) |
5bf05dc5 MT |
4676 | { |
4677 | /* | |
4678 | * A context is actually a big batch buffer with several | |
4679 | * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The | |
4680 | * values we are setting here are only for the first context restore: | |
4681 | * on a subsequent save, the GPU will recreate this batchbuffer with new | |
4682 | * values (including all the missing MI_LOAD_REGISTER_IMM commands that | |
4683 | * we are not initializing here). | |
4684 | * | |
4685 | * Must keep consistent with virtual_update_register_offsets(). | |
4686 | */ | |
d1813ca2 | 4687 | set_offsets(regs, reg_offsets(engine), engine, inhibit); |
7dc56af5 | 4688 | |
d1813ca2 | 4689 | init_common_reg_state(regs, engine, ring, inhibit); |
7dc56af5 CW |
4690 | init_ppgtt_reg_state(regs, vm_alias(ce->vm)); |
4691 | ||
4692 | init_wa_bb_reg_state(regs, engine, | |
4693 | INTEL_GEN(engine->i915) >= 12 ? | |
4694 | GEN12_CTX_BB_PER_CTX_PTR : | |
4695 | CTX_BB_PER_CTX_PTR); | |
987281ab CW |
4696 | |
4697 | __reset_stop_ring(regs, engine); | |
5bf05dc5 MT |
4698 | } |
4699 | ||
a3aabe86 | 4700 | static int |
b146e5ef | 4701 | populate_lr_context(struct intel_context *ce, |
a3aabe86 CW |
4702 | struct drm_i915_gem_object *ctx_obj, |
4703 | struct intel_engine_cs *engine, | |
4704 | struct intel_ring *ring) | |
4705 | { | |
7dc56af5 | 4706 | bool inhibit = true; |
a3aabe86 CW |
4707 | void *vaddr; |
4708 | int ret; | |
4709 | ||
a3aabe86 CW |
4710 | vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB); |
4711 | if (IS_ERR(vaddr)) { | |
4712 | ret = PTR_ERR(vaddr); | |
4713 | DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret); | |
4714 | return ret; | |
4715 | } | |
a3aabe86 | 4716 | |
9559c875 CW |
4717 | set_redzone(vaddr, engine); |
4718 | ||
d2b4b979 | 4719 | if (engine->default_state) { |
d2b4b979 CW |
4720 | void *defaults; |
4721 | ||
4722 | defaults = i915_gem_object_pin_map(engine->default_state, | |
4723 | I915_MAP_WB); | |
aaefa06a MA |
4724 | if (IS_ERR(defaults)) { |
4725 | ret = PTR_ERR(defaults); | |
4726 | goto err_unpin_ctx; | |
4727 | } | |
d2b4b979 | 4728 | |
9f379407 | 4729 | memcpy(vaddr, defaults, engine->context_size); |
d2b4b979 | 4730 | i915_gem_object_unpin_map(engine->default_state); |
f70de8d2 | 4731 | __set_bit(CONTEXT_VALID_BIT, &ce->flags); |
7dc56af5 | 4732 | inhibit = false; |
d2b4b979 CW |
4733 | } |
4734 | ||
1883a0a4 TU |
4735 | /* Clear the ppHWSP (inc. per-context counters) */ |
4736 | memset(vaddr, 0, PAGE_SIZE); | |
4737 | ||
4738 | /* | |
4739 | * The second page of the context object contains some registers which | |
4740 | * must be set up prior to the first execution. | |
4741 | */ | |
d1813ca2 CW |
4742 | execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE, |
4743 | ce, engine, ring, inhibit); | |
8670d6f9 | 4744 | |
a679f58d | 4745 | ret = 0; |
aaefa06a | 4746 | err_unpin_ctx: |
9f379407 | 4747 | __i915_gem_object_flush_map(ctx_obj, 0, engine->context_size); |
7d774cac | 4748 | i915_gem_object_unpin_map(ctx_obj); |
aaefa06a | 4749 | return ret; |
8670d6f9 OM |
4750 | } |
4751 | ||
4c60b1aa CW |
4752 | static int __execlists_context_alloc(struct intel_context *ce, |
4753 | struct intel_engine_cs *engine) | |
ede7d42b | 4754 | { |
8c857917 | 4755 | struct drm_i915_gem_object *ctx_obj; |
75d0a7f3 | 4756 | struct intel_ring *ring; |
bf3783e5 | 4757 | struct i915_vma *vma; |
739f3abd | 4758 | u32 context_size; |
8c857917 OM |
4759 | int ret; |
4760 | ||
4c60b1aa | 4761 | GEM_BUG_ON(ce->state); |
63ffbcda | 4762 | context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE); |
8c857917 | 4763 | |
9559c875 CW |
4764 | if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) |
4765 | context_size += I915_GTT_PAGE_SIZE; /* for redzone */ | |
d1675198 | 4766 | |
8475355f | 4767 | ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size); |
467d3578 CW |
4768 | if (IS_ERR(ctx_obj)) |
4769 | return PTR_ERR(ctx_obj); | |
8c857917 | 4770 | |
ba4134a4 | 4771 | vma = i915_vma_instance(ctx_obj, &engine->gt->ggtt->vm, NULL); |
bf3783e5 CW |
4772 | if (IS_ERR(vma)) { |
4773 | ret = PTR_ERR(vma); | |
4774 | goto error_deref_obj; | |
4775 | } | |
4776 | ||
75d0a7f3 CW |
4777 | if (!ce->timeline) { |
4778 | struct intel_timeline *tl; | |
f16ccb64 CW |
4779 | struct i915_vma *hwsp; |
4780 | ||
4781 | /* | |
4782 | * Use the static global HWSP for the kernel context, and | |
4783 | * a dynamically allocated cacheline for everyone else. | |
4784 | */ | |
4785 | hwsp = NULL; | |
4786 | if (unlikely(intel_context_is_barrier(ce))) | |
4787 | hwsp = engine->status_page.vma; | |
75d0a7f3 | 4788 | |
f16ccb64 | 4789 | tl = intel_timeline_create(engine->gt, hwsp); |
75d0a7f3 CW |
4790 | if (IS_ERR(tl)) { |
4791 | ret = PTR_ERR(tl); | |
4792 | goto error_deref_obj; | |
4793 | } | |
4794 | ||
4795 | ce->timeline = tl; | |
a89d1f92 CW |
4796 | } |
4797 | ||
75d0a7f3 | 4798 | ring = intel_engine_create_ring(engine, (unsigned long)ce->ring); |
dca33ecc CW |
4799 | if (IS_ERR(ring)) { |
4800 | ret = PTR_ERR(ring); | |
e84fe803 | 4801 | goto error_deref_obj; |
8670d6f9 OM |
4802 | } |
4803 | ||
b146e5ef | 4804 | ret = populate_lr_context(ce, ctx_obj, engine, ring); |
8670d6f9 OM |
4805 | if (ret) { |
4806 | DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret); | |
dca33ecc | 4807 | goto error_ring_free; |
84c2377f OM |
4808 | } |
4809 | ||
dca33ecc | 4810 | ce->ring = ring; |
bf3783e5 | 4811 | ce->state = vma; |
ede7d42b OM |
4812 | |
4813 | return 0; | |
8670d6f9 | 4814 | |
dca33ecc | 4815 | error_ring_free: |
65baf0ef | 4816 | intel_ring_put(ring); |
e84fe803 | 4817 | error_deref_obj: |
f8c417cd | 4818 | i915_gem_object_put(ctx_obj); |
8670d6f9 | 4819 | return ret; |
ede7d42b | 4820 | } |
3e5b6f05 | 4821 | |
422d7df4 CW |
4822 | static struct list_head *virtual_queue(struct virtual_engine *ve) |
4823 | { | |
4824 | return &ve->base.execlists.default_priolist.requests[0]; | |
4825 | } | |
4826 | ||
6d06779e CW |
4827 | static void virtual_context_destroy(struct kref *kref) |
4828 | { | |
4829 | struct virtual_engine *ve = | |
4830 | container_of(kref, typeof(*ve), context.ref); | |
4831 | unsigned int n; | |
4832 | ||
422d7df4 | 4833 | GEM_BUG_ON(!list_empty(virtual_queue(ve))); |
6d06779e | 4834 | GEM_BUG_ON(ve->request); |
754f7a0b | 4835 | GEM_BUG_ON(ve->context.inflight); |
6d06779e CW |
4836 | |
4837 | for (n = 0; n < ve->num_siblings; n++) { | |
4838 | struct intel_engine_cs *sibling = ve->siblings[n]; | |
4839 | struct rb_node *node = &ve->nodes[sibling->id].rb; | |
6f7ac828 | 4840 | unsigned long flags; |
6d06779e CW |
4841 | |
4842 | if (RB_EMPTY_NODE(node)) | |
4843 | continue; | |
4844 | ||
6f7ac828 | 4845 | spin_lock_irqsave(&sibling->active.lock, flags); |
6d06779e CW |
4846 | |
4847 | /* Detachment is lazily performed in the execlists tasklet */ | |
4848 | if (!RB_EMPTY_NODE(node)) | |
4849 | rb_erase_cached(node, &sibling->execlists.virtual); | |
4850 | ||
6f7ac828 | 4851 | spin_unlock_irqrestore(&sibling->active.lock, flags); |
6d06779e CW |
4852 | } |
4853 | GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet)); | |
4854 | ||
4855 | if (ve->context.state) | |
4856 | __execlists_context_fini(&ve->context); | |
df8cf31e | 4857 | intel_context_fini(&ve->context); |
6d06779e | 4858 | |
ee113690 | 4859 | kfree(ve->bonds); |
6d06779e CW |
4860 | kfree(ve); |
4861 | } | |
4862 | ||
4863 | static void virtual_engine_initial_hint(struct virtual_engine *ve) | |
4864 | { | |
4865 | int swp; | |
4866 | ||
4867 | /* | |
4868 | * Pick a random sibling on starting to help spread the load around. | |
4869 | * | |
4870 | * New contexts are typically created with exactly the same order | |
4871 | * of siblings, and often started in batches. Due to the way we iterate | |
4872 | * the array of sibling when submitting requests, sibling[0] is | |
4873 | * prioritised for dequeuing. If we make sure that sibling[0] is fairly | |
4874 | * randomised across the system, we also help spread the load by the | |
4875 | * first engine we inspect being different each time. | |
4876 | * | |
4877 | * NB This does not force us to execute on this engine, it will just | |
4878 | * typically be the first we inspect for submission. | |
4879 | */ | |
4880 | swp = prandom_u32_max(ve->num_siblings); | |
4881 | if (!swp) | |
4882 | return; | |
4883 | ||
4884 | swap(ve->siblings[swp], ve->siblings[0]); | |
cdb736fa MK |
4885 | if (!intel_engine_has_relative_mmio(ve->siblings[0])) |
4886 | virtual_update_register_offsets(ve->context.lrc_reg_state, | |
4887 | ve->siblings[0]); | |
6d06779e CW |
4888 | } |
4889 | ||
d5e19353 CW |
4890 | static int virtual_context_alloc(struct intel_context *ce) |
4891 | { | |
4892 | struct virtual_engine *ve = container_of(ce, typeof(*ve), context); | |
4893 | ||
4894 | return __execlists_context_alloc(ce, ve->siblings[0]); | |
4895 | } | |
4896 | ||
6d06779e CW |
4897 | static int virtual_context_pin(struct intel_context *ce) |
4898 | { | |
4899 | struct virtual_engine *ve = container_of(ce, typeof(*ve), context); | |
4900 | int err; | |
4901 | ||
4902 | /* Note: we must use a real engine class for setting up reg state */ | |
4903 | err = __execlists_context_pin(ce, ve->siblings[0]); | |
4904 | if (err) | |
4905 | return err; | |
4906 | ||
4907 | virtual_engine_initial_hint(ve); | |
4908 | return 0; | |
4909 | } | |
4910 | ||
4911 | static void virtual_context_enter(struct intel_context *ce) | |
4912 | { | |
4913 | struct virtual_engine *ve = container_of(ce, typeof(*ve), context); | |
4914 | unsigned int n; | |
4915 | ||
4916 | for (n = 0; n < ve->num_siblings; n++) | |
4917 | intel_engine_pm_get(ve->siblings[n]); | |
531958f6 CW |
4918 | |
4919 | intel_timeline_enter(ce->timeline); | |
6d06779e CW |
4920 | } |
4921 | ||
4922 | static void virtual_context_exit(struct intel_context *ce) | |
4923 | { | |
4924 | struct virtual_engine *ve = container_of(ce, typeof(*ve), context); | |
4925 | unsigned int n; | |
4926 | ||
531958f6 CW |
4927 | intel_timeline_exit(ce->timeline); |
4928 | ||
6d06779e CW |
4929 | for (n = 0; n < ve->num_siblings; n++) |
4930 | intel_engine_pm_put(ve->siblings[n]); | |
4931 | } | |
4932 | ||
4933 | static const struct intel_context_ops virtual_context_ops = { | |
d5e19353 CW |
4934 | .alloc = virtual_context_alloc, |
4935 | ||
6d06779e CW |
4936 | .pin = virtual_context_pin, |
4937 | .unpin = execlists_context_unpin, | |
4938 | ||
4939 | .enter = virtual_context_enter, | |
4940 | .exit = virtual_context_exit, | |
4941 | ||
4942 | .destroy = virtual_context_destroy, | |
4943 | }; | |
4944 | ||
78e41ddd CW |
4945 | static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve) |
4946 | { | |
4947 | struct i915_request *rq; | |
4948 | intel_engine_mask_t mask; | |
4949 | ||
4950 | rq = READ_ONCE(ve->request); | |
4951 | if (!rq) | |
4952 | return 0; | |
4953 | ||
4954 | /* The rq is ready for submission; rq->execution_mask is now stable. */ | |
4955 | mask = rq->execution_mask; | |
4956 | if (unlikely(!mask)) { | |
4957 | /* Invalid selection, submit to a random engine in error */ | |
36e191f0 | 4958 | i915_request_set_error_once(rq, -ENODEV); |
78e41ddd CW |
4959 | mask = ve->siblings[0]->mask; |
4960 | } | |
4961 | ||
639f2f24 VSD |
4962 | ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n", |
4963 | rq->fence.context, rq->fence.seqno, | |
4964 | mask, ve->base.execlists.queue_priority_hint); | |
78e41ddd CW |
4965 | |
4966 | return mask; | |
4967 | } | |
4968 | ||
6d06779e CW |
4969 | static void virtual_submission_tasklet(unsigned long data) |
4970 | { | |
4971 | struct virtual_engine * const ve = (struct virtual_engine *)data; | |
3a55dc89 | 4972 | const int prio = READ_ONCE(ve->base.execlists.queue_priority_hint); |
78e41ddd | 4973 | intel_engine_mask_t mask; |
6d06779e CW |
4974 | unsigned int n; |
4975 | ||
78e41ddd CW |
4976 | rcu_read_lock(); |
4977 | mask = virtual_submission_mask(ve); | |
4978 | rcu_read_unlock(); | |
4979 | if (unlikely(!mask)) | |
4980 | return; | |
4981 | ||
6d06779e CW |
4982 | local_irq_disable(); |
4983 | for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) { | |
4984 | struct intel_engine_cs *sibling = ve->siblings[n]; | |
4985 | struct ve_node * const node = &ve->nodes[sibling->id]; | |
4986 | struct rb_node **parent, *rb; | |
4987 | bool first; | |
4988 | ||
78e41ddd CW |
4989 | if (unlikely(!(mask & sibling->mask))) { |
4990 | if (!RB_EMPTY_NODE(&node->rb)) { | |
422d7df4 | 4991 | spin_lock(&sibling->active.lock); |
78e41ddd CW |
4992 | rb_erase_cached(&node->rb, |
4993 | &sibling->execlists.virtual); | |
4994 | RB_CLEAR_NODE(&node->rb); | |
422d7df4 | 4995 | spin_unlock(&sibling->active.lock); |
78e41ddd CW |
4996 | } |
4997 | continue; | |
4998 | } | |
4999 | ||
422d7df4 | 5000 | spin_lock(&sibling->active.lock); |
6d06779e CW |
5001 | |
5002 | if (!RB_EMPTY_NODE(&node->rb)) { | |
5003 | /* | |
5004 | * Cheat and avoid rebalancing the tree if we can | |
5005 | * reuse this node in situ. | |
5006 | */ | |
5007 | first = rb_first_cached(&sibling->execlists.virtual) == | |
5008 | &node->rb; | |
5009 | if (prio == node->prio || (prio > node->prio && first)) | |
5010 | goto submit_engine; | |
5011 | ||
5012 | rb_erase_cached(&node->rb, &sibling->execlists.virtual); | |
5013 | } | |
5014 | ||
5015 | rb = NULL; | |
5016 | first = true; | |
5017 | parent = &sibling->execlists.virtual.rb_root.rb_node; | |
5018 | while (*parent) { | |
5019 | struct ve_node *other; | |
5020 | ||
5021 | rb = *parent; | |
5022 | other = rb_entry(rb, typeof(*other), rb); | |
5023 | if (prio > other->prio) { | |
5024 | parent = &rb->rb_left; | |
5025 | } else { | |
5026 | parent = &rb->rb_right; | |
5027 | first = false; | |
5028 | } | |
5029 | } | |
5030 | ||
5031 | rb_link_node(&node->rb, rb, parent); | |
5032 | rb_insert_color_cached(&node->rb, | |
5033 | &sibling->execlists.virtual, | |
5034 | first); | |
5035 | ||
5036 | submit_engine: | |
5037 | GEM_BUG_ON(RB_EMPTY_NODE(&node->rb)); | |
5038 | node->prio = prio; | |
5039 | if (first && prio > sibling->execlists.queue_priority_hint) { | |
5040 | sibling->execlists.queue_priority_hint = prio; | |
5041 | tasklet_hi_schedule(&sibling->execlists.tasklet); | |
5042 | } | |
5043 | ||
422d7df4 | 5044 | spin_unlock(&sibling->active.lock); |
6d06779e CW |
5045 | } |
5046 | local_irq_enable(); | |
5047 | } | |
5048 | ||
5049 | static void virtual_submit_request(struct i915_request *rq) | |
5050 | { | |
5051 | struct virtual_engine *ve = to_virtual_engine(rq->engine); | |
b647c7df CW |
5052 | struct i915_request *old; |
5053 | unsigned long flags; | |
6d06779e | 5054 | |
639f2f24 VSD |
5055 | ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n", |
5056 | rq->fence.context, | |
5057 | rq->fence.seqno); | |
6d06779e CW |
5058 | |
5059 | GEM_BUG_ON(ve->base.submit_request != virtual_submit_request); | |
5060 | ||
b647c7df CW |
5061 | spin_lock_irqsave(&ve->base.active.lock, flags); |
5062 | ||
5063 | old = ve->request; | |
5064 | if (old) { /* background completion event from preempt-to-busy */ | |
5065 | GEM_BUG_ON(!i915_request_completed(old)); | |
5066 | __i915_request_submit(old); | |
5067 | i915_request_put(old); | |
5068 | } | |
422d7df4 | 5069 | |
b647c7df CW |
5070 | if (i915_request_completed(rq)) { |
5071 | __i915_request_submit(rq); | |
6d06779e | 5072 | |
b647c7df CW |
5073 | ve->base.execlists.queue_priority_hint = INT_MIN; |
5074 | ve->request = NULL; | |
5075 | } else { | |
5076 | ve->base.execlists.queue_priority_hint = rq_prio(rq); | |
5077 | ve->request = i915_request_get(rq); | |
5078 | ||
5079 | GEM_BUG_ON(!list_empty(virtual_queue(ve))); | |
5080 | list_move_tail(&rq->sched.link, virtual_queue(ve)); | |
5081 | ||
5082 | tasklet_schedule(&ve->base.execlists.tasklet); | |
5083 | } | |
422d7df4 | 5084 | |
b647c7df | 5085 | spin_unlock_irqrestore(&ve->base.active.lock, flags); |
6d06779e CW |
5086 | } |
5087 | ||
ee113690 CW |
5088 | static struct ve_bond * |
5089 | virtual_find_bond(struct virtual_engine *ve, | |
5090 | const struct intel_engine_cs *master) | |
5091 | { | |
5092 | int i; | |
5093 | ||
5094 | for (i = 0; i < ve->num_bonds; i++) { | |
5095 | if (ve->bonds[i].master == master) | |
5096 | return &ve->bonds[i]; | |
5097 | } | |
5098 | ||
5099 | return NULL; | |
5100 | } | |
5101 | ||
5102 | static void | |
5103 | virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal) | |
5104 | { | |
5105 | struct virtual_engine *ve = to_virtual_engine(rq->engine); | |
e2144503 | 5106 | intel_engine_mask_t allowed, exec; |
ee113690 CW |
5107 | struct ve_bond *bond; |
5108 | ||
e2144503 CW |
5109 | allowed = ~to_request(signal)->engine->mask; |
5110 | ||
ee113690 | 5111 | bond = virtual_find_bond(ve, to_request(signal)->engine); |
e2144503 CW |
5112 | if (bond) |
5113 | allowed &= bond->sibling_mask; | |
5114 | ||
5115 | /* Restrict the bonded request to run on only the available engines */ | |
5116 | exec = READ_ONCE(rq->execution_mask); | |
5117 | while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed)) | |
5118 | ; | |
5119 | ||
5120 | /* Prevent the master from being re-run on the bonded engines */ | |
5121 | to_request(signal)->execution_mask &= ~allowed; | |
ee113690 CW |
5122 | } |
5123 | ||
6d06779e | 5124 | struct intel_context * |
e6ba7648 | 5125 | intel_execlists_create_virtual(struct intel_engine_cs **siblings, |
6d06779e CW |
5126 | unsigned int count) |
5127 | { | |
5128 | struct virtual_engine *ve; | |
5129 | unsigned int n; | |
5130 | int err; | |
5131 | ||
5132 | if (count == 0) | |
5133 | return ERR_PTR(-EINVAL); | |
5134 | ||
5135 | if (count == 1) | |
e6ba7648 | 5136 | return intel_context_create(siblings[0]); |
6d06779e CW |
5137 | |
5138 | ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL); | |
5139 | if (!ve) | |
5140 | return ERR_PTR(-ENOMEM); | |
5141 | ||
e6ba7648 | 5142 | ve->base.i915 = siblings[0]->i915; |
f937f561 | 5143 | ve->base.gt = siblings[0]->gt; |
20af04f3 | 5144 | ve->base.uncore = siblings[0]->uncore; |
6d06779e | 5145 | ve->base.id = -1; |
f75fc37b | 5146 | |
6d06779e CW |
5147 | ve->base.class = OTHER_CLASS; |
5148 | ve->base.uabi_class = I915_ENGINE_CLASS_INVALID; | |
5149 | ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; | |
f75fc37b | 5150 | ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; |
6d06779e | 5151 | |
44d89409 CW |
5152 | /* |
5153 | * The decision on whether to submit a request using semaphores | |
5154 | * depends on the saturated state of the engine. We only compute | |
5155 | * this during HW submission of the request, and we need for this | |
5156 | * state to be globally applied to all requests being submitted | |
5157 | * to this engine. Virtual engines encompass more than one physical | |
5158 | * engine and so we cannot accurately tell in advance if one of those | |
5159 | * engines is already saturated and so cannot afford to use a semaphore | |
5160 | * and be pessimized in priority for doing so -- if we are the only | |
5161 | * context using semaphores after all other clients have stopped, we | |
5162 | * will be starved on the saturated system. Such a global switch for | |
5163 | * semaphores is less than ideal, but alas is the current compromise. | |
5164 | */ | |
5165 | ve->base.saturated = ALL_ENGINES; | |
5166 | ||
6d06779e CW |
5167 | snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); |
5168 | ||
422d7df4 | 5169 | intel_engine_init_active(&ve->base, ENGINE_VIRTUAL); |
f8db4d05 | 5170 | intel_engine_init_breadcrumbs(&ve->base); |
6d06779e CW |
5171 | intel_engine_init_execlists(&ve->base); |
5172 | ||
5173 | ve->base.cops = &virtual_context_ops; | |
5174 | ve->base.request_alloc = execlists_request_alloc; | |
5175 | ||
5176 | ve->base.schedule = i915_schedule; | |
5177 | ve->base.submit_request = virtual_submit_request; | |
ee113690 | 5178 | ve->base.bond_execute = virtual_bond_execute; |
6d06779e | 5179 | |
422d7df4 | 5180 | INIT_LIST_HEAD(virtual_queue(ve)); |
6d06779e CW |
5181 | ve->base.execlists.queue_priority_hint = INT_MIN; |
5182 | tasklet_init(&ve->base.execlists.tasklet, | |
5183 | virtual_submission_tasklet, | |
5184 | (unsigned long)ve); | |
5185 | ||
e6ba7648 | 5186 | intel_context_init(&ve->context, &ve->base); |
6d06779e CW |
5187 | |
5188 | for (n = 0; n < count; n++) { | |
5189 | struct intel_engine_cs *sibling = siblings[n]; | |
5190 | ||
5191 | GEM_BUG_ON(!is_power_of_2(sibling->mask)); | |
5192 | if (sibling->mask & ve->base.mask) { | |
5193 | DRM_DEBUG("duplicate %s entry in load balancer\n", | |
5194 | sibling->name); | |
5195 | err = -EINVAL; | |
5196 | goto err_put; | |
5197 | } | |
5198 | ||
5199 | /* | |
5200 | * The virtual engine implementation is tightly coupled to | |
5201 | * the execlists backend -- we push out request directly | |
5202 | * into a tree inside each physical engine. We could support | |
5203 | * layering if we handle cloning of the requests and | |
5204 | * submitting a copy into each backend. | |
5205 | */ | |
5206 | if (sibling->execlists.tasklet.func != | |
5207 | execlists_submission_tasklet) { | |
5208 | err = -ENODEV; | |
5209 | goto err_put; | |
5210 | } | |
5211 | ||
5212 | GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb)); | |
5213 | RB_CLEAR_NODE(&ve->nodes[sibling->id].rb); | |
5214 | ||
5215 | ve->siblings[ve->num_siblings++] = sibling; | |
5216 | ve->base.mask |= sibling->mask; | |
5217 | ||
5218 | /* | |
5219 | * All physical engines must be compatible for their emission | |
5220 | * functions (as we build the instructions during request | |
5221 | * construction and do not alter them before submission | |
5222 | * on the physical engine). We use the engine class as a guide | |
5223 | * here, although that could be refined. | |
5224 | */ | |
5225 | if (ve->base.class != OTHER_CLASS) { | |
5226 | if (ve->base.class != sibling->class) { | |
5227 | DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n", | |
5228 | sibling->class, ve->base.class); | |
5229 | err = -EINVAL; | |
5230 | goto err_put; | |
5231 | } | |
5232 | continue; | |
5233 | } | |
5234 | ||
5235 | ve->base.class = sibling->class; | |
5236 | ve->base.uabi_class = sibling->uabi_class; | |
5237 | snprintf(ve->base.name, sizeof(ve->base.name), | |
5238 | "v%dx%d", ve->base.class, count); | |
5239 | ve->base.context_size = sibling->context_size; | |
5240 | ||
5241 | ve->base.emit_bb_start = sibling->emit_bb_start; | |
5242 | ve->base.emit_flush = sibling->emit_flush; | |
5243 | ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb; | |
5244 | ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb; | |
5245 | ve->base.emit_fini_breadcrumb_dw = | |
5246 | sibling->emit_fini_breadcrumb_dw; | |
09975b86 CW |
5247 | |
5248 | ve->base.flags = sibling->flags; | |
6d06779e CW |
5249 | } |
5250 | ||
09975b86 CW |
5251 | ve->base.flags |= I915_ENGINE_IS_VIRTUAL; |
5252 | ||
6d06779e CW |
5253 | return &ve->context; |
5254 | ||
5255 | err_put: | |
5256 | intel_context_put(&ve->context); | |
5257 | return ERR_PTR(err); | |
5258 | } | |
5259 | ||
5260 | struct intel_context * | |
e6ba7648 | 5261 | intel_execlists_clone_virtual(struct intel_engine_cs *src) |
6d06779e CW |
5262 | { |
5263 | struct virtual_engine *se = to_virtual_engine(src); | |
5264 | struct intel_context *dst; | |
5265 | ||
e6ba7648 | 5266 | dst = intel_execlists_create_virtual(se->siblings, |
6d06779e CW |
5267 | se->num_siblings); |
5268 | if (IS_ERR(dst)) | |
5269 | return dst; | |
5270 | ||
ee113690 CW |
5271 | if (se->num_bonds) { |
5272 | struct virtual_engine *de = to_virtual_engine(dst->engine); | |
5273 | ||
5274 | de->bonds = kmemdup(se->bonds, | |
5275 | sizeof(*se->bonds) * se->num_bonds, | |
5276 | GFP_KERNEL); | |
5277 | if (!de->bonds) { | |
5278 | intel_context_put(dst); | |
5279 | return ERR_PTR(-ENOMEM); | |
5280 | } | |
5281 | ||
5282 | de->num_bonds = se->num_bonds; | |
5283 | } | |
5284 | ||
6d06779e CW |
5285 | return dst; |
5286 | } | |
5287 | ||
ee113690 CW |
5288 | int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine, |
5289 | const struct intel_engine_cs *master, | |
5290 | const struct intel_engine_cs *sibling) | |
5291 | { | |
5292 | struct virtual_engine *ve = to_virtual_engine(engine); | |
5293 | struct ve_bond *bond; | |
5294 | int n; | |
5295 | ||
5296 | /* Sanity check the sibling is part of the virtual engine */ | |
5297 | for (n = 0; n < ve->num_siblings; n++) | |
5298 | if (sibling == ve->siblings[n]) | |
5299 | break; | |
5300 | if (n == ve->num_siblings) | |
5301 | return -EINVAL; | |
5302 | ||
5303 | bond = virtual_find_bond(ve, master); | |
5304 | if (bond) { | |
5305 | bond->sibling_mask |= sibling->mask; | |
5306 | return 0; | |
5307 | } | |
5308 | ||
5309 | bond = krealloc(ve->bonds, | |
5310 | sizeof(*bond) * (ve->num_bonds + 1), | |
5311 | GFP_KERNEL); | |
5312 | if (!bond) | |
5313 | return -ENOMEM; | |
5314 | ||
5315 | bond[ve->num_bonds].master = master; | |
5316 | bond[ve->num_bonds].sibling_mask = sibling->mask; | |
5317 | ||
5318 | ve->bonds = bond; | |
5319 | ve->num_bonds++; | |
5320 | ||
5321 | return 0; | |
5322 | } | |
5323 | ||
cccdce1d CW |
5324 | struct intel_engine_cs * |
5325 | intel_virtual_engine_get_sibling(struct intel_engine_cs *engine, | |
5326 | unsigned int sibling) | |
5327 | { | |
5328 | struct virtual_engine *ve = to_virtual_engine(engine); | |
5329 | ||
5330 | if (sibling >= ve->num_siblings) | |
5331 | return NULL; | |
5332 | ||
5333 | return ve->siblings[sibling]; | |
5334 | } | |
5335 | ||
0212bdef CW |
5336 | void intel_execlists_show_requests(struct intel_engine_cs *engine, |
5337 | struct drm_printer *m, | |
5338 | void (*show_request)(struct drm_printer *m, | |
5339 | struct i915_request *rq, | |
5340 | const char *prefix), | |
5341 | unsigned int max) | |
5342 | { | |
5343 | const struct intel_engine_execlists *execlists = &engine->execlists; | |
5344 | struct i915_request *rq, *last; | |
5345 | unsigned long flags; | |
5346 | unsigned int count; | |
5347 | struct rb_node *rb; | |
5348 | ||
422d7df4 | 5349 | spin_lock_irqsave(&engine->active.lock, flags); |
0212bdef CW |
5350 | |
5351 | last = NULL; | |
5352 | count = 0; | |
422d7df4 | 5353 | list_for_each_entry(rq, &engine->active.requests, sched.link) { |
0212bdef CW |
5354 | if (count++ < max - 1) |
5355 | show_request(m, rq, "\t\tE "); | |
5356 | else | |
5357 | last = rq; | |
5358 | } | |
5359 | if (last) { | |
5360 | if (count > max) { | |
5361 | drm_printf(m, | |
5362 | "\t\t...skipping %d executing requests...\n", | |
5363 | count - max); | |
5364 | } | |
5365 | show_request(m, last, "\t\tE "); | |
5366 | } | |
5367 | ||
81dcef4c CW |
5368 | if (execlists->switch_priority_hint != INT_MIN) |
5369 | drm_printf(m, "\t\tSwitch priority hint: %d\n", | |
23a44ae9 | 5370 | READ_ONCE(execlists->switch_priority_hint)); |
4d97cbe0 CW |
5371 | if (execlists->queue_priority_hint != INT_MIN) |
5372 | drm_printf(m, "\t\tQueue priority hint: %d\n", | |
23a44ae9 | 5373 | READ_ONCE(execlists->queue_priority_hint)); |
81dcef4c CW |
5374 | |
5375 | last = NULL; | |
5376 | count = 0; | |
0212bdef CW |
5377 | for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { |
5378 | struct i915_priolist *p = rb_entry(rb, typeof(*p), node); | |
5379 | int i; | |
5380 | ||
5381 | priolist_for_each_request(rq, p, i) { | |
5382 | if (count++ < max - 1) | |
5383 | show_request(m, rq, "\t\tQ "); | |
5384 | else | |
5385 | last = rq; | |
5386 | } | |
5387 | } | |
5388 | if (last) { | |
5389 | if (count > max) { | |
5390 | drm_printf(m, | |
5391 | "\t\t...skipping %d queued requests...\n", | |
5392 | count - max); | |
5393 | } | |
5394 | show_request(m, last, "\t\tQ "); | |
5395 | } | |
5396 | ||
6d06779e CW |
5397 | last = NULL; |
5398 | count = 0; | |
5399 | for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) { | |
5400 | struct virtual_engine *ve = | |
5401 | rb_entry(rb, typeof(*ve), nodes[engine->id].rb); | |
5402 | struct i915_request *rq = READ_ONCE(ve->request); | |
5403 | ||
5404 | if (rq) { | |
5405 | if (count++ < max - 1) | |
5406 | show_request(m, rq, "\t\tV "); | |
5407 | else | |
5408 | last = rq; | |
5409 | } | |
5410 | } | |
5411 | if (last) { | |
5412 | if (count > max) { | |
5413 | drm_printf(m, | |
5414 | "\t\t...skipping %d virtual requests...\n", | |
5415 | count - max); | |
5416 | } | |
5417 | show_request(m, last, "\t\tV "); | |
5418 | } | |
5419 | ||
422d7df4 | 5420 | spin_unlock_irqrestore(&engine->active.lock, flags); |
0212bdef CW |
5421 | } |
5422 | ||
292ad25c CW |
5423 | void intel_lr_context_reset(struct intel_engine_cs *engine, |
5424 | struct intel_context *ce, | |
5425 | u32 head, | |
5426 | bool scrub) | |
5427 | { | |
dffa8feb | 5428 | GEM_BUG_ON(!intel_context_is_pinned(ce)); |
dffa8feb | 5429 | |
292ad25c CW |
5430 | /* |
5431 | * We want a simple context + ring to execute the breadcrumb update. | |
5432 | * We cannot rely on the context being intact across the GPU hang, | |
5433 | * so clear it and rebuild just what we need for the breadcrumb. | |
5434 | * All pending requests for this context will be zapped, and any | |
5435 | * future request will be after userspace has had the opportunity | |
5436 | * to recreate its own state. | |
5437 | */ | |
d12acee8 CW |
5438 | if (scrub) |
5439 | restore_default_state(ce, engine); | |
292ad25c CW |
5440 | |
5441 | /* Rerun the request; its payload has been neutered (if guilty). */ | |
42827350 | 5442 | __execlists_update_reg_state(ce, engine, head); |
292ad25c CW |
5443 | } |
5444 | ||
19c17b76 MW |
5445 | bool |
5446 | intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine) | |
5447 | { | |
5448 | return engine->set_default_submission == | |
5449 | intel_execlists_set_default_submission; | |
5450 | } | |
5451 | ||
2c66555e | 5452 | #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) |
112ed2d3 | 5453 | #include "selftest_lrc.c" |
2c66555e | 5454 | #endif |