drivers/gpu/drm/i915/gt/intel_lrc.c
1 /*
2 * Copyright © 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Ben Widawsky <ben@bwidawsk.net>
25 * Michel Thierry <michel.thierry@intel.com>
26 * Thomas Daniel <thomas.daniel@intel.com>
27 * Oscar Mateo <oscar.mateo@intel.com>
28 *
29 */
30
31 /**
32 * DOC: Logical Rings, Logical Ring Contexts and Execlists
33 *
34 * Motivation:
35 * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
36 * These expanded contexts enable a number of new abilities, especially
37 * "Execlists" (also implemented in this file).
38 *
39 * One of the main differences from the legacy HW contexts is that logical
40 * ring contexts incorporate many more things into the context's state, like
41 * PDPs or ringbuffer control registers:
42 *
43 * The reason why PDPs are included in the context is straightforward: as
44 * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
45 * contained there means you don't need to do a ppgtt->switch_mm yourself;
46 * instead, the GPU will do it for you on the context switch.
47 *
48 * But what about the ringbuffer control registers (head, tail, etc.)?
49 * Shouldn't we just need one set of those per engine command streamer? This is
50 * where the name "Logical Rings" starts to make sense: by virtualizing the
51 * rings, the engine cs shifts to a new "ring buffer" with every context
52 * switch. When you want to submit a workload to the GPU you: A) choose your
53 * context, B) find its appropriate virtualized ring, C) write commands to it
54 * and then, finally, D) tell the GPU to switch to that context.
55 *
56 * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
57 * to a context is via a context execution list, ergo "Execlists".
58 *
59 * LRC implementation:
60 * Regarding the creation of contexts, we have:
61 *
62 * - One global default context.
63 * - One local default context for each opened fd.
64 * - One local extra context for each context create ioctl call.
65 *
66 * Now that ringbuffers are per-context (and not per-engine, like before)
67 * and that contexts are uniquely tied to a given engine (and not reusable,
68 * like before) we need:
69 *
70 * - One ringbuffer per-engine inside each context.
71 * - One backing object per-engine inside each context.
72 *
73 * The global default context starts its life with these new objects fully
74 * allocated and populated. The local default context for each opened fd is
75 * more complex, because we don't know at creation time which engine is going
76 * to use them. To handle this, we have implemented a deferred creation of LR
77 * contexts:
78 *
79 * The local context starts its life as a hollow or blank holder that only
80 * gets populated for a given engine once we receive an execbuffer. If later
81 * on we receive another execbuffer ioctl for the same context but a different
82 * engine, we allocate/populate a new ringbuffer and context backing object and
83 * so on.
84 *
85 * Finally, regarding local contexts created using the ioctl call: as they are
86 * only allowed with the render ring, we can allocate & populate them right
87 * away (no need to defer anything, at least for now).
88 *
89 * Execlists implementation:
90 * Execlists are the new method by which, on gen8+ hardware, workloads are
91 * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
92 * This method works as follows:
93 *
94 * When a request is committed, its commands (the BB start and any leading or
95 * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
96 * for the appropriate context. The tail pointer in the hardware context is not
97 * updated at this time, but instead, kept by the driver in the ringbuffer
98 * structure. A structure representing this request is added to a request queue
99 * for the appropriate engine: this structure contains a copy of the context's
100 * tail after the request was written to the ring buffer and a pointer to the
101 * context itself.
102 *
103 * If the engine's request queue was empty before the request was added, the
104 * queue is processed immediately. Otherwise the queue will be processed during
105 * a context switch interrupt. In any case, elements on the queue will get sent
106 * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
107 * globally unique 20-bit submission ID.
108 *
109 * When execution of a request completes, the GPU updates the context status
110 * buffer with a context complete event and generates a context switch interrupt.
111 * During the interrupt handling, the driver examines the events in the buffer:
112 * for each context complete event, if the announced ID matches that on the head
113 * of the request queue, then that request is retired and removed from the queue.
114 *
115 * After processing, if any requests were retired and the queue is not empty
116 * then a new execution list can be submitted. The two requests at the front of
117 * the queue are next to be submitted but since a context may not occur twice in
118 * an execution list, if subsequent requests have the same ID as the first then
119 * the two requests must be combined. This is done simply by discarding requests
120 * at the head of the queue until either only one request is left (in which case
121 * we use a NULL second context) or the first two requests have unique IDs.
122 *
123 * By always executing the first two requests in the queue the driver ensures
124 * that the GPU is kept as busy as possible. In the case where a single context
125 * completes but a second context is still executing, the request for this second
126 * context will be at the head of the queue when we remove the first one. This
127 * request will then be resubmitted along with a new request for a different context,
128 * which will cause the hardware to continue executing the second request and queue
129 * the new request (the GPU detects the condition of a context getting preempted
130 * with the same context and optimizes the context switch flow by not doing
131 * preemption, but just sampling the new tail pointer).
132 *
133 */
134 #include <linux/interrupt.h>
135
136 #include "i915_drv.h"
137 #include "i915_perf.h"
138 #include "i915_trace.h"
139 #include "i915_vgpu.h"
140 #include "intel_context.h"
141 #include "intel_engine_pm.h"
142 #include "intel_gt.h"
143 #include "intel_gt_pm.h"
144 #include "intel_gt_requests.h"
145 #include "intel_lrc_reg.h"
146 #include "intel_mocs.h"
147 #include "intel_reset.h"
148 #include "intel_ring.h"
149 #include "intel_workarounds.h"
150
151 #define RING_EXECLIST_QFULL (1 << 0x2)
152 #define RING_EXECLIST1_VALID (1 << 0x3)
153 #define RING_EXECLIST0_VALID (1 << 0x4)
154 #define RING_EXECLIST_ACTIVE_STATUS (3 << 0xE)
155 #define RING_EXECLIST1_ACTIVE (1 << 0x11)
156 #define RING_EXECLIST0_ACTIVE (1 << 0x12)
157
158 #define GEN8_CTX_STATUS_IDLE_ACTIVE (1 << 0)
159 #define GEN8_CTX_STATUS_PREEMPTED (1 << 1)
160 #define GEN8_CTX_STATUS_ELEMENT_SWITCH (1 << 2)
161 #define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3)
162 #define GEN8_CTX_STATUS_COMPLETE (1 << 4)
163 #define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15)
164
165 #define GEN8_CTX_STATUS_COMPLETED_MASK \
166 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
167
168 #define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
169
170 #define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE (0x1) /* lower csb dword */
171 #define GEN12_CTX_SWITCH_DETAIL(csb_dw) ((csb_dw) & 0xF) /* upper csb dword */
172 #define GEN12_CSB_SW_CTX_ID_MASK GENMASK(25, 15)
173 #define GEN12_IDLE_CTX_ID 0x7FF
174 #define GEN12_CSB_CTX_VALID(csb_dw) \
175 (FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)
176
177 /* Typical size of the average request (2 pipecontrols and a MI_BB) */
178 #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
179
180 struct virtual_engine {
181 struct intel_engine_cs base;
182 struct intel_context context;
183
184 /*
185 * We allow only a single request through the virtual engine at a time
186 * (each request in the timeline waits for the completion fence of
187 * the previous before being submitted). By restricting ourselves to
188 * only submitting a single request, each request is placed onto a
189 * physical engine to maximise load spreading (by virtue of the late greedy
190 * scheduling -- each real engine takes the next available request
191 * upon idling).
192 */
193 struct i915_request *request;
194
195 /*
196 * We keep a rbtree of available virtual engines inside each physical
197 * engine, sorted by priority. Here we preallocate the nodes we need
198 * for the virtual engine, indexed by physical_engine->id.
199 */
200 struct ve_node {
201 struct rb_node rb;
202 int prio;
203 } nodes[I915_NUM_ENGINES];
204
205 /*
206 * Keep track of bonded pairs -- restrictions upon our selection
207 * of physical engines any particular request may be submitted to.
208 * If we receive a submit-fence from a master engine, we will only
209 * use one of the sibling_mask physical engines.
210 */
211 struct ve_bond {
212 const struct intel_engine_cs *master;
213 intel_engine_mask_t sibling_mask;
214 } *bonds;
215 unsigned int num_bonds;
216
217 /* And finally, which physical engines this virtual engine maps onto. */
218 unsigned int num_siblings;
219 struct intel_engine_cs *siblings[0];
220 };
221
222 static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
223 {
224 GEM_BUG_ON(!intel_engine_is_virtual(engine));
225 return container_of(engine, struct virtual_engine, base);
226 }
227
228 static int __execlists_context_alloc(struct intel_context *ce,
229 struct intel_engine_cs *engine);
230
231 static void execlists_init_reg_state(u32 *reg_state,
232 const struct intel_context *ce,
233 const struct intel_engine_cs *engine,
234 const struct intel_ring *ring,
235 bool close);
236 static void
237 __execlists_update_reg_state(const struct intel_context *ce,
238 const struct intel_engine_cs *engine,
239 u32 head);
240
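/*
 * Flag an incomplete request with -EIO and mark it complete so that
 * any waiters are released.
 */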
241 static void mark_eio(struct i915_request *rq)
242 {
243 if (i915_request_completed(rq))
244 return;
245
246 GEM_BUG_ON(i915_request_signaled(rq));
247
248 i915_request_set_error_once(rq, -EIO);
249 i915_request_mark_complete(rq);
250 }
251
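/*
 * Walk back along the timeline from rq to find the oldest request that
 * has not yet completed, i.e. the point from which execution should
 * resume after a reset.
 */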
252 static struct i915_request *
253 active_request(const struct intel_timeline * const tl, struct i915_request *rq)
254 {
255 struct i915_request *active = rq;
256
257 rcu_read_lock();
258 list_for_each_entry_continue_reverse(rq, &tl->requests, link) {
259 if (i915_request_completed(rq))
260 break;
261
262 active = rq;
263 }
264 rcu_read_unlock();
265
266 return active;
267 }
268
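/* GGTT address of the preemption semaphore dword in the engine's HWSP. */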
269 static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
270 {
271 return (i915_ggtt_offset(engine->status_page.vma) +
272 I915_GEM_HWS_PREEMPT_ADDR);
273 }
274
275 static inline void
276 ring_set_paused(const struct intel_engine_cs *engine, int state)
277 {
278 /*
279 * We inspect HWS_PREEMPT with a semaphore inside
280 * engine->emit_fini_breadcrumb. If the dword is true,
281 * the ring is paused as the semaphore will busywait
282 * until the dword is false.
283 */
284 engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
285 if (state)
286 wmb();
287 }
288
289 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
290 {
291 return rb_entry(rb, struct i915_priolist, node);
292 }
293
294 static inline int rq_prio(const struct i915_request *rq)
295 {
296 return READ_ONCE(rq->sched.attr.priority);
297 }
298
299 static int effective_prio(const struct i915_request *rq)
300 {
301 int prio = rq_prio(rq);
302
303 /*
304 * If this request is special and must not be interrupted at any
305 * cost, so be it. Note we are only checking the most recent request
306 * in the context and so may be masking an earlier vip request. It
307 * is hoped that under the conditions where nopreempt is used, this
308 * will not matter (i.e. all requests to that context will be
309 * nopreempt for as long as desired).
310 */
311 if (i915_request_has_nopreempt(rq))
312 prio = I915_PRIORITY_UNPREEMPTABLE;
313
314 /*
315 * On unwinding the active request, we give it a priority bump
316 * if it has completed waiting on any semaphore. If we know that
317 * the request has already started, we can prevent an unwanted
318 * preempt-to-idle cycle by taking that into account now.
319 */
320 if (__i915_request_has_started(rq))
321 prio |= I915_PRIORITY_NOSEMAPHORE;
322
323 /* Restrict mere WAIT boosts from triggering preemption */
324 BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */
325 return prio | __NO_PREEMPTION;
326 }
327
328 static int queue_prio(const struct intel_engine_execlists *execlists)
329 {
330 struct i915_priolist *p;
331 struct rb_node *rb;
332
333 rb = rb_first_cached(&execlists->queue);
334 if (!rb)
335 return INT_MIN;
336
337 /*
338 * As the priolist[] are inverted, with the highest priority in [0],
339 * we have to flip the index value to become priority.
340 */
341 p = to_priolist(rb);
342 return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used);
343 }
344
345 static inline bool need_preempt(const struct intel_engine_cs *engine,
346 const struct i915_request *rq,
347 struct rb_node *rb)
348 {
349 int last_prio;
350
351 if (!intel_engine_has_semaphores(engine))
352 return false;
353
354 /*
355 * Check if the current priority hint merits a preemption attempt.
356 *
357 * We record the highest value priority we saw during rescheduling
358 * prior to this dequeue, therefore we know that if it is strictly
359 * less than the current tail of ELSP[0], we do not need to force
360 * a preempt-to-idle cycle.
361 *
362 * However, the priority hint is a mere hint that we may need to
363 * preempt. If that hint is stale or we may be trying to preempt
364 * ourselves, ignore the request.
365 *
366 * More naturally we would write
367 * prio >= max(0, last);
368 * except that we wish to prevent triggering preemption at the same
369 * priority level: the task that is running should remain running
370 * to preserve FIFO ordering of dependencies.
371 */
372 last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1);
373 if (engine->execlists.queue_priority_hint <= last_prio)
374 return false;
375
376 /*
377 * Check against the first request in ELSP[1]; it will, thanks to the
378 * power of PI, be the highest priority of that context.
379 */
380 if (!list_is_last(&rq->sched.link, &engine->active.requests) &&
381 rq_prio(list_next_entry(rq, sched.link)) > last_prio)
382 return true;
383
384 if (rb) {
385 struct virtual_engine *ve =
386 rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
387 bool preempt = false;
388
389 if (engine == ve->siblings[0]) { /* only preempt one sibling */
390 struct i915_request *next;
391
392 rcu_read_lock();
393 next = READ_ONCE(ve->request);
394 if (next)
395 preempt = rq_prio(next) > last_prio;
396 rcu_read_unlock();
397 }
398
399 if (preempt)
400 return preempt;
401 }
402
403 /*
404 * If the inflight context did not trigger the preemption, then maybe
405 * it was the set of queued requests? Pick the highest priority in
406 * the queue (the first active priolist) and see if it deserves to be
407 * running instead of ELSP[0].
408 *
409 * The highest priority request in the queue cannot be either
410 * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same
411 * context, its priority would not exceed ELSP[0] aka last_prio.
412 */
413 return queue_prio(&engine->execlists) > last_prio;
414 }
415
416 __maybe_unused static inline bool
417 assert_priority_queue(const struct i915_request *prev,
418 const struct i915_request *next)
419 {
420 /*
421 * Without preemption, the prev may refer to the still active element
422 * which we refuse to let go.
423 *
424 * Even with preemption, there are times when we think it is better not
425 * to preempt and leave an ostensibly lower priority request in flight.
426 */
427 if (i915_request_is_active(prev))
428 return true;
429
430 return rq_prio(prev) >= rq_prio(next);
431 }
432
433 /*
434 * The context descriptor encodes various attributes of a context,
435 * including its GTT address and some flags. Because it's fairly
436 * expensive to calculate, we'll just do it once and cache the result,
437 * which remains valid until the context is unpinned.
438 *
439 * This is what a descriptor looks like, from LSB to MSB::
440 *
441 * bits 0-11: flags, GEN8_CTX_* (cached in ctx->desc_template)
442 * bits 12-31: LRCA, GTT address of (the HWSP of) this context
443 * bits 32-52: ctx ID, a globally unique tag (highest bit used by GuC)
444 * bits 53-54: mbz, reserved for use by hardware
445 * bits 55-63: group ID, currently unused and set to 0
446 *
447 * Starting from Gen11, the upper dword of the descriptor has a new format:
448 *
449 * bits 32-36: reserved
450 * bits 37-47: SW context ID
451 * bits 48:53: engine instance
452 * bit 54: mbz, reserved for use by hardware
453 * bits 55-60: SW counter
454 * bits 61-63: engine class
455 *
456 * engine info, SW context ID and SW counter need to form a unique number
457 * (Context ID) per lrc.
458 */
459 static u64
460 lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
461 {
462 u64 desc;
463
464 desc = INTEL_LEGACY_32B_CONTEXT;
465 if (i915_vm_is_4lvl(ce->vm))
466 desc = INTEL_LEGACY_64B_CONTEXT;
467 desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
468
469 desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
470 if (IS_GEN(engine->i915, 8))
471 desc |= GEN8_CTX_L3LLC_COHERENT;
472
473 desc |= i915_ggtt_offset(ce->state); /* bits 12-31 */
474 /*
475 * The following 32bits are copied into the OA reports (dword 2).
476 * Consider updating oa_get_render_ctx_id in i915_perf.c when changing
477 * anything below.
478 */
479 if (INTEL_GEN(engine->i915) >= 11) {
480 desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
481 /* bits 48-53 */
482
483 desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
484 /* bits 61-63 */
485 }
486
487 return desc;
488 }
489
490 static inline unsigned int dword_in_page(void *addr)
491 {
492 return offset_in_page(addr) / sizeof(u32);
493 }
494
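/*
 * Expand one of the compact per-gen offset tables (gen8_xcs_offsets et al)
 * into a context register image. In those tables, NOP(x) skips x dwords,
 * LRI(count, flags) emits an MI_LOAD_REGISTER_IMM header, REG()/REG16()
 * encode a register offset (relative to engine->mmio_base) in one or two
 * 7-bit groups, and END(x) terminates the table, recording the dword count
 * up to which the image is padded with MI_NOOP before being closed with
 * MI_BATCH_BUFFER_END when clearing.
 */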
495 static void set_offsets(u32 *regs,
496 const u8 *data,
497 const struct intel_engine_cs *engine,
498 bool clear)
499 #define NOP(x) (BIT(7) | (x))
500 #define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
501 #define POSTED BIT(0)
502 #define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
503 #define REG16(x) \
504 (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
505 (((x) >> 2) & 0x7f)
506 #define END(x) 0, (x)
507 {
508 const u32 base = engine->mmio_base;
509
510 while (*data) {
511 u8 count, flags;
512
513 if (*data & BIT(7)) { /* skip */
514 count = *data++ & ~BIT(7);
515 if (clear)
516 memset32(regs, MI_NOOP, count);
517 regs += count;
518 continue;
519 }
520
521 count = *data & 0x3f;
522 flags = *data >> 6;
523 data++;
524
525 *regs = MI_LOAD_REGISTER_IMM(count);
526 if (flags & POSTED)
527 *regs |= MI_LRI_FORCE_POSTED;
528 if (INTEL_GEN(engine->i915) >= 11)
529 *regs |= MI_LRI_CS_MMIO;
530 regs++;
531
532 GEM_BUG_ON(!count);
533 do {
534 u32 offset = 0;
535 u8 v;
536
537 do {
538 v = *data++;
539 offset <<= 7;
540 offset |= v & ~BIT(7);
541 } while (v & BIT(7));
542
543 regs[0] = base + (offset << 2);
544 if (clear)
545 regs[1] = 0;
546 regs += 2;
547 } while (--count);
548 }
549
550 if (clear) {
551 u8 count = *++data;
552
553 /* Clear past the tail for HW access */
554 GEM_BUG_ON(dword_in_page(regs) > count);
555 memset32(regs, MI_NOOP, count - dword_in_page(regs));
556
557 /* Close the batch; used mainly by live_lrc_layout() */
558 *regs = MI_BATCH_BUFFER_END;
559 if (INTEL_GEN(engine->i915) >= 10)
560 *regs |= BIT(0);
561 }
562 }
563
564 static const u8 gen8_xcs_offsets[] = {
565 NOP(1),
566 LRI(11, 0),
567 REG16(0x244),
568 REG(0x034),
569 REG(0x030),
570 REG(0x038),
571 REG(0x03c),
572 REG(0x168),
573 REG(0x140),
574 REG(0x110),
575 REG(0x11c),
576 REG(0x114),
577 REG(0x118),
578
579 NOP(9),
580 LRI(9, 0),
581 REG16(0x3a8),
582 REG16(0x28c),
583 REG16(0x288),
584 REG16(0x284),
585 REG16(0x280),
586 REG16(0x27c),
587 REG16(0x278),
588 REG16(0x274),
589 REG16(0x270),
590
591 NOP(13),
592 LRI(2, 0),
593 REG16(0x200),
594 REG(0x028),
595
596 END(80)
597 };
598
599 static const u8 gen9_xcs_offsets[] = {
600 NOP(1),
601 LRI(14, POSTED),
602 REG16(0x244),
603 REG(0x034),
604 REG(0x030),
605 REG(0x038),
606 REG(0x03c),
607 REG(0x168),
608 REG(0x140),
609 REG(0x110),
610 REG(0x11c),
611 REG(0x114),
612 REG(0x118),
613 REG(0x1c0),
614 REG(0x1c4),
615 REG(0x1c8),
616
617 NOP(3),
618 LRI(9, POSTED),
619 REG16(0x3a8),
620 REG16(0x28c),
621 REG16(0x288),
622 REG16(0x284),
623 REG16(0x280),
624 REG16(0x27c),
625 REG16(0x278),
626 REG16(0x274),
627 REG16(0x270),
628
629 NOP(13),
630 LRI(1, POSTED),
631 REG16(0x200),
632
633 NOP(13),
634 LRI(44, POSTED),
635 REG(0x028),
636 REG(0x09c),
637 REG(0x0c0),
638 REG(0x178),
639 REG(0x17c),
640 REG16(0x358),
641 REG(0x170),
642 REG(0x150),
643 REG(0x154),
644 REG(0x158),
645 REG16(0x41c),
646 REG16(0x600),
647 REG16(0x604),
648 REG16(0x608),
649 REG16(0x60c),
650 REG16(0x610),
651 REG16(0x614),
652 REG16(0x618),
653 REG16(0x61c),
654 REG16(0x620),
655 REG16(0x624),
656 REG16(0x628),
657 REG16(0x62c),
658 REG16(0x630),
659 REG16(0x634),
660 REG16(0x638),
661 REG16(0x63c),
662 REG16(0x640),
663 REG16(0x644),
664 REG16(0x648),
665 REG16(0x64c),
666 REG16(0x650),
667 REG16(0x654),
668 REG16(0x658),
669 REG16(0x65c),
670 REG16(0x660),
671 REG16(0x664),
672 REG16(0x668),
673 REG16(0x66c),
674 REG16(0x670),
675 REG16(0x674),
676 REG16(0x678),
677 REG16(0x67c),
678 REG(0x068),
679
680 END(176)
681 };
682
683 static const u8 gen12_xcs_offsets[] = {
684 NOP(1),
685 LRI(13, POSTED),
686 REG16(0x244),
687 REG(0x034),
688 REG(0x030),
689 REG(0x038),
690 REG(0x03c),
691 REG(0x168),
692 REG(0x140),
693 REG(0x110),
694 REG(0x1c0),
695 REG(0x1c4),
696 REG(0x1c8),
697 REG(0x180),
698 REG16(0x2b4),
699
700 NOP(5),
701 LRI(9, POSTED),
702 REG16(0x3a8),
703 REG16(0x28c),
704 REG16(0x288),
705 REG16(0x284),
706 REG16(0x280),
707 REG16(0x27c),
708 REG16(0x278),
709 REG16(0x274),
710 REG16(0x270),
711
712 END(80)
713 };
714
715 static const u8 gen8_rcs_offsets[] = {
716 NOP(1),
717 LRI(14, POSTED),
718 REG16(0x244),
719 REG(0x034),
720 REG(0x030),
721 REG(0x038),
722 REG(0x03c),
723 REG(0x168),
724 REG(0x140),
725 REG(0x110),
726 REG(0x11c),
727 REG(0x114),
728 REG(0x118),
729 REG(0x1c0),
730 REG(0x1c4),
731 REG(0x1c8),
732
733 NOP(3),
734 LRI(9, POSTED),
735 REG16(0x3a8),
736 REG16(0x28c),
737 REG16(0x288),
738 REG16(0x284),
739 REG16(0x280),
740 REG16(0x27c),
741 REG16(0x278),
742 REG16(0x274),
743 REG16(0x270),
744
745 NOP(13),
746 LRI(1, 0),
747 REG(0x0c8),
748
749 END(80)
750 };
751
752 static const u8 gen9_rcs_offsets[] = {
753 NOP(1),
754 LRI(14, POSTED),
755 REG16(0x244),
756 REG(0x34),
757 REG(0x30),
758 REG(0x38),
759 REG(0x3c),
760 REG(0x168),
761 REG(0x140),
762 REG(0x110),
763 REG(0x11c),
764 REG(0x114),
765 REG(0x118),
766 REG(0x1c0),
767 REG(0x1c4),
768 REG(0x1c8),
769
770 NOP(3),
771 LRI(9, POSTED),
772 REG16(0x3a8),
773 REG16(0x28c),
774 REG16(0x288),
775 REG16(0x284),
776 REG16(0x280),
777 REG16(0x27c),
778 REG16(0x278),
779 REG16(0x274),
780 REG16(0x270),
781
782 NOP(13),
783 LRI(1, 0),
784 REG(0xc8),
785
786 NOP(13),
787 LRI(44, POSTED),
788 REG(0x28),
789 REG(0x9c),
790 REG(0xc0),
791 REG(0x178),
792 REG(0x17c),
793 REG16(0x358),
794 REG(0x170),
795 REG(0x150),
796 REG(0x154),
797 REG(0x158),
798 REG16(0x41c),
799 REG16(0x600),
800 REG16(0x604),
801 REG16(0x608),
802 REG16(0x60c),
803 REG16(0x610),
804 REG16(0x614),
805 REG16(0x618),
806 REG16(0x61c),
807 REG16(0x620),
808 REG16(0x624),
809 REG16(0x628),
810 REG16(0x62c),
811 REG16(0x630),
812 REG16(0x634),
813 REG16(0x638),
814 REG16(0x63c),
815 REG16(0x640),
816 REG16(0x644),
817 REG16(0x648),
818 REG16(0x64c),
819 REG16(0x650),
820 REG16(0x654),
821 REG16(0x658),
822 REG16(0x65c),
823 REG16(0x660),
824 REG16(0x664),
825 REG16(0x668),
826 REG16(0x66c),
827 REG16(0x670),
828 REG16(0x674),
829 REG16(0x678),
830 REG16(0x67c),
831 REG(0x68),
832
833 END(176)
834 };
835
836 static const u8 gen11_rcs_offsets[] = {
837 NOP(1),
838 LRI(15, POSTED),
839 REG16(0x244),
840 REG(0x034),
841 REG(0x030),
842 REG(0x038),
843 REG(0x03c),
844 REG(0x168),
845 REG(0x140),
846 REG(0x110),
847 REG(0x11c),
848 REG(0x114),
849 REG(0x118),
850 REG(0x1c0),
851 REG(0x1c4),
852 REG(0x1c8),
853 REG(0x180),
854
855 NOP(1),
856 LRI(9, POSTED),
857 REG16(0x3a8),
858 REG16(0x28c),
859 REG16(0x288),
860 REG16(0x284),
861 REG16(0x280),
862 REG16(0x27c),
863 REG16(0x278),
864 REG16(0x274),
865 REG16(0x270),
866
867 LRI(1, POSTED),
868 REG(0x1b0),
869
870 NOP(10),
871 LRI(1, 0),
872 REG(0x0c8),
873
874 END(80)
875 };
876
877 static const u8 gen12_rcs_offsets[] = {
878 NOP(1),
879 LRI(13, POSTED),
880 REG16(0x244),
881 REG(0x034),
882 REG(0x030),
883 REG(0x038),
884 REG(0x03c),
885 REG(0x168),
886 REG(0x140),
887 REG(0x110),
888 REG(0x1c0),
889 REG(0x1c4),
890 REG(0x1c8),
891 REG(0x180),
892 REG16(0x2b4),
893
894 NOP(5),
895 LRI(9, POSTED),
896 REG16(0x3a8),
897 REG16(0x28c),
898 REG16(0x288),
899 REG16(0x284),
900 REG16(0x280),
901 REG16(0x27c),
902 REG16(0x278),
903 REG16(0x274),
904 REG16(0x270),
905
906 LRI(3, POSTED),
907 REG(0x1b0),
908 REG16(0x5a8),
909 REG16(0x5ac),
910
911 NOP(6),
912 LRI(1, 0),
913 REG(0x0c8),
914
915 END(80)
916 };
917
918 #undef END
919 #undef REG16
920 #undef REG
921 #undef LRI
922 #undef NOP
923
924 static const u8 *reg_offsets(const struct intel_engine_cs *engine)
925 {
926 /*
927 * The gen12+ lists only have the registers we program in the basic
928 * default state. We rely on the context image using relative
929 * addressing to automatically fix up the register state between the
930 * physical engines for the virtual engine.
931 */
932 GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 &&
933 !intel_engine_has_relative_mmio(engine));
934
935 if (engine->class == RENDER_CLASS) {
936 if (INTEL_GEN(engine->i915) >= 12)
937 return gen12_rcs_offsets;
938 else if (INTEL_GEN(engine->i915) >= 11)
939 return gen11_rcs_offsets;
940 else if (INTEL_GEN(engine->i915) >= 9)
941 return gen9_rcs_offsets;
942 else
943 return gen8_rcs_offsets;
944 } else {
945 if (INTEL_GEN(engine->i915) >= 12)
946 return gen12_xcs_offsets;
947 else if (INTEL_GEN(engine->i915) >= 9)
948 return gen9_xcs_offsets;
949 else
950 return gen8_xcs_offsets;
951 }
952 }
953
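/*
 * Unsubmit every incomplete request on the engine, pushing it back onto
 * the priority queue for later resubmission (or handing it back to its
 * virtual engine so it may be rescheduled onto any sibling). Called with
 * engine->active.lock held.
 */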
954 static struct i915_request *
955 __unwind_incomplete_requests(struct intel_engine_cs *engine)
956 {
957 struct i915_request *rq, *rn, *active = NULL;
958 struct list_head *uninitialized_var(pl);
959 int prio = I915_PRIORITY_INVALID;
960
961 lockdep_assert_held(&engine->active.lock);
962
963 list_for_each_entry_safe_reverse(rq, rn,
964 &engine->active.requests,
965 sched.link) {
966 if (i915_request_completed(rq))
967 continue; /* XXX */
968
969 __i915_request_unsubmit(rq);
970
971 /*
972 * Push the request back into the queue for later resubmission.
973 * If this request is not native to this physical engine (i.e.
974 * it came from a virtual source), push it back onto the virtual
975 * engine so that it can be moved across onto another physical
976 * engine as load dictates.
977 */
978 if (likely(rq->execution_mask == engine->mask)) {
979 GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
980 if (rq_prio(rq) != prio) {
981 prio = rq_prio(rq);
982 pl = i915_sched_lookup_priolist(engine, prio);
983 }
984 GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
985
986 list_move(&rq->sched.link, pl);
987 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
988
989 active = rq;
990 } else {
991 struct intel_engine_cs *owner = rq->context->engine;
992
993 /*
994 * Decouple the virtual breadcrumb before moving it
995 * back to the virtual engine -- we don't want the
996 * request to complete in the background and try
997 * and cancel the breadcrumb on the virtual engine
998 * (instead of the old engine where it is linked)!
999 */
1000 if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
1001 &rq->fence.flags)) {
1002 spin_lock_nested(&rq->lock,
1003 SINGLE_DEPTH_NESTING);
1004 i915_request_cancel_breadcrumb(rq);
1005 spin_unlock(&rq->lock);
1006 }
1007 WRITE_ONCE(rq->engine, owner);
1008 owner->submit_request(rq);
1009 active = NULL;
1010 }
1011 }
1012
1013 return active;
1014 }
1015
1016 struct i915_request *
1017 execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
1018 {
1019 struct intel_engine_cs *engine =
1020 container_of(execlists, typeof(*engine), execlists);
1021
1022 return __unwind_incomplete_requests(engine);
1023 }
1024
1025 static inline void
1026 execlists_context_status_change(struct i915_request *rq, unsigned long status)
1027 {
1028 /*
1029 * This is only used when GVT-g is enabled. When GVT-g is disabled,
1030 * the compiler should eliminate this function as dead code.
1031 */
1032 if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
1033 return;
1034
1035 atomic_notifier_call_chain(&rq->engine->context_status_notifier,
1036 status, rq);
1037 }
1038
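/*
 * Engine busyness accounting: note the time at which the engine becomes
 * busy, i.e. when its active context count rises from zero.
 */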
1039 static void intel_engine_context_in(struct intel_engine_cs *engine)
1040 {
1041 unsigned long flags;
1042
1043 if (READ_ONCE(engine->stats.enabled) == 0)
1044 return;
1045
1046 write_seqlock_irqsave(&engine->stats.lock, flags);
1047
1048 if (engine->stats.enabled > 0) {
1049 if (engine->stats.active++ == 0)
1050 engine->stats.start = ktime_get();
1051 GEM_BUG_ON(engine->stats.active == 0);
1052 }
1053
1054 write_sequnlock_irqrestore(&engine->stats.lock, flags);
1055 }
1056
1057 static void intel_engine_context_out(struct intel_engine_cs *engine)
1058 {
1059 unsigned long flags;
1060
1061 if (READ_ONCE(engine->stats.enabled) == 0)
1062 return;
1063
1064 write_seqlock_irqsave(&engine->stats.lock, flags);
1065
1066 if (engine->stats.enabled > 0) {
1067 ktime_t last;
1068
1069 if (engine->stats.active && --engine->stats.active == 0) {
1070 /*
1071 * The active context count has dropped to zero, so the engine is
1072 * now idle: add the elapsed busy time to the running total.
1073 */
1074 last = ktime_sub(ktime_get(), engine->stats.start);
1075
1076 engine->stats.total = ktime_add(engine->stats.total,
1077 last);
1078 } else if (engine->stats.active == 0) {
1079 /*
1080 * After turning on engine stats, context out might be
1081 * the first event in which case we account from the
1082 * time stats gathering was turned on.
1083 */
1084 last = ktime_sub(ktime_get(), engine->stats.enabled_at);
1085
1086 engine->stats.total = ktime_add(engine->stats.total,
1087 last);
1088 }
1089 }
1090
1091 write_sequnlock_irqrestore(&engine->stats.lock, flags);
1092 }
1093
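/*
 * Dword offset of the RING_MI_MODE register within the context image,
 * or -1 if unknown for this engine.
 */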
1094 static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
1095 {
1096 if (INTEL_GEN(engine->i915) >= 12)
1097 return 0x60;
1098 else if (INTEL_GEN(engine->i915) >= 9)
1099 return 0x54;
1100 else if (engine->class == RENDER_CLASS)
1101 return 0x58;
1102 else
1103 return -1;
1104 }
1105
1106 static void
1107 execlists_check_context(const struct intel_context *ce,
1108 const struct intel_engine_cs *engine)
1109 {
1110 const struct intel_ring *ring = ce->ring;
1111 u32 *regs = ce->lrc_reg_state;
1112 bool valid = true;
1113 int x;
1114
1115 if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
1116 pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
1117 engine->name,
1118 regs[CTX_RING_START],
1119 i915_ggtt_offset(ring->vma));
1120 regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
1121 valid = false;
1122 }
1123
1124 if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
1125 (RING_CTL_SIZE(ring->size) | RING_VALID)) {
1126 pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
1127 engine->name,
1128 regs[CTX_RING_CTL],
1129 (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
1130 regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
1131 valid = false;
1132 }
1133
1134 x = lrc_ring_mi_mode(engine);
1135 if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
1136 pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
1137 engine->name, regs[x + 1]);
1138 regs[x + 1] &= ~STOP_RING;
1139 regs[x + 1] |= STOP_RING << 16;
1140 valid = false;
1141 }
1142
1143 WARN_ONCE(!valid, "Invalid lrc state found before submission\n");
1144 }
1145
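/*
 * Scrub the context image back to the engine's default state (skipping
 * over the per-process HWSP) and then rewrite the per-context registers.
 */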
1146 static void restore_default_state(struct intel_context *ce,
1147 struct intel_engine_cs *engine)
1148 {
1149 u32 *regs = ce->lrc_reg_state;
1150
1151 if (engine->pinned_default_state)
1152 memcpy(regs, /* skip restoring the vanilla PPHWSP */
1153 engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
1154 engine->context_size - PAGE_SIZE);
1155
1156 execlists_init_reg_state(regs, ce, engine, ce->ring, false);
1157 }
1158
1159 static void reset_active(struct i915_request *rq,
1160 struct intel_engine_cs *engine)
1161 {
1162 struct intel_context * const ce = rq->context;
1163 u32 head;
1164
1165 /*
1166 * The executing context has been cancelled. We want to prevent
1167 * further execution along this context and propagate the error on
1168 * to anything depending on its results.
1169 *
1170 * In __i915_request_submit(), we apply the -EIO and remove the
1171 * requests' payloads for any banned requests. But first, we must
1172 * rewind the context back to the start of the incomplete request so
1173 * that we do not jump back into the middle of the batch.
1174 *
1175 * We preserve the breadcrumbs and semaphores of the incomplete
1176 * requests so that inter-timeline dependencies (i.e. other timelines)
1177 * remain correctly ordered. And we defer to __i915_request_submit()
1178 * so that all asynchronous waits are correctly handled.
1179 */
1180 ENGINE_TRACE(engine, "{ rq=%llx:%lld }\n",
1181 rq->fence.context, rq->fence.seqno);
1182
1183 /* On resubmission of the active request, payload will be scrubbed */
1184 if (i915_request_completed(rq))
1185 head = rq->tail;
1186 else
1187 head = active_request(ce->timeline, rq)->head;
1188 head = intel_ring_wrap(ce->ring, head);
1189
1190 /* Scrub the context image to prevent replaying the previous batch */
1191 restore_default_state(ce, engine);
1192 __execlists_update_reg_state(ce, engine, head);
1193
1194 /* We've switched away, so this should be a no-op, but intent matters */
1195 ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
1196 }
1197
1198 static u32 intel_context_get_runtime(const struct intel_context *ce)
1199 {
1200 /*
1201 * We can use either ppHWSP[16] which is recorded before the context
1202 * switch (and so excludes the cost of context switches) or use the
1203 * value from the context image itself, which is saved/restored earlier
1204 * and so includes the cost of the save.
1205 */
1206 return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
1207 }
1208
1209 static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
1210 {
1211 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1212 ce->runtime.num_underflow += dt < 0;
1213 ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt);
1214 #endif
1215 }
1216
1217 static void intel_context_update_runtime(struct intel_context *ce)
1218 {
1219 u32 old;
1220 s32 dt;
1221
1222 if (intel_context_is_barrier(ce))
1223 return;
1224
1225 old = ce->runtime.last;
1226 ce->runtime.last = intel_context_get_runtime(ce);
1227 dt = ce->runtime.last - old;
1228
1229 if (unlikely(dt <= 0)) {
1230 CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
1231 old, ce->runtime.last, dt);
1232 st_update_runtime_underflow(ce, dt);
1233 return;
1234 }
1235
1236 ewma_runtime_add(&ce->runtime.avg, dt);
1237 ce->runtime.total += dt;
1238 }
1239
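/*
 * Called when a context first enters the ELSP: (re)assign the software
 * context ID tag in its descriptor, take a GT wakeref, and update the
 * GVT-g notifier and engine busyness statistics.
 */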
1240 static inline struct intel_engine_cs *
1241 __execlists_schedule_in(struct i915_request *rq)
1242 {
1243 struct intel_engine_cs * const engine = rq->engine;
1244 struct intel_context * const ce = rq->context;
1245
1246 intel_context_get(ce);
1247
1248 if (unlikely(intel_context_is_banned(ce)))
1249 reset_active(rq, engine);
1250
1251 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
1252 execlists_check_context(ce, engine);
1253
1254 ce->lrc_desc &= ~GENMASK_ULL(47, 37);
1255 if (ce->tag) {
1256 /* Use a fixed tag for OA and friends */
1257 ce->lrc_desc |= (u64)ce->tag << 32;
1258 } else {
1259 /* We don't need a strict matching tag, just different values */
1260 ce->lrc_desc |=
1261 (u64)(++engine->context_tag % NUM_CONTEXT_TAG) <<
1262 GEN11_SW_CTX_ID_SHIFT;
1263 BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID);
1264 }
1265
1266 __intel_gt_pm_get(engine->gt);
1267 execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
1268 intel_engine_context_in(engine);
1269
1270 return engine;
1271 }
1272
1273 static inline struct i915_request *
1274 execlists_schedule_in(struct i915_request *rq, int idx)
1275 {
1276 struct intel_context * const ce = rq->context;
1277 struct intel_engine_cs *old;
1278
1279 GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
1280 trace_i915_request_in(rq, idx);
1281
1282 old = READ_ONCE(ce->inflight);
1283 do {
1284 if (!old) {
1285 WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
1286 break;
1287 }
1288 } while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
1289
1290 GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
1291 return i915_request_get(rq);
1292 }
1293
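/*
 * If the virtual engine still has a request queued that can run on a
 * different sibling, kick its tasklet so the request can be resubmitted
 * elsewhere.
 */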
1294 static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
1295 {
1296 struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
1297 struct i915_request *next = READ_ONCE(ve->request);
1298
1299 if (next && next->execution_mask & ~rq->execution_mask)
1300 tasklet_schedule(&ve->base.execlists.tasklet);
1301 }
1302
1303 static inline void
1304 __execlists_schedule_out(struct i915_request *rq,
1305 struct intel_engine_cs * const engine)
1306 {
1307 struct intel_context * const ce = rq->context;
1308
1309 /*
1310 * NB process_csb() is not under the engine->active.lock and hence
1311 * schedule_out can race with schedule_in meaning that we should
1312 * refrain from doing non-trivial work here.
1313 */
1314
1315 /*
1316 * If we have just completed this context, the engine may now be
1317 * idle and we want to re-enter powersaving.
1318 */
1319 if (list_is_last_rcu(&rq->link, &ce->timeline->requests) &&
1320 i915_request_completed(rq))
1321 intel_engine_add_retire(engine, ce->timeline);
1322
1323 intel_context_update_runtime(ce);
1324 intel_engine_context_out(engine);
1325 execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
1326 intel_gt_pm_put_async(engine->gt);
1327
1328 /*
1329 * If this is part of a virtual engine, its next request may
1330 * have been blocked waiting for access to the active context.
1331 * We have to kick all the siblings again in case we need to
1332 * switch (e.g. the next request is not runnable on this
1333 * engine). Hopefully, we will already have submitted the next
1334 * request before the tasklet runs and do not need to rebuild
1335 * each virtual tree and kick everyone again.
1336 */
1337 if (ce->engine != engine)
1338 kick_siblings(rq, ce);
1339
1340 intel_context_put(ce);
1341 }
1342
1343 static inline void
1344 execlists_schedule_out(struct i915_request *rq)
1345 {
1346 struct intel_context * const ce = rq->context;
1347 struct intel_engine_cs *cur, *old;
1348
1349 trace_i915_request_out(rq);
1350
1351 old = READ_ONCE(ce->inflight);
1352 do
1353 cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
1354 while (!try_cmpxchg(&ce->inflight, &old, cur));
1355 if (!cur)
1356 __execlists_schedule_out(rq, old);
1357
1358 i915_request_put(rq);
1359 }
1360
1361 static u64 execlists_update_context(struct i915_request *rq)
1362 {
1363 struct intel_context *ce = rq->context;
1364 u64 desc = ce->lrc_desc;
1365 u32 tail, prev;
1366
1367 /*
1368 * WaIdleLiteRestore:bdw,skl
1369 *
1370 * We should never submit the context with the same RING_TAIL twice
1371 * just in case we submit an empty ring, which confuses the HW.
1372 *
1373 * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of
1374 * the normal request to be able to always advance the RING_TAIL on
1375 * subsequent resubmissions (for lite restore). Should that fail us,
1376 * and we try and submit the same tail again, force the context
1377 * reload.
1378 *
1379 * If we need to return to a preempted context, we need to skip the
1380 * lite-restore and force it to reload the RING_TAIL. Otherwise, the
1381 * HW has a tendency to ignore us rewinding the TAIL to the end of
1382 * an earlier request.
1383 */
1384 tail = intel_ring_set_tail(rq->ring, rq->tail);
1385 prev = ce->lrc_reg_state[CTX_RING_TAIL];
1386 if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0))
1387 desc |= CTX_DESC_FORCE_RESTORE;
1388 ce->lrc_reg_state[CTX_RING_TAIL] = tail;
1389 rq->tail = rq->wa_tail;
1390
1391 /*
1392 * Make sure the context image is complete before we submit it to HW.
1393 *
1394 * Ostensibly, writes (including the WCB) should be flushed prior to
1395 * an uncached write such as our mmio register access, the empirical
1396 * evidence (esp. on Braswell) suggests that the WC write into memory
1397 * may not be visible to the HW prior to the completion of the UC
1398 * register write and that we may begin execution from the context
1399 * before its image is complete leading to invalid PD chasing.
1400 */
1401 wmb();
1402
1403 ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
1404 return desc;
1405 }
1406
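/*
 * Write a context descriptor either into the per-port submit queue (when
 * a control register is present) or directly to the ExecLists Submit
 * Port, which expects the upper dword first.
 */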
1407 static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
1408 {
1409 if (execlists->ctrl_reg) {
1410 writel(lower_32_bits(desc), execlists->submit_reg + port * 2);
1411 writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1);
1412 } else {
1413 writel(upper_32_bits(desc), execlists->submit_reg);
1414 writel(lower_32_bits(desc), execlists->submit_reg);
1415 }
1416 }
1417
1418 static __maybe_unused void
1419 trace_ports(const struct intel_engine_execlists *execlists,
1420 const char *msg,
1421 struct i915_request * const *ports)
1422 {
1423 const struct intel_engine_cs *engine =
1424 container_of(execlists, typeof(*engine), execlists);
1425
1426 if (!ports[0])
1427 return;
1428
1429 ENGINE_TRACE(engine, "%s { %llx:%lld%s, %llx:%lld }\n", msg,
1430 ports[0]->fence.context,
1431 ports[0]->fence.seqno,
1432 i915_request_completed(ports[0]) ? "!" :
1433 i915_request_started(ports[0]) ? "*" :
1434 "",
1435 ports[1] ? ports[1]->fence.context : 0,
1436 ports[1] ? ports[1]->fence.seqno : 0);
1437 }
1438
1439 static inline bool
1440 reset_in_progress(const struct intel_engine_execlists *execlists)
1441 {
1442 return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
1443 }
1444
1445 static __maybe_unused bool
1446 assert_pending_valid(const struct intel_engine_execlists *execlists,
1447 const char *msg)
1448 {
1449 struct i915_request * const *port, *rq;
1450 struct intel_context *ce = NULL;
1451 bool sentinel = false;
1452
1453 trace_ports(execlists, msg, execlists->pending);
1454
1455 /* We may be messing around with the lists during reset, lalala */
1456 if (reset_in_progress(execlists))
1457 return true;
1458
1459 if (!execlists->pending[0]) {
1460 GEM_TRACE_ERR("Nothing pending for promotion!\n");
1461 return false;
1462 }
1463
1464 if (execlists->pending[execlists_num_ports(execlists)]) {
1465 GEM_TRACE_ERR("Excess pending[%d] for promotion!\n",
1466 execlists_num_ports(execlists));
1467 return false;
1468 }
1469
1470 for (port = execlists->pending; (rq = *port); port++) {
1471 unsigned long flags;
1472 bool ok = true;
1473
1474 GEM_BUG_ON(!kref_read(&rq->fence.refcount));
1475 GEM_BUG_ON(!i915_request_is_active(rq));
1476
1477 if (ce == rq->context) {
1478 GEM_TRACE_ERR("Dup context:%llx in pending[%zd]\n",
1479 ce->timeline->fence_context,
1480 port - execlists->pending);
1481 return false;
1482 }
1483 ce = rq->context;
1484
1485 /*
1486 * Sentinels are supposed to be lonely so they flush the
1487 * current execution off the HW. Check that they are the
1488 * only request in the pending submission.
1489 */
1490 if (sentinel) {
1491 GEM_TRACE_ERR("context:%llx after sentinel in pending[%zd]\n",
1492 ce->timeline->fence_context,
1493 port - execlists->pending);
1494 return false;
1495 }
1496
1497 sentinel = i915_request_has_sentinel(rq);
1498 if (sentinel && port != execlists->pending) {
1499 GEM_TRACE_ERR("sentinel context:%llx not in prime position[%zd]\n",
1500 ce->timeline->fence_context,
1501 port - execlists->pending);
1502 return false;
1503 }
1504
1505 /* Hold tightly onto the lock to prevent concurrent retires! */
1506 if (!spin_trylock_irqsave(&rq->lock, flags))
1507 continue;
1508
1509 if (i915_request_completed(rq))
1510 goto unlock;
1511
1512 if (i915_active_is_idle(&ce->active) &&
1513 !intel_context_is_barrier(ce)) {
1514 GEM_TRACE_ERR("Inactive context:%llx in pending[%zd]\n",
1515 ce->timeline->fence_context,
1516 port - execlists->pending);
1517 ok = false;
1518 goto unlock;
1519 }
1520
1521 if (!i915_vma_is_pinned(ce->state)) {
1522 GEM_TRACE_ERR("Unpinned context:%llx in pending[%zd]\n",
1523 ce->timeline->fence_context,
1524 port - execlists->pending);
1525 ok = false;
1526 goto unlock;
1527 }
1528
1529 if (!i915_vma_is_pinned(ce->ring->vma)) {
1530 GEM_TRACE_ERR("Unpinned ring:%llx in pending[%zd]\n",
1531 ce->timeline->fence_context,
1532 port - execlists->pending);
1533 ok = false;
1534 goto unlock;
1535 }
1536
1537 unlock:
1538 spin_unlock_irqrestore(&rq->lock, flags);
1539 if (!ok)
1540 return false;
1541 }
1542
1543 return ce;
1544 }
1545
1546 static void execlists_submit_ports(struct intel_engine_cs *engine)
1547 {
1548 struct intel_engine_execlists *execlists = &engine->execlists;
1549 unsigned int n;
1550
1551 GEM_BUG_ON(!assert_pending_valid(execlists, "submit"));
1552
1553 /*
1554 * We can skip acquiring intel_runtime_pm_get() here as it was taken
1555 * on our behalf by the request (see i915_gem_mark_busy()) and it will
1556 * not be relinquished until the device is idle (see
1557 * i915_gem_idle_work_handler()). As a precaution, we make sure
1558 * that all ELSP are drained i.e. we have processed the CSB,
1559 * before allowing ourselves to idle and calling intel_runtime_pm_put().
1560 */
1561 GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
1562
1563 /*
1564 * ELSQ note: the submit queue is not cleared after being submitted
1565 * to the HW so we need to make sure we always clean it up. This is
1566 * currently ensured by the fact that we always write the same number
1567 * of elsq entries, keep this in mind before changing the loop below.
1568 */
1569 for (n = execlists_num_ports(execlists); n--; ) {
1570 struct i915_request *rq = execlists->pending[n];
1571
1572 write_desc(execlists,
1573 rq ? execlists_update_context(rq) : 0,
1574 n);
1575 }
1576
1577 /* we need to manually load the submit queue */
1578 if (execlists->ctrl_reg)
1579 writel(EL_CTRL_LOAD, execlists->ctrl_reg);
1580 }
1581
1582 static bool ctx_single_port_submission(const struct intel_context *ce)
1583 {
1584 return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
1585 intel_context_force_single_submission(ce));
1586 }
1587
1588 static bool can_merge_ctx(const struct intel_context *prev,
1589 const struct intel_context *next)
1590 {
1591 if (prev != next)
1592 return false;
1593
1594 if (ctx_single_port_submission(prev))
1595 return false;
1596
1597 return true;
1598 }
1599
1600 static unsigned long i915_request_flags(const struct i915_request *rq)
1601 {
1602 return READ_ONCE(rq->fence.flags);
1603 }
1604
1605 static bool can_merge_rq(const struct i915_request *prev,
1606 const struct i915_request *next)
1607 {
1608 GEM_BUG_ON(prev == next);
1609 GEM_BUG_ON(!assert_priority_queue(prev, next));
1610
1611 /*
1612 * We do not submit known completed requests. Therefore if the next
1613 * request is already completed, we can pretend to merge it in
1614 * with the previous context (and we will skip updating the ELSP
1615 * and tracking). Thus hopefully keeping the ELSP full with active
1616 * contexts, despite the best efforts of preempt-to-busy to confuse
1617 * us.
1618 */
1619 if (i915_request_completed(next))
1620 return true;
1621
1622 if (unlikely((i915_request_flags(prev) ^ i915_request_flags(next)) &
1623 (BIT(I915_FENCE_FLAG_NOPREEMPT) |
1624 BIT(I915_FENCE_FLAG_SENTINEL))))
1625 return false;
1626
1627 if (!can_merge_ctx(prev->context, next->context))
1628 return false;
1629
1630 GEM_BUG_ON(i915_seqno_passed(prev->fence.seqno, next->fence.seqno));
1631 return true;
1632 }
1633
1634 static void virtual_update_register_offsets(u32 *regs,
1635 struct intel_engine_cs *engine)
1636 {
1637 set_offsets(regs, reg_offsets(engine), engine, false);
1638 }
1639
1640 static bool virtual_matches(const struct virtual_engine *ve,
1641 const struct i915_request *rq,
1642 const struct intel_engine_cs *engine)
1643 {
1644 const struct intel_engine_cs *inflight;
1645
1646 if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
1647 return false;
1648
1649 /*
1650 * We track when the HW has completed saving the context image
1651 * (i.e. when we have seen the final CS event switching out of
1652 * the context) and must not overwrite the context image before
1653 * then. This restricts us to only using the active engine
1654 * while the previous virtualized request is inflight (so
1655 * we reuse the register offsets). This is a very small
1656 * hysteresis on the greedy selection algorithm.
1657 */
1658 inflight = intel_context_inflight(&ve->context);
1659 if (inflight && inflight != engine)
1660 return false;
1661
1662 return true;
1663 }
1664
1665 static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
1666 struct i915_request *rq)
1667 {
1668 struct intel_engine_cs *old = ve->siblings[0];
1669
1670 /* All unattached (rq->engine == old) must already be completed */
1671
1672 spin_lock(&old->breadcrumbs.irq_lock);
1673 if (!list_empty(&ve->context.signal_link)) {
1674 list_del_init(&ve->context.signal_link);
1675
1676 /*
1677 * We cannot acquire the new engine->breadcrumbs.irq_lock
1678 * (as we are holding a breadcrumbs.irq_lock already),
1679 * so attach this request to the signaler on submission.
1680 * The queued irq_work will occur when we finally drop
1681 * the engine->active.lock after dequeue.
1682 */
1683 set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags);
1684
1685 /* Also transfer the pending irq_work for the old breadcrumb. */
1686 intel_engine_signal_breadcrumbs(rq->engine);
1687 }
1688 spin_unlock(&old->breadcrumbs.irq_lock);
1689 }
1690
1691 #define for_each_waiter(p__, rq__) \
1692 list_for_each_entry_lockless(p__, \
1693 &(rq__)->sched.waiters_list, \
1694 wait_link)
1695
1696 #define for_each_signaler(p__, rq__) \
1697 list_for_each_entry_rcu(p__, \
1698 &(rq__)->sched.signalers_list, \
1699 signal_link)
1700
1701 static void defer_request(struct i915_request *rq, struct list_head * const pl)
1702 {
1703 LIST_HEAD(list);
1704
1705 /*
1706 * We want to move the interrupted request to the back of
1707 * the round-robin list (i.e. its priority level), but
1708 * in doing so, we must also move all in-flight requests that were
1709 * waiting on the interrupted request so that they are run
1710 * after it again.
1711 */
1712 do {
1713 struct i915_dependency *p;
1714
1715 GEM_BUG_ON(i915_request_is_active(rq));
1716 list_move_tail(&rq->sched.link, pl);
1717
1718 for_each_waiter(p, rq) {
1719 struct i915_request *w =
1720 container_of(p->waiter, typeof(*w), sched);
1721
1722 /* Leave semaphores spinning on the other engines */
1723 if (w->engine != rq->engine)
1724 continue;
1725
1726 /* No waiter should start before its signaler */
1727 GEM_BUG_ON(i915_request_started(w) &&
1728 !i915_request_completed(rq));
1729
1730 GEM_BUG_ON(i915_request_is_active(w));
1731 if (!i915_request_is_ready(w))
1732 continue;
1733
1734 if (rq_prio(w) < rq_prio(rq))
1735 continue;
1736
1737 GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
1738 list_move_tail(&w->sched.link, &list);
1739 }
1740
1741 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
1742 } while (rq);
1743 }
1744
1745 static void defer_active(struct intel_engine_cs *engine)
1746 {
1747 struct i915_request *rq;
1748
1749 rq = __unwind_incomplete_requests(engine);
1750 if (!rq)
1751 return;
1752
1753 defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq)));
1754 }
1755
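/*
 * Should the currently executing request yield its timeslice? Only if
 * something of equal or higher priority is waiting in the queue or is
 * next in the active list.
 */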
1756 static bool
1757 need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
1758 {
1759 int hint;
1760
1761 if (!intel_engine_has_timeslices(engine))
1762 return false;
1763
1764 hint = engine->execlists.queue_priority_hint;
1765 if (!list_is_last(&rq->sched.link, &engine->active.requests))
1766 hint = max(hint, rq_prio(list_next_entry(rq, sched.link)));
1767
1768 return hint >= effective_prio(rq);
1769 }
1770
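/*
 * Priority of the request due to run after rq on this engine, or INT_MIN
 * if rq is the last request submitted.
 */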
1771 static int
1772 switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
1773 {
1774 if (list_is_last(&rq->sched.link, &engine->active.requests))
1775 return INT_MIN;
1776
1777 return rq_prio(list_next_entry(rq, sched.link));
1778 }
1779
1780 static inline unsigned long
1781 timeslice(const struct intel_engine_cs *engine)
1782 {
1783 return READ_ONCE(engine->props.timeslice_duration_ms);
1784 }
1785
1786 static unsigned long
1787 active_timeslice(const struct intel_engine_cs *engine)
1788 {
1789 const struct intel_engine_execlists *execlists = &engine->execlists;
1790 const struct i915_request *rq = *execlists->active;
1791
1792 if (!rq || i915_request_completed(rq))
1793 return 0;
1794
1795 if (READ_ONCE(execlists->switch_priority_hint) < effective_prio(rq))
1796 return 0;
1797
1798 return timeslice(engine);
1799 }
1800
1801 static void set_timeslice(struct intel_engine_cs *engine)
1802 {
1803 if (!intel_engine_has_timeslices(engine))
1804 return;
1805
1806 set_timer_ms(&engine->execlists.timer, active_timeslice(engine));
1807 }
1808
1809 static void start_timeslice(struct intel_engine_cs *engine)
1810 {
1811 struct intel_engine_execlists *execlists = &engine->execlists;
1812 int prio = queue_prio(execlists);
1813
1814 WRITE_ONCE(execlists->switch_priority_hint, prio);
1815 if (prio == INT_MIN)
1816 return;
1817
1818 if (timer_pending(&execlists->timer))
1819 return;
1820
1821 set_timer_ms(&execlists->timer, timeslice(engine));
1822 }
1823
1824 static void record_preemption(struct intel_engine_execlists *execlists)
1825 {
1826 (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
1827 }
1828
1829 static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
1830 const struct i915_request *rq)
1831 {
1832 if (!rq)
1833 return 0;
1834
1835 /* Force a fast reset for terminated contexts (ignoring sysfs!) */
1836 if (unlikely(intel_context_is_banned(rq->context)))
1837 return 1;
1838
1839 return READ_ONCE(engine->props.preempt_timeout_ms);
1840 }
1841
1842 static void set_preempt_timeout(struct intel_engine_cs *engine,
1843 const struct i915_request *rq)
1844 {
1845 if (!intel_engine_has_preempt_reset(engine))
1846 return;
1847
1848 set_timer_ms(&engine->execlists.preempt,
1849 active_preempt_timeout(engine, rq));
1850 }
1851
1852 static inline void clear_ports(struct i915_request **ports, int count)
1853 {
1854 memset_p((void **)ports, NULL, count);
1855 }
1856
1857 static void execlists_dequeue(struct intel_engine_cs *engine)
1858 {
1859 struct intel_engine_execlists * const execlists = &engine->execlists;
1860 struct i915_request **port = execlists->pending;
1861 struct i915_request ** const last_port = port + execlists->port_mask;
1862 struct i915_request * const *active;
1863 struct i915_request *last;
1864 struct rb_node *rb;
1865 bool submit = false;
1866
1867 /*
1868 * Hardware submission is through 2 ports. Conceptually each port
1869 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
1870 * static for a context, and unique to each, so we only execute
1871 * requests belonging to a single context from each ring. RING_HEAD
1872 * is maintained by the CS in the context image, it marks the place
1873 * where it got up to last time, and through RING_TAIL we tell the CS
1874 * where we want to execute up to this time.
1875 *
1876 * In this list the requests are in order of execution. Consecutive
1877 * requests from the same context are adjacent in the ringbuffer. We
1878 * can combine these requests into a single RING_TAIL update:
1879 *
1880 * RING_HEAD...req1...req2
1881 * ^- RING_TAIL
1882 * since to execute req2 the CS must first execute req1.
1883 *
1884 * Our goal then is to point each port at the end of a consecutive
1885 * sequence of requests, as that is the optimal (fewest wake ups
1886 * and context switches) submission.
1887 */
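/*
 * Illustrative example (not authoritative): given a queue of
 * ctxA:rq1, ctxA:rq2, ctxB:rq3, the dequeue below coalesces rq1 and rq2
 * into a single ELSP[0] entry for ctxA whose RING_TAIL points at the end
 * of rq2, and places ctxB (rq3) into ELSP[1]; the hardware never needs to
 * be told about rq1 separately.
 */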
1888
1889 for (rb = rb_first_cached(&execlists->virtual); rb; ) {
1890 struct virtual_engine *ve =
1891 rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
1892 struct i915_request *rq = READ_ONCE(ve->request);
1893
1894 if (!rq) { /* lazily cleanup after another engine handled rq */
1895 rb_erase_cached(rb, &execlists->virtual);
1896 RB_CLEAR_NODE(rb);
1897 rb = rb_first_cached(&execlists->virtual);
1898 continue;
1899 }
1900
1901 if (!virtual_matches(ve, rq, engine)) {
1902 rb = rb_next(rb);
1903 continue;
1904 }
1905
1906 break;
1907 }
1908
1909 /*
1910 * If the queue is higher priority than the last
1911 * request in the currently active context, submit afresh.
1912 * We will resubmit again afterwards in case we need to split
1913 * the active context to interject the preemption request,
1914 * i.e. we will retrigger preemption following the ack in case
1915 * of trouble.
1916 */
1917 active = READ_ONCE(execlists->active);
1918 while ((last = *active) && i915_request_completed(last))
1919 active++;
1920
1921 if (last) {
1922 if (need_preempt(engine, last, rb)) {
1923 ENGINE_TRACE(engine,
1924 "preempting last=%llx:%lld, prio=%d, hint=%d\n",
1925 last->fence.context,
1926 last->fence.seqno,
1927 last->sched.attr.priority,
1928 execlists->queue_priority_hint);
1929 record_preemption(execlists);
1930
1931 /*
1932 * Don't let the RING_HEAD advance past the breadcrumb
1933 * as we unwind (and until we resubmit) so that we do
1934 * not accidentally tell it to go backwards.
1935 */
1936 ring_set_paused(engine, 1);
1937
1938 /*
1939 * Note that we have not stopped the GPU at this point,
1940 * so we are unwinding the incomplete requests as they
1941 * remain inflight and so by the time we do complete
1942 * the preemption, some of the unwound requests may
1943 * complete!
1944 */
1945 __unwind_incomplete_requests(engine);
1946
1947 last = NULL;
1948 } else if (need_timeslice(engine, last) &&
1949 timer_expired(&engine->execlists.timer)) {
1950 ENGINE_TRACE(engine,
1951 "expired last=%llx:%lld, prio=%d, hint=%d\n",
1952 last->fence.context,
1953 last->fence.seqno,
1954 last->sched.attr.priority,
1955 execlists->queue_priority_hint);
1956
1957 ring_set_paused(engine, 1);
1958 defer_active(engine);
1959
1960 /*
1961 * Unlike for preemption, if we rewind and continue
1962 * executing the same context as previously active,
1963 * the order of execution will remain the same and
1964 * the tail will only advance. We do not need to
1965 * force a full context restore, as a lite-restore
1966 * is sufficient to resample the monotonic TAIL.
1967 *
1968 * If we switch to any other context, similarly we
1969 * will not rewind TAIL of current context, and
1970 * normal save/restore will preserve state and allow
1971 * us to later continue executing the same request.
1972 */
1973 last = NULL;
1974 } else {
1975 /*
1976 * Otherwise if we already have a request pending
1977 * for execution after the current one, we can
1978 * just wait until the next CS event before
1979 * queuing more. In either case we will force a
1980 * lite-restore preemption event, but if we wait
1981 * we hopefully coalesce several updates into a single
1982 * submission.
1983 */
1984 if (!list_is_last(&last->sched.link,
1985 &engine->active.requests)) {
1986 /*
1987 * Even if ELSP[1] is occupied and not worthy
1988 * of timeslices, our queue might be.
1989 */
1990 start_timeslice(engine);
1991 return;
1992 }
1993 }
1994 }
1995
1996 while (rb) { /* XXX virtual is always taking precedence */
1997 struct virtual_engine *ve =
1998 rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
1999 struct i915_request *rq;
2000
2001 spin_lock(&ve->base.active.lock);
2002
2003 rq = ve->request;
2004 if (unlikely(!rq)) { /* lost the race to a sibling */
2005 spin_unlock(&ve->base.active.lock);
2006 rb_erase_cached(rb, &execlists->virtual);
2007 RB_CLEAR_NODE(rb);
2008 rb = rb_first_cached(&execlists->virtual);
2009 continue;
2010 }
2011
2012 GEM_BUG_ON(rq != ve->request);
2013 GEM_BUG_ON(rq->engine != &ve->base);
2014 GEM_BUG_ON(rq->context != &ve->context);
2015
2016 if (rq_prio(rq) >= queue_prio(execlists)) {
2017 if (!virtual_matches(ve, rq, engine)) {
2018 spin_unlock(&ve->base.active.lock);
2019 rb = rb_next(rb);
2020 continue;
2021 }
2022
2023 if (last && !can_merge_rq(last, rq)) {
2024 spin_unlock(&ve->base.active.lock);
2025 start_timeslice(engine);
2026 return; /* leave this for another sibling */
2027 }
2028
2029 ENGINE_TRACE(engine,
2030 "virtual rq=%llx:%lld%s, new engine? %s\n",
2031 rq->fence.context,
2032 rq->fence.seqno,
2033 i915_request_completed(rq) ? "!" :
2034 i915_request_started(rq) ? "*" :
2035 "",
2036 yesno(engine != ve->siblings[0]));
2037
2038 WRITE_ONCE(ve->request, NULL);
2039 WRITE_ONCE(ve->base.execlists.queue_priority_hint,
2040 INT_MIN);
2041 rb_erase_cached(rb, &execlists->virtual);
2042 RB_CLEAR_NODE(rb);
2043
2044 GEM_BUG_ON(!(rq->execution_mask & engine->mask));
2045 WRITE_ONCE(rq->engine, engine);
2046
2047 if (engine != ve->siblings[0]) {
2048 u32 *regs = ve->context.lrc_reg_state;
2049 unsigned int n;
2050
2051 GEM_BUG_ON(READ_ONCE(ve->context.inflight));
2052
2053 if (!intel_engine_has_relative_mmio(engine))
2054 virtual_update_register_offsets(regs,
2055 engine);
2056
2057 if (!list_empty(&ve->context.signals))
2058 virtual_xfer_breadcrumbs(ve, rq);
2059
2060 /*
2061 * Move the bound engine to the top of the list
2062 * for future execution. We then kick this
2063 * tasklet first before checking others, so that
2064 * we preferentially reuse this set of bound
2065 * registers.
2066 */
2067 for (n = 1; n < ve->num_siblings; n++) {
2068 if (ve->siblings[n] == engine) {
2069 swap(ve->siblings[n],
2070 ve->siblings[0]);
2071 break;
2072 }
2073 }
2074
2075 GEM_BUG_ON(ve->siblings[0] != engine);
2076 }
2077
2078 if (__i915_request_submit(rq)) {
2079 submit = true;
2080 last = rq;
2081 }
2082 i915_request_put(rq);
2083
2084 /*
2085 * Hmm, we have a bunch of virtual engine requests,
2086 * but the first one was already completed (thanks
2087 * preempt-to-busy!). Keep looking at the virtual engine queue
2088 * until we have no more relevant requests (i.e.
2089 * the normal submit queue has higher priority).
2090 */
2091 if (!submit) {
2092 spin_unlock(&ve->base.active.lock);
2093 rb = rb_first_cached(&execlists->virtual);
2094 continue;
2095 }
2096 }
2097
2098 spin_unlock(&ve->base.active.lock);
2099 break;
2100 }
2101
2102 while ((rb = rb_first_cached(&execlists->queue))) {
2103 struct i915_priolist *p = to_priolist(rb);
2104 struct i915_request *rq, *rn;
2105 int i;
2106
2107 priolist_for_each_request_consume(rq, rn, p, i) {
2108 bool merge = true;
2109
2110 /*
2111 * Can we combine this request with the current port?
2112 * It has to be the same context/ringbuffer and not
2113 * have any exceptions (e.g. GVT saying never to
2114 * combine contexts).
2115 *
2116 * If we can combine the requests, we can execute both
2117 * by updating the RING_TAIL to point to the end of the
2118 * second request, and so we never need to tell the
2119 * hardware about the first.
2120 */
2121 if (last && !can_merge_rq(last, rq)) {
2122 /*
2123 * If we are on the second port and cannot
2124 * combine this request with the last, then we
2125 * are done.
2126 */
2127 if (port == last_port)
2128 goto done;
2129
2130 /*
2131 * We must not populate both ELSP[] with the
2132 * same LRCA, i.e. we must submit 2 different
2133 * contexts if we submit 2 ELSP.
2134 */
2135 if (last->context == rq->context)
2136 goto done;
2137
2138 if (i915_request_has_sentinel(last))
2139 goto done;
2140
2141 /*
2142 * If GVT overrides us we only ever submit
2143 * port[0], leaving port[1] empty. Note that we
2144 * also have to be careful that we don't queue
2145 * the same context (even though a different
2146 * request) to the second port.
2147 */
2148 if (ctx_single_port_submission(last->context) ||
2149 ctx_single_port_submission(rq->context))
2150 goto done;
2151
2152 merge = false;
2153 }
2154
2155 if (__i915_request_submit(rq)) {
2156 if (!merge) {
2157 *port = execlists_schedule_in(last, port - execlists->pending);
2158 port++;
2159 last = NULL;
2160 }
2161
2162 GEM_BUG_ON(last &&
2163 !can_merge_ctx(last->context,
2164 rq->context));
2165 GEM_BUG_ON(last &&
2166 i915_seqno_passed(last->fence.seqno,
2167 rq->fence.seqno));
2168
2169 submit = true;
2170 last = rq;
2171 }
2172 }
2173
2174 rb_erase_cached(&p->node, &execlists->queue);
2175 i915_priolist_free(p);
2176 }
2177
2178 done:
2179 /*
2180 * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.
2181 *
2182 * We choose the priority hint such that if we add a request of greater
2183 * priority than this, we kick the submission tasklet to decide on
2184 * the right order of submitting the requests to hardware. We must
2185 * also be prepared to reorder requests as they are in-flight on the
2186 * HW. We derive the priority hint then as the first "hole" in
2187 * the HW submission ports and if there are no available slots,
2188 * the priority of the lowest executing request, i.e. last.
2189 *
2190 * When we do receive a higher priority request ready to run from the
2191 * user, see queue_request(), the priority hint is bumped to that
2192 * request triggering preemption on the next dequeue (or subsequent
2193 * interrupt for secondary ports).
2194 */
2195 execlists->queue_priority_hint = queue_prio(execlists);
2196
2197 if (submit) {
2198 *port = execlists_schedule_in(last, port - execlists->pending);
2199 execlists->switch_priority_hint =
2200 switch_prio(engine, *execlists->pending);
2201
2202 /*
2203 * Skip if we ended up with exactly the same set of requests,
2204 * e.g. trying to timeslice a pair of ordered contexts
2205 */
2206 if (!memcmp(active, execlists->pending,
2207 (port - execlists->pending + 1) * sizeof(*port))) {
2208 do
2209 execlists_schedule_out(fetch_and_zero(port));
2210 while (port-- != execlists->pending);
2211
2212 goto skip_submit;
2213 }
2214 clear_ports(port + 1, last_port - port);
2215
2216 execlists_submit_ports(engine);
2217 set_preempt_timeout(engine, *active);
2218 } else {
2219 skip_submit:
2220 ring_set_paused(engine, 0);
2221 }
2222 }
2223
2224 static void
2225 cancel_port_requests(struct intel_engine_execlists * const execlists)
2226 {
2227 struct i915_request * const *port;
2228
2229 for (port = execlists->pending; *port; port++)
2230 execlists_schedule_out(*port);
2231 clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending));
2232
2233 /* Mark the end of active before we overwrite *active */
2234 for (port = xchg(&execlists->active, execlists->pending); *port; port++)
2235 execlists_schedule_out(*port);
2236 clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight));
2237
2238 smp_wmb(); /* complete the seqlock for execlists_active() */
2239 WRITE_ONCE(execlists->active, execlists->inflight);
2240 }
2241
2242 static inline void
2243 invalidate_csb_entries(const u32 *first, const u32 *last)
2244 {
2245 clflush((void *)first);
2246 clflush((void *)last);
2247 }
2248
2249 /*
2250 * Starting with Gen12, the status has a new format:
2251 *
2252 * bit 0: switched to new queue
2253 * bit 1: reserved
2254 * bit 2: semaphore wait mode (poll or signal), only valid when
2255 * switch detail is set to "wait on semaphore"
2256 * bits 3-5: engine class
2257 * bits 6-11: engine instance
2258 * bits 12-14: reserved
2259 * bits 15-25: sw context id of the lrc the GT switched to
2260 * bits 26-31: sw counter of the lrc the GT switched to
2261 * bits 32-35: context switch detail
2262 * - 0: ctx complete
2263 * - 1: wait on sync flip
2264 * - 2: wait on vblank
2265 * - 3: wait on scanline
2266 * - 4: wait on semaphore
2267 * - 5: context preempted (not on SEMAPHORE_WAIT or
2268 * WAIT_FOR_EVENT)
2269 * bit 36: reserved
2270 * bits 37-43: wait detail (for switch detail 1 to 4)
2271 * bits 44-46: reserved
2272 * bits 47-57: sw context id of the lrc the GT switched away from
2273 * bits 58-63: sw counter of the lrc the GT switched away from
2274 */
2275 static inline bool
2276 gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
2277 {
2278 u32 lower_dw = csb[0];
2279 u32 upper_dw = csb[1];
2280 bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw);
2281 bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw);
2282 bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
2283
2284 /*
2285 * The context switch detail is not guaranteed to be 5 when a preemption
2286 * occurs, so we can't just check for that. The check below works for
2287 * all the cases we care about, including preemptions of WAIT
2288 * instructions and lite-restore. Preempt-to-idle via the CTRL register
2289 * would require some extra handling, but we don't support that.
2290 */
2291 if (!ctx_away_valid || new_queue) {
2292 GEM_BUG_ON(!ctx_to_valid);
2293 return true;
2294 }
2295
2296 /*
2297 * switch detail = 5 is covered by the case above and we do not expect a
2298 * context switch on an unsuccessful wait instruction since we always
2299 * use polling mode.
2300 */
2301 GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
2302 return false;
2303 }
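/*
 * Purely illustrative sketch (hypothetical helpers, not used by the
 * driver): decode two of the fields documented in the Gen12 CSB layout
 * comment above with plain shifts and masks. The field positions are
 * taken from that comment; the driver itself uses the
 * GEN12_CSB_CTX_VALID() and GEN12_CTX_SWITCH_DETAIL() helpers instead of
 * open-coding this.
 */
static inline u32 gen12_csb_example_sw_ctx_id_to(u32 lower_dw)
{
	/* bits 15-25: sw context id of the lrc the GT switched to */
	return (lower_dw >> 15) & 0x7ff;
}

static inline u32 gen12_csb_example_switch_detail(u32 upper_dw)
{
	/* bits 32-35 of the entry, i.e. bits 0-3 of the upper dword */
	return upper_dw & 0xf;
}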
2304
2305 static inline bool
2306 gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
2307 {
2308 return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
2309 }
2310
2311 static void process_csb(struct intel_engine_cs *engine)
2312 {
2313 struct intel_engine_execlists * const execlists = &engine->execlists;
2314 const u32 * const buf = execlists->csb_status;
2315 const u8 num_entries = execlists->csb_size;
2316 u8 head, tail;
2317
2318 /*
2319 * As we modify our execlists state tracking we require exclusive
2320 * access. Either we are inside the tasklet, or the tasklet is disabled
2321 * and we assume that only happens inside the reset paths and so is serialised.
2322 */
2323 GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) &&
2324 !reset_in_progress(execlists));
2325 GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine));
2326
2327 /*
2328 * Note that csb_write, csb_status may be either in HWSP or mmio.
2329 * When reading from the csb_write mmio register, we have to be
2330 * careful to only use the GEN8_CSB_WRITE_PTR portion, which is
2331 * the low 4 bits. As it happens we know the next 4 bits are always
2332 * zero and so we can simply mask off the low u8 of the register
2333 * and treat it identically to reading from the HWSP (without having
2334 * to use explicit shifting and masking, and probably bifurcating
2335 * the code to handle the legacy mmio read).
2336 */
2337 head = execlists->csb_head;
2338 tail = READ_ONCE(*execlists->csb_write);
2339 if (unlikely(head == tail))
2340 return;
2341
2342 /*
2343 * Hopefully paired with a wmb() in HW!
2344 *
2345 * We must complete the read of the write pointer before any reads
2346 * from the CSB, so that we do not see stale values. Without an rmb
2347 * (lfence) the HW may speculatively perform the CSB[] reads *before*
2348 * we perform the READ_ONCE(*csb_write).
2349 */
2350 rmb();
2351
2352 ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
2353 do {
2354 bool promote;
2355
2356 if (++head == num_entries)
2357 head = 0;
2358
2359 /*
2360 * We are flying near dragons again.
2361 *
2362 * We hold a reference to the request in execlist_port[]
2363 * but no more than that. We are operating in softirq
2364 * context and so cannot hold any mutex or sleep. That
2365 * means we cannot prevent the requests we are processing
2366 * in port[] from being retired concurrently (the
2367 * breadcrumb will be complete before we see the
2368 * context-switch). As we only hold the reference to the
2369 * request, any pointer chasing underneath the request
2370 * is subject to a potential use-after-free. Thus we
2371 * store all of the bookkeeping within port[] as
2372 * required, and avoid using unguarded pointers beneath
2373 * request itself. The same applies to the atomic
2374 * status notifier.
2375 */
2376
2377 ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
2378 head, buf[2 * head + 0], buf[2 * head + 1]);
2379
2380 if (INTEL_GEN(engine->i915) >= 12)
2381 promote = gen12_csb_parse(execlists, buf + 2 * head);
2382 else
2383 promote = gen8_csb_parse(execlists, buf + 2 * head);
2384 if (promote) {
2385 struct i915_request * const *old = execlists->active;
2386
2387 GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
2388
2389 ring_set_paused(engine, 0);
2390
2391 /* Point active to the new ELSP; prevent overwriting */
2392 WRITE_ONCE(execlists->active, execlists->pending);
2393 smp_wmb(); /* notify execlists_active() */
2394
2395 /* cancel old inflight, prepare for switch */
2396 trace_ports(execlists, "preempted", old);
2397 while (*old)
2398 execlists_schedule_out(*old++);
2399
2400 /* switch pending to inflight */
2401 memcpy(execlists->inflight,
2402 execlists->pending,
2403 execlists_num_ports(execlists) *
2404 sizeof(*execlists->pending));
2405 smp_wmb(); /* complete the seqlock */
2406 WRITE_ONCE(execlists->active, execlists->inflight);
2407
2408 WRITE_ONCE(execlists->pending[0], NULL);
2409 } else {
2410 GEM_BUG_ON(!*execlists->active);
2411
2412 /* port0 completed, advanced to port1 */
2413 trace_ports(execlists, "completed", execlists->active);
2414
2415 /*
2416 * We rely on the hardware being strongly
2417 * ordered, i.e. that the breadcrumb write is
2418 * coherent (visible from the CPU) before the
2419 * user interrupt is raised and the CSB is processed.
2420 */
2421 if (GEM_SHOW_DEBUG() &&
2422 !i915_request_completed(*execlists->active) &&
2423 !reset_in_progress(execlists)) {
2424 struct i915_request *rq __maybe_unused =
2425 *execlists->active;
2426 const u32 *regs __maybe_unused =
2427 rq->context->lrc_reg_state;
2428
2429 ENGINE_TRACE(engine,
2430 "ring:{start:0x%08x, head:%04x, tail:%04x, ctl:%08x, mode:%08x}\n",
2431 ENGINE_READ(engine, RING_START),
2432 ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR,
2433 ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR,
2434 ENGINE_READ(engine, RING_CTL),
2435 ENGINE_READ(engine, RING_MI_MODE));
2436 ENGINE_TRACE(engine,
2437 "rq:{start:%08x, head:%04x, tail:%04x, seqno:%llx:%d, hwsp:%d}, ",
2438 i915_ggtt_offset(rq->ring->vma),
2439 rq->head, rq->tail,
2440 rq->fence.context,
2441 lower_32_bits(rq->fence.seqno),
2442 hwsp_seqno(rq));
2443 ENGINE_TRACE(engine,
2444 "ctx:{start:%08x, head:%04x, tail:%04x}, ",
2445 regs[CTX_RING_START],
2446 regs[CTX_RING_HEAD],
2447 regs[CTX_RING_TAIL]);
2448
2449 GEM_BUG_ON("context completed before request");
2450 }
2451
2452 execlists_schedule_out(*execlists->active++);
2453
2454 GEM_BUG_ON(execlists->active - execlists->inflight >
2455 execlists_num_ports(execlists));
2456 }
2457 } while (head != tail);
2458
2459 execlists->csb_head = head;
2460 set_timeslice(engine);
2461
2462 /*
2463 * Gen11 has proven to fail to order the global observation point
2464 * between the CSB entry write and the tail update, and thus we can
2465 * see a stale entry in the context status buffer.
2466 *
2467 * Forcibly evict the entries before the next gpu csb update, to
2468 * increase the odds that we get fresh entries even with
2469 * non-working hardware. The cost of doing so mostly comes out in
2470 * the wash, as the hardware, working or not, will need to do the
2471 * invalidation beforehand anyway.
2472 */
2473 invalidate_csb_entries(&buf[0], &buf[num_entries - 1]);
2474 }
2475
2476 static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
2477 {
2478 lockdep_assert_held(&engine->active.lock);
2479 if (!READ_ONCE(engine->execlists.pending[0])) {
2480 rcu_read_lock(); /* protect peeking at execlists->active */
2481 execlists_dequeue(engine);
2482 rcu_read_unlock();
2483 }
2484 }
2485
2486 static void __execlists_hold(struct i915_request *rq)
2487 {
2488 LIST_HEAD(list);
2489
2490 do {
2491 struct i915_dependency *p;
2492
2493 if (i915_request_is_active(rq))
2494 __i915_request_unsubmit(rq);
2495
2496 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2497 list_move_tail(&rq->sched.link, &rq->engine->active.hold);
2498 i915_request_set_hold(rq);
2499 RQ_TRACE(rq, "on hold\n");
2500
2501 for_each_waiter(p, rq) {
2502 struct i915_request *w =
2503 container_of(p->waiter, typeof(*w), sched);
2504
2505 /* Leave semaphores spinning on the other engines */
2506 if (w->engine != rq->engine)
2507 continue;
2508
2509 if (!i915_request_is_ready(w))
2510 continue;
2511
2512 if (i915_request_completed(w))
2513 continue;
2514
2515 if (i915_request_on_hold(w))
2516 continue;
2517
2518 list_move_tail(&w->sched.link, &list);
2519 }
2520
2521 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
2522 } while (rq);
2523 }
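/*
 * Illustrative example (not authoritative): if rq1 is put on hold while
 * rq2 and rq3 were already submitted on this engine with rq1 as a
 * (transitive) signaler, the loop above pulls rq2 and rq3 onto
 * active.hold as well, so none of them can be resubmitted until
 * execlists_unhold(). Waiters on other engines are deliberately skipped
 * and left spinning on their semaphores.
 */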
2524
2525 static bool execlists_hold(struct intel_engine_cs *engine,
2526 struct i915_request *rq)
2527 {
2528 spin_lock_irq(&engine->active.lock);
2529
2530 if (i915_request_completed(rq)) { /* too late! */
2531 rq = NULL;
2532 goto unlock;
2533 }
2534
2535 if (rq->engine != engine) { /* preempted virtual engine */
2536 struct virtual_engine *ve = to_virtual_engine(rq->engine);
2537
2538 /*
2539 * intel_context_inflight() is only protected by virtue
2540 * of process_csb() being called only by the tasklet (or
2541 * directly from inside reset while the tasklet is suspended).
2542 * Assert that neither of those are allowed to run while we
2543 * poke at the request queues.
2544 */
2545 GEM_BUG_ON(!reset_in_progress(&engine->execlists));
2546
2547 /*
2548 * An unsubmitted request along a virtual engine will
2549 * remain on the active (this) engine until we are able
2550 * to process the context switch away (and so mark the
2551 * context as no longer in flight). That cannot have happened
2552 * yet, otherwise we would not be hanging!
2553 */
2554 spin_lock(&ve->base.active.lock);
2555 GEM_BUG_ON(intel_context_inflight(rq->context) != engine);
2556 GEM_BUG_ON(ve->request != rq);
2557 ve->request = NULL;
2558 spin_unlock(&ve->base.active.lock);
2559 i915_request_put(rq);
2560
2561 rq->engine = engine;
2562 }
2563
2564 /*
2565 * Transfer this request onto the hold queue to prevent it
2566 * being resubmitted to HW (and potentially completed) before we have
2567 * released it. Since we may have already submitted following
2568 * requests, we need to remove those as well.
2569 */
2570 GEM_BUG_ON(i915_request_on_hold(rq));
2571 GEM_BUG_ON(rq->engine != engine);
2572 __execlists_hold(rq);
2573 GEM_BUG_ON(list_empty(&engine->active.hold));
2574
2575 unlock:
2576 spin_unlock_irq(&engine->active.lock);
2577 return rq;
2578 }
2579
2580 static bool hold_request(const struct i915_request *rq)
2581 {
2582 struct i915_dependency *p;
2583 bool result = false;
2584
2585 /*
2586 * If one of our ancestors is on hold, we must also be on hold,
2587 * otherwise we will bypass it and execute before it.
2588 */
2589 rcu_read_lock();
2590 for_each_signaler(p, rq) {
2591 const struct i915_request *s =
2592 container_of(p->signaler, typeof(*s), sched);
2593
2594 if (s->engine != rq->engine)
2595 continue;
2596
2597 result = i915_request_on_hold(s);
2598 if (result)
2599 break;
2600 }
2601 rcu_read_unlock();
2602
2603 return result;
2604 }
2605
2606 static void __execlists_unhold(struct i915_request *rq)
2607 {
2608 LIST_HEAD(list);
2609
2610 do {
2611 struct i915_dependency *p;
2612
2613 RQ_TRACE(rq, "hold release\n");
2614
2615 GEM_BUG_ON(!i915_request_on_hold(rq));
2616 GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
2617
2618 i915_request_clear_hold(rq);
2619 list_move_tail(&rq->sched.link,
2620 i915_sched_lookup_priolist(rq->engine,
2621 rq_prio(rq)));
2622 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2623
2624 /* Also release any children on this engine that are ready */
2625 for_each_waiter(p, rq) {
2626 struct i915_request *w =
2627 container_of(p->waiter, typeof(*w), sched);
2628
2629 /* Propagate any change in error status */
2630 if (rq->fence.error)
2631 i915_request_set_error_once(w, rq->fence.error);
2632
2633 if (w->engine != rq->engine)
2634 continue;
2635
2636 if (!i915_request_on_hold(w))
2637 continue;
2638
2639 /* Check that no other parents are also on hold */
2640 if (hold_request(w))
2641 continue;
2642
2643 list_move_tail(&w->sched.link, &list);
2644 }
2645
2646 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
2647 } while (rq);
2648 }
2649
2650 static void execlists_unhold(struct intel_engine_cs *engine,
2651 struct i915_request *rq)
2652 {
2653 spin_lock_irq(&engine->active.lock);
2654
2655 /*
2656 * Move this request back to the priority queue, and all of its
2657 * children and grandchildren that were suspended along with it.
2658 */
2659 __execlists_unhold(rq);
2660
2661 if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
2662 engine->execlists.queue_priority_hint = rq_prio(rq);
2663 tasklet_hi_schedule(&engine->execlists.tasklet);
2664 }
2665
2666 spin_unlock_irq(&engine->active.lock);
2667 }
2668
2669 struct execlists_capture {
2670 struct work_struct work;
2671 struct i915_request *rq;
2672 struct i915_gpu_coredump *error;
2673 };
2674
2675 static void execlists_capture_work(struct work_struct *work)
2676 {
2677 struct execlists_capture *cap = container_of(work, typeof(*cap), work);
2678 const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
2679 struct intel_engine_cs *engine = cap->rq->engine;
2680 struct intel_gt_coredump *gt = cap->error->gt;
2681 struct intel_engine_capture_vma *vma;
2682
2683 /* Compress all the objects attached to the request, slow! */
2684 vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp);
2685 if (vma) {
2686 struct i915_vma_compress *compress =
2687 i915_vma_capture_prepare(gt);
2688
2689 intel_engine_coredump_add_vma(gt->engine, vma, compress);
2690 i915_vma_capture_finish(gt, compress);
2691 }
2692
2693 gt->simulated = gt->engine->simulated;
2694 cap->error->simulated = gt->simulated;
2695
2696 /* Publish the error state, and announce it to the world */
2697 i915_error_state_store(cap->error);
2698 i915_gpu_coredump_put(cap->error);
2699
2700 /* Return this request and all that depend upon it for signaling */
2701 execlists_unhold(engine, cap->rq);
2702 i915_request_put(cap->rq);
2703
2704 kfree(cap);
2705 }
2706
2707 static struct execlists_capture *capture_regs(struct intel_engine_cs *engine)
2708 {
2709 const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
2710 struct execlists_capture *cap;
2711
2712 cap = kmalloc(sizeof(*cap), gfp);
2713 if (!cap)
2714 return NULL;
2715
2716 cap->error = i915_gpu_coredump_alloc(engine->i915, gfp);
2717 if (!cap->error)
2718 goto err_cap;
2719
2720 cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp);
2721 if (!cap->error->gt)
2722 goto err_gpu;
2723
2724 cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp);
2725 if (!cap->error->gt->engine)
2726 goto err_gt;
2727
2728 return cap;
2729
2730 err_gt:
2731 kfree(cap->error->gt);
2732 err_gpu:
2733 kfree(cap->error);
2734 err_cap:
2735 kfree(cap);
2736 return NULL;
2737 }
2738
2739 static bool execlists_capture(struct intel_engine_cs *engine)
2740 {
2741 struct execlists_capture *cap;
2742
2743 if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR))
2744 return true;
2745
2746 /*
2747 * We need to _quickly_ capture the engine state before we reset.
2748 * We are inside an atomic section (softirq) here and we are delaying
2749 * the forced preemption event.
2750 */
2751 cap = capture_regs(engine);
2752 if (!cap)
2753 return true;
2754
2755 spin_lock_irq(&engine->active.lock);
2756 cap->rq = execlists_active(&engine->execlists);
2757 if (cap->rq) {
2758 cap->rq = active_request(cap->rq->context->timeline, cap->rq);
2759 cap->rq = i915_request_get_rcu(cap->rq);
2760 }
2761 spin_unlock_irq(&engine->active.lock);
2762 if (!cap->rq)
2763 goto err_free;
2764
2765 /*
2766 * Remove the request from the execlists queue, and take ownership
2767 * of the request. We pass it to our worker who will _slowly_ compress
2768 * all the pages the _user_ requested for debugging their batch, after
2769 * which we return it to the queue for signaling.
2770 *
2771 * By removing them from the execlists queue, we also remove the
2772 * requests from being processed by __unwind_incomplete_requests()
2773 * during the intel_engine_reset(), and so they will *not* be replayed
2774 * afterwards.
2775 *
2776 * Note that because we have not yet reset the engine at this point,
2777 * it is possible that the request we have identified as being
2778 * guilty did in fact complete, and we will then hit an arbitration
2779 * point allowing the outstanding preemption to succeed. The likelihood
2780 * of that is very low (as capturing of the engine registers should be
2781 * fast enough to run inside an irq-off atomic section!), so we will
2782 * simply hold that request accountable for being non-preemptible
2783 * long enough to force the reset.
2784 */
2785 if (!execlists_hold(engine, cap->rq))
2786 goto err_rq;
2787
2788 INIT_WORK(&cap->work, execlists_capture_work);
2789 schedule_work(&cap->work);
2790 return true;
2791
2792 err_rq:
2793 i915_request_put(cap->rq);
2794 err_free:
2795 i915_gpu_coredump_put(cap->error);
2796 kfree(cap);
2797 return false;
2798 }
2799
2800 static void execlists_reset(struct intel_engine_cs *engine, const char *msg)
2801 {
2802 const unsigned int bit = I915_RESET_ENGINE + engine->id;
2803 unsigned long *lock = &engine->gt->reset.flags;
2804
2805 if (!intel_has_reset_engine(engine->gt))
2806 return;
2807
2808 if (test_and_set_bit(bit, lock))
2809 return;
2810
2811 ENGINE_TRACE(engine, "reset for %s\n", msg);
2812
2813 /* Mark this tasklet as disabled to avoid waiting for it to complete */
2814 tasklet_disable_nosync(&engine->execlists.tasklet);
2815
2816 ring_set_paused(engine, 1); /* Freeze the current request in place */
2817 if (execlists_capture(engine))
2818 intel_engine_reset(engine, msg);
2819 else
2820 ring_set_paused(engine, 0);
2821
2822 tasklet_enable(&engine->execlists.tasklet);
2823 clear_and_wake_up_bit(bit, lock);
2824 }
2825
2826 static bool preempt_timeout(const struct intel_engine_cs *const engine)
2827 {
2828 const struct timer_list *t = &engine->execlists.preempt;
2829
2830 if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
2831 return false;
2832
2833 if (!timer_expired(t))
2834 return false;
2835
2836 return READ_ONCE(engine->execlists.pending[0]);
2837 }
2838
2839 /*
2840 * Check the unread Context Status Buffers and manage the submission of new
2841 * contexts to the ELSP accordingly.
2842 */
2843 static void execlists_submission_tasklet(unsigned long data)
2844 {
2845 struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
2846 bool timeout = preempt_timeout(engine);
2847
2848 process_csb(engine);
2849
2850 if (unlikely(READ_ONCE(engine->execlists.error_interrupt))) {
2851 engine->execlists.error_interrupt = 0;
2852 if (ENGINE_READ(engine, RING_ESR)) /* confirm the error */
2853 execlists_reset(engine, "CS error");
2854 }
2855
2856 if (!READ_ONCE(engine->execlists.pending[0]) || timeout) {
2857 unsigned long flags;
2858
2859 spin_lock_irqsave(&engine->active.lock, flags);
2860 __execlists_submission_tasklet(engine);
2861 spin_unlock_irqrestore(&engine->active.lock, flags);
2862
2863 /* Recheck after serialising with direct-submission */
2864 if (unlikely(timeout && preempt_timeout(engine)))
2865 execlists_reset(engine, "preemption time out");
2866 }
2867 }
2868
2869 static void __execlists_kick(struct intel_engine_execlists *execlists)
2870 {
2871 /* Kick the tasklet for some interrupt coalescing and reset handling */
2872 tasklet_hi_schedule(&execlists->tasklet);
2873 }
2874
2875 #define execlists_kick(t, member) \
2876 __execlists_kick(container_of(t, struct intel_engine_execlists, member))
2877
2878 static void execlists_timeslice(struct timer_list *timer)
2879 {
2880 execlists_kick(timer, timer);
2881 }
2882
2883 static void execlists_preempt(struct timer_list *timer)
2884 {
2885 execlists_kick(timer, preempt);
2886 }
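/*
 * Sketch (an assumption for illustration, not verified here): these
 * callbacks would be wired up during engine init along the lines of
 *
 *	timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
 *	timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
 *
 * so that an expiring timeslice or preempt timer merely reschedules the
 * submission tasklet, which then re-checks timer_expired() and
 * preempt_timeout() under its own serialisation.
 */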
2887
2888 static void queue_request(struct intel_engine_cs *engine,
2889 struct i915_request *rq)
2890 {
2891 GEM_BUG_ON(!list_empty(&rq->sched.link));
2892 list_add_tail(&rq->sched.link,
2893 i915_sched_lookup_priolist(engine, rq_prio(rq)));
2894 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2895 }
2896
2897 static void __submit_queue_imm(struct intel_engine_cs *engine)
2898 {
2899 struct intel_engine_execlists * const execlists = &engine->execlists;
2900
2901 if (reset_in_progress(execlists))
2902 return; /* defer until we restart the engine following reset */
2903
2904 if (execlists->tasklet.func == execlists_submission_tasklet)
2905 __execlists_submission_tasklet(engine);
2906 else
2907 tasklet_hi_schedule(&execlists->tasklet);
2908 }
2909
2910 static void submit_queue(struct intel_engine_cs *engine,
2911 const struct i915_request *rq)
2912 {
2913 struct intel_engine_execlists *execlists = &engine->execlists;
2914
2915 if (rq_prio(rq) <= execlists->queue_priority_hint)
2916 return;
2917
2918 execlists->queue_priority_hint = rq_prio(rq);
2919 __submit_queue_imm(engine);
2920 }
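/*
 * Illustrative example (not authoritative): if queue_priority_hint is
 * currently 2, submitting a new request of priority 1 merely queues it
 * and waits for the next CS event, whereas a request of priority 3 bumps
 * the hint and runs (or schedules) the submission tasklet at once via
 * __submit_queue_imm(), giving it a chance to preempt the current ELSP.
 */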
2921
2922 static bool ancestor_on_hold(const struct intel_engine_cs *engine,
2923 const struct i915_request *rq)
2924 {
2925 GEM_BUG_ON(i915_request_on_hold(rq));
2926 return !list_empty(&engine->active.hold) && hold_request(rq);
2927 }
2928
2929 static void execlists_submit_request(struct i915_request *request)
2930 {
2931 struct intel_engine_cs *engine = request->engine;
2932 unsigned long flags;
2933
2934 /* Will be called from irq-context when using foreign fences. */
2935 spin_lock_irqsave(&engine->active.lock, flags);
2936
2937 if (unlikely(ancestor_on_hold(engine, request))) {
2938 RQ_TRACE(request, "ancestor on hold\n");
2939 list_add_tail(&request->sched.link, &engine->active.hold);
2940 i915_request_set_hold(request);
2941 } else {
2942 queue_request(engine, request);
2943
2944 GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
2945 GEM_BUG_ON(list_empty(&request->sched.link));
2946
2947 submit_queue(engine, request);
2948 }
2949
2950 spin_unlock_irqrestore(&engine->active.lock, flags);
2951 }
2952
2953 static void __execlists_context_fini(struct intel_context *ce)
2954 {
2955 intel_ring_put(ce->ring);
2956 i915_vma_put(ce->state);
2957 }
2958
2959 static void execlists_context_destroy(struct kref *kref)
2960 {
2961 struct intel_context *ce = container_of(kref, typeof(*ce), ref);
2962
2963 GEM_BUG_ON(!i915_active_is_idle(&ce->active));
2964 GEM_BUG_ON(intel_context_is_pinned(ce));
2965
2966 if (ce->state)
2967 __execlists_context_fini(ce);
2968
2969 intel_context_fini(ce);
2970 intel_context_free(ce);
2971 }
2972
2973 static void
2974 set_redzone(void *vaddr, const struct intel_engine_cs *engine)
2975 {
2976 if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
2977 return;
2978
2979 vaddr += engine->context_size;
2980
2981 memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
2982 }
2983
2984 static void
2985 check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
2986 {
2987 if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
2988 return;
2989
2990 vaddr += engine->context_size;
2991
2992 if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
2993 dev_err_once(engine->i915->drm.dev,
2994 "%s context redzone overwritten!\n",
2995 engine->name);
2996 }
2997
2998 static void execlists_context_unpin(struct intel_context *ce)
2999 {
3000 check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE,
3001 ce->engine);
3002
3003 i915_gem_object_unpin_map(ce->state->obj);
3004 }
3005
3006 static void
3007 __execlists_update_reg_state(const struct intel_context *ce,
3008 const struct intel_engine_cs *engine,
3009 u32 head)
3010 {
3011 struct intel_ring *ring = ce->ring;
3012 u32 *regs = ce->lrc_reg_state;
3013
3014 GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
3015 GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
3016
3017 regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
3018 regs[CTX_RING_HEAD] = head;
3019 regs[CTX_RING_TAIL] = ring->tail;
3020 regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
3021
3022 /* RPCS */
3023 if (engine->class == RENDER_CLASS) {
3024 regs[CTX_R_PWR_CLK_STATE] =
3025 intel_sseu_make_rpcs(engine->i915, &ce->sseu);
3026
3027 i915_oa_init_reg_state(ce, engine);
3028 }
3029 }
3030
3031 static int
3032 __execlists_context_pin(struct intel_context *ce,
3033 struct intel_engine_cs *engine)
3034 {
3035 void *vaddr;
3036
3037 GEM_BUG_ON(!ce->state);
3038 GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
3039
3040 vaddr = i915_gem_object_pin_map(ce->state->obj,
3041 i915_coherent_map_type(engine->i915) |
3042 I915_MAP_OVERRIDE);
3043 if (IS_ERR(vaddr))
3044 return PTR_ERR(vaddr);
3045
3046 ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
3047 ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
3048 __execlists_update_reg_state(ce, engine, ce->ring->tail);
3049
3050 return 0;
3051 }
3052
3053 static int execlists_context_pin(struct intel_context *ce)
3054 {
3055 return __execlists_context_pin(ce, ce->engine);
3056 }
3057
3058 static int execlists_context_alloc(struct intel_context *ce)
3059 {
3060 return __execlists_context_alloc(ce, ce->engine);
3061 }
3062
3063 static void execlists_context_reset(struct intel_context *ce)
3064 {
3065 CE_TRACE(ce, "reset\n");
3066 GEM_BUG_ON(!intel_context_is_pinned(ce));
3067
3068 intel_ring_reset(ce->ring, ce->ring->emit);
3069
3070 /* Scrub away the garbage */
3071 execlists_init_reg_state(ce->lrc_reg_state,
3072 ce, ce->engine, ce->ring, true);
3073 __execlists_update_reg_state(ce, ce->engine, ce->ring->tail);
3074
3075 ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
3076 }
3077
3078 static const struct intel_context_ops execlists_context_ops = {
3079 .alloc = execlists_context_alloc,
3080
3081 .pin = execlists_context_pin,
3082 .unpin = execlists_context_unpin,
3083
3084 .enter = intel_context_enter_engine,
3085 .exit = intel_context_exit_engine,
3086
3087 .reset = execlists_context_reset,
3088 .destroy = execlists_context_destroy,
3089 };
3090
3091 static int gen8_emit_init_breadcrumb(struct i915_request *rq)
3092 {
3093 u32 *cs;
3094
3095 if (!i915_request_timeline(rq)->has_initial_breadcrumb)
3096 return 0;
3097
3098 cs = intel_ring_begin(rq, 6);
3099 if (IS_ERR(cs))
3100 return PTR_ERR(cs);
3101
3102 /*
3103 * Check if we have been preempted before we even get started.
3104 *
3105 * After this point i915_request_started() reports true, even if
3106 * we get preempted and so are no longer running.
3107 */
3108 *cs++ = MI_ARB_CHECK;
3109 *cs++ = MI_NOOP;
3110
3111 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3112 *cs++ = i915_request_timeline(rq)->hwsp_offset;
3113 *cs++ = 0;
3114 *cs++ = rq->fence.seqno - 1;
3115
3116 intel_ring_advance(rq, cs);
3117
3118 /* Record the updated position of the request's payload */
3119 rq->infix = intel_ring_offset(rq, cs);
3120
3121 return 0;
3122 }
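/*
 * Illustrative note (hedged): the MI_STORE_DWORD_IMM above writes
 * rq->fence.seqno - 1 into the timeline's HWSP slot, so once the CS has
 * executed it the "request has begun" breadcrumb is visible to the CPU
 * and i915_request_started() can report true even if the context is then
 * preempted before the payload runs.
 */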
3123
3124 static int execlists_request_alloc(struct i915_request *request)
3125 {
3126 int ret;
3127
3128 GEM_BUG_ON(!intel_context_is_pinned(request->context));
3129
3130 /*
3131 * Flush enough space to reduce the likelihood of waiting after
3132 * we start building the request - in which case we will just
3133 * have to repeat work.
3134 */
3135 request->reserved_space += EXECLISTS_REQUEST_SIZE;
3136
3137 /*
3138 * Note that after this point, we have committed to using
3139 * this request as it is being used to both track the
3140 * state of engine initialisation and liveness of the
3141 * golden renderstate above. Think twice before you try
3142 * to cancel/unwind this request now.
3143 */
3144
3145 /* Unconditionally invalidate GPU caches and TLBs. */
3146 ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
3147 if (ret)
3148 return ret;
3149
3150 request->reserved_space -= EXECLISTS_REQUEST_SIZE;
3151 return 0;
3152 }
3153
3154 /*
3155 * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
3156 * PIPE_CONTROL instruction. This is required for the flush to happen correctly
3157 * but there is a slight complication: this is applied in a WA batch where the
3158 * values are only initialized once, so we cannot take the register value at the
3159 * beginning and reuse it further; hence we save its value to memory, upload a
3160 * constant value with bit21 set and then restore the saved value afterwards.
3161 * To simplify the WA, a constant value is formed by using the default value
3162 * of this register. This shouldn't be a problem because we are only modifying
3163 * it for a short period and this batch is non-preemptible. We could of course
3164 * use additional instructions that read the actual value of the register
3165 * at that time and set our bit of interest but it makes the WA complicated.
3166 *
3167 * This WA is also required for Gen9 so extracting as a function avoids
3168 * code duplication.
3169 */
3170 static u32 *
3171 gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
3172 {
3173 /* NB no one else is allowed to scribble over scratch + 256! */
3174 *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
3175 *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3176 *batch++ = intel_gt_scratch_offset(engine->gt,
3177 INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
3178 *batch++ = 0;
3179
3180 *batch++ = MI_LOAD_REGISTER_IMM(1);
3181 *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3182 *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
3183
3184 batch = gen8_emit_pipe_control(batch,
3185 PIPE_CONTROL_CS_STALL |
3186 PIPE_CONTROL_DC_FLUSH_ENABLE,
3187 0);
3188
3189 *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
3190 *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3191 *batch++ = intel_gt_scratch_offset(engine->gt,
3192 INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
3193 *batch++ = 0;
3194
3195 return batch;
3196 }
3197
3198 /*
3199 * Typically we only have one indirect_ctx and per_ctx batch buffer which are
3200 * initialized at the beginning and shared across all contexts but this field
3201 * helps us to have multiple batches at different offsets and select them based
3202 * on some criterion. At the moment this batch always starts at the beginning of the page
3203 * and at this point we don't have multiple wa_ctx batch buffers.
3204 *
3205 * The number of WAs applied is not known at the beginning; we use this field
3206 * to return the number of DWORDS written.
3207 *
3208 * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
3209 * so it adds NOOPs as padding to make it cacheline aligned.
3210 * MI_BATCH_BUFFER_END will be added to the perctx batch and both of them together
3211 * make a complete batch buffer.
3212 */
3213 static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3214 {
3215 /* WaDisableCtxRestoreArbitration:bdw,chv */
3216 *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3217
3218 /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
3219 if (IS_BROADWELL(engine->i915))
3220 batch = gen8_emit_flush_coherentl3_wa(engine, batch);
3221
3222 /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
3223 /* Actual scratch location is at 128 bytes offset */
3224 batch = gen8_emit_pipe_control(batch,
3225 PIPE_CONTROL_FLUSH_L3 |
3226 PIPE_CONTROL_STORE_DATA_INDEX |
3227 PIPE_CONTROL_CS_STALL |
3228 PIPE_CONTROL_QW_WRITE,
3229 LRC_PPHWSP_SCRATCH_ADDR);
3230
3231 *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3232
3233 /* Pad to end of cacheline */
3234 while ((unsigned long)batch % CACHELINE_BYTES)
3235 *batch++ = MI_NOOP;
3236
3237 /*
3238 * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
3239 * execution depends on the length specified in terms of cache lines
3240 * in the register CTX_RCS_INDIRECT_CTX
3241 */
3242
3243 return batch;
3244 }
3245
3246 struct lri {
3247 i915_reg_t reg;
3248 u32 value;
3249 };
3250
3251 static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
3252 {
3253 GEM_BUG_ON(!count || count > 63);
3254
3255 *batch++ = MI_LOAD_REGISTER_IMM(count);
3256 do {
3257 *batch++ = i915_mmio_reg_offset(lri->reg);
3258 *batch++ = lri->value;
3259 } while (lri++, --count);
3260 *batch++ = MI_NOOP;
3261
3262 return batch;
3263 }
3264
3265 static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3266 {
3267 static const struct lri lri[] = {
3268 /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
3269 {
3270 COMMON_SLICE_CHICKEN2,
3271 __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
3272 0),
3273 },
3274
3275 /* BSpec: 11391 */
3276 {
3277 FF_SLICE_CHICKEN,
3278 __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
3279 FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
3280 },
3281
3282 /* BSpec: 11299 */
3283 {
3284 _3D_CHICKEN3,
3285 __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
3286 _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
3287 }
3288 };
3289
3290 *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3291
3292 /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
3293 batch = gen8_emit_flush_coherentl3_wa(engine, batch);
3294
3295 /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
3296 batch = gen8_emit_pipe_control(batch,
3297 PIPE_CONTROL_FLUSH_L3 |
3298 PIPE_CONTROL_STORE_DATA_INDEX |
3299 PIPE_CONTROL_CS_STALL |
3300 PIPE_CONTROL_QW_WRITE,
3301 LRC_PPHWSP_SCRATCH_ADDR);
3302
3303 batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
3304
3305 /* WaMediaPoolStateCmdInWABB:bxt,glk */
3306 if (HAS_POOLED_EU(engine->i915)) {
3307 /*
3308 * EU pool configuration is set up along with the golden context
3309 * during context initialization. This value depends on
3310 * device type (2x6 or 3x6) and needs to be updated based
3311 * on which subslice is disabled especially for 2x6
3312 * devices; however, it is safe to load the default
3313 * configuration of a 3x6 device instead of masking off
3314 * the corresponding bits, because the HW ignores bits of a
3315 * disabled subslice and drops down to the appropriate config. Please
3316 * see render_state_setup() in i915_gem_render_state.c for
3317 * possible configurations, to avoid duplication they are
3318 * not shown here again.
3319 */
3320 *batch++ = GEN9_MEDIA_POOL_STATE;
3321 *batch++ = GEN9_MEDIA_POOL_ENABLE;
3322 *batch++ = 0x00777000;
3323 *batch++ = 0;
3324 *batch++ = 0;
3325 *batch++ = 0;
3326 }
3327
3328 *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3329
3330 /* Pad to end of cacheline */
3331 while ((unsigned long)batch % CACHELINE_BYTES)
3332 *batch++ = MI_NOOP;
3333
3334 return batch;
3335 }
3336
3337 static u32 *
3338 gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3339 {
3340 int i;
3341
3342 /*
3343 * WaPipeControlBefore3DStateSamplePattern: cnl
3344 *
3345 * Ensure the engine is idle prior to programming a
3346 * 3DSTATE_SAMPLE_PATTERN during a context restore.
3347 */
3348 batch = gen8_emit_pipe_control(batch,
3349 PIPE_CONTROL_CS_STALL,
3350 0);
3351 /*
3352 * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
3353 * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
3354 * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
3355 * confusing. Since gen8_emit_pipe_control() already advances the
3356 * batch by 6 dwords, we advance the other 10 here, completing a
3357 * cacheline. It's not clear if the workaround requires this padding
3358 * before other commands, or if it's just the regular padding we would
3359 * already have for the workaround bb, so leave it here for now.
3360 */
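/*
 * Worked arithmetic (assuming CACHELINE_BYTES is 64): the PIPE_CONTROL
 * above emits 6 dwords and the loop below emits 10 MI_NOOPs, i.e.
 * 16 dwords * 4 bytes = 64 bytes, the same single cacheline as the
 * 4 + 12 dwords in the workaround's own counting.
 */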
3361 for (i = 0; i < 10; i++)
3362 *batch++ = MI_NOOP;
3363
3364 /* Pad to end of cacheline */
3365 while ((unsigned long)batch % CACHELINE_BYTES)
3366 *batch++ = MI_NOOP;
3367
3368 return batch;
3369 }
3370
3371 #define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
3372
3373 static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
3374 {
3375 struct drm_i915_gem_object *obj;
3376 struct i915_vma *vma;
3377 int err;
3378
3379 obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE);
3380 if (IS_ERR(obj))
3381 return PTR_ERR(obj);
3382
3383 vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
3384 if (IS_ERR(vma)) {
3385 err = PTR_ERR(vma);
3386 goto err;
3387 }
3388
3389 err = i915_ggtt_pin(vma, 0, PIN_HIGH);
3390 if (err)
3391 goto err;
3392
3393 engine->wa_ctx.vma = vma;
3394 return 0;
3395
3396 err:
3397 i915_gem_object_put(obj);
3398 return err;
3399 }
3400
3401 static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine)
3402 {
3403 i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
3404 }
3405
3406 typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
3407
3408 static int intel_init_workaround_bb(struct intel_engine_cs *engine)
3409 {
3410 struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
3411 struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx,
3412 &wa_ctx->per_ctx };
3413 wa_bb_func_t wa_bb_fn[2];
3414 struct page *page;
3415 void *batch, *batch_ptr;
3416 unsigned int i;
3417 int ret;
3418
3419 if (engine->class != RENDER_CLASS)
3420 return 0;
3421
3422 switch (INTEL_GEN(engine->i915)) {
3423 case 12:
3424 case 11:
3425 return 0;
3426 case 10:
3427 wa_bb_fn[0] = gen10_init_indirectctx_bb;
3428 wa_bb_fn[1] = NULL;
3429 break;
3430 case 9:
3431 wa_bb_fn[0] = gen9_init_indirectctx_bb;
3432 wa_bb_fn[1] = NULL;
3433 break;
3434 case 8:
3435 wa_bb_fn[0] = gen8_init_indirectctx_bb;
3436 wa_bb_fn[1] = NULL;
3437 break;
3438 default:
3439 MISSING_CASE(INTEL_GEN(engine->i915));
3440 return 0;
3441 }
3442
3443 ret = lrc_setup_wa_ctx(engine);
3444 if (ret) {
3445 DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret);
3446 return ret;
3447 }
3448
3449 page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0);
3450 batch = batch_ptr = kmap_atomic(page);
3451
3452 /*
3453 * Emit the two workaround batch buffers, recording the offset from the
3454 * start of the workaround batch buffer object for each and their
3455 * respective sizes.
3456 */
3457 for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
3458 wa_bb[i]->offset = batch_ptr - batch;
3459 if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
3460 CACHELINE_BYTES))) {
3461 ret = -EINVAL;
3462 break;
3463 }
3464 if (wa_bb_fn[i])
3465 batch_ptr = wa_bb_fn[i](engine, batch_ptr);
3466 wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
3467 }
3468
3469 BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);
3470
3471 kunmap_atomic(batch);
3472 if (ret)
3473 lrc_destroy_wa_ctx(engine);
3474
3475 return ret;
3476 }
3477
3478 static void enable_error_interrupt(struct intel_engine_cs *engine)
3479 {
3480 u32 status;
3481
3482 engine->execlists.error_interrupt = 0;
3483 ENGINE_WRITE(engine, RING_EMR, ~0u);
3484 ENGINE_WRITE(engine, RING_EIR, ~0u); /* clear all existing errors */
3485
3486 status = ENGINE_READ(engine, RING_ESR);
3487 if (unlikely(status)) {
3488 dev_err(engine->i915->drm.dev,
3489 "engine '%s' resumed still in error: %08x\n",
3490 engine->name, status);
3491 __intel_gt_reset(engine->gt, engine->mask);
3492 }
3493
3494 /*
3495 * On current gen8+, we have 2 signals to play with
3496 *
3497 * - I915_ERROR_INSTRUCTION (bit 0)
3498 *
3499 * Generate an error if the command parser encounters an invalid
3500 * instruction
3501 *
3502 * This is a fatal error.
3503 *
3504 * - CP_PRIV (bit 2)
3505 *
3506 * Generate an error on privilege violation (where the CP replaces
3507 * the instruction with a no-op). This also fires for writes into
3508 * read-only scratch pages.
3509 *
3510 * This is a non-fatal error, parsing continues.
3511 *
3512 * * there are a few others defined for odd HW that we do not use
3513 *
3514 * Since CP_PRIV fires for cases where we have chosen to ignore the
3515 * error (as the HW is validating and suppressing the mistakes), we
3516 * only unmask the instruction error bit.
3517 */
3518 ENGINE_WRITE(engine, RING_EMR, ~I915_ERROR_INSTRUCTION);
3519 }
3520
3521 static void enable_execlists(struct intel_engine_cs *engine)
3522 {
3523 u32 mode;
3524
3525 assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
3526
3527 intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
3528
3529 if (INTEL_GEN(engine->i915) >= 11)
3530 mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE);
3531 else
3532 mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE);
3533 ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode);
3534
3535 ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
3536
3537 ENGINE_WRITE_FW(engine,
3538 RING_HWS_PGA,
3539 i915_ggtt_offset(engine->status_page.vma));
3540 ENGINE_POSTING_READ(engine, RING_HWS_PGA);
3541
3542 enable_error_interrupt(engine);
3543
3544 engine->context_tag = 0;
3545 }
3546
3547 static bool unexpected_starting_state(struct intel_engine_cs *engine)
3548 {
3549 bool unexpected = false;
3550
3551 if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) {
3552 DRM_DEBUG_DRIVER("STOP_RING still set in RING_MI_MODE\n");
3553 unexpected = true;
3554 }
3555
3556 return unexpected;
3557 }
3558
3559 static int execlists_resume(struct intel_engine_cs *engine)
3560 {
3561 intel_mocs_init_engine(engine);
3562
3563 intel_engine_reset_breadcrumbs(engine);
3564
3565 if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) {
3566 struct drm_printer p = drm_debug_printer(__func__);
3567
3568 intel_engine_dump(engine, &p, NULL);
3569 }
3570
3571 enable_execlists(engine);
3572
3573 return 0;
3574 }
3575
3576 static void execlists_reset_prepare(struct intel_engine_cs *engine)
3577 {
3578 struct intel_engine_execlists * const execlists = &engine->execlists;
3579 unsigned long flags;
3580
3581 ENGINE_TRACE(engine, "depth<-%d\n",
3582 atomic_read(&execlists->tasklet.count));
3583
3584 /*
3585 * Prevent request submission to the hardware until we have
3586 * completed the reset in i915_gem_reset_finish(). If a request
3587 * is completed by one engine, it may then queue a request
3588 * to a second via its execlists->tasklet *just* as we are
3589 * calling engine->resume() and also writing the ELSP.
3590 * Turning off the execlists->tasklet until the reset is over
3591 * prevents the race.
3592 */
3593 __tasklet_disable_sync_once(&execlists->tasklet);
3594 GEM_BUG_ON(!reset_in_progress(execlists));
3595
3596 /* And flush any current direct submission. */
3597 spin_lock_irqsave(&engine->active.lock, flags);
3598 spin_unlock_irqrestore(&engine->active.lock, flags);
3599
3600 /*
3601 * We stop engines, otherwise we might get a failed reset and a
3602 * dead gpu (on elk). Also, a gpu as modern as kbl can suffer
3603 * from a system hang if a batchbuffer is progressing when
3604 * the reset is issued, regardless of the READY_TO_RESET ack.
3605 * Thus assume it is best to stop engines on all gens
3606 * where we have a gpu reset.
3607 *
3608 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
3609 *
3610 * FIXME: Wa for more modern gens needs to be validated
3611 */
3612 intel_engine_stop_cs(engine);
3613 }
3614
3615 static void reset_csb_pointers(struct intel_engine_cs *engine)
3616 {
3617 struct intel_engine_execlists * const execlists = &engine->execlists;
3618 const unsigned int reset_value = execlists->csb_size - 1;
3619
3620 ring_set_paused(engine, 0);
3621
3622 /*
3623 * After a reset, the HW starts writing into CSB entry [0]. We
3624 * therefore have to set our HEAD pointer back one entry so that
3625 * the *first* entry we check is entry 0. To complicate this further,
3626 * as we don't wait for the first interrupt after reset, we have to
3627 * fake the HW write to point back to the last entry so that our
3628 * inline comparison of our cached head position against the last HW
3629 * write works even before the first interrupt.
3630 */
3631 execlists->csb_head = reset_value;
3632 WRITE_ONCE(*execlists->csb_write, reset_value);
3633 wmb(); /* Make sure this is visible to HW (paranoia?) */
3634
3635 /*
3636 * Sometimes Icelake forgets to reset its pointers on a GPU reset.
3637 * Bludgeon them with a mmio update to be sure.
3638 */
3639 ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
3640 reset_value << 8 | reset_value);
3641 ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
3642
3643 invalidate_csb_entries(&execlists->csb_status[0],
3644 &execlists->csb_status[reset_value]);
3645 }
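/*
 * Illustrative example (not authoritative): with a csb_size of 12, the
 * reset_value is 11, so both csb_head and the faked *csb_write are parked
 * on the last entry; the first "++head" in process_csb() then wraps to 0,
 * matching the HW which restarts writing at CSB entry [0] after a reset.
 */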
3646
3647 static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
3648 {
3649 int x;
3650
3651 x = lrc_ring_mi_mode(engine);
3652 if (x != -1) {
3653 regs[x + 1] &= ~STOP_RING;
3654 regs[x + 1] |= STOP_RING << 16;
3655 }
3656 }
3657
3658 static void __execlists_reset_reg_state(const struct intel_context *ce,
3659 const struct intel_engine_cs *engine)
3660 {
3661 u32 *regs = ce->lrc_reg_state;
3662
3663 __reset_stop_ring(regs, engine);
3664 }
3665
3666 static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
3667 {
3668 struct intel_engine_execlists * const execlists = &engine->execlists;
3669 struct intel_context *ce;
3670 struct i915_request *rq;
3671 u32 head;
3672
3673 mb(); /* paranoia: read the CSB pointers from after the reset */
3674 clflush(execlists->csb_write);
3675 mb();
3676
3677 process_csb(engine); /* drain preemption events */
3678
3679 /* Following the reset, we need to reload the CSB read/write pointers */
3680 reset_csb_pointers(engine);
3681
3682 /*
3683 * Save the currently executing context; even if we completed
3684 * its request, it was still running at the time of the
3685 * reset and will have been clobbered.
3686 */
3687 rq = execlists_active(execlists);
3688 if (!rq)
3689 goto unwind;
3690
3691 ce = rq->context;
3692 GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
3693
3694 if (i915_request_completed(rq)) {
3695 /* Idle context; tidy up the ring so we can restart afresh */
3696 head = intel_ring_wrap(ce->ring, rq->tail);
3697 goto out_replay;
3698 }
3699
3700 /* We still have requests in-flight; the engine should be active */
3701 GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
3702
3703 /* Context has requests still in-flight; it should not be idle! */
3704 GEM_BUG_ON(i915_active_is_idle(&ce->active));
3705
3706 rq = active_request(ce->timeline, rq);
3707 head = intel_ring_wrap(ce->ring, rq->head);
3708 GEM_BUG_ON(head == ce->ring->tail);
3709
3710 /*
3711 * If this request hasn't started yet, e.g. it is waiting on a
3712 * semaphore, we need to avoid skipping the request or else we
3713 * break the signaling chain. However, if the context is corrupt
3714 * the request will not restart and we will be stuck with a wedged
3715 * device. It is quite often the case that if we issue a reset
3716 * while the GPU is loading the context image, the context
3717 * image becomes corrupt.
3718 *
3719 * Otherwise, if we have not started yet, the request should replay
3720 * perfectly and we do not need to flag the result as being erroneous.
3721 */
3722 if (!i915_request_started(rq))
3723 goto out_replay;
3724
3725 /*
3726 * If the request was innocent, we leave the request in the ELSP
3727 * and will try to replay it on restarting. The context image may
3728 * have been corrupted by the reset, in which case we may have
3729 * to service a new GPU hang, but more likely we can continue on
3730 * without impact.
3731 *
3732 * If the request was guilty, we presume the context is corrupt
3733 * and have to at least restore the RING register in the context
3734 * image back to the expected values to skip over the guilty request.
3735 */
3736 __i915_request_reset(rq, stalled);
3737 if (!stalled)
3738 goto out_replay;
3739
3740 /*
3741 * We want a simple context + ring to execute the breadcrumb update.
3742 * We cannot rely on the context being intact across the GPU hang,
3743 * so clear it and rebuild just what we need for the breadcrumb.
3744 * All pending requests for this context will be zapped, and any
3745 * future request will be after userspace has had the opportunity
3746 * to recreate its own state.
3747 */
3748 GEM_BUG_ON(!intel_context_is_pinned(ce));
3749 restore_default_state(ce, engine);
3750
3751 out_replay:
3752 ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",
3753 head, ce->ring->tail);
3754 __execlists_reset_reg_state(ce, engine);
3755 __execlists_update_reg_state(ce, engine, head);
3756 ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
3757
3758 unwind:
3759 /* Push back any incomplete requests for replay after the reset. */
3760 cancel_port_requests(execlists);
3761 __unwind_incomplete_requests(engine);
3762 }
3763
3764 static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled)
3765 {
3766 unsigned long flags;
3767
3768 ENGINE_TRACE(engine, "\n");
3769
3770 spin_lock_irqsave(&engine->active.lock, flags);
3771
3772 __execlists_reset(engine, stalled);
3773
3774 spin_unlock_irqrestore(&engine->active.lock, flags);
3775 }
3776
3777 static void nop_submission_tasklet(unsigned long data)
3778 {
3779 struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
3780
3781 /* The driver is wedged; don't process any more events. */
3782 WRITE_ONCE(engine->execlists.queue_priority_hint, INT_MIN);
3783 }
3784
3785 static void execlists_reset_cancel(struct intel_engine_cs *engine)
3786 {
3787 struct intel_engine_execlists * const execlists = &engine->execlists;
3788 struct i915_request *rq, *rn;
3789 struct rb_node *rb;
3790 unsigned long flags;
3791
3792 ENGINE_TRACE(engine, "\n");
3793
3794 /*
3795 * Before we call engine->cancel_requests(), we should have exclusive
3796 * access to the submission state. This is arranged for us by the
3797 * caller disabling the interrupt generation, the tasklet and other
3798 * threads that may then access the same state, giving us a free hand
3799 * to reset state. However, we still need to let lockdep be aware that
3800 * we know this state may be accessed in hardirq context, so we
3801 * disable the irq around this manipulation and we want to keep
3802 * the spinlock focused on its duties and not accidentally conflate
3803 * coverage to the submission's irq state. (Similarly, although we
3804 * shouldn't need to disable irq around the manipulation of the
3805 * submission's irq state, we also wish to remind ourselves that
3806 * it is irq state.)
3807 */
3808 spin_lock_irqsave(&engine->active.lock, flags);
3809
3810 __execlists_reset(engine, true);
3811
3812 /* Mark all executing requests as skipped. */
3813 list_for_each_entry(rq, &engine->active.requests, sched.link)
3814 mark_eio(rq);
3815
3816 /* Flush the queued requests to the timeline list (for retiring). */
3817 while ((rb = rb_first_cached(&execlists->queue))) {
3818 struct i915_priolist *p = to_priolist(rb);
3819 int i;
3820
3821 priolist_for_each_request_consume(rq, rn, p, i) {
3822 mark_eio(rq);
3823 __i915_request_submit(rq);
3824 }
3825
3826 rb_erase_cached(&p->node, &execlists->queue);
3827 i915_priolist_free(p);
3828 }
3829
3830 /* On-hold requests will be flushed to timeline upon their release */
3831 list_for_each_entry(rq, &engine->active.hold, sched.link)
3832 mark_eio(rq);
3833
3834 /* Cancel all attached virtual engines */
3835 while ((rb = rb_first_cached(&execlists->virtual))) {
3836 struct virtual_engine *ve =
3837 rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
3838
3839 rb_erase_cached(rb, &execlists->virtual);
3840 RB_CLEAR_NODE(rb);
3841
3842 spin_lock(&ve->base.active.lock);
3843 rq = fetch_and_zero(&ve->request);
3844 if (rq) {
3845 mark_eio(rq);
3846
3847 rq->engine = engine;
3848 __i915_request_submit(rq);
3849 i915_request_put(rq);
3850
3851 ve->base.execlists.queue_priority_hint = INT_MIN;
3852 }
3853 spin_unlock(&ve->base.active.lock);
3854 }
3855
3856 /* Remaining _unready_ requests will be nop'ed when submitted */
3857
3858 execlists->queue_priority_hint = INT_MIN;
3859 execlists->queue = RB_ROOT_CACHED;
3860
3861 GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
3862 execlists->tasklet.func = nop_submission_tasklet;
3863
3864 spin_unlock_irqrestore(&engine->active.lock, flags);
3865 }
3866
3867 static void execlists_reset_finish(struct intel_engine_cs *engine)
3868 {
3869 struct intel_engine_execlists * const execlists = &engine->execlists;
3870
3871 /*
3872 * After a GPU reset, we may have requests to replay. Do so now while
3873 * we still have the forcewake to be sure that the GPU is not allowed
3874 * to sleep before we restart and reload a context.
3875 */
3876 GEM_BUG_ON(!reset_in_progress(execlists));
3877 if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))
3878 execlists->tasklet.func(execlists->tasklet.data);
3879
3880 if (__tasklet_enable(&execlists->tasklet))
3881 /* And kick in case we missed a new request submission. */
3882 tasklet_hi_schedule(&execlists->tasklet);
3883 ENGINE_TRACE(engine, "depth->%d\n",
3884 atomic_read(&execlists->tasklet.count));
3885 }
3886
3887 static int gen8_emit_bb_start_noarb(struct i915_request *rq,
3888 u64 offset, u32 len,
3889 const unsigned int flags)
3890 {
3891 u32 *cs;
3892
3893 cs = intel_ring_begin(rq, 4);
3894 if (IS_ERR(cs))
3895 return PTR_ERR(cs);
3896
3897 /*
3898 * WaDisableCtxRestoreArbitration:bdw,chv
3899 *
3900 * We don't need to perform MI_ARB_ENABLE as often as we do (in
3901 * particular on all the gens that do not need the w/a at all!); if we
3902 * took care to make sure that on every switch into this context
3903 * (both ordinary and for preemption) arbitration was enabled,
3904 * we would be fine. However, for gen8 there is another w/a that
3905 * requires us to not preempt inside GPGPU execution, so we keep
3906 * arbitration disabled for gen8 batches. Arbitration will be
3907 * re-enabled before we close the request
3908 * (engine->emit_fini_breadcrumb).
3909 */
3910 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3911
3912 /* FIXME(BDW+): Address space and security selectors. */
3913 *cs++ = MI_BATCH_BUFFER_START_GEN8 |
3914 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
3915 *cs++ = lower_32_bits(offset);
3916 *cs++ = upper_32_bits(offset);
3917
3918 intel_ring_advance(rq, cs);
3919
3920 return 0;
3921 }
3922
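/*
 * Preemptible variant of the batchbuffer start: arbitration is enabled
 * before MI_BATCH_BUFFER_START so the batch itself may be preempted, and
 * disabled again afterwards until the fini breadcrumb re-enables it.
 */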
3923 static int gen8_emit_bb_start(struct i915_request *rq,
3924 u64 offset, u32 len,
3925 const unsigned int flags)
3926 {
3927 u32 *cs;
3928
3929 cs = intel_ring_begin(rq, 6);
3930 if (IS_ERR(cs))
3931 return PTR_ERR(cs);
3932
3933 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3934
3935 *cs++ = MI_BATCH_BUFFER_START_GEN8 |
3936 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
3937 *cs++ = lower_32_bits(offset);
3938 *cs++ = upper_32_bits(offset);
3939
3940 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3941 *cs++ = MI_NOOP;
3942
3943 intel_ring_advance(rq, cs);
3944
3945 return 0;
3946 }
3947
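/*
 * Unmask our user interrupt (plus the bits we always keep unmasked) in
 * RING_IMR; the posting read flushes the write to the hardware.
 */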
3948 static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine)
3949 {
3950 ENGINE_WRITE(engine, RING_IMR,
3951 ~(engine->irq_enable_mask | engine->irq_keep_mask));
3952 ENGINE_POSTING_READ(engine, RING_IMR);
3953 }
3954
3955 static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
3956 {
3957 ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
3958 }
3959
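/*
 * Plain MI_FLUSH_DW flush used by the non-render engines, with a post-sync
 * write into the ppHWSP scratch slot to act as a command barrier; TLB (and
 * BSD) invalidation is added when EMIT_INVALIDATE is requested.
 */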
3960 static int gen8_emit_flush(struct i915_request *request, u32 mode)
3961 {
3962 u32 cmd, *cs;
3963
3964 cs = intel_ring_begin(request, 4);
3965 if (IS_ERR(cs))
3966 return PTR_ERR(cs);
3967
3968 cmd = MI_FLUSH_DW + 1;
3969
3970 /* We always require a command barrier so that subsequent
3971 * commands, such as breadcrumb interrupts, are strictly ordered
3972 * wrt the contents of the write cache being flushed to memory
3973 * (and thus being coherent from the CPU).
3974 */
3975 cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
3976
3977 if (mode & EMIT_INVALIDATE) {
3978 cmd |= MI_INVALIDATE_TLB;
3979 if (request->engine->class == VIDEO_DECODE_CLASS)
3980 cmd |= MI_INVALIDATE_BSD;
3981 }
3982
3983 *cs++ = cmd;
3984 *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
3985 *cs++ = 0; /* upper addr */
3986 *cs++ = 0; /* value */
3987 intel_ring_advance(request, cs);
3988
3989 return 0;
3990 }
3991
3992 static int gen8_emit_flush_render(struct i915_request *request,
3993 u32 mode)
3994 {
3995 bool vf_flush_wa = false, dc_flush_wa = false;
3996 u32 *cs, flags = 0;
3997 int len;
3998
3999 flags |= PIPE_CONTROL_CS_STALL;
4000
4001 if (mode & EMIT_FLUSH) {
4002 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
4003 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
4004 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
4005 flags |= PIPE_CONTROL_FLUSH_ENABLE;
4006 }
4007
4008 if (mode & EMIT_INVALIDATE) {
4009 flags |= PIPE_CONTROL_TLB_INVALIDATE;
4010 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
4011 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
4012 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
4013 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
4014 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
4015 flags |= PIPE_CONTROL_QW_WRITE;
4016 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4017
4018 /*
4019 * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
4020 * pipe control.
4021 */
4022 if (IS_GEN(request->i915, 9))
4023 vf_flush_wa = true;
4024
4025 /* WaForGAMHang:kbl */
4026 if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0))
4027 dc_flush_wa = true;
4028 }
4029
4030 len = 6;
4031
4032 if (vf_flush_wa)
4033 len += 6;
4034
4035 if (dc_flush_wa)
4036 len += 12;
4037
4038 cs = intel_ring_begin(request, len);
4039 if (IS_ERR(cs))
4040 return PTR_ERR(cs);
4041
4042 if (vf_flush_wa)
4043 cs = gen8_emit_pipe_control(cs, 0, 0);
4044
4045 if (dc_flush_wa)
4046 cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
4047 0);
4048
4049 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4050
4051 if (dc_flush_wa)
4052 cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
4053
4054 intel_ring_advance(request, cs);
4055
4056 return 0;
4057 }
4058
4059 static int gen11_emit_flush_render(struct i915_request *request,
4060 u32 mode)
4061 {
4062 if (mode & EMIT_FLUSH) {
4063 u32 *cs;
4064 u32 flags = 0;
4065
4066 flags |= PIPE_CONTROL_CS_STALL;
4067
4068 flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
4069 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
4070 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
4071 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
4072 flags |= PIPE_CONTROL_FLUSH_ENABLE;
4073 flags |= PIPE_CONTROL_QW_WRITE;
4074 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4075
4076 cs = intel_ring_begin(request, 6);
4077 if (IS_ERR(cs))
4078 return PTR_ERR(cs);
4079
4080 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4081 intel_ring_advance(request, cs);
4082 }
4083
4084 if (mode & EMIT_INVALIDATE) {
4085 u32 *cs;
4086 u32 flags = 0;
4087
4088 flags |= PIPE_CONTROL_CS_STALL;
4089
4090 flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
4091 flags |= PIPE_CONTROL_TLB_INVALIDATE;
4092 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
4093 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
4094 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
4095 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
4096 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
4097 flags |= PIPE_CONTROL_QW_WRITE;
4098 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4099
4100 cs = intel_ring_begin(request, 6);
4101 if (IS_ERR(cs))
4102 return PTR_ERR(cs);
4103
4104 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4105 intel_ring_advance(request, cs);
4106 }
4107
4108 return 0;
4109 }
4110
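/*
 * On gen12, MI_ARB_CHECK doubles as the pre-parser control: bit 8 appears
 * to act as the write mask for the pre-fetch-disable bit carried in bit 0.
 */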
4111 static u32 preparser_disable(bool state)
4112 {
4113 return MI_ARB_CHECK | 1 << 8 | state;
4114 }
4115
4116 static int gen12_emit_flush_render(struct i915_request *request,
4117 u32 mode)
4118 {
4119 if (mode & EMIT_FLUSH) {
4120 u32 flags = 0;
4121 u32 *cs;
4122
4123 flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
4124 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
4125 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
4126 /* Wa_1409600907:tgl */
4127 flags |= PIPE_CONTROL_DEPTH_STALL;
4128 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
4129 flags |= PIPE_CONTROL_FLUSH_ENABLE;
4130 flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
4131
4132 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4133 flags |= PIPE_CONTROL_QW_WRITE;
4134
4135 flags |= PIPE_CONTROL_CS_STALL;
4136
4137 cs = intel_ring_begin(request, 6);
4138 if (IS_ERR(cs))
4139 return PTR_ERR(cs);
4140
4141 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4142 intel_ring_advance(request, cs);
4143 }
4144
4145 if (mode & EMIT_INVALIDATE) {
4146 u32 flags = 0;
4147 u32 *cs;
4148
4149 flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
4150 flags |= PIPE_CONTROL_TLB_INVALIDATE;
4151 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
4152 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
4153 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
4154 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
4155 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
4156 flags |= PIPE_CONTROL_L3_RO_CACHE_INVALIDATE;
4157
4158 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4159 flags |= PIPE_CONTROL_QW_WRITE;
4160
4161 flags |= PIPE_CONTROL_CS_STALL;
4162
4163 cs = intel_ring_begin(request, 8);
4164 if (IS_ERR(cs))
4165 return PTR_ERR(cs);
4166
4167 /*
4168 * Prevent the pre-parser from skipping past the TLB
4169 * invalidate and loading a stale page for the batch
4170 * buffer / request payload.
4171 */
4172 *cs++ = preparser_disable(true);
4173
4174 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4175
4176 *cs++ = preparser_disable(false);
4177 intel_ring_advance(request, cs);
4178 }
4179
4180 return 0;
4181 }
4182
4183 /*
4184 * Reserve space for 2 NOOPs at the end of each request to be
4185 * used as a workaround for not being allowed to do lite
4186 * restore with HEAD==TAIL (WaIdleLiteRestore).
4187 */
4188 static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
4189 {
4190 /* Ensure there's always at least one preemption point per-request. */
4191 *cs++ = MI_ARB_CHECK;
4192 *cs++ = MI_NOOP;
4193 request->wa_tail = intel_ring_offset(request, cs);
4194
4195 return cs;
4196 }
4197
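/*
 * Emit a MI_SEMAPHORE_WAIT that polls the per-engine preempt semaphore in
 * the HWSP until it reads zero again; ring_set_paused() relies on this to
 * hold a completed context on the GPU while we rewrite the ELSP
 * (preempt-to-busy).
 */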
4198 static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
4199 {
4200 *cs++ = MI_SEMAPHORE_WAIT |
4201 MI_SEMAPHORE_GLOBAL_GTT |
4202 MI_SEMAPHORE_POLL |
4203 MI_SEMAPHORE_SAD_EQ_SDD;
4204 *cs++ = 0;
4205 *cs++ = intel_hws_preempt_address(request->engine);
4206 *cs++ = 0;
4207
4208 return cs;
4209 }
4210
4211 static __always_inline u32*
4212 gen8_emit_fini_breadcrumb_footer(struct i915_request *request,
4213 u32 *cs)
4214 {
4215 *cs++ = MI_USER_INTERRUPT;
4216
4217 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4218 if (intel_engine_has_semaphores(request->engine))
4219 cs = emit_preempt_busywait(request, cs);
4220
4221 request->tail = intel_ring_offset(request, cs);
4222 assert_ring_tail_valid(request->ring, request->tail);
4223
4224 return gen8_emit_wa_tail(request, cs);
4225 }
4226
4227 static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
4228 {
4229 cs = gen8_emit_ggtt_write(cs,
4230 request->fence.seqno,
4231 i915_request_active_timeline(request)->hwsp_offset,
4232 0);
4233
4234 return gen8_emit_fini_breadcrumb_footer(request, cs);
4235 }
4236
4237 static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4238 {
4239 cs = gen8_emit_pipe_control(cs,
4240 PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4241 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4242 PIPE_CONTROL_DC_FLUSH_ENABLE,
4243 0);
4244
4245 /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
4246 cs = gen8_emit_ggtt_write_rcs(cs,
4247 request->fence.seqno,
4248 i915_request_active_timeline(request)->hwsp_offset,
4249 PIPE_CONTROL_FLUSH_ENABLE |
4250 PIPE_CONTROL_CS_STALL);
4251
4252 return gen8_emit_fini_breadcrumb_footer(request, cs);
4253 }
4254
4255 static u32 *
4256 gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4257 {
4258 cs = gen8_emit_ggtt_write_rcs(cs,
4259 request->fence.seqno,
4260 i915_request_active_timeline(request)->hwsp_offset,
4261 PIPE_CONTROL_CS_STALL |
4262 PIPE_CONTROL_TILE_CACHE_FLUSH |
4263 PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4264 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4265 PIPE_CONTROL_DC_FLUSH_ENABLE |
4266 PIPE_CONTROL_FLUSH_ENABLE);
4267
4268 return gen8_emit_fini_breadcrumb_footer(request, cs);
4269 }
4270
4271 /*
4272 * Note that the CS instruction pre-parser will not stall on the breadcrumb
4273 * flush and will continue pre-fetching the instructions after it before the
4274 * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
4275 * BB_START/END instructions, so, even though we might pre-fetch the preamble
4276 * of the next request before the memory has been flushed, we're guaranteed that
4277 * we won't access the batch itself too early.
4278 * However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
4279 * so, if the current request is modifying an instruction in the next request on
4280 * the same intel_context, we might pre-fetch and then execute the pre-update
4281 * instruction. To avoid this, the users of self-modifying code should either
4282 * disable the parser around the code emitting the memory writes, via a new flag
4283 * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
4284 * the in-kernel use-cases we've opted to use a separate context, see
4285 * reloc_gpu() as an example.
4286 * All the above applies only to the instructions themselves. Non-inline data
4287 * used by the instructions is not pre-fetched.
4288 */
4289
4290 static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
4291 {
4292 *cs++ = MI_SEMAPHORE_WAIT_TOKEN |
4293 MI_SEMAPHORE_GLOBAL_GTT |
4294 MI_SEMAPHORE_POLL |
4295 MI_SEMAPHORE_SAD_EQ_SDD;
4296 *cs++ = 0;
4297 *cs++ = intel_hws_preempt_address(request->engine);
4298 *cs++ = 0;
4299 *cs++ = 0;
4300 *cs++ = MI_NOOP;
4301
4302 return cs;
4303 }
4304
4305 static __always_inline u32*
4306 gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
4307 {
4308 *cs++ = MI_USER_INTERRUPT;
4309
4310 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4311 if (intel_engine_has_semaphores(request->engine))
4312 cs = gen12_emit_preempt_busywait(request, cs);
4313
4314 request->tail = intel_ring_offset(request, cs);
4315 assert_ring_tail_valid(request->ring, request->tail);
4316
4317 return gen8_emit_wa_tail(request, cs);
4318 }
4319
4320 static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
4321 {
4322 cs = gen8_emit_ggtt_write(cs,
4323 request->fence.seqno,
4324 i915_request_active_timeline(request)->hwsp_offset,
4325 0);
4326
4327 return gen12_emit_fini_breadcrumb_footer(request, cs);
4328 }
4329
4330 static u32 *
4331 gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4332 {
4333 cs = gen8_emit_ggtt_write_rcs(cs,
4334 request->fence.seqno,
4335 i915_request_active_timeline(request)->hwsp_offset,
4336 PIPE_CONTROL_CS_STALL |
4337 PIPE_CONTROL_TILE_CACHE_FLUSH |
4338 PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4339 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4340 /* Wa_1409600907:tgl */
4341 PIPE_CONTROL_DEPTH_STALL |
4342 PIPE_CONTROL_DC_FLUSH_ENABLE |
4343 PIPE_CONTROL_FLUSH_ENABLE |
4344 PIPE_CONTROL_HDC_PIPELINE_FLUSH);
4345
4346 return gen12_emit_fini_breadcrumb_footer(request, cs);
4347 }
4348
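/* The engine is idling: stop the timeslice and preemption timeout timers. */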
4349 static void execlists_park(struct intel_engine_cs *engine)
4350 {
4351 cancel_timer(&engine->execlists.timer);
4352 cancel_timer(&engine->execlists.preempt);
4353 }
4354
4355 void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
4356 {
4357 engine->submit_request = execlists_submit_request;
4358 engine->schedule = i915_schedule;
4359 engine->execlists.tasklet.func = execlists_submission_tasklet;
4360
4361 engine->reset.prepare = execlists_reset_prepare;
4362 engine->reset.rewind = execlists_reset_rewind;
4363 engine->reset.cancel = execlists_reset_cancel;
4364 engine->reset.finish = execlists_reset_finish;
4365
4366 engine->park = execlists_park;
4367 engine->unpark = NULL;
4368
4369 engine->flags |= I915_ENGINE_SUPPORTS_STATS;
4370 if (!intel_vgpu_active(engine->i915)) {
4371 engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
4372 if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
4373 engine->flags |= I915_ENGINE_HAS_PREEMPTION;
4374 }
4375
4376 if (INTEL_GEN(engine->i915) >= 12)
4377 engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
4378
4379 if (intel_engine_has_preemption(engine))
4380 engine->emit_bb_start = gen8_emit_bb_start;
4381 else
4382 engine->emit_bb_start = gen8_emit_bb_start_noarb;
4383 }
4384
4385 static void execlists_shutdown(struct intel_engine_cs *engine)
4386 {
4387 /* Synchronise with residual timers and any softirq they raise */
4388 del_timer_sync(&engine->execlists.timer);
4389 del_timer_sync(&engine->execlists.preempt);
4390 tasklet_kill(&engine->execlists.tasklet);
4391 }
4392
4393 static void execlists_release(struct intel_engine_cs *engine)
4394 {
4395 execlists_shutdown(engine);
4396
4397 intel_engine_cleanup_common(engine);
4398 lrc_destroy_wa_ctx(engine);
4399 }
4400
4401 static void
4402 logical_ring_default_vfuncs(struct intel_engine_cs *engine)
4403 {
4404	/* Default vfuncs which can be overridden by each engine. */
4405
4406 engine->resume = execlists_resume;
4407
4408 engine->cops = &execlists_context_ops;
4409 engine->request_alloc = execlists_request_alloc;
4410
4411 engine->emit_flush = gen8_emit_flush;
4412 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
4413 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
4414 if (INTEL_GEN(engine->i915) >= 12)
4415 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
4416
4417 engine->set_default_submission = intel_execlists_set_default_submission;
4418
4419 if (INTEL_GEN(engine->i915) < 11) {
4420 engine->irq_enable = gen8_logical_ring_enable_irq;
4421 engine->irq_disable = gen8_logical_ring_disable_irq;
4422 } else {
4423 /*
4424 * TODO: On Gen11 interrupt masks need to be clear
4425 * to allow C6 entry. Keep interrupts enabled at all times
4426 * and take the hit of generating extra interrupts
4427 * until a more refined solution exists.
4428 */
4429 }
4430 }
4431
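/*
 * Compute the interrupt mask bits for this engine. Prior to gen11 all
 * engines share the GT interrupt registers, so each engine's bits are
 * shifted into its own field; gen11+ provides per-engine registers and
 * uses shift 0.
 */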
4432 static inline void
4433 logical_ring_default_irqs(struct intel_engine_cs *engine)
4434 {
4435 unsigned int shift = 0;
4436
4437 if (INTEL_GEN(engine->i915) < 11) {
4438 const u8 irq_shifts[] = {
4439 [RCS0] = GEN8_RCS_IRQ_SHIFT,
4440 [BCS0] = GEN8_BCS_IRQ_SHIFT,
4441 [VCS0] = GEN8_VCS0_IRQ_SHIFT,
4442 [VCS1] = GEN8_VCS1_IRQ_SHIFT,
4443 [VECS0] = GEN8_VECS_IRQ_SHIFT,
4444 };
4445
4446 shift = irq_shifts[engine->id];
4447 }
4448
4449 engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
4450 engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
4451 engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift;
4452 }
4453
4454 static void rcs_submission_override(struct intel_engine_cs *engine)
4455 {
4456 switch (INTEL_GEN(engine->i915)) {
4457 case 12:
4458 engine->emit_flush = gen12_emit_flush_render;
4459 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
4460 break;
4461 case 11:
4462 engine->emit_flush = gen11_emit_flush_render;
4463 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
4464 break;
4465 default:
4466 engine->emit_flush = gen8_emit_flush_render;
4467 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
4468 break;
4469 }
4470 }
4471
4472 int intel_execlists_submission_setup(struct intel_engine_cs *engine)
4473 {
4474 struct intel_engine_execlists * const execlists = &engine->execlists;
4475 struct drm_i915_private *i915 = engine->i915;
4476 struct intel_uncore *uncore = engine->uncore;
4477 u32 base = engine->mmio_base;
4478
4479 tasklet_init(&engine->execlists.tasklet,
4480 execlists_submission_tasklet, (unsigned long)engine);
4481 timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
4482 timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
4483
4484 logical_ring_default_vfuncs(engine);
4485 logical_ring_default_irqs(engine);
4486
4487 if (engine->class == RENDER_CLASS)
4488 rcs_submission_override(engine);
4489
4490 if (intel_init_workaround_bb(engine))
4491 /*
4492 * We continue even if we fail to initialize the WA batch
4493 * because we only expect rare glitches, nothing
4494 * critical enough to prevent us from using the GPU.
4495 */
4496 DRM_ERROR("WA batch buffer initialization failed\n");
4497
4498 if (HAS_LOGICAL_RING_ELSQ(i915)) {
4499 execlists->submit_reg = uncore->regs +
4500 i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
4501 execlists->ctrl_reg = uncore->regs +
4502 i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));
4503 } else {
4504 execlists->submit_reg = uncore->regs +
4505 i915_mmio_reg_offset(RING_ELSP(base));
4506 }
4507
4508 execlists->csb_status =
4509 &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
4510
4511 execlists->csb_write =
4512 &engine->status_page.addr[intel_hws_csb_write_index(i915)];
4513
4514 if (INTEL_GEN(i915) < 11)
4515 execlists->csb_size = GEN8_CSB_ENTRIES;
4516 else
4517 execlists->csb_size = GEN11_CSB_ENTRIES;
4518
4519 reset_csb_pointers(engine);
4520
4521 /* Finally, take ownership and responsibility for cleanup! */
4522 engine->release = execlists_release;
4523
4524 return 0;
4525 }
4526
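/*
 * Per-gen default for the INDIRECT_CTX_OFFSET field written into the
 * register state alongside the wa_bb indirect context pointer.
 */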
4527 static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine)
4528 {
4529 u32 indirect_ctx_offset;
4530
4531 switch (INTEL_GEN(engine->i915)) {
4532 default:
4533 MISSING_CASE(INTEL_GEN(engine->i915));
4534 /* fall through */
4535 case 12:
4536 indirect_ctx_offset =
4537 GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
4538 break;
4539 case 11:
4540 indirect_ctx_offset =
4541 GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
4542 break;
4543 case 10:
4544 indirect_ctx_offset =
4545 GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
4546 break;
4547 case 9:
4548 indirect_ctx_offset =
4549 GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
4550 break;
4551 case 8:
4552 indirect_ctx_offset =
4553 GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
4554 break;
4555 }
4556
4557 return indirect_ctx_offset;
4558 }
4559
4560
4561 static void init_common_reg_state(u32 * const regs,
4562 const struct intel_engine_cs *engine,
4563 const struct intel_ring *ring,
4564 bool inhibit)
4565 {
4566 u32 ctl;
4567
4568 ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
4569 ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
4570 if (inhibit)
4571 ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
4572 if (INTEL_GEN(engine->i915) < 11)
4573 ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
4574 CTX_CTRL_RS_CTX_ENABLE);
4575 regs[CTX_CONTEXT_CONTROL] = ctl;
4576
4577 regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
4578 }
4579
4580 static void init_wa_bb_reg_state(u32 * const regs,
4581 const struct intel_engine_cs *engine,
4582 u32 pos_bb_per_ctx)
4583 {
4584 const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
4585
4586 if (wa_ctx->per_ctx.size) {
4587 const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
4588
4589 regs[pos_bb_per_ctx] =
4590 (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
4591 }
4592
4593 if (wa_ctx->indirect_ctx.size) {
4594 const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
4595
4596 regs[pos_bb_per_ctx + 2] =
4597 (ggtt_offset + wa_ctx->indirect_ctx.offset) |
4598 (wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
4599
4600 regs[pos_bb_per_ctx + 4] =
4601 intel_lr_indirect_ctx_offset(engine) << 6;
4602 }
4603 }
4604
4605 static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt)
4606 {
4607 if (i915_vm_is_4lvl(&ppgtt->vm)) {
4608 /* 64b PPGTT (48bit canonical)
4609 * PDP0_DESCRIPTOR contains the base address to PML4 and
4610 * other PDP Descriptors are ignored.
4611 */
4612 ASSIGN_CTX_PML4(ppgtt, regs);
4613 } else {
4614 ASSIGN_CTX_PDP(ppgtt, regs, 3);
4615 ASSIGN_CTX_PDP(ppgtt, regs, 2);
4616 ASSIGN_CTX_PDP(ppgtt, regs, 1);
4617 ASSIGN_CTX_PDP(ppgtt, regs, 0);
4618 }
4619 }
4620
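/*
 * A context using the global GTT borrows the aliasing PPGTT for its
 * per-process page directory state; otherwise use its own full PPGTT.
 */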
4621 static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
4622 {
4623 if (i915_is_ggtt(vm))
4624 return i915_vm_to_ggtt(vm)->alias;
4625 else
4626 return i915_vm_to_ppgtt(vm);
4627 }
4628
4629 static void execlists_init_reg_state(u32 *regs,
4630 const struct intel_context *ce,
4631 const struct intel_engine_cs *engine,
4632 const struct intel_ring *ring,
4633 bool inhibit)
4634 {
4635 /*
4636 * A context is actually a big batch buffer with several
4637 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
4638 * values we are setting here are only for the first context restore:
4639 * on a subsequent save, the GPU will recreate this batchbuffer with new
4640 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
4641 * we are not initializing here).
4642 *
4643 * Must keep consistent with virtual_update_register_offsets().
4644 */
4645 set_offsets(regs, reg_offsets(engine), engine, inhibit);
4646
4647 init_common_reg_state(regs, engine, ring, inhibit);
4648 init_ppgtt_reg_state(regs, vm_alias(ce->vm));
4649
4650 init_wa_bb_reg_state(regs, engine,
4651 INTEL_GEN(engine->i915) >= 12 ?
4652 GEN12_CTX_BB_PER_CTX_PTR :
4653 CTX_BB_PER_CTX_PTR);
4654
4655 __reset_stop_ring(regs, engine);
4656 }
4657
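/*
 * Fill in the freshly allocated context object: copy the engine's default
 * (golden) context image if we have one, clear the per-process HWSP and
 * then write the initial register state into the state page.
 */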
4658 static int
4659 populate_lr_context(struct intel_context *ce,
4660 struct drm_i915_gem_object *ctx_obj,
4661 struct intel_engine_cs *engine,
4662 struct intel_ring *ring)
4663 {
4664 bool inhibit = true;
4665 void *vaddr;
4666 int ret;
4667
4668 vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
4669 if (IS_ERR(vaddr)) {
4670 ret = PTR_ERR(vaddr);
4671 DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
4672 return ret;
4673 }
4674
4675 set_redzone(vaddr, engine);
4676
4677 if (engine->default_state) {
4678 void *defaults;
4679
4680 defaults = i915_gem_object_pin_map(engine->default_state,
4681 I915_MAP_WB);
4682 if (IS_ERR(defaults)) {
4683 ret = PTR_ERR(defaults);
4684 goto err_unpin_ctx;
4685 }
4686
4687 memcpy(vaddr, defaults, engine->context_size);
4688 i915_gem_object_unpin_map(engine->default_state);
4689 __set_bit(CONTEXT_VALID_BIT, &ce->flags);
4690 inhibit = false;
4691 }
4692
4693 /* Clear the ppHWSP (inc. per-context counters) */
4694 memset(vaddr, 0, PAGE_SIZE);
4695
4696 /*
4697 * The second page of the context object contains some registers which
4698 * must be set up prior to the first execution.
4699 */
4700 execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
4701 ce, engine, ring, inhibit);
4702
4703 ret = 0;
4704 err_unpin_ctx:
4705 __i915_gem_object_flush_map(ctx_obj, 0, engine->context_size);
4706 i915_gem_object_unpin_map(ctx_obj);
4707 return ret;
4708 }
4709
4710 static int __execlists_context_alloc(struct intel_context *ce,
4711 struct intel_engine_cs *engine)
4712 {
4713 struct drm_i915_gem_object *ctx_obj;
4714 struct intel_ring *ring;
4715 struct i915_vma *vma;
4716 u32 context_size;
4717 int ret;
4718
4719 GEM_BUG_ON(ce->state);
4720 context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
4721
4722 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
4723 context_size += I915_GTT_PAGE_SIZE; /* for redzone */
4724
4725 ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
4726 if (IS_ERR(ctx_obj))
4727 return PTR_ERR(ctx_obj);
4728
4729 vma = i915_vma_instance(ctx_obj, &engine->gt->ggtt->vm, NULL);
4730 if (IS_ERR(vma)) {
4731 ret = PTR_ERR(vma);
4732 goto error_deref_obj;
4733 }
4734
4735 if (!ce->timeline) {
4736 struct intel_timeline *tl;
4737 struct i915_vma *hwsp;
4738
4739 /*
4740 * Use the static global HWSP for the kernel context, and
4741 * a dynamically allocated cacheline for everyone else.
4742 */
4743 hwsp = NULL;
4744 if (unlikely(intel_context_is_barrier(ce)))
4745 hwsp = engine->status_page.vma;
4746
4747 tl = intel_timeline_create(engine->gt, hwsp);
4748 if (IS_ERR(tl)) {
4749 ret = PTR_ERR(tl);
4750 goto error_deref_obj;
4751 }
4752
4753 ce->timeline = tl;
4754 }
4755
4756 ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
4757 if (IS_ERR(ring)) {
4758 ret = PTR_ERR(ring);
4759 goto error_deref_obj;
4760 }
4761
4762 ret = populate_lr_context(ce, ctx_obj, engine, ring);
4763 if (ret) {
4764 DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
4765 goto error_ring_free;
4766 }
4767
4768 ce->ring = ring;
4769 ce->state = vma;
4770
4771 return 0;
4772
4773 error_ring_free:
4774 intel_ring_put(ring);
4775 error_deref_obj:
4776 i915_gem_object_put(ctx_obj);
4777 return ret;
4778 }
4779
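/*
 * The single ready request on a virtual engine sits on this list until
 * one of the physical siblings dequeues it.
 */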
4780 static struct list_head *virtual_queue(struct virtual_engine *ve)
4781 {
4782 return &ve->base.execlists.default_priolist.requests[0];
4783 }
4784
4785 static void virtual_context_destroy(struct kref *kref)
4786 {
4787 struct virtual_engine *ve =
4788 container_of(kref, typeof(*ve), context.ref);
4789 unsigned int n;
4790
4791 GEM_BUG_ON(!list_empty(virtual_queue(ve)));
4792 GEM_BUG_ON(ve->request);
4793 GEM_BUG_ON(ve->context.inflight);
4794
4795 for (n = 0; n < ve->num_siblings; n++) {
4796 struct intel_engine_cs *sibling = ve->siblings[n];
4797 struct rb_node *node = &ve->nodes[sibling->id].rb;
4798 unsigned long flags;
4799
4800 if (RB_EMPTY_NODE(node))
4801 continue;
4802
4803 spin_lock_irqsave(&sibling->active.lock, flags);
4804
4805 /* Detachment is lazily performed in the execlists tasklet */
4806 if (!RB_EMPTY_NODE(node))
4807 rb_erase_cached(node, &sibling->execlists.virtual);
4808
4809 spin_unlock_irqrestore(&sibling->active.lock, flags);
4810 }
4811 GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
4812
4813 if (ve->context.state)
4814 __execlists_context_fini(&ve->context);
4815 intel_context_fini(&ve->context);
4816
4817 kfree(ve->bonds);
4818 kfree(ve);
4819 }
4820
4821 static void virtual_engine_initial_hint(struct virtual_engine *ve)
4822 {
4823 int swp;
4824
4825 /*
4826 * Pick a random sibling when starting, to help spread the load around.
4827 *
4828 * New contexts are typically created with exactly the same order
4829 * of siblings, and often started in batches. Due to the way we iterate
4830 * the array of siblings when submitting requests, sibling[0] is
4831 * prioritised for dequeuing. If we make sure that sibling[0] is fairly
4832 * randomised across the system, we also help spread the load by the
4833 * first engine we inspect being different each time.
4834 *
4835 * NB This does not force us to execute on this engine; it will just
4836 * typically be the first we inspect for submission.
4837 */
4838 swp = prandom_u32_max(ve->num_siblings);
4839 if (!swp)
4840 return;
4841
4842 swap(ve->siblings[swp], ve->siblings[0]);
4843 if (!intel_engine_has_relative_mmio(ve->siblings[0]))
4844 virtual_update_register_offsets(ve->context.lrc_reg_state,
4845 ve->siblings[0]);
4846 }
4847
4848 static int virtual_context_alloc(struct intel_context *ce)
4849 {
4850 struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
4851
4852 return __execlists_context_alloc(ce, ve->siblings[0]);
4853 }
4854
4855 static int virtual_context_pin(struct intel_context *ce)
4856 {
4857 struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
4858 int err;
4859
4860 /* Note: we must use a real engine class for setting up reg state */
4861 err = __execlists_context_pin(ce, ve->siblings[0]);
4862 if (err)
4863 return err;
4864
4865 virtual_engine_initial_hint(ve);
4866 return 0;
4867 }
4868
4869 static void virtual_context_enter(struct intel_context *ce)
4870 {
4871 struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
4872 unsigned int n;
4873
4874 for (n = 0; n < ve->num_siblings; n++)
4875 intel_engine_pm_get(ve->siblings[n]);
4876
4877 intel_timeline_enter(ce->timeline);
4878 }
4879
4880 static void virtual_context_exit(struct intel_context *ce)
4881 {
4882 struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
4883 unsigned int n;
4884
4885 intel_timeline_exit(ce->timeline);
4886
4887 for (n = 0; n < ve->num_siblings; n++)
4888 intel_engine_pm_put(ve->siblings[n]);
4889 }
4890
4891 static const struct intel_context_ops virtual_context_ops = {
4892 .alloc = virtual_context_alloc,
4893
4894 .pin = virtual_context_pin,
4895 .unpin = execlists_context_unpin,
4896
4897 .enter = virtual_context_enter,
4898 .exit = virtual_context_exit,
4899
4900 .destroy = virtual_context_destroy,
4901 };
4902
4903 static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
4904 {
4905 struct i915_request *rq;
4906 intel_engine_mask_t mask;
4907
4908 rq = READ_ONCE(ve->request);
4909 if (!rq)
4910 return 0;
4911
4912 /* The rq is ready for submission; rq->execution_mask is now stable. */
4913 mask = rq->execution_mask;
4914 if (unlikely(!mask)) {
4915 /* Invalid selection, submit to a random engine in error */
4916 i915_request_set_error_once(rq, -ENODEV);
4917 mask = ve->siblings[0]->mask;
4918 }
4919
4920 ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n",
4921 rq->fence.context, rq->fence.seqno,
4922 mask, ve->base.execlists.queue_priority_hint);
4923
4924 return mask;
4925 }
4926
4927 static void virtual_submission_tasklet(unsigned long data)
4928 {
4929 struct virtual_engine * const ve = (struct virtual_engine *)data;
4930 const int prio = READ_ONCE(ve->base.execlists.queue_priority_hint);
4931 intel_engine_mask_t mask;
4932 unsigned int n;
4933
4934 rcu_read_lock();
4935 mask = virtual_submission_mask(ve);
4936 rcu_read_unlock();
4937 if (unlikely(!mask))
4938 return;
4939
4940 local_irq_disable();
4941 for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) {
4942 struct intel_engine_cs *sibling = ve->siblings[n];
4943 struct ve_node * const node = &ve->nodes[sibling->id];
4944 struct rb_node **parent, *rb;
4945 bool first;
4946
4947 if (unlikely(!(mask & sibling->mask))) {
4948 if (!RB_EMPTY_NODE(&node->rb)) {
4949 spin_lock(&sibling->active.lock);
4950 rb_erase_cached(&node->rb,
4951 &sibling->execlists.virtual);
4952 RB_CLEAR_NODE(&node->rb);
4953 spin_unlock(&sibling->active.lock);
4954 }
4955 continue;
4956 }
4957
4958 spin_lock(&sibling->active.lock);
4959
4960 if (!RB_EMPTY_NODE(&node->rb)) {
4961 /*
4962 * Cheat and avoid rebalancing the tree if we can
4963 * reuse this node in situ.
4964 */
4965 first = rb_first_cached(&sibling->execlists.virtual) ==
4966 &node->rb;
4967 if (prio == node->prio || (prio > node->prio && first))
4968 goto submit_engine;
4969
4970 rb_erase_cached(&node->rb, &sibling->execlists.virtual);
4971 }
4972
4973 rb = NULL;
4974 first = true;
4975 parent = &sibling->execlists.virtual.rb_root.rb_node;
4976 while (*parent) {
4977 struct ve_node *other;
4978
4979 rb = *parent;
4980 other = rb_entry(rb, typeof(*other), rb);
4981 if (prio > other->prio) {
4982 parent = &rb->rb_left;
4983 } else {
4984 parent = &rb->rb_right;
4985 first = false;
4986 }
4987 }
4988
4989 rb_link_node(&node->rb, rb, parent);
4990 rb_insert_color_cached(&node->rb,
4991 &sibling->execlists.virtual,
4992 first);
4993
4994 submit_engine:
4995 GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
4996 node->prio = prio;
4997 if (first && prio > sibling->execlists.queue_priority_hint) {
4998 sibling->execlists.queue_priority_hint = prio;
4999 tasklet_hi_schedule(&sibling->execlists.tasklet);
5000 }
5001
5002 spin_unlock(&sibling->active.lock);
5003 }
5004 local_irq_enable();
5005 }
5006
5007 static void virtual_submit_request(struct i915_request *rq)
5008 {
5009 struct virtual_engine *ve = to_virtual_engine(rq->engine);
5010 struct i915_request *old;
5011 unsigned long flags;
5012
5013 ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n",
5014 rq->fence.context,
5015 rq->fence.seqno);
5016
5017 GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
5018
5019 spin_lock_irqsave(&ve->base.active.lock, flags);
5020
5021 old = ve->request;
5022 if (old) { /* background completion event from preempt-to-busy */
5023 GEM_BUG_ON(!i915_request_completed(old));
5024 __i915_request_submit(old);
5025 i915_request_put(old);
5026 }
5027
5028 if (i915_request_completed(rq)) {
5029 __i915_request_submit(rq);
5030
5031 ve->base.execlists.queue_priority_hint = INT_MIN;
5032 ve->request = NULL;
5033 } else {
5034 ve->base.execlists.queue_priority_hint = rq_prio(rq);
5035 ve->request = i915_request_get(rq);
5036
5037 GEM_BUG_ON(!list_empty(virtual_queue(ve)));
5038 list_move_tail(&rq->sched.link, virtual_queue(ve));
5039
5040 tasklet_schedule(&ve->base.execlists.tasklet);
5041 }
5042
5043 spin_unlock_irqrestore(&ve->base.active.lock, flags);
5044 }
5045
5046 static struct ve_bond *
5047 virtual_find_bond(struct virtual_engine *ve,
5048 const struct intel_engine_cs *master)
5049 {
5050 int i;
5051
5052 for (i = 0; i < ve->num_bonds; i++) {
5053 if (ve->bonds[i].master == master)
5054 return &ve->bonds[i];
5055 }
5056
5057 return NULL;
5058 }
5059
5060 static void
5061 virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
5062 {
5063 struct virtual_engine *ve = to_virtual_engine(rq->engine);
5064 intel_engine_mask_t allowed, exec;
5065 struct ve_bond *bond;
5066
5067 allowed = ~to_request(signal)->engine->mask;
5068
5069 bond = virtual_find_bond(ve, to_request(signal)->engine);
5070 if (bond)
5071 allowed &= bond->sibling_mask;
5072
5073 /* Restrict the bonded request to run on only the available engines */
5074 exec = READ_ONCE(rq->execution_mask);
5075 while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed))
5076 ;
5077
5078 /* Prevent the master from being re-run on the bonded engines */
5079 to_request(signal)->execution_mask &= ~allowed;
5080 }
5081
5082 struct intel_context *
5083 intel_execlists_create_virtual(struct intel_engine_cs **siblings,
5084 unsigned int count)
5085 {
5086 struct virtual_engine *ve;
5087 unsigned int n;
5088 int err;
5089
5090 if (count == 0)
5091 return ERR_PTR(-EINVAL);
5092
5093 if (count == 1)
5094 return intel_context_create(siblings[0]);
5095
5096 ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
5097 if (!ve)
5098 return ERR_PTR(-ENOMEM);
5099
5100 ve->base.i915 = siblings[0]->i915;
5101 ve->base.gt = siblings[0]->gt;
5102 ve->base.uncore = siblings[0]->uncore;
5103 ve->base.id = -1;
5104
5105 ve->base.class = OTHER_CLASS;
5106 ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
5107 ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
5108 ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
5109
5110 /*
5111 * The decision on whether to submit a request using semaphores
5112 * depends on the saturated state of the engine. We only compute
5113 * this during HW submission of the request, and we need this
5114 * state to be applied globally to all requests being submitted
5115 * to this engine. Virtual engines encompass more than one physical
5116 * engine and so we cannot accurately tell in advance if one of those
5117 * engines is already saturated and so cannot afford to use a semaphore
5118 * and be pessimized in priority for doing so -- if we are the only
5119 * context using semaphores after all other clients have stopped, we
5120 * will be starved on the saturated system. Such a global switch for
5121 * semaphores is less than ideal, but alas is the current compromise.
5122 */
5123 ve->base.saturated = ALL_ENGINES;
5124
5125 snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
5126
5127 intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
5128 intel_engine_init_breadcrumbs(&ve->base);
5129 intel_engine_init_execlists(&ve->base);
5130
5131 ve->base.cops = &virtual_context_ops;
5132 ve->base.request_alloc = execlists_request_alloc;
5133
5134 ve->base.schedule = i915_schedule;
5135 ve->base.submit_request = virtual_submit_request;
5136 ve->base.bond_execute = virtual_bond_execute;
5137
5138 INIT_LIST_HEAD(virtual_queue(ve));
5139 ve->base.execlists.queue_priority_hint = INT_MIN;
5140 tasklet_init(&ve->base.execlists.tasklet,
5141 virtual_submission_tasklet,
5142 (unsigned long)ve);
5143
5144 intel_context_init(&ve->context, &ve->base);
5145
5146 for (n = 0; n < count; n++) {
5147 struct intel_engine_cs *sibling = siblings[n];
5148
5149 GEM_BUG_ON(!is_power_of_2(sibling->mask));
5150 if (sibling->mask & ve->base.mask) {
5151 DRM_DEBUG("duplicate %s entry in load balancer\n",
5152 sibling->name);
5153 err = -EINVAL;
5154 goto err_put;
5155 }
5156
5157 /*
5158 * The virtual engine implementation is tightly coupled to
5159 * the execlists backend -- we push requests directly
5160 * into a tree inside each physical engine. We could support
5161 * layering if we handle cloning of the requests and
5162 * submitting a copy into each backend.
5163 */
5164 if (sibling->execlists.tasklet.func !=
5165 execlists_submission_tasklet) {
5166 err = -ENODEV;
5167 goto err_put;
5168 }
5169
5170 GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
5171 RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
5172
5173 ve->siblings[ve->num_siblings++] = sibling;
5174 ve->base.mask |= sibling->mask;
5175
5176 /*
5177 * All physical engines must be compatible for their emission
5178 * functions (as we build the instructions during request
5179 * construction and do not alter them before submission
5180 * on the physical engine). We use the engine class as a guide
5181 * here, although that could be refined.
5182 */
5183 if (ve->base.class != OTHER_CLASS) {
5184 if (ve->base.class != sibling->class) {
5185 DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
5186 sibling->class, ve->base.class);
5187 err = -EINVAL;
5188 goto err_put;
5189 }
5190 continue;
5191 }
5192
5193 ve->base.class = sibling->class;
5194 ve->base.uabi_class = sibling->uabi_class;
5195 snprintf(ve->base.name, sizeof(ve->base.name),
5196 "v%dx%d", ve->base.class, count);
5197 ve->base.context_size = sibling->context_size;
5198
5199 ve->base.emit_bb_start = sibling->emit_bb_start;
5200 ve->base.emit_flush = sibling->emit_flush;
5201 ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
5202 ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
5203 ve->base.emit_fini_breadcrumb_dw =
5204 sibling->emit_fini_breadcrumb_dw;
5205
5206 ve->base.flags = sibling->flags;
5207 }
5208
5209 ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
5210
5211 return &ve->context;
5212
5213 err_put:
5214 intel_context_put(&ve->context);
5215 return ERR_PTR(err);
5216 }
5217
5218 struct intel_context *
5219 intel_execlists_clone_virtual(struct intel_engine_cs *src)
5220 {
5221 struct virtual_engine *se = to_virtual_engine(src);
5222 struct intel_context *dst;
5223
5224 dst = intel_execlists_create_virtual(se->siblings,
5225 se->num_siblings);
5226 if (IS_ERR(dst))
5227 return dst;
5228
5229 if (se->num_bonds) {
5230 struct virtual_engine *de = to_virtual_engine(dst->engine);
5231
5232 de->bonds = kmemdup(se->bonds,
5233 sizeof(*se->bonds) * se->num_bonds,
5234 GFP_KERNEL);
5235 if (!de->bonds) {
5236 intel_context_put(dst);
5237 return ERR_PTR(-ENOMEM);
5238 }
5239
5240 de->num_bonds = se->num_bonds;
5241 }
5242
5243 return dst;
5244 }
5245
5246 int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
5247 const struct intel_engine_cs *master,
5248 const struct intel_engine_cs *sibling)
5249 {
5250 struct virtual_engine *ve = to_virtual_engine(engine);
5251 struct ve_bond *bond;
5252 int n;
5253
5254 /* Sanity check the sibling is part of the virtual engine */
5255 for (n = 0; n < ve->num_siblings; n++)
5256 if (sibling == ve->siblings[n])
5257 break;
5258 if (n == ve->num_siblings)
5259 return -EINVAL;
5260
5261 bond = virtual_find_bond(ve, master);
5262 if (bond) {
5263 bond->sibling_mask |= sibling->mask;
5264 return 0;
5265 }
5266
5267 bond = krealloc(ve->bonds,
5268 sizeof(*bond) * (ve->num_bonds + 1),
5269 GFP_KERNEL);
5270 if (!bond)
5271 return -ENOMEM;
5272
5273 bond[ve->num_bonds].master = master;
5274 bond[ve->num_bonds].sibling_mask = sibling->mask;
5275
5276 ve->bonds = bond;
5277 ve->num_bonds++;
5278
5279 return 0;
5280 }
5281
5282 struct intel_engine_cs *
5283 intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
5284 unsigned int sibling)
5285 {
5286 struct virtual_engine *ve = to_virtual_engine(engine);
5287
5288 if (sibling >= ve->num_siblings)
5289 return NULL;
5290
5291 return ve->siblings[sibling];
5292 }
5293
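/*
 * Debug helper for engine dumps: print up to @max requests from each of
 * the executing, queued and virtual lists, eliding the middle if there
 * are more.
 */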
5294 void intel_execlists_show_requests(struct intel_engine_cs *engine,
5295 struct drm_printer *m,
5296 void (*show_request)(struct drm_printer *m,
5297 struct i915_request *rq,
5298 const char *prefix),
5299 unsigned int max)
5300 {
5301 const struct intel_engine_execlists *execlists = &engine->execlists;
5302 struct i915_request *rq, *last;
5303 unsigned long flags;
5304 unsigned int count;
5305 struct rb_node *rb;
5306
5307 spin_lock_irqsave(&engine->active.lock, flags);
5308
5309 last = NULL;
5310 count = 0;
5311 list_for_each_entry(rq, &engine->active.requests, sched.link) {
5312 if (count++ < max - 1)
5313 show_request(m, rq, "\t\tE ");
5314 else
5315 last = rq;
5316 }
5317 if (last) {
5318 if (count > max) {
5319 drm_printf(m,
5320 "\t\t...skipping %d executing requests...\n",
5321 count - max);
5322 }
5323 show_request(m, last, "\t\tE ");
5324 }
5325
5326 if (execlists->switch_priority_hint != INT_MIN)
5327 drm_printf(m, "\t\tSwitch priority hint: %d\n",
5328 READ_ONCE(execlists->switch_priority_hint));
5329 if (execlists->queue_priority_hint != INT_MIN)
5330 drm_printf(m, "\t\tQueue priority hint: %d\n",
5331 READ_ONCE(execlists->queue_priority_hint));
5332
5333 last = NULL;
5334 count = 0;
5335 for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
5336 struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
5337 int i;
5338
5339 priolist_for_each_request(rq, p, i) {
5340 if (count++ < max - 1)
5341 show_request(m, rq, "\t\tQ ");
5342 else
5343 last = rq;
5344 }
5345 }
5346 if (last) {
5347 if (count > max) {
5348 drm_printf(m,
5349 "\t\t...skipping %d queued requests...\n",
5350 count - max);
5351 }
5352 show_request(m, last, "\t\tQ ");
5353 }
5354
5355 last = NULL;
5356 count = 0;
5357 for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
5358 struct virtual_engine *ve =
5359 rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
5360 struct i915_request *rq = READ_ONCE(ve->request);
5361
5362 if (rq) {
5363 if (count++ < max - 1)
5364 show_request(m, rq, "\t\tV ");
5365 else
5366 last = rq;
5367 }
5368 }
5369 if (last) {
5370 if (count > max) {
5371 drm_printf(m,
5372 "\t\t...skipping %d virtual requests...\n",
5373 count - max);
5374 }
5375 show_request(m, last, "\t\tV ");
5376 }
5377
5378 spin_unlock_irqrestore(&engine->active.lock, flags);
5379 }
5380
5381 void intel_lr_context_reset(struct intel_engine_cs *engine,
5382 struct intel_context *ce,
5383 u32 head,
5384 bool scrub)
5385 {
5386 GEM_BUG_ON(!intel_context_is_pinned(ce));
5387
5388 /*
5389 * We want a simple context + ring to execute the breadcrumb update.
5390 * We cannot rely on the context being intact across the GPU hang,
5391 * so clear it and rebuild just what we need for the breadcrumb.
5392 * All pending requests for this context will be zapped, and any
5393 * future request will be after userspace has had the opportunity
5394 * to recreate its own state.
5395 */
5396 if (scrub)
5397 restore_default_state(ce, engine);
5398
5399 /* Rerun the request; its payload has been neutered (if guilty). */
5400 __execlists_update_reg_state(ce, engine, head);
5401 }
5402
5403 bool
5404 intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine)
5405 {
5406 return engine->set_default_submission ==
5407 intel_execlists_set_default_submission;
5408 }
5409
5410 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
5411 #include "selftest_lrc.c"
5412 #endif