drivers/gpu/drm/i915/gt/selftest_lrc.c
1 /*
2 * SPDX-License-Identifier: MIT
3 *
4 * Copyright © 2018 Intel Corporation
5 */
6
7 #include <linux/prime_numbers.h>
8
9 #include "gem/i915_gem_pm.h"
10 #include "gt/intel_engine_heartbeat.h"
11 #include "gt/intel_reset.h"
12
13 #include "i915_selftest.h"
14 #include "selftests/i915_random.h"
15 #include "selftests/igt_flush_test.h"
16 #include "selftests/igt_live_test.h"
17 #include "selftests/igt_spinner.h"
18 #include "selftests/lib_sw_fence.h"
19
20 #include "gem/selftests/igt_gem_utils.h"
21 #include "gem/selftests/mock_context.h"
22
23 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
24 #define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */
25
26 static struct i915_vma *create_scratch(struct intel_gt *gt)
27 {
28 struct drm_i915_gem_object *obj;
29 struct i915_vma *vma;
30 int err;
31
32 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
33 if (IS_ERR(obj))
34 return ERR_CAST(obj);
35
36 i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
37
38 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
39 if (IS_ERR(vma)) {
40 i915_gem_object_put(obj);
41 return vma;
42 }
43
44 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
45 if (err) {
46 i915_gem_object_put(obj);
47 return ERR_PTR(err);
48 }
49
50 return vma;
51 }
52
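/*
 * Park the heartbeat and zero its interval so that no background
 * heartbeat requests are emitted while a test hand-crafts the submission
 * state; engine_heartbeat_enable() restores the interval and drops the
 * engine-pm reference taken here.
 */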
53 static void engine_heartbeat_disable(struct intel_engine_cs *engine,
54 unsigned long *saved)
55 {
56 *saved = engine->props.heartbeat_interval_ms;
57 engine->props.heartbeat_interval_ms = 0;
58
59 intel_engine_pm_get(engine);
60 intel_engine_park_heartbeat(engine);
61 }
62
63 static void engine_heartbeat_enable(struct intel_engine_cs *engine,
64 unsigned long saved)
65 {
66 intel_engine_pm_put(engine);
67
68 engine->props.heartbeat_interval_ms = saved;
69 }
70
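/*
 * Spin (yielding via cond_resched) until @rq has been submitted to the HW
 * by the execlists tasklet, or @timeout jiffies have elapsed. Returns 0
 * once the request is active (or has already started), -ETIME otherwise.
 */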
71 static int wait_for_submit(struct intel_engine_cs *engine,
72 struct i915_request *rq,
73 unsigned long timeout)
74 {
75 timeout += jiffies;
76 do {
77 cond_resched();
78 intel_engine_flush_submission(engine);
79
80 if (READ_ONCE(engine->execlists.pending[0]))
81 continue;
82
83 if (i915_request_is_active(rq))
84 return 0;
85
86 if (i915_request_started(rq)) /* that was quick! */
87 return 0;
88 } while (time_before(jiffies, timeout));
89
90 return -ETIME;
91 }
92
93 static int wait_for_reset(struct intel_engine_cs *engine,
94 struct i915_request *rq,
95 unsigned long timeout)
96 {
97 timeout += jiffies;
98
99 do {
100 cond_resched();
101 intel_engine_flush_submission(engine);
102
103 if (READ_ONCE(engine->execlists.pending[0]))
104 continue;
105
106 if (i915_request_completed(rq))
107 break;
108
109 if (READ_ONCE(rq->fence.error))
110 break;
111 } while (time_before(jiffies, timeout));
112
113 flush_scheduled_work();
114
115 if (rq->fence.error != -EIO) {
116 pr_err("%s: hanging request %llx:%lld not reset\n",
117 engine->name,
118 rq->fence.context,
119 rq->fence.seqno);
120 return -EINVAL;
121 }
122
123 /* Give the request a jiffie to complete after flushing the worker */
124 if (i915_request_wait(rq, 0,
125 max(0l, (long)(timeout - jiffies)) + 1) < 0) {
126 pr_err("%s: hanging request %llx:%lld did not complete\n",
127 engine->name,
128 rq->fence.context,
129 rq->fence.seqno);
130 return -ETIME;
131 }
132
133 return 0;
134 }
135
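/*
 * Smoke test: on each engine, submit a spinning request on a fresh
 * context, check that it starts executing, then end it and flush.
 */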
136 static int live_sanitycheck(void *arg)
137 {
138 struct intel_gt *gt = arg;
139 struct intel_engine_cs *engine;
140 enum intel_engine_id id;
141 struct igt_spinner spin;
142 int err = 0;
143
144 if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
145 return 0;
146
147 if (igt_spinner_init(&spin, gt))
148 return -ENOMEM;
149
150 for_each_engine(engine, gt, id) {
151 struct intel_context *ce;
152 struct i915_request *rq;
153
154 ce = intel_context_create(engine);
155 if (IS_ERR(ce)) {
156 err = PTR_ERR(ce);
157 break;
158 }
159
160 rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
161 if (IS_ERR(rq)) {
162 err = PTR_ERR(rq);
163 goto out_ctx;
164 }
165
166 i915_request_add(rq);
167 if (!igt_wait_for_spinner(&spin, rq)) {
168 GEM_TRACE("spinner failed to start\n");
169 GEM_TRACE_DUMP();
170 intel_gt_set_wedged(gt);
171 err = -EIO;
172 goto out_ctx;
173 }
174
175 igt_spinner_end(&spin);
176 if (igt_flush_test(gt->i915)) {
177 err = -EIO;
178 goto out_ctx;
179 }
180
181 out_ctx:
182 intel_context_put(ce);
183 if (err)
184 break;
185 }
186
187 igt_spinner_fini(&spin);
188 return err;
189 }
190
191 static int live_unlite_restore(struct intel_gt *gt, int prio)
192 {
193 struct intel_engine_cs *engine;
194 enum intel_engine_id id;
195 struct igt_spinner spin;
196 int err = -ENOMEM;
197
198 /*
199 * Check that we can correctly context switch between 2 instances
200 * on the same engine from the same parent context.
201 */
202
203 if (igt_spinner_init(&spin, gt))
204 return err;
205
206 err = 0;
207 for_each_engine(engine, gt, id) {
208 struct intel_context *ce[2] = {};
209 struct i915_request *rq[2];
210 struct igt_live_test t;
211 unsigned long saved;
212 int n;
213
214 if (prio && !intel_engine_has_preemption(engine))
215 continue;
216
217 if (!intel_engine_can_store_dword(engine))
218 continue;
219
220 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
221 err = -EIO;
222 break;
223 }
224 engine_heartbeat_disable(engine, &saved);
225
226 for (n = 0; n < ARRAY_SIZE(ce); n++) {
227 struct intel_context *tmp;
228
229 tmp = intel_context_create(engine);
230 if (IS_ERR(tmp)) {
231 err = PTR_ERR(tmp);
232 goto err_ce;
233 }
234
235 err = intel_context_pin(tmp);
236 if (err) {
237 intel_context_put(tmp);
238 goto err_ce;
239 }
240
241 /*
242 * Setup the pair of contexts such that if we
243 * lite-restore using the RING_TAIL from ce[1] it
244 * will execute garbage from ce[0]->ring.
245 */
246 memset(tmp->ring->vaddr,
247 POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
248 tmp->ring->vma->size);
249
250 ce[n] = tmp;
251 }
252 GEM_BUG_ON(!ce[1]->ring->size);
253 intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
254 __execlists_update_reg_state(ce[1], engine, ce[1]->ring->head);
255
256 rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
257 if (IS_ERR(rq[0])) {
258 err = PTR_ERR(rq[0]);
259 goto err_ce;
260 }
261
262 i915_request_get(rq[0]);
263 i915_request_add(rq[0]);
264 GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
265
266 if (!igt_wait_for_spinner(&spin, rq[0])) {
267 i915_request_put(rq[0]);
268 goto err_ce;
269 }
270
271 rq[1] = i915_request_create(ce[1]);
272 if (IS_ERR(rq[1])) {
273 err = PTR_ERR(rq[1]);
274 i915_request_put(rq[0]);
275 goto err_ce;
276 }
277
278 if (!prio) {
279 /*
280 * Ensure we do the switch to ce[1] on completion.
281 *
282 * rq[0] is already submitted, so this should reduce
283 * to a no-op (a wait on a request on the same engine
284 * uses the submit fence, not the completion fence),
285 * but it will install a dependency on rq[1] for rq[0]
286 * that will prevent the pair being reordered by
287 * timeslicing.
288 */
289 i915_request_await_dma_fence(rq[1], &rq[0]->fence);
290 }
291
292 i915_request_get(rq[1]);
293 i915_request_add(rq[1]);
294 GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
295 i915_request_put(rq[0]);
296
297 if (prio) {
298 struct i915_sched_attr attr = {
299 .priority = prio,
300 };
301
302 /* Alternatively preempt the spinner with ce[1] */
303 engine->schedule(rq[1], &attr);
304 }
305
306 /* And switch back to ce[0] for good measure */
307 rq[0] = i915_request_create(ce[0]);
308 if (IS_ERR(rq[0])) {
309 err = PTR_ERR(rq[0]);
310 i915_request_put(rq[1]);
311 goto err_ce;
312 }
313
314 i915_request_await_dma_fence(rq[0], &rq[1]->fence);
315 i915_request_get(rq[0]);
316 i915_request_add(rq[0]);
317 GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
318 i915_request_put(rq[1]);
319 i915_request_put(rq[0]);
320
321 err_ce:
322 tasklet_kill(&engine->execlists.tasklet); /* flush submission */
323 igt_spinner_end(&spin);
324 for (n = 0; n < ARRAY_SIZE(ce); n++) {
325 if (IS_ERR_OR_NULL(ce[n]))
326 break;
327
328 intel_context_unpin(ce[n]);
329 intel_context_put(ce[n]);
330 }
331
332 engine_heartbeat_enable(engine, saved);
333 if (igt_live_test_end(&t))
334 err = -EIO;
335 if (err)
336 break;
337 }
338
339 igt_spinner_fini(&spin);
340 return err;
341 }
342
343 static int live_unlite_switch(void *arg)
344 {
345 return live_unlite_restore(arg, 0);
346 }
347
348 static int live_unlite_preempt(void *arg)
349 {
350 return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
351 }
352
353 static int live_pin_rewind(void *arg)
354 {
355 struct intel_gt *gt = arg;
356 struct intel_engine_cs *engine;
357 enum intel_engine_id id;
358 int err = 0;
359
360 /*
361 * We have to be careful not to trust intel_ring too much, for example
362 * ring->head is updated upon retire which is out of sync with pinning
363 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
364 * or else we risk writing an older, stale value.
365 *
366 * To simulate this, let's apply a bit of deliberate sabotage.
367 */
368
369 for_each_engine(engine, gt, id) {
370 struct intel_context *ce;
371 struct i915_request *rq;
372 struct intel_ring *ring;
373 struct igt_live_test t;
374
375 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
376 err = -EIO;
377 break;
378 }
379
380 ce = intel_context_create(engine);
381 if (IS_ERR(ce)) {
382 err = PTR_ERR(ce);
383 break;
384 }
385
386 err = intel_context_pin(ce);
387 if (err) {
388 intel_context_put(ce);
389 break;
390 }
391
392 /* Keep the context awake while we play games */
393 err = i915_active_acquire(&ce->active);
394 if (err) {
395 intel_context_unpin(ce);
396 intel_context_put(ce);
397 break;
398 }
399 ring = ce->ring;
400
401 /* Poison the ring, and offset the next request from HEAD */
402 memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
403 ring->emit = ring->size / 2;
404 ring->tail = ring->emit;
405 GEM_BUG_ON(ring->head);
406
407 intel_context_unpin(ce);
408
409 /* Submit a simple nop request */
410 GEM_BUG_ON(intel_context_is_pinned(ce));
411 rq = intel_context_create_request(ce);
412 i915_active_release(&ce->active); /* e.g. async retire */
413 intel_context_put(ce);
414 if (IS_ERR(rq)) {
415 err = PTR_ERR(rq);
416 break;
417 }
418 GEM_BUG_ON(!rq->head);
419 i915_request_add(rq);
420
421 /* Expect not to hang! */
422 if (igt_live_test_end(&t)) {
423 err = -EIO;
424 break;
425 }
426 }
427
428 return err;
429 }
430
431 static int live_hold_reset(void *arg)
432 {
433 struct intel_gt *gt = arg;
434 struct intel_engine_cs *engine;
435 enum intel_engine_id id;
436 struct igt_spinner spin;
437 int err = 0;
438
439 /*
440 * In order to support offline error capture for fast preempt reset,
441 * we need to decouple the guilty request and ensure that it and its
442 * descendants are not executed while the capture is in progress.
443 */
444
445 if (!intel_has_reset_engine(gt))
446 return 0;
447
448 if (igt_spinner_init(&spin, gt))
449 return -ENOMEM;
450
451 for_each_engine(engine, gt, id) {
452 struct intel_context *ce;
453 unsigned long heartbeat;
454 struct i915_request *rq;
455
456 ce = intel_context_create(engine);
457 if (IS_ERR(ce)) {
458 err = PTR_ERR(ce);
459 break;
460 }
461
462 engine_heartbeat_disable(engine, &heartbeat);
463
464 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
465 if (IS_ERR(rq)) {
466 err = PTR_ERR(rq);
467 goto out;
468 }
469 i915_request_add(rq);
470
471 if (!igt_wait_for_spinner(&spin, rq)) {
472 intel_gt_set_wedged(gt);
473 err = -ETIME;
474 goto out;
475 }
476
477 /* We have our request executing, now remove it and reset */
478
479 if (test_and_set_bit(I915_RESET_ENGINE + id,
480 &gt->reset.flags)) {
481 intel_gt_set_wedged(gt);
482 err = -EBUSY;
483 goto out;
484 }
485 tasklet_disable(&engine->execlists.tasklet);
486
487 engine->execlists.tasklet.func(engine->execlists.tasklet.data);
488 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
489
490 i915_request_get(rq);
491 execlists_hold(engine, rq);
492 GEM_BUG_ON(!i915_request_on_hold(rq));
493
494 intel_engine_reset(engine, NULL);
495 GEM_BUG_ON(rq->fence.error != -EIO);
496
497 tasklet_enable(&engine->execlists.tasklet);
498 clear_and_wake_up_bit(I915_RESET_ENGINE + id,
499 &gt->reset.flags);
500
501 /* Check that we do not resubmit the held request */
502 if (!i915_request_wait(rq, 0, HZ / 5)) {
503 pr_err("%s: on hold request completed!\n",
504 engine->name);
505 i915_request_put(rq);
506 err = -EIO;
507 goto out;
508 }
509 GEM_BUG_ON(!i915_request_on_hold(rq));
510
511 /* But is resubmitted on release */
512 execlists_unhold(engine, rq);
513 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
514 pr_err("%s: held request did not complete!\n",
515 engine->name);
516 intel_gt_set_wedged(gt);
517 err = -ETIME;
518 }
519 i915_request_put(rq);
520
521 out:
522 engine_heartbeat_enable(engine, heartbeat);
523 intel_context_put(ce);
524 if (err)
525 break;
526 }
527
528 igt_spinner_fini(&spin);
529 return err;
530 }
531
532 static const char *error_repr(int err)
533 {
534 return err ? "bad" : "good";
535 }
536
537 static int live_error_interrupt(void *arg)
538 {
539 static const struct error_phase {
540 enum { GOOD = 0, BAD = -EIO } error[2];
541 } phases[] = {
542 { { BAD, GOOD } },
543 { { BAD, BAD } },
544 { { BAD, GOOD } },
545 { { GOOD, GOOD } }, /* sentinel */
546 };
547 struct intel_gt *gt = arg;
548 struct intel_engine_cs *engine;
549 enum intel_engine_id id;
550
551 /*
552 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
553 * of invalid commands in user batches that will cause a GPU hang.
554 * This is a faster mechanism than using hangcheck/heartbeats, but
555 * only detects problems the HW knows about -- it will not warn when
556 * we kill the HW!
557 *
558 * To verify our detection and reset, we throw some invalid commands
559 * at the HW and wait for the interrupt.
560 */
561
562 if (!intel_has_reset_engine(gt))
563 return 0;
564
565 for_each_engine(engine, gt, id) {
566 const struct error_phase *p;
567 unsigned long heartbeat;
568 int err = 0;
569
570 engine_heartbeat_disable(engine, &heartbeat);
571
572 for (p = phases; p->error[0] != GOOD; p++) {
573 struct i915_request *client[ARRAY_SIZE(phases->error)];
574 u32 *cs;
575 int i;
576
577 memset(client, 0, sizeof(client));
578 for (i = 0; i < ARRAY_SIZE(client); i++) {
579 struct intel_context *ce;
580 struct i915_request *rq;
581
582 ce = intel_context_create(engine);
583 if (IS_ERR(ce)) {
584 err = PTR_ERR(ce);
585 goto out;
586 }
587
588 rq = intel_context_create_request(ce);
589 intel_context_put(ce);
590 if (IS_ERR(rq)) {
591 err = PTR_ERR(rq);
592 goto out;
593 }
594
595 if (rq->engine->emit_init_breadcrumb) {
596 err = rq->engine->emit_init_breadcrumb(rq);
597 if (err) {
598 i915_request_add(rq);
599 goto out;
600 }
601 }
602
603 cs = intel_ring_begin(rq, 2);
604 if (IS_ERR(cs)) {
605 i915_request_add(rq);
606 err = PTR_ERR(cs);
607 goto out;
608 }
609
610 if (p->error[i]) {
611 *cs++ = 0xdeadbeef;
612 *cs++ = 0xdeadbeef;
613 } else {
614 *cs++ = MI_NOOP;
615 *cs++ = MI_NOOP;
616 }
617
618 client[i] = i915_request_get(rq);
619 i915_request_add(rq);
620 }
621
622 err = wait_for_submit(engine, client[0], HZ / 2);
623 if (err) {
624 pr_err("%s: first request did not start within time!\n",
625 engine->name);
626 err = -ETIME;
627 goto out;
628 }
629
630 for (i = 0; i < ARRAY_SIZE(client); i++) {
631 if (i915_request_wait(client[i], 0, HZ / 5) < 0)
632 pr_debug("%s: %s request incomplete!\n",
633 engine->name,
634 error_repr(p->error[i]));
635
636 if (!i915_request_started(client[i])) {
637 pr_debug("%s: %s request not started!\n",
638 engine->name,
639 error_repr(p->error[i]));
640 err = -ETIME;
641 goto out;
642 }
643
644 /* Kick the tasklet to process the error */
645 intel_engine_flush_submission(engine);
646 if (client[i]->fence.error != p->error[i]) {
647 pr_err("%s: %s request completed with wrong error code: %d\n",
648 engine->name,
649 error_repr(p->error[i]),
650 client[i]->fence.error);
651 err = -EINVAL;
652 goto out;
653 }
654 }
655
656 out:
657 for (i = 0; i < ARRAY_SIZE(client); i++)
658 if (client[i])
659 i915_request_put(client[i]);
660 if (err) {
661 pr_err("%s: failed at phase[%zd] { %d, %d }\n",
662 engine->name, p - phases,
663 p->error[0], p->error[1]);
664 break;
665 }
666 }
667
668 engine_heartbeat_enable(engine, heartbeat);
669 if (err) {
670 intel_gt_set_wedged(gt);
671 return err;
672 }
673 }
674
675 return 0;
676 }
677
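/*
 * Emit one link of a semaphore chain: wait until dword @idx of @vma
 * becomes non-zero, then release the previous link by writing 1 into
 * dword @idx - 1 (the head of the chain, idx 0, has no predecessor).
 * Arbitration is enabled across the wait so the link may be preempted
 * or timesliced while it polls.
 */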
678 static int
679 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
680 {
681 u32 *cs;
682
683 cs = intel_ring_begin(rq, 10);
684 if (IS_ERR(cs))
685 return PTR_ERR(cs);
686
687 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
688
689 *cs++ = MI_SEMAPHORE_WAIT |
690 MI_SEMAPHORE_GLOBAL_GTT |
691 MI_SEMAPHORE_POLL |
692 MI_SEMAPHORE_SAD_NEQ_SDD;
693 *cs++ = 0;
694 *cs++ = i915_ggtt_offset(vma) + 4 * idx;
695 *cs++ = 0;
696
697 if (idx > 0) {
698 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
699 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
700 *cs++ = 0;
701 *cs++ = 1;
702 } else {
703 *cs++ = MI_NOOP;
704 *cs++ = MI_NOOP;
705 *cs++ = MI_NOOP;
706 *cs++ = MI_NOOP;
707 }
708
709 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
710
711 intel_ring_advance(rq, cs);
712 return 0;
713 }
714
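/*
 * Queue one link of the semaphore chain on @engine in its own context,
 * returning a reference to the request (or an ERR_PTR on failure).
 */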
715 static struct i915_request *
716 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
717 {
718 struct intel_context *ce;
719 struct i915_request *rq;
720 int err;
721
722 ce = intel_context_create(engine);
723 if (IS_ERR(ce))
724 return ERR_CAST(ce);
725
726 rq = intel_context_create_request(ce);
727 if (IS_ERR(rq))
728 goto out_ce;
729
730 err = 0;
731 if (rq->engine->emit_init_breadcrumb)
732 err = rq->engine->emit_init_breadcrumb(rq);
733 if (err == 0)
734 err = emit_semaphore_chain(rq, vma, idx);
735 if (err == 0)
736 i915_request_get(rq);
737 i915_request_add(rq);
738 if (err)
739 rq = ERR_PTR(err);
740
741 out_ce:
742 intel_context_put(ce);
743 return rq;
744 }
745
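/*
 * Submit a kernel request on @engine that writes 1 into dword @idx - 1 of
 * @vma, releasing the tail of the semaphore chain, and bump it to @prio
 * (kicking the tasklet) so the scheduler must slot it in ahead of the
 * still-spinning links.
 */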
746 static int
747 release_queue(struct intel_engine_cs *engine,
748 struct i915_vma *vma,
749 int idx, int prio)
750 {
751 struct i915_sched_attr attr = {
752 .priority = prio,
753 };
754 struct i915_request *rq;
755 u32 *cs;
756
757 rq = intel_engine_create_kernel_request(engine);
758 if (IS_ERR(rq))
759 return PTR_ERR(rq);
760
761 cs = intel_ring_begin(rq, 4);
762 if (IS_ERR(cs)) {
763 i915_request_add(rq);
764 return PTR_ERR(cs);
765 }
766
767 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
768 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
769 *cs++ = 0;
770 *cs++ = 1;
771
772 intel_ring_advance(rq, cs);
773
774 i915_request_get(rq);
775 i915_request_add(rq);
776
777 local_bh_disable();
778 engine->schedule(rq, &attr);
779 local_bh_enable(); /* kick tasklet */
780
781 i915_request_put(rq);
782
783 return 0;
784 }
785
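/*
 * Build a chain of semaphore waits: one head request on @outer followed
 * by @count links on every engine, then release the final link with a
 * maximum-priority request. Completion of the head therefore requires
 * timeslicing to rotate through each spinning link in turn.
 */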
786 static int
787 slice_semaphore_queue(struct intel_engine_cs *outer,
788 struct i915_vma *vma,
789 int count)
790 {
791 struct intel_engine_cs *engine;
792 struct i915_request *head;
793 enum intel_engine_id id;
794 int err, i, n = 0;
795
796 head = semaphore_queue(outer, vma, n++);
797 if (IS_ERR(head))
798 return PTR_ERR(head);
799
800 for_each_engine(engine, outer->gt, id) {
801 for (i = 0; i < count; i++) {
802 struct i915_request *rq;
803
804 rq = semaphore_queue(engine, vma, n++);
805 if (IS_ERR(rq)) {
806 err = PTR_ERR(rq);
807 goto out;
808 }
809
810 i915_request_put(rq);
811 }
812 }
813
814 err = release_queue(outer, vma, n, INT_MAX);
815 if (err)
816 goto out;
817
818 if (i915_request_wait(head, 0,
819 2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) {
820 pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
821 count, n);
822 GEM_TRACE_DUMP();
823 intel_gt_set_wedged(outer->gt);
824 err = -EIO;
825 }
826
827 out:
828 i915_request_put(head);
829 return err;
830 }
831
832 static int live_timeslice_preempt(void *arg)
833 {
834 struct intel_gt *gt = arg;
835 struct drm_i915_gem_object *obj;
836 struct i915_vma *vma;
837 void *vaddr;
838 int err = 0;
839 int count;
840
841 /*
842 * If a request takes too long, we would like to give other users
843 * a fair go on the GPU. In particular, users may create batches
844 * that wait upon external input, where that input may even be
845 * supplied by another GPU job. To avoid blocking forever, we
846 * need to preempt the current task and replace it with another
847 * ready task.
848 */
849 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
850 return 0;
851
852 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
853 if (IS_ERR(obj))
854 return PTR_ERR(obj);
855
856 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
857 if (IS_ERR(vma)) {
858 err = PTR_ERR(vma);
859 goto err_obj;
860 }
861
862 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
863 if (IS_ERR(vaddr)) {
864 err = PTR_ERR(vaddr);
865 goto err_obj;
866 }
867
868 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
869 if (err)
870 goto err_map;
871
872 err = i915_vma_sync(vma);
873 if (err)
874 goto err_pin;
875
876 for_each_prime_number_from(count, 1, 16) {
877 struct intel_engine_cs *engine;
878 enum intel_engine_id id;
879
880 for_each_engine(engine, gt, id) {
881 unsigned long saved;
882
883 if (!intel_engine_has_preemption(engine))
884 continue;
885
886 memset(vaddr, 0, PAGE_SIZE);
887
888 engine_heartbeat_disable(engine, &saved);
889 err = slice_semaphore_queue(engine, vma, count);
890 engine_heartbeat_enable(engine, saved);
891 if (err)
892 goto err_pin;
893
894 if (igt_flush_test(gt->i915)) {
895 err = -EIO;
896 goto err_pin;
897 }
898 }
899 }
900
901 err_pin:
902 i915_vma_unpin(vma);
903 err_map:
904 i915_gem_object_unpin_map(obj);
905 err_obj:
906 i915_gem_object_put(obj);
907 return err;
908 }
909
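/*
 * Build a request that waits for slot[0] of the status page to reach
 * @idx, records RING_TIMESTAMP into slot[idx] and then advances slot[0]
 * to @idx + 1, releasing the next rewinder. The recorded timestamps let
 * the test compare the actual order of execution.
 */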
910 static struct i915_request *
911 create_rewinder(struct intel_context *ce,
912 struct i915_request *wait,
913 void *slot, int idx)
914 {
915 const u32 offset =
916 i915_ggtt_offset(ce->engine->status_page.vma) +
917 offset_in_page(slot);
918 struct i915_request *rq;
919 u32 *cs;
920 int err;
921
922 rq = intel_context_create_request(ce);
923 if (IS_ERR(rq))
924 return rq;
925
926 if (wait) {
927 err = i915_request_await_dma_fence(rq, &wait->fence);
928 if (err)
929 goto err;
930 }
931
932 cs = intel_ring_begin(rq, 14);
933 if (IS_ERR(cs)) {
934 err = PTR_ERR(cs);
935 goto err;
936 }
937
938 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
939 *cs++ = MI_NOOP;
940
941 *cs++ = MI_SEMAPHORE_WAIT |
942 MI_SEMAPHORE_GLOBAL_GTT |
943 MI_SEMAPHORE_POLL |
944 MI_SEMAPHORE_SAD_GTE_SDD;
945 *cs++ = idx;
946 *cs++ = offset;
947 *cs++ = 0;
948
949 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
950 *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
951 *cs++ = offset + idx * sizeof(u32);
952 *cs++ = 0;
953
954 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
955 *cs++ = offset;
956 *cs++ = 0;
957 *cs++ = idx + 1;
958
959 intel_ring_advance(rq, cs);
960
961 rq->sched.attr.priority = I915_PRIORITY_MASK;
962 err = 0;
963 err:
964 i915_request_get(rq);
965 i915_request_add(rq);
966 if (err) {
967 i915_request_put(rq);
968 return ERR_PTR(err);
969 }
970
971 return rq;
972 }
973
974 static int live_timeslice_rewind(void *arg)
975 {
976 struct intel_gt *gt = arg;
977 struct intel_engine_cs *engine;
978 enum intel_engine_id id;
979
980 /*
981 * The usual presumption on timeslice expiration is that we replace
982 * the active context with another. However, given a chain of
983 * dependencies we may end up replacing the active context with itself,
984 * but with only a subset of its requests still required, forcing us to
985 * rewind the RING_TAIL of the original request.
986 */
987 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
988 return 0;
989
990 for_each_engine(engine, gt, id) {
991 enum { A1, A2, B1 };
992 enum { X = 1, Z, Y };
993 struct i915_request *rq[3] = {};
994 struct intel_context *ce;
995 unsigned long heartbeat;
996 unsigned long timeslice;
997 int i, err = 0;
998 u32 *slot;
999
1000 if (!intel_engine_has_timeslices(engine))
1001 continue;
1002
1003 /*
1004 * A:rq1 -- semaphore wait, timestamp X
1005 * A:rq2 -- write timestamp Y
1006 *
1007 * B:rq1 [await A:rq1] -- write timestamp Z
1008 *
1009 * Force timeslice, release semaphore.
1010 *
1011 * Expect execution/evaluation order XZY
1012 */
1013
1014 engine_heartbeat_disable(engine, &heartbeat);
1015 timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1016
1017 slot = memset32(engine->status_page.addr + 1000, 0, 4);
1018
1019 ce = intel_context_create(engine);
1020 if (IS_ERR(ce)) {
1021 err = PTR_ERR(ce);
1022 goto err;
1023 }
1024
1025 rq[0] = create_rewinder(ce, NULL, slot, X);
1026 if (IS_ERR(rq[0])) {
1027 intel_context_put(ce);
1028 goto err;
1029 }
1030
1031 rq[1] = create_rewinder(ce, NULL, slot, Y);
1032 intel_context_put(ce);
1033 if (IS_ERR(rq[1]))
1034 goto err;
1035
1036 err = wait_for_submit(engine, rq[1], HZ / 2);
1037 if (err) {
1038 pr_err("%s: failed to submit first context\n",
1039 engine->name);
1040 goto err;
1041 }
1042
1043 ce = intel_context_create(engine);
1044 if (IS_ERR(ce)) {
1045 err = PTR_ERR(ce);
1046 goto err;
1047 }
1048
1049 rq[2] = create_rewinder(ce, rq[0], slot, Z);
1050 intel_context_put(ce);
1051 if (IS_ERR(rq[2]))
1052 goto err;
1053
1054 err = wait_for_submit(engine, rq[2], HZ / 2);
1055 if (err) {
1056 pr_err("%s: failed to submit second context\n",
1057 engine->name);
1058 goto err;
1059 }
1060 GEM_BUG_ON(!timer_pending(&engine->execlists.timer));
1061
1062 /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
1063 if (i915_request_is_active(rq[A2])) { /* semaphore yielded! */
1064 /* Wait for the timeslice to kick in */
1065 del_timer(&engine->execlists.timer);
1066 tasklet_hi_schedule(&engine->execlists.tasklet);
1067 intel_engine_flush_submission(engine);
1068 }
1069 /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
1070 GEM_BUG_ON(!i915_request_is_active(rq[A1]));
1071 GEM_BUG_ON(!i915_request_is_active(rq[B1]));
1072 GEM_BUG_ON(i915_request_is_active(rq[A2]));
1073
1074 /* Release the hounds! */
1075 slot[0] = 1;
1076 wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */
1077
1078 for (i = 1; i <= 3; i++) {
1079 unsigned long timeout = jiffies + HZ / 2;
1080
1081 while (!READ_ONCE(slot[i]) &&
1082 time_before(jiffies, timeout))
1083 ;
1084
1085 if (!time_before(jiffies, timeout)) {
1086 pr_err("%s: rq[%d] timed out\n",
1087 engine->name, i - 1);
1088 err = -ETIME;
1089 goto err;
1090 }
1091
1092 pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
1093 }
1094
1095 /* XZY: XZ < XY */
1096 if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
1097 pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
1098 engine->name,
1099 slot[Z] - slot[X],
1100 slot[Y] - slot[X]);
1101 err = -EINVAL;
1102 }
1103
1104 err:
1105 memset32(&slot[0], -1, 4);
1106 wmb();
1107
1108 engine->props.timeslice_duration_ms = timeslice;
1109 engine_heartbeat_enable(engine, heartbeat);
1110 for (i = 0; i < 3; i++)
1111 i915_request_put(rq[i]);
1112 if (igt_flush_test(gt->i915))
1113 err = -EIO;
1114 if (err)
1115 return err;
1116 }
1117
1118 return 0;
1119 }
1120
1121 static struct i915_request *nop_request(struct intel_engine_cs *engine)
1122 {
1123 struct i915_request *rq;
1124
1125 rq = intel_engine_create_kernel_request(engine);
1126 if (IS_ERR(rq))
1127 return rq;
1128
1129 i915_request_get(rq);
1130 i915_request_add(rq);
1131
1132 return rq;
1133 }
1134
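/*
 * Upper bound on how long we wait for a timeslice-driven context switch:
 * two timeslice intervals plus a jiffy of slack.
 */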
1135 static long timeslice_threshold(const struct intel_engine_cs *engine)
1136 {
1137 return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1;
1138 }
1139
1140 static int live_timeslice_queue(void *arg)
1141 {
1142 struct intel_gt *gt = arg;
1143 struct drm_i915_gem_object *obj;
1144 struct intel_engine_cs *engine;
1145 enum intel_engine_id id;
1146 struct i915_vma *vma;
1147 void *vaddr;
1148 int err = 0;
1149
1150 /*
1151 * Make sure that even if ELSP[0] and ELSP[1] are filled, with
1152 * timeslicing between them disabled, we *do* enable timeslicing
1153 * if the queue demands it. (Normally, we do not submit if
1154 * ELSP[1] is already occupied, so must rely on timeslicing to
1155 * eject ELSP[0] in favour of the queue.)
1156 */
1157 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1158 return 0;
1159
1160 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1161 if (IS_ERR(obj))
1162 return PTR_ERR(obj);
1163
1164 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1165 if (IS_ERR(vma)) {
1166 err = PTR_ERR(vma);
1167 goto err_obj;
1168 }
1169
1170 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
1171 if (IS_ERR(vaddr)) {
1172 err = PTR_ERR(vaddr);
1173 goto err_obj;
1174 }
1175
1176 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1177 if (err)
1178 goto err_map;
1179
1180 err = i915_vma_sync(vma);
1181 if (err)
1182 goto err_pin;
1183
1184 for_each_engine(engine, gt, id) {
1185 struct i915_sched_attr attr = {
1186 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
1187 };
1188 struct i915_request *rq, *nop;
1189 unsigned long saved;
1190
1191 if (!intel_engine_has_preemption(engine))
1192 continue;
1193
1194 engine_heartbeat_disable(engine, &saved);
1195 memset(vaddr, 0, PAGE_SIZE);
1196
1197 /* ELSP[0]: semaphore wait */
1198 rq = semaphore_queue(engine, vma, 0);
1199 if (IS_ERR(rq)) {
1200 err = PTR_ERR(rq);
1201 goto err_heartbeat;
1202 }
1203 engine->schedule(rq, &attr);
1204 err = wait_for_submit(engine, rq, HZ / 2);
1205 if (err) {
1206 pr_err("%s: Timed out trying to submit semaphores\n",
1207 engine->name);
1208 goto err_rq;
1209 }
1210
1211 /* ELSP[1]: nop request */
1212 nop = nop_request(engine);
1213 if (IS_ERR(nop)) {
1214 err = PTR_ERR(nop);
1215 goto err_rq;
1216 }
1217 err = wait_for_submit(engine, nop, HZ / 2);
1218 i915_request_put(nop);
1219 if (err) {
1220 pr_err("%s: Timed out trying to submit nop\n",
1221 engine->name);
1222 goto err_rq;
1223 }
1224
1225 GEM_BUG_ON(i915_request_completed(rq));
1226 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
1227
1228 /* Queue: semaphore signal, matching priority as semaphore */
1229 err = release_queue(engine, vma, 1, effective_prio(rq));
1230 if (err)
1231 goto err_rq;
1232
1233 intel_engine_flush_submission(engine);
1234 if (!READ_ONCE(engine->execlists.timer.expires) &&
1235 !i915_request_completed(rq)) {
1236 struct drm_printer p =
1237 drm_info_printer(gt->i915->drm.dev);
1238
1239 GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n",
1240 engine->name);
1241 intel_engine_dump(engine, &p,
1242 "%s\n", engine->name);
1243 GEM_TRACE_DUMP();
1244
1245 memset(vaddr, 0xff, PAGE_SIZE);
1246 err = -EINVAL;
1247 }
1248
1249 /* Timeslice every jiffy, so within 2 we should signal */
1250 if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) {
1251 struct drm_printer p =
1252 drm_info_printer(gt->i915->drm.dev);
1253
1254 pr_err("%s: Failed to timeslice into queue\n",
1255 engine->name);
1256 intel_engine_dump(engine, &p,
1257 "%s\n", engine->name);
1258
1259 memset(vaddr, 0xff, PAGE_SIZE);
1260 err = -EIO;
1261 }
1262 err_rq:
1263 i915_request_put(rq);
1264 err_heartbeat:
1265 engine_heartbeat_enable(engine, saved);
1266 if (err)
1267 break;
1268 }
1269
1270 err_pin:
1271 i915_vma_unpin(vma);
1272 err_map:
1273 i915_gem_object_unpin_map(obj);
1274 err_obj:
1275 i915_gem_object_put(obj);
1276 return err;
1277 }
1278
1279 static int live_busywait_preempt(void *arg)
1280 {
1281 struct intel_gt *gt = arg;
1282 struct i915_gem_context *ctx_hi, *ctx_lo;
1283 struct intel_engine_cs *engine;
1284 struct drm_i915_gem_object *obj;
1285 struct i915_vma *vma;
1286 enum intel_engine_id id;
1287 int err = -ENOMEM;
1288 u32 *map;
1289
1290 /*
1291 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
1292 * preempt the busywaits used to synchronise between rings.
1293 */
1294
1295 ctx_hi = kernel_context(gt->i915);
1296 if (!ctx_hi)
1297 return -ENOMEM;
1298 ctx_hi->sched.priority =
1299 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
1300
1301 ctx_lo = kernel_context(gt->i915);
1302 if (!ctx_lo)
1303 goto err_ctx_hi;
1304 ctx_lo->sched.priority =
1305 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
1306
1307 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1308 if (IS_ERR(obj)) {
1309 err = PTR_ERR(obj);
1310 goto err_ctx_lo;
1311 }
1312
1313 map = i915_gem_object_pin_map(obj, I915_MAP_WC);
1314 if (IS_ERR(map)) {
1315 err = PTR_ERR(map);
1316 goto err_obj;
1317 }
1318
1319 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1320 if (IS_ERR(vma)) {
1321 err = PTR_ERR(vma);
1322 goto err_map;
1323 }
1324
1325 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1326 if (err)
1327 goto err_map;
1328
1329 err = i915_vma_sync(vma);
1330 if (err)
1331 goto err_vma;
1332
1333 for_each_engine(engine, gt, id) {
1334 struct i915_request *lo, *hi;
1335 struct igt_live_test t;
1336 u32 *cs;
1337
1338 if (!intel_engine_has_preemption(engine))
1339 continue;
1340
1341 if (!intel_engine_can_store_dword(engine))
1342 continue;
1343
1344 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1345 err = -EIO;
1346 goto err_vma;
1347 }
1348
1349 /*
1350 * We create two requests. The low priority request
1351 * busywaits on a semaphore (inside the ringbuffer where
1352 * it should be preemptible) and the high priority request
1353 * uses a MI_STORE_DWORD_IMM to update the semaphore value
1354 * allowing the first request to complete. If preemption
1355 * fails, we hang instead.
1356 */
1357
1358 lo = igt_request_alloc(ctx_lo, engine);
1359 if (IS_ERR(lo)) {
1360 err = PTR_ERR(lo);
1361 goto err_vma;
1362 }
1363
1364 cs = intel_ring_begin(lo, 8);
1365 if (IS_ERR(cs)) {
1366 err = PTR_ERR(cs);
1367 i915_request_add(lo);
1368 goto err_vma;
1369 }
1370
1371 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1372 *cs++ = i915_ggtt_offset(vma);
1373 *cs++ = 0;
1374 *cs++ = 1;
1375
1376 /* XXX Do we need a flush + invalidate here? */
1377
1378 *cs++ = MI_SEMAPHORE_WAIT |
1379 MI_SEMAPHORE_GLOBAL_GTT |
1380 MI_SEMAPHORE_POLL |
1381 MI_SEMAPHORE_SAD_EQ_SDD;
1382 *cs++ = 0;
1383 *cs++ = i915_ggtt_offset(vma);
1384 *cs++ = 0;
1385
1386 intel_ring_advance(lo, cs);
1387
1388 i915_request_get(lo);
1389 i915_request_add(lo);
1390
1391 if (wait_for(READ_ONCE(*map), 10)) {
1392 i915_request_put(lo);
1393 err = -ETIMEDOUT;
1394 goto err_vma;
1395 }
1396
1397 /* Low priority request should be busywaiting now */
1398 if (i915_request_wait(lo, 0, 1) != -ETIME) {
1399 i915_request_put(lo);
1400 pr_err("%s: Busywaiting request did not busywait!\n",
1401 engine->name);
1402 err = -EIO;
1403 goto err_vma;
1404 }
1405
1406 hi = igt_request_alloc(ctx_hi, engine);
1407 if (IS_ERR(hi)) {
1408 err = PTR_ERR(hi);
1409 i915_request_put(lo);
1410 goto err_vma;
1411 }
1412
1413 cs = intel_ring_begin(hi, 4);
1414 if (IS_ERR(cs)) {
1415 err = PTR_ERR(cs);
1416 i915_request_add(hi);
1417 i915_request_put(lo);
1418 goto err_vma;
1419 }
1420
1421 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1422 *cs++ = i915_ggtt_offset(vma);
1423 *cs++ = 0;
1424 *cs++ = 0;
1425
1426 intel_ring_advance(hi, cs);
1427 i915_request_add(hi);
1428
1429 if (i915_request_wait(lo, 0, HZ / 5) < 0) {
1430 struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
1431
1432 pr_err("%s: Failed to preempt semaphore busywait!\n",
1433 engine->name);
1434
1435 intel_engine_dump(engine, &p, "%s\n", engine->name);
1436 GEM_TRACE_DUMP();
1437
1438 i915_request_put(lo);
1439 intel_gt_set_wedged(gt);
1440 err = -EIO;
1441 goto err_vma;
1442 }
1443 GEM_BUG_ON(READ_ONCE(*map));
1444 i915_request_put(lo);
1445
1446 if (igt_live_test_end(&t)) {
1447 err = -EIO;
1448 goto err_vma;
1449 }
1450 }
1451
1452 err = 0;
1453 err_vma:
1454 i915_vma_unpin(vma);
1455 err_map:
1456 i915_gem_object_unpin_map(obj);
1457 err_obj:
1458 i915_gem_object_put(obj);
1459 err_ctx_lo:
1460 kernel_context_close(ctx_lo);
1461 err_ctx_hi:
1462 kernel_context_close(ctx_hi);
1463 return err;
1464 }
1465
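/*
 * Convenience wrapper: look up @engine's legacy slot in @ctx and create a
 * spinner request on that context/engine pair.
 */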
1466 static struct i915_request *
1467 spinner_create_request(struct igt_spinner *spin,
1468 struct i915_gem_context *ctx,
1469 struct intel_engine_cs *engine,
1470 u32 arb)
1471 {
1472 struct intel_context *ce;
1473 struct i915_request *rq;
1474
1475 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
1476 if (IS_ERR(ce))
1477 return ERR_CAST(ce);
1478
1479 rq = igt_spinner_create_request(spin, ce, arb);
1480 intel_context_put(ce);
1481 return rq;
1482 }
1483
1484 static int live_preempt(void *arg)
1485 {
1486 struct intel_gt *gt = arg;
1487 struct i915_gem_context *ctx_hi, *ctx_lo;
1488 struct igt_spinner spin_hi, spin_lo;
1489 struct intel_engine_cs *engine;
1490 enum intel_engine_id id;
1491 int err = -ENOMEM;
1492
1493 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1494 return 0;
1495
1496 if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
1497 pr_err("Logical preemption supported, but not exposed\n");
1498
1499 if (igt_spinner_init(&spin_hi, gt))
1500 return -ENOMEM;
1501
1502 if (igt_spinner_init(&spin_lo, gt))
1503 goto err_spin_hi;
1504
1505 ctx_hi = kernel_context(gt->i915);
1506 if (!ctx_hi)
1507 goto err_spin_lo;
1508 ctx_hi->sched.priority =
1509 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
1510
1511 ctx_lo = kernel_context(gt->i915);
1512 if (!ctx_lo)
1513 goto err_ctx_hi;
1514 ctx_lo->sched.priority =
1515 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
1516
1517 for_each_engine(engine, gt, id) {
1518 struct igt_live_test t;
1519 struct i915_request *rq;
1520
1521 if (!intel_engine_has_preemption(engine))
1522 continue;
1523
1524 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1525 err = -EIO;
1526 goto err_ctx_lo;
1527 }
1528
1529 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1530 MI_ARB_CHECK);
1531 if (IS_ERR(rq)) {
1532 err = PTR_ERR(rq);
1533 goto err_ctx_lo;
1534 }
1535
1536 i915_request_add(rq);
1537 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1538 GEM_TRACE("lo spinner failed to start\n");
1539 GEM_TRACE_DUMP();
1540 intel_gt_set_wedged(gt);
1541 err = -EIO;
1542 goto err_ctx_lo;
1543 }
1544
1545 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1546 MI_ARB_CHECK);
1547 if (IS_ERR(rq)) {
1548 igt_spinner_end(&spin_lo);
1549 err = PTR_ERR(rq);
1550 goto err_ctx_lo;
1551 }
1552
1553 i915_request_add(rq);
1554 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1555 GEM_TRACE("hi spinner failed to start\n");
1556 GEM_TRACE_DUMP();
1557 intel_gt_set_wedged(gt);
1558 err = -EIO;
1559 goto err_ctx_lo;
1560 }
1561
1562 igt_spinner_end(&spin_hi);
1563 igt_spinner_end(&spin_lo);
1564
1565 if (igt_live_test_end(&t)) {
1566 err = -EIO;
1567 goto err_ctx_lo;
1568 }
1569 }
1570
1571 err = 0;
1572 err_ctx_lo:
1573 kernel_context_close(ctx_lo);
1574 err_ctx_hi:
1575 kernel_context_close(ctx_hi);
1576 err_spin_lo:
1577 igt_spinner_fini(&spin_lo);
1578 err_spin_hi:
1579 igt_spinner_fini(&spin_hi);
1580 return err;
1581 }
1582
1583 static int live_late_preempt(void *arg)
1584 {
1585 struct intel_gt *gt = arg;
1586 struct i915_gem_context *ctx_hi, *ctx_lo;
1587 struct igt_spinner spin_hi, spin_lo;
1588 struct intel_engine_cs *engine;
1589 struct i915_sched_attr attr = {};
1590 enum intel_engine_id id;
1591 int err = -ENOMEM;
1592
1593 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1594 return 0;
1595
1596 if (igt_spinner_init(&spin_hi, gt))
1597 return -ENOMEM;
1598
1599 if (igt_spinner_init(&spin_lo, gt))
1600 goto err_spin_hi;
1601
1602 ctx_hi = kernel_context(gt->i915);
1603 if (!ctx_hi)
1604 goto err_spin_lo;
1605
1606 ctx_lo = kernel_context(gt->i915);
1607 if (!ctx_lo)
1608 goto err_ctx_hi;
1609
1610 /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1611 ctx_lo->sched.priority = I915_USER_PRIORITY(1);
1612
1613 for_each_engine(engine, gt, id) {
1614 struct igt_live_test t;
1615 struct i915_request *rq;
1616
1617 if (!intel_engine_has_preemption(engine))
1618 continue;
1619
1620 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1621 err = -EIO;
1622 goto err_ctx_lo;
1623 }
1624
1625 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1626 MI_ARB_CHECK);
1627 if (IS_ERR(rq)) {
1628 err = PTR_ERR(rq);
1629 goto err_ctx_lo;
1630 }
1631
1632 i915_request_add(rq);
1633 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1634 pr_err("First context failed to start\n");
1635 goto err_wedged;
1636 }
1637
1638 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1639 MI_NOOP);
1640 if (IS_ERR(rq)) {
1641 igt_spinner_end(&spin_lo);
1642 err = PTR_ERR(rq);
1643 goto err_ctx_lo;
1644 }
1645
1646 i915_request_add(rq);
1647 if (igt_wait_for_spinner(&spin_hi, rq)) {
1648 pr_err("Second context overtook first?\n");
1649 goto err_wedged;
1650 }
1651
1652 attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1653 engine->schedule(rq, &attr);
1654
1655 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1656 pr_err("High priority context failed to preempt the low priority context\n");
1657 GEM_TRACE_DUMP();
1658 goto err_wedged;
1659 }
1660
1661 igt_spinner_end(&spin_hi);
1662 igt_spinner_end(&spin_lo);
1663
1664 if (igt_live_test_end(&t)) {
1665 err = -EIO;
1666 goto err_ctx_lo;
1667 }
1668 }
1669
1670 err = 0;
1671 err_ctx_lo:
1672 kernel_context_close(ctx_lo);
1673 err_ctx_hi:
1674 kernel_context_close(ctx_hi);
1675 err_spin_lo:
1676 igt_spinner_fini(&spin_lo);
1677 err_spin_hi:
1678 igt_spinner_fini(&spin_hi);
1679 return err;
1680
1681 err_wedged:
1682 igt_spinner_end(&spin_hi);
1683 igt_spinner_end(&spin_lo);
1684 intel_gt_set_wedged(gt);
1685 err = -EIO;
1686 goto err_ctx_lo;
1687 }
1688
1689 struct preempt_client {
1690 struct igt_spinner spin;
1691 struct i915_gem_context *ctx;
1692 };
1693
1694 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1695 {
1696 c->ctx = kernel_context(gt->i915);
1697 if (!c->ctx)
1698 return -ENOMEM;
1699
1700 if (igt_spinner_init(&c->spin, gt))
1701 goto err_ctx;
1702
1703 return 0;
1704
1705 err_ctx:
1706 kernel_context_close(c->ctx);
1707 return -ENOMEM;
1708 }
1709
1710 static void preempt_client_fini(struct preempt_client *c)
1711 {
1712 igt_spinner_fini(&c->spin);
1713 kernel_context_close(c->ctx);
1714 }
1715
1716 static int live_nopreempt(void *arg)
1717 {
1718 struct intel_gt *gt = arg;
1719 struct intel_engine_cs *engine;
1720 struct preempt_client a, b;
1721 enum intel_engine_id id;
1722 int err = -ENOMEM;
1723
1724 /*
1725 * Verify that we can disable preemption for an individual request
1726 * that may be being observed and does not want to be interrupted.
1727 */
1728
1729 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1730 return 0;
1731
1732 if (preempt_client_init(gt, &a))
1733 return -ENOMEM;
1734 if (preempt_client_init(gt, &b))
1735 goto err_client_a;
1736 b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1737
1738 for_each_engine(engine, gt, id) {
1739 struct i915_request *rq_a, *rq_b;
1740
1741 if (!intel_engine_has_preemption(engine))
1742 continue;
1743
1744 engine->execlists.preempt_hang.count = 0;
1745
1746 rq_a = spinner_create_request(&a.spin,
1747 a.ctx, engine,
1748 MI_ARB_CHECK);
1749 if (IS_ERR(rq_a)) {
1750 err = PTR_ERR(rq_a);
1751 goto err_client_b;
1752 }
1753
1754 /* Low priority client, but unpreemptable! */
1755 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
1756
1757 i915_request_add(rq_a);
1758 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1759 pr_err("First client failed to start\n");
1760 goto err_wedged;
1761 }
1762
1763 rq_b = spinner_create_request(&b.spin,
1764 b.ctx, engine,
1765 MI_ARB_CHECK);
1766 if (IS_ERR(rq_b)) {
1767 err = PTR_ERR(rq_b);
1768 goto err_client_b;
1769 }
1770
1771 i915_request_add(rq_b);
1772
1773 /* B is much more important than A! (But A is unpreemptable.) */
1774 GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
1775
1776 /* Wait long enough for preemption and timeslicing */
1777 if (igt_wait_for_spinner(&b.spin, rq_b)) {
1778 pr_err("Second client started too early!\n");
1779 goto err_wedged;
1780 }
1781
1782 igt_spinner_end(&a.spin);
1783
1784 if (!igt_wait_for_spinner(&b.spin, rq_b)) {
1785 pr_err("Second client failed to start\n");
1786 goto err_wedged;
1787 }
1788
1789 igt_spinner_end(&b.spin);
1790
1791 if (engine->execlists.preempt_hang.count) {
1792 pr_err("Preemption recorded x%d; should have been suppressed!\n",
1793 engine->execlists.preempt_hang.count);
1794 err = -EINVAL;
1795 goto err_wedged;
1796 }
1797
1798 if (igt_flush_test(gt->i915))
1799 goto err_wedged;
1800 }
1801
1802 err = 0;
1803 err_client_b:
1804 preempt_client_fini(&b);
1805 err_client_a:
1806 preempt_client_fini(&a);
1807 return err;
1808
1809 err_wedged:
1810 igt_spinner_end(&b.spin);
1811 igt_spinner_end(&a.spin);
1812 intel_gt_set_wedged(gt);
1813 err = -EIO;
1814 goto err_client_b;
1815 }
1816
1817 struct live_preempt_cancel {
1818 struct intel_engine_cs *engine;
1819 struct preempt_client a, b;
1820 };
1821
1822 static int __cancel_active0(struct live_preempt_cancel *arg)
1823 {
1824 struct i915_request *rq;
1825 struct igt_live_test t;
1826 int err;
1827
1828 /* Preempt cancel of ELSP0 */
1829 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1830 if (igt_live_test_begin(&t, arg->engine->i915,
1831 __func__, arg->engine->name))
1832 return -EIO;
1833
1834 rq = spinner_create_request(&arg->a.spin,
1835 arg->a.ctx, arg->engine,
1836 MI_ARB_CHECK);
1837 if (IS_ERR(rq))
1838 return PTR_ERR(rq);
1839
1840 clear_bit(CONTEXT_BANNED, &rq->context->flags);
1841 i915_request_get(rq);
1842 i915_request_add(rq);
1843 if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
1844 err = -EIO;
1845 goto out;
1846 }
1847
1848 intel_context_set_banned(rq->context);
1849 err = intel_engine_pulse(arg->engine);
1850 if (err)
1851 goto out;
1852
1853 err = wait_for_reset(arg->engine, rq, HZ / 2);
1854 if (err) {
1855 pr_err("Cancelled inflight0 request did not reset\n");
1856 goto out;
1857 }
1858
1859 out:
1860 i915_request_put(rq);
1861 if (igt_live_test_end(&t))
1862 err = -EIO;
1863 return err;
1864 }
1865
1866 static int __cancel_active1(struct live_preempt_cancel *arg)
1867 {
1868 struct i915_request *rq[2] = {};
1869 struct igt_live_test t;
1870 int err;
1871
1872 /* Preempt cancel of ELSP1 */
1873 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1874 if (igt_live_test_begin(&t, arg->engine->i915,
1875 __func__, arg->engine->name))
1876 return -EIO;
1877
1878 rq[0] = spinner_create_request(&arg->a.spin,
1879 arg->a.ctx, arg->engine,
1880 MI_NOOP); /* no preemption */
1881 if (IS_ERR(rq[0]))
1882 return PTR_ERR(rq[0]);
1883
1884 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
1885 i915_request_get(rq[0]);
1886 i915_request_add(rq[0]);
1887 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
1888 err = -EIO;
1889 goto out;
1890 }
1891
1892 rq[1] = spinner_create_request(&arg->b.spin,
1893 arg->b.ctx, arg->engine,
1894 MI_ARB_CHECK);
1895 if (IS_ERR(rq[1])) {
1896 err = PTR_ERR(rq[1]);
1897 goto out;
1898 }
1899
1900 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
1901 i915_request_get(rq[1]);
1902 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
1903 i915_request_add(rq[1]);
1904 if (err)
1905 goto out;
1906
1907 intel_context_set_banned(rq[1]->context);
1908 err = intel_engine_pulse(arg->engine);
1909 if (err)
1910 goto out;
1911
1912 igt_spinner_end(&arg->a.spin);
1913 err = wait_for_reset(arg->engine, rq[1], HZ / 2);
1914 if (err)
1915 goto out;
1916
1917 if (rq[0]->fence.error != 0) {
1918 pr_err("Normal inflight0 request did not complete\n");
1919 err = -EINVAL;
1920 goto out;
1921 }
1922
1923 if (rq[1]->fence.error != -EIO) {
1924 pr_err("Cancelled inflight1 request did not report -EIO\n");
1925 err = -EINVAL;
1926 goto out;
1927 }
1928
1929 out:
1930 i915_request_put(rq[1]);
1931 i915_request_put(rq[0]);
1932 if (igt_live_test_end(&t))
1933 err = -EIO;
1934 return err;
1935 }
1936
1937 static int __cancel_queued(struct live_preempt_cancel *arg)
1938 {
1939 struct i915_request *rq[3] = {};
1940 struct igt_live_test t;
1941 int err;
1942
1943 /* Full ELSP and one in the wings */
1944 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1945 if (igt_live_test_begin(&t, arg->engine->i915,
1946 __func__, arg->engine->name))
1947 return -EIO;
1948
1949 rq[0] = spinner_create_request(&arg->a.spin,
1950 arg->a.ctx, arg->engine,
1951 MI_ARB_CHECK);
1952 if (IS_ERR(rq[0]))
1953 return PTR_ERR(rq[0]);
1954
1955 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
1956 i915_request_get(rq[0]);
1957 i915_request_add(rq[0]);
1958 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
1959 err = -EIO;
1960 goto out;
1961 }
1962
1963 rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
1964 if (IS_ERR(rq[1])) {
1965 err = PTR_ERR(rq[1]);
1966 goto out;
1967 }
1968
1969 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
1970 i915_request_get(rq[1]);
1971 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
1972 i915_request_add(rq[1]);
1973 if (err)
1974 goto out;
1975
1976 rq[2] = spinner_create_request(&arg->b.spin,
1977 arg->a.ctx, arg->engine,
1978 MI_ARB_CHECK);
1979 if (IS_ERR(rq[2])) {
1980 err = PTR_ERR(rq[2]);
1981 goto out;
1982 }
1983
1984 i915_request_get(rq[2]);
1985 err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
1986 i915_request_add(rq[2]);
1987 if (err)
1988 goto out;
1989
1990 intel_context_set_banned(rq[2]->context);
1991 err = intel_engine_pulse(arg->engine);
1992 if (err)
1993 goto out;
1994
1995 err = wait_for_reset(arg->engine, rq[2], HZ / 2);
1996 if (err)
1997 goto out;
1998
1999 if (rq[0]->fence.error != -EIO) {
2000 pr_err("Cancelled inflight0 request did not report -EIO\n");
2001 err = -EINVAL;
2002 goto out;
2003 }
2004
2005 if (rq[1]->fence.error != 0) {
2006 pr_err("Normal inflight1 request did not complete\n");
2007 err = -EINVAL;
2008 goto out;
2009 }
2010
2011 if (rq[2]->fence.error != -EIO) {
2012 pr_err("Cancelled queued request did not report -EIO\n");
2013 err = -EINVAL;
2014 goto out;
2015 }
2016
2017 out:
2018 i915_request_put(rq[2]);
2019 i915_request_put(rq[1]);
2020 i915_request_put(rq[0]);
2021 if (igt_live_test_end(&t))
2022 err = -EIO;
2023 return err;
2024 }
2025
2026 static int __cancel_hostile(struct live_preempt_cancel *arg)
2027 {
2028 struct i915_request *rq;
2029 int err;
2030
2031 /* Preempt cancel non-preemptible spinner in ELSP0 */
2032 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2033 return 0;
2034
2035 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2036 rq = spinner_create_request(&arg->a.spin,
2037 arg->a.ctx, arg->engine,
2038 MI_NOOP); /* preemption disabled */
2039 if (IS_ERR(rq))
2040 return PTR_ERR(rq);
2041
2042 clear_bit(CONTEXT_BANNED, &rq->context->flags);
2043 i915_request_get(rq);
2044 i915_request_add(rq);
2045 if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2046 err = -EIO;
2047 goto out;
2048 }
2049
2050 intel_context_set_banned(rq->context);
2051 err = intel_engine_pulse(arg->engine); /* force reset */
2052 if (err)
2053 goto out;
2054
2055 err = wait_for_reset(arg->engine, rq, HZ / 2);
2056 if (err) {
2057 pr_err("Cancelled inflight0 request did not reset\n");
2058 goto out;
2059 }
2060
2061 out:
2062 i915_request_put(rq);
2063 if (igt_flush_test(arg->engine->i915))
2064 err = -EIO;
2065 return err;
2066 }
2067
2068 static int live_preempt_cancel(void *arg)
2069 {
2070 struct intel_gt *gt = arg;
2071 struct live_preempt_cancel data;
2072 enum intel_engine_id id;
2073 int err = -ENOMEM;
2074
2075 /*
2076 * To cancel an inflight context, we need to first remove it from the
2077 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
2078 */
2079
2080 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2081 return 0;
2082
2083 if (preempt_client_init(gt, &data.a))
2084 return -ENOMEM;
2085 if (preempt_client_init(gt, &data.b))
2086 goto err_client_a;
2087
2088 for_each_engine(data.engine, gt, id) {
2089 if (!intel_engine_has_preemption(data.engine))
2090 continue;
2091
2092 err = __cancel_active0(&data);
2093 if (err)
2094 goto err_wedged;
2095
2096 err = __cancel_active1(&data);
2097 if (err)
2098 goto err_wedged;
2099
2100 err = __cancel_queued(&data);
2101 if (err)
2102 goto err_wedged;
2103
2104 err = __cancel_hostile(&data);
2105 if (err)
2106 goto err_wedged;
2107 }
2108
2109 err = 0;
2110 err_client_b:
2111 preempt_client_fini(&data.b);
2112 err_client_a:
2113 preempt_client_fini(&data.a);
2114 return err;
2115
2116 err_wedged:
2117 GEM_TRACE_DUMP();
2118 igt_spinner_end(&data.b.spin);
2119 igt_spinner_end(&data.a.spin);
2120 intel_gt_set_wedged(gt);
2121 goto err_client_b;
2122 }
2123
2124 static int live_suppress_self_preempt(void *arg)
2125 {
2126 struct intel_gt *gt = arg;
2127 struct intel_engine_cs *engine;
2128 struct i915_sched_attr attr = {
2129 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
2130 };
2131 struct preempt_client a, b;
2132 enum intel_engine_id id;
2133 int err = -ENOMEM;
2134
2135 /*
2136 * Verify that if a preemption request does not cause a change in
2137 * the current execution order, the preempt-to-idle injection is
2138 * skipped and that we do not accidentally apply it after the CS
2139 * completion event.
2140 */
2141
2142 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2143 return 0;
2144
2145 if (intel_uc_uses_guc_submission(&gt->uc))
2146 return 0; /* presume black box */
2147
2148 if (intel_vgpu_active(gt->i915))
2149 return 0; /* GVT forces single port & request submission */
2150
2151 if (preempt_client_init(gt, &a))
2152 return -ENOMEM;
2153 if (preempt_client_init(gt, &b))
2154 goto err_client_a;
2155
2156 for_each_engine(engine, gt, id) {
2157 struct i915_request *rq_a, *rq_b;
2158 int depth;
2159
2160 if (!intel_engine_has_preemption(engine))
2161 continue;
2162
2163 if (igt_flush_test(gt->i915))
2164 goto err_wedged;
2165
2166 intel_engine_pm_get(engine);
2167 engine->execlists.preempt_hang.count = 0;
2168
2169 rq_a = spinner_create_request(&a.spin,
2170 a.ctx, engine,
2171 MI_NOOP);
2172 if (IS_ERR(rq_a)) {
2173 err = PTR_ERR(rq_a);
2174 intel_engine_pm_put(engine);
2175 goto err_client_b;
2176 }
2177
2178 i915_request_add(rq_a);
2179 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
2180 pr_err("First client failed to start\n");
2181 intel_engine_pm_put(engine);
2182 goto err_wedged;
2183 }
2184
2185 /* Keep postponing the timer to avoid premature slicing */
2186 mod_timer(&engine->execlists.timer, jiffies + HZ);
2187 for (depth = 0; depth < 8; depth++) {
2188 rq_b = spinner_create_request(&b.spin,
2189 b.ctx, engine,
2190 MI_NOOP);
2191 if (IS_ERR(rq_b)) {
2192 err = PTR_ERR(rq_b);
2193 intel_engine_pm_put(engine);
2194 goto err_client_b;
2195 }
2196 i915_request_add(rq_b);
2197
2198 GEM_BUG_ON(i915_request_completed(rq_a));
2199 engine->schedule(rq_a, &attr);
2200 igt_spinner_end(&a.spin);
2201
2202 if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2203 pr_err("Second client failed to start\n");
2204 intel_engine_pm_put(engine);
2205 goto err_wedged;
2206 }
2207
2208 swap(a, b);
2209 rq_a = rq_b;
2210 }
2211 igt_spinner_end(&a.spin);
2212
2213 if (engine->execlists.preempt_hang.count) {
2214 pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
2215 engine->name,
2216 engine->execlists.preempt_hang.count,
2217 depth);
2218 intel_engine_pm_put(engine);
2219 err = -EINVAL;
2220 goto err_client_b;
2221 }
2222
2223 intel_engine_pm_put(engine);
2224 if (igt_flush_test(gt->i915))
2225 goto err_wedged;
2226 }
2227
2228 err = 0;
2229 err_client_b:
2230 preempt_client_fini(&b);
2231 err_client_a:
2232 preempt_client_fini(&a);
2233 return err;
2234
2235 err_wedged:
2236 igt_spinner_end(&b.spin);
2237 igt_spinner_end(&a.spin);
2238 intel_gt_set_wedged(gt);
2239 err = -EIO;
2240 goto err_client_b;
2241 }
2242
2243 static int __i915_sw_fence_call
2244 dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
2245 {
2246 return NOTIFY_DONE;
2247 }
2248
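/*
 * Fabricate a bare, permanently-incomplete request (no context, ring or
 * timeline) that can stand in as the previous request on a timeline to
 * mask the NEWCLIENT priority boost; it must be released with
 * dummy_request_free(), which fakes its completion.
 */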
2249 static struct i915_request *dummy_request(struct intel_engine_cs *engine)
2250 {
2251 struct i915_request *rq;
2252
2253 rq = kzalloc(sizeof(*rq), GFP_KERNEL);
2254 if (!rq)
2255 return NULL;
2256
2257 rq->engine = engine;
2258
2259 spin_lock_init(&rq->lock);
2260 INIT_LIST_HEAD(&rq->fence.cb_list);
2261 rq->fence.lock = &rq->lock;
2262 rq->fence.ops = &i915_fence_ops;
2263
2264 i915_sched_node_init(&rq->sched);
2265
2266 /* mark this request as permanently incomplete */
2267 rq->fence.seqno = 1;
2268 BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */
2269 rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1;
2270 GEM_BUG_ON(i915_request_completed(rq));
2271
2272 i915_sw_fence_init(&rq->submit, dummy_notify);
2273 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
2274
2275 spin_lock_init(&rq->lock);
2276 rq->fence.lock = &rq->lock;
2277 INIT_LIST_HEAD(&rq->fence.cb_list);
2278
2279 return rq;
2280 }
2281
2282 static void dummy_request_free(struct i915_request *dummy)
2283 {
2284 /* We have to fake the CS interrupt to kick the next request */
2285 i915_sw_fence_commit(&dummy->submit);
2286
2287 i915_request_mark_complete(dummy);
2288 dma_fence_signal(&dummy->fence);
2289
2290 i915_sched_node_fini(&dummy->sched);
2291 i915_sw_fence_fini(&dummy->submit);
2292
2293 dma_fence_free(&dummy->fence);
2294 }
2295
2296 static int live_suppress_wait_preempt(void *arg)
2297 {
2298 struct intel_gt *gt = arg;
2299 struct preempt_client client[4];
2300 struct i915_request *rq[ARRAY_SIZE(client)] = {};
2301 struct intel_engine_cs *engine;
2302 enum intel_engine_id id;
2303 int err = -ENOMEM;
2304 int i;
2305
2306 /*
2307 * Waiters are given a little priority nudge, but not enough
2308 * to actually cause any preemption. Double check that we do
2309 * not needlessly generate preempt-to-idle cycles.
2310 */
2311
2312 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2313 return 0;
2314
2315 if (preempt_client_init(gt, &client[0])) /* ELSP[0] */
2316 return -ENOMEM;
2317 if (preempt_client_init(gt, &client[1])) /* ELSP[1] */
2318 goto err_client_0;
2319 if (preempt_client_init(gt, &client[2])) /* head of queue */
2320 goto err_client_1;
2321 if (preempt_client_init(gt, &client[3])) /* bystander */
2322 goto err_client_2;
2323
2324 for_each_engine(engine, gt, id) {
2325 int depth;
2326
2327 if (!intel_engine_has_preemption(engine))
2328 continue;
2329
2330 if (!engine->emit_init_breadcrumb)
2331 continue;
2332
2333 for (depth = 0; depth < ARRAY_SIZE(client); depth++) {
2334 struct i915_request *dummy;
2335
2336 engine->execlists.preempt_hang.count = 0;
2337
2338 dummy = dummy_request(engine);
2339 if (!dummy)
2340 goto err_client_3;
2341
2342 for (i = 0; i < ARRAY_SIZE(client); i++) {
2343 struct i915_request *this;
2344
2345 this = spinner_create_request(&client[i].spin,
2346 client[i].ctx, engine,
2347 MI_NOOP);
2348 if (IS_ERR(this)) {
2349 err = PTR_ERR(this);
2350 goto err_wedged;
2351 }
2352
2353 /* Disable NEWCLIENT promotion */
2354 __i915_active_fence_set(&i915_request_timeline(this)->last_request,
2355 &dummy->fence);
2356
2357 rq[i] = i915_request_get(this);
2358 i915_request_add(this);
2359 }
2360
2361 dummy_request_free(dummy);
2362
2363 GEM_BUG_ON(i915_request_completed(rq[0]));
2364 if (!igt_wait_for_spinner(&client[0].spin, rq[0])) {
2365 pr_err("%s: First client failed to start\n",
2366 engine->name);
2367 goto err_wedged;
2368 }
2369 GEM_BUG_ON(!i915_request_started(rq[0]));
2370
2371 if (i915_request_wait(rq[depth],
2372 I915_WAIT_PRIORITY,
2373 1) != -ETIME) {
2374 pr_err("%s: Waiter depth:%d completed!\n",
2375 engine->name, depth);
2376 goto err_wedged;
2377 }
2378
2379 for (i = 0; i < ARRAY_SIZE(client); i++) {
2380 igt_spinner_end(&client[i].spin);
2381 i915_request_put(rq[i]);
2382 rq[i] = NULL;
2383 }
2384
2385 if (igt_flush_test(gt->i915))
2386 goto err_wedged;
2387
2388 if (engine->execlists.preempt_hang.count) {
2389 pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n",
2390 engine->name,
2391 engine->execlists.preempt_hang.count,
2392 depth);
2393 err = -EINVAL;
2394 goto err_client_3;
2395 }
2396 }
2397 }
2398
2399 err = 0;
2400 err_client_3:
2401 preempt_client_fini(&client[3]);
2402 err_client_2:
2403 preempt_client_fini(&client[2]);
2404 err_client_1:
2405 preempt_client_fini(&client[1]);
2406 err_client_0:
2407 preempt_client_fini(&client[0]);
2408 return err;
2409
2410 err_wedged:
2411 for (i = 0; i < ARRAY_SIZE(client); i++) {
2412 igt_spinner_end(&client[i].spin);
2413 i915_request_put(rq[i]);
2414 }
2415 intel_gt_set_wedged(gt);
2416 err = -EIO;
2417 goto err_client_3;
2418 }
2419
2420 static int live_chain_preempt(void *arg)
2421 {
2422 struct intel_gt *gt = arg;
2423 struct intel_engine_cs *engine;
2424 struct preempt_client hi, lo;
2425 enum intel_engine_id id;
2426 int err = -ENOMEM;
2427
2428 /*
2429 * Build a chain AB...BA between two contexts (A, B) and request
2430 * preemption of the last request. It should then complete before
2431 * the previously submitted spinner in B.
2432 */
2433
2434 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2435 return 0;
2436
2437 if (preempt_client_init(gt, &hi))
2438 return -ENOMEM;
2439
2440 if (preempt_client_init(gt, &lo))
2441 goto err_client_hi;
2442
2443 for_each_engine(engine, gt, id) {
2444 struct i915_sched_attr attr = {
2445 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
2446 };
2447 struct igt_live_test t;
2448 struct i915_request *rq;
2449 int ring_size, count, i;
2450
2451 if (!intel_engine_has_preemption(engine))
2452 continue;
2453
2454 rq = spinner_create_request(&lo.spin,
2455 lo.ctx, engine,
2456 MI_ARB_CHECK);
2457 if (IS_ERR(rq))
2458 goto err_wedged;
2459
2460 i915_request_get(rq);
2461 i915_request_add(rq);
2462
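/*
* Estimate how many requests fit into the ring from the size of
* this first spinner request, so the chains built below never
* overfill the ringbuffer.
*/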
2463 ring_size = rq->wa_tail - rq->head;
2464 if (ring_size < 0)
2465 ring_size += rq->ring->size;
2466 ring_size = rq->ring->size / ring_size;
2467 pr_debug("%s(%s): Using maximum of %d requests\n",
2468 __func__, engine->name, ring_size);
2469
2470 igt_spinner_end(&lo.spin);
2471 if (i915_request_wait(rq, 0, HZ / 2) < 0) {
2472 pr_err("Timed out waiting to flush %s\n", engine->name);
2473 i915_request_put(rq);
2474 goto err_wedged;
2475 }
2476 i915_request_put(rq);
2477
2478 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
2479 err = -EIO;
2480 goto err_wedged;
2481 }
2482
2483 for_each_prime_number_from(count, 1, ring_size) {
2484 rq = spinner_create_request(&hi.spin,
2485 hi.ctx, engine,
2486 MI_ARB_CHECK);
2487 if (IS_ERR(rq))
2488 goto err_wedged;
2489 i915_request_add(rq);
2490 if (!igt_wait_for_spinner(&hi.spin, rq))
2491 goto err_wedged;
2492
2493 rq = spinner_create_request(&lo.spin,
2494 lo.ctx, engine,
2495 MI_ARB_CHECK);
2496 if (IS_ERR(rq))
2497 goto err_wedged;
2498 i915_request_add(rq);
2499
2500 for (i = 0; i < count; i++) {
2501 rq = igt_request_alloc(lo.ctx, engine);
2502 if (IS_ERR(rq))
2503 goto err_wedged;
2504 i915_request_add(rq);
2505 }
2506
2507 rq = igt_request_alloc(hi.ctx, engine);
2508 if (IS_ERR(rq))
2509 goto err_wedged;
2510
2511 i915_request_get(rq);
2512 i915_request_add(rq);
2513 engine->schedule(rq, &attr);
2514
2515 igt_spinner_end(&hi.spin);
2516 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2517 struct drm_printer p =
2518 drm_info_printer(gt->i915->drm.dev);
2519
2520 pr_err("Failed to preempt over chain of %d\n",
2521 count);
2522 intel_engine_dump(engine, &p,
2523 "%s\n", engine->name);
2524 i915_request_put(rq);
2525 goto err_wedged;
2526 }
2527 igt_spinner_end(&lo.spin);
2528 i915_request_put(rq);
2529
2530 rq = igt_request_alloc(lo.ctx, engine);
2531 if (IS_ERR(rq))
2532 goto err_wedged;
2533
2534 i915_request_get(rq);
2535 i915_request_add(rq);
2536
2537 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2538 struct drm_printer p =
2539 drm_info_printer(gt->i915->drm.dev);
2540
2541 pr_err("Failed to flush low priority chain of %d requests\n",
2542 count);
2543 intel_engine_dump(engine, &p,
2544 "%s\n", engine->name);
2545
2546 i915_request_put(rq);
2547 goto err_wedged;
2548 }
2549 i915_request_put(rq);
2550 }
2551
2552 if (igt_live_test_end(&t)) {
2553 err = -EIO;
2554 goto err_wedged;
2555 }
2556 }
2557
2558 err = 0;
2559 err_client_lo:
2560 preempt_client_fini(&lo);
2561 err_client_hi:
2562 preempt_client_fini(&hi);
2563 return err;
2564
2565 err_wedged:
2566 igt_spinner_end(&hi.spin);
2567 igt_spinner_end(&lo.spin);
2568 intel_gt_set_wedged(gt);
2569 err = -EIO;
2570 goto err_client_lo;
2571 }
2572
2573 static int create_gang(struct intel_engine_cs *engine,
2574 struct i915_request **prev)
2575 {
2576 struct drm_i915_gem_object *obj;
2577 struct intel_context *ce;
2578 struct i915_request *rq;
2579 struct i915_vma *vma;
2580 u32 *cs;
2581 int err;
2582
2583 ce = intel_context_create(engine);
2584 if (IS_ERR(ce))
2585 return PTR_ERR(ce);
2586
2587 obj = i915_gem_object_create_internal(engine->i915, 4096);
2588 if (IS_ERR(obj)) {
2589 err = PTR_ERR(obj);
2590 goto err_ce;
2591 }
2592
2593 vma = i915_vma_instance(obj, ce->vm, NULL);
2594 if (IS_ERR(vma)) {
2595 err = PTR_ERR(vma);
2596 goto err_obj;
2597 }
2598
2599 err = i915_vma_pin(vma, 0, 0, PIN_USER);
2600 if (err)
2601 goto err_obj;
2602
2603 cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
2604 if (IS_ERR(cs)) {
err = PTR_ERR(cs);
2605 goto err_obj;
}
2606
2607 /* Semaphore target: spin until zero */
2608 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2609
2610 *cs++ = MI_SEMAPHORE_WAIT |
2611 MI_SEMAPHORE_POLL |
2612 MI_SEMAPHORE_SAD_EQ_SDD;
2613 *cs++ = 0;
2614 *cs++ = lower_32_bits(vma->node.start);
2615 *cs++ = upper_32_bits(vma->node.start);
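/*
* The spinner polls the first dword of its own batch (initially the
* non-zero MI_ARB_ON_OFF) and is released once a later, higher
* priority batch, or the CPU, writes zero over it.
*/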
2616
2617 if (*prev) {
2618 u64 offset = (*prev)->batch->node.start;
2619
2620 /* Terminate the spinner in the next lower priority batch. */
2621 *cs++ = MI_STORE_DWORD_IMM_GEN4;
2622 *cs++ = lower_32_bits(offset);
2623 *cs++ = upper_32_bits(offset);
2624 *cs++ = 0;
2625 }
2626
2627 *cs++ = MI_BATCH_BUFFER_END;
2628 i915_gem_object_flush_map(obj);
2629 i915_gem_object_unpin_map(obj);
2630
2631 rq = intel_context_create_request(ce);
2632 if (IS_ERR(rq)) {
err = PTR_ERR(rq);
2633 goto err_obj;
}
2634
2635 rq->batch = vma;
2636 i915_request_get(rq);
2637
2638 i915_vma_lock(vma);
2639 err = i915_request_await_object(rq, vma->obj, false);
2640 if (!err)
2641 err = i915_vma_move_to_active(vma, rq, 0);
2642 if (!err)
2643 err = rq->engine->emit_bb_start(rq,
2644 vma->node.start,
2645 PAGE_SIZE, 0);
2646 i915_vma_unlock(vma);
2647 i915_request_add(rq);
2648 if (err)
2649 goto err_rq;
2650
2651 i915_gem_object_put(obj);
2652 intel_context_put(ce);
2653
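/* Chain the gang together, newest request first, via rq->client_link */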
2654 rq->client_link.next = &(*prev)->client_link;
2655 *prev = rq;
2656 return 0;
2657
2658 err_rq:
2659 i915_request_put(rq);
2660 err_obj:
2661 i915_gem_object_put(obj);
2662 err_ce:
2663 intel_context_put(ce);
2664 return err;
2665 }
2666
2667 static int live_preempt_gang(void *arg)
2668 {
2669 struct intel_gt *gt = arg;
2670 struct intel_engine_cs *engine;
2671 enum intel_engine_id id;
2672
2673 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2674 return 0;
2675
2676 /*
2677 * Build as long a chain of preempters as we can, with each
2678 * request higher priority than the last. Once we are ready, we release
2679 * the last batch which then percolates down the chain, each releasing
2680 * the next oldest in turn. The intent is to simply push as hard as we
2681 * can with the number of preemptions, trying to exceed narrow HW
2682 * limits. At a minimum, we insist that we can sort all the user
2683 * high priority levels into execution order.
2684 */
2685
2686 for_each_engine(engine, gt, id) {
2687 struct i915_request *rq = NULL;
2688 struct igt_live_test t;
2689 IGT_TIMEOUT(end_time);
2690 int prio = 0;
2691 int err = 0;
2692 u32 *cs;
2693
2694 if (!intel_engine_has_preemption(engine))
2695 continue;
2696
2697 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2698 return -EIO;
2699
2700 do {
2701 struct i915_sched_attr attr = {
2702 .priority = I915_USER_PRIORITY(prio++),
2703 };
2704
2705 err = create_gang(engine, &rq);
2706 if (err)
2707 break;
2708
2709 /* Submit each spinner at increasing priority */
2710 engine->schedule(rq, &attr);
2711
2712 if (prio <= I915_PRIORITY_MAX)
2713 continue;
2714
2715 if (prio > (INT_MAX >> I915_USER_PRIORITY_SHIFT))
2716 break;
2717
2718 if (__igt_timeout(end_time, NULL))
2719 break;
2720 } while (1);
2721 pr_debug("%s: Preempt chain of %d requests\n",
2722 engine->name, prio);
2723
2724 /*
2725 * Such that the last spinner is the highest priority and
2726 * should execute first. When that spinner completes,
2727 * it will terminate the next lowest spinner until there
2728 * are no more spinners and the gang is complete.
2729 */
2730 cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC);
2731 if (!IS_ERR(cs)) {
2732 *cs = 0;
2733 i915_gem_object_unpin_map(rq->batch->obj);
2734 } else {
2735 err = PTR_ERR(cs);
2736 intel_gt_set_wedged(gt);
2737 }
2738
2739 while (rq) { /* wait for each rq from highest to lowest prio */
2740 struct i915_request *n =
2741 list_next_entry(rq, client_link);
2742
2743 if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
2744 struct drm_printer p =
2745 drm_info_printer(engine->i915->drm.dev);
2746
2747 pr_err("Failed to flush chain of %d requests, at %d\n",
2748 prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT);
2749 intel_engine_dump(engine, &p,
2750 "%s\n", engine->name);
2751
2752 err = -ETIME;
2753 }
2754
2755 i915_request_put(rq);
2756 rq = n;
2757 }
2758
2759 if (igt_live_test_end(&t))
2760 err = -EIO;
2761 if (err)
2762 return err;
2763 }
2764
2765 return 0;
2766 }
2767
2768 static int live_preempt_timeout(void *arg)
2769 {
2770 struct intel_gt *gt = arg;
2771 struct i915_gem_context *ctx_hi, *ctx_lo;
2772 struct igt_spinner spin_lo;
2773 struct intel_engine_cs *engine;
2774 enum intel_engine_id id;
2775 int err = -ENOMEM;
2776
2777 /*
2778 * Check that we force preemption to occur by cancelling the previous
2779 * context if it refuses to yield the GPU.
2780 */
2781 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2782 return 0;
2783
2784 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2785 return 0;
2786
2787 if (!intel_has_reset_engine(gt))
2788 return 0;
2789
2790 if (igt_spinner_init(&spin_lo, gt))
2791 return -ENOMEM;
2792
2793 ctx_hi = kernel_context(gt->i915);
2794 if (!ctx_hi)
2795 goto err_spin_lo;
2796 ctx_hi->sched.priority =
2797 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
2798
2799 ctx_lo = kernel_context(gt->i915);
2800 if (!ctx_lo)
2801 goto err_ctx_hi;
2802 ctx_lo->sched.priority =
2803 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
2804
2805 for_each_engine(engine, gt, id) {
2806 unsigned long saved_timeout;
2807 struct i915_request *rq;
2808
2809 if (!intel_engine_has_preemption(engine))
2810 continue;
2811
2812 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
2813 MI_NOOP); /* preemption disabled */
2814 if (IS_ERR(rq)) {
2815 err = PTR_ERR(rq);
2816 goto err_ctx_lo;
2817 }
2818
2819 i915_request_add(rq);
2820 if (!igt_wait_for_spinner(&spin_lo, rq)) {
2821 intel_gt_set_wedged(gt);
2822 err = -EIO;
2823 goto err_ctx_lo;
2824 }
2825
2826 rq = igt_request_alloc(ctx_hi, engine);
2827 if (IS_ERR(rq)) {
2828 igt_spinner_end(&spin_lo);
2829 err = PTR_ERR(rq);
2830 goto err_ctx_lo;
2831 }
2832
2833 /* Flush the previous CS ack before changing timeouts */
2834 while (READ_ONCE(engine->execlists.pending[0]))
2835 cpu_relax();
2836
2837 saved_timeout = engine->props.preempt_timeout_ms;
2838 engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */
2839
2840 i915_request_get(rq);
2841 i915_request_add(rq);
2842
2843 intel_engine_flush_submission(engine);
2844 engine->props.preempt_timeout_ms = saved_timeout;
2845
2846 if (i915_request_wait(rq, 0, HZ / 10) < 0) {
2847 intel_gt_set_wedged(gt);
2848 i915_request_put(rq);
2849 err = -ETIME;
2850 goto err_ctx_lo;
2851 }
2852
2853 igt_spinner_end(&spin_lo);
2854 i915_request_put(rq);
2855 }
2856
2857 err = 0;
2858 err_ctx_lo:
2859 kernel_context_close(ctx_lo);
2860 err_ctx_hi:
2861 kernel_context_close(ctx_hi);
2862 err_spin_lo:
2863 igt_spinner_fini(&spin_lo);
2864 return err;
2865 }
2866
2867 static int random_range(struct rnd_state *rnd, int min, int max)
2868 {
2869 return i915_prandom_u32_max_state(max - min, rnd) + min;
2870 }
2871
2872 static int random_priority(struct rnd_state *rnd)
2873 {
2874 return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
2875 }
2876
2877 struct preempt_smoke {
2878 struct intel_gt *gt;
2879 struct i915_gem_context **contexts;
2880 struct intel_engine_cs *engine;
2881 struct drm_i915_gem_object *batch;
2882 unsigned int ncontext;
2883 struct rnd_state prng;
2884 unsigned long count;
2885 };
2886
2887 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
2888 {
2889 return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
2890 &smoke->prng)];
2891 }
2892
2893 static int smoke_submit(struct preempt_smoke *smoke,
2894 struct i915_gem_context *ctx, int prio,
2895 struct drm_i915_gem_object *batch)
2896 {
2897 struct i915_request *rq;
2898 struct i915_vma *vma = NULL;
2899 int err = 0;
2900
2901 if (batch) {
2902 struct i915_address_space *vm;
2903
2904 vm = i915_gem_context_get_vm_rcu(ctx);
2905 vma = i915_vma_instance(batch, vm, NULL);
2906 i915_vm_put(vm);
2907 if (IS_ERR(vma))
2908 return PTR_ERR(vma);
2909
2910 err = i915_vma_pin(vma, 0, 0, PIN_USER);
2911 if (err)
2912 return err;
2913 }
2914
2915 ctx->sched.priority = prio;
2916
2917 rq = igt_request_alloc(ctx, smoke->engine);
2918 if (IS_ERR(rq)) {
2919 err = PTR_ERR(rq);
2920 goto unpin;
2921 }
2922
2923 if (vma) {
2924 i915_vma_lock(vma);
2925 err = i915_request_await_object(rq, vma->obj, false);
2926 if (!err)
2927 err = i915_vma_move_to_active(vma, rq, 0);
2928 if (!err)
2929 err = rq->engine->emit_bb_start(rq,
2930 vma->node.start,
2931 PAGE_SIZE, 0);
2932 i915_vma_unlock(vma);
2933 }
2934
2935 i915_request_add(rq);
2936
2937 unpin:
2938 if (vma)
2939 i915_vma_unpin(vma);
2940
2941 return err;
2942 }
2943
2944 static int smoke_crescendo_thread(void *arg)
2945 {
2946 struct preempt_smoke *smoke = arg;
2947 IGT_TIMEOUT(end_time);
2948 unsigned long count;
2949
2950 count = 0;
2951 do {
2952 struct i915_gem_context *ctx = smoke_context(smoke);
2953 int err;
2954
2955 err = smoke_submit(smoke,
2956 ctx, count % I915_PRIORITY_MAX,
2957 smoke->batch);
2958 if (err)
2959 return err;
2960
2961 count++;
2962 } while (!__igt_timeout(end_time, NULL));
2963
2964 smoke->count = count;
2965 return 0;
2966 }
2967
2968 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
2969 #define BATCH BIT(0)
2970 {
2971 struct task_struct *tsk[I915_NUM_ENGINES] = {};
2972 struct preempt_smoke arg[I915_NUM_ENGINES];
2973 struct intel_engine_cs *engine;
2974 enum intel_engine_id id;
2975 unsigned long count;
2976 int err = 0;
2977
2978 for_each_engine(engine, smoke->gt, id) {
2979 arg[id] = *smoke;
2980 arg[id].engine = engine;
2981 if (!(flags & BATCH))
2982 arg[id].batch = NULL;
2983 arg[id].count = 0;
2984
2985 tsk[id] = kthread_run(smoke_crescendo_thread, &arg[id],
2986 "igt/smoke:%d", id);
2987 if (IS_ERR(tsk[id])) {
2988 err = PTR_ERR(tsk[id]);
2989 break;
2990 }
2991 get_task_struct(tsk[id]);
2992 }
2993
2994 yield(); /* start all threads before we kthread_stop() */
2995
2996 count = 0;
2997 for_each_engine(engine, smoke->gt, id) {
2998 int status;
2999
3000 if (IS_ERR_OR_NULL(tsk[id]))
3001 continue;
3002
3003 status = kthread_stop(tsk[id]);
3004 if (status && !err)
3005 err = status;
3006
3007 count += arg[id].count;
3008
3009 put_task_struct(tsk[id]);
3010 }
3011
3012 pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
3013 count, flags,
3014 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
3015 return err;
3016 }
3017
3018 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
3019 {
3020 enum intel_engine_id id;
3021 IGT_TIMEOUT(end_time);
3022 unsigned long count;
3023
3024 count = 0;
3025 do {
3026 for_each_engine(smoke->engine, smoke->gt, id) {
3027 struct i915_gem_context *ctx = smoke_context(smoke);
3028 int err;
3029
3030 err = smoke_submit(smoke,
3031 ctx, random_priority(&smoke->prng),
3032 flags & BATCH ? smoke->batch : NULL);
3033 if (err)
3034 return err;
3035
3036 count++;
3037 }
3038 } while (!__igt_timeout(end_time, NULL));
3039
3040 pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
3041 count, flags,
3042 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
3043 return 0;
3044 }
3045
3046 static int live_preempt_smoke(void *arg)
3047 {
3048 struct preempt_smoke smoke = {
3049 .gt = arg,
3050 .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
3051 .ncontext = 1024,
3052 };
3053 const unsigned int phase[] = { 0, BATCH };
3054 struct igt_live_test t;
3055 int err = -ENOMEM;
3056 u32 *cs;
3057 int n;
3058
3059 if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915))
3060 return 0;
3061
3062 smoke.contexts = kmalloc_array(smoke.ncontext,
3063 sizeof(*smoke.contexts),
3064 GFP_KERNEL);
3065 if (!smoke.contexts)
3066 return -ENOMEM;
3067
3068 smoke.batch =
3069 i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
3070 if (IS_ERR(smoke.batch)) {
3071 err = PTR_ERR(smoke.batch);
3072 goto err_free;
3073 }
3074
3075 cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB);
3076 if (IS_ERR(cs)) {
3077 err = PTR_ERR(cs);
3078 goto err_batch;
3079 }
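/* Fill the batch with arbitration points so it may be preempted anywhere */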
3080 for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
3081 cs[n] = MI_ARB_CHECK;
3082 cs[n] = MI_BATCH_BUFFER_END;
3083 i915_gem_object_flush_map(smoke.batch);
3084 i915_gem_object_unpin_map(smoke.batch);
3085
3086 if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
3087 err = -EIO;
3088 goto err_batch;
3089 }
3090
3091 for (n = 0; n < smoke.ncontext; n++) {
3092 smoke.contexts[n] = kernel_context(smoke.gt->i915);
3093 if (!smoke.contexts[n])
3094 goto err_ctx;
3095 }
3096
3097 for (n = 0; n < ARRAY_SIZE(phase); n++) {
3098 err = smoke_crescendo(&smoke, phase[n]);
3099 if (err)
3100 goto err_ctx;
3101
3102 err = smoke_random(&smoke, phase[n]);
3103 if (err)
3104 goto err_ctx;
3105 }
3106
3107 err_ctx:
3108 if (igt_live_test_end(&t))
3109 err = -EIO;
3110
3111 for (n = 0; n < smoke.ncontext; n++) {
3112 if (!smoke.contexts[n])
3113 break;
3114 kernel_context_close(smoke.contexts[n]);
3115 }
3116
3117 err_batch:
3118 i915_gem_object_put(smoke.batch);
3119 err_free:
3120 kfree(smoke.contexts);
3121
3122 return err;
3123 }
3124
3125 static int nop_virtual_engine(struct intel_gt *gt,
3126 struct intel_engine_cs **siblings,
3127 unsigned int nsibling,
3128 unsigned int nctx,
3129 unsigned int flags)
3130 #define CHAIN BIT(0)
3131 {
3132 IGT_TIMEOUT(end_time);
3133 struct i915_request *request[16] = {};
3134 struct intel_context *ve[16];
3135 unsigned long n, prime, nc;
3136 struct igt_live_test t;
3137 ktime_t times[2] = {};
3138 int err;
3139
3140 GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
3141
3142 for (n = 0; n < nctx; n++) {
3143 ve[n] = intel_execlists_create_virtual(siblings, nsibling);
3144 if (IS_ERR(ve[n])) {
3145 err = PTR_ERR(ve[n]);
3146 nctx = n;
3147 goto out;
3148 }
3149
3150 err = intel_context_pin(ve[n]);
3151 if (err) {
3152 intel_context_put(ve[n]);
3153 nctx = n;
3154 goto out;
3155 }
3156 }
3157
3158 err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
3159 if (err)
3160 goto out;
3161
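/*
* Submit increasingly large batches of nop requests to the virtual
* engine(s), comparing the amortised completion latency against that
* of a single request.
*/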
3162 for_each_prime_number_from(prime, 1, 8192) {
3163 times[1] = ktime_get_raw();
3164
3165 if (flags & CHAIN) {
3166 for (nc = 0; nc < nctx; nc++) {
3167 for (n = 0; n < prime; n++) {
3168 struct i915_request *rq;
3169
3170 rq = i915_request_create(ve[nc]);
3171 if (IS_ERR(rq)) {
3172 err = PTR_ERR(rq);
3173 goto out;
3174 }
3175
3176 if (request[nc])
3177 i915_request_put(request[nc]);
3178 request[nc] = i915_request_get(rq);
3179 i915_request_add(rq);
3180 }
3181 }
3182 } else {
3183 for (n = 0; n < prime; n++) {
3184 for (nc = 0; nc < nctx; nc++) {
3185 struct i915_request *rq;
3186
3187 rq = i915_request_create(ve[nc]);
3188 if (IS_ERR(rq)) {
3189 err = PTR_ERR(rq);
3190 goto out;
3191 }
3192
3193 if (request[nc])
3194 i915_request_put(request[nc]);
3195 request[nc] = i915_request_get(rq);
3196 i915_request_add(rq);
3197 }
3198 }
3199 }
3200
3201 for (nc = 0; nc < nctx; nc++) {
3202 if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
3203 pr_err("%s(%s): wait for %llx:%lld timed out\n",
3204 __func__, ve[0]->engine->name,
3205 request[nc]->fence.context,
3206 request[nc]->fence.seqno);
3207
3208 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3209 __func__, ve[0]->engine->name,
3210 request[nc]->fence.context,
3211 request[nc]->fence.seqno);
3212 GEM_TRACE_DUMP();
3213 intel_gt_set_wedged(gt);
3214 break;
3215 }
3216 }
3217
3218 times[1] = ktime_sub(ktime_get_raw(), times[1]);
3219 if (prime == 1)
3220 times[0] = times[1];
3221
3222 for (nc = 0; nc < nctx; nc++) {
3223 i915_request_put(request[nc]);
3224 request[nc] = NULL;
3225 }
3226
3227 if (__igt_timeout(end_time, NULL))
3228 break;
3229 }
3230
3231 err = igt_live_test_end(&t);
3232 if (err)
3233 goto out;
3234
3235 pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
3236 nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
3237 prime, div64_u64(ktime_to_ns(times[1]), prime));
3238
3239 out:
3240 if (igt_flush_test(gt->i915))
3241 err = -EIO;
3242
3243 for (nc = 0; nc < nctx; nc++) {
3244 i915_request_put(request[nc]);
3245 intel_context_unpin(ve[nc]);
3246 intel_context_put(ve[nc]);
3247 }
3248 return err;
3249 }
3250
3251 static int live_virtual_engine(void *arg)
3252 {
3253 struct intel_gt *gt = arg;
3254 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3255 struct intel_engine_cs *engine;
3256 enum intel_engine_id id;
3257 unsigned int class, inst;
3258 int err;
3259
3260 if (intel_uc_uses_guc_submission(&gt->uc))
3261 return 0;
3262
3263 for_each_engine(engine, gt, id) {
3264 err = nop_virtual_engine(gt, &engine, 1, 1, 0);
3265 if (err) {
3266 pr_err("Failed to wrap engine %s: err=%d\n",
3267 engine->name, err);
3268 return err;
3269 }
3270 }
3271
3272 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3273 int nsibling, n;
3274
3275 nsibling = 0;
3276 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3277 if (!gt->engine_class[class][inst])
3278 continue;
3279
3280 siblings[nsibling++] = gt->engine_class[class][inst];
3281 }
3282 if (nsibling < 2)
3283 continue;
3284
3285 for (n = 1; n <= nsibling + 1; n++) {
3286 err = nop_virtual_engine(gt, siblings, nsibling,
3287 n, 0);
3288 if (err)
3289 return err;
3290 }
3291
3292 err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
3293 if (err)
3294 return err;
3295 }
3296
3297 return 0;
3298 }
3299
3300 static int mask_virtual_engine(struct intel_gt *gt,
3301 struct intel_engine_cs **siblings,
3302 unsigned int nsibling)
3303 {
3304 struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
3305 struct intel_context *ve;
3306 struct igt_live_test t;
3307 unsigned int n;
3308 int err;
3309
3310 /*
3311 * Check that by setting the execution mask on a request, we can
3312 * restrict it to our desired engine within the virtual engine.
3313 */
3314
3315 ve = intel_execlists_create_virtual(siblings, nsibling);
3316 if (IS_ERR(ve)) {
3317 err = PTR_ERR(ve);
3318 goto out_close;
3319 }
3320
3321 err = intel_context_pin(ve);
3322 if (err)
3323 goto out_put;
3324
3325 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3326 if (err)
3327 goto out_unpin;
3328
3329 for (n = 0; n < nsibling; n++) {
3330 request[n] = i915_request_create(ve);
3331 if (IS_ERR(request[n])) {
3332 err = PTR_ERR(request[n]);
3333 nsibling = n;
3334 goto out;
3335 }
3336
3337 /* Reverse order as it's more likely to be unnatural */
3338 request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
3339
3340 i915_request_get(request[n]);
3341 i915_request_add(request[n]);
3342 }
3343
3344 for (n = 0; n < nsibling; n++) {
3345 if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
3346 pr_err("%s(%s): wait for %llx:%lld timed out\n",
3347 __func__, ve->engine->name,
3348 request[n]->fence.context,
3349 request[n]->fence.seqno);
3350
3351 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3352 __func__, ve->engine->name,
3353 request[n]->fence.context,
3354 request[n]->fence.seqno);
3355 GEM_TRACE_DUMP();
3356 intel_gt_set_wedged(gt);
3357 err = -EIO;
3358 goto out;
3359 }
3360
3361 if (request[n]->engine != siblings[nsibling - n - 1]) {
3362 pr_err("Executed on wrong sibling '%s', expected '%s'\n",
3363 request[n]->engine->name,
3364 siblings[nsibling - n - 1]->name);
3365 err = -EINVAL;
3366 goto out;
3367 }
3368 }
3369
3370 err = igt_live_test_end(&t);
3371 out:
3372 if (igt_flush_test(gt->i915))
3373 err = -EIO;
3374
3375 for (n = 0; n < nsibling; n++)
3376 i915_request_put(request[n]);
3377
3378 out_unpin:
3379 intel_context_unpin(ve);
3380 out_put:
3381 intel_context_put(ve);
3382 out_close:
3383 return err;
3384 }
3385
3386 static int live_virtual_mask(void *arg)
3387 {
3388 struct intel_gt *gt = arg;
3389 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3390 unsigned int class, inst;
3391 int err;
3392
3393 if (intel_uc_uses_guc_submission(&gt->uc))
3394 return 0;
3395
3396 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3397 unsigned int nsibling;
3398
3399 nsibling = 0;
3400 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3401 if (!gt->engine_class[class][inst])
3402 break;
3403
3404 siblings[nsibling++] = gt->engine_class[class][inst];
3405 }
3406 if (nsibling < 2)
3407 continue;
3408
3409 err = mask_virtual_engine(gt, siblings, nsibling);
3410 if (err)
3411 return err;
3412 }
3413
3414 return 0;
3415 }
3416
3417 static int preserved_virtual_engine(struct intel_gt *gt,
3418 struct intel_engine_cs **siblings,
3419 unsigned int nsibling)
3420 {
3421 struct i915_request *last = NULL;
3422 struct intel_context *ve;
3423 struct i915_vma *scratch;
3424 struct igt_live_test t;
3425 unsigned int n;
3426 int err = 0;
3427 u32 *cs;
3428
3429 scratch = create_scratch(siblings[0]->gt);
3430 if (IS_ERR(scratch))
3431 return PTR_ERR(scratch);
3432
3433 err = i915_vma_sync(scratch);
3434 if (err)
3435 goto out_scratch;
3436
3437 ve = intel_execlists_create_virtual(siblings, nsibling);
3438 if (IS_ERR(ve)) {
3439 err = PTR_ERR(ve);
3440 goto out_scratch;
3441 }
3442
3443 err = intel_context_pin(ve);
3444 if (err)
3445 goto out_put;
3446
3447 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3448 if (err)
3449 goto out_unpin;
3450
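/*
* Leapfrog through the CS_GPR: each request reads back the GPR seeded
* by the previous request (likely run on a different sibling) into the
* scratch page, then seeds the next GPR. The readback below only
* matches if the GPR state follows the context between engines (the
* first read relies on the GPR starting as zero in a fresh context).
*/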
3451 for (n = 0; n < NUM_GPR_DW; n++) {
3452 struct intel_engine_cs *engine = siblings[n % nsibling];
3453 struct i915_request *rq;
3454
3455 rq = i915_request_create(ve);
3456 if (IS_ERR(rq)) {
3457 err = PTR_ERR(rq);
3458 goto out_end;
3459 }
3460
3461 i915_request_put(last);
3462 last = i915_request_get(rq);
3463
3464 cs = intel_ring_begin(rq, 8);
3465 if (IS_ERR(cs)) {
3466 i915_request_add(rq);
3467 err = PTR_ERR(cs);
3468 goto out_end;
3469 }
3470
3471 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
3472 *cs++ = CS_GPR(engine, n);
3473 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
3474 *cs++ = 0;
3475
3476 *cs++ = MI_LOAD_REGISTER_IMM(1);
3477 *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
3478 *cs++ = n + 1;
3479
3480 *cs++ = MI_NOOP;
3481 intel_ring_advance(rq, cs);
3482
3483 /* Restrict this request to run on a particular engine */
3484 rq->execution_mask = engine->mask;
3485 i915_request_add(rq);
3486 }
3487
3488 if (i915_request_wait(last, 0, HZ / 5) < 0) {
3489 err = -ETIME;
3490 goto out_end;
3491 }
3492
3493 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
3494 if (IS_ERR(cs)) {
3495 err = PTR_ERR(cs);
3496 goto out_end;
3497 }
3498
3499 for (n = 0; n < NUM_GPR_DW; n++) {
3500 if (cs[n] != n) {
3501 pr_err("Incorrect value[%d] found for GPR[%d]\n",
3502 cs[n], n);
3503 err = -EINVAL;
3504 break;
3505 }
3506 }
3507
3508 i915_gem_object_unpin_map(scratch->obj);
3509
3510 out_end:
3511 if (igt_live_test_end(&t))
3512 err = -EIO;
3513 i915_request_put(last);
3514 out_unpin:
3515 intel_context_unpin(ve);
3516 out_put:
3517 intel_context_put(ve);
3518 out_scratch:
3519 i915_vma_unpin_and_release(&scratch, 0);
3520 return err;
3521 }
3522
3523 static int live_virtual_preserved(void *arg)
3524 {
3525 struct intel_gt *gt = arg;
3526 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3527 unsigned int class, inst;
3528
3529 /*
3530 * Check that the context image retains non-privileged (user) registers
3531 * from one engine to the next. For this we check that the CS_GPR
3532 * are preserved.
3533 */
3534
3535 if (intel_uc_uses_guc_submission(&gt->uc))
3536 return 0;
3537
3538 /* As we use CS_GPR we cannot run before they existed on all engines. */
3539 if (INTEL_GEN(gt->i915) < 9)
3540 return 0;
3541
3542 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3543 int nsibling, err;
3544
3545 nsibling = 0;
3546 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3547 if (!gt->engine_class[class][inst])
3548 continue;
3549
3550 siblings[nsibling++] = gt->engine_class[class][inst];
3551 }
3552 if (nsibling < 2)
3553 continue;
3554
3555 err = preserved_virtual_engine(gt, siblings, nsibling);
3556 if (err)
3557 return err;
3558 }
3559
3560 return 0;
3561 }
3562
3563 static int bond_virtual_engine(struct intel_gt *gt,
3564 unsigned int class,
3565 struct intel_engine_cs **siblings,
3566 unsigned int nsibling,
3567 unsigned int flags)
3568 #define BOND_SCHEDULE BIT(0)
3569 {
3570 struct intel_engine_cs *master;
3571 struct i915_request *rq[16];
3572 enum intel_engine_id id;
3573 struct igt_spinner spin;
3574 unsigned long n;
3575 int err;
3576
3577 /*
3578 * A set of bonded requests is intended to be run concurrently
3579 * across a number of engines. We use one request per-engine
3580 * and a magic fence to schedule each of the bonded requests
3581 * at the same time. A consequence of our current scheduler is that
3582 * we only move requests to the HW ready queue when the request
3583 * becomes ready, that is when all of its prerequisite fences have
3584 * been signaled. As one of those fences is the master submit fence,
3585 * there is a delay on all secondary fences as the HW may be
3586 * currently busy. Equally, as all the requests are independent,
3587 * they may have other fences that delay individual request
3588 * submission to HW. Ergo, we do not guarantee that all requests are
3589 * immediately submitted to HW at the same time, just that if the
3590 * rules are abided by, they are ready at the same time as the
3591 * first is submitted. Userspace can embed semaphores in its batch
3592 * to ensure parallel execution of its phases as it requires.
3593 * Though naturally it gets requested that perhaps the scheduler should
3594 * take care of parallel execution, even across preemption events on
3595 * different HW. (The proper answer is of course "lalalala".)
3596 *
3597 * With the submit-fence, we have identified three possible phases
3598 * of synchronisation depending on the master fence: queued (not
3599 * ready), executing, and signaled. The first two are quite simple
3600 * and checked below. However, the signaled master fence handling is
3601 * contentious. Currently we do not distinguish between a signaled
3602 * fence and an expired fence, as once signaled it does not convey
3603 * any information about the previous execution. It may even be freed
3604 * and hence checking later it may not exist at all. Ergo we currently
3605 * do not apply the bonding constraint for an already signaled fence,
3606 * as our expectation is that it should not constrain the secondaries
3607 * and is outside of the scope of the bonded request API (i.e. all
3608 * userspace requests are meant to be running in parallel). As
3609 * it imposes no constraint, and is effectively a no-op, we do not
3610 * check below as normal execution flows are checked extensively above.
3611 *
3612 * XXX Is the degenerate handling of signaled submit fences the
3613 * expected behaviour for userspace?
3614 */
3615
3616 GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
3617
3618 if (igt_spinner_init(&spin, gt))
3619 return -ENOMEM;
3620
3621 err = 0;
3622 rq[0] = ERR_PTR(-ENOMEM);
3623 for_each_engine(master, gt, id) {
3624 struct i915_sw_fence fence = {};
3625 struct intel_context *ce;
3626
3627 if (master->class == class)
3628 continue;
3629
3630 ce = intel_context_create(master);
3631 if (IS_ERR(ce)) {
3632 err = PTR_ERR(ce);
3633 goto out;
3634 }
3635
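/* Seed the array with ERR_PTR so the cleanup loops know where to stop */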
3636 memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
3637
3638 rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP);
3639 intel_context_put(ce);
3640 if (IS_ERR(rq[0])) {
3641 err = PTR_ERR(rq[0]);
3642 goto out;
3643 }
3644 i915_request_get(rq[0]);
3645
3646 if (flags & BOND_SCHEDULE) {
3647 onstack_fence_init(&fence);
3648 err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
3649 &fence,
3650 GFP_KERNEL);
3651 }
3652
3653 i915_request_add(rq[0]);
3654 if (err < 0)
3655 goto out;
3656
3657 if (!(flags & BOND_SCHEDULE) &&
3658 !igt_wait_for_spinner(&spin, rq[0])) {
3659 err = -EIO;
3660 goto out;
3661 }
3662
3663 for (n = 0; n < nsibling; n++) {
3664 struct intel_context *ve;
3665
3666 ve = intel_execlists_create_virtual(siblings, nsibling);
3667 if (IS_ERR(ve)) {
3668 err = PTR_ERR(ve);
3669 onstack_fence_fini(&fence);
3670 goto out;
3671 }
3672
3673 err = intel_virtual_engine_attach_bond(ve->engine,
3674 master,
3675 siblings[n]);
3676 if (err) {
3677 intel_context_put(ve);
3678 onstack_fence_fini(&fence);
3679 goto out;
3680 }
3681
3682 err = intel_context_pin(ve);
3683 intel_context_put(ve);
3684 if (err) {
3685 onstack_fence_fini(&fence);
3686 goto out;
3687 }
3688
3689 rq[n + 1] = i915_request_create(ve);
3690 intel_context_unpin(ve);
3691 if (IS_ERR(rq[n + 1])) {
3692 err = PTR_ERR(rq[n + 1]);
3693 onstack_fence_fini(&fence);
3694 goto out;
3695 }
3696 i915_request_get(rq[n + 1]);
3697
3698 err = i915_request_await_execution(rq[n + 1],
3699 &rq[0]->fence,
3700 ve->engine->bond_execute);
3701 i915_request_add(rq[n + 1]);
3702 if (err < 0) {
3703 onstack_fence_fini(&fence);
3704 goto out;
3705 }
3706 }
3707 onstack_fence_fini(&fence);
3708 intel_engine_flush_submission(master);
3709 igt_spinner_end(&spin);
3710
3711 if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
3712 pr_err("Master request did not execute (on %s)!\n",
3713 rq[0]->engine->name);
3714 err = -EIO;
3715 goto out;
3716 }
3717
3718 for (n = 0; n < nsibling; n++) {
3719 if (i915_request_wait(rq[n + 1], 0,
3720 MAX_SCHEDULE_TIMEOUT) < 0) {
3721 err = -EIO;
3722 goto out;
3723 }
3724
3725 if (rq[n + 1]->engine != siblings[n]) {
3726 pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
3727 siblings[n]->name,
3728 rq[n + 1]->engine->name,
3729 rq[0]->engine->name);
3730 err = -EINVAL;
3731 goto out;
3732 }
3733 }
3734
3735 for (n = 0; !IS_ERR(rq[n]); n++)
3736 i915_request_put(rq[n]);
3737 rq[0] = ERR_PTR(-ENOMEM);
3738 }
3739
3740 out:
3741 for (n = 0; !IS_ERR(rq[n]); n++)
3742 i915_request_put(rq[n]);
3743 if (igt_flush_test(gt->i915))
3744 err = -EIO;
3745
3746 igt_spinner_fini(&spin);
3747 return err;
3748 }
3749
3750 static int live_virtual_bond(void *arg)
3751 {
3752 static const struct phase {
3753 const char *name;
3754 unsigned int flags;
3755 } phases[] = {
3756 { "", 0 },
3757 { "schedule", BOND_SCHEDULE },
3758 { },
3759 };
3760 struct intel_gt *gt = arg;
3761 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3762 unsigned int class, inst;
3763 int err;
3764
3765 if (intel_uc_uses_guc_submission(&gt->uc))
3766 return 0;
3767
3768 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3769 const struct phase *p;
3770 int nsibling;
3771
3772 nsibling = 0;
3773 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3774 if (!gt->engine_class[class][inst])
3775 break;
3776
3777 GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
3778 siblings[nsibling++] = gt->engine_class[class][inst];
3779 }
3780 if (nsibling < 2)
3781 continue;
3782
3783 for (p = phases; p->name; p++) {
3784 err = bond_virtual_engine(gt,
3785 class, siblings, nsibling,
3786 p->flags);
3787 if (err) {
3788 pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
3789 __func__, p->name, class, nsibling, err);
3790 return err;
3791 }
3792 }
3793 }
3794
3795 return 0;
3796 }
3797
3798 static int reset_virtual_engine(struct intel_gt *gt,
3799 struct intel_engine_cs **siblings,
3800 unsigned int nsibling)
3801 {
3802 struct intel_engine_cs *engine;
3803 struct intel_context *ve;
3804 unsigned long *heartbeat;
3805 struct igt_spinner spin;
3806 struct i915_request *rq;
3807 unsigned int n;
3808 int err = 0;
3809
3810 /*
3811 * In order to support offline error capture for fast preempt reset,
3812 * we need to decouple the guilty request and ensure that it and its
3813 * descendants are not executed while the capture is in progress.
3814 */
3815
3816 heartbeat = kmalloc_array(nsibling, sizeof(*heartbeat), GFP_KERNEL);
3817 if (!heartbeat)
3818 return -ENOMEM;
3819
3820 if (igt_spinner_init(&spin, gt)) {
3821 err = -ENOMEM;
3822 goto out_free;
3823 }
3824
3825 ve = intel_execlists_create_virtual(siblings, nsibling);
3826 if (IS_ERR(ve)) {
3827 err = PTR_ERR(ve);
3828 goto out_spin;
3829 }
3830
3831 for (n = 0; n < nsibling; n++)
3832 engine_heartbeat_disable(siblings[n], &heartbeat[n]);
3833
3834 rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
3835 if (IS_ERR(rq)) {
3836 err = PTR_ERR(rq);
3837 goto out_heartbeat;
3838 }
3839 i915_request_add(rq);
3840
3841 if (!igt_wait_for_spinner(&spin, rq)) {
3842 intel_gt_set_wedged(gt);
3843 err = -ETIME;
3844 goto out_heartbeat;
3845 }
3846
3847 engine = rq->engine;
3848 GEM_BUG_ON(engine == ve->engine);
3849
3850 /* Take ownership of the reset and tasklet */
3851 if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
3852 &gt->reset.flags)) {
3853 intel_gt_set_wedged(gt);
3854 err = -EBUSY;
3855 goto out_heartbeat;
3856 }
3857 tasklet_disable(&engine->execlists.tasklet);
3858
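/*
* Run the submission tasklet by hand to process the pending CS
* events, so the spinner is tracked as the active request.
*/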
3859 engine->execlists.tasklet.func(engine->execlists.tasklet.data);
3860 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
3861
3862 /* Fake a preemption event; failed of course */
3863 spin_lock_irq(&engine->active.lock);
3864 __unwind_incomplete_requests(engine);
3865 spin_unlock_irq(&engine->active.lock);
3866 GEM_BUG_ON(rq->engine != ve->engine);
3867
3868 /* Reset the engine while keeping our active request on hold */
3869 execlists_hold(engine, rq);
3870 GEM_BUG_ON(!i915_request_on_hold(rq));
3871
3872 intel_engine_reset(engine, NULL);
3873 GEM_BUG_ON(rq->fence.error != -EIO);
3874
3875 /* Release our grasp on the engine, letting CS flow again */
3876 tasklet_enable(&engine->execlists.tasklet);
3877 clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags);
3878
3879 /* Check that we do not resubmit the held request */
3880 i915_request_get(rq);
3881 if (!i915_request_wait(rq, 0, HZ / 5)) {
3882 pr_err("%s: on hold request completed!\n",
3883 engine->name);
3884 intel_gt_set_wedged(gt);
3885 err = -EIO;
3886 goto out_rq;
3887 }
3888 GEM_BUG_ON(!i915_request_on_hold(rq));
3889
3890 /* But is resubmitted on release */
3891 execlists_unhold(engine, rq);
3892 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
3893 pr_err("%s: held request did not complete!\n",
3894 engine->name);
3895 intel_gt_set_wedged(gt);
3896 err = -ETIME;
3897 }
3898
3899 out_rq:
3900 i915_request_put(rq);
3901 out_heartbeat:
3902 for (n = 0; n < nsibling; n++)
3903 engine_heartbeat_enable(siblings[n], heartbeat[n]);
3904
3905 intel_context_put(ve);
3906 out_spin:
3907 igt_spinner_fini(&spin);
3908 out_free:
3909 kfree(heartbeat);
3910 return err;
3911 }
3912
3913 static int live_virtual_reset(void *arg)
3914 {
3915 struct intel_gt *gt = arg;
3916 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3917 unsigned int class, inst;
3918
3919 /*
3920 * Check that we handle a reset event within a virtual engine.
3921 * Only the physical engine is reset, but we have to check the flow
3922 * of the virtual requests around the reset, and make sure it is not
3923 * forgotten.
3924 */
3925
3926 if (intel_uc_uses_guc_submission(&gt->uc))
3927 return 0;
3928
3929 if (!intel_has_reset_engine(gt))
3930 return 0;
3931
3932 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3933 int nsibling, err;
3934
3935 nsibling = 0;
3936 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3937 if (!gt->engine_class[class][inst])
3938 continue;
3939
3940 siblings[nsibling++] = gt->engine_class[class][inst];
3941 }
3942 if (nsibling < 2)
3943 continue;
3944
3945 err = reset_virtual_engine(gt, siblings, nsibling);
3946 if (err)
3947 return err;
3948 }
3949
3950 return 0;
3951 }
3952
3953 int intel_execlists_live_selftests(struct drm_i915_private *i915)
3954 {
3955 static const struct i915_subtest tests[] = {
3956 SUBTEST(live_sanitycheck),
3957 SUBTEST(live_unlite_switch),
3958 SUBTEST(live_unlite_preempt),
3959 SUBTEST(live_pin_rewind),
3960 SUBTEST(live_hold_reset),
3961 SUBTEST(live_error_interrupt),
3962 SUBTEST(live_timeslice_preempt),
3963 SUBTEST(live_timeslice_rewind),
3964 SUBTEST(live_timeslice_queue),
3965 SUBTEST(live_busywait_preempt),
3966 SUBTEST(live_preempt),
3967 SUBTEST(live_late_preempt),
3968 SUBTEST(live_nopreempt),
3969 SUBTEST(live_preempt_cancel),
3970 SUBTEST(live_suppress_self_preempt),
3971 SUBTEST(live_suppress_wait_preempt),
3972 SUBTEST(live_chain_preempt),
3973 SUBTEST(live_preempt_gang),
3974 SUBTEST(live_preempt_timeout),
3975 SUBTEST(live_preempt_smoke),
3976 SUBTEST(live_virtual_engine),
3977 SUBTEST(live_virtual_mask),
3978 SUBTEST(live_virtual_preserved),
3979 SUBTEST(live_virtual_bond),
3980 SUBTEST(live_virtual_reset),
3981 };
3982
3983 if (!HAS_EXECLISTS(i915))
3984 return 0;
3985
3986 if (intel_gt_is_wedged(&i915->gt))
3987 return 0;
3988
3989 return intel_gt_live_subtests(tests, &i915->gt);
3990 }
3991
3992 static void hexdump(const void *buf, size_t len)
3993 {
3994 const size_t rowsize = 8 * sizeof(u32);
3995 const void *prev = NULL;
3996 bool skip = false;
3997 size_t pos;
3998
3999 for (pos = 0; pos < len; pos += rowsize) {
4000 char line[128];
4001
4002 if (prev && !memcmp(prev, buf + pos, rowsize)) {
4003 if (!skip) {
4004 pr_info("*\n");
4005 skip = true;
4006 }
4007 continue;
4008 }
4009
4010 WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
4011 rowsize, sizeof(u32),
4012 line, sizeof(line),
4013 false) >= sizeof(line));
4014 pr_info("[%04zx] %s\n", pos, line);
4015
4016 prev = buf + pos;
4017 skip = false;
4018 }
4019 }
4020
4021 static int emit_semaphore_signal(struct intel_context *ce, void *slot)
4022 {
4023 const u32 offset =
4024 i915_ggtt_offset(ce->engine->status_page.vma) +
4025 offset_in_page(slot);
4026 struct i915_request *rq;
4027 u32 *cs;
4028
4029 rq = intel_context_create_request(ce);
4030 if (IS_ERR(rq))
4031 return PTR_ERR(rq);
4032
4033 cs = intel_ring_begin(rq, 4);
4034 if (IS_ERR(cs)) {
4035 i915_request_add(rq);
4036 return PTR_ERR(cs);
4037 }
4038
4039 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
4040 *cs++ = offset;
4041 *cs++ = 0;
4042 *cs++ = 1;
4043
4044 intel_ring_advance(rq, cs);
4045
4046 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
4047 i915_request_add(rq);
4048 return 0;
4049 }
4050
4051 static int context_flush(struct intel_context *ce, long timeout)
4052 {
4053 struct i915_request *rq;
4054 struct dma_fence *fence;
4055 int err = 0;
4056
4057 rq = intel_engine_create_kernel_request(ce->engine);
4058 if (IS_ERR(rq))
4059 return PTR_ERR(rq);
4060
4061 fence = i915_active_fence_get(&ce->timeline->last_request);
4062 if (fence) {
4063 i915_request_await_dma_fence(rq, fence);
4064 dma_fence_put(fence);
4065 }
4066
4067 rq = i915_request_get(rq);
4068 i915_request_add(rq);
4069 if (i915_request_wait(rq, 0, timeout) < 0)
4070 err = -ETIME;
4071 i915_request_put(rq);
4072
4073 rmb(); /* We know the request is written, make sure all state is too! */
4074 return err;
4075 }
4076
4077 static int live_lrc_layout(void *arg)
4078 {
4079 struct intel_gt *gt = arg;
4080 struct intel_engine_cs *engine;
4081 enum intel_engine_id id;
4082 u32 *lrc;
4083 int err;
4084
4085 /*
4086 * Check the register offsets we use to create the initial reg state
4087 * match the layout saved by HW.
4088 */
4089
4090 lrc = kmalloc(PAGE_SIZE, GFP_KERNEL);
4091 if (!lrc)
4092 return -ENOMEM;
4093
4094 err = 0;
4095 for_each_engine(engine, gt, id) {
4096 u32 *hw;
4097 int dw;
4098
4099 if (!engine->default_state)
4100 continue;
4101
4102 hw = i915_gem_object_pin_map(engine->default_state,
4103 I915_MAP_WB);
4104 if (IS_ERR(hw)) {
4105 err = PTR_ERR(hw);
4106 break;
4107 }
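/* Skip the per-process HWSP page to reach the saved register state */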
4108 hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
4109
4110 execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
4111 engine->kernel_context,
4112 engine,
4113 engine->kernel_context->ring,
4114 true);
4115
4116 dw = 0;
4117 do {
4118 u32 lri = hw[dw];
4119
4120 if (lri == 0) {
4121 dw++;
4122 continue;
4123 }
4124
4125 if (lrc[dw] == 0) {
4126 pr_debug("%s: skipped instruction %x at dword %d\n",
4127 engine->name, lri, dw);
4128 dw++;
4129 continue;
4130 }
4131
4132 if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
4133 pr_err("%s: Expected LRI command at dword %d, found %08x\n",
4134 engine->name, dw, lri);
4135 err = -EINVAL;
4136 break;
4137 }
4138
4139 if (lrc[dw] != lri) {
4140 pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
4141 engine->name, dw, lri, lrc[dw]);
4142 err = -EINVAL;
4143 break;
4144 }
4145
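/*
* MI_LOAD_REGISTER_IMM(n) encodes a length of 2*n - 1, so +1
* yields the number of reg/value dwords to step over.
*/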
4146 lri &= 0x7f;
4147 lri++;
4148 dw++;
4149
4150 while (lri) {
4151 if (hw[dw] != lrc[dw]) {
4152 pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
4153 engine->name, dw, hw[dw], lrc[dw]);
4154 err = -EINVAL;
4155 break;
4156 }
4157
4158 /*
4159 * Skip over the actual register value as we
4160 * expect that to differ.
4161 */
4162 dw += 2;
4163 lri -= 2;
4164 }
4165 } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
4166
4167 if (err) {
4168 pr_info("%s: HW register image:\n", engine->name);
4169 hexdump(hw, PAGE_SIZE);
4170
4171 pr_info("%s: SW register image:\n", engine->name);
4172 hexdump(lrc, PAGE_SIZE);
4173 }
4174
4175 i915_gem_object_unpin_map(engine->default_state);
4176 if (err)
4177 break;
4178 }
4179
4180 kfree(lrc);
4181 return err;
4182 }
4183
4184 static int find_offset(const u32 *lri, u32 offset)
4185 {
4186 int i;
4187
4188 for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
4189 if (lri[i] == offset)
4190 return i;
4191
4192 return -1;
4193 }
4194
4195 static int live_lrc_fixed(void *arg)
4196 {
4197 struct intel_gt *gt = arg;
4198 struct intel_engine_cs *engine;
4199 enum intel_engine_id id;
4200 int err = 0;
4201
4202 /*
4203 * Check the assumed register offsets match the actual locations in
4204 * the context image.
4205 */
4206
4207 for_each_engine(engine, gt, id) {
4208 const struct {
4209 u32 reg;
4210 u32 offset;
4211 const char *name;
4212 } tbl[] = {
4213 {
4214 i915_mmio_reg_offset(RING_START(engine->mmio_base)),
4215 CTX_RING_START - 1,
4216 "RING_START"
4217 },
4218 {
4219 i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
4220 CTX_RING_CTL - 1,
4221 "RING_CTL"
4222 },
4223 {
4224 i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
4225 CTX_RING_HEAD - 1,
4226 "RING_HEAD"
4227 },
4228 {
4229 i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
4230 CTX_RING_TAIL - 1,
4231 "RING_TAIL"
4232 },
4233 {
4234 i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
4235 lrc_ring_mi_mode(engine),
4236 "RING_MI_MODE"
4237 },
4238 {
4239 i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
4240 CTX_BB_STATE - 1,
4241 "BB_STATE"
4242 },
4243 {
4244 i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
4245 CTX_TIMESTAMP - 1,
4246 "RING_CTX_TIMESTAMP"
4247 },
4248 { },
4249 }, *t;
4250 u32 *hw;
4251
4252 if (!engine->default_state)
4253 continue;
4254
4255 hw = i915_gem_object_pin_map(engine->default_state,
4256 I915_MAP_WB);
4257 if (IS_ERR(hw)) {
4258 err = PTR_ERR(hw);
4259 break;
4260 }
4261 hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
4262
4263 for (t = tbl; t->name; t++) {
4264 int dw = find_offset(hw, t->reg);
4265
4266 if (dw != t->offset) {
4267 pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
4268 engine->name,
4269 t->name,
4270 t->reg,
4271 dw,
4272 t->offset);
4273 err = -EINVAL;
4274 }
4275 }
4276
4277 i915_gem_object_unpin_map(engine->default_state);
4278 }
4279
4280 return err;
4281 }
4282
4283 static int __live_lrc_state(struct intel_engine_cs *engine,
4284 struct i915_vma *scratch)
4285 {
4286 struct intel_context *ce;
4287 struct i915_request *rq;
4288 enum {
4289 RING_START_IDX = 0,
4290 RING_TAIL_IDX,
4291 MAX_IDX
4292 };
4293 u32 expected[MAX_IDX];
4294 u32 *cs;
4295 int err;
4296 int n;
4297
4298 ce = intel_context_create(engine);
4299 if (IS_ERR(ce))
4300 return PTR_ERR(ce);
4301
4302 err = intel_context_pin(ce);
4303 if (err)
4304 goto err_put;
4305
4306 rq = i915_request_create(ce);
4307 if (IS_ERR(rq)) {
4308 err = PTR_ERR(rq);
4309 goto err_unpin;
4310 }
4311
4312 cs = intel_ring_begin(rq, 4 * MAX_IDX);
4313 if (IS_ERR(cs)) {
4314 err = PTR_ERR(cs);
4315 i915_request_add(rq);
4316 goto err_unpin;
4317 }
4318
4319 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4320 *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
4321 *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
4322 *cs++ = 0;
4323
4324 expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
4325
4326 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4327 *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
4328 *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
4329 *cs++ = 0;
4330
4331 i915_vma_lock(scratch);
4332 err = i915_request_await_object(rq, scratch->obj, true);
4333 if (!err)
4334 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
4335 i915_vma_unlock(scratch);
4336
4337 i915_request_get(rq);
4338 i915_request_add(rq);
4339 if (err)
4340 goto err_rq;
4341
4342 intel_engine_flush_submission(engine);
4343 expected[RING_TAIL_IDX] = ce->ring->tail;
4344
4345 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4346 err = -ETIME;
4347 goto err_rq;
4348 }
4349
4350 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
4351 if (IS_ERR(cs)) {
4352 err = PTR_ERR(cs);
4353 goto err_rq;
4354 }
4355
4356 for (n = 0; n < MAX_IDX; n++) {
4357 if (cs[n] != expected[n]) {
4358 pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
4359 engine->name, n, cs[n], expected[n]);
4360 err = -EINVAL;
4361 break;
4362 }
4363 }
4364
4365 i915_gem_object_unpin_map(scratch->obj);
4366
4367 err_rq:
4368 i915_request_put(rq);
4369 err_unpin:
4370 intel_context_unpin(ce);
4371 err_put:
4372 intel_context_put(ce);
4373 return err;
4374 }
4375
4376 static int live_lrc_state(void *arg)
4377 {
4378 struct intel_gt *gt = arg;
4379 struct intel_engine_cs *engine;
4380 struct i915_vma *scratch;
4381 enum intel_engine_id id;
4382 int err = 0;
4383
4384 /*
4385 * Check the live register state matches what we expect for this
4386 * intel_context.
4387 */
4388
4389 scratch = create_scratch(gt);
4390 if (IS_ERR(scratch))
4391 return PTR_ERR(scratch);
4392
4393 for_each_engine(engine, gt, id) {
4394 err = __live_lrc_state(engine, scratch);
4395 if (err)
4396 break;
4397 }
4398
4399 if (igt_flush_test(gt->i915))
4400 err = -EIO;
4401
4402 i915_vma_unpin_and_release(&scratch, 0);
4403 return err;
4404 }
4405
4406 static int gpr_make_dirty(struct intel_context *ce)
4407 {
4408 struct i915_request *rq;
4409 u32 *cs;
4410 int n;
4411
4412 rq = intel_context_create_request(ce);
4413 if (IS_ERR(rq))
4414 return PTR_ERR(rq);
4415
4416 cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
4417 if (IS_ERR(cs)) {
4418 i915_request_add(rq);
4419 return PTR_ERR(cs);
4420 }
4421
4422 *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
4423 for (n = 0; n < NUM_GPR_DW; n++) {
4424 *cs++ = CS_GPR(ce->engine, n);
4425 *cs++ = STACK_MAGIC;
4426 }
4427 *cs++ = MI_NOOP;
4428
4429 intel_ring_advance(rq, cs);
4430
4431 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
4432 i915_request_add(rq);
4433
4434 return 0;
4435 }
4436
4437 static struct i915_request *
4438 __gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot)
4439 {
4440 const u32 offset =
4441 i915_ggtt_offset(ce->engine->status_page.vma) +
4442 offset_in_page(slot);
4443 struct i915_request *rq;
4444 u32 *cs;
4445 int err;
4446 int n;
4447
4448 rq = intel_context_create_request(ce);
4449 if (IS_ERR(rq))
4450 return rq;
4451
4452 cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW);
4453 if (IS_ERR(cs)) {
4454 i915_request_add(rq);
4455 return ERR_CAST(cs);
4456 }
4457
4458 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4459 *cs++ = MI_NOOP;
4460
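/*
* Hold the GPR readback on a semaphore until the CPU, or a
* preempting kernel context request, writes a non-zero value into
* the status page slot.
*/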
4461 *cs++ = MI_SEMAPHORE_WAIT |
4462 MI_SEMAPHORE_GLOBAL_GTT |
4463 MI_SEMAPHORE_POLL |
4464 MI_SEMAPHORE_SAD_NEQ_SDD;
4465 *cs++ = 0;
4466 *cs++ = offset;
4467 *cs++ = 0;
4468
4469 for (n = 0; n < NUM_GPR_DW; n++) {
4470 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4471 *cs++ = CS_GPR(ce->engine, n);
4472 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
4473 *cs++ = 0;
4474 }
4475
4476 i915_vma_lock(scratch);
4477 err = i915_request_await_object(rq, scratch->obj, true);
4478 if (!err)
4479 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
4480 i915_vma_unlock(scratch);
4481
4482 i915_request_get(rq);
4483 i915_request_add(rq);
4484 if (err) {
4485 i915_request_put(rq);
4486 rq = ERR_PTR(err);
4487 }
4488
4489 return rq;
4490 }
4491
4492 static int __live_lrc_gpr(struct intel_engine_cs *engine,
4493 struct i915_vma *scratch,
4494 bool preempt)
4495 {
4496 u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4);
4497 struct intel_context *ce;
4498 struct i915_request *rq;
4499 u32 *cs;
4500 int err;
4501 int n;
4502
4503 if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
4504 return 0; /* GPR only on rcs0 for gen8 */
4505
4506 err = gpr_make_dirty(engine->kernel_context);
4507 if (err)
4508 return err;
4509
4510 ce = intel_context_create(engine);
4511 if (IS_ERR(ce))
4512 return PTR_ERR(ce);
4513
4514 rq = __gpr_read(ce, scratch, slot);
4515 if (IS_ERR(rq)) {
4516 err = PTR_ERR(rq);
4517 goto err_put;
4518 }
4519
4520 err = wait_for_submit(engine, rq, HZ / 2);
4521 if (err)
4522 goto err_rq;
4523
4524 if (preempt) {
4525 err = gpr_make_dirty(engine->kernel_context);
4526 if (err)
4527 goto err_rq;
4528
4529 err = emit_semaphore_signal(engine->kernel_context, slot);
4530 if (err)
4531 goto err_rq;
4532 } else {
4533 slot[0] = 1;
4534 wmb();
4535 }
4536
4537 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4538 err = -ETIME;
4539 goto err_rq;
4540 }
4541
4542 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
4543 if (IS_ERR(cs)) {
4544 err = PTR_ERR(cs);
4545 goto err_rq;
4546 }
4547
4548 for (n = 0; n < NUM_GPR_DW; n++) {
4549 if (cs[n]) {
4550 pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
4551 engine->name,
4552 n / 2, n & 1 ? "udw" : "ldw",
4553 cs[n]);
4554 err = -EINVAL;
4555 break;
4556 }
4557 }
4558
4559 i915_gem_object_unpin_map(scratch->obj);
4560
4561 err_rq:
4562 memset32(&slot[0], -1, 4);
4563 wmb();
4564 i915_request_put(rq);
4565 err_put:
4566 intel_context_put(ce);
4567 return err;
4568 }
4569
4570 static int live_lrc_gpr(void *arg)
4571 {
4572 struct intel_gt *gt = arg;
4573 struct intel_engine_cs *engine;
4574 struct i915_vma *scratch;
4575 enum intel_engine_id id;
4576 int err = 0;
4577
4578 /*
4579 * Check that GPR registers are cleared in new contexts as we need
4580 * to avoid leaking any information from previous contexts.
4581 */
4582
4583 scratch = create_scratch(gt);
4584 if (IS_ERR(scratch))
4585 return PTR_ERR(scratch);
4586
4587 for_each_engine(engine, gt, id) {
4588 unsigned long heartbeat;
4589
4590 engine_heartbeat_disable(engine, &heartbeat);
4591
4592 err = __live_lrc_gpr(engine, scratch, false);
4593 if (err)
4594 goto err;
4595
4596 err = __live_lrc_gpr(engine, scratch, true);
4597 if (err)
4598 goto err;
4599
4600 err:
4601 engine_heartbeat_enable(engine, heartbeat);
4602 if (igt_flush_test(gt->i915))
4603 err = -EIO;
4604 if (err)
4605 break;
4606 }
4607
4608 i915_vma_unpin_and_release(&scratch, 0);
4609 return err;
4610 }
4611
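/*
 * Build a request on @ce that waits on a semaphore in the engine's
 * status page and, once released, stores RING_CTX_TIMESTAMP into
 * slot[idx], letting the caller sample the context timestamp at a
 * point of its own choosing.
 */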
4612 static struct i915_request *
4613 create_timestamp(struct intel_context *ce, void *slot, int idx)
4614 {
4615 const u32 offset =
4616 i915_ggtt_offset(ce->engine->status_page.vma) +
4617 offset_in_page(slot);
4618 struct i915_request *rq;
4619 u32 *cs;
4620 int err;
4621
4622 rq = intel_context_create_request(ce);
4623 if (IS_ERR(rq))
4624 return rq;
4625
4626 cs = intel_ring_begin(rq, 10);
4627 if (IS_ERR(cs)) {
4628 err = PTR_ERR(cs);
4629 goto err;
4630 }
4631
4632 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4633 *cs++ = MI_NOOP;
4634
4635 *cs++ = MI_SEMAPHORE_WAIT |
4636 MI_SEMAPHORE_GLOBAL_GTT |
4637 MI_SEMAPHORE_POLL |
4638 MI_SEMAPHORE_SAD_NEQ_SDD;
4639 *cs++ = 0;
4640 *cs++ = offset;
4641 *cs++ = 0;
4642
4643 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4644 *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base));
4645 *cs++ = offset + idx * sizeof(u32);
4646 *cs++ = 0;
4647
4648 intel_ring_advance(rq, cs);
4649
4650 rq->sched.attr.priority = I915_PRIORITY_MASK;
4651 err = 0;
4652 err:
4653 i915_request_get(rq);
4654 i915_request_add(rq);
4655 if (err) {
4656 i915_request_put(rq);
4657 return ERR_PTR(err);
4658 }
4659
4660 return rq;
4661 }
4662
4663 struct lrc_timestamp {
4664 struct intel_engine_cs *engine;
4665 struct intel_context *ce[2];
4666 u32 poison;
4667 };
4668
4669 static bool timestamp_advanced(u32 start, u32 end)
4670 {
4671 return (s32)(end - start) > 0;
4672 }
4673
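/*
 * Poison the saved CTX_TIMESTAMP of ce[0] and run a request that
 * records the timestamp it sees once the context is restored. Release
 * the wait either by poking the semaphore directly or by preempting
 * with ce[1], then check that the restored value advanced past the
 * poison and that the value saved back into the context image advanced
 * past the restored sample.
 */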
4674 static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt)
4675 {
4676 u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4);
4677 struct i915_request *rq;
4678 u32 timestamp;
4679 int err = 0;
4680
4681 arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison;
4682 rq = create_timestamp(arg->ce[0], slot, 1);
4683 if (IS_ERR(rq))
4684 return PTR_ERR(rq);
4685
4686 err = wait_for_submit(rq->engine, rq, HZ / 2);
4687 if (err)
4688 goto err;
4689
4690 if (preempt) {
4691 arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef;
4692 err = emit_semaphore_signal(arg->ce[1], slot);
4693 if (err)
4694 goto err;
4695 } else {
4696 slot[0] = 1;
4697 wmb();
4698 }
4699
4700 /* And wait for switch to kernel (to save our context to memory) */
4701 err = context_flush(arg->ce[0], HZ / 2);
4702 if (err)
4703 goto err;
4704
4705 if (!timestamp_advanced(arg->poison, slot[1])) {
4706 pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
4707 arg->engine->name, preempt ? "preempt" : "simple",
4708 arg->poison, slot[1]);
4709 err = -EINVAL;
4710 }
4711
4712 timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]);
4713 if (!timestamp_advanced(slot[1], timestamp)) {
4714 pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n",
4715 arg->engine->name, preempt ? "preempt" : "simple",
4716 slot[1], timestamp);
4717 err = -EINVAL;
4718 }
4719
4720 err:
4721 memset32(slot, -1, 4);
4722 i915_request_put(rq);
4723 return err;
4724 }
4725
4726 static int live_lrc_timestamp(void *arg)
4727 {
4728 struct lrc_timestamp data = {};
4729 struct intel_gt *gt = arg;
4730 enum intel_engine_id id;
4731 const u32 poison[] = {
4732 0,
4733 S32_MAX,
4734 (u32)S32_MAX + 1,
4735 U32_MAX,
4736 };
4737
4738 /*
4739 * We want to verify that the timestamp is saved and restored across
4740 * context switches and is monotonic.
4741 *
4742 * So we do this with a little bit of LRC poisoning to check various
4743 * boundary conditions, and see what happens if we preempt the context
4744 * with a second request (carrying more poison into the timestamp).
4745 */
4746
4747 for_each_engine(data.engine, gt, id) {
4748 unsigned long heartbeat;
4749 int i, err = 0;
4750
4751 engine_heartbeat_disable(data.engine, &heartbeat);
4752
4753 for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
4754 struct intel_context *tmp;
4755
4756 tmp = intel_context_create(data.engine);
4757 if (IS_ERR(tmp)) {
4758 err = PTR_ERR(tmp);
4759 goto err;
4760 }
4761
4762 err = intel_context_pin(tmp);
4763 if (err) {
4764 intel_context_put(tmp);
4765 goto err;
4766 }
4767
4768 data.ce[i] = tmp;
4769 }
4770
4771 for (i = 0; i < ARRAY_SIZE(poison); i++) {
4772 data.poison = poison[i];
4773
4774 err = __lrc_timestamp(&data, false);
4775 if (err)
4776 break;
4777
4778 err = __lrc_timestamp(&data, true);
4779 if (err)
4780 break;
4781 }
4782
4783 err:
4784 engine_heartbeat_enable(data.engine, heartbeat);
4785 for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
4786 if (!data.ce[i])
4787 break;
4788
4789 intel_context_unpin(data.ce[i]);
4790 intel_context_put(data.ce[i]);
4791 }
4792
4793 if (igt_flush_test(gt->i915))
4794 err = -EIO;
4795 if (err)
4796 return err;
4797 }
4798
4799 return 0;
4800 }
4801
4802 static struct i915_vma *
4803 create_user_vma(struct i915_address_space *vm, unsigned long size)
4804 {
4805 struct drm_i915_gem_object *obj;
4806 struct i915_vma *vma;
4807 int err;
4808
4809 obj = i915_gem_object_create_internal(vm->i915, size);
4810 if (IS_ERR(obj))
4811 return ERR_CAST(obj);
4812
4813 vma = i915_vma_instance(obj, vm, NULL);
4814 if (IS_ERR(vma)) {
4815 i915_gem_object_put(obj);
4816 return vma;
4817 }
4818
4819 err = i915_vma_pin(vma, 0, 0, PIN_USER);
4820 if (err) {
4821 i915_gem_object_put(obj);
4822 return ERR_PTR(err);
4823 }
4824
4825 return vma;
4826 }
4827
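/*
 * Walk the engine's default context image looking for each
 * MI_LOAD_REGISTER_IMM and emit an SRM for every register it lists,
 * dumping the live values into @scratch. Executing the resulting batch
 * snapshots the current value of every context register.
 */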
4828 static struct i915_vma *
4829 store_context(struct intel_context *ce, struct i915_vma *scratch)
4830 {
4831 struct i915_vma *batch;
4832 u32 dw, x, *cs, *hw;
4833
4834 batch = create_user_vma(ce->vm, SZ_64K);
4835 if (IS_ERR(batch))
4836 return batch;
4837
4838 cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
4839 if (IS_ERR(cs)) {
4840 i915_vma_put(batch);
4841 return ERR_CAST(cs);
4842 }
4843
4844 x = 0;
4845 dw = 0;
4846 hw = ce->engine->pinned_default_state;
4847 hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
4848 do {
4849 u32 len = hw[dw] & 0x7f;
4850
4851 if (hw[dw] == 0) {
4852 dw++;
4853 continue;
4854 }
4855
4856 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
4857 dw += len + 2;
4858 continue;
4859 }
4860
4861 dw++;
4862 len = (len + 1) / 2;
4863 while (len--) {
4864 *cs++ = MI_STORE_REGISTER_MEM_GEN8;
4865 *cs++ = hw[dw];
4866 *cs++ = lower_32_bits(scratch->node.start + x);
4867 *cs++ = upper_32_bits(scratch->node.start + x);
4868
4869 dw += 2;
4870 x += 4;
4871 }
4872 } while (dw < PAGE_SIZE / sizeof(u32) &&
4873 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
4874
4875 *cs++ = MI_BATCH_BUFFER_END;
4876
4877 i915_gem_object_flush_map(batch->obj);
4878 i915_gem_object_unpin_map(batch->obj);
4879
4880 return batch;
4881 }
4882
4883 static int move_to_active(struct i915_request *rq,
4884 struct i915_vma *vma,
4885 unsigned int flags)
4886 {
4887 int err;
4888
4889 i915_vma_lock(vma);
4890 err = i915_request_await_object(rq, vma->obj, flags);
4891 if (!err)
4892 err = i915_vma_move_to_active(vma, rq, flags);
4893 i915_vma_unlock(vma);
4894
4895 return err;
4896 }
4897
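/*
 * Submit a single request that snapshots the context registers into
 * @before, then waits on @sema with arbitration enabled so that it may
 * be preempted while waiting, and snapshots the registers again into
 * @after once released. Comparing the two dumps shows whether anything
 * changed while the context was switched out.
 */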
4898 static struct i915_request *
4899 record_registers(struct intel_context *ce,
4900 struct i915_vma *before,
4901 struct i915_vma *after,
4902 u32 *sema)
4903 {
4904 struct i915_vma *b_before, *b_after;
4905 struct i915_request *rq;
4906 u32 *cs;
4907 int err;
4908
4909 b_before = store_context(ce, before);
4910 if (IS_ERR(b_before))
4911 return ERR_CAST(b_before);
4912
4913 b_after = store_context(ce, after);
4914 if (IS_ERR(b_after)) {
4915 rq = ERR_CAST(b_after);
4916 goto err_before;
4917 }
4918
4919 rq = intel_context_create_request(ce);
4920 if (IS_ERR(rq))
4921 goto err_after;
4922
4923 err = move_to_active(rq, before, EXEC_OBJECT_WRITE);
4924 if (err)
4925 goto err_rq;
4926
4927 err = move_to_active(rq, b_before, 0);
4928 if (err)
4929 goto err_rq;
4930
4931 err = move_to_active(rq, after, EXEC_OBJECT_WRITE);
4932 if (err)
4933 goto err_rq;
4934
4935 err = move_to_active(rq, b_after, 0);
4936 if (err)
4937 goto err_rq;
4938
4939 cs = intel_ring_begin(rq, 14);
4940 if (IS_ERR(cs)) {
4941 err = PTR_ERR(cs);
4942 goto err_rq;
4943 }
4944
4945 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
4946 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
4947 *cs++ = lower_32_bits(b_before->node.start);
4948 *cs++ = upper_32_bits(b_before->node.start);
4949
4950 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4951 *cs++ = MI_SEMAPHORE_WAIT |
4952 MI_SEMAPHORE_GLOBAL_GTT |
4953 MI_SEMAPHORE_POLL |
4954 MI_SEMAPHORE_SAD_NEQ_SDD;
4955 *cs++ = 0;
4956 *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
4957 offset_in_page(sema);
4958 *cs++ = 0;
4959 *cs++ = MI_NOOP;
4960
4961 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
4962 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
4963 *cs++ = lower_32_bits(b_after->node.start);
4964 *cs++ = upper_32_bits(b_after->node.start);
4965
4966 intel_ring_advance(rq, cs);
4967
4968 WRITE_ONCE(*sema, 0);
4969 i915_request_get(rq);
4970 i915_request_add(rq);
4971 err_after:
4972 i915_vma_put(b_after);
4973 err_before:
4974 i915_vma_put(b_before);
4975 return rq;
4976
4977 err_rq:
4978 i915_request_add(rq);
4979 rq = ERR_PTR(err);
4980 goto err_after;
4981 }
4982
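/*
 * The counterpart to store_context(): walk the default context image
 * and emit an MI_LOAD_REGISTER_IMM writing @poison to every register
 * listed there.
 */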
4983 static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
4984 {
4985 struct i915_vma *batch;
4986 u32 dw, *cs, *hw;
4987
4988 batch = create_user_vma(ce->vm, SZ_64K);
4989 if (IS_ERR(batch))
4990 return batch;
4991
4992 cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
4993 if (IS_ERR(cs)) {
4994 i915_vma_put(batch);
4995 return ERR_CAST(cs);
4996 }
4997
4998 dw = 0;
4999 hw = ce->engine->pinned_default_state;
5000 hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
5001 do {
5002 u32 len = hw[dw] & 0x7f;
5003
5004 if (hw[dw] == 0) {
5005 dw++;
5006 continue;
5007 }
5008
5009 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
5010 dw += len + 2;
5011 continue;
5012 }
5013
5014 dw++;
5015 len = (len + 1) / 2;
5016 *cs++ = MI_LOAD_REGISTER_IMM(len);
5017 while (len--) {
5018 *cs++ = hw[dw];
5019 *cs++ = poison;
5020 dw += 2;
5021 }
5022 } while (dw < PAGE_SIZE / sizeof(u32) &&
5023 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
5024
5025 *cs++ = MI_BATCH_BUFFER_END;
5026
5027 i915_gem_object_flush_map(batch->obj);
5028 i915_gem_object_unpin_map(batch->obj);
5029
5030 return batch;
5031 }
5032
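/*
 * From a second context, run the load_context() batch to scribble
 * @poison over every context register, then write to @sema to release
 * the victim waiting inside record_registers(). Submitted at barrier
 * priority so that it preempts the spinning waiter.
 */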
5033 static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
5034 {
5035 struct i915_request *rq;
5036 struct i915_vma *batch;
5037 u32 *cs;
5038 int err;
5039
5040 batch = load_context(ce, poison);
5041 if (IS_ERR(batch))
5042 return PTR_ERR(batch);
5043
5044 rq = intel_context_create_request(ce);
5045 if (IS_ERR(rq)) {
5046 err = PTR_ERR(rq);
5047 goto err_batch;
5048 }
5049
5050 err = move_to_active(rq, batch, 0);
5051 if (err)
5052 goto err_rq;
5053
5054 cs = intel_ring_begin(rq, 8);
5055 if (IS_ERR(cs)) {
5056 err = PTR_ERR(cs);
5057 goto err_rq;
5058 }
5059
5060 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5061 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
5062 *cs++ = lower_32_bits(batch->node.start);
5063 *cs++ = upper_32_bits(batch->node.start);
5064
5065 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
5066 *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
5067 offset_in_page(sema);
5068 *cs++ = 0;
5069 *cs++ = 1;
5070
5071 intel_ring_advance(rq, cs);
5072
5073 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
5074 err_rq:
5075 i915_request_add(rq);
5076 err_batch:
5077 i915_vma_put(batch);
5078 return err;
5079 }
5080
5081 static bool is_moving(u32 a, u32 b)
5082 {
5083 return a != b;
5084 }
5085
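/*
 * Compare the reference dumps (taken with nothing else running) against
 * the dumps taken while a second context scribbled poison over the same
 * set of registers. Any register that was stable across the reference
 * run but differs in either result dump indicates state leaking between
 * contexts; RING_HEAD and RING_TAIL are expected to move and are
 * skipped.
 */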
5086 static int compare_isolation(struct intel_engine_cs *engine,
5087 struct i915_vma *ref[2],
5088 struct i915_vma *result[2],
5089 struct intel_context *ce,
5090 u32 poison)
5091 {
5092 u32 x, dw, *hw, *lrc;
5093 u32 *A[2], *B[2];
5094 int err = 0;
5095
5096 A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC);
5097 if (IS_ERR(A[0]))
5098 return PTR_ERR(A[0]);
5099
5100 A[1] = i915_gem_object_pin_map(ref[1]->obj, I915_MAP_WC);
5101 if (IS_ERR(A[1])) {
5102 err = PTR_ERR(A[1]);
5103 goto err_A0;
5104 }
5105
5106 B[0] = i915_gem_object_pin_map(result[0]->obj, I915_MAP_WC);
5107 if (IS_ERR(B[0])) {
5108 err = PTR_ERR(B[0]);
5109 goto err_A1;
5110 }
5111
5112 B[1] = i915_gem_object_pin_map(result[1]->obj, I915_MAP_WC);
5113 if (IS_ERR(B[1])) {
5114 err = PTR_ERR(B[1]);
5115 goto err_B0;
5116 }
5117
5118 lrc = i915_gem_object_pin_map(ce->state->obj,
5119 i915_coherent_map_type(engine->i915));
5120 if (IS_ERR(lrc)) {
5121 err = PTR_ERR(lrc);
5122 goto err_B1;
5123 }
5124 lrc += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
5125
5126 x = 0;
5127 dw = 0;
5128 hw = engine->pinned_default_state;
5129 hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
5130 do {
5131 u32 len = hw[dw] & 0x7f;
5132
5133 if (hw[dw] == 0) {
5134 dw++;
5135 continue;
5136 }
5137
5138 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
5139 dw += len + 2;
5140 continue;
5141 }
5142
5143 dw++;
5144 len = (len + 1) / 2;
5145 while (len--) {
5146 if (!is_moving(A[0][x], A[1][x]) &&
5147 (A[0][x] != B[0][x] || A[1][x] != B[1][x])) {
5148 switch (hw[dw] & 4095) {
5149 case 0x30: /* RING_HEAD */
5150 case 0x34: /* RING_TAIL */
5151 break;
5152
5153 default:
5154 pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
5155 engine->name, dw,
5156 hw[dw], hw[dw + 1],
5157 A[0][x], B[0][x], B[1][x],
5158 poison, lrc[dw + 1]);
5159 err = -EINVAL;
5160 break;
5161 }
5162 }
5163 dw += 2;
5164 x++;
5165 }
5166 } while (dw < PAGE_SIZE / sizeof(u32) &&
5167 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
5168
5169 i915_gem_object_unpin_map(ce->state->obj);
5170 err_B1:
5171 i915_gem_object_unpin_map(result[1]->obj);
5172 err_B0:
5173 i915_gem_object_unpin_map(result[0]->obj);
5174 err_A1:
5175 i915_gem_object_unpin_map(ref[1]->obj);
5176 err_A0:
5177 i915_gem_object_unpin_map(ref[0]->obj);
5178 return err;
5179 }
5180
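/*
 * Dump context A's registers twice with nothing else running to form
 * the reference, then dump them again while context B poisons every
 * context register it can reach from a non-privileged batch, and check
 * via compare_isolation() that A's state was untouched.
 */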
5181 static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
5182 {
5183 u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1);
5184 struct i915_vma *ref[2], *result[2];
5185 struct intel_context *A, *B;
5186 struct i915_request *rq;
5187 int err;
5188
5189 A = intel_context_create(engine);
5190 if (IS_ERR(A))
5191 return PTR_ERR(A);
5192
5193 B = intel_context_create(engine);
5194 if (IS_ERR(B)) {
5195 err = PTR_ERR(B);
5196 goto err_A;
5197 }
5198
5199 ref[0] = create_user_vma(A->vm, SZ_64K);
5200 if (IS_ERR(ref[0])) {
5201 err = PTR_ERR(ref[0]);
5202 goto err_B;
5203 }
5204
5205 ref[1] = create_user_vma(A->vm, SZ_64K);
5206 if (IS_ERR(ref[1])) {
5207 err = PTR_ERR(ref[1]);
5208 goto err_ref0;
5209 }
5210
5211 rq = record_registers(A, ref[0], ref[1], sema);
5212 if (IS_ERR(rq)) {
5213 err = PTR_ERR(rq);
5214 goto err_ref1;
5215 }
5216
5217 WRITE_ONCE(*sema, 1);
5218 wmb();
5219
5220 if (i915_request_wait(rq, 0, HZ / 2) < 0) {
5221 i915_request_put(rq);
5222 err = -ETIME;
5223 goto err_ref1;
5224 }
5225 i915_request_put(rq);
5226
5227 result[0] = create_user_vma(A->vm, SZ_64K);
5228 if (IS_ERR(result[0])) {
5229 err = PTR_ERR(result[0]);
5230 goto err_ref1;
5231 }
5232
5233 result[1] = create_user_vma(A->vm, SZ_64K);
5234 if (IS_ERR(result[1])) {
5235 err = PTR_ERR(result[1]);
5236 goto err_result0;
5237 }
5238
5239 rq = record_registers(A, result[0], result[1], sema);
5240 if (IS_ERR(rq)) {
5241 err = PTR_ERR(rq);
5242 goto err_result1;
5243 }
5244
5245 err = poison_registers(B, poison, sema);
5246 if (err) {
5247 WRITE_ONCE(*sema, -1);
5248 i915_request_put(rq);
5249 goto err_result1;
5250 }
5251
5252 if (i915_request_wait(rq, 0, HZ / 2) < 0) {
5253 i915_request_put(rq);
5254 err = -ETIME;
5255 goto err_result1;
5256 }
5257 i915_request_put(rq);
5258
5259 err = compare_isolation(engine, ref, result, A, poison);
5260
5261 err_result1:
5262 i915_vma_put(result[1]);
5263 err_result0:
5264 i915_vma_put(result[0]);
5265 err_ref1:
5266 i915_vma_put(ref[1]);
5267 err_ref0:
5268 i915_vma_put(ref[0]);
5269 err_B:
5270 intel_context_put(B);
5271 err_A:
5272 intel_context_put(A);
5273 return err;
5274 }
5275
5276 static bool skip_isolation(const struct intel_engine_cs *engine)
5277 {
5278 if (engine->class == COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) == 9)
5279 return true;
5280
5281 if (engine->class == RENDER_CLASS && INTEL_GEN(engine->i915) == 11)
5282 return true;
5283
5284 return false;
5285 }
5286
5287 static int live_lrc_isolation(void *arg)
5288 {
5289 struct intel_gt *gt = arg;
5290 struct intel_engine_cs *engine;
5291 enum intel_engine_id id;
5292 const u32 poison[] = {
5293 STACK_MAGIC,
5294 0x3a3a3a3a,
5295 0x5c5c5c5c,
5296 0xffffffff,
5297 0xffff0000,
5298 };
5299
5300 /*
5301 * Our goal is to verify that per-context state cannot be
5302 * tampered with by another non-privileged client.
5303 *
5304 * We take the list of context registers from the LRI in the default
5305 * context image and attempt to modify that list from a remote context.
5306 */
5307
5308 for_each_engine(engine, gt, id) {
5309 int err = 0;
5310 int i;
5311
5312 /* Just don't even ask */
5313 if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
5314 skip_isolation(engine))
5315 continue;
5316
5317 intel_engine_pm_get(engine);
5318 if (engine->pinned_default_state) {
5319 for (i = 0; i < ARRAY_SIZE(poison); i++) {
5320 err = __lrc_isolation(engine, poison[i]);
5321 if (err)
5322 break;
5323
5324 err = __lrc_isolation(engine, ~poison[i]);
5325 if (err)
5326 break;
5327 }
5328 }
5329 intel_engine_pm_put(engine);
5330 if (igt_flush_test(gt->i915))
5331 err = -EIO;
5332 if (err)
5333 return err;
5334 }
5335
5336 return 0;
5337 }
5338
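/*
 * Perform a manual engine reset, with the submission tasklet disabled,
 * to recover from the corrupted context. The reset is skipped if
 * someone else already holds the engine-reset bit or the request has
 * already been flagged with an error.
 */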
5339 static void garbage_reset(struct intel_engine_cs *engine,
5340 struct i915_request *rq)
5341 {
5342 const unsigned int bit = I915_RESET_ENGINE + engine->id;
5343 unsigned long *lock = &engine->gt->reset.flags;
5344
5345 if (test_and_set_bit(bit, lock))
5346 return;
5347
5348 tasklet_disable(&engine->execlists.tasklet);
5349
5350 if (!rq->fence.error)
5351 intel_engine_reset(engine, NULL);
5352
5353 tasklet_enable(&engine->execlists.tasklet);
5354 clear_and_wake_up_bit(bit, lock);
5355 }
5356
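/*
 * Overwrite the register state in the pinned context image with random
 * bytes and submit a request from it, handing the hardware a thoroughly
 * corrupted context.
 */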
5357 static struct i915_request *garbage(struct intel_context *ce,
5358 struct rnd_state *prng)
5359 {
5360 struct i915_request *rq;
5361 int err;
5362
5363 err = intel_context_pin(ce);
5364 if (err)
5365 return ERR_PTR(err);
5366
5367 prandom_bytes_state(prng,
5368 ce->lrc_reg_state,
5369 ce->engine->context_size -
5370 LRC_STATE_PN * PAGE_SIZE);
5371
5372 rq = intel_context_create_request(ce);
5373 if (IS_ERR(rq)) {
5374 err = PTR_ERR(rq);
5375 goto err_unpin;
5376 }
5377
5378 i915_request_get(rq);
5379 i915_request_add(rq);
5380 return rq;
5381
5382 err_unpin:
5383 intel_context_unpin(ce);
5384 return ERR_PTR(err);
5385 }
5386
5387 static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
5388 {
5389 struct intel_context *ce;
5390 struct i915_request *hang;
5391 int err = 0;
5392
5393 ce = intel_context_create(engine);
5394 if (IS_ERR(ce))
5395 return PTR_ERR(ce);
5396
5397 hang = garbage(ce, prng);
5398 if (IS_ERR(hang)) {
5399 err = PTR_ERR(hang);
5400 goto err_ce;
5401 }
5402
5403 if (wait_for_submit(engine, hang, HZ / 2)) {
5404 i915_request_put(hang);
5405 err = -ETIME;
5406 goto err_ce;
5407 }
5408
5409 intel_context_set_banned(ce);
5410 garbage_reset(engine, hang);
5411
5412 intel_engine_flush_submission(engine);
5413 if (!hang->fence.error) {
5414 i915_request_put(hang);
5415 pr_err("%s: corrupted context was not reset\n",
5416 engine->name);
5417 err = -EINVAL;
5418 goto err_ce;
5419 }
5420
5421 if (i915_request_wait(hang, 0, HZ / 2) < 0) {
5422 pr_err("%s: corrupted context did not recover\n",
5423 engine->name);
5424 i915_request_put(hang);
5425 err = -EIO;
5426 goto err_ce;
5427 }
5428 i915_request_put(hang);
5429
5430 err_ce:
5431 intel_context_put(ce);
5432 return err;
5433 }
5434
5435 static int live_lrc_garbage(void *arg)
5436 {
5437 struct intel_gt *gt = arg;
5438 struct intel_engine_cs *engine;
5439 enum intel_engine_id id;
5440
5441 /*
5442 * Verify that we can recover if one context state is completely
5443 * corrupted.
5444 */
5445
5446 if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
5447 return 0;
5448
5449 for_each_engine(engine, gt, id) {
5450 I915_RND_STATE(prng);
5451 int err = 0, i;
5452
5453 if (!intel_has_reset_engine(engine->gt))
5454 continue;
5455
5456 intel_engine_pm_get(engine);
5457 for (i = 0; i < 3; i++) {
5458 err = __lrc_garbage(engine, &prng);
5459 if (err)
5460 break;
5461 }
5462 intel_engine_pm_put(engine);
5463
5464 if (igt_flush_test(gt->i915))
5465 err = -EIO;
5466 if (err)
5467 return err;
5468 }
5469
5470 return 0;
5471 }
5472
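/*
 * Submit batches of requests on a single context until the selftest
 * timeout expires, keeping a reference only to the last request of each
 * batch, then report the accumulated pphwsp runtime and fail if any
 * underflow (negative delta) was recorded.
 */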
5473 static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
5474 {
5475 struct intel_context *ce;
5476 struct i915_request *rq;
5477 IGT_TIMEOUT(end_time);
5478 int err;
5479
5480 ce = intel_context_create(engine);
5481 if (IS_ERR(ce))
5482 return PTR_ERR(ce);
5483
5484 ce->runtime.num_underflow = 0;
5485 ce->runtime.max_underflow = 0;
5486
5487 do {
5488 unsigned int loop = 1024;
5489
5490 while (loop) {
5491 rq = intel_context_create_request(ce);
5492 if (IS_ERR(rq)) {
5493 err = PTR_ERR(rq);
5494 goto err_rq;
5495 }
5496
5497 if (--loop == 0)
5498 i915_request_get(rq);
5499
5500 i915_request_add(rq);
5501 }
5502
5503 if (__igt_timeout(end_time, NULL))
5504 break;
5505
5506 i915_request_put(rq);
5507 } while (1);
5508
5509 err = i915_request_wait(rq, 0, HZ / 5);
5510 if (err < 0) {
5511 pr_err("%s: request not completed!\n", engine->name);
5512 goto err_wait;
5513 }
5514
5515 igt_flush_test(engine->i915);
5516
5517 pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
5518 engine->name,
5519 intel_context_get_total_runtime_ns(ce),
5520 intel_context_get_avg_runtime_ns(ce));
5521
5522 err = 0;
5523 if (ce->runtime.num_underflow) {
5524 pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
5525 engine->name,
5526 ce->runtime.num_underflow,
5527 ce->runtime.max_underflow);
5528 GEM_TRACE_DUMP();
5529 err = -EOVERFLOW;
5530 }
5531
5532 err_wait:
5533 i915_request_put(rq);
5534 err_rq:
5535 intel_context_put(ce);
5536 return err;
5537 }
5538
5539 static int live_pphwsp_runtime(void *arg)
5540 {
5541 struct intel_gt *gt = arg;
5542 struct intel_engine_cs *engine;
5543 enum intel_engine_id id;
5544 int err = 0;
5545
5546 /*
5547 * Check that the cumulative context runtime, as stored in the pphwsp[16],
5548 * is monotonic.
5549 */
5550
5551 for_each_engine(engine, gt, id) {
5552 err = __live_pphwsp_runtime(engine);
5553 if (err)
5554 break;
5555 }
5556
5557 if (igt_flush_test(gt->i915))
5558 err = -EIO;
5559
5560 return err;
5561 }
5562
5563 int intel_lrc_live_selftests(struct drm_i915_private *i915)
5564 {
5565 static const struct i915_subtest tests[] = {
5566 SUBTEST(live_lrc_layout),
5567 SUBTEST(live_lrc_fixed),
5568 SUBTEST(live_lrc_state),
5569 SUBTEST(live_lrc_gpr),
5570 SUBTEST(live_lrc_isolation),
5571 SUBTEST(live_lrc_timestamp),
5572 SUBTEST(live_lrc_garbage),
5573 SUBTEST(live_pphwsp_runtime),
5574 };
5575
5576 if (!HAS_LOGICAL_RING_CONTEXTS(i915))
5577 return 0;
5578
5579 return intel_gt_live_subtests(tests, &i915->gt);
5580 }