drivers/gpu/drm/i915/gt/selftest_lrc.c
1 /*
2 * SPDX-License-Identifier: MIT
3 *
4 * Copyright © 2018 Intel Corporation
5 */
6
7 #include <linux/prime_numbers.h>
8
9 #include "gem/i915_gem_pm.h"
10 #include "gt/intel_engine_heartbeat.h"
11 #include "gt/intel_reset.h"
12
13 #include "i915_selftest.h"
14 #include "selftests/i915_random.h"
15 #include "selftests/igt_flush_test.h"
16 #include "selftests/igt_live_test.h"
17 #include "selftests/igt_spinner.h"
18 #include "selftests/lib_sw_fence.h"
19
20 #include "gem/selftests/igt_gem_utils.h"
21 #include "gem/selftests/mock_context.h"
22
23 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
24 #define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */
25
26 static struct i915_vma *create_scratch(struct intel_gt *gt)
27 {
28 struct drm_i915_gem_object *obj;
29 struct i915_vma *vma;
30 int err;
31
32 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
33 if (IS_ERR(obj))
34 return ERR_CAST(obj);
35
36 i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
37
38 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
39 if (IS_ERR(vma)) {
40 i915_gem_object_put(obj);
41 return vma;
42 }
43
44 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
45 if (err) {
46 i915_gem_object_put(obj);
47 return ERR_PTR(err);
48 }
49
50 return vma;
51 }
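/*
 * Note on create_scratch(): the returned vma is pinned in the GGTT,
 * typically so that its offset, i915_ggtt_offset(vma), can be addressed
 * directly by the command streamer (e.g. for CS_GPR read-back). The
 * caller keeps ownership and is expected to unpin and release it once
 * the results have been read back; a minimal sketch:
 *
 *	vma = create_scratch(gt);
 *	if (IS_ERR(vma))
 *		return PTR_ERR(vma);
 *	... emit stores against i915_ggtt_offset(vma) ...
 *	i915_vma_unpin(vma);
 *	i915_gem_object_put(vma->obj);
 */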
52
53 static void engine_heartbeat_disable(struct intel_engine_cs *engine,
54 unsigned long *saved)
55 {
56 *saved = engine->props.heartbeat_interval_ms;
57 engine->props.heartbeat_interval_ms = 0;
58
59 intel_engine_pm_get(engine);
60 intel_engine_park_heartbeat(engine);
61 }
62
63 static void engine_heartbeat_enable(struct intel_engine_cs *engine,
64 unsigned long saved)
65 {
66 intel_engine_pm_put(engine);
67
68 engine->props.heartbeat_interval_ms = saved;
69 }
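/*
 * These two helpers bracket every test body in this file that submits a
 * deliberately spinning or hung request: zeroing the heartbeat interval
 * stops the background pulse from preempting (and eventually resetting)
 * the request behind the test's back, while the engine-pm reference keeps
 * the engine awake for the duration. The usual pattern is:
 *
 *	unsigned long saved;
 *
 *	engine_heartbeat_disable(engine, &saved);
 *	... submit and probe spinning requests ...
 *	engine_heartbeat_enable(engine, saved);
 */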
70
71 static int wait_for_submit(struct intel_engine_cs *engine,
72 struct i915_request *rq,
73 unsigned long timeout)
74 {
75 timeout += jiffies;
76 do {
77 cond_resched();
78 intel_engine_flush_submission(engine);
79
80 if (READ_ONCE(engine->execlists.pending[0]))
81 continue;
82
83 if (i915_request_is_active(rq))
84 return 0;
85
86 if (i915_request_started(rq)) /* that was quick! */
87 return 0;
88 } while (time_before(jiffies, timeout));
89
90 return -ETIME;
91 }
92
93 static int wait_for_reset(struct intel_engine_cs *engine,
94 struct i915_request *rq,
95 unsigned long timeout)
96 {
97 timeout += jiffies;
98
99 do {
100 cond_resched();
101 intel_engine_flush_submission(engine);
102
103 if (READ_ONCE(engine->execlists.pending[0]))
104 continue;
105
106 if (i915_request_completed(rq))
107 break;
108
109 if (READ_ONCE(rq->fence.error))
110 break;
111 } while (time_before(jiffies, timeout));
112
113 flush_scheduled_work();
114
115 if (rq->fence.error != -EIO) {
116 pr_err("%s: hanging request %llx:%lld not reset\n",
117 engine->name,
118 rq->fence.context,
119 rq->fence.seqno);
120 return -EINVAL;
121 }
122
123 /* Give the request a jiffy to complete after flushing the worker */
124 if (i915_request_wait(rq, 0,
125 max(0l, (long)(timeout - jiffies)) + 1) < 0) {
126 pr_err("%s: hanging request %llx:%lld did not complete\n",
127 engine->name,
128 rq->fence.context,
129 rq->fence.seqno);
130 return -ETIME;
131 }
132
133 return 0;
134 }
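/*
 * wait_for_reset() above first polls for the hanging request to be
 * cancelled, then flushes the system workqueue so that the deferred
 * reset/retire workers have run before the fence is inspected. Only a
 * request that was genuinely reset reports fence.error == -EIO; anything
 * else (still running, or completed normally) is treated as a failure by
 * the preempt-cancel tests below.
 */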
135
136 static int live_sanitycheck(void *arg)
137 {
138 struct intel_gt *gt = arg;
139 struct intel_engine_cs *engine;
140 enum intel_engine_id id;
141 struct igt_spinner spin;
142 int err = 0;
143
144 if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
145 return 0;
146
147 if (igt_spinner_init(&spin, gt))
148 return -ENOMEM;
149
150 for_each_engine(engine, gt, id) {
151 struct intel_context *ce;
152 struct i915_request *rq;
153
154 ce = intel_context_create(engine);
155 if (IS_ERR(ce)) {
156 err = PTR_ERR(ce);
157 break;
158 }
159
160 rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
161 if (IS_ERR(rq)) {
162 err = PTR_ERR(rq);
163 goto out_ctx;
164 }
165
166 i915_request_add(rq);
167 if (!igt_wait_for_spinner(&spin, rq)) {
168 GEM_TRACE("spinner failed to start\n");
169 GEM_TRACE_DUMP();
170 intel_gt_set_wedged(gt);
171 err = -EIO;
172 goto out_ctx;
173 }
174
175 igt_spinner_end(&spin);
176 if (igt_flush_test(gt->i915)) {
177 err = -EIO;
178 goto out_ctx;
179 }
180
181 out_ctx:
182 intel_context_put(ce);
183 if (err)
184 break;
185 }
186
187 igt_spinner_fini(&spin);
188 return err;
189 }
190
191 static int live_unlite_restore(struct intel_gt *gt, int prio)
192 {
193 struct intel_engine_cs *engine;
194 enum intel_engine_id id;
195 struct igt_spinner spin;
196 int err = -ENOMEM;
197
198 /*
199 * Check that we can correctly context switch between 2 instances
200 * on the same engine from the same parent context.
201 */
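/*
 * Background, roughly: a "lite restore" is the hardware shortcut whereby
 * resubmitting the context that is already active only updates its
 * RING_TAIL instead of reloading the whole context image. If we were to
 * lite-restore using the RING_TAIL of ce[1] while ce[0] is on the HW,
 * the CS would execute whatever bytes happen to sit in ce[0]'s ring up
 * to that tail -- which is why the rings are poisoned below and the two
 * contexts are forced to interleave.
 */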
202
203 if (igt_spinner_init(&spin, gt))
204 return err;
205
206 err = 0;
207 for_each_engine(engine, gt, id) {
208 struct intel_context *ce[2] = {};
209 struct i915_request *rq[2];
210 struct igt_live_test t;
211 unsigned long saved;
212 int n;
213
214 if (prio && !intel_engine_has_preemption(engine))
215 continue;
216
217 if (!intel_engine_can_store_dword(engine))
218 continue;
219
220 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
221 err = -EIO;
222 break;
223 }
224 engine_heartbeat_disable(engine, &saved);
225
226 for (n = 0; n < ARRAY_SIZE(ce); n++) {
227 struct intel_context *tmp;
228
229 tmp = intel_context_create(engine);
230 if (IS_ERR(tmp)) {
231 err = PTR_ERR(tmp);
232 goto err_ce;
233 }
234
235 err = intel_context_pin(tmp);
236 if (err) {
237 intel_context_put(tmp);
238 goto err_ce;
239 }
240
241 /*
242 * Setup the pair of contexts such that if we
243 * lite-restore using the RING_TAIL from ce[1] it
244 * will execute garbage from ce[0]->ring.
245 */
246 memset(tmp->ring->vaddr,
247 POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
248 tmp->ring->vma->size);
249
250 ce[n] = tmp;
251 }
252 GEM_BUG_ON(!ce[1]->ring->size);
253 intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
254 __execlists_update_reg_state(ce[1], engine, ce[1]->ring->head);
255
256 rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
257 if (IS_ERR(rq[0])) {
258 err = PTR_ERR(rq[0]);
259 goto err_ce;
260 }
261
262 i915_request_get(rq[0]);
263 i915_request_add(rq[0]);
264 GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
265
266 if (!igt_wait_for_spinner(&spin, rq[0])) {
267 i915_request_put(rq[0]);
268 goto err_ce;
269 }
270
271 rq[1] = i915_request_create(ce[1]);
272 if (IS_ERR(rq[1])) {
273 err = PTR_ERR(rq[1]);
274 i915_request_put(rq[0]);
275 goto err_ce;
276 }
277
278 if (!prio) {
279 /*
280 * Ensure we do the switch to ce[1] on completion.
281 *
282 * rq[0] is already submitted, so this should reduce
283 * to a no-op (a wait on a request on the same engine
284 * uses the submit fence, not the completion fence),
285 * but it will record a dependency of rq[1] upon rq[0]
286 * that will prevent the pair being reordered by
287 * timeslicing.
288 */
289 i915_request_await_dma_fence(rq[1], &rq[0]->fence);
290 }
291
292 i915_request_get(rq[1]);
293 i915_request_add(rq[1]);
294 GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
295 i915_request_put(rq[0]);
296
297 if (prio) {
298 struct i915_sched_attr attr = {
299 .priority = prio,
300 };
301
302 /* Alternatively preempt the spinner with ce[1] */
303 engine->schedule(rq[1], &attr);
304 }
305
306 /* And switch back to ce[0] for good measure */
307 rq[0] = i915_request_create(ce[0]);
308 if (IS_ERR(rq[0])) {
309 err = PTR_ERR(rq[0]);
310 i915_request_put(rq[1]);
311 goto err_ce;
312 }
313
314 i915_request_await_dma_fence(rq[0], &rq[1]->fence);
315 i915_request_get(rq[0]);
316 i915_request_add(rq[0]);
317 GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
318 i915_request_put(rq[1]);
319 i915_request_put(rq[0]);
320
321 err_ce:
322 tasklet_kill(&engine->execlists.tasklet); /* flush submission */
323 igt_spinner_end(&spin);
324 for (n = 0; n < ARRAY_SIZE(ce); n++) {
325 if (IS_ERR_OR_NULL(ce[n]))
326 break;
327
328 intel_context_unpin(ce[n]);
329 intel_context_put(ce[n]);
330 }
331
332 engine_heartbeat_enable(engine, saved);
333 if (igt_live_test_end(&t))
334 err = -EIO;
335 if (err)
336 break;
337 }
338
339 igt_spinner_fini(&spin);
340 return err;
341 }
342
343 static int live_unlite_switch(void *arg)
344 {
345 return live_unlite_restore(arg, 0);
346 }
347
348 static int live_unlite_preempt(void *arg)
349 {
350 return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
351 }
352
353 static int live_pin_rewind(void *arg)
354 {
355 struct intel_gt *gt = arg;
356 struct intel_engine_cs *engine;
357 enum intel_engine_id id;
358 int err = 0;
359
360 /*
361 * We have to be careful not to trust intel_ring too much, for example
362 * ring->head is updated upon retire which is out of sync with pinning
363 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
364 * or else we risk writing an older, stale value.
365 *
366 * To simulate this, let's apply a bit of deliberate sabotage.
367 */
368
369 for_each_engine(engine, gt, id) {
370 struct intel_context *ce;
371 struct i915_request *rq;
372 struct intel_ring *ring;
373 struct igt_live_test t;
374
375 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
376 err = -EIO;
377 break;
378 }
379
380 ce = intel_context_create(engine);
381 if (IS_ERR(ce)) {
382 err = PTR_ERR(ce);
383 break;
384 }
385
386 err = intel_context_pin(ce);
387 if (err) {
388 intel_context_put(ce);
389 break;
390 }
391
392 /* Keep the context awake while we play games */
393 err = i915_active_acquire(&ce->active);
394 if (err) {
395 intel_context_unpin(ce);
396 intel_context_put(ce);
397 break;
398 }
399 ring = ce->ring;
400
401 /* Poison the ring, and offset the next request from HEAD */
402 memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
403 ring->emit = ring->size / 2;
404 ring->tail = ring->emit;
405 GEM_BUG_ON(ring->head);
406
407 intel_context_unpin(ce);
408
409 /* Submit a simple nop request */
410 GEM_BUG_ON(intel_context_is_pinned(ce));
411 rq = intel_context_create_request(ce);
412 i915_active_release(&ce->active); /* e.g. async retire */
413 intel_context_put(ce);
414 if (IS_ERR(rq)) {
415 err = PTR_ERR(rq);
416 break;
417 }
418 GEM_BUG_ON(!rq->head);
419 i915_request_add(rq);
420
421 /* Expect not to hang! */
422 if (igt_live_test_end(&t)) {
423 err = -EIO;
424 break;
425 }
426 }
427
428 return err;
429 }
430
431 static int live_hold_reset(void *arg)
432 {
433 struct intel_gt *gt = arg;
434 struct intel_engine_cs *engine;
435 enum intel_engine_id id;
436 struct igt_spinner spin;
437 int err = 0;
438
439 /*
440 * In order to support offline error capture for fast preempt reset,
441 * we need to decouple the guilty request and ensure that it and its
442 * descendants are not executed while the capture is in progress.
443 */
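/*
 * The mechanism under test: execlists_hold() removes the request (and
 * its waiters) from the engine's runqueue and parks it on a hold list,
 * so that neither the reset nor later submissions will execute it while
 * error capture is nominally in progress; execlists_unhold() puts it
 * back for rescheduling. The sequence exercised below is, in outline:
 *
 *	i915_request_get(rq);
 *	execlists_hold(engine, rq);
 *	intel_engine_reset(engine, NULL); // rq marked -EIO, not resubmitted
 *	...
 *	execlists_unhold(engine, rq);     // now it may complete (as -EIO)
 *	i915_request_put(rq);
 */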
444
445 if (!intel_has_reset_engine(gt))
446 return 0;
447
448 if (igt_spinner_init(&spin, gt))
449 return -ENOMEM;
450
451 for_each_engine(engine, gt, id) {
452 struct intel_context *ce;
453 unsigned long heartbeat;
454 struct i915_request *rq;
455
456 ce = intel_context_create(engine);
457 if (IS_ERR(ce)) {
458 err = PTR_ERR(ce);
459 break;
460 }
461
462 engine_heartbeat_disable(engine, &heartbeat);
463
464 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
465 if (IS_ERR(rq)) {
466 err = PTR_ERR(rq);
467 goto out;
468 }
469 i915_request_add(rq);
470
471 if (!igt_wait_for_spinner(&spin, rq)) {
472 intel_gt_set_wedged(gt);
473 err = -ETIME;
474 goto out;
475 }
476
477 /* We have our request executing, now remove it and reset */
478
479 if (test_and_set_bit(I915_RESET_ENGINE + id,
480 &gt->reset.flags)) {
481 intel_gt_set_wedged(gt);
482 err = -EBUSY;
483 goto out;
484 }
485 tasklet_disable(&engine->execlists.tasklet);
486
487 engine->execlists.tasklet.func(engine->execlists.tasklet.data);
488 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
489
490 i915_request_get(rq);
491 execlists_hold(engine, rq);
492 GEM_BUG_ON(!i915_request_on_hold(rq));
493
494 intel_engine_reset(engine, NULL);
495 GEM_BUG_ON(rq->fence.error != -EIO);
496
497 tasklet_enable(&engine->execlists.tasklet);
498 clear_and_wake_up_bit(I915_RESET_ENGINE + id,
499 &gt->reset.flags);
500
501 /* Check that we do not resubmit the held request */
502 if (!i915_request_wait(rq, 0, HZ / 5)) {
503 pr_err("%s: on hold request completed!\n",
504 engine->name);
505 i915_request_put(rq);
506 err = -EIO;
507 goto out;
508 }
509 GEM_BUG_ON(!i915_request_on_hold(rq));
510
511 /* But is resubmitted on release */
512 execlists_unhold(engine, rq);
513 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
514 pr_err("%s: held request did not complete!\n",
515 engine->name);
516 intel_gt_set_wedged(gt);
517 err = -ETIME;
518 }
519 i915_request_put(rq);
520
521 out:
522 engine_heartbeat_enable(engine, heartbeat);
523 intel_context_put(ce);
524 if (err)
525 break;
526 }
527
528 igt_spinner_fini(&spin);
529 return err;
530 }
531
532 static const char *error_repr(int err)
533 {
534 return err ? "bad" : "good";
535 }
536
537 static int live_error_interrupt(void *arg)
538 {
539 static const struct error_phase {
540 enum { GOOD = 0, BAD = -EIO } error[2];
541 } phases[] = {
542 { { BAD, GOOD } },
543 { { BAD, BAD } },
544 { { BAD, GOOD } },
545 { { GOOD, GOOD } }, /* sentinel */
546 };
547 struct intel_gt *gt = arg;
548 struct intel_engine_cs *engine;
549 enum intel_engine_id id;
550
551 /*
552 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
553 * of invalid commands in user batches that will cause a GPU hang.
554 * This is a faster mechanism than using hangcheck/heartbeats, but
555 * only detects problems the HW knows about -- it will not warn when
556 * we kill the HW!
557 *
558 * To verify our detection and reset, we throw some invalid commands
559 * at the HW and wait for the interrupt.
560 */
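/*
 * In the loop below, a "BAD" request is simply one whose ring contains
 * 0xdeadbeef -- an invalid command header that the command streamer
 * rejects, raising CS_MASTER_ERROR_INTERRUPT. The submission tasklet is
 * then expected to cancel just that context and propagate -EIO to its
 * fence, while "GOOD" MI_NOOP requests on the same engine are expected
 * to complete with fence.error == 0.
 */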
561
562 if (!intel_has_reset_engine(gt))
563 return 0;
564
565 for_each_engine(engine, gt, id) {
566 const struct error_phase *p;
567 unsigned long heartbeat;
568 int err = 0;
569
570 engine_heartbeat_disable(engine, &heartbeat);
571
572 for (p = phases; p->error[0] != GOOD; p++) {
573 struct i915_request *client[ARRAY_SIZE(phases->error)];
574 u32 *cs;
575 int i;
576
577 memset(client, 0, sizeof(client)); /* zero the whole array, not just client[0] */
578 for (i = 0; i < ARRAY_SIZE(client); i++) {
579 struct intel_context *ce;
580 struct i915_request *rq;
581
582 ce = intel_context_create(engine);
583 if (IS_ERR(ce)) {
584 err = PTR_ERR(ce);
585 goto out;
586 }
587
588 rq = intel_context_create_request(ce);
589 intel_context_put(ce);
590 if (IS_ERR(rq)) {
591 err = PTR_ERR(rq);
592 goto out;
593 }
594
595 if (rq->engine->emit_init_breadcrumb) {
596 err = rq->engine->emit_init_breadcrumb(rq);
597 if (err) {
598 i915_request_add(rq);
599 goto out;
600 }
601 }
602
603 cs = intel_ring_begin(rq, 2);
604 if (IS_ERR(cs)) {
605 i915_request_add(rq);
606 err = PTR_ERR(cs);
607 goto out;
608 }
609
610 if (p->error[i]) {
611 *cs++ = 0xdeadbeef;
612 *cs++ = 0xdeadbeef;
613 } else {
614 *cs++ = MI_NOOP;
615 *cs++ = MI_NOOP;
616 }
617
618 client[i] = i915_request_get(rq);
619 i915_request_add(rq);
620 }
621
622 err = wait_for_submit(engine, client[0], HZ / 2);
623 if (err) {
624 pr_err("%s: first request did not start within time!\n",
625 engine->name);
626 err = -ETIME;
627 goto out;
628 }
629
630 for (i = 0; i < ARRAY_SIZE(client); i++) {
631 if (i915_request_wait(client[i], 0, HZ / 5) < 0)
632 pr_debug("%s: %s request incomplete!\n",
633 engine->name,
634 error_repr(p->error[i]));
635
636 if (!i915_request_started(client[i])) {
637 pr_debug("%s: %s request did not start!\n",
638 engine->name,
639 error_repr(p->error[i]));
640 err = -ETIME;
641 goto out;
642 }
643
644 /* Kick the tasklet to process the error */
645 intel_engine_flush_submission(engine);
646 if (client[i]->fence.error != p->error[i]) {
647 pr_err("%s: %s request completed with wrong error code: %d\n",
648 engine->name,
649 error_repr(p->error[i]),
650 client[i]->fence.error);
651 err = -EINVAL;
652 goto out;
653 }
654 }
655
656 out:
657 for (i = 0; i < ARRAY_SIZE(client); i++)
658 if (client[i])
659 i915_request_put(client[i]);
660 if (err) {
661 pr_err("%s: failed at phase[%zd] { %d, %d }\n",
662 engine->name, p - phases,
663 p->error[0], p->error[1]);
664 break;
665 }
666 }
667
668 engine_heartbeat_enable(engine, heartbeat);
669 if (err) {
670 intel_gt_set_wedged(gt);
671 return err;
672 }
673 }
674
675 return 0;
676 }
677
678 static int
679 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
680 {
681 u32 *cs;
682
683 cs = intel_ring_begin(rq, 10);
684 if (IS_ERR(cs))
685 return PTR_ERR(cs);
686
687 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
688
689 *cs++ = MI_SEMAPHORE_WAIT |
690 MI_SEMAPHORE_GLOBAL_GTT |
691 MI_SEMAPHORE_POLL |
692 MI_SEMAPHORE_SAD_NEQ_SDD;
693 *cs++ = 0;
694 *cs++ = i915_ggtt_offset(vma) + 4 * idx;
695 *cs++ = 0;
696
697 if (idx > 0) {
698 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
699 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
700 *cs++ = 0;
701 *cs++ = 1;
702 } else {
703 *cs++ = MI_NOOP;
704 *cs++ = MI_NOOP;
705 *cs++ = MI_NOOP;
706 *cs++ = MI_NOOP;
707 }
708
709 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
710
711 intel_ring_advance(rq, cs);
712 return 0;
713 }
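/*
 * Each link emitted above busy-waits (MI_SEMAPHORE_WAIT | POLL, "not
 * equal to 0") on dword idx of the scratch page and, once released,
 * writes 1 into dword idx - 1. Releasing the highest index therefore
 * unwinds the chain one link at a time back towards the head at index 0
 * -- but only if the scheduler timeslices between the spinning waiters,
 * since each link occupies its engine until its own semaphore is
 * signalled.
 */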
714
715 static struct i915_request *
716 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
717 {
718 struct intel_context *ce;
719 struct i915_request *rq;
720 int err;
721
722 ce = intel_context_create(engine);
723 if (IS_ERR(ce))
724 return ERR_CAST(ce);
725
726 rq = intel_context_create_request(ce);
727 if (IS_ERR(rq))
728 goto out_ce;
729
730 err = 0;
731 if (rq->engine->emit_init_breadcrumb)
732 err = rq->engine->emit_init_breadcrumb(rq);
733 if (err == 0)
734 err = emit_semaphore_chain(rq, vma, idx);
735 if (err == 0)
736 i915_request_get(rq);
737 i915_request_add(rq);
738 if (err)
739 rq = ERR_PTR(err);
740
741 out_ce:
742 intel_context_put(ce);
743 return rq;
744 }
745
746 static int
747 release_queue(struct intel_engine_cs *engine,
748 struct i915_vma *vma,
749 int idx, int prio)
750 {
751 struct i915_sched_attr attr = {
752 .priority = prio,
753 };
754 struct i915_request *rq;
755 u32 *cs;
756
757 rq = intel_engine_create_kernel_request(engine);
758 if (IS_ERR(rq))
759 return PTR_ERR(rq);
760
761 cs = intel_ring_begin(rq, 4);
762 if (IS_ERR(cs)) {
763 i915_request_add(rq);
764 return PTR_ERR(cs);
765 }
766
767 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
768 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
769 *cs++ = 0;
770 *cs++ = 1;
771
772 intel_ring_advance(rq, cs);
773
774 i915_request_get(rq);
775 i915_request_add(rq);
776
777 local_bh_disable();
778 engine->schedule(rq, &attr);
779 local_bh_enable(); /* kick tasklet */
780
781 i915_request_put(rq);
782
783 return 0;
784 }
785
786 static int
787 slice_semaphore_queue(struct intel_engine_cs *outer,
788 struct i915_vma *vma,
789 int count)
790 {
791 struct intel_engine_cs *engine;
792 struct i915_request *head;
793 enum intel_engine_id id;
794 int err, i, n = 0;
795
796 head = semaphore_queue(outer, vma, n++);
797 if (IS_ERR(head))
798 return PTR_ERR(head);
799
800 for_each_engine(engine, outer->gt, id) {
801 for (i = 0; i < count; i++) {
802 struct i915_request *rq;
803
804 rq = semaphore_queue(engine, vma, n++);
805 if (IS_ERR(rq)) {
806 err = PTR_ERR(rq);
807 goto out;
808 }
809
810 i915_request_put(rq);
811 }
812 }
813
814 err = release_queue(outer, vma, n, INT_MAX);
815 if (err)
816 goto out;
817
818 if (i915_request_wait(head, 0,
819 2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) {
820 pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
821 count, n);
822 GEM_TRACE_DUMP();
823 intel_gt_set_wedged(outer->gt);
824 err = -EIO;
825 }
826
827 out:
828 i915_request_put(head);
829 return err;
830 }
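/*
 * slice_semaphore_queue() builds the chain as one head request on the
 * outer engine waiting on slot 0, then count further links per engine,
 * each waiting on its own slot and releasing its predecessor. The final
 * release_queue() write (at INT_MAX priority) starts the unwind from the
 * far end. The wait bound of
 *	2 * num_engines * (count + 2) * (count + 3) jiffies
 * is just a generous upper estimate on the number of timeslice rotations
 * needed for the chain to collapse back to the head.
 */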
831
832 static int live_timeslice_preempt(void *arg)
833 {
834 struct intel_gt *gt = arg;
835 struct drm_i915_gem_object *obj;
836 struct i915_vma *vma;
837 void *vaddr;
838 int err = 0;
839 int count;
840
841 /*
842 * If a request takes too long, we would like to give other users
843 * a fair go on the GPU. In particular, users may create batches
844 * that wait upon external input, where that input may even be
845 * supplied by another GPU job. To avoid blocking forever, we
846 * need to preempt the current task and replace it with another
847 * ready task.
848 */
849 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
850 return 0;
851
852 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
853 if (IS_ERR(obj))
854 return PTR_ERR(obj);
855
856 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
857 if (IS_ERR(vma)) {
858 err = PTR_ERR(vma);
859 goto err_obj;
860 }
861
862 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
863 if (IS_ERR(vaddr)) {
864 err = PTR_ERR(vaddr);
865 goto err_obj;
866 }
867
868 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
869 if (err)
870 goto err_map;
871
872 err = i915_vma_sync(vma);
873 if (err)
874 goto err_pin;
875
876 for_each_prime_number_from(count, 1, 16) {
877 struct intel_engine_cs *engine;
878 enum intel_engine_id id;
879
880 for_each_engine(engine, gt, id) {
881 unsigned long saved;
882
883 if (!intel_engine_has_preemption(engine))
884 continue;
885
886 memset(vaddr, 0, PAGE_SIZE);
887
888 engine_heartbeat_disable(engine, &saved);
889 err = slice_semaphore_queue(engine, vma, count);
890 engine_heartbeat_enable(engine, saved);
891 if (err)
892 goto err_pin;
893
894 if (igt_flush_test(gt->i915)) {
895 err = -EIO;
896 goto err_pin;
897 }
898 }
899 }
900
901 err_pin:
902 i915_vma_unpin(vma);
903 err_map:
904 i915_gem_object_unpin_map(obj);
905 err_obj:
906 i915_gem_object_put(obj);
907 return err;
908 }
909
910 static struct i915_request *
911 create_rewinder(struct intel_context *ce,
912 struct i915_request *wait,
913 void *slot, int idx)
914 {
915 const u32 offset =
916 i915_ggtt_offset(ce->engine->status_page.vma) +
917 offset_in_page(slot);
918 struct i915_request *rq;
919 u32 *cs;
920 int err;
921
922 rq = intel_context_create_request(ce);
923 if (IS_ERR(rq))
924 return rq;
925
926 if (wait) {
927 err = i915_request_await_dma_fence(rq, &wait->fence);
928 if (err)
929 goto err;
930 }
931
932 cs = intel_ring_begin(rq, 10);
933 if (IS_ERR(cs)) {
934 err = PTR_ERR(cs);
935 goto err;
936 }
937
938 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
939 *cs++ = MI_NOOP;
940
941 *cs++ = MI_SEMAPHORE_WAIT |
942 MI_SEMAPHORE_GLOBAL_GTT |
943 MI_SEMAPHORE_POLL |
944 MI_SEMAPHORE_SAD_NEQ_SDD;
945 *cs++ = 0;
946 *cs++ = offset;
947 *cs++ = 0;
948
949 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
950 *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
951 *cs++ = offset + idx * sizeof(u32);
952 *cs++ = 0;
953
954 intel_ring_advance(rq, cs);
955
956 rq->sched.attr.priority = I915_PRIORITY_MASK;
957 err = 0;
958 err:
959 i915_request_get(rq);
960 i915_request_add(rq);
961 if (err) {
962 i915_request_put(rq);
963 return ERR_PTR(err);
964 }
965
966 return rq;
967 }
968
969 static int live_timeslice_rewind(void *arg)
970 {
971 struct intel_gt *gt = arg;
972 struct intel_engine_cs *engine;
973 enum intel_engine_id id;
974
975 /*
976 * The usual presumption on timeslice expiration is that we replace
977 * the active context with another. However, given a chain of
978 * dependencies we may end up with replacing the context with itself,
979 * but only a few of those requests, forcing us to rewind the
980 * RING_TAIL of the original request.
981 */
982 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
983 return 0;
984
985 for_each_engine(engine, gt, id) {
986 enum { A1, A2, B1 };
987 enum { X = 1, Y, Z };
988 struct i915_request *rq[3] = {};
989 struct intel_context *ce;
990 unsigned long heartbeat;
991 unsigned long timeslice;
992 int i, err = 0;
993 u32 *slot;
994
995 if (!intel_engine_has_timeslices(engine))
996 continue;
997
998 /*
999 * A:rq1 -- semaphore wait, timestamp X
1000 * A:rq2 -- write timestamp Y
1001 *
1002 * B:rq1 [await A:rq1] -- write timestamp Z
1003 *
1004 * Force timeslice, release semaphore.
1005 *
1006 * Expect execution/evaluation order XZY
1007 */
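/*
 * Each request built by create_rewinder() waits on slot[0] and then
 * stores the engine's RING_TIMESTAMP into slot[idx], so once slot[0] is
 * released the recorded timestamps X, Y, Z expose the order in which the
 * three requests actually ran. Forcing a 1ms timeslice while A:rq1 still
 * blocks on the semaphore makes the scheduler resubmit context A with
 * only rq1 -- i.e. it must rewind A's RING_TAIL to before rq2 -- and a
 * correct rewind shows up as Z (context B) being stamped before Y.
 */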
1008
1009 engine_heartbeat_disable(engine, &heartbeat);
1010 timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1011
1012 slot = memset32(engine->status_page.addr + 1000, 0, 4);
1013
1014 ce = intel_context_create(engine);
1015 if (IS_ERR(ce)) {
1016 err = PTR_ERR(ce);
1017 goto err;
1018 }
1019
1020 rq[0] = create_rewinder(ce, NULL, slot, 1);
1021 if (IS_ERR(rq[0])) {
1022 intel_context_put(ce);
1023 goto err;
1024 }
1025
1026 rq[1] = create_rewinder(ce, NULL, slot, 2);
1027 intel_context_put(ce);
1028 if (IS_ERR(rq[1]))
1029 goto err;
1030
1031 err = wait_for_submit(engine, rq[1], HZ / 2);
1032 if (err) {
1033 pr_err("%s: failed to submit first context\n",
1034 engine->name);
1035 goto err;
1036 }
1037
1038 ce = intel_context_create(engine);
1039 if (IS_ERR(ce)) {
1040 err = PTR_ERR(ce);
1041 goto err;
1042 }
1043
1044 rq[2] = create_rewinder(ce, rq[0], slot, 3);
1045 intel_context_put(ce);
1046 if (IS_ERR(rq[2]))
1047 goto err;
1048
1049 err = wait_for_submit(engine, rq[2], HZ / 2);
1050 if (err) {
1051 pr_err("%s: failed to submit second context\n",
1052 engine->name);
1053 goto err;
1054 }
1055 GEM_BUG_ON(!timer_pending(&engine->execlists.timer));
1056
1057 /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
1058 GEM_BUG_ON(!i915_request_is_active(rq[A1]));
1059 GEM_BUG_ON(!i915_request_is_active(rq[A2]));
1060 GEM_BUG_ON(!i915_request_is_active(rq[B1]));
1061
1062 /* Wait for the timeslice to kick in */
1063 del_timer(&engine->execlists.timer);
1064 tasklet_hi_schedule(&engine->execlists.tasklet);
1065 intel_engine_flush_submission(engine);
1066
1067 /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
1068 GEM_BUG_ON(!i915_request_is_active(rq[A1]));
1069 GEM_BUG_ON(!i915_request_is_active(rq[B1]));
1070 GEM_BUG_ON(i915_request_is_active(rq[A2]));
1071
1072 /* Release the hounds! */
1073 slot[0] = 1;
1074 wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */
1075
1076 for (i = 1; i <= 3; i++) {
1077 unsigned long timeout = jiffies + HZ / 2;
1078
1079 while (!READ_ONCE(slot[i]) &&
1080 time_before(jiffies, timeout))
1081 ;
1082
1083 if (!time_before(jiffies, timeout)) {
1084 pr_err("%s: rq[%d] timed out\n",
1085 engine->name, i - 1);
1086 err = -ETIME;
1087 goto err;
1088 }
1089
1090 pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
1091 }
1092
1093 /* XZY: XZ < XY */
1094 if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
1095 pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
1096 engine->name,
1097 slot[Z] - slot[X],
1098 slot[Y] - slot[X]);
1099 err = -EINVAL;
1100 }
1101
1102 err:
1103 memset32(&slot[0], -1, 4);
1104 wmb();
1105
1106 engine->props.timeslice_duration_ms = timeslice;
1107 engine_heartbeat_enable(engine, heartbeat);
1108 for (i = 0; i < 3; i++)
1109 i915_request_put(rq[i]);
1110 if (igt_flush_test(gt->i915))
1111 err = -EIO;
1112 if (err)
1113 return err;
1114 }
1115
1116 return 0;
1117 }
1118
1119 static struct i915_request *nop_request(struct intel_engine_cs *engine)
1120 {
1121 struct i915_request *rq;
1122
1123 rq = intel_engine_create_kernel_request(engine);
1124 if (IS_ERR(rq))
1125 return rq;
1126
1127 i915_request_get(rq);
1128 i915_request_add(rq);
1129
1130 return rq;
1131 }
1132
1133 static long timeslice_threshold(const struct intel_engine_cs *engine)
1134 {
1135 return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1;
1136 }
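/*
 * timeslice_threshold() is used below as the longest we are prepared to
 * wait for a timeslice-driven switch: two full timeslice intervals
 * (converted to jiffies) plus one jiffy of slack. A request that has not
 * been ejected by then is assumed stuck.
 */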
1137
1138 static int live_timeslice_queue(void *arg)
1139 {
1140 struct intel_gt *gt = arg;
1141 struct drm_i915_gem_object *obj;
1142 struct intel_engine_cs *engine;
1143 enum intel_engine_id id;
1144 struct i915_vma *vma;
1145 void *vaddr;
1146 int err = 0;
1147
1148 /*
1149 * Make sure that even if ELSP[0] and ELSP[1] are filled, with
1150 * timeslicing between them disabled, we *do* enable timeslicing
1151 * if the queue demands it. (Normally, we do not submit if
1152 * ELSP[1] is already occupied, so must rely on timeslicing to
1153 * eject ELSP[0] in favour of the queue.)
1154 */
1155 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1156 return 0;
1157
1158 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1159 if (IS_ERR(obj))
1160 return PTR_ERR(obj);
1161
1162 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1163 if (IS_ERR(vma)) {
1164 err = PTR_ERR(vma);
1165 goto err_obj;
1166 }
1167
1168 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
1169 if (IS_ERR(vaddr)) {
1170 err = PTR_ERR(vaddr);
1171 goto err_obj;
1172 }
1173
1174 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1175 if (err)
1176 goto err_map;
1177
1178 err = i915_vma_sync(vma);
1179 if (err)
1180 goto err_pin;
1181
1182 for_each_engine(engine, gt, id) {
1183 struct i915_sched_attr attr = {
1184 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
1185 };
1186 struct i915_request *rq, *nop;
1187 unsigned long saved;
1188
1189 if (!intel_engine_has_preemption(engine))
1190 continue;
1191
1192 engine_heartbeat_disable(engine, &saved);
1193 memset(vaddr, 0, PAGE_SIZE);
1194
1195 /* ELSP[0]: semaphore wait */
1196 rq = semaphore_queue(engine, vma, 0);
1197 if (IS_ERR(rq)) {
1198 err = PTR_ERR(rq);
1199 goto err_heartbeat;
1200 }
1201 engine->schedule(rq, &attr);
1202 err = wait_for_submit(engine, rq, HZ / 2);
1203 if (err) {
1204 pr_err("%s: Timed out trying to submit semaphores\n",
1205 engine->name);
1206 goto err_rq;
1207 }
1208
1209 /* ELSP[1]: nop request */
1210 nop = nop_request(engine);
1211 if (IS_ERR(nop)) {
1212 err = PTR_ERR(nop);
1213 goto err_rq;
1214 }
1215 err = wait_for_submit(engine, nop, HZ / 2);
1216 i915_request_put(nop);
1217 if (err) {
1218 pr_err("%s: Timed out trying to submit nop\n",
1219 engine->name);
1220 goto err_rq;
1221 }
1222
1223 GEM_BUG_ON(i915_request_completed(rq));
1224 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
1225
1226 /* Queue: semaphore signal, matching priority as semaphore */
1227 err = release_queue(engine, vma, 1, effective_prio(rq));
1228 if (err)
1229 goto err_rq;
1230
1231 intel_engine_flush_submission(engine);
1232 if (!READ_ONCE(engine->execlists.timer.expires) &&
1233 !i915_request_completed(rq)) {
1234 struct drm_printer p =
1235 drm_info_printer(gt->i915->drm.dev);
1236
1237 GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n",
1238 engine->name);
1239 intel_engine_dump(engine, &p,
1240 "%s\n", engine->name);
1241 GEM_TRACE_DUMP();
1242
1243 memset(vaddr, 0xff, PAGE_SIZE);
1244 err = -EINVAL;
1245 }
1246
1247 /* Timeslice every jiffy, so within 2 we should signal */
1248 if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) {
1249 struct drm_printer p =
1250 drm_info_printer(gt->i915->drm.dev);
1251
1252 pr_err("%s: Failed to timeslice into queue\n",
1253 engine->name);
1254 intel_engine_dump(engine, &p,
1255 "%s\n", engine->name);
1256
1257 memset(vaddr, 0xff, PAGE_SIZE);
1258 err = -EIO;
1259 }
1260 err_rq:
1261 i915_request_put(rq);
1262 err_heartbeat:
1263 engine_heartbeat_enable(engine, saved);
1264 if (err)
1265 break;
1266 }
1267
1268 err_pin:
1269 i915_vma_unpin(vma);
1270 err_map:
1271 i915_gem_object_unpin_map(obj);
1272 err_obj:
1273 i915_gem_object_put(obj);
1274 return err;
1275 }
1276
1277 static int live_busywait_preempt(void *arg)
1278 {
1279 struct intel_gt *gt = arg;
1280 struct i915_gem_context *ctx_hi, *ctx_lo;
1281 struct intel_engine_cs *engine;
1282 struct drm_i915_gem_object *obj;
1283 struct i915_vma *vma;
1284 enum intel_engine_id id;
1285 int err = -ENOMEM;
1286 u32 *map;
1287
1288 /*
1289 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
1290 * preempt the busywaits used to synchronise between rings.
1291 */
1292
1293 ctx_hi = kernel_context(gt->i915);
1294 if (!ctx_hi)
1295 return -ENOMEM;
1296 ctx_hi->sched.priority =
1297 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
1298
1299 ctx_lo = kernel_context(gt->i915);
1300 if (!ctx_lo)
1301 goto err_ctx_hi;
1302 ctx_lo->sched.priority =
1303 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
1304
1305 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1306 if (IS_ERR(obj)) {
1307 err = PTR_ERR(obj);
1308 goto err_ctx_lo;
1309 }
1310
1311 map = i915_gem_object_pin_map(obj, I915_MAP_WC);
1312 if (IS_ERR(map)) {
1313 err = PTR_ERR(map);
1314 goto err_obj;
1315 }
1316
1317 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1318 if (IS_ERR(vma)) {
1319 err = PTR_ERR(vma);
1320 goto err_map;
1321 }
1322
1323 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1324 if (err)
1325 goto err_map;
1326
1327 err = i915_vma_sync(vma);
1328 if (err)
1329 goto err_vma;
1330
1331 for_each_engine(engine, gt, id) {
1332 struct i915_request *lo, *hi;
1333 struct igt_live_test t;
1334 u32 *cs;
1335
1336 if (!intel_engine_has_preemption(engine))
1337 continue;
1338
1339 if (!intel_engine_can_store_dword(engine))
1340 continue;
1341
1342 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1343 err = -EIO;
1344 goto err_vma;
1345 }
1346
1347 /*
1348 * We create two requests. The low priority request
1349 * busywaits on a semaphore (inside the ringbuffer where
1350 * it should be preemptible) and the high priority request
1351 * uses a MI_STORE_DWORD_IMM to update the semaphore value
1352 * allowing the first request to complete. If preemption
1353 * fails, we hang instead.
1354 */
1355
1356 lo = igt_request_alloc(ctx_lo, engine);
1357 if (IS_ERR(lo)) {
1358 err = PTR_ERR(lo);
1359 goto err_vma;
1360 }
1361
1362 cs = intel_ring_begin(lo, 8);
1363 if (IS_ERR(cs)) {
1364 err = PTR_ERR(cs);
1365 i915_request_add(lo);
1366 goto err_vma;
1367 }
1368
1369 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1370 *cs++ = i915_ggtt_offset(vma);
1371 *cs++ = 0;
1372 *cs++ = 1;
1373
1374 /* XXX Do we need a flush + invalidate here? */
1375
1376 *cs++ = MI_SEMAPHORE_WAIT |
1377 MI_SEMAPHORE_GLOBAL_GTT |
1378 MI_SEMAPHORE_POLL |
1379 MI_SEMAPHORE_SAD_EQ_SDD;
1380 *cs++ = 0;
1381 *cs++ = i915_ggtt_offset(vma);
1382 *cs++ = 0;
1383
1384 intel_ring_advance(lo, cs);
1385
1386 i915_request_get(lo);
1387 i915_request_add(lo);
1388
1389 if (wait_for(READ_ONCE(*map), 10)) {
1390 i915_request_put(lo);
1391 err = -ETIMEDOUT;
1392 goto err_vma;
1393 }
1394
1395 /* Low priority request should be busywaiting now */
1396 if (i915_request_wait(lo, 0, 1) != -ETIME) {
1397 i915_request_put(lo);
1398 pr_err("%s: Busywaiting request did not busywait!\n",
1399 engine->name);
1400 err = -EIO;
1401 goto err_vma;
1402 }
1403
1404 hi = igt_request_alloc(ctx_hi, engine);
1405 if (IS_ERR(hi)) {
1406 err = PTR_ERR(hi);
1407 i915_request_put(lo);
1408 goto err_vma;
1409 }
1410
1411 cs = intel_ring_begin(hi, 4);
1412 if (IS_ERR(cs)) {
1413 err = PTR_ERR(cs);
1414 i915_request_add(hi);
1415 i915_request_put(lo);
1416 goto err_vma;
1417 }
1418
1419 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1420 *cs++ = i915_ggtt_offset(vma);
1421 *cs++ = 0;
1422 *cs++ = 0;
1423
1424 intel_ring_advance(hi, cs);
1425 i915_request_add(hi);
1426
1427 if (i915_request_wait(lo, 0, HZ / 5) < 0) {
1428 struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
1429
1430 pr_err("%s: Failed to preempt semaphore busywait!\n",
1431 engine->name);
1432
1433 intel_engine_dump(engine, &p, "%s\n", engine->name);
1434 GEM_TRACE_DUMP();
1435
1436 i915_request_put(lo);
1437 intel_gt_set_wedged(gt);
1438 err = -EIO;
1439 goto err_vma;
1440 }
1441 GEM_BUG_ON(READ_ONCE(*map));
1442 i915_request_put(lo);
1443
1444 if (igt_live_test_end(&t)) {
1445 err = -EIO;
1446 goto err_vma;
1447 }
1448 }
1449
1450 err = 0;
1451 err_vma:
1452 i915_vma_unpin(vma);
1453 err_map:
1454 i915_gem_object_unpin_map(obj);
1455 err_obj:
1456 i915_gem_object_put(obj);
1457 err_ctx_lo:
1458 kernel_context_close(ctx_lo);
1459 err_ctx_hi:
1460 kernel_context_close(ctx_hi);
1461 return err;
1462 }
1463
1464 static struct i915_request *
1465 spinner_create_request(struct igt_spinner *spin,
1466 struct i915_gem_context *ctx,
1467 struct intel_engine_cs *engine,
1468 u32 arb)
1469 {
1470 struct intel_context *ce;
1471 struct i915_request *rq;
1472
1473 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
1474 if (IS_ERR(ce))
1475 return ERR_CAST(ce);
1476
1477 rq = igt_spinner_create_request(spin, ce, arb);
1478 intel_context_put(ce);
1479 return rq;
1480 }
1481
1482 static int live_preempt(void *arg)
1483 {
1484 struct intel_gt *gt = arg;
1485 struct i915_gem_context *ctx_hi, *ctx_lo;
1486 struct igt_spinner spin_hi, spin_lo;
1487 struct intel_engine_cs *engine;
1488 enum intel_engine_id id;
1489 int err = -ENOMEM;
1490
1491 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1492 return 0;
1493
1494 if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
1495 pr_err("Logical preemption supported, but not exposed\n");
1496
1497 if (igt_spinner_init(&spin_hi, gt))
1498 return -ENOMEM;
1499
1500 if (igt_spinner_init(&spin_lo, gt))
1501 goto err_spin_hi;
1502
1503 ctx_hi = kernel_context(gt->i915);
1504 if (!ctx_hi)
1505 goto err_spin_lo;
1506 ctx_hi->sched.priority =
1507 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
1508
1509 ctx_lo = kernel_context(gt->i915);
1510 if (!ctx_lo)
1511 goto err_ctx_hi;
1512 ctx_lo->sched.priority =
1513 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
1514
1515 for_each_engine(engine, gt, id) {
1516 struct igt_live_test t;
1517 struct i915_request *rq;
1518
1519 if (!intel_engine_has_preemption(engine))
1520 continue;
1521
1522 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1523 err = -EIO;
1524 goto err_ctx_lo;
1525 }
1526
1527 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1528 MI_ARB_CHECK);
1529 if (IS_ERR(rq)) {
1530 err = PTR_ERR(rq);
1531 goto err_ctx_lo;
1532 }
1533
1534 i915_request_add(rq);
1535 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1536 GEM_TRACE("lo spinner failed to start\n");
1537 GEM_TRACE_DUMP();
1538 intel_gt_set_wedged(gt);
1539 err = -EIO;
1540 goto err_ctx_lo;
1541 }
1542
1543 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1544 MI_ARB_CHECK);
1545 if (IS_ERR(rq)) {
1546 igt_spinner_end(&spin_lo);
1547 err = PTR_ERR(rq);
1548 goto err_ctx_lo;
1549 }
1550
1551 i915_request_add(rq);
1552 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1553 GEM_TRACE("hi spinner failed to start\n");
1554 GEM_TRACE_DUMP();
1555 intel_gt_set_wedged(gt);
1556 err = -EIO;
1557 goto err_ctx_lo;
1558 }
1559
1560 igt_spinner_end(&spin_hi);
1561 igt_spinner_end(&spin_lo);
1562
1563 if (igt_live_test_end(&t)) {
1564 err = -EIO;
1565 goto err_ctx_lo;
1566 }
1567 }
1568
1569 err = 0;
1570 err_ctx_lo:
1571 kernel_context_close(ctx_lo);
1572 err_ctx_hi:
1573 kernel_context_close(ctx_hi);
1574 err_spin_lo:
1575 igt_spinner_fini(&spin_lo);
1576 err_spin_hi:
1577 igt_spinner_fini(&spin_hi);
1578 return err;
1579 }
1580
1581 static int live_late_preempt(void *arg)
1582 {
1583 struct intel_gt *gt = arg;
1584 struct i915_gem_context *ctx_hi, *ctx_lo;
1585 struct igt_spinner spin_hi, spin_lo;
1586 struct intel_engine_cs *engine;
1587 struct i915_sched_attr attr = {};
1588 enum intel_engine_id id;
1589 int err = -ENOMEM;
1590
1591 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1592 return 0;
1593
1594 if (igt_spinner_init(&spin_hi, gt))
1595 return -ENOMEM;
1596
1597 if (igt_spinner_init(&spin_lo, gt))
1598 goto err_spin_hi;
1599
1600 ctx_hi = kernel_context(gt->i915);
1601 if (!ctx_hi)
1602 goto err_spin_lo;
1603
1604 ctx_lo = kernel_context(gt->i915);
1605 if (!ctx_lo)
1606 goto err_ctx_hi;
1607
1608 /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1609 ctx_lo->sched.priority = I915_USER_PRIORITY(1);
1610
1611 for_each_engine(engine, gt, id) {
1612 struct igt_live_test t;
1613 struct i915_request *rq;
1614
1615 if (!intel_engine_has_preemption(engine))
1616 continue;
1617
1618 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1619 err = -EIO;
1620 goto err_ctx_lo;
1621 }
1622
1623 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1624 MI_ARB_CHECK);
1625 if (IS_ERR(rq)) {
1626 err = PTR_ERR(rq);
1627 goto err_ctx_lo;
1628 }
1629
1630 i915_request_add(rq);
1631 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1632 pr_err("First context failed to start\n");
1633 goto err_wedged;
1634 }
1635
1636 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1637 MI_NOOP);
1638 if (IS_ERR(rq)) {
1639 igt_spinner_end(&spin_lo);
1640 err = PTR_ERR(rq);
1641 goto err_ctx_lo;
1642 }
1643
1644 i915_request_add(rq);
1645 if (igt_wait_for_spinner(&spin_hi, rq)) {
1646 pr_err("Second context overtook first?\n");
1647 goto err_wedged;
1648 }
1649
1650 attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1651 engine->schedule(rq, &attr);
1652
1653 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1654 pr_err("High priority context failed to preempt the low priority context\n");
1655 GEM_TRACE_DUMP();
1656 goto err_wedged;
1657 }
1658
1659 igt_spinner_end(&spin_hi);
1660 igt_spinner_end(&spin_lo);
1661
1662 if (igt_live_test_end(&t)) {
1663 err = -EIO;
1664 goto err_ctx_lo;
1665 }
1666 }
1667
1668 err = 0;
1669 err_ctx_lo:
1670 kernel_context_close(ctx_lo);
1671 err_ctx_hi:
1672 kernel_context_close(ctx_hi);
1673 err_spin_lo:
1674 igt_spinner_fini(&spin_lo);
1675 err_spin_hi:
1676 igt_spinner_fini(&spin_hi);
1677 return err;
1678
1679 err_wedged:
1680 igt_spinner_end(&spin_hi);
1681 igt_spinner_end(&spin_lo);
1682 intel_gt_set_wedged(gt);
1683 err = -EIO;
1684 goto err_ctx_lo;
1685 }
1686
1687 struct preempt_client {
1688 struct igt_spinner spin;
1689 struct i915_gem_context *ctx;
1690 };
1691
1692 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1693 {
1694 c->ctx = kernel_context(gt->i915);
1695 if (!c->ctx)
1696 return -ENOMEM;
1697
1698 if (igt_spinner_init(&c->spin, gt))
1699 goto err_ctx;
1700
1701 return 0;
1702
1703 err_ctx:
1704 kernel_context_close(c->ctx);
1705 return -ENOMEM;
1706 }
1707
1708 static void preempt_client_fini(struct preempt_client *c)
1709 {
1710 igt_spinner_fini(&c->spin);
1711 kernel_context_close(c->ctx);
1712 }
1713
1714 static int live_nopreempt(void *arg)
1715 {
1716 struct intel_gt *gt = arg;
1717 struct intel_engine_cs *engine;
1718 struct preempt_client a, b;
1719 enum intel_engine_id id;
1720 int err = -ENOMEM;
1721
1722 /*
1723 * Verify that we can disable preemption for an individual request
1724 * that may be being observed and does not want to be interrupted.
1725 */
1726
1727 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1728 return 0;
1729
1730 if (preempt_client_init(gt, &a))
1731 return -ENOMEM;
1732 if (preempt_client_init(gt, &b))
1733 goto err_client_a;
1734 b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1735
1736 for_each_engine(engine, gt, id) {
1737 struct i915_request *rq_a, *rq_b;
1738
1739 if (!intel_engine_has_preemption(engine))
1740 continue;
1741
1742 engine->execlists.preempt_hang.count = 0;
1743
1744 rq_a = spinner_create_request(&a.spin,
1745 a.ctx, engine,
1746 MI_ARB_CHECK);
1747 if (IS_ERR(rq_a)) {
1748 err = PTR_ERR(rq_a);
1749 goto err_client_b;
1750 }
1751
1752 /* Low priority client, but unpreemptable! */
1753 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
1754
1755 i915_request_add(rq_a);
1756 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1757 pr_err("First client failed to start\n");
1758 goto err_wedged;
1759 }
1760
1761 rq_b = spinner_create_request(&b.spin,
1762 b.ctx, engine,
1763 MI_ARB_CHECK);
1764 if (IS_ERR(rq_b)) {
1765 err = PTR_ERR(rq_b);
1766 goto err_client_b;
1767 }
1768
1769 i915_request_add(rq_b);
1770
1771 /* B is much more important than A! (But A is unpreemptable.) */
1772 GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
1773
1774 /* Wait long enough for preemption and timeslicing */
1775 if (igt_wait_for_spinner(&b.spin, rq_b)) {
1776 pr_err("Second client started too early!\n");
1777 goto err_wedged;
1778 }
1779
1780 igt_spinner_end(&a.spin);
1781
1782 if (!igt_wait_for_spinner(&b.spin, rq_b)) {
1783 pr_err("Second client failed to start\n");
1784 goto err_wedged;
1785 }
1786
1787 igt_spinner_end(&b.spin);
1788
1789 if (engine->execlists.preempt_hang.count) {
1790 pr_err("Preemption recorded x%d; should have been suppressed!\n",
1791 engine->execlists.preempt_hang.count);
1792 err = -EINVAL;
1793 goto err_wedged;
1794 }
1795
1796 if (igt_flush_test(gt->i915))
1797 goto err_wedged;
1798 }
1799
1800 err = 0;
1801 err_client_b:
1802 preempt_client_fini(&b);
1803 err_client_a:
1804 preempt_client_fini(&a);
1805 return err;
1806
1807 err_wedged:
1808 igt_spinner_end(&b.spin);
1809 igt_spinner_end(&a.spin);
1810 intel_gt_set_wedged(gt);
1811 err = -EIO;
1812 goto err_client_b;
1813 }
1814
1815 struct live_preempt_cancel {
1816 struct intel_engine_cs *engine;
1817 struct preempt_client a, b;
1818 };
1819
1820 static int __cancel_active0(struct live_preempt_cancel *arg)
1821 {
1822 struct i915_request *rq;
1823 struct igt_live_test t;
1824 int err;
1825
1826 /* Preempt cancel of ELSP0 */
1827 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1828 if (igt_live_test_begin(&t, arg->engine->i915,
1829 __func__, arg->engine->name))
1830 return -EIO;
1831
1832 rq = spinner_create_request(&arg->a.spin,
1833 arg->a.ctx, arg->engine,
1834 MI_ARB_CHECK);
1835 if (IS_ERR(rq))
1836 return PTR_ERR(rq);
1837
1838 clear_bit(CONTEXT_BANNED, &rq->context->flags);
1839 i915_request_get(rq);
1840 i915_request_add(rq);
1841 if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
1842 err = -EIO;
1843 goto out;
1844 }
1845
1846 intel_context_set_banned(rq->context);
1847 err = intel_engine_pulse(arg->engine);
1848 if (err)
1849 goto out;
1850
1851 err = wait_for_reset(arg->engine, rq, HZ / 2);
1852 if (err) {
1853 pr_err("Cancelled inflight0 request did not reset\n");
1854 goto out;
1855 }
1856
1857 out:
1858 i915_request_put(rq);
1859 if (igt_live_test_end(&t))
1860 err = -EIO;
1861 return err;
1862 }
1863
1864 static int __cancel_active1(struct live_preempt_cancel *arg)
1865 {
1866 struct i915_request *rq[2] = {};
1867 struct igt_live_test t;
1868 int err;
1869
1870 /* Preempt cancel of ELSP1 */
1871 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1872 if (igt_live_test_begin(&t, arg->engine->i915,
1873 __func__, arg->engine->name))
1874 return -EIO;
1875
1876 rq[0] = spinner_create_request(&arg->a.spin,
1877 arg->a.ctx, arg->engine,
1878 MI_NOOP); /* no preemption */
1879 if (IS_ERR(rq[0]))
1880 return PTR_ERR(rq[0]);
1881
1882 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
1883 i915_request_get(rq[0]);
1884 i915_request_add(rq[0]);
1885 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
1886 err = -EIO;
1887 goto out;
1888 }
1889
1890 rq[1] = spinner_create_request(&arg->b.spin,
1891 arg->b.ctx, arg->engine,
1892 MI_ARB_CHECK);
1893 if (IS_ERR(rq[1])) {
1894 err = PTR_ERR(rq[1]);
1895 goto out;
1896 }
1897
1898 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
1899 i915_request_get(rq[1]);
1900 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
1901 i915_request_add(rq[1]);
1902 if (err)
1903 goto out;
1904
1905 intel_context_set_banned(rq[1]->context);
1906 err = intel_engine_pulse(arg->engine);
1907 if (err)
1908 goto out;
1909
1910 igt_spinner_end(&arg->a.spin);
1911 err = wait_for_reset(arg->engine, rq[1], HZ / 2);
1912 if (err)
1913 goto out;
1914
1915 if (rq[0]->fence.error != 0) {
1916 pr_err("Normal inflight0 request did not complete\n");
1917 err = -EINVAL;
1918 goto out;
1919 }
1920
1921 if (rq[1]->fence.error != -EIO) {
1922 pr_err("Cancelled inflight1 request did not report -EIO\n");
1923 err = -EINVAL;
1924 goto out;
1925 }
1926
1927 out:
1928 i915_request_put(rq[1]);
1929 i915_request_put(rq[0]);
1930 if (igt_live_test_end(&t))
1931 err = -EIO;
1932 return err;
1933 }
1934
1935 static int __cancel_queued(struct live_preempt_cancel *arg)
1936 {
1937 struct i915_request *rq[3] = {};
1938 struct igt_live_test t;
1939 int err;
1940
1941 /* Full ELSP and one in the wings */
1942 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1943 if (igt_live_test_begin(&t, arg->engine->i915,
1944 __func__, arg->engine->name))
1945 return -EIO;
1946
1947 rq[0] = spinner_create_request(&arg->a.spin,
1948 arg->a.ctx, arg->engine,
1949 MI_ARB_CHECK);
1950 if (IS_ERR(rq[0]))
1951 return PTR_ERR(rq[0]);
1952
1953 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
1954 i915_request_get(rq[0]);
1955 i915_request_add(rq[0]);
1956 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
1957 err = -EIO;
1958 goto out;
1959 }
1960
1961 rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
1962 if (IS_ERR(rq[1])) {
1963 err = PTR_ERR(rq[1]);
1964 goto out;
1965 }
1966
1967 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
1968 i915_request_get(rq[1]);
1969 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
1970 i915_request_add(rq[1]);
1971 if (err)
1972 goto out;
1973
1974 rq[2] = spinner_create_request(&arg->b.spin,
1975 arg->a.ctx, arg->engine,
1976 MI_ARB_CHECK);
1977 if (IS_ERR(rq[2])) {
1978 err = PTR_ERR(rq[2]);
1979 goto out;
1980 }
1981
1982 i915_request_get(rq[2]);
1983 err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
1984 i915_request_add(rq[2]);
1985 if (err)
1986 goto out;
1987
1988 intel_context_set_banned(rq[2]->context);
1989 err = intel_engine_pulse(arg->engine);
1990 if (err)
1991 goto out;
1992
1993 err = wait_for_reset(arg->engine, rq[2], HZ / 2);
1994 if (err)
1995 goto out;
1996
1997 if (rq[0]->fence.error != -EIO) {
1998 pr_err("Cancelled inflight0 request did not report -EIO\n");
1999 err = -EINVAL;
2000 goto out;
2001 }
2002
2003 if (rq[1]->fence.error != 0) {
2004 pr_err("Normal inflight1 request did not complete\n");
2005 err = -EINVAL;
2006 goto out;
2007 }
2008
2009 if (rq[2]->fence.error != -EIO) {
2010 pr_err("Cancelled queued request did not report -EIO\n");
2011 err = -EINVAL;
2012 goto out;
2013 }
2014
2015 out:
2016 i915_request_put(rq[2]);
2017 i915_request_put(rq[1]);
2018 i915_request_put(rq[0]);
2019 if (igt_live_test_end(&t))
2020 err = -EIO;
2021 return err;
2022 }
2023
2024 static int __cancel_hostile(struct live_preempt_cancel *arg)
2025 {
2026 struct i915_request *rq;
2027 int err;
2028
2029 /* Preempt cancel non-preemptible spinner in ELSP0 */
2030 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2031 return 0;
2032
2033 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2034 rq = spinner_create_request(&arg->a.spin,
2035 arg->a.ctx, arg->engine,
2036 MI_NOOP); /* preemption disabled */
2037 if (IS_ERR(rq))
2038 return PTR_ERR(rq);
2039
2040 clear_bit(CONTEXT_BANNED, &rq->context->flags);
2041 i915_request_get(rq);
2042 i915_request_add(rq);
2043 if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2044 err = -EIO;
2045 goto out;
2046 }
2047
2048 intel_context_set_banned(rq->context);
2049 err = intel_engine_pulse(arg->engine); /* force reset */
2050 if (err)
2051 goto out;
2052
2053 err = wait_for_reset(arg->engine, rq, HZ / 2);
2054 if (err) {
2055 pr_err("Cancelled inflight0 request did not reset\n");
2056 goto out;
2057 }
2058
2059 out:
2060 i915_request_put(rq);
2061 if (igt_flush_test(arg->engine->i915))
2062 err = -EIO;
2063 return err;
2064 }
2065
2066 static int live_preempt_cancel(void *arg)
2067 {
2068 struct intel_gt *gt = arg;
2069 struct live_preempt_cancel data;
2070 enum intel_engine_id id;
2071 int err = -ENOMEM;
2072
2073 /*
2074 * To cancel an inflight context, we need to first remove it from the
2075 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
2076 */
2077
2078 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2079 return 0;
2080
2081 if (preempt_client_init(gt, &data.a))
2082 return -ENOMEM;
2083 if (preempt_client_init(gt, &data.b))
2084 goto err_client_a;
2085
2086 for_each_engine(data.engine, gt, id) {
2087 if (!intel_engine_has_preemption(data.engine))
2088 continue;
2089
2090 err = __cancel_active0(&data);
2091 if (err)
2092 goto err_wedged;
2093
2094 err = __cancel_active1(&data);
2095 if (err)
2096 goto err_wedged;
2097
2098 err = __cancel_queued(&data);
2099 if (err)
2100 goto err_wedged;
2101
2102 err = __cancel_hostile(&data);
2103 if (err)
2104 goto err_wedged;
2105 }
2106
2107 err = 0;
2108 err_client_b:
2109 preempt_client_fini(&data.b);
2110 err_client_a:
2111 preempt_client_fini(&data.a);
2112 return err;
2113
2114 err_wedged:
2115 GEM_TRACE_DUMP();
2116 igt_spinner_end(&data.b.spin);
2117 igt_spinner_end(&data.a.spin);
2118 intel_gt_set_wedged(gt);
2119 goto err_client_b;
2120 }
2121
2122 static int live_suppress_self_preempt(void *arg)
2123 {
2124 struct intel_gt *gt = arg;
2125 struct intel_engine_cs *engine;
2126 struct i915_sched_attr attr = {
2127 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
2128 };
2129 struct preempt_client a, b;
2130 enum intel_engine_id id;
2131 int err = -ENOMEM;
2132
2133 /*
2134 * Verify that if a preemption request does not cause a change in
2135 * the current execution order, the preempt-to-idle injection is
2136 * skipped and that we do not accidentally apply it after the CS
2137 * completion event.
2138 */
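/*
 * Concretely, the loop below repeatedly promotes the request that is
 * already executing to maximum priority. Since the execution order
 * cannot improve, the submission backend should notice that no
 * preemption is required; any non-zero execlists.preempt_hang.count at
 * the end means a pointless preempt-to-idle cycle was injected.
 */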
2139
2140 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2141 return 0;
2142
2143 if (intel_uc_uses_guc_submission(&gt->uc))
2144 return 0; /* presume a black box */
2145
2146 if (intel_vgpu_active(gt->i915))
2147 return 0; /* GVT forces single port & request submission */
2148
2149 if (preempt_client_init(gt, &a))
2150 return -ENOMEM;
2151 if (preempt_client_init(gt, &b))
2152 goto err_client_a;
2153
2154 for_each_engine(engine, gt, id) {
2155 struct i915_request *rq_a, *rq_b;
2156 int depth;
2157
2158 if (!intel_engine_has_preemption(engine))
2159 continue;
2160
2161 if (igt_flush_test(gt->i915))
2162 goto err_wedged;
2163
2164 intel_engine_pm_get(engine);
2165 engine->execlists.preempt_hang.count = 0;
2166
2167 rq_a = spinner_create_request(&a.spin,
2168 a.ctx, engine,
2169 MI_NOOP);
2170 if (IS_ERR(rq_a)) {
2171 err = PTR_ERR(rq_a);
2172 intel_engine_pm_put(engine);
2173 goto err_client_b;
2174 }
2175
2176 i915_request_add(rq_a);
2177 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
2178 pr_err("First client failed to start\n");
2179 intel_engine_pm_put(engine);
2180 goto err_wedged;
2181 }
2182
2183 /* Keep postponing the timer to avoid premature slicing */
2184 mod_timer(&engine->execlists.timer, jiffies + HZ);
2185 for (depth = 0; depth < 8; depth++) {
2186 rq_b = spinner_create_request(&b.spin,
2187 b.ctx, engine,
2188 MI_NOOP);
2189 if (IS_ERR(rq_b)) {
2190 err = PTR_ERR(rq_b);
2191 intel_engine_pm_put(engine);
2192 goto err_client_b;
2193 }
2194 i915_request_add(rq_b);
2195
2196 GEM_BUG_ON(i915_request_completed(rq_a));
2197 engine->schedule(rq_a, &attr);
2198 igt_spinner_end(&a.spin);
2199
2200 if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2201 pr_err("Second client failed to start\n");
2202 intel_engine_pm_put(engine);
2203 goto err_wedged;
2204 }
2205
2206 swap(a, b);
2207 rq_a = rq_b;
2208 }
2209 igt_spinner_end(&a.spin);
2210
2211 if (engine->execlists.preempt_hang.count) {
2212 pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
2213 engine->name,
2214 engine->execlists.preempt_hang.count,
2215 depth);
2216 intel_engine_pm_put(engine);
2217 err = -EINVAL;
2218 goto err_client_b;
2219 }
2220
2221 intel_engine_pm_put(engine);
2222 if (igt_flush_test(gt->i915))
2223 goto err_wedged;
2224 }
2225
2226 err = 0;
2227 err_client_b:
2228 preempt_client_fini(&b);
2229 err_client_a:
2230 preempt_client_fini(&a);
2231 return err;
2232
2233 err_wedged:
2234 igt_spinner_end(&b.spin);
2235 igt_spinner_end(&a.spin);
2236 intel_gt_set_wedged(gt);
2237 err = -EIO;
2238 goto err_client_b;
2239 }
2240
2241 static int __i915_sw_fence_call
2242 dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
2243 {
2244 return NOTIFY_DONE;
2245 }
2246
2247 static struct i915_request *dummy_request(struct intel_engine_cs *engine)
2248 {
2249 struct i915_request *rq;
2250
2251 rq = kzalloc(sizeof(*rq), GFP_KERNEL);
2252 if (!rq)
2253 return NULL;
2254
2255 rq->engine = engine;
2256
2257 spin_lock_init(&rq->lock);
2258 INIT_LIST_HEAD(&rq->fence.cb_list);
2259 rq->fence.lock = &rq->lock;
2260 rq->fence.ops = &i915_fence_ops;
2261
2262 i915_sched_node_init(&rq->sched);
2263
2264 /* mark this request as permanently incomplete */
2265 rq->fence.seqno = 1;
2266 BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */
2267 rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1;
2268 GEM_BUG_ON(i915_request_completed(rq));
2269
2270 i915_sw_fence_init(&rq->submit, dummy_notify);
2271 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
2272
2273 spin_lock_init(&rq->lock);
2274 rq->fence.lock = &rq->lock;
2275 INIT_LIST_HEAD(&rq->fence.cb_list);
2276
2277 return rq;
2278 }
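/*
 * The trick that keeps the dummy permanently incomplete: its 64b
 * fence.seqno is 1, while hwsp_seqno is pointed at the upper 32 bits of
 * that very seqno, which are zero. i915_request_completed() compares the
 * "HWSP" value (0) against the target seqno (1) and therefore never
 * reports completion until dummy_request_free() explicitly marks the
 * request complete and signals the fence.
 */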
2279
2280 static void dummy_request_free(struct i915_request *dummy)
2281 {
2282 /* We have to fake the CS interrupt to kick the next request */
2283 i915_sw_fence_commit(&dummy->submit);
2284
2285 i915_request_mark_complete(dummy);
2286 dma_fence_signal(&dummy->fence);
2287
2288 i915_sched_node_fini(&dummy->sched);
2289 i915_sw_fence_fini(&dummy->submit);
2290
2291 dma_fence_free(&dummy->fence);
2292 }
2293
2294 static int live_suppress_wait_preempt(void *arg)
2295 {
2296 struct intel_gt *gt = arg;
2297 struct preempt_client client[4];
2298 struct i915_request *rq[ARRAY_SIZE(client)] = {};
2299 struct intel_engine_cs *engine;
2300 enum intel_engine_id id;
2301 int err = -ENOMEM;
2302 int i;
2303
2304 /*
2305 * Waiters are given a little priority nudge, but not enough
2306 * to actually cause any preemption. Double check that we do
2307 * not needlessly generate preempt-to-idle cycles.
2308 */
2309
2310 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2311 return 0;
2312
2313 if (preempt_client_init(gt, &client[0])) /* ELSP[0] */
2314 return -ENOMEM;
2315 if (preempt_client_init(gt, &client[1])) /* ELSP[1] */
2316 goto err_client_0;
2317 if (preempt_client_init(gt, &client[2])) /* head of queue */
2318 goto err_client_1;
2319 if (preempt_client_init(gt, &client[3])) /* bystander */
2320 goto err_client_2;
2321
2322 for_each_engine(engine, gt, id) {
2323 int depth;
2324
2325 if (!intel_engine_has_preemption(engine))
2326 continue;
2327
2328 if (!engine->emit_init_breadcrumb)
2329 continue;
2330
2331 for (depth = 0; depth < ARRAY_SIZE(client); depth++) {
2332 struct i915_request *dummy;
2333
2334 engine->execlists.preempt_hang.count = 0;
2335
2336 dummy = dummy_request(engine);
2337 if (!dummy)
2338 goto err_client_3;
2339
2340 for (i = 0; i < ARRAY_SIZE(client); i++) {
2341 struct i915_request *this;
2342
2343 this = spinner_create_request(&client[i].spin,
2344 client[i].ctx, engine,
2345 MI_NOOP);
2346 if (IS_ERR(this)) {
2347 err = PTR_ERR(this);
2348 goto err_wedged;
2349 }
2350
2351 /* Disable NEWCLIENT promotion */
2352 __i915_active_fence_set(&i915_request_timeline(this)->last_request,
2353 &dummy->fence);
2354
2355 rq[i] = i915_request_get(this);
2356 i915_request_add(this);
2357 }
2358
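/*
 * Release the dummy: committing its submit fence and signalling it
 * unblocks every spinner that was chained behind it above, so all the
 * clients become runnable together instead of any of them being
 * treated as a fresh client.
 */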
2359 dummy_request_free(dummy);
2360
2361 GEM_BUG_ON(i915_request_completed(rq[0]));
2362 if (!igt_wait_for_spinner(&client[0].spin, rq[0])) {
2363 pr_err("%s: First client failed to start\n",
2364 engine->name);
2365 goto err_wedged;
2366 }
2367 GEM_BUG_ON(!i915_request_started(rq[0]));
2368
2369 if (i915_request_wait(rq[depth],
2370 I915_WAIT_PRIORITY,
2371 1) != -ETIME) {
2372 pr_err("%s: Waiter depth:%d completed!\n",
2373 engine->name, depth);
2374 goto err_wedged;
2375 }
2376
2377 for (i = 0; i < ARRAY_SIZE(client); i++) {
2378 igt_spinner_end(&client[i].spin);
2379 i915_request_put(rq[i]);
2380 rq[i] = NULL;
2381 }
2382
2383 if (igt_flush_test(gt->i915))
2384 goto err_wedged;
2385
2386 if (engine->execlists.preempt_hang.count) {
2387 pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n",
2388 engine->name,
2389 engine->execlists.preempt_hang.count,
2390 depth);
2391 err = -EINVAL;
2392 goto err_client_3;
2393 }
2394 }
2395 }
2396
2397 err = 0;
2398 err_client_3:
2399 preempt_client_fini(&client[3]);
2400 err_client_2:
2401 preempt_client_fini(&client[2]);
2402 err_client_1:
2403 preempt_client_fini(&client[1]);
2404 err_client_0:
2405 preempt_client_fini(&client[0]);
2406 return err;
2407
2408 err_wedged:
2409 for (i = 0; i < ARRAY_SIZE(client); i++) {
2410 igt_spinner_end(&client[i].spin);
2411 i915_request_put(rq[i]);
2412 }
2413 intel_gt_set_wedged(gt);
2414 err = -EIO;
2415 goto err_client_3;
2416 }
2417
2418 static int live_chain_preempt(void *arg)
2419 {
2420 struct intel_gt *gt = arg;
2421 struct intel_engine_cs *engine;
2422 struct preempt_client hi, lo;
2423 enum intel_engine_id id;
2424 int err = -ENOMEM;
2425
2426 /*
2427 * Build a chain AB...BA between two contexts (A, B) and request
2428 * preemption of the last request. It should then complete before
2429 * the previously submitted spinner in B.
2430 */
2431
2432 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2433 return 0;
2434
2435 if (preempt_client_init(gt, &hi))
2436 return -ENOMEM;
2437
2438 if (preempt_client_init(gt, &lo))
2439 goto err_client_hi;
2440
2441 for_each_engine(engine, gt, id) {
2442 struct i915_sched_attr attr = {
2443 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
2444 };
2445 struct igt_live_test t;
2446 struct i915_request *rq;
2447 int ring_size, count, i;
2448
2449 if (!intel_engine_has_preemption(engine))
2450 continue;
2451
2452 rq = spinner_create_request(&lo.spin,
2453 lo.ctx, engine,
2454 MI_ARB_CHECK);
2455 if (IS_ERR(rq))
2456 goto err_wedged;
2457
2458 i915_request_get(rq);
2459 i915_request_add(rq);
2460
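/*
 * A single spinner request occupies wa_tail - head bytes of the ring,
 * so ring->size divided by that is an upper bound on how many such
 * requests fit in the ring without wrapping.
 */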
2461 ring_size = rq->wa_tail - rq->head;
2462 if (ring_size < 0)
2463 ring_size += rq->ring->size;
2464 ring_size = rq->ring->size / ring_size;
2465 pr_debug("%s(%s): Using maximum of %d requests\n",
2466 __func__, engine->name, ring_size);
2467
2468 igt_spinner_end(&lo.spin);
2469 if (i915_request_wait(rq, 0, HZ / 2) < 0) {
2470 pr_err("Timed out waiting to flush %s\n", engine->name);
2471 i915_request_put(rq);
2472 goto err_wedged;
2473 }
2474 i915_request_put(rq);
2475
2476 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
2477 err = -EIO;
2478 goto err_wedged;
2479 }
2480
2481 for_each_prime_number_from(count, 1, ring_size) {
2482 rq = spinner_create_request(&hi.spin,
2483 hi.ctx, engine,
2484 MI_ARB_CHECK);
2485 if (IS_ERR(rq))
2486 goto err_wedged;
2487 i915_request_add(rq);
2488 if (!igt_wait_for_spinner(&hi.spin, rq))
2489 goto err_wedged;
2490
2491 rq = spinner_create_request(&lo.spin,
2492 lo.ctx, engine,
2493 MI_ARB_CHECK);
2494 if (IS_ERR(rq))
2495 goto err_wedged;
2496 i915_request_add(rq);
2497
2498 for (i = 0; i < count; i++) {
2499 rq = igt_request_alloc(lo.ctx, engine);
2500 if (IS_ERR(rq))
2501 goto err_wedged;
2502 i915_request_add(rq);
2503 }
2504
2505 rq = igt_request_alloc(hi.ctx, engine);
2506 if (IS_ERR(rq))
2507 goto err_wedged;
2508
2509 i915_request_get(rq);
2510 i915_request_add(rq);
2511 engine->schedule(rq, &attr);
2512
2513 igt_spinner_end(&hi.spin);
2514 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2515 struct drm_printer p =
2516 drm_info_printer(gt->i915->drm.dev);
2517
2518 pr_err("Failed to preempt over chain of %d\n",
2519 count);
2520 intel_engine_dump(engine, &p,
2521 "%s\n", engine->name);
2522 i915_request_put(rq);
2523 goto err_wedged;
2524 }
2525 igt_spinner_end(&lo.spin);
2526 i915_request_put(rq);
2527
2528 rq = igt_request_alloc(lo.ctx, engine);
2529 if (IS_ERR(rq))
2530 goto err_wedged;
2531
2532 i915_request_get(rq);
2533 i915_request_add(rq);
2534
2535 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2536 struct drm_printer p =
2537 drm_info_printer(gt->i915->drm.dev);
2538
2539 pr_err("Failed to flush low priority chain of %d requests\n",
2540 count);
2541 intel_engine_dump(engine, &p,
2542 "%s\n", engine->name);
2543
2544 i915_request_put(rq);
2545 goto err_wedged;
2546 }
2547 i915_request_put(rq);
2548 }
2549
2550 if (igt_live_test_end(&t)) {
2551 err = -EIO;
2552 goto err_wedged;
2553 }
2554 }
2555
2556 err = 0;
2557 err_client_lo:
2558 preempt_client_fini(&lo);
2559 err_client_hi:
2560 preempt_client_fini(&hi);
2561 return err;
2562
2563 err_wedged:
2564 igt_spinner_end(&hi.spin);
2565 igt_spinner_end(&lo.spin);
2566 intel_gt_set_wedged(gt);
2567 err = -EIO;
2568 goto err_client_lo;
2569 }
2570
2571 static int create_gang(struct intel_engine_cs *engine,
2572 struct i915_request **prev)
2573 {
2574 struct drm_i915_gem_object *obj;
2575 struct intel_context *ce;
2576 struct i915_request *rq;
2577 struct i915_vma *vma;
2578 u32 *cs;
2579 int err;
2580
2581 ce = intel_context_create(engine);
2582 if (IS_ERR(ce))
2583 return PTR_ERR(ce);
2584
2585 obj = i915_gem_object_create_internal(engine->i915, 4096);
2586 if (IS_ERR(obj)) {
2587 err = PTR_ERR(obj);
2588 goto err_ce;
2589 }
2590
2591 vma = i915_vma_instance(obj, ce->vm, NULL);
2592 if (IS_ERR(vma)) {
2593 err = PTR_ERR(vma);
2594 goto err_obj;
2595 }
2596
2597 err = i915_vma_pin(vma, 0, 0, PIN_USER);
2598 if (err)
2599 goto err_obj;
2600
2601 cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
2602 if (IS_ERR(cs)) {
err = PTR_ERR(cs);
2603 goto err_obj;
}
2604
2605 /* Semaphore target: spin until zero */
2606 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2607
2608 *cs++ = MI_SEMAPHORE_WAIT |
2609 MI_SEMAPHORE_POLL |
2610 MI_SEMAPHORE_SAD_EQ_SDD;
2611 *cs++ = 0;
2612 *cs++ = lower_32_bits(vma->node.start);
2613 *cs++ = upper_32_bits(vma->node.start);
2614
2615 if (*prev) {
2616 u64 offset = (*prev)->batch->node.start;
2617
2618 /* Terminate the spinner in the next lower priority batch. */
2619 *cs++ = MI_STORE_DWORD_IMM_GEN4;
2620 *cs++ = lower_32_bits(offset);
2621 *cs++ = upper_32_bits(offset);
2622 *cs++ = 0;
2623 }
2624
2625 *cs++ = MI_BATCH_BUFFER_END;
2626 i915_gem_object_flush_map(obj);
2627 i915_gem_object_unpin_map(obj);
2628
2629 rq = intel_context_create_request(ce);
2630 if (IS_ERR(rq)) {
err = PTR_ERR(rq);
2631 goto err_obj;
}
2632
2633 rq->batch = vma;
2634 i915_request_get(rq);
2635
2636 i915_vma_lock(vma);
2637 err = i915_request_await_object(rq, vma->obj, false);
2638 if (!err)
2639 err = i915_vma_move_to_active(vma, rq, 0);
2640 if (!err)
2641 err = rq->engine->emit_bb_start(rq,
2642 vma->node.start,
2643 PAGE_SIZE, 0);
2644 i915_vma_unlock(vma);
2645 i915_request_add(rq);
2646 if (err)
2647 goto err_rq;
2648
2649 i915_gem_object_put(obj);
2650 intel_context_put(ce);
2651
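/*
 * Chain the requests together through the otherwise unused
 * client_link. When *prev is NULL, the computed ->next is just the
 * byte offset of client_link within the request, which
 * list_next_entry() in live_preempt_gang() converts back to NULL,
 * terminating the walk at the oldest request.
 */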
2652 rq->client_link.next = &(*prev)->client_link;
2653 *prev = rq;
2654 return 0;
2655
2656 err_rq:
2657 i915_request_put(rq);
2658 err_obj:
2659 i915_gem_object_put(obj);
2660 err_ce:
2661 intel_context_put(ce);
2662 return err;
2663 }
2664
2665 static int live_preempt_gang(void *arg)
2666 {
2667 struct intel_gt *gt = arg;
2668 struct intel_engine_cs *engine;
2669 enum intel_engine_id id;
2670
2671 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2672 return 0;
2673
2674 /*
2675 * Build as long a chain of preempters as we can, with each
2676 * request higher priority than the last. Once we are ready, we release
2677 * the last batch, which then percolates down the chain, each releasing
2678 * the next oldest in turn. The intent is simply to push the number of
2679 * preemptions as hard as we can, trying to exceed any narrow HW
2680 * limits. At a minimum, we insist that we can sort all the user
2681 * high priority levels into execution order.
2682 */
2683
2684 for_each_engine(engine, gt, id) {
2685 struct i915_request *rq = NULL;
2686 struct igt_live_test t;
2687 IGT_TIMEOUT(end_time);
2688 int prio = 0;
2689 int err = 0;
2690 u32 *cs;
2691
2692 if (!intel_engine_has_preemption(engine))
2693 continue;
2694
2695 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2696 return -EIO;
2697
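/*
 * Keep extending the gang at ever higher priority until every user
 * priority level has been used at least once; after that, stop as soon
 * as the timeout expires or the priority would overflow once shifted
 * into the internal range.
 */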
2698 do {
2699 struct i915_sched_attr attr = {
2700 .priority = I915_USER_PRIORITY(prio++),
2701 };
2702
2703 err = create_gang(engine, &rq);
2704 if (err)
2705 break;
2706
2707 /* Submit each spinner at increasing priority */
2708 engine->schedule(rq, &attr);
2709
2710 if (prio <= I915_PRIORITY_MAX)
2711 continue;
2712
2713 if (prio > (INT_MAX >> I915_USER_PRIORITY_SHIFT))
2714 break;
2715
2716 if (__igt_timeout(end_time, NULL))
2717 break;
2718 } while (1);
2719 pr_debug("%s: Preempt chain of %d requests\n",
2720 engine->name, prio);
2721
2722 /*
2723 * The last spinner submitted is the highest priority and
2724 * should execute first. When that spinner completes,
2725 * it will terminate the next-lowest spinner, and so on, until there
2726 * are no more spinners and the gang is complete.
2727 */
2728 cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC);
2729 if (!IS_ERR(cs)) {
2730 *cs = 0;
2731 i915_gem_object_unpin_map(rq->batch->obj);
2732 } else {
2733 err = PTR_ERR(cs);
2734 intel_gt_set_wedged(gt);
2735 }
2736
2737 while (rq) { /* wait for each rq from highest to lowest prio */
2738 struct i915_request *n =
2739 list_next_entry(rq, client_link);
2740
2741 if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
2742 struct drm_printer p =
2743 drm_info_printer(engine->i915->drm.dev);
2744
2745 pr_err("Failed to flush chain of %d requests, at %d\n",
2746 prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT);
2747 intel_engine_dump(engine, &p,
2748 "%s\n", engine->name);
2749
2750 err = -ETIME;
2751 }
2752
2753 i915_request_put(rq);
2754 rq = n;
2755 }
2756
2757 if (igt_live_test_end(&t))
2758 err = -EIO;
2759 if (err)
2760 return err;
2761 }
2762
2763 return 0;
2764 }
2765
2766 static int live_preempt_timeout(void *arg)
2767 {
2768 struct intel_gt *gt = arg;
2769 struct i915_gem_context *ctx_hi, *ctx_lo;
2770 struct igt_spinner spin_lo;
2771 struct intel_engine_cs *engine;
2772 enum intel_engine_id id;
2773 int err = -ENOMEM;
2774
2775 /*
2776 * Check that we force preemption to occur by cancelling the previous
2777 * context if it refuses to yield the GPU.
2778 */
2779 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2780 return 0;
2781
2782 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2783 return 0;
2784
2785 if (!intel_has_reset_engine(gt))
2786 return 0;
2787
2788 if (igt_spinner_init(&spin_lo, gt))
2789 return -ENOMEM;
2790
2791 ctx_hi = kernel_context(gt->i915);
2792 if (!ctx_hi)
2793 goto err_spin_lo;
2794 ctx_hi->sched.priority =
2795 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
2796
2797 ctx_lo = kernel_context(gt->i915);
2798 if (!ctx_lo)
2799 goto err_ctx_hi;
2800 ctx_lo->sched.priority =
2801 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
2802
2803 for_each_engine(engine, gt, id) {
2804 unsigned long saved_timeout;
2805 struct i915_request *rq;
2806
2807 if (!intel_engine_has_preemption(engine))
2808 continue;
2809
2810 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
2811 MI_NOOP); /* preemption disabled */
2812 if (IS_ERR(rq)) {
2813 err = PTR_ERR(rq);
2814 goto err_ctx_lo;
2815 }
2816
2817 i915_request_add(rq);
2818 if (!igt_wait_for_spinner(&spin_lo, rq)) {
2819 intel_gt_set_wedged(gt);
2820 err = -EIO;
2821 goto err_ctx_lo;
2822 }
2823
2824 rq = igt_request_alloc(ctx_hi, engine);
2825 if (IS_ERR(rq)) {
2826 igt_spinner_end(&spin_lo);
2827 err = PTR_ERR(rq);
2828 goto err_ctx_lo;
2829 }
2830
2831 /* Flush the previous CS ack before changing timeouts */
2832 while (READ_ONCE(engine->execlists.pending[0]))
2833 cpu_relax();
2834
2835 saved_timeout = engine->props.preempt_timeout_ms;
2836 engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffy */
2837
2838 i915_request_get(rq);
2839 i915_request_add(rq);
2840
2841 intel_engine_flush_submission(engine);
2842 engine->props.preempt_timeout_ms = saved_timeout;
2843
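/*
 * The submission flush above queues the high priority request and arms
 * the 1ms preempt timeout against the spinner, which was submitted
 * without an arbitration point and so refuses to yield; restoring the
 * saved timeout afterwards confines the shortened value to this one
 * preemption attempt.
 */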
2844 if (i915_request_wait(rq, 0, HZ / 10) < 0) {
2845 intel_gt_set_wedged(gt);
2846 i915_request_put(rq);
2847 err = -ETIME;
2848 goto err_ctx_lo;
2849 }
2850
2851 igt_spinner_end(&spin_lo);
2852 i915_request_put(rq);
2853 }
2854
2855 err = 0;
2856 err_ctx_lo:
2857 kernel_context_close(ctx_lo);
2858 err_ctx_hi:
2859 kernel_context_close(ctx_hi);
2860 err_spin_lo:
2861 igt_spinner_fini(&spin_lo);
2862 return err;
2863 }
2864
2865 static int random_range(struct rnd_state *rnd, int min, int max)
2866 {
2867 return i915_prandom_u32_max_state(max - min, rnd) + min;
2868 }
2869
2870 static int random_priority(struct rnd_state *rnd)
2871 {
2872 return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
2873 }
2874
2875 struct preempt_smoke {
2876 struct intel_gt *gt;
2877 struct i915_gem_context **contexts;
2878 struct intel_engine_cs *engine;
2879 struct drm_i915_gem_object *batch;
2880 unsigned int ncontext;
2881 struct rnd_state prng;
2882 unsigned long count;
2883 };
2884
2885 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
2886 {
2887 return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
2888 &smoke->prng)];
2889 }
2890
2891 static int smoke_submit(struct preempt_smoke *smoke,
2892 struct i915_gem_context *ctx, int prio,
2893 struct drm_i915_gem_object *batch)
2894 {
2895 struct i915_request *rq;
2896 struct i915_vma *vma = NULL;
2897 int err = 0;
2898
2899 if (batch) {
2900 struct i915_address_space *vm;
2901
2902 vm = i915_gem_context_get_vm_rcu(ctx);
2903 vma = i915_vma_instance(batch, vm, NULL);
2904 i915_vm_put(vm);
2905 if (IS_ERR(vma))
2906 return PTR_ERR(vma);
2907
2908 err = i915_vma_pin(vma, 0, 0, PIN_USER);
2909 if (err)
2910 return err;
2911 }
2912
2913 ctx->sched.priority = prio;
2914
2915 rq = igt_request_alloc(ctx, smoke->engine);
2916 if (IS_ERR(rq)) {
2917 err = PTR_ERR(rq);
2918 goto unpin;
2919 }
2920
2921 if (vma) {
2922 i915_vma_lock(vma);
2923 err = i915_request_await_object(rq, vma->obj, false);
2924 if (!err)
2925 err = i915_vma_move_to_active(vma, rq, 0);
2926 if (!err)
2927 err = rq->engine->emit_bb_start(rq,
2928 vma->node.start,
2929 PAGE_SIZE, 0);
2930 i915_vma_unlock(vma);
2931 }
2932
2933 i915_request_add(rq);
2934
2935 unpin:
2936 if (vma)
2937 i915_vma_unpin(vma);
2938
2939 return err;
2940 }
2941
2942 static int smoke_crescendo_thread(void *arg)
2943 {
2944 struct preempt_smoke *smoke = arg;
2945 IGT_TIMEOUT(end_time);
2946 unsigned long count;
2947
2948 count = 0;
2949 do {
2950 struct i915_gem_context *ctx = smoke_context(smoke);
2951 int err;
2952
2953 err = smoke_submit(smoke,
2954 ctx, count % I915_PRIORITY_MAX,
2955 smoke->batch);
2956 if (err)
2957 return err;
2958
2959 count++;
2960 } while (!__igt_timeout(end_time, NULL));
2961
2962 smoke->count = count;
2963 return 0;
2964 }
2965
2966 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
2967 #define BATCH BIT(0)
2968 {
2969 struct task_struct *tsk[I915_NUM_ENGINES] = {};
2970 struct preempt_smoke arg[I915_NUM_ENGINES];
2971 struct intel_engine_cs *engine;
2972 enum intel_engine_id id;
2973 unsigned long count;
2974 int err = 0;
2975
2976 for_each_engine(engine, smoke->gt, id) {
2977 arg[id] = *smoke;
2978 arg[id].engine = engine;
2979 if (!(flags & BATCH))
2980 arg[id].batch = NULL;
2981 arg[id].count = 0;
2982
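/* Each thread gets its own per-engine copy of the smoke state. */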
2983 tsk[id] = kthread_run(smoke_crescendo_thread, &arg[id],
2984 "igt/smoke:%d", id);
2985 if (IS_ERR(tsk[id])) {
2986 err = PTR_ERR(tsk[id]);
2987 break;
2988 }
2989 get_task_struct(tsk[id]);
2990 }
2991
2992 yield(); /* start all threads before we kthread_stop() */
2993
2994 count = 0;
2995 for_each_engine(engine, smoke->gt, id) {
2996 int status;
2997
2998 if (IS_ERR_OR_NULL(tsk[id]))
2999 continue;
3000
3001 status = kthread_stop(tsk[id]);
3002 if (status && !err)
3003 err = status;
3004
3005 count += arg[id].count;
3006
3007 put_task_struct(tsk[id]);
3008 }
3009
3010 pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
3011 count, flags,
3012 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
3013 return err;
3014 }
3015
3016 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
3017 {
3018 enum intel_engine_id id;
3019 IGT_TIMEOUT(end_time);
3020 unsigned long count;
3021
3022 count = 0;
3023 do {
3024 for_each_engine(smoke->engine, smoke->gt, id) {
3025 struct i915_gem_context *ctx = smoke_context(smoke);
3026 int err;
3027
3028 err = smoke_submit(smoke,
3029 ctx, random_priority(&smoke->prng),
3030 flags & BATCH ? smoke->batch : NULL);
3031 if (err)
3032 return err;
3033
3034 count++;
3035 }
3036 } while (!__igt_timeout(end_time, NULL));
3037
3038 pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
3039 count, flags,
3040 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
3041 return 0;
3042 }
3043
3044 static int live_preempt_smoke(void *arg)
3045 {
3046 struct preempt_smoke smoke = {
3047 .gt = arg,
3048 .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
3049 .ncontext = 1024,
3050 };
3051 const unsigned int phase[] = { 0, BATCH };
3052 struct igt_live_test t;
3053 int err = -ENOMEM;
3054 u32 *cs;
3055 int n;
3056
3057 if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915))
3058 return 0;
3059
3060 smoke.contexts = kmalloc_array(smoke.ncontext,
3061 sizeof(*smoke.contexts),
3062 GFP_KERNEL);
3063 if (!smoke.contexts)
3064 return -ENOMEM;
3065
3066 smoke.batch =
3067 i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
3068 if (IS_ERR(smoke.batch)) {
3069 err = PTR_ERR(smoke.batch);
3070 goto err_free;
3071 }
3072
3073 cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB);
3074 if (IS_ERR(cs)) {
3075 err = PTR_ERR(cs);
3076 goto err_batch;
3077 }
3078 for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
3079 cs[n] = MI_ARB_CHECK;
3080 cs[n] = MI_BATCH_BUFFER_END;
3081 i915_gem_object_flush_map(smoke.batch);
3082 i915_gem_object_unpin_map(smoke.batch);
3083
3084 if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
3085 err = -EIO;
3086 goto err_batch;
3087 }
3088
3089 for (n = 0; n < smoke.ncontext; n++) {
3090 smoke.contexts[n] = kernel_context(smoke.gt->i915);
3091 if (!smoke.contexts[n])
3092 goto err_ctx;
3093 }
3094
3095 for (n = 0; n < ARRAY_SIZE(phase); n++) {
3096 err = smoke_crescendo(&smoke, phase[n]);
3097 if (err)
3098 goto err_ctx;
3099
3100 err = smoke_random(&smoke, phase[n]);
3101 if (err)
3102 goto err_ctx;
3103 }
3104
3105 err_ctx:
3106 if (igt_live_test_end(&t))
3107 err = -EIO;
3108
3109 for (n = 0; n < smoke.ncontext; n++) {
3110 if (!smoke.contexts[n])
3111 break;
3112 kernel_context_close(smoke.contexts[n]);
3113 }
3114
3115 err_batch:
3116 i915_gem_object_put(smoke.batch);
3117 err_free:
3118 kfree(smoke.contexts);
3119
3120 return err;
3121 }
3122
3123 static int nop_virtual_engine(struct intel_gt *gt,
3124 struct intel_engine_cs **siblings,
3125 unsigned int nsibling,
3126 unsigned int nctx,
3127 unsigned int flags)
3128 #define CHAIN BIT(0)
3129 {
3130 IGT_TIMEOUT(end_time);
3131 struct i915_request *request[16] = {};
3132 struct intel_context *ve[16];
3133 unsigned long n, prime, nc;
3134 struct igt_live_test t;
3135 ktime_t times[2] = {};
3136 int err;
3137
3138 GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
3139
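/*
 * With CHAIN set, all requests for one virtual context are queued
 * back-to-back before moving on to the next context; otherwise the
 * submissions are interleaved round-robin across the contexts.
 */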
3140 for (n = 0; n < nctx; n++) {
3141 ve[n] = intel_execlists_create_virtual(siblings, nsibling);
3142 if (IS_ERR(ve[n])) {
3143 err = PTR_ERR(ve[n]);
3144 nctx = n;
3145 goto out;
3146 }
3147
3148 err = intel_context_pin(ve[n]);
3149 if (err) {
3150 intel_context_put(ve[n]);
3151 nctx = n;
3152 goto out;
3153 }
3154 }
3155
3156 err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
3157 if (err)
3158 goto out;
3159
3160 for_each_prime_number_from(prime, 1, 8192) {
3161 times[1] = ktime_get_raw();
3162
3163 if (flags & CHAIN) {
3164 for (nc = 0; nc < nctx; nc++) {
3165 for (n = 0; n < prime; n++) {
3166 struct i915_request *rq;
3167
3168 rq = i915_request_create(ve[nc]);
3169 if (IS_ERR(rq)) {
3170 err = PTR_ERR(rq);
3171 goto out;
3172 }
3173
3174 if (request[nc])
3175 i915_request_put(request[nc]);
3176 request[nc] = i915_request_get(rq);
3177 i915_request_add(rq);
3178 }
3179 }
3180 } else {
3181 for (n = 0; n < prime; n++) {
3182 for (nc = 0; nc < nctx; nc++) {
3183 struct i915_request *rq;
3184
3185 rq = i915_request_create(ve[nc]);
3186 if (IS_ERR(rq)) {
3187 err = PTR_ERR(rq);
3188 goto out;
3189 }
3190
3191 if (request[nc])
3192 i915_request_put(request[nc]);
3193 request[nc] = i915_request_get(rq);
3194 i915_request_add(rq);
3195 }
3196 }
3197 }
3198
3199 for (nc = 0; nc < nctx; nc++) {
3200 if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
3201 pr_err("%s(%s): wait for %llx:%lld timed out\n",
3202 __func__, ve[0]->engine->name,
3203 request[nc]->fence.context,
3204 request[nc]->fence.seqno);
3205
3206 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3207 __func__, ve[0]->engine->name,
3208 request[nc]->fence.context,
3209 request[nc]->fence.seqno);
3210 GEM_TRACE_DUMP();
3211 intel_gt_set_wedged(gt);
3212 break;
3213 }
3214 }
3215
3216 times[1] = ktime_sub(ktime_get_raw(), times[1]);
3217 if (prime == 1)
3218 times[0] = times[1];
3219
3220 for (nc = 0; nc < nctx; nc++) {
3221 i915_request_put(request[nc]);
3222 request[nc] = NULL;
3223 }
3224
3225 if (__igt_timeout(end_time, NULL))
3226 break;
3227 }
3228
3229 err = igt_live_test_end(&t);
3230 if (err)
3231 goto out;
3232
3233 pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
3234 nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
3235 prime, div64_u64(ktime_to_ns(times[1]), prime));
3236
3237 out:
3238 if (igt_flush_test(gt->i915))
3239 err = -EIO;
3240
3241 for (nc = 0; nc < nctx; nc++) {
3242 i915_request_put(request[nc]);
3243 intel_context_unpin(ve[nc]);
3244 intel_context_put(ve[nc]);
3245 }
3246 return err;
3247 }
3248
3249 static int live_virtual_engine(void *arg)
3250 {
3251 struct intel_gt *gt = arg;
3252 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3253 struct intel_engine_cs *engine;
3254 enum intel_engine_id id;
3255 unsigned int class, inst;
3256 int err;
3257
3258 if (intel_uc_uses_guc_submission(&gt->uc))
3259 return 0;
3260
3261 for_each_engine(engine, gt, id) {
3262 err = nop_virtual_engine(gt, &engine, 1, 1, 0);
3263 if (err) {
3264 pr_err("Failed to wrap engine %s: err=%d\n",
3265 engine->name, err);
3266 return err;
3267 }
3268 }
3269
3270 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3271 int nsibling, n;
3272
3273 nsibling = 0;
3274 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3275 if (!gt->engine_class[class][inst])
3276 continue;
3277
3278 siblings[nsibling++] = gt->engine_class[class][inst];
3279 }
3280 if (nsibling < 2)
3281 continue;
3282
3283 for (n = 1; n <= nsibling + 1; n++) {
3284 err = nop_virtual_engine(gt, siblings, nsibling,
3285 n, 0);
3286 if (err)
3287 return err;
3288 }
3289
3290 err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
3291 if (err)
3292 return err;
3293 }
3294
3295 return 0;
3296 }
3297
3298 static int mask_virtual_engine(struct intel_gt *gt,
3299 struct intel_engine_cs **siblings,
3300 unsigned int nsibling)
3301 {
3302 struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
3303 struct intel_context *ve;
3304 struct igt_live_test t;
3305 unsigned int n;
3306 int err;
3307
3308 /*
3309 * Check that by setting the execution mask on a request, we can
3310 * restrict it to our desired engine within the virtual engine.
3311 */
3312
3313 ve = intel_execlists_create_virtual(siblings, nsibling);
3314 if (IS_ERR(ve)) {
3315 err = PTR_ERR(ve);
3316 goto out_close;
3317 }
3318
3319 err = intel_context_pin(ve);
3320 if (err)
3321 goto out_put;
3322
3323 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3324 if (err)
3325 goto out_unpin;
3326
3327 for (n = 0; n < nsibling; n++) {
3328 request[n] = i915_request_create(ve);
3329 if (IS_ERR(request[n])) {
3330 err = PTR_ERR(request[n]);
3331 nsibling = n;
3332 goto out;
3333 }
3334
3335 /* Reverse order as it's more likely to be unnatural */
3336 request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
3337
3338 i915_request_get(request[n]);
3339 i915_request_add(request[n]);
3340 }
3341
3342 for (n = 0; n < nsibling; n++) {
3343 if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
3344 pr_err("%s(%s): wait for %llx:%lld timed out\n",
3345 __func__, ve->engine->name,
3346 request[n]->fence.context,
3347 request[n]->fence.seqno);
3348
3349 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3350 __func__, ve->engine->name,
3351 request[n]->fence.context,
3352 request[n]->fence.seqno);
3353 GEM_TRACE_DUMP();
3354 intel_gt_set_wedged(gt);
3355 err = -EIO;
3356 goto out;
3357 }
3358
3359 if (request[n]->engine != siblings[nsibling - n - 1]) {
3360 pr_err("Executed on wrong sibling '%s', expected '%s'\n",
3361 request[n]->engine->name,
3362 siblings[nsibling - n - 1]->name);
3363 err = -EINVAL;
3364 goto out;
3365 }
3366 }
3367
3368 err = igt_live_test_end(&t);
3369 out:
3370 if (igt_flush_test(gt->i915))
3371 err = -EIO;
3372
3373 for (n = 0; n < nsibling; n++)
3374 i915_request_put(request[n]);
3375
3376 out_unpin:
3377 intel_context_unpin(ve);
3378 out_put:
3379 intel_context_put(ve);
3380 out_close:
3381 return err;
3382 }
3383
3384 static int live_virtual_mask(void *arg)
3385 {
3386 struct intel_gt *gt = arg;
3387 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3388 unsigned int class, inst;
3389 int err;
3390
3391 if (intel_uc_uses_guc_submission(&gt->uc))
3392 return 0;
3393
3394 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3395 unsigned int nsibling;
3396
3397 nsibling = 0;
3398 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3399 if (!gt->engine_class[class][inst])
3400 break;
3401
3402 siblings[nsibling++] = gt->engine_class[class][inst];
3403 }
3404 if (nsibling < 2)
3405 continue;
3406
3407 err = mask_virtual_engine(gt, siblings, nsibling);
3408 if (err)
3409 return err;
3410 }
3411
3412 return 0;
3413 }
3414
3415 static int preserved_virtual_engine(struct intel_gt *gt,
3416 struct intel_engine_cs **siblings,
3417 unsigned int nsibling)
3418 {
3419 struct i915_request *last = NULL;
3420 struct intel_context *ve;
3421 struct i915_vma *scratch;
3422 struct igt_live_test t;
3423 unsigned int n;
3424 int err = 0;
3425 u32 *cs;
3426
3427 scratch = create_scratch(siblings[0]->gt);
3428 if (IS_ERR(scratch))
3429 return PTR_ERR(scratch);
3430
3431 err = i915_vma_sync(scratch);
3432 if (err)
3433 goto out_scratch;
3434
3435 ve = intel_execlists_create_virtual(siblings, nsibling);
3436 if (IS_ERR(ve)) {
3437 err = PTR_ERR(ve);
3438 goto out_scratch;
3439 }
3440
3441 err = intel_context_pin(ve);
3442 if (err)
3443 goto out_put;
3444
3445 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3446 if (err)
3447 goto out_unpin;
3448
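/*
 * Each request copies CS_GPR(n) into scratch[n] and then preloads the
 * next GPR with n + 1 for its successor, which may run on a different
 * sibling. The scratch buffer only ends up holding 0..NUM_GPR_DW-1 if
 * the GPR values are preserved in the context image as it migrates
 * between engines.
 */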
3449 for (n = 0; n < NUM_GPR_DW; n++) {
3450 struct intel_engine_cs *engine = siblings[n % nsibling];
3451 struct i915_request *rq;
3452
3453 rq = i915_request_create(ve);
3454 if (IS_ERR(rq)) {
3455 err = PTR_ERR(rq);
3456 goto out_end;
3457 }
3458
3459 i915_request_put(last);
3460 last = i915_request_get(rq);
3461
3462 cs = intel_ring_begin(rq, 8);
3463 if (IS_ERR(cs)) {
3464 i915_request_add(rq);
3465 err = PTR_ERR(cs);
3466 goto out_end;
3467 }
3468
3469 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
3470 *cs++ = CS_GPR(engine, n);
3471 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
3472 *cs++ = 0;
3473
3474 *cs++ = MI_LOAD_REGISTER_IMM(1);
3475 *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
3476 *cs++ = n + 1;
3477
3478 *cs++ = MI_NOOP;
3479 intel_ring_advance(rq, cs);
3480
3481 /* Restrict this request to run on a particular engine */
3482 rq->execution_mask = engine->mask;
3483 i915_request_add(rq);
3484 }
3485
3486 if (i915_request_wait(last, 0, HZ / 5) < 0) {
3487 err = -ETIME;
3488 goto out_end;
3489 }
3490
3491 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
3492 if (IS_ERR(cs)) {
3493 err = PTR_ERR(cs);
3494 goto out_end;
3495 }
3496
3497 for (n = 0; n < NUM_GPR_DW; n++) {
3498 if (cs[n] != n) {
3499 pr_err("Incorrect value[%d] found for GPR[%d]\n",
3500 cs[n], n);
3501 err = -EINVAL;
3502 break;
3503 }
3504 }
3505
3506 i915_gem_object_unpin_map(scratch->obj);
3507
3508 out_end:
3509 if (igt_live_test_end(&t))
3510 err = -EIO;
3511 i915_request_put(last);
3512 out_unpin:
3513 intel_context_unpin(ve);
3514 out_put:
3515 intel_context_put(ve);
3516 out_scratch:
3517 i915_vma_unpin_and_release(&scratch, 0);
3518 return err;
3519 }
3520
3521 static int live_virtual_preserved(void *arg)
3522 {
3523 struct intel_gt *gt = arg;
3524 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3525 unsigned int class, inst;
3526
3527 /*
3528 * Check that the context image retains non-privileged (user) registers
3529 * from one engine to the next. For this we check that the CS_GPR
3530 * are preserved.
3531 */
3532
3533 if (intel_uc_uses_guc_submission(&gt->uc))
3534 return 0;
3535
3536 /* As we use CS_GPR we cannot run before they existed on all engines. */
3537 if (INTEL_GEN(gt->i915) < 9)
3538 return 0;
3539
3540 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3541 int nsibling, err;
3542
3543 nsibling = 0;
3544 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3545 if (!gt->engine_class[class][inst])
3546 continue;
3547
3548 siblings[nsibling++] = gt->engine_class[class][inst];
3549 }
3550 if (nsibling < 2)
3551 continue;
3552
3553 err = preserved_virtual_engine(gt, siblings, nsibling);
3554 if (err)
3555 return err;
3556 }
3557
3558 return 0;
3559 }
3560
3561 static int bond_virtual_engine(struct intel_gt *gt,
3562 unsigned int class,
3563 struct intel_engine_cs **siblings,
3564 unsigned int nsibling,
3565 unsigned int flags)
3566 #define BOND_SCHEDULE BIT(0)
3567 {
3568 struct intel_engine_cs *master;
3569 struct i915_request *rq[16];
3570 enum intel_engine_id id;
3571 struct igt_spinner spin;
3572 unsigned long n;
3573 int err;
3574
3575 /*
3576 * A set of bonded requests is intended to be run concurrently
3577 * across a number of engines. We use one request per-engine
3578 * and a magic fence to schedule each of the bonded requests
3579 * at the same time. A consequence of our current scheduler is that
3580 * we only move requests to the HW ready queue when the request
3581 * becomes ready, that is when all of its prerequisite fences have
3582 * been signaled. As one of those fences is the master submit fence,
3583 * there is a delay on all secondary fences as the HW may be
3584 * currently busy. Equally, as all the requests are independent,
3585 * they may have other fences that delay individual request
3586 * submission to HW. Ergo, we do not guarantee that all requests are
3587 * immediately submitted to HW at the same time, just that if the
3588 * rules are abided by, they are ready at the same time as the
3589 * first is submitted. Userspace can embed semaphores in its batch
3590 * to ensure parallel execution of its phases as it requires.
3591 * Naturally, it has been suggested that the scheduler should
3592 * take care of parallel execution, even across preemption events on
3593 * different HW. (The proper answer is of course "lalalala".)
3594 *
3595 * With the submit-fence, we have identified three possible phases
3596 * of synchronisation depending on the master fence: queued (not
3597 * ready), executing, and signaled. The first two are quite simple
3598 * and checked below. However, the signaled master fence handling is
3599 * contentious. Currently we do not distinguish between a signaled
3600 * fence and an expired fence, as once signaled it does not convey
3601 * any information about the previous execution. It may even be freed
3602 * and hence checking later it may not exist at all. Ergo we currently
3603 * do not apply the bonding constraint for an already signaled fence,
3604 * as our expectation is that it should not constrain the secondaries
3605 * and is outside of the scope of the bonded request API (i.e. all
3606 * userspace requests are meant to be running in parallel). As
3607 * it imposes no constraint, and is effectively a no-op, we do not
3608 * check below as normal execution flows are checked extensively above.
3609 *
3610 * XXX Is the degenerate handling of signaled submit fences the
3611 * expected behaviour for userspace?
3612 */
3613
3614 GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
3615
3616 if (igt_spinner_init(&spin, gt))
3617 return -ENOMEM;
3618
3619 err = 0;
3620 rq[0] = ERR_PTR(-ENOMEM);
3621 for_each_engine(master, gt, id) {
3622 struct i915_sw_fence fence = {};
3623 struct intel_context *ce;
3624
3625 if (master->class == class)
3626 continue;
3627
3628 ce = intel_context_create(master);
3629 if (IS_ERR(ce)) {
3630 err = PTR_ERR(ce);
3631 goto out;
3632 }
3633
3634 memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
3635
3636 rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP);
3637 intel_context_put(ce);
3638 if (IS_ERR(rq[0])) {
3639 err = PTR_ERR(rq[0]);
3640 goto out;
3641 }
3642 i915_request_get(rq[0]);
3643
3644 if (flags & BOND_SCHEDULE) {
3645 onstack_fence_init(&fence);
3646 err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
3647 &fence,
3648 GFP_KERNEL);
3649 }
3650
3651 i915_request_add(rq[0]);
3652 if (err < 0)
3653 goto out;
3654
3655 if (!(flags & BOND_SCHEDULE) &&
3656 !igt_wait_for_spinner(&spin, rq[0])) {
3657 err = -EIO;
3658 goto out;
3659 }
3660
3661 for (n = 0; n < nsibling; n++) {
3662 struct intel_context *ve;
3663
3664 ve = intel_execlists_create_virtual(siblings, nsibling);
3665 if (IS_ERR(ve)) {
3666 err = PTR_ERR(ve);
3667 onstack_fence_fini(&fence);
3668 goto out;
3669 }
3670
3671 err = intel_virtual_engine_attach_bond(ve->engine,
3672 master,
3673 siblings[n]);
3674 if (err) {
3675 intel_context_put(ve);
3676 onstack_fence_fini(&fence);
3677 goto out;
3678 }
3679
3680 err = intel_context_pin(ve);
3681 intel_context_put(ve);
3682 if (err) {
3683 onstack_fence_fini(&fence);
3684 goto out;
3685 }
3686
3687 rq[n + 1] = i915_request_create(ve);
3688 intel_context_unpin(ve);
3689 if (IS_ERR(rq[n + 1])) {
3690 err = PTR_ERR(rq[n + 1]);
3691 onstack_fence_fini(&fence);
3692 goto out;
3693 }
3694 i915_request_get(rq[n + 1]);
3695
3696 err = i915_request_await_execution(rq[n + 1],
3697 &rq[0]->fence,
3698 ve->engine->bond_execute);
3699 i915_request_add(rq[n + 1]);
3700 if (err < 0) {
3701 onstack_fence_fini(&fence);
3702 goto out;
3703 }
3704 }
3705 onstack_fence_fini(&fence);
3706 intel_engine_flush_submission(master);
3707 igt_spinner_end(&spin);
3708
3709 if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
3710 pr_err("Master request did not execute (on %s)!\n",
3711 rq[0]->engine->name);
3712 err = -EIO;
3713 goto out;
3714 }
3715
3716 for (n = 0; n < nsibling; n++) {
3717 if (i915_request_wait(rq[n + 1], 0,
3718 MAX_SCHEDULE_TIMEOUT) < 0) {
3719 err = -EIO;
3720 goto out;
3721 }
3722
3723 if (rq[n + 1]->engine != siblings[n]) {
3724 pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
3725 siblings[n]->name,
3726 rq[n + 1]->engine->name,
3727 rq[0]->engine->name);
3728 err = -EINVAL;
3729 goto out;
3730 }
3731 }
3732
3733 for (n = 0; !IS_ERR(rq[n]); n++)
3734 i915_request_put(rq[n]);
3735 rq[0] = ERR_PTR(-ENOMEM);
3736 }
3737
3738 out:
3739 for (n = 0; !IS_ERR(rq[n]); n++)
3740 i915_request_put(rq[n]);
3741 if (igt_flush_test(gt->i915))
3742 err = -EIO;
3743
3744 igt_spinner_fini(&spin);
3745 return err;
3746 }
3747
3748 static int live_virtual_bond(void *arg)
3749 {
3750 static const struct phase {
3751 const char *name;
3752 unsigned int flags;
3753 } phases[] = {
3754 { "", 0 },
3755 { "schedule", BOND_SCHEDULE },
3756 { },
3757 };
3758 struct intel_gt *gt = arg;
3759 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3760 unsigned int class, inst;
3761 int err;
3762
3763 if (intel_uc_uses_guc_submission(&gt->uc))
3764 return 0;
3765
3766 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3767 const struct phase *p;
3768 int nsibling;
3769
3770 nsibling = 0;
3771 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3772 if (!gt->engine_class[class][inst])
3773 break;
3774
3775 GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
3776 siblings[nsibling++] = gt->engine_class[class][inst];
3777 }
3778 if (nsibling < 2)
3779 continue;
3780
3781 for (p = phases; p->name; p++) {
3782 err = bond_virtual_engine(gt,
3783 class, siblings, nsibling,
3784 p->flags);
3785 if (err) {
3786 pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
3787 __func__, p->name, class, nsibling, err);
3788 return err;
3789 }
3790 }
3791 }
3792
3793 return 0;
3794 }
3795
3796 static int reset_virtual_engine(struct intel_gt *gt,
3797 struct intel_engine_cs **siblings,
3798 unsigned int nsibling)
3799 {
3800 struct intel_engine_cs *engine;
3801 struct intel_context *ve;
3802 unsigned long *heartbeat;
3803 struct igt_spinner spin;
3804 struct i915_request *rq;
3805 unsigned int n;
3806 int err = 0;
3807
3808 /*
3809 * In order to support offline error capture for fast preempt reset,
3810 * we need to decouple the guilty request and ensure that it and its
3811 * descendants are not executed while the capture is in progress.
3812 */
3813
3814 heartbeat = kmalloc_array(nsibling, sizeof(*heartbeat), GFP_KERNEL);
3815 if (!heartbeat)
3816 return -ENOMEM;
3817
3818 if (igt_spinner_init(&spin, gt)) {
3819 err = -ENOMEM;
3820 goto out_free;
3821 }
3822
3823 ve = intel_execlists_create_virtual(siblings, nsibling);
3824 if (IS_ERR(ve)) {
3825 err = PTR_ERR(ve);
3826 goto out_spin;
3827 }
3828
3829 for (n = 0; n < nsibling; n++)
3830 engine_heartbeat_disable(siblings[n], &heartbeat[n]);
3831
3832 rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
3833 if (IS_ERR(rq)) {
3834 err = PTR_ERR(rq);
3835 goto out_heartbeat;
3836 }
3837 i915_request_add(rq);
3838
3839 if (!igt_wait_for_spinner(&spin, rq)) {
3840 intel_gt_set_wedged(gt);
3841 err = -ETIME;
3842 goto out_heartbeat;
3843 }
3844
3845 engine = rq->engine;
3846 GEM_BUG_ON(engine == ve->engine);
3847
3848 /* Take ownership of the reset and tasklet */
3849 if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
3850 &gt->reset.flags)) {
3851 intel_gt_set_wedged(gt);
3852 err = -EBUSY;
3853 goto out_heartbeat;
3854 }
3855 tasklet_disable(&engine->execlists.tasklet);
3856
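/*
 * With the tasklet disabled, run the submission tasklet by hand so
 * that any pending CSB events are processed synchronously and the
 * spinner is confirmed as the request currently active on the engine
 * (see the assert below).
 */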
3857 engine->execlists.tasklet.func(engine->execlists.tasklet.data);
3858 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
3859
3860 /* Fake a preemption event; failed of course */
3861 spin_lock_irq(&engine->active.lock);
3862 __unwind_incomplete_requests(engine);
3863 spin_unlock_irq(&engine->active.lock);
3864 GEM_BUG_ON(rq->engine != ve->engine);
3865
3866 /* Reset the engine while keeping our active request on hold */
3867 execlists_hold(engine, rq);
3868 GEM_BUG_ON(!i915_request_on_hold(rq));
3869
3870 intel_engine_reset(engine, NULL);
3871 GEM_BUG_ON(rq->fence.error != -EIO);
3872
3873 /* Release our grasp on the engine, letting CS flow again */
3874 tasklet_enable(&engine->execlists.tasklet);
3875 clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags);
3876
3877 /* Check that we do not resubmit the held request */
3878 i915_request_get(rq);
3879 if (i915_request_wait(rq, 0, HZ / 5) != -ETIME) {
3880 pr_err("%s: on hold request completed!\n",
3881 engine->name);
3882 intel_gt_set_wedged(gt);
3883 err = -EIO;
3884 goto out_rq;
3885 }
3886 GEM_BUG_ON(!i915_request_on_hold(rq));
3887
3888 /* But is resubmitted on release */
3889 execlists_unhold(engine, rq);
3890 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
3891 pr_err("%s: held request did not complete!\n",
3892 engine->name);
3893 intel_gt_set_wedged(gt);
3894 err = -ETIME;
3895 }
3896
3897 out_rq:
3898 i915_request_put(rq);
3899 out_heartbeat:
3900 for (n = 0; n < nsibling; n++)
3901 engine_heartbeat_enable(siblings[n], heartbeat[n]);
3902
3903 intel_context_put(ve);
3904 out_spin:
3905 igt_spinner_fini(&spin);
3906 out_free:
3907 kfree(heartbeat);
3908 return err;
3909 }
3910
3911 static int live_virtual_reset(void *arg)
3912 {
3913 struct intel_gt *gt = arg;
3914 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3915 unsigned int class, inst;
3916
3917 /*
3918 * Check that we handle a reset event within a virtual engine.
3919 * Only the physical engine is reset, but we have to check the flow
3920 * of the virtual requests around the reset, and make sure it is not
3921 * forgotten.
3922 */
3923
3924 if (intel_uc_uses_guc_submission(&gt->uc))
3925 return 0;
3926
3927 if (!intel_has_reset_engine(gt))
3928 return 0;
3929
3930 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3931 int nsibling, err;
3932
3933 nsibling = 0;
3934 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3935 if (!gt->engine_class[class][inst])
3936 continue;
3937
3938 siblings[nsibling++] = gt->engine_class[class][inst];
3939 }
3940 if (nsibling < 2)
3941 continue;
3942
3943 err = reset_virtual_engine(gt, siblings, nsibling);
3944 if (err)
3945 return err;
3946 }
3947
3948 return 0;
3949 }
3950
3951 int intel_execlists_live_selftests(struct drm_i915_private *i915)
3952 {
3953 static const struct i915_subtest tests[] = {
3954 SUBTEST(live_sanitycheck),
3955 SUBTEST(live_unlite_switch),
3956 SUBTEST(live_unlite_preempt),
3957 SUBTEST(live_pin_rewind),
3958 SUBTEST(live_hold_reset),
3959 SUBTEST(live_error_interrupt),
3960 SUBTEST(live_timeslice_preempt),
3961 SUBTEST(live_timeslice_rewind),
3962 SUBTEST(live_timeslice_queue),
3963 SUBTEST(live_busywait_preempt),
3964 SUBTEST(live_preempt),
3965 SUBTEST(live_late_preempt),
3966 SUBTEST(live_nopreempt),
3967 SUBTEST(live_preempt_cancel),
3968 SUBTEST(live_suppress_self_preempt),
3969 SUBTEST(live_suppress_wait_preempt),
3970 SUBTEST(live_chain_preempt),
3971 SUBTEST(live_preempt_gang),
3972 SUBTEST(live_preempt_timeout),
3973 SUBTEST(live_preempt_smoke),
3974 SUBTEST(live_virtual_engine),
3975 SUBTEST(live_virtual_mask),
3976 SUBTEST(live_virtual_preserved),
3977 SUBTEST(live_virtual_bond),
3978 SUBTEST(live_virtual_reset),
3979 };
3980
3981 if (!HAS_EXECLISTS(i915))
3982 return 0;
3983
3984 if (intel_gt_is_wedged(&i915->gt))
3985 return 0;
3986
3987 return intel_gt_live_subtests(tests, &i915->gt);
3988 }
3989
3990 static void hexdump(const void *buf, size_t len)
3991 {
3992 const size_t rowsize = 8 * sizeof(u32);
3993 const void *prev = NULL;
3994 bool skip = false;
3995 size_t pos;
3996
3997 for (pos = 0; pos < len; pos += rowsize) {
3998 char line[128];
3999
4000 if (prev && !memcmp(prev, buf + pos, rowsize)) {
4001 if (!skip) {
4002 pr_info("*\n");
4003 skip = true;
4004 }
4005 continue;
4006 }
4007
4008 WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
4009 rowsize, sizeof(u32),
4010 line, sizeof(line),
4011 false) >= sizeof(line));
4012 pr_info("[%04zx] %s\n", pos, line);
4013
4014 prev = buf + pos;
4015 skip = false;
4016 }
4017 }
4018
4019 static int emit_semaphore_signal(struct intel_context *ce, void *slot)
4020 {
4021 const u32 offset =
4022 i915_ggtt_offset(ce->engine->status_page.vma) +
4023 offset_in_page(slot);
4024 struct i915_request *rq;
4025 u32 *cs;
4026
4027 rq = intel_context_create_request(ce);
4028 if (IS_ERR(rq))
4029 return PTR_ERR(rq);
4030
4031 cs = intel_ring_begin(rq, 4);
4032 if (IS_ERR(cs)) {
4033 i915_request_add(rq);
4034 return PTR_ERR(cs);
4035 }
4036
4037 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
4038 *cs++ = offset;
4039 *cs++ = 0;
4040 *cs++ = 1;
4041
4042 intel_ring_advance(rq, cs);
4043
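/* Submit at barrier priority so the signal runs ahead of, and preempts, any lower priority work. */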
4044 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
4045 i915_request_add(rq);
4046 return 0;
4047 }
4048
4049 static int context_flush(struct intel_context *ce, long timeout)
4050 {
4051 struct i915_request *rq;
4052 struct dma_fence *fence;
4053 int err = 0;
4054
4055 rq = intel_engine_create_kernel_request(ce->engine);
4056 if (IS_ERR(rq))
4057 return PTR_ERR(rq);
4058
4059 fence = i915_active_fence_get(&ce->timeline->last_request);
4060 if (fence) {
4061 i915_request_await_dma_fence(rq, fence);
4062 dma_fence_put(fence);
4063 }
4064
4065 rq = i915_request_get(rq);
4066 i915_request_add(rq);
4067 if (i915_request_wait(rq, 0, timeout) < 0)
4068 err = -ETIME;
4069 i915_request_put(rq);
4070
4071 rmb(); /* We know the request is written, make sure all state is too! */
4072 return err;
4073 }
4074
4075 static int live_lrc_layout(void *arg)
4076 {
4077 struct intel_gt *gt = arg;
4078 struct intel_engine_cs *engine;
4079 enum intel_engine_id id;
4080 u32 *lrc;
4081 int err;
4082
4083 /*
4084 * Check the registers offsets we use to create the initial reg state
4085 * match the layout saved by HW.
4086 */
4087
4088 lrc = kmalloc(PAGE_SIZE, GFP_KERNEL);
4089 if (!lrc)
4090 return -ENOMEM;
4091
4092 err = 0;
4093 for_each_engine(engine, gt, id) {
4094 u32 *hw;
4095 int dw;
4096
4097 if (!engine->default_state)
4098 continue;
4099
4100 hw = i915_gem_object_pin_map(engine->default_state,
4101 I915_MAP_WB);
4102 if (IS_ERR(hw)) {
4103 err = PTR_ERR(hw);
4104 break;
4105 }
4106 hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
4107
4108 execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
4109 engine->kernel_context,
4110 engine,
4111 engine->kernel_context->ring,
4112 true);
4113
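/*
 * Walk the HW default image and our freshly built image in lockstep:
 * every LRI header and each register offset must match between the
 * two, while the register values themselves (the dword following each
 * offset) are allowed to differ.
 */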
4114 dw = 0;
4115 do {
4116 u32 lri = hw[dw];
4117
4118 if (lri == 0) {
4119 dw++;
4120 continue;
4121 }
4122
4123 if (lrc[dw] == 0) {
4124 pr_debug("%s: skipped instruction %x at dword %d\n",
4125 engine->name, lri, dw);
4126 dw++;
4127 continue;
4128 }
4129
4130 if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
4131 pr_err("%s: Expected LRI command at dword %d, found %08x\n",
4132 engine->name, dw, lri);
4133 err = -EINVAL;
4134 break;
4135 }
4136
4137 if (lrc[dw] != lri) {
4138 pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
4139 engine->name, dw, lri, lrc[dw]);
4140 err = -EINVAL;
4141 break;
4142 }
4143
4144 lri &= 0x7f;
4145 lri++;
4146 dw++;
4147
4148 while (lri) {
4149 if (hw[dw] != lrc[dw]) {
4150 pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
4151 engine->name, dw, hw[dw], lrc[dw]);
4152 err = -EINVAL;
4153 break;
4154 }
4155
4156 /*
4157 * Skip over the actual register value as we
4158 * expect that to differ.
4159 */
4160 dw += 2;
4161 lri -= 2;
4162 }
4163 } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
4164
4165 if (err) {
4166 pr_info("%s: HW register image:\n", engine->name);
4167 hexdump(hw, PAGE_SIZE);
4168
4169 pr_info("%s: SW register image:\n", engine->name);
4170 hexdump(lrc, PAGE_SIZE);
4171 }
4172
4173 i915_gem_object_unpin_map(engine->default_state);
4174 if (err)
4175 break;
4176 }
4177
4178 kfree(lrc);
4179 return err;
4180 }
4181
4182 static int find_offset(const u32 *lri, u32 offset)
4183 {
4184 int i;
4185
4186 for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
4187 if (lri[i] == offset)
4188 return i;
4189
4190 return -1;
4191 }
4192
4193 static int live_lrc_fixed(void *arg)
4194 {
4195 struct intel_gt *gt = arg;
4196 struct intel_engine_cs *engine;
4197 enum intel_engine_id id;
4198 int err = 0;
4199
4200 /*
4201 * Check the assumed register offsets match the actual locations in
4202 * the context image.
4203 */
4204
4205 for_each_engine(engine, gt, id) {
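/*
 * The CTX_* definitions index the register value in the context
 * image; the matching mmio offset, which is what find_offset()
 * locates, is the dword immediately before it, hence the "- 1"
 * entries below.
 */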
4206 const struct {
4207 u32 reg;
4208 u32 offset;
4209 const char *name;
4210 } tbl[] = {
4211 {
4212 i915_mmio_reg_offset(RING_START(engine->mmio_base)),
4213 CTX_RING_START - 1,
4214 "RING_START"
4215 },
4216 {
4217 i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
4218 CTX_RING_CTL - 1,
4219 "RING_CTL"
4220 },
4221 {
4222 i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
4223 CTX_RING_HEAD - 1,
4224 "RING_HEAD"
4225 },
4226 {
4227 i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
4228 CTX_RING_TAIL - 1,
4229 "RING_TAIL"
4230 },
4231 {
4232 i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
4233 lrc_ring_mi_mode(engine),
4234 "RING_MI_MODE"
4235 },
4236 {
4237 i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
4238 CTX_BB_STATE - 1,
4239 "BB_STATE"
4240 },
4241 {
4242 i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
4243 CTX_TIMESTAMP - 1,
4244 "RING_CTX_TIMESTAMP"
4245 },
4246 { },
4247 }, *t;
4248 u32 *hw;
4249
4250 if (!engine->default_state)
4251 continue;
4252
4253 hw = i915_gem_object_pin_map(engine->default_state,
4254 I915_MAP_WB);
4255 if (IS_ERR(hw)) {
4256 err = PTR_ERR(hw);
4257 break;
4258 }
4259 hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
4260
4261 for (t = tbl; t->name; t++) {
4262 int dw = find_offset(hw, t->reg);
4263
4264 if (dw != t->offset) {
4265 pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
4266 engine->name,
4267 t->name,
4268 t->reg,
4269 dw,
4270 t->offset);
4271 err = -EINVAL;
4272 }
4273 }
4274
4275 i915_gem_object_unpin_map(engine->default_state);
4276 }
4277
4278 return err;
4279 }
4280
4281 static int __live_lrc_state(struct intel_engine_cs *engine,
4282 struct i915_vma *scratch)
4283 {
4284 struct intel_context *ce;
4285 struct i915_request *rq;
4286 enum {
4287 RING_START_IDX = 0,
4288 RING_TAIL_IDX,
4289 MAX_IDX
4290 };
4291 u32 expected[MAX_IDX];
4292 u32 *cs;
4293 int err;
4294 int n;
4295
4296 ce = intel_context_create(engine);
4297 if (IS_ERR(ce))
4298 return PTR_ERR(ce);
4299
4300 err = intel_context_pin(ce);
4301 if (err)
4302 goto err_put;
4303
4304 rq = i915_request_create(ce);
4305 if (IS_ERR(rq)) {
4306 err = PTR_ERR(rq);
4307 goto err_unpin;
4308 }
4309
4310 cs = intel_ring_begin(rq, 4 * MAX_IDX);
4311 if (IS_ERR(cs)) {
4312 err = PTR_ERR(cs);
4313 i915_request_add(rq);
4314 goto err_unpin;
4315 }
4316
4317 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4318 *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
4319 *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
4320 *cs++ = 0;
4321
4322 expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
4323
4324 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4325 *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
4326 *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
4327 *cs++ = 0;
4328
4329 i915_vma_lock(scratch);
4330 err = i915_request_await_object(rq, scratch->obj, true);
4331 if (!err)
4332 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
4333 i915_vma_unlock(scratch);
4334
4335 i915_request_get(rq);
4336 i915_request_add(rq);
4337 if (err)
4338 goto err_rq;
4339
4340 intel_engine_flush_submission(engine);
4341 expected[RING_TAIL_IDX] = ce->ring->tail;
4342
4343 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4344 err = -ETIME;
4345 goto err_rq;
4346 }
4347
4348 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
4349 if (IS_ERR(cs)) {
4350 err = PTR_ERR(cs);
4351 goto err_rq;
4352 }
4353
4354 for (n = 0; n < MAX_IDX; n++) {
4355 if (cs[n] != expected[n]) {
4356 pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
4357 engine->name, n, cs[n], expected[n]);
4358 err = -EINVAL;
4359 break;
4360 }
4361 }
4362
4363 i915_gem_object_unpin_map(scratch->obj);
4364
4365 err_rq:
4366 i915_request_put(rq);
4367 err_unpin:
4368 intel_context_unpin(ce);
4369 err_put:
4370 intel_context_put(ce);
4371 return err;
4372 }
4373
4374 static int live_lrc_state(void *arg)
4375 {
4376 struct intel_gt *gt = arg;
4377 struct intel_engine_cs *engine;
4378 struct i915_vma *scratch;
4379 enum intel_engine_id id;
4380 int err = 0;
4381
4382 /*
4383 * Check the live register state matches what we expect for this
4384 * intel_context.
4385 */
4386
4387 scratch = create_scratch(gt);
4388 if (IS_ERR(scratch))
4389 return PTR_ERR(scratch);
4390
4391 for_each_engine(engine, gt, id) {
4392 err = __live_lrc_state(engine, scratch);
4393 if (err)
4394 break;
4395 }
4396
4397 if (igt_flush_test(gt->i915))
4398 err = -EIO;
4399
4400 i915_vma_unpin_and_release(&scratch, 0);
4401 return err;
4402 }
4403
4404 static int gpr_make_dirty(struct intel_context *ce)
4405 {
4406 struct i915_request *rq;
4407 u32 *cs;
4408 int n;
4409
4410 rq = intel_context_create_request(ce);
4411 if (IS_ERR(rq))
4412 return PTR_ERR(rq);
4413
4414 cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
4415 if (IS_ERR(cs)) {
4416 i915_request_add(rq);
4417 return PTR_ERR(cs);
4418 }
4419
4420 *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
4421 for (n = 0; n < NUM_GPR_DW; n++) {
4422 *cs++ = CS_GPR(ce->engine, n);
4423 *cs++ = STACK_MAGIC;
4424 }
4425 *cs++ = MI_NOOP;
4426
4427 intel_ring_advance(rq, cs);
4428
4429 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
4430 i915_request_add(rq);
4431
4432 return 0;
4433 }
4434
4435 static struct i915_request *
4436 __gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot)
4437 {
4438 const u32 offset =
4439 i915_ggtt_offset(ce->engine->status_page.vma) +
4440 offset_in_page(slot);
4441 struct i915_request *rq;
4442 u32 *cs;
4443 int err;
4444 int n;
4445
4446 rq = intel_context_create_request(ce);
4447 if (IS_ERR(rq))
4448 return rq;
4449
4450 cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW);
4451 if (IS_ERR(cs)) {
4452 i915_request_add(rq);
4453 return ERR_CAST(cs);
4454 }
4455
4456 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4457 *cs++ = MI_NOOP;
4458
4459 *cs++ = MI_SEMAPHORE_WAIT |
4460 MI_SEMAPHORE_GLOBAL_GTT |
4461 MI_SEMAPHORE_POLL |
4462 MI_SEMAPHORE_SAD_NEQ_SDD;
4463 *cs++ = 0;
4464 *cs++ = offset;
4465 *cs++ = 0;
4466
4467 for (n = 0; n < NUM_GPR_DW; n++) {
4468 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4469 *cs++ = CS_GPR(ce->engine, n);
4470 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
4471 *cs++ = 0;
4472 }
4473
4474 i915_vma_lock(scratch);
4475 err = i915_request_await_object(rq, scratch->obj, true);
4476 if (!err)
4477 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
4478 i915_vma_unlock(scratch);
4479
4480 i915_request_get(rq);
4481 i915_request_add(rq);
4482 if (err) {
4483 i915_request_put(rq);
4484 rq = ERR_PTR(err);
4485 }
4486
4487 return rq;
4488 }
4489
4490 static int __live_lrc_gpr(struct intel_engine_cs *engine,
4491 struct i915_vma *scratch,
4492 bool preempt)
4493 {
4494 u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4);
4495 struct intel_context *ce;
4496 struct i915_request *rq;
4497 u32 *cs;
4498 int err;
4499 int n;
4500
4501 if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
4502 return 0; /* GPR only on rcs0 for gen8 */
4503
4504 err = gpr_make_dirty(engine->kernel_context);
4505 if (err)
4506 return err;
4507
4508 ce = intel_context_create(engine);
4509 if (IS_ERR(ce))
4510 return PTR_ERR(ce);
4511
4512 rq = __gpr_read(ce, scratch, slot);
4513 if (IS_ERR(rq)) {
4514 err = PTR_ERR(rq);
4515 goto err_put;
4516 }
4517
4518 err = wait_for_submit(engine, rq, HZ / 2);
4519 if (err)
4520 goto err_rq;
4521
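/*
 * The reader emitted by __gpr_read() is parked on a semaphore until
 * slot[0] becomes non-zero. Either release it directly from the CPU,
 * or, for the preemption case, dirty the GPRs again from the kernel
 * context and let it signal the semaphore at barrier priority, so that
 * ce is preempted and later restored from its saved context image.
 */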
4522 if (preempt) {
4523 err = gpr_make_dirty(engine->kernel_context);
4524 if (err)
4525 goto err_rq;
4526
4527 err = emit_semaphore_signal(engine->kernel_context, slot);
4528 if (err)
4529 goto err_rq;
4530 } else {
4531 slot[0] = 1;
4532 wmb();
4533 }
4534
4535 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4536 err = -ETIME;
4537 goto err_rq;
4538 }
4539
4540 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
4541 if (IS_ERR(cs)) {
4542 err = PTR_ERR(cs);
4543 goto err_rq;
4544 }
4545
4546 for (n = 0; n < NUM_GPR_DW; n++) {
4547 if (cs[n]) {
4548 pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
4549 engine->name,
4550 n / 2, n & 1 ? "udw" : "ldw",
4551 cs[n]);
4552 err = -EINVAL;
4553 break;
4554 }
4555 }
4556
4557 i915_gem_object_unpin_map(scratch->obj);
4558
4559 err_rq:
4560 memset32(&slot[0], -1, 4);
4561 wmb();
4562 i915_request_put(rq);
4563 err_put:
4564 intel_context_put(ce);
4565 return err;
4566 }
4567
4568 static int live_lrc_gpr(void *arg)
4569 {
4570 struct intel_gt *gt = arg;
4571 struct intel_engine_cs *engine;
4572 struct i915_vma *scratch;
4573 enum intel_engine_id id;
4574 int err = 0;
4575
4576 /*
4577 * Check that GPR registers are cleared in new contexts as we need
4578 * to avoid leaking any information from previous contexts.
4579 */
4580
4581 scratch = create_scratch(gt);
4582 if (IS_ERR(scratch))
4583 return PTR_ERR(scratch);
4584
4585 for_each_engine(engine, gt, id) {
4586 unsigned long heartbeat;
4587
4588 engine_heartbeat_disable(engine, &heartbeat);
4589
4590 err = __live_lrc_gpr(engine, scratch, false);
4591 if (err)
4592 goto err;
4593
4594 err = __live_lrc_gpr(engine, scratch, true);
4595 if (err)
4596 goto err;
4597
4598 err:
4599 engine_heartbeat_enable(engine, heartbeat);
4600 if (igt_flush_test(gt->i915))
4601 err = -EIO;
4602 if (err)
4603 break;
4604 }
4605
4606 i915_vma_unpin_and_release(&scratch, 0);
4607 return err;
4608 }
4609
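/*
 * Build a request that waits on a semaphore in the status page and then
 * stores RING_CTX_TIMESTAMP into slot[idx], sampling the timestamp while the
 * context is still running.
 */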
4610 static struct i915_request *
4611 create_timestamp(struct intel_context *ce, void *slot, int idx)
4612 {
4613 const u32 offset =
4614 i915_ggtt_offset(ce->engine->status_page.vma) +
4615 offset_in_page(slot);
4616 struct i915_request *rq;
4617 u32 *cs;
4618 int err;
4619
4620 rq = intel_context_create_request(ce);
4621 if (IS_ERR(rq))
4622 return rq;
4623
4624 cs = intel_ring_begin(rq, 10);
4625 if (IS_ERR(cs)) {
4626 err = PTR_ERR(cs);
4627 goto err;
4628 }
4629
4630 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4631 *cs++ = MI_NOOP;
4632
4633 *cs++ = MI_SEMAPHORE_WAIT |
4634 MI_SEMAPHORE_GLOBAL_GTT |
4635 MI_SEMAPHORE_POLL |
4636 MI_SEMAPHORE_SAD_NEQ_SDD;
4637 *cs++ = 0;
4638 *cs++ = offset;
4639 *cs++ = 0;
4640
4641 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4642 *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base));
4643 *cs++ = offset + idx * sizeof(u32);
4644 *cs++ = 0;
4645
4646 intel_ring_advance(rq, cs);
4647
4648 rq->sched.attr.priority = I915_PRIORITY_MASK;
4649 err = 0;
4650 err:
4651 i915_request_get(rq);
4652 i915_request_add(rq);
4653 if (err) {
4654 i915_request_put(rq);
4655 return ERR_PTR(err);
4656 }
4657
4658 return rq;
4659 }
4660
4661 struct lrc_timestamp {
4662 struct intel_engine_cs *engine;
4663 struct intel_context *ce[2];
4664 u32 poison;
4665 };
4666
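/* Signed comparison so that the check survives timestamp wraparound. */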
4667 static bool timestamp_advanced(u32 start, u32 end)
4668 {
4669 return (s32)(end - start) > 0;
4670 }
4671
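/*
 * Poison CTX_TIMESTAMP in the context image, run the context (optionally
 * preempting it with a second poisoned context), then check that the value
 * sampled on restore and the value saved back into the image both advanced
 * monotonically past the poison.
 */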
4672 static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt)
4673 {
4674 u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4);
4675 struct i915_request *rq;
4676 u32 timestamp;
4677 int err = 0;
4678
4679 arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison;
4680 rq = create_timestamp(arg->ce[0], slot, 1);
4681 if (IS_ERR(rq))
4682 return PTR_ERR(rq);
4683
4684 err = wait_for_submit(rq->engine, rq, HZ / 2);
4685 if (err)
4686 goto err;
4687
4688 if (preempt) {
4689 arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef;
4690 err = emit_semaphore_signal(arg->ce[1], slot);
4691 if (err)
4692 goto err;
4693 } else {
4694 slot[0] = 1;
4695 wmb();
4696 }
4697
4698 /* And wait for switch to kernel (to save our context to memory) */
4699 err = context_flush(arg->ce[0], HZ / 2);
4700 if (err)
4701 goto err;
4702
4703 if (!timestamp_advanced(arg->poison, slot[1])) {
4704 pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
4705 arg->engine->name, preempt ? "preempt" : "simple",
4706 arg->poison, slot[1]);
4707 err = -EINVAL;
4708 }
4709
4710 timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]);
4711 if (!timestamp_advanced(slot[1], timestamp)) {
4712 pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n",
4713 arg->engine->name, preempt ? "preempt" : "simple",
4714 slot[1], timestamp);
4715 err = -EINVAL;
4716 }
4717
4718 err:
4719 memset32(slot, -1, 4);
4720 i915_request_put(rq);
4721 return err;
4722 }
4723
4724 static int live_lrc_timestamp(void *arg)
4725 {
4726 struct lrc_timestamp data = {};
4727 struct intel_gt *gt = arg;
4728 enum intel_engine_id id;
4729 const u32 poison[] = {
4730 0,
4731 S32_MAX,
4732 (u32)S32_MAX + 1,
4733 U32_MAX,
4734 };
4735
4736 /*
4737 * We want to verify that the timestamp is saved and restored across
4738 * context switches and is monotonic.
4739 *
4740 * So we do this with a little bit of LRC poisoning to check various
4741 * boundary conditions, and see what happens if we preempt the context
4742 * with a second request (carrying more poison into the timestamp).
4743 */
4744
4745 for_each_engine(data.engine, gt, id) {
4746 unsigned long heartbeat;
4747 int i, err = 0;
4748
4749 engine_heartbeat_disable(data.engine, &heartbeat);
4750
4751 for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
4752 struct intel_context *tmp;
4753
4754 tmp = intel_context_create(data.engine);
4755 if (IS_ERR(tmp)) {
4756 err = PTR_ERR(tmp);
4757 goto err;
4758 }
4759
4760 err = intel_context_pin(tmp);
4761 if (err) {
4762 intel_context_put(tmp);
4763 goto err;
4764 }
4765
4766 data.ce[i] = tmp;
4767 }
4768
4769 for (i = 0; i < ARRAY_SIZE(poison); i++) {
4770 data.poison = poison[i];
4771
4772 err = __lrc_timestamp(&data, false);
4773 if (err)
4774 break;
4775
4776 err = __lrc_timestamp(&data, true);
4777 if (err)
4778 break;
4779 }
4780
4781 err:
4782 engine_heartbeat_enable(data.engine, heartbeat);
4783 for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
4784 if (!data.ce[i])
4785 break;
4786
4787 intel_context_unpin(data.ce[i]);
4788 intel_context_put(data.ce[i]);
4789 }
4790
4791 if (igt_flush_test(gt->i915))
4792 err = -EIO;
4793 if (err)
4794 return err;
4795 }
4796
4797 return 0;
4798 }
4799
4800 static struct i915_vma *
4801 create_user_vma(struct i915_address_space *vm, unsigned long size)
4802 {
4803 struct drm_i915_gem_object *obj;
4804 struct i915_vma *vma;
4805 int err;
4806
4807 obj = i915_gem_object_create_internal(vm->i915, size);
4808 if (IS_ERR(obj))
4809 return ERR_CAST(obj);
4810
4811 vma = i915_vma_instance(obj, vm, NULL);
4812 if (IS_ERR(vma)) {
4813 i915_gem_object_put(obj);
4814 return vma;
4815 }
4816
4817 err = i915_vma_pin(vma, 0, 0, PIN_USER);
4818 if (err) {
4819 i915_gem_object_put(obj);
4820 return ERR_PTR(err);
4821 }
4822
4823 return vma;
4824 }
4825
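/*
 * Build a user batch that walks the MI_LOAD_REGISTER_IMM blocks in the
 * engine's default context image and emits an SRM for each register listed
 * there, dumping the live value into the scratch buffer.
 */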
4826 static struct i915_vma *
4827 store_context(struct intel_context *ce, struct i915_vma *scratch)
4828 {
4829 struct i915_vma *batch;
4830 u32 dw, x, *cs, *hw;
4831
4832 batch = create_user_vma(ce->vm, SZ_64K);
4833 if (IS_ERR(batch))
4834 return batch;
4835
4836 cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
4837 if (IS_ERR(cs)) {
4838 i915_vma_put(batch);
4839 return ERR_CAST(cs);
4840 }
4841
4842 x = 0;
4843 dw = 0;
4844 hw = ce->engine->pinned_default_state;
4845 hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
4846 do {
4847 u32 len = hw[dw] & 0x7f;
4848
4849 if (hw[dw] == 0) {
4850 dw++;
4851 continue;
4852 }
4853
4854 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
4855 dw += len + 2;
4856 continue;
4857 }
4858
4859 dw++;
4860 len = (len + 1) / 2;
4861 while (len--) {
4862 *cs++ = MI_STORE_REGISTER_MEM_GEN8;
4863 *cs++ = hw[dw];
4864 *cs++ = lower_32_bits(scratch->node.start + x);
4865 *cs++ = upper_32_bits(scratch->node.start + x);
4866
4867 dw += 2;
4868 x += 4;
4869 }
4870 } while (dw < PAGE_SIZE / sizeof(u32) &&
4871 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
4872
4873 *cs++ = MI_BATCH_BUFFER_END;
4874
4875 i915_gem_object_flush_map(batch->obj);
4876 i915_gem_object_unpin_map(batch->obj);
4877
4878 return batch;
4879 }
4880
4881 static int move_to_active(struct i915_request *rq,
4882 struct i915_vma *vma,
4883 unsigned int flags)
4884 {
4885 int err;
4886
4887 i915_vma_lock(vma);
4888 err = i915_request_await_object(rq, vma->obj, flags);
4889 if (!err)
4890 err = i915_vma_move_to_active(vma, rq, flags);
4891 i915_vma_unlock(vma);
4892
4893 return err;
4894 }
4895
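/*
 * Submit a request that captures the context registers into @before, then
 * blocks on a semaphore with arbitration enabled (so it may be preempted),
 * and captures them again into @after once the semaphore is released.
 */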
4896 static struct i915_request *
4897 record_registers(struct intel_context *ce,
4898 struct i915_vma *before,
4899 struct i915_vma *after,
4900 u32 *sema)
4901 {
4902 struct i915_vma *b_before, *b_after;
4903 struct i915_request *rq;
4904 u32 *cs;
4905 int err;
4906
4907 b_before = store_context(ce, before);
4908 if (IS_ERR(b_before))
4909 return ERR_CAST(b_before);
4910
4911 b_after = store_context(ce, after);
4912 if (IS_ERR(b_after)) {
4913 rq = ERR_CAST(b_after);
4914 goto err_before;
4915 }
4916
4917 rq = intel_context_create_request(ce);
4918 if (IS_ERR(rq))
4919 goto err_after;
4920
4921 err = move_to_active(rq, before, EXEC_OBJECT_WRITE);
4922 if (err)
4923 goto err_rq;
4924
4925 err = move_to_active(rq, b_before, 0);
4926 if (err)
4927 goto err_rq;
4928
4929 err = move_to_active(rq, after, EXEC_OBJECT_WRITE);
4930 if (err)
4931 goto err_rq;
4932
4933 err = move_to_active(rq, b_after, 0);
4934 if (err)
4935 goto err_rq;
4936
4937 cs = intel_ring_begin(rq, 14);
4938 if (IS_ERR(cs)) {
4939 err = PTR_ERR(cs);
4940 goto err_rq;
4941 }
4942
4943 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
4944 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
4945 *cs++ = lower_32_bits(b_before->node.start);
4946 *cs++ = upper_32_bits(b_before->node.start);
4947
4948 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4949 *cs++ = MI_SEMAPHORE_WAIT |
4950 MI_SEMAPHORE_GLOBAL_GTT |
4951 MI_SEMAPHORE_POLL |
4952 MI_SEMAPHORE_SAD_NEQ_SDD;
4953 *cs++ = 0;
4954 *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
4955 offset_in_page(sema);
4956 *cs++ = 0;
4957 *cs++ = MI_NOOP;
4958
4959 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
4960 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
4961 *cs++ = lower_32_bits(b_after->node.start);
4962 *cs++ = upper_32_bits(b_after->node.start);
4963
4964 intel_ring_advance(rq, cs);
4965
4966 WRITE_ONCE(*sema, 0);
4967 i915_request_get(rq);
4968 i915_request_add(rq);
4969 err_after:
4970 i915_vma_put(b_after);
4971 err_before:
4972 i915_vma_put(b_before);
4973 return rq;
4974
4975 err_rq:
4976 i915_request_add(rq);
4977 rq = ERR_PTR(err);
4978 goto err_after;
4979 }
4980
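/*
 * Build a user batch that replays the MI_LOAD_REGISTER_IMM blocks from the
 * default context image, but with every register value replaced by @poison.
 */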
4981 static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
4982 {
4983 struct i915_vma *batch;
4984 u32 dw, *cs, *hw;
4985
4986 batch = create_user_vma(ce->vm, SZ_64K);
4987 if (IS_ERR(batch))
4988 return batch;
4989
4990 cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
4991 if (IS_ERR(cs)) {
4992 i915_vma_put(batch);
4993 return ERR_CAST(cs);
4994 }
4995
4996 dw = 0;
4997 hw = ce->engine->pinned_default_state;
4998 hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
4999 do {
5000 u32 len = hw[dw] & 0x7f;
5001
5002 if (hw[dw] == 0) {
5003 dw++;
5004 continue;
5005 }
5006
5007 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
5008 dw += len + 2;
5009 continue;
5010 }
5011
5012 dw++;
5013 len = (len + 1) / 2;
5014 *cs++ = MI_LOAD_REGISTER_IMM(len);
5015 while (len--) {
5016 *cs++ = hw[dw];
5017 *cs++ = poison;
5018 dw += 2;
5019 }
5020 } while (dw < PAGE_SIZE / sizeof(u32) &&
5021 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
5022
5023 *cs++ = MI_BATCH_BUFFER_END;
5024
5025 i915_gem_object_flush_map(batch->obj);
5026 i915_gem_object_unpin_map(batch->obj);
5027
5028 return batch;
5029 }
5030
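/*
 * Run the poisoning batch from @ce at barrier priority, then write to the
 * semaphore so that the context waiting in record_registers() resumes and
 * takes its second register capture.
 */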
5031 static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
5032 {
5033 struct i915_request *rq;
5034 struct i915_vma *batch;
5035 u32 *cs;
5036 int err;
5037
5038 batch = load_context(ce, poison);
5039 if (IS_ERR(batch))
5040 return PTR_ERR(batch);
5041
5042 rq = intel_context_create_request(ce);
5043 if (IS_ERR(rq)) {
5044 err = PTR_ERR(rq);
5045 goto err_batch;
5046 }
5047
5048 err = move_to_active(rq, batch, 0);
5049 if (err)
5050 goto err_rq;
5051
5052 cs = intel_ring_begin(rq, 8);
5053 if (IS_ERR(cs)) {
5054 err = PTR_ERR(cs);
5055 goto err_rq;
5056 }
5057
5058 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5059 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
5060 *cs++ = lower_32_bits(batch->node.start);
5061 *cs++ = upper_32_bits(batch->node.start);
5062
5063 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
5064 *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
5065 offset_in_page(sema);
5066 *cs++ = 0;
5067 *cs++ = 1;
5068
5069 intel_ring_advance(rq, cs);
5070
5071 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
5072 err_rq:
5073 i915_request_add(rq);
5074 err_batch:
5075 i915_vma_put(batch);
5076 return err;
5077 }
5078
5079 static bool is_moving(u32 a, u32 b)
5080 {
5081 return a != b;
5082 }
5083
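/*
 * Compare the reference captures (taken with no interference) against the
 * result captures (taken around a poisoning attempt from another context).
 * Registers that already differ between the two reference samples are
 * treated as volatile and ignored, as are RING_HEAD and RING_TAIL.
 */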
5084 static int compare_isolation(struct intel_engine_cs *engine,
5085 struct i915_vma *ref[2],
5086 struct i915_vma *result[2],
5087 struct intel_context *ce,
5088 u32 poison)
5089 {
5090 u32 x, dw, *hw, *lrc;
5091 u32 *A[2], *B[2];
5092 int err = 0;
5093
5094 A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC);
5095 if (IS_ERR(A[0]))
5096 return PTR_ERR(A[0]);
5097
5098 A[1] = i915_gem_object_pin_map(ref[1]->obj, I915_MAP_WC);
5099 if (IS_ERR(A[1])) {
5100 err = PTR_ERR(A[1]);
5101 goto err_A0;
5102 }
5103
5104 B[0] = i915_gem_object_pin_map(result[0]->obj, I915_MAP_WC);
5105 if (IS_ERR(B[0])) {
5106 err = PTR_ERR(B[0]);
5107 goto err_A1;
5108 }
5109
5110 B[1] = i915_gem_object_pin_map(result[1]->obj, I915_MAP_WC);
5111 if (IS_ERR(B[1])) {
5112 err = PTR_ERR(B[1]);
5113 goto err_B0;
5114 }
5115
5116 lrc = i915_gem_object_pin_map(ce->state->obj,
5117 i915_coherent_map_type(engine->i915));
5118 if (IS_ERR(lrc)) {
5119 err = PTR_ERR(lrc);
5120 goto err_B1;
5121 }
5122 lrc += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
5123
5124 x = 0;
5125 dw = 0;
5126 hw = engine->pinned_default_state;
5127 hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
5128 do {
5129 u32 len = hw[dw] & 0x7f;
5130
5131 if (hw[dw] == 0) {
5132 dw++;
5133 continue;
5134 }
5135
5136 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
5137 dw += len + 2;
5138 continue;
5139 }
5140
5141 dw++;
5142 len = (len + 1) / 2;
5143 while (len--) {
5144 if (!is_moving(A[0][x], A[1][x]) &&
5145 (A[0][x] != B[0][x] || A[1][x] != B[1][x])) {
5146 switch (hw[dw] & 4095) {
5147 case 0x30: /* RING_HEAD */
5148 case 0x34: /* RING_TAIL */
5149 break;
5150
5151 default:
5152 pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
5153 engine->name, dw,
5154 hw[dw], hw[dw + 1],
5155 A[0][x], B[0][x], B[1][x],
5156 poison, lrc[dw + 1]);
5157 err = -EINVAL;
5158 break;
5159 }
5160 }
5161 dw += 2;
5162 x++;
5163 }
5164 } while (dw < PAGE_SIZE / sizeof(u32) &&
5165 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
5166
5167 i915_gem_object_unpin_map(ce->state->obj);
5168 err_B1:
5169 i915_gem_object_unpin_map(result[1]->obj);
5170 err_B0:
5171 i915_gem_object_unpin_map(result[0]->obj);
5172 err_A1:
5173 i915_gem_object_unpin_map(ref[1]->obj);
5174 err_A0:
5175 i915_gem_object_unpin_map(ref[0]->obj);
5176 return err;
5177 }
5178
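/*
 * Record context A's registers twice for a baseline, then repeat the capture
 * while context B attempts to overwrite those registers with @poison, and
 * check that A's state was unaffected.
 */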
5179 static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
5180 {
5181 u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1);
5182 struct i915_vma *ref[2], *result[2];
5183 struct intel_context *A, *B;
5184 struct i915_request *rq;
5185 int err;
5186
5187 A = intel_context_create(engine);
5188 if (IS_ERR(A))
5189 return PTR_ERR(A);
5190
5191 B = intel_context_create(engine);
5192 if (IS_ERR(B)) {
5193 err = PTR_ERR(B);
5194 goto err_A;
5195 }
5196
5197 ref[0] = create_user_vma(A->vm, SZ_64K);
5198 if (IS_ERR(ref[0])) {
5199 err = PTR_ERR(ref[0]);
5200 goto err_B;
5201 }
5202
5203 ref[1] = create_user_vma(A->vm, SZ_64K);
5204 if (IS_ERR(ref[1])) {
5205 err = PTR_ERR(ref[1]);
5206 goto err_ref0;
5207 }
5208
5209 rq = record_registers(A, ref[0], ref[1], sema);
5210 if (IS_ERR(rq)) {
5211 err = PTR_ERR(rq);
5212 goto err_ref1;
5213 }
5214
5215 WRITE_ONCE(*sema, 1);
5216 wmb();
5217
5218 if (i915_request_wait(rq, 0, HZ / 2) < 0) {
5219 i915_request_put(rq);
5220 err = -ETIME;
5221 goto err_ref1;
5222 }
5223 i915_request_put(rq);
5224
5225 result[0] = create_user_vma(A->vm, SZ_64K);
5226 if (IS_ERR(result[0])) {
5227 err = PTR_ERR(result[0]);
5228 goto err_ref1;
5229 }
5230
5231 result[1] = create_user_vma(A->vm, SZ_64K);
5232 if (IS_ERR(result[1])) {
5233 err = PTR_ERR(result[1]);
5234 goto err_result0;
5235 }
5236
5237 rq = record_registers(A, result[0], result[1], sema);
5238 if (IS_ERR(rq)) {
5239 err = PTR_ERR(rq);
5240 goto err_result1;
5241 }
5242
5243 err = poison_registers(B, poison, sema);
5244 if (err) {
5245 WRITE_ONCE(*sema, -1);
5246 i915_request_put(rq);
5247 goto err_result1;
5248 }
5249
5250 if (i915_request_wait(rq, 0, HZ / 2) < 0) {
5251 i915_request_put(rq);
5252 err = -ETIME;
5253 goto err_result1;
5254 }
5255 i915_request_put(rq);
5256
5257 err = compare_isolation(engine, ref, result, A, poison);
5258
5259 err_result1:
5260 i915_vma_put(result[1]);
5261 err_result0:
5262 i915_vma_put(result[0]);
5263 err_ref1:
5264 i915_vma_put(ref[1]);
5265 err_ref0:
5266 i915_vma_put(ref[0]);
5267 err_B:
5268 intel_context_put(B);
5269 err_A:
5270 intel_context_put(A);
5271 return err;
5272 }
5273
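/*
 * Engines on which the isolation check is skipped unless
 * CONFIG_DRM_I915_SELFTEST_BROKEN is enabled.
 */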
5274 static bool skip_isolation(const struct intel_engine_cs *engine)
5275 {
5276 if (engine->class == COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) == 9)
5277 return true;
5278
5279 if (engine->class == RENDER_CLASS && INTEL_GEN(engine->i915) == 11)
5280 return true;
5281
5282 return false;
5283 }
5284
5285 static int live_lrc_isolation(void *arg)
5286 {
5287 struct intel_gt *gt = arg;
5288 struct intel_engine_cs *engine;
5289 enum intel_engine_id id;
5290 const u32 poison[] = {
5291 STACK_MAGIC,
5292 0x3a3a3a3a,
5293 0x5c5c5c5c,
5294 0xffffffff,
5295 0xffff0000,
5296 };
5297
5298 /*
5299 * Our goal is to try to verify that per-context state cannot be
5300 * tampered with by another non-privileged client.
5301 *
5302 * We take the list of context registers from the LRI in the default
5303 * context image and attempt to modify that list from a remote context.
5304 */
5305
5306 for_each_engine(engine, gt, id) {
5307 int err = 0;
5308 int i;
5309
5310 /* Just don't even ask */
5311 if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
5312 skip_isolation(engine))
5313 continue;
5314
5315 intel_engine_pm_get(engine);
5316 if (engine->pinned_default_state) {
5317 for (i = 0; i < ARRAY_SIZE(poison); i++) {
5318 err = __lrc_isolation(engine, poison[i]);
5319 if (err)
5320 break;
5321
5322 err = __lrc_isolation(engine, ~poison[i]);
5323 if (err)
5324 break;
5325 }
5326 }
5327 intel_engine_pm_put(engine);
5328 if (igt_flush_test(gt->i915))
5329 err = -EIO;
5330 if (err)
5331 return err;
5332 }
5333
5334 return 0;
5335 }
5336
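/*
 * Perform a direct engine reset, with the submission tasklet disabled, unless
 * either a reset is already in flight for this engine or the request has
 * already been marked with an error.
 */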
5337 static void garbage_reset(struct intel_engine_cs *engine,
5338 struct i915_request *rq)
5339 {
5340 const unsigned int bit = I915_RESET_ENGINE + engine->id;
5341 unsigned long *lock = &engine->gt->reset.flags;
5342
5343 if (test_and_set_bit(bit, lock))
5344 return;
5345
5346 tasklet_disable(&engine->execlists.tasklet);
5347
5348 if (!rq->fence.error)
5349 intel_engine_reset(engine, NULL);
5350
5351 tasklet_enable(&engine->execlists.tasklet);
5352 clear_and_wake_up_bit(bit, lock);
5353 }
5354
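/*
 * Scribble random bytes over the register state in the pinned context image
 * and submit a request from the corrupted context.
 */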
5355 static struct i915_request *garbage(struct intel_context *ce,
5356 struct rnd_state *prng)
5357 {
5358 struct i915_request *rq;
5359 int err;
5360
5361 err = intel_context_pin(ce);
5362 if (err)
5363 return ERR_PTR(err);
5364
5365 prandom_bytes_state(prng,
5366 ce->lrc_reg_state,
5367 ce->engine->context_size -
5368 LRC_STATE_PN * PAGE_SIZE);
5369
5370 rq = intel_context_create_request(ce);
5371 if (IS_ERR(rq)) {
5372 err = PTR_ERR(rq);
5373 goto err_unpin;
5374 }
5375
5376 i915_request_get(rq);
5377 i915_request_add(rq);
5378 return rq;
5379
5380 err_unpin:
5381 intel_context_unpin(ce);
5382 return ERR_PTR(err);
5383 }
5384
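/*
 * Submit a request from a context whose image has been filled with garbage,
 * ban the context, reset the engine, and check that the request is flagged
 * with an error and that the engine recovers.
 */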
5385 static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
5386 {
5387 struct intel_context *ce;
5388 struct i915_request *hang;
5389 int err = 0;
5390
5391 ce = intel_context_create(engine);
5392 if (IS_ERR(ce))
5393 return PTR_ERR(ce);
5394
5395 hang = garbage(ce, prng);
5396 if (IS_ERR(hang)) {
5397 err = PTR_ERR(hang);
5398 goto err_ce;
5399 }
5400
5401 if (wait_for_submit(engine, hang, HZ / 2)) {
5402 i915_request_put(hang);
5403 err = -ETIME;
5404 goto err_ce;
5405 }
5406
5407 intel_context_set_banned(ce);
5408 garbage_reset(engine, hang);
5409
5410 intel_engine_flush_submission(engine);
5411 if (!hang->fence.error) {
5412 i915_request_put(hang);
5413 pr_err("%s: corrupted context was not reset\n",
5414 engine->name);
5415 err = -EINVAL;
5416 goto err_ce;
5417 }
5418
5419 if (i915_request_wait(hang, 0, HZ / 2) < 0) {
5420 pr_err("%s: corrupted context did not recover\n",
5421 engine->name);
5422 i915_request_put(hang);
5423 err = -EIO;
5424 goto err_ce;
5425 }
5426 i915_request_put(hang);
5427
5428 err_ce:
5429 intel_context_put(ce);
5430 return err;
5431 }
5432
5433 static int live_lrc_garbage(void *arg)
5434 {
5435 struct intel_gt *gt = arg;
5436 struct intel_engine_cs *engine;
5437 enum intel_engine_id id;
5438
5439 /*
5440 * Verify that we can recover if one context state is completely
5441 * corrupted.
5442 */
5443
5444 if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
5445 return 0;
5446
5447 for_each_engine(engine, gt, id) {
5448 I915_RND_STATE(prng);
5449 int err = 0, i;
5450
5451 if (!intel_has_reset_engine(engine->gt))
5452 continue;
5453
5454 intel_engine_pm_get(engine);
5455 for (i = 0; i < 3; i++) {
5456 err = __lrc_garbage(engine, &prng);
5457 if (err)
5458 break;
5459 }
5460 intel_engine_pm_put(engine);
5461
5462 if (igt_flush_test(gt->i915))
5463 err = -EIO;
5464 if (err)
5465 return err;
5466 }
5467
5468 return 0;
5469 }
5470
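/*
 * Flood a single context with empty requests until the timeout expires, then
 * report the accumulated PPHWSP runtime and fail if any runtime underflow was
 * recorded for the context.
 */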
5471 static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
5472 {
5473 struct intel_context *ce;
5474 struct i915_request *rq;
5475 IGT_TIMEOUT(end_time);
5476 int err;
5477
5478 ce = intel_context_create(engine);
5479 if (IS_ERR(ce))
5480 return PTR_ERR(ce);
5481
5482 ce->runtime.num_underflow = 0;
5483 ce->runtime.max_underflow = 0;
5484
5485 do {
5486 unsigned int loop = 1024;
5487
5488 while (loop) {
5489 rq = intel_context_create_request(ce);
5490 if (IS_ERR(rq)) {
5491 err = PTR_ERR(rq);
5492 goto err_rq;
5493 }
5494
5495 if (--loop == 0)
5496 i915_request_get(rq);
5497
5498 i915_request_add(rq);
5499 }
5500
5501 if (__igt_timeout(end_time, NULL))
5502 break;
5503
5504 i915_request_put(rq);
5505 } while (1);
5506
5507 err = i915_request_wait(rq, 0, HZ / 5);
5508 if (err < 0) {
5509 pr_err("%s: request not completed!\n", engine->name);
5510 goto err_wait;
5511 }
5512
5513 igt_flush_test(engine->i915);
5514
5515 pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
5516 engine->name,
5517 intel_context_get_total_runtime_ns(ce),
5518 intel_context_get_avg_runtime_ns(ce));
5519
5520 err = 0;
5521 if (ce->runtime.num_underflow) {
5522 pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
5523 engine->name,
5524 ce->runtime.num_underflow,
5525 ce->runtime.max_underflow);
5526 GEM_TRACE_DUMP();
5527 err = -EOVERFLOW;
5528 }
5529
5530 err_wait:
5531 i915_request_put(rq);
5532 err_rq:
5533 intel_context_put(ce);
5534 return err;
5535 }
5536
5537 static int live_pphwsp_runtime(void *arg)
5538 {
5539 struct intel_gt *gt = arg;
5540 struct intel_engine_cs *engine;
5541 enum intel_engine_id id;
5542 int err = 0;
5543
5544 /*
5545 * Check that the cumulative context runtime, as stored in pphwsp[16],
5546 * is monotonic.
5547 */
5548
5549 for_each_engine(engine, gt, id) {
5550 err = __live_pphwsp_runtime(engine);
5551 if (err)
5552 break;
5553 }
5554
5555 if (igt_flush_test(gt->i915))
5556 err = -EIO;
5557
5558 return err;
5559 }
5560
5561 int intel_lrc_live_selftests(struct drm_i915_private *i915)
5562 {
5563 static const struct i915_subtest tests[] = {
5564 SUBTEST(live_lrc_layout),
5565 SUBTEST(live_lrc_fixed),
5566 SUBTEST(live_lrc_state),
5567 SUBTEST(live_lrc_gpr),
5568 SUBTEST(live_lrc_isolation),
5569 SUBTEST(live_lrc_timestamp),
5570 SUBTEST(live_lrc_garbage),
5571 SUBTEST(live_pphwsp_runtime),
5572 };
5573
5574 if (!HAS_LOGICAL_RING_CONTEXTS(i915))
5575 return 0;
5576
5577 return intel_gt_live_subtests(tests, &i915->gt);
5578 }