/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2018 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "gem/i915_gem_pm.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_reset.h"

#include "i915_selftest.h"
#include "selftests/i915_random.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_live_test.h"
#include "selftests/igt_spinner.h"
#include "selftests/lib_sw_fence.h"

#include "gem/selftests/igt_gem_utils.h"
#include "gem/selftests/mock_context.h"

#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
#define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */
static struct i915_vma *create_scratch(struct intel_gt *gt)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int err;

	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (err) {
		i915_gem_object_put(obj);
		return ERR_PTR(err);
	}

	return vma;
}
static void engine_heartbeat_disable(struct intel_engine_cs *engine,
				     unsigned long *saved)
{
	*saved = engine->props.heartbeat_interval_ms;
	engine->props.heartbeat_interval_ms = 0;

	intel_engine_pm_get(engine);
	intel_engine_park_heartbeat(engine);
}

static void engine_heartbeat_enable(struct intel_engine_cs *engine,
				    unsigned long saved)
{
	intel_engine_pm_put(engine);

	engine->props.heartbeat_interval_ms = saved;
}
static int wait_for_submit(struct intel_engine_cs *engine,
			   struct i915_request *rq,
			   unsigned long timeout)
{
	timeout += jiffies;
	do {
		cond_resched();
		intel_engine_flush_submission(engine);

		if (READ_ONCE(engine->execlists.pending[0]))
			continue;

		if (i915_request_is_active(rq))
			return 0;

		if (i915_request_started(rq)) /* that was quick! */
			return 0;
	} while (time_before(jiffies, timeout));

	return -ETIME;
}
static int wait_for_reset(struct intel_engine_cs *engine,
			  struct i915_request *rq,
			  unsigned long timeout)
{
	timeout += jiffies;

	do {
		cond_resched();
		intel_engine_flush_submission(engine);

		if (READ_ONCE(engine->execlists.pending[0]))
			continue;

		if (i915_request_completed(rq))
			break;

		if (READ_ONCE(rq->fence.error))
			break;
	} while (time_before(jiffies, timeout));

	flush_scheduled_work();

	if (rq->fence.error != -EIO) {
		pr_err("%s: hanging request %llx:%lld not reset\n",
		       engine->name,
		       rq->fence.context,
		       rq->fence.seqno);
		return -EINVAL;
	}

	/* Give the request a jiffie to complete after flushing the worker */
	if (i915_request_wait(rq, 0,
			      max(0l, (long)(timeout - jiffies)) + 1) < 0) {
		pr_err("%s: hanging request %llx:%lld did not complete\n",
		       engine->name,
		       rq->fence.context,
		       rq->fence.seqno);
		return -ETIME;
	}

	return 0;
}
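/*
 * A note on the polling in the two helpers above: execlists submission is
 * completed from a tasklet, so a request may still sit in
 * engine->execlists.pending[] while we sample its state. Both loops
 * therefore flush submission and skip the check while pending[0] is
 * non-NULL; sampling i915_request_is_active() or rq->fence.error alone
 * could race with the still-running tasklet.
 */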
static int live_sanitycheck(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_ctx;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin, rq)) {
			GEM_TRACE("spinner failed to start\n");
			GEM_TRACE_DUMP();
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto out_ctx;
		}

		igt_spinner_end(&spin);
		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			goto out_ctx;
		}

out_ctx:
		intel_context_put(ce);
		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	return err;
}
static int live_unlite_restore(struct intel_gt *gt, int prio)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = -ENOMEM;

	/*
	 * Check that we can correctly context switch between 2 instances
	 * on the same engine from the same parent context.
	 */

	if (igt_spinner_init(&spin, gt))
		return err;

	err = 0;
	for_each_engine(engine, gt, id) {
		struct intel_context *ce[2] = {};
		struct i915_request *rq[2];
		struct igt_live_test t;
		unsigned long saved;
		int n;

		if (prio && !intel_engine_has_preemption(engine))
			continue;

		if (!intel_engine_can_store_dword(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			break;
		}
		engine_heartbeat_disable(engine, &saved);

		for (n = 0; n < ARRAY_SIZE(ce); n++) {
			struct intel_context *tmp;

			tmp = intel_context_create(engine);
			if (IS_ERR(tmp)) {
				err = PTR_ERR(tmp);
				goto err_ce;
			}

			err = intel_context_pin(tmp);
			if (err) {
				intel_context_put(tmp);
				goto err_ce;
			}

			/*
			 * Setup the pair of contexts such that if we
			 * lite-restore using the RING_TAIL from ce[1] it
			 * will execute garbage from ce[0]->ring.
			 */
			memset(tmp->ring->vaddr,
			       POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
			       tmp->ring->vma->size);

			ce[n] = tmp;
		}
		GEM_BUG_ON(!ce[1]->ring->size);
		intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
		__execlists_update_reg_state(ce[1], engine, ce[1]->ring->head);

		rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
		if (IS_ERR(rq[0])) {
			err = PTR_ERR(rq[0]);
			goto err_ce;
		}

		i915_request_get(rq[0]);
		i915_request_add(rq[0]);
		GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);

		if (!igt_wait_for_spinner(&spin, rq[0])) {
			i915_request_put(rq[0]);
			goto err_ce;
		}

		rq[1] = i915_request_create(ce[1]);
		if (IS_ERR(rq[1])) {
			err = PTR_ERR(rq[1]);
			i915_request_put(rq[0]);
			goto err_ce;
		}

		if (!prio) {
			/*
			 * Ensure we do the switch to ce[1] on completion.
			 *
			 * rq[0] is already submitted, so this should reduce
			 * to a no-op (a wait on a request on the same engine
			 * uses the submit fence, not the completion fence),
			 * but it will install a dependency on rq[1] for rq[0]
			 * that will prevent the pair being reordered by
			 * timeslicing.
			 */
			i915_request_await_dma_fence(rq[1], &rq[0]->fence);
		}

		i915_request_get(rq[1]);
		i915_request_add(rq[1]);
		GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
		i915_request_put(rq[0]);

		if (prio) {
			struct i915_sched_attr attr = {
				.priority = prio,
			};

			/* Alternatively preempt the spinner with ce[1] */
			engine->schedule(rq[1], &attr);
		}

		/* And switch back to ce[0] for good measure */
		rq[0] = i915_request_create(ce[0]);
		if (IS_ERR(rq[0])) {
			err = PTR_ERR(rq[0]);
			i915_request_put(rq[1]);
			goto err_ce;
		}

		i915_request_await_dma_fence(rq[0], &rq[1]->fence);
		i915_request_get(rq[0]);
		i915_request_add(rq[0]);
		GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
		i915_request_put(rq[1]);
		i915_request_put(rq[0]);

err_ce:
		tasklet_kill(&engine->execlists.tasklet); /* flush submission */
		igt_spinner_end(&spin);
		for (n = 0; n < ARRAY_SIZE(ce); n++) {
			if (IS_ERR_OR_NULL(ce[n]))
				break;

			intel_context_unpin(ce[n]);
			intel_context_put(ce[n]);
		}

		engine_heartbeat_enable(engine, saved);
		if (igt_live_test_end(&t))
			err = -EIO;
		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	return err;
}
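/*
 * Background, for readers new to execlists: a "lite restore" is when the
 * HW merely re-samples RING_TAIL for the context that is already running,
 * instead of performing a full context switch. The poison above
 * (POISON_INUSE, 0x5a) makes a bad lite-restore self-evident: if the stale
 * tail from ce[1] were replayed while ce[0] is active, the CS would fetch
 * 0x5a5a5a5a and hang with that value visible in IPEHR.
 */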
static int live_unlite_switch(void *arg)
{
	return live_unlite_restore(arg, 0);
}

static int live_unlite_preempt(void *arg)
{
	return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
}
static int live_pin_rewind(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * We have to be careful not to trust intel_ring too much, for example
	 * ring->head is updated upon retire which is out of sync with pinning
	 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
	 * or else we risk writing an older, stale value.
	 *
	 * To simulate this, let's apply a bit of deliberate sabotage.
	 */

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;
		struct intel_ring *ring;
		struct igt_live_test t;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			break;
		}

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		err = intel_context_pin(ce);
		if (err) {
			intel_context_put(ce);
			break;
		}

		/* Keep the context awake while we play games */
		err = i915_active_acquire(&ce->active);
		if (err) {
			intel_context_unpin(ce);
			intel_context_put(ce);
			break;
		}
		ring = ce->ring;

		/* Poison the ring, and offset the next request from HEAD */
		memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
		ring->emit = ring->size / 2;
		ring->tail = ring->emit;
		GEM_BUG_ON(ring->head);

		intel_context_unpin(ce);

		/* Submit a simple nop request */
		GEM_BUG_ON(intel_context_is_pinned(ce));
		rq = intel_context_create_request(ce);
		i915_active_release(&ce->active); /* e.g. async retire */
		intel_context_put(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}
		GEM_BUG_ON(!rq->head);
		i915_request_add(rq);

		/* Expect not to hang! */
		if (igt_live_test_end(&t)) {
			err = -EIO;
			break;
		}
	}

	return err;
}
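/*
 * The hazard exercised above, restated: ring->head advances on retire,
 * asynchronously to pinning the context, so seeding CTX_RING_HEAD from
 * ring->head risks programming a stale value. The poison + offset
 * sabotage makes any such stale read jump into STACK_MAGIC garbage and
 * hang, which igt_live_test_end() would then report as a failure.
 */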
static int live_hold_reset(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * In order to support offline error capture for fast preempt reset,
	 * we need to decouple the guilty request and ensure that it and its
	 * descendants are not executed while the capture is in progress.
	 */

	if (!intel_has_reset_engine(gt))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		unsigned long heartbeat;
		struct i915_request *rq;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		engine_heartbeat_disable(engine, &heartbeat);

		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out;
		}
		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			intel_gt_set_wedged(gt);
			err = -ETIME;
			goto out;
		}

		/* We have our request executing, now remove it and reset */

		if (test_and_set_bit(I915_RESET_ENGINE + id,
				     &gt->reset.flags)) {
			intel_gt_set_wedged(gt);
			err = -EBUSY;
			goto out;
		}
		tasklet_disable(&engine->execlists.tasklet);

		engine->execlists.tasklet.func(engine->execlists.tasklet.data);
		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);

		i915_request_get(rq);
		execlists_hold(engine, rq);
		GEM_BUG_ON(!i915_request_on_hold(rq));

		intel_engine_reset(engine, NULL);
		GEM_BUG_ON(rq->fence.error != -EIO);

		tasklet_enable(&engine->execlists.tasklet);
		clear_and_wake_up_bit(I915_RESET_ENGINE + id,
				      &gt->reset.flags);

		/* Check that we do not resubmit the held request */
		if (!i915_request_wait(rq, 0, HZ / 5)) {
			pr_err("%s: on hold request completed!\n",
			       engine->name);
			i915_request_put(rq);
			err = -EIO;
			goto out;
		}
		GEM_BUG_ON(!i915_request_on_hold(rq));

		/* But is resubmitted on release */
		execlists_unhold(engine, rq);
		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
			pr_err("%s: held request did not complete!\n",
			       engine->name);
			intel_gt_set_wedged(gt);
			err = -ETIME;
		}
		i915_request_put(rq);

out:
		engine_heartbeat_enable(engine, heartbeat);
		intel_context_put(ce);
		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	return err;
}
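/*
 * execlists_hold()/execlists_unhold() park a request (and its waiters) on
 * the engine's hold list so that resubmission after the reset does not
 * replay the guilty request while error capture may still be reading it.
 * The test above checks both halves of that contract: a held request must
 * not complete, and a released request must be resubmitted and complete.
 */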
static const char *error_repr(int err)
{
	return err ? "bad" : "good";
}
static int live_error_interrupt(void *arg)
{
	static const struct error_phase {
		enum { GOOD = 0, BAD = -EIO } error[2];
	} phases[] = {
		{ { BAD,  GOOD } },
		{ { GOOD, BAD  } },
		{ { BAD,  BAD  } },
		{ { GOOD, GOOD } }, /* sentinel */
	};
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
	 * of invalid commands in user batches that will cause a GPU hang.
	 * This is a faster mechanism than using hangcheck/heartbeats, but
	 * only detects problems the HW knows about -- it will not warn when
	 * we kill the HW!
	 *
	 * To verify our detection and reset, we throw some invalid commands
	 * at the HW and wait for the interrupt.
	 */

	if (!intel_has_reset_engine(gt))
		return 0;

	for_each_engine(engine, gt, id) {
		const struct error_phase *p;
		unsigned long heartbeat;
		int err = 0;

		engine_heartbeat_disable(engine, &heartbeat);

		for (p = phases; p->error[0] != GOOD; p++) {
			struct i915_request *client[ARRAY_SIZE(phases->error)];
			u32 *cs;
			int i;

			memset(client, 0, sizeof(*client));
			for (i = 0; i < ARRAY_SIZE(client); i++) {
				struct intel_context *ce;
				struct i915_request *rq;

				ce = intel_context_create(engine);
				if (IS_ERR(ce)) {
					err = PTR_ERR(ce);
					goto out;
				}

				rq = intel_context_create_request(ce);
				intel_context_put(ce);
				if (IS_ERR(rq)) {
					err = PTR_ERR(rq);
					goto out;
				}

				if (rq->engine->emit_init_breadcrumb) {
					err = rq->engine->emit_init_breadcrumb(rq);
					if (err) {
						i915_request_add(rq);
						goto out;
					}
				}

				cs = intel_ring_begin(rq, 2);
				if (IS_ERR(cs)) {
					i915_request_add(rq);
					err = PTR_ERR(cs);
					goto out;
				}

				if (p->error[i]) {
					*cs++ = 0xdeadbeef;
					*cs++ = 0xdeadbeef;
				} else {
					*cs++ = MI_NOOP;
					*cs++ = MI_NOOP;
				}

				client[i] = i915_request_get(rq);
				i915_request_add(rq);
			}

			err = wait_for_submit(engine, client[0], HZ / 2);
			if (err) {
				pr_err("%s: first request did not start within time!\n",
				       engine->name);
				err = -ETIME;
				goto out;
			}

			for (i = 0; i < ARRAY_SIZE(client); i++) {
				if (i915_request_wait(client[i], 0, HZ / 5) < 0)
					pr_debug("%s: %s request incomplete!\n",
						 engine->name,
						 error_repr(p->error[i]));

				if (!i915_request_started(client[i])) {
					pr_debug("%s: %s request not started!\n",
						 engine->name,
						 error_repr(p->error[i]));
					err = -ETIME;
					goto out;
				}

				/* Kick the tasklet to process the error */
				intel_engine_flush_submission(engine);
				if (client[i]->fence.error != p->error[i]) {
					pr_err("%s: %s request completed with wrong error code: %d\n",
					       engine->name,
					       error_repr(p->error[i]),
					       client[i]->fence.error);
					err = -EINVAL;
					goto out;
				}
			}

out:
			for (i = 0; i < ARRAY_SIZE(client); i++)
				if (client[i])
					i915_request_put(client[i]);
			if (err) {
				pr_err("%s: failed at phase[%zd] { %d, %d }\n",
				       engine->name, p - phases,
				       p->error[0], p->error[1]);
				break;
			}
		}

		engine_heartbeat_enable(engine, heartbeat);
		if (err) {
			intel_gt_set_wedged(gt);
			return err;
		}
	}

	return 0;
}
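/*
 * The "invalid commands" written for a BAD phase are simply 0xdeadbeef
 * dwords in place of MI_NOOPs; the CS raises CS_MASTER_ERROR_INTERRUPT on
 * decoding them, letting the driver reset the engine long before any
 * heartbeat would notice the hang. Any undefined opcode should do; the
 * payload value itself is arbitrary.
 */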
static int
emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 10);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_NEQ_SDD;
	*cs++ = 0;
	*cs++ = i915_ggtt_offset(vma) + 4 * idx;
	*cs++ = 0;

	if (idx > 0) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
		*cs++ = 0;
		*cs++ = 1;
	} else {
		*cs++ = MI_NOOP;
		*cs++ = MI_NOOP;
		*cs++ = MI_NOOP;
		*cs++ = MI_NOOP;
	}

	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	intel_ring_advance(rq, cs);
	return 0;
}
static struct i915_request *
semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
{
	struct intel_context *ce;
	struct i915_request *rq;
	int err;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return ERR_CAST(ce);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		goto out_ce;

	err = 0;
	if (rq->engine->emit_init_breadcrumb)
		err = rq->engine->emit_init_breadcrumb(rq);
	if (err == 0)
		err = emit_semaphore_chain(rq, vma, idx);
	if (err == 0)
		i915_request_get(rq);
	i915_request_add(rq);
	if (err)
		rq = ERR_PTR(err);

out_ce:
	intel_context_put(ce);
	return rq;
}
static int
release_queue(struct intel_engine_cs *engine,
	      struct i915_vma *vma,
	      int idx, int prio)
{
	struct i915_sched_attr attr = {
		.priority = prio,
	};
	struct i915_request *rq;
	u32 *cs;

	rq = intel_engine_create_kernel_request(engine);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs)) {
		i915_request_add(rq);
		return PTR_ERR(cs);
	}

	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
	*cs++ = 0;
	*cs++ = 1;

	intel_ring_advance(rq, cs);

	i915_request_get(rq);
	i915_request_add(rq);

	local_bh_disable();
	engine->schedule(rq, &attr);
	local_bh_enable(); /* kick tasklet */

	i915_request_put(rq);

	return 0;
}
static int
slice_semaphore_queue(struct intel_engine_cs *outer,
		      struct i915_vma *vma,
		      int count)
{
	struct intel_engine_cs *engine;
	struct i915_request *head;
	enum intel_engine_id id;
	int err, i, n = 0;

	head = semaphore_queue(outer, vma, n++);
	if (IS_ERR(head))
		return PTR_ERR(head);

	for_each_engine(engine, outer->gt, id) {
		for (i = 0; i < count; i++) {
			struct i915_request *rq;

			rq = semaphore_queue(engine, vma, n++);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto out;
			}

			i915_request_put(rq);
		}
	}

	err = release_queue(outer, vma, n, INT_MAX);
	if (err)
		goto out;

	if (i915_request_wait(head, 0,
			      2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) {
		pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
		       count, n);
		GEM_TRACE_DUMP();
		intel_gt_set_wedged(outer->gt);
		err = -EIO;
	}

out:
	i915_request_put(head);
	return err;
}
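/*
 * The wait budget above is deliberately generous and quadratic-ish: each
 * of the roughly num_engines * count semaphores must be timesliced into
 * in turn before head can signal, so a chain of n links may need on the
 * order of n^2 jiffies in the worst case. Exceeding even that bound means
 * timeslicing failed to rotate the queue at all.
 */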
static int live_timeslice_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	void *vaddr;
	int err = 0;
	int count;

	/*
	 * If a request takes too long, we would like to give other users
	 * a fair go on the GPU. In particular, users may create batches
	 * that wait upon external input, where that input may even be
	 * supplied by another GPU job. To avoid blocking forever, we
	 * need to preempt the current task and replace it with another
	 * ready task.
	 */
	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
		return 0;

	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_obj;
	}

	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_obj;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (err)
		goto err_map;

	err = i915_vma_sync(vma);
	if (err)
		goto err_pin;

	for_each_prime_number_from(count, 1, 16) {
		struct intel_engine_cs *engine;
		enum intel_engine_id id;

		for_each_engine(engine, gt, id) {
			unsigned long saved;

			if (!intel_engine_has_preemption(engine))
				continue;

			memset(vaddr, 0, PAGE_SIZE);

			engine_heartbeat_disable(engine, &saved);
			err = slice_semaphore_queue(engine, vma, count);
			engine_heartbeat_enable(engine, saved);
			if (err)
				goto err_pin;

			if (igt_flush_test(gt->i915)) {
				err = -EIO;
				goto err_pin;
			}
		}
	}

err_pin:
	i915_vma_unpin(vma);
err_map:
	i915_gem_object_unpin_map(obj);
err_obj:
	i915_gem_object_put(obj);
	return err;
}
static struct i915_request *
create_rewinder(struct intel_context *ce,
		struct i915_request *wait,
		void *slot, int idx)
{
	const u32 offset =
		i915_ggtt_offset(ce->engine->status_page.vma) +
		offset_in_page(slot);
	struct i915_request *rq;
	u32 *cs;
	int err;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return rq;

	if (wait) {
		err = i915_request_await_dma_fence(rq, &wait->fence);
		if (err)
			goto err;
	}

	cs = intel_ring_begin(rq, 14);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err;
	}

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	*cs++ = MI_NOOP;

	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_GTE_SDD;
	*cs++ = idx;
	*cs++ = offset;
	*cs++ = 0;

	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
	*cs++ = offset + idx * sizeof(u32);
	*cs++ = 0;

	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = offset;
	*cs++ = 0;
	*cs++ = idx + 1;

	intel_ring_advance(rq, cs);

	rq->sched.attr.priority = I915_PRIORITY_MASK;
	err = 0;
err:
	i915_request_get(rq);
	i915_request_add(rq);
	if (err) {
		i915_request_put(rq);
		return ERR_PTR(err);
	}

	return rq;
}
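/*
 * Each rewinder batch thus: (1) spins on a semaphore in the status page
 * until it holds at least its own index, (2) records RING_TIMESTAMP into
 * slot[idx], and (3) bumps the semaphore to release the next waiter.
 * Comparing the recorded timestamps afterwards gives the true execution
 * order of the requests, independent of their submission order.
 */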
static int live_timeslice_rewind(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * The usual presumption on timeslice expiration is that we replace
	 * the active context with another. However, given a chain of
	 * dependencies we may end up with replacing the context with itself,
	 * but only a few of those requests, forcing us to rewind the
	 * RING_TAIL of the original request.
	 */
	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
		return 0;

	for_each_engine(engine, gt, id) {
		enum { A1, A2, B1 };
		enum { X = 1, Z, Y };
		struct i915_request *rq[3] = {};
		struct intel_context *ce;
		unsigned long heartbeat;
		unsigned long timeslice;
		int i, err = 0;
		u32 *slot;

		if (!intel_engine_has_timeslices(engine))
			continue;

		/*
		 * A:rq1 -- semaphore wait, timestamp X
		 * A:rq2 -- write timestamp Y
		 *
		 * B:rq1 [await A:rq1] -- write timestamp Z
		 *
		 * Force timeslice, release semaphore.
		 *
		 * Expect execution/evaluation order XZY
		 */

		engine_heartbeat_disable(engine, &heartbeat);
		timeslice = xchg(&engine->props.timeslice_duration_ms, 1);

		slot = memset32(engine->status_page.addr + 1000, 0, 4);

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto err;
		}

		rq[0] = create_rewinder(ce, NULL, slot, X);
		if (IS_ERR(rq[0])) {
			intel_context_put(ce);
			goto err;
		}

		rq[1] = create_rewinder(ce, NULL, slot, Y);
		intel_context_put(ce);
		if (IS_ERR(rq[1]))
			goto err;

		err = wait_for_submit(engine, rq[1], HZ / 2);
		if (err) {
			pr_err("%s: failed to submit first context\n",
			       engine->name);
			goto err;
		}

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto err;
		}

		rq[2] = create_rewinder(ce, rq[0], slot, Z);
		intel_context_put(ce);
		if (IS_ERR(rq[2]))
			goto err;

		err = wait_for_submit(engine, rq[2], HZ / 2);
		if (err) {
			pr_err("%s: failed to submit second context\n",
			       engine->name);
			goto err;
		}
		GEM_BUG_ON(!timer_pending(&engine->execlists.timer));

		/* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
		if (i915_request_is_active(rq[A2])) { /* semaphore yielded! */
			/* Wait for the timeslice to kick in */
			del_timer(&engine->execlists.timer);
			tasklet_hi_schedule(&engine->execlists.tasklet);
			intel_engine_flush_submission(engine);
		}
		/* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
		GEM_BUG_ON(!i915_request_is_active(rq[A1]));
		GEM_BUG_ON(!i915_request_is_active(rq[B1]));
		GEM_BUG_ON(i915_request_is_active(rq[A2]));

		/* Release the hounds! */
		slot[0] = 1;
		wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */

		for (i = 1; i <= 3; i++) {
			unsigned long timeout = jiffies + HZ / 2;

			while (!READ_ONCE(slot[i]) &&
			       time_before(jiffies, timeout))
				;

			if (!time_before(jiffies, timeout)) {
				pr_err("%s: rq[%d] timed out\n",
				       engine->name, i - 1);
				err = -ETIME;
				goto err;
			}

			pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
		}

		/* XZY: X < Z < Y */
		if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
			pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
			       engine->name,
			       slot[Z] - slot[X],
			       slot[Y] - slot[X]);
			err = -EINVAL;
		}

err:
		memset32(&slot[0], -1, 4);
		wmb();

		engine->props.timeslice_duration_ms = timeslice;
		engine_heartbeat_enable(engine, heartbeat);
		for (i = 0; i < 3; i++)
			if (!IS_ERR_OR_NULL(rq[i]))
				i915_request_put(rq[i]);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			return err;
	}

	return 0;
}
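/*
 * The X < Z < Y check at the end is the whole point: after the forced
 * timeslice, A:rq2 must have been unsubmitted (RING_TAIL rewound to the
 * end of A:rq1) so that B:rq1 executes before A:rq2 ever does, even
 * though both A requests were originally submitted back to back on the
 * same context.
 */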
static struct i915_request *nop_request(struct intel_engine_cs *engine)
{
	struct i915_request *rq;

	rq = intel_engine_create_kernel_request(engine);
	if (IS_ERR(rq))
		return rq;

	i915_request_get(rq);
	i915_request_add(rq);

	return rq;
}
static unsigned long timeslice(const struct intel_engine_cs *engine)
{
	return READ_ONCE(engine->props.timeslice_duration_ms);
}

static long timeslice_threshold(const struct intel_engine_cs *engine)
{
	return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1;
}
static int live_timeslice_queue(void *arg)
{
	struct intel_gt *gt = arg;
	struct drm_i915_gem_object *obj;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct i915_vma *vma;
	void *vaddr;
	int err = 0;

	/*
	 * Make sure that even if ELSP[0] and ELSP[1] are filled with
	 * timeslicing between them disabled, we *do* enable timeslicing
	 * if the queue demands it. (Normally, we do not submit if
	 * ELSP[1] is already occupied, so must rely on timeslicing to
	 * eject ELSP[0] in favour of the queue.)
	 */
	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
		return 0;

	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_obj;
	}

	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_obj;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (err)
		goto err_map;

	err = i915_vma_sync(vma);
	if (err)
		goto err_pin;

	for_each_engine(engine, gt, id) {
		struct i915_sched_attr attr = {
			.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
		};
		struct i915_request *rq, *nop;
		unsigned long saved;

		if (!intel_engine_has_preemption(engine))
			continue;

		engine_heartbeat_disable(engine, &saved);
		memset(vaddr, 0, PAGE_SIZE);

		/* ELSP[0]: semaphore wait */
		rq = semaphore_queue(engine, vma, 0);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_heartbeat;
		}
		engine->schedule(rq, &attr);
		err = wait_for_submit(engine, rq, HZ / 2);
		if (err) {
			pr_err("%s: Timed out trying to submit semaphores\n",
			       engine->name);
			goto err_rq;
		}

		/* ELSP[1]: nop request */
		nop = nop_request(engine);
		if (IS_ERR(nop)) {
			err = PTR_ERR(nop);
			goto err_rq;
		}
		err = wait_for_submit(engine, nop, HZ / 2);
		i915_request_put(nop);
		if (err) {
			pr_err("%s: Timed out trying to submit nop\n",
			       engine->name);
			goto err_rq;
		}

		GEM_BUG_ON(i915_request_completed(rq));
		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);

		/* Queue: semaphore signal, matching priority as semaphore */
		err = release_queue(engine, vma, 1, effective_prio(rq));
		if (err)
			goto err_rq;

		intel_engine_flush_submission(engine);
		if (!READ_ONCE(engine->execlists.timer.expires) &&
		    !i915_request_completed(rq)) {
			struct drm_printer p =
				drm_info_printer(gt->i915->drm.dev);

			GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n",
				      engine->name);
			intel_engine_dump(engine, &p,
					  "%s\n", engine->name);
			GEM_TRACE_DUMP();

			memset(vaddr, 0xff, PAGE_SIZE);
			err = -EINVAL;
		}

		/* Timeslice every jiffy, so within 2 we should signal */
		if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) {
			struct drm_printer p =
				drm_info_printer(gt->i915->drm.dev);

			pr_err("%s: Failed to timeslice into queue\n",
			       engine->name);
			intel_engine_dump(engine, &p,
					  "%s\n", engine->name);

			memset(vaddr, 0xff, PAGE_SIZE);
			err = -EIO;
		}
err_rq:
		i915_request_put(rq);
err_heartbeat:
		engine_heartbeat_enable(engine, saved);
		if (err)
			break;
	}

err_pin:
	i915_vma_unpin(vma);
err_map:
	i915_gem_object_unpin_map(obj);
err_obj:
	i915_gem_object_put(obj);
	return err;
}
static int live_busywait_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gem_context *ctx_hi, *ctx_lo;
	struct intel_engine_cs *engine;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	enum intel_engine_id id;
	int err = -ENOMEM;
	u32 *map;

	/*
	 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
	 * preempt the busywaits used to synchronise between rings.
	 */

	ctx_hi = kernel_context(gt->i915);
	if (!ctx_hi)
		return -ENOMEM;
	ctx_hi->sched.priority =
		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);

	ctx_lo = kernel_context(gt->i915);
	if (!ctx_lo)
		goto err_ctx_hi;
	ctx_lo->sched.priority =
		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);

	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto err_ctx_lo;
	}

	map = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(map)) {
		err = PTR_ERR(map);
		goto err_obj;
	}

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_map;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (err)
		goto err_map;

	err = i915_vma_sync(vma);
	if (err)
		goto err_vma;

	for_each_engine(engine, gt, id) {
		struct i915_request *lo, *hi;
		struct igt_live_test t;
		u32 *cs;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (!intel_engine_can_store_dword(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			goto err_vma;
		}

		/*
		 * We create two requests. The low priority request
		 * busywaits on a semaphore (inside the ringbuffer where
		 * it should be preemptible) and the high priority request
		 * uses a MI_STORE_DWORD_IMM to update the semaphore value
		 * allowing the first request to complete. If preemption
		 * fails, we hang instead.
		 */

		lo = igt_request_alloc(ctx_lo, engine);
		if (IS_ERR(lo)) {
			err = PTR_ERR(lo);
			goto err_vma;
		}

		cs = intel_ring_begin(lo, 8);
		if (IS_ERR(cs)) {
			err = PTR_ERR(cs);
			i915_request_add(lo);
			goto err_vma;
		}

		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = i915_ggtt_offset(vma);
		*cs++ = 0;
		*cs++ = 1;

		/* XXX Do we need a flush + invalidate here? */

		*cs++ = MI_SEMAPHORE_WAIT |
			MI_SEMAPHORE_GLOBAL_GTT |
			MI_SEMAPHORE_POLL |
			MI_SEMAPHORE_SAD_EQ_SDD;
		*cs++ = 0;
		*cs++ = i915_ggtt_offset(vma);
		*cs++ = 0;

		intel_ring_advance(lo, cs);

		i915_request_get(lo);
		i915_request_add(lo);

		if (wait_for(READ_ONCE(*map), 10)) {
			i915_request_put(lo);
			err = -ETIMEDOUT;
			goto err_vma;
		}

		/* Low priority request should be busywaiting now */
		if (i915_request_wait(lo, 0, 1) != -ETIME) {
			i915_request_put(lo);
			pr_err("%s: Busywaiting request did not!\n",
			       engine->name);
			err = -EIO;
			goto err_vma;
		}

		hi = igt_request_alloc(ctx_hi, engine);
		if (IS_ERR(hi)) {
			err = PTR_ERR(hi);
			i915_request_put(lo);
			goto err_vma;
		}

		cs = intel_ring_begin(hi, 4);
		if (IS_ERR(cs)) {
			err = PTR_ERR(cs);
			i915_request_add(hi);
			i915_request_put(lo);
			goto err_vma;
		}

		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = i915_ggtt_offset(vma);
		*cs++ = 0;
		*cs++ = 0;

		intel_ring_advance(hi, cs);
		i915_request_add(hi);

		if (i915_request_wait(lo, 0, HZ / 5) < 0) {
			struct drm_printer p = drm_info_printer(gt->i915->drm.dev);

			pr_err("%s: Failed to preempt semaphore busywait!\n",
			       engine->name);

			intel_engine_dump(engine, &p, "%s\n", engine->name);
			GEM_TRACE_DUMP();

			i915_request_put(lo);
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto err_vma;
		}
		GEM_BUG_ON(READ_ONCE(*map));
		i915_request_put(lo);

		if (igt_live_test_end(&t)) {
			err = -EIO;
			goto err_vma;
		}
	}

	err = 0;
err_vma:
	i915_vma_unpin(vma);
err_map:
	i915_gem_object_unpin_map(obj);
err_obj:
	i915_gem_object_put(obj);
err_ctx_lo:
	kernel_context_close(ctx_lo);
err_ctx_hi:
	kernel_context_close(ctx_hi);
	return err;
}
static struct i915_request *
spinner_create_request(struct igt_spinner *spin,
		       struct i915_gem_context *ctx,
		       struct intel_engine_cs *engine,
		       u32 arb)
{
	struct intel_context *ce;
	struct i915_request *rq;

	ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
	if (IS_ERR(ce))
		return ERR_CAST(ce);

	rq = igt_spinner_create_request(spin, ce, arb);
	intel_context_put(ce);
	return rq;
}
static int live_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gem_context *ctx_hi, *ctx_lo;
	struct igt_spinner spin_hi, spin_lo;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = -ENOMEM;

	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
		return 0;

	if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
		pr_err("Logical preemption supported, but not exposed\n");

	if (igt_spinner_init(&spin_hi, gt))
		return -ENOMEM;

	if (igt_spinner_init(&spin_lo, gt))
		goto err_spin_hi;

	ctx_hi = kernel_context(gt->i915);
	if (!ctx_hi)
		goto err_spin_lo;
	ctx_hi->sched.priority =
		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);

	ctx_lo = kernel_context(gt->i915);
	if (!ctx_lo)
		goto err_ctx_hi;
	ctx_lo->sched.priority =
		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);

	for_each_engine(engine, gt, id) {
		struct igt_live_test t;
		struct i915_request *rq;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			goto err_ctx_lo;
		}

		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
					    MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_ctx_lo;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin_lo, rq)) {
			GEM_TRACE("lo spinner failed to start\n");
			GEM_TRACE_DUMP();
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto err_ctx_lo;
		}

		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
					    MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			igt_spinner_end(&spin_lo);
			err = PTR_ERR(rq);
			goto err_ctx_lo;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin_hi, rq)) {
			GEM_TRACE("hi spinner failed to start\n");
			GEM_TRACE_DUMP();
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto err_ctx_lo;
		}

		igt_spinner_end(&spin_hi);
		igt_spinner_end(&spin_lo);

		if (igt_live_test_end(&t)) {
			err = -EIO;
			goto err_ctx_lo;
		}
	}

	err = 0;
err_ctx_lo:
	kernel_context_close(ctx_lo);
err_ctx_hi:
	kernel_context_close(ctx_hi);
err_spin_lo:
	igt_spinner_fini(&spin_lo);
err_spin_hi:
	igt_spinner_fini(&spin_hi);
	return err;
}
static int live_late_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gem_context *ctx_hi, *ctx_lo;
	struct igt_spinner spin_hi, spin_lo;
	struct intel_engine_cs *engine;
	struct i915_sched_attr attr = {};
	enum intel_engine_id id;
	int err = -ENOMEM;

	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
		return 0;

	if (igt_spinner_init(&spin_hi, gt))
		return -ENOMEM;

	if (igt_spinner_init(&spin_lo, gt))
		goto err_spin_hi;

	ctx_hi = kernel_context(gt->i915);
	if (!ctx_hi)
		goto err_spin_lo;

	ctx_lo = kernel_context(gt->i915);
	if (!ctx_lo)
		goto err_ctx_hi;

	/* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
	ctx_lo->sched.priority = I915_USER_PRIORITY(1);

	for_each_engine(engine, gt, id) {
		struct igt_live_test t;
		struct i915_request *rq;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			goto err_ctx_lo;
		}

		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
					    MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_ctx_lo;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin_lo, rq)) {
			pr_err("First context failed to start\n");
			goto err_wedged;
		}

		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
					    MI_NOOP);
		if (IS_ERR(rq)) {
			igt_spinner_end(&spin_lo);
			err = PTR_ERR(rq);
			goto err_ctx_lo;
		}

		i915_request_add(rq);
		if (igt_wait_for_spinner(&spin_hi, rq)) {
			pr_err("Second context overtook first?\n");
			goto err_wedged;
		}

		attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
		engine->schedule(rq, &attr);

		if (!igt_wait_for_spinner(&spin_hi, rq)) {
			pr_err("High priority context failed to preempt the low priority context\n");
			GEM_TRACE_DUMP();
			goto err_wedged;
		}

		igt_spinner_end(&spin_hi);
		igt_spinner_end(&spin_lo);

		if (igt_live_test_end(&t)) {
			err = -EIO;
			goto err_ctx_lo;
		}
	}

	err = 0;
err_ctx_lo:
	kernel_context_close(ctx_lo);
err_ctx_hi:
	kernel_context_close(ctx_hi);
err_spin_lo:
	igt_spinner_fini(&spin_lo);
err_spin_hi:
	igt_spinner_fini(&spin_hi);
	return err;

err_wedged:
	igt_spinner_end(&spin_hi);
	igt_spinner_end(&spin_lo);
	intel_gt_set_wedged(gt);
	err = -EIO;
	goto err_ctx_lo;
}
struct preempt_client {
	struct igt_spinner spin;
	struct i915_gem_context *ctx;
};

static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
{
	c->ctx = kernel_context(gt->i915);
	if (!c->ctx)
		return -ENOMEM;

	if (igt_spinner_init(&c->spin, gt))
		goto err_ctx;

	return 0;

err_ctx:
	kernel_context_close(c->ctx);
	return -ENOMEM;
}

static void preempt_client_fini(struct preempt_client *c)
{
	igt_spinner_fini(&c->spin);
	kernel_context_close(c->ctx);
}
static int live_nopreempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct preempt_client a, b;
	enum intel_engine_id id;
	int err = -ENOMEM;

	/*
	 * Verify that we can disable preemption for an individual request
	 * that may be being observed and not want to be interrupted.
	 */

	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
		return 0;

	if (preempt_client_init(gt, &a))
		return -ENOMEM;
	if (preempt_client_init(gt, &b))
		goto err_client_a;
	b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);

	for_each_engine(engine, gt, id) {
		struct i915_request *rq_a, *rq_b;

		if (!intel_engine_has_preemption(engine))
			continue;

		engine->execlists.preempt_hang.count = 0;

		rq_a = spinner_create_request(&a.spin,
					      a.ctx, engine,
					      MI_ARB_CHECK);
		if (IS_ERR(rq_a)) {
			err = PTR_ERR(rq_a);
			goto err_client_b;
		}

		/* Low priority client, but unpreemptable! */
		__set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);

		i915_request_add(rq_a);
		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
			pr_err("First client failed to start\n");
			goto err_wedged;
		}

		rq_b = spinner_create_request(&b.spin,
					      b.ctx, engine,
					      MI_ARB_CHECK);
		if (IS_ERR(rq_b)) {
			err = PTR_ERR(rq_b);
			goto err_client_b;
		}

		i915_request_add(rq_b);

		/* B is much more important than A! (But A is unpreemptable.) */
		GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));

		/* Wait long enough for preemption and timeslicing */
		if (igt_wait_for_spinner(&b.spin, rq_b)) {
			pr_err("Second client started too early!\n");
			goto err_wedged;
		}

		igt_spinner_end(&a.spin);

		if (!igt_wait_for_spinner(&b.spin, rq_b)) {
			pr_err("Second client failed to start\n");
			goto err_wedged;
		}

		igt_spinner_end(&b.spin);

		if (engine->execlists.preempt_hang.count) {
			pr_err("Preemption recorded x%d; should have been suppressed!\n",
			       engine->execlists.preempt_hang.count);
			err = -EINVAL;
			goto err_wedged;
		}

		if (igt_flush_test(gt->i915))
			goto err_wedged;
	}

	err = 0;
err_client_b:
	preempt_client_fini(&b);
err_client_a:
	preempt_client_fini(&a);
	return err;

err_wedged:
	igt_spinner_end(&b.spin);
	igt_spinner_end(&a.spin);
	intel_gt_set_wedged(gt);
	err = -EIO;
	goto err_client_b;
}
struct live_preempt_cancel {
	struct intel_engine_cs *engine;
	struct preempt_client a, b;
};
static int __cancel_active0(struct live_preempt_cancel *arg)
{
	struct i915_request *rq;
	struct igt_live_test t;
	int err;

	/* Preempt cancel of ELSP0 */
	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
	if (igt_live_test_begin(&t, arg->engine->i915,
				__func__, arg->engine->name))
		return -EIO;

	rq = spinner_create_request(&arg->a.spin,
				    arg->a.ctx, arg->engine,
				    MI_ARB_CHECK);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	clear_bit(CONTEXT_BANNED, &rq->context->flags);
	i915_request_get(rq);
	i915_request_add(rq);
	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
		err = -EIO;
		goto out;
	}

	intel_context_set_banned(rq->context);
	err = intel_engine_pulse(arg->engine);
	if (err)
		goto out;

	err = wait_for_reset(arg->engine, rq, HZ / 2);
	if (err) {
		pr_err("Cancelled inflight0 request did not reset\n");
		goto out;
	}

out:
	i915_request_put(rq);
	if (igt_live_test_end(&t))
		err = -EIO;
	return err;
}
static int __cancel_active1(struct live_preempt_cancel *arg)
{
	struct i915_request *rq[2] = {};
	struct igt_live_test t;
	int err;

	/* Preempt cancel of ELSP1 */
	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
	if (igt_live_test_begin(&t, arg->engine->i915,
				__func__, arg->engine->name))
		return -EIO;

	rq[0] = spinner_create_request(&arg->a.spin,
				       arg->a.ctx, arg->engine,
				       MI_NOOP); /* no preemption */
	if (IS_ERR(rq[0]))
		return PTR_ERR(rq[0]);

	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
	i915_request_get(rq[0]);
	i915_request_add(rq[0]);
	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
		err = -EIO;
		goto out;
	}

	rq[1] = spinner_create_request(&arg->b.spin,
				       arg->b.ctx, arg->engine,
				       MI_ARB_CHECK);
	if (IS_ERR(rq[1])) {
		err = PTR_ERR(rq[1]);
		goto out;
	}

	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
	i915_request_get(rq[1]);
	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
	i915_request_add(rq[1]);
	if (err)
		goto out;

	intel_context_set_banned(rq[1]->context);
	err = intel_engine_pulse(arg->engine);
	if (err)
		goto out;

	igt_spinner_end(&arg->a.spin);
	err = wait_for_reset(arg->engine, rq[1], HZ / 2);
	if (err)
		goto out;

	if (rq[0]->fence.error != 0) {
		pr_err("Normal inflight0 request did not complete\n");
		err = -EINVAL;
		goto out;
	}

	if (rq[1]->fence.error != -EIO) {
		pr_err("Cancelled inflight1 request did not report -EIO\n");
		err = -EINVAL;
		goto out;
	}

out:
	i915_request_put(rq[1]);
	i915_request_put(rq[0]);
	if (igt_live_test_end(&t))
		err = -EIO;
	return err;
}
static int __cancel_queued(struct live_preempt_cancel *arg)
{
	struct i915_request *rq[3] = {};
	struct igt_live_test t;
	int err;

	/* Full ELSP and one in the wings */
	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
	if (igt_live_test_begin(&t, arg->engine->i915,
				__func__, arg->engine->name))
		return -EIO;

	rq[0] = spinner_create_request(&arg->a.spin,
				       arg->a.ctx, arg->engine,
				       MI_ARB_CHECK);
	if (IS_ERR(rq[0]))
		return PTR_ERR(rq[0]);

	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
	i915_request_get(rq[0]);
	i915_request_add(rq[0]);
	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
		err = -EIO;
		goto out;
	}

	rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
	if (IS_ERR(rq[1])) {
		err = PTR_ERR(rq[1]);
		goto out;
	}

	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
	i915_request_get(rq[1]);
	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
	i915_request_add(rq[1]);
	if (err)
		goto out;

	rq[2] = spinner_create_request(&arg->b.spin,
				       arg->a.ctx, arg->engine,
				       MI_ARB_CHECK);
	if (IS_ERR(rq[2])) {
		err = PTR_ERR(rq[2]);
		goto out;
	}

	i915_request_get(rq[2]);
	err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
	i915_request_add(rq[2]);
	if (err)
		goto out;

	intel_context_set_banned(rq[2]->context);
	err = intel_engine_pulse(arg->engine);
	if (err)
		goto out;

	err = wait_for_reset(arg->engine, rq[2], HZ / 2);
	if (err)
		goto out;

	if (rq[0]->fence.error != -EIO) {
		pr_err("Cancelled inflight0 request did not report -EIO\n");
		err = -EINVAL;
		goto out;
	}

	if (rq[1]->fence.error != 0) {
		pr_err("Normal inflight1 request did not complete\n");
		err = -EINVAL;
		goto out;
	}

	if (rq[2]->fence.error != -EIO) {
		pr_err("Cancelled queued request did not report -EIO\n");
		err = -EINVAL;
		goto out;
	}

out:
	i915_request_put(rq[2]);
	i915_request_put(rq[1]);
	i915_request_put(rq[0]);
	if (igt_live_test_end(&t))
		err = -EIO;
	return err;
}
static int __cancel_hostile(struct live_preempt_cancel *arg)
{
	struct i915_request *rq;
	int err;

	/* Preempt cancel non-preemptible spinner in ELSP0 */
	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
		return 0;

	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
	rq = spinner_create_request(&arg->a.spin,
				    arg->a.ctx, arg->engine,
				    MI_NOOP); /* preemption disabled */
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	clear_bit(CONTEXT_BANNED, &rq->context->flags);
	i915_request_get(rq);
	i915_request_add(rq);
	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
		err = -EIO;
		goto out;
	}

	intel_context_set_banned(rq->context);
	err = intel_engine_pulse(arg->engine); /* force reset */
	if (err)
		goto out;

	err = wait_for_reset(arg->engine, rq, HZ / 2);
	if (err) {
		pr_err("Cancelled inflight0 request did not reset\n");
		goto out;
	}

out:
	i915_request_put(rq);
	if (igt_flush_test(arg->engine->i915))
		err = -EIO;
	return err;
}
static int live_preempt_cancel(void *arg)
{
	struct intel_gt *gt = arg;
	struct live_preempt_cancel data;
	enum intel_engine_id id;
	int err = -ENOMEM;

	/*
	 * To cancel an inflight context, we need to first remove it from the
	 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
	 */

	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
		return 0;

	if (preempt_client_init(gt, &data.a))
		return -ENOMEM;
	if (preempt_client_init(gt, &data.b))
		goto err_client_a;

	for_each_engine(data.engine, gt, id) {
		if (!intel_engine_has_preemption(data.engine))
			continue;

		err = __cancel_active0(&data);
		if (err)
			goto err_wedged;

		err = __cancel_active1(&data);
		if (err)
			goto err_wedged;

		err = __cancel_queued(&data);
		if (err)
			goto err_wedged;

		err = __cancel_hostile(&data);
		if (err)
			goto err_wedged;
	}

	err = 0;
err_client_b:
	preempt_client_fini(&data.b);
err_client_a:
	preempt_client_fini(&data.a);
	return err;

err_wedged:
	GEM_TRACE_DUMP();
	igt_spinner_end(&data.b.spin);
	igt_spinner_end(&data.a.spin);
	intel_gt_set_wedged(gt);
	goto err_client_b;
}
static int live_suppress_self_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct i915_sched_attr attr = {
		.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
	};
	struct preempt_client a, b;
	enum intel_engine_id id;
	int err = -ENOMEM;

	/*
	 * Verify that if a preemption request does not cause a change in
	 * the current execution order, the preempt-to-idle injection is
	 * skipped and that we do not accidentally apply it after the CS
	 * completion event.
	 */

	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
		return 0;

	if (intel_uc_uses_guc_submission(&gt->uc))
		return 0; /* presume black box */

	if (intel_vgpu_active(gt->i915))
		return 0; /* GVT forces single port & request submission */

	if (preempt_client_init(gt, &a))
		return -ENOMEM;
	if (preempt_client_init(gt, &b))
		goto err_client_a;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq_a, *rq_b;
		int depth;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (igt_flush_test(gt->i915))
			goto err_wedged;

		intel_engine_pm_get(engine);
		engine->execlists.preempt_hang.count = 0;

		rq_a = spinner_create_request(&a.spin,
					      a.ctx, engine,
					      MI_NOOP);
		if (IS_ERR(rq_a)) {
			err = PTR_ERR(rq_a);
			intel_engine_pm_put(engine);
			goto err_client_b;
		}

		i915_request_add(rq_a);
		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
			pr_err("First client failed to start\n");
			intel_engine_pm_put(engine);
			goto err_wedged;
		}

		/* Keep postponing the timer to avoid premature slicing */
		mod_timer(&engine->execlists.timer, jiffies + HZ);
		for (depth = 0; depth < 8; depth++) {
			rq_b = spinner_create_request(&b.spin,
						      b.ctx, engine,
						      MI_NOOP);
			if (IS_ERR(rq_b)) {
				err = PTR_ERR(rq_b);
				intel_engine_pm_put(engine);
				goto err_client_b;
			}
			i915_request_add(rq_b);

			GEM_BUG_ON(i915_request_completed(rq_a));
			engine->schedule(rq_a, &attr);
			igt_spinner_end(&a.spin);

			if (!igt_wait_for_spinner(&b.spin, rq_b)) {
				pr_err("Second client failed to start\n");
				intel_engine_pm_put(engine);
				goto err_wedged;
			}

			swap(a, b);
			rq_a = rq_b;
		}
		igt_spinner_end(&a.spin);

		if (engine->execlists.preempt_hang.count) {
			pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
			       engine->name,
			       engine->execlists.preempt_hang.count,
			       depth);
			intel_engine_pm_put(engine);
			err = -EINVAL;
			goto err_client_b;
		}

		intel_engine_pm_put(engine);
		if (igt_flush_test(gt->i915))
			goto err_wedged;
	}

	err = 0;
err_client_b:
	preempt_client_fini(&b);
err_client_a:
	preempt_client_fini(&a);
	return err;

err_wedged:
	igt_spinner_end(&b.spin);
	igt_spinner_end(&a.spin);
	intel_gt_set_wedged(gt);
	err = -EIO;
	goto err_client_b;
}
static int __i915_sw_fence_call
dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
	return NOTIFY_DONE;
}

static struct i915_request *dummy_request(struct intel_engine_cs *engine)
{
	struct i915_request *rq;

	rq = kzalloc(sizeof(*rq), GFP_KERNEL);
	if (!rq)
		return NULL;

	rq->engine = engine;

	spin_lock_init(&rq->lock);
	INIT_LIST_HEAD(&rq->fence.cb_list);
	rq->fence.lock = &rq->lock;
	rq->fence.ops = &i915_fence_ops;

	i915_sched_node_init(&rq->sched);

	/* mark this request as permanently incomplete */
	rq->fence.seqno = 1;
	BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */
	rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1;
	GEM_BUG_ON(i915_request_completed(rq));

	i915_sw_fence_init(&rq->submit, dummy_notify);
	set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);

	spin_lock_init(&rq->lock);
	rq->fence.lock = &rq->lock;
	INIT_LIST_HEAD(&rq->fence.cb_list);

	return rq;
}
static void dummy_request_free(struct i915_request *dummy)
{
	/* We have to fake the CS interrupt to kick the next request */
	i915_sw_fence_commit(&dummy->submit);

	i915_request_mark_complete(dummy);
	dma_fence_signal(&dummy->fence);

	i915_sched_node_fini(&dummy->sched);
	i915_sw_fence_fini(&dummy->submit);

	dma_fence_free(&dummy->fence);
}
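/*
 * The hand-rolled dummy request above is a minimal stand-in: it
 * initialises just enough of an i915_request (fence ops, sched node, an
 * uncommitted submit fence, and a seqno arranged never to complete) for
 * it to act as a timeline barrier. It never touches the HW, so its
 * "completion" must be faked in dummy_request_free() by committing the
 * submit fence and signalling the dma-fence by hand.
 */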
static int live_suppress_wait_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct preempt_client client[4];
	struct i915_request *rq[ARRAY_SIZE(client)] = {};
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = -ENOMEM;
	int i;

	/*
	 * Waiters are given a little priority nudge, but not enough
	 * to actually cause any preemption. Double check that we do
	 * not needlessly generate preempt-to-idle cycles.
	 */

	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
		return 0;

	if (preempt_client_init(gt, &client[0])) /* ELSP[0] */
		return -ENOMEM;
	if (preempt_client_init(gt, &client[1])) /* ELSP[1] */
		goto err_client_0;
	if (preempt_client_init(gt, &client[2])) /* head of queue */
		goto err_client_1;
	if (preempt_client_init(gt, &client[3])) /* bystander */
		goto err_client_2;

	for_each_engine(engine, gt, id) {
		int depth;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (!engine->emit_init_breadcrumb)
			continue;

		for (depth = 0; depth < ARRAY_SIZE(client); depth++) {
			struct i915_request *dummy;

			engine->execlists.preempt_hang.count = 0;

			dummy = dummy_request(engine);
			if (!dummy)
				goto err_client_3;

			for (i = 0; i < ARRAY_SIZE(client); i++) {
				struct i915_request *this;

				this = spinner_create_request(&client[i].spin,
							      client[i].ctx, engine,
							      MI_NOOP);
				if (IS_ERR(this)) {
					err = PTR_ERR(this);
					goto err_wedged;
				}

				/* Disable NEWCLIENT promotion */
				__i915_active_fence_set(&i915_request_timeline(this)->last_request,
							&dummy->fence);

				rq[i] = i915_request_get(this);
				i915_request_add(this);
			}

			dummy_request_free(dummy);

			GEM_BUG_ON(i915_request_completed(rq[0]));
			if (!igt_wait_for_spinner(&client[0].spin, rq[0])) {
				pr_err("%s: First client failed to start\n",
				       engine->name);
				goto err_wedged;
			}
			GEM_BUG_ON(!i915_request_started(rq[0]));

			if (i915_request_wait(rq[depth],
					      I915_WAIT_PRIORITY,
					      1) != -ETIME) {
				pr_err("%s: Waiter depth:%d completed!\n",
				       engine->name, depth);
				goto err_wedged;
			}

			for (i = 0; i < ARRAY_SIZE(client); i++) {
				igt_spinner_end(&client[i].spin);
				i915_request_put(rq[i]);
				rq[i] = NULL;
			}

			if (igt_flush_test(gt->i915))
				goto err_wedged;

			if (engine->execlists.preempt_hang.count) {
				pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n",
				       engine->name,
				       engine->execlists.preempt_hang.count,
				       depth);
				err = -EINVAL;
				goto err_client_3;
			}
		}
	}

	err = 0;
err_client_3:
	preempt_client_fini(&client[3]);
err_client_2:
	preempt_client_fini(&client[2]);
err_client_1:
	preempt_client_fini(&client[1]);
err_client_0:
	preempt_client_fini(&client[0]);
	return err;

err_wedged:
	for (i = 0; i < ARRAY_SIZE(client); i++) {
		igt_spinner_end(&client[i].spin);
		i915_request_put(rq[i]);
	}
	intel_gt_set_wedged(gt);
	err = -EIO;
	goto err_client_3;
}
static int live_chain_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct preempt_client hi, lo;
	enum intel_engine_id id;
	int err = -ENOMEM;

	/*
	 * Build a chain AB...BA between two contexts (A, B) and request
	 * preemption of the last request. It should then complete before
	 * the previously submitted spinner in B.
	 */

	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
		return 0;

	if (preempt_client_init(gt, &hi))
		return -ENOMEM;

	if (preempt_client_init(gt, &lo))
		goto err_client_hi;

	for_each_engine(engine, gt, id) {
		struct i915_sched_attr attr = {
			.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
		};
		struct igt_live_test t;
		struct i915_request *rq;
		int ring_size, count, i;

		if (!intel_engine_has_preemption(engine))
			continue;

		rq = spinner_create_request(&lo.spin,
					    lo.ctx, engine,
					    MI_ARB_CHECK);
		if (IS_ERR(rq))
			goto err_wedged;

		i915_request_get(rq);
		i915_request_add(rq);

		ring_size = rq->wa_tail - rq->head;
		if (ring_size < 0)
			ring_size += rq->ring->size;
		ring_size = rq->ring->size / ring_size;
		pr_debug("%s(%s): Using maximum of %d requests\n",
			 __func__, engine->name, ring_size);

		igt_spinner_end(&lo.spin);
		if (i915_request_wait(rq, 0, HZ / 2) < 0) {
			pr_err("Timed out waiting to flush %s\n", engine->name);
			i915_request_put(rq);
			goto err_wedged;
		}
		i915_request_put(rq);

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			goto err_wedged;
		}

		for_each_prime_number_from(count, 1, ring_size) {
			rq = spinner_create_request(&hi.spin,
						    hi.ctx, engine,
						    MI_ARB_CHECK);
			if (IS_ERR(rq))
				goto err_wedged;
			i915_request_add(rq);
			if (!igt_wait_for_spinner(&hi.spin, rq))
				goto err_wedged;

			rq = spinner_create_request(&lo.spin,
						    lo.ctx, engine,
						    MI_ARB_CHECK);
			if (IS_ERR(rq))
				goto err_wedged;
			i915_request_add(rq);

			for (i = 0; i < count; i++) {
				rq = igt_request_alloc(lo.ctx, engine);
				if (IS_ERR(rq))
					goto err_wedged;
				i915_request_add(rq);
			}

			rq = igt_request_alloc(hi.ctx, engine);
			if (IS_ERR(rq))
				goto err_wedged;

			i915_request_get(rq);
			i915_request_add(rq);
			engine->schedule(rq, &attr);

			igt_spinner_end(&hi.spin);
			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
				struct drm_printer p =
					drm_info_printer(gt->i915->drm.dev);

				pr_err("Failed to preempt over chain of %d\n",
				       count);
				intel_engine_dump(engine, &p,
						  "%s\n", engine->name);
				i915_request_put(rq);
				goto err_wedged;
			}
			igt_spinner_end(&lo.spin);
			i915_request_put(rq);

			rq = igt_request_alloc(lo.ctx, engine);
			if (IS_ERR(rq))
				goto err_wedged;

			i915_request_get(rq);
			i915_request_add(rq);

			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
				struct drm_printer p =
					drm_info_printer(gt->i915->drm.dev);

				pr_err("Failed to flush low priority chain of %d requests\n",
				       count);
				intel_engine_dump(engine, &p,
						  "%s\n", engine->name);

				i915_request_put(rq);
				goto err_wedged;
			}
			i915_request_put(rq);
		}

		if (igt_live_test_end(&t)) {
			err = -EIO;
			goto err_wedged;
		}
	}

	err = 0;
err_client_lo:
	preempt_client_fini(&lo);
err_client_hi:
	preempt_client_fini(&hi);
	return err;

err_wedged:
	igt_spinner_end(&hi.spin);
	igt_spinner_end(&lo.spin);
	intel_gt_set_wedged(gt);
	err = -EIO;
	goto err_client_lo;
}
static int create_gang(struct intel_engine_cs *engine,
		       struct i915_request **prev)
{
	struct drm_i915_gem_object *obj;
	struct intel_context *ce;
	struct i915_request *rq;
	struct i915_vma *vma;
	u32 *cs;
	int err;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	obj = i915_gem_object_create_internal(engine->i915, 4096);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto err_ce;
	}

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_obj;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err_obj;

	cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_obj;
	}

	/* Semaphore target: spin until zero */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_EQ_SDD;
	*cs++ = 0;
	*cs++ = lower_32_bits(vma->node.start);
	*cs++ = upper_32_bits(vma->node.start);

	if (*prev) {
		u64 offset = (*prev)->batch->node.start;

		/* Terminate the spinner in the next lower priority batch. */
		*cs++ = MI_STORE_DWORD_IMM_GEN4;
		*cs++ = lower_32_bits(offset);
		*cs++ = upper_32_bits(offset);
		*cs++ = 0;
	}

	*cs++ = MI_BATCH_BUFFER_END;
	i915_gem_object_flush_map(obj);
	i915_gem_object_unpin_map(obj);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_obj;
	}

	rq->batch = vma;
	i915_request_get(rq);

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, false);
	if (!err)
		err = i915_vma_move_to_active(vma, rq, 0);
	if (!err)
		err = rq->engine->emit_bb_start(rq,
						vma->node.start,
						PAGE_SIZE, 0);
	i915_vma_unlock(vma);
	i915_request_add(rq);
	if (err)
		goto err_rq;

	i915_gem_object_put(obj);
	intel_context_put(ce);

	rq->client_link.next = &(*prev)->client_link;
	*prev = rq;
	return 0;

err_rq:
	i915_request_put(rq);
err_obj:
	i915_gem_object_put(obj);
err_ce:
	intel_context_put(ce);
	return err;
}
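/*
 * Chain bookkeeping: the gang is strung together through
 * rq->client_link.next. When *prev starts out NULL, the link stored for
 * the oldest request makes list_next_entry() resolve back to a NULL
 * request pointer, which is what terminates the walk in
 * live_preempt_gang() below.
 */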
static int live_preempt_gang(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
		return 0;

	/*
	 * Build as long a chain of preempters as we can, with each
	 * request higher priority than the last. Once we are ready, we release
	 * the last batch which then percolates down the chain, each releasing
	 * the next oldest in turn. The intent is to simply push as hard as we
	 * can with the number of preemptions, trying to exceed narrow HW
	 * limits. At a minimum, we insist that we can sort all the user
	 * high priority levels into execution order.
	 */

	for_each_engine(engine, gt, id) {
		struct i915_request *rq = NULL;
		struct igt_live_test t;
		IGT_TIMEOUT(end_time);
		int prio = 0;
		int err = 0;
		u32 *cs;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
			return -EIO;

		do {
			struct i915_sched_attr attr = {
				.priority = I915_USER_PRIORITY(prio++),
			};

			err = create_gang(engine, &rq);
			if (err)
				break;

			/* Submit each spinner at increasing priority */
			engine->schedule(rq, &attr);

			if (prio <= I915_PRIORITY_MAX)
				continue;

			if (prio > (INT_MAX >> I915_USER_PRIORITY_SHIFT))
				break;

			if (__igt_timeout(end_time, NULL))
				break;
		} while (1);
		pr_debug("%s: Preempt chain of %d requests\n",
			 engine->name, prio);

		/*
		 * Such that the last spinner is the highest priority and
		 * should execute first. When that spinner completes,
		 * it will terminate the next lowest spinner until there
		 * are no more spinners and the gang is complete.
		 */
		cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC);
		if (!IS_ERR(cs)) {
			*cs = 0;
			i915_gem_object_unpin_map(rq->batch->obj);
		} else {
			err = PTR_ERR(cs);
			intel_gt_set_wedged(gt);
		}

		while (rq) { /* wait for each rq from highest to lowest prio */
			struct i915_request *n =
				list_next_entry(rq, client_link);

			if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
				struct drm_printer p =
					drm_info_printer(engine->i915->drm.dev);

				pr_err("Failed to flush chain of %d requests, at %d\n",
				       prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT);
				intel_engine_dump(engine, &p,
						  "%s\n", engine->name);

				err = -ETIME;
			}

			i915_request_put(rq);
			rq = n;
		}

		if (igt_live_test_end(&t))
			err = -EIO;
		if (err)
			return err;
	}

	return 0;
}
static int live_preempt_timeout(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gem_context *ctx_hi, *ctx_lo;
	struct igt_spinner spin_lo;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = -ENOMEM;

	/*
	 * Check that we force preemption to occur by cancelling the previous
	 * context if it refuses to yield the GPU.
	 */
	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
		return 0;

	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
		return 0;

	if (!intel_has_reset_engine(gt))
		return 0;

	if (igt_spinner_init(&spin_lo, gt))
		return -ENOMEM;

	ctx_hi = kernel_context(gt->i915);
	if (!ctx_hi)
		goto err_spin_lo;
	ctx_hi->sched.priority =
		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);

	ctx_lo = kernel_context(gt->i915);
	if (!ctx_lo)
		goto err_ctx_hi;
	ctx_lo->sched.priority =
		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);

	for_each_engine(engine, gt, id) {
		unsigned long saved_timeout;
		struct i915_request *rq;

		if (!intel_engine_has_preemption(engine))
			continue;

		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
					    MI_NOOP); /* preemption disabled */
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_ctx_lo;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin_lo, rq)) {
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto err_ctx_lo;
		}

		rq = igt_request_alloc(ctx_hi, engine);
		if (IS_ERR(rq)) {
			igt_spinner_end(&spin_lo);
			err = PTR_ERR(rq);
			goto err_ctx_lo;
		}

		/* Flush the previous CS ack before changing timeouts */
		while (READ_ONCE(engine->execlists.pending[0]))
			cpu_relax();

		saved_timeout = engine->props.preempt_timeout_ms;
		engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */

		i915_request_get(rq);
		i915_request_add(rq);

		intel_engine_flush_submission(engine);
		engine->props.preempt_timeout_ms = saved_timeout;

		if (i915_request_wait(rq, 0, HZ / 10) < 0) {
			intel_gt_set_wedged(gt);
			i915_request_put(rq);
			err = -ETIME;
			goto err_ctx_lo;
		}

		igt_spinner_end(&spin_lo);
		i915_request_put(rq);
	}

	err = 0;
err_ctx_lo:
	kernel_context_close(ctx_lo);
err_ctx_hi:
	kernel_context_close(ctx_hi);
err_spin_lo:
	igt_spinner_fini(&spin_lo);
	return err;
}
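/*
 * Setting engine->props.preempt_timeout_ms = 1 arms the shortest possible
 * forced-preemption window (about one jiffy after rounding). Since the
 * spinner was created with arbitration disabled (MI_NOOP), it cannot
 * yield voluntarily, so the only way the high priority request can
 * complete within HZ/10 is if the timeout expired and reset the hog.
 */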
static int random_range(struct rnd_state *rnd, int min, int max)
{
	return i915_prandom_u32_max_state(max - min, rnd) + min;
}

static int random_priority(struct rnd_state *rnd)
{
	return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
}
struct preempt_smoke {
	struct intel_gt *gt;
	struct i915_gem_context **contexts;
	struct intel_engine_cs *engine;
	struct drm_i915_gem_object *batch;
	unsigned int ncontext;
	struct rnd_state prng;
	unsigned long count;
};

static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
{
	return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
							  &smoke->prng)];
}
static int smoke_submit(struct preempt_smoke *smoke,
			struct i915_gem_context *ctx, int prio,
			struct drm_i915_gem_object *batch)
{
	struct i915_request *rq;
	struct i915_vma *vma = NULL;
	int err = 0;

	if (batch) {
		struct i915_address_space *vm;

		vm = i915_gem_context_get_vm_rcu(ctx);
		vma = i915_vma_instance(batch, vm, NULL);
		i915_vm_put(vm);
		if (IS_ERR(vma))
			return PTR_ERR(vma);

		err = i915_vma_pin(vma, 0, 0, PIN_USER);
		if (err)
			return err;
	}

	ctx->sched.priority = prio;

	rq = igt_request_alloc(ctx, smoke->engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto unpin;
	}

	if (vma) {
		i915_vma_lock(vma);
		err = i915_request_await_object(rq, vma->obj, false);
		if (!err)
			err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							vma->node.start,
							PAGE_SIZE, 0);
		i915_vma_unlock(vma);
	}

	i915_request_add(rq);

unpin:
	if (vma)
		i915_vma_unpin(vma);

	return err;
}
static int smoke_crescendo_thread(void *arg)
{
	struct preempt_smoke *smoke = arg;
	IGT_TIMEOUT(end_time);
	unsigned long count;

	count = 0;
	do {
		struct i915_gem_context *ctx = smoke_context(smoke);
		int err;

		err = smoke_submit(smoke,
				   ctx, count % I915_PRIORITY_MAX,
				   smoke->batch);
		if (err)
			return err;

		count++;
	} while (!__igt_timeout(end_time, NULL));

	smoke->count = count;
	return 0;
}
static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
#define BATCH BIT(0)
{
	struct task_struct *tsk[I915_NUM_ENGINES] = {};
	struct preempt_smoke arg[I915_NUM_ENGINES];
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count;
	int err = 0;

	for_each_engine(engine, smoke->gt, id) {
		arg[id] = *smoke;
		arg[id].engine = engine;
		if (!(flags & BATCH))
			arg[id].batch = NULL;
		arg[id].count = 0;

		tsk[id] = kthread_run(smoke_crescendo_thread, &arg[id],
				      "igt/smoke:%d", id);
		if (IS_ERR(tsk[id])) {
			err = PTR_ERR(tsk[id]);
			break;
		}
		get_task_struct(tsk[id]);
	}

	yield(); /* start all threads before we kthread_stop() */

	count = 0;
	for_each_engine(engine, smoke->gt, id) {
		int status;

		if (IS_ERR_OR_NULL(tsk[id]))
			continue;

		status = kthread_stop(tsk[id]);
		if (status && !err)
			err = status;

		count += arg[id].count;

		put_task_struct(tsk[id]);
	}

	pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
		count, flags,
		RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
	return err;
}
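/*
 * Each kthread drives one engine with its own preempt_smoke slot
 * (arg[id], rather than the shared template), submitting at cyclically
 * increasing priority; together the threads keep up a steady crescendo of
 * cross-engine preemption requests until the shared timeout expires.
 */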
static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
{
	enum intel_engine_id id;
	IGT_TIMEOUT(end_time);
	unsigned long count;

	count = 0;
	do {
		for_each_engine(smoke->engine, smoke->gt, id) {
			struct i915_gem_context *ctx = smoke_context(smoke);
			int err;

			err = smoke_submit(smoke,
					   ctx, random_priority(&smoke->prng),
					   flags & BATCH ? smoke->batch : NULL);
			if (err)
				return err;

			count++;
		}
	} while (!__igt_timeout(end_time, NULL));

	pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
		count, flags,
		RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
	return 0;
}
static int live_preempt_smoke(void *arg)
{
	struct preempt_smoke smoke = {
		.gt = arg,
		.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
		.ncontext = 1024,
	};
	const unsigned int phase[] = { 0, BATCH };
	struct igt_live_test t;
	int err = -ENOMEM;
	u32 *cs;
	int n;

	if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915))
		return 0;

	smoke.contexts = kmalloc_array(smoke.ncontext,
				       sizeof(*smoke.contexts),
				       GFP_KERNEL);
	if (!smoke.contexts)
		return -ENOMEM;

	smoke.batch =
		i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
	if (IS_ERR(smoke.batch)) {
		err = PTR_ERR(smoke.batch);
		goto err_free;
	}

	cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_batch;
	}
	for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
		cs[n] = MI_ARB_CHECK;
	cs[n] = MI_BATCH_BUFFER_END;
	i915_gem_object_flush_map(smoke.batch);
	i915_gem_object_unpin_map(smoke.batch);

	if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
		err = -EIO;
		goto err_batch;
	}

	for (n = 0; n < smoke.ncontext; n++) {
		smoke.contexts[n] = kernel_context(smoke.gt->i915);
		if (!smoke.contexts[n])
			goto err_ctx;
	}

	for (n = 0; n < ARRAY_SIZE(phase); n++) {
		err = smoke_crescendo(&smoke, phase[n]);
		if (err)
			goto err_ctx;

		err = smoke_random(&smoke, phase[n]);
		if (err)
			goto err_ctx;
	}

err_ctx:
	if (igt_live_test_end(&t))
		err = -EIO;

	for (n = 0; n < smoke.ncontext; n++) {
		if (!smoke.contexts[n])
			break;
		kernel_context_close(smoke.contexts[n]);
	}

err_batch:
	i915_gem_object_put(smoke.batch);
err_free:
	kfree(smoke.contexts);

	return err;
}

static int nop_virtual_engine(struct intel_gt *gt,
			      struct intel_engine_cs **siblings,
			      unsigned int nsibling,
			      unsigned int nctx,
			      unsigned int flags)
#define CHAIN BIT(0)
{
	IGT_TIMEOUT(end_time);
	struct i915_request *request[16] = {};
	struct intel_context *ve[16];
	unsigned long n, prime, nc;
	struct igt_live_test t;
	ktime_t times[2] = {};
	int err;

	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));

	for (n = 0; n < nctx; n++) {
		ve[n] = intel_execlists_create_virtual(siblings, nsibling);
		if (IS_ERR(ve[n])) {
			err = PTR_ERR(ve[n]);
			nctx = n;
			goto out;
		}

		err = intel_context_pin(ve[n]);
		if (err) {
			intel_context_put(ve[n]);
			nctx = n;
			goto out;
		}
	}

	err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
	if (err)
		goto out;

	for_each_prime_number_from(prime, 1, 8192) {
		times[1] = ktime_get_raw();

		if (flags & CHAIN) {
			for (nc = 0; nc < nctx; nc++) {
				for (n = 0; n < prime; n++) {
					struct i915_request *rq;

					rq = i915_request_create(ve[nc]);
					if (IS_ERR(rq)) {
						err = PTR_ERR(rq);
						goto out;
					}

					if (request[nc])
						i915_request_put(request[nc]);
					request[nc] = i915_request_get(rq);
					i915_request_add(rq);
				}
			}
		} else {
			for (n = 0; n < prime; n++) {
				for (nc = 0; nc < nctx; nc++) {
					struct i915_request *rq;

					rq = i915_request_create(ve[nc]);
					if (IS_ERR(rq)) {
						err = PTR_ERR(rq);
						goto out;
					}

					if (request[nc])
						i915_request_put(request[nc]);
					request[nc] = i915_request_get(rq);
					i915_request_add(rq);
				}
			}
		}

		for (nc = 0; nc < nctx; nc++) {
			if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
				pr_err("%s(%s): wait for %llx:%lld timed out\n",
				       __func__, ve[0]->engine->name,
				       request[nc]->fence.context,
				       request[nc]->fence.seqno);

				GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
					  __func__, ve[0]->engine->name,
					  request[nc]->fence.context,
					  request[nc]->fence.seqno);
				GEM_TRACE_DUMP();
				intel_gt_set_wedged(gt);
				break;
			}
		}

		times[1] = ktime_sub(ktime_get_raw(), times[1]);
		if (prime == 1)
			times[0] = times[1];

		for (nc = 0; nc < nctx; nc++) {
			i915_request_put(request[nc]);
			request[nc] = NULL;
		}

		if (__igt_timeout(end_time, NULL))
			break;
	}

	err = igt_live_test_end(&t);
	if (err)
		goto out;

	pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
		nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
		prime, div64_u64(ktime_to_ns(times[1]), prime));

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	for (nc = 0; nc < nctx; nc++) {
		i915_request_put(request[nc]);
		intel_context_unpin(ve[nc]);
		intel_context_put(ve[nc]);
	}
	return err;
}

static int live_virtual_engine(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned int class, inst;
	int err;

	if (intel_uc_uses_guc_submission(&gt->uc))
		return 0;

	for_each_engine(engine, gt, id) {
		err = nop_virtual_engine(gt, &engine, 1, 1, 0);
		if (err) {
			pr_err("Failed to wrap engine %s: err=%d\n",
			       engine->name, err);
			return err;
		}
	}

	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
		int nsibling, n;

		nsibling = 0;
		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
			if (!gt->engine_class[class][inst])
				continue;

			siblings[nsibling++] = gt->engine_class[class][inst];
		}
		if (nsibling < 2)
			continue;

		for (n = 1; n <= nsibling + 1; n++) {
			err = nop_virtual_engine(gt, siblings, nsibling,
						 n, 0);
			if (err)
				return err;
		}

		err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
		if (err)
			return err;
	}

	return 0;
}

static int mask_virtual_engine(struct intel_gt *gt,
			       struct intel_engine_cs **siblings,
			       unsigned int nsibling)
{
	struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
	struct intel_context *ve;
	struct igt_live_test t;
	unsigned int n;
	int err;

	/*
	 * Check that by setting the execution mask on a request, we can
	 * restrict it to our desired engine within the virtual engine.
	 */

	ve = intel_execlists_create_virtual(siblings, nsibling);
	if (IS_ERR(ve))
		return PTR_ERR(ve);

	err = intel_context_pin(ve);
	if (err)
		goto out_put;

	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
	if (err)
		goto out_unpin;

	for (n = 0; n < nsibling; n++) {
		request[n] = i915_request_create(ve);
		if (IS_ERR(request[n])) {
			err = PTR_ERR(request[n]);
			nsibling = n;
			goto out;
		}

		/* Reverse order as it's more likely to be unnatural */
		request[n]->execution_mask = siblings[nsibling - n - 1]->mask;

		i915_request_get(request[n]);
		i915_request_add(request[n]);
	}

	for (n = 0; n < nsibling; n++) {
		if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
			pr_err("%s(%s): wait for %llx:%lld timed out\n",
			       __func__, ve->engine->name,
			       request[n]->fence.context,
			       request[n]->fence.seqno);

			GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
				  __func__, ve->engine->name,
				  request[n]->fence.context,
				  request[n]->fence.seqno);
			GEM_TRACE_DUMP();
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto out;
		}

		if (request[n]->engine != siblings[nsibling - n - 1]) {
			pr_err("Executed on wrong sibling '%s', expected '%s'\n",
			       request[n]->engine->name,
			       siblings[nsibling - n - 1]->name);
			err = -EINVAL;
			goto out;
		}
	}

	err = igt_live_test_end(&t);
out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	for (n = 0; n < nsibling; n++)
		i915_request_put(request[n]);

out_unpin:
	intel_context_unpin(ve);
out_put:
	intel_context_put(ve);
	return err;
}

static int live_virtual_mask(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
	unsigned int class, inst;
	int err;

	if (intel_uc_uses_guc_submission(&gt->uc))
		return 0;

	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
		unsigned int nsibling;

		nsibling = 0;
		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
			if (!gt->engine_class[class][inst])
				continue;

			siblings[nsibling++] = gt->engine_class[class][inst];
		}
		if (nsibling < 2)
			continue;

		err = mask_virtual_engine(gt, siblings, nsibling);
		if (err)
			return err;
	}

	return 0;
}

static int preserved_virtual_engine(struct intel_gt *gt,
				    struct intel_engine_cs **siblings,
				    unsigned int nsibling)
{
	struct i915_request *last = NULL;
	struct intel_context *ve;
	struct i915_vma *scratch;
	struct igt_live_test t;
	unsigned int n;
	int err = 0;
	u32 *cs;

	scratch = create_scratch(siblings[0]->gt);
	if (IS_ERR(scratch))
		return PTR_ERR(scratch);

	err = i915_vma_sync(scratch);
	if (err)
		goto out_scratch;

	ve = intel_execlists_create_virtual(siblings, nsibling);
	if (IS_ERR(ve)) {
		err = PTR_ERR(ve);
		goto out_scratch;
	}

	err = intel_context_pin(ve);
	if (err)
		goto out_put;

	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
	if (err)
		goto out_unpin;

	for (n = 0; n < NUM_GPR_DW; n++) {
		struct intel_engine_cs *engine = siblings[n % nsibling];
		struct i915_request *rq;

		rq = i915_request_create(ve);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_end;
		}

		i915_request_put(last);
		last = i915_request_get(rq);

		cs = intel_ring_begin(rq, 8);
		if (IS_ERR(cs)) {
			i915_request_add(rq);
			err = PTR_ERR(cs);
			goto out_end;
		}

		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
		*cs++ = CS_GPR(engine, n);
		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
		*cs++ = 0;

		*cs++ = MI_LOAD_REGISTER_IMM(1);
		*cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
		*cs++ = n + 1;

		*cs++ = MI_NOOP;
		intel_ring_advance(rq, cs);

		/* Restrict this request to run on a particular engine */
		rq->execution_mask = engine->mask;
		i915_request_add(rq);
	}

	if (i915_request_wait(last, 0, HZ / 5) < 0) {
		err = -ETIME;
		goto out_end;
	}

	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto out_end;
	}

	for (n = 0; n < NUM_GPR_DW; n++) {
		if (cs[n] != n) {
			pr_err("Incorrect value[%d] found for GPR[%d]\n",
			       cs[n], n);
			err = -EINVAL;
			break;
		}
	}

	i915_gem_object_unpin_map(scratch->obj);

out_end:
	if (igt_live_test_end(&t))
		err = -EIO;
	i915_request_put(last);
out_unpin:
	intel_context_unpin(ve);
out_put:
	intel_context_put(ve);
out_scratch:
	i915_vma_unpin_and_release(&scratch, 0);
	return err;
}

static int live_virtual_preserved(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
	unsigned int class, inst;

	/*
	 * Check that the context image retains non-privileged (user) registers
	 * from one engine to the next. For this we check that the CS_GPR
	 * registers are preserved.
	 */

	if (intel_uc_uses_guc_submission(&gt->uc))
		return 0;

	/* As we use CS_GPR we cannot run before they existed on all engines. */
	if (INTEL_GEN(gt->i915) < 9)
		return 0;

	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
		int nsibling, err;

		nsibling = 0;
		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
			if (!gt->engine_class[class][inst])
				continue;

			siblings[nsibling++] = gt->engine_class[class][inst];
		}
		if (nsibling < 2)
			continue;

		err = preserved_virtual_engine(gt, siblings, nsibling);
		if (err)
			return err;
	}

	return 0;
}

static int bond_virtual_engine(struct intel_gt *gt,
			       unsigned int class,
			       struct intel_engine_cs **siblings,
			       unsigned int nsibling,
			       unsigned int flags)
#define BOND_SCHEDULE BIT(0)
{
	struct intel_engine_cs *master;
	struct i915_request *rq[16];
	enum intel_engine_id id;
	struct igt_spinner spin;
	unsigned long n;
	int err;

	/*
	 * A set of bonded requests is intended to be run concurrently
	 * across a number of engines. We use one request per-engine
	 * and a magic fence to schedule each of the bonded requests
	 * at the same time. A consequence of our current scheduler is that
	 * we only move requests to the HW ready queue when the request
	 * becomes ready, that is when all of its prerequisite fences have
	 * been signaled. As one of those fences is the master submit fence,
	 * there is a delay on all secondary fences as the HW may be
	 * currently busy. Equally, as all the requests are independent,
	 * they may have other fences that delay individual request
	 * submission to HW. Ergo, we do not guarantee that all requests are
	 * immediately submitted to HW at the same time, just that if the
	 * rules are abided by, they are ready at the same time as the
	 * first is submitted. Userspace can embed semaphores in its batch
	 * to ensure parallel execution of its phases as it requires.
	 * Though naturally it gets requested that perhaps the scheduler should
	 * take care of parallel execution, even across preemption events on
	 * different HW. (The proper answer is of course "lalalala".)
	 *
	 * With the submit-fence, we have identified three possible phases
	 * of synchronisation depending on the master fence: queued (not
	 * ready), executing, and signaled. The first two are quite simple
	 * and checked below. However, the signaled master fence handling is
	 * contentious. Currently we do not distinguish between a signaled
	 * fence and an expired fence, as once signaled it does not convey
	 * any information about the previous execution. It may even be freed
	 * and hence checking later it may not exist at all. Ergo we currently
	 * do not apply the bonding constraint for an already signaled fence,
	 * as our expectation is that it should not constrain the secondaries
	 * and is outside of the scope of the bonded request API (i.e. all
	 * userspace requests are meant to be running in parallel). As
	 * it imposes no constraint, and is effectively a no-op, we do not
	 * check below as normal execution flows are checked extensively above.
	 *
	 * XXX Is the degenerate handling of signaled submit fences the
	 * expected behaviour for userspace?
	 */

	GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	err = 0;
	rq[0] = ERR_PTR(-ENOMEM);
	for_each_engine(master, gt, id) {
		struct i915_sw_fence fence = {};
		struct intel_context *ce;

		if (master->class == class)
			continue;

		ce = intel_context_create(master);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto out;
		}

		memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));

		rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP);
		intel_context_put(ce);
		if (IS_ERR(rq[0])) {
			err = PTR_ERR(rq[0]);
			goto out;
		}
		i915_request_get(rq[0]);

		if (flags & BOND_SCHEDULE) {
			onstack_fence_init(&fence);
			err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
							       &fence,
							       GFP_KERNEL);
		}

		i915_request_add(rq[0]);
		if (err < 0)
			goto out;

		if (!(flags & BOND_SCHEDULE) &&
		    !igt_wait_for_spinner(&spin, rq[0])) {
			err = -EIO;
			goto out;
		}

		for (n = 0; n < nsibling; n++) {
			struct intel_context *ve;

			ve = intel_execlists_create_virtual(siblings, nsibling);
			if (IS_ERR(ve)) {
				err = PTR_ERR(ve);
				onstack_fence_fini(&fence);
				goto out;
			}

			err = intel_virtual_engine_attach_bond(ve->engine,
							       master,
							       siblings[n]);
			if (err) {
				intel_context_put(ve);
				onstack_fence_fini(&fence);
				goto out;
			}

			err = intel_context_pin(ve);
			intel_context_put(ve);
			if (err) {
				onstack_fence_fini(&fence);
				goto out;
			}

			rq[n + 1] = i915_request_create(ve);
			intel_context_unpin(ve);
			if (IS_ERR(rq[n + 1])) {
				err = PTR_ERR(rq[n + 1]);
				onstack_fence_fini(&fence);
				goto out;
			}
			i915_request_get(rq[n + 1]);

			err = i915_request_await_execution(rq[n + 1],
							   &rq[0]->fence,
							   ve->engine->bond_execute);
			i915_request_add(rq[n + 1]);
			if (err < 0) {
				onstack_fence_fini(&fence);
				goto out;
			}
		}
		onstack_fence_fini(&fence);
		intel_engine_flush_submission(master);
		igt_spinner_end(&spin);

		if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
			pr_err("Master request did not execute (on %s)!\n",
			       rq[0]->engine->name);
			err = -EIO;
			goto out;
		}

		for (n = 0; n < nsibling; n++) {
			if (i915_request_wait(rq[n + 1], 0,
					      MAX_SCHEDULE_TIMEOUT) < 0) {
				err = -EIO;
				goto out;
			}

			if (rq[n + 1]->engine != siblings[n]) {
				pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
				       siblings[n]->name,
				       rq[n + 1]->engine->name,
				       rq[0]->engine->name);
				err = -EINVAL;
				goto out;
			}
		}

		for (n = 0; !IS_ERR(rq[n]); n++)
			i915_request_put(rq[n]);
		rq[0] = ERR_PTR(-ENOMEM);
	}

out:
	for (n = 0; !IS_ERR(rq[n]); n++)
		i915_request_put(rq[n]);
	if (igt_flush_test(gt->i915))
		err = -EIO;

	igt_spinner_fini(&spin);
	return err;
}

static int live_virtual_bond(void *arg)
{
	static const struct phase {
		const char *name;
		unsigned int flags;
	} phases[] = {
		{ "", 0 },
		{ "schedule", BOND_SCHEDULE },
		{ },
	};
	struct intel_gt *gt = arg;
	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
	unsigned int class, inst;
	int err;

	if (intel_uc_uses_guc_submission(&gt->uc))
		return 0;

	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
		const struct phase *p;
		int nsibling;

		nsibling = 0;
		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
			if (!gt->engine_class[class][inst])
				break;

			GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
			siblings[nsibling++] = gt->engine_class[class][inst];
		}
		if (nsibling < 2)
			continue;

		for (p = phases; p->name; p++) {
			err = bond_virtual_engine(gt,
						  class, siblings, nsibling,
						  p->flags);
			if (err) {
				pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
				       __func__, p->name, class, nsibling, err);
				return err;
			}
		}
	}

	return 0;
}

static int reset_virtual_engine(struct intel_gt *gt,
				struct intel_engine_cs **siblings,
				unsigned int nsibling)
{
	struct intel_engine_cs *engine;
	struct intel_context *ve;
	unsigned long *heartbeat;
	struct igt_spinner spin;
	struct i915_request *rq;
	unsigned int n;
	int err = 0;

	/*
	 * In order to support offline error capture for fast preempt reset,
	 * we need to decouple the guilty request and ensure that it and its
	 * descendants are not executed while the capture is in progress.
	 */

	heartbeat = kmalloc_array(nsibling, sizeof(*heartbeat), GFP_KERNEL);
	if (!heartbeat)
		return -ENOMEM;

	if (igt_spinner_init(&spin, gt)) {
		err = -ENOMEM;
		goto out_free;
	}

	ve = intel_execlists_create_virtual(siblings, nsibling);
	if (IS_ERR(ve)) {
		err = PTR_ERR(ve);
		goto out_spin;
	}

	for (n = 0; n < nsibling; n++)
		engine_heartbeat_disable(siblings[n], &heartbeat[n]);

	rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_heartbeat;
	}
	i915_request_add(rq);

	if (!igt_wait_for_spinner(&spin, rq)) {
		intel_gt_set_wedged(gt);
		err = -ETIME;
		goto out_heartbeat;
	}

	engine = rq->engine;
	GEM_BUG_ON(engine == ve->engine);

	/* Take ownership of the reset and tasklet */
	if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
			     &gt->reset.flags)) {
		intel_gt_set_wedged(gt);
		err = -EBUSY;
		goto out_heartbeat;
	}
	tasklet_disable(&engine->execlists.tasklet);

	engine->execlists.tasklet.func(engine->execlists.tasklet.data);
	GEM_BUG_ON(execlists_active(&engine->execlists) != rq);

	/* Fake a preemption event; failed of course */
	spin_lock_irq(&engine->active.lock);
	__unwind_incomplete_requests(engine);
	spin_unlock_irq(&engine->active.lock);
	GEM_BUG_ON(rq->engine != ve->engine);

	/* Reset the engine while keeping our active request on hold */
	execlists_hold(engine, rq);
	GEM_BUG_ON(!i915_request_on_hold(rq));

	intel_engine_reset(engine, NULL);
	GEM_BUG_ON(rq->fence.error != -EIO);

	/* Release our grasp on the engine, letting CS flow again */
	tasklet_enable(&engine->execlists.tasklet);
	clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags);

	/* Check that we do not resubmit the held request */
	i915_request_get(rq);
	if (!i915_request_wait(rq, 0, HZ / 5)) {
		pr_err("%s: on hold request completed!\n",
		       engine->name);
		intel_gt_set_wedged(gt);
		err = -EIO;
		goto out_rq;
	}
	GEM_BUG_ON(!i915_request_on_hold(rq));

	/* But is resubmitted on release */
	execlists_unhold(engine, rq);
	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		pr_err("%s: held request did not complete!\n",
		       engine->name);
		intel_gt_set_wedged(gt);
		err = -ETIME;
	}

out_rq:
	i915_request_put(rq);
out_heartbeat:
	for (n = 0; n < nsibling; n++)
		engine_heartbeat_enable(siblings[n], heartbeat[n]);

	intel_context_put(ve);
out_spin:
	igt_spinner_fini(&spin);
out_free:
	kfree(heartbeat);
	return err;
}

static int live_virtual_reset(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
	unsigned int class, inst;

	/*
	 * Check that we handle a reset event within a virtual engine.
	 * Only the physical engine is reset, but we have to check the flow
	 * of the virtual requests around the reset, and make sure it is not
	 * forgotten.
	 */

	if (intel_uc_uses_guc_submission(&gt->uc))
		return 0;

	if (!intel_has_reset_engine(gt))
		return 0;

	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
		int nsibling, err;

		nsibling = 0;
		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
			if (!gt->engine_class[class][inst])
				continue;

			siblings[nsibling++] = gt->engine_class[class][inst];
		}
		if (nsibling < 2)
			continue;

		err = reset_virtual_engine(gt, siblings, nsibling);
		if (err)
			return err;
	}

	return 0;
}

int intel_execlists_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_sanitycheck),
		SUBTEST(live_unlite_switch),
		SUBTEST(live_unlite_preempt),
		SUBTEST(live_pin_rewind),
		SUBTEST(live_hold_reset),
		SUBTEST(live_error_interrupt),
		SUBTEST(live_timeslice_preempt),
		SUBTEST(live_timeslice_rewind),
		SUBTEST(live_timeslice_queue),
		SUBTEST(live_busywait_preempt),
		SUBTEST(live_preempt),
		SUBTEST(live_late_preempt),
		SUBTEST(live_nopreempt),
		SUBTEST(live_preempt_cancel),
		SUBTEST(live_suppress_self_preempt),
		SUBTEST(live_suppress_wait_preempt),
		SUBTEST(live_chain_preempt),
		SUBTEST(live_preempt_gang),
		SUBTEST(live_preempt_timeout),
		SUBTEST(live_preempt_smoke),
		SUBTEST(live_virtual_engine),
		SUBTEST(live_virtual_mask),
		SUBTEST(live_virtual_preserved),
		SUBTEST(live_virtual_bond),
		SUBTEST(live_virtual_reset),
	};

	if (!HAS_EXECLISTS(i915))
		return 0;

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return intel_gt_live_subtests(tests, &i915->gt);
}

static void hexdump(const void *buf, size_t len)
{
	const size_t rowsize = 8 * sizeof(u32);
	const void *prev = NULL;
	bool skip = false;
	size_t pos;

	for (pos = 0; pos < len; pos += rowsize) {
		char line[128];

		if (prev && !memcmp(prev, buf + pos, rowsize)) {
			if (!skip) {
				pr_info("*\n");
				skip = true;
			}
			continue;
		}

		WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
						rowsize, sizeof(u32),
						line, sizeof(line),
						false) >= sizeof(line));
		pr_info("[%04zx] %s\n", pos, line);

		prev = buf + pos;
		skip = false;
	}
}

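/*
 * Example output (illustrative): like hexdump(1), runs of identical rows
 * are collapsed behind a single "*" marker, so a page full of zeros dumps
 * as a couple of lines instead of 128:
 *
 *	[0000] 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
 *	*
 */
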
static int emit_semaphore_signal(struct intel_context *ce, void *slot)
{
	const u32 offset =
		i915_ggtt_offset(ce->engine->status_page.vma) +
		offset_in_page(slot);
	struct i915_request *rq;
	u32 *cs;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs)) {
		i915_request_add(rq);
		return PTR_ERR(cs);
	}

	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = offset;
	*cs++ = 0;
	*cs++ = 1;

	intel_ring_advance(rq, cs);

	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
	i915_request_add(rq);
	return 0;
}

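/*
 * The store above pairs with an MI_SEMAPHORE_WAIT polling the same status
 * page dword; a sketch of the matching wait as emitted by the readers
 * further below ("offset" stands for the same GGTT address):
 *
 *	*cs++ = MI_SEMAPHORE_WAIT |
 *		MI_SEMAPHORE_GLOBAL_GTT |
 *		MI_SEMAPHORE_POLL |
 *		MI_SEMAPHORE_SAD_NEQ_SDD;
 *	*cs++ = 0;	// semaphore data: stall while *offset == 0
 *	*cs++ = offset;	// GGTT address of the slot
 *	*cs++ = 0;
 *
 * SAD_NEQ_SDD releases the waiter once the slot no longer reads 0, i.e.
 * as soon as emit_semaphore_signal() has written its 1.
 */
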
static int context_flush(struct intel_context *ce, long timeout)
{
	struct i915_request *rq;
	struct dma_fence *fence;
	int err = 0;

	rq = intel_engine_create_kernel_request(ce->engine);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	fence = i915_active_fence_get(&ce->timeline->last_request);
	if (fence) {
		i915_request_await_dma_fence(rq, fence);
		dma_fence_put(fence);
	}

	rq = i915_request_get(rq);
	i915_request_add(rq);
	if (i915_request_wait(rq, 0, timeout) < 0)
		err = -ETIME;
	i915_request_put(rq);

	rmb(); /* We know the request is written, make sure all state is too! */

	return err;
}

static int live_lrc_layout(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	u32 *lrc;
	int err;

	/*
	 * Check the registers offsets we use to create the initial reg state
	 * match the layout saved by HW.
	 */

	lrc = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!lrc)
		return -ENOMEM;

	err = 0;
	for_each_engine(engine, gt, id) {
		u32 *hw;
		int dw;

		if (!engine->default_state)
			continue;

		hw = i915_gem_object_pin_map(engine->default_state,
					     I915_MAP_WB);
		if (IS_ERR(hw)) {
			err = PTR_ERR(hw);
			break;
		}
		hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);

		execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
					 engine->kernel_context,
					 engine,
					 engine->kernel_context->ring,
					 true);

		dw = 0;
		do {
			u32 lri = hw[dw];

			if (lri == 0) {
				dw++;
				continue;
			}

			if (lrc[dw] == 0) {
				pr_debug("%s: skipped instruction %x at dword %d\n",
					 engine->name, lri, dw);
				dw++;
				continue;
			}

			if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
				pr_err("%s: Expected LRI command at dword %d, found %08x\n",
				       engine->name, dw, lri);
				err = -EINVAL;
				break;
			}

			if (lrc[dw] != lri) {
				pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
				       engine->name, dw, lri, lrc[dw]);
				err = -EINVAL;
				break;
			}

			lri &= 0x7f;
			lri++;
			dw++;

			while (lri) {
				if (hw[dw] != lrc[dw]) {
					pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
					       engine->name, dw, hw[dw], lrc[dw]);
					err = -EINVAL;
					break;
				}

				/*
				 * Skip over the actual register value as we
				 * expect that to differ.
				 */
				dw += 2;
				lri -= 2;
			}
		} while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

		if (err) {
			pr_info("%s: HW register image:\n", engine->name);
			hexdump(hw, PAGE_SIZE);

			pr_info("%s: SW register image:\n", engine->name);
			hexdump(lrc, PAGE_SIZE);
		}

		i915_gem_object_unpin_map(engine->default_state);
		if (err)
			break;
	}

	kfree(lrc);
	return err;
}

static int find_offset(const u32 *lri, u32 offset)
{
	int i;

	for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
		if (lri[i] == offset)
			return i;

	return -1;
}

static int live_lrc_fixed(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check the assumed register offsets match the actual locations in
	 * the context image.
	 */

	for_each_engine(engine, gt, id) {
		const struct {
			u32 reg;
			u32 offset;
			const char *name;
		} tbl[] = {
			{
				i915_mmio_reg_offset(RING_START(engine->mmio_base)),
				CTX_RING_START - 1,
				"RING_START"
			},
			{
				i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
				CTX_RING_CTL - 1,
				"RING_CTL"
			},
			{
				i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
				CTX_RING_HEAD - 1,
				"RING_HEAD"
			},
			{
				i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
				CTX_RING_TAIL - 1,
				"RING_TAIL"
			},
			{
				i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
				lrc_ring_mi_mode(engine),
				"RING_MI_MODE"
			},
			{
				i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
				CTX_BB_STATE - 1,
				"BB_STATE"
			},
			{
				i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
				CTX_TIMESTAMP - 1,
				"RING_CTX_TIMESTAMP"
			},
			{ },
		}, *t;
		u32 *hw;

		if (!engine->default_state)
			continue;

		hw = i915_gem_object_pin_map(engine->default_state,
					     I915_MAP_WB);
		if (IS_ERR(hw)) {
			err = PTR_ERR(hw);
			break;
		}
		hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);

		for (t = tbl; t->name; t++) {
			int dw = find_offset(hw, t->reg);

			if (dw != t->offset) {
				pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
				       engine->name,
				       t->name,
				       t->reg,
				       dw,
				       t->offset);
				err = -EINVAL;
			}
		}

		i915_gem_object_unpin_map(engine->default_state);
	}

	return err;
}

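/*
 * Extending the table above is mechanical (sketch; RING_FOO and CTX_FOO
 * are placeholders, not real definitions): pair the mmio offset with its
 * expected dword index in the context image, and keep the { } sentinel so
 * the t->name walk still terminates:
 *
 *	{
 *		i915_mmio_reg_offset(RING_FOO(engine->mmio_base)),
 *		CTX_FOO - 1, // reg_state[] index of the register slot
 *		"RING_FOO"
 *	},
 */
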
static int __live_lrc_state(struct intel_engine_cs *engine,
			    struct i915_vma *scratch)
{
	struct intel_context *ce;
	struct i915_request *rq;
	enum {
		RING_START_IDX = 0,
		RING_TAIL_IDX,
		MAX_IDX
	};
	u32 expected[MAX_IDX];
	u32 *cs;
	int err;
	int n;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	err = intel_context_pin(ce);
	if (err)
		goto err_put;

	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	cs = intel_ring_begin(rq, 4 * MAX_IDX);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		i915_request_add(rq);
		goto err_unpin;
	}

	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
	*cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
	*cs++ = 0;

	expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);

	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
	*cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
	*cs++ = 0;

	i915_vma_lock(scratch);
	err = i915_request_await_object(rq, scratch->obj, true);
	if (!err)
		err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(scratch);

	i915_request_get(rq);
	i915_request_add(rq);
	if (err)
		goto err_rq;

	intel_engine_flush_submission(engine);
	expected[RING_TAIL_IDX] = ce->ring->tail;

	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		err = -ETIME;
		goto err_rq;
	}

	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_rq;
	}

	for (n = 0; n < MAX_IDX; n++) {
		if (cs[n] != expected[n]) {
			pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
			       engine->name, n, cs[n], expected[n]);
			err = -EINVAL;
			break;
		}
	}

	i915_gem_object_unpin_map(scratch->obj);

err_rq:
	i915_request_put(rq);
err_unpin:
	intel_context_unpin(ce);
err_put:
	intel_context_put(ce);
	return err;
}

static int live_lrc_state(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct i915_vma *scratch;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check the live register state matches what we expect for this
	 * intel_context.
	 */

	scratch = create_scratch(gt);
	if (IS_ERR(scratch))
		return PTR_ERR(scratch);

	for_each_engine(engine, gt, id) {
		err = __live_lrc_state(engine, scratch);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	i915_vma_unpin_and_release(&scratch, 0);
	return err;
}

static int gpr_make_dirty(struct intel_context *ce)
{
	struct i915_request *rq;
	u32 *cs;
	int n;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
	if (IS_ERR(cs)) {
		i915_request_add(rq);
		return PTR_ERR(cs);
	}

	*cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
	for (n = 0; n < NUM_GPR_DW; n++) {
		*cs++ = CS_GPR(ce->engine, n);
		*cs++ = STACK_MAGIC;
	}
	*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);

	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
	i915_request_add(rq);

	return 0;
}

static struct i915_request *
__gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot)
{
	const u32 offset =
		i915_ggtt_offset(ce->engine->status_page.vma) +
		offset_in_page(slot);
	struct i915_request *rq;
	u32 *cs;
	int err;
	int n;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return rq;

	cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW);
	if (IS_ERR(cs)) {
		i915_request_add(rq);
		return ERR_CAST(cs);
	}

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	*cs++ = MI_NOOP;

	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_NEQ_SDD;
	*cs++ = 0;
	*cs++ = offset;
	*cs++ = 0;

	for (n = 0; n < NUM_GPR_DW; n++) {
		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
		*cs++ = CS_GPR(ce->engine, n);
		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
		*cs++ = 0;
	}

	i915_vma_lock(scratch);
	err = i915_request_await_object(rq, scratch->obj, true);
	if (!err)
		err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(scratch);

	i915_request_get(rq);
	i915_request_add(rq);
	if (err) {
		i915_request_put(rq);
		rq = ERR_PTR(err);
	}

	return rq;
}

static int __live_lrc_gpr(struct intel_engine_cs *engine,
			  struct i915_vma *scratch,
			  bool preempt)
{
	u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4);
	struct intel_context *ce;
	struct i915_request *rq;
	u32 *cs;
	int err;
	int n;

	if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
		return 0; /* GPR only on rcs0 for gen8 */

	err = gpr_make_dirty(engine->kernel_context);
	if (err)
		return err;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	rq = __gpr_read(ce, scratch, slot);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_put;
	}

	err = wait_for_submit(engine, rq, HZ / 2);
	if (err)
		goto err_rq;

	if (preempt) {
		err = gpr_make_dirty(engine->kernel_context);
		if (err)
			goto err_rq;

		err = emit_semaphore_signal(engine->kernel_context, slot);
		if (err)
			goto err_rq;
	} else {
		slot[0] = 1;
		wmb();
	}

	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		err = -ETIME;
		goto err_rq;
	}

	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_rq;
	}

	for (n = 0; n < NUM_GPR_DW; n++) {
		if (cs[n]) {
			pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
			       engine->name,
			       n / 2, n & 1 ? "udw" : "ldw",
			       cs[n]);
			err = -EINVAL;
			break;
		}
	}

	i915_gem_object_unpin_map(scratch->obj);

err_rq:
	memset32(&slot[0], -1, 4);
	wmb();
	i915_request_put(rq);
err_put:
	intel_context_put(ce);
	return err;
}

static int live_lrc_gpr(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct i915_vma *scratch;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check that GPR registers are cleared in new contexts as we need
	 * to avoid leaking any information from previous contexts.
	 */

	scratch = create_scratch(gt);
	if (IS_ERR(scratch))
		return PTR_ERR(scratch);

	for_each_engine(engine, gt, id) {
		unsigned long heartbeat;

		engine_heartbeat_disable(engine, &heartbeat);

		err = __live_lrc_gpr(engine, scratch, false);
		if (err)
			goto err;

		err = __live_lrc_gpr(engine, scratch, true);
		if (err)
			goto err;

err:
		engine_heartbeat_enable(engine, heartbeat);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	i915_vma_unpin_and_release(&scratch, 0);
	return err;
}

static struct i915_request *
create_timestamp(struct intel_context *ce, void *slot, int idx)
{
	const u32 offset =
		i915_ggtt_offset(ce->engine->status_page.vma) +
		offset_in_page(slot);
	struct i915_request *rq;
	u32 *cs;
	int err;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return rq;

	cs = intel_ring_begin(rq, 10);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err;
	}

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	*cs++ = MI_NOOP;

	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_NEQ_SDD;
	*cs++ = 0;
	*cs++ = offset;
	*cs++ = 0;

	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base));
	*cs++ = offset + idx * sizeof(u32);
	*cs++ = 0;

	intel_ring_advance(rq, cs);

	rq->sched.attr.priority = I915_PRIORITY_MASK;
	err = 0;
err:
	i915_request_get(rq);
	i915_request_add(rq);
	if (err) {
		i915_request_put(rq);
		return ERR_PTR(err);
	}

	return rq;
}

struct lrc_timestamp {
	struct intel_engine_cs *engine;
	struct intel_context *ce[2];
	u32 poison;
};

static bool timestamp_advanced(u32 start, u32 end)
{
	return (s32)(end - start) > 0;
}

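/*
 * A sketch of why the cast above is wrap-safe (the same two's-complement
 * idiom as time_after()): the subtraction is done modulo 2^32, so a
 * timestamp that has advanced across the 32bit boundary still compares
 * as positive, e.g.
 *
 *	start = 0xfffffff0, end = 0x00000010
 *	(s32)(end - start) == 0x20 > 0, so the timestamp advanced
 *
 * whereas a plain unsigned end > start comparison would wrongly report
 * it as having gone backwards.
 */
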
static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt)
{
	u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4);
	struct i915_request *rq;
	u32 timestamp;
	int err = 0;

	arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison;
	rq = create_timestamp(arg->ce[0], slot, 1);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	err = wait_for_submit(rq->engine, rq, HZ / 2);
	if (err)
		goto err;

	if (preempt) {
		arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef;
		err = emit_semaphore_signal(arg->ce[1], slot);
		if (err)
			goto err;
	} else {
		slot[0] = 1;
		wmb();
	}

	/* And wait for switch to kernel (to save our context to memory) */
	err = context_flush(arg->ce[0], HZ / 2);
	if (err)
		goto err;

	if (!timestamp_advanced(arg->poison, slot[1])) {
		pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
		       arg->engine->name, preempt ? "preempt" : "simple",
		       arg->poison, slot[1]);
		err = -EINVAL;
	}

	timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]);
	if (!timestamp_advanced(slot[1], timestamp)) {
		pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n",
		       arg->engine->name, preempt ? "preempt" : "simple",
		       slot[1], timestamp);
		err = -EINVAL;
	}

err:
	memset32(slot, -1, 4);
	i915_request_put(rq);
	return err;
}

static int live_lrc_timestamp(void *arg)
{
	struct lrc_timestamp data = {};
	struct intel_gt *gt = arg;
	enum intel_engine_id id;
	const u32 poison[] = {
		0,
		S32_MAX,
		(u32)S32_MAX + 1,
		U32_MAX,
	};

	/*
	 * We want to verify that the timestamp is saved and restored across
	 * context switches and is monotonic.
	 *
	 * So we do this with a little bit of LRC poisoning to check various
	 * boundary conditions, and see what happens if we preempt the context
	 * with a second request (carrying more poison into the timestamp).
	 */

	for_each_engine(data.engine, gt, id) {
		unsigned long heartbeat;
		int i, err = 0;

		engine_heartbeat_disable(data.engine, &heartbeat);

		for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
			struct intel_context *tmp;

			tmp = intel_context_create(data.engine);
			if (IS_ERR(tmp)) {
				err = PTR_ERR(tmp);
				goto err;
			}

			err = intel_context_pin(tmp);
			if (err) {
				intel_context_put(tmp);
				goto err;
			}

			data.ce[i] = tmp;
		}

		for (i = 0; i < ARRAY_SIZE(poison); i++) {
			data.poison = poison[i];

			err = __lrc_timestamp(&data, false);
			if (err)
				break;

			err = __lrc_timestamp(&data, true);
			if (err)
				break;
		}

err:
		engine_heartbeat_enable(data.engine, heartbeat);
		for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
			if (!data.ce[i])
				break;

			intel_context_unpin(data.ce[i]);
			intel_context_put(data.ce[i]);
		}

		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			return err;
	}

	return 0;
}

static struct i915_vma *
create_user_vma(struct i915_address_space *vm, unsigned long size)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int err;

	obj = i915_gem_object_create_internal(vm->i915, size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err) {
		i915_gem_object_put(obj);
		return ERR_PTR(err);
	}

	return vma;
}

static struct i915_vma *
store_context(struct intel_context *ce, struct i915_vma *scratch)
{
	struct i915_vma *batch;
	u32 dw, x, *cs, *hw;

	batch = create_user_vma(ce->vm, SZ_64K);
	if (IS_ERR(batch))
		return batch;

	cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
	if (IS_ERR(cs)) {
		i915_vma_put(batch);
		return ERR_CAST(cs);
	}

	x = 0;
	dw = 0;
	hw = ce->engine->pinned_default_state;
	hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
	do {
		u32 len = hw[dw] & 0x7f;

		if (hw[dw] == 0) {
			dw++;
			continue;
		}

		if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
			dw += len + 2;
			continue;
		}

		dw++;
		len = (len + 1) / 2;
		while (len--) {
			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
			*cs++ = hw[dw];
			*cs++ = lower_32_bits(scratch->node.start + x);
			*cs++ = upper_32_bits(scratch->node.start + x);

			dw += 2;
			x += 4;
		}
	} while (dw < PAGE_SIZE / sizeof(u32) &&
		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

	*cs++ = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(batch->obj);
	i915_gem_object_unpin_map(batch->obj);

	return batch;
}

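/*
 * Sketch of the MI_LOAD_REGISTER_IMM layout that the dword walk above
 * (and its twins in load_context() and compare_isolation()) depends on:
 * bits 31:23 carry the opcode (0x22, matched via MI_INSTR(0x22, 0)) and
 * the low bits the extra dword count, 2 * nregs - 1 for nregs
 * (offset, value) pairs. So len = hw[dw] & 0x7f recovers the payload size
 * and (len + 1) / 2 the number of registers:
 *
 *	hw[dw + 0]: MI_LOAD_REGISTER_IMM(2) // header, length field = 3
 *	hw[dw + 1]: <register offset>       // pair 0
 *	hw[dw + 2]: <value>
 *	hw[dw + 3]: <register offset>       // pair 1
 *	hw[dw + 4]: <value>
 *
 * Only the opcode and length fields matter here; flag bits the HW sets in
 * the header (e.g. force-posted) are deliberately masked off.
 */
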
static int move_to_active(struct i915_request *rq,
			  struct i915_vma *vma,
			  unsigned int flags)
{
	int err;

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, flags);
	if (!err)
		err = i915_vma_move_to_active(vma, rq, flags);
	i915_vma_unlock(vma);

	return err;
}

static struct i915_request *
record_registers(struct intel_context *ce,
		 struct i915_vma *before,
		 struct i915_vma *after,
		 u32 *sema)
{
	struct i915_vma *b_before, *b_after;
	struct i915_request *rq;
	u32 *cs;
	int err;

	b_before = store_context(ce, before);
	if (IS_ERR(b_before))
		return ERR_CAST(b_before);

	b_after = store_context(ce, after);
	if (IS_ERR(b_after)) {
		rq = ERR_CAST(b_after);
		goto err_before;
	}

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		goto err_after;

	err = move_to_active(rq, before, EXEC_OBJECT_WRITE);
	if (err)
		goto err_rq;

	err = move_to_active(rq, b_before, 0);
	if (err)
		goto err_rq;

	err = move_to_active(rq, after, EXEC_OBJECT_WRITE);
	if (err)
		goto err_rq;

	err = move_to_active(rq, b_after, 0);
	if (err)
		goto err_rq;

	cs = intel_ring_begin(rq, 14);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_rq;
	}

	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
	*cs++ = lower_32_bits(b_before->node.start);
	*cs++ = upper_32_bits(b_before->node.start);

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_NEQ_SDD;
	*cs++ = 0;
	*cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
		offset_in_page(sema);
	*cs++ = 0;
	*cs++ = MI_NOOP;

	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
	*cs++ = lower_32_bits(b_after->node.start);
	*cs++ = upper_32_bits(b_after->node.start);

	intel_ring_advance(rq, cs);

	WRITE_ONCE(*sema, 0);
	i915_request_get(rq);
	i915_request_add(rq);
err_after:
	i915_vma_put(b_after);
err_before:
	i915_vma_put(b_before);
	return rq;

err_rq:
	i915_request_add(rq);
	rq = ERR_PTR(err);
	goto err_after;
}

static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
{
	struct i915_vma *batch;
	u32 dw, *cs, *hw;

	batch = create_user_vma(ce->vm, SZ_64K);
	if (IS_ERR(batch))
		return batch;

	cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
	if (IS_ERR(cs)) {
		i915_vma_put(batch);
		return ERR_CAST(cs);
	}

	dw = 0;
	hw = ce->engine->pinned_default_state;
	hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
	do {
		u32 len = hw[dw] & 0x7f;

		if (hw[dw] == 0) {
			dw++;
			continue;
		}

		if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
			dw += len + 2;
			continue;
		}

		dw++;
		len = (len + 1) / 2;
		*cs++ = MI_LOAD_REGISTER_IMM(len);
		while (len--) {
			*cs++ = hw[dw];
			*cs++ = poison;
			dw += 2;
		}
	} while (dw < PAGE_SIZE / sizeof(u32) &&
		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

	*cs++ = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(batch->obj);
	i915_gem_object_unpin_map(batch->obj);

	return batch;
}

static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
{
	struct i915_request *rq;
	struct i915_vma *batch;
	u32 *cs;
	int err;

	batch = load_context(ce, poison);
	if (IS_ERR(batch))
		return PTR_ERR(batch);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_batch;
	}

	err = move_to_active(rq, batch, 0);
	if (err)
		goto err_rq;

	cs = intel_ring_begin(rq, 8);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_rq;
	}

	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
	*cs++ = lower_32_bits(batch->node.start);
	*cs++ = upper_32_bits(batch->node.start);

	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
		offset_in_page(sema);
	*cs++ = 0;
	*cs++ = 1;

	intel_ring_advance(rq, cs);

	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
err_rq:
	i915_request_add(rq);
err_batch:
	i915_vma_put(batch);
	return err;
}

static bool is_moving(u32 a, u32 b)
{
	return a != b;
}

static int compare_isolation(struct intel_engine_cs *engine,
			     struct i915_vma *ref[2],
			     struct i915_vma *result[2],
			     struct intel_context *ce,
			     u32 poison)
{
	u32 x, dw, *hw, *lrc;
	u32 *A[2], *B[2];
	int err = 0;

	A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC);
	if (IS_ERR(A[0]))
		return PTR_ERR(A[0]);

	A[1] = i915_gem_object_pin_map(ref[1]->obj, I915_MAP_WC);
	if (IS_ERR(A[1])) {
		err = PTR_ERR(A[1]);
		goto err_A0;
	}

	B[0] = i915_gem_object_pin_map(result[0]->obj, I915_MAP_WC);
	if (IS_ERR(B[0])) {
		err = PTR_ERR(B[0]);
		goto err_A1;
	}

	B[1] = i915_gem_object_pin_map(result[1]->obj, I915_MAP_WC);
	if (IS_ERR(B[1])) {
		err = PTR_ERR(B[1]);
		goto err_B0;
	}

	lrc = i915_gem_object_pin_map(ce->state->obj,
				      i915_coherent_map_type(engine->i915));
	if (IS_ERR(lrc)) {
		err = PTR_ERR(lrc);
		goto err_B1;
	}
	lrc += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);

	x = 0;
	dw = 0;
	hw = engine->pinned_default_state;
	hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
	do {
		u32 len = hw[dw] & 0x7f;

		if (hw[dw] == 0) {
			dw++;
			continue;
		}

		if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
			dw += len + 2;
			continue;
		}

		dw++;
		len = (len + 1) / 2;
		while (len--) {
			if (!is_moving(A[0][x], A[1][x]) &&
			    (A[0][x] != B[0][x] || A[1][x] != B[1][x])) {
				switch (hw[dw] & 4095) {
				case 0x30: /* RING_HEAD */
				case 0x34: /* RING_TAIL */
					break;

				default:
					pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
					       engine->name, dw,
					       hw[dw], hw[dw + 1],
					       A[0][x], B[0][x], B[1][x],
					       poison, lrc[dw + 1]);
					err = -EINVAL;
					break;
				}
			}
			dw += 2;
			x++;
		}
	} while (dw < PAGE_SIZE / sizeof(u32) &&
		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

	i915_gem_object_unpin_map(ce->state->obj);
err_B1:
	i915_gem_object_unpin_map(result[1]->obj);
err_B0:
	i915_gem_object_unpin_map(result[0]->obj);
err_A1:
	i915_gem_object_unpin_map(ref[1]->obj);
err_A0:
	i915_gem_object_unpin_map(ref[0]->obj);
	return err;
}

static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
{
	u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1);
	struct i915_vma *ref[2], *result[2];
	struct intel_context *A, *B;
	struct i915_request *rq;
	int err;

	A = intel_context_create(engine);
	if (IS_ERR(A))
		return PTR_ERR(A);

	B = intel_context_create(engine);
	if (IS_ERR(B)) {
		err = PTR_ERR(B);
		goto err_A;
	}

	ref[0] = create_user_vma(A->vm, SZ_64K);
	if (IS_ERR(ref[0])) {
		err = PTR_ERR(ref[0]);
		goto err_B;
	}

	ref[1] = create_user_vma(A->vm, SZ_64K);
	if (IS_ERR(ref[1])) {
		err = PTR_ERR(ref[1]);
		goto err_ref0;
	}

	rq = record_registers(A, ref[0], ref[1], sema);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_ref1;
	}

	WRITE_ONCE(*sema, 1);
	wmb();

	if (i915_request_wait(rq, 0, HZ / 2) < 0) {
		i915_request_put(rq);
		err = -ETIME;
		goto err_ref1;
	}
	i915_request_put(rq);

	result[0] = create_user_vma(A->vm, SZ_64K);
	if (IS_ERR(result[0])) {
		err = PTR_ERR(result[0]);
		goto err_ref1;
	}

	result[1] = create_user_vma(A->vm, SZ_64K);
	if (IS_ERR(result[1])) {
		err = PTR_ERR(result[1]);
		goto err_result0;
	}

	rq = record_registers(A, result[0], result[1], sema);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_result1;
	}

	err = poison_registers(B, poison, sema);
	if (err) {
		WRITE_ONCE(*sema, -1);
		i915_request_put(rq);
		goto err_result1;
	}

	if (i915_request_wait(rq, 0, HZ / 2) < 0) {
		i915_request_put(rq);
		err = -ETIME;
		goto err_result1;
	}
	i915_request_put(rq);

	err = compare_isolation(engine, ref, result, A, poison);

err_result1:
	i915_vma_put(result[1]);
err_result0:
	i915_vma_put(result[0]);
err_ref1:
	i915_vma_put(ref[1]);
err_ref0:
	i915_vma_put(ref[0]);
err_B:
	intel_context_put(B);
err_A:
	intel_context_put(A);
	return err;
}

static bool skip_isolation(const struct intel_engine_cs *engine)
{
	if (engine->class == COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) == 9)
		return true;

	if (engine->class == RENDER_CLASS && INTEL_GEN(engine->i915) == 11)
		return true;

	return false;
}

static int live_lrc_isolation(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	const u32 poison[] = {
		STACK_MAGIC,
		0x3a3a3a3a,
		0x5c5c5c5c,
		0xffffffff,
		0xffff0000,
	};

	/*
	 * Our goal is to try to verify that per-context state cannot be
	 * tampered with by another non-privileged client.
	 *
	 * We take the list of context registers from the LRI in the default
	 * context image and attempt to modify that list from a remote context.
	 */

	for_each_engine(engine, gt, id) {
		int err = 0;
		int i;

		/* Just don't even ask */
		if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
		    skip_isolation(engine))
			continue;

		intel_engine_pm_get(engine);
		if (engine->pinned_default_state) {
			for (i = 0; i < ARRAY_SIZE(poison); i++) {
				err = __lrc_isolation(engine, poison[i]);
				if (err)
					break;

				err = __lrc_isolation(engine, ~poison[i]);
				if (err)
					break;
			}
		}
		intel_engine_pm_put(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			return err;
	}

	return 0;
}

static void garbage_reset(struct intel_engine_cs *engine,
			  struct i915_request *rq)
{
	const unsigned int bit = I915_RESET_ENGINE + engine->id;
	unsigned long *lock = &engine->gt->reset.flags;

	if (test_and_set_bit(bit, lock))
		return;

	tasklet_disable(&engine->execlists.tasklet);

	if (!rq->fence.error)
		intel_engine_reset(engine, NULL);

	tasklet_enable(&engine->execlists.tasklet);
	clear_and_wake_up_bit(bit, lock);
}

static struct i915_request *garbage(struct intel_context *ce,
				    struct rnd_state *prng)
{
	struct i915_request *rq;
	int err;

	err = intel_context_pin(ce);
	if (err)
		return ERR_PTR(err);

	prandom_bytes_state(prng,
			    ce->lrc_reg_state,
			    ce->engine->context_size -
			    LRC_STATE_PN * PAGE_SIZE);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	i915_request_get(rq);
	i915_request_add(rq);
	return rq;

err_unpin:
	intel_context_unpin(ce);
	return ERR_PTR(err);
}

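/*
 * Note on the corruption above (a sketch of the idea, not new behaviour):
 * prandom_bytes_state() deterministically fills the register state that
 * follows the per-process HWSP with noise, so a failure can be replayed
 * by reseeding with the same value, e.g.:
 *
 *	I915_RND_STATE(prng); // seeded from i915_selftest.random_seed
 *	rq = garbage(ce, &prng);
 */
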
static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
{
	struct intel_context *ce;
	struct i915_request *hang;
	int err = 0;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	hang = garbage(ce, prng);
	if (IS_ERR(hang)) {
		err = PTR_ERR(hang);
		goto err_ce;
	}

	if (wait_for_submit(engine, hang, HZ / 2)) {
		i915_request_put(hang);
		err = -ETIME;
		goto err_ce;
	}

	intel_context_set_banned(ce);
	garbage_reset(engine, hang);

	intel_engine_flush_submission(engine);
	if (!hang->fence.error) {
		i915_request_put(hang);
		pr_err("%s: corrupted context was not reset\n",
		       engine->name);
		err = -EINVAL;
		goto err_ce;
	}

	if (i915_request_wait(hang, 0, HZ / 2) < 0) {
		pr_err("%s: corrupted context did not recover\n",
		       engine->name);
		i915_request_put(hang);
		err = -EIO;
		goto err_ce;
	}
	i915_request_put(hang);

err_ce:
	intel_context_put(ce);
	return err;
}

static int live_lrc_garbage(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * Verify that we can recover if one context state is completely
	 * corrupted.
	 */

	if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
		return 0;

	for_each_engine(engine, gt, id) {
		I915_RND_STATE(prng);
		int err = 0, i;

		if (!intel_has_reset_engine(engine->gt))
			continue;

		intel_engine_pm_get(engine);
		for (i = 0; i < 3; i++) {
			err = __lrc_garbage(engine, &prng);
			if (err)
				break;
		}
		intel_engine_pm_put(engine);

		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			return err;
	}

	return 0;
}

static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
{
	struct intel_context *ce;
	struct i915_request *rq = NULL;
	IGT_TIMEOUT(end_time);
	int err;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	ce->runtime.num_underflow = 0;
	ce->runtime.max_underflow = 0;

	do {
		unsigned int loop = 1024;

		while (loop) {
			rq = intel_context_create_request(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto err_rq;
			}

			if (--loop == 0)
				i915_request_get(rq);

			i915_request_add(rq);
		}

		if (__igt_timeout(end_time, NULL))
			break;

		i915_request_put(rq);
	} while (1);

	err = i915_request_wait(rq, 0, HZ / 5);
	if (err < 0) {
		pr_err("%s: request not completed!\n", engine->name);
		goto err_wait;
	}

	igt_flush_test(engine->i915);

	pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
		engine->name,
		intel_context_get_total_runtime_ns(ce),
		intel_context_get_avg_runtime_ns(ce));

	err = 0;
	if (ce->runtime.num_underflow) {
		pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
		       engine->name,
		       ce->runtime.num_underflow,
		       ce->runtime.max_underflow);
		err = -EOVERFLOW;
	}

err_wait:
	i915_request_put(rq);
err_rq:
	intel_context_put(ce);
	return err;
}

static int live_pphwsp_runtime(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check that cumulative context runtime as stored in the pphwsp[16]
	 * is monotonic.
	 */

	for_each_engine(engine, gt, id) {
		err = __live_pphwsp_runtime(engine);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	return err;
}

int intel_lrc_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_lrc_layout),
		SUBTEST(live_lrc_fixed),
		SUBTEST(live_lrc_state),
		SUBTEST(live_lrc_gpr),
		SUBTEST(live_lrc_isolation),
		SUBTEST(live_lrc_timestamp),
		SUBTEST(live_lrc_garbage),
		SUBTEST(live_pphwsp_runtime),
	};

	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
		return 0;

	return intel_gt_live_subtests(tests, &i915->gt);
}