2 * SPDX-License-Identifier: MIT
4 * Copyright © 2018 Intel Corporation
7 #include <linux/prime_numbers.h>
9 #include "gem/i915_gem_pm.h"
10 #include "gt/intel_engine_heartbeat.h"
11 #include "gt/intel_reset.h"
13 #include "i915_selftest.h"
14 #include "selftests/i915_random.h"
15 #include "selftests/igt_flush_test.h"
16 #include "selftests/igt_live_test.h"
17 #include "selftests/igt_spinner.h"
18 #include "selftests/lib_sw_fence.h"
20 #include "gem/selftests/igt_gem_utils.h"
21 #include "gem/selftests/mock_context.h"
23 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
24 #define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */
26 static struct i915_vma
*create_scratch(struct intel_gt
*gt
)
28 struct drm_i915_gem_object
*obj
;
32 obj
= i915_gem_object_create_internal(gt
->i915
, PAGE_SIZE
);
36 i915_gem_object_set_cache_coherency(obj
, I915_CACHING_CACHED
);
38 vma
= i915_vma_instance(obj
, >
->ggtt
->vm
, NULL
);
40 i915_gem_object_put(obj
);
44 err
= i915_vma_pin(vma
, 0, 0, PIN_GLOBAL
);
46 i915_gem_object_put(obj
);
53 static void engine_heartbeat_disable(struct intel_engine_cs
*engine
,
56 *saved
= engine
->props
.heartbeat_interval_ms
;
57 engine
->props
.heartbeat_interval_ms
= 0;
59 intel_engine_pm_get(engine
);
60 intel_engine_park_heartbeat(engine
);
63 static void engine_heartbeat_enable(struct intel_engine_cs
*engine
,
66 intel_engine_pm_put(engine
);
68 engine
->props
.heartbeat_interval_ms
= saved
;
71 static int wait_for_submit(struct intel_engine_cs
*engine
,
72 struct i915_request
*rq
,
73 unsigned long timeout
)
78 intel_engine_flush_submission(engine
);
80 if (READ_ONCE(engine
->execlists
.pending
[0]))
83 if (i915_request_is_active(rq
))
86 if (i915_request_started(rq
)) /* that was quick! */
88 } while (time_before(jiffies
, timeout
));
93 static int wait_for_reset(struct intel_engine_cs
*engine
,
94 struct i915_request
*rq
,
95 unsigned long timeout
)
101 intel_engine_flush_submission(engine
);
103 if (READ_ONCE(engine
->execlists
.pending
[0]))
106 if (i915_request_completed(rq
))
109 if (READ_ONCE(rq
->fence
.error
))
111 } while (time_before(jiffies
, timeout
));
113 flush_scheduled_work();
115 if (rq
->fence
.error
!= -EIO
) {
116 pr_err("%s: hanging request %llx:%lld not reset\n",
123 /* Give the request a jiffie to complete after flushing the worker */
124 if (i915_request_wait(rq
, 0,
125 max(0l, (long)(timeout
- jiffies
)) + 1) < 0) {
126 pr_err("%s: hanging request %llx:%lld did not complete\n",
136 static int live_sanitycheck(void *arg
)
138 struct intel_gt
*gt
= arg
;
139 struct intel_engine_cs
*engine
;
140 enum intel_engine_id id
;
141 struct igt_spinner spin
;
144 if (!HAS_LOGICAL_RING_CONTEXTS(gt
->i915
))
147 if (igt_spinner_init(&spin
, gt
))
150 for_each_engine(engine
, gt
, id
) {
151 struct intel_context
*ce
;
152 struct i915_request
*rq
;
154 ce
= intel_context_create(engine
);
160 rq
= igt_spinner_create_request(&spin
, ce
, MI_NOOP
);
166 i915_request_add(rq
);
167 if (!igt_wait_for_spinner(&spin
, rq
)) {
168 GEM_TRACE("spinner failed to start\n");
170 intel_gt_set_wedged(gt
);
175 igt_spinner_end(&spin
);
176 if (igt_flush_test(gt
->i915
)) {
182 intel_context_put(ce
);
187 igt_spinner_fini(&spin
);
191 static int live_unlite_restore(struct intel_gt
*gt
, int prio
)
193 struct intel_engine_cs
*engine
;
194 enum intel_engine_id id
;
195 struct igt_spinner spin
;
199 * Check that we can correctly context switch between 2 instances
200 * on the same engine from the same parent context.
203 if (igt_spinner_init(&spin
, gt
))
207 for_each_engine(engine
, gt
, id
) {
208 struct intel_context
*ce
[2] = {};
209 struct i915_request
*rq
[2];
210 struct igt_live_test t
;
214 if (prio
&& !intel_engine_has_preemption(engine
))
217 if (!intel_engine_can_store_dword(engine
))
220 if (igt_live_test_begin(&t
, gt
->i915
, __func__
, engine
->name
)) {
224 engine_heartbeat_disable(engine
, &saved
);
226 for (n
= 0; n
< ARRAY_SIZE(ce
); n
++) {
227 struct intel_context
*tmp
;
229 tmp
= intel_context_create(engine
);
235 err
= intel_context_pin(tmp
);
237 intel_context_put(tmp
);
242 * Setup the pair of contexts such that if we
243 * lite-restore using the RING_TAIL from ce[1] it
244 * will execute garbage from ce[0]->ring.
246 memset(tmp
->ring
->vaddr
,
247 POISON_INUSE
, /* IPEHR: 0x5a5a5a5a [hung!] */
248 tmp
->ring
->vma
->size
);
252 GEM_BUG_ON(!ce
[1]->ring
->size
);
253 intel_ring_reset(ce
[1]->ring
, ce
[1]->ring
->size
/ 2);
254 __execlists_update_reg_state(ce
[1], engine
, ce
[1]->ring
->head
);
256 rq
[0] = igt_spinner_create_request(&spin
, ce
[0], MI_ARB_CHECK
);
258 err
= PTR_ERR(rq
[0]);
262 i915_request_get(rq
[0]);
263 i915_request_add(rq
[0]);
264 GEM_BUG_ON(rq
[0]->postfix
> ce
[1]->ring
->emit
);
266 if (!igt_wait_for_spinner(&spin
, rq
[0])) {
267 i915_request_put(rq
[0]);
271 rq
[1] = i915_request_create(ce
[1]);
273 err
= PTR_ERR(rq
[1]);
274 i915_request_put(rq
[0]);
280 * Ensure we do the switch to ce[1] on completion.
282 * rq[0] is already submitted, so this should reduce
283 * to a no-op (a wait on a request on the same engine
284 * uses the submit fence, not the completion fence),
285 * but it will install a dependency on rq[1] for rq[0]
286 * that will prevent the pair being reordered by
289 i915_request_await_dma_fence(rq
[1], &rq
[0]->fence
);
292 i915_request_get(rq
[1]);
293 i915_request_add(rq
[1]);
294 GEM_BUG_ON(rq
[1]->postfix
<= rq
[0]->postfix
);
295 i915_request_put(rq
[0]);
298 struct i915_sched_attr attr
= {
302 /* Alternatively preempt the spinner with ce[1] */
303 engine
->schedule(rq
[1], &attr
);
306 /* And switch back to ce[0] for good measure */
307 rq
[0] = i915_request_create(ce
[0]);
309 err
= PTR_ERR(rq
[0]);
310 i915_request_put(rq
[1]);
314 i915_request_await_dma_fence(rq
[0], &rq
[1]->fence
);
315 i915_request_get(rq
[0]);
316 i915_request_add(rq
[0]);
317 GEM_BUG_ON(rq
[0]->postfix
> rq
[1]->postfix
);
318 i915_request_put(rq
[1]);
319 i915_request_put(rq
[0]);
322 tasklet_kill(&engine
->execlists
.tasklet
); /* flush submission */
323 igt_spinner_end(&spin
);
324 for (n
= 0; n
< ARRAY_SIZE(ce
); n
++) {
325 if (IS_ERR_OR_NULL(ce
[n
]))
328 intel_context_unpin(ce
[n
]);
329 intel_context_put(ce
[n
]);
332 engine_heartbeat_enable(engine
, saved
);
333 if (igt_live_test_end(&t
))
339 igt_spinner_fini(&spin
);
343 static int live_unlite_switch(void *arg
)
345 return live_unlite_restore(arg
, 0);
348 static int live_unlite_preempt(void *arg
)
350 return live_unlite_restore(arg
, I915_USER_PRIORITY(I915_PRIORITY_MAX
));
353 static int live_pin_rewind(void *arg
)
355 struct intel_gt
*gt
= arg
;
356 struct intel_engine_cs
*engine
;
357 enum intel_engine_id id
;
361 * We have to be careful not to trust intel_ring too much, for example
362 * ring->head is updated upon retire which is out of sync with pinning
363 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
364 * or else we risk writing an older, stale value.
366 * To simulate this, let's apply a bit of deliberate sabotague.
369 for_each_engine(engine
, gt
, id
) {
370 struct intel_context
*ce
;
371 struct i915_request
*rq
;
372 struct intel_ring
*ring
;
373 struct igt_live_test t
;
375 if (igt_live_test_begin(&t
, gt
->i915
, __func__
, engine
->name
)) {
380 ce
= intel_context_create(engine
);
386 err
= intel_context_pin(ce
);
388 intel_context_put(ce
);
392 /* Keep the context awake while we play games */
393 err
= i915_active_acquire(&ce
->active
);
395 intel_context_unpin(ce
);
396 intel_context_put(ce
);
401 /* Poison the ring, and offset the next request from HEAD */
402 memset32(ring
->vaddr
, STACK_MAGIC
, ring
->size
/ sizeof(u32
));
403 ring
->emit
= ring
->size
/ 2;
404 ring
->tail
= ring
->emit
;
405 GEM_BUG_ON(ring
->head
);
407 intel_context_unpin(ce
);
409 /* Submit a simple nop request */
410 GEM_BUG_ON(intel_context_is_pinned(ce
));
411 rq
= intel_context_create_request(ce
);
412 i915_active_release(&ce
->active
); /* e.g. async retire */
413 intel_context_put(ce
);
418 GEM_BUG_ON(!rq
->head
);
419 i915_request_add(rq
);
421 /* Expect not to hang! */
422 if (igt_live_test_end(&t
)) {
431 static int live_hold_reset(void *arg
)
433 struct intel_gt
*gt
= arg
;
434 struct intel_engine_cs
*engine
;
435 enum intel_engine_id id
;
436 struct igt_spinner spin
;
440 * In order to support offline error capture for fast preempt reset,
441 * we need to decouple the guilty request and ensure that it and its
442 * descendents are not executed while the capture is in progress.
445 if (!intel_has_reset_engine(gt
))
448 if (igt_spinner_init(&spin
, gt
))
451 for_each_engine(engine
, gt
, id
) {
452 struct intel_context
*ce
;
453 unsigned long heartbeat
;
454 struct i915_request
*rq
;
456 ce
= intel_context_create(engine
);
462 engine_heartbeat_disable(engine
, &heartbeat
);
464 rq
= igt_spinner_create_request(&spin
, ce
, MI_ARB_CHECK
);
469 i915_request_add(rq
);
471 if (!igt_wait_for_spinner(&spin
, rq
)) {
472 intel_gt_set_wedged(gt
);
477 /* We have our request executing, now remove it and reset */
479 if (test_and_set_bit(I915_RESET_ENGINE
+ id
,
481 intel_gt_set_wedged(gt
);
485 tasklet_disable(&engine
->execlists
.tasklet
);
487 engine
->execlists
.tasklet
.func(engine
->execlists
.tasklet
.data
);
488 GEM_BUG_ON(execlists_active(&engine
->execlists
) != rq
);
490 i915_request_get(rq
);
491 execlists_hold(engine
, rq
);
492 GEM_BUG_ON(!i915_request_on_hold(rq
));
494 intel_engine_reset(engine
, NULL
);
495 GEM_BUG_ON(rq
->fence
.error
!= -EIO
);
497 tasklet_enable(&engine
->execlists
.tasklet
);
498 clear_and_wake_up_bit(I915_RESET_ENGINE
+ id
,
501 /* Check that we do not resubmit the held request */
502 if (!i915_request_wait(rq
, 0, HZ
/ 5)) {
503 pr_err("%s: on hold request completed!\n",
505 i915_request_put(rq
);
509 GEM_BUG_ON(!i915_request_on_hold(rq
));
511 /* But is resubmitted on release */
512 execlists_unhold(engine
, rq
);
513 if (i915_request_wait(rq
, 0, HZ
/ 5) < 0) {
514 pr_err("%s: held request did not complete!\n",
516 intel_gt_set_wedged(gt
);
519 i915_request_put(rq
);
522 engine_heartbeat_enable(engine
, heartbeat
);
523 intel_context_put(ce
);
528 igt_spinner_fini(&spin
);
532 static const char *error_repr(int err
)
534 return err
? "bad" : "good";
537 static int live_error_interrupt(void *arg
)
539 static const struct error_phase
{
540 enum { GOOD
= 0, BAD
= -EIO
} error
[2];
545 { { GOOD
, GOOD
} }, /* sentinel */
547 struct intel_gt
*gt
= arg
;
548 struct intel_engine_cs
*engine
;
549 enum intel_engine_id id
;
552 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
553 * of invalid commands in user batches that will cause a GPU hang.
554 * This is a faster mechanism than using hangcheck/heartbeats, but
555 * only detects problems the HW knows about -- it will not warn when
558 * To verify our detection and reset, we throw some invalid commands
559 * at the HW and wait for the interrupt.
562 if (!intel_has_reset_engine(gt
))
565 for_each_engine(engine
, gt
, id
) {
566 const struct error_phase
*p
;
567 unsigned long heartbeat
;
570 engine_heartbeat_disable(engine
, &heartbeat
);
572 for (p
= phases
; p
->error
[0] != GOOD
; p
++) {
573 struct i915_request
*client
[ARRAY_SIZE(phases
->error
)];
577 memset(client
, 0, sizeof(*client
));
578 for (i
= 0; i
< ARRAY_SIZE(client
); i
++) {
579 struct intel_context
*ce
;
580 struct i915_request
*rq
;
582 ce
= intel_context_create(engine
);
588 rq
= intel_context_create_request(ce
);
589 intel_context_put(ce
);
595 if (rq
->engine
->emit_init_breadcrumb
) {
596 err
= rq
->engine
->emit_init_breadcrumb(rq
);
598 i915_request_add(rq
);
603 cs
= intel_ring_begin(rq
, 2);
605 i915_request_add(rq
);
618 client
[i
] = i915_request_get(rq
);
619 i915_request_add(rq
);
622 err
= wait_for_submit(engine
, client
[0], HZ
/ 2);
624 pr_err("%s: first request did not start within time!\n",
630 for (i
= 0; i
< ARRAY_SIZE(client
); i
++) {
631 if (i915_request_wait(client
[i
], 0, HZ
/ 5) < 0)
632 pr_debug("%s: %s request incomplete!\n",
634 error_repr(p
->error
[i
]));
636 if (!i915_request_started(client
[i
])) {
637 pr_debug("%s: %s request not stated!\n",
639 error_repr(p
->error
[i
]));
644 /* Kick the tasklet to process the error */
645 intel_engine_flush_submission(engine
);
646 if (client
[i
]->fence
.error
!= p
->error
[i
]) {
647 pr_err("%s: %s request completed with wrong error code: %d\n",
649 error_repr(p
->error
[i
]),
650 client
[i
]->fence
.error
);
657 for (i
= 0; i
< ARRAY_SIZE(client
); i
++)
659 i915_request_put(client
[i
]);
661 pr_err("%s: failed at phase[%zd] { %d, %d }\n",
662 engine
->name
, p
- phases
,
663 p
->error
[0], p
->error
[1]);
668 engine_heartbeat_enable(engine
, heartbeat
);
670 intel_gt_set_wedged(gt
);
679 emit_semaphore_chain(struct i915_request
*rq
, struct i915_vma
*vma
, int idx
)
683 cs
= intel_ring_begin(rq
, 10);
687 *cs
++ = MI_ARB_ON_OFF
| MI_ARB_ENABLE
;
689 *cs
++ = MI_SEMAPHORE_WAIT
|
690 MI_SEMAPHORE_GLOBAL_GTT
|
692 MI_SEMAPHORE_SAD_NEQ_SDD
;
694 *cs
++ = i915_ggtt_offset(vma
) + 4 * idx
;
698 *cs
++ = MI_STORE_DWORD_IMM_GEN4
| MI_USE_GGTT
;
699 *cs
++ = i915_ggtt_offset(vma
) + 4 * (idx
- 1);
709 *cs
++ = MI_ARB_ON_OFF
| MI_ARB_DISABLE
;
711 intel_ring_advance(rq
, cs
);
715 static struct i915_request
*
716 semaphore_queue(struct intel_engine_cs
*engine
, struct i915_vma
*vma
, int idx
)
718 struct intel_context
*ce
;
719 struct i915_request
*rq
;
722 ce
= intel_context_create(engine
);
726 rq
= intel_context_create_request(ce
);
731 if (rq
->engine
->emit_init_breadcrumb
)
732 err
= rq
->engine
->emit_init_breadcrumb(rq
);
734 err
= emit_semaphore_chain(rq
, vma
, idx
);
736 i915_request_get(rq
);
737 i915_request_add(rq
);
742 intel_context_put(ce
);
747 release_queue(struct intel_engine_cs
*engine
,
748 struct i915_vma
*vma
,
751 struct i915_sched_attr attr
= {
754 struct i915_request
*rq
;
757 rq
= intel_engine_create_kernel_request(engine
);
761 cs
= intel_ring_begin(rq
, 4);
763 i915_request_add(rq
);
767 *cs
++ = MI_STORE_DWORD_IMM_GEN4
| MI_USE_GGTT
;
768 *cs
++ = i915_ggtt_offset(vma
) + 4 * (idx
- 1);
772 intel_ring_advance(rq
, cs
);
774 i915_request_get(rq
);
775 i915_request_add(rq
);
778 engine
->schedule(rq
, &attr
);
779 local_bh_enable(); /* kick tasklet */
781 i915_request_put(rq
);
787 slice_semaphore_queue(struct intel_engine_cs
*outer
,
788 struct i915_vma
*vma
,
791 struct intel_engine_cs
*engine
;
792 struct i915_request
*head
;
793 enum intel_engine_id id
;
796 head
= semaphore_queue(outer
, vma
, n
++);
798 return PTR_ERR(head
);
800 for_each_engine(engine
, outer
->gt
, id
) {
801 for (i
= 0; i
< count
; i
++) {
802 struct i915_request
*rq
;
804 rq
= semaphore_queue(engine
, vma
, n
++);
810 i915_request_put(rq
);
814 err
= release_queue(outer
, vma
, n
, INT_MAX
);
818 if (i915_request_wait(head
, 0,
819 2 * RUNTIME_INFO(outer
->i915
)->num_engines
* (count
+ 2) * (count
+ 3)) < 0) {
820 pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
823 intel_gt_set_wedged(outer
->gt
);
828 i915_request_put(head
);
832 static int live_timeslice_preempt(void *arg
)
834 struct intel_gt
*gt
= arg
;
835 struct drm_i915_gem_object
*obj
;
836 struct i915_vma
*vma
;
842 * If a request takes too long, we would like to give other users
843 * a fair go on the GPU. In particular, users may create batches
844 * that wait upon external input, where that input may even be
845 * supplied by another GPU job. To avoid blocking forever, we
846 * need to preempt the current task and replace it with another
849 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION
))
852 obj
= i915_gem_object_create_internal(gt
->i915
, PAGE_SIZE
);
856 vma
= i915_vma_instance(obj
, >
->ggtt
->vm
, NULL
);
862 vaddr
= i915_gem_object_pin_map(obj
, I915_MAP_WC
);
864 err
= PTR_ERR(vaddr
);
868 err
= i915_vma_pin(vma
, 0, 0, PIN_GLOBAL
);
872 err
= i915_vma_sync(vma
);
876 for_each_prime_number_from(count
, 1, 16) {
877 struct intel_engine_cs
*engine
;
878 enum intel_engine_id id
;
880 for_each_engine(engine
, gt
, id
) {
883 if (!intel_engine_has_preemption(engine
))
886 memset(vaddr
, 0, PAGE_SIZE
);
888 engine_heartbeat_disable(engine
, &saved
);
889 err
= slice_semaphore_queue(engine
, vma
, count
);
890 engine_heartbeat_enable(engine
, saved
);
894 if (igt_flush_test(gt
->i915
)) {
904 i915_gem_object_unpin_map(obj
);
906 i915_gem_object_put(obj
);
910 static struct i915_request
*
911 create_rewinder(struct intel_context
*ce
,
912 struct i915_request
*wait
,
916 i915_ggtt_offset(ce
->engine
->status_page
.vma
) +
917 offset_in_page(slot
);
918 struct i915_request
*rq
;
922 rq
= intel_context_create_request(ce
);
927 err
= i915_request_await_dma_fence(rq
, &wait
->fence
);
932 cs
= intel_ring_begin(rq
, 10);
938 *cs
++ = MI_ARB_ON_OFF
| MI_ARB_ENABLE
;
941 *cs
++ = MI_SEMAPHORE_WAIT
|
942 MI_SEMAPHORE_GLOBAL_GTT
|
944 MI_SEMAPHORE_SAD_NEQ_SDD
;
949 *cs
++ = MI_STORE_REGISTER_MEM_GEN8
| MI_USE_GGTT
;
950 *cs
++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq
->engine
->mmio_base
));
951 *cs
++ = offset
+ idx
* sizeof(u32
);
954 intel_ring_advance(rq
, cs
);
956 rq
->sched
.attr
.priority
= I915_PRIORITY_MASK
;
959 i915_request_get(rq
);
960 i915_request_add(rq
);
962 i915_request_put(rq
);
969 static int live_timeslice_rewind(void *arg
)
971 struct intel_gt
*gt
= arg
;
972 struct intel_engine_cs
*engine
;
973 enum intel_engine_id id
;
976 * The usual presumption on timeslice expiration is that we replace
977 * the active context with another. However, given a chain of
978 * dependencies we may end up with replacing the context with itself,
979 * but only a few of those requests, forcing us to rewind the
980 * RING_TAIL of the original request.
982 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION
))
985 for_each_engine(engine
, gt
, id
) {
987 enum { X
= 1, Y
, Z
};
988 struct i915_request
*rq
[3] = {};
989 struct intel_context
*ce
;
990 unsigned long heartbeat
;
991 unsigned long timeslice
;
995 if (!intel_engine_has_timeslices(engine
))
999 * A:rq1 -- semaphore wait, timestamp X
1000 * A:rq2 -- write timestamp Y
1002 * B:rq1 [await A:rq1] -- write timestamp Z
1004 * Force timeslice, release semaphore.
1006 * Expect execution/evaluation order XZY
1009 engine_heartbeat_disable(engine
, &heartbeat
);
1010 timeslice
= xchg(&engine
->props
.timeslice_duration_ms
, 1);
1012 slot
= memset32(engine
->status_page
.addr
+ 1000, 0, 4);
1014 ce
= intel_context_create(engine
);
1020 rq
[0] = create_rewinder(ce
, NULL
, slot
, 1);
1021 if (IS_ERR(rq
[0])) {
1022 intel_context_put(ce
);
1026 rq
[1] = create_rewinder(ce
, NULL
, slot
, 2);
1027 intel_context_put(ce
);
1031 err
= wait_for_submit(engine
, rq
[1], HZ
/ 2);
1033 pr_err("%s: failed to submit first context\n",
1038 ce
= intel_context_create(engine
);
1044 rq
[2] = create_rewinder(ce
, rq
[0], slot
, 3);
1045 intel_context_put(ce
);
1049 err
= wait_for_submit(engine
, rq
[2], HZ
/ 2);
1051 pr_err("%s: failed to submit second context\n",
1055 GEM_BUG_ON(!timer_pending(&engine
->execlists
.timer
));
1057 /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
1058 GEM_BUG_ON(!i915_request_is_active(rq
[A1
]));
1059 GEM_BUG_ON(!i915_request_is_active(rq
[A2
]));
1060 GEM_BUG_ON(!i915_request_is_active(rq
[B1
]));
1062 /* Wait for the timeslice to kick in */
1063 del_timer(&engine
->execlists
.timer
);
1064 tasklet_hi_schedule(&engine
->execlists
.tasklet
);
1065 intel_engine_flush_submission(engine
);
1067 /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
1068 GEM_BUG_ON(!i915_request_is_active(rq
[A1
]));
1069 GEM_BUG_ON(!i915_request_is_active(rq
[B1
]));
1070 GEM_BUG_ON(i915_request_is_active(rq
[A2
]));
1072 /* Release the hounds! */
1074 wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */
1076 for (i
= 1; i
<= 3; i
++) {
1077 unsigned long timeout
= jiffies
+ HZ
/ 2;
1079 while (!READ_ONCE(slot
[i
]) &&
1080 time_before(jiffies
, timeout
))
1083 if (!time_before(jiffies
, timeout
)) {
1084 pr_err("%s: rq[%d] timed out\n",
1085 engine
->name
, i
- 1);
1090 pr_debug("%s: slot[%d]:%x\n", engine
->name
, i
, slot
[i
]);
1094 if (slot
[Z
] - slot
[X
] >= slot
[Y
] - slot
[X
]) {
1095 pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
1103 memset32(&slot
[0], -1, 4);
1106 engine
->props
.timeslice_duration_ms
= timeslice
;
1107 engine_heartbeat_enable(engine
, heartbeat
);
1108 for (i
= 0; i
< 3; i
++)
1109 i915_request_put(rq
[i
]);
1110 if (igt_flush_test(gt
->i915
))
1119 static struct i915_request
*nop_request(struct intel_engine_cs
*engine
)
1121 struct i915_request
*rq
;
1123 rq
= intel_engine_create_kernel_request(engine
);
1127 i915_request_get(rq
);
1128 i915_request_add(rq
);
1133 static long timeslice_threshold(const struct intel_engine_cs
*engine
)
1135 return 2 * msecs_to_jiffies_timeout(timeslice(engine
)) + 1;
1138 static int live_timeslice_queue(void *arg
)
1140 struct intel_gt
*gt
= arg
;
1141 struct drm_i915_gem_object
*obj
;
1142 struct intel_engine_cs
*engine
;
1143 enum intel_engine_id id
;
1144 struct i915_vma
*vma
;
1149 * Make sure that even if ELSP[0] and ELSP[1] are filled with
1150 * timeslicing between them disabled, we *do* enable timeslicing
1151 * if the queue demands it. (Normally, we do not submit if
1152 * ELSP[1] is already occupied, so must rely on timeslicing to
1153 * eject ELSP[0] in favour of the queue.)
1155 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION
))
1158 obj
= i915_gem_object_create_internal(gt
->i915
, PAGE_SIZE
);
1160 return PTR_ERR(obj
);
1162 vma
= i915_vma_instance(obj
, >
->ggtt
->vm
, NULL
);
1168 vaddr
= i915_gem_object_pin_map(obj
, I915_MAP_WC
);
1169 if (IS_ERR(vaddr
)) {
1170 err
= PTR_ERR(vaddr
);
1174 err
= i915_vma_pin(vma
, 0, 0, PIN_GLOBAL
);
1178 err
= i915_vma_sync(vma
);
1182 for_each_engine(engine
, gt
, id
) {
1183 struct i915_sched_attr attr
= {
1184 .priority
= I915_USER_PRIORITY(I915_PRIORITY_MAX
),
1186 struct i915_request
*rq
, *nop
;
1187 unsigned long saved
;
1189 if (!intel_engine_has_preemption(engine
))
1192 engine_heartbeat_disable(engine
, &saved
);
1193 memset(vaddr
, 0, PAGE_SIZE
);
1195 /* ELSP[0]: semaphore wait */
1196 rq
= semaphore_queue(engine
, vma
, 0);
1201 engine
->schedule(rq
, &attr
);
1202 err
= wait_for_submit(engine
, rq
, HZ
/ 2);
1204 pr_err("%s: Timed out trying to submit semaphores\n",
1209 /* ELSP[1]: nop request */
1210 nop
= nop_request(engine
);
1215 err
= wait_for_submit(engine
, nop
, HZ
/ 2);
1216 i915_request_put(nop
);
1218 pr_err("%s: Timed out trying to submit nop\n",
1223 GEM_BUG_ON(i915_request_completed(rq
));
1224 GEM_BUG_ON(execlists_active(&engine
->execlists
) != rq
);
1226 /* Queue: semaphore signal, matching priority as semaphore */
1227 err
= release_queue(engine
, vma
, 1, effective_prio(rq
));
1231 intel_engine_flush_submission(engine
);
1232 if (!READ_ONCE(engine
->execlists
.timer
.expires
) &&
1233 !i915_request_completed(rq
)) {
1234 struct drm_printer p
=
1235 drm_info_printer(gt
->i915
->drm
.dev
);
1237 GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n",
1239 intel_engine_dump(engine
, &p
,
1240 "%s\n", engine
->name
);
1243 memset(vaddr
, 0xff, PAGE_SIZE
);
1247 /* Timeslice every jiffy, so within 2 we should signal */
1248 if (i915_request_wait(rq
, 0, timeslice_threshold(engine
)) < 0) {
1249 struct drm_printer p
=
1250 drm_info_printer(gt
->i915
->drm
.dev
);
1252 pr_err("%s: Failed to timeslice into queue\n",
1254 intel_engine_dump(engine
, &p
,
1255 "%s\n", engine
->name
);
1257 memset(vaddr
, 0xff, PAGE_SIZE
);
1261 i915_request_put(rq
);
1263 engine_heartbeat_enable(engine
, saved
);
1269 i915_vma_unpin(vma
);
1271 i915_gem_object_unpin_map(obj
);
1273 i915_gem_object_put(obj
);
1277 static int live_busywait_preempt(void *arg
)
1279 struct intel_gt
*gt
= arg
;
1280 struct i915_gem_context
*ctx_hi
, *ctx_lo
;
1281 struct intel_engine_cs
*engine
;
1282 struct drm_i915_gem_object
*obj
;
1283 struct i915_vma
*vma
;
1284 enum intel_engine_id id
;
1289 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
1290 * preempt the busywaits used to synchronise between rings.
1293 ctx_hi
= kernel_context(gt
->i915
);
1296 ctx_hi
->sched
.priority
=
1297 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY
);
1299 ctx_lo
= kernel_context(gt
->i915
);
1302 ctx_lo
->sched
.priority
=
1303 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY
);
1305 obj
= i915_gem_object_create_internal(gt
->i915
, PAGE_SIZE
);
1311 map
= i915_gem_object_pin_map(obj
, I915_MAP_WC
);
1317 vma
= i915_vma_instance(obj
, >
->ggtt
->vm
, NULL
);
1323 err
= i915_vma_pin(vma
, 0, 0, PIN_GLOBAL
);
1327 err
= i915_vma_sync(vma
);
1331 for_each_engine(engine
, gt
, id
) {
1332 struct i915_request
*lo
, *hi
;
1333 struct igt_live_test t
;
1336 if (!intel_engine_has_preemption(engine
))
1339 if (!intel_engine_can_store_dword(engine
))
1342 if (igt_live_test_begin(&t
, gt
->i915
, __func__
, engine
->name
)) {
1348 * We create two requests. The low priority request
1349 * busywaits on a semaphore (inside the ringbuffer where
1350 * is should be preemptible) and the high priority requests
1351 * uses a MI_STORE_DWORD_IMM to update the semaphore value
1352 * allowing the first request to complete. If preemption
1353 * fails, we hang instead.
1356 lo
= igt_request_alloc(ctx_lo
, engine
);
1362 cs
= intel_ring_begin(lo
, 8);
1365 i915_request_add(lo
);
1369 *cs
++ = MI_STORE_DWORD_IMM_GEN4
| MI_USE_GGTT
;
1370 *cs
++ = i915_ggtt_offset(vma
);
1374 /* XXX Do we need a flush + invalidate here? */
1376 *cs
++ = MI_SEMAPHORE_WAIT
|
1377 MI_SEMAPHORE_GLOBAL_GTT
|
1379 MI_SEMAPHORE_SAD_EQ_SDD
;
1381 *cs
++ = i915_ggtt_offset(vma
);
1384 intel_ring_advance(lo
, cs
);
1386 i915_request_get(lo
);
1387 i915_request_add(lo
);
1389 if (wait_for(READ_ONCE(*map
), 10)) {
1390 i915_request_put(lo
);
1395 /* Low priority request should be busywaiting now */
1396 if (i915_request_wait(lo
, 0, 1) != -ETIME
) {
1397 i915_request_put(lo
);
1398 pr_err("%s: Busywaiting request did not!\n",
1404 hi
= igt_request_alloc(ctx_hi
, engine
);
1407 i915_request_put(lo
);
1411 cs
= intel_ring_begin(hi
, 4);
1414 i915_request_add(hi
);
1415 i915_request_put(lo
);
1419 *cs
++ = MI_STORE_DWORD_IMM_GEN4
| MI_USE_GGTT
;
1420 *cs
++ = i915_ggtt_offset(vma
);
1424 intel_ring_advance(hi
, cs
);
1425 i915_request_add(hi
);
1427 if (i915_request_wait(lo
, 0, HZ
/ 5) < 0) {
1428 struct drm_printer p
= drm_info_printer(gt
->i915
->drm
.dev
);
1430 pr_err("%s: Failed to preempt semaphore busywait!\n",
1433 intel_engine_dump(engine
, &p
, "%s\n", engine
->name
);
1436 i915_request_put(lo
);
1437 intel_gt_set_wedged(gt
);
1441 GEM_BUG_ON(READ_ONCE(*map
));
1442 i915_request_put(lo
);
1444 if (igt_live_test_end(&t
)) {
1452 i915_vma_unpin(vma
);
1454 i915_gem_object_unpin_map(obj
);
1456 i915_gem_object_put(obj
);
1458 kernel_context_close(ctx_lo
);
1460 kernel_context_close(ctx_hi
);
1464 static struct i915_request
*
1465 spinner_create_request(struct igt_spinner
*spin
,
1466 struct i915_gem_context
*ctx
,
1467 struct intel_engine_cs
*engine
,
1470 struct intel_context
*ce
;
1471 struct i915_request
*rq
;
1473 ce
= i915_gem_context_get_engine(ctx
, engine
->legacy_idx
);
1475 return ERR_CAST(ce
);
1477 rq
= igt_spinner_create_request(spin
, ce
, arb
);
1478 intel_context_put(ce
);
1482 static int live_preempt(void *arg
)
1484 struct intel_gt
*gt
= arg
;
1485 struct i915_gem_context
*ctx_hi
, *ctx_lo
;
1486 struct igt_spinner spin_hi
, spin_lo
;
1487 struct intel_engine_cs
*engine
;
1488 enum intel_engine_id id
;
1491 if (!HAS_LOGICAL_RING_PREEMPTION(gt
->i915
))
1494 if (!(gt
->i915
->caps
.scheduler
& I915_SCHEDULER_CAP_PREEMPTION
))
1495 pr_err("Logical preemption supported, but not exposed\n");
1497 if (igt_spinner_init(&spin_hi
, gt
))
1500 if (igt_spinner_init(&spin_lo
, gt
))
1503 ctx_hi
= kernel_context(gt
->i915
);
1506 ctx_hi
->sched
.priority
=
1507 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY
);
1509 ctx_lo
= kernel_context(gt
->i915
);
1512 ctx_lo
->sched
.priority
=
1513 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY
);
1515 for_each_engine(engine
, gt
, id
) {
1516 struct igt_live_test t
;
1517 struct i915_request
*rq
;
1519 if (!intel_engine_has_preemption(engine
))
1522 if (igt_live_test_begin(&t
, gt
->i915
, __func__
, engine
->name
)) {
1527 rq
= spinner_create_request(&spin_lo
, ctx_lo
, engine
,
1534 i915_request_add(rq
);
1535 if (!igt_wait_for_spinner(&spin_lo
, rq
)) {
1536 GEM_TRACE("lo spinner failed to start\n");
1538 intel_gt_set_wedged(gt
);
1543 rq
= spinner_create_request(&spin_hi
, ctx_hi
, engine
,
1546 igt_spinner_end(&spin_lo
);
1551 i915_request_add(rq
);
1552 if (!igt_wait_for_spinner(&spin_hi
, rq
)) {
1553 GEM_TRACE("hi spinner failed to start\n");
1555 intel_gt_set_wedged(gt
);
1560 igt_spinner_end(&spin_hi
);
1561 igt_spinner_end(&spin_lo
);
1563 if (igt_live_test_end(&t
)) {
1571 kernel_context_close(ctx_lo
);
1573 kernel_context_close(ctx_hi
);
1575 igt_spinner_fini(&spin_lo
);
1577 igt_spinner_fini(&spin_hi
);
1581 static int live_late_preempt(void *arg
)
1583 struct intel_gt
*gt
= arg
;
1584 struct i915_gem_context
*ctx_hi
, *ctx_lo
;
1585 struct igt_spinner spin_hi
, spin_lo
;
1586 struct intel_engine_cs
*engine
;
1587 struct i915_sched_attr attr
= {};
1588 enum intel_engine_id id
;
1591 if (!HAS_LOGICAL_RING_PREEMPTION(gt
->i915
))
1594 if (igt_spinner_init(&spin_hi
, gt
))
1597 if (igt_spinner_init(&spin_lo
, gt
))
1600 ctx_hi
= kernel_context(gt
->i915
);
1604 ctx_lo
= kernel_context(gt
->i915
);
1608 /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1609 ctx_lo
->sched
.priority
= I915_USER_PRIORITY(1);
1611 for_each_engine(engine
, gt
, id
) {
1612 struct igt_live_test t
;
1613 struct i915_request
*rq
;
1615 if (!intel_engine_has_preemption(engine
))
1618 if (igt_live_test_begin(&t
, gt
->i915
, __func__
, engine
->name
)) {
1623 rq
= spinner_create_request(&spin_lo
, ctx_lo
, engine
,
1630 i915_request_add(rq
);
1631 if (!igt_wait_for_spinner(&spin_lo
, rq
)) {
1632 pr_err("First context failed to start\n");
1636 rq
= spinner_create_request(&spin_hi
, ctx_hi
, engine
,
1639 igt_spinner_end(&spin_lo
);
1644 i915_request_add(rq
);
1645 if (igt_wait_for_spinner(&spin_hi
, rq
)) {
1646 pr_err("Second context overtook first?\n");
1650 attr
.priority
= I915_USER_PRIORITY(I915_PRIORITY_MAX
);
1651 engine
->schedule(rq
, &attr
);
1653 if (!igt_wait_for_spinner(&spin_hi
, rq
)) {
1654 pr_err("High priority context failed to preempt the low priority context\n");
1659 igt_spinner_end(&spin_hi
);
1660 igt_spinner_end(&spin_lo
);
1662 if (igt_live_test_end(&t
)) {
1670 kernel_context_close(ctx_lo
);
1672 kernel_context_close(ctx_hi
);
1674 igt_spinner_fini(&spin_lo
);
1676 igt_spinner_fini(&spin_hi
);
1680 igt_spinner_end(&spin_hi
);
1681 igt_spinner_end(&spin_lo
);
1682 intel_gt_set_wedged(gt
);
1687 struct preempt_client
{
1688 struct igt_spinner spin
;
1689 struct i915_gem_context
*ctx
;
1692 static int preempt_client_init(struct intel_gt
*gt
, struct preempt_client
*c
)
1694 c
->ctx
= kernel_context(gt
->i915
);
1698 if (igt_spinner_init(&c
->spin
, gt
))
1704 kernel_context_close(c
->ctx
);
1708 static void preempt_client_fini(struct preempt_client
*c
)
1710 igt_spinner_fini(&c
->spin
);
1711 kernel_context_close(c
->ctx
);
1714 static int live_nopreempt(void *arg
)
1716 struct intel_gt
*gt
= arg
;
1717 struct intel_engine_cs
*engine
;
1718 struct preempt_client a
, b
;
1719 enum intel_engine_id id
;
1723 * Verify that we can disable preemption for an individual request
1724 * that may be being observed and not want to be interrupted.
1727 if (!HAS_LOGICAL_RING_PREEMPTION(gt
->i915
))
1730 if (preempt_client_init(gt
, &a
))
1732 if (preempt_client_init(gt
, &b
))
1734 b
.ctx
->sched
.priority
= I915_USER_PRIORITY(I915_PRIORITY_MAX
);
1736 for_each_engine(engine
, gt
, id
) {
1737 struct i915_request
*rq_a
, *rq_b
;
1739 if (!intel_engine_has_preemption(engine
))
1742 engine
->execlists
.preempt_hang
.count
= 0;
1744 rq_a
= spinner_create_request(&a
.spin
,
1748 err
= PTR_ERR(rq_a
);
1752 /* Low priority client, but unpreemptable! */
1753 __set_bit(I915_FENCE_FLAG_NOPREEMPT
, &rq_a
->fence
.flags
);
1755 i915_request_add(rq_a
);
1756 if (!igt_wait_for_spinner(&a
.spin
, rq_a
)) {
1757 pr_err("First client failed to start\n");
1761 rq_b
= spinner_create_request(&b
.spin
,
1765 err
= PTR_ERR(rq_b
);
1769 i915_request_add(rq_b
);
1771 /* B is much more important than A! (But A is unpreemptable.) */
1772 GEM_BUG_ON(rq_prio(rq_b
) <= rq_prio(rq_a
));
1774 /* Wait long enough for preemption and timeslicing */
1775 if (igt_wait_for_spinner(&b
.spin
, rq_b
)) {
1776 pr_err("Second client started too early!\n");
1780 igt_spinner_end(&a
.spin
);
1782 if (!igt_wait_for_spinner(&b
.spin
, rq_b
)) {
1783 pr_err("Second client failed to start\n");
1787 igt_spinner_end(&b
.spin
);
1789 if (engine
->execlists
.preempt_hang
.count
) {
1790 pr_err("Preemption recorded x%d; should have been suppressed!\n",
1791 engine
->execlists
.preempt_hang
.count
);
1796 if (igt_flush_test(gt
->i915
))
1802 preempt_client_fini(&b
);
1804 preempt_client_fini(&a
);
1808 igt_spinner_end(&b
.spin
);
1809 igt_spinner_end(&a
.spin
);
1810 intel_gt_set_wedged(gt
);
1815 struct live_preempt_cancel
{
1816 struct intel_engine_cs
*engine
;
1817 struct preempt_client a
, b
;
1820 static int __cancel_active0(struct live_preempt_cancel
*arg
)
1822 struct i915_request
*rq
;
1823 struct igt_live_test t
;
1826 /* Preempt cancel of ELSP0 */
1827 GEM_TRACE("%s(%s)\n", __func__
, arg
->engine
->name
);
1828 if (igt_live_test_begin(&t
, arg
->engine
->i915
,
1829 __func__
, arg
->engine
->name
))
1832 rq
= spinner_create_request(&arg
->a
.spin
,
1833 arg
->a
.ctx
, arg
->engine
,
1838 clear_bit(CONTEXT_BANNED
, &rq
->context
->flags
);
1839 i915_request_get(rq
);
1840 i915_request_add(rq
);
1841 if (!igt_wait_for_spinner(&arg
->a
.spin
, rq
)) {
1846 intel_context_set_banned(rq
->context
);
1847 err
= intel_engine_pulse(arg
->engine
);
1851 err
= wait_for_reset(arg
->engine
, rq
, HZ
/ 2);
1853 pr_err("Cancelled inflight0 request did not reset\n");
1858 i915_request_put(rq
);
1859 if (igt_live_test_end(&t
))
1864 static int __cancel_active1(struct live_preempt_cancel
*arg
)
1866 struct i915_request
*rq
[2] = {};
1867 struct igt_live_test t
;
1870 /* Preempt cancel of ELSP1 */
1871 GEM_TRACE("%s(%s)\n", __func__
, arg
->engine
->name
);
1872 if (igt_live_test_begin(&t
, arg
->engine
->i915
,
1873 __func__
, arg
->engine
->name
))
1876 rq
[0] = spinner_create_request(&arg
->a
.spin
,
1877 arg
->a
.ctx
, arg
->engine
,
1878 MI_NOOP
); /* no preemption */
1880 return PTR_ERR(rq
[0]);
1882 clear_bit(CONTEXT_BANNED
, &rq
[0]->context
->flags
);
1883 i915_request_get(rq
[0]);
1884 i915_request_add(rq
[0]);
1885 if (!igt_wait_for_spinner(&arg
->a
.spin
, rq
[0])) {
1890 rq
[1] = spinner_create_request(&arg
->b
.spin
,
1891 arg
->b
.ctx
, arg
->engine
,
1893 if (IS_ERR(rq
[1])) {
1894 err
= PTR_ERR(rq
[1]);
1898 clear_bit(CONTEXT_BANNED
, &rq
[1]->context
->flags
);
1899 i915_request_get(rq
[1]);
1900 err
= i915_request_await_dma_fence(rq
[1], &rq
[0]->fence
);
1901 i915_request_add(rq
[1]);
1905 intel_context_set_banned(rq
[1]->context
);
1906 err
= intel_engine_pulse(arg
->engine
);
1910 igt_spinner_end(&arg
->a
.spin
);
1911 err
= wait_for_reset(arg
->engine
, rq
[1], HZ
/ 2);
1915 if (rq
[0]->fence
.error
!= 0) {
1916 pr_err("Normal inflight0 request did not complete\n");
1921 if (rq
[1]->fence
.error
!= -EIO
) {
1922 pr_err("Cancelled inflight1 request did not report -EIO\n");
1928 i915_request_put(rq
[1]);
1929 i915_request_put(rq
[0]);
1930 if (igt_live_test_end(&t
))
1935 static int __cancel_queued(struct live_preempt_cancel
*arg
)
1937 struct i915_request
*rq
[3] = {};
1938 struct igt_live_test t
;
1941 /* Full ELSP and one in the wings */
1942 GEM_TRACE("%s(%s)\n", __func__
, arg
->engine
->name
);
1943 if (igt_live_test_begin(&t
, arg
->engine
->i915
,
1944 __func__
, arg
->engine
->name
))
1947 rq
[0] = spinner_create_request(&arg
->a
.spin
,
1948 arg
->a
.ctx
, arg
->engine
,
1951 return PTR_ERR(rq
[0]);
1953 clear_bit(CONTEXT_BANNED
, &rq
[0]->context
->flags
);
1954 i915_request_get(rq
[0]);
1955 i915_request_add(rq
[0]);
1956 if (!igt_wait_for_spinner(&arg
->a
.spin
, rq
[0])) {
1961 rq
[1] = igt_request_alloc(arg
->b
.ctx
, arg
->engine
);
1962 if (IS_ERR(rq
[1])) {
1963 err
= PTR_ERR(rq
[1]);
1967 clear_bit(CONTEXT_BANNED
, &rq
[1]->context
->flags
);
1968 i915_request_get(rq
[1]);
1969 err
= i915_request_await_dma_fence(rq
[1], &rq
[0]->fence
);
1970 i915_request_add(rq
[1]);
1974 rq
[2] = spinner_create_request(&arg
->b
.spin
,
1975 arg
->a
.ctx
, arg
->engine
,
1977 if (IS_ERR(rq
[2])) {
1978 err
= PTR_ERR(rq
[2]);
1982 i915_request_get(rq
[2]);
1983 err
= i915_request_await_dma_fence(rq
[2], &rq
[1]->fence
);
1984 i915_request_add(rq
[2]);
1988 intel_context_set_banned(rq
[2]->context
);
1989 err
= intel_engine_pulse(arg
->engine
);
1993 err
= wait_for_reset(arg
->engine
, rq
[2], HZ
/ 2);
1997 if (rq
[0]->fence
.error
!= -EIO
) {
1998 pr_err("Cancelled inflight0 request did not report -EIO\n");
2003 if (rq
[1]->fence
.error
!= 0) {
2004 pr_err("Normal inflight1 request did not complete\n");
2009 if (rq
[2]->fence
.error
!= -EIO
) {
2010 pr_err("Cancelled queued request did not report -EIO\n");
2016 i915_request_put(rq
[2]);
2017 i915_request_put(rq
[1]);
2018 i915_request_put(rq
[0]);
2019 if (igt_live_test_end(&t
))
2024 static int __cancel_hostile(struct live_preempt_cancel
*arg
)
2026 struct i915_request
*rq
;
2029 /* Preempt cancel non-preemptible spinner in ELSP0 */
2030 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT
))
2033 GEM_TRACE("%s(%s)\n", __func__
, arg
->engine
->name
);
2034 rq
= spinner_create_request(&arg
->a
.spin
,
2035 arg
->a
.ctx
, arg
->engine
,
2036 MI_NOOP
); /* preemption disabled */
2040 clear_bit(CONTEXT_BANNED
, &rq
->context
->flags
);
2041 i915_request_get(rq
);
2042 i915_request_add(rq
);
2043 if (!igt_wait_for_spinner(&arg
->a
.spin
, rq
)) {
2048 intel_context_set_banned(rq
->context
);
2049 err
= intel_engine_pulse(arg
->engine
); /* force reset */
2053 err
= wait_for_reset(arg
->engine
, rq
, HZ
/ 2);
2055 pr_err("Cancelled inflight0 request did not reset\n");
2060 i915_request_put(rq
);
2061 if (igt_flush_test(arg
->engine
->i915
))
2066 static int live_preempt_cancel(void *arg
)
2068 struct intel_gt
*gt
= arg
;
2069 struct live_preempt_cancel data
;
2070 enum intel_engine_id id
;
2074 * To cancel an inflight context, we need to first remove it from the
2075 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
2078 if (!HAS_LOGICAL_RING_PREEMPTION(gt
->i915
))
2081 if (preempt_client_init(gt
, &data
.a
))
2083 if (preempt_client_init(gt
, &data
.b
))
2086 for_each_engine(data
.engine
, gt
, id
) {
2087 if (!intel_engine_has_preemption(data
.engine
))
2090 err
= __cancel_active0(&data
);
2094 err
= __cancel_active1(&data
);
2098 err
= __cancel_queued(&data
);
2102 err
= __cancel_hostile(&data
);
2109 preempt_client_fini(&data
.b
);
2111 preempt_client_fini(&data
.a
);
2116 igt_spinner_end(&data
.b
.spin
);
2117 igt_spinner_end(&data
.a
.spin
);
2118 intel_gt_set_wedged(gt
);
2122 static int live_suppress_self_preempt(void *arg
)
2124 struct intel_gt
*gt
= arg
;
2125 struct intel_engine_cs
*engine
;
2126 struct i915_sched_attr attr
= {
2127 .priority
= I915_USER_PRIORITY(I915_PRIORITY_MAX
)
2129 struct preempt_client a
, b
;
2130 enum intel_engine_id id
;
2134 * Verify that if a preemption request does not cause a change in
2135 * the current execution order, the preempt-to-idle injection is
2136 * skipped and that we do not accidentally apply it after the CS
2140 if (!HAS_LOGICAL_RING_PREEMPTION(gt
->i915
))
2143 if (intel_uc_uses_guc_submission(>
->uc
))
2144 return 0; /* presume black blox */
2146 if (intel_vgpu_active(gt
->i915
))
2147 return 0; /* GVT forces single port & request submission */
2149 if (preempt_client_init(gt
, &a
))
2151 if (preempt_client_init(gt
, &b
))
2154 for_each_engine(engine
, gt
, id
) {
2155 struct i915_request
*rq_a
, *rq_b
;
2158 if (!intel_engine_has_preemption(engine
))
2161 if (igt_flush_test(gt
->i915
))
2164 intel_engine_pm_get(engine
);
2165 engine
->execlists
.preempt_hang
.count
= 0;
2167 rq_a
= spinner_create_request(&a
.spin
,
2171 err
= PTR_ERR(rq_a
);
2172 intel_engine_pm_put(engine
);
2176 i915_request_add(rq_a
);
2177 if (!igt_wait_for_spinner(&a
.spin
, rq_a
)) {
2178 pr_err("First client failed to start\n");
2179 intel_engine_pm_put(engine
);
2183 /* Keep postponing the timer to avoid premature slicing */
2184 mod_timer(&engine
->execlists
.timer
, jiffies
+ HZ
);
2185 for (depth
= 0; depth
< 8; depth
++) {
2186 rq_b
= spinner_create_request(&b
.spin
,
2190 err
= PTR_ERR(rq_b
);
2191 intel_engine_pm_put(engine
);
2194 i915_request_add(rq_b
);
2196 GEM_BUG_ON(i915_request_completed(rq_a
));
2197 engine
->schedule(rq_a
, &attr
);
2198 igt_spinner_end(&a
.spin
);
2200 if (!igt_wait_for_spinner(&b
.spin
, rq_b
)) {
2201 pr_err("Second client failed to start\n");
2202 intel_engine_pm_put(engine
);
2209 igt_spinner_end(&a
.spin
);
2211 if (engine
->execlists
.preempt_hang
.count
) {
2212 pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
2214 engine
->execlists
.preempt_hang
.count
,
2216 intel_engine_pm_put(engine
);
2221 intel_engine_pm_put(engine
);
2222 if (igt_flush_test(gt
->i915
))
2228 preempt_client_fini(&b
);
2230 preempt_client_fini(&a
);
2234 igt_spinner_end(&b
.spin
);
2235 igt_spinner_end(&a
.spin
);
2236 intel_gt_set_wedged(gt
);
2241 static int __i915_sw_fence_call
2242 dummy_notify(struct i915_sw_fence
*fence
, enum i915_sw_fence_notify state
)
2247 static struct i915_request
*dummy_request(struct intel_engine_cs
*engine
)
2249 struct i915_request
*rq
;
2251 rq
= kzalloc(sizeof(*rq
), GFP_KERNEL
);
2255 rq
->engine
= engine
;
2257 spin_lock_init(&rq
->lock
);
2258 INIT_LIST_HEAD(&rq
->fence
.cb_list
);
2259 rq
->fence
.lock
= &rq
->lock
;
2260 rq
->fence
.ops
= &i915_fence_ops
;
2262 i915_sched_node_init(&rq
->sched
);
2264 /* mark this request as permanently incomplete */
2265 rq
->fence
.seqno
= 1;
2266 BUILD_BUG_ON(sizeof(rq
->fence
.seqno
) != 8); /* upper 32b == 0 */
2267 rq
->hwsp_seqno
= (u32
*)&rq
->fence
.seqno
+ 1;
2268 GEM_BUG_ON(i915_request_completed(rq
));
2270 i915_sw_fence_init(&rq
->submit
, dummy_notify
);
2271 set_bit(I915_FENCE_FLAG_ACTIVE
, &rq
->fence
.flags
);
2273 spin_lock_init(&rq
->lock
);
2274 rq
->fence
.lock
= &rq
->lock
;
2275 INIT_LIST_HEAD(&rq
->fence
.cb_list
);
2280 static void dummy_request_free(struct i915_request
*dummy
)
2282 /* We have to fake the CS interrupt to kick the next request */
2283 i915_sw_fence_commit(&dummy
->submit
);
2285 i915_request_mark_complete(dummy
);
2286 dma_fence_signal(&dummy
->fence
);
2288 i915_sched_node_fini(&dummy
->sched
);
2289 i915_sw_fence_fini(&dummy
->submit
);
2291 dma_fence_free(&dummy
->fence
);
2294 static int live_suppress_wait_preempt(void *arg
)
2296 struct intel_gt
*gt
= arg
;
2297 struct preempt_client client
[4];
2298 struct i915_request
*rq
[ARRAY_SIZE(client
)] = {};
2299 struct intel_engine_cs
*engine
;
2300 enum intel_engine_id id
;
2305 * Waiters are given a little priority nudge, but not enough
2306 * to actually cause any preemption. Double check that we do
2307 * not needlessly generate preempt-to-idle cycles.
2310 if (!HAS_LOGICAL_RING_PREEMPTION(gt
->i915
))
2313 if (preempt_client_init(gt
, &client
[0])) /* ELSP[0] */
2315 if (preempt_client_init(gt
, &client
[1])) /* ELSP[1] */
2317 if (preempt_client_init(gt
, &client
[2])) /* head of queue */
2319 if (preempt_client_init(gt
, &client
[3])) /* bystander */
2322 for_each_engine(engine
, gt
, id
) {
2325 if (!intel_engine_has_preemption(engine
))
2328 if (!engine
->emit_init_breadcrumb
)
2331 for (depth
= 0; depth
< ARRAY_SIZE(client
); depth
++) {
2332 struct i915_request
*dummy
;
2334 engine
->execlists
.preempt_hang
.count
= 0;
2336 dummy
= dummy_request(engine
);
2340 for (i
= 0; i
< ARRAY_SIZE(client
); i
++) {
2341 struct i915_request
*this;
2343 this = spinner_create_request(&client
[i
].spin
,
2344 client
[i
].ctx
, engine
,
2347 err
= PTR_ERR(this);
2351 /* Disable NEWCLIENT promotion */
2352 __i915_active_fence_set(&i915_request_timeline(this)->last_request
,
2355 rq
[i
] = i915_request_get(this);
2356 i915_request_add(this);
2359 dummy_request_free(dummy
);
2361 GEM_BUG_ON(i915_request_completed(rq
[0]));
2362 if (!igt_wait_for_spinner(&client
[0].spin
, rq
[0])) {
2363 pr_err("%s: First client failed to start\n",
2367 GEM_BUG_ON(!i915_request_started(rq
[0]));
2369 if (i915_request_wait(rq
[depth
],
2372 pr_err("%s: Waiter depth:%d completed!\n",
2373 engine
->name
, depth
);
2377 for (i
= 0; i
< ARRAY_SIZE(client
); i
++) {
2378 igt_spinner_end(&client
[i
].spin
);
2379 i915_request_put(rq
[i
]);
2383 if (igt_flush_test(gt
->i915
))
2386 if (engine
->execlists
.preempt_hang
.count
) {
2387 pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n",
2389 engine
->execlists
.preempt_hang
.count
,
2399 preempt_client_fini(&client
[3]);
2401 preempt_client_fini(&client
[2]);
2403 preempt_client_fini(&client
[1]);
2405 preempt_client_fini(&client
[0]);
2409 for (i
= 0; i
< ARRAY_SIZE(client
); i
++) {
2410 igt_spinner_end(&client
[i
].spin
);
2411 i915_request_put(rq
[i
]);
2413 intel_gt_set_wedged(gt
);
2418 static int live_chain_preempt(void *arg
)
2420 struct intel_gt
*gt
= arg
;
2421 struct intel_engine_cs
*engine
;
2422 struct preempt_client hi
, lo
;
2423 enum intel_engine_id id
;
2427 * Build a chain AB...BA between two contexts (A, B) and request
2428 * preemption of the last request. It should then complete before
2429 * the previously submitted spinner in B.
2432 if (!HAS_LOGICAL_RING_PREEMPTION(gt
->i915
))
2435 if (preempt_client_init(gt
, &hi
))
2438 if (preempt_client_init(gt
, &lo
))
2441 for_each_engine(engine
, gt
, id
) {
2442 struct i915_sched_attr attr
= {
2443 .priority
= I915_USER_PRIORITY(I915_PRIORITY_MAX
),
2445 struct igt_live_test t
;
2446 struct i915_request
*rq
;
2447 int ring_size
, count
, i
;
2449 if (!intel_engine_has_preemption(engine
))
2452 rq
= spinner_create_request(&lo
.spin
,
2458 i915_request_get(rq
);
2459 i915_request_add(rq
);
2461 ring_size
= rq
->wa_tail
- rq
->head
;
2463 ring_size
+= rq
->ring
->size
;
2464 ring_size
= rq
->ring
->size
/ ring_size
;
2465 pr_debug("%s(%s): Using maximum of %d requests\n",
2466 __func__
, engine
->name
, ring_size
);
2468 igt_spinner_end(&lo
.spin
);
2469 if (i915_request_wait(rq
, 0, HZ
/ 2) < 0) {
2470 pr_err("Timed out waiting to flush %s\n", engine
->name
);
2471 i915_request_put(rq
);
2474 i915_request_put(rq
);
2476 if (igt_live_test_begin(&t
, gt
->i915
, __func__
, engine
->name
)) {
2481 for_each_prime_number_from(count
, 1, ring_size
) {
2482 rq
= spinner_create_request(&hi
.spin
,
2487 i915_request_add(rq
);
2488 if (!igt_wait_for_spinner(&hi
.spin
, rq
))
2491 rq
= spinner_create_request(&lo
.spin
,
2496 i915_request_add(rq
);
2498 for (i
= 0; i
< count
; i
++) {
2499 rq
= igt_request_alloc(lo
.ctx
, engine
);
2502 i915_request_add(rq
);
2505 rq
= igt_request_alloc(hi
.ctx
, engine
);
2509 i915_request_get(rq
);
2510 i915_request_add(rq
);
2511 engine
->schedule(rq
, &attr
);
2513 igt_spinner_end(&hi
.spin
);
2514 if (i915_request_wait(rq
, 0, HZ
/ 5) < 0) {
2515 struct drm_printer p
=
2516 drm_info_printer(gt
->i915
->drm
.dev
);
2518 pr_err("Failed to preempt over chain of %d\n",
2520 intel_engine_dump(engine
, &p
,
2521 "%s\n", engine
->name
);
2522 i915_request_put(rq
);
2525 igt_spinner_end(&lo
.spin
);
2526 i915_request_put(rq
);
2528 rq
= igt_request_alloc(lo
.ctx
, engine
);
2532 i915_request_get(rq
);
2533 i915_request_add(rq
);
2535 if (i915_request_wait(rq
, 0, HZ
/ 5) < 0) {
2536 struct drm_printer p
=
2537 drm_info_printer(gt
->i915
->drm
.dev
);
2539 pr_err("Failed to flush low priority chain of %d requests\n",
2541 intel_engine_dump(engine
, &p
,
2542 "%s\n", engine
->name
);
2544 i915_request_put(rq
);
2547 i915_request_put(rq
);
2550 if (igt_live_test_end(&t
)) {
2558 preempt_client_fini(&lo
);
2560 preempt_client_fini(&hi
);
2564 igt_spinner_end(&hi
.spin
);
2565 igt_spinner_end(&lo
.spin
);
2566 intel_gt_set_wedged(gt
);
2571 static int create_gang(struct intel_engine_cs
*engine
,
2572 struct i915_request
**prev
)
2574 struct drm_i915_gem_object
*obj
;
2575 struct intel_context
*ce
;
2576 struct i915_request
*rq
;
2577 struct i915_vma
*vma
;
2581 ce
= intel_context_create(engine
);
2585 obj
= i915_gem_object_create_internal(engine
->i915
, 4096);
2591 vma
= i915_vma_instance(obj
, ce
->vm
, NULL
);
2597 err
= i915_vma_pin(vma
, 0, 0, PIN_USER
);
2601 cs
= i915_gem_object_pin_map(obj
, I915_MAP_WC
);
2605 /* Semaphore target: spin until zero */
2606 *cs
++ = MI_ARB_ON_OFF
| MI_ARB_ENABLE
;
2608 *cs
++ = MI_SEMAPHORE_WAIT
|
2610 MI_SEMAPHORE_SAD_EQ_SDD
;
2612 *cs
++ = lower_32_bits(vma
->node
.start
);
2613 *cs
++ = upper_32_bits(vma
->node
.start
);
2616 u64 offset
= (*prev
)->batch
->node
.start
;
2618 /* Terminate the spinner in the next lower priority batch. */
2619 *cs
++ = MI_STORE_DWORD_IMM_GEN4
;
2620 *cs
++ = lower_32_bits(offset
);
2621 *cs
++ = upper_32_bits(offset
);
2625 *cs
++ = MI_BATCH_BUFFER_END
;
2626 i915_gem_object_flush_map(obj
);
2627 i915_gem_object_unpin_map(obj
);
2629 rq
= intel_context_create_request(ce
);
2634 i915_request_get(rq
);
2637 err
= i915_request_await_object(rq
, vma
->obj
, false);
2639 err
= i915_vma_move_to_active(vma
, rq
, 0);
2641 err
= rq
->engine
->emit_bb_start(rq
,
2644 i915_vma_unlock(vma
);
2645 i915_request_add(rq
);
2649 i915_gem_object_put(obj
);
2650 intel_context_put(ce
);
2652 rq
->client_link
.next
= &(*prev
)->client_link
;
2657 i915_request_put(rq
);
2659 i915_gem_object_put(obj
);
2661 intel_context_put(ce
);
2665 static int live_preempt_gang(void *arg
)
2667 struct intel_gt
*gt
= arg
;
2668 struct intel_engine_cs
*engine
;
2669 enum intel_engine_id id
;
2671 if (!HAS_LOGICAL_RING_PREEMPTION(gt
->i915
))
2675 * Build as long a chain of preempters as we can, with each
2676 * request higher priority than the last. Once we are ready, we release
2677 * the last batch which then precolates down the chain, each releasing
2678 * the next oldest in turn. The intent is to simply push as hard as we
2679 * can with the number of preemptions, trying to exceed narrow HW
2680 * limits. At a minimum, we insist that we can sort all the user
2681 * high priority levels into execution order.
2684 for_each_engine(engine
, gt
, id
) {
2685 struct i915_request
*rq
= NULL
;
2686 struct igt_live_test t
;
2687 IGT_TIMEOUT(end_time
);
2692 if (!intel_engine_has_preemption(engine
))
2695 if (igt_live_test_begin(&t
, gt
->i915
, __func__
, engine
->name
))
2699 struct i915_sched_attr attr
= {
2700 .priority
= I915_USER_PRIORITY(prio
++),
2703 err
= create_gang(engine
, &rq
);
2707 /* Submit each spinner at increasing priority */
2708 engine
->schedule(rq
, &attr
);
2710 if (prio
<= I915_PRIORITY_MAX
)
2713 if (prio
> (INT_MAX
>> I915_USER_PRIORITY_SHIFT
))
2716 if (__igt_timeout(end_time
, NULL
))
2719 pr_debug("%s: Preempt chain of %d requests\n",
2720 engine
->name
, prio
);
2723 * Such that the last spinner is the highest priority and
2724 * should execute first. When that spinner completes,
2725 * it will terminate the next lowest spinner until there
2726 * are no more spinners and the gang is complete.
2728 cs
= i915_gem_object_pin_map(rq
->batch
->obj
, I915_MAP_WC
);
2731 i915_gem_object_unpin_map(rq
->batch
->obj
);
2734 intel_gt_set_wedged(gt
);
2737 while (rq
) { /* wait for each rq from highest to lowest prio */
2738 struct i915_request
*n
=
2739 list_next_entry(rq
, client_link
);
2741 if (err
== 0 && i915_request_wait(rq
, 0, HZ
/ 5) < 0) {
2742 struct drm_printer p
=
2743 drm_info_printer(engine
->i915
->drm
.dev
);
2745 pr_err("Failed to flush chain of %d requests, at %d\n",
2746 prio
, rq_prio(rq
) >> I915_USER_PRIORITY_SHIFT
);
2747 intel_engine_dump(engine
, &p
,
2748 "%s\n", engine
->name
);
2753 i915_request_put(rq
);
2757 if (igt_live_test_end(&t
))
2766 static int live_preempt_timeout(void *arg
)
2768 struct intel_gt
*gt
= arg
;
2769 struct i915_gem_context
*ctx_hi
, *ctx_lo
;
2770 struct igt_spinner spin_lo
;
2771 struct intel_engine_cs
*engine
;
2772 enum intel_engine_id id
;
2776 * Check that we force preemption to occur by cancelling the previous
2777 * context if it refuses to yield the GPU.
2779 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT
))
2782 if (!HAS_LOGICAL_RING_PREEMPTION(gt
->i915
))
2785 if (!intel_has_reset_engine(gt
))
2788 if (igt_spinner_init(&spin_lo
, gt
))
2791 ctx_hi
= kernel_context(gt
->i915
);
2794 ctx_hi
->sched
.priority
=
2795 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY
);
2797 ctx_lo
= kernel_context(gt
->i915
);
2800 ctx_lo
->sched
.priority
=
2801 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY
);
2803 for_each_engine(engine
, gt
, id
) {
2804 unsigned long saved_timeout
;
2805 struct i915_request
*rq
;
2807 if (!intel_engine_has_preemption(engine
))
2810 rq
= spinner_create_request(&spin_lo
, ctx_lo
, engine
,
2811 MI_NOOP
); /* preemption disabled */
2817 i915_request_add(rq
);
2818 if (!igt_wait_for_spinner(&spin_lo
, rq
)) {
2819 intel_gt_set_wedged(gt
);
2824 rq
= igt_request_alloc(ctx_hi
, engine
);
2826 igt_spinner_end(&spin_lo
);
2831 /* Flush the previous CS ack before changing timeouts */
2832 while (READ_ONCE(engine
->execlists
.pending
[0]))
2835 saved_timeout
= engine
->props
.preempt_timeout_ms
;
2836 engine
->props
.preempt_timeout_ms
= 1; /* in ms, -> 1 jiffie */
2838 i915_request_get(rq
);
2839 i915_request_add(rq
);
2841 intel_engine_flush_submission(engine
);
2842 engine
->props
.preempt_timeout_ms
= saved_timeout
;
2844 if (i915_request_wait(rq
, 0, HZ
/ 10) < 0) {
2845 intel_gt_set_wedged(gt
);
2846 i915_request_put(rq
);
2851 igt_spinner_end(&spin_lo
);
2852 i915_request_put(rq
);
2857 kernel_context_close(ctx_lo
);
2859 kernel_context_close(ctx_hi
);
2861 igt_spinner_fini(&spin_lo
);
2865 static int random_range(struct rnd_state
*rnd
, int min
, int max
)
2867 return i915_prandom_u32_max_state(max
- min
, rnd
) + min
;
2870 static int random_priority(struct rnd_state
*rnd
)
2872 return random_range(rnd
, I915_PRIORITY_MIN
, I915_PRIORITY_MAX
);
2875 struct preempt_smoke
{
2876 struct intel_gt
*gt
;
2877 struct i915_gem_context
**contexts
;
2878 struct intel_engine_cs
*engine
;
2879 struct drm_i915_gem_object
*batch
;
2880 unsigned int ncontext
;
2881 struct rnd_state prng
;
2882 unsigned long count
;
2885 static struct i915_gem_context
*smoke_context(struct preempt_smoke
*smoke
)
2887 return smoke
->contexts
[i915_prandom_u32_max_state(smoke
->ncontext
,
2891 static int smoke_submit(struct preempt_smoke
*smoke
,
2892 struct i915_gem_context
*ctx
, int prio
,
2893 struct drm_i915_gem_object
*batch
)
2895 struct i915_request
*rq
;
2896 struct i915_vma
*vma
= NULL
;
2900 struct i915_address_space
*vm
;
2902 vm
= i915_gem_context_get_vm_rcu(ctx
);
2903 vma
= i915_vma_instance(batch
, vm
, NULL
);
2906 return PTR_ERR(vma
);
2908 err
= i915_vma_pin(vma
, 0, 0, PIN_USER
);
2913 ctx
->sched
.priority
= prio
;
2915 rq
= igt_request_alloc(ctx
, smoke
->engine
);
2923 err
= i915_request_await_object(rq
, vma
->obj
, false);
2925 err
= i915_vma_move_to_active(vma
, rq
, 0);
2927 err
= rq
->engine
->emit_bb_start(rq
,
2930 i915_vma_unlock(vma
);
2933 i915_request_add(rq
);
2937 i915_vma_unpin(vma
);
2942 static int smoke_crescendo_thread(void *arg
)
2944 struct preempt_smoke
*smoke
= arg
;
2945 IGT_TIMEOUT(end_time
);
2946 unsigned long count
;
2950 struct i915_gem_context
*ctx
= smoke_context(smoke
);
2953 err
= smoke_submit(smoke
,
2954 ctx
, count
% I915_PRIORITY_MAX
,
2960 } while (!__igt_timeout(end_time
, NULL
));
2962 smoke
->count
= count
;
2966 static int smoke_crescendo(struct preempt_smoke
*smoke
, unsigned int flags
)
2967 #define BATCH BIT(0)
2969 struct task_struct
*tsk
[I915_NUM_ENGINES
] = {};
2970 struct preempt_smoke arg
[I915_NUM_ENGINES
];
2971 struct intel_engine_cs
*engine
;
2972 enum intel_engine_id id
;
2973 unsigned long count
;
2976 for_each_engine(engine
, smoke
->gt
, id
) {
2978 arg
[id
].engine
= engine
;
2979 if (!(flags
& BATCH
))
2980 arg
[id
].batch
= NULL
;
2983 tsk
[id
] = kthread_run(smoke_crescendo_thread
, &arg
,
2984 "igt/smoke:%d", id
);
2985 if (IS_ERR(tsk
[id
])) {
2986 err
= PTR_ERR(tsk
[id
]);
2989 get_task_struct(tsk
[id
]);
2992 yield(); /* start all threads before we kthread_stop() */
2995 for_each_engine(engine
, smoke
->gt
, id
) {
2998 if (IS_ERR_OR_NULL(tsk
[id
]))
3001 status
= kthread_stop(tsk
[id
]);
3005 count
+= arg
[id
].count
;
3007 put_task_struct(tsk
[id
]);
3010 pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
3012 RUNTIME_INFO(smoke
->gt
->i915
)->num_engines
, smoke
->ncontext
);
static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
{
	enum intel_engine_id id;
	IGT_TIMEOUT(end_time);
	unsigned long count;

	count = 0;
	do {
		for_each_engine(smoke->engine, smoke->gt, id) {
			struct i915_gem_context *ctx = smoke_context(smoke);
			int err;

			err = smoke_submit(smoke,
					   ctx, random_priority(&smoke->prng),
					   flags & BATCH ? smoke->batch : NULL);
			if (err)
				return err;

			count++;
		}
	} while (!__igt_timeout(end_time, NULL));

	pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
		count, flags,
		RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
	return 0;
}
static int live_preempt_smoke(void *arg)
{
	struct preempt_smoke smoke = {
		.gt = arg,
		.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
		.ncontext = 1024,
	};
	const unsigned int phase[] = { 0, BATCH };
	struct igt_live_test t;
	int err = -ENOMEM;
	u32 *cs;
	int n;

	if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915))
		return 0;

	smoke.contexts = kmalloc_array(smoke.ncontext,
				       sizeof(*smoke.contexts),
				       GFP_KERNEL);
	if (!smoke.contexts)
		return -ENOMEM;

	smoke.batch =
		i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
	if (IS_ERR(smoke.batch)) {
		err = PTR_ERR(smoke.batch);
		goto err_free;
	}

	cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_batch;
	}
	for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
		cs[n] = MI_ARB_CHECK;
	cs[n] = MI_BATCH_BUFFER_END;
	i915_gem_object_flush_map(smoke.batch);
	i915_gem_object_unpin_map(smoke.batch);

	if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
		err = -EIO;
		goto err_batch;
	}

	for (n = 0; n < smoke.ncontext; n++) {
		smoke.contexts[n] = kernel_context(smoke.gt->i915);
		if (!smoke.contexts[n])
			goto err_ctx;
	}

	for (n = 0; n < ARRAY_SIZE(phase); n++) {
		err = smoke_crescendo(&smoke, phase[n]);
		if (err)
			goto err_ctx;

		err = smoke_random(&smoke, phase[n]);
		if (err)
			goto err_ctx;
	}

err_ctx:
	if (igt_live_test_end(&t))
		err = -EIO;

	for (n = 0; n < smoke.ncontext; n++) {
		if (!smoke.contexts[n])
			break;
		kernel_context_close(smoke.contexts[n]);
	}

err_batch:
	i915_gem_object_put(smoke.batch);
err_free:
	kfree(smoke.contexts);

	return err;
}
static int nop_virtual_engine(struct intel_gt *gt,
			      struct intel_engine_cs **siblings,
			      unsigned int nsibling,
			      unsigned int nctx,
			      unsigned int flags)
#define CHAIN BIT(0)
{
	IGT_TIMEOUT(end_time);
	struct i915_request *request[16] = {};
	struct intel_context *ve[16];
	unsigned long n, prime, nc;
	struct igt_live_test t;
	ktime_t times[2] = {};
	int err;

	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));

	for (n = 0; n < nctx; n++) {
		ve[n] = intel_execlists_create_virtual(siblings, nsibling);
		if (IS_ERR(ve[n])) {
			err = PTR_ERR(ve[n]);
			nctx = n;
			goto out;
		}

		err = intel_context_pin(ve[n]);
		if (err) {
			intel_context_put(ve[n]);
			nctx = n;
			goto out;
		}
	}

	err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
	if (err)
		goto out;

	for_each_prime_number_from(prime, 1, 8192) {
		times[1] = ktime_get_raw();

		if (flags & CHAIN) {
			for (nc = 0; nc < nctx; nc++) {
				for (n = 0; n < prime; n++) {
					struct i915_request *rq;

					rq = i915_request_create(ve[nc]);
					if (IS_ERR(rq)) {
						err = PTR_ERR(rq);
						goto out;
					}

					i915_request_put(request[nc]);
					request[nc] = i915_request_get(rq);
					i915_request_add(rq);
				}
			}
		} else {
			for (n = 0; n < prime; n++) {
				for (nc = 0; nc < nctx; nc++) {
					struct i915_request *rq;

					rq = i915_request_create(ve[nc]);
					if (IS_ERR(rq)) {
						err = PTR_ERR(rq);
						goto out;
					}

					i915_request_put(request[nc]);
					request[nc] = i915_request_get(rq);
					i915_request_add(rq);
				}
			}
		}

		for (nc = 0; nc < nctx; nc++) {
			if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
				pr_err("%s(%s): wait for %llx:%lld timed out\n",
				       __func__, ve[0]->engine->name,
				       request[nc]->fence.context,
				       request[nc]->fence.seqno);

				GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
					  __func__, ve[0]->engine->name,
					  request[nc]->fence.context,
					  request[nc]->fence.seqno);
				GEM_TRACE_DUMP();
				intel_gt_set_wedged(gt);
				break;
			}
		}

		times[1] = ktime_sub(ktime_get_raw(), times[1]);
		if (prime == 1)
			times[0] = times[1];

		for (nc = 0; nc < nctx; nc++) {
			i915_request_put(request[nc]);
			request[nc] = NULL;
		}

		if (__igt_timeout(end_time, NULL))
			break;
	}

	err = igt_live_test_end(&t);
	if (err)
		goto out;

	pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
		nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
		prime, div64_u64(ktime_to_ns(times[1]), prime));

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	for (nc = 0; nc < nctx; nc++) {
		i915_request_put(request[nc]);
		intel_context_unpin(ve[nc]);
		intel_context_put(ve[nc]);
	}
	return err;
}
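/*
 * The latency report printed by nop_virtual_engine() above compares a single
 * request (times[0], sampled at prime == 1) against the amortised cost per
 * request when 'prime' requests are submitted back to back on the virtual
 * engine (times[1] / prime).
 */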
static int live_virtual_engine(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned int class, inst;
	int err;

	if (intel_uc_uses_guc_submission(&gt->uc))
		return 0;

	for_each_engine(engine, gt, id) {
		err = nop_virtual_engine(gt, &engine, 1, 1, 0);
		if (err) {
			pr_err("Failed to wrap engine %s: err=%d\n",
			       engine->name, err);
			return err;
		}
	}

	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
		int nsibling, n;

		nsibling = 0;
		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
			if (!gt->engine_class[class][inst])
				continue;

			siblings[nsibling++] = gt->engine_class[class][inst];
		}
		if (nsibling < 2)
			continue;

		for (n = 1; n <= nsibling + 1; n++) {
			err = nop_virtual_engine(gt, siblings, nsibling,
						 n, 0);
			if (err)
				return err;
		}

		err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
		if (err)
			return err;
	}

	return 0;
}
static int mask_virtual_engine(struct intel_gt *gt,
			       struct intel_engine_cs **siblings,
			       unsigned int nsibling)
{
	struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
	struct intel_context *ve;
	struct igt_live_test t;
	unsigned int n;
	int err;

	/*
	 * Check that by setting the execution mask on a request, we can
	 * restrict it to our desired engine within the virtual engine.
	 */

	ve = intel_execlists_create_virtual(siblings, nsibling);
	if (IS_ERR(ve))
		return PTR_ERR(ve);

	err = intel_context_pin(ve);
	if (err)
		goto out_put;

	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
	if (err)
		goto out_unpin;

	for (n = 0; n < nsibling; n++) {
		request[n] = i915_request_create(ve);
		if (IS_ERR(request[n])) {
			err = PTR_ERR(request[n]);
			nsibling = n;
			goto out;
		}

		/* Reverse order as it's more likely to be unnatural */
		request[n]->execution_mask = siblings[nsibling - n - 1]->mask;

		i915_request_get(request[n]);
		i915_request_add(request[n]);
	}

	for (n = 0; n < nsibling; n++) {
		if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
			pr_err("%s(%s): wait for %llx:%lld timed out\n",
			       __func__, ve->engine->name,
			       request[n]->fence.context,
			       request[n]->fence.seqno);

			GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
				  __func__, ve->engine->name,
				  request[n]->fence.context,
				  request[n]->fence.seqno);
			GEM_TRACE_DUMP();
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto out;
		}

		if (request[n]->engine != siblings[nsibling - n - 1]) {
			pr_err("Executed on wrong sibling '%s', expected '%s'\n",
			       request[n]->engine->name,
			       siblings[nsibling - n - 1]->name);
			err = -EINVAL;
			goto out;
		}
	}

	err = igt_live_test_end(&t);
out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	for (n = 0; n < nsibling; n++)
		i915_request_put(request[n]);

out_unpin:
	intel_context_unpin(ve);
out_put:
	intel_context_put(ve);
	return err;
}
static int live_virtual_mask(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
	unsigned int class, inst;
	int err;

	if (intel_uc_uses_guc_submission(&gt->uc))
		return 0;

	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
		unsigned int nsibling;

		nsibling = 0;
		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
			if (!gt->engine_class[class][inst])
				continue;

			siblings[nsibling++] = gt->engine_class[class][inst];
		}
		if (nsibling < 2)
			continue;

		err = mask_virtual_engine(gt, siblings, nsibling);
		if (err)
			return err;
	}

	return 0;
}
static int preserved_virtual_engine(struct intel_gt *gt,
				    struct intel_engine_cs **siblings,
				    unsigned int nsibling)
{
	struct i915_request *last = NULL;
	struct intel_context *ve;
	struct i915_vma *scratch;
	struct igt_live_test t;
	unsigned int n;
	u32 *cs;
	int err;

	scratch = create_scratch(siblings[0]->gt);
	if (IS_ERR(scratch))
		return PTR_ERR(scratch);

	err = i915_vma_sync(scratch);
	if (err)
		goto out_scratch;

	ve = intel_execlists_create_virtual(siblings, nsibling);
	if (IS_ERR(ve)) {
		err = PTR_ERR(ve);
		goto out_scratch;
	}

	err = intel_context_pin(ve);
	if (err)
		goto out_put;

	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
	if (err)
		goto out_unpin;

	for (n = 0; n < NUM_GPR_DW; n++) {
		struct intel_engine_cs *engine = siblings[n % nsibling];
		struct i915_request *rq;

		rq = i915_request_create(ve);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_end;
		}

		i915_request_put(last);
		last = i915_request_get(rq);

		cs = intel_ring_begin(rq, 8);
		if (IS_ERR(cs)) {
			i915_request_add(rq);
			err = PTR_ERR(cs);
			goto out_end;
		}

		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
		*cs++ = CS_GPR(engine, n);
		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
		*cs++ = 0;

		*cs++ = MI_LOAD_REGISTER_IMM(1);
		*cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
		*cs++ = n + 1;

		*cs++ = MI_NOOP;
		intel_ring_advance(rq, cs);

		/* Restrict this request to run on a particular engine */
		rq->execution_mask = engine->mask;
		i915_request_add(rq);
	}

	if (i915_request_wait(last, 0, HZ / 5) < 0) {
		err = -ETIME;
		goto out_end;
	}

	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto out_end;
	}

	for (n = 0; n < NUM_GPR_DW; n++) {
		if (cs[n] != n) {
			pr_err("Incorrect value[%d] found for GPR[%d]\n",
			       cs[n], n);
			err = -EINVAL;
			break;
		}
	}

	i915_gem_object_unpin_map(scratch->obj);

out_end:
	if (igt_live_test_end(&t))
		err = -EIO;
	i915_request_put(last);
out_unpin:
	intel_context_unpin(ve);
out_put:
	intel_context_put(ve);
out_scratch:
	i915_vma_unpin_and_release(&scratch, 0);
	return err;
}
static int live_virtual_preserved(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
	unsigned int class, inst;

	/*
	 * Check that the context image retains non-privileged (user) registers
	 * from one engine to the next. For this we check that the CS_GPR
	 * are preserved.
	 */

	if (intel_uc_uses_guc_submission(&gt->uc))
		return 0;

	/* As we use CS_GPR we cannot run before they existed on all engines. */
	if (INTEL_GEN(gt->i915) < 9)
		return 0;

	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
		int nsibling, err;

		nsibling = 0;
		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
			if (!gt->engine_class[class][inst])
				continue;

			siblings[nsibling++] = gt->engine_class[class][inst];
		}
		if (nsibling < 2)
			continue;

		err = preserved_virtual_engine(gt, siblings, nsibling);
		if (err)
			return err;
	}

	return 0;
}
static int bond_virtual_engine(struct intel_gt *gt,
			       unsigned int class,
			       struct intel_engine_cs **siblings,
			       unsigned int nsibling,
			       unsigned int flags)
#define BOND_SCHEDULE BIT(0)
{
	struct intel_engine_cs *master;
	struct i915_request *rq[16];
	enum intel_engine_id id;
	struct igt_spinner spin;
	unsigned long n;
	int err;

	/*
	 * A set of bonded requests is intended to be run concurrently
	 * across a number of engines. We use one request per-engine
	 * and a magic fence to schedule each of the bonded requests
	 * at the same time. A consequence of our current scheduler is that
	 * we only move requests to the HW ready queue when the request
	 * becomes ready, that is when all of its prerequisite fences have
	 * been signaled. As one of those fences is the master submit fence,
	 * there is a delay on all secondary fences as the HW may be
	 * currently busy. Equally, as all the requests are independent,
	 * they may have other fences that delay individual request
	 * submission to HW. Ergo, we do not guarantee that all requests are
	 * immediately submitted to HW at the same time, just that if the
	 * rules are abided by, they are ready at the same time as the
	 * first is submitted. Userspace can embed semaphores in its batch
	 * to ensure parallel execution of its phases as it requires.
	 * Though naturally it gets requested that perhaps the scheduler should
	 * take care of parallel execution, even across preemption events on
	 * different HW. (The proper answer is of course "lalalala".)
	 *
	 * With the submit-fence, we have identified three possible phases
	 * of synchronisation depending on the master fence: queued (not
	 * ready), executing, and signaled. The first two are quite simple
	 * and checked below. However, the signaled master fence handling is
	 * contentious. Currently we do not distinguish between a signaled
	 * fence and an expired fence, as once signaled it does not convey
	 * any information about the previous execution. It may even be freed
	 * and hence checking later it may not exist at all. Ergo we currently
	 * do not apply the bonding constraint for an already signaled fence,
	 * as our expectation is that it should not constrain the secondaries
	 * and is outside of the scope of the bonded request API (i.e. all
	 * userspace requests are meant to be running in parallel). As
	 * it imposes no constraint, and is effectively a no-op, we do not
	 * check below as normal execution flows are checked extensively above.
	 *
	 * XXX Is the degenerate handling of signaled submit fences the
	 * expected behaviour for userspace?
	 */
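
	/*
	 * For reference only: from userspace the equivalent flow is roughly
	 * to export the master batch's fence with I915_EXEC_FENCE_OUT and
	 * then submit each bonded batch with I915_EXEC_FENCE_SUBMIT pointing
	 * at that fence, so the secondaries become ready for submission when
	 * the master is submitted. This sketch of the execbuf interface is
	 * illustrative only and is not exercised by this selftest.
	 */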
	GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	err = 0;
	rq[0] = ERR_PTR(-ENOMEM);
	for_each_engine(master, gt, id) {
		struct i915_sw_fence fence = {};
		struct intel_context *ce;

		if (master->class == class)
			continue;

		ce = intel_context_create(master);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto out;
		}

		memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));

		rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP);
		intel_context_put(ce);
		if (IS_ERR(rq[0])) {
			err = PTR_ERR(rq[0]);
			goto out;
		}
		i915_request_get(rq[0]);

		if (flags & BOND_SCHEDULE) {
			onstack_fence_init(&fence);
			err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
							       &fence,
							       GFP_KERNEL);
		}

		i915_request_add(rq[0]);
		if (err < 0)
			goto out;

		if (!(flags & BOND_SCHEDULE) &&
		    !igt_wait_for_spinner(&spin, rq[0])) {
			err = -EIO;
			goto out;
		}

		for (n = 0; n < nsibling; n++) {
			struct intel_context *ve;

			ve = intel_execlists_create_virtual(siblings, nsibling);
			if (IS_ERR(ve)) {
				err = PTR_ERR(ve);
				onstack_fence_fini(&fence);
				goto out;
			}

			err = intel_virtual_engine_attach_bond(ve->engine,
							       master,
							       siblings[n]);
			if (err) {
				intel_context_put(ve);
				onstack_fence_fini(&fence);
				goto out;
			}

			err = intel_context_pin(ve);
			intel_context_put(ve);
			if (err) {
				onstack_fence_fini(&fence);
				goto out;
			}

			rq[n + 1] = i915_request_create(ve);
			intel_context_unpin(ve);
			if (IS_ERR(rq[n + 1])) {
				err = PTR_ERR(rq[n + 1]);
				onstack_fence_fini(&fence);
				goto out;
			}
			i915_request_get(rq[n + 1]);

			err = i915_request_await_execution(rq[n + 1],
							   &rq[0]->fence,
							   ve->engine->bond_execute);
			i915_request_add(rq[n + 1]);
			if (err < 0) {
				onstack_fence_fini(&fence);
				goto out;
			}
		}
		onstack_fence_fini(&fence);
		intel_engine_flush_submission(master);
		igt_spinner_end(&spin);

		if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
			pr_err("Master request did not execute (on %s)!\n",
			       rq[0]->engine->name);
			err = -EIO;
			goto out;
		}

		for (n = 0; n < nsibling; n++) {
			if (i915_request_wait(rq[n + 1], 0,
					      MAX_SCHEDULE_TIMEOUT) < 0) {
				err = -EIO;
				goto out;
			}

			if (rq[n + 1]->engine != siblings[n]) {
				pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
				       siblings[n]->name,
				       rq[n + 1]->engine->name,
				       rq[0]->engine->name);
				err = -EINVAL;
				goto out;
			}
		}

		for (n = 0; !IS_ERR(rq[n]); n++)
			i915_request_put(rq[n]);
		rq[0] = ERR_PTR(-ENOMEM);
	}

out:
	for (n = 0; !IS_ERR(rq[n]); n++)
		i915_request_put(rq[n]);
	if (igt_flush_test(gt->i915))
		err = -EIO;

	igt_spinner_fini(&spin);
	return err;
}
static int live_virtual_bond(void *arg)
{
	static const struct phase {
		const char *name;
		unsigned int flags;
	} phases[] = {
		{ "", 0 },
		{ "schedule", BOND_SCHEDULE },
		{ },
	};
	struct intel_gt *gt = arg;
	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
	unsigned int class, inst;
	int err;

	if (intel_uc_uses_guc_submission(&gt->uc))
		return 0;

	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
		const struct phase *p;
		int nsibling;

		nsibling = 0;
		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
			if (!gt->engine_class[class][inst])
				continue;

			GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
			siblings[nsibling++] = gt->engine_class[class][inst];
		}
		if (nsibling < 2)
			continue;

		for (p = phases; p->name; p++) {
			err = bond_virtual_engine(gt,
						  class, siblings, nsibling,
						  p->flags);
			if (err) {
				pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
				       __func__, p->name, class, nsibling, err);
				return err;
			}
		}
	}

	return 0;
}
static int reset_virtual_engine(struct intel_gt *gt,
				struct intel_engine_cs **siblings,
				unsigned int nsibling)
{
	struct intel_engine_cs *engine;
	struct intel_context *ve;
	unsigned long *heartbeat;
	struct igt_spinner spin;
	struct i915_request *rq;
	unsigned int n;
	int err = 0;

	/*
	 * In order to support offline error capture for fast preempt reset,
	 * we need to decouple the guilty request and ensure that it and its
	 * descendents are not executed while the capture is in progress.
	 */

	heartbeat = kmalloc_array(nsibling, sizeof(*heartbeat), GFP_KERNEL);
	if (!heartbeat)
		return -ENOMEM;

	if (igt_spinner_init(&spin, gt)) {
		err = -ENOMEM;
		goto out_free;
	}

	ve = intel_execlists_create_virtual(siblings, nsibling);
	if (IS_ERR(ve)) {
		err = PTR_ERR(ve);
		goto out_spin;
	}

	for (n = 0; n < nsibling; n++)
		engine_heartbeat_disable(siblings[n], &heartbeat[n]);

	rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_heartbeat;
	}
	i915_request_add(rq);

	if (!igt_wait_for_spinner(&spin, rq)) {
		intel_gt_set_wedged(gt);
		err = -ETIME;
		goto out_heartbeat;
	}

	engine = rq->engine;
	GEM_BUG_ON(engine == ve->engine);

	/* Take ownership of the reset and tasklet */
	if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
			     &gt->reset.flags)) {
		intel_gt_set_wedged(gt);
		err = -EBUSY;
		goto out_heartbeat;
	}
	tasklet_disable(&engine->execlists.tasklet);

	engine->execlists.tasklet.func(engine->execlists.tasklet.data);
	GEM_BUG_ON(execlists_active(&engine->execlists) != rq);

	/* Fake a preemption event; failed of course */
	spin_lock_irq(&engine->active.lock);
	__unwind_incomplete_requests(engine);
	spin_unlock_irq(&engine->active.lock);
	GEM_BUG_ON(rq->engine != ve->engine);

	/* Reset the engine while keeping our active request on hold */
	execlists_hold(engine, rq);
	GEM_BUG_ON(!i915_request_on_hold(rq));

	intel_engine_reset(engine, NULL);
	GEM_BUG_ON(rq->fence.error != -EIO);

	/* Release our grasp on the engine, letting CS flow again */
	tasklet_enable(&engine->execlists.tasklet);
	clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags);

	/* Check that we do not resubmit the held request */
	i915_request_get(rq);
	if (!i915_request_wait(rq, 0, HZ / 5)) {
		pr_err("%s: on hold request completed!\n",
		       engine->name);
		intel_gt_set_wedged(gt);
		err = -EIO;
		goto out_rq;
	}
	GEM_BUG_ON(!i915_request_on_hold(rq));

	/* But is resubmitted on release */
	execlists_unhold(engine, rq);
	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		pr_err("%s: held request did not complete!\n",
		       engine->name);
		intel_gt_set_wedged(gt);
		err = -ETIME;
	}

out_rq:
	i915_request_put(rq);
out_heartbeat:
	for (n = 0; n < nsibling; n++)
		engine_heartbeat_enable(siblings[n], heartbeat[n]);

	intel_context_put(ve);
out_spin:
	igt_spinner_fini(&spin);
out_free:
	kfree(heartbeat);
	return err;
}
static int live_virtual_reset(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
	unsigned int class, inst;

	/*
	 * Check that we handle a reset event within a virtual engine.
	 * Only the physical engine is reset, but we have to check the flow
	 * of the virtual requests around the reset, and make sure it is not
	 * lost.
	 */

	if (intel_uc_uses_guc_submission(&gt->uc))
		return 0;

	if (!intel_has_reset_engine(gt))
		return 0;

	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
		int nsibling, err;

		nsibling = 0;
		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
			if (!gt->engine_class[class][inst])
				continue;

			siblings[nsibling++] = gt->engine_class[class][inst];
		}
		if (nsibling < 2)
			continue;

		err = reset_virtual_engine(gt, siblings, nsibling);
		if (err)
			return err;
	}

	return 0;
}
int intel_execlists_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_sanitycheck),
		SUBTEST(live_unlite_switch),
		SUBTEST(live_unlite_preempt),
		SUBTEST(live_pin_rewind),
		SUBTEST(live_hold_reset),
		SUBTEST(live_error_interrupt),
		SUBTEST(live_timeslice_preempt),
		SUBTEST(live_timeslice_rewind),
		SUBTEST(live_timeslice_queue),
		SUBTEST(live_busywait_preempt),
		SUBTEST(live_preempt),
		SUBTEST(live_late_preempt),
		SUBTEST(live_nopreempt),
		SUBTEST(live_preempt_cancel),
		SUBTEST(live_suppress_self_preempt),
		SUBTEST(live_suppress_wait_preempt),
		SUBTEST(live_chain_preempt),
		SUBTEST(live_preempt_gang),
		SUBTEST(live_preempt_timeout),
		SUBTEST(live_preempt_smoke),
		SUBTEST(live_virtual_engine),
		SUBTEST(live_virtual_mask),
		SUBTEST(live_virtual_preserved),
		SUBTEST(live_virtual_bond),
		SUBTEST(live_virtual_reset),
	};

	if (!HAS_EXECLISTS(i915))
		return 0;

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return intel_gt_live_subtests(tests, &i915->gt);
}
static void hexdump(const void *buf, size_t len)
{
	const size_t rowsize = 8 * sizeof(u32);
	const void *prev = NULL;
	bool skip = false;
	size_t pos;

	for (pos = 0; pos < len; pos += rowsize) {
		char line[128];

		if (prev && !memcmp(prev, buf + pos, rowsize)) {
			if (!skip) {
				pr_info("*\n");
				skip = true;
			}
			continue;
		}

		WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
						rowsize, sizeof(u32),
						line, sizeof(line),
						false) >= sizeof(line));
		pr_info("[%04zx] %s\n", pos, line);

		prev = buf + pos;
		skip = false;
	}
}
static int emit_semaphore_signal(struct intel_context *ce, void *slot)
{
	const u32 offset =
		i915_ggtt_offset(ce->engine->status_page.vma) +
		offset_in_page(slot);
	struct i915_request *rq;
	u32 *cs;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs)) {
		i915_request_add(rq);
		return PTR_ERR(cs);
	}

	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = offset;
	*cs++ = 0;
	*cs++ = 1;

	intel_ring_advance(rq, cs);

	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
	i915_request_add(rq);
	return 0;
}
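/*
 * These selftests synchronise with the GPU via a scratch dword in the
 * engine's status page: batches built by __gpr_read(), create_timestamp()
 * and record_registers() below busy-wait on it with MI_SEMAPHORE_WAIT
 * (SAD_NEQ_SDD, i.e. spin while the dword still equals the programmed
 * value of 0), and are released either by emit_semaphore_signal() above or
 * by a direct CPU write to the same dword.
 */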
static int context_flush(struct intel_context *ce, long timeout)
{
	struct i915_request *rq;
	struct dma_fence *fence;
	int err = 0;

	rq = intel_engine_create_kernel_request(ce->engine);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	fence = i915_active_fence_get(&ce->timeline->last_request);
	if (fence) {
		i915_request_await_dma_fence(rq, fence);
		dma_fence_put(fence);
	}

	rq = i915_request_get(rq);
	i915_request_add(rq);
	if (i915_request_wait(rq, 0, timeout) < 0)
		err = -ETIME;
	i915_request_put(rq);

	rmb(); /* We know the request is written, make sure all state is too! */
	return err;
}
static int live_lrc_layout(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	u32 *lrc;
	int err;

	/*
	 * Check the registers offsets we use to create the initial reg state
	 * match the layout saved by HW.
	 */

	lrc = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!lrc)
		return -ENOMEM;

	err = 0;
	for_each_engine(engine, gt, id) {
		u32 *hw;
		int dw;

		if (!engine->default_state)
			continue;

		hw = i915_gem_object_pin_map(engine->default_state,
					     I915_MAP_WB);
		if (IS_ERR(hw)) {
			err = PTR_ERR(hw);
			break;
		}
		hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);

		execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
					 engine->kernel_context,
					 engine,
					 engine->kernel_context->ring,
					 true);

		dw = 0;
		do {
			u32 lri = hw[dw];

			if (lri == 0) {
				dw++;
				continue;
			}

			if (lrc[dw] == 0) {
				pr_debug("%s: skipped instruction %x at dword %d\n",
					 engine->name, lri, dw);
				dw++;
				continue;
			}

			if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
				pr_err("%s: Expected LRI command at dword %d, found %08x\n",
				       engine->name, dw, lri);
				err = -EINVAL;
				break;
			}

			if (lrc[dw] != lri) {
				pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
				       engine->name, dw, lri, lrc[dw]);
				err = -EINVAL;
				break;
			}

			lri &= 0x7f;
			lri++;
			dw++;

			while (lri) {
				if (hw[dw] != lrc[dw]) {
					pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
					       engine->name, dw, hw[dw], lrc[dw]);
					err = -EINVAL;
					break;
				}

				/*
				 * Skip over the actual register value as we
				 * expect that to differ.
				 */
				dw += 2;
				lri -= 2;
			}
		} while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

		if (err) {
			pr_info("%s: HW register image:\n", engine->name);
			hexdump(hw, PAGE_SIZE);

			pr_info("%s: SW register image:\n", engine->name);
			hexdump(lrc, PAGE_SIZE);
		}

		i915_gem_object_unpin_map(engine->default_state);
		if (err)
			break;
	}

	kfree(lrc);
	return err;
}
static int find_offset(const u32 *lri, u32 offset)
{
	int i;

	for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
		if (lri[i] == offset)
			return i;

	return -1;
}
static int live_lrc_fixed(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check the assumed register offsets match the actual locations in
	 * the context image.
	 */

	for_each_engine(engine, gt, id) {
		const struct {
			u32 reg;
			u32 offset;
			const char *name;
		} tbl[] = {
			{
				i915_mmio_reg_offset(RING_START(engine->mmio_base)),
				CTX_RING_START - 1,
				"RING_START"
			},
			{
				i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
				CTX_RING_CTL - 1,
				"RING_CTL"
			},
			{
				i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
				CTX_RING_HEAD - 1,
				"RING_HEAD"
			},
			{
				i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
				CTX_RING_TAIL - 1,
				"RING_TAIL"
			},
			{
				i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
				lrc_ring_mi_mode(engine),
				"RING_MI_MODE"
			},
			{
				i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
				CTX_BB_STATE - 1,
				"BB_STATE"
			},
			{
				i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
				CTX_TIMESTAMP - 1,
				"RING_CTX_TIMESTAMP"
			},
			{ },
		}, *t;
		u32 *hw;

		if (!engine->default_state)
			continue;

		hw = i915_gem_object_pin_map(engine->default_state,
					     I915_MAP_WB);
		if (IS_ERR(hw)) {
			err = PTR_ERR(hw);
			break;
		}
		hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);

		for (t = tbl; t->name; t++) {
			int dw = find_offset(hw, t->reg);

			if (dw != t->offset) {
				pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
				       engine->name,
				       t->name,
				       t->reg,
				       dw,
				       t->offset);
				err = -EINVAL;
			}
		}

		i915_gem_object_unpin_map(engine->default_state);
	}

	return err;
}
static int __live_lrc_state(struct intel_engine_cs *engine,
			    struct i915_vma *scratch)
{
	struct intel_context *ce;
	struct i915_request *rq;
	enum {
		RING_START_IDX = 0,
		RING_TAIL_IDX,
		MAX_IDX
	};
	u32 expected[MAX_IDX];
	u32 *cs;
	int err;
	int n;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	err = intel_context_pin(ce);
	if (err)
		goto err_put;

	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	cs = intel_ring_begin(rq, 4 * MAX_IDX);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		i915_request_add(rq);
		goto err_unpin;
	}

	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
	*cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
	*cs++ = 0;

	expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);

	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
	*cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
	*cs++ = 0;

	i915_vma_lock(scratch);
	err = i915_request_await_object(rq, scratch->obj, true);
	if (!err)
		err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(scratch);

	i915_request_get(rq);
	i915_request_add(rq);
	if (err)
		goto err_rq;

	intel_engine_flush_submission(engine);
	expected[RING_TAIL_IDX] = ce->ring->tail;

	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		err = -ETIME;
		goto err_rq;
	}

	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_rq;
	}

	for (n = 0; n < MAX_IDX; n++) {
		if (cs[n] != expected[n]) {
			pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
			       engine->name, n, cs[n], expected[n]);
			err = -EINVAL;
			break;
		}
	}

	i915_gem_object_unpin_map(scratch->obj);

err_rq:
	i915_request_put(rq);
err_unpin:
	intel_context_unpin(ce);
err_put:
	intel_context_put(ce);
	return err;
}
static int live_lrc_state(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct i915_vma *scratch;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check the live register state matches what we expect for this
	 * intel_context.
	 */

	scratch = create_scratch(gt);
	if (IS_ERR(scratch))
		return PTR_ERR(scratch);

	for_each_engine(engine, gt, id) {
		err = __live_lrc_state(engine, scratch);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	i915_vma_unpin_and_release(&scratch, 0);
	return err;
}
static int gpr_make_dirty(struct intel_context *ce)
{
	struct i915_request *rq;
	u32 *cs;
	int n;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
	if (IS_ERR(cs)) {
		i915_request_add(rq);
		return PTR_ERR(cs);
	}

	*cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
	for (n = 0; n < NUM_GPR_DW; n++) {
		*cs++ = CS_GPR(ce->engine, n);
		*cs++ = STACK_MAGIC;
	}
	*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);

	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
	i915_request_add(rq);

	return 0;
}
static struct i915_request *
__gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot)
{
	const u32 offset =
		i915_ggtt_offset(ce->engine->status_page.vma) +
		offset_in_page(slot);
	struct i915_request *rq;
	u32 *cs;
	int err;
	int n;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return rq;

	cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW);
	if (IS_ERR(cs)) {
		i915_request_add(rq);
		return ERR_CAST(cs);
	}

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	*cs++ = MI_NOOP;

	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_NEQ_SDD;
	*cs++ = 0;
	*cs++ = offset;
	*cs++ = 0;

	for (n = 0; n < NUM_GPR_DW; n++) {
		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
		*cs++ = CS_GPR(ce->engine, n);
		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
		*cs++ = 0;
	}

	i915_vma_lock(scratch);
	err = i915_request_await_object(rq, scratch->obj, true);
	if (!err)
		err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(scratch);

	i915_request_get(rq);
	i915_request_add(rq);
	if (err) {
		i915_request_put(rq);
		rq = ERR_PTR(err);
	}

	return rq;
}
static int __live_lrc_gpr(struct intel_engine_cs *engine,
			  struct i915_vma *scratch,
			  bool preempt)
{
	u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4);
	struct intel_context *ce;
	struct i915_request *rq;
	u32 *cs;
	int err;
	int n;

	if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
		return 0; /* GPR only on rcs0 for gen8 */

	err = gpr_make_dirty(engine->kernel_context);
	if (err)
		return err;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	rq = __gpr_read(ce, scratch, slot);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_put;
	}

	err = wait_for_submit(engine, rq, HZ / 2);
	if (err)
		goto err_rq;

	if (preempt) {
		err = gpr_make_dirty(engine->kernel_context);
		if (err)
			goto err_rq;

		err = emit_semaphore_signal(engine->kernel_context, slot);
		if (err)
			goto err_rq;
	} else {
		slot[0] = 1;
		wmb();
	}

	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		err = -ETIME;
		goto err_rq;
	}

	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_rq;
	}

	for (n = 0; n < NUM_GPR_DW; n++) {
		if (cs[n]) {
			pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
			       engine->name,
			       n / 2, n & 1 ? "udw" : "ldw",
			       cs[n]);
			err = -EINVAL;
			break;
		}
	}

	i915_gem_object_unpin_map(scratch->obj);

err_rq:
	memset32(&slot[0], -1, 4);
	i915_request_put(rq);
err_put:
	intel_context_put(ce);
	return err;
}
)
4570 struct intel_gt
*gt
= arg
;
4571 struct intel_engine_cs
*engine
;
4572 struct i915_vma
*scratch
;
4573 enum intel_engine_id id
;
4577 * Check that GPR registers are cleared in new contexts as we need
4578 * to avoid leaking any information from previous contexts.
4581 scratch
= create_scratch(gt
);
4582 if (IS_ERR(scratch
))
4583 return PTR_ERR(scratch
);
4585 for_each_engine(engine
, gt
, id
) {
4586 unsigned long heartbeat
;
4588 engine_heartbeat_disable(engine
, &heartbeat
);
4590 err
= __live_lrc_gpr(engine
, scratch
, false);
4594 err
= __live_lrc_gpr(engine
, scratch
, true);
4599 engine_heartbeat_enable(engine
, heartbeat
);
4600 if (igt_flush_test(gt
->i915
))
4606 i915_vma_unpin_and_release(&scratch
, 0);
static struct i915_request *
create_timestamp(struct intel_context *ce, void *slot, int idx)
{
	const u32 offset =
		i915_ggtt_offset(ce->engine->status_page.vma) +
		offset_in_page(slot);
	struct i915_request *rq;
	u32 *cs;
	int err;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return rq;

	cs = intel_ring_begin(rq, 10);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err;
	}

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	*cs++ = MI_NOOP;

	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_NEQ_SDD;
	*cs++ = 0;
	*cs++ = offset;
	*cs++ = 0;

	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base));
	*cs++ = offset + idx * sizeof(u32);
	*cs++ = 0;

	intel_ring_advance(rq, cs);

	rq->sched.attr.priority = I915_PRIORITY_MASK;
	err = 0;
err:
	i915_request_get(rq);
	i915_request_add(rq);
	if (err) {
		i915_request_put(rq);
		return ERR_PTR(err);
	}

	return rq;
}
struct lrc_timestamp {
	struct intel_engine_cs *engine;
	struct intel_context *ce[2];
	u32 poison;
};

static bool timestamp_advanced(u32 start, u32 end)
{
	return (s32)(end - start) > 0;
}
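/*
 * Illustrative note: the signed-delta comparison above tolerates wraparound
 * of the 32bit timestamp, e.g. start = 0xfffffff0, end = 0x10 gives
 * (s32)(end - start) == 32 > 0, so the timestamp is still seen as advancing.
 */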
static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt)
{
	u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4);
	struct i915_request *rq;
	u32 timestamp;
	int err = 0;

	arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison;
	rq = create_timestamp(arg->ce[0], slot, 1);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	err = wait_for_submit(rq->engine, rq, HZ / 2);
	if (err)
		goto err;

	if (preempt) {
		arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef;
		err = emit_semaphore_signal(arg->ce[1], slot);
		if (err)
			goto err;
	} else {
		slot[0] = 1;
		wmb();
	}

	/* And wait for switch to kernel (to save our context to memory) */
	err = context_flush(arg->ce[0], HZ / 2);
	if (err)
		goto err;

	if (!timestamp_advanced(arg->poison, slot[1])) {
		pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
		       arg->engine->name, preempt ? "preempt" : "simple",
		       arg->poison, slot[1]);
		err = -EINVAL;
	}

	timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]);
	if (!timestamp_advanced(slot[1], timestamp)) {
		pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n",
		       arg->engine->name, preempt ? "preempt" : "simple",
		       slot[1], timestamp);
		err = -EINVAL;
	}

err:
	memset32(slot, -1, 4);
	i915_request_put(rq);
	return err;
}
static int live_lrc_timestamp(void *arg)
{
	struct lrc_timestamp data = {};
	struct intel_gt *gt = arg;
	enum intel_engine_id id;
	const u32 poison[] = {
		0,
		S32_MAX,
		(u32)S32_MAX + 1,
		U32_MAX,
	};

	/*
	 * We want to verify that the timestamp is saved and restore across
	 * context switches and is monotonic.
	 *
	 * So we do this with a little bit of LRC poisoning to check various
	 * boundary conditions, and see what happens if we preempt the context
	 * with a second request (carrying more poison into the timestamp).
	 */

	for_each_engine(data.engine, gt, id) {
		unsigned long heartbeat;
		int i, err = 0;

		engine_heartbeat_disable(data.engine, &heartbeat);

		for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
			struct intel_context *tmp;

			tmp = intel_context_create(data.engine);
			if (IS_ERR(tmp)) {
				err = PTR_ERR(tmp);
				goto err;
			}

			err = intel_context_pin(tmp);
			if (err) {
				intel_context_put(tmp);
				goto err;
			}

			data.ce[i] = tmp;
		}

		for (i = 0; i < ARRAY_SIZE(poison); i++) {
			data.poison = poison[i];

			err = __lrc_timestamp(&data, false);
			if (err)
				break;

			err = __lrc_timestamp(&data, true);
			if (err)
				break;
		}

err:
		engine_heartbeat_enable(data.engine, heartbeat);
		for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
			if (!data.ce[i])
				break;

			intel_context_unpin(data.ce[i]);
			intel_context_put(data.ce[i]);
		}

		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			return err;
	}

	return 0;
}
*
4801 create_user_vma(struct i915_address_space
*vm
, unsigned long size
)
4803 struct drm_i915_gem_object
*obj
;
4804 struct i915_vma
*vma
;
4807 obj
= i915_gem_object_create_internal(vm
->i915
, size
);
4809 return ERR_CAST(obj
);
4811 vma
= i915_vma_instance(obj
, vm
, NULL
);
4813 i915_gem_object_put(obj
);
4817 err
= i915_vma_pin(vma
, 0, 0, PIN_USER
);
4819 i915_gem_object_put(obj
);
4820 return ERR_PTR(err
);
4826 static struct i915_vma
*
4827 store_context(struct intel_context
*ce
, struct i915_vma
*scratch
)
4829 struct i915_vma
*batch
;
4830 u32 dw
, x
, *cs
, *hw
;
4832 batch
= create_user_vma(ce
->vm
, SZ_64K
);
4836 cs
= i915_gem_object_pin_map(batch
->obj
, I915_MAP_WC
);
4838 i915_vma_put(batch
);
4839 return ERR_CAST(cs
);
4844 hw
= ce
->engine
->pinned_default_state
;
4845 hw
+= LRC_STATE_PN
* PAGE_SIZE
/ sizeof(*hw
);
4847 u32 len
= hw
[dw
] & 0x7f;
4854 if ((hw
[dw
] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
4860 len
= (len
+ 1) / 2;
4862 *cs
++ = MI_STORE_REGISTER_MEM_GEN8
;
4864 *cs
++ = lower_32_bits(scratch
->node
.start
+ x
);
4865 *cs
++ = upper_32_bits(scratch
->node
.start
+ x
);
4870 } while (dw
< PAGE_SIZE
/ sizeof(u32
) &&
4871 (hw
[dw
] & ~BIT(0)) != MI_BATCH_BUFFER_END
);
4873 *cs
++ = MI_BATCH_BUFFER_END
;
4875 i915_gem_object_flush_map(batch
->obj
);
4876 i915_gem_object_unpin_map(batch
->obj
);
static int move_to_active(struct i915_request *rq,
			  struct i915_vma *vma,
			  unsigned int flags)
{
	int err;

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, flags);
	if (!err)
		err = i915_vma_move_to_active(vma, rq, flags);
	i915_vma_unlock(vma);

	return err;
}
static struct i915_request *
record_registers(struct intel_context *ce,
		 struct i915_vma *before,
		 struct i915_vma *after,
		 u32 *sema)
{
	struct i915_vma *b_before, *b_after;
	struct i915_request *rq;
	u32 *cs;
	int err;

	b_before = store_context(ce, before);
	if (IS_ERR(b_before))
		return ERR_CAST(b_before);

	b_after = store_context(ce, after);
	if (IS_ERR(b_after)) {
		rq = ERR_CAST(b_after);
		goto err_before;
	}

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		goto err_after;

	err = move_to_active(rq, before, EXEC_OBJECT_WRITE);
	if (err)
		goto err_rq;

	err = move_to_active(rq, b_before, 0);
	if (err)
		goto err_rq;

	err = move_to_active(rq, after, EXEC_OBJECT_WRITE);
	if (err)
		goto err_rq;

	err = move_to_active(rq, b_after, 0);
	if (err)
		goto err_rq;

	cs = intel_ring_begin(rq, 14);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_rq;
	}

	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
	*cs++ = lower_32_bits(b_before->node.start);
	*cs++ = upper_32_bits(b_before->node.start);

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_NEQ_SDD;
	*cs++ = 0;
	*cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
		offset_in_page(sema);
	*cs++ = 0;
	*cs++ = MI_NOOP;

	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
	*cs++ = lower_32_bits(b_after->node.start);
	*cs++ = upper_32_bits(b_after->node.start);

	intel_ring_advance(rq, cs);

	WRITE_ONCE(*sema, 0);
	i915_request_get(rq);
	i915_request_add(rq);
err_after:
	i915_vma_put(b_after);
err_before:
	i915_vma_put(b_before);
	return rq;

err_rq:
	i915_request_add(rq);
	rq = ERR_PTR(err);
	goto err_after;
}
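/*
 * record_registers() therefore emits: run the "before" readback batch with
 * arbitration disabled, spin on the semaphore with arbitration enabled (the
 * window in which poison_registers() runs from another context), then run
 * the "after" readback batch. Comparing the two snapshots shows whether the
 * remote context managed to modify our register state.
 */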
static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
{
	struct i915_vma *batch;
	u32 dw, *cs, *hw;

	batch = create_user_vma(ce->vm, SZ_64K);
	if (IS_ERR(batch))
		return batch;

	cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
	if (IS_ERR(cs)) {
		i915_vma_put(batch);
		return ERR_CAST(cs);
	}

	dw = 0;
	hw = ce->engine->pinned_default_state;
	hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
	do {
		u32 len = hw[dw] & 0x7f;

		if (hw[dw] == 0) {
			dw++;
			continue;
		}

		if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
			dw += len + 2;
			continue;
		}

		dw++;
		len = (len + 1) / 2;
		*cs++ = MI_LOAD_REGISTER_IMM(len);
		while (len--) {
			*cs++ = hw[dw];
			*cs++ = poison;
			dw += 2;
		}
	} while (dw < PAGE_SIZE / sizeof(u32) &&
		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

	*cs++ = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(batch->obj);
	i915_gem_object_unpin_map(batch->obj);

	return batch;
}
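/*
 * The batch built above walks the same LRI list as store_context(), but
 * instead of reading each listed register it reloads it with the poison
 * value, attempting to corrupt the victim's context image from a remote,
 * non-privileged context.
 */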
static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
{
	struct i915_request *rq;
	struct i915_vma *batch;
	u32 *cs;
	int err;

	batch = load_context(ce, poison);
	if (IS_ERR(batch))
		return PTR_ERR(batch);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_batch;
	}

	err = move_to_active(rq, batch, 0);
	if (err)
		goto err_rq;

	cs = intel_ring_begin(rq, 8);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_rq;
	}

	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
	*cs++ = lower_32_bits(batch->node.start);
	*cs++ = upper_32_bits(batch->node.start);

	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
		offset_in_page(sema);
	*cs++ = 0;
	*cs++ = 1;

	intel_ring_advance(rq, cs);

	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
err_rq:
	i915_request_add(rq);
err_batch:
	i915_vma_put(batch);
	return err;
}
static bool is_moving(u32 a, u32 b)
{
	return a != b;
}
static int compare_isolation(struct intel_engine_cs *engine,
			     struct i915_vma *ref[2],
			     struct i915_vma *result[2],
			     struct intel_context *ce,
			     u32 poison)
{
	u32 x, dw, *hw, *lrc;
	u32 *A[2], *B[2];
	int err = 0;

	A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC);
	if (IS_ERR(A[0]))
		return PTR_ERR(A[0]);

	A[1] = i915_gem_object_pin_map(ref[1]->obj, I915_MAP_WC);
	if (IS_ERR(A[1])) {
		err = PTR_ERR(A[1]);
		goto err_A0;
	}

	B[0] = i915_gem_object_pin_map(result[0]->obj, I915_MAP_WC);
	if (IS_ERR(B[0])) {
		err = PTR_ERR(B[0]);
		goto err_A1;
	}

	B[1] = i915_gem_object_pin_map(result[1]->obj, I915_MAP_WC);
	if (IS_ERR(B[1])) {
		err = PTR_ERR(B[1]);
		goto err_B0;
	}

	lrc = i915_gem_object_pin_map(ce->state->obj,
				      i915_coherent_map_type(engine->i915));
	if (IS_ERR(lrc)) {
		err = PTR_ERR(lrc);
		goto err_B1;
	}
	lrc += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);

	x = 0;
	dw = 0;
	hw = engine->pinned_default_state;
	hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
	do {
		u32 len = hw[dw] & 0x7f;

		if (hw[dw] == 0) {
			dw++;
			continue;
		}

		if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
			dw += len + 2;
			continue;
		}

		dw++;
		len = (len + 1) / 2;
		while (len--) {
			if (!is_moving(A[0][x], A[1][x]) &&
			    (A[0][x] != B[0][x] || A[1][x] != B[1][x])) {
				switch (hw[dw] & 4095) {
				case 0x30: /* RING_HEAD */
				case 0x34: /* RING_TAIL */
					break;

				default:
					pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
					       engine->name, dw,
					       hw[dw], hw[dw + 1],
					       A[0][x], B[0][x], B[1][x],
					       poison, lrc[dw + 1]);
					err = -EINVAL;
					break;
				}
			}
			dw += 2;
			x++;
		}
	} while (dw < PAGE_SIZE / sizeof(u32) &&
		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

	i915_gem_object_unpin_map(ce->state->obj);
err_B1:
	i915_gem_object_unpin_map(result[1]->obj);
err_B0:
	i915_gem_object_unpin_map(result[0]->obj);
err_A1:
	i915_gem_object_unpin_map(ref[1]->obj);
err_A0:
	i915_gem_object_unpin_map(ref[0]->obj);
	return err;
}
static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
{
	u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1);
	struct i915_vma *ref[2], *result[2];
	struct intel_context *A, *B;
	struct i915_request *rq;
	int err;

	A = intel_context_create(engine);
	if (IS_ERR(A))
		return PTR_ERR(A);

	B = intel_context_create(engine);
	if (IS_ERR(B)) {
		err = PTR_ERR(B);
		goto err_A;
	}

	ref[0] = create_user_vma(A->vm, SZ_64K);
	if (IS_ERR(ref[0])) {
		err = PTR_ERR(ref[0]);
		goto err_B;
	}

	ref[1] = create_user_vma(A->vm, SZ_64K);
	if (IS_ERR(ref[1])) {
		err = PTR_ERR(ref[1]);
		goto err_ref0;
	}

	rq = record_registers(A, ref[0], ref[1], sema);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_ref1;
	}

	WRITE_ONCE(*sema, 1);
	wmb();

	if (i915_request_wait(rq, 0, HZ / 2) < 0) {
		i915_request_put(rq);
		err = -ETIME;
		goto err_ref1;
	}
	i915_request_put(rq);

	result[0] = create_user_vma(A->vm, SZ_64K);
	if (IS_ERR(result[0])) {
		err = PTR_ERR(result[0]);
		goto err_ref1;
	}

	result[1] = create_user_vma(A->vm, SZ_64K);
	if (IS_ERR(result[1])) {
		err = PTR_ERR(result[1]);
		goto err_result0;
	}

	rq = record_registers(A, result[0], result[1], sema);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_result1;
	}

	err = poison_registers(B, poison, sema);
	if (err) {
		WRITE_ONCE(*sema, -1);
		i915_request_put(rq);
		goto err_result1;
	}

	if (i915_request_wait(rq, 0, HZ / 2) < 0) {
		i915_request_put(rq);
		err = -ETIME;
		goto err_result1;
	}
	i915_request_put(rq);

	err = compare_isolation(engine, ref, result, A, poison);

err_result1:
	i915_vma_put(result[1]);
err_result0:
	i915_vma_put(result[0]);
err_ref1:
	i915_vma_put(ref[1]);
err_ref0:
	i915_vma_put(ref[0]);
err_B:
	intel_context_put(B);
err_A:
	intel_context_put(A);
	return err;
}
*engine
)
5276 if (engine
->class == COPY_ENGINE_CLASS
&& INTEL_GEN(engine
->i915
) == 9)
5279 if (engine
->class == RENDER_CLASS
&& INTEL_GEN(engine
->i915
) == 11)
static int live_lrc_isolation(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	const u32 poison[] = {
		STACK_MAGIC,
		0x3a3a3a3a,
		0x5c5c5c5c,
		0xffffffff,
		0xffff0000,
	};

	/*
	 * Our goal is try and verify that per-context state cannot be
	 * tampered with by another non-privileged client.
	 *
	 * We take the list of context registers from the LRI in the default
	 * context image and attempt to modify that list from a remote context.
	 */

	for_each_engine(engine, gt, id) {
		int err = 0;
		int i;

		/* Just don't even ask */
		if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
		    skip_isolation(engine))
			continue;

		intel_engine_pm_get(engine);
		if (engine->pinned_default_state) {
			for (i = 0; i < ARRAY_SIZE(poison); i++) {
				err = __lrc_isolation(engine, poison[i]);
				if (err)
					break;

				err = __lrc_isolation(engine, ~poison[i]);
				if (err)
					break;
			}
		}
		intel_engine_pm_put(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			return err;
	}

	return 0;
}
static void garbage_reset(struct intel_engine_cs *engine,
			  struct i915_request *rq)
{
	const unsigned int bit = I915_RESET_ENGINE + engine->id;
	unsigned long *lock = &engine->gt->reset.flags;

	if (test_and_set_bit(bit, lock))
		return;

	tasklet_disable(&engine->execlists.tasklet);

	if (!rq->fence.error)
		intel_engine_reset(engine, NULL);

	tasklet_enable(&engine->execlists.tasklet);
	clear_and_wake_up_bit(bit, lock);
}
static struct i915_request *garbage(struct intel_context *ce,
				    struct rnd_state *prng)
{
	struct i915_request *rq;
	int err;

	err = intel_context_pin(ce);
	if (err)
		return ERR_PTR(err);

	prandom_bytes_state(prng,
			    ce->lrc_reg_state,
			    ce->engine->context_size -
			    LRC_STATE_PN * PAGE_SIZE);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	i915_request_get(rq);
	i915_request_add(rq);
	return rq;

err_unpin:
	intel_context_unpin(ce);
	return ERR_PTR(err);
}
static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
{
	struct intel_context *ce;
	struct i915_request *hang;
	int err = 0;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	hang = garbage(ce, prng);
	if (IS_ERR(hang)) {
		err = PTR_ERR(hang);
		goto err_ce;
	}

	if (wait_for_submit(engine, hang, HZ / 2)) {
		i915_request_put(hang);
		err = -ETIME;
		goto err_ce;
	}

	intel_context_set_banned(ce);
	garbage_reset(engine, hang);

	intel_engine_flush_submission(engine);
	if (!hang->fence.error) {
		i915_request_put(hang);
		pr_err("%s: corrupted context was not reset\n",
		       engine->name);
		err = -EINVAL;
		goto err_ce;
	}

	if (i915_request_wait(hang, 0, HZ / 2) < 0) {
		pr_err("%s: corrupted context did not recover\n",
		       engine->name);
		i915_request_put(hang);
		err = -EIO;
		goto err_ce;
	}
	i915_request_put(hang);

err_ce:
	intel_context_unpin(ce);
	intel_context_put(ce);
	return err;
}
static int live_lrc_garbage(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * Verify that we can recover if one context state is completely
	 * corrupted.
	 */

	if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
		return 0;

	for_each_engine(engine, gt, id) {
		I915_RND_STATE(prng);
		int err = 0, i;

		if (!intel_has_reset_engine(engine->gt))
			continue;

		intel_engine_pm_get(engine);
		for (i = 0; i < 3; i++) {
			err = __lrc_garbage(engine, &prng);
			if (err)
				break;
		}
		intel_engine_pm_put(engine);

		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			return err;
	}

	return 0;
}
static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
{
	struct intel_context *ce;
	struct i915_request *rq = NULL;
	IGT_TIMEOUT(end_time);
	int err;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	ce->runtime.num_underflow = 0;
	ce->runtime.max_underflow = 0;

	do {
		unsigned int loop = 1024;

		while (loop) {
			rq = intel_context_create_request(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto err_rq;
			}

			if (--loop == 0)
				i915_request_get(rq);

			i915_request_add(rq);
		}

		if (__igt_timeout(end_time, NULL))
			break;

		i915_request_put(rq);
	} while (1);

	err = i915_request_wait(rq, 0, HZ / 5);
	if (err < 0) {
		pr_err("%s: request not completed!\n", engine->name);
		goto err_wait;
	}

	igt_flush_test(engine->i915);

	pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
		engine->name,
		intel_context_get_total_runtime_ns(ce),
		intel_context_get_avg_runtime_ns(ce));

	err = 0;
	if (ce->runtime.num_underflow) {
		pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
		       engine->name,
		       ce->runtime.num_underflow,
		       ce->runtime.max_underflow);
		GEM_TRACE_DUMP();
		err = -EOVERFLOW;
	}

err_wait:
	i915_request_put(rq);
err_rq:
	intel_context_put(ce);
	return err;
}
static int live_pphwsp_runtime(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check that cumulative context runtime as stored in the pphwsp[16]
	 * is monotonic.
	 */

	for_each_engine(engine, gt, id) {
		err = __live_pphwsp_runtime(engine);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	return err;
}
*i915
)
5563 static const struct i915_subtest tests
[] = {
5564 SUBTEST(live_lrc_layout
),
5565 SUBTEST(live_lrc_fixed
),
5566 SUBTEST(live_lrc_state
),
5567 SUBTEST(live_lrc_gpr
),
5568 SUBTEST(live_lrc_isolation
),
5569 SUBTEST(live_lrc_timestamp
),
5570 SUBTEST(live_lrc_garbage
),
5571 SUBTEST(live_pphwsp_runtime
),
5574 if (!HAS_LOGICAL_RING_CONTEXTS(i915
))
5577 return intel_gt_live_subtests(tests
, &i915
->gt
);