]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
drm/i915/selftests: Use preemption timeout on cleanup
authorJanusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
Fri, 13 Dec 2024 18:59:48 +0000 (19:59 +0100)
committerAndi Shyti <andi.shyti@kernel.org>
Fri, 20 Dec 2024 22:34:43 +0000 (23:34 +0100)
Many selftests call igt_flush_test() on cleanup.  With default preemption
timeout of compute engines raised to 7.5 seconds, hardcoded flush timeout
of 3 seconds is too short.  That results in the GPU being forcibly wedged
and the kernel tainted, after which an IGT abort is triggered.  CI BAT runs
lose a part of their expected coverage.

Calculate the flush timeout based on the longest preemption timeout
currently configured for any engine.  That way, selftests can still report
detected issues as non-critical, and the GPU gets a chance to recover from
preemptible hangs and prepare for smooth execution of the next test cases.

Link: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/12061
Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Signed-off-by: Andi Shyti <andi.shyti@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20241213190122.513709-2-janusz.krzysztofik@linux.intel.com
drivers/gpu/drm/i915/selftests/igt_flush_test.c

index 29110abb4fe0587a863bd4a8dadb565a29210182..c383d31d46b0f89d7c0e61b456350bb590ec2b0f 100644 (file)
@@ -19,12 +19,22 @@ int igt_flush_test(struct drm_i915_private *i915)
        int ret = 0;
 
        for_each_gt(gt, i915, i) {
+               struct intel_engine_cs *engine;
+               unsigned long timeout_ms = 0;
+               unsigned int id;
+
                if (intel_gt_is_wedged(gt))
                        ret = -EIO;
 
+               for_each_engine(engine, gt, id) {
+                       if (engine->props.preempt_timeout_ms > timeout_ms)
+                               timeout_ms = engine->props.preempt_timeout_ms;
+               }
+
                cond_resched();
 
-               if (intel_gt_wait_for_idle(gt, HZ * 3) == -ETIME) {
+               /* 2x longest preempt timeout, experimentally determined */
+               if (intel_gt_wait_for_idle(gt, HZ * timeout_ms / 500) == -ETIME) {
                        pr_err("%pS timed out, cancelling all further testing.\n",
                               __builtin_return_address(0));