]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/amdgpu: reduce queue timeout to 2 seconds v2
authorChristian König <christian.koenig@amd.com>
Thu, 25 Sep 2025 10:09:56 +0000 (12:09 +0200)
committerAlex Deucher <alexander.deucher@amd.com>
Mon, 13 Oct 2025 18:14:33 +0000 (14:14 -0400)
There have been multiple complaints that 10 seconds is usually too long.

The original requirement for a longer timeout came from compute tests on
AMDVLK; since that is no longer a concern, reduce the timeout back to 2
seconds for all queues.

While at it, also remove any special handling for compute queues under
SRIOV or passthrough.

v2: fix checkpatch warning.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

index 3d032c4e2dce1df277c6fa124e9a6ab328d3ba12..6d228012187c3c7a601132b55062101f95f78a59 100644 (file)
@@ -4285,58 +4285,53 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
        long timeout;
        int ret = 0;
 
-       /*
-        * By default timeout for jobs is 10 sec
-        */
-       adev->compute_timeout = adev->gfx_timeout = msecs_to_jiffies(10000);
-       adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
+       /* By default timeout for all queues is 2 sec */
+       adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
+               adev->video_timeout = msecs_to_jiffies(2000);
 
-       if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
-               while ((timeout_setting = strsep(&input, ",")) &&
-                               strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
-                       ret = kstrtol(timeout_setting, 0, &timeout);
-                       if (ret)
-                               return ret;
+       if (!strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH))
+               return 0;
 
-                       if (timeout == 0) {
-                               index++;
-                               continue;
-                       } else if (timeout < 0) {
-                               timeout = MAX_SCHEDULE_TIMEOUT;
-                               dev_warn(adev->dev, "lockup timeout disabled");
-                               add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
-                       } else {
-                               timeout = msecs_to_jiffies(timeout);
-                       }
+       while ((timeout_setting = strsep(&input, ",")) &&
+              strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
+               ret = kstrtol(timeout_setting, 0, &timeout);
+               if (ret)
+                       return ret;
 
-                       switch (index++) {
-                       case 0:
-                               adev->gfx_timeout = timeout;
-                               break;
-                       case 1:
-                               adev->compute_timeout = timeout;
-                               break;
-                       case 2:
-                               adev->sdma_timeout = timeout;
-                               break;
-                       case 3:
-                               adev->video_timeout = timeout;
-                               break;
-                       default:
-                               break;
-                       }
+               if (timeout == 0) {
+                       index++;
+                       continue;
+               } else if (timeout < 0) {
+                       timeout = MAX_SCHEDULE_TIMEOUT;
+                       dev_warn(adev->dev, "lockup timeout disabled");
+                       add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
+               } else {
+                       timeout = msecs_to_jiffies(timeout);
                }
-               /*
-                * There is only one value specified and
-                * it should apply to all non-compute jobs.
-                */
-               if (index == 1) {
-                       adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
-                       if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
-                               adev->compute_timeout = adev->gfx_timeout;
+
+               switch (index++) {
+               case 0:
+                       adev->gfx_timeout = timeout;
+                       break;
+               case 1:
+                       adev->compute_timeout = timeout;
+                       break;
+               case 2:
+                       adev->sdma_timeout = timeout;
+                       break;
+               case 3:
+                       adev->video_timeout = timeout;
+                       break;
+               default:
+                       break;
                }
        }
 
+       /* When only one value specified apply it to all queues. */
+       if (index == 1)
+               adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
+                       adev->video_timeout = timeout;
+
        return ret;
 }
 
index 61268aa82df4d6ce3ea9118575c7935b48595d7e..7eac510c2d6ff08e159e2316101e82bb35dc3cb1 100644 (file)
@@ -354,22 +354,16 @@ module_param_named(svm_default_granularity, amdgpu_svm_default_granularity, uint
  * DOC: lockup_timeout (string)
  * Set GPU scheduler timeout value in ms.
  *
- * The format can be [Non-Compute] or [GFX,Compute,SDMA,Video]. That is there can be one or
- * multiple values specified. 0 and negative values are invalidated. They will be adjusted
- * to the default timeout.
+ * The format can be [single value] for setting all timeouts at once or
+ * [GFX,Compute,SDMA,Video] to set individual timeouts.
+ * Negative values mean infinity.
  *
- * - With one value specified, the setting will apply to all non-compute jobs.
- * - With multiple values specified, the first one will be for GFX.
- *   The second one is for Compute. The third and fourth ones are
- *   for SDMA and Video.
- *
- * By default(with no lockup_timeout settings), the timeout for all jobs is 10000.
+ * By default(with no lockup_timeout settings), the timeout for all queues is 2000.
  */
 MODULE_PARM_DESC(lockup_timeout,
-                "GPU lockup timeout in ms (default: 10000 for all jobs. "
-                "0: keep default value. negative: infinity timeout), format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; "
-                "for passthrough or sriov [all jobs] or [GFX,Compute,SDMA,Video].");
-module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_timeout), 0444);
+                "GPU lockup timeout in ms (default: 2000. 0: keep default value. negative: infinity timeout), format: [single value for all] or [GFX,Compute,SDMA,Video].");
+module_param_string(lockup_timeout, amdgpu_lockup_timeout,
+                   sizeof(amdgpu_lockup_timeout), 0444);
 
 /**
  * DOC: dpm (int)