Among the scheduler's statuses, the only one that indicates an error is
DRM_GPU_SCHED_STAT_ENODEV. Any status other than DRM_GPU_SCHED_STAT_ENODEV
signifies that the operation succeeded and the GPU is in a nominal state.
However, to provide more information about the GPU's status, it is needed
to convey more information than just "OK".
Therefore, rename DRM_GPU_SCHED_STAT_NOMINAL to
DRM_GPU_SCHED_STAT_RESET, which better communicates the meaning of this
status. The status DRM_GPU_SCHED_STAT_RESET indicates that the GPU has
hung, but it has been successfully reset and is now in a nominal state
again.
Reviewed-by: Philipp Stanner <phasta@kernel.org>
Link: https://lore.kernel.org/r/20250714-sched-skip-reset-v6-1-5c5ba4f55039@igalia.com
Signed-off-by: Maíra Canal <mcanal@igalia.com>
aie2_hwctx_restart(xdna, hwctx);
mutex_unlock(&xdna->dev_lock);
- return DRM_GPU_SCHED_STAT_NOMINAL;
+ return DRM_GPU_SCHED_STAT_RESET;
}
static const struct drm_sched_backend_ops sched_ops = {
exit:
amdgpu_vm_put_task_info(ti);
drm_dev_exit(idx);
- return DRM_GPU_SCHED_STAT_NOMINAL;
+ return DRM_GPU_SCHED_STAT_RESET;
}
int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
drm_sched_resubmit_jobs(&gpu->sched);
drm_sched_start(&gpu->sched, 0);
- return DRM_GPU_SCHED_STAT_NOMINAL;
+ return DRM_GPU_SCHED_STAT_RESET;
out_no_timeout:
list_add(&sched_job->list, &sched_job->sched->pending_list);
- return DRM_GPU_SCHED_STAT_NOMINAL;
+ return DRM_GPU_SCHED_STAT_RESET;
}
static void etnaviv_sched_free_job(struct drm_sched_job *sched_job)
* the scheduler, and re-assign parent fences in the middle.
*
* Return:
- * * DRM_GPU_SCHED_STAT_NOMINAL.
+ * * DRM_GPU_SCHED_STAT_RESET.
*/
static enum drm_gpu_sched_stat
pvr_queue_timedout_job(struct drm_sched_job *s_job)
drm_sched_start(sched, 0);
- return DRM_GPU_SCHED_STAT_NOMINAL;
+ return DRM_GPU_SCHED_STAT_RESET;
}
/**
*/
if (dma_fence_is_signaled(task->fence)) {
DRM_WARN("%s spurious timeout\n", lima_ip_name(ip));
- return DRM_GPU_SCHED_STAT_NOMINAL;
+ return DRM_GPU_SCHED_STAT_RESET;
}
/*
if (dma_fence_is_signaled(task->fence)) {
DRM_WARN("%s unexpectedly high interrupt latency\n", lima_ip_name(ip));
- return DRM_GPU_SCHED_STAT_NOMINAL;
+ return DRM_GPU_SCHED_STAT_RESET;
}
/*
drm_sched_resubmit_jobs(&pipe->base);
drm_sched_start(&pipe->base, 0);
- return DRM_GPU_SCHED_STAT_NOMINAL;
+ return DRM_GPU_SCHED_STAT_RESET;
}
static void lima_sched_free_job(struct drm_sched_job *job)
NV_PRINTK(warn, job->cli, "job timeout, channel %d killed!\n",
chan->chid);
- return DRM_GPU_SCHED_STAT_NOMINAL;
+ return DRM_GPU_SCHED_STAT_RESET;
}
static const struct nouveau_job_ops nouveau_exec_job_ops = {
{
struct drm_gpu_scheduler *sched = sched_job->sched;
struct nouveau_job *job = to_nouveau_job(sched_job);
- enum drm_gpu_sched_stat stat = DRM_GPU_SCHED_STAT_NOMINAL;
+ enum drm_gpu_sched_stat stat = DRM_GPU_SCHED_STAT_RESET;
drm_sched_stop(sched, sched_job);
* spurious. Bail out.
*/
if (dma_fence_is_signaled(job->done_fence))
- return DRM_GPU_SCHED_STAT_NOMINAL;
+ return DRM_GPU_SCHED_STAT_RESET;
/*
* Panfrost IRQ handler may take a long time to process an interrupt
if (dma_fence_is_signaled(job->done_fence)) {
dev_warn(pfdev->dev, "unexpectedly high interrupt latency\n");
- return DRM_GPU_SCHED_STAT_NOMINAL;
+ return DRM_GPU_SCHED_STAT_RESET;
}
dev_err(pfdev->dev, "gpu sched timeout, js=%d, config=0x%x, status=0x%x, head=0x%x, tail=0x%x, sched_job=%p",
atomic_set(&pfdev->reset.pending, 1);
panfrost_reset(pfdev, sched_job);
- return DRM_GPU_SCHED_STAT_NOMINAL;
+ return DRM_GPU_SCHED_STAT_RESET;
}
static void panfrost_reset_work(struct work_struct *work)
panthor_vm_bind_timedout_job(struct drm_sched_job *sched_job)
{
WARN(1, "VM_BIND ops are synchronous for now, there should be no timeout!");
- return DRM_GPU_SCHED_STAT_NOMINAL;
+ return DRM_GPU_SCHED_STAT_RESET;
}
static const struct drm_sched_backend_ops panthor_vm_bind_ops = {
queue_start(queue);
- return DRM_GPU_SCHED_STAT_NOMINAL;
+ return DRM_GPU_SCHED_STAT_RESET;
}
static void queue_free_job(struct drm_sched_job *sched_job)
{
struct drm_gpu_scheduler *sched;
struct drm_sched_job *job;
- enum drm_gpu_sched_stat status = DRM_GPU_SCHED_STAT_NOMINAL;
+ enum drm_gpu_sched_stat status = DRM_GPU_SCHED_STAT_RESET;
sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
drm_sched_job_cleanup(sched_job);
/* Mock job itself is freed by the kunit framework. */
- return DRM_GPU_SCHED_STAT_NOMINAL;
+ return DRM_GPU_SCHED_STAT_RESET;
}
static void mock_sched_free_job(struct drm_sched_job *sched_job)
mutex_unlock(&v3d->reset_lock);
- return DRM_GPU_SCHED_STAT_NOMINAL;
+ return DRM_GPU_SCHED_STAT_RESET;
}
static void
*timedout_ctra = ctra;
v3d_sched_skip_reset(sched_job);
- return DRM_GPU_SCHED_STAT_NOMINAL;
+ return DRM_GPU_SCHED_STAT_RESET;
}
return v3d_gpu_reset_for_timeout(v3d, sched_job);
job->timedout_batches = batches;
v3d_sched_skip_reset(sched_job);
- return DRM_GPU_SCHED_STAT_NOMINAL;
+ return DRM_GPU_SCHED_STAT_RESET;
}
return v3d_gpu_reset_for_timeout(v3d, sched_job);
xe_sched_add_pending_job(sched, job);
xe_sched_submission_start(sched);
- return DRM_GPU_SCHED_STAT_NOMINAL;
+ return DRM_GPU_SCHED_STAT_RESET;
}
/* Kill the run_job entry point */
/* Start fence signaling */
xe_hw_fence_irq_start(q->fence_irq);
- return DRM_GPU_SCHED_STAT_NOMINAL;
+ return DRM_GPU_SCHED_STAT_RESET;
sched_enable:
enable_scheduling(q);
xe_sched_add_pending_job(sched, job);
xe_sched_submission_start(sched);
- return DRM_GPU_SCHED_STAT_NOMINAL;
+ return DRM_GPU_SCHED_STAT_RESET;
}
static void __guc_exec_queue_fini_async(struct work_struct *w)
* enum drm_gpu_sched_stat - the scheduler's status
*
* @DRM_GPU_SCHED_STAT_NONE: Reserved. Do not use.
- * @DRM_GPU_SCHED_STAT_NOMINAL: Operation succeeded.
+ * @DRM_GPU_SCHED_STAT_RESET: The GPU hung and successfully reset.
* @DRM_GPU_SCHED_STAT_ENODEV: Error: Device is not available anymore.
*/
enum drm_gpu_sched_stat {
DRM_GPU_SCHED_STAT_NONE,
- DRM_GPU_SCHED_STAT_NOMINAL,
+ DRM_GPU_SCHED_STAT_RESET,
DRM_GPU_SCHED_STAT_ENODEV,
};