From 40527034d1d5719f9e87ccd4382a83c6865b3d8f Mon Sep 17 00:00:00 2001 From: Andrzej Kacprowski Date: Wed, 8 Oct 2025 08:12:55 +0200 Subject: [PATCH] accel/ivpu: Return correct job error status Currently the driver returns ABORTED for all errors that trigger engine reset. It is better to distinguish between different error types by returning the actual error code reported by firmware. This allows userspace to take different actions based on the error type and improves debuggability. Refactor ivpu_job_signal_and_destroy() by extracting engine error handling logic into a new function ivpu_job_handle_engine_error(). This simplifies engine error handling logic by removing necessity of calling ivpu_job_singal_and_destroy() multiple times by a single job changing it's behavior based on job status. Signed-off-by: Andrzej Kacprowski Reviewed-by: Jeff Hugo Signed-off-by: Karol Wachowski Link: https://lore.kernel.org/r/20251008061255.2909794-1-karol.wachowski@linux.intel.com --- drivers/accel/ivpu/ivpu_job.c | 49 ++++++++++++++++++++++++----------- drivers/accel/ivpu/ivpu_job.h | 3 +++ 2 files changed, 37 insertions(+), 15 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index fd548028f1d64..ba4535a75aa7d 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -564,25 +564,26 @@ static struct ivpu_job *ivpu_job_remove_from_submitted_jobs(struct ivpu_device * return job; } -static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 job_status) +bool ivpu_job_handle_engine_error(struct ivpu_device *vdev, u32 job_id, u32 job_status) { - struct ivpu_job *job; - lockdep_assert_held(&vdev->submitted_jobs_lock); - job = xa_load(&vdev->submitted_jobs_xa, job_id); - if (!job) - return -ENOENT; - switch (job_status) { case VPU_JSM_STATUS_PROCESSING_ERR: case VPU_JSM_STATUS_ENGINE_RESET_REQUIRED_MIN ... VPU_JSM_STATUS_ENGINE_RESET_REQUIRED_MAX: { + struct ivpu_job *job = xa_load(&vdev->submitted_jobs_xa, job_id); + + if (!job) + return false; + /* Trigger an engine reset */ guard(mutex)(&job->file_priv->lock); + job->job_status = job_status; + if (job->file_priv->has_mmu_faults) - return 0; + return false; /* * Mark context as faulty and defer destruction of the job to jobs abort thread @@ -591,26 +592,42 @@ static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 */ job->file_priv->has_mmu_faults = true; queue_work(system_wq, &vdev->context_abort_work); - return 0; + return true; } default: /* Complete job with error status, engine reset not required */ break; } - job = ivpu_job_remove_from_submitted_jobs(vdev, job_id); + return false; +} + +static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 job_status) +{ + struct ivpu_job *job; + + lockdep_assert_held(&vdev->submitted_jobs_lock); + + job = xa_load(&vdev->submitted_jobs_xa, job_id); if (!job) return -ENOENT; - if (job->file_priv->has_mmu_faults) - job_status = DRM_IVPU_JOB_STATUS_ABORTED; + ivpu_job_remove_from_submitted_jobs(vdev, job_id); + + if (job->job_status == VPU_JSM_STATUS_SUCCESS) { + if (job->file_priv->has_mmu_faults) + job->job_status = DRM_IVPU_JOB_STATUS_ABORTED; + else + job->job_status = job_status; + } - job->bos[CMD_BUF_IDX]->job_status = job_status; + job->bos[CMD_BUF_IDX]->job_status = job->job_status; dma_fence_signal(job->done_fence); trace_job("done", job); ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d cmdq_id %u engine %d status 0x%x\n", - job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx, job_status); + job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx, + job->job_status); ivpu_job_destroy(job); ivpu_stop_job_timeout_detection(vdev); @@ -1030,7 +1047,9 @@ ivpu_job_done_callback(struct ivpu_device *vdev, struct ivpu_ipc_hdr *ipc_hdr, payload = (struct vpu_ipc_msg_payload_job_done *)&jsm_msg->payload; mutex_lock(&vdev->submitted_jobs_lock); - ivpu_job_signal_and_destroy(vdev, payload->job_id, payload->job_status); + if (!ivpu_job_handle_engine_error(vdev, payload->job_id, payload->job_status)) + /* No engine error, complete the job normally */ + ivpu_job_signal_and_destroy(vdev, payload->job_id, payload->job_status); mutex_unlock(&vdev->submitted_jobs_lock); } diff --git a/drivers/accel/ivpu/ivpu_job.h b/drivers/accel/ivpu/ivpu_job.h index d2fc4c151614b..3ab61e6a5616c 100644 --- a/drivers/accel/ivpu/ivpu_job.h +++ b/drivers/accel/ivpu/ivpu_job.h @@ -51,6 +51,7 @@ struct ivpu_cmdq { * @cmdq_id: Command queue ID used for submission * @job_id: Unique job ID for tracking and status reporting * @engine_idx: Engine index for job execution + * @job_status: Status reported by firmware for this job * @primary_preempt_buf: Primary preemption buffer for job * @secondary_preempt_buf: Secondary preemption buffer for job (optional) * @bo_count: Number of buffer objects associated with this job @@ -64,6 +65,7 @@ struct ivpu_job { u32 cmdq_id; u32 job_id; u32 engine_idx; + u32 job_status; struct ivpu_bo *primary_preempt_buf; struct ivpu_bo *secondary_preempt_buf; size_t bo_count; @@ -83,6 +85,7 @@ void ivpu_cmdq_abort_all_jobs(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id) void ivpu_job_done_consumer_init(struct ivpu_device *vdev); void ivpu_job_done_consumer_fini(struct ivpu_device *vdev); +bool ivpu_job_handle_engine_error(struct ivpu_device *vdev, u32 job_id, u32 job_status); void ivpu_context_abort_work_fn(struct work_struct *work); void ivpu_jobs_abort_all(struct ivpu_device *vdev); -- 2.47.3