From: Greg Kroah-Hartman Date: Wed, 7 May 2025 14:05:35 +0000 (+0200) Subject: 6.12-stable patches X-Git-Tag: v5.15.182~31 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=f5c4f5386d0e075226e8545c60335676e97f6eb6;p=thirdparty%2Fkernel%2Fstable-queue.git 6.12-stable patches added patches: accel-ivpu-abort-all-jobs-after-command-queue-unregister.patch accel-ivpu-add-handling-of-vpu_jsm_status_mvnci_context_violation_hw.patch accel-ivpu-fix-locking-order-in-ivpu_job_submit.patch sch_drr-make-drr_qlen_notify-idempotent.patch sch_ets-make-est_qlen_notify-idempotent.patch sch_hfsc-make-hfsc_qlen_notify-idempotent.patch sch_htb-make-htb_qlen_notify-idempotent.patch sch_qfq-make-qfq_qlen_notify-idempotent.patch --- diff --git a/queue-6.12/accel-ivpu-abort-all-jobs-after-command-queue-unregister.patch b/queue-6.12/accel-ivpu-abort-all-jobs-after-command-queue-unregister.patch new file mode 100644 index 0000000000..3243076c0d --- /dev/null +++ b/queue-6.12/accel-ivpu-abort-all-jobs-after-command-queue-unregister.patch @@ -0,0 +1,342 @@ +From 5bbccadaf33eea2b879d8326ad59ae0663be47d1 Mon Sep 17 00:00:00 2001 +From: Karol Wachowski +Date: Tue, 7 Jan 2025 18:32:26 +0100 +Subject: accel/ivpu: Abort all jobs after command queue unregister + +From: Karol Wachowski + +commit 5bbccadaf33eea2b879d8326ad59ae0663be47d1 upstream. + +With hardware scheduler it is not expected to receive JOB_DONE +notifications from NPU FW for the jobs aborted due to command queue destroy +JSM command. + +Remove jobs submitted to unregistered command queue from submitted_jobs_xa +to avoid triggering a TDR in such case. + +Add explicit submitted_jobs_lock that protects access to list of submitted +jobs which is now used to find jobs to abort. + +Move context abort procedure to separate work queue not to slow down +handling of IPCs or DCT requests in case where job abort takes longer, +especially when destruction of the last job of a specific context results +in context release. + +Signed-off-by: Karol Wachowski +Signed-off-by: Maciej Falkowski +Reviewed-by: Jacek Lawrynowicz +Signed-off-by: Jacek Lawrynowicz +Link: https://patchwork.freedesktop.org/patch/msgid/20250107173238.381120-4-maciej.falkowski@linux.intel.com +[ This backport removes all the lines from upstream commit related to + the command queue UAPI, as it is not present in the 6.12 kernel and + should not be backported. ] +Signed-off-by: Jacek Lawrynowicz +Signed-off-by: Greg Kroah-Hartman +--- + drivers/accel/ivpu/ivpu_drv.c | 32 ++------------- + drivers/accel/ivpu/ivpu_drv.h | 2 + drivers/accel/ivpu/ivpu_job.c | 82 +++++++++++++++++++++++++++++++--------- + drivers/accel/ivpu/ivpu_job.h | 1 + drivers/accel/ivpu/ivpu_mmu.c | 3 - + drivers/accel/ivpu/ivpu_sysfs.c | 5 +- + 6 files changed, 77 insertions(+), 48 deletions(-) + +--- a/drivers/accel/ivpu/ivpu_drv.c ++++ b/drivers/accel/ivpu/ivpu_drv.c +@@ -36,8 +36,6 @@ + __stringify(DRM_IVPU_DRIVER_MINOR) "." + #endif + +-static struct lock_class_key submitted_jobs_xa_lock_class_key; +- + int ivpu_dbg_mask; + module_param_named(dbg_mask, ivpu_dbg_mask, int, 0644); + MODULE_PARM_DESC(dbg_mask, "Driver debug mask. 
See IVPU_DBG_* macros."); +@@ -455,26 +453,6 @@ static const struct drm_driver driver = + .minor = DRM_IVPU_DRIVER_MINOR, + }; + +-static void ivpu_context_abort_invalid(struct ivpu_device *vdev) +-{ +- struct ivpu_file_priv *file_priv; +- unsigned long ctx_id; +- +- mutex_lock(&vdev->context_list_lock); +- +- xa_for_each(&vdev->context_xa, ctx_id, file_priv) { +- if (!file_priv->has_mmu_faults || file_priv->aborted) +- continue; +- +- mutex_lock(&file_priv->lock); +- ivpu_context_abort_locked(file_priv); +- file_priv->aborted = true; +- mutex_unlock(&file_priv->lock); +- } +- +- mutex_unlock(&vdev->context_list_lock); +-} +- + static irqreturn_t ivpu_irq_thread_handler(int irq, void *arg) + { + struct ivpu_device *vdev = arg; +@@ -488,9 +466,6 @@ static irqreturn_t ivpu_irq_thread_handl + case IVPU_HW_IRQ_SRC_IPC: + ivpu_ipc_irq_thread_handler(vdev); + break; +- case IVPU_HW_IRQ_SRC_MMU_EVTQ: +- ivpu_context_abort_invalid(vdev); +- break; + case IVPU_HW_IRQ_SRC_DCT: + ivpu_pm_dct_irq_thread_handler(vdev); + break; +@@ -607,16 +582,21 @@ static int ivpu_dev_init(struct ivpu_dev + xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ); + xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1); + xa_init_flags(&vdev->db_xa, XA_FLAGS_ALLOC1); +- lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key); + INIT_LIST_HEAD(&vdev->bo_list); + + vdev->db_limit.min = IVPU_MIN_DB; + vdev->db_limit.max = IVPU_MAX_DB; + ++ INIT_WORK(&vdev->context_abort_work, ivpu_context_abort_thread_handler); ++ + ret = drmm_mutex_init(&vdev->drm, &vdev->context_list_lock); + if (ret) + goto err_xa_destroy; + ++ ret = drmm_mutex_init(&vdev->drm, &vdev->submitted_jobs_lock); ++ if (ret) ++ goto err_xa_destroy; ++ + ret = drmm_mutex_init(&vdev->drm, &vdev->bo_list_lock); + if (ret) + goto err_xa_destroy; +--- a/drivers/accel/ivpu/ivpu_drv.h ++++ b/drivers/accel/ivpu/ivpu_drv.h +@@ -137,6 +137,7 @@ struct ivpu_device { + struct mutex context_list_lock; /* Protects user context addition/removal */ + struct xarray context_xa; + struct xa_limit context_xa_limit; ++ struct work_struct context_abort_work; + + struct xarray db_xa; + struct xa_limit db_limit; +@@ -145,6 +146,7 @@ struct ivpu_device { + struct mutex bo_list_lock; /* Protects bo_list */ + struct list_head bo_list; + ++ struct mutex submitted_jobs_lock; /* Protects submitted_jobs */ + struct xarray submitted_jobs_xa; + struct ivpu_ipc_consumer job_done_consumer; + +--- a/drivers/accel/ivpu/ivpu_job.c ++++ b/drivers/accel/ivpu/ivpu_job.c +@@ -335,6 +335,8 @@ void ivpu_context_abort_locked(struct iv + + if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_OS) + ivpu_jsm_context_release(vdev, file_priv->ctx.id); ++ ++ file_priv->aborted = true; + } + + static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job) +@@ -467,16 +469,14 @@ static struct ivpu_job *ivpu_job_remove_ + { + struct ivpu_job *job; + +- xa_lock(&vdev->submitted_jobs_xa); +- job = __xa_erase(&vdev->submitted_jobs_xa, job_id); ++ lockdep_assert_held(&vdev->submitted_jobs_lock); + ++ job = xa_erase(&vdev->submitted_jobs_xa, job_id); + if (xa_empty(&vdev->submitted_jobs_xa) && job) { + vdev->busy_time = ktime_add(ktime_sub(ktime_get(), vdev->busy_start_ts), + vdev->busy_time); + } + +- xa_unlock(&vdev->submitted_jobs_xa); +- + return job; + } + +@@ -484,6 +484,8 @@ static int ivpu_job_signal_and_destroy(s + { + struct ivpu_job *job; + ++ lockdep_assert_held(&vdev->submitted_jobs_lock); ++ + job = ivpu_job_remove_from_submitted_jobs(vdev, 
job_id); + if (!job) + return -ENOENT; +@@ -501,6 +503,10 @@ static int ivpu_job_signal_and_destroy(s + ivpu_stop_job_timeout_detection(vdev); + + ivpu_rpm_put(vdev); ++ ++ if (!xa_empty(&vdev->submitted_jobs_xa)) ++ ivpu_start_job_timeout_detection(vdev); ++ + return 0; + } + +@@ -509,8 +515,12 @@ void ivpu_jobs_abort_all(struct ivpu_dev + struct ivpu_job *job; + unsigned long id; + ++ mutex_lock(&vdev->submitted_jobs_lock); ++ + xa_for_each(&vdev->submitted_jobs_xa, id, job) + ivpu_job_signal_and_destroy(vdev, id, DRM_IVPU_JOB_STATUS_ABORTED); ++ ++ mutex_unlock(&vdev->submitted_jobs_lock); + } + + static int ivpu_job_submit(struct ivpu_job *job, u8 priority) +@@ -535,15 +545,16 @@ static int ivpu_job_submit(struct ivpu_j + goto err_unlock_file_priv; + } + +- xa_lock(&vdev->submitted_jobs_xa); ++ mutex_lock(&vdev->submitted_jobs_lock); ++ + is_first_job = xa_empty(&vdev->submitted_jobs_xa); +- ret = __xa_alloc_cyclic(&vdev->submitted_jobs_xa, &job->job_id, job, file_priv->job_limit, +- &file_priv->job_id_next, GFP_KERNEL); ++ ret = xa_alloc_cyclic(&vdev->submitted_jobs_xa, &job->job_id, job, file_priv->job_limit, ++ &file_priv->job_id_next, GFP_KERNEL); + if (ret < 0) { + ivpu_dbg(vdev, JOB, "Too many active jobs in ctx %d\n", + file_priv->ctx.id); + ret = -EBUSY; +- goto err_unlock_submitted_jobs_xa; ++ goto err_unlock_submitted_jobs; + } + + ret = ivpu_cmdq_push_job(cmdq, job); +@@ -565,19 +576,21 @@ static int ivpu_job_submit(struct ivpu_j + job->job_id, file_priv->ctx.id, job->engine_idx, priority, + job->cmd_buf_vpu_addr, cmdq->jobq->header.tail); + +- xa_unlock(&vdev->submitted_jobs_xa); +- ++ mutex_unlock(&vdev->submitted_jobs_lock); + mutex_unlock(&file_priv->lock); + +- if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW)) ++ if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW)) { ++ mutex_lock(&vdev->submitted_jobs_lock); + ivpu_job_signal_and_destroy(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS); ++ mutex_unlock(&vdev->submitted_jobs_lock); ++ } + + return 0; + + err_erase_xa: +- __xa_erase(&vdev->submitted_jobs_xa, job->job_id); +-err_unlock_submitted_jobs_xa: +- xa_unlock(&vdev->submitted_jobs_xa); ++ xa_erase(&vdev->submitted_jobs_xa, job->job_id); ++err_unlock_submitted_jobs: ++ mutex_unlock(&vdev->submitted_jobs_lock); + err_unlock_file_priv: + mutex_unlock(&file_priv->lock); + ivpu_rpm_put(vdev); +@@ -748,7 +761,6 @@ ivpu_job_done_callback(struct ivpu_devic + struct vpu_jsm_msg *jsm_msg) + { + struct vpu_ipc_msg_payload_job_done *payload; +- int ret; + + if (!jsm_msg) { + ivpu_err(vdev, "IPC message has no JSM payload\n"); +@@ -761,9 +773,10 @@ ivpu_job_done_callback(struct ivpu_devic + } + + payload = (struct vpu_ipc_msg_payload_job_done *)&jsm_msg->payload; +- ret = ivpu_job_signal_and_destroy(vdev, payload->job_id, payload->job_status); +- if (!ret && !xa_empty(&vdev->submitted_jobs_xa)) +- ivpu_start_job_timeout_detection(vdev); ++ ++ mutex_lock(&vdev->submitted_jobs_lock); ++ ivpu_job_signal_and_destroy(vdev, payload->job_id, payload->job_status); ++ mutex_unlock(&vdev->submitted_jobs_lock); + } + + void ivpu_job_done_consumer_init(struct ivpu_device *vdev) +@@ -776,3 +789,36 @@ void ivpu_job_done_consumer_fini(struct + { + ivpu_ipc_consumer_del(vdev, &vdev->job_done_consumer); + } ++ ++void ivpu_context_abort_thread_handler(struct work_struct *work) ++{ ++ struct ivpu_device *vdev = container_of(work, struct ivpu_device, context_abort_work); ++ struct ivpu_file_priv *file_priv; ++ unsigned long ctx_id; ++ struct ivpu_job *job; ++ unsigned long id; ++ ++ 
mutex_lock(&vdev->context_list_lock); ++ xa_for_each(&vdev->context_xa, ctx_id, file_priv) { ++ if (!file_priv->has_mmu_faults || file_priv->aborted) ++ continue; ++ ++ mutex_lock(&file_priv->lock); ++ ivpu_context_abort_locked(file_priv); ++ mutex_unlock(&file_priv->lock); ++ } ++ mutex_unlock(&vdev->context_list_lock); ++ ++ if (vdev->fw->sched_mode != VPU_SCHEDULING_MODE_HW) ++ return; ++ /* ++ * In hardware scheduling mode NPU already has stopped processing jobs ++ * and won't send us any further notifications, thus we have to free job related resources ++ * and notify userspace ++ */ ++ mutex_lock(&vdev->submitted_jobs_lock); ++ xa_for_each(&vdev->submitted_jobs_xa, id, job) ++ if (job->file_priv->aborted) ++ ivpu_job_signal_and_destroy(vdev, job->job_id, DRM_IVPU_JOB_STATUS_ABORTED); ++ mutex_unlock(&vdev->submitted_jobs_lock); ++} +--- a/drivers/accel/ivpu/ivpu_job.h ++++ b/drivers/accel/ivpu/ivpu_job.h +@@ -64,6 +64,7 @@ void ivpu_cmdq_reset_all_contexts(struct + + void ivpu_job_done_consumer_init(struct ivpu_device *vdev); + void ivpu_job_done_consumer_fini(struct ivpu_device *vdev); ++void ivpu_context_abort_thread_handler(struct work_struct *work); + + void ivpu_jobs_abort_all(struct ivpu_device *vdev); + +--- a/drivers/accel/ivpu/ivpu_mmu.c ++++ b/drivers/accel/ivpu/ivpu_mmu.c +@@ -917,8 +917,7 @@ void ivpu_mmu_irq_evtq_handler(struct iv + REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, vdev->mmu->evtq.cons); + } + +- if (!kfifo_put(&vdev->hw->irq.fifo, IVPU_HW_IRQ_SRC_MMU_EVTQ)) +- ivpu_err_ratelimited(vdev, "IRQ FIFO full\n"); ++ queue_work(system_wq, &vdev->context_abort_work); + } + + void ivpu_mmu_evtq_dump(struct ivpu_device *vdev) +--- a/drivers/accel/ivpu/ivpu_sysfs.c ++++ b/drivers/accel/ivpu/ivpu_sysfs.c +@@ -30,11 +30,12 @@ npu_busy_time_us_show(struct device *dev + struct ivpu_device *vdev = to_ivpu_device(drm); + ktime_t total, now = 0; + +- xa_lock(&vdev->submitted_jobs_xa); ++ mutex_lock(&vdev->submitted_jobs_lock); ++ + total = vdev->busy_time; + if (!xa_empty(&vdev->submitted_jobs_xa)) + now = ktime_sub(ktime_get(), vdev->busy_start_ts); +- xa_unlock(&vdev->submitted_jobs_xa); ++ mutex_unlock(&vdev->submitted_jobs_lock); + + return sysfs_emit(buf, "%lld\n", ktime_to_us(ktime_add(total, now))); + } diff --git a/queue-6.12/accel-ivpu-add-handling-of-vpu_jsm_status_mvnci_context_violation_hw.patch b/queue-6.12/accel-ivpu-add-handling-of-vpu_jsm_status_mvnci_context_violation_hw.patch new file mode 100644 index 0000000000..f1fe3c4139 --- /dev/null +++ b/queue-6.12/accel-ivpu-add-handling-of-vpu_jsm_status_mvnci_context_violation_hw.patch @@ -0,0 +1,72 @@ +From dad945c27a42dfadddff1049cf5ae417209a8996 Mon Sep 17 00:00:00 2001 +From: Karol Wachowski +Date: Tue, 7 Jan 2025 18:32:35 +0100 +Subject: accel/ivpu: Add handling of VPU_JSM_STATUS_MVNCI_CONTEXT_VIOLATION_HW + +From: Karol Wachowski + +commit dad945c27a42dfadddff1049cf5ae417209a8996 upstream. + +Mark as invalid context of a job that returned HW context violation +error and queue work that aborts jobs from faulty context. +Add engine reset to the context abort thread handler to not only abort +currently executing jobs but also to ensure NPU invalid state recovery. 
+ +Signed-off-by: Karol Wachowski +Signed-off-by: Maciej Falkowski +Reviewed-by: Jacek Lawrynowicz +Signed-off-by: Jacek Lawrynowicz +Link: https://patchwork.freedesktop.org/patch/msgid/20250107173238.381120-13-maciej.falkowski@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/accel/ivpu/ivpu_job.c | 25 +++++++++++++++++++++++++ + 1 file changed, 25 insertions(+) + +--- a/drivers/accel/ivpu/ivpu_job.c ++++ b/drivers/accel/ivpu/ivpu_job.c +@@ -486,6 +486,26 @@ static int ivpu_job_signal_and_destroy(s + + lockdep_assert_held(&vdev->submitted_jobs_lock); + ++ job = xa_load(&vdev->submitted_jobs_xa, job_id); ++ if (!job) ++ return -ENOENT; ++ ++ if (job_status == VPU_JSM_STATUS_MVNCI_CONTEXT_VIOLATION_HW) { ++ guard(mutex)(&job->file_priv->lock); ++ ++ if (job->file_priv->has_mmu_faults) ++ return 0; ++ ++ /* ++ * Mark context as faulty and defer destruction of the job to jobs abort thread ++ * handler to synchronize between both faults and jobs returning context violation ++ * status and ensure both are handled in the same way ++ */ ++ job->file_priv->has_mmu_faults = true; ++ queue_work(system_wq, &vdev->context_abort_work); ++ return 0; ++ } ++ + job = ivpu_job_remove_from_submitted_jobs(vdev, job_id); + if (!job) + return -ENOENT; +@@ -795,6 +815,9 @@ void ivpu_context_abort_thread_handler(s + struct ivpu_job *job; + unsigned long id; + ++ if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) ++ ivpu_jsm_reset_engine(vdev, 0); ++ + mutex_lock(&vdev->context_list_lock); + xa_for_each(&vdev->context_xa, ctx_id, file_priv) { + if (!file_priv->has_mmu_faults || file_priv->aborted) +@@ -808,6 +831,8 @@ void ivpu_context_abort_thread_handler(s + + if (vdev->fw->sched_mode != VPU_SCHEDULING_MODE_HW) + return; ++ ++ ivpu_jsm_hws_resume_engine(vdev, 0); + /* + * In hardware scheduling mode NPU already has stopped processing jobs + * and won't send us any further notifications, thus we have to free job related resources diff --git a/queue-6.12/accel-ivpu-fix-locking-order-in-ivpu_job_submit.patch b/queue-6.12/accel-ivpu-fix-locking-order-in-ivpu_job_submit.patch new file mode 100644 index 0000000000..ce67f7548b --- /dev/null +++ b/queue-6.12/accel-ivpu-fix-locking-order-in-ivpu_job_submit.patch @@ -0,0 +1,105 @@ +From ab680dc6c78aa035e944ecc8c48a1caab9f39924 Mon Sep 17 00:00:00 2001 +From: Karol Wachowski +Date: Tue, 7 Jan 2025 18:32:34 +0100 +Subject: accel/ivpu: Fix locking order in ivpu_job_submit + +From: Karol Wachowski + +commit ab680dc6c78aa035e944ecc8c48a1caab9f39924 upstream. + +Fix deadlock in job submission and abort handling. +When a thread aborts currently executing jobs due to a fault, +it first locks the global lock protecting submitted_jobs (#1). + +After the last job is destroyed, it proceeds to release the related context +and locks file_priv (#2). Meanwhile, in the job submission thread, +the file_priv lock (#2) is taken first, and then the submitted_jobs +lock (#1) is obtained when a job is added to the submitted jobs list. + + CPU0 CPU1 + ---- ---- + (for example due to a fault) (jobs submissions keep coming) + + lock(&vdev->submitted_jobs_lock) #1 + ivpu_jobs_abort_all() + job_destroy() + lock(&file_priv->lock) #2 + lock(&vdev->submitted_jobs_lock) #1 + file_priv_release() + lock(&vdev->context_list_lock) + lock(&file_priv->lock) #2 + +This order of locking causes a deadlock. To resolve this issue, +change the order of locking in ivpu_job_submit(). 
+ +Signed-off-by: Karol Wachowski +Signed-off-by: Maciej Falkowski +Reviewed-by: Jacek Lawrynowicz +Signed-off-by: Jacek Lawrynowicz +Link: https://patchwork.freedesktop.org/patch/msgid/20250107173238.381120-12-maciej.falkowski@linux.intel.com +[ This backport required small adjustments to ivpu_job_submit(), which + lacks support for explicit command queue creation added in 6.15. ] +Signed-off-by: Jacek Lawrynowicz +Signed-off-by: Greg Kroah-Hartman +--- + drivers/accel/ivpu/ivpu_job.c | 15 ++++++--------- + 1 file changed, 6 insertions(+), 9 deletions(-) + +--- a/drivers/accel/ivpu/ivpu_job.c ++++ b/drivers/accel/ivpu/ivpu_job.c +@@ -535,6 +535,7 @@ static int ivpu_job_submit(struct ivpu_j + if (ret < 0) + return ret; + ++ mutex_lock(&vdev->submitted_jobs_lock); + mutex_lock(&file_priv->lock); + + cmdq = ivpu_cmdq_acquire(file_priv, job->engine_idx, priority); +@@ -542,11 +543,9 @@ static int ivpu_job_submit(struct ivpu_j + ivpu_warn_ratelimited(vdev, "Failed to get job queue, ctx %d engine %d prio %d\n", + file_priv->ctx.id, job->engine_idx, priority); + ret = -EINVAL; +- goto err_unlock_file_priv; ++ goto err_unlock; + } + +- mutex_lock(&vdev->submitted_jobs_lock); +- + is_first_job = xa_empty(&vdev->submitted_jobs_xa); + ret = xa_alloc_cyclic(&vdev->submitted_jobs_xa, &job->job_id, job, file_priv->job_limit, + &file_priv->job_id_next, GFP_KERNEL); +@@ -554,7 +553,7 @@ static int ivpu_job_submit(struct ivpu_j + ivpu_dbg(vdev, JOB, "Too many active jobs in ctx %d\n", + file_priv->ctx.id); + ret = -EBUSY; +- goto err_unlock_submitted_jobs; ++ goto err_unlock; + } + + ret = ivpu_cmdq_push_job(cmdq, job); +@@ -576,22 +575,20 @@ static int ivpu_job_submit(struct ivpu_j + job->job_id, file_priv->ctx.id, job->engine_idx, priority, + job->cmd_buf_vpu_addr, cmdq->jobq->header.tail); + +- mutex_unlock(&vdev->submitted_jobs_lock); + mutex_unlock(&file_priv->lock); + + if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW)) { +- mutex_lock(&vdev->submitted_jobs_lock); + ivpu_job_signal_and_destroy(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS); +- mutex_unlock(&vdev->submitted_jobs_lock); + } + ++ mutex_unlock(&vdev->submitted_jobs_lock); ++ + return 0; + + err_erase_xa: + xa_erase(&vdev->submitted_jobs_xa, job->job_id); +-err_unlock_submitted_jobs: ++err_unlock: + mutex_unlock(&vdev->submitted_jobs_lock); +-err_unlock_file_priv: + mutex_unlock(&file_priv->lock); + ivpu_rpm_put(vdev); + return ret; diff --git a/queue-6.12/sch_drr-make-drr_qlen_notify-idempotent.patch b/queue-6.12/sch_drr-make-drr_qlen_notify-idempotent.patch new file mode 100644 index 0000000000..1377bde9a9 --- /dev/null +++ b/queue-6.12/sch_drr-make-drr_qlen_notify-idempotent.patch @@ -0,0 +1,65 @@ +From df008598b3a00be02a8051fde89ca0fbc416bd55 Mon Sep 17 00:00:00 2001 +From: Cong Wang +Date: Thu, 3 Apr 2025 14:10:24 -0700 +Subject: sch_drr: make drr_qlen_notify() idempotent + +From: Cong Wang + +commit df008598b3a00be02a8051fde89ca0fbc416bd55 upstream. + +drr_qlen_notify() always deletes the DRR class from its active list +with list_del(), therefore, it is not idempotent and not friendly +to its callers, like fq_codel_dequeue(). + +Let's make it idempotent to ease qdisc_tree_reduce_backlog() callers' +life. Also change other list_del()'s to list_del_init() just to be +extra safe. 
+ +Reported-by: Gerrard Tai +Signed-off-by: Cong Wang +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20250403211033.166059-3-xiyou.wangcong@gmail.com +Acked-by: Jamal Hadi Salim +Signed-off-by: Paolo Abeni +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_drr.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +--- a/net/sched/sch_drr.c ++++ b/net/sched/sch_drr.c +@@ -110,6 +110,7 @@ static int drr_change_class(struct Qdisc + return -ENOBUFS; + + gnet_stats_basic_sync_init(&cl->bstats); ++ INIT_LIST_HEAD(&cl->alist); + cl->common.classid = classid; + cl->quantum = quantum; + cl->qdisc = qdisc_create_dflt(sch->dev_queue, +@@ -234,7 +235,7 @@ static void drr_qlen_notify(struct Qdisc + { + struct drr_class *cl = (struct drr_class *)arg; + +- list_del(&cl->alist); ++ list_del_init(&cl->alist); + } + + static int drr_dump_class(struct Qdisc *sch, unsigned long arg, +@@ -393,7 +394,7 @@ static struct sk_buff *drr_dequeue(struc + if (unlikely(skb == NULL)) + goto out; + if (cl->qdisc->q.qlen == 0) +- list_del(&cl->alist); ++ list_del_init(&cl->alist); + + bstats_update(&cl->bstats, skb); + qdisc_bstats_update(sch, skb); +@@ -434,7 +435,7 @@ static void drr_reset_qdisc(struct Qdisc + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { + if (cl->qdisc->q.qlen) +- list_del(&cl->alist); ++ list_del_init(&cl->alist); + qdisc_reset(cl->qdisc); + } + } diff --git a/queue-6.12/sch_ets-make-est_qlen_notify-idempotent.patch b/queue-6.12/sch_ets-make-est_qlen_notify-idempotent.patch new file mode 100644 index 0000000000..9cb3d9d5af --- /dev/null +++ b/queue-6.12/sch_ets-make-est_qlen_notify-idempotent.patch @@ -0,0 +1,65 @@ +From a7a15f39c682ac4268624da2abdb9114bdde96d5 Mon Sep 17 00:00:00 2001 +From: Cong Wang +Date: Thu, 3 Apr 2025 14:10:27 -0700 +Subject: sch_ets: make est_qlen_notify() idempotent + +From: Cong Wang + +commit a7a15f39c682ac4268624da2abdb9114bdde96d5 upstream. + +est_qlen_notify() deletes its class from its active list with +list_del() when qlen is 0, therefore, it is not idempotent and +not friendly to its callers, like fq_codel_dequeue(). + +Let's make it idempotent to ease qdisc_tree_reduce_backlog() callers' +life. Also change other list_del()'s to list_del_init() just to be +extra safe. + +Reported-by: Gerrard Tai +Signed-off-by: Cong Wang +Link: https://patch.msgid.link/20250403211033.166059-6-xiyou.wangcong@gmail.com +Acked-by: Jamal Hadi Salim +Signed-off-by: Paolo Abeni +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_ets.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/net/sched/sch_ets.c ++++ b/net/sched/sch_ets.c +@@ -298,7 +298,7 @@ static void ets_class_qlen_notify(struct + * to remove them. 
+ */ + if (!ets_class_is_strict(q, cl) && sch->q.qlen) +- list_del(&cl->alist); ++ list_del_init(&cl->alist); + } + + static int ets_class_dump(struct Qdisc *sch, unsigned long arg, +@@ -491,7 +491,7 @@ static struct sk_buff *ets_qdisc_dequeue + if (unlikely(!skb)) + goto out; + if (cl->qdisc->q.qlen == 0) +- list_del(&cl->alist); ++ list_del_init(&cl->alist); + return ets_qdisc_dequeue_skb(sch, skb); + } + +@@ -660,7 +660,7 @@ static int ets_qdisc_change(struct Qdisc + } + for (i = q->nbands; i < oldbands; i++) { + if (i >= q->nstrict && q->classes[i].qdisc->q.qlen) +- list_del(&q->classes[i].alist); ++ list_del_init(&q->classes[i].alist); + qdisc_tree_flush_backlog(q->classes[i].qdisc); + } + WRITE_ONCE(q->nstrict, nstrict); +@@ -716,7 +716,7 @@ static void ets_qdisc_reset(struct Qdisc + + for (band = q->nstrict; band < q->nbands; band++) { + if (q->classes[band].qdisc->q.qlen) +- list_del(&q->classes[band].alist); ++ list_del_init(&q->classes[band].alist); + } + for (band = 0; band < q->nbands; band++) + qdisc_reset(q->classes[band].qdisc); diff --git a/queue-6.12/sch_hfsc-make-hfsc_qlen_notify-idempotent.patch b/queue-6.12/sch_hfsc-make-hfsc_qlen_notify-idempotent.patch new file mode 100644 index 0000000000..f4afe4fb73 --- /dev/null +++ b/queue-6.12/sch_hfsc-make-hfsc_qlen_notify-idempotent.patch @@ -0,0 +1,54 @@ +From 51eb3b65544c9efd6a1026889ee5fb5aa62da3bb Mon Sep 17 00:00:00 2001 +From: Cong Wang +Date: Thu, 3 Apr 2025 14:10:25 -0700 +Subject: sch_hfsc: make hfsc_qlen_notify() idempotent + +From: Cong Wang + +commit 51eb3b65544c9efd6a1026889ee5fb5aa62da3bb upstream. + +hfsc_qlen_notify() is not idempotent either and not friendly +to its callers, like fq_codel_dequeue(). Let's make it idempotent +to ease qdisc_tree_reduce_backlog() callers' life: + +1. update_vf() decreases cl->cl_nactive, so we can check whether it is +non-zero before calling it. + +2. eltree_remove() always removes RB node cl->el_node, but we can use + RB_EMPTY_NODE() + RB_CLEAR_NODE() to make it safe. + +Reported-by: Gerrard Tai +Signed-off-by: Cong Wang +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20250403211033.166059-4-xiyou.wangcong@gmail.com +Acked-by: Jamal Hadi Salim +Signed-off-by: Paolo Abeni +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_hfsc.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/net/sched/sch_hfsc.c ++++ b/net/sched/sch_hfsc.c +@@ -203,7 +203,10 @@ eltree_insert(struct hfsc_class *cl) + static inline void + eltree_remove(struct hfsc_class *cl) + { +- rb_erase(&cl->el_node, &cl->sched->eligible); ++ if (!RB_EMPTY_NODE(&cl->el_node)) { ++ rb_erase(&cl->el_node, &cl->sched->eligible); ++ RB_CLEAR_NODE(&cl->el_node); ++ } + } + + static inline void +@@ -1225,7 +1228,8 @@ hfsc_qlen_notify(struct Qdisc *sch, unsi + /* vttree is now handled in update_vf() so that update_vf(cl, 0, 0) + * needs to be called explicitly to remove a class from vttree. 
+ */ +- update_vf(cl, 0, 0); ++ if (cl->cl_nactive) ++ update_vf(cl, 0, 0); + if (cl->cl_flags & HFSC_RSC) + eltree_remove(cl); + } diff --git a/queue-6.12/sch_htb-make-htb_qlen_notify-idempotent.patch b/queue-6.12/sch_htb-make-htb_qlen_notify-idempotent.patch new file mode 100644 index 0000000000..f49a82c404 --- /dev/null +++ b/queue-6.12/sch_htb-make-htb_qlen_notify-idempotent.patch @@ -0,0 +1,38 @@ +From 5ba8b837b522d7051ef81bacf3d95383ff8edce5 Mon Sep 17 00:00:00 2001 +From: Cong Wang +Date: Thu, 3 Apr 2025 14:10:23 -0700 +Subject: sch_htb: make htb_qlen_notify() idempotent + +From: Cong Wang + +commit 5ba8b837b522d7051ef81bacf3d95383ff8edce5 upstream. + +htb_qlen_notify() always deactivates the HTB class and in fact could +trigger a warning if it is already deactivated. Therefore, it is not +idempotent and not friendly to its callers, like fq_codel_dequeue(). + +Let's make it idempotent to ease qdisc_tree_reduce_backlog() callers' +life. + +Reported-by: Gerrard Tai +Signed-off-by: Cong Wang +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20250403211033.166059-2-xiyou.wangcong@gmail.com +Acked-by: Jamal Hadi Salim +Signed-off-by: Paolo Abeni +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_htb.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/sched/sch_htb.c ++++ b/net/sched/sch_htb.c +@@ -1485,6 +1485,8 @@ static void htb_qlen_notify(struct Qdisc + { + struct htb_class *cl = (struct htb_class *)arg; + ++ if (!cl->prio_activity) ++ return; + htb_deactivate(qdisc_priv(sch), cl); + } + diff --git a/queue-6.12/sch_qfq-make-qfq_qlen_notify-idempotent.patch b/queue-6.12/sch_qfq-make-qfq_qlen_notify-idempotent.patch new file mode 100644 index 0000000000..9eb7a84ead --- /dev/null +++ b/queue-6.12/sch_qfq-make-qfq_qlen_notify-idempotent.patch @@ -0,0 +1,66 @@ +From 55f9eca4bfe30a15d8656f915922e8c98b7f0728 Mon Sep 17 00:00:00 2001 +From: Cong Wang +Date: Thu, 3 Apr 2025 14:10:26 -0700 +Subject: sch_qfq: make qfq_qlen_notify() idempotent + +From: Cong Wang + +commit 55f9eca4bfe30a15d8656f915922e8c98b7f0728 upstream. + +qfq_qlen_notify() always deletes its class from its active list +with list_del_init() _and_ calls qfq_deactivate_agg() when the whole list +becomes empty. + +To make it idempotent, just skip everything when it is not in the active +list. + +Also change other list_del()'s to list_del_init() just to be extra safe. 
+ +Reported-by: Gerrard Tai +Signed-off-by: Cong Wang +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20250403211033.166059-5-xiyou.wangcong@gmail.com +Acked-by: Jamal Hadi Salim +Signed-off-by: Paolo Abeni +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_qfq.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/net/sched/sch_qfq.c ++++ b/net/sched/sch_qfq.c +@@ -352,7 +352,7 @@ static void qfq_deactivate_class(struct + struct qfq_aggregate *agg = cl->agg; + + +- list_del(&cl->alist); /* remove from RR queue of the aggregate */ ++ list_del_init(&cl->alist); /* remove from RR queue of the aggregate */ + if (list_empty(&agg->active)) /* agg is now inactive */ + qfq_deactivate_agg(q, agg); + } +@@ -482,6 +482,7 @@ static int qfq_change_class(struct Qdisc + gnet_stats_basic_sync_init(&cl->bstats); + cl->common.classid = classid; + cl->deficit = lmax; ++ INIT_LIST_HEAD(&cl->alist); + + cl->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, + classid, NULL); +@@ -990,7 +991,7 @@ static struct sk_buff *agg_dequeue(struc + cl->deficit -= (int) len; + + if (cl->qdisc->q.qlen == 0) /* no more packets, remove from list */ +- list_del(&cl->alist); ++ list_del_init(&cl->alist); + else if (cl->deficit < qdisc_pkt_len(cl->qdisc->ops->peek(cl->qdisc))) { + cl->deficit += agg->lmax; + list_move_tail(&cl->alist, &agg->active); +@@ -1421,6 +1422,8 @@ static void qfq_qlen_notify(struct Qdisc + struct qfq_sched *q = qdisc_priv(sch); + struct qfq_class *cl = (struct qfq_class *)arg; + ++ if (list_empty(&cl->alist)) ++ return; + qfq_deactivate_class(q, cl); + } + diff --git a/queue-6.12/series b/queue-6.12/series index dc59c5d036..5895d5c5e1 100644 --- a/queue-6.12/series +++ b/queue-6.12/series @@ -141,3 +141,11 @@ accel-ivpu-make-db_id-and-job_id-allocations-incremental.patch accel-ivpu-use-xa_alloc_cyclic-instead-of-custom-function.patch accel-ivpu-fix-a-typo.patch accel-ivpu-update-vpu-fw-api-headers.patch +accel-ivpu-abort-all-jobs-after-command-queue-unregister.patch +accel-ivpu-fix-locking-order-in-ivpu_job_submit.patch +accel-ivpu-add-handling-of-vpu_jsm_status_mvnci_context_violation_hw.patch +sch_htb-make-htb_qlen_notify-idempotent.patch +sch_drr-make-drr_qlen_notify-idempotent.patch +sch_hfsc-make-hfsc_qlen_notify-idempotent.patch +sch_qfq-make-qfq_qlen_notify-idempotent.patch +sch_ets-make-est_qlen_notify-idempotent.patch
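
The five sch_* patches queued above all apply the same idempotency pattern to the class ->qlen_notify() callback: initialise the class's active-list node with INIT_LIST_HEAD() at class creation, unlink it with list_del_init() rather than list_del(), and return early when the node is already unlinked, so repeated calls from qdisc_tree_reduce_backlog() paths such as fq_codel_dequeue() stay harmless. The fragment below is a minimal userspace sketch of that pattern only — it is not part of any patch; struct fake_class, fake_qlen_notify() and the simplified list helpers are illustrative stand-ins for the real <linux/list.h> API and the per-qdisc class structs.

/*
 * Minimal userspace sketch (not kernel code) of the idempotent
 * ->qlen_notify() pattern used by the sch_* patches above.  The list
 * helpers are simplified stand-ins for <linux/list.h>.
 */
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

static void INIT_LIST_HEAD(struct list_head *h) { h->next = h->prev = h; }

static int list_empty(const struct list_head *h) { return h->next == h; }

static void list_add_tail(struct list_head *n, struct list_head *h)
{
	n->prev = h->prev;
	n->next = h;
	h->prev->next = n;
	h->prev = n;
}

static void list_del_init(struct list_head *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
	INIT_LIST_HEAD(n);		/* node ends up linked to itself */
}

struct fake_class {			/* stand-in for e.g. struct drr_class */
	struct list_head alist;		/* membership in the active list */
};

/* Idempotent notify: a second call for the same class is a no-op. */
static void fake_qlen_notify(struct fake_class *cl)
{
	if (list_empty(&cl->alist))	/* already deactivated, nothing to do */
		return;
	list_del_init(&cl->alist);
}

int main(void)
{
	struct list_head active;
	struct fake_class cl;

	INIT_LIST_HEAD(&active);
	INIT_LIST_HEAD(&cl.alist);	/* as drr_change_class() now does */
	list_add_tail(&cl.alist, &active);

	fake_qlen_notify(&cl);		/* deactivates the class */
	fake_qlen_notify(&cl);		/* harmless no-op */

	printf("active list empty: %d\n", list_empty(&active));
	return 0;
}

Built with any C compiler, the second fake_qlen_notify() call falls through the list_empty() check, which is exactly the behaviour the queued patches rely on when a class has already been removed from its qdisc's active list.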