From: Greg Kroah-Hartman Date: Tue, 30 Jul 2024 09:44:45 +0000 (+0200) Subject: 6.1-stable patches X-Git-Tag: v6.1.103~40 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=323a2503c61802ad449d69e49aa5b5256c6ada9c;p=thirdparty%2Fkernel%2Fstable-queue.git 6.1-stable patches added patches: asoc-codecs-wcd939x-fix-typec-mux-and-switch-leak-during-device-removal.patch asoc-sof-ipc4-topology-use-correct-queue_id-for-requesting-input-pin-format.patch bus-mhi-ep-do-not-allocate-memory-for-mhi-objects-from-dma-zone.patch drm-amd-amdgpu-fix-uninitialized-variable-warnings.patch drm-amdgpu-add-missed-harvest-check-for-vcn-ip-v4-v5.patch drm-amdgpu-reset-vm-state-machine-after-gpu-reset-vram-lost.patch drm-amdgpu-sdma5.2-update-wptr-registers-as-well-as-doorbell.patch drm-dp_mst-fix-all-mstb-marked-as-not-probed-after-suspend-resume.patch drm-i915-dp-don-t-switch-the-lttpr-mode-on-an-active-link.patch drm-i915-dp-reset-intel_dp-link_trained-before-retraining-the-link.patch drm-i915-gt-do-not-consider-preemption-during-execlists_dequeue-for-gen8.patch drm-udl-remove-drm_connector_poll_hpd.patch perf-fix-event-leak-upon-exec-and-file-release.patch perf-fix-event-leak-upon-exit.patch perf-stat-fix-the-hard-coded-metrics-calculation-on-the-hybrid.patch perf-x86-intel-ds-fix-non-0-retire-latency-on-raptorlake.patch perf-x86-intel-pt-fix-a-topa_entry-base-address-calculation.patch perf-x86-intel-pt-fix-topa_entry-base-length.patch perf-x86-intel-uncore-fix-the-bits-of-the-cha-extended-umask-for-spr.patch rtc-abx80x-fix-return-value-of-nvmem-callback-on-read.patch rtc-isl1208-fix-return-value-of-nvmem-callbacks.patch scsi-lpfc-allow-device_recovery-mode-after-rscn-receipt-if-in-prli_issue-state.patch scsi-qla2xxx-complete-command-early-within-lock.patch scsi-qla2xxx-during-vport-delete-send-async-logout-explicitly.patch scsi-qla2xxx-fix-flash-read-failure.patch scsi-qla2xxx-fix-for-possible-memory-corruption.patch scsi-qla2xxx-reduce-fabric-scan-duplicate-code.patch 
scsi-qla2xxx-unable-to-act-on-rscn-for-port-online.patch scsi-qla2xxx-use-qp-lock-to-search-for-bsg.patch scsi-qla2xxx-validate-nvme_local_port-correctly.patch watchdog-perf-properly-initialize-the-turbo-mode-timestamp-and-rearm-counter.patch --- diff --git a/queue-6.1/drm-amdgpu-sdma5.2-update-wptr-registers-as-well-as-doorbell.patch b/queue-6.1/drm-amdgpu-sdma5.2-update-wptr-registers-as-well-as-doorbell.patch new file mode 100644 index 00000000000..5069eb5f9f2 --- /dev/null +++ b/queue-6.1/drm-amdgpu-sdma5.2-update-wptr-registers-as-well-as-doorbell.patch @@ -0,0 +1,56 @@ +From a03ebf116303e5d13ba9a2b65726b106cb1e96f6 Mon Sep 17 00:00:00 2001 +From: Alex Deucher +Date: Tue, 9 Jul 2024 17:54:11 -0400 +Subject: drm/amdgpu/sdma5.2: Update wptr registers as well as doorbell + +From: Alex Deucher + +commit a03ebf116303e5d13ba9a2b65726b106cb1e96f6 upstream. + +We seem to have a case where SDMA will sometimes miss a doorbell +if GFX is entering the powergating state when the doorbell comes in. +To work around this, we can update the wptr via MMIO; however, +this is only safe because we disallow gfxoff in begin_use() for +SDMA 5.2 and then allow it again in end_use(). + +Enable this workaround while we are root causing the issue with +the HW team. + +Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/3440 +Tested-by: Friedrich Vock +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org +(cherry picked from commit f2ac52634963fc38e4935e11077b6f7854e5d700) +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c ++++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +@@ -241,6 +241,14 @@ static void sdma_v5_2_ring_set_wptr(stru + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); ++ /* SDMA seems to miss doorbells sometimes when powergating kicks in. 
++ * Updating the wptr directly will wake it. This is only safe because ++ * we disallow gfxoff in begin_use() and then allow it again in end_use(). ++ */ ++ WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), ++ lower_32_bits(ring->wptr << 2)); ++ WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), ++ upper_32_bits(ring->wptr << 2)); + } else { + DRM_DEBUG("Not using doorbell -- " + "mmSDMA%i_GFX_RB_WPTR == 0x%08x " +@@ -1705,6 +1713,10 @@ static void sdma_v5_2_ring_begin_use(str + * but it shouldn't hurt for other parts since + * this GFXOFF will be disallowed anyway when SDMA is + * active, this just makes it explicit. ++ * sdma_v5_2_ring_set_wptr() takes advantage of this ++ * to update the wptr because sometimes SDMA seems to miss ++ * doorbells when entering PG. If you remove this, update ++ * sdma_v5_2_ring_set_wptr() as well! + */ + amdgpu_gfx_off_ctrl(adev, false); + } diff --git a/queue-6.1/drm-dp_mst-fix-all-mstb-marked-as-not-probed-after-suspend-resume.patch b/queue-6.1/drm-dp_mst-fix-all-mstb-marked-as-not-probed-after-suspend-resume.patch new file mode 100644 index 00000000000..96383323900 --- /dev/null +++ b/queue-6.1/drm-dp_mst-fix-all-mstb-marked-as-not-probed-after-suspend-resume.patch @@ -0,0 +1,56 @@ +From d63d81094d208abb20fc444514b2d9ec2f4b7c4e Mon Sep 17 00:00:00 2001 +From: Wayne Lin +Date: Wed, 26 Jun 2024 16:48:23 +0800 +Subject: drm/dp_mst: Fix all mstb marked as not probed after suspend/resume + +From: Wayne Lin + +commit d63d81094d208abb20fc444514b2d9ec2f4b7c4e upstream. + +[Why] +After suspend/resume, with topology unchanged, observe that +link_address_sent of all mstb are marked as false even though the topology probing +is done without any error. + +It is caused by wrongly also including the "ret == 0" case as a probing failure +case. + +[How] +Remove inappropriate checking conditions. 
+ +Cc: Lyude Paul +Cc: Harry Wentland +Cc: Jani Nikula +Cc: Imre Deak +Cc: Daniel Vetter +Cc: stable@vger.kernel.org +Fixes: 37dfdc55ffeb ("drm/dp_mst: Cleanup drm_dp_send_link_address() a bit") +Signed-off-by: Wayne Lin +Reviewed-by: Lyude Paul +Signed-off-by: Lyude Paul +Link: https://patchwork.freedesktop.org/patch/msgid/20240626084825.878565-2-Wayne.Lin@amd.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/display/drm_dp_mst_topology.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/display/drm_dp_mst_topology.c ++++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c +@@ -2923,7 +2923,7 @@ static int drm_dp_send_link_address(stru + + /* FIXME: Actually do some real error handling here */ + ret = drm_dp_mst_wait_tx_reply(mstb, txmsg); +- if (ret <= 0) { ++ if (ret < 0) { + drm_err(mgr->dev, "Sending link address failed with %d\n", ret); + goto out; + } +@@ -2975,7 +2975,7 @@ static int drm_dp_send_link_address(stru + mutex_unlock(&mgr->lock); + + out: +- if (ret <= 0) ++ if (ret < 0) + mstb->link_address_sent = false; + kfree(txmsg); + return ret < 0 ? ret : changed; diff --git a/queue-6.1/drm-i915-dp-reset-intel_dp-link_trained-before-retraining-the-link.patch b/queue-6.1/drm-i915-dp-reset-intel_dp-link_trained-before-retraining-the-link.patch new file mode 100644 index 00000000000..86583d0f698 --- /dev/null +++ b/queue-6.1/drm-i915-dp-reset-intel_dp-link_trained-before-retraining-the-link.patch @@ -0,0 +1,42 @@ +From d13e2a6e95e6b87f571c837c71a3d05691def9bb Mon Sep 17 00:00:00 2001 +From: Imre Deak +Date: Mon, 8 Jul 2024 22:00:24 +0300 +Subject: drm/i915/dp: Reset intel_dp->link_trained before retraining the link +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Imre Deak + +commit d13e2a6e95e6b87f571c837c71a3d05691def9bb upstream. 
+ +Regularly retraining a link during an atomic commit happens with the +given pipe/link already disabled and hence intel_dp->link_trained being +false. Ensure this also for retraining a DP SST link via direct calls to +the link training functions (vs. an actual commit as for DP MST). So far +nothing depended on this, however the next patch will depend on +link_trained==false for changing the LTTPR mode to non-transparent. + +Cc: # v5.15+ +Cc: Ville Syrjälä +Reviewed-by: Ankit Nautiyal +Signed-off-by: Imre Deak +Link: https://patchwork.freedesktop.org/patch/msgid/20240708190029.271247-2-imre.deak@intel.com +(cherry picked from commit a4d5ce61765c08ab364aa4b327f6739b646e6cfa) +Signed-off-by: Tvrtko Ursulin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/i915/display/intel_dp.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/gpu/drm/i915/display/intel_dp.c ++++ b/drivers/gpu/drm/i915/display/intel_dp.c +@@ -4089,6 +4089,8 @@ int intel_dp_retrain_link(struct intel_e + !intel_dp_mst_is_master_trans(crtc_state)) + continue; + ++ intel_dp->link_trained = false; ++ + intel_dp_check_frl_training(intel_dp); + intel_dp_pcon_dsc_configure(intel_dp, crtc_state); + intel_dp_start_link_train(intel_dp, crtc_state); diff --git a/queue-6.1/drm-i915-gt-do-not-consider-preemption-during-execlists_dequeue-for-gen8.patch b/queue-6.1/drm-i915-gt-do-not-consider-preemption-during-execlists_dequeue-for-gen8.patch new file mode 100644 index 00000000000..3f7a25bca70 --- /dev/null +++ b/queue-6.1/drm-i915-gt-do-not-consider-preemption-during-execlists_dequeue-for-gen8.patch @@ -0,0 +1,63 @@ +From 65564157ae64cec0f527583f96e32f484f730f92 Mon Sep 17 00:00:00 2001 +From: Nitin Gote +Date: Thu, 11 Jul 2024 22:02:08 +0530 +Subject: drm/i915/gt: Do not consider preemption during execlists_dequeue for gen8 + +From: Nitin Gote + +commit 65564157ae64cec0f527583f96e32f484f730f92 upstream. 
+ +We're seeing a GPU hang issue on a CHV platform, which was caused by commit +bac24f59f454 ("drm/i915/execlists: Enable coarse preemption boundaries for +Gen8"). + +The Gen8 platform only supports timeslicing and doesn't have a preemption +mechanism, as its engines do not have a preemption timer. + +Commit 751f82b353a6 ("drm/i915/gt: Only disable preemption on Gen8 render +engines") addressed this issue only for render engines. This patch extends +that fix by ensuring that preemption is not considered for all engines on +Gen8 platforms. + +v4: + - Use the correct Fixes tag (Rodrigo Vivi) + - Reworded commit log (Andi Shyti) + +v3: + - Inside need_preempt(), condition of can_preempt() is not required + as simplified can_preempt() is enough. (Chris Wilson) + +v2: Simplify can_preempt() function (Tvrtko Ursulin) + +Fixes: 751f82b353a6 ("drm/i915/gt: Only disable preemption on gen8 render engines") +Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/11396 +Suggested-by: Andi Shyti +Signed-off-by: Nitin Gote +Cc: Chris Wilson +CC: # v5.12+ +Reviewed-by: Jonathan Cavitt +Reviewed-by: Andi Shyti +Signed-off-by: Andi Shyti +Link: https://patchwork.freedesktop.org/patch/msgid/20240711163208.1355736-1-nitin.r.gote@intel.com +(cherry picked from commit 7df0be6e6280c6fca01d039864bb123e5e36604b) +Signed-off-by: Tvrtko Ursulin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 6 +----- + 1 file changed, 1 insertion(+), 5 deletions(-) + +--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c ++++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +@@ -3313,11 +3313,7 @@ static void remove_from_engine(struct i9 + + static bool can_preempt(struct intel_engine_cs *engine) + { +- if (GRAPHICS_VER(engine->i915) > 8) +- return true; +- +- /* GPGPU on bdw requires extra w/a; not implemented */ +- return engine->class != RENDER_CLASS; ++ return GRAPHICS_VER(engine->i915) > 8; + } + + static void kick_execlists(const 
struct i915_request *rq, int prio) diff --git a/queue-6.1/perf-fix-event-leak-upon-exec-and-file-release.patch b/queue-6.1/perf-fix-event-leak-upon-exec-and-file-release.patch new file mode 100644 index 00000000000..c26d24adc80 --- /dev/null +++ b/queue-6.1/perf-fix-event-leak-upon-exec-and-file-release.patch @@ -0,0 +1,156 @@ +From 3a5465418f5fd970e86a86c7f4075be262682840 Mon Sep 17 00:00:00 2001 +From: Frederic Weisbecker +Date: Fri, 21 Jun 2024 11:16:01 +0200 +Subject: perf: Fix event leak upon exec and file release + +From: Frederic Weisbecker + +commit 3a5465418f5fd970e86a86c7f4075be262682840 upstream. + +The perf pending task work is never waited upon the matching event +release. In the case of a child event, released via free_event() +directly, this can potentially result in a leaked event, such as in the +following scenario that doesn't even require a weak IRQ work +implementation to trigger: + +schedule() + prepare_task_switch() +=======> + perf_event_overflow() + event->pending_sigtrap = ... + irq_work_queue(&event->pending_irq) +<======= + perf_event_task_sched_out() + event_sched_out() + event->pending_sigtrap = 0; + atomic_long_inc_not_zero(&event->refcount) + task_work_add(&event->pending_task) + finish_lock_switch() +=======> + perf_pending_irq() + //do nothing, rely on pending task work +<======= + +begin_new_exec() + perf_event_exit_task() + perf_event_exit_event() + // If is child event + free_event() + WARN(atomic_long_cmpxchg(&event->refcount, 1, 0) != 1) + // event is leaked + +Similar scenarios can also happen with perf_event_remove_on_exec() or +simply against concurrent perf_event_release(). + +Fix this by synchronizing against the possibly remaining pending task +work while freeing the event, just like is done with remaining pending +IRQ work. This means that the pending task callback neither needs nor +should hold a reference to the event, preventing it from ever being +freed. 
+ +Fixes: 517e6a301f34 ("perf: Fix perf_pending_task() UaF") +Signed-off-by: Frederic Weisbecker +Signed-off-by: Peter Zijlstra (Intel) +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20240621091601.18227-5-frederic@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/perf_event.h | 1 + + kernel/events/core.c | 38 ++++++++++++++++++++++++++++++++++---- + 2 files changed, 35 insertions(+), 4 deletions(-) + +--- a/include/linux/perf_event.h ++++ b/include/linux/perf_event.h +@@ -765,6 +765,7 @@ struct perf_event { + struct irq_work pending_irq; + struct callback_head pending_task; + unsigned int pending_work; ++ struct rcuwait pending_work_wait; + + atomic_t event_limit; + +--- a/kernel/events/core.c ++++ b/kernel/events/core.c +@@ -2320,7 +2320,6 @@ event_sched_out(struct perf_event *event + if (state != PERF_EVENT_STATE_OFF && + !event->pending_work && + !task_work_add(current, &event->pending_task, TWA_RESUME)) { +- WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount)); + event->pending_work = 1; + } else { + local_dec(&event->ctx->nr_pending); +@@ -5004,9 +5003,35 @@ static bool exclusive_event_installable( + static void perf_addr_filters_splice(struct perf_event *event, + struct list_head *head); + ++static void perf_pending_task_sync(struct perf_event *event) ++{ ++ struct callback_head *head = &event->pending_task; ++ ++ if (!event->pending_work) ++ return; ++ /* ++ * If the task is queued to the current task's queue, we ++ * obviously can't wait for it to complete. Simply cancel it. ++ */ ++ if (task_work_cancel(current, head)) { ++ event->pending_work = 0; ++ local_dec(&event->ctx->nr_pending); ++ return; ++ } ++ ++ /* ++ * All accesses related to the event are within the same ++ * non-preemptible section in perf_pending_task(). The RCU ++ * grace period before the event is freed will make sure all ++ * those accesses are complete by then. 
++ */ ++ rcuwait_wait_event(&event->pending_work_wait, !event->pending_work, TASK_UNINTERRUPTIBLE); ++} ++ + static void _free_event(struct perf_event *event) + { + irq_work_sync(&event->pending_irq); ++ perf_pending_task_sync(event); + + unaccount_event(event); + +@@ -6637,23 +6662,27 @@ static void perf_pending_task(struct cal + int rctx; + + /* ++ * All accesses to the event must belong to the same implicit RCU read-side ++ * critical section as the ->pending_work reset. See comment in ++ * perf_pending_task_sync(). ++ */ ++ preempt_disable_notrace(); ++ /* + * If we 'fail' here, that's OK, it means recursion is already disabled + * and we won't recurse 'further'. + */ +- preempt_disable_notrace(); + rctx = perf_swevent_get_recursion_context(); + + if (event->pending_work) { + event->pending_work = 0; + perf_sigtrap(event); + local_dec(&event->ctx->nr_pending); ++ rcuwait_wake_up(&event->pending_work_wait); + } + + if (rctx >= 0) + perf_swevent_put_recursion_context(rctx); + preempt_enable_notrace(); +- +- put_event(event); + } + + #ifdef CONFIG_GUEST_PERF_EVENTS +@@ -11779,6 +11808,7 @@ perf_event_alloc(struct perf_event_attr + init_waitqueue_head(&event->waitq); + init_irq_work(&event->pending_irq, perf_pending_irq); + init_task_work(&event->pending_task, perf_pending_task); ++ rcuwait_init(&event->pending_work_wait); + + mutex_init(&event->mmap_mutex); + raw_spin_lock_init(&event->addr_filters.lock); diff --git a/queue-6.1/perf-fix-event-leak-upon-exit.patch b/queue-6.1/perf-fix-event-leak-upon-exit.patch new file mode 100644 index 00000000000..d4cc74a0e83 --- /dev/null +++ b/queue-6.1/perf-fix-event-leak-upon-exit.patch @@ -0,0 +1,88 @@ +From 2fd5ad3f310de22836cdacae919dd99d758a1f1b Mon Sep 17 00:00:00 2001 +From: Frederic Weisbecker +Date: Fri, 21 Jun 2024 11:16:00 +0200 +Subject: perf: Fix event leak upon exit + +From: Frederic Weisbecker + +commit 2fd5ad3f310de22836cdacae919dd99d758a1f1b upstream. 
+ +When a task is scheduled out, pending sigtrap deliveries are deferred +to the target task upon resume to userspace via task_work. + +However failures while adding an event's callback to the task_work +engine are ignored. And since the last call for events exit happen +after task work is eventually closed, there is a small window during +which pending sigtrap can be queued though ignored, leaking the event +refcount addition such as in the following scenario: + + TASK A + ----- + + do_exit() + exit_task_work(tsk); + + + perf_event_overflow() + event->pending_sigtrap = pending_id; + irq_work_queue(&event->pending_irq); + + =========> PREEMPTION: TASK A -> TASK B + event_sched_out() + event->pending_sigtrap = 0; + atomic_long_inc_not_zero(&event->refcount) + // FAILS: task work has exited + task_work_add(&event->pending_task) + [...] + + perf_pending_irq() + // early return: event->oncpu = -1 + + [...] + =========> TASK B -> TASK A + perf_event_exit_task(tsk) + perf_event_exit_event() + free_event() + WARN(atomic_long_cmpxchg(&event->refcount, 1, 0) != 1) + // leak event due to unexpected refcount == 2 + +As a result the event is never released while the task exits. + +Fix this with appropriate task_work_add()'s error handling. 
+ +Fixes: 517e6a301f34 ("perf: Fix perf_pending_task() UaF") +Signed-off-by: Frederic Weisbecker +Signed-off-by: Peter Zijlstra (Intel) +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20240621091601.18227-4-frederic@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + kernel/events/core.c | 13 +++++-------- + 1 file changed, 5 insertions(+), 8 deletions(-) + +--- a/kernel/events/core.c ++++ b/kernel/events/core.c +@@ -2316,18 +2316,15 @@ event_sched_out(struct perf_event *event + } + + if (event->pending_sigtrap) { +- bool dec = true; +- + event->pending_sigtrap = 0; + if (state != PERF_EVENT_STATE_OFF && +- !event->pending_work) { +- event->pending_work = 1; +- dec = false; ++ !event->pending_work && ++ !task_work_add(current, &event->pending_task, TWA_RESUME)) { + WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount)); +- task_work_add(current, &event->pending_task, TWA_RESUME); +- } +- if (dec) ++ event->pending_work = 1; ++ } else { + local_dec(&event->ctx->nr_pending); ++ } + } + + perf_event_set_state(event, state); diff --git a/queue-6.1/perf-x86-intel-pt-fix-a-topa_entry-base-address-calculation.patch b/queue-6.1/perf-x86-intel-pt-fix-a-topa_entry-base-address-calculation.patch new file mode 100644 index 00000000000..aea3e3b5ef7 --- /dev/null +++ b/queue-6.1/perf-x86-intel-pt-fix-a-topa_entry-base-address-calculation.patch @@ -0,0 +1,46 @@ +From ad97196379d0b8cb24ef3d5006978a6554e6467f Mon Sep 17 00:00:00 2001 +From: Adrian Hunter +Date: Mon, 24 Jun 2024 23:10:56 +0300 +Subject: perf/x86/intel/pt: Fix a topa_entry base address calculation + +From: Adrian Hunter + +commit ad97196379d0b8cb24ef3d5006978a6554e6467f upstream. + +topa_entry->base is a bit-field. Bit-fields are not promoted to a 64-bit +type, even if the underlying type is 64-bit, and so, if necessary, must +be cast to a larger type when calculations are done. + +Fix a topa_entry->base address calculation by adding a cast. 
+ +Without the cast, the address was limited to 36-bits i.e. 64GiB. + +The address calculation is used on systems that do not support Multiple +Entry ToPA (only Broadwell), and affects physical addresses on or above +64GiB. Instead of writing to the correct address, the address comprising +the first 36 bits would be written to. + +Intel PT snapshot and sampling modes are not affected. + +Fixes: 52ca9ced3f70 ("perf/x86/intel/pt: Add Intel PT PMU driver") +Reported-by: Dave Hansen +Signed-off-by: Adrian Hunter +Signed-off-by: Peter Zijlstra (Intel) +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20240624201101.60186-3-adrian.hunter@intel.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/events/intel/pt.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/events/intel/pt.c ++++ b/arch/x86/events/intel/pt.c +@@ -877,7 +877,7 @@ static void pt_update_head(struct pt *pt + */ + static void *pt_buffer_region(struct pt_buffer *buf) + { +- return phys_to_virt(TOPA_ENTRY(buf->cur, buf->cur_idx)->base << TOPA_SHIFT); ++ return phys_to_virt((phys_addr_t)TOPA_ENTRY(buf->cur, buf->cur_idx)->base << TOPA_SHIFT); + } + + /** diff --git a/queue-6.1/perf-x86-intel-pt-fix-topa_entry-base-length.patch b/queue-6.1/perf-x86-intel-pt-fix-topa_entry-base-length.patch new file mode 100644 index 00000000000..1bb968c07ce --- /dev/null +++ b/queue-6.1/perf-x86-intel-pt-fix-topa_entry-base-length.patch @@ -0,0 +1,44 @@ +From 5638bd722a44bbe97c1a7b3fae5b9efddb3e70ff Mon Sep 17 00:00:00 2001 +From: Marco Cavenati +Date: Mon, 24 Jun 2024 23:10:55 +0300 +Subject: perf/x86/intel/pt: Fix topa_entry base length + +From: Marco Cavenati + +commit 5638bd722a44bbe97c1a7b3fae5b9efddb3e70ff upstream. + +topa_entry->base needs to store a pfn. It obviously needs to be +large enough to store the largest possible x86 pfn which is +MAXPHYADDR-PAGE_SIZE (52-12). So it is 4 bits too small. + +Increase the size of topa_entry->base from 36 bits to 40 bits. 
+ +Note, systems where physical addresses can be 256TiB or more are affected. + +[ Adrian: Amend commit message as suggested by Dave Hansen ] + +Fixes: 52ca9ced3f70 ("perf/x86/intel/pt: Add Intel PT PMU driver") +Signed-off-by: Marco Cavenati +Signed-off-by: Adrian Hunter +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Adrian Hunter +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20240624201101.60186-2-adrian.hunter@intel.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/events/intel/pt.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/x86/events/intel/pt.h ++++ b/arch/x86/events/intel/pt.h +@@ -33,8 +33,8 @@ struct topa_entry { + u64 rsvd2 : 1; + u64 size : 4; + u64 rsvd3 : 2; +- u64 base : 36; +- u64 rsvd4 : 16; ++ u64 base : 40; ++ u64 rsvd4 : 12; + }; + + /* TSC to Core Crystal Clock Ratio */ diff --git a/queue-6.1/perf-x86-intel-uncore-fix-the-bits-of-the-cha-extended-umask-for-spr.patch b/queue-6.1/perf-x86-intel-uncore-fix-the-bits-of-the-cha-extended-umask-for-spr.patch new file mode 100644 index 00000000000..ecfb4f44e29 --- /dev/null +++ b/queue-6.1/perf-x86-intel-uncore-fix-the-bits-of-the-cha-extended-umask-for-spr.patch @@ -0,0 +1,70 @@ +From a5a6ff3d639d088d4af7e2935e1ee0d8b4e817d4 Mon Sep 17 00:00:00 2001 +From: Kan Liang +Date: Mon, 8 Jul 2024 11:55:24 -0700 +Subject: perf/x86/intel/uncore: Fix the bits of the CHA extended umask for SPR + +From: Kan Liang + +commit a5a6ff3d639d088d4af7e2935e1ee0d8b4e817d4 upstream. + +The perf stat errors out with UNC_CHA_TOR_INSERTS.IA_HIT_CXL_ACC_LOCAL +event. + + $perf stat -e uncore_cha_55/event=0x35,umask=0x10c0008101/ -a -- ls + event syntax error: '..0x35,umask=0x10c0008101/' + \___ Bad event or PMU + +The definition of the CHA umask is config:8-15,32-55, which is 32bit. +However, the umask of the event is bigger than 32bit. +This is an error in the original uncore spec. + +Add a new umask_ext5 for the new CHA umask range. 
+ +Fixes: 949b11381f81 ("perf/x86/intel/uncore: Add Sapphire Rapids server CHA support") +Closes: https://lore.kernel.org/linux-perf-users/alpine.LRH.2.20.2401300733310.11354@Diego/ +Signed-off-by: Kan Liang +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Ian Rogers +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20240708185524.1185505-1-kan.liang@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/events/intel/uncore_snbep.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/arch/x86/events/intel/uncore_snbep.c ++++ b/arch/x86/events/intel/uncore_snbep.c +@@ -459,6 +459,7 @@ + #define SPR_RAW_EVENT_MASK_EXT 0xffffff + + /* SPR CHA */ ++#define SPR_CHA_EVENT_MASK_EXT 0xffffffff + #define SPR_CHA_PMON_CTL_TID_EN (1 << 16) + #define SPR_CHA_PMON_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \ + SPR_CHA_PMON_CTL_TID_EN) +@@ -475,6 +476,7 @@ DEFINE_UNCORE_FORMAT_ATTR(umask_ext, uma + DEFINE_UNCORE_FORMAT_ATTR(umask_ext2, umask, "config:8-15,32-57"); + DEFINE_UNCORE_FORMAT_ATTR(umask_ext3, umask, "config:8-15,32-39"); + DEFINE_UNCORE_FORMAT_ATTR(umask_ext4, umask, "config:8-15,32-55"); ++DEFINE_UNCORE_FORMAT_ATTR(umask_ext5, umask, "config:8-15,32-63"); + DEFINE_UNCORE_FORMAT_ATTR(qor, qor, "config:16"); + DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); + DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19"); +@@ -5648,7 +5650,7 @@ static struct intel_uncore_ops spr_uncor + + static struct attribute *spr_uncore_cha_formats_attr[] = { + &format_attr_event.attr, +- &format_attr_umask_ext4.attr, ++ &format_attr_umask_ext5.attr, + &format_attr_tid_en2.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, +@@ -5684,7 +5686,7 @@ ATTRIBUTE_GROUPS(uncore_alias); + static struct intel_uncore_type spr_uncore_chabox = { + .name = "cha", + .event_mask = SPR_CHA_PMON_EVENT_MASK, +- .event_mask_ext = SPR_RAW_EVENT_MASK_EXT, ++ .event_mask_ext = SPR_CHA_EVENT_MASK_EXT, + .num_shared_regs = 1, + .constraints = 
skx_uncore_chabox_constraints, + .ops = &spr_uncore_chabox_ops, diff --git a/queue-6.1/rtc-isl1208-fix-return-value-of-nvmem-callbacks.patch b/queue-6.1/rtc-isl1208-fix-return-value-of-nvmem-callbacks.patch new file mode 100644 index 00000000000..c99c2f6cb55 --- /dev/null +++ b/queue-6.1/rtc-isl1208-fix-return-value-of-nvmem-callbacks.patch @@ -0,0 +1,63 @@ +From 70f1ae5f0e7f44edf842444044615da7b59838c1 Mon Sep 17 00:00:00 2001 +From: Joy Chakraborty +Date: Wed, 12 Jun 2024 08:08:31 +0000 +Subject: rtc: isl1208: Fix return value of nvmem callbacks + +From: Joy Chakraborty + +commit 70f1ae5f0e7f44edf842444044615da7b59838c1 upstream. + +Read/write callbacks registered with nvmem core expect 0 to be returned +on success and a negative value to be returned on failure. + +isl1208_nvmem_read()/isl1208_nvmem_write() currently return the number of +bytes read/written on success, fix to return 0 on success and negative on +failure. + +Fixes: c3544f6f51ed ("rtc: isl1208: Add new style nvmem support to driver") +Cc: stable@vger.kernel.org +Signed-off-by: Joy Chakraborty +Link: https://lore.kernel.org/r/20240612080831.1227131-1-joychakr@google.com +Signed-off-by: Alexandre Belloni +Signed-off-by: Greg Kroah-Hartman +--- + drivers/rtc/rtc-isl1208.c | 11 ++++------- + 1 file changed, 4 insertions(+), 7 deletions(-) + +--- a/drivers/rtc/rtc-isl1208.c ++++ b/drivers/rtc/rtc-isl1208.c +@@ -743,14 +743,13 @@ static int isl1208_nvmem_read(void *priv + { + struct isl1208_state *isl1208 = priv; + struct i2c_client *client = to_i2c_client(isl1208->rtc->dev.parent); +- int ret; + + /* nvmem sanitizes offset/count for us, but count==0 is possible */ + if (!count) + return count; +- ret = isl1208_i2c_read_regs(client, ISL1208_REG_USR1 + off, buf, ++ ++ return isl1208_i2c_read_regs(client, ISL1208_REG_USR1 + off, buf, + count); +- return ret == 0 ? 
count : ret; + } + + static int isl1208_nvmem_write(void *priv, unsigned int off, void *buf, +@@ -758,15 +757,13 @@ static int isl1208_nvmem_write(void *pri + { + struct isl1208_state *isl1208 = priv; + struct i2c_client *client = to_i2c_client(isl1208->rtc->dev.parent); +- int ret; + + /* nvmem sanitizes off/count for us, but count==0 is possible */ + if (!count) + return count; +- ret = isl1208_i2c_set_regs(client, ISL1208_REG_USR1 + off, buf, +- count); + +- return ret == 0 ? count : ret; ++ return isl1208_i2c_set_regs(client, ISL1208_REG_USR1 + off, buf, ++ count); + } + + static const struct nvmem_config isl1208_nvmem_config = { diff --git a/queue-6.1/scsi-qla2xxx-complete-command-early-within-lock.patch b/queue-6.1/scsi-qla2xxx-complete-command-early-within-lock.patch new file mode 100644 index 00000000000..ac4f87c7a3a --- /dev/null +++ b/queue-6.1/scsi-qla2xxx-complete-command-early-within-lock.patch @@ -0,0 +1,79 @@ +From 4475afa2646d3fec176fc4d011d3879b26cb26e3 Mon Sep 17 00:00:00 2001 +From: Shreyas Deodhar +Date: Wed, 10 Jul 2024 22:40:52 +0530 +Subject: scsi: qla2xxx: Complete command early within lock + +From: Shreyas Deodhar + +commit 4475afa2646d3fec176fc4d011d3879b26cb26e3 upstream. 
+ +A crash was observed while performing NPIV and FW reset, + + BUG: kernel NULL pointer dereference, address: 000000000000001c + #PF: supervisor read access in kernel mode + #PF: error_code(0x0000) - not-present page + PGD 0 P4D 0 + Oops: 0000 1 PREEMPT_RT SMP NOPTI + RIP: 0010:dma_direct_unmap_sg+0x51/0x1e0 + RSP: 0018:ffffc90026f47b88 EFLAGS: 00010246 + RAX: 0000000000000000 RBX: 0000000000000021 RCX: 0000000000000002 + RDX: 0000000000000021 RSI: 0000000000000000 RDI: ffff8881041130d0 + RBP: ffff8881041130d0 R08: 0000000000000000 R09: 0000000000000034 + R10: ffffc90026f47c48 R11: 0000000000000031 R12: 0000000000000000 + R13: 0000000000000000 R14: ffff8881565e4a20 R15: 0000000000000000 + FS: 00007f4c69ed3d00(0000) GS:ffff889faac80000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 000000000000001c CR3: 0000000288a50002 CR4: 00000000007706e0 + DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + PKRU: 55555554 + Call Trace: + + ? __die_body+0x1a/0x60 + ? page_fault_oops+0x16f/0x4a0 + ? do_user_addr_fault+0x174/0x7f0 + ? exc_page_fault+0x69/0x1a0 + ? asm_exc_page_fault+0x22/0x30 + ? dma_direct_unmap_sg+0x51/0x1e0 + ? preempt_count_sub+0x96/0xe0 + qla2xxx_qpair_sp_free_dma+0x29f/0x3b0 [qla2xxx] + qla2xxx_qpair_sp_compl+0x60/0x80 [qla2xxx] + __qla2x00_abort_all_cmds+0xa2/0x450 [qla2xxx] + +The command completion was done early while aborting the commands in driver +unload path but outside lock to avoid the WARN_ON condition of performing +dma_free_attr within the lock. However this caused race condition while +command completion via multiple paths causing system crash. + +Hence complete the command early in unload path but within the lock to +avoid race condition. 
+ +Fixes: 0367076b0817 ("scsi: qla2xxx: Perform lockless command completion in abort path") +Cc: stable@vger.kernel.org +Signed-off-by: Shreyas Deodhar +Signed-off-by: Nilesh Javali +Link: https://lore.kernel.org/r/20240710171057.35066-7-njavali@marvell.com +Reviewed-by: Himanshu Madhani +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/qla2xxx/qla_os.c | 5 ----- + 1 file changed, 5 deletions(-) + +--- a/drivers/scsi/qla2xxx/qla_os.c ++++ b/drivers/scsi/qla2xxx/qla_os.c +@@ -1869,14 +1869,9 @@ __qla2x00_abort_all_cmds(struct qla_qpai + for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) { + sp = req->outstanding_cmds[cnt]; + if (sp) { +- /* +- * perform lockless completion during driver unload +- */ + if (qla2x00_chip_is_down(vha)) { + req->outstanding_cmds[cnt] = NULL; +- spin_unlock_irqrestore(qp->qp_lock_ptr, flags); + sp->done(sp, res); +- spin_lock_irqsave(qp->qp_lock_ptr, flags); + continue; + } + diff --git a/queue-6.1/scsi-qla2xxx-during-vport-delete-send-async-logout-explicitly.patch b/queue-6.1/scsi-qla2xxx-during-vport-delete-send-async-logout-explicitly.patch new file mode 100644 index 00000000000..3ac2a98c3c9 --- /dev/null +++ b/queue-6.1/scsi-qla2xxx-during-vport-delete-send-async-logout-explicitly.patch @@ -0,0 +1,73 @@ +From 76f480d7c717368f29a3870f7d64471ce0ff8fb2 Mon Sep 17 00:00:00 2001 +From: Manish Rangankar +Date: Wed, 10 Jul 2024 22:40:53 +0530 +Subject: scsi: qla2xxx: During vport delete send async logout explicitly + +From: Manish Rangankar + +commit 76f480d7c717368f29a3870f7d64471ce0ff8fb2 upstream. + +During vport delete, it is observed that during unload we hit a crash +because of stale entries in outstanding command array. For all these stale +I/O entries, eh_abort was issued and aborted (fast_fail_io = 2009h) but +I/Os could not complete while vport delete is in process of deleting. 
+ + BUG: kernel NULL pointer dereference, address: 000000000000001c + #PF: supervisor read access in kernel mode + #PF: error_code(0x0000) - not-present page + PGD 0 P4D 0 + Oops: 0000 [#1] PREEMPT SMP NOPTI + Workqueue: qla2xxx_wq qla_do_work [qla2xxx] + RIP: 0010:dma_direct_unmap_sg+0x51/0x1e0 + RSP: 0018:ffffa1e1e150fc68 EFLAGS: 00010046 + RAX: 0000000000000000 RBX: 0000000000000021 RCX: 0000000000000001 + RDX: 0000000000000021 RSI: 0000000000000000 RDI: ffff8ce208a7a0d0 + RBP: ffff8ce208a7a0d0 R08: 0000000000000000 R09: ffff8ce378aac9c8 + R10: ffff8ce378aac8a0 R11: ffffa1e1e150f9d8 R12: 0000000000000000 + R13: 0000000000000000 R14: ffff8ce378aac9c8 R15: 0000000000000000 + FS: 0000000000000000(0000) GS:ffff8d217f000000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 000000000000001c CR3: 0000002089acc000 CR4: 0000000000350ee0 + Call Trace: + + qla2xxx_qpair_sp_free_dma+0x417/0x4e0 + ? qla2xxx_qpair_sp_compl+0x10d/0x1a0 + ? qla2x00_status_entry+0x768/0x2830 + ? newidle_balance+0x2f0/0x430 + ? dequeue_entity+0x100/0x3c0 + ? qla24xx_process_response_queue+0x6a1/0x19e0 + ? __schedule+0x2d5/0x1140 + ? qla_do_work+0x47/0x60 + ? process_one_work+0x267/0x440 + ? process_one_work+0x440/0x440 + ? worker_thread+0x2d/0x3d0 + ? process_one_work+0x440/0x440 + ? kthread+0x156/0x180 + ? set_kthread_struct+0x50/0x50 + ? ret_from_fork+0x22/0x30 + + +Send out async logout explicitly for all the ports during vport delete. + +Cc: stable@vger.kernel.org +Signed-off-by: Manish Rangankar +Signed-off-by: Nilesh Javali +Link: https://lore.kernel.org/r/20240710171057.35066-8-njavali@marvell.com +Reviewed-by: Himanshu Madhani +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/qla2xxx/qla_mid.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/scsi/qla2xxx/qla_mid.c ++++ b/drivers/scsi/qla2xxx/qla_mid.c +@@ -180,7 +180,7 @@ qla24xx_disable_vp(scsi_qla_host_t *vha) + atomic_set(&vha->loop_state, LOOP_DOWN); + atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME); + list_for_each_entry(fcport, &vha->vp_fcports, list) +- fcport->logout_on_delete = 0; ++ fcport->logout_on_delete = 1; + + if (!vha->hw->flags.edif_enabled) + qla2x00_wait_for_sess_deletion(vha); diff --git a/queue-6.1/scsi-qla2xxx-fix-flash-read-failure.patch b/queue-6.1/scsi-qla2xxx-fix-flash-read-failure.patch new file mode 100644 index 00000000000..c49e4bf97f4 --- /dev/null +++ b/queue-6.1/scsi-qla2xxx-fix-flash-read-failure.patch @@ -0,0 +1,371 @@ +From 29e222085d8907ccff18ecd931bdd4c6b1f11b92 Mon Sep 17 00:00:00 2001 +From: Quinn Tran +Date: Wed, 10 Jul 2024 22:40:51 +0530 +Subject: scsi: qla2xxx: Fix flash read failure + +From: Quinn Tran + +commit 29e222085d8907ccff18ecd931bdd4c6b1f11b92 upstream. + +Link up failure is observed as a result of flash read failure. Current +code does not check flash read return code where it relies on FW checksum +to detect the problem. + +Add check of flash read failure to detect the problem sooner. + +Reported-by: kernel test robot +Reported-by: Dan Carpenter +Closes: https://lore.kernel.org/all/202406210815.rPDRDMBi-lkp@intel.com/ +Cc: stable@vger.kernel.org +Signed-off-by: Quinn Tran +Signed-off-by: Nilesh Javali +Link: https://lore.kernel.org/r/20240710171057.35066-6-njavali@marvell.com +Reviewed-by: Himanshu Madhani +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/qla2xxx/qla_init.c | 63 ++++++++++++++++++----- + drivers/scsi/qla2xxx/qla_sup.c | 108 +++++++++++++++++++++++++++------------- + 2 files changed, 125 insertions(+), 46 deletions(-) + +--- a/drivers/scsi/qla2xxx/qla_init.c ++++ b/drivers/scsi/qla2xxx/qla_init.c +@@ -8274,15 +8274,21 @@ qla28xx_get_aux_images( + struct qla27xx_image_status pri_aux_image_status, sec_aux_image_status; + bool valid_pri_image = false, valid_sec_image = false; + bool active_pri_image = false, active_sec_image = false; ++ int rc; + + if (!ha->flt_region_aux_img_status_pri) { + ql_dbg(ql_dbg_init, vha, 0x018a, "Primary aux image not addressed\n"); + goto check_sec_image; + } + +- qla24xx_read_flash_data(vha, (uint32_t *)&pri_aux_image_status, ++ rc = qla24xx_read_flash_data(vha, (uint32_t *)&pri_aux_image_status, + ha->flt_region_aux_img_status_pri, + sizeof(pri_aux_image_status) >> 2); ++ if (rc) { ++ ql_log(ql_log_info, vha, 0x01a1, ++ "Unable to read Primary aux image(%x).\n", rc); ++ goto check_sec_image; ++ } + qla27xx_print_image(vha, "Primary aux image", &pri_aux_image_status); + + if (qla28xx_check_aux_image_status_signature(&pri_aux_image_status)) { +@@ -8313,9 +8319,15 @@ check_sec_image: + goto check_valid_image; + } + +- qla24xx_read_flash_data(vha, (uint32_t *)&sec_aux_image_status, ++ rc = qla24xx_read_flash_data(vha, (uint32_t *)&sec_aux_image_status, + ha->flt_region_aux_img_status_sec, + sizeof(sec_aux_image_status) >> 2); ++ if (rc) { ++ ql_log(ql_log_info, vha, 0x01a2, ++ "Unable to read Secondary aux image(%x).\n", rc); ++ goto check_valid_image; ++ } ++ + qla27xx_print_image(vha, "Secondary aux image", &sec_aux_image_status); + + if (qla28xx_check_aux_image_status_signature(&sec_aux_image_status)) { +@@ -8373,6 +8385,7 @@ qla27xx_get_active_image(struct scsi_qla + struct qla27xx_image_status pri_image_status, sec_image_status; + bool valid_pri_image = false, valid_sec_image = false; + bool active_pri_image 
= false, active_sec_image = false; ++ int rc; + + if (!ha->flt_region_img_status_pri) { + ql_dbg(ql_dbg_init, vha, 0x018a, "Primary image not addressed\n"); +@@ -8414,8 +8427,14 @@ check_sec_image: + goto check_valid_image; + } + +- qla24xx_read_flash_data(vha, (uint32_t *)(&sec_image_status), ++ rc = qla24xx_read_flash_data(vha, (uint32_t *)(&sec_image_status), + ha->flt_region_img_status_sec, sizeof(sec_image_status) >> 2); ++ if (rc) { ++ ql_log(ql_log_info, vha, 0x01a3, ++ "Unable to read Secondary image status(%x).\n", rc); ++ goto check_valid_image; ++ } ++ + qla27xx_print_image(vha, "Secondary image", &sec_image_status); + + if (qla27xx_check_image_status_signature(&sec_image_status)) { +@@ -8487,11 +8506,10 @@ qla24xx_load_risc_flash(scsi_qla_host_t + "FW: Loading firmware from flash (%x).\n", faddr); + + dcode = (uint32_t *)req->ring; +- qla24xx_read_flash_data(vha, dcode, faddr, 8); +- if (qla24xx_risc_firmware_invalid(dcode)) { ++ rval = qla24xx_read_flash_data(vha, dcode, faddr, 8); ++ if (rval || qla24xx_risc_firmware_invalid(dcode)) { + ql_log(ql_log_fatal, vha, 0x008c, +- "Unable to verify the integrity of flash firmware " +- "image.\n"); ++ "Unable to verify the integrity of flash firmware image (rval %x).\n", rval); + ql_log(ql_log_fatal, vha, 0x008d, + "Firmware data: %08x %08x %08x %08x.\n", + dcode[0], dcode[1], dcode[2], dcode[3]); +@@ -8505,7 +8523,12 @@ qla24xx_load_risc_flash(scsi_qla_host_t + for (j = 0; j < segments; j++) { + ql_dbg(ql_dbg_init, vha, 0x008d, + "-> Loading segment %u...\n", j); +- qla24xx_read_flash_data(vha, dcode, faddr, 10); ++ rval = qla24xx_read_flash_data(vha, dcode, faddr, 10); ++ if (rval) { ++ ql_log(ql_log_fatal, vha, 0x016a, ++ "-> Unable to read segment addr + size .\n"); ++ return QLA_FUNCTION_FAILED; ++ } + risc_addr = be32_to_cpu((__force __be32)dcode[2]); + risc_size = be32_to_cpu((__force __be32)dcode[3]); + if (!*srisc_addr) { +@@ -8521,7 +8544,13 @@ qla24xx_load_risc_flash(scsi_qla_host_t + 
ql_dbg(ql_dbg_init, vha, 0x008e, + "-> Loading fragment %u: %#x <- %#x (%#lx dwords)...\n", + fragment, risc_addr, faddr, dlen); +- qla24xx_read_flash_data(vha, dcode, faddr, dlen); ++ rval = qla24xx_read_flash_data(vha, dcode, faddr, dlen); ++ if (rval) { ++ ql_log(ql_log_fatal, vha, 0x016b, ++ "-> Unable to read fragment(faddr %#x dlen %#lx).\n", ++ faddr, dlen); ++ return QLA_FUNCTION_FAILED; ++ } + for (i = 0; i < dlen; i++) + dcode[i] = swab32(dcode[i]); + +@@ -8550,7 +8579,14 @@ qla24xx_load_risc_flash(scsi_qla_host_t + fwdt->length = 0; + + dcode = (uint32_t *)req->ring; +- qla24xx_read_flash_data(vha, dcode, faddr, 7); ++ ++ rval = qla24xx_read_flash_data(vha, dcode, faddr, 7); ++ if (rval) { ++ ql_log(ql_log_fatal, vha, 0x016c, ++ "-> Unable to read template size.\n"); ++ goto failed; ++ } ++ + risc_size = be32_to_cpu((__force __be32)dcode[2]); + ql_dbg(ql_dbg_init, vha, 0x0161, + "-> fwdt%u template array at %#x (%#x dwords)\n", +@@ -8576,11 +8612,12 @@ qla24xx_load_risc_flash(scsi_qla_host_t + } + + dcode = fwdt->template; +- qla24xx_read_flash_data(vha, dcode, faddr, risc_size); ++ rval = qla24xx_read_flash_data(vha, dcode, faddr, risc_size); + +- if (!qla27xx_fwdt_template_valid(dcode)) { ++ if (rval || !qla27xx_fwdt_template_valid(dcode)) { + ql_log(ql_log_warn, vha, 0x0165, +- "-> fwdt%u failed template validate\n", j); ++ "-> fwdt%u failed template validate (rval %x)\n", ++ j, rval); + goto failed; + } + +--- a/drivers/scsi/qla2xxx/qla_sup.c ++++ b/drivers/scsi/qla2xxx/qla_sup.c +@@ -555,6 +555,7 @@ qla2xxx_find_flt_start(scsi_qla_host_t * + struct qla_flt_location *fltl = (void *)req->ring; + uint32_t *dcode = (uint32_t *)req->ring; + uint8_t *buf = (void *)req->ring, *bcode, last_image; ++ int rc; + + /* + * FLT-location structure resides after the last PCI region. +@@ -584,14 +585,24 @@ qla2xxx_find_flt_start(scsi_qla_host_t * + pcihdr = 0; + do { + /* Verify PCI expansion ROM header. 
*/ +- qla24xx_read_flash_data(vha, dcode, pcihdr >> 2, 0x20); ++ rc = qla24xx_read_flash_data(vha, dcode, pcihdr >> 2, 0x20); ++ if (rc) { ++ ql_log(ql_log_info, vha, 0x016d, ++ "Unable to read PCI Expansion Rom Header (%x).\n", rc); ++ return QLA_FUNCTION_FAILED; ++ } + bcode = buf + (pcihdr % 4); + if (bcode[0x0] != 0x55 || bcode[0x1] != 0xaa) + goto end; + + /* Locate PCI data structure. */ + pcids = pcihdr + ((bcode[0x19] << 8) | bcode[0x18]); +- qla24xx_read_flash_data(vha, dcode, pcids >> 2, 0x20); ++ rc = qla24xx_read_flash_data(vha, dcode, pcids >> 2, 0x20); ++ if (rc) { ++ ql_log(ql_log_info, vha, 0x0179, ++ "Unable to read PCI Data Structure (%x).\n", rc); ++ return QLA_FUNCTION_FAILED; ++ } + bcode = buf + (pcihdr % 4); + + /* Validate signature of PCI data structure. */ +@@ -606,7 +617,12 @@ qla2xxx_find_flt_start(scsi_qla_host_t * + } while (!last_image); + + /* Now verify FLT-location structure. */ +- qla24xx_read_flash_data(vha, dcode, pcihdr >> 2, sizeof(*fltl) >> 2); ++ rc = qla24xx_read_flash_data(vha, dcode, pcihdr >> 2, sizeof(*fltl) >> 2); ++ if (rc) { ++ ql_log(ql_log_info, vha, 0x017a, ++ "Unable to read FLT (%x).\n", rc); ++ return QLA_FUNCTION_FAILED; ++ } + if (memcmp(fltl->sig, "QFLT", 4)) + goto end; + +@@ -2605,13 +2621,18 @@ qla24xx_read_optrom_data(struct scsi_qla + uint32_t offset, uint32_t length) + { + struct qla_hw_data *ha = vha->hw; ++ int rc; + + /* Suspend HBA. */ + scsi_block_requests(vha->host); + set_bit(MBX_UPDATE_FLASH_ACTIVE, &ha->mbx_cmd_flags); + + /* Go with read. */ +- qla24xx_read_flash_data(vha, buf, offset >> 2, length >> 2); ++ rc = qla24xx_read_flash_data(vha, buf, offset >> 2, length >> 2); ++ if (rc) { ++ ql_log(ql_log_info, vha, 0x01a0, ++ "Unable to perform optrom read(%x).\n", rc); ++ } + + /* Resume HBA. 
*/ + clear_bit(MBX_UPDATE_FLASH_ACTIVE, &ha->mbx_cmd_flags); +@@ -3412,7 +3433,7 @@ qla24xx_get_flash_version(scsi_qla_host_ + struct active_regions active_regions = { }; + + if (IS_P3P_TYPE(ha)) +- return ret; ++ return QLA_SUCCESS; + + if (!mbuf) + return QLA_FUNCTION_FAILED; +@@ -3432,20 +3453,31 @@ qla24xx_get_flash_version(scsi_qla_host_ + + do { + /* Verify PCI expansion ROM header. */ +- qla24xx_read_flash_data(vha, dcode, pcihdr >> 2, 0x20); ++ ret = qla24xx_read_flash_data(vha, dcode, pcihdr >> 2, 0x20); ++ if (ret) { ++ ql_log(ql_log_info, vha, 0x017d, ++ "Unable to read PCI EXP Rom Header(%x).\n", ret); ++ return QLA_FUNCTION_FAILED; ++ } ++ + bcode = mbuf + (pcihdr % 4); + if (memcmp(bcode, "\x55\xaa", 2)) { + /* No signature */ + ql_log(ql_log_fatal, vha, 0x0059, + "No matching ROM signature.\n"); +- ret = QLA_FUNCTION_FAILED; +- break; ++ return QLA_FUNCTION_FAILED; + } + + /* Locate PCI data structure. */ + pcids = pcihdr + ((bcode[0x19] << 8) | bcode[0x18]); + +- qla24xx_read_flash_data(vha, dcode, pcids >> 2, 0x20); ++ ret = qla24xx_read_flash_data(vha, dcode, pcids >> 2, 0x20); ++ if (ret) { ++ ql_log(ql_log_info, vha, 0x018e, ++ "Unable to read PCI Data Structure (%x).\n", ret); ++ return QLA_FUNCTION_FAILED; ++ } ++ + bcode = mbuf + (pcihdr % 4); + + /* Validate signature of PCI data structure. 
*/ +@@ -3454,8 +3486,7 @@ qla24xx_get_flash_version(scsi_qla_host_ + ql_log(ql_log_fatal, vha, 0x005a, + "PCI data struct not found pcir_adr=%x.\n", pcids); + ql_dump_buffer(ql_dbg_init, vha, 0x0059, dcode, 32); +- ret = QLA_FUNCTION_FAILED; +- break; ++ return QLA_FUNCTION_FAILED; + } + + /* Read version */ +@@ -3507,20 +3538,26 @@ qla24xx_get_flash_version(scsi_qla_host_ + faddr = ha->flt_region_fw_sec; + } + +- qla24xx_read_flash_data(vha, dcode, faddr, 8); +- if (qla24xx_risc_firmware_invalid(dcode)) { +- ql_log(ql_log_warn, vha, 0x005f, +- "Unrecognized fw revision at %x.\n", +- ha->flt_region_fw * 4); +- ql_dump_buffer(ql_dbg_init, vha, 0x005f, dcode, 32); ++ ret = qla24xx_read_flash_data(vha, dcode, faddr, 8); ++ if (ret) { ++ ql_log(ql_log_info, vha, 0x019e, ++ "Unable to read FW version (%x).\n", ret); ++ return ret; + } else { +- for (i = 0; i < 4; i++) +- ha->fw_revision[i] = ++ if (qla24xx_risc_firmware_invalid(dcode)) { ++ ql_log(ql_log_warn, vha, 0x005f, ++ "Unrecognized fw revision at %x.\n", ++ ha->flt_region_fw * 4); ++ ql_dump_buffer(ql_dbg_init, vha, 0x005f, dcode, 32); ++ } else { ++ for (i = 0; i < 4; i++) ++ ha->fw_revision[i] = + be32_to_cpu((__force __be32)dcode[4+i]); +- ql_dbg(ql_dbg_init, vha, 0x0060, +- "Firmware revision (flash) %u.%u.%u (%x).\n", +- ha->fw_revision[0], ha->fw_revision[1], +- ha->fw_revision[2], ha->fw_revision[3]); ++ ql_dbg(ql_dbg_init, vha, 0x0060, ++ "Firmware revision (flash) %u.%u.%u (%x).\n", ++ ha->fw_revision[0], ha->fw_revision[1], ++ ha->fw_revision[2], ha->fw_revision[3]); ++ } + } + + /* Check for golden firmware and get version if available */ +@@ -3531,18 +3568,23 @@ qla24xx_get_flash_version(scsi_qla_host_ + + memset(ha->gold_fw_version, 0, sizeof(ha->gold_fw_version)); + faddr = ha->flt_region_gold_fw; +- qla24xx_read_flash_data(vha, dcode, ha->flt_region_gold_fw, 8); +- if (qla24xx_risc_firmware_invalid(dcode)) { +- ql_log(ql_log_warn, vha, 0x0056, +- "Unrecognized golden fw at %#x.\n", faddr); +- 
ql_dump_buffer(ql_dbg_init, vha, 0x0056, dcode, 32); ++ ret = qla24xx_read_flash_data(vha, dcode, ha->flt_region_gold_fw, 8); ++ if (ret) { ++ ql_log(ql_log_info, vha, 0x019f, ++ "Unable to read Gold FW version (%x).\n", ret); + return ret; +- } +- +- for (i = 0; i < 4; i++) +- ha->gold_fw_version[i] = +- be32_to_cpu((__force __be32)dcode[4+i]); ++ } else { ++ if (qla24xx_risc_firmware_invalid(dcode)) { ++ ql_log(ql_log_warn, vha, 0x0056, ++ "Unrecognized golden fw at %#x.\n", faddr); ++ ql_dump_buffer(ql_dbg_init, vha, 0x0056, dcode, 32); ++ return QLA_FUNCTION_FAILED; ++ } + ++ for (i = 0; i < 4; i++) ++ ha->gold_fw_version[i] = ++ be32_to_cpu((__force __be32)dcode[4+i]); ++ } + return ret; + } + diff --git a/queue-6.1/scsi-qla2xxx-fix-for-possible-memory-corruption.patch b/queue-6.1/scsi-qla2xxx-fix-for-possible-memory-corruption.patch new file mode 100644 index 00000000000..d9f1adffb7e --- /dev/null +++ b/queue-6.1/scsi-qla2xxx-fix-for-possible-memory-corruption.patch @@ -0,0 +1,33 @@ +From c03d740152f78e86945a75b2ad541bf972fab92a Mon Sep 17 00:00:00 2001 +From: Shreyas Deodhar +Date: Wed, 10 Jul 2024 22:40:49 +0530 +Subject: scsi: qla2xxx: Fix for possible memory corruption + +From: Shreyas Deodhar + +commit c03d740152f78e86945a75b2ad541bf972fab92a upstream. + +Init Control Block is dereferenced incorrectly. Correctly dereference ICB + +Cc: stable@vger.kernel.org +Signed-off-by: Shreyas Deodhar +Signed-off-by: Nilesh Javali +Link: https://lore.kernel.org/r/20240710171057.35066-4-njavali@marvell.com +Reviewed-by: Himanshu Madhani +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/qla2xxx/qla_os.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/scsi/qla2xxx/qla_os.c ++++ b/drivers/scsi/qla2xxx/qla_os.c +@@ -4667,7 +4667,7 @@ static void + qla2x00_number_of_exch(scsi_qla_host_t *vha, u32 *ret_cnt, u16 max_cnt) + { + u32 temp; +- struct init_cb_81xx *icb = (struct init_cb_81xx *)&vha->hw->init_cb; ++ struct init_cb_81xx *icb = (struct init_cb_81xx *)vha->hw->init_cb; + *ret_cnt = FW_DEF_EXCHANGES_CNT; + + if (max_cnt > vha->hw->max_exchg) diff --git a/queue-6.1/scsi-qla2xxx-unable-to-act-on-rscn-for-port-online.patch b/queue-6.1/scsi-qla2xxx-unable-to-act-on-rscn-for-port-online.patch new file mode 100644 index 00000000000..bacf552a56c --- /dev/null +++ b/queue-6.1/scsi-qla2xxx-unable-to-act-on-rscn-for-port-online.patch @@ -0,0 +1,234 @@ +From c3d98b12eef8db436e32f1a8c5478be57dc15621 Mon Sep 17 00:00:00 2001 +From: Quinn Tran +Date: Wed, 10 Jul 2024 22:40:47 +0530 +Subject: scsi: qla2xxx: Unable to act on RSCN for port online + +From: Quinn Tran + +commit c3d98b12eef8db436e32f1a8c5478be57dc15621 upstream. + +The device does not come online when the target port is online. There were +multiple RSCNs indicating multiple devices were affected. Driver is in the +process of finishing a fabric scan. A new RSCN (device up) arrived at the +tail end of the last fabric scan. Driver mistakenly thinks the new RSCN is +being taken care of by the previous fabric scan, where this notification is +cleared and not acted on. The laser needs to be blinked again to get the +device to show up. + +To prevent driver from accidentally clearing the RSCN notification, each +RSCN is given a generation value. A fabric scan will scan for that +generation(s). Any new RSCN arrive after the scan start will have a new +generation value. This will trigger another scan to get latest data. The +RSCN notification flag will be cleared when the scan is associate to that +generation. 
+ +Reported-by: kernel test robot +Closes: https://lore.kernel.org/oe-kbuild-all/202406210538.w875N70K-lkp@intel.com/ +Fixes: bb2ca6b3f09a ("scsi: qla2xxx: Relogin during fabric disturbance") +Cc: stable@vger.kernel.org +Signed-off-by: Quinn Tran +Signed-off-by: Nilesh Javali +Link: https://lore.kernel.org/r/20240710171057.35066-2-njavali@marvell.com +Reviewed-by: Himanshu Madhani +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/qla2xxx/qla_def.h | 3 +++ + drivers/scsi/qla2xxx/qla_gs.c | 33 ++++++++++++++++++++++++++++++--- + drivers/scsi/qla2xxx/qla_init.c | 24 +++++++++++++++++++----- + drivers/scsi/qla2xxx/qla_inline.h | 8 ++++++++ + 4 files changed, 60 insertions(+), 8 deletions(-) + +--- a/drivers/scsi/qla2xxx/qla_def.h ++++ b/drivers/scsi/qla2xxx/qla_def.h +@@ -3278,6 +3278,8 @@ struct fab_scan_rp { + struct fab_scan { + struct fab_scan_rp *l; + u32 size; ++ u32 rscn_gen_start; ++ u32 rscn_gen_end; + u16 scan_retry; + #define MAX_SCAN_RETRIES 5 + enum scan_flags_t scan_flags; +@@ -4985,6 +4987,7 @@ typedef struct scsi_qla_host { + + /* Counter to detect races between ELS and RSCN events */ + atomic_t generation_tick; ++ atomic_t rscn_gen; + /* Time when global fcport update has been scheduled */ + int total_fcport_update_gen; + /* List of pending LOGOs, protected by tgt_mutex */ +--- a/drivers/scsi/qla2xxx/qla_gs.c ++++ b/drivers/scsi/qla2xxx/qla_gs.c +@@ -3465,6 +3465,29 @@ static int qla2x00_is_a_vp(scsi_qla_host + return rc; + } + ++static bool qla_ok_to_clear_rscn(scsi_qla_host_t *vha, fc_port_t *fcport) ++{ ++ u32 rscn_gen; ++ ++ rscn_gen = atomic_read(&vha->rscn_gen); ++ ql_dbg(ql_dbg_disc + ql_dbg_verbose, vha, 0x2017, ++ "%s %d %8phC rscn_gen %x start %x end %x current %x\n", ++ __func__, __LINE__, fcport->port_name, fcport->rscn_gen, ++ vha->scan.rscn_gen_start, vha->scan.rscn_gen_end, rscn_gen); ++ ++ if (val_is_in_range(fcport->rscn_gen, vha->scan.rscn_gen_start, ++ vha->scan.rscn_gen_end)) ++ /* rscn came 
in before fabric scan */ ++ return true; ++ ++ if (val_is_in_range(fcport->rscn_gen, vha->scan.rscn_gen_end, rscn_gen)) ++ /* rscn came in after fabric scan */ ++ return false; ++ ++ /* rare: fcport's scan_needed + rscn_gen must be stale */ ++ return true; ++} ++ + void qla24xx_async_gnnft_done(scsi_qla_host_t *vha, srb_t *sp) + { + fc_port_t *fcport; +@@ -3578,10 +3601,10 @@ void qla24xx_async_gnnft_done(scsi_qla_h + (fcport->scan_needed && + fcport->port_type != FCT_INITIATOR && + fcport->port_type != FCT_NVME_INITIATOR)) { ++ fcport->scan_needed = 0; + qlt_schedule_sess_for_deletion(fcport); + } + fcport->d_id.b24 = rp->id.b24; +- fcport->scan_needed = 0; + break; + } + +@@ -3622,7 +3645,9 @@ login_logout: + do_delete = true; + } + +- fcport->scan_needed = 0; ++ if (qla_ok_to_clear_rscn(vha, fcport)) ++ fcport->scan_needed = 0; ++ + if (((qla_dual_mode_enabled(vha) || + qla_ini_mode_enabled(vha)) && + atomic_read(&fcport->state) == FCS_ONLINE) || +@@ -3652,7 +3677,9 @@ login_logout: + fcport->port_name, fcport->loop_id, + fcport->login_retry); + } +- fcport->scan_needed = 0; ++ ++ if (qla_ok_to_clear_rscn(vha, fcport)) ++ fcport->scan_needed = 0; + qla24xx_fcport_handle_login(vha, fcport); + } + } +--- a/drivers/scsi/qla2xxx/qla_init.c ++++ b/drivers/scsi/qla2xxx/qla_init.c +@@ -1843,10 +1843,18 @@ int qla24xx_post_newsess_work(struct scs + return qla2x00_post_work(vha, e); + } + ++static void qla_rscn_gen_tick(scsi_qla_host_t *vha, u32 *ret_rscn_gen) ++{ ++ *ret_rscn_gen = atomic_inc_return(&vha->rscn_gen); ++ /* memory barrier */ ++ wmb(); ++} ++ + void qla2x00_handle_rscn(scsi_qla_host_t *vha, struct event_arg *ea) + { + fc_port_t *fcport; + unsigned long flags; ++ u32 rscn_gen; + + switch (ea->id.b.rsvd_1) { + case RSCN_PORT_ADDR: +@@ -1876,15 +1884,16 @@ void qla2x00_handle_rscn(scsi_qla_host_t + * Otherwise we're already in the middle of a relogin + */ + fcport->scan_needed = 1; +- fcport->rscn_gen++; ++ qla_rscn_gen_tick(vha, &fcport->rscn_gen); + } + } 
else { + fcport->scan_needed = 1; +- fcport->rscn_gen++; ++ qla_rscn_gen_tick(vha, &fcport->rscn_gen); + } + } + break; + case RSCN_AREA_ADDR: ++ qla_rscn_gen_tick(vha, &rscn_gen); + list_for_each_entry(fcport, &vha->vp_fcports, list) { + if (fcport->flags & FCF_FCP2_DEVICE && + atomic_read(&fcport->state) == FCS_ONLINE) +@@ -1892,11 +1901,12 @@ void qla2x00_handle_rscn(scsi_qla_host_t + + if ((ea->id.b24 & 0xffff00) == (fcport->d_id.b24 & 0xffff00)) { + fcport->scan_needed = 1; +- fcport->rscn_gen++; ++ fcport->rscn_gen = rscn_gen; + } + } + break; + case RSCN_DOM_ADDR: ++ qla_rscn_gen_tick(vha, &rscn_gen); + list_for_each_entry(fcport, &vha->vp_fcports, list) { + if (fcport->flags & FCF_FCP2_DEVICE && + atomic_read(&fcport->state) == FCS_ONLINE) +@@ -1904,19 +1914,20 @@ void qla2x00_handle_rscn(scsi_qla_host_t + + if ((ea->id.b24 & 0xff0000) == (fcport->d_id.b24 & 0xff0000)) { + fcport->scan_needed = 1; +- fcport->rscn_gen++; ++ fcport->rscn_gen = rscn_gen; + } + } + break; + case RSCN_FAB_ADDR: + default: ++ qla_rscn_gen_tick(vha, &rscn_gen); + list_for_each_entry(fcport, &vha->vp_fcports, list) { + if (fcport->flags & FCF_FCP2_DEVICE && + atomic_read(&fcport->state) == FCS_ONLINE) + continue; + + fcport->scan_needed = 1; +- fcport->rscn_gen++; ++ fcport->rscn_gen = rscn_gen; + } + break; + } +@@ -1925,6 +1936,7 @@ void qla2x00_handle_rscn(scsi_qla_host_t + if (vha->scan.scan_flags == 0) { + ql_dbg(ql_dbg_disc, vha, 0xffff, "%s: schedule\n", __func__); + vha->scan.scan_flags |= SF_QUEUED; ++ vha->scan.rscn_gen_start = atomic_read(&vha->rscn_gen); + schedule_delayed_work(&vha->scan.scan_work, 5); + } + spin_unlock_irqrestore(&vha->work_lock, flags); +@@ -6419,6 +6431,8 @@ qla2x00_configure_fabric(scsi_qla_host_t + qlt_do_generation_tick(vha, &discovery_gen); + + if (USE_ASYNC_SCAN(ha)) { ++ /* start of scan begins here */ ++ vha->scan.rscn_gen_end = atomic_read(&vha->rscn_gen); + rval = qla24xx_async_gpnft(vha, FC4_TYPE_FCP_SCSI, + NULL); + if (rval) +--- 
a/drivers/scsi/qla2xxx/qla_inline.h ++++ b/drivers/scsi/qla2xxx/qla_inline.h +@@ -631,3 +631,11 @@ static inline int qla_mapq_alloc_qp_cpu_ + } + return 0; + } ++ ++static inline bool val_is_in_range(u32 val, u32 start, u32 end) ++{ ++ if (val >= start && val <= end) ++ return true; ++ else ++ return false; ++} diff --git a/queue-6.1/scsi-qla2xxx-use-qp-lock-to-search-for-bsg.patch b/queue-6.1/scsi-qla2xxx-use-qp-lock-to-search-for-bsg.patch new file mode 100644 index 00000000000..888943be4e6 --- /dev/null +++ b/queue-6.1/scsi-qla2xxx-use-qp-lock-to-search-for-bsg.patch @@ -0,0 +1,149 @@ +From c449b4198701d828e40d60a2abd30970b74a1d75 Mon Sep 17 00:00:00 2001 +From: Quinn Tran +Date: Wed, 10 Jul 2024 22:40:56 +0530 +Subject: scsi: qla2xxx: Use QP lock to search for bsg + +From: Quinn Tran + +commit c449b4198701d828e40d60a2abd30970b74a1d75 upstream. + +On bsg timeout, hardware_lock is used as part of search for the srb. +Instead, qpair lock should be used to iterate through different qpair. + +Cc: stable@vger.kernel.org +Signed-off-by: Quinn Tran +Signed-off-by: Nilesh Javali +Link: https://lore.kernel.org/r/20240710171057.35066-11-njavali@marvell.com +Reviewed-by: Himanshu Madhani +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/qla2xxx/qla_bsg.c | 96 ++++++++++++++++++++++++----------------- + 1 file changed, 57 insertions(+), 39 deletions(-) + +--- a/drivers/scsi/qla2xxx/qla_bsg.c ++++ b/drivers/scsi/qla2xxx/qla_bsg.c +@@ -3059,17 +3059,61 @@ skip_chip_chk: + return ret; + } + +-int +-qla24xx_bsg_timeout(struct bsg_job *bsg_job) ++static bool qla_bsg_found(struct qla_qpair *qpair, struct bsg_job *bsg_job) + { ++ bool found = false; + struct fc_bsg_reply *bsg_reply = bsg_job->reply; + scsi_qla_host_t *vha = shost_priv(fc_bsg_to_shost(bsg_job)); + struct qla_hw_data *ha = vha->hw; +- srb_t *sp; +- int cnt, que; ++ srb_t *sp = NULL; ++ int cnt; + unsigned long flags; + struct req_que *req; + ++ spin_lock_irqsave(qpair->qp_lock_ptr, flags); ++ req = qpair->req; ++ ++ for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) { ++ sp = req->outstanding_cmds[cnt]; ++ if (sp && ++ (sp->type == SRB_CT_CMD || ++ sp->type == SRB_ELS_CMD_HST || ++ sp->type == SRB_ELS_CMD_HST_NOLOGIN) && ++ sp->u.bsg_job == bsg_job) { ++ req->outstanding_cmds[cnt] = NULL; ++ spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); ++ ++ if (!ha->flags.eeh_busy && ha->isp_ops->abort_command(sp)) { ++ ql_log(ql_log_warn, vha, 0x7089, ++ "mbx abort_command failed.\n"); ++ bsg_reply->result = -EIO; ++ } else { ++ ql_dbg(ql_dbg_user, vha, 0x708a, ++ "mbx abort_command success.\n"); ++ bsg_reply->result = 0; ++ } ++ /* ref: INIT */ ++ kref_put(&sp->cmd_kref, qla2x00_sp_release); ++ ++ found = true; ++ goto done; ++ } ++ } ++ spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); ++ ++done: ++ return found; ++} ++ ++int ++qla24xx_bsg_timeout(struct bsg_job *bsg_job) ++{ ++ struct fc_bsg_reply *bsg_reply = bsg_job->reply; ++ scsi_qla_host_t *vha = shost_priv(fc_bsg_to_shost(bsg_job)); ++ struct qla_hw_data *ha = vha->hw; ++ int i; ++ struct qla_qpair *qpair; ++ + ql_log(ql_log_info, vha, 0x708b, "%s CMD timeout. 
bsg ptr %p.\n", + __func__, bsg_job); + +@@ -3079,48 +3123,22 @@ qla24xx_bsg_timeout(struct bsg_job *bsg_ + qla_pci_set_eeh_busy(vha); + } + ++ if (qla_bsg_found(ha->base_qpair, bsg_job)) ++ goto done; ++ + /* find the bsg job from the active list of commands */ +- spin_lock_irqsave(&ha->hardware_lock, flags); +- for (que = 0; que < ha->max_req_queues; que++) { +- req = ha->req_q_map[que]; +- if (!req) ++ for (i = 0; i < ha->max_qpairs; i++) { ++ qpair = vha->hw->queue_pair_map[i]; ++ if (!qpair) + continue; +- +- for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) { +- sp = req->outstanding_cmds[cnt]; +- if (sp && +- (sp->type == SRB_CT_CMD || +- sp->type == SRB_ELS_CMD_HST || +- sp->type == SRB_ELS_CMD_HST_NOLOGIN || +- sp->type == SRB_FXIOCB_BCMD) && +- sp->u.bsg_job == bsg_job) { +- req->outstanding_cmds[cnt] = NULL; +- spin_unlock_irqrestore(&ha->hardware_lock, flags); +- +- if (!ha->flags.eeh_busy && ha->isp_ops->abort_command(sp)) { +- ql_log(ql_log_warn, vha, 0x7089, +- "mbx abort_command failed.\n"); +- bsg_reply->result = -EIO; +- } else { +- ql_dbg(ql_dbg_user, vha, 0x708a, +- "mbx abort_command success.\n"); +- bsg_reply->result = 0; +- } +- spin_lock_irqsave(&ha->hardware_lock, flags); +- goto done; +- +- } +- } ++ if (qla_bsg_found(qpair, bsg_job)) ++ goto done; + } +- spin_unlock_irqrestore(&ha->hardware_lock, flags); ++ + ql_log(ql_log_info, vha, 0x708b, "SRB not found to abort.\n"); + bsg_reply->result = -ENXIO; +- return 0; + + done: +- spin_unlock_irqrestore(&ha->hardware_lock, flags); +- /* ref: INIT */ +- kref_put(&sp->cmd_kref, qla2x00_sp_release); + return 0; + } + diff --git a/queue-6.1/scsi-qla2xxx-validate-nvme_local_port-correctly.patch b/queue-6.1/scsi-qla2xxx-validate-nvme_local_port-correctly.patch new file mode 100644 index 00000000000..18feeae68c4 --- /dev/null +++ b/queue-6.1/scsi-qla2xxx-validate-nvme_local_port-correctly.patch @@ -0,0 +1,60 @@ +From eb1d4ce2609584eeb7694866f34d4b213caa3af9 Mon Sep 17 00:00:00 2001 +From: Nilesh 
Javali +Date: Wed, 10 Jul 2024 22:40:48 +0530 +Subject: scsi: qla2xxx: validate nvme_local_port correctly + +From: Nilesh Javali + +commit eb1d4ce2609584eeb7694866f34d4b213caa3af9 upstream. + +The driver load failed with error message, + +qla2xxx [0000:04:00.0]-ffff:0: register_localport failed: ret=ffffffef + +and with a kernel crash, + + BUG: unable to handle kernel NULL pointer dereference at 0000000000000070 + Workqueue: events_unbound qla_register_fcport_fn [qla2xxx] + RIP: 0010:nvme_fc_register_remoteport+0x16/0x430 [nvme_fc] + RSP: 0018:ffffaaa040eb3d98 EFLAGS: 00010282 + RAX: 0000000000000000 RBX: ffff9dfb46b78c00 RCX: 0000000000000000 + RDX: ffff9dfb46b78da8 RSI: ffffaaa040eb3e08 RDI: 0000000000000000 + RBP: ffff9dfb612a0a58 R08: ffffffffaf1d6270 R09: 3a34303a30303030 + R10: 34303a303030305b R11: 2078787832616c71 R12: ffff9dfb46b78dd4 + R13: ffff9dfb46b78c24 R14: ffff9dfb41525300 R15: ffff9dfb46b78da8 + FS: 0000000000000000(0000) GS:ffff9dfc67c00000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 0000000000000070 CR3: 000000018da10004 CR4: 00000000000206f0 + Call Trace: + qla_nvme_register_remote+0xeb/0x1f0 [qla2xxx] + ? qla2x00_dfs_create_rport+0x231/0x270 [qla2xxx] + qla2x00_update_fcport+0x2a1/0x3c0 [qla2xxx] + qla_register_fcport_fn+0x54/0xc0 [qla2xxx] + +Exit the qla_nvme_register_remote() function when qla_nvme_register_hba() +fails and correctly validate nvme_local_port. + +Cc: stable@vger.kernel.org +Signed-off-by: Nilesh Javali +Link: https://lore.kernel.org/r/20240710171057.35066-3-njavali@marvell.com +Reviewed-by: Himanshu Madhani +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/qla2xxx/qla_nvme.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/scsi/qla2xxx/qla_nvme.c ++++ b/drivers/scsi/qla2xxx/qla_nvme.c +@@ -29,7 +29,10 @@ int qla_nvme_register_remote(struct scsi + return 0; + } + +- if (!vha->nvme_local_port && qla_nvme_register_hba(vha)) ++ if (qla_nvme_register_hba(vha)) ++ return 0; ++ ++ if (!vha->nvme_local_port) + return 0; + + if (!(fcport->nvme_prli_service_param & diff --git a/queue-6.1/series b/queue-6.1/series index d9f03a4b392..8ab46869204 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -344,3 +344,21 @@ devres-fix-memory-leakage-caused-by-driver-api-devm_free_percpu.patch irqchip-imx-irqsteer-handle-runtime-power-management-correctly.patch mm-numa_balancing-teach-mpol_to_str-about-the-balancing-mode.patch rtc-cmos-fix-return-value-of-nvmem-callbacks.patch +scsi-qla2xxx-during-vport-delete-send-async-logout-explicitly.patch +scsi-qla2xxx-unable-to-act-on-rscn-for-port-online.patch +scsi-qla2xxx-fix-for-possible-memory-corruption.patch +scsi-qla2xxx-use-qp-lock-to-search-for-bsg.patch +scsi-qla2xxx-fix-flash-read-failure.patch +scsi-qla2xxx-complete-command-early-within-lock.patch +scsi-qla2xxx-validate-nvme_local_port-correctly.patch +perf-fix-event-leak-upon-exit.patch +perf-fix-event-leak-upon-exec-and-file-release.patch +perf-x86-intel-uncore-fix-the-bits-of-the-cha-extended-umask-for-spr.patch +perf-x86-intel-pt-fix-topa_entry-base-length.patch +perf-x86-intel-pt-fix-a-topa_entry-base-address-calculation.patch +drm-i915-gt-do-not-consider-preemption-during-execlists_dequeue-for-gen8.patch +drm-amdgpu-sdma5.2-update-wptr-registers-as-well-as-doorbell.patch +drm-dp_mst-fix-all-mstb-marked-as-not-probed-after-suspend-resume.patch +drm-i915-dp-reset-intel_dp-link_trained-before-retraining-the-link.patch +rtc-isl1208-fix-return-value-of-nvmem-callbacks.patch 
+watchdog-perf-properly-initialize-the-turbo-mode-timestamp-and-rearm-counter.patch diff --git a/queue-6.1/watchdog-perf-properly-initialize-the-turbo-mode-timestamp-and-rearm-counter.patch b/queue-6.1/watchdog-perf-properly-initialize-the-turbo-mode-timestamp-and-rearm-counter.patch new file mode 100644 index 00000000000..2340937a0cb --- /dev/null +++ b/queue-6.1/watchdog-perf-properly-initialize-the-turbo-mode-timestamp-and-rearm-counter.patch @@ -0,0 +1,68 @@ +From f944ffcbc2e1c759764850261670586ddf3bdabb Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 11 Jul 2024 22:25:21 +0200 +Subject: watchdog/perf: properly initialize the turbo mode timestamp and rearm counter + +From: Thomas Gleixner + +commit f944ffcbc2e1c759764850261670586ddf3bdabb upstream. + +For systems on which the performance counter can expire early due to turbo +modes the watchdog handler has a safety net in place which validates that +since the last watchdog event there has at least 4/5th of the watchdog +period elapsed. + +This works reliably only after the first watchdog event because the per +CPU variable which holds the timestamp of the last event is never +initialized. + +So a first spurious event will validate against a timestamp of 0 which +results in a delta which is likely to be way over the 4/5 threshold of the +period. As this might happen before the first watchdog hrtimer event +increments the watchdog counter, this can lead to false positives. + +Fix this by initializing the timestamp before enabling the hardware event. +Reset the rearm counter as well, as that might be non zero after the +watchdog was disabled and reenabled. 
+ +Link: https://lkml.kernel.org/r/87frsfu15a.ffs@tglx +Fixes: 7edaeb6841df ("kernel/watchdog: Prevent false positives with turbo modes") +Signed-off-by: Thomas Gleixner +Cc: Arjan van de Ven +Cc: Peter Zijlstra +Cc: stable@vger.kernel.org +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + kernel/watchdog_hld.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +--- a/kernel/watchdog_hld.c ++++ b/kernel/watchdog_hld.c +@@ -91,11 +91,15 @@ static bool watchdog_check_timestamp(voi + __this_cpu_write(last_timestamp, now); + return true; + } +-#else +-static inline bool watchdog_check_timestamp(void) ++ ++static void watchdog_init_timestamp(void) + { +- return true; ++ __this_cpu_write(nmi_rearmed, 0); ++ __this_cpu_write(last_timestamp, ktime_get_mono_fast_ns()); + } ++#else ++static inline bool watchdog_check_timestamp(void) { return true; } ++static inline void watchdog_init_timestamp(void) { } + #endif + + static struct perf_event_attr wd_hw_attr = { +@@ -196,6 +200,7 @@ void hardlockup_detector_perf_enable(voi + if (!atomic_fetch_inc(&watchdog_cpus)) + pr_info("Enabled. Permanently consumes one hw-PMU counter.\n"); + ++ watchdog_init_timestamp(); + perf_event_enable(this_cpu_read(watchdog_ev)); + } +