From: Greg Kroah-Hartman
Date: Tue, 30 Jul 2024 10:31:06 +0000 (+0200)
Subject: 6.10-stable patches
X-Git-Tag: v6.1.103~37
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=3fcfe23fa8c9bb467c10bf7a8846dc37e4321a03;p=thirdparty%2Fkernel%2Fstable-queue.git

6.10-stable patches

added patches:
	asoc-codecs-wcd939x-fix-typec-mux-and-switch-leak-during-device-removal.patch
	asoc-sof-ipc4-topology-use-correct-queue_id-for-requesting-input-pin-format.patch
	bus-mhi-ep-do-not-allocate-memory-for-mhi-objects-from-dma-zone.patch
	crypto-ccp-fix-null-pointer-dereference-in-__sev_snp_shutdown_locked.patch
	dm-verity-fix-dm_is_verity_target-when-dm-verity-is-builtin.patch
	drm-amd-amdgpu-fix-uninitialized-variable-warnings.patch
	drm-amdgpu-add-missed-harvest-check-for-vcn-ip-v4-v5.patch
	drm-amdgpu-reset-vm-state-machine-after-gpu-reset-vram-lost.patch
	drm-amdgpu-sdma5.2-update-wptr-registers-as-well-as-doorbell.patch
	drm-dp_mst-fix-all-mstb-marked-as-not-probed-after-suspend-resume.patch
	drm-etnaviv-don-t-block-scheduler-when-gpu-is-still-active.patch
	drm-i915-dp-don-t-switch-the-lttpr-mode-on-an-active-link.patch
	drm-i915-dp-reset-intel_dp-link_trained-before-retraining-the-link.patch
	drm-i915-gt-do-not-consider-preemption-during-execlists_dequeue-for-gen8.patch
	drm-panfrost-mark-simple_ondemand-governor-as-softdep.patch
	drm-udl-remove-drm_connector_poll_hpd.patch
	mips-dts-loongson-add-isa-node.patch
	mips-dts-loongson-fix-gmac-phy-node.patch
	mips-ip30-ip30-console-add-missing-include.patch
	mips-loongson64-env-hook-up-loongsson-2k.patch
	mips-loongson64-remove-memory-node-for-builtin-dtb.patch
	mips-loongson64-reset-prioritise-firmware-service.patch
	mips-loongson64-test-register-availability-before-use.patch
	perf-fix-event-leak-upon-exec-and-file-release.patch
	perf-fix-event-leak-upon-exit.patch
	perf-stat-fix-the-hard-coded-metrics-calculation-on-the-hybrid.patch
	perf-x86-intel-ds-fix-non-0-retire-latency-on-raptorlake.patch
	perf-x86-intel-pt-fix-a-topa_entry-base-address-calculation.patch
	perf-x86-intel-pt-fix-topa_entry-base-length.patch
	perf-x86-intel-uncore-fix-the-bits-of-the-cha-extended-umask-for-spr.patch
	platform-mips-cpu_hwmon-disable-driver-on-unsupported-hardware.patch
	rbd-don-t-assume-rbd_is_lock_owner-for-exclusive-mappings.patch
	rdma-iwcm-fix-a-use-after-free-related-to-destroying-cm-ids.patch
	remoteproc-imx_rproc-fix-refcount-mistake-in-imx_rproc_addr_init.patch
	remoteproc-imx_rproc-skip-over-memory-region-when-node-value-is-null.patch
	remoteproc-stm32_rproc-fix-mailbox-interrupts-queuing.patch
	rtc-abx80x-fix-return-value-of-nvmem-callback-on-read.patch
	rtc-isl1208-fix-return-value-of-nvmem-callbacks.patch
	scsi-lpfc-allow-device_recovery-mode-after-rscn-receipt-if-in-prli_issue-state.patch
	scsi-qla2xxx-complete-command-early-within-lock.patch
	scsi-qla2xxx-during-vport-delete-send-async-logout-explicitly.patch
	scsi-qla2xxx-fix-flash-read-failure.patch
	scsi-qla2xxx-fix-for-possible-memory-corruption.patch
	scsi-qla2xxx-reduce-fabric-scan-duplicate-code.patch
	scsi-qla2xxx-unable-to-act-on-rscn-for-port-online.patch
	scsi-qla2xxx-use-qp-lock-to-search-for-bsg.patch
	scsi-qla2xxx-validate-nvme_local_port-correctly.patch
	selftests-sigaltstack-fix-ppc64-gcc-build.patch
	watchdog-perf-properly-initialize-the-turbo-mode-timestamp-and-rearm-counter.patch
---

diff --git a/queue-6.10/asoc-codecs-wcd939x-fix-typec-mux-and-switch-leak-during-device-removal.patch b/queue-6.10/asoc-codecs-wcd939x-fix-typec-mux-and-switch-leak-during-device-removal.patch
new file mode 100644
index
00000000000..8cf73aa00e1 --- /dev/null +++ b/queue-6.10/asoc-codecs-wcd939x-fix-typec-mux-and-switch-leak-during-device-removal.patch @@ -0,0 +1,174 @@ +From 9f3ae72c5dbca9ba558c752f1ef969ed6908be01 Mon Sep 17 00:00:00 2001 +From: Krzysztof Kozlowski +Date: Mon, 1 Jul 2024 14:26:16 +0200 +Subject: ASoC: codecs: wcd939x: Fix typec mux and switch leak during device removal + +From: Krzysztof Kozlowski + +commit 9f3ae72c5dbca9ba558c752f1ef969ed6908be01 upstream. + +Driver does not unregister typec structures (typec_mux_dev and +typec_switch_desc) during removal leading to leaks. Fix this by moving +typec registering parts to separate function and using devm interface to +release them. This also makes code a bit simpler: + - Smaller probe() function with less error paths and no #ifdefs, + - No need to store typec_mux_dev and typec_switch_desc in driver state + container structure. + +Cc: stable@vger.kernel.org +Fixes: 10f514bd172a ("ASoC: codecs: Add WCD939x Codec driver") +Signed-off-by: Krzysztof Kozlowski +Reviewed-by: Neil Armstrong +Link: https://patch.msgid.link/20240701122616.414158-1-krzysztof.kozlowski@linaro.org +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman +--- + sound/soc/codecs/wcd939x.c | 113 ++++++++++++++++++++++++++------------------- + 1 file changed, 66 insertions(+), 47 deletions(-) + +--- a/sound/soc/codecs/wcd939x.c ++++ b/sound/soc/codecs/wcd939x.c +@@ -182,8 +182,6 @@ struct wcd939x_priv { + /* typec handling */ + bool typec_analog_mux; + #if IS_ENABLED(CONFIG_TYPEC) +- struct typec_mux_dev *typec_mux; +- struct typec_switch_dev *typec_sw; + enum typec_orientation typec_orientation; + unsigned long typec_mode; + struct typec_switch *typec_switch; +@@ -3528,6 +3526,68 @@ static const struct component_master_ops + .unbind = wcd939x_unbind, + }; + ++static void __maybe_unused wcd939x_typec_mux_unregister(void *data) ++{ ++ struct typec_mux_dev *typec_mux = data; ++ ++ typec_mux_unregister(typec_mux); ++} ++ ++static void __maybe_unused wcd939x_typec_switch_unregister(void *data) ++{ ++ struct typec_switch_dev *typec_sw = data; ++ ++ typec_switch_unregister(typec_sw); ++} ++ ++static int wcd939x_add_typec(struct wcd939x_priv *wcd939x, struct device *dev) ++{ ++#if IS_ENABLED(CONFIG_TYPEC) ++ int ret; ++ struct typec_mux_dev *typec_mux; ++ struct typec_switch_dev *typec_sw; ++ struct typec_mux_desc mux_desc = { ++ .drvdata = wcd939x, ++ .fwnode = dev_fwnode(dev), ++ .set = wcd939x_typec_mux_set, ++ }; ++ struct typec_switch_desc sw_desc = { ++ .drvdata = wcd939x, ++ .fwnode = dev_fwnode(dev), ++ .set = wcd939x_typec_switch_set, ++ }; ++ ++ /* ++ * Is USBSS is used to mux analog lines, ++ * register a typec mux/switch to get typec events ++ */ ++ if (!wcd939x->typec_analog_mux) ++ return 0; ++ ++ typec_mux = typec_mux_register(dev, &mux_desc); ++ if (IS_ERR(typec_mux)) ++ return dev_err_probe(dev, PTR_ERR(typec_mux), ++ "failed to register typec mux\n"); ++ ++ ret = devm_add_action_or_reset(dev, wcd939x_typec_mux_unregister, ++ typec_mux); ++ if (ret) ++ return ret; ++ ++ typec_sw = typec_switch_register(dev, &sw_desc); ++ if (IS_ERR(typec_sw)) ++ return dev_err_probe(dev, PTR_ERR(typec_sw), ++ "failed to register typec switch\n"); ++ ++ ret = devm_add_action_or_reset(dev, wcd939x_typec_switch_unregister, ++ typec_sw); ++ if (ret) ++ return ret; ++#endif ++ ++ return 0; ++} ++ + static int wcd939x_add_slave_components(struct wcd939x_priv *wcd939x, + struct device *dev, + struct component_match **matchptr) +@@ -3576,42 +3636,13 @@ static int 
wcd939x_probe(struct platform + return -EINVAL; + } + +-#if IS_ENABLED(CONFIG_TYPEC) +- /* +- * Is USBSS is used to mux analog lines, +- * register a typec mux/switch to get typec events +- */ +- if (wcd939x->typec_analog_mux) { +- struct typec_mux_desc mux_desc = { +- .drvdata = wcd939x, +- .fwnode = dev_fwnode(dev), +- .set = wcd939x_typec_mux_set, +- }; +- struct typec_switch_desc sw_desc = { +- .drvdata = wcd939x, +- .fwnode = dev_fwnode(dev), +- .set = wcd939x_typec_switch_set, +- }; +- +- wcd939x->typec_mux = typec_mux_register(dev, &mux_desc); +- if (IS_ERR(wcd939x->typec_mux)) { +- ret = dev_err_probe(dev, PTR_ERR(wcd939x->typec_mux), +- "failed to register typec mux\n"); +- goto err_disable_regulators; +- } +- +- wcd939x->typec_sw = typec_switch_register(dev, &sw_desc); +- if (IS_ERR(wcd939x->typec_sw)) { +- ret = dev_err_probe(dev, PTR_ERR(wcd939x->typec_sw), +- "failed to register typec switch\n"); +- goto err_unregister_typec_mux; +- } +- } +-#endif /* CONFIG_TYPEC */ ++ ret = wcd939x_add_typec(wcd939x, dev); ++ if (ret) ++ goto err_disable_regulators; + + ret = wcd939x_add_slave_components(wcd939x, dev, &match); + if (ret) +- goto err_unregister_typec_switch; ++ goto err_disable_regulators; + + wcd939x_reset(wcd939x); + +@@ -3628,18 +3659,6 @@ static int wcd939x_probe(struct platform + + return 0; + +-#if IS_ENABLED(CONFIG_TYPEC) +-err_unregister_typec_mux: +- if (wcd939x->typec_analog_mux) +- typec_mux_unregister(wcd939x->typec_mux); +-#endif /* CONFIG_TYPEC */ +- +-err_unregister_typec_switch: +-#if IS_ENABLED(CONFIG_TYPEC) +- if (wcd939x->typec_analog_mux) +- typec_switch_unregister(wcd939x->typec_sw); +-#endif /* CONFIG_TYPEC */ +- + err_disable_regulators: + regulator_bulk_disable(WCD939X_MAX_SUPPLY, wcd939x->supplies); + regulator_bulk_free(WCD939X_MAX_SUPPLY, wcd939x->supplies); diff --git a/queue-6.10/asoc-sof-ipc4-topology-use-correct-queue_id-for-requesting-input-pin-format.patch b/queue-6.10/asoc-sof-ipc4-topology-use-correct-queue_id-for-requesting-input-pin-format.patch new file mode 100644 index 00000000000..38e605ed94d --- /dev/null +++ b/queue-6.10/asoc-sof-ipc4-topology-use-correct-queue_id-for-requesting-input-pin-format.patch @@ -0,0 +1,114 @@ +From fe836c78ef1ff16da32912c22348091a0d67bda1 Mon Sep 17 00:00:00 2001 +From: Peter Ujfalusi +Date: Mon, 24 Jun 2024 14:15:18 +0200 +Subject: ASoC: SOF: ipc4-topology: Use correct queue_id for requesting input pin format + +From: Peter Ujfalusi + +commit fe836c78ef1ff16da32912c22348091a0d67bda1 upstream. + +It is incorrect to request the input pin format of the destination widget +using the output pin index of the source module as the indexes are not +necessarily matching. +moduleA.out_pin1 can be connected to moduleB.in_pin0 for example. + +Use the dst_queue_id to request the input format of the destination module. + +This bug remained unnoticed likely because in nocodec topologies we don't +have process modules after a module copier, thus the pin/queue index is +ignored. +For the process module case, the code was likely have been tested in a +controlled way where all the pin/queue/format properties were present to +work. + +Update the debug prints to have better information. 
+ +Reviewed-by: Kai Vehmanen +Reviewed-by: Ranjani Sridharan +Reviewed-by: Bard Liao +Signed-off-by: Peter Ujfalusi +Signed-off-by: Pierre-Louis Bossart +Cc: stable@vger.kernel.org # v6.8+ +Link: https://patch.msgid.link/20240624121519.91703-3-pierre-louis.bossart@linux.intel.com +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman +--- + sound/soc/sof/ipc4-topology.c | 28 +++++++++++++++------------- + 1 file changed, 15 insertions(+), 13 deletions(-) + +--- a/sound/soc/sof/ipc4-topology.c ++++ b/sound/soc/sof/ipc4-topology.c +@@ -2875,7 +2875,7 @@ static void sof_ipc4_put_queue_id(struct + static int sof_ipc4_set_copier_sink_format(struct snd_sof_dev *sdev, + struct snd_sof_widget *src_widget, + struct snd_sof_widget *sink_widget, +- int sink_id) ++ struct snd_sof_route *sroute) + { + struct sof_ipc4_copier_config_set_sink_format format; + const struct sof_ipc_ops *iops = sdev->ipc->ops; +@@ -2884,9 +2884,6 @@ static int sof_ipc4_set_copier_sink_form + struct sof_ipc4_fw_module *fw_module; + struct sof_ipc4_msg msg = {{ 0 }}; + +- dev_dbg(sdev->dev, "%s set copier sink %d format\n", +- src_widget->widget->name, sink_id); +- + if (WIDGET_IS_DAI(src_widget->id)) { + struct snd_sof_dai *dai = src_widget->private; + +@@ -2897,13 +2894,15 @@ static int sof_ipc4_set_copier_sink_form + + fw_module = src_widget->module_info; + +- format.sink_id = sink_id; ++ format.sink_id = sroute->src_queue_id; + memcpy(&format.source_fmt, &src_config->audio_fmt, sizeof(format.source_fmt)); + +- pin_fmt = sof_ipc4_get_input_pin_audio_fmt(sink_widget, sink_id); ++ pin_fmt = sof_ipc4_get_input_pin_audio_fmt(sink_widget, sroute->dst_queue_id); + if (!pin_fmt) { +- dev_err(sdev->dev, "Unable to get pin %d format for %s", +- sink_id, sink_widget->widget->name); ++ dev_err(sdev->dev, ++ "Failed to get input audio format of %s:%d for output of %s:%d\n", ++ sink_widget->widget->name, sroute->dst_queue_id, ++ src_widget->widget->name, sroute->src_queue_id); + return -EINVAL; + } + +@@ -2961,7 +2960,8 @@ static int sof_ipc4_route_setup(struct s + sroute->src_queue_id = sof_ipc4_get_queue_id(src_widget, sink_widget, + SOF_PIN_TYPE_OUTPUT); + if (sroute->src_queue_id < 0) { +- dev_err(sdev->dev, "failed to get queue ID for source widget: %s\n", ++ dev_err(sdev->dev, ++ "failed to get src_queue_id ID from source widget %s\n", + src_widget->widget->name); + return sroute->src_queue_id; + } +@@ -2969,7 +2969,8 @@ static int sof_ipc4_route_setup(struct s + sroute->dst_queue_id = sof_ipc4_get_queue_id(src_widget, sink_widget, + SOF_PIN_TYPE_INPUT); + if (sroute->dst_queue_id < 0) { +- dev_err(sdev->dev, "failed to get queue ID for sink widget: %s\n", ++ dev_err(sdev->dev, ++ "failed to get dst_queue_id ID from sink widget %s\n", + sink_widget->widget->name); + sof_ipc4_put_queue_id(src_widget, sroute->src_queue_id, + SOF_PIN_TYPE_OUTPUT); +@@ -2978,10 +2979,11 @@ static int sof_ipc4_route_setup(struct s + + /* Pin 0 format is already set during copier module init */ + if (sroute->src_queue_id > 0 && WIDGET_IS_COPIER(src_widget->id)) { +- ret = sof_ipc4_set_copier_sink_format(sdev, src_widget, sink_widget, +- sroute->src_queue_id); ++ ret = sof_ipc4_set_copier_sink_format(sdev, src_widget, ++ sink_widget, sroute); + if (ret < 0) { +- dev_err(sdev->dev, "failed to set sink format for %s source queue ID %d\n", ++ dev_err(sdev->dev, ++ "failed to set sink format for source %s:%d\n", + src_widget->widget->name, sroute->src_queue_id); + goto out; + } diff --git 
a/queue-6.10/bus-mhi-ep-do-not-allocate-memory-for-mhi-objects-from-dma-zone.patch b/queue-6.10/bus-mhi-ep-do-not-allocate-memory-for-mhi-objects-from-dma-zone.patch new file mode 100644 index 00000000000..6f04f4ed559 --- /dev/null +++ b/queue-6.10/bus-mhi-ep-do-not-allocate-memory-for-mhi-objects-from-dma-zone.patch @@ -0,0 +1,91 @@ +From c7d0b2db5bc5e8c0fdc67b3c8f463c3dfec92f77 Mon Sep 17 00:00:00 2001 +From: Manivannan Sadhasivam +Date: Mon, 3 Jun 2024 22:13:54 +0530 +Subject: bus: mhi: ep: Do not allocate memory for MHI objects from DMA zone + +From: Manivannan Sadhasivam + +commit c7d0b2db5bc5e8c0fdc67b3c8f463c3dfec92f77 upstream. + +MHI endpoint stack accidentally started allocating memory for objects from +DMA zone since commit 62210a26cd4f ("bus: mhi: ep: Use slab allocator +where applicable"). But there is no real need to allocate memory from this +naturally limited DMA zone. This also causes the MHI endpoint stack to run +out of memory while doing high bandwidth transfers. + +So let's switch over to normal memory. + +Cc: # 6.8 +Fixes: 62210a26cd4f ("bus: mhi: ep: Use slab allocator where applicable") +Reviewed-by: Mayank Rana +Link: https://lore.kernel.org/r/20240603164354.79035-1-manivannan.sadhasivam@linaro.org +Signed-off-by: Manivannan Sadhasivam +Signed-off-by: Greg Kroah-Hartman +--- + drivers/bus/mhi/ep/main.c | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +--- a/drivers/bus/mhi/ep/main.c ++++ b/drivers/bus/mhi/ep/main.c +@@ -90,7 +90,7 @@ static int mhi_ep_send_completion_event( + struct mhi_ring_element *event; + int ret; + +- event = kmem_cache_zalloc(mhi_cntrl->ev_ring_el_cache, GFP_KERNEL | GFP_DMA); ++ event = kmem_cache_zalloc(mhi_cntrl->ev_ring_el_cache, GFP_KERNEL); + if (!event) + return -ENOMEM; + +@@ -109,7 +109,7 @@ int mhi_ep_send_state_change_event(struc + struct mhi_ring_element *event; + int ret; + +- event = kmem_cache_zalloc(mhi_cntrl->ev_ring_el_cache, GFP_KERNEL | GFP_DMA); ++ event = kmem_cache_zalloc(mhi_cntrl->ev_ring_el_cache, GFP_KERNEL); + if (!event) + return -ENOMEM; + +@@ -127,7 +127,7 @@ int mhi_ep_send_ee_event(struct mhi_ep_c + struct mhi_ring_element *event; + int ret; + +- event = kmem_cache_zalloc(mhi_cntrl->ev_ring_el_cache, GFP_KERNEL | GFP_DMA); ++ event = kmem_cache_zalloc(mhi_cntrl->ev_ring_el_cache, GFP_KERNEL); + if (!event) + return -ENOMEM; + +@@ -146,7 +146,7 @@ static int mhi_ep_send_cmd_comp_event(st + struct mhi_ring_element *event; + int ret; + +- event = kmem_cache_zalloc(mhi_cntrl->ev_ring_el_cache, GFP_KERNEL | GFP_DMA); ++ event = kmem_cache_zalloc(mhi_cntrl->ev_ring_el_cache, GFP_KERNEL); + if (!event) + return -ENOMEM; + +@@ -438,7 +438,7 @@ static int mhi_ep_read_channel(struct mh + read_offset = mhi_chan->tre_size - mhi_chan->tre_bytes_left; + write_offset = len - buf_left; + +- buf_addr = kmem_cache_zalloc(mhi_cntrl->tre_buf_cache, GFP_KERNEL | GFP_DMA); ++ buf_addr = kmem_cache_zalloc(mhi_cntrl->tre_buf_cache, GFP_KERNEL); + if (!buf_addr) + return -ENOMEM; + +@@ -1481,14 +1481,14 @@ int mhi_ep_register_controller(struct mh + + mhi_cntrl->ev_ring_el_cache = kmem_cache_create("mhi_ep_event_ring_el", + sizeof(struct mhi_ring_element), 0, +- SLAB_CACHE_DMA, NULL); ++ 0, NULL); + if (!mhi_cntrl->ev_ring_el_cache) { + ret = -ENOMEM; + goto err_free_cmd; + } + + mhi_cntrl->tre_buf_cache = kmem_cache_create("mhi_ep_tre_buf", MHI_EP_DEFAULT_MTU, 0, +- SLAB_CACHE_DMA, NULL); ++ 0, NULL); + if (!mhi_cntrl->tre_buf_cache) { + ret = -ENOMEM; + goto err_destroy_ev_ring_el_cache; diff --git 
a/queue-6.10/crypto-ccp-fix-null-pointer-dereference-in-__sev_snp_shutdown_locked.patch b/queue-6.10/crypto-ccp-fix-null-pointer-dereference-in-__sev_snp_shutdown_locked.patch new file mode 100644 index 00000000000..ad6ae7ac7a4 --- /dev/null +++ b/queue-6.10/crypto-ccp-fix-null-pointer-dereference-in-__sev_snp_shutdown_locked.patch @@ -0,0 +1,151 @@ +From 468e3295774d0edce15f4ae475913b5076dd4f40 Mon Sep 17 00:00:00 2001 +From: Kim Phillips +Date: Tue, 4 Jun 2024 12:47:39 -0500 +Subject: crypto: ccp - Fix null pointer dereference in __sev_snp_shutdown_locked + +From: Kim Phillips + +commit 468e3295774d0edce15f4ae475913b5076dd4f40 upstream. + +Fix a null pointer dereference induced by DEBUG_TEST_DRIVER_REMOVE. +Return from __sev_snp_shutdown_locked() if the psp_device or the +sev_device structs are not initialized. Without the fix, the driver will +produce the following splat: + + ccp 0000:55:00.5: enabling device (0000 -> 0002) + ccp 0000:55:00.5: sev enabled + ccp 0000:55:00.5: psp enabled + BUG: kernel NULL pointer dereference, address: 00000000000000f0 + #PF: supervisor read access in kernel mode + #PF: error_code(0x0000) - not-present page + PGD 0 P4D 0 + Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC NOPTI + CPU: 262 PID: 1 Comm: swapper/0 Not tainted 6.9.0-rc1+ #29 + RIP: 0010:__sev_snp_shutdown_locked+0x2e/0x150 + Code: 00 55 48 89 e5 41 57 41 56 41 54 53 48 83 ec 10 41 89 f7 49 89 fe 65 48 8b 04 25 28 00 00 00 48 89 45 d8 48 8b 05 6a 5a 7f 06 <4c> 8b a0 f0 00 00 00 41 0f b6 9c 24 a2 00 00 00 48 83 fb 02 0f 83 + RSP: 0018:ffffb2ea4014b7b8 EFLAGS: 00010286 + RAX: 0000000000000000 RBX: ffff9e4acd2e0a28 RCX: 0000000000000000 + RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffb2ea4014b808 + RBP: ffffb2ea4014b7e8 R08: 0000000000000106 R09: 000000000003d9c0 + R10: 0000000000000001 R11: ffffffffa39ff070 R12: ffff9e49d40590c8 + R13: 0000000000000000 R14: ffffb2ea4014b808 R15: 0000000000000000 + FS: 0000000000000000(0000) GS:ffff9e58b1e00000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 00000000000000f0 CR3: 0000000418a3e001 CR4: 0000000000770ef0 + PKRU: 55555554 + Call Trace: + + ? __die_body+0x6f/0xb0 + ? __die+0xcc/0xf0 + ? page_fault_oops+0x330/0x3a0 + ? save_trace+0x2a5/0x360 + ? do_user_addr_fault+0x583/0x630 + ? exc_page_fault+0x81/0x120 + ? asm_exc_page_fault+0x2b/0x30 + ? __sev_snp_shutdown_locked+0x2e/0x150 + __sev_firmware_shutdown+0x349/0x5b0 + ? pm_runtime_barrier+0x66/0xe0 + sev_dev_destroy+0x34/0xb0 + psp_dev_destroy+0x27/0x60 + sp_destroy+0x39/0x90 + sp_pci_remove+0x22/0x60 + pci_device_remove+0x4e/0x110 + really_probe+0x271/0x4e0 + __driver_probe_device+0x8f/0x160 + driver_probe_device+0x24/0x120 + __driver_attach+0xc7/0x280 + ? driver_attach+0x30/0x30 + bus_for_each_dev+0x10d/0x130 + driver_attach+0x22/0x30 + bus_add_driver+0x171/0x2b0 + ? unaccepted_memory_init_kdump+0x20/0x20 + driver_register+0x67/0x100 + __pci_register_driver+0x83/0x90 + sp_pci_init+0x22/0x30 + sp_mod_init+0x13/0x30 + do_one_initcall+0xb8/0x290 + ? sched_clock_noinstr+0xd/0x10 + ? local_clock_noinstr+0x3e/0x100 + ? stack_depot_save_flags+0x21e/0x6a0 + ? local_clock+0x1c/0x60 + ? stack_depot_save_flags+0x21e/0x6a0 + ? sched_clock_noinstr+0xd/0x10 + ? local_clock_noinstr+0x3e/0x100 + ? __lock_acquire+0xd90/0xe30 + ? sched_clock_noinstr+0xd/0x10 + ? local_clock_noinstr+0x3e/0x100 + ? __create_object+0x66/0x100 + ? local_clock+0x1c/0x60 + ? __create_object+0x66/0x100 + ? parameq+0x1b/0x90 + ? parse_one+0x6d/0x1d0 + ? parse_args+0xd7/0x1f0 + ? 
do_initcall_level+0x180/0x180 + do_initcall_level+0xb0/0x180 + do_initcalls+0x60/0xa0 + ? kernel_init+0x1f/0x1d0 + do_basic_setup+0x41/0x50 + kernel_init_freeable+0x1ac/0x230 + ? rest_init+0x1f0/0x1f0 + kernel_init+0x1f/0x1d0 + ? rest_init+0x1f0/0x1f0 + ret_from_fork+0x3d/0x50 + ? rest_init+0x1f0/0x1f0 + ret_from_fork_asm+0x11/0x20 + + Modules linked in: + CR2: 00000000000000f0 + ---[ end trace 0000000000000000 ]--- + RIP: 0010:__sev_snp_shutdown_locked+0x2e/0x150 + Code: 00 55 48 89 e5 41 57 41 56 41 54 53 48 83 ec 10 41 89 f7 49 89 fe 65 48 8b 04 25 28 00 00 00 48 89 45 d8 48 8b 05 6a 5a 7f 06 <4c> 8b a0 f0 00 00 00 41 0f b6 9c 24 a2 00 00 00 48 83 fb 02 0f 83 + RSP: 0018:ffffb2ea4014b7b8 EFLAGS: 00010286 + RAX: 0000000000000000 RBX: ffff9e4acd2e0a28 RCX: 0000000000000000 + RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffb2ea4014b808 + RBP: ffffb2ea4014b7e8 R08: 0000000000000106 R09: 000000000003d9c0 + R10: 0000000000000001 R11: ffffffffa39ff070 R12: ffff9e49d40590c8 + R13: 0000000000000000 R14: ffffb2ea4014b808 R15: 0000000000000000 + FS: 0000000000000000(0000) GS:ffff9e58b1e00000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 00000000000000f0 CR3: 0000000418a3e001 CR4: 0000000000770ef0 + PKRU: 55555554 + Kernel panic - not syncing: Fatal exception + Kernel Offset: 0x1fc00000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) + +Fixes: 1ca5614b84ee ("crypto: ccp: Add support to initialize the AMD-SP for SEV-SNP") +Cc: stable@vger.kernel.org +Signed-off-by: Kim Phillips +Reviewed-by: Liam Merwick +Reviewed-by: Mario Limonciello +Reviewed-by: John Allen +Reviewed-by: Tom Lendacky +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman +--- + drivers/crypto/ccp/sev-dev.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c +index 2102377f727b..1912bee22dd4 100644 +--- a/drivers/crypto/ccp/sev-dev.c ++++ b/drivers/crypto/ccp/sev-dev.c +@@ -1642,10 +1642,16 @@ static int sev_update_firmware(struct device *dev) + + static int __sev_snp_shutdown_locked(int *error, bool panic) + { +- struct sev_device *sev = psp_master->sev_data; ++ struct psp_device *psp = psp_master; ++ struct sev_device *sev; + struct sev_data_snp_shutdown_ex data; + int ret; + ++ if (!psp || !psp->sev_data) ++ return 0; ++ ++ sev = psp->sev_data; ++ + if (!sev->snp_initialized) + return 0; + +-- +2.45.2 + diff --git a/queue-6.10/dm-verity-fix-dm_is_verity_target-when-dm-verity-is-builtin.patch b/queue-6.10/dm-verity-fix-dm_is_verity_target-when-dm-verity-is-builtin.patch new file mode 100644 index 00000000000..7ea85722637 --- /dev/null +++ b/queue-6.10/dm-verity-fix-dm_is_verity_target-when-dm-verity-is-builtin.patch @@ -0,0 +1,56 @@ +From 3708c7269593b836b1d684214cd9f5d83e4ed3fd Mon Sep 17 00:00:00 2001 +From: Eric Biggers +Date: Thu, 4 Jul 2024 16:09:57 +0200 +Subject: dm-verity: fix dm_is_verity_target() when dm-verity is builtin + +From: Eric Biggers + +commit 3708c7269593b836b1d684214cd9f5d83e4ed3fd upstream. + +When CONFIG_DM_VERITY=y, dm_is_verity_target() returned true for any +builtin dm target, not just dm-verity. Fix this by checking for +verity_target instead of THIS_MODULE (which is NULL for builtin code). 
+ +Fixes: b6c1c5745ccc ("dm: Add verity helpers for LoadPin") +Cc: stable@vger.kernel.org +Cc: Matthias Kaehlcke +Cc: Kees Cook +Signed-off-by: Eric Biggers +Signed-off-by: Mikulas Patocka +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-verity-target.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +--- a/drivers/md/dm-verity-target.c ++++ b/drivers/md/dm-verity-target.c +@@ -1539,14 +1539,6 @@ bad: + } + + /* +- * Check whether a DM target is a verity target. +- */ +-bool dm_is_verity_target(struct dm_target *ti) +-{ +- return ti->type->module == THIS_MODULE; +-} +- +-/* + * Get the verity mode (error behavior) of a verity target. + * + * Returns the verity mode of the target, or -EINVAL if 'ti' is not a verity +@@ -1599,6 +1591,14 @@ static struct target_type verity_target + }; + module_dm(verity); + ++/* ++ * Check whether a DM target is a verity target. ++ */ ++bool dm_is_verity_target(struct dm_target *ti) ++{ ++ return ti->type == &verity_target; ++} ++ + MODULE_AUTHOR("Mikulas Patocka "); + MODULE_AUTHOR("Mandeep Baines "); + MODULE_AUTHOR("Will Drewry "); diff --git a/queue-6.10/drm-amd-amdgpu-fix-uninitialized-variable-warnings.patch b/queue-6.10/drm-amd-amdgpu-fix-uninitialized-variable-warnings.patch new file mode 100644 index 00000000000..b504a76affd --- /dev/null +++ b/queue-6.10/drm-amd-amdgpu-fix-uninitialized-variable-warnings.patch @@ -0,0 +1,32 @@ +From df65aabef3c0327c23b840ab5520150df4db6b5f Mon Sep 17 00:00:00 2001 +From: Ma Ke +Date: Thu, 18 Jul 2024 22:17:35 +0800 +Subject: drm/amd/amdgpu: Fix uninitialized variable warnings + +From: Ma Ke + +commit df65aabef3c0327c23b840ab5520150df4db6b5f upstream. + +Return 0 to avoid returning an uninitialized variable r. + +Cc: stable@vger.kernel.org +Fixes: 230dd6bb6117 ("drm/amd/amdgpu: implement mode2 reset on smu_v13_0_10") +Signed-off-by: Ma Ke +Signed-off-by: Alex Deucher +(cherry picked from commit 6472de66c0aa18d50a4b5ca85f8272e88a737676) +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c ++++ b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c +@@ -91,7 +91,7 @@ static int smu_v13_0_10_mode2_suspend_ip + adev->ip_blocks[i].status.hw = false; + } + +- return r; ++ return 0; + } + + static int diff --git a/queue-6.10/drm-amdgpu-add-missed-harvest-check-for-vcn-ip-v4-v5.patch b/queue-6.10/drm-amdgpu-add-missed-harvest-check-for-vcn-ip-v4-v5.patch new file mode 100644 index 00000000000..9a187a451b6 --- /dev/null +++ b/queue-6.10/drm-amdgpu-add-missed-harvest-check-for-vcn-ip-v4-v5.patch @@ -0,0 +1,159 @@ +From fab1ead0ae3a4757afb92ff6909b37d63db17e55 Mon Sep 17 00:00:00 2001 +From: Tim Huang +Date: Tue, 23 Jul 2024 16:54:34 +0800 +Subject: drm/amdgpu: add missed harvest check for VCN IP v4/v5 + +From: Tim Huang + +commit fab1ead0ae3a4757afb92ff6909b37d63db17e55 upstream. + +To prevent below probe failure, add a check for models with VCN +IP v4.0.6 where VCN1 may be harvested. + +v2: +Apply the same check to VCN IP v4.0 and v5.0. 
+ +[ 54.070117] RIP: 0010:vcn_v4_0_5_start_dpg_mode+0x9be/0x36b0 [amdgpu] +[ 54.071055] Code: 80 fb ff 8d 82 00 80 fe ff 81 fe 00 06 00 00 0f 43 +c2 49 69 d5 38 0d 00 00 48 8d 71 04 c1 e8 02 4c 01 f2 48 89 b2 50 f6 02 +00 <89> 01 48 8b 82 50 f6 02 00 48 8d 48 04 48 89 8a 50 f6 02 00 c7 00 +[ 54.072408] RSP: 0018:ffffb17985f736f8 EFLAGS: 00010286 +[ 54.072793] RAX: 00000000000000d6 RBX: ffff99a82f680000 RCX: +0000000000000000 +[ 54.073315] RDX: ffff99a82f680000 RSI: 0000000000000004 RDI: +ffff99a82f680000 +[ 54.073835] RBP: ffffb17985f73730 R08: 0000000000000001 R09: +0000000000000000 +[ 54.074353] R10: 0000000000000008 R11: ffffb17983c05000 R12: +0000000000000000 +[ 54.074879] R13: 0000000000000000 R14: ffff99a82f680000 R15: +0000000000000001 +[ 54.075400] FS: 00007f8d9c79a000(0000) GS:ffff99ab2f140000(0000) +knlGS:0000000000000000 +[ 54.075988] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 54.076408] CR2: 0000000000000000 CR3: 0000000140c3a000 CR4: +0000000000750ef0 +[ 54.076927] PKRU: 55555554 +[ 54.077132] Call Trace: +[ 54.077319] +[ 54.077484] ? show_regs+0x69/0x80 +[ 54.077747] ? __die+0x28/0x70 +[ 54.077979] ? page_fault_oops+0x180/0x4b0 +[ 54.078286] ? do_user_addr_fault+0x2d2/0x680 +[ 54.078610] ? exc_page_fault+0x84/0x190 +[ 54.078910] ? asm_exc_page_fault+0x2b/0x30 +[ 54.079224] ? vcn_v4_0_5_start_dpg_mode+0x9be/0x36b0 [amdgpu] +[ 54.079941] ? vcn_v4_0_5_start_dpg_mode+0xe6/0x36b0 [amdgpu] +[ 54.080617] vcn_v4_0_5_set_powergating_state+0x82/0x19b0 [amdgpu] +[ 54.081316] amdgpu_device_ip_set_powergating_state+0x64/0xc0 +[amdgpu] +[ 54.082057] amdgpu_vcn_ring_begin_use+0x6f/0x1d0 [amdgpu] +[ 54.082727] amdgpu_ring_alloc+0x44/0x70 [amdgpu] +[ 54.083351] amdgpu_vcn_dec_sw_ring_test_ring+0x40/0x110 [amdgpu] +[ 54.084054] amdgpu_ring_test_helper+0x22/0x90 [amdgpu] +[ 54.084698] vcn_v4_0_5_hw_init+0x87/0xc0 [amdgpu] +[ 54.085307] amdgpu_device_init+0x1f96/0x2780 [amdgpu] +[ 54.085951] amdgpu_driver_load_kms+0x1e/0xc0 [amdgpu] +[ 54.086591] amdgpu_pci_probe+0x19f/0x550 [amdgpu] +[ 54.087215] local_pci_probe+0x48/0xa0 +[ 54.087509] pci_device_probe+0xc9/0x250 +[ 54.087812] really_probe+0x1a4/0x3f0 +[ 54.088101] __driver_probe_device+0x7d/0x170 +[ 54.088443] driver_probe_device+0x24/0xa0 +[ 54.088765] __driver_attach+0xdd/0x1d0 +[ 54.089068] ? __pfx___driver_attach+0x10/0x10 +[ 54.089417] bus_for_each_dev+0x8e/0xe0 +[ 54.089718] driver_attach+0x22/0x30 +[ 54.090000] bus_add_driver+0x120/0x220 +[ 54.090303] driver_register+0x62/0x120 +[ 54.090606] ? __pfx_amdgpu_init+0x10/0x10 [amdgpu] +[ 54.091255] __pci_register_driver+0x62/0x70 +[ 54.091593] amdgpu_init+0x67/0xff0 [amdgpu] +[ 54.092190] do_one_initcall+0x5f/0x330 +[ 54.092495] do_init_module+0x68/0x240 +[ 54.092794] load_module+0x201c/0x2110 +[ 54.093093] init_module_from_file+0x97/0xd0 +[ 54.093428] ? 
init_module_from_file+0x97/0xd0 +[ 54.093777] idempotent_init_module+0x11c/0x2a0 +[ 54.094134] __x64_sys_finit_module+0x64/0xc0 +[ 54.094476] do_syscall_64+0x58/0x120 +[ 54.094767] entry_SYSCALL_64_after_hwframe+0x6e/0x76 + +Signed-off-by: Tim Huang +Reviewed-by: Saleemkhan Jamadar +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org +(cherry picked from commit 0b071245ddd98539d4f7493bdd188417fcf2d629) +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 6 ++++++ + drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c | 6 ++++++ + drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c | 6 ++++++ + 3 files changed, 18 insertions(+) + +--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +@@ -1053,6 +1053,9 @@ static int vcn_v4_0_start(struct amdgpu_ + amdgpu_dpm_enable_uvd(adev, true); + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { ++ if (adev->vcn.harvest_config & (1 << i)) ++ continue; ++ + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { +@@ -1506,6 +1509,9 @@ static int vcn_v4_0_stop(struct amdgpu_d + int i, r = 0; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { ++ if (adev->vcn.harvest_config & (1 << i)) ++ continue; ++ + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; + +--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c ++++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +@@ -964,6 +964,9 @@ static int vcn_v4_0_5_start(struct amdgp + amdgpu_dpm_enable_uvd(adev, true); + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { ++ if (adev->vcn.harvest_config & (1 << i)) ++ continue; ++ + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { +@@ -1168,6 +1171,9 @@ static int vcn_v4_0_5_stop(struct amdgpu + int i, r = 0; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { ++ if (adev->vcn.harvest_config & (1 << i)) ++ continue; ++ + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; + +--- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +@@ -722,6 +722,9 @@ static int vcn_v5_0_0_start(struct amdgp + amdgpu_dpm_enable_uvd(adev, true); + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { ++ if (adev->vcn.harvest_config & (1 << i)) ++ continue; ++ + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { +@@ -899,6 +902,9 @@ static int vcn_v5_0_0_stop(struct amdgpu + int i, r = 0; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { ++ if (adev->vcn.harvest_config & (1 << i)) ++ continue; ++ + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; + diff --git a/queue-6.10/drm-amdgpu-reset-vm-state-machine-after-gpu-reset-vram-lost.patch b/queue-6.10/drm-amdgpu-reset-vm-state-machine-after-gpu-reset-vram-lost.patch new file mode 100644 index 00000000000..cf1e604c10f --- /dev/null +++ b/queue-6.10/drm-amdgpu-reset-vm-state-machine-after-gpu-reset-vram-lost.patch @@ -0,0 +1,72 @@ +From 5659b0c93a1ea02c662a030b322093203f299185 Mon Sep 17 00:00:00 2001 +From: ZhenGuo Yin +Date: Fri, 19 Jul 2024 16:10:40 +0800 +Subject: drm/amdgpu: reset vm state machine after gpu reset(vram lost) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: ZhenGuo Yin + +commit 5659b0c93a1ea02c662a030b322093203f299185 upstream. + +[Why] +Page table of compute VM in the VRAM will lost after gpu reset. 
+VRAM won't be restored since compute VM has no shadows. + +[How] +Use higher 32-bit of vm->generation to record a vram_lost_counter. +Reset the VM state machine when vm->genertaion is not equal to +the new generation token. + +v2: Check vm->generation instead of calling drm_sched_entity_error +in amdgpu_vm_validate. +v3: Use new generation token instead of vram_lost_counter for check. + +Signed-off-by: ZhenGuo Yin +Reviewed-by: Christian König +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org +(cherry picked from commit 47c0388b0589cb481c294dcb857d25a214c46eb3) +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +@@ -434,7 +434,7 @@ uint64_t amdgpu_vm_generation(struct amd + if (!vm) + return result; + +- result += vm->generation; ++ result += lower_32_bits(vm->generation); + /* Add one if the page tables will be re-generated on next CS */ + if (drm_sched_entity_error(&vm->delayed)) + ++result; +@@ -463,13 +463,14 @@ int amdgpu_vm_validate(struct amdgpu_dev + int (*validate)(void *p, struct amdgpu_bo *bo), + void *param) + { ++ uint64_t new_vm_generation = amdgpu_vm_generation(adev, vm); + struct amdgpu_vm_bo_base *bo_base; + struct amdgpu_bo *shadow; + struct amdgpu_bo *bo; + int r; + +- if (drm_sched_entity_error(&vm->delayed)) { +- ++vm->generation; ++ if (vm->generation != new_vm_generation) { ++ vm->generation = new_vm_generation; + amdgpu_vm_bo_reset_state_machine(vm); + amdgpu_vm_fini_entities(vm); + r = amdgpu_vm_init_entities(adev, vm); +@@ -2441,7 +2442,7 @@ int amdgpu_vm_init(struct amdgpu_device + vm->last_update = dma_fence_get_stub(); + vm->last_unlocked = dma_fence_get_stub(); + vm->last_tlb_flush = dma_fence_get_stub(); +- vm->generation = 0; ++ vm->generation = amdgpu_vm_generation(adev, NULL); + + mutex_init(&vm->eviction_lock); + vm->evicting = false; diff --git a/queue-6.10/drm-amdgpu-sdma5.2-update-wptr-registers-as-well-as-doorbell.patch b/queue-6.10/drm-amdgpu-sdma5.2-update-wptr-registers-as-well-as-doorbell.patch new file mode 100644 index 00000000000..a5a5400e409 --- /dev/null +++ b/queue-6.10/drm-amdgpu-sdma5.2-update-wptr-registers-as-well-as-doorbell.patch @@ -0,0 +1,56 @@ +From a03ebf116303e5d13ba9a2b65726b106cb1e96f6 Mon Sep 17 00:00:00 2001 +From: Alex Deucher +Date: Tue, 9 Jul 2024 17:54:11 -0400 +Subject: drm/amdgpu/sdma5.2: Update wptr registers as well as doorbell + +From: Alex Deucher + +commit a03ebf116303e5d13ba9a2b65726b106cb1e96f6 upstream. + +We seem to have a case where SDMA will sometimes miss a doorbell +if GFX is entering the powergating state when the doorbell comes in. +To workaround this, we can update the wptr via MMIO, however, +this is only safe because we disallow gfxoff in begin_ring() for +SDMA 5.2 and then allow it again in end_ring(). + +Enable this workaround while we are root causing the issue with +the HW team. 
+ +Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/3440 +Tested-by: Friedrich Vock +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org +(cherry picked from commit f2ac52634963fc38e4935e11077b6f7854e5d700) +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c ++++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +@@ -176,6 +176,14 @@ static void sdma_v5_2_ring_set_wptr(stru + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); ++ /* SDMA seems to miss doorbells sometimes when powergating kicks in. ++ * Updating the wptr directly will wake it. This is only safe because ++ * we disallow gfxoff in begin_use() and then allow it again in end_use(). ++ */ ++ WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), ++ lower_32_bits(ring->wptr << 2)); ++ WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), ++ upper_32_bits(ring->wptr << 2)); + } else { + DRM_DEBUG("Not using doorbell -- " + "mmSDMA%i_GFX_RB_WPTR == 0x%08x " +@@ -1647,6 +1655,10 @@ static void sdma_v5_2_ring_begin_use(str + * but it shouldn't hurt for other parts since + * this GFXOFF will be disallowed anyway when SDMA is + * active, this just makes it explicit. ++ * sdma_v5_2_ring_set_wptr() takes advantage of this ++ * to update the wptr because sometimes SDMA seems to miss ++ * doorbells when entering PG. If you remove this, update ++ * sdma_v5_2_ring_set_wptr() as well! + */ + amdgpu_gfx_off_ctrl(adev, false); + } diff --git a/queue-6.10/drm-dp_mst-fix-all-mstb-marked-as-not-probed-after-suspend-resume.patch b/queue-6.10/drm-dp_mst-fix-all-mstb-marked-as-not-probed-after-suspend-resume.patch new file mode 100644 index 00000000000..cf441cbc13b --- /dev/null +++ b/queue-6.10/drm-dp_mst-fix-all-mstb-marked-as-not-probed-after-suspend-resume.patch @@ -0,0 +1,56 @@ +From d63d81094d208abb20fc444514b2d9ec2f4b7c4e Mon Sep 17 00:00:00 2001 +From: Wayne Lin +Date: Wed, 26 Jun 2024 16:48:23 +0800 +Subject: drm/dp_mst: Fix all mstb marked as not probed after suspend/resume + +From: Wayne Lin + +commit d63d81094d208abb20fc444514b2d9ec2f4b7c4e upstream. + +[Why] +After supend/resume, with topology unchanged, observe that +link_address_sent of all mstb are marked as false even the topology probing +is done without any error. + +It is caused by wrongly also include "ret == 0" case as a probing failure +case. + +[How] +Remove inappropriate checking conditions. 
+ +Cc: Lyude Paul +Cc: Harry Wentland +Cc: Jani Nikula +Cc: Imre Deak +Cc: Daniel Vetter +Cc: stable@vger.kernel.org +Fixes: 37dfdc55ffeb ("drm/dp_mst: Cleanup drm_dp_send_link_address() a bit") +Signed-off-by: Wayne Lin +Reviewed-by: Lyude Paul +Signed-off-by: Lyude Paul +Link: https://patchwork.freedesktop.org/patch/msgid/20240626084825.878565-2-Wayne.Lin@amd.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/display/drm_dp_mst_topology.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/display/drm_dp_mst_topology.c ++++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c +@@ -2929,7 +2929,7 @@ static int drm_dp_send_link_address(stru + + /* FIXME: Actually do some real error handling here */ + ret = drm_dp_mst_wait_tx_reply(mstb, txmsg); +- if (ret <= 0) { ++ if (ret < 0) { + drm_err(mgr->dev, "Sending link address failed with %d\n", ret); + goto out; + } +@@ -2981,7 +2981,7 @@ static int drm_dp_send_link_address(stru + mutex_unlock(&mgr->lock); + + out: +- if (ret <= 0) ++ if (ret < 0) + mstb->link_address_sent = false; + kfree(txmsg); + return ret < 0 ? ret : changed; diff --git a/queue-6.10/drm-etnaviv-don-t-block-scheduler-when-gpu-is-still-active.patch b/queue-6.10/drm-etnaviv-don-t-block-scheduler-when-gpu-is-still-active.patch new file mode 100644 index 00000000000..0c39878d9fc --- /dev/null +++ b/queue-6.10/drm-etnaviv-don-t-block-scheduler-when-gpu-is-still-active.patch @@ -0,0 +1,65 @@ +From 704d3d60fec451f37706368d9d3e320322978986 Mon Sep 17 00:00:00 2001 +From: Lucas Stach +Date: Fri, 21 Jun 2024 21:59:19 +0200 +Subject: drm/etnaviv: don't block scheduler when GPU is still active + +From: Lucas Stach + +commit 704d3d60fec451f37706368d9d3e320322978986 upstream. + +Since 45ecaea73883 ("drm/sched: Partial revert of 'drm/sched: Keep +s_fence->parent pointer'") still active jobs aren't put back in the +pending list on drm_sched_start(), as they don't have a active +parent fence anymore, so if the GPU is still working and the timeout +is extended, all currently active jobs will be freed. + +To avoid prematurely freeing jobs that are still active on the GPU, +don't block the scheduler until we are fully committed to actually +reset the GPU. + +As the current job is already removed from the pending list and +will not be put back when drm_sched_start() isn't called, we must +make sure to put the job back on the pending list when extending +the timeout. + +Cc: stable@vger.kernel.org #6.0 +Signed-off-by: Lucas Stach +Reviewed-by: Philipp Zabel +Reviewed-by: Christian Gmeiner +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/etnaviv/etnaviv_sched.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c ++++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c +@@ -38,9 +38,6 @@ static enum drm_gpu_sched_stat etnaviv_s + u32 dma_addr; + int change; + +- /* block scheduler */ +- drm_sched_stop(&gpu->sched, sched_job); +- + /* + * If the GPU managed to complete this jobs fence, the timout is + * spurious. Bail out. 
+@@ -63,6 +60,9 @@ static enum drm_gpu_sched_stat etnaviv_s + goto out_no_timeout; + } + ++ /* block scheduler */ ++ drm_sched_stop(&gpu->sched, sched_job); ++ + if(sched_job) + drm_sched_increase_karma(sched_job); + +@@ -76,8 +76,7 @@ static enum drm_gpu_sched_stat etnaviv_s + return DRM_GPU_SCHED_STAT_NOMINAL; + + out_no_timeout: +- /* restart scheduler after GPU is usable again */ +- drm_sched_start(&gpu->sched, true); ++ list_add(&sched_job->list, &sched_job->sched->pending_list); + return DRM_GPU_SCHED_STAT_NOMINAL; + } + diff --git a/queue-6.10/drm-i915-dp-don-t-switch-the-lttpr-mode-on-an-active-link.patch b/queue-6.10/drm-i915-dp-don-t-switch-the-lttpr-mode-on-an-active-link.patch new file mode 100644 index 00000000000..11b9ddf76d2 --- /dev/null +++ b/queue-6.10/drm-i915-dp-don-t-switch-the-lttpr-mode-on-an-active-link.patch @@ -0,0 +1,129 @@ +From 509580fad7323b6a5da27e8365cd488f3b57210e Mon Sep 17 00:00:00 2001 +From: Imre Deak +Date: Mon, 8 Jul 2024 22:00:25 +0300 +Subject: drm/i915/dp: Don't switch the LTTPR mode on an active link +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Imre Deak + +commit 509580fad7323b6a5da27e8365cd488f3b57210e upstream. + +Switching to transparent mode leads to a loss of link synchronization, +so prevent doing this on an active link. This happened at least on an +Intel N100 system / DELL UD22 dock, the LTTPR residing either on the +host or the dock. To fix the issue, keep the current mode on an active +link, adjusting the LTTPR count accordingly (resetting it to 0 in +transparent mode). + +v2: Adjust code comment during link training about reiniting the LTTPRs. + (Ville) + +Fixes: 7b2a4ab8b0ef ("drm/i915: Switch to LTTPR transparent mode link training") +Reported-and-tested-by: Gareth Yu +Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/10902 +Cc: # v5.15+ +Cc: Ville Syrjälä +Reviewed-by: Ville Syrjälä +Reviewed-by: Ankit Nautiyal +Signed-off-by: Imre Deak +Link: https://patchwork.freedesktop.org/patch/msgid/20240708190029.271247-3-imre.deak@intel.com +(cherry picked from commit 211ad49cf8ccfdc798a719b4d1e000d0a8a9e588) +Signed-off-by: Tvrtko Ursulin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/i915/display/intel_dp_link_training.c | 55 +++++++++++++++--- + 1 file changed, 48 insertions(+), 7 deletions(-) + +--- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c ++++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c +@@ -114,10 +114,24 @@ intel_dp_set_lttpr_transparent_mode(stru + return drm_dp_dpcd_write(&intel_dp->aux, DP_PHY_REPEATER_MODE, &val, 1) == 1; + } + +-static int intel_dp_init_lttpr(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEIVER_CAP_SIZE]) ++static bool intel_dp_lttpr_transparent_mode_enabled(struct intel_dp *intel_dp) ++{ ++ return intel_dp->lttpr_common_caps[DP_PHY_REPEATER_MODE - ++ DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV] == ++ DP_PHY_REPEATER_MODE_TRANSPARENT; ++} ++ ++/* ++ * Read the LTTPR common capabilities and switch the LTTPR PHYs to ++ * non-transparent mode if this is supported. Preserve the ++ * transparent/non-transparent mode on an active link. ++ * ++ * Return the number of detected LTTPRs in non-transparent mode or 0 if the ++ * LTTPRs are in transparent mode or the detection failed. 
++ */ ++static int intel_dp_init_lttpr_phys(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEIVER_CAP_SIZE]) + { + int lttpr_count; +- int i; + + if (!intel_dp_read_lttpr_common_caps(intel_dp, dpcd)) + return 0; +@@ -132,6 +146,19 @@ static int intel_dp_init_lttpr(struct in + return 0; + + /* ++ * Don't change the mode on an active link, to prevent a loss of link ++ * synchronization. See DP Standard v2.0 3.6.7. about the LTTPR ++ * resetting its internal state when the mode is changed from ++ * non-transparent to transparent. ++ */ ++ if (intel_dp->link_trained) { ++ if (lttpr_count < 0 || intel_dp_lttpr_transparent_mode_enabled(intel_dp)) ++ goto out_reset_lttpr_count; ++ ++ return lttpr_count; ++ } ++ ++ /* + * See DP Standard v2.0 3.6.6.1. about the explicit disabling of + * non-transparent mode and the disable->enable non-transparent mode + * sequence. +@@ -151,11 +178,25 @@ static int intel_dp_init_lttpr(struct in + "Switching to LTTPR non-transparent LT mode failed, fall-back to transparent mode\n"); + + intel_dp_set_lttpr_transparent_mode(intel_dp, true); +- intel_dp_reset_lttpr_count(intel_dp); + +- return 0; ++ goto out_reset_lttpr_count; + } + ++ return lttpr_count; ++ ++out_reset_lttpr_count: ++ intel_dp_reset_lttpr_count(intel_dp); ++ ++ return 0; ++} ++ ++static int intel_dp_init_lttpr(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEIVER_CAP_SIZE]) ++{ ++ int lttpr_count; ++ int i; ++ ++ lttpr_count = intel_dp_init_lttpr_phys(intel_dp, dpcd); ++ + for (i = 0; i < lttpr_count; i++) + intel_dp_read_lttpr_phy_caps(intel_dp, dpcd, DP_PHY_LTTPR(i)); + +@@ -1372,10 +1413,10 @@ void intel_dp_start_link_train(struct in + { + struct drm_i915_private *i915 = dp_to_i915(intel_dp); + bool passed; +- + /* +- * TODO: Reiniting LTTPRs here won't be needed once proper connector +- * HW state readout is added. ++ * Reinit the LTTPRs here to ensure that they are switched to ++ * non-transparent mode. During an earlier LTTPR detection this ++ * could've been prevented by an active link. + */ + int lttpr_count = intel_dp_init_lttpr_and_dprx_caps(intel_dp); + diff --git a/queue-6.10/drm-i915-dp-reset-intel_dp-link_trained-before-retraining-the-link.patch b/queue-6.10/drm-i915-dp-reset-intel_dp-link_trained-before-retraining-the-link.patch new file mode 100644 index 00000000000..16b8654cb28 --- /dev/null +++ b/queue-6.10/drm-i915-dp-reset-intel_dp-link_trained-before-retraining-the-link.patch @@ -0,0 +1,42 @@ +From d13e2a6e95e6b87f571c837c71a3d05691def9bb Mon Sep 17 00:00:00 2001 +From: Imre Deak +Date: Mon, 8 Jul 2024 22:00:24 +0300 +Subject: drm/i915/dp: Reset intel_dp->link_trained before retraining the link +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Imre Deak + +commit d13e2a6e95e6b87f571c837c71a3d05691def9bb upstream. + +Regularly retraining a link during an atomic commit happens with the +given pipe/link already disabled and hence intel_dp->link_trained being +false. Ensure this also for retraining a DP SST link via direct calls to +the link training functions (vs. an actual commit as for DP MST). So far +nothing depended on this, however the next patch will depend on +link_trained==false for changing the LTTPR mode to non-transparent. 
+ +Cc: # v5.15+ +Cc: Ville Syrjälä +Reviewed-by: Ankit Nautiyal +Signed-off-by: Imre Deak +Link: https://patchwork.freedesktop.org/patch/msgid/20240708190029.271247-2-imre.deak@intel.com +(cherry picked from commit a4d5ce61765c08ab364aa4b327f6739b646e6cfa) +Signed-off-by: Tvrtko Ursulin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/i915/display/intel_dp.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/gpu/drm/i915/display/intel_dp.c ++++ b/drivers/gpu/drm/i915/display/intel_dp.c +@@ -5267,6 +5267,8 @@ int intel_dp_retrain_link(struct intel_e + !intel_dp_mst_is_master_trans(crtc_state)) + continue; + ++ intel_dp->link_trained = false; ++ + intel_dp_check_frl_training(intel_dp); + intel_dp_pcon_dsc_configure(intel_dp, crtc_state); + intel_dp_start_link_train(intel_dp, crtc_state); diff --git a/queue-6.10/drm-i915-gt-do-not-consider-preemption-during-execlists_dequeue-for-gen8.patch b/queue-6.10/drm-i915-gt-do-not-consider-preemption-during-execlists_dequeue-for-gen8.patch new file mode 100644 index 00000000000..aa5f1a9512c --- /dev/null +++ b/queue-6.10/drm-i915-gt-do-not-consider-preemption-during-execlists_dequeue-for-gen8.patch @@ -0,0 +1,63 @@ +From 65564157ae64cec0f527583f96e32f484f730f92 Mon Sep 17 00:00:00 2001 +From: Nitin Gote +Date: Thu, 11 Jul 2024 22:02:08 +0530 +Subject: drm/i915/gt: Do not consider preemption during execlists_dequeue for gen8 + +From: Nitin Gote + +commit 65564157ae64cec0f527583f96e32f484f730f92 upstream. + +We're seeing a GPU hang issue on a CHV platform, which was caused by commit +bac24f59f454 ("drm/i915/execlists: Enable coarse preemption boundaries for +Gen8"). + +The Gen8 platform only supports timeslicing and doesn't have a preemption +mechanism, as its engines do not have a preemption timer. + +Commit 751f82b353a6 ("drm/i915/gt: Only disable preemption on Gen8 render +engines") addressed this issue only for render engines. This patch extends +that fix by ensuring that preemption is not considered for all engines on +Gen8 platforms. + +v4: + - Use the correct Fixes tag (Rodrigo Vivi) + - Reworded commit log (Andi Shyti) + +v3: + - Inside need_preempt(), condition of can_preempt() is not required + as simplified can_preempt() is enough. 
(Chris Wilson) + +v2: Simplify can_preempt() function (Tvrtko Ursulin) + +Fixes: 751f82b353a6 ("drm/i915/gt: Only disable preemption on gen8 render engines") +Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/11396 +Suggested-by: Andi Shyti +Signed-off-by: Nitin Gote +Cc: Chris Wilson +CC: # v5.12+ +Reviewed-by: Jonathan Cavitt +Reviewed-by: Andi Shyti +Signed-off-by: Andi Shyti +Link: https://patchwork.freedesktop.org/patch/msgid/20240711163208.1355736-1-nitin.r.gote@intel.com +(cherry picked from commit 7df0be6e6280c6fca01d039864bb123e5e36604b) +Signed-off-by: Tvrtko Ursulin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 6 +----- + 1 file changed, 1 insertion(+), 5 deletions(-) + +--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c ++++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +@@ -3315,11 +3315,7 @@ static void remove_from_engine(struct i9 + + static bool can_preempt(struct intel_engine_cs *engine) + { +- if (GRAPHICS_VER(engine->i915) > 8) +- return true; +- +- /* GPGPU on bdw requires extra w/a; not implemented */ +- return engine->class != RENDER_CLASS; ++ return GRAPHICS_VER(engine->i915) > 8; + } + + static void kick_execlists(const struct i915_request *rq, int prio) diff --git a/queue-6.10/drm-panfrost-mark-simple_ondemand-governor-as-softdep.patch b/queue-6.10/drm-panfrost-mark-simple_ondemand-governor-as-softdep.patch new file mode 100644 index 00000000000..f2de69e8867 --- /dev/null +++ b/queue-6.10/drm-panfrost-mark-simple_ondemand-governor-as-softdep.patch @@ -0,0 +1,64 @@ +From 80f4e62730a91572b7fdc657f7bb747e107ae308 Mon Sep 17 00:00:00 2001 +From: Dragan Simic +Date: Mon, 17 Jun 2024 22:17:48 +0200 +Subject: drm/panfrost: Mark simple_ondemand governor as softdep + +From: Dragan Simic + +commit 80f4e62730a91572b7fdc657f7bb747e107ae308 upstream. + +Panfrost DRM driver uses devfreq to perform DVFS, while using simple_ondemand +devfreq governor by default. This causes driver initialization to fail on +boot when simple_ondemand governor isn't built into the kernel statically, +as a result of the missing module dependency and, consequently, the required +governor module not being included in the initial ramdisk. Thus, let's mark +simple_ondemand governor as a softdep for Panfrost, to have its kernel module +included in the initial ramdisk. + +This is a rather longstanding issue that has forced distributions to build +devfreq governors statically into their kernels, [1][2] or has forced users +to introduce some unnecessary workarounds. [3] + +For future reference, not having support for the simple_ondemand governor in +the initial ramdisk produces errors in the kernel log similar to these below, +which were taken from a Pine64 RockPro64: + + panfrost ff9a0000.gpu: [drm:panfrost_devfreq_init [panfrost]] *ERROR* Couldn't initialize GPU devfreq + panfrost ff9a0000.gpu: Fatal error during GPU init + panfrost: probe of ff9a0000.gpu failed with error -22 + +Having simple_ondemand marked as a softdep for Panfrost may not resolve this +issue for all Linux distributions. In particular, it will remain unresolved +for the distributions whose utilities for the initial ramdisk generation do +not handle the available softdep information [4] properly yet. However, some +Linux distributions already handle softdeps properly while generating their +initial ramdisks, [5] and this is a prerequisite step in the right direction +for the distributions that don't handle them properly yet. 
+ +[1] https://gitlab.manjaro.org/manjaro-arm/packages/core/linux/-/blob/linux61/config?ref_type=heads#L8180 +[2] https://salsa.debian.org/kernel-team/linux/-/merge_requests/1066 +[3] https://forum.pine64.org/showthread.php?tid=15458 +[4] https://git.kernel.org/pub/scm/utils/kernel/kmod/kmod.git/commit/?id=49d8e0b59052999de577ab732b719cfbeb89504d +[5] https://github.com/archlinux/mkinitcpio/commit/97ac4d37aae084a050be512f6d8f4489054668ad + +Cc: Diederik de Haas +Cc: Furkan Kardame +Cc: stable@vger.kernel.org +Fixes: f3ba91228e8e ("drm/panfrost: Add initial panfrost driver") +Signed-off-by: Dragan Simic +Reviewed-by: Steven Price +Reviewed-by: Boris Brezillon +Signed-off-by: Steven Price +Link: https://patchwork.freedesktop.org/patch/msgid/4e1e00422a14db4e2a80870afb704405da16fd1b.1718655077.git.dsimic@manjaro.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/panfrost/panfrost_drv.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/gpu/drm/panfrost/panfrost_drv.c ++++ b/drivers/gpu/drm/panfrost/panfrost_drv.c +@@ -828,3 +828,4 @@ module_platform_driver(panfrost_driver); + MODULE_AUTHOR("Panfrost Project Developers"); + MODULE_DESCRIPTION("Panfrost DRM Driver"); + MODULE_LICENSE("GPL v2"); ++MODULE_SOFTDEP("pre: governor_simpleondemand"); diff --git a/queue-6.10/drm-udl-remove-drm_connector_poll_hpd.patch b/queue-6.10/drm-udl-remove-drm_connector_poll_hpd.patch new file mode 100644 index 00000000000..8a6fbb9271c --- /dev/null +++ b/queue-6.10/drm-udl-remove-drm_connector_poll_hpd.patch @@ -0,0 +1,41 @@ +From 5aed213c7c6c4f5dcb1a3ef146f493f18fe703dc Mon Sep 17 00:00:00 2001 +From: Thomas Zimmermann +Date: Fri, 10 May 2024 17:47:08 +0200 +Subject: drm/udl: Remove DRM_CONNECTOR_POLL_HPD + +From: Thomas Zimmermann + +commit 5aed213c7c6c4f5dcb1a3ef146f493f18fe703dc upstream. + +DisplayLink devices do not generate hotplug events. Remove the poll +flag DRM_CONNECTOR_POLL_HPD, as it may not be specified together with +DRM_CONNECTOR_POLL_CONNECT or DRM_CONNECTOR_POLL_DISCONNECT. + +Signed-off-by: Thomas Zimmermann +Fixes: afdfc4c6f55f ("drm/udl: Fixed problem with UDL adpater reconnection") +Reviewed-by: Jani Nikula +Cc: Robert Tarasov +Cc: Alex Deucher +Cc: Dave Airlie +Cc: Sean Paul +Cc: Thomas Zimmermann +Cc: dri-devel@lists.freedesktop.org +Cc: # v4.15+ +Link: https://patchwork.freedesktop.org/patch/msgid/20240510154841.11370-2-tzimmermann@suse.de +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/udl/udl_modeset.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/drivers/gpu/drm/udl/udl_modeset.c ++++ b/drivers/gpu/drm/udl/udl_modeset.c +@@ -527,8 +527,7 @@ struct drm_connector *udl_connector_init + + drm_connector_helper_add(connector, &udl_connector_helper_funcs); + +- connector->polled = DRM_CONNECTOR_POLL_HPD | +- DRM_CONNECTOR_POLL_CONNECT | ++ connector->polled = DRM_CONNECTOR_POLL_CONNECT | + DRM_CONNECTOR_POLL_DISCONNECT; + + return connector; diff --git a/queue-6.10/mips-dts-loongson-add-isa-node.patch b/queue-6.10/mips-dts-loongson-add-isa-node.patch new file mode 100644 index 00000000000..68b7f10997f --- /dev/null +++ b/queue-6.10/mips-dts-loongson-add-isa-node.patch @@ -0,0 +1,38 @@ +From da3f62466e5afc752f8b72146bbc4700dbba5a9f Mon Sep 17 00:00:00 2001 +From: Jiaxun Yang +Date: Fri, 14 Jun 2024 16:40:13 +0100 +Subject: MIPS: dts: loongson: Add ISA node + +From: Jiaxun Yang + +commit da3f62466e5afc752f8b72146bbc4700dbba5a9f upstream. + +ISA node is required by Loongson64 platforms to initialize +PIO support. 
+ +Kernel will hang at boot without ISA node. + +Cc: stable@vger.kernel.org +Signed-off-by: Jiaxun Yang +Signed-off-by: Thomas Bogendoerfer +Signed-off-by: Greg Kroah-Hartman +--- + arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi ++++ b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi +@@ -52,6 +52,13 @@ + 0 0x40000000 0 0x40000000 0 0x40000000 + 0xfe 0x00000000 0xfe 0x00000000 0 0x40000000>; + ++ isa@18000000 { ++ compatible = "isa"; ++ #size-cells = <1>; ++ #address-cells = <2>; ++ ranges = <1 0x0 0x0 0x18000000 0x4000>; ++ }; ++ + pm: reset-controller@1fe07000 { + compatible = "loongson,ls2k-pm"; + reg = <0 0x1fe07000 0 0x422>; diff --git a/queue-6.10/mips-dts-loongson-fix-gmac-phy-node.patch b/queue-6.10/mips-dts-loongson-fix-gmac-phy-node.patch new file mode 100644 index 00000000000..6914f26fa47 --- /dev/null +++ b/queue-6.10/mips-dts-loongson-fix-gmac-phy-node.patch @@ -0,0 +1,44 @@ +From 813c18d1ca1987afaf47e035152e1baa1375b1b2 Mon Sep 17 00:00:00 2001 +From: Jiaxun Yang +Date: Fri, 14 Jun 2024 16:40:12 +0100 +Subject: MIPS: dts: loongson: Fix GMAC phy node + +From: Jiaxun Yang + +commit 813c18d1ca1987afaf47e035152e1baa1375b1b2 upstream. + +phy-mode should be rgmii-id to match hardware configuration. + +Also there should be a phy-handle to reference phy node. + +Fixes: f8a11425075f ("MIPS: Loongson64: Add GMAC support for Loongson-2K1000") +Cc: stable@vger.kernel.org +Signed-off-by: Jiaxun Yang +Signed-off-by: Thomas Bogendoerfer +Signed-off-by: Greg Kroah-Hartman +--- + arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi ++++ b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi +@@ -144,7 +144,8 @@ + <13 IRQ_TYPE_LEVEL_LOW>; + interrupt-names = "macirq", "eth_lpi"; + interrupt-parent = <&liointc0>; +- phy-mode = "rgmii"; ++ phy-mode = "rgmii-id"; ++ phy-handle = <&phy1>; + mdio { + #address-cells = <1>; + #size-cells = <0>; +@@ -167,7 +168,8 @@ + <15 IRQ_TYPE_LEVEL_LOW>; + interrupt-names = "macirq", "eth_lpi"; + interrupt-parent = <&liointc0>; +- phy-mode = "rgmii"; ++ phy-mode = "rgmii-id"; ++ phy-handle = <&phy1>; + mdio { + #address-cells = <1>; + #size-cells = <0>; diff --git a/queue-6.10/mips-ip30-ip30-console-add-missing-include.patch b/queue-6.10/mips-ip30-ip30-console-add-missing-include.patch new file mode 100644 index 00000000000..903a3b61b51 --- /dev/null +++ b/queue-6.10/mips-ip30-ip30-console-add-missing-include.patch @@ -0,0 +1,36 @@ +From 8de4ed75bd14ed197119ac509c6902a8561e0c1c Mon Sep 17 00:00:00 2001 +From: Jiaxun Yang +Date: Sun, 16 Jun 2024 18:54:24 +0100 +Subject: MIPS: ip30: ip30-console: Add missing include +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jiaxun Yang + +commit 8de4ed75bd14ed197119ac509c6902a8561e0c1c upstream. 
+ +Include linux/processor.h to fix build error: + +arch/mips/sgi-ip30/ip30-console.c: In function ‘prom_putchar’: +arch/mips/sgi-ip30/ip30-console.c:21:17: error: implicit declaration of function ‘cpu_relax’ [-Werror=implicit-function-declaration] + 21 | cpu_relax(); + +Cc: stable@vger.kernel.org +Signed-off-by: Jiaxun Yang +Signed-off-by: Thomas Bogendoerfer +Signed-off-by: Greg Kroah-Hartman +--- + arch/mips/sgi-ip30/ip30-console.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/mips/sgi-ip30/ip30-console.c ++++ b/arch/mips/sgi-ip30/ip30-console.c +@@ -1,6 +1,7 @@ + // SPDX-License-Identifier: GPL-2.0 + + #include ++#include + + #include + #include diff --git a/queue-6.10/mips-loongson64-env-hook-up-loongsson-2k.patch b/queue-6.10/mips-loongson64-env-hook-up-loongsson-2k.patch new file mode 100644 index 00000000000..84fc8c5a902 --- /dev/null +++ b/queue-6.10/mips-loongson64-env-hook-up-loongsson-2k.patch @@ -0,0 +1,65 @@ +From 77543269ff23c75bebfb8e6e9a1177b350908ea7 Mon Sep 17 00:00:00 2001 +From: Jiaxun Yang +Date: Fri, 14 Jun 2024 16:40:18 +0100 +Subject: MIPS: Loongson64: env: Hook up Loongsson-2K + +From: Jiaxun Yang + +commit 77543269ff23c75bebfb8e6e9a1177b350908ea7 upstream. + +Somehow those enablement bits were left over when we were +adding initial Loongson-2K support. + +Set up basic information and select proper builtin DTB for +Loongson-2K. + +Cc: stable@vger.kernel.org +Signed-off-by: Jiaxun Yang +Signed-off-by: Thomas Bogendoerfer +Signed-off-by: Greg Kroah-Hartman +--- + arch/mips/include/asm/mach-loongson64/boot_param.h | 2 ++ + arch/mips/loongson64/env.c | 8 ++++++++ + 2 files changed, 10 insertions(+) + +--- a/arch/mips/include/asm/mach-loongson64/boot_param.h ++++ b/arch/mips/include/asm/mach-loongson64/boot_param.h +@@ -42,12 +42,14 @@ enum loongson_cpu_type { + Legacy_1B = 0x5, + Legacy_2G = 0x6, + Legacy_2H = 0x7, ++ Legacy_2K = 0x8, + Loongson_1A = 0x100, + Loongson_1B = 0x101, + Loongson_2E = 0x200, + Loongson_2F = 0x201, + Loongson_2G = 0x202, + Loongson_2H = 0x203, ++ Loongson_2K = 0x204, + Loongson_3A = 0x300, + Loongson_3B = 0x301 + }; +--- a/arch/mips/loongson64/env.c ++++ b/arch/mips/loongson64/env.c +@@ -88,6 +88,12 @@ void __init prom_lefi_init_env(void) + cpu_clock_freq = ecpu->cpu_clock_freq; + loongson_sysconf.cputype = ecpu->cputype; + switch (ecpu->cputype) { ++ case Legacy_2K: ++ case Loongson_2K: ++ smp_group[0] = 0x900000001fe11000; ++ loongson_sysconf.cores_per_node = 2; ++ loongson_sysconf.cores_per_package = 2; ++ break; + case Legacy_3A: + case Loongson_3A: + loongson_sysconf.cores_per_node = 4; +@@ -221,6 +227,8 @@ void __init prom_lefi_init_env(void) + default: + break; + } ++ } else if ((read_c0_prid() & PRID_IMP_MASK) == PRID_IMP_LOONGSON_64R) { ++ loongson_fdt_blob = __dtb_loongson64_2core_2k1000_begin; + } else if ((read_c0_prid() & PRID_IMP_MASK) == PRID_IMP_LOONGSON_64G) { + if (loongson_sysconf.bridgetype == LS7A) + loongson_fdt_blob = __dtb_loongson64g_4core_ls7a_begin; diff --git a/queue-6.10/mips-loongson64-remove-memory-node-for-builtin-dtb.patch b/queue-6.10/mips-loongson64-remove-memory-node-for-builtin-dtb.patch new file mode 100644 index 00000000000..cad383386d9 --- /dev/null +++ b/queue-6.10/mips-loongson64-remove-memory-node-for-builtin-dtb.patch @@ -0,0 +1,40 @@ +From b81656c37acf1e682dde02f3e07987784b0f3634 Mon Sep 17 00:00:00 2001 +From: Jiaxun Yang +Date: Fri, 14 Jun 2024 16:40:09 +0100 +Subject: MIPS: Loongson64: Remove memory node for builtin-dtb + +From: Jiaxun Yang + +commit 
b81656c37acf1e682dde02f3e07987784b0f3634 upstream. + +Builtin DTBS should never contain memory node as memory is +going to be managed by LEFI interface. + +Remove memory node to prevent confliction. + +Fixes: b1a792601f26 ("MIPS: Loongson64: DeviceTree for Loongson-2K1000") +Cc: stable@vger.kernel.org +Signed-off-by: Jiaxun Yang +Signed-off-by: Thomas Bogendoerfer +Signed-off-by: Greg Kroah-Hartman +--- + arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi | 8 -------- + 1 file changed, 8 deletions(-) + +--- a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi ++++ b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi +@@ -23,14 +23,6 @@ + }; + }; + +- memory@200000 { +- compatible = "memory"; +- device_type = "memory"; +- reg = <0x00000000 0x00200000 0x00000000 0x0ee00000>, /* 238 MB at 2 MB */ +- <0x00000000 0x20000000 0x00000000 0x1f000000>, /* 496 MB at 512 MB */ +- <0x00000001 0x10000000 0x00000001 0xb0000000>; /* 6912 MB at 4352MB */ +- }; +- + cpu_clk: cpu_clk { + #clock-cells = <0>; + compatible = "fixed-clock"; diff --git a/queue-6.10/mips-loongson64-reset-prioritise-firmware-service.patch b/queue-6.10/mips-loongson64-reset-prioritise-firmware-service.patch new file mode 100644 index 00000000000..0056d4ce5ef --- /dev/null +++ b/queue-6.10/mips-loongson64-reset-prioritise-firmware-service.patch @@ -0,0 +1,102 @@ +From 4e7ca0b57f3bc09ba3e4ab86bf6b7c35134bfd04 Mon Sep 17 00:00:00 2001 +From: Jiaxun Yang +Date: Fri, 14 Jun 2024 16:40:16 +0100 +Subject: MIPS: Loongson64: reset: Prioritise firmware service + +From: Jiaxun Yang + +commit 4e7ca0b57f3bc09ba3e4ab86bf6b7c35134bfd04 upstream. + +We should always use firmware's poweroff & reboot service +if it's available as firmware may need to perform more task +than platform's syscon etc. + +However _machine_restart & poweroff hooks are registered at +low priority, which means platform reboot driver can override +them. + +Register firmware based reboot/poweroff implementation with +register_sys_off_handler with appropriate priority so that +they will be prioritised. Remove _machine_halt hook as it's +deemed to be unnecessary. 
+ +Cc: stable@vger.kernel.org +Signed-off-by: Jiaxun Yang +Signed-off-by: Thomas Bogendoerfer +Signed-off-by: Greg Kroah-Hartman +--- + arch/mips/loongson64/reset.c | 38 ++++++++++++++++---------------------- + 1 file changed, 16 insertions(+), 22 deletions(-) + +--- a/arch/mips/loongson64/reset.c ++++ b/arch/mips/loongson64/reset.c +@@ -11,6 +11,7 @@ + #include + #include + #include ++#include + #include + + #include +@@ -21,36 +22,21 @@ + #include + #include + +-static void loongson_restart(char *command) ++static int firmware_restart(struct sys_off_data *unusedd) + { + + void (*fw_restart)(void) = (void *)loongson_sysconf.restart_addr; + + fw_restart(); +- while (1) { +- if (cpu_wait) +- cpu_wait(); +- } ++ return NOTIFY_DONE; + } + +-static void loongson_poweroff(void) ++static int firmware_poweroff(struct sys_off_data *unused) + { + void (*fw_poweroff)(void) = (void *)loongson_sysconf.poweroff_addr; + + fw_poweroff(); +- while (1) { +- if (cpu_wait) +- cpu_wait(); +- } +-} +- +-static void loongson_halt(void) +-{ +- pr_notice("\n\n** You can safely turn off the power now **\n\n"); +- while (1) { +- if (cpu_wait) +- cpu_wait(); +- } ++ return NOTIFY_DONE; + } + + #ifdef CONFIG_KEXEC_CORE +@@ -154,9 +140,17 @@ static void loongson_crash_shutdown(stru + + static int __init mips_reboot_setup(void) + { +- _machine_restart = loongson_restart; +- _machine_halt = loongson_halt; +- pm_power_off = loongson_poweroff; ++ if (loongson_sysconf.restart_addr) { ++ register_sys_off_handler(SYS_OFF_MODE_RESTART, ++ SYS_OFF_PRIO_FIRMWARE, ++ firmware_restart, NULL); ++ } ++ ++ if (loongson_sysconf.poweroff_addr) { ++ register_sys_off_handler(SYS_OFF_MODE_POWER_OFF, ++ SYS_OFF_PRIO_FIRMWARE, ++ firmware_poweroff, NULL); ++ } + + #ifdef CONFIG_KEXEC_CORE + kexec_argv = kmalloc(KEXEC_ARGV_SIZE, GFP_KERNEL); diff --git a/queue-6.10/mips-loongson64-test-register-availability-before-use.patch b/queue-6.10/mips-loongson64-test-register-availability-before-use.patch new file mode 100644 index 00000000000..5721968f8a7 --- /dev/null +++ b/queue-6.10/mips-loongson64-test-register-availability-before-use.patch @@ -0,0 +1,85 @@ +From c04366b1207a036b7de02dfcc1ac7138d3343c9b Mon Sep 17 00:00:00 2001 +From: Jiaxun Yang +Date: Fri, 14 Jun 2024 16:40:14 +0100 +Subject: MIPS: Loongson64: Test register availability before use + +From: Jiaxun Yang + +commit c04366b1207a036b7de02dfcc1ac7138d3343c9b upstream. + +Some global register address variable may be missing on +specific CPU type, test them before use them. + +Cc: stable@vger.kernel.org +Signed-off-by: Jiaxun Yang +Signed-off-by: Thomas Bogendoerfer +Signed-off-by: Greg Kroah-Hartman +--- + arch/mips/loongson64/smp.c | 23 +++++++++++++++++++++-- + 1 file changed, 21 insertions(+), 2 deletions(-) + +--- a/arch/mips/loongson64/smp.c ++++ b/arch/mips/loongson64/smp.c +@@ -466,12 +466,25 @@ static void loongson3_smp_finish(void) + static void __init loongson3_smp_setup(void) + { + int i = 0, num = 0; /* i: physical id, num: logical id */ ++ int max_cpus = 0; + + init_cpu_possible(cpu_none_mask); + ++ for (i = 0; i < ARRAY_SIZE(smp_group); i++) { ++ if (!smp_group[i]) ++ break; ++ max_cpus += loongson_sysconf.cores_per_node; ++ } ++ ++ if (max_cpus < loongson_sysconf.nr_cpus) { ++ pr_err("SMP Groups are less than the number of CPUs\n"); ++ loongson_sysconf.nr_cpus = max_cpus ? 
max_cpus : 1; ++ } ++ + /* For unified kernel, NR_CPUS is the maximum possible value, + * loongson_sysconf.nr_cpus is the really present value + */ ++ i = 0; + while (i < loongson_sysconf.nr_cpus) { + if (loongson_sysconf.reserved_cpus_mask & (1< +Date: Fri, 21 Jun 2024 11:16:01 +0200 +Subject: perf: Fix event leak upon exec and file release + +From: Frederic Weisbecker + +commit 3a5465418f5fd970e86a86c7f4075be262682840 upstream. + +The perf pending task work is never waited upon the matching event +release. In the case of a child event, released via free_event() +directly, this can potentially result in a leaked event, such as in the +following scenario that doesn't even require a weak IRQ work +implementation to trigger: + +schedule() + prepare_task_switch() +=======> + perf_event_overflow() + event->pending_sigtrap = ... + irq_work_queue(&event->pending_irq) +<======= + perf_event_task_sched_out() + event_sched_out() + event->pending_sigtrap = 0; + atomic_long_inc_not_zero(&event->refcount) + task_work_add(&event->pending_task) + finish_lock_switch() +=======> + perf_pending_irq() + //do nothing, rely on pending task work +<======= + +begin_new_exec() + perf_event_exit_task() + perf_event_exit_event() + // If is child event + free_event() + WARN(atomic_long_cmpxchg(&event->refcount, 1, 0) != 1) + // event is leaked + +Similar scenarios can also happen with perf_event_remove_on_exec() or +simply against concurrent perf_event_release(). + +Fix this with synchonizing against the possibly remaining pending task +work while freeing the event, just like is done with remaining pending +IRQ work. This means that the pending task callback neither need nor +should hold a reference to the event, preventing it from ever beeing +freed. + +Fixes: 517e6a301f34 ("perf: Fix perf_pending_task() UaF") +Signed-off-by: Frederic Weisbecker +Signed-off-by: Peter Zijlstra (Intel) +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20240621091601.18227-5-frederic@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/perf_event.h | 1 + + kernel/events/core.c | 38 ++++++++++++++++++++++++++++++++++---- + 2 files changed, 35 insertions(+), 4 deletions(-) + +--- a/include/linux/perf_event.h ++++ b/include/linux/perf_event.h +@@ -786,6 +786,7 @@ struct perf_event { + struct irq_work pending_irq; + struct callback_head pending_task; + unsigned int pending_work; ++ struct rcuwait pending_work_wait; + + atomic_t event_limit; + +--- a/kernel/events/core.c ++++ b/kernel/events/core.c +@@ -2288,7 +2288,6 @@ event_sched_out(struct perf_event *event + if (state != PERF_EVENT_STATE_OFF && + !event->pending_work && + !task_work_add(current, &event->pending_task, TWA_RESUME)) { +- WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount)); + event->pending_work = 1; + } else { + local_dec(&event->ctx->nr_pending); +@@ -5203,9 +5202,35 @@ static bool exclusive_event_installable( + static void perf_addr_filters_splice(struct perf_event *event, + struct list_head *head); + ++static void perf_pending_task_sync(struct perf_event *event) ++{ ++ struct callback_head *head = &event->pending_task; ++ ++ if (!event->pending_work) ++ return; ++ /* ++ * If the task is queued to the current task's queue, we ++ * obviously can't wait for it to complete. Simply cancel it. ++ */ ++ if (task_work_cancel(current, head)) { ++ event->pending_work = 0; ++ local_dec(&event->ctx->nr_pending); ++ return; ++ } ++ ++ /* ++ * All accesses related to the event are within the same ++ * non-preemptible section in perf_pending_task(). 
The RCU ++ * grace period before the event is freed will make sure all ++ * those accesses are complete by then. ++ */ ++ rcuwait_wait_event(&event->pending_work_wait, !event->pending_work, TASK_UNINTERRUPTIBLE); ++} ++ + static void _free_event(struct perf_event *event) + { + irq_work_sync(&event->pending_irq); ++ perf_pending_task_sync(event); + + unaccount_event(event); + +@@ -6831,23 +6856,27 @@ static void perf_pending_task(struct cal + int rctx; + + /* ++ * All accesses to the event must belong to the same implicit RCU read-side ++ * critical section as the ->pending_work reset. See comment in ++ * perf_pending_task_sync(). ++ */ ++ preempt_disable_notrace(); ++ /* + * If we 'fail' here, that's OK, it means recursion is already disabled + * and we won't recurse 'further'. + */ +- preempt_disable_notrace(); + rctx = perf_swevent_get_recursion_context(); + + if (event->pending_work) { + event->pending_work = 0; + perf_sigtrap(event); + local_dec(&event->ctx->nr_pending); ++ rcuwait_wake_up(&event->pending_work_wait); + } + + if (rctx >= 0) + perf_swevent_put_recursion_context(rctx); + preempt_enable_notrace(); +- +- put_event(event); + } + + #ifdef CONFIG_GUEST_PERF_EVENTS +@@ -11961,6 +11990,7 @@ perf_event_alloc(struct perf_event_attr + init_waitqueue_head(&event->waitq); + init_irq_work(&event->pending_irq, perf_pending_irq); + init_task_work(&event->pending_task, perf_pending_task); ++ rcuwait_init(&event->pending_work_wait); + + mutex_init(&event->mmap_mutex); + raw_spin_lock_init(&event->addr_filters.lock); diff --git a/queue-6.10/perf-fix-event-leak-upon-exit.patch b/queue-6.10/perf-fix-event-leak-upon-exit.patch new file mode 100644 index 00000000000..ac4435b0acb --- /dev/null +++ b/queue-6.10/perf-fix-event-leak-upon-exit.patch @@ -0,0 +1,88 @@ +From 2fd5ad3f310de22836cdacae919dd99d758a1f1b Mon Sep 17 00:00:00 2001 +From: Frederic Weisbecker +Date: Fri, 21 Jun 2024 11:16:00 +0200 +Subject: perf: Fix event leak upon exit + +From: Frederic Weisbecker + +commit 2fd5ad3f310de22836cdacae919dd99d758a1f1b upstream. + +When a task is scheduled out, pending sigtrap deliveries are deferred +to the target task upon resume to userspace via task_work. + +However failures while adding an event's callback to the task_work +engine are ignored. And since the last call for events exit happen +after task work is eventually closed, there is a small window during +which pending sigtrap can be queued though ignored, leaking the event +refcount addition such as in the following scenario: + + TASK A + ----- + + do_exit() + exit_task_work(tsk); + + + perf_event_overflow() + event->pending_sigtrap = pending_id; + irq_work_queue(&event->pending_irq); + + =========> PREEMPTION: TASK A -> TASK B + event_sched_out() + event->pending_sigtrap = 0; + atomic_long_inc_not_zero(&event->refcount) + // FAILS: task work has exited + task_work_add(&event->pending_task) + [...] + + perf_pending_irq() + // early return: event->oncpu = -1 + + [...] + =========> TASK B -> TASK A + perf_event_exit_task(tsk) + perf_event_exit_event() + free_event() + WARN(atomic_long_cmpxchg(&event->refcount, 1, 0) != 1) + // leak event due to unexpected refcount == 2 + +As a result the event is never released while the task exits. + +Fix this with appropriate task_work_add()'s error handling. 
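
As a rough sketch of the pattern the fix relies on (illustration only, not taken
from the patch; the helper name is hypothetical): task_work_add() fails with
-ESRCH once the target task is past exit_task_work() and can no longer run task
works, so any accounting tied to the deferred callback should only be done when
queuing actually succeeded.

    #include <linux/sched.h>
    #include <linux/task_work.h>

    /* Hypothetical helper, for illustration only: queue deferred work and
     * report whether the caller may rely on the callback running later. */
    static bool example_defer_to_task(struct callback_head *work)
    {
            /* Fails (-ESRCH) once the task can no longer run task works. */
            if (task_work_add(current, work, TWA_RESUME))
                    return false;   /* task is exiting: clean up immediately */
            return true;            /* queued: the callback will drop the state */
    }
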
+ +Fixes: 517e6a301f34 ("perf: Fix perf_pending_task() UaF") +Signed-off-by: Frederic Weisbecker +Signed-off-by: Peter Zijlstra (Intel) +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20240621091601.18227-4-frederic@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + kernel/events/core.c | 13 +++++-------- + 1 file changed, 5 insertions(+), 8 deletions(-) + +--- a/kernel/events/core.c ++++ b/kernel/events/core.c +@@ -2284,18 +2284,15 @@ event_sched_out(struct perf_event *event + } + + if (event->pending_sigtrap) { +- bool dec = true; +- + event->pending_sigtrap = 0; + if (state != PERF_EVENT_STATE_OFF && +- !event->pending_work) { +- event->pending_work = 1; +- dec = false; ++ !event->pending_work && ++ !task_work_add(current, &event->pending_task, TWA_RESUME)) { + WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount)); +- task_work_add(current, &event->pending_task, TWA_RESUME); +- } +- if (dec) ++ event->pending_work = 1; ++ } else { + local_dec(&event->ctx->nr_pending); ++ } + } + + perf_event_set_state(event, state); diff --git a/queue-6.10/perf-stat-fix-the-hard-coded-metrics-calculation-on-the-hybrid.patch b/queue-6.10/perf-stat-fix-the-hard-coded-metrics-calculation-on-the-hybrid.patch new file mode 100644 index 00000000000..04cb409fdd4 --- /dev/null +++ b/queue-6.10/perf-stat-fix-the-hard-coded-metrics-calculation-on-the-hybrid.patch @@ -0,0 +1,59 @@ +From 3612ca8e2935c4c142d99e33b8effa7045ce32b5 Mon Sep 17 00:00:00 2001 +From: Kan Liang +Date: Thu, 6 Jun 2024 11:03:16 -0700 +Subject: perf stat: Fix the hard-coded metrics calculation on the hybrid + +From: Kan Liang + +commit 3612ca8e2935c4c142d99e33b8effa7045ce32b5 upstream. + +The hard-coded metrics is wrongly calculated on the hybrid machine. + +$ perf stat -e cycles,instructions -a sleep 1 + + Performance counter stats for 'system wide': + + 18,205,487 cpu_atom/cycles/ + 9,733,603 cpu_core/cycles/ + 9,423,111 cpu_atom/instructions/ # 0.52 insn per cycle + 4,268,965 cpu_core/instructions/ # 0.23 insn per cycle + +The insn per cycle for cpu_core should be 4,268,965 / 9,733,603 = 0.44. + +When finding the metric events, the find_stat() doesn't take the PMU +type into account. The cpu_atom/cycles/ is wrongly used to calculate +the IPC of the cpu_core. + +In the hard-coded metrics, the events from a different PMU are only +SW_CPU_CLOCK and SW_TASK_CLOCK. They both have the stat type, +STAT_NSECS. Except the SW CLOCK events, check the PMU type as well. + +Fixes: 0a57b910807a ("perf stat: Use counts rather than saved_value") +Reported-by: Khalil, Amiri +Reviewed-by: Ian Rogers +Signed-off-by: Kan Liang +Acked-by: Namhyung Kim +Cc: stable@vger.kernel.org +Signed-off-by: Namhyung Kim +Link: https://lore.kernel.org/r/20240606180316.4122904-1-kan.liang@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + tools/perf/util/stat-shadow.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/tools/perf/util/stat-shadow.c ++++ b/tools/perf/util/stat-shadow.c +@@ -176,6 +176,13 @@ static double find_stat(const struct evs + if (type != evsel__stat_type(cur)) + continue; + ++ /* ++ * Except the SW CLOCK events, ++ * ignore if not the PMU we're looking for. 
++ */ ++ if ((type != STAT_NSECS) && (evsel->pmu != cur->pmu)) ++ continue; ++ + aggr = &cur->stats->aggr[aggr_idx]; + if (type == STAT_NSECS) + return aggr->counts.val; diff --git a/queue-6.10/perf-x86-intel-ds-fix-non-0-retire-latency-on-raptorlake.patch b/queue-6.10/perf-x86-intel-ds-fix-non-0-retire-latency-on-raptorlake.patch new file mode 100644 index 00000000000..b4166d436a0 --- /dev/null +++ b/queue-6.10/perf-x86-intel-ds-fix-non-0-retire-latency-on-raptorlake.patch @@ -0,0 +1,57 @@ +From e5f32ad56b22ebe384a6e7ddad6e9520c5495563 Mon Sep 17 00:00:00 2001 +From: Kan Liang +Date: Mon, 8 Jul 2024 12:33:36 -0700 +Subject: perf/x86/intel/ds: Fix non 0 retire latency on Raptorlake +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Kan Liang + +commit e5f32ad56b22ebe384a6e7ddad6e9520c5495563 upstream. + +A non-0 retire latency can be observed on a Raptorlake which doesn't +support the retire latency feature. +By design, the retire latency shares the PERF_SAMPLE_WEIGHT_STRUCT +sample type with other types of latency. That could avoid adding too +many different sample types to support all kinds of latency. For the +machine which doesn't support some kind of latency, 0 should be +returned. + +Perf doesn’t clear/init all the fields of a sample data for the sake +of performance. It expects the later perf_{prepare,output}_sample() to +update the uninitialized field. However, the current implementation +doesn't touch the field of the retire latency if the feature is not +supported. The memory garbage is dumped into the perf data. + +Clear the retire latency if the feature is not supported. + +Fixes: c87a31093c70 ("perf/x86: Support Retire Latency") +Reported-by: "Bayduraev, Alexey V" +Signed-off-by: Kan Liang +Signed-off-by: Peter Zijlstra (Intel) +Tested-by: "Bayduraev, Alexey V" +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20240708193336.1192217-4-kan.liang@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/events/intel/ds.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/arch/x86/events/intel/ds.c ++++ b/arch/x86/events/intel/ds.c +@@ -1831,8 +1831,12 @@ static void setup_pebs_adaptive_sample_d + set_linear_ip(regs, basic->ip); + regs->flags = PERF_EFLAGS_EXACT; + +- if ((sample_type & PERF_SAMPLE_WEIGHT_STRUCT) && (x86_pmu.flags & PMU_FL_RETIRE_LATENCY)) +- data->weight.var3_w = format_size >> PEBS_RETIRE_LATENCY_OFFSET & PEBS_LATENCY_MASK; ++ if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) { ++ if (x86_pmu.flags & PMU_FL_RETIRE_LATENCY) ++ data->weight.var3_w = format_size >> PEBS_RETIRE_LATENCY_OFFSET & PEBS_LATENCY_MASK; ++ else ++ data->weight.var3_w = 0; ++ } + + /* + * The record for MEMINFO is in front of GP diff --git a/queue-6.10/perf-x86-intel-pt-fix-a-topa_entry-base-address-calculation.patch b/queue-6.10/perf-x86-intel-pt-fix-a-topa_entry-base-address-calculation.patch new file mode 100644 index 00000000000..6b3c12a8682 --- /dev/null +++ b/queue-6.10/perf-x86-intel-pt-fix-a-topa_entry-base-address-calculation.patch @@ -0,0 +1,46 @@ +From ad97196379d0b8cb24ef3d5006978a6554e6467f Mon Sep 17 00:00:00 2001 +From: Adrian Hunter +Date: Mon, 24 Jun 2024 23:10:56 +0300 +Subject: perf/x86/intel/pt: Fix a topa_entry base address calculation + +From: Adrian Hunter + +commit ad97196379d0b8cb24ef3d5006978a6554e6467f upstream. + +topa_entry->base is a bit-field. 
Bit-fields are not promoted to a 64-bit +type, even if the underlying type is 64-bit, and so, if necessary, must +be cast to a larger type when calculations are done. + +Fix a topa_entry->base address calculation by adding a cast. + +Without the cast, the address was limited to 36-bits i.e. 64GiB. + +The address calculation is used on systems that do not support Multiple +Entry ToPA (only Broadwell), and affects physical addresses on or above +64GiB. Instead of writing to the correct address, the address comprising +the first 36 bits would be written to. + +Intel PT snapshot and sampling modes are not affected. + +Fixes: 52ca9ced3f70 ("perf/x86/intel/pt: Add Intel PT PMU driver") +Reported-by: Dave Hansen +Signed-off-by: Adrian Hunter +Signed-off-by: Peter Zijlstra (Intel) +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20240624201101.60186-3-adrian.hunter@intel.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/events/intel/pt.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/events/intel/pt.c ++++ b/arch/x86/events/intel/pt.c +@@ -878,7 +878,7 @@ static void pt_update_head(struct pt *pt + */ + static void *pt_buffer_region(struct pt_buffer *buf) + { +- return phys_to_virt(TOPA_ENTRY(buf->cur, buf->cur_idx)->base << TOPA_SHIFT); ++ return phys_to_virt((phys_addr_t)TOPA_ENTRY(buf->cur, buf->cur_idx)->base << TOPA_SHIFT); + } + + /** diff --git a/queue-6.10/perf-x86-intel-pt-fix-topa_entry-base-length.patch b/queue-6.10/perf-x86-intel-pt-fix-topa_entry-base-length.patch new file mode 100644 index 00000000000..1bb968c07ce --- /dev/null +++ b/queue-6.10/perf-x86-intel-pt-fix-topa_entry-base-length.patch @@ -0,0 +1,44 @@ +From 5638bd722a44bbe97c1a7b3fae5b9efddb3e70ff Mon Sep 17 00:00:00 2001 +From: Marco Cavenati +Date: Mon, 24 Jun 2024 23:10:55 +0300 +Subject: perf/x86/intel/pt: Fix topa_entry base length + +From: Marco Cavenati + +commit 5638bd722a44bbe97c1a7b3fae5b9efddb3e70ff upstream. + +topa_entry->base needs to store a pfn. It obviously needs to be +large enough to store the largest possible x86 pfn which is +MAXPHYADDR-PAGE_SIZE (52-12). So it is 4 bits too small. + +Increase the size of topa_entry->base from 36 bits to 40 bits. + +Note, systems where physical addresses can be 256TiB or more are affected. 
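
A standalone sanity check of the arithmetic above (illustrative only, not part
of the patch): with 4 KiB pages a pfn needs MAXPHYADDR - 12 = 40 bits, while a
36-bit base field only reaches 2^48 bytes, i.e. 256 TiB.

    #include <stdio.h>

    int main(void)
    {
            const unsigned int maxphyaddr = 52, page_shift = 12;

            printf("pfn bits required : %u\n", maxphyaddr - page_shift);      /* 40 */
            printf("36-bit base covers: 2^%u bytes (256 TiB)\n", 36 + page_shift);
            printf("40-bit base covers: 2^%u bytes (4 PiB)\n", 40 + page_shift);
            return 0;
    }
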
+ +[ Adrian: Amend commit message as suggested by Dave Hansen ] + +Fixes: 52ca9ced3f70 ("perf/x86/intel/pt: Add Intel PT PMU driver") +Signed-off-by: Marco Cavenati +Signed-off-by: Adrian Hunter +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Adrian Hunter +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20240624201101.60186-2-adrian.hunter@intel.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/events/intel/pt.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/x86/events/intel/pt.h ++++ b/arch/x86/events/intel/pt.h +@@ -33,8 +33,8 @@ struct topa_entry { + u64 rsvd2 : 1; + u64 size : 4; + u64 rsvd3 : 2; +- u64 base : 36; +- u64 rsvd4 : 16; ++ u64 base : 40; ++ u64 rsvd4 : 12; + }; + + /* TSC to Core Crystal Clock Ratio */ diff --git a/queue-6.10/perf-x86-intel-uncore-fix-the-bits-of-the-cha-extended-umask-for-spr.patch b/queue-6.10/perf-x86-intel-uncore-fix-the-bits-of-the-cha-extended-umask-for-spr.patch new file mode 100644 index 00000000000..f7553652660 --- /dev/null +++ b/queue-6.10/perf-x86-intel-uncore-fix-the-bits-of-the-cha-extended-umask-for-spr.patch @@ -0,0 +1,70 @@ +From a5a6ff3d639d088d4af7e2935e1ee0d8b4e817d4 Mon Sep 17 00:00:00 2001 +From: Kan Liang +Date: Mon, 8 Jul 2024 11:55:24 -0700 +Subject: perf/x86/intel/uncore: Fix the bits of the CHA extended umask for SPR + +From: Kan Liang + +commit a5a6ff3d639d088d4af7e2935e1ee0d8b4e817d4 upstream. + +The perf stat errors out with UNC_CHA_TOR_INSERTS.IA_HIT_CXL_ACC_LOCAL +event. + + $perf stat -e uncore_cha_55/event=0x35,umask=0x10c0008101/ -a -- ls + event syntax error: '..0x35,umask=0x10c0008101/' + \___ Bad event or PMU + +The definition of the CHA umask is config:8-15,32-55, which is 32bit. +However, the umask of the event is bigger than 32bit. +This is an error in the original uncore spec. + +Add a new umask_ext5 for the new CHA umask range. 
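
A quick standalone check of the claim above (illustration only, not part of the
patch): the umask from the failing command needs 37 bits, which overflows the
old 32-bit field (config:8-15,32-55 = 8 + 24 bits) but fits the new 40-bit one
(config:8-15,32-63 = 8 + 32 bits).

    #include <stdio.h>

    int main(void)
    {
            unsigned long long umask = 0x10c0008101ULL;
            int bits = 0;

            while (umask >> bits)
                    bits++;

            printf("umask 0x10c0008101 needs %d bits\n", bits);   /* 37 */
            printf("old umask field: 8 + 24 = 32 bits\n");
            printf("new umask field: 8 + 32 = 40 bits\n");
            return 0;
    }
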
+ +Fixes: 949b11381f81 ("perf/x86/intel/uncore: Add Sapphire Rapids server CHA support") +Closes: https://lore.kernel.org/linux-perf-users/alpine.LRH.2.20.2401300733310.11354@Diego/ +Signed-off-by: Kan Liang +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Ian Rogers +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20240708185524.1185505-1-kan.liang@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/events/intel/uncore_snbep.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/arch/x86/events/intel/uncore_snbep.c ++++ b/arch/x86/events/intel/uncore_snbep.c +@@ -462,6 +462,7 @@ + #define SPR_UBOX_DID 0x3250 + + /* SPR CHA */ ++#define SPR_CHA_EVENT_MASK_EXT 0xffffffff + #define SPR_CHA_PMON_CTL_TID_EN (1 << 16) + #define SPR_CHA_PMON_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \ + SPR_CHA_PMON_CTL_TID_EN) +@@ -478,6 +479,7 @@ DEFINE_UNCORE_FORMAT_ATTR(umask_ext, uma + DEFINE_UNCORE_FORMAT_ATTR(umask_ext2, umask, "config:8-15,32-57"); + DEFINE_UNCORE_FORMAT_ATTR(umask_ext3, umask, "config:8-15,32-39"); + DEFINE_UNCORE_FORMAT_ATTR(umask_ext4, umask, "config:8-15,32-55"); ++DEFINE_UNCORE_FORMAT_ATTR(umask_ext5, umask, "config:8-15,32-63"); + DEFINE_UNCORE_FORMAT_ATTR(qor, qor, "config:16"); + DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); + DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19"); +@@ -5958,7 +5960,7 @@ static struct intel_uncore_ops spr_uncor + + static struct attribute *spr_uncore_cha_formats_attr[] = { + &format_attr_event.attr, +- &format_attr_umask_ext4.attr, ++ &format_attr_umask_ext5.attr, + &format_attr_tid_en2.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, +@@ -5994,7 +5996,7 @@ ATTRIBUTE_GROUPS(uncore_alias); + static struct intel_uncore_type spr_uncore_chabox = { + .name = "cha", + .event_mask = SPR_CHA_PMON_EVENT_MASK, +- .event_mask_ext = SPR_RAW_EVENT_MASK_EXT, ++ .event_mask_ext = SPR_CHA_EVENT_MASK_EXT, + .num_shared_regs = 1, + .constraints = skx_uncore_chabox_constraints, + .ops = &spr_uncore_chabox_ops, diff --git a/queue-6.10/platform-mips-cpu_hwmon-disable-driver-on-unsupported-hardware.patch b/queue-6.10/platform-mips-cpu_hwmon-disable-driver-on-unsupported-hardware.patch new file mode 100644 index 00000000000..1b2e55af9b9 --- /dev/null +++ b/queue-6.10/platform-mips-cpu_hwmon-disable-driver-on-unsupported-hardware.patch @@ -0,0 +1,32 @@ +From f4d430db17b4ef4e9c3c352a04b2fe3c93011978 Mon Sep 17 00:00:00 2001 +From: Jiaxun Yang +Date: Fri, 14 Jun 2024 16:40:15 +0100 +Subject: platform: mips: cpu_hwmon: Disable driver on unsupported hardware + +From: Jiaxun Yang + +commit f4d430db17b4ef4e9c3c352a04b2fe3c93011978 upstream. + +cpu_hwmon is unsupported on CPUs without loongson_chiptemp +register and csr. 
+ +Cc: stable@vger.kernel.org +Signed-off-by: Jiaxun Yang +Signed-off-by: Thomas Bogendoerfer +Signed-off-by: Greg Kroah-Hartman +--- + drivers/platform/mips/cpu_hwmon.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/platform/mips/cpu_hwmon.c ++++ b/drivers/platform/mips/cpu_hwmon.c +@@ -139,6 +139,9 @@ static int __init loongson_hwmon_init(vo + csr_temp_enable = csr_readl(LOONGSON_CSR_FEATURES) & + LOONGSON_CSRF_TEMP; + ++ if (!csr_temp_enable && !loongson_chiptemp[0]) ++ return -ENODEV; ++ + nr_packages = loongson_sysconf.nr_cpus / + loongson_sysconf.cores_per_package; + diff --git a/queue-6.10/rbd-don-t-assume-rbd_is_lock_owner-for-exclusive-mappings.patch b/queue-6.10/rbd-don-t-assume-rbd_is_lock_owner-for-exclusive-mappings.patch new file mode 100644 index 00000000000..811411a1869 --- /dev/null +++ b/queue-6.10/rbd-don-t-assume-rbd_is_lock_owner-for-exclusive-mappings.patch @@ -0,0 +1,43 @@ +From 3ceccb14f5576e02b81cc8b105ab81f224bd87f6 Mon Sep 17 00:00:00 2001 +From: Ilya Dryomov +Date: Tue, 23 Jul 2024 18:08:08 +0200 +Subject: rbd: don't assume rbd_is_lock_owner() for exclusive mappings + +From: Ilya Dryomov + +commit 3ceccb14f5576e02b81cc8b105ab81f224bd87f6 upstream. + +Expanding on the previous commit, assuming that rbd_is_lock_owner() +always returns true (i.e. that we are either in RBD_LOCK_STATE_LOCKED +or RBD_LOCK_STATE_QUIESCING) if the mapping is exclusive is wrong too. +In case ceph_cls_set_cookie() fails, the lock would be temporarily +released even if the mapping is exclusive, meaning that we can end up +even in RBD_LOCK_STATE_UNLOCKED. + +IOW, exclusive mappings are really "just" about disabling automatic +lock transitions (as documented in the man page), not about grabbing +the lock and holding on to it whatever it takes. + +Cc: stable@vger.kernel.org +Fixes: 637cd060537d ("rbd: new exclusive lock wait/wake code") +Signed-off-by: Ilya Dryomov +Reviewed-by: Dongsheng Yang +Signed-off-by: Greg Kroah-Hartman +--- + drivers/block/rbd.c | 5 ----- + 1 file changed, 5 deletions(-) + +--- a/drivers/block/rbd.c ++++ b/drivers/block/rbd.c +@@ -6589,11 +6589,6 @@ static int rbd_add_acquire_lock(struct r + if (ret) + return ret; + +- /* +- * The lock may have been released by now, unless automatic lock +- * transitions are disabled. +- */ +- rbd_assert(!rbd_dev->opts->exclusive || rbd_is_lock_owner(rbd_dev)); + return 0; + } + diff --git a/queue-6.10/rdma-iwcm-fix-a-use-after-free-related-to-destroying-cm-ids.patch b/queue-6.10/rdma-iwcm-fix-a-use-after-free-related-to-destroying-cm-ids.patch new file mode 100644 index 00000000000..67ca7793712 --- /dev/null +++ b/queue-6.10/rdma-iwcm-fix-a-use-after-free-related-to-destroying-cm-ids.patch @@ -0,0 +1,74 @@ +From aee2424246f9f1dadc33faa78990c1e2eb7826e4 Mon Sep 17 00:00:00 2001 +From: Bart Van Assche +Date: Wed, 5 Jun 2024 08:51:01 -0600 +Subject: RDMA/iwcm: Fix a use-after-free related to destroying CM IDs + +From: Bart Van Assche + +commit aee2424246f9f1dadc33faa78990c1e2eb7826e4 upstream. + +iw_conn_req_handler() associates a new struct rdma_id_private (conn_id) with +an existing struct iw_cm_id (cm_id) as follows: + + conn_id->cm_id.iw = cm_id; + cm_id->context = conn_id; + cm_id->cm_handler = cma_iw_handler; + +rdma_destroy_id() frees both the cm_id and the struct rdma_id_private. Make +sure that cm_work_handler() does not trigger a use-after-free by only +freeing of the struct rdma_id_private after all pending work has finished. 
+ +Cc: stable@vger.kernel.org +Fixes: 59c68ac31e15 ("iw_cm: free cm_id resources on the last deref") +Reviewed-by: Zhu Yanjun +Tested-by: Shin'ichiro Kawasaki +Signed-off-by: Bart Van Assche +Link: https://lore.kernel.org/r/20240605145117.397751-6-bvanassche@acm.org +Signed-off-by: Leon Romanovsky +Signed-off-by: Greg Kroah-Hartman +--- + drivers/infiniband/core/iwcm.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +--- a/drivers/infiniband/core/iwcm.c ++++ b/drivers/infiniband/core/iwcm.c +@@ -368,8 +368,10 @@ EXPORT_SYMBOL(iw_cm_disconnect); + * + * Clean up all resources associated with the connection and release + * the initial reference taken by iw_create_cm_id. ++ * ++ * Returns true if and only if the last cm_id_priv reference has been dropped. + */ +-static void destroy_cm_id(struct iw_cm_id *cm_id) ++static bool destroy_cm_id(struct iw_cm_id *cm_id) + { + struct iwcm_id_private *cm_id_priv; + struct ib_qp *qp; +@@ -439,7 +441,7 @@ static void destroy_cm_id(struct iw_cm_i + iwpm_remove_mapping(&cm_id->local_addr, RDMA_NL_IWCM); + } + +- (void)iwcm_deref_id(cm_id_priv); ++ return iwcm_deref_id(cm_id_priv); + } + + /* +@@ -450,7 +452,8 @@ static void destroy_cm_id(struct iw_cm_i + */ + void iw_destroy_cm_id(struct iw_cm_id *cm_id) + { +- destroy_cm_id(cm_id); ++ if (!destroy_cm_id(cm_id)) ++ flush_workqueue(iwcm_wq); + } + EXPORT_SYMBOL(iw_destroy_cm_id); + +@@ -1034,7 +1037,7 @@ static void cm_work_handler(struct work_ + if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) { + ret = process_event(cm_id_priv, &levent); + if (ret) +- destroy_cm_id(&cm_id_priv->id); ++ WARN_ON_ONCE(destroy_cm_id(&cm_id_priv->id)); + } else + pr_debug("dropping event %d\n", levent.event); + if (iwcm_deref_id(cm_id_priv)) diff --git a/queue-6.10/remoteproc-imx_rproc-fix-refcount-mistake-in-imx_rproc_addr_init.patch b/queue-6.10/remoteproc-imx_rproc-fix-refcount-mistake-in-imx_rproc_addr_init.patch new file mode 100644 index 00000000000..f67b85ed23f --- /dev/null +++ b/queue-6.10/remoteproc-imx_rproc-fix-refcount-mistake-in-imx_rproc_addr_init.patch @@ -0,0 +1,59 @@ +From dce68a49be26abf52712e0ee452a45fa01ab4624 Mon Sep 17 00:00:00 2001 +From: Aleksandr Mishin +Date: Wed, 12 Jun 2024 16:17:14 +0300 +Subject: remoteproc: imx_rproc: Fix refcount mistake in imx_rproc_addr_init + +From: Aleksandr Mishin + +commit dce68a49be26abf52712e0ee452a45fa01ab4624 upstream. + +In imx_rproc_addr_init() strcmp() is performed over the node after the +of_node_put() is performed over it. +Fix this error by moving of_node_put() calls. + +Found by Linux Verification Center (linuxtesting.org) with SVACE. 
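
The general rule being applied, as a minimal sketch (hypothetical code, not
taken from the driver; the node name and property are placeholders): keep the
reference returned by of_parse_phandle() until the last access to the node, and
only then call of_node_put().

    #include <linux/of.h>
    #include <linux/string.h>

    static void example_walk_region(struct device_node *np)
    {
            struct device_node *node = of_parse_phandle(np, "memory-region", 0);

            if (!node)
                    return;

            if (!strcmp(node->name, "rsc-table")) {
                    /* ... use the node while the reference is held ... */
            }

            of_node_put(node);      /* drop the reference only after the last use */
    }
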
+ +Fixes: 5e4c1243071d ("remoteproc: imx_rproc: support remote cores booted before Linux Kernel") +Cc: stable@vger.kernel.org +Signed-off-by: Aleksandr Mishin +Link: https://lore.kernel.org/r/20240612131714.12907-1-amishin@t-argos.ru +Signed-off-by: Mathieu Poirier +Signed-off-by: Greg Kroah-Hartman +--- + drivers/remoteproc/imx_rproc.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/drivers/remoteproc/imx_rproc.c ++++ b/drivers/remoteproc/imx_rproc.c +@@ -734,25 +734,29 @@ static int imx_rproc_addr_init(struct im + continue; + } + err = of_address_to_resource(node, 0, &res); +- of_node_put(node); + if (err) { + dev_err(dev, "unable to resolve memory region\n"); ++ of_node_put(node); + return err; + } + +- if (b >= IMX_RPROC_MEM_MAX) ++ if (b >= IMX_RPROC_MEM_MAX) { ++ of_node_put(node); + break; ++ } + + /* Not use resource version, because we might share region */ + priv->mem[b].cpu_addr = devm_ioremap_wc(&pdev->dev, res.start, resource_size(&res)); + if (!priv->mem[b].cpu_addr) { + dev_err(dev, "failed to remap %pr\n", &res); ++ of_node_put(node); + return -ENOMEM; + } + priv->mem[b].sys_addr = res.start; + priv->mem[b].size = resource_size(&res); + if (!strcmp(node->name, "rsc-table")) + priv->rsc_table = priv->mem[b].cpu_addr; ++ of_node_put(node); + b++; + } + diff --git a/queue-6.10/remoteproc-imx_rproc-skip-over-memory-region-when-node-value-is-null.patch b/queue-6.10/remoteproc-imx_rproc-skip-over-memory-region-when-node-value-is-null.patch new file mode 100644 index 00000000000..0bb4f204275 --- /dev/null +++ b/queue-6.10/remoteproc-imx_rproc-skip-over-memory-region-when-node-value-is-null.patch @@ -0,0 +1,39 @@ +From 2fa26ca8b786888673689ccc9da6094150939982 Mon Sep 17 00:00:00 2001 +From: Aleksandr Mishin +Date: Thu, 6 Jun 2024 10:52:04 +0300 +Subject: remoteproc: imx_rproc: Skip over memory region when node value is NULL + +From: Aleksandr Mishin + +commit 2fa26ca8b786888673689ccc9da6094150939982 upstream. + +In imx_rproc_addr_init() "nph = of_count_phandle_with_args()" just counts +number of phandles. But phandles may be empty. So of_parse_phandle() in +the parsing loop (0 < a < nph) may return NULL which is later dereferenced. +Adjust this issue by adding NULL-return check. + +Found by Linux Verification Center (linuxtesting.org) with SVACE. 
+ +Fixes: a0ff4aa6f010 ("remoteproc: imx_rproc: add a NXP/Freescale imx_rproc driver") +Signed-off-by: Aleksandr Mishin +Reviewed-by: Peng Fan +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20240606075204.12354-1-amishin@t-argos.ru +[Fixed title to fit within the prescribed 70-75 charcters] +Signed-off-by: Mathieu Poirier +Signed-off-by: Greg Kroah-Hartman +--- + drivers/remoteproc/imx_rproc.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/remoteproc/imx_rproc.c ++++ b/drivers/remoteproc/imx_rproc.c +@@ -726,6 +726,8 @@ static int imx_rproc_addr_init(struct im + struct resource res; + + node = of_parse_phandle(np, "memory-region", a); ++ if (!node) ++ continue; + /* Not map vdevbuffer, vdevring region */ + if (!strncmp(node->name, "vdev", strlen("vdev"))) { + of_node_put(node); diff --git a/queue-6.10/remoteproc-stm32_rproc-fix-mailbox-interrupts-queuing.patch b/queue-6.10/remoteproc-stm32_rproc-fix-mailbox-interrupts-queuing.patch new file mode 100644 index 00000000000..331cfae4771 --- /dev/null +++ b/queue-6.10/remoteproc-stm32_rproc-fix-mailbox-interrupts-queuing.patch @@ -0,0 +1,34 @@ +From c3281abea67c9c0dc6219bbc41d1feae05a16da3 Mon Sep 17 00:00:00 2001 +From: Gwenael Treuveur +Date: Tue, 21 May 2024 18:23:16 +0200 +Subject: remoteproc: stm32_rproc: Fix mailbox interrupts queuing + +From: Gwenael Treuveur + +commit c3281abea67c9c0dc6219bbc41d1feae05a16da3 upstream. + +Manage interrupt coming from coprocessor also when state is +ATTACHED. + +Fixes: 35bdafda40cc ("remoteproc: stm32_rproc: Add mutex protection for workqueue") +Cc: stable@vger.kernel.org +Signed-off-by: Gwenael Treuveur +Acked-by: Arnaud Pouliquen +Link: https://lore.kernel.org/r/20240521162316.156259-1-gwenael.treuveur@foss.st.com +Signed-off-by: Mathieu Poirier +Signed-off-by: Greg Kroah-Hartman +--- + drivers/remoteproc/stm32_rproc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/remoteproc/stm32_rproc.c ++++ b/drivers/remoteproc/stm32_rproc.c +@@ -294,7 +294,7 @@ static void stm32_rproc_mb_vq_work(struc + + mutex_lock(&rproc->lock); + +- if (rproc->state != RPROC_RUNNING) ++ if (rproc->state != RPROC_RUNNING && rproc->state != RPROC_ATTACHED) + goto unlock_mutex; + + if (rproc_vq_interrupt(rproc, mb->vq_id) == IRQ_NONE) diff --git a/queue-6.10/rtc-abx80x-fix-return-value-of-nvmem-callback-on-read.patch b/queue-6.10/rtc-abx80x-fix-return-value-of-nvmem-callback-on-read.patch new file mode 100644 index 00000000000..f1b7999dada --- /dev/null +++ b/queue-6.10/rtc-abx80x-fix-return-value-of-nvmem-callback-on-read.patch @@ -0,0 +1,57 @@ +From fc82336b50e7652530bc32caec80be0f8792513b Mon Sep 17 00:00:00 2001 +From: Joy Chakraborty +Date: Thu, 13 Jun 2024 12:07:50 +0000 +Subject: rtc: abx80x: Fix return value of nvmem callback on read + +From: Joy Chakraborty + +commit fc82336b50e7652530bc32caec80be0f8792513b upstream. + +Read callbacks registered with nvmem core expect 0 to be returned on +success and a negative value to be returned on failure. + +abx80x_nvmem_xfer() on read calls i2c_smbus_read_i2c_block_data() which +returns the number of bytes read on success as per its api description, +this return value is handled as an error and returned to nvmem even on +success. + +Fix to handle all possible values that would be returned by +i2c_smbus_read_i2c_block_data(). 
+ +Fixes: e90ff8ede777 ("rtc: abx80x: Add nvmem support") +Cc: stable@vger.kernel.org +Signed-off-by: Joy Chakraborty +Reviewed-by: Dan Carpenter +Reviewed-by: Sean Anderson +Link: https://lore.kernel.org/r/20240613120750.1455209-1-joychakr@google.com +Signed-off-by: Alexandre Belloni +Signed-off-by: Greg Kroah-Hartman +--- + drivers/rtc/rtc-abx80x.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +--- a/drivers/rtc/rtc-abx80x.c ++++ b/drivers/rtc/rtc-abx80x.c +@@ -705,14 +705,18 @@ static int abx80x_nvmem_xfer(struct abx8 + if (ret) + return ret; + +- if (write) ++ if (write) { + ret = i2c_smbus_write_i2c_block_data(priv->client, reg, + len, val); +- else ++ if (ret) ++ return ret; ++ } else { + ret = i2c_smbus_read_i2c_block_data(priv->client, reg, + len, val); +- if (ret) +- return ret; ++ if (ret <= 0) ++ return ret ? ret : -EIO; ++ len = ret; ++ } + + offset += len; + val += len; diff --git a/queue-6.10/rtc-isl1208-fix-return-value-of-nvmem-callbacks.patch b/queue-6.10/rtc-isl1208-fix-return-value-of-nvmem-callbacks.patch new file mode 100644 index 00000000000..d55f525cd79 --- /dev/null +++ b/queue-6.10/rtc-isl1208-fix-return-value-of-nvmem-callbacks.patch @@ -0,0 +1,63 @@ +From 70f1ae5f0e7f44edf842444044615da7b59838c1 Mon Sep 17 00:00:00 2001 +From: Joy Chakraborty +Date: Wed, 12 Jun 2024 08:08:31 +0000 +Subject: rtc: isl1208: Fix return value of nvmem callbacks + +From: Joy Chakraborty + +commit 70f1ae5f0e7f44edf842444044615da7b59838c1 upstream. + +Read/write callbacks registered with nvmem core expect 0 to be returned +on success and a negative value to be returned on failure. + +isl1208_nvmem_read()/isl1208_nvmem_write() currently return the number of +bytes read/written on success, fix to return 0 on success and negative on +failure. + +Fixes: c3544f6f51ed ("rtc: isl1208: Add new style nvmem support to driver") +Cc: stable@vger.kernel.org +Signed-off-by: Joy Chakraborty +Link: https://lore.kernel.org/r/20240612080831.1227131-1-joychakr@google.com +Signed-off-by: Alexandre Belloni +Signed-off-by: Greg Kroah-Hartman +--- + drivers/rtc/rtc-isl1208.c | 11 ++++------- + 1 file changed, 4 insertions(+), 7 deletions(-) + +--- a/drivers/rtc/rtc-isl1208.c ++++ b/drivers/rtc/rtc-isl1208.c +@@ -775,14 +775,13 @@ static int isl1208_nvmem_read(void *priv + { + struct isl1208_state *isl1208 = priv; + struct i2c_client *client = to_i2c_client(isl1208->rtc->dev.parent); +- int ret; + + /* nvmem sanitizes offset/count for us, but count==0 is possible */ + if (!count) + return count; +- ret = isl1208_i2c_read_regs(client, ISL1208_REG_USR1 + off, buf, ++ ++ return isl1208_i2c_read_regs(client, ISL1208_REG_USR1 + off, buf, + count); +- return ret == 0 ? count : ret; + } + + static int isl1208_nvmem_write(void *priv, unsigned int off, void *buf, +@@ -790,15 +789,13 @@ static int isl1208_nvmem_write(void *pri + { + struct isl1208_state *isl1208 = priv; + struct i2c_client *client = to_i2c_client(isl1208->rtc->dev.parent); +- int ret; + + /* nvmem sanitizes off/count for us, but count==0 is possible */ + if (!count) + return count; +- ret = isl1208_i2c_set_regs(client, ISL1208_REG_USR1 + off, buf, +- count); + +- return ret == 0 ? 
count : ret; ++ return isl1208_i2c_set_regs(client, ISL1208_REG_USR1 + off, buf, ++ count); + } + + static const struct nvmem_config isl1208_nvmem_config = { diff --git a/queue-6.10/scsi-lpfc-allow-device_recovery-mode-after-rscn-receipt-if-in-prli_issue-state.patch b/queue-6.10/scsi-lpfc-allow-device_recovery-mode-after-rscn-receipt-if-in-prli_issue-state.patch new file mode 100644 index 00000000000..42276cf5120 --- /dev/null +++ b/queue-6.10/scsi-lpfc-allow-device_recovery-mode-after-rscn-receipt-if-in-prli_issue-state.patch @@ -0,0 +1,43 @@ +From 9609385dd91b26751019b22ca9bfa4bec7602ae1 Mon Sep 17 00:00:00 2001 +From: Justin Tee +Date: Fri, 28 Jun 2024 10:20:05 -0700 +Subject: scsi: lpfc: Allow DEVICE_RECOVERY mode after RSCN receipt if in PRLI_ISSUE state + +From: Justin Tee + +commit 9609385dd91b26751019b22ca9bfa4bec7602ae1 upstream. + +Certain vendor specific targets initially register with the fabric as an +initiator function first and then re-register as a target function +afterwards. + +The timing of the target function re-registration can cause a race +condition such that the driver is stuck assuming the remote port as an +initiator function and never discovers the target's hosted LUNs. + +Expand the nlp_state qualifier to also include NLP_STE_PRLI_ISSUE because +the state means that PRLI was issued but we have not quite reached +MAPPED_NODE state yet. If we received an RSCN in the PRLI_ISSUE state, +then we should restart discovery again by going into DEVICE_RECOVERY. + +Fixes: dded1dc31aa4 ("scsi: lpfc: Modify when a node should be put in device recovery mode during RSCN") +Cc: # v6.6+ +Signed-off-by: Justin Tee +Link: https://lore.kernel.org/r/20240628172011.25921-3-justintee8345@gmail.com +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/lpfc/lpfc_hbadisc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/scsi/lpfc/lpfc_hbadisc.c ++++ b/drivers/scsi/lpfc/lpfc_hbadisc.c +@@ -5725,7 +5725,7 @@ lpfc_setup_disc_node(struct lpfc_vport * + return ndlp; + + if (ndlp->nlp_state > NLP_STE_UNUSED_NODE && +- ndlp->nlp_state < NLP_STE_PRLI_ISSUE) { ++ ndlp->nlp_state <= NLP_STE_PRLI_ISSUE) { + lpfc_disc_state_machine(vport, ndlp, NULL, + NLP_EVT_DEVICE_RECOVERY); + } diff --git a/queue-6.10/scsi-qla2xxx-complete-command-early-within-lock.patch b/queue-6.10/scsi-qla2xxx-complete-command-early-within-lock.patch new file mode 100644 index 00000000000..4b8c5eeb3c0 --- /dev/null +++ b/queue-6.10/scsi-qla2xxx-complete-command-early-within-lock.patch @@ -0,0 +1,79 @@ +From 4475afa2646d3fec176fc4d011d3879b26cb26e3 Mon Sep 17 00:00:00 2001 +From: Shreyas Deodhar +Date: Wed, 10 Jul 2024 22:40:52 +0530 +Subject: scsi: qla2xxx: Complete command early within lock + +From: Shreyas Deodhar + +commit 4475afa2646d3fec176fc4d011d3879b26cb26e3 upstream. 
+ +A crash was observed while performing NPIV and FW reset, + + BUG: kernel NULL pointer dereference, address: 000000000000001c + #PF: supervisor read access in kernel mode + #PF: error_code(0x0000) - not-present page + PGD 0 P4D 0 + Oops: 0000 1 PREEMPT_RT SMP NOPTI + RIP: 0010:dma_direct_unmap_sg+0x51/0x1e0 + RSP: 0018:ffffc90026f47b88 EFLAGS: 00010246 + RAX: 0000000000000000 RBX: 0000000000000021 RCX: 0000000000000002 + RDX: 0000000000000021 RSI: 0000000000000000 RDI: ffff8881041130d0 + RBP: ffff8881041130d0 R08: 0000000000000000 R09: 0000000000000034 + R10: ffffc90026f47c48 R11: 0000000000000031 R12: 0000000000000000 + R13: 0000000000000000 R14: ffff8881565e4a20 R15: 0000000000000000 + FS: 00007f4c69ed3d00(0000) GS:ffff889faac80000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 000000000000001c CR3: 0000000288a50002 CR4: 00000000007706e0 + DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + PKRU: 55555554 + Call Trace: + + ? __die_body+0x1a/0x60 + ? page_fault_oops+0x16f/0x4a0 + ? do_user_addr_fault+0x174/0x7f0 + ? exc_page_fault+0x69/0x1a0 + ? asm_exc_page_fault+0x22/0x30 + ? dma_direct_unmap_sg+0x51/0x1e0 + ? preempt_count_sub+0x96/0xe0 + qla2xxx_qpair_sp_free_dma+0x29f/0x3b0 [qla2xxx] + qla2xxx_qpair_sp_compl+0x60/0x80 [qla2xxx] + __qla2x00_abort_all_cmds+0xa2/0x450 [qla2xxx] + +The command completion was done early while aborting the commands in driver +unload path but outside lock to avoid the WARN_ON condition of performing +dma_free_attr within the lock. However this caused race condition while +command completion via multiple paths causing system crash. + +Hence complete the command early in unload path but within the lock to +avoid race condition. + +Fixes: 0367076b0817 ("scsi: qla2xxx: Perform lockless command completion in abort path") +Cc: stable@vger.kernel.org +Signed-off-by: Shreyas Deodhar +Signed-off-by: Nilesh Javali +Link: https://lore.kernel.org/r/20240710171057.35066-7-njavali@marvell.com +Reviewed-by: Himanshu Madhani +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/qla2xxx/qla_os.c | 5 ----- + 1 file changed, 5 deletions(-) + +--- a/drivers/scsi/qla2xxx/qla_os.c ++++ b/drivers/scsi/qla2xxx/qla_os.c +@@ -1875,14 +1875,9 @@ __qla2x00_abort_all_cmds(struct qla_qpai + for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) { + sp = req->outstanding_cmds[cnt]; + if (sp) { +- /* +- * perform lockless completion during driver unload +- */ + if (qla2x00_chip_is_down(vha)) { + req->outstanding_cmds[cnt] = NULL; +- spin_unlock_irqrestore(qp->qp_lock_ptr, flags); + sp->done(sp, res); +- spin_lock_irqsave(qp->qp_lock_ptr, flags); + continue; + } + diff --git a/queue-6.10/scsi-qla2xxx-during-vport-delete-send-async-logout-explicitly.patch b/queue-6.10/scsi-qla2xxx-during-vport-delete-send-async-logout-explicitly.patch new file mode 100644 index 00000000000..3ac2a98c3c9 --- /dev/null +++ b/queue-6.10/scsi-qla2xxx-during-vport-delete-send-async-logout-explicitly.patch @@ -0,0 +1,73 @@ +From 76f480d7c717368f29a3870f7d64471ce0ff8fb2 Mon Sep 17 00:00:00 2001 +From: Manish Rangankar +Date: Wed, 10 Jul 2024 22:40:53 +0530 +Subject: scsi: qla2xxx: During vport delete send async logout explicitly + +From: Manish Rangankar + +commit 76f480d7c717368f29a3870f7d64471ce0ff8fb2 upstream. + +During vport delete, it is observed that during unload we hit a crash +because of stale entries in outstanding command array. 
For all these stale +I/O entries, eh_abort was issued and aborted (fast_fail_io = 2009h) but +I/Os could not complete while vport delete is in process of deleting. + + BUG: kernel NULL pointer dereference, address: 000000000000001c + #PF: supervisor read access in kernel mode + #PF: error_code(0x0000) - not-present page + PGD 0 P4D 0 + Oops: 0000 [#1] PREEMPT SMP NOPTI + Workqueue: qla2xxx_wq qla_do_work [qla2xxx] + RIP: 0010:dma_direct_unmap_sg+0x51/0x1e0 + RSP: 0018:ffffa1e1e150fc68 EFLAGS: 00010046 + RAX: 0000000000000000 RBX: 0000000000000021 RCX: 0000000000000001 + RDX: 0000000000000021 RSI: 0000000000000000 RDI: ffff8ce208a7a0d0 + RBP: ffff8ce208a7a0d0 R08: 0000000000000000 R09: ffff8ce378aac9c8 + R10: ffff8ce378aac8a0 R11: ffffa1e1e150f9d8 R12: 0000000000000000 + R13: 0000000000000000 R14: ffff8ce378aac9c8 R15: 0000000000000000 + FS: 0000000000000000(0000) GS:ffff8d217f000000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 000000000000001c CR3: 0000002089acc000 CR4: 0000000000350ee0 + Call Trace: + + qla2xxx_qpair_sp_free_dma+0x417/0x4e0 + ? qla2xxx_qpair_sp_compl+0x10d/0x1a0 + ? qla2x00_status_entry+0x768/0x2830 + ? newidle_balance+0x2f0/0x430 + ? dequeue_entity+0x100/0x3c0 + ? qla24xx_process_response_queue+0x6a1/0x19e0 + ? __schedule+0x2d5/0x1140 + ? qla_do_work+0x47/0x60 + ? process_one_work+0x267/0x440 + ? process_one_work+0x440/0x440 + ? worker_thread+0x2d/0x3d0 + ? process_one_work+0x440/0x440 + ? kthread+0x156/0x180 + ? set_kthread_struct+0x50/0x50 + ? ret_from_fork+0x22/0x30 + + +Send out async logout explicitly for all the ports during vport delete. + +Cc: stable@vger.kernel.org +Signed-off-by: Manish Rangankar +Signed-off-by: Nilesh Javali +Link: https://lore.kernel.org/r/20240710171057.35066-8-njavali@marvell.com +Reviewed-by: Himanshu Madhani +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/qla2xxx/qla_mid.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/scsi/qla2xxx/qla_mid.c ++++ b/drivers/scsi/qla2xxx/qla_mid.c +@@ -180,7 +180,7 @@ qla24xx_disable_vp(scsi_qla_host_t *vha) + atomic_set(&vha->loop_state, LOOP_DOWN); + atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME); + list_for_each_entry(fcport, &vha->vp_fcports, list) +- fcport->logout_on_delete = 0; ++ fcport->logout_on_delete = 1; + + if (!vha->hw->flags.edif_enabled) + qla2x00_wait_for_sess_deletion(vha); diff --git a/queue-6.10/scsi-qla2xxx-fix-flash-read-failure.patch b/queue-6.10/scsi-qla2xxx-fix-flash-read-failure.patch new file mode 100644 index 00000000000..dcf0af17993 --- /dev/null +++ b/queue-6.10/scsi-qla2xxx-fix-flash-read-failure.patch @@ -0,0 +1,371 @@ +From 29e222085d8907ccff18ecd931bdd4c6b1f11b92 Mon Sep 17 00:00:00 2001 +From: Quinn Tran +Date: Wed, 10 Jul 2024 22:40:51 +0530 +Subject: scsi: qla2xxx: Fix flash read failure + +From: Quinn Tran + +commit 29e222085d8907ccff18ecd931bdd4c6b1f11b92 upstream. + +Link up failure is observed as a result of flash read failure. Current +code does not check flash read return code where it relies on FW checksum +to detect the problem. + +Add check of flash read failure to detect the problem sooner. + +Reported-by: kernel test robot +Reported-by: Dan Carpenter +Closes: https://lore.kernel.org/all/202406210815.rPDRDMBi-lkp@intel.com/ +Cc: stable@vger.kernel.org +Signed-off-by: Quinn Tran +Signed-off-by: Nilesh Javali +Link: https://lore.kernel.org/r/20240710171057.35066-6-njavali@marvell.com +Reviewed-by: Himanshu Madhani +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/qla2xxx/qla_init.c | 63 ++++++++++++++++++----- + drivers/scsi/qla2xxx/qla_sup.c | 108 +++++++++++++++++++++++++++------------- + 2 files changed, 125 insertions(+), 46 deletions(-) + +--- a/drivers/scsi/qla2xxx/qla_init.c ++++ b/drivers/scsi/qla2xxx/qla_init.c +@@ -8218,15 +8218,21 @@ qla28xx_get_aux_images( + struct qla27xx_image_status pri_aux_image_status, sec_aux_image_status; + bool valid_pri_image = false, valid_sec_image = false; + bool active_pri_image = false, active_sec_image = false; ++ int rc; + + if (!ha->flt_region_aux_img_status_pri) { + ql_dbg(ql_dbg_init, vha, 0x018a, "Primary aux image not addressed\n"); + goto check_sec_image; + } + +- qla24xx_read_flash_data(vha, (uint32_t *)&pri_aux_image_status, ++ rc = qla24xx_read_flash_data(vha, (uint32_t *)&pri_aux_image_status, + ha->flt_region_aux_img_status_pri, + sizeof(pri_aux_image_status) >> 2); ++ if (rc) { ++ ql_log(ql_log_info, vha, 0x01a1, ++ "Unable to read Primary aux image(%x).\n", rc); ++ goto check_sec_image; ++ } + qla27xx_print_image(vha, "Primary aux image", &pri_aux_image_status); + + if (qla28xx_check_aux_image_status_signature(&pri_aux_image_status)) { +@@ -8257,9 +8263,15 @@ check_sec_image: + goto check_valid_image; + } + +- qla24xx_read_flash_data(vha, (uint32_t *)&sec_aux_image_status, ++ rc = qla24xx_read_flash_data(vha, (uint32_t *)&sec_aux_image_status, + ha->flt_region_aux_img_status_sec, + sizeof(sec_aux_image_status) >> 2); ++ if (rc) { ++ ql_log(ql_log_info, vha, 0x01a2, ++ "Unable to read Secondary aux image(%x).\n", rc); ++ goto check_valid_image; ++ } ++ + qla27xx_print_image(vha, "Secondary aux image", &sec_aux_image_status); + + if (qla28xx_check_aux_image_status_signature(&sec_aux_image_status)) { +@@ -8317,6 +8329,7 @@ qla27xx_get_active_image(struct scsi_qla + struct qla27xx_image_status pri_image_status, sec_image_status; + bool valid_pri_image = false, valid_sec_image = false; + bool active_pri_image = false, active_sec_image = false; ++ int rc; + + if (!ha->flt_region_img_status_pri) { + ql_dbg(ql_dbg_init, vha, 0x018a, "Primary image not addressed\n"); +@@ -8358,8 +8371,14 @@ check_sec_image: + goto check_valid_image; + } + +- qla24xx_read_flash_data(vha, (uint32_t *)(&sec_image_status), ++ rc = qla24xx_read_flash_data(vha, (uint32_t *)(&sec_image_status), + ha->flt_region_img_status_sec, sizeof(sec_image_status) >> 2); ++ if (rc) { ++ ql_log(ql_log_info, vha, 0x01a3, ++ "Unable to read Secondary image status(%x).\n", rc); ++ goto check_valid_image; ++ } ++ + qla27xx_print_image(vha, "Secondary image", &sec_image_status); + + if (qla27xx_check_image_status_signature(&sec_image_status)) { +@@ -8431,11 +8450,10 @@ qla24xx_load_risc_flash(scsi_qla_host_t + "FW: Loading firmware from flash (%x).\n", faddr); + + dcode = (uint32_t *)req->ring; +- qla24xx_read_flash_data(vha, dcode, faddr, 8); +- if (qla24xx_risc_firmware_invalid(dcode)) { ++ rval = qla24xx_read_flash_data(vha, dcode, faddr, 8); ++ if (rval || qla24xx_risc_firmware_invalid(dcode)) { + ql_log(ql_log_fatal, vha, 0x008c, +- "Unable to verify the integrity of flash firmware " +- "image.\n"); ++ "Unable to verify the integrity of flash firmware image (rval %x).\n", rval); + ql_log(ql_log_fatal, vha, 0x008d, + "Firmware data: %08x %08x %08x %08x.\n", + dcode[0], dcode[1], dcode[2], dcode[3]); +@@ -8449,7 +8467,12 @@ qla24xx_load_risc_flash(scsi_qla_host_t + for (j = 0; j < segments; j++) { + ql_dbg(ql_dbg_init, vha, 0x008d, + "-> Loading segment %u...\n", j); +- 
qla24xx_read_flash_data(vha, dcode, faddr, 10); ++ rval = qla24xx_read_flash_data(vha, dcode, faddr, 10); ++ if (rval) { ++ ql_log(ql_log_fatal, vha, 0x016a, ++ "-> Unable to read segment addr + size .\n"); ++ return QLA_FUNCTION_FAILED; ++ } + risc_addr = be32_to_cpu((__force __be32)dcode[2]); + risc_size = be32_to_cpu((__force __be32)dcode[3]); + if (!*srisc_addr) { +@@ -8465,7 +8488,13 @@ qla24xx_load_risc_flash(scsi_qla_host_t + ql_dbg(ql_dbg_init, vha, 0x008e, + "-> Loading fragment %u: %#x <- %#x (%#lx dwords)...\n", + fragment, risc_addr, faddr, dlen); +- qla24xx_read_flash_data(vha, dcode, faddr, dlen); ++ rval = qla24xx_read_flash_data(vha, dcode, faddr, dlen); ++ if (rval) { ++ ql_log(ql_log_fatal, vha, 0x016b, ++ "-> Unable to read fragment(faddr %#x dlen %#lx).\n", ++ faddr, dlen); ++ return QLA_FUNCTION_FAILED; ++ } + for (i = 0; i < dlen; i++) + dcode[i] = swab32(dcode[i]); + +@@ -8494,7 +8523,14 @@ qla24xx_load_risc_flash(scsi_qla_host_t + fwdt->length = 0; + + dcode = (uint32_t *)req->ring; +- qla24xx_read_flash_data(vha, dcode, faddr, 7); ++ ++ rval = qla24xx_read_flash_data(vha, dcode, faddr, 7); ++ if (rval) { ++ ql_log(ql_log_fatal, vha, 0x016c, ++ "-> Unable to read template size.\n"); ++ goto failed; ++ } ++ + risc_size = be32_to_cpu((__force __be32)dcode[2]); + ql_dbg(ql_dbg_init, vha, 0x0161, + "-> fwdt%u template array at %#x (%#x dwords)\n", +@@ -8520,11 +8556,12 @@ qla24xx_load_risc_flash(scsi_qla_host_t + } + + dcode = fwdt->template; +- qla24xx_read_flash_data(vha, dcode, faddr, risc_size); ++ rval = qla24xx_read_flash_data(vha, dcode, faddr, risc_size); + +- if (!qla27xx_fwdt_template_valid(dcode)) { ++ if (rval || !qla27xx_fwdt_template_valid(dcode)) { + ql_log(ql_log_warn, vha, 0x0165, +- "-> fwdt%u failed template validate\n", j); ++ "-> fwdt%u failed template validate (rval %x)\n", ++ j, rval); + goto failed; + } + +--- a/drivers/scsi/qla2xxx/qla_sup.c ++++ b/drivers/scsi/qla2xxx/qla_sup.c +@@ -555,6 +555,7 @@ qla2xxx_find_flt_start(scsi_qla_host_t * + struct qla_flt_location *fltl = (void *)req->ring; + uint32_t *dcode = (uint32_t *)req->ring; + uint8_t *buf = (void *)req->ring, *bcode, last_image; ++ int rc; + + /* + * FLT-location structure resides after the last PCI region. +@@ -584,14 +585,24 @@ qla2xxx_find_flt_start(scsi_qla_host_t * + pcihdr = 0; + do { + /* Verify PCI expansion ROM header. */ +- qla24xx_read_flash_data(vha, dcode, pcihdr >> 2, 0x20); ++ rc = qla24xx_read_flash_data(vha, dcode, pcihdr >> 2, 0x20); ++ if (rc) { ++ ql_log(ql_log_info, vha, 0x016d, ++ "Unable to read PCI Expansion Rom Header (%x).\n", rc); ++ return QLA_FUNCTION_FAILED; ++ } + bcode = buf + (pcihdr % 4); + if (bcode[0x0] != 0x55 || bcode[0x1] != 0xaa) + goto end; + + /* Locate PCI data structure. */ + pcids = pcihdr + ((bcode[0x19] << 8) | bcode[0x18]); +- qla24xx_read_flash_data(vha, dcode, pcids >> 2, 0x20); ++ rc = qla24xx_read_flash_data(vha, dcode, pcids >> 2, 0x20); ++ if (rc) { ++ ql_log(ql_log_info, vha, 0x0179, ++ "Unable to read PCI Data Structure (%x).\n", rc); ++ return QLA_FUNCTION_FAILED; ++ } + bcode = buf + (pcihdr % 4); + + /* Validate signature of PCI data structure. */ +@@ -606,7 +617,12 @@ qla2xxx_find_flt_start(scsi_qla_host_t * + } while (!last_image); + + /* Now verify FLT-location structure. 
*/ +- qla24xx_read_flash_data(vha, dcode, pcihdr >> 2, sizeof(*fltl) >> 2); ++ rc = qla24xx_read_flash_data(vha, dcode, pcihdr >> 2, sizeof(*fltl) >> 2); ++ if (rc) { ++ ql_log(ql_log_info, vha, 0x017a, ++ "Unable to read FLT (%x).\n", rc); ++ return QLA_FUNCTION_FAILED; ++ } + if (memcmp(fltl->sig, "QFLT", 4)) + goto end; + +@@ -2605,13 +2621,18 @@ qla24xx_read_optrom_data(struct scsi_qla + uint32_t offset, uint32_t length) + { + struct qla_hw_data *ha = vha->hw; ++ int rc; + + /* Suspend HBA. */ + scsi_block_requests(vha->host); + set_bit(MBX_UPDATE_FLASH_ACTIVE, &ha->mbx_cmd_flags); + + /* Go with read. */ +- qla24xx_read_flash_data(vha, buf, offset >> 2, length >> 2); ++ rc = qla24xx_read_flash_data(vha, buf, offset >> 2, length >> 2); ++ if (rc) { ++ ql_log(ql_log_info, vha, 0x01a0, ++ "Unable to perform optrom read(%x).\n", rc); ++ } + + /* Resume HBA. */ + clear_bit(MBX_UPDATE_FLASH_ACTIVE, &ha->mbx_cmd_flags); +@@ -3412,7 +3433,7 @@ qla24xx_get_flash_version(scsi_qla_host_ + struct active_regions active_regions = { }; + + if (IS_P3P_TYPE(ha)) +- return ret; ++ return QLA_SUCCESS; + + if (!mbuf) + return QLA_FUNCTION_FAILED; +@@ -3432,20 +3453,31 @@ qla24xx_get_flash_version(scsi_qla_host_ + + do { + /* Verify PCI expansion ROM header. */ +- qla24xx_read_flash_data(vha, dcode, pcihdr >> 2, 0x20); ++ ret = qla24xx_read_flash_data(vha, dcode, pcihdr >> 2, 0x20); ++ if (ret) { ++ ql_log(ql_log_info, vha, 0x017d, ++ "Unable to read PCI EXP Rom Header(%x).\n", ret); ++ return QLA_FUNCTION_FAILED; ++ } ++ + bcode = mbuf + (pcihdr % 4); + if (memcmp(bcode, "\x55\xaa", 2)) { + /* No signature */ + ql_log(ql_log_fatal, vha, 0x0059, + "No matching ROM signature.\n"); +- ret = QLA_FUNCTION_FAILED; +- break; ++ return QLA_FUNCTION_FAILED; + } + + /* Locate PCI data structure. */ + pcids = pcihdr + ((bcode[0x19] << 8) | bcode[0x18]); + +- qla24xx_read_flash_data(vha, dcode, pcids >> 2, 0x20); ++ ret = qla24xx_read_flash_data(vha, dcode, pcids >> 2, 0x20); ++ if (ret) { ++ ql_log(ql_log_info, vha, 0x018e, ++ "Unable to read PCI Data Structure (%x).\n", ret); ++ return QLA_FUNCTION_FAILED; ++ } ++ + bcode = mbuf + (pcihdr % 4); + + /* Validate signature of PCI data structure. 
*/ +@@ -3454,8 +3486,7 @@ qla24xx_get_flash_version(scsi_qla_host_ + ql_log(ql_log_fatal, vha, 0x005a, + "PCI data struct not found pcir_adr=%x.\n", pcids); + ql_dump_buffer(ql_dbg_init, vha, 0x0059, dcode, 32); +- ret = QLA_FUNCTION_FAILED; +- break; ++ return QLA_FUNCTION_FAILED; + } + + /* Read version */ +@@ -3507,20 +3538,26 @@ qla24xx_get_flash_version(scsi_qla_host_ + faddr = ha->flt_region_fw_sec; + } + +- qla24xx_read_flash_data(vha, dcode, faddr, 8); +- if (qla24xx_risc_firmware_invalid(dcode)) { +- ql_log(ql_log_warn, vha, 0x005f, +- "Unrecognized fw revision at %x.\n", +- ha->flt_region_fw * 4); +- ql_dump_buffer(ql_dbg_init, vha, 0x005f, dcode, 32); ++ ret = qla24xx_read_flash_data(vha, dcode, faddr, 8); ++ if (ret) { ++ ql_log(ql_log_info, vha, 0x019e, ++ "Unable to read FW version (%x).\n", ret); ++ return ret; + } else { +- for (i = 0; i < 4; i++) +- ha->fw_revision[i] = ++ if (qla24xx_risc_firmware_invalid(dcode)) { ++ ql_log(ql_log_warn, vha, 0x005f, ++ "Unrecognized fw revision at %x.\n", ++ ha->flt_region_fw * 4); ++ ql_dump_buffer(ql_dbg_init, vha, 0x005f, dcode, 32); ++ } else { ++ for (i = 0; i < 4; i++) ++ ha->fw_revision[i] = + be32_to_cpu((__force __be32)dcode[4+i]); +- ql_dbg(ql_dbg_init, vha, 0x0060, +- "Firmware revision (flash) %u.%u.%u (%x).\n", +- ha->fw_revision[0], ha->fw_revision[1], +- ha->fw_revision[2], ha->fw_revision[3]); ++ ql_dbg(ql_dbg_init, vha, 0x0060, ++ "Firmware revision (flash) %u.%u.%u (%x).\n", ++ ha->fw_revision[0], ha->fw_revision[1], ++ ha->fw_revision[2], ha->fw_revision[3]); ++ } + } + + /* Check for golden firmware and get version if available */ +@@ -3531,18 +3568,23 @@ qla24xx_get_flash_version(scsi_qla_host_ + + memset(ha->gold_fw_version, 0, sizeof(ha->gold_fw_version)); + faddr = ha->flt_region_gold_fw; +- qla24xx_read_flash_data(vha, dcode, ha->flt_region_gold_fw, 8); +- if (qla24xx_risc_firmware_invalid(dcode)) { +- ql_log(ql_log_warn, vha, 0x0056, +- "Unrecognized golden fw at %#x.\n", faddr); +- ql_dump_buffer(ql_dbg_init, vha, 0x0056, dcode, 32); ++ ret = qla24xx_read_flash_data(vha, dcode, ha->flt_region_gold_fw, 8); ++ if (ret) { ++ ql_log(ql_log_info, vha, 0x019f, ++ "Unable to read Gold FW version (%x).\n", ret); + return ret; +- } +- +- for (i = 0; i < 4; i++) +- ha->gold_fw_version[i] = +- be32_to_cpu((__force __be32)dcode[4+i]); ++ } else { ++ if (qla24xx_risc_firmware_invalid(dcode)) { ++ ql_log(ql_log_warn, vha, 0x0056, ++ "Unrecognized golden fw at %#x.\n", faddr); ++ ql_dump_buffer(ql_dbg_init, vha, 0x0056, dcode, 32); ++ return QLA_FUNCTION_FAILED; ++ } + ++ for (i = 0; i < 4; i++) ++ ha->gold_fw_version[i] = ++ be32_to_cpu((__force __be32)dcode[4+i]); ++ } + return ret; + } + diff --git a/queue-6.10/scsi-qla2xxx-fix-for-possible-memory-corruption.patch b/queue-6.10/scsi-qla2xxx-fix-for-possible-memory-corruption.patch new file mode 100644 index 00000000000..55e838bf372 --- /dev/null +++ b/queue-6.10/scsi-qla2xxx-fix-for-possible-memory-corruption.patch @@ -0,0 +1,33 @@ +From c03d740152f78e86945a75b2ad541bf972fab92a Mon Sep 17 00:00:00 2001 +From: Shreyas Deodhar +Date: Wed, 10 Jul 2024 22:40:49 +0530 +Subject: scsi: qla2xxx: Fix for possible memory corruption + +From: Shreyas Deodhar + +commit c03d740152f78e86945a75b2ad541bf972fab92a upstream. + +Init Control Block is dereferenced incorrectly. 
Correctly dereference ICB + +Cc: stable@vger.kernel.org +Signed-off-by: Shreyas Deodhar +Signed-off-by: Nilesh Javali +Link: https://lore.kernel.org/r/20240710171057.35066-4-njavali@marvell.com +Reviewed-by: Himanshu Madhani +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/qla2xxx/qla_os.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/scsi/qla2xxx/qla_os.c ++++ b/drivers/scsi/qla2xxx/qla_os.c +@@ -4689,7 +4689,7 @@ static void + qla2x00_number_of_exch(scsi_qla_host_t *vha, u32 *ret_cnt, u16 max_cnt) + { + u32 temp; +- struct init_cb_81xx *icb = (struct init_cb_81xx *)&vha->hw->init_cb; ++ struct init_cb_81xx *icb = (struct init_cb_81xx *)vha->hw->init_cb; + *ret_cnt = FW_DEF_EXCHANGES_CNT; + + if (max_cnt > vha->hw->max_exchg) diff --git a/queue-6.10/scsi-qla2xxx-reduce-fabric-scan-duplicate-code.patch b/queue-6.10/scsi-qla2xxx-reduce-fabric-scan-duplicate-code.patch new file mode 100644 index 00000000000..ef1bec0b519 --- /dev/null +++ b/queue-6.10/scsi-qla2xxx-reduce-fabric-scan-duplicate-code.patch @@ -0,0 +1,711 @@ +From beafd692461443e0fb1d61aa56886bf85ef6f5e4 Mon Sep 17 00:00:00 2001 +From: Quinn Tran +Date: Wed, 10 Jul 2024 22:40:55 +0530 +Subject: scsi: qla2xxx: Reduce fabric scan duplicate code + +From: Quinn Tran + +commit beafd692461443e0fb1d61aa56886bf85ef6f5e4 upstream. + +For fabric scan, current code uses switch scan opcode and flags as the +method to iterate through different commands to carry out the process. +This makes it hard to read. This patch convert those opcode and flags into +steps. In addition, this help reduce some duplicate code. + +Consolidate routines that handle GPNFT & GNNFT. + +Cc: stable@vger.kernel.org +Signed-off-by: Quinn Tran +Signed-off-by: Nilesh Javali +Link: https://lore.kernel.org/r/20240710171057.35066-10-njavali@marvell.com +Reviewed-by: Himanshu Madhani +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/qla2xxx/qla_def.h | 14 + + drivers/scsi/qla2xxx/qla_gbl.h | 6 + drivers/scsi/qla2xxx/qla_gs.c | 432 ++++++++++++++++------------------------ + drivers/scsi/qla2xxx/qla_init.c | 5 + drivers/scsi/qla2xxx/qla_os.c | 12 - + 5 files changed, 200 insertions(+), 269 deletions(-) + +--- a/drivers/scsi/qla2xxx/qla_def.h ++++ b/drivers/scsi/qla2xxx/qla_def.h +@@ -3309,11 +3309,20 @@ struct fab_scan_rp { + u8 node_name[8]; + }; + ++enum scan_step { ++ FAB_SCAN_START, ++ FAB_SCAN_GPNFT_FCP, ++ FAB_SCAN_GNNFT_FCP, ++ FAB_SCAN_GPNFT_NVME, ++ FAB_SCAN_GNNFT_NVME, ++}; ++ + struct fab_scan { + struct fab_scan_rp *l; + u32 size; + u32 rscn_gen_start; + u32 rscn_gen_end; ++ enum scan_step step; + u16 scan_retry; + #define MAX_SCAN_RETRIES 5 + enum scan_flags_t scan_flags; +@@ -3539,9 +3548,8 @@ enum qla_work_type { + QLA_EVT_RELOGIN, + QLA_EVT_ASYNC_PRLO, + QLA_EVT_ASYNC_PRLO_DONE, +- QLA_EVT_GPNFT, +- QLA_EVT_GPNFT_DONE, +- QLA_EVT_GNNFT_DONE, ++ QLA_EVT_SCAN_CMD, ++ QLA_EVT_SCAN_FINISH, + QLA_EVT_GFPNID, + QLA_EVT_SP_RETRY, + QLA_EVT_IIDMA, +--- a/drivers/scsi/qla2xxx/qla_gbl.h ++++ b/drivers/scsi/qla2xxx/qla_gbl.h +@@ -728,9 +728,9 @@ int qla24xx_async_gpsc(scsi_qla_host_t * + void qla24xx_handle_gpsc_event(scsi_qla_host_t *, struct event_arg *); + int qla2x00_mgmt_svr_login(scsi_qla_host_t *); + int qla24xx_async_gffid(scsi_qla_host_t *vha, fc_port_t *fcport, bool); +-int qla24xx_async_gpnft(scsi_qla_host_t *, u8, srb_t *); +-void qla24xx_async_gpnft_done(scsi_qla_host_t *, srb_t *); +-void qla24xx_async_gnnft_done(scsi_qla_host_t *, srb_t *); ++int qla_fab_async_scan(scsi_qla_host_t *, srb_t *); ++void qla_fab_scan_start(struct scsi_qla_host *); ++void qla_fab_scan_finish(scsi_qla_host_t *, srb_t *); + int qla24xx_post_gfpnid_work(struct scsi_qla_host *, fc_port_t *); + int qla24xx_async_gfpnid(scsi_qla_host_t *, fc_port_t *); + void qla24xx_handle_gfpnid_event(scsi_qla_host_t *, struct event_arg *); +--- a/drivers/scsi/qla2xxx/qla_gs.c ++++ b/drivers/scsi/qla2xxx/qla_gs.c +@@ -3191,7 +3191,7 @@ static bool qla_ok_to_clear_rscn(scsi_ql + return true; + } + +-void qla24xx_async_gnnft_done(scsi_qla_host_t *vha, srb_t *sp) ++void qla_fab_scan_finish(scsi_qla_host_t *vha, srb_t *sp) + { + fc_port_t *fcport; + u32 i, rc; +@@ -3406,14 +3406,11 @@ out: + } + } + +-static int qla2x00_post_gnnft_gpnft_done_work(struct scsi_qla_host *vha, ++static int qla2x00_post_next_scan_work(struct scsi_qla_host *vha, + srb_t *sp, int cmd) + { + struct qla_work_evt *e; + +- if (cmd != QLA_EVT_GPNFT_DONE && cmd != QLA_EVT_GNNFT_DONE) +- return QLA_PARAMETER_ERROR; +- + e = qla2x00_alloc_work(vha, cmd); + if (!e) + return QLA_FUNCTION_FAILED; +@@ -3423,37 +3420,15 @@ static int qla2x00_post_gnnft_gpnft_done + return qla2x00_post_work(vha, e); + } + +-static int qla2x00_post_nvme_gpnft_work(struct scsi_qla_host *vha, +- srb_t *sp, int cmd) +-{ +- struct qla_work_evt *e; +- +- if (cmd != QLA_EVT_GPNFT) +- return QLA_PARAMETER_ERROR; +- +- e = qla2x00_alloc_work(vha, cmd); +- if (!e) +- return QLA_FUNCTION_FAILED; +- +- e->u.gpnft.fc4_type = FC4_TYPE_NVME; +- e->u.gpnft.sp = sp; +- +- return qla2x00_post_work(vha, e); +-} +- + static void qla2x00_find_free_fcp_nvme_slot(struct scsi_qla_host *vha, + struct srb *sp) + { + struct qla_hw_data *ha = vha->hw; + int num_fibre_dev = ha->max_fibre_devices; +- struct ct_sns_req *ct_req = +- (struct ct_sns_req *)sp->u.iocb_cmd.u.ctarg.req; + struct ct_sns_gpnft_rsp *ct_rsp = + (struct ct_sns_gpnft_rsp 
*)sp->u.iocb_cmd.u.ctarg.rsp; + struct ct_sns_gpn_ft_data *d; + struct fab_scan_rp *rp; +- u16 cmd = be16_to_cpu(ct_req->command); +- u8 fc4_type = sp->gen2; + int i, j, k; + port_id_t id; + u8 found; +@@ -3472,85 +3447,83 @@ static void qla2x00_find_free_fcp_nvme_s + if (id.b24 == 0 || wwn == 0) + continue; + +- if (fc4_type == FC4_TYPE_FCP_SCSI) { +- if (cmd == GPN_FT_CMD) { +- rp = &vha->scan.l[j]; +- rp->id = id; +- memcpy(rp->port_name, d->port_name, 8); +- j++; +- rp->fc4type = FS_FC4TYPE_FCP; +- } else { +- for (k = 0; k < num_fibre_dev; k++) { +- rp = &vha->scan.l[k]; +- if (id.b24 == rp->id.b24) { +- memcpy(rp->node_name, +- d->port_name, 8); +- break; +- } ++ ql_dbg(ql_dbg_disc + ql_dbg_verbose, vha, 0x2025, ++ "%s %06x %8ph \n", ++ __func__, id.b24, d->port_name); ++ ++ switch (vha->scan.step) { ++ case FAB_SCAN_GPNFT_FCP: ++ rp = &vha->scan.l[j]; ++ rp->id = id; ++ memcpy(rp->port_name, d->port_name, 8); ++ j++; ++ rp->fc4type = FS_FC4TYPE_FCP; ++ break; ++ case FAB_SCAN_GNNFT_FCP: ++ for (k = 0; k < num_fibre_dev; k++) { ++ rp = &vha->scan.l[k]; ++ if (id.b24 == rp->id.b24) { ++ memcpy(rp->node_name, ++ d->port_name, 8); ++ break; + } + } +- } else { +- /* Search if the fibre device supports FC4_TYPE_NVME */ +- if (cmd == GPN_FT_CMD) { +- found = 0; ++ break; ++ case FAB_SCAN_GPNFT_NVME: ++ found = 0; + +- for (k = 0; k < num_fibre_dev; k++) { +- rp = &vha->scan.l[k]; +- if (!memcmp(rp->port_name, +- d->port_name, 8)) { +- /* +- * Supports FC-NVMe & FCP +- */ +- rp->fc4type |= FS_FC4TYPE_NVME; +- found = 1; +- break; +- } ++ for (k = 0; k < num_fibre_dev; k++) { ++ rp = &vha->scan.l[k]; ++ if (!memcmp(rp->port_name, d->port_name, 8)) { ++ /* ++ * Supports FC-NVMe & FCP ++ */ ++ rp->fc4type |= FS_FC4TYPE_NVME; ++ found = 1; ++ break; + } ++ } + +- /* We found new FC-NVMe only port */ +- if (!found) { +- for (k = 0; k < num_fibre_dev; k++) { +- rp = &vha->scan.l[k]; +- if (wwn_to_u64(rp->port_name)) { +- continue; +- } else { +- rp->id = id; +- memcpy(rp->port_name, +- d->port_name, 8); +- rp->fc4type = +- FS_FC4TYPE_NVME; +- break; +- } +- } +- } +- } else { ++ /* We found new FC-NVMe only port */ ++ if (!found) { + for (k = 0; k < num_fibre_dev; k++) { + rp = &vha->scan.l[k]; +- if (id.b24 == rp->id.b24) { +- memcpy(rp->node_name, +- d->port_name, 8); ++ if (wwn_to_u64(rp->port_name)) { ++ continue; ++ } else { ++ rp->id = id; ++ memcpy(rp->port_name, d->port_name, 8); ++ rp->fc4type = FS_FC4TYPE_NVME; + break; + } + } + } ++ break; ++ case FAB_SCAN_GNNFT_NVME: ++ for (k = 0; k < num_fibre_dev; k++) { ++ rp = &vha->scan.l[k]; ++ if (id.b24 == rp->id.b24) { ++ memcpy(rp->node_name, d->port_name, 8); ++ break; ++ } ++ } ++ break; ++ default: ++ break; + } + } + } + +-static void qla2x00_async_gpnft_gnnft_sp_done(srb_t *sp, int res) ++static void qla_async_scan_sp_done(srb_t *sp, int res) + { + struct scsi_qla_host *vha = sp->vha; +- struct ct_sns_req *ct_req = +- (struct ct_sns_req *)sp->u.iocb_cmd.u.ctarg.req; +- u16 cmd = be16_to_cpu(ct_req->command); +- u8 fc4_type = sp->gen2; + unsigned long flags; + int rc; + + /* gen2 field is holding the fc4type */ +- ql_dbg(ql_dbg_disc, vha, 0xffff, +- "Async done-%s res %x FC4Type %x\n", +- sp->name, res, sp->gen2); ++ ql_dbg(ql_dbg_disc, vha, 0x2026, ++ "Async done-%s res %x step %x\n", ++ sp->name, res, vha->scan.step); + + sp->rc = res; + if (res) { +@@ -3574,8 +3547,7 @@ static void qla2x00_async_gpnft_gnnft_sp + * sp for GNNFT_DONE work. This will allow all + * the resource to get freed up. 
+ */ +- rc = qla2x00_post_gnnft_gpnft_done_work(vha, sp, +- QLA_EVT_GNNFT_DONE); ++ rc = qla2x00_post_next_scan_work(vha, sp, QLA_EVT_SCAN_FINISH); + if (rc) { + /* Cleanup here to prevent memory leak */ + qla24xx_sp_unmap(vha, sp); +@@ -3600,28 +3572,30 @@ static void qla2x00_async_gpnft_gnnft_sp + + qla2x00_find_free_fcp_nvme_slot(vha, sp); + +- if ((fc4_type == FC4_TYPE_FCP_SCSI) && vha->flags.nvme_enabled && +- cmd == GNN_FT_CMD) { +- spin_lock_irqsave(&vha->work_lock, flags); +- vha->scan.scan_flags &= ~SF_SCANNING; +- spin_unlock_irqrestore(&vha->work_lock, flags); +- +- sp->rc = res; +- rc = qla2x00_post_nvme_gpnft_work(vha, sp, QLA_EVT_GPNFT); +- if (rc) { +- qla24xx_sp_unmap(vha, sp); +- set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags); +- set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags); +- } +- return; +- } ++ spin_lock_irqsave(&vha->work_lock, flags); ++ vha->scan.scan_flags &= ~SF_SCANNING; ++ spin_unlock_irqrestore(&vha->work_lock, flags); + +- if (cmd == GPN_FT_CMD) { +- rc = qla2x00_post_gnnft_gpnft_done_work(vha, sp, +- QLA_EVT_GPNFT_DONE); +- } else { +- rc = qla2x00_post_gnnft_gpnft_done_work(vha, sp, +- QLA_EVT_GNNFT_DONE); ++ switch (vha->scan.step) { ++ case FAB_SCAN_GPNFT_FCP: ++ case FAB_SCAN_GPNFT_NVME: ++ rc = qla2x00_post_next_scan_work(vha, sp, QLA_EVT_SCAN_CMD); ++ break; ++ case FAB_SCAN_GNNFT_FCP: ++ if (vha->flags.nvme_enabled) ++ rc = qla2x00_post_next_scan_work(vha, sp, QLA_EVT_SCAN_CMD); ++ else ++ rc = qla2x00_post_next_scan_work(vha, sp, QLA_EVT_SCAN_FINISH); ++ ++ break; ++ case FAB_SCAN_GNNFT_NVME: ++ rc = qla2x00_post_next_scan_work(vha, sp, QLA_EVT_SCAN_FINISH); ++ break; ++ default: ++ /* should not be here */ ++ WARN_ON(1); ++ rc = QLA_FUNCTION_FAILED; ++ break; + } + + if (rc) { +@@ -3632,127 +3606,16 @@ static void qla2x00_async_gpnft_gnnft_sp + } + } + +-/* +- * Get WWNN list for fc4_type +- * +- * It is assumed the same SRB is re-used from GPNFT to avoid +- * mem free & re-alloc +- */ +-static int qla24xx_async_gnnft(scsi_qla_host_t *vha, struct srb *sp, +- u8 fc4_type) +-{ +- int rval = QLA_FUNCTION_FAILED; +- struct ct_sns_req *ct_req; +- struct ct_sns_pkt *ct_sns; +- unsigned long flags; +- +- if (!vha->flags.online) { +- spin_lock_irqsave(&vha->work_lock, flags); +- vha->scan.scan_flags &= ~SF_SCANNING; +- spin_unlock_irqrestore(&vha->work_lock, flags); +- goto done_free_sp; +- } +- +- if (!sp->u.iocb_cmd.u.ctarg.req || !sp->u.iocb_cmd.u.ctarg.rsp) { +- ql_log(ql_log_warn, vha, 0xffff, +- "%s: req %p rsp %p are not setup\n", +- __func__, sp->u.iocb_cmd.u.ctarg.req, +- sp->u.iocb_cmd.u.ctarg.rsp); +- spin_lock_irqsave(&vha->work_lock, flags); +- vha->scan.scan_flags &= ~SF_SCANNING; +- spin_unlock_irqrestore(&vha->work_lock, flags); +- WARN_ON(1); +- set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags); +- set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags); +- goto done_free_sp; +- } +- +- ql_dbg(ql_dbg_disc, vha, 0xfffff, +- "%s: FC4Type %x, CT-PASSTHRU %s command ctarg rsp size %d, ctarg req size %d\n", +- __func__, fc4_type, sp->name, sp->u.iocb_cmd.u.ctarg.rsp_size, +- sp->u.iocb_cmd.u.ctarg.req_size); +- +- sp->type = SRB_CT_PTHRU_CMD; +- sp->name = "gnnft"; +- sp->gen1 = vha->hw->base_qpair->chip_reset; +- sp->gen2 = fc4_type; +- qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, +- qla2x00_async_gpnft_gnnft_sp_done); +- +- memset(sp->u.iocb_cmd.u.ctarg.rsp, 0, sp->u.iocb_cmd.u.ctarg.rsp_size); +- memset(sp->u.iocb_cmd.u.ctarg.req, 0, sp->u.iocb_cmd.u.ctarg.req_size); +- +- ct_sns = (struct ct_sns_pkt *)sp->u.iocb_cmd.u.ctarg.req; +- /* CT_IU 
preamble */ +- ct_req = qla2x00_prep_ct_req(ct_sns, GNN_FT_CMD, +- sp->u.iocb_cmd.u.ctarg.rsp_size); +- +- /* GPN_FT req */ +- ct_req->req.gpn_ft.port_type = fc4_type; +- +- sp->u.iocb_cmd.u.ctarg.req_size = GNN_FT_REQ_SIZE; +- sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS; +- +- ql_dbg(ql_dbg_disc, vha, 0xffff, +- "Async-%s hdl=%x FC4Type %x.\n", sp->name, +- sp->handle, ct_req->req.gpn_ft.port_type); +- +- rval = qla2x00_start_sp(sp); +- if (rval != QLA_SUCCESS) { +- goto done_free_sp; +- } +- +- return rval; +- +-done_free_sp: +- if (sp->u.iocb_cmd.u.ctarg.req) { +- dma_free_coherent(&vha->hw->pdev->dev, +- sp->u.iocb_cmd.u.ctarg.req_allocated_size, +- sp->u.iocb_cmd.u.ctarg.req, +- sp->u.iocb_cmd.u.ctarg.req_dma); +- sp->u.iocb_cmd.u.ctarg.req = NULL; +- } +- if (sp->u.iocb_cmd.u.ctarg.rsp) { +- dma_free_coherent(&vha->hw->pdev->dev, +- sp->u.iocb_cmd.u.ctarg.rsp_allocated_size, +- sp->u.iocb_cmd.u.ctarg.rsp, +- sp->u.iocb_cmd.u.ctarg.rsp_dma); +- sp->u.iocb_cmd.u.ctarg.rsp = NULL; +- } +- /* ref: INIT */ +- kref_put(&sp->cmd_kref, qla2x00_sp_release); +- +- spin_lock_irqsave(&vha->work_lock, flags); +- vha->scan.scan_flags &= ~SF_SCANNING; +- if (vha->scan.scan_flags == 0) { +- ql_dbg(ql_dbg_disc, vha, 0xffff, +- "%s: schedule\n", __func__); +- vha->scan.scan_flags |= SF_QUEUED; +- schedule_delayed_work(&vha->scan.scan_work, 5); +- } +- spin_unlock_irqrestore(&vha->work_lock, flags); +- +- +- return rval; +-} /* GNNFT */ +- +-void qla24xx_async_gpnft_done(scsi_qla_host_t *vha, srb_t *sp) +-{ +- ql_dbg(ql_dbg_disc + ql_dbg_verbose, vha, 0xffff, +- "%s enter\n", __func__); +- qla24xx_async_gnnft(vha, sp, sp->gen2); +-} +- + /* Get WWPN list for certain fc4_type */ +-int qla24xx_async_gpnft(scsi_qla_host_t *vha, u8 fc4_type, srb_t *sp) ++int qla_fab_async_scan(scsi_qla_host_t *vha, srb_t *sp) + { + int rval = QLA_FUNCTION_FAILED; + struct ct_sns_req *ct_req; + struct ct_sns_pkt *ct_sns; +- u32 rspsz; ++ u32 rspsz = 0; + unsigned long flags; + +- ql_dbg(ql_dbg_disc + ql_dbg_verbose, vha, 0xffff, ++ ql_dbg(ql_dbg_disc + ql_dbg_verbose, vha, 0x200c, + "%s enter\n", __func__); + + if (!vha->flags.online) +@@ -3761,22 +3624,21 @@ int qla24xx_async_gpnft(scsi_qla_host_t + spin_lock_irqsave(&vha->work_lock, flags); + if (vha->scan.scan_flags & SF_SCANNING) { + spin_unlock_irqrestore(&vha->work_lock, flags); +- ql_dbg(ql_dbg_disc + ql_dbg_verbose, vha, 0xffff, ++ ql_dbg(ql_dbg_disc + ql_dbg_verbose, vha, 0x2012, + "%s: scan active\n", __func__); + return rval; + } + vha->scan.scan_flags |= SF_SCANNING; ++ if (!sp) ++ vha->scan.step = FAB_SCAN_START; ++ + spin_unlock_irqrestore(&vha->work_lock, flags); + +- if (fc4_type == FC4_TYPE_FCP_SCSI) { +- ql_dbg(ql_dbg_disc + ql_dbg_verbose, vha, 0xffff, ++ switch (vha->scan.step) { ++ case FAB_SCAN_START: ++ ql_dbg(ql_dbg_disc + ql_dbg_verbose, vha, 0x2018, + "%s: Performing FCP Scan\n", __func__); + +- if (sp) { +- /* ref: INIT */ +- kref_put(&sp->cmd_kref, qla2x00_sp_release); +- } +- + /* ref: INIT */ + sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL); + if (!sp) { +@@ -3792,7 +3654,7 @@ int qla24xx_async_gpnft(scsi_qla_host_t + GFP_KERNEL); + sp->u.iocb_cmd.u.ctarg.req_allocated_size = sizeof(struct ct_sns_pkt); + if (!sp->u.iocb_cmd.u.ctarg.req) { +- ql_log(ql_log_warn, vha, 0xffff, ++ ql_log(ql_log_warn, vha, 0x201a, + "Failed to allocate ct_sns request.\n"); + spin_lock_irqsave(&vha->work_lock, flags); + vha->scan.scan_flags &= ~SF_SCANNING; +@@ -3800,7 +3662,6 @@ int qla24xx_async_gpnft(scsi_qla_host_t + qla2x00_rel_sp(sp); + return rval; + } +- 
sp->u.iocb_cmd.u.ctarg.req_size = GPN_FT_REQ_SIZE; + + rspsz = sizeof(struct ct_sns_gpnft_rsp) + + vha->hw->max_fibre_devices * +@@ -3812,7 +3673,7 @@ int qla24xx_async_gpnft(scsi_qla_host_t + GFP_KERNEL); + sp->u.iocb_cmd.u.ctarg.rsp_allocated_size = rspsz; + if (!sp->u.iocb_cmd.u.ctarg.rsp) { +- ql_log(ql_log_warn, vha, 0xffff, ++ ql_log(ql_log_warn, vha, 0x201b, + "Failed to allocate ct_sns request.\n"); + spin_lock_irqsave(&vha->work_lock, flags); + vha->scan.scan_flags &= ~SF_SCANNING; +@@ -3832,35 +3693,95 @@ int qla24xx_async_gpnft(scsi_qla_host_t + "%s scan list size %d\n", __func__, vha->scan.size); + + memset(vha->scan.l, 0, vha->scan.size); +- } else if (!sp) { +- ql_dbg(ql_dbg_disc, vha, 0xffff, +- "NVME scan did not provide SP\n"); ++ ++ vha->scan.step = FAB_SCAN_GPNFT_FCP; ++ break; ++ case FAB_SCAN_GPNFT_FCP: ++ vha->scan.step = FAB_SCAN_GNNFT_FCP; ++ break; ++ case FAB_SCAN_GNNFT_FCP: ++ vha->scan.step = FAB_SCAN_GPNFT_NVME; ++ break; ++ case FAB_SCAN_GPNFT_NVME: ++ vha->scan.step = FAB_SCAN_GNNFT_NVME; ++ break; ++ case FAB_SCAN_GNNFT_NVME: ++ default: ++ /* should not be here */ ++ WARN_ON(1); ++ goto done_free_sp; ++ } ++ ++ if (!sp) { ++ ql_dbg(ql_dbg_disc, vha, 0x201c, ++ "scan did not provide SP\n"); + return rval; + } ++ if (!sp->u.iocb_cmd.u.ctarg.req || !sp->u.iocb_cmd.u.ctarg.rsp) { ++ ql_log(ql_log_warn, vha, 0x201d, ++ "%s: req %p rsp %p are not setup\n", ++ __func__, sp->u.iocb_cmd.u.ctarg.req, ++ sp->u.iocb_cmd.u.ctarg.rsp); ++ spin_lock_irqsave(&vha->work_lock, flags); ++ vha->scan.scan_flags &= ~SF_SCANNING; ++ spin_unlock_irqrestore(&vha->work_lock, flags); ++ WARN_ON(1); ++ set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags); ++ set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags); ++ goto done_free_sp; ++ } ++ ++ rspsz = sp->u.iocb_cmd.u.ctarg.rsp_size; ++ memset(sp->u.iocb_cmd.u.ctarg.req, 0, sp->u.iocb_cmd.u.ctarg.req_size); ++ memset(sp->u.iocb_cmd.u.ctarg.rsp, 0, sp->u.iocb_cmd.u.ctarg.rsp_size); ++ + + sp->type = SRB_CT_PTHRU_CMD; +- sp->name = "gpnft"; + sp->gen1 = vha->hw->base_qpair->chip_reset; +- sp->gen2 = fc4_type; + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, +- qla2x00_async_gpnft_gnnft_sp_done); +- +- rspsz = sp->u.iocb_cmd.u.ctarg.rsp_size; +- memset(sp->u.iocb_cmd.u.ctarg.rsp, 0, sp->u.iocb_cmd.u.ctarg.rsp_size); +- memset(sp->u.iocb_cmd.u.ctarg.req, 0, sp->u.iocb_cmd.u.ctarg.req_size); ++ qla_async_scan_sp_done); + + ct_sns = (struct ct_sns_pkt *)sp->u.iocb_cmd.u.ctarg.req; +- /* CT_IU preamble */ +- ct_req = qla2x00_prep_ct_req(ct_sns, GPN_FT_CMD, rspsz); + +- /* GPN_FT req */ +- ct_req->req.gpn_ft.port_type = fc4_type; ++ /* CT_IU preamble */ ++ switch (vha->scan.step) { ++ case FAB_SCAN_GPNFT_FCP: ++ sp->name = "gpnft"; ++ ct_req = qla2x00_prep_ct_req(ct_sns, GPN_FT_CMD, rspsz); ++ ct_req->req.gpn_ft.port_type = FC4_TYPE_FCP_SCSI; ++ sp->u.iocb_cmd.u.ctarg.req_size = GPN_FT_REQ_SIZE; ++ break; ++ case FAB_SCAN_GNNFT_FCP: ++ sp->name = "gnnft"; ++ ct_req = qla2x00_prep_ct_req(ct_sns, GNN_FT_CMD, rspsz); ++ ct_req->req.gpn_ft.port_type = FC4_TYPE_FCP_SCSI; ++ sp->u.iocb_cmd.u.ctarg.req_size = GNN_FT_REQ_SIZE; ++ break; ++ case FAB_SCAN_GPNFT_NVME: ++ sp->name = "gpnft"; ++ ct_req = qla2x00_prep_ct_req(ct_sns, GPN_FT_CMD, rspsz); ++ ct_req->req.gpn_ft.port_type = FC4_TYPE_NVME; ++ sp->u.iocb_cmd.u.ctarg.req_size = GPN_FT_REQ_SIZE; ++ break; ++ case FAB_SCAN_GNNFT_NVME: ++ sp->name = "gnnft"; ++ ct_req = qla2x00_prep_ct_req(ct_sns, GNN_FT_CMD, rspsz); ++ ct_req->req.gpn_ft.port_type = FC4_TYPE_NVME; ++ sp->u.iocb_cmd.u.ctarg.req_size = 
GNN_FT_REQ_SIZE; ++ break; ++ default: ++ /* should not be here */ ++ WARN_ON(1); ++ goto done_free_sp; ++ } + + sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS; + +- ql_dbg(ql_dbg_disc, vha, 0xffff, +- "Async-%s hdl=%x FC4Type %x.\n", sp->name, +- sp->handle, ct_req->req.gpn_ft.port_type); ++ ql_dbg(ql_dbg_disc, vha, 0x2003, ++ "%s: step %d, rsp size %d, req size %d hdl %x %s FC4TYPE %x \n", ++ __func__, vha->scan.step, sp->u.iocb_cmd.u.ctarg.rsp_size, ++ sp->u.iocb_cmd.u.ctarg.req_size, sp->handle, sp->name, ++ ct_req->req.gpn_ft.port_type); + + rval = qla2x00_start_sp(sp); + if (rval != QLA_SUCCESS) { +@@ -3891,7 +3812,7 @@ done_free_sp: + spin_lock_irqsave(&vha->work_lock, flags); + vha->scan.scan_flags &= ~SF_SCANNING; + if (vha->scan.scan_flags == 0) { +- ql_dbg(ql_dbg_disc + ql_dbg_verbose, vha, 0xffff, ++ ql_dbg(ql_dbg_disc + ql_dbg_verbose, vha, 0x2007, + "%s: Scan scheduled.\n", __func__); + vha->scan.scan_flags |= SF_QUEUED; + schedule_delayed_work(&vha->scan.scan_work, 5); +@@ -3902,6 +3823,15 @@ done_free_sp: + return rval; + } + ++void qla_fab_scan_start(struct scsi_qla_host *vha) ++{ ++ int rval; ++ ++ rval = qla_fab_async_scan(vha, NULL); ++ if (rval) ++ set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags); ++} ++ + void qla_scan_work_fn(struct work_struct *work) + { + struct fab_scan *s = container_of(to_delayed_work(work), +--- a/drivers/scsi/qla2xxx/qla_init.c ++++ b/drivers/scsi/qla2xxx/qla_init.c +@@ -6407,10 +6407,7 @@ qla2x00_configure_fabric(scsi_qla_host_t + if (USE_ASYNC_SCAN(ha)) { + /* start of scan begins here */ + vha->scan.rscn_gen_end = atomic_read(&vha->rscn_gen); +- rval = qla24xx_async_gpnft(vha, FC4_TYPE_FCP_SCSI, +- NULL); +- if (rval) +- set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags); ++ qla_fab_scan_start(vha); + } else { + list_for_each_entry(fcport, &vha->vp_fcports, list) + fcport->scan_state = QLA_FCPORT_SCAN; +--- a/drivers/scsi/qla2xxx/qla_os.c ++++ b/drivers/scsi/qla2xxx/qla_os.c +@@ -5563,15 +5563,11 @@ qla2x00_do_work(struct scsi_qla_host *vh + qla2x00_async_prlo_done(vha, e->u.logio.fcport, + e->u.logio.data); + break; +- case QLA_EVT_GPNFT: +- qla24xx_async_gpnft(vha, e->u.gpnft.fc4_type, +- e->u.gpnft.sp); ++ case QLA_EVT_SCAN_CMD: ++ qla_fab_async_scan(vha, e->u.iosb.sp); + break; +- case QLA_EVT_GPNFT_DONE: +- qla24xx_async_gpnft_done(vha, e->u.iosb.sp); +- break; +- case QLA_EVT_GNNFT_DONE: +- qla24xx_async_gnnft_done(vha, e->u.iosb.sp); ++ case QLA_EVT_SCAN_FINISH: ++ qla_fab_scan_finish(vha, e->u.iosb.sp); + break; + case QLA_EVT_GFPNID: + qla24xx_async_gfpnid(vha, e->u.fcport.fcport); diff --git a/queue-6.10/scsi-qla2xxx-unable-to-act-on-rscn-for-port-online.patch b/queue-6.10/scsi-qla2xxx-unable-to-act-on-rscn-for-port-online.patch new file mode 100644 index 00000000000..fbda5869c2d --- /dev/null +++ b/queue-6.10/scsi-qla2xxx-unable-to-act-on-rscn-for-port-online.patch @@ -0,0 +1,234 @@ +From c3d98b12eef8db436e32f1a8c5478be57dc15621 Mon Sep 17 00:00:00 2001 +From: Quinn Tran +Date: Wed, 10 Jul 2024 22:40:47 +0530 +Subject: scsi: qla2xxx: Unable to act on RSCN for port online + +From: Quinn Tran + +commit c3d98b12eef8db436e32f1a8c5478be57dc15621 upstream. + +The device does not come online when the target port is online. There were +multiple RSCNs indicating multiple devices were affected. Driver is in the +process of finishing a fabric scan. A new RSCN (device up) arrived at the +tail end of the last fabric scan. 
Driver mistakenly thinks the new RSCN is +being taken care of by the previous fabric scan, where this notification is +cleared and not acted on. The laser needs to be blinked again to get the +device to show up. + +To prevent driver from accidentally clearing the RSCN notification, each +RSCN is given a generation value. A fabric scan will scan for that +generation(s). Any new RSCN arrive after the scan start will have a new +generation value. This will trigger another scan to get latest data. The +RSCN notification flag will be cleared when the scan is associate to that +generation. + +Reported-by: kernel test robot +Closes: https://lore.kernel.org/oe-kbuild-all/202406210538.w875N70K-lkp@intel.com/ +Fixes: bb2ca6b3f09a ("scsi: qla2xxx: Relogin during fabric disturbance") +Cc: stable@vger.kernel.org +Signed-off-by: Quinn Tran +Signed-off-by: Nilesh Javali +Link: https://lore.kernel.org/r/20240710171057.35066-2-njavali@marvell.com +Reviewed-by: Himanshu Madhani +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/qla2xxx/qla_def.h | 3 +++ + drivers/scsi/qla2xxx/qla_gs.c | 33 ++++++++++++++++++++++++++++++--- + drivers/scsi/qla2xxx/qla_init.c | 24 +++++++++++++++++++----- + drivers/scsi/qla2xxx/qla_inline.h | 8 ++++++++ + 4 files changed, 60 insertions(+), 8 deletions(-) + +--- a/drivers/scsi/qla2xxx/qla_def.h ++++ b/drivers/scsi/qla2xxx/qla_def.h +@@ -3312,6 +3312,8 @@ struct fab_scan_rp { + struct fab_scan { + struct fab_scan_rp *l; + u32 size; ++ u32 rscn_gen_start; ++ u32 rscn_gen_end; + u16 scan_retry; + #define MAX_SCAN_RETRIES 5 + enum scan_flags_t scan_flags; +@@ -5030,6 +5032,7 @@ typedef struct scsi_qla_host { + + /* Counter to detect races between ELS and RSCN events */ + atomic_t generation_tick; ++ atomic_t rscn_gen; + /* Time when global fcport update has been scheduled */ + int total_fcport_update_gen; + /* List of pending LOGOs, protected by tgt_mutex */ +--- a/drivers/scsi/qla2xxx/qla_gs.c ++++ b/drivers/scsi/qla2xxx/qla_gs.c +@@ -3168,6 +3168,29 @@ static int qla2x00_is_a_vp(scsi_qla_host + return rc; + } + ++static bool qla_ok_to_clear_rscn(scsi_qla_host_t *vha, fc_port_t *fcport) ++{ ++ u32 rscn_gen; ++ ++ rscn_gen = atomic_read(&vha->rscn_gen); ++ ql_dbg(ql_dbg_disc + ql_dbg_verbose, vha, 0x2017, ++ "%s %d %8phC rscn_gen %x start %x end %x current %x\n", ++ __func__, __LINE__, fcport->port_name, fcport->rscn_gen, ++ vha->scan.rscn_gen_start, vha->scan.rscn_gen_end, rscn_gen); ++ ++ if (val_is_in_range(fcport->rscn_gen, vha->scan.rscn_gen_start, ++ vha->scan.rscn_gen_end)) ++ /* rscn came in before fabric scan */ ++ return true; ++ ++ if (val_is_in_range(fcport->rscn_gen, vha->scan.rscn_gen_end, rscn_gen)) ++ /* rscn came in after fabric scan */ ++ return false; ++ ++ /* rare: fcport's scan_needed + rscn_gen must be stale */ ++ return true; ++} ++ + void qla24xx_async_gnnft_done(scsi_qla_host_t *vha, srb_t *sp) + { + fc_port_t *fcport; +@@ -3281,10 +3304,10 @@ void qla24xx_async_gnnft_done(scsi_qla_h + (fcport->scan_needed && + fcport->port_type != FCT_INITIATOR && + fcport->port_type != FCT_NVME_INITIATOR)) { ++ fcport->scan_needed = 0; + qlt_schedule_sess_for_deletion(fcport); + } + fcport->d_id.b24 = rp->id.b24; +- fcport->scan_needed = 0; + break; + } + +@@ -3325,7 +3348,9 @@ login_logout: + do_delete = true; + } + +- fcport->scan_needed = 0; ++ if (qla_ok_to_clear_rscn(vha, fcport)) ++ fcport->scan_needed = 0; ++ + if (((qla_dual_mode_enabled(vha) || + qla_ini_mode_enabled(vha)) && + atomic_read(&fcport->state) == FCS_ONLINE) || 
+@@ -3355,7 +3380,9 @@ login_logout: + fcport->port_name, fcport->loop_id, + fcport->login_retry); + } +- fcport->scan_needed = 0; ++ ++ if (qla_ok_to_clear_rscn(vha, fcport)) ++ fcport->scan_needed = 0; + qla24xx_fcport_handle_login(vha, fcport); + } + } +--- a/drivers/scsi/qla2xxx/qla_init.c ++++ b/drivers/scsi/qla2xxx/qla_init.c +@@ -1842,10 +1842,18 @@ int qla24xx_post_newsess_work(struct scs + return qla2x00_post_work(vha, e); + } + ++static void qla_rscn_gen_tick(scsi_qla_host_t *vha, u32 *ret_rscn_gen) ++{ ++ *ret_rscn_gen = atomic_inc_return(&vha->rscn_gen); ++ /* memory barrier */ ++ wmb(); ++} ++ + void qla2x00_handle_rscn(scsi_qla_host_t *vha, struct event_arg *ea) + { + fc_port_t *fcport; + unsigned long flags; ++ u32 rscn_gen; + + switch (ea->id.b.rsvd_1) { + case RSCN_PORT_ADDR: +@@ -1875,15 +1883,16 @@ void qla2x00_handle_rscn(scsi_qla_host_t + * Otherwise we're already in the middle of a relogin + */ + fcport->scan_needed = 1; +- fcport->rscn_gen++; ++ qla_rscn_gen_tick(vha, &fcport->rscn_gen); + } + } else { + fcport->scan_needed = 1; +- fcport->rscn_gen++; ++ qla_rscn_gen_tick(vha, &fcport->rscn_gen); + } + } + break; + case RSCN_AREA_ADDR: ++ qla_rscn_gen_tick(vha, &rscn_gen); + list_for_each_entry(fcport, &vha->vp_fcports, list) { + if (fcport->flags & FCF_FCP2_DEVICE && + atomic_read(&fcport->state) == FCS_ONLINE) +@@ -1891,11 +1900,12 @@ void qla2x00_handle_rscn(scsi_qla_host_t + + if ((ea->id.b24 & 0xffff00) == (fcport->d_id.b24 & 0xffff00)) { + fcport->scan_needed = 1; +- fcport->rscn_gen++; ++ fcport->rscn_gen = rscn_gen; + } + } + break; + case RSCN_DOM_ADDR: ++ qla_rscn_gen_tick(vha, &rscn_gen); + list_for_each_entry(fcport, &vha->vp_fcports, list) { + if (fcport->flags & FCF_FCP2_DEVICE && + atomic_read(&fcport->state) == FCS_ONLINE) +@@ -1903,19 +1913,20 @@ void qla2x00_handle_rscn(scsi_qla_host_t + + if ((ea->id.b24 & 0xff0000) == (fcport->d_id.b24 & 0xff0000)) { + fcport->scan_needed = 1; +- fcport->rscn_gen++; ++ fcport->rscn_gen = rscn_gen; + } + } + break; + case RSCN_FAB_ADDR: + default: ++ qla_rscn_gen_tick(vha, &rscn_gen); + list_for_each_entry(fcport, &vha->vp_fcports, list) { + if (fcport->flags & FCF_FCP2_DEVICE && + atomic_read(&fcport->state) == FCS_ONLINE) + continue; + + fcport->scan_needed = 1; +- fcport->rscn_gen++; ++ fcport->rscn_gen = rscn_gen; + } + break; + } +@@ -1924,6 +1935,7 @@ void qla2x00_handle_rscn(scsi_qla_host_t + if (vha->scan.scan_flags == 0) { + ql_dbg(ql_dbg_disc, vha, 0xffff, "%s: schedule\n", __func__); + vha->scan.scan_flags |= SF_QUEUED; ++ vha->scan.rscn_gen_start = atomic_read(&vha->rscn_gen); + schedule_delayed_work(&vha->scan.scan_work, 5); + } + spin_unlock_irqrestore(&vha->work_lock, flags); +@@ -6393,6 +6405,8 @@ qla2x00_configure_fabric(scsi_qla_host_t + qlt_do_generation_tick(vha, &discovery_gen); + + if (USE_ASYNC_SCAN(ha)) { ++ /* start of scan begins here */ ++ vha->scan.rscn_gen_end = atomic_read(&vha->rscn_gen); + rval = qla24xx_async_gpnft(vha, FC4_TYPE_FCP_SCSI, + NULL); + if (rval) +--- a/drivers/scsi/qla2xxx/qla_inline.h ++++ b/drivers/scsi/qla2xxx/qla_inline.h +@@ -631,3 +631,11 @@ static inline int qla_mapq_alloc_qp_cpu_ + } + return 0; + } ++ ++static inline bool val_is_in_range(u32 val, u32 start, u32 end) ++{ ++ if (val >= start && val <= end) ++ return true; ++ else ++ return false; ++} diff --git a/queue-6.10/scsi-qla2xxx-use-qp-lock-to-search-for-bsg.patch b/queue-6.10/scsi-qla2xxx-use-qp-lock-to-search-for-bsg.patch new file mode 100644 index 00000000000..888943be4e6 --- /dev/null +++ 
b/queue-6.10/scsi-qla2xxx-use-qp-lock-to-search-for-bsg.patch @@ -0,0 +1,149 @@ +From c449b4198701d828e40d60a2abd30970b74a1d75 Mon Sep 17 00:00:00 2001 +From: Quinn Tran +Date: Wed, 10 Jul 2024 22:40:56 +0530 +Subject: scsi: qla2xxx: Use QP lock to search for bsg + +From: Quinn Tran + +commit c449b4198701d828e40d60a2abd30970b74a1d75 upstream. + +On bsg timeout, hardware_lock is used as part of search for the srb. +Instead, qpair lock should be used to iterate through different qpair. + +Cc: stable@vger.kernel.org +Signed-off-by: Quinn Tran +Signed-off-by: Nilesh Javali +Link: https://lore.kernel.org/r/20240710171057.35066-11-njavali@marvell.com +Reviewed-by: Himanshu Madhani +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/qla2xxx/qla_bsg.c | 96 ++++++++++++++++++++++++----------------- + 1 file changed, 57 insertions(+), 39 deletions(-) + +--- a/drivers/scsi/qla2xxx/qla_bsg.c ++++ b/drivers/scsi/qla2xxx/qla_bsg.c +@@ -3059,17 +3059,61 @@ skip_chip_chk: + return ret; + } + +-int +-qla24xx_bsg_timeout(struct bsg_job *bsg_job) ++static bool qla_bsg_found(struct qla_qpair *qpair, struct bsg_job *bsg_job) + { ++ bool found = false; + struct fc_bsg_reply *bsg_reply = bsg_job->reply; + scsi_qla_host_t *vha = shost_priv(fc_bsg_to_shost(bsg_job)); + struct qla_hw_data *ha = vha->hw; +- srb_t *sp; +- int cnt, que; ++ srb_t *sp = NULL; ++ int cnt; + unsigned long flags; + struct req_que *req; + ++ spin_lock_irqsave(qpair->qp_lock_ptr, flags); ++ req = qpair->req; ++ ++ for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) { ++ sp = req->outstanding_cmds[cnt]; ++ if (sp && ++ (sp->type == SRB_CT_CMD || ++ sp->type == SRB_ELS_CMD_HST || ++ sp->type == SRB_ELS_CMD_HST_NOLOGIN) && ++ sp->u.bsg_job == bsg_job) { ++ req->outstanding_cmds[cnt] = NULL; ++ spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); ++ ++ if (!ha->flags.eeh_busy && ha->isp_ops->abort_command(sp)) { ++ ql_log(ql_log_warn, vha, 0x7089, ++ "mbx abort_command failed.\n"); ++ bsg_reply->result = -EIO; ++ } else { ++ ql_dbg(ql_dbg_user, vha, 0x708a, ++ "mbx abort_command success.\n"); ++ bsg_reply->result = 0; ++ } ++ /* ref: INIT */ ++ kref_put(&sp->cmd_kref, qla2x00_sp_release); ++ ++ found = true; ++ goto done; ++ } ++ } ++ spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); ++ ++done: ++ return found; ++} ++ ++int ++qla24xx_bsg_timeout(struct bsg_job *bsg_job) ++{ ++ struct fc_bsg_reply *bsg_reply = bsg_job->reply; ++ scsi_qla_host_t *vha = shost_priv(fc_bsg_to_shost(bsg_job)); ++ struct qla_hw_data *ha = vha->hw; ++ int i; ++ struct qla_qpair *qpair; ++ + ql_log(ql_log_info, vha, 0x708b, "%s CMD timeout. 
bsg ptr %p.\n", + __func__, bsg_job); + +@@ -3079,48 +3123,22 @@ qla24xx_bsg_timeout(struct bsg_job *bsg_ + qla_pci_set_eeh_busy(vha); + } + ++ if (qla_bsg_found(ha->base_qpair, bsg_job)) ++ goto done; ++ + /* find the bsg job from the active list of commands */ +- spin_lock_irqsave(&ha->hardware_lock, flags); +- for (que = 0; que < ha->max_req_queues; que++) { +- req = ha->req_q_map[que]; +- if (!req) ++ for (i = 0; i < ha->max_qpairs; i++) { ++ qpair = vha->hw->queue_pair_map[i]; ++ if (!qpair) + continue; +- +- for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) { +- sp = req->outstanding_cmds[cnt]; +- if (sp && +- (sp->type == SRB_CT_CMD || +- sp->type == SRB_ELS_CMD_HST || +- sp->type == SRB_ELS_CMD_HST_NOLOGIN || +- sp->type == SRB_FXIOCB_BCMD) && +- sp->u.bsg_job == bsg_job) { +- req->outstanding_cmds[cnt] = NULL; +- spin_unlock_irqrestore(&ha->hardware_lock, flags); +- +- if (!ha->flags.eeh_busy && ha->isp_ops->abort_command(sp)) { +- ql_log(ql_log_warn, vha, 0x7089, +- "mbx abort_command failed.\n"); +- bsg_reply->result = -EIO; +- } else { +- ql_dbg(ql_dbg_user, vha, 0x708a, +- "mbx abort_command success.\n"); +- bsg_reply->result = 0; +- } +- spin_lock_irqsave(&ha->hardware_lock, flags); +- goto done; +- +- } +- } ++ if (qla_bsg_found(qpair, bsg_job)) ++ goto done; + } +- spin_unlock_irqrestore(&ha->hardware_lock, flags); ++ + ql_log(ql_log_info, vha, 0x708b, "SRB not found to abort.\n"); + bsg_reply->result = -ENXIO; +- return 0; + + done: +- spin_unlock_irqrestore(&ha->hardware_lock, flags); +- /* ref: INIT */ +- kref_put(&sp->cmd_kref, qla2x00_sp_release); + return 0; + } + diff --git a/queue-6.10/scsi-qla2xxx-validate-nvme_local_port-correctly.patch b/queue-6.10/scsi-qla2xxx-validate-nvme_local_port-correctly.patch new file mode 100644 index 00000000000..ff91f07d597 --- /dev/null +++ b/queue-6.10/scsi-qla2xxx-validate-nvme_local_port-correctly.patch @@ -0,0 +1,60 @@ +From eb1d4ce2609584eeb7694866f34d4b213caa3af9 Mon Sep 17 00:00:00 2001 +From: Nilesh Javali +Date: Wed, 10 Jul 2024 22:40:48 +0530 +Subject: scsi: qla2xxx: validate nvme_local_port correctly + +From: Nilesh Javali + +commit eb1d4ce2609584eeb7694866f34d4b213caa3af9 upstream. + +The driver load failed with error message, + +qla2xxx [0000:04:00.0]-ffff:0: register_localport failed: ret=ffffffef + +and with a kernel crash, + + BUG: unable to handle kernel NULL pointer dereference at 0000000000000070 + Workqueue: events_unbound qla_register_fcport_fn [qla2xxx] + RIP: 0010:nvme_fc_register_remoteport+0x16/0x430 [nvme_fc] + RSP: 0018:ffffaaa040eb3d98 EFLAGS: 00010282 + RAX: 0000000000000000 RBX: ffff9dfb46b78c00 RCX: 0000000000000000 + RDX: ffff9dfb46b78da8 RSI: ffffaaa040eb3e08 RDI: 0000000000000000 + RBP: ffff9dfb612a0a58 R08: ffffffffaf1d6270 R09: 3a34303a30303030 + R10: 34303a303030305b R11: 2078787832616c71 R12: ffff9dfb46b78dd4 + R13: ffff9dfb46b78c24 R14: ffff9dfb41525300 R15: ffff9dfb46b78da8 + FS: 0000000000000000(0000) GS:ffff9dfc67c00000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 0000000000000070 CR3: 000000018da10004 CR4: 00000000000206f0 + Call Trace: + qla_nvme_register_remote+0xeb/0x1f0 [qla2xxx] + ? qla2x00_dfs_create_rport+0x231/0x270 [qla2xxx] + qla2x00_update_fcport+0x2a1/0x3c0 [qla2xxx] + qla_register_fcport_fn+0x54/0xc0 [qla2xxx] + +Exit the qla_nvme_register_remote() function when qla_nvme_register_hba() +fails and correctly validate nvme_local_port. 
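
(Editorial aside, not part of the patch: the hunk further down splits the original combined setup-and-check condition into an explicit failure exit followed by an explicit pointer check. The sketch below is a minimal, self-contained illustration of that general pattern only; struct host, local_port, register_local_port() and register_remote() are hypothetical stand-ins, not the driver's real types or calls.)

#include <stdio.h>

/* hypothetical stand-ins; not the qla2xxx API */
struct host {
	void *local_port;		/* remains NULL if registration failed */
};

static int register_local_port(struct host *h)
{
	(void)h;
	return -1;			/* simulate the lower layer rejecting the registration */
}

static int register_remote(struct host *h)
{
	if (register_local_port(h))
		return 0;		/* setup failed: stop, do not fall through */

	if (!h->local_port)
		return 0;		/* still unset: never hand a NULL port downstream */

	printf("remote registered against %p\n", h->local_port);
	return 1;
}

int main(void)
{
	struct host h = { 0 };
	register_remote(&h);		/* prints nothing: both guards bail out */
	return 0;
}
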
+ +Cc: stable@vger.kernel.org +Signed-off-by: Nilesh Javali +Link: https://lore.kernel.org/r/20240710171057.35066-3-njavali@marvell.com +Reviewed-by: Himanshu Madhani +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/qla2xxx/qla_nvme.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/scsi/qla2xxx/qla_nvme.c ++++ b/drivers/scsi/qla2xxx/qla_nvme.c +@@ -49,7 +49,10 @@ int qla_nvme_register_remote(struct scsi + return 0; + } + +- if (!vha->nvme_local_port && qla_nvme_register_hba(vha)) ++ if (qla_nvme_register_hba(vha)) ++ return 0; ++ ++ if (!vha->nvme_local_port) + return 0; + + if (!(fcport->nvme_prli_service_param & diff --git a/queue-6.10/selftests-sigaltstack-fix-ppc64-gcc-build.patch b/queue-6.10/selftests-sigaltstack-fix-ppc64-gcc-build.patch new file mode 100644 index 00000000000..a1caf44e0ae --- /dev/null +++ b/queue-6.10/selftests-sigaltstack-fix-ppc64-gcc-build.patch @@ -0,0 +1,48 @@ +From 17c743b9da9e0d073ff19fd5313f521744514939 Mon Sep 17 00:00:00 2001 +From: Michael Ellerman +Date: Mon, 20 May 2024 16:26:47 +1000 +Subject: selftests/sigaltstack: Fix ppc64 GCC build +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Michael Ellerman + +commit 17c743b9da9e0d073ff19fd5313f521744514939 upstream. + +Building the sigaltstack test with GCC on 64-bit powerpc errors with: + + gcc -Wall sas.c -o /home/michael/linux/.build/kselftest/sigaltstack/sas + In file included from sas.c:23: + current_stack_pointer.h:22:2: error: #error "implement current_stack_pointer equivalent" + 22 | #error "implement current_stack_pointer equivalent" + | ^~~~~ + sas.c: In function ‘my_usr1’: + sas.c:50:13: error: ‘sp’ undeclared (first use in this function); did you mean ‘p’? + 50 | if (sp < (unsigned long)sstack || + | ^~ + +This happens because GCC doesn't define __ppc__ for 64-bit builds, only +32-bit builds. Instead use __powerpc__ to detect powerpc builds, which +is defined by clang and GCC for 64-bit and 32-bit builds. 
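
(Editorial aside, not part of the patch: the stand-alone check below builds on any host and makes the macro difference visible. It only restates what the commit message above says about which compilers predefine which macro; nothing else is assumed.)

#include <stdio.h>

int main(void)
{
#if defined(__powerpc__)
	puts("__powerpc__ set: defined by GCC and clang, 32-bit and 64-bit powerpc");
#elif defined(__ppc__)
	puts("__ppc__ only: the guard the old selftest header relied on");
#else
	puts("not a powerpc build");
#endif
	return 0;
}
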
+ +Fixes: 05107edc9101 ("selftests: sigaltstack: fix -Wuninitialized") +Cc: stable@vger.kernel.org # v6.3+ +Signed-off-by: Michael Ellerman +Link: https://msgid.link/20240520062647.688667-1-mpe@ellerman.id.au +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/sigaltstack/current_stack_pointer.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/tools/testing/selftests/sigaltstack/current_stack_pointer.h ++++ b/tools/testing/selftests/sigaltstack/current_stack_pointer.h +@@ -8,7 +8,7 @@ register unsigned long sp asm("sp"); + register unsigned long sp asm("esp"); + #elif __loongarch64 + register unsigned long sp asm("$sp"); +-#elif __ppc__ ++#elif __powerpc__ + register unsigned long sp asm("r1"); + #elif __s390x__ + register unsigned long sp asm("%15"); diff --git a/queue-6.10/series b/queue-6.10/series index 23edc3cbcee..15fcf9bbafb 100644 --- a/queue-6.10/series +++ b/queue-6.10/series @@ -679,3 +679,52 @@ md-raid1-set-max_sectors-during-early-return-from-choose_slow_rdev.patch irqchip-imx-irqsteer-handle-runtime-power-management-correctly.patch mm-numa_balancing-teach-mpol_to_str-about-the-balancing-mode.patch rtc-cmos-fix-return-value-of-nvmem-callbacks.patch +scsi-lpfc-allow-device_recovery-mode-after-rscn-receipt-if-in-prli_issue-state.patch +scsi-qla2xxx-during-vport-delete-send-async-logout-explicitly.patch +scsi-qla2xxx-unable-to-act-on-rscn-for-port-online.patch +scsi-qla2xxx-fix-for-possible-memory-corruption.patch +scsi-qla2xxx-use-qp-lock-to-search-for-bsg.patch +scsi-qla2xxx-reduce-fabric-scan-duplicate-code.patch +scsi-qla2xxx-fix-flash-read-failure.patch +scsi-qla2xxx-complete-command-early-within-lock.patch +scsi-qla2xxx-validate-nvme_local_port-correctly.patch +perf-fix-event-leak-upon-exit.patch +perf-fix-event-leak-upon-exec-and-file-release.patch +perf-stat-fix-the-hard-coded-metrics-calculation-on-the-hybrid.patch +perf-x86-intel-uncore-fix-the-bits-of-the-cha-extended-umask-for-spr.patch +perf-x86-intel-ds-fix-non-0-retire-latency-on-raptorlake.patch +perf-x86-intel-pt-fix-topa_entry-base-length.patch +perf-x86-intel-pt-fix-a-topa_entry-base-address-calculation.patch +drm-i915-gt-do-not-consider-preemption-during-execlists_dequeue-for-gen8.patch +drm-amdgpu-sdma5.2-update-wptr-registers-as-well-as-doorbell.patch +drm-udl-remove-drm_connector_poll_hpd.patch +drm-dp_mst-fix-all-mstb-marked-as-not-probed-after-suspend-resume.patch +drm-amdgpu-reset-vm-state-machine-after-gpu-reset-vram-lost.patch +drm-amdgpu-add-missed-harvest-check-for-vcn-ip-v4-v5.patch +drm-amd-amdgpu-fix-uninitialized-variable-warnings.patch +drm-i915-dp-reset-intel_dp-link_trained-before-retraining-the-link.patch +drm-i915-dp-don-t-switch-the-lttpr-mode-on-an-active-link.patch +rtc-isl1208-fix-return-value-of-nvmem-callbacks.patch +rtc-abx80x-fix-return-value-of-nvmem-callback-on-read.patch +watchdog-perf-properly-initialize-the-turbo-mode-timestamp-and-rearm-counter.patch +bus-mhi-ep-do-not-allocate-memory-for-mhi-objects-from-dma-zone.patch +asoc-codecs-wcd939x-fix-typec-mux-and-switch-leak-during-device-removal.patch +asoc-sof-ipc4-topology-use-correct-queue_id-for-requesting-input-pin-format.patch +platform-mips-cpu_hwmon-disable-driver-on-unsupported-hardware.patch +rdma-iwcm-fix-a-use-after-free-related-to-destroying-cm-ids.patch +crypto-ccp-fix-null-pointer-dereference-in-__sev_snp_shutdown_locked.patch +selftests-sigaltstack-fix-ppc64-gcc-build.patch +dm-verity-fix-dm_is_verity_target-when-dm-verity-is-builtin.patch 
+rbd-don-t-assume-rbd_is_lock_owner-for-exclusive-mappings.patch +remoteproc-stm32_rproc-fix-mailbox-interrupts-queuing.patch +remoteproc-imx_rproc-skip-over-memory-region-when-node-value-is-null.patch +remoteproc-imx_rproc-fix-refcount-mistake-in-imx_rproc_addr_init.patch +mips-dts-loongson-add-isa-node.patch +mips-ip30-ip30-console-add-missing-include.patch +mips-dts-loongson-fix-gmac-phy-node.patch +mips-loongson64-env-hook-up-loongsson-2k.patch +mips-loongson64-remove-memory-node-for-builtin-dtb.patch +mips-loongson64-reset-prioritise-firmware-service.patch +mips-loongson64-test-register-availability-before-use.patch +drm-etnaviv-don-t-block-scheduler-when-gpu-is-still-active.patch +drm-panfrost-mark-simple_ondemand-governor-as-softdep.patch diff --git a/queue-6.10/watchdog-perf-properly-initialize-the-turbo-mode-timestamp-and-rearm-counter.patch b/queue-6.10/watchdog-perf-properly-initialize-the-turbo-mode-timestamp-and-rearm-counter.patch new file mode 100644 index 00000000000..ef40242b196 --- /dev/null +++ b/queue-6.10/watchdog-perf-properly-initialize-the-turbo-mode-timestamp-and-rearm-counter.patch @@ -0,0 +1,68 @@ +From f944ffcbc2e1c759764850261670586ddf3bdabb Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 11 Jul 2024 22:25:21 +0200 +Subject: watchdog/perf: properly initialize the turbo mode timestamp and rearm counter + +From: Thomas Gleixner + +commit f944ffcbc2e1c759764850261670586ddf3bdabb upstream. + +For systems on which the performance counter can expire early due to turbo +modes the watchdog handler has a safety net in place which validates that +since the last watchdog event there has at least 4/5th of the watchdog +period elapsed. + +This works reliably only after the first watchdog event because the per +CPU variable which holds the timestamp of the last event is never +initialized. + +So a first spurious event will validate against a timestamp of 0 which +results in a delta which is likely to be way over the 4/5 threshold of the +period. As this might happen before the first watchdog hrtimer event +increments the watchdog counter, this can lead to false positives. + +Fix this by initializing the timestamp before enabling the hardware event. +Reset the rearm counter as well, as that might be non zero after the +watchdog was disabled and reenabled. + +Link: https://lkml.kernel.org/r/87frsfu15a.ffs@tglx +Fixes: 7edaeb6841df ("kernel/watchdog: Prevent false positives with turbo modes") +Signed-off-by: Thomas Gleixner +Cc: Arjan van de Ven +Cc: Peter Zijlstra +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + kernel/watchdog_perf.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +--- a/kernel/watchdog_perf.c ++++ b/kernel/watchdog_perf.c +@@ -75,11 +75,15 @@ static bool watchdog_check_timestamp(voi + __this_cpu_write(last_timestamp, now); + return true; + } +-#else +-static inline bool watchdog_check_timestamp(void) ++ ++static void watchdog_init_timestamp(void) + { +- return true; ++ __this_cpu_write(nmi_rearmed, 0); ++ __this_cpu_write(last_timestamp, ktime_get_mono_fast_ns()); + } ++#else ++static inline bool watchdog_check_timestamp(void) { return true; } ++static inline void watchdog_init_timestamp(void) { } + #endif + + static struct perf_event_attr wd_hw_attr = { +@@ -161,6 +165,7 @@ void watchdog_hardlockup_enable(unsigned + if (!atomic_fetch_inc(&watchdog_cpus)) + pr_info("Enabled. 
Permanently consumes one hw-PMU counter.\n"); + ++ watchdog_init_timestamp(); + perf_event_enable(this_cpu_read(watchdog_ev)); + } +
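
(Editorial aside on the watchdog/perf patch above, not part of the patch: the ordering it enforces — prime the per-CPU timestamp, then enable the event — matters because the 4/5-period filter compares against that timestamp. The simplified user-space model below uses a hard-coded 10 s period and hypothetical times, and deliberately omits the kernel's rearm-count handling; it only shows how a zero baseline lets an early counter expiry through while a primed baseline rejects it.)

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PERIOD_NS	10000000000ull			/* 10 s, illustrative only */
#define THRESHOLD_NS	(PERIOD_NS / 5 * 4)		/* 4/5 of the period */

static uint64_t last_timestamp;				/* 0 until primed */

/* simplified: true means "treat this NMI as a genuine watchdog event" */
static bool check_timestamp(uint64_t now)
{
	uint64_t delta = now - last_timestamp;

	last_timestamp = now;
	return delta >= THRESHOLD_NS;
}

int main(void)
{
	uint64_t uptime = 3600ull * 1000000000ull;	/* watchdog enabled after 1 h */
	uint64_t early_nmi = uptime + 1000000000ull;	/* counter expires 1 s later */

	/* unprimed baseline of 0: delta is the whole uptime, event accepted */
	printf("unprimed: %s\n", check_timestamp(early_nmi) ? "accepted" : "rearmed");

	/* primed right before enabling, as the patch does */
	last_timestamp = uptime;
	printf("primed:   %s\n", check_timestamp(early_nmi) ? "accepted" : "rearmed");
	return 0;
}
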