--- /dev/null
+From 8d6a1fea53864cd9545741f48f4ae4df804db557 Mon Sep 17 00:00:00 2001
+From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Date: Wed, 28 Dec 2022 21:47:03 +0530
+Subject: bus: mhi: ep: Move chan->lock to the start of processing queued ch ring
+
+From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+
+commit 8d6a1fea53864cd9545741f48f4ae4df804db557 upstream.
+
+There is a good chance that while the channel ring gets processed, the STOP
+or RESET command for the channel might be received from the MHI host. In
+those cases, the entire channel ring processing needs to be protected by
+chan->lock to prevent the race where the corresponding channel ring might
+be reset.
+
+While at it, let's also add a sanity check to make sure that the ring is
+started before processing it. Because, if the STOP/RESET command gets
+processed while mhi_ep_ch_ring_worker() waited for chan->lock, the ring
+would've been reset.
+
+Cc: <stable@vger.kernel.org> # 5.19
+Fixes: 03c0bb8ec983 ("bus: mhi: ep: Add support for processing channel rings")
+Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
+Link: https://lore.kernel.org/r/20221228161704.255268-6-manivannan.sadhasivam@linaro.org
+Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/bus/mhi/ep/main.c | 17 +++++++++++++++--
+ 1 file changed, 15 insertions(+), 2 deletions(-)
+
+--- a/drivers/bus/mhi/ep/main.c
++++ b/drivers/bus/mhi/ep/main.c
+@@ -723,24 +723,37 @@ static void mhi_ep_ch_ring_worker(struct
+ list_del(&itr->node);
+ ring = itr->ring;
+
++ chan = &mhi_cntrl->mhi_chan[ring->ch_id];
++ mutex_lock(&chan->lock);
++
++ /*
++ * The ring could've stopped while we waited to grab the (chan->lock), so do
++ * a sanity check before going further.
++ */
++ if (!ring->started) {
++ mutex_unlock(&chan->lock);
++ kfree(itr);
++ continue;
++ }
++
+ /* Update the write offset for the ring */
+ ret = mhi_ep_update_wr_offset(ring);
+ if (ret) {
+ dev_err(dev, "Error updating write offset for ring\n");
++ mutex_unlock(&chan->lock);
+ kfree(itr);
+ continue;
+ }
+
+ /* Sanity check to make sure there are elements in the ring */
+ if (ring->rd_offset == ring->wr_offset) {
++ mutex_unlock(&chan->lock);
+ kfree(itr);
+ continue;
+ }
+
+ el = &ring->ring_cache[ring->rd_offset];
+- chan = &mhi_cntrl->mhi_chan[ring->ch_id];
+
+- mutex_lock(&chan->lock);
+ dev_dbg(dev, "Processing the ring for channel (%u)\n", ring->ch_id);
+ ret = mhi_ep_process_ch_ring(ring, el);
+ if (ret) {
--- /dev/null
+From e6cebcc27519dcf1652e604c73b9fd4f416987c0 Mon Sep 17 00:00:00 2001
+From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Date: Wed, 28 Dec 2022 21:47:01 +0530
+Subject: bus: mhi: ep: Only send -ENOTCONN status if client driver is available
+
+From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+
+commit e6cebcc27519dcf1652e604c73b9fd4f416987c0 upstream.
+
+For the STOP and RESET commands, only send the channel disconnect status
+-ENOTCONN if client driver is available. Otherwise, it will result in
+null pointer dereference.
+
+Cc: <stable@vger.kernel.org> # 5.19
+Fixes: e827569062a8 ("bus: mhi: ep: Add support for processing command rings")
+Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
+Link: https://lore.kernel.org/r/20221228161704.255268-4-manivannan.sadhasivam@linaro.org
+Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/bus/mhi/ep/main.c | 16 ++++++++++------
+ 1 file changed, 10 insertions(+), 6 deletions(-)
+
+--- a/drivers/bus/mhi/ep/main.c
++++ b/drivers/bus/mhi/ep/main.c
+@@ -196,9 +196,11 @@ static int mhi_ep_process_cmd_ring(struc
+ mhi_ep_mmio_disable_chdb(mhi_cntrl, ch_id);
+
+ /* Send channel disconnect status to client drivers */
+- result.transaction_status = -ENOTCONN;
+- result.bytes_xferd = 0;
+- mhi_chan->xfer_cb(mhi_chan->mhi_dev, &result);
++ if (mhi_chan->xfer_cb) {
++ result.transaction_status = -ENOTCONN;
++ result.bytes_xferd = 0;
++ mhi_chan->xfer_cb(mhi_chan->mhi_dev, &result);
++ }
+
+ /* Set channel state to STOP */
+ mhi_chan->state = MHI_CH_STATE_STOP;
+@@ -228,9 +230,11 @@ static int mhi_ep_process_cmd_ring(struc
+ mhi_ep_ring_reset(mhi_cntrl, ch_ring);
+
+ /* Send channel disconnect status to client driver */
+- result.transaction_status = -ENOTCONN;
+- result.bytes_xferd = 0;
+- mhi_chan->xfer_cb(mhi_chan->mhi_dev, &result);
++ if (mhi_chan->xfer_cb) {
++ result.transaction_status = -ENOTCONN;
++ result.bytes_xferd = 0;
++ mhi_chan->xfer_cb(mhi_chan->mhi_dev, &result);
++ }
+
+ /* Set channel state to DISABLED */
+ mhi_chan->state = MHI_CH_STATE_DISABLED;
--- /dev/null
+From 8a1c24bb908f9ecbc4be0fea014df67d43161551 Mon Sep 17 00:00:00 2001
+From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Date: Wed, 28 Dec 2022 21:47:04 +0530
+Subject: bus: mhi: ep: Save channel state locally during suspend and resume
+
+From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+
+commit 8a1c24bb908f9ecbc4be0fea014df67d43161551 upstream.
+
+During suspend and resume, the channel state needs to be saved locally.
+Otherwise, the endpoint may access the channels while they are
+suspended, causing access violations.
+
+Fix it by saving the channel state locally during suspend and resume.
+
+Cc: <stable@vger.kernel.org> # 5.19
+Fixes: e4b7b5f0f30a ("bus: mhi: ep: Add support for suspending and resuming channels")
+Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
+Link: https://lore.kernel.org/r/20221228161704.255268-7-manivannan.sadhasivam@linaro.org
+Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/bus/mhi/ep/main.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/bus/mhi/ep/main.c
++++ b/drivers/bus/mhi/ep/main.c
+@@ -1136,6 +1136,7 @@ void mhi_ep_suspend_channels(struct mhi_
+
+ dev_dbg(&mhi_chan->mhi_dev->dev, "Suspending channel\n");
+ /* Set channel state to SUSPENDED */
++ mhi_chan->state = MHI_CH_STATE_SUSPENDED;
+ tmp &= ~CHAN_CTX_CHSTATE_MASK;
+ tmp |= FIELD_PREP(CHAN_CTX_CHSTATE_MASK, MHI_CH_STATE_SUSPENDED);
+ mhi_cntrl->ch_ctx_cache[i].chcfg = cpu_to_le32(tmp);
+@@ -1165,6 +1166,7 @@ void mhi_ep_resume_channels(struct mhi_e
+
+ dev_dbg(&mhi_chan->mhi_dev->dev, "Resuming channel\n");
+ /* Set channel state to RUNNING */
++ mhi_chan->state = MHI_CH_STATE_RUNNING;
+ tmp &= ~CHAN_CTX_CHSTATE_MASK;
+ tmp |= FIELD_PREP(CHAN_CTX_CHSTATE_MASK, MHI_CH_STATE_RUNNING);
+ mhi_cntrl->ch_ctx_cache[i].chcfg = cpu_to_le32(tmp);
--- /dev/null
+From 65a24000808f70ac69bd2a96381fa0c7341f20c0 Mon Sep 17 00:00:00 2001
+From: Mario Limonciello <mario.limonciello@amd.com>
+Date: Sun, 19 Feb 2023 23:04:04 -0600
+Subject: drm/amd: Fix initialization for nbio 7.5.1
+
+From: Mario Limonciello <mario.limonciello@amd.com>
+
+commit 65a24000808f70ac69bd2a96381fa0c7341f20c0 upstream.
+
+A mistake has been made in the BIOS for some ASICs with NBIO 7.5.1
+where some NBIO registers aren't properly setup.
+
+Ensure that they're set during initialization.
+
+Tested-by: Richard Gong <richard.gong@amd.com>
+Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
+Acked-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org # 6.1.x
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c
++++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c
+@@ -382,6 +382,11 @@ static void nbio_v7_2_init_registers(str
+ if (def != data)
+ WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regBIF1_PCIE_MST_CTRL_3), data);
+ break;
++ case IP_VERSION(7, 5, 1):
++ data = RREG32_SOC15(NBIO, 0, regRCC_DEV2_EPF0_STRAP2);
++ data &= ~RCC_DEV2_EPF0_STRAP2__STRAP_NO_SOFT_RESET_DEV2_F0_MASK;
++ WREG32_SOC15(NBIO, 0, regRCC_DEV2_EPF0_STRAP2, data);
++ fallthrough;
+ default:
+ def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CONFIG_CNTL));
+ data = REG_SET_FIELD(data, PCIE_CONFIG_CNTL,
--- /dev/null
+From 951df98024f7272f85df5044eca7374f5b5b24ef Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Noralf=20Tr=C3=B8nnes?= <noralf@tronnes.org>
+Date: Wed, 30 Nov 2022 20:26:49 +0100
+Subject: drm/gud: Fix UBSAN warning
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Noralf Trønnes <noralf@tronnes.org>
+
+commit 951df98024f7272f85df5044eca7374f5b5b24ef upstream.
+
+UBSAN complains about invalid value for bool:
+
+[ 101.165172] [drm] Initialized gud 1.0.0 20200422 for 2-3.2:1.0 on minor 1
+[ 101.213360] gud 2-3.2:1.0: [drm] fb1: guddrmfb frame buffer device
+[ 101.213426] usbcore: registered new interface driver gud
+[ 101.989431] ================================================================================
+[ 101.989441] UBSAN: invalid-load in linux/include/linux/iosys-map.h:253:9
+[ 101.989447] load of value 121 is not a valid value for type '_Bool'
+[ 101.989451] CPU: 1 PID: 455 Comm: kworker/1:6 Not tainted 5.18.0-rc5-gud-5.18-rc5 #3
+[ 101.989456] Hardware name: Hewlett-Packard HP EliteBook 820 G1/1991, BIOS L71 Ver. 01.44 04/12/2018
+[ 101.989459] Workqueue: events_long gud_flush_work [gud]
+[ 101.989471] Call Trace:
+[ 101.989474] <TASK>
+[ 101.989479] dump_stack_lvl+0x49/0x5f
+[ 101.989488] dump_stack+0x10/0x12
+[ 101.989493] ubsan_epilogue+0x9/0x3b
+[ 101.989498] __ubsan_handle_load_invalid_value.cold+0x44/0x49
+[ 101.989504] dma_buf_vmap.cold+0x38/0x3d
+[ 101.989511] ? find_busiest_group+0x48/0x300
+[ 101.989520] drm_gem_shmem_vmap+0x76/0x1b0 [drm_shmem_helper]
+[ 101.989528] drm_gem_shmem_object_vmap+0x9/0xb [drm_shmem_helper]
+[ 101.989535] drm_gem_vmap+0x26/0x60 [drm]
+[ 101.989594] drm_gem_fb_vmap+0x47/0x150 [drm_kms_helper]
+[ 101.989630] gud_prep_flush+0xc1/0x710 [gud]
+[ 101.989639] ? _raw_spin_lock+0x17/0x40
+[ 101.989648] gud_flush_work+0x1e0/0x430 [gud]
+[ 101.989653] ? __switch_to+0x11d/0x470
+[ 101.989664] process_one_work+0x21f/0x3f0
+[ 101.989673] worker_thread+0x200/0x3e0
+[ 101.989679] ? rescuer_thread+0x390/0x390
+[ 101.989684] kthread+0xfd/0x130
+[ 101.989690] ? kthread_complete_and_exit+0x20/0x20
+[ 101.989696] ret_from_fork+0x22/0x30
+[ 101.989706] </TASK>
+[ 101.989708] ================================================================================
+
+The source of this warning is in iosys_map_clear() called from
+dma_buf_vmap(). It conditionally sets values based on map->is_iomem. The
+iosys_map variables are allocated uninitialized on the stack leading to
+->is_iomem having all kinds of values and not only 0/1.
+
+Fix this by zeroing the iosys_map variables.
+
+Fixes: 40e1a70b4aed ("drm: Add GUD USB Display driver")
+Cc: <stable@vger.kernel.org> # v5.18+
+Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
+Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
+Signed-off-by: Noralf Trønnes <noralf@tronnes.org>
+Link: https://patchwork.freedesktop.org/patch/msgid/20221122-gud-shadow-plane-v2-1-435037990a83@tronnes.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/gud/gud_pipe.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/gud/gud_pipe.c
++++ b/drivers/gpu/drm/gud/gud_pipe.c
+@@ -157,8 +157,8 @@ static int gud_prep_flush(struct gud_dev
+ {
+ struct dma_buf_attachment *import_attach = fb->obj[0]->import_attach;
+ u8 compression = gdrm->compression;
+- struct iosys_map map[DRM_FORMAT_MAX_PLANES];
+- struct iosys_map map_data[DRM_FORMAT_MAX_PLANES];
++ struct iosys_map map[DRM_FORMAT_MAX_PLANES] = { };
++ struct iosys_map map_data[DRM_FORMAT_MAX_PLANES] = { };
+ struct iosys_map dst;
+ void *vaddr, *buf;
+ size_t pitch, len;
--- /dev/null
+From 85636167e3206c3fbd52254fc432991cc4e90194 Mon Sep 17 00:00:00 2001
+From: John Harrison <John.C.Harrison@Intel.com>
+Date: Wed, 15 Feb 2023 17:11:01 -0800
+Subject: drm/i915: Don't use BAR mappings for ring buffers with LLC
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: John Harrison <John.C.Harrison@Intel.com>
+
+commit 85636167e3206c3fbd52254fc432991cc4e90194 upstream.
+
+Direction from hardware is that ring buffers should never be mapped
+via the BAR on systems with LLC. There are too many caching pitfalls
+due to the way BAR accesses are routed. So it is safest to just not
+use it.
+
+Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
+Fixes: 9d80841ea4c9 ("drm/i915: Allow ringbuffers to be bound anywhere")
+Cc: Chris Wilson <chris@chris-wilson.co.uk>
+Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
+Cc: Jani Nikula <jani.nikula@linux.intel.com>
+Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
+Cc: intel-gfx@lists.freedesktop.org
+Cc: <stable@vger.kernel.org> # v4.9+
+Tested-by: Jouni Högander <jouni.hogander@intel.com>
+Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230216011101.1909009-3-John.C.Harrison@Intel.com
+(cherry picked from commit 65c08339db1ada87afd6cfe7db8e60bb4851d919)
+Signed-off-by: Jani Nikula <jani.nikula@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/gt/intel_ring.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/i915/gt/intel_ring.c
++++ b/drivers/gpu/drm/i915/gt/intel_ring.c
+@@ -53,7 +53,7 @@ int intel_ring_pin(struct intel_ring *ri
+ if (unlikely(ret))
+ goto err_unpin;
+
+- if (i915_vma_is_map_and_fenceable(vma)) {
++ if (i915_vma_is_map_and_fenceable(vma) && !HAS_LLC(vma->vm->i915)) {
+ addr = (void __force *)i915_vma_pin_iomap(vma);
+ } else {
+ int type = i915_coherent_map_type(vma->vm->i915, vma->obj, false);
+@@ -98,7 +98,7 @@ void intel_ring_unpin(struct intel_ring
+ return;
+
+ i915_vma_unset_ggtt_write(vma);
+- if (i915_vma_is_map_and_fenceable(vma))
++ if (i915_vma_is_map_and_fenceable(vma) && !HAS_LLC(vma->vm->i915))
+ i915_vma_unpin_iomap(vma);
+ else
+ i915_gem_object_unpin_map(vma->obj);
--- /dev/null
+From 690e0ec8e63da9a29b39fedc6ed5da09c7c82651 Mon Sep 17 00:00:00 2001
+From: John Harrison <John.C.Harrison@Intel.com>
+Date: Wed, 15 Feb 2023 17:11:00 -0800
+Subject: drm/i915: Don't use stolen memory for ring buffers with LLC
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: John Harrison <John.C.Harrison@Intel.com>
+
+commit 690e0ec8e63da9a29b39fedc6ed5da09c7c82651 upstream.
+
+Direction from hardware is that stolen memory should never be used for
+ring buffer allocations on platforms with LLC. There are too many
+caching pitfalls due to the way stolen memory accesses are routed. So
+it is safest to just not use it.
+
+Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
+Fixes: c58b735fc762 ("drm/i915: Allocate rings from stolen")
+Cc: Chris Wilson <chris@chris-wilson.co.uk>
+Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
+Cc: Jani Nikula <jani.nikula@linux.intel.com>
+Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
+Cc: intel-gfx@lists.freedesktop.org
+Cc: <stable@vger.kernel.org> # v4.9+
+Tested-by: Jouni Högander <jouni.hogander@intel.com>
+Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230216011101.1909009-2-John.C.Harrison@Intel.com
+(cherry picked from commit f54c1f6c697c4297f7ed94283c184acc338a5cf8)
+Signed-off-by: Jani Nikula <jani.nikula@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/gt/intel_ring.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/i915/gt/intel_ring.c
++++ b/drivers/gpu/drm/i915/gt/intel_ring.c
+@@ -116,7 +116,7 @@ static struct i915_vma *create_ring_vma(
+
+ obj = i915_gem_object_create_lmem(i915, size, I915_BO_ALLOC_VOLATILE |
+ I915_BO_ALLOC_PM_VOLATILE);
+- if (IS_ERR(obj) && i915_ggtt_has_aperture(ggtt))
++ if (IS_ERR(obj) && i915_ggtt_has_aperture(ggtt) && !HAS_LLC(i915))
+ obj = i915_gem_object_create_stolen(i915, size);
+ if (IS_ERR(obj))
+ obj = i915_gem_object_create_internal(i915, size);
--- /dev/null
+From 5e438bf7f9a1705ebcae5fa89cdbfbc6932a7871 Mon Sep 17 00:00:00 2001
+From: Mavroudis Chatzilaridis <mavchatz@protonmail.com>
+Date: Wed, 1 Feb 2023 18:51:25 +0000
+Subject: drm/i915/quirks: Add inverted backlight quirk for HP 14-r206nv
+
+From: Mavroudis Chatzilaridis <mavchatz@protonmail.com>
+
+commit 5e438bf7f9a1705ebcae5fa89cdbfbc6932a7871 upstream.
+
+This laptop uses inverted backlight PWM. Thus, without this quirk,
+backlight brightness decreases as the brightness value increases and
+vice versa.
+
+Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8013
+Cc: stable@vger.kernel.org
+Signed-off-by: Mavroudis Chatzilaridis <mavchatz@protonmail.com>
+Reviewed-by: Jani Nikula <jani.nikula@intel.com>
+Signed-off-by: Jani Nikula <jani.nikula@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230201184947.8835-1-mavchatz@protonmail.com
+(cherry picked from commit 83e7d6fd330d413cb2064e680ffea91b0512a520)
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/display/intel_quirks.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/gpu/drm/i915/display/intel_quirks.c
++++ b/drivers/gpu/drm/i915/display/intel_quirks.c
+@@ -199,6 +199,8 @@ static struct intel_quirk intel_quirks[]
+ /* ECS Liva Q2 */
+ { 0x3185, 0x1019, 0xa94d, quirk_increase_ddi_disabled_time },
+ { 0x3184, 0x1019, 0xa94d, quirk_increase_ddi_disabled_time },
++ /* HP Notebook - 14-r206nv */
++ { 0x0f31, 0x103c, 0x220f, quirk_invert_brightness },
+ };
+
+ void intel_init_quirks(struct drm_i915_private *i915)
--- /dev/null
+From 05eacc198c68cbb35a7281ce4011f8899ee1cfb8 Mon Sep 17 00:00:00 2001
+From: Mark Hawrylak <mark.hawrylak@gmail.com>
+Date: Sun, 19 Feb 2023 16:02:00 +1100
+Subject: drm/radeon: Fix eDP for single-display iMac11,2
+
+From: Mark Hawrylak <mark.hawrylak@gmail.com>
+
+commit 05eacc198c68cbb35a7281ce4011f8899ee1cfb8 upstream.
+
+Apple iMac11,2 (mid 2010), also with a Radeon HD-4670, has the same
+issue as iMac10,1 (late 2009): the internal eDP panel stays dark on
+driver load. This patch treats iMac11,2 the same as iMac10,1,
+so the eDP panel stays active.
+
+Additional steps:
+Kernel boot parameter radeon.nomodeset=0 required to keep the eDP
+panel active.
+
+This patch is an extension of
+commit 564d8a2cf3ab ("drm/radeon: Fix eDP for single-display iMac10,1 (v2)")
+Link: https://lore.kernel.org/all/lsq.1507553064.833262317@decadent.org.uk/
+Signed-off-by: Mark Hawrylak <mark.hawrylak@gmail.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/radeon/atombios_encoders.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/radeon/atombios_encoders.c
++++ b/drivers/gpu/drm/radeon/atombios_encoders.c
+@@ -2122,11 +2122,12 @@ int radeon_atom_pick_dig_encoder(struct
+
+ /*
+ * On DCE32 any encoder can drive any block so usually just use crtc id,
+- * but Apple thinks different at least on iMac10,1, so there use linkb,
++ * but Apple thinks different at least on iMac10,1 and iMac11,2, so there use linkb,
+ * otherwise the internal eDP panel will stay dark.
+ */
+ if (ASIC_IS_DCE32(rdev)) {
+- if (dmi_match(DMI_PRODUCT_NAME, "iMac10,1"))
++ if (dmi_match(DMI_PRODUCT_NAME, "iMac10,1") ||
++ dmi_match(DMI_PRODUCT_NAME, "iMac11,2"))
+ enc_idx = (dig->linkb) ? 1 : 0;
+ else
+ enc_idx = radeon_crtc->crtc_id;
--- /dev/null
+From 16a75bbe480c3598b3af57a2504ea89b1e32c3ac Mon Sep 17 00:00:00 2001
+From: Jacob Pan <jacob.jun.pan@linux.intel.com>
+Date: Thu, 16 Feb 2023 21:08:14 +0800
+Subject: iommu/vt-d: Avoid superfluous IOTLB tracking in lazy mode
+
+From: Jacob Pan <jacob.jun.pan@linux.intel.com>
+
+commit 16a75bbe480c3598b3af57a2504ea89b1e32c3ac upstream.
+
+Intel IOMMU driver implements IOTLB flush queue with domain selective
+or PASID selective invalidations. In this case there's no need to track
+IOVA page range and sync IOTLBs, which may cause significant performance
+hit.
+
+This patch adds a check to avoid IOVA gather page and IOTLB sync for
+the lazy path.
+
+The performance difference on Sapphire Rapids 100Gb NIC is improved by
+the following (as measured by iperf send):
+
+w/o this fix ~48 Gbits/s, with this fix ~54 Gbits/s
+
+Cc: <stable@vger.kernel.org>
+Fixes: 2a2b8eaa5b25 ("iommu: Handle freelists when using deferred flushing in iommu drivers")
+Reviewed-by: Robin Murphy <robin.murphy@arm.com>
+Reviewed-by: Kevin Tian <kevin.tian@intel.com>
+Tested-by: Sanjay Kumar <sanjay.k.kumar@intel.com>
+Signed-off-by: Sanjay Kumar <sanjay.k.kumar@intel.com>
+Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
+Link: https://lore.kernel.org/r/20230209175330.1783556-1-jacob.jun.pan@linux.intel.com
+Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/iommu/intel/iommu.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/drivers/iommu/intel/iommu.c
++++ b/drivers/iommu/intel/iommu.c
+@@ -4347,7 +4347,12 @@ static size_t intel_iommu_unmap(struct i
+ if (dmar_domain->max_addr == iova + size)
+ dmar_domain->max_addr = iova;
+
+- iommu_iotlb_gather_add_page(domain, gather, iova, size);
++ /*
++ * We do not use page-selective IOTLB invalidation in flush queue,
++ * so there is no need to track page and sync iotlb.
++ */
++ if (!iommu_iotlb_gather_queued(gather))
++ iommu_iotlb_gather_add_page(domain, gather, iova, size);
+
+ return size;
+ }
--- /dev/null
+From 194b3348bdbb7db65375c72f3f774aee4cc6614e Mon Sep 17 00:00:00 2001
+From: Jacob Pan <jacob.jun.pan@linux.intel.com>
+Date: Thu, 16 Feb 2023 21:08:15 +0800
+Subject: iommu/vt-d: Fix PASID directory pointer coherency
+
+From: Jacob Pan <jacob.jun.pan@linux.intel.com>
+
+commit 194b3348bdbb7db65375c72f3f774aee4cc6614e upstream.
+
+On platforms that do not support IOMMU Extended capability bit 0
+Page-walk Coherency, CPU caches are not snooped when IOMMU is accessing
+any translation structures. IOMMU access goes only directly to
+memory. Intel IOMMU code was missing a flush for the PASID table
+directory that resulted in the unrecoverable fault as shown below.
+
+This patch adds clflush calls whenever allocating and updating
+a PASID table directory to ensure cache coherency.
+
+On the reverse direction, there's no need to clflush the PASID directory
+pointer when we deactivate a context entry in that IOMMU hardware will
+not see the old PASID directory pointer after we clear the context entry.
+PASID directory entries are also never freed once allocated.
+
+ DMAR: DRHD: handling fault status reg 3
+ DMAR: [DMA Read NO_PASID] Request device [00:0d.2] fault addr 0x1026a4000
+ [fault reason 0x51] SM: Present bit in Directory Entry is clear
+ DMAR: Dump dmar1 table entries for IOVA 0x1026a4000
+ DMAR: scalable mode root entry: hi 0x0000000102448001, low 0x0000000101b3e001
+ DMAR: context entry: hi 0x0000000000000000, low 0x0000000101b4d401
+ DMAR: pasid dir entry: 0x0000000101b4e001
+ DMAR: pasid table entry[0]: 0x0000000000000109
+ DMAR: pasid table entry[1]: 0x0000000000000001
+ DMAR: pasid table entry[2]: 0x0000000000000000
+ DMAR: pasid table entry[3]: 0x0000000000000000
+ DMAR: pasid table entry[4]: 0x0000000000000000
+ DMAR: pasid table entry[5]: 0x0000000000000000
+ DMAR: pasid table entry[6]: 0x0000000000000000
+ DMAR: pasid table entry[7]: 0x0000000000000000
+ DMAR: PTE not present at level 4
+
+Cc: <stable@vger.kernel.org>
+Fixes: 0bbeb01a4faf ("iommu/vt-d: Manage scalalble mode PASID tables")
+Reviewed-by: Kevin Tian <kevin.tian@intel.com>
+Reported-by: Sukumar Ghorai <sukumar.ghorai@intel.com>
+Signed-off-by: Ashok Raj <ashok.raj@intel.com>
+Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
+Link: https://lore.kernel.org/r/20230209212843.1788125-1-jacob.jun.pan@linux.intel.com
+Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/iommu/intel/pasid.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/drivers/iommu/intel/pasid.c
++++ b/drivers/iommu/intel/pasid.c
+@@ -128,6 +128,9 @@ int intel_pasid_alloc_table(struct devic
+ pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);
+ info->pasid_table = pasid_table;
+
++ if (!ecap_coherent(info->iommu->ecap))
++ clflush_cache_range(pasid_table->table, size);
++
+ return 0;
+ }
+
+@@ -215,6 +218,10 @@ retry:
+ free_pgtable_page(entries);
+ goto retry;
+ }
++ if (!ecap_coherent(info->iommu->ecap)) {
++ clflush_cache_range(entries, VTD_PAGE_SIZE);
++ clflush_cache_range(&dir[dir_index].val, sizeof(*dir));
++ }
+ }
+
+ return &entries[index];
--- /dev/null
+From b4ff830eca097df51af10a9be29e8cc817327919 Mon Sep 17 00:00:00 2001
+From: Jason Gunthorpe <jgg@nvidia.com>
+Date: Mon, 13 Feb 2023 14:02:42 -0400
+Subject: iommufd: Do not add the same hwpt to the ioas->hwpt_list twice
+
+From: Jason Gunthorpe <jgg@nvidia.com>
+
+commit b4ff830eca097df51af10a9be29e8cc817327919 upstream.
+
+The hwpt is added to the hwpt_list only during its creation, it is never
+added again. This hunk is some missed leftover from rework. Adding it
+twice will corrupt the linked list in some cases.
+
+It affects HWPT specific attachment, which is something the test suite
+cannot cover until we can create a legitimate struct device with a
+non-system iommu "driver" (ie we need the bus removed from the iommu code)
+
+Cc: stable@vger.kernel.org
+Fixes: e8d57210035b ("iommufd: Add kAPI toward external drivers for physical devices")
+Link: https://lore.kernel.org/r/1-v1-4336b5cb2fe4+1d7-iommufd_hwpt_jgg@nvidia.com
+Reviewed-by: Kevin Tian <kevin.tian@intel.com>
+Reported-by: Kevin Tian <kevin.tian@intel.com>
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/iommu/iommufd/device.c | 4 ----
+ 1 file changed, 4 deletions(-)
+
+--- a/drivers/iommu/iommufd/device.c
++++ b/drivers/iommu/iommufd/device.c
+@@ -346,10 +346,6 @@ int iommufd_device_attach(struct iommufd
+ rc = iommufd_device_do_attach(idev, hwpt);
+ if (rc)
+ goto out_put_pt_obj;
+-
+- mutex_lock(&hwpt->ioas->mutex);
+- list_add_tail(&hwpt->hwpt_item, &hwpt->ioas->hwpt_list);
+- mutex_unlock(&hwpt->ioas->mutex);
+ break;
+ }
+ case IOMMUFD_OBJ_IOAS: {
--- /dev/null
+From b3551ead616318ea155558cdbe7e91495b8d9b33 Mon Sep 17 00:00:00 2001
+From: Jason Gunthorpe <jgg@nvidia.com>
+Date: Mon, 13 Feb 2023 10:32:21 -0400
+Subject: iommufd: Make sure to zero vfio_iommu_type1_info before copying to user
+
+From: Jason Gunthorpe <jgg@nvidia.com>
+
+commit b3551ead616318ea155558cdbe7e91495b8d9b33 upstream.
+
+Missed a zero initialization here. Most of the struct is filled with
+a copy_from_user(), however minsz for that copy is smaller than the
+actual struct by 8 bytes, thus we don't fill the padding.
+
+Cc: stable@vger.kernel.org # 6.1+
+Fixes: d624d6652a65 ("iommufd: vfio container FD ioctl compatibility")
+Link: https://lore.kernel.org/r/0-v1-a74499ece799+1a-iommufd_get_info_leak_jgg@nvidia.com
+Reviewed-by: Kevin Tian <kevin.tian@intel.com>
+Reported-by: syzbot+cb1e0978f6bf46b83a58@syzkaller.appspotmail.com
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/iommu/iommufd/vfio_compat.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/iommu/iommufd/vfio_compat.c
++++ b/drivers/iommu/iommufd/vfio_compat.c
+@@ -381,7 +381,7 @@ static int iommufd_vfio_iommu_get_info(s
+ };
+ size_t minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes);
+ struct vfio_info_cap_header __user *last_cap = NULL;
+- struct vfio_iommu_type1_info info;
++ struct vfio_iommu_type1_info info = {};
+ struct iommufd_ioas *ioas;
+ size_t total_cap_size;
+ int rc;
--- /dev/null
+From 0cb4228f6cc9ed0ca2be0d9ddf29168a8e3a3905 Mon Sep 17 00:00:00 2001
+From: "H. Nikolaus Schaller" <hns@goldelico.com>
+Date: Sun, 29 Jan 2023 19:57:04 +0100
+Subject: MIPS: DTS: CI20: fix otg power gpio
+
+From: H. Nikolaus Schaller <hns@goldelico.com>
+
+commit 0cb4228f6cc9ed0ca2be0d9ddf29168a8e3a3905 upstream.
+
+According to schematics it is PF15 and not PF14 (MIC_SW_EN).
+Seems as if it was hidden and not noticed during testing since
+there is no sound DT node.
+
+Fixes: 158c774d3c64 ("MIPS: Ingenic: Add missing nodes for Ingenic SoCs and boards.")
+Cc: stable@vger.kernel.org
+Signed-off-by: H. Nikolaus Schaller <hns@goldelico.com>
+Acked-by: Paul Cercueil <paul@crapouillou.net>
+Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/mips/boot/dts/ingenic/ci20.dts | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/mips/boot/dts/ingenic/ci20.dts
++++ b/arch/mips/boot/dts/ingenic/ci20.dts
+@@ -113,7 +113,7 @@
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+
+- gpio = <&gpf 14 GPIO_ACTIVE_LOW>;
++ gpio = <&gpf 15 GPIO_ACTIVE_LOW>;
+ enable-active-high;
+ };
+ };
--- /dev/null
+From 63ba51db24ed1b8f8088a897290eb6c036c5435d Mon Sep 17 00:00:00 2001
+From: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+Date: Sat, 28 Jan 2023 10:39:51 +0900
+Subject: PCI: Avoid FLR for AMD FCH AHCI adapters
+
+From: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+
+commit 63ba51db24ed1b8f8088a897290eb6c036c5435d upstream.
+
+PCI passthrough to VMs does not work with AMD FCH AHCI adapters: the guest
+OS fails to correctly probe devices attached to the controller due to FIS
+communication failures:
+
+ ata4: softreset failed (1st FIS failed)
+ ...
+ ata4.00: qc timeout after 5000 msecs (cmd 0xec)
+ ata4.00: failed to IDENTIFY (I/O error, err_mask=0x4)
+
+Forcing the "bus" reset method before unbinding & binding the adapter to
+the vfio-pci driver solves this issue, e.g.:
+
+ echo "bus" > /sys/bus/pci/devices/<ID>/reset_method
+
+gives a working guest OS, indicating that the default FLR reset method
+doesn't work correctly.
+
+Apply quirk_no_flr() to AMD FCH AHCI devices to work around this issue.
+
+Link: https://lore.kernel.org/r/20230128013951.523247-1-damien.lemoal@opensource.wdc.com
+Reported-by: Niklas Cassel <niklas.cassel@wdc.com>
+Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/pci/quirks.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/pci/quirks.c
++++ b/drivers/pci/quirks.c
+@@ -5340,6 +5340,7 @@ static void quirk_no_flr(struct pci_dev
+ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x1487, quirk_no_flr);
+ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x148c, quirk_no_flr);
+ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x149c, quirk_no_flr);
++DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x7901, quirk_no_flr);
+ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1502, quirk_no_flr);
+ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1503, quirk_no_flr);
+
--- /dev/null
+From 53b54ad074de1896f8b021615f65b27f557ce874 Mon Sep 17 00:00:00 2001
+From: Lukas Wunner <lukas@wunner.de>
+Date: Sun, 15 Jan 2023 09:20:33 +0100
+Subject: PCI/DPC: Await readiness of secondary bus after reset
+
+From: Lukas Wunner <lukas@wunner.de>
+
+commit 53b54ad074de1896f8b021615f65b27f557ce874 upstream.
+
+pci_bridge_wait_for_secondary_bus() is called after a Secondary Bus
+Reset, but not after a DPC-induced Hot Reset.
+
+As a result, the delays prescribed by PCIe r6.0 sec 6.6.1 are not
+observed and devices on the secondary bus may be accessed before
+they're ready.
+
+One affected device is Intel's Ponte Vecchio HPC GPU. It comprises a
+PCIe switch whose upstream port is not immediately ready after reset.
+Because its config space is restored too early, it remains in
+D0uninitialized, its subordinate devices remain inaccessible and DPC
+recovery fails with messages such as:
+
+ i915 0000:8c:00.0: can't change power state from D3cold to D0 (config space inaccessible)
+ intel_vsec 0000:8e:00.1: can't change power state from D3cold to D0 (config space inaccessible)
+ pcieport 0000:89:02.0: AER: device recovery failed
+
+Fix it.
+
+Link: https://lore.kernel.org/r/9f5ff00e1593d8d9a4b452398b98aa14d23fca11.1673769517.git.lukas@wunner.de
+Tested-by: Ravi Kishore Koppuravuri <ravi.kishore.koppuravuri@intel.com>
+Signed-off-by: Lukas Wunner <lukas@wunner.de>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/pci/pci.c | 3 ---
+ drivers/pci/pci.h | 6 ++++++
+ drivers/pci/pcie/dpc.c | 4 ++--
+ 3 files changed, 8 insertions(+), 5 deletions(-)
+
+--- a/drivers/pci/pci.c
++++ b/drivers/pci/pci.c
+@@ -167,9 +167,6 @@ static int __init pcie_port_pm_setup(cha
+ }
+ __setup("pcie_port_pm=", pcie_port_pm_setup);
+
+-/* Time to wait after a reset for device to become responsive */
+-#define PCIE_RESET_READY_POLL_MS 60000
+-
+ /**
+ * pci_bus_max_busnr - returns maximum PCI bus number of given bus' children
+ * @bus: pointer to PCI bus structure to search
+--- a/drivers/pci/pci.h
++++ b/drivers/pci/pci.h
+@@ -70,6 +70,12 @@ struct pci_cap_saved_state *pci_find_sav
+ * Reset (PCIe r6.0 sec 5.8).
+ */
+ #define PCI_RESET_WAIT 1000 /* msec */
++/*
++ * Devices may extend the 1 sec period through Request Retry Status completions
++ * (PCIe r6.0 sec 2.3.1). The spec does not provide an upper limit, but 60 sec
++ * ought to be enough for any device to become responsive.
++ */
++#define PCIE_RESET_READY_POLL_MS 60000 /* msec */
+
+ void pci_update_current_state(struct pci_dev *dev, pci_power_t state);
+ void pci_refresh_power_state(struct pci_dev *dev);
+--- a/drivers/pci/pcie/dpc.c
++++ b/drivers/pci/pcie/dpc.c
+@@ -170,8 +170,8 @@ pci_ers_result_t dpc_reset_link(struct p
+ pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS,
+ PCI_EXP_DPC_STATUS_TRIGGER);
+
+- if (!pcie_wait_for_link(pdev, true)) {
+- pci_info(pdev, "Data Link Layer Link Active not set in 1000 msec\n");
++ if (pci_bridge_wait_for_secondary_bus(pdev, "DPC",
++ PCIE_RESET_READY_POLL_MS)) {
+ clear_bit(PCI_DPC_RECOVERED, &pdev->priv_flags);
+ ret = PCI_ERS_RESULT_DISCONNECT;
+ } else {
--- /dev/null
+From 74ff8864cc842be994853095dba6db48e716400a Mon Sep 17 00:00:00 2001
+From: Lukas Wunner <lukas@wunner.de>
+Date: Fri, 20 Jan 2023 10:19:02 +0100
+Subject: PCI: hotplug: Allow marking devices as disconnected during bind/unbind
+
+From: Lukas Wunner <lukas@wunner.de>
+
+commit 74ff8864cc842be994853095dba6db48e716400a upstream.
+
+On surprise removal, pciehp_unconfigure_device() and acpiphp's
+trim_stale_devices() call pci_dev_set_disconnected() to mark removed
+devices as permanently offline. Thereby, the PCI core and drivers know
+to skip device accesses.
+
+However pci_dev_set_disconnected() takes the device_lock and thus waits for
+a concurrent driver bind or unbind to complete. As a result, the driver's
+->probe and ->remove hooks have no chance to learn that the device is gone.
+
+That doesn't make any sense, so drop the device_lock and instead use atomic
+xchg() and cmpxchg() operations to update the device state.
+
+As a byproduct, an AB-BA deadlock reported by Anatoli is fixed which occurs
+on surprise removal with AER concurrently performing a bus reset.
+
+AER bus reset:
+
+ INFO: task irq/26-aerdrv:95 blocked for more than 120 seconds.
+ Tainted: G W 6.2.0-rc3-custom-norework-jan11+
+ schedule
+ rwsem_down_write_slowpath
+ down_write_nested
+ pciehp_reset_slot # acquires reset_lock
+ pci_reset_hotplug_slot
+ pci_slot_reset # acquires device_lock
+ pci_bus_error_reset
+ aer_root_reset
+ pcie_do_recovery
+ aer_process_err_devices
+ aer_isr
+
+pciehp surprise removal:
+
+ INFO: task irq/26-pciehp:96 blocked for more than 120 seconds.
+ Tainted: G W 6.2.0-rc3-custom-norework-jan11+
+ schedule_preempt_disabled
+ __mutex_lock
+ mutex_lock_nested
+ pci_dev_set_disconnected # acquires device_lock
+ pci_walk_bus
+ pciehp_unconfigure_device
+ pciehp_disable_slot
+ pciehp_handle_presence_or_link_change
+ pciehp_ist # acquires reset_lock
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=215590
+Fixes: a6bd101b8f84 ("PCI: Unify device inaccessible")
+Link: https://lore.kernel.org/r/3dc88ea82bdc0e37d9000e413d5ebce481cbd629.1674205689.git.lukas@wunner.de
+Reported-by: Anatoli Antonovitch <anatoli.antonovitch@amd.com>
+Signed-off-by: Lukas Wunner <lukas@wunner.de>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Cc: stable@vger.kernel.org # v4.20+
+Cc: Keith Busch <kbusch@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/pci/pci.h | 43 +++++++++++++------------------------------
+ 1 file changed, 13 insertions(+), 30 deletions(-)
+
+--- a/drivers/pci/pci.h
++++ b/drivers/pci/pci.h
+@@ -318,53 +318,36 @@ struct pci_sriov {
+ * @dev: PCI device to set new error_state
+ * @new: the state we want dev to be in
+ *
+- * Must be called with device_lock held.
++ * If the device is experiencing perm_failure, it has to remain in that state.
++ * Any other transition is allowed.
+ *
+ * Returns true if state has been changed to the requested state.
+ */
+ static inline bool pci_dev_set_io_state(struct pci_dev *dev,
+ pci_channel_state_t new)
+ {
+- bool changed = false;
++ pci_channel_state_t old;
+
+- device_lock_assert(&dev->dev);
+ switch (new) {
+ case pci_channel_io_perm_failure:
+- switch (dev->error_state) {
+- case pci_channel_io_frozen:
+- case pci_channel_io_normal:
+- case pci_channel_io_perm_failure:
+- changed = true;
+- break;
+- }
+- break;
++ xchg(&dev->error_state, pci_channel_io_perm_failure);
++ return true;
+ case pci_channel_io_frozen:
+- switch (dev->error_state) {
+- case pci_channel_io_frozen:
+- case pci_channel_io_normal:
+- changed = true;
+- break;
+- }
+- break;
++ old = cmpxchg(&dev->error_state, pci_channel_io_normal,
++ pci_channel_io_frozen);
++ return old != pci_channel_io_perm_failure;
+ case pci_channel_io_normal:
+- switch (dev->error_state) {
+- case pci_channel_io_frozen:
+- case pci_channel_io_normal:
+- changed = true;
+- break;
+- }
+- break;
++ old = cmpxchg(&dev->error_state, pci_channel_io_frozen,
++ pci_channel_io_normal);
++ return old != pci_channel_io_perm_failure;
++ default:
++ return false;
+ }
+- if (changed)
+- dev->error_state = new;
+- return changed;
+ }
+
+ static inline int pci_dev_set_disconnected(struct pci_dev *dev, void *unused)
+ {
+- device_lock(&dev->dev);
+ pci_dev_set_io_state(dev, pci_channel_io_perm_failure);
+- device_unlock(&dev->dev);
+
+ return 0;
+ }
--- /dev/null
+From 8ef0217227b42e2c34a18de316cee3da16c9bf1e Mon Sep 17 00:00:00 2001
+From: Lukas Wunner <lukas@wunner.de>
+Date: Sun, 15 Jan 2023 09:20:31 +0100
+Subject: PCI/PM: Observe reset delay irrespective of bridge_d3
+
+From: Lukas Wunner <lukas@wunner.de>
+
+commit 8ef0217227b42e2c34a18de316cee3da16c9bf1e upstream.
+
+If a PCI bridge is suspended to D3cold upon entering system sleep,
+resuming it entails a Fundamental Reset per PCIe r6.0 sec 5.8.
+
+The delay prescribed after a Fundamental Reset in PCIe r6.0 sec 6.6.1
+is sought to be observed by:
+
+ pci_pm_resume_noirq()
+ pci_pm_bridge_power_up_actions()
+ pci_bridge_wait_for_secondary_bus()
+
+However, pci_bridge_wait_for_secondary_bus() bails out if the bridge_d3
+flag is not set. That flag indicates whether a bridge is allowed to
+suspend to D3cold at *runtime*.
+
+Hence *no* delay is observed on resume from system sleep if runtime
+D3cold is forbidden. That doesn't make any sense, so drop the bridge_d3
+check from pci_bridge_wait_for_secondary_bus().
+
+The purpose of the bridge_d3 check was probably to avoid delays if a
+bridge remained in D0 during suspend. However the sole caller of
+pci_bridge_wait_for_secondary_bus(), pci_pm_bridge_power_up_actions(),
+is only invoked if the previous power state was D3cold. Hence the
+additional bridge_d3 check seems superfluous.
+
+Fixes: ad9001f2f411 ("PCI/PM: Add missing link delays required by the PCIe spec")
+Link: https://lore.kernel.org/r/eb37fa345285ec8bacabbf06b020b803f77bdd3d.1673769517.git.lukas@wunner.de
+Tested-by: Ravi Kishore Koppuravuri <ravi.kishore.koppuravuri@intel.com>
+Signed-off-by: Lukas Wunner <lukas@wunner.de>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
+Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
+Cc: stable@vger.kernel.org # v5.5+
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/pci/pci.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/pci/pci.c
++++ b/drivers/pci/pci.c
+@@ -4957,7 +4957,7 @@ void pci_bridge_wait_for_secondary_bus(s
+ if (pci_dev_is_disconnected(dev))
+ return;
+
+- if (!pci_is_bridge(dev) || !dev->bridge_d3)
++ if (!pci_is_bridge(dev))
+ return;
+
+ down_read(&pci_bus_sem);
--- /dev/null
+From ac91e6980563ed53afadd925fa6585ffd2bc4a2c Mon Sep 17 00:00:00 2001
+From: Lukas Wunner <lukas@wunner.de>
+Date: Sun, 15 Jan 2023 09:20:32 +0100
+Subject: PCI: Unify delay handling for reset and resume
+
+From: Lukas Wunner <lukas@wunner.de>
+
+commit ac91e6980563ed53afadd925fa6585ffd2bc4a2c upstream.
+
+Sheng Bi reports that pci_bridge_secondary_bus_reset() may fail to wait
+for devices on the secondary bus to become accessible after reset:
+
+Although it does call pci_dev_wait(), it erroneously passes the bridge's
+pci_dev rather than that of a child. The bridge of course is always
+accessible while its secondary bus is reset, so pci_dev_wait() returns
+immediately.
+
+Sheng Bi proposes introducing a new pci_bridge_secondary_bus_wait()
+function which is called from pci_bridge_secondary_bus_reset():
+
+https://lore.kernel.org/linux-pci/20220523171517.32407-1-windy.bi.enflame@gmail.com/
+
+However we already have pci_bridge_wait_for_secondary_bus() which does
+almost exactly what we need. So far it's only called on resume from
+D3cold (which implies a Fundamental Reset per PCIe r6.0 sec 5.8).
+Re-using it for Secondary Bus Resets is a leaner and more rational
+approach than introducing a new function.
+
+That only requires a few minor tweaks:
+
+- Amend pci_bridge_wait_for_secondary_bus() to await accessibility of
+ the first device on the secondary bus by calling pci_dev_wait() after
+ performing the prescribed delays. pci_dev_wait() needs two parameters,
+ a reset reason and a timeout, which callers must now pass to
+ pci_bridge_wait_for_secondary_bus(). The timeout is 1 sec for resume
+ (PCIe r6.0 sec 6.6.1) and 60 sec for reset (commit 821cdad5c46c ("PCI:
+ Wait up to 60 seconds for device to become ready after FLR")).
+ Introduce a PCI_RESET_WAIT macro for the 1 sec timeout.
+
+- Amend pci_bridge_wait_for_secondary_bus() to return 0 on success or
+ -ENOTTY on error for consumption by pci_bridge_secondary_bus_reset().
+
+- Drop an unnecessary 1 sec delay from pci_reset_secondary_bus() which
+ is now performed by pci_bridge_wait_for_secondary_bus(). A static
+ delay this long is only necessary for Conventional PCI, so modern
+ PCIe systems benefit from shorter reset times as a side effect.
+
+Fixes: 6b2f1351af56 ("PCI: Wait for device to become ready after secondary bus reset")
+Link: https://lore.kernel.org/r/da77c92796b99ec568bd070cbe4725074a117038.1673769517.git.lukas@wunner.de
+Reported-by: Sheng Bi <windy.bi.enflame@gmail.com>
+Tested-by: Ravi Kishore Koppuravuri <ravi.kishore.koppuravuri@intel.com>
+Signed-off-by: Lukas Wunner <lukas@wunner.de>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
+Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
+Cc: stable@vger.kernel.org # v4.17+
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/pci/pci-driver.c | 2 -
+ drivers/pci/pci.c | 54 ++++++++++++++++++++---------------------------
+ drivers/pci/pci.h | 10 +++++++-
+ 3 files changed, 34 insertions(+), 32 deletions(-)
+
+--- a/drivers/pci/pci-driver.c
++++ b/drivers/pci/pci-driver.c
+@@ -572,7 +572,7 @@ static void pci_pm_default_resume_early(
+
+ static void pci_pm_bridge_power_up_actions(struct pci_dev *pci_dev)
+ {
+- pci_bridge_wait_for_secondary_bus(pci_dev);
++ pci_bridge_wait_for_secondary_bus(pci_dev, "resume", PCI_RESET_WAIT);
+ /*
+ * When powering on a bridge from D3cold, the whole hierarchy may be
+ * powered on into D0uninitialized state, resume them to give them a
+--- a/drivers/pci/pci.c
++++ b/drivers/pci/pci.c
+@@ -1174,7 +1174,7 @@ static int pci_dev_wait(struct pci_dev *
+ return -ENOTTY;
+ }
+
+- if (delay > 1000)
++ if (delay > PCI_RESET_WAIT)
+ pci_info(dev, "not ready %dms after %s; waiting\n",
+ delay - 1, reset_type);
+
+@@ -1183,7 +1183,7 @@ static int pci_dev_wait(struct pci_dev *
+ pci_read_config_dword(dev, PCI_COMMAND, &id);
+ }
+
+- if (delay > 1000)
++ if (delay > PCI_RESET_WAIT)
+ pci_info(dev, "ready %dms after %s\n", delay - 1,
+ reset_type);
+
+@@ -4941,24 +4941,31 @@ static int pci_bus_max_d3cold_delay(cons
+ /**
+ * pci_bridge_wait_for_secondary_bus - Wait for secondary bus to be accessible
+ * @dev: PCI bridge
++ * @reset_type: reset type in human-readable form
++ * @timeout: maximum time to wait for devices on secondary bus (milliseconds)
+ *
+ * Handle necessary delays before access to the devices on the secondary
+- * side of the bridge are permitted after D3cold to D0 transition.
++ * side of the bridge are permitted after D3cold to D0 transition
++ * or Conventional Reset.
+ *
+ * For PCIe this means the delays in PCIe 5.0 section 6.6.1. For
+ * conventional PCI it means Tpvrh + Trhfa specified in PCI 3.0 section
+ * 4.3.2.
++ *
++ * Return 0 on success or -ENOTTY if the first device on the secondary bus
++ * failed to become accessible.
+ */
+-void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev)
++int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type,
++ int timeout)
+ {
+ struct pci_dev *child;
+ int delay;
+
+ if (pci_dev_is_disconnected(dev))
+- return;
++ return 0;
+
+ if (!pci_is_bridge(dev))
+- return;
++ return 0;
+
+ down_read(&pci_bus_sem);
+
+@@ -4970,14 +4977,14 @@ void pci_bridge_wait_for_secondary_bus(s
+ */
+ if (!dev->subordinate || list_empty(&dev->subordinate->devices)) {
+ up_read(&pci_bus_sem);
+- return;
++ return 0;
+ }
+
+ /* Take d3cold_delay requirements into account */
+ delay = pci_bus_max_d3cold_delay(dev->subordinate);
+ if (!delay) {
+ up_read(&pci_bus_sem);
+- return;
++ return 0;
+ }
+
+ child = list_first_entry(&dev->subordinate->devices, struct pci_dev,
+@@ -4986,14 +4993,12 @@ void pci_bridge_wait_for_secondary_bus(s
+
+ /*
+ * Conventional PCI and PCI-X we need to wait Tpvrh + Trhfa before
+- * accessing the device after reset (that is 1000 ms + 100 ms). In
+- * practice this should not be needed because we don't do power
+- * management for them (see pci_bridge_d3_possible()).
++ * accessing the device after reset (that is 1000 ms + 100 ms).
+ */
+ if (!pci_is_pcie(dev)) {
+ pci_dbg(dev, "waiting %d ms for secondary bus\n", 1000 + delay);
+ msleep(1000 + delay);
+- return;
++ return 0;
+ }
+
+ /*
+@@ -5010,11 +5015,11 @@ void pci_bridge_wait_for_secondary_bus(s
+ * configuration requests if we only wait for 100 ms (see
+ * https://bugzilla.kernel.org/show_bug.cgi?id=203885).
+ *
+- * Therefore we wait for 100 ms and check for the device presence.
+- * If it is still not present give it an additional 100 ms.
++ * Therefore we wait for 100 ms and check for the device presence
++ * until the timeout expires.
+ */
+ if (!pcie_downstream_port(dev))
+- return;
++ return 0;
+
+ if (pcie_get_speed_cap(dev) <= PCIE_SPEED_5_0GT) {
+ pci_dbg(dev, "waiting %d ms for downstream link\n", delay);
+@@ -5025,14 +5030,11 @@ void pci_bridge_wait_for_secondary_bus(s
+ if (!pcie_wait_for_link_delay(dev, true, delay)) {
+ /* Did not train, no need to wait any further */
+ pci_info(dev, "Data Link Layer Link Active not set in 1000 msec\n");
+- return;
++ return -ENOTTY;
+ }
+ }
+
+- if (!pci_device_is_present(child)) {
+- pci_dbg(child, "waiting additional %d ms to become accessible\n", delay);
+- msleep(delay);
+- }
++ return pci_dev_wait(child, reset_type, timeout - delay);
+ }
+
+ void pci_reset_secondary_bus(struct pci_dev *dev)
+@@ -5051,15 +5053,6 @@ void pci_reset_secondary_bus(struct pci_
+
+ ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
+ pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
+-
+- /*
+- * Trhfa for conventional PCI is 2^25 clock cycles.
+- * Assuming a minimum 33MHz clock this results in a 1s
+- * delay before we can consider subordinate devices to
+- * be re-initialized. PCIe has some ways to shorten this,
+- * but we don't make use of them yet.
+- */
+- ssleep(1);
+ }
+
+ void __weak pcibios_reset_secondary_bus(struct pci_dev *dev)
+@@ -5078,7 +5071,8 @@ int pci_bridge_secondary_bus_reset(struc
+ {
+ pcibios_reset_secondary_bus(dev);
+
+- return pci_dev_wait(dev, "bus reset", PCIE_RESET_READY_POLL_MS);
++ return pci_bridge_wait_for_secondary_bus(dev, "bus reset",
++ PCIE_RESET_READY_POLL_MS);
+ }
+ EXPORT_SYMBOL_GPL(pci_bridge_secondary_bus_reset);
+
+--- a/drivers/pci/pci.h
++++ b/drivers/pci/pci.h
+@@ -64,6 +64,13 @@ struct pci_cap_saved_state *pci_find_sav
+ #define PCI_PM_D3HOT_WAIT 10 /* msec */
+ #define PCI_PM_D3COLD_WAIT 100 /* msec */
+
++/*
++ * Following exit from Conventional Reset, devices must be ready within 1 sec
++ * (PCIe r6.0 sec 6.6.1). A D3cold to D0 transition implies a Conventional
++ * Reset (PCIe r6.0 sec 5.8).
++ */
++#define PCI_RESET_WAIT 1000 /* msec */
++
+ void pci_update_current_state(struct pci_dev *dev, pci_power_t state);
+ void pci_refresh_power_state(struct pci_dev *dev);
+ int pci_power_up(struct pci_dev *dev);
+@@ -86,8 +93,9 @@ void pci_msi_init(struct pci_dev *dev);
+ void pci_msix_init(struct pci_dev *dev);
+ bool pci_bridge_d3_possible(struct pci_dev *dev);
+ void pci_bridge_d3_update(struct pci_dev *dev);
+-void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev);
+ void pci_bridge_reconfigure_ltr(struct pci_dev *dev);
++int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type,
++ int timeout);
+
+ static inline void pci_wakeup_event(struct pci_dev *dev)
+ {
--- /dev/null
+From eb9be8310c58c166f9fae3b71c0ad9d6741b4897 Mon Sep 17 00:00:00 2001
+From: Conor Dooley <conor.dooley@microchip.com>
+Date: Fri, 10 Feb 2023 18:59:45 +0000
+Subject: RISC-V: add a spin_shadow_stack declaration
+
+From: Conor Dooley <conor.dooley@microchip.com>
+
+commit eb9be8310c58c166f9fae3b71c0ad9d6741b4897 upstream.
+
+The patchwork automation reported a sparse complaint that
+spin_shadow_stack was not declared and should be static:
+../arch/riscv/kernel/traps.c:335:15: warning: symbol 'spin_shadow_stack' was not declared. Should it be static?
+
+However, this is used in entry.S and therefore shouldn't be static.
+The same applies to the shadow_stack that this pseudo spinlock is
+trying to protect, so do like its charge and add a declaration to
+thread_info.h
+
+Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
+Fixes: 7e1864332fbc ("riscv: fix race when vmap stack overflow")
+Reviewed-by: Guo Ren <guoren@kernel.org>
+Link: https://lore.kernel.org/r/20230210185945.915806-1-conor@kernel.org
+Cc: stable@vger.kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/include/asm/thread_info.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/riscv/include/asm/thread_info.h
++++ b/arch/riscv/include/asm/thread_info.h
+@@ -43,6 +43,7 @@
+ #ifndef __ASSEMBLY__
+
+ extern long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE / sizeof(long)];
++extern unsigned long spin_shadow_stack;
+
+ #include <asm/processor.h>
+ #include <asm/csr.h>
--- /dev/null
+From 130aee3fd9981297ff9354e5d5609cd59aafbbea Mon Sep 17 00:00:00 2001
+From: Mattias Nissler <mnissler@rivosinc.com>
+Date: Wed, 15 Feb 2023 14:48:28 +0000
+Subject: riscv: Avoid enabling interrupts in die()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mattias Nissler <mnissler@rivosinc.com>
+
+commit 130aee3fd9981297ff9354e5d5609cd59aafbbea upstream.
+
+While working on something else, I noticed that the kernel would start
+accepting interrupts again after crashing in an interrupt handler. Since
+the kernel is already in inconsistent state, enabling interrupts is
+dangerous and opens up risk of kernel state deteriorating further.
+Interrupts do get enabled via what looks like an unintended side effect of
+spin_unlock_irq, so switch to the more cautious
+spin_lock_irqsave/spin_unlock_irqrestore instead.
+
+Fixes: 76d2a0493a17 ("RISC-V: Init and Halt Code")
+Signed-off-by: Mattias Nissler <mnissler@rivosinc.com>
+Reviewed-by: Björn Töpel <bjorn@kernel.org>
+Link: https://lore.kernel.org/r/20230215144828.3370316-1-mnissler@rivosinc.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/kernel/traps.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/arch/riscv/kernel/traps.c
++++ b/arch/riscv/kernel/traps.c
+@@ -34,10 +34,11 @@ void die(struct pt_regs *regs, const cha
+ static int die_counter;
+ int ret;
+ long cause;
++ unsigned long flags;
+
+ oops_enter();
+
+- spin_lock_irq(&die_lock);
++ spin_lock_irqsave(&die_lock, flags);
+ console_verbose();
+ bust_spinlocks(1);
+
+@@ -54,7 +55,7 @@ void die(struct pt_regs *regs, const cha
+
+ bust_spinlocks(0);
+ add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
+- spin_unlock_irq(&die_lock);
++ spin_unlock_irqrestore(&die_lock, flags);
+ oops_exit();
+
+ if (in_interrupt())
--- /dev/null
+From 8547649981e6631328cd64f583667501ae385531 Mon Sep 17 00:00:00 2001
+From: Andy Chiu <andy.chiu@sifive.com>
+Date: Thu, 12 Jan 2023 04:05:57 -0500
+Subject: riscv: ftrace: Fixup panic by disabling preemption
+
+From: Andy Chiu <andy.chiu@sifive.com>
+
+commit 8547649981e6631328cd64f583667501ae385531 upstream.
+
+On RISC-V, we must use an AUIPC + JALR pair to encode an immediate,
+forming a jump to an address that is more than 4K away. This may cause
+errors if we want to enable kernel preemption and remove the dependency
+on stop_machine() for patching code. For example, if a task is switched
+out right after the auipc, and the ftrace function is changed before the
+task is switched back, then the jalr would jump to an address that mixes
+the updated 11:0 bits with the previous XLEN:12 part.
+
+p: patched area performed by dynamic ftrace
+ftrace_prologue:
+p| REG_S ra, -SZREG(sp)
+p| auipc ra, 0x? ------------> preempted
+ ...
+ change ftrace function
+ ...
+p| jalr -?(ra) <------------- switched back
+p| REG_L ra, -SZREG(sp)
+func:
+ xxx
+ ret
+
+Fixes: afc76b8b8011 ("riscv: Using PATCHABLE_FUNCTION_ENTRY instead of MCOUNT")
+Signed-off-by: Andy Chiu <andy.chiu@sifive.com>
+Signed-off-by: Guo Ren <guoren@kernel.org>
+Link: https://lore.kernel.org/r/20230112090603.1295340-2-guoren@kernel.org
+Cc: stable@vger.kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/Kconfig | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/riscv/Kconfig
++++ b/arch/riscv/Kconfig
+@@ -138,7 +138,7 @@ config RISCV
+ select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
+ select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
+ select HAVE_FUNCTION_GRAPH_TRACER
+- select HAVE_FUNCTION_TRACER if !XIP_KERNEL
++ select HAVE_FUNCTION_TRACER if !XIP_KERNEL && !PREEMPTION
+
+ config ARCH_MMAP_RND_BITS_MIN
+ default 18 if 64BIT
--- /dev/null
+From 6724a76cff85ee271bbbff42ac527e4643b2ec52 Mon Sep 17 00:00:00 2001
+From: Guo Ren <guoren@linux.alibaba.com>
+Date: Thu, 12 Jan 2023 04:05:59 -0500
+Subject: riscv: ftrace: Reduce the detour code size to half
+
+From: Guo Ren <guoren@linux.alibaba.com>
+
+commit 6724a76cff85ee271bbbff42ac527e4643b2ec52 upstream.
+
+Use a temporary register to reduce the size of detour code from 16 bytes to
+8 bytes. The previous implementation is from 'commit afc76b8b8011 ("riscv:
+Using PATCHABLE_FUNCTION_ENTRY instead of MCOUNT")'.
+
+Before the patch:
+<func_prolog>:
+ 0: REG_S ra, -SZREG(sp)
+ 4: auipc ra, ?
+ 8: jalr ?(ra)
+12: REG_L ra, -SZREG(sp)
+ (func_body)
+
+After the patch:
+<func_prolog>:
+ 0: auipc t0, ?
+ 4: jalr t0, ?(t0)
+ (func_body)
+
+This patch not only reduces the size of the detour code, but also fixes an
+important issue:
+
+An Ftrace callback registered with FTRACE_OPS_FL_IPMODIFY flag can
+actually change the instruction pointer, e.g. to "replace" the given
+kernel function with a new one, which is needed for livepatching, etc.
+
+In this case, the trampoline (ftrace_regs_caller) would not return to
+<func_prolog+12> but would rather jump to the new function. So, "REG_L
+ra, -SZREG(sp)" would not run and the original return address would not
+be restored. The kernel is likely to hang or crash as a result.
+
+This can be easily demonstrated if one tries to "replace", say,
+cmdline_proc_show() with a new function with the same signature using
+instruction_pointer_set(&fregs->regs, new_func_addr) in the Ftrace
+callback.
+
+Link: https://lore.kernel.org/linux-riscv/20221122075440.1165172-1-suagrfillet@gmail.com/
+Link: https://lore.kernel.org/linux-riscv/d7d5730b-ebef-68e5-5046-e763e1ee6164@yadro.com/
+Co-developed-by: Song Shuai <suagrfillet@gmail.com>
+Signed-off-by: Song Shuai <suagrfillet@gmail.com>
+Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
+Signed-off-by: Guo Ren <guoren@kernel.org>
+Cc: Evgenii Shatokhin <e.shatokhin@yadro.com>
+Reviewed-by: Evgenii Shatokhin <e.shatokhin@yadro.com>
+Link: https://lore.kernel.org/r/20230112090603.1295340-4-guoren@kernel.org
+Cc: stable@vger.kernel.org
+Fixes: 10626c32e382 ("riscv/ftrace: Add basic support")
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/Makefile | 4 +-
+ arch/riscv/include/asm/ftrace.h | 50 +++++++++++++++++++++++-------
+ arch/riscv/kernel/ftrace.c | 65 +++++++++++-----------------------------
+ arch/riscv/kernel/mcount-dyn.S | 42 +++++++++----------------
+ 4 files changed, 75 insertions(+), 86 deletions(-)
+
+--- a/arch/riscv/Makefile
++++ b/arch/riscv/Makefile
+@@ -12,9 +12,9 @@ ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
+ LDFLAGS_vmlinux := --no-relax
+ KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
+ ifeq ($(CONFIG_RISCV_ISA_C),y)
+- CC_FLAGS_FTRACE := -fpatchable-function-entry=8
+-else
+ CC_FLAGS_FTRACE := -fpatchable-function-entry=4
++else
++ CC_FLAGS_FTRACE := -fpatchable-function-entry=2
+ endif
+ endif
+
+--- a/arch/riscv/include/asm/ftrace.h
++++ b/arch/riscv/include/asm/ftrace.h
+@@ -42,6 +42,14 @@ struct dyn_arch_ftrace {
+ * 2) jalr: setting low-12 offset to ra, jump to ra, and set ra to
+ * return address (original pc + 4)
+ *
++ *<ftrace enable>:
++ * 0: auipc t0/ra, 0x?
++ * 4: jalr t0/ra, ?(t0/ra)
++ *
++ *<ftrace disable>:
++ * 0: nop
++ * 4: nop
++ *
+ * Dynamic ftrace generates probes to call sites, so we must deal with
+ * both auipc and jalr at the same time.
+ */
+@@ -52,25 +60,43 @@ struct dyn_arch_ftrace {
+ #define AUIPC_OFFSET_MASK (0xfffff000)
+ #define AUIPC_PAD (0x00001000)
+ #define JALR_SHIFT 20
+-#define JALR_BASIC (0x000080e7)
+-#define AUIPC_BASIC (0x00000097)
++#define JALR_RA (0x000080e7)
++#define AUIPC_RA (0x00000097)
++#define JALR_T0 (0x000282e7)
++#define AUIPC_T0 (0x00000297)
+ #define NOP4 (0x00000013)
+
+-#define make_call(caller, callee, call) \
++#define to_jalr_t0(offset) \
++ (((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_T0)
++
++#define to_auipc_t0(offset) \
++ ((offset & JALR_SIGN_MASK) ? \
++ (((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) | AUIPC_T0) : \
++ ((offset & AUIPC_OFFSET_MASK) | AUIPC_T0))
++
++#define make_call_t0(caller, callee, call) \
+ do { \
+- call[0] = to_auipc_insn((unsigned int)((unsigned long)callee - \
+- (unsigned long)caller)); \
+- call[1] = to_jalr_insn((unsigned int)((unsigned long)callee - \
+- (unsigned long)caller)); \
++ unsigned int offset = \
++ (unsigned long) callee - (unsigned long) caller; \
++ call[0] = to_auipc_t0(offset); \
++ call[1] = to_jalr_t0(offset); \
+ } while (0)
+
+-#define to_jalr_insn(offset) \
+- (((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_BASIC)
++#define to_jalr_ra(offset) \
++ (((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_RA)
+
+-#define to_auipc_insn(offset) \
++#define to_auipc_ra(offset) \
+ ((offset & JALR_SIGN_MASK) ? \
+- (((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) | AUIPC_BASIC) : \
+- ((offset & AUIPC_OFFSET_MASK) | AUIPC_BASIC))
++ (((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) | AUIPC_RA) : \
++ ((offset & AUIPC_OFFSET_MASK) | AUIPC_RA))
++
++#define make_call_ra(caller, callee, call) \
++do { \
++ unsigned int offset = \
++ (unsigned long) callee - (unsigned long) caller; \
++ call[0] = to_auipc_ra(offset); \
++ call[1] = to_jalr_ra(offset); \
++} while (0)
+
+ /*
+ * Let auipc+jalr be the basic *mcount unit*, so we make it 8 bytes here.
+--- a/arch/riscv/kernel/ftrace.c
++++ b/arch/riscv/kernel/ftrace.c
+@@ -55,12 +55,15 @@ static int ftrace_check_current_call(uns
+ }
+
+ static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target,
+- bool enable)
++ bool enable, bool ra)
+ {
+ unsigned int call[2];
+ unsigned int nops[2] = {NOP4, NOP4};
+
+- make_call(hook_pos, target, call);
++ if (ra)
++ make_call_ra(hook_pos, target, call);
++ else
++ make_call_t0(hook_pos, target, call);
+
+ /* Replace the auipc-jalr pair at once. Return -EPERM on write error. */
+ if (patch_text_nosync
+@@ -70,42 +73,13 @@ static int __ftrace_modify_call(unsigned
+ return 0;
+ }
+
+-/*
+- * Put 5 instructions with 16 bytes at the front of function within
+- * patchable function entry nops' area.
+- *
+- * 0: REG_S ra, -SZREG(sp)
+- * 1: auipc ra, 0x?
+- * 2: jalr -?(ra)
+- * 3: REG_L ra, -SZREG(sp)
+- *
+- * So the opcodes is:
+- * 0: 0xfe113c23 (sd)/0xfe112e23 (sw)
+- * 1: 0x???????? -> auipc
+- * 2: 0x???????? -> jalr
+- * 3: 0xff813083 (ld)/0xffc12083 (lw)
+- */
+-#if __riscv_xlen == 64
+-#define INSN0 0xfe113c23
+-#define INSN3 0xff813083
+-#elif __riscv_xlen == 32
+-#define INSN0 0xfe112e23
+-#define INSN3 0xffc12083
+-#endif
+-
+-#define FUNC_ENTRY_SIZE 16
+-#define FUNC_ENTRY_JMP 4
+-
+ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+ {
+- unsigned int call[4] = {INSN0, 0, 0, INSN3};
+- unsigned long target = addr;
+- unsigned long caller = rec->ip + FUNC_ENTRY_JMP;
++ unsigned int call[2];
+
+- call[1] = to_auipc_insn((unsigned int)(target - caller));
+- call[2] = to_jalr_insn((unsigned int)(target - caller));
++ make_call_t0(rec->ip, addr, call);
+
+- if (patch_text_nosync((void *)rec->ip, call, FUNC_ENTRY_SIZE))
++ if (patch_text_nosync((void *)rec->ip, call, MCOUNT_INSN_SIZE))
+ return -EPERM;
+
+ return 0;
+@@ -114,15 +88,14 @@ int ftrace_make_call(struct dyn_ftrace *
+ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
+ unsigned long addr)
+ {
+- unsigned int nops[4] = {NOP4, NOP4, NOP4, NOP4};
++ unsigned int nops[2] = {NOP4, NOP4};
+
+- if (patch_text_nosync((void *)rec->ip, nops, FUNC_ENTRY_SIZE))
++ if (patch_text_nosync((void *)rec->ip, nops, MCOUNT_INSN_SIZE))
+ return -EPERM;
+
+ return 0;
+ }
+
+-
+ /*
+ * This is called early on, and isn't wrapped by
+ * ftrace_arch_code_modify_{prepare,post_process}() and therefor doesn't hold
+@@ -144,10 +117,10 @@ int ftrace_init_nop(struct module *mod,
+ int ftrace_update_ftrace_func(ftrace_func_t func)
+ {
+ int ret = __ftrace_modify_call((unsigned long)&ftrace_call,
+- (unsigned long)func, true);
++ (unsigned long)func, true, true);
+ if (!ret) {
+ ret = __ftrace_modify_call((unsigned long)&ftrace_regs_call,
+- (unsigned long)func, true);
++ (unsigned long)func, true, true);
+ }
+
+ return ret;
+@@ -159,16 +132,16 @@ int ftrace_modify_call(struct dyn_ftrace
+ unsigned long addr)
+ {
+ unsigned int call[2];
+- unsigned long caller = rec->ip + FUNC_ENTRY_JMP;
++ unsigned long caller = rec->ip;
+ int ret;
+
+- make_call(caller, old_addr, call);
++ make_call_t0(caller, old_addr, call);
+ ret = ftrace_check_current_call(caller, call);
+
+ if (ret)
+ return ret;
+
+- return __ftrace_modify_call(caller, addr, true);
++ return __ftrace_modify_call(caller, addr, true, false);
+ }
+ #endif
+
+@@ -203,12 +176,12 @@ int ftrace_enable_ftrace_graph_caller(vo
+ int ret;
+
+ ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call,
+- (unsigned long)&prepare_ftrace_return, true);
++ (unsigned long)&prepare_ftrace_return, true, true);
+ if (ret)
+ return ret;
+
+ return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call,
+- (unsigned long)&prepare_ftrace_return, true);
++ (unsigned long)&prepare_ftrace_return, true, true);
+ }
+
+ int ftrace_disable_ftrace_graph_caller(void)
+@@ -216,12 +189,12 @@ int ftrace_disable_ftrace_graph_caller(v
+ int ret;
+
+ ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call,
+- (unsigned long)&prepare_ftrace_return, false);
++ (unsigned long)&prepare_ftrace_return, false, true);
+ if (ret)
+ return ret;
+
+ return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call,
+- (unsigned long)&prepare_ftrace_return, false);
++ (unsigned long)&prepare_ftrace_return, false, true);
+ }
+ #endif /* CONFIG_DYNAMIC_FTRACE */
+ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+--- a/arch/riscv/kernel/mcount-dyn.S
++++ b/arch/riscv/kernel/mcount-dyn.S
+@@ -13,8 +13,8 @@
+
+ .text
+
+-#define FENTRY_RA_OFFSET 12
+-#define ABI_SIZE_ON_STACK 72
++#define FENTRY_RA_OFFSET 8
++#define ABI_SIZE_ON_STACK 80
+ #define ABI_A0 0
+ #define ABI_A1 8
+ #define ABI_A2 16
+@@ -23,10 +23,10 @@
+ #define ABI_A5 40
+ #define ABI_A6 48
+ #define ABI_A7 56
+-#define ABI_RA 64
++#define ABI_T0 64
++#define ABI_RA 72
+
+ .macro SAVE_ABI
+- addi sp, sp, -SZREG
+ addi sp, sp, -ABI_SIZE_ON_STACK
+
+ REG_S a0, ABI_A0(sp)
+@@ -37,6 +37,7 @@
+ REG_S a5, ABI_A5(sp)
+ REG_S a6, ABI_A6(sp)
+ REG_S a7, ABI_A7(sp)
++ REG_S t0, ABI_T0(sp)
+ REG_S ra, ABI_RA(sp)
+ .endm
+
+@@ -49,24 +50,18 @@
+ REG_L a5, ABI_A5(sp)
+ REG_L a6, ABI_A6(sp)
+ REG_L a7, ABI_A7(sp)
++ REG_L t0, ABI_T0(sp)
+ REG_L ra, ABI_RA(sp)
+
+ addi sp, sp, ABI_SIZE_ON_STACK
+- addi sp, sp, SZREG
+ .endm
+
+ #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ .macro SAVE_ALL
+- addi sp, sp, -SZREG
+ addi sp, sp, -PT_SIZE_ON_STACK
+
+- REG_S x1, PT_EPC(sp)
+- addi sp, sp, PT_SIZE_ON_STACK
+- REG_L x1, (sp)
+- addi sp, sp, -PT_SIZE_ON_STACK
++ REG_S t0, PT_EPC(sp)
+ REG_S x1, PT_RA(sp)
+- REG_L x1, PT_EPC(sp)
+-
+ REG_S x2, PT_SP(sp)
+ REG_S x3, PT_GP(sp)
+ REG_S x4, PT_TP(sp)
+@@ -100,15 +95,11 @@
+ .endm
+
+ .macro RESTORE_ALL
++ REG_L t0, PT_EPC(sp)
+ REG_L x1, PT_RA(sp)
+- addi sp, sp, PT_SIZE_ON_STACK
+- REG_S x1, (sp)
+- addi sp, sp, -PT_SIZE_ON_STACK
+- REG_L x1, PT_EPC(sp)
+ REG_L x2, PT_SP(sp)
+ REG_L x3, PT_GP(sp)
+ REG_L x4, PT_TP(sp)
+- REG_L x5, PT_T0(sp)
+ REG_L x6, PT_T1(sp)
+ REG_L x7, PT_T2(sp)
+ REG_L x8, PT_S0(sp)
+@@ -137,17 +128,16 @@
+ REG_L x31, PT_T6(sp)
+
+ addi sp, sp, PT_SIZE_ON_STACK
+- addi sp, sp, SZREG
+ .endm
+ #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+
+ ENTRY(ftrace_caller)
+ SAVE_ABI
+
+- addi a0, ra, -FENTRY_RA_OFFSET
++ addi a0, t0, -FENTRY_RA_OFFSET
+ la a1, function_trace_op
+ REG_L a2, 0(a1)
+- REG_L a1, ABI_SIZE_ON_STACK(sp)
++ mv a1, ra
+ mv a3, sp
+
+ ftrace_call:
+@@ -155,8 +145,8 @@ ftrace_call:
+ call ftrace_stub
+
+ #ifdef CONFIG_FUNCTION_GRAPH_TRACER
+- addi a0, sp, ABI_SIZE_ON_STACK
+- REG_L a1, ABI_RA(sp)
++ addi a0, sp, ABI_RA
++ REG_L a1, ABI_T0(sp)
+ addi a1, a1, -FENTRY_RA_OFFSET
+ #ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+ mv a2, s0
+@@ -166,17 +156,17 @@ ftrace_graph_call:
+ call ftrace_stub
+ #endif
+ RESTORE_ABI
+- ret
++ jr t0
+ ENDPROC(ftrace_caller)
+
+ #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ ENTRY(ftrace_regs_caller)
+ SAVE_ALL
+
+- addi a0, ra, -FENTRY_RA_OFFSET
++ addi a0, t0, -FENTRY_RA_OFFSET
+ la a1, function_trace_op
+ REG_L a2, 0(a1)
+- REG_L a1, PT_SIZE_ON_STACK(sp)
++ mv a1, ra
+ mv a3, sp
+
+ ftrace_regs_call:
+@@ -196,6 +186,6 @@ ftrace_graph_regs_call:
+ #endif
+
+ RESTORE_ALL
+- ret
++ jr t0
+ ENDPROC(ftrace_regs_caller)
+ #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
--- /dev/null
+From 409c8fb20c66df7150e592747412438c04aeb11f Mon Sep 17 00:00:00 2001
+From: Guo Ren <guoren@linux.alibaba.com>
+Date: Thu, 12 Jan 2023 04:05:58 -0500
+Subject: riscv: ftrace: Remove wasted nops for !RISCV_ISA_C
+
+From: Guo Ren <guoren@linux.alibaba.com>
+
+commit 409c8fb20c66df7150e592747412438c04aeb11f upstream.
+
+When CONFIG_RISCV_ISA_C=n, -fpatchable-function-entry=8 generates more
+nops than we expect, because it treats the nop opcode as 0x00000013
+(4 bytes) instead of 0x0001 (2 bytes).
+
+Dump of assembler code for function dw_pcie_free_msi:
+ 0xffffffff806fce94 <+0>: sd ra,-8(sp)
+ 0xffffffff806fce98 <+4>: auipc ra,0xff90f
+ 0xffffffff806fce9c <+8>: jalr -684(ra) # 0xffffffff8000bbec
+<ftrace_caller>
+ 0xffffffff806fcea0 <+12>: ld ra,-8(sp)
+ 0xffffffff806fcea4 <+16>: nop /* wasted */
+ 0xffffffff806fcea8 <+20>: nop /* wasted */
+ 0xffffffff806fceac <+24>: nop /* wasted */
+ 0xffffffff806fceb0 <+28>: nop /* wasted */
+ 0xffffffff806fceb4 <+0>: addi sp,sp,-48
+ 0xffffffff806fceb8 <+4>: sd s0,32(sp)
+ 0xffffffff806fcebc <+8>: sd s1,24(sp)
+ 0xffffffff806fcec0 <+12>: sd s2,16(sp)
+ 0xffffffff806fcec4 <+16>: sd s3,8(sp)
+ 0xffffffff806fcec8 <+20>: sd ra,40(sp)
+ 0xffffffff806fcecc <+24>: addi s0,sp,48
+
+Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
+Signed-off-by: Guo Ren <guoren@kernel.org>
+Link: https://lore.kernel.org/r/20230112090603.1295340-3-guoren@kernel.org
+Cc: stable@vger.kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/Makefile | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/riscv/Makefile
++++ b/arch/riscv/Makefile
+@@ -11,7 +11,11 @@ LDFLAGS_vmlinux :=
+ ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
+ LDFLAGS_vmlinux := --no-relax
+ KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
++ifeq ($(CONFIG_RISCV_ISA_C),y)
+ CC_FLAGS_FTRACE := -fpatchable-function-entry=8
++else
++ CC_FLAGS_FTRACE := -fpatchable-function-entry=4
++endif
+ endif
+
+ ifeq ($(CONFIG_CMODEL_MEDLOW),y)
--- /dev/null
+From 9ddfc3cd806081ce1f6c9c2f988cbb031f35d28f Mon Sep 17 00:00:00 2001
+From: Andy Chiu <andy.chiu@sifive.com>
+Date: Mon, 6 Feb 2023 04:04:40 -0500
+Subject: riscv: jump_label: Fixup unaligned arch_static_branch function
+
+From: Andy Chiu <andy.chiu@sifive.com>
+
+commit 9ddfc3cd806081ce1f6c9c2f988cbb031f35d28f upstream.
+
+Runtime code patching must be done at a naturally aligned address, or we
+may execute on a partial instruction.
+
+We have encountered problems traced back to static jump functions during
+the test. We switched the tracer randomly every 1~5 seconds on a
+dual-core QEMU setup and found the kernel stuck at a static branch
+where it jumps to itself.
+
+The reason is that the static branch was 2-byte but not 4-byte aligned.
+Then, the kernel would patch the instruction, either J or NOP, with two
+half-word stores if the machine does not have efficient unaligned
+accesses. Thus, moments exist where half of the NOP mixes with the other
+half of the J when transitioning the branch. In our particular case, on
+a little-endian machine, the upper half of the NOP was mixed with the
+lower part of the J when enabling the branch, resulting in a jump that
+jumped to itself. Conversely, it would result in a HINT instruction when
+disabling the branch, but it might not be observable.
+
+ARM64 does not have this problem since all instructions must be 4-byte
+aligned.
+
+Fixes: ebc00dde8a97 ("riscv: Add jump-label implementation")
+Link: https://lore.kernel.org/linux-riscv/20220913094252.3555240-6-andy.chiu@sifive.com/
+Reviewed-by: Greentime Hu <greentime.hu@sifive.com>
+Signed-off-by: Andy Chiu <andy.chiu@sifive.com>
+Signed-off-by: Guo Ren <guoren@kernel.org>
+Link: https://lore.kernel.org/r/20230206090440.1255001-1-guoren@kernel.org
+Cc: stable@vger.kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/include/asm/jump_label.h | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/arch/riscv/include/asm/jump_label.h
++++ b/arch/riscv/include/asm/jump_label.h
+@@ -18,6 +18,7 @@ static __always_inline bool arch_static_
+ const bool branch)
+ {
+ asm_volatile_goto(
++ " .align 2 \n\t"
+ " .option push \n\t"
+ " .option norelax \n\t"
+ " .option norvc \n\t"
+@@ -39,6 +40,7 @@ static __always_inline bool arch_static_
+ const bool branch)
+ {
+ asm_volatile_goto(
++ " .align 2 \n\t"
+ " .option push \n\t"
+ " .option norelax \n\t"
+ " .option norvc \n\t"
--- /dev/null
+From b49f700668fff7565b945dce823def79bff59bb0 Mon Sep 17 00:00:00 2001
+From: Sergey Matyukevich <sergey.matyukevich@syntacore.com>
+Date: Mon, 30 Jan 2023 00:18:18 +0300
+Subject: riscv: mm: fix regression due to update_mmu_cache change
+
+From: Sergey Matyukevich <sergey.matyukevich@syntacore.com>
+
+commit b49f700668fff7565b945dce823def79bff59bb0 upstream.
+
+This is a partial revert of the commit 4bd1d80efb5a ("riscv: mm: notify
+remote harts about mmu cache updates"). Original commit included two
+loosely related changes serving the same purpose of fixing stale TLB
+entries causing user-space application crash:
+- introduce deferred per-ASID TLB flush for CPUs not running the task
+- switch to per-ASID TLB flush on all CPUs running the task in update_mmu_cache
+
+According to report and discussion in [1], the second part caused a
+regression on Renesas RZ/Five SoC. For now restore the old behavior
+of the update_mmu_cache.
+
+[1] https://lore.kernel.org/linux-riscv/20220829205219.283543-1-geomatsi@gmail.com/
+
+Fixes: 4bd1d80efb5a ("riscv: mm: notify remote harts about mmu cache updates")
+Reported-by: "Lad, Prabhakar" <prabhakar.csengg@gmail.com>
+Signed-off-by: Sergey Matyukevich <sergey.matyukevich@syntacore.com>
+Link: https://lore.kernel.org/linux-riscv/20220829205219.283543-1-geomatsi@gmail.com/ [1]
+Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
+Link: https://lore.kernel.org/r/20230129211818.686557-1-geomatsi@gmail.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/include/asm/pgtable.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/riscv/include/asm/pgtable.h
++++ b/arch/riscv/include/asm/pgtable.h
+@@ -415,7 +415,7 @@ static inline void update_mmu_cache(stru
+ * Relying on flush_tlb_fix_spurious_fault would suffice, but
+ * the extra traps reduce performance. So, eagerly SFENCE.VMA.
+ */
+- flush_tlb_page(vma, address);
++ local_flush_tlb_page(address);
+ }
+
+ #define __HAVE_ARCH_UPDATE_MMU_TLB
--- /dev/null
+From 416721ff05fddc58ca531b6f069de250301de6e5 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= <bjorn@rivosinc.com>
+Date: Tue, 14 Feb 2023 17:25:15 +0100
+Subject: riscv, mm: Perform BPF exhandler fixup on page fault
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Björn Töpel <bjorn@rivosinc.com>
+
+commit 416721ff05fddc58ca531b6f069de250301de6e5 upstream.
+
+Commit 21855cac82d3 ("riscv/mm: Prevent kernel module to access user
+memory without uaccess routines") added early exits/deaths for page
+faults stemming from accesses to user-space without using proper
+uaccess routines (where sstatus.SUM is set).
+
+Unfortunately, this is too strict for some BPF programs, which rely
+on BPF exhandler fixups. These BPF programs load "BTF pointers". A
+BTF pointer could either be a valid kernel pointer or NULL, but not a
+userspace address.
+
+Resolve the problem by calling the fixup handler in the early exit
+path.
+
+Fixes: 21855cac82d3 ("riscv/mm: Prevent kernel module to access user memory without uaccess routines")
+Signed-off-by: Björn Töpel <bjorn@rivosinc.com>
+Link: https://lore.kernel.org/r/20230214162515.184827-1-bjorn@kernel.org
+Cc: stable@vger.kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/mm/fault.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/arch/riscv/mm/fault.c
++++ b/arch/riscv/mm/fault.c
+@@ -267,10 +267,12 @@ asmlinkage void do_page_fault(struct pt_
+ if (user_mode(regs))
+ flags |= FAULT_FLAG_USER;
+
+- if (!user_mode(regs) && addr < TASK_SIZE &&
+- unlikely(!(regs->status & SR_SUM)))
+- die_kernel_fault("access to user memory without uaccess routines",
+- addr, regs);
++ if (!user_mode(regs) && addr < TASK_SIZE && unlikely(!(regs->status & SR_SUM))) {
++ if (fixup_exception(regs))
++ return;
++
++ die_kernel_fault("access to user memory without uaccess routines", addr, regs);
++ }
+
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
+
--- /dev/null
+From 3fe97ff3d94934649abb0652028dd7296170c8d0 Mon Sep 17 00:00:00 2001
+From: James Bottomley <jejb@linux.ibm.com>
+Date: Sat, 28 Nov 2020 15:27:21 -0800
+Subject: scsi: ses: Don't attach if enclosure has no components
+
+From: James Bottomley <jejb@linux.ibm.com>
+
+commit 3fe97ff3d94934649abb0652028dd7296170c8d0 upstream.
+
+An enclosure with no components can't usefully be operated by the driver
+(since effectively it has nothing to manage), so report the problem and
+don't attach. Not attaching also fixes an oops which could occur if the
+driver tries to manage a zero component enclosure.
+
+[mkp: Switched to KERN_WARNING since this scenario is common]
+
+Link: https://lore.kernel.org/r/c5deac044ac409e32d9ad9968ce0dcbc996bfc7a.camel@linux.ibm.com
+Cc: stable@vger.kernel.org
+Reported-by: Ding Hui <dinghui@sangfor.com.cn>
+Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/ses.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/drivers/scsi/ses.c
++++ b/drivers/scsi/ses.c
+@@ -704,6 +704,12 @@ static int ses_intf_add(struct device *c
+ type_ptr[0] == ENCLOSURE_COMPONENT_ARRAY_DEVICE)
+ components += type_ptr[1];
+ }
++
++ if (components == 0) {
++ sdev_printk(KERN_WARNING, sdev, "enclosure has no enumerated components\n");
++ goto err_free;
++ }
++
+ ses_dev->page1 = buf;
+ ses_dev->page1_len = len;
+ buf = NULL;
--- /dev/null
+From db95d4df71cb55506425b6e4a5f8d68e3a765b63 Mon Sep 17 00:00:00 2001
+From: Tomas Henzl <thenzl@redhat.com>
+Date: Thu, 2 Feb 2023 17:24:49 +0100
+Subject: scsi: ses: Fix possible addl_desc_ptr out-of-bounds accesses
+
+From: Tomas Henzl <thenzl@redhat.com>
+
+commit db95d4df71cb55506425b6e4a5f8d68e3a765b63 upstream.
+
+Sanitize possible addl_desc_ptr out-of-bounds accesses in
+ses_enclosure_data_process().
+
+Link: https://lore.kernel.org/r/20230202162451.15346-3-thenzl@redhat.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Tomas Henzl <thenzl@redhat.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/ses.c | 35 ++++++++++++++++++++++++++---------
+ 1 file changed, 26 insertions(+), 9 deletions(-)
+
+--- a/drivers/scsi/ses.c
++++ b/drivers/scsi/ses.c
+@@ -433,8 +433,8 @@ int ses_match_host(struct enclosure_devi
+ }
+ #endif /* 0 */
+
+-static void ses_process_descriptor(struct enclosure_component *ecomp,
+- unsigned char *desc)
++static int ses_process_descriptor(struct enclosure_component *ecomp,
++ unsigned char *desc, int max_desc_len)
+ {
+ int eip = desc[0] & 0x10;
+ int invalid = desc[0] & 0x80;
+@@ -445,22 +445,32 @@ static void ses_process_descriptor(struc
+ unsigned char *d;
+
+ if (invalid)
+- return;
++ return 0;
+
+ switch (proto) {
+ case SCSI_PROTOCOL_FCP:
+ if (eip) {
++ if (max_desc_len <= 7)
++ return 1;
+ d = desc + 4;
+ slot = d[3];
+ }
+ break;
+ case SCSI_PROTOCOL_SAS:
++
+ if (eip) {
++ if (max_desc_len <= 27)
++ return 1;
+ d = desc + 4;
+ slot = d[3];
+ d = desc + 8;
+- } else
++ } else {
++ if (max_desc_len <= 23)
++ return 1;
+ d = desc + 4;
++ }
++
++
+ /* only take the phy0 addr */
+ addr = (u64)d[12] << 56 |
+ (u64)d[13] << 48 |
+@@ -477,6 +487,8 @@ static void ses_process_descriptor(struc
+ }
+ ecomp->slot = slot;
+ scomp->addr = addr;
++
++ return 0;
+ }
+
+ struct efd {
+@@ -549,7 +561,7 @@ static void ses_enclosure_data_process(s
+ /* skip past overall descriptor */
+ desc_ptr += len + 4;
+ }
+- if (ses_dev->page10)
++ if (ses_dev->page10 && ses_dev->page10_len > 9)
+ addl_desc_ptr = ses_dev->page10 + 8;
+ type_ptr = ses_dev->page1_types;
+ components = 0;
+@@ -557,6 +569,7 @@ static void ses_enclosure_data_process(s
+ for (j = 0; j < type_ptr[1]; j++) {
+ char *name = NULL;
+ struct enclosure_component *ecomp;
++ int max_desc_len;
+
+ if (desc_ptr) {
+ if (desc_ptr >= buf + page7_len) {
+@@ -583,10 +596,14 @@ static void ses_enclosure_data_process(s
+ ecomp = &edev->component[components++];
+
+ if (!IS_ERR(ecomp)) {
+- if (addl_desc_ptr)
+- ses_process_descriptor(
+- ecomp,
+- addl_desc_ptr);
++ if (addl_desc_ptr) {
++ max_desc_len = ses_dev->page10_len -
++ (addl_desc_ptr - ses_dev->page10);
++ if (ses_process_descriptor(ecomp,
++ addl_desc_ptr,
++ max_desc_len))
++ addl_desc_ptr = NULL;
++ }
+ if (create)
+ enclosure_component_register(
+ ecomp);
--- /dev/null
+From 801ab13d50cf3d26170ee073ea8bb4eececb76ab Mon Sep 17 00:00:00 2001
+From: Tomas Henzl <thenzl@redhat.com>
+Date: Thu, 2 Feb 2023 17:24:50 +0100
+Subject: scsi: ses: Fix possible desc_ptr out-of-bounds accesses
+
+From: Tomas Henzl <thenzl@redhat.com>
+
+commit 801ab13d50cf3d26170ee073ea8bb4eececb76ab upstream.
+
+Sanitize possible desc_ptr out-of-bounds accesses in
+ses_enclosure_data_process().
+
+Link: https://lore.kernel.org/r/20230202162451.15346-4-thenzl@redhat.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Tomas Henzl <thenzl@redhat.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/ses.c | 14 +++++++++-----
+ 1 file changed, 9 insertions(+), 5 deletions(-)
+
+--- a/drivers/scsi/ses.c
++++ b/drivers/scsi/ses.c
+@@ -572,15 +572,19 @@ static void ses_enclosure_data_process(s
+ int max_desc_len;
+
+ if (desc_ptr) {
+- if (desc_ptr >= buf + page7_len) {
++ if (desc_ptr + 3 >= buf + page7_len) {
+ desc_ptr = NULL;
+ } else {
+ len = (desc_ptr[2] << 8) + desc_ptr[3];
+ desc_ptr += 4;
+- /* Add trailing zero - pushes into
+- * reserved space */
+- desc_ptr[len] = '\0';
+- name = desc_ptr;
++ if (desc_ptr + len > buf + page7_len)
++ desc_ptr = NULL;
++ else {
++ /* Add trailing zero - pushes into
++ * reserved space */
++ desc_ptr[len] = '\0';
++ name = desc_ptr;
++ }
+ }
+ }
+ if (type_ptr[0] == ENCLOSURE_COMPONENT_DEVICE ||
--- /dev/null
+From 9b4f5028e493cb353a5c8f5c45073eeea0303abd Mon Sep 17 00:00:00 2001
+From: Tomas Henzl <thenzl@redhat.com>
+Date: Thu, 2 Feb 2023 17:24:48 +0100
+Subject: scsi: ses: Fix slab-out-of-bounds in ses_enclosure_data_process()
+
+From: Tomas Henzl <thenzl@redhat.com>
+
+commit 9b4f5028e493cb353a5c8f5c45073eeea0303abd upstream.
+
+A fix for:
+
+BUG: KASAN: slab-out-of-bounds in ses_enclosure_data_process+0x949/0xe30 [ses]
+Read of size 1 at addr ffff88a1b043a451 by task systemd-udevd/3271
+
+Checking after (and before in next loop) addl_desc_ptr[1] is sufficient, we
+expect the size to be sanitized before first access to addl_desc_ptr[1].
+Make sure we don't walk beyond end of page.
+
+Link: https://lore.kernel.org/r/20230202162451.15346-2-thenzl@redhat.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Tomas Henzl <thenzl@redhat.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/ses.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/scsi/ses.c
++++ b/drivers/scsi/ses.c
+@@ -603,9 +603,11 @@ static void ses_enclosure_data_process(s
+ /* these elements are optional */
+ type_ptr[0] == ENCLOSURE_COMPONENT_SCSI_TARGET_PORT ||
+ type_ptr[0] == ENCLOSURE_COMPONENT_SCSI_INITIATOR_PORT ||
+- type_ptr[0] == ENCLOSURE_COMPONENT_CONTROLLER_ELECTRONICS))
++ type_ptr[0] == ENCLOSURE_COMPONENT_CONTROLLER_ELECTRONICS)) {
+ addl_desc_ptr += addl_desc_ptr[1] + 2;
+-
++ if (addl_desc_ptr + 1 >= ses_dev->page10 + ses_dev->page10_len)
++ addl_desc_ptr = NULL;
++ }
+ }
+ }
+ kfree(buf);
--- /dev/null
+From 578797f0c8cbc2e3ec5fc0dab87087b4c7073686 Mon Sep 17 00:00:00 2001
+From: Tomas Henzl <thenzl@redhat.com>
+Date: Thu, 2 Feb 2023 17:24:51 +0100
+Subject: scsi: ses: Fix slab-out-of-bounds in ses_intf_remove()
+
+From: Tomas Henzl <thenzl@redhat.com>
+
+commit 578797f0c8cbc2e3ec5fc0dab87087b4c7073686 upstream.
+
+A fix for:
+
+BUG: KASAN: slab-out-of-bounds in ses_intf_remove+0x23f/0x270 [ses]
+Read of size 8 at addr ffff88a10d32e5d8 by task rmmod/12013
+
+When edev->components is zero, accessing edev->component[0] members is
+wrong.
+
+Link: https://lore.kernel.org/r/20230202162451.15346-5-thenzl@redhat.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Tomas Henzl <thenzl@redhat.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/ses.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/scsi/ses.c
++++ b/drivers/scsi/ses.c
+@@ -856,7 +856,8 @@ static void ses_intf_remove_enclosure(st
+ kfree(ses_dev->page2);
+ kfree(ses_dev);
+
+- kfree(edev->component[0].scratch);
++ if (edev->components)
++ kfree(edev->component[0].scratch);
+
+ put_device(&edev->edev);
+ enclosure_unregister(edev);
scsi-qla2xxx-remove-unintended-flag-clearing.patch
scsi-qla2xxx-fix-erroneous-link-down.patch
scsi-qla2xxx-remove-increment-of-interface-err-cnt.patch
+scsi-ses-don-t-attach-if-enclosure-has-no-components.patch
+scsi-ses-fix-slab-out-of-bounds-in-ses_enclosure_data_process.patch
+scsi-ses-fix-possible-addl_desc_ptr-out-of-bounds-accesses.patch
+scsi-ses-fix-possible-desc_ptr-out-of-bounds-accesses.patch
+scsi-ses-fix-slab-out-of-bounds-in-ses_intf_remove.patch
+risc-v-add-a-spin_shadow_stack-declaration.patch
+riscv-avoid-enabling-interrupts-in-die.patch
+riscv-mm-fix-regression-due-to-update_mmu_cache-change.patch
+riscv-jump_label-fixup-unaligned-arch_static_branch-function.patch
+riscv-ftrace-fixup-panic-by-disabling-preemption.patch
+riscv-mm-perform-bpf-exhandler-fixup-on-page-fault.patch
+riscv-ftrace-remove-wasted-nops-for-riscv_isa_c.patch
+riscv-ftrace-reduce-the-detour-code-size-to-half.patch
+mips-dts-ci20-fix-otg-power-gpio.patch
+pci-pm-observe-reset-delay-irrespective-of-bridge_d3.patch
+pci-unify-delay-handling-for-reset-and-resume.patch
+pci-hotplug-allow-marking-devices-as-disconnected-during-bind-unbind.patch
+pci-avoid-flr-for-amd-fch-ahci-adapters.patch
+pci-dpc-await-readiness-of-secondary-bus-after-reset.patch
+bus-mhi-ep-only-send-enotconn-status-if-client-driver-is-available.patch
+bus-mhi-ep-move-chan-lock-to-the-start-of-processing-queued-ch-ring.patch
+bus-mhi-ep-save-channel-state-locally-during-suspend-and-resume.patch
+iommufd-make-sure-to-zero-vfio_iommu_type1_info-before-copying-to-user.patch
+iommufd-do-not-add-the-same-hwpt-to-the-ioas-hwpt_list-twice.patch
+iommu-vt-d-avoid-superfluous-iotlb-tracking-in-lazy-mode.patch
+iommu-vt-d-fix-pasid-directory-pointer-coherency.patch
+vfio-type1-exclude-mdevs-from-vfio_update_vaddr.patch
+vfio-type1-prevent-underflow-of-locked_vm-via-exec.patch
+vfio-type1-track-locked_vm-per-dma.patch
+vfio-type1-restore-locked_vm.patch
+drm-amd-fix-initialization-for-nbio-7.5.1.patch
+drm-i915-quirks-add-inverted-backlight-quirk-for-hp-14-r206nv.patch
+drm-radeon-fix-edp-for-single-display-imac11-2.patch
+drm-i915-don-t-use-stolen-memory-for-ring-buffers-with-llc.patch
+drm-i915-don-t-use-bar-mappings-for-ring-buffers-with-llc.patch
+drm-gud-fix-ubsan-warning.patch
--- /dev/null
+From ef3a3f6a294ba65fd906a291553935881796f8a5 Mon Sep 17 00:00:00 2001
+From: Steve Sistare <steven.sistare@oracle.com>
+Date: Tue, 31 Jan 2023 08:58:03 -0800
+Subject: vfio/type1: exclude mdevs from VFIO_UPDATE_VADDR
+
+From: Steve Sistare <steven.sistare@oracle.com>
+
+commit ef3a3f6a294ba65fd906a291553935881796f8a5 upstream.
+
+Disable the VFIO_UPDATE_VADDR capability if mediated devices are present.
+Their kernel threads could be blocked indefinitely by a misbehaving
+userland while trying to pin/unpin pages while vaddrs are being updated.
+
+Do not allow groups to be added to the container while vaddr's are invalid,
+so we never need to block user threads from pinning, and can delete the
+vaddr-waiting code in a subsequent patch.
+
+Fixes: c3cbab24db38 ("vfio/type1: implement interfaces to update vaddr")
+Cc: stable@vger.kernel.org
+Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
+Reviewed-by: Kevin Tian <kevin.tian@intel.com>
+Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
+Link: https://lore.kernel.org/r/1675184289-267876-2-git-send-email-steven.sistare@oracle.com
+Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vfio/vfio_iommu_type1.c | 44 ++++++++++++++++++++++++++++++++++++++--
+ include/uapi/linux/vfio.h | 15 ++++++++-----
+ 2 files changed, 51 insertions(+), 8 deletions(-)
+
+--- a/drivers/vfio/vfio_iommu_type1.c
++++ b/drivers/vfio/vfio_iommu_type1.c
+@@ -861,6 +861,12 @@ static int vfio_iommu_type1_pin_pages(vo
+
+ mutex_lock(&iommu->lock);
+
++ if (WARN_ONCE(iommu->vaddr_invalid_count,
++ "vfio_pin_pages not allowed with VFIO_UPDATE_VADDR\n")) {
++ ret = -EBUSY;
++ goto pin_done;
++ }
++
+ /*
+ * Wait for all necessary vaddr's to be valid so they can be used in
+ * the main loop without dropping the lock, to avoid racing vs unmap.
+@@ -1343,6 +1349,12 @@ static int vfio_dma_do_unmap(struct vfio
+
+ mutex_lock(&iommu->lock);
+
++ /* Cannot update vaddr if mdev is present. */
++ if (invalidate_vaddr && !list_empty(&iommu->emulated_iommu_groups)) {
++ ret = -EBUSY;
++ goto unlock;
++ }
++
+ pgshift = __ffs(iommu->pgsize_bitmap);
+ pgsize = (size_t)1 << pgshift;
+
+@@ -2194,11 +2206,16 @@ static int vfio_iommu_type1_attach_group
+ struct iommu_domain_geometry *geo;
+ LIST_HEAD(iova_copy);
+ LIST_HEAD(group_resv_regions);
+- int ret = -EINVAL;
++ int ret = -EBUSY;
+
+ mutex_lock(&iommu->lock);
+
++ /* Attach could require pinning, so disallow while vaddr is invalid. */
++ if (iommu->vaddr_invalid_count)
++ goto out_unlock;
++
+ /* Check for duplicates */
++ ret = -EINVAL;
+ if (vfio_iommu_find_iommu_group(iommu, iommu_group))
+ goto out_unlock;
+
+@@ -2669,6 +2686,16 @@ static int vfio_domains_have_enforce_cac
+ return ret;
+ }
+
++static bool vfio_iommu_has_emulated(struct vfio_iommu *iommu)
++{
++ bool ret;
++
++ mutex_lock(&iommu->lock);
++ ret = !list_empty(&iommu->emulated_iommu_groups);
++ mutex_unlock(&iommu->lock);
++ return ret;
++}
++
+ static int vfio_iommu_type1_check_extension(struct vfio_iommu *iommu,
+ unsigned long arg)
+ {
+@@ -2677,8 +2704,13 @@ static int vfio_iommu_type1_check_extens
+ case VFIO_TYPE1v2_IOMMU:
+ case VFIO_TYPE1_NESTING_IOMMU:
+ case VFIO_UNMAP_ALL:
+- case VFIO_UPDATE_VADDR:
+ return 1;
++ case VFIO_UPDATE_VADDR:
++ /*
++ * Disable this feature if mdevs are present. They cannot
++ * safely pin/unpin/rw while vaddrs are being updated.
++ */
++ return iommu && !vfio_iommu_has_emulated(iommu);
+ case VFIO_DMA_CC_IOMMU:
+ if (!iommu)
+ return 0;
+@@ -3147,6 +3179,13 @@ static int vfio_iommu_type1_dma_rw(void
+ size_t done;
+
+ mutex_lock(&iommu->lock);
++
++ if (WARN_ONCE(iommu->vaddr_invalid_count,
++ "vfio_dma_rw not allowed with VFIO_UPDATE_VADDR\n")) {
++ ret = -EBUSY;
++ goto out;
++ }
++
+ while (count > 0) {
+ ret = vfio_iommu_type1_dma_rw_chunk(iommu, user_iova, data,
+ count, write, &done);
+@@ -3158,6 +3197,7 @@ static int vfio_iommu_type1_dma_rw(void
+ user_iova += done;
+ }
+
++out:
+ mutex_unlock(&iommu->lock);
+ return ret;
+ }
+--- a/include/uapi/linux/vfio.h
++++ b/include/uapi/linux/vfio.h
+@@ -49,7 +49,11 @@
+ /* Supports VFIO_DMA_UNMAP_FLAG_ALL */
+ #define VFIO_UNMAP_ALL 9
+
+-/* Supports the vaddr flag for DMA map and unmap */
++/*
++ * Supports the vaddr flag for DMA map and unmap. Not supported for mediated
++ * devices, so this capability is subject to change as groups are added or
++ * removed.
++ */
+ #define VFIO_UPDATE_VADDR 10
+
+ /*
+@@ -1343,8 +1347,7 @@ struct vfio_iommu_type1_info_dma_avail {
+ * Map process virtual addresses to IO virtual addresses using the
+ * provided struct vfio_dma_map. Caller sets argsz. READ &/ WRITE required.
+ *
+- * If flags & VFIO_DMA_MAP_FLAG_VADDR, update the base vaddr for iova, and
+- * unblock translation of host virtual addresses in the iova range. The vaddr
++ * If flags & VFIO_DMA_MAP_FLAG_VADDR, update the base vaddr for iova. The vaddr
+ * must have previously been invalidated with VFIO_DMA_UNMAP_FLAG_VADDR. To
+ * maintain memory consistency within the user application, the updated vaddr
+ * must address the same memory object as originally mapped. Failure to do so
+@@ -1395,9 +1398,9 @@ struct vfio_bitmap {
+ * must be 0. This cannot be combined with the get-dirty-bitmap flag.
+ *
+ * If flags & VFIO_DMA_UNMAP_FLAG_VADDR, do not unmap, but invalidate host
+- * virtual addresses in the iova range. Tasks that attempt to translate an
+- * iova's vaddr will block. DMA to already-mapped pages continues. This
+- * cannot be combined with the get-dirty-bitmap flag.
++ * virtual addresses in the iova range. DMA to already-mapped pages continues.
++ * Groups may not be added to the container while any addresses are invalid.
++ * This cannot be combined with the get-dirty-bitmap flag.
+ */
+ struct vfio_iommu_type1_dma_unmap {
+ __u32 argsz;
--- /dev/null
+From 046eca5018f8a5dd1dc2cedf87fb5843b9ea3026 Mon Sep 17 00:00:00 2001
+From: Steve Sistare <steven.sistare@oracle.com>
+Date: Tue, 31 Jan 2023 08:58:04 -0800
+Subject: vfio/type1: prevent underflow of locked_vm via exec()
+
+From: Steve Sistare <steven.sistare@oracle.com>
+
+commit 046eca5018f8a5dd1dc2cedf87fb5843b9ea3026 upstream.
+
+When a vfio container is preserved across exec, the task does not change,
+but it gets a new mm with locked_vm=0, and loses the count from existing
+dma mappings. If the user later unmaps a dma mapping, locked_vm underflows
+to a large unsigned value, and a subsequent dma map request fails with
+ENOMEM in __account_locked_vm.
+
+To avoid underflow, grab and save the mm at the time a dma is mapped.
+Use that mm when adjusting locked_vm, rather than re-acquiring the saved
+task's mm, which may have changed. If the saved mm is dead, do nothing.
+
+locked_vm is incremented for existing mappings in a subsequent patch.
+
+Fixes: 73fa0d10d077 ("vfio: Type1 IOMMU implementation")
+Cc: stable@vger.kernel.org
+Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
+Reviewed-by: Kevin Tian <kevin.tian@intel.com>
+Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
+Link: https://lore.kernel.org/r/1675184289-267876-3-git-send-email-steven.sistare@oracle.com
+Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vfio/vfio_iommu_type1.c | 41 +++++++++++++---------------------------
+ 1 file changed, 14 insertions(+), 27 deletions(-)
+
+--- a/drivers/vfio/vfio_iommu_type1.c
++++ b/drivers/vfio/vfio_iommu_type1.c
+@@ -100,6 +100,7 @@ struct vfio_dma {
+ struct task_struct *task;
+ struct rb_root pfn_list; /* Ex-user pinned pfn list */
+ unsigned long *bitmap;
++ struct mm_struct *mm;
+ };
+
+ struct vfio_batch {
+@@ -420,8 +421,8 @@ static int vfio_lock_acct(struct vfio_dm
+ if (!npage)
+ return 0;
+
+- mm = async ? get_task_mm(dma->task) : dma->task->mm;
+- if (!mm)
++ mm = dma->mm;
++ if (async && !mmget_not_zero(mm))
+ return -ESRCH; /* process exited */
+
+ ret = mmap_write_lock_killable(mm);
+@@ -794,8 +795,8 @@ static int vfio_pin_page_external(struct
+ struct mm_struct *mm;
+ int ret;
+
+- mm = get_task_mm(dma->task);
+- if (!mm)
++ mm = dma->mm;
++ if (!mmget_not_zero(mm))
+ return -ENODEV;
+
+ ret = vaddr_get_pfns(mm, vaddr, 1, dma->prot, pfn_base, pages);
+@@ -805,7 +806,7 @@ static int vfio_pin_page_external(struct
+ ret = 0;
+
+ if (do_accounting && !is_invalid_reserved_pfn(*pfn_base)) {
+- ret = vfio_lock_acct(dma, 1, true);
++ ret = vfio_lock_acct(dma, 1, false);
+ if (ret) {
+ put_pfn(*pfn_base, dma->prot);
+ if (ret == -ENOMEM)
+@@ -1180,6 +1181,7 @@ static void vfio_remove_dma(struct vfio_
+ vfio_unmap_unpin(iommu, dma, true);
+ vfio_unlink_dma(iommu, dma);
+ put_task_struct(dma->task);
++ mmdrop(dma->mm);
+ vfio_dma_bitmap_free(dma);
+ if (dma->vaddr_invalid) {
+ iommu->vaddr_invalid_count--;
+@@ -1664,29 +1666,15 @@ static int vfio_dma_do_map(struct vfio_i
+ * against the locked memory limit and we need to be able to do both
+ * outside of this call path as pinning can be asynchronous via the
+ * external interfaces for mdev devices. RLIMIT_MEMLOCK requires a
+- * task_struct and VM locked pages requires an mm_struct, however
+- * holding an indefinite mm reference is not recommended, therefore we
+- * only hold a reference to a task. We could hold a reference to
+- * current, however QEMU uses this call path through vCPU threads,
+- * which can be killed resulting in a NULL mm and failure in the unmap
+- * path when called via a different thread. Avoid this problem by
+- * using the group_leader as threads within the same group require
+- * both CLONE_THREAD and CLONE_VM and will therefore use the same
+- * mm_struct.
+- *
+- * Previously we also used the task for testing CAP_IPC_LOCK at the
+- * time of pinning and accounting, however has_capability() makes use
+- * of real_cred, a copy-on-write field, so we can't guarantee that it
+- * matches group_leader, or in fact that it might not change by the
+- * time it's evaluated. If a process were to call MAP_DMA with
+- * CAP_IPC_LOCK but later drop it, it doesn't make sense that they
+- * possibly see different results for an iommu_mapped vfio_dma vs
+- * externally mapped. Therefore track CAP_IPC_LOCK in vfio_dma at the
+- * time of calling MAP_DMA.
++ * task_struct. Save the group_leader so that all DMA tracking uses
++ * the same task, to make debugging easier. VM locked pages requires
++ * an mm_struct, so grab the mm in case the task dies.
+ */
+ get_task_struct(current->group_leader);
+ dma->task = current->group_leader;
+ dma->lock_cap = capable(CAP_IPC_LOCK);
++ dma->mm = current->mm;
++ mmgrab(dma->mm);
+
+ dma->pfn_list = RB_ROOT;
+
+@@ -3131,9 +3119,8 @@ static int vfio_iommu_type1_dma_rw_chunk
+ !(dma->prot & IOMMU_READ))
+ return -EPERM;
+
+- mm = get_task_mm(dma->task);
+-
+- if (!mm)
++ mm = dma->mm;
++ if (!mmget_not_zero(mm))
+ return -EPERM;
+
+ if (kthread)
--- /dev/null
+From 90fdd158a695d70403163f9a0e4efc5b20f3fd3e Mon Sep 17 00:00:00 2001
+From: Steve Sistare <steven.sistare@oracle.com>
+Date: Tue, 31 Jan 2023 08:58:06 -0800
+Subject: vfio/type1: restore locked_vm
+
+From: Steve Sistare <steven.sistare@oracle.com>
+
+commit 90fdd158a695d70403163f9a0e4efc5b20f3fd3e upstream.
+
+When a vfio container is preserved across exec or fork-exec, the new
+task's mm has a locked_vm count of 0. After a dma vaddr is updated using
+VFIO_DMA_MAP_FLAG_VADDR, locked_vm remains 0, and the pinned memory does
+not count against the task's RLIMIT_MEMLOCK.
+
+To restore the correct locked_vm count, when VFIO_DMA_MAP_FLAG_VADDR is
+used and the dma's mm has changed, add the dma's locked_vm count to
+the new mm->locked_vm, subject to the rlimit, and subtract it from the
+old mm->locked_vm.
+
+Fixes: c3cbab24db38 ("vfio/type1: implement interfaces to update vaddr")
+Cc: stable@vger.kernel.org
+Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
+Reviewed-by: Kevin Tian <kevin.tian@intel.com>
+Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
+Link: https://lore.kernel.org/r/1675184289-267876-5-git-send-email-steven.sistare@oracle.com
+Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vfio/vfio_iommu_type1.c | 35 +++++++++++++++++++++++++++++++++++
+ 1 file changed, 35 insertions(+)
+
+--- a/drivers/vfio/vfio_iommu_type1.c
++++ b/drivers/vfio/vfio_iommu_type1.c
+@@ -1591,6 +1591,38 @@ static bool vfio_iommu_iova_dma_valid(st
+ return list_empty(iova);
+ }
+
++static int vfio_change_dma_owner(struct vfio_dma *dma)
++{
++ struct task_struct *task = current->group_leader;
++ struct mm_struct *mm = current->mm;
++ long npage = dma->locked_vm;
++ bool lock_cap;
++ int ret;
++
++ if (mm == dma->mm)
++ return 0;
++
++ lock_cap = capable(CAP_IPC_LOCK);
++ ret = mm_lock_acct(task, mm, lock_cap, npage);
++ if (ret)
++ return ret;
++
++ if (mmget_not_zero(dma->mm)) {
++ mm_lock_acct(dma->task, dma->mm, dma->lock_cap, -npage);
++ mmput(dma->mm);
++ }
++
++ if (dma->task != task) {
++ put_task_struct(dma->task);
++ dma->task = get_task_struct(task);
++ }
++ mmdrop(dma->mm);
++ dma->mm = mm;
++ mmgrab(dma->mm);
++ dma->lock_cap = lock_cap;
++ return 0;
++}
++
+ static int vfio_dma_do_map(struct vfio_iommu *iommu,
+ struct vfio_iommu_type1_dma_map *map)
+ {
+@@ -1640,6 +1672,9 @@ static int vfio_dma_do_map(struct vfio_i
+ dma->size != size) {
+ ret = -EINVAL;
+ } else {
++ ret = vfio_change_dma_owner(dma);
++ if (ret)
++ goto out_unlock;
+ dma->vaddr = vaddr;
+ dma->vaddr_invalid = false;
+ iommu->vaddr_invalid_count--;
--- /dev/null
+From 18e292705ba21cc9b3227b9ad5b1c28973605ee5 Mon Sep 17 00:00:00 2001
+From: Steve Sistare <steven.sistare@oracle.com>
+Date: Tue, 31 Jan 2023 08:58:05 -0800
+Subject: vfio/type1: track locked_vm per dma
+
+From: Steve Sistare <steven.sistare@oracle.com>
+
+commit 18e292705ba21cc9b3227b9ad5b1c28973605ee5 upstream.
+
+Track locked_vm per dma struct, and create a new subroutine, mm_lock_acct(),
+both for use in a subsequent patch. No functional change.
+
+Fixes: c3cbab24db38 ("vfio/type1: implement interfaces to update vaddr")
+Cc: stable@vger.kernel.org
+Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
+Reviewed-by: Kevin Tian <kevin.tian@intel.com>
+Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
+Link: https://lore.kernel.org/r/1675184289-267876-4-git-send-email-steven.sistare@oracle.com
+Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vfio/vfio_iommu_type1.c | 23 +++++++++++++++++------
+ 1 file changed, 17 insertions(+), 6 deletions(-)
+
+--- a/drivers/vfio/vfio_iommu_type1.c
++++ b/drivers/vfio/vfio_iommu_type1.c
+@@ -101,6 +101,7 @@ struct vfio_dma {
+ struct rb_root pfn_list; /* Ex-user pinned pfn list */
+ unsigned long *bitmap;
+ struct mm_struct *mm;
++ size_t locked_vm;
+ };
+
+ struct vfio_batch {
+@@ -413,6 +414,19 @@ static int vfio_iova_put_vfio_pfn(struct
+ return ret;
+ }
+
++static int mm_lock_acct(struct task_struct *task, struct mm_struct *mm,
++ bool lock_cap, long npage)
++{
++ int ret = mmap_write_lock_killable(mm);
++
++ if (ret)
++ return ret;
++
++ ret = __account_locked_vm(mm, abs(npage), npage > 0, task, lock_cap);
++ mmap_write_unlock(mm);
++ return ret;
++}
++
+ static int vfio_lock_acct(struct vfio_dma *dma, long npage, bool async)
+ {
+ struct mm_struct *mm;
+@@ -425,12 +439,9 @@ static int vfio_lock_acct(struct vfio_dm
+ if (async && !mmget_not_zero(mm))
+ return -ESRCH; /* process exited */
+
+- ret = mmap_write_lock_killable(mm);
+- if (!ret) {
+- ret = __account_locked_vm(mm, abs(npage), npage > 0, dma->task,
+- dma->lock_cap);
+- mmap_write_unlock(mm);
+- }
++ ret = mm_lock_acct(dma->task, mm, dma->lock_cap, npage);
++ if (!ret)
++ dma->locked_vm += npage;
+
+ if (async)
+ mmput(mm);