6.2-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Tue, 7 Mar 2023 16:20:53 +0000 (17:20 +0100)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Tue, 7 Mar 2023 16:20:53 +0000 (17:20 +0100)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 7 Mar 2023 16:20:53 +0000 (17:20 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 7 Mar 2023 16:20:53 +0000 (17:20 +0100)
diff --git a/queue-6.2/bus-mhi-ep-move-chan-lock-to-the-start-of-processing-queued-ch-ring.patch b/queue-6.2/bus-mhi-ep-move-chan-lock-to-the-start-of-processing-queued-ch-ring.patch

new file mode 100644 (file)

index 0000000..6221f8f
--- /dev/null
+++ b/queue-6.2/bus-mhi-ep-move-chan-lock-to-the-start-of-processing-queued-ch-ring.patch
@@ -0,0 +1,73 @@
+From 8d6a1fea53864cd9545741f48f4ae4df804db557 Mon Sep 17 00:00:00 2001
+From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Date: Wed, 28 Dec 2022 21:47:03 +0530
+Subject: bus: mhi: ep: Move chan->lock to the start of processing queued ch ring
+
+From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+
+commit 8d6a1fea53864cd9545741f48f4ae4df804db557 upstream.
+
+There is a good chance that while the channel ring gets processed, the STOP
+or RESET command for the channel might be received from the MHI host. In
+those cases, the entire channel ring processing needs to be protected by
+chan->lock to prevent the race where the corresponding channel ring might
+be reset.
+
+While at it, let's also add a sanity check to make sure that the ring is
+started before processing it. Because, if the STOP/RESET command gets
+processed while mhi_ep_ch_ring_worker() waited for chan->lock, the ring
+would've been reset.
+
+Cc: <stable@vger.kernel.org> # 5.19
+Fixes: 03c0bb8ec983 ("bus: mhi: ep: Add support for processing channel rings")
+Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
+Link: https://lore.kernel.org/r/20221228161704.255268-6-manivannan.sadhasivam@linaro.org
+Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/bus/mhi/ep/main.c |   17 +++++++++++++++--
+ 1 file changed, 15 insertions(+), 2 deletions(-)
+
+--- a/drivers/bus/mhi/ep/main.c
++++ b/drivers/bus/mhi/ep/main.c
+@@ -723,24 +723,37 @@ static void mhi_ep_ch_ring_worker(struct
+               list_del(&itr->node);
+               ring = itr->ring;
+ 
++              chan = &mhi_cntrl->mhi_chan[ring->ch_id];
++              mutex_lock(&chan->lock);
++
++              /*
++               * The ring could've stopped while we waited to grab the (chan->lock), so do
++               * a sanity check before going further.
++               */
++              if (!ring->started) {
++                      mutex_unlock(&chan->lock);
++                      kfree(itr);
++                      continue;
++              }
++
+               /* Update the write offset for the ring */
+               ret = mhi_ep_update_wr_offset(ring);
+               if (ret) {
+                       dev_err(dev, "Error updating write offset for ring\n");
++                      mutex_unlock(&chan->lock);
+                       kfree(itr);
+                       continue;
+               }
+ 
+               /* Sanity check to make sure there are elements in the ring */
+               if (ring->rd_offset == ring->wr_offset) {
++                      mutex_unlock(&chan->lock);
+                       kfree(itr);
+                       continue;
+               }
+ 
+               el = &ring->ring_cache[ring->rd_offset];
+-              chan = &mhi_cntrl->mhi_chan[ring->ch_id];
+ 
+-              mutex_lock(&chan->lock);
+               dev_dbg(dev, "Processing the ring for channel (%u)\n", ring->ch_id);
+               ret = mhi_ep_process_ch_ring(ring, el);
+               if (ret) {
diff --git a/queue-6.2/bus-mhi-ep-only-send-enotconn-status-if-client-driver-is-available.patch b/queue-6.2/bus-mhi-ep-only-send-enotconn-status-if-client-driver-is-available.patch

new file mode 100644 (file)

index 0000000..f9e6974
--- /dev/null
+++ b/queue-6.2/bus-mhi-ep-only-send-enotconn-status-if-client-driver-is-available.patch
@@ -0,0 +1,56 @@
+From e6cebcc27519dcf1652e604c73b9fd4f416987c0 Mon Sep 17 00:00:00 2001
+From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Date: Wed, 28 Dec 2022 21:47:01 +0530
+Subject: bus: mhi: ep: Only send -ENOTCONN status if client driver is available
+
+From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+
+commit e6cebcc27519dcf1652e604c73b9fd4f416987c0 upstream.
+
+For the STOP and RESET commands, only send the channel disconnect status
+-ENOTCONN if client driver is available. Otherwise, it will result in
+null pointer dereference.
+
+Cc: <stable@vger.kernel.org> # 5.19
+Fixes: e827569062a8 ("bus: mhi: ep: Add support for processing command rings")
+Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
+Link: https://lore.kernel.org/r/20221228161704.255268-4-manivannan.sadhasivam@linaro.org
+Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/bus/mhi/ep/main.c |   16 ++++++++++------
+ 1 file changed, 10 insertions(+), 6 deletions(-)
+
+--- a/drivers/bus/mhi/ep/main.c
++++ b/drivers/bus/mhi/ep/main.c
+@@ -196,9 +196,11 @@ static int mhi_ep_process_cmd_ring(struc
+               mhi_ep_mmio_disable_chdb(mhi_cntrl, ch_id);
+ 
+               /* Send channel disconnect status to client drivers */
+-              result.transaction_status = -ENOTCONN;
+-              result.bytes_xferd = 0;
+-              mhi_chan->xfer_cb(mhi_chan->mhi_dev, &result);
++              if (mhi_chan->xfer_cb) {
++                      result.transaction_status = -ENOTCONN;
++                      result.bytes_xferd = 0;
++                      mhi_chan->xfer_cb(mhi_chan->mhi_dev, &result);
++              }
+ 
+               /* Set channel state to STOP */
+               mhi_chan->state = MHI_CH_STATE_STOP;
+@@ -228,9 +230,11 @@ static int mhi_ep_process_cmd_ring(struc
+               mhi_ep_ring_reset(mhi_cntrl, ch_ring);
+ 
+               /* Send channel disconnect status to client driver */
+-              result.transaction_status = -ENOTCONN;
+-              result.bytes_xferd = 0;
+-              mhi_chan->xfer_cb(mhi_chan->mhi_dev, &result);
++              if (mhi_chan->xfer_cb) {
++                      result.transaction_status = -ENOTCONN;
++                      result.bytes_xferd = 0;
++                      mhi_chan->xfer_cb(mhi_chan->mhi_dev, &result);
++              }
+ 
+               /* Set channel state to DISABLED */
+               mhi_chan->state = MHI_CH_STATE_DISABLED;
diff --git a/queue-6.2/bus-mhi-ep-save-channel-state-locally-during-suspend-and-resume.patch b/queue-6.2/bus-mhi-ep-save-channel-state-locally-during-suspend-and-resume.patch

new file mode 100644 (file)

index 0000000..cf380e3
--- /dev/null
+++ b/queue-6.2/bus-mhi-ep-save-channel-state-locally-during-suspend-and-resume.patch
@@ -0,0 +1,44 @@
+From 8a1c24bb908f9ecbc4be0fea014df67d43161551 Mon Sep 17 00:00:00 2001
+From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Date: Wed, 28 Dec 2022 21:47:04 +0530
+Subject: bus: mhi: ep: Save channel state locally during suspend and resume
+
+From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+
+commit 8a1c24bb908f9ecbc4be0fea014df67d43161551 upstream.
+
+During suspend and resume, the channel state needs to be saved locally.
+Otherwise, the endpoint may access the channels while they were being
+suspended and causing access violations.
+
+Fix it by saving the channel state locally during suspend and resume.
+
+Cc: <stable@vger.kernel.org> # 5.19
+Fixes: e4b7b5f0f30a ("bus: mhi: ep: Add support for suspending and resuming channels")
+Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
+Link: https://lore.kernel.org/r/20221228161704.255268-7-manivannan.sadhasivam@linaro.org
+Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/bus/mhi/ep/main.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/bus/mhi/ep/main.c
++++ b/drivers/bus/mhi/ep/main.c
+@@ -1136,6 +1136,7 @@ void mhi_ep_suspend_channels(struct mhi_
+ 
+               dev_dbg(&mhi_chan->mhi_dev->dev, "Suspending channel\n");
+               /* Set channel state to SUSPENDED */
++              mhi_chan->state = MHI_CH_STATE_SUSPENDED;
+               tmp &= ~CHAN_CTX_CHSTATE_MASK;
+               tmp |= FIELD_PREP(CHAN_CTX_CHSTATE_MASK, MHI_CH_STATE_SUSPENDED);
+               mhi_cntrl->ch_ctx_cache[i].chcfg = cpu_to_le32(tmp);
+@@ -1165,6 +1166,7 @@ void mhi_ep_resume_channels(struct mhi_e
+ 
+               dev_dbg(&mhi_chan->mhi_dev->dev, "Resuming channel\n");
+               /* Set channel state to RUNNING */
++              mhi_chan->state = MHI_CH_STATE_RUNNING;
+               tmp &= ~CHAN_CTX_CHSTATE_MASK;
+               tmp |= FIELD_PREP(CHAN_CTX_CHSTATE_MASK, MHI_CH_STATE_RUNNING);
+               mhi_cntrl->ch_ctx_cache[i].chcfg = cpu_to_le32(tmp);
diff --git a/queue-6.2/drm-amd-fix-initialization-for-nbio-7.5.1.patch b/queue-6.2/drm-amd-fix-initialization-for-nbio-7.5.1.patch

new file mode 100644 (file)

index 0000000..bbedbe8
--- /dev/null
+++ b/queue-6.2/drm-amd-fix-initialization-for-nbio-7.5.1.patch
@@ -0,0 +1,38 @@
+From 65a24000808f70ac69bd2a96381fa0c7341f20c0 Mon Sep 17 00:00:00 2001
+From: Mario Limonciello <mario.limonciello@amd.com>
+Date: Sun, 19 Feb 2023 23:04:04 -0600
+Subject: drm/amd: Fix initialization for nbio 7.5.1
+
+From: Mario Limonciello <mario.limonciello@amd.com>
+
+commit 65a24000808f70ac69bd2a96381fa0c7341f20c0 upstream.
+
+A mistake has been made in the BIOS for some ASICs with NBIO 7.5.1
+where some NBIO registers aren't properly setup.
+
+Ensure that they're set during initialization.
+
+Tested-by: Richard Gong <richard.gong@amd.com>
+Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
+Acked-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org # 6.1.x
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c
++++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c
+@@ -382,6 +382,11 @@ static void nbio_v7_2_init_registers(str
+               if (def != data)
+                       WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regBIF1_PCIE_MST_CTRL_3), data);
+               break;
++      case IP_VERSION(7, 5, 1):
++              data = RREG32_SOC15(NBIO, 0, regRCC_DEV2_EPF0_STRAP2);
++              data &= ~RCC_DEV2_EPF0_STRAP2__STRAP_NO_SOFT_RESET_DEV2_F0_MASK;
++              WREG32_SOC15(NBIO, 0, regRCC_DEV2_EPF0_STRAP2, data);
++              fallthrough;
+       default:
+               def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CONFIG_CNTL));
+               data = REG_SET_FIELD(data, PCIE_CONFIG_CNTL,
diff --git a/queue-6.2/drm-gud-fix-ubsan-warning.patch b/queue-6.2/drm-gud-fix-ubsan-warning.patch

new file mode 100644 (file)

index 0000000..3c6e7d6
--- /dev/null
+++ b/queue-6.2/drm-gud-fix-ubsan-warning.patch
@@ -0,0 +1,79 @@
+From 951df98024f7272f85df5044eca7374f5b5b24ef Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Noralf=20Tr=C3=B8nnes?= <noralf@tronnes.org>
+Date: Wed, 30 Nov 2022 20:26:49 +0100
+Subject: drm/gud: Fix UBSAN warning
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Noralf Trønnes <noralf@tronnes.org>
+
+commit 951df98024f7272f85df5044eca7374f5b5b24ef upstream.
+
+UBSAN complains about invalid value for bool:
+
+[  101.165172] [drm] Initialized gud 1.0.0 20200422 for 2-3.2:1.0 on minor 1
+[  101.213360] gud 2-3.2:1.0: [drm] fb1: guddrmfb frame buffer device
+[  101.213426] usbcore: registered new interface driver gud
+[  101.989431] ================================================================================
+[  101.989441] UBSAN: invalid-load in linux/include/linux/iosys-map.h:253:9
+[  101.989447] load of value 121 is not a valid value for type '_Bool'
+[  101.989451] CPU: 1 PID: 455 Comm: kworker/1:6 Not tainted 5.18.0-rc5-gud-5.18-rc5 #3
+[  101.989456] Hardware name: Hewlett-Packard HP EliteBook 820 G1/1991, BIOS L71 Ver. 01.44 04/12/2018
+[  101.989459] Workqueue: events_long gud_flush_work [gud]
+[  101.989471] Call Trace:
+[  101.989474]  <TASK>
+[  101.989479]  dump_stack_lvl+0x49/0x5f
+[  101.989488]  dump_stack+0x10/0x12
+[  101.989493]  ubsan_epilogue+0x9/0x3b
+[  101.989498]  __ubsan_handle_load_invalid_value.cold+0x44/0x49
+[  101.989504]  dma_buf_vmap.cold+0x38/0x3d
+[  101.989511]  ? find_busiest_group+0x48/0x300
+[  101.989520]  drm_gem_shmem_vmap+0x76/0x1b0 [drm_shmem_helper]
+[  101.989528]  drm_gem_shmem_object_vmap+0x9/0xb [drm_shmem_helper]
+[  101.989535]  drm_gem_vmap+0x26/0x60 [drm]
+[  101.989594]  drm_gem_fb_vmap+0x47/0x150 [drm_kms_helper]
+[  101.989630]  gud_prep_flush+0xc1/0x710 [gud]
+[  101.989639]  ? _raw_spin_lock+0x17/0x40
+[  101.989648]  gud_flush_work+0x1e0/0x430 [gud]
+[  101.989653]  ? __switch_to+0x11d/0x470
+[  101.989664]  process_one_work+0x21f/0x3f0
+[  101.989673]  worker_thread+0x200/0x3e0
+[  101.989679]  ? rescuer_thread+0x390/0x390
+[  101.989684]  kthread+0xfd/0x130
+[  101.989690]  ? kthread_complete_and_exit+0x20/0x20
+[  101.989696]  ret_from_fork+0x22/0x30
+[  101.989706]  </TASK>
+[  101.989708] ================================================================================
+
+The source of this warning is in iosys_map_clear() called from
+dma_buf_vmap(). It conditionally sets values based on map->is_iomem. The
+iosys_map variables are allocated uninitialized on the stack leading to
+->is_iomem having all kinds of values and not only 0/1.
+
+Fix this by zeroing the iosys_map variables.
+
+Fixes: 40e1a70b4aed ("drm: Add GUD USB Display driver")
+Cc: <stable@vger.kernel.org> # v5.18+
+Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
+Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
+Signed-off-by: Noralf Trønnes <noralf@tronnes.org>
+Link: https://patchwork.freedesktop.org/patch/msgid/20221122-gud-shadow-plane-v2-1-435037990a83@tronnes.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/gud/gud_pipe.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/gud/gud_pipe.c
++++ b/drivers/gpu/drm/gud/gud_pipe.c
+@@ -157,8 +157,8 @@ static int gud_prep_flush(struct gud_dev
+ {
+       struct dma_buf_attachment *import_attach = fb->obj[0]->import_attach;
+       u8 compression = gdrm->compression;
+-      struct iosys_map map[DRM_FORMAT_MAX_PLANES];
+-      struct iosys_map map_data[DRM_FORMAT_MAX_PLANES];
++      struct iosys_map map[DRM_FORMAT_MAX_PLANES] = { };
++      struct iosys_map map_data[DRM_FORMAT_MAX_PLANES] = { };
+       struct iosys_map dst;
+       void *vaddr, *buf;
+       size_t pitch, len;
diff --git a/queue-6.2/drm-i915-don-t-use-bar-mappings-for-ring-buffers-with-llc.patch b/queue-6.2/drm-i915-don-t-use-bar-mappings-for-ring-buffers-with-llc.patch

new file mode 100644 (file)

index 0000000..5fbc66c
--- /dev/null
+++ b/queue-6.2/drm-i915-don-t-use-bar-mappings-for-ring-buffers-with-llc.patch
@@ -0,0 +1,56 @@
+From 85636167e3206c3fbd52254fc432991cc4e90194 Mon Sep 17 00:00:00 2001
+From: John Harrison <John.C.Harrison@Intel.com>
+Date: Wed, 15 Feb 2023 17:11:01 -0800
+Subject: drm/i915: Don't use BAR mappings for ring buffers with LLC
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: John Harrison <John.C.Harrison@Intel.com>
+
+commit 85636167e3206c3fbd52254fc432991cc4e90194 upstream.
+
+Direction from hardware is that ring buffers should never be mapped
+via the BAR on systems with LLC. There are too many caching pitfalls
+due to the way BAR accesses are routed. So it is safest to just not
+use it.
+
+Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
+Fixes: 9d80841ea4c9 ("drm/i915: Allow ringbuffers to be bound anywhere")
+Cc: Chris Wilson <chris@chris-wilson.co.uk>
+Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
+Cc: Jani Nikula <jani.nikula@linux.intel.com>
+Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
+Cc: intel-gfx@lists.freedesktop.org
+Cc: <stable@vger.kernel.org> # v4.9+
+Tested-by: Jouni Högander <jouni.hogander@intel.com>
+Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230216011101.1909009-3-John.C.Harrison@Intel.com
+(cherry picked from commit 65c08339db1ada87afd6cfe7db8e60bb4851d919)
+Signed-off-by: Jani Nikula <jani.nikula@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/gt/intel_ring.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/i915/gt/intel_ring.c
++++ b/drivers/gpu/drm/i915/gt/intel_ring.c
+@@ -53,7 +53,7 @@ int intel_ring_pin(struct intel_ring *ri
+       if (unlikely(ret))
+               goto err_unpin;
+ 
+-      if (i915_vma_is_map_and_fenceable(vma)) {
++      if (i915_vma_is_map_and_fenceable(vma) && !HAS_LLC(vma->vm->i915)) {
+               addr = (void __force *)i915_vma_pin_iomap(vma);
+       } else {
+               int type = i915_coherent_map_type(vma->vm->i915, vma->obj, false);
+@@ -98,7 +98,7 @@ void intel_ring_unpin(struct intel_ring
+               return;
+ 
+       i915_vma_unset_ggtt_write(vma);
+-      if (i915_vma_is_map_and_fenceable(vma))
++      if (i915_vma_is_map_and_fenceable(vma) && !HAS_LLC(vma->vm->i915))
+               i915_vma_unpin_iomap(vma);
+       else
+               i915_gem_object_unpin_map(vma->obj);
diff --git a/queue-6.2/drm-i915-don-t-use-stolen-memory-for-ring-buffers-with-llc.patch b/queue-6.2/drm-i915-don-t-use-stolen-memory-for-ring-buffers-with-llc.patch

new file mode 100644 (file)

index 0000000..8a109f7
--- /dev/null
+++ b/queue-6.2/drm-i915-don-t-use-stolen-memory-for-ring-buffers-with-llc.patch
@@ -0,0 +1,47 @@
+From 690e0ec8e63da9a29b39fedc6ed5da09c7c82651 Mon Sep 17 00:00:00 2001
+From: John Harrison <John.C.Harrison@Intel.com>
+Date: Wed, 15 Feb 2023 17:11:00 -0800
+Subject: drm/i915: Don't use stolen memory for ring buffers with LLC
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: John Harrison <John.C.Harrison@Intel.com>
+
+commit 690e0ec8e63da9a29b39fedc6ed5da09c7c82651 upstream.
+
+Direction from hardware is that stolen memory should never be used for
+ring buffer allocations on platforms with LLC. There are too many
+caching pitfalls due to the way stolen memory accesses are routed. So
+it is safest to just not use it.
+
+Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
+Fixes: c58b735fc762 ("drm/i915: Allocate rings from stolen")
+Cc: Chris Wilson <chris@chris-wilson.co.uk>
+Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
+Cc: Jani Nikula <jani.nikula@linux.intel.com>
+Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
+Cc: intel-gfx@lists.freedesktop.org
+Cc: <stable@vger.kernel.org> # v4.9+
+Tested-by: Jouni Högander <jouni.hogander@intel.com>
+Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230216011101.1909009-2-John.C.Harrison@Intel.com
+(cherry picked from commit f54c1f6c697c4297f7ed94283c184acc338a5cf8)
+Signed-off-by: Jani Nikula <jani.nikula@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/gt/intel_ring.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/i915/gt/intel_ring.c
++++ b/drivers/gpu/drm/i915/gt/intel_ring.c
+@@ -116,7 +116,7 @@ static struct i915_vma *create_ring_vma(
+ 
+       obj = i915_gem_object_create_lmem(i915, size, I915_BO_ALLOC_VOLATILE |
+                                         I915_BO_ALLOC_PM_VOLATILE);
+-      if (IS_ERR(obj) && i915_ggtt_has_aperture(ggtt))
++      if (IS_ERR(obj) && i915_ggtt_has_aperture(ggtt) && !HAS_LLC(i915))
+               obj = i915_gem_object_create_stolen(i915, size);
+       if (IS_ERR(obj))
+               obj = i915_gem_object_create_internal(i915, size);
diff --git a/queue-6.2/drm-i915-quirks-add-inverted-backlight-quirk-for-hp-14-r206nv.patch b/queue-6.2/drm-i915-quirks-add-inverted-backlight-quirk-for-hp-14-r206nv.patch

new file mode 100644 (file)

index 0000000..d2f0a93
--- /dev/null
+++ b/queue-6.2/drm-i915-quirks-add-inverted-backlight-quirk-for-hp-14-r206nv.patch
@@ -0,0 +1,36 @@
+From 5e438bf7f9a1705ebcae5fa89cdbfbc6932a7871 Mon Sep 17 00:00:00 2001
+From: Mavroudis Chatzilaridis <mavchatz@protonmail.com>
+Date: Wed, 1 Feb 2023 18:51:25 +0000
+Subject: drm/i915/quirks: Add inverted backlight quirk for HP 14-r206nv
+
+From: Mavroudis Chatzilaridis <mavchatz@protonmail.com>
+
+commit 5e438bf7f9a1705ebcae5fa89cdbfbc6932a7871 upstream.
+
+This laptop uses inverted backlight PWM. Thus, without this quirk,
+backlight brightness decreases as the brightness value increases and
+vice versa.
+
+Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8013
+Cc: stable@vger.kernel.org
+Signed-off-by: Mavroudis Chatzilaridis <mavchatz@protonmail.com>
+Reviewed-by: Jani Nikula <jani.nikula@intel.com>
+Signed-off-by: Jani Nikula <jani.nikula@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230201184947.8835-1-mavchatz@protonmail.com
+(cherry picked from commit 83e7d6fd330d413cb2064e680ffea91b0512a520)
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/display/intel_quirks.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/gpu/drm/i915/display/intel_quirks.c
++++ b/drivers/gpu/drm/i915/display/intel_quirks.c
+@@ -199,6 +199,8 @@ static struct intel_quirk intel_quirks[]
+       /* ECS Liva Q2 */
+       { 0x3185, 0x1019, 0xa94d, quirk_increase_ddi_disabled_time },
+       { 0x3184, 0x1019, 0xa94d, quirk_increase_ddi_disabled_time },
++      /* HP Notebook - 14-r206nv */
++      { 0x0f31, 0x103c, 0x220f, quirk_invert_brightness },
+ };
+ 
+ void intel_init_quirks(struct drm_i915_private *i915)
diff --git a/queue-6.2/drm-radeon-fix-edp-for-single-display-imac11-2.patch b/queue-6.2/drm-radeon-fix-edp-for-single-display-imac11-2.patch

new file mode 100644 (file)

index 0000000..85d44bf
--- /dev/null
+++ b/queue-6.2/drm-radeon-fix-edp-for-single-display-imac11-2.patch
@@ -0,0 +1,46 @@
+From 05eacc198c68cbb35a7281ce4011f8899ee1cfb8 Mon Sep 17 00:00:00 2001
+From: Mark Hawrylak <mark.hawrylak@gmail.com>
+Date: Sun, 19 Feb 2023 16:02:00 +1100
+Subject: drm/radeon: Fix eDP for single-display iMac11,2
+
+From: Mark Hawrylak <mark.hawrylak@gmail.com>
+
+commit 05eacc198c68cbb35a7281ce4011f8899ee1cfb8 upstream.
+
+Apple iMac11,2 (mid 2010), also with a Radeon HD-4670, has the same
+issue as iMac10,1 (late 2009) where the internal eDP panel stays dark on
+driver load.  This patch treats iMac11,2 the same as iMac10,1,
+so the eDP panel stays active.
+
+Additional steps:
+Kernel boot parameter radeon.nomodeset=0 required to keep the eDP
+panel active.
+
+This patch is an extension of
+commit 564d8a2cf3ab ("drm/radeon: Fix eDP for single-display iMac10,1 (v2)")
+Link: https://lore.kernel.org/all/lsq.1507553064.833262317@decadent.org.uk/
+Signed-off-by: Mark Hawrylak <mark.hawrylak@gmail.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/radeon/atombios_encoders.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/radeon/atombios_encoders.c
++++ b/drivers/gpu/drm/radeon/atombios_encoders.c
+@@ -2122,11 +2122,12 @@ int radeon_atom_pick_dig_encoder(struct
+ 
+       /*
+        * On DCE32 any encoder can drive any block so usually just use crtc id,
+-       * but Apple thinks different at least on iMac10,1, so there use linkb,
++       * but Apple thinks different at least on iMac10,1 and iMac11,2, so there use linkb,
+        * otherwise the internal eDP panel will stay dark.
+        */
+       if (ASIC_IS_DCE32(rdev)) {
+-              if (dmi_match(DMI_PRODUCT_NAME, "iMac10,1"))
++              if (dmi_match(DMI_PRODUCT_NAME, "iMac10,1") ||
++                  dmi_match(DMI_PRODUCT_NAME, "iMac11,2"))
+                       enc_idx = (dig->linkb) ? 1 : 0;
+               else
+                       enc_idx = radeon_crtc->crtc_id;
diff --git a/queue-6.2/iommu-vt-d-avoid-superfluous-iotlb-tracking-in-lazy-mode.patch b/queue-6.2/iommu-vt-d-avoid-superfluous-iotlb-tracking-in-lazy-mode.patch

new file mode 100644 (file)

index 0000000..5ff7d72
--- /dev/null
+++ b/queue-6.2/iommu-vt-d-avoid-superfluous-iotlb-tracking-in-lazy-mode.patch
@@ -0,0 +1,53 @@
+From 16a75bbe480c3598b3af57a2504ea89b1e32c3ac Mon Sep 17 00:00:00 2001
+From: Jacob Pan <jacob.jun.pan@linux.intel.com>
+Date: Thu, 16 Feb 2023 21:08:14 +0800
+Subject: iommu/vt-d: Avoid superfluous IOTLB tracking in lazy mode
+
+From: Jacob Pan <jacob.jun.pan@linux.intel.com>
+
+commit 16a75bbe480c3598b3af57a2504ea89b1e32c3ac upstream.
+
+Intel IOMMU driver implements IOTLB flush queue with domain selective
+or PASID selective invalidations. In this case there's no need to track
+IOVA page range and sync IOTLBs, which may cause significant performance
+hit.
+
+This patch adds a check to avoid IOVA gather page and IOTLB sync for
+the lazy path.
+
+The performance difference on Sapphire Rapids 100Gb NIC is improved by
+the following (as measured by iperf send):
+
+w/o this fix ~48 Gbits/s, with this fix ~54 Gbits/s
+
+Cc: <stable@vger.kernel.org>
+Fixes: 2a2b8eaa5b25 ("iommu: Handle freelists when using deferred flushing in iommu drivers")
+Reviewed-by: Robin Murphy <robin.murphy@arm.com>
+Reviewed-by: Kevin Tian <kevin.tian@intel.com>
+Tested-by: Sanjay Kumar <sanjay.k.kumar@intel.com>
+Signed-off-by: Sanjay Kumar <sanjay.k.kumar@intel.com>
+Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
+Link: https://lore.kernel.org/r/20230209175330.1783556-1-jacob.jun.pan@linux.intel.com
+Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/iommu/intel/iommu.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/drivers/iommu/intel/iommu.c
++++ b/drivers/iommu/intel/iommu.c
+@@ -4347,7 +4347,12 @@ static size_t intel_iommu_unmap(struct i
+       if (dmar_domain->max_addr == iova + size)
+               dmar_domain->max_addr = iova;
+ 
+-      iommu_iotlb_gather_add_page(domain, gather, iova, size);
++      /*
++       * We do not use page-selective IOTLB invalidation in flush queue,
++       * so there is no need to track page and sync iotlb.
++       */
++      if (!iommu_iotlb_gather_queued(gather))
++              iommu_iotlb_gather_add_page(domain, gather, iova, size);
+ 
+       return size;
+ }
diff --git a/queue-6.2/iommu-vt-d-fix-pasid-directory-pointer-coherency.patch b/queue-6.2/iommu-vt-d-fix-pasid-directory-pointer-coherency.patch

new file mode 100644 (file)

index 0000000..db7fe5f
--- /dev/null
+++ b/queue-6.2/iommu-vt-d-fix-pasid-directory-pointer-coherency.patch
@@ -0,0 +1,77 @@
+From 194b3348bdbb7db65375c72f3f774aee4cc6614e Mon Sep 17 00:00:00 2001
+From: Jacob Pan <jacob.jun.pan@linux.intel.com>
+Date: Thu, 16 Feb 2023 21:08:15 +0800
+Subject: iommu/vt-d: Fix PASID directory pointer coherency
+
+From: Jacob Pan <jacob.jun.pan@linux.intel.com>
+
+commit 194b3348bdbb7db65375c72f3f774aee4cc6614e upstream.
+
+On platforms that do not support IOMMU Extended capability bit 0
+Page-walk Coherency, CPU caches are not snooped when IOMMU is accessing
+any translation structures. IOMMU access goes only directly to
+memory. Intel IOMMU code was missing a flush for the PASID table
+directory that resulted in the unrecoverable fault as shown below.
+
+This patch adds clflush calls whenever allocating and updating
+a PASID table directory to ensure cache coherency.
+
+On the reverse direction, there's no need to clflush the PASID directory
+pointer when we deactivate a context entry in that IOMMU hardware will
+not see the old PASID directory pointer after we clear the context entry.
+PASID directory entries are also never freed once allocated.
+
+ DMAR: DRHD: handling fault status reg 3
+ DMAR: [DMA Read NO_PASID] Request device [00:0d.2] fault addr 0x1026a4000
+       [fault reason 0x51] SM: Present bit in Directory Entry is clear
+ DMAR: Dump dmar1 table entries for IOVA 0x1026a4000
+ DMAR: scalable mode root entry: hi 0x0000000102448001, low 0x0000000101b3e001
+ DMAR: context entry: hi 0x0000000000000000, low 0x0000000101b4d401
+ DMAR: pasid dir entry: 0x0000000101b4e001
+ DMAR: pasid table entry[0]: 0x0000000000000109
+ DMAR: pasid table entry[1]: 0x0000000000000001
+ DMAR: pasid table entry[2]: 0x0000000000000000
+ DMAR: pasid table entry[3]: 0x0000000000000000
+ DMAR: pasid table entry[4]: 0x0000000000000000
+ DMAR: pasid table entry[5]: 0x0000000000000000
+ DMAR: pasid table entry[6]: 0x0000000000000000
+ DMAR: pasid table entry[7]: 0x0000000000000000
+ DMAR: PTE not present at level 4
+
+Cc: <stable@vger.kernel.org>
+Fixes: 0bbeb01a4faf ("iommu/vt-d: Manage scalalble mode PASID tables")
+Reviewed-by: Kevin Tian <kevin.tian@intel.com>
+Reported-by: Sukumar Ghorai <sukumar.ghorai@intel.com>
+Signed-off-by: Ashok Raj <ashok.raj@intel.com>
+Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
+Link: https://lore.kernel.org/r/20230209212843.1788125-1-jacob.jun.pan@linux.intel.com
+Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/iommu/intel/pasid.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/drivers/iommu/intel/pasid.c
++++ b/drivers/iommu/intel/pasid.c
+@@ -128,6 +128,9 @@ int intel_pasid_alloc_table(struct devic
+       pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);
+       info->pasid_table = pasid_table;
+ 
++      if (!ecap_coherent(info->iommu->ecap))
++              clflush_cache_range(pasid_table->table, size);
++
+       return 0;
+ }
+ 
+@@ -215,6 +218,10 @@ retry:
+                       free_pgtable_page(entries);
+                       goto retry;
+               }
++              if (!ecap_coherent(info->iommu->ecap)) {
++                      clflush_cache_range(entries, VTD_PAGE_SIZE);
++                      clflush_cache_range(&dir[dir_index].val, sizeof(*dir));
++              }
+       }
+ 
+       return &entries[index];
diff --git a/queue-6.2/iommufd-do-not-add-the-same-hwpt-to-the-ioas-hwpt_list-twice.patch b/queue-6.2/iommufd-do-not-add-the-same-hwpt-to-the-ioas-hwpt_list-twice.patch

new file mode 100644 (file)

index 0000000..2ddf641
--- /dev/null
+++ b/queue-6.2/iommufd-do-not-add-the-same-hwpt-to-the-ioas-hwpt_list-twice.patch
@@ -0,0 +1,41 @@
+From b4ff830eca097df51af10a9be29e8cc817327919 Mon Sep 17 00:00:00 2001
+From: Jason Gunthorpe <jgg@nvidia.com>
+Date: Mon, 13 Feb 2023 14:02:42 -0400
+Subject: iommufd: Do not add the same hwpt to the ioas->hwpt_list twice
+
+From: Jason Gunthorpe <jgg@nvidia.com>
+
+commit b4ff830eca097df51af10a9be29e8cc817327919 upstream.
+
+The hwpt is added to the hwpt_list only during its creation, it is never
+added again. This hunk is some missed leftover from rework. Adding it
+twice will corrupt the linked list in some cases.
+
+It affects HWPT specific attachment, which is something the test suite
+cannot cover until we can create a legitimate struct device with a
+non-system iommu "driver" (ie we need the bus removed from the iommu code)
+
+Cc: stable@vger.kernel.org
+Fixes: e8d57210035b ("iommufd: Add kAPI toward external drivers for physical devices")
+Link: https://lore.kernel.org/r/1-v1-4336b5cb2fe4+1d7-iommufd_hwpt_jgg@nvidia.com
+Reviewed-by: Kevin Tian <kevin.tian@intel.com>
+Reported-by: Kevin Tian <kevin.tian@intel.com>
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/iommu/iommufd/device.c |    4 ----
+ 1 file changed, 4 deletions(-)
+
+--- a/drivers/iommu/iommufd/device.c
++++ b/drivers/iommu/iommufd/device.c
+@@ -346,10 +346,6 @@ int iommufd_device_attach(struct iommufd
+               rc = iommufd_device_do_attach(idev, hwpt);
+               if (rc)
+                       goto out_put_pt_obj;
+-
+-              mutex_lock(&hwpt->ioas->mutex);
+-              list_add_tail(&hwpt->hwpt_item, &hwpt->ioas->hwpt_list);
+-              mutex_unlock(&hwpt->ioas->mutex);
+               break;
+       }
+       case IOMMUFD_OBJ_IOAS: {
diff --git a/queue-6.2/iommufd-make-sure-to-zero-vfio_iommu_type1_info-before-copying-to-user.patch b/queue-6.2/iommufd-make-sure-to-zero-vfio_iommu_type1_info-before-copying-to-user.patch

new file mode 100644 (file)

index 0000000..f956c3a
--- /dev/null
+++ b/queue-6.2/iommufd-make-sure-to-zero-vfio_iommu_type1_info-before-copying-to-user.patch
@@ -0,0 +1,35 @@
+From b3551ead616318ea155558cdbe7e91495b8d9b33 Mon Sep 17 00:00:00 2001
+From: Jason Gunthorpe <jgg@nvidia.com>
+Date: Mon, 13 Feb 2023 10:32:21 -0400
+Subject: iommufd: Make sure to zero vfio_iommu_type1_info before copying to user
+
+From: Jason Gunthorpe <jgg@nvidia.com>
+
+commit b3551ead616318ea155558cdbe7e91495b8d9b33 upstream.
+
+Missed a zero initialization here. Most of the struct is filled with
+a copy_from_user(), however minsz for that copy is smaller than the
+actual struct by 8 bytes, thus we don't fill the padding.
+
+Cc: stable@vger.kernel.org # 6.1+
+Fixes: d624d6652a65 ("iommufd: vfio container FD ioctl compatibility")
+Link: https://lore.kernel.org/r/0-v1-a74499ece799+1a-iommufd_get_info_leak_jgg@nvidia.com
+Reviewed-by: Kevin Tian <kevin.tian@intel.com>
+Reported-by: syzbot+cb1e0978f6bf46b83a58@syzkaller.appspotmail.com
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/iommu/iommufd/vfio_compat.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/iommu/iommufd/vfio_compat.c
++++ b/drivers/iommu/iommufd/vfio_compat.c
+@@ -381,7 +381,7 @@ static int iommufd_vfio_iommu_get_info(s
+       };
+       size_t minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes);
+       struct vfio_info_cap_header __user *last_cap = NULL;
+-      struct vfio_iommu_type1_info info;
++      struct vfio_iommu_type1_info info = {};
+       struct iommufd_ioas *ioas;
+       size_t total_cap_size;
+       int rc;
diff --git a/queue-6.2/mips-dts-ci20-fix-otg-power-gpio.patch b/queue-6.2/mips-dts-ci20-fix-otg-power-gpio.patch

new file mode 100644 (file)

index 0000000..a2dfa96
--- /dev/null
+++ b/queue-6.2/mips-dts-ci20-fix-otg-power-gpio.patch
@@ -0,0 +1,34 @@
+From 0cb4228f6cc9ed0ca2be0d9ddf29168a8e3a3905 Mon Sep 17 00:00:00 2001
+From: "H. Nikolaus Schaller" <hns@goldelico.com>
+Date: Sun, 29 Jan 2023 19:57:04 +0100
+Subject: MIPS: DTS: CI20: fix otg power gpio
+
+From: H. Nikolaus Schaller <hns@goldelico.com>
+
+commit 0cb4228f6cc9ed0ca2be0d9ddf29168a8e3a3905 upstream.
+
+According to schematics it is PF15 and not PF14 (MIC_SW_EN).
+Seems as if it was hidden and not noticed during testing since
+there is no sound DT node.
+
+Fixes: 158c774d3c64 ("MIPS: Ingenic: Add missing nodes for Ingenic SoCs and boards.")
+Cc: stable@vger.kernel.org
+Signed-off-by: H. Nikolaus Schaller <hns@goldelico.com>
+Acked-by: Paul Cercueil <paul@crapouillou.net>
+Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/mips/boot/dts/ingenic/ci20.dts |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/mips/boot/dts/ingenic/ci20.dts
++++ b/arch/mips/boot/dts/ingenic/ci20.dts
+@@ -113,7 +113,7 @@
+               regulator-min-microvolt = <5000000>;
+               regulator-max-microvolt = <5000000>;
+ 
+-              gpio = <&gpf 14 GPIO_ACTIVE_LOW>;
++              gpio = <&gpf 15 GPIO_ACTIVE_LOW>;
+               enable-active-high;
+       };
+ };
diff --git a/queue-6.2/pci-avoid-flr-for-amd-fch-ahci-adapters.patch b/queue-6.2/pci-avoid-flr-for-amd-fch-ahci-adapters.patch

new file mode 100644 (file)

index 0000000..f5ef6af
--- /dev/null
+++ b/queue-6.2/pci-avoid-flr-for-amd-fch-ahci-adapters.patch
@@ -0,0 +1,48 @@
+From 63ba51db24ed1b8f8088a897290eb6c036c5435d Mon Sep 17 00:00:00 2001
+From: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+Date: Sat, 28 Jan 2023 10:39:51 +0900
+Subject: PCI: Avoid FLR for AMD FCH AHCI adapters
+
+From: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+
+commit 63ba51db24ed1b8f8088a897290eb6c036c5435d upstream.
+
+PCI passthrough to VMs does not work with AMD FCH AHCI adapters: the guest
+OS fails to correctly probe devices attached to the controller due to FIS
+communication failures:
+
+  ata4: softreset failed (1st FIS failed)
+  ...
+  ata4.00: qc timeout after 5000 msecs (cmd 0xec)
+  ata4.00: failed to IDENTIFY (I/O error, err_mask=0x4)
+
+Forcing the "bus" reset method before unbinding & binding the adapter to
+the vfio-pci driver solves this issue, e.g.:
+
+  echo "bus" > /sys/bus/pci/devices/<ID>/reset_method
+
+gives a working guest OS, indicating that the default FLR reset method
+doesn't work correctly.
+
+Apply quirk_no_flr() to AMD FCH AHCI devices to work around this issue.
+
+Link: https://lore.kernel.org/r/20230128013951.523247-1-damien.lemoal@opensource.wdc.com
+Reported-by: Niklas Cassel <niklas.cassel@wdc.com>
+Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/pci/quirks.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/pci/quirks.c
++++ b/drivers/pci/quirks.c
+@@ -5340,6 +5340,7 @@ static void quirk_no_flr(struct pci_dev
+ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x1487, quirk_no_flr);
+ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x148c, quirk_no_flr);
+ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x149c, quirk_no_flr);
++DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x7901, quirk_no_flr);
+ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1502, quirk_no_flr);
+ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1503, quirk_no_flr);
+ 
diff --git a/queue-6.2/pci-dpc-await-readiness-of-secondary-bus-after-reset.patch b/queue-6.2/pci-dpc-await-readiness-of-secondary-bus-after-reset.patch

new file mode 100644 (file)

index 0000000..d547717
--- /dev/null
+++ b/queue-6.2/pci-dpc-await-readiness-of-secondary-bus-after-reset.patch
@@ -0,0 +1,81 @@
+From 53b54ad074de1896f8b021615f65b27f557ce874 Mon Sep 17 00:00:00 2001
+From: Lukas Wunner <lukas@wunner.de>
+Date: Sun, 15 Jan 2023 09:20:33 +0100
+Subject: PCI/DPC: Await readiness of secondary bus after reset
+
+From: Lukas Wunner <lukas@wunner.de>
+
+commit 53b54ad074de1896f8b021615f65b27f557ce874 upstream.
+
+pci_bridge_wait_for_secondary_bus() is called after a Secondary Bus
+Reset, but not after a DPC-induced Hot Reset.
+
+As a result, the delays prescribed by PCIe r6.0 sec 6.6.1 are not
+observed and devices on the secondary bus may be accessed before
+they're ready.
+
+One affected device is Intel's Ponte Vecchio HPC GPU.  It comprises a
+PCIe switch whose upstream port is not immediately ready after reset.
+Because its config space is restored too early, it remains in
+D0uninitialized, its subordinate devices remain inaccessible and DPC
+recovery fails with messages such as:
+
+  i915 0000:8c:00.0: can't change power state from D3cold to D0 (config space inaccessible)
+  intel_vsec 0000:8e:00.1: can't change power state from D3cold to D0 (config space inaccessible)
+  pcieport 0000:89:02.0: AER: device recovery failed
+
+Fix it.
+
+Link: https://lore.kernel.org/r/9f5ff00e1593d8d9a4b452398b98aa14d23fca11.1673769517.git.lukas@wunner.de
+Tested-by: Ravi Kishore Koppuravuri <ravi.kishore.koppuravuri@intel.com>
+Signed-off-by: Lukas Wunner <lukas@wunner.de>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/pci/pci.c      |    3 ---
+ drivers/pci/pci.h      |    6 ++++++
+ drivers/pci/pcie/dpc.c |    4 ++--
+ 3 files changed, 8 insertions(+), 5 deletions(-)
+
+--- a/drivers/pci/pci.c
++++ b/drivers/pci/pci.c
+@@ -167,9 +167,6 @@ static int __init pcie_port_pm_setup(cha
+ }
+ __setup("pcie_port_pm=", pcie_port_pm_setup);
+ 
+-/* Time to wait after a reset for device to become responsive */
+-#define PCIE_RESET_READY_POLL_MS 60000
+-
+ /**
+  * pci_bus_max_busnr - returns maximum PCI bus number of given bus' children
+  * @bus: pointer to PCI bus structure to search
+--- a/drivers/pci/pci.h
++++ b/drivers/pci/pci.h
+@@ -70,6 +70,12 @@ struct pci_cap_saved_state *pci_find_sav
+  * Reset (PCIe r6.0 sec 5.8).
+  */
+ #define PCI_RESET_WAIT                1000    /* msec */
++/*
++ * Devices may extend the 1 sec period through Request Retry Status completions
++ * (PCIe r6.0 sec 2.3.1).  The spec does not provide an upper limit, but 60 sec
++ * ought to be enough for any device to become responsive.
++ */
++#define PCIE_RESET_READY_POLL_MS 60000        /* msec */
+ 
+ void pci_update_current_state(struct pci_dev *dev, pci_power_t state);
+ void pci_refresh_power_state(struct pci_dev *dev);
+--- a/drivers/pci/pcie/dpc.c
++++ b/drivers/pci/pcie/dpc.c
+@@ -170,8 +170,8 @@ pci_ers_result_t dpc_reset_link(struct p
+       pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS,
+                             PCI_EXP_DPC_STATUS_TRIGGER);
+ 
+-      if (!pcie_wait_for_link(pdev, true)) {
+-              pci_info(pdev, "Data Link Layer Link Active not set in 1000 msec\n");
++      if (pci_bridge_wait_for_secondary_bus(pdev, "DPC",
++                                            PCIE_RESET_READY_POLL_MS)) {
+               clear_bit(PCI_DPC_RECOVERED, &pdev->priv_flags);
+               ret = PCI_ERS_RESULT_DISCONNECT;
+       } else {
diff --git a/queue-6.2/pci-hotplug-allow-marking-devices-as-disconnected-during-bind-unbind.patch b/queue-6.2/pci-hotplug-allow-marking-devices-as-disconnected-during-bind-unbind.patch

new file mode 100644 (file)

index 0000000..b2552d5
--- /dev/null
+++ b/queue-6.2/pci-hotplug-allow-marking-devices-as-disconnected-during-bind-unbind.patch
@@ -0,0 +1,136 @@
+From 74ff8864cc842be994853095dba6db48e716400a Mon Sep 17 00:00:00 2001
+From: Lukas Wunner <lukas@wunner.de>
+Date: Fri, 20 Jan 2023 10:19:02 +0100
+Subject: PCI: hotplug: Allow marking devices as disconnected during bind/unbind
+
+From: Lukas Wunner <lukas@wunner.de>
+
+commit 74ff8864cc842be994853095dba6db48e716400a upstream.
+
+On surprise removal, pciehp_unconfigure_device() and acpiphp's
+trim_stale_devices() call pci_dev_set_disconnected() to mark removed
+devices as permanently offline.  Thereby, the PCI core and drivers know
+to skip device accesses.
+
+However pci_dev_set_disconnected() takes the device_lock and thus waits for
+a concurrent driver bind or unbind to complete.  As a result, the driver's
+->probe and ->remove hooks have no chance to learn that the device is gone.
+
+That doesn't make any sense, so drop the device_lock and instead use atomic
+xchg() and cmpxchg() operations to update the device state.
+
+As a byproduct, an AB-BA deadlock reported by Anatoli is fixed which occurs
+on surprise removal with AER concurrently performing a bus reset.
+
+AER bus reset:
+
+  INFO: task irq/26-aerdrv:95 blocked for more than 120 seconds.
+  Tainted: G        W          6.2.0-rc3-custom-norework-jan11+
+  schedule
+  rwsem_down_write_slowpath
+  down_write_nested
+  pciehp_reset_slot                      # acquires reset_lock
+  pci_reset_hotplug_slot
+  pci_slot_reset                         # acquires device_lock
+  pci_bus_error_reset
+  aer_root_reset
+  pcie_do_recovery
+  aer_process_err_devices
+  aer_isr
+
+pciehp surprise removal:
+
+  INFO: task irq/26-pciehp:96 blocked for more than 120 seconds.
+  Tainted: G        W          6.2.0-rc3-custom-norework-jan11+
+  schedule_preempt_disabled
+  __mutex_lock
+  mutex_lock_nested
+  pci_dev_set_disconnected               # acquires device_lock
+  pci_walk_bus
+  pciehp_unconfigure_device
+  pciehp_disable_slot
+  pciehp_handle_presence_or_link_change
+  pciehp_ist                             # acquires reset_lock
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=215590
+Fixes: a6bd101b8f84 ("PCI: Unify device inaccessible")
+Link: https://lore.kernel.org/r/3dc88ea82bdc0e37d9000e413d5ebce481cbd629.1674205689.git.lukas@wunner.de
+Reported-by: Anatoli Antonovitch <anatoli.antonovitch@amd.com>
+Signed-off-by: Lukas Wunner <lukas@wunner.de>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Cc: stable@vger.kernel.org # v4.20+
+Cc: Keith Busch <kbusch@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/pci/pci.h |   43 +++++++++++++------------------------------
+ 1 file changed, 13 insertions(+), 30 deletions(-)
+
+--- a/drivers/pci/pci.h
++++ b/drivers/pci/pci.h
+@@ -318,53 +318,36 @@ struct pci_sriov {
+  * @dev: PCI device to set new error_state
+  * @new: the state we want dev to be in
+  *
+- * Must be called with device_lock held.
++ * If the device is experiencing perm_failure, it has to remain in that state.
++ * Any other transition is allowed.
+  *
+  * Returns true if state has been changed to the requested state.
+  */
+ static inline bool pci_dev_set_io_state(struct pci_dev *dev,
+                                       pci_channel_state_t new)
+ {
+-      bool changed = false;
++      pci_channel_state_t old;
+ 
+-      device_lock_assert(&dev->dev);
+       switch (new) {
+       case pci_channel_io_perm_failure:
+-              switch (dev->error_state) {
+-              case pci_channel_io_frozen:
+-              case pci_channel_io_normal:
+-              case pci_channel_io_perm_failure:
+-                      changed = true;
+-                      break;
+-              }
+-              break;
++              xchg(&dev->error_state, pci_channel_io_perm_failure);
++              return true;
+       case pci_channel_io_frozen:
+-              switch (dev->error_state) {
+-              case pci_channel_io_frozen:
+-              case pci_channel_io_normal:
+-                      changed = true;
+-                      break;
+-              }
+-              break;
++              old = cmpxchg(&dev->error_state, pci_channel_io_normal,
++                            pci_channel_io_frozen);
++              return old != pci_channel_io_perm_failure;
+       case pci_channel_io_normal:
+-              switch (dev->error_state) {
+-              case pci_channel_io_frozen:
+-              case pci_channel_io_normal:
+-                      changed = true;
+-                      break;
+-              }
+-              break;
++              old = cmpxchg(&dev->error_state, pci_channel_io_frozen,
++                            pci_channel_io_normal);
++              return old != pci_channel_io_perm_failure;
++      default:
++              return false;
+       }
+-      if (changed)
+-              dev->error_state = new;
+-      return changed;
+ }
+ 
+ static inline int pci_dev_set_disconnected(struct pci_dev *dev, void *unused)
+ {
+-      device_lock(&dev->dev);
+       pci_dev_set_io_state(dev, pci_channel_io_perm_failure);
+-      device_unlock(&dev->dev);
+ 
+       return 0;
+ }
diff --git a/queue-6.2/pci-pm-observe-reset-delay-irrespective-of-bridge_d3.patch b/queue-6.2/pci-pm-observe-reset-delay-irrespective-of-bridge_d3.patch

new file mode 100644 (file)

index 0000000..aad0a00
--- /dev/null
+++ b/queue-6.2/pci-pm-observe-reset-delay-irrespective-of-bridge_d3.patch
@@ -0,0 +1,57 @@
+From 8ef0217227b42e2c34a18de316cee3da16c9bf1e Mon Sep 17 00:00:00 2001
+From: Lukas Wunner <lukas@wunner.de>
+Date: Sun, 15 Jan 2023 09:20:31 +0100
+Subject: PCI/PM: Observe reset delay irrespective of bridge_d3
+
+From: Lukas Wunner <lukas@wunner.de>
+
+commit 8ef0217227b42e2c34a18de316cee3da16c9bf1e upstream.
+
+If a PCI bridge is suspended to D3cold upon entering system sleep,
+resuming it entails a Fundamental Reset per PCIe r6.0 sec 5.8.
+
+The delay prescribed after a Fundamental Reset in PCIe r6.0 sec 6.6.1
+is sought to be observed by:
+
+  pci_pm_resume_noirq()
+    pci_pm_bridge_power_up_actions()
+      pci_bridge_wait_for_secondary_bus()
+
+However, pci_bridge_wait_for_secondary_bus() bails out if the bridge_d3
+flag is not set.  That flag indicates whether a bridge is allowed to
+suspend to D3cold at *runtime*.
+
+Hence *no* delay is observed on resume from system sleep if runtime
+D3cold is forbidden.  That doesn't make any sense, so drop the bridge_d3
+check from pci_bridge_wait_for_secondary_bus().
+
+The purpose of the bridge_d3 check was probably to avoid delays if a
+bridge remained in D0 during suspend.  However the sole caller of
+pci_bridge_wait_for_secondary_bus(), pci_pm_bridge_power_up_actions(),
+is only invoked if the previous power state was D3cold.  Hence the
+additional bridge_d3 check seems superfluous.
+
+Fixes: ad9001f2f411 ("PCI/PM: Add missing link delays required by the PCIe spec")
+Link: https://lore.kernel.org/r/eb37fa345285ec8bacabbf06b020b803f77bdd3d.1673769517.git.lukas@wunner.de
+Tested-by: Ravi Kishore Koppuravuri <ravi.kishore.koppuravuri@intel.com>
+Signed-off-by: Lukas Wunner <lukas@wunner.de>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
+Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
+Cc: stable@vger.kernel.org # v5.5+
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/pci/pci.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/pci/pci.c
++++ b/drivers/pci/pci.c
+@@ -4957,7 +4957,7 @@ void pci_bridge_wait_for_secondary_bus(s
+       if (pci_dev_is_disconnected(dev))
+               return;
+ 
+-      if (!pci_is_bridge(dev) || !dev->bridge_d3)
++      if (!pci_is_bridge(dev))
+               return;
+ 
+       down_read(&pci_bus_sem);
diff --git a/queue-6.2/pci-unify-delay-handling-for-reset-and-resume.patch b/queue-6.2/pci-unify-delay-handling-for-reset-and-resume.patch

new file mode 100644 (file)

index 0000000..edb3215
--- /dev/null
+++ b/queue-6.2/pci-unify-delay-handling-for-reset-and-resume.patch
@@ -0,0 +1,249 @@
+From ac91e6980563ed53afadd925fa6585ffd2bc4a2c Mon Sep 17 00:00:00 2001
+From: Lukas Wunner <lukas@wunner.de>
+Date: Sun, 15 Jan 2023 09:20:32 +0100
+Subject: PCI: Unify delay handling for reset and resume
+
+From: Lukas Wunner <lukas@wunner.de>
+
+commit ac91e6980563ed53afadd925fa6585ffd2bc4a2c upstream.
+
+Sheng Bi reports that pci_bridge_secondary_bus_reset() may fail to wait
+for devices on the secondary bus to become accessible after reset:
+
+Although it does call pci_dev_wait(), it erroneously passes the bridge's
+pci_dev rather than that of a child.  The bridge of course is always
+accessible while its secondary bus is reset, so pci_dev_wait() returns
+immediately.
+
+Sheng Bi proposes introducing a new pci_bridge_secondary_bus_wait()
+function which is called from pci_bridge_secondary_bus_reset():
+
+https://lore.kernel.org/linux-pci/20220523171517.32407-1-windy.bi.enflame@gmail.com/
+
+However we already have pci_bridge_wait_for_secondary_bus() which does
+almost exactly what we need.  So far it's only called on resume from
+D3cold (which implies a Fundamental Reset per PCIe r6.0 sec 5.8).
+Re-using it for Secondary Bus Resets is a leaner and more rational
+approach than introducing a new function.
+
+That only requires a few minor tweaks:
+
+- Amend pci_bridge_wait_for_secondary_bus() to await accessibility of
+  the first device on the secondary bus by calling pci_dev_wait() after
+  performing the prescribed delays.  pci_dev_wait() needs two parameters,
+  a reset reason and a timeout, which callers must now pass to
+  pci_bridge_wait_for_secondary_bus().  The timeout is 1 sec for resume
+  (PCIe r6.0 sec 6.6.1) and 60 sec for reset (commit 821cdad5c46c ("PCI:
+  Wait up to 60 seconds for device to become ready after FLR")).
+  Introduce a PCI_RESET_WAIT macro for the 1 sec timeout.
+
+- Amend pci_bridge_wait_for_secondary_bus() to return 0 on success or
+  -ENOTTY on error for consumption by pci_bridge_secondary_bus_reset().
+
+- Drop an unnecessary 1 sec delay from pci_reset_secondary_bus() which
+  is now performed by pci_bridge_wait_for_secondary_bus().  A static
+  delay this long is only necessary for Conventional PCI, so modern
+  PCIe systems benefit from shorter reset times as a side effect.
+
+Fixes: 6b2f1351af56 ("PCI: Wait for device to become ready after secondary bus reset")
+Link: https://lore.kernel.org/r/da77c92796b99ec568bd070cbe4725074a117038.1673769517.git.lukas@wunner.de
+Reported-by: Sheng Bi <windy.bi.enflame@gmail.com>
+Tested-by: Ravi Kishore Koppuravuri <ravi.kishore.koppuravuri@intel.com>
+Signed-off-by: Lukas Wunner <lukas@wunner.de>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
+Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
+Cc: stable@vger.kernel.org # v4.17+
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/pci/pci-driver.c |    2 -
+ drivers/pci/pci.c        |   54 ++++++++++++++++++++---------------------------
+ drivers/pci/pci.h        |   10 +++++++-
+ 3 files changed, 34 insertions(+), 32 deletions(-)
+
+--- a/drivers/pci/pci-driver.c
++++ b/drivers/pci/pci-driver.c
+@@ -572,7 +572,7 @@ static void pci_pm_default_resume_early(
+ 
+ static void pci_pm_bridge_power_up_actions(struct pci_dev *pci_dev)
+ {
+-      pci_bridge_wait_for_secondary_bus(pci_dev);
++      pci_bridge_wait_for_secondary_bus(pci_dev, "resume", PCI_RESET_WAIT);
+       /*
+        * When powering on a bridge from D3cold, the whole hierarchy may be
+        * powered on into D0uninitialized state, resume them to give them a
+--- a/drivers/pci/pci.c
++++ b/drivers/pci/pci.c
+@@ -1174,7 +1174,7 @@ static int pci_dev_wait(struct pci_dev *
+                       return -ENOTTY;
+               }
+ 
+-              if (delay > 1000)
++              if (delay > PCI_RESET_WAIT)
+                       pci_info(dev, "not ready %dms after %s; waiting\n",
+                                delay - 1, reset_type);
+ 
+@@ -1183,7 +1183,7 @@ static int pci_dev_wait(struct pci_dev *
+               pci_read_config_dword(dev, PCI_COMMAND, &id);
+       }
+ 
+-      if (delay > 1000)
++      if (delay > PCI_RESET_WAIT)
+               pci_info(dev, "ready %dms after %s\n", delay - 1,
+                        reset_type);
+ 
+@@ -4941,24 +4941,31 @@ static int pci_bus_max_d3cold_delay(cons
+ /**
+  * pci_bridge_wait_for_secondary_bus - Wait for secondary bus to be accessible
+  * @dev: PCI bridge
++ * @reset_type: reset type in human-readable form
++ * @timeout: maximum time to wait for devices on secondary bus (milliseconds)
+  *
+  * Handle necessary delays before access to the devices on the secondary
+- * side of the bridge are permitted after D3cold to D0 transition.
++ * side of the bridge are permitted after D3cold to D0 transition
++ * or Conventional Reset.
+  *
+  * For PCIe this means the delays in PCIe 5.0 section 6.6.1. For
+  * conventional PCI it means Tpvrh + Trhfa specified in PCI 3.0 section
+  * 4.3.2.
++ *
++ * Return 0 on success or -ENOTTY if the first device on the secondary bus
++ * failed to become accessible.
+  */
+-void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev)
++int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type,
++                                    int timeout)
+ {
+       struct pci_dev *child;
+       int delay;
+ 
+       if (pci_dev_is_disconnected(dev))
+-              return;
++              return 0;
+ 
+       if (!pci_is_bridge(dev))
+-              return;
++              return 0;
+ 
+       down_read(&pci_bus_sem);
+ 
+@@ -4970,14 +4977,14 @@ void pci_bridge_wait_for_secondary_bus(s
+        */
+       if (!dev->subordinate || list_empty(&dev->subordinate->devices)) {
+               up_read(&pci_bus_sem);
+-              return;
++              return 0;
+       }
+ 
+       /* Take d3cold_delay requirements into account */
+       delay = pci_bus_max_d3cold_delay(dev->subordinate);
+       if (!delay) {
+               up_read(&pci_bus_sem);
+-              return;
++              return 0;
+       }
+ 
+       child = list_first_entry(&dev->subordinate->devices, struct pci_dev,
+@@ -4986,14 +4993,12 @@ void pci_bridge_wait_for_secondary_bus(s
+ 
+       /*
+        * Conventional PCI and PCI-X we need to wait Tpvrh + Trhfa before
+-       * accessing the device after reset (that is 1000 ms + 100 ms). In
+-       * practice this should not be needed because we don't do power
+-       * management for them (see pci_bridge_d3_possible()).
++       * accessing the device after reset (that is 1000 ms + 100 ms).
+        */
+       if (!pci_is_pcie(dev)) {
+               pci_dbg(dev, "waiting %d ms for secondary bus\n", 1000 + delay);
+               msleep(1000 + delay);
+-              return;
++              return 0;
+       }
+ 
+       /*
+@@ -5010,11 +5015,11 @@ void pci_bridge_wait_for_secondary_bus(s
+        * configuration requests if we only wait for 100 ms (see
+        * https://bugzilla.kernel.org/show_bug.cgi?id=203885).
+        *
+-       * Therefore we wait for 100 ms and check for the device presence.
+-       * If it is still not present give it an additional 100 ms.
++       * Therefore we wait for 100 ms and check for the device presence
++       * until the timeout expires.
+        */
+       if (!pcie_downstream_port(dev))
+-              return;
++              return 0;
+ 
+       if (pcie_get_speed_cap(dev) <= PCIE_SPEED_5_0GT) {
+               pci_dbg(dev, "waiting %d ms for downstream link\n", delay);
+@@ -5025,14 +5030,11 @@ void pci_bridge_wait_for_secondary_bus(s
+               if (!pcie_wait_for_link_delay(dev, true, delay)) {
+                       /* Did not train, no need to wait any further */
+                       pci_info(dev, "Data Link Layer Link Active not set in 1000 msec\n");
+-                      return;
++                      return -ENOTTY;
+               }
+       }
+ 
+-      if (!pci_device_is_present(child)) {
+-              pci_dbg(child, "waiting additional %d ms to become accessible\n", delay);
+-              msleep(delay);
+-      }
++      return pci_dev_wait(child, reset_type, timeout - delay);
+ }
+ 
+ void pci_reset_secondary_bus(struct pci_dev *dev)
+@@ -5051,15 +5053,6 @@ void pci_reset_secondary_bus(struct pci_
+ 
+       ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
+       pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
+-
+-      /*
+-       * Trhfa for conventional PCI is 2^25 clock cycles.
+-       * Assuming a minimum 33MHz clock this results in a 1s
+-       * delay before we can consider subordinate devices to
+-       * be re-initialized.  PCIe has some ways to shorten this,
+-       * but we don't make use of them yet.
+-       */
+-      ssleep(1);
+ }
+ 
+ void __weak pcibios_reset_secondary_bus(struct pci_dev *dev)
+@@ -5078,7 +5071,8 @@ int pci_bridge_secondary_bus_reset(struc
+ {
+       pcibios_reset_secondary_bus(dev);
+ 
+-      return pci_dev_wait(dev, "bus reset", PCIE_RESET_READY_POLL_MS);
++      return pci_bridge_wait_for_secondary_bus(dev, "bus reset",
++                                               PCIE_RESET_READY_POLL_MS);
+ }
+ EXPORT_SYMBOL_GPL(pci_bridge_secondary_bus_reset);
+ 
+--- a/drivers/pci/pci.h
++++ b/drivers/pci/pci.h
+@@ -64,6 +64,13 @@ struct pci_cap_saved_state *pci_find_sav
+ #define PCI_PM_D3HOT_WAIT       10    /* msec */
+ #define PCI_PM_D3COLD_WAIT      100   /* msec */
+ 
++/*
++ * Following exit from Conventional Reset, devices must be ready within 1 sec
++ * (PCIe r6.0 sec 6.6.1).  A D3cold to D0 transition implies a Conventional
++ * Reset (PCIe r6.0 sec 5.8).
++ */
++#define PCI_RESET_WAIT                1000    /* msec */
++
+ void pci_update_current_state(struct pci_dev *dev, pci_power_t state);
+ void pci_refresh_power_state(struct pci_dev *dev);
+ int pci_power_up(struct pci_dev *dev);
+@@ -86,8 +93,9 @@ void pci_msi_init(struct pci_dev *dev);
+ void pci_msix_init(struct pci_dev *dev);
+ bool pci_bridge_d3_possible(struct pci_dev *dev);
+ void pci_bridge_d3_update(struct pci_dev *dev);
+-void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev);
+ void pci_bridge_reconfigure_ltr(struct pci_dev *dev);
++int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type,
++                                    int timeout);
+ 
+ static inline void pci_wakeup_event(struct pci_dev *dev)
+ {
diff --git a/queue-6.2/risc-v-add-a-spin_shadow_stack-declaration.patch b/queue-6.2/risc-v-add-a-spin_shadow_stack-declaration.patch

new file mode 100644 (file)

index 0000000..e1ea186
--- /dev/null
+++ b/queue-6.2/risc-v-add-a-spin_shadow_stack-declaration.patch
@@ -0,0 +1,39 @@
+From eb9be8310c58c166f9fae3b71c0ad9d6741b4897 Mon Sep 17 00:00:00 2001
+From: Conor Dooley <conor.dooley@microchip.com>
+Date: Fri, 10 Feb 2023 18:59:45 +0000
+Subject: RISC-V: add a spin_shadow_stack declaration
+
+From: Conor Dooley <conor.dooley@microchip.com>
+
+commit eb9be8310c58c166f9fae3b71c0ad9d6741b4897 upstream.
+
+The patchwork automation reported a sparse complaint that
+spin_shadow_stack was not declared and should be static:
+../arch/riscv/kernel/traps.c:335:15: warning: symbol 'spin_shadow_stack' was not declared. Should it be static?
+
+However, this is used in entry.S and therefore shouldn't be static.
+The same applies to the shadow_stack that this pseudo spinlock is
+trying to protect, so do like its charge and add a declaration to
+thread_info.h
+
+Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
+Fixes: 7e1864332fbc ("riscv: fix race when vmap stack overflow")
+Reviewed-by: Guo Ren <guoren@kernel.org>
+Link: https://lore.kernel.org/r/20230210185945.915806-1-conor@kernel.org
+Cc: stable@vger.kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/include/asm/thread_info.h |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/riscv/include/asm/thread_info.h
++++ b/arch/riscv/include/asm/thread_info.h
+@@ -43,6 +43,7 @@
+ #ifndef __ASSEMBLY__
+ 
+ extern long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE / sizeof(long)];
++extern unsigned long spin_shadow_stack;
+ 
+ #include <asm/processor.h>
+ #include <asm/csr.h>
diff --git a/queue-6.2/riscv-avoid-enabling-interrupts-in-die.patch b/queue-6.2/riscv-avoid-enabling-interrupts-in-die.patch

new file mode 100644 (file)

index 0000000..b3dbd43
--- /dev/null
+++ b/queue-6.2/riscv-avoid-enabling-interrupts-in-die.patch
@@ -0,0 +1,55 @@
+From 130aee3fd9981297ff9354e5d5609cd59aafbbea Mon Sep 17 00:00:00 2001
+From: Mattias Nissler <mnissler@rivosinc.com>
+Date: Wed, 15 Feb 2023 14:48:28 +0000
+Subject: riscv: Avoid enabling interrupts in die()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mattias Nissler <mnissler@rivosinc.com>
+
+commit 130aee3fd9981297ff9354e5d5609cd59aafbbea upstream.
+
+While working on something else, I noticed that the kernel would start
+accepting interrupts again after crashing in an interrupt handler. Since
+the kernel is already in inconsistent state, enabling interrupts is
+dangerous and opens up risk of kernel state deteriorating further.
+Interrupts do get enabled via what looks like an unintended side effect of
+spin_unlock_irq, so switch to the more cautious
+spin_lock_irqsave/spin_unlock_irqrestore instead.
+
+Fixes: 76d2a0493a17 ("RISC-V: Init and Halt Code")
+Signed-off-by: Mattias Nissler <mnissler@rivosinc.com>
+Reviewed-by: Björn Töpel <bjorn@kernel.org>
+Link: https://lore.kernel.org/r/20230215144828.3370316-1-mnissler@rivosinc.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/kernel/traps.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/arch/riscv/kernel/traps.c
++++ b/arch/riscv/kernel/traps.c
+@@ -34,10 +34,11 @@ void die(struct pt_regs *regs, const cha
+       static int die_counter;
+       int ret;
+       long cause;
++      unsigned long flags;
+ 
+       oops_enter();
+ 
+-      spin_lock_irq(&die_lock);
++      spin_lock_irqsave(&die_lock, flags);
+       console_verbose();
+       bust_spinlocks(1);
+ 
+@@ -54,7 +55,7 @@ void die(struct pt_regs *regs, const cha
+ 
+       bust_spinlocks(0);
+       add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
+-      spin_unlock_irq(&die_lock);
++      spin_unlock_irqrestore(&die_lock, flags);
+       oops_exit();
+ 
+       if (in_interrupt())
diff --git a/queue-6.2/riscv-ftrace-fixup-panic-by-disabling-preemption.patch b/queue-6.2/riscv-ftrace-fixup-panic-by-disabling-preemption.patch

new file mode 100644 (file)

index 0000000..14d39d3
--- /dev/null
+++ b/queue-6.2/riscv-ftrace-fixup-panic-by-disabling-preemption.patch
@@ -0,0 +1,52 @@
+From 8547649981e6631328cd64f583667501ae385531 Mon Sep 17 00:00:00 2001
+From: Andy Chiu <andy.chiu@sifive.com>
+Date: Thu, 12 Jan 2023 04:05:57 -0500
+Subject: riscv: ftrace: Fixup panic by disabling preemption
+
+From: Andy Chiu <andy.chiu@sifive.com>
+
+commit 8547649981e6631328cd64f583667501ae385531 upstream.
+
+In RISCV, we must use an AUIPC + JALR pair to encode an immediate,
+forming a jump that jumps to an address over 4K. This may cause errors
+if we want to enable kernel preemption and remove dependency from
+patching code with stop_machine(). For example, suppose a task was
+switched out on auipc. If we changed the ftrace function before it was
+switched back, then it would jump to an address that has the updated
+11:0 bits mixed with the previous XLEN:12 part.
+
+p: patched area performed by dynamic ftrace
+ftrace_prologue:
+p|      REG_S   ra, -SZREG(sp)
+p|      auipc   ra, 0x? ------------> preempted
+                                       ...
+                               change ftrace function
+                                       ...
+p|      jalr    -?(ra) <------------- switched back
+p|      REG_L   ra, -SZREG(sp)
+func:
+       xxx
+       ret
+
+Fixes: afc76b8b8011 ("riscv: Using PATCHABLE_FUNCTION_ENTRY instead of MCOUNT")
+Signed-off-by: Andy Chiu <andy.chiu@sifive.com>
+Signed-off-by: Guo Ren <guoren@kernel.org>
+Link: https://lore.kernel.org/r/20230112090603.1295340-2-guoren@kernel.org
+Cc: stable@vger.kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/Kconfig |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/riscv/Kconfig
++++ b/arch/riscv/Kconfig
+@@ -138,7 +138,7 @@ config RISCV
+       select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
+       select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
+       select HAVE_FUNCTION_GRAPH_TRACER
+-      select HAVE_FUNCTION_TRACER if !XIP_KERNEL
++      select HAVE_FUNCTION_TRACER if !XIP_KERNEL && !PREEMPTION
+ 
+ config ARCH_MMAP_RND_BITS_MIN
+       default 18 if 64BIT
diff --git a/queue-6.2/riscv-ftrace-reduce-the-detour-code-size-to-half.patch b/queue-6.2/riscv-ftrace-reduce-the-detour-code-size-to-half.patch

new file mode 100644 (file)

index 0000000..29f5bf4
--- /dev/null
+++ b/queue-6.2/riscv-ftrace-reduce-the-detour-code-size-to-half.patch
@@ -0,0 +1,436 @@
+From 6724a76cff85ee271bbbff42ac527e4643b2ec52 Mon Sep 17 00:00:00 2001
+From: Guo Ren <guoren@linux.alibaba.com>
+Date: Thu, 12 Jan 2023 04:05:59 -0500
+Subject: riscv: ftrace: Reduce the detour code size to half
+
+From: Guo Ren <guoren@linux.alibaba.com>
+
+commit 6724a76cff85ee271bbbff42ac527e4643b2ec52 upstream.
+
+Use a temporary register to reduce the size of detour code from 16 bytes to
+8 bytes. The previous implementation is from 'commit afc76b8b8011 ("riscv:
+Using PATCHABLE_FUNCTION_ENTRY instead of MCOUNT")'.
+
+Before the patch:
+<func_prolog>:
+ 0: REG_S  ra, -SZREG(sp)
+ 4: auipc  ra, ?
+ 8: jalr   ?(ra)
+12: REG_L  ra, -SZREG(sp)
+ (func_body)
+
+After the patch:
+<func_prolog>:
+ 0: auipc  t0, ?
+ 4: jalr   t0, ?(t0)
+ (func_body)
+
+This patch not just reduces the size of detour code, but also fixes an
+important issue:
+
+An Ftrace callback registered with FTRACE_OPS_FL_IPMODIFY flag can
+actually change the instruction pointer, e.g. to "replace" the given
+kernel function with a new one, which is needed for livepatching, etc.
+
+In this case, the trampoline (ftrace_regs_caller) would not return to
+<func_prolog+12> but would rather jump to the new function. So, "REG_L
+ra, -SZREG(sp)" would not run and the original return address would not
+be restored. The kernel is likely to hang or crash as a result.
+
+This can be easily demonstrated if one tries to "replace", say,
+cmdline_proc_show() with a new function with the same signature using
+instruction_pointer_set(&fregs->regs, new_func_addr) in the Ftrace
+callback.
+
+Link: https://lore.kernel.org/linux-riscv/20221122075440.1165172-1-suagrfillet@gmail.com/
+Link: https://lore.kernel.org/linux-riscv/d7d5730b-ebef-68e5-5046-e763e1ee6164@yadro.com/
+Co-developed-by: Song Shuai <suagrfillet@gmail.com>
+Signed-off-by: Song Shuai <suagrfillet@gmail.com>
+Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
+Signed-off-by: Guo Ren <guoren@kernel.org>
+Cc: Evgenii Shatokhin <e.shatokhin@yadro.com>
+Reviewed-by: Evgenii Shatokhin <e.shatokhin@yadro.com>
+Link: https://lore.kernel.org/r/20230112090603.1295340-4-guoren@kernel.org
+Cc: stable@vger.kernel.org
+Fixes: 10626c32e382 ("riscv/ftrace: Add basic support")
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/Makefile             |    4 +-
+ arch/riscv/include/asm/ftrace.h |   50 +++++++++++++++++++++++-------
+ arch/riscv/kernel/ftrace.c      |   65 +++++++++++-----------------------------
+ arch/riscv/kernel/mcount-dyn.S  |   42 +++++++++----------------
+ 4 files changed, 75 insertions(+), 86 deletions(-)
+
+--- a/arch/riscv/Makefile
++++ b/arch/riscv/Makefile
+@@ -12,9 +12,9 @@ ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
+       LDFLAGS_vmlinux := --no-relax
+       KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
+ ifeq ($(CONFIG_RISCV_ISA_C),y)
+-      CC_FLAGS_FTRACE := -fpatchable-function-entry=8
+-else
+       CC_FLAGS_FTRACE := -fpatchable-function-entry=4
++else
++      CC_FLAGS_FTRACE := -fpatchable-function-entry=2
+ endif
+ endif
+ 
+--- a/arch/riscv/include/asm/ftrace.h
++++ b/arch/riscv/include/asm/ftrace.h
+@@ -42,6 +42,14 @@ struct dyn_arch_ftrace {
+  * 2) jalr: setting low-12 offset to ra, jump to ra, and set ra to
+  *          return address (original pc + 4)
+  *
++ *<ftrace enable>:
++ * 0: auipc  t0/ra, 0x?
++ * 4: jalr   t0/ra, ?(t0/ra)
++ *
++ *<ftrace disable>:
++ * 0: nop
++ * 4: nop
++ *
+  * Dynamic ftrace generates probes to call sites, so we must deal with
+  * both auipc and jalr at the same time.
+  */
+@@ -52,25 +60,43 @@ struct dyn_arch_ftrace {
+ #define AUIPC_OFFSET_MASK     (0xfffff000)
+ #define AUIPC_PAD             (0x00001000)
+ #define JALR_SHIFT            20
+-#define JALR_BASIC            (0x000080e7)
+-#define AUIPC_BASIC           (0x00000097)
++#define JALR_RA                       (0x000080e7)
++#define AUIPC_RA              (0x00000097)
++#define JALR_T0                       (0x000282e7)
++#define AUIPC_T0              (0x00000297)
+ #define NOP4                  (0x00000013)
+ 
+-#define make_call(caller, callee, call)                                       \
++#define to_jalr_t0(offset)                                            \
++      (((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_T0)
++
++#define to_auipc_t0(offset)                                           \
++      ((offset & JALR_SIGN_MASK) ?                                    \
++      (((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) | AUIPC_T0) :       \
++      ((offset & AUIPC_OFFSET_MASK) | AUIPC_T0))
++
++#define make_call_t0(caller, callee, call)                            \
+ do {                                                                  \
+-      call[0] = to_auipc_insn((unsigned int)((unsigned long)callee -  \
+-                              (unsigned long)caller));                \
+-      call[1] = to_jalr_insn((unsigned int)((unsigned long)callee -   \
+-                             (unsigned long)caller));                 \
++      unsigned int offset =                                           \
++              (unsigned long) callee - (unsigned long) caller;        \
++      call[0] = to_auipc_t0(offset);                                  \
++      call[1] = to_jalr_t0(offset);                                   \
+ } while (0)
+ 
+-#define to_jalr_insn(offset)                                          \
+-      (((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_BASIC)
++#define to_jalr_ra(offset)                                            \
++      (((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_RA)
+ 
+-#define to_auipc_insn(offset)                                         \
++#define to_auipc_ra(offset)                                           \
+       ((offset & JALR_SIGN_MASK) ?                                    \
+-      (((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) | AUIPC_BASIC) :    \
+-      ((offset & AUIPC_OFFSET_MASK) | AUIPC_BASIC))
++      (((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) | AUIPC_RA) :       \
++      ((offset & AUIPC_OFFSET_MASK) | AUIPC_RA))
++
++#define make_call_ra(caller, callee, call)                            \
++do {                                                                  \
++      unsigned int offset =                                           \
++              (unsigned long) callee - (unsigned long) caller;        \
++      call[0] = to_auipc_ra(offset);                                  \
++      call[1] = to_jalr_ra(offset);                                   \
++} while (0)
+ 
+ /*
+  * Let auipc+jalr be the basic *mcount unit*, so we make it 8 bytes here.
+--- a/arch/riscv/kernel/ftrace.c
++++ b/arch/riscv/kernel/ftrace.c
+@@ -55,12 +55,15 @@ static int ftrace_check_current_call(uns
+ }
+ 
+ static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target,
+-                              bool enable)
++                              bool enable, bool ra)
+ {
+       unsigned int call[2];
+       unsigned int nops[2] = {NOP4, NOP4};
+ 
+-      make_call(hook_pos, target, call);
++      if (ra)
++              make_call_ra(hook_pos, target, call);
++      else
++              make_call_t0(hook_pos, target, call);
+ 
+       /* Replace the auipc-jalr pair at once. Return -EPERM on write error. */
+       if (patch_text_nosync
+@@ -70,42 +73,13 @@ static int __ftrace_modify_call(unsigned
+       return 0;
+ }
+ 
+-/*
+- * Put 5 instructions with 16 bytes at the front of function within
+- * patchable function entry nops' area.
+- *
+- * 0: REG_S  ra, -SZREG(sp)
+- * 1: auipc  ra, 0x?
+- * 2: jalr   -?(ra)
+- * 3: REG_L  ra, -SZREG(sp)
+- *
+- * So the opcodes is:
+- * 0: 0xfe113c23 (sd)/0xfe112e23 (sw)
+- * 1: 0x???????? -> auipc
+- * 2: 0x???????? -> jalr
+- * 3: 0xff813083 (ld)/0xffc12083 (lw)
+- */
+-#if __riscv_xlen == 64
+-#define INSN0 0xfe113c23
+-#define INSN3 0xff813083
+-#elif __riscv_xlen == 32
+-#define INSN0 0xfe112e23
+-#define INSN3 0xffc12083
+-#endif
+-
+-#define FUNC_ENTRY_SIZE       16
+-#define FUNC_ENTRY_JMP        4
+-
+ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+ {
+-      unsigned int call[4] = {INSN0, 0, 0, INSN3};
+-      unsigned long target = addr;
+-      unsigned long caller = rec->ip + FUNC_ENTRY_JMP;
++      unsigned int call[2];
+ 
+-      call[1] = to_auipc_insn((unsigned int)(target - caller));
+-      call[2] = to_jalr_insn((unsigned int)(target - caller));
++      make_call_t0(rec->ip, addr, call);
+ 
+-      if (patch_text_nosync((void *)rec->ip, call, FUNC_ENTRY_SIZE))
++      if (patch_text_nosync((void *)rec->ip, call, MCOUNT_INSN_SIZE))
+               return -EPERM;
+ 
+       return 0;
+@@ -114,15 +88,14 @@ int ftrace_make_call(struct dyn_ftrace *
+ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
+                   unsigned long addr)
+ {
+-      unsigned int nops[4] = {NOP4, NOP4, NOP4, NOP4};
++      unsigned int nops[2] = {NOP4, NOP4};
+ 
+-      if (patch_text_nosync((void *)rec->ip, nops, FUNC_ENTRY_SIZE))
++      if (patch_text_nosync((void *)rec->ip, nops, MCOUNT_INSN_SIZE))
+               return -EPERM;
+ 
+       return 0;
+ }
+ 
+-
+ /*
+  * This is called early on, and isn't wrapped by
+  * ftrace_arch_code_modify_{prepare,post_process}() and therefor doesn't hold
+@@ -144,10 +117,10 @@ int ftrace_init_nop(struct module *mod,
+ int ftrace_update_ftrace_func(ftrace_func_t func)
+ {
+       int ret = __ftrace_modify_call((unsigned long)&ftrace_call,
+-                                     (unsigned long)func, true);
++                                     (unsigned long)func, true, true);
+       if (!ret) {
+               ret = __ftrace_modify_call((unsigned long)&ftrace_regs_call,
+-                                         (unsigned long)func, true);
++                                         (unsigned long)func, true, true);
+       }
+ 
+       return ret;
+@@ -159,16 +132,16 @@ int ftrace_modify_call(struct dyn_ftrace
+                      unsigned long addr)
+ {
+       unsigned int call[2];
+-      unsigned long caller = rec->ip + FUNC_ENTRY_JMP;
++      unsigned long caller = rec->ip;
+       int ret;
+ 
+-      make_call(caller, old_addr, call);
++      make_call_t0(caller, old_addr, call);
+       ret = ftrace_check_current_call(caller, call);
+ 
+       if (ret)
+               return ret;
+ 
+-      return __ftrace_modify_call(caller, addr, true);
++      return __ftrace_modify_call(caller, addr, true, false);
+ }
+ #endif
+ 
+@@ -203,12 +176,12 @@ int ftrace_enable_ftrace_graph_caller(vo
+       int ret;
+ 
+       ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call,
+-                                  (unsigned long)&prepare_ftrace_return, true);
++                                  (unsigned long)&prepare_ftrace_return, true, true);
+       if (ret)
+               return ret;
+ 
+       return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call,
+-                                  (unsigned long)&prepare_ftrace_return, true);
++                                  (unsigned long)&prepare_ftrace_return, true, true);
+ }
+ 
+ int ftrace_disable_ftrace_graph_caller(void)
+@@ -216,12 +189,12 @@ int ftrace_disable_ftrace_graph_caller(v
+       int ret;
+ 
+       ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call,
+-                                  (unsigned long)&prepare_ftrace_return, false);
++                                  (unsigned long)&prepare_ftrace_return, false, true);
+       if (ret)
+               return ret;
+ 
+       return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call,
+-                                  (unsigned long)&prepare_ftrace_return, false);
++                                  (unsigned long)&prepare_ftrace_return, false, true);
+ }
+ #endif /* CONFIG_DYNAMIC_FTRACE */
+ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+--- a/arch/riscv/kernel/mcount-dyn.S
++++ b/arch/riscv/kernel/mcount-dyn.S
+@@ -13,8 +13,8 @@
+ 
+       .text
+ 
+-#define FENTRY_RA_OFFSET      12
+-#define ABI_SIZE_ON_STACK     72
++#define FENTRY_RA_OFFSET      8
++#define ABI_SIZE_ON_STACK     80
+ #define ABI_A0                        0
+ #define ABI_A1                        8
+ #define ABI_A2                        16
+@@ -23,10 +23,10 @@
+ #define ABI_A5                        40
+ #define ABI_A6                        48
+ #define ABI_A7                        56
+-#define ABI_RA                        64
++#define ABI_T0                        64
++#define ABI_RA                        72
+ 
+       .macro SAVE_ABI
+-      addi    sp, sp, -SZREG
+       addi    sp, sp, -ABI_SIZE_ON_STACK
+ 
+       REG_S   a0, ABI_A0(sp)
+@@ -37,6 +37,7 @@
+       REG_S   a5, ABI_A5(sp)
+       REG_S   a6, ABI_A6(sp)
+       REG_S   a7, ABI_A7(sp)
++      REG_S   t0, ABI_T0(sp)
+       REG_S   ra, ABI_RA(sp)
+       .endm
+ 
+@@ -49,24 +50,18 @@
+       REG_L   a5, ABI_A5(sp)
+       REG_L   a6, ABI_A6(sp)
+       REG_L   a7, ABI_A7(sp)
++      REG_L   t0, ABI_T0(sp)
+       REG_L   ra, ABI_RA(sp)
+ 
+       addi    sp, sp, ABI_SIZE_ON_STACK
+-      addi    sp, sp, SZREG
+       .endm
+ 
+ #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+       .macro SAVE_ALL
+-      addi    sp, sp, -SZREG
+       addi    sp, sp, -PT_SIZE_ON_STACK
+ 
+-      REG_S x1,  PT_EPC(sp)
+-      addi    sp, sp, PT_SIZE_ON_STACK
+-      REG_L x1,  (sp)
+-      addi    sp, sp, -PT_SIZE_ON_STACK
++      REG_S t0,  PT_EPC(sp)
+       REG_S x1,  PT_RA(sp)
+-      REG_L x1,  PT_EPC(sp)
+-
+       REG_S x2,  PT_SP(sp)
+       REG_S x3,  PT_GP(sp)
+       REG_S x4,  PT_TP(sp)
+@@ -100,15 +95,11 @@
+       .endm
+ 
+       .macro RESTORE_ALL
++      REG_L t0,  PT_EPC(sp)
+       REG_L x1,  PT_RA(sp)
+-      addi    sp, sp, PT_SIZE_ON_STACK
+-      REG_S x1,  (sp)
+-      addi    sp, sp, -PT_SIZE_ON_STACK
+-      REG_L x1,  PT_EPC(sp)
+       REG_L x2,  PT_SP(sp)
+       REG_L x3,  PT_GP(sp)
+       REG_L x4,  PT_TP(sp)
+-      REG_L x5,  PT_T0(sp)
+       REG_L x6,  PT_T1(sp)
+       REG_L x7,  PT_T2(sp)
+       REG_L x8,  PT_S0(sp)
+@@ -137,17 +128,16 @@
+       REG_L x31, PT_T6(sp)
+ 
+       addi    sp, sp, PT_SIZE_ON_STACK
+-      addi    sp, sp, SZREG
+       .endm
+ #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+ 
+ ENTRY(ftrace_caller)
+       SAVE_ABI
+ 
+-      addi    a0, ra, -FENTRY_RA_OFFSET
++      addi    a0, t0, -FENTRY_RA_OFFSET
+       la      a1, function_trace_op
+       REG_L   a2, 0(a1)
+-      REG_L   a1, ABI_SIZE_ON_STACK(sp)
++      mv      a1, ra
+       mv      a3, sp
+ 
+ ftrace_call:
+@@ -155,8 +145,8 @@ ftrace_call:
+       call    ftrace_stub
+ 
+ #ifdef CONFIG_FUNCTION_GRAPH_TRACER
+-      addi    a0, sp, ABI_SIZE_ON_STACK
+-      REG_L   a1, ABI_RA(sp)
++      addi    a0, sp, ABI_RA
++      REG_L   a1, ABI_T0(sp)
+       addi    a1, a1, -FENTRY_RA_OFFSET
+ #ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+       mv      a2, s0
+@@ -166,17 +156,17 @@ ftrace_graph_call:
+       call    ftrace_stub
+ #endif
+       RESTORE_ABI
+-      ret
++      jr t0
+ ENDPROC(ftrace_caller)
+ 
+ #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ ENTRY(ftrace_regs_caller)
+       SAVE_ALL
+ 
+-      addi    a0, ra, -FENTRY_RA_OFFSET
++      addi    a0, t0, -FENTRY_RA_OFFSET
+       la      a1, function_trace_op
+       REG_L   a2, 0(a1)
+-      REG_L   a1, PT_SIZE_ON_STACK(sp)
++      mv      a1, ra
+       mv      a3, sp
+ 
+ ftrace_regs_call:
+@@ -196,6 +186,6 @@ ftrace_graph_regs_call:
+ #endif
+ 
+       RESTORE_ALL
+-      ret
++      jr t0
+ ENDPROC(ftrace_regs_caller)
+ #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
diff --git a/queue-6.2/riscv-ftrace-remove-wasted-nops-for-riscv_isa_c.patch b/queue-6.2/riscv-ftrace-remove-wasted-nops-for-riscv_isa_c.patch

new file mode 100644 (file)

index 0000000..2578970
--- /dev/null
+++ b/queue-6.2/riscv-ftrace-remove-wasted-nops-for-riscv_isa_c.patch
@@ -0,0 +1,55 @@
+From 409c8fb20c66df7150e592747412438c04aeb11f Mon Sep 17 00:00:00 2001
+From: Guo Ren <guoren@linux.alibaba.com>
+Date: Thu, 12 Jan 2023 04:05:58 -0500
+Subject: riscv: ftrace: Remove wasted nops for !RISCV_ISA_C
+
+From: Guo Ren <guoren@linux.alibaba.com>
+
+commit 409c8fb20c66df7150e592747412438c04aeb11f upstream.
+
+When CONFIG_RISCV_ISA_C=n, -fpatchable-function-entry=8 would generate
+more nops than we expect, because it treats the nop opcode as 0x00000013
+instead of 0x0001.
+
+Dump of assembler code for function dw_pcie_free_msi:
+   0xffffffff806fce94 <+0>:     sd      ra,-8(sp)
+   0xffffffff806fce98 <+4>:     auipc   ra,0xff90f
+   0xffffffff806fce9c <+8>:     jalr    -684(ra) # 0xffffffff8000bbec
+<ftrace_caller>
+   0xffffffff806fcea0 <+12>:    ld      ra,-8(sp)
+   0xffffffff806fcea4 <+16>:    nop /* wasted */
+   0xffffffff806fcea8 <+20>:    nop /* wasted */
+   0xffffffff806fceac <+24>:    nop /* wasted */
+   0xffffffff806fceb0 <+28>:    nop /* wasted */
+   0xffffffff806fceb4 <+0>:     addi    sp,sp,-48
+   0xffffffff806fceb8 <+4>:     sd      s0,32(sp)
+   0xffffffff806fcebc <+8>:     sd      s1,24(sp)
+   0xffffffff806fcec0 <+12>:    sd      s2,16(sp)
+   0xffffffff806fcec4 <+16>:    sd      s3,8(sp)
+   0xffffffff806fcec8 <+20>:    sd      ra,40(sp)
+   0xffffffff806fcecc <+24>:    addi    s0,sp,48
+
+Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
+Signed-off-by: Guo Ren <guoren@kernel.org>
+Link: https://lore.kernel.org/r/20230112090603.1295340-3-guoren@kernel.org
+Cc: stable@vger.kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/Makefile |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/riscv/Makefile
++++ b/arch/riscv/Makefile
+@@ -11,7 +11,11 @@ LDFLAGS_vmlinux :=
+ ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
+       LDFLAGS_vmlinux := --no-relax
+       KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
++ifeq ($(CONFIG_RISCV_ISA_C),y)
+       CC_FLAGS_FTRACE := -fpatchable-function-entry=8
++else
++      CC_FLAGS_FTRACE := -fpatchable-function-entry=4
++endif
+ endif
+ 
+ ifeq ($(CONFIG_CMODEL_MEDLOW),y)
diff --git a/queue-6.2/riscv-jump_label-fixup-unaligned-arch_static_branch-function.patch b/queue-6.2/riscv-jump_label-fixup-unaligned-arch_static_branch-function.patch

new file mode 100644 (file)

index 0000000..f98f0e0
--- /dev/null
+++ b/queue-6.2/riscv-jump_label-fixup-unaligned-arch_static_branch-function.patch
@@ -0,0 +1,61 @@
+From 9ddfc3cd806081ce1f6c9c2f988cbb031f35d28f Mon Sep 17 00:00:00 2001
+From: Andy Chiu <andy.chiu@sifive.com>
+Date: Mon, 6 Feb 2023 04:04:40 -0500
+Subject: riscv: jump_label: Fixup unaligned arch_static_branch function
+
+From: Andy Chiu <andy.chiu@sifive.com>
+
+commit 9ddfc3cd806081ce1f6c9c2f988cbb031f35d28f upstream.
+
+Runtime code patching must be done at a naturally aligned address, or we
+may execute on a partial instruction.
+
+We have encountered problems traced back to static jump functions during
+the test. We switched the tracer randomly every 1~5 seconds on a
+dual-core QEMU setup and found the kernel stuck at a static branch
+where it jumps to itself.
+
+The reason is that the static branch was 2-byte but not 4-byte aligned.
+Then, the kernel would patch the instruction, either J or NOP, with two
+half-word stores if the machine does not have efficient unaligned
+accesses. Thus, moments exist where half of the NOP mixes with the other
+half of the J when transitioning the branch. In our particular case, on
+a little-endian machine, the upper half of the NOP was mixed with the
+lower part of the J when enabling the branch, resulting in a jump that
+jumped to itself. Conversely, it would result in a HINT instruction when
+disabling the branch, but it might not be observable.
+
+ARM64 does not have this problem since all instructions must be 4-byte
+aligned.
+
+Fixes: ebc00dde8a97 ("riscv: Add jump-label implementation")
+Link: https://lore.kernel.org/linux-riscv/20220913094252.3555240-6-andy.chiu@sifive.com/
+Reviewed-by: Greentime Hu <greentime.hu@sifive.com>
+Signed-off-by: Andy Chiu <andy.chiu@sifive.com>
+Signed-off-by: Guo Ren <guoren@kernel.org>
+Link: https://lore.kernel.org/r/20230206090440.1255001-1-guoren@kernel.org
+Cc: stable@vger.kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/include/asm/jump_label.h |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/arch/riscv/include/asm/jump_label.h
++++ b/arch/riscv/include/asm/jump_label.h
+@@ -18,6 +18,7 @@ static __always_inline bool arch_static_
+                                              const bool branch)
+ {
+       asm_volatile_goto(
++              "       .align          2                       \n\t"
+               "       .option push                            \n\t"
+               "       .option norelax                         \n\t"
+               "       .option norvc                           \n\t"
+@@ -39,6 +40,7 @@ static __always_inline bool arch_static_
+                                                   const bool branch)
+ {
+       asm_volatile_goto(
++              "       .align          2                       \n\t"
+               "       .option push                            \n\t"
+               "       .option norelax                         \n\t"
+               "       .option norvc                           \n\t"
diff --git a/queue-6.2/riscv-mm-fix-regression-due-to-update_mmu_cache-change.patch b/queue-6.2/riscv-mm-fix-regression-due-to-update_mmu_cache-change.patch

new file mode 100644 (file)

index 0000000..edf105e
--- /dev/null
+++ b/queue-6.2/riscv-mm-fix-regression-due-to-update_mmu_cache-change.patch
@@ -0,0 +1,46 @@
+From b49f700668fff7565b945dce823def79bff59bb0 Mon Sep 17 00:00:00 2001
+From: Sergey Matyukevich <sergey.matyukevich@syntacore.com>
+Date: Mon, 30 Jan 2023 00:18:18 +0300
+Subject: riscv: mm: fix regression due to update_mmu_cache change
+
+From: Sergey Matyukevich <sergey.matyukevich@syntacore.com>
+
+commit b49f700668fff7565b945dce823def79bff59bb0 upstream.
+
+This is a partial revert of the commit 4bd1d80efb5a ("riscv: mm: notify
+remote harts about mmu cache updates"). Original commit included two
+loosely related changes serving the same purpose of fixing stale TLB
+entries causing user-space application crash:
+- introduce deferred per-ASID TLB flush for CPUs not running the task
+- switch to per-ASID TLB flush on all CPUs running the task in update_mmu_cache
+
+According to report and discussion in [1], the second part caused a
+regression on Renesas RZ/Five SoC. For now restore the old behavior
+of the update_mmu_cache.
+
+[1] https://lore.kernel.org/linux-riscv/20220829205219.283543-1-geomatsi@gmail.com/
+
+Fixes: 4bd1d80efb5a ("riscv: mm: notify remote harts about mmu cache updates")
+Reported-by: "Lad, Prabhakar" <prabhakar.csengg@gmail.com>
+Signed-off-by: Sergey Matyukevich <sergey.matyukevich@syntacore.com>
+Link: https://lore.kernel.org/linux-riscv/20220829205219.283543-1-geomatsi@gmail.com/ [1]
+Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
+Link: https://lore.kernel.org/r/20230129211818.686557-1-geomatsi@gmail.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/include/asm/pgtable.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/riscv/include/asm/pgtable.h
++++ b/arch/riscv/include/asm/pgtable.h
+@@ -415,7 +415,7 @@ static inline void update_mmu_cache(stru
+        * Relying on flush_tlb_fix_spurious_fault would suffice, but
+        * the extra traps reduce performance.  So, eagerly SFENCE.VMA.
+        */
+-      flush_tlb_page(vma, address);
++      local_flush_tlb_page(address);
+ }
+ 
+ #define __HAVE_ARCH_UPDATE_MMU_TLB
diff --git a/queue-6.2/riscv-mm-perform-bpf-exhandler-fixup-on-page-fault.patch b/queue-6.2/riscv-mm-perform-bpf-exhandler-fixup-on-page-fault.patch

new file mode 100644 (file)

index 0000000..2952d09
--- /dev/null
+++ b/queue-6.2/riscv-mm-perform-bpf-exhandler-fixup-on-page-fault.patch
@@ -0,0 +1,54 @@
+From 416721ff05fddc58ca531b6f069de250301de6e5 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= <bjorn@rivosinc.com>
+Date: Tue, 14 Feb 2023 17:25:15 +0100
+Subject: riscv, mm: Perform BPF exhandler fixup on page fault
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Björn Töpel <bjorn@rivosinc.com>
+
+commit 416721ff05fddc58ca531b6f069de250301de6e5 upstream.
+
+Commit 21855cac82d3 ("riscv/mm: Prevent kernel module to access user
+memory without uaccess routines") added early exits/deaths for page
+faults stemming from accesses to user-space without using proper
+uaccess routines (where sstatus.SUM is set).
+
+Unfortunately, this is too strict for some BPF programs, which rely
+on BPF exhandler fixups. These BPF programs load "BTF pointers". A
+BTF pointer could either be a valid kernel pointer or NULL, but not a
+userspace address.
+
+Resolve the problem by calling the fixup handler in the early exit
+path.
+
+Fixes: 21855cac82d3 ("riscv/mm: Prevent kernel module to access user memory without uaccess routines")
+Signed-off-by: Björn Töpel <bjorn@rivosinc.com>
+Link: https://lore.kernel.org/r/20230214162515.184827-1-bjorn@kernel.org
+Cc: stable@vger.kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/mm/fault.c |   10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/arch/riscv/mm/fault.c
++++ b/arch/riscv/mm/fault.c
+@@ -267,10 +267,12 @@ asmlinkage void do_page_fault(struct pt_
+       if (user_mode(regs))
+               flags |= FAULT_FLAG_USER;
+ 
+-      if (!user_mode(regs) && addr < TASK_SIZE &&
+-                      unlikely(!(regs->status & SR_SUM)))
+-              die_kernel_fault("access to user memory without uaccess routines",
+-                              addr, regs);
++      if (!user_mode(regs) && addr < TASK_SIZE && unlikely(!(regs->status & SR_SUM))) {
++              if (fixup_exception(regs))
++                      return;
++
++              die_kernel_fault("access to user memory without uaccess routines", addr, regs);
++      }
+ 
+       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
+ 
diff --git a/queue-6.2/scsi-ses-don-t-attach-if-enclosure-has-no-components.patch b/queue-6.2/scsi-ses-don-t-attach-if-enclosure-has-no-components.patch

new file mode 100644 (file)

index 0000000..2d1c56f
--- /dev/null
+++ b/queue-6.2/scsi-ses-don-t-attach-if-enclosure-has-no-components.patch
@@ -0,0 +1,41 @@
+From 3fe97ff3d94934649abb0652028dd7296170c8d0 Mon Sep 17 00:00:00 2001
+From: James Bottomley <jejb@linux.ibm.com>
+Date: Sat, 28 Nov 2020 15:27:21 -0800
+Subject: scsi: ses: Don't attach if enclosure has no components
+
+From: James Bottomley <jejb@linux.ibm.com>
+
+commit 3fe97ff3d94934649abb0652028dd7296170c8d0 upstream.
+
+An enclosure with no components can't usefully be operated by the driver
+(since effectively it has nothing to manage), so report the problem and
+don't attach. Not attaching also fixes an oops which could occur if the
+driver tries to manage a zero component enclosure.
+
+[mkp: Switched to KERN_WARNING since this scenario is common]
+
+Link: https://lore.kernel.org/r/c5deac044ac409e32d9ad9968ce0dcbc996bfc7a.camel@linux.ibm.com
+Cc: stable@vger.kernel.org
+Reported-by: Ding Hui <dinghui@sangfor.com.cn>
+Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/ses.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/drivers/scsi/ses.c
++++ b/drivers/scsi/ses.c
+@@ -704,6 +704,12 @@ static int ses_intf_add(struct device *c
+                   type_ptr[0] == ENCLOSURE_COMPONENT_ARRAY_DEVICE)
+                       components += type_ptr[1];
+       }
++
++      if (components == 0) {
++              sdev_printk(KERN_WARNING, sdev, "enclosure has no enumerated components\n");
++              goto err_free;
++      }
++
+       ses_dev->page1 = buf;
+       ses_dev->page1_len = len;
+       buf = NULL;
diff --git a/queue-6.2/scsi-ses-fix-possible-addl_desc_ptr-out-of-bounds-accesses.patch b/queue-6.2/scsi-ses-fix-possible-addl_desc_ptr-out-of-bounds-accesses.patch

new file mode 100644 (file)

index 0000000..2f96d15
--- /dev/null
+++ b/queue-6.2/scsi-ses-fix-possible-addl_desc_ptr-out-of-bounds-accesses.patch
@@ -0,0 +1,114 @@
+From db95d4df71cb55506425b6e4a5f8d68e3a765b63 Mon Sep 17 00:00:00 2001
+From: Tomas Henzl <thenzl@redhat.com>
+Date: Thu, 2 Feb 2023 17:24:49 +0100
+Subject: scsi: ses: Fix possible addl_desc_ptr out-of-bounds accesses
+
+From: Tomas Henzl <thenzl@redhat.com>
+
+commit db95d4df71cb55506425b6e4a5f8d68e3a765b63 upstream.
+
+Sanitize possible addl_desc_ptr out-of-bounds accesses in
+ses_enclosure_data_process().
+
+Link: https://lore.kernel.org/r/20230202162451.15346-3-thenzl@redhat.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Tomas Henzl <thenzl@redhat.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/ses.c |   35 ++++++++++++++++++++++++++---------
+ 1 file changed, 26 insertions(+), 9 deletions(-)
+
+--- a/drivers/scsi/ses.c
++++ b/drivers/scsi/ses.c
+@@ -433,8 +433,8 @@ int ses_match_host(struct enclosure_devi
+ }
+ #endif  /*  0  */
+ 
+-static void ses_process_descriptor(struct enclosure_component *ecomp,
+-                                 unsigned char *desc)
++static int ses_process_descriptor(struct enclosure_component *ecomp,
++                                 unsigned char *desc, int max_desc_len)
+ {
+       int eip = desc[0] & 0x10;
+       int invalid = desc[0] & 0x80;
+@@ -445,22 +445,32 @@ static void ses_process_descriptor(struc
+       unsigned char *d;
+ 
+       if (invalid)
+-              return;
++              return 0;
+ 
+       switch (proto) {
+       case SCSI_PROTOCOL_FCP:
+               if (eip) {
++                      if (max_desc_len <= 7)
++                              return 1;
+                       d = desc + 4;
+                       slot = d[3];
+               }
+               break;
+       case SCSI_PROTOCOL_SAS:
++
+               if (eip) {
++                      if (max_desc_len <= 27)
++                              return 1;
+                       d = desc + 4;
+                       slot = d[3];
+                       d = desc + 8;
+-              } else
++              } else {
++                      if (max_desc_len <= 23)
++                              return 1;
+                       d = desc + 4;
++              }
++
++
+               /* only take the phy0 addr */
+               addr = (u64)d[12] << 56 |
+                       (u64)d[13] << 48 |
+@@ -477,6 +487,8 @@ static void ses_process_descriptor(struc
+       }
+       ecomp->slot = slot;
+       scomp->addr = addr;
++
++      return 0;
+ }
+ 
+ struct efd {
+@@ -549,7 +561,7 @@ static void ses_enclosure_data_process(s
+               /* skip past overall descriptor */
+               desc_ptr += len + 4;
+       }
+-      if (ses_dev->page10)
++      if (ses_dev->page10 && ses_dev->page10_len > 9)
+               addl_desc_ptr = ses_dev->page10 + 8;
+       type_ptr = ses_dev->page1_types;
+       components = 0;
+@@ -557,6 +569,7 @@ static void ses_enclosure_data_process(s
+               for (j = 0; j < type_ptr[1]; j++) {
+                       char *name = NULL;
+                       struct enclosure_component *ecomp;
++                      int max_desc_len;
+ 
+                       if (desc_ptr) {
+                               if (desc_ptr >= buf + page7_len) {
+@@ -583,10 +596,14 @@ static void ses_enclosure_data_process(s
+                                       ecomp = &edev->component[components++];
+ 
+                               if (!IS_ERR(ecomp)) {
+-                                      if (addl_desc_ptr)
+-                                              ses_process_descriptor(
+-                                                      ecomp,
+-                                                      addl_desc_ptr);
++                                      if (addl_desc_ptr) {
++                                              max_desc_len = ses_dev->page10_len -
++                                                  (addl_desc_ptr - ses_dev->page10);
++                                              if (ses_process_descriptor(ecomp,
++                                                  addl_desc_ptr,
++                                                  max_desc_len))
++                                                      addl_desc_ptr = NULL;
++                                      }
+                                       if (create)
+                                               enclosure_component_register(
+                                                       ecomp);
diff --git a/queue-6.2/scsi-ses-fix-possible-desc_ptr-out-of-bounds-accesses.patch b/queue-6.2/scsi-ses-fix-possible-desc_ptr-out-of-bounds-accesses.patch

new file mode 100644 (file)

index 0000000..986e20a
--- /dev/null
+++ b/queue-6.2/scsi-ses-fix-possible-desc_ptr-out-of-bounds-accesses.patch
@@ -0,0 +1,48 @@
+From 801ab13d50cf3d26170ee073ea8bb4eececb76ab Mon Sep 17 00:00:00 2001
+From: Tomas Henzl <thenzl@redhat.com>
+Date: Thu, 2 Feb 2023 17:24:50 +0100
+Subject: scsi: ses: Fix possible desc_ptr out-of-bounds accesses
+
+From: Tomas Henzl <thenzl@redhat.com>
+
+commit 801ab13d50cf3d26170ee073ea8bb4eececb76ab upstream.
+
+Sanitize possible desc_ptr out-of-bounds accesses in
+ses_enclosure_data_process().
+
+Link: https://lore.kernel.org/r/20230202162451.15346-4-thenzl@redhat.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Tomas Henzl <thenzl@redhat.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/ses.c |   14 +++++++++-----
+ 1 file changed, 9 insertions(+), 5 deletions(-)
+
+--- a/drivers/scsi/ses.c
++++ b/drivers/scsi/ses.c
+@@ -572,15 +572,19 @@ static void ses_enclosure_data_process(s
+                       int max_desc_len;
+ 
+                       if (desc_ptr) {
+-                              if (desc_ptr >= buf + page7_len) {
++                              if (desc_ptr + 3 >= buf + page7_len) {
+                                       desc_ptr = NULL;
+                               } else {
+                                       len = (desc_ptr[2] << 8) + desc_ptr[3];
+                                       desc_ptr += 4;
+-                                      /* Add trailing zero - pushes into
+-                                       * reserved space */
+-                                      desc_ptr[len] = '\0';
+-                                      name = desc_ptr;
++                                      if (desc_ptr + len > buf + page7_len)
++                                              desc_ptr = NULL;
++                                      else {
++                                              /* Add trailing zero - pushes into
++                                               * reserved space */
++                                              desc_ptr[len] = '\0';
++                                              name = desc_ptr;
++                                      }
+                               }
+                       }
+                       if (type_ptr[0] == ENCLOSURE_COMPONENT_DEVICE ||
diff --git a/queue-6.2/scsi-ses-fix-slab-out-of-bounds-in-ses_enclosure_data_process.patch b/queue-6.2/scsi-ses-fix-slab-out-of-bounds-in-ses_enclosure_data_process.patch

new file mode 100644 (file)

index 0000000..d4cf236
--- /dev/null
+++ b/queue-6.2/scsi-ses-fix-slab-out-of-bounds-in-ses_enclosure_data_process.patch
@@ -0,0 +1,43 @@
+From 9b4f5028e493cb353a5c8f5c45073eeea0303abd Mon Sep 17 00:00:00 2001
+From: Tomas Henzl <thenzl@redhat.com>
+Date: Thu, 2 Feb 2023 17:24:48 +0100
+Subject: scsi: ses: Fix slab-out-of-bounds in ses_enclosure_data_process()
+
+From: Tomas Henzl <thenzl@redhat.com>
+
+commit 9b4f5028e493cb353a5c8f5c45073eeea0303abd upstream.
+
+A fix for:
+
+BUG: KASAN: slab-out-of-bounds in ses_enclosure_data_process+0x949/0xe30 [ses]
+Read of size 1 at addr ffff88a1b043a451 by task systemd-udevd/3271
+
+Checking after (and before in next loop) addl_desc_ptr[1] is sufficient, we
+expect the size to be sanitized before first access to addl_desc_ptr[1].
+Make sure we don't walk beyond end of page.
+
+Link: https://lore.kernel.org/r/20230202162451.15346-2-thenzl@redhat.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Tomas Henzl <thenzl@redhat.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/ses.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/scsi/ses.c
++++ b/drivers/scsi/ses.c
+@@ -603,9 +603,11 @@ static void ses_enclosure_data_process(s
+                            /* these elements are optional */
+                            type_ptr[0] == ENCLOSURE_COMPONENT_SCSI_TARGET_PORT ||
+                            type_ptr[0] == ENCLOSURE_COMPONENT_SCSI_INITIATOR_PORT ||
+-                           type_ptr[0] == ENCLOSURE_COMPONENT_CONTROLLER_ELECTRONICS))
++                           type_ptr[0] == ENCLOSURE_COMPONENT_CONTROLLER_ELECTRONICS)) {
+                               addl_desc_ptr += addl_desc_ptr[1] + 2;
+-
++                              if (addl_desc_ptr + 1 >= ses_dev->page10 + ses_dev->page10_len)
++                                      addl_desc_ptr = NULL;
++                      }
+               }
+       }
+       kfree(buf);
diff --git a/queue-6.2/scsi-ses-fix-slab-out-of-bounds-in-ses_intf_remove.patch b/queue-6.2/scsi-ses-fix-slab-out-of-bounds-in-ses_intf_remove.patch

new file mode 100644 (file)

index 0000000..9d22b32
--- /dev/null
+++ b/queue-6.2/scsi-ses-fix-slab-out-of-bounds-in-ses_intf_remove.patch
@@ -0,0 +1,38 @@
+From 578797f0c8cbc2e3ec5fc0dab87087b4c7073686 Mon Sep 17 00:00:00 2001
+From: Tomas Henzl <thenzl@redhat.com>
+Date: Thu, 2 Feb 2023 17:24:51 +0100
+Subject: scsi: ses: Fix slab-out-of-bounds in ses_intf_remove()
+
+From: Tomas Henzl <thenzl@redhat.com>
+
+commit 578797f0c8cbc2e3ec5fc0dab87087b4c7073686 upstream.
+
+A fix for:
+
+BUG: KASAN: slab-out-of-bounds in ses_intf_remove+0x23f/0x270 [ses]
+Read of size 8 at addr ffff88a10d32e5d8 by task rmmod/12013
+
+When edev->components is zero, accessing edev->component[0] members is
+wrong.
+
+Link: https://lore.kernel.org/r/20230202162451.15346-5-thenzl@redhat.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Tomas Henzl <thenzl@redhat.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/ses.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/scsi/ses.c
++++ b/drivers/scsi/ses.c
+@@ -856,7 +856,8 @@ static void ses_intf_remove_enclosure(st
+       kfree(ses_dev->page2);
+       kfree(ses_dev);
+ 
+-      kfree(edev->component[0].scratch);
++      if (edev->components)
++              kfree(edev->component[0].scratch);
+ 
+       put_device(&edev->edev);
+       enclosure_unregister(edev);
diff --git a/queue-6.2/series b/queue-6.2/series

index a1685b0018b61ba16de95e706895e22e0d17179f..e986f9c9b37de242450eff4f4c50614513772246 100644 (file)
--- a/queue-6.2/series
+++ b/queue-6.2/series
@@ -961,3 +961,39 @@ scsi-qla2xxx-fix-dma-api-call-trace-on-nvme-ls-requests.patch
  scsi-qla2xxx-remove-unintended-flag-clearing.patch
  scsi-qla2xxx-fix-erroneous-link-down.patch
  scsi-qla2xxx-remove-increment-of-interface-err-cnt.patch
+scsi-ses-don-t-attach-if-enclosure-has-no-components.patch
+scsi-ses-fix-slab-out-of-bounds-in-ses_enclosure_data_process.patch
+scsi-ses-fix-possible-addl_desc_ptr-out-of-bounds-accesses.patch
+scsi-ses-fix-possible-desc_ptr-out-of-bounds-accesses.patch
+scsi-ses-fix-slab-out-of-bounds-in-ses_intf_remove.patch
+risc-v-add-a-spin_shadow_stack-declaration.patch
+riscv-avoid-enabling-interrupts-in-die.patch
+riscv-mm-fix-regression-due-to-update_mmu_cache-change.patch
+riscv-jump_label-fixup-unaligned-arch_static_branch-function.patch
+riscv-ftrace-fixup-panic-by-disabling-preemption.patch
+riscv-mm-perform-bpf-exhandler-fixup-on-page-fault.patch
+riscv-ftrace-remove-wasted-nops-for-riscv_isa_c.patch
+riscv-ftrace-reduce-the-detour-code-size-to-half.patch
+mips-dts-ci20-fix-otg-power-gpio.patch
+pci-pm-observe-reset-delay-irrespective-of-bridge_d3.patch
+pci-unify-delay-handling-for-reset-and-resume.patch
+pci-hotplug-allow-marking-devices-as-disconnected-during-bind-unbind.patch
+pci-avoid-flr-for-amd-fch-ahci-adapters.patch
+pci-dpc-await-readiness-of-secondary-bus-after-reset.patch
+bus-mhi-ep-only-send-enotconn-status-if-client-driver-is-available.patch
+bus-mhi-ep-move-chan-lock-to-the-start-of-processing-queued-ch-ring.patch
+bus-mhi-ep-save-channel-state-locally-during-suspend-and-resume.patch
+iommufd-make-sure-to-zero-vfio_iommu_type1_info-before-copying-to-user.patch
+iommufd-do-not-add-the-same-hwpt-to-the-ioas-hwpt_list-twice.patch
+iommu-vt-d-avoid-superfluous-iotlb-tracking-in-lazy-mode.patch
+iommu-vt-d-fix-pasid-directory-pointer-coherency.patch
+vfio-type1-exclude-mdevs-from-vfio_update_vaddr.patch
+vfio-type1-prevent-underflow-of-locked_vm-via-exec.patch
+vfio-type1-track-locked_vm-per-dma.patch
+vfio-type1-restore-locked_vm.patch
+drm-amd-fix-initialization-for-nbio-7.5.1.patch
+drm-i915-quirks-add-inverted-backlight-quirk-for-hp-14-r206nv.patch
+drm-radeon-fix-edp-for-single-display-imac11-2.patch
+drm-i915-don-t-use-stolen-memory-for-ring-buffers-with-llc.patch
+drm-i915-don-t-use-bar-mappings-for-ring-buffers-with-llc.patch
+drm-gud-fix-ubsan-warning.patch
diff --git a/queue-6.2/vfio-type1-exclude-mdevs-from-vfio_update_vaddr.patch b/queue-6.2/vfio-type1-exclude-mdevs-from-vfio_update_vaddr.patch

new file mode 100644 (file)

index 0000000..5e20b80
--- /dev/null
+++ b/queue-6.2/vfio-type1-exclude-mdevs-from-vfio_update_vaddr.patch
@@ -0,0 +1,168 @@
+From ef3a3f6a294ba65fd906a291553935881796f8a5 Mon Sep 17 00:00:00 2001
+From: Steve Sistare <steven.sistare@oracle.com>
+Date: Tue, 31 Jan 2023 08:58:03 -0800
+Subject: vfio/type1: exclude mdevs from VFIO_UPDATE_VADDR
+
+From: Steve Sistare <steven.sistare@oracle.com>
+
+commit ef3a3f6a294ba65fd906a291553935881796f8a5 upstream.
+
+Disable the VFIO_UPDATE_VADDR capability if mediated devices are present.
+Their kernel threads could be blocked indefinitely by a misbehaving
+userland while trying to pin/unpin pages while vaddrs are being updated.
+
+Do not allow groups to be added to the container while vaddr's are invalid,
+so we never need to block user threads from pinning, and can delete the
+vaddr-waiting code in a subsequent patch.
+
+Fixes: c3cbab24db38 ("vfio/type1: implement interfaces to update vaddr")
+Cc: stable@vger.kernel.org
+Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
+Reviewed-by: Kevin Tian <kevin.tian@intel.com>
+Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
+Link: https://lore.kernel.org/r/1675184289-267876-2-git-send-email-steven.sistare@oracle.com
+Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vfio/vfio_iommu_type1.c |   44 ++++++++++++++++++++++++++++++++++++++--
+ include/uapi/linux/vfio.h       |   15 ++++++++-----
+ 2 files changed, 51 insertions(+), 8 deletions(-)
+
+--- a/drivers/vfio/vfio_iommu_type1.c
++++ b/drivers/vfio/vfio_iommu_type1.c
+@@ -861,6 +861,12 @@ static int vfio_iommu_type1_pin_pages(vo
+ 
+       mutex_lock(&iommu->lock);
+ 
++      if (WARN_ONCE(iommu->vaddr_invalid_count,
++                    "vfio_pin_pages not allowed with VFIO_UPDATE_VADDR\n")) {
++              ret = -EBUSY;
++              goto pin_done;
++      }
++
+       /*
+        * Wait for all necessary vaddr's to be valid so they can be used in
+        * the main loop without dropping the lock, to avoid racing vs unmap.
+@@ -1343,6 +1349,12 @@ static int vfio_dma_do_unmap(struct vfio
+ 
+       mutex_lock(&iommu->lock);
+ 
++      /* Cannot update vaddr if mdev is present. */
++      if (invalidate_vaddr && !list_empty(&iommu->emulated_iommu_groups)) {
++              ret = -EBUSY;
++              goto unlock;
++      }
++
+       pgshift = __ffs(iommu->pgsize_bitmap);
+       pgsize = (size_t)1 << pgshift;
+ 
+@@ -2194,11 +2206,16 @@ static int vfio_iommu_type1_attach_group
+       struct iommu_domain_geometry *geo;
+       LIST_HEAD(iova_copy);
+       LIST_HEAD(group_resv_regions);
+-      int ret = -EINVAL;
++      int ret = -EBUSY;
+ 
+       mutex_lock(&iommu->lock);
+ 
++      /* Attach could require pinning, so disallow while vaddr is invalid. */
++      if (iommu->vaddr_invalid_count)
++              goto out_unlock;
++
+       /* Check for duplicates */
++      ret = -EINVAL;
+       if (vfio_iommu_find_iommu_group(iommu, iommu_group))
+               goto out_unlock;
+ 
+@@ -2669,6 +2686,16 @@ static int vfio_domains_have_enforce_cac
+       return ret;
+ }
+ 
++static bool vfio_iommu_has_emulated(struct vfio_iommu *iommu)
++{
++      bool ret;
++
++      mutex_lock(&iommu->lock);
++      ret = !list_empty(&iommu->emulated_iommu_groups);
++      mutex_unlock(&iommu->lock);
++      return ret;
++}
++
+ static int vfio_iommu_type1_check_extension(struct vfio_iommu *iommu,
+                                           unsigned long arg)
+ {
+@@ -2677,8 +2704,13 @@ static int vfio_iommu_type1_check_extens
+       case VFIO_TYPE1v2_IOMMU:
+       case VFIO_TYPE1_NESTING_IOMMU:
+       case VFIO_UNMAP_ALL:
+-      case VFIO_UPDATE_VADDR:
+               return 1;
++      case VFIO_UPDATE_VADDR:
++              /*
++               * Disable this feature if mdevs are present.  They cannot
++               * safely pin/unpin/rw while vaddrs are being updated.
++               */
++              return iommu && !vfio_iommu_has_emulated(iommu);
+       case VFIO_DMA_CC_IOMMU:
+               if (!iommu)
+                       return 0;
+@@ -3147,6 +3179,13 @@ static int vfio_iommu_type1_dma_rw(void
+       size_t done;
+ 
+       mutex_lock(&iommu->lock);
++
++      if (WARN_ONCE(iommu->vaddr_invalid_count,
++                    "vfio_dma_rw not allowed with VFIO_UPDATE_VADDR\n")) {
++              ret = -EBUSY;
++              goto out;
++      }
++
+       while (count > 0) {
+               ret = vfio_iommu_type1_dma_rw_chunk(iommu, user_iova, data,
+                                                   count, write, &done);
+@@ -3158,6 +3197,7 @@ static int vfio_iommu_type1_dma_rw(void
+               user_iova += done;
+       }
+ 
++out:
+       mutex_unlock(&iommu->lock);
+       return ret;
+ }
+--- a/include/uapi/linux/vfio.h
++++ b/include/uapi/linux/vfio.h
+@@ -49,7 +49,11 @@
+ /* Supports VFIO_DMA_UNMAP_FLAG_ALL */
+ #define VFIO_UNMAP_ALL                        9
+ 
+-/* Supports the vaddr flag for DMA map and unmap */
++/*
++ * Supports the vaddr flag for DMA map and unmap.  Not supported for mediated
++ * devices, so this capability is subject to change as groups are added or
++ * removed.
++ */
+ #define VFIO_UPDATE_VADDR             10
+ 
+ /*
+@@ -1343,8 +1347,7 @@ struct vfio_iommu_type1_info_dma_avail {
+  * Map process virtual addresses to IO virtual addresses using the
+  * provided struct vfio_dma_map. Caller sets argsz. READ &/ WRITE required.
+  *
+- * If flags & VFIO_DMA_MAP_FLAG_VADDR, update the base vaddr for iova, and
+- * unblock translation of host virtual addresses in the iova range.  The vaddr
++ * If flags & VFIO_DMA_MAP_FLAG_VADDR, update the base vaddr for iova. The vaddr
+  * must have previously been invalidated with VFIO_DMA_UNMAP_FLAG_VADDR.  To
+  * maintain memory consistency within the user application, the updated vaddr
+  * must address the same memory object as originally mapped.  Failure to do so
+@@ -1395,9 +1398,9 @@ struct vfio_bitmap {
+  * must be 0.  This cannot be combined with the get-dirty-bitmap flag.
+  *
+  * If flags & VFIO_DMA_UNMAP_FLAG_VADDR, do not unmap, but invalidate host
+- * virtual addresses in the iova range.  Tasks that attempt to translate an
+- * iova's vaddr will block.  DMA to already-mapped pages continues.  This
+- * cannot be combined with the get-dirty-bitmap flag.
++ * virtual addresses in the iova range.  DMA to already-mapped pages continues.
++ * Groups may not be added to the container while any addresses are invalid.
++ * This cannot be combined with the get-dirty-bitmap flag.
+  */
+ struct vfio_iommu_type1_dma_unmap {
+       __u32   argsz;
diff --git a/queue-6.2/vfio-type1-prevent-underflow-of-locked_vm-via-exec.patch b/queue-6.2/vfio-type1-prevent-underflow-of-locked_vm-via-exec.patch

new file mode 100644 (file)

index 0000000..bef8bac
--- /dev/null
+++ b/queue-6.2/vfio-type1-prevent-underflow-of-locked_vm-via-exec.patch
@@ -0,0 +1,129 @@
+From 046eca5018f8a5dd1dc2cedf87fb5843b9ea3026 Mon Sep 17 00:00:00 2001
+From: Steve Sistare <steven.sistare@oracle.com>
+Date: Tue, 31 Jan 2023 08:58:04 -0800
+Subject: vfio/type1: prevent underflow of locked_vm via exec()
+
+From: Steve Sistare <steven.sistare@oracle.com>
+
+commit 046eca5018f8a5dd1dc2cedf87fb5843b9ea3026 upstream.
+
+When a vfio container is preserved across exec, the task does not change,
+but it gets a new mm with locked_vm=0, and loses the count from existing
+dma mappings.  If the user later unmaps a dma mapping, locked_vm underflows
+to a large unsigned value, and a subsequent dma map request fails with
+ENOMEM in __account_locked_vm.
+
+To avoid underflow, grab and save the mm at the time a dma is mapped.
+Use that mm when adjusting locked_vm, rather than re-acquiring the saved
+task's mm, which may have changed.  If the saved mm is dead, do nothing.
+
+locked_vm is incremented for existing mappings in a subsequent patch.
+
+Fixes: 73fa0d10d077 ("vfio: Type1 IOMMU implementation")
+Cc: stable@vger.kernel.org
+Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
+Reviewed-by: Kevin Tian <kevin.tian@intel.com>
+Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
+Link: https://lore.kernel.org/r/1675184289-267876-3-git-send-email-steven.sistare@oracle.com
+Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vfio/vfio_iommu_type1.c |   41 +++++++++++++---------------------------
+ 1 file changed, 14 insertions(+), 27 deletions(-)
+
+--- a/drivers/vfio/vfio_iommu_type1.c
++++ b/drivers/vfio/vfio_iommu_type1.c
+@@ -100,6 +100,7 @@ struct vfio_dma {
+       struct task_struct      *task;
+       struct rb_root          pfn_list;       /* Ex-user pinned pfn list */
+       unsigned long           *bitmap;
++      struct mm_struct        *mm;
+ };
+ 
+ struct vfio_batch {
+@@ -420,8 +421,8 @@ static int vfio_lock_acct(struct vfio_dm
+       if (!npage)
+               return 0;
+ 
+-      mm = async ? get_task_mm(dma->task) : dma->task->mm;
+-      if (!mm)
++      mm = dma->mm;
++      if (async && !mmget_not_zero(mm))
+               return -ESRCH; /* process exited */
+ 
+       ret = mmap_write_lock_killable(mm);
+@@ -794,8 +795,8 @@ static int vfio_pin_page_external(struct
+       struct mm_struct *mm;
+       int ret;
+ 
+-      mm = get_task_mm(dma->task);
+-      if (!mm)
++      mm = dma->mm;
++      if (!mmget_not_zero(mm))
+               return -ENODEV;
+ 
+       ret = vaddr_get_pfns(mm, vaddr, 1, dma->prot, pfn_base, pages);
+@@ -805,7 +806,7 @@ static int vfio_pin_page_external(struct
+       ret = 0;
+ 
+       if (do_accounting && !is_invalid_reserved_pfn(*pfn_base)) {
+-              ret = vfio_lock_acct(dma, 1, true);
++              ret = vfio_lock_acct(dma, 1, false);
+               if (ret) {
+                       put_pfn(*pfn_base, dma->prot);
+                       if (ret == -ENOMEM)
+@@ -1180,6 +1181,7 @@ static void vfio_remove_dma(struct vfio_
+       vfio_unmap_unpin(iommu, dma, true);
+       vfio_unlink_dma(iommu, dma);
+       put_task_struct(dma->task);
++      mmdrop(dma->mm);
+       vfio_dma_bitmap_free(dma);
+       if (dma->vaddr_invalid) {
+               iommu->vaddr_invalid_count--;
+@@ -1664,29 +1666,15 @@ static int vfio_dma_do_map(struct vfio_i
+        * against the locked memory limit and we need to be able to do both
+        * outside of this call path as pinning can be asynchronous via the
+        * external interfaces for mdev devices.  RLIMIT_MEMLOCK requires a
+-       * task_struct and VM locked pages requires an mm_struct, however
+-       * holding an indefinite mm reference is not recommended, therefore we
+-       * only hold a reference to a task.  We could hold a reference to
+-       * current, however QEMU uses this call path through vCPU threads,
+-       * which can be killed resulting in a NULL mm and failure in the unmap
+-       * path when called via a different thread.  Avoid this problem by
+-       * using the group_leader as threads within the same group require
+-       * both CLONE_THREAD and CLONE_VM and will therefore use the same
+-       * mm_struct.
+-       *
+-       * Previously we also used the task for testing CAP_IPC_LOCK at the
+-       * time of pinning and accounting, however has_capability() makes use
+-       * of real_cred, a copy-on-write field, so we can't guarantee that it
+-       * matches group_leader, or in fact that it might not change by the
+-       * time it's evaluated.  If a process were to call MAP_DMA with
+-       * CAP_IPC_LOCK but later drop it, it doesn't make sense that they
+-       * possibly see different results for an iommu_mapped vfio_dma vs
+-       * externally mapped.  Therefore track CAP_IPC_LOCK in vfio_dma at the
+-       * time of calling MAP_DMA.
++       * task_struct. Save the group_leader so that all DMA tracking uses
++       * the same task, to make debugging easier.  VM locked pages requires
++       * an mm_struct, so grab the mm in case the task dies.
+        */
+       get_task_struct(current->group_leader);
+       dma->task = current->group_leader;
+       dma->lock_cap = capable(CAP_IPC_LOCK);
++      dma->mm = current->mm;
++      mmgrab(dma->mm);
+ 
+       dma->pfn_list = RB_ROOT;
+ 
+@@ -3131,9 +3119,8 @@ static int vfio_iommu_type1_dma_rw_chunk
+                       !(dma->prot & IOMMU_READ))
+               return -EPERM;
+ 
+-      mm = get_task_mm(dma->task);
+-
+-      if (!mm)
++      mm = dma->mm;
++      if (!mmget_not_zero(mm))
+               return -EPERM;
+ 
+       if (kthread)
diff --git a/queue-6.2/vfio-type1-restore-locked_vm.patch b/queue-6.2/vfio-type1-restore-locked_vm.patch

new file mode 100644 (file)

index 0000000..dd78739
--- /dev/null
+++ b/queue-6.2/vfio-type1-restore-locked_vm.patch
@@ -0,0 +1,82 @@
+From 90fdd158a695d70403163f9a0e4efc5b20f3fd3e Mon Sep 17 00:00:00 2001
+From: Steve Sistare <steven.sistare@oracle.com>
+Date: Tue, 31 Jan 2023 08:58:06 -0800
+Subject: vfio/type1: restore locked_vm
+
+From: Steve Sistare <steven.sistare@oracle.com>
+
+commit 90fdd158a695d70403163f9a0e4efc5b20f3fd3e upstream.
+
+When a vfio container is preserved across exec or fork-exec, the new
+task's mm has a locked_vm count of 0.  After a dma vaddr is updated using
+VFIO_DMA_MAP_FLAG_VADDR, locked_vm remains 0, and the pinned memory does
+not count against the task's RLIMIT_MEMLOCK.
+
+To restore the correct locked_vm count, when VFIO_DMA_MAP_FLAG_VADDR is
+used and the dma's mm has changed, add the dma's locked_vm count to
+the new mm->locked_vm, subject to the rlimit, and subtract it from the
+old mm->locked_vm.
+
+Fixes: c3cbab24db38 ("vfio/type1: implement interfaces to update vaddr")
+Cc: stable@vger.kernel.org
+Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
+Reviewed-by: Kevin Tian <kevin.tian@intel.com>
+Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
+Link: https://lore.kernel.org/r/1675184289-267876-5-git-send-email-steven.sistare@oracle.com
+Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vfio/vfio_iommu_type1.c |   35 +++++++++++++++++++++++++++++++++++
+ 1 file changed, 35 insertions(+)
+
+--- a/drivers/vfio/vfio_iommu_type1.c
++++ b/drivers/vfio/vfio_iommu_type1.c
+@@ -1591,6 +1591,38 @@ static bool vfio_iommu_iova_dma_valid(st
+       return list_empty(iova);
+ }
+ 
++static int vfio_change_dma_owner(struct vfio_dma *dma)
++{
++      struct task_struct *task = current->group_leader;
++      struct mm_struct *mm = current->mm;
++      long npage = dma->locked_vm;
++      bool lock_cap;
++      int ret;
++
++      if (mm == dma->mm)
++              return 0;
++
++      lock_cap = capable(CAP_IPC_LOCK);
++      ret = mm_lock_acct(task, mm, lock_cap, npage);
++      if (ret)
++              return ret;
++
++      if (mmget_not_zero(dma->mm)) {
++              mm_lock_acct(dma->task, dma->mm, dma->lock_cap, -npage);
++              mmput(dma->mm);
++      }
++
++      if (dma->task != task) {
++              put_task_struct(dma->task);
++              dma->task = get_task_struct(task);
++      }
++      mmdrop(dma->mm);
++      dma->mm = mm;
++      mmgrab(dma->mm);
++      dma->lock_cap = lock_cap;
++      return 0;
++}
++
+ static int vfio_dma_do_map(struct vfio_iommu *iommu,
+                          struct vfio_iommu_type1_dma_map *map)
+ {
+@@ -1640,6 +1672,9 @@ static int vfio_dma_do_map(struct vfio_i
+                          dma->size != size) {
+                       ret = -EINVAL;
+               } else {
++                      ret = vfio_change_dma_owner(dma);
++                      if (ret)
++                              goto out_unlock;
+                       dma->vaddr = vaddr;
+                       dma->vaddr_invalid = false;
+                       iommu->vaddr_invalid_count--;
diff --git a/queue-6.2/vfio-type1-track-locked_vm-per-dma.patch b/queue-6.2/vfio-type1-track-locked_vm-per-dma.patch

new file mode 100644 (file)

index 0000000..5b80743
--- /dev/null
+++ b/queue-6.2/vfio-type1-track-locked_vm-per-dma.patch
@@ -0,0 +1,70 @@
+From 18e292705ba21cc9b3227b9ad5b1c28973605ee5 Mon Sep 17 00:00:00 2001
+From: Steve Sistare <steven.sistare@oracle.com>
+Date: Tue, 31 Jan 2023 08:58:05 -0800
+Subject: vfio/type1: track locked_vm per dma
+
+From: Steve Sistare <steven.sistare@oracle.com>
+
+commit 18e292705ba21cc9b3227b9ad5b1c28973605ee5 upstream.
+
+Track locked_vm per dma struct, and create a new subroutine, both for use
+in a subsequent patch.  No functional change.
+
+Fixes: c3cbab24db38 ("vfio/type1: implement interfaces to update vaddr")
+Cc: stable@vger.kernel.org
+Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
+Reviewed-by: Kevin Tian <kevin.tian@intel.com>
+Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
+Link: https://lore.kernel.org/r/1675184289-267876-4-git-send-email-steven.sistare@oracle.com
+Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vfio/vfio_iommu_type1.c |   23 +++++++++++++++++------
+ 1 file changed, 17 insertions(+), 6 deletions(-)
+
+--- a/drivers/vfio/vfio_iommu_type1.c
++++ b/drivers/vfio/vfio_iommu_type1.c
+@@ -101,6 +101,7 @@ struct vfio_dma {
+       struct rb_root          pfn_list;       /* Ex-user pinned pfn list */
+       unsigned long           *bitmap;
+       struct mm_struct        *mm;
++      size_t                  locked_vm;
+ };
+ 
+ struct vfio_batch {
+@@ -413,6 +414,19 @@ static int vfio_iova_put_vfio_pfn(struct
+       return ret;
+ }
+ 
++static int mm_lock_acct(struct task_struct *task, struct mm_struct *mm,
++                      bool lock_cap, long npage)
++{
++      int ret = mmap_write_lock_killable(mm);
++
++      if (ret)
++              return ret;
++
++      ret = __account_locked_vm(mm, abs(npage), npage > 0, task, lock_cap);
++      mmap_write_unlock(mm);
++      return ret;
++}
++
+ static int vfio_lock_acct(struct vfio_dma *dma, long npage, bool async)
+ {
+       struct mm_struct *mm;
+@@ -425,12 +439,9 @@ static int vfio_lock_acct(struct vfio_dm
+       if (async && !mmget_not_zero(mm))
+               return -ESRCH; /* process exited */
+ 
+-      ret = mmap_write_lock_killable(mm);
+-      if (!ret) {
+-              ret = __account_locked_vm(mm, abs(npage), npage > 0, dma->task,
+-                                        dma->lock_cap);
+-              mmap_write_unlock(mm);
+-      }
++      ret = mm_lock_acct(dma->task, mm, dma->lock_cap, npage);
++      if (!ret)
++              dma->locked_vm += npage;
+ 
+       if (async)
+               mmput(mm);
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Tue, 7 Mar 2023 16:20:53 +0000 (17:20 +0100)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Tue, 7 Mar 2023 16:20:53 +0000 (17:20 +0100)
queue-6.2/bus-mhi-ep-move-chan-lock-to-the-start-of-processing-queued-ch-ring.patch	[new file with mode: 0644]	patch \| blob
queue-6.2/bus-mhi-ep-only-send-enotconn-status-if-client-driver-is-available.patch	[new file with mode: 0644]	patch \| blob
queue-6.2/bus-mhi-ep-save-channel-state-locally-during-suspend-and-resume.patch	[new file with mode: 0644]	patch \| blob
queue-6.2/drm-amd-fix-initialization-for-nbio-7.5.1.patch	[new file with mode: 0644]	patch \| blob
queue-6.2/drm-gud-fix-ubsan-warning.patch	[new file with mode: 0644]	patch \| blob
queue-6.2/drm-i915-don-t-use-bar-mappings-for-ring-buffers-with-llc.patch	[new file with mode: 0644]	patch \| blob
queue-6.2/drm-i915-don-t-use-stolen-memory-for-ring-buffers-with-llc.patch	[new file with mode: 0644]	patch \| blob
queue-6.2/drm-i915-quirks-add-inverted-backlight-quirk-for-hp-14-r206nv.patch	[new file with mode: 0644]	patch \| blob
queue-6.2/drm-radeon-fix-edp-for-single-display-imac11-2.patch	[new file with mode: 0644]	patch \| blob
queue-6.2/iommu-vt-d-avoid-superfluous-iotlb-tracking-in-lazy-mode.patch	[new file with mode: 0644]	patch \| blob
queue-6.2/iommu-vt-d-fix-pasid-directory-pointer-coherency.patch	[new file with mode: 0644]	patch \| blob
queue-6.2/iommufd-do-not-add-the-same-hwpt-to-the-ioas-hwpt_list-twice.patch	[new file with mode: 0644]	patch \| blob
queue-6.2/iommufd-make-sure-to-zero-vfio_iommu_type1_info-before-copying-to-user.patch	[new file with mode: 0644]	patch \| blob
queue-6.2/mips-dts-ci20-fix-otg-power-gpio.patch	[new file with mode: 0644]	patch \| blob
queue-6.2/pci-avoid-flr-for-amd-fch-ahci-adapters.patch	[new file with mode: 0644]	patch \| blob
queue-6.2/pci-dpc-await-readiness-of-secondary-bus-after-reset.patch	[new file with mode: 0644]	patch \| blob
queue-6.2/pci-hotplug-allow-marking-devices-as-disconnected-during-bind-unbind.patch	[new file with mode: 0644]	patch \| blob
queue-6.2/pci-pm-observe-reset-delay-irrespective-of-bridge_d3.patch	[new file with mode: 0644]	patch \| blob
queue-6.2/pci-unify-delay-handling-for-reset-and-resume.patch	[new file with mode: 0644]	patch \| blob
queue-6.2/risc-v-add-a-spin_shadow_stack-declaration.patch	[new file with mode: 0644]	patch \| blob
queue-6.2/riscv-avoid-enabling-interrupts-in-die.patch	[new file with mode: 0644]	patch \| blob
queue-6.2/riscv-ftrace-fixup-panic-by-disabling-preemption.patch	[new file with mode: 0644]	patch \| blob
queue-6.2/riscv-ftrace-reduce-the-detour-code-size-to-half.patch	[new file with mode: 0644]	patch \| blob
queue-6.2/riscv-ftrace-remove-wasted-nops-for-riscv_isa_c.patch	[new file with mode: 0644]	patch \| blob
queue-6.2/riscv-jump_label-fixup-unaligned-arch_static_branch-function.patch	[new file with mode: 0644]	patch \| blob
queue-6.2/riscv-mm-fix-regression-due-to-update_mmu_cache-change.patch	[new file with mode: 0644]	patch \| blob
queue-6.2/riscv-mm-perform-bpf-exhandler-fixup-on-page-fault.patch	[new file with mode: 0644]	patch \| blob
queue-6.2/scsi-ses-don-t-attach-if-enclosure-has-no-components.patch	[new file with mode: 0644]	patch \| blob
queue-6.2/scsi-ses-fix-possible-addl_desc_ptr-out-of-bounds-accesses.patch	[new file with mode: 0644]	patch \| blob
queue-6.2/scsi-ses-fix-possible-desc_ptr-out-of-bounds-accesses.patch	[new file with mode: 0644]	patch \| blob
queue-6.2/scsi-ses-fix-slab-out-of-bounds-in-ses_enclosure_data_process.patch	[new file with mode: 0644]	patch \| blob
queue-6.2/scsi-ses-fix-slab-out-of-bounds-in-ses_intf_remove.patch	[new file with mode: 0644]	patch \| blob
queue-6.2/series		patch \| blob \| blame \| history
queue-6.2/vfio-type1-exclude-mdevs-from-vfio_update_vaddr.patch	[new file with mode: 0644]	patch \| blob
queue-6.2/vfio-type1-prevent-underflow-of-locked_vm-via-exec.patch	[new file with mode: 0644]	patch \| blob
queue-6.2/vfio-type1-restore-locked_vm.patch	[new file with mode: 0644]	patch \| blob
queue-6.2/vfio-type1-track-locked_vm-per-dma.patch	[new file with mode: 0644]	patch \| blob