--- /dev/null
+From 85636167e3206c3fbd52254fc432991cc4e90194 Mon Sep 17 00:00:00 2001
+From: John Harrison <John.C.Harrison@Intel.com>
+Date: Wed, 15 Feb 2023 17:11:01 -0800
+Subject: drm/i915: Don't use BAR mappings for ring buffers with LLC
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: John Harrison <John.C.Harrison@Intel.com>
+
+commit 85636167e3206c3fbd52254fc432991cc4e90194 upstream.
+
+Direction from hardware is that ring buffers should never be mapped
+via the BAR on systems with LLC. There are too many caching pitfalls
+due to the way BAR accesses are routed. So it is safest to just not
+use it.
+
+Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
+Fixes: 9d80841ea4c9 ("drm/i915: Allow ringbuffers to be bound anywhere")
+Cc: Chris Wilson <chris@chris-wilson.co.uk>
+Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
+Cc: Jani Nikula <jani.nikula@linux.intel.com>
+Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
+Cc: intel-gfx@lists.freedesktop.org
+Cc: <stable@vger.kernel.org> # v4.9+
+Tested-by: Jouni Högander <jouni.hogander@intel.com>
+Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230216011101.1909009-3-John.C.Harrison@Intel.com
+(cherry picked from commit 65c08339db1ada87afd6cfe7db8e60bb4851d919)
+Signed-off-by: Jani Nikula <jani.nikula@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/gt/intel_ring.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/i915/gt/intel_ring.c
++++ b/drivers/gpu/drm/i915/gt/intel_ring.c
+@@ -51,7 +51,7 @@ int intel_ring_pin(struct intel_ring *ri
+ if (unlikely(ret))
+ goto err_unpin;
+
+- if (i915_vma_is_map_and_fenceable(vma)) {
++ if (i915_vma_is_map_and_fenceable(vma) && !HAS_LLC(vma->vm->i915)) {
+ addr = (void __force *)i915_vma_pin_iomap(vma);
+ } else {
+ int type = i915_coherent_map_type(vma->vm->i915, vma->obj, false);
+@@ -96,7 +96,7 @@ void intel_ring_unpin(struct intel_ring
+ return;
+
+ i915_vma_unset_ggtt_write(vma);
+- if (i915_vma_is_map_and_fenceable(vma))
++ if (i915_vma_is_map_and_fenceable(vma) && !HAS_LLC(vma->vm->i915))
+ i915_vma_unpin_iomap(vma);
+ else
+ i915_gem_object_unpin_map(vma->obj);
--- /dev/null
+From 5e438bf7f9a1705ebcae5fa89cdbfbc6932a7871 Mon Sep 17 00:00:00 2001
+From: Mavroudis Chatzilaridis <mavchatz@protonmail.com>
+Date: Wed, 1 Feb 2023 18:51:25 +0000
+Subject: drm/i915/quirks: Add inverted backlight quirk for HP 14-r206nv
+
+From: Mavroudis Chatzilaridis <mavchatz@protonmail.com>
+
+commit 5e438bf7f9a1705ebcae5fa89cdbfbc6932a7871 upstream.
+
+This laptop uses inverted backlight PWM. Thus, without this quirk,
+backlight brightness decreases as the brightness value increases and
+vice versa.
+
+Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8013
+Cc: stable@vger.kernel.org
+Signed-off-by: Mavroudis Chatzilaridis <mavchatz@protonmail.com>
+Reviewed-by: Jani Nikula <jani.nikula@intel.com>
+Signed-off-by: Jani Nikula <jani.nikula@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230201184947.8835-1-mavchatz@protonmail.com
+(cherry picked from commit 83e7d6fd330d413cb2064e680ffea91b0512a520)
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/display/intel_quirks.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/gpu/drm/i915/display/intel_quirks.c
++++ b/drivers/gpu/drm/i915/display/intel_quirks.c
+@@ -193,6 +193,8 @@ static struct intel_quirk intel_quirks[]
+ /* ECS Liva Q2 */
+ { 0x3185, 0x1019, 0xa94d, quirk_increase_ddi_disabled_time },
+ { 0x3184, 0x1019, 0xa94d, quirk_increase_ddi_disabled_time },
++ /* HP Notebook - 14-r206nv */
++ { 0x0f31, 0x103c, 0x220f, quirk_invert_brightness },
+ };
+
+ void intel_init_quirks(struct drm_i915_private *i915)
--- /dev/null
+From 05eacc198c68cbb35a7281ce4011f8899ee1cfb8 Mon Sep 17 00:00:00 2001
+From: Mark Hawrylak <mark.hawrylak@gmail.com>
+Date: Sun, 19 Feb 2023 16:02:00 +1100
+Subject: drm/radeon: Fix eDP for single-display iMac11,2
+
+From: Mark Hawrylak <mark.hawrylak@gmail.com>
+
+commit 05eacc198c68cbb35a7281ce4011f8899ee1cfb8 upstream.
+
+Apple iMac11,2 (mid 2010), which also has a Radeon HD-4670, has the same
+issue as iMac10,1 (late 2009): the internal eDP panel stays dark on
+driver load. This patch treats iMac11,2 the same as iMac10,1,
+so the eDP panel stays active.
+
+Additional steps:
+Kernel boot parameter radeon.nomodeset=0 required to keep the eDP
+panel active.
+
+This patch is an extension of
+commit 564d8a2cf3ab ("drm/radeon: Fix eDP for single-display iMac10,1 (v2)")
+Link: https://lore.kernel.org/all/lsq.1507553064.833262317@decadent.org.uk/
+Signed-off-by: Mark Hawrylak <mark.hawrylak@gmail.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/radeon/atombios_encoders.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/radeon/atombios_encoders.c
++++ b/drivers/gpu/drm/radeon/atombios_encoders.c
+@@ -2188,11 +2188,12 @@ int radeon_atom_pick_dig_encoder(struct
+
+ /*
+ * On DCE32 any encoder can drive any block so usually just use crtc id,
+- * but Apple thinks different at least on iMac10,1, so there use linkb,
++ * but Apple thinks different at least on iMac10,1 and iMac11,2, so there use linkb,
+ * otherwise the internal eDP panel will stay dark.
+ */
+ if (ASIC_IS_DCE32(rdev)) {
+- if (dmi_match(DMI_PRODUCT_NAME, "iMac10,1"))
++ if (dmi_match(DMI_PRODUCT_NAME, "iMac10,1") ||
++ dmi_match(DMI_PRODUCT_NAME, "iMac11,2"))
+ enc_idx = (dig->linkb) ? 1 : 0;
+ else
+ enc_idx = radeon_crtc->crtc_id;
--- /dev/null
+From 16a75bbe480c3598b3af57a2504ea89b1e32c3ac Mon Sep 17 00:00:00 2001
+From: Jacob Pan <jacob.jun.pan@linux.intel.com>
+Date: Thu, 16 Feb 2023 21:08:14 +0800
+Subject: iommu/vt-d: Avoid superfluous IOTLB tracking in lazy mode
+
+From: Jacob Pan <jacob.jun.pan@linux.intel.com>
+
+commit 16a75bbe480c3598b3af57a2504ea89b1e32c3ac upstream.
+
+Intel IOMMU driver implements IOTLB flush queue with domain selective
+or PASID selective invalidations. In this case there's no need to track
+IOVA page range and sync IOTLBs, which may cause significant performance
+hit.
+
+This patch adds a check to avoid IOVA gather page and IOTLB sync for
+the lazy path.
+
+The performance difference on Sapphire Rapids 100Gb NIC is improved by
+the following (as measured by iperf send):
+
+w/o this fix: ~48 Gbits/s, with this fix: ~54 Gbits/s
+
+Cc: <stable@vger.kernel.org>
+Fixes: 2a2b8eaa5b25 ("iommu: Handle freelists when using deferred flushing in iommu drivers")
+Reviewed-by: Robin Murphy <robin.murphy@arm.com>
+Reviewed-by: Kevin Tian <kevin.tian@intel.com>
+Tested-by: Sanjay Kumar <sanjay.k.kumar@intel.com>
+Signed-off-by: Sanjay Kumar <sanjay.k.kumar@intel.com>
+Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
+Link: https://lore.kernel.org/r/20230209175330.1783556-1-jacob.jun.pan@linux.intel.com
+Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/iommu/intel/iommu.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/drivers/iommu/intel/iommu.c
++++ b/drivers/iommu/intel/iommu.c
+@@ -5140,7 +5140,12 @@ static size_t intel_iommu_unmap(struct i
+ if (dmar_domain->max_addr == iova + size)
+ dmar_domain->max_addr = iova;
+
+- iommu_iotlb_gather_add_page(domain, gather, iova, size);
++ /*
++ * We do not use page-selective IOTLB invalidation in flush queue,
++ * so there is no need to track page and sync iotlb.
++ */
++ if (!iommu_iotlb_gather_queued(gather))
++ iommu_iotlb_gather_add_page(domain, gather, iova, size);
+
+ return size;
+ }
--- /dev/null
+From 0cb4228f6cc9ed0ca2be0d9ddf29168a8e3a3905 Mon Sep 17 00:00:00 2001
+From: "H. Nikolaus Schaller" <hns@goldelico.com>
+Date: Sun, 29 Jan 2023 19:57:04 +0100
+Subject: MIPS: DTS: CI20: fix otg power gpio
+
+From: H. Nikolaus Schaller <hns@goldelico.com>
+
+commit 0cb4228f6cc9ed0ca2be0d9ddf29168a8e3a3905 upstream.
+
+According to schematics it is PF15 and not PF14 (MIC_SW_EN).
+Seems as if it was hidden and not noticed during testing since
+there is no sound DT node.
+
+Fixes: 158c774d3c64 ("MIPS: Ingenic: Add missing nodes for Ingenic SoCs and boards.")
+Cc: stable@vger.kernel.org
+Signed-off-by: H. Nikolaus Schaller <hns@goldelico.com>
+Acked-by: Paul Cercueil <paul@crapouillou.net>
+Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/mips/boot/dts/ingenic/ci20.dts | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/mips/boot/dts/ingenic/ci20.dts
++++ b/arch/mips/boot/dts/ingenic/ci20.dts
+@@ -99,7 +99,7 @@
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+
+- gpio = <&gpf 14 GPIO_ACTIVE_LOW>;
++ gpio = <&gpf 15 GPIO_ACTIVE_LOW>;
+ enable-active-high;
+ };
+ };
--- /dev/null
+From 63ba51db24ed1b8f8088a897290eb6c036c5435d Mon Sep 17 00:00:00 2001
+From: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+Date: Sat, 28 Jan 2023 10:39:51 +0900
+Subject: PCI: Avoid FLR for AMD FCH AHCI adapters
+
+From: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+
+commit 63ba51db24ed1b8f8088a897290eb6c036c5435d upstream.
+
+PCI passthrough to VMs does not work with AMD FCH AHCI adapters: the guest
+OS fails to correctly probe devices attached to the controller due to FIS
+communication failures:
+
+ ata4: softreset failed (1st FIS failed)
+ ...
+ ata4.00: qc timeout after 5000 msecs (cmd 0xec)
+ ata4.00: failed to IDENTIFY (I/O error, err_mask=0x4)
+
+Forcing the "bus" reset method before unbinding & binding the adapter to
+the vfio-pci driver solves this issue, e.g.:
+
+ echo "bus" > /sys/bus/pci/devices/<ID>/reset_method
+
+gives a working guest OS, indicating that the default FLR reset method
+doesn't work correctly.
+
+Apply quirk_no_flr() to AMD FCH AHCI devices to work around this issue.
+
+Link: https://lore.kernel.org/r/20230128013951.523247-1-damien.lemoal@opensource.wdc.com
+Reported-by: Niklas Cassel <niklas.cassel@wdc.com>
+Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/pci/quirks.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/pci/quirks.c
++++ b/drivers/pci/quirks.c
+@@ -5328,6 +5328,7 @@ static void quirk_no_flr(struct pci_dev
+ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x1487, quirk_no_flr);
+ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x148c, quirk_no_flr);
+ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x149c, quirk_no_flr);
++DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x7901, quirk_no_flr);
+ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1502, quirk_no_flr);
+ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1503, quirk_no_flr);
+
--- /dev/null
+From 74ff8864cc842be994853095dba6db48e716400a Mon Sep 17 00:00:00 2001
+From: Lukas Wunner <lukas@wunner.de>
+Date: Fri, 20 Jan 2023 10:19:02 +0100
+Subject: PCI: hotplug: Allow marking devices as disconnected during bind/unbind
+
+From: Lukas Wunner <lukas@wunner.de>
+
+commit 74ff8864cc842be994853095dba6db48e716400a upstream.
+
+On surprise removal, pciehp_unconfigure_device() and acpiphp's
+trim_stale_devices() call pci_dev_set_disconnected() to mark removed
+devices as permanently offline. Thereby, the PCI core and drivers know
+to skip device accesses.
+
+However pci_dev_set_disconnected() takes the device_lock and thus waits for
+a concurrent driver bind or unbind to complete. As a result, the driver's
+->probe and ->remove hooks have no chance to learn that the device is gone.
+
+That doesn't make any sense, so drop the device_lock and instead use atomic
+xchg() and cmpxchg() operations to update the device state.
+
+As a byproduct, an AB-BA deadlock reported by Anatoli is fixed which occurs
+on surprise removal with AER concurrently performing a bus reset.
+
+AER bus reset:
+
+ INFO: task irq/26-aerdrv:95 blocked for more than 120 seconds.
+ Tainted: G W 6.2.0-rc3-custom-norework-jan11+
+ schedule
+ rwsem_down_write_slowpath
+ down_write_nested
+ pciehp_reset_slot # acquires reset_lock
+ pci_reset_hotplug_slot
+ pci_slot_reset # acquires device_lock
+ pci_bus_error_reset
+ aer_root_reset
+ pcie_do_recovery
+ aer_process_err_devices
+ aer_isr
+
+pciehp surprise removal:
+
+ INFO: task irq/26-pciehp:96 blocked for more than 120 seconds.
+ Tainted: G W 6.2.0-rc3-custom-norework-jan11+
+ schedule_preempt_disabled
+ __mutex_lock
+ mutex_lock_nested
+ pci_dev_set_disconnected # acquires device_lock
+ pci_walk_bus
+ pciehp_unconfigure_device
+ pciehp_disable_slot
+ pciehp_handle_presence_or_link_change
+ pciehp_ist # acquires reset_lock
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=215590
+Fixes: a6bd101b8f84 ("PCI: Unify device inaccessible")
+Link: https://lore.kernel.org/r/3dc88ea82bdc0e37d9000e413d5ebce481cbd629.1674205689.git.lukas@wunner.de
+Reported-by: Anatoli Antonovitch <anatoli.antonovitch@amd.com>
+Signed-off-by: Lukas Wunner <lukas@wunner.de>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Cc: stable@vger.kernel.org # v4.20+
+Cc: Keith Busch <kbusch@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/pci/pci.h | 43 +++++++++++++------------------------------
+ 1 file changed, 13 insertions(+), 30 deletions(-)
+
+--- a/drivers/pci/pci.h
++++ b/drivers/pci/pci.h
+@@ -347,53 +347,36 @@ struct pci_sriov {
+ * @dev: PCI device to set new error_state
+ * @new: the state we want dev to be in
+ *
+- * Must be called with device_lock held.
++ * If the device is experiencing perm_failure, it has to remain in that state.
++ * Any other transition is allowed.
+ *
+ * Returns true if state has been changed to the requested state.
+ */
+ static inline bool pci_dev_set_io_state(struct pci_dev *dev,
+ pci_channel_state_t new)
+ {
+- bool changed = false;
++ pci_channel_state_t old;
+
+- device_lock_assert(&dev->dev);
+ switch (new) {
+ case pci_channel_io_perm_failure:
+- switch (dev->error_state) {
+- case pci_channel_io_frozen:
+- case pci_channel_io_normal:
+- case pci_channel_io_perm_failure:
+- changed = true;
+- break;
+- }
+- break;
++ xchg(&dev->error_state, pci_channel_io_perm_failure);
++ return true;
+ case pci_channel_io_frozen:
+- switch (dev->error_state) {
+- case pci_channel_io_frozen:
+- case pci_channel_io_normal:
+- changed = true;
+- break;
+- }
+- break;
++ old = cmpxchg(&dev->error_state, pci_channel_io_normal,
++ pci_channel_io_frozen);
++ return old != pci_channel_io_perm_failure;
+ case pci_channel_io_normal:
+- switch (dev->error_state) {
+- case pci_channel_io_frozen:
+- case pci_channel_io_normal:
+- changed = true;
+- break;
+- }
+- break;
++ old = cmpxchg(&dev->error_state, pci_channel_io_frozen,
++ pci_channel_io_normal);
++ return old != pci_channel_io_perm_failure;
++ default:
++ return false;
+ }
+- if (changed)
+- dev->error_state = new;
+- return changed;
+ }
+
+ static inline int pci_dev_set_disconnected(struct pci_dev *dev, void *unused)
+ {
+- device_lock(&dev->dev);
+ pci_dev_set_io_state(dev, pci_channel_io_perm_failure);
+- device_unlock(&dev->dev);
+
+ return 0;
+ }
--- /dev/null
+From 8ef0217227b42e2c34a18de316cee3da16c9bf1e Mon Sep 17 00:00:00 2001
+From: Lukas Wunner <lukas@wunner.de>
+Date: Sun, 15 Jan 2023 09:20:31 +0100
+Subject: PCI/PM: Observe reset delay irrespective of bridge_d3
+
+From: Lukas Wunner <lukas@wunner.de>
+
+commit 8ef0217227b42e2c34a18de316cee3da16c9bf1e upstream.
+
+If a PCI bridge is suspended to D3cold upon entering system sleep,
+resuming it entails a Fundamental Reset per PCIe r6.0 sec 5.8.
+
+The delay prescribed after a Fundamental Reset in PCIe r6.0 sec 6.6.1
+is sought to be observed by:
+
+ pci_pm_resume_noirq()
+ pci_pm_bridge_power_up_actions()
+ pci_bridge_wait_for_secondary_bus()
+
+However, pci_bridge_wait_for_secondary_bus() bails out if the bridge_d3
+flag is not set. That flag indicates whether a bridge is allowed to
+suspend to D3cold at *runtime*.
+
+Hence *no* delay is observed on resume from system sleep if runtime
+D3cold is forbidden. That doesn't make any sense, so drop the bridge_d3
+check from pci_bridge_wait_for_secondary_bus().
+
+The purpose of the bridge_d3 check was probably to avoid delays if a
+bridge remained in D0 during suspend. However the sole caller of
+pci_bridge_wait_for_secondary_bus(), pci_pm_bridge_power_up_actions(),
+is only invoked if the previous power state was D3cold. Hence the
+additional bridge_d3 check seems superfluous.
+
+Fixes: ad9001f2f411 ("PCI/PM: Add missing link delays required by the PCIe spec")
+Link: https://lore.kernel.org/r/eb37fa345285ec8bacabbf06b020b803f77bdd3d.1673769517.git.lukas@wunner.de
+Tested-by: Ravi Kishore Koppuravuri <ravi.kishore.koppuravuri@intel.com>
+Signed-off-by: Lukas Wunner <lukas@wunner.de>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
+Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
+Cc: stable@vger.kernel.org # v5.5+
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/pci/pci.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/pci/pci.c
++++ b/drivers/pci/pci.c
+@@ -4902,7 +4902,7 @@ void pci_bridge_wait_for_secondary_bus(s
+ if (pci_dev_is_disconnected(dev))
+ return;
+
+- if (!pci_is_bridge(dev) || !dev->bridge_d3)
++ if (!pci_is_bridge(dev))
+ return;
+
+ down_read(&pci_bus_sem);
--- /dev/null
+From eb9be8310c58c166f9fae3b71c0ad9d6741b4897 Mon Sep 17 00:00:00 2001
+From: Conor Dooley <conor.dooley@microchip.com>
+Date: Fri, 10 Feb 2023 18:59:45 +0000
+Subject: RISC-V: add a spin_shadow_stack declaration
+
+From: Conor Dooley <conor.dooley@microchip.com>
+
+commit eb9be8310c58c166f9fae3b71c0ad9d6741b4897 upstream.
+
+The patchwork automation reported a sparse complaint that
+spin_shadow_stack was not declared and should be static:
+../arch/riscv/kernel/traps.c:335:15: warning: symbol 'spin_shadow_stack' was not declared. Should it be static?
+
+However, this is used in entry.S and therefore shouldn't be static.
+The same applies to the shadow_stack that this pseudo spinlock is
+trying to protect, so do like its charge and add a declaration to
+thread_info.h
+
+Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
+Fixes: 7e1864332fbc ("riscv: fix race when vmap stack overflow")
+Reviewed-by: Guo Ren <guoren@kernel.org>
+Link: https://lore.kernel.org/r/20230210185945.915806-1-conor@kernel.org
+Cc: stable@vger.kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/include/asm/thread_info.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/riscv/include/asm/thread_info.h
++++ b/arch/riscv/include/asm/thread_info.h
+@@ -43,6 +43,7 @@
+ #ifndef __ASSEMBLY__
+
+ extern long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE / sizeof(long)];
++extern unsigned long spin_shadow_stack;
+
+ #include <asm/processor.h>
+ #include <asm/csr.h>
--- /dev/null
+From 6724a76cff85ee271bbbff42ac527e4643b2ec52 Mon Sep 17 00:00:00 2001
+From: Guo Ren <guoren@linux.alibaba.com>
+Date: Thu, 12 Jan 2023 04:05:59 -0500
+Subject: riscv: ftrace: Reduce the detour code size to half
+
+From: Guo Ren <guoren@linux.alibaba.com>
+
+commit 6724a76cff85ee271bbbff42ac527e4643b2ec52 upstream.
+
+Use a temporary register to reduce the size of detour code from 16 bytes to
+8 bytes. The previous implementation is from 'commit afc76b8b8011 ("riscv:
+Using PATCHABLE_FUNCTION_ENTRY instead of MCOUNT")'.
+
+Before the patch:
+<func_prolog>:
+ 0: REG_S ra, -SZREG(sp)
+ 4: auipc ra, ?
+ 8: jalr ?(ra)
+12: REG_L ra, -SZREG(sp)
+ (func_body)
+
+After the patch:
+<func_prolog>:
+ 0: auipc t0, ?
+ 4: jalr t0, ?(t0)
+ (func_body)
+
+This patch not just reduces the size of detour code, but also fixes an
+important issue:
+
+An Ftrace callback registered with FTRACE_OPS_FL_IPMODIFY flag can
+actually change the instruction pointer, e.g. to "replace" the given
+kernel function with a new one, which is needed for livepatching, etc.
+
+In this case, the trampoline (ftrace_regs_caller) would not return to
+<func_prolog+12> but would rather jump to the new function. So, "REG_L
+ra, -SZREG(sp)" would not run and the original return address would not
+be restored. The kernel is likely to hang or crash as a result.
+
+This can be easily demonstrated if one tries to "replace", say,
+cmdline_proc_show() with a new function with the same signature using
+instruction_pointer_set(&fregs->regs, new_func_addr) in the Ftrace
+callback.
+
+Link: https://lore.kernel.org/linux-riscv/20221122075440.1165172-1-suagrfillet@gmail.com/
+Link: https://lore.kernel.org/linux-riscv/d7d5730b-ebef-68e5-5046-e763e1ee6164@yadro.com/
+Co-developed-by: Song Shuai <suagrfillet@gmail.com>
+Signed-off-by: Song Shuai <suagrfillet@gmail.com>
+Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
+Signed-off-by: Guo Ren <guoren@kernel.org>
+Cc: Evgenii Shatokhin <e.shatokhin@yadro.com>
+Reviewed-by: Evgenii Shatokhin <e.shatokhin@yadro.com>
+Link: https://lore.kernel.org/r/20230112090603.1295340-4-guoren@kernel.org
+Cc: stable@vger.kernel.org
+Fixes: 10626c32e382 ("riscv/ftrace: Add basic support")
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/Makefile | 4 +-
+ arch/riscv/include/asm/ftrace.h | 50 +++++++++++++++++++++++-------
+ arch/riscv/kernel/ftrace.c | 65 +++++++++++-----------------------------
+ arch/riscv/kernel/mcount-dyn.S | 42 +++++++++----------------
+ 4 files changed, 75 insertions(+), 86 deletions(-)
+
+--- a/arch/riscv/Makefile
++++ b/arch/riscv/Makefile
+@@ -14,9 +14,9 @@ ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
+ LDFLAGS_vmlinux := --no-relax
+ KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
+ ifeq ($(CONFIG_RISCV_ISA_C),y)
+- CC_FLAGS_FTRACE := -fpatchable-function-entry=8
+-else
+ CC_FLAGS_FTRACE := -fpatchable-function-entry=4
++else
++ CC_FLAGS_FTRACE := -fpatchable-function-entry=2
+ endif
+ endif
+
+--- a/arch/riscv/include/asm/ftrace.h
++++ b/arch/riscv/include/asm/ftrace.h
+@@ -42,6 +42,14 @@ struct dyn_arch_ftrace {
+ * 2) jalr: setting low-12 offset to ra, jump to ra, and set ra to
+ * return address (original pc + 4)
+ *
++ *<ftrace enable>:
++ * 0: auipc t0/ra, 0x?
++ * 4: jalr t0/ra, ?(t0/ra)
++ *
++ *<ftrace disable>:
++ * 0: nop
++ * 4: nop
++ *
+ * Dynamic ftrace generates probes to call sites, so we must deal with
+ * both auipc and jalr at the same time.
+ */
+@@ -52,25 +60,43 @@ struct dyn_arch_ftrace {
+ #define AUIPC_OFFSET_MASK (0xfffff000)
+ #define AUIPC_PAD (0x00001000)
+ #define JALR_SHIFT 20
+-#define JALR_BASIC (0x000080e7)
+-#define AUIPC_BASIC (0x00000097)
++#define JALR_RA (0x000080e7)
++#define AUIPC_RA (0x00000097)
++#define JALR_T0 (0x000282e7)
++#define AUIPC_T0 (0x00000297)
+ #define NOP4 (0x00000013)
+
+-#define make_call(caller, callee, call) \
++#define to_jalr_t0(offset) \
++ (((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_T0)
++
++#define to_auipc_t0(offset) \
++ ((offset & JALR_SIGN_MASK) ? \
++ (((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) | AUIPC_T0) : \
++ ((offset & AUIPC_OFFSET_MASK) | AUIPC_T0))
++
++#define make_call_t0(caller, callee, call) \
+ do { \
+- call[0] = to_auipc_insn((unsigned int)((unsigned long)callee - \
+- (unsigned long)caller)); \
+- call[1] = to_jalr_insn((unsigned int)((unsigned long)callee - \
+- (unsigned long)caller)); \
++ unsigned int offset = \
++ (unsigned long) callee - (unsigned long) caller; \
++ call[0] = to_auipc_t0(offset); \
++ call[1] = to_jalr_t0(offset); \
+ } while (0)
+
+-#define to_jalr_insn(offset) \
+- (((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_BASIC)
++#define to_jalr_ra(offset) \
++ (((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_RA)
+
+-#define to_auipc_insn(offset) \
++#define to_auipc_ra(offset) \
+ ((offset & JALR_SIGN_MASK) ? \
+- (((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) | AUIPC_BASIC) : \
+- ((offset & AUIPC_OFFSET_MASK) | AUIPC_BASIC))
++ (((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) | AUIPC_RA) : \
++ ((offset & AUIPC_OFFSET_MASK) | AUIPC_RA))
++
++#define make_call_ra(caller, callee, call) \
++do { \
++ unsigned int offset = \
++ (unsigned long) callee - (unsigned long) caller; \
++ call[0] = to_auipc_ra(offset); \
++ call[1] = to_jalr_ra(offset); \
++} while (0)
+
+ /*
+ * Let auipc+jalr be the basic *mcount unit*, so we make it 8 bytes here.
+--- a/arch/riscv/kernel/ftrace.c
++++ b/arch/riscv/kernel/ftrace.c
+@@ -57,12 +57,15 @@ static int ftrace_check_current_call(uns
+ }
+
+ static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target,
+- bool enable)
++ bool enable, bool ra)
+ {
+ unsigned int call[2];
+ unsigned int nops[2] = {NOP4, NOP4};
+
+- make_call(hook_pos, target, call);
++ if (ra)
++ make_call_ra(hook_pos, target, call);
++ else
++ make_call_t0(hook_pos, target, call);
+
+ /* Replace the auipc-jalr pair at once. Return -EPERM on write error. */
+ if (patch_text_nosync
+@@ -72,42 +75,13 @@ static int __ftrace_modify_call(unsigned
+ return 0;
+ }
+
+-/*
+- * Put 5 instructions with 16 bytes at the front of function within
+- * patchable function entry nops' area.
+- *
+- * 0: REG_S ra, -SZREG(sp)
+- * 1: auipc ra, 0x?
+- * 2: jalr -?(ra)
+- * 3: REG_L ra, -SZREG(sp)
+- *
+- * So the opcodes is:
+- * 0: 0xfe113c23 (sd)/0xfe112e23 (sw)
+- * 1: 0x???????? -> auipc
+- * 2: 0x???????? -> jalr
+- * 3: 0xff813083 (ld)/0xffc12083 (lw)
+- */
+-#if __riscv_xlen == 64
+-#define INSN0 0xfe113c23
+-#define INSN3 0xff813083
+-#elif __riscv_xlen == 32
+-#define INSN0 0xfe112e23
+-#define INSN3 0xffc12083
+-#endif
+-
+-#define FUNC_ENTRY_SIZE 16
+-#define FUNC_ENTRY_JMP 4
+-
+ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+ {
+- unsigned int call[4] = {INSN0, 0, 0, INSN3};
+- unsigned long target = addr;
+- unsigned long caller = rec->ip + FUNC_ENTRY_JMP;
++ unsigned int call[2];
+
+- call[1] = to_auipc_insn((unsigned int)(target - caller));
+- call[2] = to_jalr_insn((unsigned int)(target - caller));
++ make_call_t0(rec->ip, addr, call);
+
+- if (patch_text_nosync((void *)rec->ip, call, FUNC_ENTRY_SIZE))
++ if (patch_text_nosync((void *)rec->ip, call, MCOUNT_INSN_SIZE))
+ return -EPERM;
+
+ return 0;
+@@ -116,15 +90,14 @@ int ftrace_make_call(struct dyn_ftrace *
+ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
+ unsigned long addr)
+ {
+- unsigned int nops[4] = {NOP4, NOP4, NOP4, NOP4};
++ unsigned int nops[2] = {NOP4, NOP4};
+
+- if (patch_text_nosync((void *)rec->ip, nops, FUNC_ENTRY_SIZE))
++ if (patch_text_nosync((void *)rec->ip, nops, MCOUNT_INSN_SIZE))
+ return -EPERM;
+
+ return 0;
+ }
+
+-
+ /*
+ * This is called early on, and isn't wrapped by
+ * ftrace_arch_code_modify_{prepare,post_process}() and therefor doesn't hold
+@@ -146,10 +119,10 @@ int ftrace_init_nop(struct module *mod,
+ int ftrace_update_ftrace_func(ftrace_func_t func)
+ {
+ int ret = __ftrace_modify_call((unsigned long)&ftrace_call,
+- (unsigned long)func, true);
++ (unsigned long)func, true, true);
+ if (!ret) {
+ ret = __ftrace_modify_call((unsigned long)&ftrace_regs_call,
+- (unsigned long)func, true);
++ (unsigned long)func, true, true);
+ }
+
+ return ret;
+@@ -166,16 +139,16 @@ int ftrace_modify_call(struct dyn_ftrace
+ unsigned long addr)
+ {
+ unsigned int call[2];
+- unsigned long caller = rec->ip + FUNC_ENTRY_JMP;
++ unsigned long caller = rec->ip;
+ int ret;
+
+- make_call(caller, old_addr, call);
++ make_call_t0(caller, old_addr, call);
+ ret = ftrace_check_current_call(caller, call);
+
+ if (ret)
+ return ret;
+
+- return __ftrace_modify_call(caller, addr, true);
++ return __ftrace_modify_call(caller, addr, true, false);
+ }
+ #endif
+
+@@ -210,12 +183,12 @@ int ftrace_enable_ftrace_graph_caller(vo
+ int ret;
+
+ ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call,
+- (unsigned long)&prepare_ftrace_return, true);
++ (unsigned long)&prepare_ftrace_return, true, true);
+ if (ret)
+ return ret;
+
+ return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call,
+- (unsigned long)&prepare_ftrace_return, true);
++ (unsigned long)&prepare_ftrace_return, true, true);
+ }
+
+ int ftrace_disable_ftrace_graph_caller(void)
+@@ -223,12 +196,12 @@ int ftrace_disable_ftrace_graph_caller(v
+ int ret;
+
+ ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call,
+- (unsigned long)&prepare_ftrace_return, false);
++ (unsigned long)&prepare_ftrace_return, false, true);
+ if (ret)
+ return ret;
+
+ return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call,
+- (unsigned long)&prepare_ftrace_return, false);
++ (unsigned long)&prepare_ftrace_return, false, true);
+ }
+ #endif /* CONFIG_DYNAMIC_FTRACE */
+ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+--- a/arch/riscv/kernel/mcount-dyn.S
++++ b/arch/riscv/kernel/mcount-dyn.S
+@@ -13,8 +13,8 @@
+
+ .text
+
+-#define FENTRY_RA_OFFSET 12
+-#define ABI_SIZE_ON_STACK 72
++#define FENTRY_RA_OFFSET 8
++#define ABI_SIZE_ON_STACK 80
+ #define ABI_A0 0
+ #define ABI_A1 8
+ #define ABI_A2 16
+@@ -23,10 +23,10 @@
+ #define ABI_A5 40
+ #define ABI_A6 48
+ #define ABI_A7 56
+-#define ABI_RA 64
++#define ABI_T0 64
++#define ABI_RA 72
+
+ .macro SAVE_ABI
+- addi sp, sp, -SZREG
+ addi sp, sp, -ABI_SIZE_ON_STACK
+
+ REG_S a0, ABI_A0(sp)
+@@ -37,6 +37,7 @@
+ REG_S a5, ABI_A5(sp)
+ REG_S a6, ABI_A6(sp)
+ REG_S a7, ABI_A7(sp)
++ REG_S t0, ABI_T0(sp)
+ REG_S ra, ABI_RA(sp)
+ .endm
+
+@@ -49,24 +50,18 @@
+ REG_L a5, ABI_A5(sp)
+ REG_L a6, ABI_A6(sp)
+ REG_L a7, ABI_A7(sp)
++ REG_L t0, ABI_T0(sp)
+ REG_L ra, ABI_RA(sp)
+
+ addi sp, sp, ABI_SIZE_ON_STACK
+- addi sp, sp, SZREG
+ .endm
+
+ #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ .macro SAVE_ALL
+- addi sp, sp, -SZREG
+ addi sp, sp, -PT_SIZE_ON_STACK
+
+- REG_S x1, PT_EPC(sp)
+- addi sp, sp, PT_SIZE_ON_STACK
+- REG_L x1, (sp)
+- addi sp, sp, -PT_SIZE_ON_STACK
++ REG_S t0, PT_EPC(sp)
+ REG_S x1, PT_RA(sp)
+- REG_L x1, PT_EPC(sp)
+-
+ REG_S x2, PT_SP(sp)
+ REG_S x3, PT_GP(sp)
+ REG_S x4, PT_TP(sp)
+@@ -100,15 +95,11 @@
+ .endm
+
+ .macro RESTORE_ALL
++ REG_L t0, PT_EPC(sp)
+ REG_L x1, PT_RA(sp)
+- addi sp, sp, PT_SIZE_ON_STACK
+- REG_S x1, (sp)
+- addi sp, sp, -PT_SIZE_ON_STACK
+- REG_L x1, PT_EPC(sp)
+ REG_L x2, PT_SP(sp)
+ REG_L x3, PT_GP(sp)
+ REG_L x4, PT_TP(sp)
+- REG_L x5, PT_T0(sp)
+ REG_L x6, PT_T1(sp)
+ REG_L x7, PT_T2(sp)
+ REG_L x8, PT_S0(sp)
+@@ -137,17 +128,16 @@
+ REG_L x31, PT_T6(sp)
+
+ addi sp, sp, PT_SIZE_ON_STACK
+- addi sp, sp, SZREG
+ .endm
+ #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+
+ ENTRY(ftrace_caller)
+ SAVE_ABI
+
+- addi a0, ra, -FENTRY_RA_OFFSET
++ addi a0, t0, -FENTRY_RA_OFFSET
+ la a1, function_trace_op
+ REG_L a2, 0(a1)
+- REG_L a1, ABI_SIZE_ON_STACK(sp)
++ mv a1, ra
+ mv a3, sp
+
+ ftrace_call:
+@@ -155,8 +145,8 @@ ftrace_call:
+ call ftrace_stub
+
+ #ifdef CONFIG_FUNCTION_GRAPH_TRACER
+- addi a0, sp, ABI_SIZE_ON_STACK
+- REG_L a1, ABI_RA(sp)
++ addi a0, sp, ABI_RA
++ REG_L a1, ABI_T0(sp)
+ addi a1, a1, -FENTRY_RA_OFFSET
+ #ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+ mv a2, s0
+@@ -166,17 +156,17 @@ ftrace_graph_call:
+ call ftrace_stub
+ #endif
+ RESTORE_ABI
+- ret
++ jr t0
+ ENDPROC(ftrace_caller)
+
+ #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ ENTRY(ftrace_regs_caller)
+ SAVE_ALL
+
+- addi a0, ra, -FENTRY_RA_OFFSET
++ addi a0, t0, -FENTRY_RA_OFFSET
+ la a1, function_trace_op
+ REG_L a2, 0(a1)
+- REG_L a1, PT_SIZE_ON_STACK(sp)
++ mv a1, ra
+ mv a3, sp
+
+ ftrace_regs_call:
+@@ -196,6 +186,6 @@ ftrace_graph_regs_call:
+ #endif
+
+ RESTORE_ALL
+- ret
++ jr t0
+ ENDPROC(ftrace_regs_caller)
+ #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
--- /dev/null
+From 409c8fb20c66df7150e592747412438c04aeb11f Mon Sep 17 00:00:00 2001
+From: Guo Ren <guoren@linux.alibaba.com>
+Date: Thu, 12 Jan 2023 04:05:58 -0500
+Subject: riscv: ftrace: Remove wasted nops for !RISCV_ISA_C
+
+From: Guo Ren <guoren@linux.alibaba.com>
+
+commit 409c8fb20c66df7150e592747412438c04aeb11f upstream.
+
+When CONFIG_RISCV_ISA_C=n, -fpatchable-function-entry=8 generates more
+nops than we expect, because each nop is then encoded as the 4-byte
+0x00000013 instead of the 2-byte compressed 0x0001.
+
+Dump of assembler code for function dw_pcie_free_msi:
+ 0xffffffff806fce94 <+0>: sd ra,-8(sp)
+ 0xffffffff806fce98 <+4>: auipc ra,0xff90f
+ 0xffffffff806fce9c <+8>: jalr -684(ra) # 0xffffffff8000bbec
+<ftrace_caller>
+ 0xffffffff806fcea0 <+12>: ld ra,-8(sp)
+ 0xffffffff806fcea4 <+16>: nop /* wasted */
+ 0xffffffff806fcea8 <+20>: nop /* wasted */
+ 0xffffffff806fceac <+24>: nop /* wasted */
+ 0xffffffff806fceb0 <+28>: nop /* wasted */
+ 0xffffffff806fceb4 <+0>: addi sp,sp,-48
+ 0xffffffff806fceb8 <+4>: sd s0,32(sp)
+ 0xffffffff806fcebc <+8>: sd s1,24(sp)
+ 0xffffffff806fcec0 <+12>: sd s2,16(sp)
+ 0xffffffff806fcec4 <+16>: sd s3,8(sp)
+ 0xffffffff806fcec8 <+20>: sd ra,40(sp)
+ 0xffffffff806fcecc <+24>: addi s0,sp,48
+
+Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
+Signed-off-by: Guo Ren <guoren@kernel.org>
+Link: https://lore.kernel.org/r/20230112090603.1295340-3-guoren@kernel.org
+Cc: stable@vger.kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/Makefile | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/riscv/Makefile
++++ b/arch/riscv/Makefile
+@@ -13,7 +13,11 @@ LDFLAGS_vmlinux :=
+ ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
+ LDFLAGS_vmlinux := --no-relax
+ KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
++ifeq ($(CONFIG_RISCV_ISA_C),y)
+ CC_FLAGS_FTRACE := -fpatchable-function-entry=8
++else
++ CC_FLAGS_FTRACE := -fpatchable-function-entry=4
++endif
+ endif
+
+ ifeq ($(CONFIG_CMODEL_MEDLOW),y)
--- /dev/null
+From 9ddfc3cd806081ce1f6c9c2f988cbb031f35d28f Mon Sep 17 00:00:00 2001
+From: Andy Chiu <andy.chiu@sifive.com>
+Date: Mon, 6 Feb 2023 04:04:40 -0500
+Subject: riscv: jump_label: Fixup unaligned arch_static_branch function
+
+From: Andy Chiu <andy.chiu@sifive.com>
+
+commit 9ddfc3cd806081ce1f6c9c2f988cbb031f35d28f upstream.
+
+Runtime code patching must be done at a naturally aligned address, or we
+may execute on a partial instruction.
+
+We have encountered problems traced back to static jump functions during
+testing. We switched the tracer randomly every 1~5 seconds on a
+dual-core QEMU setup and found the kernel stuck at a static branch
+where it jumps to itself.
+
+The reason is that the static branch was 2-byte but not 4-byte aligned.
+Then, the kernel would patch the instruction, either J or NOP, with two
+half-word stores if the machine does not have efficient unaligned
+accesses. Thus, moments exist where half of the NOP mixes with the other
+half of the J when transitioning the branch. In our particular case, on
+a little-endian machine, the upper half of the NOP was mixed with the
+lower part of the J when enabling the branch, resulting in a jump that
+jumped to itself. Conversely, it would result in a HINT instruction when
+disabling the branch, but it might not be observable.
+
+ARM64 does not have this problem since all instructions must be 4-byte
+aligned.
+
+Fixes: ebc00dde8a97 ("riscv: Add jump-label implementation")
+Link: https://lore.kernel.org/linux-riscv/20220913094252.3555240-6-andy.chiu@sifive.com/
+Reviewed-by: Greentime Hu <greentime.hu@sifive.com>
+Signed-off-by: Andy Chiu <andy.chiu@sifive.com>
+Signed-off-by: Guo Ren <guoren@kernel.org>
+Link: https://lore.kernel.org/r/20230206090440.1255001-1-guoren@kernel.org
+Cc: stable@vger.kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/include/asm/jump_label.h | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/arch/riscv/include/asm/jump_label.h
++++ b/arch/riscv/include/asm/jump_label.h
+@@ -18,6 +18,7 @@ static __always_inline bool arch_static_
+ bool branch)
+ {
+ asm_volatile_goto(
++ " .align 2 \n\t"
+ " .option push \n\t"
+ " .option norelax \n\t"
+ " .option norvc \n\t"
+@@ -39,6 +40,7 @@ static __always_inline bool arch_static_
+ bool branch)
+ {
+ asm_volatile_goto(
++ " .align 2 \n\t"
+ " .option push \n\t"
+ " .option norelax \n\t"
+ " .option norvc \n\t"
--- /dev/null
+From b49f700668fff7565b945dce823def79bff59bb0 Mon Sep 17 00:00:00 2001
+From: Sergey Matyukevich <sergey.matyukevich@syntacore.com>
+Date: Mon, 30 Jan 2023 00:18:18 +0300
+Subject: riscv: mm: fix regression due to update_mmu_cache change
+
+From: Sergey Matyukevich <sergey.matyukevich@syntacore.com>
+
+commit b49f700668fff7565b945dce823def79bff59bb0 upstream.
+
+This is a partial revert of the commit 4bd1d80efb5a ("riscv: mm: notify
+remote harts about mmu cache updates"). Original commit included two
+loosely related changes serving the same purpose of fixing stale TLB
+entries causing user-space application crash:
+- introduce deferred per-ASID TLB flush for CPUs not running the task
+- switch to per-ASID TLB flush on all CPUs running the task in update_mmu_cache
+
+According to report and discussion in [1], the second part caused a
+regression on Renesas RZ/Five SoC. For now restore the old behavior
+of the update_mmu_cache.
+
+[1] https://lore.kernel.org/linux-riscv/20220829205219.283543-1-geomatsi@gmail.com/
+
+Fixes: 4bd1d80efb5a ("riscv: mm: notify remote harts about mmu cache updates")
+Reported-by: "Lad, Prabhakar" <prabhakar.csengg@gmail.com>
+Signed-off-by: Sergey Matyukevich <sergey.matyukevich@syntacore.com>
+Link: trailer, so that it can be parsed with git's trailer functionality?
+Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
+Link: https://lore.kernel.org/r/20230129211818.686557-1-geomatsi@gmail.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/include/asm/pgtable.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/riscv/include/asm/pgtable.h
++++ b/arch/riscv/include/asm/pgtable.h
+@@ -386,7 +386,7 @@ static inline void update_mmu_cache(stru
+ * Relying on flush_tlb_fix_spurious_fault would suffice, but
+ * the extra traps reduce performance. So, eagerly SFENCE.VMA.
+ */
+- flush_tlb_page(vma, address);
++ local_flush_tlb_page(address);
+ }
+
+ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
--- /dev/null
+From 416721ff05fddc58ca531b6f069de250301de6e5 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= <bjorn@rivosinc.com>
+Date: Tue, 14 Feb 2023 17:25:15 +0100
+Subject: riscv, mm: Perform BPF exhandler fixup on page fault
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Björn Töpel <bjorn@rivosinc.com>
+
+commit 416721ff05fddc58ca531b6f069de250301de6e5 upstream.
+
+Commit 21855cac82d3 ("riscv/mm: Prevent kernel module to access user
+memory without uaccess routines") added early exits/deaths for page
+faults stemming from accesses to user-space without using proper
+uaccess routines (where sstatus.SUM is set).
+
+Unfortunately, this is too strict for some BPF programs, which rely
+on BPF exhandler fixups. These BPF programs load "BTF pointers". A
+BTF pointer could either be a valid kernel pointer or NULL, but not a
+userspace address.
+
+Resolve the problem by calling the fixup handler in the early exit
+path.
+
+Fixes: 21855cac82d3 ("riscv/mm: Prevent kernel module to access user memory without uaccess routines")
+Signed-off-by: Björn Töpel <bjorn@rivosinc.com>
+Link: https://lore.kernel.org/r/20230214162515.184827-1-bjorn@kernel.org
+Cc: stable@vger.kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/mm/fault.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/arch/riscv/mm/fault.c
++++ b/arch/riscv/mm/fault.c
+@@ -271,10 +271,12 @@ asmlinkage void do_page_fault(struct pt_
+ if (user_mode(regs))
+ flags |= FAULT_FLAG_USER;
+
+- if (!user_mode(regs) && addr < TASK_SIZE &&
+- unlikely(!(regs->status & SR_SUM)))
+- die_kernel_fault("access to user memory without uaccess routines",
+- addr, regs);
++ if (!user_mode(regs) && addr < TASK_SIZE && unlikely(!(regs->status & SR_SUM))) {
++ if (fixup_exception(regs))
++ return;
++
++ die_kernel_fault("access to user memory without uaccess routines", addr, regs);
++ }
+
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
+
--- /dev/null
+From 3fe97ff3d94934649abb0652028dd7296170c8d0 Mon Sep 17 00:00:00 2001
+From: James Bottomley <jejb@linux.ibm.com>
+Date: Sat, 28 Nov 2020 15:27:21 -0800
+Subject: scsi: ses: Don't attach if enclosure has no components
+
+From: James Bottomley <jejb@linux.ibm.com>
+
+commit 3fe97ff3d94934649abb0652028dd7296170c8d0 upstream.
+
+An enclosure with no components can't usefully be operated by the driver
+(since effectively it has nothing to manage), so report the problem and
+don't attach. Not attaching also fixes an oops which could occur if the
+driver tries to manage a zero component enclosure.
+
+[mkp: Switched to KERN_WARNING since this scenario is common]
+
+Link: https://lore.kernel.org/r/c5deac044ac409e32d9ad9968ce0dcbc996bfc7a.camel@linux.ibm.com
+Cc: stable@vger.kernel.org
+Reported-by: Ding Hui <dinghui@sangfor.com.cn>
+Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/ses.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/drivers/scsi/ses.c
++++ b/drivers/scsi/ses.c
+@@ -704,6 +704,12 @@ static int ses_intf_add(struct device *c
+ type_ptr[0] == ENCLOSURE_COMPONENT_ARRAY_DEVICE)
+ components += type_ptr[1];
+ }
++
++ if (components == 0) {
++ sdev_printk(KERN_WARNING, sdev, "enclosure has no enumerated components\n");
++ goto err_free;
++ }
++
+ ses_dev->page1 = buf;
+ ses_dev->page1_len = len;
+ buf = NULL;
--- /dev/null
+From db95d4df71cb55506425b6e4a5f8d68e3a765b63 Mon Sep 17 00:00:00 2001
+From: Tomas Henzl <thenzl@redhat.com>
+Date: Thu, 2 Feb 2023 17:24:49 +0100
+Subject: scsi: ses: Fix possible addl_desc_ptr out-of-bounds accesses
+
+From: Tomas Henzl <thenzl@redhat.com>
+
+commit db95d4df71cb55506425b6e4a5f8d68e3a765b63 upstream.
+
+Sanitize possible addl_desc_ptr out-of-bounds accesses in
+ses_enclosure_data_process().
+
+Link: https://lore.kernel.org/r/20230202162451.15346-3-thenzl@redhat.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Tomas Henzl <thenzl@redhat.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/ses.c | 35 ++++++++++++++++++++++++++---------
+ 1 file changed, 26 insertions(+), 9 deletions(-)
+
+--- a/drivers/scsi/ses.c
++++ b/drivers/scsi/ses.c
+@@ -433,8 +433,8 @@ int ses_match_host(struct enclosure_devi
+ }
+ #endif /* 0 */
+
+-static void ses_process_descriptor(struct enclosure_component *ecomp,
+- unsigned char *desc)
++static int ses_process_descriptor(struct enclosure_component *ecomp,
++ unsigned char *desc, int max_desc_len)
+ {
+ int eip = desc[0] & 0x10;
+ int invalid = desc[0] & 0x80;
+@@ -445,22 +445,32 @@ static void ses_process_descriptor(struc
+ unsigned char *d;
+
+ if (invalid)
+- return;
++ return 0;
+
+ switch (proto) {
+ case SCSI_PROTOCOL_FCP:
+ if (eip) {
++ if (max_desc_len <= 7)
++ return 1;
+ d = desc + 4;
+ slot = d[3];
+ }
+ break;
+ case SCSI_PROTOCOL_SAS:
++
+ if (eip) {
++ if (max_desc_len <= 27)
++ return 1;
+ d = desc + 4;
+ slot = d[3];
+ d = desc + 8;
+- } else
++ } else {
++ if (max_desc_len <= 23)
++ return 1;
+ d = desc + 4;
++ }
++
++
+ /* only take the phy0 addr */
+ addr = (u64)d[12] << 56 |
+ (u64)d[13] << 48 |
+@@ -477,6 +487,8 @@ static void ses_process_descriptor(struc
+ }
+ ecomp->slot = slot;
+ scomp->addr = addr;
++
++ return 0;
+ }
+
+ struct efd {
+@@ -549,7 +561,7 @@ static void ses_enclosure_data_process(s
+ /* skip past overall descriptor */
+ desc_ptr += len + 4;
+ }
+- if (ses_dev->page10)
++ if (ses_dev->page10 && ses_dev->page10_len > 9)
+ addl_desc_ptr = ses_dev->page10 + 8;
+ type_ptr = ses_dev->page1_types;
+ components = 0;
+@@ -557,6 +569,7 @@ static void ses_enclosure_data_process(s
+ for (j = 0; j < type_ptr[1]; j++) {
+ char *name = NULL;
+ struct enclosure_component *ecomp;
++ int max_desc_len;
+
+ if (desc_ptr) {
+ if (desc_ptr >= buf + page7_len) {
+@@ -583,10 +596,14 @@ static void ses_enclosure_data_process(s
+ ecomp = &edev->component[components++];
+
+ if (!IS_ERR(ecomp)) {
+- if (addl_desc_ptr)
+- ses_process_descriptor(
+- ecomp,
+- addl_desc_ptr);
++ if (addl_desc_ptr) {
++ max_desc_len = ses_dev->page10_len -
++ (addl_desc_ptr - ses_dev->page10);
++ if (ses_process_descriptor(ecomp,
++ addl_desc_ptr,
++ max_desc_len))
++ addl_desc_ptr = NULL;
++ }
+ if (create)
+ enclosure_component_register(
+ ecomp);
--- /dev/null
+From 801ab13d50cf3d26170ee073ea8bb4eececb76ab Mon Sep 17 00:00:00 2001
+From: Tomas Henzl <thenzl@redhat.com>
+Date: Thu, 2 Feb 2023 17:24:50 +0100
+Subject: scsi: ses: Fix possible desc_ptr out-of-bounds accesses
+
+From: Tomas Henzl <thenzl@redhat.com>
+
+commit 801ab13d50cf3d26170ee073ea8bb4eececb76ab upstream.
+
+Sanitize possible desc_ptr out-of-bounds accesses in
+ses_enclosure_data_process().
+
+Link: https://lore.kernel.org/r/20230202162451.15346-4-thenzl@redhat.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Tomas Henzl <thenzl@redhat.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/ses.c | 14 +++++++++-----
+ 1 file changed, 9 insertions(+), 5 deletions(-)
+
+--- a/drivers/scsi/ses.c
++++ b/drivers/scsi/ses.c
+@@ -572,15 +572,19 @@ static void ses_enclosure_data_process(s
+ int max_desc_len;
+
+ if (desc_ptr) {
+- if (desc_ptr >= buf + page7_len) {
++ if (desc_ptr + 3 >= buf + page7_len) {
+ desc_ptr = NULL;
+ } else {
+ len = (desc_ptr[2] << 8) + desc_ptr[3];
+ desc_ptr += 4;
+- /* Add trailing zero - pushes into
+- * reserved space */
+- desc_ptr[len] = '\0';
+- name = desc_ptr;
++ if (desc_ptr + len > buf + page7_len)
++ desc_ptr = NULL;
++ else {
++ /* Add trailing zero - pushes into
++ * reserved space */
++ desc_ptr[len] = '\0';
++ name = desc_ptr;
++ }
+ }
+ }
+ if (type_ptr[0] == ENCLOSURE_COMPONENT_DEVICE ||
--- /dev/null
+From 9b4f5028e493cb353a5c8f5c45073eeea0303abd Mon Sep 17 00:00:00 2001
+From: Tomas Henzl <thenzl@redhat.com>
+Date: Thu, 2 Feb 2023 17:24:48 +0100
+Subject: scsi: ses: Fix slab-out-of-bounds in ses_enclosure_data_process()
+
+From: Tomas Henzl <thenzl@redhat.com>
+
+commit 9b4f5028e493cb353a5c8f5c45073eeea0303abd upstream.
+
+A fix for:
+
+BUG: KASAN: slab-out-of-bounds in ses_enclosure_data_process+0x949/0xe30 [ses]
+Read of size 1 at addr ffff88a1b043a451 by task systemd-udevd/3271
+
+Checking after (and before in next loop) addl_desc_ptr[1] is sufficient, we
+expect the size to be sanitized before first access to addl_desc_ptr[1].
+Make sure we don't walk beyond end of page.
+
+Link: https://lore.kernel.org/r/20230202162451.15346-2-thenzl@redhat.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Tomas Henzl <thenzl@redhat.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/ses.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/scsi/ses.c
++++ b/drivers/scsi/ses.c
+@@ -603,9 +603,11 @@ static void ses_enclosure_data_process(s
+ /* these elements are optional */
+ type_ptr[0] == ENCLOSURE_COMPONENT_SCSI_TARGET_PORT ||
+ type_ptr[0] == ENCLOSURE_COMPONENT_SCSI_INITIATOR_PORT ||
+- type_ptr[0] == ENCLOSURE_COMPONENT_CONTROLLER_ELECTRONICS))
++ type_ptr[0] == ENCLOSURE_COMPONENT_CONTROLLER_ELECTRONICS)) {
+ addl_desc_ptr += addl_desc_ptr[1] + 2;
+-
++ if (addl_desc_ptr + 1 >= ses_dev->page10 + ses_dev->page10_len)
++ addl_desc_ptr = NULL;
++ }
+ }
+ }
+ kfree(buf);
--- /dev/null
+From 578797f0c8cbc2e3ec5fc0dab87087b4c7073686 Mon Sep 17 00:00:00 2001
+From: Tomas Henzl <thenzl@redhat.com>
+Date: Thu, 2 Feb 2023 17:24:51 +0100
+Subject: scsi: ses: Fix slab-out-of-bounds in ses_intf_remove()
+
+From: Tomas Henzl <thenzl@redhat.com>
+
+commit 578797f0c8cbc2e3ec5fc0dab87087b4c7073686 upstream.
+
+A fix for:
+
+BUG: KASAN: slab-out-of-bounds in ses_intf_remove+0x23f/0x270 [ses]
+Read of size 8 at addr ffff88a10d32e5d8 by task rmmod/12013
+
+When edev->components is zero, accessing edev->component[0] members is
+wrong.
+
+Link: https://lore.kernel.org/r/20230202162451.15346-5-thenzl@redhat.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Tomas Henzl <thenzl@redhat.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/ses.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/scsi/ses.c
++++ b/drivers/scsi/ses.c
+@@ -856,7 +856,8 @@ static void ses_intf_remove_enclosure(st
+ kfree(ses_dev->page2);
+ kfree(ses_dev);
+
+- kfree(edev->component[0].scratch);
++ if (edev->components)
++ kfree(edev->component[0].scratch);
+
+ put_device(&edev->edev);
+ enclosure_unregister(edev);
scsi-qla2xxx-remove-unintended-flag-clearing.patch
scsi-qla2xxx-fix-erroneous-link-down.patch
scsi-qla2xxx-remove-increment-of-interface-err-cnt.patch
+scsi-ses-don-t-attach-if-enclosure-has-no-components.patch
+scsi-ses-fix-slab-out-of-bounds-in-ses_enclosure_data_process.patch
+scsi-ses-fix-possible-addl_desc_ptr-out-of-bounds-accesses.patch
+scsi-ses-fix-possible-desc_ptr-out-of-bounds-accesses.patch
+scsi-ses-fix-slab-out-of-bounds-in-ses_intf_remove.patch
+risc-v-add-a-spin_shadow_stack-declaration.patch
+riscv-mm-fix-regression-due-to-update_mmu_cache-change.patch
+riscv-jump_label-fixup-unaligned-arch_static_branch-function.patch
+riscv-mm-perform-bpf-exhandler-fixup-on-page-fault.patch
+riscv-ftrace-remove-wasted-nops-for-riscv_isa_c.patch
+riscv-ftrace-reduce-the-detour-code-size-to-half.patch
+mips-dts-ci20-fix-otg-power-gpio.patch
+pci-pm-observe-reset-delay-irrespective-of-bridge_d3.patch
+pci-hotplug-allow-marking-devices-as-disconnected-during-bind-unbind.patch
+pci-avoid-flr-for-amd-fch-ahci-adapters.patch
+iommu-vt-d-avoid-superfluous-iotlb-tracking-in-lazy-mode.patch
+vfio-type1-prevent-underflow-of-locked_vm-via-exec.patch
+vfio-type1-track-locked_vm-per-dma.patch
+vfio-type1-restore-locked_vm.patch
+drm-i915-quirks-add-inverted-backlight-quirk-for-hp-14-r206nv.patch
+drm-radeon-fix-edp-for-single-display-imac11-2.patch
+drm-i915-don-t-use-bar-mappings-for-ring-buffers-with-llc.patch
--- /dev/null
+From 046eca5018f8a5dd1dc2cedf87fb5843b9ea3026 Mon Sep 17 00:00:00 2001
+From: Steve Sistare <steven.sistare@oracle.com>
+Date: Tue, 31 Jan 2023 08:58:04 -0800
+Subject: vfio/type1: prevent underflow of locked_vm via exec()
+
+From: Steve Sistare <steven.sistare@oracle.com>
+
+commit 046eca5018f8a5dd1dc2cedf87fb5843b9ea3026 upstream.
+
+When a vfio container is preserved across exec, the task does not change,
+but it gets a new mm with locked_vm=0, and loses the count from existing
+dma mappings. If the user later unmaps a dma mapping, locked_vm underflows
+to a large unsigned value, and a subsequent dma map request fails with
+ENOMEM in __account_locked_vm.
+
+To avoid underflow, grab and save the mm at the time a dma is mapped.
+Use that mm when adjusting locked_vm, rather than re-acquiring the saved
+task's mm, which may have changed. If the saved mm is dead, do nothing.
+
+locked_vm is incremented for existing mappings in a subsequent patch.
+
+Fixes: 73fa0d10d077 ("vfio: Type1 IOMMU implementation")
+Cc: stable@vger.kernel.org
+Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
+Reviewed-by: Kevin Tian <kevin.tian@intel.com>
+Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
+Link: https://lore.kernel.org/r/1675184289-267876-3-git-send-email-steven.sistare@oracle.com
+Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vfio/vfio_iommu_type1.c | 41 +++++++++++++---------------------------
+ 1 file changed, 14 insertions(+), 27 deletions(-)
+
+--- a/drivers/vfio/vfio_iommu_type1.c
++++ b/drivers/vfio/vfio_iommu_type1.c
+@@ -100,6 +100,7 @@ struct vfio_dma {
+ struct task_struct *task;
+ struct rb_root pfn_list; /* Ex-user pinned pfn list */
+ unsigned long *bitmap;
++ struct mm_struct *mm;
+ };
+
+ struct vfio_batch {
+@@ -424,8 +425,8 @@ static int vfio_lock_acct(struct vfio_dm
+ if (!npage)
+ return 0;
+
+- mm = async ? get_task_mm(dma->task) : dma->task->mm;
+- if (!mm)
++ mm = dma->mm;
++ if (async && !mmget_not_zero(mm))
+ return -ESRCH; /* process exited */
+
+ ret = mmap_write_lock_killable(mm);
+@@ -798,8 +799,8 @@ static int vfio_pin_page_external(struct
+ struct mm_struct *mm;
+ int ret;
+
+- mm = get_task_mm(dma->task);
+- if (!mm)
++ mm = dma->mm;
++ if (!mmget_not_zero(mm))
+ return -ENODEV;
+
+ ret = vaddr_get_pfns(mm, vaddr, 1, dma->prot, pfn_base, pages);
+@@ -809,7 +810,7 @@ static int vfio_pin_page_external(struct
+ ret = 0;
+
+ if (do_accounting && !is_invalid_reserved_pfn(*pfn_base)) {
+- ret = vfio_lock_acct(dma, 1, true);
++ ret = vfio_lock_acct(dma, 1, false);
+ if (ret) {
+ put_pfn(*pfn_base, dma->prot);
+ if (ret == -ENOMEM)
+@@ -1179,6 +1180,7 @@ static void vfio_remove_dma(struct vfio_
+ vfio_unmap_unpin(iommu, dma, true);
+ vfio_unlink_dma(iommu, dma);
+ put_task_struct(dma->task);
++ mmdrop(dma->mm);
+ vfio_dma_bitmap_free(dma);
+ if (dma->vaddr_invalid) {
+ iommu->vaddr_invalid_count--;
+@@ -1649,29 +1651,15 @@ static int vfio_dma_do_map(struct vfio_i
+ * against the locked memory limit and we need to be able to do both
+ * outside of this call path as pinning can be asynchronous via the
+ * external interfaces for mdev devices. RLIMIT_MEMLOCK requires a
+- * task_struct and VM locked pages requires an mm_struct, however
+- * holding an indefinite mm reference is not recommended, therefore we
+- * only hold a reference to a task. We could hold a reference to
+- * current, however QEMU uses this call path through vCPU threads,
+- * which can be killed resulting in a NULL mm and failure in the unmap
+- * path when called via a different thread. Avoid this problem by
+- * using the group_leader as threads within the same group require
+- * both CLONE_THREAD and CLONE_VM and will therefore use the same
+- * mm_struct.
+- *
+- * Previously we also used the task for testing CAP_IPC_LOCK at the
+- * time of pinning and accounting, however has_capability() makes use
+- * of real_cred, a copy-on-write field, so we can't guarantee that it
+- * matches group_leader, or in fact that it might not change by the
+- * time it's evaluated. If a process were to call MAP_DMA with
+- * CAP_IPC_LOCK but later drop it, it doesn't make sense that they
+- * possibly see different results for an iommu_mapped vfio_dma vs
+- * externally mapped. Therefore track CAP_IPC_LOCK in vfio_dma at the
+- * time of calling MAP_DMA.
++ * task_struct. Save the group_leader so that all DMA tracking uses
++ * the same task, to make debugging easier. VM locked pages requires
++ * an mm_struct, so grab the mm in case the task dies.
+ */
+ get_task_struct(current->group_leader);
+ dma->task = current->group_leader;
+ dma->lock_cap = capable(CAP_IPC_LOCK);
++ dma->mm = current->mm;
++ mmgrab(dma->mm);
+
+ dma->pfn_list = RB_ROOT;
+
+@@ -3168,9 +3156,8 @@ static int vfio_iommu_type1_dma_rw_chunk
+ !(dma->prot & IOMMU_READ))
+ return -EPERM;
+
+- mm = get_task_mm(dma->task);
+-
+- if (!mm)
++ mm = dma->mm;
++ if (!mmget_not_zero(mm))
+ return -EPERM;
+
+ if (kthread)
--- /dev/null
+From 90fdd158a695d70403163f9a0e4efc5b20f3fd3e Mon Sep 17 00:00:00 2001
+From: Steve Sistare <steven.sistare@oracle.com>
+Date: Tue, 31 Jan 2023 08:58:06 -0800
+Subject: vfio/type1: restore locked_vm
+
+From: Steve Sistare <steven.sistare@oracle.com>
+
+commit 90fdd158a695d70403163f9a0e4efc5b20f3fd3e upstream.
+
+When a vfio container is preserved across exec or fork-exec, the new
+task's mm has a locked_vm count of 0. After a dma vaddr is updated using
+VFIO_DMA_MAP_FLAG_VADDR, locked_vm remains 0, and the pinned memory does
+not count against the task's RLIMIT_MEMLOCK.
+
+To restore the correct locked_vm count, when VFIO_DMA_MAP_FLAG_VADDR is
+used and the dma's mm has changed, add the dma's locked_vm count to
+the new mm->locked_vm, subject to the rlimit, and subtract it from the
+old mm->locked_vm.
+
+Fixes: c3cbab24db38 ("vfio/type1: implement interfaces to update vaddr")
+Cc: stable@vger.kernel.org
+Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
+Reviewed-by: Kevin Tian <kevin.tian@intel.com>
+Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
+Link: https://lore.kernel.org/r/1675184289-267876-5-git-send-email-steven.sistare@oracle.com
+Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vfio/vfio_iommu_type1.c | 35 +++++++++++++++++++++++++++++++++++
+ 1 file changed, 35 insertions(+)
+
+--- a/drivers/vfio/vfio_iommu_type1.c
++++ b/drivers/vfio/vfio_iommu_type1.c
+@@ -1576,6 +1576,38 @@ static bool vfio_iommu_iova_dma_valid(st
+ return list_empty(iova);
+ }
+
++static int vfio_change_dma_owner(struct vfio_dma *dma)
++{
++ struct task_struct *task = current->group_leader;
++ struct mm_struct *mm = current->mm;
++ long npage = dma->locked_vm;
++ bool lock_cap;
++ int ret;
++
++ if (mm == dma->mm)
++ return 0;
++
++ lock_cap = capable(CAP_IPC_LOCK);
++ ret = mm_lock_acct(task, mm, lock_cap, npage);
++ if (ret)
++ return ret;
++
++ if (mmget_not_zero(dma->mm)) {
++ mm_lock_acct(dma->task, dma->mm, dma->lock_cap, -npage);
++ mmput(dma->mm);
++ }
++
++ if (dma->task != task) {
++ put_task_struct(dma->task);
++ dma->task = get_task_struct(task);
++ }
++ mmdrop(dma->mm);
++ dma->mm = mm;
++ mmgrab(dma->mm);
++ dma->lock_cap = lock_cap;
++ return 0;
++}
++
+ static int vfio_dma_do_map(struct vfio_iommu *iommu,
+ struct vfio_iommu_type1_dma_map *map)
+ {
+@@ -1625,6 +1657,9 @@ static int vfio_dma_do_map(struct vfio_i
+ dma->size != size) {
+ ret = -EINVAL;
+ } else {
++ ret = vfio_change_dma_owner(dma);
++ if (ret)
++ goto out_unlock;
+ dma->vaddr = vaddr;
+ dma->vaddr_invalid = false;
+ iommu->vaddr_invalid_count--;
--- /dev/null
+From 18e292705ba21cc9b3227b9ad5b1c28973605ee5 Mon Sep 17 00:00:00 2001
+From: Steve Sistare <steven.sistare@oracle.com>
+Date: Tue, 31 Jan 2023 08:58:05 -0800
+Subject: vfio/type1: track locked_vm per dma
+
+From: Steve Sistare <steven.sistare@oracle.com>
+
+commit 18e292705ba21cc9b3227b9ad5b1c28973605ee5 upstream.
+
+Track locked_vm per dma struct, and create a new subroutine, both for use
+in a subsequent patch. No functional change.
+
+Fixes: c3cbab24db38 ("vfio/type1: implement interfaces to update vaddr")
+Cc: stable@vger.kernel.org
+Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
+Reviewed-by: Kevin Tian <kevin.tian@intel.com>
+Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
+Link: https://lore.kernel.org/r/1675184289-267876-4-git-send-email-steven.sistare@oracle.com
+Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vfio/vfio_iommu_type1.c | 23 +++++++++++++++++------
+ 1 file changed, 17 insertions(+), 6 deletions(-)
+
+--- a/drivers/vfio/vfio_iommu_type1.c
++++ b/drivers/vfio/vfio_iommu_type1.c
+@@ -101,6 +101,7 @@ struct vfio_dma {
+ struct rb_root pfn_list; /* Ex-user pinned pfn list */
+ unsigned long *bitmap;
+ struct mm_struct *mm;
++ size_t locked_vm;
+ };
+
+ struct vfio_batch {
+@@ -417,6 +418,19 @@ static int vfio_iova_put_vfio_pfn(struct
+ return ret;
+ }
+
++static int mm_lock_acct(struct task_struct *task, struct mm_struct *mm,
++ bool lock_cap, long npage)
++{
++ int ret = mmap_write_lock_killable(mm);
++
++ if (ret)
++ return ret;
++
++ ret = __account_locked_vm(mm, abs(npage), npage > 0, task, lock_cap);
++ mmap_write_unlock(mm);
++ return ret;
++}
++
+ static int vfio_lock_acct(struct vfio_dma *dma, long npage, bool async)
+ {
+ struct mm_struct *mm;
+@@ -429,12 +443,9 @@ static int vfio_lock_acct(struct vfio_dm
+ if (async && !mmget_not_zero(mm))
+ return -ESRCH; /* process exited */
+
+- ret = mmap_write_lock_killable(mm);
+- if (!ret) {
+- ret = __account_locked_vm(mm, abs(npage), npage > 0, dma->task,
+- dma->lock_cap);
+- mmap_write_unlock(mm);
+- }
++ ret = mm_lock_acct(dma->task, mm, dma->lock_cap, npage);
++ if (!ret)
++ dma->locked_vm += npage;
+
+ if (async)
+ mmput(mm);