From: Greg Kroah-Hartman Date: Tue, 7 Mar 2023 16:38:47 +0000 (+0100) Subject: 5.15-stable patches X-Git-Tag: v6.2.3~33 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=15723147534fcbb96925f2f923bf621c864674ce;p=thirdparty%2Fkernel%2Fstable-queue.git 5.15-stable patches added patches: drm-i915-don-t-use-bar-mappings-for-ring-buffers-with-llc.patch drm-i915-quirks-add-inverted-backlight-quirk-for-hp-14-r206nv.patch drm-radeon-fix-edp-for-single-display-imac11-2.patch iommu-vt-d-avoid-superfluous-iotlb-tracking-in-lazy-mode.patch mips-dts-ci20-fix-otg-power-gpio.patch pci-avoid-flr-for-amd-fch-ahci-adapters.patch pci-hotplug-allow-marking-devices-as-disconnected-during-bind-unbind.patch pci-pm-observe-reset-delay-irrespective-of-bridge_d3.patch risc-v-add-a-spin_shadow_stack-declaration.patch riscv-ftrace-reduce-the-detour-code-size-to-half.patch riscv-ftrace-remove-wasted-nops-for-riscv_isa_c.patch riscv-jump_label-fixup-unaligned-arch_static_branch-function.patch riscv-mm-fix-regression-due-to-update_mmu_cache-change.patch riscv-mm-perform-bpf-exhandler-fixup-on-page-fault.patch scsi-ses-don-t-attach-if-enclosure-has-no-components.patch scsi-ses-fix-possible-addl_desc_ptr-out-of-bounds-accesses.patch scsi-ses-fix-possible-desc_ptr-out-of-bounds-accesses.patch scsi-ses-fix-slab-out-of-bounds-in-ses_enclosure_data_process.patch scsi-ses-fix-slab-out-of-bounds-in-ses_intf_remove.patch vfio-type1-prevent-underflow-of-locked_vm-via-exec.patch vfio-type1-restore-locked_vm.patch vfio-type1-track-locked_vm-per-dma.patch --- diff --git a/queue-5.15/drm-i915-don-t-use-bar-mappings-for-ring-buffers-with-llc.patch b/queue-5.15/drm-i915-don-t-use-bar-mappings-for-ring-buffers-with-llc.patch new file mode 100644 index 00000000000..d3cb9e6ad0b --- /dev/null +++ b/queue-5.15/drm-i915-don-t-use-bar-mappings-for-ring-buffers-with-llc.patch @@ -0,0 +1,56 @@ +From 85636167e3206c3fbd52254fc432991cc4e90194 Mon Sep 17 00:00:00 2001 +From: John Harrison +Date: Wed, 15 Feb 2023 
17:11:01 -0800 +Subject: drm/i915: Don't use BAR mappings for ring buffers with LLC +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: John Harrison + +commit 85636167e3206c3fbd52254fc432991cc4e90194 upstream. + +Direction from hardware is that ring buffers should never be mapped +via the BAR on systems with LLC. There are too many caching pitfalls +due to the way BAR accesses are routed. So it is safest to just not +use it. + +Signed-off-by: John Harrison +Fixes: 9d80841ea4c9 ("drm/i915: Allow ringbuffers to be bound anywhere") +Cc: Chris Wilson +Cc: Joonas Lahtinen +Cc: Jani Nikula +Cc: Rodrigo Vivi +Cc: Tvrtko Ursulin +Cc: intel-gfx@lists.freedesktop.org +Cc: # v4.9+ +Tested-by: Jouni Högander +Reviewed-by: Daniele Ceraolo Spurio +Link: https://patchwork.freedesktop.org/patch/msgid/20230216011101.1909009-3-John.C.Harrison@Intel.com +(cherry picked from commit 65c08339db1ada87afd6cfe7db8e60bb4851d919) +Signed-off-by: Jani Nikula +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/i915/gt/intel_ring.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/i915/gt/intel_ring.c ++++ b/drivers/gpu/drm/i915/gt/intel_ring.c +@@ -51,7 +51,7 @@ int intel_ring_pin(struct intel_ring *ri + if (unlikely(ret)) + goto err_unpin; + +- if (i915_vma_is_map_and_fenceable(vma)) { ++ if (i915_vma_is_map_and_fenceable(vma) && !HAS_LLC(vma->vm->i915)) { + addr = (void __force *)i915_vma_pin_iomap(vma); + } else { + int type = i915_coherent_map_type(vma->vm->i915, vma->obj, false); +@@ -96,7 +96,7 @@ void intel_ring_unpin(struct intel_ring + return; + + i915_vma_unset_ggtt_write(vma); +- if (i915_vma_is_map_and_fenceable(vma)) ++ if (i915_vma_is_map_and_fenceable(vma) && !HAS_LLC(vma->vm->i915)) + i915_vma_unpin_iomap(vma); + else + i915_gem_object_unpin_map(vma->obj); diff --git a/queue-5.15/drm-i915-quirks-add-inverted-backlight-quirk-for-hp-14-r206nv.patch 
b/queue-5.15/drm-i915-quirks-add-inverted-backlight-quirk-for-hp-14-r206nv.patch new file mode 100644 index 00000000000..b42ad332017 --- /dev/null +++ b/queue-5.15/drm-i915-quirks-add-inverted-backlight-quirk-for-hp-14-r206nv.patch @@ -0,0 +1,36 @@ +From 5e438bf7f9a1705ebcae5fa89cdbfbc6932a7871 Mon Sep 17 00:00:00 2001 +From: Mavroudis Chatzilaridis +Date: Wed, 1 Feb 2023 18:51:25 +0000 +Subject: drm/i915/quirks: Add inverted backlight quirk for HP 14-r206nv + +From: Mavroudis Chatzilaridis + +commit 5e438bf7f9a1705ebcae5fa89cdbfbc6932a7871 upstream. + +This laptop uses inverted backlight PWM. Thus, without this quirk, +backlight brightness decreases as the brightness value increases and +vice versa. + +Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8013 +Cc: stable@vger.kernel.org +Signed-off-by: Mavroudis Chatzilaridis +Reviewed-by: Jani Nikula +Signed-off-by: Jani Nikula +Link: https://patchwork.freedesktop.org/patch/msgid/20230201184947.8835-1-mavchatz@protonmail.com +(cherry picked from commit 83e7d6fd330d413cb2064e680ffea91b0512a520) +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/i915/display/intel_quirks.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/gpu/drm/i915/display/intel_quirks.c ++++ b/drivers/gpu/drm/i915/display/intel_quirks.c +@@ -193,6 +193,8 @@ static struct intel_quirk intel_quirks[] + /* ECS Liva Q2 */ + { 0x3185, 0x1019, 0xa94d, quirk_increase_ddi_disabled_time }, + { 0x3184, 0x1019, 0xa94d, quirk_increase_ddi_disabled_time }, ++ /* HP Notebook - 14-r206nv */ ++ { 0x0f31, 0x103c, 0x220f, quirk_invert_brightness }, + }; + + void intel_init_quirks(struct drm_i915_private *i915) diff --git a/queue-5.15/drm-radeon-fix-edp-for-single-display-imac11-2.patch b/queue-5.15/drm-radeon-fix-edp-for-single-display-imac11-2.patch new file mode 100644 index 00000000000..22eddfe8c8c --- /dev/null +++ b/queue-5.15/drm-radeon-fix-edp-for-single-display-imac11-2.patch @@ -0,0 +1,46 @@ +From 
05eacc198c68cbb35a7281ce4011f8899ee1cfb8 Mon Sep 17 00:00:00 2001 +From: Mark Hawrylak +Date: Sun, 19 Feb 2023 16:02:00 +1100 +Subject: drm/radeon: Fix eDP for single-display iMac11,2 + +From: Mark Hawrylak + +commit 05eacc198c68cbb35a7281ce4011f8899ee1cfb8 upstream. + +Apple iMac11,2 (mid 2010) also with Radeon HD-4670 that has the same +issue as iMac10,1 (late 2009) where the internal eDP panel stays dark on +driver load. This patch treats iMac11,2 the same as iMac10,1, +so the eDP panel stays active. + +Additional steps: +Kernel boot parameter radeon.nomodeset=0 required to keep the eDP +panel active. + +This patch is an extension of +commit 564d8a2cf3ab ("drm/radeon: Fix eDP for single-display iMac10,1 (v2)") +Link: https://lore.kernel.org/all/lsq.1507553064.833262317@decadent.org.uk/ +Signed-off-by: Mark Hawrylak +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/radeon/atombios_encoders.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/radeon/atombios_encoders.c ++++ b/drivers/gpu/drm/radeon/atombios_encoders.c +@@ -2188,11 +2188,12 @@ int radeon_atom_pick_dig_encoder(struct + + /* + * On DCE32 any encoder can drive any block so usually just use crtc id, +- * but Apple thinks different at least on iMac10,1, so there use linkb, ++ * but Apple thinks different at least on iMac10,1 and iMac11,2, so there use linkb, + * otherwise the internal eDP panel will stay dark. + */ + if (ASIC_IS_DCE32(rdev)) { +- if (dmi_match(DMI_PRODUCT_NAME, "iMac10,1")) ++ if (dmi_match(DMI_PRODUCT_NAME, "iMac10,1") || ++ dmi_match(DMI_PRODUCT_NAME, "iMac11,2")) + enc_idx = (dig->linkb) ? 
1 : 0; + else + enc_idx = radeon_crtc->crtc_id; diff --git a/queue-5.15/iommu-vt-d-avoid-superfluous-iotlb-tracking-in-lazy-mode.patch b/queue-5.15/iommu-vt-d-avoid-superfluous-iotlb-tracking-in-lazy-mode.patch new file mode 100644 index 00000000000..d0305d81f8c --- /dev/null +++ b/queue-5.15/iommu-vt-d-avoid-superfluous-iotlb-tracking-in-lazy-mode.patch @@ -0,0 +1,53 @@ +From 16a75bbe480c3598b3af57a2504ea89b1e32c3ac Mon Sep 17 00:00:00 2001 +From: Jacob Pan +Date: Thu, 16 Feb 2023 21:08:14 +0800 +Subject: iommu/vt-d: Avoid superfluous IOTLB tracking in lazy mode + +From: Jacob Pan + +commit 16a75bbe480c3598b3af57a2504ea89b1e32c3ac upstream. + +Intel IOMMU driver implements IOTLB flush queue with domain selective +or PASID selective invalidations. In this case there's no need to track +IOVA page range and sync IOTLBs, which may cause significant performance +hit. + +This patch adds a check to avoid IOVA gather page and IOTLB sync for +the lazy path. + +The performance difference on Sapphire Rapids 100Gb NIC is improved by +the following (as measured by iperf send): + +w/o this fix~48 Gbits/s. 
with this fix ~54 Gbits/s + +Cc: +Fixes: 2a2b8eaa5b25 ("iommu: Handle freelists when using deferred flushing in iommu drivers") +Reviewed-by: Robin Murphy +Reviewed-by: Kevin Tian +Tested-by: Sanjay Kumar +Signed-off-by: Sanjay Kumar +Signed-off-by: Jacob Pan +Link: https://lore.kernel.org/r/20230209175330.1783556-1-jacob.jun.pan@linux.intel.com +Signed-off-by: Lu Baolu +Signed-off-by: Joerg Roedel +Signed-off-by: Greg Kroah-Hartman +--- + drivers/iommu/intel/iommu.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/drivers/iommu/intel/iommu.c ++++ b/drivers/iommu/intel/iommu.c +@@ -5140,7 +5140,12 @@ static size_t intel_iommu_unmap(struct i + if (dmar_domain->max_addr == iova + size) + dmar_domain->max_addr = iova; + +- iommu_iotlb_gather_add_page(domain, gather, iova, size); ++ /* ++ * We do not use page-selective IOTLB invalidation in flush queue, ++ * so there is no need to track page and sync iotlb. ++ */ ++ if (!iommu_iotlb_gather_queued(gather)) ++ iommu_iotlb_gather_add_page(domain, gather, iova, size); + + return size; + } diff --git a/queue-5.15/mips-dts-ci20-fix-otg-power-gpio.patch b/queue-5.15/mips-dts-ci20-fix-otg-power-gpio.patch new file mode 100644 index 00000000000..9b0a80a7d6e --- /dev/null +++ b/queue-5.15/mips-dts-ci20-fix-otg-power-gpio.patch @@ -0,0 +1,34 @@ +From 0cb4228f6cc9ed0ca2be0d9ddf29168a8e3a3905 Mon Sep 17 00:00:00 2001 +From: "H. Nikolaus Schaller" +Date: Sun, 29 Jan 2023 19:57:04 +0100 +Subject: MIPS: DTS: CI20: fix otg power gpio + +From: H. Nikolaus Schaller + +commit 0cb4228f6cc9ed0ca2be0d9ddf29168a8e3a3905 upstream. + +According to schematics it is PF15 and not PF14 (MIC_SW_EN). +Seems as if it was hidden and not noticed during testing since +there is no sound DT node. + +Fixes: 158c774d3c64 ("MIPS: Ingenic: Add missing nodes for Ingenic SoCs and boards.") +Cc: stable@vger.kernel.org +Signed-off-by: H. 
Nikolaus Schaller +Acked-by: Paul Cercueil +Signed-off-by: Thomas Bogendoerfer +Signed-off-by: Greg Kroah-Hartman +--- + arch/mips/boot/dts/ingenic/ci20.dts | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/mips/boot/dts/ingenic/ci20.dts ++++ b/arch/mips/boot/dts/ingenic/ci20.dts +@@ -99,7 +99,7 @@ + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + +- gpio = <&gpf 14 GPIO_ACTIVE_LOW>; ++ gpio = <&gpf 15 GPIO_ACTIVE_LOW>; + enable-active-high; + }; + }; diff --git a/queue-5.15/pci-avoid-flr-for-amd-fch-ahci-adapters.patch b/queue-5.15/pci-avoid-flr-for-amd-fch-ahci-adapters.patch new file mode 100644 index 00000000000..a3555e01a09 --- /dev/null +++ b/queue-5.15/pci-avoid-flr-for-amd-fch-ahci-adapters.patch @@ -0,0 +1,48 @@ +From 63ba51db24ed1b8f8088a897290eb6c036c5435d Mon Sep 17 00:00:00 2001 +From: Damien Le Moal +Date: Sat, 28 Jan 2023 10:39:51 +0900 +Subject: PCI: Avoid FLR for AMD FCH AHCI adapters + +From: Damien Le Moal + +commit 63ba51db24ed1b8f8088a897290eb6c036c5435d upstream. + +PCI passthrough to VMs does not work with AMD FCH AHCI adapters: the guest +OS fails to correctly probe devices attached to the controller due to FIS +communication failures: + + ata4: softreset failed (1st FIS failed) + ... + ata4.00: qc timeout after 5000 msecs (cmd 0xec) + ata4.00: failed to IDENTIFY (I/O error, err_mask=0x4) + +Forcing the "bus" reset method before unbinding & binding the adapter to +the vfio-pci driver solves this issue, e.g.: + + echo "bus" > /sys/bus/pci/devices/<ID>/reset_method + +gives a working guest OS, indicating that the default FLR reset method +doesn't work correctly. + +Apply quirk_no_flr() to AMD FCH AHCI devices to work around this issue. 
+ +Link: https://lore.kernel.org/r/20230128013951.523247-1-damien.lemoal@opensource.wdc.com +Reported-by: Niklas Cassel +Signed-off-by: Damien Le Moal +Signed-off-by: Bjorn Helgaas +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/pci/quirks.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/pci/quirks.c ++++ b/drivers/pci/quirks.c +@@ -5328,6 +5328,7 @@ static void quirk_no_flr(struct pci_dev + DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x1487, quirk_no_flr); + DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x148c, quirk_no_flr); + DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x149c, quirk_no_flr); ++DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x7901, quirk_no_flr); + DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1502, quirk_no_flr); + DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1503, quirk_no_flr); + diff --git a/queue-5.15/pci-hotplug-allow-marking-devices-as-disconnected-during-bind-unbind.patch b/queue-5.15/pci-hotplug-allow-marking-devices-as-disconnected-during-bind-unbind.patch new file mode 100644 index 00000000000..cc1ab866626 --- /dev/null +++ b/queue-5.15/pci-hotplug-allow-marking-devices-as-disconnected-during-bind-unbind.patch @@ -0,0 +1,136 @@ +From 74ff8864cc842be994853095dba6db48e716400a Mon Sep 17 00:00:00 2001 +From: Lukas Wunner +Date: Fri, 20 Jan 2023 10:19:02 +0100 +Subject: PCI: hotplug: Allow marking devices as disconnected during bind/unbind + +From: Lukas Wunner + +commit 74ff8864cc842be994853095dba6db48e716400a upstream. + +On surprise removal, pciehp_unconfigure_device() and acpiphp's +trim_stale_devices() call pci_dev_set_disconnected() to mark removed +devices as permanently offline. Thereby, the PCI core and drivers know +to skip device accesses. + +However pci_dev_set_disconnected() takes the device_lock and thus waits for +a concurrent driver bind or unbind to complete. As a result, the driver's +->probe and ->remove hooks have no chance to learn that the device is gone. 
+ +That doesn't make any sense, so drop the device_lock and instead use atomic +xchg() and cmpxchg() operations to update the device state. + +As a byproduct, an AB-BA deadlock reported by Anatoli is fixed which occurs +on surprise removal with AER concurrently performing a bus reset. + +AER bus reset: + + INFO: task irq/26-aerdrv:95 blocked for more than 120 seconds. + Tainted: G W 6.2.0-rc3-custom-norework-jan11+ + schedule + rwsem_down_write_slowpath + down_write_nested + pciehp_reset_slot # acquires reset_lock + pci_reset_hotplug_slot + pci_slot_reset # acquires device_lock + pci_bus_error_reset + aer_root_reset + pcie_do_recovery + aer_process_err_devices + aer_isr + +pciehp surprise removal: + + INFO: task irq/26-pciehp:96 blocked for more than 120 seconds. + Tainted: G W 6.2.0-rc3-custom-norework-jan11+ + schedule_preempt_disabled + __mutex_lock + mutex_lock_nested + pci_dev_set_disconnected # acquires device_lock + pci_walk_bus + pciehp_unconfigure_device + pciehp_disable_slot + pciehp_handle_presence_or_link_change + pciehp_ist # acquires reset_lock + +Link: https://bugzilla.kernel.org/show_bug.cgi?id=215590 +Fixes: a6bd101b8f84 ("PCI: Unify device inaccessible") +Link: https://lore.kernel.org/r/3dc88ea82bdc0e37d9000e413d5ebce481cbd629.1674205689.git.lukas@wunner.de +Reported-by: Anatoli Antonovitch +Signed-off-by: Lukas Wunner +Signed-off-by: Bjorn Helgaas +Cc: stable@vger.kernel.org # v4.20+ +Cc: Keith Busch +Signed-off-by: Greg Kroah-Hartman +--- + drivers/pci/pci.h | 43 +++++++++++++------------------------------ + 1 file changed, 13 insertions(+), 30 deletions(-) + +--- a/drivers/pci/pci.h ++++ b/drivers/pci/pci.h +@@ -347,53 +347,36 @@ struct pci_sriov { + * @dev: PCI device to set new error_state + * @new: the state we want dev to be in + * +- * Must be called with device_lock held. ++ * If the device is experiencing perm_failure, it has to remain in that state. ++ * Any other transition is allowed. 
+ * + * Returns true if state has been changed to the requested state. + */ + static inline bool pci_dev_set_io_state(struct pci_dev *dev, + pci_channel_state_t new) + { +- bool changed = false; ++ pci_channel_state_t old; + +- device_lock_assert(&dev->dev); + switch (new) { + case pci_channel_io_perm_failure: +- switch (dev->error_state) { +- case pci_channel_io_frozen: +- case pci_channel_io_normal: +- case pci_channel_io_perm_failure: +- changed = true; +- break; +- } +- break; ++ xchg(&dev->error_state, pci_channel_io_perm_failure); ++ return true; + case pci_channel_io_frozen: +- switch (dev->error_state) { +- case pci_channel_io_frozen: +- case pci_channel_io_normal: +- changed = true; +- break; +- } +- break; ++ old = cmpxchg(&dev->error_state, pci_channel_io_normal, ++ pci_channel_io_frozen); ++ return old != pci_channel_io_perm_failure; + case pci_channel_io_normal: +- switch (dev->error_state) { +- case pci_channel_io_frozen: +- case pci_channel_io_normal: +- changed = true; +- break; +- } +- break; ++ old = cmpxchg(&dev->error_state, pci_channel_io_frozen, ++ pci_channel_io_normal); ++ return old != pci_channel_io_perm_failure; ++ default: ++ return false; + } +- if (changed) +- dev->error_state = new; +- return changed; + } + + static inline int pci_dev_set_disconnected(struct pci_dev *dev, void *unused) + { +- device_lock(&dev->dev); + pci_dev_set_io_state(dev, pci_channel_io_perm_failure); +- device_unlock(&dev->dev); + + return 0; + } diff --git a/queue-5.15/pci-pm-observe-reset-delay-irrespective-of-bridge_d3.patch b/queue-5.15/pci-pm-observe-reset-delay-irrespective-of-bridge_d3.patch new file mode 100644 index 00000000000..7baf8ca865f --- /dev/null +++ b/queue-5.15/pci-pm-observe-reset-delay-irrespective-of-bridge_d3.patch @@ -0,0 +1,57 @@ +From 8ef0217227b42e2c34a18de316cee3da16c9bf1e Mon Sep 17 00:00:00 2001 +From: Lukas Wunner +Date: Sun, 15 Jan 2023 09:20:31 +0100 +Subject: PCI/PM: Observe reset delay irrespective of bridge_d3 + +From: Lukas 
Wunner + +commit 8ef0217227b42e2c34a18de316cee3da16c9bf1e upstream. + +If a PCI bridge is suspended to D3cold upon entering system sleep, +resuming it entails a Fundamental Reset per PCIe r6.0 sec 5.8. + +The delay prescribed after a Fundamental Reset in PCIe r6.0 sec 6.6.1 +is sought to be observed by: + + pci_pm_resume_noirq() + pci_pm_bridge_power_up_actions() + pci_bridge_wait_for_secondary_bus() + +However, pci_bridge_wait_for_secondary_bus() bails out if the bridge_d3 +flag is not set. That flag indicates whether a bridge is allowed to +suspend to D3cold at *runtime*. + +Hence *no* delay is observed on resume from system sleep if runtime +D3cold is forbidden. That doesn't make any sense, so drop the bridge_d3 +check from pci_bridge_wait_for_secondary_bus(). + +The purpose of the bridge_d3 check was probably to avoid delays if a +bridge remained in D0 during suspend. However the sole caller of +pci_bridge_wait_for_secondary_bus(), pci_pm_bridge_power_up_actions(), +is only invoked if the previous power state was D3cold. Hence the +additional bridge_d3 check seems superfluous. 
+ +Fixes: ad9001f2f411 ("PCI/PM: Add missing link delays required by the PCIe spec") +Link: https://lore.kernel.org/r/eb37fa345285ec8bacabbf06b020b803f77bdd3d.1673769517.git.lukas@wunner.de +Tested-by: Ravi Kishore Koppuravuri +Signed-off-by: Lukas Wunner +Signed-off-by: Bjorn Helgaas +Reviewed-by: Mika Westerberg +Reviewed-by: Kuppuswamy Sathyanarayanan +Cc: stable@vger.kernel.org # v5.5+ +Signed-off-by: Greg Kroah-Hartman +--- + drivers/pci/pci.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/pci/pci.c ++++ b/drivers/pci/pci.c +@@ -4902,7 +4902,7 @@ void pci_bridge_wait_for_secondary_bus(s + if (pci_dev_is_disconnected(dev)) + return; + +- if (!pci_is_bridge(dev) || !dev->bridge_d3) ++ if (!pci_is_bridge(dev)) + return; + + down_read(&pci_bus_sem); diff --git a/queue-5.15/risc-v-add-a-spin_shadow_stack-declaration.patch b/queue-5.15/risc-v-add-a-spin_shadow_stack-declaration.patch new file mode 100644 index 00000000000..e1ea1864034 --- /dev/null +++ b/queue-5.15/risc-v-add-a-spin_shadow_stack-declaration.patch @@ -0,0 +1,39 @@ +From eb9be8310c58c166f9fae3b71c0ad9d6741b4897 Mon Sep 17 00:00:00 2001 +From: Conor Dooley +Date: Fri, 10 Feb 2023 18:59:45 +0000 +Subject: RISC-V: add a spin_shadow_stack declaration + +From: Conor Dooley + +commit eb9be8310c58c166f9fae3b71c0ad9d6741b4897 upstream. + +The patchwork automation reported a sparse complaint that +spin_shadow_stack was not declared and should be static: +../arch/riscv/kernel/traps.c:335:15: warning: symbol 'spin_shadow_stack' was not declared. Should it be static? + +However, this is used in entry.S and therefore shouldn't be static. 
+The same applies to the shadow_stack that this pseudo spinlock is +trying to protect, so do like its charge and add a declaration to +thread_info.h + +Signed-off-by: Conor Dooley +Fixes: 7e1864332fbc ("riscv: fix race when vmap stack overflow") +Reviewed-by: Guo Ren +Link: https://lore.kernel.org/r/20230210185945.915806-1-conor@kernel.org +Cc: stable@vger.kernel.org +Signed-off-by: Palmer Dabbelt +Signed-off-by: Greg Kroah-Hartman +--- + arch/riscv/include/asm/thread_info.h | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/riscv/include/asm/thread_info.h ++++ b/arch/riscv/include/asm/thread_info.h +@@ -43,6 +43,7 @@ + #ifndef __ASSEMBLY__ + + extern long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE / sizeof(long)]; ++extern unsigned long spin_shadow_stack; + + #include <asm/processor.h> + #include <asm/csr.h> diff --git a/queue-5.15/riscv-ftrace-reduce-the-detour-code-size-to-half.patch b/queue-5.15/riscv-ftrace-reduce-the-detour-code-size-to-half.patch new file mode 100644 index 00000000000..c16c63828a3 --- /dev/null +++ b/queue-5.15/riscv-ftrace-reduce-the-detour-code-size-to-half.patch @@ -0,0 +1,436 @@ +From 6724a76cff85ee271bbbff42ac527e4643b2ec52 Mon Sep 17 00:00:00 2001 +From: Guo Ren +Date: Thu, 12 Jan 2023 04:05:59 -0500 +Subject: riscv: ftrace: Reduce the detour code size to half + +From: Guo Ren + +commit 6724a76cff85ee271bbbff42ac527e4643b2ec52 upstream. + +Use a temporary register to reduce the size of detour code from 16 bytes to +8 bytes. The previous implementation is from 'commit afc76b8b8011 ("riscv: +Using PATCHABLE_FUNCTION_ENTRY instead of MCOUNT")'. + +Before the patch: +<func_prolog>: + 0: REG_S ra, -SZREG(sp) + 4: auipc ra, ? + 8: jalr ?(ra) +12: REG_L ra, -SZREG(sp) + (func_boddy) + +After the patch: +<func_prolog>: + 0: auipc t0, ? + 4: jalr t0, ?(t0) + (func_boddy) + +This patch not just reduces the size of detour code, but also fixes an +important issue: + +An Ftrace callback registered with FTRACE_OPS_FL_IPMODIFY flag can +actually change the instruction pointer, e.g. 
to "replace" the given +kernel function with a new one, which is needed for livepatching, etc. + +In this case, the trampoline (ftrace_regs_caller) would not return to +<func_prolog+12> but would rather jump to the new function. So, "REG_L +ra, -SZREG(sp)" would not run and the original return address would not +be restored. The kernel is likely to hang or crash as a result. + +This can be easily demonstrated if one tries to "replace", say, +cmdline_proc_show() with a new function with the same signature using +instruction_pointer_set(&fregs->regs, new_func_addr) in the Ftrace +callback. + +Link: https://lore.kernel.org/linux-riscv/20221122075440.1165172-1-suagrfillet@gmail.com/ +Link: https://lore.kernel.org/linux-riscv/d7d5730b-ebef-68e5-5046-e763e1ee6164@yadro.com/ +Co-developed-by: Song Shuai +Signed-off-by: Song Shuai +Signed-off-by: Guo Ren +Signed-off-by: Guo Ren +Cc: Evgenii Shatokhin +Reviewed-by: Evgenii Shatokhin +Link: https://lore.kernel.org/r/20230112090603.1295340-4-guoren@kernel.org +Cc: stable@vger.kernel.org +Fixes: 10626c32e382 ("riscv/ftrace: Add basic support") +Signed-off-by: Palmer Dabbelt +Signed-off-by: Greg Kroah-Hartman +--- + arch/riscv/Makefile | 4 +- + arch/riscv/include/asm/ftrace.h | 50 +++++++++++++++++++++++------- + arch/riscv/kernel/ftrace.c | 65 +++++++++++----------------------------- + arch/riscv/kernel/mcount-dyn.S | 42 +++++++++---------------- + 4 files changed, 75 insertions(+), 86 deletions(-) + +--- a/arch/riscv/Makefile ++++ b/arch/riscv/Makefile +@@ -14,9 +14,9 @@ ifeq ($(CONFIG_DYNAMIC_FTRACE),y) + LDFLAGS_vmlinux := --no-relax + KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY + ifeq ($(CONFIG_RISCV_ISA_C),y) +- CC_FLAGS_FTRACE := -fpatchable-function-entry=8 +-else + CC_FLAGS_FTRACE := -fpatchable-function-entry=4 ++else ++ CC_FLAGS_FTRACE := -fpatchable-function-entry=2 + endif + endif + +--- a/arch/riscv/include/asm/ftrace.h ++++ b/arch/riscv/include/asm/ftrace.h +@@ -42,6 +42,14 @@ struct dyn_arch_ftrace { + * 2) jalr: 
setting low-12 offset to ra, jump to ra, and set ra to + * return address (original pc + 4) + * ++ *<ftrace enable>: ++ * 0: auipc t0/ra, 0x? ++ * 4: jalr t0/ra, ?(t0/ra) ++ * ++ *<ftrace disable>: ++ * 0: nop ++ * 4: nop ++ * + * Dynamic ftrace generates probes to call sites, so we must deal with + * both auipc and jalr at the same time. + */ +@@ -52,25 +60,43 @@ struct dyn_arch_ftrace { + #define AUIPC_OFFSET_MASK (0xfffff000) + #define AUIPC_PAD (0x00001000) + #define JALR_SHIFT 20 +-#define JALR_BASIC (0x000080e7) +-#define AUIPC_BASIC (0x00000097) ++#define JALR_RA (0x000080e7) ++#define AUIPC_RA (0x00000097) ++#define JALR_T0 (0x000282e7) ++#define AUIPC_T0 (0x00000297) + #define NOP4 (0x00000013) + +-#define make_call(caller, callee, call) \ ++#define to_jalr_t0(offset) \ ++ (((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_T0) ++ ++#define to_auipc_t0(offset) \ ++ ((offset & JALR_SIGN_MASK) ? \ ++ (((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) | AUIPC_T0) : \ ++ ((offset & AUIPC_OFFSET_MASK) | AUIPC_T0)) ++ ++#define make_call_t0(caller, callee, call) \ + do { \ +- call[0] = to_auipc_insn((unsigned int)((unsigned long)callee - \ +- (unsigned long)caller)); \ +- call[1] = to_jalr_insn((unsigned int)((unsigned long)callee - \ +- (unsigned long)caller)); \ ++ unsigned int offset = \ ++ (unsigned long) callee - (unsigned long) caller; \ ++ call[0] = to_auipc_t0(offset); \ ++ call[1] = to_jalr_t0(offset); \ + } while (0) + +-#define to_jalr_insn(offset) \ +- (((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_BASIC) ++#define to_jalr_ra(offset) \ ++ (((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_RA) + +-#define to_auipc_insn(offset) \ ++#define to_auipc_ra(offset) \ + ((offset & JALR_SIGN_MASK) ? 
\ +- (((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) | AUIPC_BASIC) : \ +- ((offset & AUIPC_OFFSET_MASK) | AUIPC_BASIC)) ++ (((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) | AUIPC_RA) : \ ++ ((offset & AUIPC_OFFSET_MASK) | AUIPC_RA)) ++ ++#define make_call_ra(caller, callee, call) \ ++do { \ ++ unsigned int offset = \ ++ (unsigned long) callee - (unsigned long) caller; \ ++ call[0] = to_auipc_ra(offset); \ ++ call[1] = to_jalr_ra(offset); \ ++} while (0) + + /* + * Let auipc+jalr be the basic *mcount unit*, so we make it 8 bytes here. +--- a/arch/riscv/kernel/ftrace.c ++++ b/arch/riscv/kernel/ftrace.c +@@ -57,12 +57,15 @@ static int ftrace_check_current_call(uns + } + + static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target, +- bool enable) ++ bool enable, bool ra) + { + unsigned int call[2]; + unsigned int nops[2] = {NOP4, NOP4}; + +- make_call(hook_pos, target, call); ++ if (ra) ++ make_call_ra(hook_pos, target, call); ++ else ++ make_call_t0(hook_pos, target, call); + + /* Replace the auipc-jalr pair at once. Return -EPERM on write error. */ + if (patch_text_nosync +@@ -72,42 +75,13 @@ static int __ftrace_modify_call(unsigned + return 0; + } + +-/* +- * Put 5 instructions with 16 bytes at the front of function within +- * patchable function entry nops' area. +- * +- * 0: REG_S ra, -SZREG(sp) +- * 1: auipc ra, 0x? +- * 2: jalr -?(ra) +- * 3: REG_L ra, -SZREG(sp) +- * +- * So the opcodes is: +- * 0: 0xfe113c23 (sd)/0xfe112e23 (sw) +- * 1: 0x???????? -> auipc +- * 2: 0x???????? 
-> jalr +- * 3: 0xff813083 (ld)/0xffc12083 (lw) +- */ +-#if __riscv_xlen == 64 +-#define INSN0 0xfe113c23 +-#define INSN3 0xff813083 +-#elif __riscv_xlen == 32 +-#define INSN0 0xfe112e23 +-#define INSN3 0xffc12083 +-#endif +- +-#define FUNC_ENTRY_SIZE 16 +-#define FUNC_ENTRY_JMP 4 +- + int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) + { +- unsigned int call[4] = {INSN0, 0, 0, INSN3}; +- unsigned long target = addr; +- unsigned long caller = rec->ip + FUNC_ENTRY_JMP; ++ unsigned int call[2]; + +- call[1] = to_auipc_insn((unsigned int)(target - caller)); +- call[2] = to_jalr_insn((unsigned int)(target - caller)); ++ make_call_t0(rec->ip, addr, call); + +- if (patch_text_nosync((void *)rec->ip, call, FUNC_ENTRY_SIZE)) ++ if (patch_text_nosync((void *)rec->ip, call, MCOUNT_INSN_SIZE)) + return -EPERM; + + return 0; +@@ -116,15 +90,14 @@ int ftrace_make_call(struct dyn_ftrace * + int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, + unsigned long addr) + { +- unsigned int nops[4] = {NOP4, NOP4, NOP4, NOP4}; ++ unsigned int nops[2] = {NOP4, NOP4}; + +- if (patch_text_nosync((void *)rec->ip, nops, FUNC_ENTRY_SIZE)) ++ if (patch_text_nosync((void *)rec->ip, nops, MCOUNT_INSN_SIZE)) + return -EPERM; + + return 0; + } + +- + /* + * This is called early on, and isn't wrapped by + * ftrace_arch_code_modify_{prepare,post_process}() and therefor doesn't hold +@@ -146,10 +119,10 @@ int ftrace_init_nop(struct module *mod, + int ftrace_update_ftrace_func(ftrace_func_t func) + { + int ret = __ftrace_modify_call((unsigned long)&ftrace_call, +- (unsigned long)func, true); ++ (unsigned long)func, true, true); + if (!ret) { + ret = __ftrace_modify_call((unsigned long)&ftrace_regs_call, +- (unsigned long)func, true); ++ (unsigned long)func, true, true); + } + + return ret; +@@ -166,16 +139,16 @@ int ftrace_modify_call(struct dyn_ftrace + unsigned long addr) + { + unsigned int call[2]; +- unsigned long caller = rec->ip + FUNC_ENTRY_JMP; ++ unsigned long caller 
= rec->ip; + int ret; + +- make_call(caller, old_addr, call); ++ make_call_t0(caller, old_addr, call); + ret = ftrace_check_current_call(caller, call); + + if (ret) + return ret; + +- return __ftrace_modify_call(caller, addr, true); ++ return __ftrace_modify_call(caller, addr, true, false); + } + #endif + +@@ -210,12 +183,12 @@ int ftrace_enable_ftrace_graph_caller(vo + int ret; + + ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call, +- (unsigned long)&prepare_ftrace_return, true); ++ (unsigned long)&prepare_ftrace_return, true, true); + if (ret) + return ret; + + return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call, +- (unsigned long)&prepare_ftrace_return, true); ++ (unsigned long)&prepare_ftrace_return, true, true); + } + + int ftrace_disable_ftrace_graph_caller(void) +@@ -223,12 +196,12 @@ int ftrace_disable_ftrace_graph_caller(v + int ret; + + ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call, +- (unsigned long)&prepare_ftrace_return, false); ++ (unsigned long)&prepare_ftrace_return, false, true); + if (ret) + return ret; + + return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call, +- (unsigned long)&prepare_ftrace_return, false); ++ (unsigned long)&prepare_ftrace_return, false, true); + } + #endif /* CONFIG_DYNAMIC_FTRACE */ + #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ +--- a/arch/riscv/kernel/mcount-dyn.S ++++ b/arch/riscv/kernel/mcount-dyn.S +@@ -13,8 +13,8 @@ + + .text + +-#define FENTRY_RA_OFFSET 12 +-#define ABI_SIZE_ON_STACK 72 ++#define FENTRY_RA_OFFSET 8 ++#define ABI_SIZE_ON_STACK 80 + #define ABI_A0 0 + #define ABI_A1 8 + #define ABI_A2 16 +@@ -23,10 +23,10 @@ + #define ABI_A5 40 + #define ABI_A6 48 + #define ABI_A7 56 +-#define ABI_RA 64 ++#define ABI_T0 64 ++#define ABI_RA 72 + + .macro SAVE_ABI +- addi sp, sp, -SZREG + addi sp, sp, -ABI_SIZE_ON_STACK + + REG_S a0, ABI_A0(sp) +@@ -37,6 +37,7 @@ + REG_S a5, ABI_A5(sp) + REG_S a6, ABI_A6(sp) + REG_S a7, ABI_A7(sp) ++ REG_S t0, ABI_T0(sp) + REG_S ra, 
ABI_RA(sp) + .endm + +@@ -49,24 +50,18 @@ + REG_L a5, ABI_A5(sp) + REG_L a6, ABI_A6(sp) + REG_L a7, ABI_A7(sp) ++ REG_L t0, ABI_T0(sp) + REG_L ra, ABI_RA(sp) + + addi sp, sp, ABI_SIZE_ON_STACK +- addi sp, sp, SZREG + .endm + + #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + .macro SAVE_ALL +- addi sp, sp, -SZREG + addi sp, sp, -PT_SIZE_ON_STACK + +- REG_S x1, PT_EPC(sp) +- addi sp, sp, PT_SIZE_ON_STACK +- REG_L x1, (sp) +- addi sp, sp, -PT_SIZE_ON_STACK ++ REG_S t0, PT_EPC(sp) + REG_S x1, PT_RA(sp) +- REG_L x1, PT_EPC(sp) +- + REG_S x2, PT_SP(sp) + REG_S x3, PT_GP(sp) + REG_S x4, PT_TP(sp) +@@ -100,15 +95,11 @@ + .endm + + .macro RESTORE_ALL ++ REG_L t0, PT_EPC(sp) + REG_L x1, PT_RA(sp) +- addi sp, sp, PT_SIZE_ON_STACK +- REG_S x1, (sp) +- addi sp, sp, -PT_SIZE_ON_STACK +- REG_L x1, PT_EPC(sp) + REG_L x2, PT_SP(sp) + REG_L x3, PT_GP(sp) + REG_L x4, PT_TP(sp) +- REG_L x5, PT_T0(sp) + REG_L x6, PT_T1(sp) + REG_L x7, PT_T2(sp) + REG_L x8, PT_S0(sp) +@@ -137,17 +128,16 @@ + REG_L x31, PT_T6(sp) + + addi sp, sp, PT_SIZE_ON_STACK +- addi sp, sp, SZREG + .endm + #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ + + ENTRY(ftrace_caller) + SAVE_ABI + +- addi a0, ra, -FENTRY_RA_OFFSET ++ addi a0, t0, -FENTRY_RA_OFFSET + la a1, function_trace_op + REG_L a2, 0(a1) +- REG_L a1, ABI_SIZE_ON_STACK(sp) ++ mv a1, ra + mv a3, sp + + ftrace_call: +@@ -155,8 +145,8 @@ ftrace_call: + call ftrace_stub + + #ifdef CONFIG_FUNCTION_GRAPH_TRACER +- addi a0, sp, ABI_SIZE_ON_STACK +- REG_L a1, ABI_RA(sp) ++ addi a0, sp, ABI_RA ++ REG_L a1, ABI_T0(sp) + addi a1, a1, -FENTRY_RA_OFFSET + #ifdef HAVE_FUNCTION_GRAPH_FP_TEST + mv a2, s0 +@@ -166,17 +156,17 @@ ftrace_graph_call: + call ftrace_stub + #endif + RESTORE_ABI +- ret ++ jr t0 + ENDPROC(ftrace_caller) + + #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + ENTRY(ftrace_regs_caller) + SAVE_ALL + +- addi a0, ra, -FENTRY_RA_OFFSET ++ addi a0, t0, -FENTRY_RA_OFFSET + la a1, function_trace_op + REG_L a2, 0(a1) +- REG_L a1, PT_SIZE_ON_STACK(sp) ++ mv a1, ra + mv a3, sp 
+ + ftrace_regs_call: +@@ -196,6 +186,6 @@ ftrace_graph_regs_call: + #endif + + RESTORE_ALL +- ret ++ jr t0 + ENDPROC(ftrace_regs_caller) + #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ diff --git a/queue-5.15/riscv-ftrace-remove-wasted-nops-for-riscv_isa_c.patch b/queue-5.15/riscv-ftrace-remove-wasted-nops-for-riscv_isa_c.patch new file mode 100644 index 00000000000..c59f2178452 --- /dev/null +++ b/queue-5.15/riscv-ftrace-remove-wasted-nops-for-riscv_isa_c.patch @@ -0,0 +1,55 @@ +From 409c8fb20c66df7150e592747412438c04aeb11f Mon Sep 17 00:00:00 2001 +From: Guo Ren +Date: Thu, 12 Jan 2023 04:05:58 -0500 +Subject: riscv: ftrace: Remove wasted nops for !RISCV_ISA_C + +From: Guo Ren + +commit 409c8fb20c66df7150e592747412438c04aeb11f upstream. + +When CONFIG_RISCV_ISA_C=n, -fpatchable-function-entry=8 would generate +more nops than we expect. Because it treat nop opcode as 0x00000013 +instead of 0x0001. + +Dump of assembler code for function dw_pcie_free_msi: + 0xffffffff806fce94 <+0>: sd ra,-8(sp) + 0xffffffff806fce98 <+4>: auipc ra,0xff90f + 0xffffffff806fce9c <+8>: jalr -684(ra) # 0xffffffff8000bbec + + 0xffffffff806fcea0 <+12>: ld ra,-8(sp) + 0xffffffff806fcea4 <+16>: nop /* wasted */ + 0xffffffff806fcea8 <+20>: nop /* wasted */ + 0xffffffff806fceac <+24>: nop /* wasted */ + 0xffffffff806fceb0 <+28>: nop /* wasted */ + 0xffffffff806fceb4 <+0>: addi sp,sp,-48 + 0xffffffff806fceb8 <+4>: sd s0,32(sp) + 0xffffffff806fcebc <+8>: sd s1,24(sp) + 0xffffffff806fcec0 <+12>: sd s2,16(sp) + 0xffffffff806fcec4 <+16>: sd s3,8(sp) + 0xffffffff806fcec8 <+20>: sd ra,40(sp) + 0xffffffff806fcecc <+24>: addi s0,sp,48 + +Signed-off-by: Guo Ren +Signed-off-by: Guo Ren +Link: https://lore.kernel.org/r/20230112090603.1295340-3-guoren@kernel.org +Cc: stable@vger.kernel.org +Signed-off-by: Palmer Dabbelt +Signed-off-by: Greg Kroah-Hartman +--- + arch/riscv/Makefile | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/arch/riscv/Makefile ++++ b/arch/riscv/Makefile +@@ -13,7 +13,11 @@ 
LDFLAGS_vmlinux := + ifeq ($(CONFIG_DYNAMIC_FTRACE),y) + LDFLAGS_vmlinux := --no-relax + KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY ++ifeq ($(CONFIG_RISCV_ISA_C),y) + CC_FLAGS_FTRACE := -fpatchable-function-entry=8 ++else ++ CC_FLAGS_FTRACE := -fpatchable-function-entry=4 ++endif + endif + + ifeq ($(CONFIG_CMODEL_MEDLOW),y) diff --git a/queue-5.15/riscv-jump_label-fixup-unaligned-arch_static_branch-function.patch b/queue-5.15/riscv-jump_label-fixup-unaligned-arch_static_branch-function.patch new file mode 100644 index 00000000000..4b2e60bca9e --- /dev/null +++ b/queue-5.15/riscv-jump_label-fixup-unaligned-arch_static_branch-function.patch @@ -0,0 +1,61 @@ +From 9ddfc3cd806081ce1f6c9c2f988cbb031f35d28f Mon Sep 17 00:00:00 2001 +From: Andy Chiu +Date: Mon, 6 Feb 2023 04:04:40 -0500 +Subject: riscv: jump_label: Fixup unaligned arch_static_branch function + +From: Andy Chiu + +commit 9ddfc3cd806081ce1f6c9c2f988cbb031f35d28f upstream. + +Runtime code patching must be done at a naturally aligned address, or we +may execute on a partial instruction. + +We have encountered problems traced back to static jump functions during +the test. We switched the tracer randomly every 1~5 seconds on a +dual-core QEMU setup and found the kernel stuck at a static branch +where it jumps to itself. + +The reason is that the static branch was 2-byte but not 4-byte aligned. +Then, the kernel would patch the instruction, either J or NOP, with two +half-word stores if the machine does not have efficient unaligned +accesses. Thus, moments exist where half of the NOP mixes with the other +half of the J when transitioning the branch. In our particular case, on +a little-endian machine, the upper half of the NOP was mixed with the +lower part of the J when enabling the branch, resulting in a jump that +jumped to itself. Conversely, it would result in a HINT instruction when +disabling the branch, but it might not be observable.
+ +ARM64 does not have this problem since all instructions must be 4-byte +aligned. + +Fixes: ebc00dde8a97 ("riscv: Add jump-label implementation") +Link: https://lore.kernel.org/linux-riscv/20220913094252.3555240-6-andy.chiu@sifive.com/ +Reviewed-by: Greentime Hu +Signed-off-by: Andy Chiu +Signed-off-by: Guo Ren +Link: https://lore.kernel.org/r/20230206090440.1255001-1-guoren@kernel.org +Cc: stable@vger.kernel.org +Signed-off-by: Palmer Dabbelt +Signed-off-by: Greg Kroah-Hartman +--- + arch/riscv/include/asm/jump_label.h | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/arch/riscv/include/asm/jump_label.h ++++ b/arch/riscv/include/asm/jump_label.h +@@ -18,6 +18,7 @@ static __always_inline bool arch_static_ + bool branch) + { + asm_volatile_goto( ++ " .align 2 \n\t" + " .option push \n\t" + " .option norelax \n\t" + " .option norvc \n\t" +@@ -39,6 +40,7 @@ static __always_inline bool arch_static_ + bool branch) + { + asm_volatile_goto( ++ " .align 2 \n\t" + " .option push \n\t" + " .option norelax \n\t" + " .option norvc \n\t" diff --git a/queue-5.15/riscv-mm-fix-regression-due-to-update_mmu_cache-change.patch b/queue-5.15/riscv-mm-fix-regression-due-to-update_mmu_cache-change.patch new file mode 100644 index 00000000000..e2442dcc8d9 --- /dev/null +++ b/queue-5.15/riscv-mm-fix-regression-due-to-update_mmu_cache-change.patch @@ -0,0 +1,46 @@ +From b49f700668fff7565b945dce823def79bff59bb0 Mon Sep 17 00:00:00 2001 +From: Sergey Matyukevich +Date: Mon, 30 Jan 2023 00:18:18 +0300 +Subject: riscv: mm: fix regression due to update_mmu_cache change + +From: Sergey Matyukevich + +commit b49f700668fff7565b945dce823def79bff59bb0 upstream. + +This is a partial revert of the commit 4bd1d80efb5a ("riscv: mm: notify +remote harts about mmu cache updates"). 
Original commit included two +loosely related changes serving the same purpose of fixing stale TLB +entries causing user-space application crash: +- introduce deferred per-ASID TLB flush for CPUs not running the task +- switch to per-ASID TLB flush on all CPUs running the task in update_mmu_cache + +According to report and discussion in [1], the second part caused a +regression on Renesas RZ/Five SoC. For now restore the old behavior +of the update_mmu_cache. + +[1] https://lore.kernel.org/linux-riscv/20220829205219.283543-1-geomatsi@gmail.com/ + +Fixes: 4bd1d80efb5a ("riscv: mm: notify remote harts about mmu cache updates") +Reported-by: "Lad, Prabhakar" +Signed-off-by: Sergey Matyukevich +Link: trailer, so that it can be parsed with git's trailer functionality? +Reviewed-by: Conor Dooley +Link: https://lore.kernel.org/r/20230129211818.686557-1-geomatsi@gmail.com +Cc: stable@vger.kernel.org +Signed-off-by: Palmer Dabbelt +Signed-off-by: Greg Kroah-Hartman +--- + arch/riscv/include/asm/pgtable.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/riscv/include/asm/pgtable.h ++++ b/arch/riscv/include/asm/pgtable.h +@@ -386,7 +386,7 @@ static inline void update_mmu_cache(stru + * Relying on flush_tlb_fix_spurious_fault would suffice, but + * the extra traps reduce performance. So, eagerly SFENCE.VMA. 
+ */ +- flush_tlb_page(vma, address); ++ local_flush_tlb_page(address); + } + + static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, diff --git a/queue-5.15/riscv-mm-perform-bpf-exhandler-fixup-on-page-fault.patch b/queue-5.15/riscv-mm-perform-bpf-exhandler-fixup-on-page-fault.patch new file mode 100644 index 00000000000..e7efc70d4c8 --- /dev/null +++ b/queue-5.15/riscv-mm-perform-bpf-exhandler-fixup-on-page-fault.patch @@ -0,0 +1,54 @@ +From 416721ff05fddc58ca531b6f069de250301de6e5 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= +Date: Tue, 14 Feb 2023 17:25:15 +0100 +Subject: riscv, mm: Perform BPF exhandler fixup on page fault +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Björn Töpel + +commit 416721ff05fddc58ca531b6f069de250301de6e5 upstream. + +Commit 21855cac82d3 ("riscv/mm: Prevent kernel module to access user +memory without uaccess routines") added early exits/deaths for page +faults stemming from accesses to user-space without using proper +uaccess routines (where sstatus.SUM is set). + +Unfortunately, this is too strict for some BPF programs, which rely +on BPF exhandler fixups. These BPF programs load "BTF pointers". A +BTF pointer could either be a valid kernel pointer or NULL, but not a +userspace address. + +Resolve the problem by calling the fixup handler in the early exit +path.
+ +Fixes: 21855cac82d3 ("riscv/mm: Prevent kernel module to access user memory without uaccess routines") +Signed-off-by: Björn Töpel +Link: https://lore.kernel.org/r/20230214162515.184827-1-bjorn@kernel.org +Cc: stable@vger.kernel.org +Signed-off-by: Palmer Dabbelt +Signed-off-by: Greg Kroah-Hartman +--- + arch/riscv/mm/fault.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/arch/riscv/mm/fault.c ++++ b/arch/riscv/mm/fault.c +@@ -271,10 +271,12 @@ asmlinkage void do_page_fault(struct pt_ + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; + +- if (!user_mode(regs) && addr < TASK_SIZE && +- unlikely(!(regs->status & SR_SUM))) +- die_kernel_fault("access to user memory without uaccess routines", +- addr, regs); ++ if (!user_mode(regs) && addr < TASK_SIZE && unlikely(!(regs->status & SR_SUM))) { ++ if (fixup_exception(regs)) ++ return; ++ ++ die_kernel_fault("access to user memory without uaccess routines", addr, regs); ++ } + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); + diff --git a/queue-5.15/scsi-ses-don-t-attach-if-enclosure-has-no-components.patch b/queue-5.15/scsi-ses-don-t-attach-if-enclosure-has-no-components.patch new file mode 100644 index 00000000000..2d1c56fb199 --- /dev/null +++ b/queue-5.15/scsi-ses-don-t-attach-if-enclosure-has-no-components.patch @@ -0,0 +1,41 @@ +From 3fe97ff3d94934649abb0652028dd7296170c8d0 Mon Sep 17 00:00:00 2001 +From: James Bottomley +Date: Sat, 28 Nov 2020 15:27:21 -0800 +Subject: scsi: ses: Don't attach if enclosure has no components + +From: James Bottomley + +commit 3fe97ff3d94934649abb0652028dd7296170c8d0 upstream. + +An enclosure with no components can't usefully be operated by the driver +(since effectively it has nothing to manage), so report the problem and +don't attach. Not attaching also fixes an oops which could occur if the +driver tries to manage a zero component enclosure. 
+ +[mkp: Switched to KERN_WARNING since this scenario is common] + +Link: https://lore.kernel.org/r/c5deac044ac409e32d9ad9968ce0dcbc996bfc7a.camel@linux.ibm.com +Cc: stable@vger.kernel.org +Reported-by: Ding Hui +Signed-off-by: James Bottomley +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/ses.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/drivers/scsi/ses.c ++++ b/drivers/scsi/ses.c +@@ -704,6 +704,12 @@ static int ses_intf_add(struct device *c + type_ptr[0] == ENCLOSURE_COMPONENT_ARRAY_DEVICE) + components += type_ptr[1]; + } ++ ++ if (components == 0) { ++ sdev_printk(KERN_WARNING, sdev, "enclosure has no enumerated components\n"); ++ goto err_free; ++ } ++ + ses_dev->page1 = buf; + ses_dev->page1_len = len; + buf = NULL; diff --git a/queue-5.15/scsi-ses-fix-possible-addl_desc_ptr-out-of-bounds-accesses.patch b/queue-5.15/scsi-ses-fix-possible-addl_desc_ptr-out-of-bounds-accesses.patch new file mode 100644 index 00000000000..2f96d15cf65 --- /dev/null +++ b/queue-5.15/scsi-ses-fix-possible-addl_desc_ptr-out-of-bounds-accesses.patch @@ -0,0 +1,114 @@ +From db95d4df71cb55506425b6e4a5f8d68e3a765b63 Mon Sep 17 00:00:00 2001 +From: Tomas Henzl +Date: Thu, 2 Feb 2023 17:24:49 +0100 +Subject: scsi: ses: Fix possible addl_desc_ptr out-of-bounds accesses + +From: Tomas Henzl + +commit db95d4df71cb55506425b6e4a5f8d68e3a765b63 upstream. + +Sanitize possible addl_desc_ptr out-of-bounds accesses in +ses_enclosure_data_process(). + +Link: https://lore.kernel.org/r/20230202162451.15346-3-thenzl@redhat.com +Cc: stable@vger.kernel.org +Signed-off-by: Tomas Henzl +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/ses.c | 35 ++++++++++++++++++++++++++--------- + 1 file changed, 26 insertions(+), 9 deletions(-) + +--- a/drivers/scsi/ses.c ++++ b/drivers/scsi/ses.c +@@ -433,8 +433,8 @@ int ses_match_host(struct enclosure_devi + } + #endif /* 0 */ + +-static void ses_process_descriptor(struct enclosure_component *ecomp, +- unsigned char *desc) ++static int ses_process_descriptor(struct enclosure_component *ecomp, ++ unsigned char *desc, int max_desc_len) + { + int eip = desc[0] & 0x10; + int invalid = desc[0] & 0x80; +@@ -445,22 +445,32 @@ static void ses_process_descriptor(struc + unsigned char *d; + + if (invalid) +- return; ++ return 0; + + switch (proto) { + case SCSI_PROTOCOL_FCP: + if (eip) { ++ if (max_desc_len <= 7) ++ return 1; + d = desc + 4; + slot = d[3]; + } + break; + case SCSI_PROTOCOL_SAS: ++ + if (eip) { ++ if (max_desc_len <= 27) ++ return 1; + d = desc + 4; + slot = d[3]; + d = desc + 8; +- } else ++ } else { ++ if (max_desc_len <= 23) ++ return 1; + d = desc + 4; ++ } ++ ++ + /* only take the phy0 addr */ + addr = (u64)d[12] << 56 | + (u64)d[13] << 48 | +@@ -477,6 +487,8 @@ static void ses_process_descriptor(struc + } + ecomp->slot = slot; + scomp->addr = addr; ++ ++ return 0; + } + + struct efd { +@@ -549,7 +561,7 @@ static void ses_enclosure_data_process(s + /* skip past overall descriptor */ + desc_ptr += len + 4; + } +- if (ses_dev->page10) ++ if (ses_dev->page10 && ses_dev->page10_len > 9) + addl_desc_ptr = ses_dev->page10 + 8; + type_ptr = ses_dev->page1_types; + components = 0; +@@ -557,6 +569,7 @@ static void ses_enclosure_data_process(s + for (j = 0; j < type_ptr[1]; j++) { + char *name = NULL; + struct enclosure_component *ecomp; ++ int max_desc_len; + + if (desc_ptr) { + if (desc_ptr >= buf + page7_len) { +@@ -583,10 +596,14 @@ static void ses_enclosure_data_process(s + ecomp = &edev->component[components++]; + + if (!IS_ERR(ecomp)) { +- if (addl_desc_ptr) +- ses_process_descriptor( 
+- ecomp, +- addl_desc_ptr); ++ if (addl_desc_ptr) { ++ max_desc_len = ses_dev->page10_len - ++ (addl_desc_ptr - ses_dev->page10); ++ if (ses_process_descriptor(ecomp, ++ addl_desc_ptr, ++ max_desc_len)) ++ addl_desc_ptr = NULL; ++ } + if (create) + enclosure_component_register( + ecomp); diff --git a/queue-5.15/scsi-ses-fix-possible-desc_ptr-out-of-bounds-accesses.patch b/queue-5.15/scsi-ses-fix-possible-desc_ptr-out-of-bounds-accesses.patch new file mode 100644 index 00000000000..986e20ad165 --- /dev/null +++ b/queue-5.15/scsi-ses-fix-possible-desc_ptr-out-of-bounds-accesses.patch @@ -0,0 +1,48 @@ +From 801ab13d50cf3d26170ee073ea8bb4eececb76ab Mon Sep 17 00:00:00 2001 +From: Tomas Henzl +Date: Thu, 2 Feb 2023 17:24:50 +0100 +Subject: scsi: ses: Fix possible desc_ptr out-of-bounds accesses + +From: Tomas Henzl + +commit 801ab13d50cf3d26170ee073ea8bb4eececb76ab upstream. + +Sanitize possible desc_ptr out-of-bounds accesses in +ses_enclosure_data_process(). + +Link: https://lore.kernel.org/r/20230202162451.15346-4-thenzl@redhat.com +Cc: stable@vger.kernel.org +Signed-off-by: Tomas Henzl +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/ses.c | 14 +++++++++----- + 1 file changed, 9 insertions(+), 5 deletions(-) + +--- a/drivers/scsi/ses.c ++++ b/drivers/scsi/ses.c +@@ -572,15 +572,19 @@ static void ses_enclosure_data_process(s + int max_desc_len; + + if (desc_ptr) { +- if (desc_ptr >= buf + page7_len) { ++ if (desc_ptr + 3 >= buf + page7_len) { + desc_ptr = NULL; + } else { + len = (desc_ptr[2] << 8) + desc_ptr[3]; + desc_ptr += 4; +- /* Add trailing zero - pushes into +- * reserved space */ +- desc_ptr[len] = '\0'; +- name = desc_ptr; ++ if (desc_ptr + len > buf + page7_len) ++ desc_ptr = NULL; ++ else { ++ /* Add trailing zero - pushes into ++ * reserved space */ ++ desc_ptr[len] = '\0'; ++ name = desc_ptr; ++ } + } + } + if (type_ptr[0] == ENCLOSURE_COMPONENT_DEVICE || diff --git a/queue-5.15/scsi-ses-fix-slab-out-of-bounds-in-ses_enclosure_data_process.patch b/queue-5.15/scsi-ses-fix-slab-out-of-bounds-in-ses_enclosure_data_process.patch new file mode 100644 index 00000000000..d4cf236032d --- /dev/null +++ b/queue-5.15/scsi-ses-fix-slab-out-of-bounds-in-ses_enclosure_data_process.patch @@ -0,0 +1,43 @@ +From 9b4f5028e493cb353a5c8f5c45073eeea0303abd Mon Sep 17 00:00:00 2001 +From: Tomas Henzl +Date: Thu, 2 Feb 2023 17:24:48 +0100 +Subject: scsi: ses: Fix slab-out-of-bounds in ses_enclosure_data_process() + +From: Tomas Henzl + +commit 9b4f5028e493cb353a5c8f5c45073eeea0303abd upstream. + +A fix for: + +BUG: KASAN: slab-out-of-bounds in ses_enclosure_data_process+0x949/0xe30 [ses] +Read of size 1 at addr ffff88a1b043a451 by task systemd-udevd/3271 + +Checking after (and before in next loop) addl_desc_ptr[1] is sufficient, we +expect the size to be sanitized before first access to addl_desc_ptr[1]. +Make sure we don't walk beyond end of page. + +Link: https://lore.kernel.org/r/20230202162451.15346-2-thenzl@redhat.com +Cc: stable@vger.kernel.org +Signed-off-by: Tomas Henzl +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/ses.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/drivers/scsi/ses.c ++++ b/drivers/scsi/ses.c +@@ -603,9 +603,11 @@ static void ses_enclosure_data_process(s + /* these elements are optional */ + type_ptr[0] == ENCLOSURE_COMPONENT_SCSI_TARGET_PORT || + type_ptr[0] == ENCLOSURE_COMPONENT_SCSI_INITIATOR_PORT || +- type_ptr[0] == ENCLOSURE_COMPONENT_CONTROLLER_ELECTRONICS)) ++ type_ptr[0] == ENCLOSURE_COMPONENT_CONTROLLER_ELECTRONICS)) { + addl_desc_ptr += addl_desc_ptr[1] + 2; +- ++ if (addl_desc_ptr + 1 >= ses_dev->page10 + ses_dev->page10_len) ++ addl_desc_ptr = NULL; ++ } + } + } + kfree(buf); diff --git a/queue-5.15/scsi-ses-fix-slab-out-of-bounds-in-ses_intf_remove.patch b/queue-5.15/scsi-ses-fix-slab-out-of-bounds-in-ses_intf_remove.patch new file mode 100644 index 00000000000..9d22b329dcf --- /dev/null +++ b/queue-5.15/scsi-ses-fix-slab-out-of-bounds-in-ses_intf_remove.patch @@ -0,0 +1,38 @@ +From 578797f0c8cbc2e3ec5fc0dab87087b4c7073686 Mon Sep 17 00:00:00 2001 +From: Tomas Henzl +Date: Thu, 2 Feb 2023 17:24:51 +0100 +Subject: scsi: ses: Fix slab-out-of-bounds in ses_intf_remove() + +From: Tomas Henzl + +commit 578797f0c8cbc2e3ec5fc0dab87087b4c7073686 upstream. + +A fix for: + +BUG: KASAN: slab-out-of-bounds in ses_intf_remove+0x23f/0x270 [ses] +Read of size 8 at addr ffff88a10d32e5d8 by task rmmod/12013 + +When edev->components is zero, accessing edev->component[0] members is +wrong. + +Link: https://lore.kernel.org/r/20230202162451.15346-5-thenzl@redhat.com +Cc: stable@vger.kernel.org +Signed-off-by: Tomas Henzl +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/ses.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/scsi/ses.c ++++ b/drivers/scsi/ses.c +@@ -856,7 +856,8 @@ static void ses_intf_remove_enclosure(st + kfree(ses_dev->page2); + kfree(ses_dev); + +- kfree(edev->component[0].scratch); ++ if (edev->components) ++ kfree(edev->component[0].scratch); + + put_device(&edev->edev); + enclosure_unregister(edev); diff --git a/queue-5.15/series b/queue-5.15/series index ae65b288763..cf991c10feb 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -542,3 +542,25 @@ scsi-qla2xxx-fix-dma-api-call-trace-on-nvme-ls-requests.patch scsi-qla2xxx-remove-unintended-flag-clearing.patch scsi-qla2xxx-fix-erroneous-link-down.patch scsi-qla2xxx-remove-increment-of-interface-err-cnt.patch +scsi-ses-don-t-attach-if-enclosure-has-no-components.patch +scsi-ses-fix-slab-out-of-bounds-in-ses_enclosure_data_process.patch +scsi-ses-fix-possible-addl_desc_ptr-out-of-bounds-accesses.patch +scsi-ses-fix-possible-desc_ptr-out-of-bounds-accesses.patch +scsi-ses-fix-slab-out-of-bounds-in-ses_intf_remove.patch +risc-v-add-a-spin_shadow_stack-declaration.patch +riscv-mm-fix-regression-due-to-update_mmu_cache-change.patch +riscv-jump_label-fixup-unaligned-arch_static_branch-function.patch +riscv-mm-perform-bpf-exhandler-fixup-on-page-fault.patch +riscv-ftrace-remove-wasted-nops-for-riscv_isa_c.patch +riscv-ftrace-reduce-the-detour-code-size-to-half.patch +mips-dts-ci20-fix-otg-power-gpio.patch +pci-pm-observe-reset-delay-irrespective-of-bridge_d3.patch +pci-hotplug-allow-marking-devices-as-disconnected-during-bind-unbind.patch +pci-avoid-flr-for-amd-fch-ahci-adapters.patch +iommu-vt-d-avoid-superfluous-iotlb-tracking-in-lazy-mode.patch +vfio-type1-prevent-underflow-of-locked_vm-via-exec.patch +vfio-type1-track-locked_vm-per-dma.patch +vfio-type1-restore-locked_vm.patch +drm-i915-quirks-add-inverted-backlight-quirk-for-hp-14-r206nv.patch 
+drm-radeon-fix-edp-for-single-display-imac11-2.patch +drm-i915-don-t-use-bar-mappings-for-ring-buffers-with-llc.patch diff --git a/queue-5.15/vfio-type1-prevent-underflow-of-locked_vm-via-exec.patch b/queue-5.15/vfio-type1-prevent-underflow-of-locked_vm-via-exec.patch new file mode 100644 index 00000000000..f6f7453330b --- /dev/null +++ b/queue-5.15/vfio-type1-prevent-underflow-of-locked_vm-via-exec.patch @@ -0,0 +1,129 @@ +From 046eca5018f8a5dd1dc2cedf87fb5843b9ea3026 Mon Sep 17 00:00:00 2001 +From: Steve Sistare +Date: Tue, 31 Jan 2023 08:58:04 -0800 +Subject: vfio/type1: prevent underflow of locked_vm via exec() + +From: Steve Sistare + +commit 046eca5018f8a5dd1dc2cedf87fb5843b9ea3026 upstream. + +When a vfio container is preserved across exec, the task does not change, +but it gets a new mm with locked_vm=0, and loses the count from existing +dma mappings. If the user later unmaps a dma mapping, locked_vm underflows +to a large unsigned value, and a subsequent dma map request fails with +ENOMEM in __account_locked_vm. + +To avoid underflow, grab and save the mm at the time a dma is mapped. +Use that mm when adjusting locked_vm, rather than re-acquiring the saved +task's mm, which may have changed. If the saved mm is dead, do nothing. + +locked_vm is incremented for existing mappings in a subsequent patch. 
+ +Fixes: 73fa0d10d077 ("vfio: Type1 IOMMU implementation") +Cc: stable@vger.kernel.org +Signed-off-by: Steve Sistare +Reviewed-by: Kevin Tian +Reviewed-by: Jason Gunthorpe +Link: https://lore.kernel.org/r/1675184289-267876-3-git-send-email-steven.sistare@oracle.com +Signed-off-by: Alex Williamson +Signed-off-by: Greg Kroah-Hartman +--- + drivers/vfio/vfio_iommu_type1.c | 41 +++++++++++++--------------------------- + 1 file changed, 14 insertions(+), 27 deletions(-) + +--- a/drivers/vfio/vfio_iommu_type1.c ++++ b/drivers/vfio/vfio_iommu_type1.c +@@ -100,6 +100,7 @@ struct vfio_dma { + struct task_struct *task; + struct rb_root pfn_list; /* Ex-user pinned pfn list */ + unsigned long *bitmap; ++ struct mm_struct *mm; + }; + + struct vfio_batch { +@@ -424,8 +425,8 @@ static int vfio_lock_acct(struct vfio_dm + if (!npage) + return 0; + +- mm = async ? get_task_mm(dma->task) : dma->task->mm; +- if (!mm) ++ mm = dma->mm; ++ if (async && !mmget_not_zero(mm)) + return -ESRCH; /* process exited */ + + ret = mmap_write_lock_killable(mm); +@@ -798,8 +799,8 @@ static int vfio_pin_page_external(struct + struct mm_struct *mm; + int ret; + +- mm = get_task_mm(dma->task); +- if (!mm) ++ mm = dma->mm; ++ if (!mmget_not_zero(mm)) + return -ENODEV; + + ret = vaddr_get_pfns(mm, vaddr, 1, dma->prot, pfn_base, pages); +@@ -809,7 +810,7 @@ static int vfio_pin_page_external(struct + ret = 0; + + if (do_accounting && !is_invalid_reserved_pfn(*pfn_base)) { +- ret = vfio_lock_acct(dma, 1, true); ++ ret = vfio_lock_acct(dma, 1, false); + if (ret) { + put_pfn(*pfn_base, dma->prot); + if (ret == -ENOMEM) +@@ -1179,6 +1180,7 @@ static void vfio_remove_dma(struct vfio_ + vfio_unmap_unpin(iommu, dma, true); + vfio_unlink_dma(iommu, dma); + put_task_struct(dma->task); ++ mmdrop(dma->mm); + vfio_dma_bitmap_free(dma); + if (dma->vaddr_invalid) { + iommu->vaddr_invalid_count--; +@@ -1649,29 +1651,15 @@ static int vfio_dma_do_map(struct vfio_i + * against the locked memory limit and we need to be able 
to do both + * outside of this call path as pinning can be asynchronous via the + * external interfaces for mdev devices. RLIMIT_MEMLOCK requires a +- * task_struct and VM locked pages requires an mm_struct, however +- * holding an indefinite mm reference is not recommended, therefore we +- * only hold a reference to a task. We could hold a reference to +- * current, however QEMU uses this call path through vCPU threads, +- * which can be killed resulting in a NULL mm and failure in the unmap +- * path when called via a different thread. Avoid this problem by +- * using the group_leader as threads within the same group require +- * both CLONE_THREAD and CLONE_VM and will therefore use the same +- * mm_struct. +- * +- * Previously we also used the task for testing CAP_IPC_LOCK at the +- * time of pinning and accounting, however has_capability() makes use +- * of real_cred, a copy-on-write field, so we can't guarantee that it +- * matches group_leader, or in fact that it might not change by the +- * time it's evaluated. If a process were to call MAP_DMA with +- * CAP_IPC_LOCK but later drop it, it doesn't make sense that they +- * possibly see different results for an iommu_mapped vfio_dma vs +- * externally mapped. Therefore track CAP_IPC_LOCK in vfio_dma at the +- * time of calling MAP_DMA. ++ * task_struct. Save the group_leader so that all DMA tracking uses ++ * the same task, to make debugging easier. VM locked pages requires ++ * an mm_struct, so grab the mm in case the task dies. 
+ */ + get_task_struct(current->group_leader); + dma->task = current->group_leader; + dma->lock_cap = capable(CAP_IPC_LOCK); ++ dma->mm = current->mm; ++ mmgrab(dma->mm); + + dma->pfn_list = RB_ROOT; + +@@ -3168,9 +3156,8 @@ static int vfio_iommu_type1_dma_rw_chunk + !(dma->prot & IOMMU_READ)) + return -EPERM; + +- mm = get_task_mm(dma->task); +- +- if (!mm) ++ mm = dma->mm; ++ if (!mmget_not_zero(mm)) + return -EPERM; + + if (kthread) diff --git a/queue-5.15/vfio-type1-restore-locked_vm.patch b/queue-5.15/vfio-type1-restore-locked_vm.patch new file mode 100644 index 00000000000..f07e544ad1c --- /dev/null +++ b/queue-5.15/vfio-type1-restore-locked_vm.patch @@ -0,0 +1,82 @@ +From 90fdd158a695d70403163f9a0e4efc5b20f3fd3e Mon Sep 17 00:00:00 2001 +From: Steve Sistare +Date: Tue, 31 Jan 2023 08:58:06 -0800 +Subject: vfio/type1: restore locked_vm + +From: Steve Sistare + +commit 90fdd158a695d70403163f9a0e4efc5b20f3fd3e upstream. + +When a vfio container is preserved across exec or fork-exec, the new +task's mm has a locked_vm count of 0. After a dma vaddr is updated using +VFIO_DMA_MAP_FLAG_VADDR, locked_vm remains 0, and the pinned memory does +not count against the task's RLIMIT_MEMLOCK. + +To restore the correct locked_vm count, when VFIO_DMA_MAP_FLAG_VADDR is +used and the dma's mm has changed, add the dma's locked_vm count to +the new mm->locked_vm, subject to the rlimit, and subtract it from the +old mm->locked_vm. 
+ +Fixes: c3cbab24db38 ("vfio/type1: implement interfaces to update vaddr") +Cc: stable@vger.kernel.org +Signed-off-by: Steve Sistare +Reviewed-by: Kevin Tian +Reviewed-by: Jason Gunthorpe +Link: https://lore.kernel.org/r/1675184289-267876-5-git-send-email-steven.sistare@oracle.com +Signed-off-by: Alex Williamson +Signed-off-by: Greg Kroah-Hartman +--- + drivers/vfio/vfio_iommu_type1.c | 35 +++++++++++++++++++++++++++++++++++ + 1 file changed, 35 insertions(+) + +--- a/drivers/vfio/vfio_iommu_type1.c ++++ b/drivers/vfio/vfio_iommu_type1.c +@@ -1576,6 +1576,38 @@ static bool vfio_iommu_iova_dma_valid(st + return list_empty(iova); + } + ++static int vfio_change_dma_owner(struct vfio_dma *dma) ++{ ++ struct task_struct *task = current->group_leader; ++ struct mm_struct *mm = current->mm; ++ long npage = dma->locked_vm; ++ bool lock_cap; ++ int ret; ++ ++ if (mm == dma->mm) ++ return 0; ++ ++ lock_cap = capable(CAP_IPC_LOCK); ++ ret = mm_lock_acct(task, mm, lock_cap, npage); ++ if (ret) ++ return ret; ++ ++ if (mmget_not_zero(dma->mm)) { ++ mm_lock_acct(dma->task, dma->mm, dma->lock_cap, -npage); ++ mmput(dma->mm); ++ } ++ ++ if (dma->task != task) { ++ put_task_struct(dma->task); ++ dma->task = get_task_struct(task); ++ } ++ mmdrop(dma->mm); ++ dma->mm = mm; ++ mmgrab(dma->mm); ++ dma->lock_cap = lock_cap; ++ return 0; ++} ++ + static int vfio_dma_do_map(struct vfio_iommu *iommu, + struct vfio_iommu_type1_dma_map *map) + { +@@ -1625,6 +1657,9 @@ static int vfio_dma_do_map(struct vfio_i + dma->size != size) { + ret = -EINVAL; + } else { ++ ret = vfio_change_dma_owner(dma); ++ if (ret) ++ goto out_unlock; + dma->vaddr = vaddr; + dma->vaddr_invalid = false; + iommu->vaddr_invalid_count--; diff --git a/queue-5.15/vfio-type1-track-locked_vm-per-dma.patch b/queue-5.15/vfio-type1-track-locked_vm-per-dma.patch new file mode 100644 index 00000000000..fe81d820056 --- /dev/null +++ b/queue-5.15/vfio-type1-track-locked_vm-per-dma.patch @@ -0,0 +1,70 @@ +From 
18e292705ba21cc9b3227b9ad5b1c28973605ee5 Mon Sep 17 00:00:00 2001 +From: Steve Sistare +Date: Tue, 31 Jan 2023 08:58:05 -0800 +Subject: vfio/type1: track locked_vm per dma + +From: Steve Sistare + +commit 18e292705ba21cc9b3227b9ad5b1c28973605ee5 upstream. + +Track locked_vm per dma struct, and create a new subroutine, both for use +in a subsequent patch. No functional change. + +Fixes: c3cbab24db38 ("vfio/type1: implement interfaces to update vaddr") +Cc: stable@vger.kernel.org +Signed-off-by: Steve Sistare +Reviewed-by: Kevin Tian +Reviewed-by: Jason Gunthorpe +Link: https://lore.kernel.org/r/1675184289-267876-4-git-send-email-steven.sistare@oracle.com +Signed-off-by: Alex Williamson +Signed-off-by: Greg Kroah-Hartman +--- + drivers/vfio/vfio_iommu_type1.c | 23 +++++++++++++++++------ + 1 file changed, 17 insertions(+), 6 deletions(-) + +--- a/drivers/vfio/vfio_iommu_type1.c ++++ b/drivers/vfio/vfio_iommu_type1.c +@@ -101,6 +101,7 @@ struct vfio_dma { + struct rb_root pfn_list; /* Ex-user pinned pfn list */ + unsigned long *bitmap; + struct mm_struct *mm; ++ size_t locked_vm; + }; + + struct vfio_batch { +@@ -417,6 +418,19 @@ static int vfio_iova_put_vfio_pfn(struct + return ret; + } + ++static int mm_lock_acct(struct task_struct *task, struct mm_struct *mm, ++ bool lock_cap, long npage) ++{ ++ int ret = mmap_write_lock_killable(mm); ++ ++ if (ret) ++ return ret; ++ ++ ret = __account_locked_vm(mm, abs(npage), npage > 0, task, lock_cap); ++ mmap_write_unlock(mm); ++ return ret; ++} ++ + static int vfio_lock_acct(struct vfio_dma *dma, long npage, bool async) + { + struct mm_struct *mm; +@@ -429,12 +443,9 @@ static int vfio_lock_acct(struct vfio_dm + if (async && !mmget_not_zero(mm)) + return -ESRCH; /* process exited */ + +- ret = mmap_write_lock_killable(mm); +- if (!ret) { +- ret = __account_locked_vm(mm, abs(npage), npage > 0, dma->task, +- dma->lock_cap); +- mmap_write_unlock(mm); +- } ++ ret = mm_lock_acct(dma->task, mm, dma->lock_cap, npage); ++ if (!ret) ++ 
dma->locked_vm += npage; + + if (async) + mmput(mm);