4.19-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Mon, 8 Jan 2024 12:39:49 +0000 (13:39 +0100)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Mon, 8 Jan 2024 12:39:49 +0000 (13:39 +0100)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 8 Jan 2024 12:39:49 +0000 (13:39 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 8 Jan 2024 12:39:49 +0000 (13:39 +0100)
diff --git a/queue-4.19/firewire-ohci-suppress-unexpected-system-reboot-in-amd-ryzen-machines-and-asm108x-vt630x-pcie-cards.patch b/queue-4.19/firewire-ohci-suppress-unexpected-system-reboot-in-amd-ryzen-machines-and-asm108x-vt630x-pcie-cards.patch

new file mode 100644 (file)

index 0000000..e595cd8
--- /dev/null
+++ b/queue-4.19/firewire-ohci-suppress-unexpected-system-reboot-in-amd-ryzen-machines-and-asm108x-vt630x-pcie-cards.patch
@@ -0,0 +1,125 @@
+From ac9184fbb8478dab4a0724b279f94956b69be827 Mon Sep 17 00:00:00 2001
+From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
+Date: Tue, 2 Jan 2024 20:01:50 +0900
+Subject: firewire: ohci: suppress unexpected system reboot in AMD Ryzen machines and ASM108x/VT630x PCIe cards
+
+From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
+
+commit ac9184fbb8478dab4a0724b279f94956b69be827 upstream.
+
+VIA VT6306/6307/6308 provides a PCI interface compliant with 1394 OHCI. When
+the hardware is combined with an Asmedia ASM1083/1085 PCIe-to-PCI bus bridge,
+it appears that accesses to its 'Isochronous Cycle Timer' register (offset
+0xf0 in PCI memory space) often cause an unexpected system reboot in any
+type of AMD Ryzen machine (both 0x17 and 0x19 families). It does not
+appear in other types of machines (AMD pre-Ryzen machines and Intel
+machines, at least), or with other OHCI 1394 hardware (e.g. Texas
+Instruments).
+
+The issue became apparent with commit dcadfd7f7c74 ("firewire: core:
+use union for callback of transaction completion"), added in the v6.5 kernel.
+It changed the 1394 OHCI driver to access the register every time it
+dispatches a local asynchronous transaction. However, the issue also exists
+in older kernel versions when running on an AMD Ryzen machine, since
+access to the register is required to maintain bus time. It is not
+hard to imagine that users experience the unexpected system reboot when
+generating a bus reset by plugging any device in, or when the register is
+read by time-aware application programs; e.g. audio sample processing.
+
+This commit suppresses the unexpected system reboot with this combination of
+hardware. It avoids the access itself. As a result, the software stack can
+no longer provide the hardware time to unit drivers, userspace
+applications, and nodes on the same IEEE 1394 bus. This is a clear
+disadvantage since time-aware application programs require it, but
+time-unaware applications become usable again; e.g. sbp2.
+
+Cc: stable@vger.kernel.org
+Reported-by: Jiri Slaby <jirislaby@kernel.org>
+Closes: https://bugzilla.suse.com/show_bug.cgi?id=1215436
+Reported-by: Mario Limonciello <mario.limonciello@amd.com>
+Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217994
+Reported-by: Tobias Gruetzmacher <tobias-lists@23.gs>
+Closes: https://sourceforge.net/p/linux1394/mailman/message/58711901/
+Closes: https://bugzilla.redhat.com/show_bug.cgi?id=2240973
+Closes: https://bugs.launchpad.net/linux/+bug/2043905
+Link: https://lore.kernel.org/r/20240102110150.244475-1-o-takashi@sakamocchi.jp
+Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/firewire/ohci.c |   51 ++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 51 insertions(+)
+
+--- a/drivers/firewire/ohci.c
++++ b/drivers/firewire/ohci.c
+@@ -292,6 +292,51 @@ static char ohci_driver_name[] = KBUILD_
+ #define QUIRK_TI_SLLZ059              0x20
+ #define QUIRK_IR_WAKE                 0x40
+ 
++// On the PCI Express Root Complex in any type of AMD Ryzen machine, VIA VT6306/6307/6308 with an
++// Asmedia ASM1083/1085 bridge has the problem that read accesses to the 'Isochronous Cycle Timer'
++// register (at offset 0xf0 in PCI I/O space) often cause an unexpected system reboot. The mechanism
++// is not clear, since read accesses to other registers (e.g. the 'Node ID' register) are safe
++// enough; it is probably due to detection of some type of PCIe error.
++#define QUIRK_REBOOT_BY_CYCLE_TIMER_READ      0x80000000
++
++#if IS_ENABLED(CONFIG_X86)
++
++static bool has_reboot_by_cycle_timer_read_quirk(const struct fw_ohci *ohci)
++{
++      return !!(ohci->quirks & QUIRK_REBOOT_BY_CYCLE_TIMER_READ);
++}
++
++#define PCI_DEVICE_ID_ASMEDIA_ASM108X 0x1080
++
++static bool detect_vt630x_with_asm1083_on_amd_ryzen_machine(const struct pci_dev *pdev)
++{
++      const struct pci_dev *pcie_to_pci_bridge;
++
++      // Detect any type of AMD Ryzen machine.
++      if (!static_cpu_has(X86_FEATURE_ZEN))
++              return false;
++
++      // Detect VIA VT6306/6307/6308.
++      if (pdev->vendor != PCI_VENDOR_ID_VIA)
++              return false;
++      if (pdev->device != PCI_DEVICE_ID_VIA_VT630X)
++              return false;
++
++      // Detect Asmedia ASM1083/1085.
++      pcie_to_pci_bridge = pdev->bus->self;
++      if (pcie_to_pci_bridge->vendor != PCI_VENDOR_ID_ASMEDIA)
++              return false;
++      if (pcie_to_pci_bridge->device != PCI_DEVICE_ID_ASMEDIA_ASM108X)
++              return false;
++
++      return true;
++}
++
++#else
++#define has_reboot_by_cycle_timer_read_quirk(ohci) false
++#define detect_vt630x_with_asm1083_on_amd_ryzen_machine(pdev) false
++#endif
++
+ /* In case of multiple matches in ohci_quirks[], only the first one is used. */
+ static const struct {
+       unsigned short vendor, device, revision, flags;
+@@ -1730,6 +1775,9 @@ static u32 get_cycle_time(struct fw_ohci
+       s32 diff01, diff12;
+       int i;
+ 
++      if (has_reboot_by_cycle_timer_read_quirk(ohci))
++              return 0;
++
+       c2 = reg_read(ohci, OHCI1394_IsochronousCycleTimer);
+ 
+       if (ohci->quirks & QUIRK_CYCLE_TIMER) {
+@@ -3633,6 +3681,9 @@ static int pci_probe(struct pci_dev *dev
+       if (param_quirks)
+               ohci->quirks = param_quirks;
+ 
++      if (detect_vt630x_with_asm1083_on_amd_ryzen_machine(dev))
++              ohci->quirks |= QUIRK_REBOOT_BY_CYCLE_TIMER_READ;
++
+       /*
+        * Because dma_alloc_coherent() allocates at least one page,
+        * we save space by using a common buffer for the AR request/
diff --git a/queue-4.19/mm-fix-unmap_mapping_range-high-bits-shift-bug.patch b/queue-4.19/mm-fix-unmap_mapping_range-high-bits-shift-bug.patch

new file mode 100644 (file)

index 0000000..e01bbf2
--- /dev/null
+++ b/queue-4.19/mm-fix-unmap_mapping_range-high-bits-shift-bug.patch
@@ -0,0 +1,81 @@
+From 9eab0421fa94a3dde0d1f7e36ab3294fc306c99d Mon Sep 17 00:00:00 2001
+From: Jiajun Xie <jiajun.xie.sh@gmail.com>
+Date: Wed, 20 Dec 2023 13:28:39 +0800
+Subject: mm: fix unmap_mapping_range high bits shift bug
+
+From: Jiajun Xie <jiajun.xie.sh@gmail.com>
+
+commit 9eab0421fa94a3dde0d1f7e36ab3294fc306c99d upstream.
+
+The bug happens when the highest bit of holebegin is 1. Suppose holebegin is
+0x8000000111111000: after the shift, hba would be 0xfff8000000111111, and
+vma_interval_tree_foreach would then fail the lookup or produce the wrong
+result.
+
+error call seq e.g.:
+- mmap(..., offset=0x8000000111111000)
+  |- syscall(mmap, ... unsigned long, off):
+     |- ksys_mmap_pgoff( ... , off >> PAGE_SHIFT);
+
+  here pgoff is correctly shifted to 0x8000000111111,
+  but pass 0x8000000111111000 as holebegin to unmap
+  would then cause terrible result, as shown below:
+
+- unmap_mapping_range(..., loff_t const holebegin)
+  |- pgoff_t hba = holebegin >> PAGE_SHIFT;
+          /* hba = 0xfff8000000111111 unexpectedly */
+
+The issue happens in Heterogeneous computing, where the device(e.g.
+gpu) and host share the same virtual address space.
+
+A simple workflow pattern which hit the issue is:
+        /* host */
+    1. userspace first mmap a file backed VA range with specified offset.
+                        e.g. (offset=0x800..., mmap return: va_a)
+    2. write some data to the corresponding sys page
+                         e.g. (va_a = 0xAABB)
+        /* device */
+    3. gpu workload touches VA, triggers gpu fault and notify the host.
+        /* host */
+    4. receives the gpu fault notification, then it will:
+            4.1 unmap host pages and also takes care of cpu tlb
+                  (use unmap_mapping_range with offset=0x800...)
+            4.2 migrate sys page to device
+            4.3 setup device page table and resolve device fault.
+        /* device */
+    5. gpu workload continued, it accessed va_a and got 0xAABB.
+    6. gpu workload continued, it wrote 0xBBCC to va_a.
+        /* host */
+    7. userspace access va_a, as expected, it will:
+            7.1 trigger cpu vm fault.
+            7.2 driver handling fault to migrate gpu local page to host.
+    8. userspace then could correctly get 0xBBCC from va_a
+    9. done
+
+But in step 4.1, if we hit the bug this patch mentioned, then userspace
+would never trigger cpu fault, and still get the old value: 0xAABB.
+
+Making holebegin unsigned first fixes the bug.
+
+Link: https://lkml.kernel.org/r/20231220052839.26970-1-jiajun.xie.sh@gmail.com
+Signed-off-by: Jiajun Xie <jiajun.xie.sh@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/memory.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -3042,8 +3042,8 @@ void unmap_mapping_pages(struct address_
+ void unmap_mapping_range(struct address_space *mapping,
+               loff_t const holebegin, loff_t const holelen, int even_cows)
+ {
+-      pgoff_t hba = holebegin >> PAGE_SHIFT;
+-      pgoff_t hlen = (holelen + PAGE_SIZE - 1) >> PAGE_SHIFT;
++      pgoff_t hba = (pgoff_t)(holebegin) >> PAGE_SHIFT;
++      pgoff_t hlen = ((pgoff_t)(holelen) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ 
+       /* Check for overflow. */
+       if (sizeof(holelen) > sizeof(hlen)) {
diff --git a/queue-4.19/series b/queue-4.19/series

index dfb64c927d673119912145d83d9bd49748c4cb18..ea26c3942beae89bddf718b42a9be627f69fc42f 100644 (file)
--- a/queue-4.19/series
+++ b/queue-4.19/series
@@ -11,3 +11,5 @@ net-qla3xxx-fix-potential-memleak-in-ql_alloc_buffer.patch
  asix-add-check-for-usbnet_get_endpoints.patch
  bnxt_en-remove-mis-applied-code-from-bnxt_cfg_ntp_fi.patch
  mm-memory-failure-check-the-mapcount-of-the-precise-.patch
+firewire-ohci-suppress-unexpected-system-reboot-in-amd-ryzen-machines-and-asm108x-vt630x-pcie-cards.patch
+mm-fix-unmap_mapping_range-high-bits-shift-bug.patch
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Mon, 8 Jan 2024 12:39:49 +0000 (13:39 +0100)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Mon, 8 Jan 2024 12:39:49 +0000 (13:39 +0100)
queue-4.19/firewire-ohci-suppress-unexpected-system-reboot-in-amd-ryzen-machines-and-asm108x-vt630x-pcie-cards.patch	[new file with mode: 0644]	patch \| blob
queue-4.19/mm-fix-unmap_mapping_range-high-bits-shift-bug.patch	[new file with mode: 0644]	patch \| blob
queue-4.19/series		patch \| blob \| blame \| history