git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.10-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 25 Apr 2022 10:27:35 +0000 (12:27 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 25 Apr 2022 10:27:35 +0000 (12:27 +0200)
added patches:
ata-pata_marvell-check-the-bmdma_addr-beforing-reading.patch
dma-at_xdmac-fix-a-missing-check-on-list-iterator.patch
edac-synopsys-read-the-error-count-from-the-correct-register.patch
mm-hugetlb-allow-for-high-userspace-addresses.patch
mm-mmu_notifier.c-fix-race-in-mmu_interval_notifier_remove.patch
net-atlantic-invert-deep-par-in-pm-functions-preventing-null-derefs.patch
oom_kill.c-futex-delay-the-oom-reaper-to-allow-time-for-proper-futex-cleanup.patch

queue-5.10/ata-pata_marvell-check-the-bmdma_addr-beforing-reading.patch [new file with mode: 0644]
queue-5.10/dma-at_xdmac-fix-a-missing-check-on-list-iterator.patch [new file with mode: 0644]
queue-5.10/edac-synopsys-read-the-error-count-from-the-correct-register.patch [new file with mode: 0644]
queue-5.10/mm-hugetlb-allow-for-high-userspace-addresses.patch [new file with mode: 0644]
queue-5.10/mm-mmu_notifier.c-fix-race-in-mmu_interval_notifier_remove.patch [new file with mode: 0644]
queue-5.10/net-atlantic-invert-deep-par-in-pm-functions-preventing-null-derefs.patch [new file with mode: 0644]
queue-5.10/oom_kill.c-futex-delay-the-oom-reaper-to-allow-time-for-proper-futex-cleanup.patch [new file with mode: 0644]
queue-5.10/series

diff --git a/queue-5.10/ata-pata_marvell-check-the-bmdma_addr-beforing-reading.patch b/queue-5.10/ata-pata_marvell-check-the-bmdma_addr-beforing-reading.patch
new file mode 100644 (file)
index 0000000..c79f199
--- /dev/null
@@ -0,0 +1,40 @@
+From aafa9f958342db36c17ac2a7f1b841032c96feb4 Mon Sep 17 00:00:00 2001
+From: Zheyu Ma <zheyuma97@gmail.com>
+Date: Thu, 21 Apr 2022 09:39:20 +0800
+Subject: ata: pata_marvell: Check the 'bmdma_addr' beforing reading
+
+From: Zheyu Ma <zheyuma97@gmail.com>
+
+commit aafa9f958342db36c17ac2a7f1b841032c96feb4 upstream.
+
+Before detecting the cable type on the dma bar, the driver should check
+whether the 'bmdma_addr' is zero, which means the adapter does not
+support DMA, otherwise we will get the following error:
+
+[    5.146634] Bad IO access at port 0x1 (return inb(port))
+[    5.147206] WARNING: CPU: 2 PID: 303 at lib/iomap.c:44 ioread8+0x4a/0x60
+[    5.150856] RIP: 0010:ioread8+0x4a/0x60
+[    5.160238] Call Trace:
+[    5.160470]  <TASK>
+[    5.160674]  marvell_cable_detect+0x6e/0xc0 [pata_marvell]
+[    5.161728]  ata_eh_recover+0x3520/0x6cc0
+[    5.168075]  ata_do_eh+0x49/0x3c0
+
+Signed-off-by: Zheyu Ma <zheyuma97@gmail.com>
+Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/ata/pata_marvell.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/ata/pata_marvell.c
++++ b/drivers/ata/pata_marvell.c
+@@ -83,6 +83,8 @@ static int marvell_cable_detect(struct a
+       switch(ap->port_no)
+       {
+       case 0:
++              if (!ap->ioaddr.bmdma_addr)
++                      return ATA_CBL_PATA_UNK;
+               if (ioread8(ap->ioaddr.bmdma_addr + 1) & 1)
+                       return ATA_CBL_PATA40;
+               return ATA_CBL_PATA80;
diff --git a/queue-5.10/dma-at_xdmac-fix-a-missing-check-on-list-iterator.patch b/queue-5.10/dma-at_xdmac-fix-a-missing-check-on-list-iterator.patch
new file mode 100644 (file)
index 0000000..25b5d4d
--- /dev/null
@@ -0,0 +1,57 @@
+From 206680c4e46b62fd8909385e0874a36952595b85 Mon Sep 17 00:00:00 2001
+From: Xiaomeng Tong <xiam0nd.tong@gmail.com>
+Date: Sun, 27 Mar 2022 14:11:54 +0800
+Subject: dma: at_xdmac: fix a missing check on list iterator
+
+From: Xiaomeng Tong <xiam0nd.tong@gmail.com>
+
+commit 206680c4e46b62fd8909385e0874a36952595b85 upstream.
+
+The bug is here:
+       __func__, desc, &desc->tx_dma_desc.phys, ret, cookie, residue);
+
+The list iterator 'desc' will point to a bogus position containing
+HEAD if the list is empty or no element is found. To keep dev_dbg()
+from printing an invalid address, use a new variable 'iter' as the
+list iterator, and keep the original variable 'desc' as a dedicated
+pointer to the found element.
+
+Cc: stable@vger.kernel.org
+Fixes: 82e2424635f4c ("dmaengine: xdmac: fix print warning on dma_addr_t variable")
+Signed-off-by: Xiaomeng Tong <xiam0nd.tong@gmail.com>
+Link: https://lore.kernel.org/r/20220327061154.4867-1-xiam0nd.tong@gmail.com
+Signed-off-by: Vinod Koul <vkoul@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/dma/at_xdmac.c |   12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+--- a/drivers/dma/at_xdmac.c
++++ b/drivers/dma/at_xdmac.c
+@@ -1390,7 +1390,7 @@ at_xdmac_tx_status(struct dma_chan *chan
+ {
+       struct at_xdmac_chan    *atchan = to_at_xdmac_chan(chan);
+       struct at_xdmac         *atxdmac = to_at_xdmac(atchan->chan.device);
+-      struct at_xdmac_desc    *desc, *_desc;
++      struct at_xdmac_desc    *desc, *_desc, *iter;
+       struct list_head        *descs_list;
+       enum dma_status         ret;
+       int                     residue, retry;
+@@ -1505,11 +1505,13 @@ at_xdmac_tx_status(struct dma_chan *chan
+        * microblock.
+        */
+       descs_list = &desc->descs_list;
+-      list_for_each_entry_safe(desc, _desc, descs_list, desc_node) {
+-              dwidth = at_xdmac_get_dwidth(desc->lld.mbr_cfg);
+-              residue -= (desc->lld.mbr_ubc & 0xffffff) << dwidth;
+-              if ((desc->lld.mbr_nda & 0xfffffffc) == cur_nda)
++      list_for_each_entry_safe(iter, _desc, descs_list, desc_node) {
++              dwidth = at_xdmac_get_dwidth(iter->lld.mbr_cfg);
++              residue -= (iter->lld.mbr_ubc & 0xffffff) << dwidth;
++              if ((iter->lld.mbr_nda & 0xfffffffc) == cur_nda) {
++                      desc = iter;
+                       break;
++              }
+       }
+       residue += cur_ubc << dwidth;
diff --git a/queue-5.10/edac-synopsys-read-the-error-count-from-the-correct-register.patch b/queue-5.10/edac-synopsys-read-the-error-count-from-the-correct-register.patch
new file mode 100644 (file)
index 0000000..7d6c370
--- /dev/null
@@ -0,0 +1,61 @@
+From e2932d1f6f055b2af2114c7e64a26dc1b5593d0c Mon Sep 17 00:00:00 2001
+From: Shubhrajyoti Datta <shubhrajyoti.datta@xilinx.com>
+Date: Thu, 14 Apr 2022 15:58:13 +0530
+Subject: EDAC/synopsys: Read the error count from the correct register
+
+From: Shubhrajyoti Datta <shubhrajyoti.datta@xilinx.com>
+
+commit e2932d1f6f055b2af2114c7e64a26dc1b5593d0c upstream.
+
+Currently, the error count is read wrongly from the status register. Read
+the count from the proper error count register (ERRCNT).
+
+  [ bp: Massage. ]
+
+Fixes: b500b4a029d5 ("EDAC, synopsys: Add ECC support for ZynqMP DDR controller")
+Signed-off-by: Shubhrajyoti Datta <shubhrajyoti.datta@xilinx.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Acked-by: Michal Simek <michal.simek@xilinx.com>
+Cc: <stable@vger.kernel.org>
+Link: https://lore.kernel.org/r/20220414102813.4468-1-shubhrajyoti.datta@xilinx.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/edac/synopsys_edac.c |   16 +++++++++++-----
+ 1 file changed, 11 insertions(+), 5 deletions(-)
+
+--- a/drivers/edac/synopsys_edac.c
++++ b/drivers/edac/synopsys_edac.c
+@@ -163,6 +163,11 @@
+ #define ECC_STAT_CECNT_SHIFT          8
+ #define ECC_STAT_BITNUM_MASK          0x7F
++/* ECC error count register definitions */
++#define ECC_ERRCNT_UECNT_MASK         0xFFFF0000
++#define ECC_ERRCNT_UECNT_SHIFT                16
++#define ECC_ERRCNT_CECNT_MASK         0xFFFF
++
+ /* DDR QOS Interrupt register definitions */
+ #define DDR_QOS_IRQ_STAT_OFST         0x20200
+ #define DDR_QOSUE_MASK                        0x4
+@@ -418,15 +423,16 @@ static int zynqmp_get_error_info(struct
+       base = priv->baseaddr;
+       p = &priv->stat;
++      regval = readl(base + ECC_ERRCNT_OFST);
++      p->ce_cnt = regval & ECC_ERRCNT_CECNT_MASK;
++      p->ue_cnt = (regval & ECC_ERRCNT_UECNT_MASK) >> ECC_ERRCNT_UECNT_SHIFT;
++      if (!p->ce_cnt)
++              goto ue_err;
++
+       regval = readl(base + ECC_STAT_OFST);
+       if (!regval)
+               return 1;
+-      p->ce_cnt = (regval & ECC_STAT_CECNT_MASK) >> ECC_STAT_CECNT_SHIFT;
+-      p->ue_cnt = (regval & ECC_STAT_UECNT_MASK) >> ECC_STAT_UECNT_SHIFT;
+-      if (!p->ce_cnt)
+-              goto ue_err;
+-
+       p->ceinfo.bitpos = (regval & ECC_STAT_BITNUM_MASK);
+       regval = readl(base + ECC_CEADDR0_OFST);
diff --git a/queue-5.10/mm-hugetlb-allow-for-high-userspace-addresses.patch b/queue-5.10/mm-hugetlb-allow-for-high-userspace-addresses.patch
new file mode 100644 (file)
index 0000000..50541d6
--- /dev/null
@@ -0,0 +1,145 @@
+From 5f24d5a579d1eace79d505b148808a850b417d4c Mon Sep 17 00:00:00 2001
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+Date: Thu, 21 Apr 2022 16:35:46 -0700
+Subject: mm, hugetlb: allow for "high" userspace addresses
+
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+
+commit 5f24d5a579d1eace79d505b148808a850b417d4c upstream.
+
+This is a fix for commit f6795053dac8 ("mm: mmap: Allow for "high"
+userspace addresses") for hugetlb.
+
+This patch adds support for "high" userspace addresses that are
+optionally supported on the system and have to be requested via a hint
+mechanism ("high" addr parameter to mmap).
+
+Architectures such as powerpc and x86 achieve this by making changes to
+their architectural versions of hugetlb_get_unmapped_area() function.
+However, arm64 uses the generic version of that function.
+
+So take into account arch_get_mmap_base() and arch_get_mmap_end() in
+hugetlb_get_unmapped_area().  To allow that, move those two macros out
+of mm/mmap.c into include/linux/sched/mm.h
+
+If these macros are not defined in architectural code then they default
+to (TASK_SIZE) and (base) so should not introduce any behavioural
+changes to architectures that do not define them.
+
+For the time being, only ARM64 is affected by this change.
+
+Catalin (ARM64) said
+ "We should have fixed hugetlb_get_unmapped_area() as well when we added
+  support for 52-bit VA. The reason for commit f6795053dac8 was to
+  prevent normal mmap() from returning addresses above 48-bit by default
+  as some user-space had hard assumptions about this.
+
+  It's a slight ABI change if you do this for hugetlb_get_unmapped_area()
+  but I doubt anyone would notice. It's more likely that the current
+  behaviour would cause issues, so I'd rather have them consistent.
+
+  Basically when arm64 gained support for 52-bit addresses we did not
+  want user-space calling mmap() to suddenly get such high addresses,
+  otherwise we could have inadvertently broken some programs (similar
+  behaviour to x86 here). Hence we added commit f6795053dac8. But we
+  missed hugetlbfs which could still get such high mmap() addresses. So
+  in theory that's a potential regression that should have been addressed
+  at the same time as commit f6795053dac8 (and before arm64 enabled
+  52-bit addresses)"
+
+Link: https://lkml.kernel.org/r/ab847b6edb197bffdfe189e70fb4ac76bfe79e0d.1650033747.git.christophe.leroy@csgroup.eu
+Fixes: f6795053dac8 ("mm: mmap: Allow for "high" userspace addresses")
+Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
+Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Steve Capper <steve.capper@arm.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: <stable@vger.kernel.org>   [5.0.x]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/hugetlbfs/inode.c     |    9 +++++----
+ include/linux/sched/mm.h |    8 ++++++++
+ mm/mmap.c                |    8 --------
+ 3 files changed, 13 insertions(+), 12 deletions(-)
+
+--- a/fs/hugetlbfs/inode.c
++++ b/fs/hugetlbfs/inode.c
+@@ -206,7 +206,7 @@ hugetlb_get_unmapped_area_bottomup(struc
+       info.flags = 0;
+       info.length = len;
+       info.low_limit = current->mm->mmap_base;
+-      info.high_limit = TASK_SIZE;
++      info.high_limit = arch_get_mmap_end(addr);
+       info.align_mask = PAGE_MASK & ~huge_page_mask(h);
+       info.align_offset = 0;
+       return vm_unmapped_area(&info);
+@@ -222,7 +222,7 @@ hugetlb_get_unmapped_area_topdown(struct
+       info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+       info.length = len;
+       info.low_limit = max(PAGE_SIZE, mmap_min_addr);
+-      info.high_limit = current->mm->mmap_base;
++      info.high_limit = arch_get_mmap_base(addr, current->mm->mmap_base);
+       info.align_mask = PAGE_MASK & ~huge_page_mask(h);
+       info.align_offset = 0;
+       addr = vm_unmapped_area(&info);
+@@ -237,7 +237,7 @@ hugetlb_get_unmapped_area_topdown(struct
+               VM_BUG_ON(addr != -ENOMEM);
+               info.flags = 0;
+               info.low_limit = current->mm->mmap_base;
+-              info.high_limit = TASK_SIZE;
++              info.high_limit = arch_get_mmap_end(addr);
+               addr = vm_unmapped_area(&info);
+       }
+@@ -251,6 +251,7 @@ hugetlb_get_unmapped_area(struct file *f
+       struct mm_struct *mm = current->mm;
+       struct vm_area_struct *vma;
+       struct hstate *h = hstate_file(file);
++      const unsigned long mmap_end = arch_get_mmap_end(addr);
+       if (len & ~huge_page_mask(h))
+               return -EINVAL;
+@@ -266,7 +267,7 @@ hugetlb_get_unmapped_area(struct file *f
+       if (addr) {
+               addr = ALIGN(addr, huge_page_size(h));
+               vma = find_vma(mm, addr);
+-              if (TASK_SIZE - len >= addr &&
++              if (mmap_end - len >= addr &&
+                   (!vma || addr + len <= vm_start_gap(vma)))
+                       return addr;
+       }
+--- a/include/linux/sched/mm.h
++++ b/include/linux/sched/mm.h
+@@ -106,6 +106,14 @@ static inline void mm_update_next_owner(
+ #endif /* CONFIG_MEMCG */
+ #ifdef CONFIG_MMU
++#ifndef arch_get_mmap_end
++#define arch_get_mmap_end(addr)       (TASK_SIZE)
++#endif
++
++#ifndef arch_get_mmap_base
++#define arch_get_mmap_base(addr, base) (base)
++#endif
++
+ extern void arch_pick_mmap_layout(struct mm_struct *mm,
+                                 struct rlimit *rlim_stack);
+ extern unsigned long
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -2140,14 +2140,6 @@ unsigned long vm_unmapped_area(struct vm
+       return addr;
+ }
+-#ifndef arch_get_mmap_end
+-#define arch_get_mmap_end(addr)       (TASK_SIZE)
+-#endif
+-
+-#ifndef arch_get_mmap_base
+-#define arch_get_mmap_base(addr, base) (base)
+-#endif
+-
+ /* Get an address range which is currently unmapped.
+  * For shmat() with addr=0.
+  *
diff --git a/queue-5.10/mm-mmu_notifier.c-fix-race-in-mmu_interval_notifier_remove.patch b/queue-5.10/mm-mmu_notifier.c-fix-race-in-mmu_interval_notifier_remove.patch
new file mode 100644 (file)
index 0000000..2750226
--- /dev/null
@@ -0,0 +1,84 @@
+From 319561669a59d8e9206ab311ae5433ef92fd79d1 Mon Sep 17 00:00:00 2001
+From: Alistair Popple <apopple@nvidia.com>
+Date: Thu, 21 Apr 2022 16:36:10 -0700
+Subject: mm/mmu_notifier.c: fix race in mmu_interval_notifier_remove()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Alistair Popple <apopple@nvidia.com>
+
+commit 319561669a59d8e9206ab311ae5433ef92fd79d1 upstream.
+
+In some cases it is possible for mmu_interval_notifier_remove() to race
+with mn_tree_inv_end() allowing it to return while the notifier data
+structure is still in use.  Consider the following sequence:
+
+  CPU0 - mn_tree_inv_end()            CPU1 - mmu_interval_notifier_remove()
+  ----------------------------------- ------------------------------------
+                                      spin_lock(subscriptions->lock);
+                                      seq = subscriptions->invalidate_seq;
+  spin_lock(subscriptions->lock);     spin_unlock(subscriptions->lock);
+  subscriptions->invalidate_seq++;
+                                      wait_event(invalidate_seq != seq);
+                                      return;
+  interval_tree_remove(interval_sub); kfree(interval_sub);
+  spin_unlock(subscriptions->lock);
+  wake_up_all();
+
+As the wait_event() condition is true it will return immediately.  This
+can lead to use-after-free type errors if the caller frees the data
+structure containing the interval notifier subscription while it is
+still on a deferred list.  Fix this by taking the appropriate lock when
+reading invalidate_seq to ensure proper synchronisation.
+
+I observed this whilst running stress testing during some development.
+You do have to be pretty unlucky, but it leads to the usual problems of
+use-after-free (memory corruption, kernel crash, difficult to diagnose
+WARN_ON, etc).
+
+Link: https://lkml.kernel.org/r/20220420043734.476348-1-apopple@nvidia.com
+Fixes: 99cb252f5e68 ("mm/mmu_notifier: add an interval tree notifier")
+Signed-off-by: Alistair Popple <apopple@nvidia.com>
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Cc: Christian König <christian.koenig@amd.com>
+Cc: John Hubbard <jhubbard@nvidia.com>
+Cc: Ralph Campbell <rcampbell@nvidia.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/mmu_notifier.c |   14 +++++++++++++-
+ 1 file changed, 13 insertions(+), 1 deletion(-)
+
+--- a/mm/mmu_notifier.c
++++ b/mm/mmu_notifier.c
+@@ -1043,6 +1043,18 @@ int mmu_interval_notifier_insert_locked(
+ }
+ EXPORT_SYMBOL_GPL(mmu_interval_notifier_insert_locked);
++static bool
++mmu_interval_seq_released(struct mmu_notifier_subscriptions *subscriptions,
++                        unsigned long seq)
++{
++      bool ret;
++
++      spin_lock(&subscriptions->lock);
++      ret = subscriptions->invalidate_seq != seq;
++      spin_unlock(&subscriptions->lock);
++      return ret;
++}
++
+ /**
+  * mmu_interval_notifier_remove - Remove a interval notifier
+  * @interval_sub: Interval subscription to unregister
+@@ -1090,7 +1102,7 @@ void mmu_interval_notifier_remove(struct
+       lock_map_release(&__mmu_notifier_invalidate_range_start_map);
+       if (seq)
+               wait_event(subscriptions->wq,
+-                         READ_ONCE(subscriptions->invalidate_seq) != seq);
++                         mmu_interval_seq_released(subscriptions, seq));
+       /* pairs with mmgrab in mmu_interval_notifier_insert() */
+       mmdrop(mm);
diff --git a/queue-5.10/net-atlantic-invert-deep-par-in-pm-functions-preventing-null-derefs.patch b/queue-5.10/net-atlantic-invert-deep-par-in-pm-functions-preventing-null-derefs.patch
new file mode 100644 (file)
index 0000000..8f4ebbb
--- /dev/null
@@ -0,0 +1,95 @@
+From cbe6c3a8f8f4315b96e46e1a1c70393c06d95a4c Mon Sep 17 00:00:00 2001
+From: Manuel Ullmann <labre@posteo.de>
+Date: Mon, 18 Apr 2022 00:20:01 +0200
+Subject: net: atlantic: invert deep par in pm functions, preventing null derefs
+
+From: Manuel Ullmann <labre@posteo.de>
+
+commit cbe6c3a8f8f4315b96e46e1a1c70393c06d95a4c upstream.
+
+This will reset deeply on freeze and thaw instead of suspend and
+resume and prevent null pointer dereferences of the uninitialized ring
+0 buffer while thawing.
+
+The impact is an indefinitely hanging kernel. You can't switch
+consoles after this and the only possible user interaction is SysRq.
+
+BUG: kernel NULL pointer dereference
+RIP: 0010:aq_ring_rx_fill+0xcf/0x210 [atlantic]
+aq_vec_init+0x85/0xe0 [atlantic]
+aq_nic_init+0xf7/0x1d0 [atlantic]
+atl_resume_common+0x4f/0x100 [atlantic]
+pci_pm_thaw+0x42/0xa0
+
+resolves in aq_ring.o to
+
+```
+0000000000000ae0 <aq_ring_rx_fill>:
+{
+/* ... */
+ baf:  48 8b 43 08             mov    0x8(%rbx),%rax
+               buff->flags = 0U; /* buff is NULL */
+```
+
+The bug has been present since the introduction of the new pm code in
+8aaa112a57c1 ("net: atlantic: refactoring pm logic") and was hidden
+until 8ce84271697a ("net: atlantic: changes for multi-TC support"),
+which refactored the aq_vec_{free,alloc} functions into
+aq_vec_{,ring}_{free,alloc}, but is technically not wrong. The
+original functions just always reinitialized the buffers on S3/S4. If
+the interface is down before freezing, the bug does not occur. It does
+not matter, whether the initrd contains and loads the module before
+thawing.
+
+So the fix is to invert the boolean parameter deep in all pm function
+calls, which was clearly intended to be set like that.
+
+First report was on Github [1], which you have to guess from the
+resume logs in the posted dmesg snippet. Recently I posted one on
+Bugzilla [2], since I did not have an AQC device so far.
+
+#regzbot introduced: 8ce84271697a
+#regzbot from: koo5 <kolman.jindrich@gmail.com>
+#regzbot monitor: https://github.com/Aquantia/AQtion/issues/32
+
+Fixes: 8aaa112a57c1 ("net: atlantic: refactoring pm logic")
+Link: https://github.com/Aquantia/AQtion/issues/32 [1]
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=215798 [2]
+Cc: stable@vger.kernel.org
+Reported-by: koo5 <kolman.jindrich@gmail.com>
+Signed-off-by: Manuel Ullmann <labre@posteo.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
++++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
+@@ -450,22 +450,22 @@ err_exit:
+ static int aq_pm_freeze(struct device *dev)
+ {
+-      return aq_suspend_common(dev, false);
++      return aq_suspend_common(dev, true);
+ }
+ static int aq_pm_suspend_poweroff(struct device *dev)
+ {
+-      return aq_suspend_common(dev, true);
++      return aq_suspend_common(dev, false);
+ }
+ static int aq_pm_thaw(struct device *dev)
+ {
+-      return atl_resume_common(dev, false);
++      return atl_resume_common(dev, true);
+ }
+ static int aq_pm_resume_restore(struct device *dev)
+ {
+-      return atl_resume_common(dev, true);
++      return atl_resume_common(dev, false);
+ }
+ static const struct dev_pm_ops aq_pm_ops = {
diff --git a/queue-5.10/oom_kill.c-futex-delay-the-oom-reaper-to-allow-time-for-proper-futex-cleanup.patch b/queue-5.10/oom_kill.c-futex-delay-the-oom-reaper-to-allow-time-for-proper-futex-cleanup.patch
new file mode 100644 (file)
index 0000000..58f1d2b
--- /dev/null
@@ -0,0 +1,206 @@
+From e4a38402c36e42df28eb1a5394be87e6571fb48a Mon Sep 17 00:00:00 2001
+From: Nico Pache <npache@redhat.com>
+Date: Thu, 21 Apr 2022 16:36:01 -0700
+Subject: oom_kill.c: futex: delay the OOM reaper to allow time for proper futex cleanup
+
+From: Nico Pache <npache@redhat.com>
+
+commit e4a38402c36e42df28eb1a5394be87e6571fb48a upstream.
+
+The pthread struct is allocated on PRIVATE|ANONYMOUS memory [1] which
+can be targeted by the oom reaper.  This mapping is used to store the
+futex robust list head; the kernel does not keep a copy of the robust
+list and instead references a userspace address to maintain the
+robustness during a process death.
+
+A race can occur between exit_mm and the oom reaper that allows the oom
+reaper to free the memory of the futex robust list before the exit path
+has handled the futex death:
+
+    CPU1                               CPU2
+    --------------------------------------------------------------------
+    page_fault
+    do_exit "signal"
+    wake_oom_reaper
+                                        oom_reaper
+                                        oom_reap_task_mm (invalidates mm)
+    exit_mm
+    exit_mm_release
+    futex_exit_release
+    futex_cleanup
+    exit_robust_list
+    get_user (EFAULT- can't access memory)
+
+If the get_user EFAULT's, the kernel will be unable to recover the
+waiters on the robust_list, leaving userspace mutexes hung indefinitely.
+
+Delay the OOM reaper, allowing more time for the exit path to perform
+the futex cleanup.
+
+Reproducer: https://gitlab.com/jsavitz/oom_futex_reproducer
+
+Based on a patch by Michal Hocko.
+
+Link: https://elixir.bootlin.com/glibc/glibc-2.35/source/nptl/allocatestack.c#L370 [1]
+Link: https://lkml.kernel.org/r/20220414144042.677008-1-npache@redhat.com
+Fixes: 212925802454 ("mm: oom: let oom_reap_task and exit_mmap run concurrently")
+Signed-off-by: Joel Savitz <jsavitz@redhat.com>
+Signed-off-by: Nico Pache <npache@redhat.com>
+Co-developed-by: Joel Savitz <jsavitz@redhat.com>
+Suggested-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Rafael Aquini <aquini@redhat.com>
+Cc: Waiman Long <longman@redhat.com>
+Cc: Herton R. Krzesinski <herton@redhat.com>
+Cc: Juri Lelli <juri.lelli@redhat.com>
+Cc: Vincent Guittot <vincent.guittot@linaro.org>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Ben Segall <bsegall@google.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Davidlohr Bueso <dave@stgolabs.net>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Joel Savitz <jsavitz@redhat.com>
+Cc: Darren Hart <dvhart@infradead.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/sched.h |    1 
+ mm/oom_kill.c         |   54 +++++++++++++++++++++++++++++++++++++-------------
+ 2 files changed, 41 insertions(+), 14 deletions(-)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1325,6 +1325,7 @@ struct task_struct {
+       int                             pagefault_disabled;
+ #ifdef CONFIG_MMU
+       struct task_struct              *oom_reaper_list;
++      struct timer_list               oom_reaper_timer;
+ #endif
+ #ifdef CONFIG_VMAP_STACK
+       struct vm_struct                *stack_vm_area;
+--- a/mm/oom_kill.c
++++ b/mm/oom_kill.c
+@@ -633,7 +633,7 @@ done:
+        */
+       set_bit(MMF_OOM_SKIP, &mm->flags);
+-      /* Drop a reference taken by wake_oom_reaper */
++      /* Drop a reference taken by queue_oom_reaper */
+       put_task_struct(tsk);
+ }
+@@ -643,12 +643,12 @@ static int oom_reaper(void *unused)
+               struct task_struct *tsk = NULL;
+               wait_event_freezable(oom_reaper_wait, oom_reaper_list != NULL);
+-              spin_lock(&oom_reaper_lock);
++              spin_lock_irq(&oom_reaper_lock);
+               if (oom_reaper_list != NULL) {
+                       tsk = oom_reaper_list;
+                       oom_reaper_list = tsk->oom_reaper_list;
+               }
+-              spin_unlock(&oom_reaper_lock);
++              spin_unlock_irq(&oom_reaper_lock);
+               if (tsk)
+                       oom_reap_task(tsk);
+@@ -657,22 +657,48 @@ static int oom_reaper(void *unused)
+       return 0;
+ }
+-static void wake_oom_reaper(struct task_struct *tsk)
++static void wake_oom_reaper(struct timer_list *timer)
+ {
+-      /* mm is already queued? */
+-      if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags))
++      struct task_struct *tsk = container_of(timer, struct task_struct,
++                      oom_reaper_timer);
++      struct mm_struct *mm = tsk->signal->oom_mm;
++      unsigned long flags;
++
++      /* The victim managed to terminate on its own - see exit_mmap */
++      if (test_bit(MMF_OOM_SKIP, &mm->flags)) {
++              put_task_struct(tsk);
+               return;
++      }
+-      get_task_struct(tsk);
+-
+-      spin_lock(&oom_reaper_lock);
++      spin_lock_irqsave(&oom_reaper_lock, flags);
+       tsk->oom_reaper_list = oom_reaper_list;
+       oom_reaper_list = tsk;
+-      spin_unlock(&oom_reaper_lock);
++      spin_unlock_irqrestore(&oom_reaper_lock, flags);
+       trace_wake_reaper(tsk->pid);
+       wake_up(&oom_reaper_wait);
+ }
++/*
++ * Give the OOM victim time to exit naturally before invoking the oom_reaping.
++ * The timers timeout is arbitrary... the longer it is, the longer the worst
++ * case scenario for the OOM can take. If it is too small, the oom_reaper can
++ * get in the way and release resources needed by the process exit path.
++ * e.g. The futex robust list can sit in Anon|Private memory that gets reaped
++ * before the exit path is able to wake the futex waiters.
++ */
++#define OOM_REAPER_DELAY (2*HZ)
++static void queue_oom_reaper(struct task_struct *tsk)
++{
++      /* mm is already queued? */
++      if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags))
++              return;
++
++      get_task_struct(tsk);
++      timer_setup(&tsk->oom_reaper_timer, wake_oom_reaper, 0);
++      tsk->oom_reaper_timer.expires = jiffies + OOM_REAPER_DELAY;
++      add_timer(&tsk->oom_reaper_timer);
++}
++
+ static int __init oom_init(void)
+ {
+       oom_reaper_th = kthread_run(oom_reaper, NULL, "oom_reaper");
+@@ -680,7 +706,7 @@ static int __init oom_init(void)
+ }
+ subsys_initcall(oom_init)
+ #else
+-static inline void wake_oom_reaper(struct task_struct *tsk)
++static inline void queue_oom_reaper(struct task_struct *tsk)
+ {
+ }
+ #endif /* CONFIG_MMU */
+@@ -931,7 +957,7 @@ static void __oom_kill_process(struct ta
+       rcu_read_unlock();
+       if (can_oom_reap)
+-              wake_oom_reaper(victim);
++              queue_oom_reaper(victim);
+       mmdrop(mm);
+       put_task_struct(victim);
+@@ -967,7 +993,7 @@ static void oom_kill_process(struct oom_
+       task_lock(victim);
+       if (task_will_free_mem(victim)) {
+               mark_oom_victim(victim);
+-              wake_oom_reaper(victim);
++              queue_oom_reaper(victim);
+               task_unlock(victim);
+               put_task_struct(victim);
+               return;
+@@ -1065,7 +1091,7 @@ bool out_of_memory(struct oom_control *o
+        */
+       if (task_will_free_mem(current)) {
+               mark_oom_victim(current);
+-              wake_oom_reaper(current);
++              queue_oom_reaper(current);
+               return true;
+       }
diff --git a/queue-5.10/series b/queue-5.10/series
index 02531252248d164bbf4c54364a1641a0b9bc2350..f9ac328ab3527477dbc96dd07624696e7b347479 100644 (file)
@@ -48,3 +48,10 @@ scsi-qedi-fix-failed-disconnect-handling.patch
 stat-fix-inconsistency-between-struct-stat-and-struc.patch
 nvme-add-a-quirk-to-disable-namespace-identifiers.patch
 nvme-pci-disable-namespace-identifiers-for-qemu-cont.patch
+edac-synopsys-read-the-error-count-from-the-correct-register.patch
+mm-hugetlb-allow-for-high-userspace-addresses.patch
+oom_kill.c-futex-delay-the-oom-reaper-to-allow-time-for-proper-futex-cleanup.patch
+mm-mmu_notifier.c-fix-race-in-mmu_interval_notifier_remove.patch
+ata-pata_marvell-check-the-bmdma_addr-beforing-reading.patch
+dma-at_xdmac-fix-a-missing-check-on-list-iterator.patch
+net-atlantic-invert-deep-par-in-pm-functions-preventing-null-derefs.patch