git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.12-stable patches
author     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
           Mon, 21 Jun 2021 13:09:22 +0000 (15:09 +0200)
committer  Greg Kroah-Hartman <gregkh@linuxfoundation.org>
           Mon, 21 Jun 2021 13:09:22 +0000 (15:09 +0200)
added patches:
cfg80211-avoid-double-free-of-pmsr-request.patch
cfg80211-fix-phy80211-symlink-creation.patch
cfg80211-make-certificate-generation-more-robust.patch
cfg80211-shut-down-interfaces-on-failed-resume.patch
crash_core-vmcoreinfo-append-section_size_bits-to-vmcoreinfo.patch
dmaengine-pl330-fix-wrong-usage-of-spinlock-flags-in-dma_cyclc.patch
drm-amdgpu-gfx10-enlarge-cp_mec_doorbell_range_upper-to-cover-full-doorbell.patch
drm-amdgpu-gfx9-fix-the-doorbell-missing-when-in-cgpg-issue.patch
mac80211-fix-deadlock-in-ap-vlan-handling.patch
mac80211-fix-null-ptr-deref-for-injected-rate-info.patch
mac80211-fix-reset-debugfs-locking.patch
mac80211-minstrel_ht-fix-sample-time-check.patch
mac80211-move-interface-shutdown-out-of-wiphy-lock.patch
makefile-lto-pass-warn-stack-size-only-on-lld-13.0.0.patch
mm-hugetlb-expand-restore_reserve_on_error-functionality.patch
mm-hwpoison-fix-race-with-hugetlb-page-allocation.patch
mm-slub-actually-fix-freelist-pointer-vs-redzoning.patch
mm-slub-clarify-verification-reporting.patch
mm-slub-fix-redzoning-for-small-allocations.patch
mm-slub.c-include-swab.h.patch
mm-swap-fix-pte_same_as_swp-not-removing-uffd-wp-bit-when-compare.patch
net-bridge-fix-vlan-tunnel-dst-null-pointer-dereference.patch
net-bridge-fix-vlan-tunnel-dst-refcnt-when-egressing.patch
net-ll_temac-fix-tx-bd-buffer-overwrite.patch
net-ll_temac-make-sure-to-free-skb-when-it-is-completely-used.patch
powerpc-perf-fix-crash-in-perf_instruction_pointer-when-ppmu-is-not-set.patch
x86-fpu-invalidate-fpu-state-after-a-failed-xrstor-from-a-user-buffer.patch
x86-fpu-prevent-state-corruption-in-__fpu__restore_sig.patch
x86-fpu-reset-state-for-all-signal-restore-failures.patch
x86-ioremap-map-efi-reserved-memory-as-encrypted-for-sev.patch
x86-mm-avoid-truncating-memblocks-for-sgx-memory.patch
x86-pkru-write-hardware-init-value-to-pkru-when-xstate-is-init.patch
x86-process-check-pf_kthread-and-not-current-mm-for-kernel-threads.patch

34 files changed:
queue-5.12/cfg80211-avoid-double-free-of-pmsr-request.patch [new file with mode: 0644]
queue-5.12/cfg80211-fix-phy80211-symlink-creation.patch [new file with mode: 0644]
queue-5.12/cfg80211-make-certificate-generation-more-robust.patch [new file with mode: 0644]
queue-5.12/cfg80211-shut-down-interfaces-on-failed-resume.patch [new file with mode: 0644]
queue-5.12/crash_core-vmcoreinfo-append-section_size_bits-to-vmcoreinfo.patch [new file with mode: 0644]
queue-5.12/dmaengine-pl330-fix-wrong-usage-of-spinlock-flags-in-dma_cyclc.patch [new file with mode: 0644]
queue-5.12/drm-amdgpu-gfx10-enlarge-cp_mec_doorbell_range_upper-to-cover-full-doorbell.patch [new file with mode: 0644]
queue-5.12/drm-amdgpu-gfx9-fix-the-doorbell-missing-when-in-cgpg-issue.patch [new file with mode: 0644]
queue-5.12/mac80211-fix-deadlock-in-ap-vlan-handling.patch [new file with mode: 0644]
queue-5.12/mac80211-fix-null-ptr-deref-for-injected-rate-info.patch [new file with mode: 0644]
queue-5.12/mac80211-fix-reset-debugfs-locking.patch [new file with mode: 0644]
queue-5.12/mac80211-minstrel_ht-fix-sample-time-check.patch [new file with mode: 0644]
queue-5.12/mac80211-move-interface-shutdown-out-of-wiphy-lock.patch [new file with mode: 0644]
queue-5.12/makefile-lto-pass-warn-stack-size-only-on-lld-13.0.0.patch [new file with mode: 0644]
queue-5.12/mm-hugetlb-expand-restore_reserve_on_error-functionality.patch [new file with mode: 0644]
queue-5.12/mm-hwpoison-fix-race-with-hugetlb-page-allocation.patch [new file with mode: 0644]
queue-5.12/mm-slub-actually-fix-freelist-pointer-vs-redzoning.patch [new file with mode: 0644]
queue-5.12/mm-slub-clarify-verification-reporting.patch [new file with mode: 0644]
queue-5.12/mm-slub-fix-redzoning-for-small-allocations.patch [new file with mode: 0644]
queue-5.12/mm-slub.c-include-swab.h.patch [new file with mode: 0644]
queue-5.12/mm-swap-fix-pte_same_as_swp-not-removing-uffd-wp-bit-when-compare.patch [new file with mode: 0644]
queue-5.12/net-bridge-fix-vlan-tunnel-dst-null-pointer-dereference.patch [new file with mode: 0644]
queue-5.12/net-bridge-fix-vlan-tunnel-dst-refcnt-when-egressing.patch [new file with mode: 0644]
queue-5.12/net-ll_temac-fix-tx-bd-buffer-overwrite.patch [new file with mode: 0644]
queue-5.12/net-ll_temac-make-sure-to-free-skb-when-it-is-completely-used.patch [new file with mode: 0644]
queue-5.12/powerpc-perf-fix-crash-in-perf_instruction_pointer-when-ppmu-is-not-set.patch [new file with mode: 0644]
queue-5.12/series
queue-5.12/x86-fpu-invalidate-fpu-state-after-a-failed-xrstor-from-a-user-buffer.patch [new file with mode: 0644]
queue-5.12/x86-fpu-prevent-state-corruption-in-__fpu__restore_sig.patch [new file with mode: 0644]
queue-5.12/x86-fpu-reset-state-for-all-signal-restore-failures.patch [new file with mode: 0644]
queue-5.12/x86-ioremap-map-efi-reserved-memory-as-encrypted-for-sev.patch [new file with mode: 0644]
queue-5.12/x86-mm-avoid-truncating-memblocks-for-sgx-memory.patch [new file with mode: 0644]
queue-5.12/x86-pkru-write-hardware-init-value-to-pkru-when-xstate-is-init.patch [new file with mode: 0644]
queue-5.12/x86-process-check-pf_kthread-and-not-current-mm-for-kernel-threads.patch [new file with mode: 0644]

diff --git a/queue-5.12/cfg80211-avoid-double-free-of-pmsr-request.patch b/queue-5.12/cfg80211-avoid-double-free-of-pmsr-request.patch
new file mode 100644 (file)
index 0000000..24e09cf
--- /dev/null
@@ -0,0 +1,61 @@
+From 0288e5e16a2e18f0b7e61a2b70d9037fc6e4abeb Mon Sep 17 00:00:00 2001
+From: Avraham Stern <avraham.stern@intel.com>
+Date: Fri, 18 Jun 2021 13:41:31 +0300
+Subject: cfg80211: avoid double free of PMSR request
+
+From: Avraham Stern <avraham.stern@intel.com>
+
+commit 0288e5e16a2e18f0b7e61a2b70d9037fc6e4abeb upstream.
+
+cfg80211_pmsr_process_abort() moves all the PMSR requests that
+need to be freed into a local list before aborting and freeing them.
+As a result, it is possible that cfg80211_pmsr_complete() will run in
+parallel and free the same PMSR request.
+
+Fix it by freeing the request in cfg80211_pmsr_complete() only if it
+is still in the original pmsr list.
+
+Cc: stable@vger.kernel.org
+Fixes: 9bb7e0f24e7e ("cfg80211: add peer measurement with FTM initiator API")
+Signed-off-by: Avraham Stern <avraham.stern@intel.com>
+Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
+Link: https://lore.kernel.org/r/iwlwifi.20210618133832.1fbef57e269a.I00294bebdb0680b892f8d1d5c871fd9dbe785a5e@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/wireless/pmsr.c |   16 ++++++++++++++--
+ 1 file changed, 14 insertions(+), 2 deletions(-)
+
+--- a/net/wireless/pmsr.c
++++ b/net/wireless/pmsr.c
+@@ -324,6 +324,7 @@ void cfg80211_pmsr_complete(struct wirel
+                           gfp_t gfp)
+ {
+       struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
++      struct cfg80211_pmsr_request *tmp, *prev, *to_free = NULL;
+       struct sk_buff *msg;
+       void *hdr;
+@@ -354,9 +355,20 @@ free_msg:
+       nlmsg_free(msg);
+ free_request:
+       spin_lock_bh(&wdev->pmsr_lock);
+-      list_del(&req->list);
++      /*
++       * cfg80211_pmsr_process_abort() may have already moved this request
++       * to the free list, and will free it later. In this case, don't free
++       * it here.
++       */
++      list_for_each_entry_safe(tmp, prev, &wdev->pmsr_list, list) {
++              if (tmp == req) {
++                      list_del(&req->list);
++                      to_free = req;
++                      break;
++              }
++      }
+       spin_unlock_bh(&wdev->pmsr_lock);
+-      kfree(req);
++      kfree(to_free);
+ }
+ EXPORT_SYMBOL_GPL(cfg80211_pmsr_complete);
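
The fix works because whichever path unlinks the request from the pmsr
list owns the free; the other path then finds nothing to unlink and ends
up passing NULL to kfree(), which is a no-op. A minimal userspace sketch
of that ownership pattern (hypothetical names, a pthread mutex standing
in for the spinlock; a model of the pattern, not the kernel code itself):

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct request {
        struct request *next;
    };

    static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct request *pending;    /* live requests, like wdev->pmsr_list */

    /* Unlink req if it is still listed and return it for freeing;
     * return NULL if another path already took ownership. */
    static struct request *steal_request(struct request *req)
    {
        struct request **pp, *found = NULL;

        pthread_mutex_lock(&list_lock);
        for (pp = &pending; *pp; pp = &(*pp)->next) {
            if (*pp == req) {
                *pp = req->next;        /* unlink */
                found = req;
                break;
            }
        }
        pthread_mutex_unlock(&list_lock);
        return found;
    }

    static void complete_request(struct request *req)
    {
        free(steal_request(req));       /* free(NULL) is a no-op */
    }

    int main(void)
    {
        struct request *r = calloc(1, sizeof(*r));

        r->next = pending;
        pending = r;

        /* The "abort" path takes ownership first ... */
        struct request *to_free = steal_request(r);
        /* ... so the racing "complete" path must not free anything. */
        complete_request(r);
        free(to_free);
        puts("freed exactly once");
        return 0;
    }
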
diff --git a/queue-5.12/cfg80211-fix-phy80211-symlink-creation.patch b/queue-5.12/cfg80211-fix-phy80211-symlink-creation.patch
new file mode 100644 (file)
index 0000000..1adad4a
--- /dev/null
@@ -0,0 +1,55 @@
+From 43076c1e074359f11c85d7d1b85ede1bbb8ee6b9 Mon Sep 17 00:00:00 2001
+From: Johannes Berg <johannes.berg@intel.com>
+Date: Tue, 8 Jun 2021 11:32:28 +0200
+Subject: cfg80211: fix phy80211 symlink creation
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+commit 43076c1e074359f11c85d7d1b85ede1bbb8ee6b9 upstream.
+
+When I moved around the code here, I neglected that we could still
+call register_netdev() or similar without the wiphy mutex held,
+which then calls cfg80211_register_wdev() - that's also done from
+cfg80211_register_netdevice(), but the phy80211 symlink creation
+was only there. Now, the symlink isn't needed for a *pure* wdev,
+but a netdev not registered via cfg80211_register_wdev() should
+still have the symlink, so move the creation to the right place.
+
+Cc: stable@vger.kernel.org
+Fixes: 2fe8ef106238 ("cfg80211: change netdev registration/unregistration semantics")
+Link: https://lore.kernel.org/r/20210608113226.a5dc4c1e488c.Ia42fe663cefe47b0883af78c98f284c5555bbe5d@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/wireless/core.c |   13 +++++--------
+ 1 file changed, 5 insertions(+), 8 deletions(-)
+
+--- a/net/wireless/core.c
++++ b/net/wireless/core.c
+@@ -1339,6 +1339,11 @@ void cfg80211_register_wdev(struct cfg80
+       rdev->devlist_generation++;
+       wdev->registered = true;
++      if (wdev->netdev &&
++          sysfs_create_link(&wdev->netdev->dev.kobj, &rdev->wiphy.dev.kobj,
++                            "phy80211"))
++              pr_err("failed to add phy80211 symlink to netdev!\n");
++
+       nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE);
+ }
+@@ -1364,14 +1369,6 @@ int cfg80211_register_netdevice(struct n
+       if (ret)
+               goto out;
+-      if (sysfs_create_link(&dev->dev.kobj, &rdev->wiphy.dev.kobj,
+-                            "phy80211")) {
+-              pr_err("failed to add phy80211 symlink to netdev!\n");
+-              unregister_netdevice(dev);
+-              ret = -EINVAL;
+-              goto out;
+-      }
+-
+       cfg80211_register_wdev(rdev, wdev);
+       ret = 0;
+ out:
diff --git a/queue-5.12/cfg80211-make-certificate-generation-more-robust.patch b/queue-5.12/cfg80211-make-certificate-generation-more-robust.patch
new file mode 100644 (file)
index 0000000..f2b0b74
--- /dev/null
@@ -0,0 +1,35 @@
+From b5642479b0f7168fe16d156913533fe65ab4f8d5 Mon Sep 17 00:00:00 2001
+From: Johannes Berg <johannes.berg@intel.com>
+Date: Fri, 18 Jun 2021 13:41:29 +0300
+Subject: cfg80211: make certificate generation more robust
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+commit b5642479b0f7168fe16d156913533fe65ab4f8d5 upstream.
+
+If all net/wireless/certs/*.hex files are deleted, the build
+will hang at this point since the 'cat' command will have no
+arguments. Do "echo | cat - ..." so that even if the "..."
+part is empty, the whole thing won't hang.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
+Link: https://lore.kernel.org/r/iwlwifi.20210618133832.c989056c3664.Ic3b77531d00b30b26dcd69c64e55ae2f60c3f31e@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/wireless/Makefile |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/wireless/Makefile
++++ b/net/wireless/Makefile
+@@ -28,7 +28,7 @@ $(obj)/shipped-certs.c: $(wildcard $(src
+       @$(kecho) "  GEN     $@"
+       @(echo '#include "reg.h"'; \
+         echo 'const u8 shipped_regdb_certs[] = {'; \
+-        cat $^ ; \
++        echo | cat - $^ ; \
+         echo '};'; \
+         echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \
+        ) > $@
diff --git a/queue-5.12/cfg80211-shut-down-interfaces-on-failed-resume.patch b/queue-5.12/cfg80211-shut-down-interfaces-on-failed-resume.patch
new file mode 100644 (file)
index 0000000..fc4d523
--- /dev/null
@@ -0,0 +1,36 @@
+From 65bec836da8394b1d56bdec2c478dcac21cf12a4 Mon Sep 17 00:00:00 2001
+From: Johannes Berg <johannes.berg@intel.com>
+Date: Tue, 8 Jun 2021 11:32:29 +0200
+Subject: cfg80211: shut down interfaces on failed resume
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+commit 65bec836da8394b1d56bdec2c478dcac21cf12a4 upstream.
+
+If resume fails, we should shut down all interfaces as the
+hardware is probably dead. This is currently done in
+mac80211, but we need to change that due to locking issues,
+so move it here and do it without the wiphy lock held.
+
+Cc: stable@vger.kernel.org
+Fixes: 2fe8ef106238 ("cfg80211: change netdev registration/unregistration semantics")
+Link: https://lore.kernel.org/r/20210608113226.d564ca69de7c.I2e3c3e5d410b72a4f63bade4fb075df041b3d92f@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/wireless/sysfs.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/net/wireless/sysfs.c
++++ b/net/wireless/sysfs.c
+@@ -133,6 +133,10 @@ static int wiphy_resume(struct device *d
+       if (rdev->wiphy.registered && rdev->ops->resume)
+               ret = rdev_resume(rdev);
+       wiphy_unlock(&rdev->wiphy);
++
++      if (ret)
++              cfg80211_shutdown_all_interfaces(&rdev->wiphy);
++
+       rtnl_unlock();
+       return ret;
diff --git a/queue-5.12/crash_core-vmcoreinfo-append-section_size_bits-to-vmcoreinfo.patch b/queue-5.12/crash_core-vmcoreinfo-append-section_size_bits-to-vmcoreinfo.patch
new file mode 100644 (file)
index 0000000..6af9117
--- /dev/null
@@ -0,0 +1,60 @@
+From 4f5aecdff25f59fb5ea456d5152a913906ecf287 Mon Sep 17 00:00:00 2001
+From: Pingfan Liu <kernelfans@gmail.com>
+Date: Tue, 15 Jun 2021 18:23:36 -0700
+Subject: crash_core, vmcoreinfo: append 'SECTION_SIZE_BITS' to vmcoreinfo
+
+From: Pingfan Liu <kernelfans@gmail.com>
+
+commit 4f5aecdff25f59fb5ea456d5152a913906ecf287 upstream.
+
+As mentioned in kernel commit 1d50e5d0c505 ("crash_core, vmcoreinfo:
+Append 'MAX_PHYSMEM_BITS' to vmcoreinfo"), SECTION_SIZE_BITS is used in
+the formula:
+
+    #define SECTIONS_SHIFT    (MAX_PHYSMEM_BITS - SECTION_SIZE_BITS)
+
+Besides SECTIONS_SHIFT, SECTION_SIZE_BITS is also used to calculate
+PAGES_PER_SECTION in makedumpfile, just like in the kernel.
+
+Unfortunately, this arch-dependent macro SECTION_SIZE_BITS changes, e.g.
+recently in kernel commit f0b13ee23241 ("arm64/sparsemem: reduce
+SECTION_SIZE_BITS").  But user space wants a stable interface to get
+this info.  Such info is impossible to be deduced from a crashdump
+vmcore.  Hence append SECTION_SIZE_BITS to vmcoreinfo.
+
+Link: https://lkml.kernel.org/r/20210608103359.84907-1-kernelfans@gmail.com
+Link: http://lists.infradead.org/pipermail/kexec/2021-June/022676.html
+Signed-off-by: Pingfan Liu <kernelfans@gmail.com>
+Acked-by: Baoquan He <bhe@redhat.com>
+Cc: Bhupesh Sharma <bhupesh.sharma@linaro.org>
+Cc: Kazuhito Hagio <k-hagio@ab.jp.nec.com>
+Cc: Dave Young <dyoung@redhat.com>
+Cc: Boris Petkov <bp@alien8.de>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: James Morse <james.morse@arm.com>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Michael Ellerman <mpe@ellerman.id.au>
+Cc: Paul Mackerras <paulus@samba.org>
+Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Cc: Dave Anderson <anderson@redhat.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/crash_core.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/kernel/crash_core.c
++++ b/kernel/crash_core.c
+@@ -464,6 +464,7 @@ static int __init crash_save_vmcoreinfo_
+       VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
+       VMCOREINFO_STRUCT_SIZE(mem_section);
+       VMCOREINFO_OFFSET(mem_section, section_mem_map);
++      VMCOREINFO_NUMBER(SECTION_SIZE_BITS);
+       VMCOREINFO_NUMBER(MAX_PHYSMEM_BITS);
+ #endif
+       VMCOREINFO_STRUCT_SIZE(page);
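
The point of exporting SECTION_SIZE_BITS is that a dump tool can derive
the sparsemem section geometry from the vmcore instead of hardcoding
per-architecture values that may change. A minimal sketch of the
derivation a tool such as makedumpfile performs (sample values are
illustrative; the formula mirrors the kernel's PFN_SECTION_SHIFT and
PAGES_PER_SECTION definitions):

    #include <stdio.h>

    int main(void)
    {
        /* Values a tool would read from vmcoreinfo / the vmcore. */
        unsigned long section_size_bits = 27;  /* e.g. arm64 after f0b13ee23241 */
        unsigned long page_shift = 12;         /* 4 KiB pages */

        unsigned long pfn_section_shift = section_size_bits - page_shift;
        unsigned long pages_per_section = 1UL << pfn_section_shift;

        printf("PAGES_PER_SECTION = %lu\n", pages_per_section);  /* 32768 */
        return 0;
    }
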
diff --git a/queue-5.12/dmaengine-pl330-fix-wrong-usage-of-spinlock-flags-in-dma_cyclc.patch b/queue-5.12/dmaengine-pl330-fix-wrong-usage-of-spinlock-flags-in-dma_cyclc.patch
new file mode 100644 (file)
index 0000000..426821e
--- /dev/null
@@ -0,0 +1,52 @@
+From 4ad5dd2d7876d79507a20f026507d1a93b8fff10 Mon Sep 17 00:00:00 2001
+From: Bumyong Lee <bumyong.lee@samsung.com>
+Date: Fri, 7 May 2021 15:36:47 +0900
+Subject: dmaengine: pl330: fix wrong usage of spinlock flags in dma_cyclc
+
+From: Bumyong Lee <bumyong.lee@samsung.com>
+
+commit 4ad5dd2d7876d79507a20f026507d1a93b8fff10 upstream.
+
+The 'flags' variable, which is an input parameter of pl330_prep_dma_cyclic(),
+should not be used by the spin_lock_irq[save/restore] functions.
+
+Signed-off-by: Jongho Park <jongho7.park@samsung.com>
+Signed-off-by: Bumyong Lee <bumyong.lee@samsung.com>
+Signed-off-by: Chanho Park <chanho61.park@samsung.com>
+Link: https://lore.kernel.org/r/20210507063647.111209-1-chanho61.park@samsung.com
+Fixes: f6f2421c0a1c ("dmaengine: pl330: Merge dma_pl330_dmac and pl330_dmac structs")
+Cc: stable@vger.kernel.org
+Signed-off-by: Vinod Koul <vkoul@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/dma/pl330.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/dma/pl330.c
++++ b/drivers/dma/pl330.c
+@@ -2694,13 +2694,15 @@ static struct dma_async_tx_descriptor *p
+       for (i = 0; i < len / period_len; i++) {
+               desc = pl330_get_desc(pch);
+               if (!desc) {
++                      unsigned long iflags;
++
+                       dev_err(pch->dmac->ddma.dev, "%s:%d Unable to fetch desc\n",
+                               __func__, __LINE__);
+                       if (!first)
+                               return NULL;
+-                      spin_lock_irqsave(&pl330->pool_lock, flags);
++                      spin_lock_irqsave(&pl330->pool_lock, iflags);
+                       while (!list_empty(&first->node)) {
+                               desc = list_entry(first->node.next,
+@@ -2710,7 +2712,7 @@ static struct dma_async_tx_descriptor *p
+                       list_move_tail(&first->node, &pl330->desc_pool);
+-                      spin_unlock_irqrestore(&pl330->pool_lock, flags);
++                      spin_unlock_irqrestore(&pl330->pool_lock, iflags);
+                       return NULL;
+               }
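
The underlying bug pattern: spin_lock_irqsave() stores the saved
interrupt state into its flags argument, so reusing the function's
'flags' parameter (which carries the caller's dmaengine flags) both
clobbers that value and later restores a bogus IRQ state. A minimal
userspace model of the shape of the fix (mock macros stand in for the
kernel primitives; all names hypothetical):

    #include <stdio.h>

    /* Mock of spin_lock_irqsave()/spin_unlock_irqrestore(): the lock
     * macro OVERWRITES 'flags' with the saved IRQ state, and the unlock
     * macro restores from it, so 'flags' must be a dedicated local. */
    static unsigned long irq_state = 0xabcd;

    #define mock_lock_irqsave(flags)        ((flags) = irq_state)
    #define mock_unlock_irqrestore(flags)   (irq_state = (flags))

    static void prep_cyclic(unsigned long flags)  /* caller's DMA flags */
    {
        unsigned long iflags;   /* the fix: separate local for IRQ state */

        mock_lock_irqsave(iflags);      /* previously clobbered 'flags' */
        /* ... put descriptors back on the pool ... */
        mock_unlock_irqrestore(iflags);

        printf("caller's flags preserved: %#lx\n", flags);
    }

    int main(void)
    {
        prep_cyclic(0x1);       /* e.g. DMA_PREP_INTERRUPT */
        return 0;
    }
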
diff --git a/queue-5.12/drm-amdgpu-gfx10-enlarge-cp_mec_doorbell_range_upper-to-cover-full-doorbell.patch b/queue-5.12/drm-amdgpu-gfx10-enlarge-cp_mec_doorbell_range_upper-to-cover-full-doorbell.patch
new file mode 100644 (file)
index 0000000..6776e18
--- /dev/null
@@ -0,0 +1,38 @@
+From 1c0b0efd148d5b24c4932ddb3fa03c8edd6097b3 Mon Sep 17 00:00:00 2001
+From: Yifan Zhang <yifan1.zhang@amd.com>
+Date: Thu, 10 Jun 2021 10:10:07 +0800
+Subject: drm/amdgpu/gfx10: enlarge CP_MEC_DOORBELL_RANGE_UPPER to cover full doorbell.
+
+From: Yifan Zhang <yifan1.zhang@amd.com>
+
+commit 1c0b0efd148d5b24c4932ddb3fa03c8edd6097b3 upstream.
+
+If GC has entered CGPG, ringing doorbell > first page doesn't wakeup GC.
+Enlarge CP_MEC_DOORBELL_RANGE_UPPER to workaround this issue.
+
+Signed-off-by: Yifan Zhang <yifan1.zhang@amd.com>
+Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
+Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+@@ -6769,8 +6769,12 @@ static int gfx_v10_0_kiq_init_register(s
+       if (ring->use_doorbell) {
+               WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
+                       (adev->doorbell_index.kiq * 2) << 2);
++              /* If GC has entered CGPG, ringing doorbell > first page doesn't
++               * wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to workaround
++               * this issue.
++               */
+               WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
+-                      (adev->doorbell_index.userqueue_end * 2) << 2);
++                      (adev->doorbell.size - 4));
+       }
+       WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
diff --git a/queue-5.12/drm-amdgpu-gfx9-fix-the-doorbell-missing-when-in-cgpg-issue.patch b/queue-5.12/drm-amdgpu-gfx9-fix-the-doorbell-missing-when-in-cgpg-issue.patch
new file mode 100644 (file)
index 0000000..f95bf7b
--- /dev/null
@@ -0,0 +1,38 @@
+From 4cbbe34807938e6e494e535a68d5ff64edac3f20 Mon Sep 17 00:00:00 2001
+From: Yifan Zhang <yifan1.zhang@amd.com>
+Date: Thu, 10 Jun 2021 09:55:01 +0800
+Subject: drm/amdgpu/gfx9: fix the doorbell missing when in CGPG issue.
+
+From: Yifan Zhang <yifan1.zhang@amd.com>
+
+commit 4cbbe34807938e6e494e535a68d5ff64edac3f20 upstream.
+
+If GC has entered CGPG, ringing doorbell > first page doesn't wakeup GC.
+Enlarge CP_MEC_DOORBELL_RANGE_UPPER to workaround this issue.
+
+Signed-off-by: Yifan Zhang <yifan1.zhang@amd.com>
+Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
+Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+@@ -3623,8 +3623,12 @@ static int gfx_v9_0_kiq_init_register(st
+       if (ring->use_doorbell) {
+               WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
+                                       (adev->doorbell_index.kiq * 2) << 2);
++              /* If GC has entered CGPG, ringing doorbell > first page doesn't
++               * wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to workaround
++               * this issue.
++               */
+               WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
+-                                      (adev->doorbell_index.userqueue_end * 2) << 2);
++                                      (adev->doorbell.size - 4));
+       }
+       WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
diff --git a/queue-5.12/mac80211-fix-deadlock-in-ap-vlan-handling.patch b/queue-5.12/mac80211-fix-deadlock-in-ap-vlan-handling.patch
new file mode 100644 (file)
index 0000000..85031fb
--- /dev/null
@@ -0,0 +1,77 @@
+From d5befb224edbe53056c2c18999d630dafb4a08b9 Mon Sep 17 00:00:00 2001
+From: Johannes Berg <johannes.berg@intel.com>
+Date: Mon, 17 May 2021 16:03:23 +0200
+Subject: mac80211: fix deadlock in AP/VLAN handling
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+commit d5befb224edbe53056c2c18999d630dafb4a08b9 upstream.
+
+Syzbot reports that when you have AP_VLAN interfaces that are up
+and close the AP interface they belong to, we get a deadlock. No
+surprise - since we dev_close() them with the wiphy mutex held,
+which goes back into the netdev notifier in cfg80211 and tries to
+acquire the wiphy mutex there.
+
+To fix this, we need to do two things:
+ 1) prevent changing iftype while AP_VLANs are up, we can't
+    easily fix this case since cfg80211 already calls us with
+    the wiphy mutex held, but change_interface() is relatively
+    rare in drivers anyway, so changing iftype isn't used much
+    (and userspace has to fall back to down/change/up anyway)
+ 2) pull the dev_close() loop over VLANs out of the wiphy mutex
+    section in the normal stop case
+
+Cc: stable@vger.kernel.org
+Reported-by: syzbot+452ea4fbbef700ff0a56@syzkaller.appspotmail.com
+Fixes: a05829a7222e ("cfg80211: avoid holding the RTNL when calling the driver")
+Link: https://lore.kernel.org/r/20210517160322.9b8f356c0222.I392cb0e2fa5a1a94cf2e637555d702c7e512c1ff@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mac80211/iface.c |   19 ++++++++++++-------
+ 1 file changed, 12 insertions(+), 7 deletions(-)
+
+--- a/net/mac80211/iface.c
++++ b/net/mac80211/iface.c
+@@ -475,14 +475,7 @@ static void ieee80211_do_stop(struct iee
+                                  GFP_KERNEL);
+       }
+-      /* APs need special treatment */
+       if (sdata->vif.type == NL80211_IFTYPE_AP) {
+-              struct ieee80211_sub_if_data *vlan, *tmpsdata;
+-
+-              /* down all dependent devices, that is VLANs */
+-              list_for_each_entry_safe(vlan, tmpsdata, &sdata->u.ap.vlans,
+-                                       u.vlan.list)
+-                      dev_close(vlan->dev);
+               WARN_ON(!list_empty(&sdata->u.ap.vlans));
+       } else if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
+               /* remove all packets in parent bc_buf pointing to this dev */
+@@ -640,6 +633,15 @@ static int ieee80211_stop(struct net_dev
+ {
+       struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
++      /* close all dependent VLAN interfaces before locking wiphy */
++      if (sdata->vif.type == NL80211_IFTYPE_AP) {
++              struct ieee80211_sub_if_data *vlan, *tmpsdata;
++
++              list_for_each_entry_safe(vlan, tmpsdata, &sdata->u.ap.vlans,
++                                       u.vlan.list)
++                      dev_close(vlan->dev);
++      }
++
+       wiphy_lock(sdata->local->hw.wiphy);
+       ieee80211_do_stop(sdata, true);
+       wiphy_unlock(sdata->local->hw.wiphy);
+@@ -1589,6 +1591,9 @@ static int ieee80211_runtime_change_ifty
+       switch (sdata->vif.type) {
+       case NL80211_IFTYPE_AP:
++              if (!list_empty(&sdata->u.ap.vlans))
++                      return -EBUSY;
++              break;
+       case NL80211_IFTYPE_STATION:
+       case NL80211_IFTYPE_ADHOC:
+       case NL80211_IFTYPE_OCB:
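
The deadlock shape: ieee80211_do_stop() used to dev_close() each VLAN
while holding the wiphy mutex, and dev_close() re-enters cfg80211's
netdev notifier, which takes the same (non-recursive) mutex. The fix
reorders the shutdown so dependents are closed before the lock is taken.
A minimal userspace model of that reordering (pthread mutex in place of
the wiphy mutex; hypothetical names):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t wiphy_mutex = PTHREAD_MUTEX_INITIALIZER;

    /* Stands in for the netdev notifier that takes the wiphy mutex;
     * calling this while already holding the mutex would deadlock. */
    static void netdev_notifier(void)
    {
        pthread_mutex_lock(&wiphy_mutex);
        puts("notifier ran");
        pthread_mutex_unlock(&wiphy_mutex);
    }

    static void close_vlans(void)
    {
        netdev_notifier();      /* dev_close() ends up here */
    }

    int main(void)
    {
        close_vlans();          /* fixed ordering: close dependents first */
        pthread_mutex_lock(&wiphy_mutex);
        /* ... the rest of the stop path runs under the lock ... */
        pthread_mutex_unlock(&wiphy_mutex);
        puts("stopped without deadlock");
        return 0;
    }
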
diff --git a/queue-5.12/mac80211-fix-null-ptr-deref-for-injected-rate-info.patch b/queue-5.12/mac80211-fix-null-ptr-deref-for-injected-rate-info.patch
new file mode 100644 (file)
index 0000000..6f575cf
--- /dev/null
@@ -0,0 +1,164 @@
+From bddc0c411a45d3718ac535a070f349be8eca8d48 Mon Sep 17 00:00:00 2001
+From: Mathy Vanhoef <Mathy.Vanhoef@kuleuven.be>
+Date: Sun, 30 May 2021 15:32:26 +0200
+Subject: mac80211: Fix NULL ptr deref for injected rate info
+
+From: Mathy Vanhoef <Mathy.Vanhoef@kuleuven.be>
+
+commit bddc0c411a45d3718ac535a070f349be8eca8d48 upstream.
+
+The commit cb17ed29a7a5 ("mac80211: parse radiotap header when selecting Tx
+queue") moved the code to validate the radiotap header from
+ieee80211_monitor_start_xmit to ieee80211_parse_tx_radiotap. This made it
+possible to share more code with the new Tx queue selection code for
+injected frames. But at the same time, it now required calling
+ieee80211_parse_tx_radiotap at the beginning of functions which wanted to
+handle the radiotap header. And this broke the rate parsing in the
+radiotap header parser.
+
+The radiotap parser for rates is operating most of the time only on the
+data in the actual radiotap header. But for the 802.11a/b/g rates, it must
+also know the selected band from the chandef information. But this
+information is only written to the ieee80211_tx_info at the end of the
+ieee80211_monitor_start_xmit - long after ieee80211_parse_tx_radiotap was
+already called. The info->band information was therefore always 0
+(NL80211_BAND_2GHZ) when the parser code tried to access it.
+
+For a 5GHz only device, injecting a frame with 802.11a rates would cause a
+NULL pointer dereference because local->hw.wiphy->bands[NL80211_BAND_2GHZ]
+would most likely have been NULL when the radiotap parser searched for the
+correct rate index of the driver.
+
+Cc: stable@vger.kernel.org
+Reported-by: Ben Greear <greearb@candelatech.com>
+Fixes: cb17ed29a7a5 ("mac80211: parse radiotap header when selecting Tx queue")
+Signed-off-by: Mathy Vanhoef <Mathy.Vanhoef@kuleuven.be>
+[sven@narfation.org: added commit message]
+Signed-off-by: Sven Eckelmann <sven@narfation.org>
+Link: https://lore.kernel.org/r/20210530133226.40587-1-sven@narfation.org
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/mac80211.h |    7 +++++-
+ net/mac80211/tx.c      |   52 +++++++++++++++++++++++++++++++++----------------
+ 2 files changed, 42 insertions(+), 17 deletions(-)
+
+--- a/include/net/mac80211.h
++++ b/include/net/mac80211.h
+@@ -6388,7 +6388,12 @@ bool ieee80211_tx_prepare_skb(struct iee
+ /**
+  * ieee80211_parse_tx_radiotap - Sanity-check and parse the radiotap header
+- *                             of injected frames
++ *                             of injected frames.
++ *
++ * To accurately parse and take into account rate and retransmission fields,
++ * you must initialize the chandef field in the ieee80211_tx_info structure
++ * of the skb before calling this function.
++ *
+  * @skb: packet injected by userspace
+  * @dev: the &struct device of this 802.11 device
+  */
+--- a/net/mac80211/tx.c
++++ b/net/mac80211/tx.c
+@@ -2002,6 +2002,26 @@ void ieee80211_xmit(struct ieee80211_sub
+       ieee80211_tx(sdata, sta, skb, false);
+ }
++static bool ieee80211_validate_radiotap_len(struct sk_buff *skb)
++{
++      struct ieee80211_radiotap_header *rthdr =
++              (struct ieee80211_radiotap_header *)skb->data;
++
++      /* check for not even having the fixed radiotap header part */
++      if (unlikely(skb->len < sizeof(struct ieee80211_radiotap_header)))
++              return false; /* too short to be possibly valid */
++
++      /* is it a header version we can trust to find length from? */
++      if (unlikely(rthdr->it_version))
++              return false; /* only version 0 is supported */
++
++      /* does the skb contain enough to deliver on the alleged length? */
++      if (unlikely(skb->len < ieee80211_get_radiotap_len(skb->data)))
++              return false; /* skb too short for claimed rt header extent */
++
++      return true;
++}
++
+ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb,
+                                struct net_device *dev)
+ {
+@@ -2010,8 +2030,6 @@ bool ieee80211_parse_tx_radiotap(struct
+       struct ieee80211_radiotap_header *rthdr =
+               (struct ieee80211_radiotap_header *) skb->data;
+       struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+-      struct ieee80211_supported_band *sband =
+-              local->hw.wiphy->bands[info->band];
+       int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len,
+                                                  NULL);
+       u16 txflags;
+@@ -2024,17 +2042,8 @@ bool ieee80211_parse_tx_radiotap(struct
+       u8 vht_mcs = 0, vht_nss = 0;
+       int i;
+-      /* check for not even having the fixed radiotap header part */
+-      if (unlikely(skb->len < sizeof(struct ieee80211_radiotap_header)))
+-              return false; /* too short to be possibly valid */
+-
+-      /* is it a header version we can trust to find length from? */
+-      if (unlikely(rthdr->it_version))
+-              return false; /* only version 0 is supported */
+-
+-      /* does the skb contain enough to deliver on the alleged length? */
+-      if (unlikely(skb->len < ieee80211_get_radiotap_len(skb->data)))
+-              return false; /* skb too short for claimed rt header extent */
++      if (!ieee80211_validate_radiotap_len(skb))
++              return false;
+       info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT |
+                      IEEE80211_TX_CTL_DONTFRAG;
+@@ -2174,6 +2183,9 @@ bool ieee80211_parse_tx_radiotap(struct
+               return false;
+       if (rate_found) {
++              struct ieee80211_supported_band *sband =
++                      local->hw.wiphy->bands[info->band];
++
+               info->control.flags |= IEEE80211_TX_CTRL_RATE_INJECT;
+               for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
+@@ -2187,7 +2199,7 @@ bool ieee80211_parse_tx_radiotap(struct
+               } else if (rate_flags & IEEE80211_TX_RC_VHT_MCS) {
+                       ieee80211_rate_set_vht(info->control.rates, vht_mcs,
+                                              vht_nss);
+-              } else {
++              } else if (sband) {
+                       for (i = 0; i < sband->n_bitrates; i++) {
+                               if (rate * 5 != sband->bitrates[i].bitrate)
+                                       continue;
+@@ -2224,8 +2236,8 @@ netdev_tx_t ieee80211_monitor_start_xmit
+       info->flags = IEEE80211_TX_CTL_REQ_TX_STATUS |
+                     IEEE80211_TX_CTL_INJECTED;
+-      /* Sanity-check and process the injection radiotap header */
+-      if (!ieee80211_parse_tx_radiotap(skb, dev))
++      /* Sanity-check the length of the radiotap header */
++      if (!ieee80211_validate_radiotap_len(skb))
+               goto fail;
+       /* we now know there is a radiotap header with a length we can use */
+@@ -2339,6 +2351,14 @@ netdev_tx_t ieee80211_monitor_start_xmit
+       ieee80211_select_queue_80211(sdata, skb, hdr);
+       skb_set_queue_mapping(skb, ieee80211_ac_from_tid(skb->priority));
++      /*
++       * Process the radiotap header. This will now take into account the
++       * selected chandef above to accurately set injection rates and
++       * retransmissions.
++       */
++      if (!ieee80211_parse_tx_radiotap(skb, dev))
++              goto fail_rcu;
++
+       /* remove the injection radiotap header */
+       skb_pull(skb, len_rthdr);
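
The restructuring splits radiotap handling in two: a cheap structural
validation that is safe to run before any transmit state exists, and the
full parse deferred until info->band is known. A toy sketch of the same
two-phase split on a made-up header format (the format and all names are
hypothetical):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    struct hdr {
        uint8_t  version;
        uint8_t  pad;
        uint16_t len;           /* total claimed header length */
    };

    /* Phase 1: structural checks only - needs no context. */
    static bool validate_len(const uint8_t *buf, size_t buflen)
    {
        struct hdr h;

        if (buflen < sizeof(h))
            return false;       /* too short for the fixed part */
        memcpy(&h, buf, sizeof(h));
        if (h.version != 0)
            return false;       /* only version 0 supported */
        return buflen >= h.len; /* claimed extent actually present? */
    }

    /* Phase 2: full parse - must run after 'band' has been decided. */
    static bool parse(const uint8_t *buf, size_t buflen, int band)
    {
        if (!validate_len(buf, buflen))
            return false;
        printf("parsing rates against band %d\n", band);
        return true;
    }

    int main(void)
    {
        uint8_t frame[16] = { 0 };
        struct hdr h = { .version = 0, .pad = 0, .len = sizeof(h) };

        memcpy(frame, &h, sizeof(h));

        if (!validate_len(frame, sizeof(frame)))
            return 1;           /* early, cheap check */
        int band = 1;           /* context decided later */
        return parse(frame, sizeof(frame), band) ? 0 : 1;
    }
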
diff --git a/queue-5.12/mac80211-fix-reset-debugfs-locking.patch b/queue-5.12/mac80211-fix-reset-debugfs-locking.patch
new file mode 100644 (file)
index 0000000..e9fd3b8
--- /dev/null
@@ -0,0 +1,44 @@
+From adaed1b9daf5a045be71e923e04b5069d2bee664 Mon Sep 17 00:00:00 2001
+From: Johannes Berg <johannes.berg@intel.com>
+Date: Tue, 8 Jun 2021 11:32:27 +0200
+Subject: mac80211: fix 'reset' debugfs locking
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+commit adaed1b9daf5a045be71e923e04b5069d2bee664 upstream.
+
+cfg80211 now calls suspend/resume with the wiphy lock
+held, and while there's a problem with that needing
+to be fixed, we should do the same in debugfs.
+
+Cc: stable@vger.kernel.org
+Fixes: a05829a7222e ("cfg80211: avoid holding the RTNL when calling the driver")
+Link: https://lore.kernel.org/r/20210608113226.14020430e449.I78e19db0a55a8295a376e15ac4cf77dbb4c6fb51@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mac80211/debugfs.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/mac80211/debugfs.c
++++ b/net/mac80211/debugfs.c
+@@ -4,7 +4,7 @@
+  *
+  * Copyright 2007     Johannes Berg <johannes@sipsolutions.net>
+  * Copyright 2013-2014  Intel Mobile Communications GmbH
+- * Copyright (C) 2018 - 2019 Intel Corporation
++ * Copyright (C) 2018 - 2019, 2021 Intel Corporation
+  */
+ #include <linux/debugfs.h>
+@@ -389,8 +389,10 @@ static ssize_t reset_write(struct file *
+       struct ieee80211_local *local = file->private_data;
+       rtnl_lock();
++      wiphy_lock(local->hw.wiphy);
+       __ieee80211_suspend(&local->hw, NULL);
+       __ieee80211_resume(&local->hw);
++      wiphy_unlock(local->hw.wiphy);
+       rtnl_unlock();
+       return count;
diff --git a/queue-5.12/mac80211-minstrel_ht-fix-sample-time-check.patch b/queue-5.12/mac80211-minstrel_ht-fix-sample-time-check.patch
new file mode 100644 (file)
index 0000000..660d9fa
--- /dev/null
@@ -0,0 +1,34 @@
+From 1236af327af476731aa548dfcbbefb1a3ec6726a Mon Sep 17 00:00:00 2001
+From: Felix Fietkau <nbd@nbd.name>
+Date: Thu, 17 Jun 2021 12:38:54 +0200
+Subject: mac80211: minstrel_ht: fix sample time check
+
+From: Felix Fietkau <nbd@nbd.name>
+
+commit 1236af327af476731aa548dfcbbefb1a3ec6726a upstream.
+
+We need to skip sampling if the next sample time is after jiffies, not before.
+This patch fixes an issue where in some cases only very little sampling (or none
+at all) is performed, leading to really bad data rates.
+
+Fixes: 80d55154b2f8 ("mac80211: minstrel_ht: significantly redesign the rate probing strategy")
+Cc: stable@vger.kernel.org
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+Link: https://lore.kernel.org/r/20210617103854.61875-1-nbd@nbd.name
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mac80211/rc80211_minstrel_ht.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/mac80211/rc80211_minstrel_ht.c
++++ b/net/mac80211/rc80211_minstrel_ht.c
+@@ -1516,7 +1516,7 @@ minstrel_ht_get_rate(void *priv, struct
+           (info->control.flags & IEEE80211_TX_CTRL_PORT_CTRL_PROTO))
+               return;
+-      if (time_is_before_jiffies(mi->sample_time))
++      if (time_is_after_jiffies(mi->sample_time))
+               return;
+       mi->sample_time = jiffies + MINSTREL_SAMPLE_INTERVAL;
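
The two helpers are easy to confuse: time_is_before_jiffies(t) is true
once t is already in the past, while time_is_after_jiffies(t) is true
while t is still in the future - and minstrel must skip sampling only
while sample_time has not yet arrived. A small userspace sketch of the
wraparound-safe comparison these helpers are built on (mirrors the
kernel's time_after() idiom):

    #include <stdbool.h>
    #include <stdio.h>

    /* True if a is later than b, safe across counter wraparound. */
    static bool time_after(unsigned long a, unsigned long b)
    {
        return (long)(b - a) < 0;
    }

    int main(void)
    {
        unsigned long jiffies = 1000;
        unsigned long sample_time = 1100;  /* next sample scheduled later */

        /* time_is_after_jiffies(t) == time_after(t, jiffies) */
        if (time_after(sample_time, jiffies))
            puts("sample_time still in the future -> skip (the fix)");

        /* The buggy check, time_is_before_jiffies(t), is
         * time_after(jiffies, t): it skipped exactly when the deadline
         * had already passed - the opposite of the intent. */
        return 0;
    }
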
diff --git a/queue-5.12/mac80211-move-interface-shutdown-out-of-wiphy-lock.patch b/queue-5.12/mac80211-move-interface-shutdown-out-of-wiphy-lock.patch
new file mode 100644 (file)
index 0000000..6f5351b
--- /dev/null
@@ -0,0 +1,82 @@
+From f5baf287f5da5641099ad5c809b3b4ebfc08506d Mon Sep 17 00:00:00 2001
+From: Johannes Berg <johannes.berg@intel.com>
+Date: Tue, 8 Jun 2021 11:32:30 +0200
+Subject: mac80211: move interface shutdown out of wiphy lock
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+commit f5baf287f5da5641099ad5c809b3b4ebfc08506d upstream.
+
+When reconfiguration fails, we shut down everything, but we
+cannot call cfg80211_shutdown_all_interfaces() with the wiphy
+mutex held. Since cfg80211 now calls it on resume errors, we
+only need to do likewise for where we call reconfig (whether
+directly or indirectly), but not under the wiphy lock.
+
+Cc: stable@vger.kernel.org
+Fixes: 2fe8ef106238 ("cfg80211: change netdev registration/unregistration semantics")
+Link: https://lore.kernel.org/r/20210608113226.78233c80f548.Iecc104aceb89f0568f50e9670a9cb191a1c8887b@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mac80211/debugfs.c |    7 ++++++-
+ net/mac80211/main.c    |    7 ++++++-
+ net/mac80211/util.c    |    2 --
+ 3 files changed, 12 insertions(+), 4 deletions(-)
+
+--- a/net/mac80211/debugfs.c
++++ b/net/mac80211/debugfs.c
+@@ -387,12 +387,17 @@ static ssize_t reset_write(struct file *
+                          size_t count, loff_t *ppos)
+ {
+       struct ieee80211_local *local = file->private_data;
++      int ret;
+       rtnl_lock();
+       wiphy_lock(local->hw.wiphy);
+       __ieee80211_suspend(&local->hw, NULL);
+-      __ieee80211_resume(&local->hw);
++      ret = __ieee80211_resume(&local->hw);
+       wiphy_unlock(local->hw.wiphy);
++
++      if (ret)
++              cfg80211_shutdown_all_interfaces(local->hw.wiphy);
++
+       rtnl_unlock();
+       return count;
+--- a/net/mac80211/main.c
++++ b/net/mac80211/main.c
+@@ -252,6 +252,7 @@ static void ieee80211_restart_work(struc
+       struct ieee80211_local *local =
+               container_of(work, struct ieee80211_local, restart_work);
+       struct ieee80211_sub_if_data *sdata;
++      int ret;
+       /* wait for scan work complete */
+       flush_workqueue(local->workqueue);
+@@ -294,8 +295,12 @@ static void ieee80211_restart_work(struc
+       /* wait for all packet processing to be done */
+       synchronize_net();
+-      ieee80211_reconfig(local);
++      ret = ieee80211_reconfig(local);
+       wiphy_unlock(local->hw.wiphy);
++
++      if (ret)
++              cfg80211_shutdown_all_interfaces(local->hw.wiphy);
++
+       rtnl_unlock();
+ }
+--- a/net/mac80211/util.c
++++ b/net/mac80211/util.c
+@@ -2186,8 +2186,6 @@ static void ieee80211_handle_reconfig_fa
+       list_for_each_entry(ctx, &local->chanctx_list, list)
+               ctx->driver_present = false;
+       mutex_unlock(&local->chanctx_mtx);
+-
+-      cfg80211_shutdown_all_interfaces(local->hw.wiphy);
+ }
+ static void ieee80211_assign_chanctx(struct ieee80211_local *local,
diff --git a/queue-5.12/makefile-lto-pass-warn-stack-size-only-on-lld-13.0.0.patch b/queue-5.12/makefile-lto-pass-warn-stack-size-only-on-lld-13.0.0.patch
new file mode 100644 (file)
index 0000000..8ca4f85
--- /dev/null
@@ -0,0 +1,54 @@
+From 0236526d76b87c1dc2cbe3eb31ae29be5b0ca151 Mon Sep 17 00:00:00 2001
+From: Tor Vic <torvic9@mailbox.org>
+Date: Sun, 13 Jun 2021 13:07:49 +0000
+Subject: Makefile: lto: Pass -warn-stack-size only on LLD < 13.0.0
+
+From: Tor Vic <torvic9@mailbox.org>
+
+commit 0236526d76b87c1dc2cbe3eb31ae29be5b0ca151 upstream.
+
+Since LLVM commit fc018eb, the '-warn-stack-size' flag has been dropped
+[1], leading to the following error message when building with Clang-13
+and LLD-13:
+
+    ld.lld: error: -plugin-opt=-: ld.lld: Unknown command line argument
+    '-warn-stack-size=2048'.  Try: 'ld.lld --help'
+    ld.lld: Did you mean '--asan-stack=2048'?
+
+In the same way as with commit 2398ce80152a ("x86, lto: Pass
+-stack-alignment only on LLD < 13.0.0"), make '-warn-stack-size'
+conditional on LLD < 13.0.0.
+
+[1] https://reviews.llvm.org/D103928
+
+Fixes: 24845dcb170e ("Makefile: LTO: have linker check -Wframe-larger-than")
+Cc: stable@vger.kernel.org
+Link: https://github.com/ClangBuiltLinux/linux/issues/1377
+Signed-off-by: Tor Vic <torvic9@mailbox.org>
+Reviewed-by: Nathan Chancellor <nathan@kernel.org>
+Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Link: https://lore.kernel.org/r/7631bab7-a8ab-f884-ab54-f4198976125c@mailbox.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Makefile |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/Makefile
++++ b/Makefile
+@@ -913,11 +913,14 @@ CC_FLAGS_LTO     += -fvisibility=hidden
+ # Limit inlining across translation units to reduce binary size
+ KBUILD_LDFLAGS += -mllvm -import-instr-limit=5
+-# Check for frame size exceeding threshold during prolog/epilog insertion.
++# Check for frame size exceeding threshold during prolog/epilog insertion
++# when using lld < 13.0.0.
+ ifneq ($(CONFIG_FRAME_WARN),0)
++ifeq ($(shell test $(CONFIG_LLD_VERSION) -lt 130000; echo $$?),0)
+ KBUILD_LDFLAGS        += -plugin-opt=-warn-stack-size=$(CONFIG_FRAME_WARN)
+ endif
+ endif
++endif
+ ifdef CONFIG_LTO
+ KBUILD_CFLAGS += -fno-lto $(CC_FLAGS_LTO)
diff --git a/queue-5.12/mm-hugetlb-expand-restore_reserve_on_error-functionality.patch b/queue-5.12/mm-hugetlb-expand-restore_reserve_on_error-functionality.patch
new file mode 100644 (file)
index 0000000..95170a1
--- /dev/null
@@ -0,0 +1,286 @@
+From 846be08578edb81f02bc8534577e6c367ef34f41 Mon Sep 17 00:00:00 2001
+From: Mike Kravetz <mike.kravetz@oracle.com>
+Date: Tue, 15 Jun 2021 18:23:29 -0700
+Subject: mm/hugetlb: expand restore_reserve_on_error functionality
+
+From: Mike Kravetz <mike.kravetz@oracle.com>
+
+commit 846be08578edb81f02bc8534577e6c367ef34f41 upstream.
+
+The routine restore_reserve_on_error is called to restore reservation
+information when an error occurs after page allocation.  The routine
+alloc_huge_page modifies the mapping reserve map and potentially the
+reserve count during allocation.  If code calling alloc_huge_page
+encounters an error after allocation and needs to free the page, the
+reservation information needs to be adjusted.
+
+Currently, restore_reserve_on_error only takes action on pages for which
+the reserve count was adjusted (HPageRestoreReserve flag).  There is
+nothing wrong with these adjustments.  However, alloc_huge_page ALWAYS
+modifies the reserve map during allocation even if the reserve count is
+not adjusted.  This can cause issues as observed during development of
+this patch [1].
+
+One specific series of operations causing an issue is:
+
+ - Create a shared hugetlb mapping
+   Reservations for all pages created by default
+
+ - Fault in a page in the mapping
+   Reservation exists so reservation count is decremented
+
+ - Punch a hole in the file/mapping at index previously faulted
+   Reservation and any associated pages will be removed
+
+ - Allocate a page to fill the hole
+   No reservation entry, so reserve count unmodified
+   Reservation entry added to map by alloc_huge_page
+
+ - Error after allocation and before instantiating the page
+   Reservation entry remains in map
+
+ - Allocate a page to fill the hole
+   Reservation entry exists, so decrement reservation count
+
+This will cause a reservation count underflow as the reservation count
+was decremented twice for the same index.
+
+A user would observe a very large number for HugePages_Rsvd in
+/proc/meminfo.  This would also likely cause subsequent allocations of
+hugetlb pages to fail as it would 'appear' that all pages are reserved.
+
+This sequence of operations is unlikely to happen, however they were
+easily reproduced and observed using hacked up code as described in [1].
+
+Address the issue by having the routine restore_reserve_on_error take
+action on pages where HPageRestoreReserve is not set.  In this case, we
+need to remove any reserve map entry created by alloc_huge_page.  A new
+helper routine vma_del_reservation assists with this operation.
+
+There are three callers of alloc_huge_page which do not currently call
+restore_reserve_on error before freeing a page on error paths.  Add
+those missing calls.
+
+[1] https://lore.kernel.org/linux-mm/20210528005029.88088-1-almasrymina@google.com/
+
+Link: https://lkml.kernel.org/r/20210607204510.22617-1-mike.kravetz@oracle.com
+Fixes: 96b96a96ddee ("mm/hugetlb: fix huge page reservation leak in private mapping error paths")
+Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
+Reviewed-by: Mina Almasry <almasrymina@google.com>
+Cc: Axel Rasmussen <axelrasmussen@google.com>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Muchun Song <songmuchun@bytedance.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/hugetlbfs/inode.c    |    1 
+ include/linux/hugetlb.h |    2 
+ mm/hugetlb.c            |  120 ++++++++++++++++++++++++++++++++++++++----------
+ 3 files changed, 100 insertions(+), 23 deletions(-)
+
+--- a/fs/hugetlbfs/inode.c
++++ b/fs/hugetlbfs/inode.c
+@@ -738,6 +738,7 @@ static long hugetlbfs_fallocate(struct f
+               __SetPageUptodate(page);
+               error = huge_add_to_page_cache(page, mapping, index);
+               if (unlikely(error)) {
++                      restore_reserve_on_error(h, &pseudo_vma, addr, page);
+                       put_page(page);
+                       mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+                       goto out;
+--- a/include/linux/hugetlb.h
++++ b/include/linux/hugetlb.h
+@@ -597,6 +597,8 @@ struct page *alloc_huge_page_vma(struct
+                               unsigned long address);
+ int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
+                       pgoff_t idx);
++void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
++                              unsigned long address, struct page *page);
+ /* arch callback */
+ int __init __alloc_bootmem_huge_page(struct hstate *h);
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -2127,12 +2127,18 @@ out:
+  * be restored when a newly allocated huge page must be freed.  It is
+  * to be called after calling vma_needs_reservation to determine if a
+  * reservation exists.
++ *
++ * vma_del_reservation is used in error paths where an entry in the reserve
++ * map was created during huge page allocation and must be removed.  It is to
++ * be called after calling vma_needs_reservation to determine if a reservation
++ * exists.
+  */
+ enum vma_resv_mode {
+       VMA_NEEDS_RESV,
+       VMA_COMMIT_RESV,
+       VMA_END_RESV,
+       VMA_ADD_RESV,
++      VMA_DEL_RESV,
+ };
+ static long __vma_reservation_common(struct hstate *h,
+                               struct vm_area_struct *vma, unsigned long addr,
+@@ -2176,11 +2182,21 @@ static long __vma_reservation_common(str
+                       ret = region_del(resv, idx, idx + 1);
+               }
+               break;
++      case VMA_DEL_RESV:
++              if (vma->vm_flags & VM_MAYSHARE) {
++                      region_abort(resv, idx, idx + 1, 1);
++                      ret = region_del(resv, idx, idx + 1);
++              } else {
++                      ret = region_add(resv, idx, idx + 1, 1, NULL, NULL);
++                      /* region_add calls of range 1 should never fail. */
++                      VM_BUG_ON(ret < 0);
++              }
++              break;
+       default:
+               BUG();
+       }
+-      if (vma->vm_flags & VM_MAYSHARE)
++      if (vma->vm_flags & VM_MAYSHARE || mode == VMA_DEL_RESV)
+               return ret;
+       else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER) && ret >= 0) {
+               /*
+@@ -2229,25 +2245,39 @@ static long vma_add_reservation(struct h
+       return __vma_reservation_common(h, vma, addr, VMA_ADD_RESV);
+ }
++static long vma_del_reservation(struct hstate *h,
++                      struct vm_area_struct *vma, unsigned long addr)
++{
++      return __vma_reservation_common(h, vma, addr, VMA_DEL_RESV);
++}
++
+ /*
+- * This routine is called to restore a reservation on error paths.  In the
+- * specific error paths, a huge page was allocated (via alloc_huge_page)
+- * and is about to be freed.  If a reservation for the page existed,
+- * alloc_huge_page would have consumed the reservation and set
+- * HPageRestoreReserve in the newly allocated page.  When the page is freed
+- * via free_huge_page, the global reservation count will be incremented if
+- * HPageRestoreReserve is set.  However, free_huge_page can not adjust the
+- * reserve map.  Adjust the reserve map here to be consistent with global
+- * reserve count adjustments to be made by free_huge_page.
++ * This routine is called to restore reservation information on error paths.
++ * It should ONLY be called for pages allocated via alloc_huge_page(), and
++ * the hugetlb mutex should remain held when calling this routine.
++ *
++ * It handles two specific cases:
++ * 1) A reservation was in place and the page consumed the reservation.
++ *    HPageRestoreReserve is set in the page.
++ * 2) No reservation was in place for the page, so HPageRestoreReserve is
++ *    not set.  However, alloc_huge_page always updates the reserve map.
++ *
++ * In case 1, free_huge_page later in the error path will increment the
++ * global reserve count.  But, free_huge_page does not have enough context
++ * to adjust the reservation map.  This case deals primarily with private
++ * mappings.  Adjust the reserve map here to be consistent with global
++ * reserve count adjustments to be made by free_huge_page.  Make sure the
++ * reserve map indicates there is a reservation present.
++ *
++ * In case 2, simply undo reserve map modifications done by alloc_huge_page.
+  */
+-static void restore_reserve_on_error(struct hstate *h,
+-                      struct vm_area_struct *vma, unsigned long address,
+-                      struct page *page)
++void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
++                      unsigned long address, struct page *page)
+ {
+-      if (unlikely(HPageRestoreReserve(page))) {
+-              long rc = vma_needs_reservation(h, vma, address);
++      long rc = vma_needs_reservation(h, vma, address);
+-              if (unlikely(rc < 0)) {
++      if (HPageRestoreReserve(page)) {
++              if (unlikely(rc < 0))
+                       /*
+                        * Rare out of memory condition in reserve map
+                        * manipulation.  Clear HPageRestoreReserve so that
+@@ -2260,16 +2290,57 @@ static void restore_reserve_on_error(str
+                        * accounting of reserve counts.
+                        */
+                       ClearHPageRestoreReserve(page);
+-              } else if (rc) {
+-                      rc = vma_add_reservation(h, vma, address);
+-                      if (unlikely(rc < 0))
++              else if (rc)
++                      (void)vma_add_reservation(h, vma, address);
++              else
++                      vma_end_reservation(h, vma, address);
++      } else {
++              if (!rc) {
++                      /*
++                       * This indicates there is an entry in the reserve map
++                       * added by alloc_huge_page.  We know it was added
++                       * before the alloc_huge_page call, otherwise
++                       * HPageRestoreReserve would be set on the page.
++                       * Remove the entry so that a subsequent allocation
++                       * does not consume a reservation.
++                       */
++                      rc = vma_del_reservation(h, vma, address);
++                      if (rc < 0)
+                               /*
+-                               * See above comment about rare out of
+-                               * memory condition.
++                               * VERY rare out of memory condition.  Since
++                               * we can not delete the entry, set
++                               * HPageRestoreReserve so that the reserve
++                               * count will be incremented when the page
++                               * is freed.  This reserve will be consumed
++                               * on a subsequent allocation.
+                                */
+-                              ClearHPageRestoreReserve(page);
++                              SetHPageRestoreReserve(page);
++              } else if (rc < 0) {
++                      /*
++                       * Rare out of memory condition from
++                       * vma_needs_reservation call.  Memory allocation is
++                       * only attempted if a new entry is needed.  Therefore,
++                       * this implies there is not an entry in the
++                       * reserve map.
++                       *
++                       * For shared mappings, no entry in the map indicates
++                       * no reservation.  We are done.
++                       */
++                      if (!(vma->vm_flags & VM_MAYSHARE))
++                              /*
++                               * For private mappings, no entry indicates
++                               * a reservation is present.  Since we can
++                               * not add an entry, set SetHPageRestoreReserve
++                               * on the page so reserve count will be
++                               * incremented when freed.  This reserve will
++                               * be consumed on a subsequent allocation.
++                               */
++                              SetHPageRestoreReserve(page);
+               } else
+-                      vma_end_reservation(h, vma, address);
++                      /*
++                       * No reservation present, do nothing
++                       */
++                       vma_end_reservation(h, vma, address);
+       }
+ }
+@@ -3886,6 +3957,8 @@ again:
+                               spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
+                               entry = huge_ptep_get(src_pte);
+                               if (!pte_same(src_pte_old, entry)) {
++                                      restore_reserve_on_error(h, vma, addr,
++                                                              new);
+                                       put_page(new);
+                                       /* dst_entry won't change as in child */
+                                       goto again;
+@@ -4820,6 +4893,7 @@ out_release_unlock:
+       if (vm_shared)
+               unlock_page(page);
+ out_release_nounlock:
++      restore_reserve_on_error(h, dst_vma, dst_addr, page);
+       put_page(page);
+       goto out;
+ }
diff --git a/queue-5.12/mm-hwpoison-fix-race-with-hugetlb-page-allocation.patch b/queue-5.12/mm-hwpoison-fix-race-with-hugetlb-page-allocation.patch
new file mode 100644 (file)
index 0000000..9b2be81
--- /dev/null
@@ -0,0 +1,155 @@
+From 25182f05ffed0b45602438693e4eed5d7f3ebadd Mon Sep 17 00:00:00 2001
+From: Naoya Horiguchi <naoya.horiguchi@nec.com>
+Date: Tue, 15 Jun 2021 18:23:13 -0700
+Subject: mm,hwpoison: fix race with hugetlb page allocation
+
+From: Naoya Horiguchi <naoya.horiguchi@nec.com>
+
+commit 25182f05ffed0b45602438693e4eed5d7f3ebadd upstream.
+
+When hugetlb page fault (under overcommitting situation) and
+memory_failure() race, VM_BUG_ON_PAGE() is triggered by the following
+race:
+
+    CPU0:                           CPU1:
+
+                                    gather_surplus_pages()
+                                      page = alloc_surplus_huge_page()
+    memory_failure_hugetlb()
+      get_hwpoison_page(page)
+        __get_hwpoison_page(page)
+          get_page_unless_zero(page)
+                                      zero = put_page_testzero(page)
+                                      VM_BUG_ON_PAGE(!zero, page)
+                                      enqueue_huge_page(h, page)
+      put_page(page)
+
+__get_hwpoison_page() only checks the page refcount before taking an
+additional one for memory error handling, which is not enough because
+there's a time window where compound pages have non-zero refcount during
+hugetlb page initialization.
+
+So make __get_hwpoison_page() check page status a bit more for hugetlb
+pages with get_hwpoison_huge_page().  Checking hugetlb-specific flags
+under hugetlb_lock makes sure that the hugetlb page is not in a transient
+state.  It's notable that another new function, HWPoisonHandlable(), helps
+prevent races against other transient page states (like a generic compound
+page just before PageHuge becomes true).
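+
+(For illustration only: a simplified sketch of how the new helper slots
+into the refcount-taking path. The names mirror the hunks below; the THP
+handling that follows in the real function is elided:)
+
+  struct page *head = compound_head(page);
+  bool hugetlb = false;
+  int ret = get_hwpoison_huge_page(head, &hugetlb);
+
+  if (hugetlb)                    /* refcount taken under hugetlb_lock */
+          return ret;
+  if (!HWPoisonHandlable(head))   /* refuse transient/unsupported pages */
+          return 0;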
+
+Link: https://lkml.kernel.org/r/20210603233632.2964832-2-nao.horiguchi@gmail.com
+Fixes: ead07f6a867b ("mm/memory-failure: introduce get_hwpoison_page() for consistent refcount handling")
+Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
+Reported-by: Muchun Song <songmuchun@bytedance.com>
+Acked-by: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Tony Luck <tony.luck@intel.com>
+Cc: <stable@vger.kernel.org>   [5.12+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/hugetlb.h |    6 ++++++
+ mm/hugetlb.c            |   15 +++++++++++++++
+ mm/memory-failure.c     |   29 +++++++++++++++++++++++++++--
+ 3 files changed, 48 insertions(+), 2 deletions(-)
+
+--- a/include/linux/hugetlb.h
++++ b/include/linux/hugetlb.h
+@@ -145,6 +145,7 @@ bool hugetlb_reserve_pages(struct inode
+ long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
+                                               long freed);
+ bool isolate_huge_page(struct page *page, struct list_head *list);
++int get_hwpoison_huge_page(struct page *page, bool *hugetlb);
+ void putback_active_hugepage(struct page *page);
+ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason);
+ void free_huge_page(struct page *page);
+@@ -330,6 +331,11 @@ static inline bool isolate_huge_page(str
+       return false;
+ }
++static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
++{
++      return 0;
++}
++
+ static inline void putback_active_hugepage(struct page *page)
+ {
+ }
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -5664,6 +5664,21 @@ unlock:
+       return ret;
+ }
++int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
++{
++      int ret = 0;
++
++      *hugetlb = false;
++      spin_lock_irq(&hugetlb_lock);
++      if (PageHeadHuge(page)) {
++              *hugetlb = true;
++              if (HPageFreed(page) || HPageMigratable(page))
++                      ret = get_page_unless_zero(page);
++      }
++      spin_unlock_irq(&hugetlb_lock);
++      return ret;
++}
++
+ void putback_active_hugepage(struct page *page)
+ {
+       spin_lock(&hugetlb_lock);
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -949,6 +949,17 @@ static int page_action(struct page_state
+       return (result == MF_RECOVERED || result == MF_DELAYED) ? 0 : -EBUSY;
+ }
++/*
++ * Return true if the page type of the given page is supported by the
++ * hwpoison mechanism (while handling could still fail), otherwise false.
++ * This function does not return true for hugetlb or device memory pages,
++ * so it's assumed to be called only in contexts where we never have them.
++ */
++static inline bool HWPoisonHandlable(struct page *page)
++{
++      return PageLRU(page) || __PageMovable(page);
++}
++
+ /**
+  * __get_hwpoison_page() - Get refcount for memory error handling:
+  * @page:     raw error page (hit by memory error)
+@@ -959,8 +970,22 @@ static int page_action(struct page_state
+ static int __get_hwpoison_page(struct page *page)
+ {
+       struct page *head = compound_head(page);
++      int ret = 0;
++      bool hugetlb = false;
++
++      ret = get_hwpoison_huge_page(head, &hugetlb);
++      if (hugetlb)
++              return ret;
++
++      /*
++       * This check prevents calling get_page_unless_zero() for any
++       * unsupported type of page in order to reduce the risk of
++       * unexpected races caused by taking a page refcount.
++       */
++      if (!HWPoisonHandlable(head))
++              return 0;
+-      if (!PageHuge(head) && PageTransHuge(head)) {
++      if (PageTransHuge(head)) {
+               /*
+                * Non anonymous thp exists only in allocation/free time. We
+                * can't handle such a case correctly, so let's give it up.
+@@ -1017,7 +1042,7 @@ try_again:
+                       ret = -EIO;
+               }
+       } else {
+-              if (PageHuge(p) || PageLRU(p) || __PageMovable(p)) {
++              if (PageHuge(p) || HWPoisonHandlable(p)) {
+                       ret = 1;
+               } else {
+                       /*
diff --git a/queue-5.12/mm-slub-actually-fix-freelist-pointer-vs-redzoning.patch b/queue-5.12/mm-slub-actually-fix-freelist-pointer-vs-redzoning.patch
new file mode 100644 (file)
index 0000000..ec7cf5c
--- /dev/null
@@ -0,0 +1,104 @@
+From e41a49fadbc80b60b48d3c095d9e2ee7ef7c9a8e Mon Sep 17 00:00:00 2001
+From: Kees Cook <keescook@chromium.org>
+Date: Tue, 15 Jun 2021 18:23:26 -0700
+Subject: mm/slub: actually fix freelist pointer vs redzoning
+
+From: Kees Cook <keescook@chromium.org>
+
+commit e41a49fadbc80b60b48d3c095d9e2ee7ef7c9a8e upstream.
+
+It turns out that SLUB redzoning ("slub_debug=Z") checks from
+s->object_size rather than from s->inuse (which is normally bumped to
+make room for the freelist pointer), so a cache created with an object
+size less than 24 would have the freelist pointer written beyond
+s->object_size, causing the redzone to be corrupted by the freelist
+pointer.  This was very visible with "slub_debug=ZF":
+
+  BUG test (Tainted: G    B            ): Right Redzone overwritten
+  -----------------------------------------------------------------------------
+
+  INFO: 0xffff957ead1c05de-0xffff957ead1c05df @offset=1502. First byte 0x1a instead of 0xbb
+  INFO: Slab 0xffffef3950b47000 objects=170 used=170 fp=0x0000000000000000 flags=0x8000000000000200
+  INFO: Object 0xffff957ead1c05d8 @offset=1496 fp=0xffff957ead1c0620
+
+  Redzone  (____ptrval____): bb bb bb bb bb bb bb bb               ........
+  Object   (____ptrval____): 00 00 00 00 00 f6 f4 a5               ........
+  Redzone  (____ptrval____): 40 1d e8 1a aa                        @....
+  Padding  (____ptrval____): 00 00 00 00 00 00 00 00               ........
+
+Adjust the offset to stay within s->object_size.
+
+(Note that no caches in this size range are known to exist in the
+kernel currently.)
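+
+(A hypothetical reproducer sketch, not taken from the report: with
+redzone checking enabled, e.g. booting with "slub_debug=Z", a cache in
+the affected size range, say an 18-byte object on 64-bit, gets its
+freelist pointer written at the word-aligned midpoint, past
+s->object_size and into the right redzone. The cache name is made up:)
+
+  struct kmem_cache *s = kmem_cache_create("tiny-repro", 18, 0,
+                                           SLAB_RED_ZONE, NULL);
+  void *obj = kmem_cache_alloc(s, GFP_KERNEL);
+
+  kmem_cache_free(s, obj);  /* free writes the pointer over the redzone */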
+
+Link: https://lkml.kernel.org/r/20210608183955.280836-4-keescook@chromium.org
+Link: https://lore.kernel.org/linux-mm/20200807160627.GA1420741@elver.google.com/
+Link: https://lore.kernel.org/lkml/0f7dd7b2-7496-5e2d-9488-2ec9f8e90441@suse.cz/
+Fixes: 89b83f282d8b ("slub: avoid redzone when choosing freepointer location")
+Link: https://lore.kernel.org/lkml/CANpmjNOwZ5VpKQn+SYWovTkFB4VsT-RPwyENBmaK0dLcpqStkA@mail.gmail.com
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Reported-by: Marco Elver <elver@google.com>
+Reported-by: "Lin, Zhenpeng" <zplin@psu.edu>
+Tested-by: Marco Elver <elver@google.com>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Christoph Lameter <cl@linux.com>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: Pekka Enberg <penberg@kernel.org>
+Cc: Roman Gushchin <guro@fb.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/slub.c |   14 +++-----------
+ 1 file changed, 3 insertions(+), 11 deletions(-)
+
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -3687,7 +3687,6 @@ static int calculate_sizes(struct kmem_c
+ {
+       slab_flags_t flags = s->flags;
+       unsigned int size = s->object_size;
+-      unsigned int freepointer_area;
+       unsigned int order;
+       /*
+@@ -3696,13 +3695,6 @@ static int calculate_sizes(struct kmem_c
+        * the possible location of the free pointer.
+        */
+       size = ALIGN(size, sizeof(void *));
+-      /*
+-       * This is the area of the object where a freepointer can be
+-       * safely written. If redzoning adds more to the inuse size, we
+-       * can't use that portion for writing the freepointer, so
+-       * s->offset must be limited within this for the general case.
+-       */
+-      freepointer_area = size;
+ #ifdef CONFIG_SLUB_DEBUG
+       /*
+@@ -3728,7 +3720,7 @@ static int calculate_sizes(struct kmem_c
+       /*
+        * With that we have determined the number of bytes in actual use
+-       * by the object. This is the potential offset to the free pointer.
++       * by the object and redzoning.
+        */
+       s->inuse = size;
+@@ -3751,13 +3743,13 @@ static int calculate_sizes(struct kmem_c
+                */
+               s->offset = size;
+               size += sizeof(void *);
+-      } else if (freepointer_area > sizeof(void *)) {
++      } else {
+               /*
+                * Store freelist pointer near middle of object to keep
+                * it away from the edges of the object to avoid small
+                * sized over/underflows from neighboring allocations.
+                */
+-              s->offset = ALIGN(freepointer_area / 2, sizeof(void *));
++              s->offset = ALIGN_DOWN(s->object_size / 2, sizeof(void *));
+       }
+ #ifdef CONFIG_SLUB_DEBUG
diff --git a/queue-5.12/mm-slub-clarify-verification-reporting.patch b/queue-5.12/mm-slub-clarify-verification-reporting.patch
new file mode 100644 (file)
index 0000000..61ccb16
--- /dev/null
@@ -0,0 +1,147 @@
+From 8669dbab2ae56085c128894b181c2aa50f97e368 Mon Sep 17 00:00:00 2001
+From: Kees Cook <keescook@chromium.org>
+Date: Tue, 15 Jun 2021 18:23:19 -0700
+Subject: mm/slub: clarify verification reporting
+
+From: Kees Cook <keescook@chromium.org>
+
+commit 8669dbab2ae56085c128894b181c2aa50f97e368 upstream.
+
+Patch series "Actually fix freelist pointer vs redzoning", v4.
+
+This fixes redzoning vs the freelist pointer (both for middle-position
+and very small caches).  Both are "theoretical" fixes, in that I see no
+evidence of such small-sized caches actually be used in the kernel, but
+that's no reason to let the bugs continue to exist, especially since
+people doing local development keep tripping over it.  :)
+
+This patch (of 3):
+
+Instead of repeating "Redzone" and "Poison", clarify which sides of
+those zones got tripped.  Additionally fix column alignment in the
+trailer.
+
+Before:
+
+  BUG test (Tainted: G    B            ): Redzone overwritten
+  ...
+  Redzone (____ptrval____): bb bb bb bb bb bb bb bb      ........
+  Object (____ptrval____): f6 f4 a5 40 1d e8            ...@..
+  Redzone (____ptrval____): 1a aa                        ..
+  Padding (____ptrval____): 00 00 00 00 00 00 00 00      ........
+
+After:
+
+  BUG test (Tainted: G    B            ): Right Redzone overwritten
+  ...
+  Redzone  (____ptrval____): bb bb bb bb bb bb bb bb      ........
+  Object   (____ptrval____): f6 f4 a5 40 1d e8            ...@..
+  Redzone  (____ptrval____): 1a aa                        ..
+  Padding  (____ptrval____): 00 00 00 00 00 00 00 00      ........
+
+The earlier commits that slowly resulted in the "Before" reporting were:
+
+  d86bd1bece6f ("mm/slub: support left redzone")
+  ffc79d288000 ("slub: use print_hex_dump")
+  2492268472e7 ("SLUB: change error reporting format to follow lockdep loosely")
+
+Link: https://lkml.kernel.org/r/20210608183955.280836-1-keescook@chromium.org
+Link: https://lkml.kernel.org/r/20210608183955.280836-2-keescook@chromium.org
+Link: https://lore.kernel.org/lkml/cfdb11d7-fb8e-e578-c939-f7f5fb69a6bd@suse.cz/
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Marco Elver <elver@google.com>
+Cc: "Lin, Zhenpeng" <zplin@psu.edu>
+Cc: Christoph Lameter <cl@linux.com>
+Cc: Pekka Enberg <penberg@kernel.org>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: Roman Gushchin <guro@fb.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/vm/slub.rst |   10 +++++-----
+ mm/slub.c                 |   14 +++++++-------
+ 2 files changed, 12 insertions(+), 12 deletions(-)
+
+--- a/Documentation/vm/slub.rst
++++ b/Documentation/vm/slub.rst
+@@ -181,7 +181,7 @@ SLUB Debug output
+ Here is a sample of slub debug output::
+  ====================================================================
+- BUG kmalloc-8: Redzone overwritten
++ BUG kmalloc-8: Right Redzone overwritten
+  --------------------------------------------------------------------
+  INFO: 0xc90f6d28-0xc90f6d2b. First byte 0x00 instead of 0xcc
+@@ -189,10 +189,10 @@ Here is a sample of slub debug output::
+  INFO: Object 0xc90f6d20 @offset=3360 fp=0xc90f6d58
+  INFO: Allocated in get_modalias+0x61/0xf5 age=53 cpu=1 pid=554
+- Bytes b4 0xc90f6d10:  00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ
+-   Object 0xc90f6d20:  31 30 31 39 2e 30 30 35                         1019.005
+-  Redzone 0xc90f6d28:  00 cc cc cc                                     .
+-  Padding 0xc90f6d50:  5a 5a 5a 5a 5a 5a 5a 5a                         ZZZZZZZZ
++ Bytes b4 (0xc90f6d10): 00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ
++ Object   (0xc90f6d20): 31 30 31 39 2e 30 30 35                         1019.005
++ Redzone  (0xc90f6d28): 00 cc cc cc                                     .
++ Padding  (0xc90f6d50): 5a 5a 5a 5a 5a 5a 5a 5a                         ZZZZZZZZ
+    [<c010523d>] dump_trace+0x63/0x1eb
+    [<c01053df>] show_trace_log_lvl+0x1a/0x2f
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -710,15 +710,15 @@ static void print_trailer(struct kmem_ca
+              p, p - addr, get_freepointer(s, p));
+       if (s->flags & SLAB_RED_ZONE)
+-              print_section(KERN_ERR, "Redzone ", p - s->red_left_pad,
++              print_section(KERN_ERR, "Redzone  ", p - s->red_left_pad,
+                             s->red_left_pad);
+       else if (p > addr + 16)
+               print_section(KERN_ERR, "Bytes b4 ", p - 16, 16);
+-      print_section(KERN_ERR, "Object ", p,
++      print_section(KERN_ERR,         "Object   ", p,
+                     min_t(unsigned int, s->object_size, PAGE_SIZE));
+       if (s->flags & SLAB_RED_ZONE)
+-              print_section(KERN_ERR, "Redzone ", p + s->object_size,
++              print_section(KERN_ERR, "Redzone  ", p + s->object_size,
+                       s->inuse - s->object_size);
+       off = get_info_end(s);
+@@ -730,7 +730,7 @@ static void print_trailer(struct kmem_ca
+       if (off != size_from_object(s))
+               /* Beginning of the filler is the free pointer */
+-              print_section(KERN_ERR, "Padding ", p + off,
++              print_section(KERN_ERR, "Padding  ", p + off,
+                             size_from_object(s) - off);
+       dump_stack();
+@@ -907,11 +907,11 @@ static int check_object(struct kmem_cach
+       u8 *endobject = object + s->object_size;
+       if (s->flags & SLAB_RED_ZONE) {
+-              if (!check_bytes_and_report(s, page, object, "Redzone",
++              if (!check_bytes_and_report(s, page, object, "Left Redzone",
+                       object - s->red_left_pad, val, s->red_left_pad))
+                       return 0;
+-              if (!check_bytes_and_report(s, page, object, "Redzone",
++              if (!check_bytes_and_report(s, page, object, "Right Redzone",
+                       endobject, val, s->inuse - s->object_size))
+                       return 0;
+       } else {
+@@ -926,7 +926,7 @@ static int check_object(struct kmem_cach
+               if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
+                       (!check_bytes_and_report(s, page, p, "Poison", p,
+                                       POISON_FREE, s->object_size - 1) ||
+-                       !check_bytes_and_report(s, page, p, "Poison",
++                       !check_bytes_and_report(s, page, p, "End Poison",
+                               p + s->object_size - 1, POISON_END, 1)))
+                       return 0;
+               /*
diff --git a/queue-5.12/mm-slub-fix-redzoning-for-small-allocations.patch b/queue-5.12/mm-slub-fix-redzoning-for-small-allocations.patch
new file mode 100644 (file)
index 0000000..8f36a11
--- /dev/null
@@ -0,0 +1,92 @@
+From 74c1d3e081533825f2611e46edea1fcdc0701985 Mon Sep 17 00:00:00 2001
+From: Kees Cook <keescook@chromium.org>
+Date: Tue, 15 Jun 2021 18:23:22 -0700
+Subject: mm/slub: fix redzoning for small allocations
+
+From: Kees Cook <keescook@chromium.org>
+
+commit 74c1d3e081533825f2611e46edea1fcdc0701985 upstream.
+
+The redzone area for SLUB exists between s->object_size and s->inuse
+(which is at least the word-aligned object_size).  If a cache were
+created with an object_size smaller than sizeof(void *), the in-object
+stored freelist pointer would overwrite the redzone (e.g.  with boot
+param "slub_debug=ZF"):
+
+  BUG test (Tainted: G    B            ): Right Redzone overwritten
+  -----------------------------------------------------------------------------
+
+  INFO: 0xffff957ead1c05de-0xffff957ead1c05df @offset=1502. First byte 0x1a instead of 0xbb
+  INFO: Slab 0xffffef3950b47000 objects=170 used=170 fp=0x0000000000000000 flags=0x8000000000000200
+  INFO: Object 0xffff957ead1c05d8 @offset=1496 fp=0xffff957ead1c0620
+
+  Redzone  (____ptrval____): bb bb bb bb bb bb bb bb    ........
+  Object   (____ptrval____): f6 f4 a5 40 1d e8          ...@..
+  Redzone  (____ptrval____): 1a aa                      ..
+  Padding  (____ptrval____): 00 00 00 00 00 00 00 00    ........
+
+Store the freelist pointer out of line when object_size is smaller than
+sizeof(void *) and redzoning is enabled.
+
+Additionally remove the "smaller than sizeof(void *)" check under
+CONFIG_DEBUG_VM in kmem_cache_sanity_check() as it is now redundant:
+SLAB and SLOB both handle small sizes.
+
+(Note that no caches within this size range are known to exist in the
+kernel currently.)
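+
+(A worked example of the overlap, assuming a 64-bit kernel and the
+6-byte object from the report above: s->inuse is word-aligned to 8, so
+the right redzone occupies bytes 6-7. An in-object freelist pointer
+stored at offset 0 writes bytes 0-7 and clobbers both redzone bytes,
+matching the two overwritten bytes in the dump. With the fix, the
+pointer is relocated past the object instead.)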
+
+Link: https://lkml.kernel.org/r/20210608183955.280836-3-keescook@chromium.org
+Fixes: 81819f0fc828 ("SLUB core")
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Christoph Lameter <cl@linux.com>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: "Lin, Zhenpeng" <zplin@psu.edu>
+Cc: Marco Elver <elver@google.com>
+Cc: Pekka Enberg <penberg@kernel.org>
+Cc: Roman Gushchin <guro@fb.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/slab_common.c |    3 +--
+ mm/slub.c        |    8 +++++---
+ 2 files changed, 6 insertions(+), 5 deletions(-)
+
+--- a/mm/slab_common.c
++++ b/mm/slab_common.c
+@@ -89,8 +89,7 @@ EXPORT_SYMBOL(kmem_cache_size);
+ #ifdef CONFIG_DEBUG_VM
+ static int kmem_cache_sanity_check(const char *name, unsigned int size)
+ {
+-      if (!name || in_interrupt() || size < sizeof(void *) ||
+-              size > KMALLOC_MAX_SIZE) {
++      if (!name || in_interrupt() || size > KMALLOC_MAX_SIZE) {
+               pr_err("kmem_cache_create(%s) integrity check failed\n", name);
+               return -EINVAL;
+       }
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -3732,15 +3732,17 @@ static int calculate_sizes(struct kmem_c
+        */
+       s->inuse = size;
+-      if (((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
+-              s->ctor)) {
++      if ((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
++          ((flags & SLAB_RED_ZONE) && s->object_size < sizeof(void *)) ||
++          s->ctor) {
+               /*
+                * Relocate free pointer after the object if it is not
+                * permitted to overwrite the first word of the object on
+                * kmem_cache_free.
+                *
+                * This is the case if we do RCU, have a constructor or
+-               * destructor or are poisoning the objects.
++               * destructor, are poisoning the objects, or are
++               * redzoning an object smaller than sizeof(void *).
+                *
+                * The assumption that s->offset >= s->inuse means free
+                * pointer is outside of the object is used in the
diff --git a/queue-5.12/mm-slub.c-include-swab.h.patch b/queue-5.12/mm-slub.c-include-swab.h.patch
new file mode 100644 (file)
index 0000000..b74f615
--- /dev/null
@@ -0,0 +1,35 @@
+From 1b3865d016815cbd69a1879ca1c8a8901fda1072 Mon Sep 17 00:00:00 2001
+From: Andrew Morton <akpm@linux-foundation.org>
+Date: Tue, 15 Jun 2021 18:23:39 -0700
+Subject: mm/slub.c: include swab.h
+
+From: Andrew Morton <akpm@linux-foundation.org>
+
+commit 1b3865d016815cbd69a1879ca1c8a8901fda1072 upstream.
+
+Fixes build with CONFIG_SLAB_FREELIST_HARDENED=y.
+
+Hopefully.  But it's the right thing to do anyway.
+
+Fixes: 1ad53d9fa3f61 ("slub: improve bit diffusion for freelist ptr obfuscation")
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=213417
+Reported-by: <vannguye@cisco.com>
+Acked-by: Kees Cook <keescook@chromium.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/slub.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -15,6 +15,7 @@
+ #include <linux/module.h>
+ #include <linux/bit_spinlock.h>
+ #include <linux/interrupt.h>
++#include <linux/swab.h>
+ #include <linux/bitops.h>
+ #include <linux/slab.h>
+ #include "slab.h"
diff --git a/queue-5.12/mm-swap-fix-pte_same_as_swp-not-removing-uffd-wp-bit-when-compare.patch b/queue-5.12/mm-swap-fix-pte_same_as_swp-not-removing-uffd-wp-bit-when-compare.patch
new file mode 100644 (file)
index 0000000..02b7c50
--- /dev/null
@@ -0,0 +1,70 @@
+From 099dd6878b9b12d6bbfa6bf29ce0c8ddd38f6901 Mon Sep 17 00:00:00 2001
+From: Peter Xu <peterx@redhat.com>
+Date: Tue, 15 Jun 2021 18:23:16 -0700
+Subject: mm/swap: fix pte_same_as_swp() not removing uffd-wp bit when compare
+
+From: Peter Xu <peterx@redhat.com>
+
+commit 099dd6878b9b12d6bbfa6bf29ce0c8ddd38f6901 upstream.
+
+I found, by pure code review, that pte_same_as_swp() of unuse_vma()
+didn't take the uffd-wp bit into account when comparing ptes.
+pte_same_as_swp() returning a false negative could cause failure to
+swapoff swap ptes that were wr-protected by userfaultfd.
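+
+(The false negative, as a sketch; pte_swp_clear_flags() is the helper
+introduced below, and which flag bits exist is arch-specific:)
+
+  /* swp_pte is built from the bare swap entry and carries no flag bits */
+  pte_same(pte, swp_pte);                       /* false if uffd-wp is set */
+  pte_same(pte_swp_clear_flags(pte), swp_pte);  /* compares as intended */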
+
+Link: https://lkml.kernel.org/r/20210603180546.9083-1-peterx@redhat.com
+Fixes: f45ec5ff16a7 ("userfaultfd: wp: support swap and page migration")
+Signed-off-by: Peter Xu <peterx@redhat.com>
+Acked-by: Hugh Dickins <hughd@google.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: <stable@vger.kernel.org>   [5.7+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/swapops.h |   15 +++++++++++----
+ mm/swapfile.c           |    2 +-
+ 2 files changed, 12 insertions(+), 5 deletions(-)
+
+--- a/include/linux/swapops.h
++++ b/include/linux/swapops.h
+@@ -23,6 +23,16 @@
+ #define SWP_TYPE_SHIFT        (BITS_PER_XA_VALUE - MAX_SWAPFILES_SHIFT)
+ #define SWP_OFFSET_MASK       ((1UL << SWP_TYPE_SHIFT) - 1)
++/* Clear all flags but only keep swp_entry_t related information */
++static inline pte_t pte_swp_clear_flags(pte_t pte)
++{
++      if (pte_swp_soft_dirty(pte))
++              pte = pte_swp_clear_soft_dirty(pte);
++      if (pte_swp_uffd_wp(pte))
++              pte = pte_swp_clear_uffd_wp(pte);
++      return pte;
++}
++
+ /*
+  * Store a type+offset into a swp_entry_t in an arch-independent format
+  */
+@@ -66,10 +76,7 @@ static inline swp_entry_t pte_to_swp_ent
+ {
+       swp_entry_t arch_entry;
+-      if (pte_swp_soft_dirty(pte))
+-              pte = pte_swp_clear_soft_dirty(pte);
+-      if (pte_swp_uffd_wp(pte))
+-              pte = pte_swp_clear_uffd_wp(pte);
++      pte = pte_swp_clear_flags(pte);
+       arch_entry = __pte_to_swp_entry(pte);
+       return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry));
+ }
+--- a/mm/swapfile.c
++++ b/mm/swapfile.c
+@@ -1900,7 +1900,7 @@ unsigned int count_swap_pages(int type,
+ static inline int pte_same_as_swp(pte_t pte, pte_t swp_pte)
+ {
+-      return pte_same(pte_swp_clear_soft_dirty(pte), swp_pte);
++      return pte_same(pte_swp_clear_flags(pte), swp_pte);
+ }
+ /*
diff --git a/queue-5.12/net-bridge-fix-vlan-tunnel-dst-null-pointer-dereference.patch b/queue-5.12/net-bridge-fix-vlan-tunnel-dst-null-pointer-dereference.patch
new file mode 100644 (file)
index 0000000..d5916f1
--- /dev/null
@@ -0,0 +1,135 @@
+From 58e2071742e38f29f051b709a5cca014ba51166f Mon Sep 17 00:00:00 2001
+From: Nikolay Aleksandrov <nikolay@nvidia.com>
+Date: Thu, 10 Jun 2021 15:04:10 +0300
+Subject: net: bridge: fix vlan tunnel dst null pointer dereference
+
+From: Nikolay Aleksandrov <nikolay@nvidia.com>
+
+commit 58e2071742e38f29f051b709a5cca014ba51166f upstream.
+
+This patch fixes a tunnel_dst null pointer dereference due to lockless
+access in the tunnel egress path. When deleting a vlan tunnel, the
+tunnel_dst pointer is set to NULL without waiting for a grace period
+(i.e. while it's still usable) and egressing packets dereference it
+without checking. Use READ_ONCE/WRITE_ONCE to annotate the lockless use
+of tunnel_id, and use RCU for accessing tunnel_dst. The dst itself is
+already properly RCU protected, so nothing fancier is needed than making
+sure tunnel_id and tunnel_dst are read only once and checked in the
+egress path.
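+
+(The resulting publish/read pairing, as a minimal sketch using the names
+from the hunks below; the reader runs in the RCU read-side section
+already provided by the bridge fast path:)
+
+  /* writer, under rtnl */
+  rcu_assign_pointer(vlan->tinfo.tunnel_dst, metadata);
+  WRITE_ONCE(vlan->tinfo.tunnel_id, key);
+
+  /* reader, egress path */
+  tunnel_id = READ_ONCE(vlan->tinfo.tunnel_id);
+  tunnel_dst = rcu_dereference(vlan->tinfo.tunnel_dst);
+  if (tunnel_dst)
+          skb_dst_set(skb, dst_clone(&tunnel_dst->dst));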
+
+Cc: stable@vger.kernel.org
+Fixes: 11538d039ac6 ("bridge: vlan dst_metadata hooks in ingress and egress paths")
+Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_private.h     |    4 ++--
+ net/bridge/br_vlan_tunnel.c |   38 ++++++++++++++++++++++++--------------
+ 2 files changed, 26 insertions(+), 16 deletions(-)
+
+--- a/net/bridge/br_private.h
++++ b/net/bridge/br_private.h
+@@ -90,8 +90,8 @@ struct bridge_mcast_stats {
+ #endif
+ struct br_tunnel_info {
+-      __be64                  tunnel_id;
+-      struct metadata_dst     *tunnel_dst;
++      __be64                          tunnel_id;
++      struct metadata_dst __rcu       *tunnel_dst;
+ };
+ /* private vlan flags */
+--- a/net/bridge/br_vlan_tunnel.c
++++ b/net/bridge/br_vlan_tunnel.c
+@@ -41,26 +41,33 @@ static struct net_bridge_vlan *br_vlan_t
+                                     br_vlan_tunnel_rht_params);
+ }
++static void vlan_tunnel_info_release(struct net_bridge_vlan *vlan)
++{
++      struct metadata_dst *tdst = rtnl_dereference(vlan->tinfo.tunnel_dst);
++
++      WRITE_ONCE(vlan->tinfo.tunnel_id, 0);
++      RCU_INIT_POINTER(vlan->tinfo.tunnel_dst, NULL);
++      dst_release(&tdst->dst);
++}
++
+ void vlan_tunnel_info_del(struct net_bridge_vlan_group *vg,
+                         struct net_bridge_vlan *vlan)
+ {
+-      if (!vlan->tinfo.tunnel_dst)
++      if (!rcu_access_pointer(vlan->tinfo.tunnel_dst))
+               return;
+       rhashtable_remove_fast(&vg->tunnel_hash, &vlan->tnode,
+                              br_vlan_tunnel_rht_params);
+-      vlan->tinfo.tunnel_id = 0;
+-      dst_release(&vlan->tinfo.tunnel_dst->dst);
+-      vlan->tinfo.tunnel_dst = NULL;
++      vlan_tunnel_info_release(vlan);
+ }
+ static int __vlan_tunnel_info_add(struct net_bridge_vlan_group *vg,
+                                 struct net_bridge_vlan *vlan, u32 tun_id)
+ {
+-      struct metadata_dst *metadata = NULL;
++      struct metadata_dst *metadata = rtnl_dereference(vlan->tinfo.tunnel_dst);
+       __be64 key = key32_to_tunnel_id(cpu_to_be32(tun_id));
+       int err;
+-      if (vlan->tinfo.tunnel_dst)
++      if (metadata)
+               return -EEXIST;
+       metadata = __ip_tun_set_dst(0, 0, 0, 0, 0, TUNNEL_KEY,
+@@ -69,8 +76,8 @@ static int __vlan_tunnel_info_add(struct
+               return -EINVAL;
+       metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_BRIDGE;
+-      vlan->tinfo.tunnel_dst = metadata;
+-      vlan->tinfo.tunnel_id = key;
++      rcu_assign_pointer(vlan->tinfo.tunnel_dst, metadata);
++      WRITE_ONCE(vlan->tinfo.tunnel_id, key);
+       err = rhashtable_lookup_insert_fast(&vg->tunnel_hash, &vlan->tnode,
+                                           br_vlan_tunnel_rht_params);
+@@ -79,9 +86,7 @@ static int __vlan_tunnel_info_add(struct
+       return 0;
+ out:
+-      dst_release(&vlan->tinfo.tunnel_dst->dst);
+-      vlan->tinfo.tunnel_dst = NULL;
+-      vlan->tinfo.tunnel_id = 0;
++      vlan_tunnel_info_release(vlan);
+       return err;
+ }
+@@ -182,12 +187,15 @@ int br_handle_ingress_vlan_tunnel(struct
+ int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
+                                struct net_bridge_vlan *vlan)
+ {
++      struct metadata_dst *tunnel_dst;
++      __be64 tunnel_id;
+       int err;
+-      if (!vlan || !vlan->tinfo.tunnel_id)
++      if (!vlan)
+               return 0;
+-      if (unlikely(!skb_vlan_tag_present(skb)))
++      tunnel_id = READ_ONCE(vlan->tinfo.tunnel_id);
++      if (!tunnel_id || unlikely(!skb_vlan_tag_present(skb)))
+               return 0;
+       skb_dst_drop(skb);
+@@ -195,7 +203,9 @@ int br_handle_egress_vlan_tunnel(struct
+       if (err)
+               return err;
+-      skb_dst_set(skb, dst_clone(&vlan->tinfo.tunnel_dst->dst));
++      tunnel_dst = rcu_dereference(vlan->tinfo.tunnel_dst);
++      if (tunnel_dst)
++              skb_dst_set(skb, dst_clone(&tunnel_dst->dst));
+       return 0;
+ }
diff --git a/queue-5.12/net-bridge-fix-vlan-tunnel-dst-refcnt-when-egressing.patch b/queue-5.12/net-bridge-fix-vlan-tunnel-dst-refcnt-when-egressing.patch
new file mode 100644 (file)
index 0000000..b1c112f
--- /dev/null
@@ -0,0 +1,87 @@
+From cfc579f9d89af4ada58c69b03bcaa4887840f3b3 Mon Sep 17 00:00:00 2001
+From: Nikolay Aleksandrov <nikolay@nvidia.com>
+Date: Thu, 10 Jun 2021 15:04:11 +0300
+Subject: net: bridge: fix vlan tunnel dst refcnt when egressing
+
+From: Nikolay Aleksandrov <nikolay@nvidia.com>
+
+commit cfc579f9d89af4ada58c69b03bcaa4887840f3b3 upstream.
+
+The egress tunnel code uses dst_clone() and directly sets the result,
+which is wrong because the entry might have a 0 refcnt or already be
+deleted, causing a number of problems. It also triggers the WARN_ON() in
+dst_hold()[1] when a refcnt couldn't be taken. Fix it by using dst_hold_safe()
+and checking if a reference was actually taken before setting the dst.
+
+[1] dmesg WARN_ON log and following refcnt errors
+ WARNING: CPU: 5 PID: 38 at include/net/dst.h:230 br_handle_egress_vlan_tunnel+0x10b/0x134 [bridge]
+ Modules linked in: 8021q garp mrp bridge stp llc bonding ipv6 virtio_net
+ CPU: 5 PID: 38 Comm: ksoftirqd/5 Kdump: loaded Tainted: G        W         5.13.0-rc3+ #360
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1.fc33 04/01/2014
+ RIP: 0010:br_handle_egress_vlan_tunnel+0x10b/0x134 [bridge]
+ Code: e8 85 bc 01 e1 45 84 f6 74 90 45 31 f6 85 db 48 c7 c7 a0 02 19 a0 41 0f 94 c6 31 c9 31 d2 44 89 f6 e8 64 bc 01 e1 85 db 75 02 <0f> 0b 31 c9 31 d2 44 89 f6 48 c7 c7 70 02 19 a0 e8 4b bc 01 e1 49
+ RSP: 0018:ffff8881003d39e8 EFLAGS: 00010246
+ RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
+ RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffffffffa01902a0
+ RBP: ffff8881040c6700 R08: 0000000000000000 R09: 0000000000000001
+ R10: 2ce93d0054fe0d00 R11: 54fe0d00000e0000 R12: ffff888109515000
+ R13: 0000000000000000 R14: 0000000000000001 R15: 0000000000000401
+ FS:  0000000000000000(0000) GS:ffff88822bf40000(0000) knlGS:0000000000000000
+ CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 00007f42ba70f030 CR3: 0000000109926000 CR4: 00000000000006e0
+ Call Trace:
+  br_handle_vlan+0xbc/0xca [bridge]
+  __br_forward+0x23/0x164 [bridge]
+  deliver_clone+0x41/0x48 [bridge]
+  br_handle_frame_finish+0x36f/0x3aa [bridge]
+  ? skb_dst+0x2e/0x38 [bridge]
+  ? br_handle_ingress_vlan_tunnel+0x3e/0x1c8 [bridge]
+  ? br_handle_frame_finish+0x3aa/0x3aa [bridge]
+  br_handle_frame+0x2c3/0x377 [bridge]
+  ? __skb_pull+0x33/0x51
+  ? vlan_do_receive+0x4f/0x36a
+  ? br_handle_frame_finish+0x3aa/0x3aa [bridge]
+  __netif_receive_skb_core+0x539/0x7c6
+  ? __list_del_entry_valid+0x16e/0x1c2
+  __netif_receive_skb_list_core+0x6d/0xd6
+  netif_receive_skb_list_internal+0x1d9/0x1fa
+  gro_normal_list+0x22/0x3e
+  dev_gro_receive+0x55b/0x600
+  ? detach_buf_split+0x58/0x140
+  napi_gro_receive+0x94/0x12e
+  virtnet_poll+0x15d/0x315 [virtio_net]
+  __napi_poll+0x2c/0x1c9
+  net_rx_action+0xe6/0x1fb
+  __do_softirq+0x115/0x2d8
+  run_ksoftirqd+0x18/0x20
+  smpboot_thread_fn+0x183/0x19c
+  ? smpboot_unregister_percpu_thread+0x66/0x66
+  kthread+0x10a/0x10f
+  ? kthread_mod_delayed_work+0xb6/0xb6
+  ret_from_fork+0x22/0x30
+ ---[ end trace 49f61b07f775fd2b ]---
+ dst_release: dst:00000000c02d677a refcnt:-1
+ dst_release underflow
+
+Cc: stable@vger.kernel.org
+Fixes: 11538d039ac6 ("bridge: vlan dst_metadata hooks in ingress and egress paths")
+Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_vlan_tunnel.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/bridge/br_vlan_tunnel.c
++++ b/net/bridge/br_vlan_tunnel.c
+@@ -204,8 +204,8 @@ int br_handle_egress_vlan_tunnel(struct
+               return err;
+       tunnel_dst = rcu_dereference(vlan->tinfo.tunnel_dst);
+-      if (tunnel_dst)
+-              skb_dst_set(skb, dst_clone(&tunnel_dst->dst));
++      if (tunnel_dst && dst_hold_safe(&tunnel_dst->dst))
++              skb_dst_set(skb, &tunnel_dst->dst);
+       return 0;
+ }
diff --git a/queue-5.12/net-ll_temac-fix-tx-bd-buffer-overwrite.patch b/queue-5.12/net-ll_temac-fix-tx-bd-buffer-overwrite.patch
new file mode 100644 (file)
index 0000000..8035548
--- /dev/null
@@ -0,0 +1,36 @@
+From c364df2489b8ef2f5e3159b1dff1ff1fdb16040d Mon Sep 17 00:00:00 2001
+From: Esben Haabendal <esben@geanix.com>
+Date: Fri, 18 Jun 2021 12:52:33 +0200
+Subject: net: ll_temac: Fix TX BD buffer overwrite
+
+From: Esben Haabendal <esben@geanix.com>
+
+commit c364df2489b8ef2f5e3159b1dff1ff1fdb16040d upstream.
+
+Just as with the initial check, we need to ensure that num_frag+1 buffers
+are available, as that is the number of buffers we are going to use.
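+
+(The BD accounting, sketched with the names from the hunk below: the
+linear part of the skb consumes one BD and each page fragment one more,
+hence num_frag + 1:)
+
+  num_frag = skb_shinfo(skb)->nr_frags;
+  /* 1 BD for skb->data plus 1 BD per fragment */
+  if (temac_check_tx_bd_space(lp, num_frag + 1))
+          return NETDEV_TX_BUSY;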
+
+This fixes a buffer overflow, which might be seen during heavy network
+load. Complete lockup of TEMAC was reproducible within about 10 minutes of
+a particular load.
+
+Fixes: 84823ff80f74 ("net: ll_temac: Fix race condition causing TX hang")
+Cc: stable@vger.kernel.org # v5.4+
+Signed-off-by: Esben Haabendal <esben@geanix.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/xilinx/ll_temac_main.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
++++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
+@@ -849,7 +849,7 @@ temac_start_xmit(struct sk_buff *skb, st
+               smp_mb();
+               /* Space might have just been freed - check again */
+-              if (temac_check_tx_bd_space(lp, num_frag))
++              if (temac_check_tx_bd_space(lp, num_frag + 1))
+                       return NETDEV_TX_BUSY;
+               netif_wake_queue(ndev);
diff --git a/queue-5.12/net-ll_temac-make-sure-to-free-skb-when-it-is-completely-used.patch b/queue-5.12/net-ll_temac-make-sure-to-free-skb-when-it-is-completely-used.patch
new file mode 100644 (file)
index 0000000..ac1e38d
--- /dev/null
@@ -0,0 +1,49 @@
+From 6aa32217a9a446275440ee8724b1ecaf1838df47 Mon Sep 17 00:00:00 2001
+From: Esben Haabendal <esben@geanix.com>
+Date: Fri, 18 Jun 2021 12:52:23 +0200
+Subject: net: ll_temac: Make sure to free skb when it is completely used
+
+From: Esben Haabendal <esben@geanix.com>
+
+commit 6aa32217a9a446275440ee8724b1ecaf1838df47 upstream.
+
+With the skb pointer piggy-backed on the TX BD, we have a simple and
+efficient way to free the skb buffer when the frame has been transmitted.
+But in order to avoid freeing the skb while there are still fragments from
+the skb in use, we need to piggy-back on the last TX BD of the skb, not the
+first.
+
+Without this, we are doing a use-after-free on the DMA side, when the first
+BD of a multi TX BD packet is seen as completed in xmit_done while the
+remaining BDs are still being processed.
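+
+(In other words, the skb pointer is now stashed on the BD that completes
+last; a sketch with the names from the hunk below:)
+
+  /* after the fragment loop, cur_p is the last BD of the frame */
+  cur_p->app0 |= cpu_to_be32(STS_CTRL_APP0_EOP);
+  ptr_to_txbd((void *)skb, cur_p);  /* skb freed only when this BD completes */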
+
+Cc: stable@vger.kernel.org # v5.4+
+Signed-off-by: Esben Haabendal <esben@geanix.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/xilinx/ll_temac_main.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
++++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
+@@ -876,7 +876,6 @@ temac_start_xmit(struct sk_buff *skb, st
+               return NETDEV_TX_OK;
+       }
+       cur_p->phys = cpu_to_be32(skb_dma_addr);
+-      ptr_to_txbd((void *)skb, cur_p);
+       for (ii = 0; ii < num_frag; ii++) {
+               if (++lp->tx_bd_tail >= lp->tx_bd_num)
+@@ -915,6 +914,11 @@ temac_start_xmit(struct sk_buff *skb, st
+       }
+       cur_p->app0 |= cpu_to_be32(STS_CTRL_APP0_EOP);
++      /* Mark last fragment with skb address, so it can be consumed
++       * in temac_start_xmit_done()
++       */
++      ptr_to_txbd((void *)skb, cur_p);
++
+       tail_p = lp->tx_bd_p + sizeof(*lp->tx_bd_v) * lp->tx_bd_tail;
+       lp->tx_bd_tail++;
+       if (lp->tx_bd_tail >= lp->tx_bd_num)
diff --git a/queue-5.12/powerpc-perf-fix-crash-in-perf_instruction_pointer-when-ppmu-is-not-set.patch b/queue-5.12/powerpc-perf-fix-crash-in-perf_instruction_pointer-when-ppmu-is-not-set.patch
new file mode 100644 (file)
index 0000000..b164a19
--- /dev/null
@@ -0,0 +1,62 @@
+From 60b7ed54a41b550d50caf7f2418db4a7e75b5bdc Mon Sep 17 00:00:00 2001
+From: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
+Date: Thu, 17 Jun 2021 13:55:06 -0400
+Subject: powerpc/perf: Fix crash in perf_instruction_pointer() when ppmu is not set
+
+From: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
+
+commit 60b7ed54a41b550d50caf7f2418db4a7e75b5bdc upstream.
+
+On systems without any specific PMU driver support registered, running
+perf record causes Oops.
+
+The relevant portion from call trace:
+
+  BUG: Kernel NULL pointer dereference on read at 0x00000040
+  Faulting instruction address: 0xc0021f0c
+  Oops: Kernel access of bad area, sig: 11 [#1]
+  BE PAGE_SIZE=4K PREEMPT CMPCPRO
+  SAF3000 DIE NOTIFICATION
+  CPU: 0 PID: 442 Comm: null_syscall Not tainted 5.13.0-rc6-s3k-dev-01645-g7649ee3d2957 #5164
+  NIP:  c0021f0c LR: c00e8ad8 CTR: c00d8a5c
+  NIP perf_instruction_pointer+0x10/0x60
+  LR  perf_prepare_sample+0x344/0x674
+  Call Trace:
+    perf_prepare_sample+0x7c/0x674 (unreliable)
+    perf_event_output_forward+0x3c/0x94
+    __perf_event_overflow+0x74/0x14c
+    perf_swevent_hrtimer+0xf8/0x170
+    __hrtimer_run_queues.constprop.0+0x160/0x318
+    hrtimer_interrupt+0x148/0x3b0
+    timer_interrupt+0xc4/0x22c
+    Decrementer_virt+0xb8/0xbc
+
+During perf record session, perf_instruction_pointer() is called to
+capture the sample IP. This function in core-book3s accesses
+ppmu->flags. If a platform specific PMU driver is not registered, ppmu
+is set to NULL and accessing its members results in a crash. Fix this
+crash by checking if ppmu is set.
+
+Fixes: 2ca13a4cc56c ("powerpc/perf: Use regs->nip when SIAR is zero")
+Cc: stable@vger.kernel.org # v5.11+
+Reported-by: Christophe Leroy <christophe.leroy@csgroup.eu>
+Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
+Tested-by: Christophe Leroy <christophe.leroy@csgroup.eu>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/1623952506-1431-1-git-send-email-atrajeev@linux.vnet.ibm.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/powerpc/perf/core-book3s.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/powerpc/perf/core-book3s.c
++++ b/arch/powerpc/perf/core-book3s.c
+@@ -2242,7 +2242,7 @@ unsigned long perf_instruction_pointer(s
+       bool use_siar = regs_use_siar(regs);
+       unsigned long siar = mfspr(SPRN_SIAR);
+-      if (ppmu->flags & PPMU_P10_DD1) {
++      if (ppmu && (ppmu->flags & PPMU_P10_DD1)) {
+               if (siar)
+                       return siar;
+               else
diff --git a/queue-5.12/series b/queue-5.12/series
index 1213163cf4de124f6e20e9956600e97fcd59ab03..a5d4907d785e5bce555d175ca6236f690a28fc2c 100644 (file)
--- a/queue-5.12/series
@@ -137,3 +137,36 @@ kvm-x86-fix-x86_emulator-slab-cache-leak.patch
 s390-mcck-fix-calculation-of-sie-critical-section-size.patch
 s390-ap-fix-hanging-ioctl-caused-by-wrong-msg-counter.patch
 arcv2-save-abi-registers-across-signal-handling.patch
+x86-mm-avoid-truncating-memblocks-for-sgx-memory.patch
+x86-process-check-pf_kthread-and-not-current-mm-for-kernel-threads.patch
+x86-ioremap-map-efi-reserved-memory-as-encrypted-for-sev.patch
+x86-pkru-write-hardware-init-value-to-pkru-when-xstate-is-init.patch
+x86-fpu-prevent-state-corruption-in-__fpu__restore_sig.patch
+x86-fpu-invalidate-fpu-state-after-a-failed-xrstor-from-a-user-buffer.patch
+x86-fpu-reset-state-for-all-signal-restore-failures.patch
+powerpc-perf-fix-crash-in-perf_instruction_pointer-when-ppmu-is-not-set.patch
+makefile-lto-pass-warn-stack-size-only-on-lld-13.0.0.patch
+crash_core-vmcoreinfo-append-section_size_bits-to-vmcoreinfo.patch
+dmaengine-pl330-fix-wrong-usage-of-spinlock-flags-in-dma_cyclc.patch
+mac80211-fix-deadlock-in-ap-vlan-handling.patch
+mac80211-fix-null-ptr-deref-for-injected-rate-info.patch
+mac80211-fix-reset-debugfs-locking.patch
+cfg80211-fix-phy80211-symlink-creation.patch
+cfg80211-shut-down-interfaces-on-failed-resume.patch
+mac80211-move-interface-shutdown-out-of-wiphy-lock.patch
+mac80211-minstrel_ht-fix-sample-time-check.patch
+cfg80211-make-certificate-generation-more-robust.patch
+cfg80211-avoid-double-free-of-pmsr-request.patch
+drm-amdgpu-gfx10-enlarge-cp_mec_doorbell_range_upper-to-cover-full-doorbell.patch
+drm-amdgpu-gfx9-fix-the-doorbell-missing-when-in-cgpg-issue.patch
+net-ll_temac-make-sure-to-free-skb-when-it-is-completely-used.patch
+net-ll_temac-fix-tx-bd-buffer-overwrite.patch
+net-bridge-fix-vlan-tunnel-dst-null-pointer-dereference.patch
+net-bridge-fix-vlan-tunnel-dst-refcnt-when-egressing.patch
+mm-hwpoison-fix-race-with-hugetlb-page-allocation.patch
+mm-swap-fix-pte_same_as_swp-not-removing-uffd-wp-bit-when-compare.patch
+mm-hugetlb-expand-restore_reserve_on_error-functionality.patch
+mm-slub-clarify-verification-reporting.patch
+mm-slub-fix-redzoning-for-small-allocations.patch
+mm-slub-actually-fix-freelist-pointer-vs-redzoning.patch
+mm-slub.c-include-swab.h.patch
diff --git a/queue-5.12/x86-fpu-invalidate-fpu-state-after-a-failed-xrstor-from-a-user-buffer.patch b/queue-5.12/x86-fpu-invalidate-fpu-state-after-a-failed-xrstor-from-a-user-buffer.patch
new file mode 100644 (file)
index 0000000..07700f3
--- /dev/null
@@ -0,0 +1,74 @@
+From d8778e393afa421f1f117471144f8ce6deb6953a Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Tue, 8 Jun 2021 16:36:19 +0200
+Subject: x86/fpu: Invalidate FPU state after a failed XRSTOR from a user buffer
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit d8778e393afa421f1f117471144f8ce6deb6953a upstream.
+
+Both Intel and AMD consider it to be architecturally valid for XRSTOR to
+fail with #PF but nonetheless change the register state.  The actual
+conditions under which this might occur are unclear [1], but it seems
+plausible that this might be triggered if one sibling thread unmaps a page
+and invalidates the shared TLB while another sibling thread is executing
+XRSTOR on the page in question.
+
+__fpu__restore_sig() can execute XRSTOR while the hardware registers
+are preserved on behalf of a different victim task (using the
+fpu_fpregs_owner_ctx mechanism), and, in theory, XRSTOR could fail but
+modify the registers.
+
+If this happens, then there is a window in which __fpu__restore_sig()
+could schedule out and the victim task could schedule back in without
+reloading its own FPU registers. This would result in part of the FPU
+state that __fpu__restore_sig() was attempting to load leaking into the
+victim task's user-visible state.
+
+Invalidate preserved FPU registers on XRSTOR failure to prevent this
+situation from corrupting any state.
+
+[1] Frequent readers of the errata lists might imagine "complex
+    microarchitectural conditions".
+
+Fixes: 1d731e731c4c ("x86/fpu: Add a fastpath to __fpu__restore_sig()")
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
+Acked-by: Rik van Riel <riel@surriel.com>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20210608144345.758116583@linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/fpu/signal.c |   19 +++++++++++++++++++
+ 1 file changed, 19 insertions(+)
+
+--- a/arch/x86/kernel/fpu/signal.c
++++ b/arch/x86/kernel/fpu/signal.c
+@@ -369,6 +369,25 @@ static int __fpu__restore_sig(void __use
+                       fpregs_unlock();
+                       return 0;
+               }
++
++              /*
++               * The above did an FPU restore operation, restricted to
++               * the user portion of the registers, and failed, but the
++               * microcode might have modified the FPU registers
++               * nevertheless.
++               *
++               * If the FPU registers do not belong to current, then
++               * invalidate the FPU register state otherwise the task might
++               * preempt current and return to user space with corrupted
++               * FPU registers.
++               *
++               * In case current owns the FPU registers then no further
++               * action is required. The fixup below will handle it
++               * correctly.
++               */
++              if (test_thread_flag(TIF_NEED_FPU_LOAD))
++                      __cpu_invalidate_fpregs_state();
++
+               fpregs_unlock();
+       } else {
+               /*
diff --git a/queue-5.12/x86-fpu-prevent-state-corruption-in-__fpu__restore_sig.patch b/queue-5.12/x86-fpu-prevent-state-corruption-in-__fpu__restore_sig.patch
new file mode 100644 (file)
index 0000000..3b9caf0
--- /dev/null
@@ -0,0 +1,64 @@
+From 484cea4f362e1eeb5c869abbfb5f90eae6421b38 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 8 Jun 2021 16:36:18 +0200
+Subject: x86/fpu: Prevent state corruption in __fpu__restore_sig()
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 484cea4f362e1eeb5c869abbfb5f90eae6421b38 upstream.
+
+The non-compacted slowpath uses __copy_from_user() and copies the entire
+user buffer into the kernel buffer, verbatim.  This means that the kernel
+buffer may now contain entirely invalid state on which XRSTOR will #GP.
+validate_user_xstate_header() can detect some of that corruption, but that
+leaves the onus on callers to clear the buffer.
+
+Prior to XSAVES support, it was possible just to reinitialize the buffer,
+completely, but with supervisor states that is no longer possible as the
+buffer clearing code split got it backwards. Fixing that is possible, but
+not corrupting the state in the first place is more robust.
+
+Avoid corruption of the kernel XSAVE buffer by using copy_user_to_xstate()
+which validates the XSAVE header contents before copying the actual states
+to the kernel. copy_user_to_xstate() was previously only called for
+compacted-format kernel buffers, but it works for both compacted and
+non-compacted forms.
+
+Using it for the non-compacted form is slower because of multiple
+__copy_from_user() operations, but that cost is less important than robust
+code in an already slow path.
+
+[ Changelog polished by Dave Hansen ]
+
+Fixes: b860eb8dce59 ("x86/fpu/xstate: Define new functions for clearing fpregs and xstates")
+Reported-by: syzbot+2067e764dbcd10721e2e@syzkaller.appspotmail.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
+Acked-by: Rik van Riel <riel@surriel.com>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20210608144345.611833074@linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/fpu/signal.c |    9 +--------
+ 1 file changed, 1 insertion(+), 8 deletions(-)
+
+--- a/arch/x86/kernel/fpu/signal.c
++++ b/arch/x86/kernel/fpu/signal.c
+@@ -405,14 +405,7 @@ static int __fpu__restore_sig(void __use
+       if (use_xsave() && !fx_only) {
+               u64 init_bv = xfeatures_mask_user() & ~user_xfeatures;
+-              if (using_compacted_format()) {
+-                      ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
+-              } else {
+-                      ret = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
+-
+-                      if (!ret && state_size > offsetof(struct xregs_state, header))
+-                              ret = validate_user_xstate_header(&fpu->state.xsave.header);
+-              }
++              ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
+               if (ret)
+                       goto err_out;
diff --git a/queue-5.12/x86-fpu-reset-state-for-all-signal-restore-failures.patch b/queue-5.12/x86-fpu-reset-state-for-all-signal-restore-failures.patch
new file mode 100644 (file)
index 0000000..34e989a
--- /dev/null
@@ -0,0 +1,96 @@
+From efa165504943f2128d50f63de0c02faf6dcceb0d Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 9 Jun 2021 21:18:00 +0200
+Subject: x86/fpu: Reset state for all signal restore failures
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit efa165504943f2128d50f63de0c02faf6dcceb0d upstream.
+
+If access_ok() or fpregs_soft_set() fails in __fpu__restore_sig() then the
+function just returns but does not clear the FPU state as it does for all
+other fatal failures.
+
+Clear the FPU state for these failures as well.
+
+Fixes: 72a671ced66d ("x86, fpu: Unify signal handling code paths for x86 and x86_64 kernels")
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/87mtryyhhz.ffs@nanos.tec.linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/fpu/signal.c |   26 +++++++++++++++-----------
+ 1 file changed, 15 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/kernel/fpu/signal.c
++++ b/arch/x86/kernel/fpu/signal.c
+@@ -307,13 +307,17 @@ static int __fpu__restore_sig(void __use
+               return 0;
+       }
+-      if (!access_ok(buf, size))
+-              return -EACCES;
++      if (!access_ok(buf, size)) {
++              ret = -EACCES;
++              goto out;
++      }
+-      if (!static_cpu_has(X86_FEATURE_FPU))
+-              return fpregs_soft_set(current, NULL,
+-                                     0, sizeof(struct user_i387_ia32_struct),
+-                                     NULL, buf) != 0;
++      if (!static_cpu_has(X86_FEATURE_FPU)) {
++              ret = fpregs_soft_set(current, NULL, 0,
++                                    sizeof(struct user_i387_ia32_struct),
++                                    NULL, buf);
++              goto out;
++      }
+       if (use_xsave()) {
+               struct _fpx_sw_bytes fx_sw_user;
+@@ -396,7 +400,7 @@ static int __fpu__restore_sig(void __use
+                */
+               ret = __copy_from_user(&env, buf, sizeof(env));
+               if (ret)
+-                      goto err_out;
++                      goto out;
+               envp = &env;
+       }
+@@ -426,7 +430,7 @@ static int __fpu__restore_sig(void __use
+               ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
+               if (ret)
+-                      goto err_out;
++                      goto out;
+               sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures,
+                                             fx_only);
+@@ -446,7 +450,7 @@ static int __fpu__restore_sig(void __use
+               ret = __copy_from_user(&fpu->state.fxsave, buf_fx, state_size);
+               if (ret) {
+                       ret = -EFAULT;
+-                      goto err_out;
++                      goto out;
+               }
+               sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures,
+@@ -464,7 +468,7 @@ static int __fpu__restore_sig(void __use
+       } else {
+               ret = __copy_from_user(&fpu->state.fsave, buf_fx, state_size);
+               if (ret)
+-                      goto err_out;
++                      goto out;
+               fpregs_lock();
+               ret = copy_kernel_to_fregs_err(&fpu->state.fsave);
+@@ -475,7 +479,7 @@ static int __fpu__restore_sig(void __use
+               fpregs_deactivate(fpu);
+       fpregs_unlock();
+-err_out:
++out:
+       if (ret)
+               fpu__clear_user_states(fpu);
+       return ret;
diff --git a/queue-5.12/x86-ioremap-map-efi-reserved-memory-as-encrypted-for-sev.patch b/queue-5.12/x86-ioremap-map-efi-reserved-memory-as-encrypted-for-sev.patch
new file mode 100644 (file)
index 0000000..0f03087
--- /dev/null
@@ -0,0 +1,67 @@
+From 8d651ee9c71bb12fc0c8eb2786b66cbe5aa3e43b Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Tue, 8 Jun 2021 11:54:33 +0200
+Subject: x86/ioremap: Map EFI-reserved memory as encrypted for SEV
+
+From: Tom Lendacky <thomas.lendacky@amd.com>
+
+commit 8d651ee9c71bb12fc0c8eb2786b66cbe5aa3e43b upstream.
+
+Some drivers require memory that is marked as EFI boot services
+data. In order for this memory to not be re-used by the kernel
+after ExitBootServices(), efi_mem_reserve() is used to preserve it
+by inserting a new EFI memory descriptor and marking it with the
+EFI_MEMORY_RUNTIME attribute.
+
+Under SEV, memory marked with the EFI_MEMORY_RUNTIME attribute needs to
+be mapped encrypted by Linux, otherwise the kernel might crash at boot
+like below:
+
+  EFI Variables Facility v0.08 2004-May-17
+  general protection fault, probably for non-canonical address 0x3597688770a868b2: 0000 [#1] SMP NOPTI
+  CPU: 13 PID: 1 Comm: swapper/0 Not tainted 5.12.4-2-default #1 openSUSE Tumbleweed
+  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
+  RIP: 0010:efi_mokvar_entry_next
+  [...]
+  Call Trace:
+   efi_mokvar_sysfs_init
+   ? efi_mokvar_table_init
+   do_one_initcall
+   ? __kmalloc
+   kernel_init_freeable
+   ? rest_init
+   kernel_init
+   ret_from_fork
+
+Expand the __ioremap_check_other() function to additionally check for
+this other type of boot data reserved at runtime and indicate that it
+should be mapped encrypted for an SEV guest.
+
+ [ bp: Massage commit message. ]
+
+Fixes: 58c909022a5a ("efi: Support for MOK variable config table")
+Reported-by: Joerg Roedel <jroedel@suse.de>
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Tested-by: Joerg Roedel <jroedel@suse.de>
+Cc: <stable@vger.kernel.org> # 5.10+
+Link: https://lkml.kernel.org/r/20210608095439.12668-2-joro@8bytes.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/mm/ioremap.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/mm/ioremap.c
++++ b/arch/x86/mm/ioremap.c
+@@ -118,7 +118,9 @@ static void __ioremap_check_other(resour
+       if (!IS_ENABLED(CONFIG_EFI))
+               return;
+-      if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA)
++      if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA ||
++          (efi_mem_type(addr) == EFI_BOOT_SERVICES_DATA &&
++           efi_mem_attributes(addr) & EFI_MEMORY_RUNTIME))
+               desc->flags |= IORES_MAP_ENCRYPTED;
+ }
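
The one-hunk change above widens the encryption check from a single EFI
memory type to a predicate over type plus attributes. A self-contained
sketch of that predicate, with the EFI type enum and the two lookup
functions mocked for illustration (only EFI_MEMORY_RUNTIME uses its real
bit value):

  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>

  /* Mocked EFI types and lookups; the real ones live in the EFI core. */
  enum mem_type { RUNTIME_SERVICES_DATA, BOOT_SERVICES_DATA, OTHER };
  #define EFI_MEMORY_RUNTIME (1ULL << 63)  /* the real attribute bit */

  static enum mem_type efi_mem_type(uint64_t addr)
  { (void)addr; return BOOT_SERVICES_DATA; }
  static uint64_t efi_mem_attributes(uint64_t addr)
  { (void)addr; return EFI_MEMORY_RUNTIME; }

  /* Mirrors the widened condition in __ioremap_check_other(). */
  static bool needs_encrypted_mapping(uint64_t addr)
  {
          return efi_mem_type(addr) == RUNTIME_SERVICES_DATA ||
                 (efi_mem_type(addr) == BOOT_SERVICES_DATA &&
                  (efi_mem_attributes(addr) & EFI_MEMORY_RUNTIME));
  }

  int main(void)
  {
          printf("map encrypted: %d\n", needs_encrypted_mapping(0x1000));
          return 0;
  }

Boot-services data is normally free for reuse after ExitBootServices();
only the EFI_MEMORY_RUNTIME attribute that efi_mem_reserve() sets marks
the ranges which must also stay encrypted in an SEV guest.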
diff --git a/queue-5.12/x86-mm-avoid-truncating-memblocks-for-sgx-memory.patch b/queue-5.12/x86-mm-avoid-truncating-memblocks-for-sgx-memory.patch
new file mode 100644 (file)
index 0000000..91e80e5
--- /dev/null
@@ -0,0 +1,95 @@
+From 28e5e44aa3f4e0e0370864ed008fb5e2d85f4dc8 Mon Sep 17 00:00:00 2001
+From: Fan Du <fan.du@intel.com>
+Date: Thu, 17 Jun 2021 12:46:57 -0700
+Subject: x86/mm: Avoid truncating memblocks for SGX memory
+
+From: Fan Du <fan.du@intel.com>
+
+commit 28e5e44aa3f4e0e0370864ed008fb5e2d85f4dc8 upstream.
+
+tl;dr:
+
+Several SGX users reported seeing the following message on NUMA systems:
+
+  sgx: [Firmware Bug]: Unable to map EPC section to online node. Fallback to the NUMA node 0.
+
+This turned out to be the memblock code mistakenly throwing away SGX
+memory.
+
+=== Full Changelog ===
+
+The 'max_pfn' variable represents the highest known RAM address.  It can
+be used, for instance, to quickly determine for which physical addresses
+there is mem_map[] space allocated.  The numa_meminfo code makes an
+effort to throw out ("trim") all memory blocks which are above 'max_pfn'.
+
+SGX memory is not considered RAM (it is marked as "Reserved" in the
+e820) and is not taken into account by max_pfn. Despite this, SGX memory
+areas have NUMA affinity and are enumerated in the ACPI SRAT table. The
+existing SGX code uses the numa_meminfo mechanism to look up the NUMA
+affinity for its memory areas.
+
+In cases where SGX memory was above max_pfn (usually just the one EPC
+section in the last highest NUMA node), the numa_memblock is truncated
+at 'max_pfn', which is below the SGX memory.  When the SGX code tries to
+look up the affinity of this memory, it fails and produces an error message:
+
+  sgx: [Firmware Bug]: Unable to map EPC section to online node. Fallback to the NUMA node 0.
+
+and assigns the memory to NUMA node 0.
+
+Instead of silently truncating the memory block at 'max_pfn' and
+dropping the SGX memory, add the truncated portion to
+'numa_reserved_meminfo'.  This allows the SGX code to later determine
+the NUMA affinity of its 'Reserved' area.
+
+Before, numa_meminfo looked like this (from 'crash'):
+
+  blk = { start =          0x0, end = 0x2080000000, nid = 0x0 }
+        { start = 0x2080000000, end = 0x4000000000, nid = 0x1 }
+
+numa_reserved_meminfo is empty.
+
+With this, numa_meminfo looks like this:
+
+  blk = { start =          0x0, end = 0x2080000000, nid = 0x0 }
+        { start = 0x2080000000, end = 0x4000000000, nid = 0x1 }
+
+and numa_reserved_meminfo has an entry for node 1's SGX memory:
+
+  blk =  { start = 0x4000000000, end = 0x4080000000, nid = 0x1 }
+
+ [ daveh: completely rewrote/reworked changelog ]
+
+Fixes: 5d30f92e7631 ("x86/NUMA: Provide a range-to-target_node lookup facility")
+Reported-by: Reinette Chatre <reinette.chatre@intel.com>
+Signed-off-by: Fan Du <fan.du@intel.com>
+Signed-off-by: Dave Hansen <dave.hansen@intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
+Reviewed-by: Dan Williams <dan.j.williams@intel.com>
+Reviewed-by: Dave Hansen <dave.hansen@intel.com>
+Cc: <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/20210617194657.0A99CB22@viggo.jf.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/mm/numa.c |    8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/mm/numa.c
++++ b/arch/x86/mm/numa.c
+@@ -254,7 +254,13 @@ int __init numa_cleanup_meminfo(struct n
+               /* make sure all non-reserved blocks are inside the limits */
+               bi->start = max(bi->start, low);
+-              bi->end = min(bi->end, high);
++
++              /* preserve info for non-RAM areas above 'max_pfn': */
++              if (bi->end > high) {
++                      numa_add_memblk_to(bi->nid, high, bi->end,
++                                         &numa_reserved_meminfo);
++                      bi->end = high;
++              }
+               /* and there's no empty block */
+               if (bi->start >= bi->end)
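
The hunk above replaces a silent min() clamp with a clamp that files the
cut-off tail into numa_reserved_meminfo. A toy model of that
clamp-and-preserve pattern, reusing the node-1 addresses from the
changelog; the struct and helpers here are illustrative, not the kernel's:

  #include <stdint.h>
  #include <stdio.h>

  /* Illustrative memblock; nid is the NUMA node id. */
  struct blk { uint64_t start, end; int nid; };

  static struct blk reserved[16];  /* stands in for numa_reserved_meminfo */
  static int nreserved;

  /* Stand-in for numa_add_memblk_to(): record the trimmed-off range. */
  static void preserve(int nid, uint64_t start, uint64_t end)
  {
          reserved[nreserved++] = (struct blk){ start, end, nid };
  }

  /* Clamp a block to [low, high) but keep the cut-off tail, mirroring
   * the numa_cleanup_meminfo() change above. */
  static void clamp_block(struct blk *bi, uint64_t low, uint64_t high)
  {
          if (bi->start < low)
                  bi->start = low;
          if (bi->end > high) {
                  preserve(bi->nid, high, bi->end); /* e.g. SGX EPC above max_pfn */
                  bi->end = high;
          }
  }

  int main(void)
  {
          /* Node 1 from the changelog: RAM ends at 0x4000000000, the
           * SGX EPC section runs from there to 0x4080000000. */
          struct blk b = { 0x2080000000ULL, 0x4080000000ULL, 1 };

          clamp_block(&b, 0, 0x4000000000ULL);
          printf("kept [%#llx,%#llx)  reserved [%#llx,%#llx) on node %d\n",
                 (unsigned long long)b.start, (unsigned long long)b.end,
                 (unsigned long long)reserved[0].start,
                 (unsigned long long)reserved[0].end, reserved[0].nid);
          return 0;
  }

The design choice is to keep the RAM-oriented numa_meminfo trimmed exactly
as before while parking the non-RAM remainder where the SGX code can still
look up its node affinity.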
diff --git a/queue-5.12/x86-pkru-write-hardware-init-value-to-pkru-when-xstate-is-init.patch b/queue-5.12/x86-pkru-write-hardware-init-value-to-pkru-when-xstate-is-init.patch
new file mode 100644 (file)
index 0000000..70ff870
--- /dev/null
@@ -0,0 +1,93 @@
+From 510b80a6a0f1a0d114c6e33bcea64747d127973c Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 8 Jun 2021 16:36:21 +0200
+Subject: x86/pkru: Write hardware init value to PKRU when xstate is init
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 510b80a6a0f1a0d114c6e33bcea64747d127973c upstream.
+
+When user space brings PKRU into init state, then the kernel handling is
+broken:
+
+  T1 user space
+     xsave(state)
+     state.header.xfeatures &= ~XFEATURE_MASK_PKRU;
+     xrstor(state)
+
+  T1 -> kernel
+     schedule()
+       XSAVE(S) -> T1->xsave.header.xfeatures[PKRU] == 0
+       T1->flags |= TIF_NEED_FPU_LOAD;
+
+       wrpkru();
+
+     schedule()
+       ...
+       pk = get_xsave_addr(&T1->fpu->state.xsave, XFEATURE_PKRU);
+       if (pk)
+        wrpkru(pk->pkru);
+       else
+        wrpkru(DEFAULT_PKRU);
+
+Because the xfeatures bit is 0 and therefore the value in the xsave
+storage is not valid, get_xsave_addr() returns NULL and switch_to()
+writes the default PKRU. -> FAIL #1!
+
+So that wrecks any copy_to/from_user() on the way back to user space
+which hits memory which is protected by the default PKRU value.
+
+Assumed that this does not fail (pure luck) then T1 goes back to user
+space and because TIF_NEED_FPU_LOAD is set it ends up in
+
+  switch_fpu_return()
+      __fpregs_load_activate()
+        if (!fpregs_state_valid()) {
+        load_XSTATE_from_task();
+        }
+
+But if nothing touched the FPU between T1 scheduling out and back in,
+then the fpregs_state is still valid which means switch_fpu_return()
+does nothing and just clears TIF_NEED_FPU_LOAD. Back to user space with
+DEFAULT_PKRU loaded. -> FAIL #2!
+
+The fix is simple: if get_xsave_addr() returns NULL then set the
+PKRU value to 0 instead of the restrictive default PKRU value in
+init_pkru_value.
+
+ [ bp: Massage in minor nitpicks from folks. ]
+
+Fixes: 0cecca9d03c9 ("x86/fpu: Eager switch PKRU state")
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
+Acked-by: Rik van Riel <riel@surriel.com>
+Tested-by: Babu Moger <babu.moger@amd.com>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20210608144346.045616965@linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/fpu/internal.h |   11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/fpu/internal.h
++++ b/arch/x86/include/asm/fpu/internal.h
+@@ -579,9 +579,16 @@ static inline void switch_fpu_finish(str
+        * return to userland e.g. for a copy_to_user() operation.
+        */
+       if (!(current->flags & PF_KTHREAD)) {
++              /*
++               * If the PKRU bit in xsave.header.xfeatures is not set,
++               * then the PKRU component was in init state, which means
++               * XRSTOR will set PKRU to 0. If the bit is not set then
++               * get_xsave_addr() will return NULL because the PKRU value
++               * in memory is not valid. This means pkru_val has to be
++               * set to 0 and not to init_pkru_value.
++               */
+               pk = get_xsave_addr(&new_fpu->state.xsave, XFEATURE_PKRU);
+-              if (pk)
+-                      pkru_val = pk->pkru;
++              pkru_val = pk ? pk->pkru : 0;
+       }
+       __write_pkru(pkru_val);
+ }
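
The fix above reduces to one question: which PKRU value stands in when
get_xsave_addr() returns NULL because the component is in init state? A
compact sketch of the corrected selection logic; get_xsave_addr() is
mocked, and DEFAULT_PKRU is a made-up restrictive value, not the kernel's
init_pkru_value:

  #include <stdint.h>
  #include <stdio.h>

  #define DEFAULT_PKRU 0x55555554u  /* hypothetical restrictive default */

  /* Mocked get_xsave_addr(): NULL when the component's xfeatures bit
   * is clear, i.e. the component is in init state. */
  static uint32_t *get_pkru_addr(uint32_t *saved, int xfeature_bit_set)
  {
          return xfeature_bit_set ? saved : NULL;
  }

  static uint32_t pick_pkru(uint32_t *saved, int xfeature_bit_set)
  {
          uint32_t *pk = get_pkru_addr(saved, xfeature_bit_set);

          /* Init state => XRSTOR would load 0, so mirror that instead
           * of falling back to the restrictive default. */
          return pk ? *pk : 0;
  }

  int main(void)
  {
          uint32_t saved = 0x12345678;

          printf("valid: %#x  init-state: %#x\n",
                 pick_pkru(&saved, 1), pick_pkru(&saved, 0));
          return 0;
  }

Returning 0 makes the software fallback agree with what XRSTOR itself
loads for an init-state PKRU component, closing both failure modes in
the scenario above.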
diff --git a/queue-5.12/x86-process-check-pf_kthread-and-not-current-mm-for-kernel-threads.patch b/queue-5.12/x86-process-check-pf_kthread-and-not-current-mm-for-kernel-threads.patch
new file mode 100644 (file)
index 0000000..bbc1e44
--- /dev/null
@@ -0,0 +1,38 @@
+From 12f7764ac61200e32c916f038bdc08f884b0b604 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 8 Jun 2021 16:36:20 +0200
+Subject: x86/process: Check PF_KTHREAD and not current->mm for kernel threads
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 12f7764ac61200e32c916f038bdc08f884b0b604 upstream.
+
+switch_fpu_finish() checks current->mm as indicator for kernel threads.
+That's wrong because kernel threads can temporarily use a mm of a user
+process via kthread_use_mm().
+
+Check the task flags for PF_KTHREAD instead.
+
+Fixes: 0cecca9d03c9 ("x86/fpu: Eager switch PKRU state")
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
+Acked-by: Rik van Riel <riel@surriel.com>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20210608144345.912645927@linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/fpu/internal.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/fpu/internal.h
++++ b/arch/x86/include/asm/fpu/internal.h
+@@ -578,7 +578,7 @@ static inline void switch_fpu_finish(str
+        * PKRU state is switched eagerly because it needs to be valid before we
+        * return to userland e.g. for a copy_to_user() operation.
+        */
+-      if (current->mm) {
++      if (!(current->flags & PF_KTHREAD)) {
+               pk = get_xsave_addr(&new_fpu->state.xsave, XFEATURE_PKRU);
+               if (pk)
+                       pkru_val = pk->pkru;
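
The one-line change above matters because current->mm is an unreliable
kernel-thread test: a kthread that borrows a user mm via kthread_use_mm()
has a non-NULL mm while still being a kernel thread. A toy illustration of
the distinction; the struct is a mock, though PF_KTHREAD uses its real
value from <linux/sched.h>:

  #include <stdbool.h>
  #include <stdio.h>

  #define PF_KTHREAD 0x00200000  /* real flag value */

  struct task { unsigned int flags; void *mm; };

  /* Wrong: misclassifies a kthread that borrowed a user mm. */
  static bool is_user_task_by_mm(const struct task *t)
  { return t->mm != NULL; }

  /* Right: the flag marks kernel threads regardless of mm. */
  static bool is_user_task_by_flag(const struct task *t)
  { return !(t->flags & PF_KTHREAD); }

  int main(void)
  {
          int dummy_mm;
          struct task kthread_using_mm = { PF_KTHREAD, &dummy_mm };

          printf("by mm: %d  by flag: %d\n",
                 is_user_task_by_mm(&kthread_using_mm),
                 is_user_task_by_flag(&kthread_using_mm));
          return 0;
  }

With the mm-based test, such a kthread would take the PKRU switching path
meant for user tasks; testing PF_KTHREAD keeps the classification stable
across kthread_use_mm()/kthread_unuse_mm().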