From: Greg Kroah-Hartman Date: Mon, 21 Jun 2021 11:02:40 +0000 (+0200) Subject: 5.10-stable patches X-Git-Tag: v5.4.128~26 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=4a586f75449946c601d81cfe7b6b61d981b47658;p=thirdparty%2Fkernel%2Fstable-queue.git 5.10-stable patches added patches: cfg80211-avoid-double-free-of-pmsr-request.patch cfg80211-make-certificate-generation-more-robust.patch crash_core-vmcoreinfo-append-section_size_bits-to-vmcoreinfo.patch dmaengine-pl330-fix-wrong-usage-of-spinlock-flags-in-dma_cyclc.patch drm-amdgpu-gfx10-enlarge-cp_mec_doorbell_range_upper-to-cover-full-doorbell.patch drm-amdgpu-gfx9-fix-the-doorbell-missing-when-in-cgpg-issue.patch mac80211-fix-null-ptr-deref-for-injected-rate-info.patch net-bridge-fix-vlan-tunnel-dst-null-pointer-dereference.patch net-bridge-fix-vlan-tunnel-dst-refcnt-when-egressing.patch net-ll_temac-fix-tx-bd-buffer-overwrite.patch net-ll_temac-make-sure-to-free-skb-when-it-is-completely-used.patch x86-fpu-invalidate-fpu-state-after-a-failed-xrstor-from-a-user-buffer.patch x86-fpu-prevent-state-corruption-in-__fpu__restore_sig.patch x86-fpu-reset-state-for-all-signal-restore-failures.patch x86-ioremap-map-efi-reserved-memory-as-encrypted-for-sev.patch x86-mm-avoid-truncating-memblocks-for-sgx-memory.patch x86-pkru-write-hardware-init-value-to-pkru-when-xstate-is-init.patch x86-process-check-pf_kthread-and-not-current-mm-for-kernel-threads.patch --- diff --git a/queue-5.10/cfg80211-avoid-double-free-of-pmsr-request.patch b/queue-5.10/cfg80211-avoid-double-free-of-pmsr-request.patch new file mode 100644 index 00000000000..24e09cf033e --- /dev/null +++ b/queue-5.10/cfg80211-avoid-double-free-of-pmsr-request.patch @@ -0,0 +1,61 @@ +From 0288e5e16a2e18f0b7e61a2b70d9037fc6e4abeb Mon Sep 17 00:00:00 2001 +From: Avraham Stern +Date: Fri, 18 Jun 2021 13:41:31 +0300 +Subject: cfg80211: avoid double free of PMSR request + +From: Avraham Stern + +commit 0288e5e16a2e18f0b7e61a2b70d9037fc6e4abeb upstream. 
+ +If cfg80211_pmsr_process_abort() moves all the PMSR requests that +need to be freed into a local list before aborting and freeing them. +As a result, it is possible that cfg80211_pmsr_complete() will run in +parallel and free the same PMSR request. + +Fix it by freeing the request in cfg80211_pmsr_complete() only if it +is still in the original pmsr list. + +Cc: stable@vger.kernel.org +Fixes: 9bb7e0f24e7e ("cfg80211: add peer measurement with FTM initiator API") +Signed-off-by: Avraham Stern +Signed-off-by: Luca Coelho +Link: https://lore.kernel.org/r/iwlwifi.20210618133832.1fbef57e269a.I00294bebdb0680b892f8d1d5c871fd9dbe785a5e@changeid +Signed-off-by: Johannes Berg +Signed-off-by: Greg Kroah-Hartman +--- + net/wireless/pmsr.c | 16 ++++++++++++++-- + 1 file changed, 14 insertions(+), 2 deletions(-) + +--- a/net/wireless/pmsr.c ++++ b/net/wireless/pmsr.c +@@ -324,6 +324,7 @@ void cfg80211_pmsr_complete(struct wirel + gfp_t gfp) + { + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); ++ struct cfg80211_pmsr_request *tmp, *prev, *to_free = NULL; + struct sk_buff *msg; + void *hdr; + +@@ -354,9 +355,20 @@ free_msg: + nlmsg_free(msg); + free_request: + spin_lock_bh(&wdev->pmsr_lock); +- list_del(&req->list); ++ /* ++ * cfg80211_pmsr_process_abort() may have already moved this request ++ * to the free list, and will free it later. In this case, don't free ++ * it here. 
++ */ ++ list_for_each_entry_safe(tmp, prev, &wdev->pmsr_list, list) { ++ if (tmp == req) { ++ list_del(&req->list); ++ to_free = req; ++ break; ++ } ++ } + spin_unlock_bh(&wdev->pmsr_lock); +- kfree(req); ++ kfree(to_free); + } + EXPORT_SYMBOL_GPL(cfg80211_pmsr_complete); + diff --git a/queue-5.10/cfg80211-make-certificate-generation-more-robust.patch b/queue-5.10/cfg80211-make-certificate-generation-more-robust.patch new file mode 100644 index 00000000000..f2b0b74f2bd --- /dev/null +++ b/queue-5.10/cfg80211-make-certificate-generation-more-robust.patch @@ -0,0 +1,35 @@ +From b5642479b0f7168fe16d156913533fe65ab4f8d5 Mon Sep 17 00:00:00 2001 +From: Johannes Berg +Date: Fri, 18 Jun 2021 13:41:29 +0300 +Subject: cfg80211: make certificate generation more robust + +From: Johannes Berg + +commit b5642479b0f7168fe16d156913533fe65ab4f8d5 upstream. + +If all net/wireless/certs/*.hex files are deleted, the build +will hang at this point since the 'cat' command will have no +arguments. Do "echo | cat - ..." so that even if the "..." +part is empty, the whole thing won't hang. 
+ +Cc: stable@vger.kernel.org +Signed-off-by: Johannes Berg +Signed-off-by: Luca Coelho +Link: https://lore.kernel.org/r/iwlwifi.20210618133832.c989056c3664.Ic3b77531d00b30b26dcd69c64e55ae2f60c3f31e@changeid +Signed-off-by: Johannes Berg +Signed-off-by: Greg Kroah-Hartman +--- + net/wireless/Makefile | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/wireless/Makefile ++++ b/net/wireless/Makefile +@@ -28,7 +28,7 @@ $(obj)/shipped-certs.c: $(wildcard $(src + @$(kecho) " GEN $@" + @(echo '#include "reg.h"'; \ + echo 'const u8 shipped_regdb_certs[] = {'; \ +- cat $^ ; \ ++ echo | cat - $^ ; \ + echo '};'; \ + echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \ + ) > $@ diff --git a/queue-5.10/crash_core-vmcoreinfo-append-section_size_bits-to-vmcoreinfo.patch b/queue-5.10/crash_core-vmcoreinfo-append-section_size_bits-to-vmcoreinfo.patch new file mode 100644 index 00000000000..092c025d0b2 --- /dev/null +++ b/queue-5.10/crash_core-vmcoreinfo-append-section_size_bits-to-vmcoreinfo.patch @@ -0,0 +1,60 @@ +From 4f5aecdff25f59fb5ea456d5152a913906ecf287 Mon Sep 17 00:00:00 2001 +From: Pingfan Liu +Date: Tue, 15 Jun 2021 18:23:36 -0700 +Subject: crash_core, vmcoreinfo: append 'SECTION_SIZE_BITS' to vmcoreinfo + +From: Pingfan Liu + +commit 4f5aecdff25f59fb5ea456d5152a913906ecf287 upstream. + +As mentioned in kernel commit 1d50e5d0c505 ("crash_core, vmcoreinfo: +Append 'MAX_PHYSMEM_BITS' to vmcoreinfo"), SECTION_SIZE_BITS in the +formula: + + #define SECTIONS_SHIFT (MAX_PHYSMEM_BITS - SECTION_SIZE_BITS) + +Besides SECTIONS_SHIFT, SECTION_SIZE_BITS is also used to calculate +PAGES_PER_SECTION in makedumpfile just like kernel. + +Unfortunately, this arch-dependent macro SECTION_SIZE_BITS changes, e.g. +recently in kernel commit f0b13ee23241 ("arm64/sparsemem: reduce +SECTION_SIZE_BITS"). But user space wants a stable interface to get +this info. Such info is impossible to be deduced from a crashdump +vmcore. 
Hence append SECTION_SIZE_BITS to vmcoreinfo. + +Link: https://lkml.kernel.org/r/20210608103359.84907-1-kernelfans@gmail.com +Link: http://lists.infradead.org/pipermail/kexec/2021-June/022676.html +Signed-off-by: Pingfan Liu +Acked-by: Baoquan He +Cc: Bhupesh Sharma +Cc: Kazuhito Hagio +Cc: Dave Young +Cc: Boris Petkov +Cc: Ingo Molnar +Cc: Thomas Gleixner +Cc: James Morse +Cc: Mark Rutland +Cc: Will Deacon +Cc: Catalin Marinas +Cc: Michael Ellerman +Cc: Paul Mackerras +Cc: Benjamin Herrenschmidt +Cc: Dave Anderson +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + kernel/crash_core.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/kernel/crash_core.c ++++ b/kernel/crash_core.c +@@ -463,6 +463,7 @@ static int __init crash_save_vmcoreinfo_ + VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS); + VMCOREINFO_STRUCT_SIZE(mem_section); + VMCOREINFO_OFFSET(mem_section, section_mem_map); ++ VMCOREINFO_NUMBER(SECTION_SIZE_BITS); + VMCOREINFO_NUMBER(MAX_PHYSMEM_BITS); + #endif + VMCOREINFO_STRUCT_SIZE(page); diff --git a/queue-5.10/dmaengine-pl330-fix-wrong-usage-of-spinlock-flags-in-dma_cyclc.patch b/queue-5.10/dmaengine-pl330-fix-wrong-usage-of-spinlock-flags-in-dma_cyclc.patch new file mode 100644 index 00000000000..7905de8c3e9 --- /dev/null +++ b/queue-5.10/dmaengine-pl330-fix-wrong-usage-of-spinlock-flags-in-dma_cyclc.patch @@ -0,0 +1,52 @@ +From 4ad5dd2d7876d79507a20f026507d1a93b8fff10 Mon Sep 17 00:00:00 2001 +From: Bumyong Lee +Date: Fri, 7 May 2021 15:36:47 +0900 +Subject: dmaengine: pl330: fix wrong usage of spinlock flags in dma_cyclc + +From: Bumyong Lee + +commit 4ad5dd2d7876d79507a20f026507d1a93b8fff10 upstream. + +flags varible which is the input parameter of pl330_prep_dma_cyclic() +should not be used by spinlock_irq[save/restore] function. 
+ +Signed-off-by: Jongho Park +Signed-off-by: Bumyong Lee +Signed-off-by: Chanho Park +Link: https://lore.kernel.org/r/20210507063647.111209-1-chanho61.park@samsung.com +Fixes: f6f2421c0a1c ("dmaengine: pl330: Merge dma_pl330_dmac and pl330_dmac structs") +Cc: stable@vger.kernel.org +Signed-off-by: Vinod Koul +Signed-off-by: Greg Kroah-Hartman +--- + drivers/dma/pl330.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/drivers/dma/pl330.c ++++ b/drivers/dma/pl330.c +@@ -2696,13 +2696,15 @@ static struct dma_async_tx_descriptor *p + for (i = 0; i < len / period_len; i++) { + desc = pl330_get_desc(pch); + if (!desc) { ++ unsigned long iflags; ++ + dev_err(pch->dmac->ddma.dev, "%s:%d Unable to fetch desc\n", + __func__, __LINE__); + + if (!first) + return NULL; + +- spin_lock_irqsave(&pl330->pool_lock, flags); ++ spin_lock_irqsave(&pl330->pool_lock, iflags); + + while (!list_empty(&first->node)) { + desc = list_entry(first->node.next, +@@ -2712,7 +2714,7 @@ static struct dma_async_tx_descriptor *p + + list_move_tail(&first->node, &pl330->desc_pool); + +- spin_unlock_irqrestore(&pl330->pool_lock, flags); ++ spin_unlock_irqrestore(&pl330->pool_lock, iflags); + + return NULL; + } diff --git a/queue-5.10/drm-amdgpu-gfx10-enlarge-cp_mec_doorbell_range_upper-to-cover-full-doorbell.patch b/queue-5.10/drm-amdgpu-gfx10-enlarge-cp_mec_doorbell_range_upper-to-cover-full-doorbell.patch new file mode 100644 index 00000000000..55eea5cb502 --- /dev/null +++ b/queue-5.10/drm-amdgpu-gfx10-enlarge-cp_mec_doorbell_range_upper-to-cover-full-doorbell.patch @@ -0,0 +1,38 @@ +From 1c0b0efd148d5b24c4932ddb3fa03c8edd6097b3 Mon Sep 17 00:00:00 2001 +From: Yifan Zhang +Date: Thu, 10 Jun 2021 10:10:07 +0800 +Subject: drm/amdgpu/gfx10: enlarge CP_MEC_DOORBELL_RANGE_UPPER to cover full doorbell. + +From: Yifan Zhang + +commit 1c0b0efd148d5b24c4932ddb3fa03c8edd6097b3 upstream. + +If GC has entered CGPG, ringing doorbell > first page doesn't wakeup GC. 
+Enlarge CP_MEC_DOORBELL_RANGE_UPPER to workaround this issue. + +Signed-off-by: Yifan Zhang +Reviewed-by: Felix Kuehling +Reviewed-by: Alex Deucher +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +@@ -6590,8 +6590,12 @@ static int gfx_v10_0_kiq_init_register(s + if (ring->use_doorbell) { + WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, + (adev->doorbell_index.kiq * 2) << 2); ++ /* If GC has entered CGPG, ringing doorbell > first page doesn't ++ * wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to workaround ++ * this issue. ++ */ + WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, +- (adev->doorbell_index.userqueue_end * 2) << 2); ++ (adev->doorbell.size - 4)); + } + + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, diff --git a/queue-5.10/drm-amdgpu-gfx9-fix-the-doorbell-missing-when-in-cgpg-issue.patch b/queue-5.10/drm-amdgpu-gfx9-fix-the-doorbell-missing-when-in-cgpg-issue.patch new file mode 100644 index 00000000000..b1baae1742f --- /dev/null +++ b/queue-5.10/drm-amdgpu-gfx9-fix-the-doorbell-missing-when-in-cgpg-issue.patch @@ -0,0 +1,38 @@ +From 4cbbe34807938e6e494e535a68d5ff64edac3f20 Mon Sep 17 00:00:00 2001 +From: Yifan Zhang +Date: Thu, 10 Jun 2021 09:55:01 +0800 +Subject: drm/amdgpu/gfx9: fix the doorbell missing when in CGPG issue. + +From: Yifan Zhang + +commit 4cbbe34807938e6e494e535a68d5ff64edac3f20 upstream. + +If GC has entered CGPG, ringing doorbell > first page doesn't wakeup GC. +Enlarge CP_MEC_DOORBELL_RANGE_UPPER to workaround this issue. 
+ +Signed-off-by: Yifan Zhang +Reviewed-by: Felix Kuehling +Reviewed-by: Alex Deucher +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +@@ -3619,8 +3619,12 @@ static int gfx_v9_0_kiq_init_register(st + if (ring->use_doorbell) { + WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, + (adev->doorbell_index.kiq * 2) << 2); ++ /* If GC has entered CGPG, ringing doorbell > first page doesn't ++ * wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to workaround ++ * this issue. ++ */ + WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, +- (adev->doorbell_index.userqueue_end * 2) << 2); ++ (adev->doorbell.size - 4)); + } + + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, diff --git a/queue-5.10/mac80211-fix-null-ptr-deref-for-injected-rate-info.patch b/queue-5.10/mac80211-fix-null-ptr-deref-for-injected-rate-info.patch new file mode 100644 index 00000000000..13ed14ec51e --- /dev/null +++ b/queue-5.10/mac80211-fix-null-ptr-deref-for-injected-rate-info.patch @@ -0,0 +1,164 @@ +From bddc0c411a45d3718ac535a070f349be8eca8d48 Mon Sep 17 00:00:00 2001 +From: Mathy Vanhoef +Date: Sun, 30 May 2021 15:32:26 +0200 +Subject: mac80211: Fix NULL ptr deref for injected rate info + +From: Mathy Vanhoef + +commit bddc0c411a45d3718ac535a070f349be8eca8d48 upstream. + +The commit cb17ed29a7a5 ("mac80211: parse radiotap header when selecting Tx +queue") moved the code to validate the radiotap header from +ieee80211_monitor_start_xmit to ieee80211_parse_tx_radiotap. This made is +possible to share more code with the new Tx queue selection code for +injected frames. But at the same time, it now required the call of +ieee80211_parse_tx_radiotap at the beginning of functions which wanted to +handle the radiotap header. 
And this broke the rate parser for radiotap +header parser. + +The radiotap parser for rates is operating most of the time only on the +data in the actual radiotap header. But for the 802.11a/b/g rates, it must +also know the selected band from the chandef information. But this +information is only written to the ieee80211_tx_info at the end of the +ieee80211_monitor_start_xmit - long after ieee80211_parse_tx_radiotap was +already called. The info->band information was therefore always 0 +(NL80211_BAND_2GHZ) when the parser code tried to access it. + +For a 5GHz only device, injecting a frame with 802.11a rates would cause a +NULL pointer dereference because local->hw.wiphy->bands[NL80211_BAND_2GHZ] +would most likely have been NULL when the radiotap parser searched for the +correct rate index of the driver. + +Cc: stable@vger.kernel.org +Reported-by: Ben Greear +Fixes: cb17ed29a7a5 ("mac80211: parse radiotap header when selecting Tx queue") +Signed-off-by: Mathy Vanhoef +[sven@narfation.org: added commit message] +Signed-off-by: Sven Eckelmann +Link: https://lore.kernel.org/r/20210530133226.40587-1-sven@narfation.org +Signed-off-by: Johannes Berg +Signed-off-by: Greg Kroah-Hartman +--- + include/net/mac80211.h | 7 +++++- + net/mac80211/tx.c | 52 +++++++++++++++++++++++++++++++++---------------- + 2 files changed, 42 insertions(+), 17 deletions(-) + +--- a/include/net/mac80211.h ++++ b/include/net/mac80211.h +@@ -6335,7 +6335,12 @@ bool ieee80211_tx_prepare_skb(struct iee + + /** + * ieee80211_parse_tx_radiotap - Sanity-check and parse the radiotap header +- * of injected frames ++ * of injected frames. ++ * ++ * To accurately parse and take into account rate and retransmission fields, ++ * you must initialize the chandef field in the ieee80211_tx_info structure ++ * of the skb before calling this function. 
++ * + * @skb: packet injected by userspace + * @dev: the &struct device of this 802.11 device + */ +--- a/net/mac80211/tx.c ++++ b/net/mac80211/tx.c +@@ -2030,6 +2030,26 @@ void ieee80211_xmit(struct ieee80211_sub + ieee80211_tx(sdata, sta, skb, false); + } + ++static bool ieee80211_validate_radiotap_len(struct sk_buff *skb) ++{ ++ struct ieee80211_radiotap_header *rthdr = ++ (struct ieee80211_radiotap_header *)skb->data; ++ ++ /* check for not even having the fixed radiotap header part */ ++ if (unlikely(skb->len < sizeof(struct ieee80211_radiotap_header))) ++ return false; /* too short to be possibly valid */ ++ ++ /* is it a header version we can trust to find length from? */ ++ if (unlikely(rthdr->it_version)) ++ return false; /* only version 0 is supported */ ++ ++ /* does the skb contain enough to deliver on the alleged length? */ ++ if (unlikely(skb->len < ieee80211_get_radiotap_len(skb->data))) ++ return false; /* skb too short for claimed rt header extent */ ++ ++ return true; ++} ++ + bool ieee80211_parse_tx_radiotap(struct sk_buff *skb, + struct net_device *dev) + { +@@ -2038,8 +2058,6 @@ bool ieee80211_parse_tx_radiotap(struct + struct ieee80211_radiotap_header *rthdr = + (struct ieee80211_radiotap_header *) skb->data; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); +- struct ieee80211_supported_band *sband = +- local->hw.wiphy->bands[info->band]; + int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len, + NULL); + u16 txflags; +@@ -2052,17 +2070,8 @@ bool ieee80211_parse_tx_radiotap(struct + u8 vht_mcs = 0, vht_nss = 0; + int i; + +- /* check for not even having the fixed radiotap header part */ +- if (unlikely(skb->len < sizeof(struct ieee80211_radiotap_header))) +- return false; /* too short to be possibly valid */ +- +- /* is it a header version we can trust to find length from? 
*/ +- if (unlikely(rthdr->it_version)) +- return false; /* only version 0 is supported */ +- +- /* does the skb contain enough to deliver on the alleged length? */ +- if (unlikely(skb->len < ieee80211_get_radiotap_len(skb->data))) +- return false; /* skb too short for claimed rt header extent */ ++ if (!ieee80211_validate_radiotap_len(skb)) ++ return false; + + info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT | + IEEE80211_TX_CTL_DONTFRAG; +@@ -2186,6 +2195,9 @@ bool ieee80211_parse_tx_radiotap(struct + return false; + + if (rate_found) { ++ struct ieee80211_supported_band *sband = ++ local->hw.wiphy->bands[info->band]; ++ + info->control.flags |= IEEE80211_TX_CTRL_RATE_INJECT; + + for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { +@@ -2199,7 +2211,7 @@ bool ieee80211_parse_tx_radiotap(struct + } else if (rate_flags & IEEE80211_TX_RC_VHT_MCS) { + ieee80211_rate_set_vht(info->control.rates, vht_mcs, + vht_nss); +- } else { ++ } else if (sband) { + for (i = 0; i < sband->n_bitrates; i++) { + if (rate * 5 != sband->bitrates[i].bitrate) + continue; +@@ -2236,8 +2248,8 @@ netdev_tx_t ieee80211_monitor_start_xmit + info->flags = IEEE80211_TX_CTL_REQ_TX_STATUS | + IEEE80211_TX_CTL_INJECTED; + +- /* Sanity-check and process the injection radiotap header */ +- if (!ieee80211_parse_tx_radiotap(skb, dev)) ++ /* Sanity-check the length of the radiotap header */ ++ if (!ieee80211_validate_radiotap_len(skb)) + goto fail; + + /* we now know there is a radiotap header with a length we can use */ +@@ -2353,6 +2365,14 @@ netdev_tx_t ieee80211_monitor_start_xmit + + info->band = chandef->chan->band; + ++ /* ++ * Process the radiotap header. This will now take into account the ++ * selected chandef above to accurately set injection rates and ++ * retransmissions. 
++ */ ++ if (!ieee80211_parse_tx_radiotap(skb, dev)) ++ goto fail_rcu; ++ + /* remove the injection radiotap header */ + skb_pull(skb, len_rthdr); + diff --git a/queue-5.10/net-bridge-fix-vlan-tunnel-dst-null-pointer-dereference.patch b/queue-5.10/net-bridge-fix-vlan-tunnel-dst-null-pointer-dereference.patch new file mode 100644 index 00000000000..1e9eb3042fe --- /dev/null +++ b/queue-5.10/net-bridge-fix-vlan-tunnel-dst-null-pointer-dereference.patch @@ -0,0 +1,135 @@ +From 58e2071742e38f29f051b709a5cca014ba51166f Mon Sep 17 00:00:00 2001 +From: Nikolay Aleksandrov +Date: Thu, 10 Jun 2021 15:04:10 +0300 +Subject: net: bridge: fix vlan tunnel dst null pointer dereference + +From: Nikolay Aleksandrov + +commit 58e2071742e38f29f051b709a5cca014ba51166f upstream. + +This patch fixes a tunnel_dst null pointer dereference due to lockless +access in the tunnel egress path. When deleting a vlan tunnel the +tunnel_dst pointer is set to NULL without waiting a grace period (i.e. +while it's still usable) and packets egressing are dereferencing it +without checking. Use READ/WRITE_ONCE to annotate the lockless use of +tunnel_id, use RCU for accessing tunnel_dst and make sure it is read +only once and checked in the egress path. The dst is already properly RCU +protected so we don't need to do anything fancy than to make sure +tunnel_id and tunnel_dst are read only once and checked in the egress path. + +Cc: stable@vger.kernel.org +Fixes: 11538d039ac6 ("bridge: vlan dst_metadata hooks in ingress and egress paths") +Signed-off-by: Nikolay Aleksandrov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_private.h | 4 ++-- + net/bridge/br_vlan_tunnel.c | 38 ++++++++++++++++++++++++-------------- + 2 files changed, 26 insertions(+), 16 deletions(-) + +--- a/net/bridge/br_private.h ++++ b/net/bridge/br_private.h +@@ -98,8 +98,8 @@ struct br_vlan_stats { + }; + + struct br_tunnel_info { +- __be64 tunnel_id; +- struct metadata_dst *tunnel_dst; ++ __be64 tunnel_id; ++ struct metadata_dst __rcu *tunnel_dst; + }; + + /* private vlan flags */ +--- a/net/bridge/br_vlan_tunnel.c ++++ b/net/bridge/br_vlan_tunnel.c +@@ -41,26 +41,33 @@ static struct net_bridge_vlan *br_vlan_t + br_vlan_tunnel_rht_params); + } + ++static void vlan_tunnel_info_release(struct net_bridge_vlan *vlan) ++{ ++ struct metadata_dst *tdst = rtnl_dereference(vlan->tinfo.tunnel_dst); ++ ++ WRITE_ONCE(vlan->tinfo.tunnel_id, 0); ++ RCU_INIT_POINTER(vlan->tinfo.tunnel_dst, NULL); ++ dst_release(&tdst->dst); ++} ++ + void vlan_tunnel_info_del(struct net_bridge_vlan_group *vg, + struct net_bridge_vlan *vlan) + { +- if (!vlan->tinfo.tunnel_dst) ++ if (!rcu_access_pointer(vlan->tinfo.tunnel_dst)) + return; + rhashtable_remove_fast(&vg->tunnel_hash, &vlan->tnode, + br_vlan_tunnel_rht_params); +- vlan->tinfo.tunnel_id = 0; +- dst_release(&vlan->tinfo.tunnel_dst->dst); +- vlan->tinfo.tunnel_dst = NULL; ++ vlan_tunnel_info_release(vlan); + } + + static int __vlan_tunnel_info_add(struct net_bridge_vlan_group *vg, + struct net_bridge_vlan *vlan, u32 tun_id) + { +- struct metadata_dst *metadata = NULL; ++ struct metadata_dst *metadata = rtnl_dereference(vlan->tinfo.tunnel_dst); + __be64 key = key32_to_tunnel_id(cpu_to_be32(tun_id)); + int err; + +- if (vlan->tinfo.tunnel_dst) ++ if (metadata) + return -EEXIST; + + metadata = __ip_tun_set_dst(0, 0, 0, 0, 0, TUNNEL_KEY, +@@ -69,8 +76,8 @@ static int __vlan_tunnel_info_add(struct + return -EINVAL; + + metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_BRIDGE; +- vlan->tinfo.tunnel_dst = metadata; 
+- vlan->tinfo.tunnel_id = key; ++ rcu_assign_pointer(vlan->tinfo.tunnel_dst, metadata); ++ WRITE_ONCE(vlan->tinfo.tunnel_id, key); + + err = rhashtable_lookup_insert_fast(&vg->tunnel_hash, &vlan->tnode, + br_vlan_tunnel_rht_params); +@@ -79,9 +86,7 @@ static int __vlan_tunnel_info_add(struct + + return 0; + out: +- dst_release(&vlan->tinfo.tunnel_dst->dst); +- vlan->tinfo.tunnel_dst = NULL; +- vlan->tinfo.tunnel_id = 0; ++ vlan_tunnel_info_release(vlan); + + return err; + } +@@ -182,12 +187,15 @@ int br_handle_ingress_vlan_tunnel(struct + int br_handle_egress_vlan_tunnel(struct sk_buff *skb, + struct net_bridge_vlan *vlan) + { ++ struct metadata_dst *tunnel_dst; ++ __be64 tunnel_id; + int err; + +- if (!vlan || !vlan->tinfo.tunnel_id) ++ if (!vlan) + return 0; + +- if (unlikely(!skb_vlan_tag_present(skb))) ++ tunnel_id = READ_ONCE(vlan->tinfo.tunnel_id); ++ if (!tunnel_id || unlikely(!skb_vlan_tag_present(skb))) + return 0; + + skb_dst_drop(skb); +@@ -195,7 +203,9 @@ int br_handle_egress_vlan_tunnel(struct + if (err) + return err; + +- skb_dst_set(skb, dst_clone(&vlan->tinfo.tunnel_dst->dst)); ++ tunnel_dst = rcu_dereference(vlan->tinfo.tunnel_dst); ++ if (tunnel_dst) ++ skb_dst_set(skb, dst_clone(&tunnel_dst->dst)); + + return 0; + } diff --git a/queue-5.10/net-bridge-fix-vlan-tunnel-dst-refcnt-when-egressing.patch b/queue-5.10/net-bridge-fix-vlan-tunnel-dst-refcnt-when-egressing.patch new file mode 100644 index 00000000000..b1c112f5bdd --- /dev/null +++ b/queue-5.10/net-bridge-fix-vlan-tunnel-dst-refcnt-when-egressing.patch @@ -0,0 +1,87 @@ +From cfc579f9d89af4ada58c69b03bcaa4887840f3b3 Mon Sep 17 00:00:00 2001 +From: Nikolay Aleksandrov +Date: Thu, 10 Jun 2021 15:04:11 +0300 +Subject: net: bridge: fix vlan tunnel dst refcnt when egressing + +From: Nikolay Aleksandrov + +commit cfc579f9d89af4ada58c69b03bcaa4887840f3b3 upstream. 
+ +The egress tunnel code uses dst_clone() and directly sets the result +which is wrong because the entry might have 0 refcnt or be already deleted, +causing number of problems. It also triggers the WARN_ON() in dst_hold()[1] +when a refcnt couldn't be taken. Fix it by using dst_hold_safe() and +checking if a reference was actually taken before setting the dst. + +[1] dmesg WARN_ON log and following refcnt errors + WARNING: CPU: 5 PID: 38 at include/net/dst.h:230 br_handle_egress_vlan_tunnel+0x10b/0x134 [bridge] + Modules linked in: 8021q garp mrp bridge stp llc bonding ipv6 virtio_net + CPU: 5 PID: 38 Comm: ksoftirqd/5 Kdump: loaded Tainted: G W 5.13.0-rc3+ #360 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1.fc33 04/01/2014 + RIP: 0010:br_handle_egress_vlan_tunnel+0x10b/0x134 [bridge] + Code: e8 85 bc 01 e1 45 84 f6 74 90 45 31 f6 85 db 48 c7 c7 a0 02 19 a0 41 0f 94 c6 31 c9 31 d2 44 89 f6 e8 64 bc 01 e1 85 db 75 02 <0f> 0b 31 c9 31 d2 44 89 f6 48 c7 c7 70 02 19 a0 e8 4b bc 01 e1 49 + RSP: 0018:ffff8881003d39e8 EFLAGS: 00010246 + RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 + RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffffffffa01902a0 + RBP: ffff8881040c6700 R08: 0000000000000000 R09: 0000000000000001 + R10: 2ce93d0054fe0d00 R11: 54fe0d00000e0000 R12: ffff888109515000 + R13: 0000000000000000 R14: 0000000000000001 R15: 0000000000000401 + FS: 0000000000000000(0000) GS:ffff88822bf40000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 00007f42ba70f030 CR3: 0000000109926000 CR4: 00000000000006e0 + Call Trace: + br_handle_vlan+0xbc/0xca [bridge] + __br_forward+0x23/0x164 [bridge] + deliver_clone+0x41/0x48 [bridge] + br_handle_frame_finish+0x36f/0x3aa [bridge] + ? skb_dst+0x2e/0x38 [bridge] + ? br_handle_ingress_vlan_tunnel+0x3e/0x1c8 [bridge] + ? br_handle_frame_finish+0x3aa/0x3aa [bridge] + br_handle_frame+0x2c3/0x377 [bridge] + ? __skb_pull+0x33/0x51 + ? 
vlan_do_receive+0x4f/0x36a + ? br_handle_frame_finish+0x3aa/0x3aa [bridge] + __netif_receive_skb_core+0x539/0x7c6 + ? __list_del_entry_valid+0x16e/0x1c2 + __netif_receive_skb_list_core+0x6d/0xd6 + netif_receive_skb_list_internal+0x1d9/0x1fa + gro_normal_list+0x22/0x3e + dev_gro_receive+0x55b/0x600 + ? detach_buf_split+0x58/0x140 + napi_gro_receive+0x94/0x12e + virtnet_poll+0x15d/0x315 [virtio_net] + __napi_poll+0x2c/0x1c9 + net_rx_action+0xe6/0x1fb + __do_softirq+0x115/0x2d8 + run_ksoftirqd+0x18/0x20 + smpboot_thread_fn+0x183/0x19c + ? smpboot_unregister_percpu_thread+0x66/0x66 + kthread+0x10a/0x10f + ? kthread_mod_delayed_work+0xb6/0xb6 + ret_from_fork+0x22/0x30 + ---[ end trace 49f61b07f775fd2b ]--- + dst_release: dst:00000000c02d677a refcnt:-1 + dst_release underflow + +Cc: stable@vger.kernel.org +Fixes: 11538d039ac6 ("bridge: vlan dst_metadata hooks in ingress and egress paths") +Signed-off-by: Nikolay Aleksandrov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_vlan_tunnel.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/bridge/br_vlan_tunnel.c ++++ b/net/bridge/br_vlan_tunnel.c +@@ -204,8 +204,8 @@ int br_handle_egress_vlan_tunnel(struct + return err; + + tunnel_dst = rcu_dereference(vlan->tinfo.tunnel_dst); +- if (tunnel_dst) +- skb_dst_set(skb, dst_clone(&tunnel_dst->dst)); ++ if (tunnel_dst && dst_hold_safe(&tunnel_dst->dst)) ++ skb_dst_set(skb, &tunnel_dst->dst); + + return 0; + } diff --git a/queue-5.10/net-ll_temac-fix-tx-bd-buffer-overwrite.patch b/queue-5.10/net-ll_temac-fix-tx-bd-buffer-overwrite.patch new file mode 100644 index 00000000000..8035548fc67 --- /dev/null +++ b/queue-5.10/net-ll_temac-fix-tx-bd-buffer-overwrite.patch @@ -0,0 +1,36 @@ +From c364df2489b8ef2f5e3159b1dff1ff1fdb16040d Mon Sep 17 00:00:00 2001 +From: Esben Haabendal +Date: Fri, 18 Jun 2021 12:52:33 +0200 +Subject: net: ll_temac: Fix TX BD buffer overwrite + +From: Esben Haabendal + +commit 
c364df2489b8ef2f5e3159b1dff1ff1fdb16040d upstream. + +Just as the initial check, we need to ensure num_frag+1 buffers available, +as that is the number of buffers we are going to use. + +This fixes a buffer overflow, which might be seen during heavy network +load. Complete lockup of TEMAC was reproducible within about 10 minutes of +a particular load. + +Fixes: 84823ff80f74 ("net: ll_temac: Fix race condition causing TX hang") +Cc: stable@vger.kernel.org # v5.4+ +Signed-off-by: Esben Haabendal +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/xilinx/ll_temac_main.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/xilinx/ll_temac_main.c ++++ b/drivers/net/ethernet/xilinx/ll_temac_main.c +@@ -849,7 +849,7 @@ temac_start_xmit(struct sk_buff *skb, st + smp_mb(); + + /* Space might have just been freed - check again */ +- if (temac_check_tx_bd_space(lp, num_frag)) ++ if (temac_check_tx_bd_space(lp, num_frag + 1)) + return NETDEV_TX_BUSY; + + netif_wake_queue(ndev); diff --git a/queue-5.10/net-ll_temac-make-sure-to-free-skb-when-it-is-completely-used.patch b/queue-5.10/net-ll_temac-make-sure-to-free-skb-when-it-is-completely-used.patch new file mode 100644 index 00000000000..ac1e38de387 --- /dev/null +++ b/queue-5.10/net-ll_temac-make-sure-to-free-skb-when-it-is-completely-used.patch @@ -0,0 +1,49 @@ +From 6aa32217a9a446275440ee8724b1ecaf1838df47 Mon Sep 17 00:00:00 2001 +From: Esben Haabendal +Date: Fri, 18 Jun 2021 12:52:23 +0200 +Subject: net: ll_temac: Make sure to free skb when it is completely used + +From: Esben Haabendal + +commit 6aa32217a9a446275440ee8724b1ecaf1838df47 upstream. + +With the skb pointer piggy-backed on the TX BD, we have a simple and +efficient way to free the skb buffer when the frame has been transmitted. +But in order to avoid freeing the skb while there are still fragments from +the skb in use, we need to piggy-back on the TX BD of the skb, not the +first. 
+ +Without this, we are doing use-after-free on the DMA side, when the first +BD of a multi TX BD packet is seen as completed in xmit_done, and the +remaining BDs are still being processed. + +Cc: stable@vger.kernel.org # v5.4+ +Signed-off-by: Esben Haabendal +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/xilinx/ll_temac_main.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/xilinx/ll_temac_main.c ++++ b/drivers/net/ethernet/xilinx/ll_temac_main.c +@@ -876,7 +876,6 @@ temac_start_xmit(struct sk_buff *skb, st + return NETDEV_TX_OK; + } + cur_p->phys = cpu_to_be32(skb_dma_addr); +- ptr_to_txbd((void *)skb, cur_p); + + for (ii = 0; ii < num_frag; ii++) { + if (++lp->tx_bd_tail >= lp->tx_bd_num) +@@ -915,6 +914,11 @@ temac_start_xmit(struct sk_buff *skb, st + } + cur_p->app0 |= cpu_to_be32(STS_CTRL_APP0_EOP); + ++ /* Mark last fragment with skb address, so it can be consumed ++ * in temac_start_xmit_done() ++ */ ++ ptr_to_txbd((void *)skb, cur_p); ++ + tail_p = lp->tx_bd_p + sizeof(*lp->tx_bd_v) * lp->tx_bd_tail; + lp->tx_bd_tail++; + if (lp->tx_bd_tail >= lp->tx_bd_num) diff --git a/queue-5.10/series b/queue-5.10/series index 464623d31c5..100d53997d1 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -115,3 +115,21 @@ kvm-x86-fix-x86_emulator-slab-cache-leak.patch s390-mcck-fix-calculation-of-sie-critical-section-size.patch s390-ap-fix-hanging-ioctl-caused-by-wrong-msg-counter.patch arcv2-save-abi-registers-across-signal-handling.patch +x86-mm-avoid-truncating-memblocks-for-sgx-memory.patch +x86-process-check-pf_kthread-and-not-current-mm-for-kernel-threads.patch +x86-ioremap-map-efi-reserved-memory-as-encrypted-for-sev.patch +x86-pkru-write-hardware-init-value-to-pkru-when-xstate-is-init.patch +x86-fpu-prevent-state-corruption-in-__fpu__restore_sig.patch +x86-fpu-invalidate-fpu-state-after-a-failed-xrstor-from-a-user-buffer.patch 
+x86-fpu-reset-state-for-all-signal-restore-failures.patch +crash_core-vmcoreinfo-append-section_size_bits-to-vmcoreinfo.patch +dmaengine-pl330-fix-wrong-usage-of-spinlock-flags-in-dma_cyclc.patch +mac80211-fix-null-ptr-deref-for-injected-rate-info.patch +cfg80211-make-certificate-generation-more-robust.patch +cfg80211-avoid-double-free-of-pmsr-request.patch +drm-amdgpu-gfx10-enlarge-cp_mec_doorbell_range_upper-to-cover-full-doorbell.patch +drm-amdgpu-gfx9-fix-the-doorbell-missing-when-in-cgpg-issue.patch +net-ll_temac-make-sure-to-free-skb-when-it-is-completely-used.patch +net-ll_temac-fix-tx-bd-buffer-overwrite.patch +net-bridge-fix-vlan-tunnel-dst-null-pointer-dereference.patch +net-bridge-fix-vlan-tunnel-dst-refcnt-when-egressing.patch diff --git a/queue-5.10/x86-fpu-invalidate-fpu-state-after-a-failed-xrstor-from-a-user-buffer.patch b/queue-5.10/x86-fpu-invalidate-fpu-state-after-a-failed-xrstor-from-a-user-buffer.patch new file mode 100644 index 00000000000..07700f32c35 --- /dev/null +++ b/queue-5.10/x86-fpu-invalidate-fpu-state-after-a-failed-xrstor-from-a-user-buffer.patch @@ -0,0 +1,74 @@ +From d8778e393afa421f1f117471144f8ce6deb6953a Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Tue, 8 Jun 2021 16:36:19 +0200 +Subject: x86/fpu: Invalidate FPU state after a failed XRSTOR from a user buffer + +From: Andy Lutomirski + +commit d8778e393afa421f1f117471144f8ce6deb6953a upstream. + +Both Intel and AMD consider it to be architecturally valid for XRSTOR to +fail with #PF but nonetheless change the register state. The actual +conditions under which this might occur are unclear [1], but it seems +plausible that this might be triggered if one sibling thread unmaps a page +and invalidates the shared TLB while another sibling thread is executing +XRSTOR on the page in question. 
+ +__fpu__restore_sig() can execute XRSTOR while the hardware registers +are preserved on behalf of a different victim task (using the +fpu_fpregs_owner_ctx mechanism), and, in theory, XRSTOR could fail but +modify the registers. + +If this happens, then there is a window in which __fpu__restore_sig() +could schedule out and the victim task could schedule back in without +reloading its own FPU registers. This would result in part of the FPU +state that __fpu__restore_sig() was attempting to load leaking into the +victim task's user-visible state. + +Invalidate preserved FPU registers on XRSTOR failure to prevent this +situation from corrupting any state. + +[1] Frequent readers of the errata lists might imagine "complex + microarchitectural conditions". + +Fixes: 1d731e731c4c ("x86/fpu: Add a fastpath to __fpu__restore_sig()") +Signed-off-by: Andy Lutomirski +Signed-off-by: Thomas Gleixner +Signed-off-by: Borislav Petkov +Acked-by: Dave Hansen +Acked-by: Rik van Riel +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20210608144345.758116583@linutronix.de +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/fpu/signal.c | 19 +++++++++++++++++++ + 1 file changed, 19 insertions(+) + +--- a/arch/x86/kernel/fpu/signal.c ++++ b/arch/x86/kernel/fpu/signal.c +@@ -369,6 +369,25 @@ static int __fpu__restore_sig(void __use + fpregs_unlock(); + return 0; + } ++ ++ /* ++ * The above did an FPU restore operation, restricted to ++ * the user portion of the registers, and failed, but the ++ * microcode might have modified the FPU registers ++ * nevertheless. ++ * ++ * If the FPU registers do not belong to current, then ++ * invalidate the FPU register state otherwise the task might ++ * preempt current and return to user space with corrupted ++ * FPU registers. ++ * ++ * In case current owns the FPU registers then no further ++ * action is required. The fixup below will handle it ++ * correctly. 
++ */ ++ if (test_thread_flag(TIF_NEED_FPU_LOAD)) ++ __cpu_invalidate_fpregs_state(); ++ + fpregs_unlock(); + } else { + /* diff --git a/queue-5.10/x86-fpu-prevent-state-corruption-in-__fpu__restore_sig.patch b/queue-5.10/x86-fpu-prevent-state-corruption-in-__fpu__restore_sig.patch new file mode 100644 index 00000000000..3b9caf02c8a --- /dev/null +++ b/queue-5.10/x86-fpu-prevent-state-corruption-in-__fpu__restore_sig.patch @@ -0,0 +1,64 @@ +From 484cea4f362e1eeb5c869abbfb5f90eae6421b38 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Tue, 8 Jun 2021 16:36:18 +0200 +Subject: x86/fpu: Prevent state corruption in __fpu__restore_sig() + +From: Thomas Gleixner + +commit 484cea4f362e1eeb5c869abbfb5f90eae6421b38 upstream. + +The non-compacted slowpath uses __copy_from_user() and copies the entire +user buffer into the kernel buffer, verbatim. This means that the kernel +buffer may now contain entirely invalid state on which XRSTOR will #GP. +validate_user_xstate_header() can detect some of that corruption, but that +leaves the onus on callers to clear the buffer. + +Prior to XSAVES support, it was possible just to reinitialize the buffer, +completely, but with supervisor states that is no longer possible as the +buffer clearing code split got it backwards. Fixing that is possible but +not corrupting the state in the first place is more robust. + +Avoid corruption of the kernel XSAVE buffer by using copy_user_to_xstate() +which validates the XSAVE header contents before copying the actual states +to the kernel. copy_user_to_xstate() was previously only called for +compacted-format kernel buffers, but it works for both compacted and +non-compacted forms. + +Using it for the non-compacted form is slower because of multiple +__copy_from_user() operations, but that cost is less important than robust +code in an already slow path.
+ +[ Changelog polished by Dave Hansen ] + +Fixes: b860eb8dce59 ("x86/fpu/xstate: Define new functions for clearing fpregs and xstates") +Reported-by: syzbot+2067e764dbcd10721e2e@syzkaller.appspotmail.com +Signed-off-by: Thomas Gleixner +Signed-off-by: Borislav Petkov +Reviewed-by: Borislav Petkov +Acked-by: Dave Hansen +Acked-by: Rik van Riel +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20210608144345.611833074@linutronix.de +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/fpu/signal.c | 9 +-------- + 1 file changed, 1 insertion(+), 8 deletions(-) + +--- a/arch/x86/kernel/fpu/signal.c ++++ b/arch/x86/kernel/fpu/signal.c +@@ -405,14 +405,7 @@ static int __fpu__restore_sig(void __use + if (use_xsave() && !fx_only) { + u64 init_bv = xfeatures_mask_user() & ~user_xfeatures; + +- if (using_compacted_format()) { +- ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx); +- } else { +- ret = __copy_from_user(&fpu->state.xsave, buf_fx, state_size); +- +- if (!ret && state_size > offsetof(struct xregs_state, header)) +- ret = validate_user_xstate_header(&fpu->state.xsave.header); +- } ++ ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx); + if (ret) + goto err_out; + diff --git a/queue-5.10/x86-fpu-reset-state-for-all-signal-restore-failures.patch b/queue-5.10/x86-fpu-reset-state-for-all-signal-restore-failures.patch new file mode 100644 index 00000000000..34e989aafc3 --- /dev/null +++ b/queue-5.10/x86-fpu-reset-state-for-all-signal-restore-failures.patch @@ -0,0 +1,96 @@ +From efa165504943f2128d50f63de0c02faf6dcceb0d Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Wed, 9 Jun 2021 21:18:00 +0200 +Subject: x86/fpu: Reset state for all signal restore failures + +From: Thomas Gleixner + +commit efa165504943f2128d50f63de0c02faf6dcceb0d upstream. + +If access_ok() or fpregs_soft_set() fails in __fpu__restore_sig() then the +function just returns but does not clear the FPU state as it does for all +other fatal failures. 
+ +Clear the FPU state for these failures as well. + +Fixes: 72a671ced66d ("x86, fpu: Unify signal handling code paths for x86 and x86_64 kernels") +Signed-off-by: Thomas Gleixner +Signed-off-by: Borislav Petkov +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/87mtryyhhz.ffs@nanos.tec.linutronix.de +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/fpu/signal.c | 26 +++++++++++++++----------- + 1 file changed, 15 insertions(+), 11 deletions(-) + +--- a/arch/x86/kernel/fpu/signal.c ++++ b/arch/x86/kernel/fpu/signal.c +@@ -307,13 +307,17 @@ static int __fpu__restore_sig(void __use + return 0; + } + +- if (!access_ok(buf, size)) +- return -EACCES; ++ if (!access_ok(buf, size)) { ++ ret = -EACCES; ++ goto out; ++ } + +- if (!static_cpu_has(X86_FEATURE_FPU)) +- return fpregs_soft_set(current, NULL, +- 0, sizeof(struct user_i387_ia32_struct), +- NULL, buf) != 0; ++ if (!static_cpu_has(X86_FEATURE_FPU)) { ++ ret = fpregs_soft_set(current, NULL, 0, ++ sizeof(struct user_i387_ia32_struct), ++ NULL, buf); ++ goto out; ++ } + + if (use_xsave()) { + struct _fpx_sw_bytes fx_sw_user; +@@ -396,7 +400,7 @@ static int __fpu__restore_sig(void __use + */ + ret = __copy_from_user(&env, buf, sizeof(env)); + if (ret) +- goto err_out; ++ goto out; + envp = &env; + } + +@@ -426,7 +430,7 @@ static int __fpu__restore_sig(void __use + + ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx); + if (ret) +- goto err_out; ++ goto out; + + sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures, + fx_only); +@@ -446,7 +450,7 @@ static int __fpu__restore_sig(void __use + ret = __copy_from_user(&fpu->state.fxsave, buf_fx, state_size); + if (ret) { + ret = -EFAULT; +- goto err_out; ++ goto out; + } + + sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures, +@@ -464,7 +468,7 @@ static int __fpu__restore_sig(void __use + } else { + ret = __copy_from_user(&fpu->state.fsave, buf_fx, state_size); + if (ret) +- goto err_out; ++ goto out; + + fpregs_lock(); + ret = 
copy_kernel_to_fregs_err(&fpu->state.fsave); +@@ -475,7 +479,7 @@ static int __fpu__restore_sig(void __use + fpregs_deactivate(fpu); + fpregs_unlock(); + +-err_out: ++out: + if (ret) + fpu__clear_user_states(fpu); + return ret; diff --git a/queue-5.10/x86-ioremap-map-efi-reserved-memory-as-encrypted-for-sev.patch b/queue-5.10/x86-ioremap-map-efi-reserved-memory-as-encrypted-for-sev.patch new file mode 100644 index 00000000000..0f030876da7 --- /dev/null +++ b/queue-5.10/x86-ioremap-map-efi-reserved-memory-as-encrypted-for-sev.patch @@ -0,0 +1,67 @@ +From 8d651ee9c71bb12fc0c8eb2786b66cbe5aa3e43b Mon Sep 17 00:00:00 2001 +From: Tom Lendacky +Date: Tue, 8 Jun 2021 11:54:33 +0200 +Subject: x86/ioremap: Map EFI-reserved memory as encrypted for SEV + +From: Tom Lendacky + +commit 8d651ee9c71bb12fc0c8eb2786b66cbe5aa3e43b upstream. + +Some drivers require memory that is marked as EFI boot services +data. In order for this memory to not be re-used by the kernel +after ExitBootServices(), efi_mem_reserve() is used to preserve it +by inserting a new EFI memory descriptor and marking it with the +EFI_MEMORY_RUNTIME attribute. + +Under SEV, memory marked with the EFI_MEMORY_RUNTIME attribute needs to +be mapped encrypted by Linux, otherwise the kernel might crash at boot +like below: + + EFI Variables Facility v0.08 2004-May-17 + general protection fault, probably for non-canonical address 0x3597688770a868b2: 0000 [#1] SMP NOPTI + CPU: 13 PID: 1 Comm: swapper/0 Not tainted 5.12.4-2-default #1 openSUSE Tumbleweed + Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 + RIP: 0010:efi_mokvar_entry_next + [...] + Call Trace: + efi_mokvar_sysfs_init + ? efi_mokvar_table_init + do_one_initcall + ? __kmalloc + kernel_init_freeable + ? rest_init + kernel_init + ret_from_fork + +Expand the __ioremap_check_other() function to additionally check for +this other type of boot data reserved at runtime and indicate that it +should be mapped encrypted for an SEV guest. 
+ + [ bp: Massage commit message. ] + +Fixes: 58c909022a5a ("efi: Support for MOK variable config table") +Reported-by: Joerg Roedel +Signed-off-by: Tom Lendacky +Signed-off-by: Joerg Roedel +Signed-off-by: Borislav Petkov +Tested-by: Joerg Roedel +Cc: stable@vger.kernel.org # 5.10+ +Link: https://lkml.kernel.org/r/20210608095439.12668-2-joro@8bytes.org +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/mm/ioremap.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/arch/x86/mm/ioremap.c ++++ b/arch/x86/mm/ioremap.c +@@ -118,7 +118,9 @@ static void __ioremap_check_other(resour + if (!IS_ENABLED(CONFIG_EFI)) + return; + +- if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA) ++ if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA || ++ (efi_mem_type(addr) == EFI_BOOT_SERVICES_DATA && ++ efi_mem_attributes(addr) & EFI_MEMORY_RUNTIME)) + desc->flags |= IORES_MAP_ENCRYPTED; + } + diff --git a/queue-5.10/x86-mm-avoid-truncating-memblocks-for-sgx-memory.patch b/queue-5.10/x86-mm-avoid-truncating-memblocks-for-sgx-memory.patch new file mode 100644 index 00000000000..91e80e5f7b5 --- /dev/null +++ b/queue-5.10/x86-mm-avoid-truncating-memblocks-for-sgx-memory.patch @@ -0,0 +1,95 @@ +From 28e5e44aa3f4e0e0370864ed008fb5e2d85f4dc8 Mon Sep 17 00:00:00 2001 +From: Fan Du +Date: Thu, 17 Jun 2021 12:46:57 -0700 +Subject: x86/mm: Avoid truncating memblocks for SGX memory + +From: Fan Du + +commit 28e5e44aa3f4e0e0370864ed008fb5e2d85f4dc8 upstream. + +tl;dr: + +Several SGX users reported seeing the following message on NUMA systems: + + sgx: [Firmware Bug]: Unable to map EPC section to online node. Fallback to the NUMA node 0. + +This turned out to be the memblock code mistakenly throwing away SGX +memory. + +=== Full Changelog === + +The 'max_pfn' variable represents the highest known RAM address. It can +be used, for instance, to quickly determine for which physical addresses +there is mem_map[] space allocated.
The numa_meminfo code makes an +effort to throw out ("trim") all memory blocks which are above 'max_pfn'. + +SGX memory is not considered RAM (it is marked as "Reserved" in the +e820) and is not taken into account by max_pfn. Despite this, SGX memory +areas have NUMA affinity and are enumerated in the ACPI SRAT table. The +existing SGX code uses the numa_meminfo mechanism to look up the NUMA +affinity for its memory areas. + +In cases where SGX memory was above max_pfn (usually just the one EPC +section in the last highest NUMA node), the numa_memblock is truncated +at 'max_pfn', which is below the SGX memory. When the SGX code tries to +look up the affinity of this memory, it fails and produces an error message: + + sgx: [Firmware Bug]: Unable to map EPC section to online node. Fallback to the NUMA node 0. + +and assigns the memory to NUMA node 0. + +Instead of silently truncating the memory block at 'max_pfn' and +dropping the SGX memory, add the truncated portion to +'numa_reserved_meminfo'. This allows the SGX code to later determine +the NUMA affinity of its 'Reserved' area. + +Before, numa_meminfo looked like this (from 'crash'): + + blk = { start = 0x0, end = 0x2080000000, nid = 0x0 } + { start = 0x2080000000, end = 0x4000000000, nid = 0x1 } + +numa_reserved_meminfo is empty. 
+ +With this, numa_meminfo looks like this: + + blk = { start = 0x0, end = 0x2080000000, nid = 0x0 } + { start = 0x2080000000, end = 0x4000000000, nid = 0x1 } + +and numa_reserved_meminfo has an entry for node 1's SGX memory: + + blk = { start = 0x4000000000, end = 0x4080000000, nid = 0x1 } + + [ daveh: completely rewrote/reworked changelog ] + +Fixes: 5d30f92e7631 ("x86/NUMA: Provide a range-to-target_node lookup facility") +Reported-by: Reinette Chatre +Signed-off-by: Fan Du +Signed-off-by: Dave Hansen +Signed-off-by: Borislav Petkov +Reviewed-by: Jarkko Sakkinen +Reviewed-by: Dan Williams +Reviewed-by: Dave Hansen +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20210617194657.0A99CB22@viggo.jf.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/mm/numa.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/arch/x86/mm/numa.c ++++ b/arch/x86/mm/numa.c +@@ -254,7 +254,13 @@ int __init numa_cleanup_meminfo(struct n + + /* make sure all non-reserved blocks are inside the limits */ + bi->start = max(bi->start, low); +- bi->end = min(bi->end, high); ++ ++ /* preserve info for non-RAM areas above 'max_pfn': */ ++ if (bi->end > high) { ++ numa_add_memblk_to(bi->nid, high, bi->end, ++ &numa_reserved_meminfo); ++ bi->end = high; ++ } + + /* and there's no empty block */ + if (bi->start >= bi->end) diff --git a/queue-5.10/x86-pkru-write-hardware-init-value-to-pkru-when-xstate-is-init.patch b/queue-5.10/x86-pkru-write-hardware-init-value-to-pkru-when-xstate-is-init.patch new file mode 100644 index 00000000000..70ff8700edd --- /dev/null +++ b/queue-5.10/x86-pkru-write-hardware-init-value-to-pkru-when-xstate-is-init.patch @@ -0,0 +1,93 @@ +From 510b80a6a0f1a0d114c6e33bcea64747d127973c Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Tue, 8 Jun 2021 16:36:21 +0200 +Subject: x86/pkru: Write hardware init value to PKRU when xstate is init + +From: Thomas Gleixner + +commit 510b80a6a0f1a0d114c6e33bcea64747d127973c upstream.
+ +When user space brings PKRU into init state, then the kernel handling is +broken: + + T1 user space + xsave(state) + state.header.xfeatures &= ~XFEATURE_MASK_PKRU; + xrstor(state) + + T1 -> kernel + schedule() + XSAVE(S) -> T1->xsave.header.xfeatures[PKRU] == 0 + T1->flags |= TIF_NEED_FPU_LOAD; + + wrpkru(); + + schedule() + ... + pk = get_xsave_addr(&T1->fpu->state.xsave, XFEATURE_PKRU); + if (pk) + wrpkru(pk->pkru); + else + wrpkru(DEFAULT_PKRU); + +Because the xfeatures bit is 0 and therefore the value in the xsave +storage is not valid, get_xsave_addr() returns NULL and switch_to() +writes the default PKRU. -> FAIL #1! + +So that wrecks any copy_to/from_user() on the way back to user space +which hits memory which is protected by the default PKRU value. + +Assuming that this does not fail (pure luck), T1 goes back to user +space and because TIF_NEED_FPU_LOAD is set it ends up in + + switch_fpu_return() + __fpregs_load_activate() + if (!fpregs_state_valid()) { + load_XSTATE_from_task(); + } + +But if nothing touched the FPU between T1 scheduling out and back in, +then the fpregs_state is still valid which means switch_fpu_return() +does nothing and just clears TIF_NEED_FPU_LOAD. Back to user space with +DEFAULT_PKRU loaded. -> FAIL #2! + +The fix is simple: if get_xsave_addr() returns NULL then set the +PKRU value to 0 instead of the restrictive default PKRU value in +init_pkru_value. + + [ bp: Massage in minor nitpicks from folks.
] + +Fixes: 0cecca9d03c9 ("x86/fpu: Eager switch PKRU state") +Signed-off-by: Thomas Gleixner +Signed-off-by: Borislav Petkov +Acked-by: Dave Hansen +Acked-by: Rik van Riel +Tested-by: Babu Moger +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20210608144346.045616965@linutronix.de +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/fpu/internal.h | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +--- a/arch/x86/include/asm/fpu/internal.h ++++ b/arch/x86/include/asm/fpu/internal.h +@@ -579,9 +579,16 @@ static inline void switch_fpu_finish(str + * return to userland e.g. for a copy_to_user() operation. + */ + if (!(current->flags & PF_KTHREAD)) { ++ /* ++ * If the PKRU bit in xsave.header.xfeatures is not set, ++ * then the PKRU component was in init state, which means ++ * XRSTOR will set PKRU to 0. If the bit is not set then ++ * get_xsave_addr() will return NULL because the PKRU value ++ * in memory is not valid. This means pkru_val has to be ++ * set to 0 and not to init_pkru_value. ++ */ + pk = get_xsave_addr(&new_fpu->state.xsave, XFEATURE_PKRU); +- if (pk) +- pkru_val = pk->pkru; ++ pkru_val = pk ? pk->pkru : 0; + } + __write_pkru(pkru_val); + } diff --git a/queue-5.10/x86-process-check-pf_kthread-and-not-current-mm-for-kernel-threads.patch b/queue-5.10/x86-process-check-pf_kthread-and-not-current-mm-for-kernel-threads.patch new file mode 100644 index 00000000000..bbc1e444c89 --- /dev/null +++ b/queue-5.10/x86-process-check-pf_kthread-and-not-current-mm-for-kernel-threads.patch @@ -0,0 +1,38 @@ +From 12f7764ac61200e32c916f038bdc08f884b0b604 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Tue, 8 Jun 2021 16:36:20 +0200 +Subject: x86/process: Check PF_KTHREAD and not current->mm for kernel threads + +From: Thomas Gleixner + +commit 12f7764ac61200e32c916f038bdc08f884b0b604 upstream. + +switch_fpu_finish() checks current->mm as indicator for kernel threads. 
+That's wrong because kernel threads can temporarily use a mm of a user +process via kthread_use_mm(). + +Check the task flags for PF_KTHREAD instead. + +Fixes: 0cecca9d03c9 ("x86/fpu: Eager switch PKRU state") +Signed-off-by: Thomas Gleixner +Signed-off-by: Borislav Petkov +Acked-by: Dave Hansen +Acked-by: Rik van Riel +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20210608144345.912645927@linutronix.de +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/fpu/internal.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/include/asm/fpu/internal.h ++++ b/arch/x86/include/asm/fpu/internal.h +@@ -578,7 +578,7 @@ static inline void switch_fpu_finish(str + * PKRU state is switched eagerly because it needs to be valid before we + * return to userland e.g. for a copy_to_user() operation. + */ +- if (current->mm) { ++ if (!(current->flags & PF_KTHREAD)) { + pk = get_xsave_addr(&new_fpu->state.xsave, XFEATURE_PKRU); + if (pk) + pkru_val = pk->pkru;