From 95e35cd7b2a8a2bcdeb598a42d3bff22249861e3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 1 Jul 2019 17:39:34 +0200 Subject: [PATCH] 5.1-stable patches added patches: cpu-speculation-warn-on-unsupported-mitigations-parameter.patch efi-memreserve-deal-with-memreserve-entries-in-unmapped-memory.patch irqchip-mips-gic-use-the-correct-local-interrupt-map-registers.patch mm-fix-page-cache-convergence-regression.patch nfs-flexfiles-use-the-correct-tcp-timeout-for-flexfiles-i-o.patch sunrpc-fix-up-calculation-of-client-message-length.patch x86-microcode-fix-the-microcode-load-on-cpu-hotplug-for-real.patch x86-resctrl-prevent-possible-overrun-during-bitmap-operations.patch x86-speculation-allow-guests-to-use-ssbd-even-if-host-does-not.patch --- ...on-unsupported-mitigations-parameter.patch | 47 +++++ ...emreserve-entries-in-unmapped-memory.patch | 60 ++++++ ...orrect-local-interrupt-map-registers.patch | 105 +++++++++++ ...ix-page-cache-convergence-regression.patch | 176 ++++++++++++++++++ ...orrect-tcp-timeout-for-flexfiles-i-o.patch | 33 ++++ queue-5.1/series | 9 + ...calculation-of-client-message-length.patch | 85 +++++++++ ...crocode-load-on-cpu-hotplug-for-real.patch | 66 +++++++ ...ble-overrun-during-bitmap-operations.patch | 119 ++++++++++++ ...ts-to-use-ssbd-even-if-host-does-not.patch | 70 +++++++ 10 files changed, 770 insertions(+) create mode 100644 queue-5.1/cpu-speculation-warn-on-unsupported-mitigations-parameter.patch create mode 100644 queue-5.1/efi-memreserve-deal-with-memreserve-entries-in-unmapped-memory.patch create mode 100644 queue-5.1/irqchip-mips-gic-use-the-correct-local-interrupt-map-registers.patch create mode 100644 queue-5.1/mm-fix-page-cache-convergence-regression.patch create mode 100644 queue-5.1/nfs-flexfiles-use-the-correct-tcp-timeout-for-flexfiles-i-o.patch create mode 100644 queue-5.1/sunrpc-fix-up-calculation-of-client-message-length.patch create mode 100644 
queue-5.1/x86-microcode-fix-the-microcode-load-on-cpu-hotplug-for-real.patch create mode 100644 queue-5.1/x86-resctrl-prevent-possible-overrun-during-bitmap-operations.patch create mode 100644 queue-5.1/x86-speculation-allow-guests-to-use-ssbd-even-if-host-does-not.patch diff --git a/queue-5.1/cpu-speculation-warn-on-unsupported-mitigations-parameter.patch b/queue-5.1/cpu-speculation-warn-on-unsupported-mitigations-parameter.patch new file mode 100644 index 00000000000..51e42b62437 --- /dev/null +++ b/queue-5.1/cpu-speculation-warn-on-unsupported-mitigations-parameter.patch @@ -0,0 +1,47 @@ +From 1bf72720281770162c87990697eae1ba2f1d917a Mon Sep 17 00:00:00 2001 +From: Geert Uytterhoeven +Date: Thu, 16 May 2019 09:09:35 +0200 +Subject: cpu/speculation: Warn on unsupported mitigations= parameter + +From: Geert Uytterhoeven + +commit 1bf72720281770162c87990697eae1ba2f1d917a upstream. + +Currently, if the user specifies an unsupported mitigation strategy on the +kernel command line, it will be ignored silently. The code will fall back +to the default strategy, possibly leaving the system more vulnerable than +expected. + +This may happen due to e.g. a simple typo, or, for a stable kernel release, +because not all mitigation strategies have been backported. + +Inform the user by printing a message. 
+ +Fixes: 98af8452945c5565 ("cpu/speculation: Add 'mitigations=' cmdline option") +Signed-off-by: Geert Uytterhoeven +Signed-off-by: Thomas Gleixner +Acked-by: Josh Poimboeuf +Cc: Peter Zijlstra +Cc: Jiri Kosina +Cc: Greg Kroah-Hartman +Cc: Ben Hutchings +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20190516070935.22546-1-geert@linux-m68k.org +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/cpu.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/kernel/cpu.c ++++ b/kernel/cpu.c +@@ -2315,6 +2315,9 @@ static int __init mitigations_parse_cmdl + cpu_mitigations = CPU_MITIGATIONS_AUTO; + else if (!strcmp(arg, "auto,nosmt")) + cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT; ++ else ++ pr_crit("Unsupported mitigations=%s, system may still be vulnerable\n", ++ arg); + + return 0; + } diff --git a/queue-5.1/efi-memreserve-deal-with-memreserve-entries-in-unmapped-memory.patch b/queue-5.1/efi-memreserve-deal-with-memreserve-entries-in-unmapped-memory.patch new file mode 100644 index 00000000000..70e7f853496 --- /dev/null +++ b/queue-5.1/efi-memreserve-deal-with-memreserve-entries-in-unmapped-memory.patch @@ -0,0 +1,60 @@ +From 18df7577adae6c6c778bf774b3aebcacbc1fb439 Mon Sep 17 00:00:00 2001 +From: Ard Biesheuvel +Date: Sun, 9 Jun 2019 20:17:44 +0200 +Subject: efi/memreserve: deal with memreserve entries in unmapped memory + +From: Ard Biesheuvel + +commit 18df7577adae6c6c778bf774b3aebcacbc1fb439 upstream. + +Ensure that the EFI memreserve entries can be accessed, even if they +are located in memory that the kernel (e.g., a crashkernel) omits from +the linear map. 
+ +Fixes: 80424b02d42b ("efi: Reduce the amount of memblock reservations ...") +Cc: <stable@vger.kernel.org> # 5.0+ +Reported-by: Jonathan Richardson +Reviewed-by: Jonathan Richardson +Tested-by: Jonathan Richardson +Signed-off-by: Ard Biesheuvel +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/firmware/efi/efi.c | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +--- a/drivers/firmware/efi/efi.c ++++ b/drivers/firmware/efi/efi.c +@@ -1007,14 +1007,16 @@ int __ref efi_mem_reserve_persistent(phy + + /* first try to find a slot in an existing linked list entry */ + for (prsv = efi_memreserve_root->next; prsv; prsv = rsv->next) { +- rsv = __va(prsv); ++ rsv = memremap(prsv, sizeof(*rsv), MEMREMAP_WB); + index = atomic_fetch_add_unless(&rsv->count, 1, rsv->size); + if (index < rsv->size) { + rsv->entry[index].base = addr; + rsv->entry[index].size = size; + ++ memunmap(rsv); + return 0; + } ++ memunmap(rsv); + } + + /* no slot found - allocate a new linked list entry */ +@@ -1022,7 +1024,13 @@ int __ref efi_mem_reserve_persistent(phy + if (!rsv) + return -ENOMEM; + +- rsv->size = EFI_MEMRESERVE_COUNT(PAGE_SIZE); ++ /* ++ * The memremap() call above assumes that a linux_efi_memreserve entry ++ * never crosses a page boundary, so let's ensure that this remains true ++ * even when kexec'ing a 4k pages kernel from a >4k pages kernel, by ++ * using SZ_4K explicitly in the size calculation below.
++ */ ++ rsv->size = EFI_MEMRESERVE_COUNT(SZ_4K); + atomic_set(&rsv->count, 1); + rsv->entry[0].base = addr; + rsv->entry[0].size = size; diff --git a/queue-5.1/irqchip-mips-gic-use-the-correct-local-interrupt-map-registers.patch b/queue-5.1/irqchip-mips-gic-use-the-correct-local-interrupt-map-registers.patch new file mode 100644 index 00000000000..fa21a674caa --- /dev/null +++ b/queue-5.1/irqchip-mips-gic-use-the-correct-local-interrupt-map-registers.patch @@ -0,0 +1,105 @@ +From 6d4d367d0e9ffab4d64a3436256a6a052dc1195d Mon Sep 17 00:00:00 2001 +From: Paul Burton +Date: Wed, 5 Jun 2019 09:34:10 +0100 +Subject: irqchip/mips-gic: Use the correct local interrupt map registers + +From: Paul Burton + +commit 6d4d367d0e9ffab4d64a3436256a6a052dc1195d upstream. + +The MIPS GIC contains a block of registers used to map local interrupts +to a particular CPU interrupt pin. Since these registers are found at a +consecutive range of addresses we access them using an index, via the +(read|write)_gic_v[lo]_map accessor functions. We currently use values +from enum mips_gic_local_interrupt as those indices. + +Unfortunately whilst enum mips_gic_local_interrupt provides the correct +offsets for bits in the pending & mask registers, the ordering of the +map registers is subtly different... Compared with the ordering of +pending & mask bits, the map registers move the FDC from the end of the +list to index 3 after the timer interrupt. As a result the performance +counter & software interrupts are therefore at indices 4-6 rather than +indices 3-5. + +Notably this causes problems with performance counter interrupts being +incorrectly mapped on some systems, and presumably will also cause +problems for FDC interrupts. + +Introduce a function to map from enum mips_gic_local_interrupt to the +index of the corresponding map register, and use it to ensure we access +the map registers for the correct interrupts. 
+ +Signed-off-by: Paul Burton +Fixes: a0dc5cb5e31b ("irqchip: mips-gic: Simplify gic_local_irq_domain_map()") +Fixes: da61fcf9d62a ("irqchip: mips-gic: Use irq_cpu_online to (un)mask all-VP(E) IRQs") +Reported-and-tested-by: Archer Yan +Cc: Thomas Gleixner +Cc: Jason Cooper +Cc: stable@vger.kernel.org # v4.14+ +Signed-off-by: Marc Zyngier +Signed-off-by: Greg Kroah-Hartman + +--- + arch/mips/include/asm/mips-gic.h | 30 ++++++++++++++++++++++++++++++ + drivers/irqchip/irq-mips-gic.c | 4 ++-- + 2 files changed, 32 insertions(+), 2 deletions(-) + +--- a/arch/mips/include/asm/mips-gic.h ++++ b/arch/mips/include/asm/mips-gic.h +@@ -315,6 +315,36 @@ static inline bool mips_gic_present(void + } + + /** ++ * mips_gic_vx_map_reg() - Return GIC_Vx_<intr>_MAP register offset ++ * @intr: A GIC local interrupt ++ * ++ * Determine the index of the GIC_VL_<intr>_MAP or GIC_VO_<intr>_MAP register ++ * within the block of GIC map registers. This is almost the same as the order ++ * of interrupts in the pending & mask registers, as used by enum ++ * mips_gic_local_interrupt, but moves the FDC interrupt & thus offsets the ++ * interrupts after it... ++ * ++ * Return: The map register index corresponding to @intr. ++ * ++ * The return value is suitable for use with the (read|write)_gic_v[lo]_map ++ * accessor functions. ++ */ ++static inline unsigned int ++mips_gic_vx_map_reg(enum mips_gic_local_interrupt intr) ++{ ++ /* WD, Compare & Timer are 1:1 */ ++ if (intr <= GIC_LOCAL_INT_TIMER) ++ return intr; ++ ++ /* FDC moves to after Timer...
*/ ++ if (intr == GIC_LOCAL_INT_FDC) ++ return GIC_LOCAL_INT_TIMER + 1; ++ ++ /* As a result everything else is offset by 1 */ ++ return intr + 1; ++} ++ ++/** + * gic_get_c0_compare_int() - Return cp0 count/compare interrupt virq + * + * Determine the virq number to use for the coprocessor 0 count/compare +--- a/drivers/irqchip/irq-mips-gic.c ++++ b/drivers/irqchip/irq-mips-gic.c +@@ -388,7 +388,7 @@ static void gic_all_vpes_irq_cpu_online( + intr = GIC_HWIRQ_TO_LOCAL(d->hwirq); + cd = irq_data_get_irq_chip_data(d); + +- write_gic_vl_map(intr, cd->map); ++ write_gic_vl_map(mips_gic_vx_map_reg(intr), cd->map); + if (cd->mask) + write_gic_vl_smask(BIT(intr)); + } +@@ -517,7 +517,7 @@ static int gic_irq_domain_map(struct irq + spin_lock_irqsave(&gic_lock, flags); + for_each_online_cpu(cpu) { + write_gic_vl_other(mips_cm_vp_id(cpu)); +- write_gic_vo_map(intr, map); ++ write_gic_vo_map(mips_gic_vx_map_reg(intr), map); + } + spin_unlock_irqrestore(&gic_lock, flags); + diff --git a/queue-5.1/mm-fix-page-cache-convergence-regression.patch b/queue-5.1/mm-fix-page-cache-convergence-regression.patch new file mode 100644 index 00000000000..81ae5addb1d --- /dev/null +++ b/queue-5.1/mm-fix-page-cache-convergence-regression.patch @@ -0,0 +1,176 @@ +From 7b785645e8f13e17cbce492708cf6e7039d32e46 Mon Sep 17 00:00:00 2001 +From: Johannes Weiner +Date: Fri, 24 May 2019 10:12:46 -0400 +Subject: mm: fix page cache convergence regression + +From: Johannes Weiner + +commit 7b785645e8f13e17cbce492708cf6e7039d32e46 upstream. + +Since a28334862993 ("page cache: Finish XArray conversion"), on most +major Linux distributions, the page cache doesn't correctly transition +when the hot data set is changing, and leaves the new pages thrashing +indefinitely instead of kicking out the cold ones. 
+ +On a freshly booted, freshly ssh'd into virtual machine with 1G RAM +running stock Arch Linux: + +[root@ham ~]# ./reclaimtest.sh ++ dd of=workingset-a bs=1M count=0 seek=600 ++ cat workingset-a ++ cat workingset-a ++ cat workingset-a ++ cat workingset-a ++ cat workingset-a ++ cat workingset-a ++ cat workingset-a ++ cat workingset-a ++ ./mincore workingset-a +153600/153600 workingset-a ++ dd of=workingset-b bs=1M count=0 seek=600 ++ cat workingset-b ++ cat workingset-b ++ cat workingset-b ++ cat workingset-b ++ ./mincore workingset-a workingset-b +104029/153600 workingset-a +120086/153600 workingset-b ++ cat workingset-b ++ cat workingset-b ++ cat workingset-b ++ cat workingset-b ++ ./mincore workingset-a workingset-b +104029/153600 workingset-a +120268/153600 workingset-b + +workingset-b is a 600M file on a 1G host that is otherwise entirely +idle. No matter how often it's being accessed, it won't get cached. + +While investigating, I noticed that the non-resident information gets +aggressively reclaimed - /proc/vmstat::workingset_nodereclaim. This is +a problem because a workingset transition like this relies on the +non-resident information tracked in the page cache tree of evicted +file ranges: when the cache faults are refaults of recently evicted +cache, we challenge the existing active set, and that allows a new +workingset to establish itself. + +Tracing the shrinker that maintains this memory revealed that all page +cache tree nodes were allocated to the root cgroup. 
This is a problem, +because 1) the shrinker sizes the amount of non-resident information +it keeps to the size of the cgroup's other memory and 2) on most major +Linux distributions, only kernel threads live in the root cgroup and +everything else gets put into services or session groups: + +[root@ham ~]# cat /proc/self/cgroup +0::/user.slice/user-0.slice/session-c1.scope + +As a result, we basically maintain no non-resident information for the +workloads running on the system, thus breaking the caching algorithm. + +Looking through the code, I found the culprit in the above-mentioned +patch: when switching from the radix tree to xarray, it dropped the +__GFP_ACCOUNT flag from the tree node allocations - the flag that +makes sure the allocated memory gets charged to and tracked by the +cgroup of the calling process - in this case, the one doing the fault. + +To fix this, allow xarray users to specify per-tree flag that makes +xarray allocate nodes using __GFP_ACCOUNT. Then restore the page cache +tree annotation to request such cgroup tracking for the cache nodes. 
+ +With this patch applied, the page cache correctly converges on new +workingsets again after just a few iterations: + +[root@ham ~]# ./reclaimtest.sh ++ dd of=workingset-a bs=1M count=0 seek=600 ++ cat workingset-a ++ cat workingset-a ++ cat workingset-a ++ cat workingset-a ++ cat workingset-a ++ cat workingset-a ++ cat workingset-a ++ cat workingset-a ++ ./mincore workingset-a +153600/153600 workingset-a ++ dd of=workingset-b bs=1M count=0 seek=600 ++ cat workingset-b ++ ./mincore workingset-a workingset-b +124607/153600 workingset-a +87876/153600 workingset-b ++ cat workingset-b ++ ./mincore workingset-a workingset-b +81313/153600 workingset-a +133321/153600 workingset-b ++ cat workingset-b ++ ./mincore workingset-a workingset-b +63036/153600 workingset-a +153600/153600 workingset-b + +Cc: stable@vger.kernel.org # 4.20+ +Signed-off-by: Johannes Weiner +Reviewed-by: Shakeel Butt +Signed-off-by: Matthew Wilcox (Oracle) +Signed-off-by: Greg Kroah-Hartman + +--- + fs/inode.c | 2 +- + include/linux/xarray.h | 1 + + lib/xarray.c | 12 ++++++++++-- + 3 files changed, 12 insertions(+), 3 deletions(-) + +--- a/fs/inode.c ++++ b/fs/inode.c +@@ -349,7 +349,7 @@ EXPORT_SYMBOL(inc_nlink); + + static void __address_space_init_once(struct address_space *mapping) + { +- xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ); ++ xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT); + init_rwsem(&mapping->i_mmap_rwsem); + INIT_LIST_HEAD(&mapping->private_list); + spin_lock_init(&mapping->private_lock); +--- a/include/linux/xarray.h ++++ b/include/linux/xarray.h +@@ -265,6 +265,7 @@ enum xa_lock_type { + #define XA_FLAGS_TRACK_FREE ((__force gfp_t)4U) + #define XA_FLAGS_ZERO_BUSY ((__force gfp_t)8U) + #define XA_FLAGS_ALLOC_WRAPPED ((__force gfp_t)16U) ++#define XA_FLAGS_ACCOUNT ((__force gfp_t)32U) + #define XA_FLAGS_MARK(mark) ((__force gfp_t)((1U << __GFP_BITS_SHIFT) << \ + (__force unsigned)(mark))) + +--- a/lib/xarray.c ++++ b/lib/xarray.c +@@ -298,6 +298,8 @@ 
bool xas_nomem(struct xa_state *xas, gfp + xas_destroy(xas); + return false; + } ++ if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT) ++ gfp |= __GFP_ACCOUNT; + xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp); + if (!xas->xa_alloc) + return false; +@@ -325,6 +327,8 @@ static bool __xas_nomem(struct xa_state + xas_destroy(xas); + return false; + } ++ if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT) ++ gfp |= __GFP_ACCOUNT; + if (gfpflags_allow_blocking(gfp)) { + xas_unlock_type(xas, lock_type); + xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp); +@@ -358,8 +362,12 @@ static void *xas_alloc(struct xa_state * + if (node) { + xas->xa_alloc = NULL; + } else { +- node = kmem_cache_alloc(radix_tree_node_cachep, +- GFP_NOWAIT | __GFP_NOWARN); ++ gfp_t gfp = GFP_NOWAIT | __GFP_NOWARN; ++ ++ if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT) ++ gfp |= __GFP_ACCOUNT; ++ ++ node = kmem_cache_alloc(radix_tree_node_cachep, gfp); + if (!node) { + xas_set_err(xas, -ENOMEM); + return NULL; diff --git a/queue-5.1/nfs-flexfiles-use-the-correct-tcp-timeout-for-flexfiles-i-o.patch b/queue-5.1/nfs-flexfiles-use-the-correct-tcp-timeout-for-flexfiles-i-o.patch new file mode 100644 index 00000000000..8c4440bea35 --- /dev/null +++ b/queue-5.1/nfs-flexfiles-use-the-correct-tcp-timeout-for-flexfiles-i-o.patch @@ -0,0 +1,33 @@ +From 68f461593f76bd5f17e87cdd0bea28f4278c7268 Mon Sep 17 00:00:00 2001 +From: Trond Myklebust +Date: Tue, 25 Jun 2019 16:41:16 -0400 +Subject: NFS/flexfiles: Use the correct TCP timeout for flexfiles I/O + +From: Trond Myklebust + +commit 68f461593f76bd5f17e87cdd0bea28f4278c7268 upstream. + +Fix a typo where we're confusing the default TCP retrans value +(NFS_DEF_TCP_RETRANS) for the default TCP timeout value. 
+ +Fixes: 15d03055cf39f ("pNFS/flexfiles: Set reasonable default ...") +Cc: stable@vger.kernel.org # 4.8+ +Signed-off-by: Trond Myklebust +Signed-off-by: Anna Schumaker +Signed-off-by: Greg Kroah-Hartman + +--- + fs/nfs/flexfilelayout/flexfilelayoutdev.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c ++++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c +@@ -18,7 +18,7 @@ + + #define NFSDBG_FACILITY NFSDBG_PNFS_LD + +-static unsigned int dataserver_timeo = NFS_DEF_TCP_RETRANS; ++static unsigned int dataserver_timeo = NFS_DEF_TCP_TIMEO; + static unsigned int dataserver_retrans; + + static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg); diff --git a/queue-5.1/series b/queue-5.1/series index 7d28dd57ba9..dabe045a43f 100644 --- a/queue-5.1/series +++ b/queue-5.1/series @@ -18,3 +18,12 @@ dm-init-fix-incorrect-uses-of-kstrndup.patch dm-log-writes-make-sure-super-sector-log-updates-are-written-in-order.patch io_uring-ensure-req-file-is-cleared-on-allocation.patch scsi-vmw_pscsi-fix-use-after-free-in-pvscsi_queue_lck.patch +x86-speculation-allow-guests-to-use-ssbd-even-if-host-does-not.patch +x86-microcode-fix-the-microcode-load-on-cpu-hotplug-for-real.patch +x86-resctrl-prevent-possible-overrun-during-bitmap-operations.patch +mm-fix-page-cache-convergence-regression.patch +efi-memreserve-deal-with-memreserve-entries-in-unmapped-memory.patch +nfs-flexfiles-use-the-correct-tcp-timeout-for-flexfiles-i-o.patch +cpu-speculation-warn-on-unsupported-mitigations-parameter.patch +sunrpc-fix-up-calculation-of-client-message-length.patch +irqchip-mips-gic-use-the-correct-local-interrupt-map-registers.patch diff --git a/queue-5.1/sunrpc-fix-up-calculation-of-client-message-length.patch b/queue-5.1/sunrpc-fix-up-calculation-of-client-message-length.patch new file mode 100644 index 00000000000..07d884f0197 --- /dev/null +++ b/queue-5.1/sunrpc-fix-up-calculation-of-client-message-length.patch @@ -0,0 +1,85 @@ 
+From 7e3d3620974b743b91b1f9d0660061b1de20174c Mon Sep 17 00:00:00 2001 +From: Trond Myklebust +Date: Mon, 24 Jun 2019 19:15:44 -0400 +Subject: SUNRPC: Fix up calculation of client message length + +From: Trond Myklebust + +commit 7e3d3620974b743b91b1f9d0660061b1de20174c upstream. + +In the case where a record marker was used, xs_sendpages() needs +to return the length of the payload + record marker so that we +operate correctly in the case of a partial transmission. +When the callers check return value, they therefore need to +take into account the record marker length. + +Fixes: 06b5fc3ad94e ("Merge tag 'nfs-rdma-for-5.1-1'...") +Cc: stable@vger.kernel.org # 5.1+ +Signed-off-by: Trond Myklebust +Signed-off-by: Anna Schumaker +Signed-off-by: Greg Kroah-Hartman + +--- + net/sunrpc/xprtsock.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +--- a/net/sunrpc/xprtsock.c ++++ b/net/sunrpc/xprtsock.c +@@ -950,6 +950,8 @@ static int xs_local_send_request(struct + struct sock_xprt *transport = + container_of(xprt, struct sock_xprt, xprt); + struct xdr_buf *xdr = &req->rq_snd_buf; ++ rpc_fraghdr rm = xs_stream_record_marker(xdr); ++ unsigned int msglen = rm ? 
req->rq_slen + sizeof(rm) : req->rq_slen; + int status; + int sent = 0; + +@@ -964,9 +966,7 @@ static int xs_local_send_request(struct + + req->rq_xtime = ktime_get(); + status = xs_sendpages(transport->sock, NULL, 0, xdr, +- transport->xmit.offset, +- xs_stream_record_marker(xdr), +- &sent); ++ transport->xmit.offset, rm, &sent); + dprintk("RPC: %s(%u) = %d\n", + __func__, xdr->len - transport->xmit.offset, status); + +@@ -976,7 +976,7 @@ static int xs_local_send_request(struct + if (likely(sent > 0) || status == 0) { + transport->xmit.offset += sent; + req->rq_bytes_sent = transport->xmit.offset; +- if (likely(req->rq_bytes_sent >= req->rq_slen)) { ++ if (likely(req->rq_bytes_sent >= msglen)) { + req->rq_xmit_bytes_sent += transport->xmit.offset; + transport->xmit.offset = 0; + return 0; +@@ -1097,6 +1097,8 @@ static int xs_tcp_send_request(struct rp + struct rpc_xprt *xprt = req->rq_xprt; + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + struct xdr_buf *xdr = &req->rq_snd_buf; ++ rpc_fraghdr rm = xs_stream_record_marker(xdr); ++ unsigned int msglen = rm ? req->rq_slen + sizeof(rm) : req->rq_slen; + bool vm_wait = false; + int status; + int sent; +@@ -1122,9 +1124,7 @@ static int xs_tcp_send_request(struct rp + while (1) { + sent = 0; + status = xs_sendpages(transport->sock, NULL, 0, xdr, +- transport->xmit.offset, +- xs_stream_record_marker(xdr), +- &sent); ++ transport->xmit.offset, rm, &sent); + + dprintk("RPC: xs_tcp_send_request(%u) = %d\n", + xdr->len - transport->xmit.offset, status); +@@ -1133,7 +1133,7 @@ static int xs_tcp_send_request(struct rp + * reset the count of bytes sent. 
*/ + transport->xmit.offset += sent; + req->rq_bytes_sent = transport->xmit.offset; +- if (likely(req->rq_bytes_sent >= req->rq_slen)) { ++ if (likely(req->rq_bytes_sent >= msglen)) { + req->rq_xmit_bytes_sent += transport->xmit.offset; + transport->xmit.offset = 0; + return 0; diff --git a/queue-5.1/x86-microcode-fix-the-microcode-load-on-cpu-hotplug-for-real.patch b/queue-5.1/x86-microcode-fix-the-microcode-load-on-cpu-hotplug-for-real.patch new file mode 100644 index 00000000000..565c787638d --- /dev/null +++ b/queue-5.1/x86-microcode-fix-the-microcode-load-on-cpu-hotplug-for-real.patch @@ -0,0 +1,66 @@ +From 5423f5ce5ca410b3646f355279e4e937d452e622 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Tue, 18 Jun 2019 22:31:40 +0200 +Subject: x86/microcode: Fix the microcode load on CPU hotplug for real + +From: Thomas Gleixner + +commit 5423f5ce5ca410b3646f355279e4e937d452e622 upstream. + +A recent change moved the microcode loader hotplug callback into the early +startup phase which is running with interrupts disabled. It missed that +the callbacks invoke sysfs functions which might sleep causing nice 'might +sleep' splats with proper debugging enabled. + +Split the callbacks and only load the microcode in the early startup phase +and move the sysfs handling back into the later threaded and preemptible +bringup phase where it was before. + +Fixes: 78f4e932f776 ("x86/microcode, cpuhotplug: Add a microcode loader CPU hotplug callback") +Signed-off-by: Thomas Gleixner +Signed-off-by: Borislav Petkov +Cc: "H. 
Peter Anvin" +Cc: Ingo Molnar +Cc: stable@vger.kernel.org +Cc: x86-ml +Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1906182228350.1766@nanos.tec.linutronix.de +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/cpu/microcode/core.c | 15 ++++++++++----- + 1 file changed, 10 insertions(+), 5 deletions(-) + +--- a/arch/x86/kernel/cpu/microcode/core.c ++++ b/arch/x86/kernel/cpu/microcode/core.c +@@ -793,13 +793,16 @@ static struct syscore_ops mc_syscore_ops + .resume = mc_bp_resume, + }; + +-static int mc_cpu_online(unsigned int cpu) ++static int mc_cpu_starting(unsigned int cpu) + { +- struct device *dev; +- +- dev = get_cpu_device(cpu); + microcode_update_cpu(cpu); + pr_debug("CPU%d added\n", cpu); ++ return 0; ++} ++ ++static int mc_cpu_online(unsigned int cpu) ++{ ++ struct device *dev = get_cpu_device(cpu); + + if (sysfs_create_group(&dev->kobj, &mc_attr_group)) + pr_err("Failed to create group for CPU%d\n", cpu); +@@ -876,7 +879,9 @@ int __init microcode_init(void) + goto out_ucode_group; + + register_syscore_ops(&mc_syscore_ops); +- cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:online", ++ cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:starting", ++ mc_cpu_starting, NULL); ++ cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/microcode:online", + mc_cpu_online, mc_cpu_down_prep); + + pr_info("Microcode Update Driver: v%s.", DRIVER_VERSION); diff --git a/queue-5.1/x86-resctrl-prevent-possible-overrun-during-bitmap-operations.patch b/queue-5.1/x86-resctrl-prevent-possible-overrun-during-bitmap-operations.patch new file mode 100644 index 00000000000..0d1d40de6ec --- /dev/null +++ b/queue-5.1/x86-resctrl-prevent-possible-overrun-during-bitmap-operations.patch @@ -0,0 +1,119 @@ +From 32f010deab575199df4ebe7b6aec20c17bb7eccd Mon Sep 17 00:00:00 2001 +From: Reinette Chatre +Date: Wed, 19 Jun 2019 13:27:16 -0700 +Subject: x86/resctrl: Prevent possible overrun during bitmap operations + +From: Reinette Chatre + 
+commit 32f010deab575199df4ebe7b6aec20c17bb7eccd upstream. + +While the DOC at the beginning of lib/bitmap.c explicitly states that +"The number of valid bits in a given bitmap does _not_ need to be an +exact multiple of BITS_PER_LONG.", some of the bitmap operations do +indeed access BITS_PER_LONG portions of the provided bitmap no matter +the size of the provided bitmap. + +For example, if find_first_bit() is provided with an 8 bit bitmap the +operation will access BITS_PER_LONG bits from the provided bitmap. While +the operation ensures that these extra bits do not affect the result, +the memory is still accessed. + +The capacity bitmasks (CBMs) are typically stored in u32 since they +can never exceed 32 bits. A few instances exist where a bitmap_* +operation is performed on a CBM by simply pointing the bitmap operation +to the stored u32 value. + +The consequence of this pattern is that some bitmap_* operations will +access out-of-bounds memory when interacting with the provided CBM. + +This same issue has previously been addressed with commit 49e00eee0061 +("x86/intel_rdt: Fix out-of-bounds memory access in CBM tests") +but at that time not all instances of the issue were fixed. + +Fix this by using an unsigned long to store the capacity bitmask data +that is passed to bitmap functions. + +Fixes: e651901187ab ("x86/intel_rdt: Introduce "bit_usage" to display cache allocations details") +Fixes: f4e80d67a527 ("x86/intel_rdt: Resctrl files reflect pseudo-locked information") +Fixes: 95f0b77efa57 ("x86/intel_rdt: Initialize new resource group with sane defaults") +Signed-off-by: Reinette Chatre +Signed-off-by: Borislav Petkov +Cc: Fenghua Yu +Cc: "H. 
Peter Anvin" +Cc: Ingo Molnar +Cc: stable +Cc: Thomas Gleixner +Cc: Tony Luck +Cc: x86-ml +Link: https://lkml.kernel.org/r/58c9b6081fd9bf599af0dfc01a6fdd335768efef.1560975645.git.reinette.chatre@intel.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/cpu/resctrl/rdtgroup.c | 35 +++++++++++++++------------------ + 1 file changed, 16 insertions(+), 19 deletions(-) + +--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c ++++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c +@@ -804,8 +804,12 @@ static int rdt_bit_usage_show(struct ker + struct seq_file *seq, void *v) + { + struct rdt_resource *r = of->kn->parent->priv; +- u32 sw_shareable = 0, hw_shareable = 0; +- u32 exclusive = 0, pseudo_locked = 0; ++ /* ++ * Use unsigned long even though only 32 bits are used to ensure ++ * test_bit() is used safely. ++ */ ++ unsigned long sw_shareable = 0, hw_shareable = 0; ++ unsigned long exclusive = 0, pseudo_locked = 0; + struct rdt_domain *dom; + int i, hwb, swb, excl, psl; + enum rdtgrp_mode mode; +@@ -850,10 +854,10 @@ static int rdt_bit_usage_show(struct ker + } + for (i = r->cache.cbm_len - 1; i >= 0; i--) { + pseudo_locked = dom->plr ? dom->plr->cbm : 0; +- hwb = test_bit(i, (unsigned long *)&hw_shareable); +- swb = test_bit(i, (unsigned long *)&sw_shareable); +- excl = test_bit(i, (unsigned long *)&exclusive); +- psl = test_bit(i, (unsigned long *)&pseudo_locked); ++ hwb = test_bit(i, &hw_shareable); ++ swb = test_bit(i, &sw_shareable); ++ excl = test_bit(i, &exclusive); ++ psl = test_bit(i, &pseudo_locked); + if (hwb && swb) + seq_putc(seq, 'X'); + else if (hwb && !swb) +@@ -2494,26 +2498,19 @@ out_destroy: + */ + static void cbm_ensure_valid(u32 *_val, struct rdt_resource *r) + { +- /* +- * Convert the u32 _val to an unsigned long required by all the bit +- * operations within this function. 
No more than 32 bits of this +- * converted value can be accessed because all bit operations are +- * additionally provided with cbm_len that is initialized during +- * hardware enumeration using five bits from the EAX register and +- * thus never can exceed 32 bits. +- */ +- unsigned long *val = (unsigned long *)_val; ++ unsigned long val = *_val; + unsigned int cbm_len = r->cache.cbm_len; + unsigned long first_bit, zero_bit; + +- if (*val == 0) ++ if (val == 0) + return; + +- first_bit = find_first_bit(val, cbm_len); +- zero_bit = find_next_zero_bit(val, cbm_len, first_bit); ++ first_bit = find_first_bit(&val, cbm_len); ++ zero_bit = find_next_zero_bit(&val, cbm_len, first_bit); + + /* Clear any remaining bits to ensure contiguous region */ +- bitmap_clear(val, zero_bit, cbm_len - zero_bit); ++ bitmap_clear(&val, zero_bit, cbm_len - zero_bit); ++ *_val = (u32)val; + } + + /** diff --git a/queue-5.1/x86-speculation-allow-guests-to-use-ssbd-even-if-host-does-not.patch b/queue-5.1/x86-speculation-allow-guests-to-use-ssbd-even-if-host-does-not.patch new file mode 100644 index 00000000000..ec8490c2cc1 --- /dev/null +++ b/queue-5.1/x86-speculation-allow-guests-to-use-ssbd-even-if-host-does-not.patch @@ -0,0 +1,70 @@ +From c1f7fec1eb6a2c86d01bc22afce772c743451d88 Mon Sep 17 00:00:00 2001 +From: Alejandro Jimenez +Date: Mon, 10 Jun 2019 13:20:10 -0400 +Subject: x86/speculation: Allow guests to use SSBD even if host does not + +From: Alejandro Jimenez + +commit c1f7fec1eb6a2c86d01bc22afce772c743451d88 upstream. + +The bits set in x86_spec_ctrl_mask are used to calculate the guest's value +of SPEC_CTRL that is written to the MSR before VMENTRY, and control which +mitigations the guest can enable. In the case of SSBD, unless the host has +enabled SSBD always on mode (by passing "spec_store_bypass_disable=on" in +the kernel parameters), the SSBD bit is not set in the mask and the guest +can not properly enable the SSBD always on mitigation mode. 
+ +This has been confirmed by running the SSBD PoC on a guest using the SSBD +always on mitigation mode (booted with kernel parameter +"spec_store_bypass_disable=on"), and verifying that the guest is vulnerable +unless the host is also using SSBD always on mode. In addition, the guest +OS incorrectly reports the SSB vulnerability as mitigated. + +Always set the SSBD bit in x86_spec_ctrl_mask when the host CPU supports +it, allowing the guest to use SSBD whether or not the host has chosen to +enable the mitigation in any of its modes. + +Fixes: be6fcb5478e9 ("x86/bugs: Rework spec_ctrl base and mask logic") +Signed-off-by: Alejandro Jimenez +Signed-off-by: Thomas Gleixner +Reviewed-by: Liam Merwick +Reviewed-by: Mark Kanda +Reviewed-by: Paolo Bonzini +Cc: bp@alien8.de +Cc: rkrcmar@redhat.com +Cc: kvm@vger.kernel.org +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/1560187210-11054-1-git-send-email-alejandro.j.jimenez@oracle.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/cpu/bugs.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -836,6 +836,16 @@ static enum ssb_mitigation __init __ssb_ + } + + /* ++ * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper ++ * bit in the mask to allow guests to use the mitigation even in the ++ * case where the host does not enable it. ++ */ ++ if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || ++ static_cpu_has(X86_FEATURE_AMD_SSBD)) { ++ x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; ++ } ++ ++ /* + * We have three CPU feature flags that are in play here: + * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible. 
+ * - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass +@@ -852,7 +862,6 @@ static enum ssb_mitigation __init __ssb_ + x86_amd_ssb_disable(); + } else { + x86_spec_ctrl_base |= SPEC_CTRL_SSBD; +- x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + } + } -- 2.47.3