5.1-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 1 Jul 2019 15:39:34 +0000 (17:39 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 1 Jul 2019 15:39:34 +0000 (17:39 +0200)
added patches:
cpu-speculation-warn-on-unsupported-mitigations-parameter.patch
efi-memreserve-deal-with-memreserve-entries-in-unmapped-memory.patch
irqchip-mips-gic-use-the-correct-local-interrupt-map-registers.patch
mm-fix-page-cache-convergence-regression.patch
nfs-flexfiles-use-the-correct-tcp-timeout-for-flexfiles-i-o.patch
sunrpc-fix-up-calculation-of-client-message-length.patch
x86-microcode-fix-the-microcode-load-on-cpu-hotplug-for-real.patch
x86-resctrl-prevent-possible-overrun-during-bitmap-operations.patch
x86-speculation-allow-guests-to-use-ssbd-even-if-host-does-not.patch

queue-5.1/cpu-speculation-warn-on-unsupported-mitigations-parameter.patch [new file with mode: 0644]
queue-5.1/efi-memreserve-deal-with-memreserve-entries-in-unmapped-memory.patch [new file with mode: 0644]
queue-5.1/irqchip-mips-gic-use-the-correct-local-interrupt-map-registers.patch [new file with mode: 0644]
queue-5.1/mm-fix-page-cache-convergence-regression.patch [new file with mode: 0644]
queue-5.1/nfs-flexfiles-use-the-correct-tcp-timeout-for-flexfiles-i-o.patch [new file with mode: 0644]
queue-5.1/series
queue-5.1/sunrpc-fix-up-calculation-of-client-message-length.patch [new file with mode: 0644]
queue-5.1/x86-microcode-fix-the-microcode-load-on-cpu-hotplug-for-real.patch [new file with mode: 0644]
queue-5.1/x86-resctrl-prevent-possible-overrun-during-bitmap-operations.patch [new file with mode: 0644]
queue-5.1/x86-speculation-allow-guests-to-use-ssbd-even-if-host-does-not.patch [new file with mode: 0644]

diff --git a/queue-5.1/cpu-speculation-warn-on-unsupported-mitigations-parameter.patch b/queue-5.1/cpu-speculation-warn-on-unsupported-mitigations-parameter.patch
new file mode 100644 (file)
index 0000000..51e42b6
--- /dev/null
@@ -0,0 +1,47 @@
+From 1bf72720281770162c87990697eae1ba2f1d917a Mon Sep 17 00:00:00 2001
+From: Geert Uytterhoeven <geert@linux-m68k.org>
+Date: Thu, 16 May 2019 09:09:35 +0200
+Subject: cpu/speculation: Warn on unsupported mitigations= parameter
+
+From: Geert Uytterhoeven <geert@linux-m68k.org>
+
+commit 1bf72720281770162c87990697eae1ba2f1d917a upstream.
+
+Currently, if the user specifies an unsupported mitigation strategy on the
+kernel command line, it will be ignored silently.  The code will fall back
+to the default strategy, possibly leaving the system more vulnerable than
+expected.
+
+This may happen due to e.g. a simple typo, or, for a stable kernel release,
+because not all mitigation strategies have been backported.
+
+Inform the user by printing a message.
+
+Fixes: 98af8452945c5565 ("cpu/speculation: Add 'mitigations=' cmdline option")
+Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Jiri Kosina <jkosina@suse.cz>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Ben Hutchings <ben@decadent.org.uk>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20190516070935.22546-1-geert@linux-m68k.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/cpu.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -2315,6 +2315,9 @@ static int __init mitigations_parse_cmdl
+               cpu_mitigations = CPU_MITIGATIONS_AUTO;
+       else if (!strcmp(arg, "auto,nosmt"))
+               cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT;
++      else
++              pr_crit("Unsupported mitigations=%s, system may still be vulnerable\n",
++                      arg);
+       return 0;
+ }
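
The hunk above adds a warning instead of a silent fallback when mitigations= carries a value the kernel does not recognize. As a rough userspace sketch of that parsing pattern (names and messages are illustrative, not the kernel's), the same idea looks like this:

#include <stdio.h>
#include <string.h>

enum mitigations_mode { MITIGATIONS_OFF, MITIGATIONS_AUTO, MITIGATIONS_AUTO_NOSMT };

static enum mitigations_mode parse_mitigations(const char *arg)
{
    enum mitigations_mode mode = MITIGATIONS_AUTO;    /* documented default */

    if (!strcmp(arg, "off"))
        mode = MITIGATIONS_OFF;
    else if (!strcmp(arg, "auto"))
        mode = MITIGATIONS_AUTO;
    else if (!strcmp(arg, "auto,nosmt"))
        mode = MITIGATIONS_AUTO_NOSMT;
    else
        fprintf(stderr,
            "Unsupported mitigations=%s, system may still be vulnerable\n", arg);
    return mode;
}

int main(void)
{
    parse_mitigations("auto,nosmt");    /* recognized: no message */
    parse_mitigations("atuo");          /* typo: warns, still falls back */
    return 0;
}
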
diff --git a/queue-5.1/efi-memreserve-deal-with-memreserve-entries-in-unmapped-memory.patch b/queue-5.1/efi-memreserve-deal-with-memreserve-entries-in-unmapped-memory.patch
new file mode 100644 (file)
index 0000000..70e7f85
--- /dev/null
@@ -0,0 +1,60 @@
+From 18df7577adae6c6c778bf774b3aebcacbc1fb439 Mon Sep 17 00:00:00 2001
+From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+Date: Sun, 9 Jun 2019 20:17:44 +0200
+Subject: efi/memreserve: deal with memreserve entries in unmapped memory
+
+From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+
+commit 18df7577adae6c6c778bf774b3aebcacbc1fb439 upstream.
+
+Ensure that the EFI memreserve entries can be accessed, even if they
+are located in memory that the kernel (e.g., a crashkernel) omits from
+the linear map.
+
+Fixes: 80424b02d42b ("efi: Reduce the amount of memblock reservations ...")
+Cc: <stable@vger.kernel.org> # 5.0+
+Reported-by: Jonathan Richardson <jonathan.richardson@broadcom.com>
+Reviewed-by: Jonathan Richardson <jonathan.richardson@broadcom.com>
+Tested-by: Jonathan Richardson <jonathan.richardson@broadcom.com>
+Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/firmware/efi/efi.c |   12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+--- a/drivers/firmware/efi/efi.c
++++ b/drivers/firmware/efi/efi.c
+@@ -1007,14 +1007,16 @@ int __ref efi_mem_reserve_persistent(phy
+       /* first try to find a slot in an existing linked list entry */
+       for (prsv = efi_memreserve_root->next; prsv; prsv = rsv->next) {
+-              rsv = __va(prsv);
++              rsv = memremap(prsv, sizeof(*rsv), MEMREMAP_WB);
+               index = atomic_fetch_add_unless(&rsv->count, 1, rsv->size);
+               if (index < rsv->size) {
+                       rsv->entry[index].base = addr;
+                       rsv->entry[index].size = size;
++                      memunmap(rsv);
+                       return 0;
+               }
++              memunmap(rsv);
+       }
+       /* no slot found - allocate a new linked list entry */
+@@ -1022,7 +1024,13 @@ int __ref efi_mem_reserve_persistent(phy
+       if (!rsv)
+               return -ENOMEM;
+-      rsv->size = EFI_MEMRESERVE_COUNT(PAGE_SIZE);
++      /*
++       * The memremap() call above assumes that a linux_efi_memreserve entry
++       * never crosses a page boundary, so let's ensure that this remains true
++       * even when kexec'ing a 4k pages kernel from a >4k pages kernel, by
++       * using SZ_4K explicitly in the size calculation below.
++       */
++      rsv->size = EFI_MEMRESERVE_COUNT(SZ_4K);
+       atomic_set(&rsv->count, 1);
+       rsv->entry[0].base = addr;
+       rsv->entry[0].size = size;
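
The change above replaces a bare __va() dereference with an explicit memremap()/memunmap() pair, because the reservation entry may sit in memory the running kernel never put in its linear map. A minimal kernel-context sketch of that access pattern, assuming the linux_efi_memreserve layout from include/linux/efi.h and not intended as standalone code:

#include <linux/efi.h>
#include <linux/errno.h>
#include <linux/io.h>

/* Read one field of a reservation entry through a temporary mapping
 * instead of trusting __va(prsv) to be valid. */
static int read_reserve_size(phys_addr_t prsv, int *out)
{
        struct linux_efi_memreserve *rsv;

        rsv = memremap(prsv, sizeof(*rsv), MEMREMAP_WB);
        if (!rsv)
                return -ENOMEM;

        *out = rsv->size;       /* only touch the memory while it is mapped */
        memunmap(rsv);
        return 0;
}
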
diff --git a/queue-5.1/irqchip-mips-gic-use-the-correct-local-interrupt-map-registers.patch b/queue-5.1/irqchip-mips-gic-use-the-correct-local-interrupt-map-registers.patch
new file mode 100644 (file)
index 0000000..fa21a67
--- /dev/null
@@ -0,0 +1,105 @@
+From 6d4d367d0e9ffab4d64a3436256a6a052dc1195d Mon Sep 17 00:00:00 2001
+From: Paul Burton <paul.burton@mips.com>
+Date: Wed, 5 Jun 2019 09:34:10 +0100
+Subject: irqchip/mips-gic: Use the correct local interrupt map registers
+
+From: Paul Burton <paul.burton@mips.com>
+
+commit 6d4d367d0e9ffab4d64a3436256a6a052dc1195d upstream.
+
+The MIPS GIC contains a block of registers used to map local interrupts
+to a particular CPU interrupt pin. Since these registers are found at a
+consecutive range of addresses we access them using an index, via the
+(read|write)_gic_v[lo]_map accessor functions. We currently use values
+from enum mips_gic_local_interrupt as those indices.
+
+Unfortunately whilst enum mips_gic_local_interrupt provides the correct
+offsets for bits in the pending & mask registers, the ordering of the
+map registers is subtly different... Compared with the ordering of
+pending & mask bits, the map registers move the FDC from the end of the
+list to index 3 after the timer interrupt. As a result the performance
+counter & software interrupts are therefore at indices 4-6 rather than
+indices 3-5.
+
+Notably this causes problems with performance counter interrupts being
+incorrectly mapped on some systems, and presumably will also cause
+problems for FDC interrupts.
+
+Introduce a function to map from enum mips_gic_local_interrupt to the
+index of the corresponding map register, and use it to ensure we access
+the map registers for the correct interrupts.
+
+Signed-off-by: Paul Burton <paul.burton@mips.com>
+Fixes: a0dc5cb5e31b ("irqchip: mips-gic: Simplify gic_local_irq_domain_map()")
+Fixes: da61fcf9d62a ("irqchip: mips-gic: Use irq_cpu_online to (un)mask all-VP(E) IRQs")
+Reported-and-tested-by: Archer Yan <ayan@wavecomp.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Jason Cooper <jason@lakedaemon.net>
+Cc: stable@vger.kernel.org # v4.14+
+Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/mips/include/asm/mips-gic.h |   30 ++++++++++++++++++++++++++++++
+ drivers/irqchip/irq-mips-gic.c   |    4 ++--
+ 2 files changed, 32 insertions(+), 2 deletions(-)
+
+--- a/arch/mips/include/asm/mips-gic.h
++++ b/arch/mips/include/asm/mips-gic.h
+@@ -315,6 +315,36 @@ static inline bool mips_gic_present(void
+ }
+ /**
++ * mips_gic_vx_map_reg() - Return GIC_Vx_<intr>_MAP register offset
++ * @intr: A GIC local interrupt
++ *
++ * Determine the index of the GIC_VL_<intr>_MAP or GIC_VO_<intr>_MAP register
++ * within the block of GIC map registers. This is almost the same as the order
++ * of interrupts in the pending & mask registers, as used by enum
++ * mips_gic_local_interrupt, but moves the FDC interrupt & thus offsets the
++ * interrupts after it...
++ *
++ * Return: The map register index corresponding to @intr.
++ *
++ * The return value is suitable for use with the (read|write)_gic_v[lo]_map
++ * accessor functions.
++ */
++static inline unsigned int
++mips_gic_vx_map_reg(enum mips_gic_local_interrupt intr)
++{
++      /* WD, Compare & Timer are 1:1 */
++      if (intr <= GIC_LOCAL_INT_TIMER)
++              return intr;
++
++      /* FDC moves to after Timer... */
++      if (intr == GIC_LOCAL_INT_FDC)
++              return GIC_LOCAL_INT_TIMER + 1;
++
++      /* As a result everything else is offset by 1 */
++      return intr + 1;
++}
++
++/**
+  * gic_get_c0_compare_int() - Return cp0 count/compare interrupt virq
+  *
+  * Determine the virq number to use for the coprocessor 0 count/compare
+--- a/drivers/irqchip/irq-mips-gic.c
++++ b/drivers/irqchip/irq-mips-gic.c
+@@ -388,7 +388,7 @@ static void gic_all_vpes_irq_cpu_online(
+       intr = GIC_HWIRQ_TO_LOCAL(d->hwirq);
+       cd = irq_data_get_irq_chip_data(d);
+-      write_gic_vl_map(intr, cd->map);
++      write_gic_vl_map(mips_gic_vx_map_reg(intr), cd->map);
+       if (cd->mask)
+               write_gic_vl_smask(BIT(intr));
+ }
+@@ -517,7 +517,7 @@ static int gic_irq_domain_map(struct irq
+       spin_lock_irqsave(&gic_lock, flags);
+       for_each_online_cpu(cpu) {
+               write_gic_vl_other(mips_cm_vp_id(cpu));
+-              write_gic_vo_map(intr, map);
++              write_gic_vo_map(mips_gic_vx_map_reg(intr), map);
+       }
+       spin_unlock_irqrestore(&gic_lock, flags);
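
The new mips_gic_vx_map_reg() helper only re-indexes interrupts, so the logic can be checked in isolation. A small userspace sketch of that remapping, with enum values assumed from the ordering described in the commit message rather than copied from the kernel header:

#include <stdio.h>

/* Assumed pending/mask bit ordering, per the commit message: WD, Compare,
 * Timer, PerfCtr, SWInt0, SWInt1, FDC. */
enum local_int {
    INT_WD, INT_COMPARE, INT_TIMER, INT_PERFCTR,
    INT_SWINT0, INT_SWINT1, INT_FDC, NUM_LOCAL_INTS
};

/* Mirror of the remapping rule the patch introduces. */
static unsigned int vx_map_reg(enum local_int intr)
{
    if (intr <= INT_TIMER)      /* WD, Compare & Timer are 1:1 */
        return intr;
    if (intr == INT_FDC)        /* FDC moves to just after Timer */
        return INT_TIMER + 1;
    return intr + 1;            /* everything after Timer shifts up by one */
}

int main(void)
{
    for (int i = 0; i < NUM_LOCAL_INTS; i++)
        printf("interrupt %d -> map register index %u\n", i, vx_map_reg(i));
    return 0;
}
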
diff --git a/queue-5.1/mm-fix-page-cache-convergence-regression.patch b/queue-5.1/mm-fix-page-cache-convergence-regression.patch
new file mode 100644 (file)
index 0000000..81ae5ad
--- /dev/null
@@ -0,0 +1,176 @@
+From 7b785645e8f13e17cbce492708cf6e7039d32e46 Mon Sep 17 00:00:00 2001
+From: Johannes Weiner <hannes@cmpxchg.org>
+Date: Fri, 24 May 2019 10:12:46 -0400
+Subject: mm: fix page cache convergence regression
+
+From: Johannes Weiner <hannes@cmpxchg.org>
+
+commit 7b785645e8f13e17cbce492708cf6e7039d32e46 upstream.
+
+Since a28334862993 ("page cache: Finish XArray conversion"), on most
+major Linux distributions, the page cache doesn't correctly transition
+when the hot data set is changing, and leaves the new pages thrashing
+indefinitely instead of kicking out the cold ones.
+
+On a freshly booted, freshly ssh'd into virtual machine with 1G RAM
+running stock Arch Linux:
+
+[root@ham ~]# ./reclaimtest.sh
++ dd of=workingset-a bs=1M count=0 seek=600
++ cat workingset-a
++ cat workingset-a
++ cat workingset-a
++ cat workingset-a
++ cat workingset-a
++ cat workingset-a
++ cat workingset-a
++ cat workingset-a
++ ./mincore workingset-a
+153600/153600 workingset-a
++ dd of=workingset-b bs=1M count=0 seek=600
++ cat workingset-b
++ cat workingset-b
++ cat workingset-b
++ cat workingset-b
++ ./mincore workingset-a workingset-b
+104029/153600 workingset-a
+120086/153600 workingset-b
++ cat workingset-b
++ cat workingset-b
++ cat workingset-b
++ cat workingset-b
++ ./mincore workingset-a workingset-b
+104029/153600 workingset-a
+120268/153600 workingset-b
+
+workingset-b is a 600M file on a 1G host that is otherwise entirely
+idle. No matter how often it's being accessed, it won't get cached.
+
+While investigating, I noticed that the non-resident information gets
+aggressively reclaimed - /proc/vmstat::workingset_nodereclaim. This is
+a problem because a workingset transition like this relies on the
+non-resident information tracked in the page cache tree of evicted
+file ranges: when the cache faults are refaults of recently evicted
+cache, we challenge the existing active set, and that allows a new
+workingset to establish itself.
+
+Tracing the shrinker that maintains this memory revealed that all page
+cache tree nodes were allocated to the root cgroup. This is a problem,
+because 1) the shrinker sizes the amount of non-resident information
+it keeps to the size of the cgroup's other memory and 2) on most major
+Linux distributions, only kernel threads live in the root cgroup and
+everything else gets put into services or session groups:
+
+[root@ham ~]# cat /proc/self/cgroup
+0::/user.slice/user-0.slice/session-c1.scope
+
+As a result, we basically maintain no non-resident information for the
+workloads running on the system, thus breaking the caching algorithm.
+
+Looking through the code, I found the culprit in the above-mentioned
+patch: when switching from the radix tree to xarray, it dropped the
+__GFP_ACCOUNT flag from the tree node allocations - the flag that
+makes sure the allocated memory gets charged to and tracked by the
+cgroup of the calling process - in this case, the one doing the fault.
+
+To fix this, allow xarray users to specify per-tree flag that makes
+xarray allocate nodes using __GFP_ACCOUNT. Then restore the page cache
+tree annotation to request such cgroup tracking for the cache nodes.
+
+With this patch applied, the page cache correctly converges on new
+workingsets again after just a few iterations:
+
+[root@ham ~]# ./reclaimtest.sh
++ dd of=workingset-a bs=1M count=0 seek=600
++ cat workingset-a
++ cat workingset-a
++ cat workingset-a
++ cat workingset-a
++ cat workingset-a
++ cat workingset-a
++ cat workingset-a
++ cat workingset-a
++ ./mincore workingset-a
+153600/153600 workingset-a
++ dd of=workingset-b bs=1M count=0 seek=600
++ cat workingset-b
++ ./mincore workingset-a workingset-b
+124607/153600 workingset-a
+87876/153600 workingset-b
++ cat workingset-b
++ ./mincore workingset-a workingset-b
+81313/153600 workingset-a
+133321/153600 workingset-b
++ cat workingset-b
++ ./mincore workingset-a workingset-b
+63036/153600 workingset-a
+153600/153600 workingset-b
+
+Cc: stable@vger.kernel.org # 4.20+
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Reviewed-by: Shakeel Butt <shakeelb@google.com>
+Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/inode.c             |    2 +-
+ include/linux/xarray.h |    1 +
+ lib/xarray.c           |   12 ++++++++++--
+ 3 files changed, 12 insertions(+), 3 deletions(-)
+
+--- a/fs/inode.c
++++ b/fs/inode.c
+@@ -349,7 +349,7 @@ EXPORT_SYMBOL(inc_nlink);
+ static void __address_space_init_once(struct address_space *mapping)
+ {
+-      xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ);
++      xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT);
+       init_rwsem(&mapping->i_mmap_rwsem);
+       INIT_LIST_HEAD(&mapping->private_list);
+       spin_lock_init(&mapping->private_lock);
+--- a/include/linux/xarray.h
++++ b/include/linux/xarray.h
+@@ -265,6 +265,7 @@ enum xa_lock_type {
+ #define XA_FLAGS_TRACK_FREE   ((__force gfp_t)4U)
+ #define XA_FLAGS_ZERO_BUSY    ((__force gfp_t)8U)
+ #define XA_FLAGS_ALLOC_WRAPPED        ((__force gfp_t)16U)
++#define XA_FLAGS_ACCOUNT      ((__force gfp_t)32U)
+ #define XA_FLAGS_MARK(mark)   ((__force gfp_t)((1U << __GFP_BITS_SHIFT) << \
+                                               (__force unsigned)(mark)))
+--- a/lib/xarray.c
++++ b/lib/xarray.c
+@@ -298,6 +298,8 @@ bool xas_nomem(struct xa_state *xas, gfp
+               xas_destroy(xas);
+               return false;
+       }
++      if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT)
++              gfp |= __GFP_ACCOUNT;
+       xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp);
+       if (!xas->xa_alloc)
+               return false;
+@@ -325,6 +327,8 @@ static bool __xas_nomem(struct xa_state
+               xas_destroy(xas);
+               return false;
+       }
++      if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT)
++              gfp |= __GFP_ACCOUNT;
+       if (gfpflags_allow_blocking(gfp)) {
+               xas_unlock_type(xas, lock_type);
+               xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp);
+@@ -358,8 +362,12 @@ static void *xas_alloc(struct xa_state *
+       if (node) {
+               xas->xa_alloc = NULL;
+       } else {
+-              node = kmem_cache_alloc(radix_tree_node_cachep,
+-                                      GFP_NOWAIT | __GFP_NOWARN);
++              gfp_t gfp = GFP_NOWAIT | __GFP_NOWARN;
++
++              if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT)
++                      gfp |= __GFP_ACCOUNT;
++
++              node = kmem_cache_alloc(radix_tree_node_cachep, gfp);
+               if (!node) {
+                       xas_set_err(xas, -ENOMEM);
+                       return NULL;
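
The test transcript in the commit message relies on a small "./mincore" helper to report page-cache residency. A rough userspace re-creation of such a helper, based on mincore(2) and offered only as an assumption of what the original script does:

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

int main(int argc, char **argv)
{
    long page = sysconf(_SC_PAGESIZE);

    for (int i = 1; i < argc; i++) {
        int fd = open(argv[i], O_RDONLY);
        struct stat st;

        if (fd < 0)
            continue;
        if (fstat(fd, &st) < 0 || st.st_size == 0) {
            close(fd);
            continue;
        }

        size_t pages = (st.st_size + page - 1) / page;
        unsigned char *vec = calloc(pages, 1);
        void *map = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
        size_t resident = 0;

        /* mincore() fills one byte per page; bit 0 means "resident". */
        if (vec && map != MAP_FAILED && mincore(map, st.st_size, vec) == 0)
            for (size_t p = 0; p < pages; p++)
                resident += vec[p] & 1;

        printf("%zu/%zu %s\n", resident, pages, argv[i]);

        if (map != MAP_FAILED)
            munmap(map, st.st_size);
        free(vec);
        close(fd);
    }
    return 0;
}
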
diff --git a/queue-5.1/nfs-flexfiles-use-the-correct-tcp-timeout-for-flexfiles-i-o.patch b/queue-5.1/nfs-flexfiles-use-the-correct-tcp-timeout-for-flexfiles-i-o.patch
new file mode 100644 (file)
index 0000000..8c4440b
--- /dev/null
@@ -0,0 +1,33 @@
+From 68f461593f76bd5f17e87cdd0bea28f4278c7268 Mon Sep 17 00:00:00 2001
+From: Trond Myklebust <trondmy@gmail.com>
+Date: Tue, 25 Jun 2019 16:41:16 -0400
+Subject: NFS/flexfiles: Use the correct TCP timeout for flexfiles I/O
+
+From: Trond Myklebust <trondmy@gmail.com>
+
+commit 68f461593f76bd5f17e87cdd0bea28f4278c7268 upstream.
+
+Fix a typo where we're confusing the default TCP retrans value
+(NFS_DEF_TCP_RETRANS) for the default TCP timeout value.
+
+Fixes: 15d03055cf39f ("pNFS/flexfiles: Set reasonable default ...")
+Cc: stable@vger.kernel.org # 4.8+
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfs/flexfilelayout/flexfilelayoutdev.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
++++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+@@ -18,7 +18,7 @@
+ #define NFSDBG_FACILITY               NFSDBG_PNFS_LD
+-static unsigned int dataserver_timeo = NFS_DEF_TCP_RETRANS;
++static unsigned int dataserver_timeo = NFS_DEF_TCP_TIMEO;
+ static unsigned int dataserver_retrans;
+ static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg);
diff --git a/queue-5.1/series b/queue-5.1/series
index 7d28dd57ba958508695a6ab3da63e27292011898..dabe045a43f44c25e7acee3d998833826e38f5ab 100644 (file)
@@ -18,3 +18,12 @@ dm-init-fix-incorrect-uses-of-kstrndup.patch
 dm-log-writes-make-sure-super-sector-log-updates-are-written-in-order.patch
 io_uring-ensure-req-file-is-cleared-on-allocation.patch
 scsi-vmw_pscsi-fix-use-after-free-in-pvscsi_queue_lck.patch
+x86-speculation-allow-guests-to-use-ssbd-even-if-host-does-not.patch
+x86-microcode-fix-the-microcode-load-on-cpu-hotplug-for-real.patch
+x86-resctrl-prevent-possible-overrun-during-bitmap-operations.patch
+mm-fix-page-cache-convergence-regression.patch
+efi-memreserve-deal-with-memreserve-entries-in-unmapped-memory.patch
+nfs-flexfiles-use-the-correct-tcp-timeout-for-flexfiles-i-o.patch
+cpu-speculation-warn-on-unsupported-mitigations-parameter.patch
+sunrpc-fix-up-calculation-of-client-message-length.patch
+irqchip-mips-gic-use-the-correct-local-interrupt-map-registers.patch
diff --git a/queue-5.1/sunrpc-fix-up-calculation-of-client-message-length.patch b/queue-5.1/sunrpc-fix-up-calculation-of-client-message-length.patch
new file mode 100644 (file)
index 0000000..07d884f
--- /dev/null
@@ -0,0 +1,85 @@
+From 7e3d3620974b743b91b1f9d0660061b1de20174c Mon Sep 17 00:00:00 2001
+From: Trond Myklebust <trondmy@gmail.com>
+Date: Mon, 24 Jun 2019 19:15:44 -0400
+Subject: SUNRPC: Fix up calculation of client message length
+
+From: Trond Myklebust <trondmy@gmail.com>
+
+commit 7e3d3620974b743b91b1f9d0660061b1de20174c upstream.
+
+In the case where a record marker was used, xs_sendpages() needs
+to return the length of the payload + record marker so that we
+operate correctly in the case of a partial transmission.
+When the callers check return value, they therefore need to
+take into account the record marker length.
+
+Fixes: 06b5fc3ad94e ("Merge tag 'nfs-rdma-for-5.1-1'...")
+Cc: stable@vger.kernel.org # 5.1+
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/sunrpc/xprtsock.c |   16 ++++++++--------
+ 1 file changed, 8 insertions(+), 8 deletions(-)
+
+--- a/net/sunrpc/xprtsock.c
++++ b/net/sunrpc/xprtsock.c
+@@ -950,6 +950,8 @@ static int xs_local_send_request(struct
+       struct sock_xprt *transport =
+                               container_of(xprt, struct sock_xprt, xprt);
+       struct xdr_buf *xdr = &req->rq_snd_buf;
++      rpc_fraghdr rm = xs_stream_record_marker(xdr);
++      unsigned int msglen = rm ? req->rq_slen + sizeof(rm) : req->rq_slen;
+       int status;
+       int sent = 0;
+@@ -964,9 +966,7 @@ static int xs_local_send_request(struct
+       req->rq_xtime = ktime_get();
+       status = xs_sendpages(transport->sock, NULL, 0, xdr,
+-                            transport->xmit.offset,
+-                            xs_stream_record_marker(xdr),
+-                            &sent);
++                            transport->xmit.offset, rm, &sent);
+       dprintk("RPC:       %s(%u) = %d\n",
+                       __func__, xdr->len - transport->xmit.offset, status);
+@@ -976,7 +976,7 @@ static int xs_local_send_request(struct
+       if (likely(sent > 0) || status == 0) {
+               transport->xmit.offset += sent;
+               req->rq_bytes_sent = transport->xmit.offset;
+-              if (likely(req->rq_bytes_sent >= req->rq_slen)) {
++              if (likely(req->rq_bytes_sent >= msglen)) {
+                       req->rq_xmit_bytes_sent += transport->xmit.offset;
+                       transport->xmit.offset = 0;
+                       return 0;
+@@ -1097,6 +1097,8 @@ static int xs_tcp_send_request(struct rp
+       struct rpc_xprt *xprt = req->rq_xprt;
+       struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+       struct xdr_buf *xdr = &req->rq_snd_buf;
++      rpc_fraghdr rm = xs_stream_record_marker(xdr);
++      unsigned int msglen = rm ? req->rq_slen + sizeof(rm) : req->rq_slen;
+       bool vm_wait = false;
+       int status;
+       int sent;
+@@ -1122,9 +1124,7 @@ static int xs_tcp_send_request(struct rp
+       while (1) {
+               sent = 0;
+               status = xs_sendpages(transport->sock, NULL, 0, xdr,
+-                                    transport->xmit.offset,
+-                                    xs_stream_record_marker(xdr),
+-                                    &sent);
++                                    transport->xmit.offset, rm, &sent);
+               dprintk("RPC:       xs_tcp_send_request(%u) = %d\n",
+                               xdr->len - transport->xmit.offset, status);
+@@ -1133,7 +1133,7 @@ static int xs_tcp_send_request(struct rp
+                * reset the count of bytes sent. */
+               transport->xmit.offset += sent;
+               req->rq_bytes_sent = transport->xmit.offset;
+-              if (likely(req->rq_bytes_sent >= req->rq_slen)) {
++              if (likely(req->rq_bytes_sent >= msglen)) {
+                       req->rq_xmit_bytes_sent += transport->xmit.offset;
+                       transport->xmit.offset = 0;
+                       return 0;
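
The core of the fix is length accounting: once a record marker is prepended, a transmission is only complete when payload plus marker have been sent. A minimal userspace sketch of that arithmetic (names are illustrative; the kernel's rpc_fraghdr is a 32-bit marker):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* "Complete" means payload plus the 4-byte record marker, when one is used. */
static bool fully_sent(size_t payload_len, bool has_marker, size_t bytes_sent)
{
    size_t msglen = has_marker ? payload_len + sizeof(uint32_t) : payload_len;

    return bytes_sent >= msglen;
}

int main(void)
{
    /* A 1000-byte payload framed with a marker: 1000 bytes on the wire is
     * still a partial send; 1004 completes the record. */
    printf("sent 1000 of 1000+4: complete=%d\n", fully_sent(1000, true, 1000));
    printf("sent 1004 of 1000+4: complete=%d\n", fully_sent(1000, true, 1004));
    return 0;
}
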
diff --git a/queue-5.1/x86-microcode-fix-the-microcode-load-on-cpu-hotplug-for-real.patch b/queue-5.1/x86-microcode-fix-the-microcode-load-on-cpu-hotplug-for-real.patch
new file mode 100644 (file)
index 0000000..565c787
--- /dev/null
@@ -0,0 +1,66 @@
+From 5423f5ce5ca410b3646f355279e4e937d452e622 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 18 Jun 2019 22:31:40 +0200
+Subject: x86/microcode: Fix the microcode load on CPU hotplug for real
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 5423f5ce5ca410b3646f355279e4e937d452e622 upstream.
+
+A recent change moved the microcode loader hotplug callback into the early
+startup phase which is running with interrupts disabled. It missed that
+the callbacks invoke sysfs functions which might sleep causing nice 'might
+sleep' splats with proper debugging enabled.
+
+Split the callbacks and only load the microcode in the early startup phase
+and move the sysfs handling back into the later threaded and preemptible
+bringup phase where it was before.
+
+Fixes: 78f4e932f776 ("x86/microcode, cpuhotplug: Add a microcode loader CPU hotplug callback")
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Cc: "H. Peter Anvin" <hpa@zytor.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: stable@vger.kernel.org
+Cc: x86-ml <x86@kernel.org>
+Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1906182228350.1766@nanos.tec.linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/cpu/microcode/core.c |   15 ++++++++++-----
+ 1 file changed, 10 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/kernel/cpu/microcode/core.c
++++ b/arch/x86/kernel/cpu/microcode/core.c
+@@ -793,13 +793,16 @@ static struct syscore_ops mc_syscore_ops
+       .resume                 = mc_bp_resume,
+ };
+-static int mc_cpu_online(unsigned int cpu)
++static int mc_cpu_starting(unsigned int cpu)
+ {
+-      struct device *dev;
+-
+-      dev = get_cpu_device(cpu);
+       microcode_update_cpu(cpu);
+       pr_debug("CPU%d added\n", cpu);
++      return 0;
++}
++
++static int mc_cpu_online(unsigned int cpu)
++{
++      struct device *dev = get_cpu_device(cpu);
+       if (sysfs_create_group(&dev->kobj, &mc_attr_group))
+               pr_err("Failed to create group for CPU%d\n", cpu);
+@@ -876,7 +879,9 @@ int __init microcode_init(void)
+               goto out_ucode_group;
+       register_syscore_ops(&mc_syscore_ops);
+-      cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:online",
++      cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:starting",
++                                mc_cpu_starting, NULL);
++      cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/microcode:online",
+                                 mc_cpu_online, mc_cpu_down_prep);
+       pr_info("Microcode Update Driver: v%s.", DRIVER_VERSION);
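
The patch splits one hotplug callback into an early "starting" handler, which runs with interrupts disabled and must not sleep, and a later online handler where sysfs calls are safe. A kernel-context sketch of that two-stage registration with illustrative names (not the microcode driver's code); the early handler needs a fixed starting-phase state such as the CPUHP_AP_MICROCODE_LOADER state used in the diff above, while the sleepable part can use a dynamic online state:

#include <linux/cpuhotplug.h>
#include <linux/init.h>

static int example_cpu_starting(unsigned int cpu)
{
        /* Early bringup phase: interrupts are off, sleeping is not allowed,
         * so only do the per-CPU work here. Registration requires a fixed
         * starting-phase state (left out of this sketch). */
        return 0;
}

static int example_cpu_online(unsigned int cpu)
{
        /* Threaded, preemptible phase: sysfs_create_group() and friends
         * are safe here. */
        return 0;
}

static int __init example_init(void)
{
        return cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "example:online",
                                         example_cpu_online, NULL);
}
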
diff --git a/queue-5.1/x86-resctrl-prevent-possible-overrun-during-bitmap-operations.patch b/queue-5.1/x86-resctrl-prevent-possible-overrun-during-bitmap-operations.patch
new file mode 100644 (file)
index 0000000..0d1d40d
--- /dev/null
@@ -0,0 +1,119 @@
+From 32f010deab575199df4ebe7b6aec20c17bb7eccd Mon Sep 17 00:00:00 2001
+From: Reinette Chatre <reinette.chatre@intel.com>
+Date: Wed, 19 Jun 2019 13:27:16 -0700
+Subject: x86/resctrl: Prevent possible overrun during bitmap operations
+
+From: Reinette Chatre <reinette.chatre@intel.com>
+
+commit 32f010deab575199df4ebe7b6aec20c17bb7eccd upstream.
+
+While the DOC at the beginning of lib/bitmap.c explicitly states that
+"The number of valid bits in a given bitmap does _not_ need to be an
+exact multiple of BITS_PER_LONG.", some of the bitmap operations do
+indeed access BITS_PER_LONG portions of the provided bitmap no matter
+the size of the provided bitmap.
+
+For example, if find_first_bit() is provided with an 8 bit bitmap the
+operation will access BITS_PER_LONG bits from the provided bitmap. While
+the operation ensures that these extra bits do not affect the result,
+the memory is still accessed.
+
+The capacity bitmasks (CBMs) are typically stored in u32 since they
+can never exceed 32 bits. A few instances exist where a bitmap_*
+operation is performed on a CBM by simply pointing the bitmap operation
+to the stored u32 value.
+
+The consequence of this pattern is that some bitmap_* operations will
+access out-of-bounds memory when interacting with the provided CBM.
+
+This same issue has previously been addressed with commit 49e00eee0061
+("x86/intel_rdt: Fix out-of-bounds memory access in CBM tests")
+but at that time not all instances of the issue were fixed.
+
+Fix this by using an unsigned long to store the capacity bitmask data
+that is passed to bitmap functions.
+
+Fixes: e651901187ab ("x86/intel_rdt: Introduce "bit_usage" to display cache allocations details")
+Fixes: f4e80d67a527 ("x86/intel_rdt: Resctrl files reflect pseudo-locked information")
+Fixes: 95f0b77efa57 ("x86/intel_rdt: Initialize new resource group with sane defaults")
+Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Cc: Fenghua Yu <fenghua.yu@intel.com>
+Cc: "H. Peter Anvin" <hpa@zytor.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: stable <stable@vger.kernel.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Tony Luck <tony.luck@intel.com>
+Cc: x86-ml <x86@kernel.org>
+Link: https://lkml.kernel.org/r/58c9b6081fd9bf599af0dfc01a6fdd335768efef.1560975645.git.reinette.chatre@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/cpu/resctrl/rdtgroup.c |   35 +++++++++++++++------------------
+ 1 file changed, 16 insertions(+), 19 deletions(-)
+
+--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
++++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+@@ -804,8 +804,12 @@ static int rdt_bit_usage_show(struct ker
+                             struct seq_file *seq, void *v)
+ {
+       struct rdt_resource *r = of->kn->parent->priv;
+-      u32 sw_shareable = 0, hw_shareable = 0;
+-      u32 exclusive = 0, pseudo_locked = 0;
++      /*
++       * Use unsigned long even though only 32 bits are used to ensure
++       * test_bit() is used safely.
++       */
++      unsigned long sw_shareable = 0, hw_shareable = 0;
++      unsigned long exclusive = 0, pseudo_locked = 0;
+       struct rdt_domain *dom;
+       int i, hwb, swb, excl, psl;
+       enum rdtgrp_mode mode;
+@@ -850,10 +854,10 @@ static int rdt_bit_usage_show(struct ker
+               }
+               for (i = r->cache.cbm_len - 1; i >= 0; i--) {
+                       pseudo_locked = dom->plr ? dom->plr->cbm : 0;
+-                      hwb = test_bit(i, (unsigned long *)&hw_shareable);
+-                      swb = test_bit(i, (unsigned long *)&sw_shareable);
+-                      excl = test_bit(i, (unsigned long *)&exclusive);
+-                      psl = test_bit(i, (unsigned long *)&pseudo_locked);
++                      hwb = test_bit(i, &hw_shareable);
++                      swb = test_bit(i, &sw_shareable);
++                      excl = test_bit(i, &exclusive);
++                      psl = test_bit(i, &pseudo_locked);
+                       if (hwb && swb)
+                               seq_putc(seq, 'X');
+                       else if (hwb && !swb)
+@@ -2494,26 +2498,19 @@ out_destroy:
+  */
+ static void cbm_ensure_valid(u32 *_val, struct rdt_resource *r)
+ {
+-      /*
+-       * Convert the u32 _val to an unsigned long required by all the bit
+-       * operations within this function. No more than 32 bits of this
+-       * converted value can be accessed because all bit operations are
+-       * additionally provided with cbm_len that is initialized during
+-       * hardware enumeration using five bits from the EAX register and
+-       * thus never can exceed 32 bits.
+-       */
+-      unsigned long *val = (unsigned long *)_val;
++      unsigned long val = *_val;
+       unsigned int cbm_len = r->cache.cbm_len;
+       unsigned long first_bit, zero_bit;
+-      if (*val == 0)
++      if (val == 0)
+               return;
+-      first_bit = find_first_bit(val, cbm_len);
+-      zero_bit = find_next_zero_bit(val, cbm_len, first_bit);
++      first_bit = find_first_bit(&val, cbm_len);
++      zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
+       /* Clear any remaining bits to ensure contiguous region */
+-      bitmap_clear(val, zero_bit, cbm_len - zero_bit);
++      bitmap_clear(&val, zero_bit, cbm_len - zero_bit);
++      *_val = (u32)val;
+ }
+ /**
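
The underlying hazard is that helpers such as find_first_bit() always load whole unsigned long words, so pointing them at a bare u32 reads past its end. A small userspace illustration of both the hazard and the widen-into-a-local fix (the helper is an illustrative stand-in, not the kernel's):

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the kernel's find_first_bit(): it dereferences whole
 * unsigned long words, no matter how small nbits is. */
static unsigned int first_set_bit(const unsigned long *word, unsigned int nbits)
{
    for (unsigned int i = 0; i < nbits; i++)
        if (*word & (1UL << i))
            return i;
    return nbits;
}

int main(void)
{
    uint32_t cbm = 0x00f0;  /* a capacity bitmask, never wider than 32 bits */

    /* Unsafe pattern removed by the patch: on 64-bit, this load reads
     * 8 bytes where only 4 belong to cbm (an out-of-bounds read):
     *     first_set_bit((unsigned long *)&cbm, 32);
     */

    /* Safe pattern from the patch: widen into a local unsigned long. */
    unsigned long val = cbm;
    printf("first set bit: %u\n", first_set_bit(&val, 32));
    return 0;
}
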
diff --git a/queue-5.1/x86-speculation-allow-guests-to-use-ssbd-even-if-host-does-not.patch b/queue-5.1/x86-speculation-allow-guests-to-use-ssbd-even-if-host-does-not.patch
new file mode 100644 (file)
index 0000000..ec8490c
--- /dev/null
@@ -0,0 +1,70 @@
+From c1f7fec1eb6a2c86d01bc22afce772c743451d88 Mon Sep 17 00:00:00 2001
+From: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
+Date: Mon, 10 Jun 2019 13:20:10 -0400
+Subject: x86/speculation: Allow guests to use SSBD even if host does not
+
+From: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
+
+commit c1f7fec1eb6a2c86d01bc22afce772c743451d88 upstream.
+
+The bits set in x86_spec_ctrl_mask are used to calculate the guest's value
+of SPEC_CTRL that is written to the MSR before VMENTRY, and control which
+mitigations the guest can enable.  In the case of SSBD, unless the host has
+enabled SSBD always on mode (by passing "spec_store_bypass_disable=on" in
+the kernel parameters), the SSBD bit is not set in the mask and the guest
+can not properly enable the SSBD always on mitigation mode.
+
+This has been confirmed by running the SSBD PoC on a guest using the SSBD
+always on mitigation mode (booted with kernel parameter
+"spec_store_bypass_disable=on"), and verifying that the guest is vulnerable
+unless the host is also using SSBD always on mode. In addition, the guest
+OS incorrectly reports the SSB vulnerability as mitigated.
+
+Always set the SSBD bit in x86_spec_ctrl_mask when the host CPU supports
+it, allowing the guest to use SSBD whether or not the host has chosen to
+enable the mitigation in any of its modes.
+
+Fixes: be6fcb5478e9 ("x86/bugs: Rework spec_ctrl base and mask logic")
+Signed-off-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Liam Merwick <liam.merwick@oracle.com>
+Reviewed-by: Mark Kanda <mark.kanda@oracle.com>
+Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
+Cc: bp@alien8.de
+Cc: rkrcmar@redhat.com
+Cc: kvm@vger.kernel.org
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/1560187210-11054-1-git-send-email-alejandro.j.jimenez@oracle.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/cpu/bugs.c |   11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -836,6 +836,16 @@ static enum ssb_mitigation __init __ssb_
+       }
+       /*
++       * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper
++       * bit in the mask to allow guests to use the mitigation even in the
++       * case where the host does not enable it.
++       */
++      if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
++          static_cpu_has(X86_FEATURE_AMD_SSBD)) {
++              x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
++      }
++
++      /*
+        * We have three CPU feature flags that are in play here:
+        *  - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
+        *  - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass
+@@ -852,7 +862,6 @@ static enum ssb_mitigation __init __ssb_
+                       x86_amd_ssb_disable();
+               } else {
+                       x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
+-                      x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
+                       wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+               }
+       }
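
The guest's SPEC_CTRL value is filtered through x86_spec_ctrl_mask, so a bit missing from the mask is silently stripped from whatever the guest requests. A simplified userspace sketch of that masking idea (not the kernel's exact combine logic; the bit positions are restated here as assumptions):

#include <stdint.h>
#include <stdio.h>

#define SPEC_CTRL_IBRS  (1ULL << 0)
#define SPEC_CTRL_SSBD  (1ULL << 2)

/* Only bits present in the mask are honoured from the guest's request;
 * everything else keeps the host's base value. */
static uint64_t guest_spec_ctrl(uint64_t host_base, uint64_t mask,
                                uint64_t guest_request)
{
    return (host_base & ~mask) | (guest_request & mask);
}

int main(void)
{
    uint64_t request = SPEC_CTRL_SSBD;  /* guest runs with SSBD always on */

    /* Mask without SSBD (host left the mitigation off): the guest's
     * request is silently dropped. */
    printf("without SSBD in mask: %#llx\n", (unsigned long long)
           guest_spec_ctrl(0, SPEC_CTRL_IBRS, request));

    /* Mask with SSBD (post-patch whenever the CPU supports it): the guest
     * can enable the mitigation regardless of the host's choice. */
    printf("with SSBD in mask:    %#llx\n", (unsigned long long)
           guest_spec_ctrl(0, SPEC_CTRL_IBRS | SPEC_CTRL_SSBD, request));
    return 0;
}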