--- /dev/null
+From foo@baz Tue Aug 14 17:08:55 CEST 2018
+From: Andy Lutomirski <luto@kernel.org>
+Date: Tue, 29 Dec 2015 20:12:20 -0800
+Subject: mm: Add vm_insert_pfn_prot()
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 1745cbc5d0dee0749a6bc0ea8e872c5db0074061 upstream
+
+The x86 vvar vma contains pages with differing cacheability
+flags. x86 currently implements this by manually inserting all
+the ptes using (io_)remap_pfn_range when the vma is set up.
+
+x86 wants to move to using .fault with VM_FAULT_NOPAGE to set up
+the mappings as needed. The correct API to use to insert a pfn
+in .fault is vm_insert_pfn(), but vm_insert_pfn() can't override the
+vma's cache mode, and the HPET page in particular needs to be
+uncached despite the fact that the rest of the VMA is cached.
+
+Add vm_insert_pfn_prot() to support varying cacheability within
+the same non-COW VMA in a more sane manner.
+
+x86 could alternatively use multiple VMAs, but that's messy,
+would break CRIU, and would create unnecessary VMAs that would
+waste memory.
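+
+As a rough usage sketch (not part of this patch; "hpet_pfn" is a
+hypothetical placeholder for the pfn that needs a different cache mode),
+a 4.4-era .fault handler could use the new helper like this:
+
+	static int vvar_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+	{
+		unsigned long addr = (unsigned long)vmf->virtual_address;
+		int ret;
+
+		/* Map this one page uncached, unlike the rest of the VMA */
+		ret = vm_insert_pfn_prot(vma, addr, hpet_pfn,
+					 pgprot_noncached(vma->vm_page_prot));
+
+		return (ret == 0 || ret == -EBUSY) ? VM_FAULT_NOPAGE : VM_FAULT_SIGBUS;
+	}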
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Acked-by: Andrew Morton <akpm@linux-foundation.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Fenghua Yu <fenghua.yu@intel.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Quentin Casasnovas <quentin.casasnovas@oracle.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/d2938d1eb37be7a5e4f86182db646551f11e45aa.1451446564.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/mm.h | 2 ++
+ mm/memory.c | 25 +++++++++++++++++++++++--
+ 2 files changed, 25 insertions(+), 2 deletions(-)
+
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -2083,6 +2083,8 @@ int remap_pfn_range(struct vm_area_struc
+ int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *);
+ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
+ unsigned long pfn);
++int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
++ unsigned long pfn, pgprot_t pgprot);
+ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
+ unsigned long pfn);
+ int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len);
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -1605,8 +1605,29 @@ out:
+ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
+ unsigned long pfn)
+ {
++ return vm_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot);
++}
++EXPORT_SYMBOL(vm_insert_pfn);
++
++/**
++ * vm_insert_pfn_prot - insert single pfn into user vma with specified pgprot
++ * @vma: user vma to map to
++ * @addr: target user address of this page
++ * @pfn: source kernel pfn
++ * @pgprot: pgprot flags for the inserted page
++ *
++ * This is exactly like vm_insert_pfn, except that it allows drivers to
++ * to override pgprot on a per-page basis.
++ *
++ * This only makes sense for IO mappings, and it makes no sense for
++ * cow mappings. In general, using multiple vmas is preferable;
++ * vm_insert_pfn_prot should only be used if using multiple VMAs is
++ * impractical.
++ */
++int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
++ unsigned long pfn, pgprot_t pgprot)
++{
+ int ret;
+- pgprot_t pgprot = vma->vm_page_prot;
+ /*
+ * Technically, architectures with pte_special can avoid all these
+ * restrictions (same for remap_pfn_range). However we would like
+@@ -1628,7 +1649,7 @@ int vm_insert_pfn(struct vm_area_struct
+
+ return ret;
+ }
+-EXPORT_SYMBOL(vm_insert_pfn);
++EXPORT_SYMBOL(vm_insert_pfn_prot);
+
+ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
+ unsigned long pfn)
--- /dev/null
+From foo@baz Tue Aug 14 17:08:55 CEST 2018
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Fri, 7 Oct 2016 17:00:18 -0700
+Subject: mm: fix cache mode tracking in vm_insert_mixed()
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit 87744ab3832b83ba71b931f86f9cfdb000d07da5 upstream
+
+vm_insert_mixed(), unlike vm_insert_pfn_prot() and vmf_insert_pfn_pmd(),
+fails to check the pgprot_t it uses for the mapping against the one
+recorded in the memtype tracking tree. Add the missing call to
+track_pfn_insert() to preclude cases where incompatible aliased mappings
+are established for a given physical address range.
+
+[groeck: Backport to v4.4.y]
+
+Link: http://lkml.kernel.org/r/147328717909.35069.14256589123570653697.stgit@dwillia2-desk3.amr.corp.intel.com
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Cc: David Airlie <airlied@linux.ie>
+Cc: Matthew Wilcox <mawilcox@microsoft.com>
+Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/memory.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -1654,10 +1654,14 @@ EXPORT_SYMBOL(vm_insert_pfn_prot);
+ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
+ unsigned long pfn)
+ {
++ pgprot_t pgprot = vma->vm_page_prot;
++
+ BUG_ON(!(vma->vm_flags & VM_MIXEDMAP));
+
+ if (addr < vma->vm_start || addr >= vma->vm_end)
+ return -EFAULT;
++ if (track_pfn_insert(vma, &pgprot, pfn))
++ return -EINVAL;
+
+ /*
+ * If we don't have pte special, then we have to use the pfn_valid()
+@@ -1670,9 +1674,9 @@ int vm_insert_mixed(struct vm_area_struc
+ struct page *page;
+
+ page = pfn_to_page(pfn);
+- return insert_page(vma, addr, page, vma->vm_page_prot);
++ return insert_page(vma, addr, page, pgprot);
+ }
+- return insert_pfn(vma, addr, pfn, vma->vm_page_prot);
++ return insert_pfn(vma, addr, pfn, pgprot);
+ }
+ EXPORT_SYMBOL(vm_insert_mixed);
+
--- /dev/null
+From foo@baz Tue Aug 14 17:08:55 CEST 2018
+From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Date: Fri, 8 Sep 2017 16:10:46 -0700
+Subject: mm: x86: move _PAGE_SWP_SOFT_DIRTY from bit 7 to bit 1
+
+From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+
+commit eee4818baac0f2b37848fdf90e4b16430dc536ac upstream
+
+_PAGE_PSE is used to distinguish between a truly non-present
+(_PAGE_PRESENT=0) PMD, and a PMD which is undergoing a THP split and
+should be treated as present.
+
+But _PAGE_SWP_SOFT_DIRTY currently uses the _PAGE_PSE bit, which would
+cause confusion between one of those PMDs undergoing a THP split, and a
+soft-dirty PMD. Dropping the _PAGE_PSE check in pmd_present() does not
+work well, because it can hurt the TLB handling optimization in THP split.
+
+Thus, we need to move the bit.
+
+In the current kernel, bits 1-4 are not used in non-present format since
+commit 00839ee3b299 ("x86/mm: Move swap offset/type up in PTE to work
+around erratum"). So let's move _PAGE_SWP_SOFT_DIRTY to bit 1. Bit 7
+is used as reserved (always clear), so please don't use it for any
+other purpose.
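+
+(Illustrative consequence, not spelled out in the patch: after this change
+pte_swp_mksoft_dirty() sets bit 1 (_PAGE_RW) in a non-present entry instead
+of bit 7 (_PAGE_PSE), so a soft-dirty swap/migration entry can no longer be
+mistaken by pmd_present() for a PMD undergoing a THP split.)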
+
+[dwmw2: Pulled in to 4.9 backport to support L1TF changes]
+
+Link: http://lkml.kernel.org/r/20170717193955.20207-3-zi.yan@sent.com
+Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Signed-off-by: Zi Yan <zi.yan@cs.rutgers.edu>
+Acked-by: Dave Hansen <dave.hansen@intel.com>
+Cc: "H. Peter Anvin" <hpa@zytor.com>
+Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com>
+Cc: David Nellans <dnellans@nvidia.com>
+Cc: Ingo Molnar <mingo@elte.hu>
+Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Michal Hocko <mhocko@kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/pgtable_64.h | 12 +++++++++---
+ arch/x86/include/asm/pgtable_types.h | 10 +++++-----
+ 2 files changed, 14 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -166,15 +166,21 @@ static inline int pgd_large(pgd_t pgd) {
+ /*
+ * Encode and de-code a swap entry
+ *
+- * | ... | 11| 10| 9|8|7|6|5| 4| 3|2|1|0| <- bit number
+- * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P| <- bit names
+- * | OFFSET (14->63) | TYPE (9-13) |0|X|X|X| X| X|X|X|0| <- swp entry
++ * | ... | 11| 10| 9|8|7|6|5| 4| 3|2| 1|0| <- bit number
++ * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names
++ * | OFFSET (14->63) | TYPE (9-13) |0|0|X|X| X| X|X|SD|0| <- swp entry
+ *
+ * G (8) is aliased and used as a PROT_NONE indicator for
+ * !present ptes. We need to start storing swap entries above
+ * there. We also need to avoid using A and D because of an
+ * erratum where they can be incorrectly set by hardware on
+ * non-present PTEs.
++ *
++ * SD (1) in swp entry is used to store soft dirty bit, which helps us
++ * remember soft dirty over page migration
++ *
++ * Bit 7 in swp entry should be 0 because pmd_present checks not only P,
++ * but also L and G.
+ */
+ #define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1)
+ #define SWP_TYPE_BITS 5
+--- a/arch/x86/include/asm/pgtable_types.h
++++ b/arch/x86/include/asm/pgtable_types.h
+@@ -70,15 +70,15 @@
+ /*
+ * Tracking soft dirty bit when a page goes to a swap is tricky.
+ * We need a bit which can be stored in pte _and_ not conflict
+- * with swap entry format. On x86 bits 6 and 7 are *not* involved
+- * into swap entry computation, but bit 6 is used for nonlinear
+- * file mapping, so we borrow bit 7 for soft dirty tracking.
++ * with swap entry format. On x86 bits 1-4 are *not* involved
++ * into swap entry computation, but bit 7 is used for thp migration,
++ * so we borrow bit 1 for soft dirty tracking.
+ *
+ * Please note that this bit must be treated as swap dirty page
+- * mark if and only if the PTE has present bit clear!
++ * mark if and only if the PTE/PMD has present bit clear!
+ */
+ #ifdef CONFIG_MEM_SOFT_DIRTY
+-#define _PAGE_SWP_SOFT_DIRTY _PAGE_PSE
++#define _PAGE_SWP_SOFT_DIRTY _PAGE_RW
+ #else
+ #define _PAGE_SWP_SOFT_DIRTY (_AT(pteval_t, 0))
+ #endif
x86-speculation-protect-against-userspace-userspace-spectrersb.patch
kprobes-x86-fix-p-uses-in-error-messages.patch
x86-irqflags-provide-a-declaration-for-native_save_fl.patch
+x86-speculation-l1tf-increase-32bit-pae-__physical_page_shift.patch
+x86-mm-move-swap-offset-type-up-in-pte-to-work-around-erratum.patch
+x86-mm-fix-swap-entry-comment-and-macro.patch
+mm-x86-move-_page_swp_soft_dirty-from-bit-7-to-bit-1.patch
+x86-speculation-l1tf-change-order-of-offset-type-in-swap-entry.patch
+x86-speculation-l1tf-protect-swap-entries-against-l1tf.patch
+x86-speculation-l1tf-protect-prot_none-ptes-against-speculation.patch
+x86-speculation-l1tf-make-sure-the-first-page-is-always-reserved.patch
+x86-speculation-l1tf-add-sysfs-reporting-for-l1tf.patch
+mm-add-vm_insert_pfn_prot.patch
+mm-fix-cache-mode-tracking-in-vm_insert_mixed.patch
+x86-speculation-l1tf-disallow-non-privileged-high-mmio-prot_none-mappings.patch
+x86-speculation-l1tf-limit-swap-file-size-to-max_pa-2.patch
+x86-bugs-move-the-l1tf-function-and-define-pr_fmt-properly.patch
+x86-speculation-l1tf-extend-64bit-swap-file-size-limit.patch
+x86-cpufeatures-add-detection-of-l1d-cache-flush-support.patch
+x86-speculation-l1tf-protect-pae-swap-entries-against-l1tf.patch
+x86-speculation-l1tf-fix-up-pte-pfn-conversion-for-pae.patch
+x86-speculation-l1tf-invert-all-not-present-mappings.patch
+x86-speculation-l1tf-make-pmd-pud_mknotpresent-invert.patch
+x86-mm-pat-make-set_memory_np-l1tf-safe.patch
+x86-mm-kmmio-make-the-tracer-robust-against-l1tf.patch
+x86-speculation-l1tf-fix-up-cpu-feature-flags.patch
--- /dev/null
+From foo@baz Tue Aug 14 17:08:55 CEST 2018
+From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Date: Wed, 20 Jun 2018 16:42:57 -0400
+Subject: x86/bugs: Move the l1tf function and define pr_fmt properly
+
+From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+
+commit 56563f53d3066afa9e63d6c997bf67e76a8b05c0 upstream
+
+The pr_warn in l1tf_select_mitigation would have used the prior pr_fmt
+which was defined as "Spectre V2 : ".
+
+Move the function to be past SSBD and also define the pr_fmt.
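+
+Illustrative effect (not taken from a real boot log): with the old prefix
+the warning would have read
+
+	Spectre V2 : System has more than MAX_PA/2 memory. L1TF mitigation not effective.
+
+whereas with pr_fmt defined as "L1TF: " it reads
+
+	L1TF: System has more than MAX_PA/2 memory. L1TF mitigation not effective.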
+
+Fixes: 17dbca119312 ("x86/speculation/l1tf: Add sysfs reporting for l1tf")
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c | 55 +++++++++++++++++++++++----------------------
+ 1 file changed, 29 insertions(+), 26 deletions(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -207,32 +207,6 @@ static void x86_amd_ssb_disable(void)
+ wrmsrl(MSR_AMD64_LS_CFG, msrval);
+ }
+
+-static void __init l1tf_select_mitigation(void)
+-{
+- u64 half_pa;
+-
+- if (!boot_cpu_has_bug(X86_BUG_L1TF))
+- return;
+-
+-#if CONFIG_PGTABLE_LEVELS == 2
+- pr_warn("Kernel not compiled for PAE. No mitigation for L1TF\n");
+- return;
+-#endif
+-
+- /*
+- * This is extremely unlikely to happen because almost all
+- * systems have far more MAX_PA/2 than RAM can be fit into
+- * DIMM slots.
+- */
+- half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT;
+- if (e820_any_mapped(half_pa, ULLONG_MAX - half_pa, E820_RAM)) {
+- pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n");
+- return;
+- }
+-
+- setup_force_cpu_cap(X86_FEATURE_L1TF_PTEINV);
+-}
+-
+ #ifdef RETPOLINE
+ static bool spectre_v2_bad_module;
+
+@@ -658,6 +632,35 @@ void x86_spec_ctrl_setup_ap(void)
+ x86_amd_ssb_disable();
+ }
+
++#undef pr_fmt
++#define pr_fmt(fmt) "L1TF: " fmt
++static void __init l1tf_select_mitigation(void)
++{
++ u64 half_pa;
++
++ if (!boot_cpu_has_bug(X86_BUG_L1TF))
++ return;
++
++#if CONFIG_PGTABLE_LEVELS == 2
++ pr_warn("Kernel not compiled for PAE. No mitigation for L1TF\n");
++ return;
++#endif
++
++ /*
++ * This is extremely unlikely to happen because almost all
++ * systems have far more MAX_PA/2 than RAM can be fit into
++ * DIMM slots.
++ */
++ half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT;
++ if (e820_any_mapped(half_pa, ULLONG_MAX - half_pa, E820_RAM)) {
++ pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n");
++ return;
++ }
++
++ setup_force_cpu_cap(X86_FEATURE_L1TF_PTEINV);
++}
++#undef pr_fmt
++
+ #ifdef CONFIG_SYSFS
+
+ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
--- /dev/null
+From foo@baz Tue Aug 14 17:08:55 CEST 2018
+From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Date: Wed, 20 Jun 2018 16:42:58 -0400
+Subject: x86/cpufeatures: Add detection of L1D cache flush support.
+
+From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+
+commit 11e34e64e4103955fc4568750914c75d65ea87ee upstream
+
+336996-Speculative-Execution-Side-Channel-Mitigations.pdf defines a new MSR
+(IA32_FLUSH_CMD) which is detected by the CPUID.7.EDX[28]=1 bit being set.
+
+This new MSR "gives software a way to invalidate structures with finer
+granularity than other architectural methods like WBINVD."
+
+A copy of this document is available at
+ https://bugzilla.kernel.org/show_bug.cgi?id=199511
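+
+A hedged sketch of how the new feature bit is meant to be consumed
+(MSR_IA32_FLUSH_CMD and L1D_FLUSH are defined by the related L1TF/KVM
+patches, not by this one):
+
+	if (static_cpu_has(X86_FEATURE_FLUSH_L1D))
+		wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);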
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeatures.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -310,6 +310,7 @@
+ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
+ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
++#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */
+ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
+ #define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */
+
--- /dev/null
+From foo@baz Tue Aug 14 17:08:55 CEST 2018
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Wed, 10 Aug 2016 10:23:25 -0700
+Subject: x86/mm: Fix swap entry comment and macro
+
+From: Dave Hansen <dave.hansen@linux.intel.com>
+
+commit ace7fab7a6cdd363a615ec537f2aa94dbc761ee2 upstream
+
+A recent patch changed the format of a swap PTE.
+
+The comment explaining the format of the swap PTE is wrong about
+the bits used for the swap type field. Amusingly, the ASCII art
+and the patch description are correct, but the comment itself
+is wrong.
+
+As I was looking at this, I also noticed that the
+SWP_OFFSET_FIRST_BIT has an off-by-one error. This does not
+really hurt anything. It just wasted a bit of space in the PTE,
+giving us 2^59 bytes of addressable space in our swapfiles
+instead of 2^60. But it doesn't match the comments, and it
+wastes a bit of space, so fix it.
+
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave@sr71.net>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Luis R. Rodriguez <mcgrof@suse.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Toshi Kani <toshi.kani@hp.com>
+Fixes: 00839ee3b299 ("x86/mm: Move swap offset/type up in PTE to work around erratum")
+Link: http://lkml.kernel.org/r/20160810172325.E56AD7DA@viggo.jf.intel.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/pgtable_64.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -168,7 +168,7 @@ static inline int pgd_large(pgd_t pgd) {
+ *
+ * | ... | 11| 10| 9|8|7|6|5| 4| 3|2|1|0| <- bit number
+ * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P| <- bit names
+- * | OFFSET (14->63) | TYPE (10-13) |0|X|X|X| X| X|X|X|0| <- swp entry
++ * | OFFSET (14->63) | TYPE (9-13) |0|X|X|X| X| X|X|X|0| <- swp entry
+ *
+ * G (8) is aliased and used as a PROT_NONE indicator for
+ * !present ptes. We need to start storing swap entries above
+@@ -179,7 +179,7 @@ static inline int pgd_large(pgd_t pgd) {
+ #define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1)
+ #define SWP_TYPE_BITS 5
+ /* Place the offset above the type: */
+-#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS + 1)
++#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS)
+
+ #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
+
--- /dev/null
+From foo@baz Tue Aug 14 17:08:55 CEST 2018
+From: Andi Kleen <ak@linux.intel.com>
+Date: Tue, 7 Aug 2018 15:09:38 -0700
+Subject: x86/mm/kmmio: Make the tracer robust against L1TF
+
+From: Andi Kleen <ak@linux.intel.com>
+
+commit 1063711b57393c1999248cccb57bebfaf16739e7 upstream
+
+The mmio tracer sets io mapping PTEs and PMDs to non-present when enabled
+without inverting the address bits, which makes the PTE entry vulnerable
+to L1TF.
+
+Make it use the right low level macros to actually invert the address bits
+to protect against L1TF.
+
+In principle this could be avoided because MMIO tracing is not likely to be
+enabled on production machines, but the fix is straightforward and for
+consistency's sake it's better to get rid of the open-coded PTE manipulation.
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/mm/kmmio.c | 25 +++++++++++++++----------
+ 1 file changed, 15 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/mm/kmmio.c
++++ b/arch/x86/mm/kmmio.c
+@@ -125,24 +125,29 @@ static struct kmmio_fault_page *get_kmmi
+
+ static void clear_pmd_presence(pmd_t *pmd, bool clear, pmdval_t *old)
+ {
++ pmd_t new_pmd;
+ pmdval_t v = pmd_val(*pmd);
+ if (clear) {
+- *old = v & _PAGE_PRESENT;
+- v &= ~_PAGE_PRESENT;
+- } else /* presume this has been called with clear==true previously */
+- v |= *old;
+- set_pmd(pmd, __pmd(v));
++ *old = v;
++ new_pmd = pmd_mknotpresent(*pmd);
++ } else {
++ /* Presume this has been called with clear==true previously */
++ new_pmd = __pmd(*old);
++ }
++ set_pmd(pmd, new_pmd);
+ }
+
+ static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old)
+ {
+ pteval_t v = pte_val(*pte);
+ if (clear) {
+- *old = v & _PAGE_PRESENT;
+- v &= ~_PAGE_PRESENT;
+- } else /* presume this has been called with clear==true previously */
+- v |= *old;
+- set_pte_atomic(pte, __pte(v));
++ *old = v;
++ /* Nothing should care about address */
++ pte_clear(&init_mm, 0, pte);
++ } else {
++ /* Presume this has been called with clear==true previously */
++ set_pte_atomic(pte, __pte(*old));
++ }
+ }
+
+ static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
--- /dev/null
+From foo@baz Tue Aug 14 17:08:55 CEST 2018
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Thu, 7 Jul 2016 17:19:11 -0700
+Subject: x86/mm: Move swap offset/type up in PTE to work around erratum
+
+From: Dave Hansen <dave.hansen@linux.intel.com>
+
+commit 00839ee3b299303c6a5e26a0a2485427a3afcbbf upstream
+
+This erratum can result in Accessed/Dirty getting set by the hardware
+when we do not expect them to be (on !Present PTEs).
+
+Instead of trying to fix them up after this happens, we just
+allow the bits to get set and try to ignore them. We do this by
+shifting the layout of the bits we use for swap offset/type in
+our 64-bit PTEs.
+
+It looks like this:
+
+ bitnrs: | ... | 11| 10| 9|8|7|6|5| 4| 3|2|1|0|
+ names: | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P|
+ before: | OFFSET (9-63) |0|X|X| TYPE(1-5) |0|
+ after: | OFFSET (14-63) | TYPE (9-13) |0|X|X|X| X| X|X|X|0|
+
+Note that D was already a don't care (X) even before. We just
+move TYPE up and turn its old spot (which could be hit by the
+A bit) into all don't cares.
+
+We take 5 bits away from the offset, but that still leaves us
+with 50 bits which lets us index into a 62-bit swapfile (4 EiB).
+I think that's probably fine for the moment. We could
+theoretically reclaim 5 of the bits (1, 2, 3, 4, 7) but it
+doesn't gain us anything.
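+
+(Checking that arithmetic as a side note: the offset occupies bits 14-63,
+i.e. 50 bits, and it counts 4 KiB pages, so the addressable swap space is
+2^50 * 2^12 bytes = 2^62 bytes = 4 EiB.)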
+
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave@sr71.net>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Luis R. Rodriguez <mcgrof@suse.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Toshi Kani <toshi.kani@hp.com>
+Cc: dave.hansen@intel.com
+Cc: linux-mm@kvack.org
+Cc: mhocko@suse.com
+Link: http://lkml.kernel.org/r/20160708001911.9A3FD2B6@viggo.jf.intel.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/pgtable_64.h | 26 ++++++++++++++++++++------
+ 1 file changed, 20 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -163,18 +163,32 @@ static inline int pgd_large(pgd_t pgd) {
+ #define pte_offset_map(dir, address) pte_offset_kernel((dir), (address))
+ #define pte_unmap(pte) ((void)(pte))/* NOP */
+
+-/* Encode and de-code a swap entry */
++/*
++ * Encode and de-code a swap entry
++ *
++ * | ... | 11| 10| 9|8|7|6|5| 4| 3|2|1|0| <- bit number
++ * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P| <- bit names
++ * | OFFSET (14->63) | TYPE (10-13) |0|X|X|X| X| X|X|X|0| <- swp entry
++ *
++ * G (8) is aliased and used as a PROT_NONE indicator for
++ * !present ptes. We need to start storing swap entries above
++ * there. We also need to avoid using A and D because of an
++ * erratum where they can be incorrectly set by hardware on
++ * non-present PTEs.
++ */
++#define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1)
+ #define SWP_TYPE_BITS 5
+-#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1)
++/* Place the offset above the type: */
++#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS + 1)
+
+ #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
+
+-#define __swp_type(x) (((x).val >> (_PAGE_BIT_PRESENT + 1)) \
++#define __swp_type(x) (((x).val >> (SWP_TYPE_FIRST_BIT)) \
+ & ((1U << SWP_TYPE_BITS) - 1))
+-#define __swp_offset(x) ((x).val >> SWP_OFFSET_SHIFT)
++#define __swp_offset(x) ((x).val >> SWP_OFFSET_FIRST_BIT)
+ #define __swp_entry(type, offset) ((swp_entry_t) { \
+- ((type) << (_PAGE_BIT_PRESENT + 1)) \
+- | ((offset) << SWP_OFFSET_SHIFT) })
++ ((type) << (SWP_TYPE_FIRST_BIT)) \
++ | ((offset) << SWP_OFFSET_FIRST_BIT) })
+ #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) })
+ #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val })
+
--- /dev/null
+From foo@baz Tue Aug 14 17:08:55 CEST 2018
+From: Andi Kleen <ak@linux.intel.com>
+Date: Tue, 7 Aug 2018 15:09:39 -0700
+Subject: x86/mm/pat: Make set_memory_np() L1TF safe
+
+From: Andi Kleen <ak@linux.intel.com>
+
+commit 958f79b9ee55dfaf00c8106ed1c22a2919e0028b upstream
+
+set_memory_np() is used to mark kernel mappings not present, but it has
+its own open-coded mechanism which does not have the L1TF protection of
+inverting the address bits.
+
+Replace the open coded PTE manipulation with the L1TF protecting low level
+PTE routines.
+
+Passes the CPA self test.
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+[ dwmw2: Pull in pud_mkhuge() from commit a00cc7d9dd, and pfn_pud() ]
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+[groeck: port to 4.4]
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/pgtable.h | 27 +++++++++++++++++++++++++++
+ arch/x86/mm/pageattr.c | 8 ++++----
+ 2 files changed, 31 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -378,12 +378,39 @@ static inline pmd_t pfn_pmd(unsigned lon
+ return __pmd(pfn | massage_pgprot(pgprot));
+ }
+
++static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot)
++{
++ phys_addr_t pfn = page_nr << PAGE_SHIFT;
++ pfn ^= protnone_mask(pgprot_val(pgprot));
++ pfn &= PHYSICAL_PUD_PAGE_MASK;
++ return __pud(pfn | massage_pgprot(pgprot));
++}
++
+ static inline pmd_t pmd_mknotpresent(pmd_t pmd)
+ {
+ return pfn_pmd(pmd_pfn(pmd),
+ __pgprot(pmd_flags(pmd) & ~(_PAGE_PRESENT|_PAGE_PROTNONE)));
+ }
+
++static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
++{
++ pudval_t v = native_pud_val(pud);
++
++ return __pud(v | set);
++}
++
++static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
++{
++ pudval_t v = native_pud_val(pud);
++
++ return __pud(v & ~clear);
++}
++
++static inline pud_t pud_mkhuge(pud_t pud)
++{
++ return pud_set_flags(pud, _PAGE_PSE);
++}
++
+ static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask);
+
+ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+--- a/arch/x86/mm/pageattr.c
++++ b/arch/x86/mm/pageattr.c
+@@ -1006,8 +1006,8 @@ static int populate_pmd(struct cpa_data
+
+ pmd = pmd_offset(pud, start);
+
+- set_pmd(pmd, __pmd(cpa->pfn | _PAGE_PSE |
+- massage_pgprot(pmd_pgprot)));
++ set_pmd(pmd, pmd_mkhuge(pfn_pmd(cpa->pfn,
++ canon_pgprot(pmd_pgprot))));
+
+ start += PMD_SIZE;
+ cpa->pfn += PMD_SIZE;
+@@ -1079,8 +1079,8 @@ static int populate_pud(struct cpa_data
+ * Map everything starting from the Gb boundary, possibly with 1G pages
+ */
+ while (end - start >= PUD_SIZE) {
+- set_pud(pud, __pud(cpa->pfn | _PAGE_PSE |
+- massage_pgprot(pud_pgprot)));
++ set_pud(pud, pud_mkhuge(pfn_pud(cpa->pfn,
++ canon_pgprot(pud_pgprot))));
+
+ start += PUD_SIZE;
+ cpa->pfn += PUD_SIZE;
--- /dev/null
+From foo@baz Tue Aug 14 17:08:55 CEST 2018
+From: Andi Kleen <ak@linux.intel.com>
+Date: Wed, 13 Jun 2018 15:48:26 -0700
+Subject: x86/speculation/l1tf: Add sysfs reporting for l1tf
+
+From: Andi Kleen <ak@linux.intel.com>
+
+commit 17dbca119312b4e8173d4e25ff64262119fcef38 upstream
+
+L1TF core kernel workarounds are cheap and normally always enabled. However,
+they should still be reported in sysfs if the system is vulnerable or
+mitigated. Add the necessary CPU feature/bug bits.
+
+- Extend the existing checks for Meltdowns to determine if the system is
+ vulnerable. All CPUs which are not vulnerable to Meltdown are also not
+ vulnerable to L1TF
+
+- Check for 32bit non PAE and emit a warning as there is no practical way
+ for mitigation due to the limited physical address bits
+
+- If the system has more than MAX_PA/2 physical memory the invert page
+ workarounds don't protect the system against the L1TF attack anymore,
+ because an inverted physical address will also point to valid
+ memory. Print a warning in this case and report that the system is
+ vulnerable.
+
+Add a function which returns the PFN limit for the L1TF mitigation, which
+will be used in follow up patches for sanity and range checks.
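+
+As a worked example of the new helper (assuming a typical configuration,
+not stated in the patch): with boot_cpu_data.x86_phys_bits == 46 and
+PAGE_SHIFT == 12, l1tf_pfn_limit() returns BIT(46 - 1 - 12) - 1 = 2^33 - 1,
+i.e. the last pfn below MAX_PA/2 (2^45 bytes = 32 TiB).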
+
+[ tglx: Renamed the CPU feature bit to L1TF_PTEINV ]
+[ dwmw2: Backport to 4.9 (cpufeatures.h, E820) ]
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Acked-by: Dave Hansen <dave.hansen@intel.com>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeatures.h | 3 +-
+ arch/x86/include/asm/processor.h | 5 ++++
+ arch/x86/kernel/cpu/bugs.c | 40 +++++++++++++++++++++++++++++++++++++
+ arch/x86/kernel/cpu/common.c | 20 ++++++++++++++++++
+ drivers/base/cpu.c | 8 +++++++
+ include/linux/cpu.h | 2 +
+ 6 files changed, 77 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -214,7 +214,7 @@
+ #define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */
+ #define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */
+ #define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */
+-
++#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */
+
+ /* Virtualization flags: Linux defined, word 8 */
+ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
+@@ -331,5 +331,6 @@
+ #define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
+ #define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
+ #define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */
++#define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */
+
+ #endif /* _ASM_X86_CPUFEATURES_H */
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -172,6 +172,11 @@ extern const struct seq_operations cpuin
+
+ extern void cpu_detect(struct cpuinfo_x86 *c);
+
++static inline unsigned long l1tf_pfn_limit(void)
++{
++ return BIT(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT) - 1;
++}
++
+ extern void early_cpu_init(void);
+ extern void identify_boot_cpu(void);
+ extern void identify_secondary_cpu(struct cpuinfo_x86 *);
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -26,9 +26,11 @@
+ #include <asm/pgtable.h>
+ #include <asm/cacheflush.h>
+ #include <asm/intel-family.h>
++#include <asm/e820.h>
+
+ static void __init spectre_v2_select_mitigation(void);
+ static void __init ssb_select_mitigation(void);
++static void __init l1tf_select_mitigation(void);
+
+ /*
+ * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any
+@@ -80,6 +82,8 @@ void __init check_bugs(void)
+ */
+ ssb_select_mitigation();
+
++ l1tf_select_mitigation();
++
+ #ifdef CONFIG_X86_32
+ /*
+ * Check whether we are able to run this kernel safely on SMP.
+@@ -203,6 +207,32 @@ static void x86_amd_ssb_disable(void)
+ wrmsrl(MSR_AMD64_LS_CFG, msrval);
+ }
+
++static void __init l1tf_select_mitigation(void)
++{
++ u64 half_pa;
++
++ if (!boot_cpu_has_bug(X86_BUG_L1TF))
++ return;
++
++#if CONFIG_PGTABLE_LEVELS == 2
++ pr_warn("Kernel not compiled for PAE. No mitigation for L1TF\n");
++ return;
++#endif
++
++ /*
++ * This is extremely unlikely to happen because almost all
++ * systems have far more MAX_PA/2 than RAM can be fit into
++ * DIMM slots.
++ */
++ half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT;
++ if (e820_any_mapped(half_pa, ULLONG_MAX - half_pa, E820_RAM)) {
++ pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n");
++ return;
++ }
++
++ setup_force_cpu_cap(X86_FEATURE_L1TF_PTEINV);
++}
++
+ #ifdef RETPOLINE
+ static bool spectre_v2_bad_module;
+
+@@ -655,6 +685,11 @@ static ssize_t cpu_show_common(struct de
+ case X86_BUG_SPEC_STORE_BYPASS:
+ return sprintf(buf, "%s\n", ssb_strings[ssb_mode]);
+
++ case X86_BUG_L1TF:
++ if (boot_cpu_has(X86_FEATURE_L1TF_PTEINV))
++ return sprintf(buf, "Mitigation: Page Table Inversion\n");
++ break;
++
+ default:
+ break;
+ }
+@@ -681,4 +716,9 @@ ssize_t cpu_show_spec_store_bypass(struc
+ {
+ return cpu_show_common(dev, attr, buf, X86_BUG_SPEC_STORE_BYPASS);
+ }
++
++ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *buf)
++{
++ return cpu_show_common(dev, attr, buf, X86_BUG_L1TF);
++}
+ #endif
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -880,6 +880,21 @@ static const __initconst struct x86_cpu_
+ {}
+ };
+
++static const __initconst struct x86_cpu_id cpu_no_l1tf[] = {
++ /* in addition to cpu_no_speculation */
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MOOREFIELD },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_DENVERTON },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GEMINI_LAKE },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM },
++ {}
++};
++
+ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
+ {
+ u64 ia32_cap = 0;
+@@ -905,6 +920,11 @@ static void __init cpu_set_bug_bits(stru
+ return;
+
+ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
++
++ if (x86_match_cpu(cpu_no_l1tf))
++ return;
++
++ setup_force_cpu_bug(X86_BUG_L1TF);
+ }
+
+ /*
+--- a/drivers/base/cpu.c
++++ b/drivers/base/cpu.c
+@@ -524,16 +524,24 @@ ssize_t __weak cpu_show_spec_store_bypas
+ return sprintf(buf, "Not affected\n");
+ }
+
++ssize_t __weak cpu_show_l1tf(struct device *dev,
++ struct device_attribute *attr, char *buf)
++{
++ return sprintf(buf, "Not affected\n");
++}
++
+ static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
+ static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
+ static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
+ static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL);
++static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL);
+
+ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
+ &dev_attr_meltdown.attr,
+ &dev_attr_spectre_v1.attr,
+ &dev_attr_spectre_v2.attr,
+ &dev_attr_spec_store_bypass.attr,
++ &dev_attr_l1tf.attr,
+ NULL
+ };
+
+--- a/include/linux/cpu.h
++++ b/include/linux/cpu.h
+@@ -48,6 +48,8 @@ extern ssize_t cpu_show_spectre_v2(struc
+ struct device_attribute *attr, char *buf);
+ extern ssize_t cpu_show_spec_store_bypass(struct device *dev,
+ struct device_attribute *attr, char *buf);
++extern ssize_t cpu_show_l1tf(struct device *dev,
++ struct device_attribute *attr, char *buf);
+
+ extern __printf(4, 5)
+ struct device *cpu_device_create(struct device *parent, void *drvdata,
--- /dev/null
+From foo@baz Tue Aug 14 17:08:55 CEST 2018
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Wed, 13 Jun 2018 15:48:22 -0700
+Subject: x86/speculation/l1tf: Change order of offset/type in swap entry
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit bcd11afa7adad8d720e7ba5ef58bdcd9775cf45f upstream
+
+If pages are swapped out, the swap entry is stored in the corresponding
+PTE, which has the Present bit cleared. CPUs vulnerable to L1TF speculate
+on PTE entries even when the Present bit is cleared and would treat the
+swap entry as a physical address (PFN). To mitigate that, the upper bits
+of the PTE must be set so the PTE points to non-existent memory.
+
+The swap entry stores the type and the offset of a swapped out page in the
+PTE. The type is stored in bits 9-13 and the offset in bits 14-63. The
+hardware ignores the bits beyond the physical address space limit, so to
+make the mitigation effective it's required to start 'offset' at the lowest
+possible bit so that even large swap offsets do not reach into the physical
+address space limit bits.
+
+Move offset to bit 9-58 and type to bit 59-63 which are the bits that
+hardware generally doesn't care about.
+
+That, in turn, means that if you are on a desktop chip with only 40 bits of
+physical addressing, now that the offset starts at bit 9, there need to be
+30 bits of offset actually *in use* until bit 39 ends up being set, which
+means when inverted it will again point into existing memory.
+
+So that's 4 terabyte of swap space (because the offset is counted in pages,
+so 30 bits of offset is 42 bits of actual coverage). With bigger physical
+addressing, that obviously grows further, until the limit of the offset is
+hit (at 50 bits of offset - 62 bits of actual swap file coverage).
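+
+(Spelling out that arithmetic as a side note: 30 bits of offset, counted in
+4 KiB pages, covers 2^30 * 2^12 bytes = 2^42 bytes = 4 TiB of swap; the
+maximum of 50 offset bits covers 2^50 * 2^12 = 2^62 bytes.)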
+
+This is a preparatory change for the actual swap entry inversion to protect
+against L1TF.
+
+[ AK: Updated description and minor tweaks. Split into two parts ]
+[ tglx: Massaged changelog ]
+
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Andi Kleen <ak@linux.intel.com>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Acked-by: Dave Hansen <dave.hansen@intel.com>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/pgtable_64.h | 31 ++++++++++++++++++++-----------
+ 1 file changed, 20 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -168,7 +168,7 @@ static inline int pgd_large(pgd_t pgd) {
+ *
+ * | ... | 11| 10| 9|8|7|6|5| 4| 3|2| 1|0| <- bit number
+ * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names
+- * | OFFSET (14->63) | TYPE (9-13) |0|0|X|X| X| X|X|SD|0| <- swp entry
++ * | TYPE (59-63) | OFFSET (9-58) |0|0|X|X| X| X|X|SD|0| <- swp entry
+ *
+ * G (8) is aliased and used as a PROT_NONE indicator for
+ * !present ptes. We need to start storing swap entries above
+@@ -182,19 +182,28 @@ static inline int pgd_large(pgd_t pgd) {
+ * Bit 7 in swp entry should be 0 because pmd_present checks not only P,
+ * but also L and G.
+ */
+-#define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1)
+-#define SWP_TYPE_BITS 5
+-/* Place the offset above the type: */
+-#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS)
++#define SWP_TYPE_BITS 5
++
++#define SWP_OFFSET_FIRST_BIT (_PAGE_BIT_PROTNONE + 1)
++
++/* We always extract/encode the offset by shifting it all the way up, and then down again */
++#define SWP_OFFSET_SHIFT (SWP_OFFSET_FIRST_BIT+SWP_TYPE_BITS)
+
+ #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
+
+-#define __swp_type(x) (((x).val >> (SWP_TYPE_FIRST_BIT)) \
+- & ((1U << SWP_TYPE_BITS) - 1))
+-#define __swp_offset(x) ((x).val >> SWP_OFFSET_FIRST_BIT)
+-#define __swp_entry(type, offset) ((swp_entry_t) { \
+- ((type) << (SWP_TYPE_FIRST_BIT)) \
+- | ((offset) << SWP_OFFSET_FIRST_BIT) })
++/* Extract the high bits for type */
++#define __swp_type(x) ((x).val >> (64 - SWP_TYPE_BITS))
++
++/* Shift up (to get rid of type), then down to get value */
++#define __swp_offset(x) ((x).val << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT)
++
++/*
++ * Shift the offset up "too far" by TYPE bits, then down again
++ */
++#define __swp_entry(type, offset) ((swp_entry_t) { \
++ ((unsigned long)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \
++ | ((unsigned long)(type) << (64-SWP_TYPE_BITS)) })
++
+ #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) })
+ #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val })
+
--- /dev/null
+From foo@baz Tue Aug 14 17:08:55 CEST 2018
+From: Andi Kleen <ak@linux.intel.com>
+Date: Wed, 13 Jun 2018 15:48:27 -0700
+Subject: x86/speculation/l1tf: Disallow non privileged high MMIO PROT_NONE mappings
+
+From: Andi Kleen <ak@linux.intel.com>
+
+commit 42e4089c7890725fcd329999252dc489b72f2921 upstream
+
+For L1TF, PROT_NONE mappings are protected by inverting the PFN in the page
+table entry. This sets the high bits in the CPU's address space, thus
+making sure an unmapped entry does not point to valid cached memory.
+
+Some server system BIOSes put the MMIO mappings high up in the physical
+address space. If such a high mapping was mapped to unprivileged users,
+they could attack low memory by setting such a mapping to PROT_NONE. This
+could happen through a special device driver which is not access
+protected. Normal /dev/mem is of course access protected.
+
+To avoid this, forbid PROT_NONE mappings or mprotect for high MMIO mappings.
+
+Valid page mappings are allowed because the system is then unsafe anyway.
+
+It's not expected that users commonly use PROT_NONE on MMIO. But to
+minimize any impact this is only enforced if the mapping actually refers to
+a high MMIO address (defined as the MAX_PA-1 bit being set), and also skip
+the check for root.
+
+For mmaps this is straightforward and can be handled in vm_insert_pfn() and
+in remap_pfn_range().
+
+For mprotect it's a bit trickier. At the point where the actual PTEs are
+accessed, a lot of state has been changed and it would be difficult to undo
+on an error. Since this is an uncommon case, use a separate early page
+table walk pass for MMIO PROT_NONE mappings that checks for this condition
+early. For non-MMIO and non-PROT_NONE mappings there are no changes.
+
+[dwmw2: Backport to 4.9]
+[groeck: Backport to 4.4]
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Acked-by: Dave Hansen <dave.hansen@intel.com>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/pgtable.h | 8 ++++++
+ arch/x86/mm/mmap.c | 21 +++++++++++++++++
+ include/asm-generic/pgtable.h | 12 ++++++++++
+ mm/memory.c | 29 ++++++++++++++++++------
+ mm/mprotect.c | 49 +++++++++++++++++++++++++++++++++++++++++
+ 5 files changed, 112 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -942,6 +942,14 @@ static inline pte_t pte_swp_clear_soft_d
+ }
+ #endif
+
++#define __HAVE_ARCH_PFN_MODIFY_ALLOWED 1
++extern bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot);
++
++static inline bool arch_has_pfn_modify_check(void)
++{
++ return boot_cpu_has_bug(X86_BUG_L1TF);
++}
++
+ #include <asm-generic/pgtable.h>
+ #endif /* __ASSEMBLY__ */
+
+--- a/arch/x86/mm/mmap.c
++++ b/arch/x86/mm/mmap.c
+@@ -121,3 +121,24 @@ const char *arch_vma_name(struct vm_area
+ return "[mpx]";
+ return NULL;
+ }
++
++/*
++ * Only allow root to set high MMIO mappings to PROT_NONE.
++ * This prevents an unpriv. user to set them to PROT_NONE and invert
++ * them, then pointing to valid memory for L1TF speculation.
++ *
++ * Note: for locked down kernels may want to disable the root override.
++ */
++bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
++{
++ if (!boot_cpu_has_bug(X86_BUG_L1TF))
++ return true;
++ if (!__pte_needs_invert(pgprot_val(prot)))
++ return true;
++ /* If it's real memory always allow */
++ if (pfn_valid(pfn))
++ return true;
++ if (pfn > l1tf_pfn_limit() && !capable(CAP_SYS_ADMIN))
++ return false;
++ return true;
++}
+--- a/include/asm-generic/pgtable.h
++++ b/include/asm-generic/pgtable.h
+@@ -805,4 +805,16 @@ static inline int pmd_free_pte_page(pmd_
+ #define io_remap_pfn_range remap_pfn_range
+ #endif
+
++#ifndef __HAVE_ARCH_PFN_MODIFY_ALLOWED
++static inline bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
++{
++ return true;
++}
++
++static inline bool arch_has_pfn_modify_check(void)
++{
++ return false;
++}
++#endif
++
+ #endif /* _ASM_GENERIC_PGTABLE_H */
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -1645,6 +1645,9 @@ int vm_insert_pfn_prot(struct vm_area_st
+ if (track_pfn_insert(vma, &pgprot, pfn))
+ return -EINVAL;
+
++ if (!pfn_modify_allowed(pfn, pgprot))
++ return -EACCES;
++
+ ret = insert_pfn(vma, addr, pfn, pgprot);
+
+ return ret;
+@@ -1663,6 +1666,9 @@ int vm_insert_mixed(struct vm_area_struc
+ if (track_pfn_insert(vma, &pgprot, pfn))
+ return -EINVAL;
+
++ if (!pfn_modify_allowed(pfn, pgprot))
++ return -EACCES;
++
+ /*
+ * If we don't have pte special, then we have to use the pfn_valid()
+ * based VM_MIXEDMAP scheme (see vm_normal_page), and thus we *must*
+@@ -1691,6 +1697,7 @@ static int remap_pte_range(struct mm_str
+ {
+ pte_t *pte;
+ spinlock_t *ptl;
++ int err = 0;
+
+ pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
+ if (!pte)
+@@ -1698,12 +1705,16 @@ static int remap_pte_range(struct mm_str
+ arch_enter_lazy_mmu_mode();
+ do {
+ BUG_ON(!pte_none(*pte));
++ if (!pfn_modify_allowed(pfn, prot)) {
++ err = -EACCES;
++ break;
++ }
+ set_pte_at(mm, addr, pte, pte_mkspecial(pfn_pte(pfn, prot)));
+ pfn++;
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+ arch_leave_lazy_mmu_mode();
+ pte_unmap_unlock(pte - 1, ptl);
+- return 0;
++ return err;
+ }
+
+ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
+@@ -1712,6 +1723,7 @@ static inline int remap_pmd_range(struct
+ {
+ pmd_t *pmd;
+ unsigned long next;
++ int err;
+
+ pfn -= addr >> PAGE_SHIFT;
+ pmd = pmd_alloc(mm, pud, addr);
+@@ -1720,9 +1732,10 @@ static inline int remap_pmd_range(struct
+ VM_BUG_ON(pmd_trans_huge(*pmd));
+ do {
+ next = pmd_addr_end(addr, end);
+- if (remap_pte_range(mm, pmd, addr, next,
+- pfn + (addr >> PAGE_SHIFT), prot))
+- return -ENOMEM;
++ err = remap_pte_range(mm, pmd, addr, next,
++ pfn + (addr >> PAGE_SHIFT), prot);
++ if (err)
++ return err;
+ } while (pmd++, addr = next, addr != end);
+ return 0;
+ }
+@@ -1733,6 +1746,7 @@ static inline int remap_pud_range(struct
+ {
+ pud_t *pud;
+ unsigned long next;
++ int err;
+
+ pfn -= addr >> PAGE_SHIFT;
+ pud = pud_alloc(mm, pgd, addr);
+@@ -1740,9 +1754,10 @@ static inline int remap_pud_range(struct
+ return -ENOMEM;
+ do {
+ next = pud_addr_end(addr, end);
+- if (remap_pmd_range(mm, pud, addr, next,
+- pfn + (addr >> PAGE_SHIFT), prot))
+- return -ENOMEM;
++ err = remap_pmd_range(mm, pud, addr, next,
++ pfn + (addr >> PAGE_SHIFT), prot);
++ if (err)
++ return err;
+ } while (pud++, addr = next, addr != end);
+ return 0;
+ }
+--- a/mm/mprotect.c
++++ b/mm/mprotect.c
+@@ -255,6 +255,42 @@ unsigned long change_protection(struct v
+ return pages;
+ }
+
++static int prot_none_pte_entry(pte_t *pte, unsigned long addr,
++ unsigned long next, struct mm_walk *walk)
++{
++ return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ?
++ 0 : -EACCES;
++}
++
++static int prot_none_hugetlb_entry(pte_t *pte, unsigned long hmask,
++ unsigned long addr, unsigned long next,
++ struct mm_walk *walk)
++{
++ return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ?
++ 0 : -EACCES;
++}
++
++static int prot_none_test(unsigned long addr, unsigned long next,
++ struct mm_walk *walk)
++{
++ return 0;
++}
++
++static int prot_none_walk(struct vm_area_struct *vma, unsigned long start,
++ unsigned long end, unsigned long newflags)
++{
++ pgprot_t new_pgprot = vm_get_page_prot(newflags);
++ struct mm_walk prot_none_walk = {
++ .pte_entry = prot_none_pte_entry,
++ .hugetlb_entry = prot_none_hugetlb_entry,
++ .test_walk = prot_none_test,
++ .mm = current->mm,
++ .private = &new_pgprot,
++ };
++
++ return walk_page_range(start, end, &prot_none_walk);
++}
++
+ int
+ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
+ unsigned long start, unsigned long end, unsigned long newflags)
+@@ -273,6 +309,19 @@ mprotect_fixup(struct vm_area_struct *vm
+ }
+
+ /*
++ * Do PROT_NONE PFN permission checks here when we can still
++ * bail out without undoing a lot of state. This is a rather
++ * uncommon case, so doesn't need to be very optimized.
++ */
++ if (arch_has_pfn_modify_check() &&
++ (vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) &&
++ (newflags & (VM_READ|VM_WRITE|VM_EXEC)) == 0) {
++ error = prot_none_walk(vma, start, end, newflags);
++ if (error)
++ return error;
++ }
++
++ /*
+ * If we make a private mapping writable we increase our commit;
+ * but (without finer accounting) cannot reduce our commit if we
+ * make it unwritable again. hugetlb mapping were accounted for
--- /dev/null
+From foo@baz Tue Aug 14 17:08:55 CEST 2018
+From: Vlastimil Babka <vbabka@suse.cz>
+Date: Thu, 21 Jun 2018 12:36:29 +0200
+Subject: x86/speculation/l1tf: Extend 64bit swap file size limit
+
+From: Vlastimil Babka <vbabka@suse.cz>
+
+commit 1a7ed1ba4bba6c075d5ad61bb75e3fbc870840d6 upstream
+
+The previous patch has limited swap file size so that large offsets cannot
+clear bits above MAX_PA/2 in the pte and interfere with L1TF mitigation.
+
+It assumed that offsets are encoded starting with bit 12, same as pfn. But
+on x86_64, offsets are encoded starting with bit 9.
+
+Thus the limit can be raised by 3 bits. That means 16TB with 42bit MAX_PA
+and 256TB with 46bit MAX_PA.
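+
+(Worked out as a side note: with a 42bit MAX_PA the pfn-based limit is
+MAX_PA/2 = 2 TiB; since x86_64 swap offsets start 3 bits lower than the
+pfn (bit 9 instead of bit 12), the usable limit becomes 2 TiB << 3 = 16 TiB,
+and likewise 32 TiB << 3 = 256 TiB for a 46bit MAX_PA.)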
+
+Fixes: 377eeaa8e11f ("x86/speculation/l1tf: Limit swap file size to MAX_PA/2")
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/mm/init.c | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -778,7 +778,15 @@ unsigned long max_swapfile_size(void)
+
+ if (boot_cpu_has_bug(X86_BUG_L1TF)) {
+ /* Limit the swap file size to MAX_PA/2 for L1TF workaround */
+- pages = min_t(unsigned long, l1tf_pfn_limit() + 1, pages);
++ unsigned long l1tf_limit = l1tf_pfn_limit() + 1;
++ /*
++ * We encode swap offsets also with 3 bits below those for pfn
++ * which makes the usable limit higher.
++ */
++#ifdef CONFIG_X86_64
++ l1tf_limit <<= PAGE_SHIFT - SWP_OFFSET_FIRST_BIT;
++#endif
++ pages = min_t(unsigned long, l1tf_limit, pages);
+ }
+ return pages;
+ }
--- /dev/null
+From foo@baz Tue Aug 14 17:08:55 CEST 2018
+From: Guenter Roeck <linux@roeck-us.net>
+Date: Mon, 13 Aug 2018 10:15:16 -0700
+Subject: x86/speculation/l1tf: Fix up CPU feature flags
+
+From: Guenter Roeck <linux@roeck-us.net>
+
+In linux-4.4.y, the definition of X86_FEATURE_RETPOLINE and
+X86_FEATURE_RETPOLINE_AMD is different from the upstream
+definition. Result is an overlap with the newly introduced
+X86_FEATURE_L1TF_PTEINV. Update RETPOLINE definitions to match
+upstream definitions to improve alignment with upstream code.
+
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeatures.h | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -193,12 +193,12 @@
+ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
+ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+
++#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
++#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
++
+ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
+ #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
+
+-#define X86_FEATURE_RETPOLINE ( 7*32+29) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+-#define X86_FEATURE_RETPOLINE_AMD ( 7*32+30) /* "" AMD Retpoline mitigation for Spectre variant 2 */
+-
+ #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
+ #define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */
+
--- /dev/null
+From foo@baz Tue Aug 14 17:08:55 CEST 2018
+From: Michal Hocko <mhocko@suse.cz>
+Date: Wed, 27 Jun 2018 17:46:50 +0200
+Subject: x86/speculation/l1tf: Fix up pte->pfn conversion for PAE
+
+From: Michal Hocko <mhocko@suse.cz>
+
+commit e14d7dfb41f5807a0c1c26a13f2b8ef16af24935 upstream
+
+Jan has noticed that pte_pfn() and friends, resp. pfn_pte(), are incorrect
+for CONFIG_PAE because phys_addr_t is wider than unsigned long, and so the
+pte_val() resp. the left shift would get truncated. Fix this up by using
+proper types.
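+
+A minimal sketch of the truncation being fixed (illustrative values,
+assuming 32bit PAE where unsigned long is 32 bits and PAGE_SHIFT is 12):
+
+	unsigned long page_nr = 0x100000;	/* pfn of the 4 GiB boundary */
+	phys_addr_t bad = page_nr << PAGE_SHIFT;	/* shift done in 32 bits: 0 */
+	phys_addr_t good = (phys_addr_t)page_nr << PAGE_SHIFT;	/* 0x100000000 */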
+
+[dwmw2: Backport to 4.9]
+
+Fixes: 6b28baca9b1f ("x86/speculation/l1tf: Protect PROT_NONE PTEs against speculation")
+Reported-by: Jan Beulich <JBeulich@suse.com>
+Signed-off-by: Michal Hocko <mhocko@suse.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/pgtable.h | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -154,21 +154,21 @@ static inline u64 protnone_mask(u64 val)
+
+ static inline unsigned long pte_pfn(pte_t pte)
+ {
+- unsigned long pfn = pte_val(pte);
++ phys_addr_t pfn = pte_val(pte);
+ pfn ^= protnone_mask(pfn);
+ return (pfn & PTE_PFN_MASK) >> PAGE_SHIFT;
+ }
+
+ static inline unsigned long pmd_pfn(pmd_t pmd)
+ {
+- unsigned long pfn = pmd_val(pmd);
++ phys_addr_t pfn = pmd_val(pmd);
+ pfn ^= protnone_mask(pfn);
+ return (pfn & pmd_pfn_mask(pmd)) >> PAGE_SHIFT;
+ }
+
+ static inline unsigned long pud_pfn(pud_t pud)
+ {
+- unsigned long pfn = pud_val(pud);
++ phys_addr_t pfn = pud_val(pud);
+ pfn ^= protnone_mask(pfn);
+ return (pfn & pud_pfn_mask(pud)) >> PAGE_SHIFT;
+ }
+@@ -369,7 +369,7 @@ static inline pgprotval_t massage_pgprot
+
+ static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
+ {
+- phys_addr_t pfn = page_nr << PAGE_SHIFT;
++ phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT;
+ pfn ^= protnone_mask(pgprot_val(pgprot));
+ pfn &= PTE_PFN_MASK;
+ return __pte(pfn | massage_pgprot(pgprot));
+@@ -377,7 +377,7 @@ static inline pte_t pfn_pte(unsigned lon
+
+ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
+ {
+- phys_addr_t pfn = page_nr << PAGE_SHIFT;
++ phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT;
+ pfn ^= protnone_mask(pgprot_val(pgprot));
+ pfn &= PHYSICAL_PMD_PAGE_MASK;
+ return __pmd(pfn | massage_pgprot(pgprot));
--- /dev/null
+From foo@baz Tue Aug 14 17:08:55 CEST 2018
+From: Andi Kleen <ak@linux.intel.com>
+Date: Wed, 13 Jun 2018 15:48:21 -0700
+Subject: x86/speculation/l1tf: Increase 32bit PAE __PHYSICAL_PAGE_SHIFT
+
+From: Andi Kleen <ak@linux.intel.com>
+
+commit 50896e180c6aa3a9c61a26ced99e15d602666a4c upstream
+
+L1 Terminal Fault (L1TF) is a speculation related vulnerability. The CPU
+speculates on PTE entries which do not have the PRESENT bit set, if the
+content of the resulting physical address is available in the L1D cache.
+
+The OS side mitigation makes sure that a !PRESENT PTE entry points to a
+physical address outside the actually existing and cacheable memory
+space. This is achieved by inverting the upper bits of the PTE. Due to the
+address space limitations this only works for 64bit and 32bit PAE kernels,
+but not for 32bit non PAE.
+
+This mitigation applies to both host and guest kernels, but in case of a
+64bit host (hypervisor) and a 32bit PAE guest, inverting the upper bits of
+the PAE address space (44bit) is not enough if the host has more than 43
+bits of populated memory address space, because the speculation treats the
+PTE content as a physical host address bypassing EPT.
+
+The host (hypervisor) protects itself against the guest by flushing L1D as
+needed, but pages inside the guest are not protected against attacks from
+other processes inside the same guest.
+
+For the guest the inverted PTE mask has to match the host to provide the
+full protection for all pages the host could possibly map into the
+guest. The host's populated address space is not known to the guest, so the
+mask must cover the possible maximal host address space, i.e. 52 bit.
+
+On 32bit PAE the maximum PTE mask is currently set to 44 bit because that
+is the limit imposed by 32bit unsigned long PFNs in the VMs. This limits
+the mask to be below what the host could possibly use for physical pages.
+
+The L1TF PROT_NONE protection code uses the PTE masks to determine which
+bits to invert to make sure the higher bits are set for unmapped entries to
+prevent L1TF speculation attacks against EPT inside guests.
+
+In order to invert all bits that could be used by the host, increase
+__PHYSICAL_PAGE_SHIFT to 52 to match 64bit.
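+
+An illustrative sketch of what the wider shift buys (plain C; PAGE_SHIFT of
+12 assumed, and the helper below is not a kernel function):
+
+  #include <stdint.h>
+  #include <stdio.h>
+
+  #define PAGE_SHIFT 12
+
+  /* pfn bits covered for a given __PHYSICAL_MASK_SHIFT */
+  static uint64_t pfn_mask(int mask_shift)
+  {
+          return ((1ULL << mask_shift) - 1) & ~((1ULL << PAGE_SHIFT) - 1);
+  }
+
+  int main(void)
+  {
+          /* bits an inverted !PRESENT PTE can set with the old/new shift */
+          printf("shift 44: %#llx\n", (unsigned long long)pfn_mask(44));
+          printf("shift 52: %#llx\n", (unsigned long long)pfn_mask(52));
+          return 0;
+  }
+
+With 44 the inversion tops out at bit 43, so hosts with more populated
+physical address bits would still be reachable; with 52 the inverted entry
+covers the maximal host address space.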
+
+The real limit for a 32bit PAE kernel is still 44 bits because all Linux
+PTEs are created from unsigned long PFNs, so they cannot be higher than 44
+bits on a 32bit kernel. So these extra PFN bits should never be set. The
+only users of this macro are using it to look at PTEs, so it's safe.
+
+[ tglx: Massaged changelog ]
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Dave Hansen <dave.hansen@intel.com>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/page_32_types.h | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/page_32_types.h
++++ b/arch/x86/include/asm/page_32_types.h
+@@ -27,8 +27,13 @@
+ #define N_EXCEPTION_STACKS 1
+
+ #ifdef CONFIG_X86_PAE
+-/* 44=32+12, the limit we can fit into an unsigned long pfn */
+-#define __PHYSICAL_MASK_SHIFT 44
++/*
++ * This is beyond the 44 bit limit imposed by the 32bit long pfns,
++ * but we need the full mask to make sure inverted PROT_NONE
++ * entries have all the host bits set in a guest.
++ * The real limit is still 44 bits.
++ */
++#define __PHYSICAL_MASK_SHIFT 52
+ #define __VIRTUAL_MASK_SHIFT 32
+
+ #else /* !CONFIG_X86_PAE */
--- /dev/null
+From foo@baz Tue Aug 14 17:08:55 CEST 2018
+From: Andi Kleen <ak@linux.intel.com>
+Date: Tue, 7 Aug 2018 15:09:36 -0700
+Subject: x86/speculation/l1tf: Invert all not present mappings
+
+From: Andi Kleen <ak@linux.intel.com>
+
+commit f22cc87f6c1f771b57c407555cfefd811cdd9507 upstream
+
+For kernel mappings PAGE_PROTNONE is not necessarily set for a non present
+mapping, but the inversion logic explicitly checks for !PRESENT and
+PROT_NONE.
+
+Remove the PROT_NONE check and make the inversion unconditional for all not
+present mappings.
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/pgtable-invert.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/pgtable-invert.h
++++ b/arch/x86/include/asm/pgtable-invert.h
+@@ -6,7 +6,7 @@
+
+ static inline bool __pte_needs_invert(u64 val)
+ {
+- return (val & (_PAGE_PRESENT|_PAGE_PROTNONE)) == _PAGE_PROTNONE;
++ return !(val & _PAGE_PRESENT);
+ }
+
+ /* Get a mask to xor with the page table entry to get the correct pfn. */
--- /dev/null
+From foo@baz Tue Aug 14 17:08:55 CEST 2018
+From: Andi Kleen <ak@linux.intel.com>
+Date: Wed, 13 Jun 2018 15:48:28 -0700
+Subject: x86/speculation/l1tf: Limit swap file size to MAX_PA/2
+
+From: Andi Kleen <ak@linux.intel.com>
+
+commit 377eeaa8e11fe815b1d07c81c4a0e2843a8c15eb upstream
+
+For the L1TF workaround it's necessary to limit the swap file size to below
+MAX_PA/2, so that the inverted higher bits of the swap offset never point
+to valid memory.
+
+Add a mechanism for the architecture to override the swap file size check
+in swapfile.c and add an x86-specific max swapfile check function that
+enforces that limit.
+
+The check is only enabled if the CPU is vulnerable to L1TF.
+
+In VMs with 42bit MAX_PA the typical limit is 2TB now; on a native system
+with 46bit PA it is 32TB. The limit is only per individual swap file, so
+it's always possible to exceed these limits with multiple swap files or
+partitions.
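+
+A back-of-the-envelope check of those numbers (user-space C; PAGE_SHIFT of
+12 assumed, and l1tf_pfn_limit() + 1 from the hunk below is assumed to
+equal MAX_PA/2 expressed in pages):
+
+  #include <stdint.h>
+  #include <stdio.h>
+
+  #define PAGE_SHIFT 12
+
+  static unsigned long long max_swap_bytes(int max_pa_bits)
+  {
+          /* MAX_PA/2 expressed in pages, then converted back to bytes */
+          unsigned long long pages = 1ULL << (max_pa_bits - 1 - PAGE_SHIFT);
+          return pages << PAGE_SHIFT;
+  }
+
+  int main(void)
+  {
+          printf("42bit MAX_PA: %llu TB\n", max_swap_bytes(42) >> 40); /* 2  */
+          printf("46bit MAX_PA: %llu TB\n", max_swap_bytes(46) >> 40); /* 32 */
+          return 0;
+  }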
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Dave Hansen <dave.hansen@intel.com>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/mm/init.c | 15 +++++++++++++++
+ include/linux/swapfile.h | 2 ++
+ mm/swapfile.c | 46 ++++++++++++++++++++++++++++++----------------
+ 3 files changed, 47 insertions(+), 16 deletions(-)
+
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -4,6 +4,8 @@
+ #include <linux/swap.h>
+ #include <linux/memblock.h>
+ #include <linux/bootmem.h> /* for max_low_pfn */
++#include <linux/swapfile.h>
++#include <linux/swapops.h>
+
+ #include <asm/cacheflush.h>
+ #include <asm/e820.h>
+@@ -767,3 +769,16 @@ void update_cache_mode_entry(unsigned en
+ __cachemode2pte_tbl[cache] = __cm_idx2pte(entry);
+ __pte2cachemode_tbl[entry] = cache;
+ }
++
++unsigned long max_swapfile_size(void)
++{
++ unsigned long pages;
++
++ pages = generic_max_swapfile_size();
++
++ if (boot_cpu_has_bug(X86_BUG_L1TF)) {
++ /* Limit the swap file size to MAX_PA/2 for L1TF workaround */
++ pages = min_t(unsigned long, l1tf_pfn_limit() + 1, pages);
++ }
++ return pages;
++}
+--- a/include/linux/swapfile.h
++++ b/include/linux/swapfile.h
+@@ -9,5 +9,7 @@ extern spinlock_t swap_lock;
+ extern struct plist_head swap_active_head;
+ extern struct swap_info_struct *swap_info[];
+ extern int try_to_unuse(unsigned int, bool, unsigned long);
++extern unsigned long generic_max_swapfile_size(void);
++extern unsigned long max_swapfile_size(void);
+
+ #endif /* _LINUX_SWAPFILE_H */
+--- a/mm/swapfile.c
++++ b/mm/swapfile.c
+@@ -2206,6 +2206,35 @@ static int claim_swapfile(struct swap_in
+ return 0;
+ }
+
++
++/*
++ * Find out how many pages are allowed for a single swap device. There
++ * are two limiting factors:
++ * 1) the number of bits for the swap offset in the swp_entry_t type, and
++ * 2) the number of bits in the swap pte, as defined by the different
++ * architectures.
++ *
++ * In order to find the largest possible bit mask, a swap entry with
++ * swap type 0 and swap offset ~0UL is created, encoded to a swap pte,
++ * decoded to a swp_entry_t again, and finally the swap offset is
++ * extracted.
++ *
++ * This will mask all the bits from the initial ~0UL mask that can't
++ * be encoded in either the swp_entry_t or the architecture definition
++ * of a swap pte.
++ */
++unsigned long generic_max_swapfile_size(void)
++{
++ return swp_offset(pte_to_swp_entry(
++ swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
++}
++
++/* Can be overridden by an architecture for additional checks. */
++__weak unsigned long max_swapfile_size(void)
++{
++ return generic_max_swapfile_size();
++}
++
+ static unsigned long read_swap_header(struct swap_info_struct *p,
+ union swap_header *swap_header,
+ struct inode *inode)
+@@ -2241,22 +2270,7 @@ static unsigned long read_swap_header(st
+ p->cluster_next = 1;
+ p->cluster_nr = 0;
+
+- /*
+- * Find out how many pages are allowed for a single swap
+- * device. There are two limiting factors: 1) the number
+- * of bits for the swap offset in the swp_entry_t type, and
+- * 2) the number of bits in the swap pte as defined by the
+- * different architectures. In order to find the
+- * largest possible bit mask, a swap entry with swap type 0
+- * and swap offset ~0UL is created, encoded to a swap pte,
+- * decoded to a swp_entry_t again, and finally the swap
+- * offset is extracted. This will mask all the bits from
+- * the initial ~0UL mask that can't be encoded in either
+- * the swp_entry_t or the architecture definition of a
+- * swap pte.
+- */
+- maxpages = swp_offset(pte_to_swp_entry(
+- swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
++ maxpages = max_swapfile_size();
+ last_page = swap_header->info.last_page;
+ if (!last_page) {
+ pr_warn("Empty swap-file\n");
--- /dev/null
+From foo@baz Tue Aug 14 17:08:55 CEST 2018
+From: Andi Kleen <ak@linux.intel.com>
+Date: Tue, 7 Aug 2018 15:09:37 -0700
+Subject: x86/speculation/l1tf: Make pmd/pud_mknotpresent() invert
+
+From: Andi Kleen <ak@linux.intel.com>
+
+commit 0768f91530ff46683e0b372df14fd79fe8d156e5 upstream
+
+Some cases in THP like:
+ - MADV_FREE
+ - mprotect
+ - split
+
+mark the PMD temporarily non present to prevent races. The window for
+an L1TF attack in these contexts is very small, but it wants to be fixed
+for correctness' sake.
+
+Use the proper low level functions for pmd/pud_mknotpresent() to address
+this.
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/pgtable.h | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -315,11 +315,6 @@ static inline pmd_t pmd_mkwrite(pmd_t pm
+ return pmd_set_flags(pmd, _PAGE_RW);
+ }
+
+-static inline pmd_t pmd_mknotpresent(pmd_t pmd)
+-{
+- return pmd_clear_flags(pmd, _PAGE_PRESENT | _PAGE_PROTNONE);
+-}
+-
+ #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
+ static inline int pte_soft_dirty(pte_t pte)
+ {
+@@ -383,6 +378,12 @@ static inline pmd_t pfn_pmd(unsigned lon
+ return __pmd(pfn | massage_pgprot(pgprot));
+ }
+
++static inline pmd_t pmd_mknotpresent(pmd_t pmd)
++{
++ return pfn_pmd(pmd_pfn(pmd),
++ __pgprot(pmd_flags(pmd) & ~(_PAGE_PRESENT|_PAGE_PROTNONE)));
++}
++
+ static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask);
+
+ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
--- /dev/null
+From foo@baz Tue Aug 14 17:08:55 CEST 2018
+From: Andi Kleen <ak@linux.intel.com>
+Date: Wed, 13 Jun 2018 15:48:25 -0700
+Subject: x86/speculation/l1tf: Make sure the first page is always reserved
+
+From: Andi Kleen <ak@linux.intel.com>
+
+commit 10a70416e1f067f6c4efda6ffd8ea96002ac4223 upstream
+
+The L1TF workaround doesn't make any attempt to mitigate speculative accesses
+to the first physical page for zeroed PTEs. Normally it only contains some
+data from the early real mode BIOS.
+
+It's not entirely clear that the first page is reserved in all
+configurations, so add an extra reservation call to make sure it is really
+reserved. In most configurations (e.g. with the standard reservations)
+it's likely a nop.
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Acked-by: Dave Hansen <dave.hansen@intel.com>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/setup.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/arch/x86/kernel/setup.c
++++ b/arch/x86/kernel/setup.c
+@@ -851,6 +851,12 @@ void __init setup_arch(char **cmdline_p)
+ memblock_reserve(__pa_symbol(_text),
+ (unsigned long)__bss_stop - (unsigned long)_text);
+
++ /*
++ * Make sure page 0 is always reserved because on systems with
++ * L1TF its contents can be leaked to user processes.
++ */
++ memblock_reserve(0, PAGE_SIZE);
++
+ early_reserve_initrd();
+
+ /*
--- /dev/null
+From foo@baz Tue Aug 14 17:08:55 CEST 2018
+From: Vlastimil Babka <vbabka@suse.cz>
+Date: Fri, 22 Jun 2018 17:39:33 +0200
+Subject: x86/speculation/l1tf: Protect PAE swap entries against L1TF
+
+From: Vlastimil Babka <vbabka@suse.cz>
+
+commit 0d0f6249058834ffe1ceaad0bb31464af66f6e7a upstream
+
+The PAE 3-level paging code currently doesn't mitigate L1TF by flipping the
+offset bits, and uses the high PTE word, thus bits 32-36 for type, 37-63 for
+offset. The lower word is zeroed, thus systems with less than 4GB memory are
+safe. With 4GB to 128GB the swap type selects the memory locations vulnerable
+to L1TF; with even more memory, the swap offset also influences the address.
+This might be a problem with 32bit PAE guests running on large 64bit hosts.
+
+By continuing to keep the whole swap entry in either high or low 32bit word of
+PTE we would limit the swap size too much. Thus this patch uses the whole PAE
+PTE with the same layout as the 64bit version does. The macros just become a
+bit tricky since they assume the arch-dependent swp_entry_t to be 32bit.
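+
+A user-space sketch of the resulting encode/decode chain (SWP_TYPE_BITS is 5
+as in the hunk below; SWP_OFFSET_SHIFT of 14 assumes _PAGE_BIT_PROTNONE is
+bit 8, i.e. SWP_OFFSET_FIRST_BIT 9):
+
+  #include <stdint.h>
+  #include <stdio.h>
+
+  #define SWP_TYPE_BITS      5
+  #define SWP_OFFSET_SHIFT   14
+
+  int main(void)
+  {
+          unsigned int type = 1, offset = 100;
+
+          /* 32bit arch-dependent intermediate, as __swp_entry() builds it */
+          uint32_t swp = type | (offset << SWP_TYPE_BITS);
+
+          /* full 64bit pteval, as __swp_pteval_entry() builds it */
+          uint64_t pte = (~(uint64_t)(swp >> SWP_TYPE_BITS)
+                          << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS)
+                         | ((uint64_t)(swp & 0x1f) << (64 - SWP_TYPE_BITS));
+
+          printf("pte         = %#llx\n", (unsigned long long)pte);
+          printf("type back   = %u\n",
+                 (unsigned int)(pte >> (64 - SWP_TYPE_BITS)));
+          printf("offset back = %llu\n",
+                 (unsigned long long)(~pte << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT));
+          return 0;
+  }
+
+The intermediate value still fits the 32bit swp_entry_t, while the inverted
+offset sets the high PTE bits and keeps the entry away from real memory.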
+
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/pgtable-3level.h | 35 ++++++++++++++++++++++++++++++++--
+ arch/x86/mm/init.c | 2 -
+ 2 files changed, 34 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/include/asm/pgtable-3level.h
++++ b/arch/x86/include/asm/pgtable-3level.h
+@@ -177,12 +177,43 @@ static inline pmd_t native_pmdp_get_and_
+ #endif
+
+ /* Encode and de-code a swap entry */
++#define SWP_TYPE_BITS 5
++
++#define SWP_OFFSET_FIRST_BIT (_PAGE_BIT_PROTNONE + 1)
++
++/* We always extract/encode the offset by shifting it all the way up, and then down again */
++#define SWP_OFFSET_SHIFT (SWP_OFFSET_FIRST_BIT + SWP_TYPE_BITS)
++
+ #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5)
+ #define __swp_type(x) (((x).val) & 0x1f)
+ #define __swp_offset(x) ((x).val >> 5)
+ #define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) << 5})
+-#define __pte_to_swp_entry(pte) ((swp_entry_t){ (pte).pte_high })
+-#define __swp_entry_to_pte(x) ((pte_t){ { .pte_high = (x).val } })
++
++/*
++ * Normally, __swp_entry() converts from arch-independent swp_entry_t to
++ * arch-dependent swp_entry_t, and __swp_entry_to_pte() just stores the result
++ * to pte. But here we have 32bit swp_entry_t and 64bit pte, and need to use the
++ * whole 64 bits. Thus, we shift the "real" arch-dependent conversion to
++ * __swp_entry_to_pte() through the following helper macro based on 64bit
++ * __swp_entry().
++ */
++#define __swp_pteval_entry(type, offset) ((pteval_t) { \
++ (~(pteval_t)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \
++ | ((pteval_t)(type) << (64 - SWP_TYPE_BITS)) })
++
++#define __swp_entry_to_pte(x) ((pte_t){ .pte = \
++ __swp_pteval_entry(__swp_type(x), __swp_offset(x)) })
++/*
++ * Analogically, __pte_to_swp_entry() doesn't just extract the arch-dependent
++ * swp_entry_t, but also has to convert it from 64bit to the 32bit
++ * intermediate representation, using the following macros based on 64bit
++ * __swp_type() and __swp_offset().
++ */
++#define __pteval_swp_type(x) ((unsigned long)((x).pte >> (64 - SWP_TYPE_BITS)))
++#define __pteval_swp_offset(x) ((unsigned long)(~((x).pte) << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT))
++
++#define __pte_to_swp_entry(pte) (__swp_entry(__pteval_swp_type(pte), \
++ __pteval_swp_offset(pte)))
+
+ #include <asm/pgtable-invert.h>
+
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -783,7 +783,7 @@ unsigned long max_swapfile_size(void)
+ * We encode swap offsets also with 3 bits below those for pfn
+ * which makes the usable limit higher.
+ */
+-#ifdef CONFIG_X86_64
++#if CONFIG_PGTABLE_LEVELS > 2
+ l1tf_limit <<= PAGE_SHIFT - SWP_OFFSET_FIRST_BIT;
+ #endif
+ pages = min_t(unsigned long, l1tf_limit, pages);
--- /dev/null
+From foo@baz Tue Aug 14 17:08:55 CEST 2018
+From: Andi Kleen <ak@linux.intel.com>
+Date: Wed, 13 Jun 2018 15:48:24 -0700
+Subject: x86/speculation/l1tf: Protect PROT_NONE PTEs against speculation
+
+From: Andi Kleen <ak@linux.intel.com>
+
+commit 6b28baca9b1f0d4a42b865da7a05b1c81424bd5c upstream
+
+When PTEs are set to PROT_NONE the kernel just clears the Present bit and
+preserves the PFN, which creates attack surface for L1TF speculation
+attacks.
+
+This is important inside guests, because L1TF speculation bypasses physical
+page remapping. While the host has its own mitigations preventing leaking
+data from other VMs into the guest, this would still risk leaking the wrong
+page inside the current guest.
+
+This uses the same technique as Linus' swap entry patch: while an entry
+is in PROTNONE state, invert the complete PFN part of it. This ensures
+that the highest bit will point to non-existing memory.
+
+The invert is done by pte/pmd_modify and pfn/pmd/pud_pte for PROTNONE and
+pte/pmd/pud_pfn undo it.
+
+This assumes that no code path touches the PFN part of a PTE directly
+without using these primitives.
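+
+A condensed user-space sketch of that round trip; the flag and mask values
+below are the usual 64bit x86 ones and are assumed here rather than taken
+from this patch:
+
+  #include <stdbool.h>
+  #include <stdint.h>
+  #include <stdio.h>
+
+  #define _PAGE_PRESENT   0x001ULL
+  #define _PAGE_PROTNONE  0x100ULL                /* bit 8 (Global) reused */
+  #define PTE_PFN_MASK    0x000ffffffffff000ULL   /* 52bit MAX_PA */
+
+  static bool pte_needs_invert(uint64_t val)
+  {
+          return (val & (_PAGE_PRESENT | _PAGE_PROTNONE)) == _PAGE_PROTNONE;
+  }
+
+  int main(void)
+  {
+          uint64_t pte = (0x1234ULL << 12) | _PAGE_PRESENT;
+
+          /* PROT_NONE: clear Present, set PROTNONE, invert the pfn bits */
+          uint64_t none = ((pte & ~PTE_PFN_MASK) & ~_PAGE_PRESENT)
+                          | _PAGE_PROTNONE
+                          | (~(pte & PTE_PFN_MASK) & PTE_PFN_MASK);
+
+          /* pte_pfn() xors the inversion away before extracting the pfn */
+          uint64_t mask = pte_needs_invert(none) ? ~0ULL : 0;
+          uint64_t pfn = ((none ^ mask) & PTE_PFN_MASK) >> 12;
+
+          printf("stored pfn bits %#llx, recovered pfn %#llx\n",
+                 (unsigned long long)(none & PTE_PFN_MASK),
+                 (unsigned long long)pfn);         /* recovered pfn 0x1234 */
+          return 0;
+  }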
+
+This doesn't handle the case that MMIO is on the top of the CPU physical
+memory. If such an MMIO region was exposed by an unprivileged driver for
+mmap it would be possible to attack some real memory. However this
+situation is all rather unlikely.
+
+For 32bit non PAE the inversion is not done because there are really not
+enough bits to protect anything.
+
+Q: Why does the guest need to be protected when the HyperVisor already has
+ L1TF mitigations?
+
+A: Here's an example:
+
+ Physical pages 1 2 get mapped into a guest as
+ GPA 1 -> PA 2
+ GPA 2 -> PA 1
+ through EPT.
+
+ The L1TF speculation ignores the EPT remapping.
+
+ Now the guest kernel maps GPA 1 to process A and GPA 2 to process B, and
+ they belong to different users and should be isolated.
+
+ A sets the GPA 1 PA 2 PTE to PROT_NONE to bypass the EPT remapping and
+ gets read access to the underlying physical page. Which in this case
+ points to PA 2, so it can read process B's data, if it happened to be in
+ L1, so isolation inside the guest is broken.
+
+ There's nothing the hypervisor can do about this. This mitigation has to
+ be done in the guest itself.
+
+[ tglx: Massaged changelog ]
+[ dwmw2: backported to 4.9 ]
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Acked-by: Dave Hansen <dave.hansen@intel.com>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/pgtable-2level.h | 17 +++++++++++++++
+ arch/x86/include/asm/pgtable-3level.h | 2 +
+ arch/x86/include/asm/pgtable-invert.h | 32 ++++++++++++++++++++++++++++
+ arch/x86/include/asm/pgtable.h | 38 ++++++++++++++++++++++++----------
+ arch/x86/include/asm/pgtable_64.h | 2 +
+ 5 files changed, 80 insertions(+), 11 deletions(-)
+ create mode 100644 arch/x86/include/asm/pgtable-invert.h
+
+--- a/arch/x86/include/asm/pgtable-2level.h
++++ b/arch/x86/include/asm/pgtable-2level.h
+@@ -77,4 +77,21 @@ static inline unsigned long pte_bitop(un
+ #define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low })
+ #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val })
+
++/* No inverted PFNs on 2 level page tables */
++
++static inline u64 protnone_mask(u64 val)
++{
++ return 0;
++}
++
++static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask)
++{
++ return val;
++}
++
++static inline bool __pte_needs_invert(u64 val)
++{
++ return false;
++}
++
+ #endif /* _ASM_X86_PGTABLE_2LEVEL_H */
+--- a/arch/x86/include/asm/pgtable-3level.h
++++ b/arch/x86/include/asm/pgtable-3level.h
+@@ -184,4 +184,6 @@ static inline pmd_t native_pmdp_get_and_
+ #define __pte_to_swp_entry(pte) ((swp_entry_t){ (pte).pte_high })
+ #define __swp_entry_to_pte(x) ((pte_t){ { .pte_high = (x).val } })
+
++#include <asm/pgtable-invert.h>
++
+ #endif /* _ASM_X86_PGTABLE_3LEVEL_H */
+--- /dev/null
++++ b/arch/x86/include/asm/pgtable-invert.h
+@@ -0,0 +1,32 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef _ASM_PGTABLE_INVERT_H
++#define _ASM_PGTABLE_INVERT_H 1
++
++#ifndef __ASSEMBLY__
++
++static inline bool __pte_needs_invert(u64 val)
++{
++ return (val & (_PAGE_PRESENT|_PAGE_PROTNONE)) == _PAGE_PROTNONE;
++}
++
++/* Get a mask to xor with the page table entry to get the correct pfn. */
++static inline u64 protnone_mask(u64 val)
++{
++ return __pte_needs_invert(val) ? ~0ull : 0;
++}
++
++static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask)
++{
++ /*
++ * When a PTE transitions from NONE to !NONE or vice-versa
++ * invert the PFN part to stop speculation.
++ * pte_pfn undoes this when needed.
++ */
++ if (__pte_needs_invert(oldval) != __pte_needs_invert(val))
++ val = (val & ~mask) | (~val & mask);
++ return val;
++}
++
++#endif /* __ASSEMBLY__ */
++
++#endif
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -148,19 +148,29 @@ static inline int pte_special(pte_t pte)
+ return pte_flags(pte) & _PAGE_SPECIAL;
+ }
+
++/* Entries that were set to PROT_NONE are inverted */
++
++static inline u64 protnone_mask(u64 val);
++
+ static inline unsigned long pte_pfn(pte_t pte)
+ {
+- return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT;
++ unsigned long pfn = pte_val(pte);
++ pfn ^= protnone_mask(pfn);
++ return (pfn & PTE_PFN_MASK) >> PAGE_SHIFT;
+ }
+
+ static inline unsigned long pmd_pfn(pmd_t pmd)
+ {
+- return (pmd_val(pmd) & pmd_pfn_mask(pmd)) >> PAGE_SHIFT;
++ unsigned long pfn = pmd_val(pmd);
++ pfn ^= protnone_mask(pfn);
++ return (pfn & pmd_pfn_mask(pmd)) >> PAGE_SHIFT;
+ }
+
+ static inline unsigned long pud_pfn(pud_t pud)
+ {
+- return (pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT;
++ unsigned long pfn = pud_val(pud);
++ pfn ^= protnone_mask(pfn);
++ return (pfn & pud_pfn_mask(pud)) >> PAGE_SHIFT;
+ }
+
+ #define pte_page(pte) pfn_to_page(pte_pfn(pte))
+@@ -359,19 +369,25 @@ static inline pgprotval_t massage_pgprot
+
+ static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
+ {
+- return __pte(((phys_addr_t)page_nr << PAGE_SHIFT) |
+- massage_pgprot(pgprot));
++ phys_addr_t pfn = page_nr << PAGE_SHIFT;
++ pfn ^= protnone_mask(pgprot_val(pgprot));
++ pfn &= PTE_PFN_MASK;
++ return __pte(pfn | massage_pgprot(pgprot));
+ }
+
+ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
+ {
+- return __pmd(((phys_addr_t)page_nr << PAGE_SHIFT) |
+- massage_pgprot(pgprot));
++ phys_addr_t pfn = page_nr << PAGE_SHIFT;
++ pfn ^= protnone_mask(pgprot_val(pgprot));
++ pfn &= PHYSICAL_PMD_PAGE_MASK;
++ return __pmd(pfn | massage_pgprot(pgprot));
+ }
+
++static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask);
++
+ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+ {
+- pteval_t val = pte_val(pte);
++ pteval_t val = pte_val(pte), oldval = val;
+
+ /*
+ * Chop off the NX bit (if present), and add the NX portion of
+@@ -379,17 +395,17 @@ static inline pte_t pte_modify(pte_t pte
+ */
+ val &= _PAGE_CHG_MASK;
+ val |= massage_pgprot(newprot) & ~_PAGE_CHG_MASK;
+-
++ val = flip_protnone_guard(oldval, val, PTE_PFN_MASK);
+ return __pte(val);
+ }
+
+ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+ {
+- pmdval_t val = pmd_val(pmd);
++ pmdval_t val = pmd_val(pmd), oldval = val;
+
+ val &= _HPAGE_CHG_MASK;
+ val |= massage_pgprot(newprot) & ~_HPAGE_CHG_MASK;
+-
++ val = flip_protnone_guard(oldval, val, PHYSICAL_PMD_PAGE_MASK);
+ return __pmd(val);
+ }
+
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -235,6 +235,8 @@ extern void cleanup_highmap(void);
+ extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
+ extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
+
++#include <asm/pgtable-invert.h>
++
+ #endif /* !__ASSEMBLY__ */
+
+ #endif /* _ASM_X86_PGTABLE_64_H */
--- /dev/null
+From foo@baz Tue Aug 14 17:08:55 CEST 2018
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Wed, 13 Jun 2018 15:48:23 -0700
+Subject: x86/speculation/l1tf: Protect swap entries against L1TF
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 2f22b4cd45b67b3496f4aa4c7180a1271c6452f6 upstream
+
+With L1 terminal fault the CPU speculates into unmapped PTEs, and the
+resulting side effects allow reading the memory the PTE is pointing to,
+if its contents are still in the L1 cache.
+
+For swapped out pages Linux uses unmapped PTEs and stores a swap entry into
+them.
+
+To protect against L1TF it must be ensured that the swap entry is not
+pointing to valid memory, which requires setting higher bits (between bit
+36 and bit 45) that are inside the CPUs physical address space, but outside
+any real memory.
+
+To do this invert the offset to make sure the higher bits are always set,
+as long as the swap file is not too big.
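+
+A small sketch of the effect, mirroring the inverted __swp_entry() and
+__swp_offset() in the hunk below (SWP_OFFSET_SHIFT of 14 is assumed, i.e.
+the offset living in bits 9-58):
+
+  #include <stdint.h>
+  #include <stdio.h>
+
+  #define SWP_TYPE_BITS      5
+  #define SWP_OFFSET_SHIFT   14
+
+  static uint64_t swp_entry(unsigned int type, uint64_t offset)
+  {
+          return (~offset << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) |
+                 ((uint64_t)type << (64 - SWP_TYPE_BITS));
+  }
+
+  int main(void)
+  {
+          uint64_t val = swp_entry(1, 100);  /* slot 100 on swap device 1 */
+
+          /* bits 9-58 hold ~100, so the high physical bits are all set */
+          printf("swp pte val = %#llx\n", (unsigned long long)val);
+          /* decoding inverts again and recovers the offset */
+          printf("offset back = %llu\n",
+                 (unsigned long long)(~val << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT));
+          return 0;
+  }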
+
+Note there is no workaround for 32bit !PAE, or on systems which have more
+than MAX_PA/2 worth of memory. The latter case is very unlikely to happen on
+real systems.
+
+[AK: updated description and minor tweaks. Split out from the original
+ patch ]
+
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Andi Kleen <ak@linux.intel.com>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Acked-by: Dave Hansen <dave.hansen@intel.com>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/pgtable_64.h | 11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -168,7 +168,7 @@ static inline int pgd_large(pgd_t pgd) {
+ *
+ * | ... | 11| 10| 9|8|7|6|5| 4| 3|2| 1|0| <- bit number
+ * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names
+- * | TYPE (59-63) | OFFSET (9-58) |0|0|X|X| X| X|X|SD|0| <- swp entry
++ * | TYPE (59-63) | ~OFFSET (9-58) |0|0|X|X| X| X|X|SD|0| <- swp entry
+ *
+ * G (8) is aliased and used as a PROT_NONE indicator for
+ * !present ptes. We need to start storing swap entries above
+@@ -181,6 +181,9 @@ static inline int pgd_large(pgd_t pgd) {
+ *
+ * Bit 7 in swp entry should be 0 because pmd_present checks not only P,
+ * but also L and G.
++ *
++ * The offset is inverted by a binary not operation to make the high
++ * physical bits set.
+ */
+ #define SWP_TYPE_BITS 5
+
+@@ -195,13 +198,15 @@ static inline int pgd_large(pgd_t pgd) {
+ #define __swp_type(x) ((x).val >> (64 - SWP_TYPE_BITS))
+
+ /* Shift up (to get rid of type), then down to get value */
+-#define __swp_offset(x) ((x).val << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT)
++#define __swp_offset(x) (~(x).val << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT)
+
+ /*
+ * Shift the offset up "too far" by TYPE bits, then down again
++ * The offset is inverted by a binary not operation to make the high
++ * physical bits set.
+ */
+ #define __swp_entry(type, offset) ((swp_entry_t) { \
+- ((unsigned long)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \
++ (~(unsigned long)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \
+ | ((unsigned long)(type) << (64-SWP_TYPE_BITS)) })
+
+ #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) })