]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.14-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 21 Dec 2017 16:05:27 +0000 (17:05 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 21 Dec 2017 16:05:27 +0000 (17:05 +0100)
added patches:
acpi-apei-replace-ioremap_page_range-with-fixmap.patch
bitops-revert-cbe96375025e-bitops-add-clear-set_bit32-to-linux-bitops.h.patch
bpf-fix-build-issues-on-um-due-to-mising-bpf_perf_event.h.patch
drivers-misc-intel-pti-rename-the-header-file-to-free-up-the-namespace.patch
locking-barriers-add-implicit-smp_read_barrier_depends-to-read_once.patch
locking-barriers-convert-users-of-lockless_dereference-to-read_once.patch
perf-x86-enable-free-running-pebs-for-regs_user-intr.patch
selftests-x86-ldt_gdt-add-infrastructure-to-test-set_thread_area.patch
selftests-x86-ldt_gdt-run-most-existing-ldt-test-cases-against-the-gdt-as-well.patch
x86-cpufeature-add-user-mode-instruction-prevention-definitions.patch
x86-cpufeatures-fix-various-details-in-the-feature-definitions.patch
x86-cpufeatures-make-cpu-bugs-sticky.patch
x86-cpufeatures-re-tabulate-the-x86_feature-definitions.patch
x86-cpuid-replace-set-clear_bit32.patch
x86-dumpstack-add-get_stack_info-support-for-the-sysenter-stack.patch
x86-dumpstack-handle-stack-overflow-on-all-stacks.patch
x86-entry-32-fix-cpu_current_top_of_stack-initialization-at-boot.patch
x86-entry-32-pull-the-msr_ia32_sysenter_cs-update-code-out-of-native_load_sp0.patch
x86-entry-64-allocate-and-enable-the-sysenter-stack.patch
x86-entry-64-create-a-per-cpu-syscall-entry-trampoline.patch
x86-entry-64-de-xen-ify-our-nmi-code.patch
x86-entry-64-make-cpu_entry_area.tss-read-only.patch
x86-entry-64-merge-the-fast-and-slow-sysret-paths.patch
x86-entry-64-move-swapgs-into-the-common-iret-to-usermode-path.patch
x86-entry-64-move-the-ist-stacks-into-struct-cpu_entry_area.patch
x86-entry-64-paravirt-use-paravirt-safe-macro-to-access-eflags.patch
x86-entry-64-pass-sp0-directly-to-load_sp0.patch
x86-entry-64-remove-all-remaining-direct-thread_struct-sp0-reads.patch
x86-entry-64-remove-the-restore_..._regs-infrastructure.patch
x86-entry-64-remove-the-restore_c_regs_and_iret-label.patch
x86-entry-64-remove-the-sysenter-stack-canary.patch
x86-entry-64-remove-thread_struct-sp0.patch
x86-entry-64-return-to-userspace-from-the-trampoline-stack.patch
x86-entry-64-separate-cpu_current_top_of_stack-from-tss.sp0.patch
x86-entry-64-shorten-test-instructions.patch
x86-entry-64-shrink-paranoid_exit_restore-and-make-labels-local.patch
x86-entry-64-simplify-reg-restore-code-in-the-standard-iret-paths.patch
x86-entry-64-split-the-iret-to-user-and-iret-to-kernel-paths.patch
x86-entry-64-stop-initializing-tss.sp0-at-boot.patch
x86-entry-64-use-a-per-cpu-trampoline-stack-for-idt-entries.patch
x86-entry-64-use-pop-instead-of-mov-to-restore-regs-on-nmi-return.patch
x86-entry-64-use-pop-instead-of-movq-in-syscall_return_via_sysret.patch
x86-entry-add-task_top_of_stack-to-find-the-top-of-a-task-s-stack.patch
x86-entry-clean-up-the-sysenter_stack-code.patch
x86-entry-fix-assumptions-that-the-hw-tss-is-at-the-beginning-of-cpu_tss.patch
x86-entry-gdt-put-per-cpu-gdt-remaps-in-ascending-order.patch
x86-entry-move-sysenter_stack-to-the-beginning-of-struct-tss_struct.patch
x86-entry-remap-the-tss-into-the-cpu-entry-area.patch
x86-espfix-64-stop-assuming-that-pt_regs-is-on-the-entry-stack.patch
x86-irq-64-print-the-offending-ip-in-the-stack-overflow-warning.patch
x86-irq-remove-an-old-outdated-comment-about-context-tracking-races.patch
x86-kasan-64-teach-kasan-about-the-cpu_entry_area.patch
x86-make-x86_bug_fxsave_leak-detectable-in-cpuid-on-amd.patch
x86-mm-define-_page_table-using-_kernpg_table.patch
x86-mm-fixmap-generalize-the-gdt-fixmap-mechanism-introduce-struct-cpu_entry_area.patch
x86-mm-kasan-don-t-use-vmemmap_populate-to-initialize-shadow.patch
x86-paravirt-dont-patch-flush_tlb_single.patch
x86-paravirt-provide-a-way-to-check-for-hypervisors.patch
x86-traps-use-a-new-on_thread_stack-helper-to-clean-up-an-assertion.patch
x86-unwinder-handle-stack-overflows-more-gracefully.patch
x86-unwinder-orc-dont-bail-on-stack-overflow.patch
x86-virt-add-enum-for-hypervisors-to-replace-x86_hyper.patch
x86-virt-x86-platform-merge-struct-x86_hyper-into-struct-x86_platform-and-struct-x86_init.patch
x86-xen-64-x86-entry-64-clean-up-sp-code-in-cpu_initialize_context.patch
xen-x86-entry-64-add-xen-nmi-trap-entry.patch

66 files changed:
queue-4.14/acpi-apei-replace-ioremap_page_range-with-fixmap.patch [new file with mode: 0644]
queue-4.14/bitops-revert-cbe96375025e-bitops-add-clear-set_bit32-to-linux-bitops.h.patch [new file with mode: 0644]
queue-4.14/bpf-fix-build-issues-on-um-due-to-mising-bpf_perf_event.h.patch [new file with mode: 0644]
queue-4.14/drivers-misc-intel-pti-rename-the-header-file-to-free-up-the-namespace.patch [new file with mode: 0644]
queue-4.14/locking-barriers-add-implicit-smp_read_barrier_depends-to-read_once.patch [new file with mode: 0644]
queue-4.14/locking-barriers-convert-users-of-lockless_dereference-to-read_once.patch [new file with mode: 0644]
queue-4.14/perf-x86-enable-free-running-pebs-for-regs_user-intr.patch [new file with mode: 0644]
queue-4.14/selftests-x86-ldt_gdt-add-infrastructure-to-test-set_thread_area.patch [new file with mode: 0644]
queue-4.14/selftests-x86-ldt_gdt-run-most-existing-ldt-test-cases-against-the-gdt-as-well.patch [new file with mode: 0644]
queue-4.14/series
queue-4.14/x86-cpufeature-add-user-mode-instruction-prevention-definitions.patch [new file with mode: 0644]
queue-4.14/x86-cpufeatures-fix-various-details-in-the-feature-definitions.patch [new file with mode: 0644]
queue-4.14/x86-cpufeatures-make-cpu-bugs-sticky.patch [new file with mode: 0644]
queue-4.14/x86-cpufeatures-re-tabulate-the-x86_feature-definitions.patch [new file with mode: 0644]
queue-4.14/x86-cpuid-replace-set-clear_bit32.patch [new file with mode: 0644]
queue-4.14/x86-dumpstack-add-get_stack_info-support-for-the-sysenter-stack.patch [new file with mode: 0644]
queue-4.14/x86-dumpstack-handle-stack-overflow-on-all-stacks.patch [new file with mode: 0644]
queue-4.14/x86-entry-32-fix-cpu_current_top_of_stack-initialization-at-boot.patch [new file with mode: 0644]
queue-4.14/x86-entry-32-pull-the-msr_ia32_sysenter_cs-update-code-out-of-native_load_sp0.patch [new file with mode: 0644]
queue-4.14/x86-entry-64-allocate-and-enable-the-sysenter-stack.patch [new file with mode: 0644]
queue-4.14/x86-entry-64-create-a-per-cpu-syscall-entry-trampoline.patch [new file with mode: 0644]
queue-4.14/x86-entry-64-de-xen-ify-our-nmi-code.patch [new file with mode: 0644]
queue-4.14/x86-entry-64-make-cpu_entry_area.tss-read-only.patch [new file with mode: 0644]
queue-4.14/x86-entry-64-merge-the-fast-and-slow-sysret-paths.patch [new file with mode: 0644]
queue-4.14/x86-entry-64-move-swapgs-into-the-common-iret-to-usermode-path.patch [new file with mode: 0644]
queue-4.14/x86-entry-64-move-the-ist-stacks-into-struct-cpu_entry_area.patch [new file with mode: 0644]
queue-4.14/x86-entry-64-paravirt-use-paravirt-safe-macro-to-access-eflags.patch [new file with mode: 0644]
queue-4.14/x86-entry-64-pass-sp0-directly-to-load_sp0.patch [new file with mode: 0644]
queue-4.14/x86-entry-64-remove-all-remaining-direct-thread_struct-sp0-reads.patch [new file with mode: 0644]
queue-4.14/x86-entry-64-remove-the-restore_..._regs-infrastructure.patch [new file with mode: 0644]
queue-4.14/x86-entry-64-remove-the-restore_c_regs_and_iret-label.patch [new file with mode: 0644]
queue-4.14/x86-entry-64-remove-the-sysenter-stack-canary.patch [new file with mode: 0644]
queue-4.14/x86-entry-64-remove-thread_struct-sp0.patch [new file with mode: 0644]
queue-4.14/x86-entry-64-return-to-userspace-from-the-trampoline-stack.patch [new file with mode: 0644]
queue-4.14/x86-entry-64-separate-cpu_current_top_of_stack-from-tss.sp0.patch [new file with mode: 0644]
queue-4.14/x86-entry-64-shorten-test-instructions.patch [new file with mode: 0644]
queue-4.14/x86-entry-64-shrink-paranoid_exit_restore-and-make-labels-local.patch [new file with mode: 0644]
queue-4.14/x86-entry-64-simplify-reg-restore-code-in-the-standard-iret-paths.patch [new file with mode: 0644]
queue-4.14/x86-entry-64-split-the-iret-to-user-and-iret-to-kernel-paths.patch [new file with mode: 0644]
queue-4.14/x86-entry-64-stop-initializing-tss.sp0-at-boot.patch [new file with mode: 0644]
queue-4.14/x86-entry-64-use-a-per-cpu-trampoline-stack-for-idt-entries.patch [new file with mode: 0644]
queue-4.14/x86-entry-64-use-pop-instead-of-mov-to-restore-regs-on-nmi-return.patch [new file with mode: 0644]
queue-4.14/x86-entry-64-use-pop-instead-of-movq-in-syscall_return_via_sysret.patch [new file with mode: 0644]
queue-4.14/x86-entry-add-task_top_of_stack-to-find-the-top-of-a-task-s-stack.patch [new file with mode: 0644]
queue-4.14/x86-entry-clean-up-the-sysenter_stack-code.patch [new file with mode: 0644]
queue-4.14/x86-entry-fix-assumptions-that-the-hw-tss-is-at-the-beginning-of-cpu_tss.patch [new file with mode: 0644]
queue-4.14/x86-entry-gdt-put-per-cpu-gdt-remaps-in-ascending-order.patch [new file with mode: 0644]
queue-4.14/x86-entry-move-sysenter_stack-to-the-beginning-of-struct-tss_struct.patch [new file with mode: 0644]
queue-4.14/x86-entry-remap-the-tss-into-the-cpu-entry-area.patch [new file with mode: 0644]
queue-4.14/x86-espfix-64-stop-assuming-that-pt_regs-is-on-the-entry-stack.patch [new file with mode: 0644]
queue-4.14/x86-irq-64-print-the-offending-ip-in-the-stack-overflow-warning.patch [new file with mode: 0644]
queue-4.14/x86-irq-remove-an-old-outdated-comment-about-context-tracking-races.patch [new file with mode: 0644]
queue-4.14/x86-kasan-64-teach-kasan-about-the-cpu_entry_area.patch [new file with mode: 0644]
queue-4.14/x86-make-x86_bug_fxsave_leak-detectable-in-cpuid-on-amd.patch [new file with mode: 0644]
queue-4.14/x86-mm-define-_page_table-using-_kernpg_table.patch [new file with mode: 0644]
queue-4.14/x86-mm-fixmap-generalize-the-gdt-fixmap-mechanism-introduce-struct-cpu_entry_area.patch [new file with mode: 0644]
queue-4.14/x86-mm-kasan-don-t-use-vmemmap_populate-to-initialize-shadow.patch [new file with mode: 0644]
queue-4.14/x86-paravirt-dont-patch-flush_tlb_single.patch [new file with mode: 0644]
queue-4.14/x86-paravirt-provide-a-way-to-check-for-hypervisors.patch [new file with mode: 0644]
queue-4.14/x86-traps-use-a-new-on_thread_stack-helper-to-clean-up-an-assertion.patch [new file with mode: 0644]
queue-4.14/x86-unwinder-handle-stack-overflows-more-gracefully.patch [new file with mode: 0644]
queue-4.14/x86-unwinder-orc-dont-bail-on-stack-overflow.patch [new file with mode: 0644]
queue-4.14/x86-virt-add-enum-for-hypervisors-to-replace-x86_hyper.patch [new file with mode: 0644]
queue-4.14/x86-virt-x86-platform-merge-struct-x86_hyper-into-struct-x86_platform-and-struct-x86_init.patch [new file with mode: 0644]
queue-4.14/x86-xen-64-x86-entry-64-clean-up-sp-code-in-cpu_initialize_context.patch [new file with mode: 0644]
queue-4.14/xen-x86-entry-64-add-xen-nmi-trap-entry.patch [new file with mode: 0644]

diff --git a/queue-4.14/acpi-apei-replace-ioremap_page_range-with-fixmap.patch b/queue-4.14/acpi-apei-replace-ioremap_page_range-with-fixmap.patch
new file mode 100644 (file)
index 0000000..15f5133
--- /dev/null
@@ -0,0 +1,177 @@
+From 4f89fa286f6729312e227e7c2d764e8e7b9d340e Mon Sep 17 00:00:00 2001
+From: James Morse <james.morse@arm.com>
+Date: Mon, 6 Nov 2017 18:44:24 +0000
+Subject: ACPI / APEI: Replace ioremap_page_range() with fixmap
+
+From: James Morse <james.morse@arm.com>
+
+commit 4f89fa286f6729312e227e7c2d764e8e7b9d340e upstream.
+
+Replace ghes_io{re,un}map_pfn_{nmi,irq}()s use of ioremap_page_range()
+with __set_fixmap() as ioremap_page_range() may sleep to allocate a new
+level of page-table, even if it's passed an existing final-address to
+use in the mapping.
+
+The GHES driver can only be enabled for architectures that select
+HAVE_ACPI_APEI: Add fixmap entries to both x86 and arm64.
+
+clear_fixmap() does the TLB invalidation in __set_fixmap() for arm64
+and __set_pte_vaddr() for x86. In each case it's the same as the
+respective arch_apei_flush_tlb_one().
+
+Reported-by: Fengguang Wu <fengguang.wu@intel.com>
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: James Morse <james.morse@arm.com>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Tested-by: Tyler Baicar <tbaicar@codeaurora.org>
+Tested-by: Toshi Kani <toshi.kani@hpe.com>
+[ For the arm64 bits: ]
+Acked-by: Will Deacon <will.deacon@arm.com>
+[ For the x86 bits: ]
+Acked-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: All applicable <stable@vger.kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/include/asm/fixmap.h |    7 ++++++
+ arch/x86/include/asm/fixmap.h   |    6 +++++
+ drivers/acpi/apei/ghes.c        |   44 ++++++++++++----------------------------
+ 3 files changed, 27 insertions(+), 30 deletions(-)
+
+--- a/arch/arm64/include/asm/fixmap.h
++++ b/arch/arm64/include/asm/fixmap.h
+@@ -51,6 +51,13 @@ enum fixed_addresses {
+       FIX_EARLYCON_MEM_BASE,
+       FIX_TEXT_POKE0,
++
++#ifdef CONFIG_ACPI_APEI_GHES
++      /* Used for GHES mapping from assorted contexts */
++      FIX_APEI_GHES_IRQ,
++      FIX_APEI_GHES_NMI,
++#endif /* CONFIG_ACPI_APEI_GHES */
++
+       __end_of_permanent_fixed_addresses,
+       /*
+--- a/arch/x86/include/asm/fixmap.h
++++ b/arch/x86/include/asm/fixmap.h
+@@ -104,6 +104,12 @@ enum fixed_addresses {
+       FIX_GDT_REMAP_BEGIN,
+       FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
++#ifdef CONFIG_ACPI_APEI_GHES
++      /* Used for GHES mapping from assorted contexts */
++      FIX_APEI_GHES_IRQ,
++      FIX_APEI_GHES_NMI,
++#endif
++
+       __end_of_permanent_fixed_addresses,
+       /*
+--- a/drivers/acpi/apei/ghes.c
++++ b/drivers/acpi/apei/ghes.c
+@@ -51,6 +51,7 @@
+ #include <acpi/actbl1.h>
+ #include <acpi/ghes.h>
+ #include <acpi/apei.h>
++#include <asm/fixmap.h>
+ #include <asm/tlbflush.h>
+ #include <ras/ras_event.h>
+@@ -112,7 +113,7 @@ static DEFINE_MUTEX(ghes_list_mutex);
+  * Because the memory area used to transfer hardware error information
+  * from BIOS to Linux can be determined only in NMI, IRQ or timer
+  * handler, but general ioremap can not be used in atomic context, so
+- * a special version of atomic ioremap is implemented for that.
++ * the fixmap is used instead.
+  */
+ /*
+@@ -126,8 +127,8 @@ static DEFINE_MUTEX(ghes_list_mutex);
+ /* virtual memory area for atomic ioremap */
+ static struct vm_struct *ghes_ioremap_area;
+ /*
+- * These 2 spinlock is used to prevent atomic ioremap virtual memory
+- * area from being mapped simultaneously.
++ * These 2 spinlocks are used to prevent the fixmap entries from being used
++ * simultaneously.
+  */
+ static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
+ static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);
+@@ -159,53 +160,36 @@ static void ghes_ioremap_exit(void)
+ static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn)
+ {
+-      unsigned long vaddr;
+       phys_addr_t paddr;
+       pgprot_t prot;
+-      vaddr = (unsigned long)GHES_IOREMAP_NMI_PAGE(ghes_ioremap_area->addr);
+-
+       paddr = pfn << PAGE_SHIFT;
+       prot = arch_apei_get_mem_attribute(paddr);
+-      ioremap_page_range(vaddr, vaddr + PAGE_SIZE, paddr, prot);
++      __set_fixmap(FIX_APEI_GHES_NMI, paddr, prot);
+-      return (void __iomem *)vaddr;
++      return (void __iomem *) fix_to_virt(FIX_APEI_GHES_NMI);
+ }
+ static void __iomem *ghes_ioremap_pfn_irq(u64 pfn)
+ {
+-      unsigned long vaddr;
+       phys_addr_t paddr;
+       pgprot_t prot;
+-      vaddr = (unsigned long)GHES_IOREMAP_IRQ_PAGE(ghes_ioremap_area->addr);
+-
+       paddr = pfn << PAGE_SHIFT;
+       prot = arch_apei_get_mem_attribute(paddr);
++      __set_fixmap(FIX_APEI_GHES_IRQ, paddr, prot);
+-      ioremap_page_range(vaddr, vaddr + PAGE_SIZE, paddr, prot);
+-
+-      return (void __iomem *)vaddr;
++      return (void __iomem *) fix_to_virt(FIX_APEI_GHES_IRQ);
+ }
+-static void ghes_iounmap_nmi(void __iomem *vaddr_ptr)
++static void ghes_iounmap_nmi(void)
+ {
+-      unsigned long vaddr = (unsigned long __force)vaddr_ptr;
+-      void *base = ghes_ioremap_area->addr;
+-
+-      BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_NMI_PAGE(base));
+-      unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
+-      arch_apei_flush_tlb_one(vaddr);
++      clear_fixmap(FIX_APEI_GHES_NMI);
+ }
+-static void ghes_iounmap_irq(void __iomem *vaddr_ptr)
++static void ghes_iounmap_irq(void)
+ {
+-      unsigned long vaddr = (unsigned long __force)vaddr_ptr;
+-      void *base = ghes_ioremap_area->addr;
+-
+-      BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_IRQ_PAGE(base));
+-      unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
+-      arch_apei_flush_tlb_one(vaddr);
++      clear_fixmap(FIX_APEI_GHES_IRQ);
+ }
+ static int ghes_estatus_pool_init(void)
+@@ -361,10 +345,10 @@ static void ghes_copy_tofrom_phys(void *
+               paddr += trunk;
+               buffer += trunk;
+               if (in_nmi) {
+-                      ghes_iounmap_nmi(vaddr);
++                      ghes_iounmap_nmi();
+                       raw_spin_unlock(&ghes_ioremap_lock_nmi);
+               } else {
+-                      ghes_iounmap_irq(vaddr);
++                      ghes_iounmap_irq();
+                       spin_unlock_irqrestore(&ghes_ioremap_lock_irq, flags);
+               }
+       }
diff --git a/queue-4.14/bitops-revert-cbe96375025e-bitops-add-clear-set_bit32-to-linux-bitops.h.patch b/queue-4.14/bitops-revert-cbe96375025e-bitops-add-clear-set_bit32-to-linux-bitops.h.patch
new file mode 100644 (file)
index 0000000..aeb6f75
--- /dev/null
@@ -0,0 +1,57 @@
+From 1943dc07b45e347c52c1bfdd4a37e04a86e399aa Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 2 Nov 2017 13:30:03 +0100
+Subject: bitops: Revert cbe96375025e ("bitops: Add clear/set_bit32() to linux/bitops.h")
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 1943dc07b45e347c52c1bfdd4a37e04a86e399aa upstream.
+
+These ops are not endian safe and may break on architectures which have
+alignment requirements.
+
+Reverts: cbe96375025e ("bitops: Add clear/set_bit32() to linux/bitops.h")
+Reported-by: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/bitops.h |   26 --------------------------
+ 1 file changed, 26 deletions(-)
+
+--- a/include/linux/bitops.h
++++ b/include/linux/bitops.h
+@@ -228,32 +228,6 @@ static inline unsigned long __ffs64(u64
+       return __ffs((unsigned long)word);
+ }
+-/*
+- * clear_bit32 - Clear a bit in memory for u32 array
+- * @nr: Bit to clear
+- * @addr: u32 * address of bitmap
+- *
+- * Same as clear_bit, but avoids needing casts for u32 arrays.
+- */
+-
+-static __always_inline void clear_bit32(long nr, volatile u32 *addr)
+-{
+-      clear_bit(nr, (volatile unsigned long *)addr);
+-}
+-
+-/*
+- * set_bit32 - Set a bit in memory for u32 array
+- * @nr: Bit to clear
+- * @addr: u32 * address of bitmap
+- *
+- * Same as set_bit, but avoids needing casts for u32 arrays.
+- */
+-
+-static __always_inline void set_bit32(long nr, volatile u32 *addr)
+-{
+-      set_bit(nr, (volatile unsigned long *)addr);
+-}
+-
+ #ifdef __KERNEL__
+ #ifndef set_mask_bits
diff --git a/queue-4.14/bpf-fix-build-issues-on-um-due-to-mising-bpf_perf_event.h.patch b/queue-4.14/bpf-fix-build-issues-on-um-due-to-mising-bpf_perf_event.h.patch
new file mode 100644 (file)
index 0000000..beca580
--- /dev/null
@@ -0,0 +1,59 @@
+From ab95477e7cb35557ecfc837687007b646bab9a9f Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Tue, 12 Dec 2017 02:25:31 +0100
+Subject: bpf: fix build issues on um due to mising bpf_perf_event.h
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+commit ab95477e7cb35557ecfc837687007b646bab9a9f upstream.
+
+[ Note, this is a Git cherry-pick of the following commit:
+
+    a23f06f06dbe ("bpf: fix build issues on um due to mising bpf_perf_event.h")
+
+  ... for easier x86 PTI code testing and back-porting. ]
+
+Since c895f6f703ad ("bpf: correct broken uapi for
+BPF_PROG_TYPE_PERF_EVENT program type") um (uml) won't build
+on i386 or x86_64:
+
+  [...]
+    CC      init/main.o
+  In file included from ../include/linux/perf_event.h:18:0,
+                   from ../include/linux/trace_events.h:10,
+                   from ../include/trace/syscall.h:7,
+                   from ../include/linux/syscalls.h:82,
+                   from ../init/main.c:20:
+  ../include/uapi/linux/bpf_perf_event.h:11:32: fatal error:
+  asm/bpf_perf_event.h: No such file or directory #include
+  <asm/bpf_perf_event.h>
+  [...]
+
+Let's add the missing bpf_perf_event.h to the um arch as well. This seems
+to be the only one still missing.
+
+Fixes: c895f6f703ad ("bpf: correct broken uapi for BPF_PROG_TYPE_PERF_EVENT program type")
+Reported-by: Randy Dunlap <rdunlap@infradead.org>
+Suggested-by: Richard Weinberger <richard@sigma-star.at>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Tested-by: Randy Dunlap <rdunlap@infradead.org>
+Cc: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+Cc: Richard Weinberger <richard@sigma-star.at>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Acked-by: Richard Weinberger <richard@nod.at>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/um/include/asm/Kbuild |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/um/include/asm/Kbuild
++++ b/arch/um/include/asm/Kbuild
+@@ -1,4 +1,5 @@
+ generic-y += barrier.h
++generic-y += bpf_perf_event.h
+ generic-y += bug.h
+ generic-y += clkdev.h
+ generic-y += current.h
diff --git a/queue-4.14/drivers-misc-intel-pti-rename-the-header-file-to-free-up-the-namespace.patch b/queue-4.14/drivers-misc-intel-pti-rename-the-header-file-to-free-up-the-namespace.patch
new file mode 100644 (file)
index 0000000..d14ee6d
--- /dev/null
@@ -0,0 +1,131 @@
+From 1784f9144b143a1e8b19fe94083b040aa559182b Mon Sep 17 00:00:00 2001
+From: Ingo Molnar <mingo@kernel.org>
+Date: Tue, 5 Dec 2017 14:14:47 +0100
+Subject: drivers/misc/intel/pti: Rename the header file to free up the namespace
+
+From: Ingo Molnar <mingo@kernel.org>
+
+commit 1784f9144b143a1e8b19fe94083b040aa559182b upstream.
+
+We'd like to use the 'PTI' acronym for 'Page Table Isolation' - free up the
+namespace by renaming the <linux/pti.h> driver header to <linux/intel-pti.h>.
+
+(Also standardize the header guard name while at it.)
+
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: J Freyensee <james_p_freyensee@linux.intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/misc/pti.c        |    2 +-
+ include/linux/intel-pti.h |   43 +++++++++++++++++++++++++++++++++++++++++++
+ include/linux/pti.h       |   43 -------------------------------------------
+ 3 files changed, 44 insertions(+), 44 deletions(-)
+
+--- a/drivers/misc/pti.c
++++ b/drivers/misc/pti.c
+@@ -32,7 +32,7 @@
+ #include <linux/pci.h>
+ #include <linux/mutex.h>
+ #include <linux/miscdevice.h>
+-#include <linux/pti.h>
++#include <linux/intel-pti.h>
+ #include <linux/slab.h>
+ #include <linux/uaccess.h>
+--- /dev/null
++++ b/include/linux/intel-pti.h
+@@ -0,0 +1,43 @@
++/*
++ *  Copyright (C) Intel 2011
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++ *
++ * The PTI (Parallel Trace Interface) driver directs trace data routed from
++ * various parts in the system out through the Intel Penwell PTI port and
++ * out of the mobile device for analysis with a debugging tool
++ * (Lauterbach, Fido). This is part of a solution for the MIPI P1149.7,
++ * compact JTAG, standard.
++ *
++ * This header file will allow other parts of the OS to use the
++ * interface to write out it's contents for debugging a mobile system.
++ */
++
++#ifndef LINUX_INTEL_PTI_H_
++#define LINUX_INTEL_PTI_H_
++
++/* offset for last dword of any PTI message. Part of MIPI P1149.7 */
++#define PTI_LASTDWORD_DTS     0x30
++
++/* basic structure used as a write address to the PTI HW */
++struct pti_masterchannel {
++      u8 master;
++      u8 channel;
++};
++
++/* the following functions are defined in misc/pti.c */
++void pti_writedata(struct pti_masterchannel *mc, u8 *buf, int count);
++struct pti_masterchannel *pti_request_masterchannel(u8 type,
++                                                  const char *thread_name);
++void pti_release_masterchannel(struct pti_masterchannel *mc);
++
++#endif /* LINUX_INTEL_PTI_H_ */
+--- a/include/linux/pti.h
++++ /dev/null
+@@ -1,43 +0,0 @@
+-/*
+- *  Copyright (C) Intel 2011
+- *
+- * This program is free software; you can redistribute it and/or modify
+- * it under the terms of the GNU General Public License version 2 as
+- * published by the Free Software Foundation.
+- *
+- * This program is distributed in the hope that it will be useful,
+- * but WITHOUT ANY WARRANTY; without even the implied warranty of
+- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+- * GNU General Public License for more details.
+- *
+- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+- *
+- * The PTI (Parallel Trace Interface) driver directs trace data routed from
+- * various parts in the system out through the Intel Penwell PTI port and
+- * out of the mobile device for analysis with a debugging tool
+- * (Lauterbach, Fido). This is part of a solution for the MIPI P1149.7,
+- * compact JTAG, standard.
+- *
+- * This header file will allow other parts of the OS to use the
+- * interface to write out it's contents for debugging a mobile system.
+- */
+-
+-#ifndef PTI_H_
+-#define PTI_H_
+-
+-/* offset for last dword of any PTI message. Part of MIPI P1149.7 */
+-#define PTI_LASTDWORD_DTS     0x30
+-
+-/* basic structure used as a write address to the PTI HW */
+-struct pti_masterchannel {
+-      u8 master;
+-      u8 channel;
+-};
+-
+-/* the following functions are defined in misc/pti.c */
+-void pti_writedata(struct pti_masterchannel *mc, u8 *buf, int count);
+-struct pti_masterchannel *pti_request_masterchannel(u8 type,
+-                                                  const char *thread_name);
+-void pti_release_masterchannel(struct pti_masterchannel *mc);
+-
+-#endif /*PTI_H_*/
diff --git a/queue-4.14/locking-barriers-add-implicit-smp_read_barrier_depends-to-read_once.patch b/queue-4.14/locking-barriers-add-implicit-smp_read_barrier_depends-to-read_once.patch
new file mode 100644 (file)
index 0000000..ac2f19a
--- /dev/null
@@ -0,0 +1,43 @@
+From c2bc66082e1048c7573d72e62f597bdc5ce13fea Mon Sep 17 00:00:00 2001
+From: Will Deacon <will.deacon@arm.com>
+Date: Tue, 24 Oct 2017 11:22:47 +0100
+Subject: locking/barriers: Add implicit smp_read_barrier_depends() to READ_ONCE()
+
+From: Will Deacon <will.deacon@arm.com>
+
+commit c2bc66082e1048c7573d72e62f597bdc5ce13fea upstream.
+
+[ Note, this is a Git cherry-pick of the following commit:
+
+    76ebbe78f739 ("locking/barriers: Add implicit smp_read_barrier_depends() to READ_ONCE()")
+
+  ... for easier x86 PTI code testing and back-porting. ]
+
+In preparation for the removal of lockless_dereference(), which is the
+same as READ_ONCE() on all architectures other than Alpha, add an
+implicit smp_read_barrier_depends() to READ_ONCE() so that it can be
+used to head dependency chains on all architectures.
+
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/1508840570-22169-3-git-send-email-will.deacon@arm.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/compiler.h |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/include/linux/compiler.h
++++ b/include/linux/compiler.h
+@@ -341,6 +341,7 @@ static __always_inline void __write_once
+               __read_once_size(&(x), __u.__c, sizeof(x));             \
+       else                                                            \
+               __read_once_size_nocheck(&(x), __u.__c, sizeof(x));     \
++      smp_read_barrier_depends(); /* Enforce dependency ordering from x */ \
+       __u.__val;                                                      \
+ })
+ #define READ_ONCE(x) __READ_ONCE(x, 1)
diff --git a/queue-4.14/locking-barriers-convert-users-of-lockless_dereference-to-read_once.patch b/queue-4.14/locking-barriers-convert-users-of-lockless_dereference-to-read_once.patch
new file mode 100644 (file)
index 0000000..50ee566
--- /dev/null
@@ -0,0 +1,290 @@
+From 3382290ed2d5e275429cef510ab21889d3ccd164 Mon Sep 17 00:00:00 2001
+From: Will Deacon <will.deacon@arm.com>
+Date: Tue, 24 Oct 2017 11:22:48 +0100
+Subject: locking/barriers: Convert users of lockless_dereference() to READ_ONCE()
+
+From: Will Deacon <will.deacon@arm.com>
+
+commit 3382290ed2d5e275429cef510ab21889d3ccd164 upstream.
+
+[ Note, this is a Git cherry-pick of the following commit:
+
+    506458efaf15 ("locking/barriers: Convert users of lockless_dereference() to READ_ONCE()")
+
+  ... for easier x86 PTI code testing and back-porting. ]
+
+READ_ONCE() now has an implicit smp_read_barrier_depends() call, so it
+can be used instead of lockless_dereference() without any change in
+semantics.
+
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/1508840570-22169-4-git-send-email-will.deacon@arm.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/events/core.c             |    2 +-
+ arch/x86/include/asm/mmu_context.h |    4 ++--
+ arch/x86/kernel/ldt.c              |    2 +-
+ drivers/md/dm-mpath.c              |   20 ++++++++++----------
+ fs/dcache.c                        |    4 ++--
+ fs/overlayfs/ovl_entry.h           |    2 +-
+ fs/overlayfs/readdir.c             |    2 +-
+ include/linux/rculist.h            |    4 ++--
+ include/linux/rcupdate.h           |    4 ++--
+ kernel/events/core.c               |    4 ++--
+ kernel/seccomp.c                   |    2 +-
+ kernel/task_work.c                 |    2 +-
+ mm/slab.h                          |    2 +-
+ 13 files changed, 27 insertions(+), 27 deletions(-)
+
+--- a/arch/x86/events/core.c
++++ b/arch/x86/events/core.c
+@@ -2371,7 +2371,7 @@ static unsigned long get_segment_base(un
+               struct ldt_struct *ldt;
+               /* IRQs are off, so this synchronizes with smp_store_release */
+-              ldt = lockless_dereference(current->active_mm->context.ldt);
++              ldt = READ_ONCE(current->active_mm->context.ldt);
+               if (!ldt || idx >= ldt->nr_entries)
+                       return 0;
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -73,8 +73,8 @@ static inline void load_mm_ldt(struct mm
+ #ifdef CONFIG_MODIFY_LDT_SYSCALL
+       struct ldt_struct *ldt;
+-      /* lockless_dereference synchronizes with smp_store_release */
+-      ldt = lockless_dereference(mm->context.ldt);
++      /* READ_ONCE synchronizes with smp_store_release */
++      ldt = READ_ONCE(mm->context.ldt);
+       /*
+        * Any change to mm->context.ldt is followed by an IPI to all
+--- a/arch/x86/kernel/ldt.c
++++ b/arch/x86/kernel/ldt.c
+@@ -103,7 +103,7 @@ static void finalize_ldt_struct(struct l
+ static void install_ldt(struct mm_struct *current_mm,
+                       struct ldt_struct *ldt)
+ {
+-      /* Synchronizes with lockless_dereference in load_mm_ldt. */
++      /* Synchronizes with READ_ONCE in load_mm_ldt. */
+       smp_store_release(&current_mm->context.ldt, ldt);
+       /* Activate the LDT for all CPUs using current_mm. */
+--- a/drivers/md/dm-mpath.c
++++ b/drivers/md/dm-mpath.c
+@@ -366,7 +366,7 @@ static struct pgpath *choose_path_in_pg(
+       pgpath = path_to_pgpath(path);
+-      if (unlikely(lockless_dereference(m->current_pg) != pg)) {
++      if (unlikely(READ_ONCE(m->current_pg) != pg)) {
+               /* Only update current_pgpath if pg changed */
+               spin_lock_irqsave(&m->lock, flags);
+               m->current_pgpath = pgpath;
+@@ -390,7 +390,7 @@ static struct pgpath *choose_pgpath(stru
+       }
+       /* Were we instructed to switch PG? */
+-      if (lockless_dereference(m->next_pg)) {
++      if (READ_ONCE(m->next_pg)) {
+               spin_lock_irqsave(&m->lock, flags);
+               pg = m->next_pg;
+               if (!pg) {
+@@ -406,7 +406,7 @@ static struct pgpath *choose_pgpath(stru
+       /* Don't change PG until it has no remaining paths */
+ check_current_pg:
+-      pg = lockless_dereference(m->current_pg);
++      pg = READ_ONCE(m->current_pg);
+       if (pg) {
+               pgpath = choose_path_in_pg(m, pg, nr_bytes);
+               if (!IS_ERR_OR_NULL(pgpath))
+@@ -473,7 +473,7 @@ static int multipath_clone_and_map(struc
+       struct request *clone;
+       /* Do we need to select a new pgpath? */
+-      pgpath = lockless_dereference(m->current_pgpath);
++      pgpath = READ_ONCE(m->current_pgpath);
+       if (!pgpath || !test_bit(MPATHF_QUEUE_IO, &m->flags))
+               pgpath = choose_pgpath(m, nr_bytes);
+@@ -533,7 +533,7 @@ static int __multipath_map_bio(struct mu
+       bool queue_io;
+       /* Do we need to select a new pgpath? */
+-      pgpath = lockless_dereference(m->current_pgpath);
++      pgpath = READ_ONCE(m->current_pgpath);
+       queue_io = test_bit(MPATHF_QUEUE_IO, &m->flags);
+       if (!pgpath || !queue_io)
+               pgpath = choose_pgpath(m, nr_bytes);
+@@ -1802,7 +1802,7 @@ static int multipath_prepare_ioctl(struc
+       struct pgpath *current_pgpath;
+       int r;
+-      current_pgpath = lockless_dereference(m->current_pgpath);
++      current_pgpath = READ_ONCE(m->current_pgpath);
+       if (!current_pgpath)
+               current_pgpath = choose_pgpath(m, 0);
+@@ -1824,7 +1824,7 @@ static int multipath_prepare_ioctl(struc
+       }
+       if (r == -ENOTCONN) {
+-              if (!lockless_dereference(m->current_pg)) {
++              if (!READ_ONCE(m->current_pg)) {
+                       /* Path status changed, redo selection */
+                       (void) choose_pgpath(m, 0);
+               }
+@@ -1893,9 +1893,9 @@ static int multipath_busy(struct dm_targ
+               return (m->queue_mode != DM_TYPE_MQ_REQUEST_BASED);
+       /* Guess which priority_group will be used at next mapping time */
+-      pg = lockless_dereference(m->current_pg);
+-      next_pg = lockless_dereference(m->next_pg);
+-      if (unlikely(!lockless_dereference(m->current_pgpath) && next_pg))
++      pg = READ_ONCE(m->current_pg);
++      next_pg = READ_ONCE(m->next_pg);
++      if (unlikely(!READ_ONCE(m->current_pgpath) && next_pg))
+               pg = next_pg;
+       if (!pg) {
+--- a/fs/dcache.c
++++ b/fs/dcache.c
+@@ -231,7 +231,7 @@ static inline int dentry_cmp(const struc
+ {
+       /*
+        * Be careful about RCU walk racing with rename:
+-       * use 'lockless_dereference' to fetch the name pointer.
++       * use 'READ_ONCE' to fetch the name pointer.
+        *
+        * NOTE! Even if a rename will mean that the length
+        * was not loaded atomically, we don't care. The
+@@ -245,7 +245,7 @@ static inline int dentry_cmp(const struc
+        * early because the data cannot match (there can
+        * be no NUL in the ct/tcount data)
+        */
+-      const unsigned char *cs = lockless_dereference(dentry->d_name.name);
++      const unsigned char *cs = READ_ONCE(dentry->d_name.name);
+       return dentry_string_cmp(cs, ct, tcount);
+ }
+--- a/fs/overlayfs/ovl_entry.h
++++ b/fs/overlayfs/ovl_entry.h
+@@ -77,5 +77,5 @@ static inline struct ovl_inode *OVL_I(st
+ static inline struct dentry *ovl_upperdentry_dereference(struct ovl_inode *oi)
+ {
+-      return lockless_dereference(oi->__upperdentry);
++      return READ_ONCE(oi->__upperdentry);
+ }
+--- a/fs/overlayfs/readdir.c
++++ b/fs/overlayfs/readdir.c
+@@ -757,7 +757,7 @@ static int ovl_dir_fsync(struct file *fi
+       if (!od->is_upper && OVL_TYPE_UPPER(ovl_path_type(dentry))) {
+               struct inode *inode = file_inode(file);
+-              realfile = lockless_dereference(od->upperfile);
++              realfile = READ_ONCE(od->upperfile);
+               if (!realfile) {
+                       struct path upperpath;
+--- a/include/linux/rculist.h
++++ b/include/linux/rculist.h
+@@ -275,7 +275,7 @@ static inline void list_splice_tail_init
+  * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock().
+  */
+ #define list_entry_rcu(ptr, type, member) \
+-      container_of(lockless_dereference(ptr), type, member)
++      container_of(READ_ONCE(ptr), type, member)
+ /*
+  * Where are list_empty_rcu() and list_first_entry_rcu()?
+@@ -368,7 +368,7 @@ static inline void list_splice_tail_init
+  * example is when items are added to the list, but never deleted.
+  */
+ #define list_entry_lockless(ptr, type, member) \
+-      container_of((typeof(ptr))lockless_dereference(ptr), type, member)
++      container_of((typeof(ptr))READ_ONCE(ptr), type, member)
+ /**
+  * list_for_each_entry_lockless - iterate over rcu list of given type
+--- a/include/linux/rcupdate.h
++++ b/include/linux/rcupdate.h
+@@ -346,7 +346,7 @@ static inline void rcu_preempt_sleep_che
+ #define __rcu_dereference_check(p, c, space) \
+ ({ \
+       /* Dependency order vs. p above. */ \
+-      typeof(*p) *________p1 = (typeof(*p) *__force)lockless_dereference(p); \
++      typeof(*p) *________p1 = (typeof(*p) *__force)READ_ONCE(p); \
+       RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_check() usage"); \
+       rcu_dereference_sparse(p, space); \
+       ((typeof(*p) __force __kernel *)(________p1)); \
+@@ -360,7 +360,7 @@ static inline void rcu_preempt_sleep_che
+ #define rcu_dereference_raw(p) \
+ ({ \
+       /* Dependency order vs. p above. */ \
+-      typeof(p) ________p1 = lockless_dereference(p); \
++      typeof(p) ________p1 = READ_ONCE(p); \
+       ((typeof(*p) __force __kernel *)(________p1)); \
+ })
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -4233,7 +4233,7 @@ static void perf_remove_from_owner(struc
+        * indeed free this event, otherwise we need to serialize on
+        * owner->perf_event_mutex.
+        */
+-      owner = lockless_dereference(event->owner);
++      owner = READ_ONCE(event->owner);
+       if (owner) {
+               /*
+                * Since delayed_put_task_struct() also drops the last
+@@ -4330,7 +4330,7 @@ again:
+                * Cannot change, child events are not migrated, see the
+                * comment with perf_event_ctx_lock_nested().
+                */
+-              ctx = lockless_dereference(child->ctx);
++              ctx = READ_ONCE(child->ctx);
+               /*
+                * Since child_mutex nests inside ctx::mutex, we must jump
+                * through hoops. We start by grabbing a reference on the ctx.
+--- a/kernel/seccomp.c
++++ b/kernel/seccomp.c
+@@ -190,7 +190,7 @@ static u32 seccomp_run_filters(const str
+       u32 ret = SECCOMP_RET_ALLOW;
+       /* Make sure cross-thread synced filter points somewhere sane. */
+       struct seccomp_filter *f =
+-                      lockless_dereference(current->seccomp.filter);
++                      READ_ONCE(current->seccomp.filter);
+       /* Ensure unexpected behavior doesn't result in failing open. */
+       if (unlikely(WARN_ON(f == NULL)))
+--- a/kernel/task_work.c
++++ b/kernel/task_work.c
+@@ -68,7 +68,7 @@ task_work_cancel(struct task_struct *tas
+        * we raced with task_work_run(), *pprev == NULL/exited.
+        */
+       raw_spin_lock_irqsave(&task->pi_lock, flags);
+-      while ((work = lockless_dereference(*pprev))) {
++      while ((work = READ_ONCE(*pprev))) {
+               if (work->func != func)
+                       pprev = &work->next;
+               else if (cmpxchg(pprev, work, work->next) == work)
+--- a/mm/slab.h
++++ b/mm/slab.h
+@@ -259,7 +259,7 @@ cache_from_memcg_idx(struct kmem_cache *
+        * memcg_caches issues a write barrier to match this (see
+        * memcg_create_kmem_cache()).
+        */
+-      cachep = lockless_dereference(arr->entries[idx]);
++      cachep = READ_ONCE(arr->entries[idx]);
+       rcu_read_unlock();
+       return cachep;
diff --git a/queue-4.14/perf-x86-enable-free-running-pebs-for-regs_user-intr.patch b/queue-4.14/perf-x86-enable-free-running-pebs-for-regs_user-intr.patch
new file mode 100644 (file)
index 0000000..709c25a
--- /dev/null
@@ -0,0 +1,98 @@
+From 2fe1bc1f501d55e5925b4035bcd85781adc76c63 Mon Sep 17 00:00:00 2001
+From: Andi Kleen <ak@linux.intel.com>
+Date: Thu, 31 Aug 2017 14:46:30 -0700
+Subject: perf/x86: Enable free running PEBS for REGS_USER/INTR
+
+From: Andi Kleen <ak@linux.intel.com>
+
+commit 2fe1bc1f501d55e5925b4035bcd85781adc76c63 upstream.
+
+[ Note, this is a Git cherry-pick of the following commit:
+
+    a47ba4d77e12 ("perf/x86: Enable free running PEBS for REGS_USER/INTR")
+
+  ... for easier x86 PTI code testing and back-porting. ]
+
+Currently free running PEBS is disabled when user or interrupt
+registers are requested. Most of the registers are actually
+available in the PEBS record and can be supported.
+
+So we just need to check for the supported registers and then
+allow it: all registers except the segment registers are supported.
+
+For user registers this only works when the counter is limited
+to ring 3, so this also needs to be checked.
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/20170831214630.21892-1-andi@firstfloor.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/events/intel/core.c |    4 ++++
+ arch/x86/events/perf_event.h |   24 +++++++++++++++++++++++-
+ 2 files changed, 27 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/events/intel/core.c
++++ b/arch/x86/events/intel/core.c
+@@ -2958,6 +2958,10 @@ static unsigned long intel_pmu_free_runn
+       if (event->attr.use_clockid)
+               flags &= ~PERF_SAMPLE_TIME;
++      if (!event->attr.exclude_kernel)
++              flags &= ~PERF_SAMPLE_REGS_USER;
++      if (event->attr.sample_regs_user & ~PEBS_REGS)
++              flags &= ~(PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR);
+       return flags;
+ }
+--- a/arch/x86/events/perf_event.h
++++ b/arch/x86/events/perf_event.h
+@@ -85,13 +85,15 @@ struct amd_nb {
+  * Flags PEBS can handle without an PMI.
+  *
+  * TID can only be handled by flushing at context switch.
++ * REGS_USER can be handled for events limited to ring 3.
+  *
+  */
+ #define PEBS_FREERUNNING_FLAGS \
+       (PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
+       PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
+       PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
+-      PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR)
++      PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \
++      PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)
+ /*
+  * A debug store configuration.
+@@ -110,6 +112,26 @@ struct debug_store {
+       u64     pebs_event_reset[MAX_PEBS_EVENTS];
+ };
++#define PEBS_REGS \
++      (PERF_REG_X86_AX | \
++       PERF_REG_X86_BX | \
++       PERF_REG_X86_CX | \
++       PERF_REG_X86_DX | \
++       PERF_REG_X86_DI | \
++       PERF_REG_X86_SI | \
++       PERF_REG_X86_SP | \
++       PERF_REG_X86_BP | \
++       PERF_REG_X86_IP | \
++       PERF_REG_X86_FLAGS | \
++       PERF_REG_X86_R8 | \
++       PERF_REG_X86_R9 | \
++       PERF_REG_X86_R10 | \
++       PERF_REG_X86_R11 | \
++       PERF_REG_X86_R12 | \
++       PERF_REG_X86_R13 | \
++       PERF_REG_X86_R14 | \
++       PERF_REG_X86_R15)
++
+ /*
+  * Per register state.
+  */
diff --git a/queue-4.14/selftests-x86-ldt_gdt-add-infrastructure-to-test-set_thread_area.patch b/queue-4.14/selftests-x86-ldt_gdt-add-infrastructure-to-test-set_thread_area.patch
new file mode 100644 (file)
index 0000000..db650d0
--- /dev/null
@@ -0,0 +1,104 @@
+From d744dcad39094c9187075e274d1cdef79c57c8b5 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sat, 4 Nov 2017 04:19:50 -0700
+Subject: selftests/x86/ldt_gdt: Add infrastructure to test set_thread_area()
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit d744dcad39094c9187075e274d1cdef79c57c8b5 upstream.
+
+Much of the test design could apply to set_thread_area() (i.e. GDT),
+not just modify_ldt().  Add set_thread_area() to the
+install_valid_mode() helper.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/02c23f8fba5547007f741dc24c3926e5284ede02.1509794321.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/testing/selftests/x86/ldt_gdt.c |   53 +++++++++++++++++++++++-----------
+ 1 file changed, 37 insertions(+), 16 deletions(-)
+
+--- a/tools/testing/selftests/x86/ldt_gdt.c
++++ b/tools/testing/selftests/x86/ldt_gdt.c
+@@ -137,30 +137,51 @@ static void check_valid_segment(uint16_t
+       }
+ }
+-static bool install_valid_mode(const struct user_desc *desc, uint32_t ar,
+-                             bool oldmode)
++static bool install_valid_mode(const struct user_desc *d, uint32_t ar,
++                             bool oldmode, bool ldt)
+ {
+-      int ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11,
+-                        desc, sizeof(*desc));
+-      if (ret < -1)
+-              errno = -ret;
++      struct user_desc desc = *d;
++      int ret;
++
++      if (!ldt) {
++#ifndef __i386__
++              /* No point testing set_thread_area in a 64-bit build */
++              return false;
++#endif
++              if (!gdt_entry_num)
++                      return false;
++              desc.entry_number = gdt_entry_num;
++
++              ret = syscall(SYS_set_thread_area, &desc);
++      } else {
++              ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11,
++                            &desc, sizeof(desc));
++
++              if (ret < -1)
++                      errno = -ret;
++
++              if (ret != 0 && errno == ENOSYS) {
++                      printf("[OK]\tmodify_ldt returned -ENOSYS\n");
++                      return false;
++              }
++      }
++
+       if (ret == 0) {
+-              uint32_t limit = desc->limit;
+-              if (desc->limit_in_pages)
++              uint32_t limit = desc.limit;
++              if (desc.limit_in_pages)
+                       limit = (limit << 12) + 4095;
+-              check_valid_segment(desc->entry_number, 1, ar, limit, true);
++              check_valid_segment(desc.entry_number, ldt, ar, limit, true);
+               return true;
+-      } else if (errno == ENOSYS) {
+-              printf("[OK]\tmodify_ldt returned -ENOSYS\n");
+-              return false;
+       } else {
+-              if (desc->seg_32bit) {
+-                      printf("[FAIL]\tUnexpected modify_ldt failure %d\n",
++              if (desc.seg_32bit) {
++                      printf("[FAIL]\tUnexpected %s failure %d\n",
++                             ldt ? "modify_ldt" : "set_thread_area",
+                              errno);
+                       nerrs++;
+                       return false;
+               } else {
+-                      printf("[OK]\tmodify_ldt rejected 16 bit segment\n");
++                      printf("[OK]\t%s rejected 16 bit segment\n",
++                             ldt ? "modify_ldt" : "set_thread_area");
+                       return false;
+               }
+       }
+@@ -168,7 +189,7 @@ static bool install_valid_mode(const str
+ static bool install_valid(const struct user_desc *desc, uint32_t ar)
+ {
+-      return install_valid_mode(desc, ar, false);
++      return install_valid_mode(desc, ar, false, true);
+ }
+ static void install_invalid(const struct user_desc *desc, bool oldmode)
diff --git a/queue-4.14/selftests-x86-ldt_gdt-run-most-existing-ldt-test-cases-against-the-gdt-as-well.patch b/queue-4.14/selftests-x86-ldt_gdt-run-most-existing-ldt-test-cases-against-the-gdt-as-well.patch
new file mode 100644 (file)
index 0000000..7919048
--- /dev/null
@@ -0,0 +1,44 @@
+From adedf2893c192dd09b1cc2f2dcfdd7cad99ec49d Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sat, 4 Nov 2017 04:19:51 -0700
+Subject: selftests/x86/ldt_gdt: Run most existing LDT test cases against the GDT as well
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit adedf2893c192dd09b1cc2f2dcfdd7cad99ec49d upstream.
+
+Now that the main test infrastructure supports the GDT, also run against
+the GDT those test cases that will pass the kernel's GDT permission checks.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/686a1eda63414da38fcecc2412db8dba1ae40581.1509794321.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/testing/selftests/x86/ldt_gdt.c |   10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/tools/testing/selftests/x86/ldt_gdt.c
++++ b/tools/testing/selftests/x86/ldt_gdt.c
+@@ -189,7 +189,15 @@ static bool install_valid_mode(const str
+ static bool install_valid(const struct user_desc *desc, uint32_t ar)
+ {
+-      return install_valid_mode(desc, ar, false, true);
++      bool ret = install_valid_mode(desc, ar, false, true);
++
++      if (desc->contents <= 1 && desc->seg_32bit &&
++          !desc->seg_not_present) {
++              /* Should work in the GDT, too. */
++              install_valid_mode(desc, ar, false, false);
++      }
++
++      return ret;
+ }
+ static void install_invalid(const struct user_desc *desc, bool oldmode)
index 2d0c808f8db347907f8b7287fb4d2283f5560f2d..0bc857d9d1431d054fcc3efadef146ee94fa2036 100644 (file)
@@ -32,6 +32,71 @@ x86-cpufeatures-enable-new-sse-avx-avx512-cpu-features.patch
 x86-mm-relocate-page-fault-error-codes-to-traps.h.patch
 x86-boot-relocate-definition-of-the-initial-state-of-cr0.patch
 ptrace-x86-make-user_64bit_mode-available-to-32-bit-builds.patch
+x86-entry-64-remove-the-restore_c_regs_and_iret-label.patch
+x86-entry-64-split-the-iret-to-user-and-iret-to-kernel-paths.patch
+x86-entry-64-move-swapgs-into-the-common-iret-to-usermode-path.patch
+x86-entry-64-simplify-reg-restore-code-in-the-standard-iret-paths.patch
+x86-entry-64-shrink-paranoid_exit_restore-and-make-labels-local.patch
+x86-entry-64-use-pop-instead-of-movq-in-syscall_return_via_sysret.patch
+x86-entry-64-merge-the-fast-and-slow-sysret-paths.patch
+x86-entry-64-use-pop-instead-of-mov-to-restore-regs-on-nmi-return.patch
+x86-entry-64-remove-the-restore_..._regs-infrastructure.patch
+xen-x86-entry-64-add-xen-nmi-trap-entry.patch
+x86-entry-64-de-xen-ify-our-nmi-code.patch
+x86-entry-32-pull-the-msr_ia32_sysenter_cs-update-code-out-of-native_load_sp0.patch
+x86-entry-64-pass-sp0-directly-to-load_sp0.patch
+x86-entry-add-task_top_of_stack-to-find-the-top-of-a-task-s-stack.patch
+x86-xen-64-x86-entry-64-clean-up-sp-code-in-cpu_initialize_context.patch
+x86-entry-64-stop-initializing-tss.sp0-at-boot.patch
+x86-entry-64-remove-all-remaining-direct-thread_struct-sp0-reads.patch
+x86-entry-32-fix-cpu_current_top_of_stack-initialization-at-boot.patch
+x86-entry-64-remove-thread_struct-sp0.patch
+x86-traps-use-a-new-on_thread_stack-helper-to-clean-up-an-assertion.patch
+x86-entry-64-shorten-test-instructions.patch
+x86-cpuid-replace-set-clear_bit32.patch
+bitops-revert-cbe96375025e-bitops-add-clear-set_bit32-to-linux-bitops.h.patch
+x86-mm-define-_page_table-using-_kernpg_table.patch
+x86-cpufeatures-re-tabulate-the-x86_feature-definitions.patch
+x86-cpufeatures-fix-various-details-in-the-feature-definitions.patch
+selftests-x86-ldt_gdt-add-infrastructure-to-test-set_thread_area.patch
+selftests-x86-ldt_gdt-run-most-existing-ldt-test-cases-against-the-gdt-as-well.patch
+acpi-apei-replace-ioremap_page_range-with-fixmap.patch
+x86-virt-x86-platform-merge-struct-x86_hyper-into-struct-x86_platform-and-struct-x86_init.patch
+x86-virt-add-enum-for-hypervisors-to-replace-x86_hyper.patch
+drivers-misc-intel-pti-rename-the-header-file-to-free-up-the-namespace.patch
+x86-cpufeature-add-user-mode-instruction-prevention-definitions.patch
+x86-make-x86_bug_fxsave_leak-detectable-in-cpuid-on-amd.patch
+perf-x86-enable-free-running-pebs-for-regs_user-intr.patch
+bpf-fix-build-issues-on-um-due-to-mising-bpf_perf_event.h.patch
+locking-barriers-add-implicit-smp_read_barrier_depends-to-read_once.patch
+locking-barriers-convert-users-of-lockless_dereference-to-read_once.patch
+x86-mm-kasan-don-t-use-vmemmap_populate-to-initialize-shadow.patch
+x86-entry-64-paravirt-use-paravirt-safe-macro-to-access-eflags.patch
+x86-unwinder-orc-dont-bail-on-stack-overflow.patch
+x86-unwinder-handle-stack-overflows-more-gracefully.patch
+x86-irq-remove-an-old-outdated-comment-about-context-tracking-races.patch
+x86-irq-64-print-the-offending-ip-in-the-stack-overflow-warning.patch
+x86-entry-64-allocate-and-enable-the-sysenter-stack.patch
+x86-dumpstack-add-get_stack_info-support-for-the-sysenter-stack.patch
+x86-entry-gdt-put-per-cpu-gdt-remaps-in-ascending-order.patch
+x86-mm-fixmap-generalize-the-gdt-fixmap-mechanism-introduce-struct-cpu_entry_area.patch
+x86-kasan-64-teach-kasan-about-the-cpu_entry_area.patch
+x86-entry-fix-assumptions-that-the-hw-tss-is-at-the-beginning-of-cpu_tss.patch
+x86-dumpstack-handle-stack-overflow-on-all-stacks.patch
+x86-entry-move-sysenter_stack-to-the-beginning-of-struct-tss_struct.patch
+x86-entry-remap-the-tss-into-the-cpu-entry-area.patch
+x86-entry-64-separate-cpu_current_top_of_stack-from-tss.sp0.patch
+x86-espfix-64-stop-assuming-that-pt_regs-is-on-the-entry-stack.patch
+x86-entry-64-use-a-per-cpu-trampoline-stack-for-idt-entries.patch
+x86-entry-64-return-to-userspace-from-the-trampoline-stack.patch
+x86-entry-64-create-a-per-cpu-syscall-entry-trampoline.patch
+x86-entry-64-move-the-ist-stacks-into-struct-cpu_entry_area.patch
+x86-entry-64-remove-the-sysenter-stack-canary.patch
+x86-entry-clean-up-the-sysenter_stack-code.patch
+x86-entry-64-make-cpu_entry_area.tss-read-only.patch
+x86-paravirt-dont-patch-flush_tlb_single.patch
+x86-paravirt-provide-a-way-to-check-for-hypervisors.patch
+x86-cpufeatures-make-cpu-bugs-sticky.patch
 optee-fix-invalid-of_node_put-in-optee_driver_init.patch
 backlight-pwm_bl-fix-overflow-condition.patch
 drm-add-retries-for-lspcon-mode-detection.patch
diff --git a/queue-4.14/x86-cpufeature-add-user-mode-instruction-prevention-definitions.patch b/queue-4.14/x86-cpufeature-add-user-mode-instruction-prevention-definitions.patch
new file mode 100644 (file)
index 0000000..d254e00
--- /dev/null
@@ -0,0 +1,78 @@
+From a8b4db562e7283a1520f9e9730297ecaab7622ea Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Sun, 5 Nov 2017 18:27:51 -0800
+Subject: x86/cpufeature: Add User-Mode Instruction Prevention definitions
+
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+
+commit a8b4db562e7283a1520f9e9730297ecaab7622ea upstream.
+
+[ Note, this is a Git cherry-pick of the following commit: (limited to the cpufeatures.h file)
+
+    3522c2a6a4f3 ("x86/cpufeature: Add User-Mode Instruction Prevention definitions")
+
+  ... for easier x86 PTI code testing and back-porting. ]
+
+User-Mode Instruction Prevention is a security feature present in new
+Intel processors that, when set, prevents the execution of a subset of
+instructions if such instructions are executed in user mode (CPL > 0).
+Attempting to execute such instructions causes a general protection
+exception.
+
+The subset of instructions comprises:
+
+ * SGDT - Store Global Descriptor Table
+ * SIDT - Store Interrupt Descriptor Table
+ * SLDT - Store Local Descriptor Table
+ * SMSW - Store Machine Status Word
+ * STR  - Store Task Register
+
+This feature is also added to the list of disabled-features to allow
+a cleaner handling of build-time configuration.
+
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Chen Yucong <slaoub@gmail.com>
+Cc: Chris Metcalf <cmetcalf@mellanox.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Fenghua Yu <fenghua.yu@intel.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Huang Rui <ray.huang@amd.com>
+Cc: Jiri Slaby <jslaby@suse.cz>
+Cc: Jonathan Corbet <corbet@lwn.net>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Michael S. Tsirkin <mst@redhat.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Ravi V. Shankar <ravi.v.shankar@intel.com>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: Tony Luck <tony.luck@intel.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: ricardo.neri@intel.com
+Link: http://lkml.kernel.org/r/1509935277-22138-7-git-send-email-ricardo.neri-calderon@linux.intel.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/cpufeatures.h |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -296,6 +296,7 @@
+ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
+ #define X86_FEATURE_AVX512VBMI                (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
++#define X86_FEATURE_UMIP              (16*32+ 2) /* User Mode Instruction Protection */
+ #define X86_FEATURE_PKU                       (16*32+ 3) /* Protection Keys for Userspace */
+ #define X86_FEATURE_OSPKE             (16*32+ 4) /* OS Protection Keys Enable */
+ #define X86_FEATURE_AVX512_VBMI2      (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
diff --git a/queue-4.14/x86-cpufeatures-fix-various-details-in-the-feature-definitions.patch b/queue-4.14/x86-cpufeatures-fix-various-details-in-the-feature-definitions.patch
new file mode 100644 (file)
index 0000000..d2d7306
--- /dev/null
@@ -0,0 +1,359 @@
+From f3a624e901c633593156f7b00ca743a6204a29bc Mon Sep 17 00:00:00 2001
+From: Ingo Molnar <mingo@kernel.org>
+Date: Tue, 31 Oct 2017 13:17:23 +0100
+Subject: x86/cpufeatures: Fix various details in the feature definitions
+
+From: Ingo Molnar <mingo@kernel.org>
+
+commit f3a624e901c633593156f7b00ca743a6204a29bc upstream.
+
+Kept this commit separate from the re-tabulation changes, to make
+the changes easier to review:
+
+ - add better explanation for entries with no explanation
+ - fix/enhance the text of some of the entries
+ - fix the vertical alignment of some of the feature number definitions
+ - fix inconsistent capitalization
+ - ... and lots of other small details
+
+i.e. make it all more of a coherent unit, instead of a patchwork of years of additions.
+
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/20171031121723.28524-4-mingo@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/cpufeatures.h |  149 ++++++++++++++++++-------------------
+ 1 file changed, 74 insertions(+), 75 deletions(-)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -20,14 +20,12 @@
+  * Note: If the comment begins with a quoted string, that string is used
+  * in /proc/cpuinfo instead of the macro name.  If the string is "",
+  * this feature bit is not displayed in /proc/cpuinfo at all.
+- */
+-
+-/*
++ *
+  * When adding new features here that depend on other features,
+- * please update the table in kernel/cpu/cpuid-deps.c
++ * please update the table in kernel/cpu/cpuid-deps.c as well.
+  */
+-/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
++/* Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 */
+ #define X86_FEATURE_FPU                       ( 0*32+ 0) /* Onboard FPU */
+ #define X86_FEATURE_VME                       ( 0*32+ 1) /* Virtual Mode Extensions */
+ #define X86_FEATURE_DE                        ( 0*32+ 2) /* Debugging Extensions */
+@@ -42,8 +40,7 @@
+ #define X86_FEATURE_MTRR              ( 0*32+12) /* Memory Type Range Registers */
+ #define X86_FEATURE_PGE                       ( 0*32+13) /* Page Global Enable */
+ #define X86_FEATURE_MCA                       ( 0*32+14) /* Machine Check Architecture */
+-#define X86_FEATURE_CMOV              ( 0*32+15) /* CMOV instructions */
+-                                        /* (plus FCMOVcc, FCOMI with FPU) */
++#define X86_FEATURE_CMOV              ( 0*32+15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */
+ #define X86_FEATURE_PAT                       ( 0*32+16) /* Page Attribute Table */
+ #define X86_FEATURE_PSE36             ( 0*32+17) /* 36-bit PSEs */
+ #define X86_FEATURE_PN                        ( 0*32+18) /* Processor serial number */
+@@ -63,15 +60,15 @@
+ /* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
+ /* Don't duplicate feature flags which are redundant with Intel! */
+ #define X86_FEATURE_SYSCALL           ( 1*32+11) /* SYSCALL/SYSRET */
+-#define X86_FEATURE_MP                        ( 1*32+19) /* MP Capable. */
++#define X86_FEATURE_MP                        ( 1*32+19) /* MP Capable */
+ #define X86_FEATURE_NX                        ( 1*32+20) /* Execute Disable */
+ #define X86_FEATURE_MMXEXT            ( 1*32+22) /* AMD MMX extensions */
+ #define X86_FEATURE_FXSR_OPT          ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
+ #define X86_FEATURE_GBPAGES           ( 1*32+26) /* "pdpe1gb" GB pages */
+ #define X86_FEATURE_RDTSCP            ( 1*32+27) /* RDTSCP */
+-#define X86_FEATURE_LM                        ( 1*32+29) /* Long Mode (x86-64) */
+-#define X86_FEATURE_3DNOWEXT          ( 1*32+30) /* AMD 3DNow! extensions */
+-#define X86_FEATURE_3DNOW             ( 1*32+31) /* 3DNow! */
++#define X86_FEATURE_LM                        ( 1*32+29) /* Long Mode (x86-64, 64-bit support) */
++#define X86_FEATURE_3DNOWEXT          ( 1*32+30) /* AMD 3DNow extensions */
++#define X86_FEATURE_3DNOW             ( 1*32+31) /* 3DNow */
+ /* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
+ #define X86_FEATURE_RECOVERY          ( 2*32+ 0) /* CPU in recovery mode */
+@@ -84,66 +81,67 @@
+ #define X86_FEATURE_K6_MTRR           ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
+ #define X86_FEATURE_CYRIX_ARR         ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
+ #define X86_FEATURE_CENTAUR_MCR               ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
+-/* cpu types for specific tunings: */
++
++/* CPU types for specific tunings: */
+ #define X86_FEATURE_K8                        ( 3*32+ 4) /* "" Opteron, Athlon64 */
+ #define X86_FEATURE_K7                        ( 3*32+ 5) /* "" Athlon */
+ #define X86_FEATURE_P3                        ( 3*32+ 6) /* "" P3 */
+ #define X86_FEATURE_P4                        ( 3*32+ 7) /* "" P4 */
+ #define X86_FEATURE_CONSTANT_TSC      ( 3*32+ 8) /* TSC ticks at a constant rate */
+-#define X86_FEATURE_UP                        ( 3*32+ 9) /* smp kernel running on up */
+-#define X86_FEATURE_ART                       ( 3*32+10) /* Platform has always running timer (ART) */
++#define X86_FEATURE_UP                        ( 3*32+ 9) /* SMP kernel running on UP */
++#define X86_FEATURE_ART                       ( 3*32+10) /* Always running timer (ART) */
+ #define X86_FEATURE_ARCH_PERFMON      ( 3*32+11) /* Intel Architectural PerfMon */
+ #define X86_FEATURE_PEBS              ( 3*32+12) /* Precise-Event Based Sampling */
+ #define X86_FEATURE_BTS                       ( 3*32+13) /* Branch Trace Store */
+-#define X86_FEATURE_SYSCALL32         ( 3*32+14) /* "" syscall in ia32 userspace */
+-#define X86_FEATURE_SYSENTER32                ( 3*32+15) /* "" sysenter in ia32 userspace */
+-#define X86_FEATURE_REP_GOOD          ( 3*32+16) /* rep microcode works well */
+-#define X86_FEATURE_MFENCE_RDTSC      ( 3*32+17) /* "" Mfence synchronizes RDTSC */
+-#define X86_FEATURE_LFENCE_RDTSC      ( 3*32+18) /* "" Lfence synchronizes RDTSC */
++#define X86_FEATURE_SYSCALL32         ( 3*32+14) /* "" syscall in IA32 userspace */
++#define X86_FEATURE_SYSENTER32                ( 3*32+15) /* "" sysenter in IA32 userspace */
++#define X86_FEATURE_REP_GOOD          ( 3*32+16) /* REP microcode works well */
++#define X86_FEATURE_MFENCE_RDTSC      ( 3*32+17) /* "" MFENCE synchronizes RDTSC */
++#define X86_FEATURE_LFENCE_RDTSC      ( 3*32+18) /* "" LFENCE synchronizes RDTSC */
+ #define X86_FEATURE_ACC_POWER         ( 3*32+19) /* AMD Accumulated Power Mechanism */
+ #define X86_FEATURE_NOPL              ( 3*32+20) /* The NOPL (0F 1F) instructions */
+ #define X86_FEATURE_ALWAYS            ( 3*32+21) /* "" Always-present feature */
+-#define X86_FEATURE_XTOPOLOGY         ( 3*32+22) /* cpu topology enum extensions */
++#define X86_FEATURE_XTOPOLOGY         ( 3*32+22) /* CPU topology enum extensions */
+ #define X86_FEATURE_TSC_RELIABLE      ( 3*32+23) /* TSC is known to be reliable */
+ #define X86_FEATURE_NONSTOP_TSC               ( 3*32+24) /* TSC does not stop in C states */
+ #define X86_FEATURE_CPUID             ( 3*32+25) /* CPU has CPUID instruction itself */
+-#define X86_FEATURE_EXTD_APICID               ( 3*32+26) /* has extended APICID (8 bits) */
+-#define X86_FEATURE_AMD_DCM           ( 3*32+27) /* multi-node processor */
+-#define X86_FEATURE_APERFMPERF                ( 3*32+28) /* APERFMPERF */
++#define X86_FEATURE_EXTD_APICID               ( 3*32+26) /* Extended APICID (8 bits) */
++#define X86_FEATURE_AMD_DCM           ( 3*32+27) /* AMD multi-node processor */
++#define X86_FEATURE_APERFMPERF                ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */
+ #define X86_FEATURE_NONSTOP_TSC_S3    ( 3*32+30) /* TSC doesn't stop in S3 state */
+ #define X86_FEATURE_TSC_KNOWN_FREQ    ( 3*32+31) /* TSC has known frequency */
+-/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
++/* Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 */
+ #define X86_FEATURE_XMM3              ( 4*32+ 0) /* "pni" SSE-3 */
+ #define X86_FEATURE_PCLMULQDQ         ( 4*32+ 1) /* PCLMULQDQ instruction */
+ #define X86_FEATURE_DTES64            ( 4*32+ 2) /* 64-bit Debug Store */
+-#define X86_FEATURE_MWAIT             ( 4*32+ 3) /* "monitor" Monitor/Mwait support */
+-#define X86_FEATURE_DSCPL             ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
++#define X86_FEATURE_MWAIT             ( 4*32+ 3) /* "monitor" MONITOR/MWAIT support */
++#define X86_FEATURE_DSCPL             ( 4*32+ 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */
+ #define X86_FEATURE_VMX                       ( 4*32+ 5) /* Hardware virtualization */
+-#define X86_FEATURE_SMX                       ( 4*32+ 6) /* Safer mode */
++#define X86_FEATURE_SMX                       ( 4*32+ 6) /* Safer Mode eXtensions */
+ #define X86_FEATURE_EST                       ( 4*32+ 7) /* Enhanced SpeedStep */
+ #define X86_FEATURE_TM2                       ( 4*32+ 8) /* Thermal Monitor 2 */
+ #define X86_FEATURE_SSSE3             ( 4*32+ 9) /* Supplemental SSE-3 */
+ #define X86_FEATURE_CID                       ( 4*32+10) /* Context ID */
+ #define X86_FEATURE_SDBG              ( 4*32+11) /* Silicon Debug */
+ #define X86_FEATURE_FMA                       ( 4*32+12) /* Fused multiply-add */
+-#define X86_FEATURE_CX16              ( 4*32+13) /* CMPXCHG16B */
++#define X86_FEATURE_CX16              ( 4*32+13) /* CMPXCHG16B instruction */
+ #define X86_FEATURE_XTPR              ( 4*32+14) /* Send Task Priority Messages */
+-#define X86_FEATURE_PDCM              ( 4*32+15) /* Performance Capabilities */
++#define X86_FEATURE_PDCM              ( 4*32+15) /* Perf/Debug Capabilities MSR */
+ #define X86_FEATURE_PCID              ( 4*32+17) /* Process Context Identifiers */
+ #define X86_FEATURE_DCA                       ( 4*32+18) /* Direct Cache Access */
+ #define X86_FEATURE_XMM4_1            ( 4*32+19) /* "sse4_1" SSE-4.1 */
+ #define X86_FEATURE_XMM4_2            ( 4*32+20) /* "sse4_2" SSE-4.2 */
+-#define X86_FEATURE_X2APIC            ( 4*32+21) /* x2APIC */
++#define X86_FEATURE_X2APIC            ( 4*32+21) /* X2APIC */
+ #define X86_FEATURE_MOVBE             ( 4*32+22) /* MOVBE instruction */
+ #define X86_FEATURE_POPCNT            ( 4*32+23) /* POPCNT instruction */
+-#define X86_FEATURE_TSC_DEADLINE_TIMER        ( 4*32+24) /* Tsc deadline timer */
++#define X86_FEATURE_TSC_DEADLINE_TIMER        ( 4*32+24) /* TSC deadline timer */
+ #define X86_FEATURE_AES                       ( 4*32+25) /* AES instructions */
+-#define X86_FEATURE_XSAVE             ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
+-#define X86_FEATURE_OSXSAVE           ( 4*32+27) /* "" XSAVE enabled in the OS */
++#define X86_FEATURE_XSAVE             ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV instructions */
++#define X86_FEATURE_OSXSAVE           ( 4*32+27) /* "" XSAVE instruction enabled in the OS */
+ #define X86_FEATURE_AVX                       ( 4*32+28) /* Advanced Vector Extensions */
+-#define X86_FEATURE_F16C              ( 4*32+29) /* 16-bit fp conversions */
+-#define X86_FEATURE_RDRAND            ( 4*32+30) /* The RDRAND instruction */
++#define X86_FEATURE_F16C              ( 4*32+29) /* 16-bit FP conversions */
++#define X86_FEATURE_RDRAND            ( 4*32+30) /* RDRAND instruction */
+ #define X86_FEATURE_HYPERVISOR                ( 4*32+31) /* Running on a hypervisor */
+ /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
+@@ -158,10 +156,10 @@
+ #define X86_FEATURE_PMM                       ( 5*32+12) /* PadLock Montgomery Multiplier */
+ #define X86_FEATURE_PMM_EN            ( 5*32+13) /* PMM enabled */
+-/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
++/* More extended AMD flags: CPUID level 0x80000001, ECX, word 6 */
+ #define X86_FEATURE_LAHF_LM           ( 6*32+ 0) /* LAHF/SAHF in long mode */
+ #define X86_FEATURE_CMP_LEGACY                ( 6*32+ 1) /* If yes HyperThreading not valid */
+-#define X86_FEATURE_SVM                       ( 6*32+ 2) /* Secure virtual machine */
++#define X86_FEATURE_SVM                       ( 6*32+ 2) /* Secure Virtual Machine */
+ #define X86_FEATURE_EXTAPIC           ( 6*32+ 3) /* Extended APIC space */
+ #define X86_FEATURE_CR8_LEGACY                ( 6*32+ 4) /* CR8 in 32-bit mode */
+ #define X86_FEATURE_ABM                       ( 6*32+ 5) /* Advanced bit manipulation */
+@@ -175,16 +173,16 @@
+ #define X86_FEATURE_WDT                       ( 6*32+13) /* Watchdog timer */
+ #define X86_FEATURE_LWP                       ( 6*32+15) /* Light Weight Profiling */
+ #define X86_FEATURE_FMA4              ( 6*32+16) /* 4 operands MAC instructions */
+-#define X86_FEATURE_TCE                       ( 6*32+17) /* translation cache extension */
++#define X86_FEATURE_TCE                       ( 6*32+17) /* Translation Cache Extension */
+ #define X86_FEATURE_NODEID_MSR                ( 6*32+19) /* NodeId MSR */
+-#define X86_FEATURE_TBM                       ( 6*32+21) /* trailing bit manipulations */
+-#define X86_FEATURE_TOPOEXT           ( 6*32+22) /* topology extensions CPUID leafs */
+-#define X86_FEATURE_PERFCTR_CORE      ( 6*32+23) /* core performance counter extensions */
++#define X86_FEATURE_TBM                       ( 6*32+21) /* Trailing Bit Manipulations */
++#define X86_FEATURE_TOPOEXT           ( 6*32+22) /* Topology extensions CPUID leafs */
++#define X86_FEATURE_PERFCTR_CORE      ( 6*32+23) /* Core performance counter extensions */
+ #define X86_FEATURE_PERFCTR_NB                ( 6*32+24) /* NB performance counter extensions */
+-#define X86_FEATURE_BPEXT             (6*32+26) /* data breakpoint extension */
+-#define X86_FEATURE_PTSC              ( 6*32+27) /* performance time-stamp counter */
++#define X86_FEATURE_BPEXT             ( 6*32+26) /* Data breakpoint extension */
++#define X86_FEATURE_PTSC              ( 6*32+27) /* Performance time-stamp counter */
+ #define X86_FEATURE_PERFCTR_LLC               ( 6*32+28) /* Last Level Cache performance counter extensions */
+-#define X86_FEATURE_MWAITX            ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
++#define X86_FEATURE_MWAITX            ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX instructions) */
+ /*
+  * Auxiliary flags: Linux defined - For features scattered in various
+@@ -192,7 +190,7 @@
+  *
+  * Reuse free bits when adding new feature flags!
+  */
+-#define X86_FEATURE_RING3MWAIT                ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */
++#define X86_FEATURE_RING3MWAIT                ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT instructions */
+ #define X86_FEATURE_CPUID_FAULT               ( 7*32+ 1) /* Intel CPUID faulting */
+ #define X86_FEATURE_CPB                       ( 7*32+ 2) /* AMD Core Performance Boost */
+ #define X86_FEATURE_EPB                       ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
+@@ -206,8 +204,8 @@
+ #define X86_FEATURE_INTEL_PPIN                ( 7*32+14) /* Intel Processor Inventory Number */
+ #define X86_FEATURE_INTEL_PT          ( 7*32+15) /* Intel Processor Trace */
+-#define X86_FEATURE_AVX512_4VNNIW     (7*32+16) /* AVX-512 Neural Network Instructions */
+-#define X86_FEATURE_AVX512_4FMAPS     (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
++#define X86_FEATURE_AVX512_4VNNIW     ( 7*32+16) /* AVX-512 Neural Network Instructions */
++#define X86_FEATURE_AVX512_4FMAPS     ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+ #define X86_FEATURE_MBA                       ( 7*32+18) /* Memory Bandwidth Allocation */
+@@ -218,19 +216,19 @@
+ #define X86_FEATURE_EPT                       ( 8*32+ 3) /* Intel Extended Page Table */
+ #define X86_FEATURE_VPID              ( 8*32+ 4) /* Intel Virtual Processor ID */
+-#define X86_FEATURE_VMMCALL           ( 8*32+15) /* Prefer vmmcall to vmcall */
++#define X86_FEATURE_VMMCALL           ( 8*32+15) /* Prefer VMMCALL to VMCALL */
+ #define X86_FEATURE_XENPV             ( 8*32+16) /* "" Xen paravirtual guest */
+-/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
+-#define X86_FEATURE_FSGSBASE          ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
+-#define X86_FEATURE_TSC_ADJUST                ( 9*32+ 1) /* TSC adjustment MSR 0x3b */
++/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
++#define X86_FEATURE_FSGSBASE          ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
++#define X86_FEATURE_TSC_ADJUST                ( 9*32+ 1) /* TSC adjustment MSR 0x3B */
+ #define X86_FEATURE_BMI1              ( 9*32+ 3) /* 1st group bit manipulation extensions */
+ #define X86_FEATURE_HLE                       ( 9*32+ 4) /* Hardware Lock Elision */
+ #define X86_FEATURE_AVX2              ( 9*32+ 5) /* AVX2 instructions */
+ #define X86_FEATURE_SMEP              ( 9*32+ 7) /* Supervisor Mode Execution Protection */
+ #define X86_FEATURE_BMI2              ( 9*32+ 8) /* 2nd group bit manipulation extensions */
+-#define X86_FEATURE_ERMS              ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
++#define X86_FEATURE_ERMS              ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */
+ #define X86_FEATURE_INVPCID           ( 9*32+10) /* Invalidate Processor Context ID */
+ #define X86_FEATURE_RTM                       ( 9*32+11) /* Restricted Transactional Memory */
+ #define X86_FEATURE_CQM                       ( 9*32+12) /* Cache QoS Monitoring */
+@@ -238,8 +236,8 @@
+ #define X86_FEATURE_RDT_A             ( 9*32+15) /* Resource Director Technology Allocation */
+ #define X86_FEATURE_AVX512F           ( 9*32+16) /* AVX-512 Foundation */
+ #define X86_FEATURE_AVX512DQ          ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
+-#define X86_FEATURE_RDSEED            ( 9*32+18) /* The RDSEED instruction */
+-#define X86_FEATURE_ADX                       ( 9*32+19) /* The ADCX and ADOX instructions */
++#define X86_FEATURE_RDSEED            ( 9*32+18) /* RDSEED instruction */
++#define X86_FEATURE_ADX                       ( 9*32+19) /* ADCX and ADOX instructions */
+ #define X86_FEATURE_SMAP              ( 9*32+20) /* Supervisor Mode Access Prevention */
+ #define X86_FEATURE_AVX512IFMA                ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
+ #define X86_FEATURE_CLFLUSHOPT                ( 9*32+23) /* CLFLUSHOPT instruction */
+@@ -251,25 +249,25 @@
+ #define X86_FEATURE_AVX512BW          ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
+ #define X86_FEATURE_AVX512VL          ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
+-/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
+-#define X86_FEATURE_XSAVEOPT          (10*32+ 0) /* XSAVEOPT */
+-#define X86_FEATURE_XSAVEC            (10*32+ 1) /* XSAVEC */
+-#define X86_FEATURE_XGETBV1           (10*32+ 2) /* XGETBV with ECX = 1 */
+-#define X86_FEATURE_XSAVES            (10*32+ 3) /* XSAVES/XRSTORS */
++/* Extended state features, CPUID level 0x0000000d:1 (EAX), word 10 */
++#define X86_FEATURE_XSAVEOPT          (10*32+ 0) /* XSAVEOPT instruction */
++#define X86_FEATURE_XSAVEC            (10*32+ 1) /* XSAVEC instruction */
++#define X86_FEATURE_XGETBV1           (10*32+ 2) /* XGETBV with ECX = 1 instruction */
++#define X86_FEATURE_XSAVES            (10*32+ 3) /* XSAVES/XRSTORS instructions */
+-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
++/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (EDX), word 11 */
+ #define X86_FEATURE_CQM_LLC           (11*32+ 1) /* LLC QoS if 1 */
+-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
+-#define X86_FEATURE_CQM_OCCUP_LLC     (12*32+ 0) /* LLC occupancy monitoring if 1 */
++/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (EDX), word 12 */
++#define X86_FEATURE_CQM_OCCUP_LLC     (12*32+ 0) /* LLC occupancy monitoring */
+ #define X86_FEATURE_CQM_MBM_TOTAL     (12*32+ 1) /* LLC Total MBM monitoring */
+ #define X86_FEATURE_CQM_MBM_LOCAL     (12*32+ 2) /* LLC Local MBM monitoring */
+-/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
+-#define X86_FEATURE_CLZERO            (13*32+0) /* CLZERO instruction */
+-#define X86_FEATURE_IRPERF            (13*32+1) /* Instructions Retired Count */
++/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
++#define X86_FEATURE_CLZERO            (13*32+ 0) /* CLZERO instruction */
++#define X86_FEATURE_IRPERF            (13*32+ 1) /* Instructions Retired Count */
+-/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
++/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
+ #define X86_FEATURE_DTHERM            (14*32+ 0) /* Digital Thermal Sensor */
+ #define X86_FEATURE_IDA                       (14*32+ 1) /* Intel Dynamic Acceleration */
+ #define X86_FEATURE_ARAT              (14*32+ 2) /* Always Running APIC Timer */
+@@ -281,7 +279,7 @@
+ #define X86_FEATURE_HWP_EPP           (14*32+10) /* HWP Energy Perf. Preference */
+ #define X86_FEATURE_HWP_PKG_REQ               (14*32+11) /* HWP Package Level Request */
+-/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */
++/* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */
+ #define X86_FEATURE_NPT                       (15*32+ 0) /* Nested Page Table support */
+ #define X86_FEATURE_LBRV              (15*32+ 1) /* LBR Virtualization support */
+ #define X86_FEATURE_SVML              (15*32+ 2) /* "svm_lock" SVM locking MSR */
+@@ -296,24 +294,24 @@
+ #define X86_FEATURE_V_VMSAVE_VMLOAD   (15*32+15) /* Virtual VMSAVE VMLOAD */
+ #define X86_FEATURE_VGIF              (15*32+16) /* Virtual GIF */
+-/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
++/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
+ #define X86_FEATURE_AVX512VBMI                (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
+ #define X86_FEATURE_PKU                       (16*32+ 3) /* Protection Keys for Userspace */
+ #define X86_FEATURE_OSPKE             (16*32+ 4) /* OS Protection Keys Enable */
+ #define X86_FEATURE_AVX512_VBMI2      (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
+ #define X86_FEATURE_GFNI              (16*32+ 8) /* Galois Field New Instructions */
+ #define X86_FEATURE_VAES              (16*32+ 9) /* Vector AES */
+-#define X86_FEATURE_VPCLMULQDQ                (16*32+ 10) /* Carry-Less Multiplication Double Quadword */
+-#define X86_FEATURE_AVX512_VNNI               (16*32+ 11) /* Vector Neural Network Instructions */
+-#define X86_FEATURE_AVX512_BITALG     (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB */
++#define X86_FEATURE_VPCLMULQDQ                (16*32+10) /* Carry-Less Multiplication Double Quadword */
++#define X86_FEATURE_AVX512_VNNI               (16*32+11) /* Vector Neural Network Instructions */
++#define X86_FEATURE_AVX512_BITALG     (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */
+ #define X86_FEATURE_AVX512_VPOPCNTDQ  (16*32+14) /* POPCNT for vectors of DW/QW */
+ #define X86_FEATURE_LA57              (16*32+16) /* 5-level page tables */
+ #define X86_FEATURE_RDPID             (16*32+22) /* RDPID instruction */
+-/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
+-#define X86_FEATURE_OVERFLOW_RECOV    (17*32+0) /* MCA overflow recovery support */
+-#define X86_FEATURE_SUCCOR            (17*32+1) /* Uncorrectable error containment and recovery */
+-#define X86_FEATURE_SMCA              (17*32+3) /* Scalable MCA */
++/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
++#define X86_FEATURE_OVERFLOW_RECOV    (17*32+ 0) /* MCA overflow recovery support */
++#define X86_FEATURE_SUCCOR            (17*32+ 1) /* Uncorrectable error containment and recovery */
++#define X86_FEATURE_SMCA              (17*32+ 3) /* Scalable MCA */
+ /*
+  * BUG word(s)
+@@ -340,4 +338,5 @@
+ #define X86_BUG_SWAPGS_FENCE          X86_BUG(11) /* SWAPGS without input dep on GS */
+ #define X86_BUG_MONITOR                       X86_BUG(12) /* IPI required to wake up remote CPU */
+ #define X86_BUG_AMD_E400              X86_BUG(13) /* CPU is among the affected by Erratum 400 */
++
+ #endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/queue-4.14/x86-cpufeatures-make-cpu-bugs-sticky.patch b/queue-4.14/x86-cpufeatures-make-cpu-bugs-sticky.patch
new file mode 100644 (file)
index 0000000..b424c07
--- /dev/null
@@ -0,0 +1,95 @@
+From 6cbd2171e89b13377261d15e64384df60ecb530e Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 4 Dec 2017 15:07:32 +0100
+Subject: x86/cpufeatures: Make CPU bugs sticky
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 6cbd2171e89b13377261d15e64384df60ecb530e upstream.
+
+There is currently no way to force CPU bug bits like CPU feature bits. That
+makes it impossible to set a bug bit once at boot and have it stick for all
+upcoming CPUs.
+
+Extend the force set/clear arrays to handle bug bits as well.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150606.992156574@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/cpufeature.h |    2 ++
+ arch/x86/include/asm/processor.h  |    4 ++--
+ arch/x86/kernel/cpu/common.c      |    6 +++---
+ 3 files changed, 7 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/include/asm/cpufeature.h
++++ b/arch/x86/include/asm/cpufeature.h
+@@ -135,6 +135,8 @@ extern void clear_cpu_cap(struct cpuinfo
+       set_bit(bit, (unsigned long *)cpu_caps_set);    \
+ } while (0)
++#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
++
+ #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
+ /*
+  * Static testing of CPU features.  Used the same as boot_cpu_has().
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -163,8 +163,8 @@ extern struct cpuinfo_x86  boot_cpu_data;
+ extern struct cpuinfo_x86     new_cpu_data;
+ extern struct x86_hw_tss      doublefault_tss;
+-extern __u32                  cpu_caps_cleared[NCAPINTS];
+-extern __u32                  cpu_caps_set[NCAPINTS];
++extern __u32                  cpu_caps_cleared[NCAPINTS + NBUGINTS];
++extern __u32                  cpu_caps_set[NCAPINTS + NBUGINTS];
+ #ifdef CONFIG_SMP
+ DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -452,8 +452,8 @@ static const char *table_lookup_model(st
+       return NULL;            /* Not found */
+ }
+-__u32 cpu_caps_cleared[NCAPINTS];
+-__u32 cpu_caps_set[NCAPINTS];
++__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
++__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
+ void load_percpu_segment(int cpu)
+ {
+@@ -812,7 +812,7 @@ static void apply_forced_caps(struct cpu
+ {
+       int i;
+-      for (i = 0; i < NCAPINTS; i++) {
++      for (i = 0; i < NCAPINTS + NBUGINTS; i++) {
+               c->x86_capability[i] &= ~cpu_caps_cleared[i];
+               c->x86_capability[i] |= cpu_caps_set[i];
+       }
diff --git a/queue-4.14/x86-cpufeatures-re-tabulate-the-x86_feature-definitions.patch b/queue-4.14/x86-cpufeatures-re-tabulate-the-x86_feature-definitions.patch
new file mode 100644 (file)
index 0000000..e6da543
--- /dev/null
@@ -0,0 +1,618 @@
+From acbc845ffefd9fb70466182cd8555a26189462b2 Mon Sep 17 00:00:00 2001
+From: Ingo Molnar <mingo@kernel.org>
+Date: Tue, 31 Oct 2017 13:17:22 +0100
+Subject: x86/cpufeatures: Re-tabulate the X86_FEATURE definitions
+
+From: Ingo Molnar <mingo@kernel.org>
+
+commit acbc845ffefd9fb70466182cd8555a26189462b2 upstream.
+
+Over the years asm/cpufeatures.h has become somewhat of a mess: the original
+tabulation style was too narrow, while x86 feature names also kept growing
+in length, creating frequent field width overflows.
+
+Re-tabulate it to make it wider and easier to read/modify. Also harmonize
+the tabulation of the other defines in this file to match it.
+
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/20171031121723.28524-3-mingo@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/cpufeatures.h |  512 ++++++++++++++++++-------------------
+ 1 file changed, 256 insertions(+), 256 deletions(-)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -13,8 +13,8 @@
+ /*
+  * Defines x86 CPU feature bits
+  */
+-#define NCAPINTS      18      /* N 32-bit words worth of info */
+-#define NBUGINTS      1       /* N 32-bit bug flags */
++#define NCAPINTS                      18         /* N 32-bit words worth of info */
++#define NBUGINTS                      1          /* N 32-bit bug flags */
+ /*
+  * Note: If the comment begins with a quoted string, that string is used
+@@ -28,163 +28,163 @@
+  */
+ /* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
+-#define X86_FEATURE_FPU               ( 0*32+ 0) /* Onboard FPU */
+-#define X86_FEATURE_VME               ( 0*32+ 1) /* Virtual Mode Extensions */
+-#define X86_FEATURE_DE                ( 0*32+ 2) /* Debugging Extensions */
+-#define X86_FEATURE_PSE               ( 0*32+ 3) /* Page Size Extensions */
+-#define X86_FEATURE_TSC               ( 0*32+ 4) /* Time Stamp Counter */
+-#define X86_FEATURE_MSR               ( 0*32+ 5) /* Model-Specific Registers */
+-#define X86_FEATURE_PAE               ( 0*32+ 6) /* Physical Address Extensions */
+-#define X86_FEATURE_MCE               ( 0*32+ 7) /* Machine Check Exception */
+-#define X86_FEATURE_CX8               ( 0*32+ 8) /* CMPXCHG8 instruction */
+-#define X86_FEATURE_APIC      ( 0*32+ 9) /* Onboard APIC */
+-#define X86_FEATURE_SEP               ( 0*32+11) /* SYSENTER/SYSEXIT */
+-#define X86_FEATURE_MTRR      ( 0*32+12) /* Memory Type Range Registers */
+-#define X86_FEATURE_PGE               ( 0*32+13) /* Page Global Enable */
+-#define X86_FEATURE_MCA               ( 0*32+14) /* Machine Check Architecture */
+-#define X86_FEATURE_CMOV      ( 0*32+15) /* CMOV instructions */
++#define X86_FEATURE_FPU                       ( 0*32+ 0) /* Onboard FPU */
++#define X86_FEATURE_VME                       ( 0*32+ 1) /* Virtual Mode Extensions */
++#define X86_FEATURE_DE                        ( 0*32+ 2) /* Debugging Extensions */
++#define X86_FEATURE_PSE                       ( 0*32+ 3) /* Page Size Extensions */
++#define X86_FEATURE_TSC                       ( 0*32+ 4) /* Time Stamp Counter */
++#define X86_FEATURE_MSR                       ( 0*32+ 5) /* Model-Specific Registers */
++#define X86_FEATURE_PAE                       ( 0*32+ 6) /* Physical Address Extensions */
++#define X86_FEATURE_MCE                       ( 0*32+ 7) /* Machine Check Exception */
++#define X86_FEATURE_CX8                       ( 0*32+ 8) /* CMPXCHG8 instruction */
++#define X86_FEATURE_APIC              ( 0*32+ 9) /* Onboard APIC */
++#define X86_FEATURE_SEP                       ( 0*32+11) /* SYSENTER/SYSEXIT */
++#define X86_FEATURE_MTRR              ( 0*32+12) /* Memory Type Range Registers */
++#define X86_FEATURE_PGE                       ( 0*32+13) /* Page Global Enable */
++#define X86_FEATURE_MCA                       ( 0*32+14) /* Machine Check Architecture */
++#define X86_FEATURE_CMOV              ( 0*32+15) /* CMOV instructions */
+                                         /* (plus FCMOVcc, FCOMI with FPU) */
+-#define X86_FEATURE_PAT               ( 0*32+16) /* Page Attribute Table */
+-#define X86_FEATURE_PSE36     ( 0*32+17) /* 36-bit PSEs */
+-#define X86_FEATURE_PN                ( 0*32+18) /* Processor serial number */
+-#define X86_FEATURE_CLFLUSH   ( 0*32+19) /* CLFLUSH instruction */
+-#define X86_FEATURE_DS                ( 0*32+21) /* "dts" Debug Store */
+-#define X86_FEATURE_ACPI      ( 0*32+22) /* ACPI via MSR */
+-#define X86_FEATURE_MMX               ( 0*32+23) /* Multimedia Extensions */
+-#define X86_FEATURE_FXSR      ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
+-#define X86_FEATURE_XMM               ( 0*32+25) /* "sse" */
+-#define X86_FEATURE_XMM2      ( 0*32+26) /* "sse2" */
+-#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */
+-#define X86_FEATURE_HT                ( 0*32+28) /* Hyper-Threading */
+-#define X86_FEATURE_ACC               ( 0*32+29) /* "tm" Automatic clock control */
+-#define X86_FEATURE_IA64      ( 0*32+30) /* IA-64 processor */
+-#define X86_FEATURE_PBE               ( 0*32+31) /* Pending Break Enable */
++#define X86_FEATURE_PAT                       ( 0*32+16) /* Page Attribute Table */
++#define X86_FEATURE_PSE36             ( 0*32+17) /* 36-bit PSEs */
++#define X86_FEATURE_PN                        ( 0*32+18) /* Processor serial number */
++#define X86_FEATURE_CLFLUSH           ( 0*32+19) /* CLFLUSH instruction */
++#define X86_FEATURE_DS                        ( 0*32+21) /* "dts" Debug Store */
++#define X86_FEATURE_ACPI              ( 0*32+22) /* ACPI via MSR */
++#define X86_FEATURE_MMX                       ( 0*32+23) /* Multimedia Extensions */
++#define X86_FEATURE_FXSR              ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
++#define X86_FEATURE_XMM                       ( 0*32+25) /* "sse" */
++#define X86_FEATURE_XMM2              ( 0*32+26) /* "sse2" */
++#define X86_FEATURE_SELFSNOOP         ( 0*32+27) /* "ss" CPU self snoop */
++#define X86_FEATURE_HT                        ( 0*32+28) /* Hyper-Threading */
++#define X86_FEATURE_ACC                       ( 0*32+29) /* "tm" Automatic clock control */
++#define X86_FEATURE_IA64              ( 0*32+30) /* IA-64 processor */
++#define X86_FEATURE_PBE                       ( 0*32+31) /* Pending Break Enable */
+ /* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
+ /* Don't duplicate feature flags which are redundant with Intel! */
+-#define X86_FEATURE_SYSCALL   ( 1*32+11) /* SYSCALL/SYSRET */
+-#define X86_FEATURE_MP                ( 1*32+19) /* MP Capable. */
+-#define X86_FEATURE_NX                ( 1*32+20) /* Execute Disable */
+-#define X86_FEATURE_MMXEXT    ( 1*32+22) /* AMD MMX extensions */
+-#define X86_FEATURE_FXSR_OPT  ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
+-#define X86_FEATURE_GBPAGES   ( 1*32+26) /* "pdpe1gb" GB pages */
+-#define X86_FEATURE_RDTSCP    ( 1*32+27) /* RDTSCP */
+-#define X86_FEATURE_LM                ( 1*32+29) /* Long Mode (x86-64) */
+-#define X86_FEATURE_3DNOWEXT  ( 1*32+30) /* AMD 3DNow! extensions */
+-#define X86_FEATURE_3DNOW     ( 1*32+31) /* 3DNow! */
++#define X86_FEATURE_SYSCALL           ( 1*32+11) /* SYSCALL/SYSRET */
++#define X86_FEATURE_MP                        ( 1*32+19) /* MP Capable. */
++#define X86_FEATURE_NX                        ( 1*32+20) /* Execute Disable */
++#define X86_FEATURE_MMXEXT            ( 1*32+22) /* AMD MMX extensions */
++#define X86_FEATURE_FXSR_OPT          ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
++#define X86_FEATURE_GBPAGES           ( 1*32+26) /* "pdpe1gb" GB pages */
++#define X86_FEATURE_RDTSCP            ( 1*32+27) /* RDTSCP */
++#define X86_FEATURE_LM                        ( 1*32+29) /* Long Mode (x86-64) */
++#define X86_FEATURE_3DNOWEXT          ( 1*32+30) /* AMD 3DNow! extensions */
++#define X86_FEATURE_3DNOW             ( 1*32+31) /* 3DNow! */
+ /* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
+-#define X86_FEATURE_RECOVERY  ( 2*32+ 0) /* CPU in recovery mode */
+-#define X86_FEATURE_LONGRUN   ( 2*32+ 1) /* Longrun power control */
+-#define X86_FEATURE_LRTI      ( 2*32+ 3) /* LongRun table interface */
++#define X86_FEATURE_RECOVERY          ( 2*32+ 0) /* CPU in recovery mode */
++#define X86_FEATURE_LONGRUN           ( 2*32+ 1) /* Longrun power control */
++#define X86_FEATURE_LRTI              ( 2*32+ 3) /* LongRun table interface */
+ /* Other features, Linux-defined mapping, word 3 */
+ /* This range is used for feature bits which conflict or are synthesized */
+-#define X86_FEATURE_CXMMX     ( 3*32+ 0) /* Cyrix MMX extensions */
+-#define X86_FEATURE_K6_MTRR   ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
+-#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
+-#define X86_FEATURE_CENTAUR_MCR       ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
++#define X86_FEATURE_CXMMX             ( 3*32+ 0) /* Cyrix MMX extensions */
++#define X86_FEATURE_K6_MTRR           ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
++#define X86_FEATURE_CYRIX_ARR         ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
++#define X86_FEATURE_CENTAUR_MCR               ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
+ /* cpu types for specific tunings: */
+-#define X86_FEATURE_K8                ( 3*32+ 4) /* "" Opteron, Athlon64 */
+-#define X86_FEATURE_K7                ( 3*32+ 5) /* "" Athlon */
+-#define X86_FEATURE_P3                ( 3*32+ 6) /* "" P3 */
+-#define X86_FEATURE_P4                ( 3*32+ 7) /* "" P4 */
+-#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
+-#define X86_FEATURE_UP                ( 3*32+ 9) /* smp kernel running on up */
+-#define X86_FEATURE_ART               ( 3*32+10) /* Platform has always running timer (ART) */
+-#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
+-#define X86_FEATURE_PEBS      ( 3*32+12) /* Precise-Event Based Sampling */
+-#define X86_FEATURE_BTS               ( 3*32+13) /* Branch Trace Store */
+-#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */
+-#define X86_FEATURE_SYSENTER32        ( 3*32+15) /* "" sysenter in ia32 userspace */
+-#define X86_FEATURE_REP_GOOD  ( 3*32+16) /* rep microcode works well */
+-#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
+-#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
+-#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
+-#define X86_FEATURE_NOPL      ( 3*32+20) /* The NOPL (0F 1F) instructions */
+-#define X86_FEATURE_ALWAYS    ( 3*32+21) /* "" Always-present feature */
+-#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */
+-#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
+-#define X86_FEATURE_NONSTOP_TSC       ( 3*32+24) /* TSC does not stop in C states */
+-#define X86_FEATURE_CPUID     ( 3*32+25) /* CPU has CPUID instruction itself */
+-#define X86_FEATURE_EXTD_APICID       ( 3*32+26) /* has extended APICID (8 bits) */
+-#define X86_FEATURE_AMD_DCM     ( 3*32+27) /* multi-node processor */
+-#define X86_FEATURE_APERFMPERF        ( 3*32+28) /* APERFMPERF */
+-#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
+-#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */
++#define X86_FEATURE_K8                        ( 3*32+ 4) /* "" Opteron, Athlon64 */
++#define X86_FEATURE_K7                        ( 3*32+ 5) /* "" Athlon */
++#define X86_FEATURE_P3                        ( 3*32+ 6) /* "" P3 */
++#define X86_FEATURE_P4                        ( 3*32+ 7) /* "" P4 */
++#define X86_FEATURE_CONSTANT_TSC      ( 3*32+ 8) /* TSC ticks at a constant rate */
++#define X86_FEATURE_UP                        ( 3*32+ 9) /* smp kernel running on up */
++#define X86_FEATURE_ART                       ( 3*32+10) /* Platform has always running timer (ART) */
++#define X86_FEATURE_ARCH_PERFMON      ( 3*32+11) /* Intel Architectural PerfMon */
++#define X86_FEATURE_PEBS              ( 3*32+12) /* Precise-Event Based Sampling */
++#define X86_FEATURE_BTS                       ( 3*32+13) /* Branch Trace Store */
++#define X86_FEATURE_SYSCALL32         ( 3*32+14) /* "" syscall in ia32 userspace */
++#define X86_FEATURE_SYSENTER32                ( 3*32+15) /* "" sysenter in ia32 userspace */
++#define X86_FEATURE_REP_GOOD          ( 3*32+16) /* rep microcode works well */
++#define X86_FEATURE_MFENCE_RDTSC      ( 3*32+17) /* "" Mfence synchronizes RDTSC */
++#define X86_FEATURE_LFENCE_RDTSC      ( 3*32+18) /* "" Lfence synchronizes RDTSC */
++#define X86_FEATURE_ACC_POWER         ( 3*32+19) /* AMD Accumulated Power Mechanism */
++#define X86_FEATURE_NOPL              ( 3*32+20) /* The NOPL (0F 1F) instructions */
++#define X86_FEATURE_ALWAYS            ( 3*32+21) /* "" Always-present feature */
++#define X86_FEATURE_XTOPOLOGY         ( 3*32+22) /* cpu topology enum extensions */
++#define X86_FEATURE_TSC_RELIABLE      ( 3*32+23) /* TSC is known to be reliable */
++#define X86_FEATURE_NONSTOP_TSC               ( 3*32+24) /* TSC does not stop in C states */
++#define X86_FEATURE_CPUID             ( 3*32+25) /* CPU has CPUID instruction itself */
++#define X86_FEATURE_EXTD_APICID               ( 3*32+26) /* has extended APICID (8 bits) */
++#define X86_FEATURE_AMD_DCM           ( 3*32+27) /* multi-node processor */
++#define X86_FEATURE_APERFMPERF                ( 3*32+28) /* APERFMPERF */
++#define X86_FEATURE_NONSTOP_TSC_S3    ( 3*32+30) /* TSC doesn't stop in S3 state */
++#define X86_FEATURE_TSC_KNOWN_FREQ    ( 3*32+31) /* TSC has known frequency */
+ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
+-#define X86_FEATURE_XMM3      ( 4*32+ 0) /* "pni" SSE-3 */
+-#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */
+-#define X86_FEATURE_DTES64    ( 4*32+ 2) /* 64-bit Debug Store */
+-#define X86_FEATURE_MWAIT     ( 4*32+ 3) /* "monitor" Monitor/Mwait support */
+-#define X86_FEATURE_DSCPL     ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
+-#define X86_FEATURE_VMX               ( 4*32+ 5) /* Hardware virtualization */
+-#define X86_FEATURE_SMX               ( 4*32+ 6) /* Safer mode */
+-#define X86_FEATURE_EST               ( 4*32+ 7) /* Enhanced SpeedStep */
+-#define X86_FEATURE_TM2               ( 4*32+ 8) /* Thermal Monitor 2 */
+-#define X86_FEATURE_SSSE3     ( 4*32+ 9) /* Supplemental SSE-3 */
+-#define X86_FEATURE_CID               ( 4*32+10) /* Context ID */
+-#define X86_FEATURE_SDBG      ( 4*32+11) /* Silicon Debug */
+-#define X86_FEATURE_FMA               ( 4*32+12) /* Fused multiply-add */
+-#define X86_FEATURE_CX16      ( 4*32+13) /* CMPXCHG16B */
+-#define X86_FEATURE_XTPR      ( 4*32+14) /* Send Task Priority Messages */
+-#define X86_FEATURE_PDCM      ( 4*32+15) /* Performance Capabilities */
+-#define X86_FEATURE_PCID      ( 4*32+17) /* Process Context Identifiers */
+-#define X86_FEATURE_DCA               ( 4*32+18) /* Direct Cache Access */
+-#define X86_FEATURE_XMM4_1    ( 4*32+19) /* "sse4_1" SSE-4.1 */
+-#define X86_FEATURE_XMM4_2    ( 4*32+20) /* "sse4_2" SSE-4.2 */
+-#define X86_FEATURE_X2APIC    ( 4*32+21) /* x2APIC */
+-#define X86_FEATURE_MOVBE     ( 4*32+22) /* MOVBE instruction */
+-#define X86_FEATURE_POPCNT      ( 4*32+23) /* POPCNT instruction */
++#define X86_FEATURE_XMM3              ( 4*32+ 0) /* "pni" SSE-3 */
++#define X86_FEATURE_PCLMULQDQ         ( 4*32+ 1) /* PCLMULQDQ instruction */
++#define X86_FEATURE_DTES64            ( 4*32+ 2) /* 64-bit Debug Store */
++#define X86_FEATURE_MWAIT             ( 4*32+ 3) /* "monitor" Monitor/Mwait support */
++#define X86_FEATURE_DSCPL             ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
++#define X86_FEATURE_VMX                       ( 4*32+ 5) /* Hardware virtualization */
++#define X86_FEATURE_SMX                       ( 4*32+ 6) /* Safer mode */
++#define X86_FEATURE_EST                       ( 4*32+ 7) /* Enhanced SpeedStep */
++#define X86_FEATURE_TM2                       ( 4*32+ 8) /* Thermal Monitor 2 */
++#define X86_FEATURE_SSSE3             ( 4*32+ 9) /* Supplemental SSE-3 */
++#define X86_FEATURE_CID                       ( 4*32+10) /* Context ID */
++#define X86_FEATURE_SDBG              ( 4*32+11) /* Silicon Debug */
++#define X86_FEATURE_FMA                       ( 4*32+12) /* Fused multiply-add */
++#define X86_FEATURE_CX16              ( 4*32+13) /* CMPXCHG16B */
++#define X86_FEATURE_XTPR              ( 4*32+14) /* Send Task Priority Messages */
++#define X86_FEATURE_PDCM              ( 4*32+15) /* Performance Capabilities */
++#define X86_FEATURE_PCID              ( 4*32+17) /* Process Context Identifiers */
++#define X86_FEATURE_DCA                       ( 4*32+18) /* Direct Cache Access */
++#define X86_FEATURE_XMM4_1            ( 4*32+19) /* "sse4_1" SSE-4.1 */
++#define X86_FEATURE_XMM4_2            ( 4*32+20) /* "sse4_2" SSE-4.2 */
++#define X86_FEATURE_X2APIC            ( 4*32+21) /* x2APIC */
++#define X86_FEATURE_MOVBE             ( 4*32+22) /* MOVBE instruction */
++#define X86_FEATURE_POPCNT            ( 4*32+23) /* POPCNT instruction */
+ #define X86_FEATURE_TSC_DEADLINE_TIMER        ( 4*32+24) /* Tsc deadline timer */
+-#define X86_FEATURE_AES               ( 4*32+25) /* AES instructions */
+-#define X86_FEATURE_XSAVE     ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
+-#define X86_FEATURE_OSXSAVE   ( 4*32+27) /* "" XSAVE enabled in the OS */
+-#define X86_FEATURE_AVX               ( 4*32+28) /* Advanced Vector Extensions */
+-#define X86_FEATURE_F16C      ( 4*32+29) /* 16-bit fp conversions */
+-#define X86_FEATURE_RDRAND    ( 4*32+30) /* The RDRAND instruction */
+-#define X86_FEATURE_HYPERVISOR        ( 4*32+31) /* Running on a hypervisor */
++#define X86_FEATURE_AES                       ( 4*32+25) /* AES instructions */
++#define X86_FEATURE_XSAVE             ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
++#define X86_FEATURE_OSXSAVE           ( 4*32+27) /* "" XSAVE enabled in the OS */
++#define X86_FEATURE_AVX                       ( 4*32+28) /* Advanced Vector Extensions */
++#define X86_FEATURE_F16C              ( 4*32+29) /* 16-bit fp conversions */
++#define X86_FEATURE_RDRAND            ( 4*32+30) /* The RDRAND instruction */
++#define X86_FEATURE_HYPERVISOR                ( 4*32+31) /* Running on a hypervisor */
+ /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
+-#define X86_FEATURE_XSTORE    ( 5*32+ 2) /* "rng" RNG present (xstore) */
+-#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */
+-#define X86_FEATURE_XCRYPT    ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
+-#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
+-#define X86_FEATURE_ACE2      ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
+-#define X86_FEATURE_ACE2_EN   ( 5*32+ 9) /* ACE v2 enabled */
+-#define X86_FEATURE_PHE               ( 5*32+10) /* PadLock Hash Engine */
+-#define X86_FEATURE_PHE_EN    ( 5*32+11) /* PHE enabled */
+-#define X86_FEATURE_PMM               ( 5*32+12) /* PadLock Montgomery Multiplier */
+-#define X86_FEATURE_PMM_EN    ( 5*32+13) /* PMM enabled */
++#define X86_FEATURE_XSTORE            ( 5*32+ 2) /* "rng" RNG present (xstore) */
++#define X86_FEATURE_XSTORE_EN         ( 5*32+ 3) /* "rng_en" RNG enabled */
++#define X86_FEATURE_XCRYPT            ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
++#define X86_FEATURE_XCRYPT_EN         ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
++#define X86_FEATURE_ACE2              ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
++#define X86_FEATURE_ACE2_EN           ( 5*32+ 9) /* ACE v2 enabled */
++#define X86_FEATURE_PHE                       ( 5*32+10) /* PadLock Hash Engine */
++#define X86_FEATURE_PHE_EN            ( 5*32+11) /* PHE enabled */
++#define X86_FEATURE_PMM                       ( 5*32+12) /* PadLock Montgomery Multiplier */
++#define X86_FEATURE_PMM_EN            ( 5*32+13) /* PMM enabled */
+ /* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
+-#define X86_FEATURE_LAHF_LM   ( 6*32+ 0) /* LAHF/SAHF in long mode */
+-#define X86_FEATURE_CMP_LEGACY        ( 6*32+ 1) /* If yes HyperThreading not valid */
+-#define X86_FEATURE_SVM               ( 6*32+ 2) /* Secure virtual machine */
+-#define X86_FEATURE_EXTAPIC   ( 6*32+ 3) /* Extended APIC space */
+-#define X86_FEATURE_CR8_LEGACY        ( 6*32+ 4) /* CR8 in 32-bit mode */
+-#define X86_FEATURE_ABM               ( 6*32+ 5) /* Advanced bit manipulation */
+-#define X86_FEATURE_SSE4A     ( 6*32+ 6) /* SSE-4A */
+-#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
+-#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
+-#define X86_FEATURE_OSVW      ( 6*32+ 9) /* OS Visible Workaround */
+-#define X86_FEATURE_IBS               ( 6*32+10) /* Instruction Based Sampling */
+-#define X86_FEATURE_XOP               ( 6*32+11) /* extended AVX instructions */
+-#define X86_FEATURE_SKINIT    ( 6*32+12) /* SKINIT/STGI instructions */
+-#define X86_FEATURE_WDT               ( 6*32+13) /* Watchdog timer */
+-#define X86_FEATURE_LWP               ( 6*32+15) /* Light Weight Profiling */
+-#define X86_FEATURE_FMA4      ( 6*32+16) /* 4 operands MAC instructions */
+-#define X86_FEATURE_TCE               ( 6*32+17) /* translation cache extension */
+-#define X86_FEATURE_NODEID_MSR        ( 6*32+19) /* NodeId MSR */
+-#define X86_FEATURE_TBM               ( 6*32+21) /* trailing bit manipulations */
+-#define X86_FEATURE_TOPOEXT   ( 6*32+22) /* topology extensions CPUID leafs */
+-#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
+-#define X86_FEATURE_PERFCTR_NB  ( 6*32+24) /* NB performance counter extensions */
+-#define X86_FEATURE_BPEXT     (6*32+26) /* data breakpoint extension */
+-#define X86_FEATURE_PTSC      ( 6*32+27) /* performance time-stamp counter */
+-#define X86_FEATURE_PERFCTR_LLC       ( 6*32+28) /* Last Level Cache performance counter extensions */
+-#define X86_FEATURE_MWAITX    ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
++#define X86_FEATURE_LAHF_LM           ( 6*32+ 0) /* LAHF/SAHF in long mode */
++#define X86_FEATURE_CMP_LEGACY                ( 6*32+ 1) /* If yes HyperThreading not valid */
++#define X86_FEATURE_SVM                       ( 6*32+ 2) /* Secure virtual machine */
++#define X86_FEATURE_EXTAPIC           ( 6*32+ 3) /* Extended APIC space */
++#define X86_FEATURE_CR8_LEGACY                ( 6*32+ 4) /* CR8 in 32-bit mode */
++#define X86_FEATURE_ABM                       ( 6*32+ 5) /* Advanced bit manipulation */
++#define X86_FEATURE_SSE4A             ( 6*32+ 6) /* SSE-4A */
++#define X86_FEATURE_MISALIGNSSE               ( 6*32+ 7) /* Misaligned SSE mode */
++#define X86_FEATURE_3DNOWPREFETCH     ( 6*32+ 8) /* 3DNow prefetch instructions */
++#define X86_FEATURE_OSVW              ( 6*32+ 9) /* OS Visible Workaround */
++#define X86_FEATURE_IBS                       ( 6*32+10) /* Instruction Based Sampling */
++#define X86_FEATURE_XOP                       ( 6*32+11) /* extended AVX instructions */
++#define X86_FEATURE_SKINIT            ( 6*32+12) /* SKINIT/STGI instructions */
++#define X86_FEATURE_WDT                       ( 6*32+13) /* Watchdog timer */
++#define X86_FEATURE_LWP                       ( 6*32+15) /* Light Weight Profiling */
++#define X86_FEATURE_FMA4              ( 6*32+16) /* 4 operands MAC instructions */
++#define X86_FEATURE_TCE                       ( 6*32+17) /* translation cache extension */
++#define X86_FEATURE_NODEID_MSR                ( 6*32+19) /* NodeId MSR */
++#define X86_FEATURE_TBM                       ( 6*32+21) /* trailing bit manipulations */
++#define X86_FEATURE_TOPOEXT           ( 6*32+22) /* topology extensions CPUID leafs */
++#define X86_FEATURE_PERFCTR_CORE      ( 6*32+23) /* core performance counter extensions */
++#define X86_FEATURE_PERFCTR_NB                ( 6*32+24) /* NB performance counter extensions */
++#define X86_FEATURE_BPEXT             (6*32+26) /* data breakpoint extension */
++#define X86_FEATURE_PTSC              ( 6*32+27) /* performance time-stamp counter */
++#define X86_FEATURE_PERFCTR_LLC               ( 6*32+28) /* Last Level Cache performance counter extensions */
++#define X86_FEATURE_MWAITX            ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
+ /*
+  * Auxiliary flags: Linux defined - For features scattered in various
+@@ -192,152 +192,152 @@
+  *
+  * Reuse free bits when adding new feature flags!
+  */
+-#define X86_FEATURE_RING3MWAIT        ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */
+-#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */
+-#define X86_FEATURE_CPB               ( 7*32+ 2) /* AMD Core Performance Boost */
+-#define X86_FEATURE_EPB               ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
+-#define X86_FEATURE_CAT_L3    ( 7*32+ 4) /* Cache Allocation Technology L3 */
+-#define X86_FEATURE_CAT_L2    ( 7*32+ 5) /* Cache Allocation Technology L2 */
+-#define X86_FEATURE_CDP_L3    ( 7*32+ 6) /* Code and Data Prioritization L3 */
+-
+-#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
+-#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+-#define X86_FEATURE_SME               ( 7*32+10) /* AMD Secure Memory Encryption */
+-
+-#define X86_FEATURE_INTEL_PPIN        ( 7*32+14) /* Intel Processor Inventory Number */
+-#define X86_FEATURE_INTEL_PT  ( 7*32+15) /* Intel Processor Trace */
+-#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
+-#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
++#define X86_FEATURE_RING3MWAIT                ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */
++#define X86_FEATURE_CPUID_FAULT               ( 7*32+ 1) /* Intel CPUID faulting */
++#define X86_FEATURE_CPB                       ( 7*32+ 2) /* AMD Core Performance Boost */
++#define X86_FEATURE_EPB                       ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
++#define X86_FEATURE_CAT_L3            ( 7*32+ 4) /* Cache Allocation Technology L3 */
++#define X86_FEATURE_CAT_L2            ( 7*32+ 5) /* Cache Allocation Technology L2 */
++#define X86_FEATURE_CDP_L3            ( 7*32+ 6) /* Code and Data Prioritization L3 */
++
++#define X86_FEATURE_HW_PSTATE         ( 7*32+ 8) /* AMD HW-PState */
++#define X86_FEATURE_PROC_FEEDBACK     ( 7*32+ 9) /* AMD ProcFeedbackInterface */
++#define X86_FEATURE_SME                       ( 7*32+10) /* AMD Secure Memory Encryption */
++
++#define X86_FEATURE_INTEL_PPIN                ( 7*32+14) /* Intel Processor Inventory Number */
++#define X86_FEATURE_INTEL_PT          ( 7*32+15) /* Intel Processor Trace */
++#define X86_FEATURE_AVX512_4VNNIW     (7*32+16) /* AVX-512 Neural Network Instructions */
++#define X86_FEATURE_AVX512_4FMAPS     (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+-#define X86_FEATURE_MBA         ( 7*32+18) /* Memory Bandwidth Allocation */
++#define X86_FEATURE_MBA                       ( 7*32+18) /* Memory Bandwidth Allocation */
+ /* Virtualization flags: Linux defined, word 8 */
+-#define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
+-#define X86_FEATURE_VNMI        ( 8*32+ 1) /* Intel Virtual NMI */
+-#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
+-#define X86_FEATURE_EPT         ( 8*32+ 3) /* Intel Extended Page Table */
+-#define X86_FEATURE_VPID        ( 8*32+ 4) /* Intel Virtual Processor ID */
++#define X86_FEATURE_TPR_SHADOW                ( 8*32+ 0) /* Intel TPR Shadow */
++#define X86_FEATURE_VNMI              ( 8*32+ 1) /* Intel Virtual NMI */
++#define X86_FEATURE_FLEXPRIORITY      ( 8*32+ 2) /* Intel FlexPriority */
++#define X86_FEATURE_EPT                       ( 8*32+ 3) /* Intel Extended Page Table */
++#define X86_FEATURE_VPID              ( 8*32+ 4) /* Intel Virtual Processor ID */
+-#define X86_FEATURE_VMMCALL     ( 8*32+15) /* Prefer vmmcall to vmcall */
+-#define X86_FEATURE_XENPV       ( 8*32+16) /* "" Xen paravirtual guest */
++#define X86_FEATURE_VMMCALL           ( 8*32+15) /* Prefer vmmcall to vmcall */
++#define X86_FEATURE_XENPV             ( 8*32+16) /* "" Xen paravirtual guest */
+ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
+-#define X86_FEATURE_FSGSBASE  ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
+-#define X86_FEATURE_TSC_ADJUST        ( 9*32+ 1) /* TSC adjustment MSR 0x3b */
+-#define X86_FEATURE_BMI1      ( 9*32+ 3) /* 1st group bit manipulation extensions */
+-#define X86_FEATURE_HLE               ( 9*32+ 4) /* Hardware Lock Elision */
+-#define X86_FEATURE_AVX2      ( 9*32+ 5) /* AVX2 instructions */
+-#define X86_FEATURE_SMEP      ( 9*32+ 7) /* Supervisor Mode Execution Protection */
+-#define X86_FEATURE_BMI2      ( 9*32+ 8) /* 2nd group bit manipulation extensions */
+-#define X86_FEATURE_ERMS      ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
+-#define X86_FEATURE_INVPCID   ( 9*32+10) /* Invalidate Processor Context ID */
+-#define X86_FEATURE_RTM               ( 9*32+11) /* Restricted Transactional Memory */
+-#define X86_FEATURE_CQM               ( 9*32+12) /* Cache QoS Monitoring */
+-#define X86_FEATURE_MPX               ( 9*32+14) /* Memory Protection Extension */
+-#define X86_FEATURE_RDT_A     ( 9*32+15) /* Resource Director Technology Allocation */
+-#define X86_FEATURE_AVX512F   ( 9*32+16) /* AVX-512 Foundation */
+-#define X86_FEATURE_AVX512DQ  ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
+-#define X86_FEATURE_RDSEED    ( 9*32+18) /* The RDSEED instruction */
+-#define X86_FEATURE_ADX               ( 9*32+19) /* The ADCX and ADOX instructions */
+-#define X86_FEATURE_SMAP      ( 9*32+20) /* Supervisor Mode Access Prevention */
+-#define X86_FEATURE_AVX512IFMA  ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
+-#define X86_FEATURE_CLFLUSHOPT        ( 9*32+23) /* CLFLUSHOPT instruction */
+-#define X86_FEATURE_CLWB      ( 9*32+24) /* CLWB instruction */
+-#define X86_FEATURE_AVX512PF  ( 9*32+26) /* AVX-512 Prefetch */
+-#define X86_FEATURE_AVX512ER  ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
+-#define X86_FEATURE_AVX512CD  ( 9*32+28) /* AVX-512 Conflict Detection */
+-#define X86_FEATURE_SHA_NI    ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
+-#define X86_FEATURE_AVX512BW  ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
+-#define X86_FEATURE_AVX512VL  ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
++#define X86_FEATURE_FSGSBASE          ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
++#define X86_FEATURE_TSC_ADJUST                ( 9*32+ 1) /* TSC adjustment MSR 0x3b */
++#define X86_FEATURE_BMI1              ( 9*32+ 3) /* 1st group bit manipulation extensions */
++#define X86_FEATURE_HLE                       ( 9*32+ 4) /* Hardware Lock Elision */
++#define X86_FEATURE_AVX2              ( 9*32+ 5) /* AVX2 instructions */
++#define X86_FEATURE_SMEP              ( 9*32+ 7) /* Supervisor Mode Execution Protection */
++#define X86_FEATURE_BMI2              ( 9*32+ 8) /* 2nd group bit manipulation extensions */
++#define X86_FEATURE_ERMS              ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
++#define X86_FEATURE_INVPCID           ( 9*32+10) /* Invalidate Processor Context ID */
++#define X86_FEATURE_RTM                       ( 9*32+11) /* Restricted Transactional Memory */
++#define X86_FEATURE_CQM                       ( 9*32+12) /* Cache QoS Monitoring */
++#define X86_FEATURE_MPX                       ( 9*32+14) /* Memory Protection Extension */
++#define X86_FEATURE_RDT_A             ( 9*32+15) /* Resource Director Technology Allocation */
++#define X86_FEATURE_AVX512F           ( 9*32+16) /* AVX-512 Foundation */
++#define X86_FEATURE_AVX512DQ          ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
++#define X86_FEATURE_RDSEED            ( 9*32+18) /* The RDSEED instruction */
++#define X86_FEATURE_ADX                       ( 9*32+19) /* The ADCX and ADOX instructions */
++#define X86_FEATURE_SMAP              ( 9*32+20) /* Supervisor Mode Access Prevention */
++#define X86_FEATURE_AVX512IFMA                ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
++#define X86_FEATURE_CLFLUSHOPT                ( 9*32+23) /* CLFLUSHOPT instruction */
++#define X86_FEATURE_CLWB              ( 9*32+24) /* CLWB instruction */
++#define X86_FEATURE_AVX512PF          ( 9*32+26) /* AVX-512 Prefetch */
++#define X86_FEATURE_AVX512ER          ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
++#define X86_FEATURE_AVX512CD          ( 9*32+28) /* AVX-512 Conflict Detection */
++#define X86_FEATURE_SHA_NI            ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
++#define X86_FEATURE_AVX512BW          ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
++#define X86_FEATURE_AVX512VL          ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
+ /* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
+-#define X86_FEATURE_XSAVEOPT  (10*32+ 0) /* XSAVEOPT */
+-#define X86_FEATURE_XSAVEC    (10*32+ 1) /* XSAVEC */
+-#define X86_FEATURE_XGETBV1   (10*32+ 2) /* XGETBV with ECX = 1 */
+-#define X86_FEATURE_XSAVES    (10*32+ 3) /* XSAVES/XRSTORS */
++#define X86_FEATURE_XSAVEOPT          (10*32+ 0) /* XSAVEOPT */
++#define X86_FEATURE_XSAVEC            (10*32+ 1) /* XSAVEC */
++#define X86_FEATURE_XGETBV1           (10*32+ 2) /* XGETBV with ECX = 1 */
++#define X86_FEATURE_XSAVES            (10*32+ 3) /* XSAVES/XRSTORS */
+ /* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
+-#define X86_FEATURE_CQM_LLC   (11*32+ 1) /* LLC QoS if 1 */
++#define X86_FEATURE_CQM_LLC           (11*32+ 1) /* LLC QoS if 1 */
+ /* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
+-#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
+-#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */
+-#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */
++#define X86_FEATURE_CQM_OCCUP_LLC     (12*32+ 0) /* LLC occupancy monitoring if 1 */
++#define X86_FEATURE_CQM_MBM_TOTAL     (12*32+ 1) /* LLC Total MBM monitoring */
++#define X86_FEATURE_CQM_MBM_LOCAL     (12*32+ 2) /* LLC Local MBM monitoring */
+ /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
+-#define X86_FEATURE_CLZERO    (13*32+0) /* CLZERO instruction */
+-#define X86_FEATURE_IRPERF    (13*32+1) /* Instructions Retired Count */
++#define X86_FEATURE_CLZERO            (13*32+0) /* CLZERO instruction */
++#define X86_FEATURE_IRPERF            (13*32+1) /* Instructions Retired Count */
+ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
+-#define X86_FEATURE_DTHERM    (14*32+ 0) /* Digital Thermal Sensor */
+-#define X86_FEATURE_IDA               (14*32+ 1) /* Intel Dynamic Acceleration */
+-#define X86_FEATURE_ARAT      (14*32+ 2) /* Always Running APIC Timer */
+-#define X86_FEATURE_PLN               (14*32+ 4) /* Intel Power Limit Notification */
+-#define X86_FEATURE_PTS               (14*32+ 6) /* Intel Package Thermal Status */
+-#define X86_FEATURE_HWP               (14*32+ 7) /* Intel Hardware P-states */
+-#define X86_FEATURE_HWP_NOTIFY        (14*32+ 8) /* HWP Notification */
+-#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
+-#define X86_FEATURE_HWP_EPP   (14*32+10) /* HWP Energy Perf. Preference */
+-#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
++#define X86_FEATURE_DTHERM            (14*32+ 0) /* Digital Thermal Sensor */
++#define X86_FEATURE_IDA                       (14*32+ 1) /* Intel Dynamic Acceleration */
++#define X86_FEATURE_ARAT              (14*32+ 2) /* Always Running APIC Timer */
++#define X86_FEATURE_PLN                       (14*32+ 4) /* Intel Power Limit Notification */
++#define X86_FEATURE_PTS                       (14*32+ 6) /* Intel Package Thermal Status */
++#define X86_FEATURE_HWP                       (14*32+ 7) /* Intel Hardware P-states */
++#define X86_FEATURE_HWP_NOTIFY                (14*32+ 8) /* HWP Notification */
++#define X86_FEATURE_HWP_ACT_WINDOW    (14*32+ 9) /* HWP Activity Window */
++#define X86_FEATURE_HWP_EPP           (14*32+10) /* HWP Energy Perf. Preference */
++#define X86_FEATURE_HWP_PKG_REQ               (14*32+11) /* HWP Package Level Request */
+ /* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */
+-#define X86_FEATURE_NPT               (15*32+ 0) /* Nested Page Table support */
+-#define X86_FEATURE_LBRV      (15*32+ 1) /* LBR Virtualization support */
+-#define X86_FEATURE_SVML      (15*32+ 2) /* "svm_lock" SVM locking MSR */
+-#define X86_FEATURE_NRIPS     (15*32+ 3) /* "nrip_save" SVM next_rip save */
+-#define X86_FEATURE_TSCRATEMSR  (15*32+ 4) /* "tsc_scale" TSC scaling support */
+-#define X86_FEATURE_VMCBCLEAN   (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
+-#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
+-#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
+-#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
+-#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
+-#define X86_FEATURE_AVIC      (15*32+13) /* Virtual Interrupt Controller */
+-#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
+-#define X86_FEATURE_VGIF      (15*32+16) /* Virtual GIF */
++#define X86_FEATURE_NPT                       (15*32+ 0) /* Nested Page Table support */
++#define X86_FEATURE_LBRV              (15*32+ 1) /* LBR Virtualization support */
++#define X86_FEATURE_SVML              (15*32+ 2) /* "svm_lock" SVM locking MSR */
++#define X86_FEATURE_NRIPS             (15*32+ 3) /* "nrip_save" SVM next_rip save */
++#define X86_FEATURE_TSCRATEMSR                (15*32+ 4) /* "tsc_scale" TSC scaling support */
++#define X86_FEATURE_VMCBCLEAN         (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
++#define X86_FEATURE_FLUSHBYASID               (15*32+ 6) /* flush-by-ASID support */
++#define X86_FEATURE_DECODEASSISTS     (15*32+ 7) /* Decode Assists support */
++#define X86_FEATURE_PAUSEFILTER               (15*32+10) /* filtered pause intercept */
++#define X86_FEATURE_PFTHRESHOLD               (15*32+12) /* pause filter threshold */
++#define X86_FEATURE_AVIC              (15*32+13) /* Virtual Interrupt Controller */
++#define X86_FEATURE_V_VMSAVE_VMLOAD   (15*32+15) /* Virtual VMSAVE VMLOAD */
++#define X86_FEATURE_VGIF              (15*32+16) /* Virtual GIF */
+ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
+-#define X86_FEATURE_AVX512VBMI  (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
+-#define X86_FEATURE_PKU               (16*32+ 3) /* Protection Keys for Userspace */
+-#define X86_FEATURE_OSPKE     (16*32+ 4) /* OS Protection Keys Enable */
+-#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
+-#define X86_FEATURE_GFNI      (16*32+ 8) /* Galois Field New Instructions */
+-#define X86_FEATURE_VAES      (16*32+ 9) /* Vector AES */
+-#define X86_FEATURE_VPCLMULQDQ        (16*32+ 10) /* Carry-Less Multiplication Double Quadword */
+-#define X86_FEATURE_AVX512_VNNI (16*32+ 11) /* Vector Neural Network Instructions */
+-#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB */
+-#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
+-#define X86_FEATURE_LA57      (16*32+16) /* 5-level page tables */
+-#define X86_FEATURE_RDPID     (16*32+22) /* RDPID instruction */
++#define X86_FEATURE_AVX512VBMI                (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
++#define X86_FEATURE_PKU                       (16*32+ 3) /* Protection Keys for Userspace */
++#define X86_FEATURE_OSPKE             (16*32+ 4) /* OS Protection Keys Enable */
++#define X86_FEATURE_AVX512_VBMI2      (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
++#define X86_FEATURE_GFNI              (16*32+ 8) /* Galois Field New Instructions */
++#define X86_FEATURE_VAES              (16*32+ 9) /* Vector AES */
++#define X86_FEATURE_VPCLMULQDQ                (16*32+ 10) /* Carry-Less Multiplication Double Quadword */
++#define X86_FEATURE_AVX512_VNNI               (16*32+ 11) /* Vector Neural Network Instructions */
++#define X86_FEATURE_AVX512_BITALG     (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB */
++#define X86_FEATURE_AVX512_VPOPCNTDQ  (16*32+14) /* POPCNT for vectors of DW/QW */
++#define X86_FEATURE_LA57              (16*32+16) /* 5-level page tables */
++#define X86_FEATURE_RDPID             (16*32+22) /* RDPID instruction */
+ /* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
+-#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */
+-#define X86_FEATURE_SUCCOR    (17*32+1) /* Uncorrectable error containment and recovery */
+-#define X86_FEATURE_SMCA      (17*32+3) /* Scalable MCA */
++#define X86_FEATURE_OVERFLOW_RECOV    (17*32+0) /* MCA overflow recovery support */
++#define X86_FEATURE_SUCCOR            (17*32+1) /* Uncorrectable error containment and recovery */
++#define X86_FEATURE_SMCA              (17*32+3) /* Scalable MCA */
+ /*
+  * BUG word(s)
+  */
+-#define X86_BUG(x)            (NCAPINTS*32 + (x))
++#define X86_BUG(x)                    (NCAPINTS*32 + (x))
+-#define X86_BUG_F00F          X86_BUG(0) /* Intel F00F */
+-#define X86_BUG_FDIV          X86_BUG(1) /* FPU FDIV */
+-#define X86_BUG_COMA          X86_BUG(2) /* Cyrix 6x86 coma */
+-#define X86_BUG_AMD_TLB_MMATCH        X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
+-#define X86_BUG_AMD_APIC_C1E  X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
+-#define X86_BUG_11AP          X86_BUG(5) /* Bad local APIC aka 11AP */
+-#define X86_BUG_FXSAVE_LEAK   X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
+-#define X86_BUG_CLFLUSH_MONITOR       X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
+-#define X86_BUG_SYSRET_SS_ATTRS       X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
++#define X86_BUG_F00F                  X86_BUG(0) /* Intel F00F */
++#define X86_BUG_FDIV                  X86_BUG(1) /* FPU FDIV */
++#define X86_BUG_COMA                  X86_BUG(2) /* Cyrix 6x86 coma */
++#define X86_BUG_AMD_TLB_MMATCH                X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
++#define X86_BUG_AMD_APIC_C1E          X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
++#define X86_BUG_11AP                  X86_BUG(5) /* Bad local APIC aka 11AP */
++#define X86_BUG_FXSAVE_LEAK           X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
++#define X86_BUG_CLFLUSH_MONITOR               X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
++#define X86_BUG_SYSRET_SS_ATTRS               X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
+ #ifdef CONFIG_X86_32
+ /*
+  * 64-bit kernels don't use X86_BUG_ESPFIX.  Make the define conditional
+  * to avoid confusion.
+  */
+-#define X86_BUG_ESPFIX                X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
++#define X86_BUG_ESPFIX                        X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
+ #endif
+-#define X86_BUG_NULL_SEG      X86_BUG(10) /* Nulling a selector preserves the base */
+-#define X86_BUG_SWAPGS_FENCE  X86_BUG(11) /* SWAPGS without input dep on GS */
+-#define X86_BUG_MONITOR               X86_BUG(12) /* IPI required to wake up remote CPU */
+-#define X86_BUG_AMD_E400      X86_BUG(13) /* CPU is among the affected by Erratum 400 */
++#define X86_BUG_NULL_SEG              X86_BUG(10) /* Nulling a selector preserves the base */
++#define X86_BUG_SWAPGS_FENCE          X86_BUG(11) /* SWAPGS without input dep on GS */
++#define X86_BUG_MONITOR                       X86_BUG(12) /* IPI required to wake up remote CPU */
++#define X86_BUG_AMD_E400              X86_BUG(13) /* CPU is among the affected by Erratum 400 */
+ #endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/queue-4.14/x86-cpuid-replace-set-clear_bit32.patch b/queue-4.14/x86-cpuid-replace-set-clear_bit32.patch
new file mode 100644 (file)
index 0000000..8e39fa5
--- /dev/null
@@ -0,0 +1,62 @@
+From 06dd688ddda5819025e014b79aea9af6ab475fa2 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 2 Nov 2017 13:22:35 +0100
+Subject: x86/cpuid: Replace set/clear_bit32()
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 06dd688ddda5819025e014b79aea9af6ab475fa2 upstream.
+
+Peter pointed out that the set/clear_bit32() variants are broken in various
+aspects.
+
+Replace them with open coded set/clear_bit() and type cast
+cpu_info::x86_capability as it's done in all other places throughout x86.
+
+Fixes: 0b00de857a64 ("x86/cpuid: Add generic table for CPUID dependencies")
+Reported-by: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/cpu/cpuid-deps.c |   26 +++++++++++---------------
+ 1 file changed, 11 insertions(+), 15 deletions(-)
+
+--- a/arch/x86/kernel/cpu/cpuid-deps.c
++++ b/arch/x86/kernel/cpu/cpuid-deps.c
+@@ -62,23 +62,19 @@ const static struct cpuid_dep cpuid_deps
+       {}
+ };
+-static inline void __clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit)
+-{
+-      clear_bit32(bit, c->x86_capability);
+-}
+-
+-static inline void __setup_clear_cpu_cap(unsigned int bit)
+-{
+-      clear_cpu_cap(&boot_cpu_data, bit);
+-      set_bit32(bit, cpu_caps_cleared);
+-}
+-
+ static inline void clear_feature(struct cpuinfo_x86 *c, unsigned int feature)
+ {
+-      if (!c)
+-              __setup_clear_cpu_cap(feature);
+-      else
+-              __clear_cpu_cap(c, feature);
++      /*
++       * Note: This could use the non atomic __*_bit() variants, but the
++       * rest of the cpufeature code uses atomics as well, so keep it for
++       * consistency. Cleanup all of it separately.
++       */
++      if (!c) {
++              clear_cpu_cap(&boot_cpu_data, feature);
++              set_bit(feature, (unsigned long *)cpu_caps_cleared);
++      } else {
++              clear_bit(feature, (unsigned long *)c->x86_capability);
++      }
+ }
+ /* Take the capabilities and the BUG bits into account */
diff --git a/queue-4.14/x86-dumpstack-add-get_stack_info-support-for-the-sysenter-stack.patch b/queue-4.14/x86-dumpstack-add-get_stack_info-support-for-the-sysenter-stack.patch
new file mode 100644 (file)
index 0000000..ecbb43c
--- /dev/null
@@ -0,0 +1,169 @@
+From 33a2f1a6c4d7c0a02d1c006fb0379cc5ca3b96bb Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:13 +0100
+Subject: x86/dumpstack: Add get_stack_info() support for the SYSENTER stack
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 33a2f1a6c4d7c0a02d1c006fb0379cc5ca3b96bb upstream.
+
+get_stack_info() doesn't currently know about the SYSENTER stack, so
+unwinding will fail if we entered the kernel on the SYSENTER stack
+and haven't fully switched off.  Teach get_stack_info() about the
+SYSENTER stack.
+
+With future patches applied that run part of the entry code on the
+SYSENTER stack and introduce an intentional BUG(), I would get:
+
+  PANIC: double fault, error_code: 0x0
+  ...
+  RIP: 0010:do_error_trap+0x33/0x1c0
+  ...
+  Call Trace:
+  Code: ...
+
+With this patch, I get:
+
+  PANIC: double fault, error_code: 0x0
+  ...
+  Call Trace:
+   <SYSENTER>
+   ? async_page_fault+0x36/0x60
+   ? invalid_op+0x22/0x40
+   ? async_page_fault+0x36/0x60
+   ? sync_regs+0x3c/0x40
+   ? sync_regs+0x2e/0x40
+   ? error_entry+0x6c/0xd0
+   ? async_page_fault+0x36/0x60
+   </SYSENTER>
+  Code: ...
+
+which is a lot more informative.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150605.392711508@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
+index 8da111b3c342..f8062bfd43a0 100644
+--- a/arch/x86/include/asm/stacktrace.h
++++ b/arch/x86/include/asm/stacktrace.h
+@@ -16,6 +16,7 @@ enum stack_type {
+       STACK_TYPE_TASK,
+       STACK_TYPE_IRQ,
+       STACK_TYPE_SOFTIRQ,
++      STACK_TYPE_SYSENTER,
+       STACK_TYPE_EXCEPTION,
+       STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1,
+ };
+@@ -28,6 +29,8 @@ struct stack_info {
+ bool in_task_stack(unsigned long *stack, struct task_struct *task,
+                  struct stack_info *info);
++bool in_sysenter_stack(unsigned long *stack, struct stack_info *info);
++
+ int get_stack_info(unsigned long *stack, struct task_struct *task,
+                  struct stack_info *info, unsigned long *visit_mask);
+diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
+index 0bc95be5c638..a33a1373a252 100644
+--- a/arch/x86/kernel/dumpstack.c
++++ b/arch/x86/kernel/dumpstack.c
+@@ -43,6 +43,25 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
+       return true;
+ }
++bool in_sysenter_stack(unsigned long *stack, struct stack_info *info)
++{
++      struct tss_struct *tss = this_cpu_ptr(&cpu_tss);
++
++      /* Treat the canary as part of the stack for unwinding purposes. */
++      void *begin = &tss->SYSENTER_stack_canary;
++      void *end = (void *)&tss->SYSENTER_stack + sizeof(tss->SYSENTER_stack);
++
++      if ((void *)stack < begin || (void *)stack >= end)
++              return false;
++
++      info->type      = STACK_TYPE_SYSENTER;
++      info->begin     = begin;
++      info->end       = end;
++      info->next_sp   = NULL;
++
++      return true;
++}
++
+ static void printk_stack_address(unsigned long address, int reliable,
+                                char *log_lvl)
+ {
+diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
+index daefae83a3aa..5ff13a6b3680 100644
+--- a/arch/x86/kernel/dumpstack_32.c
++++ b/arch/x86/kernel/dumpstack_32.c
+@@ -26,6 +26,9 @@ const char *stack_type_name(enum stack_type type)
+       if (type == STACK_TYPE_SOFTIRQ)
+               return "SOFTIRQ";
++      if (type == STACK_TYPE_SYSENTER)
++              return "SYSENTER";
++
+       return NULL;
+ }
+@@ -93,6 +96,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
+       if (task != current)
+               goto unknown;
++      if (in_sysenter_stack(stack, info))
++              goto recursion_check;
++
+       if (in_hardirq_stack(stack, info))
+               goto recursion_check;
+diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
+index 88ce2ffdb110..abc828f8c297 100644
+--- a/arch/x86/kernel/dumpstack_64.c
++++ b/arch/x86/kernel/dumpstack_64.c
+@@ -37,6 +37,9 @@ const char *stack_type_name(enum stack_type type)
+       if (type == STACK_TYPE_IRQ)
+               return "IRQ";
++      if (type == STACK_TYPE_SYSENTER)
++              return "SYSENTER";
++
+       if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST)
+               return exception_stack_names[type - STACK_TYPE_EXCEPTION];
+@@ -115,6 +118,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
+       if (in_irq_stack(stack, info))
+               goto recursion_check;
++      if (in_sysenter_stack(stack, info))
++              goto recursion_check;
++
+       goto unknown;
+ recursion_check:
diff --git a/queue-4.14/x86-dumpstack-handle-stack-overflow-on-all-stacks.patch b/queue-4.14/x86-dumpstack-handle-stack-overflow-on-all-stacks.patch
new file mode 100644 (file)
index 0000000..05e9c5f
--- /dev/null
@@ -0,0 +1,87 @@
+From 6e60e583426c2f8751c22c2dfe5c207083b4483a Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:18 +0100
+Subject: x86/dumpstack: Handle stack overflow on all stacks
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 6e60e583426c2f8751c22c2dfe5c207083b4483a upstream.
+
+We currently special-case stack overflow on the task stack.  We're
+going to start putting special stacks in the fixmap with a custom
+layout, so they'll have guard pages, too.  Teach the unwinder to be
+able to unwind an overflow of any of the stacks.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150605.802057305@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/dumpstack.c |   24 ++++++++++++++----------
+ 1 file changed, 14 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/kernel/dumpstack.c
++++ b/arch/x86/kernel/dumpstack.c
+@@ -112,24 +112,28 @@ void show_trace_log_lvl(struct task_stru
+        * - task stack
+        * - interrupt stack
+        * - HW exception stacks (double fault, nmi, debug, mce)
++       * - SYSENTER stack
+        *
+-       * x86-32 can have up to three stacks:
++       * x86-32 can have up to four stacks:
+        * - task stack
+        * - softirq stack
+        * - hardirq stack
++       * - SYSENTER stack
+        */
+       for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
+               const char *stack_name;
+-              /*
+-               * If we overflowed the task stack into a guard page, jump back
+-               * to the bottom of the usable stack.
+-               */
+-              if (task_stack_page(task) - (void *)stack < PAGE_SIZE)
+-                      stack = task_stack_page(task);
+-
+-              if (get_stack_info(stack, task, &stack_info, &visit_mask))
+-                      break;
++              if (get_stack_info(stack, task, &stack_info, &visit_mask)) {
++                      /*
++                       * We weren't on a valid stack.  It's possible that
++                       * we overflowed a valid stack into a guard page.
++                       * See if the next page up is valid so that we can
++                       * generate some kind of backtrace if this happens.
++                       */
++                      stack = (unsigned long *)PAGE_ALIGN((unsigned long)stack);
++                      if (get_stack_info(stack, task, &stack_info, &visit_mask))
++                              break;
++              }
+               stack_name = stack_type_name(stack_info.type);
+               if (stack_name)
diff --git a/queue-4.14/x86-entry-32-fix-cpu_current_top_of_stack-initialization-at-boot.patch b/queue-4.14/x86-entry-32-fix-cpu_current_top_of_stack-initialization-at-boot.patch
new file mode 100644 (file)
index 0000000..9d7a709
--- /dev/null
@@ -0,0 +1,41 @@
+From cd493a6deb8b78eca280d05f7fa73fd69403ae29 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:59:15 -0700
+Subject: x86/entry/32: Fix cpu_current_top_of_stack initialization at boot
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit cd493a6deb8b78eca280d05f7fa73fd69403ae29 upstream.
+
+cpu_current_top_of_stack's initialization forgot about
+TOP_OF_KERNEL_STACK_PADDING.  This bug didn't matter because the
+idle threads never enter user mode.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/e5e370a7e6e4fddd1c4e4cf619765d96bb874b21.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/smpboot.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/arch/x86/kernel/smpboot.c
++++ b/arch/x86/kernel/smpboot.c
+@@ -962,8 +962,7 @@ void common_cpu_up(unsigned int cpu, str
+ #ifdef CONFIG_X86_32
+       /* Stack for startup_32 can be just as for start_secondary onwards */
+       irq_ctx_init(cpu);
+-      per_cpu(cpu_current_top_of_stack, cpu) =
+-              (unsigned long)task_stack_page(idle) + THREAD_SIZE;
++      per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle);
+ #else
+       initial_gs = per_cpu_offset(cpu);
+ #endif
diff --git a/queue-4.14/x86-entry-32-pull-the-msr_ia32_sysenter_cs-update-code-out-of-native_load_sp0.patch b/queue-4.14/x86-entry-32-pull-the-msr_ia32_sysenter_cs-update-code-out-of-native_load_sp0.patch
new file mode 100644 (file)
index 0000000..1e6a7be
--- /dev/null
@@ -0,0 +1,127 @@
+From bd7dc5a6afac719d8ce4092391eef2c7e83c2a75 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:59:09 -0700
+Subject: x86/entry/32: Pull the MSR_IA32_SYSENTER_CS update code out of native_load_sp0()
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit bd7dc5a6afac719d8ce4092391eef2c7e83c2a75 upstream.
+
+This causes the MSR_IA32_SYSENTER_CS write to move out of the
+paravirt callback.  This shouldn't affect Xen PV: Xen already ignores
+MSR_IA32_SYSENTER_ESP writes.  In any event, Xen doesn't support
+vm86() in a useful way.
+
+Note to any potential backporters: This patch won't break lguest, as
+lguest didn't have any SYSENTER support at all.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/75cf09fe03ae778532d0ca6c65aa58e66bc2f90c.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/processor.h |    7 -------
+ arch/x86/include/asm/switch_to.h |   12 ++++++++++++
+ arch/x86/kernel/process_32.c     |    4 +++-
+ arch/x86/kernel/process_64.c     |    2 +-
+ arch/x86/kernel/vm86_32.c        |    6 +++++-
+ 5 files changed, 21 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -521,13 +521,6 @@ static inline void
+ native_load_sp0(struct tss_struct *tss, struct thread_struct *thread)
+ {
+       tss->x86_tss.sp0 = thread->sp0;
+-#ifdef CONFIG_X86_32
+-      /* Only happens when SEP is enabled, no need to test "SEP"arately: */
+-      if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
+-              tss->x86_tss.ss1 = thread->sysenter_cs;
+-              wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
+-      }
+-#endif
+ }
+ static inline void native_swapgs(void)
+--- a/arch/x86/include/asm/switch_to.h
++++ b/arch/x86/include/asm/switch_to.h
+@@ -73,4 +73,16 @@ do {                                                                        \
+       ((last) = __switch_to_asm((prev), (next)));                     \
+ } while (0)
++#ifdef CONFIG_X86_32
++static inline void refresh_sysenter_cs(struct thread_struct *thread)
++{
++      /* Only happens when SEP is enabled, no need to test "SEP"arately: */
++      if (unlikely(this_cpu_read(cpu_tss.x86_tss.ss1) == thread->sysenter_cs))
++              return;
++
++      this_cpu_write(cpu_tss.x86_tss.ss1, thread->sysenter_cs);
++      wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
++}
++#endif
++
+ #endif /* _ASM_X86_SWITCH_TO_H */
+--- a/arch/x86/kernel/process_32.c
++++ b/arch/x86/kernel/process_32.c
+@@ -284,9 +284,11 @@ __switch_to(struct task_struct *prev_p,
+       /*
+        * Reload esp0 and cpu_current_top_of_stack.  This changes
+-       * current_thread_info().
++       * current_thread_info().  Refresh the SYSENTER configuration in
++       * case prev or next is vm86.
+        */
+       load_sp0(tss, next);
++      refresh_sysenter_cs(next);
+       this_cpu_write(cpu_current_top_of_stack,
+                      (unsigned long)task_stack_page(next_p) +
+                      THREAD_SIZE);
+--- a/arch/x86/kernel/process_64.c
++++ b/arch/x86/kernel/process_64.c
+@@ -464,7 +464,7 @@ __switch_to(struct task_struct *prev_p,
+        */
+       this_cpu_write(current_task, next_p);
+-      /* Reload esp0 and ss1.  This changes current_thread_info(). */
++      /* Reload sp0. */
+       load_sp0(tss, next);
+       /*
+--- a/arch/x86/kernel/vm86_32.c
++++ b/arch/x86/kernel/vm86_32.c
+@@ -55,6 +55,7 @@
+ #include <asm/irq.h>
+ #include <asm/traps.h>
+ #include <asm/vm86.h>
++#include <asm/switch_to.h>
+ /*
+  * Known problems:
+@@ -150,6 +151,7 @@ void save_v86_state(struct kernel_vm86_r
+       tsk->thread.sp0 = vm86->saved_sp0;
+       tsk->thread.sysenter_cs = __KERNEL_CS;
+       load_sp0(tss, &tsk->thread);
++      refresh_sysenter_cs(&tsk->thread);
+       vm86->saved_sp0 = 0;
+       put_cpu();
+@@ -369,8 +371,10 @@ static long do_sys_vm86(struct vm86plus_
+       /* make room for real-mode segments */
+       tsk->thread.sp0 += 16;
+-      if (static_cpu_has(X86_FEATURE_SEP))
++      if (static_cpu_has(X86_FEATURE_SEP)) {
+               tsk->thread.sysenter_cs = 0;
++              refresh_sysenter_cs(&tsk->thread);
++      }
+       load_sp0(tss, &tsk->thread);
+       put_cpu();
diff --git a/queue-4.14/x86-entry-64-allocate-and-enable-the-sysenter-stack.patch b/queue-4.14/x86-entry-64-allocate-and-enable-the-sysenter-stack.patch
new file mode 100644 (file)
index 0000000..60c9cb5
--- /dev/null
@@ -0,0 +1,161 @@
+From 1a79797b58cddfa948420a7553241c79c013e3ca Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:12 +0100
+Subject: x86/entry/64: Allocate and enable the SYSENTER stack
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 1a79797b58cddfa948420a7553241c79c013e3ca upstream.
+
+This will simplify future changes that want scratch variables early in
+the SYSENTER handler -- they'll be able to spill registers to the
+stack.  It also lets us get rid of a SWAPGS_UNSAFE_STACK user.
+
+This does not depend on CONFIG_IA32_EMULATION=y because we'll want the
+stack space even without IA32 emulation.
+
+As far as I can tell, the reason that this wasn't done from day 1 is
+that we use IST for #DB and #BP, which is IMO rather nasty and causes
+a lot more problems than it solves.  But, since #DB uses IST, we don't
+actually need a real stack for SYSENTER (because SYSENTER with TF set
+will invoke #DB on the IST stack rather than the SYSENTER stack).
+
+I want to remove IST usage from these vectors some day, and this patch
+is a prerequisite for that as well.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150605.312726423@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/entry/entry_64_compat.S |    2 +-
+ arch/x86/include/asm/processor.h |    3 ---
+ arch/x86/kernel/asm-offsets.c    |    5 +++++
+ arch/x86/kernel/asm-offsets_32.c |    5 -----
+ arch/x86/kernel/cpu/common.c     |    4 +++-
+ arch/x86/kernel/process.c        |    2 --
+ arch/x86/kernel/traps.c          |    3 +--
+ 7 files changed, 10 insertions(+), 14 deletions(-)
+
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -48,7 +48,7 @@
+  */
+ ENTRY(entry_SYSENTER_compat)
+       /* Interrupts are off on entry. */
+-      SWAPGS_UNSAFE_STACK
++      SWAPGS
+       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+       /*
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -339,14 +339,11 @@ struct tss_struct {
+        */
+       unsigned long           io_bitmap[IO_BITMAP_LONGS + 1];
+-#ifdef CONFIG_X86_32
+       /*
+        * Space for the temporary SYSENTER stack.
+        */
+       unsigned long           SYSENTER_stack_canary;
+       unsigned long           SYSENTER_stack[64];
+-#endif
+-
+ } ____cacheline_aligned;
+ DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
+--- a/arch/x86/kernel/asm-offsets.c
++++ b/arch/x86/kernel/asm-offsets.c
+@@ -93,4 +93,9 @@ void common(void) {
+       BLANK();
+       DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
++
++      /* Offset from cpu_tss to SYSENTER_stack */
++      OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
++      /* Size of SYSENTER_stack */
++      DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
+ }
+--- a/arch/x86/kernel/asm-offsets_32.c
++++ b/arch/x86/kernel/asm-offsets_32.c
+@@ -50,11 +50,6 @@ void foo(void)
+       DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
+              offsetofend(struct tss_struct, SYSENTER_stack));
+-      /* Offset from cpu_tss to SYSENTER_stack */
+-      OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
+-      /* Size of SYSENTER_stack */
+-      DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
+-
+ #ifdef CONFIG_CC_STACKPROTECTOR
+       BLANK();
+       OFFSET(stack_canary_offset, stack_canary, canary);
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1361,7 +1361,9 @@ void syscall_init(void)
+        * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
+        */
+       wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
+-      wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
++      wrmsrl_safe(MSR_IA32_SYSENTER_ESP,
++                  (unsigned long)this_cpu_ptr(&cpu_tss) +
++                  offsetofend(struct tss_struct, SYSENTER_stack));
+       wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
+ #else
+       wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -71,9 +71,7 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(
+         */
+       .io_bitmap              = { [0 ... IO_BITMAP_LONGS] = ~0 },
+ #endif
+-#ifdef CONFIG_X86_32
+       .SYSENTER_stack_canary  = STACK_END_MAGIC,
+-#endif
+ };
+ EXPORT_PER_CPU_SYMBOL(cpu_tss);
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -794,14 +794,13 @@ dotraplinkage void do_debug(struct pt_re
+       debug_stack_usage_dec();
+ exit:
+-#if defined(CONFIG_X86_32)
+       /*
+        * This is the most likely code path that involves non-trivial use
+        * of the SYSENTER stack.  Check that we haven't overrun it.
+        */
+       WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC,
+            "Overran or corrupted SYSENTER stack\n");
+-#endif
++
+       ist_exit(regs);
+ }
+ NOKPROBE_SYMBOL(do_debug);
diff --git a/queue-4.14/x86-entry-64-create-a-per-cpu-syscall-entry-trampoline.patch b/queue-4.14/x86-entry-64-create-a-per-cpu-syscall-entry-trampoline.patch
new file mode 100644 (file)
index 0000000..a19a3a7
--- /dev/null
@@ -0,0 +1,224 @@
+From 3386bc8aed825e9f1f65ce38df4b109b2019b71a Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:25 +0100
+Subject: x86/entry/64: Create a per-CPU SYSCALL entry trampoline
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 3386bc8aed825e9f1f65ce38df4b109b2019b71a upstream.
+
+Handling SYSCALL is tricky: the SYSCALL handler is entered with every
+single register (except FLAGS), including RSP, live.  It somehow needs
+to set RSP to point to a valid stack, which means it needs to save the
+user RSP somewhere and find its own stack pointer.  The canonical way
+to do this is with SWAPGS, which lets us access percpu data using the
+%gs prefix.
+
+With PAGE_TABLE_ISOLATION-like pagetable switching, this is
+problematic.  Without a scratch register, switching CR3 is impossible, so
+%gs-based percpu memory would need to be mapped in the user pagetables.
+Doing that without information leaks is difficult or impossible.
+
+Instead, use a different sneaky trick.  Map a copy of the first part
+of the SYSCALL asm at a different address for each CPU.  Now RIP
+varies depending on the CPU, so we can use RIP-relative memory access
+to access percpu memory.  By putting the relevant information (one
+scratch slot and the stack address) at a constant offset relative to
+RIP, we can make SYSCALL work without relying on %gs.
+
+A nice thing about this approach is that we can easily switch it on
+and off if we want pagetable switching to be configurable.
+
+The compat variant of SYSCALL doesn't have this problem in the first
+place -- there are plenty of scratch registers, since we don't care
+about preserving r8-r15.  This patch therefore doesn't touch SYSCALL32
+at all.
+
+This patch actually seems to be a small speedup.  With this patch,
+SYSCALL touches an extra cache line and an extra virtual page, but
+the pipeline no longer stalls waiting for SWAPGS.  It seems that, at
+least in a tight loop, the latter outweighs the former.
+
+Thanks to David Laight for an optimization tip.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bpetkov@suse.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150606.403607157@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/entry/entry_64.S     |   58 ++++++++++++++++++++++++++++++++++++++++++
+ arch/x86/include/asm/fixmap.h |    2 +
+ arch/x86/kernel/asm-offsets.c |    1 
+ arch/x86/kernel/cpu/common.c  |   15 ++++++++++
+ arch/x86/kernel/vmlinux.lds.S |    9 ++++++
+ 5 files changed, 84 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -136,6 +136,64 @@ END(native_usergs_sysret64)
+  * with them due to bugs in both AMD and Intel CPUs.
+  */
++      .pushsection .entry_trampoline, "ax"
++
++/*
++ * The code in here gets remapped into cpu_entry_area's trampoline.  This means
++ * that the assembler and linker have the wrong idea as to where this code
++ * lives (and, in fact, it's mapped more than once, so it's not even at a
++ * fixed address).  So we can't reference any symbols outside the entry
++ * trampoline and expect it to work.
++ *
++ * Instead, we carefully abuse %rip-relative addressing.
++ * _entry_trampoline(%rip) refers to the start of the (remapped) entry
++ * trampoline.  We can thus find cpu_entry_area with this macro:
++ */
++
++#define CPU_ENTRY_AREA \
++      _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
++
++/* The top word of the SYSENTER stack is hot and is usable as scratch space. */
++#define RSP_SCRATCH   CPU_ENTRY_AREA_tss + CPU_TSS_SYSENTER_stack + \
++                      SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA
++
++ENTRY(entry_SYSCALL_64_trampoline)
++      UNWIND_HINT_EMPTY
++      swapgs
++
++      /* Stash the user RSP. */
++      movq    %rsp, RSP_SCRATCH
++
++      /* Load the top of the task stack into RSP */
++      movq    CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
++
++      /* Start building the simulated IRET frame. */
++      pushq   $__USER_DS                      /* pt_regs->ss */
++      pushq   RSP_SCRATCH                     /* pt_regs->sp */
++      pushq   %r11                            /* pt_regs->flags */
++      pushq   $__USER_CS                      /* pt_regs->cs */
++      pushq   %rcx                            /* pt_regs->ip */
++
++      /*
++       * x86 lacks a near absolute jump, and we can't jump to the real
++       * entry text with a relative jump.  We could push the target
++       * address and then use retq, but this destroys the pipeline on
++       * many CPUs (wasting over 20 cycles on Sandy Bridge).  Instead,
++       * spill RDI and restore it in a second-stage trampoline.
++       */
++      pushq   %rdi
++      movq    $entry_SYSCALL_64_stage2, %rdi
++      jmp     *%rdi
++END(entry_SYSCALL_64_trampoline)
++
++      .popsection
++
++ENTRY(entry_SYSCALL_64_stage2)
++      UNWIND_HINT_EMPTY
++      popq    %rdi
++      jmp     entry_SYSCALL_64_after_hwframe
++END(entry_SYSCALL_64_stage2)
++
+ ENTRY(entry_SYSCALL_64)
+       UNWIND_HINT_EMPTY
+       /*
+--- a/arch/x86/include/asm/fixmap.h
++++ b/arch/x86/include/asm/fixmap.h
+@@ -61,6 +61,8 @@ struct cpu_entry_area {
+        * of the TSS region.
+        */
+       struct tss_struct tss;
++
++      char entry_trampoline[PAGE_SIZE];
+ };
+ #define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE)
+--- a/arch/x86/kernel/asm-offsets.c
++++ b/arch/x86/kernel/asm-offsets.c
+@@ -101,4 +101,5 @@ void common(void) {
+       /* Layout info for cpu_entry_area */
+       OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
++      OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
+ }
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -486,6 +486,8 @@ DEFINE_PER_CPU(struct cpu_entry_area *,
+ static inline void setup_cpu_entry_area(int cpu)
+ {
+ #ifdef CONFIG_X86_64
++      extern char _entry_trampoline[];
++
+       /* On 64-bit systems, we use a read-only fixmap GDT. */
+       pgprot_t gdt_prot = PAGE_KERNEL_RO;
+ #else
+@@ -532,6 +534,11 @@ static inline void setup_cpu_entry_area(
+ #ifdef CONFIG_X86_32
+       this_cpu_write(cpu_entry_area, get_cpu_entry_area(cpu));
+ #endif
++
++#ifdef CONFIG_X86_64
++      __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
++                   __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
++#endif
+ }
+ /* Load the original GDT from the per-cpu structure */
+@@ -1395,10 +1402,16 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char,
+ /* May not be marked __init: used by software suspend */
+ void syscall_init(void)
+ {
++      extern char _entry_trampoline[];
++      extern char entry_SYSCALL_64_trampoline[];
++
+       int cpu = smp_processor_id();
++      unsigned long SYSCALL64_entry_trampoline =
++              (unsigned long)get_cpu_entry_area(cpu)->entry_trampoline +
++              (entry_SYSCALL_64_trampoline - _entry_trampoline);
+       wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
+-      wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
++      wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
+ #ifdef CONFIG_IA32_EMULATION
+       wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
+--- a/arch/x86/kernel/vmlinux.lds.S
++++ b/arch/x86/kernel/vmlinux.lds.S
+@@ -107,6 +107,15 @@ SECTIONS
+               SOFTIRQENTRY_TEXT
+               *(.fixup)
+               *(.gnu.warning)
++
++#ifdef CONFIG_X86_64
++              . = ALIGN(PAGE_SIZE);
++              _entry_trampoline = .;
++              *(.entry_trampoline)
++              . = ALIGN(PAGE_SIZE);
++              ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
++#endif
++
+               /* End of text section */
+               _etext = .;
+       } :text = 0x9090
diff --git a/queue-4.14/x86-entry-64-de-xen-ify-our-nmi-code.patch b/queue-4.14/x86-entry-64-de-xen-ify-our-nmi-code.patch
new file mode 100644 (file)
index 0000000..d156862
--- /dev/null
@@ -0,0 +1,108 @@
+From 929bacec21478a72c78e4f29f98fb799bd00105a Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:59:08 -0700
+Subject: x86/entry/64: De-Xen-ify our NMI code
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 929bacec21478a72c78e4f29f98fb799bd00105a upstream.
+
+Xen PV is fundamentally incompatible with our fancy NMI code: it
+doesn't use IST at all, and Xen entries clobber two stack slots
+below the hardware frame.
+
+Drop Xen PV support from our NMI code entirely.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Acked-by: Juergen Gross <jgross@suse.com>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/bfbe711b5ae03f672f8848999a8eb2711efc7f98.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/entry/entry_64.S |   30 ++++++++++++++++++------------
+ 1 file changed, 18 insertions(+), 12 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1241,9 +1241,13 @@ ENTRY(error_exit)
+       jmp     retint_user
+ END(error_exit)
+-/* Runs on exception stack */
++/*
++ * Runs on exception stack.  Xen PV does not go through this path at all,
++ * so we can use real assembly here.
++ */
+ ENTRY(nmi)
+       UNWIND_HINT_IRET_REGS
++
+       /*
+        * We allow breakpoints in NMIs. If a breakpoint occurs, then
+        * the iretq it performs will take us out of NMI context.
+@@ -1301,7 +1305,7 @@ ENTRY(nmi)
+        * stacks lest we corrupt the "NMI executing" variable.
+        */
+-      SWAPGS_UNSAFE_STACK
++      swapgs
+       cld
+       movq    %rsp, %rdx
+       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+@@ -1466,7 +1470,7 @@ nested_nmi_out:
+       popq    %rdx
+       /* We are returning to kernel mode, so this cannot result in a fault. */
+-      INTERRUPT_RETURN
++      iretq
+ first_nmi:
+       /* Restore rdx. */
+@@ -1497,7 +1501,7 @@ first_nmi:
+       pushfq                  /* RFLAGS */
+       pushq   $__KERNEL_CS    /* CS */
+       pushq   $1f             /* RIP */
+-      INTERRUPT_RETURN        /* continues at repeat_nmi below */
++      iretq                   /* continues at repeat_nmi below */
+       UNWIND_HINT_IRET_REGS
+ 1:
+ #endif
+@@ -1572,20 +1576,22 @@ nmi_restore:
+       /*
+        * Clear "NMI executing".  Set DF first so that we can easily
+        * distinguish the remaining code between here and IRET from
+-       * the SYSCALL entry and exit paths.  On a native kernel, we
+-       * could just inspect RIP, but, on paravirt kernels,
+-       * INTERRUPT_RETURN can translate into a jump into a
+-       * hypercall page.
++       * the SYSCALL entry and exit paths.
++       *
++       * We arguably should just inspect RIP instead, but I (Andy) wrote
++       * this code when I had the misapprehension that Xen PV supported
++       * NMIs, and Xen PV would break that approach.
+        */
+       std
+       movq    $0, 5*8(%rsp)           /* clear "NMI executing" */
+       /*
+-       * INTERRUPT_RETURN reads the "iret" frame and exits the NMI
+-       * stack in a single instruction.  We are returning to kernel
+-       * mode, so this cannot result in a fault.
++       * iretq reads the "iret" frame and exits the NMI stack in a
++       * single instruction.  We are returning to kernel mode, so this
++       * cannot result in a fault.  Similarly, we don't need to worry
++       * about espfix64 on the way back to kernel mode.
+        */
+-      INTERRUPT_RETURN
++      iretq
+ END(nmi)
+ ENTRY(ignore_sysret)
diff --git a/queue-4.14/x86-entry-64-make-cpu_entry_area.tss-read-only.patch b/queue-4.14/x86-entry-64-make-cpu_entry_area.tss-read-only.patch
new file mode 100644 (file)
index 0000000..6248d76
--- /dev/null
@@ -0,0 +1,453 @@
+From c482feefe1aeb150156248ba0fd3e029bc886605 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:29 +0100
+Subject: x86/entry/64: Make cpu_entry_area.tss read-only
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit c482feefe1aeb150156248ba0fd3e029bc886605 upstream.
+
+The TSS is a fairly juicy target for exploits, and, now that the TSS
+is in the cpu_entry_area, it's no longer protected by kASLR.  Make it
+read-only on x86_64.
+
+On x86_32, it can't be RO because it's written by the CPU during task
+switches, and we use a task gate for double faults.  I'd also be
+nervous about errata if we tried to make it RO even on configurations
+without double fault handling.
+
+[ tglx: AMD confirmed that there is no problem on 64-bit with TSS RO.  So
+       it's probably safe to assume that it's a non issue, though Intel
+       might have been creative in that area. Still waiting for
+       confirmation. ]
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bpetkov@suse.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150606.733700132@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/entry/entry_32.S          |    4 ++--
+ arch/x86/entry/entry_64.S          |    8 ++++----
+ arch/x86/include/asm/fixmap.h      |   13 +++++++++----
+ arch/x86/include/asm/processor.h   |   17 ++++++++---------
+ arch/x86/include/asm/switch_to.h   |    4 ++--
+ arch/x86/include/asm/thread_info.h |    2 +-
+ arch/x86/kernel/asm-offsets.c      |    5 ++---
+ arch/x86/kernel/asm-offsets_32.c   |    4 ++--
+ arch/x86/kernel/cpu/common.c       |   29 +++++++++++++++++++----------
+ arch/x86/kernel/ioport.c           |    2 +-
+ arch/x86/kernel/process.c          |    6 +++---
+ arch/x86/kernel/process_32.c       |    2 +-
+ arch/x86/kernel/process_64.c       |    2 +-
+ arch/x86/kernel/traps.c            |    4 ++--
+ arch/x86/lib/delay.c               |    4 ++--
+ arch/x86/xen/enlighten_pv.c        |    2 +-
+ 16 files changed, 60 insertions(+), 48 deletions(-)
+
+--- a/arch/x86/entry/entry_32.S
++++ b/arch/x86/entry/entry_32.S
+@@ -942,7 +942,7 @@ ENTRY(debug)
+       /* Are we currently on the SYSENTER stack? */
+       movl    PER_CPU_VAR(cpu_entry_area), %ecx
+-      addl    $CPU_ENTRY_AREA_tss + TSS_STRUCT_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
++      addl    $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
+       subl    %eax, %ecx      /* ecx = (end of SYSENTER_stack) - esp */
+       cmpl    $SIZEOF_SYSENTER_stack, %ecx
+       jb      .Ldebug_from_sysenter_stack
+@@ -986,7 +986,7 @@ ENTRY(nmi)
+       /* Are we currently on the SYSENTER stack? */
+       movl    PER_CPU_VAR(cpu_entry_area), %ecx
+-      addl    $CPU_ENTRY_AREA_tss + TSS_STRUCT_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
++      addl    $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
+       subl    %eax, %ecx      /* ecx = (end of SYSENTER_stack) - esp */
+       cmpl    $SIZEOF_SYSENTER_stack, %ecx
+       jb      .Lnmi_from_sysenter_stack
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -154,7 +154,7 @@ END(native_usergs_sysret64)
+       _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
+ /* The top word of the SYSENTER stack is hot and is usable as scratch space. */
+-#define RSP_SCRATCH   CPU_ENTRY_AREA_tss + TSS_STRUCT_SYSENTER_stack + \
++#define RSP_SCRATCH   CPU_ENTRY_AREA_SYSENTER_stack + \
+                       SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA
+ ENTRY(entry_SYSCALL_64_trampoline)
+@@ -390,7 +390,7 @@ syscall_return_via_sysret:
+        * Save old stack pointer and switch to trampoline stack.
+        */
+       movq    %rsp, %rdi
+-      movq    PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp
++      movq    PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+       pushq   RSP-RDI(%rdi)   /* RSP */
+       pushq   (%rdi)          /* RDI */
+@@ -719,7 +719,7 @@ GLOBAL(swapgs_restore_regs_and_return_to
+        * Save old stack pointer and switch to trampoline stack.
+        */
+       movq    %rsp, %rdi
+-      movq    PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp
++      movq    PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+       /* Copy the IRET frame to the trampoline stack. */
+       pushq   6*8(%rdi)       /* SS */
+@@ -934,7 +934,7 @@ apicinterrupt IRQ_WORK_VECTOR                      irq_work
+ /*
+  * Exception entry points.
+  */
+-#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8)
++#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
+ /*
+  * Switch to the thread stack.  This is called with the IRET frame and
+--- a/arch/x86/include/asm/fixmap.h
++++ b/arch/x86/include/asm/fixmap.h
+@@ -56,9 +56,14 @@ struct cpu_entry_area {
+       char gdt[PAGE_SIZE];
+       /*
+-       * The GDT is just below cpu_tss and thus serves (on x86_64) as a
+-       * a read-only guard page for the SYSENTER stack at the bottom
+-       * of the TSS region.
++       * The GDT is just below SYSENTER_stack and thus serves (on x86_64) as
++       * a read-only guard page.
++       */
++      struct SYSENTER_stack_page SYSENTER_stack_page;
++
++      /*
++       * On x86_64, the TSS is mapped RO.  On x86_32, it's mapped RW because
++       * we need task switches to work, and task switches write to the TSS.
+        */
+       struct tss_struct tss;
+@@ -247,7 +252,7 @@ static inline struct cpu_entry_area *get
+ static inline struct SYSENTER_stack *cpu_SYSENTER_stack(int cpu)
+ {
+-      return &get_cpu_entry_area(cpu)->tss.SYSENTER_stack;
++      return &get_cpu_entry_area(cpu)->SYSENTER_stack_page.stack;
+ }
+ #endif /* !__ASSEMBLY__ */
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -340,13 +340,11 @@ struct SYSENTER_stack {
+       unsigned long           words[64];
+ };
+-struct tss_struct {
+-      /*
+-       * Space for the temporary SYSENTER stack, used for SYSENTER
+-       * and the entry trampoline as well.
+-       */
+-      struct SYSENTER_stack   SYSENTER_stack;
++struct SYSENTER_stack_page {
++      struct SYSENTER_stack stack;
++} __aligned(PAGE_SIZE);
++struct tss_struct {
+       /*
+        * The fixed hardware portion.  This must not cross a page boundary
+        * at risk of violating the SDM's advice and potentially triggering
+@@ -363,7 +361,7 @@ struct tss_struct {
+       unsigned long           io_bitmap[IO_BITMAP_LONGS + 1];
+ } __aligned(PAGE_SIZE);
+-DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss);
++DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
+ /*
+  * sizeof(unsigned long) coming from an extra "long" at the end
+@@ -378,7 +376,8 @@ DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_
+ #ifdef CONFIG_X86_32
+ DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
+ #else
+-#define cpu_current_top_of_stack cpu_tss.x86_tss.sp1
++/* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */
++#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
+ #endif
+ /*
+@@ -538,7 +537,7 @@ static inline void native_set_iopl_mask(
+ static inline void
+ native_load_sp0(unsigned long sp0)
+ {
+-      this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
++      this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
+ }
+ static inline void native_swapgs(void)
+--- a/arch/x86/include/asm/switch_to.h
++++ b/arch/x86/include/asm/switch_to.h
+@@ -79,10 +79,10 @@ do {                                                                       \
+ static inline void refresh_sysenter_cs(struct thread_struct *thread)
+ {
+       /* Only happens when SEP is enabled, no need to test "SEP"arately: */
+-      if (unlikely(this_cpu_read(cpu_tss.x86_tss.ss1) == thread->sysenter_cs))
++      if (unlikely(this_cpu_read(cpu_tss_rw.x86_tss.ss1) == thread->sysenter_cs))
+               return;
+-      this_cpu_write(cpu_tss.x86_tss.ss1, thread->sysenter_cs);
++      this_cpu_write(cpu_tss_rw.x86_tss.ss1, thread->sysenter_cs);
+       wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
+ }
+ #endif
+--- a/arch/x86/include/asm/thread_info.h
++++ b/arch/x86/include/asm/thread_info.h
+@@ -207,7 +207,7 @@ static inline int arch_within_stack_fram
+ #else /* !__ASSEMBLY__ */
+ #ifdef CONFIG_X86_64
+-# define cpu_current_top_of_stack (cpu_tss + TSS_sp1)
++# define cpu_current_top_of_stack (cpu_tss_rw + TSS_sp1)
+ #endif
+ #endif
+--- a/arch/x86/kernel/asm-offsets.c
++++ b/arch/x86/kernel/asm-offsets.c
+@@ -94,10 +94,9 @@ void common(void) {
+       BLANK();
+       DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
+-      OFFSET(TSS_STRUCT_SYSENTER_stack, tss_struct, SYSENTER_stack);
+-      DEFINE(SIZEOF_SYSENTER_stack, sizeof(struct SYSENTER_stack));
+-
+       /* Layout info for cpu_entry_area */
+       OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
+       OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
++      OFFSET(CPU_ENTRY_AREA_SYSENTER_stack, cpu_entry_area, SYSENTER_stack_page);
++      DEFINE(SIZEOF_SYSENTER_stack, sizeof(struct SYSENTER_stack));
+ }
+--- a/arch/x86/kernel/asm-offsets_32.c
++++ b/arch/x86/kernel/asm-offsets_32.c
+@@ -47,8 +47,8 @@ void foo(void)
+       BLANK();
+       /* Offset from the sysenter stack to tss.sp0 */
+-      DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
+-             offsetofend(struct tss_struct, SYSENTER_stack));
++      DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) -
++             offsetofend(struct cpu_entry_area, SYSENTER_stack_page.stack));
+ #ifdef CONFIG_CC_STACKPROTECTOR
+       BLANK();
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -487,6 +487,9 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char,
+       [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
+ #endif
++static DEFINE_PER_CPU_PAGE_ALIGNED(struct SYSENTER_stack_page,
++                                 SYSENTER_stack_storage);
++
+ static void __init
+ set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
+ {
+@@ -500,23 +503,29 @@ static void __init setup_cpu_entry_area(
+ #ifdef CONFIG_X86_64
+       extern char _entry_trampoline[];
+-      /* On 64-bit systems, we use a read-only fixmap GDT. */
++      /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
+       pgprot_t gdt_prot = PAGE_KERNEL_RO;
++      pgprot_t tss_prot = PAGE_KERNEL_RO;
+ #else
+       /*
+        * On native 32-bit systems, the GDT cannot be read-only because
+        * our double fault handler uses a task gate, and entering through
+-       * a task gate needs to change an available TSS to busy.  If the GDT
+-       * is read-only, that will triple fault.
++       * a task gate needs to change an available TSS to busy.  If the
++       * GDT is read-only, that will triple fault.  The TSS cannot be
++       * read-only because the CPU writes to it on task switches.
+        *
+-       * On Xen PV, the GDT must be read-only because the hypervisor requires
+-       * it.
++       * On Xen PV, the GDT must be read-only because the hypervisor
++       * requires it.
+        */
+       pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
+               PAGE_KERNEL_RO : PAGE_KERNEL;
++      pgprot_t tss_prot = PAGE_KERNEL;
+ #endif
+       __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
++      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, SYSENTER_stack_page),
++                              per_cpu_ptr(&SYSENTER_stack_storage, cpu), 1,
++                              PAGE_KERNEL);
+       /*
+        * The Intel SDM says (Volume 3, 7.2.1):
+@@ -539,9 +548,9 @@ static void __init setup_cpu_entry_area(
+                     offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
+       BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
+       set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
+-                              &per_cpu(cpu_tss, cpu),
++                              &per_cpu(cpu_tss_rw, cpu),
+                               sizeof(struct tss_struct) / PAGE_SIZE,
+-                              PAGE_KERNEL);
++                              tss_prot);
+ #ifdef CONFIG_X86_32
+       per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
+@@ -1305,7 +1314,7 @@ void enable_sep_cpu(void)
+               return;
+       cpu = get_cpu();
+-      tss = &per_cpu(cpu_tss, cpu);
++      tss = &per_cpu(cpu_tss_rw, cpu);
+       /*
+        * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
+@@ -1575,7 +1584,7 @@ void cpu_init(void)
+       if (cpu)
+               load_ucode_ap();
+-      t = &per_cpu(cpu_tss, cpu);
++      t = &per_cpu(cpu_tss_rw, cpu);
+       oist = &per_cpu(orig_ist, cpu);
+ #ifdef CONFIG_NUMA
+@@ -1667,7 +1676,7 @@ void cpu_init(void)
+ {
+       int cpu = smp_processor_id();
+       struct task_struct *curr = current;
+-      struct tss_struct *t = &per_cpu(cpu_tss, cpu);
++      struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu);
+       wait_for_master_cpu(cpu);
+--- a/arch/x86/kernel/ioport.c
++++ b/arch/x86/kernel/ioport.c
+@@ -67,7 +67,7 @@ asmlinkage long sys_ioperm(unsigned long
+        * because the ->io_bitmap_max value must match the bitmap
+        * contents:
+        */
+-      tss = &per_cpu(cpu_tss, get_cpu());
++      tss = &per_cpu(cpu_tss_rw, get_cpu());
+       if (turn_on)
+               bitmap_clear(t->io_bitmap_ptr, from, num);
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -47,7 +47,7 @@
+  * section. Since TSS's are completely CPU-local, we want them
+  * on exact cacheline boundaries, to eliminate cacheline ping-pong.
+  */
+-__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
++__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss_rw) = {
+       .x86_tss = {
+               /*
+                * .sp0 is only used when entering ring 0 from a lower
+@@ -82,7 +82,7 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(
+       .io_bitmap              = { [0 ... IO_BITMAP_LONGS] = ~0 },
+ #endif
+ };
+-EXPORT_PER_CPU_SYMBOL(cpu_tss);
++EXPORT_PER_CPU_SYMBOL(cpu_tss_rw);
+ DEFINE_PER_CPU(bool, __tss_limit_invalid);
+ EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid);
+@@ -111,7 +111,7 @@ void exit_thread(struct task_struct *tsk
+       struct fpu *fpu = &t->fpu;
+       if (bp) {
+-              struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu());
++              struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu());
+               t->io_bitmap_ptr = NULL;
+               clear_thread_flag(TIF_IO_BITMAP);
+--- a/arch/x86/kernel/process_32.c
++++ b/arch/x86/kernel/process_32.c
+@@ -234,7 +234,7 @@ __switch_to(struct task_struct *prev_p,
+       struct fpu *prev_fpu = &prev->fpu;
+       struct fpu *next_fpu = &next->fpu;
+       int cpu = smp_processor_id();
+-      struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
++      struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
+       /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
+--- a/arch/x86/kernel/process_64.c
++++ b/arch/x86/kernel/process_64.c
+@@ -399,7 +399,7 @@ __switch_to(struct task_struct *prev_p,
+       struct fpu *prev_fpu = &prev->fpu;
+       struct fpu *next_fpu = &next->fpu;
+       int cpu = smp_processor_id();
+-      struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
++      struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
+       WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
+                    this_cpu_read(irq_count) != -1);
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -364,7 +364,7 @@ dotraplinkage void do_double_fault(struc
+               regs->cs == __KERNEL_CS &&
+               regs->ip == (unsigned long)native_irq_return_iret)
+       {
+-              struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss.x86_tss.sp0) - 1;
++              struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
+               /*
+                * regs->sp points to the failing IRET frame on the
+@@ -649,7 +649,7 @@ struct bad_iret_stack *fixup_bad_iret(st
+        * exception came from the IRET target.
+        */
+       struct bad_iret_stack *new_stack =
+-              (struct bad_iret_stack *)this_cpu_read(cpu_tss.x86_tss.sp0) - 1;
++              (struct bad_iret_stack *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
+       /* Copy the IRET target to the new stack. */
+       memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
+--- a/arch/x86/lib/delay.c
++++ b/arch/x86/lib/delay.c
+@@ -107,10 +107,10 @@ static void delay_mwaitx(unsigned long _
+               delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
+               /*
+-               * Use cpu_tss as a cacheline-aligned, seldomly
++               * Use cpu_tss_rw as a cacheline-aligned, seldomly
+                * accessed per-cpu variable as the monitor target.
+                */
+-              __monitorx(raw_cpu_ptr(&cpu_tss), 0, 0);
++              __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
+               /*
+                * AMD, like Intel, supports the EAX hint and EAX=0xf
+--- a/arch/x86/xen/enlighten_pv.c
++++ b/arch/x86/xen/enlighten_pv.c
+@@ -818,7 +818,7 @@ static void xen_load_sp0(unsigned long s
+       mcs = xen_mc_entry(0);
+       MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0);
+       xen_mc_issue(PARAVIRT_LAZY_CPU);
+-      this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
++      this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
+ }
+ void xen_set_iopl_mask(unsigned mask)
diff --git a/queue-4.14/x86-entry-64-merge-the-fast-and-slow-sysret-paths.patch b/queue-4.14/x86-entry-64-merge-the-fast-and-slow-sysret-paths.patch
new file mode 100644 (file)
index 0000000..a55755c
--- /dev/null
@@ -0,0 +1,51 @@
+From a512210643da8082cb44181dba8b18e752bd68f0 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:59:04 -0700
+Subject: x86/entry/64: Merge the fast and slow SYSRET paths
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit a512210643da8082cb44181dba8b18e752bd68f0 upstream.
+
+They did almost the same thing.  Remove a bunch of pointless
+instructions (mostly hidden in macros) and reduce cognitive load by
+merging them.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/1204e20233fcab9130a1ba80b3b1879b5db3fc1f.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/entry/entry_64.S |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -221,10 +221,9 @@ entry_SYSCALL_64_fastpath:
+       TRACE_IRQS_ON           /* user mode is traced as IRQs on */
+       movq    RIP(%rsp), %rcx
+       movq    EFLAGS(%rsp), %r11
+-      RESTORE_C_REGS_EXCEPT_RCX_R11
+-      movq    RSP(%rsp), %rsp
++      addq    $6*8, %rsp      /* skip extra regs -- they were preserved */
+       UNWIND_HINT_EMPTY
+-      USERGS_SYSRET64
++      jmp     .Lpop_c_regs_except_rcx_r11_and_sysret
+ 1:
+       /*
+@@ -318,6 +317,7 @@ syscall_return_via_sysret:
+       /* rcx and r11 are already restored (see code above) */
+       UNWIND_HINT_EMPTY
+       POP_EXTRA_REGS
++.Lpop_c_regs_except_rcx_r11_and_sysret:
+       popq    %rsi    /* skip r11 */
+       popq    %r10
+       popq    %r9
diff --git a/queue-4.14/x86-entry-64-move-swapgs-into-the-common-iret-to-usermode-path.patch b/queue-4.14/x86-entry-64-move-swapgs-into-the-common-iret-to-usermode-path.patch
new file mode 100644 (file)
index 0000000..8e5d437
--- /dev/null
@@ -0,0 +1,144 @@
+From 8a055d7f411d41755ce30db5bb65b154777c4b78 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:59:00 -0700
+Subject: x86/entry/64: Move SWAPGS into the common IRET-to-usermode path
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 8a055d7f411d41755ce30db5bb65b154777c4b78 upstream.
+
+All of the code paths that ended up doing IRET to usermode did
+SWAPGS immediately beforehand.  Move the SWAPGS into the common
+code.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/27fd6f45b7cd640de38fb9066fd0349bcd11f8e1.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/entry/entry_64.S        |   32 ++++++++++++++------------------
+ arch/x86/entry/entry_64_compat.S |    3 +--
+ 2 files changed, 15 insertions(+), 20 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -250,12 +250,14 @@ return_from_SYSCALL_64:
+       /*
+        * Try to use SYSRET instead of IRET if we're returning to
+-       * a completely clean 64-bit userspace context.
++       * a completely clean 64-bit userspace context.  If we're not,
++       * go to the slow exit path.
+        */
+       movq    RCX(%rsp), %rcx
+       movq    RIP(%rsp), %r11
+-      cmpq    %rcx, %r11                      /* RCX == RIP */
+-      jne     opportunistic_sysret_failed
++
++      cmpq    %rcx, %r11      /* SYSRET requires RCX == RIP */
++      jne     swapgs_restore_regs_and_return_to_usermode
+       /*
+        * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
+@@ -273,14 +275,14 @@ return_from_SYSCALL_64:
+       /* If this changed %rcx, it was not canonical */
+       cmpq    %rcx, %r11
+-      jne     opportunistic_sysret_failed
++      jne     swapgs_restore_regs_and_return_to_usermode
+       cmpq    $__USER_CS, CS(%rsp)            /* CS must match SYSRET */
+-      jne     opportunistic_sysret_failed
++      jne     swapgs_restore_regs_and_return_to_usermode
+       movq    R11(%rsp), %r11
+       cmpq    %r11, EFLAGS(%rsp)              /* R11 == RFLAGS */
+-      jne     opportunistic_sysret_failed
++      jne     swapgs_restore_regs_and_return_to_usermode
+       /*
+        * SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
+@@ -301,12 +303,12 @@ return_from_SYSCALL_64:
+        * would never get past 'stuck_here'.
+        */
+       testq   $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
+-      jnz     opportunistic_sysret_failed
++      jnz     swapgs_restore_regs_and_return_to_usermode
+       /* nothing to check for RSP */
+       cmpq    $__USER_DS, SS(%rsp)            /* SS must match SYSRET */
+-      jne     opportunistic_sysret_failed
++      jne     swapgs_restore_regs_and_return_to_usermode
+       /*
+        * We win! This label is here just for ease of understanding
+@@ -319,10 +321,6 @@ syscall_return_via_sysret:
+       movq    RSP(%rsp), %rsp
+       UNWIND_HINT_EMPTY
+       USERGS_SYSRET64
+-
+-opportunistic_sysret_failed:
+-      SWAPGS
+-      jmp     restore_regs_and_return_to_usermode
+ END(entry_SYSCALL_64)
+ ENTRY(stub_ptregs_64)
+@@ -423,8 +421,7 @@ ENTRY(ret_from_fork)
+       movq    %rsp, %rdi
+       call    syscall_return_slowpath /* returns with IRQs disabled */
+       TRACE_IRQS_ON                   /* user mode is traced as IRQS on */
+-      SWAPGS
+-      jmp     restore_regs_and_return_to_usermode
++      jmp     swapgs_restore_regs_and_return_to_usermode
+ 1:
+       /* kernel thread */
+@@ -612,9 +609,8 @@ GLOBAL(retint_user)
+       mov     %rsp,%rdi
+       call    prepare_exit_to_usermode
+       TRACE_IRQS_IRETQ
+-      SWAPGS
+-GLOBAL(restore_regs_and_return_to_usermode)
++GLOBAL(swapgs_restore_regs_and_return_to_usermode)
+ #ifdef CONFIG_DEBUG_ENTRY
+       /* Assert that pt_regs indicates user mode. */
+       testl   $3, CS(%rsp)
+@@ -622,6 +618,7 @@ GLOBAL(restore_regs_and_return_to_usermo
+       ud2
+ 1:
+ #endif
++      SWAPGS
+       RESTORE_EXTRA_REGS
+       RESTORE_C_REGS
+       REMOVE_PT_GPREGS_FROM_STACK 8
+@@ -1343,8 +1340,7 @@ ENTRY(nmi)
+        * Return back to user mode.  We must *not* do the normal exit
+        * work, because we don't want to enable interrupts.
+        */
+-      SWAPGS
+-      jmp     restore_regs_and_return_to_usermode
++      jmp     swapgs_restore_regs_and_return_to_usermode
+ .Lnmi_from_kernel:
+       /*
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -337,8 +337,7 @@ ENTRY(entry_INT80_compat)
+       /* Go back to user mode. */
+       TRACE_IRQS_ON
+-      SWAPGS
+-      jmp     restore_regs_and_return_to_usermode
++      jmp     swapgs_restore_regs_and_return_to_usermode
+ END(entry_INT80_compat)
+ ENTRY(stub32_clone)
diff --git a/queue-4.14/x86-entry-64-move-the-ist-stacks-into-struct-cpu_entry_area.patch b/queue-4.14/x86-entry-64-move-the-ist-stacks-into-struct-cpu_entry_area.patch
new file mode 100644 (file)
index 0000000..5ababca
--- /dev/null
@@ -0,0 +1,221 @@
+From 40e7f949e0d9a33968ebde5d67f7e3a47c97742a Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:26 +0100
+Subject: x86/entry/64: Move the IST stacks into struct cpu_entry_area
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 40e7f949e0d9a33968ebde5d67f7e3a47c97742a upstream.
+
+The IST stacks are needed when an IST exception occurs and are accessed
+before any kernel code at all runs.  Move them into struct cpu_entry_area.
+
+The IST stacks are unlike the rest of cpu_entry_area: they're used even for
+entries from kernel mode.  This means that they should be set up before we
+load the final IDT.  Move cpu_entry_area setup to trap_init() for the boot
+CPU and set it up for all possible CPUs at once in native_smp_prepare_cpus().
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150606.480598743@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/fixmap.h |   12 ++++++
+ arch/x86/kernel/cpu/common.c  |   74 +++++++++++++++++++++++-------------------
+ arch/x86/kernel/traps.c       |    3 +
+ 3 files changed, 57 insertions(+), 32 deletions(-)
+
+--- a/arch/x86/include/asm/fixmap.h
++++ b/arch/x86/include/asm/fixmap.h
+@@ -63,10 +63,22 @@ struct cpu_entry_area {
+       struct tss_struct tss;
+       char entry_trampoline[PAGE_SIZE];
++
++#ifdef CONFIG_X86_64
++      /*
++       * Exception stacks used for IST entries.
++       *
++       * In the future, this should have a separate slot for each stack
++       * with guard pages between them.
++       */
++      char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
++#endif
+ };
+ #define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE)
++extern void setup_cpu_entry_areas(void);
++
+ /*
+  * Here we define all the compile-time 'special' virtual
+  * addresses. The point is to have a constant address at
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -466,24 +466,36 @@ void load_percpu_segment(int cpu)
+       load_stack_canary_segment();
+ }
+-static void set_percpu_fixmap_pages(int fixmap_index, void *ptr,
+-                                  int pages, pgprot_t prot)
+-{
+-      int i;
+-
+-      for (i = 0; i < pages; i++) {
+-              __set_fixmap(fixmap_index - i,
+-                           per_cpu_ptr_to_phys(ptr + i * PAGE_SIZE), prot);
+-      }
+-}
+-
+ #ifdef CONFIG_X86_32
+ /* The 32-bit entry code needs to find cpu_entry_area. */
+ DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
+ #endif
++#ifdef CONFIG_X86_64
++/*
++ * Special IST stacks which the CPU switches to when it calls
++ * an IST-marked descriptor entry. Up to 7 stacks (hardware
++ * limit), all of them are 4K, except the debug stack which
++ * is 8K.
++ */
++static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
++        [0 ... N_EXCEPTION_STACKS - 1]        = EXCEPTION_STKSZ,
++        [DEBUG_STACK - 1]                     = DEBUG_STKSZ
++};
++
++static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
++      [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
++#endif
++
++static void __init
++set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
++{
++      for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
++              __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
++}
++
+ /* Setup the fixmap mappings only once per-processor */
+-static inline void setup_cpu_entry_area(int cpu)
++static void __init setup_cpu_entry_area(int cpu)
+ {
+ #ifdef CONFIG_X86_64
+       extern char _entry_trampoline[];
+@@ -532,15 +544,31 @@ static inline void setup_cpu_entry_area(
+                               PAGE_KERNEL);
+ #ifdef CONFIG_X86_32
+-      this_cpu_write(cpu_entry_area, get_cpu_entry_area(cpu));
++      per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
+ #endif
+ #ifdef CONFIG_X86_64
++      BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
++      BUILD_BUG_ON(sizeof(exception_stacks) !=
++                   sizeof(((struct cpu_entry_area *)0)->exception_stacks));
++      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
++                              &per_cpu(exception_stacks, cpu),
++                              sizeof(exception_stacks) / PAGE_SIZE,
++                              PAGE_KERNEL);
++
+       __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
+                    __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
+ #endif
+ }
++void __init setup_cpu_entry_areas(void)
++{
++      unsigned int cpu;
++
++      for_each_possible_cpu(cpu)
++              setup_cpu_entry_area(cpu);
++}
++
+ /* Load the original GDT from the per-cpu structure */
+ void load_direct_gdt(int cpu)
+ {
+@@ -1385,20 +1413,6 @@ DEFINE_PER_CPU(unsigned int, irq_count)
+ DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
+ EXPORT_PER_CPU_SYMBOL(__preempt_count);
+-/*
+- * Special IST stacks which the CPU switches to when it calls
+- * an IST-marked descriptor entry. Up to 7 stacks (hardware
+- * limit), all of them are 4K, except the debug stack which
+- * is 8K.
+- */
+-static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
+-        [0 ... N_EXCEPTION_STACKS - 1]        = EXCEPTION_STKSZ,
+-        [DEBUG_STACK - 1]                     = DEBUG_STKSZ
+-};
+-
+-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
+-      [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
+-
+ /* May not be marked __init: used by software suspend */
+ void syscall_init(void)
+ {
+@@ -1607,7 +1621,7 @@ void cpu_init(void)
+        * set up and load the per-CPU TSS
+        */
+       if (!oist->ist[0]) {
+-              char *estacks = per_cpu(exception_stacks, cpu);
++              char *estacks = get_cpu_entry_area(cpu)->exception_stacks;
+               for (v = 0; v < N_EXCEPTION_STACKS; v++) {
+                       estacks += exception_stack_sizes[v];
+@@ -1633,8 +1647,6 @@ void cpu_init(void)
+       initialize_tlbstate_and_flush();
+       enter_lazy_tlb(&init_mm, me);
+-      setup_cpu_entry_area(cpu);
+-
+       /*
+        * Initialize the TSS.  sp0 points to the entry trampoline stack
+        * regardless of what task is running.
+@@ -1694,8 +1706,6 @@ void cpu_init(void)
+       initialize_tlbstate_and_flush();
+       enter_lazy_tlb(&init_mm, curr);
+-      setup_cpu_entry_area(cpu);
+-
+       /*
+        * Initialize the TSS.  Don't bother initializing sp0, as the initial
+        * task never enters user mode.
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -947,6 +947,9 @@ dotraplinkage void do_iret_error(struct
+ void __init trap_init(void)
+ {
++      /* Init cpu_entry_area before IST entries are set up */
++      setup_cpu_entry_areas();
++
+       idt_setup_traps();
+       /*
diff --git a/queue-4.14/x86-entry-64-paravirt-use-paravirt-safe-macro-to-access-eflags.patch b/queue-4.14/x86-entry-64-paravirt-use-paravirt-safe-macro-to-access-eflags.patch
new file mode 100644 (file)
index 0000000..77d3f2d
--- /dev/null
@@ -0,0 +1,113 @@
+From e17f8234538d1ff708673f287a42457c4dee720d Mon Sep 17 00:00:00 2001
+From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Date: Mon, 4 Dec 2017 15:07:07 +0100
+Subject: x86/entry/64/paravirt: Use paravirt-safe macro to access eflags
+
+From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+
+commit e17f8234538d1ff708673f287a42457c4dee720d upstream.
+
+Commit 1d3e53e8624a ("x86/entry/64: Refactor IRQ stacks and make them
+NMI-safe") added DEBUG_ENTRY_ASSERT_IRQS_OFF macro that accesses eflags
+using 'pushfq' instruction when testing for IF bit. On PV Xen guests
+looking at IF flag directly will always see it set, resulting in 'ud2'.
+
+Introduce SAVE_FLAGS() macro that will use appropriate save_fl pv op when
+running paravirt.
+
+Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: xen-devel@lists.xenproject.org
+Link: https://lkml.kernel.org/r/20171204150604.899457242@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/entry/entry_64.S        |    7 ++++---
+ arch/x86/include/asm/irqflags.h  |    3 +++
+ arch/x86/include/asm/paravirt.h  |    9 +++++++++
+ arch/x86/kernel/asm-offsets_64.c |    3 +++
+ 4 files changed, 19 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -462,12 +462,13 @@ END(irq_entries_start)
+ .macro DEBUG_ENTRY_ASSERT_IRQS_OFF
+ #ifdef CONFIG_DEBUG_ENTRY
+-      pushfq
+-      testl $X86_EFLAGS_IF, (%rsp)
++      pushq %rax
++      SAVE_FLAGS(CLBR_RAX)
++      testl $X86_EFLAGS_IF, %eax
+       jz .Lokay_\@
+       ud2
+ .Lokay_\@:
+-      addq $8, %rsp
++      popq %rax
+ #endif
+ .endm
+--- a/arch/x86/include/asm/irqflags.h
++++ b/arch/x86/include/asm/irqflags.h
+@@ -142,6 +142,9 @@ static inline notrace unsigned long arch
+       swapgs;                                 \
+       sysretl
++#ifdef CONFIG_DEBUG_ENTRY
++#define SAVE_FLAGS(x)         pushfq; popq %rax
++#endif
+ #else
+ #define INTERRUPT_RETURN              iret
+ #define ENABLE_INTERRUPTS_SYSEXIT     sti; sysexit
+--- a/arch/x86/include/asm/paravirt.h
++++ b/arch/x86/include/asm/paravirt.h
+@@ -927,6 +927,15 @@ extern void default_banner(void);
+       PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),       \
+                 CLBR_NONE,                                            \
+                 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
++
++#ifdef CONFIG_DEBUG_ENTRY
++#define SAVE_FLAGS(clobbers)                                        \
++      PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \
++                PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);        \
++                call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl);    \
++                PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
++#endif
++
+ #endif        /* CONFIG_X86_32 */
+ #endif /* __ASSEMBLY__ */
+--- a/arch/x86/kernel/asm-offsets_64.c
++++ b/arch/x86/kernel/asm-offsets_64.c
+@@ -23,6 +23,9 @@ int main(void)
+ #ifdef CONFIG_PARAVIRT
+       OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
+       OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
++#ifdef CONFIG_DEBUG_ENTRY
++      OFFSET(PV_IRQ_save_fl, pv_irq_ops, save_fl);
++#endif
+       BLANK();
+ #endif
diff --git a/queue-4.14/x86-entry-64-pass-sp0-directly-to-load_sp0.patch b/queue-4.14/x86-entry-64-pass-sp0-directly-to-load_sp0.patch
new file mode 100644 (file)
index 0000000..ee3aa19
--- /dev/null
@@ -0,0 +1,215 @@
+From da51da189a24bb9b7e2d5a123be096e51a4695a5 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:59:10 -0700
+Subject: x86/entry/64: Pass SP0 directly to load_sp0()
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit da51da189a24bb9b7e2d5a123be096e51a4695a5 upstream.
+
+load_sp0() had an odd signature:
+
+  void load_sp0(struct tss_struct *tss, struct thread_struct *thread);
+
+Simplify it to:
+
+  void load_sp0(unsigned long sp0);
+
+Also simplify a few get_cpu()/put_cpu() sequences to
+preempt_disable()/preempt_enable().
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/2655d8b42ed940aa384fe18ee1129bbbcf730a08.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/paravirt.h       |    5 ++---
+ arch/x86/include/asm/paravirt_types.h |    2 +-
+ arch/x86/include/asm/processor.h      |    9 ++++-----
+ arch/x86/kernel/cpu/common.c          |    4 ++--
+ arch/x86/kernel/process_32.c          |    2 +-
+ arch/x86/kernel/process_64.c          |    2 +-
+ arch/x86/kernel/vm86_32.c             |   14 ++++++--------
+ arch/x86/xen/enlighten_pv.c           |    7 +++----
+ 8 files changed, 20 insertions(+), 25 deletions(-)
+
+--- a/arch/x86/include/asm/paravirt.h
++++ b/arch/x86/include/asm/paravirt.h
+@@ -16,10 +16,9 @@
+ #include <linux/cpumask.h>
+ #include <asm/frame.h>
+-static inline void load_sp0(struct tss_struct *tss,
+-                           struct thread_struct *thread)
++static inline void load_sp0(unsigned long sp0)
+ {
+-      PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread);
++      PVOP_VCALL1(pv_cpu_ops.load_sp0, sp0);
+ }
+ /* The paravirtualized CPUID instruction. */
+--- a/arch/x86/include/asm/paravirt_types.h
++++ b/arch/x86/include/asm/paravirt_types.h
+@@ -134,7 +134,7 @@ struct pv_cpu_ops {
+       void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries);
+       void (*free_ldt)(struct desc_struct *ldt, unsigned entries);
+-      void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t);
++      void (*load_sp0)(unsigned long sp0);
+       void (*set_iopl_mask)(unsigned mask);
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -518,9 +518,9 @@ static inline void native_set_iopl_mask(
+ }
+ static inline void
+-native_load_sp0(struct tss_struct *tss, struct thread_struct *thread)
++native_load_sp0(unsigned long sp0)
+ {
+-      tss->x86_tss.sp0 = thread->sp0;
++      this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
+ }
+ static inline void native_swapgs(void)
+@@ -545,10 +545,9 @@ static inline unsigned long current_top_
+ #else
+ #define __cpuid                       native_cpuid
+-static inline void load_sp0(struct tss_struct *tss,
+-                          struct thread_struct *thread)
++static inline void load_sp0(unsigned long sp0)
+ {
+-      native_load_sp0(tss, thread);
++      native_load_sp0(sp0);
+ }
+ #define set_iopl_mask native_set_iopl_mask
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1570,7 +1570,7 @@ void cpu_init(void)
+       initialize_tlbstate_and_flush();
+       enter_lazy_tlb(&init_mm, me);
+-      load_sp0(t, &current->thread);
++      load_sp0(current->thread.sp0);
+       set_tss_desc(cpu, t);
+       load_TR_desc();
+       load_mm_ldt(&init_mm);
+@@ -1625,7 +1625,7 @@ void cpu_init(void)
+       initialize_tlbstate_and_flush();
+       enter_lazy_tlb(&init_mm, curr);
+-      load_sp0(t, thread);
++      load_sp0(thread->sp0);
+       set_tss_desc(cpu, t);
+       load_TR_desc();
+       load_mm_ldt(&init_mm);
+--- a/arch/x86/kernel/process_32.c
++++ b/arch/x86/kernel/process_32.c
+@@ -287,7 +287,7 @@ __switch_to(struct task_struct *prev_p,
+        * current_thread_info().  Refresh the SYSENTER configuration in
+        * case prev or next is vm86.
+        */
+-      load_sp0(tss, next);
++      load_sp0(next->sp0);
+       refresh_sysenter_cs(next);
+       this_cpu_write(cpu_current_top_of_stack,
+                      (unsigned long)task_stack_page(next_p) +
+--- a/arch/x86/kernel/process_64.c
++++ b/arch/x86/kernel/process_64.c
+@@ -465,7 +465,7 @@ __switch_to(struct task_struct *prev_p,
+       this_cpu_write(current_task, next_p);
+       /* Reload sp0. */
+-      load_sp0(tss, next);
++      load_sp0(next->sp0);
+       /*
+        * Now maybe reload the debug registers and handle I/O bitmaps
+--- a/arch/x86/kernel/vm86_32.c
++++ b/arch/x86/kernel/vm86_32.c
+@@ -95,7 +95,6 @@
+ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
+ {
+-      struct tss_struct *tss;
+       struct task_struct *tsk = current;
+       struct vm86plus_struct __user *user;
+       struct vm86 *vm86 = current->thread.vm86;
+@@ -147,13 +146,13 @@ void save_v86_state(struct kernel_vm86_r
+               do_exit(SIGSEGV);
+       }
+-      tss = &per_cpu(cpu_tss, get_cpu());
++      preempt_disable();
+       tsk->thread.sp0 = vm86->saved_sp0;
+       tsk->thread.sysenter_cs = __KERNEL_CS;
+-      load_sp0(tss, &tsk->thread);
++      load_sp0(tsk->thread.sp0);
+       refresh_sysenter_cs(&tsk->thread);
+       vm86->saved_sp0 = 0;
+-      put_cpu();
++      preempt_enable();
+       memcpy(&regs->pt, &vm86->regs32, sizeof(struct pt_regs));
+@@ -239,7 +238,6 @@ SYSCALL_DEFINE2(vm86, unsigned long, cmd
+ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
+ {
+-      struct tss_struct *tss;
+       struct task_struct *tsk = current;
+       struct vm86 *vm86 = tsk->thread.vm86;
+       struct kernel_vm86_regs vm86regs;
+@@ -367,8 +365,8 @@ static long do_sys_vm86(struct vm86plus_
+       vm86->saved_sp0 = tsk->thread.sp0;
+       lazy_save_gs(vm86->regs32.gs);
+-      tss = &per_cpu(cpu_tss, get_cpu());
+       /* make room for real-mode segments */
++      preempt_disable();
+       tsk->thread.sp0 += 16;
+       if (static_cpu_has(X86_FEATURE_SEP)) {
+@@ -376,8 +374,8 @@ static long do_sys_vm86(struct vm86plus_
+               refresh_sysenter_cs(&tsk->thread);
+       }
+-      load_sp0(tss, &tsk->thread);
+-      put_cpu();
++      load_sp0(tsk->thread.sp0);
++      preempt_enable();
+       if (vm86->flags & VM86_SCREEN_BITMAP)
+               mark_screen_rdonly(tsk->mm);
+--- a/arch/x86/xen/enlighten_pv.c
++++ b/arch/x86/xen/enlighten_pv.c
+@@ -811,15 +811,14 @@ static void __init xen_write_gdt_entry_b
+       }
+ }
+-static void xen_load_sp0(struct tss_struct *tss,
+-                       struct thread_struct *thread)
++static void xen_load_sp0(unsigned long sp0)
+ {
+       struct multicall_space mcs;
+       mcs = xen_mc_entry(0);
+-      MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);
++      MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0);
+       xen_mc_issue(PARAVIRT_LAZY_CPU);
+-      tss->x86_tss.sp0 = thread->sp0;
++      this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
+ }
+ void xen_set_iopl_mask(unsigned mask)
diff --git a/queue-4.14/x86-entry-64-remove-all-remaining-direct-thread_struct-sp0-reads.patch b/queue-4.14/x86-entry-64-remove-all-remaining-direct-thread_struct-sp0-reads.patch
new file mode 100644 (file)
index 0000000..9f2f659
--- /dev/null
@@ -0,0 +1,87 @@
+From 46f5a10a721ce8dce8cc8fe55279b49e1c6b3288 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:59:14 -0700
+Subject: x86/entry/64: Remove all remaining direct thread_struct::sp0 reads
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 46f5a10a721ce8dce8cc8fe55279b49e1c6b3288 upstream.
+
+The only remaining readers are in context switch code or vm86(), and
+they all just want to update TSS.sp0 to match the current task.
+Replace them all with a new helper update_sp0().
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/2d231687f4ff288c9d9e98d7861b7df374246ac3.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/switch_to.h |    6 ++++++
+ arch/x86/kernel/process_32.c     |    2 +-
+ arch/x86/kernel/process_64.c     |    2 +-
+ arch/x86/kernel/vm86_32.c        |    4 ++--
+ 4 files changed, 10 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/include/asm/switch_to.h
++++ b/arch/x86/include/asm/switch_to.h
+@@ -85,4 +85,10 @@ static inline void refresh_sysenter_cs(s
+ }
+ #endif
++/* This is used when switching tasks or entering/exiting vm86 mode. */
++static inline void update_sp0(struct task_struct *task)
++{
++      load_sp0(task->thread.sp0);
++}
++
+ #endif /* _ASM_X86_SWITCH_TO_H */
+--- a/arch/x86/kernel/process_32.c
++++ b/arch/x86/kernel/process_32.c
+@@ -287,7 +287,7 @@ __switch_to(struct task_struct *prev_p,
+        * current_thread_info().  Refresh the SYSENTER configuration in
+        * case prev or next is vm86.
+        */
+-      load_sp0(next->sp0);
++      update_sp0(next_p);
+       refresh_sysenter_cs(next);
+       this_cpu_write(cpu_current_top_of_stack,
+                      (unsigned long)task_stack_page(next_p) +
+--- a/arch/x86/kernel/process_64.c
++++ b/arch/x86/kernel/process_64.c
+@@ -465,7 +465,7 @@ __switch_to(struct task_struct *prev_p,
+       this_cpu_write(current_task, next_p);
+       /* Reload sp0. */
+-      load_sp0(next->sp0);
++      update_sp0(next_p);
+       /*
+        * Now maybe reload the debug registers and handle I/O bitmaps
+--- a/arch/x86/kernel/vm86_32.c
++++ b/arch/x86/kernel/vm86_32.c
+@@ -149,7 +149,7 @@ void save_v86_state(struct kernel_vm86_r
+       preempt_disable();
+       tsk->thread.sp0 = vm86->saved_sp0;
+       tsk->thread.sysenter_cs = __KERNEL_CS;
+-      load_sp0(tsk->thread.sp0);
++      update_sp0(tsk);
+       refresh_sysenter_cs(&tsk->thread);
+       vm86->saved_sp0 = 0;
+       preempt_enable();
+@@ -374,7 +374,7 @@ static long do_sys_vm86(struct vm86plus_
+               refresh_sysenter_cs(&tsk->thread);
+       }
+-      load_sp0(tsk->thread.sp0);
++      update_sp0(tsk);
+       preempt_enable();
+       if (vm86->flags & VM86_SCREEN_BITMAP)
diff --git a/queue-4.14/x86-entry-64-remove-the-restore_..._regs-infrastructure.patch b/queue-4.14/x86-entry-64-remove-the-restore_..._regs-infrastructure.patch
new file mode 100644 (file)
index 0000000..9ad4927
--- /dev/null
@@ -0,0 +1,95 @@
+From c39858de696f0cc160a544455e8403d663d577e9 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:59:06 -0700
+Subject: x86/entry/64: Remove the RESTORE_..._REGS infrastructure
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit c39858de696f0cc160a544455e8403d663d577e9 upstream.
+
+All users of RESTORE_EXTRA_REGS, RESTORE_C_REGS and such, and
+REMOVE_PT_GPREGS_FROM_STACK are gone.  Delete the macros.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/c32672f6e47c561893316d48e06c7656b1039a36.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/entry/calling.h |   52 -----------------------------------------------
+ 1 file changed, 52 deletions(-)
+
+--- a/arch/x86/entry/calling.h
++++ b/arch/x86/entry/calling.h
+@@ -142,16 +142,6 @@ For 32-bit we have the following convent
+       UNWIND_HINT_REGS offset=\offset
+       .endm
+-      .macro RESTORE_EXTRA_REGS offset=0
+-      movq 0*8+\offset(%rsp), %r15
+-      movq 1*8+\offset(%rsp), %r14
+-      movq 2*8+\offset(%rsp), %r13
+-      movq 3*8+\offset(%rsp), %r12
+-      movq 4*8+\offset(%rsp), %rbp
+-      movq 5*8+\offset(%rsp), %rbx
+-      UNWIND_HINT_REGS offset=\offset extra=0
+-      .endm
+-
+       .macro POP_EXTRA_REGS
+       popq %r15
+       popq %r14
+@@ -173,48 +163,6 @@ For 32-bit we have the following convent
+       popq %rdi
+       .endm
+-      .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
+-      .if \rstor_r11
+-      movq 6*8(%rsp), %r11
+-      .endif
+-      .if \rstor_r8910
+-      movq 7*8(%rsp), %r10
+-      movq 8*8(%rsp), %r9
+-      movq 9*8(%rsp), %r8
+-      .endif
+-      .if \rstor_rax
+-      movq 10*8(%rsp), %rax
+-      .endif
+-      .if \rstor_rcx
+-      movq 11*8(%rsp), %rcx
+-      .endif
+-      .if \rstor_rdx
+-      movq 12*8(%rsp), %rdx
+-      .endif
+-      movq 13*8(%rsp), %rsi
+-      movq 14*8(%rsp), %rdi
+-      UNWIND_HINT_IRET_REGS offset=16*8
+-      .endm
+-      .macro RESTORE_C_REGS
+-      RESTORE_C_REGS_HELPER 1,1,1,1,1
+-      .endm
+-      .macro RESTORE_C_REGS_EXCEPT_RAX
+-      RESTORE_C_REGS_HELPER 0,1,1,1,1
+-      .endm
+-      .macro RESTORE_C_REGS_EXCEPT_RCX
+-      RESTORE_C_REGS_HELPER 1,0,1,1,1
+-      .endm
+-      .macro RESTORE_C_REGS_EXCEPT_R11
+-      RESTORE_C_REGS_HELPER 1,1,0,1,1
+-      .endm
+-      .macro RESTORE_C_REGS_EXCEPT_RCX_R11
+-      RESTORE_C_REGS_HELPER 1,0,0,1,1
+-      .endm
+-
+-      .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
+-      subq $-(15*8+\addskip), %rsp
+-      .endm
+-
+       .macro icebp
+       .byte 0xf1
+       .endm
diff --git a/queue-4.14/x86-entry-64-remove-the-restore_c_regs_and_iret-label.patch b/queue-4.14/x86-entry-64-remove-the-restore_c_regs_and_iret-label.patch
new file mode 100644 (file)
index 0000000..1edcadb
--- /dev/null
@@ -0,0 +1,65 @@
+From 9da78ba6b47b46428cfdfc0851511ab29c869798 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:58:58 -0700
+Subject: x86/entry/64: Remove the restore_c_regs_and_iret label
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 9da78ba6b47b46428cfdfc0851511ab29c869798 upstream.
+
+The only user was the 64-bit opportunistic SYSRET failure path, and
+that path didn't really need it.  This change makes the
+opportunistic SYSRET code a bit more straightforward and gets rid of
+the label.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/be3006a7ad3326e3458cf1cc55d416252cbe1986.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/entry/entry_64.S |    5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -246,7 +246,6 @@ entry_SYSCALL64_slow_path:
+       call    do_syscall_64           /* returns with IRQs disabled */
+ return_from_SYSCALL_64:
+-      RESTORE_EXTRA_REGS
+       TRACE_IRQS_IRETQ                /* we're about to change IF */
+       /*
+@@ -315,6 +314,7 @@ return_from_SYSCALL_64:
+        */
+ syscall_return_via_sysret:
+       /* rcx and r11 are already restored (see code above) */
++      RESTORE_EXTRA_REGS
+       RESTORE_C_REGS_EXCEPT_RCX_R11
+       movq    RSP(%rsp), %rsp
+       UNWIND_HINT_EMPTY
+@@ -322,7 +322,7 @@ syscall_return_via_sysret:
+ opportunistic_sysret_failed:
+       SWAPGS
+-      jmp     restore_c_regs_and_iret
++      jmp     restore_regs_and_iret
+ END(entry_SYSCALL_64)
+ ENTRY(stub_ptregs_64)
+@@ -639,7 +639,6 @@ retint_kernel:
+  */
+ GLOBAL(restore_regs_and_iret)
+       RESTORE_EXTRA_REGS
+-restore_c_regs_and_iret:
+       RESTORE_C_REGS
+       REMOVE_PT_GPREGS_FROM_STACK 8
+       INTERRUPT_RETURN
diff --git a/queue-4.14/x86-entry-64-remove-the-sysenter-stack-canary.patch b/queue-4.14/x86-entry-64-remove-the-sysenter-stack-canary.patch
new file mode 100644 (file)
index 0000000..c77e793
--- /dev/null
@@ -0,0 +1,96 @@
+From 7fbbd5cbebf118a9e09f5453f686656a167c3d1c Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:27 +0100
+Subject: x86/entry/64: Remove the SYSENTER stack canary
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 7fbbd5cbebf118a9e09f5453f686656a167c3d1c upstream.
+
+Now that the SYSENTER stack has a guard page, there's no need for a canary
+to detect overflow after the fact.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150606.572577316@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/processor.h |    1 -
+ arch/x86/kernel/dumpstack.c      |    3 +--
+ arch/x86/kernel/process.c        |    1 -
+ arch/x86/kernel/traps.c          |    7 -------
+ 4 files changed, 1 insertion(+), 11 deletions(-)
+
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -341,7 +341,6 @@ struct tss_struct {
+        * Space for the temporary SYSENTER stack, used for SYSENTER
+        * and the entry trampoline as well.
+        */
+-      unsigned long           SYSENTER_stack_canary;
+       unsigned long           SYSENTER_stack[64];
+       /*
+--- a/arch/x86/kernel/dumpstack.c
++++ b/arch/x86/kernel/dumpstack.c
+@@ -48,8 +48,7 @@ bool in_sysenter_stack(unsigned long *st
+       int cpu = smp_processor_id();
+       struct tss_struct *tss = &get_cpu_entry_area(cpu)->tss;
+-      /* Treat the canary as part of the stack for unwinding purposes. */
+-      void *begin = &tss->SYSENTER_stack_canary;
++      void *begin = &tss->SYSENTER_stack;
+       void *end = (void *)&tss->SYSENTER_stack + sizeof(tss->SYSENTER_stack);
+       if ((void *)stack < begin || (void *)stack >= end)
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -81,7 +81,6 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(
+         */
+       .io_bitmap              = { [0 ... IO_BITMAP_LONGS] = ~0 },
+ #endif
+-      .SYSENTER_stack_canary  = STACK_END_MAGIC,
+ };
+ EXPORT_PER_CPU_SYMBOL(cpu_tss);
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -814,13 +814,6 @@ dotraplinkage void do_debug(struct pt_re
+       debug_stack_usage_dec();
+ exit:
+-      /*
+-       * This is the most likely code path that involves non-trivial use
+-       * of the SYSENTER stack.  Check that we haven't overrun it.
+-       */
+-      WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC,
+-           "Overran or corrupted SYSENTER stack\n");
+-
+       ist_exit(regs);
+ }
+ NOKPROBE_SYMBOL(do_debug);
diff --git a/queue-4.14/x86-entry-64-remove-thread_struct-sp0.patch b/queue-4.14/x86-entry-64-remove-thread_struct-sp0.patch
new file mode 100644 (file)
index 0000000..7407229
--- /dev/null
@@ -0,0 +1,139 @@
+From d375cf1530595e33961a8844192cddab913650e3 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:59:16 -0700
+Subject: x86/entry/64: Remove thread_struct::sp0
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit d375cf1530595e33961a8844192cddab913650e3 upstream.
+
+On x86_64, we can easily calculate sp0 when needed instead of
+storing it in thread_struct.
+
+On x86_32, a similar cleanup would be possible, but it would require
+cleaning up the vm86 code first, and that can wait for a later
+cleanup series.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/719cd9c66c548c4350d98a90f050aee8b17f8919.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/compat.h    |    1 +
+ arch/x86/include/asm/processor.h |   28 +++++++++-------------------
+ arch/x86/include/asm/switch_to.h |    6 ++++++
+ arch/x86/kernel/process_64.c     |    1 -
+ 4 files changed, 16 insertions(+), 20 deletions(-)
+
+--- a/arch/x86/include/asm/compat.h
++++ b/arch/x86/include/asm/compat.h
+@@ -7,6 +7,7 @@
+  */
+ #include <linux/types.h>
+ #include <linux/sched.h>
++#include <linux/sched/task_stack.h>
+ #include <asm/processor.h>
+ #include <asm/user32.h>
+ #include <asm/unistd.h>
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -431,7 +431,9 @@ typedef struct {
+ struct thread_struct {
+       /* Cached TLS descriptors: */
+       struct desc_struct      tls_array[GDT_ENTRY_TLS_ENTRIES];
++#ifdef CONFIG_X86_32
+       unsigned long           sp0;
++#endif
+       unsigned long           sp;
+ #ifdef CONFIG_X86_32
+       unsigned long           sysenter_cs;
+@@ -798,6 +800,13 @@ static inline void spin_lock_prefetch(co
+ #define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1))
++#define task_pt_regs(task) \
++({                                                                    \
++      unsigned long __ptr = (unsigned long)task_stack_page(task);     \
++      __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;             \
++      ((struct pt_regs *)__ptr) - 1;                                  \
++})
++
+ #ifdef CONFIG_X86_32
+ /*
+  * User space process size: 3GB (default).
+@@ -817,23 +826,6 @@ static inline void spin_lock_prefetch(co
+       .addr_limit             = KERNEL_DS,                              \
+ }
+-/*
+- * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack.
+- * This is necessary to guarantee that the entire "struct pt_regs"
+- * is accessible even if the CPU haven't stored the SS/ESP registers
+- * on the stack (interrupt gate does not save these registers
+- * when switching to the same priv ring).
+- * Therefore beware: accessing the ss/esp fields of the
+- * "struct pt_regs" is possible, but they may contain the
+- * completely wrong values.
+- */
+-#define task_pt_regs(task) \
+-({                                                                    \
+-      unsigned long __ptr = (unsigned long)task_stack_page(task);     \
+-      __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;             \
+-      ((struct pt_regs *)__ptr) - 1;                                  \
+-})
+-
+ #define KSTK_ESP(task)                (task_pt_regs(task)->sp)
+ #else
+@@ -867,11 +859,9 @@ static inline void spin_lock_prefetch(co
+ #define STACK_TOP_MAX         TASK_SIZE_MAX
+ #define INIT_THREAD  {                                                \
+-      .sp0                    = TOP_OF_INIT_STACK,            \
+       .addr_limit             = KERNEL_DS,                    \
+ }
+-#define task_pt_regs(tsk)     ((struct pt_regs *)(tsk)->thread.sp0 - 1)
+ extern unsigned long KSTK_ESP(struct task_struct *task);
+ #endif /* CONFIG_X86_64 */
+--- a/arch/x86/include/asm/switch_to.h
++++ b/arch/x86/include/asm/switch_to.h
+@@ -2,6 +2,8 @@
+ #ifndef _ASM_X86_SWITCH_TO_H
+ #define _ASM_X86_SWITCH_TO_H
++#include <linux/sched/task_stack.h>
++
+ struct task_struct; /* one of the stranger aspects of C forward declarations */
+ struct task_struct *__switch_to_asm(struct task_struct *prev,
+@@ -88,7 +90,11 @@ static inline void refresh_sysenter_cs(s
+ /* This is used when switching tasks or entering/exiting vm86 mode. */
+ static inline void update_sp0(struct task_struct *task)
+ {
++#ifdef CONFIG_X86_32
+       load_sp0(task->thread.sp0);
++#else
++      load_sp0(task_top_of_stack(task));
++#endif
+ }
+ #endif /* _ASM_X86_SWITCH_TO_H */
+--- a/arch/x86/kernel/process_64.c
++++ b/arch/x86/kernel/process_64.c
+@@ -274,7 +274,6 @@ int copy_thread_tls(unsigned long clone_
+       struct inactive_task_frame *frame;
+       struct task_struct *me = current;
+-      p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
+       childregs = task_pt_regs(p);
+       fork_frame = container_of(childregs, struct fork_frame, regs);
+       frame = &fork_frame->frame;
diff --git a/queue-4.14/x86-entry-64-return-to-userspace-from-the-trampoline-stack.patch b/queue-4.14/x86-entry-64-return-to-userspace-from-the-trampoline-stack.patch
new file mode 100644 (file)
index 0000000..1677be9
--- /dev/null
@@ -0,0 +1,124 @@
+From 3e3b9293d392c577b62e24e4bc9982320438e749 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:24 +0100
+Subject: x86/entry/64: Return to userspace from the trampoline stack
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 3e3b9293d392c577b62e24e4bc9982320438e749 upstream.
+
+By itself, this is useless.  It gives us the ability to run some final code
+before exit that cannot run on the kernel stack.  This could include a CR3
+switch a la PAGE_TABLE_ISOLATION or some kernel stack erasing, for
+example.  (Or even weird things like *changing* which kernel stack gets
+used as an ASLR-strengthening mechanism.)
+
+The SYSRET32 path is not covered yet.  It could be in the future or
+we could just ignore it and force the slow path if needed.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150606.306546484@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/entry/entry_64.S |   55 ++++++++++++++++++++++++++++++++++++++++++----
+ 1 file changed, 51 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -326,8 +326,24 @@ syscall_return_via_sysret:
+       popq    %rsi    /* skip rcx */
+       popq    %rdx
+       popq    %rsi
++
++      /*
++       * Now all regs are restored except RSP and RDI.
++       * Save old stack pointer and switch to trampoline stack.
++       */
++      movq    %rsp, %rdi
++      movq    PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp
++
++      pushq   RSP-RDI(%rdi)   /* RSP */
++      pushq   (%rdi)          /* RDI */
++
++      /*
++       * We are on the trampoline stack.  All regs except RDI are live.
++       * We can do future final exit work right here.
++       */
++
+       popq    %rdi
+-      movq    RSP-ORIG_RAX(%rsp), %rsp
++      popq    %rsp
+       USERGS_SYSRET64
+ END(entry_SYSCALL_64)
+@@ -630,10 +646,41 @@ GLOBAL(swapgs_restore_regs_and_return_to
+       ud2
+ 1:
+ #endif
+-      SWAPGS
+       POP_EXTRA_REGS
+-      POP_C_REGS
+-      addq    $8, %rsp        /* skip regs->orig_ax */
++      popq    %r11
++      popq    %r10
++      popq    %r9
++      popq    %r8
++      popq    %rax
++      popq    %rcx
++      popq    %rdx
++      popq    %rsi
++
++      /*
++       * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
++       * Save old stack pointer and switch to trampoline stack.
++       */
++      movq    %rsp, %rdi
++      movq    PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp
++
++      /* Copy the IRET frame to the trampoline stack. */
++      pushq   6*8(%rdi)       /* SS */
++      pushq   5*8(%rdi)       /* RSP */
++      pushq   4*8(%rdi)       /* EFLAGS */
++      pushq   3*8(%rdi)       /* CS */
++      pushq   2*8(%rdi)       /* RIP */
++
++      /* Push user RDI on the trampoline stack. */
++      pushq   (%rdi)
++
++      /*
++       * We are on the trampoline stack.  All regs except RDI are live.
++       * We can do future final exit work right here.
++       */
++
++      /* Restore RDI. */
++      popq    %rdi
++      SWAPGS
+       INTERRUPT_RETURN
diff --git a/queue-4.14/x86-entry-64-separate-cpu_current_top_of_stack-from-tss.sp0.patch b/queue-4.14/x86-entry-64-separate-cpu_current_top_of_stack-from-tss.sp0.patch
new file mode 100644 (file)
index 0000000..81f291f
--- /dev/null
@@ -0,0 +1,144 @@
+From 9aaefe7b59ae00605256a7d6bd1c1456432495fc Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:21 +0100
+Subject: x86/entry/64: Separate cpu_current_top_of_stack from TSS.sp0
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 9aaefe7b59ae00605256a7d6bd1c1456432495fc upstream.
+
+On 64-bit kernels, we used to assume that TSS.sp0 was the current
+top of stack.  With the addition of an entry trampoline, this will
+no longer be the case.  Store the current top of stack in TSS.sp1,
+which is otherwise unused but shares the same cacheline.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150606.050864668@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/processor.h   |   18 +++++++++++++-----
+ arch/x86/include/asm/thread_info.h |    2 +-
+ arch/x86/kernel/asm-offsets_64.c   |    1 +
+ arch/x86/kernel/process.c          |   10 ++++++++++
+ arch/x86/kernel/process_64.c       |    1 +
+ 5 files changed, 26 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -309,7 +309,13 @@ struct x86_hw_tss {
+ struct x86_hw_tss {
+       u32                     reserved1;
+       u64                     sp0;
++
++      /*
++       * We store cpu_current_top_of_stack in sp1 so it's always accessible.
++       * Linux does not use ring 1, so sp1 is not otherwise needed.
++       */
+       u64                     sp1;
++
+       u64                     sp2;
+       u64                     reserved2;
+       u64                     ist[7];
+@@ -368,6 +374,8 @@ DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_
+ #ifdef CONFIG_X86_32
+ DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
++#else
++#define cpu_current_top_of_stack cpu_tss.x86_tss.sp1
+ #endif
+ /*
+@@ -539,12 +547,12 @@ static inline void native_swapgs(void)
+ static inline unsigned long current_top_of_stack(void)
+ {
+-#ifdef CONFIG_X86_64
+-      return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
+-#else
+-      /* sp0 on x86_32 is special in and around vm86 mode. */
++      /*
++       *  We can't read directly from tss.sp0: sp0 on x86_32 is special in
++       *  and around vm86 mode and sp0 on x86_64 is special because of the
++       *  entry trampoline.
++       */
+       return this_cpu_read_stable(cpu_current_top_of_stack);
+-#endif
+ }
+ static inline bool on_thread_stack(void)
+--- a/arch/x86/include/asm/thread_info.h
++++ b/arch/x86/include/asm/thread_info.h
+@@ -207,7 +207,7 @@ static inline int arch_within_stack_fram
+ #else /* !__ASSEMBLY__ */
+ #ifdef CONFIG_X86_64
+-# define cpu_current_top_of_stack (cpu_tss + TSS_sp0)
++# define cpu_current_top_of_stack (cpu_tss + TSS_sp1)
+ #endif
+ #endif
+--- a/arch/x86/kernel/asm-offsets_64.c
++++ b/arch/x86/kernel/asm-offsets_64.c
+@@ -66,6 +66,7 @@ int main(void)
+       OFFSET(TSS_ist, tss_struct, x86_tss.ist);
+       OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
++      OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
+       BLANK();
+ #ifdef CONFIG_CC_STACKPROTECTOR
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -56,6 +56,16 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(
+                * Poison it.
+                */
+               .sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
++
++#ifdef CONFIG_X86_64
++              /*
++               * .sp1 is cpu_current_top_of_stack.  The init task never
++               * runs user code, but cpu_current_top_of_stack should still
++               * be well defined before the first context switch.
++               */
++              .sp1 = TOP_OF_INIT_STACK,
++#endif
++
+ #ifdef CONFIG_X86_32
+               .ss0 = __KERNEL_DS,
+               .ss1 = __KERNEL_CS,
+--- a/arch/x86/kernel/process_64.c
++++ b/arch/x86/kernel/process_64.c
+@@ -461,6 +461,7 @@ __switch_to(struct task_struct *prev_p,
+        * Switch the PDA and FPU contexts.
+        */
+       this_cpu_write(current_task, next_p);
++      this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
+       /* Reload sp0. */
+       update_sp0(next_p);
diff --git a/queue-4.14/x86-entry-64-shorten-test-instructions.patch b/queue-4.14/x86-entry-64-shorten-test-instructions.patch
new file mode 100644 (file)
index 0000000..60e9c23
--- /dev/null
@@ -0,0 +1,48 @@
+From 1e4c4f610f774df6088d7c065b2dd4d22adba698 Mon Sep 17 00:00:00 2001
+From: Borislav Petkov <bp@suse.de>
+Date: Thu, 2 Nov 2017 13:09:26 +0100
+Subject: x86/entry/64: Shorten TEST instructions
+
+From: Borislav Petkov <bp@suse.de>
+
+commit 1e4c4f610f774df6088d7c065b2dd4d22adba698 upstream.
+
+Convert TESTL to TESTB and save 3 bytes per callsite.
+
+No functionality change.
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/20171102120926.4srwerqrr7g72e2k@pd.tnic
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/entry/entry_64.S |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -621,7 +621,7 @@ GLOBAL(retint_user)
+ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
+ #ifdef CONFIG_DEBUG_ENTRY
+       /* Assert that pt_regs indicates user mode. */
+-      testl   $3, CS(%rsp)
++      testb   $3, CS(%rsp)
+       jnz     1f
+       ud2
+ 1:
+@@ -654,7 +654,7 @@ retint_kernel:
+ GLOBAL(restore_regs_and_return_to_kernel)
+ #ifdef CONFIG_DEBUG_ENTRY
+       /* Assert that pt_regs indicates kernel mode. */
+-      testl   $3, CS(%rsp)
++      testb   $3, CS(%rsp)
+       jz      1f
+       ud2
+ 1:
diff --git a/queue-4.14/x86-entry-64-shrink-paranoid_exit_restore-and-make-labels-local.patch b/queue-4.14/x86-entry-64-shrink-paranoid_exit_restore-and-make-labels-local.patch
new file mode 100644 (file)
index 0000000..51a2592
--- /dev/null
@@ -0,0 +1,60 @@
+From e53178328c9b96fbdbc719e78c93b5687ee007c3 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:59:02 -0700
+Subject: x86/entry/64: Shrink paranoid_exit_restore and make labels local
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit e53178328c9b96fbdbc719e78c93b5687ee007c3 upstream.
+
+paranoid_exit_restore was a copy of restore_regs_and_return_to_kernel.
+Merge them and make the paranoid_exit internal labels local.
+
+Keeping .Lparanoid_exit makes the code a bit shorter because it
+allows a 2-byte jnz instead of a 5-byte jnz.
+
+Saves 96 bytes of text.
+
+( This is still a bit suboptimal in a non-CONFIG_TRACE_IRQFLAGS
+  kernel, but fixing that would make the code rather messy. )
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/510d66a1895cda9473c84b1086f0bb974f22de6a.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/entry/entry_64.S |   13 +++++--------
+ 1 file changed, 5 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1124,17 +1124,14 @@ ENTRY(paranoid_exit)
+       DISABLE_INTERRUPTS(CLBR_ANY)
+       TRACE_IRQS_OFF_DEBUG
+       testl   %ebx, %ebx                      /* swapgs needed? */
+-      jnz     paranoid_exit_no_swapgs
++      jnz     .Lparanoid_exit_no_swapgs
+       TRACE_IRQS_IRETQ
+       SWAPGS_UNSAFE_STACK
+-      jmp     paranoid_exit_restore
+-paranoid_exit_no_swapgs:
++      jmp     .Lparanoid_exit_restore
++.Lparanoid_exit_no_swapgs:
+       TRACE_IRQS_IRETQ_DEBUG
+-paranoid_exit_restore:
+-      RESTORE_EXTRA_REGS
+-      RESTORE_C_REGS
+-      REMOVE_PT_GPREGS_FROM_STACK 8
+-      INTERRUPT_RETURN
++.Lparanoid_exit_restore:
++      jmp restore_regs_and_return_to_kernel
+ END(paranoid_exit)
+ /*
diff --git a/queue-4.14/x86-entry-64-simplify-reg-restore-code-in-the-standard-iret-paths.patch b/queue-4.14/x86-entry-64-simplify-reg-restore-code-in-the-standard-iret-paths.patch
new file mode 100644 (file)
index 0000000..ac40b6a
--- /dev/null
@@ -0,0 +1,91 @@
+From e872045bfd9c465a8555bab4b8567d56a4d2d3bb Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:59:01 -0700
+Subject: x86/entry/64: Simplify reg restore code in the standard IRET paths
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit e872045bfd9c465a8555bab4b8567d56a4d2d3bb upstream.
+
+The old code restored all the registers with movq instead of pop.
+
+In theory, this was done because some CPUs have higher movq
+throughput, but any gain there would be tiny and is almost certainly
+outweighed by the higher text size.
+
+This saves 96 bytes of text.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/ad82520a207ccd851b04ba613f4f752b33ac05f7.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/entry/calling.h  |   21 +++++++++++++++++++++
+ arch/x86/entry/entry_64.S |   12 ++++++------
+ 2 files changed, 27 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/entry/calling.h
++++ b/arch/x86/entry/calling.h
+@@ -152,6 +152,27 @@ For 32-bit we have the following convent
+       UNWIND_HINT_REGS offset=\offset extra=0
+       .endm
++      .macro POP_EXTRA_REGS
++      popq %r15
++      popq %r14
++      popq %r13
++      popq %r12
++      popq %rbp
++      popq %rbx
++      .endm
++
++      .macro POP_C_REGS
++      popq %r11
++      popq %r10
++      popq %r9
++      popq %r8
++      popq %rax
++      popq %rcx
++      popq %rdx
++      popq %rsi
++      popq %rdi
++      .endm
++
+       .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
+       .if \rstor_r11
+       movq 6*8(%rsp), %r11
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -619,9 +619,9 @@ GLOBAL(swapgs_restore_regs_and_return_to
+ 1:
+ #endif
+       SWAPGS
+-      RESTORE_EXTRA_REGS
+-      RESTORE_C_REGS
+-      REMOVE_PT_GPREGS_FROM_STACK 8
++      POP_EXTRA_REGS
++      POP_C_REGS
++      addq    $8, %rsp        /* skip regs->orig_ax */
+       INTERRUPT_RETURN
+@@ -651,9 +651,9 @@ GLOBAL(restore_regs_and_return_to_kernel
+       ud2
+ 1:
+ #endif
+-      RESTORE_EXTRA_REGS
+-      RESTORE_C_REGS
+-      REMOVE_PT_GPREGS_FROM_STACK 8
++      POP_EXTRA_REGS
++      POP_C_REGS
++      addq    $8, %rsp        /* skip regs->orig_ax */
+       INTERRUPT_RETURN
+ ENTRY(native_iret)
diff --git a/queue-4.14/x86-entry-64-split-the-iret-to-user-and-iret-to-kernel-paths.patch b/queue-4.14/x86-entry-64-split-the-iret-to-user-and-iret-to-kernel-paths.patch
new file mode 100644 (file)
index 0000000..0c2c9a9
--- /dev/null
@@ -0,0 +1,121 @@
+From 26c4ef9c49d8a0341f6d97ce2cfdd55d1236ed29 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:58:59 -0700
+Subject: x86/entry/64: Split the IRET-to-user and IRET-to-kernel paths
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 26c4ef9c49d8a0341f6d97ce2cfdd55d1236ed29 upstream.
+
+These code paths will diverge soon.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/dccf8c7b3750199b4b30383c812d4e2931811509.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/entry/entry_64.S        |   34 +++++++++++++++++++++++++---------
+ arch/x86/entry/entry_64_compat.S |    2 +-
+ arch/x86/kernel/head_64.S        |    2 +-
+ 3 files changed, 27 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -322,7 +322,7 @@ syscall_return_via_sysret:
+ opportunistic_sysret_failed:
+       SWAPGS
+-      jmp     restore_regs_and_iret
++      jmp     restore_regs_and_return_to_usermode
+ END(entry_SYSCALL_64)
+ ENTRY(stub_ptregs_64)
+@@ -424,7 +424,7 @@ ENTRY(ret_from_fork)
+       call    syscall_return_slowpath /* returns with IRQs disabled */
+       TRACE_IRQS_ON                   /* user mode is traced as IRQS on */
+       SWAPGS
+-      jmp     restore_regs_and_iret
++      jmp     restore_regs_and_return_to_usermode
+ 1:
+       /* kernel thread */
+@@ -613,7 +613,20 @@ GLOBAL(retint_user)
+       call    prepare_exit_to_usermode
+       TRACE_IRQS_IRETQ
+       SWAPGS
+-      jmp     restore_regs_and_iret
++
++GLOBAL(restore_regs_and_return_to_usermode)
++#ifdef CONFIG_DEBUG_ENTRY
++      /* Assert that pt_regs indicates user mode. */
++      testl   $3, CS(%rsp)
++      jnz     1f
++      ud2
++1:
++#endif
++      RESTORE_EXTRA_REGS
++      RESTORE_C_REGS
++      REMOVE_PT_GPREGS_FROM_STACK 8
++      INTERRUPT_RETURN
++
+ /* Returning to kernel space */
+ retint_kernel:
+@@ -633,11 +646,14 @@ retint_kernel:
+        */
+       TRACE_IRQS_IRETQ
+-/*
+- * At this label, code paths which return to kernel and to user,
+- * which come from interrupts/exception and from syscalls, merge.
+- */
+-GLOBAL(restore_regs_and_iret)
++GLOBAL(restore_regs_and_return_to_kernel)
++#ifdef CONFIG_DEBUG_ENTRY
++      /* Assert that pt_regs indicates kernel mode. */
++      testl   $3, CS(%rsp)
++      jz      1f
++      ud2
++1:
++#endif
+       RESTORE_EXTRA_REGS
+       RESTORE_C_REGS
+       REMOVE_PT_GPREGS_FROM_STACK 8
+@@ -1328,7 +1344,7 @@ ENTRY(nmi)
+        * work, because we don't want to enable interrupts.
+        */
+       SWAPGS
+-      jmp     restore_regs_and_iret
++      jmp     restore_regs_and_return_to_usermode
+ .Lnmi_from_kernel:
+       /*
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -338,7 +338,7 @@ ENTRY(entry_INT80_compat)
+       /* Go back to user mode. */
+       TRACE_IRQS_ON
+       SWAPGS
+-      jmp     restore_regs_and_iret
++      jmp     restore_regs_and_return_to_usermode
+ END(entry_INT80_compat)
+ ENTRY(stub32_clone)
+--- a/arch/x86/kernel/head_64.S
++++ b/arch/x86/kernel/head_64.S
+@@ -328,7 +328,7 @@ early_idt_handler_common:
+ 20:
+       decl early_recursion_flag(%rip)
+-      jmp restore_regs_and_iret
++      jmp restore_regs_and_return_to_kernel
+ END(early_idt_handler_common)
+       __INITDATA
diff --git a/queue-4.14/x86-entry-64-stop-initializing-tss.sp0-at-boot.patch b/queue-4.14/x86-entry-64-stop-initializing-tss.sp0-at-boot.patch
new file mode 100644 (file)
index 0000000..e83062c
--- /dev/null
@@ -0,0 +1,91 @@
+From 20bb83443ea79087b5e5f8dab4e9d80bb9bf7acb Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:59:13 -0700
+Subject: x86/entry/64: Stop initializing TSS.sp0 at boot
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 20bb83443ea79087b5e5f8dab4e9d80bb9bf7acb upstream.
+
+In my quest to get rid of thread_struct::sp0, I want to clean up or
+remove all of its readers.  Two of them are in cpu_init() (32-bit and
+64-bit), and they aren't needed.  This is because we never enter
+userspace at all on the threads that CPUs are initialized in.
+
+Poison the initial TSS.sp0 and stop initializing it on CPU init.
+
+The comment text mostly comes from Dave Hansen.  Thanks!
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/ee4a00540ad28c6cff475fbcc7769a4460acc861.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/cpu/common.c |   13 ++++++++++---
+ arch/x86/kernel/process.c    |    8 +++++++-
+ 2 files changed, 17 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1570,9 +1570,13 @@ void cpu_init(void)
+       initialize_tlbstate_and_flush();
+       enter_lazy_tlb(&init_mm, me);
+-      load_sp0(current->thread.sp0);
++      /*
++       * Initialize the TSS.  Don't bother initializing sp0, as the initial
++       * task never enters user mode.
++       */
+       set_tss_desc(cpu, t);
+       load_TR_desc();
++
+       load_mm_ldt(&init_mm);
+       clear_all_debug_regs();
+@@ -1594,7 +1598,6 @@ void cpu_init(void)
+       int cpu = smp_processor_id();
+       struct task_struct *curr = current;
+       struct tss_struct *t = &per_cpu(cpu_tss, cpu);
+-      struct thread_struct *thread = &curr->thread;
+       wait_for_master_cpu(cpu);
+@@ -1625,9 +1628,13 @@ void cpu_init(void)
+       initialize_tlbstate_and_flush();
+       enter_lazy_tlb(&init_mm, curr);
+-      load_sp0(thread->sp0);
++      /*
++       * Initialize the TSS.  Don't bother initializing sp0, as the initial
++       * task never enters user mode.
++       */
+       set_tss_desc(cpu, t);
+       load_TR_desc();
++
+       load_mm_ldt(&init_mm);
+       t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -49,7 +49,13 @@
+  */
+ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
+       .x86_tss = {
+-              .sp0 = TOP_OF_INIT_STACK,
++              /*
++               * .sp0 is only used when entering ring 0 from a lower
++               * privilege level.  Since the init task never runs anything
++               * but ring 0 code, there is no need for a valid value here.
++               * Poison it.
++               */
++              .sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
+ #ifdef CONFIG_X86_32
+               .ss0 = __KERNEL_DS,
+               .ss1 = __KERNEL_CS,
diff --git a/queue-4.14/x86-entry-64-use-a-per-cpu-trampoline-stack-for-idt-entries.patch b/queue-4.14/x86-entry-64-use-a-per-cpu-trampoline-stack-for-idt-entries.patch
new file mode 100644 (file)
index 0000000..846718f
--- /dev/null
@@ -0,0 +1,276 @@
+From 7f2590a110b837af5679d08fc25c6227c5a8c497 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:23 +0100
+Subject: x86/entry/64: Use a per-CPU trampoline stack for IDT entries
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 7f2590a110b837af5679d08fc25c6227c5a8c497 upstream.
+
+Historically, IDT entries from usermode have always gone directly
+to the running task's kernel stack.  Rearrange it so that we enter on
+a per-CPU trampoline stack and then manually switch to the task's stack.
+This touches a couple of extra cachelines, but it gives us a chance
+to run some code before we touch the kernel stack.
+
+The asm isn't exactly beautiful, but I think that fully refactoring
+it can wait.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150606.225330557@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/entry/entry_64.S        |   67 +++++++++++++++++++++++++++++----------
+ arch/x86/entry/entry_64_compat.S |    5 ++
+ arch/x86/include/asm/switch_to.h |    4 +-
+ arch/x86/include/asm/traps.h     |    1 
+ arch/x86/kernel/cpu/common.c     |    6 ++-
+ arch/x86/kernel/traps.c          |   21 ++++++------
+ 6 files changed, 72 insertions(+), 32 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -560,6 +560,13 @@ END(irq_entries_start)
+ /* 0(%rsp): ~(interrupt number) */
+       .macro interrupt func
+       cld
++
++      testb   $3, CS-ORIG_RAX(%rsp)
++      jz      1f
++      SWAPGS
++      call    switch_to_thread_stack
++1:
++
+       ALLOC_PT_GPREGS_ON_STACK
+       SAVE_C_REGS
+       SAVE_EXTRA_REGS
+@@ -569,12 +576,8 @@ END(irq_entries_start)
+       jz      1f
+       /*
+-       * IRQ from user mode.  Switch to kernel gsbase and inform context
+-       * tracking that we're in kernel mode.
+-       */
+-      SWAPGS
+-
+-      /*
++       * IRQ from user mode.
++       *
+        * We need to tell lockdep that IRQs are off.  We can't do this until
+        * we fix gsbase, and we should do it before enter_from_user_mode
+        * (which can take locks).  Since TRACE_IRQS_OFF idempotent,
+@@ -828,6 +831,32 @@ apicinterrupt IRQ_WORK_VECTOR                     irq_work
+  */
+ #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8)
++/*
++ * Switch to the thread stack.  This is called with the IRET frame and
++ * orig_ax on the stack.  (That is, RDI..R12 are not on the stack and
++ * space has not been allocated for them.)
++ */
++ENTRY(switch_to_thread_stack)
++      UNWIND_HINT_FUNC
++
++      pushq   %rdi
++      movq    %rsp, %rdi
++      movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
++      UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
++
++      pushq   7*8(%rdi)               /* regs->ss */
++      pushq   6*8(%rdi)               /* regs->rsp */
++      pushq   5*8(%rdi)               /* regs->eflags */
++      pushq   4*8(%rdi)               /* regs->cs */
++      pushq   3*8(%rdi)               /* regs->ip */
++      pushq   2*8(%rdi)               /* regs->orig_ax */
++      pushq   8(%rdi)                 /* return address */
++      UNWIND_HINT_FUNC
++
++      movq    (%rdi), %rdi
++      ret
++END(switch_to_thread_stack)
++
+ .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
+ ENTRY(\sym)
+       UNWIND_HINT_IRET_REGS offset=\has_error_code*8
+@@ -845,11 +874,12 @@ ENTRY(\sym)
+       ALLOC_PT_GPREGS_ON_STACK
+-      .if \paranoid
+-      .if \paranoid == 1
++      .if \paranoid < 2
+       testb   $3, CS(%rsp)                    /* If coming from userspace, switch stacks */
+-      jnz     1f
++      jnz     .Lfrom_usermode_switch_stack_\@
+       .endif
++
++      .if \paranoid
+       call    paranoid_entry
+       .else
+       call    error_entry
+@@ -891,20 +921,15 @@ ENTRY(\sym)
+       jmp     error_exit
+       .endif
+-      .if \paranoid == 1
++      .if \paranoid < 2
+       /*
+-       * Paranoid entry from userspace.  Switch stacks and treat it
++       * Entry from userspace.  Switch stacks and treat it
+        * as a normal entry.  This means that paranoid handlers
+        * run in real process context if user_mode(regs).
+        */
+-1:
++.Lfrom_usermode_switch_stack_\@:
+       call    error_entry
+-
+-      movq    %rsp, %rdi                      /* pt_regs pointer */
+-      call    sync_regs
+-      movq    %rax, %rsp                      /* switch stack */
+-
+       movq    %rsp, %rdi                      /* pt_regs pointer */
+       .if \has_error_code
+@@ -1165,6 +1190,14 @@ ENTRY(error_entry)
+       SWAPGS
+ .Lerror_entry_from_usermode_after_swapgs:
++      /* Put us onto the real thread stack. */
++      popq    %r12                            /* save return addr in %12 */
++      movq    %rsp, %rdi                      /* arg0 = pt_regs pointer */
++      call    sync_regs
++      movq    %rax, %rsp                      /* switch stack */
++      ENCODE_FRAME_POINTER
++      pushq   %r12
++
+       /*
+        * We need to tell lockdep that IRQs are off.  We can't do this until
+        * we fix gsbase, and we should do it before enter_from_user_mode
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -306,8 +306,11 @@ ENTRY(entry_INT80_compat)
+        */
+       movl    %eax, %eax
+-      /* Construct struct pt_regs on stack (iret frame is already on stack) */
+       pushq   %rax                    /* pt_regs->orig_ax */
++
++      /* switch to thread stack expects orig_ax to be pushed */
++      call    switch_to_thread_stack
++
+       pushq   %rdi                    /* pt_regs->di */
+       pushq   %rsi                    /* pt_regs->si */
+       pushq   %rdx                    /* pt_regs->dx */
+--- a/arch/x86/include/asm/switch_to.h
++++ b/arch/x86/include/asm/switch_to.h
+@@ -90,10 +90,12 @@ static inline void refresh_sysenter_cs(s
+ /* This is used when switching tasks or entering/exiting vm86 mode. */
+ static inline void update_sp0(struct task_struct *task)
+ {
++      /* On x86_64, sp0 always points to the entry trampoline stack, which is constant: */
+ #ifdef CONFIG_X86_32
+       load_sp0(task->thread.sp0);
+ #else
+-      load_sp0(task_top_of_stack(task));
++      if (static_cpu_has(X86_FEATURE_XENPV))
++              load_sp0(task_top_of_stack(task));
+ #endif
+ }
+--- a/arch/x86/include/asm/traps.h
++++ b/arch/x86/include/asm/traps.h
+@@ -75,7 +75,6 @@ dotraplinkage void do_segment_not_presen
+ dotraplinkage void do_stack_segment(struct pt_regs *, long);
+ #ifdef CONFIG_X86_64
+ dotraplinkage void do_double_fault(struct pt_regs *, long);
+-asmlinkage struct pt_regs *sync_regs(struct pt_regs *);
+ #endif
+ dotraplinkage void do_general_protection(struct pt_regs *, long);
+ dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1623,11 +1623,13 @@ void cpu_init(void)
+       setup_cpu_entry_area(cpu);
+       /*
+-       * Initialize the TSS.  Don't bother initializing sp0, as the initial
+-       * task never enters user mode.
++       * Initialize the TSS.  sp0 points to the entry trampoline stack
++       * regardless of what task is running.
+        */
+       set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
+       load_TR_desc();
++      load_sp0((unsigned long)&get_cpu_entry_area(cpu)->tss +
++               offsetofend(struct tss_struct, SYSENTER_stack));
+       load_mm_ldt(&init_mm);
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -619,14 +619,15 @@ NOKPROBE_SYMBOL(do_int3);
+ #ifdef CONFIG_X86_64
+ /*
+- * Help handler running on IST stack to switch off the IST stack if the
+- * interrupted code was in user mode. The actual stack switch is done in
+- * entry_64.S
++ * Help handler running on a per-cpu (IST or entry trampoline) stack
++ * to switch to the normal thread stack if the interrupted code was in
++ * user mode. The actual stack switch is done in entry_64.S
+  */
+ asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs)
+ {
+-      struct pt_regs *regs = task_pt_regs(current);
+-      *regs = *eregs;
++      struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1;
++      if (regs != eregs)
++              *regs = *eregs;
+       return regs;
+ }
+ NOKPROBE_SYMBOL(sync_regs);
+@@ -642,13 +643,13 @@ struct bad_iret_stack *fixup_bad_iret(st
+       /*
+        * This is called from entry_64.S early in handling a fault
+        * caused by a bad iret to user mode.  To handle the fault
+-       * correctly, we want move our stack frame to task_pt_regs
+-       * and we want to pretend that the exception came from the
+-       * iret target.
++       * correctly, we want to move our stack frame to where it would
++       * be had we entered directly on the entry stack (rather than
++       * just below the IRET frame) and we want to pretend that the
++       * exception came from the IRET target.
+        */
+       struct bad_iret_stack *new_stack =
+-              container_of(task_pt_regs(current),
+-                           struct bad_iret_stack, regs);
++              (struct bad_iret_stack *)this_cpu_read(cpu_tss.x86_tss.sp0) - 1;
+       /* Copy the IRET target to the new stack. */
+       memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
diff --git a/queue-4.14/x86-entry-64-use-pop-instead-of-mov-to-restore-regs-on-nmi-return.patch b/queue-4.14/x86-entry-64-use-pop-instead-of-mov-to-restore-regs-on-nmi-return.patch
new file mode 100644 (file)
index 0000000..5a0bf84
--- /dev/null
@@ -0,0 +1,47 @@
+From 471ee4832209e986029b9fabdaad57b1eecb856b Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:59:05 -0700
+Subject: x86/entry/64: Use POP instead of MOV to restore regs on NMI return
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 471ee4832209e986029b9fabdaad57b1eecb856b upstream.
+
+This gets rid of the last user of the old RESTORE_..._REGS infrastructure.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/652a260f17a160789bc6a41d997f98249b73e2ab.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/entry/entry_64.S |   11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1560,11 +1560,14 @@ end_repeat_nmi:
+ nmi_swapgs:
+       SWAPGS_UNSAFE_STACK
+ nmi_restore:
+-      RESTORE_EXTRA_REGS
+-      RESTORE_C_REGS
++      POP_EXTRA_REGS
++      POP_C_REGS
+-      /* Point RSP at the "iret" frame. */
+-      REMOVE_PT_GPREGS_FROM_STACK 6*8
++      /*
++       * Skip orig_ax and the "outermost" frame to point RSP at the "iret"
++       * frame.
++       */
++      addq    $6*8, %rsp
+       /*
+        * Clear "NMI executing".  Set DF first so that we can easily
diff --git a/queue-4.14/x86-entry-64-use-pop-instead-of-movq-in-syscall_return_via_sysret.patch b/queue-4.14/x86-entry-64-use-pop-instead-of-movq-in-syscall_return_via_sysret.patch
new file mode 100644 (file)
index 0000000..8f05e12
--- /dev/null
@@ -0,0 +1,51 @@
+From 4fbb39108f972437c44e5ffa781b56635d496826 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:59:03 -0700
+Subject: x86/entry/64: Use pop instead of movq in syscall_return_via_sysret
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 4fbb39108f972437c44e5ffa781b56635d496826 upstream.
+
+Saves 64 bytes.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/6609b7f74ab31c36604ad746e019ea8495aec76c.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/entry/entry_64.S |   14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -316,10 +316,18 @@ return_from_SYSCALL_64:
+        */
+ syscall_return_via_sysret:
+       /* rcx and r11 are already restored (see code above) */
+-      RESTORE_EXTRA_REGS
+-      RESTORE_C_REGS_EXCEPT_RCX_R11
+-      movq    RSP(%rsp), %rsp
+       UNWIND_HINT_EMPTY
++      POP_EXTRA_REGS
++      popq    %rsi    /* skip r11 */
++      popq    %r10
++      popq    %r9
++      popq    %r8
++      popq    %rax
++      popq    %rsi    /* skip rcx */
++      popq    %rdx
++      popq    %rsi
++      popq    %rdi
++      movq    RSP-ORIG_RAX(%rsp), %rsp
+       USERGS_SYSRET64
+ END(entry_SYSCALL_64)
diff --git a/queue-4.14/x86-entry-add-task_top_of_stack-to-find-the-top-of-a-task-s-stack.patch b/queue-4.14/x86-entry-add-task_top_of_stack-to-find-the-top-of-a-task-s-stack.patch
new file mode 100644 (file)
index 0000000..04abd3a
--- /dev/null
@@ -0,0 +1,38 @@
+From 3500130b84a3cdc5b6796eba1daf178944935efe Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:59:11 -0700
+Subject: x86/entry: Add task_top_of_stack() to find the top of a task's stack
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 3500130b84a3cdc5b6796eba1daf178944935efe upstream.
+
+This will let us get rid of a few places that hardcode accesses to
+thread.sp0.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/b49b3f95a8ff858c40c9b0f5b32be0355324327d.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/processor.h |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -796,6 +796,8 @@ static inline void spin_lock_prefetch(co
+ #define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
+                          TOP_OF_KERNEL_STACK_PADDING)
++#define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1))
++
+ #ifdef CONFIG_X86_32
+ /*
+  * User space process size: 3GB (default).
diff --git a/queue-4.14/x86-entry-clean-up-the-sysenter_stack-code.patch b/queue-4.14/x86-entry-clean-up-the-sysenter_stack-code.patch
new file mode 100644 (file)
index 0000000..9aad70f
--- /dev/null
@@ -0,0 +1,184 @@
+From 0f9a48100fba3f189724ae88a450c2261bf91c80 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:28 +0100
+Subject: x86/entry: Clean up the SYSENTER_stack code
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 0f9a48100fba3f189724ae88a450c2261bf91c80 upstream.
+
+The existing code was a mess, mainly because C arrays are nasty.  Turn
+SYSENTER_stack into a struct, add a helper to find it, and do all the
+obvious cleanups this enables.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bpetkov@suse.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150606.653244723@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/entry/entry_32.S        |    4 ++--
+ arch/x86/entry/entry_64.S        |    2 +-
+ arch/x86/include/asm/fixmap.h    |    5 +++++
+ arch/x86/include/asm/processor.h |    6 +++++-
+ arch/x86/kernel/asm-offsets.c    |    6 ++----
+ arch/x86/kernel/cpu/common.c     |   14 +++-----------
+ arch/x86/kernel/dumpstack.c      |    7 +++----
+ 7 files changed, 21 insertions(+), 23 deletions(-)
+
+--- a/arch/x86/entry/entry_32.S
++++ b/arch/x86/entry/entry_32.S
+@@ -942,7 +942,7 @@ ENTRY(debug)
+       /* Are we currently on the SYSENTER stack? */
+       movl    PER_CPU_VAR(cpu_entry_area), %ecx
+-      addl    $CPU_ENTRY_AREA_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
++      addl    $CPU_ENTRY_AREA_tss + TSS_STRUCT_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
+       subl    %eax, %ecx      /* ecx = (end of SYSENTER_stack) - esp */
+       cmpl    $SIZEOF_SYSENTER_stack, %ecx
+       jb      .Ldebug_from_sysenter_stack
+@@ -986,7 +986,7 @@ ENTRY(nmi)
+       /* Are we currently on the SYSENTER stack? */
+       movl    PER_CPU_VAR(cpu_entry_area), %ecx
+-      addl    $CPU_ENTRY_AREA_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
++      addl    $CPU_ENTRY_AREA_tss + TSS_STRUCT_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
+       subl    %eax, %ecx      /* ecx = (end of SYSENTER_stack) - esp */
+       cmpl    $SIZEOF_SYSENTER_stack, %ecx
+       jb      .Lnmi_from_sysenter_stack
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -154,7 +154,7 @@ END(native_usergs_sysret64)
+       _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
+ /* The top word of the SYSENTER stack is hot and is usable as scratch space. */
+-#define RSP_SCRATCH   CPU_ENTRY_AREA_tss + CPU_TSS_SYSENTER_stack + \
++#define RSP_SCRATCH   CPU_ENTRY_AREA_tss + TSS_STRUCT_SYSENTER_stack + \
+                       SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA
+ ENTRY(entry_SYSCALL_64_trampoline)
+--- a/arch/x86/include/asm/fixmap.h
++++ b/arch/x86/include/asm/fixmap.h
+@@ -245,5 +245,10 @@ static inline struct cpu_entry_area *get
+       return (struct cpu_entry_area *)__fix_to_virt(__get_cpu_entry_area_page_index(cpu, 0));
+ }
++static inline struct SYSENTER_stack *cpu_SYSENTER_stack(int cpu)
++{
++      return &get_cpu_entry_area(cpu)->tss.SYSENTER_stack;
++}
++
+ #endif /* !__ASSEMBLY__ */
+ #endif /* _ASM_X86_FIXMAP_H */
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -336,12 +336,16 @@ struct x86_hw_tss {
+ #define IO_BITMAP_OFFSET              (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
+ #define INVALID_IO_BITMAP_OFFSET      0x8000
++struct SYSENTER_stack {
++      unsigned long           words[64];
++};
++
+ struct tss_struct {
+       /*
+        * Space for the temporary SYSENTER stack, used for SYSENTER
+        * and the entry trampoline as well.
+        */
+-      unsigned long           SYSENTER_stack[64];
++      struct SYSENTER_stack   SYSENTER_stack;
+       /*
+        * The fixed hardware portion.  This must not cross a page boundary
+--- a/arch/x86/kernel/asm-offsets.c
++++ b/arch/x86/kernel/asm-offsets.c
+@@ -94,10 +94,8 @@ void common(void) {
+       BLANK();
+       DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
+-      /* Offset from cpu_tss to SYSENTER_stack */
+-      OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
+-      /* Size of SYSENTER_stack */
+-      DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
++      OFFSET(TSS_STRUCT_SYSENTER_stack, tss_struct, SYSENTER_stack);
++      DEFINE(SIZEOF_SYSENTER_stack, sizeof(struct SYSENTER_stack));
+       /* Layout info for cpu_entry_area */
+       OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1314,12 +1314,7 @@ void enable_sep_cpu(void)
+       tss->x86_tss.ss1 = __KERNEL_CS;
+       wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
+-
+-      wrmsr(MSR_IA32_SYSENTER_ESP,
+-            (unsigned long)&get_cpu_entry_area(cpu)->tss +
+-            offsetofend(struct tss_struct, SYSENTER_stack),
+-            0);
+-
++      wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1), 0);
+       wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
+       put_cpu();
+@@ -1436,9 +1431,7 @@ void syscall_init(void)
+        * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
+        */
+       wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
+-      wrmsrl_safe(MSR_IA32_SYSENTER_ESP,
+-                  (unsigned long)&get_cpu_entry_area(cpu)->tss +
+-                  offsetofend(struct tss_struct, SYSENTER_stack));
++      wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1));
+       wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
+ #else
+       wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
+@@ -1653,8 +1646,7 @@ void cpu_init(void)
+        */
+       set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
+       load_TR_desc();
+-      load_sp0((unsigned long)&get_cpu_entry_area(cpu)->tss +
+-               offsetofend(struct tss_struct, SYSENTER_stack));
++      load_sp0((unsigned long)(cpu_SYSENTER_stack(cpu) + 1));
+       load_mm_ldt(&init_mm);
+--- a/arch/x86/kernel/dumpstack.c
++++ b/arch/x86/kernel/dumpstack.c
+@@ -45,11 +45,10 @@ bool in_task_stack(unsigned long *stack,
+ bool in_sysenter_stack(unsigned long *stack, struct stack_info *info)
+ {
+-      int cpu = smp_processor_id();
+-      struct tss_struct *tss = &get_cpu_entry_area(cpu)->tss;
++      struct SYSENTER_stack *ss = cpu_SYSENTER_stack(smp_processor_id());
+-      void *begin = &tss->SYSENTER_stack;
+-      void *end = (void *)&tss->SYSENTER_stack + sizeof(tss->SYSENTER_stack);
++      void *begin = ss;
++      void *end = ss + 1;
+       if ((void *)stack < begin || (void *)stack >= end)
+               return false;
diff --git a/queue-4.14/x86-entry-fix-assumptions-that-the-hw-tss-is-at-the-beginning-of-cpu_tss.patch b/queue-4.14/x86-entry-fix-assumptions-that-the-hw-tss-is-at-the-beginning-of-cpu_tss.patch
new file mode 100644 (file)
index 0000000..eac247e
--- /dev/null
@@ -0,0 +1,205 @@
+From 7fb983b4dd569e08564134a850dfd4eb1c63d9b8 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:17 +0100
+Subject: x86/entry: Fix assumptions that the HW TSS is at the beginning of cpu_tss
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 7fb983b4dd569e08564134a850dfd4eb1c63d9b8 upstream.
+
+A future patch will move SYSENTER_stack to the beginning of cpu_tss
+to help detect overflow.  Before this can happen, fix several code
+paths that hardcode assumptions about the old layout.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Dave Hansen <dave.hansen@intel.com>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150605.722425540@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/desc.h      |    2 +-
+ arch/x86/include/asm/processor.h |    9 +++++++--
+ arch/x86/kernel/cpu/common.c     |    8 ++++----
+ arch/x86/kernel/doublefault.c    |   32 +++++++++++++++-----------------
+ arch/x86/kvm/vmx.c               |    2 +-
+ arch/x86/power/cpu.c             |   13 +++++++------
+ 6 files changed, 35 insertions(+), 31 deletions(-)
+
+--- a/arch/x86/include/asm/desc.h
++++ b/arch/x86/include/asm/desc.h
+@@ -178,7 +178,7 @@ static inline void set_tssldt_descriptor
+ #endif
+ }
+-static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
++static inline void __set_tss_desc(unsigned cpu, unsigned int entry, struct x86_hw_tss *addr)
+ {
+       struct desc_struct *d = get_cpu_gdt_rw(cpu);
+       tss_desc tss;
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -162,7 +162,7 @@ enum cpuid_regs_idx {
+ extern struct cpuinfo_x86     boot_cpu_data;
+ extern struct cpuinfo_x86     new_cpu_data;
+-extern struct tss_struct      doublefault_tss;
++extern struct x86_hw_tss      doublefault_tss;
+ extern __u32                  cpu_caps_cleared[NCAPINTS];
+ extern __u32                  cpu_caps_set[NCAPINTS];
+@@ -252,6 +252,11 @@ static inline void load_cr3(pgd_t *pgdir
+       write_cr3(__sme_pa(pgdir));
+ }
++/*
++ * Note that while the legacy 'TSS' name comes from 'Task State Segment',
++ * on modern x86 CPUs the TSS also holds information important to 64-bit mode,
++ * unrelated to the task-switch mechanism:
++ */
+ #ifdef CONFIG_X86_32
+ /* This is the TSS defined by the hardware. */
+ struct x86_hw_tss {
+@@ -322,7 +327,7 @@ struct x86_hw_tss {
+ #define IO_BITMAP_BITS                        65536
+ #define IO_BITMAP_BYTES                       (IO_BITMAP_BITS/8)
+ #define IO_BITMAP_LONGS                       (IO_BITMAP_BYTES/sizeof(long))
+-#define IO_BITMAP_OFFSET              offsetof(struct tss_struct, io_bitmap)
++#define IO_BITMAP_OFFSET              (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
+ #define INVALID_IO_BITMAP_OFFSET      0x8000
+ struct tss_struct {
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1557,7 +1557,7 @@ void cpu_init(void)
+               }
+       }
+-      t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
++      t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
+       /*
+        * <= is required because the CPU will access up to
+@@ -1576,7 +1576,7 @@ void cpu_init(void)
+        * Initialize the TSS.  Don't bother initializing sp0, as the initial
+        * task never enters user mode.
+        */
+-      set_tss_desc(cpu, t);
++      set_tss_desc(cpu, &t->x86_tss);
+       load_TR_desc();
+       load_mm_ldt(&init_mm);
+@@ -1634,12 +1634,12 @@ void cpu_init(void)
+        * Initialize the TSS.  Don't bother initializing sp0, as the initial
+        * task never enters user mode.
+        */
+-      set_tss_desc(cpu, t);
++      set_tss_desc(cpu, &t->x86_tss);
+       load_TR_desc();
+       load_mm_ldt(&init_mm);
+-      t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
++      t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
+ #ifdef CONFIG_DOUBLEFAULT
+       /* Set up doublefault TSS pointer in the GDT */
+--- a/arch/x86/kernel/doublefault.c
++++ b/arch/x86/kernel/doublefault.c
+@@ -50,25 +50,23 @@ static void doublefault_fn(void)
+               cpu_relax();
+ }
+-struct tss_struct doublefault_tss __cacheline_aligned = {
+-      .x86_tss = {
+-              .sp0            = STACK_START,
+-              .ss0            = __KERNEL_DS,
+-              .ldt            = 0,
+-              .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
++struct x86_hw_tss doublefault_tss __cacheline_aligned = {
++      .sp0            = STACK_START,
++      .ss0            = __KERNEL_DS,
++      .ldt            = 0,
++      .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
+-              .ip             = (unsigned long) doublefault_fn,
+-              /* 0x2 bit is always set */
+-              .flags          = X86_EFLAGS_SF | 0x2,
+-              .sp             = STACK_START,
+-              .es             = __USER_DS,
+-              .cs             = __KERNEL_CS,
+-              .ss             = __KERNEL_DS,
+-              .ds             = __USER_DS,
+-              .fs             = __KERNEL_PERCPU,
++      .ip             = (unsigned long) doublefault_fn,
++      /* 0x2 bit is always set */
++      .flags          = X86_EFLAGS_SF | 0x2,
++      .sp             = STACK_START,
++      .es             = __USER_DS,
++      .cs             = __KERNEL_CS,
++      .ss             = __KERNEL_DS,
++      .ds             = __USER_DS,
++      .fs             = __KERNEL_PERCPU,
+-              .__cr3          = __pa_nodebug(swapper_pg_dir),
+-      }
++      .__cr3          = __pa_nodebug(swapper_pg_dir),
+ };
+ /* dummy for do_double_fault() call */
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -2295,7 +2295,7 @@ static void vmx_vcpu_load(struct kvm_vcp
+                * processors.  See 22.2.4.
+                */
+               vmcs_writel(HOST_TR_BASE,
+-                          (unsigned long)this_cpu_ptr(&cpu_tss));
++                          (unsigned long)this_cpu_ptr(&cpu_tss.x86_tss));
+               vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt);   /* 22.2.4 */
+               /*
+--- a/arch/x86/power/cpu.c
++++ b/arch/x86/power/cpu.c
+@@ -165,12 +165,13 @@ static void fix_processor_context(void)
+       struct desc_struct *desc = get_cpu_gdt_rw(cpu);
+       tss_desc tss;
+ #endif
+-      set_tss_desc(cpu, t);   /*
+-                               * This just modifies memory; should not be
+-                               * necessary. But... This is necessary, because
+-                               * 386 hardware has concept of busy TSS or some
+-                               * similar stupidity.
+-                               */
++
++      /*
++       * This just modifies memory; should not be necessary. But... This is
++       * necessary, because 386 hardware has concept of busy TSS or some
++       * similar stupidity.
++       */
++      set_tss_desc(cpu, &t->x86_tss);
+ #ifdef CONFIG_X86_64
+       memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc));
diff --git a/queue-4.14/x86-entry-gdt-put-per-cpu-gdt-remaps-in-ascending-order.patch b/queue-4.14/x86-entry-gdt-put-per-cpu-gdt-remaps-in-ascending-order.patch
new file mode 100644 (file)
index 0000000..236521f
--- /dev/null
@@ -0,0 +1,61 @@
+From aaeed3aeb39c1ba69f0a49baec8cb728121d0a91 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:14 +0100
+Subject: x86/entry/gdt: Put per-CPU GDT remaps in ascending order
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit aaeed3aeb39c1ba69f0a49baec8cb728121d0a91 upstream.
+
+We currently have CPU 0's GDT at the top of the GDT range and
+higher-numbered CPUs at lower addresses.  This happens because the
+fixmap is upside down (index 0 is the top of the fixmap).
+
+Flip it so that GDTs are in ascending order by virtual address.
+This will simplify a future patch that will generalize the GDT
+remap to contain multiple pages.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150605.471561421@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/desc.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/desc.h
++++ b/arch/x86/include/asm/desc.h
+@@ -63,7 +63,7 @@ static inline struct desc_struct *get_cu
+ /* Get the fixmap index for a specific processor */
+ static inline unsigned int get_cpu_gdt_ro_index(int cpu)
+ {
+-      return FIX_GDT_REMAP_BEGIN + cpu;
++      return FIX_GDT_REMAP_END - cpu;
+ }
+ /* Provide the fixmap address of the remapped GDT */
diff --git a/queue-4.14/x86-entry-move-sysenter_stack-to-the-beginning-of-struct-tss_struct.patch b/queue-4.14/x86-entry-move-sysenter_stack-to-the-beginning-of-struct-tss_struct.patch
new file mode 100644 (file)
index 0000000..63c6988
--- /dev/null
@@ -0,0 +1,118 @@
+From 1a935bc3d4ea61556461a9e92a68ca3556232efd Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:19 +0100
+Subject: x86/entry: Move SYSENTER_stack to the beginning of struct tss_struct
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 1a935bc3d4ea61556461a9e92a68ca3556232efd upstream.
+
+SYSENTER_stack should have reliable overflow detection, which
+means that it needs to be at the bottom of a page, not the top.
+Move it to the beginning of struct tss_struct and page-align it.
+
+Also add an assertion to make sure that the fixed hardware TSS
+doesn't cross a page boundary.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150605.881827433@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/processor.h |   21 ++++++++++++---------
+ arch/x86/kernel/cpu/common.c     |   21 +++++++++++++++++++++
+ 2 files changed, 33 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -332,7 +332,16 @@ struct x86_hw_tss {
+ struct tss_struct {
+       /*
+-       * The hardware state:
++       * Space for the temporary SYSENTER stack, used for SYSENTER
++       * and the entry trampoline as well.
++       */
++      unsigned long           SYSENTER_stack_canary;
++      unsigned long           SYSENTER_stack[64];
++
++      /*
++       * The fixed hardware portion.  This must not cross a page boundary
++       * at risk of violating the SDM's advice and potentially triggering
++       * errata.
+        */
+       struct x86_hw_tss       x86_tss;
+@@ -343,15 +352,9 @@ struct tss_struct {
+        * be within the limit.
+        */
+       unsigned long           io_bitmap[IO_BITMAP_LONGS + 1];
++} __aligned(PAGE_SIZE);
+-      /*
+-       * Space for the temporary SYSENTER stack.
+-       */
+-      unsigned long           SYSENTER_stack_canary;
+-      unsigned long           SYSENTER_stack[64];
+-} ____cacheline_aligned;
+-
+-DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
++DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss);
+ /*
+  * sizeof(unsigned long) coming from an extra "long" at the end
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -487,6 +487,27 @@ static inline void setup_cpu_entry_area(
+ #endif
+       __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
++
++      /*
++       * The Intel SDM says (Volume 3, 7.2.1):
++       *
++       *  Avoid placing a page boundary in the part of the TSS that the
++       *  processor reads during a task switch (the first 104 bytes). The
++       *  processor may not correctly perform address translations if a
++       *  boundary occurs in this area. During a task switch, the processor
++       *  reads and writes into the first 104 bytes of each TSS (using
++       *  contiguous physical addresses beginning with the physical address
++       *  of the first byte of the TSS). So, after TSS access begins, if
++       *  part of the 104 bytes is not physically contiguous, the processor
++       *  will access incorrect information without generating a page-fault
++       *  exception.
++       *
++       * There are also a lot of errata involving the TSS spanning a page
++       * boundary.  Assert that we're not doing that.
++       */
++      BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
++                    offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
++
+ }
+ /* Load the original GDT from the per-cpu structure */
diff --git a/queue-4.14/x86-entry-remap-the-tss-into-the-cpu-entry-area.patch b/queue-4.14/x86-entry-remap-the-tss-into-the-cpu-entry-area.patch
new file mode 100644 (file)
index 0000000..f1772ad
--- /dev/null
@@ -0,0 +1,265 @@
+From 72f5e08dbba2d01aa90b592cf76c378ea233b00b Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:20 +0100
+Subject: x86/entry: Remap the TSS into the CPU entry area
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 72f5e08dbba2d01aa90b592cf76c378ea233b00b upstream.
+
+This has a secondary purpose: it puts the entry stack into a region
+with a well-controlled layout.  A subsequent patch will take
+advantage of this to streamline the SYSCALL entry code to be able to
+find it more easily.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bpetkov@suse.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150605.962042855@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/entry/entry_32.S     |    6 ++++--
+ arch/x86/include/asm/fixmap.h |    7 +++++++
+ arch/x86/kernel/asm-offsets.c |    3 +++
+ arch/x86/kernel/cpu/common.c  |   41 +++++++++++++++++++++++++++++++++++------
+ arch/x86/kernel/dumpstack.c   |    3 ++-
+ arch/x86/kvm/vmx.c            |    2 +-
+ arch/x86/power/cpu.c          |   11 ++++++-----
+ 7 files changed, 58 insertions(+), 15 deletions(-)
+
+--- a/arch/x86/entry/entry_32.S
++++ b/arch/x86/entry/entry_32.S
+@@ -941,7 +941,8 @@ ENTRY(debug)
+       movl    %esp, %eax                      # pt_regs pointer
+       /* Are we currently on the SYSENTER stack? */
+-      PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
++      movl    PER_CPU_VAR(cpu_entry_area), %ecx
++      addl    $CPU_ENTRY_AREA_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
+       subl    %eax, %ecx      /* ecx = (end of SYSENTER_stack) - esp */
+       cmpl    $SIZEOF_SYSENTER_stack, %ecx
+       jb      .Ldebug_from_sysenter_stack
+@@ -984,7 +985,8 @@ ENTRY(nmi)
+       movl    %esp, %eax                      # pt_regs pointer
+       /* Are we currently on the SYSENTER stack? */
+-      PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
++      movl    PER_CPU_VAR(cpu_entry_area), %ecx
++      addl    $CPU_ENTRY_AREA_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
+       subl    %eax, %ecx      /* ecx = (end of SYSENTER_stack) - esp */
+       cmpl    $SIZEOF_SYSENTER_stack, %ecx
+       jb      .Lnmi_from_sysenter_stack
+--- a/arch/x86/include/asm/fixmap.h
++++ b/arch/x86/include/asm/fixmap.h
+@@ -54,6 +54,13 @@ extern unsigned long __FIXADDR_TOP;
+  */
+ struct cpu_entry_area {
+       char gdt[PAGE_SIZE];
++
++      /*
++       * The GDT is just below cpu_tss and thus serves (on x86_64) as a
++       * read-only guard page for the SYSENTER stack at the bottom
++       * of the TSS region.
++       */
++      struct tss_struct tss;
+ };
+ #define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE)
+--- a/arch/x86/kernel/asm-offsets.c
++++ b/arch/x86/kernel/asm-offsets.c
+@@ -98,4 +98,7 @@ void common(void) {
+       OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
+       /* Size of SYSENTER_stack */
+       DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
++
++      /* Layout info for cpu_entry_area */
++      OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
+ }
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -466,6 +466,22 @@ void load_percpu_segment(int cpu)
+       load_stack_canary_segment();
+ }
++static void set_percpu_fixmap_pages(int fixmap_index, void *ptr,
++                                  int pages, pgprot_t prot)
++{
++      int i;
++
++      for (i = 0; i < pages; i++) {
++              __set_fixmap(fixmap_index - i,
++                           per_cpu_ptr_to_phys(ptr + i * PAGE_SIZE), prot);
++      }
++}
++
++#ifdef CONFIG_X86_32
++/* The 32-bit entry code needs to find cpu_entry_area. */
++DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
++#endif
++
+ /* Setup the fixmap mappings only once per-processor */
+ static inline void setup_cpu_entry_area(int cpu)
+ {
+@@ -507,7 +523,15 @@ static inline void setup_cpu_entry_area(
+        */
+       BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
+                     offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
++      BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
++      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
++                              &per_cpu(cpu_tss, cpu),
++                              sizeof(struct tss_struct) / PAGE_SIZE,
++                              PAGE_KERNEL);
++#ifdef CONFIG_X86_32
++      this_cpu_write(cpu_entry_area, get_cpu_entry_area(cpu));
++#endif
+ }
+ /* Load the original GDT from the per-cpu structure */
+@@ -1257,7 +1281,8 @@ void enable_sep_cpu(void)
+       wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
+       wrmsr(MSR_IA32_SYSENTER_ESP,
+-            (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
++            (unsigned long)&get_cpu_entry_area(cpu)->tss +
++            offsetofend(struct tss_struct, SYSENTER_stack),
+             0);
+       wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
+@@ -1370,6 +1395,8 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char,
+ /* May not be marked __init: used by software suspend */
+ void syscall_init(void)
+ {
++      int cpu = smp_processor_id();
++
+       wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
+       wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
+@@ -1383,7 +1410,7 @@ void syscall_init(void)
+        */
+       wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
+       wrmsrl_safe(MSR_IA32_SYSENTER_ESP,
+-                  (unsigned long)this_cpu_ptr(&cpu_tss) +
++                  (unsigned long)&get_cpu_entry_area(cpu)->tss +
+                   offsetofend(struct tss_struct, SYSENTER_stack));
+       wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
+ #else
+@@ -1593,11 +1620,13 @@ void cpu_init(void)
+       initialize_tlbstate_and_flush();
+       enter_lazy_tlb(&init_mm, me);
++      setup_cpu_entry_area(cpu);
++
+       /*
+        * Initialize the TSS.  Don't bother initializing sp0, as the initial
+        * task never enters user mode.
+        */
+-      set_tss_desc(cpu, &t->x86_tss);
++      set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
+       load_TR_desc();
+       load_mm_ldt(&init_mm);
+@@ -1610,7 +1639,6 @@ void cpu_init(void)
+       if (is_uv_system())
+               uv_cpu_init();
+-      setup_cpu_entry_area(cpu);
+       load_fixmap_gdt(cpu);
+ }
+@@ -1651,11 +1679,13 @@ void cpu_init(void)
+       initialize_tlbstate_and_flush();
+       enter_lazy_tlb(&init_mm, curr);
++      setup_cpu_entry_area(cpu);
++
+       /*
+        * Initialize the TSS.  Don't bother initializing sp0, as the initial
+        * task never enters user mode.
+        */
+-      set_tss_desc(cpu, &t->x86_tss);
++      set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
+       load_TR_desc();
+       load_mm_ldt(&init_mm);
+@@ -1672,7 +1702,6 @@ void cpu_init(void)
+       fpu__init_cpu();
+-      setup_cpu_entry_area(cpu);
+       load_fixmap_gdt(cpu);
+ }
+ #endif
+--- a/arch/x86/kernel/dumpstack.c
++++ b/arch/x86/kernel/dumpstack.c
+@@ -45,7 +45,8 @@ bool in_task_stack(unsigned long *stack,
+ bool in_sysenter_stack(unsigned long *stack, struct stack_info *info)
+ {
+-      struct tss_struct *tss = this_cpu_ptr(&cpu_tss);
++      int cpu = smp_processor_id();
++      struct tss_struct *tss = &get_cpu_entry_area(cpu)->tss;
+       /* Treat the canary as part of the stack for unwinding purposes. */
+       void *begin = &tss->SYSENTER_stack_canary;
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -2295,7 +2295,7 @@ static void vmx_vcpu_load(struct kvm_vcp
+                * processors.  See 22.2.4.
+                */
+               vmcs_writel(HOST_TR_BASE,
+-                          (unsigned long)this_cpu_ptr(&cpu_tss.x86_tss));
++                          (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
+               vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt);   /* 22.2.4 */
+               /*
+--- a/arch/x86/power/cpu.c
++++ b/arch/x86/power/cpu.c
+@@ -160,18 +160,19 @@ static void do_fpu_end(void)
+ static void fix_processor_context(void)
+ {
+       int cpu = smp_processor_id();
+-      struct tss_struct *t = &per_cpu(cpu_tss, cpu);
+ #ifdef CONFIG_X86_64
+       struct desc_struct *desc = get_cpu_gdt_rw(cpu);
+       tss_desc tss;
+ #endif
+       /*
+-       * This just modifies memory; should not be necessary. But... This is
+-       * necessary, because 386 hardware has concept of busy TSS or some
+-       * similar stupidity.
++       * We need to reload TR, which requires that we change the
++       * GDT entry to indicate "available" first.
++       *
++       * XXX: This could probably all be replaced by a call to
++       * force_reload_TR().
+        */
+-      set_tss_desc(cpu, &t->x86_tss);
++      set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
+ #ifdef CONFIG_X86_64
+       memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc));
diff --git a/queue-4.14/x86-espfix-64-stop-assuming-that-pt_regs-is-on-the-entry-stack.patch b/queue-4.14/x86-espfix-64-stop-assuming-that-pt_regs-is-on-the-entry-stack.patch
new file mode 100644 (file)
index 0000000..12ff141
--- /dev/null
@@ -0,0 +1,114 @@
+From 6d9256f0a89eaff97fca6006100bcaea8d1d8bdb Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:22 +0100
+Subject: x86/espfix/64: Stop assuming that pt_regs is on the entry stack
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 6d9256f0a89eaff97fca6006100bcaea8d1d8bdb upstream.
+
+When we start using an entry trampoline, a #GP from userspace will
+be delivered on the entry stack, not on the task stack.  Fix the
+espfix64 #DF fixup to set up #GP according to TSS.SP0, rather than
+assuming that pt_regs + 1 == SP0.  This won't change anything
+without an entry stack, but it will make the code continue to work
+when an entry stack is added.
+
+While we're at it, improve the comments to explain what's actually
+going on.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150606.130778051@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/traps.c |   37 ++++++++++++++++++++++++++++---------
+ 1 file changed, 28 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -348,9 +348,15 @@ dotraplinkage void do_double_fault(struc
+       /*
+        * If IRET takes a non-IST fault on the espfix64 stack, then we
+-       * end up promoting it to a doublefault.  In that case, modify
+-       * the stack to make it look like we just entered the #GP
+-       * handler from user space, similar to bad_iret.
++       * end up promoting it to a doublefault.  In that case, take
++       * advantage of the fact that we're not using the normal (TSS.sp0)
++       * stack right now.  We can write a fake #GP(0) frame at TSS.sp0
++       * and then modify our own IRET frame so that, when we return,
++       * we land directly at the #GP(0) vector with the stack already
++       * set up according to its expectations.
++       *
++       * The net result is that our #GP handler will think that we
++       * entered from usermode with the bad user context.
+        *
+        * No need for ist_enter here because we don't use RCU.
+        */
+@@ -358,13 +364,26 @@ dotraplinkage void do_double_fault(struc
+               regs->cs == __KERNEL_CS &&
+               regs->ip == (unsigned long)native_irq_return_iret)
+       {
+-              struct pt_regs *normal_regs = task_pt_regs(current);
++              struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss.x86_tss.sp0) - 1;
+-              /* Fake a #GP(0) from userspace. */
+-              memmove(&normal_regs->ip, (void *)regs->sp, 5*8);
+-              normal_regs->orig_ax = 0;  /* Missing (lost) #GP error code */
++              /*
++               * regs->sp points to the failing IRET frame on the
++               * ESPFIX64 stack.  Copy it to the entry stack.  This fills
++               * in gpregs->ss through gpregs->ip.
++               *
++               */
++              memmove(&gpregs->ip, (void *)regs->sp, 5*8);
++              gpregs->orig_ax = 0;  /* Missing (lost) #GP error code */
++
++              /*
++               * Adjust our frame so that we return straight to the #GP
++               * vector with the expected RSP value.  This is safe because
+               * we won't enable interrupts or schedule before we invoke
++               * general_protection, so nothing will clobber the stack
++               * frame we just set up.
++               */
+               regs->ip = (unsigned long)general_protection;
+-              regs->sp = (unsigned long)&normal_regs->orig_ax;
++              regs->sp = (unsigned long)&gpregs->orig_ax;
+               return;
+       }
+@@ -389,7 +408,7 @@ dotraplinkage void do_double_fault(struc
+        *
+        *   Processors update CR2 whenever a page fault is detected. If a
+        *   second page fault occurs while an earlier page fault is being
+-       *   deliv- ered, the faulting linear address of the second fault will
++       *   delivered, the faulting linear address of the second fault will
+        *   overwrite the contents of CR2 (replacing the previous
+        *   address). These updates to CR2 occur even if the page fault
+        *   results in a double fault or occurs during the delivery of a
diff --git a/queue-4.14/x86-irq-64-print-the-offending-ip-in-the-stack-overflow-warning.patch b/queue-4.14/x86-irq-64-print-the-offending-ip-in-the-stack-overflow-warning.patch
new file mode 100644 (file)
index 0000000..0c74ef2
--- /dev/null
@@ -0,0 +1,60 @@
+From 4f3789e792296e21405f708cf3cb409d7c7d5683 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:11 +0100
+Subject: x86/irq/64: Print the offending IP in the stack overflow warning
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 4f3789e792296e21405f708cf3cb409d7c7d5683 upstream.
+
+In case something goes wrong with unwind (not unlikely in case of
+overflow), print the offending IP where we detected the overflow.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150605.231677119@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/irq_64.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kernel/irq_64.c
++++ b/arch/x86/kernel/irq_64.c
+@@ -57,10 +57,10 @@ static inline void stack_overflow_check(
+       if (regs->sp >= estack_top && regs->sp <= estack_bottom)
+               return;
+-      WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n",
++      WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n",
+               current->comm, curbase, regs->sp,
+               irq_stack_top, irq_stack_bottom,
+-              estack_top, estack_bottom);
++              estack_top, estack_bottom, (void *)regs->ip);
+       if (sysctl_panic_on_stackoverflow)
+               panic("low stack detected by irq handler - check messages\n");
diff --git a/queue-4.14/x86-irq-remove-an-old-outdated-comment-about-context-tracking-races.patch b/queue-4.14/x86-irq-remove-an-old-outdated-comment-about-context-tracking-races.patch
new file mode 100644 (file)
index 0000000..5f57765
--- /dev/null
@@ -0,0 +1,65 @@
+From 6669a692605547892a026445e460bf233958bd7f Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:10 +0100
+Subject: x86/irq: Remove an old outdated comment about context tracking races
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 6669a692605547892a026445e460bf233958bd7f upstream.
+
+That race has been fixed and code cleaned up for a while now.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150605.150551639@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/irq.c |   12 ------------
+ 1 file changed, 12 deletions(-)
+
+--- a/arch/x86/kernel/irq.c
++++ b/arch/x86/kernel/irq.c
+@@ -219,18 +219,6 @@ __visible unsigned int __irq_entry do_IR
+       /* high bit used in ret_from_ code  */
+       unsigned vector = ~regs->orig_ax;
+-      /*
+-       * NB: Unlike exception entries, IRQ entries do not reliably
+-       * handle context tracking in the low-level entry code.  This is
+-       * because syscall entries execute briefly with IRQs on before
+-       * updating context tracking state, so we can take an IRQ from
+-       * kernel mode with CONTEXT_USER.  The low-level entry code only
+-       * updates the context if we came from user mode, so we won't
+-       * switch to CONTEXT_KERNEL.  We'll fix that once the syscall
+-       * code is cleaned up enough that we can cleanly defer enabling
+-       * IRQs.
+-       */
+-
+       entering_irq();
+       /* entering_irq() tells RCU that we're not quiescent.  Check it. */
diff --git a/queue-4.14/x86-kasan-64-teach-kasan-about-the-cpu_entry_area.patch b/queue-4.14/x86-kasan-64-teach-kasan-about-the-cpu_entry_area.patch
new file mode 100644 (file)
index 0000000..01c6ed2
--- /dev/null
@@ -0,0 +1,82 @@
+From 21506525fb8ddb0342f2a2370812d47f6a1f3833 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:16 +0100
+Subject: x86/kasan/64: Teach KASAN about the cpu_entry_area
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 21506525fb8ddb0342f2a2370812d47f6a1f3833 upstream.
+
+The cpu_entry_area will contain stacks.  Make sure that KASAN has
+appropriate shadow mappings for them.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Alexander Potapenko <glider@google.com>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: kasan-dev@googlegroups.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150605.642806442@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/mm/kasan_init_64.c |   18 +++++++++++++++++-
+ 1 file changed, 17 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/mm/kasan_init_64.c
++++ b/arch/x86/mm/kasan_init_64.c
+@@ -277,6 +277,7 @@ void __init kasan_early_init(void)
+ void __init kasan_init(void)
+ {
+       int i;
++      void *shadow_cpu_entry_begin, *shadow_cpu_entry_end;
+ #ifdef CONFIG_KASAN_INLINE
+       register_die_notifier(&kasan_die_notifier);
+@@ -329,8 +330,23 @@ void __init kasan_init(void)
+                             (unsigned long)kasan_mem_to_shadow(_end),
+                             early_pfn_to_nid(__pa(_stext)));
++      shadow_cpu_entry_begin = (void *)__fix_to_virt(FIX_CPU_ENTRY_AREA_BOTTOM);
++      shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin);
++      shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin,
++                                              PAGE_SIZE);
++
++      shadow_cpu_entry_end = (void *)(__fix_to_virt(FIX_CPU_ENTRY_AREA_TOP) + PAGE_SIZE);
++      shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end);
++      shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end,
++                                      PAGE_SIZE);
++
+       kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
+-                      (void *)KASAN_SHADOW_END);
++                                 shadow_cpu_entry_begin);
++
++      kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
++                            (unsigned long)shadow_cpu_entry_end, 0);
++
++      kasan_populate_zero_shadow(shadow_cpu_entry_end, (void *)KASAN_SHADOW_END);
+       load_cr3(init_top_pgt);
+       __flush_tlb_all();
diff --git a/queue-4.14/x86-make-x86_bug_fxsave_leak-detectable-in-cpuid-on-amd.patch b/queue-4.14/x86-make-x86_bug_fxsave_leak-detectable-in-cpuid-on-amd.patch
new file mode 100644 (file)
index 0000000..9549af0
--- /dev/null
@@ -0,0 +1,62 @@
+From f2dbad36c55e5d3a91dccbde6e8cae345fe5632f Mon Sep 17 00:00:00 2001
+From: Rudolf Marek <r.marek@assembler.cz>
+Date: Tue, 28 Nov 2017 22:01:06 +0100
+Subject: x86: Make X86_BUG_FXSAVE_LEAK detectable in CPUID on AMD
+
+From: Rudolf Marek <r.marek@assembler.cz>
+
+commit f2dbad36c55e5d3a91dccbde6e8cae345fe5632f upstream.
+
+[ Note, this is a Git cherry-pick of the following commit:
+
+    2b67799bdf25 ("x86: Make X86_BUG_FXSAVE_LEAK detectable in CPUID on AMD")
+
+  ... for easier x86 PTI code testing and back-porting. ]
+
+The latest AMD AMD64 Architecture Programmer's Manual
+adds a CPUID feature XSaveErPtr (CPUID_Fn80000008_EBX[2]).
+
+If this feature is set, the FXSAVE, XSAVE, FXSAVEOPT, XSAVEC, XSAVES
+/ FXRSTOR, XRSTOR, XRSTORS always save/restore error pointers,
+thus making the X86_BUG_FXSAVE_LEAK workaround obsolete on such CPUs.
+
+Signed-Off-By: Rudolf Marek <r.marek@assembler.cz>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Tested-by: Borislav Petkov <bp@suse.de>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Link: https://lkml.kernel.org/r/bdcebe90-62c5-1f05-083c-eba7f08b2540@assembler.cz
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/cpufeatures.h |    1 +
+ arch/x86/kernel/cpu/amd.c          |    7 +++++--
+ 2 files changed, 6 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -266,6 +266,7 @@
+ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
+ #define X86_FEATURE_CLZERO            (13*32+ 0) /* CLZERO instruction */
+ #define X86_FEATURE_IRPERF            (13*32+ 1) /* Instructions Retired Count */
++#define X86_FEATURE_XSAVEERPTR                (13*32+ 2) /* Always save/restore FP error pointers */
+ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
+ #define X86_FEATURE_DTHERM            (14*32+ 0) /* Digital Thermal Sensor */
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -804,8 +804,11 @@ static void init_amd(struct cpuinfo_x86
+       case 0x17: init_amd_zn(c); break;
+       }
+-      /* Enable workaround for FXSAVE leak */
+-      if (c->x86 >= 6)
++      /*
++       * Enable workaround for FXSAVE leak on CPUs
++       * without a XSaveErPtr feature
++       */
++      if ((c->x86 >= 6) && (!cpu_has(c, X86_FEATURE_XSAVEERPTR)))
+               set_cpu_bug(c, X86_BUG_FXSAVE_LEAK);
+       cpu_detect_cache_sizes(c);
diff --git a/queue-4.14/x86-mm-define-_page_table-using-_kernpg_table.patch b/queue-4.14/x86-mm-define-_page_table-using-_kernpg_table.patch
new file mode 100644 (file)
index 0000000..988cee7
--- /dev/null
@@ -0,0 +1,39 @@
+From c7da092a1f243bfd1bfb4124f538e69e941882da Mon Sep 17 00:00:00 2001
+From: Borislav Petkov <bp@suse.de>
+Date: Fri, 3 Nov 2017 11:20:28 +0100
+Subject: x86/mm: Define _PAGE_TABLE using _KERNPG_TABLE
+
+From: Borislav Petkov <bp@suse.de>
+
+commit c7da092a1f243bfd1bfb4124f538e69e941882da upstream.
+
+... so that the difference is obvious.
+
+No functionality change.
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/20171103102028.20284-1-bp@alien8.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/pgtable_types.h |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/pgtable_types.h
++++ b/arch/x86/include/asm/pgtable_types.h
+@@ -200,10 +200,9 @@ enum page_cache_mode {
+ #define _PAGE_ENC     (_AT(pteval_t, sme_me_mask))
+-#define _PAGE_TABLE   (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |        \
+-                       _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_ENC)
+ #define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED |    \
+                        _PAGE_DIRTY | _PAGE_ENC)
++#define _PAGE_TABLE   (_KERNPG_TABLE | _PAGE_USER)
+ #define __PAGE_KERNEL_ENC     (__PAGE_KERNEL | _PAGE_ENC)
+ #define __PAGE_KERNEL_ENC_WP  (__PAGE_KERNEL_WP | _PAGE_ENC)
diff --git a/queue-4.14/x86-mm-fixmap-generalize-the-gdt-fixmap-mechanism-introduce-struct-cpu_entry_area.patch b/queue-4.14/x86-mm-fixmap-generalize-the-gdt-fixmap-mechanism-introduce-struct-cpu_entry_area.patch
new file mode 100644 (file)
index 0000000..4881de1
--- /dev/null
@@ -0,0 +1,190 @@
+From ef8813ab280507972bb57e4b1b502811ad4411e9 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:15 +0100
+Subject: x86/mm/fixmap: Generalize the GDT fixmap mechanism, introduce struct cpu_entry_area
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit ef8813ab280507972bb57e4b1b502811ad4411e9 upstream.
+
+Currently, the GDT is an ad-hoc array of pages, one per CPU, in the
+fixmap.  Generalize it to be an array of a new 'struct cpu_entry_area'
+so that we can cleanly add new things to it.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150605.563271721@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/desc.h   |    9 +--------
+ arch/x86/include/asm/fixmap.h |   37 +++++++++++++++++++++++++++++++++++--
+ arch/x86/kernel/cpu/common.c  |   14 +++++++-------
+ arch/x86/xen/mmu_pv.c         |    2 +-
+ 4 files changed, 44 insertions(+), 18 deletions(-)
+
+--- a/arch/x86/include/asm/desc.h
++++ b/arch/x86/include/asm/desc.h
+@@ -60,17 +60,10 @@ static inline struct desc_struct *get_cu
+       return this_cpu_ptr(&gdt_page)->gdt;
+ }
+-/* Get the fixmap index for a specific processor */
+-static inline unsigned int get_cpu_gdt_ro_index(int cpu)
+-{
+-      return FIX_GDT_REMAP_END - cpu;
+-}
+-
+ /* Provide the fixmap address of the remapped GDT */
+ static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
+ {
+-      unsigned int idx = get_cpu_gdt_ro_index(cpu);
+-      return (struct desc_struct *)__fix_to_virt(idx);
++      return (struct desc_struct *)&get_cpu_entry_area(cpu)->gdt;
+ }
+ /* Provide the current read-only GDT */
+--- a/arch/x86/include/asm/fixmap.h
++++ b/arch/x86/include/asm/fixmap.h
+@@ -44,6 +44,19 @@ extern unsigned long __FIXADDR_TOP;
+                        PAGE_SIZE)
+ #endif
++/*
++ * cpu_entry_area is a percpu region in the fixmap that contains things
++ * needed by the CPU and early entry/exit code.  Real types aren't used
++ * for all fields here to avoid circular header dependencies.
++ *
++ * Every field is a virtual alias of some other allocated backing store.
++ * There is no direct allocation of a struct cpu_entry_area.
++ */
++struct cpu_entry_area {
++      char gdt[PAGE_SIZE];
++};
++
++#define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE)
+ /*
+  * Here we define all the compile-time 'special' virtual
+@@ -101,8 +114,8 @@ enum fixed_addresses {
+       FIX_LNW_VRTC,
+ #endif
+       /* Fixmap entries to remap the GDTs, one per processor. */
+-      FIX_GDT_REMAP_BEGIN,
+-      FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
++      FIX_CPU_ENTRY_AREA_TOP,
++      FIX_CPU_ENTRY_AREA_BOTTOM = FIX_CPU_ENTRY_AREA_TOP + (CPU_ENTRY_AREA_PAGES * NR_CPUS) - 1,
+ #ifdef CONFIG_ACPI_APEI_GHES
+       /* Used for GHES mapping from assorted contexts */
+@@ -191,5 +204,25 @@ void __init *early_memremap_decrypted_wp
+ void __early_set_fixmap(enum fixed_addresses idx,
+                       phys_addr_t phys, pgprot_t flags);
++static inline unsigned int __get_cpu_entry_area_page_index(int cpu, int page)
++{
++      BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
++
++      return FIX_CPU_ENTRY_AREA_BOTTOM - cpu*CPU_ENTRY_AREA_PAGES - page;
++}
++
++#define __get_cpu_entry_area_offset_index(cpu, offset) ({             \
++      BUILD_BUG_ON(offset % PAGE_SIZE != 0);                          \
++      __get_cpu_entry_area_page_index(cpu, offset / PAGE_SIZE);       \
++      })
++
++#define get_cpu_entry_area_index(cpu, field)                          \
++      __get_cpu_entry_area_offset_index((cpu), offsetof(struct cpu_entry_area, field))
++
++static inline struct cpu_entry_area *get_cpu_entry_area(int cpu)
++{
++      return (struct cpu_entry_area *)__fix_to_virt(__get_cpu_entry_area_page_index(cpu, 0));
++}
++
+ #endif /* !__ASSEMBLY__ */
+ #endif /* _ASM_X86_FIXMAP_H */
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -466,12 +466,12 @@ void load_percpu_segment(int cpu)
+       load_stack_canary_segment();
+ }
+-/* Setup the fixmap mapping only once per-processor */
+-static inline void setup_fixmap_gdt(int cpu)
++/* Setup the fixmap mappings only once per-processor */
++static inline void setup_cpu_entry_area(int cpu)
+ {
+ #ifdef CONFIG_X86_64
+       /* On 64-bit systems, we use a read-only fixmap GDT. */
+-      pgprot_t prot = PAGE_KERNEL_RO;
++      pgprot_t gdt_prot = PAGE_KERNEL_RO;
+ #else
+       /*
+        * On native 32-bit systems, the GDT cannot be read-only because
+@@ -482,11 +482,11 @@ static inline void setup_fixmap_gdt(int
+        * On Xen PV, the GDT must be read-only because the hypervisor requires
+        * it.
+        */
+-      pgprot_t prot = boot_cpu_has(X86_FEATURE_XENPV) ?
++      pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
+               PAGE_KERNEL_RO : PAGE_KERNEL;
+ #endif
+-      __set_fixmap(get_cpu_gdt_ro_index(cpu), get_cpu_gdt_paddr(cpu), prot);
++      __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
+ }
+ /* Load the original GDT from the per-cpu structure */
+@@ -1589,7 +1589,7 @@ void cpu_init(void)
+       if (is_uv_system())
+               uv_cpu_init();
+-      setup_fixmap_gdt(cpu);
++      setup_cpu_entry_area(cpu);
+       load_fixmap_gdt(cpu);
+ }
+@@ -1651,7 +1651,7 @@ void cpu_init(void)
+       fpu__init_cpu();
+-      setup_fixmap_gdt(cpu);
++      setup_cpu_entry_area(cpu);
+       load_fixmap_gdt(cpu);
+ }
+ #endif
+--- a/arch/x86/xen/mmu_pv.c
++++ b/arch/x86/xen/mmu_pv.c
+@@ -2272,7 +2272,7 @@ static void xen_set_fixmap(unsigned idx,
+ #endif
+       case FIX_TEXT_POKE0:
+       case FIX_TEXT_POKE1:
+-      case FIX_GDT_REMAP_BEGIN ... FIX_GDT_REMAP_END:
++      case FIX_CPU_ENTRY_AREA_TOP ... FIX_CPU_ENTRY_AREA_BOTTOM:
+               /* All local page mappings */
+               pte = pfn_pte(phys, prot);
+               break;
diff --git a/queue-4.14/x86-mm-kasan-don-t-use-vmemmap_populate-to-initialize-shadow.patch b/queue-4.14/x86-mm-kasan-don-t-use-vmemmap_populate-to-initialize-shadow.patch
new file mode 100644 (file)
index 0000000..7178b10
--- /dev/null
@@ -0,0 +1,254 @@
+From 2aeb07365bcd489620f71390a7d2031cd4dfb83e Mon Sep 17 00:00:00 2001
+From: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Date: Wed, 15 Nov 2017 17:36:35 -0800
+Subject: x86/mm/kasan: Don't use vmemmap_populate() to initialize shadow
+
+From: Andrey Ryabinin <aryabinin@virtuozzo.com>
+
+commit 2aeb07365bcd489620f71390a7d2031cd4dfb83e upstream.
+
+[ Note, this is a Git cherry-pick of the following commit:
+
+    d17a1d97dc20: ("x86/mm/kasan: don't use vmemmap_populate() to initialize shadow")
+
+  ... for easier x86 PTI code testing and back-porting. ]
+
+The KASAN shadow is currently mapped using vmemmap_populate() since that
+provides a semi-convenient way to map pages into init_top_pgt.  However,
+since that no longer zeroes the mapped pages, it is not suitable for
+KASAN, which requires zeroed shadow memory.
+
+Add kasan_populate_shadow() interface and use it instead of
+vmemmap_populate().  Besides, this allows us to take advantage of
+gigantic pages and use them to populate the shadow, which should save us
+some memory wasted on page tables and reduce TLB pressure.
+
+Link: http://lkml.kernel.org/r/20171103185147.2688-2-pasha.tatashin@oracle.com
+Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Steven Sistare <steven.sistare@oracle.com>
+Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
+Cc: Bob Picco <bob.picco@oracle.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Alexander Potapenko <glider@google.com>
+Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Christian Borntraeger <borntraeger@de.ibm.com>
+Cc: David S. Miller <davem@davemloft.net>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
+Cc: "H. Peter Anvin" <hpa@zytor.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Sam Ravnborg <sam@ravnborg.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Will Deacon <will.deacon@arm.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/Kconfig            |    2 
+ arch/x86/mm/kasan_init_64.c |  143 +++++++++++++++++++++++++++++++++++++++++---
+ 2 files changed, 137 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -108,7 +108,7 @@ config X86
+       select HAVE_ARCH_AUDITSYSCALL
+       select HAVE_ARCH_HUGE_VMAP              if X86_64 || X86_PAE
+       select HAVE_ARCH_JUMP_LABEL
+-      select HAVE_ARCH_KASAN                  if X86_64 && SPARSEMEM_VMEMMAP
++      select HAVE_ARCH_KASAN                  if X86_64
+       select HAVE_ARCH_KGDB
+       select HAVE_ARCH_KMEMCHECK
+       select HAVE_ARCH_MMAP_RND_BITS          if MMU
+--- a/arch/x86/mm/kasan_init_64.c
++++ b/arch/x86/mm/kasan_init_64.c
+@@ -4,12 +4,14 @@
+ #include <linux/bootmem.h>
+ #include <linux/kasan.h>
+ #include <linux/kdebug.h>
++#include <linux/memblock.h>
+ #include <linux/mm.h>
+ #include <linux/sched.h>
+ #include <linux/sched/task.h>
+ #include <linux/vmalloc.h>
+ #include <asm/e820/types.h>
++#include <asm/pgalloc.h>
+ #include <asm/tlbflush.h>
+ #include <asm/sections.h>
+ #include <asm/pgtable.h>
+@@ -18,7 +20,134 @@ extern struct range pfn_mapped[E820_MAX_
+ static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
+-static int __init map_range(struct range *range)
++static __init void *early_alloc(size_t size, int nid)
++{
++      return memblock_virt_alloc_try_nid_nopanic(size, size,
++              __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid);
++}
++
++static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr,
++                                    unsigned long end, int nid)
++{
++      pte_t *pte;
++
++      if (pmd_none(*pmd)) {
++              void *p;
++
++              if (boot_cpu_has(X86_FEATURE_PSE) &&
++                  ((end - addr) == PMD_SIZE) &&
++                  IS_ALIGNED(addr, PMD_SIZE)) {
++                      p = early_alloc(PMD_SIZE, nid);
++                      if (p && pmd_set_huge(pmd, __pa(p), PAGE_KERNEL))
++                              return;
++                      else if (p)
++                              memblock_free(__pa(p), PMD_SIZE);
++              }
++
++              p = early_alloc(PAGE_SIZE, nid);
++              pmd_populate_kernel(&init_mm, pmd, p);
++      }
++
++      pte = pte_offset_kernel(pmd, addr);
++      do {
++              pte_t entry;
++              void *p;
++
++              if (!pte_none(*pte))
++                      continue;
++
++              p = early_alloc(PAGE_SIZE, nid);
++              entry = pfn_pte(PFN_DOWN(__pa(p)), PAGE_KERNEL);
++              set_pte_at(&init_mm, addr, pte, entry);
++      } while (pte++, addr += PAGE_SIZE, addr != end);
++}
++
++static void __init kasan_populate_pud(pud_t *pud, unsigned long addr,
++                                    unsigned long end, int nid)
++{
++      pmd_t *pmd;
++      unsigned long next;
++
++      if (pud_none(*pud)) {
++              void *p;
++
++              if (boot_cpu_has(X86_FEATURE_GBPAGES) &&
++                  ((end - addr) == PUD_SIZE) &&
++                  IS_ALIGNED(addr, PUD_SIZE)) {
++                      p = early_alloc(PUD_SIZE, nid);
++                      if (p && pud_set_huge(pud, __pa(p), PAGE_KERNEL))
++                              return;
++                      else if (p)
++                              memblock_free(__pa(p), PUD_SIZE);
++              }
++
++              p = early_alloc(PAGE_SIZE, nid);
++              pud_populate(&init_mm, pud, p);
++      }
++
++      pmd = pmd_offset(pud, addr);
++      do {
++              next = pmd_addr_end(addr, end);
++              if (!pmd_large(*pmd))
++                      kasan_populate_pmd(pmd, addr, next, nid);
++      } while (pmd++, addr = next, addr != end);
++}
++
++static void __init kasan_populate_p4d(p4d_t *p4d, unsigned long addr,
++                                    unsigned long end, int nid)
++{
++      pud_t *pud;
++      unsigned long next;
++
++      if (p4d_none(*p4d)) {
++              void *p = early_alloc(PAGE_SIZE, nid);
++
++              p4d_populate(&init_mm, p4d, p);
++      }
++
++      pud = pud_offset(p4d, addr);
++      do {
++              next = pud_addr_end(addr, end);
++              if (!pud_large(*pud))
++                      kasan_populate_pud(pud, addr, next, nid);
++      } while (pud++, addr = next, addr != end);
++}
++
++static void __init kasan_populate_pgd(pgd_t *pgd, unsigned long addr,
++                                    unsigned long end, int nid)
++{
++      void *p;
++      p4d_t *p4d;
++      unsigned long next;
++
++      if (pgd_none(*pgd)) {
++              p = early_alloc(PAGE_SIZE, nid);
++              pgd_populate(&init_mm, pgd, p);
++      }
++
++      p4d = p4d_offset(pgd, addr);
++      do {
++              next = p4d_addr_end(addr, end);
++              kasan_populate_p4d(p4d, addr, next, nid);
++      } while (p4d++, addr = next, addr != end);
++}
++
++static void __init kasan_populate_shadow(unsigned long addr, unsigned long end,
++                                       int nid)
++{
++      pgd_t *pgd;
++      unsigned long next;
++
++      addr = addr & PAGE_MASK;
++      end = round_up(end, PAGE_SIZE);
++      pgd = pgd_offset_k(addr);
++      do {
++              next = pgd_addr_end(addr, end);
++              kasan_populate_pgd(pgd, addr, next, nid);
++      } while (pgd++, addr = next, addr != end);
++}
++
++static void __init map_range(struct range *range)
+ {
+       unsigned long start;
+       unsigned long end;
+@@ -26,7 +155,7 @@ static int __init map_range(struct range
+       start = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->start));
+       end = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->end));
+-      return vmemmap_populate(start, end, NUMA_NO_NODE);
++      kasan_populate_shadow(start, end, early_pfn_to_nid(range->start));
+ }
+ static void __init clear_pgds(unsigned long start,
+@@ -189,16 +318,16 @@ void __init kasan_init(void)
+               if (pfn_mapped[i].end == 0)
+                       break;
+-              if (map_range(&pfn_mapped[i]))
+-                      panic("kasan: unable to allocate shadow!");
++              map_range(&pfn_mapped[i]);
+       }
++
+       kasan_populate_zero_shadow(
+               kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
+               kasan_mem_to_shadow((void *)__START_KERNEL_map));
+-      vmemmap_populate((unsigned long)kasan_mem_to_shadow(_stext),
+-                      (unsigned long)kasan_mem_to_shadow(_end),
+-                      NUMA_NO_NODE);
++      kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
++                            (unsigned long)kasan_mem_to_shadow(_end),
++                            early_pfn_to_nid(__pa(_stext)));
+       kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
+                       (void *)KASAN_SHADOW_END);
diff --git a/queue-4.14/x86-paravirt-dont-patch-flush_tlb_single.patch b/queue-4.14/x86-paravirt-dont-patch-flush_tlb_single.patch
new file mode 100644 (file)
index 0000000..02cd12c
--- /dev/null
@@ -0,0 +1,68 @@
+From a035795499ca1c2bd1928808d1a156eda1420383 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 4 Dec 2017 15:07:30 +0100
+Subject: x86/paravirt: Dont patch flush_tlb_single
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit a035795499ca1c2bd1928808d1a156eda1420383 upstream.
+
+native_flush_tlb_single() will be changed with the upcoming
+PAGE_TABLE_ISOLATION feature. This requires having more code in
+there than just INVLPG.
+
+Remove the paravirt patching for it.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Acked-by: Peter Zijlstra <peterz@infradead.org>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Cc: michael.schwarz@iaik.tugraz.at
+Cc: moritz.lipp@iaik.tugraz.at
+Cc: richard.fellner@student.tugraz.at
+Link: https://lkml.kernel.org/r/20171204150606.828111617@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/paravirt_patch_64.c |    2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/arch/x86/kernel/paravirt_patch_64.c
++++ b/arch/x86/kernel/paravirt_patch_64.c
+@@ -10,7 +10,6 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq;
+ DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
+ DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
+ DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
+-DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
+ DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
+ DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
+@@ -60,7 +59,6 @@ unsigned native_patch(u8 type, u16 clobb
+               PATCH_SITE(pv_mmu_ops, read_cr2);
+               PATCH_SITE(pv_mmu_ops, read_cr3);
+               PATCH_SITE(pv_mmu_ops, write_cr3);
+-              PATCH_SITE(pv_mmu_ops, flush_tlb_single);
+               PATCH_SITE(pv_cpu_ops, wbinvd);
+ #if defined(CONFIG_PARAVIRT_SPINLOCKS)
+               case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
diff --git a/queue-4.14/x86-paravirt-provide-a-way-to-check-for-hypervisors.patch b/queue-4.14/x86-paravirt-provide-a-way-to-check-for-hypervisors.patch
new file mode 100644 (file)
index 0000000..923cc7c
--- /dev/null
@@ -0,0 +1,96 @@
+From 79cc74155218316b9a5d28577c7077b2adba8e58 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 4 Dec 2017 15:07:31 +0100
+Subject: x86/paravirt: Provide a way to check for hypervisors
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 79cc74155218316b9a5d28577c7077b2adba8e58 upstream.
+
+There is no generic way to test whether a kernel is running on a specific
+hypervisor. But that's required to prevent the upcoming user address space
+separation feature in certain guest modes.
+
+Make the hypervisor type enum unconditionally available and provide a
+helper function which allows testing for a specific type.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150606.912938129@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/hypervisor.h |   25 +++++++++++++++----------
+ 1 file changed, 15 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/include/asm/hypervisor.h
++++ b/arch/x86/include/asm/hypervisor.h
+@@ -20,16 +20,7 @@
+ #ifndef _ASM_X86_HYPERVISOR_H
+ #define _ASM_X86_HYPERVISOR_H
+-#ifdef CONFIG_HYPERVISOR_GUEST
+-
+-#include <asm/kvm_para.h>
+-#include <asm/x86_init.h>
+-#include <asm/xen/hypervisor.h>
+-
+-/*
+- * x86 hypervisor information
+- */
+-
++/* x86 hypervisor types  */
+ enum x86_hypervisor_type {
+       X86_HYPER_NATIVE = 0,
+       X86_HYPER_VMWARE,
+@@ -39,6 +30,12 @@ enum x86_hypervisor_type {
+       X86_HYPER_KVM,
+ };
++#ifdef CONFIG_HYPERVISOR_GUEST
++
++#include <asm/kvm_para.h>
++#include <asm/x86_init.h>
++#include <asm/xen/hypervisor.h>
++
+ struct hypervisor_x86 {
+       /* Hypervisor name */
+       const char      *name;
+@@ -58,7 +55,15 @@ struct hypervisor_x86 {
+ extern enum x86_hypervisor_type x86_hyper_type;
+ extern void init_hypervisor_platform(void);
++static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
++{
++      return x86_hyper_type == type;
++}
+ #else
+ static inline void init_hypervisor_platform(void) { }
++static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
++{
++      return type == X86_HYPER_NATIVE;
++}
+ #endif /* CONFIG_HYPERVISOR_GUEST */
+ #endif /* _ASM_X86_HYPERVISOR_H */
diff --git a/queue-4.14/x86-traps-use-a-new-on_thread_stack-helper-to-clean-up-an-assertion.patch b/queue-4.14/x86-traps-use-a-new-on_thread_stack-helper-to-clean-up-an-assertion.patch
new file mode 100644 (file)
index 0000000..c3ec135
--- /dev/null
@@ -0,0 +1,56 @@
+From 3383642c2f9d4f5b4fa37436db4a109a1a10018c Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:59:17 -0700
+Subject: x86/traps: Use a new on_thread_stack() helper to clean up an assertion
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 3383642c2f9d4f5b4fa37436db4a109a1a10018c upstream.
+
+Let's keep the stack-related logic together rather than open-coding
+a comparison in an assertion in the traps code.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/856b15bee1f55017b8f79d3758b0d51c48a08cf8.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/processor.h |    6 ++++++
+ arch/x86/kernel/traps.c          |    3 +--
+ 2 files changed, 7 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -542,6 +542,12 @@ static inline unsigned long current_top_
+ #endif
+ }
++static inline bool on_thread_stack(void)
++{
++      return (unsigned long)(current_top_of_stack() -
++                             current_stack_pointer) < THREAD_SIZE;
++}
++
+ #ifdef CONFIG_PARAVIRT
+ #include <asm/paravirt.h>
+ #else
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -141,8 +141,7 @@ void ist_begin_non_atomic(struct pt_regs
+        * will catch asm bugs and any attempt to use ist_preempt_enable
+        * from double_fault.
+        */
+-      BUG_ON((unsigned long)(current_top_of_stack() -
+-                             current_stack_pointer) >= THREAD_SIZE);
++      BUG_ON(!on_thread_stack());
+       preempt_enable_no_resched();
+ }
diff --git a/queue-4.14/x86-unwinder-handle-stack-overflows-more-gracefully.patch b/queue-4.14/x86-unwinder-handle-stack-overflows-more-gracefully.patch
new file mode 100644 (file)
index 0000000..4ffb7e4
--- /dev/null
@@ -0,0 +1,319 @@
+From b02fcf9ba1211097754b286043cd87a8b4907e75 Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Mon, 4 Dec 2017 15:07:09 +0100
+Subject: x86/unwinder: Handle stack overflows more gracefully
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit b02fcf9ba1211097754b286043cd87a8b4907e75 upstream.
+
+There are at least two unwinder bugs hindering the debugging of
+stack-overflow crashes:
+
+- It doesn't deal gracefully with the case where the stack overflows and
+  the stack pointer itself isn't on a valid stack but the
+  to-be-dereferenced data *is*.
+
+- The ORC oops dump code doesn't know how to print partial pt_regs, for the
+  case where we get an interrupt/exception in *early* entry code
+  before the full pt_regs have been saved.
+
+Fix both issues.
+
+http://lkml.kernel.org/r/20171126024031.uxi4numpbjm5rlbr@treble
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bpetkov@suse.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150605.071425003@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/kdebug.h |    1 
+ arch/x86/include/asm/unwind.h |    7 +++
+ arch/x86/kernel/dumpstack.c   |   32 ++++++++++++++---
+ arch/x86/kernel/process_64.c  |   11 ++----
+ arch/x86/kernel/unwind_orc.c  |   76 ++++++++++++++----------------------------
+ 5 files changed, 66 insertions(+), 61 deletions(-)
+
+--- a/arch/x86/include/asm/kdebug.h
++++ b/arch/x86/include/asm/kdebug.h
+@@ -26,6 +26,7 @@ extern void die(const char *, struct pt_
+ extern int __must_check __die(const char *, struct pt_regs *, long);
+ extern void show_stack_regs(struct pt_regs *regs);
+ extern void __show_regs(struct pt_regs *regs, int all);
++extern void show_iret_regs(struct pt_regs *regs);
+ extern unsigned long oops_begin(void);
+ extern void oops_end(unsigned long, struct pt_regs *, int signr);
+--- a/arch/x86/include/asm/unwind.h
++++ b/arch/x86/include/asm/unwind.h
+@@ -7,6 +7,9 @@
+ #include <asm/ptrace.h>
+ #include <asm/stacktrace.h>
++#define IRET_FRAME_OFFSET (offsetof(struct pt_regs, ip))
++#define IRET_FRAME_SIZE   (sizeof(struct pt_regs) - IRET_FRAME_OFFSET)
++
+ struct unwind_state {
+       struct stack_info stack_info;
+       unsigned long stack_mask;
+@@ -52,6 +55,10 @@ void unwind_start(struct unwind_state *s
+ }
+ #if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
++/*
++ * WARNING: The entire pt_regs may not be safe to dereference.  In some cases,
++ * only the iret frame registers are accessible.  Use with caution!
++ */
+ static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+ {
+       if (unwind_done(state))
+--- a/arch/x86/kernel/dumpstack.c
++++ b/arch/x86/kernel/dumpstack.c
+@@ -50,6 +50,28 @@ static void printk_stack_address(unsigne
+       printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
+ }
++void show_iret_regs(struct pt_regs *regs)
++{
++      printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip);
++      printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss,
++              regs->sp, regs->flags);
++}
++
++static void show_regs_safe(struct stack_info *info, struct pt_regs *regs)
++{
++      if (on_stack(info, regs, sizeof(*regs)))
++              __show_regs(regs, 0);
++      else if (on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
++                        IRET_FRAME_SIZE)) {
++              /*
++               * When an interrupt or exception occurs in entry code, the
++               * full pt_regs might not have been saved yet.  In that case
++               * just print the iret frame.
++               */
++              show_iret_regs(regs);
++      }
++}
++
+ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+                       unsigned long *stack, char *log_lvl)
+ {
+@@ -94,8 +116,8 @@ void show_trace_log_lvl(struct task_stru
+               if (stack_name)
+                       printk("%s <%s>\n", log_lvl, stack_name);
+-              if (regs && on_stack(&stack_info, regs, sizeof(*regs)))
+-                      __show_regs(regs, 0);
++              if (regs)
++                      show_regs_safe(&stack_info, regs);
+               /*
+                * Scan the stack, printing any text addresses we find.  At the
+@@ -119,7 +141,7 @@ void show_trace_log_lvl(struct task_stru
+                       /*
+                        * Don't print regs->ip again if it was already printed
+-                       * by __show_regs() below.
++                       * by show_regs_safe() below.
+                        */
+                       if (regs && stack == &regs->ip)
+                               goto next;
+@@ -155,8 +177,8 @@ next:
+                       /* if the frame has entry regs, print them */
+                       regs = unwind_get_entry_regs(&state);
+-                      if (regs && on_stack(&stack_info, regs, sizeof(*regs)))
+-                              __show_regs(regs, 0);
++                      if (regs)
++                              show_regs_safe(&stack_info, regs);
+               }
+               if (stack_name)
+--- a/arch/x86/kernel/process_64.c
++++ b/arch/x86/kernel/process_64.c
+@@ -69,9 +69,8 @@ void __show_regs(struct pt_regs *regs, i
+       unsigned int fsindex, gsindex;
+       unsigned int ds, cs, es;
+-      printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs, (void *)regs->ip);
+-      printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss,
+-              regs->sp, regs->flags);
++      show_iret_regs(regs);
++
+       if (regs->orig_ax != -1)
+               pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
+       else
+@@ -88,6 +87,9 @@ void __show_regs(struct pt_regs *regs, i
+       printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
+              regs->r13, regs->r14, regs->r15);
++      if (!all)
++              return;
++
+       asm("movl %%ds,%0" : "=r" (ds));
+       asm("movl %%cs,%0" : "=r" (cs));
+       asm("movl %%es,%0" : "=r" (es));
+@@ -98,9 +100,6 @@ void __show_regs(struct pt_regs *regs, i
+       rdmsrl(MSR_GS_BASE, gs);
+       rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
+-      if (!all)
+-              return;
+-
+       cr0 = read_cr0();
+       cr2 = read_cr2();
+       cr3 = __read_cr3();
+--- a/arch/x86/kernel/unwind_orc.c
++++ b/arch/x86/kernel/unwind_orc.c
+@@ -253,22 +253,15 @@ unsigned long *unwind_get_return_address
+       return NULL;
+ }
+-static bool stack_access_ok(struct unwind_state *state, unsigned long addr,
++static bool stack_access_ok(struct unwind_state *state, unsigned long _addr,
+                           size_t len)
+ {
+       struct stack_info *info = &state->stack_info;
++      void *addr = (void *)_addr;
+-      /*
+-       * If the address isn't on the current stack, switch to the next one.
+-       *
+-       * We may have to traverse multiple stacks to deal with the possibility
+-       * that info->next_sp could point to an empty stack and the address
+-       * could be on a subsequent stack.
+-       */
+-      while (!on_stack(info, (void *)addr, len))
+-              if (get_stack_info(info->next_sp, state->task, info,
+-                                 &state->stack_mask))
+-                      return false;
++      if (!on_stack(info, addr, len) &&
++          (get_stack_info(addr, state->task, info, &state->stack_mask)))
++              return false;
+       return true;
+ }
+@@ -283,42 +276,32 @@ static bool deref_stack_reg(struct unwin
+       return true;
+ }
+-#define REGS_SIZE (sizeof(struct pt_regs))
+-#define SP_OFFSET (offsetof(struct pt_regs, sp))
+-#define IRET_REGS_SIZE (REGS_SIZE - offsetof(struct pt_regs, ip))
+-#define IRET_SP_OFFSET (SP_OFFSET - offsetof(struct pt_regs, ip))
+-
+ static bool deref_stack_regs(struct unwind_state *state, unsigned long addr,
+-                           unsigned long *ip, unsigned long *sp, bool full)
++                           unsigned long *ip, unsigned long *sp)
+ {
+-      size_t regs_size = full ? REGS_SIZE : IRET_REGS_SIZE;
+-      size_t sp_offset = full ? SP_OFFSET : IRET_SP_OFFSET;
+-      struct pt_regs *regs = (struct pt_regs *)(addr + regs_size - REGS_SIZE);
+-
+-      if (IS_ENABLED(CONFIG_X86_64)) {
+-              if (!stack_access_ok(state, addr, regs_size))
+-                      return false;
+-
+-              *ip = regs->ip;
+-              *sp = regs->sp;
++      struct pt_regs *regs = (struct pt_regs *)addr;
+-              return true;
+-      }
++      /* x86-32 support will be more complicated due to the &regs->sp hack */
++      BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_32));
+-      if (!stack_access_ok(state, addr, sp_offset))
++      if (!stack_access_ok(state, addr, sizeof(struct pt_regs)))
+               return false;
+       *ip = regs->ip;
++      *sp = regs->sp;
++      return true;
++}
+-      if (user_mode(regs)) {
+-              if (!stack_access_ok(state, addr + sp_offset,
+-                                   REGS_SIZE - SP_OFFSET))
+-                      return false;
+-
+-              *sp = regs->sp;
+-      } else
+-              *sp = (unsigned long)&regs->sp;
++static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr,
++                                unsigned long *ip, unsigned long *sp)
++{
++      struct pt_regs *regs = (void *)addr - IRET_FRAME_OFFSET;
++      if (!stack_access_ok(state, addr, IRET_FRAME_SIZE))
++              return false;
++
++      *ip = regs->ip;
++      *sp = regs->sp;
+       return true;
+ }
+@@ -327,7 +310,6 @@ bool unwind_next_frame(struct unwind_sta
+       unsigned long ip_p, sp, orig_ip, prev_sp = state->sp;
+       enum stack_type prev_type = state->stack_info.type;
+       struct orc_entry *orc;
+-      struct pt_regs *ptregs;
+       bool indirect = false;
+       if (unwind_done(state))
+@@ -435,7 +417,7 @@ bool unwind_next_frame(struct unwind_sta
+               break;
+       case ORC_TYPE_REGS:
+-              if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) {
++              if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) {
+                       orc_warn("can't dereference registers at %p for ip %pB\n",
+                                (void *)sp, (void *)orig_ip);
+                       goto done;
+@@ -447,20 +429,14 @@ bool unwind_next_frame(struct unwind_sta
+               break;
+       case ORC_TYPE_REGS_IRET:
+-              if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) {
++              if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) {
+                       orc_warn("can't dereference iret registers at %p for ip %pB\n",
+                                (void *)sp, (void *)orig_ip);
+                       goto done;
+               }
+-              ptregs = container_of((void *)sp, struct pt_regs, ip);
+-              if ((unsigned long)ptregs >= prev_sp &&
+-                  on_stack(&state->stack_info, ptregs, REGS_SIZE)) {
+-                      state->regs = ptregs;
+-                      state->full_regs = false;
+-              } else
+-                      state->regs = NULL;
+-
++              state->regs = (void *)sp - IRET_FRAME_OFFSET;
++              state->full_regs = false;
+               state->signal = true;
+               break;
diff --git a/queue-4.14/x86-unwinder-orc-dont-bail-on-stack-overflow.patch b/queue-4.14/x86-unwinder-orc-dont-bail-on-stack-overflow.patch
new file mode 100644 (file)
index 0000000..cae3c13
--- /dev/null
@@ -0,0 +1,82 @@
+From d3a09104018cf2ad5973dfa8a9c138ef9f5015a3 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:08 +0100
+Subject: x86/unwinder/orc: Dont bail on stack overflow
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit d3a09104018cf2ad5973dfa8a9c138ef9f5015a3 upstream.
+
+If the stack overflows into a guard page, the ORC unwinder should work
+well: by construction, there can't be any meaningful data in the guard page
+because no writes to the guard page will have succeeded.
+
+But there is a bug that prevents unwinding from working correctly: if the
+starting register state has RSP pointing into a stack guard page, the ORC
+unwinder bails out immediately.
+
+Instead of bailing out immediately, check whether the next page up is a
+valid stack page and if so analyze that. As a result the ORC unwinder will
+start the unwind.
+
+Tested by intentionally overflowing the task stack.  The result is an
+accurate call trace instead of a trace consisting purely of '?' entries.
+
+There are a few other bugs that are triggered if the unwinder encounters a
+stack overflow after the first step, but they are outside the scope of this
+fix.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150604.991389777@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/unwind_orc.c |   14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kernel/unwind_orc.c
++++ b/arch/x86/kernel/unwind_orc.c
+@@ -553,8 +553,18 @@ void __unwind_start(struct unwind_state
+       }
+       if (get_stack_info((unsigned long *)state->sp, state->task,
+-                         &state->stack_info, &state->stack_mask))
+-              return;
++                         &state->stack_info, &state->stack_mask)) {
++              /*
++               * We weren't on a valid stack.  It's possible that
++               * we overflowed a valid stack into a guard page.
++               * See if the next page up is valid so that we can
++               * generate some kind of backtrace if this happens.
++               */
++              void *next_page = (void *)PAGE_ALIGN((unsigned long)state->sp);
++              if (get_stack_info(next_page, state->task, &state->stack_info,
++                                 &state->stack_mask))
++                      return;
++      }
+       /*
+        * The caller can provide the address of the first frame directly
diff --git a/queue-4.14/x86-virt-add-enum-for-hypervisors-to-replace-x86_hyper.patch b/queue-4.14/x86-virt-add-enum-for-hypervisors-to-replace-x86_hyper.patch
new file mode 100644 (file)
index 0000000..11f263f
--- /dev/null
@@ -0,0 +1,271 @@
+From 03b2a320b19f1424e9ac9c21696be9c60b6d0d93 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Thu, 9 Nov 2017 14:27:36 +0100
+Subject: x86/virt: Add enum for hypervisors to replace x86_hyper
+
+From: Juergen Gross <jgross@suse.com>
+
+commit 03b2a320b19f1424e9ac9c21696be9c60b6d0d93 upstream.
+
+The x86_hyper pointer is only used for checking whether a virtual
+device is supporting the hypervisor the system is running on.
+
+Use an enum for that purpose instead and drop the x86_hyper pointer.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Acked-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Xavier Deguillard <xdeguillard@vmware.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: akataria@vmware.com
+Cc: arnd@arndb.de
+Cc: boris.ostrovsky@oracle.com
+Cc: devel@linuxdriverproject.org
+Cc: dmitry.torokhov@gmail.com
+Cc: gregkh@linuxfoundation.org
+Cc: haiyangz@microsoft.com
+Cc: kvm@vger.kernel.org
+Cc: kys@microsoft.com
+Cc: linux-graphics-maintainer@vmware.com
+Cc: linux-input@vger.kernel.org
+Cc: moltmann@vmware.com
+Cc: pbonzini@redhat.com
+Cc: pv-drivers@vmware.com
+Cc: rkrcmar@redhat.com
+Cc: sthemmin@microsoft.com
+Cc: virtualization@lists.linux-foundation.org
+Cc: xen-devel@lists.xenproject.org
+Link: http://lkml.kernel.org/r/20171109132739.23465-3-jgross@suse.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/hyperv/hv_init.c         |    2 +-
+ arch/x86/include/asm/hypervisor.h |   23 ++++++++++++++---------
+ arch/x86/kernel/cpu/hypervisor.c  |   12 +++++++++---
+ arch/x86/kernel/cpu/mshyperv.c    |    4 ++--
+ arch/x86/kernel/cpu/vmware.c      |    4 ++--
+ arch/x86/kernel/kvm.c             |    4 ++--
+ arch/x86/xen/enlighten_hvm.c      |    4 ++--
+ arch/x86/xen/enlighten_pv.c       |    4 ++--
+ drivers/hv/vmbus_drv.c            |    2 +-
+ drivers/input/mouse/vmmouse.c     |   10 ++++------
+ drivers/misc/vmw_balloon.c        |    2 +-
+ 11 files changed, 40 insertions(+), 31 deletions(-)
+
+--- a/arch/x86/hyperv/hv_init.c
++++ b/arch/x86/hyperv/hv_init.c
+@@ -113,7 +113,7 @@ void hyperv_init(void)
+       u64 guest_id;
+       union hv_x64_msr_hypercall_contents hypercall_msr;
+-      if (x86_hyper != &x86_hyper_ms_hyperv)
++      if (x86_hyper_type != X86_HYPER_MS_HYPERV)
+               return;
+       /* Allocate percpu VP index */
+--- a/arch/x86/include/asm/hypervisor.h
++++ b/arch/x86/include/asm/hypervisor.h
+@@ -29,6 +29,16 @@
+ /*
+  * x86 hypervisor information
+  */
++
++enum x86_hypervisor_type {
++      X86_HYPER_NATIVE = 0,
++      X86_HYPER_VMWARE,
++      X86_HYPER_MS_HYPERV,
++      X86_HYPER_XEN_PV,
++      X86_HYPER_XEN_HVM,
++      X86_HYPER_KVM,
++};
++
+ struct hypervisor_x86 {
+       /* Hypervisor name */
+       const char      *name;
+@@ -36,6 +46,9 @@ struct hypervisor_x86 {
+       /* Detection routine */
+       uint32_t        (*detect)(void);
++      /* Hypervisor type */
++      enum x86_hypervisor_type type;
++
+       /* init time callbacks */
+       struct x86_hyper_init init;
+@@ -43,15 +56,7 @@ struct hypervisor_x86 {
+       struct x86_hyper_runtime runtime;
+ };
+-extern const struct hypervisor_x86 *x86_hyper;
+-
+-/* Recognized hypervisors */
+-extern const struct hypervisor_x86 x86_hyper_vmware;
+-extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
+-extern const struct hypervisor_x86 x86_hyper_xen_pv;
+-extern const struct hypervisor_x86 x86_hyper_xen_hvm;
+-extern const struct hypervisor_x86 x86_hyper_kvm;
+-
++extern enum x86_hypervisor_type x86_hyper_type;
+ extern void init_hypervisor_platform(void);
+ #else
+ static inline void init_hypervisor_platform(void) { }
+--- a/arch/x86/kernel/cpu/hypervisor.c
++++ b/arch/x86/kernel/cpu/hypervisor.c
+@@ -26,6 +26,12 @@
+ #include <asm/processor.h>
+ #include <asm/hypervisor.h>
++extern const struct hypervisor_x86 x86_hyper_vmware;
++extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
++extern const struct hypervisor_x86 x86_hyper_xen_pv;
++extern const struct hypervisor_x86 x86_hyper_xen_hvm;
++extern const struct hypervisor_x86 x86_hyper_kvm;
++
+ static const __initconst struct hypervisor_x86 * const hypervisors[] =
+ {
+ #ifdef CONFIG_XEN_PV
+@@ -41,8 +47,8 @@ static const __initconst struct hypervis
+ #endif
+ };
+-const struct hypervisor_x86 *x86_hyper;
+-EXPORT_SYMBOL(x86_hyper);
++enum x86_hypervisor_type x86_hyper_type;
++EXPORT_SYMBOL(x86_hyper_type);
+ static inline const struct hypervisor_x86 * __init
+ detect_hypervisor_vendor(void)
+@@ -87,6 +93,6 @@ void __init init_hypervisor_platform(voi
+       copy_array(&h->init, &x86_init.hyper, sizeof(h->init));
+       copy_array(&h->runtime, &x86_platform.hyper, sizeof(h->runtime));
+-      x86_hyper = h;
++      x86_hyper_type = h->type;
+       x86_init.hyper.init_platform();
+ }
+--- a/arch/x86/kernel/cpu/mshyperv.c
++++ b/arch/x86/kernel/cpu/mshyperv.c
+@@ -254,9 +254,9 @@ static void __init ms_hyperv_init_platfo
+ #endif
+ }
+-const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
++const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
+       .name                   = "Microsoft Hyper-V",
+       .detect                 = ms_hyperv_platform,
++      .type                   = X86_HYPER_MS_HYPERV,
+       .init.init_platform     = ms_hyperv_init_platform,
+ };
+-EXPORT_SYMBOL(x86_hyper_ms_hyperv);
+--- a/arch/x86/kernel/cpu/vmware.c
++++ b/arch/x86/kernel/cpu/vmware.c
+@@ -205,10 +205,10 @@ static bool __init vmware_legacy_x2apic_
+              (eax & (1 << VMWARE_PORT_CMD_LEGACY_X2APIC)) != 0;
+ }
+-const __refconst struct hypervisor_x86 x86_hyper_vmware = {
++const __initconst struct hypervisor_x86 x86_hyper_vmware = {
+       .name                   = "VMware",
+       .detect                 = vmware_platform,
++      .type                   = X86_HYPER_VMWARE,
+       .init.init_platform     = vmware_platform_setup,
+       .init.x2apic_available  = vmware_legacy_x2apic_available,
+ };
+-EXPORT_SYMBOL(x86_hyper_vmware);
+--- a/arch/x86/kernel/kvm.c
++++ b/arch/x86/kernel/kvm.c
+@@ -544,12 +544,12 @@ static uint32_t __init kvm_detect(void)
+       return kvm_cpuid_base();
+ }
+-const struct hypervisor_x86 x86_hyper_kvm __refconst = {
++const __initconst struct hypervisor_x86 x86_hyper_kvm = {
+       .name                   = "KVM",
+       .detect                 = kvm_detect,
++      .type                   = X86_HYPER_KVM,
+       .init.x2apic_available  = kvm_para_available,
+ };
+-EXPORT_SYMBOL_GPL(x86_hyper_kvm);
+ static __init int activate_jump_labels(void)
+ {
+--- a/arch/x86/xen/enlighten_hvm.c
++++ b/arch/x86/xen/enlighten_hvm.c
+@@ -226,12 +226,12 @@ static uint32_t __init xen_platform_hvm(
+       return xen_cpuid_base();
+ }
+-const struct hypervisor_x86 x86_hyper_xen_hvm = {
++const __initconst struct hypervisor_x86 x86_hyper_xen_hvm = {
+       .name                   = "Xen HVM",
+       .detect                 = xen_platform_hvm,
++      .type                   = X86_HYPER_XEN_HVM,
+       .init.init_platform     = xen_hvm_guest_init,
+       .init.x2apic_available  = xen_x2apic_para_available,
+       .init.init_mem_mapping  = xen_hvm_init_mem_mapping,
+       .runtime.pin_vcpu       = xen_pin_vcpu,
+ };
+-EXPORT_SYMBOL(x86_hyper_xen_hvm);
+--- a/arch/x86/xen/enlighten_pv.c
++++ b/arch/x86/xen/enlighten_pv.c
+@@ -1459,9 +1459,9 @@ static uint32_t __init xen_platform_pv(v
+       return 0;
+ }
+-const struct hypervisor_x86 x86_hyper_xen_pv = {
++const __initconst struct hypervisor_x86 x86_hyper_xen_pv = {
+       .name                   = "Xen PV",
+       .detect                 = xen_platform_pv,
++      .type                   = X86_HYPER_XEN_PV,
+       .runtime.pin_vcpu       = xen_pin_vcpu,
+ };
+-EXPORT_SYMBOL(x86_hyper_xen_pv);
+--- a/drivers/hv/vmbus_drv.c
++++ b/drivers/hv/vmbus_drv.c
+@@ -1534,7 +1534,7 @@ static int __init hv_acpi_init(void)
+ {
+       int ret, t;
+-      if (x86_hyper != &x86_hyper_ms_hyperv)
++      if (x86_hyper_type != X86_HYPER_MS_HYPERV)
+               return -ENODEV;
+       init_completion(&probe_event);
+--- a/drivers/input/mouse/vmmouse.c
++++ b/drivers/input/mouse/vmmouse.c
+@@ -316,11 +316,9 @@ static int vmmouse_enable(struct psmouse
+ /*
+  * Array of supported hypervisors.
+  */
+-static const struct hypervisor_x86 *vmmouse_supported_hypervisors[] = {
+-      &x86_hyper_vmware,
+-#ifdef CONFIG_KVM_GUEST
+-      &x86_hyper_kvm,
+-#endif
++static enum x86_hypervisor_type vmmouse_supported_hypervisors[] = {
++      X86_HYPER_VMWARE,
++      X86_HYPER_KVM,
+ };
+ /**
+@@ -331,7 +329,7 @@ static bool vmmouse_check_hypervisor(voi
+       int i;
+       for (i = 0; i < ARRAY_SIZE(vmmouse_supported_hypervisors); i++)
+-              if (vmmouse_supported_hypervisors[i] == x86_hyper)
++              if (vmmouse_supported_hypervisors[i] == x86_hyper_type)
+                       return true;
+       return false;
+--- a/drivers/misc/vmw_balloon.c
++++ b/drivers/misc/vmw_balloon.c
+@@ -1271,7 +1271,7 @@ static int __init vmballoon_init(void)
+        * Check if we are running on VMware's hypervisor and bail out
+        * if we are not.
+        */
+-      if (x86_hyper != &x86_hyper_vmware)
++      if (x86_hyper_type != X86_HYPER_VMWARE)
+               return -ENODEV;
+       for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
diff --git a/queue-4.14/x86-virt-x86-platform-merge-struct-x86_hyper-into-struct-x86_platform-and-struct-x86_init.patch b/queue-4.14/x86-virt-x86-platform-merge-struct-x86_hyper-into-struct-x86_platform-and-struct-x86_init.patch
new file mode 100644 (file)
index 0000000..4356dad
--- /dev/null
@@ -0,0 +1,375 @@
+From f72e38e8ec8869ac0ba5a75d7d2f897d98a1454e Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Thu, 9 Nov 2017 14:27:35 +0100
+Subject: x86/virt, x86/platform: Merge 'struct x86_hyper' into 'struct x86_platform' and 'struct x86_init'
+
+From: Juergen Gross <jgross@suse.com>
+
+commit f72e38e8ec8869ac0ba5a75d7d2f897d98a1454e upstream.
+
+Instead of x86_hyper being either NULL on bare metal or a pointer to a
+struct hypervisor_x86 in case of the kernel running as a guest, merge
+the struct into x86_platform and x86_init.
+
+This will remove the need for wrappers, which make it hard to find out
+what is being called. With dummy functions added for all callbacks,
+testing for a NULL function pointer can be removed, too.
+
+Suggested-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Acked-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: akataria@vmware.com
+Cc: boris.ostrovsky@oracle.com
+Cc: devel@linuxdriverproject.org
+Cc: haiyangz@microsoft.com
+Cc: kvm@vger.kernel.org
+Cc: kys@microsoft.com
+Cc: pbonzini@redhat.com
+Cc: rkrcmar@redhat.com
+Cc: rusty@rustcorp.com.au
+Cc: sthemmin@microsoft.com
+Cc: virtualization@lists.linux-foundation.org
+Cc: xen-devel@lists.xenproject.org
+Link: http://lkml.kernel.org/r/20171109132739.23465-2-jgross@suse.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/hypervisor.h |   25 +++-------------
+ arch/x86/include/asm/x86_init.h   |   24 ++++++++++++++++
+ arch/x86/kernel/apic/apic.c       |    2 -
+ arch/x86/kernel/cpu/hypervisor.c  |   56 ++++++++++++++++++--------------------
+ arch/x86/kernel/cpu/mshyperv.c    |    2 -
+ arch/x86/kernel/cpu/vmware.c      |    4 +-
+ arch/x86/kernel/kvm.c             |    2 -
+ arch/x86/kernel/x86_init.c        |    9 ++++++
+ arch/x86/mm/init.c                |    2 -
+ arch/x86/xen/enlighten_hvm.c      |    8 ++---
+ arch/x86/xen/enlighten_pv.c       |    2 -
+ include/linux/hypervisor.h        |    8 ++++-
+ 12 files changed, 82 insertions(+), 62 deletions(-)
+
+--- a/arch/x86/include/asm/hypervisor.h
++++ b/arch/x86/include/asm/hypervisor.h
+@@ -23,6 +23,7 @@
+ #ifdef CONFIG_HYPERVISOR_GUEST
+ #include <asm/kvm_para.h>
++#include <asm/x86_init.h>
+ #include <asm/xen/hypervisor.h>
+ /*
+@@ -35,17 +36,11 @@ struct hypervisor_x86 {
+       /* Detection routine */
+       uint32_t        (*detect)(void);
+-      /* Platform setup (run once per boot) */
+-      void            (*init_platform)(void);
++      /* init time callbacks */
++      struct x86_hyper_init init;
+-      /* X2APIC detection (run once per boot) */
+-      bool            (*x2apic_available)(void);
+-
+-      /* pin current vcpu to specified physical cpu (run rarely) */
+-      void            (*pin_vcpu)(int);
+-
+-      /* called during init_mem_mapping() to setup early mappings. */
+-      void            (*init_mem_mapping)(void);
++      /* runtime callbacks */
++      struct x86_hyper_runtime runtime;
+ };
+ extern const struct hypervisor_x86 *x86_hyper;
+@@ -58,17 +53,7 @@ extern const struct hypervisor_x86 x86_h
+ extern const struct hypervisor_x86 x86_hyper_kvm;
+ extern void init_hypervisor_platform(void);
+-extern bool hypervisor_x2apic_available(void);
+-extern void hypervisor_pin_vcpu(int cpu);
+-
+-static inline void hypervisor_init_mem_mapping(void)
+-{
+-      if (x86_hyper && x86_hyper->init_mem_mapping)
+-              x86_hyper->init_mem_mapping();
+-}
+ #else
+ static inline void init_hypervisor_platform(void) { }
+-static inline bool hypervisor_x2apic_available(void) { return false; }
+-static inline void hypervisor_init_mem_mapping(void) { }
+ #endif /* CONFIG_HYPERVISOR_GUEST */
+ #endif /* _ASM_X86_HYPERVISOR_H */
+--- a/arch/x86/include/asm/x86_init.h
++++ b/arch/x86/include/asm/x86_init.h
+@@ -115,6 +115,18 @@ struct x86_init_pci {
+ };
+ /**
++ * struct x86_hyper_init - x86 hypervisor init functions
++ * @init_platform:            platform setup
++ * @x2apic_available:         X2APIC detection
++ * @init_mem_mapping:         setup early mappings during init_mem_mapping()
++ */
++struct x86_hyper_init {
++      void (*init_platform)(void);
++      bool (*x2apic_available)(void);
++      void (*init_mem_mapping)(void);
++};
++
++/**
+  * struct x86_init_ops - functions for platform specific setup
+  *
+  */
+@@ -127,6 +139,7 @@ struct x86_init_ops {
+       struct x86_init_timers          timers;
+       struct x86_init_iommu           iommu;
+       struct x86_init_pci             pci;
++      struct x86_hyper_init           hyper;
+ };
+ /**
+@@ -200,6 +213,15 @@ struct x86_legacy_features {
+ };
+ /**
++ * struct x86_hyper_runtime - x86 hypervisor specific runtime callbacks
++ *
++ * @pin_vcpu:         pin current vcpu to specified physical cpu (run rarely)
++ */
++struct x86_hyper_runtime {
++      void (*pin_vcpu)(int cpu);
++};
++
++/**
+  * struct x86_platform_ops - platform specific runtime functions
+  * @calibrate_cpu:            calibrate CPU
+  * @calibrate_tsc:            calibrate TSC, if different from CPU
+@@ -218,6 +240,7 @@ struct x86_legacy_features {
+  *                            possible in x86_early_init_platform_quirks() by
+  *                            only using the current x86_hardware_subarch
+  *                            semantics.
++ * @hyper:                    x86 hypervisor specific runtime callbacks
+  */
+ struct x86_platform_ops {
+       unsigned long (*calibrate_cpu)(void);
+@@ -233,6 +256,7 @@ struct x86_platform_ops {
+       void (*apic_post_init)(void);
+       struct x86_legacy_features legacy;
+       void (*set_legacy_features)(void);
++      struct x86_hyper_runtime hyper;
+ };
+ struct pci_dev;
+--- a/arch/x86/kernel/apic/apic.c
++++ b/arch/x86/kernel/apic/apic.c
+@@ -1645,7 +1645,7 @@ static __init void try_to_enable_x2apic(
+                * under KVM
+                */
+               if (max_physical_apicid > 255 ||
+-                  !hypervisor_x2apic_available()) {
++                  !x86_init.hyper.x2apic_available()) {
+                       pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n");
+                       x2apic_disable();
+                       return;
+--- a/arch/x86/kernel/cpu/hypervisor.c
++++ b/arch/x86/kernel/cpu/hypervisor.c
+@@ -44,51 +44,49 @@ static const __initconst struct hypervis
+ const struct hypervisor_x86 *x86_hyper;
+ EXPORT_SYMBOL(x86_hyper);
+-static inline void __init
++static inline const struct hypervisor_x86 * __init
+ detect_hypervisor_vendor(void)
+ {
+-      const struct hypervisor_x86 *h, * const *p;
++      const struct hypervisor_x86 *h = NULL, * const *p;
+       uint32_t pri, max_pri = 0;
+       for (p = hypervisors; p < hypervisors + ARRAY_SIZE(hypervisors); p++) {
+-              h = *p;
+-              pri = h->detect();
+-              if (pri != 0 && pri > max_pri) {
++              pri = (*p)->detect();
++              if (pri > max_pri) {
+                       max_pri = pri;
+-                      x86_hyper = h;
++                      h = *p;
+               }
+       }
+-      if (max_pri)
+-              pr_info("Hypervisor detected: %s\n", x86_hyper->name);
+-}
+-
+-void __init init_hypervisor_platform(void)
+-{
+-
+-      detect_hypervisor_vendor();
++      if (h)
++              pr_info("Hypervisor detected: %s\n", h->name);
+-      if (!x86_hyper)
+-              return;
+-
+-      if (x86_hyper->init_platform)
+-              x86_hyper->init_platform();
++      return h;
+ }
+-bool __init hypervisor_x2apic_available(void)
++static void __init copy_array(const void *src, void *target, unsigned int size)
+ {
+-      return x86_hyper                   &&
+-             x86_hyper->x2apic_available &&
+-             x86_hyper->x2apic_available();
++      unsigned int i, n = size / sizeof(void *);
++      const void * const *from = (const void * const *)src;
++      const void **to = (const void **)target;
++
++      for (i = 0; i < n; i++)
++              if (from[i])
++                      to[i] = from[i];
+ }
+-void hypervisor_pin_vcpu(int cpu)
++void __init init_hypervisor_platform(void)
+ {
+-      if (!x86_hyper)
++      const struct hypervisor_x86 *h;
++
++      h = detect_hypervisor_vendor();
++
++      if (!h)
+               return;
+-      if (x86_hyper->pin_vcpu)
+-              x86_hyper->pin_vcpu(cpu);
+-      else
+-              WARN_ONCE(1, "vcpu pinning requested but not supported!\n");
++      copy_array(&h->init, &x86_init.hyper, sizeof(h->init));
++      copy_array(&h->runtime, &x86_platform.hyper, sizeof(h->runtime));
++
++      x86_hyper = h;
++      x86_init.hyper.init_platform();
+ }
+--- a/arch/x86/kernel/cpu/mshyperv.c
++++ b/arch/x86/kernel/cpu/mshyperv.c
+@@ -257,6 +257,6 @@ static void __init ms_hyperv_init_platfo
+ const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
+       .name                   = "Microsoft Hyper-V",
+       .detect                 = ms_hyperv_platform,
+-      .init_platform          = ms_hyperv_init_platform,
++      .init.init_platform     = ms_hyperv_init_platform,
+ };
+ EXPORT_SYMBOL(x86_hyper_ms_hyperv);
+--- a/arch/x86/kernel/cpu/vmware.c
++++ b/arch/x86/kernel/cpu/vmware.c
+@@ -208,7 +208,7 @@ static bool __init vmware_legacy_x2apic_
+ const __refconst struct hypervisor_x86 x86_hyper_vmware = {
+       .name                   = "VMware",
+       .detect                 = vmware_platform,
+-      .init_platform          = vmware_platform_setup,
+-      .x2apic_available       = vmware_legacy_x2apic_available,
++      .init.init_platform     = vmware_platform_setup,
++      .init.x2apic_available  = vmware_legacy_x2apic_available,
+ };
+ EXPORT_SYMBOL(x86_hyper_vmware);
+--- a/arch/x86/kernel/kvm.c
++++ b/arch/x86/kernel/kvm.c
+@@ -547,7 +547,7 @@ static uint32_t __init kvm_detect(void)
+ const struct hypervisor_x86 x86_hyper_kvm __refconst = {
+       .name                   = "KVM",
+       .detect                 = kvm_detect,
+-      .x2apic_available       = kvm_para_available,
++      .init.x2apic_available  = kvm_para_available,
+ };
+ EXPORT_SYMBOL_GPL(x86_hyper_kvm);
+--- a/arch/x86/kernel/x86_init.c
++++ b/arch/x86/kernel/x86_init.c
+@@ -28,6 +28,8 @@ void x86_init_noop(void) { }
+ void __init x86_init_uint_noop(unsigned int unused) { }
+ int __init iommu_init_noop(void) { return 0; }
+ void iommu_shutdown_noop(void) { }
++bool __init bool_x86_init_noop(void) { return false; }
++void x86_op_int_noop(int cpu) { }
+ /*
+  * The platform setup functions are preset with the default functions
+@@ -81,6 +83,12 @@ struct x86_init_ops x86_init __initdata
+               .init_irq               = x86_default_pci_init_irq,
+               .fixup_irqs             = x86_default_pci_fixup_irqs,
+       },
++
++      .hyper = {
++              .init_platform          = x86_init_noop,
++              .x2apic_available       = bool_x86_init_noop,
++              .init_mem_mapping       = x86_init_noop,
++      },
+ };
+ struct x86_cpuinit_ops x86_cpuinit = {
+@@ -101,6 +109,7 @@ struct x86_platform_ops x86_platform __r
+       .get_nmi_reason                 = default_get_nmi_reason,
+       .save_sched_clock_state         = tsc_save_sched_clock_state,
+       .restore_sched_clock_state      = tsc_restore_sched_clock_state,
++      .hyper.pin_vcpu                 = x86_op_int_noop,
+ };
+ EXPORT_SYMBOL_GPL(x86_platform);
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -671,7 +671,7 @@ void __init init_mem_mapping(void)
+       load_cr3(swapper_pg_dir);
+       __flush_tlb_all();
+-      hypervisor_init_mem_mapping();
++      x86_init.hyper.init_mem_mapping();
+       early_memtest(0, max_pfn_mapped << PAGE_SHIFT);
+ }
+--- a/arch/x86/xen/enlighten_hvm.c
++++ b/arch/x86/xen/enlighten_hvm.c
+@@ -229,9 +229,9 @@ static uint32_t __init xen_platform_hvm(
+ const struct hypervisor_x86 x86_hyper_xen_hvm = {
+       .name                   = "Xen HVM",
+       .detect                 = xen_platform_hvm,
+-      .init_platform          = xen_hvm_guest_init,
+-      .pin_vcpu               = xen_pin_vcpu,
+-      .x2apic_available       = xen_x2apic_para_available,
+-      .init_mem_mapping       = xen_hvm_init_mem_mapping,
++      .init.init_platform     = xen_hvm_guest_init,
++      .init.x2apic_available  = xen_x2apic_para_available,
++      .init.init_mem_mapping  = xen_hvm_init_mem_mapping,
++      .runtime.pin_vcpu       = xen_pin_vcpu,
+ };
+ EXPORT_SYMBOL(x86_hyper_xen_hvm);
+--- a/arch/x86/xen/enlighten_pv.c
++++ b/arch/x86/xen/enlighten_pv.c
+@@ -1462,6 +1462,6 @@ static uint32_t __init xen_platform_pv(v
+ const struct hypervisor_x86 x86_hyper_xen_pv = {
+       .name                   = "Xen PV",
+       .detect                 = xen_platform_pv,
+-      .pin_vcpu               = xen_pin_vcpu,
++      .runtime.pin_vcpu       = xen_pin_vcpu,
+ };
+ EXPORT_SYMBOL(x86_hyper_xen_pv);
+--- a/include/linux/hypervisor.h
++++ b/include/linux/hypervisor.h
+@@ -7,8 +7,12 @@
+  *            Juergen Gross <jgross@suse.com>
+  */
+-#ifdef CONFIG_HYPERVISOR_GUEST
+-#include <asm/hypervisor.h>
++#ifdef CONFIG_X86
++#include <asm/x86_init.h>
++static inline void hypervisor_pin_vcpu(int cpu)
++{
++      x86_platform.hyper.pin_vcpu(cpu);
++}
+ #else
+ static inline void hypervisor_pin_vcpu(int cpu)
+ {
diff --git a/queue-4.14/x86-xen-64-x86-entry-64-clean-up-sp-code-in-cpu_initialize_context.patch b/queue-4.14/x86-xen-64-x86-entry-64-clean-up-sp-code-in-cpu_initialize_context.patch
new file mode 100644 (file)
index 0000000..ad09e51
--- /dev/null
@@ -0,0 +1,89 @@
+From f16b3da1dc936c0f8121741d0a1731bf242f2f56 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:59:12 -0700
+Subject: x86/xen/64, x86/entry/64: Clean up SP code in cpu_initialize_context()
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit f16b3da1dc936c0f8121741d0a1731bf242f2f56 upstream.
+
+I'm removing thread_struct::sp0, and Xen's usage of it is slightly
+dubious and unnecessary.  Use appropriate helpers instead.
+
+While we're at it, reorder the code slightly to make it more obvious
+what's going on.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/d5b9a3da2b47c68325bd2bbe8f82d9554dee0d0f.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/xen/smp_pv.c |   17 ++++++++++++++---
+ 1 file changed, 14 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/xen/smp_pv.c
++++ b/arch/x86/xen/smp_pv.c
+@@ -14,6 +14,7 @@
+  * single-threaded.
+  */
+ #include <linux/sched.h>
++#include <linux/sched/task_stack.h>
+ #include <linux/err.h>
+ #include <linux/slab.h>
+ #include <linux/smp.h>
+@@ -294,12 +295,19 @@ cpu_initialize_context(unsigned int cpu,
+ #endif
+       memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
++      /*
++       * Bring up the CPU in cpu_bringup_and_idle() with the stack
++       * pointing just below where pt_regs would be if it were a normal
++       * kernel entry.
++       */
+       ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
+       ctxt->flags = VGCF_IN_KERNEL;
+       ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
+       ctxt->user_regs.ds = __USER_DS;
+       ctxt->user_regs.es = __USER_DS;
+       ctxt->user_regs.ss = __KERNEL_DS;
++      ctxt->user_regs.cs = __KERNEL_CS;
++      ctxt->user_regs.esp = (unsigned long)task_pt_regs(idle);
+       xen_copy_trap_info(ctxt->trap_ctxt);
+@@ -314,8 +322,13 @@ cpu_initialize_context(unsigned int cpu,
+       ctxt->gdt_frames[0] = gdt_mfn;
+       ctxt->gdt_ents      = GDT_ENTRIES;
++      /*
++       * Set SS:SP that Xen will use when entering guest kernel mode
++       * from guest user mode.  Subsequent calls to load_sp0() can
++       * change this value.
++       */
+       ctxt->kernel_ss = __KERNEL_DS;
+-      ctxt->kernel_sp = idle->thread.sp0;
++      ctxt->kernel_sp = task_top_of_stack(idle);
+ #ifdef CONFIG_X86_32
+       ctxt->event_callback_cs     = __KERNEL_CS;
+@@ -327,10 +340,8 @@ cpu_initialize_context(unsigned int cpu,
+               (unsigned long)xen_hypervisor_callback;
+       ctxt->failsafe_callback_eip =
+               (unsigned long)xen_failsafe_callback;
+-      ctxt->user_regs.cs = __KERNEL_CS;
+       per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
+-      ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
+       ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir));
+       if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt))
+               BUG();
diff --git a/queue-4.14/xen-x86-entry-64-add-xen-nmi-trap-entry.patch b/queue-4.14/xen-x86-entry-64-add-xen-nmi-trap-entry.patch
new file mode 100644 (file)
index 0000000..9626a60
--- /dev/null
@@ -0,0 +1,90 @@
+From 43e4111086a70c78bedb6ad990bee97f17b27a6e Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Thu, 2 Nov 2017 00:59:07 -0700
+Subject: xen, x86/entry/64: Add xen NMI trap entry
+
+From: Juergen Gross <jgross@suse.com>
+
+commit 43e4111086a70c78bedb6ad990bee97f17b27a6e upstream.
+
+Instead of trying to execute any NMI via the bare metal's NMI trap
+handler, use a Xen-specific one for PV domains, like we do for e.g.
+debug traps. As the NMI in a PV domain is handled via the normal
+kernel stack, this is the correct thing to do.
+
+This will enable us to get rid of the very fragile and questionable
+dependencies between the bare metal NMI handler and Xen assumptions
+believed to be broken anyway.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/5baf5c0528d58402441550c5770b98e7961e7680.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/entry/entry_64.S    |    2 +-
+ arch/x86/include/asm/traps.h |    2 +-
+ arch/x86/xen/enlighten_pv.c  |    2 +-
+ arch/x86/xen/xen-asm_64.S    |    2 +-
+ 4 files changed, 4 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1079,6 +1079,7 @@ idtentry int3                    do_int3                 has_error_code
+ idtentry stack_segment                do_stack_segment        has_error_code=1
+ #ifdef CONFIG_XEN
++idtentry xennmi                       do_nmi                  has_error_code=0
+ idtentry xendebug             do_debug                has_error_code=0
+ idtentry xenint3              do_int3                 has_error_code=0
+ #endif
+@@ -1241,7 +1242,6 @@ ENTRY(error_exit)
+ END(error_exit)
+ /* Runs on exception stack */
+-/* XXX: broken on Xen PV */
+ ENTRY(nmi)
+       UNWIND_HINT_IRET_REGS
+       /*
+--- a/arch/x86/include/asm/traps.h
++++ b/arch/x86/include/asm/traps.h
+@@ -38,9 +38,9 @@ asmlinkage void simd_coprocessor_error(v
+ #if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
+ asmlinkage void xen_divide_error(void);
++asmlinkage void xen_xennmi(void);
+ asmlinkage void xen_xendebug(void);
+ asmlinkage void xen_xenint3(void);
+-asmlinkage void xen_nmi(void);
+ asmlinkage void xen_overflow(void);
+ asmlinkage void xen_bounds(void);
+ asmlinkage void xen_invalid_op(void);
+--- a/arch/x86/xen/enlighten_pv.c
++++ b/arch/x86/xen/enlighten_pv.c
+@@ -601,7 +601,7 @@ static struct trap_array_entry trap_arra
+ #ifdef CONFIG_X86_MCE
+       { machine_check,               xen_machine_check,               true },
+ #endif
+-      { nmi,                         xen_nmi,                         true },
++      { nmi,                         xen_xennmi,                      true },
+       { overflow,                    xen_overflow,                    false },
+ #ifdef CONFIG_IA32_EMULATION
+       { entry_INT80_compat,          xen_entry_INT80_compat,          false },
+--- a/arch/x86/xen/xen-asm_64.S
++++ b/arch/x86/xen/xen-asm_64.S
+@@ -30,7 +30,7 @@ xen_pv_trap debug
+ xen_pv_trap xendebug
+ xen_pv_trap int3
+ xen_pv_trap xenint3
+-xen_pv_trap nmi
++xen_pv_trap xennmi
+ xen_pv_trap overflow
+ xen_pv_trap bounds
+ xen_pv_trap invalid_op