From 217b84e276bfbedc3a4e774d628606113e228926 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 21 Dec 2017 17:05:27 +0100 Subject: [PATCH] 4.14-stable patches added patches: acpi-apei-replace-ioremap_page_range-with-fixmap.patch bitops-revert-cbe96375025e-bitops-add-clear-set_bit32-to-linux-bitops.h.patch bpf-fix-build-issues-on-um-due-to-mising-bpf_perf_event.h.patch drivers-misc-intel-pti-rename-the-header-file-to-free-up-the-namespace.patch locking-barriers-add-implicit-smp_read_barrier_depends-to-read_once.patch locking-barriers-convert-users-of-lockless_dereference-to-read_once.patch perf-x86-enable-free-running-pebs-for-regs_user-intr.patch selftests-x86-ldt_gdt-add-infrastructure-to-test-set_thread_area.patch selftests-x86-ldt_gdt-run-most-existing-ldt-test-cases-against-the-gdt-as-well.patch x86-cpufeature-add-user-mode-instruction-prevention-definitions.patch x86-cpufeatures-fix-various-details-in-the-feature-definitions.patch x86-cpufeatures-make-cpu-bugs-sticky.patch x86-cpufeatures-re-tabulate-the-x86_feature-definitions.patch x86-cpuid-replace-set-clear_bit32.patch x86-dumpstack-add-get_stack_info-support-for-the-sysenter-stack.patch x86-dumpstack-handle-stack-overflow-on-all-stacks.patch x86-entry-32-fix-cpu_current_top_of_stack-initialization-at-boot.patch x86-entry-32-pull-the-msr_ia32_sysenter_cs-update-code-out-of-native_load_sp0.patch x86-entry-64-allocate-and-enable-the-sysenter-stack.patch x86-entry-64-create-a-per-cpu-syscall-entry-trampoline.patch x86-entry-64-de-xen-ify-our-nmi-code.patch x86-entry-64-make-cpu_entry_area.tss-read-only.patch x86-entry-64-merge-the-fast-and-slow-sysret-paths.patch x86-entry-64-move-swapgs-into-the-common-iret-to-usermode-path.patch x86-entry-64-move-the-ist-stacks-into-struct-cpu_entry_area.patch x86-entry-64-paravirt-use-paravirt-safe-macro-to-access-eflags.patch x86-entry-64-pass-sp0-directly-to-load_sp0.patch x86-entry-64-remove-all-remaining-direct-thread_struct-sp0-reads.patch 
x86-entry-64-remove-the-restore_..._regs-infrastructure.patch x86-entry-64-remove-the-restore_c_regs_and_iret-label.patch x86-entry-64-remove-the-sysenter-stack-canary.patch x86-entry-64-remove-thread_struct-sp0.patch x86-entry-64-return-to-userspace-from-the-trampoline-stack.patch x86-entry-64-separate-cpu_current_top_of_stack-from-tss.sp0.patch x86-entry-64-shorten-test-instructions.patch x86-entry-64-shrink-paranoid_exit_restore-and-make-labels-local.patch x86-entry-64-simplify-reg-restore-code-in-the-standard-iret-paths.patch x86-entry-64-split-the-iret-to-user-and-iret-to-kernel-paths.patch x86-entry-64-stop-initializing-tss.sp0-at-boot.patch x86-entry-64-use-a-per-cpu-trampoline-stack-for-idt-entries.patch x86-entry-64-use-pop-instead-of-mov-to-restore-regs-on-nmi-return.patch x86-entry-64-use-pop-instead-of-movq-in-syscall_return_via_sysret.patch x86-entry-add-task_top_of_stack-to-find-the-top-of-a-task-s-stack.patch x86-entry-clean-up-the-sysenter_stack-code.patch x86-entry-fix-assumptions-that-the-hw-tss-is-at-the-beginning-of-cpu_tss.patch x86-entry-gdt-put-per-cpu-gdt-remaps-in-ascending-order.patch x86-entry-move-sysenter_stack-to-the-beginning-of-struct-tss_struct.patch x86-entry-remap-the-tss-into-the-cpu-entry-area.patch x86-espfix-64-stop-assuming-that-pt_regs-is-on-the-entry-stack.patch x86-irq-64-print-the-offending-ip-in-the-stack-overflow-warning.patch x86-irq-remove-an-old-outdated-comment-about-context-tracking-races.patch x86-kasan-64-teach-kasan-about-the-cpu_entry_area.patch x86-make-x86_bug_fxsave_leak-detectable-in-cpuid-on-amd.patch x86-mm-define-_page_table-using-_kernpg_table.patch x86-mm-fixmap-generalize-the-gdt-fixmap-mechanism-introduce-struct-cpu_entry_area.patch x86-mm-kasan-don-t-use-vmemmap_populate-to-initialize-shadow.patch x86-paravirt-dont-patch-flush_tlb_single.patch x86-paravirt-provide-a-way-to-check-for-hypervisors.patch x86-traps-use-a-new-on_thread_stack-helper-to-clean-up-an-assertion.patch 
x86-unwinder-handle-stack-overflows-more-gracefully.patch x86-unwinder-orc-dont-bail-on-stack-overflow.patch x86-virt-add-enum-for-hypervisors-to-replace-x86_hyper.patch x86-virt-x86-platform-merge-struct-x86_hyper-into-struct-x86_platform-and-struct-x86_init.patch x86-xen-64-x86-entry-64-clean-up-sp-code-in-cpu_initialize_context.patch xen-x86-entry-64-add-xen-nmi-trap-entry.patch --- ...place-ioremap_page_range-with-fixmap.patch | 177 +++++ ...dd-clear-set_bit32-to-linux-bitops.h.patch | 57 ++ ...on-um-due-to-mising-bpf_perf_event.h.patch | 59 ++ ...header-file-to-free-up-the-namespace.patch | 131 ++++ ...mp_read_barrier_depends-to-read_once.patch | 43 ++ ...of-lockless_dereference-to-read_once.patch | 290 ++++++++ ...free-running-pebs-for-regs_user-intr.patch | 98 +++ ...frastructure-to-test-set_thread_area.patch | 104 +++ ...t-test-cases-against-the-gdt-as-well.patch | 44 ++ queue-4.14/series | 65 ++ ...e-instruction-prevention-definitions.patch | 78 +++ ...s-details-in-the-feature-definitions.patch | 359 ++++++++++ ...x86-cpufeatures-make-cpu-bugs-sticky.patch | 95 +++ ...tabulate-the-x86_feature-definitions.patch | 618 ++++++++++++++++++ .../x86-cpuid-replace-set-clear_bit32.patch | 62 ++ ..._info-support-for-the-sysenter-stack.patch | 169 +++++ ...-handle-stack-overflow-on-all-stacks.patch | 87 +++ ..._top_of_stack-initialization-at-boot.patch | 41 ++ ...s-update-code-out-of-native_load_sp0.patch | 127 ++++ ...locate-and-enable-the-sysenter-stack.patch | 161 +++++ ...e-a-per-cpu-syscall-entry-trampoline.patch | 224 +++++++ ...x86-entry-64-de-xen-ify-our-nmi-code.patch | 108 +++ ...64-make-cpu_entry_area.tss-read-only.patch | 453 +++++++++++++ ...merge-the-fast-and-slow-sysret-paths.patch | 51 ++ ...nto-the-common-iret-to-usermode-path.patch | 144 ++++ ...st-stacks-into-struct-cpu_entry_area.patch | 221 +++++++ ...paravirt-safe-macro-to-access-eflags.patch | 113 ++++ ...try-64-pass-sp0-directly-to-load_sp0.patch | 215 ++++++ 
...ining-direct-thread_struct-sp0-reads.patch | 87 +++ ...-the-restore_..._regs-infrastructure.patch | 95 +++ ...ve-the-restore_c_regs_and_iret-label.patch | 65 ++ ...-64-remove-the-sysenter-stack-canary.patch | 96 +++ ...86-entry-64-remove-thread_struct-sp0.patch | 139 ++++ ...-userspace-from-the-trampoline-stack.patch | 124 ++++ ...pu_current_top_of_stack-from-tss.sp0.patch | 144 ++++ ...6-entry-64-shorten-test-instructions.patch | 48 ++ ...d_exit_restore-and-make-labels-local.patch | 60 ++ ...tore-code-in-the-standard-iret-paths.patch | 91 +++ ...ret-to-user-and-iret-to-kernel-paths.patch | 121 ++++ ...64-stop-initializing-tss.sp0-at-boot.patch | 91 +++ ...cpu-trampoline-stack-for-idt-entries.patch | 276 ++++++++ ...of-mov-to-restore-regs-on-nmi-return.patch | 47 ++ ...of-movq-in-syscall_return_via_sysret.patch | 51 ++ ...ck-to-find-the-top-of-a-task-s-stack.patch | 38 ++ ...try-clean-up-the-sysenter_stack-code.patch | 184 ++++++ ...w-tss-is-at-the-beginning-of-cpu_tss.patch | 205 ++++++ ...er-cpu-gdt-remaps-in-ascending-order.patch | 61 ++ ...o-the-beginning-of-struct-tss_struct.patch | 118 ++++ ...emap-the-tss-into-the-cpu-entry-area.patch | 265 ++++++++ ...g-that-pt_regs-is-on-the-entry-stack.patch | 114 ++++ ...ing-ip-in-the-stack-overflow-warning.patch | 60 ++ ...comment-about-context-tracking-races.patch | 65 ++ ...teach-kasan-about-the-cpu_entry_area.patch | 82 +++ ...save_leak-detectable-in-cpuid-on-amd.patch | 62 ++ ...fine-_page_table-using-_kernpg_table.patch | 39 ++ ...nism-introduce-struct-cpu_entry_area.patch | 190 ++++++ ...memmap_populate-to-initialize-shadow.patch | 254 +++++++ ...paravirt-dont-patch-flush_tlb_single.patch | 68 ++ ...ovide-a-way-to-check-for-hypervisors.patch | 96 +++ ...tack-helper-to-clean-up-an-assertion.patch | 56 ++ ...ndle-stack-overflows-more-gracefully.patch | 319 +++++++++ ...nder-orc-dont-bail-on-stack-overflow.patch | 82 +++ ...for-hypervisors-to-replace-x86_hyper.patch | 271 ++++++++ 
...uct-x86_platform-and-struct-x86_init.patch | 375 +++++++++++ ...up-sp-code-in-cpu_initialize_context.patch | 89 +++ ...-x86-entry-64-add-xen-nmi-trap-entry.patch | 90 +++ 66 files changed, 9112 insertions(+) create mode 100644 queue-4.14/acpi-apei-replace-ioremap_page_range-with-fixmap.patch create mode 100644 queue-4.14/bitops-revert-cbe96375025e-bitops-add-clear-set_bit32-to-linux-bitops.h.patch create mode 100644 queue-4.14/bpf-fix-build-issues-on-um-due-to-mising-bpf_perf_event.h.patch create mode 100644 queue-4.14/drivers-misc-intel-pti-rename-the-header-file-to-free-up-the-namespace.patch create mode 100644 queue-4.14/locking-barriers-add-implicit-smp_read_barrier_depends-to-read_once.patch create mode 100644 queue-4.14/locking-barriers-convert-users-of-lockless_dereference-to-read_once.patch create mode 100644 queue-4.14/perf-x86-enable-free-running-pebs-for-regs_user-intr.patch create mode 100644 queue-4.14/selftests-x86-ldt_gdt-add-infrastructure-to-test-set_thread_area.patch create mode 100644 queue-4.14/selftests-x86-ldt_gdt-run-most-existing-ldt-test-cases-against-the-gdt-as-well.patch create mode 100644 queue-4.14/x86-cpufeature-add-user-mode-instruction-prevention-definitions.patch create mode 100644 queue-4.14/x86-cpufeatures-fix-various-details-in-the-feature-definitions.patch create mode 100644 queue-4.14/x86-cpufeatures-make-cpu-bugs-sticky.patch create mode 100644 queue-4.14/x86-cpufeatures-re-tabulate-the-x86_feature-definitions.patch create mode 100644 queue-4.14/x86-cpuid-replace-set-clear_bit32.patch create mode 100644 queue-4.14/x86-dumpstack-add-get_stack_info-support-for-the-sysenter-stack.patch create mode 100644 queue-4.14/x86-dumpstack-handle-stack-overflow-on-all-stacks.patch create mode 100644 queue-4.14/x86-entry-32-fix-cpu_current_top_of_stack-initialization-at-boot.patch create mode 100644 queue-4.14/x86-entry-32-pull-the-msr_ia32_sysenter_cs-update-code-out-of-native_load_sp0.patch create mode 100644 
queue-4.14/x86-entry-64-allocate-and-enable-the-sysenter-stack.patch create mode 100644 queue-4.14/x86-entry-64-create-a-per-cpu-syscall-entry-trampoline.patch create mode 100644 queue-4.14/x86-entry-64-de-xen-ify-our-nmi-code.patch create mode 100644 queue-4.14/x86-entry-64-make-cpu_entry_area.tss-read-only.patch create mode 100644 queue-4.14/x86-entry-64-merge-the-fast-and-slow-sysret-paths.patch create mode 100644 queue-4.14/x86-entry-64-move-swapgs-into-the-common-iret-to-usermode-path.patch create mode 100644 queue-4.14/x86-entry-64-move-the-ist-stacks-into-struct-cpu_entry_area.patch create mode 100644 queue-4.14/x86-entry-64-paravirt-use-paravirt-safe-macro-to-access-eflags.patch create mode 100644 queue-4.14/x86-entry-64-pass-sp0-directly-to-load_sp0.patch create mode 100644 queue-4.14/x86-entry-64-remove-all-remaining-direct-thread_struct-sp0-reads.patch create mode 100644 queue-4.14/x86-entry-64-remove-the-restore_..._regs-infrastructure.patch create mode 100644 queue-4.14/x86-entry-64-remove-the-restore_c_regs_and_iret-label.patch create mode 100644 queue-4.14/x86-entry-64-remove-the-sysenter-stack-canary.patch create mode 100644 queue-4.14/x86-entry-64-remove-thread_struct-sp0.patch create mode 100644 queue-4.14/x86-entry-64-return-to-userspace-from-the-trampoline-stack.patch create mode 100644 queue-4.14/x86-entry-64-separate-cpu_current_top_of_stack-from-tss.sp0.patch create mode 100644 queue-4.14/x86-entry-64-shorten-test-instructions.patch create mode 100644 queue-4.14/x86-entry-64-shrink-paranoid_exit_restore-and-make-labels-local.patch create mode 100644 queue-4.14/x86-entry-64-simplify-reg-restore-code-in-the-standard-iret-paths.patch create mode 100644 queue-4.14/x86-entry-64-split-the-iret-to-user-and-iret-to-kernel-paths.patch create mode 100644 queue-4.14/x86-entry-64-stop-initializing-tss.sp0-at-boot.patch create mode 100644 queue-4.14/x86-entry-64-use-a-per-cpu-trampoline-stack-for-idt-entries.patch create mode 100644 
queue-4.14/x86-entry-64-use-pop-instead-of-mov-to-restore-regs-on-nmi-return.patch create mode 100644 queue-4.14/x86-entry-64-use-pop-instead-of-movq-in-syscall_return_via_sysret.patch create mode 100644 queue-4.14/x86-entry-add-task_top_of_stack-to-find-the-top-of-a-task-s-stack.patch create mode 100644 queue-4.14/x86-entry-clean-up-the-sysenter_stack-code.patch create mode 100644 queue-4.14/x86-entry-fix-assumptions-that-the-hw-tss-is-at-the-beginning-of-cpu_tss.patch create mode 100644 queue-4.14/x86-entry-gdt-put-per-cpu-gdt-remaps-in-ascending-order.patch create mode 100644 queue-4.14/x86-entry-move-sysenter_stack-to-the-beginning-of-struct-tss_struct.patch create mode 100644 queue-4.14/x86-entry-remap-the-tss-into-the-cpu-entry-area.patch create mode 100644 queue-4.14/x86-espfix-64-stop-assuming-that-pt_regs-is-on-the-entry-stack.patch create mode 100644 queue-4.14/x86-irq-64-print-the-offending-ip-in-the-stack-overflow-warning.patch create mode 100644 queue-4.14/x86-irq-remove-an-old-outdated-comment-about-context-tracking-races.patch create mode 100644 queue-4.14/x86-kasan-64-teach-kasan-about-the-cpu_entry_area.patch create mode 100644 queue-4.14/x86-make-x86_bug_fxsave_leak-detectable-in-cpuid-on-amd.patch create mode 100644 queue-4.14/x86-mm-define-_page_table-using-_kernpg_table.patch create mode 100644 queue-4.14/x86-mm-fixmap-generalize-the-gdt-fixmap-mechanism-introduce-struct-cpu_entry_area.patch create mode 100644 queue-4.14/x86-mm-kasan-don-t-use-vmemmap_populate-to-initialize-shadow.patch create mode 100644 queue-4.14/x86-paravirt-dont-patch-flush_tlb_single.patch create mode 100644 queue-4.14/x86-paravirt-provide-a-way-to-check-for-hypervisors.patch create mode 100644 queue-4.14/x86-traps-use-a-new-on_thread_stack-helper-to-clean-up-an-assertion.patch create mode 100644 queue-4.14/x86-unwinder-handle-stack-overflows-more-gracefully.patch create mode 100644 queue-4.14/x86-unwinder-orc-dont-bail-on-stack-overflow.patch create mode 100644 
queue-4.14/x86-virt-add-enum-for-hypervisors-to-replace-x86_hyper.patch create mode 100644 queue-4.14/x86-virt-x86-platform-merge-struct-x86_hyper-into-struct-x86_platform-and-struct-x86_init.patch create mode 100644 queue-4.14/x86-xen-64-x86-entry-64-clean-up-sp-code-in-cpu_initialize_context.patch create mode 100644 queue-4.14/xen-x86-entry-64-add-xen-nmi-trap-entry.patch diff --git a/queue-4.14/acpi-apei-replace-ioremap_page_range-with-fixmap.patch b/queue-4.14/acpi-apei-replace-ioremap_page_range-with-fixmap.patch new file mode 100644 index 00000000000..15f5133f0e9 --- /dev/null +++ b/queue-4.14/acpi-apei-replace-ioremap_page_range-with-fixmap.patch @@ -0,0 +1,177 @@ +From 4f89fa286f6729312e227e7c2d764e8e7b9d340e Mon Sep 17 00:00:00 2001 +From: James Morse +Date: Mon, 6 Nov 2017 18:44:24 +0000 +Subject: ACPI / APEI: Replace ioremap_page_range() with fixmap + +From: James Morse + +commit 4f89fa286f6729312e227e7c2d764e8e7b9d340e upstream. + +Replace ghes_io{re,un}map_pfn_{nmi,irq}()s use of ioremap_page_range() +with __set_fixmap() as ioremap_page_range() may sleep to allocate a new +level of page-table, even if its passed an existing final-address to +use in the mapping. + +The GHES driver can only be enabled for architectures that select +HAVE_ACPI_APEI: Add fixmap entries to both x86 and arm64. + +clear_fixmap() does the TLB invalidation in __set_fixmap() for arm64 +and __set_pte_vaddr() for x86. In each case its the same as the +respective arch_apei_flush_tlb_one(). + +Reported-by: Fengguang Wu +Suggested-by: Linus Torvalds +Signed-off-by: James Morse +Reviewed-by: Borislav Petkov +Tested-by: Tyler Baicar +Tested-by: Toshi Kani +[ For the arm64 bits: ] +Acked-by: Will Deacon +[ For the x86 bits: ] +Acked-by: Ingo Molnar +Signed-off-by: Rafael J. 
Wysocki +Cc: All applicable +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm64/include/asm/fixmap.h | 7 ++++++ + arch/x86/include/asm/fixmap.h | 6 +++++ + drivers/acpi/apei/ghes.c | 44 ++++++++++++---------------------------- + 3 files changed, 27 insertions(+), 30 deletions(-) + +--- a/arch/arm64/include/asm/fixmap.h ++++ b/arch/arm64/include/asm/fixmap.h +@@ -51,6 +51,13 @@ enum fixed_addresses { + + FIX_EARLYCON_MEM_BASE, + FIX_TEXT_POKE0, ++ ++#ifdef CONFIG_ACPI_APEI_GHES ++ /* Used for GHES mapping from assorted contexts */ ++ FIX_APEI_GHES_IRQ, ++ FIX_APEI_GHES_NMI, ++#endif /* CONFIG_ACPI_APEI_GHES */ ++ + __end_of_permanent_fixed_addresses, + + /* +--- a/arch/x86/include/asm/fixmap.h ++++ b/arch/x86/include/asm/fixmap.h +@@ -104,6 +104,12 @@ enum fixed_addresses { + FIX_GDT_REMAP_BEGIN, + FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1, + ++#ifdef CONFIG_ACPI_APEI_GHES ++ /* Used for GHES mapping from assorted contexts */ ++ FIX_APEI_GHES_IRQ, ++ FIX_APEI_GHES_NMI, ++#endif ++ + __end_of_permanent_fixed_addresses, + + /* +--- a/drivers/acpi/apei/ghes.c ++++ b/drivers/acpi/apei/ghes.c +@@ -51,6 +51,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -112,7 +113,7 @@ static DEFINE_MUTEX(ghes_list_mutex); + * Because the memory area used to transfer hardware error information + * from BIOS to Linux can be determined only in NMI, IRQ or timer + * handler, but general ioremap can not be used in atomic context, so +- * a special version of atomic ioremap is implemented for that. ++ * the fixmap is used instead. + */ + + /* +@@ -126,8 +127,8 @@ static DEFINE_MUTEX(ghes_list_mutex); + /* virtual memory area for atomic ioremap */ + static struct vm_struct *ghes_ioremap_area; + /* +- * These 2 spinlock is used to prevent atomic ioremap virtual memory +- * area from being mapped simultaneously. ++ * These 2 spinlocks are used to prevent the fixmap entries from being used ++ * simultaneously. 
+ */ + static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi); + static DEFINE_SPINLOCK(ghes_ioremap_lock_irq); +@@ -159,53 +160,36 @@ static void ghes_ioremap_exit(void) + + static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn) + { +- unsigned long vaddr; + phys_addr_t paddr; + pgprot_t prot; + +- vaddr = (unsigned long)GHES_IOREMAP_NMI_PAGE(ghes_ioremap_area->addr); +- + paddr = pfn << PAGE_SHIFT; + prot = arch_apei_get_mem_attribute(paddr); +- ioremap_page_range(vaddr, vaddr + PAGE_SIZE, paddr, prot); ++ __set_fixmap(FIX_APEI_GHES_NMI, paddr, prot); + +- return (void __iomem *)vaddr; ++ return (void __iomem *) fix_to_virt(FIX_APEI_GHES_NMI); + } + + static void __iomem *ghes_ioremap_pfn_irq(u64 pfn) + { +- unsigned long vaddr; + phys_addr_t paddr; + pgprot_t prot; + +- vaddr = (unsigned long)GHES_IOREMAP_IRQ_PAGE(ghes_ioremap_area->addr); +- + paddr = pfn << PAGE_SHIFT; + prot = arch_apei_get_mem_attribute(paddr); ++ __set_fixmap(FIX_APEI_GHES_IRQ, paddr, prot); + +- ioremap_page_range(vaddr, vaddr + PAGE_SIZE, paddr, prot); +- +- return (void __iomem *)vaddr; ++ return (void __iomem *) fix_to_virt(FIX_APEI_GHES_IRQ); + } + +-static void ghes_iounmap_nmi(void __iomem *vaddr_ptr) ++static void ghes_iounmap_nmi(void) + { +- unsigned long vaddr = (unsigned long __force)vaddr_ptr; +- void *base = ghes_ioremap_area->addr; +- +- BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_NMI_PAGE(base)); +- unmap_kernel_range_noflush(vaddr, PAGE_SIZE); +- arch_apei_flush_tlb_one(vaddr); ++ clear_fixmap(FIX_APEI_GHES_NMI); + } + +-static void ghes_iounmap_irq(void __iomem *vaddr_ptr) ++static void ghes_iounmap_irq(void) + { +- unsigned long vaddr = (unsigned long __force)vaddr_ptr; +- void *base = ghes_ioremap_area->addr; +- +- BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_IRQ_PAGE(base)); +- unmap_kernel_range_noflush(vaddr, PAGE_SIZE); +- arch_apei_flush_tlb_one(vaddr); ++ clear_fixmap(FIX_APEI_GHES_IRQ); + } + + static int ghes_estatus_pool_init(void) +@@ -361,10 +345,10 @@ static void 
ghes_copy_tofrom_phys(void * + paddr += trunk; + buffer += trunk; + if (in_nmi) { +- ghes_iounmap_nmi(vaddr); ++ ghes_iounmap_nmi(); + raw_spin_unlock(&ghes_ioremap_lock_nmi); + } else { +- ghes_iounmap_irq(vaddr); ++ ghes_iounmap_irq(); + spin_unlock_irqrestore(&ghes_ioremap_lock_irq, flags); + } + } diff --git a/queue-4.14/bitops-revert-cbe96375025e-bitops-add-clear-set_bit32-to-linux-bitops.h.patch b/queue-4.14/bitops-revert-cbe96375025e-bitops-add-clear-set_bit32-to-linux-bitops.h.patch new file mode 100644 index 00000000000..aeb6f75c394 --- /dev/null +++ b/queue-4.14/bitops-revert-cbe96375025e-bitops-add-clear-set_bit32-to-linux-bitops.h.patch @@ -0,0 +1,57 @@ +From 1943dc07b45e347c52c1bfdd4a37e04a86e399aa Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 2 Nov 2017 13:30:03 +0100 +Subject: bitops: Revert cbe96375025e ("bitops: Add clear/set_bit32() to linux/bitops.h") + +From: Thomas Gleixner + +commit 1943dc07b45e347c52c1bfdd4a37e04a86e399aa upstream. + +These ops are not endian safe and may break on architectures which have +aligment requirements. + +Reverts: cbe96375025e ("bitops: Add clear/set_bit32() to linux/bitops.h") +Reported-by: Peter Zijlstra +Signed-off-by: Thomas Gleixner +Cc: Andi Kleen +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/bitops.h | 26 -------------------------- + 1 file changed, 26 deletions(-) + +--- a/include/linux/bitops.h ++++ b/include/linux/bitops.h +@@ -228,32 +228,6 @@ static inline unsigned long __ffs64(u64 + return __ffs((unsigned long)word); + } + +-/* +- * clear_bit32 - Clear a bit in memory for u32 array +- * @nr: Bit to clear +- * @addr: u32 * address of bitmap +- * +- * Same as clear_bit, but avoids needing casts for u32 arrays. 
+- */ +- +-static __always_inline void clear_bit32(long nr, volatile u32 *addr) +-{ +- clear_bit(nr, (volatile unsigned long *)addr); +-} +- +-/* +- * set_bit32 - Set a bit in memory for u32 array +- * @nr: Bit to clear +- * @addr: u32 * address of bitmap +- * +- * Same as set_bit, but avoids needing casts for u32 arrays. +- */ +- +-static __always_inline void set_bit32(long nr, volatile u32 *addr) +-{ +- set_bit(nr, (volatile unsigned long *)addr); +-} +- + #ifdef __KERNEL__ + + #ifndef set_mask_bits diff --git a/queue-4.14/bpf-fix-build-issues-on-um-due-to-mising-bpf_perf_event.h.patch b/queue-4.14/bpf-fix-build-issues-on-um-due-to-mising-bpf_perf_event.h.patch new file mode 100644 index 00000000000..beca5807e0d --- /dev/null +++ b/queue-4.14/bpf-fix-build-issues-on-um-due-to-mising-bpf_perf_event.h.patch @@ -0,0 +1,59 @@ +From ab95477e7cb35557ecfc837687007b646bab9a9f Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann +Date: Tue, 12 Dec 2017 02:25:31 +0100 +Subject: bpf: fix build issues on um due to mising bpf_perf_event.h + +From: Daniel Borkmann + +commit ab95477e7cb35557ecfc837687007b646bab9a9f upstream. + +[ Note, this is a Git cherry-pick of the following commit: + + a23f06f06dbe ("bpf: fix build issues on um due to mising bpf_perf_event.h") + + ... for easier x86 PTI code testing and back-porting. ] + +Since c895f6f703ad ("bpf: correct broken uapi for +BPF_PROG_TYPE_PERF_EVENT program type") um (uml) won't build +on i386 or x86_64: + + [...] + CC init/main.o + In file included from ../include/linux/perf_event.h:18:0, + from ../include/linux/trace_events.h:10, + from ../include/trace/syscall.h:7, + from ../include/linux/syscalls.h:82, + from ../init/main.c:20: + ../include/uapi/linux/bpf_perf_event.h:11:32: fatal error: + asm/bpf_perf_event.h: No such file or directory #include + + [...] + +Lets add missing bpf_perf_event.h also to um arch. This seems +to be the only one still missing. 
+ +Fixes: c895f6f703ad ("bpf: correct broken uapi for BPF_PROG_TYPE_PERF_EVENT program type") +Reported-by: Randy Dunlap +Suggested-by: Richard Weinberger +Signed-off-by: Daniel Borkmann +Tested-by: Randy Dunlap +Cc: Hendrik Brueckner +Cc: Richard Weinberger +Acked-by: Alexei Starovoitov +Acked-by: Richard Weinberger +Signed-off-by: Alexei Starovoitov +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/um/include/asm/Kbuild | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/um/include/asm/Kbuild ++++ b/arch/um/include/asm/Kbuild +@@ -1,4 +1,5 @@ + generic-y += barrier.h ++generic-y += bpf_perf_event.h + generic-y += bug.h + generic-y += clkdev.h + generic-y += current.h diff --git a/queue-4.14/drivers-misc-intel-pti-rename-the-header-file-to-free-up-the-namespace.patch b/queue-4.14/drivers-misc-intel-pti-rename-the-header-file-to-free-up-the-namespace.patch new file mode 100644 index 00000000000..d14ee6dbe3e --- /dev/null +++ b/queue-4.14/drivers-misc-intel-pti-rename-the-header-file-to-free-up-the-namespace.patch @@ -0,0 +1,131 @@ +From 1784f9144b143a1e8b19fe94083b040aa559182b Mon Sep 17 00:00:00 2001 +From: Ingo Molnar +Date: Tue, 5 Dec 2017 14:14:47 +0100 +Subject: drivers/misc/intel/pti: Rename the header file to free up the namespace + +From: Ingo Molnar + +commit 1784f9144b143a1e8b19fe94083b040aa559182b upstream. + +We'd like to use the 'PTI' acronym for 'Page Table Isolation' - free up the +namespace by renaming the driver header to <linux/intel-pti.h>. + +(Also standardize the header guard name while at it.) 
+ +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: J Freyensee +Cc: Greg Kroah-Hartman +Cc: linux-kernel@vger.kernel.org +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/misc/pti.c | 2 +- + include/linux/intel-pti.h | 43 +++++++++++++++++++++++++++++++++++++++++++ + include/linux/pti.h | 43 ------------------------------------------- + 3 files changed, 44 insertions(+), 44 deletions(-) + +--- a/drivers/misc/pti.c ++++ b/drivers/misc/pti.c +@@ -32,7 +32,7 @@ + #include + #include + #include +-#include ++#include + #include + #include + +--- /dev/null ++++ b/include/linux/intel-pti.h +@@ -0,0 +1,43 @@ ++/* ++ * Copyright (C) Intel 2011 ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ * ++ * The PTI (Parallel Trace Interface) driver directs trace data routed from ++ * various parts in the system out through the Intel Penwell PTI port and ++ * out of the mobile device for analysis with a debugging tool ++ * (Lauterbach, Fido). This is part of a solution for the MIPI P1149.7, ++ * compact JTAG, standard. ++ * ++ * This header file will allow other parts of the OS to use the ++ * interface to write out it's contents for debugging a mobile system. ++ */ ++ ++#ifndef LINUX_INTEL_PTI_H_ ++#define LINUX_INTEL_PTI_H_ ++ ++/* offset for last dword of any PTI message. 
Part of MIPI P1149.7 */ ++#define PTI_LASTDWORD_DTS 0x30 ++ ++/* basic structure used as a write address to the PTI HW */ ++struct pti_masterchannel { ++ u8 master; ++ u8 channel; ++}; ++ ++/* the following functions are defined in misc/pti.c */ ++void pti_writedata(struct pti_masterchannel *mc, u8 *buf, int count); ++struct pti_masterchannel *pti_request_masterchannel(u8 type, ++ const char *thread_name); ++void pti_release_masterchannel(struct pti_masterchannel *mc); ++ ++#endif /* LINUX_INTEL_PTI_H_ */ +--- a/include/linux/pti.h ++++ /dev/null +@@ -1,43 +0,0 @@ +-/* +- * Copyright (C) Intel 2011 +- * +- * This program is free software; you can redistribute it and/or modify +- * it under the terms of the GNU General Public License version 2 as +- * published by the Free Software Foundation. +- * +- * This program is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +- * GNU General Public License for more details. +- * +- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +- * +- * The PTI (Parallel Trace Interface) driver directs trace data routed from +- * various parts in the system out through the Intel Penwell PTI port and +- * out of the mobile device for analysis with a debugging tool +- * (Lauterbach, Fido). This is part of a solution for the MIPI P1149.7, +- * compact JTAG, standard. +- * +- * This header file will allow other parts of the OS to use the +- * interface to write out it's contents for debugging a mobile system. +- */ +- +-#ifndef PTI_H_ +-#define PTI_H_ +- +-/* offset for last dword of any PTI message. 
Part of MIPI P1149.7 */ +-#define PTI_LASTDWORD_DTS 0x30 +- +-/* basic structure used as a write address to the PTI HW */ +-struct pti_masterchannel { +- u8 master; +- u8 channel; +-}; +- +-/* the following functions are defined in misc/pti.c */ +-void pti_writedata(struct pti_masterchannel *mc, u8 *buf, int count); +-struct pti_masterchannel *pti_request_masterchannel(u8 type, +- const char *thread_name); +-void pti_release_masterchannel(struct pti_masterchannel *mc); +- +-#endif /*PTI_H_*/ diff --git a/queue-4.14/locking-barriers-add-implicit-smp_read_barrier_depends-to-read_once.patch b/queue-4.14/locking-barriers-add-implicit-smp_read_barrier_depends-to-read_once.patch new file mode 100644 index 00000000000..ac2f19a9203 --- /dev/null +++ b/queue-4.14/locking-barriers-add-implicit-smp_read_barrier_depends-to-read_once.patch @@ -0,0 +1,43 @@ +From c2bc66082e1048c7573d72e62f597bdc5ce13fea Mon Sep 17 00:00:00 2001 +From: Will Deacon +Date: Tue, 24 Oct 2017 11:22:47 +0100 +Subject: locking/barriers: Add implicit smp_read_barrier_depends() to READ_ONCE() + +From: Will Deacon + +commit c2bc66082e1048c7573d72e62f597bdc5ce13fea upstream. + +[ Note, this is a Git cherry-pick of the following commit: + + 76ebbe78f739 ("locking/barriers: Add implicit smp_read_barrier_depends() to READ_ONCE()") + + ... for easier x86 PTI code testing and back-porting. ] + +In preparation for the removal of lockless_dereference(), which is the +same as READ_ONCE() on all architectures other than Alpha, add an +implicit smp_read_barrier_depends() to READ_ONCE() so that it can be +used to head dependency chains on all architectures. + +Signed-off-by: Will Deacon +Cc: Linus Torvalds +Cc: Paul E. 
McKenney +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/1508840570-22169-3-git-send-email-will.deacon@arm.com +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/compiler.h | 1 + + 1 file changed, 1 insertion(+) + +--- a/include/linux/compiler.h ++++ b/include/linux/compiler.h +@@ -341,6 +341,7 @@ static __always_inline void __write_once + __read_once_size(&(x), __u.__c, sizeof(x)); \ + else \ + __read_once_size_nocheck(&(x), __u.__c, sizeof(x)); \ ++ smp_read_barrier_depends(); /* Enforce dependency ordering from x */ \ + __u.__val; \ + }) + #define READ_ONCE(x) __READ_ONCE(x, 1) diff --git a/queue-4.14/locking-barriers-convert-users-of-lockless_dereference-to-read_once.patch b/queue-4.14/locking-barriers-convert-users-of-lockless_dereference-to-read_once.patch new file mode 100644 index 00000000000..50ee566f8b4 --- /dev/null +++ b/queue-4.14/locking-barriers-convert-users-of-lockless_dereference-to-read_once.patch @@ -0,0 +1,290 @@ +From 3382290ed2d5e275429cef510ab21889d3ccd164 Mon Sep 17 00:00:00 2001 +From: Will Deacon +Date: Tue, 24 Oct 2017 11:22:48 +0100 +Subject: locking/barriers: Convert users of lockless_dereference() to READ_ONCE() + +From: Will Deacon + +commit 3382290ed2d5e275429cef510ab21889d3ccd164 upstream. + +[ Note, this is a Git cherry-pick of the following commit: + + 506458efaf15 ("locking/barriers: Convert users of lockless_dereference() to READ_ONCE()") + + ... for easier x86 PTI code testing and back-porting. ] + +READ_ONCE() now has an implicit smp_read_barrier_depends() call, so it +can be used instead of lockless_dereference() without any change in +semantics. + +Signed-off-by: Will Deacon +Cc: Linus Torvalds +Cc: Paul E. 
McKenney +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/1508840570-22169-4-git-send-email-will.deacon@arm.com +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/events/core.c | 2 +- + arch/x86/include/asm/mmu_context.h | 4 ++-- + arch/x86/kernel/ldt.c | 2 +- + drivers/md/dm-mpath.c | 20 ++++++++++---------- + fs/dcache.c | 4 ++-- + fs/overlayfs/ovl_entry.h | 2 +- + fs/overlayfs/readdir.c | 2 +- + include/linux/rculist.h | 4 ++-- + include/linux/rcupdate.h | 4 ++-- + kernel/events/core.c | 4 ++-- + kernel/seccomp.c | 2 +- + kernel/task_work.c | 2 +- + mm/slab.h | 2 +- + 13 files changed, 27 insertions(+), 27 deletions(-) + +--- a/arch/x86/events/core.c ++++ b/arch/x86/events/core.c +@@ -2371,7 +2371,7 @@ static unsigned long get_segment_base(un + struct ldt_struct *ldt; + + /* IRQs are off, so this synchronizes with smp_store_release */ +- ldt = lockless_dereference(current->active_mm->context.ldt); ++ ldt = READ_ONCE(current->active_mm->context.ldt); + if (!ldt || idx >= ldt->nr_entries) + return 0; + +--- a/arch/x86/include/asm/mmu_context.h ++++ b/arch/x86/include/asm/mmu_context.h +@@ -73,8 +73,8 @@ static inline void load_mm_ldt(struct mm + #ifdef CONFIG_MODIFY_LDT_SYSCALL + struct ldt_struct *ldt; + +- /* lockless_dereference synchronizes with smp_store_release */ +- ldt = lockless_dereference(mm->context.ldt); ++ /* READ_ONCE synchronizes with smp_store_release */ ++ ldt = READ_ONCE(mm->context.ldt); + + /* + * Any change to mm->context.ldt is followed by an IPI to all +--- a/arch/x86/kernel/ldt.c ++++ b/arch/x86/kernel/ldt.c +@@ -103,7 +103,7 @@ static void finalize_ldt_struct(struct l + static void install_ldt(struct mm_struct *current_mm, + struct ldt_struct *ldt) + { +- /* Synchronizes with lockless_dereference in load_mm_ldt. */ ++ /* Synchronizes with READ_ONCE in load_mm_ldt. */ + smp_store_release(&current_mm->context.ldt, ldt); + + /* Activate the LDT for all CPUs using current_mm. 
*/ +--- a/drivers/md/dm-mpath.c ++++ b/drivers/md/dm-mpath.c +@@ -366,7 +366,7 @@ static struct pgpath *choose_path_in_pg( + + pgpath = path_to_pgpath(path); + +- if (unlikely(lockless_dereference(m->current_pg) != pg)) { ++ if (unlikely(READ_ONCE(m->current_pg) != pg)) { + /* Only update current_pgpath if pg changed */ + spin_lock_irqsave(&m->lock, flags); + m->current_pgpath = pgpath; +@@ -390,7 +390,7 @@ static struct pgpath *choose_pgpath(stru + } + + /* Were we instructed to switch PG? */ +- if (lockless_dereference(m->next_pg)) { ++ if (READ_ONCE(m->next_pg)) { + spin_lock_irqsave(&m->lock, flags); + pg = m->next_pg; + if (!pg) { +@@ -406,7 +406,7 @@ static struct pgpath *choose_pgpath(stru + + /* Don't change PG until it has no remaining paths */ + check_current_pg: +- pg = lockless_dereference(m->current_pg); ++ pg = READ_ONCE(m->current_pg); + if (pg) { + pgpath = choose_path_in_pg(m, pg, nr_bytes); + if (!IS_ERR_OR_NULL(pgpath)) +@@ -473,7 +473,7 @@ static int multipath_clone_and_map(struc + struct request *clone; + + /* Do we need to select a new pgpath? */ +- pgpath = lockless_dereference(m->current_pgpath); ++ pgpath = READ_ONCE(m->current_pgpath); + if (!pgpath || !test_bit(MPATHF_QUEUE_IO, &m->flags)) + pgpath = choose_pgpath(m, nr_bytes); + +@@ -533,7 +533,7 @@ static int __multipath_map_bio(struct mu + bool queue_io; + + /* Do we need to select a new pgpath? 
*/ +- pgpath = lockless_dereference(m->current_pgpath); ++ pgpath = READ_ONCE(m->current_pgpath); + queue_io = test_bit(MPATHF_QUEUE_IO, &m->flags); + if (!pgpath || !queue_io) + pgpath = choose_pgpath(m, nr_bytes); +@@ -1802,7 +1802,7 @@ static int multipath_prepare_ioctl(struc + struct pgpath *current_pgpath; + int r; + +- current_pgpath = lockless_dereference(m->current_pgpath); ++ current_pgpath = READ_ONCE(m->current_pgpath); + if (!current_pgpath) + current_pgpath = choose_pgpath(m, 0); + +@@ -1824,7 +1824,7 @@ static int multipath_prepare_ioctl(struc + } + + if (r == -ENOTCONN) { +- if (!lockless_dereference(m->current_pg)) { ++ if (!READ_ONCE(m->current_pg)) { + /* Path status changed, redo selection */ + (void) choose_pgpath(m, 0); + } +@@ -1893,9 +1893,9 @@ static int multipath_busy(struct dm_targ + return (m->queue_mode != DM_TYPE_MQ_REQUEST_BASED); + + /* Guess which priority_group will be used at next mapping time */ +- pg = lockless_dereference(m->current_pg); +- next_pg = lockless_dereference(m->next_pg); +- if (unlikely(!lockless_dereference(m->current_pgpath) && next_pg)) ++ pg = READ_ONCE(m->current_pg); ++ next_pg = READ_ONCE(m->next_pg); ++ if (unlikely(!READ_ONCE(m->current_pgpath) && next_pg)) + pg = next_pg; + + if (!pg) { +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -231,7 +231,7 @@ static inline int dentry_cmp(const struc + { + /* + * Be careful about RCU walk racing with rename: +- * use 'lockless_dereference' to fetch the name pointer. ++ * use 'READ_ONCE' to fetch the name pointer. + * + * NOTE! Even if a rename will mean that the length + * was not loaded atomically, we don't care. 
The +@@ -245,7 +245,7 @@ static inline int dentry_cmp(const struc + * early because the data cannot match (there can + * be no NUL in the ct/tcount data) + */ +- const unsigned char *cs = lockless_dereference(dentry->d_name.name); ++ const unsigned char *cs = READ_ONCE(dentry->d_name.name); + + return dentry_string_cmp(cs, ct, tcount); + } +--- a/fs/overlayfs/ovl_entry.h ++++ b/fs/overlayfs/ovl_entry.h +@@ -77,5 +77,5 @@ static inline struct ovl_inode *OVL_I(st + + static inline struct dentry *ovl_upperdentry_dereference(struct ovl_inode *oi) + { +- return lockless_dereference(oi->__upperdentry); ++ return READ_ONCE(oi->__upperdentry); + } +--- a/fs/overlayfs/readdir.c ++++ b/fs/overlayfs/readdir.c +@@ -757,7 +757,7 @@ static int ovl_dir_fsync(struct file *fi + if (!od->is_upper && OVL_TYPE_UPPER(ovl_path_type(dentry))) { + struct inode *inode = file_inode(file); + +- realfile = lockless_dereference(od->upperfile); ++ realfile = READ_ONCE(od->upperfile); + if (!realfile) { + struct path upperpath; + +--- a/include/linux/rculist.h ++++ b/include/linux/rculist.h +@@ -275,7 +275,7 @@ static inline void list_splice_tail_init + * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). + */ + #define list_entry_rcu(ptr, type, member) \ +- container_of(lockless_dereference(ptr), type, member) ++ container_of(READ_ONCE(ptr), type, member) + + /* + * Where are list_empty_rcu() and list_first_entry_rcu()? +@@ -368,7 +368,7 @@ static inline void list_splice_tail_init + * example is when items are added to the list, but never deleted. 
+ */ + #define list_entry_lockless(ptr, type, member) \ +- container_of((typeof(ptr))lockless_dereference(ptr), type, member) ++ container_of((typeof(ptr))READ_ONCE(ptr), type, member) + + /** + * list_for_each_entry_lockless - iterate over rcu list of given type +--- a/include/linux/rcupdate.h ++++ b/include/linux/rcupdate.h +@@ -346,7 +346,7 @@ static inline void rcu_preempt_sleep_che + #define __rcu_dereference_check(p, c, space) \ + ({ \ + /* Dependency order vs. p above. */ \ +- typeof(*p) *________p1 = (typeof(*p) *__force)lockless_dereference(p); \ ++ typeof(*p) *________p1 = (typeof(*p) *__force)READ_ONCE(p); \ + RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_check() usage"); \ + rcu_dereference_sparse(p, space); \ + ((typeof(*p) __force __kernel *)(________p1)); \ +@@ -360,7 +360,7 @@ static inline void rcu_preempt_sleep_che + #define rcu_dereference_raw(p) \ + ({ \ + /* Dependency order vs. p above. */ \ +- typeof(p) ________p1 = lockless_dereference(p); \ ++ typeof(p) ________p1 = READ_ONCE(p); \ + ((typeof(*p) __force __kernel *)(________p1)); \ + }) + +--- a/kernel/events/core.c ++++ b/kernel/events/core.c +@@ -4233,7 +4233,7 @@ static void perf_remove_from_owner(struc + * indeed free this event, otherwise we need to serialize on + * owner->perf_event_mutex. + */ +- owner = lockless_dereference(event->owner); ++ owner = READ_ONCE(event->owner); + if (owner) { + /* + * Since delayed_put_task_struct() also drops the last +@@ -4330,7 +4330,7 @@ again: + * Cannot change, child events are not migrated, see the + * comment with perf_event_ctx_lock_nested(). + */ +- ctx = lockless_dereference(child->ctx); ++ ctx = READ_ONCE(child->ctx); + /* + * Since child_mutex nests inside ctx::mutex, we must jump + * through hoops. We start by grabbing a reference on the ctx. 
+--- a/kernel/seccomp.c ++++ b/kernel/seccomp.c +@@ -190,7 +190,7 @@ static u32 seccomp_run_filters(const str + u32 ret = SECCOMP_RET_ALLOW; + /* Make sure cross-thread synced filter points somewhere sane. */ + struct seccomp_filter *f = +- lockless_dereference(current->seccomp.filter); ++ READ_ONCE(current->seccomp.filter); + + /* Ensure unexpected behavior doesn't result in failing open. */ + if (unlikely(WARN_ON(f == NULL))) +--- a/kernel/task_work.c ++++ b/kernel/task_work.c +@@ -68,7 +68,7 @@ task_work_cancel(struct task_struct *tas + * we raced with task_work_run(), *pprev == NULL/exited. + */ + raw_spin_lock_irqsave(&task->pi_lock, flags); +- while ((work = lockless_dereference(*pprev))) { ++ while ((work = READ_ONCE(*pprev))) { + if (work->func != func) + pprev = &work->next; + else if (cmpxchg(pprev, work, work->next) == work) +--- a/mm/slab.h ++++ b/mm/slab.h +@@ -259,7 +259,7 @@ cache_from_memcg_idx(struct kmem_cache * + * memcg_caches issues a write barrier to match this (see + * memcg_create_kmem_cache()). + */ +- cachep = lockless_dereference(arr->entries[idx]); ++ cachep = READ_ONCE(arr->entries[idx]); + rcu_read_unlock(); + + return cachep; diff --git a/queue-4.14/perf-x86-enable-free-running-pebs-for-regs_user-intr.patch b/queue-4.14/perf-x86-enable-free-running-pebs-for-regs_user-intr.patch new file mode 100644 index 00000000000..709c25ae53c --- /dev/null +++ b/queue-4.14/perf-x86-enable-free-running-pebs-for-regs_user-intr.patch @@ -0,0 +1,98 @@ +From 2fe1bc1f501d55e5925b4035bcd85781adc76c63 Mon Sep 17 00:00:00 2001 +From: Andi Kleen +Date: Thu, 31 Aug 2017 14:46:30 -0700 +Subject: perf/x86: Enable free running PEBS for REGS_USER/INTR + +From: Andi Kleen + +commit 2fe1bc1f501d55e5925b4035bcd85781adc76c63 upstream. + +[ Note, this is a Git cherry-pick of the following commit: + + a47ba4d77e12 ("perf/x86: Enable free running PEBS for REGS_USER/INTR") + + ... for easier x86 PTI code testing and back-porting. 
] + +Currently free running PEBS is disabled when user or interrupt +registers are requested. Most of the registers are actually +available in the PEBS record and can be supported. + +So we just need to check for the supported registers and then +allow it: it is all except for the segment register. + +For user registers this only works when the counter is limited +to ring 3 only, so this also needs to be checked. + +Signed-off-by: Andi Kleen +Signed-off-by: Peter Zijlstra (Intel) +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/20170831214630.21892-1-andi@firstfloor.org +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/events/intel/core.c | 4 ++++ + arch/x86/events/perf_event.h | 24 +++++++++++++++++++++++- + 2 files changed, 27 insertions(+), 1 deletion(-) + +--- a/arch/x86/events/intel/core.c ++++ b/arch/x86/events/intel/core.c +@@ -2958,6 +2958,10 @@ static unsigned long intel_pmu_free_runn + + if (event->attr.use_clockid) + flags &= ~PERF_SAMPLE_TIME; ++ if (!event->attr.exclude_kernel) ++ flags &= ~PERF_SAMPLE_REGS_USER; ++ if (event->attr.sample_regs_user & ~PEBS_REGS) ++ flags &= ~(PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR); + return flags; + } + +--- a/arch/x86/events/perf_event.h ++++ b/arch/x86/events/perf_event.h +@@ -85,13 +85,15 @@ struct amd_nb { + * Flags PEBS can handle without an PMI. + * + * TID can only be handled by flushing at context switch. ++ * REGS_USER can be handled for events limited to ring 3. + * + */ + #define PEBS_FREERUNNING_FLAGS \ + (PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \ + PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \ + PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \ +- PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR) ++ PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \ ++ PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER) + + /* + * A debug store configuration. 
+@@ -110,6 +112,26 @@ struct debug_store { + u64 pebs_event_reset[MAX_PEBS_EVENTS]; + }; + ++#define PEBS_REGS \ ++ (PERF_REG_X86_AX | \ ++ PERF_REG_X86_BX | \ ++ PERF_REG_X86_CX | \ ++ PERF_REG_X86_DX | \ ++ PERF_REG_X86_DI | \ ++ PERF_REG_X86_SI | \ ++ PERF_REG_X86_SP | \ ++ PERF_REG_X86_BP | \ ++ PERF_REG_X86_IP | \ ++ PERF_REG_X86_FLAGS | \ ++ PERF_REG_X86_R8 | \ ++ PERF_REG_X86_R9 | \ ++ PERF_REG_X86_R10 | \ ++ PERF_REG_X86_R11 | \ ++ PERF_REG_X86_R12 | \ ++ PERF_REG_X86_R13 | \ ++ PERF_REG_X86_R14 | \ ++ PERF_REG_X86_R15) ++ + /* + * Per register state. + */ diff --git a/queue-4.14/selftests-x86-ldt_gdt-add-infrastructure-to-test-set_thread_area.patch b/queue-4.14/selftests-x86-ldt_gdt-add-infrastructure-to-test-set_thread_area.patch new file mode 100644 index 00000000000..db650d0c038 --- /dev/null +++ b/queue-4.14/selftests-x86-ldt_gdt-add-infrastructure-to-test-set_thread_area.patch @@ -0,0 +1,104 @@ +From d744dcad39094c9187075e274d1cdef79c57c8b5 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Sat, 4 Nov 2017 04:19:50 -0700 +Subject: selftests/x86/ldt_gdt: Add infrastructure to test set_thread_area() + +From: Andy Lutomirski + +commit d744dcad39094c9187075e274d1cdef79c57c8b5 upstream. + +Much of the test design could apply to set_thread_area() (i.e. GDT), +not just modify_ldt(). Add set_thread_area() to the +install_valid_mode() helper. 
+ +Signed-off-by: Andy Lutomirski +Cc: Borislav Petkov +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/02c23f8fba5547007f741dc24c3926e5284ede02.1509794321.git.luto@kernel.org +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + tools/testing/selftests/x86/ldt_gdt.c | 53 +++++++++++++++++++++++----------- + 1 file changed, 37 insertions(+), 16 deletions(-) + +--- a/tools/testing/selftests/x86/ldt_gdt.c ++++ b/tools/testing/selftests/x86/ldt_gdt.c +@@ -137,30 +137,51 @@ static void check_valid_segment(uint16_t + } + } + +-static bool install_valid_mode(const struct user_desc *desc, uint32_t ar, +- bool oldmode) ++static bool install_valid_mode(const struct user_desc *d, uint32_t ar, ++ bool oldmode, bool ldt) + { +- int ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11, +- desc, sizeof(*desc)); +- if (ret < -1) +- errno = -ret; ++ struct user_desc desc = *d; ++ int ret; ++ ++ if (!ldt) { ++#ifndef __i386__ ++ /* No point testing set_thread_area in a 64-bit build */ ++ return false; ++#endif ++ if (!gdt_entry_num) ++ return false; ++ desc.entry_number = gdt_entry_num; ++ ++ ret = syscall(SYS_set_thread_area, &desc); ++ } else { ++ ret = syscall(SYS_modify_ldt, oldmode ? 
1 : 0x11, ++ &desc, sizeof(desc)); ++ ++ if (ret < -1) ++ errno = -ret; ++ ++ if (ret != 0 && errno == ENOSYS) { ++ printf("[OK]\tmodify_ldt returned -ENOSYS\n"); ++ return false; ++ } ++ } ++ + if (ret == 0) { +- uint32_t limit = desc->limit; +- if (desc->limit_in_pages) ++ uint32_t limit = desc.limit; ++ if (desc.limit_in_pages) + limit = (limit << 12) + 4095; +- check_valid_segment(desc->entry_number, 1, ar, limit, true); ++ check_valid_segment(desc.entry_number, ldt, ar, limit, true); + return true; +- } else if (errno == ENOSYS) { +- printf("[OK]\tmodify_ldt returned -ENOSYS\n"); +- return false; + } else { +- if (desc->seg_32bit) { +- printf("[FAIL]\tUnexpected modify_ldt failure %d\n", ++ if (desc.seg_32bit) { ++ printf("[FAIL]\tUnexpected %s failure %d\n", ++ ldt ? "modify_ldt" : "set_thread_area", + errno); + nerrs++; + return false; + } else { +- printf("[OK]\tmodify_ldt rejected 16 bit segment\n"); ++ printf("[OK]\t%s rejected 16 bit segment\n", ++ ldt ? "modify_ldt" : "set_thread_area"); + return false; + } + } +@@ -168,7 +189,7 @@ static bool install_valid_mode(const str + + static bool install_valid(const struct user_desc *desc, uint32_t ar) + { +- return install_valid_mode(desc, ar, false); ++ return install_valid_mode(desc, ar, false, true); + } + + static void install_invalid(const struct user_desc *desc, bool oldmode) diff --git a/queue-4.14/selftests-x86-ldt_gdt-run-most-existing-ldt-test-cases-against-the-gdt-as-well.patch b/queue-4.14/selftests-x86-ldt_gdt-run-most-existing-ldt-test-cases-against-the-gdt-as-well.patch new file mode 100644 index 00000000000..79190480675 --- /dev/null +++ b/queue-4.14/selftests-x86-ldt_gdt-run-most-existing-ldt-test-cases-against-the-gdt-as-well.patch @@ -0,0 +1,44 @@ +From adedf2893c192dd09b1cc2f2dcfdd7cad99ec49d Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Sat, 4 Nov 2017 04:19:51 -0700 +Subject: selftests/x86/ldt_gdt: Run most existing LDT test cases against the GDT as well + +From: Andy Lutomirski + 
+commit adedf2893c192dd09b1cc2f2dcfdd7cad99ec49d upstream. + +Now that the main test infrastructure supports the GDT, run tests +that will pass the kernel's GDT permission tests against the GDT. + +Signed-off-by: Andy Lutomirski +Cc: Borislav Petkov +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/686a1eda63414da38fcecc2412db8dba1ae40581.1509794321.git.luto@kernel.org +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + tools/testing/selftests/x86/ldt_gdt.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +--- a/tools/testing/selftests/x86/ldt_gdt.c ++++ b/tools/testing/selftests/x86/ldt_gdt.c +@@ -189,7 +189,15 @@ static bool install_valid_mode(const str + + static bool install_valid(const struct user_desc *desc, uint32_t ar) + { +- return install_valid_mode(desc, ar, false, true); ++ bool ret = install_valid_mode(desc, ar, false, true); ++ ++ if (desc->contents <= 1 && desc->seg_32bit && ++ !desc->seg_not_present) { ++ /* Should work in the GDT, too. 
*/ ++ install_valid_mode(desc, ar, false, false); ++ } ++ ++ return ret; + } + + static void install_invalid(const struct user_desc *desc, bool oldmode) diff --git a/queue-4.14/series b/queue-4.14/series index 2d0c808f8db..0bc857d9d14 100644 --- a/queue-4.14/series +++ b/queue-4.14/series @@ -32,6 +32,71 @@ x86-cpufeatures-enable-new-sse-avx-avx512-cpu-features.patch x86-mm-relocate-page-fault-error-codes-to-traps.h.patch x86-boot-relocate-definition-of-the-initial-state-of-cr0.patch ptrace-x86-make-user_64bit_mode-available-to-32-bit-builds.patch +x86-entry-64-remove-the-restore_c_regs_and_iret-label.patch +x86-entry-64-split-the-iret-to-user-and-iret-to-kernel-paths.patch +x86-entry-64-move-swapgs-into-the-common-iret-to-usermode-path.patch +x86-entry-64-simplify-reg-restore-code-in-the-standard-iret-paths.patch +x86-entry-64-shrink-paranoid_exit_restore-and-make-labels-local.patch +x86-entry-64-use-pop-instead-of-movq-in-syscall_return_via_sysret.patch +x86-entry-64-merge-the-fast-and-slow-sysret-paths.patch +x86-entry-64-use-pop-instead-of-mov-to-restore-regs-on-nmi-return.patch +x86-entry-64-remove-the-restore_..._regs-infrastructure.patch +xen-x86-entry-64-add-xen-nmi-trap-entry.patch +x86-entry-64-de-xen-ify-our-nmi-code.patch +x86-entry-32-pull-the-msr_ia32_sysenter_cs-update-code-out-of-native_load_sp0.patch +x86-entry-64-pass-sp0-directly-to-load_sp0.patch +x86-entry-add-task_top_of_stack-to-find-the-top-of-a-task-s-stack.patch +x86-xen-64-x86-entry-64-clean-up-sp-code-in-cpu_initialize_context.patch +x86-entry-64-stop-initializing-tss.sp0-at-boot.patch +x86-entry-64-remove-all-remaining-direct-thread_struct-sp0-reads.patch +x86-entry-32-fix-cpu_current_top_of_stack-initialization-at-boot.patch +x86-entry-64-remove-thread_struct-sp0.patch +x86-traps-use-a-new-on_thread_stack-helper-to-clean-up-an-assertion.patch +x86-entry-64-shorten-test-instructions.patch +x86-cpuid-replace-set-clear_bit32.patch 
+bitops-revert-cbe96375025e-bitops-add-clear-set_bit32-to-linux-bitops.h.patch +x86-mm-define-_page_table-using-_kernpg_table.patch +x86-cpufeatures-re-tabulate-the-x86_feature-definitions.patch +x86-cpufeatures-fix-various-details-in-the-feature-definitions.patch +selftests-x86-ldt_gdt-add-infrastructure-to-test-set_thread_area.patch +selftests-x86-ldt_gdt-run-most-existing-ldt-test-cases-against-the-gdt-as-well.patch +acpi-apei-replace-ioremap_page_range-with-fixmap.patch +x86-virt-x86-platform-merge-struct-x86_hyper-into-struct-x86_platform-and-struct-x86_init.patch +x86-virt-add-enum-for-hypervisors-to-replace-x86_hyper.patch +drivers-misc-intel-pti-rename-the-header-file-to-free-up-the-namespace.patch +x86-cpufeature-add-user-mode-instruction-prevention-definitions.patch +x86-make-x86_bug_fxsave_leak-detectable-in-cpuid-on-amd.patch +perf-x86-enable-free-running-pebs-for-regs_user-intr.patch +bpf-fix-build-issues-on-um-due-to-mising-bpf_perf_event.h.patch +locking-barriers-add-implicit-smp_read_barrier_depends-to-read_once.patch +locking-barriers-convert-users-of-lockless_dereference-to-read_once.patch +x86-mm-kasan-don-t-use-vmemmap_populate-to-initialize-shadow.patch +x86-entry-64-paravirt-use-paravirt-safe-macro-to-access-eflags.patch +x86-unwinder-orc-dont-bail-on-stack-overflow.patch +x86-unwinder-handle-stack-overflows-more-gracefully.patch +x86-irq-remove-an-old-outdated-comment-about-context-tracking-races.patch +x86-irq-64-print-the-offending-ip-in-the-stack-overflow-warning.patch +x86-entry-64-allocate-and-enable-the-sysenter-stack.patch +x86-dumpstack-add-get_stack_info-support-for-the-sysenter-stack.patch +x86-entry-gdt-put-per-cpu-gdt-remaps-in-ascending-order.patch +x86-mm-fixmap-generalize-the-gdt-fixmap-mechanism-introduce-struct-cpu_entry_area.patch +x86-kasan-64-teach-kasan-about-the-cpu_entry_area.patch +x86-entry-fix-assumptions-that-the-hw-tss-is-at-the-beginning-of-cpu_tss.patch +x86-dumpstack-handle-stack-overflow-on-all-stacks.patch 
+x86-entry-move-sysenter_stack-to-the-beginning-of-struct-tss_struct.patch +x86-entry-remap-the-tss-into-the-cpu-entry-area.patch +x86-entry-64-separate-cpu_current_top_of_stack-from-tss.sp0.patch +x86-espfix-64-stop-assuming-that-pt_regs-is-on-the-entry-stack.patch +x86-entry-64-use-a-per-cpu-trampoline-stack-for-idt-entries.patch +x86-entry-64-return-to-userspace-from-the-trampoline-stack.patch +x86-entry-64-create-a-per-cpu-syscall-entry-trampoline.patch +x86-entry-64-move-the-ist-stacks-into-struct-cpu_entry_area.patch +x86-entry-64-remove-the-sysenter-stack-canary.patch +x86-entry-clean-up-the-sysenter_stack-code.patch +x86-entry-64-make-cpu_entry_area.tss-read-only.patch +x86-paravirt-dont-patch-flush_tlb_single.patch +x86-paravirt-provide-a-way-to-check-for-hypervisors.patch +x86-cpufeatures-make-cpu-bugs-sticky.patch optee-fix-invalid-of_node_put-in-optee_driver_init.patch backlight-pwm_bl-fix-overflow-condition.patch drm-add-retries-for-lspcon-mode-detection.patch diff --git a/queue-4.14/x86-cpufeature-add-user-mode-instruction-prevention-definitions.patch b/queue-4.14/x86-cpufeature-add-user-mode-instruction-prevention-definitions.patch new file mode 100644 index 00000000000..d254e003056 --- /dev/null +++ b/queue-4.14/x86-cpufeature-add-user-mode-instruction-prevention-definitions.patch @@ -0,0 +1,78 @@ +From a8b4db562e7283a1520f9e9730297ecaab7622ea Mon Sep 17 00:00:00 2001 +From: Ricardo Neri +Date: Sun, 5 Nov 2017 18:27:51 -0800 +Subject: x86/cpufeature: Add User-Mode Instruction Prevention definitions + +From: Ricardo Neri + +commit a8b4db562e7283a1520f9e9730297ecaab7622ea upstream. + +[ Note, this is a Git cherry-pick of the following commit: (limited to the cpufeatures.h file) + + 3522c2a6a4f3 ("x86/cpufeature: Add User-Mode Instruction Prevention definitions") + + ... for easier x86 PTI code testing and back-porting. 
] + +User-Mode Instruction Prevention is a security feature present in new +Intel processors that, when set, prevents the execution of a subset of +instructions if such instructions are executed in user mode (CPL > 0). +Attempting to execute such instructions causes a general protection +exception. + +The subset of instructions comprises: + + * SGDT - Store Global Descriptor Table + * SIDT - Store Interrupt Descriptor Table + * SLDT - Store Local Descriptor Table + * SMSW - Store Machine Status Word + * STR - Store Task Register + +This feature is also added to the list of disabled-features to allow +a cleaner handling of build-time configuration. + +Signed-off-by: Ricardo Neri +Reviewed-by: Thomas Gleixner +Reviewed-by: Borislav Petkov +Cc: Andrew Morton +Cc: Andy Lutomirski +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Chen Yucong +Cc: Chris Metcalf +Cc: Dave Hansen +Cc: Denys Vlasenko +Cc: Fenghua Yu +Cc: H. Peter Anvin +Cc: Huang Rui +Cc: Jiri Slaby +Cc: Jonathan Corbet +Cc: Josh Poimboeuf +Cc: Linus Torvalds +Cc: Masami Hiramatsu +Cc: Michael S. Tsirkin +Cc: Paolo Bonzini +Cc: Paul Gortmaker +Cc: Peter Zijlstra +Cc: Ravi V. 
Shankar +Cc: Shuah Khan +Cc: Tony Luck +Cc: Vlastimil Babka +Cc: ricardo.neri@intel.com +Link: http://lkml.kernel.org/r/1509935277-22138-7-git-send-email-ricardo.neri-calderon@linux.intel.com +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/cpufeatures.h | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -296,6 +296,7 @@ + + /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ + #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ ++#define X86_FEATURE_UMIP (16*32+ 2) /* User Mode Instruction Protection */ + #define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ + #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ + #define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */ diff --git a/queue-4.14/x86-cpufeatures-fix-various-details-in-the-feature-definitions.patch b/queue-4.14/x86-cpufeatures-fix-various-details-in-the-feature-definitions.patch new file mode 100644 index 00000000000..d2d7306999c --- /dev/null +++ b/queue-4.14/x86-cpufeatures-fix-various-details-in-the-feature-definitions.patch @@ -0,0 +1,359 @@ +From f3a624e901c633593156f7b00ca743a6204a29bc Mon Sep 17 00:00:00 2001 +From: Ingo Molnar +Date: Tue, 31 Oct 2017 13:17:23 +0100 +Subject: x86/cpufeatures: Fix various details in the feature definitions + +From: Ingo Molnar + +commit f3a624e901c633593156f7b00ca743a6204a29bc upstream. + +Kept this commit separate from the re-tabulation changes, to make +the changes easier to review: + + - add better explanation for entries with no explanation + - fix/enhance the text of some of the entries + - fix the vertical alignment of some of the feature number definitions + - fix inconsistent capitalization + - ... and lots of other small details + +i.e. 
make it all more of a coherent unit, instead of a patchwork of years of additions. + +Cc: Andrew Morton +Cc: Andy Lutomirski +Cc: Andy Lutomirski +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Denys Vlasenko +Cc: Josh Poimboeuf +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/20171031121723.28524-4-mingo@kernel.org +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/cpufeatures.h | 149 ++++++++++++++++++------------------- + 1 file changed, 74 insertions(+), 75 deletions(-) + +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -20,14 +20,12 @@ + * Note: If the comment begins with a quoted string, that string is used + * in /proc/cpuinfo instead of the macro name. If the string is "", + * this feature bit is not displayed in /proc/cpuinfo at all. +- */ +- +-/* ++ * + * When adding new features here that depend on other features, +- * please update the table in kernel/cpu/cpuid-deps.c ++ * please update the table in kernel/cpu/cpuid-deps.c as well. 
+ */ + +-/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */ ++/* Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 */ + #define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */ + #define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */ + #define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */ +@@ -42,8 +40,7 @@ + #define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */ + #define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */ + #define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */ +-#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */ +- /* (plus FCMOVcc, FCOMI with FPU) */ ++#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */ + #define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */ + #define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */ + #define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */ +@@ -63,15 +60,15 @@ + /* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ + /* Don't duplicate feature flags which are redundant with Intel! */ + #define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */ +-#define X86_FEATURE_MP ( 1*32+19) /* MP Capable. */ ++#define X86_FEATURE_MP ( 1*32+19) /* MP Capable */ + #define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */ + #define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */ + #define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */ + #define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */ + #define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */ +-#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */ +-#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */ +-#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! 
*/ ++#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64, 64-bit support) */ ++#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow extensions */ ++#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow */ + + /* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ + #define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */ +@@ -84,66 +81,67 @@ + #define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */ + #define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */ + #define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */ +-/* cpu types for specific tunings: */ ++ ++/* CPU types for specific tunings: */ + #define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ + #define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */ + #define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ + #define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ + #define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ +-#define X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */ +-#define X86_FEATURE_ART ( 3*32+10) /* Platform has always running timer (ART) */ ++#define X86_FEATURE_UP ( 3*32+ 9) /* SMP kernel running on UP */ ++#define X86_FEATURE_ART ( 3*32+10) /* Always running timer (ART) */ + #define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */ + #define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */ + #define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */ +-#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */ +-#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */ +-#define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */ +-#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */ +-#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */ ++#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */ ++#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 
userspace */ ++#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ ++#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" MFENCE synchronizes RDTSC */ ++#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */ + #define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ + #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ + #define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ +-#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */ ++#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* CPU topology enum extensions */ + #define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ + #define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ + #define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */ +-#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ +-#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ +-#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ ++#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */ ++#define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */ ++#define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */ + #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ + #define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */ + +-/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ ++/* Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 */ + #define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ + #define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */ + #define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */ +-#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" Monitor/Mwait support */ +-#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. 
Debug Store */ ++#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" MONITOR/MWAIT support */ ++#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */ + #define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */ +-#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */ ++#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer Mode eXtensions */ + #define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */ + #define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */ + #define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */ + #define X86_FEATURE_CID ( 4*32+10) /* Context ID */ + #define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */ + #define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */ +-#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */ ++#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B instruction */ + #define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */ +-#define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */ ++#define X86_FEATURE_PDCM ( 4*32+15) /* Perf/Debug Capabilities MSR */ + #define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */ + #define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */ + #define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */ + #define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */ +-#define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */ ++#define X86_FEATURE_X2APIC ( 4*32+21) /* X2APIC */ + #define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */ + #define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */ +-#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* Tsc deadline timer */ ++#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* TSC deadline timer */ + #define X86_FEATURE_AES ( 4*32+25) /* AES instructions */ +-#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ +-#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */ ++#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV 
instructions */ ++#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE instruction enabled in the OS */ + #define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */ +-#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit fp conversions */ +-#define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */ ++#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit FP conversions */ ++#define X86_FEATURE_RDRAND ( 4*32+30) /* RDRAND instruction */ + #define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */ + + /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ +@@ -158,10 +156,10 @@ + #define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */ + #define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */ + +-/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */ ++/* More extended AMD flags: CPUID level 0x80000001, ECX, word 6 */ + #define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */ + #define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */ +-#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */ ++#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure Virtual Machine */ + #define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */ + #define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */ + #define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */ +@@ -175,16 +173,16 @@ + #define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */ + #define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */ + #define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */ +-#define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */ ++#define X86_FEATURE_TCE ( 6*32+17) /* Translation Cache Extension */ + #define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */ +-#define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */ +-#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */ +-#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* 
core performance counter extensions */ ++#define X86_FEATURE_TBM ( 6*32+21) /* Trailing Bit Manipulations */ ++#define X86_FEATURE_TOPOEXT ( 6*32+22) /* Topology extensions CPUID leafs */ ++#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* Core performance counter extensions */ + #define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ +-#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */ +-#define X86_FEATURE_PTSC ( 6*32+27) /* performance time-stamp counter */ ++#define X86_FEATURE_BPEXT ( 6*32+26) /* Data breakpoint extension */ ++#define X86_FEATURE_PTSC ( 6*32+27) /* Performance time-stamp counter */ + #define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */ +-#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */ ++#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX instructions) */ + + /* + * Auxiliary flags: Linux defined - For features scattered in various +@@ -192,7 +190,7 @@ + * + * Reuse free bits when adding new feature flags! 
+ */ +-#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */ ++#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT instructions */ + #define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */ + #define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ + #define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ +@@ -206,8 +204,8 @@ + + #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ + #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ +-#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ +-#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ ++#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */ ++#define X86_FEATURE_AVX512_4FMAPS ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */ + + #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ + +@@ -218,19 +216,19 @@ + #define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */ + #define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */ + +-#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */ ++#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */ + #define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ + + +-/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ +-#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ +-#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */ ++/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ ++#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ ++#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3B */ + #define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ + #define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock 
Elision */ + #define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ + #define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ + #define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */ +-#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */ ++#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */ + #define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ + #define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ + #define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ +@@ -238,8 +236,8 @@ + #define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */ + #define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ + #define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */ +-#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ +-#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */ ++#define X86_FEATURE_RDSEED ( 9*32+18) /* RDSEED instruction */ ++#define X86_FEATURE_ADX ( 9*32+19) /* ADCX and ADOX instructions */ + #define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ + #define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */ + #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ +@@ -251,25 +249,25 @@ + #define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */ + #define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */ + +-/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */ +-#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */ +-#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */ +-#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */ +-#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */ ++/* Extended state features, CPUID level 0x0000000d:1 (EAX), word 10 */ 
++#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT instruction */ ++#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */ ++#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */ ++#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */ + +-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */ ++/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (EDX), word 11 */ + #define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */ + +-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */ +-#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */ ++/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (EDX), word 12 */ ++#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring */ + #define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */ + #define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */ + +-/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ +-#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ +-#define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */ ++/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ ++#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ ++#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ + +-/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ ++/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ + #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ + #define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */ + #define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */ +@@ -281,7 +279,7 @@ + #define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. 
Preference */ + #define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ + +-/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */ ++/* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */ + #define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ + #define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */ + #define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */ +@@ -296,24 +294,24 @@ + #define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ + #define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ + +-/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */ ++/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ + #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ + #define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ + #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ + #define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */ + #define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */ + #define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */ +-#define X86_FEATURE_VPCLMULQDQ (16*32+ 10) /* Carry-Less Multiplication Double Quadword */ +-#define X86_FEATURE_AVX512_VNNI (16*32+ 11) /* Vector Neural Network Instructions */ +-#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB */ ++#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */ ++#define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */ ++#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */ + #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ + #define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ + #define X86_FEATURE_RDPID (16*32+22) /* 
RDPID instruction */ + +-/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */ +-#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */ +-#define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */ +-#define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */ ++/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */ ++#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */ ++#define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */ ++#define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */ + + /* + * BUG word(s) +@@ -340,4 +338,5 @@ + #define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ + #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ + #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ ++ + #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/queue-4.14/x86-cpufeatures-make-cpu-bugs-sticky.patch b/queue-4.14/x86-cpufeatures-make-cpu-bugs-sticky.patch new file mode 100644 index 00000000000..b424c07d667 --- /dev/null +++ b/queue-4.14/x86-cpufeatures-make-cpu-bugs-sticky.patch @@ -0,0 +1,95 @@ +From 6cbd2171e89b13377261d15e64384df60ecb530e Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Mon, 4 Dec 2017 15:07:32 +0100 +Subject: x86/cpufeatures: Make CPU bugs sticky + +From: Thomas Gleixner + +commit 6cbd2171e89b13377261d15e64384df60ecb530e upstream. + +There is currently no way to force CPU bug bits like CPU feature bits. That +makes it impossible to set a bug bit once at boot and have it stick for all +upcoming CPUs. + +Extend the force set/clear arrays to handle bug bits as well. 
+ +Signed-off-by: Thomas Gleixner +Reviewed-by: Borislav Petkov +Cc: Andy Lutomirski +Cc: Boris Ostrovsky +Cc: Borislav Petkov +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Dave Hansen +Cc: David Laight +Cc: Denys Vlasenko +Cc: Eduardo Valentin +Cc: Greg KH +Cc: H. Peter Anvin +Cc: Josh Poimboeuf +Cc: Juergen Gross +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Rik van Riel +Cc: Will Deacon +Cc: aliguori@amazon.com +Cc: daniel.gruss@iaik.tugraz.at +Cc: hughd@google.com +Cc: keescook@google.com +Link: https://lkml.kernel.org/r/20171204150606.992156574@linutronix.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/cpufeature.h | 2 ++ + arch/x86/include/asm/processor.h | 4 ++-- + arch/x86/kernel/cpu/common.c | 6 +++--- + 3 files changed, 7 insertions(+), 5 deletions(-) + +--- a/arch/x86/include/asm/cpufeature.h ++++ b/arch/x86/include/asm/cpufeature.h +@@ -135,6 +135,8 @@ extern void clear_cpu_cap(struct cpuinfo + set_bit(bit, (unsigned long *)cpu_caps_set); \ + } while (0) + ++#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit) ++ + #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS) + /* + * Static testing of CPU features. Used the same as boot_cpu_has(). 
+--- a/arch/x86/include/asm/processor.h ++++ b/arch/x86/include/asm/processor.h +@@ -163,8 +163,8 @@ extern struct cpuinfo_x86 boot_cpu_data; + extern struct cpuinfo_x86 new_cpu_data; + + extern struct x86_hw_tss doublefault_tss; +-extern __u32 cpu_caps_cleared[NCAPINTS]; +-extern __u32 cpu_caps_set[NCAPINTS]; ++extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; ++extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS]; + + #ifdef CONFIG_SMP + DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -452,8 +452,8 @@ static const char *table_lookup_model(st + return NULL; /* Not found */ + } + +-__u32 cpu_caps_cleared[NCAPINTS]; +-__u32 cpu_caps_set[NCAPINTS]; ++__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; ++__u32 cpu_caps_set[NCAPINTS + NBUGINTS]; + + void load_percpu_segment(int cpu) + { +@@ -812,7 +812,7 @@ static void apply_forced_caps(struct cpu + { + int i; + +- for (i = 0; i < NCAPINTS; i++) { ++ for (i = 0; i < NCAPINTS + NBUGINTS; i++) { + c->x86_capability[i] &= ~cpu_caps_cleared[i]; + c->x86_capability[i] |= cpu_caps_set[i]; + } diff --git a/queue-4.14/x86-cpufeatures-re-tabulate-the-x86_feature-definitions.patch b/queue-4.14/x86-cpufeatures-re-tabulate-the-x86_feature-definitions.patch new file mode 100644 index 00000000000..e6da5439a0d --- /dev/null +++ b/queue-4.14/x86-cpufeatures-re-tabulate-the-x86_feature-definitions.patch @@ -0,0 +1,618 @@ +From acbc845ffefd9fb70466182cd8555a26189462b2 Mon Sep 17 00:00:00 2001 +From: Ingo Molnar +Date: Tue, 31 Oct 2017 13:17:22 +0100 +Subject: x86/cpufeatures: Re-tabulate the X86_FEATURE definitions + +From: Ingo Molnar + +commit acbc845ffefd9fb70466182cd8555a26189462b2 upstream. + +Over the years asm/cpufeatures.h has become somewhat of a mess: the original +tabulation style was too narrow, while x86 feature names also kept growing +in length, creating frequent field width overflows. + +Re-tabulate it to make it wider and easier to read/modify. 
Also harmonize +the tabulation of the other defines in this file to match it. + +Cc: Andrew Morton +Cc: Andy Lutomirski +Cc: Andy Lutomirski +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Denys Vlasenko +Cc: Josh Poimboeuf +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/20171031121723.28524-3-mingo@kernel.org +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/cpufeatures.h | 512 ++++++++++++++++++------------------- + 1 file changed, 256 insertions(+), 256 deletions(-) + +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -13,8 +13,8 @@ + /* + * Defines x86 CPU feature bits + */ +-#define NCAPINTS 18 /* N 32-bit words worth of info */ +-#define NBUGINTS 1 /* N 32-bit bug flags */ ++#define NCAPINTS 18 /* N 32-bit words worth of info */ ++#define NBUGINTS 1 /* N 32-bit bug flags */ + + /* + * Note: If the comment begins with a quoted string, that string is used +@@ -28,163 +28,163 @@ + */ + + /* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */ +-#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */ +-#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */ +-#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */ +-#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */ +-#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */ +-#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */ +-#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */ +-#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */ +-#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */ +-#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */ +-#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */ +-#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */ +-#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */ +-#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */ 
+-#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */ ++#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */ ++#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */ ++#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */ ++#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */ ++#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */ ++#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */ ++#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */ ++#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */ ++#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */ ++#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */ ++#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */ ++#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */ ++#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */ ++#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */ ++#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */ + /* (plus FCMOVcc, FCOMI with FPU) */ +-#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */ +-#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */ +-#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */ +-#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */ +-#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */ +-#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */ +-#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */ +-#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ +-#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */ +-#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */ +-#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */ +-#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */ +-#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */ +-#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */ +-#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */ 
++#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */ ++#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */ ++#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */ ++#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */ ++#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */ ++#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */ ++#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */ ++#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ ++#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */ ++#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */ ++#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */ ++#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */ ++#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */ ++#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */ ++#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */ + + /* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ + /* Don't duplicate feature flags which are redundant with Intel! */ +-#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */ +-#define X86_FEATURE_MP ( 1*32+19) /* MP Capable. */ +-#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */ +-#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */ +-#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */ +-#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */ +-#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */ +-#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */ +-#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */ +-#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! */ ++#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */ ++#define X86_FEATURE_MP ( 1*32+19) /* MP Capable. 
*/ ++#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */ ++#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */ ++#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */ ++#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */ ++#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */ ++#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */ ++#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */ ++#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! */ + + /* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ +-#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */ +-#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */ +-#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */ ++#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */ ++#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */ ++#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */ + + /* Other features, Linux-defined mapping, word 3 */ + /* This range is used for feature bits which conflict or are synthesized */ +-#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */ +-#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */ +-#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */ +-#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */ ++#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */ ++#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */ ++#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */ ++#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */ + /* cpu types for specific tunings: */ +-#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ +-#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */ +-#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ +-#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ +-#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks 
at a constant rate */ +-#define X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */ +-#define X86_FEATURE_ART ( 3*32+10) /* Platform has always running timer (ART) */ +-#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */ +-#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */ +-#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */ +-#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */ +-#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */ +-#define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */ +-#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */ +-#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */ +-#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ +-#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ +-#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ +-#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */ +-#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ +-#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ +-#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */ +-#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ +-#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ +-#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ +-#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ +-#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */ ++#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ ++#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */ ++#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ ++#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ ++#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ ++#define 
X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */ ++#define X86_FEATURE_ART ( 3*32+10) /* Platform has always running timer (ART) */ ++#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */ ++#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */ ++#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */ ++#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */ ++#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */ ++#define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */ ++#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */ ++#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */ ++#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ ++#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ ++#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ ++#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */ ++#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ ++#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ ++#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */ ++#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ ++#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ ++#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ ++#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ ++#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */ + + /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ +-#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ +-#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */ +-#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */ +-#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" Monitor/Mwait 
support */ +-#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */ +-#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */ +-#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */ +-#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */ +-#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */ +-#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */ +-#define X86_FEATURE_CID ( 4*32+10) /* Context ID */ +-#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */ +-#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */ +-#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */ +-#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */ +-#define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */ +-#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */ +-#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */ +-#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */ +-#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */ +-#define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */ +-#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */ +-#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */ ++#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ ++#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */ ++#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */ ++#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" Monitor/Mwait support */ ++#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. 
Debug Store */ ++#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */ ++#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */ ++#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */ ++#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */ ++#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */ ++#define X86_FEATURE_CID ( 4*32+10) /* Context ID */ ++#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */ ++#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */ ++#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */ ++#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */ ++#define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */ ++#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */ ++#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */ ++#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */ ++#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */ ++#define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */ ++#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */ ++#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */ + #define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* Tsc deadline timer */ +-#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */ +-#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ +-#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */ +-#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */ +-#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit fp conversions */ +-#define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */ +-#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */ ++#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */ ++#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ ++#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */ ++#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */ ++#define X86_FEATURE_F16C 
( 4*32+29) /* 16-bit fp conversions */ ++#define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */ ++#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */ + + /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ +-#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */ +-#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */ +-#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */ +-#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */ +-#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */ +-#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */ +-#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */ +-#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */ +-#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */ +-#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */ ++#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */ ++#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */ ++#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */ ++#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */ ++#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */ ++#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */ ++#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */ ++#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */ ++#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */ ++#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */ + + /* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */ +-#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */ +-#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */ +-#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */ +-#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */ 
+-#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */ +-#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */ +-#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */ +-#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */ +-#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */ +-#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */ +-#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */ +-#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */ +-#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */ +-#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */ +-#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */ +-#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */ +-#define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */ +-#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */ +-#define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */ +-#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */ +-#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */ +-#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ +-#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */ +-#define X86_FEATURE_PTSC ( 6*32+27) /* performance time-stamp counter */ +-#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */ +-#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */ ++#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */ ++#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */ ++#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */ ++#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */ ++#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */ ++#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit 
manipulation */ ++#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */ ++#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */ ++#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */ ++#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */ ++#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */ ++#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */ ++#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */ ++#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */ ++#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */ ++#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */ ++#define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */ ++#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */ ++#define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */ ++#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */ ++#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */ ++#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ ++#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */ ++#define X86_FEATURE_PTSC ( 6*32+27) /* performance time-stamp counter */ ++#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */ ++#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */ + + /* + * Auxiliary flags: Linux defined - For features scattered in various +@@ -192,152 +192,152 @@ + * + * Reuse free bits when adding new feature flags! 
+ */ +-#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */ +-#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */ +-#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ +-#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ +-#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ +-#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ +-#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ +- +-#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ +-#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ +-#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ +- +-#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ +-#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ +-#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ +-#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ ++#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */ ++#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */ ++#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ ++#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ ++#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ ++#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ ++#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ ++ ++#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ ++#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ ++#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ ++ ++#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ ++#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ ++#define 
X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ ++#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ + +-#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ ++#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ + + /* Virtualization flags: Linux defined, word 8 */ +-#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ +-#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ +-#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */ +-#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */ +-#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */ ++#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ ++#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ ++#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */ ++#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */ ++#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */ + +-#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */ +-#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ ++#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */ ++#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ + + + /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ +-#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ +-#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */ +-#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ +-#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ +-#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ +-#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ +-#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */ +-#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP 
MOVSB/STOSB */ +-#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ +-#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ +-#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ +-#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ +-#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */ +-#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ +-#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */ +-#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ +-#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */ +-#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ +-#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */ +-#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ +-#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ +-#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ +-#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ +-#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ +-#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */ +-#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */ +-#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */ ++#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ ++#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */ ++#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ ++#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ ++#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ ++#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ ++#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit 
manipulation extensions */ ++#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */ ++#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ ++#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ ++#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ ++#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ ++#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */ ++#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ ++#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */ ++#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ ++#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */ ++#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ ++#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */ ++#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ ++#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ ++#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ ++#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ ++#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ ++#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */ ++#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */ ++#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */ + + /* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */ +-#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */ +-#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */ +-#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */ +-#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */ ++#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */ ++#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */ 
++#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */ ++#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */ + + /* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */ +-#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */ ++#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */ + + /* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */ +-#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */ +-#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */ +-#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */ ++#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */ ++#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */ ++#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */ + + /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ +-#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ +-#define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */ ++#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ ++#define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */ + + /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ +-#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ +-#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */ +-#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */ +-#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */ +-#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */ +-#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */ +-#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */ +-#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ +-#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. 
Preference */ +-#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ ++#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ ++#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */ ++#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */ ++#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */ ++#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */ ++#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */ ++#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */ ++#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ ++#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */ ++#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ + + /* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */ +-#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ +-#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */ +-#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */ +-#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */ +-#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */ +-#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */ +-#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */ +-#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */ +-#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ +-#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ +-#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ +-#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ +-#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ ++#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ ++#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */ ++#define 
X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */ ++#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */ ++#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */ ++#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */ ++#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */ ++#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */ ++#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ ++#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ ++#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ ++#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ ++#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ + + /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */ +-#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ +-#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ +-#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ +-#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */ +-#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */ +-#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */ +-#define X86_FEATURE_VPCLMULQDQ (16*32+ 10) /* Carry-Less Multiplication Double Quadword */ +-#define X86_FEATURE_AVX512_VNNI (16*32+ 11) /* Vector Neural Network Instructions */ +-#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB */ +-#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ +-#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ +-#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ ++#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ ++#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for 
Userspace */ ++#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ ++#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */ ++#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */ ++#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */ ++#define X86_FEATURE_VPCLMULQDQ (16*32+ 10) /* Carry-Less Multiplication Double Quadword */ ++#define X86_FEATURE_AVX512_VNNI (16*32+ 11) /* Vector Neural Network Instructions */ ++#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB */ ++#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ ++#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ ++#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ + + /* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */ +-#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */ +-#define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */ +-#define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */ ++#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */ ++#define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */ ++#define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */ + + /* + * BUG word(s) + */ +-#define X86_BUG(x) (NCAPINTS*32 + (x)) ++#define X86_BUG(x) (NCAPINTS*32 + (x)) + +-#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */ +-#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */ +-#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */ +-#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */ +-#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */ +-#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */ +-#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */ +-#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ +-#define 
X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ ++#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */ ++#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */ ++#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */ ++#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */ ++#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */ ++#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */ ++#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */ ++#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ ++#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ + #ifdef CONFIG_X86_32 + /* + * 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional + * to avoid confusion. + */ +-#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */ ++#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */ + #endif +-#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */ +-#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ +-#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ +-#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ ++#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */ ++#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ ++#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ ++#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ + #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/queue-4.14/x86-cpuid-replace-set-clear_bit32.patch b/queue-4.14/x86-cpuid-replace-set-clear_bit32.patch new file mode 100644 index 00000000000..8e39fa5dc3e --- /dev/null +++ b/queue-4.14/x86-cpuid-replace-set-clear_bit32.patch @@ -0,0 +1,62 @@ +From 
06dd688ddda5819025e014b79aea9af6ab475fa2 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 2 Nov 2017 13:22:35 +0100 +Subject: x86/cpuid: Replace set/clear_bit32() + +From: Thomas Gleixner + +commit 06dd688ddda5819025e014b79aea9af6ab475fa2 upstream. + +Peter pointed out that the set/clear_bit32() variants are broken in various +aspects. + +Replace them with open coded set/clear_bit() and type cast +cpu_info::x86_capability as it's done in all other places throughout x86. + +Fixes: 0b00de857a64 ("x86/cpuid: Add generic table for CPUID dependencies") +Reported-by: Peter Zijlstra +Signed-off-by: Thomas Gleixner +Cc: Andi Kleen +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/cpu/cpuid-deps.c | 26 +++++++++++--------------- + 1 file changed, 11 insertions(+), 15 deletions(-) + +--- a/arch/x86/kernel/cpu/cpuid-deps.c ++++ b/arch/x86/kernel/cpu/cpuid-deps.c +@@ -62,23 +62,19 @@ const static struct cpuid_dep cpuid_deps + {} + }; + +-static inline void __clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit) +-{ +- clear_bit32(bit, c->x86_capability); +-} +- +-static inline void __setup_clear_cpu_cap(unsigned int bit) +-{ +- clear_cpu_cap(&boot_cpu_data, bit); +- set_bit32(bit, cpu_caps_cleared); +-} +- + static inline void clear_feature(struct cpuinfo_x86 *c, unsigned int feature) + { +- if (!c) +- __setup_clear_cpu_cap(feature); +- else +- __clear_cpu_cap(c, feature); ++ /* ++ * Note: This could use the non atomic __*_bit() variants, but the ++ * rest of the cpufeature code uses atomics as well, so keep it for ++ * consistency. Cleanup all of it separately.
++ */ ++ if (!c) { ++ clear_cpu_cap(&boot_cpu_data, feature); ++ set_bit(feature, (unsigned long *)cpu_caps_cleared); ++ } else { ++ clear_bit(feature, (unsigned long *)c->x86_capability); ++ } + } + + /* Take the capabilities and the BUG bits into account */ diff --git a/queue-4.14/x86-dumpstack-add-get_stack_info-support-for-the-sysenter-stack.patch b/queue-4.14/x86-dumpstack-add-get_stack_info-support-for-the-sysenter-stack.patch new file mode 100644 index 00000000000..ecbb43c0256 --- /dev/null +++ b/queue-4.14/x86-dumpstack-add-get_stack_info-support-for-the-sysenter-stack.patch @@ -0,0 +1,169 @@ +From 33a2f1a6c4d7c0a02d1c006fb0379cc5ca3b96bb Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Mon, 4 Dec 2017 15:07:13 +0100 +Subject: x86/dumpstack: Add get_stack_info() support for the SYSENTER stack + +From: Andy Lutomirski + +commit 33a2f1a6c4d7c0a02d1c006fb0379cc5ca3b96bb upstream. + +get_stack_info() doesn't currently know about the SYSENTER stack, so +unwinding will fail if we entered the kernel on the SYSENTER stack +and haven't fully switched off. Teach get_stack_info() about the +SYSENTER stack. + +With future patches applied that run part of the entry code on the +SYSENTER stack and introduce an intentional BUG(), I would get: + + PANIC: double fault, error_code: 0x0 + ... + RIP: 0010:do_error_trap+0x33/0x1c0 + ... + Call Trace: + Code: ... + +With this patch, I get: + + PANIC: double fault, error_code: 0x0 + ... + Call Trace: + + ? async_page_fault+0x36/0x60 + ? invalid_op+0x22/0x40 + ? async_page_fault+0x36/0x60 + ? sync_regs+0x3c/0x40 + ? sync_regs+0x2e/0x40 + ? error_entry+0x6c/0xd0 + ? async_page_fault+0x36/0x60 + + Code: ... + +which is a lot more informative. 
+ +Signed-off-by: Andy Lutomirski +Signed-off-by: Thomas Gleixner +Reviewed-by: Borislav Petkov +Cc: Boris Ostrovsky +Cc: Borislav Petkov +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Dave Hansen +Cc: David Laight +Cc: Denys Vlasenko +Cc: Eduardo Valentin +Cc: Greg KH +Cc: H. Peter Anvin +Cc: Josh Poimboeuf +Cc: Juergen Gross +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Rik van Riel +Cc: Will Deacon +Cc: aliguori@amazon.com +Cc: daniel.gruss@iaik.tugraz.at +Cc: hughd@google.com +Cc: keescook@google.com +Link: https://lkml.kernel.org/r/20171204150605.392711508@linutronix.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h +index 8da111b3c342..f8062bfd43a0 100644 +--- a/arch/x86/include/asm/stacktrace.h ++++ b/arch/x86/include/asm/stacktrace.h +@@ -16,6 +16,7 @@ enum stack_type { + STACK_TYPE_TASK, + STACK_TYPE_IRQ, + STACK_TYPE_SOFTIRQ, ++ STACK_TYPE_SYSENTER, + STACK_TYPE_EXCEPTION, + STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1, + }; +@@ -28,6 +29,8 @@ struct stack_info { + bool in_task_stack(unsigned long *stack, struct task_struct *task, + struct stack_info *info); + ++bool in_sysenter_stack(unsigned long *stack, struct stack_info *info); ++ + int get_stack_info(unsigned long *stack, struct task_struct *task, + struct stack_info *info, unsigned long *visit_mask); + +diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c +index 0bc95be5c638..a33a1373a252 100644 +--- a/arch/x86/kernel/dumpstack.c ++++ b/arch/x86/kernel/dumpstack.c +@@ -43,6 +43,25 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task, + return true; + } + ++bool in_sysenter_stack(unsigned long *stack, struct stack_info *info) ++{ ++ struct tss_struct *tss = this_cpu_ptr(&cpu_tss); ++ ++ /* Treat the canary as part of the stack for unwinding purposes. 
*/ ++ void *begin = &tss->SYSENTER_stack_canary; ++ void *end = (void *)&tss->SYSENTER_stack + sizeof(tss->SYSENTER_stack); ++ ++ if ((void *)stack < begin || (void *)stack >= end) ++ return false; ++ ++ info->type = STACK_TYPE_SYSENTER; ++ info->begin = begin; ++ info->end = end; ++ info->next_sp = NULL; ++ ++ return true; ++} ++ + static void printk_stack_address(unsigned long address, int reliable, + char *log_lvl) + { +diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c +index daefae83a3aa..5ff13a6b3680 100644 +--- a/arch/x86/kernel/dumpstack_32.c ++++ b/arch/x86/kernel/dumpstack_32.c +@@ -26,6 +26,9 @@ const char *stack_type_name(enum stack_type type) + if (type == STACK_TYPE_SOFTIRQ) + return "SOFTIRQ"; + ++ if (type == STACK_TYPE_SYSENTER) ++ return "SYSENTER"; ++ + return NULL; + } + +@@ -93,6 +96,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task, + if (task != current) + goto unknown; + ++ if (in_sysenter_stack(stack, info)) ++ goto recursion_check; ++ + if (in_hardirq_stack(stack, info)) + goto recursion_check; + +diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c +index 88ce2ffdb110..abc828f8c297 100644 +--- a/arch/x86/kernel/dumpstack_64.c ++++ b/arch/x86/kernel/dumpstack_64.c +@@ -37,6 +37,9 @@ const char *stack_type_name(enum stack_type type) + if (type == STACK_TYPE_IRQ) + return "IRQ"; + ++ if (type == STACK_TYPE_SYSENTER) ++ return "SYSENTER"; ++ + if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST) + return exception_stack_names[type - STACK_TYPE_EXCEPTION]; + +@@ -115,6 +118,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task, + if (in_irq_stack(stack, info)) + goto recursion_check; + ++ if (in_sysenter_stack(stack, info)) ++ goto recursion_check; ++ + goto unknown; + + recursion_check: diff --git a/queue-4.14/x86-dumpstack-handle-stack-overflow-on-all-stacks.patch b/queue-4.14/x86-dumpstack-handle-stack-overflow-on-all-stacks.patch new 
file mode 100644 index 00000000000..05e9c5fb89e --- /dev/null +++ b/queue-4.14/x86-dumpstack-handle-stack-overflow-on-all-stacks.patch @@ -0,0 +1,87 @@ +From 6e60e583426c2f8751c22c2dfe5c207083b4483a Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Mon, 4 Dec 2017 15:07:18 +0100 +Subject: x86/dumpstack: Handle stack overflow on all stacks + +From: Andy Lutomirski + +commit 6e60e583426c2f8751c22c2dfe5c207083b4483a upstream. + +We currently special-case stack overflow on the task stack. We're +going to start putting special stacks in the fixmap with a custom +layout, so they'll have guard pages, too. Teach the unwinder to be +able to unwind an overflow of any of the stacks. + +Signed-off-by: Andy Lutomirski +Signed-off-by: Thomas Gleixner +Reviewed-by: Borislav Petkov +Cc: Boris Ostrovsky +Cc: Borislav Petkov +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Dave Hansen +Cc: David Laight +Cc: Denys Vlasenko +Cc: Eduardo Valentin +Cc: Greg KH +Cc: H. Peter Anvin +Cc: Josh Poimboeuf +Cc: Juergen Gross +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Rik van Riel +Cc: Will Deacon +Cc: aliguori@amazon.com +Cc: daniel.gruss@iaik.tugraz.at +Cc: hughd@google.com +Cc: keescook@google.com +Link: https://lkml.kernel.org/r/20171204150605.802057305@linutronix.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/dumpstack.c | 24 ++++++++++++++---------- + 1 file changed, 14 insertions(+), 10 deletions(-) + +--- a/arch/x86/kernel/dumpstack.c ++++ b/arch/x86/kernel/dumpstack.c +@@ -112,24 +112,28 @@ void show_trace_log_lvl(struct task_stru + * - task stack + * - interrupt stack + * - HW exception stacks (double fault, nmi, debug, mce) ++ * - SYSENTER stack + * +- * x86-32 can have up to three stacks: ++ * x86-32 can have up to four stacks: + * - task stack + * - softirq stack + * - hardirq stack ++ * - SYSENTER stack + */ + for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { + const char *stack_name; + +- 
/* +- * If we overflowed the task stack into a guard page, jump back +- * to the bottom of the usable stack. +- */ +- if (task_stack_page(task) - (void *)stack < PAGE_SIZE) +- stack = task_stack_page(task); +- +- if (get_stack_info(stack, task, &stack_info, &visit_mask)) +- break; ++ if (get_stack_info(stack, task, &stack_info, &visit_mask)) { ++ /* ++ * We weren't on a valid stack. It's possible that ++ * we overflowed a valid stack into a guard page. ++ * See if the next page up is valid so that we can ++ * generate some kind of backtrace if this happens. ++ */ ++ stack = (unsigned long *)PAGE_ALIGN((unsigned long)stack); ++ if (get_stack_info(stack, task, &stack_info, &visit_mask)) ++ break; ++ } + + stack_name = stack_type_name(stack_info.type); + if (stack_name) diff --git a/queue-4.14/x86-entry-32-fix-cpu_current_top_of_stack-initialization-at-boot.patch b/queue-4.14/x86-entry-32-fix-cpu_current_top_of_stack-initialization-at-boot.patch new file mode 100644 index 00000000000..9d7a709e883 --- /dev/null +++ b/queue-4.14/x86-entry-32-fix-cpu_current_top_of_stack-initialization-at-boot.patch @@ -0,0 +1,41 @@ +From cd493a6deb8b78eca280d05f7fa73fd69403ae29 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Thu, 2 Nov 2017 00:59:15 -0700 +Subject: x86/entry/32: Fix cpu_current_top_of_stack initialization at boot + +From: Andy Lutomirski + +commit cd493a6deb8b78eca280d05f7fa73fd69403ae29 upstream. + +cpu_current_top_of_stack's initialization forgot about +TOP_OF_KERNEL_STACK_PADDING. This bug didn't matter because the +idle threads never enter user mode. 
+ +Signed-off-by: Andy Lutomirski +Reviewed-by: Borislav Petkov +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/e5e370a7e6e4fddd1c4e4cf619765d96bb874b21.1509609304.git.luto@kernel.org +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/smpboot.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/arch/x86/kernel/smpboot.c ++++ b/arch/x86/kernel/smpboot.c +@@ -962,8 +962,7 @@ void common_cpu_up(unsigned int cpu, str + #ifdef CONFIG_X86_32 + /* Stack for startup_32 can be just as for start_secondary onwards */ + irq_ctx_init(cpu); +- per_cpu(cpu_current_top_of_stack, cpu) = +- (unsigned long)task_stack_page(idle) + THREAD_SIZE; ++ per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle); + #else + initial_gs = per_cpu_offset(cpu); + #endif diff --git a/queue-4.14/x86-entry-32-pull-the-msr_ia32_sysenter_cs-update-code-out-of-native_load_sp0.patch b/queue-4.14/x86-entry-32-pull-the-msr_ia32_sysenter_cs-update-code-out-of-native_load_sp0.patch new file mode 100644 index 00000000000..1e6a7be5f7f --- /dev/null +++ b/queue-4.14/x86-entry-32-pull-the-msr_ia32_sysenter_cs-update-code-out-of-native_load_sp0.patch @@ -0,0 +1,127 @@ +From bd7dc5a6afac719d8ce4092391eef2c7e83c2a75 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Thu, 2 Nov 2017 00:59:09 -0700 +Subject: x86/entry/32: Pull the MSR_IA32_SYSENTER_CS update code out of native_load_sp0() + +From: Andy Lutomirski + +commit bd7dc5a6afac719d8ce4092391eef2c7e83c2a75 upstream. + +This causes the MSR_IA32_SYSENTER_CS write to move out of the +paravirt callback. This shouldn't affect Xen PV: Xen already ignores +MSR_IA32_SYSENTER_ESP writes. In any event, Xen doesn't support +vm86() in a useful way. + +Note to any potential backporters: This patch won't break lguest, as +lguest didn't have any SYSENTER support at all. 
+ +Signed-off-by: Andy Lutomirski +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/75cf09fe03ae778532d0ca6c65aa58e66bc2f90c.1509609304.git.luto@kernel.org +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/processor.h | 7 ------- + arch/x86/include/asm/switch_to.h | 12 ++++++++++++ + arch/x86/kernel/process_32.c | 4 +++- + arch/x86/kernel/process_64.c | 2 +- + arch/x86/kernel/vm86_32.c | 6 +++++- + 5 files changed, 21 insertions(+), 10 deletions(-) + +--- a/arch/x86/include/asm/processor.h ++++ b/arch/x86/include/asm/processor.h +@@ -521,13 +521,6 @@ static inline void + native_load_sp0(struct tss_struct *tss, struct thread_struct *thread) + { + tss->x86_tss.sp0 = thread->sp0; +-#ifdef CONFIG_X86_32 +- /* Only happens when SEP is enabled, no need to test "SEP"arately: */ +- if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) { +- tss->x86_tss.ss1 = thread->sysenter_cs; +- wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); +- } +-#endif + } + + static inline void native_swapgs(void) +--- a/arch/x86/include/asm/switch_to.h ++++ b/arch/x86/include/asm/switch_to.h +@@ -73,4 +73,16 @@ do { \ + ((last) = __switch_to_asm((prev), (next))); \ + } while (0) + ++#ifdef CONFIG_X86_32 ++static inline void refresh_sysenter_cs(struct thread_struct *thread) ++{ ++ /* Only happens when SEP is enabled, no need to test "SEP"arately: */ ++ if (unlikely(this_cpu_read(cpu_tss.x86_tss.ss1) == thread->sysenter_cs)) ++ return; ++ ++ this_cpu_write(cpu_tss.x86_tss.ss1, thread->sysenter_cs); ++ wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); ++} ++#endif ++ + #endif /* _ASM_X86_SWITCH_TO_H */ +--- a/arch/x86/kernel/process_32.c ++++ b/arch/x86/kernel/process_32.c +@@ -284,9 +284,11 @@ __switch_to(struct task_struct *prev_p, + + /* + * Reload esp0 and cpu_current_top_of_stack. This changes +- * current_thread_info(). ++ * current_thread_info(). 
Refresh the SYSENTER configuration in ++ * case prev or next is vm86. + */ + load_sp0(tss, next); ++ refresh_sysenter_cs(next); + this_cpu_write(cpu_current_top_of_stack, + (unsigned long)task_stack_page(next_p) + + THREAD_SIZE); +--- a/arch/x86/kernel/process_64.c ++++ b/arch/x86/kernel/process_64.c +@@ -464,7 +464,7 @@ __switch_to(struct task_struct *prev_p, + */ + this_cpu_write(current_task, next_p); + +- /* Reload esp0 and ss1. This changes current_thread_info(). */ ++ /* Reload sp0. */ + load_sp0(tss, next); + + /* +--- a/arch/x86/kernel/vm86_32.c ++++ b/arch/x86/kernel/vm86_32.c +@@ -55,6 +55,7 @@ + #include + #include + #include ++#include + + /* + * Known problems: +@@ -150,6 +151,7 @@ void save_v86_state(struct kernel_vm86_r + tsk->thread.sp0 = vm86->saved_sp0; + tsk->thread.sysenter_cs = __KERNEL_CS; + load_sp0(tss, &tsk->thread); ++ refresh_sysenter_cs(&tsk->thread); + vm86->saved_sp0 = 0; + put_cpu(); + +@@ -369,8 +371,10 @@ static long do_sys_vm86(struct vm86plus_ + /* make room for real-mode segments */ + tsk->thread.sp0 += 16; + +- if (static_cpu_has(X86_FEATURE_SEP)) ++ if (static_cpu_has(X86_FEATURE_SEP)) { + tsk->thread.sysenter_cs = 0; ++ refresh_sysenter_cs(&tsk->thread); ++ } + + load_sp0(tss, &tsk->thread); + put_cpu(); diff --git a/queue-4.14/x86-entry-64-allocate-and-enable-the-sysenter-stack.patch b/queue-4.14/x86-entry-64-allocate-and-enable-the-sysenter-stack.patch new file mode 100644 index 00000000000..60c9cb55651 --- /dev/null +++ b/queue-4.14/x86-entry-64-allocate-and-enable-the-sysenter-stack.patch @@ -0,0 +1,161 @@ +From 1a79797b58cddfa948420a7553241c79c013e3ca Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Mon, 4 Dec 2017 15:07:12 +0100 +Subject: x86/entry/64: Allocate and enable the SYSENTER stack + +From: Andy Lutomirski + +commit 1a79797b58cddfa948420a7553241c79c013e3ca upstream. 
+ +This will simplify future changes that want scratch variables early in +the SYSENTER handler -- they'll be able to spill registers to the +stack. It also lets us get rid of a SWAPGS_UNSAFE_STACK user. + +This does not depend on CONFIG_IA32_EMULATION=y because we'll want the +stack space even without IA32 emulation. + +As far as I can tell, the reason that this wasn't done from day 1 is +that we use IST for #DB and #BP, which is IMO rather nasty and causes +a lot more problems than it solves. But, since #DB uses IST, we don't +actually need a real stack for SYSENTER (because SYSENTER with TF set +will invoke #DB on the IST stack rather than the SYSENTER stack). + +I want to remove IST usage from these vectors some day, and this patch +is a prerequisite for that as well. + +Signed-off-by: Andy Lutomirski +Signed-off-by: Thomas Gleixner +Reviewed-by: Thomas Gleixner +Reviewed-by: Borislav Petkov +Cc: Boris Ostrovsky +Cc: Borislav Petkov +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Dave Hansen +Cc: David Laight +Cc: Denys Vlasenko +Cc: Eduardo Valentin +Cc: Greg KH +Cc: H. Peter Anvin +Cc: Josh Poimboeuf +Cc: Juergen Gross +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Rik van Riel +Cc: Will Deacon +Cc: aliguori@amazon.com +Cc: daniel.gruss@iaik.tugraz.at +Cc: hughd@google.com +Cc: keescook@google.com +Link: https://lkml.kernel.org/r/20171204150605.312726423@linutronix.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/entry/entry_64_compat.S | 2 +- + arch/x86/include/asm/processor.h | 3 --- + arch/x86/kernel/asm-offsets.c | 5 +++++ + arch/x86/kernel/asm-offsets_32.c | 5 ----- + arch/x86/kernel/cpu/common.c | 4 +++- + arch/x86/kernel/process.c | 2 -- + arch/x86/kernel/traps.c | 3 +-- + 7 files changed, 10 insertions(+), 14 deletions(-) + +--- a/arch/x86/entry/entry_64_compat.S ++++ b/arch/x86/entry/entry_64_compat.S +@@ -48,7 +48,7 @@ + */ + ENTRY(entry_SYSENTER_compat) + /* Interrupts are off on entry. 
*/ +- SWAPGS_UNSAFE_STACK ++ SWAPGS + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + + /* +--- a/arch/x86/include/asm/processor.h ++++ b/arch/x86/include/asm/processor.h +@@ -339,14 +339,11 @@ struct tss_struct { + */ + unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; + +-#ifdef CONFIG_X86_32 + /* + * Space for the temporary SYSENTER stack. + */ + unsigned long SYSENTER_stack_canary; + unsigned long SYSENTER_stack[64]; +-#endif +- + } ____cacheline_aligned; + + DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss); +--- a/arch/x86/kernel/asm-offsets.c ++++ b/arch/x86/kernel/asm-offsets.c +@@ -93,4 +93,9 @@ void common(void) { + + BLANK(); + DEFINE(PTREGS_SIZE, sizeof(struct pt_regs)); ++ ++ /* Offset from cpu_tss to SYSENTER_stack */ ++ OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack); ++ /* Size of SYSENTER_stack */ ++ DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack)); + } +--- a/arch/x86/kernel/asm-offsets_32.c ++++ b/arch/x86/kernel/asm-offsets_32.c +@@ -50,11 +50,6 @@ void foo(void) + DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) - + offsetofend(struct tss_struct, SYSENTER_stack)); + +- /* Offset from cpu_tss to SYSENTER_stack */ +- OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack); +- /* Size of SYSENTER_stack */ +- DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack)); +- + #ifdef CONFIG_CC_STACKPROTECTOR + BLANK(); + OFFSET(stack_canary_offset, stack_canary, canary); +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1361,7 +1361,9 @@ void syscall_init(void) + * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit). 
+ */ + wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); +- wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); ++ wrmsrl_safe(MSR_IA32_SYSENTER_ESP, ++ (unsigned long)this_cpu_ptr(&cpu_tss) + ++ offsetofend(struct tss_struct, SYSENTER_stack)); + wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat); + #else + wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret); +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -71,9 +71,7 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED( + */ + .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, + #endif +-#ifdef CONFIG_X86_32 + .SYSENTER_stack_canary = STACK_END_MAGIC, +-#endif + }; + EXPORT_PER_CPU_SYMBOL(cpu_tss); + +--- a/arch/x86/kernel/traps.c ++++ b/arch/x86/kernel/traps.c +@@ -794,14 +794,13 @@ dotraplinkage void do_debug(struct pt_re + debug_stack_usage_dec(); + + exit: +-#if defined(CONFIG_X86_32) + /* + * This is the most likely code path that involves non-trivial use + * of the SYSENTER stack. Check that we haven't overrun it. + */ + WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC, + "Overran or corrupted SYSENTER stack\n"); +-#endif ++ + ist_exit(regs); + } + NOKPROBE_SYMBOL(do_debug); diff --git a/queue-4.14/x86-entry-64-create-a-per-cpu-syscall-entry-trampoline.patch b/queue-4.14/x86-entry-64-create-a-per-cpu-syscall-entry-trampoline.patch new file mode 100644 index 00000000000..a19a3a7b1fb --- /dev/null +++ b/queue-4.14/x86-entry-64-create-a-per-cpu-syscall-entry-trampoline.patch @@ -0,0 +1,224 @@ +From 3386bc8aed825e9f1f65ce38df4b109b2019b71a Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Mon, 4 Dec 2017 15:07:25 +0100 +Subject: x86/entry/64: Create a per-CPU SYSCALL entry trampoline + +From: Andy Lutomirski + +commit 3386bc8aed825e9f1f65ce38df4b109b2019b71a upstream. + +Handling SYSCALL is tricky: the SYSCALL handler is entered with every +single register (except FLAGS), including RSP, live. 
It somehow needs +to set RSP to point to a valid stack, which means it needs to save the +user RSP somewhere and find its own stack pointer. The canonical way +to do this is with SWAPGS, which lets us access percpu data using the +%gs prefix. + +With PAGE_TABLE_ISOLATION-like pagetable switching, this is +problematic. Without a scratch register, switching CR3 is impossible, so +%gs-based percpu memory would need to be mapped in the user pagetables. +Doing that without information leaks is difficult or impossible. + +Instead, use a different sneaky trick. Map a copy of the first part +of the SYSCALL asm at a different address for each CPU. Now RIP +varies depending on the CPU, so we can use RIP-relative memory access +to access percpu memory. By putting the relevant information (one +scratch slot and the stack address) at a constant offset relative to +RIP, we can make SYSCALL work without relying on %gs. + +A nice thing about this approach is that we can easily switch it on +and off if we want pagetable switching to be configurable. + +The compat variant of SYSCALL doesn't have this problem in the first +place -- there are plenty of scratch registers, since we don't care +about preserving r8-r15. This patch therefore doesn't touch SYSCALL32 +at all. + +This patch actually seems to be a small speedup. With this patch, +SYSCALL touches an extra cache line and an extra virtual page, but +the pipeline no longer stalls waiting for SWAPGS. It seems that, at +least in a tight loop, the latter outweights the former. + +Thanks to David Laight for an optimization tip. + +Signed-off-by: Andy Lutomirski +Signed-off-by: Thomas Gleixner +Reviewed-by: Thomas Gleixner +Reviewed-by: Borislav Petkov +Cc: Boris Ostrovsky +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Dave Hansen +Cc: David Laight +Cc: Denys Vlasenko +Cc: Eduardo Valentin +Cc: Greg KH +Cc: H. 
Peter Anvin +Cc: Josh Poimboeuf +Cc: Juergen Gross +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Rik van Riel +Cc: Will Deacon +Cc: aliguori@amazon.com +Cc: daniel.gruss@iaik.tugraz.at +Cc: hughd@google.com +Cc: keescook@google.com +Link: https://lkml.kernel.org/r/20171204150606.403607157@linutronix.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/entry/entry_64.S | 58 ++++++++++++++++++++++++++++++++++++++++++ + arch/x86/include/asm/fixmap.h | 2 + + arch/x86/kernel/asm-offsets.c | 1 + arch/x86/kernel/cpu/common.c | 15 ++++++++++ + arch/x86/kernel/vmlinux.lds.S | 9 ++++++ + 5 files changed, 84 insertions(+), 1 deletion(-) + +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -136,6 +136,64 @@ END(native_usergs_sysret64) + * with them due to bugs in both AMD and Intel CPUs. + */ + ++ .pushsection .entry_trampoline, "ax" ++ ++/* ++ * The code in here gets remapped into cpu_entry_area's trampoline. This means ++ * that the assembler and linker have the wrong idea as to where this code ++ * lives (and, in fact, it's mapped more than once, so it's not even at a ++ * fixed address). So we can't reference any symbols outside the entry ++ * trampoline and expect it to work. ++ * ++ * Instead, we carefully abuse %rip-relative addressing. ++ * _entry_trampoline(%rip) refers to the start of the remapped) entry ++ * trampoline. We can thus find cpu_entry_area with this macro: ++ */ ++ ++#define CPU_ENTRY_AREA \ ++ _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip) ++ ++/* The top word of the SYSENTER stack is hot and is usable as scratch space. */ ++#define RSP_SCRATCH CPU_ENTRY_AREA_tss + CPU_TSS_SYSENTER_stack + \ ++ SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA ++ ++ENTRY(entry_SYSCALL_64_trampoline) ++ UNWIND_HINT_EMPTY ++ swapgs ++ ++ /* Stash the user RSP. 
*/ ++ movq %rsp, RSP_SCRATCH ++ ++ /* Load the top of the task stack into RSP */ ++ movq CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp ++ ++ /* Start building the simulated IRET frame. */ ++ pushq $__USER_DS /* pt_regs->ss */ ++ pushq RSP_SCRATCH /* pt_regs->sp */ ++ pushq %r11 /* pt_regs->flags */ ++ pushq $__USER_CS /* pt_regs->cs */ ++ pushq %rcx /* pt_regs->ip */ ++ ++ /* ++ * x86 lacks a near absolute jump, and we can't jump to the real ++ * entry text with a relative jump. We could push the target ++ * address and then use retq, but this destroys the pipeline on ++ * many CPUs (wasting over 20 cycles on Sandy Bridge). Instead, ++ * spill RDI and restore it in a second-stage trampoline. ++ */ ++ pushq %rdi ++ movq $entry_SYSCALL_64_stage2, %rdi ++ jmp *%rdi ++END(entry_SYSCALL_64_trampoline) ++ ++ .popsection ++ ++ENTRY(entry_SYSCALL_64_stage2) ++ UNWIND_HINT_EMPTY ++ popq %rdi ++ jmp entry_SYSCALL_64_after_hwframe ++END(entry_SYSCALL_64_stage2) ++ + ENTRY(entry_SYSCALL_64) + UNWIND_HINT_EMPTY + /* +--- a/arch/x86/include/asm/fixmap.h ++++ b/arch/x86/include/asm/fixmap.h +@@ -61,6 +61,8 @@ struct cpu_entry_area { + * of the TSS region. + */ + struct tss_struct tss; ++ ++ char entry_trampoline[PAGE_SIZE]; + }; + + #define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE) +--- a/arch/x86/kernel/asm-offsets.c ++++ b/arch/x86/kernel/asm-offsets.c +@@ -101,4 +101,5 @@ void common(void) { + + /* Layout info for cpu_entry_area */ + OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss); ++ OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline); + } +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -486,6 +486,8 @@ DEFINE_PER_CPU(struct cpu_entry_area *, + static inline void setup_cpu_entry_area(int cpu) + { + #ifdef CONFIG_X86_64 ++ extern char _entry_trampoline[]; ++ + /* On 64-bit systems, we use a read-only fixmap GDT. 
*/ + pgprot_t gdt_prot = PAGE_KERNEL_RO; + #else +@@ -532,6 +534,11 @@ static inline void setup_cpu_entry_area( + #ifdef CONFIG_X86_32 + this_cpu_write(cpu_entry_area, get_cpu_entry_area(cpu)); + #endif ++ ++#ifdef CONFIG_X86_64 ++ __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline), ++ __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX); ++#endif + } + + /* Load the original GDT from the per-cpu structure */ +@@ -1395,10 +1402,16 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char, + /* May not be marked __init: used by software suspend */ + void syscall_init(void) + { ++ extern char _entry_trampoline[]; ++ extern char entry_SYSCALL_64_trampoline[]; ++ + int cpu = smp_processor_id(); ++ unsigned long SYSCALL64_entry_trampoline = ++ (unsigned long)get_cpu_entry_area(cpu)->entry_trampoline + ++ (entry_SYSCALL_64_trampoline - _entry_trampoline); + + wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS); +- wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); ++ wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline); + + #ifdef CONFIG_IA32_EMULATION + wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat); +--- a/arch/x86/kernel/vmlinux.lds.S ++++ b/arch/x86/kernel/vmlinux.lds.S +@@ -107,6 +107,15 @@ SECTIONS + SOFTIRQENTRY_TEXT + *(.fixup) + *(.gnu.warning) ++ ++#ifdef CONFIG_X86_64 ++ . = ALIGN(PAGE_SIZE); ++ _entry_trampoline = .; ++ *(.entry_trampoline) ++ . = ALIGN(PAGE_SIZE); ++ ASSERT(. 
- _entry_trampoline == PAGE_SIZE, "entry trampoline is too big"); ++#endif ++ + /* End of text section */ + _etext = .; + } :text = 0x9090 diff --git a/queue-4.14/x86-entry-64-de-xen-ify-our-nmi-code.patch b/queue-4.14/x86-entry-64-de-xen-ify-our-nmi-code.patch new file mode 100644 index 00000000000..d1568620f32 --- /dev/null +++ b/queue-4.14/x86-entry-64-de-xen-ify-our-nmi-code.patch @@ -0,0 +1,108 @@ +From 929bacec21478a72c78e4f29f98fb799bd00105a Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Thu, 2 Nov 2017 00:59:08 -0700 +Subject: x86/entry/64: De-Xen-ify our NMI code + +From: Andy Lutomirski + +commit 929bacec21478a72c78e4f29f98fb799bd00105a upstream. + +Xen PV is fundamentally incompatible with our fancy NMI code: it +doesn't use IST at all, and Xen entries clobber two stack slots +below the hardware frame. + +Drop Xen PV support from our NMI code entirely. + +Signed-off-by: Andy Lutomirski +Reviewed-by: Borislav Petkov +Acked-by: Juergen Gross +Cc: Boris Ostrovsky +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/bfbe711b5ae03f672f8848999a8eb2711efc7f98.1509609304.git.luto@kernel.org +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/entry/entry_64.S | 30 ++++++++++++++++++------------ + 1 file changed, 18 insertions(+), 12 deletions(-) + +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -1241,9 +1241,13 @@ ENTRY(error_exit) + jmp retint_user + END(error_exit) + +-/* Runs on exception stack */ ++/* ++ * Runs on exception stack. Xen PV does not go through this path at all, ++ * so we can use real assembly here. ++ */ + ENTRY(nmi) + UNWIND_HINT_IRET_REGS ++ + /* + * We allow breakpoints in NMIs. If a breakpoint occurs, then + * the iretq it performs will take us out of NMI context. +@@ -1301,7 +1305,7 @@ ENTRY(nmi) + * stacks lest we corrupt the "NMI executing" variable. 
+ */ + +- SWAPGS_UNSAFE_STACK ++ swapgs + cld + movq %rsp, %rdx + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp +@@ -1466,7 +1470,7 @@ nested_nmi_out: + popq %rdx + + /* We are returning to kernel mode, so this cannot result in a fault. */ +- INTERRUPT_RETURN ++ iretq + + first_nmi: + /* Restore rdx. */ +@@ -1497,7 +1501,7 @@ first_nmi: + pushfq /* RFLAGS */ + pushq $__KERNEL_CS /* CS */ + pushq $1f /* RIP */ +- INTERRUPT_RETURN /* continues at repeat_nmi below */ ++ iretq /* continues at repeat_nmi below */ + UNWIND_HINT_IRET_REGS + 1: + #endif +@@ -1572,20 +1576,22 @@ nmi_restore: + /* + * Clear "NMI executing". Set DF first so that we can easily + * distinguish the remaining code between here and IRET from +- * the SYSCALL entry and exit paths. On a native kernel, we +- * could just inspect RIP, but, on paravirt kernels, +- * INTERRUPT_RETURN can translate into a jump into a +- * hypercall page. ++ * the SYSCALL entry and exit paths. ++ * ++ * We arguably should just inspect RIP instead, but I (Andy) wrote ++ * this code when I had the misapprehension that Xen PV supported ++ * NMIs, and Xen PV would break that approach. + */ + std + movq $0, 5*8(%rsp) /* clear "NMI executing" */ + + /* +- * INTERRUPT_RETURN reads the "iret" frame and exits the NMI +- * stack in a single instruction. We are returning to kernel +- * mode, so this cannot result in a fault. ++ * iretq reads the "iret" frame and exits the NMI stack in a ++ * single instruction. We are returning to kernel mode, so this ++ * cannot result in a fault. Similarly, we don't need to worry ++ * about espfix64 on the way back to kernel mode. 
+ */ +- INTERRUPT_RETURN ++ iretq + END(nmi) + + ENTRY(ignore_sysret) diff --git a/queue-4.14/x86-entry-64-make-cpu_entry_area.tss-read-only.patch b/queue-4.14/x86-entry-64-make-cpu_entry_area.tss-read-only.patch new file mode 100644 index 00000000000..6248d7690b6 --- /dev/null +++ b/queue-4.14/x86-entry-64-make-cpu_entry_area.tss-read-only.patch @@ -0,0 +1,453 @@ +From c482feefe1aeb150156248ba0fd3e029bc886605 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Mon, 4 Dec 2017 15:07:29 +0100 +Subject: x86/entry/64: Make cpu_entry_area.tss read-only + +From: Andy Lutomirski + +commit c482feefe1aeb150156248ba0fd3e029bc886605 upstream. + +The TSS is a fairly juicy target for exploits, and, now that the TSS +is in the cpu_entry_area, it's no longer protected by kASLR. Make it +read-only on x86_64. + +On x86_32, it can't be RO because it's written by the CPU during task +switches, and we use a task gate for double faults. I'd also be +nervous about errata if we tried to make it RO even on configurations +without double fault handling. + +[ tglx: AMD confirmed that there is no problem on 64-bit with TSS RO. So + it's probably safe to assume that it's a non issue, though Intel + might have been creative in that area. Still waiting for + confirmation. ] + +Signed-off-by: Andy Lutomirski +Signed-off-by: Thomas Gleixner +Reviewed-by: Borislav Petkov +Cc: Boris Ostrovsky +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Dave Hansen +Cc: David Laight +Cc: Denys Vlasenko +Cc: Eduardo Valentin +Cc: Greg KH +Cc: H. 
Peter Anvin +Cc: Josh Poimboeuf +Cc: Juergen Gross +Cc: Kees Cook +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Rik van Riel +Cc: Will Deacon +Cc: aliguori@amazon.com +Cc: daniel.gruss@iaik.tugraz.at +Cc: hughd@google.com +Cc: keescook@google.com +Link: https://lkml.kernel.org/r/20171204150606.733700132@linutronix.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/entry/entry_32.S | 4 ++-- + arch/x86/entry/entry_64.S | 8 ++++---- + arch/x86/include/asm/fixmap.h | 13 +++++++++---- + arch/x86/include/asm/processor.h | 17 ++++++++--------- + arch/x86/include/asm/switch_to.h | 4 ++-- + arch/x86/include/asm/thread_info.h | 2 +- + arch/x86/kernel/asm-offsets.c | 5 ++--- + arch/x86/kernel/asm-offsets_32.c | 4 ++-- + arch/x86/kernel/cpu/common.c | 29 +++++++++++++++++++---------- + arch/x86/kernel/ioport.c | 2 +- + arch/x86/kernel/process.c | 6 +++--- + arch/x86/kernel/process_32.c | 2 +- + arch/x86/kernel/process_64.c | 2 +- + arch/x86/kernel/traps.c | 4 ++-- + arch/x86/lib/delay.c | 4 ++-- + arch/x86/xen/enlighten_pv.c | 2 +- + 16 files changed, 60 insertions(+), 48 deletions(-) + +--- a/arch/x86/entry/entry_32.S ++++ b/arch/x86/entry/entry_32.S +@@ -942,7 +942,7 @@ ENTRY(debug) + + /* Are we currently on the SYSENTER stack? */ + movl PER_CPU_VAR(cpu_entry_area), %ecx +- addl $CPU_ENTRY_AREA_tss + TSS_STRUCT_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx ++ addl $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx + subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ + cmpl $SIZEOF_SYSENTER_stack, %ecx + jb .Ldebug_from_sysenter_stack +@@ -986,7 +986,7 @@ ENTRY(nmi) + + /* Are we currently on the SYSENTER stack? 
*/ + movl PER_CPU_VAR(cpu_entry_area), %ecx +- addl $CPU_ENTRY_AREA_tss + TSS_STRUCT_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx ++ addl $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx + subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ + cmpl $SIZEOF_SYSENTER_stack, %ecx + jb .Lnmi_from_sysenter_stack +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -154,7 +154,7 @@ END(native_usergs_sysret64) + _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip) + + /* The top word of the SYSENTER stack is hot and is usable as scratch space. */ +-#define RSP_SCRATCH CPU_ENTRY_AREA_tss + TSS_STRUCT_SYSENTER_stack + \ ++#define RSP_SCRATCH CPU_ENTRY_AREA_SYSENTER_stack + \ + SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA + + ENTRY(entry_SYSCALL_64_trampoline) +@@ -390,7 +390,7 @@ syscall_return_via_sysret: + * Save old stack pointer and switch to trampoline stack. + */ + movq %rsp, %rdi +- movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp ++ movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp + + pushq RSP-RDI(%rdi) /* RSP */ + pushq (%rdi) /* RDI */ +@@ -719,7 +719,7 @@ GLOBAL(swapgs_restore_regs_and_return_to + * Save old stack pointer and switch to trampoline stack. + */ + movq %rsp, %rdi +- movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp ++ movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp + + /* Copy the IRET frame to the trampoline stack. */ + pushq 6*8(%rdi) /* SS */ +@@ -934,7 +934,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work + /* + * Exception entry points. + */ +-#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8) ++#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8) + + /* + * Switch to the thread stack. 
This is called with the IRET frame and +--- a/arch/x86/include/asm/fixmap.h ++++ b/arch/x86/include/asm/fixmap.h +@@ -56,9 +56,14 @@ struct cpu_entry_area { + char gdt[PAGE_SIZE]; + + /* +- * The GDT is just below cpu_tss and thus serves (on x86_64) as a +- * a read-only guard page for the SYSENTER stack at the bottom +- * of the TSS region. ++ * The GDT is just below SYSENTER_stack and thus serves (on x86_64) as ++ * a a read-only guard page. ++ */ ++ struct SYSENTER_stack_page SYSENTER_stack_page; ++ ++ /* ++ * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because ++ * we need task switches to work, and task switches write to the TSS. + */ + struct tss_struct tss; + +@@ -247,7 +252,7 @@ static inline struct cpu_entry_area *get + + static inline struct SYSENTER_stack *cpu_SYSENTER_stack(int cpu) + { +- return &get_cpu_entry_area(cpu)->tss.SYSENTER_stack; ++ return &get_cpu_entry_area(cpu)->SYSENTER_stack_page.stack; + } + + #endif /* !__ASSEMBLY__ */ +--- a/arch/x86/include/asm/processor.h ++++ b/arch/x86/include/asm/processor.h +@@ -340,13 +340,11 @@ struct SYSENTER_stack { + unsigned long words[64]; + }; + +-struct tss_struct { +- /* +- * Space for the temporary SYSENTER stack, used for SYSENTER +- * and the entry trampoline as well. +- */ +- struct SYSENTER_stack SYSENTER_stack; ++struct SYSENTER_stack_page { ++ struct SYSENTER_stack stack; ++} __aligned(PAGE_SIZE); + ++struct tss_struct { + /* + * The fixed hardware portion. 
This must not cross a page boundary + * at risk of violating the SDM's advice and potentially triggering +@@ -363,7 +361,7 @@ struct tss_struct { + unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; + } __aligned(PAGE_SIZE); + +-DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss); ++DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw); + + /* + * sizeof(unsigned long) coming from an extra "long" at the end +@@ -378,7 +376,8 @@ DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_ + #ifdef CONFIG_X86_32 + DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); + #else +-#define cpu_current_top_of_stack cpu_tss.x86_tss.sp1 ++/* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */ ++#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1 + #endif + + /* +@@ -538,7 +537,7 @@ static inline void native_set_iopl_mask( + static inline void + native_load_sp0(unsigned long sp0) + { +- this_cpu_write(cpu_tss.x86_tss.sp0, sp0); ++ this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0); + } + + static inline void native_swapgs(void) +--- a/arch/x86/include/asm/switch_to.h ++++ b/arch/x86/include/asm/switch_to.h +@@ -79,10 +79,10 @@ do { \ + static inline void refresh_sysenter_cs(struct thread_struct *thread) + { + /* Only happens when SEP is enabled, no need to test "SEP"arately: */ +- if (unlikely(this_cpu_read(cpu_tss.x86_tss.ss1) == thread->sysenter_cs)) ++ if (unlikely(this_cpu_read(cpu_tss_rw.x86_tss.ss1) == thread->sysenter_cs)) + return; + +- this_cpu_write(cpu_tss.x86_tss.ss1, thread->sysenter_cs); ++ this_cpu_write(cpu_tss_rw.x86_tss.ss1, thread->sysenter_cs); + wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); + } + #endif +--- a/arch/x86/include/asm/thread_info.h ++++ b/arch/x86/include/asm/thread_info.h +@@ -207,7 +207,7 @@ static inline int arch_within_stack_fram + #else /* !__ASSEMBLY__ */ + + #ifdef CONFIG_X86_64 +-# define cpu_current_top_of_stack (cpu_tss + TSS_sp1) ++# define cpu_current_top_of_stack (cpu_tss_rw + TSS_sp1) + #endif + + #endif +--- 
a/arch/x86/kernel/asm-offsets.c ++++ b/arch/x86/kernel/asm-offsets.c +@@ -94,10 +94,9 @@ void common(void) { + BLANK(); + DEFINE(PTREGS_SIZE, sizeof(struct pt_regs)); + +- OFFSET(TSS_STRUCT_SYSENTER_stack, tss_struct, SYSENTER_stack); +- DEFINE(SIZEOF_SYSENTER_stack, sizeof(struct SYSENTER_stack)); +- + /* Layout info for cpu_entry_area */ + OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss); + OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline); ++ OFFSET(CPU_ENTRY_AREA_SYSENTER_stack, cpu_entry_area, SYSENTER_stack_page); ++ DEFINE(SIZEOF_SYSENTER_stack, sizeof(struct SYSENTER_stack)); + } +--- a/arch/x86/kernel/asm-offsets_32.c ++++ b/arch/x86/kernel/asm-offsets_32.c +@@ -47,8 +47,8 @@ void foo(void) + BLANK(); + + /* Offset from the sysenter stack to tss.sp0 */ +- DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) - +- offsetofend(struct tss_struct, SYSENTER_stack)); ++ DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) - ++ offsetofend(struct cpu_entry_area, SYSENTER_stack_page.stack)); + + #ifdef CONFIG_CC_STACKPROTECTOR + BLANK(); +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -487,6 +487,9 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char, + [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); + #endif + ++static DEFINE_PER_CPU_PAGE_ALIGNED(struct SYSENTER_stack_page, ++ SYSENTER_stack_storage); ++ + static void __init + set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot) + { +@@ -500,23 +503,29 @@ static void __init setup_cpu_entry_area( + #ifdef CONFIG_X86_64 + extern char _entry_trampoline[]; + +- /* On 64-bit systems, we use a read-only fixmap GDT. */ ++ /* On 64-bit systems, we use a read-only fixmap GDT and TSS. 
*/ + pgprot_t gdt_prot = PAGE_KERNEL_RO; ++ pgprot_t tss_prot = PAGE_KERNEL_RO; + #else + /* + * On native 32-bit systems, the GDT cannot be read-only because + * our double fault handler uses a task gate, and entering through +- * a task gate needs to change an available TSS to busy. If the GDT +- * is read-only, that will triple fault. ++ * a task gate needs to change an available TSS to busy. If the ++ * GDT is read-only, that will triple fault. The TSS cannot be ++ * read-only because the CPU writes to it on task switches. + * +- * On Xen PV, the GDT must be read-only because the hypervisor requires +- * it. ++ * On Xen PV, the GDT must be read-only because the hypervisor ++ * requires it. + */ + pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ? + PAGE_KERNEL_RO : PAGE_KERNEL; ++ pgprot_t tss_prot = PAGE_KERNEL; + #endif + + __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot); ++ set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, SYSENTER_stack_page), ++ per_cpu_ptr(&SYSENTER_stack_storage, cpu), 1, ++ PAGE_KERNEL); + + /* + * The Intel SDM says (Volume 3, 7.2.1): +@@ -539,9 +548,9 @@ static void __init setup_cpu_entry_area( + offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK); + BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0); + set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss), +- &per_cpu(cpu_tss, cpu), ++ &per_cpu(cpu_tss_rw, cpu), + sizeof(struct tss_struct) / PAGE_SIZE, +- PAGE_KERNEL); ++ tss_prot); + + #ifdef CONFIG_X86_32 + per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu); +@@ -1305,7 +1314,7 @@ void enable_sep_cpu(void) + return; + + cpu = get_cpu(); +- tss = &per_cpu(cpu_tss, cpu); ++ tss = &per_cpu(cpu_tss_rw, cpu); + + /* + * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field -- +@@ -1575,7 +1584,7 @@ void cpu_init(void) + if (cpu) + load_ucode_ap(); + +- t = &per_cpu(cpu_tss, cpu); ++ t = &per_cpu(cpu_tss_rw, cpu); + oist = &per_cpu(orig_ist, cpu); + + #ifdef CONFIG_NUMA 
+@@ -1667,7 +1676,7 @@ void cpu_init(void) + { + int cpu = smp_processor_id(); + struct task_struct *curr = current; +- struct tss_struct *t = &per_cpu(cpu_tss, cpu); ++ struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu); + + wait_for_master_cpu(cpu); + +--- a/arch/x86/kernel/ioport.c ++++ b/arch/x86/kernel/ioport.c +@@ -67,7 +67,7 @@ asmlinkage long sys_ioperm(unsigned long + * because the ->io_bitmap_max value must match the bitmap + * contents: + */ +- tss = &per_cpu(cpu_tss, get_cpu()); ++ tss = &per_cpu(cpu_tss_rw, get_cpu()); + + if (turn_on) + bitmap_clear(t->io_bitmap_ptr, from, num); +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -47,7 +47,7 @@ + * section. Since TSS's are completely CPU-local, we want them + * on exact cacheline boundaries, to eliminate cacheline ping-pong. + */ +-__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { ++__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss_rw) = { + .x86_tss = { + /* + * .sp0 is only used when entering ring 0 from a lower +@@ -82,7 +82,7 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED( + .io_bitmap = { [0 ... 
IO_BITMAP_LONGS] = ~0 }, + #endif + }; +-EXPORT_PER_CPU_SYMBOL(cpu_tss); ++EXPORT_PER_CPU_SYMBOL(cpu_tss_rw); + + DEFINE_PER_CPU(bool, __tss_limit_invalid); + EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid); +@@ -111,7 +111,7 @@ void exit_thread(struct task_struct *tsk + struct fpu *fpu = &t->fpu; + + if (bp) { +- struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu()); ++ struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu()); + + t->io_bitmap_ptr = NULL; + clear_thread_flag(TIF_IO_BITMAP); +--- a/arch/x86/kernel/process_32.c ++++ b/arch/x86/kernel/process_32.c +@@ -234,7 +234,7 @@ __switch_to(struct task_struct *prev_p, + struct fpu *prev_fpu = &prev->fpu; + struct fpu *next_fpu = &next->fpu; + int cpu = smp_processor_id(); +- struct tss_struct *tss = &per_cpu(cpu_tss, cpu); ++ struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu); + + /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ + +--- a/arch/x86/kernel/process_64.c ++++ b/arch/x86/kernel/process_64.c +@@ -399,7 +399,7 @@ __switch_to(struct task_struct *prev_p, + struct fpu *prev_fpu = &prev->fpu; + struct fpu *next_fpu = &next->fpu; + int cpu = smp_processor_id(); +- struct tss_struct *tss = &per_cpu(cpu_tss, cpu); ++ struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu); + + WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && + this_cpu_read(irq_count) != -1); +--- a/arch/x86/kernel/traps.c ++++ b/arch/x86/kernel/traps.c +@@ -364,7 +364,7 @@ dotraplinkage void do_double_fault(struc + regs->cs == __KERNEL_CS && + regs->ip == (unsigned long)native_irq_return_iret) + { +- struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss.x86_tss.sp0) - 1; ++ struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1; + + /* + * regs->sp points to the failing IRET frame on the +@@ -649,7 +649,7 @@ struct bad_iret_stack *fixup_bad_iret(st + * exception came from the IRET target. 
+ */ + struct bad_iret_stack *new_stack = +- (struct bad_iret_stack *)this_cpu_read(cpu_tss.x86_tss.sp0) - 1; ++ (struct bad_iret_stack *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1; + + /* Copy the IRET target to the new stack. */ + memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8); +--- a/arch/x86/lib/delay.c ++++ b/arch/x86/lib/delay.c +@@ -107,10 +107,10 @@ static void delay_mwaitx(unsigned long _ + delay = min_t(u64, MWAITX_MAX_LOOPS, loops); + + /* +- * Use cpu_tss as a cacheline-aligned, seldomly ++ * Use cpu_tss_rw as a cacheline-aligned, seldomly + * accessed per-cpu variable as the monitor target. + */ +- __monitorx(raw_cpu_ptr(&cpu_tss), 0, 0); ++ __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0); + + /* + * AMD, like Intel, supports the EAX hint and EAX=0xf +--- a/arch/x86/xen/enlighten_pv.c ++++ b/arch/x86/xen/enlighten_pv.c +@@ -818,7 +818,7 @@ static void xen_load_sp0(unsigned long s + mcs = xen_mc_entry(0); + MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0); + xen_mc_issue(PARAVIRT_LAZY_CPU); +- this_cpu_write(cpu_tss.x86_tss.sp0, sp0); ++ this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0); + } + + void xen_set_iopl_mask(unsigned mask) diff --git a/queue-4.14/x86-entry-64-merge-the-fast-and-slow-sysret-paths.patch b/queue-4.14/x86-entry-64-merge-the-fast-and-slow-sysret-paths.patch new file mode 100644 index 00000000000..a55755c8dd3 --- /dev/null +++ b/queue-4.14/x86-entry-64-merge-the-fast-and-slow-sysret-paths.patch @@ -0,0 +1,51 @@ +From a512210643da8082cb44181dba8b18e752bd68f0 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Thu, 2 Nov 2017 00:59:04 -0700 +Subject: x86/entry/64: Merge the fast and slow SYSRET paths + +From: Andy Lutomirski + +commit a512210643da8082cb44181dba8b18e752bd68f0 upstream. + +They did almost the same thing. Remove a bunch of pointless +instructions (mostly hidden in macros) and reduce cognitive load by +merging them. 
+ +Signed-off-by: Andy Lutomirski +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/1204e20233fcab9130a1ba80b3b1879b5db3fc1f.1509609304.git.luto@kernel.org +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/entry/entry_64.S | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -221,10 +221,9 @@ entry_SYSCALL_64_fastpath: + TRACE_IRQS_ON /* user mode is traced as IRQs on */ + movq RIP(%rsp), %rcx + movq EFLAGS(%rsp), %r11 +- RESTORE_C_REGS_EXCEPT_RCX_R11 +- movq RSP(%rsp), %rsp ++ addq $6*8, %rsp /* skip extra regs -- they were preserved */ + UNWIND_HINT_EMPTY +- USERGS_SYSRET64 ++ jmp .Lpop_c_regs_except_rcx_r11_and_sysret + + 1: + /* +@@ -318,6 +317,7 @@ syscall_return_via_sysret: + /* rcx and r11 are already restored (see code above) */ + UNWIND_HINT_EMPTY + POP_EXTRA_REGS ++.Lpop_c_regs_except_rcx_r11_and_sysret: + popq %rsi /* skip r11 */ + popq %r10 + popq %r9 diff --git a/queue-4.14/x86-entry-64-move-swapgs-into-the-common-iret-to-usermode-path.patch b/queue-4.14/x86-entry-64-move-swapgs-into-the-common-iret-to-usermode-path.patch new file mode 100644 index 00000000000..8e5d437179a --- /dev/null +++ b/queue-4.14/x86-entry-64-move-swapgs-into-the-common-iret-to-usermode-path.patch @@ -0,0 +1,144 @@ +From 8a055d7f411d41755ce30db5bb65b154777c4b78 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Thu, 2 Nov 2017 00:59:00 -0700 +Subject: x86/entry/64: Move SWAPGS into the common IRET-to-usermode path + +From: Andy Lutomirski + +commit 8a055d7f411d41755ce30db5bb65b154777c4b78 upstream. + +All of the code paths that ended up doing IRET to usermode did +SWAPGS immediately beforehand. Move the SWAPGS into the common +code. 
+ +Signed-off-by: Andy Lutomirski +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/27fd6f45b7cd640de38fb9066fd0349bcd11f8e1.1509609304.git.luto@kernel.org +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/entry/entry_64.S | 32 ++++++++++++++------------------ + arch/x86/entry/entry_64_compat.S | 3 +-- + 2 files changed, 15 insertions(+), 20 deletions(-) + +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -250,12 +250,14 @@ return_from_SYSCALL_64: + + /* + * Try to use SYSRET instead of IRET if we're returning to +- * a completely clean 64-bit userspace context. ++ * a completely clean 64-bit userspace context. If we're not, ++ * go to the slow exit path. + */ + movq RCX(%rsp), %rcx + movq RIP(%rsp), %r11 +- cmpq %rcx, %r11 /* RCX == RIP */ +- jne opportunistic_sysret_failed ++ ++ cmpq %rcx, %r11 /* SYSRET requires RCX == RIP */ ++ jne swapgs_restore_regs_and_return_to_usermode + + /* + * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP +@@ -273,14 +275,14 @@ return_from_SYSCALL_64: + + /* If this changed %rcx, it was not canonical */ + cmpq %rcx, %r11 +- jne opportunistic_sysret_failed ++ jne swapgs_restore_regs_and_return_to_usermode + + cmpq $__USER_CS, CS(%rsp) /* CS must match SYSRET */ +- jne opportunistic_sysret_failed ++ jne swapgs_restore_regs_and_return_to_usermode + + movq R11(%rsp), %r11 + cmpq %r11, EFLAGS(%rsp) /* R11 == RFLAGS */ +- jne opportunistic_sysret_failed ++ jne swapgs_restore_regs_and_return_to_usermode + + /* + * SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot +@@ -301,12 +303,12 @@ return_from_SYSCALL_64: + * would never get past 'stuck_here'. 
+ */ + testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11 +- jnz opportunistic_sysret_failed ++ jnz swapgs_restore_regs_and_return_to_usermode + + /* nothing to check for RSP */ + + cmpq $__USER_DS, SS(%rsp) /* SS must match SYSRET */ +- jne opportunistic_sysret_failed ++ jne swapgs_restore_regs_and_return_to_usermode + + /* + * We win! This label is here just for ease of understanding +@@ -319,10 +321,6 @@ syscall_return_via_sysret: + movq RSP(%rsp), %rsp + UNWIND_HINT_EMPTY + USERGS_SYSRET64 +- +-opportunistic_sysret_failed: +- SWAPGS +- jmp restore_regs_and_return_to_usermode + END(entry_SYSCALL_64) + + ENTRY(stub_ptregs_64) +@@ -423,8 +421,7 @@ ENTRY(ret_from_fork) + movq %rsp, %rdi + call syscall_return_slowpath /* returns with IRQs disabled */ + TRACE_IRQS_ON /* user mode is traced as IRQS on */ +- SWAPGS +- jmp restore_regs_and_return_to_usermode ++ jmp swapgs_restore_regs_and_return_to_usermode + + 1: + /* kernel thread */ +@@ -612,9 +609,8 @@ GLOBAL(retint_user) + mov %rsp,%rdi + call prepare_exit_to_usermode + TRACE_IRQS_IRETQ +- SWAPGS + +-GLOBAL(restore_regs_and_return_to_usermode) ++GLOBAL(swapgs_restore_regs_and_return_to_usermode) + #ifdef CONFIG_DEBUG_ENTRY + /* Assert that pt_regs indicates user mode. */ + testl $3, CS(%rsp) +@@ -622,6 +618,7 @@ GLOBAL(restore_regs_and_return_to_usermo + ud2 + 1: + #endif ++ SWAPGS + RESTORE_EXTRA_REGS + RESTORE_C_REGS + REMOVE_PT_GPREGS_FROM_STACK 8 +@@ -1343,8 +1340,7 @@ ENTRY(nmi) + * Return back to user mode. We must *not* do the normal exit + * work, because we don't want to enable interrupts. + */ +- SWAPGS +- jmp restore_regs_and_return_to_usermode ++ jmp swapgs_restore_regs_and_return_to_usermode + + .Lnmi_from_kernel: + /* +--- a/arch/x86/entry/entry_64_compat.S ++++ b/arch/x86/entry/entry_64_compat.S +@@ -337,8 +337,7 @@ ENTRY(entry_INT80_compat) + + /* Go back to user mode. 
*/ + TRACE_IRQS_ON +- SWAPGS +- jmp restore_regs_and_return_to_usermode ++ jmp swapgs_restore_regs_and_return_to_usermode + END(entry_INT80_compat) + + ENTRY(stub32_clone) diff --git a/queue-4.14/x86-entry-64-move-the-ist-stacks-into-struct-cpu_entry_area.patch b/queue-4.14/x86-entry-64-move-the-ist-stacks-into-struct-cpu_entry_area.patch new file mode 100644 index 00000000000..5ababca5d4f --- /dev/null +++ b/queue-4.14/x86-entry-64-move-the-ist-stacks-into-struct-cpu_entry_area.patch @@ -0,0 +1,221 @@ +From 40e7f949e0d9a33968ebde5d67f7e3a47c97742a Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Mon, 4 Dec 2017 15:07:26 +0100 +Subject: x86/entry/64: Move the IST stacks into struct cpu_entry_area + +From: Andy Lutomirski + +commit 40e7f949e0d9a33968ebde5d67f7e3a47c97742a upstream. + +The IST stacks are needed when an IST exception occurs and are accessed +before any kernel code at all runs. Move them into struct cpu_entry_area. + +The IST stacks are unlike the rest of cpu_entry_area: they're used even for +entries from kernel mode. This means that they should be set up before we +load the final IDT. Move cpu_entry_area setup to trap_init() for the boot +CPU and set it up for all possible CPUs at once in native_smp_prepare_cpus(). + +Signed-off-by: Andy Lutomirski +Signed-off-by: Thomas Gleixner +Reviewed-by: Thomas Gleixner +Reviewed-by: Borislav Petkov +Cc: Boris Ostrovsky +Cc: Borislav Petkov +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Dave Hansen +Cc: David Laight +Cc: Denys Vlasenko +Cc: Eduardo Valentin +Cc: Greg KH +Cc: H. 
Peter Anvin +Cc: Josh Poimboeuf +Cc: Juergen Gross +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Rik van Riel +Cc: Will Deacon +Cc: aliguori@amazon.com +Cc: daniel.gruss@iaik.tugraz.at +Cc: hughd@google.com +Cc: keescook@google.com +Link: https://lkml.kernel.org/r/20171204150606.480598743@linutronix.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/fixmap.h | 12 ++++++ + arch/x86/kernel/cpu/common.c | 74 +++++++++++++++++++++++------------------- + arch/x86/kernel/traps.c | 3 + + 3 files changed, 57 insertions(+), 32 deletions(-) + +--- a/arch/x86/include/asm/fixmap.h ++++ b/arch/x86/include/asm/fixmap.h +@@ -63,10 +63,22 @@ struct cpu_entry_area { + struct tss_struct tss; + + char entry_trampoline[PAGE_SIZE]; ++ ++#ifdef CONFIG_X86_64 ++ /* ++ * Exception stacks used for IST entries. ++ * ++ * In the future, this should have a separate slot for each stack ++ * with guard pages between them. ++ */ ++ char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]; ++#endif + }; + + #define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE) + ++extern void setup_cpu_entry_areas(void); ++ + /* + * Here we define all the compile-time 'special' virtual + * addresses. The point is to have a constant address at +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -466,24 +466,36 @@ void load_percpu_segment(int cpu) + load_stack_canary_segment(); + } + +-static void set_percpu_fixmap_pages(int fixmap_index, void *ptr, +- int pages, pgprot_t prot) +-{ +- int i; +- +- for (i = 0; i < pages; i++) { +- __set_fixmap(fixmap_index - i, +- per_cpu_ptr_to_phys(ptr + i * PAGE_SIZE), prot); +- } +-} +- + #ifdef CONFIG_X86_32 + /* The 32-bit entry code needs to find cpu_entry_area. */ + DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); + #endif + ++#ifdef CONFIG_X86_64 ++/* ++ * Special IST stacks which the CPU switches to when it calls ++ * an IST-marked descriptor entry. 
Up to 7 stacks (hardware ++ * limit), all of them are 4K, except the debug stack which ++ * is 8K. ++ */ ++static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { ++ [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, ++ [DEBUG_STACK - 1] = DEBUG_STKSZ ++}; ++ ++static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks ++ [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); ++#endif ++ ++static void __init ++set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot) ++{ ++ for ( ; pages; pages--, idx--, ptr += PAGE_SIZE) ++ __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot); ++} ++ + /* Setup the fixmap mappings only once per-processor */ +-static inline void setup_cpu_entry_area(int cpu) ++static void __init setup_cpu_entry_area(int cpu) + { + #ifdef CONFIG_X86_64 + extern char _entry_trampoline[]; +@@ -532,15 +544,31 @@ static inline void setup_cpu_entry_area( + PAGE_KERNEL); + + #ifdef CONFIG_X86_32 +- this_cpu_write(cpu_entry_area, get_cpu_entry_area(cpu)); ++ per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu); + #endif + + #ifdef CONFIG_X86_64 ++ BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0); ++ BUILD_BUG_ON(sizeof(exception_stacks) != ++ sizeof(((struct cpu_entry_area *)0)->exception_stacks)); ++ set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks), ++ &per_cpu(exception_stacks, cpu), ++ sizeof(exception_stacks) / PAGE_SIZE, ++ PAGE_KERNEL); ++ + __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline), + __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX); + #endif + } + ++void __init setup_cpu_entry_areas(void) ++{ ++ unsigned int cpu; ++ ++ for_each_possible_cpu(cpu) ++ setup_cpu_entry_area(cpu); ++} ++ + /* Load the original GDT from the per-cpu structure */ + void load_direct_gdt(int cpu) + { +@@ -1385,20 +1413,6 @@ DEFINE_PER_CPU(unsigned int, irq_count) + DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; + EXPORT_PER_CPU_SYMBOL(__preempt_count); + +-/* +- * Special IST stacks 
which the CPU switches to when it calls +- * an IST-marked descriptor entry. Up to 7 stacks (hardware +- * limit), all of them are 4K, except the debug stack which +- * is 8K. +- */ +-static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { +- [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, +- [DEBUG_STACK - 1] = DEBUG_STKSZ +-}; +- +-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks +- [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); +- + /* May not be marked __init: used by software suspend */ + void syscall_init(void) + { +@@ -1607,7 +1621,7 @@ void cpu_init(void) + * set up and load the per-CPU TSS + */ + if (!oist->ist[0]) { +- char *estacks = per_cpu(exception_stacks, cpu); ++ char *estacks = get_cpu_entry_area(cpu)->exception_stacks; + + for (v = 0; v < N_EXCEPTION_STACKS; v++) { + estacks += exception_stack_sizes[v]; +@@ -1633,8 +1647,6 @@ void cpu_init(void) + initialize_tlbstate_and_flush(); + enter_lazy_tlb(&init_mm, me); + +- setup_cpu_entry_area(cpu); +- + /* + * Initialize the TSS. sp0 points to the entry trampoline stack + * regardless of what task is running. +@@ -1694,8 +1706,6 @@ void cpu_init(void) + initialize_tlbstate_and_flush(); + enter_lazy_tlb(&init_mm, curr); + +- setup_cpu_entry_area(cpu); +- + /* + * Initialize the TSS. Don't bother initializing sp0, as the initial + * task never enters user mode. 
+--- a/arch/x86/kernel/traps.c ++++ b/arch/x86/kernel/traps.c +@@ -947,6 +947,9 @@ dotraplinkage void do_iret_error(struct + + void __init trap_init(void) + { ++ /* Init cpu_entry_area before IST entries are set up */ ++ setup_cpu_entry_areas(); ++ + idt_setup_traps(); + + /* diff --git a/queue-4.14/x86-entry-64-paravirt-use-paravirt-safe-macro-to-access-eflags.patch b/queue-4.14/x86-entry-64-paravirt-use-paravirt-safe-macro-to-access-eflags.patch new file mode 100644 index 00000000000..77d3f2dfca5 --- /dev/null +++ b/queue-4.14/x86-entry-64-paravirt-use-paravirt-safe-macro-to-access-eflags.patch @@ -0,0 +1,113 @@ +From e17f8234538d1ff708673f287a42457c4dee720d Mon Sep 17 00:00:00 2001 +From: Boris Ostrovsky +Date: Mon, 4 Dec 2017 15:07:07 +0100 +Subject: x86/entry/64/paravirt: Use paravirt-safe macro to access eflags + +From: Boris Ostrovsky + +commit e17f8234538d1ff708673f287a42457c4dee720d upstream. + +Commit 1d3e53e8624a ("x86/entry/64: Refactor IRQ stacks and make them +NMI-safe") added DEBUG_ENTRY_ASSERT_IRQS_OFF macro that acceses eflags +using 'pushfq' instruction when testing for IF bit. On PV Xen guests +looking at IF flag directly will always see it set, resulting in 'ud2'. + +Introduce SAVE_FLAGS() macro that will use appropriate save_fl pv op when +running paravirt. + +Signed-off-by: Boris Ostrovsky +Signed-off-by: Thomas Gleixner +Reviewed-by: Juergen Gross +Cc: Andy Lutomirski +Cc: Borislav Petkov +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Dave Hansen +Cc: David Laight +Cc: Denys Vlasenko +Cc: Eduardo Valentin +Cc: Greg KH +Cc: H. 
Peter Anvin +Cc: Josh Poimboeuf +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Rik van Riel +Cc: Will Deacon +Cc: aliguori@amazon.com +Cc: daniel.gruss@iaik.tugraz.at +Cc: hughd@google.com +Cc: keescook@google.com +Cc: xen-devel@lists.xenproject.org +Link: https://lkml.kernel.org/r/20171204150604.899457242@linutronix.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/entry/entry_64.S | 7 ++++--- + arch/x86/include/asm/irqflags.h | 3 +++ + arch/x86/include/asm/paravirt.h | 9 +++++++++ + arch/x86/kernel/asm-offsets_64.c | 3 +++ + 4 files changed, 19 insertions(+), 3 deletions(-) + +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -462,12 +462,13 @@ END(irq_entries_start) + + .macro DEBUG_ENTRY_ASSERT_IRQS_OFF + #ifdef CONFIG_DEBUG_ENTRY +- pushfq +- testl $X86_EFLAGS_IF, (%rsp) ++ pushq %rax ++ SAVE_FLAGS(CLBR_RAX) ++ testl $X86_EFLAGS_IF, %eax + jz .Lokay_\@ + ud2 + .Lokay_\@: +- addq $8, %rsp ++ popq %rax + #endif + .endm + +--- a/arch/x86/include/asm/irqflags.h ++++ b/arch/x86/include/asm/irqflags.h +@@ -142,6 +142,9 @@ static inline notrace unsigned long arch + swapgs; \ + sysretl + ++#ifdef CONFIG_DEBUG_ENTRY ++#define SAVE_FLAGS(x) pushfq; popq %rax ++#endif + #else + #define INTERRUPT_RETURN iret + #define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit +--- a/arch/x86/include/asm/paravirt.h ++++ b/arch/x86/include/asm/paravirt.h +@@ -927,6 +927,15 @@ extern void default_banner(void); + PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ + CLBR_NONE, \ + jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64)) ++ ++#ifdef CONFIG_DEBUG_ENTRY ++#define SAVE_FLAGS(clobbers) \ ++ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \ ++ PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ ++ call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl); \ ++ PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) ++#endif ++ + #endif /* CONFIG_X86_32 */ + + #endif /* __ASSEMBLY__ */ +--- a/arch/x86/kernel/asm-offsets_64.c ++++ 
b/arch/x86/kernel/asm-offsets_64.c +@@ -23,6 +23,9 @@ int main(void) + #ifdef CONFIG_PARAVIRT + OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64); + OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs); ++#ifdef CONFIG_DEBUG_ENTRY ++ OFFSET(PV_IRQ_save_fl, pv_irq_ops, save_fl); ++#endif + BLANK(); + #endif + diff --git a/queue-4.14/x86-entry-64-pass-sp0-directly-to-load_sp0.patch b/queue-4.14/x86-entry-64-pass-sp0-directly-to-load_sp0.patch new file mode 100644 index 00000000000..ee3aa196c5b --- /dev/null +++ b/queue-4.14/x86-entry-64-pass-sp0-directly-to-load_sp0.patch @@ -0,0 +1,215 @@ +From da51da189a24bb9b7e2d5a123be096e51a4695a5 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Thu, 2 Nov 2017 00:59:10 -0700 +Subject: x86/entry/64: Pass SP0 directly to load_sp0() + +From: Andy Lutomirski + +commit da51da189a24bb9b7e2d5a123be096e51a4695a5 upstream. + +load_sp0() had an odd signature: + + void load_sp0(struct tss_struct *tss, struct thread_struct *thread); + +Simplify it to: + + void load_sp0(unsigned long sp0); + +Also simplify a few get_cpu()/put_cpu() sequences to +preempt_disable()/preempt_enable(). 
+ +Signed-off-by: Andy Lutomirski +Reviewed-by: Borislav Petkov +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/2655d8b42ed940aa384fe18ee1129bbbcf730a08.1509609304.git.luto@kernel.org +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/paravirt.h | 5 ++--- + arch/x86/include/asm/paravirt_types.h | 2 +- + arch/x86/include/asm/processor.h | 9 ++++----- + arch/x86/kernel/cpu/common.c | 4 ++-- + arch/x86/kernel/process_32.c | 2 +- + arch/x86/kernel/process_64.c | 2 +- + arch/x86/kernel/vm86_32.c | 14 ++++++-------- + arch/x86/xen/enlighten_pv.c | 7 +++---- + 8 files changed, 20 insertions(+), 25 deletions(-) + +--- a/arch/x86/include/asm/paravirt.h ++++ b/arch/x86/include/asm/paravirt.h +@@ -16,10 +16,9 @@ + #include + #include + +-static inline void load_sp0(struct tss_struct *tss, +- struct thread_struct *thread) ++static inline void load_sp0(unsigned long sp0) + { +- PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread); ++ PVOP_VCALL1(pv_cpu_ops.load_sp0, sp0); + } + + /* The paravirtualized CPUID instruction. 
*/ +--- a/arch/x86/include/asm/paravirt_types.h ++++ b/arch/x86/include/asm/paravirt_types.h +@@ -134,7 +134,7 @@ struct pv_cpu_ops { + void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries); + void (*free_ldt)(struct desc_struct *ldt, unsigned entries); + +- void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t); ++ void (*load_sp0)(unsigned long sp0); + + void (*set_iopl_mask)(unsigned mask); + +--- a/arch/x86/include/asm/processor.h ++++ b/arch/x86/include/asm/processor.h +@@ -518,9 +518,9 @@ static inline void native_set_iopl_mask( + } + + static inline void +-native_load_sp0(struct tss_struct *tss, struct thread_struct *thread) ++native_load_sp0(unsigned long sp0) + { +- tss->x86_tss.sp0 = thread->sp0; ++ this_cpu_write(cpu_tss.x86_tss.sp0, sp0); + } + + static inline void native_swapgs(void) +@@ -545,10 +545,9 @@ static inline unsigned long current_top_ + #else + #define __cpuid native_cpuid + +-static inline void load_sp0(struct tss_struct *tss, +- struct thread_struct *thread) ++static inline void load_sp0(unsigned long sp0) + { +- native_load_sp0(tss, thread); ++ native_load_sp0(sp0); + } + + #define set_iopl_mask native_set_iopl_mask +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1570,7 +1570,7 @@ void cpu_init(void) + initialize_tlbstate_and_flush(); + enter_lazy_tlb(&init_mm, me); + +- load_sp0(t, &current->thread); ++ load_sp0(current->thread.sp0); + set_tss_desc(cpu, t); + load_TR_desc(); + load_mm_ldt(&init_mm); +@@ -1625,7 +1625,7 @@ void cpu_init(void) + initialize_tlbstate_and_flush(); + enter_lazy_tlb(&init_mm, curr); + +- load_sp0(t, thread); ++ load_sp0(thread->sp0); + set_tss_desc(cpu, t); + load_TR_desc(); + load_mm_ldt(&init_mm); +--- a/arch/x86/kernel/process_32.c ++++ b/arch/x86/kernel/process_32.c +@@ -287,7 +287,7 @@ __switch_to(struct task_struct *prev_p, + * current_thread_info(). Refresh the SYSENTER configuration in + * case prev or next is vm86.
+ */ +- load_sp0(tss, next); ++ load_sp0(next->sp0); + refresh_sysenter_cs(next); + this_cpu_write(cpu_current_top_of_stack, + (unsigned long)task_stack_page(next_p) + +--- a/arch/x86/kernel/process_64.c ++++ b/arch/x86/kernel/process_64.c +@@ -465,7 +465,7 @@ __switch_to(struct task_struct *prev_p, + this_cpu_write(current_task, next_p); + + /* Reload sp0. */ +- load_sp0(tss, next); ++ load_sp0(next->sp0); + + /* + * Now maybe reload the debug registers and handle I/O bitmaps +--- a/arch/x86/kernel/vm86_32.c ++++ b/arch/x86/kernel/vm86_32.c +@@ -95,7 +95,6 @@ + + void save_v86_state(struct kernel_vm86_regs *regs, int retval) + { +- struct tss_struct *tss; + struct task_struct *tsk = current; + struct vm86plus_struct __user *user; + struct vm86 *vm86 = current->thread.vm86; +@@ -147,13 +146,13 @@ void save_v86_state(struct kernel_vm86_r + do_exit(SIGSEGV); + } + +- tss = &per_cpu(cpu_tss, get_cpu()); ++ preempt_disable(); + tsk->thread.sp0 = vm86->saved_sp0; + tsk->thread.sysenter_cs = __KERNEL_CS; +- load_sp0(tss, &tsk->thread); ++ load_sp0(tsk->thread.sp0); + refresh_sysenter_cs(&tsk->thread); + vm86->saved_sp0 = 0; +- put_cpu(); ++ preempt_enable(); + + memcpy(&regs->pt, &vm86->regs32, sizeof(struct pt_regs)); + +@@ -239,7 +238,6 @@ SYSCALL_DEFINE2(vm86, unsigned long, cmd + + static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) + { +- struct tss_struct *tss; + struct task_struct *tsk = current; + struct vm86 *vm86 = tsk->thread.vm86; + struct kernel_vm86_regs vm86regs; +@@ -367,8 +365,8 @@ static long do_sys_vm86(struct vm86plus_ + vm86->saved_sp0 = tsk->thread.sp0; + lazy_save_gs(vm86->regs32.gs); + +- tss = &per_cpu(cpu_tss, get_cpu()); + /* make room for real-mode segments */ ++ preempt_disable(); + tsk->thread.sp0 += 16; + + if (static_cpu_has(X86_FEATURE_SEP)) { +@@ -376,8 +374,8 @@ static long do_sys_vm86(struct vm86plus_ + refresh_sysenter_cs(&tsk->thread); + } + +- load_sp0(tss, &tsk->thread); +- put_cpu(); ++
load_sp0(tsk->thread.sp0); ++ preempt_enable(); + + if (vm86->flags & VM86_SCREEN_BITMAP) + mark_screen_rdonly(tsk->mm); +--- a/arch/x86/xen/enlighten_pv.c ++++ b/arch/x86/xen/enlighten_pv.c +@@ -811,15 +811,14 @@ static void __init xen_write_gdt_entry_b + } + } + +-static void xen_load_sp0(struct tss_struct *tss, +- struct thread_struct *thread) ++static void xen_load_sp0(unsigned long sp0) + { + struct multicall_space mcs; + + mcs = xen_mc_entry(0); +- MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0); ++ MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0); + xen_mc_issue(PARAVIRT_LAZY_CPU); +- tss->x86_tss.sp0 = thread->sp0; ++ this_cpu_write(cpu_tss.x86_tss.sp0, sp0); + } + + void xen_set_iopl_mask(unsigned mask) diff --git a/queue-4.14/x86-entry-64-remove-all-remaining-direct-thread_struct-sp0-reads.patch b/queue-4.14/x86-entry-64-remove-all-remaining-direct-thread_struct-sp0-reads.patch new file mode 100644 index 00000000000..9f2f65994ba --- /dev/null +++ b/queue-4.14/x86-entry-64-remove-all-remaining-direct-thread_struct-sp0-reads.patch @@ -0,0 +1,87 @@ +From 46f5a10a721ce8dce8cc8fe55279b49e1c6b3288 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Thu, 2 Nov 2017 00:59:14 -0700 +Subject: x86/entry/64: Remove all remaining direct thread_struct::sp0 reads + +From: Andy Lutomirski + +commit 46f5a10a721ce8dce8cc8fe55279b49e1c6b3288 upstream. + +The only remaining readers in context switch code or vm86(), and +they all just want to update TSS.sp0 to match the current task. +Replace them all with a new helper update_sp0(). 
+ +Signed-off-by: Andy Lutomirski +Reviewed-by: Borislav Petkov +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/2d231687f4ff288c9d9e98d7861b7df374246ac3.1509609304.git.luto@kernel.org +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/switch_to.h | 6 ++++++ + arch/x86/kernel/process_32.c | 2 +- + arch/x86/kernel/process_64.c | 2 +- + arch/x86/kernel/vm86_32.c | 4 ++-- + 4 files changed, 10 insertions(+), 4 deletions(-) + +--- a/arch/x86/include/asm/switch_to.h ++++ b/arch/x86/include/asm/switch_to.h +@@ -85,4 +85,10 @@ static inline void refresh_sysenter_cs(s + } + #endif + ++/* This is used when switching tasks or entering/exiting vm86 mode. */ ++static inline void update_sp0(struct task_struct *task) ++{ ++ load_sp0(task->thread.sp0); ++} ++ + #endif /* _ASM_X86_SWITCH_TO_H */ +--- a/arch/x86/kernel/process_32.c ++++ b/arch/x86/kernel/process_32.c +@@ -287,7 +287,7 @@ __switch_to(struct task_struct *prev_p, + * current_thread_info(). Refresh the SYSENTER configuration in + * case prev or next is vm86. + */ +- load_sp0(next->sp0); ++ update_sp0(next_p); + refresh_sysenter_cs(next); + this_cpu_write(cpu_current_top_of_stack, + (unsigned long)task_stack_page(next_p) + +--- a/arch/x86/kernel/process_64.c ++++ b/arch/x86/kernel/process_64.c +@@ -465,7 +465,7 @@ __switch_to(struct task_struct *prev_p, + this_cpu_write(current_task, next_p); + + /* Reload sp0. 
*/ +- load_sp0(next->sp0); ++ update_sp0(next_p); + + /* + * Now maybe reload the debug registers and handle I/O bitmaps +--- a/arch/x86/kernel/vm86_32.c ++++ b/arch/x86/kernel/vm86_32.c +@@ -149,7 +149,7 @@ void save_v86_state(struct kernel_vm86_r + preempt_disable(); + tsk->thread.sp0 = vm86->saved_sp0; + tsk->thread.sysenter_cs = __KERNEL_CS; +- load_sp0(tsk->thread.sp0); ++ update_sp0(tsk); + refresh_sysenter_cs(&tsk->thread); + vm86->saved_sp0 = 0; + preempt_enable(); +@@ -374,7 +374,7 @@ static long do_sys_vm86(struct vm86plus_ + refresh_sysenter_cs(&tsk->thread); + } + +- load_sp0(tsk->thread.sp0); ++ update_sp0(tsk); + preempt_enable(); + + if (vm86->flags & VM86_SCREEN_BITMAP) diff --git a/queue-4.14/x86-entry-64-remove-the-restore_..._regs-infrastructure.patch b/queue-4.14/x86-entry-64-remove-the-restore_..._regs-infrastructure.patch new file mode 100644 index 00000000000..9ad4927d8bd --- /dev/null +++ b/queue-4.14/x86-entry-64-remove-the-restore_..._regs-infrastructure.patch @@ -0,0 +1,95 @@ +From c39858de696f0cc160a544455e8403d663d577e9 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Thu, 2 Nov 2017 00:59:06 -0700 +Subject: x86/entry/64: Remove the RESTORE_..._REGS infrastructure + +From: Andy Lutomirski + +commit c39858de696f0cc160a544455e8403d663d577e9 upstream. + +All users of RESTORE_EXTRA_REGS, RESTORE_C_REGS and such, and +REMOVE_PT_GPREGS_FROM_STACK are gone. Delete the macros. 
+ +Signed-off-by: Andy Lutomirski +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/c32672f6e47c561893316d48e06c7656b1039a36.1509609304.git.luto@kernel.org +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/entry/calling.h | 52 ----------------------------------------------- + 1 file changed, 52 deletions(-) + +--- a/arch/x86/entry/calling.h ++++ b/arch/x86/entry/calling.h +@@ -142,16 +142,6 @@ For 32-bit we have the following convent + UNWIND_HINT_REGS offset=\offset + .endm + +- .macro RESTORE_EXTRA_REGS offset=0 +- movq 0*8+\offset(%rsp), %r15 +- movq 1*8+\offset(%rsp), %r14 +- movq 2*8+\offset(%rsp), %r13 +- movq 3*8+\offset(%rsp), %r12 +- movq 4*8+\offset(%rsp), %rbp +- movq 5*8+\offset(%rsp), %rbx +- UNWIND_HINT_REGS offset=\offset extra=0 +- .endm +- + .macro POP_EXTRA_REGS + popq %r15 + popq %r14 +@@ -173,48 +163,6 @@ For 32-bit we have the following convent + popq %rdi + .endm + +- .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1 +- .if \rstor_r11 +- movq 6*8(%rsp), %r11 +- .endif +- .if \rstor_r8910 +- movq 7*8(%rsp), %r10 +- movq 8*8(%rsp), %r9 +- movq 9*8(%rsp), %r8 +- .endif +- .if \rstor_rax +- movq 10*8(%rsp), %rax +- .endif +- .if \rstor_rcx +- movq 11*8(%rsp), %rcx +- .endif +- .if \rstor_rdx +- movq 12*8(%rsp), %rdx +- .endif +- movq 13*8(%rsp), %rsi +- movq 14*8(%rsp), %rdi +- UNWIND_HINT_IRET_REGS offset=16*8 +- .endm +- .macro RESTORE_C_REGS +- RESTORE_C_REGS_HELPER 1,1,1,1,1 +- .endm +- .macro RESTORE_C_REGS_EXCEPT_RAX +- RESTORE_C_REGS_HELPER 0,1,1,1,1 +- .endm +- .macro RESTORE_C_REGS_EXCEPT_RCX +- RESTORE_C_REGS_HELPER 1,0,1,1,1 +- .endm +- .macro RESTORE_C_REGS_EXCEPT_R11 +- RESTORE_C_REGS_HELPER 1,1,0,1,1 +- .endm +- .macro RESTORE_C_REGS_EXCEPT_RCX_R11 +- RESTORE_C_REGS_HELPER 1,0,0,1,1 +- .endm +- +- .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0 +- subq 
$-(15*8+\addskip), %rsp +- .endm +- + .macro icebp + .byte 0xf1 + .endm diff --git a/queue-4.14/x86-entry-64-remove-the-restore_c_regs_and_iret-label.patch b/queue-4.14/x86-entry-64-remove-the-restore_c_regs_and_iret-label.patch new file mode 100644 index 00000000000..1edcadbfa0e --- /dev/null +++ b/queue-4.14/x86-entry-64-remove-the-restore_c_regs_and_iret-label.patch @@ -0,0 +1,65 @@ +From 9da78ba6b47b46428cfdfc0851511ab29c869798 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Thu, 2 Nov 2017 00:58:58 -0700 +Subject: x86/entry/64: Remove the restore_c_regs_and_iret label + +From: Andy Lutomirski + +commit 9da78ba6b47b46428cfdfc0851511ab29c869798 upstream. + +The only user was the 64-bit opportunistic SYSRET failure path, and +that path didn't really need it. This change makes the +opportunistic SYSRET code a bit more straightforward and gets rid of +the label. + +Signed-off-by: Andy Lutomirski +Reviewed-by: Borislav Petkov +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/be3006a7ad3326e3458cf1cc55d416252cbe1986.1509609304.git.luto@kernel.org +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/entry/entry_64.S | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -246,7 +246,6 @@ entry_SYSCALL64_slow_path: + call do_syscall_64 /* returns with IRQs disabled */ + + return_from_SYSCALL_64: +- RESTORE_EXTRA_REGS + TRACE_IRQS_IRETQ /* we're about to change IF */ + + /* +@@ -315,6 +314,7 @@ return_from_SYSCALL_64: + */ + syscall_return_via_sysret: + /* rcx and r11 are already restored (see code above) */ ++ RESTORE_EXTRA_REGS + RESTORE_C_REGS_EXCEPT_RCX_R11 + movq RSP(%rsp), %rsp + UNWIND_HINT_EMPTY +@@ -322,7 +322,7 @@ syscall_return_via_sysret: + + opportunistic_sysret_failed: + SWAPGS +- jmp restore_c_regs_and_iret ++ jmp restore_regs_and_iret + 
END(entry_SYSCALL_64) + + ENTRY(stub_ptregs_64) +@@ -639,7 +639,6 @@ retint_kernel: + */ + GLOBAL(restore_regs_and_iret) + RESTORE_EXTRA_REGS +-restore_c_regs_and_iret: + RESTORE_C_REGS + REMOVE_PT_GPREGS_FROM_STACK 8 + INTERRUPT_RETURN diff --git a/queue-4.14/x86-entry-64-remove-the-sysenter-stack-canary.patch b/queue-4.14/x86-entry-64-remove-the-sysenter-stack-canary.patch new file mode 100644 index 00000000000..c77e7930be0 --- /dev/null +++ b/queue-4.14/x86-entry-64-remove-the-sysenter-stack-canary.patch @@ -0,0 +1,96 @@ +From 7fbbd5cbebf118a9e09f5453f686656a167c3d1c Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Mon, 4 Dec 2017 15:07:27 +0100 +Subject: x86/entry/64: Remove the SYSENTER stack canary + +From: Andy Lutomirski + +commit 7fbbd5cbebf118a9e09f5453f686656a167c3d1c upstream. + +Now that the SYSENTER stack has a guard page, there's no need for a canary +to detect overflow after the fact. + +Signed-off-by: Andy Lutomirski +Signed-off-by: Thomas Gleixner +Reviewed-by: Thomas Gleixner +Reviewed-by: Borislav Petkov +Cc: Boris Ostrovsky +Cc: Borislav Petkov +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Dave Hansen +Cc: David Laight +Cc: Denys Vlasenko +Cc: Eduardo Valentin +Cc: Greg KH +Cc: H. 
Peter Anvin +Cc: Josh Poimboeuf +Cc: Juergen Gross +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Rik van Riel +Cc: Will Deacon +Cc: aliguori@amazon.com +Cc: daniel.gruss@iaik.tugraz.at +Cc: hughd@google.com +Cc: keescook@google.com +Link: https://lkml.kernel.org/r/20171204150606.572577316@linutronix.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/processor.h | 1 - + arch/x86/kernel/dumpstack.c | 3 +-- + arch/x86/kernel/process.c | 1 - + arch/x86/kernel/traps.c | 7 ------- + 4 files changed, 1 insertion(+), 11 deletions(-) + +--- a/arch/x86/include/asm/processor.h ++++ b/arch/x86/include/asm/processor.h +@@ -341,7 +341,6 @@ struct tss_struct { + * Space for the temporary SYSENTER stack, used for SYSENTER + * and the entry trampoline as well. + */ +- unsigned long SYSENTER_stack_canary; + unsigned long SYSENTER_stack[64]; + + /* +--- a/arch/x86/kernel/dumpstack.c ++++ b/arch/x86/kernel/dumpstack.c +@@ -48,8 +48,7 @@ bool in_sysenter_stack(unsigned long *st + int cpu = smp_processor_id(); + struct tss_struct *tss = &get_cpu_entry_area(cpu)->tss; + +- /* Treat the canary as part of the stack for unwinding purposes. */ +- void *begin = &tss->SYSENTER_stack_canary; ++ void *begin = &tss->SYSENTER_stack; + void *end = (void *)&tss->SYSENTER_stack + sizeof(tss->SYSENTER_stack); + + if ((void *)stack < begin || (void *)stack >= end) +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -81,7 +81,6 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED( + */ + .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, + #endif +- .SYSENTER_stack_canary = STACK_END_MAGIC, + }; + EXPORT_PER_CPU_SYMBOL(cpu_tss); + +--- a/arch/x86/kernel/traps.c ++++ b/arch/x86/kernel/traps.c +@@ -814,13 +814,6 @@ dotraplinkage void do_debug(struct pt_re + debug_stack_usage_dec(); + + exit: +- /* +- * This is the most likely code path that involves non-trivial use +- * of the SYSENTER stack. Check that we haven't overrun it. 
+- */ +- WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC, +- "Overran or corrupted SYSENTER stack\n"); +- + ist_exit(regs); + } + NOKPROBE_SYMBOL(do_debug); diff --git a/queue-4.14/x86-entry-64-remove-thread_struct-sp0.patch b/queue-4.14/x86-entry-64-remove-thread_struct-sp0.patch new file mode 100644 index 00000000000..7407229e0f8 --- /dev/null +++ b/queue-4.14/x86-entry-64-remove-thread_struct-sp0.patch @@ -0,0 +1,139 @@ +From d375cf1530595e33961a8844192cddab913650e3 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Thu, 2 Nov 2017 00:59:16 -0700 +Subject: x86/entry/64: Remove thread_struct::sp0 + +From: Andy Lutomirski + +commit d375cf1530595e33961a8844192cddab913650e3 upstream. + +On x86_64, we can easily calculate sp0 when needed instead of +storing it in thread_struct. + +On x86_32, a similar cleanup would be possible, but it would require +cleaning up the vm86 code first, and that can wait for a later +cleanup series. + +Signed-off-by: Andy Lutomirski +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/719cd9c66c548c4350d98a90f050aee8b17f8919.1509609304.git.luto@kernel.org +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/compat.h | 1 + + arch/x86/include/asm/processor.h | 28 +++++++++------------------- + arch/x86/include/asm/switch_to.h | 6 ++++++ + arch/x86/kernel/process_64.c | 1 - + 4 files changed, 16 insertions(+), 20 deletions(-) + +--- a/arch/x86/include/asm/compat.h ++++ b/arch/x86/include/asm/compat.h +@@ -7,6 +7,7 @@ + */ + #include + #include ++#include + #include + #include + #include +--- a/arch/x86/include/asm/processor.h ++++ b/arch/x86/include/asm/processor.h +@@ -431,7 +431,9 @@ typedef struct { + struct thread_struct { + /* Cached TLS descriptors: */ + struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; ++#ifdef CONFIG_X86_32 + unsigned long sp0; ++#endif + unsigned long sp; 
+ #ifdef CONFIG_X86_32 + unsigned long sysenter_cs; +@@ -798,6 +800,13 @@ static inline void spin_lock_prefetch(co + + #define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1)) + ++#define task_pt_regs(task) \ ++({ \ ++ unsigned long __ptr = (unsigned long)task_stack_page(task); \ ++ __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \ ++ ((struct pt_regs *)__ptr) - 1; \ ++}) ++ + #ifdef CONFIG_X86_32 + /* + * User space process size: 3GB (default). +@@ -817,23 +826,6 @@ static inline void spin_lock_prefetch(co + .addr_limit = KERNEL_DS, \ + } + +-/* +- * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack. +- * This is necessary to guarantee that the entire "struct pt_regs" +- * is accessible even if the CPU haven't stored the SS/ESP registers +- * on the stack (interrupt gate does not save these registers +- * when switching to the same priv ring). +- * Therefore beware: accessing the ss/esp fields of the +- * "struct pt_regs" is possible, but they may contain the +- * completely wrong values. 
+- */ +-#define task_pt_regs(task) \ +-({ \ +- unsigned long __ptr = (unsigned long)task_stack_page(task); \ +- __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \ +- ((struct pt_regs *)__ptr) - 1; \ +-}) +- + #define KSTK_ESP(task) (task_pt_regs(task)->sp) + + #else +@@ -867,11 +859,9 @@ static inline void spin_lock_prefetch(co + #define STACK_TOP_MAX TASK_SIZE_MAX + + #define INIT_THREAD { \ +- .sp0 = TOP_OF_INIT_STACK, \ + .addr_limit = KERNEL_DS, \ + } + +-#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) + extern unsigned long KSTK_ESP(struct task_struct *task); + + #endif /* CONFIG_X86_64 */ +--- a/arch/x86/include/asm/switch_to.h ++++ b/arch/x86/include/asm/switch_to.h +@@ -2,6 +2,8 @@ + #ifndef _ASM_X86_SWITCH_TO_H + #define _ASM_X86_SWITCH_TO_H + ++#include ++ + struct task_struct; /* one of the stranger aspects of C forward declarations */ + + struct task_struct *__switch_to_asm(struct task_struct *prev, +@@ -88,7 +90,11 @@ static inline void refresh_sysenter_cs(s + /* This is used when switching tasks or entering/exiting vm86 mode. 
*/ + static inline void update_sp0(struct task_struct *task) + { ++#ifdef CONFIG_X86_32 + load_sp0(task->thread.sp0); ++#else ++ load_sp0(task_top_of_stack(task)); ++#endif + } + + #endif /* _ASM_X86_SWITCH_TO_H */ +--- a/arch/x86/kernel/process_64.c ++++ b/arch/x86/kernel/process_64.c +@@ -274,7 +274,6 @@ int copy_thread_tls(unsigned long clone_ + struct inactive_task_frame *frame; + struct task_struct *me = current; + +- p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE; + childregs = task_pt_regs(p); + fork_frame = container_of(childregs, struct fork_frame, regs); + frame = &fork_frame->frame; diff --git a/queue-4.14/x86-entry-64-return-to-userspace-from-the-trampoline-stack.patch b/queue-4.14/x86-entry-64-return-to-userspace-from-the-trampoline-stack.patch new file mode 100644 index 00000000000..1677be92603 --- /dev/null +++ b/queue-4.14/x86-entry-64-return-to-userspace-from-the-trampoline-stack.patch @@ -0,0 +1,124 @@ +From 3e3b9293d392c577b62e24e4bc9982320438e749 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Mon, 4 Dec 2017 15:07:24 +0100 +Subject: x86/entry/64: Return to userspace from the trampoline stack + +From: Andy Lutomirski + +commit 3e3b9293d392c577b62e24e4bc9982320438e749 upstream. + +By itself, this is useless. It gives us the ability to run some final code +before exit that cannot run on the kernel stack. This could include a CR3 +switch a la PAGE_TABLE_ISOLATION or some kernel stack erasing, for +example. (Or even weird things like *changing* which kernel stack gets +used as an ASLR-strengthening mechanism.) + +The SYSRET32 path is not covered yet. It could be in the future or +we could just ignore it and force the slow path if needed. 
+ +Signed-off-by: Andy Lutomirski +Signed-off-by: Thomas Gleixner +Reviewed-by: Thomas Gleixner +Reviewed-by: Borislav Petkov +Cc: Boris Ostrovsky +Cc: Borislav Petkov +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Dave Hansen +Cc: David Laight +Cc: Denys Vlasenko +Cc: Eduardo Valentin +Cc: Greg KH +Cc: H. Peter Anvin +Cc: Josh Poimboeuf +Cc: Juergen Gross +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Rik van Riel +Cc: Will Deacon +Cc: aliguori@amazon.com +Cc: daniel.gruss@iaik.tugraz.at +Cc: hughd@google.com +Cc: keescook@google.com +Link: https://lkml.kernel.org/r/20171204150606.306546484@linutronix.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/entry/entry_64.S | 55 ++++++++++++++++++++++++++++++++++++++++++---- + 1 file changed, 51 insertions(+), 4 deletions(-) + +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -326,8 +326,24 @@ syscall_return_via_sysret: + popq %rsi /* skip rcx */ + popq %rdx + popq %rsi ++ ++ /* ++ * Now all regs are restored except RSP and RDI. ++ * Save old stack pointer and switch to trampoline stack. ++ */ ++ movq %rsp, %rdi ++ movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp ++ ++ pushq RSP-RDI(%rdi) /* RSP */ ++ pushq (%rdi) /* RDI */ ++ ++ /* ++ * We are on the trampoline stack. All regs except RDI are live. ++ * We can do future final exit work right here. ++ */ ++ + popq %rdi +- movq RSP-ORIG_RAX(%rsp), %rsp ++ popq %rsp + USERGS_SYSRET64 + END(entry_SYSCALL_64) + +@@ -630,10 +646,41 @@ GLOBAL(swapgs_restore_regs_and_return_to + ud2 + 1: + #endif +- SWAPGS + POP_EXTRA_REGS +- POP_C_REGS +- addq $8, %rsp /* skip regs->orig_ax */ ++ popq %r11 ++ popq %r10 ++ popq %r9 ++ popq %r8 ++ popq %rax ++ popq %rcx ++ popq %rdx ++ popq %rsi ++ ++ /* ++ * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS. ++ * Save old stack pointer and switch to trampoline stack. 
++ */ ++ movq %rsp, %rdi ++ movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp ++ ++ /* Copy the IRET frame to the trampoline stack. */ ++ pushq 6*8(%rdi) /* SS */ ++ pushq 5*8(%rdi) /* RSP */ ++ pushq 4*8(%rdi) /* EFLAGS */ ++ pushq 3*8(%rdi) /* CS */ ++ pushq 2*8(%rdi) /* RIP */ ++ ++ /* Push user RDI on the trampoline stack. */ ++ pushq (%rdi) ++ ++ /* ++ * We are on the trampoline stack. All regs except RDI are live. ++ * We can do future final exit work right here. ++ */ ++ ++ /* Restore RDI. */ ++ popq %rdi ++ SWAPGS + INTERRUPT_RETURN + + diff --git a/queue-4.14/x86-entry-64-separate-cpu_current_top_of_stack-from-tss.sp0.patch b/queue-4.14/x86-entry-64-separate-cpu_current_top_of_stack-from-tss.sp0.patch new file mode 100644 index 00000000000..81f291fa415 --- /dev/null +++ b/queue-4.14/x86-entry-64-separate-cpu_current_top_of_stack-from-tss.sp0.patch @@ -0,0 +1,144 @@ +From 9aaefe7b59ae00605256a7d6bd1c1456432495fc Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Mon, 4 Dec 2017 15:07:21 +0100 +Subject: x86/entry/64: Separate cpu_current_top_of_stack from TSS.sp0 + +From: Andy Lutomirski + +commit 9aaefe7b59ae00605256a7d6bd1c1456432495fc upstream. + +On 64-bit kernels, we used to assume that TSS.sp0 was the current +top of stack. With the addition of an entry trampoline, this will +no longer be the case. Store the current top of stack in TSS.sp1, +which is otherwise unused but shares the same cacheline. + +Signed-off-by: Andy Lutomirski +Signed-off-by: Thomas Gleixner +Reviewed-by: Thomas Gleixner +Reviewed-by: Borislav Petkov +Cc: Boris Ostrovsky +Cc: Borislav Petkov +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Dave Hansen +Cc: David Laight +Cc: Denys Vlasenko +Cc: Eduardo Valentin +Cc: Greg KH +Cc: H. 
Peter Anvin +Cc: Josh Poimboeuf +Cc: Juergen Gross +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Rik van Riel +Cc: Will Deacon +Cc: aliguori@amazon.com +Cc: daniel.gruss@iaik.tugraz.at +Cc: hughd@google.com +Cc: keescook@google.com +Link: https://lkml.kernel.org/r/20171204150606.050864668@linutronix.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/processor.h | 18 +++++++++++++----- + arch/x86/include/asm/thread_info.h | 2 +- + arch/x86/kernel/asm-offsets_64.c | 1 + + arch/x86/kernel/process.c | 10 ++++++++++ + arch/x86/kernel/process_64.c | 1 + + 5 files changed, 26 insertions(+), 6 deletions(-) + +--- a/arch/x86/include/asm/processor.h ++++ b/arch/x86/include/asm/processor.h +@@ -309,7 +309,13 @@ struct x86_hw_tss { + struct x86_hw_tss { + u32 reserved1; + u64 sp0; ++ ++ /* ++ * We store cpu_current_top_of_stack in sp1 so it's always accessible. ++ * Linux does not use ring 1, so sp1 is not otherwise needed. ++ */ + u64 sp1; ++ + u64 sp2; + u64 reserved2; + u64 ist[7]; +@@ -368,6 +374,8 @@ DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_ + + #ifdef CONFIG_X86_32 + DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); ++#else ++#define cpu_current_top_of_stack cpu_tss.x86_tss.sp1 + #endif + + /* +@@ -539,12 +547,12 @@ static inline void native_swapgs(void) + + static inline unsigned long current_top_of_stack(void) + { +-#ifdef CONFIG_X86_64 +- return this_cpu_read_stable(cpu_tss.x86_tss.sp0); +-#else +- /* sp0 on x86_32 is special in and around vm86 mode. */ ++ /* ++ * We can't read directly from tss.sp0: sp0 on x86_32 is special in ++ * and around vm86 mode and sp0 on x86_64 is special because of the ++ * entry trampoline. 
++ */ + return this_cpu_read_stable(cpu_current_top_of_stack); +-#endif + } + + static inline bool on_thread_stack(void) +--- a/arch/x86/include/asm/thread_info.h ++++ b/arch/x86/include/asm/thread_info.h +@@ -207,7 +207,7 @@ static inline int arch_within_stack_fram + #else /* !__ASSEMBLY__ */ + + #ifdef CONFIG_X86_64 +-# define cpu_current_top_of_stack (cpu_tss + TSS_sp0) ++# define cpu_current_top_of_stack (cpu_tss + TSS_sp1) + #endif + + #endif +--- a/arch/x86/kernel/asm-offsets_64.c ++++ b/arch/x86/kernel/asm-offsets_64.c +@@ -66,6 +66,7 @@ int main(void) + + OFFSET(TSS_ist, tss_struct, x86_tss.ist); + OFFSET(TSS_sp0, tss_struct, x86_tss.sp0); ++ OFFSET(TSS_sp1, tss_struct, x86_tss.sp1); + BLANK(); + + #ifdef CONFIG_CC_STACKPROTECTOR +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -56,6 +56,16 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED( + * Poison it. + */ + .sp0 = (1UL << (BITS_PER_LONG-1)) + 1, ++ ++#ifdef CONFIG_X86_64 ++ /* ++ * .sp1 is cpu_current_top_of_stack. The init task never ++ * runs user code, but cpu_current_top_of_stack should still ++ * be well defined before the first context switch. ++ */ ++ .sp1 = TOP_OF_INIT_STACK, ++#endif ++ + #ifdef CONFIG_X86_32 + .ss0 = __KERNEL_DS, + .ss1 = __KERNEL_CS, +--- a/arch/x86/kernel/process_64.c ++++ b/arch/x86/kernel/process_64.c +@@ -461,6 +461,7 @@ __switch_to(struct task_struct *prev_p, + * Switch the PDA and FPU contexts. + */ + this_cpu_write(current_task, next_p); ++ this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p)); + + /* Reload sp0. 
*/ + update_sp0(next_p); diff --git a/queue-4.14/x86-entry-64-shorten-test-instructions.patch b/queue-4.14/x86-entry-64-shorten-test-instructions.patch new file mode 100644 index 00000000000..60e9c23a802 --- /dev/null +++ b/queue-4.14/x86-entry-64-shorten-test-instructions.patch @@ -0,0 +1,48 @@ +From 1e4c4f610f774df6088d7c065b2dd4d22adba698 Mon Sep 17 00:00:00 2001 +From: Borislav Petkov +Date: Thu, 2 Nov 2017 13:09:26 +0100 +Subject: x86/entry/64: Shorten TEST instructions + +From: Borislav Petkov + +commit 1e4c4f610f774df6088d7c065b2dd4d22adba698 upstream. + +Convert TESTL to TESTB and save 3 bytes per callsite. + +No functionality change. + +Signed-off-by: Borislav Petkov +Cc: Andy Lutomirski +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/20171102120926.4srwerqrr7g72e2k@pd.tnic +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/entry/entry_64.S | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -621,7 +621,7 @@ GLOBAL(retint_user) + GLOBAL(swapgs_restore_regs_and_return_to_usermode) + #ifdef CONFIG_DEBUG_ENTRY + /* Assert that pt_regs indicates user mode. */ +- testl $3, CS(%rsp) ++ testb $3, CS(%rsp) + jnz 1f + ud2 + 1: +@@ -654,7 +654,7 @@ retint_kernel: + GLOBAL(restore_regs_and_return_to_kernel) + #ifdef CONFIG_DEBUG_ENTRY + /* Assert that pt_regs indicates kernel mode. 
*/ +- testl $3, CS(%rsp) ++ testb $3, CS(%rsp) + jz 1f + ud2 + 1: diff --git a/queue-4.14/x86-entry-64-shrink-paranoid_exit_restore-and-make-labels-local.patch b/queue-4.14/x86-entry-64-shrink-paranoid_exit_restore-and-make-labels-local.patch new file mode 100644 index 00000000000..51a259229f3 --- /dev/null +++ b/queue-4.14/x86-entry-64-shrink-paranoid_exit_restore-and-make-labels-local.patch @@ -0,0 +1,60 @@ +From e53178328c9b96fbdbc719e78c93b5687ee007c3 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Thu, 2 Nov 2017 00:59:02 -0700 +Subject: x86/entry/64: Shrink paranoid_exit_restore and make labels local + +From: Andy Lutomirski + +commit e53178328c9b96fbdbc719e78c93b5687ee007c3 upstream. + +paranoid_exit_restore was a copy of restore_regs_and_return_to_kernel. +Merge them and make the paranoid_exit internal labels local. + +Keeping .Lparanoid_exit makes the code a bit shorter because it +allows a 2-byte jnz instead of a 5-byte jnz. + +Saves 96 bytes of text. + +( This is still a bit suboptimal in a non-CONFIG_TRACE_IRQFLAGS + kernel, but fixing that would make the code rather messy. ) + +Signed-off-by: Andy Lutomirski +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/510d66a1895cda9473c84b1086f0bb974f22de6a.1509609304.git.luto@kernel.org +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/entry/entry_64.S | 13 +++++-------- + 1 file changed, 5 insertions(+), 8 deletions(-) + +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -1124,17 +1124,14 @@ ENTRY(paranoid_exit) + DISABLE_INTERRUPTS(CLBR_ANY) + TRACE_IRQS_OFF_DEBUG + testl %ebx, %ebx /* swapgs needed? 
*/ +- jnz paranoid_exit_no_swapgs ++ jnz .Lparanoid_exit_no_swapgs + TRACE_IRQS_IRETQ + SWAPGS_UNSAFE_STACK +- jmp paranoid_exit_restore +-paranoid_exit_no_swapgs: ++ jmp .Lparanoid_exit_restore ++.Lparanoid_exit_no_swapgs: + TRACE_IRQS_IRETQ_DEBUG +-paranoid_exit_restore: +- RESTORE_EXTRA_REGS +- RESTORE_C_REGS +- REMOVE_PT_GPREGS_FROM_STACK 8 +- INTERRUPT_RETURN ++.Lparanoid_exit_restore: ++ jmp restore_regs_and_return_to_kernel + END(paranoid_exit) + + /* diff --git a/queue-4.14/x86-entry-64-simplify-reg-restore-code-in-the-standard-iret-paths.patch b/queue-4.14/x86-entry-64-simplify-reg-restore-code-in-the-standard-iret-paths.patch new file mode 100644 index 00000000000..ac40b6a6c94 --- /dev/null +++ b/queue-4.14/x86-entry-64-simplify-reg-restore-code-in-the-standard-iret-paths.patch @@ -0,0 +1,91 @@ +From e872045bfd9c465a8555bab4b8567d56a4d2d3bb Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Thu, 2 Nov 2017 00:59:01 -0700 +Subject: x86/entry/64: Simplify reg restore code in the standard IRET paths + +From: Andy Lutomirski + +commit e872045bfd9c465a8555bab4b8567d56a4d2d3bb upstream. + +The old code restored all the registers with movq instead of pop. + +In theory, this was done because some CPUs have higher movq +throughput, but any gain there would be tiny and is almost certainly +outweighed by the higher text size. + +This saves 96 bytes of text. 
+ +Signed-off-by: Andy Lutomirski +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/ad82520a207ccd851b04ba613f4f752b33ac05f7.1509609304.git.luto@kernel.org +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/entry/calling.h | 21 +++++++++++++++++++++ + arch/x86/entry/entry_64.S | 12 ++++++------ + 2 files changed, 27 insertions(+), 6 deletions(-) + +--- a/arch/x86/entry/calling.h ++++ b/arch/x86/entry/calling.h +@@ -152,6 +152,27 @@ For 32-bit we have the following convent + UNWIND_HINT_REGS offset=\offset extra=0 + .endm + ++ .macro POP_EXTRA_REGS ++ popq %r15 ++ popq %r14 ++ popq %r13 ++ popq %r12 ++ popq %rbp ++ popq %rbx ++ .endm ++ ++ .macro POP_C_REGS ++ popq %r11 ++ popq %r10 ++ popq %r9 ++ popq %r8 ++ popq %rax ++ popq %rcx ++ popq %rdx ++ popq %rsi ++ popq %rdi ++ .endm ++ + .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1 + .if \rstor_r11 + movq 6*8(%rsp), %r11 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -619,9 +619,9 @@ GLOBAL(swapgs_restore_regs_and_return_to + 1: + #endif + SWAPGS +- RESTORE_EXTRA_REGS +- RESTORE_C_REGS +- REMOVE_PT_GPREGS_FROM_STACK 8 ++ POP_EXTRA_REGS ++ POP_C_REGS ++ addq $8, %rsp /* skip regs->orig_ax */ + INTERRUPT_RETURN + + +@@ -651,9 +651,9 @@ GLOBAL(restore_regs_and_return_to_kernel + ud2 + 1: + #endif +- RESTORE_EXTRA_REGS +- RESTORE_C_REGS +- REMOVE_PT_GPREGS_FROM_STACK 8 ++ POP_EXTRA_REGS ++ POP_C_REGS ++ addq $8, %rsp /* skip regs->orig_ax */ + INTERRUPT_RETURN + + ENTRY(native_iret) diff --git a/queue-4.14/x86-entry-64-split-the-iret-to-user-and-iret-to-kernel-paths.patch b/queue-4.14/x86-entry-64-split-the-iret-to-user-and-iret-to-kernel-paths.patch new file mode 100644 index 00000000000..0c2c9a9d7c2 --- /dev/null +++ b/queue-4.14/x86-entry-64-split-the-iret-to-user-and-iret-to-kernel-paths.patch @@ -0,0 +1,121 @@ +From 
26c4ef9c49d8a0341f6d97ce2cfdd55d1236ed29 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Thu, 2 Nov 2017 00:58:59 -0700 +Subject: x86/entry/64: Split the IRET-to-user and IRET-to-kernel paths + +From: Andy Lutomirski + +commit 26c4ef9c49d8a0341f6d97ce2cfdd55d1236ed29 upstream. + +These code paths will diverge soon. + +Signed-off-by: Andy Lutomirski +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/dccf8c7b3750199b4b30383c812d4e2931811509.1509609304.git.luto@kernel.org +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/entry/entry_64.S | 34 +++++++++++++++++++++++++--------- + arch/x86/entry/entry_64_compat.S | 2 +- + arch/x86/kernel/head_64.S | 2 +- + 3 files changed, 27 insertions(+), 11 deletions(-) + +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -322,7 +322,7 @@ syscall_return_via_sysret: + + opportunistic_sysret_failed: + SWAPGS +- jmp restore_regs_and_iret ++ jmp restore_regs_and_return_to_usermode + END(entry_SYSCALL_64) + + ENTRY(stub_ptregs_64) +@@ -424,7 +424,7 @@ ENTRY(ret_from_fork) + call syscall_return_slowpath /* returns with IRQs disabled */ + TRACE_IRQS_ON /* user mode is traced as IRQS on */ + SWAPGS +- jmp restore_regs_and_iret ++ jmp restore_regs_and_return_to_usermode + + 1: + /* kernel thread */ +@@ -613,7 +613,20 @@ GLOBAL(retint_user) + call prepare_exit_to_usermode + TRACE_IRQS_IRETQ + SWAPGS +- jmp restore_regs_and_iret ++ ++GLOBAL(restore_regs_and_return_to_usermode) ++#ifdef CONFIG_DEBUG_ENTRY ++ /* Assert that pt_regs indicates user mode. 
*/ ++ testl $3, CS(%rsp) ++ jnz 1f ++ ud2 ++1: ++#endif ++ RESTORE_EXTRA_REGS ++ RESTORE_C_REGS ++ REMOVE_PT_GPREGS_FROM_STACK 8 ++ INTERRUPT_RETURN ++ + + /* Returning to kernel space */ + retint_kernel: +@@ -633,11 +646,14 @@ retint_kernel: + */ + TRACE_IRQS_IRETQ + +-/* +- * At this label, code paths which return to kernel and to user, +- * which come from interrupts/exception and from syscalls, merge. +- */ +-GLOBAL(restore_regs_and_iret) ++GLOBAL(restore_regs_and_return_to_kernel) ++#ifdef CONFIG_DEBUG_ENTRY ++ /* Assert that pt_regs indicates kernel mode. */ ++ testl $3, CS(%rsp) ++ jz 1f ++ ud2 ++1: ++#endif + RESTORE_EXTRA_REGS + RESTORE_C_REGS + REMOVE_PT_GPREGS_FROM_STACK 8 +@@ -1328,7 +1344,7 @@ ENTRY(nmi) + * work, because we don't want to enable interrupts. + */ + SWAPGS +- jmp restore_regs_and_iret ++ jmp restore_regs_and_return_to_usermode + + .Lnmi_from_kernel: + /* +--- a/arch/x86/entry/entry_64_compat.S ++++ b/arch/x86/entry/entry_64_compat.S +@@ -338,7 +338,7 @@ ENTRY(entry_INT80_compat) + /* Go back to user mode. 
*/ + TRACE_IRQS_ON + SWAPGS +- jmp restore_regs_and_iret ++ jmp restore_regs_and_return_to_usermode + END(entry_INT80_compat) + + ENTRY(stub32_clone) +--- a/arch/x86/kernel/head_64.S ++++ b/arch/x86/kernel/head_64.S +@@ -328,7 +328,7 @@ early_idt_handler_common: + + 20: + decl early_recursion_flag(%rip) +- jmp restore_regs_and_iret ++ jmp restore_regs_and_return_to_kernel + END(early_idt_handler_common) + + __INITDATA diff --git a/queue-4.14/x86-entry-64-stop-initializing-tss.sp0-at-boot.patch b/queue-4.14/x86-entry-64-stop-initializing-tss.sp0-at-boot.patch new file mode 100644 index 00000000000..e83062c479b --- /dev/null +++ b/queue-4.14/x86-entry-64-stop-initializing-tss.sp0-at-boot.patch @@ -0,0 +1,91 @@ +From 20bb83443ea79087b5e5f8dab4e9d80bb9bf7acb Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Thu, 2 Nov 2017 00:59:13 -0700 +Subject: x86/entry/64: Stop initializing TSS.sp0 at boot + +From: Andy Lutomirski + +commit 20bb83443ea79087b5e5f8dab4e9d80bb9bf7acb upstream. + +In my quest to get rid of thread_struct::sp0, I want to clean up or +remove all of its readers. Two of them are in cpu_init() (32-bit and +64-bit), and they aren't needed. This is because we never enter +userspace at all on the threads that CPUs are initialized in. + +Poison the initial TSS.sp0 and stop initializing it on CPU init. + +The comment text mostly comes from Dave Hansen. Thanks! 
+ +Signed-off-by: Andy Lutomirski +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/ee4a00540ad28c6cff475fbcc7769a4460acc861.1509609304.git.luto@kernel.org +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/cpu/common.c | 13 ++++++++++--- + arch/x86/kernel/process.c | 8 +++++++- + 2 files changed, 17 insertions(+), 4 deletions(-) + +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1570,9 +1570,13 @@ void cpu_init(void) + initialize_tlbstate_and_flush(); + enter_lazy_tlb(&init_mm, me); + +- load_sp0(current->thread.sp0); ++ /* ++ * Initialize the TSS. Don't bother initializing sp0, as the initial ++ * task never enters user mode. ++ */ + set_tss_desc(cpu, t); + load_TR_desc(); ++ + load_mm_ldt(&init_mm); + + clear_all_debug_regs(); +@@ -1594,7 +1598,6 @@ void cpu_init(void) + int cpu = smp_processor_id(); + struct task_struct *curr = current; + struct tss_struct *t = &per_cpu(cpu_tss, cpu); +- struct thread_struct *thread = &curr->thread; + + wait_for_master_cpu(cpu); + +@@ -1625,9 +1628,13 @@ void cpu_init(void) + initialize_tlbstate_and_flush(); + enter_lazy_tlb(&init_mm, curr); + +- load_sp0(thread->sp0); ++ /* ++ * Initialize the TSS. Don't bother initializing sp0, as the initial ++ * task never enters user mode. ++ */ + set_tss_desc(cpu, t); + load_TR_desc(); ++ + load_mm_ldt(&init_mm); + + t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -49,7 +49,13 @@ + */ + __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { + .x86_tss = { +- .sp0 = TOP_OF_INIT_STACK, ++ /* ++ * .sp0 is only used when entering ring 0 from a lower ++ * privilege level. Since the init task never runs anything ++ * but ring 0 code, there is no need for a valid value here. ++ * Poison it. 
++ */ ++ .sp0 = (1UL << (BITS_PER_LONG-1)) + 1, + #ifdef CONFIG_X86_32 + .ss0 = __KERNEL_DS, + .ss1 = __KERNEL_CS, diff --git a/queue-4.14/x86-entry-64-use-a-per-cpu-trampoline-stack-for-idt-entries.patch b/queue-4.14/x86-entry-64-use-a-per-cpu-trampoline-stack-for-idt-entries.patch new file mode 100644 index 00000000000..846718f6543 --- /dev/null +++ b/queue-4.14/x86-entry-64-use-a-per-cpu-trampoline-stack-for-idt-entries.patch @@ -0,0 +1,276 @@ +From 7f2590a110b837af5679d08fc25c6227c5a8c497 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Mon, 4 Dec 2017 15:07:23 +0100 +Subject: x86/entry/64: Use a per-CPU trampoline stack for IDT entries + +From: Andy Lutomirski + +commit 7f2590a110b837af5679d08fc25c6227c5a8c497 upstream. + +Historically, IDT entries from usermode have always gone directly +to the running task's kernel stack. Rearrange it so that we enter on +a per-CPU trampoline stack and then manually switch to the task's stack. +This touches a couple of extra cachelines, but it gives us a chance +to run some code before we touch the kernel stack. + +The asm isn't exactly beautiful, but I think that fully refactoring +it can wait. + +Signed-off-by: Andy Lutomirski +Signed-off-by: Thomas Gleixner +Reviewed-by: Borislav Petkov +Reviewed-by: Thomas Gleixner +Cc: Boris Ostrovsky +Cc: Borislav Petkov +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Dave Hansen +Cc: David Laight +Cc: Denys Vlasenko +Cc: Eduardo Valentin +Cc: Greg KH +Cc: H. 
Peter Anvin +Cc: Josh Poimboeuf +Cc: Juergen Gross +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Rik van Riel +Cc: Will Deacon +Cc: aliguori@amazon.com +Cc: daniel.gruss@iaik.tugraz.at +Cc: hughd@google.com +Cc: keescook@google.com +Link: https://lkml.kernel.org/r/20171204150606.225330557@linutronix.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/entry/entry_64.S | 67 +++++++++++++++++++++++++++++---------- + arch/x86/entry/entry_64_compat.S | 5 ++ + arch/x86/include/asm/switch_to.h | 4 +- + arch/x86/include/asm/traps.h | 1 + arch/x86/kernel/cpu/common.c | 6 ++- + arch/x86/kernel/traps.c | 21 ++++++------ + 6 files changed, 72 insertions(+), 32 deletions(-) + +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -560,6 +560,13 @@ END(irq_entries_start) + /* 0(%rsp): ~(interrupt number) */ + .macro interrupt func + cld ++ ++ testb $3, CS-ORIG_RAX(%rsp) ++ jz 1f ++ SWAPGS ++ call switch_to_thread_stack ++1: ++ + ALLOC_PT_GPREGS_ON_STACK + SAVE_C_REGS + SAVE_EXTRA_REGS +@@ -569,12 +576,8 @@ END(irq_entries_start) + jz 1f + + /* +- * IRQ from user mode. Switch to kernel gsbase and inform context +- * tracking that we're in kernel mode. +- */ +- SWAPGS +- +- /* ++ * IRQ from user mode. ++ * + * We need to tell lockdep that IRQs are off. We can't do this until + * we fix gsbase, and we should do it before enter_from_user_mode + * (which can take locks). Since TRACE_IRQS_OFF idempotent, +@@ -828,6 +831,32 @@ apicinterrupt IRQ_WORK_VECTOR irq_work + */ + #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8) + ++/* ++ * Switch to the thread stack. This is called with the IRET frame and ++ * orig_ax on the stack. (That is, RDI..R12 are not on the stack and ++ * space has not been allocated for them.) 
++ */ ++ENTRY(switch_to_thread_stack) ++ UNWIND_HINT_FUNC ++ ++ pushq %rdi ++ movq %rsp, %rdi ++ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp ++ UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI ++ ++ pushq 7*8(%rdi) /* regs->ss */ ++ pushq 6*8(%rdi) /* regs->rsp */ ++ pushq 5*8(%rdi) /* regs->eflags */ ++ pushq 4*8(%rdi) /* regs->cs */ ++ pushq 3*8(%rdi) /* regs->ip */ ++ pushq 2*8(%rdi) /* regs->orig_ax */ ++ pushq 8(%rdi) /* return address */ ++ UNWIND_HINT_FUNC ++ ++ movq (%rdi), %rdi ++ ret ++END(switch_to_thread_stack) ++ + .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 + ENTRY(\sym) + UNWIND_HINT_IRET_REGS offset=\has_error_code*8 +@@ -845,11 +874,12 @@ ENTRY(\sym) + + ALLOC_PT_GPREGS_ON_STACK + +- .if \paranoid +- .if \paranoid == 1 ++ .if \paranoid < 2 + testb $3, CS(%rsp) /* If coming from userspace, switch stacks */ +- jnz 1f ++ jnz .Lfrom_usermode_switch_stack_\@ + .endif ++ ++ .if \paranoid + call paranoid_entry + .else + call error_entry +@@ -891,20 +921,15 @@ ENTRY(\sym) + jmp error_exit + .endif + +- .if \paranoid == 1 ++ .if \paranoid < 2 + /* +- * Paranoid entry from userspace. Switch stacks and treat it ++ * Entry from userspace. Switch stacks and treat it + * as a normal entry. This means that paranoid handlers + * run in real process context if user_mode(regs). + */ +-1: ++.Lfrom_usermode_switch_stack_\@: + call error_entry + +- +- movq %rsp, %rdi /* pt_regs pointer */ +- call sync_regs +- movq %rax, %rsp /* switch stack */ +- + movq %rsp, %rdi /* pt_regs pointer */ + + .if \has_error_code +@@ -1165,6 +1190,14 @@ ENTRY(error_entry) + SWAPGS + + .Lerror_entry_from_usermode_after_swapgs: ++ /* Put us onto the real thread stack. */ ++ popq %r12 /* save return addr in %r12 */ ++ movq %rsp, %rdi /* arg0 = pt_regs pointer */ ++ call sync_regs ++ movq %rax, %rsp /* switch stack */ ++ ENCODE_FRAME_POINTER ++ pushq %r12 ++ + /* + * We need to tell lockdep that IRQs are off.
We can't do this until + * we fix gsbase, and we should do it before enter_from_user_mode +--- a/arch/x86/entry/entry_64_compat.S ++++ b/arch/x86/entry/entry_64_compat.S +@@ -306,8 +306,11 @@ ENTRY(entry_INT80_compat) + */ + movl %eax, %eax + +- /* Construct struct pt_regs on stack (iret frame is already on stack) */ + pushq %rax /* pt_regs->orig_ax */ ++ ++ /* switch to thread stack expects orig_ax to be pushed */ ++ call switch_to_thread_stack ++ + pushq %rdi /* pt_regs->di */ + pushq %rsi /* pt_regs->si */ + pushq %rdx /* pt_regs->dx */ +--- a/arch/x86/include/asm/switch_to.h ++++ b/arch/x86/include/asm/switch_to.h +@@ -90,10 +90,12 @@ static inline void refresh_sysenter_cs(s + /* This is used when switching tasks or entering/exiting vm86 mode. */ + static inline void update_sp0(struct task_struct *task) + { ++ /* On x86_64, sp0 always points to the entry trampoline stack, which is constant: */ + #ifdef CONFIG_X86_32 + load_sp0(task->thread.sp0); + #else +- load_sp0(task_top_of_stack(task)); ++ if (static_cpu_has(X86_FEATURE_XENPV)) ++ load_sp0(task_top_of_stack(task)); + #endif + } + +--- a/arch/x86/include/asm/traps.h ++++ b/arch/x86/include/asm/traps.h +@@ -75,7 +75,6 @@ dotraplinkage void do_segment_not_presen + dotraplinkage void do_stack_segment(struct pt_regs *, long); + #ifdef CONFIG_X86_64 + dotraplinkage void do_double_fault(struct pt_regs *, long); +-asmlinkage struct pt_regs *sync_regs(struct pt_regs *); + #endif + dotraplinkage void do_general_protection(struct pt_regs *, long); + dotraplinkage void do_page_fault(struct pt_regs *, unsigned long); +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1623,11 +1623,13 @@ void cpu_init(void) + setup_cpu_entry_area(cpu); + + /* +- * Initialize the TSS. Don't bother initializing sp0, as the initial +- * task never enters user mode. ++ * Initialize the TSS. sp0 points to the entry trampoline stack ++ * regardless of what task is running. 
+ */ + set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); + load_TR_desc(); ++ load_sp0((unsigned long)&get_cpu_entry_area(cpu)->tss + ++ offsetofend(struct tss_struct, SYSENTER_stack)); + + load_mm_ldt(&init_mm); + +--- a/arch/x86/kernel/traps.c ++++ b/arch/x86/kernel/traps.c +@@ -619,14 +619,15 @@ NOKPROBE_SYMBOL(do_int3); + + #ifdef CONFIG_X86_64 + /* +- * Help handler running on IST stack to switch off the IST stack if the +- * interrupted code was in user mode. The actual stack switch is done in +- * entry_64.S ++ * Help handler running on a per-cpu (IST or entry trampoline) stack ++ * to switch to the normal thread stack if the interrupted code was in ++ * user mode. The actual stack switch is done in entry_64.S + */ + asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs) + { +- struct pt_regs *regs = task_pt_regs(current); +- *regs = *eregs; ++ struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1; ++ if (regs != eregs) ++ *regs = *eregs; + return regs; + } + NOKPROBE_SYMBOL(sync_regs); +@@ -642,13 +643,13 @@ struct bad_iret_stack *fixup_bad_iret(st + /* + * This is called from entry_64.S early in handling a fault + * caused by a bad iret to user mode. To handle the fault +- * correctly, we want move our stack frame to task_pt_regs +- * and we want to pretend that the exception came from the +- * iret target. ++ * correctly, we want to move our stack frame to where it would ++ * be had we entered directly on the entry stack (rather than ++ * just below the IRET frame) and we want to pretend that the ++ * exception came from the IRET target. + */ + struct bad_iret_stack *new_stack = +- container_of(task_pt_regs(current), +- struct bad_iret_stack, regs); ++ (struct bad_iret_stack *)this_cpu_read(cpu_tss.x86_tss.sp0) - 1; + + /* Copy the IRET target to the new stack. 
*/ + memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8); diff --git a/queue-4.14/x86-entry-64-use-pop-instead-of-mov-to-restore-regs-on-nmi-return.patch b/queue-4.14/x86-entry-64-use-pop-instead-of-mov-to-restore-regs-on-nmi-return.patch new file mode 100644 index 00000000000..5a0bf84af55 --- /dev/null +++ b/queue-4.14/x86-entry-64-use-pop-instead-of-mov-to-restore-regs-on-nmi-return.patch @@ -0,0 +1,47 @@ +From 471ee4832209e986029b9fabdaad57b1eecb856b Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Thu, 2 Nov 2017 00:59:05 -0700 +Subject: x86/entry/64: Use POP instead of MOV to restore regs on NMI return + +From: Andy Lutomirski + +commit 471ee4832209e986029b9fabdaad57b1eecb856b upstream. + +This gets rid of the last user of the old RESTORE_..._REGS infrastructure. + +Signed-off-by: Andy Lutomirski +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/652a260f17a160789bc6a41d997f98249b73e2ab.1509609304.git.luto@kernel.org +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/entry/entry_64.S | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -1560,11 +1560,14 @@ end_repeat_nmi: + nmi_swapgs: + SWAPGS_UNSAFE_STACK + nmi_restore: +- RESTORE_EXTRA_REGS +- RESTORE_C_REGS ++ POP_EXTRA_REGS ++ POP_C_REGS + +- /* Point RSP at the "iret" frame. */ +- REMOVE_PT_GPREGS_FROM_STACK 6*8 ++ /* ++ * Skip orig_ax and the "outermost" frame to point RSP at the "iret" ++ * frame. ++ */ ++ addq $6*8, %rsp + + /* + * Clear "NMI executing".
Set DF first so that we can easily diff --git a/queue-4.14/x86-entry-64-use-pop-instead-of-movq-in-syscall_return_via_sysret.patch b/queue-4.14/x86-entry-64-use-pop-instead-of-movq-in-syscall_return_via_sysret.patch new file mode 100644 index 00000000000..8f05e12bedf --- /dev/null +++ b/queue-4.14/x86-entry-64-use-pop-instead-of-movq-in-syscall_return_via_sysret.patch @@ -0,0 +1,51 @@ +From 4fbb39108f972437c44e5ffa781b56635d496826 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Thu, 2 Nov 2017 00:59:03 -0700 +Subject: x86/entry/64: Use pop instead of movq in syscall_return_via_sysret + +From: Andy Lutomirski + +commit 4fbb39108f972437c44e5ffa781b56635d496826 upstream. + +Saves 64 bytes. + +Signed-off-by: Andy Lutomirski +Reviewed-by: Borislav Petkov +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/6609b7f74ab31c36604ad746e019ea8495aec76c.1509609304.git.luto@kernel.org +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/entry/entry_64.S | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -316,10 +316,18 @@ return_from_SYSCALL_64: + */ + syscall_return_via_sysret: + /* rcx and r11 are already restored (see code above) */ +- RESTORE_EXTRA_REGS +- RESTORE_C_REGS_EXCEPT_RCX_R11 +- movq RSP(%rsp), %rsp + UNWIND_HINT_EMPTY ++ POP_EXTRA_REGS ++ popq %rsi /* skip r11 */ ++ popq %r10 ++ popq %r9 ++ popq %r8 ++ popq %rax ++ popq %rsi /* skip rcx */ ++ popq %rdx ++ popq %rsi ++ popq %rdi ++ movq RSP-ORIG_RAX(%rsp), %rsp + USERGS_SYSRET64 + END(entry_SYSCALL_64) + diff --git a/queue-4.14/x86-entry-add-task_top_of_stack-to-find-the-top-of-a-task-s-stack.patch b/queue-4.14/x86-entry-add-task_top_of_stack-to-find-the-top-of-a-task-s-stack.patch new file mode 100644 index 00000000000..04abd3a811a --- /dev/null +++ 
b/queue-4.14/x86-entry-add-task_top_of_stack-to-find-the-top-of-a-task-s-stack.patch @@ -0,0 +1,38 @@ +From 3500130b84a3cdc5b6796eba1daf178944935efe Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Thu, 2 Nov 2017 00:59:11 -0700 +Subject: x86/entry: Add task_top_of_stack() to find the top of a task's stack + +From: Andy Lutomirski + +commit 3500130b84a3cdc5b6796eba1daf178944935efe upstream. + +This will let us get rid of a few places that hardcode accesses to +thread.sp0. + +Signed-off-by: Andy Lutomirski +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/b49b3f95a8ff858c40c9b0f5b32be0355324327d.1509609304.git.luto@kernel.org +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/processor.h | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/arch/x86/include/asm/processor.h ++++ b/arch/x86/include/asm/processor.h +@@ -796,6 +796,8 @@ static inline void spin_lock_prefetch(co + #define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \ + TOP_OF_KERNEL_STACK_PADDING) + ++#define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1)) ++ + #ifdef CONFIG_X86_32 + /* + * User space process size: 3GB (default). diff --git a/queue-4.14/x86-entry-clean-up-the-sysenter_stack-code.patch b/queue-4.14/x86-entry-clean-up-the-sysenter_stack-code.patch new file mode 100644 index 00000000000..9aad70fd6f1 --- /dev/null +++ b/queue-4.14/x86-entry-clean-up-the-sysenter_stack-code.patch @@ -0,0 +1,184 @@ +From 0f9a48100fba3f189724ae88a450c2261bf91c80 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Mon, 4 Dec 2017 15:07:28 +0100 +Subject: x86/entry: Clean up the SYSENTER_stack code + +From: Andy Lutomirski + +commit 0f9a48100fba3f189724ae88a450c2261bf91c80 upstream. + +The existing code was a mess, mainly because C arrays are nasty. 
Turn +SYSENTER_stack into a struct, add a helper to find it, and do all the +obvious cleanups this enables. + +Signed-off-by: Andy Lutomirski +Signed-off-by: Thomas Gleixner +Reviewed-by: Thomas Gleixner +Reviewed-by: Borislav Petkov +Cc: Boris Ostrovsky +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Dave Hansen +Cc: David Laight +Cc: Denys Vlasenko +Cc: Eduardo Valentin +Cc: Greg KH +Cc: H. Peter Anvin +Cc: Josh Poimboeuf +Cc: Juergen Gross +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Rik van Riel +Cc: Will Deacon +Cc: aliguori@amazon.com +Cc: daniel.gruss@iaik.tugraz.at +Cc: hughd@google.com +Cc: keescook@google.com +Link: https://lkml.kernel.org/r/20171204150606.653244723@linutronix.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/entry/entry_32.S | 4 ++-- + arch/x86/entry/entry_64.S | 2 +- + arch/x86/include/asm/fixmap.h | 5 +++++ + arch/x86/include/asm/processor.h | 6 +++++- + arch/x86/kernel/asm-offsets.c | 6 ++---- + arch/x86/kernel/cpu/common.c | 14 +++----------- + arch/x86/kernel/dumpstack.c | 7 +++---- + 7 files changed, 21 insertions(+), 23 deletions(-) + +--- a/arch/x86/entry/entry_32.S ++++ b/arch/x86/entry/entry_32.S +@@ -942,7 +942,7 @@ ENTRY(debug) + + /* Are we currently on the SYSENTER stack? */ + movl PER_CPU_VAR(cpu_entry_area), %ecx +- addl $CPU_ENTRY_AREA_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx ++ addl $CPU_ENTRY_AREA_tss + TSS_STRUCT_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx + subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ + cmpl $SIZEOF_SYSENTER_stack, %ecx + jb .Ldebug_from_sysenter_stack +@@ -986,7 +986,7 @@ ENTRY(nmi) + + /* Are we currently on the SYSENTER stack? 
*/ + movl PER_CPU_VAR(cpu_entry_area), %ecx +- addl $CPU_ENTRY_AREA_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx ++ addl $CPU_ENTRY_AREA_tss + TSS_STRUCT_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx + subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ + cmpl $SIZEOF_SYSENTER_stack, %ecx + jb .Lnmi_from_sysenter_stack +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -154,7 +154,7 @@ END(native_usergs_sysret64) + _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip) + + /* The top word of the SYSENTER stack is hot and is usable as scratch space. */ +-#define RSP_SCRATCH CPU_ENTRY_AREA_tss + CPU_TSS_SYSENTER_stack + \ ++#define RSP_SCRATCH CPU_ENTRY_AREA_tss + TSS_STRUCT_SYSENTER_stack + \ + SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA + + ENTRY(entry_SYSCALL_64_trampoline) +--- a/arch/x86/include/asm/fixmap.h ++++ b/arch/x86/include/asm/fixmap.h +@@ -245,5 +245,10 @@ static inline struct cpu_entry_area *get + return (struct cpu_entry_area *)__fix_to_virt(__get_cpu_entry_area_page_index(cpu, 0)); + } + ++static inline struct SYSENTER_stack *cpu_SYSENTER_stack(int cpu) ++{ ++ return &get_cpu_entry_area(cpu)->tss.SYSENTER_stack; ++} ++ + #endif /* !__ASSEMBLY__ */ + #endif /* _ASM_X86_FIXMAP_H */ +--- a/arch/x86/include/asm/processor.h ++++ b/arch/x86/include/asm/processor.h +@@ -336,12 +336,16 @@ struct x86_hw_tss { + #define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss)) + #define INVALID_IO_BITMAP_OFFSET 0x8000 + ++struct SYSENTER_stack { ++ unsigned long words[64]; ++}; ++ + struct tss_struct { + /* + * Space for the temporary SYSENTER stack, used for SYSENTER + * and the entry trampoline as well. + */ +- unsigned long SYSENTER_stack[64]; ++ struct SYSENTER_stack SYSENTER_stack; + + /* + * The fixed hardware portion. 
This must not cross a page boundary +--- a/arch/x86/kernel/asm-offsets.c ++++ b/arch/x86/kernel/asm-offsets.c +@@ -94,10 +94,8 @@ void common(void) { + BLANK(); + DEFINE(PTREGS_SIZE, sizeof(struct pt_regs)); + +- /* Offset from cpu_tss to SYSENTER_stack */ +- OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack); +- /* Size of SYSENTER_stack */ +- DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack)); ++ OFFSET(TSS_STRUCT_SYSENTER_stack, tss_struct, SYSENTER_stack); ++ DEFINE(SIZEOF_SYSENTER_stack, sizeof(struct SYSENTER_stack)); + + /* Layout info for cpu_entry_area */ + OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss); +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1314,12 +1314,7 @@ void enable_sep_cpu(void) + + tss->x86_tss.ss1 = __KERNEL_CS; + wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0); +- +- wrmsr(MSR_IA32_SYSENTER_ESP, +- (unsigned long)&get_cpu_entry_area(cpu)->tss + +- offsetofend(struct tss_struct, SYSENTER_stack), +- 0); +- ++ wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1), 0); + wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0); + + put_cpu(); +@@ -1436,9 +1431,7 @@ void syscall_init(void) + * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit). 
+ */ + wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); +- wrmsrl_safe(MSR_IA32_SYSENTER_ESP, +- (unsigned long)&get_cpu_entry_area(cpu)->tss + +- offsetofend(struct tss_struct, SYSENTER_stack)); ++ wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1)); + wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat); + #else + wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret); +@@ -1653,8 +1646,7 @@ void cpu_init(void) + */ + set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); + load_TR_desc(); +- load_sp0((unsigned long)&get_cpu_entry_area(cpu)->tss + +- offsetofend(struct tss_struct, SYSENTER_stack)); ++ load_sp0((unsigned long)(cpu_SYSENTER_stack(cpu) + 1)); + + load_mm_ldt(&init_mm); + +--- a/arch/x86/kernel/dumpstack.c ++++ b/arch/x86/kernel/dumpstack.c +@@ -45,11 +45,10 @@ bool in_task_stack(unsigned long *stack, + + bool in_sysenter_stack(unsigned long *stack, struct stack_info *info) + { +- int cpu = smp_processor_id(); +- struct tss_struct *tss = &get_cpu_entry_area(cpu)->tss; ++ struct SYSENTER_stack *ss = cpu_SYSENTER_stack(smp_processor_id()); + +- void *begin = &tss->SYSENTER_stack; +- void *end = (void *)&tss->SYSENTER_stack + sizeof(tss->SYSENTER_stack); ++ void *begin = ss; ++ void *end = ss + 1; + + if ((void *)stack < begin || (void *)stack >= end) + return false; diff --git a/queue-4.14/x86-entry-fix-assumptions-that-the-hw-tss-is-at-the-beginning-of-cpu_tss.patch b/queue-4.14/x86-entry-fix-assumptions-that-the-hw-tss-is-at-the-beginning-of-cpu_tss.patch new file mode 100644 index 00000000000..eac247e04d6 --- /dev/null +++ b/queue-4.14/x86-entry-fix-assumptions-that-the-hw-tss-is-at-the-beginning-of-cpu_tss.patch @@ -0,0 +1,205 @@ +From 7fb983b4dd569e08564134a850dfd4eb1c63d9b8 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Mon, 4 Dec 2017 15:07:17 +0100 +Subject: x86/entry: Fix assumptions that the HW TSS is at the beginning of cpu_tss + +From: Andy Lutomirski + +commit 
7fb983b4dd569e08564134a850dfd4eb1c63d9b8 upstream. + +A future patch will move SYSENTER_stack to the beginning of cpu_tss +to help detect overflow. Before this can happen, fix several code +paths that hardcode assumptions about the old layout. + +Signed-off-by: Andy Lutomirski +Signed-off-by: Thomas Gleixner +Reviewed-by: Borislav Petkov +Reviewed-by: Dave Hansen +Reviewed-by: Thomas Gleixner +Cc: Boris Ostrovsky +Cc: Borislav Petkov +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: David Laight +Cc: Denys Vlasenko +Cc: Eduardo Valentin +Cc: Greg KH +Cc: H. Peter Anvin +Cc: Josh Poimboeuf +Cc: Juergen Gross +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Rik van Riel +Cc: Will Deacon +Cc: aliguori@amazon.com +Cc: daniel.gruss@iaik.tugraz.at +Cc: hughd@google.com +Cc: keescook@google.com +Link: https://lkml.kernel.org/r/20171204150605.722425540@linutronix.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/desc.h | 2 +- + arch/x86/include/asm/processor.h | 9 +++++++-- + arch/x86/kernel/cpu/common.c | 8 ++++---- + arch/x86/kernel/doublefault.c | 32 +++++++++++++++----------------- + arch/x86/kvm/vmx.c | 2 +- + arch/x86/power/cpu.c | 13 +++++++------ + 6 files changed, 35 insertions(+), 31 deletions(-) + +--- a/arch/x86/include/asm/desc.h ++++ b/arch/x86/include/asm/desc.h +@@ -178,7 +178,7 @@ static inline void set_tssldt_descriptor + #endif + } + +-static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr) ++static inline void __set_tss_desc(unsigned cpu, unsigned int entry, struct x86_hw_tss *addr) + { + struct desc_struct *d = get_cpu_gdt_rw(cpu); + tss_desc tss; +--- a/arch/x86/include/asm/processor.h ++++ b/arch/x86/include/asm/processor.h +@@ -162,7 +162,7 @@ enum cpuid_regs_idx { + extern struct cpuinfo_x86 boot_cpu_data; + extern struct cpuinfo_x86 new_cpu_data; + +-extern struct tss_struct doublefault_tss; ++extern struct x86_hw_tss doublefault_tss; + extern __u32 
cpu_caps_cleared[NCAPINTS]; + extern __u32 cpu_caps_set[NCAPINTS]; + +@@ -252,6 +252,11 @@ static inline void load_cr3(pgd_t *pgdir + write_cr3(__sme_pa(pgdir)); + } + ++/* ++ * Note that while the legacy 'TSS' name comes from 'Task State Segment', ++ * on modern x86 CPUs the TSS also holds information important to 64-bit mode, ++ * unrelated to the task-switch mechanism: ++ */ + #ifdef CONFIG_X86_32 + /* This is the TSS defined by the hardware. */ + struct x86_hw_tss { +@@ -322,7 +327,7 @@ struct x86_hw_tss { + #define IO_BITMAP_BITS 65536 + #define IO_BITMAP_BYTES (IO_BITMAP_BITS/8) + #define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) +-#define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap) ++#define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss)) + #define INVALID_IO_BITMAP_OFFSET 0x8000 + + struct tss_struct { +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1557,7 +1557,7 @@ void cpu_init(void) + } + } + +- t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); ++ t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; + + /* + * <= is required because the CPU will access up to +@@ -1576,7 +1576,7 @@ void cpu_init(void) + * Initialize the TSS. Don't bother initializing sp0, as the initial + * task never enters user mode. + */ +- set_tss_desc(cpu, t); ++ set_tss_desc(cpu, &t->x86_tss); + load_TR_desc(); + + load_mm_ldt(&init_mm); +@@ -1634,12 +1634,12 @@ void cpu_init(void) + * Initialize the TSS. Don't bother initializing sp0, as the initial + * task never enters user mode. 
+ */ +- set_tss_desc(cpu, t); ++ set_tss_desc(cpu, &t->x86_tss); + load_TR_desc(); + + load_mm_ldt(&init_mm); + +- t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); ++ t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; + + #ifdef CONFIG_DOUBLEFAULT + /* Set up doublefault TSS pointer in the GDT */ +--- a/arch/x86/kernel/doublefault.c ++++ b/arch/x86/kernel/doublefault.c +@@ -50,25 +50,23 @@ static void doublefault_fn(void) + cpu_relax(); + } + +-struct tss_struct doublefault_tss __cacheline_aligned = { +- .x86_tss = { +- .sp0 = STACK_START, +- .ss0 = __KERNEL_DS, +- .ldt = 0, +- .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, ++struct x86_hw_tss doublefault_tss __cacheline_aligned = { ++ .sp0 = STACK_START, ++ .ss0 = __KERNEL_DS, ++ .ldt = 0, ++ .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, + +- .ip = (unsigned long) doublefault_fn, +- /* 0x2 bit is always set */ +- .flags = X86_EFLAGS_SF | 0x2, +- .sp = STACK_START, +- .es = __USER_DS, +- .cs = __KERNEL_CS, +- .ss = __KERNEL_DS, +- .ds = __USER_DS, +- .fs = __KERNEL_PERCPU, ++ .ip = (unsigned long) doublefault_fn, ++ /* 0x2 bit is always set */ ++ .flags = X86_EFLAGS_SF | 0x2, ++ .sp = STACK_START, ++ .es = __USER_DS, ++ .cs = __KERNEL_CS, ++ .ss = __KERNEL_DS, ++ .ds = __USER_DS, ++ .fs = __KERNEL_PERCPU, + +- .__cr3 = __pa_nodebug(swapper_pg_dir), +- } ++ .__cr3 = __pa_nodebug(swapper_pg_dir), + }; + + /* dummy for do_double_fault() call */ +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -2295,7 +2295,7 @@ static void vmx_vcpu_load(struct kvm_vcp + * processors. See 22.2.4. 
+ */ + vmcs_writel(HOST_TR_BASE, +- (unsigned long)this_cpu_ptr(&cpu_tss)); ++ (unsigned long)this_cpu_ptr(&cpu_tss.x86_tss)); + vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */ + + /* +--- a/arch/x86/power/cpu.c ++++ b/arch/x86/power/cpu.c +@@ -165,12 +165,13 @@ static void fix_processor_context(void) + struct desc_struct *desc = get_cpu_gdt_rw(cpu); + tss_desc tss; + #endif +- set_tss_desc(cpu, t); /* +- * This just modifies memory; should not be +- * necessary. But... This is necessary, because +- * 386 hardware has concept of busy TSS or some +- * similar stupidity. +- */ ++ ++ /* ++ * This just modifies memory; should not be necessary. But... This is ++ * necessary, because 386 hardware has concept of busy TSS or some ++ * similar stupidity. ++ */ ++ set_tss_desc(cpu, &t->x86_tss); + + #ifdef CONFIG_X86_64 + memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc)); diff --git a/queue-4.14/x86-entry-gdt-put-per-cpu-gdt-remaps-in-ascending-order.patch b/queue-4.14/x86-entry-gdt-put-per-cpu-gdt-remaps-in-ascending-order.patch new file mode 100644 index 00000000000..236521fda2c --- /dev/null +++ b/queue-4.14/x86-entry-gdt-put-per-cpu-gdt-remaps-in-ascending-order.patch @@ -0,0 +1,61 @@ +From aaeed3aeb39c1ba69f0a49baec8cb728121d0a91 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Mon, 4 Dec 2017 15:07:14 +0100 +Subject: x86/entry/gdt: Put per-CPU GDT remaps in ascending order + +From: Andy Lutomirski + +commit aaeed3aeb39c1ba69f0a49baec8cb728121d0a91 upstream. + +We currently have CPU 0's GDT at the top of the GDT range and +higher-numbered CPUs at lower addresses. This happens because the +fixmap is upside down (index 0 is the top of the fixmap). + +Flip it so that GDTs are in ascending order by virtual address. +This will simplify a future patch that will generalize the GDT +remap to contain multiple pages. 
+ +Signed-off-by: Andy Lutomirski +Signed-off-by: Thomas Gleixner +Reviewed-by: Borislav Petkov +Reviewed-by: Thomas Gleixner +Cc: Boris Ostrovsky +Cc: Borislav Petkov +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Dave Hansen +Cc: David Laight +Cc: Denys Vlasenko +Cc: Eduardo Valentin +Cc: Greg KH +Cc: H. Peter Anvin +Cc: Josh Poimboeuf +Cc: Juergen Gross +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Rik van Riel +Cc: Will Deacon +Cc: aliguori@amazon.com +Cc: daniel.gruss@iaik.tugraz.at +Cc: hughd@google.com +Cc: keescook@google.com +Link: https://lkml.kernel.org/r/20171204150605.471561421@linutronix.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/desc.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/include/asm/desc.h ++++ b/arch/x86/include/asm/desc.h +@@ -63,7 +63,7 @@ static inline struct desc_struct *get_cu + /* Get the fixmap index for a specific processor */ + static inline unsigned int get_cpu_gdt_ro_index(int cpu) + { +- return FIX_GDT_REMAP_BEGIN + cpu; ++ return FIX_GDT_REMAP_END - cpu; + } + + /* Provide the fixmap address of the remapped GDT */ diff --git a/queue-4.14/x86-entry-move-sysenter_stack-to-the-beginning-of-struct-tss_struct.patch b/queue-4.14/x86-entry-move-sysenter_stack-to-the-beginning-of-struct-tss_struct.patch new file mode 100644 index 00000000000..63c6988a596 --- /dev/null +++ b/queue-4.14/x86-entry-move-sysenter_stack-to-the-beginning-of-struct-tss_struct.patch @@ -0,0 +1,118 @@ +From 1a935bc3d4ea61556461a9e92a68ca3556232efd Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Mon, 4 Dec 2017 15:07:19 +0100 +Subject: x86/entry: Move SYSENTER_stack to the beginning of struct tss_struct + +From: Andy Lutomirski + +commit 1a935bc3d4ea61556461a9e92a68ca3556232efd upstream. + +SYSENTER_stack should have reliable overflow detection, which +means that it needs to be at the bottom of a page, not the top. 
+Move it to the beginning of struct tss_struct and page-align it. + +Also add an assertion to make sure that the fixed hardware TSS +doesn't cross a page boundary. + +Signed-off-by: Andy Lutomirski +Signed-off-by: Thomas Gleixner +Reviewed-by: Thomas Gleixner +Reviewed-by: Borislav Petkov +Cc: Boris Ostrovsky +Cc: Borislav Petkov +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Dave Hansen +Cc: David Laight +Cc: Denys Vlasenko +Cc: Eduardo Valentin +Cc: Greg KH +Cc: H. Peter Anvin +Cc: Josh Poimboeuf +Cc: Juergen Gross +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Rik van Riel +Cc: Will Deacon +Cc: aliguori@amazon.com +Cc: daniel.gruss@iaik.tugraz.at +Cc: hughd@google.com +Cc: keescook@google.com +Link: https://lkml.kernel.org/r/20171204150605.881827433@linutronix.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/processor.h | 21 ++++++++++++--------- + arch/x86/kernel/cpu/common.c | 21 +++++++++++++++++++++ + 2 files changed, 33 insertions(+), 9 deletions(-) + +--- a/arch/x86/include/asm/processor.h ++++ b/arch/x86/include/asm/processor.h +@@ -332,7 +332,16 @@ struct x86_hw_tss { + + struct tss_struct { + /* +- * The hardware state: ++ * Space for the temporary SYSENTER stack, used for SYSENTER ++ * and the entry trampoline as well. ++ */ ++ unsigned long SYSENTER_stack_canary; ++ unsigned long SYSENTER_stack[64]; ++ ++ /* ++ * The fixed hardware portion. This must not cross a page boundary ++ * at risk of violating the SDM's advice and potentially triggering ++ * errata. + */ + struct x86_hw_tss x86_tss; + +@@ -343,15 +352,9 @@ struct tss_struct { + * be within the limit. + */ + unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; ++} __aligned(PAGE_SIZE); + +- /* +- * Space for the temporary SYSENTER stack. 
+- */ +- unsigned long SYSENTER_stack_canary; +- unsigned long SYSENTER_stack[64]; +-} ____cacheline_aligned; +- +-DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss); ++DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss); + + /* + * sizeof(unsigned long) coming from an extra "long" at the end +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -487,6 +487,27 @@ static inline void setup_cpu_entry_area( + #endif + + __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot); ++ ++ /* ++ * The Intel SDM says (Volume 3, 7.2.1): ++ * ++ * Avoid placing a page boundary in the part of the TSS that the ++ * processor reads during a task switch (the first 104 bytes). The ++ * processor may not correctly perform address translations if a ++ * boundary occurs in this area. During a task switch, the processor ++ * reads and writes into the first 104 bytes of each TSS (using ++ * contiguous physical addresses beginning with the physical address ++ * of the first byte of the TSS). So, after TSS access begins, if ++ * part of the 104 bytes is not physically contiguous, the processor ++ * will access incorrect information without generating a page-fault ++ * exception. ++ * ++ * There are also a lot of errata involving the TSS spanning a page ++ * boundary. Assert that we're not doing that. 
++ */ ++ BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^ ++ offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK); ++ + } + + /* Load the original GDT from the per-cpu structure */ diff --git a/queue-4.14/x86-entry-remap-the-tss-into-the-cpu-entry-area.patch b/queue-4.14/x86-entry-remap-the-tss-into-the-cpu-entry-area.patch new file mode 100644 index 00000000000..f1772adf27b --- /dev/null +++ b/queue-4.14/x86-entry-remap-the-tss-into-the-cpu-entry-area.patch @@ -0,0 +1,265 @@ +From 72f5e08dbba2d01aa90b592cf76c378ea233b00b Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Mon, 4 Dec 2017 15:07:20 +0100 +Subject: x86/entry: Remap the TSS into the CPU entry area + +From: Andy Lutomirski + +commit 72f5e08dbba2d01aa90b592cf76c378ea233b00b upstream. + +This has a secondary purpose: it puts the entry stack into a region +with a well-controlled layout. A subsequent patch will take +advantage of this to streamline the SYSCALL entry code to be able to +find it more easily. + +Signed-off-by: Andy Lutomirski +Signed-off-by: Thomas Gleixner +Reviewed-by: Thomas Gleixner +Reviewed-by: Borislav Petkov +Cc: Boris Ostrovsky +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Dave Hansen +Cc: David Laight +Cc: Denys Vlasenko +Cc: Eduardo Valentin +Cc: Greg KH +Cc: H. 
Peter Anvin +Cc: Josh Poimboeuf +Cc: Juergen Gross +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Rik van Riel +Cc: Will Deacon +Cc: aliguori@amazon.com +Cc: daniel.gruss@iaik.tugraz.at +Cc: hughd@google.com +Cc: keescook@google.com +Link: https://lkml.kernel.org/r/20171204150605.962042855@linutronix.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/entry/entry_32.S | 6 ++++-- + arch/x86/include/asm/fixmap.h | 7 +++++++ + arch/x86/kernel/asm-offsets.c | 3 +++ + arch/x86/kernel/cpu/common.c | 41 +++++++++++++++++++++++++++++++++++------ + arch/x86/kernel/dumpstack.c | 3 ++- + arch/x86/kvm/vmx.c | 2 +- + arch/x86/power/cpu.c | 11 ++++++----- + 7 files changed, 58 insertions(+), 15 deletions(-) + +--- a/arch/x86/entry/entry_32.S ++++ b/arch/x86/entry/entry_32.S +@@ -941,7 +941,8 @@ ENTRY(debug) + movl %esp, %eax # pt_regs pointer + + /* Are we currently on the SYSENTER stack? */ +- PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx) ++ movl PER_CPU_VAR(cpu_entry_area), %ecx ++ addl $CPU_ENTRY_AREA_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx + subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ + cmpl $SIZEOF_SYSENTER_stack, %ecx + jb .Ldebug_from_sysenter_stack +@@ -984,7 +985,8 @@ ENTRY(nmi) + movl %esp, %eax # pt_regs pointer + + /* Are we currently on the SYSENTER stack? 
*/ +- PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx) ++ movl PER_CPU_VAR(cpu_entry_area), %ecx ++ addl $CPU_ENTRY_AREA_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx + subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ + cmpl $SIZEOF_SYSENTER_stack, %ecx + jb .Lnmi_from_sysenter_stack +--- a/arch/x86/include/asm/fixmap.h ++++ b/arch/x86/include/asm/fixmap.h +@@ -54,6 +54,13 @@ extern unsigned long __FIXADDR_TOP; + */ + struct cpu_entry_area { + char gdt[PAGE_SIZE]; ++ ++ /* ++ * The GDT is just below cpu_tss and thus serves (on x86_64) as a ++ * a read-only guard page for the SYSENTER stack at the bottom ++ * of the TSS region. ++ */ ++ struct tss_struct tss; + }; + + #define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE) +--- a/arch/x86/kernel/asm-offsets.c ++++ b/arch/x86/kernel/asm-offsets.c +@@ -98,4 +98,7 @@ void common(void) { + OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack); + /* Size of SYSENTER_stack */ + DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack)); ++ ++ /* Layout info for cpu_entry_area */ ++ OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss); + } +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -466,6 +466,22 @@ void load_percpu_segment(int cpu) + load_stack_canary_segment(); + } + ++static void set_percpu_fixmap_pages(int fixmap_index, void *ptr, ++ int pages, pgprot_t prot) ++{ ++ int i; ++ ++ for (i = 0; i < pages; i++) { ++ __set_fixmap(fixmap_index - i, ++ per_cpu_ptr_to_phys(ptr + i * PAGE_SIZE), prot); ++ } ++} ++ ++#ifdef CONFIG_X86_32 ++/* The 32-bit entry code needs to find cpu_entry_area. 
*/ ++DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); ++#endif ++ + /* Setup the fixmap mappings only once per-processor */ + static inline void setup_cpu_entry_area(int cpu) + { +@@ -507,7 +523,15 @@ static inline void setup_cpu_entry_area( + */ + BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^ + offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK); ++ BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0); ++ set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss), ++ &per_cpu(cpu_tss, cpu), ++ sizeof(struct tss_struct) / PAGE_SIZE, ++ PAGE_KERNEL); + ++#ifdef CONFIG_X86_32 ++ this_cpu_write(cpu_entry_area, get_cpu_entry_area(cpu)); ++#endif + } + + /* Load the original GDT from the per-cpu structure */ +@@ -1257,7 +1281,8 @@ void enable_sep_cpu(void) + wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0); + + wrmsr(MSR_IA32_SYSENTER_ESP, +- (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack), ++ (unsigned long)&get_cpu_entry_area(cpu)->tss + ++ offsetofend(struct tss_struct, SYSENTER_stack), + 0); + + wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0); +@@ -1370,6 +1395,8 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char, + /* May not be marked __init: used by software suspend */ + void syscall_init(void) + { ++ int cpu = smp_processor_id(); ++ + wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS); + wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); + +@@ -1383,7 +1410,7 @@ void syscall_init(void) + */ + wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); + wrmsrl_safe(MSR_IA32_SYSENTER_ESP, +- (unsigned long)this_cpu_ptr(&cpu_tss) + ++ (unsigned long)&get_cpu_entry_area(cpu)->tss + + offsetofend(struct tss_struct, SYSENTER_stack)); + wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat); + #else +@@ -1593,11 +1620,13 @@ void cpu_init(void) + initialize_tlbstate_and_flush(); + enter_lazy_tlb(&init_mm, me); + ++ setup_cpu_entry_area(cpu); ++ + /* + * Initialize the TSS. 
Don't bother initializing sp0, as the initial + * task never enters user mode. + */ +- set_tss_desc(cpu, &t->x86_tss); ++ set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); + load_TR_desc(); + + load_mm_ldt(&init_mm); +@@ -1610,7 +1639,6 @@ void cpu_init(void) + if (is_uv_system()) + uv_cpu_init(); + +- setup_cpu_entry_area(cpu); + load_fixmap_gdt(cpu); + } + +@@ -1651,11 +1679,13 @@ void cpu_init(void) + initialize_tlbstate_and_flush(); + enter_lazy_tlb(&init_mm, curr); + ++ setup_cpu_entry_area(cpu); ++ + /* + * Initialize the TSS. Don't bother initializing sp0, as the initial + * task never enters user mode. + */ +- set_tss_desc(cpu, &t->x86_tss); ++ set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); + load_TR_desc(); + + load_mm_ldt(&init_mm); +@@ -1672,7 +1702,6 @@ void cpu_init(void) + + fpu__init_cpu(); + +- setup_cpu_entry_area(cpu); + load_fixmap_gdt(cpu); + } + #endif +--- a/arch/x86/kernel/dumpstack.c ++++ b/arch/x86/kernel/dumpstack.c +@@ -45,7 +45,8 @@ bool in_task_stack(unsigned long *stack, + + bool in_sysenter_stack(unsigned long *stack, struct stack_info *info) + { +- struct tss_struct *tss = this_cpu_ptr(&cpu_tss); ++ int cpu = smp_processor_id(); ++ struct tss_struct *tss = &get_cpu_entry_area(cpu)->tss; + + /* Treat the canary as part of the stack for unwinding purposes. */ + void *begin = &tss->SYSENTER_stack_canary; +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -2295,7 +2295,7 @@ static void vmx_vcpu_load(struct kvm_vcp + * processors. See 22.2.4. 
+ */ + vmcs_writel(HOST_TR_BASE, +- (unsigned long)this_cpu_ptr(&cpu_tss.x86_tss)); ++ (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss); + vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */ + + /* +--- a/arch/x86/power/cpu.c ++++ b/arch/x86/power/cpu.c +@@ -160,18 +160,19 @@ static void do_fpu_end(void) + static void fix_processor_context(void) + { + int cpu = smp_processor_id(); +- struct tss_struct *t = &per_cpu(cpu_tss, cpu); + #ifdef CONFIG_X86_64 + struct desc_struct *desc = get_cpu_gdt_rw(cpu); + tss_desc tss; + #endif + + /* +- * This just modifies memory; should not be necessary. But... This is +- * necessary, because 386 hardware has concept of busy TSS or some +- * similar stupidity. ++ * We need to reload TR, which requires that we change the ++ * GDT entry to indicate "available" first. ++ * ++ * XXX: This could probably all be replaced by a call to ++ * force_reload_TR(). + */ +- set_tss_desc(cpu, &t->x86_tss); ++ set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); + + #ifdef CONFIG_X86_64 + memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc)); diff --git a/queue-4.14/x86-espfix-64-stop-assuming-that-pt_regs-is-on-the-entry-stack.patch b/queue-4.14/x86-espfix-64-stop-assuming-that-pt_regs-is-on-the-entry-stack.patch new file mode 100644 index 00000000000..12ff1412b73 --- /dev/null +++ b/queue-4.14/x86-espfix-64-stop-assuming-that-pt_regs-is-on-the-entry-stack.patch @@ -0,0 +1,114 @@ +From 6d9256f0a89eaff97fca6006100bcaea8d1d8bdb Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Mon, 4 Dec 2017 15:07:22 +0100 +Subject: x86/espfix/64: Stop assuming that pt_regs is on the entry stack + +From: Andy Lutomirski + +commit 6d9256f0a89eaff97fca6006100bcaea8d1d8bdb upstream. + +When we start using an entry trampoline, a #GP from userspace will +be delivered on the entry stack, not on the task stack. Fix the +espfix64 #DF fixup to set up #GP according to TSS.SP0, rather than +assuming that pt_regs + 1 == SP0. 
This won't change anything +without an entry stack, but it will make the code continue to work +when an entry stack is added. + +While we're at it, improve the comments to explain what's actually +going on. + +Signed-off-by: Andy Lutomirski +Signed-off-by: Thomas Gleixner +Reviewed-by: Thomas Gleixner +Reviewed-by: Borislav Petkov +Cc: Boris Ostrovsky +Cc: Borislav Petkov +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Dave Hansen +Cc: David Laight +Cc: Denys Vlasenko +Cc: Eduardo Valentin +Cc: Greg KH +Cc: H. Peter Anvin +Cc: Josh Poimboeuf +Cc: Juergen Gross +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Rik van Riel +Cc: Will Deacon +Cc: aliguori@amazon.com +Cc: daniel.gruss@iaik.tugraz.at +Cc: hughd@google.com +Cc: keescook@google.com +Link: https://lkml.kernel.org/r/20171204150606.130778051@linutronix.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/traps.c | 37 ++++++++++++++++++++++++++++--------- + 1 file changed, 28 insertions(+), 9 deletions(-) + +--- a/arch/x86/kernel/traps.c ++++ b/arch/x86/kernel/traps.c +@@ -348,9 +348,15 @@ dotraplinkage void do_double_fault(struc + + /* + * If IRET takes a non-IST fault on the espfix64 stack, then we +- * end up promoting it to a doublefault. In that case, modify +- * the stack to make it look like we just entered the #GP +- * handler from user space, similar to bad_iret. ++ * end up promoting it to a doublefault. In that case, take ++ * advantage of the fact that we're not using the normal (TSS.sp0) ++ * stack right now. We can write a fake #GP(0) frame at TSS.sp0 ++ * and then modify our own IRET frame so that, when we return, ++ * we land directly at the #GP(0) vector with the stack already ++ * set up according to its expectations. ++ * ++ * The net result is that our #GP handler will think that we ++ * entered from usermode with the bad user context. + * + * No need for ist_enter here because we don't use RCU. 
+ */ +@@ -358,13 +364,26 @@ dotraplinkage void do_double_fault(struc + regs->cs == __KERNEL_CS && + regs->ip == (unsigned long)native_irq_return_iret) + { +- struct pt_regs *normal_regs = task_pt_regs(current); ++ struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss.x86_tss.sp0) - 1; + +- /* Fake a #GP(0) from userspace. */ +- memmove(&normal_regs->ip, (void *)regs->sp, 5*8); +- normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */ ++ /* ++ * regs->sp points to the failing IRET frame on the ++ * ESPFIX64 stack. Copy it to the entry stack. This fills ++ * in gpregs->ss through gpregs->ip. ++ * ++ */ ++ memmove(&gpregs->ip, (void *)regs->sp, 5*8); ++ gpregs->orig_ax = 0; /* Missing (lost) #GP error code */ ++ ++ /* ++ * Adjust our frame so that we return straight to the #GP ++ * vector with the expected RSP value. This is safe because ++ * we won't enable interupts or schedule before we invoke ++ * general_protection, so nothing will clobber the stack ++ * frame we just set up. ++ */ + regs->ip = (unsigned long)general_protection; +- regs->sp = (unsigned long)&normal_regs->orig_ax; ++ regs->sp = (unsigned long)&gpregs->orig_ax; + + return; + } +@@ -389,7 +408,7 @@ dotraplinkage void do_double_fault(struc + * + * Processors update CR2 whenever a page fault is detected. If a + * second page fault occurs while an earlier page fault is being +- * deliv- ered, the faulting linear address of the second fault will ++ * delivered, the faulting linear address of the second fault will + * overwrite the contents of CR2 (replacing the previous + * address). 
These updates to CR2 occur even if the page fault + * results in a double fault or occurs during the delivery of a diff --git a/queue-4.14/x86-irq-64-print-the-offending-ip-in-the-stack-overflow-warning.patch b/queue-4.14/x86-irq-64-print-the-offending-ip-in-the-stack-overflow-warning.patch new file mode 100644 index 00000000000..0c74ef2dc7e --- /dev/null +++ b/queue-4.14/x86-irq-64-print-the-offending-ip-in-the-stack-overflow-warning.patch @@ -0,0 +1,60 @@ +From 4f3789e792296e21405f708cf3cb409d7c7d5683 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Mon, 4 Dec 2017 15:07:11 +0100 +Subject: x86/irq/64: Print the offending IP in the stack overflow warning + +From: Andy Lutomirski + +commit 4f3789e792296e21405f708cf3cb409d7c7d5683 upstream. + +In case something goes wrong with unwind (not unlikely in case of +overflow), print the offending IP where we detected the overflow. + +Signed-off-by: Andy Lutomirski +Signed-off-by: Thomas Gleixner +Reviewed-by: Borislav Petkov +Reviewed-by: Thomas Gleixner +Cc: Boris Ostrovsky +Cc: Borislav Petkov +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Dave Hansen +Cc: David Laight +Cc: Denys Vlasenko +Cc: Eduardo Valentin +Cc: Greg KH +Cc: H. 
Peter Anvin +Cc: Josh Poimboeuf +Cc: Juergen Gross +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Rik van Riel +Cc: Will Deacon +Cc: aliguori@amazon.com +Cc: daniel.gruss@iaik.tugraz.at +Cc: hughd@google.com +Cc: keescook@google.com +Link: https://lkml.kernel.org/r/20171204150605.231677119@linutronix.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/irq_64.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/x86/kernel/irq_64.c ++++ b/arch/x86/kernel/irq_64.c +@@ -57,10 +57,10 @@ static inline void stack_overflow_check( + if (regs->sp >= estack_top && regs->sp <= estack_bottom) + return; + +- WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n", ++ WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n", + current->comm, curbase, regs->sp, + irq_stack_top, irq_stack_bottom, +- estack_top, estack_bottom); ++ estack_top, estack_bottom, (void *)regs->ip); + + if (sysctl_panic_on_stackoverflow) + panic("low stack detected by irq handler - check messages\n"); diff --git a/queue-4.14/x86-irq-remove-an-old-outdated-comment-about-context-tracking-races.patch b/queue-4.14/x86-irq-remove-an-old-outdated-comment-about-context-tracking-races.patch new file mode 100644 index 00000000000..5f5776525c2 --- /dev/null +++ b/queue-4.14/x86-irq-remove-an-old-outdated-comment-about-context-tracking-races.patch @@ -0,0 +1,65 @@ +From 6669a692605547892a026445e460bf233958bd7f Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Mon, 4 Dec 2017 15:07:10 +0100 +Subject: x86/irq: Remove an old outdated comment about context tracking races + +From: Andy Lutomirski + +commit 6669a692605547892a026445e460bf233958bd7f upstream. + +That race has been fixed and code cleaned up for a while now. 
+ +Signed-off-by: Andy Lutomirski +Signed-off-by: Thomas Gleixner +Reviewed-by: Borislav Petkov +Reviewed-by: Thomas Gleixner +Cc: Boris Ostrovsky +Cc: Borislav Petkov +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Dave Hansen +Cc: David Laight +Cc: Denys Vlasenko +Cc: Eduardo Valentin +Cc: Greg KH +Cc: H. Peter Anvin +Cc: Josh Poimboeuf +Cc: Juergen Gross +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Rik van Riel +Cc: Will Deacon +Cc: aliguori@amazon.com +Cc: daniel.gruss@iaik.tugraz.at +Cc: hughd@google.com +Cc: keescook@google.com +Link: https://lkml.kernel.org/r/20171204150605.150551639@linutronix.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/irq.c | 12 ------------ + 1 file changed, 12 deletions(-) + +--- a/arch/x86/kernel/irq.c ++++ b/arch/x86/kernel/irq.c +@@ -219,18 +219,6 @@ __visible unsigned int __irq_entry do_IR + /* high bit used in ret_from_ code */ + unsigned vector = ~regs->orig_ax; + +- /* +- * NB: Unlike exception entries, IRQ entries do not reliably +- * handle context tracking in the low-level entry code. This is +- * because syscall entries execute briefly with IRQs on before +- * updating context tracking state, so we can take an IRQ from +- * kernel mode with CONTEXT_USER. The low-level entry code only +- * updates the context if we came from user mode, so we won't +- * switch to CONTEXT_KERNEL. We'll fix that once the syscall +- * code is cleaned up enough that we can cleanly defer enabling +- * IRQs. +- */ +- + entering_irq(); + + /* entering_irq() tells RCU that we're not quiescent. Check it. 
*/ diff --git a/queue-4.14/x86-kasan-64-teach-kasan-about-the-cpu_entry_area.patch b/queue-4.14/x86-kasan-64-teach-kasan-about-the-cpu_entry_area.patch new file mode 100644 index 00000000000..01c6ed2290d --- /dev/null +++ b/queue-4.14/x86-kasan-64-teach-kasan-about-the-cpu_entry_area.patch @@ -0,0 +1,82 @@ +From 21506525fb8ddb0342f2a2370812d47f6a1f3833 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Mon, 4 Dec 2017 15:07:16 +0100 +Subject: x86/kasan/64: Teach KASAN about the cpu_entry_area + +From: Andy Lutomirski + +commit 21506525fb8ddb0342f2a2370812d47f6a1f3833 upstream. + +The cpu_entry_area will contain stacks. Make sure that KASAN has +appropriate shadow mappings for them. + +Signed-off-by: Andy Lutomirski +Signed-off-by: Andrey Ryabinin +Signed-off-by: Thomas Gleixner +Cc: Alexander Potapenko +Cc: Boris Ostrovsky +Cc: Borislav Petkov +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Dave Hansen +Cc: David Laight +Cc: Denys Vlasenko +Cc: Dmitry Vyukov +Cc: Eduardo Valentin +Cc: Greg KH +Cc: H. 
Peter Anvin +Cc: Josh Poimboeuf +Cc: Juergen Gross +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Rik van Riel +Cc: Will Deacon +Cc: aliguori@amazon.com +Cc: daniel.gruss@iaik.tugraz.at +Cc: hughd@google.com +Cc: kasan-dev@googlegroups.com +Cc: keescook@google.com +Link: https://lkml.kernel.org/r/20171204150605.642806442@linutronix.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/mm/kasan_init_64.c | 18 +++++++++++++++++- + 1 file changed, 17 insertions(+), 1 deletion(-) + +--- a/arch/x86/mm/kasan_init_64.c ++++ b/arch/x86/mm/kasan_init_64.c +@@ -277,6 +277,7 @@ void __init kasan_early_init(void) + void __init kasan_init(void) + { + int i; ++ void *shadow_cpu_entry_begin, *shadow_cpu_entry_end; + + #ifdef CONFIG_KASAN_INLINE + register_die_notifier(&kasan_die_notifier); +@@ -329,8 +330,23 @@ void __init kasan_init(void) + (unsigned long)kasan_mem_to_shadow(_end), + early_pfn_to_nid(__pa(_stext))); + ++ shadow_cpu_entry_begin = (void *)__fix_to_virt(FIX_CPU_ENTRY_AREA_BOTTOM); ++ shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin); ++ shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin, ++ PAGE_SIZE); ++ ++ shadow_cpu_entry_end = (void *)(__fix_to_virt(FIX_CPU_ENTRY_AREA_TOP) + PAGE_SIZE); ++ shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end); ++ shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end, ++ PAGE_SIZE); ++ + kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END), +- (void *)KASAN_SHADOW_END); ++ shadow_cpu_entry_begin); ++ ++ kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin, ++ (unsigned long)shadow_cpu_entry_end, 0); ++ ++ kasan_populate_zero_shadow(shadow_cpu_entry_end, (void *)KASAN_SHADOW_END); + + load_cr3(init_top_pgt); + __flush_tlb_all(); diff --git a/queue-4.14/x86-make-x86_bug_fxsave_leak-detectable-in-cpuid-on-amd.patch b/queue-4.14/x86-make-x86_bug_fxsave_leak-detectable-in-cpuid-on-amd.patch new file 
mode 100644 index 00000000000..9549af017b6 --- /dev/null +++ b/queue-4.14/x86-make-x86_bug_fxsave_leak-detectable-in-cpuid-on-amd.patch @@ -0,0 +1,62 @@ +From f2dbad36c55e5d3a91dccbde6e8cae345fe5632f Mon Sep 17 00:00:00 2001 +From: Rudolf Marek +Date: Tue, 28 Nov 2017 22:01:06 +0100 +Subject: x86: Make X86_BUG_FXSAVE_LEAK detectable in CPUID on AMD + +From: Rudolf Marek + +commit f2dbad36c55e5d3a91dccbde6e8cae345fe5632f upstream. + +[ Note, this is a Git cherry-pick of the following commit: + + 2b67799bdf25 ("x86: Make X86_BUG_FXSAVE_LEAK detectable in CPUID on AMD") + + ... for easier x86 PTI code testing and back-porting. ] + +The latest AMD AMD64 Architecture Programmer's Manual +adds a CPUID feature XSaveErPtr (CPUID_Fn80000008_EBX[2]). + +If this feature is set, the FXSAVE, XSAVE, FXSAVEOPT, XSAVEC, XSAVES +/ FXRSTOR, XRSTOR, XRSTORS always save/restore error pointers, +thus making the X86_BUG_FXSAVE_LEAK workaround obsolete on such CPUs. + +Signed-Off-By: Rudolf Marek +Signed-off-by: Thomas Gleixner +Reviewed-by: Borislav Petkov +Tested-by: Borislav Petkov +Cc: Andy Lutomirski +Link: https://lkml.kernel.org/r/bdcebe90-62c5-1f05-083c-eba7f08b2540@assembler.cz +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/cpufeatures.h | 1 + + arch/x86/kernel/cpu/amd.c | 7 +++++-- + 2 files changed, 6 insertions(+), 2 deletions(-) + +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -266,6 +266,7 @@ + /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ + #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ + #define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ ++#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */ + + /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ + #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ +--- a/arch/x86/kernel/cpu/amd.c ++++ 
b/arch/x86/kernel/cpu/amd.c +@@ -804,8 +804,11 @@ static void init_amd(struct cpuinfo_x86 + case 0x17: init_amd_zn(c); break; + } + +- /* Enable workaround for FXSAVE leak */ +- if (c->x86 >= 6) ++ /* ++ * Enable workaround for FXSAVE leak on CPUs ++ * without a XSaveErPtr feature ++ */ ++ if ((c->x86 >= 6) && (!cpu_has(c, X86_FEATURE_XSAVEERPTR))) + set_cpu_bug(c, X86_BUG_FXSAVE_LEAK); + + cpu_detect_cache_sizes(c); diff --git a/queue-4.14/x86-mm-define-_page_table-using-_kernpg_table.patch b/queue-4.14/x86-mm-define-_page_table-using-_kernpg_table.patch new file mode 100644 index 00000000000..988cee7b582 --- /dev/null +++ b/queue-4.14/x86-mm-define-_page_table-using-_kernpg_table.patch @@ -0,0 +1,39 @@ +From c7da092a1f243bfd1bfb4124f538e69e941882da Mon Sep 17 00:00:00 2001 +From: Borislav Petkov +Date: Fri, 3 Nov 2017 11:20:28 +0100 +Subject: x86/mm: Define _PAGE_TABLE using _KERNPG_TABLE + +From: Borislav Petkov + +commit c7da092a1f243bfd1bfb4124f538e69e941882da upstream. + +... so that the difference is obvious. + +No functionality change. 
+ +Signed-off-by: Borislav Petkov +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/20171103102028.20284-1-bp@alien8.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/pgtable_types.h | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/arch/x86/include/asm/pgtable_types.h ++++ b/arch/x86/include/asm/pgtable_types.h +@@ -200,10 +200,9 @@ enum page_cache_mode { + + #define _PAGE_ENC (_AT(pteval_t, sme_me_mask)) + +-#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ +- _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_ENC) + #define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \ + _PAGE_DIRTY | _PAGE_ENC) ++#define _PAGE_TABLE (_KERNPG_TABLE | _PAGE_USER) + + #define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _PAGE_ENC) + #define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _PAGE_ENC) diff --git a/queue-4.14/x86-mm-fixmap-generalize-the-gdt-fixmap-mechanism-introduce-struct-cpu_entry_area.patch b/queue-4.14/x86-mm-fixmap-generalize-the-gdt-fixmap-mechanism-introduce-struct-cpu_entry_area.patch new file mode 100644 index 00000000000..4881de11644 --- /dev/null +++ b/queue-4.14/x86-mm-fixmap-generalize-the-gdt-fixmap-mechanism-introduce-struct-cpu_entry_area.patch @@ -0,0 +1,190 @@ +From ef8813ab280507972bb57e4b1b502811ad4411e9 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Mon, 4 Dec 2017 15:07:15 +0100 +Subject: x86/mm/fixmap: Generalize the GDT fixmap mechanism, introduce struct cpu_entry_area + +From: Andy Lutomirski + +commit ef8813ab280507972bb57e4b1b502811ad4411e9 upstream. + +Currently, the GDT is an ad-hoc array of pages, one per CPU, in the +fixmap. Generalize it to be an array of a new 'struct cpu_entry_area' +so that we can cleanly add new things to it. 
+ +Signed-off-by: Andy Lutomirski +Signed-off-by: Thomas Gleixner +Reviewed-by: Thomas Gleixner +Reviewed-by: Borislav Petkov +Cc: Boris Ostrovsky +Cc: Borislav Petkov +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Dave Hansen +Cc: David Laight +Cc: Denys Vlasenko +Cc: Eduardo Valentin +Cc: Greg KH +Cc: H. Peter Anvin +Cc: Josh Poimboeuf +Cc: Juergen Gross +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Rik van Riel +Cc: Will Deacon +Cc: aliguori@amazon.com +Cc: daniel.gruss@iaik.tugraz.at +Cc: hughd@google.com +Cc: keescook@google.com +Link: https://lkml.kernel.org/r/20171204150605.563271721@linutronix.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/desc.h | 9 +-------- + arch/x86/include/asm/fixmap.h | 37 +++++++++++++++++++++++++++++++++++-- + arch/x86/kernel/cpu/common.c | 14 +++++++------- + arch/x86/xen/mmu_pv.c | 2 +- + 4 files changed, 44 insertions(+), 18 deletions(-) + +--- a/arch/x86/include/asm/desc.h ++++ b/arch/x86/include/asm/desc.h +@@ -60,17 +60,10 @@ static inline struct desc_struct *get_cu + return this_cpu_ptr(&gdt_page)->gdt; + } + +-/* Get the fixmap index for a specific processor */ +-static inline unsigned int get_cpu_gdt_ro_index(int cpu) +-{ +- return FIX_GDT_REMAP_END - cpu; +-} +- + /* Provide the fixmap address of the remapped GDT */ + static inline struct desc_struct *get_cpu_gdt_ro(int cpu) + { +- unsigned int idx = get_cpu_gdt_ro_index(cpu); +- return (struct desc_struct *)__fix_to_virt(idx); ++ return (struct desc_struct *)&get_cpu_entry_area(cpu)->gdt; + } + + /* Provide the current read-only GDT */ +--- a/arch/x86/include/asm/fixmap.h ++++ b/arch/x86/include/asm/fixmap.h +@@ -44,6 +44,19 @@ extern unsigned long __FIXADDR_TOP; + PAGE_SIZE) + #endif + ++/* ++ * cpu_entry_area is a percpu region in the fixmap that contains things ++ * needed by the CPU and early entry/exit code. Real types aren't used ++ * for all fields here to avoid circular header dependencies. 
++ * ++ * Every field is a virtual alias of some other allocated backing store. ++ * There is no direct allocation of a struct cpu_entry_area. ++ */ ++struct cpu_entry_area { ++ char gdt[PAGE_SIZE]; ++}; ++ ++#define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE) + + /* + * Here we define all the compile-time 'special' virtual +@@ -101,8 +114,8 @@ enum fixed_addresses { + FIX_LNW_VRTC, + #endif + /* Fixmap entries to remap the GDTs, one per processor. */ +- FIX_GDT_REMAP_BEGIN, +- FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1, ++ FIX_CPU_ENTRY_AREA_TOP, ++ FIX_CPU_ENTRY_AREA_BOTTOM = FIX_CPU_ENTRY_AREA_TOP + (CPU_ENTRY_AREA_PAGES * NR_CPUS) - 1, + + #ifdef CONFIG_ACPI_APEI_GHES + /* Used for GHES mapping from assorted contexts */ +@@ -191,5 +204,25 @@ void __init *early_memremap_decrypted_wp + void __early_set_fixmap(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t flags); + ++static inline unsigned int __get_cpu_entry_area_page_index(int cpu, int page) ++{ ++ BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0); ++ ++ return FIX_CPU_ENTRY_AREA_BOTTOM - cpu*CPU_ENTRY_AREA_PAGES - page; ++} ++ ++#define __get_cpu_entry_area_offset_index(cpu, offset) ({ \ ++ BUILD_BUG_ON(offset % PAGE_SIZE != 0); \ ++ __get_cpu_entry_area_page_index(cpu, offset / PAGE_SIZE); \ ++ }) ++ ++#define get_cpu_entry_area_index(cpu, field) \ ++ __get_cpu_entry_area_offset_index((cpu), offsetof(struct cpu_entry_area, field)) ++ ++static inline struct cpu_entry_area *get_cpu_entry_area(int cpu) ++{ ++ return (struct cpu_entry_area *)__fix_to_virt(__get_cpu_entry_area_page_index(cpu, 0)); ++} ++ + #endif /* !__ASSEMBLY__ */ + #endif /* _ASM_X86_FIXMAP_H */ +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -466,12 +466,12 @@ void load_percpu_segment(int cpu) + load_stack_canary_segment(); + } + +-/* Setup the fixmap mapping only once per-processor */ +-static inline void setup_fixmap_gdt(int cpu) ++/* Setup the fixmap mappings 
only once per-processor */ ++static inline void setup_cpu_entry_area(int cpu) + { + #ifdef CONFIG_X86_64 + /* On 64-bit systems, we use a read-only fixmap GDT. */ +- pgprot_t prot = PAGE_KERNEL_RO; ++ pgprot_t gdt_prot = PAGE_KERNEL_RO; + #else + /* + * On native 32-bit systems, the GDT cannot be read-only because +@@ -482,11 +482,11 @@ static inline void setup_fixmap_gdt(int + * On Xen PV, the GDT must be read-only because the hypervisor requires + * it. + */ +- pgprot_t prot = boot_cpu_has(X86_FEATURE_XENPV) ? ++ pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ? + PAGE_KERNEL_RO : PAGE_KERNEL; + #endif + +- __set_fixmap(get_cpu_gdt_ro_index(cpu), get_cpu_gdt_paddr(cpu), prot); ++ __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot); + } + + /* Load the original GDT from the per-cpu structure */ +@@ -1589,7 +1589,7 @@ void cpu_init(void) + if (is_uv_system()) + uv_cpu_init(); + +- setup_fixmap_gdt(cpu); ++ setup_cpu_entry_area(cpu); + load_fixmap_gdt(cpu); + } + +@@ -1651,7 +1651,7 @@ void cpu_init(void) + + fpu__init_cpu(); + +- setup_fixmap_gdt(cpu); ++ setup_cpu_entry_area(cpu); + load_fixmap_gdt(cpu); + } + #endif +--- a/arch/x86/xen/mmu_pv.c ++++ b/arch/x86/xen/mmu_pv.c +@@ -2272,7 +2272,7 @@ static void xen_set_fixmap(unsigned idx, + #endif + case FIX_TEXT_POKE0: + case FIX_TEXT_POKE1: +- case FIX_GDT_REMAP_BEGIN ... FIX_GDT_REMAP_END: ++ case FIX_CPU_ENTRY_AREA_TOP ... 
FIX_CPU_ENTRY_AREA_BOTTOM: + /* All local page mappings */ + pte = pfn_pte(phys, prot); + break; diff --git a/queue-4.14/x86-mm-kasan-don-t-use-vmemmap_populate-to-initialize-shadow.patch b/queue-4.14/x86-mm-kasan-don-t-use-vmemmap_populate-to-initialize-shadow.patch new file mode 100644 index 00000000000..7178b102128 --- /dev/null +++ b/queue-4.14/x86-mm-kasan-don-t-use-vmemmap_populate-to-initialize-shadow.patch @@ -0,0 +1,254 @@ +From 2aeb07365bcd489620f71390a7d2031cd4dfb83e Mon Sep 17 00:00:00 2001 +From: Andrey Ryabinin +Date: Wed, 15 Nov 2017 17:36:35 -0800 +Subject: x86/mm/kasan: Don't use vmemmap_populate() to initialize shadow + +From: Andrey Ryabinin + +commit 2aeb07365bcd489620f71390a7d2031cd4dfb83e upstream. + +[ Note, this is a Git cherry-pick of the following commit: + + d17a1d97dc20: ("x86/mm/kasan: don't use vmemmap_populate() to initialize shadow") + + ... for easier x86 PTI code testing and back-porting. ] + +The KASAN shadow is currently mapped using vmemmap_populate() since that +provides a semi-convenient way to map pages into init_top_pgt. However, +since that no longer zeroes the mapped pages, it is not suitable for +KASAN, which requires zeroed shadow memory. + +Add kasan_populate_shadow() interface and use it instead of +vmemmap_populate(). Besides, this allows us to take advantage of +gigantic pages and use them to populate the shadow, which should save us +some memory wasted on page tables and reduce TLB pressure. + +Link: http://lkml.kernel.org/r/20171103185147.2688-2-pasha.tatashin@oracle.com +Signed-off-by: Andrey Ryabinin +Signed-off-by: Pavel Tatashin +Cc: Andy Lutomirski +Cc: Steven Sistare +Cc: Daniel Jordan +Cc: Bob Picco +Cc: Michal Hocko +Cc: Alexander Potapenko +Cc: Ard Biesheuvel +Cc: Catalin Marinas +Cc: Christian Borntraeger +Cc: David S. Miller +Cc: Dmitry Vyukov +Cc: Heiko Carstens +Cc: "H. 
Peter Anvin" +Cc: Ingo Molnar +Cc: Mark Rutland +Cc: Matthew Wilcox +Cc: Mel Gorman +Cc: Michal Hocko +Cc: Sam Ravnborg +Cc: Thomas Gleixner +Cc: Will Deacon +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/Kconfig | 2 + arch/x86/mm/kasan_init_64.c | 143 +++++++++++++++++++++++++++++++++++++++++--- + 2 files changed, 137 insertions(+), 8 deletions(-) + +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -108,7 +108,7 @@ config X86 + select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE + select HAVE_ARCH_JUMP_LABEL +- select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP ++ select HAVE_ARCH_KASAN if X86_64 + select HAVE_ARCH_KGDB + select HAVE_ARCH_KMEMCHECK + select HAVE_ARCH_MMAP_RND_BITS if MMU +--- a/arch/x86/mm/kasan_init_64.c ++++ b/arch/x86/mm/kasan_init_64.c +@@ -4,12 +4,14 @@ + #include + #include + #include ++#include + #include + #include + #include + #include + + #include ++#include + #include + #include + #include +@@ -18,7 +20,134 @@ extern struct range pfn_mapped[E820_MAX_ + + static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); + +-static int __init map_range(struct range *range) ++static __init void *early_alloc(size_t size, int nid) ++{ ++ return memblock_virt_alloc_try_nid_nopanic(size, size, ++ __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid); ++} ++ ++static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr, ++ unsigned long end, int nid) ++{ ++ pte_t *pte; ++ ++ if (pmd_none(*pmd)) { ++ void *p; ++ ++ if (boot_cpu_has(X86_FEATURE_PSE) && ++ ((end - addr) == PMD_SIZE) && ++ IS_ALIGNED(addr, PMD_SIZE)) { ++ p = early_alloc(PMD_SIZE, nid); ++ if (p && pmd_set_huge(pmd, __pa(p), PAGE_KERNEL)) ++ return; ++ else if (p) ++ memblock_free(__pa(p), PMD_SIZE); ++ } ++ ++ p = early_alloc(PAGE_SIZE, nid); ++ pmd_populate_kernel(&init_mm, pmd, p); ++ } ++ ++ pte = pte_offset_kernel(pmd, addr); ++ do { 
++ pte_t entry; ++ void *p; ++ ++ if (!pte_none(*pte)) ++ continue; ++ ++ p = early_alloc(PAGE_SIZE, nid); ++ entry = pfn_pte(PFN_DOWN(__pa(p)), PAGE_KERNEL); ++ set_pte_at(&init_mm, addr, pte, entry); ++ } while (pte++, addr += PAGE_SIZE, addr != end); ++} ++ ++static void __init kasan_populate_pud(pud_t *pud, unsigned long addr, ++ unsigned long end, int nid) ++{ ++ pmd_t *pmd; ++ unsigned long next; ++ ++ if (pud_none(*pud)) { ++ void *p; ++ ++ if (boot_cpu_has(X86_FEATURE_GBPAGES) && ++ ((end - addr) == PUD_SIZE) && ++ IS_ALIGNED(addr, PUD_SIZE)) { ++ p = early_alloc(PUD_SIZE, nid); ++ if (p && pud_set_huge(pud, __pa(p), PAGE_KERNEL)) ++ return; ++ else if (p) ++ memblock_free(__pa(p), PUD_SIZE); ++ } ++ ++ p = early_alloc(PAGE_SIZE, nid); ++ pud_populate(&init_mm, pud, p); ++ } ++ ++ pmd = pmd_offset(pud, addr); ++ do { ++ next = pmd_addr_end(addr, end); ++ if (!pmd_large(*pmd)) ++ kasan_populate_pmd(pmd, addr, next, nid); ++ } while (pmd++, addr = next, addr != end); ++} ++ ++static void __init kasan_populate_p4d(p4d_t *p4d, unsigned long addr, ++ unsigned long end, int nid) ++{ ++ pud_t *pud; ++ unsigned long next; ++ ++ if (p4d_none(*p4d)) { ++ void *p = early_alloc(PAGE_SIZE, nid); ++ ++ p4d_populate(&init_mm, p4d, p); ++ } ++ ++ pud = pud_offset(p4d, addr); ++ do { ++ next = pud_addr_end(addr, end); ++ if (!pud_large(*pud)) ++ kasan_populate_pud(pud, addr, next, nid); ++ } while (pud++, addr = next, addr != end); ++} ++ ++static void __init kasan_populate_pgd(pgd_t *pgd, unsigned long addr, ++ unsigned long end, int nid) ++{ ++ void *p; ++ p4d_t *p4d; ++ unsigned long next; ++ ++ if (pgd_none(*pgd)) { ++ p = early_alloc(PAGE_SIZE, nid); ++ pgd_populate(&init_mm, pgd, p); ++ } ++ ++ p4d = p4d_offset(pgd, addr); ++ do { ++ next = p4d_addr_end(addr, end); ++ kasan_populate_p4d(p4d, addr, next, nid); ++ } while (p4d++, addr = next, addr != end); ++} ++ ++static void __init kasan_populate_shadow(unsigned long addr, unsigned long end, ++ int nid) ++{ ++ pgd_t 
*pgd; ++ unsigned long next; ++ ++ addr = addr & PAGE_MASK; ++ end = round_up(end, PAGE_SIZE); ++ pgd = pgd_offset_k(addr); ++ do { ++ next = pgd_addr_end(addr, end); ++ kasan_populate_pgd(pgd, addr, next, nid); ++ } while (pgd++, addr = next, addr != end); ++} ++ ++static void __init map_range(struct range *range) + { + unsigned long start; + unsigned long end; +@@ -26,7 +155,7 @@ static int __init map_range(struct range + start = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->start)); + end = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->end)); + +- return vmemmap_populate(start, end, NUMA_NO_NODE); ++ kasan_populate_shadow(start, end, early_pfn_to_nid(range->start)); + } + + static void __init clear_pgds(unsigned long start, +@@ -189,16 +318,16 @@ void __init kasan_init(void) + if (pfn_mapped[i].end == 0) + break; + +- if (map_range(&pfn_mapped[i])) +- panic("kasan: unable to allocate shadow!"); ++ map_range(&pfn_mapped[i]); + } ++ + kasan_populate_zero_shadow( + kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), + kasan_mem_to_shadow((void *)__START_KERNEL_map)); + +- vmemmap_populate((unsigned long)kasan_mem_to_shadow(_stext), +- (unsigned long)kasan_mem_to_shadow(_end), +- NUMA_NO_NODE); ++ kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext), ++ (unsigned long)kasan_mem_to_shadow(_end), ++ early_pfn_to_nid(__pa(_stext))); + + kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END), + (void *)KASAN_SHADOW_END); diff --git a/queue-4.14/x86-paravirt-dont-patch-flush_tlb_single.patch b/queue-4.14/x86-paravirt-dont-patch-flush_tlb_single.patch new file mode 100644 index 00000000000..02cd12cee0e --- /dev/null +++ b/queue-4.14/x86-paravirt-dont-patch-flush_tlb_single.patch @@ -0,0 +1,68 @@ +From a035795499ca1c2bd1928808d1a156eda1420383 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Mon, 4 Dec 2017 15:07:30 +0100 +Subject: x86/paravirt: Dont patch flush_tlb_single + +From: Thomas Gleixner + +commit 
a035795499ca1c2bd1928808d1a156eda1420383 upstream. + +native_flush_tlb_single() will be changed with the upcoming +PAGE_TABLE_ISOLATION feature. This requires to have more code in +there than INVLPG. + +Remove the paravirt patching for it. + +Signed-off-by: Thomas Gleixner +Reviewed-by: Josh Poimboeuf +Reviewed-by: Juergen Gross +Acked-by: Peter Zijlstra +Cc: Andy Lutomirski +Cc: Boris Ostrovsky +Cc: Borislav Petkov +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Dave Hansen +Cc: David Laight +Cc: Denys Vlasenko +Cc: Eduardo Valentin +Cc: Greg KH +Cc: H. Peter Anvin +Cc: Linus Torvalds +Cc: Rik van Riel +Cc: Will Deacon +Cc: aliguori@amazon.com +Cc: daniel.gruss@iaik.tugraz.at +Cc: hughd@google.com +Cc: keescook@google.com +Cc: linux-mm@kvack.org +Cc: michael.schwarz@iaik.tugraz.at +Cc: moritz.lipp@iaik.tugraz.at +Cc: richard.fellner@student.tugraz.at +Link: https://lkml.kernel.org/r/20171204150606.828111617@linutronix.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/paravirt_patch_64.c | 2 -- + 1 file changed, 2 deletions(-) + +--- a/arch/x86/kernel/paravirt_patch_64.c ++++ b/arch/x86/kernel/paravirt_patch_64.c +@@ -10,7 +10,6 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; + DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax"); + DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax"); + DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3"); +-DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)"); + DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd"); + + DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq"); +@@ -60,7 +59,6 @@ unsigned native_patch(u8 type, u16 clobb + PATCH_SITE(pv_mmu_ops, read_cr2); + PATCH_SITE(pv_mmu_ops, read_cr3); + PATCH_SITE(pv_mmu_ops, write_cr3); +- PATCH_SITE(pv_mmu_ops, flush_tlb_single); + PATCH_SITE(pv_cpu_ops, wbinvd); + #if defined(CONFIG_PARAVIRT_SPINLOCKS) + case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock): diff --git 
a/queue-4.14/x86-paravirt-provide-a-way-to-check-for-hypervisors.patch b/queue-4.14/x86-paravirt-provide-a-way-to-check-for-hypervisors.patch new file mode 100644 index 00000000000..923cc7c186e --- /dev/null +++ b/queue-4.14/x86-paravirt-provide-a-way-to-check-for-hypervisors.patch @@ -0,0 +1,96 @@ +From 79cc74155218316b9a5d28577c7077b2adba8e58 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Mon, 4 Dec 2017 15:07:31 +0100 +Subject: x86/paravirt: Provide a way to check for hypervisors + +From: Thomas Gleixner + +commit 79cc74155218316b9a5d28577c7077b2adba8e58 upstream. + +There is no generic way to test whether a kernel is running on a specific +hypervisor. But that's required to prevent the upcoming user address space +separation feature in certain guest modes. + +Make the hypervisor type enum unconditionally available and provide a +helper function which allows to test for a specific type. + +Signed-off-by: Thomas Gleixner +Reviewed-by: Juergen Gross +Cc: Andy Lutomirski +Cc: Boris Ostrovsky +Cc: Borislav Petkov +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Dave Hansen +Cc: David Laight +Cc: Denys Vlasenko +Cc: Eduardo Valentin +Cc: Greg KH +Cc: H. 
Peter Anvin +Cc: Josh Poimboeuf +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Rik van Riel +Cc: Will Deacon +Cc: aliguori@amazon.com +Cc: daniel.gruss@iaik.tugraz.at +Cc: hughd@google.com +Cc: keescook@google.com +Link: https://lkml.kernel.org/r/20171204150606.912938129@linutronix.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/hypervisor.h | 25 +++++++++++++++---------- + 1 file changed, 15 insertions(+), 10 deletions(-) + +--- a/arch/x86/include/asm/hypervisor.h ++++ b/arch/x86/include/asm/hypervisor.h +@@ -20,16 +20,7 @@ + #ifndef _ASM_X86_HYPERVISOR_H + #define _ASM_X86_HYPERVISOR_H + +-#ifdef CONFIG_HYPERVISOR_GUEST +- +-#include +-#include +-#include +- +-/* +- * x86 hypervisor information +- */ +- ++/* x86 hypervisor types */ + enum x86_hypervisor_type { + X86_HYPER_NATIVE = 0, + X86_HYPER_VMWARE, +@@ -39,6 +30,12 @@ enum x86_hypervisor_type { + X86_HYPER_KVM, + }; + ++#ifdef CONFIG_HYPERVISOR_GUEST ++ ++#include ++#include ++#include ++ + struct hypervisor_x86 { + /* Hypervisor name */ + const char *name; +@@ -58,7 +55,15 @@ struct hypervisor_x86 { + + extern enum x86_hypervisor_type x86_hyper_type; + extern void init_hypervisor_platform(void); ++static inline bool hypervisor_is_type(enum x86_hypervisor_type type) ++{ ++ return x86_hyper_type == type; ++} + #else + static inline void init_hypervisor_platform(void) { } ++static inline bool hypervisor_is_type(enum x86_hypervisor_type type) ++{ ++ return type == X86_HYPER_NATIVE; ++} + #endif /* CONFIG_HYPERVISOR_GUEST */ + #endif /* _ASM_X86_HYPERVISOR_H */ diff --git a/queue-4.14/x86-traps-use-a-new-on_thread_stack-helper-to-clean-up-an-assertion.patch b/queue-4.14/x86-traps-use-a-new-on_thread_stack-helper-to-clean-up-an-assertion.patch new file mode 100644 index 00000000000..c3ec135bde6 --- /dev/null +++ b/queue-4.14/x86-traps-use-a-new-on_thread_stack-helper-to-clean-up-an-assertion.patch @@ -0,0 +1,56 @@ +From 3383642c2f9d4f5b4fa37436db4a109a1a10018c Mon Sep 
17 00:00:00 2001 +From: Andy Lutomirski +Date: Thu, 2 Nov 2017 00:59:17 -0700 +Subject: x86/traps: Use a new on_thread_stack() helper to clean up an assertion + +From: Andy Lutomirski + +commit 3383642c2f9d4f5b4fa37436db4a109a1a10018c upstream. + +Let's keep the stack-related logic together rather than open-coding +a comparison in an assertion in the traps code. + +Signed-off-by: Andy Lutomirski +Reviewed-by: Borislav Petkov +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/856b15bee1f55017b8f79d3758b0d51c48a08cf8.1509609304.git.luto@kernel.org +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/processor.h | 6 ++++++ + arch/x86/kernel/traps.c | 3 +-- + 2 files changed, 7 insertions(+), 2 deletions(-) + +--- a/arch/x86/include/asm/processor.h ++++ b/arch/x86/include/asm/processor.h +@@ -542,6 +542,12 @@ static inline unsigned long current_top_ + #endif + } + ++static inline bool on_thread_stack(void) ++{ ++ return (unsigned long)(current_top_of_stack() - ++ current_stack_pointer) < THREAD_SIZE; ++} ++ + #ifdef CONFIG_PARAVIRT + #include + #else +--- a/arch/x86/kernel/traps.c ++++ b/arch/x86/kernel/traps.c +@@ -141,8 +141,7 @@ void ist_begin_non_atomic(struct pt_regs + * will catch asm bugs and any attempt to use ist_preempt_enable + * from double_fault. 
+ */ +- BUG_ON((unsigned long)(current_top_of_stack() - +- current_stack_pointer) >= THREAD_SIZE); ++ BUG_ON(!on_thread_stack()); + + preempt_enable_no_resched(); + } diff --git a/queue-4.14/x86-unwinder-handle-stack-overflows-more-gracefully.patch b/queue-4.14/x86-unwinder-handle-stack-overflows-more-gracefully.patch new file mode 100644 index 00000000000..4ffb7e4a6ec --- /dev/null +++ b/queue-4.14/x86-unwinder-handle-stack-overflows-more-gracefully.patch @@ -0,0 +1,319 @@ +From b02fcf9ba1211097754b286043cd87a8b4907e75 Mon Sep 17 00:00:00 2001 +From: Josh Poimboeuf +Date: Mon, 4 Dec 2017 15:07:09 +0100 +Subject: x86/unwinder: Handle stack overflows more gracefully + +From: Josh Poimboeuf + +commit b02fcf9ba1211097754b286043cd87a8b4907e75 upstream. + +There are at least two unwinder bugs hindering the debugging of +stack-overflow crashes: + +- It doesn't deal gracefully with the case where the stack overflows and + the stack pointer itself isn't on a valid stack but the + to-be-dereferenced data *is*. + +- The ORC oops dump code doesn't know how to print partial pt_regs, for the + case where if we get an interrupt/exception in *early* entry code + before the full pt_regs have been saved. + +Fix both issues. + +http://lkml.kernel.org/r/20171126024031.uxi4numpbjm5rlbr@treble + +Signed-off-by: Josh Poimboeuf +Signed-off-by: Thomas Gleixner +Reviewed-by: Borislav Petkov +Cc: Andy Lutomirski +Cc: Boris Ostrovsky +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Dave Hansen +Cc: David Laight +Cc: Denys Vlasenko +Cc: Eduardo Valentin +Cc: Greg KH +Cc: H. 
Peter Anvin +Cc: Juergen Gross +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Rik van Riel +Cc: Will Deacon +Cc: aliguori@amazon.com +Cc: daniel.gruss@iaik.tugraz.at +Cc: hughd@google.com +Cc: keescook@google.com +Link: https://lkml.kernel.org/r/20171204150605.071425003@linutronix.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/kdebug.h | 1 + arch/x86/include/asm/unwind.h | 7 +++ + arch/x86/kernel/dumpstack.c | 32 ++++++++++++++--- + arch/x86/kernel/process_64.c | 11 ++---- + arch/x86/kernel/unwind_orc.c | 76 ++++++++++++++---------------------------- + 5 files changed, 66 insertions(+), 61 deletions(-) + +--- a/arch/x86/include/asm/kdebug.h ++++ b/arch/x86/include/asm/kdebug.h +@@ -26,6 +26,7 @@ extern void die(const char *, struct pt_ + extern int __must_check __die(const char *, struct pt_regs *, long); + extern void show_stack_regs(struct pt_regs *regs); + extern void __show_regs(struct pt_regs *regs, int all); ++extern void show_iret_regs(struct pt_regs *regs); + extern unsigned long oops_begin(void); + extern void oops_end(unsigned long, struct pt_regs *, int signr); + +--- a/arch/x86/include/asm/unwind.h ++++ b/arch/x86/include/asm/unwind.h +@@ -7,6 +7,9 @@ + #include + #include + ++#define IRET_FRAME_OFFSET (offsetof(struct pt_regs, ip)) ++#define IRET_FRAME_SIZE (sizeof(struct pt_regs) - IRET_FRAME_OFFSET) ++ + struct unwind_state { + struct stack_info stack_info; + unsigned long stack_mask; +@@ -52,6 +55,10 @@ void unwind_start(struct unwind_state *s + } + + #if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER) ++/* ++ * WARNING: The entire pt_regs may not be safe to dereference. In some cases, ++ * only the iret frame registers are accessible. Use with caution! 
++ */ + static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) + { + if (unwind_done(state)) +--- a/arch/x86/kernel/dumpstack.c ++++ b/arch/x86/kernel/dumpstack.c +@@ -50,6 +50,28 @@ static void printk_stack_address(unsigne + printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address); + } + ++void show_iret_regs(struct pt_regs *regs) ++{ ++ printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip); ++ printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss, ++ regs->sp, regs->flags); ++} ++ ++static void show_regs_safe(struct stack_info *info, struct pt_regs *regs) ++{ ++ if (on_stack(info, regs, sizeof(*regs))) ++ __show_regs(regs, 0); ++ else if (on_stack(info, (void *)regs + IRET_FRAME_OFFSET, ++ IRET_FRAME_SIZE)) { ++ /* ++ * When an interrupt or exception occurs in entry code, the ++ * full pt_regs might not have been saved yet. In that case ++ * just print the iret frame. ++ */ ++ show_iret_regs(regs); ++ } ++} ++ + void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, + unsigned long *stack, char *log_lvl) + { +@@ -94,8 +116,8 @@ void show_trace_log_lvl(struct task_stru + if (stack_name) + printk("%s <%s>\n", log_lvl, stack_name); + +- if (regs && on_stack(&stack_info, regs, sizeof(*regs))) +- __show_regs(regs, 0); ++ if (regs) ++ show_regs_safe(&stack_info, regs); + + /* + * Scan the stack, printing any text addresses we find. At the +@@ -119,7 +141,7 @@ void show_trace_log_lvl(struct task_stru + + /* + * Don't print regs->ip again if it was already printed +- * by __show_regs() below. ++ * by show_regs_safe() below. 
+ */ + if (regs && stack == &regs->ip) + goto next; +@@ -155,8 +177,8 @@ next: + + /* if the frame has entry regs, print them */ + regs = unwind_get_entry_regs(&state); +- if (regs && on_stack(&stack_info, regs, sizeof(*regs))) +- __show_regs(regs, 0); ++ if (regs) ++ show_regs_safe(&stack_info, regs); + } + + if (stack_name) +--- a/arch/x86/kernel/process_64.c ++++ b/arch/x86/kernel/process_64.c +@@ -69,9 +69,8 @@ void __show_regs(struct pt_regs *regs, i + unsigned int fsindex, gsindex; + unsigned int ds, cs, es; + +- printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs, (void *)regs->ip); +- printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss, +- regs->sp, regs->flags); ++ show_iret_regs(regs); ++ + if (regs->orig_ax != -1) + pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax); + else +@@ -88,6 +87,9 @@ void __show_regs(struct pt_regs *regs, i + printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n", + regs->r13, regs->r14, regs->r15); + ++ if (!all) ++ return; ++ + asm("movl %%ds,%0" : "=r" (ds)); + asm("movl %%cs,%0" : "=r" (cs)); + asm("movl %%es,%0" : "=r" (es)); +@@ -98,9 +100,6 @@ void __show_regs(struct pt_regs *regs, i + rdmsrl(MSR_GS_BASE, gs); + rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); + +- if (!all) +- return; +- + cr0 = read_cr0(); + cr2 = read_cr2(); + cr3 = __read_cr3(); +--- a/arch/x86/kernel/unwind_orc.c ++++ b/arch/x86/kernel/unwind_orc.c +@@ -253,22 +253,15 @@ unsigned long *unwind_get_return_address + return NULL; + } + +-static bool stack_access_ok(struct unwind_state *state, unsigned long addr, ++static bool stack_access_ok(struct unwind_state *state, unsigned long _addr, + size_t len) + { + struct stack_info *info = &state->stack_info; ++ void *addr = (void *)_addr; + +- /* +- * If the address isn't on the current stack, switch to the next one. +- * +- * We may have to traverse multiple stacks to deal with the possibility +- * that info->next_sp could point to an empty stack and the address +- * could be on a subsequent stack.
+- */ +- while (!on_stack(info, (void *)addr, len)) +- if (get_stack_info(info->next_sp, state->task, info, +- &state->stack_mask)) +- return false; ++ if (!on_stack(info, addr, len) && ++ (get_stack_info(addr, state->task, info, &state->stack_mask))) ++ return false; + + return true; + } +@@ -283,42 +276,32 @@ static bool deref_stack_reg(struct unwin + return true; + } + +-#define REGS_SIZE (sizeof(struct pt_regs)) +-#define SP_OFFSET (offsetof(struct pt_regs, sp)) +-#define IRET_REGS_SIZE (REGS_SIZE - offsetof(struct pt_regs, ip)) +-#define IRET_SP_OFFSET (SP_OFFSET - offsetof(struct pt_regs, ip)) +- + static bool deref_stack_regs(struct unwind_state *state, unsigned long addr, +- unsigned long *ip, unsigned long *sp, bool full) ++ unsigned long *ip, unsigned long *sp) + { +- size_t regs_size = full ? REGS_SIZE : IRET_REGS_SIZE; +- size_t sp_offset = full ? SP_OFFSET : IRET_SP_OFFSET; +- struct pt_regs *regs = (struct pt_regs *)(addr + regs_size - REGS_SIZE); +- +- if (IS_ENABLED(CONFIG_X86_64)) { +- if (!stack_access_ok(state, addr, regs_size)) +- return false; +- +- *ip = regs->ip; +- *sp = regs->sp; ++ struct pt_regs *regs = (struct pt_regs *)addr; + +- return true; +- } ++ /* x86-32 support will be more complicated due to the &regs->sp hack */ ++ BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_32)); + +- if (!stack_access_ok(state, addr, sp_offset)) ++ if (!stack_access_ok(state, addr, sizeof(struct pt_regs))) + return false; + + *ip = regs->ip; ++ *sp = regs->sp; ++ return true; ++} + +- if (user_mode(regs)) { +- if (!stack_access_ok(state, addr + sp_offset, +- REGS_SIZE - SP_OFFSET)) +- return false; +- +- *sp = regs->sp; +- } else +- *sp = (unsigned long)&regs->sp; ++static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr, ++ unsigned long *ip, unsigned long *sp) ++{ ++ struct pt_regs *regs = (void *)addr - IRET_FRAME_OFFSET; + ++ if (!stack_access_ok(state, addr, IRET_FRAME_SIZE)) ++ return false; ++ ++ *ip = regs->ip; ++ *sp = regs->sp; + return
true; + } + +@@ -327,7 +310,6 @@ bool unwind_next_frame(struct unwind_sta + unsigned long ip_p, sp, orig_ip, prev_sp = state->sp; + enum stack_type prev_type = state->stack_info.type; + struct orc_entry *orc; +- struct pt_regs *ptregs; + bool indirect = false; + + if (unwind_done(state)) +@@ -435,7 +417,7 @@ bool unwind_next_frame(struct unwind_sta + break; + + case ORC_TYPE_REGS: +- if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) { ++ if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) { + orc_warn("can't dereference registers at %p for ip %pB\n", + (void *)sp, (void *)orig_ip); + goto done; +@@ -447,20 +429,14 @@ bool unwind_next_frame(struct unwind_sta + break; + + case ORC_TYPE_REGS_IRET: +- if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) { ++ if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) { + orc_warn("can't dereference iret registers at %p for ip %pB\n", + (void *)sp, (void *)orig_ip); + goto done; + } + +- ptregs = container_of((void *)sp, struct pt_regs, ip); +- if ((unsigned long)ptregs >= prev_sp && +- on_stack(&state->stack_info, ptregs, REGS_SIZE)) { +- state->regs = ptregs; +- state->full_regs = false; +- } else +- state->regs = NULL; +- ++ state->regs = (void *)sp - IRET_FRAME_OFFSET; ++ state->full_regs = false; + state->signal = true; + break; + diff --git a/queue-4.14/x86-unwinder-orc-dont-bail-on-stack-overflow.patch b/queue-4.14/x86-unwinder-orc-dont-bail-on-stack-overflow.patch new file mode 100644 index 00000000000..cae3c13c09e --- /dev/null +++ b/queue-4.14/x86-unwinder-orc-dont-bail-on-stack-overflow.patch @@ -0,0 +1,82 @@ +From d3a09104018cf2ad5973dfa8a9c138ef9f5015a3 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Mon, 4 Dec 2017 15:07:08 +0100 +Subject: x86/unwinder/orc: Dont bail on stack overflow + +From: Andy Lutomirski + +commit d3a09104018cf2ad5973dfa8a9c138ef9f5015a3 upstream. 
+ +If the stack overflows into a guard page, the ORC unwinder should work +well: by construction, there can't be any meaningful data in the guard page +because no writes to the guard page will have succeeded. + +But there is a bug that prevents unwinding from working correctly: if the +starting register state has RSP pointing into a stack guard page, the ORC +unwinder bails out immediately. + +Instead of bailing out immediately, check whether the next page up is a +valid stack page and if so analyze that. As a result the ORC unwinder will +start the unwind. + +Tested by intentionally overflowing the task stack. The result is an +accurate call trace instead of a trace consisting purely of '?' entries. + +There are a few other bugs that are triggered if the unwinder encounters a +stack overflow after the first step, but they are outside the scope of this +fix. + +Signed-off-by: Andy Lutomirski +Signed-off-by: Thomas Gleixner +Cc: Boris Ostrovsky +Cc: Borislav Petkov +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Dave Hansen +Cc: David Laight +Cc: Denys Vlasenko +Cc: Eduardo Valentin +Cc: Greg KH +Cc: H. Peter Anvin +Cc: Josh Poimboeuf +Cc: Juergen Gross +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Rik van Riel +Cc: Will Deacon +Cc: aliguori@amazon.com +Cc: daniel.gruss@iaik.tugraz.at +Cc: hughd@google.com +Cc: keescook@google.com +Link: https://lkml.kernel.org/r/20171204150604.991389777@linutronix.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/unwind_orc.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +--- a/arch/x86/kernel/unwind_orc.c ++++ b/arch/x86/kernel/unwind_orc.c +@@ -553,8 +553,18 @@ void __unwind_start(struct unwind_state + } + + if (get_stack_info((unsigned long *)state->sp, state->task, +- &state->stack_info, &state->stack_mask)) +- return; ++ &state->stack_info, &state->stack_mask)) { ++ /* ++ * We weren't on a valid stack.
It's possible that ++ * we overflowed a valid stack into a guard page. ++ * See if the next page up is valid so that we can ++ * generate some kind of backtrace if this happens. ++ */ ++ void *next_page = (void *)PAGE_ALIGN((unsigned long)state->sp); ++ if (get_stack_info(next_page, state->task, &state->stack_info, ++ &state->stack_mask)) ++ return; ++ } + + /* + * The caller can provide the address of the first frame directly diff --git a/queue-4.14/x86-virt-add-enum-for-hypervisors-to-replace-x86_hyper.patch b/queue-4.14/x86-virt-add-enum-for-hypervisors-to-replace-x86_hyper.patch new file mode 100644 index 00000000000..11f263ff20f --- /dev/null +++ b/queue-4.14/x86-virt-add-enum-for-hypervisors-to-replace-x86_hyper.patch @@ -0,0 +1,271 @@ +From 03b2a320b19f1424e9ac9c21696be9c60b6d0d93 Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Thu, 9 Nov 2017 14:27:36 +0100 +Subject: x86/virt: Add enum for hypervisors to replace x86_hyper + +From: Juergen Gross + +commit 03b2a320b19f1424e9ac9c21696be9c60b6d0d93 upstream. + +The x86_hyper pointer is only used for checking whether a virtual +device is supporting the hypervisor the system is running on. + +Use an enum for that purpose instead and drop the x86_hyper pointer. 
+ +Signed-off-by: Juergen Gross +Acked-by: Thomas Gleixner +Acked-by: Xavier Deguillard +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: akataria@vmware.com +Cc: arnd@arndb.de +Cc: boris.ostrovsky@oracle.com +Cc: devel@linuxdriverproject.org +Cc: dmitry.torokhov@gmail.com +Cc: gregkh@linuxfoundation.org +Cc: haiyangz@microsoft.com +Cc: kvm@vger.kernel.org +Cc: kys@microsoft.com +Cc: linux-graphics-maintainer@vmware.com +Cc: linux-input@vger.kernel.org +Cc: moltmann@vmware.com +Cc: pbonzini@redhat.com +Cc: pv-drivers@vmware.com +Cc: rkrcmar@redhat.com +Cc: sthemmin@microsoft.com +Cc: virtualization@lists.linux-foundation.org +Cc: xen-devel@lists.xenproject.org +Link: http://lkml.kernel.org/r/20171109132739.23465-3-jgross@suse.com +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/hyperv/hv_init.c | 2 +- + arch/x86/include/asm/hypervisor.h | 23 ++++++++++++++--------- + arch/x86/kernel/cpu/hypervisor.c | 12 +++++++++--- + arch/x86/kernel/cpu/mshyperv.c | 4 ++-- + arch/x86/kernel/cpu/vmware.c | 4 ++-- + arch/x86/kernel/kvm.c | 4 ++-- + arch/x86/xen/enlighten_hvm.c | 4 ++-- + arch/x86/xen/enlighten_pv.c | 4 ++-- + drivers/hv/vmbus_drv.c | 2 +- + drivers/input/mouse/vmmouse.c | 10 ++++------ + drivers/misc/vmw_balloon.c | 2 +- + 11 files changed, 40 insertions(+), 31 deletions(-) + +--- a/arch/x86/hyperv/hv_init.c ++++ b/arch/x86/hyperv/hv_init.c +@@ -113,7 +113,7 @@ void hyperv_init(void) + u64 guest_id; + union hv_x64_msr_hypercall_contents hypercall_msr; + +- if (x86_hyper != &x86_hyper_ms_hyperv) ++ if (x86_hyper_type != X86_HYPER_MS_HYPERV) + return; + + /* Allocate percpu VP index */ +--- a/arch/x86/include/asm/hypervisor.h ++++ b/arch/x86/include/asm/hypervisor.h +@@ -29,6 +29,16 @@ + /* + * x86 hypervisor information + */ ++ ++enum x86_hypervisor_type { ++ X86_HYPER_NATIVE = 0, ++ X86_HYPER_VMWARE, ++ X86_HYPER_MS_HYPERV, ++ X86_HYPER_XEN_PV, ++ X86_HYPER_XEN_HVM, ++ X86_HYPER_KVM, ++}; ++ + struct hypervisor_x86 { + /* Hypervisor name 
*/ + const char *name; +@@ -36,6 +46,9 @@ struct hypervisor_x86 { + /* Detection routine */ + uint32_t (*detect)(void); + ++ /* Hypervisor type */ ++ enum x86_hypervisor_type type; ++ + /* init time callbacks */ + struct x86_hyper_init init; + +@@ -43,15 +56,7 @@ struct hypervisor_x86 { + struct x86_hyper_runtime runtime; + }; + +-extern const struct hypervisor_x86 *x86_hyper; +- +-/* Recognized hypervisors */ +-extern const struct hypervisor_x86 x86_hyper_vmware; +-extern const struct hypervisor_x86 x86_hyper_ms_hyperv; +-extern const struct hypervisor_x86 x86_hyper_xen_pv; +-extern const struct hypervisor_x86 x86_hyper_xen_hvm; +-extern const struct hypervisor_x86 x86_hyper_kvm; +- ++extern enum x86_hypervisor_type x86_hyper_type; + extern void init_hypervisor_platform(void); + #else + static inline void init_hypervisor_platform(void) { } +--- a/arch/x86/kernel/cpu/hypervisor.c ++++ b/arch/x86/kernel/cpu/hypervisor.c +@@ -26,6 +26,12 @@ + #include + #include + ++extern const struct hypervisor_x86 x86_hyper_vmware; ++extern const struct hypervisor_x86 x86_hyper_ms_hyperv; ++extern const struct hypervisor_x86 x86_hyper_xen_pv; ++extern const struct hypervisor_x86 x86_hyper_xen_hvm; ++extern const struct hypervisor_x86 x86_hyper_kvm; ++ + static const __initconst struct hypervisor_x86 * const hypervisors[] = + { + #ifdef CONFIG_XEN_PV +@@ -41,8 +47,8 @@ static const __initconst struct hypervis + #endif + }; + +-const struct hypervisor_x86 *x86_hyper; +-EXPORT_SYMBOL(x86_hyper); ++enum x86_hypervisor_type x86_hyper_type; ++EXPORT_SYMBOL(x86_hyper_type); + + static inline const struct hypervisor_x86 * __init + detect_hypervisor_vendor(void) +@@ -87,6 +93,6 @@ void __init init_hypervisor_platform(voi + copy_array(&h->init, &x86_init.hyper, sizeof(h->init)); + copy_array(&h->runtime, &x86_platform.hyper, sizeof(h->runtime)); + +- x86_hyper = h; ++ x86_hyper_type = h->type; + x86_init.hyper.init_platform(); + } +--- a/arch/x86/kernel/cpu/mshyperv.c ++++ 
b/arch/x86/kernel/cpu/mshyperv.c +@@ -254,9 +254,9 @@ static void __init ms_hyperv_init_platfo + #endif + } + +-const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { ++const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = { + .name = "Microsoft Hyper-V", + .detect = ms_hyperv_platform, ++ .type = X86_HYPER_MS_HYPERV, + .init.init_platform = ms_hyperv_init_platform, + }; +-EXPORT_SYMBOL(x86_hyper_ms_hyperv); +--- a/arch/x86/kernel/cpu/vmware.c ++++ b/arch/x86/kernel/cpu/vmware.c +@@ -205,10 +205,10 @@ static bool __init vmware_legacy_x2apic_ + (eax & (1 << VMWARE_PORT_CMD_LEGACY_X2APIC)) != 0; + } + +-const __refconst struct hypervisor_x86 x86_hyper_vmware = { ++const __initconst struct hypervisor_x86 x86_hyper_vmware = { + .name = "VMware", + .detect = vmware_platform, ++ .type = X86_HYPER_VMWARE, + .init.init_platform = vmware_platform_setup, + .init.x2apic_available = vmware_legacy_x2apic_available, + }; +-EXPORT_SYMBOL(x86_hyper_vmware); +--- a/arch/x86/kernel/kvm.c ++++ b/arch/x86/kernel/kvm.c +@@ -544,12 +544,12 @@ static uint32_t __init kvm_detect(void) + return kvm_cpuid_base(); + } + +-const struct hypervisor_x86 x86_hyper_kvm __refconst = { ++const __initconst struct hypervisor_x86 x86_hyper_kvm = { + .name = "KVM", + .detect = kvm_detect, ++ .type = X86_HYPER_KVM, + .init.x2apic_available = kvm_para_available, + }; +-EXPORT_SYMBOL_GPL(x86_hyper_kvm); + + static __init int activate_jump_labels(void) + { +--- a/arch/x86/xen/enlighten_hvm.c ++++ b/arch/x86/xen/enlighten_hvm.c +@@ -226,12 +226,12 @@ static uint32_t __init xen_platform_hvm( + return xen_cpuid_base(); + } + +-const struct hypervisor_x86 x86_hyper_xen_hvm = { ++const __initconst struct hypervisor_x86 x86_hyper_xen_hvm = { + .name = "Xen HVM", + .detect = xen_platform_hvm, ++ .type = X86_HYPER_XEN_HVM, + .init.init_platform = xen_hvm_guest_init, + .init.x2apic_available = xen_x2apic_para_available, + .init.init_mem_mapping = xen_hvm_init_mem_mapping, + .runtime.pin_vcpu = 
xen_pin_vcpu, + }; +-EXPORT_SYMBOL(x86_hyper_xen_hvm); +--- a/arch/x86/xen/enlighten_pv.c ++++ b/arch/x86/xen/enlighten_pv.c +@@ -1459,9 +1459,9 @@ static uint32_t __init xen_platform_pv(v + return 0; + } + +-const struct hypervisor_x86 x86_hyper_xen_pv = { ++const __initconst struct hypervisor_x86 x86_hyper_xen_pv = { + .name = "Xen PV", + .detect = xen_platform_pv, ++ .type = X86_HYPER_XEN_PV, + .runtime.pin_vcpu = xen_pin_vcpu, + }; +-EXPORT_SYMBOL(x86_hyper_xen_pv); +--- a/drivers/hv/vmbus_drv.c ++++ b/drivers/hv/vmbus_drv.c +@@ -1534,7 +1534,7 @@ static int __init hv_acpi_init(void) + { + int ret, t; + +- if (x86_hyper != &x86_hyper_ms_hyperv) ++ if (x86_hyper_type != X86_HYPER_MS_HYPERV) + return -ENODEV; + + init_completion(&probe_event); +--- a/drivers/input/mouse/vmmouse.c ++++ b/drivers/input/mouse/vmmouse.c +@@ -316,11 +316,9 @@ static int vmmouse_enable(struct psmouse + /* + * Array of supported hypervisors. + */ +-static const struct hypervisor_x86 *vmmouse_supported_hypervisors[] = { +- &x86_hyper_vmware, +-#ifdef CONFIG_KVM_GUEST +- &x86_hyper_kvm, +-#endif ++static enum x86_hypervisor_type vmmouse_supported_hypervisors[] = { ++ X86_HYPER_VMWARE, ++ X86_HYPER_KVM, + }; + + /** +@@ -331,7 +329,7 @@ static bool vmmouse_check_hypervisor(voi + int i; + + for (i = 0; i < ARRAY_SIZE(vmmouse_supported_hypervisors); i++) +- if (vmmouse_supported_hypervisors[i] == x86_hyper) ++ if (vmmouse_supported_hypervisors[i] == x86_hyper_type) + return true; + + return false; +--- a/drivers/misc/vmw_balloon.c ++++ b/drivers/misc/vmw_balloon.c +@@ -1271,7 +1271,7 @@ static int __init vmballoon_init(void) + * Check if we are running on VMware's hypervisor and bail out + * if we are not. 
+ */ +- if (x86_hyper != &x86_hyper_vmware) ++ if (x86_hyper_type != X86_HYPER_VMWARE) + return -ENODEV; + + for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES; diff --git a/queue-4.14/x86-virt-x86-platform-merge-struct-x86_hyper-into-struct-x86_platform-and-struct-x86_init.patch b/queue-4.14/x86-virt-x86-platform-merge-struct-x86_hyper-into-struct-x86_platform-and-struct-x86_init.patch new file mode 100644 index 00000000000..4356dade792 --- /dev/null +++ b/queue-4.14/x86-virt-x86-platform-merge-struct-x86_hyper-into-struct-x86_platform-and-struct-x86_init.patch @@ -0,0 +1,375 @@ +From f72e38e8ec8869ac0ba5a75d7d2f897d98a1454e Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Thu, 9 Nov 2017 14:27:35 +0100 +Subject: x86/virt, x86/platform: Merge 'struct x86_hyper' into 'struct x86_platform' and 'struct x86_init' + +From: Juergen Gross + +commit f72e38e8ec8869ac0ba5a75d7d2f897d98a1454e upstream. + +Instead of x86_hyper being either NULL on bare metal or a pointer to a +struct hypervisor_x86 in case of the kernel running as a guest merge +the struct into x86_platform and x86_init. + +This will remove the need for wrappers making it hard to find out what +is being called. With dummy functions added for all callbacks testing +for a NULL function pointer can be removed, too. 
+ +Suggested-by: Ingo Molnar +Signed-off-by: Juergen Gross +Acked-by: Thomas Gleixner +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: akataria@vmware.com +Cc: boris.ostrovsky@oracle.com +Cc: devel@linuxdriverproject.org +Cc: haiyangz@microsoft.com +Cc: kvm@vger.kernel.org +Cc: kys@microsoft.com +Cc: pbonzini@redhat.com +Cc: rkrcmar@redhat.com +Cc: rusty@rustcorp.com.au +Cc: sthemmin@microsoft.com +Cc: virtualization@lists.linux-foundation.org +Cc: xen-devel@lists.xenproject.org +Link: http://lkml.kernel.org/r/20171109132739.23465-2-jgross@suse.com +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/hypervisor.h | 25 +++------------- + arch/x86/include/asm/x86_init.h | 24 ++++++++++++++++ + arch/x86/kernel/apic/apic.c | 2 - + arch/x86/kernel/cpu/hypervisor.c | 56 ++++++++++++++++++-------------------- + arch/x86/kernel/cpu/mshyperv.c | 2 - + arch/x86/kernel/cpu/vmware.c | 4 +- + arch/x86/kernel/kvm.c | 2 - + arch/x86/kernel/x86_init.c | 9 ++++++ + arch/x86/mm/init.c | 2 - + arch/x86/xen/enlighten_hvm.c | 8 ++--- + arch/x86/xen/enlighten_pv.c | 2 - + include/linux/hypervisor.h | 8 ++++- + 12 files changed, 82 insertions(+), 62 deletions(-) + +--- a/arch/x86/include/asm/hypervisor.h ++++ b/arch/x86/include/asm/hypervisor.h +@@ -23,6 +23,7 @@ + #ifdef CONFIG_HYPERVISOR_GUEST + + #include ++#include + #include + + /* +@@ -35,17 +36,11 @@ struct hypervisor_x86 { + /* Detection routine */ + uint32_t (*detect)(void); + +- /* Platform setup (run once per boot) */ +- void (*init_platform)(void); ++ /* init time callbacks */ ++ struct x86_hyper_init init; + +- /* X2APIC detection (run once per boot) */ +- bool (*x2apic_available)(void); +- +- /* pin current vcpu to specified physical cpu (run rarely) */ +- void (*pin_vcpu)(int); +- +- /* called during init_mem_mapping() to setup early mappings. 
*/ +- void (*init_mem_mapping)(void); ++ /* runtime callbacks */ ++ struct x86_hyper_runtime runtime; + }; + + extern const struct hypervisor_x86 *x86_hyper; +@@ -58,17 +53,7 @@ extern const struct hypervisor_x86 x86_h + extern const struct hypervisor_x86 x86_hyper_kvm; + + extern void init_hypervisor_platform(void); +-extern bool hypervisor_x2apic_available(void); +-extern void hypervisor_pin_vcpu(int cpu); +- +-static inline void hypervisor_init_mem_mapping(void) +-{ +- if (x86_hyper && x86_hyper->init_mem_mapping) +- x86_hyper->init_mem_mapping(); +-} + #else + static inline void init_hypervisor_platform(void) { } +-static inline bool hypervisor_x2apic_available(void) { return false; } +-static inline void hypervisor_init_mem_mapping(void) { } + #endif /* CONFIG_HYPERVISOR_GUEST */ + #endif /* _ASM_X86_HYPERVISOR_H */ +--- a/arch/x86/include/asm/x86_init.h ++++ b/arch/x86/include/asm/x86_init.h +@@ -115,6 +115,18 @@ struct x86_init_pci { + }; + + /** ++ * struct x86_hyper_init - x86 hypervisor init functions ++ * @init_platform: platform setup ++ * @x2apic_available: X2APIC detection ++ * @init_mem_mapping: setup early mappings during init_mem_mapping() ++ */ ++struct x86_hyper_init { ++ void (*init_platform)(void); ++ bool (*x2apic_available)(void); ++ void (*init_mem_mapping)(void); ++}; ++ ++/** + * struct x86_init_ops - functions for platform specific setup + * + */ +@@ -127,6 +139,7 @@ struct x86_init_ops { + struct x86_init_timers timers; + struct x86_init_iommu iommu; + struct x86_init_pci pci; ++ struct x86_hyper_init hyper; + }; + + /** +@@ -200,6 +213,15 @@ struct x86_legacy_features { + }; + + /** ++ * struct x86_hyper_runtime - x86 hypervisor specific runtime callbacks ++ * ++ * @pin_vcpu: pin current vcpu to specified physical cpu (run rarely) ++ */ ++struct x86_hyper_runtime { ++ void (*pin_vcpu)(int cpu); ++}; ++ ++/** + * struct x86_platform_ops - platform specific runtime functions + * @calibrate_cpu: calibrate CPU + * @calibrate_tsc: calibrate 
TSC, if different from CPU +@@ -218,6 +240,7 @@ struct x86_legacy_features { + * possible in x86_early_init_platform_quirks() by + * only using the current x86_hardware_subarch + * semantics. ++ * @hyper: x86 hypervisor specific runtime callbacks + */ + struct x86_platform_ops { + unsigned long (*calibrate_cpu)(void); +@@ -233,6 +256,7 @@ struct x86_platform_ops { + void (*apic_post_init)(void); + struct x86_legacy_features legacy; + void (*set_legacy_features)(void); ++ struct x86_hyper_runtime hyper; + }; + + struct pci_dev; +--- a/arch/x86/kernel/apic/apic.c ++++ b/arch/x86/kernel/apic/apic.c +@@ -1645,7 +1645,7 @@ static __init void try_to_enable_x2apic( + * under KVM + */ + if (max_physical_apicid > 255 || +- !hypervisor_x2apic_available()) { ++ !x86_init.hyper.x2apic_available()) { + pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n"); + x2apic_disable(); + return; +--- a/arch/x86/kernel/cpu/hypervisor.c ++++ b/arch/x86/kernel/cpu/hypervisor.c +@@ -44,51 +44,49 @@ static const __initconst struct hypervis + const struct hypervisor_x86 *x86_hyper; + EXPORT_SYMBOL(x86_hyper); + +-static inline void __init ++static inline const struct hypervisor_x86 * __init + detect_hypervisor_vendor(void) + { +- const struct hypervisor_x86 *h, * const *p; ++ const struct hypervisor_x86 *h = NULL, * const *p; + uint32_t pri, max_pri = 0; + + for (p = hypervisors; p < hypervisors + ARRAY_SIZE(hypervisors); p++) { +- h = *p; +- pri = h->detect(); +- if (pri != 0 && pri > max_pri) { ++ pri = (*p)->detect(); ++ if (pri > max_pri) { + max_pri = pri; +- x86_hyper = h; ++ h = *p; + } + } + +- if (max_pri) +- pr_info("Hypervisor detected: %s\n", x86_hyper->name); +-} +- +-void __init init_hypervisor_platform(void) +-{ +- +- detect_hypervisor_vendor(); ++ if (h) ++ pr_info("Hypervisor detected: %s\n", h->name); + +- if (!x86_hyper) +- return; +- +- if (x86_hyper->init_platform) +- x86_hyper->init_platform(); ++ return h; + } + +-bool __init hypervisor_x2apic_available(void) 
++static void __init copy_array(const void *src, void *target, unsigned int size) + { +- return x86_hyper && +- x86_hyper->x2apic_available && +- x86_hyper->x2apic_available(); ++ unsigned int i, n = size / sizeof(void *); ++ const void * const *from = (const void * const *)src; ++ const void **to = (const void **)target; ++ ++ for (i = 0; i < n; i++) ++ if (from[i]) ++ to[i] = from[i]; + } + +-void hypervisor_pin_vcpu(int cpu) ++void __init init_hypervisor_platform(void) + { +- if (!x86_hyper) ++ const struct hypervisor_x86 *h; ++ ++ h = detect_hypervisor_vendor(); ++ ++ if (!h) + return; + +- if (x86_hyper->pin_vcpu) +- x86_hyper->pin_vcpu(cpu); +- else +- WARN_ONCE(1, "vcpu pinning requested but not supported!\n"); ++ copy_array(&h->init, &x86_init.hyper, sizeof(h->init)); ++ copy_array(&h->runtime, &x86_platform.hyper, sizeof(h->runtime)); ++ ++ x86_hyper = h; ++ x86_init.hyper.init_platform(); + } +--- a/arch/x86/kernel/cpu/mshyperv.c ++++ b/arch/x86/kernel/cpu/mshyperv.c +@@ -257,6 +257,6 @@ static void __init ms_hyperv_init_platfo + const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { + .name = "Microsoft Hyper-V", + .detect = ms_hyperv_platform, +- .init_platform = ms_hyperv_init_platform, ++ .init.init_platform = ms_hyperv_init_platform, + }; + EXPORT_SYMBOL(x86_hyper_ms_hyperv); +--- a/arch/x86/kernel/cpu/vmware.c ++++ b/arch/x86/kernel/cpu/vmware.c +@@ -208,7 +208,7 @@ static bool __init vmware_legacy_x2apic_ + const __refconst struct hypervisor_x86 x86_hyper_vmware = { + .name = "VMware", + .detect = vmware_platform, +- .init_platform = vmware_platform_setup, +- .x2apic_available = vmware_legacy_x2apic_available, ++ .init.init_platform = vmware_platform_setup, ++ .init.x2apic_available = vmware_legacy_x2apic_available, + }; + EXPORT_SYMBOL(x86_hyper_vmware); +--- a/arch/x86/kernel/kvm.c ++++ b/arch/x86/kernel/kvm.c +@@ -547,7 +547,7 @@ static uint32_t __init kvm_detect(void) + const struct hypervisor_x86 x86_hyper_kvm __refconst = { + .name = 
"KVM", + .detect = kvm_detect, +- .x2apic_available = kvm_para_available, ++ .init.x2apic_available = kvm_para_available, + }; + EXPORT_SYMBOL_GPL(x86_hyper_kvm); + +--- a/arch/x86/kernel/x86_init.c ++++ b/arch/x86/kernel/x86_init.c +@@ -28,6 +28,8 @@ void x86_init_noop(void) { } + void __init x86_init_uint_noop(unsigned int unused) { } + int __init iommu_init_noop(void) { return 0; } + void iommu_shutdown_noop(void) { } ++bool __init bool_x86_init_noop(void) { return false; } ++void x86_op_int_noop(int cpu) { } + + /* + * The platform setup functions are preset with the default functions +@@ -81,6 +83,12 @@ struct x86_init_ops x86_init __initdata + .init_irq = x86_default_pci_init_irq, + .fixup_irqs = x86_default_pci_fixup_irqs, + }, ++ ++ .hyper = { ++ .init_platform = x86_init_noop, ++ .x2apic_available = bool_x86_init_noop, ++ .init_mem_mapping = x86_init_noop, ++ }, + }; + + struct x86_cpuinit_ops x86_cpuinit = { +@@ -101,6 +109,7 @@ struct x86_platform_ops x86_platform __r + .get_nmi_reason = default_get_nmi_reason, + .save_sched_clock_state = tsc_save_sched_clock_state, + .restore_sched_clock_state = tsc_restore_sched_clock_state, ++ .hyper.pin_vcpu = x86_op_int_noop, + }; + + EXPORT_SYMBOL_GPL(x86_platform); +--- a/arch/x86/mm/init.c ++++ b/arch/x86/mm/init.c +@@ -671,7 +671,7 @@ void __init init_mem_mapping(void) + load_cr3(swapper_pg_dir); + __flush_tlb_all(); + +- hypervisor_init_mem_mapping(); ++ x86_init.hyper.init_mem_mapping(); + + early_memtest(0, max_pfn_mapped << PAGE_SHIFT); + } +--- a/arch/x86/xen/enlighten_hvm.c ++++ b/arch/x86/xen/enlighten_hvm.c +@@ -229,9 +229,9 @@ static uint32_t __init xen_platform_hvm( + const struct hypervisor_x86 x86_hyper_xen_hvm = { + .name = "Xen HVM", + .detect = xen_platform_hvm, +- .init_platform = xen_hvm_guest_init, +- .pin_vcpu = xen_pin_vcpu, +- .x2apic_available = xen_x2apic_para_available, +- .init_mem_mapping = xen_hvm_init_mem_mapping, ++ .init.init_platform = xen_hvm_guest_init, ++ .init.x2apic_available 
= xen_x2apic_para_available, ++ .init.init_mem_mapping = xen_hvm_init_mem_mapping, ++ .runtime.pin_vcpu = xen_pin_vcpu, + }; + EXPORT_SYMBOL(x86_hyper_xen_hvm); +--- a/arch/x86/xen/enlighten_pv.c ++++ b/arch/x86/xen/enlighten_pv.c +@@ -1462,6 +1462,6 @@ static uint32_t __init xen_platform_pv(v + const struct hypervisor_x86 x86_hyper_xen_pv = { + .name = "Xen PV", + .detect = xen_platform_pv, +- .pin_vcpu = xen_pin_vcpu, ++ .runtime.pin_vcpu = xen_pin_vcpu, + }; + EXPORT_SYMBOL(x86_hyper_xen_pv); +--- a/include/linux/hypervisor.h ++++ b/include/linux/hypervisor.h +@@ -7,8 +7,12 @@ + * Juergen Gross + */ + +-#ifdef CONFIG_HYPERVISOR_GUEST +-#include ++#ifdef CONFIG_X86 ++#include ++static inline void hypervisor_pin_vcpu(int cpu) ++{ ++ x86_platform.hyper.pin_vcpu(cpu); ++} + #else + static inline void hypervisor_pin_vcpu(int cpu) + { diff --git a/queue-4.14/x86-xen-64-x86-entry-64-clean-up-sp-code-in-cpu_initialize_context.patch b/queue-4.14/x86-xen-64-x86-entry-64-clean-up-sp-code-in-cpu_initialize_context.patch new file mode 100644 index 00000000000..ad09e514768 --- /dev/null +++ b/queue-4.14/x86-xen-64-x86-entry-64-clean-up-sp-code-in-cpu_initialize_context.patch @@ -0,0 +1,89 @@ +From f16b3da1dc936c0f8121741d0a1731bf242f2f56 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Thu, 2 Nov 2017 00:59:12 -0700 +Subject: x86/xen/64, x86/entry/64: Clean up SP code in cpu_initialize_context() + +From: Andy Lutomirski + +commit f16b3da1dc936c0f8121741d0a1731bf242f2f56 upstream. + +I'm removing thread_struct::sp0, and Xen's usage of it is slightly +dubious and unnecessary. Use appropriate helpers instead. + +While we're at it, reorder the code slightly to make it more obvious +what's going on. 
+ +Signed-off-by: Andy Lutomirski +Reviewed-by: Juergen Gross +Cc: Boris Ostrovsky +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Juergen Gross +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/d5b9a3da2b47c68325bd2bbe8f82d9554dee0d0f.1509609304.git.luto@kernel.org +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/xen/smp_pv.c | 17 ++++++++++++++--- + 1 file changed, 14 insertions(+), 3 deletions(-) + +--- a/arch/x86/xen/smp_pv.c ++++ b/arch/x86/xen/smp_pv.c +@@ -14,6 +14,7 @@ + * single-threaded. + */ + #include ++#include + #include + #include + #include +@@ -294,12 +295,19 @@ cpu_initialize_context(unsigned int cpu, + #endif + memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); + ++ /* ++ * Bring up the CPU in cpu_bringup_and_idle() with the stack ++ * pointing just below where pt_regs would be if it were a normal ++ * kernel entry. ++ */ + ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; + ctxt->flags = VGCF_IN_KERNEL; + ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ + ctxt->user_regs.ds = __USER_DS; + ctxt->user_regs.es = __USER_DS; + ctxt->user_regs.ss = __KERNEL_DS; ++ ctxt->user_regs.cs = __KERNEL_CS; ++ ctxt->user_regs.esp = (unsigned long)task_pt_regs(idle); + + xen_copy_trap_info(ctxt->trap_ctxt); + +@@ -314,8 +322,13 @@ cpu_initialize_context(unsigned int cpu, + ctxt->gdt_frames[0] = gdt_mfn; + ctxt->gdt_ents = GDT_ENTRIES; + ++ /* ++ * Set SS:SP that Xen will use when entering guest kernel mode ++ * from guest user mode. Subsequent calls to load_sp0() can ++ * change this value. 
++ */ + ctxt->kernel_ss = __KERNEL_DS; +- ctxt->kernel_sp = idle->thread.sp0; ++ ctxt->kernel_sp = task_top_of_stack(idle); + + #ifdef CONFIG_X86_32 + ctxt->event_callback_cs = __KERNEL_CS; +@@ -327,10 +340,8 @@ cpu_initialize_context(unsigned int cpu, + (unsigned long)xen_hypervisor_callback; + ctxt->failsafe_callback_eip = + (unsigned long)xen_failsafe_callback; +- ctxt->user_regs.cs = __KERNEL_CS; + per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); + +- ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); + ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir)); + if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt)) + BUG(); diff --git a/queue-4.14/xen-x86-entry-64-add-xen-nmi-trap-entry.patch b/queue-4.14/xen-x86-entry-64-add-xen-nmi-trap-entry.patch new file mode 100644 index 00000000000..9626a6001b7 --- /dev/null +++ b/queue-4.14/xen-x86-entry-64-add-xen-nmi-trap-entry.patch @@ -0,0 +1,90 @@ +From 43e4111086a70c78bedb6ad990bee97f17b27a6e Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Thu, 2 Nov 2017 00:59:07 -0700 +Subject: xen, x86/entry/64: Add xen NMI trap entry + +From: Juergen Gross + +commit 43e4111086a70c78bedb6ad990bee97f17b27a6e upstream. + +Instead of trying to execute any NMI via the bare metal's NMI trap +handler use a Xen specific one for PV domains, like we do for e.g. +debug traps. As in a PV domain the NMI is handled via the normal +kernel stack this is the correct thing to do. + +This will enable us to get rid of the very fragile and questionable +dependencies between the bare metal NMI handler and Xen assumptions +believed to be broken anyway. 
+ +Signed-off-by: Juergen Gross +Signed-off-by: Andy Lutomirski +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/5baf5c0528d58402441550c5770b98e7961e7680.1509609304.git.luto@kernel.org +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/entry/entry_64.S | 2 +- + arch/x86/include/asm/traps.h | 2 +- + arch/x86/xen/enlighten_pv.c | 2 +- + arch/x86/xen/xen-asm_64.S | 2 +- + 4 files changed, 4 insertions(+), 4 deletions(-) + +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -1079,6 +1079,7 @@ idtentry int3 do_int3 has_error_code + idtentry stack_segment do_stack_segment has_error_code=1 + + #ifdef CONFIG_XEN ++idtentry xennmi do_nmi has_error_code=0 + idtentry xendebug do_debug has_error_code=0 + idtentry xenint3 do_int3 has_error_code=0 + #endif +@@ -1241,7 +1242,6 @@ ENTRY(error_exit) + END(error_exit) + + /* Runs on exception stack */ +-/* XXX: broken on Xen PV */ + ENTRY(nmi) + UNWIND_HINT_IRET_REGS + /* +--- a/arch/x86/include/asm/traps.h ++++ b/arch/x86/include/asm/traps.h +@@ -38,9 +38,9 @@ asmlinkage void simd_coprocessor_error(v + + #if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) + asmlinkage void xen_divide_error(void); ++asmlinkage void xen_xennmi(void); + asmlinkage void xen_xendebug(void); + asmlinkage void xen_xenint3(void); +-asmlinkage void xen_nmi(void); + asmlinkage void xen_overflow(void); + asmlinkage void xen_bounds(void); + asmlinkage void xen_invalid_op(void); +--- a/arch/x86/xen/enlighten_pv.c ++++ b/arch/x86/xen/enlighten_pv.c +@@ -601,7 +601,7 @@ static struct trap_array_entry trap_arra + #ifdef CONFIG_X86_MCE + { machine_check, xen_machine_check, true }, + #endif +- { nmi, xen_nmi, true }, ++ { nmi, xen_xennmi, true }, + { overflow, xen_overflow, false }, + #ifdef CONFIG_IA32_EMULATION + { entry_INT80_compat, xen_entry_INT80_compat, false }, +--- a/arch/x86/xen/xen-asm_64.S ++++ 
b/arch/x86/xen/xen-asm_64.S +@@ -30,7 +30,7 @@ xen_pv_trap debug + xen_pv_trap xendebug + xen_pv_trap int3 + xen_pv_trap xenint3 +-xen_pv_trap nmi ++xen_pv_trap xennmi + xen_pv_trap overflow + xen_pv_trap bounds + xen_pv_trap invalid_op -- 2.47.3