From 5a74700e4355e4ef35fcc21c48a9ccb04c308a9d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 4 Aug 2014 16:56:41 -0700 Subject: [PATCH] 3.15-stable patches added patches: revert-x86-64-modify_ldt-make-support-for-16-bit-segments-a-runtime-option.patch x86-64-espfix-don-t-leak-bits-31-16-of-esp-returning-to-16-bit-stack.patch x86-espfix-fix-broken-header-guard.patch x86-espfix-make-espfix64-a-kconfig-option-fix-uml.patch x86-espfix-make-it-possible-to-disable-16-bit-support.patch x86-espfix-move-espfix-definitions-into-a-separate-header-file.patch x86_64-entry-xen-do-not-invoke-espfix64-on-xen.patch --- ...for-16-bit-segments-a-runtime-option.patch | 64 ++ queue-3.15/series | 7 + ...-16-of-esp-returning-to-16-bit-stack.patch | 598 ++++++++++++++++++ .../x86-espfix-fix-broken-header-guard.patch | 27 + ...ke-espfix64-a-kconfig-option-fix-uml.patch | 77 +++ ...t-possible-to-disable-16-bit-support.patch | 212 +++++++ ...initions-into-a-separate-header-file.patch | 68 ++ ...ry-xen-do-not-invoke-espfix64-on-xen.patch | 130 ++++ 8 files changed, 1183 insertions(+) create mode 100644 queue-3.15/revert-x86-64-modify_ldt-make-support-for-16-bit-segments-a-runtime-option.patch create mode 100644 queue-3.15/x86-64-espfix-don-t-leak-bits-31-16-of-esp-returning-to-16-bit-stack.patch create mode 100644 queue-3.15/x86-espfix-fix-broken-header-guard.patch create mode 100644 queue-3.15/x86-espfix-make-espfix64-a-kconfig-option-fix-uml.patch create mode 100644 queue-3.15/x86-espfix-make-it-possible-to-disable-16-bit-support.patch create mode 100644 queue-3.15/x86-espfix-move-espfix-definitions-into-a-separate-header-file.patch create mode 100644 queue-3.15/x86_64-entry-xen-do-not-invoke-espfix64-on-xen.patch diff --git a/queue-3.15/revert-x86-64-modify_ldt-make-support-for-16-bit-segments-a-runtime-option.patch b/queue-3.15/revert-x86-64-modify_ldt-make-support-for-16-bit-segments-a-runtime-option.patch new file mode 100644 index 00000000000..04b2f88b2af --- /dev/null +++ b/queue-3.15/revert-x86-64-modify_ldt-make-support-for-16-bit-segments-a-runtime-option.patch @@ -0,0 +1,64 @@ +From 7ed6fb9b5a5510e4ef78ab27419184741169978a Mon Sep 17 00:00:00 2001 +From: "H. Peter Anvin" +Date: Wed, 21 May 2014 10:22:59 -0700 +Subject: Revert "x86-64, modify_ldt: Make support for 16-bit segments a runtime option" + +From: "H. Peter Anvin" + +commit 7ed6fb9b5a5510e4ef78ab27419184741169978a upstream. + +This reverts commit fa81511bb0bbb2b1aace3695ce869da9762624ff in +preparation of merging in the proper fix (espfix64). + +Signed-off-by: H. Peter Anvin +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/ldt.c | 4 +--- + arch/x86/vdso/vdso32-setup.c | 8 -------- + 2 files changed, 1 insertion(+), 11 deletions(-) + +--- a/arch/x86/kernel/ldt.c ++++ b/arch/x86/kernel/ldt.c +@@ -20,8 +20,6 @@ + #include + #include + +-int sysctl_ldt16 = 0; +- + #ifdef CONFIG_SMP + static void flush_ldt(void *current_mm) + { +@@ -236,7 +234,7 @@ static int write_ldt(void __user *ptr, u + * IRET leaking the high bits of the kernel stack address. 
+ */ + #ifdef CONFIG_X86_64 +- if (!ldt_info.seg_32bit && !sysctl_ldt16) { ++ if (!ldt_info.seg_32bit) { + error = -EINVAL; + goto out_unlock; + } +--- a/arch/x86/vdso/vdso32-setup.c ++++ b/arch/x86/vdso/vdso32-setup.c +@@ -39,7 +39,6 @@ + #ifdef CONFIG_X86_64 + #define vdso_enabled sysctl_vsyscall32 + #define arch_setup_additional_pages syscall32_setup_pages +-extern int sysctl_ldt16; + #endif + + /* +@@ -250,13 +249,6 @@ static struct ctl_table abi_table2[] = { + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec +- }, +- { +- .procname = "ldt16", +- .data = &sysctl_ldt16, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = proc_dointvec + }, + {} + }; diff --git a/queue-3.15/series b/queue-3.15/series index b0cef72fb0c..69298e91fe3 100644 --- a/queue-3.15/series +++ b/queue-3.15/series @@ -23,3 +23,10 @@ dm-cache-fix-race-affecting-dirty-block-count.patch printk-rename-printk_sched-to-printk_deferred.patch sched_clock-avoid-corrupting-hrtimer-tree-during-suspend.patch timer-fix-lock-inversion-between-hrtimer_bases.lock-and-scheduler-locks.patch +revert-x86-64-modify_ldt-make-support-for-16-bit-segments-a-runtime-option.patch +x86-64-espfix-don-t-leak-bits-31-16-of-esp-returning-to-16-bit-stack.patch +x86-espfix-move-espfix-definitions-into-a-separate-header-file.patch +x86-espfix-fix-broken-header-guard.patch +x86-espfix-make-espfix64-a-kconfig-option-fix-uml.patch +x86-espfix-make-it-possible-to-disable-16-bit-support.patch +x86_64-entry-xen-do-not-invoke-espfix64-on-xen.patch diff --git a/queue-3.15/x86-64-espfix-don-t-leak-bits-31-16-of-esp-returning-to-16-bit-stack.patch b/queue-3.15/x86-64-espfix-don-t-leak-bits-31-16-of-esp-returning-to-16-bit-stack.patch new file mode 100644 index 00000000000..67135153514 --- /dev/null +++ b/queue-3.15/x86-64-espfix-don-t-leak-bits-31-16-of-esp-returning-to-16-bit-stack.patch @@ -0,0 +1,598 @@ +From 3891a04aafd668686239349ea58f3314ea2af86b Mon Sep 17 00:00:00 2001 +From: "H. Peter Anvin" +Date: Tue, 29 Apr 2014 16:46:09 -0700 +Subject: x86-64, espfix: Don't leak bits 31:16 of %esp returning to 16-bit stack + +From: "H. Peter Anvin" + +commit 3891a04aafd668686239349ea58f3314ea2af86b upstream. + +The IRET instruction, when returning to a 16-bit segment, only +restores the bottom 16 bits of the user space stack pointer. This +causes some 16-bit software to break, but it also leaks kernel state +to user space. We have a software workaround for that ("espfix") for +the 32-bit kernel, but it relies on a nonzero stack segment base which +is not available in 64-bit mode. + +In checkin: + + b3b42ac2cbae x86-64, modify_ldt: Ban 16-bit segments on 64-bit kernels + +we "solved" this by forbidding 16-bit segments on 64-bit kernels, with +the logic that 16-bit support is crippled on 64-bit kernels anyway (no +V86 support), but it turns out that people are doing stuff like +running old Win16 binaries under Wine and expect it to work. + +This works around this by creating percpu "ministacks", each of which +is mapped 2^16 times 64K apart. When we detect that the return SS is +on the LDT, we copy the IRET frame to the ministack and use the +relevant alias to return to userspace. The ministacks are mapped +readonly, so if IRET faults we promote #GP to #DF which is an IST +vector and thus has its own stack; we then do the fixup in the #DF +handler. + +(Making #GP an IST exception would make the msr_safe functions unsafe +in NMI/MC context, and quite possibly have other effects.) 
+ +Special thanks to: + +- Andy Lutomirski, for the suggestion of using very small stack slots + and copy (as opposed to map) the IRET frame there, and for the + suggestion to mark them readonly and let the fault promote to #DF. +- Konrad Wilk for paravirt fixup and testing. +- Borislav Petkov for testing help and useful comments. + +Reported-by: Brian Gerst +Signed-off-by: H. Peter Anvin +Link: http://lkml.kernel.org/r/1398816946-3351-1-git-send-email-hpa@linux.intel.com +Cc: Konrad Rzeszutek Wilk +Cc: Borislav Petkov +Cc: Andrew Lutomriski +Cc: Linus Torvalds +Cc: Dirk Hohndel +Cc: Arjan van de Ven +Cc: comex +Cc: Alexander van Heukelum +Cc: Boris Ostrovsky +Signed-off-by: Greg Kroah-Hartman + +--- + Documentation/x86/x86_64/mm.txt | 2 + arch/x86/include/asm/pgtable_64_types.h | 2 + arch/x86/include/asm/setup.h | 3 + arch/x86/kernel/Makefile | 1 + arch/x86/kernel/entry_64.S | 73 ++++++++++- + arch/x86/kernel/espfix_64.c | 208 ++++++++++++++++++++++++++++++++ + arch/x86/kernel/ldt.c | 11 - + arch/x86/kernel/smpboot.c | 7 + + arch/x86/mm/dump_pagetables.c | 44 +++++- + init/main.c | 4 + 10 files changed, 329 insertions(+), 26 deletions(-) + +--- a/Documentation/x86/x86_64/mm.txt ++++ b/Documentation/x86/x86_64/mm.txt +@@ -12,6 +12,8 @@ ffffc90000000000 - ffffe8ffffffffff (=45 + ffffe90000000000 - ffffe9ffffffffff (=40 bits) hole + ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB) + ... unused hole ... ++ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks ++... unused hole ... + ffffffff80000000 - ffffffffa0000000 (=512 MB) kernel text mapping, from phys 0 + ffffffffa0000000 - ffffffffff5fffff (=1525 MB) module mapping space + ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls +--- a/arch/x86/include/asm/pgtable_64_types.h ++++ b/arch/x86/include/asm/pgtable_64_types.h +@@ -61,6 +61,8 @@ typedef struct { pteval_t pte; } pte_t; + #define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) + #define MODULES_END _AC(0xffffffffff000000, UL) + #define MODULES_LEN (MODULES_END - MODULES_VADDR) ++#define ESPFIX_PGD_ENTRY _AC(-2, UL) ++#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << PGDIR_SHIFT) + + #define EARLY_DYNAMIC_PAGE_TABLES 64 + +--- a/arch/x86/include/asm/setup.h ++++ b/arch/x86/include/asm/setup.h +@@ -57,6 +57,9 @@ extern void x86_ce4100_early_setup(void) + static inline void x86_ce4100_early_setup(void) { } + #endif + ++extern void init_espfix_bsp(void); ++extern void init_espfix_ap(void); ++ + #ifndef _SETUP + + /* +--- a/arch/x86/kernel/Makefile ++++ b/arch/x86/kernel/Makefile +@@ -29,6 +29,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x86 + obj-y += syscall_$(BITS).o vsyscall_gtod.o + obj-$(CONFIG_X86_64) += vsyscall_64.o + obj-$(CONFIG_X86_64) += vsyscall_emu_64.o ++obj-$(CONFIG_X86_64) += espfix_64.o + obj-$(CONFIG_SYSFS) += ksysfs.o + obj-y += bootflag.o e820.o + obj-y += pci-dma.o quirks.o topology.o kdebugfs.o +--- a/arch/x86/kernel/entry_64.S ++++ b/arch/x86/kernel/entry_64.S +@@ -58,6 +58,7 @@ + #include + #include + #include ++#include + #include + + /* Avoid __ASSEMBLER__'ifying just for this. */ +@@ -1040,8 +1041,16 @@ restore_args: + RESTORE_ARGS 1,8,1 + + irq_return: ++ /* ++ * Are we returning to a stack segment from the LDT? Note: in ++ * 64-bit mode SS:RSP on the exception stack is always valid. 
++ */ ++ testb $4,(SS-RIP)(%rsp) ++ jnz irq_return_ldt ++ ++irq_return_iret: + INTERRUPT_RETURN +- _ASM_EXTABLE(irq_return, bad_iret) ++ _ASM_EXTABLE(irq_return_iret, bad_iret) + + #ifdef CONFIG_PARAVIRT + ENTRY(native_iret) +@@ -1049,6 +1058,30 @@ ENTRY(native_iret) + _ASM_EXTABLE(native_iret, bad_iret) + #endif + ++irq_return_ldt: ++ pushq_cfi %rax ++ pushq_cfi %rdi ++ SWAPGS ++ movq PER_CPU_VAR(espfix_waddr),%rdi ++ movq %rax,(0*8)(%rdi) /* RAX */ ++ movq (2*8)(%rsp),%rax /* RIP */ ++ movq %rax,(1*8)(%rdi) ++ movq (3*8)(%rsp),%rax /* CS */ ++ movq %rax,(2*8)(%rdi) ++ movq (4*8)(%rsp),%rax /* RFLAGS */ ++ movq %rax,(3*8)(%rdi) ++ movq (6*8)(%rsp),%rax /* SS */ ++ movq %rax,(5*8)(%rdi) ++ movq (5*8)(%rsp),%rax /* RSP */ ++ movq %rax,(4*8)(%rdi) ++ andl $0xffff0000,%eax ++ popq_cfi %rdi ++ orq PER_CPU_VAR(espfix_stack),%rax ++ SWAPGS ++ movq %rax,%rsp ++ popq_cfi %rax ++ jmp irq_return_iret ++ + .section .fixup,"ax" + bad_iret: + /* +@@ -1110,9 +1143,41 @@ ENTRY(retint_kernel) + call preempt_schedule_irq + jmp exit_intr + #endif +- + CFI_ENDPROC + END(common_interrupt) ++ ++ /* ++ * If IRET takes a fault on the espfix stack, then we ++ * end up promoting it to a doublefault. In that case, ++ * modify the stack to make it look like we just entered ++ * the #GP handler from user space, similar to bad_iret. ++ */ ++ ALIGN ++__do_double_fault: ++ XCPT_FRAME 1 RDI+8 ++ movq RSP(%rdi),%rax /* Trap on the espfix stack? */ ++ sarq $PGDIR_SHIFT,%rax ++ cmpl $ESPFIX_PGD_ENTRY,%eax ++ jne do_double_fault /* No, just deliver the fault */ ++ cmpl $__KERNEL_CS,CS(%rdi) ++ jne do_double_fault ++ movq RIP(%rdi),%rax ++ cmpq $irq_return_iret,%rax ++#ifdef CONFIG_PARAVIRT ++ je 1f ++ cmpq $native_iret,%rax ++#endif ++ jne do_double_fault /* This shouldn't happen... */ ++1: ++ movq PER_CPU_VAR(kernel_stack),%rax ++ subq $(6*8-KERNEL_STACK_OFFSET),%rax /* Reset to original stack */ ++ movq %rax,RSP(%rdi) ++ movq $0,(%rax) /* Missing (lost) #GP error code */ ++ movq $general_protection,RIP(%rdi) ++ retq ++ CFI_ENDPROC ++END(__do_double_fault) ++ + /* + * End of kprobes section + */ +@@ -1314,7 +1379,7 @@ zeroentry overflow do_overflow + zeroentry bounds do_bounds + zeroentry invalid_op do_invalid_op + zeroentry device_not_available do_device_not_available +-paranoiderrorentry double_fault do_double_fault ++paranoiderrorentry double_fault __do_double_fault + zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun + errorentry invalid_TSS do_invalid_TSS + errorentry segment_not_present do_segment_not_present +@@ -1601,7 +1666,7 @@ error_sti: + */ + error_kernelspace: + incl %ebx +- leaq irq_return(%rip),%rcx ++ leaq irq_return_iret(%rip),%rcx + cmpq %rcx,RIP+8(%rsp) + je error_swapgs + movl %ecx,%eax /* zero extend */ +--- /dev/null ++++ b/arch/x86/kernel/espfix_64.c +@@ -0,0 +1,208 @@ ++/* ----------------------------------------------------------------------- * ++ * ++ * Copyright 2014 Intel Corporation; author: H. Peter Anvin ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. 
++ * ++ * ----------------------------------------------------------------------- */ ++ ++/* ++ * The IRET instruction, when returning to a 16-bit segment, only ++ * restores the bottom 16 bits of the user space stack pointer. This ++ * causes some 16-bit software to break, but it also leaks kernel state ++ * to user space. ++ * ++ * This works around this by creating percpu "ministacks", each of which ++ * is mapped 2^16 times 64K apart. When we detect that the return SS is ++ * on the LDT, we copy the IRET frame to the ministack and use the ++ * relevant alias to return to userspace. The ministacks are mapped ++ * readonly, so if the IRET fault we promote #GP to #DF which is an IST ++ * vector and thus has its own stack; we then do the fixup in the #DF ++ * handler. ++ * ++ * This file sets up the ministacks and the related page tables. The ++ * actual ministack invocation is in entry_64.S. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * Note: we only need 6*8 = 48 bytes for the espfix stack, but round ++ * it up to a cache line to avoid unnecessary sharing. ++ */ ++#define ESPFIX_STACK_SIZE (8*8UL) ++#define ESPFIX_STACKS_PER_PAGE (PAGE_SIZE/ESPFIX_STACK_SIZE) ++ ++/* There is address space for how many espfix pages? */ ++#define ESPFIX_PAGE_SPACE (1UL << (PGDIR_SHIFT-PAGE_SHIFT-16)) ++ ++#define ESPFIX_MAX_CPUS (ESPFIX_STACKS_PER_PAGE * ESPFIX_PAGE_SPACE) ++#if CONFIG_NR_CPUS > ESPFIX_MAX_CPUS ++# error "Need more than one PGD for the ESPFIX hack" ++#endif ++ ++#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) ++ ++/* This contains the *bottom* address of the espfix stack */ ++DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack); ++DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr); ++ ++/* Initialization mutex - should this be a spinlock? */ ++static DEFINE_MUTEX(espfix_init_mutex); ++ ++/* Page allocation bitmap - each page serves ESPFIX_STACKS_PER_PAGE CPUs */ ++#define ESPFIX_MAX_PAGES DIV_ROUND_UP(CONFIG_NR_CPUS, ESPFIX_STACKS_PER_PAGE) ++static void *espfix_pages[ESPFIX_MAX_PAGES]; ++ ++static __page_aligned_bss pud_t espfix_pud_page[PTRS_PER_PUD] ++ __aligned(PAGE_SIZE); ++ ++static unsigned int page_random, slot_random; ++ ++/* ++ * This returns the bottom address of the espfix stack for a specific CPU. ++ * The math allows for a non-power-of-two ESPFIX_STACK_SIZE, in which case ++ * we have to account for some amount of padding at the end of each page. ++ */ ++static inline unsigned long espfix_base_addr(unsigned int cpu) ++{ ++ unsigned long page, slot; ++ unsigned long addr; ++ ++ page = (cpu / ESPFIX_STACKS_PER_PAGE) ^ page_random; ++ slot = (cpu + slot_random) % ESPFIX_STACKS_PER_PAGE; ++ addr = (page << PAGE_SHIFT) + (slot * ESPFIX_STACK_SIZE); ++ addr = (addr & 0xffffUL) | ((addr & ~0xffffUL) << 16); ++ addr += ESPFIX_BASE_ADDR; ++ return addr; ++} ++ ++#define PTE_STRIDE (65536/PAGE_SIZE) ++#define ESPFIX_PTE_CLONES (PTRS_PER_PTE/PTE_STRIDE) ++#define ESPFIX_PMD_CLONES PTRS_PER_PMD ++#define ESPFIX_PUD_CLONES (65536/(ESPFIX_PTE_CLONES*ESPFIX_PMD_CLONES)) ++ ++#define PGTABLE_PROT ((_KERNPG_TABLE & ~_PAGE_RW) | _PAGE_NX) ++ ++static void init_espfix_random(void) ++{ ++ unsigned long rand; ++ ++ /* ++ * This is run before the entropy pools are initialized, ++ * but this is hopefully better than nothing. 
++ */ ++ if (!arch_get_random_long(&rand)) { ++ /* The constant is an arbitrary large prime */ ++ rdtscll(rand); ++ rand *= 0xc345c6b72fd16123UL; ++ } ++ ++ slot_random = rand % ESPFIX_STACKS_PER_PAGE; ++ page_random = (rand / ESPFIX_STACKS_PER_PAGE) ++ & (ESPFIX_PAGE_SPACE - 1); ++} ++ ++void __init init_espfix_bsp(void) ++{ ++ pgd_t *pgd_p; ++ pteval_t ptemask; ++ ++ ptemask = __supported_pte_mask; ++ ++ /* Install the espfix pud into the kernel page directory */ ++ pgd_p = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)]; ++ pgd_populate(&init_mm, pgd_p, (pud_t *)espfix_pud_page); ++ ++ /* Randomize the locations */ ++ init_espfix_random(); ++ ++ /* The rest is the same as for any other processor */ ++ init_espfix_ap(); ++} ++ ++void init_espfix_ap(void) ++{ ++ unsigned int cpu, page; ++ unsigned long addr; ++ pud_t pud, *pud_p; ++ pmd_t pmd, *pmd_p; ++ pte_t pte, *pte_p; ++ int n; ++ void *stack_page; ++ pteval_t ptemask; ++ ++ /* We only have to do this once... */ ++ if (likely(this_cpu_read(espfix_stack))) ++ return; /* Already initialized */ ++ ++ cpu = smp_processor_id(); ++ addr = espfix_base_addr(cpu); ++ page = cpu/ESPFIX_STACKS_PER_PAGE; ++ ++ /* Did another CPU already set this up? */ ++ stack_page = ACCESS_ONCE(espfix_pages[page]); ++ if (likely(stack_page)) ++ goto done; ++ ++ mutex_lock(&espfix_init_mutex); ++ ++ /* Did we race on the lock? */ ++ stack_page = ACCESS_ONCE(espfix_pages[page]); ++ if (stack_page) ++ goto unlock_done; ++ ++ ptemask = __supported_pte_mask; ++ ++ pud_p = &espfix_pud_page[pud_index(addr)]; ++ pud = *pud_p; ++ if (!pud_present(pud)) { ++ pmd_p = (pmd_t *)__get_free_page(PGALLOC_GFP); ++ pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask)); ++ paravirt_alloc_pud(&init_mm, __pa(pmd_p) >> PAGE_SHIFT); ++ for (n = 0; n < ESPFIX_PUD_CLONES; n++) ++ set_pud(&pud_p[n], pud); ++ } ++ ++ pmd_p = pmd_offset(&pud, addr); ++ pmd = *pmd_p; ++ if (!pmd_present(pmd)) { ++ pte_p = (pte_t *)__get_free_page(PGALLOC_GFP); ++ pmd = __pmd(__pa(pte_p) | (PGTABLE_PROT & ptemask)); ++ paravirt_alloc_pmd(&init_mm, __pa(pte_p) >> PAGE_SHIFT); ++ for (n = 0; n < ESPFIX_PMD_CLONES; n++) ++ set_pmd(&pmd_p[n], pmd); ++ } ++ ++ pte_p = pte_offset_kernel(&pmd, addr); ++ stack_page = (void *)__get_free_page(GFP_KERNEL); ++ pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask)); ++ paravirt_alloc_pte(&init_mm, __pa(stack_page) >> PAGE_SHIFT); ++ for (n = 0; n < ESPFIX_PTE_CLONES; n++) ++ set_pte(&pte_p[n*PTE_STRIDE], pte); ++ ++ /* Job is done for this CPU and any CPU which shares this page */ ++ ACCESS_ONCE(espfix_pages[page]) = stack_page; ++ ++unlock_done: ++ mutex_unlock(&espfix_init_mutex); ++done: ++ this_cpu_write(espfix_stack, addr); ++ this_cpu_write(espfix_waddr, (unsigned long)stack_page ++ + (addr & ~PAGE_MASK)); ++} +--- a/arch/x86/kernel/ldt.c ++++ b/arch/x86/kernel/ldt.c +@@ -229,17 +229,6 @@ static int write_ldt(void __user *ptr, u + } + } + +- /* +- * On x86-64 we do not support 16-bit segments due to +- * IRET leaking the high bits of the kernel stack address. 
+- */ +-#ifdef CONFIG_X86_64 +- if (!ldt_info.seg_32bit) { +- error = -EINVAL; +- goto out_unlock; +- } +-#endif +- + fill_ldt(&ldt, &ldt_info); + if (oldmode) + ldt.avl = 0; +--- a/arch/x86/kernel/smpboot.c ++++ b/arch/x86/kernel/smpboot.c +@@ -244,6 +244,13 @@ static void notrace start_secondary(void + check_tsc_sync_target(); + + /* ++ * Enable the espfix hack for this CPU ++ */ ++#ifdef CONFIG_X86_64 ++ init_espfix_ap(); ++#endif ++ ++ /* + * We need to hold vector_lock so there the set of online cpus + * does not change while we are assigning vectors to cpus. Holding + * this lock ensures we don't half assign or remove an irq from a cpu. +--- a/arch/x86/mm/dump_pagetables.c ++++ b/arch/x86/mm/dump_pagetables.c +@@ -30,12 +30,14 @@ struct pg_state { + unsigned long start_address; + unsigned long current_address; + const struct addr_marker *marker; ++ unsigned long lines; + bool to_dmesg; + }; + + struct addr_marker { + unsigned long start_address; + const char *name; ++ unsigned long max_lines; + }; + + /* indices for address_markers; keep sync'd w/ address_markers below */ +@@ -46,6 +48,7 @@ enum address_markers_idx { + LOW_KERNEL_NR, + VMALLOC_START_NR, + VMEMMAP_START_NR, ++ ESPFIX_START_NR, + HIGH_KERNEL_NR, + MODULES_VADDR_NR, + MODULES_END_NR, +@@ -68,6 +71,7 @@ static struct addr_marker address_marker + { PAGE_OFFSET, "Low Kernel Mapping" }, + { VMALLOC_START, "vmalloc() Area" }, + { VMEMMAP_START, "Vmemmap" }, ++ { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, + { __START_KERNEL_map, "High Kernel Mapping" }, + { MODULES_VADDR, "Modules" }, + { MODULES_END, "End Modules" }, +@@ -182,7 +186,7 @@ static void note_page(struct seq_file *m + pgprot_t new_prot, int level) + { + pgprotval_t prot, cur; +- static const char units[] = "KMGTPE"; ++ static const char units[] = "BKMGTPE"; + + /* + * If we have a "break" in the series, we need to flush the state that +@@ -197,6 +201,7 @@ static void note_page(struct seq_file *m + st->current_prot = new_prot; + st->level = level; + st->marker = address_markers; ++ st->lines = 0; + pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", + st->marker->name); + } else if (prot != cur || level != st->level || +@@ -208,17 +213,24 @@ static void note_page(struct seq_file *m + /* + * Now print the actual finished series + */ +- pt_dump_seq_printf(m, st->to_dmesg, "0x%0*lx-0x%0*lx ", +- width, st->start_address, +- width, st->current_address); +- +- delta = (st->current_address - st->start_address) >> 10; +- while (!(delta & 1023) && unit[1]) { +- delta >>= 10; +- unit++; ++ if (!st->marker->max_lines || ++ st->lines < st->marker->max_lines) { ++ pt_dump_seq_printf(m, st->to_dmesg, ++ "0x%0*lx-0x%0*lx ", ++ width, st->start_address, ++ width, st->current_address); ++ ++ delta = st->current_address - st->start_address; ++ while (!(delta & 1023) && unit[1]) { ++ delta >>= 10; ++ unit++; ++ } ++ pt_dump_cont_printf(m, st->to_dmesg, "%9lu%c ", ++ delta, *unit); ++ printk_prot(m, st->current_prot, st->level, ++ st->to_dmesg); + } +- pt_dump_cont_printf(m, st->to_dmesg, "%9lu%c ", delta, *unit); +- printk_prot(m, st->current_prot, st->level, st->to_dmesg); ++ st->lines++; + + /* + * We print markers for special areas of address space, +@@ -226,7 +238,17 @@ static void note_page(struct seq_file *m + * This helps in the interpretation. 
+ */ + if (st->current_address >= st->marker[1].start_address) { ++ if (st->marker->max_lines && ++ st->lines > st->marker->max_lines) { ++ unsigned long nskip = ++ st->lines - st->marker->max_lines; ++ pt_dump_seq_printf(m, st->to_dmesg, ++ "... %lu entr%s skipped ... \n", ++ nskip, ++ nskip == 1 ? "y" : "ies"); ++ } + st->marker++; ++ st->lines = 0; + pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", + st->marker->name); + } +--- a/init/main.c ++++ b/init/main.c +@@ -617,6 +617,10 @@ asmlinkage __visible void __init start_k + if (efi_enabled(EFI_RUNTIME_SERVICES)) + efi_enter_virtual_mode(); + #endif ++#ifdef CONFIG_X86_64 ++ /* Should be run before the first non-init thread is created */ ++ init_espfix_bsp(); ++#endif + thread_info_cache_init(); + cred_init(); + fork_init(totalram_pages); diff --git a/queue-3.15/x86-espfix-fix-broken-header-guard.patch b/queue-3.15/x86-espfix-fix-broken-header-guard.patch new file mode 100644 index 00000000000..39b5fb28006 --- /dev/null +++ b/queue-3.15/x86-espfix-fix-broken-header-guard.patch @@ -0,0 +1,27 @@ +From 20b68535cd27183ebd3651ff313afb2b97dac941 Mon Sep 17 00:00:00 2001 +From: "H. Peter Anvin" +Date: Fri, 2 May 2014 11:33:51 -0700 +Subject: x86, espfix: Fix broken header guard + +From: "H. Peter Anvin" + +commit 20b68535cd27183ebd3651ff313afb2b97dac941 upstream. + +Header guard is #ifndef, not #ifdef... + +Reported-by: Fengguang Wu +Signed-off-by: H. Peter Anvin +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/espfix.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/include/asm/espfix.h ++++ b/arch/x86/include/asm/espfix.h +@@ -1,4 +1,4 @@ +-#ifdef _ASM_X86_ESPFIX_H ++#ifndef _ASM_X86_ESPFIX_H + #define _ASM_X86_ESPFIX_H + + #ifdef CONFIG_X86_64 diff --git a/queue-3.15/x86-espfix-make-espfix64-a-kconfig-option-fix-uml.patch b/queue-3.15/x86-espfix-make-espfix64-a-kconfig-option-fix-uml.patch new file mode 100644 index 00000000000..31ac9ed3b99 --- /dev/null +++ b/queue-3.15/x86-espfix-make-espfix64-a-kconfig-option-fix-uml.patch @@ -0,0 +1,77 @@ +From 197725de65477bc8509b41388157c1a2283542bb Mon Sep 17 00:00:00 2001 +From: "H. Peter Anvin" +Date: Sun, 4 May 2014 10:00:49 -0700 +Subject: x86, espfix: Make espfix64 a Kconfig option, fix UML + +From: "H. Peter Anvin" + +commit 197725de65477bc8509b41388157c1a2283542bb upstream. + +Make espfix64 a hidden Kconfig option. This fixes the x86-64 UML +build which had broken due to the non-existence of init_espfix_bsp() +in UML: since UML uses its own Kconfig, this option does not appear in +the UML build. + +This also makes it possible to make support for 16-bit segments a +configuration option, for the people who want to minimize the size of +the kernel. + +Reported-by: Ingo Molnar +Signed-off-by: H. Peter Anvin +Cc: Richard Weinberger +Link: http://lkml.kernel.org/r/1398816946-3351-1-git-send-email-hpa@linux.intel.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/Kconfig | 4 ++++ + arch/x86/kernel/Makefile | 2 +- + arch/x86/kernel/smpboot.c | 2 +- + init/main.c | 2 +- + 4 files changed, 7 insertions(+), 3 deletions(-) + +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -915,6 +915,10 @@ config VM86 + XFree86 to initialize some video cards via BIOS. Disabling this + option saves about 6k. 
+ ++config X86_ESPFIX64 ++ def_bool y ++ depends on X86_64 ++ + config TOSHIBA + tristate "Toshiba Laptop support" + depends on X86_32 +--- a/arch/x86/kernel/Makefile ++++ b/arch/x86/kernel/Makefile +@@ -29,7 +29,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x86 + obj-y += syscall_$(BITS).o vsyscall_gtod.o + obj-$(CONFIG_X86_64) += vsyscall_64.o + obj-$(CONFIG_X86_64) += vsyscall_emu_64.o +-obj-$(CONFIG_X86_64) += espfix_64.o ++obj-$(CONFIG_X86_ESPFIX64) += espfix_64.o + obj-$(CONFIG_SYSFS) += ksysfs.o + obj-y += bootflag.o e820.o + obj-y += pci-dma.o quirks.o topology.o kdebugfs.o +--- a/arch/x86/kernel/smpboot.c ++++ b/arch/x86/kernel/smpboot.c +@@ -246,7 +246,7 @@ static void notrace start_secondary(void + /* + * Enable the espfix hack for this CPU + */ +-#ifdef CONFIG_X86_64 ++#ifdef CONFIG_X86_ESPFIX64 + init_espfix_ap(); + #endif + +--- a/init/main.c ++++ b/init/main.c +@@ -617,7 +617,7 @@ asmlinkage __visible void __init start_k + if (efi_enabled(EFI_RUNTIME_SERVICES)) + efi_enter_virtual_mode(); + #endif +-#ifdef CONFIG_X86_64 ++#ifdef CONFIG_X86_ESPFIX64 + /* Should be run before the first non-init thread is created */ + init_espfix_bsp(); + #endif diff --git a/queue-3.15/x86-espfix-make-it-possible-to-disable-16-bit-support.patch b/queue-3.15/x86-espfix-make-it-possible-to-disable-16-bit-support.patch new file mode 100644 index 00000000000..5d0aa27d43e --- /dev/null +++ b/queue-3.15/x86-espfix-make-it-possible-to-disable-16-bit-support.patch @@ -0,0 +1,212 @@ +From 34273f41d57ee8d854dcd2a1d754cbb546cb548f Mon Sep 17 00:00:00 2001 +From: "H. Peter Anvin" +Date: Sun, 4 May 2014 10:36:22 -0700 +Subject: x86, espfix: Make it possible to disable 16-bit support + +From: "H. Peter Anvin" + +commit 34273f41d57ee8d854dcd2a1d754cbb546cb548f upstream. + +Embedded systems, which may be very memory-size-sensitive, are +extremely unlikely to ever encounter any 16-bit software, so make it +a CONFIG_EXPERT option to turn off support for any 16-bit software +whatsoever. + +Signed-off-by: H. Peter Anvin +Link: http://lkml.kernel.org/r/1398816946-3351-1-git-send-email-hpa@linux.intel.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/Kconfig | 23 ++++++++++++++++++----- + arch/x86/kernel/entry_32.S | 12 ++++++++++++ + arch/x86/kernel/entry_64.S | 8 ++++++++ + arch/x86/kernel/ldt.c | 5 +++++ + 4 files changed, 43 insertions(+), 5 deletions(-) + +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -910,14 +910,27 @@ config VM86 + default y + depends on X86_32 + ---help--- +- This option is required by programs like DOSEMU to run 16-bit legacy +- code on X86 processors. It also may be needed by software like +- XFree86 to initialize some video cards via BIOS. Disabling this +- option saves about 6k. ++ This option is required by programs like DOSEMU to run ++ 16-bit real mode legacy code on x86 processors. It also may ++ be needed by software like XFree86 to initialize some video ++ cards via BIOS. Disabling this option saves about 6K. ++ ++config X86_16BIT ++ bool "Enable support for 16-bit segments" if EXPERT ++ default y ++ ---help--- ++ This option is required by programs like Wine to run 16-bit ++ protected mode legacy code on x86 processors. 
Disabling ++ this option saves about 300 bytes on i386, or around 6K text ++ plus 16K runtime memory on x86-64, ++ ++config X86_ESPFIX32 ++ def_bool y ++ depends on X86_16BIT && X86_32 + + config X86_ESPFIX64 + def_bool y +- depends on X86_64 ++ depends on X86_16BIT && X86_64 + + config TOSHIBA + tristate "Toshiba Laptop support" +--- a/arch/x86/kernel/entry_32.S ++++ b/arch/x86/kernel/entry_32.S +@@ -529,6 +529,7 @@ syscall_exit: + restore_all: + TRACE_IRQS_IRET + restore_all_notrace: ++#ifdef CONFIG_X86_ESPFIX32 + movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS + # Warning: PT_OLDSS(%esp) contains the wrong/random values if we + # are returning to the kernel. +@@ -539,6 +540,7 @@ restore_all_notrace: + cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax + CFI_REMEMBER_STATE + je ldt_ss # returning to user-space with LDT SS ++#endif + restore_nocheck: + RESTORE_REGS 4 # skip orig_eax/error_code + irq_return: +@@ -551,6 +553,7 @@ ENTRY(iret_exc) + .previous + _ASM_EXTABLE(irq_return,iret_exc) + ++#ifdef CONFIG_X86_ESPFIX32 + CFI_RESTORE_STATE + ldt_ss: + #ifdef CONFIG_PARAVIRT +@@ -594,6 +597,7 @@ ldt_ss: + lss (%esp), %esp /* switch to espfix segment */ + CFI_ADJUST_CFA_OFFSET -8 + jmp restore_nocheck ++#endif + CFI_ENDPROC + ENDPROC(system_call) + +@@ -706,6 +710,7 @@ END(syscall_badsys) + * the high word of the segment base from the GDT and swiches to the + * normal stack and adjusts ESP with the matching offset. + */ ++#ifdef CONFIG_X86_ESPFIX32 + /* fixup the stack */ + mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */ + mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */ +@@ -715,8 +720,10 @@ END(syscall_badsys) + pushl_cfi %eax + lss (%esp), %esp /* switch to the normal stack segment */ + CFI_ADJUST_CFA_OFFSET -8 ++#endif + .endm + .macro UNWIND_ESPFIX_STACK ++#ifdef CONFIG_X86_ESPFIX32 + movl %ss, %eax + /* see if on espfix stack */ + cmpw $__ESPFIX_SS, %ax +@@ -727,6 +734,7 @@ END(syscall_badsys) + /* switch to normal stack */ + FIXUP_ESPFIX_STACK + 27: ++#endif + .endm + + /* +@@ -1357,11 +1365,13 @@ END(debug) + ENTRY(nmi) + RING0_INT_FRAME + ASM_CLAC ++#ifdef CONFIG_X86_ESPFIX32 + pushl_cfi %eax + movl %ss, %eax + cmpw $__ESPFIX_SS, %ax + popl_cfi %eax + je nmi_espfix_stack ++#endif + cmpl $ia32_sysenter_target,(%esp) + je nmi_stack_fixup + pushl_cfi %eax +@@ -1401,6 +1411,7 @@ nmi_debug_stack_check: + FIX_STACK 24, nmi_stack_correct, 1 + jmp nmi_stack_correct + ++#ifdef CONFIG_X86_ESPFIX32 + nmi_espfix_stack: + /* We have a RING0_INT_FRAME here. + * +@@ -1422,6 +1433,7 @@ nmi_espfix_stack: + lss 12+4(%esp), %esp # back to espfix stack + CFI_ADJUST_CFA_OFFSET -24 + jmp irq_return ++#endif + CFI_ENDPROC + END(nmi) + +--- a/arch/x86/kernel/entry_64.S ++++ b/arch/x86/kernel/entry_64.S +@@ -1045,8 +1045,10 @@ irq_return: + * Are we returning to a stack segment from the LDT? Note: in + * 64-bit mode SS:RSP on the exception stack is always valid. + */ ++#ifdef CONFIG_X86_ESPFIX64 + testb $4,(SS-RIP)(%rsp) + jnz irq_return_ldt ++#endif + + irq_return_iret: + INTERRUPT_RETURN +@@ -1058,6 +1060,7 @@ ENTRY(native_iret) + _ASM_EXTABLE(native_iret, bad_iret) + #endif + ++#ifdef CONFIG_X86_ESPFIX64 + irq_return_ldt: + pushq_cfi %rax + pushq_cfi %rdi +@@ -1081,6 +1084,7 @@ irq_return_ldt: + movq %rax,%rsp + popq_cfi %rax + jmp irq_return_iret ++#endif + + .section .fixup,"ax" + bad_iret: +@@ -1152,6 +1156,7 @@ END(common_interrupt) + * modify the stack to make it look like we just entered + * the #GP handler from user space, similar to bad_iret. 
+ */ ++#ifdef CONFIG_X86_ESPFIX64 + ALIGN + __do_double_fault: + XCPT_FRAME 1 RDI+8 +@@ -1177,6 +1182,9 @@ __do_double_fault: + retq + CFI_ENDPROC + END(__do_double_fault) ++#else ++# define __do_double_fault do_double_fault ++#endif + + /* + * End of kprobes section +--- a/arch/x86/kernel/ldt.c ++++ b/arch/x86/kernel/ldt.c +@@ -229,6 +229,11 @@ static int write_ldt(void __user *ptr, u + } + } + ++ if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) { ++ error = -EINVAL; ++ goto out_unlock; ++ } ++ + fill_ldt(&ldt, &ldt_info); + if (oldmode) + ldt.avl = 0; diff --git a/queue-3.15/x86-espfix-move-espfix-definitions-into-a-separate-header-file.patch b/queue-3.15/x86-espfix-move-espfix-definitions-into-a-separate-header-file.patch new file mode 100644 index 00000000000..77907b2e7f5 --- /dev/null +++ b/queue-3.15/x86-espfix-move-espfix-definitions-into-a-separate-header-file.patch @@ -0,0 +1,68 @@ +From e1fe9ed8d2a4937510d0d60e20705035c2609aea Mon Sep 17 00:00:00 2001 +From: "H. Peter Anvin" +Date: Thu, 1 May 2014 14:12:23 -0700 +Subject: x86, espfix: Move espfix definitions into a separate header file + +From: "H. Peter Anvin" + +commit e1fe9ed8d2a4937510d0d60e20705035c2609aea upstream. + +Sparse warns that the percpu variables aren't declared before they are +defined. Rather than hacking around it, move espfix definitions into +a proper header file. + +Reported-by: Fengguang Wu +Signed-off-by: H. Peter Anvin +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/espfix.h | 16 ++++++++++++++++ + arch/x86/include/asm/setup.h | 5 ++--- + arch/x86/kernel/espfix_64.c | 1 + + 3 files changed, 19 insertions(+), 3 deletions(-) + +--- /dev/null ++++ b/arch/x86/include/asm/espfix.h +@@ -0,0 +1,16 @@ ++#ifdef _ASM_X86_ESPFIX_H ++#define _ASM_X86_ESPFIX_H ++ ++#ifdef CONFIG_X86_64 ++ ++#include ++ ++DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack); ++DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr); ++ ++extern void init_espfix_bsp(void); ++extern void init_espfix_ap(void); ++ ++#endif /* CONFIG_X86_64 */ ++ ++#endif /* _ASM_X86_ESPFIX_H */ +--- a/arch/x86/include/asm/setup.h ++++ b/arch/x86/include/asm/setup.h +@@ -57,11 +57,10 @@ extern void x86_ce4100_early_setup(void) + static inline void x86_ce4100_early_setup(void) { } + #endif + +-extern void init_espfix_bsp(void); +-extern void init_espfix_ap(void); +- + #ifndef _SETUP + ++#include ++ + /* + * This is set up by the setup-routine at boot-time + */ +--- a/arch/x86/kernel/espfix_64.c ++++ b/arch/x86/kernel/espfix_64.c +@@ -40,6 +40,7 @@ + #include + #include + #include ++#include + + /* + * Note: we only need 6*8 = 48 bytes for the espfix stack, but round diff --git a/queue-3.15/x86_64-entry-xen-do-not-invoke-espfix64-on-xen.patch b/queue-3.15/x86_64-entry-xen-do-not-invoke-espfix64-on-xen.patch new file mode 100644 index 00000000000..f7b3b2a021e --- /dev/null +++ b/queue-3.15/x86_64-entry-xen-do-not-invoke-espfix64-on-xen.patch @@ -0,0 +1,130 @@ +From 7209a75d2009dbf7745e2fd354abf25c3deb3ca3 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Wed, 23 Jul 2014 08:34:11 -0700 +Subject: x86_64/entry/xen: Do not invoke espfix64 on Xen + +From: Andy Lutomirski + +commit 7209a75d2009dbf7745e2fd354abf25c3deb3ca3 upstream. + +This moves the espfix64 logic into native_iret. To make this work, +it gets rid of the native patch for INTERRUPT_RETURN: +INTERRUPT_RETURN on native kernels is now 'jmp native_iret'. 
+ +This changes the 16-bit SS behavior on Xen from OOPSing to leaking +some bits of the Xen hypervisor's RSP (I think). + +[ hpa: this is a nonzero cost on native, but probably not enough to + measure. Xen needs to fix this in their own code, probably doing + something equivalent to espfix64. ] + +Signed-off-by: Andy Lutomirski +Link: http://lkml.kernel.org/r/7b8f1d8ef6597cb16ae004a43c56980a7de3cf94.1406129132.git.luto@amacapital.net +Signed-off-by: H. Peter Anvin +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/irqflags.h | 2 +- + arch/x86/kernel/entry_64.S | 28 ++++++++++------------------ + arch/x86/kernel/paravirt_patch_64.c | 2 -- + 3 files changed, 11 insertions(+), 21 deletions(-) + +--- a/arch/x86/include/asm/irqflags.h ++++ b/arch/x86/include/asm/irqflags.h +@@ -129,7 +129,7 @@ static inline notrace unsigned long arch + + #define PARAVIRT_ADJUST_EXCEPTION_FRAME /* */ + +-#define INTERRUPT_RETURN iretq ++#define INTERRUPT_RETURN jmp native_iret + #define USERGS_SYSRET64 \ + swapgs; \ + sysretq; +--- a/arch/x86/kernel/entry_64.S ++++ b/arch/x86/kernel/entry_64.S +@@ -1041,27 +1041,24 @@ restore_args: + RESTORE_ARGS 1,8,1 + + irq_return: ++ INTERRUPT_RETURN ++ ++ENTRY(native_iret) + /* + * Are we returning to a stack segment from the LDT? Note: in + * 64-bit mode SS:RSP on the exception stack is always valid. + */ + #ifdef CONFIG_X86_ESPFIX64 + testb $4,(SS-RIP)(%rsp) +- jnz irq_return_ldt ++ jnz native_irq_return_ldt + #endif + +-irq_return_iret: +- INTERRUPT_RETURN +- _ASM_EXTABLE(irq_return_iret, bad_iret) +- +-#ifdef CONFIG_PARAVIRT +-ENTRY(native_iret) ++native_irq_return_iret: + iretq +- _ASM_EXTABLE(native_iret, bad_iret) +-#endif ++ _ASM_EXTABLE(native_irq_return_iret, bad_iret) + + #ifdef CONFIG_X86_ESPFIX64 +-irq_return_ldt: ++native_irq_return_ldt: + pushq_cfi %rax + pushq_cfi %rdi + SWAPGS +@@ -1083,7 +1080,7 @@ irq_return_ldt: + SWAPGS + movq %rax,%rsp + popq_cfi %rax +- jmp irq_return_iret ++ jmp native_irq_return_iret + #endif + + .section .fixup,"ax" +@@ -1167,13 +1164,8 @@ __do_double_fault: + cmpl $__KERNEL_CS,CS(%rdi) + jne do_double_fault + movq RIP(%rdi),%rax +- cmpq $irq_return_iret,%rax +-#ifdef CONFIG_PARAVIRT +- je 1f +- cmpq $native_iret,%rax +-#endif ++ cmpq $native_irq_return_iret,%rax + jne do_double_fault /* This shouldn't happen... */ +-1: + movq PER_CPU_VAR(kernel_stack),%rax + subq $(6*8-KERNEL_STACK_OFFSET),%rax /* Reset to original stack */ + movq %rax,RSP(%rdi) +@@ -1674,7 +1666,7 @@ error_sti: + */ + error_kernelspace: + incl %ebx +- leaq irq_return_iret(%rip),%rcx ++ leaq native_irq_return_iret(%rip),%rcx + cmpq %rcx,RIP+8(%rsp) + je error_swapgs + movl %ecx,%eax /* zero extend */ +--- a/arch/x86/kernel/paravirt_patch_64.c ++++ b/arch/x86/kernel/paravirt_patch_64.c +@@ -6,7 +6,6 @@ DEF_NATIVE(pv_irq_ops, irq_disable, "cli + DEF_NATIVE(pv_irq_ops, irq_enable, "sti"); + DEF_NATIVE(pv_irq_ops, restore_fl, "pushq %rdi; popfq"); + DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax"); +-DEF_NATIVE(pv_cpu_ops, iret, "iretq"); + DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax"); + DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax"); + DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3"); +@@ -50,7 +49,6 @@ unsigned native_patch(u8 type, u16 clobb + PATCH_SITE(pv_irq_ops, save_fl); + PATCH_SITE(pv_irq_ops, irq_enable); + PATCH_SITE(pv_irq_ops, irq_disable); +- PATCH_SITE(pv_cpu_ops, iret); + PATCH_SITE(pv_cpu_ops, irq_enable_sysexit); + PATCH_SITE(pv_cpu_ops, usergs_sysret32); + PATCH_SITE(pv_cpu_ops, usergs_sysret64); -- 2.47.3
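
For reference, the property all of the espfix64 patches above rely on is that every per-cpu IRET "ministack" sits at a virtual address whose bits 31:16 are zero, so the user's own %esp bits 31:16 can be folded into the stack pointer before IRET and the bits that IRET fails to restore are values userspace already had. The small user-space sketch below mirrors the arithmetic of espfix_base_addr() from the x86-64-espfix patch; the PAGE_SHIFT/PGDIR_SHIFT/ESPFIX_* constants are copied from that patch, while the fixed page_random/slot_random values and the demo user_esp are made up purely for illustration — this is not kernel code and maps nothing.

/*
 * Illustration of the espfix64 ministack address layout
 * (see arch/x86/kernel/espfix_64.c in the patch above).
 */
#include <stdio.h>
#include <stdint.h>
#include <assert.h>

#define PAGE_SHIFT		12
#define PGDIR_SHIFT		39
#define ESPFIX_BASE_ADDR	((uint64_t)-2 << PGDIR_SHIFT)	/* 0xffffff0000000000 */

#define ESPFIX_STACK_SIZE	(8 * 8ULL)			/* 64 bytes per ministack */
#define ESPFIX_STACKS_PER_PAGE	((1ULL << PAGE_SHIFT) / ESPFIX_STACK_SIZE)

/* Stand-ins for the kernel's boot-time random values (arbitrary here). */
static const unsigned int page_random = 5;
static const unsigned int slot_random = 17;

/* Same arithmetic as espfix_base_addr() in the patch. */
static uint64_t espfix_base_addr(unsigned int cpu)
{
	uint64_t page = (cpu / ESPFIX_STACKS_PER_PAGE) ^ page_random;
	uint64_t slot = (cpu + slot_random) % ESPFIX_STACKS_PER_PAGE;
	uint64_t addr = (page << PAGE_SHIFT) + slot * ESPFIX_STACK_SIZE;

	/* Spread the linear offset so that bits 31:16 of the result are zero. */
	addr = (addr & 0xffffULL) | ((addr & ~0xffffULL) << 16);
	return ESPFIX_BASE_ADDR + addr;
}

int main(void)
{
	uint32_t user_esp = 0xbfff1234;		/* made-up user stack pointer */

	for (unsigned int cpu = 0; cpu < 4; cpu++) {
		uint64_t stack = espfix_base_addr(cpu);
		uint64_t iret_rsp = stack | (user_esp & 0xffff0000u);

		/*
		 * Bits 31:16 of every ministack address are zero by
		 * construction, so entry_64.S can OR the user's own %esp
		 * bits 31:16 into the pointer ("andl $0xffff0000,%eax;
		 * orq PER_CPU_VAR(espfix_stack),%rax").  Whatever IRET then
		 * leaves in those bits of the truncated stack pointer is
		 * data userspace already had, not a kernel stack address.
		 */
		assert((stack & 0xffff0000ULL) == 0);
		printf("cpu %u: ministack %#18llx, RSP used for IRET %#18llx\n",
		       cpu,
		       (unsigned long long)stack,
		       (unsigned long long)iret_rsp);
	}
	return 0;
}

Because the low 16 bits of the address are preserved by the bit-spreading step, every 64 KiB alias of a ministack page lands on the same offset within the physical page, which is what lets the read-only alias selected by the user's high bits still point at the IRET frame that was written through espfix_waddr.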