From c779b6b226a75554f5f311755493efd77c0b2059 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 11 Jan 2018 21:46:25 +0000 Subject: [PATCH] x86/retpoline: Add initial retpoline support commit 76b043848fd22dbf7f8bf3a1452f8c70d557b860 upstream. Enable the use of -mindirect-branch=thunk-extern in newer GCC, and provide the corresponding thunks. Provide assembler macros for invoking the thunks in the same way that GCC does, from native and inline assembler. This adds X86_FEATURE_RETPOLINE and sets it by default on all CPUs. In some circumstances, IBRS microcode features may be used instead, and the retpoline can be disabled. On AMD CPUs if lfence is serialising, the retpoline can be dramatically simplified to a simple "lfence; jmp *\reg". A future patch, after it has been verified that lfence really is serialising in all circumstances, can enable this by setting the X86_FEATURE_RETPOLINE_AMD feature bit in addition to X86_FEATURE_RETPOLINE. Do not align the retpoline in the altinstr section, because there is no guarantee that it stays aligned when it's copied over the oldinstr during alternative patching. [ Andi Kleen: Rename the macros, add CONFIG_RETPOLINE option, export thunks] [ tglx: Put actual function CALL/JMP in front of the macros, convert to symbolic labels ] [ dwmw2: Convert back to numeric labels, merge objtool fixes ] Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Acked-by: Ingo Molnar Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-4-git-send-email-dwmw@amazon.co.uk [bwh: Backported to 3.2: - Add C source to export the thunk symbols - Drop ANNOTATE_NOSPEC_ALTERNATIVE since we don't have objtool - Use the first available feaure numbers - Adjust filename, context] Signed-off-by: Ben Hutchings --- arch/x86/Kconfig | 13 ++++ arch/x86/Makefile | 10 +++ arch/x86/include/asm/cpufeature.h | 2 + arch/x86/include/asm/nospec-branch.h | 106 +++++++++++++++++++++++++++ arch/x86/kernel/cpu/common.c | 4 + arch/x86/lib/Makefile | 2 + arch/x86/lib/retpoline-export.c | 26 +++++++ arch/x86/lib/retpoline.S | 46 ++++++++++++ 8 files changed, 209 insertions(+) create mode 100644 arch/x86/include/asm/nospec-branch.h create mode 100644 arch/x86/lib/retpoline-export.c create mode 100644 arch/x86/lib/retpoline.S diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2d4b16eed3c78..ac2addde045d4 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -312,6 +312,19 @@ config X86_BIGSMP ---help--- This option is needed for the systems that have more than 8 CPUs +config RETPOLINE + bool "Avoid speculative indirect branches in kernel" + default y + help + Compile kernel with the retpoline compiler options to guard against + kernel-to-user data leaks by avoiding speculative indirect + branches. Requires a compiler with -mindirect-branch=thunk-extern + support for full protection. The kernel may run slower. + + Without compiler support, at least indirect branches in assembler + code are eliminated. Since this includes the syscall entry path, + it is not entirely pointless. + if X86_32 config X86_EXTENDED_PLATFORM bool "Support for extended (non-PC) x86 platforms" diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 03dbc7f5b4d9a..fbd94ddc994ff 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -117,6 +117,16 @@ KBUILD_CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,) KBUILD_CFLAGS += $(mflags-y) KBUILD_AFLAGS += $(mflags-y) +# Avoid indirect branches in kernel to deal with Spectre +ifdef CONFIG_RETPOLINE + RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) + ifneq ($(RETPOLINE_CFLAGS),) + KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE + else + $(warning CONFIG_RETPOLINE=y, but not supported by the compiler. Toolchain update recommended.) + endif +endif + archscripts: $(Q)$(MAKE) $(build)=arch/x86/tools relocs diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index a6afd476ba90f..8e583a672316e 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -179,6 +179,8 @@ #define X86_FEATURE_DTHERM (7*32+ 7) /* Digital Thermal Sensor */ #define X86_FEATURE_INVPCID_SINGLE (7*32+ 8) /* Effectively INVPCID && CR4.PCIDE=1 */ +#define X86_FEATURE_RETPOLINE (7*32+29) /* Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_AMD (7*32+30) /* AMD Retpoline mitigation for Spectre variant 2 */ /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */ #define X86_FEATURE_KAISER ( 7*32+31) /* "" CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h new file mode 100644 index 0000000000000..5763548fb30b9 --- /dev/null +++ b/arch/x86/include/asm/nospec-branch.h @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __NOSPEC_BRANCH_H__ +#define __NOSPEC_BRANCH_H__ + +#include +#include +#include + +#ifdef __ASSEMBLY__ + +/* + * These are the bare retpoline primitives for indirect jmp and call. + * Do not use these directly; they only exist to make the ALTERNATIVE + * invocation below less ugly. + */ +.macro RETPOLINE_JMP reg:req + call .Ldo_rop_\@ +.Lspec_trap_\@: + pause + jmp .Lspec_trap_\@ +.Ldo_rop_\@: + mov \reg, (%_ASM_SP) + ret +.endm + +/* + * This is a wrapper around RETPOLINE_JMP so the called function in reg + * returns to the instruction after the macro. + */ +.macro RETPOLINE_CALL reg:req + jmp .Ldo_call_\@ +.Ldo_retpoline_jmp_\@: + RETPOLINE_JMP \reg +.Ldo_call_\@: + call .Ldo_retpoline_jmp_\@ +.endm + +/* + * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple + * indirect jmp/call which may be susceptible to the Spectre variant 2 + * attack. + */ +.macro JMP_NOSPEC reg:req +#ifdef CONFIG_RETPOLINE + ALTERNATIVE_2 __stringify(jmp *\reg), \ + __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \ + __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD +#else + jmp *\reg +#endif +.endm + +.macro CALL_NOSPEC reg:req +#ifdef CONFIG_RETPOLINE + ALTERNATIVE_2 __stringify(call *\reg), \ + __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\ + __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD +#else + call *\reg +#endif +.endm + +#else /* __ASSEMBLY__ */ + +#if defined(CONFIG_X86_64) && defined(RETPOLINE) + +/* + * Since the inline asm uses the %V modifier which is only in newer GCC, + * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE. + */ +# define CALL_NOSPEC \ + ALTERNATIVE( \ + "call *%[thunk_target]\n", \ + "call __x86_indirect_thunk_%V[thunk_target]\n", \ + X86_FEATURE_RETPOLINE) +# define THUNK_TARGET(addr) [thunk_target] "r" (addr) + +#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE) +/* + * For i386 we use the original ret-equivalent retpoline, because + * otherwise we'll run out of registers. We don't care about CET + * here, anyway. + */ +# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n", \ + " jmp 904f;\n" \ + " .align 16\n" \ + "901: call 903f;\n" \ + "902: pause;\n" \ + " jmp 902b;\n" \ + " .align 16\n" \ + "903: addl $4, %%esp;\n" \ + " pushl %[thunk_target];\n" \ + " ret;\n" \ + " .align 16\n" \ + "904: call 901b;\n", \ + X86_FEATURE_RETPOLINE) + +# define THUNK_TARGET(addr) [thunk_target] "rm" (addr) +#else /* No retpoline */ +# define CALL_NOSPEC "call *%[thunk_target]\n" +# define THUNK_TARGET(addr) [thunk_target] "rm" (addr) +#endif + +#endif /* __ASSEMBLY__ */ +#endif /* __NOSPEC_BRANCH_H__ */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 458c59508c307..faee115cc52f8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -773,6 +773,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_SPECTRE_V1); setup_force_cpu_bug(X86_BUG_SPECTRE_V2); + +#ifdef CONFIG_RETPOLINE + setup_force_cpu_cap(X86_FEATURE_RETPOLINE); +#endif } void __init early_cpu_init(void) diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 2db5846d86edb..bd1dcf5a76ab5 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -23,6 +23,8 @@ lib-y += memcpy_$(BITS).o lib-$(CONFIG_SMP) += rwlock.o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o +lib-$(CONFIG_RETPOLINE) += retpoline.o +obj-$(CONFIG_RETPOLINE) += retpoline-export.o obj-y += msr.o msr-reg.o msr-reg-export.o diff --git a/arch/x86/lib/retpoline-export.c b/arch/x86/lib/retpoline-export.c new file mode 100644 index 0000000000000..fc2f666c11070 --- /dev/null +++ b/arch/x86/lib/retpoline-export.c @@ -0,0 +1,26 @@ +#include +#include + +#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_X86_32 +#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void); EXPORT_SYMBOL(__x86_indirect_thunk_e ## reg); +#else +#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void); EXPORT_SYMBOL(__x86_indirect_thunk_r ## reg); +INDIRECT_THUNK(8) +INDIRECT_THUNK(9) +INDIRECT_THUNK(10) +INDIRECT_THUNK(11) +INDIRECT_THUNK(12) +INDIRECT_THUNK(13) +INDIRECT_THUNK(14) +INDIRECT_THUNK(15) +#endif +INDIRECT_THUNK(ax) +INDIRECT_THUNK(bx) +INDIRECT_THUNK(cx) +INDIRECT_THUNK(dx) +INDIRECT_THUNK(si) +INDIRECT_THUNK(di) +INDIRECT_THUNK(bp) +INDIRECT_THUNK(sp) +#endif /* CONFIG_RETPOLINE */ diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S new file mode 100644 index 0000000000000..f849d4733aadb --- /dev/null +++ b/arch/x86/lib/retpoline.S @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include +#include +#include +#include +#include + +.macro THUNK reg + .section .text.__x86.indirect_thunk.\reg + +ENTRY(__x86_indirect_thunk_\reg) + CFI_STARTPROC + JMP_NOSPEC %\reg + CFI_ENDPROC +ENDPROC(__x86_indirect_thunk_\reg) +.endm + +/* + * Despite being an assembler file we can't just use .irp here + * because __KSYM_DEPS__ only uses the C preprocessor and would + * only see one instance of "__x86_indirect_thunk_\reg" rather + * than one per register with the correct names. So we do it + * the simple and nasty way... + */ +#define GENERATE_THUNK(reg) THUNK reg + +GENERATE_THUNK(_ASM_AX) +GENERATE_THUNK(_ASM_BX) +GENERATE_THUNK(_ASM_CX) +GENERATE_THUNK(_ASM_DX) +GENERATE_THUNK(_ASM_SI) +GENERATE_THUNK(_ASM_DI) +GENERATE_THUNK(_ASM_BP) +GENERATE_THUNK(_ASM_SP) +#ifdef CONFIG_64BIT +GENERATE_THUNK(r8) +GENERATE_THUNK(r9) +GENERATE_THUNK(r10) +GENERATE_THUNK(r11) +GENERATE_THUNK(r12) +GENERATE_THUNK(r13) +GENERATE_THUNK(r14) +GENERATE_THUNK(r15) +#endif -- 2.47.2