]>
Commit | Line | Data |
---|---|---|
3a47a0a4 GKH |
1 | From 0ea410a7080341a041df56e24d4af551277dbf13 Mon Sep 17 00:00:00 2001 |
2 | From: Linus Torvalds <torvalds@linux-foundation.org> | |
3 | Date: Wed, 3 Apr 2024 16:36:44 -0700 | |
4 | Subject: x86/syscall: Don't force use of indirect calls for system calls | |
5 | ||
6 | From: Linus Torvalds <torvalds@linux-foundation.org> | |
7 | ||
8 | commit 1e3ad78334a69b36e107232e337f9d693dcc9df2 upstream. | |
9 | ||
10 | Make <asm/syscall.h> build a switch statement instead, and the compiler can | |
11 | either decide to generate an indirect jump, or - more likely these days due | |
12 | to mitigations - just a series of conditional branches. | |
13 | ||
14 | Yes, the conditional branches are also subject to branch prediction, but that | |
15 | prediction is much more controlled: a misprediction merely causes the wrong | |
16 | system call to run speculatively (harmless), rather than speculatively running | |
17 | arbitrary, less controlled code gadgets. | |
18 | ||
19 | This doesn't mitigate other indirect calls, but the system call indirection | |
20 | is the first and most easily triggered case. | |
21 | ||
22 | Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> | |
23 | Signed-off-by: Daniel Sneddon <daniel.sneddon@linux.intel.com> | |
24 | Signed-off-by: Thomas Gleixner <tglx@linutronix.de> | |
25 | Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> | |
26 | Signed-off-by: Daniel Sneddon <daniel.sneddon@linux.intel.com> | |
27 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
28 | --- | |
29 | arch/x86/entry/common.c | 6 +++--- | |
30 | arch/x86/entry/syscall_32.c | 21 +++++++++++++++++++-- | |
31 | arch/x86/entry/syscall_64.c | 19 +++++++++++++++++-- | |
32 | arch/x86/entry/syscall_x32.c | 10 +++++++--- | |
33 | arch/x86/include/asm/syscall.h | 10 ++++------ | |
34 | 5 files changed, 50 insertions(+), 16 deletions(-) | |
35 | ||
36 | --- a/arch/x86/entry/common.c | |
37 | +++ b/arch/x86/entry/common.c | |
38 | @@ -48,7 +48,7 @@ static __always_inline bool do_syscall_x | |
39 | ||
40 | if (likely(unr < NR_syscalls)) { | |
41 | unr = array_index_nospec(unr, NR_syscalls); | |
42 | - regs->ax = sys_call_table[unr](regs); | |
43 | + regs->ax = x64_sys_call(regs, unr); | |
44 | return true; | |
45 | } | |
46 | return false; | |
47 | @@ -65,7 +65,7 @@ static __always_inline bool do_syscall_x | |
48 | ||
49 | if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) { | |
50 | xnr = array_index_nospec(xnr, X32_NR_syscalls); | |
51 | - regs->ax = x32_sys_call_table[xnr](regs); | |
52 | + regs->ax = x32_sys_call(regs, xnr); | |
53 | return true; | |
54 | } | |
55 | return false; | |
56 | @@ -114,7 +114,7 @@ static __always_inline void do_syscall_3 | |
57 | ||
58 | if (likely(unr < IA32_NR_syscalls)) { | |
59 | unr = array_index_nospec(unr, IA32_NR_syscalls); | |
60 | - regs->ax = ia32_sys_call_table[unr](regs); | |
61 | + regs->ax = ia32_sys_call(regs, unr); | |
62 | } else if (nr != -1) { | |
63 | regs->ax = __ia32_sys_ni_syscall(regs); | |
64 | } | |
65 | --- a/arch/x86/entry/syscall_32.c | |
66 | +++ b/arch/x86/entry/syscall_32.c | |
67 | @@ -18,8 +18,25 @@ | |
68 | #include <asm/syscalls_32.h> | |
69 | #undef __SYSCALL | |
70 | ||
71 | +/* | |
72 | + * The sys_call_table[] is no longer used for system calls, but | |
73 | + * kernel/trace/trace_syscalls.c still wants to know the system | |
74 | + * call address. | |
75 | + */ | |
76 | +#ifdef CONFIG_X86_32 | |
77 | #define __SYSCALL(nr, sym) __ia32_##sym, | |
78 | - | |
79 | -__visible const sys_call_ptr_t ia32_sys_call_table[] = { | |
80 | +const sys_call_ptr_t sys_call_table[] = { | |
81 | #include <asm/syscalls_32.h> | |
82 | }; | |
83 | +#undef __SYSCALL | |
84 | +#endif | |
85 | + | |
86 | +#define __SYSCALL(nr, sym) case nr: return __ia32_##sym(regs); | |
87 | + | |
88 | +long ia32_sys_call(const struct pt_regs *regs, unsigned int nr) | |
89 | +{ | |
90 | + switch (nr) { | |
91 | + #include <asm/syscalls_32.h> | |
92 | + default: return __ia32_sys_ni_syscall(regs); | |
93 | + } | |
94 | +}; | |
95 | --- a/arch/x86/entry/syscall_64.c | |
96 | +++ b/arch/x86/entry/syscall_64.c | |
97 | @@ -11,8 +11,23 @@ | |
98 | #include <asm/syscalls_64.h> | |
99 | #undef __SYSCALL | |
100 | ||
101 | +/* | |
102 | + * The sys_call_table[] is no longer used for system calls, but | |
103 | + * kernel/trace/trace_syscalls.c still wants to know the system | |
104 | + * call address. | |
105 | + */ | |
106 | #define __SYSCALL(nr, sym) __x64_##sym, | |
107 | - | |
108 | -asmlinkage const sys_call_ptr_t sys_call_table[] = { | |
109 | +const sys_call_ptr_t sys_call_table[] = { | |
110 | #include <asm/syscalls_64.h> | |
111 | }; | |
112 | +#undef __SYSCALL | |
113 | + | |
114 | +#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs); | |
115 | + | |
116 | +long x64_sys_call(const struct pt_regs *regs, unsigned int nr) | |
117 | +{ | |
118 | + switch (nr) { | |
119 | + #include <asm/syscalls_64.h> | |
120 | + default: return __x64_sys_ni_syscall(regs); | |
121 | + } | |
122 | +}; | |
123 | --- a/arch/x86/entry/syscall_x32.c | |
124 | +++ b/arch/x86/entry/syscall_x32.c | |
125 | @@ -11,8 +11,12 @@ | |
126 | #include <asm/syscalls_x32.h> | |
127 | #undef __SYSCALL | |
128 | ||
129 | -#define __SYSCALL(nr, sym) __x64_##sym, | |
130 | +#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs); | |
131 | ||
132 | -asmlinkage const sys_call_ptr_t x32_sys_call_table[] = { | |
133 | -#include <asm/syscalls_x32.h> | |
134 | +long x32_sys_call(const struct pt_regs *regs, unsigned int nr) | |
135 | +{ | |
136 | + switch (nr) { | |
137 | + #include <asm/syscalls_x32.h> | |
138 | + default: return __x64_sys_ni_syscall(regs); | |
139 | + } | |
140 | }; | |
141 | --- a/arch/x86/include/asm/syscall.h | |
142 | +++ b/arch/x86/include/asm/syscall.h | |
143 | @@ -16,19 +16,17 @@ | |
144 | #include <asm/thread_info.h> /* for TS_COMPAT */ | |
145 | #include <asm/unistd.h> | |
146 | ||
147 | +/* This is used purely for kernel/trace/trace_syscalls.c */ | |
148 | typedef long (*sys_call_ptr_t)(const struct pt_regs *); | |
149 | extern const sys_call_ptr_t sys_call_table[]; | |
150 | ||
151 | -#if defined(CONFIG_X86_32) | |
152 | -#define ia32_sys_call_table sys_call_table | |
153 | -#else | |
154 | /* | |
155 | * These may not exist, but still put the prototypes in so we | |
156 | * can use IS_ENABLED(). | |
157 | */ | |
158 | -extern const sys_call_ptr_t ia32_sys_call_table[]; | |
159 | -extern const sys_call_ptr_t x32_sys_call_table[]; | |
160 | -#endif | |
161 | +extern long ia32_sys_call(const struct pt_regs *, unsigned int nr); | |
162 | +extern long x32_sys_call(const struct pt_regs *, unsigned int nr); | |
163 | +extern long x64_sys_call(const struct pt_regs *, unsigned int nr); | |
164 | ||
165 | /* | |
166 | * Only the low 32 bits of orig_ax are meaningful, so we return int. |