]>
Commit | Line | Data |
---|---|---|
5351d7ec GKH |
1 | From foo@baz Fri Mar 9 14:18:36 PST 2018 |
2 | From: Daniel Borkmann <daniel@iogearbox.net> | |
3 | Date: Thu, 8 Mar 2018 13:14:43 +0100 | |
4 | Subject: bpf, x64: implement retpoline for tail call | |
5 | To: gregkh@linuxfoundation.org | |
6 | Cc: ast@kernel.org, daniel@iogearbox.net, stable@vger.kernel.org | |
7 | Message-ID: <f3349163f9a6f32c6e845e9a9e1a82d0d69110fe.1520504748.git.daniel@iogearbox.net> | |
8 | ||
9 | From: Daniel Borkmann <daniel@iogearbox.net> | |
10 | ||
11 | [ upstream commit a493a87f38cfa48caaa95c9347be2d914c6fdf29 ] | |
12 | ||
13 | Implement a retpoline [0] for the BPF tail call JIT'ing that converts | |
14 | the indirect jump via jmp %rax that is used to make the long jump into | |
15 | another JITed BPF image. Since this is subject to speculative execution, | |
16 | we need to control the transient instruction sequence here as well | |
17 | when CONFIG_RETPOLINE is set, and direct it into a pause + lfence loop. | |
18 | The latter also aligns with what gcc / clang emit (e.g. [1]). | |
19 | ||
20 | JIT dump after patch: | |
21 | ||
22 | # bpftool p d x i 1 | |
23 | 0: (18) r2 = map[id:1] | |
24 | 2: (b7) r3 = 0 | |
25 | 3: (85) call bpf_tail_call#12 | |
26 | 4: (b7) r0 = 2 | |
27 | 5: (95) exit | |
28 | ||
29 | With CONFIG_RETPOLINE: | |
30 | ||
31 | # bpftool p d j i 1 | |
32 | [...] | |
33 | 33: cmp %edx,0x24(%rsi) | |
34 | 36: jbe 0x0000000000000072 |* | |
35 | 38: mov 0x24(%rbp),%eax | |
36 | 3e: cmp $0x20,%eax | |
37 | 41: ja 0x0000000000000072 | | |
38 | 43: add $0x1,%eax | |
39 | 46: mov %eax,0x24(%rbp) | |
40 | 4c: mov 0x90(%rsi,%rdx,8),%rax | |
41 | 54: test %rax,%rax | |
42 | 57: je 0x0000000000000072 | | |
43 | 59: mov 0x28(%rax),%rax | |
44 | 5d: add $0x25,%rax | |
45 | 61: callq 0x000000000000006d |+ | |
46 | 66: pause | | |
47 | 68: lfence | | |
48 | 6b: jmp 0x0000000000000066 | | |
49 | 6d: mov %rax,(%rsp) | | |
50 | 71: retq | | |
51 | 72: mov $0x2,%eax | |
52 | [...] | |
53 | ||
54 | * relative fall-through jumps in error case | |
55 | + retpoline for indirect jump | |
56 | ||
57 | Without CONFIG_RETPOLINE: | |
58 | ||
59 | # bpftool p d j i 1 | |
60 | [...] | |
61 | 33: cmp %edx,0x24(%rsi) | |
62 | 36: jbe 0x0000000000000063 |* | |
63 | 38: mov 0x24(%rbp),%eax | |
64 | 3e: cmp $0x20,%eax | |
65 | 41: ja 0x0000000000000063 | | |
66 | 43: add $0x1,%eax | |
67 | 46: mov %eax,0x24(%rbp) | |
68 | 4c: mov 0x90(%rsi,%rdx,8),%rax | |
69 | 54: test %rax,%rax | |
70 | 57: je 0x0000000000000063 | | |
71 | 59: mov 0x28(%rax),%rax | |
72 | 5d: add $0x25,%rax | |
73 | 61: jmpq *%rax |- | |
74 | 63: mov $0x2,%eax | |
75 | [...] | |
76 | ||
77 | * relative fall-through jumps in error case | |
78 | - plain indirect jump as before | |
79 | ||
80 | [0] https://support.google.com/faqs/answer/7625886 | |
81 | [1] https://github.com/gcc-mirror/gcc/commit/a31e654fa107be968b802786d747e962c2fcdb2b | |
82 | ||
83 | Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> | |
84 | Signed-off-by: Alexei Starovoitov <ast@kernel.org> | |
85 | Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> | |
86 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
87 | --- | |
88 | arch/x86/include/asm/nospec-branch.h | 37 +++++++++++++++++++++++++++++++++++ | |
89 | arch/x86/net/bpf_jit_comp.c | 9 ++++---- | |
90 | 2 files changed, 42 insertions(+), 4 deletions(-) | |
91 | ||
92 | --- a/arch/x86/include/asm/nospec-branch.h | |
93 | +++ b/arch/x86/include/asm/nospec-branch.h | |
94 | @@ -177,4 +177,41 @@ static inline void indirect_branch_predi | |
95 | } | |
96 | ||
97 | #endif /* __ASSEMBLY__ */ | |
98 | + | |
99 | +/* | |
100 | + * Below is used in the eBPF JIT compiler and emits the byte sequence | |
101 | + * for the following assembly: | |
102 | + * | |
103 | + * With retpolines configured: | |
104 | + * | |
105 | + * callq do_rop | |
106 | + * spec_trap: | |
107 | + * pause | |
108 | + * lfence | |
109 | + * jmp spec_trap | |
110 | + * do_rop: | |
111 | + * mov %rax,(%rsp) | |
112 | + * retq | |
113 | + * | |
114 | + * Without retpolines configured: | |
115 | + * | |
116 | + * jmp *%rax | |
117 | + */ | |
118 | +#ifdef CONFIG_RETPOLINE | |
119 | +# define RETPOLINE_RAX_BPF_JIT_SIZE 17 | |
120 | +# define RETPOLINE_RAX_BPF_JIT() \ | |
121 | + EMIT1_off32(0xE8, 7); /* callq do_rop */ \ | |
122 | + /* spec_trap: */ \ | |
123 | + EMIT2(0xF3, 0x90); /* pause */ \ | |
124 | + EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \ | |
125 | + EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \ | |
126 | + /* do_rop: */ \ | |
127 | + EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */ \ | |
128 | + EMIT1(0xC3); /* retq */ | |
129 | +#else | |
130 | +# define RETPOLINE_RAX_BPF_JIT_SIZE 2 | |
131 | +# define RETPOLINE_RAX_BPF_JIT() \ | |
132 | + EMIT2(0xFF, 0xE0); /* jmp *%rax */ | |
133 | +#endif | |
134 | + | |
135 | #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ | |
136 | --- a/arch/x86/net/bpf_jit_comp.c | |
137 | +++ b/arch/x86/net/bpf_jit_comp.c | |
138 | @@ -13,6 +13,7 @@ | |
139 | #include <linux/if_vlan.h> | |
140 | #include <asm/cacheflush.h> | |
141 | #include <asm/set_memory.h> | |
142 | +#include <asm/nospec-branch.h> | |
143 | #include <linux/bpf.h> | |
144 | ||
145 | int bpf_jit_enable __read_mostly; | |
146 | @@ -287,7 +288,7 @@ static void emit_bpf_tail_call(u8 **ppro | |
147 | EMIT2(0x89, 0xD2); /* mov edx, edx */ | |
148 | EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ | |
149 | offsetof(struct bpf_array, map.max_entries)); | |
150 | -#define OFFSET1 43 /* number of bytes to jump */ | |
151 | +#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* number of bytes to jump */ | |
152 | EMIT2(X86_JBE, OFFSET1); /* jbe out */ | |
153 | label1 = cnt; | |
154 | ||
155 | @@ -296,7 +297,7 @@ static void emit_bpf_tail_call(u8 **ppro | |
156 | */ | |
157 | EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */ | |
158 | EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ | |
159 | -#define OFFSET2 32 | |
160 | +#define OFFSET2 (30 + RETPOLINE_RAX_BPF_JIT_SIZE) | |
161 | EMIT2(X86_JA, OFFSET2); /* ja out */ | |
162 | label2 = cnt; | |
163 | EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ | |
164 | @@ -310,7 +311,7 @@ static void emit_bpf_tail_call(u8 **ppro | |
165 | * goto out; | |
166 | */ | |
167 | EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */ | |
168 | -#define OFFSET3 10 | |
169 | +#define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE) | |
170 | EMIT2(X86_JE, OFFSET3); /* je out */ | |
171 | label3 = cnt; | |
172 | ||
173 | @@ -323,7 +324,7 @@ static void emit_bpf_tail_call(u8 **ppro | |
174 | * rdi == ctx (1st arg) | |
175 | * rax == prog->bpf_func + prologue_size | |
176 | */ | |
177 | - EMIT2(0xFF, 0xE0); /* jmp rax */ | |
178 | + RETPOLINE_RAX_BPF_JIT(); | |
179 | ||
180 | /* out: */ | |
181 | BUILD_BUG_ON(cnt - label1 != OFFSET1); |