/* Speculation tracking and mitigation (e.g. CVE 2017-5753) for AArch64.
   Copyright (C) 2018-2023 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "target.h"
#include "rtl.h"
#include "tree-pass.h"
#include "profile-count.h"
#include "backend.h"
#include "cfgbuild.h"
#include "print-rtl.h"
#include "cfgrtl.h"
#include "function.h"
#include "basic-block.h"
#include "memmodel.h"
#include "emit-rtl.h"
#include "insn-attr.h"
#include "df.h"
#include "tm_p.h"
#include "insn-config.h"
#include "recog.h"
/* This pass scans the RTL just before the final branch
   re-organisation pass.  The aim is to identify all places where
   there is conditional control flow and to insert code that tracks
   any speculative execution of a conditional branch.

   To do this we reserve a call-clobbered register (so that it can be
   initialized very early in the function prologue) that can then be
   updated each time there is a conditional branch.  At each such
   branch we then generate a code sequence that uses conditional
   select operations that are not subject to speculation themselves
   (we ignore for the moment situations where that might not always be
   strictly true).  For example, a branch sequence such as:

	B.EQ <dst>
	...
   <dst>:

   is transformed to:

	B.EQ <dst>
	CSEL tracker, tracker, XZr, ne
	...
   <dst>:
	CSEL tracker, tracker, XZr, eq

   Since we start with the tracker initialized to all bits one, if at any
   time the predicted control flow diverges from the architectural program
   behavior, then the tracker will become zero (but not otherwise).

   The tracker value can be used at any time at which a value needs
   guarding against incorrect speculation.  This can be done in
   several ways, but they all amount to the same thing.  For an
   untrusted address, or an untrusted offset to a trusted address, we
   can simply mask the address with the tracker with the untrusted
   value.  If the CPU is not speculating, or speculating correctly,
   then the value will remain unchanged, otherwise it will be clamped
   to zero.  For more complex scenarios we can compare the tracker
   against zero and use the flags to form a new selection with an
   alternate safe value.

   On implementations where the data processing instructions may
   themselves produce speculative values, the architecture requires
   that a CSDB instruction will resolve such data speculation, so each
   time we use the tracker for protecting a vulnerable value we also
   emit a CSDB: we do not need to do that each time the tracker itself
   is updated.

   At function boundaries, we need to communicate the speculation
   tracking state with the caller or the callee.  This is tricky
   because there is no register available for such a purpose without
   creating a new ABI.  We deal with this by relying on the principle
   that in all real programs the stack pointer, SP will never be NULL
   at a function boundary; we can thus encode the speculation state in
   SP by clearing SP if the speculation tracker itself is NULL.  After
   the call we recover the tracking state back from SP into the
   tracker register.  The results is that a function call sequence is
   transformed to

	MOV tmp, SP
	AND tmp, tmp, tracker
	MOV SP, tmp
	BL <callee>
	CMP SP, #0
	CSETM tracker, ne

   The additional MOV instructions in the pre-call sequence are needed
   because SP cannot be used directly with the AND instruction.

   The code inside a function body uses the post-call sequence in the
   prologue to establish the tracker and the pre-call sequence in the
   epilogue to re-encode the state for the return.

   The code sequences have the nice property that if called from, or
   calling a function that does not track speculation then the stack pointer
   will always be non-NULL and hence the tracker will be initialized to all
   bits one as we need: we lose the ability to fully track speculation in that
   case, but we are still architecturally safe.

   Tracking speculation in this way is quite expensive, both in code
   size and execution time.  We employ a number of tricks to try to
   limit this:

   1) Simple leaf functions with no conditional branches (or use of
   the tracker) do not need to establish a new tracker: they simply
   carry the tracking state through SP for the duration of the call.
   The same is also true for leaf functions that end in a tail-call.

   2) Back-to-back function calls in a single basic block also do not
   need to re-establish the tracker between the calls.  Again, we can
   carry the tracking state in SP for this period of time unless the
   tracker value is needed at that point in time.

   We run the pass just before the final branch reorganization pass so
   that we can handle most of the conditional branch cases using the
   standard edge insertion code.  The reorg pass will hopefully clean
   things up for afterwards so that the results aren't too
   horrible.  */
140 | /* Generate a code sequence to clobber SP if speculating incorreclty. */ | |
141 | static rtx_insn * | |
142 | aarch64_speculation_clobber_sp () | |
143 | { | |
144 | rtx sp = gen_rtx_REG (DImode, SP_REGNUM); | |
145 | rtx tracker = gen_rtx_REG (DImode, SPECULATION_TRACKER_REGNUM); | |
146 | rtx scratch = gen_rtx_REG (DImode, SPECULATION_SCRATCH_REGNUM); | |
147 | ||
148 | start_sequence (); | |
149 | emit_insn (gen_rtx_SET (scratch, sp)); | |
150 | emit_insn (gen_anddi3 (scratch, scratch, tracker)); | |
151 | emit_insn (gen_rtx_SET (sp, scratch)); | |
152 | rtx_insn *seq = get_insns (); | |
153 | end_sequence (); | |
154 | return seq; | |
155 | } | |
156 | ||
157 | /* Generate a code sequence to establish the tracker variable from the | |
158 | contents of SP. */ | |
159 | static rtx_insn * | |
160 | aarch64_speculation_establish_tracker () | |
161 | { | |
162 | rtx sp = gen_rtx_REG (DImode, SP_REGNUM); | |
163 | rtx tracker = gen_rtx_REG (DImode, SPECULATION_TRACKER_REGNUM); | |
164 | start_sequence (); | |
165 | rtx cc = aarch64_gen_compare_reg (EQ, sp, const0_rtx); | |
166 | emit_insn (gen_cstoredi_neg (tracker, | |
167 | gen_rtx_NE (CCmode, cc, const0_rtx), cc)); | |
168 | rtx_insn *seq = get_insns (); | |
169 | end_sequence (); | |
170 | return seq; | |
171 | } | |
172 | ||
173 | /* Main speculation tracking pass. */ | |
174 | unsigned int | |
175 | aarch64_do_track_speculation () | |
176 | { | |
177 | basic_block bb; | |
178 | bool needs_tracking = false; | |
179 | bool need_second_pass = false; | |
180 | rtx_insn *insn; | |
181 | int fixups_pending = 0; | |
182 | ||
183 | FOR_EACH_BB_FN (bb, cfun) | |
184 | { | |
185 | insn = BB_END (bb); | |
186 | ||
187 | if (dump_file) | |
188 | fprintf (dump_file, "Basic block %d:\n", bb->index); | |
189 | ||
190 | while (insn != BB_HEAD (bb) | |
191 | && NOTE_P (insn)) | |
192 | insn = PREV_INSN (insn); | |
193 | ||
194 | if (control_flow_insn_p (insn)) | |
195 | { | |
196 | if (any_condjump_p (insn)) | |
197 | { | |
198 | if (dump_file) | |
199 | { | |
200 | fprintf (dump_file, " condjump\n"); | |
201 | dump_insn_slim (dump_file, insn); | |
202 | } | |
203 | ||
204 | rtx src = SET_SRC (pc_set (insn)); | |
205 | ||
206 | /* Check for an inverted jump, where the fall-through edge | |
207 | appears first. */ | |
208 | bool inverted = GET_CODE (XEXP (src, 2)) != PC; | |
209 | /* The other edge must be the PC (we assume that we don't | |
210 | have conditional return instructions). */ | |
211 | gcc_assert (GET_CODE (XEXP (src, 1 + !inverted)) == PC); | |
212 | ||
213 | rtx cond = copy_rtx (XEXP (src, 0)); | |
214 | gcc_assert (COMPARISON_P (cond) | |
215 | && REG_P (XEXP (cond, 0)) | |
216 | && REGNO (XEXP (cond, 0)) == CC_REGNUM | |
217 | && XEXP (cond, 1) == const0_rtx); | |
2214053f RS |
218 | rtx branch_tracker = gen_speculation_tracker (copy_rtx (cond)); |
219 | rtx fallthru_tracker = gen_speculation_tracker_rev (cond); | |
3751345d | 220 | if (inverted) |
2214053f | 221 | std::swap (branch_tracker, fallthru_tracker); |
3751345d | 222 | |
2214053f RS |
223 | insert_insn_on_edge (branch_tracker, BRANCH_EDGE (bb)); |
224 | insert_insn_on_edge (fallthru_tracker, FALLTHRU_EDGE (bb)); | |
3751345d RE |
225 | needs_tracking = true; |
226 | } | |
227 | else if (GET_CODE (PATTERN (insn)) == RETURN) | |
228 | { | |
229 | /* If we already know we'll need a second pass, don't put | |
230 | out the return sequence now, or we might end up with | |
231 | two copies. Instead, we'll do all return statements | |
232 | during the second pass. However, if this is the | |
233 | first return insn we've found and we already | |
234 | know that we'll need to emit the code, we can save a | |
235 | second pass by emitting the code now. */ | |
236 | if (needs_tracking && ! need_second_pass) | |
237 | { | |
238 | rtx_insn *seq = aarch64_speculation_clobber_sp (); | |
239 | emit_insn_before (seq, insn); | |
240 | } | |
241 | else | |
242 | { | |
243 | fixups_pending++; | |
244 | need_second_pass = true; | |
245 | } | |
246 | } | |
247 | else if (find_reg_note (insn, REG_NON_LOCAL_GOTO, NULL_RTX)) | |
248 | { | |
249 | rtx_insn *seq = aarch64_speculation_clobber_sp (); | |
250 | emit_insn_before (seq, insn); | |
251 | needs_tracking = true; | |
252 | } | |
253 | } | |
254 | else | |
255 | { | |
256 | if (dump_file) | |
257 | { | |
258 | fprintf (dump_file, " other\n"); | |
259 | dump_insn_slim (dump_file, insn); | |
260 | } | |
261 | } | |
262 | } | |
263 | ||
264 | FOR_EACH_BB_FN (bb, cfun) | |
265 | { | |
266 | rtx_insn *end = BB_END (bb); | |
267 | rtx_insn *call_insn = NULL; | |
268 | ||
269 | if (bb->flags & BB_NON_LOCAL_GOTO_TARGET) | |
270 | { | |
271 | rtx_insn *label = NULL; | |
272 | /* For non-local goto targets we have to recover the | |
273 | speculation state from SP. Find the last code label at | |
274 | the head of the block and place the fixup sequence after | |
275 | that. */ | |
276 | for (insn = BB_HEAD (bb); insn != end; insn = NEXT_INSN (insn)) | |
277 | { | |
278 | if (LABEL_P (insn)) | |
279 | label = insn; | |
280 | /* Never put anything before the basic block note. */ | |
281 | if (NOTE_INSN_BASIC_BLOCK_P (insn)) | |
282 | label = insn; | |
283 | if (INSN_P (insn)) | |
284 | break; | |
285 | } | |
286 | ||
287 | gcc_assert (label); | |
288 | emit_insn_after (aarch64_speculation_establish_tracker (), label); | |
289 | } | |
290 | ||
291 | /* Scan the insns looking for calls. We need to pass the | |
292 | speculation tracking state encoded in to SP. After a call we | |
293 | restore the speculation tracking into the tracker register. | |
294 | To avoid unnecessary transfers we look for two or more calls | |
295 | within a single basic block and eliminate, where possible, | |
296 | any redundant operations. */ | |
297 | for (insn = BB_HEAD (bb); ; insn = NEXT_INSN (insn)) | |
298 | { | |
299 | if (NONDEBUG_INSN_P (insn) | |
300 | && recog_memoized (insn) >= 0 | |
301 | && (get_attr_speculation_barrier (insn) | |
302 | == SPECULATION_BARRIER_TRUE)) | |
303 | { | |
304 | if (call_insn) | |
305 | { | |
306 | /* This instruction requires the speculation | |
307 | tracking to be in the tracker register. If there | |
308 | was an earlier call in this block, we need to | |
309 | copy the speculation tracking back there. */ | |
310 | emit_insn_after (aarch64_speculation_establish_tracker (), | |
311 | call_insn); | |
312 | call_insn = NULL; | |
313 | } | |
314 | ||
315 | needs_tracking = true; | |
316 | } | |
317 | ||
318 | if (CALL_P (insn)) | |
319 | { | |
320 | bool tailcall | |
321 | = (SIBLING_CALL_P (insn) | |
322 | || find_reg_note (insn, REG_NORETURN, NULL_RTX)); | |
323 | ||
324 | /* Tailcalls are like returns, we can eliminate the | |
325 | transfer between the tracker register and SP if we | |
326 | know that this function does not itself need | |
327 | tracking. */ | |
328 | if (tailcall && (need_second_pass || !needs_tracking)) | |
329 | { | |
330 | /* Don't clear call_insn if it is set - needs_tracking | |
331 | will be true in that case and so we will end | |
332 | up putting out mitigation sequences. */ | |
333 | fixups_pending++; | |
334 | need_second_pass = true; | |
335 | break; | |
336 | } | |
337 | ||
338 | needs_tracking = true; | |
339 | ||
340 | /* We always need a transfer before the first call in a BB. */ | |
341 | if (!call_insn) | |
342 | emit_insn_before (aarch64_speculation_clobber_sp (), insn); | |
343 | ||
344 | /* Tail-calls and no-return calls don't need any post-call | |
345 | reestablishment of the tracker. */ | |
346 | if (! tailcall) | |
347 | call_insn = insn; | |
348 | else | |
349 | call_insn = NULL; | |
350 | } | |
351 | ||
352 | if (insn == end) | |
353 | break; | |
354 | } | |
355 | ||
356 | if (call_insn) | |
357 | { | |
358 | rtx_insn *seq = aarch64_speculation_establish_tracker (); | |
359 | ||
360 | /* Handle debug insns at the end of the BB. Put the extra | |
361 | insns after them. This ensures that we have consistent | |
362 | behaviour for the placement of the extra insns between | |
363 | debug and non-debug builds. */ | |
364 | for (insn = call_insn; | |
365 | insn != end && DEBUG_INSN_P (NEXT_INSN (insn)); | |
366 | insn = NEXT_INSN (insn)) | |
367 | ; | |
368 | ||
369 | if (insn == end) | |
370 | { | |
371 | edge e = find_fallthru_edge (bb->succs); | |
372 | /* We need to be very careful about some calls that | |
373 | appear at the end of a basic block. If the call | |
374 | involves exceptions, then the compiler may depend on | |
375 | this being the last instruction in the block. The | |
376 | easiest way to handle this is to commit the new | |
377 | instructions on the fall-through edge and to let | |
378 | commit_edge_insertions clean things up for us. | |
379 | ||
380 | Sometimes, eg with OMP, there may not even be an | |
381 | outgoing edge after the call. In that case, there's | |
382 | not much we can do, presumably the compiler has | |
383 | decided that the call can never return in this | |
384 | context. */ | |
385 | if (e) | |
386 | { | |
387 | /* We need to set the location lists explicitly in | |
388 | this case. */ | |
389 | if (! INSN_P (seq)) | |
390 | { | |
391 | start_sequence (); | |
392 | emit_insn (seq); | |
393 | seq = get_insns (); | |
394 | end_sequence (); | |
395 | } | |
396 | ||
397 | for (rtx_insn *list = seq; list; list = NEXT_INSN (list)) | |
398 | INSN_LOCATION (list) = INSN_LOCATION (call_insn); | |
399 | ||
400 | insert_insn_on_edge (seq, e); | |
401 | } | |
402 | } | |
403 | else | |
404 | emit_insn_after (seq, call_insn); | |
405 | } | |
406 | } | |
407 | ||
408 | if (needs_tracking) | |
409 | { | |
410 | if (need_second_pass) | |
411 | { | |
412 | /* We found a return instruction before we found out whether | |
413 | or not we need to emit the tracking code, but we now | |
414 | know we do. Run quickly over the basic blocks and | |
415 | fix up the return insns. */ | |
416 | FOR_EACH_BB_FN (bb, cfun) | |
417 | { | |
418 | insn = BB_END (bb); | |
419 | ||
420 | while (insn != BB_HEAD (bb) | |
421 | && NOTE_P (insn)) | |
422 | insn = PREV_INSN (insn); | |
423 | ||
424 | if ((control_flow_insn_p (insn) | |
425 | && GET_CODE (PATTERN (insn)) == RETURN) | |
426 | || (CALL_P (insn) | |
427 | && (SIBLING_CALL_P (insn) | |
428 | || find_reg_note (insn, REG_NORETURN, NULL_RTX)))) | |
429 | { | |
430 | rtx_insn *seq = aarch64_speculation_clobber_sp (); | |
431 | emit_insn_before (seq, insn); | |
432 | fixups_pending--; | |
433 | } | |
434 | } | |
435 | gcc_assert (fixups_pending == 0); | |
436 | } | |
437 | ||
438 | /* Set up the initial value of the tracker, using the incoming SP. */ | |
439 | insert_insn_on_edge (aarch64_speculation_establish_tracker (), | |
440 | single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))); | |
441 | commit_edge_insertions (); | |
442 | } | |
443 | ||
444 | return 0; | |
445 | } | |
446 | ||
447 | namespace { | |
448 | ||
449 | const pass_data pass_data_aarch64_track_speculation = | |
450 | { | |
451 | RTL_PASS, /* type. */ | |
452 | "speculation", /* name. */ | |
453 | OPTGROUP_NONE, /* optinfo_flags. */ | |
454 | TV_MACH_DEP, /* tv_id. */ | |
455 | 0, /* properties_required. */ | |
456 | 0, /* properties_provided. */ | |
457 | 0, /* properties_destroyed. */ | |
458 | 0, /* todo_flags_start. */ | |
459 | 0 /* todo_flags_finish. */ | |
460 | }; | |
461 | ||
462 | class pass_track_speculation : public rtl_opt_pass | |
463 | { | |
464 | public: | |
465 | pass_track_speculation(gcc::context *ctxt) | |
466 | : rtl_opt_pass(pass_data_aarch64_track_speculation, ctxt) | |
467 | {} | |
468 | ||
469 | /* opt_pass methods: */ | |
470 | virtual bool gate (function *) | |
471 | { | |
472 | return aarch64_track_speculation; | |
473 | } | |
474 | ||
475 | virtual unsigned int execute (function *) | |
476 | { | |
477 | return aarch64_do_track_speculation (); | |
478 | } | |
479 | }; // class pass_track_speculation. | |
480 | } // anon namespace. | |
481 | ||
482 | /* Create a new pass instance. */ | |
483 | rtl_opt_pass * | |
484 | make_pass_track_speculation (gcc::context *ctxt) | |
485 | { | |
486 | return new pass_track_speculation (ctxt); | |
487 | } |