]>
Commit | Line | Data |
---|---|---|
db242b6d | 1 | /* Detect paths through the CFG which can never be executed in a conforming |
2 | program and isolate them. | |
3 | ||
4 | Copyright (C) 2013 | |
5 | Free Software Foundation, Inc. | |
6 | ||
7 | This file is part of GCC. | |
8 | ||
9 | GCC is free software; you can redistribute it and/or modify | |
10 | it under the terms of the GNU General Public License as published by | |
11 | the Free Software Foundation; either version 3, or (at your option) | |
12 | any later version. | |
13 | ||
14 | GCC is distributed in the hope that it will be useful, | |
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
17 | GNU General Public License for more details. | |
18 | ||
19 | You should have received a copy of the GNU General Public License | |
20 | along with GCC; see the file COPYING3. If not see | |
21 | <http://www.gnu.org/licenses/>. */ | |
22 | ||
23 | #include "config.h" | |
24 | #include "system.h" | |
25 | #include "coretypes.h" | |
26 | #include "tree.h" | |
27 | #include "flags.h" | |
28 | #include "basic-block.h" | |
29 | #include "gimple.h" | |
dcf1a1ec | 30 | #include "gimple-iterator.h" |
31 | #include "gimple-walk.h" | |
db242b6d | 32 | #include "tree-ssa.h" |
9ed99284 | 33 | #include "stringpool.h" |
db242b6d | 34 | #include "tree-ssanames.h" |
35 | #include "gimple-ssa.h" | |
36 | #include "tree-ssa-operands.h" | |
37 | #include "tree-phinodes.h" | |
38 | #include "ssa-iterators.h" | |
39 | #include "cfgloop.h" | |
40 | #include "tree-pass.h" | |
b61383dd | 41 | #include "tree-cfg.h" |
db242b6d | 42 | |
43 | ||
44 | static bool cfg_altered; | |
45 | ||
c59258dc | 46 | /* Callback for walk_stmt_load_store_ops. |
47 | ||
48 | Return TRUE if OP will dereference the tree stored in DATA, FALSE | |
49 | otherwise. | |
50 | ||
51 | This routine only makes a superficial check for a dereference. Thus, | |
52 | it must only be used if it is safe to return a false negative. */ | |
53 | static bool | |
54 | check_loadstore (gimple stmt ATTRIBUTE_UNUSED, tree op, void *data) | |
55 | { | |
56 | if ((TREE_CODE (op) == MEM_REF || TREE_CODE (op) == TARGET_MEM_REF) | |
57 | && operand_equal_p (TREE_OPERAND (op, 0), (tree)data, 0)) | |
2073fe3e | 58 | { |
59 | TREE_THIS_VOLATILE (op) = 1; | |
60 | TREE_SIDE_EFFECTS (op) = 1; | |
61 | update_stmt (stmt); | |
62 | return true; | |
63 | } | |
c59258dc | 64 | return false; |
65 | } | |
66 | ||
67 | /* Insert a trap after SI and remove SI and all statements after the trap. */ | |
db242b6d | 68 | |
69 | static void | |
c59258dc | 70 | insert_trap_and_remove_trailing_statements (gimple_stmt_iterator *si_p, tree op) |
db242b6d | 71 | { |
c59258dc | 72 | /* We want the NULL pointer dereference to actually occur so that |
73 | code that wishes to catch the signal can do so. | |
74 | ||
75 | If the dereference is a load, then there's nothing to do as the | |
2073fe3e | 76 | LHS will be a throw-away SSA_NAME and the RHS is the NULL dereference. |
db242b6d | 77 | |
c59258dc | 78 | If the dereference is a store and we can easily transform the RHS, |
456dc91f | 79 | then simplify the RHS to enable more DCE. Note that we require the |
80 | statement to be a GIMPLE_ASSIGN which filters out calls on the RHS. */ | |
c59258dc | 81 | gimple stmt = gsi_stmt (*si_p); |
82 | if (walk_stmt_load_store_ops (stmt, (void *)op, NULL, check_loadstore) | |
456dc91f | 83 | && is_gimple_assign (stmt) |
c59258dc | 84 | && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt)))) |
85 | { | |
86 | /* We just need to turn the RHS into zero converted to the proper | |
87 | type. */ | |
88 | tree type = TREE_TYPE (gimple_assign_lhs (stmt)); | |
89 | gimple_assign_set_rhs_code (stmt, INTEGER_CST); | |
90 | gimple_assign_set_rhs1 (stmt, fold_convert (type, integer_zero_node)); | |
91 | update_stmt (stmt); | |
92 | } | |
93 | ||
94 | gimple new_stmt | |
95 | = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0); | |
96 | gimple_seq seq = NULL; | |
97 | gimple_seq_add_stmt (&seq, new_stmt); | |
98 | ||
99 | /* If we had a NULL pointer dereference, then we want to insert the | |
100 | __builtin_trap after the statement, for the other cases we want | |
101 | to insert before the statement. */ | |
102 | if (walk_stmt_load_store_ops (stmt, (void *)op, | |
103 | check_loadstore, | |
104 | check_loadstore)) | |
105 | gsi_insert_after (si_p, seq, GSI_NEW_STMT); | |
106 | else | |
107 | gsi_insert_before (si_p, seq, GSI_NEW_STMT); | |
108 | ||
0a76252a | 109 | /* We must remove statements from the end of the block so that we |
110 | never reference a released SSA_NAME. */ | |
111 | basic_block bb = gimple_bb (gsi_stmt (*si_p)); | |
112 | for (gimple_stmt_iterator si = gsi_last_bb (bb); | |
113 | gsi_stmt (si) != gsi_stmt (*si_p); | |
114 | si = gsi_last_bb (bb)) | |
db242b6d | 115 | { |
0a76252a | 116 | stmt = gsi_stmt (si); |
db242b6d | 117 | unlink_stmt_vdef (stmt); |
0a76252a | 118 | gsi_remove (&si, true); |
db242b6d | 119 | release_defs (stmt); |
120 | } | |
121 | } | |
122 | ||
123 | /* BB when reached via incoming edge E will exhibit undefined behaviour | |
124 | at STMT. Isolate and optimize the path which exhibits undefined | |
125 | behaviour. | |
126 | ||
127 | Isolation is simple. Duplicate BB and redirect E to BB'. | |
128 | ||
129 | Optimization is simple as well. Replace STMT in BB' with an | |
130 | unconditional trap and remove all outgoing edges from BB'. | |
131 | ||
132 | DUPLICATE is a pre-existing duplicate, use it as BB' if it exists. | |
133 | ||
134 | Return BB'. */ | |
135 | ||
136 | basic_block | |
c59258dc | 137 | isolate_path (basic_block bb, basic_block duplicate, |
138 | edge e, gimple stmt, tree op) | |
db242b6d | 139 | { |
140 | gimple_stmt_iterator si, si2; | |
141 | edge_iterator ei; | |
142 | edge e2; | |
143 | ||
144 | ||
145 | /* First duplicate BB if we have not done so already and remove all | |
146 | the duplicate's outgoing edges as duplicate is going to unconditionally | |
147 | trap. Removing the outgoing edges is both an optimization and ensures | |
148 | we don't need to do any PHI node updates. */ | |
149 | if (!duplicate) | |
150 | { | |
151 | duplicate = duplicate_block (bb, NULL, NULL); | |
152 | for (ei = ei_start (duplicate->succs); (e2 = ei_safe_edge (ei)); ) | |
153 | remove_edge (e2); | |
154 | } | |
155 | ||
156 | /* Complete the isolation step by redirecting E to reach DUPLICATE. */ | |
157 | e2 = redirect_edge_and_branch (e, duplicate); | |
158 | if (e2) | |
159 | flush_pending_stmts (e2); | |
160 | ||
161 | ||
162 | /* There may be more than one statement in DUPLICATE which exhibits | |
163 | undefined behaviour. Ultimately we want the first such statement in | |
164 | DUPLCIATE so that we're able to delete as much code as possible. | |
165 | ||
166 | So each time we discover undefined behaviour in DUPLICATE, search for | |
167 | the statement which triggers undefined behaviour. If found, then | |
168 | transform the statement into a trap and delete everything after the | |
169 | statement. If not found, then this particular instance was subsumed by | |
170 | an earlier instance of undefined behaviour and there's nothing to do. | |
171 | ||
172 | This is made more complicated by the fact that we have STMT, which is in | |
173 | BB rather than in DUPLICATE. So we set up two iterators, one for each | |
174 | block and walk forward looking for STMT in BB, advancing each iterator at | |
175 | each step. | |
176 | ||
177 | When we find STMT the second iterator should point to STMT's equivalent in | |
178 | duplicate. If DUPLICATE ends before STMT is found in BB, then there's | |
179 | nothing to do. | |
180 | ||
181 | Ignore labels and debug statements. */ | |
182 | si = gsi_start_nondebug_after_labels_bb (bb); | |
183 | si2 = gsi_start_nondebug_after_labels_bb (duplicate); | |
184 | while (!gsi_end_p (si) && !gsi_end_p (si2) && gsi_stmt (si) != stmt) | |
185 | { | |
186 | gsi_next_nondebug (&si); | |
187 | gsi_next_nondebug (&si2); | |
188 | } | |
189 | ||
190 | /* This would be an indicator that we never found STMT in BB, which should | |
191 | never happen. */ | |
192 | gcc_assert (!gsi_end_p (si)); | |
193 | ||
194 | /* If we did not run to the end of DUPLICATE, then SI points to STMT and | |
195 | SI2 points to the duplicate of STMT in DUPLICATE. Insert a trap | |
196 | before SI2 and remove SI2 and all trailing statements. */ | |
197 | if (!gsi_end_p (si2)) | |
c59258dc | 198 | insert_trap_and_remove_trailing_statements (&si2, op); |
db242b6d | 199 | |
200 | return duplicate; | |
201 | } | |
202 | ||
0a76252a | 203 | /* Look for PHI nodes which feed statements in the same block where |
204 | the value of the PHI node implies the statement is erroneous. | |
db242b6d | 205 | |
0a76252a | 206 | For example, a NULL PHI arg value which then feeds a pointer |
207 | dereference. | |
db242b6d | 208 | |
0a76252a | 209 | When found isolate and optimize the path associated with the PHI |
210 | argument feeding the erroneous statement. */ | |
211 | static void | |
212 | find_implicit_erroneous_behaviour (void) | |
db242b6d | 213 | { |
214 | basic_block bb; | |
215 | ||
db242b6d | 216 | FOR_EACH_BB (bb) |
217 | { | |
218 | gimple_stmt_iterator si; | |
219 | ||
b61383dd | 220 | /* Out of an abundance of caution, do not isolate paths to a |
221 | block where the block has any abnormal outgoing edges. | |
222 | ||
223 | We might be able to relax this in the future. We have to detect | |
224 | when we have to split the block with the NULL dereference and | |
225 | the trap we insert. We have to preserve abnormal edges out | |
226 | of the isolated block which in turn means updating PHIs at | |
227 | the targets of those abnormal outgoing edges. */ | |
cb287480 | 228 | if (has_abnormal_or_eh_outgoing_edge_p (bb)) |
b61383dd | 229 | continue; |
230 | ||
db242b6d | 231 | /* First look for a PHI which sets a pointer to NULL and which |
232 | is then dereferenced within BB. This is somewhat overly | |
233 | conservative, but probably catches most of the interesting | |
234 | cases. */ | |
235 | for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si)) | |
236 | { | |
237 | gimple phi = gsi_stmt (si); | |
238 | tree lhs = gimple_phi_result (phi); | |
239 | ||
240 | /* If the result is not a pointer, then there is no need to | |
241 | examine the arguments. */ | |
242 | if (!POINTER_TYPE_P (TREE_TYPE (lhs))) | |
243 | continue; | |
244 | ||
245 | /* PHI produces a pointer result. See if any of the PHI's | |
246 | arguments are NULL. | |
247 | ||
248 | When we remove an edge, we want to reprocess the current | |
249 | index, hence the ugly way we update I for each iteration. */ | |
250 | basic_block duplicate = NULL; | |
251 | for (unsigned i = 0, next_i = 0; | |
252 | i < gimple_phi_num_args (phi); | |
253 | i = next_i) | |
254 | { | |
255 | tree op = gimple_phi_arg_def (phi, i); | |
256 | ||
257 | next_i = i + 1; | |
258 | ||
259 | if (!integer_zerop (op)) | |
260 | continue; | |
261 | ||
262 | edge e = gimple_phi_arg_edge (phi, i); | |
263 | imm_use_iterator iter; | |
264 | gimple use_stmt; | |
265 | ||
266 | /* We've got a NULL PHI argument. Now see if the | |
267 | PHI's result is dereferenced within BB. */ | |
268 | FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs) | |
269 | { | |
270 | /* We only care about uses in BB. Catching cases in | |
271 | in other blocks would require more complex path | |
cb287480 | 272 | isolation code. */ |
273 | if (gimple_bb (use_stmt) != bb) | |
db242b6d | 274 | continue; |
275 | ||
276 | if (infer_nonnull_range (use_stmt, lhs)) | |
277 | { | |
278 | duplicate = isolate_path (bb, duplicate, | |
c59258dc | 279 | e, use_stmt, lhs); |
db242b6d | 280 | |
281 | /* When we remove an incoming edge, we need to | |
282 | reprocess the Ith element. */ | |
283 | next_i = i; | |
284 | cfg_altered = true; | |
285 | } | |
286 | } | |
287 | } | |
288 | } | |
0a76252a | 289 | } |
290 | } | |
291 | ||
292 | /* Look for statements which exhibit erroneous behaviour. For example | |
293 | a NULL pointer dereference. | |
294 | ||
295 | When found, optimize the block containing the erroneous behaviour. */ | |
296 | static void | |
297 | find_explicit_erroneous_behaviour (void) | |
298 | { | |
299 | basic_block bb; | |
300 | ||
301 | FOR_EACH_BB (bb) | |
302 | { | |
303 | gimple_stmt_iterator si; | |
db242b6d | 304 | |
b61383dd | 305 | /* Out of an abundance of caution, do not isolate paths to a |
306 | block where the block has any abnormal outgoing edges. | |
307 | ||
308 | We might be able to relax this in the future. We have to detect | |
309 | when we have to split the block with the NULL dereference and | |
310 | the trap we insert. We have to preserve abnormal edges out | |
311 | of the isolated block which in turn means updating PHIs at | |
312 | the targets of those abnormal outgoing edges. */ | |
cb287480 | 313 | if (has_abnormal_or_eh_outgoing_edge_p (bb)) |
b61383dd | 314 | continue; |
315 | ||
db242b6d | 316 | /* Now look at the statements in the block and see if any of |
317 | them explicitly dereference a NULL pointer. This happens | |
318 | because of jump threading and constant propagation. */ | |
319 | for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) | |
320 | { | |
321 | gimple stmt = gsi_stmt (si); | |
322 | ||
323 | /* By passing null_pointer_node, we can use infer_nonnull_range | |
324 | to detect explicit NULL pointer dereferences and other uses | |
325 | where a non-NULL value is required. */ | |
cb287480 | 326 | if (infer_nonnull_range (stmt, null_pointer_node)) |
db242b6d | 327 | { |
c59258dc | 328 | insert_trap_and_remove_trailing_statements (&si, |
329 | null_pointer_node); | |
db242b6d | 330 | |
331 | /* And finally, remove all outgoing edges from BB. */ | |
332 | edge e; | |
333 | for (edge_iterator ei = ei_start (bb->succs); | |
334 | (e = ei_safe_edge (ei)); ) | |
335 | remove_edge (e); | |
336 | ||
337 | /* Ignore any more operands on this statement and | |
338 | continue the statement iterator (which should | |
339 | terminate its loop immediately. */ | |
340 | cfg_altered = true; | |
341 | break; | |
342 | } | |
343 | } | |
344 | } | |
0a76252a | 345 | } |
346 | /* Search the function for statements which, if executed, would cause | |
347 | the program to fault such as a dereference of a NULL pointer. | |
348 | ||
349 | Such a program can't be valid if such a statement was to execute | |
350 | according to ISO standards. | |
351 | ||
352 | We detect explicit NULL pointer dereferences as well as those implied | |
353 | by a PHI argument having a NULL value which unconditionally flows into | |
354 | a dereference in the same block as the PHI. | |
355 | ||
356 | In the former case we replace the offending statement with an | |
357 | unconditional trap and eliminate the outgoing edges from the statement's | |
358 | basic block. This may expose secondary optimization opportunities. | |
359 | ||
360 | In the latter case, we isolate the path(s) with the NULL PHI | |
361 | feeding the dereference. We can then replace the offending statement | |
362 | and eliminate the outgoing edges in the duplicate. Again, this may | |
363 | expose secondary optimization opportunities. | |
364 | ||
365 | A warning for both cases may be advisable as well. | |
366 | ||
367 | Other statically detectable violations of the ISO standard could be | |
368 | handled in a similar way, such as out-of-bounds array indexing. */ | |
369 | ||
370 | static unsigned int | |
371 | gimple_ssa_isolate_erroneous_paths (void) | |
372 | { | |
373 | initialize_original_copy_tables (); | |
374 | ||
375 | /* Search all the blocks for edges which, if traversed, will | |
376 | result in undefined behaviour. */ | |
377 | cfg_altered = false; | |
378 | ||
379 | /* First handle cases where traversal of a particular edge | |
380 | triggers undefined behaviour. These cases require creating | |
381 | duplicate blocks and thus new SSA_NAMEs. | |
382 | ||
383 | We want that process complete prior to the phase where we start | |
384 | removing edges from the CFG. Edge removal may ultimately result in | |
385 | removal of PHI nodes and thus releasing SSA_NAMEs back to the | |
386 | name manager. | |
387 | ||
388 | If the two processes run in parallel we could release an SSA_NAME | |
389 | back to the manager but we could still have dangling references | |
390 | to the released SSA_NAME in unreachable blocks. | |
391 | that any released names not have dangling references in the IL. */ | |
392 | find_implicit_erroneous_behaviour (); | |
393 | find_explicit_erroneous_behaviour (); | |
394 | ||
db242b6d | 395 | free_original_copy_tables (); |
396 | ||
397 | /* We scramble the CFG and loop structures a bit, clean up | |
398 | appropriately. We really should incrementally update the | |
399 | loop structures, in theory it shouldn't be that hard. */ | |
400 | if (cfg_altered) | |
401 | { | |
402 | free_dominance_info (CDI_DOMINATORS); | |
403 | free_dominance_info (CDI_POST_DOMINATORS); | |
404 | loops_state_set (LOOPS_NEED_FIXUP); | |
405 | return TODO_cleanup_cfg | TODO_update_ssa; | |
406 | } | |
407 | return 0; | |
408 | } | |
409 | ||
410 | static bool | |
411 | gate_isolate_erroneous_paths (void) | |
412 | { | |
413 | /* If we do not have a suitable builtin function for the trap statement, | |
414 | then do not perform the optimization. */ | |
a3eb5a98 | 415 | return (flag_isolate_erroneous_paths != 0); |
db242b6d | 416 | } |
417 | ||
418 | namespace { | |
419 | const pass_data pass_data_isolate_erroneous_paths = | |
420 | { | |
421 | GIMPLE_PASS, /* type */ | |
422 | "isolate-paths", /* name */ | |
423 | OPTGROUP_NONE, /* optinfo_flags */ | |
424 | true, /* has_gate */ | |
425 | true, /* has_execute */ | |
426 | TV_ISOLATE_ERRONEOUS_PATHS, /* tv_id */ | |
427 | ( PROP_cfg | PROP_ssa ), /* properties_required */ | |
428 | 0, /* properties_provided */ | |
429 | 0, /* properties_destroyed */ | |
430 | 0, /* todo_flags_start */ | |
431 | TODO_verify_ssa, /* todo_flags_finish */ | |
432 | }; | |
433 | ||
434 | class pass_isolate_erroneous_paths : public gimple_opt_pass | |
435 | { | |
436 | public: | |
437 | pass_isolate_erroneous_paths (gcc::context *ctxt) | |
438 | : gimple_opt_pass (pass_data_isolate_erroneous_paths, ctxt) | |
439 | {} | |
440 | ||
441 | /* opt_pass methods: */ | |
442 | opt_pass * clone () { return new pass_isolate_erroneous_paths (m_ctxt); } | |
443 | bool gate () { return gate_isolate_erroneous_paths (); } | |
444 | unsigned int execute () { return gimple_ssa_isolate_erroneous_paths (); } | |
445 | ||
2b5aceaf | 446 | }; // class pass_isolate_erroneous_paths |
db242b6d | 447 | } |
448 | ||
449 | gimple_opt_pass * | |
450 | make_pass_isolate_erroneous_paths (gcc::context *ctxt) | |
451 | { | |
452 | return new pass_isolate_erroneous_paths (ctxt); | |
453 | } |