]>
Commit | Line | Data |
---|---|---|
8fdc414d JL |
1 | /* Detect paths through the CFG which can never be executed in a conforming |
2 | program and isolate them. | |
3 | ||
aeee4812 | 4 | Copyright (C) 2013-2023 Free Software Foundation, Inc. |
8fdc414d JL |
5 | |
6 | This file is part of GCC. | |
7 | ||
8 | GCC is free software; you can redistribute it and/or modify | |
9 | it under the terms of the GNU General Public License as published by | |
10 | the Free Software Foundation; either version 3, or (at your option) | |
11 | any later version. | |
12 | ||
13 | GCC is distributed in the hope that it will be useful, | |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | GNU General Public License for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
19 | along with GCC; see the file COPYING3. If not see | |
20 | <http://www.gnu.org/licenses/>. */ | |
21 | ||
22 | #include "config.h" | |
23 | #include "system.h" | |
24 | #include "coretypes.h" | |
c7131fb2 | 25 | #include "backend.h" |
8fdc414d | 26 | #include "tree.h" |
c7131fb2 | 27 | #include "gimple.h" |
957060b5 AM |
28 | #include "cfghooks.h" |
29 | #include "tree-pass.h" | |
c7131fb2 | 30 | #include "ssa.h" |
957060b5 | 31 | #include "diagnostic-core.h" |
40e23961 | 32 | #include "fold-const.h" |
5be5c238 AM |
33 | #include "gimple-iterator.h" |
34 | #include "gimple-walk.h" | |
8fdc414d | 35 | #include "tree-ssa.h" |
8fdc414d | 36 | #include "cfgloop.h" |
5e94175f | 37 | #include "tree-cfg.h" |
1486c2a7 | 38 | #include "cfganal.h" |
b4dfdc11 | 39 | #include "intl.h" |
8fdc414d JL |
40 | |
41 | ||
/* Set to true whenever a path is isolated and the CFG is changed.
   NOTE(review): presumably read by the pass's execute function (outside
   this chunk) to decide whether CFG cleanup is needed -- confirm.  */
static bool cfg_altered;
43 | ||
6ab7a3d7 | 44 | /* Callback for walk_stmt_load_store_ops. |
ae93744d | 45 | |
6ab7a3d7 JL |
46 | Return TRUE if OP will dereference the tree stored in DATA, FALSE |
47 | otherwise. | |
48 | ||
49 | This routine only makes a superficial check for a dereference. Thus, | |
50 | it must only be used if it is safe to return a false negative. */ | |
51 | static bool | |
355fe088 | 52 | check_loadstore (gimple *stmt, tree op, tree, void *data) |
6ab7a3d7 JL |
53 | { |
54 | if ((TREE_CODE (op) == MEM_REF || TREE_CODE (op) == TARGET_MEM_REF) | |
55 | && operand_equal_p (TREE_OPERAND (op, 0), (tree)data, 0)) | |
115d1851 JL |
56 | { |
57 | TREE_THIS_VOLATILE (op) = 1; | |
58 | TREE_SIDE_EFFECTS (op) = 1; | |
59 | update_stmt (stmt); | |
60 | return true; | |
61 | } | |
6ab7a3d7 JL |
62 | return false; |
63 | } | |
64 | ||
/* Insert a trap after SI and split the block after the trap.  */

static void
insert_trap (gimple_stmt_iterator *si_p, tree op)
{
  /* We want the NULL pointer dereference to actually occur so that
     code that wishes to catch the signal can do so.

     If the dereference is a load, then there's nothing to do as the
     LHS will be a throw-away SSA_NAME and the RHS is the NULL dereference.

     If the dereference is a store and we can easily transform the RHS,
     then simplify the RHS to enable more DCE.  Note that we require the
     statement to be a GIMPLE_ASSIGN which filters out calls on the RHS.  */
  gimple *stmt = gsi_stmt (*si_p);
  if (walk_stmt_load_store_ops (stmt, (void *)op, NULL, check_loadstore)
      && is_gimple_assign (stmt)
      && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt))))
    {
      /* We just need to turn the RHS into zero converted to the proper
	 type.  */
      tree type = TREE_TYPE (gimple_assign_lhs (stmt));
      gimple_assign_set_rhs_code (stmt, INTEGER_CST);
      gimple_assign_set_rhs1 (stmt, fold_convert (type, integer_zero_node));
      update_stmt (stmt);
    }

  /* Build the __builtin_trap call and wrap it in a sequence so it can
     be inserted either before or after STMT below.  */
  gcall *new_stmt
    = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
  gimple_seq seq = NULL;
  gimple_seq_add_stmt (&seq, new_stmt);

  /* If we had a NULL pointer dereference, then we want to insert the
     __builtin_trap after the statement, for the other cases we want
     to insert before the statement.  (The second walk passes
     check_loadstore for both loads and stores, i.e. any dereference
     of OP in STMT.)  */
  if (walk_stmt_load_store_ops (stmt, (void *)op,
				check_loadstore,
				check_loadstore))
    {
      gsi_insert_after (si_p, seq, GSI_NEW_STMT);
      if (stmt_ends_bb_p (stmt))
	{
	  /* STMT already ends its block; split at STMT so the trap
	     starts the new block, and we are done.  */
	  split_block (gimple_bb (stmt), stmt);
	  return;
	}
    }
  else
    gsi_insert_before (si_p, seq, GSI_NEW_STMT);

  /* Split after the trap so anything following it becomes a separate
     (unreachable) block, then reposition *SI_P back at STMT for the
     caller.  */
  split_block (gimple_bb (new_stmt), new_stmt);
  *si_p = gsi_for_stmt (stmt);
}
117 | ||
/* BB when reached via incoming edge E will exhibit undefined behavior
   at STMT.  Isolate and optimize the path which exhibits undefined
   behavior.

   Isolation is simple.  Duplicate BB and redirect E to BB'.

   Optimization is simple as well.  Replace STMT in BB' with an
   unconditional trap and remove all outgoing edges from BB'.

   If RET_ZERO, do not trap, only return NULL.

   DUPLICATE is a pre-existing duplicate, use it as BB' if it exists.

   Return BB' (which may be equal to DUPLICATE).  */

ATTRIBUTE_RETURNS_NONNULL basic_block
isolate_path (basic_block bb, basic_block duplicate,
	      edge e, gimple *stmt, tree op, bool ret_zero)
{
  gimple_stmt_iterator si, si2;
  edge_iterator ei;
  edge e2;
  bool impossible = true;
  profile_count count = e->count ();

  /* If none of the statements before STMT can terminate the block,
     execution is guaranteed to reach STMT, i.e. the undefined
     behavior, so the path through E is "impossible" in a conforming
     program; force its profile cold accordingly.  */
  for (si = gsi_start_bb (bb); gsi_stmt (si) != stmt; gsi_next (&si))
    if (stmt_can_terminate_bb_p (gsi_stmt (si)))
      {
	impossible = false;
	break;
      }
  force_edge_cold (e, impossible);

  /* First duplicate BB if we have not done so already and remove all
     the duplicate's outgoing edges as duplicate is going to unconditionally
     trap.  Removing the outgoing edges is both an optimization and ensures
     we don't need to do any PHI node updates.  */
  if (!duplicate)
    {
      duplicate = duplicate_block (bb, NULL, NULL);
      duplicate->count = profile_count::zero ();
      if (!ret_zero)
	for (ei = ei_start (duplicate->succs); (e2 = ei_safe_edge (ei)); )
	  remove_edge (e2);
    }
  bb->count -= count;

  /* Complete the isolation step by redirecting E to reach DUPLICATE.  */
  e2 = redirect_edge_and_branch (e, duplicate);
  if (e2)
    {
      flush_pending_stmts (e2);

      /* Update profile only when redirection is really processed.  */
      bb->count += e->count ();
    }

  /* There may be more than one statement in DUPLICATE which exhibits
     undefined behavior.  Ultimately we want the first such statement in
     DUPLICATE so that we're able to delete as much code as possible.

     So each time we discover undefined behavior in DUPLICATE, search for
     the statement which triggers undefined behavior.  If found, then
     transform the statement into a trap and delete everything after the
     statement.  If not found, then this particular instance was subsumed by
     an earlier instance of undefined behavior and there's nothing to do.

     This is made more complicated by the fact that we have STMT, which is in
     BB rather than in DUPLICATE.  So we set up two iterators, one for each
     block and walk forward looking for STMT in BB, advancing each iterator at
     each step.

     When we find STMT the second iterator should point to STMT's equivalent in
     duplicate.  If DUPLICATE ends before STMT is found in BB, then there's
     nothing to do.

     Ignore labels and debug statements.  */
  si = gsi_start_nondebug_after_labels_bb (bb);
  si2 = gsi_start_nondebug_after_labels_bb (duplicate);
  while (!gsi_end_p (si) && !gsi_end_p (si2) && gsi_stmt (si) != stmt)
    {
      gsi_next_nondebug (&si);
      gsi_next_nondebug (&si2);
    }

  /* This would be an indicator that we never found STMT in BB, which should
     never happen.  */
  gcc_assert (!gsi_end_p (si));

  /* If we did not run to the end of DUPLICATE, then SI points to STMT and
     SI2 points to the duplicate of STMT in DUPLICATE.  Insert a trap
     before SI2 and remove SI2 and all trailing statements.  */
  if (!gsi_end_p (si2))
    {
      if (ret_zero)
	{
	  /* For the return-of-local-address case, rewrite the return
	     value to zero instead of trapping (the behavior is only
	     undefined if the returned address is used).  */
	  greturn *ret = as_a <greturn *> (gsi_stmt (si2));
	  tree zero = build_zero_cst (TREE_TYPE (gimple_return_retval (ret)));
	  gimple_return_set_retval (ret, zero);
	  update_stmt (ret);
	}
      else
	insert_trap (&si2, op);
    }

  return duplicate;
}
225 | ||
606f928d JL |
226 | /* Return TRUE if STMT is a div/mod operation using DIVISOR as the divisor. |
227 | FALSE otherwise. */ | |
228 | ||
229 | static bool | |
230 | is_divmod_with_given_divisor (gimple *stmt, tree divisor) | |
231 | { | |
232 | /* Only assignments matter. */ | |
233 | if (!is_gimple_assign (stmt)) | |
234 | return false; | |
235 | ||
236 | /* Check for every DIV/MOD expression. */ | |
237 | enum tree_code rhs_code = gimple_assign_rhs_code (stmt); | |
238 | if (rhs_code == TRUNC_DIV_EXPR | |
239 | || rhs_code == FLOOR_DIV_EXPR | |
240 | || rhs_code == CEIL_DIV_EXPR | |
241 | || rhs_code == EXACT_DIV_EXPR | |
242 | || rhs_code == ROUND_DIV_EXPR | |
243 | || rhs_code == TRUNC_MOD_EXPR | |
244 | || rhs_code == FLOOR_MOD_EXPR | |
245 | || rhs_code == CEIL_MOD_EXPR | |
246 | || rhs_code == ROUND_MOD_EXPR) | |
247 | { | |
248 | /* Pointer equality is fine when DIVISOR is an SSA_NAME, but | |
249 | not sufficient for constants which may have different types. */ | |
250 | if (operand_equal_p (gimple_assign_rhs2 (stmt), divisor, 0)) | |
251 | return true; | |
252 | } | |
253 | return false; | |
254 | } | |
255 | ||
256 | /* NAME is an SSA_NAME that we have already determined has the value 0 or NULL. | |
257 | ||
258 | Return TRUE if USE_STMT uses NAME in a way where a 0 or NULL value results | |
259 | in undefined behavior, FALSE otherwise | |
260 | ||
261 | LOC is used for issuing diagnostics. This case represents potential | |
262 | undefined behavior exposed by path splitting and that's reflected in | |
263 | the diagnostic. */ | |
264 | ||
265 | bool | |
266 | stmt_uses_name_in_undefined_way (gimple *use_stmt, tree name, location_t loc) | |
267 | { | |
268 | /* If we are working with a non pointer type, then see | |
269 | if this use is a DIV/MOD operation using NAME as the | |
270 | divisor. */ | |
271 | if (!POINTER_TYPE_P (TREE_TYPE (name))) | |
272 | { | |
3d058ebb | 273 | if (!cfun->can_throw_non_call_exceptions) |
606f928d JL |
274 | return is_divmod_with_given_divisor (use_stmt, name); |
275 | return false; | |
276 | } | |
277 | ||
278 | /* NAME is a pointer, so see if it's used in a context where it must | |
279 | be non-NULL. */ | |
280 | bool by_dereference | |
281 | = infer_nonnull_range_by_dereference (use_stmt, name); | |
282 | ||
283 | if (by_dereference | |
284 | || infer_nonnull_range_by_attribute (use_stmt, name)) | |
285 | { | |
286 | ||
287 | if (by_dereference) | |
288 | { | |
289 | warning_at (loc, OPT_Wnull_dereference, | |
290 | "potential null pointer dereference"); | |
291 | if (!flag_isolate_erroneous_paths_dereference) | |
292 | return false; | |
293 | } | |
294 | else | |
295 | { | |
296 | if (!flag_isolate_erroneous_paths_attribute) | |
297 | return false; | |
298 | } | |
299 | return true; | |
300 | } | |
301 | return false; | |
302 | } | |
303 | ||
304 | /* Return TRUE if USE_STMT uses 0 or NULL in a context which results in | |
305 | undefined behavior, FALSE otherwise. | |
306 | ||
307 | These cases are explicit in the IL. */ | |
308 | ||
309 | bool | |
310 | stmt_uses_0_or_null_in_undefined_way (gimple *stmt) | |
311 | { | |
3d058ebb | 312 | if (!cfun->can_throw_non_call_exceptions |
606f928d JL |
313 | && is_divmod_with_given_divisor (stmt, integer_zero_node)) |
314 | return true; | |
315 | ||
316 | /* By passing null_pointer_node, we can use the | |
317 | infer_nonnull_range functions to detect explicit NULL | |
318 | pointer dereferences and other uses where a non-NULL | |
319 | value is required. */ | |
320 | ||
321 | bool by_dereference | |
322 | = infer_nonnull_range_by_dereference (stmt, null_pointer_node); | |
323 | if (by_dereference | |
324 | || infer_nonnull_range_by_attribute (stmt, null_pointer_node)) | |
325 | { | |
326 | if (by_dereference) | |
327 | { | |
328 | location_t loc = gimple_location (stmt); | |
329 | warning_at (loc, OPT_Wnull_dereference, | |
330 | "null pointer dereference"); | |
331 | if (!flag_isolate_erroneous_paths_dereference) | |
332 | return false; | |
333 | } | |
334 | else | |
335 | { | |
336 | if (!flag_isolate_erroneous_paths_attribute) | |
337 | return false; | |
338 | } | |
339 | return true; | |
340 | } | |
341 | return false; | |
342 | } | |
343 | ||
/* Describes the property of a return statement that may return
   the address of one or more local variables.  The type must
   be safely assignable and copyable so that it can be stored in
   a hash_map.  */
class args_loc_t
{
 public:

  args_loc_t (): nargs (), locvec (), ptr (&ptr)
  {
    locvec.create (4);
  }

  /* Copy ctor: deep-copies LOCVEC and re-seats the PTR sentinel.  */
  args_loc_t (const args_loc_t &rhs)
    : nargs (rhs.nargs), locvec (rhs.locvec.copy ()), ptr (&ptr) { }

  args_loc_t& operator= (const args_loc_t &rhs)
  {
    nargs = rhs.nargs;
    locvec.release ();
    locvec = rhs.locvec.copy ();
    return *this;
  }

  ~args_loc_t ()
  {
    locvec.release ();
    /* PTR is set to the object's own address by every constructor;
       the assert fires if the object was duplicated without running
       a constructor (e.g. a bitwise copy -- see pr90904).  */
    gcc_assert (ptr == &ptr);
  }

  /* For a PHI in a return statement its number of arguments.  When greater
     than LOCVEC.LENGTH () implies that an address of one of the locals in
     LOCVEC may but need not be returned by the statement.  Otherwise,
     unless both are zero, it implies it definitely is returned.  */
  unsigned nargs;
  /* The locations of local variables/alloca calls returned by the return
     statement.  Avoid using auto_vec here since it's not safe to copy due
     to pr90904.  */
  vec <location_t> locvec;
  /* Self-referential sanity-check pointer; see the destructor.  */
  void *ptr;
};
385 | ||
/* A mapping from a return statement to the locations of local variables
   whose addresses it may return.  (The key may also be the placeholder
   (gimple*)-1 while a candidate return statement is still unknown; see
   handle_return_addr_local_phi_arg.)  */
typedef hash_map <gimple *, args_loc_t> locmap_t;
389 | ||
390 | /* Given the LOCMAP mapping, issue diagnostics about returning addresses | |
391 | of local variables. When MAYBE is set, all diagnostics will be of | |
392 | the "may return" kind. Otherwise each will be determined based on | |
393 | the equality of the corresponding NARGS and LOCVEC.LENGTH () values. */ | |
394 | ||
395 | static void | |
396 | diag_returned_locals (bool maybe, const locmap_t &locmap) | |
397 | { | |
398 | for (locmap_t::iterator it = locmap.begin (); it != locmap.end (); ++it) | |
399 | { | |
400 | gimple *stmt = (*it).first; | |
401 | const args_loc_t &argsloc = (*it).second; | |
402 | location_t stmtloc = gimple_location (stmt); | |
e9e2bad7 MS |
403 | if (stmtloc == UNKNOWN_LOCATION) |
404 | /* When multiple return statements are merged into one it | |
405 | may not have an associated location. Use the location | |
406 | of the closing brace instead. */ | |
407 | stmtloc = cfun->function_end_locus; | |
aac9480d MS |
408 | |
409 | auto_diagnostic_group d; | |
410 | unsigned nargs = argsloc.locvec.length (); | |
411 | if (warning_at (stmtloc, OPT_Wreturn_local_addr, | |
412 | (maybe || argsloc.nargs > nargs | |
413 | ? G_("function may return address of local variable") | |
414 | : G_("function returns address of local variable")))) | |
415 | { | |
416 | for (unsigned i = 0; i != nargs; ++i) | |
417 | inform (argsloc.locvec[i], "declared here"); | |
418 | } | |
419 | } | |
420 | } | |
421 | ||
/* Return true if EXPR is an expression of pointer type that refers
   to the address of one or more variables with automatic storage
   duration.  If so, add an entry to *PLOCMAP and insert into
   PLOCMAP->LOCVEC the locations of the corresponding local variables
   whose address is returned by the RETURN_STMT (which may be set to
   (gimple*)-1 as a placeholder for such a statement).  VISITED is
   a bitmap of PHI nodes already visited by recursive calls.  When
   null, PHI expressions are not considered.  */

static bool
is_addr_local (gimple *return_stmt, tree exp, locmap_t *plocmap,
	       hash_set<gphi *> *visited)
{
  if (TREE_CODE (exp) == ADDR_EXPR)
    {
      tree baseaddr = get_base_address (TREE_OPERAND (exp, 0));
      /* &*p -- look through the indirection at the pointer itself.  */
      if (TREE_CODE (baseaddr) == MEM_REF)
	return is_addr_local (return_stmt, TREE_OPERAND (baseaddr, 0),
			      plocmap, visited);

      /* Only local variables and parameters qualify; globals and
	 other declarations do not.  */
      if ((!VAR_P (baseaddr)
	   || is_global_var (baseaddr))
	  && TREE_CODE (baseaddr) != PARM_DECL)
	return false;

      /* Record the declaration location of the local whose address
	 is taken.  */
      args_loc_t &argsloc = plocmap->get_or_insert (return_stmt);
      argsloc.locvec.safe_push (DECL_SOURCE_LOCATION (baseaddr));
      return true;
    }

  if (!POINTER_TYPE_P (TREE_TYPE (exp)))
    return false;

  if (TREE_CODE (exp) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (exp);
      enum gimple_code code = gimple_code (def_stmt);

      if (is_gimple_assign (def_stmt))
	{
	  tree type = TREE_TYPE (gimple_assign_lhs (def_stmt));
	  if (POINTER_TYPE_P (type))
	    {
	      /* NOTE: this inner CODE intentionally shadows the
		 gimple_code above -- it is the assignment's rhs code.  */
	      tree_code code = gimple_assign_rhs_code (def_stmt);
	      tree ptr1 = NULL_TREE, ptr2 = NULL_TREE;

	      /* Set to the number of arguments examined that should
		 be added to ARGSLOC->NARGS to identify expressions
		 only some but not all of whose operands refer to local
		 addresses.  */
	      unsigned nargs = 0;
	      if (code == COND_EXPR)
		{
		  ptr1 = gimple_assign_rhs2 (def_stmt);
		  ptr2 = gimple_assign_rhs3 (def_stmt);
		  nargs = 2;
		}
	      else if (code == MAX_EXPR || code == MIN_EXPR)
		{
		  ptr1 = gimple_assign_rhs1 (def_stmt);
		  ptr2 = gimple_assign_rhs2 (def_stmt);
		  nargs = 2;
		}
	      else if (code == ADDR_EXPR
		       || code == NOP_EXPR
		       || code == POINTER_PLUS_EXPR)
		/* Leave NARGS at zero and let the recursive call set it.  */
		ptr1 = gimple_assign_rhs1 (def_stmt);

	      /* Avoid short-circuiting the logical OR result in case
		 both operands refer to local variables, in which case
		 both should be considered and identified in the warning.  */
	      bool res1 = false, res2 = false;
	      if (ptr1)
		res1 = is_addr_local (return_stmt, ptr1, plocmap, visited);
	      if (ptr2)
		res2 = is_addr_local (return_stmt, ptr2, plocmap, visited);

	      if (nargs)
		if (args_loc_t *argsloc = plocmap->get (return_stmt))
		  argsloc->nargs += nargs;

	      return res1 || res2;
	    }
	  return false;
	}

      if (code == GIMPLE_CALL
	  && gimple_call_builtin_p (def_stmt, BUILT_IN_NORMAL))
	{
	  /* Handle alloca and friends that return pointers to automatic
	     storage.  */
	  tree fn = gimple_call_fndecl (def_stmt);
	  int code = DECL_FUNCTION_CODE (fn);
	  if (code == BUILT_IN_ALLOCA
	      || code == BUILT_IN_ALLOCA_WITH_ALIGN
	      || code == BUILT_IN_ALLOCA_WITH_ALIGN_AND_MAX)
	    {
	      /* For alloca record the call's location rather than
		 a declaration's.  */
	      args_loc_t &argsloc = plocmap->get_or_insert (return_stmt);
	      argsloc.locvec.safe_push (gimple_location (def_stmt));
	      return true;
	    }

	  if (gimple_call_num_args (def_stmt) < 1)
	    return false;

	  /* Recursively examine the first argument of calls to built-ins
	     that return it.  */
	  switch (code)
	    {
	    case BUILT_IN_MEMCPY:
	    case BUILT_IN_MEMCPY_CHK:
	    case BUILT_IN_MEMPCPY:
	    case BUILT_IN_MEMPCPY_CHK:
	    case BUILT_IN_MEMMOVE:
	    case BUILT_IN_MEMMOVE_CHK:
	    case BUILT_IN_STPCPY:
	    case BUILT_IN_STPCPY_CHK:
	    case BUILT_IN_STPNCPY:
	    case BUILT_IN_STPNCPY_CHK:
	    case BUILT_IN_STRCAT:
	    case BUILT_IN_STRCAT_CHK:
	    case BUILT_IN_STRCHR:
	    case BUILT_IN_STRCPY:
	    case BUILT_IN_STRCPY_CHK:
	    case BUILT_IN_STRNCAT:
	    case BUILT_IN_STRNCAT_CHK:
	    case BUILT_IN_STRNCPY:
	    case BUILT_IN_STRNCPY_CHK:
	    case BUILT_IN_STRRCHR:
	    case BUILT_IN_STRSTR:
	      return is_addr_local (return_stmt,
				    gimple_call_arg (def_stmt, 0),
				    plocmap, visited);
	    default:
	      return false;
	    }
	}

      if (code == GIMPLE_PHI && visited)
	{
	  gphi *phi_stmt = as_a <gphi *> (def_stmt);
	  /* Cycle guard: each PHI is examined at most once.  */
	  if (visited->add (phi_stmt))
	    return false;

	  unsigned count = 0;
	  unsigned nargs = gimple_phi_num_args (phi_stmt);
	  args_loc_t &argsloc = plocmap->get_or_insert (return_stmt);
	  /* Bump up the number of operands examined by the number of
	     operands of this PHI.  */
	  argsloc.nargs += nargs;
	  for (unsigned i = 0; i < gimple_phi_num_args (phi_stmt); ++i)
	    {
	      tree arg = gimple_phi_arg_def (phi_stmt, i);
	      if (is_addr_local (return_stmt, arg, plocmap, visited))
		++count;
	    }
	  /* True if at least one PHI argument is a local address.  */
	  return count != 0;
	}
    }

  return false;
}
585 | ||
/* Detect returning the address of a local variable in a PHI result LHS
   and argument ARG and PHI edge E in basic block BB.  Add an entry for
   each use to LOCMAP, setting its NARGS member to the NARGS argument
   (the number of PHI operands) plus the number of arguments in binary
   expressions referenced by ARG.  Call isolate_path for each returned
   address and set *ISOLATED to true if called.
   Return either DUPLICATE or the most recent result of isolate_path.  */

static basic_block
handle_return_addr_local_phi_arg (basic_block bb, basic_block duplicate,
				  tree lhs, tree arg, edge e, locmap_t &locmap,
				  unsigned nargs, bool *isolated)
{
  /* Use (gimple*)-1 as a temporary placeholder and replace it with
     the return statement below once it is known.  Using a null doesn't
     work because it's used by the hash_map to mean "no-entry."  Pass
     null instead of a visited_phis bitmap to avoid descending into
     PHIs since they are being processed by the caller.  Those that
     remain will be checked again later.  */
  if (!is_addr_local ((gimple*)-1, arg, &locmap, NULL))
    {
      /* Remove the placeholder regardless of success or failure.  */
      locmap.remove ((gimple*)-1);
      return duplicate;
    }

  const args_loc_t* const placeargsloc = locmap.get ((gimple*)-1);
  const unsigned nlocs = placeargsloc->locvec.length ();
  gcc_assert (nlocs);

  /* Add to the number of PHI arguments determined by the caller
     the number of operands of the expressions referenced by ARG.
     This lets the caller determine whether it's dealing with
     a "may return" or "definitely returns."  */
  nargs += placeargsloc->nargs;

  /* Set to true if any expressions referenced by ARG involve
     multiple addresses only some of which are those of locals.  */
  bool maybe = placeargsloc->nargs > placeargsloc->locvec.length ();

  gimple *use_stmt;
  imm_use_iterator iter;

  /* Look for uses of the PHI result LHS in return statements.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
    {
      greturn *return_stmt = dyn_cast <greturn *> (use_stmt);
      if (!return_stmt)
	continue;

      if (gimple_return_retval (return_stmt) != lhs)
	continue;

      /* Add an entry for the return statement and the locations
	 of the PHI arguments obtained above to the map.  */
      args_loc_t &argsloc = locmap.get_or_insert (use_stmt);
      argsloc.nargs = nargs;
      unsigned nelts = argsloc.locvec.length () + nlocs;
      argsloc.locvec.reserve (nelts);
      argsloc.locvec.splice (placeargsloc->locvec);

      /* Only isolate when the address is definitely returned, some
	 isolation option is enabled, the return is in BB itself, and
	 BB can be (or already has been) duplicated.  */
      if (!maybe
	  && (flag_isolate_erroneous_paths_dereference
	      || flag_isolate_erroneous_paths_attribute)
	  && gimple_bb (use_stmt) == bb
	  && (duplicate || can_duplicate_block_p (bb)))
	{
	  duplicate = isolate_path (bb, duplicate, e,
				    use_stmt, lhs, true);

	  /* Let caller know the path has been isolated.  */
	  *isolated = true;
	}
    }

  locmap.remove ((gimple*)-1);

  return duplicate;
}
665 | ||
/* Look for PHI nodes which feed statements in the same block where
   the value of the PHI node implies the statement is erroneous.

   For example, a NULL PHI arg value which then feeds a pointer
   dereference.

   When found isolate and optimize the path associated with the PHI
   argument feeding the erroneous statement.  */
static void
find_implicit_erroneous_behavior (void)
{
  /* Accumulates return-of-local-address candidates across all blocks;
     diagnosed in one batch at the end.  */
  locmap_t locmap;

  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      gphi_iterator si;

      /* Out of an abundance of caution, do not isolate paths to a
	 block where the block has any abnormal outgoing edges.

	 We might be able to relax this in the future.  We have to detect
	 when we have to split the block with the NULL dereference and
	 the trap we insert.  We have to preserve abnormal edges out
	 of the isolated block which in turn means updating PHIs at
	 the targets of those abnormal outgoing edges.  */
      if (has_abnormal_or_eh_outgoing_edge_p (bb))
	continue;


      /* If BB has an edge to itself, then duplication of BB below
	 could result in reallocation of BB's PHI nodes.   If that happens
	 then the loop below over the PHIs would use the old PHI and
	 thus invalid information.  We don't have a good way to know
	 if a PHI has been reallocated, so just avoid isolation in
	 this case.  */
      if (find_edge (bb, bb))
	continue;

      /* First look for a PHI which sets a pointer to NULL and which
	 is then dereferenced within BB.  This is somewhat overly
	 conservative, but probably catches most of the interesting
	 cases.  */
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  gphi *phi = si.phi ();
	  tree lhs = gimple_phi_result (phi);

	  /* Initial number of PHI arguments.  The result may change
	     from one iteration of the loop below to the next in
	     response to changes to the CFG but only the initial
	     value is stored below for use by diagnostics.  */
	  unsigned nargs = gimple_phi_num_args (phi);

	  /* PHI produces a pointer result.  See if any of the PHI's
	     arguments are NULL.

	     When we remove an edge, we want to reprocess the current
	     index since the argument at that index will have been
	     removed, hence the ugly way we update I for each iteration.  */
	  basic_block duplicate = NULL;
	  for (unsigned i = 0, next_i = 0;
	       i < gimple_phi_num_args (phi); i = next_i)
	    {
	      tree arg = gimple_phi_arg_def (phi, i);
	      edge e = gimple_phi_arg_edge (phi, i);

	      /* Advance the argument index unless a path involving
		 the current argument has been isolated.  */
	      next_i = i + 1;
	      bool isolated = false;
	      duplicate = handle_return_addr_local_phi_arg (bb, duplicate, lhs,
							    arg, e, locmap,
							    nargs, &isolated);
	      if (isolated)
		{
		  cfg_altered = true;
		  next_i = i;
		}

	      /* Only a NULL PHI argument is interesting from here on.  */
	      if (!integer_zerop (arg))
		continue;

	      location_t phi_arg_loc = gimple_phi_arg_location (phi, i);

	      imm_use_iterator iter;
	      gimple *use_stmt;

	      /* We've got a NULL PHI argument.  Now see if the
		 PHI's result is dereferenced within BB.  */
	      FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
		{
		  /* We only care about uses in BB.  Catching cases
		     in other blocks would require more complex path
		     isolation code.  */
		  if (gimple_bb (use_stmt) != bb)
		    continue;

		  /* Fall back to the PHI argument's location when the
		     use statement has none.  */
		  location_t loc = gimple_location (use_stmt)
		    ? gimple_location (use_stmt)
		    : phi_arg_loc;

		  if (stmt_uses_name_in_undefined_way (use_stmt, lhs, loc)
		      && (duplicate || can_duplicate_block_p (bb)))
		    {
		      duplicate = isolate_path (bb, duplicate, e,
						use_stmt, lhs, false);

		      /* When we remove an incoming edge, we need to
			 reprocess the Ith element.  */
		      next_i = i;
		      cfg_altered = true;
		    }
		}
	    }
	}
    }

  diag_returned_locals (false, locmap);
}
787 | ||
/* Detect and diagnose returning the address of a local variable
   in RETURN_STMT in basic block BB.  This only becomes undefined
   behavior if the result is used, so we do not insert a trap and
   only return NULL instead.  */

static void
warn_return_addr_local (basic_block bb, greturn *return_stmt)
{
  tree val = gimple_return_retval (return_stmt);
  if (!val)
    return;

  locmap_t locmap;
  hash_set<gphi *> visited_phis;
  if (!is_addr_local (return_stmt, val, &locmap, &visited_phis))
    return;

  /* We only need it for this particular case.  */
  calculate_dominance_info (CDI_POST_DOMINATORS);

  const args_loc_t *argsloc = locmap.get (return_stmt);
  gcc_assert (argsloc);

  /* "May return" either because not all operands are local addresses,
     or because this return doesn't post-dominate the function entry
     (i.e. some executions bypass it).  */
  bool maybe = argsloc->nargs > argsloc->locvec.length ();
  if (!maybe)
    maybe = !dominated_by_p (CDI_POST_DOMINATORS,
			     single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)), bb);

  diag_returned_locals (maybe, locmap);

  /* Bail if the statement isn't certain to return the address
     of a local (e.g., if it involves a conditional expression
     that wasn't transformed into a PHI or if it involves
     a MAX_EXPR or MIN_EXPR only one of whose operands is a local
     (even though such an expression isn't valid in C or has
     defined semantics in C++).  */
  if (maybe)
    return;

  /* Do not modify code if the user only asked for warnings.  */
  if (flag_isolate_erroneous_paths_dereference
      || flag_isolate_erroneous_paths_attribute)
    {
      tree zero = build_zero_cst (TREE_TYPE (val));
      gimple_return_set_retval (return_stmt, zero);
      update_stmt (return_stmt);
    }
}
836 | ||
9c582551 | 837 | /* Look for statements which exhibit erroneous behavior. For example |
ae93744d | 838 | a NULL pointer dereference. |
56d338c9 | 839 | |
9c582551 | 840 | When found, optimize the block containing the erroneous behavior. */ |
56d338c9 | 841 | static void |
9c582551 | 842 | find_explicit_erroneous_behavior (void) |
56d338c9 JL |
843 | { |
844 | basic_block bb; | |
845 | ||
11cd3bed | 846 | FOR_EACH_BB_FN (bb, cfun) |
56d338c9 JL |
847 | { |
848 | gimple_stmt_iterator si; | |
8fdc414d | 849 | |
5e94175f JL |
850 | /* Out of an abundance of caution, do not isolate paths to a |
851 | block where the block has any abnormal outgoing edges. | |
852 | ||
853 | We might be able to relax this in the future. We have to detect | |
854 | when we have to split the block with the NULL dereference and | |
855 | the trap we insert. We have to preserve abnormal edges out | |
856 | of the isolated block which in turn means updating PHIs at | |
857 | the targets of those abnormal outgoing edges. */ | |
6efe83b2 | 858 | if (has_abnormal_or_eh_outgoing_edge_p (bb)) |
5e94175f JL |
859 | continue; |
860 | ||
8fdc414d JL |
861 | /* Now look at the statements in the block and see if any of |
862 | them explicitly dereference a NULL pointer. This happens | |
863 | because of jump threading and constant propagation. */ | |
864 | for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) | |
865 | { | |
355fe088 | 866 | gimple *stmt = gsi_stmt (si); |
8fdc414d | 867 | |
606f928d | 868 | if (stmt_uses_0_or_null_in_undefined_way (stmt)) |
8fdc414d | 869 | { |
255520e0 MP |
870 | insert_trap (&si, null_pointer_node); |
871 | bb = gimple_bb (gsi_stmt (si)); | |
8fdc414d JL |
872 | |
873 | /* Ignore any more operands on this statement and | |
874 | continue the statement iterator (which should | |
875 | terminate its loop immediately. */ | |
876 | cfg_altered = true; | |
877 | break; | |
878 | } | |
b4dfdc11 | 879 | |
aac9480d MS |
880 | /* Look for a return statement that returns the address |
881 | of a local variable or the result of alloca. */ | |
538dd0b7 | 882 | if (greturn *return_stmt = dyn_cast <greturn *> (stmt)) |
aac9480d | 883 | warn_return_addr_local (bb, return_stmt); |
8fdc414d JL |
884 | } |
885 | } | |
56d338c9 | 886 | } |
b4dfdc11 | 887 | |
56d338c9 JL |
888 | /* Search the function for statements which, if executed, would cause |
889 | the program to fault such as a dereference of a NULL pointer. | |
890 | ||
891 | Such a program can't be valid if such a statement was to execute | |
892 | according to ISO standards. | |
893 | ||
894 | We detect explicit NULL pointer dereferences as well as those implied | |
895 | by a PHI argument having a NULL value which unconditionally flows into | |
896 | a dereference in the same block as the PHI. | |
897 | ||
898 | In the former case we replace the offending statement with an | |
899 | unconditional trap and eliminate the outgoing edges from the statement's | |
900 | basic block. This may expose secondary optimization opportunities. | |
901 | ||
ae93744d | 902 | In the latter case, we isolate the path(s) with the NULL PHI |
56d338c9 JL |
903 | feeding the dereference. We can then replace the offending statement |
904 | and eliminate the outgoing edges in the duplicate. Again, this may | |
905 | expose secondary optimization opportunities. | |
906 | ||
907 | A warning for both cases may be advisable as well. | |
908 | ||
909 | Other statically detectable violations of the ISO standard could be | |
910 | handled in a similar way, such as out-of-bounds array indexing. */ | |
911 | ||
912 | static unsigned int | |
913 | gimple_ssa_isolate_erroneous_paths (void) | |
914 | { | |
915 | initialize_original_copy_tables (); | |
916 | ||
917 | /* Search all the blocks for edges which, if traversed, will | |
9c582551 | 918 | result in undefined behavior. */ |
56d338c9 JL |
919 | cfg_altered = false; |
920 | ||
921 | /* First handle cases where traversal of a particular edge | |
9c582551 | 922 | triggers undefined behavior. These cases require creating |
56d338c9 JL |
923 | duplicate blocks and thus new SSA_NAMEs. |
924 | ||
925 | We want that process complete prior to the phase where we start | |
926 | removing edges from the CFG. Edge removal may ultimately result in | |
927 | removal of PHI nodes and thus releasing SSA_NAMEs back to the | |
928 | name manager. | |
929 | ||
930 | If the two processes run in parallel we could release an SSA_NAME | |
931 | back to the manager but we could still have dangling references | |
932 | to the released SSA_NAME in unreachable blocks. | |
933 | that any released names not have dangling references in the IL. */ | |
9c582551 JJ |
934 | find_implicit_erroneous_behavior (); |
935 | find_explicit_erroneous_behavior (); | |
56d338c9 | 936 | |
8fdc414d JL |
937 | free_original_copy_tables (); |
938 | ||
ae93744d | 939 | /* We scramble the CFG and loop structures a bit, clean up |
8fdc414d JL |
940 | appropriately. We really should incrementally update the |
941 | loop structures, in theory it shouldn't be that hard. */ | |
cd6bbb33 | 942 | free_dominance_info (CDI_POST_DOMINATORS); |
8fdc414d JL |
943 | if (cfg_altered) |
944 | { | |
945 | free_dominance_info (CDI_DOMINATORS); | |
8fdc414d JL |
946 | loops_state_set (LOOPS_NEED_FIXUP); |
947 | return TODO_cleanup_cfg | TODO_update_ssa; | |
948 | } | |
949 | return 0; | |
950 | } | |
951 | ||
17795822 TS |
952 | namespace { |
953 | const pass_data pass_data_isolate_erroneous_paths = | |
8fdc414d JL |
954 | { |
955 | GIMPLE_PASS, /* type */ | |
956 | "isolate-paths", /* name */ | |
957 | OPTGROUP_NONE, /* optinfo_flags */ | |
8fdc414d JL |
958 | TV_ISOLATE_ERRONEOUS_PATHS, /* tv_id */ |
959 | ( PROP_cfg | PROP_ssa ), /* properties_required */ | |
960 | 0, /* properties_provided */ | |
961 | 0, /* properties_destroyed */ | |
962 | 0, /* todo_flags_start */ | |
3bea341f | 963 | 0, /* todo_flags_finish */ |
8fdc414d JL |
964 | }; |
965 | ||
17795822 | 966 | class pass_isolate_erroneous_paths : public gimple_opt_pass |
8fdc414d JL |
967 | { |
968 | public: | |
969 | pass_isolate_erroneous_paths (gcc::context *ctxt) | |
970 | : gimple_opt_pass (pass_data_isolate_erroneous_paths, ctxt) | |
971 | {} | |
972 | ||
973 | /* opt_pass methods: */ | |
725793af DM |
974 | opt_pass * clone () final override |
975 | { | |
976 | return new pass_isolate_erroneous_paths (m_ctxt); | |
977 | } | |
978 | bool gate (function *) final override | |
1a3d085c TS |
979 | { |
980 | /* If we do not have a suitable builtin function for the trap statement, | |
981 | then do not perform the optimization. */ | |
982 | return (flag_isolate_erroneous_paths_dereference != 0 | |
76787f70 MLI |
983 | || flag_isolate_erroneous_paths_attribute != 0 |
984 | || warn_null_dereference); | |
1a3d085c TS |
985 | } |
986 | ||
725793af | 987 | unsigned int execute (function *) final override |
be55bfe6 TS |
988 | { |
989 | return gimple_ssa_isolate_erroneous_paths (); | |
990 | } | |
8fdc414d | 991 | |
d35e43b9 | 992 | }; // class pass_isolate_erroneous_paths |
17795822 | 993 | } |
8fdc414d JL |
994 | |
/* Factory for the isolate-paths pass: returns a heap-allocated
   pass_isolate_erroneous_paths instance bound to CTXT.  */

gimple_opt_pass *
make_pass_isolate_erroneous_paths (gcc::context *ctxt)
{
  return new pass_isolate_erroneous_paths (ctxt);
}