]>
Commit | Line | Data |
---|---|---|
0732f75f | 1 | /* SSA Jump Threading |
99dee823 | 2 | Copyright (C) 2005-2021 Free Software Foundation, Inc. |
0732f75f JL |
3 | |
4 | This file is part of GCC. | |
5 | ||
6 | GCC is free software; you can redistribute it and/or modify | |
7 | it under the terms of the GNU General Public License as published by | |
8 | the Free Software Foundation; either version 3, or (at your option) | |
9 | any later version. | |
10 | ||
11 | GCC is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | GNU General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with GCC; see the file COPYING3. If not see | |
18 | <http://www.gnu.org/licenses/>. */ | |
19 | ||
20 | #include "config.h" | |
21 | #include "system.h" | |
22 | #include "coretypes.h" | |
23 | #include "backend.h" | |
24 | #include "predict.h" | |
25 | #include "tree.h" | |
26 | #include "gimple.h" | |
27 | #include "fold-const.h" | |
28 | #include "cfgloop.h" | |
29 | #include "gimple-iterator.h" | |
30 | #include "tree-cfg.h" | |
31 | #include "tree-ssa-threadupdate.h" | |
0732f75f JL |
32 | #include "tree-ssa-loop.h" |
33 | #include "cfganal.h" | |
34 | #include "tree-pass.h" | |
8b2ef235 JL |
35 | #include "gimple-ssa.h" |
36 | #include "tree-phinodes.h" | |
27bddc4a | 37 | #include "tree-inline.h" |
0a4e5cf3 | 38 | #include "tree-vectorizer.h" |
2e96b5f1 AH |
39 | #include "value-range.h" |
40 | #include "gimple-range.h" | |
41 | #include "tree-ssa-threadedge.h" | |
42 | #include "gimple-range-path.h" | |
43 | #include "ssa.h" | |
44 | #include "tree-cfgcleanup.h" | |
779275c0 | 45 | #include "tree-pretty-print.h" |
0732f75f | 46 | |
69e55442 AH |
// Path registry for the backwards threader.  After all paths have been
// registered with register_path(), thread_through_all_blocks() is called
// to modify the CFG.

class back_threader_registry
{
public:
  back_threader_registry (int max_allowable_paths);
  // Queue PATH (blocks in reverse order) to be threaded out edge TAKEN.
  bool register_path (const vec<basic_block> &, edge taken);
  // Perform all the queued CFG transformations.
  bool thread_through_all_blocks (bool may_peel_loop_headers);
private:
  // The generic registry that performs the actual CFG surgery.
  jump_thread_path_registry m_lowlevel_registry;
  // Cap on the number of paths this registry will accept.
  const int m_max_allowable_paths;
  // Number of paths registered so far.
  int m_threaded_paths;
};
62 | ||
// Class to abstract the profitability code for the backwards threader.

class back_threader_profitability
{
public:
  // SPEED_P is TRUE when optimizing the function for speed, FALSE
  // when optimizing for size.
  back_threader_profitability (bool speed_p)
    : m_speed_p (speed_p)
  { }
  // Return TRUE if a path is profitable to thread.  See the
  // definition for the meaning of the arguments.
  bool profitable_path_p (const vec<basic_block> &, tree name, edge taken,
			  bool *irreducible_loop = NULL);
private:
  const bool m_speed_p;
};
76 | ||
// The backwards threader itself: starting from the conditional at the
// end of a block, search backwards for paths along which the
// conditional resolves to a constant, and register those paths for
// jump threading.

class back_threader
{
public:
  back_threader (bool speed_p);
  ~back_threader ();
  // Main entry point: look for threadable paths ending in BB.
  void maybe_thread_block (basic_block bb);
  // Perform all the queued CFG transformations.
  bool thread_through_all_blocks (bool may_peel_loop_headers);
private:
  void find_paths (basic_block bb, tree name);
  void maybe_register_path (edge taken_edge);
  bool find_paths_to_names (basic_block bb, bitmap imports);
  bool resolve_def (tree name, bitmap interesting, vec<tree> &worklist);
  bool resolve_phi (gphi *phi, bitmap imports);
  edge find_taken_edge (const vec<basic_block> &path);
  edge find_taken_edge_cond (const vec<basic_block> &path, gcond *);
  edge find_taken_edge_switch (const vec<basic_block> &path, gswitch *);
  virtual void debug ();
  virtual void dump (FILE *out);

  back_threader_registry m_registry;
  back_threader_profitability m_profit;
  gimple_ranger m_ranger;
  path_range_query m_solver;

  // Current path being analyzed.
  auto_vec<basic_block> m_path;
  // Hash to mark visited BBs while analyzing a path.
  hash_set<basic_block> m_visited_bbs;
  // The set of SSA names, any of which could potentially change the
  // value of the final conditional in a path.
  bitmap m_imports;
  // The last statement in the path.
  gimple *m_last_stmt;
  // This is a bit of a wart.  It's used to pass the LHS SSA name to
  // the profitability engine.
  tree m_name;
  // Marker to differentiate unreachable edges.
  static const edge UNREACHABLE_EDGE;
};
116 | ||
// Used to differentiate unreachable edges, so we may stop the search
// in a given direction.  (edge) -1 is never a valid edge pointer.
const edge back_threader::UNREACHABLE_EDGE = (edge) -1;
120 | ||
34cd97ff AH |
121 | back_threader::back_threader (bool speed_p) |
122 | : m_registry (param_max_fsm_thread_paths), | |
123 | m_profit (speed_p), | |
2e96b5f1 AH |
124 | m_solver (m_ranger) |
125 | { | |
126 | m_last_stmt = NULL; | |
127 | m_imports = BITMAP_ALLOC (NULL); | |
128 | } | |
129 | ||
// Release the current path vector and free the import bitmap.

back_threader::~back_threader ()
{
  m_path.release ();
  BITMAP_FREE (m_imports);
}
135 | ||
136 | // Register the current path for jump threading if it's profitable to | |
137 | // do so. TAKEN_EDGE is the known edge out of the path. | |
138 | ||
139 | void | |
140 | back_threader::maybe_register_path (edge taken_edge) | |
141 | { | |
142 | bool irreducible = false; | |
143 | bool profitable | |
144 | = m_profit.profitable_path_p (m_path, m_name, taken_edge, &irreducible); | |
145 | ||
146 | if (profitable) | |
147 | { | |
148 | m_registry.register_path (m_path, taken_edge); | |
149 | ||
150 | if (irreducible) | |
151 | vect_free_loop_info_assumptions (m_path[0]->loop_father); | |
152 | } | |
153 | } | |
154 | ||
155 | // Return the known taken edge out of a path. If the path can be | |
156 | // determined to be unreachable, return UNREACHABLE_EDGE. If no | |
157 | // outgoing edge can be calculated, return NULL. | |
158 | ||
159 | edge | |
160 | back_threader::find_taken_edge (const vec<basic_block> &path) | |
161 | { | |
162 | gcc_checking_assert (path.length () > 1); | |
163 | switch (gimple_code (m_last_stmt)) | |
164 | { | |
165 | case GIMPLE_COND: | |
166 | return find_taken_edge_cond (path, as_a<gcond *> (m_last_stmt)); | |
167 | ||
168 | case GIMPLE_SWITCH: | |
169 | return find_taken_edge_switch (path, as_a<gswitch *> (m_last_stmt)); | |
170 | ||
171 | default: | |
172 | return NULL; | |
173 | } | |
174 | } | |
175 | ||
176 | // Same as find_taken_edge, but for paths ending in a switch. | |
177 | ||
178 | edge | |
179 | back_threader::find_taken_edge_switch (const vec<basic_block> &path, | |
180 | gswitch *sw) | |
181 | { | |
182 | tree name = gimple_switch_index (sw); | |
183 | int_range_max r; | |
184 | ||
185 | m_solver.precompute_ranges (path, m_imports); | |
186 | m_solver.range_of_expr (r, name, sw); | |
187 | ||
188 | if (r.undefined_p ()) | |
189 | return UNREACHABLE_EDGE; | |
190 | ||
191 | if (r.varying_p ()) | |
192 | return NULL; | |
193 | ||
194 | tree val; | |
195 | if (r.singleton_p (&val)) | |
196 | return ::find_taken_edge (gimple_bb (sw), val); | |
197 | ||
198 | return NULL; | |
199 | } | |
200 | ||
201 | // Same as find_taken_edge, but for paths ending in a GIMPLE_COND. | |
202 | ||
203 | edge | |
204 | back_threader::find_taken_edge_cond (const vec<basic_block> &path, | |
205 | gcond *cond) | |
206 | { | |
207 | m_solver.precompute_ranges (path, m_imports); | |
208 | ||
209 | // Check if either operand is unreachable since this knowledge could | |
210 | // help the caller cut down the search space. | |
211 | int_range_max r; | |
212 | m_solver.range_of_expr (r, gimple_cond_lhs (cond)); | |
213 | if (r.undefined_p ()) | |
214 | return UNREACHABLE_EDGE; | |
215 | m_solver.range_of_expr (r, gimple_cond_rhs (cond)); | |
216 | if (r.undefined_p ()) | |
217 | return UNREACHABLE_EDGE; | |
218 | ||
219 | m_solver.range_of_stmt (r, cond); | |
220 | ||
221 | int_range<2> true_range (boolean_true_node, boolean_true_node); | |
222 | int_range<2> false_range (boolean_false_node, boolean_false_node); | |
223 | ||
224 | if (r == true_range || r == false_range) | |
225 | { | |
226 | edge e_true, e_false; | |
227 | basic_block bb = gimple_bb (cond); | |
228 | extract_true_false_edges_from_block (bb, &e_true, &e_false); | |
229 | return r == true_range ? e_true : e_false; | |
230 | } | |
231 | return NULL; | |
232 | } | |
233 | ||
234 | // Populate a vector of trees from a bitmap. | |
235 | ||
236 | static inline void | |
a3d3e8c3 | 237 | populate_worklist (vec<tree> &worklist, bitmap bits) |
2e96b5f1 AH |
238 | { |
239 | bitmap_iterator bi; | |
240 | unsigned i; | |
241 | ||
242 | EXECUTE_IF_SET_IN_BITMAP (bits, 0, i, bi) | |
243 | { | |
244 | tree name = ssa_name (i); | |
245 | worklist.quick_push (name); | |
246 | } | |
247 | } | |
248 | ||
// If taking any of the incoming edges to a PHI causes the final
// conditional of the current path to be constant, register the
// path(s), and return TRUE.

bool
back_threader::resolve_phi (gphi *phi, bitmap interesting)
{
  // Do not look through results of abnormal PHIs; return TRUE so the
  // caller stops searching through this block.
  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (phi)))
    return true;

  bool done = false;
  for (size_t i = 0; i < gimple_phi_num_args (phi); ++i)
    {
      edge e = gimple_phi_arg_edge (phi, i);

      // This is like path_crosses_loops in profitable_path_p but more
      // restrictive, since profitable_path_p allows threading the
      // first block because it would be redirected anyhow.
      //
      // If we loosened the restriction and used profitable_path_p()
      // here instead, we would peel off the first iterations of loops
      // in places like tree-ssa/pr14341.c.
      bool profitable_p = m_path[0]->loop_father == e->src->loop_father;
      if (!profitable_p)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "  FAIL: path through PHI in bb%d (incoming bb:%d) crosses loop\n",
		     e->dest->index, e->src->index);
	  continue;
	}

      tree arg = gimple_phi_arg_def (phi, i);
      if (TREE_CODE (arg) == SSA_NAME)
	{
	  unsigned v = SSA_NAME_VERSION (arg);

	  // Avoid loops as in: x_5 = PHI <x_5(2), ...>.
	  if (bitmap_bit_p (interesting, v))
	    continue;

	  // Recurse into the predecessor with ARG temporarily marked
	  // interesting; the bit is restored afterwards so sibling
	  // arguments see the original INTERESTING set.
	  bitmap_set_bit (interesting, v);
	  bitmap_set_bit (m_imports, v);
	  done |= find_paths_to_names (e->src, interesting);
	  bitmap_clear_bit (interesting, v);
	}
      else if (TREE_CODE (arg) == INTEGER_CST)
	{
	  // A constant incoming value may resolve the path outright;
	  // try the path extended with the incoming block.
	  m_path.safe_push (e->src);
	  edge taken_edge = find_taken_edge (m_path);
	  if (taken_edge && taken_edge != UNREACHABLE_EDGE)
	    {
	      maybe_register_path (taken_edge);
	      done = true;
	    }
	  m_path.pop ();
	}
    }
  return done;
}
309 | ||
310 | // If the definition of NAME causes the final conditional of the | |
311 | // current path to be constant, register the path, and return TRUE. | |
312 | ||
313 | bool | |
a3d3e8c3 | 314 | back_threader::resolve_def (tree name, bitmap interesting, vec<tree> &worklist) |
2e96b5f1 AH |
315 | { |
316 | gimple *def_stmt = SSA_NAME_DEF_STMT (name); | |
317 | ||
318 | // Handle PHIs. | |
319 | if (is_a<gphi *> (def_stmt) | |
320 | && resolve_phi (as_a<gphi *> (def_stmt), interesting)) | |
321 | return true; | |
322 | ||
323 | // Defer copies of SSAs by adding the source to the worklist. | |
324 | if (gimple_assign_single_p (def_stmt) | |
325 | && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME) | |
326 | { | |
327 | tree rhs = gimple_assign_rhs1 (def_stmt); | |
328 | bitmap_set_bit (m_imports, SSA_NAME_VERSION (rhs)); | |
329 | bitmap_set_bit (interesting, SSA_NAME_VERSION (rhs)); | |
330 | worklist.safe_push (rhs); | |
331 | } | |
332 | return false; | |
333 | } | |
334 | ||
// Find jump threading paths to any of the SSA names in the
// INTERESTING bitmap, and register any such paths.
//
// Return TRUE if no further processing past this block is necessary.
// This is because we've either registered a path, or because there is
// nothing of interest beyond this block.
//
// BB is the current block being processed; the path so far lives in
// m_path.

bool
back_threader::find_paths_to_names (basic_block bb, bitmap interesting)
{
  // Stop if BB is already on the current path (a cycle).
  if (m_visited_bbs.add (bb))
    return true;

  m_path.safe_push (bb);

  // Bail early on clearly unprofitable paths.  The first block is
  // exempt because it would be redirected anyhow.
  if (m_path.length () > 1
      && !m_profit.profitable_path_p (m_path, m_name, NULL))
    {
      m_path.pop ();
      m_visited_bbs.remove (bb);
      return false;
    }

  // Bits removed from INTERESTING while working on this block; they
  // are restored at leave_bb so the caller's set is unchanged.
  auto_bitmap processed;
  unsigned i;
  bool done = false;

  // We use a worklist instead of iterating through the bitmap,
  // because we may add new items in-flight.
  auto_vec<tree> worklist (bitmap_count_bits (interesting));
  populate_worklist (worklist, interesting);
  while (!worklist.is_empty ())
    {
      tree name = worklist.pop ();
      unsigned i = SSA_NAME_VERSION (name);
      basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (name));

      // Process any names defined in this block.
      if (def_bb == bb)
	{
	  bitmap_set_bit (processed, i);

	  if (resolve_def (name, interesting, worklist))
	    {
	      done = true;
	      goto leave_bb;
	    }
	}
      // Examine blocks that define or export an interesting SSA,
      // since they may compute a range which resolve this path.
      if ((def_bb == bb
	   || bitmap_bit_p (m_ranger.gori ().exports (bb), i))
	  && m_path.length () > 1)
	{
	  edge taken_edge = find_taken_edge (m_path);
	  if (taken_edge)
	    {
	      if (taken_edge != UNREACHABLE_EDGE)
		maybe_register_path (taken_edge);

	      done = true;
	      goto leave_bb;
	    }
	}
    }

  // If there are interesting names not yet processed, keep looking.
  bitmap_and_compl_into (interesting, processed);
  if (!bitmap_empty_p (interesting))
    {
      edge_iterator iter;
      edge e;
      FOR_EACH_EDGE (e, iter, bb->preds)
	if ((e->flags & EDGE_ABNORMAL) == 0)
	  done |= find_paths_to_names (e->src, interesting);
    }

 leave_bb:
  // Restore the names we removed so the caller sees INTERESTING
  // unmodified.
  bitmap_iterator bi;
  EXECUTE_IF_SET_IN_BITMAP (processed, 0, i, bi)
    bitmap_set_bit (interesting, i);

  m_path.pop ();
  m_visited_bbs.remove (bb);
  return done;
}
423 | ||
424 | // Search backwards from BB looking for paths where the final | |
425 | // conditional out of BB can be determined. NAME is the LHS of the | |
426 | // final conditional. Register such paths for jump threading. | |
427 | ||
428 | void | |
429 | back_threader::find_paths (basic_block bb, tree name) | |
430 | { | |
431 | gimple *stmt = last_stmt (bb); | |
432 | if (!stmt | |
433 | || (gimple_code (stmt) != GIMPLE_COND | |
434 | && gimple_code (stmt) != GIMPLE_SWITCH)) | |
435 | return; | |
436 | ||
437 | if (EDGE_COUNT (bb->succs) > 1 | |
438 | || single_succ_to_potentially_threadable_block (bb)) | |
439 | { | |
440 | m_last_stmt = stmt; | |
441 | m_visited_bbs.empty (); | |
442 | m_path.truncate (0); | |
443 | m_name = name; | |
444 | bitmap_clear (m_imports); | |
445 | ||
446 | auto_bitmap interesting; | |
447 | bitmap_copy (m_imports, m_ranger.gori ().imports (bb)); | |
448 | bitmap_copy (interesting, m_imports); | |
449 | find_paths_to_names (bb, interesting); | |
450 | } | |
451 | } | |
452 | ||
34cd97ff AH |
453 | // Simple helper to get the last statement from BB, which is assumed |
454 | // to be a control statement. Return NULL if the last statement is | |
455 | // not a control statement. | |
21f0717a | 456 | |
3d466672 JL |
457 | static gimple * |
458 | get_gimple_control_stmt (basic_block bb) | |
459 | { | |
21f0717a | 460 | gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb); |
3d466672 JL |
461 | |
462 | if (gsi_end_p (gsi)) | |
463 | return NULL; | |
464 | ||
465 | gimple *stmt = gsi_stmt (gsi); | |
466 | enum gimple_code code = gimple_code (stmt); | |
21f0717a JL |
467 | if (code == GIMPLE_COND || code == GIMPLE_SWITCH || code == GIMPLE_GOTO) |
468 | return stmt; | |
469 | return NULL; | |
3d466672 JL |
470 | } |
471 | ||
34cd97ff AH |
472 | // Search backwards from BB looking for paths where the final |
473 | // conditional maybe threaded to a successor block. Record such paths | |
474 | // for jump threading. | |
0732f75f | 475 | |
34cd97ff AH |
476 | void |
477 | back_threader::maybe_thread_block (basic_block bb) | |
0732f75f | 478 | { |
34cd97ff AH |
479 | gimple *stmt = get_gimple_control_stmt (bb); |
480 | if (!stmt) | |
481 | return; | |
0732f75f | 482 | |
34cd97ff AH |
483 | enum gimple_code code = gimple_code (stmt); |
484 | tree name; | |
485 | if (code == GIMPLE_SWITCH) | |
486 | name = gimple_switch_index (as_a <gswitch *> (stmt)); | |
487 | else if (code == GIMPLE_COND) | |
488 | name = gimple_cond_lhs (stmt); | |
489 | else if (code == GIMPLE_GOTO) | |
490 | name = gimple_goto_dest (stmt); | |
491 | else | |
492 | name = NULL; | |
493 | ||
494 | find_paths (bb, name); | |
495 | } | |
496 | ||
// Perform the actual jump threading for all the queued paths.
// MAY_PEEL_LOOP_HEADERS is passed through to the registry.

bool
back_threader::thread_through_all_blocks (bool may_peel_loop_headers)
{
  return m_registry.thread_through_all_blocks (may_peel_loop_headers);
}
504 | ||
505 | // Dump a sequence of BBs through the CFG. | |
0732f75f | 506 | |
34cd97ff AH |
507 | DEBUG_FUNCTION void |
508 | dump_path (FILE *dump_file, const vec<basic_block> &path) | |
509 | { | |
510 | for (size_t i = 0; i < path.length (); ++i) | |
0732f75f | 511 | { |
34cd97ff AH |
512 | fprintf (dump_file, "BB%d", path[i]->index); |
513 | if (i + 1 < path.length ()) | |
514 | fprintf (dump_file, " <- "); | |
0732f75f | 515 | } |
34cd97ff AH |
516 | fprintf (dump_file, "\n"); |
517 | } | |
0732f75f | 518 | |
// Debug helper: dump PATH to stderr.

DEBUG_FUNCTION void
debug (const vec <basic_block> &path)
{
  dump_path (stderr, path);
}
524 | ||
779275c0 AH |
525 | void |
526 | back_threader::dump (FILE *out) | |
527 | { | |
528 | m_solver.dump (out); | |
529 | fprintf (out, "\nCandidates for pre-computation:\n"); | |
530 | fprintf (out, "===================================\n"); | |
531 | ||
532 | bitmap_iterator bi; | |
533 | unsigned i; | |
534 | ||
535 | EXECUTE_IF_SET_IN_BITMAP (m_imports, 0, i, bi) | |
536 | { | |
537 | tree name = ssa_name (i); | |
538 | print_generic_expr (out, name, TDF_NONE); | |
539 | fprintf (out, "\n"); | |
540 | } | |
541 | } | |
542 | ||
// Debug helper: dump the threader state to stderr.

void
back_threader::debug ()
{
  dump (stderr);
}
548 | ||
69e55442 AH |
549 | back_threader_registry::back_threader_registry (int max_allowable_paths) |
550 | : m_max_allowable_paths (max_allowable_paths) | |
551 | { | |
69e55442 AH |
552 | m_threaded_paths = 0; |
553 | } | |
bb5e62d6 | 554 | |
69e55442 | 555 | bool |
01005550 | 556 | back_threader_registry::thread_through_all_blocks (bool may_peel_loop_headers) |
69e55442 | 557 | { |
01005550 | 558 | return m_lowlevel_registry.thread_through_all_blocks (may_peel_loop_headers); |
69e55442 AH |
559 | } |
560 | ||
/* Examine jump threading path PATH and return TRUE if it is profitable to
   thread it, otherwise return FALSE.

   NAME is the SSA_NAME of the variable we found to have a constant
   value on PATH.  If unknown, SSA_NAME is NULL.

   If the taken edge out of the path is known ahead of time it is passed in
   TAKEN_EDGE, otherwise it is NULL.

   CREATES_IRREDUCIBLE_LOOP, if non-null is set to TRUE if threading this path
   would create an irreducible loop.  */

bool
back_threader_profitability::profitable_path_p (const vec<basic_block> &m_path,
						tree name,
						edge taken_edge,
						bool *creates_irreducible_loop)
{
  gcc_checking_assert (!m_path.is_empty ());

  /* We can have an empty path here (excluding the DEF block) when the
     statement that makes a conditional generate a compile-time
     constant result is in the same block as the conditional.

     That's not really a jump threading opportunity, but instead is
     simple cprop & simplification.  We could handle it here if we
     wanted by wiring up all the incoming edges.  If we run this
     early in IPA, that might be worth doing.   For now we just
     reject that case.  */
  if (m_path.length () <= 1)
    return false;

  /* Reject paths that are too long to be worth duplicating.  */
  if (m_path.length () > (unsigned) param_max_fsm_thread_length)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAIL: FSM jump-thread path not considered: "
		 "the number of basic blocks on the path "
		 "exceeds PARAM_MAX_FSM_THREAD_LENGTH.\n");
      return false;
    }

  int n_insns = 0;
  gimple_stmt_iterator gsi;
  loop_p loop = m_path[0]->loop_father;
  bool path_crosses_loops = false;
  bool threaded_through_latch = false;
  bool multiway_branch_in_path = false;
  bool threaded_multiway_branch = false;
  bool contains_hot_bb = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Checking profitability of path (backwards): ");

  /* Count the number of instructions on the path: as these instructions
     will have to be duplicated, we will not record the path if there
     are too many instructions on the path.  Also check that all the
     blocks in the path belong to a single loop.  */
  for (unsigned j = 0; j < m_path.length (); j++)
    {
      basic_block bb = m_path[j];

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " bb:%i", bb->index);
      /* Remember, blocks in the path are stored in opposite order
	 in the PATH array.  The last entry in the array represents
	 the block with an outgoing edge that we will redirect to the
	 jump threading path.  Thus we don't care about that block's
	 loop father, nor how many statements are in that block because
	 it will not be copied or whether or not it ends in a multiway
	 branch.  */
      if (j < m_path.length () - 1)
	{
	  int orig_n_insns = n_insns;
	  if (bb->loop_father != loop)
	    {
	      path_crosses_loops = true;

	      // Dump rest of blocks.
	      if (dump_file && (dump_flags & TDF_DETAILS))
		for (j++; j < m_path.length (); j++)
		  {
		    bb = m_path[j];
		    fprintf (dump_file, " bb:%i", bb->index);
		  }
	      break;
	    }

	  /* PHIs in the path will create degenerate PHIS in the
	     copied path which will then get propagated away, so
	     looking at just the duplicate path the PHIs would
	     seem unimportant.

	     But those PHIs, because they're assignments to objects
	     typically with lives that exist outside the thread path,
	     will tend to generate PHIs (or at least new PHI arguments)
	     at points where we leave the thread path and rejoin
	     the original blocks.  So we do want to account for them.

	     We ignore virtual PHIs.  We also ignore cases where BB
	     has a single incoming edge.  That's the most common
	     degenerate PHI we'll see here.  Finally we ignore PHIs
	     that are associated with the value we're tracking as
	     that object likely dies.  */
	  if (EDGE_COUNT (bb->succs) > 1 && EDGE_COUNT (bb->preds) > 1)
	    {
	      for (gphi_iterator gsip = gsi_start_phis (bb);
		   !gsi_end_p (gsip);
		   gsi_next (&gsip))
		{
		  gphi *phi = gsip.phi ();
		  tree dst = gimple_phi_result (phi);

		  /* Note that if both NAME and DST are anonymous
		     SSA_NAMEs, then we do not have enough information
		     to consider them associated.  */
		  if (dst != name
		      && name
		      && TREE_CODE (name) == SSA_NAME
		      && (SSA_NAME_VAR (dst) != SSA_NAME_VAR (name)
			  || !SSA_NAME_VAR (dst))
		      && !virtual_operand_p (dst))
		    ++n_insns;
		}
	    }

	  if (!contains_hot_bb && m_speed_p)
	    contains_hot_bb |= optimize_bb_for_speed_p (bb);
	  for (gsi = gsi_after_labels (bb);
	       !gsi_end_p (gsi);
	       gsi_next_nondebug (&gsi))
	    {
	      /* Do not allow OpenACC loop markers and __builtin_constant_p on
		 threading paths.  The latter is disallowed, because an
		 expression might be constant on two threading paths, and
		 become non-constant (i.e.: phi) when they merge.  */
	      gimple *stmt = gsi_stmt (gsi);
	      if (gimple_call_internal_p (stmt, IFN_UNIQUE)
		  || gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P))
		return false;
	      /* Do not count empty statements and labels.  */
	      if (gimple_code (stmt) != GIMPLE_NOP
		  && !(gimple_code (stmt) == GIMPLE_ASSIGN
		       && gimple_assign_rhs_code (stmt) == ASSERT_EXPR)
		  && !is_gimple_debug (stmt))
		n_insns += estimate_num_insns (stmt, &eni_size_weights);
	    }
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " (%i insns)", n_insns-orig_n_insns);

	  /* We do not look at the block with the threaded branch
	     in this loop.  So if any block with a last statement that
	     is a GIMPLE_SWITCH or GIMPLE_GOTO is seen, then we have a
	     multiway branch on our path.

	     The block in PATH[0] is special, it's the block were we're
	     going to be able to eliminate its branch.  */
	  gimple *last = last_stmt (bb);
	  if (last && (gimple_code (last) == GIMPLE_SWITCH
		       || gimple_code (last) == GIMPLE_GOTO))
	    {
	      if (j == 0)
		threaded_multiway_branch = true;
	      else
		multiway_branch_in_path = true;
	    }
	}

      /* Note if we thread through the latch, we will want to include
	 the last entry in the array when determining if we thread
	 through the loop latch.  */
      if (loop->latch == bb)
	threaded_through_latch = true;
    }

  gimple *stmt = get_gimple_control_stmt (m_path[0]);
  gcc_assert (stmt);

  /* We are going to remove the control statement at the end of the
     last block in the threading path.  So don't count it against our
     statement count.  */

  int stmt_insns = estimate_num_insns (stmt, &eni_size_weights);
  n_insns-= stmt_insns;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n  Control statement insns: %i\n"
	     "  Overall: %i insns\n",
	     stmt_insns, n_insns);

  if (creates_irreducible_loop)
    {
      /* If this path threaded through the loop latch back into the
	 same loop and the destination does not dominate the loop
	 latch, then this thread would create an irreducible loop.  */
      *creates_irreducible_loop = false;
      if (taken_edge
	  && threaded_through_latch
	  && loop == taken_edge->dest->loop_father
	  && (determine_bb_domination_status (loop, taken_edge->dest)
	      == DOMST_NONDOMINATING))
	*creates_irreducible_loop = true;
    }

  if (path_crosses_loops)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAIL: FSM jump-thread path not considered: "
		 "the path crosses loops.\n");
      return false;
    }

  /* Threading is profitable if the path duplicated is hot but also
     in a case we separate cold path from hot path and permit optimization
     of the hot path later.  Be on the aggressive side here.  In some
     testcases, as in PR 78407 this leads to noticeable improvements.  */
  if (m_speed_p
      && ((taken_edge && optimize_edge_for_speed_p (taken_edge))
	  || contains_hot_bb))
    {
      if (n_insns >= param_max_fsm_thread_path_insns)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "  FAIL: FSM jump-thread path not considered: "
		     "the number of instructions on the path "
		     "exceeds PARAM_MAX_FSM_THREAD_PATH_INSNS.\n");
	  return false;
	}
    }
  else if (!m_speed_p && n_insns > 1)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAIL: FSM jump-thread path not considered: "
		 "duplication of %i insns is needed and optimizing for size.\n",
		 n_insns);
      return false;
    }

  /* We avoid creating irreducible inner loops unless we thread through
     a multiway branch, in which case we have deemed it worth losing
     other loop optimizations later.

     We also consider it worth creating an irreducible inner loop if
     the number of copied statement is low relative to the length of
     the path -- in that case there's little the traditional loop
     optimizer would have done anyway, so an irreducible loop is not
     so bad.  */
  if (!threaded_multiway_branch
      && creates_irreducible_loop
      && *creates_irreducible_loop
      && (n_insns * (unsigned) param_fsm_scale_path_stmts
	  > (m_path.length () *
	     (unsigned) param_fsm_scale_path_blocks)))

    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  FAIL: FSM would create irreducible loop without threading "
		 "multiway branch.\n");
      return false;
    }

  /* If this path does not thread through the loop latch, then we are
     using the FSM threader to find old style jump threads.  This
     is good, except the FSM threader does not re-use an existing
     threading path to reduce code duplication.

     So for that case, drastically reduce the number of statements
     we are allowed to copy.  */
  if (!(threaded_through_latch && threaded_multiway_branch)
      && (n_insns * param_fsm_scale_path_stmts
	  >= param_max_jump_thread_duplication_stmts))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  FAIL: FSM did not thread around loop and would copy too "
		 "many statements.\n");
      return false;
    }

  /* When there is a multi-way branch on the path, then threading can
     explode the CFG due to duplicating the edges for that multi-way
     branch.  So like above, only allow a multi-way branch on the path
     if we actually thread a multi-way branch.  */
  if (!threaded_multiway_branch && multiway_branch_in_path)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  FAIL: FSM Thread through multiway branch without threading "
		 "a multiway branch.\n");
      return false;
    }
  return true;
}
854 | ||
cde30fe0 AH |
855 | /* The current path PATH is a vector of blocks forming a jump threading |
856 | path in reverse order. TAKEN_EDGE is the edge taken from path[0]. | |
081fdda6 | 857 | |
cde30fe0 | 858 | Convert the current path into the form used by register_jump_thread and |
69e55442 | 859 | register it. |
081fdda6 | 860 | |
69e55442 AH |
861 | Return TRUE if successful or FALSE otherwise. */ |
862 | ||
863 | bool | |
864 | back_threader_registry::register_path (const vec<basic_block> &m_path, | |
865 | edge taken_edge) | |
081fdda6 | 866 | { |
69e55442 AH |
867 | if (m_threaded_paths > m_max_allowable_paths) |
868 | { | |
869 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
870 | fprintf (dump_file, " FAIL: FSM jump-thread path not considered: " | |
871 | "the number of previously recorded FSM paths to " | |
872 | "thread exceeds PARAM_MAX_FSM_THREAD_PATHS.\n"); | |
873 | return false; | |
874 | } | |
875 | ||
876 | vec<jump_thread_edge *> *jump_thread_path | |
877 | = m_lowlevel_registry.allocate_thread_path (); | |
081fdda6 JL |
878 | |
879 | /* Record the edges between the blocks in PATH. */ | |
cde30fe0 | 880 | for (unsigned int j = 0; j + 1 < m_path.length (); j++) |
081fdda6 | 881 | { |
cde30fe0 AH |
882 | basic_block bb1 = m_path[m_path.length () - j - 1]; |
883 | basic_block bb2 = m_path[m_path.length () - j - 2]; | |
8af01c66 | 884 | |
081fdda6 JL |
885 | edge e = find_edge (bb1, bb2); |
886 | gcc_assert (e); | |
d8ea4703 | 887 | jump_thread_edge *x |
69e55442 AH |
888 | = m_lowlevel_registry.allocate_thread_edge (e, EDGE_FSM_THREAD); |
889 | jump_thread_path->safe_push (x); | |
081fdda6 JL |
890 | } |
891 | ||
892 | /* Add the edge taken when the control variable has value ARG. */ | |
893 | jump_thread_edge *x | |
69e55442 AH |
894 | = m_lowlevel_registry.allocate_thread_edge (taken_edge, |
895 | EDGE_NO_COPY_SRC_BLOCK); | |
896 | jump_thread_path->safe_push (x); | |
081fdda6 | 897 | |
2e96b5f1 AH |
898 | if (m_lowlevel_registry.register_jump_thread (jump_thread_path)) |
899 | ++m_threaded_paths; | |
69e55442 | 900 | return true; |
081fdda6 JL |
901 | } |
902 | ||
8b2ef235 JL |
903 | namespace { |
904 | ||
905 | const pass_data pass_data_thread_jumps = | |
906 | { | |
907 | GIMPLE_PASS, | |
908 | "thread", | |
909 | OPTGROUP_NONE, | |
910 | TV_TREE_SSA_THREAD_JUMPS, | |
911 | ( PROP_cfg | PROP_ssa ), | |
912 | 0, | |
913 | 0, | |
914 | 0, | |
bb7ebad1 | 915 | TODO_update_ssa, |
8b2ef235 JL |
916 | }; |
917 | ||
918 | class pass_thread_jumps : public gimple_opt_pass | |
919 | { | |
920 | public: | |
921 | pass_thread_jumps (gcc::context *ctxt) | |
922 | : gimple_opt_pass (pass_data_thread_jumps, ctxt) | |
923 | {} | |
924 | ||
925 | opt_pass * clone (void) { return new pass_thread_jumps (m_ctxt); } | |
926 | virtual bool gate (function *); | |
927 | virtual unsigned int execute (function *); | |
928 | }; | |
929 | ||
930 | bool | |
931 | pass_thread_jumps::gate (function *fun ATTRIBUTE_UNUSED) | |
932 | { | |
27bddc4a | 933 | return flag_expensive_optimizations; |
8b2ef235 JL |
934 | } |
935 | ||
2e96b5f1 AH |
936 | // Try to thread blocks in FUN. Return TRUE if any jump thread paths were |
937 | // registered. | |
8b2ef235 | 938 | |
2e96b5f1 AH |
939 | static bool |
940 | try_thread_blocks (function *fun) | |
8b2ef235 JL |
941 | { |
942 | /* Try to thread each block with more than one successor. */ | |
34cd97ff | 943 | back_threader threader (/*speed_p=*/true); |
8b2ef235 JL |
944 | basic_block bb; |
945 | FOR_EACH_BB_FN (bb, fun) | |
946 | { | |
947 | if (EDGE_COUNT (bb->succs) > 1) | |
34cd97ff | 948 | threader.maybe_thread_block (bb); |
8b2ef235 | 949 | } |
01005550 | 950 | return threader.thread_through_all_blocks (/*peel_loop_headers=*/true); |
2e96b5f1 AH |
951 | } |
952 | ||
953 | unsigned int | |
954 | pass_thread_jumps::execute (function *fun) | |
955 | { | |
956 | loop_optimizer_init (LOOPS_HAVE_PREHEADERS | LOOPS_HAVE_SIMPLE_LATCHES); | |
957 | ||
cac2353f | 958 | bool changed = try_thread_blocks (fun); |
bb7ebad1 RB |
959 | |
960 | loop_optimizer_finalize (); | |
cac2353f | 961 | |
bb7ebad1 | 962 | return changed ? TODO_cleanup_cfg : 0; |
8b2ef235 JL |
963 | } |
964 | ||
965 | } | |
966 | ||
967 | gimple_opt_pass * | |
968 | make_pass_thread_jumps (gcc::context *ctxt) | |
969 | { | |
970 | return new pass_thread_jumps (ctxt); | |
971 | } | |
b720e919 JH |
972 | |
973 | namespace { | |
974 | ||
975 | const pass_data pass_data_early_thread_jumps = | |
976 | { | |
977 | GIMPLE_PASS, | |
978 | "ethread", | |
979 | OPTGROUP_NONE, | |
980 | TV_TREE_SSA_THREAD_JUMPS, | |
981 | ( PROP_cfg | PROP_ssa ), | |
982 | 0, | |
983 | 0, | |
984 | 0, | |
985 | ( TODO_cleanup_cfg | TODO_update_ssa ), | |
986 | }; | |
987 | ||
988 | class pass_early_thread_jumps : public gimple_opt_pass | |
989 | { | |
990 | public: | |
991 | pass_early_thread_jumps (gcc::context *ctxt) | |
992 | : gimple_opt_pass (pass_data_early_thread_jumps, ctxt) | |
993 | {} | |
994 | ||
995 | opt_pass * clone (void) { return new pass_early_thread_jumps (m_ctxt); } | |
996 | virtual bool gate (function *); | |
997 | virtual unsigned int execute (function *); | |
998 | }; | |
999 | ||
1000 | bool | |
1001 | pass_early_thread_jumps::gate (function *fun ATTRIBUTE_UNUSED) | |
1002 | { | |
1003 | return true; | |
1004 | } | |
1005 | ||
b720e919 JH |
1006 | unsigned int |
1007 | pass_early_thread_jumps::execute (function *fun) | |
1008 | { | |
108fdd6d RB |
1009 | loop_optimizer_init (AVOID_CFG_MODIFICATIONS); |
1010 | ||
b720e919 | 1011 | /* Try to thread each block with more than one successor. */ |
34cd97ff | 1012 | back_threader threader (/*speed_p=*/false); |
b720e919 JH |
1013 | basic_block bb; |
1014 | FOR_EACH_BB_FN (bb, fun) | |
1015 | { | |
1016 | if (EDGE_COUNT (bb->succs) > 1) | |
34cd97ff | 1017 | threader.maybe_thread_block (bb); |
b720e919 | 1018 | } |
01005550 | 1019 | threader.thread_through_all_blocks (/*peel_loop_headers=*/true); |
108fdd6d RB |
1020 | |
1021 | loop_optimizer_finalize (); | |
b720e919 JH |
1022 | return 0; |
1023 | } | |
1024 | ||
1025 | } | |
1026 | ||
1027 | gimple_opt_pass * | |
1028 | make_pass_early_thread_jumps (gcc::context *ctxt) | |
1029 | { | |
1030 | return new pass_early_thread_jumps (ctxt); | |
1031 | } |