]>
Commit | Line | Data |
---|---|---|
2039eb2e | 1 | /* SSA Jump Threading |
fbd26352 | 2 | Copyright (C) 2005-2019 Free Software Foundation, Inc. |
2039eb2e | 3 | |
4 | This file is part of GCC. | |
5 | ||
6 | GCC is free software; you can redistribute it and/or modify | |
7 | it under the terms of the GNU General Public License as published by | |
8 | the Free Software Foundation; either version 3, or (at your option) | |
9 | any later version. | |
10 | ||
11 | GCC is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | GNU General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with GCC; see the file COPYING3. If not see | |
18 | <http://www.gnu.org/licenses/>. */ | |
19 | ||
20 | #include "config.h" | |
21 | #include "system.h" | |
22 | #include "coretypes.h" | |
23 | #include "backend.h" | |
24 | #include "predict.h" | |
25 | #include "tree.h" | |
26 | #include "gimple.h" | |
27 | #include "fold-const.h" | |
28 | #include "cfgloop.h" | |
29 | #include "gimple-iterator.h" | |
30 | #include "tree-cfg.h" | |
31 | #include "tree-ssa-threadupdate.h" | |
32 | #include "params.h" | |
33 | #include "tree-ssa-loop.h" | |
34 | #include "cfganal.h" | |
35 | #include "tree-pass.h" | |
372172fe | 36 | #include "gimple-ssa.h" |
37 | #include "tree-phinodes.h" | |
e8272095 | 38 | #include "tree-inline.h" |
2f8d6297 | 39 | #include "tree-vectorizer.h" |
2039eb2e | 40 | |
ce73fd50 | 41 | class thread_jumps |
42 | { | |
43 | public: | |
44 | void find_jump_threads_backwards (basic_block bb, bool speed_p); | |
45 | private: | |
46 | edge profitable_jump_thread_path (basic_block bbi, tree name, tree arg, | |
47 | bool *creates_irreducible_loop); | |
48 | void convert_and_register_current_path (edge taken_edge); | |
49 | void register_jump_thread_path_if_profitable (tree name, tree arg, | |
50 | basic_block def_bb); | |
51 | void handle_assignment (gimple *stmt, tree name, basic_block def_bb); | |
52 | void handle_phi (gphi *phi, tree name, basic_block def_bb); | |
53 | void fsm_find_control_statement_thread_paths (tree name); | |
54 | bool check_subpath_and_update_thread_path (basic_block last_bb, | |
55 | basic_block new_bb, | |
56 | int *next_path_length); | |
57 | ||
58 | /* Maximum number of BBs we are allowed to thread. */ | |
59 | int m_max_threaded_paths; | |
60 | /* Hash to keep track of seen bbs. */ | |
61 | hash_set<basic_block> m_visited_bbs; | |
62 | /* Current path we're analyzing. */ | |
63 | auto_vec<basic_block> m_path; | |
64 | /* Tracks if we have recursed through a loop PHI node. */ | |
65 | bool m_seen_loop_phi; | |
66 | /* Indicate that we could increase code size to improve the | |
67 | code path. */ | |
68 | bool m_speed_p; | |
69 | }; | |
2039eb2e | 70 | |
f08943b6 | 71 | /* Simple helper to get the last statement from BB, which is assumed |
580efa23 | 72 | to be a control statement. Return NULL if the last statement is |
73 | not a control statement. */ | |
74 | ||
f08943b6 | 75 | static gimple * |
76 | get_gimple_control_stmt (basic_block bb) | |
77 | { | |
580efa23 | 78 | gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb); |
f08943b6 | 79 | |
80 | if (gsi_end_p (gsi)) | |
81 | return NULL; | |
82 | ||
83 | gimple *stmt = gsi_stmt (gsi); | |
84 | enum gimple_code code = gimple_code (stmt); | |
580efa23 | 85 | if (code == GIMPLE_COND || code == GIMPLE_SWITCH || code == GIMPLE_GOTO) |
86 | return stmt; | |
87 | return NULL; | |
f08943b6 | 88 | } |
89 | ||
73b43bd5 | 90 | /* Return true if the CFG contains at least one path from START_BB to |
91 | END_BB. When a path is found, record in PATH the blocks from | |
ce73fd50 | 92 | END_BB to START_BB. LOCAL_VISITED_BBS is used to make sure we |
93 | don't fall into an infinite loop. Bound the recursion to basic | |
94 | blocks belonging to LOOP. */ | |
2039eb2e | 95 | |
96 | static bool | |
97 | fsm_find_thread_path (basic_block start_bb, basic_block end_bb, | |
73b43bd5 | 98 | vec<basic_block> &path, |
ce73fd50 | 99 | hash_set<basic_block> &local_visited_bbs, |
100 | loop_p loop) | |
2039eb2e | 101 | { |
102 | if (loop != start_bb->loop_father) | |
103 | return false; | |
104 | ||
105 | if (start_bb == end_bb) | |
106 | { | |
73b43bd5 | 107 | path.safe_push (start_bb); |
2039eb2e | 108 | return true; |
109 | } | |
110 | ||
ce73fd50 | 111 | if (!local_visited_bbs.add (start_bb)) |
2039eb2e | 112 | { |
113 | edge e; | |
114 | edge_iterator ei; | |
115 | FOR_EACH_EDGE (e, ei, start_bb->succs) | |
ce73fd50 | 116 | if (fsm_find_thread_path (e->dest, end_bb, path, local_visited_bbs, |
117 | loop)) | |
2039eb2e | 118 | { |
73b43bd5 | 119 | path.safe_push (start_bb); |
2039eb2e | 120 | return true; |
121 | } | |
122 | } | |
123 | ||
124 | return false; | |
125 | } | |
126 | ||
99583d05 | 127 | /* Examine jump threading path PATH to which we want to add BBI. |
128 | ||
129 | If the resulting path is profitable to thread, then return the | |
130 | final taken edge from the path, NULL otherwise. | |
131 | ||
132 | NAME is the SSA_NAME of the variable we found to have a constant | |
73b43bd5 | 133 | value on PATH. ARG is the constant value of NAME on that path. |
99583d05 | 134 | |
ce73fd50 | 135 | BBI will be appended to PATH when we have a profitable jump |
136 | threading path. Callers are responsible for removing BBI from PATH | |
137 | in that case. */ | |
99583d05 | 138 | |
ce73fd50 | 139 | edge |
140 | thread_jumps::profitable_jump_thread_path (basic_block bbi, tree name, | |
141 | tree arg, | |
142 | bool *creates_irreducible_loop) | |
99583d05 | 143 | { |
144 | /* Note BBI is not in the path yet, hence the +1 in the test below | |
145 | to make sure BBI is accounted for in the path length test. */ | |
6385d6d3 | 146 | |
147 | /* We can get a length of 0 here when the statement that | |
148 | makes a conditional generate a compile-time constant | |
149 | result is in the same block as the conditional. | |
150 | ||
151 | That's not really a jump threading opportunity, but instead is | |
152 | simple cprop & simplification. We could handle it here if we | |
153 | wanted by wiring up all the incoming edges. If we run this | |
154 | early in IPA, that might be worth doing. For now we just | |
155 | reject that case. */ | |
ce73fd50 | 156 | if (m_path.is_empty ()) |
6385d6d3 | 157 | return NULL; |
158 | ||
ce73fd50 | 159 | if (m_path.length () + 1 |
160 | > (unsigned) PARAM_VALUE (PARAM_MAX_FSM_THREAD_LENGTH)) | |
99583d05 | 161 | { |
162 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
163 | fprintf (dump_file, "FSM jump-thread path not considered: " | |
164 | "the number of basic blocks on the path " | |
165 | "exceeds PARAM_MAX_FSM_THREAD_LENGTH.\n"); | |
166 | return NULL; | |
167 | } | |
168 | ||
ce73fd50 | 169 | if (m_max_threaded_paths <= 0) |
99583d05 | 170 | { |
171 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
172 | fprintf (dump_file, "FSM jump-thread path not considered: " | |
173 | "the number of previously recorded FSM paths to " | |
174 | "thread exceeds PARAM_MAX_FSM_THREAD_PATHS.\n"); | |
175 | return NULL; | |
176 | } | |
177 | ||
178 | /* Add BBI to the path. | |
179 | From this point onward, if we decide we the path is not profitable | |
180 | to thread, we must remove BBI from the path. */ | |
ce73fd50 | 181 | m_path.safe_push (bbi); |
99583d05 | 182 | |
183 | int n_insns = 0; | |
184 | gimple_stmt_iterator gsi; | |
ce73fd50 | 185 | loop_p loop = m_path[0]->loop_father; |
99583d05 | 186 | bool path_crosses_loops = false; |
187 | bool threaded_through_latch = false; | |
188 | bool multiway_branch_in_path = false; | |
189 | bool threaded_multiway_branch = false; | |
0130b5f5 | 190 | bool contains_hot_bb = false; |
191 | ||
192 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
193 | fprintf (dump_file, "Checking profitability of path (backwards): "); | |
99583d05 | 194 | |
195 | /* Count the number of instructions on the path: as these instructions | |
196 | will have to be duplicated, we will not record the path if there | |
197 | are too many instructions on the path. Also check that all the | |
198 | blocks in the path belong to a single loop. */ | |
ce73fd50 | 199 | for (unsigned j = 0; j < m_path.length (); j++) |
99583d05 | 200 | { |
ce73fd50 | 201 | basic_block bb = m_path[j]; |
99583d05 | 202 | |
0130b5f5 | 203 | if (dump_file && (dump_flags & TDF_DETAILS)) |
204 | fprintf (dump_file, " bb:%i", bb->index); | |
99583d05 | 205 | /* Remember, blocks in the path are stored in opposite order |
206 | in the PATH array. The last entry in the array represents | |
207 | the block with an outgoing edge that we will redirect to the | |
208 | jump threading path. Thus we don't care about that block's | |
209 | loop father, nor how many statements are in that block because | |
210 | it will not be copied or whether or not it ends in a multiway | |
211 | branch. */ | |
ce73fd50 | 212 | if (j < m_path.length () - 1) |
99583d05 | 213 | { |
0130b5f5 | 214 | int orig_n_insns = n_insns; |
99583d05 | 215 | if (bb->loop_father != loop) |
216 | { | |
217 | path_crosses_loops = true; | |
218 | break; | |
219 | } | |
220 | ||
221 | /* PHIs in the path will create degenerate PHIS in the | |
222 | copied path which will then get propagated away, so | |
223 | looking at just the duplicate path the PHIs would | |
224 | seem unimportant. | |
225 | ||
226 | But those PHIs, because they're assignments to objects | |
227 | typically with lives that exist outside the thread path, | |
228 | will tend to generate PHIs (or at least new PHI arguments) | |
229 | at points where we leave the thread path and rejoin | |
230 | the original blocks. So we do want to account for them. | |
231 | ||
232 | We ignore virtual PHIs. We also ignore cases where BB | |
233 | has a single incoming edge. That's the most common | |
234 | degenerate PHI we'll see here. Finally we ignore PHIs | |
235 | that are associated with the value we're tracking as | |
236 | that object likely dies. */ | |
237 | if (EDGE_COUNT (bb->succs) > 1 && EDGE_COUNT (bb->preds) > 1) | |
238 | { | |
239 | for (gphi_iterator gsip = gsi_start_phis (bb); | |
240 | !gsi_end_p (gsip); | |
241 | gsi_next (&gsip)) | |
242 | { | |
243 | gphi *phi = gsip.phi (); | |
244 | tree dst = gimple_phi_result (phi); | |
245 | ||
246 | /* Note that if both NAME and DST are anonymous | |
247 | SSA_NAMEs, then we do not have enough information | |
248 | to consider them associated. */ | |
7c7ac50d | 249 | if (dst != name |
250 | && (SSA_NAME_VAR (dst) != SSA_NAME_VAR (name) | |
251 | || !SSA_NAME_VAR (dst)) | |
99583d05 | 252 | && !virtual_operand_p (dst)) |
253 | ++n_insns; | |
254 | } | |
255 | } | |
256 | ||
ce73fd50 | 257 | if (!contains_hot_bb && m_speed_p) |
0130b5f5 | 258 | contains_hot_bb |= optimize_bb_for_speed_p (bb); |
99583d05 | 259 | for (gsi = gsi_after_labels (bb); |
260 | !gsi_end_p (gsi); | |
261 | gsi_next_nondebug (&gsi)) | |
262 | { | |
263 | gimple *stmt = gsi_stmt (gsi); | |
264 | /* Do not count empty statements and labels. */ | |
265 | if (gimple_code (stmt) != GIMPLE_NOP | |
266 | && !(gimple_code (stmt) == GIMPLE_ASSIGN | |
267 | && gimple_assign_rhs_code (stmt) == ASSERT_EXPR) | |
268 | && !is_gimple_debug (stmt)) | |
0130b5f5 | 269 | n_insns += estimate_num_insns (stmt, &eni_size_weights); |
99583d05 | 270 | } |
0130b5f5 | 271 | if (dump_file && (dump_flags & TDF_DETAILS)) |
272 | fprintf (dump_file, " (%i insns)", n_insns-orig_n_insns); | |
99583d05 | 273 | |
274 | /* We do not look at the block with the threaded branch | |
275 | in this loop. So if any block with a last statement that | |
276 | is a GIMPLE_SWITCH or GIMPLE_GOTO is seen, then we have a | |
277 | multiway branch on our path. | |
278 | ||
279 | The block in PATH[0] is special, it's the block were we're | |
280 | going to be able to eliminate its branch. */ | |
281 | gimple *last = last_stmt (bb); | |
282 | if (last && (gimple_code (last) == GIMPLE_SWITCH | |
283 | || gimple_code (last) == GIMPLE_GOTO)) | |
284 | { | |
285 | if (j == 0) | |
286 | threaded_multiway_branch = true; | |
287 | else | |
288 | multiway_branch_in_path = true; | |
289 | } | |
290 | } | |
291 | ||
292 | /* Note if we thread through the latch, we will want to include | |
293 | the last entry in the array when determining if we thread | |
294 | through the loop latch. */ | |
295 | if (loop->latch == bb) | |
296 | threaded_through_latch = true; | |
297 | } | |
298 | ||
ce73fd50 | 299 | gimple *stmt = get_gimple_control_stmt (m_path[0]); |
e8272095 | 300 | gcc_assert (stmt); |
301 | ||
99583d05 | 302 | /* We are going to remove the control statement at the end of the |
303 | last block in the threading path. So don't count it against our | |
304 | statement count. */ | |
99583d05 | 305 | |
0130b5f5 | 306 | int stmt_insns = estimate_num_insns (stmt, &eni_size_weights); |
307 | n_insns-= stmt_insns; | |
308 | ||
309 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
310 | fprintf (dump_file, "\n Control statement insns: %i\n" | |
311 | " Overall: %i insns\n", | |
312 | stmt_insns, n_insns); | |
e8272095 | 313 | |
99583d05 | 314 | /* We have found a constant value for ARG. For GIMPLE_SWITCH |
315 | and GIMPLE_GOTO, we use it as-is. However, for a GIMPLE_COND | |
316 | we need to substitute, fold and simplify so we can determine | |
317 | the edge taken out of the last block. */ | |
318 | if (gimple_code (stmt) == GIMPLE_COND) | |
319 | { | |
320 | enum tree_code cond_code = gimple_cond_code (stmt); | |
321 | ||
322 | /* We know the underyling format of the condition. */ | |
323 | arg = fold_binary (cond_code, boolean_type_node, | |
324 | arg, gimple_cond_rhs (stmt)); | |
325 | } | |
326 | ||
327 | /* If this path threaded through the loop latch back into the | |
328 | same loop and the destination does not dominate the loop | |
329 | latch, then this thread would create an irreducible loop. | |
330 | ||
331 | We have to know the outgoing edge to figure this out. */ | |
ce73fd50 | 332 | edge taken_edge = find_taken_edge (m_path[0], arg); |
99583d05 | 333 | |
334 | /* There are cases where we may not be able to extract the | |
335 | taken edge. For example, a computed goto to an absolute | |
336 | address. Handle those cases gracefully. */ | |
337 | if (taken_edge == NULL) | |
338 | { | |
ce73fd50 | 339 | m_path.pop (); |
99583d05 | 340 | return NULL; |
341 | } | |
342 | ||
2f8d6297 | 343 | *creates_irreducible_loop = false; |
99583d05 | 344 | if (threaded_through_latch |
345 | && loop == taken_edge->dest->loop_father | |
346 | && (determine_bb_domination_status (loop, taken_edge->dest) | |
347 | == DOMST_NONDOMINATING)) | |
2f8d6297 | 348 | *creates_irreducible_loop = true; |
99583d05 | 349 | |
350 | if (path_crosses_loops) | |
351 | { | |
352 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
353 | fprintf (dump_file, "FSM jump-thread path not considered: " | |
354 | "the path crosses loops.\n"); | |
ce73fd50 | 355 | m_path.pop (); |
99583d05 | 356 | return NULL; |
357 | } | |
358 | ||
0130b5f5 | 359 | /* Threading is profitable if the path duplicated is hot but also |
360 | in a case we separate cold path from hot path and permit optimization | |
361 | of the hot path later. Be on the agressive side here. In some testcases, | |
362 | as in PR 78407 this leads to noticeable improvements. */ | |
ce73fd50 | 363 | if (m_speed_p && (optimize_edge_for_speed_p (taken_edge) || contains_hot_bb)) |
e8272095 | 364 | { |
365 | if (n_insns >= PARAM_VALUE (PARAM_MAX_FSM_THREAD_PATH_INSNS)) | |
366 | { | |
367 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
368 | fprintf (dump_file, "FSM jump-thread path not considered: " | |
369 | "the number of instructions on the path " | |
370 | "exceeds PARAM_MAX_FSM_THREAD_PATH_INSNS.\n"); | |
ce73fd50 | 371 | m_path.pop (); |
e8272095 | 372 | return NULL; |
373 | } | |
374 | } | |
375 | else if (n_insns > 1) | |
99583d05 | 376 | { |
377 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
378 | fprintf (dump_file, "FSM jump-thread path not considered: " | |
e8272095 | 379 | "duplication of %i insns is needed and optimizing for size.\n", |
380 | n_insns); | |
ce73fd50 | 381 | m_path.pop (); |
99583d05 | 382 | return NULL; |
383 | } | |
384 | ||
385 | /* We avoid creating irreducible inner loops unless we thread through | |
386 | a multiway branch, in which case we have deemed it worth losing | |
387 | other loop optimizations later. | |
388 | ||
389 | We also consider it worth creating an irreducible inner loop if | |
390 | the number of copied statement is low relative to the length of | |
391 | the path -- in that case there's little the traditional loop | |
392 | optimizer would have done anyway, so an irreducible loop is not | |
393 | so bad. */ | |
2f8d6297 | 394 | if (!threaded_multiway_branch && *creates_irreducible_loop |
73b43bd5 | 395 | && (n_insns * (unsigned) PARAM_VALUE (PARAM_FSM_SCALE_PATH_STMTS) |
ce73fd50 | 396 | > (m_path.length () * |
73b43bd5 | 397 | (unsigned) PARAM_VALUE (PARAM_FSM_SCALE_PATH_BLOCKS)))) |
99583d05 | 398 | |
399 | { | |
400 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
401 | fprintf (dump_file, | |
402 | "FSM would create irreducible loop without threading " | |
403 | "multiway branch.\n"); | |
ce73fd50 | 404 | m_path.pop (); |
99583d05 | 405 | return NULL; |
406 | } | |
407 | ||
408 | ||
409 | /* If this path does not thread through the loop latch, then we are | |
410 | using the FSM threader to find old style jump threads. This | |
411 | is good, except the FSM threader does not re-use an existing | |
412 | threading path to reduce code duplication. | |
413 | ||
414 | So for that case, drastically reduce the number of statements | |
415 | we are allowed to copy. */ | |
416 | if (!(threaded_through_latch && threaded_multiway_branch) | |
417 | && (n_insns * PARAM_VALUE (PARAM_FSM_SCALE_PATH_STMTS) | |
418 | >= PARAM_VALUE (PARAM_MAX_JUMP_THREAD_DUPLICATION_STMTS))) | |
419 | { | |
420 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
421 | fprintf (dump_file, | |
422 | "FSM did not thread around loop and would copy too " | |
423 | "many statements.\n"); | |
ce73fd50 | 424 | m_path.pop (); |
99583d05 | 425 | return NULL; |
426 | } | |
427 | ||
428 | /* When there is a multi-way branch on the path, then threading can | |
429 | explode the CFG due to duplicating the edges for that multi-way | |
430 | branch. So like above, only allow a multi-way branch on the path | |
431 | if we actually thread a multi-way branch. */ | |
432 | if (!threaded_multiway_branch && multiway_branch_in_path) | |
433 | { | |
434 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
435 | fprintf (dump_file, | |
436 | "FSM Thread through multiway branch without threading " | |
437 | "a multiway branch.\n"); | |
ce73fd50 | 438 | m_path.pop (); |
99583d05 | 439 | return NULL; |
440 | } | |
441 | return taken_edge; | |
442 | } | |
443 | ||
ce73fd50 | 444 | /* The current path PATH is a vector of blocks forming a jump threading |
445 | path in reverse order. TAKEN_EDGE is the edge taken from path[0]. | |
4a40bded | 446 | |
ce73fd50 | 447 | Convert the current path into the form used by register_jump_thread and |
448 | register it. */ | |
4a40bded | 449 | |
ce73fd50 | 450 | void |
451 | thread_jumps::convert_and_register_current_path (edge taken_edge) | |
4a40bded | 452 | { |
453 | vec<jump_thread_edge *> *jump_thread_path = new vec<jump_thread_edge *> (); | |
454 | ||
455 | /* Record the edges between the blocks in PATH. */ | |
ce73fd50 | 456 | for (unsigned int j = 0; j + 1 < m_path.length (); j++) |
4a40bded | 457 | { |
ce73fd50 | 458 | basic_block bb1 = m_path[m_path.length () - j - 1]; |
459 | basic_block bb2 = m_path[m_path.length () - j - 2]; | |
955d947b | 460 | |
4a40bded | 461 | edge e = find_edge (bb1, bb2); |
462 | gcc_assert (e); | |
463 | jump_thread_edge *x = new jump_thread_edge (e, EDGE_FSM_THREAD); | |
464 | jump_thread_path->safe_push (x); | |
465 | } | |
466 | ||
467 | /* Add the edge taken when the control variable has value ARG. */ | |
468 | jump_thread_edge *x | |
469 | = new jump_thread_edge (taken_edge, EDGE_NO_COPY_SRC_BLOCK); | |
470 | jump_thread_path->safe_push (x); | |
471 | ||
472 | register_jump_thread (jump_thread_path); | |
ce73fd50 | 473 | --m_max_threaded_paths; |
4a40bded | 474 | } |
475 | ||
73b43bd5 | 476 | /* While following a chain of SSA_NAME definitions, we jumped from a |
477 | definition in LAST_BB to a definition in NEW_BB (walking | |
478 | backwards). | |
5fd049b7 | 479 | |
73b43bd5 | 480 | Verify there is a single path between the blocks and none of the |
481 | blocks in the path is already in VISITED_BBS. If so, then update | |
482 | VISISTED_BBS, add the new blocks to PATH and return TRUE. | |
483 | Otherwise return FALSE. | |
5fd049b7 | 484 | |
485 | Store the length of the subpath in NEXT_PATH_LENGTH. */ | |
486 | ||
ce73fd50 | 487 | bool |
488 | thread_jumps::check_subpath_and_update_thread_path (basic_block last_bb, | |
489 | basic_block new_bb, | |
490 | int *next_path_length) | |
5fd049b7 | 491 | { |
492 | edge e; | |
493 | int e_count = 0; | |
494 | edge_iterator ei; | |
73b43bd5 | 495 | auto_vec<basic_block> next_path; |
5fd049b7 | 496 | |
497 | FOR_EACH_EDGE (e, ei, last_bb->preds) | |
498 | { | |
ce73fd50 | 499 | hash_set<basic_block> local_visited_bbs; |
5fd049b7 | 500 | |
ce73fd50 | 501 | if (fsm_find_thread_path (new_bb, e->src, next_path, |
502 | local_visited_bbs, e->src->loop_father)) | |
5fd049b7 | 503 | ++e_count; |
504 | ||
5fd049b7 | 505 | /* If there is more than one path, stop. */ |
506 | if (e_count > 1) | |
73b43bd5 | 507 | return false; |
5fd049b7 | 508 | } |
509 | ||
510 | /* Stop if we have not found a path: this could occur when the recursion | |
511 | is stopped by one of the bounds. */ | |
512 | if (e_count == 0) | |
73b43bd5 | 513 | return false; |
5fd049b7 | 514 | |
515 | /* Make sure we haven't already visited any of the nodes in | |
516 | NEXT_PATH. Don't add them here to avoid pollution. */ | |
73b43bd5 | 517 | for (unsigned int i = 0; i + 1 < next_path.length (); i++) |
5fd049b7 | 518 | { |
ce73fd50 | 519 | if (m_visited_bbs.contains (next_path[i])) |
73b43bd5 | 520 | return false; |
5fd049b7 | 521 | } |
522 | ||
523 | /* Now add the nodes to VISISTED_BBS. */ | |
73b43bd5 | 524 | for (unsigned int i = 0; i + 1 < next_path.length (); i++) |
ce73fd50 | 525 | m_visited_bbs.add (next_path[i]); |
5fd049b7 | 526 | |
527 | /* Append all the nodes from NEXT_PATH to PATH. */ | |
ce73fd50 | 528 | m_path.safe_splice (next_path); |
73b43bd5 | 529 | *next_path_length = next_path.length (); |
5fd049b7 | 530 | |
531 | return true; | |
532 | } | |
533 | ||
73b43bd5 | 534 | /* If this is a profitable jump thread path, register it. |
535 | ||
536 | NAME is an SSA NAME with a possible constant value of ARG on PATH. | |
537 | ||
ce73fd50 | 538 | DEF_BB is the basic block that ultimately defines the constant. */ |
73b43bd5 | 539 | |
ce73fd50 | 540 | void |
541 | thread_jumps::register_jump_thread_path_if_profitable (tree name, tree arg, | |
542 | basic_block def_bb) | |
73b43bd5 | 543 | { |
544 | if (TREE_CODE_CLASS (TREE_CODE (arg)) != tcc_constant) | |
545 | return; | |
546 | ||
547 | bool irreducible = false; | |
ce73fd50 | 548 | edge taken_edge = profitable_jump_thread_path (def_bb, name, arg, |
549 | &irreducible); | |
73b43bd5 | 550 | if (taken_edge) |
551 | { | |
ce73fd50 | 552 | convert_and_register_current_path (taken_edge); |
553 | m_path.pop (); | |
73b43bd5 | 554 | |
555 | if (irreducible) | |
ce73fd50 | 556 | vect_free_loop_info_assumptions (m_path[0]->loop_father); |
73b43bd5 | 557 | } |
558 | } | |
559 | ||
73b43bd5 | 560 | /* Given PHI which defines NAME in block DEF_BB, recurse through the |
5fd049b7 | 561 | PHI's arguments searching for paths where NAME will ultimately have |
562 | a constant value. | |
563 | ||
5fd049b7 | 564 | PATH contains the series of blocks to traverse that will result in |
ce73fd50 | 565 | NAME having a constant value. */ |
5fd049b7 | 566 | |
ce73fd50 | 567 | void |
568 | thread_jumps::handle_phi (gphi *phi, tree name, basic_block def_bb) | |
5fd049b7 | 569 | { |
570 | /* Iterate over the arguments of PHI. */ | |
571 | for (unsigned int i = 0; i < gimple_phi_num_args (phi); i++) | |
572 | { | |
573 | tree arg = gimple_phi_arg_def (phi, i); | |
574 | basic_block bbi = gimple_phi_arg_edge (phi, i)->src; | |
575 | ||
576 | /* Skip edges pointing outside the current loop. */ | |
73b43bd5 | 577 | if (!arg || def_bb->loop_father != bbi->loop_father) |
5fd049b7 | 578 | continue; |
579 | ||
580 | if (TREE_CODE (arg) == SSA_NAME) | |
581 | { | |
ce73fd50 | 582 | m_path.safe_push (bbi); |
5fd049b7 | 583 | /* Recursively follow SSA_NAMEs looking for a constant |
584 | definition. */ | |
ce73fd50 | 585 | fsm_find_control_statement_thread_paths (arg); |
5fd049b7 | 586 | |
ce73fd50 | 587 | m_path.pop (); |
5fd049b7 | 588 | continue; |
589 | } | |
590 | ||
ce73fd50 | 591 | register_jump_thread_path_if_profitable (name, arg, bbi); |
5fd049b7 | 592 | } |
593 | } | |
594 | ||
595 | /* Return TRUE if STMT is a gimple assignment we want to either directly | |
596 | handle or recurse through. Return FALSE otherwise. | |
597 | ||
598 | Note that adding more cases here requires adding cases to handle_assignment | |
599 | below. */ | |
600 | ||
601 | static bool | |
602 | handle_assignment_p (gimple *stmt) | |
603 | { | |
604 | if (is_gimple_assign (stmt)) | |
605 | { | |
606 | enum tree_code def_code = gimple_assign_rhs_code (stmt); | |
607 | ||
608 | /* If the RHS is an SSA_NAME, then we will recurse through it. | |
609 | Go ahead and filter out cases where the SSA_NAME is a default | |
610 | definition. There's little to be gained by trying to handle that. */ | |
611 | if (def_code == SSA_NAME | |
612 | && !SSA_NAME_IS_DEFAULT_DEF (gimple_assign_rhs1 (stmt))) | |
613 | return true; | |
614 | ||
615 | /* If the RHS is a constant, then it's a terminal that we'll want | |
616 | to handle as well. */ | |
617 | if (TREE_CODE_CLASS (def_code) == tcc_constant) | |
618 | return true; | |
619 | } | |
620 | ||
621 | /* Anything not explicitly allowed is not handled. */ | |
622 | return false; | |
623 | } | |
624 | ||
73b43bd5 | 625 | /* Given STMT which defines NAME in block DEF_BB, recurse through the |
5fd049b7 | 626 | PHI's arguments searching for paths where NAME will ultimately have |
627 | a constant value. | |
628 | ||
5fd049b7 | 629 | PATH contains the series of blocks to traverse that will result in |
ce73fd50 | 630 | NAME having a constant value. */ |
5fd049b7 | 631 | |
ce73fd50 | 632 | void |
633 | thread_jumps::handle_assignment (gimple *stmt, tree name, basic_block def_bb) | |
5fd049b7 | 634 | { |
635 | tree arg = gimple_assign_rhs1 (stmt); | |
636 | ||
637 | if (TREE_CODE (arg) == SSA_NAME) | |
ce73fd50 | 638 | fsm_find_control_statement_thread_paths (arg); |
5fd049b7 | 639 | |
640 | else | |
641 | { | |
73b43bd5 | 642 | /* register_jump_thread_path_if_profitable will push the current |
5fd049b7 | 643 | block onto the path. But the path will always have the current |
644 | block at this point. So we can just pop it. */ | |
ce73fd50 | 645 | m_path.pop (); |
5fd049b7 | 646 | |
ce73fd50 | 647 | register_jump_thread_path_if_profitable (name, arg, def_bb); |
5fd049b7 | 648 | |
649 | /* And put the current block back onto the path so that the | |
650 | state of the stack is unchanged when we leave. */ | |
ce73fd50 | 651 | m_path.safe_push (def_bb); |
5fd049b7 | 652 | } |
653 | } | |
654 | ||
73b43bd5 | 655 | /* We trace the value of the SSA_NAME NAME back through any phi nodes |
656 | looking for places where it gets a constant value and save the | |
ce73fd50 | 657 | path. */ |
2039eb2e | 658 | |
ce73fd50 | 659 | void |
660 | thread_jumps::fsm_find_control_statement_thread_paths (tree name) | |
2039eb2e | 661 | { |
56d4d876 | 662 | /* If NAME appears in an abnormal PHI, then don't try to trace its |
663 | value back through PHI nodes. */ | |
664 | if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name)) | |
665 | return; | |
666 | ||
f08943b6 | 667 | gimple *def_stmt = SSA_NAME_DEF_STMT (name); |
73b43bd5 | 668 | basic_block def_bb = gimple_bb (def_stmt); |
2039eb2e | 669 | |
73b43bd5 | 670 | if (def_bb == NULL) |
2039eb2e | 671 | return; |
672 | ||
4a40bded | 673 | /* We allow the SSA chain to contains PHIs and simple copies and constant |
674 | initializations. */ | |
499b8575 | 675 | if (gimple_code (def_stmt) != GIMPLE_PHI |
4a40bded | 676 | && gimple_code (def_stmt) != GIMPLE_ASSIGN) |
677 | return; | |
678 | ||
679 | if (gimple_code (def_stmt) == GIMPLE_PHI | |
680 | && (gimple_phi_num_args (def_stmt) | |
499b8575 | 681 | >= (unsigned) PARAM_VALUE (PARAM_FSM_MAXIMUM_PHI_ARGUMENTS))) |
2039eb2e | 682 | return; |
683 | ||
5fd049b7 | 684 | if (is_gimple_assign (def_stmt) |
685 | && ! handle_assignment_p (def_stmt)) | |
4a40bded | 686 | return; |
687 | ||
2039eb2e | 688 | /* Avoid infinite recursion. */ |
ce73fd50 | 689 | if (m_visited_bbs.add (def_bb)) |
2039eb2e | 690 | return; |
691 | ||
2039eb2e | 692 | int next_path_length = 0; |
ce73fd50 | 693 | basic_block last_bb_in_path = m_path.last (); |
2039eb2e | 694 | |
4a40bded | 695 | if (loop_containing_stmt (def_stmt)->header == gimple_bb (def_stmt)) |
2039eb2e | 696 | { |
697 | /* Do not walk through more than one loop PHI node. */ | |
ce73fd50 | 698 | if (m_seen_loop_phi) |
2039eb2e | 699 | return; |
ce73fd50 | 700 | m_seen_loop_phi = true; |
2039eb2e | 701 | } |
702 | ||
703 | /* Following the chain of SSA_NAME definitions, we jumped from a definition in | |
73b43bd5 | 704 | LAST_BB_IN_PATH to a definition in DEF_BB. When these basic blocks are |
705 | different, append to PATH the blocks from LAST_BB_IN_PATH to DEF_BB. */ | |
706 | if (def_bb != last_bb_in_path) | |
2039eb2e | 707 | { |
73b43bd5 | 708 | /* When DEF_BB == LAST_BB_IN_PATH, then the first block in the path |
650fa032 | 709 | will already be in VISITED_BBS. When they are not equal, then we |
710 | must ensure that first block is accounted for to ensure we do not | |
711 | create bogus jump threading paths. */ | |
ce73fd50 | 712 | m_visited_bbs.add (m_path[0]); |
73b43bd5 | 713 | if (!check_subpath_and_update_thread_path (last_bb_in_path, def_bb, |
5fd049b7 | 714 | &next_path_length)) |
715 | return; | |
2039eb2e | 716 | } |
717 | ||
ce73fd50 | 718 | gcc_assert (m_path.last () == def_bb); |
2039eb2e | 719 | |
4a40bded | 720 | if (gimple_code (def_stmt) == GIMPLE_PHI) |
ce73fd50 | 721 | handle_phi (as_a <gphi *> (def_stmt), name, def_bb); |
4a40bded | 722 | else if (gimple_code (def_stmt) == GIMPLE_ASSIGN) |
ce73fd50 | 723 | handle_assignment (def_stmt, name, def_bb); |
2039eb2e | 724 | |
725 | /* Remove all the nodes that we added from NEXT_PATH. */ | |
726 | if (next_path_length) | |
ce73fd50 | 727 | m_path.truncate (m_path.length () - next_path_length); |
2039eb2e | 728 | } |
729 | ||
730 | /* Search backwards from BB looking for paths where NAME (an SSA_NAME) | |
731 | is a constant. Record such paths for jump threading. | |
732 | ||
733 | It is assumed that BB ends with a control statement and that by | |
a18b7a33 | 734 | finding a path where NAME is a constant, we can thread the path. |
eecc0d1e | 735 | SPEED_P indicates that we could increase code size to improve the |
73b43bd5 | 736 | code path. */ |
2039eb2e | 737 | |
ce73fd50 | 738 | void |
739 | thread_jumps::find_jump_threads_backwards (basic_block bb, bool speed_p) | |
2039eb2e | 740 | { |
372172fe | 741 | gimple *stmt = get_gimple_control_stmt (bb); |
580efa23 | 742 | if (!stmt) |
743 | return; | |
744 | ||
745 | enum gimple_code code = gimple_code (stmt); | |
746 | tree name = NULL; | |
747 | if (code == GIMPLE_SWITCH) | |
748 | name = gimple_switch_index (as_a <gswitch *> (stmt)); | |
749 | else if (code == GIMPLE_GOTO) | |
750 | name = gimple_goto_dest (stmt); | |
751 | else if (code == GIMPLE_COND) | |
752 | { | |
753 | if (TREE_CODE (gimple_cond_lhs (stmt)) == SSA_NAME | |
5fd049b7 | 754 | && TREE_CODE_CLASS (TREE_CODE (gimple_cond_rhs (stmt))) == tcc_constant |
580efa23 | 755 | && (INTEGRAL_TYPE_P (TREE_TYPE (gimple_cond_lhs (stmt))) |
756 | || POINTER_TYPE_P (TREE_TYPE (gimple_cond_lhs (stmt))))) | |
757 | name = gimple_cond_lhs (stmt); | |
758 | } | |
759 | ||
760 | if (!name || TREE_CODE (name) != SSA_NAME) | |
761 | return; | |
762 | ||
ce73fd50 | 763 | /* Initialize pass local data that's different for each BB. */ |
764 | m_path.truncate (0); | |
765 | m_path.safe_push (bb); | |
766 | m_visited_bbs.empty (); | |
767 | m_seen_loop_phi = false; | |
768 | m_speed_p = speed_p; | |
769 | m_max_threaded_paths = PARAM_VALUE (PARAM_MAX_FSM_THREAD_PATHS); | |
2039eb2e | 770 | |
ce73fd50 | 771 | fsm_find_control_statement_thread_paths (name); |
580efa23 | 772 | } |
372172fe | 773 | |
774 | namespace { | |
775 | ||
776 | const pass_data pass_data_thread_jumps = | |
777 | { | |
778 | GIMPLE_PASS, | |
779 | "thread", | |
780 | OPTGROUP_NONE, | |
781 | TV_TREE_SSA_THREAD_JUMPS, | |
782 | ( PROP_cfg | PROP_ssa ), | |
783 | 0, | |
784 | 0, | |
785 | 0, | |
f454033a | 786 | TODO_update_ssa, |
372172fe | 787 | }; |
788 | ||
789 | class pass_thread_jumps : public gimple_opt_pass | |
790 | { | |
791 | public: | |
792 | pass_thread_jumps (gcc::context *ctxt) | |
793 | : gimple_opt_pass (pass_data_thread_jumps, ctxt) | |
794 | {} | |
795 | ||
796 | opt_pass * clone (void) { return new pass_thread_jumps (m_ctxt); } | |
797 | virtual bool gate (function *); | |
798 | virtual unsigned int execute (function *); | |
799 | }; | |
800 | ||
801 | bool | |
802 | pass_thread_jumps::gate (function *fun ATTRIBUTE_UNUSED) | |
803 | { | |
e8272095 | 804 | return flag_expensive_optimizations; |
372172fe | 805 | } |
806 | ||
807 | ||
808 | unsigned int | |
809 | pass_thread_jumps::execute (function *fun) | |
810 | { | |
f454033a | 811 | loop_optimizer_init (LOOPS_HAVE_PREHEADERS | LOOPS_HAVE_SIMPLE_LATCHES); |
812 | ||
372172fe | 813 | /* Try to thread each block with more than one successor. */ |
ce73fd50 | 814 | thread_jumps threader; |
372172fe | 815 | basic_block bb; |
816 | FOR_EACH_BB_FN (bb, fun) | |
817 | { | |
818 | if (EDGE_COUNT (bb->succs) > 1) | |
ce73fd50 | 819 | threader.find_jump_threads_backwards (bb, true); |
372172fe | 820 | } |
f454033a | 821 | bool changed = thread_through_all_blocks (true); |
822 | ||
823 | loop_optimizer_finalize (); | |
824 | return changed ? TODO_cleanup_cfg : 0; | |
372172fe | 825 | } |
826 | ||
827 | } | |
828 | ||
829 | gimple_opt_pass * | |
830 | make_pass_thread_jumps (gcc::context *ctxt) | |
831 | { | |
832 | return new pass_thread_jumps (ctxt); | |
833 | } | |
a18b7a33 | 834 | |
835 | namespace { | |
836 | ||
837 | const pass_data pass_data_early_thread_jumps = | |
838 | { | |
839 | GIMPLE_PASS, | |
840 | "ethread", | |
841 | OPTGROUP_NONE, | |
842 | TV_TREE_SSA_THREAD_JUMPS, | |
843 | ( PROP_cfg | PROP_ssa ), | |
844 | 0, | |
845 | 0, | |
846 | 0, | |
847 | ( TODO_cleanup_cfg | TODO_update_ssa ), | |
848 | }; | |
849 | ||
850 | class pass_early_thread_jumps : public gimple_opt_pass | |
851 | { | |
852 | public: | |
853 | pass_early_thread_jumps (gcc::context *ctxt) | |
854 | : gimple_opt_pass (pass_data_early_thread_jumps, ctxt) | |
855 | {} | |
856 | ||
857 | opt_pass * clone (void) { return new pass_early_thread_jumps (m_ctxt); } | |
858 | virtual bool gate (function *); | |
859 | virtual unsigned int execute (function *); | |
860 | }; | |
861 | ||
862 | bool | |
863 | pass_early_thread_jumps::gate (function *fun ATTRIBUTE_UNUSED) | |
864 | { | |
865 | return true; | |
866 | } | |
867 | ||
868 | ||
869 | unsigned int | |
870 | pass_early_thread_jumps::execute (function *fun) | |
871 | { | |
0afc9b47 | 872 | loop_optimizer_init (AVOID_CFG_MODIFICATIONS); |
873 | ||
a18b7a33 | 874 | /* Try to thread each block with more than one successor. */ |
ce73fd50 | 875 | thread_jumps threader; |
a18b7a33 | 876 | basic_block bb; |
877 | FOR_EACH_BB_FN (bb, fun) | |
878 | { | |
879 | if (EDGE_COUNT (bb->succs) > 1) | |
ce73fd50 | 880 | threader.find_jump_threads_backwards (bb, false); |
a18b7a33 | 881 | } |
882 | thread_through_all_blocks (true); | |
0afc9b47 | 883 | |
884 | loop_optimizer_finalize (); | |
a18b7a33 | 885 | return 0; |
886 | } | |
887 | ||
888 | } | |
889 | ||
890 | gimple_opt_pass * | |
891 | make_pass_early_thread_jumps (gcc::context *ctxt) | |
892 | { | |
893 | return new pass_early_thread_jumps (ctxt); | |
894 | } |