]>
Commit | Line | Data |
---|---|---|
0732f75f | 1 | /* SSA Jump Threading |
a5544970 | 2 | Copyright (C) 2005-2019 Free Software Foundation, Inc. |
0732f75f JL |
3 | |
4 | This file is part of GCC. | |
5 | ||
6 | GCC is free software; you can redistribute it and/or modify | |
7 | it under the terms of the GNU General Public License as published by | |
8 | the Free Software Foundation; either version 3, or (at your option) | |
9 | any later version. | |
10 | ||
11 | GCC is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | GNU General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with GCC; see the file COPYING3. If not see | |
18 | <http://www.gnu.org/licenses/>. */ | |
19 | ||
20 | #include "config.h" | |
21 | #include "system.h" | |
22 | #include "coretypes.h" | |
23 | #include "backend.h" | |
24 | #include "predict.h" | |
25 | #include "tree.h" | |
26 | #include "gimple.h" | |
27 | #include "fold-const.h" | |
28 | #include "cfgloop.h" | |
29 | #include "gimple-iterator.h" | |
30 | #include "tree-cfg.h" | |
31 | #include "tree-ssa-threadupdate.h" | |
32 | #include "params.h" | |
33 | #include "tree-ssa-loop.h" | |
34 | #include "cfganal.h" | |
35 | #include "tree-pass.h" | |
8b2ef235 JL |
36 | #include "gimple-ssa.h" |
37 | #include "tree-phinodes.h" | |
27bddc4a | 38 | #include "tree-inline.h" |
0a4e5cf3 | 39 | #include "tree-vectorizer.h" |
0732f75f | 40 | |
cde30fe0 AH |
/* State for one invocation of the backward jump threader.  Walks SSA
   def-use chains backwards from a control statement looking for paths
   on which the controlling value becomes a compile-time constant.  */

class thread_jumps
{
public:
  /* Entry point: search backwards from BB for profitable jump
     threading paths and register them.  SPEED_P allows code growth
     in exchange for a faster path.  */
  void find_jump_threads_backwards (basic_block bb, bool speed_p);
private:
  /* Decide whether extending the current path with BBI is profitable;
     return the taken edge out of the path, or NULL to reject.  */
  edge profitable_jump_thread_path (basic_block bbi, tree name, tree arg,
				    bool *creates_irreducible_loop);
  /* Convert M_PATH into the edge-vector form used by
     register_jump_thread and register it.  */
  void convert_and_register_current_path (edge taken_edge);
  /* Register the current path if ARG is a constant and the path is
     deemed profitable.  */
  void register_jump_thread_path_if_profitable (tree name, tree arg,
						basic_block def_bb);
  /* Handlers for the two kinds of defining statements we follow.  */
  void handle_assignment (gimple *stmt, tree name, basic_block def_bb);
  void handle_phi (gphi *phi, tree name, basic_block def_bb);
  /* Recursive worker tracing NAME backwards to constant values.  */
  void fsm_find_control_statement_thread_paths (tree name);
  /* Verify there is a unique subpath between LAST_BB and NEW_BB and,
     if so, append it to M_PATH.  */
  bool check_subpath_and_update_thread_path (basic_block last_bb,
					     basic_block new_bb,
					     int *next_path_length);

  /* Maximum number of BBs we are allowed to thread.  */
  int m_max_threaded_paths;
  /* Hash to keep track of seen bbs.  */
  hash_set<basic_block> m_visited_bbs;
  /* Current path we're analyzing.  */
  auto_vec<basic_block> m_path;
  /* Tracks if we have recursed through a loop PHI node.  */
  bool m_seen_loop_phi;
  /* Indicate that we could increase code size to improve the
     code path.  */
  bool m_speed_p;
};
0732f75f | 70 | |
3d466672 | 71 | /* Simple helper to get the last statement from BB, which is assumed |
21f0717a JL |
72 | to be a control statement. Return NULL if the last statement is |
73 | not a control statement. */ | |
74 | ||
3d466672 JL |
75 | static gimple * |
76 | get_gimple_control_stmt (basic_block bb) | |
77 | { | |
21f0717a | 78 | gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb); |
3d466672 JL |
79 | |
80 | if (gsi_end_p (gsi)) | |
81 | return NULL; | |
82 | ||
83 | gimple *stmt = gsi_stmt (gsi); | |
84 | enum gimple_code code = gimple_code (stmt); | |
21f0717a JL |
85 | if (code == GIMPLE_COND || code == GIMPLE_SWITCH || code == GIMPLE_GOTO) |
86 | return stmt; | |
87 | return NULL; | |
3d466672 JL |
88 | } |
89 | ||
cff44c10 AH |
90 | /* Return true if the CFG contains at least one path from START_BB to |
91 | END_BB. When a path is found, record in PATH the blocks from | |
cde30fe0 AH |
92 | END_BB to START_BB. LOCAL_VISITED_BBS is used to make sure we |
93 | don't fall into an infinite loop. Bound the recursion to basic | |
94 | blocks belonging to LOOP. */ | |
0732f75f JL |
95 | |
96 | static bool | |
97 | fsm_find_thread_path (basic_block start_bb, basic_block end_bb, | |
cff44c10 | 98 | vec<basic_block> &path, |
cde30fe0 AH |
99 | hash_set<basic_block> &local_visited_bbs, |
100 | loop_p loop) | |
0732f75f JL |
101 | { |
102 | if (loop != start_bb->loop_father) | |
103 | return false; | |
104 | ||
105 | if (start_bb == end_bb) | |
106 | { | |
cff44c10 | 107 | path.safe_push (start_bb); |
0732f75f JL |
108 | return true; |
109 | } | |
110 | ||
cde30fe0 | 111 | if (!local_visited_bbs.add (start_bb)) |
0732f75f JL |
112 | { |
113 | edge e; | |
114 | edge_iterator ei; | |
115 | FOR_EACH_EDGE (e, ei, start_bb->succs) | |
cde30fe0 AH |
116 | if (fsm_find_thread_path (e->dest, end_bb, path, local_visited_bbs, |
117 | loop)) | |
0732f75f | 118 | { |
cff44c10 | 119 | path.safe_push (start_bb); |
0732f75f JL |
120 | return true; |
121 | } | |
122 | } | |
123 | ||
124 | return false; | |
125 | } | |
126 | ||
/* Examine jump threading path PATH to which we want to add BBI.

   If the resulting path is profitable to thread, then return the
   final taken edge from the path, NULL otherwise.

   NAME is the SSA_NAME of the variable we found to have a constant
   value on PATH.  ARG is the constant value of NAME on that path.

   BBI will be appended to PATH when we have a profitable jump
   threading path.  Callers are responsible for removing BBI from PATH
   in that case.  */

edge
thread_jumps::profitable_jump_thread_path (basic_block bbi, tree name,
					   tree arg,
					   bool *creates_irreducible_loop)
{
  /* Note BBI is not in the path yet, hence the +1 in the test below
     to make sure BBI is accounted for in the path length test.  */

  /* We can get a length of 0 here when the statement that
     makes a conditional generate a compile-time constant
     result is in the same block as the conditional.

     That's not really a jump threading opportunity, but instead is
     simple cprop & simplification.  We could handle it here if we
     wanted by wiring up all the incoming edges.  If we run this
     early in IPA, that might be worth doing.   For now we just
     reject that case.  */
  if (m_path.is_empty ())
      return NULL;

  if (m_path.length () + 1
      > (unsigned) PARAM_VALUE (PARAM_MAX_FSM_THREAD_LENGTH))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "FSM jump-thread path not considered: "
		 "the number of basic blocks on the path "
		 "exceeds PARAM_MAX_FSM_THREAD_LENGTH.\n");
      return NULL;
    }

  if (m_max_threaded_paths <= 0)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "FSM jump-thread path not considered: "
		 "the number of previously recorded FSM paths to "
		 "thread exceeds PARAM_MAX_FSM_THREAD_PATHS.\n");
      return NULL;
    }

  /* Add BBI to the path.
     From this point onward, if we decide we the path is not profitable
     to thread, we must remove BBI from the path.  Every failure exit
     below therefore does m_path.pop () before returning NULL.  */
  m_path.safe_push (bbi);

  int n_insns = 0;
  gimple_stmt_iterator gsi;
  loop_p loop = m_path[0]->loop_father;
  bool path_crosses_loops = false;
  bool threaded_through_latch = false;
  bool multiway_branch_in_path = false;
  bool threaded_multiway_branch = false;
  bool contains_hot_bb = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Checking profitability of path (backwards): ");

  /* Count the number of instructions on the path: as these instructions
     will have to be duplicated, we will not record the path if there
     are too many instructions on the path.  Also check that all the
     blocks in the path belong to a single loop.  */
  for (unsigned j = 0; j < m_path.length (); j++)
    {
      basic_block bb = m_path[j];

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " bb:%i", bb->index);
      /* Remember, blocks in the path are stored in opposite order
	 in the PATH array.  The last entry in the array represents
	 the block with an outgoing edge that we will redirect to the
	 jump threading path.  Thus we don't care about that block's
	 loop father, nor how many statements are in that block because
	 it will not be copied or whether or not it ends in a multiway
	 branch.  */
      if (j < m_path.length () - 1)
	{
	  int orig_n_insns = n_insns;
	  if (bb->loop_father != loop)
	    {
	      path_crosses_loops = true;
	      break;
	    }

	  /* PHIs in the path will create degenerate PHIS in the
	     copied path which will then get propagated away, so
	     looking at just the duplicate path the PHIs would
	     seem unimportant.

	     But those PHIs, because they're assignments to objects
	     typically with lives that exist outside the thread path,
	     will tend to generate PHIs (or at least new PHI arguments)
	     at points where we leave the thread path and rejoin
	     the original blocks.  So we do want to account for them.

	     We ignore virtual PHIs.  We also ignore cases where BB
	     has a single incoming edge.  That's the most common
	     degenerate PHI we'll see here.  Finally we ignore PHIs
	     that are associated with the value we're tracking as
	     that object likely dies.  */
	  if (EDGE_COUNT (bb->succs) > 1 && EDGE_COUNT (bb->preds) > 1)
	    {
	      for (gphi_iterator gsip = gsi_start_phis (bb);
		   !gsi_end_p (gsip);
		   gsi_next (&gsip))
		{
		  gphi *phi = gsip.phi ();
		  tree dst = gimple_phi_result (phi);

		  /* Note that if both NAME and DST are anonymous
		     SSA_NAMEs, then we do not have enough information
		     to consider them associated.  */
		  if (dst != name
		      && (SSA_NAME_VAR (dst) != SSA_NAME_VAR (name)
			  || !SSA_NAME_VAR (dst))
		      && !virtual_operand_p (dst))
		    ++n_insns;
		}
	    }

	  if (!contains_hot_bb && m_speed_p)
	    contains_hot_bb |= optimize_bb_for_speed_p (bb);
	  for (gsi = gsi_after_labels (bb);
	       !gsi_end_p (gsi);
	       gsi_next_nondebug (&gsi))
	    {
	      gimple *stmt = gsi_stmt (gsi);
	      /* An IFN_UNIQUE call must not be duplicated, so the
		 whole path is rejected outright.  */
	      if (gimple_call_internal_p (stmt, IFN_UNIQUE))
		{
		  m_path.pop ();
		  return NULL;
		}
	      /* Do not count empty statements and labels.  */
	      if (gimple_code (stmt) != GIMPLE_NOP
		  && !(gimple_code (stmt) == GIMPLE_ASSIGN
		       && gimple_assign_rhs_code (stmt) == ASSERT_EXPR)
		  && !is_gimple_debug (stmt))
		n_insns += estimate_num_insns (stmt, &eni_size_weights);
	    }
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " (%i insns)", n_insns-orig_n_insns);

	  /* We do not look at the block with the threaded branch
	     in this loop.  So if any block with a last statement that
	     is a GIMPLE_SWITCH or GIMPLE_GOTO is seen, then we have a
	     multiway branch on our path.

	     The block in PATH[0] is special, it's the block were we're
	     going to be able to eliminate its branch.  */
	  gimple *last = last_stmt (bb);
	  if (last && (gimple_code (last) == GIMPLE_SWITCH
		       || gimple_code (last) == GIMPLE_GOTO))
	    {
	      if (j == 0)
		threaded_multiway_branch = true;
	      else
		multiway_branch_in_path = true;
	    }
	}

      /* Note if we thread through the latch, we will want to include
	 the last entry in the array when determining if we thread
	 through the loop latch.  */
      if (loop->latch == bb)
	threaded_through_latch = true;
    }

  gimple *stmt = get_gimple_control_stmt (m_path[0]);
  gcc_assert (stmt);

  /* We are going to remove the control statement at the end of the
     last block in the threading path.  So don't count it against our
     statement count.  */

  int stmt_insns = estimate_num_insns (stmt, &eni_size_weights);
  n_insns-= stmt_insns;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n  Control statement insns: %i\n"
	     "  Overall: %i insns\n",
	     stmt_insns, n_insns);

  /* We have found a constant value for ARG.  For GIMPLE_SWITCH
     and GIMPLE_GOTO, we use it as-is.  However, for a GIMPLE_COND
     we need to substitute, fold and simplify so we can determine
     the edge taken out of the last block.  */
  if (gimple_code (stmt) == GIMPLE_COND)
    {
      enum tree_code cond_code = gimple_cond_code (stmt);

      /* We know the underyling format of the condition.  */
      arg = fold_binary (cond_code, boolean_type_node,
			 arg, gimple_cond_rhs (stmt));
    }

  /* If this path threaded through the loop latch back into the
     same loop and the destination does not dominate the loop
     latch, then this thread would create an irreducible loop.

     We have to know the outgoing edge to figure this out.  */
  edge taken_edge = find_taken_edge (m_path[0], arg);

  /* There are cases where we may not be able to extract the
     taken edge.  For example, a computed goto to an absolute
     address.  Handle those cases gracefully.  */
  if (taken_edge == NULL)
    {
      m_path.pop ();
      return NULL;
    }

  *creates_irreducible_loop = false;
  if (threaded_through_latch
      && loop == taken_edge->dest->loop_father
      && (determine_bb_domination_status (loop, taken_edge->dest)
	  == DOMST_NONDOMINATING))
    *creates_irreducible_loop = true;

  if (path_crosses_loops)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "FSM jump-thread path not considered: "
		 "the path crosses loops.\n");
      m_path.pop ();
      return NULL;
    }

  /* Threading is profitable if the path duplicated is hot but also
     in a case we separate cold path from hot path and permit optimization
     of the hot path later.  Be on the agressive side here. In some testcases,
     as in PR 78407 this leads to noticeable improvements.  */
  if (m_speed_p && (optimize_edge_for_speed_p (taken_edge) || contains_hot_bb))
    {
      if (n_insns >= PARAM_VALUE (PARAM_MAX_FSM_THREAD_PATH_INSNS))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "FSM jump-thread path not considered: "
		     "the number of instructions on the path "
		     "exceeds PARAM_MAX_FSM_THREAD_PATH_INSNS.\n");
	  m_path.pop ();
	  return NULL;
	}
    }
  else if (n_insns > 1)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "FSM jump-thread path not considered: "
		 "duplication of %i insns is needed and optimizing for size.\n",
		 n_insns);
      m_path.pop ();
      return NULL;
    }

  /* We avoid creating irreducible inner loops unless we thread through
     a multiway branch, in which case we have deemed it worth losing
     other loop optimizations later.

     We also consider it worth creating an irreducible inner loop if
     the number of copied statement is low relative to the length of
     the path -- in that case there's little the traditional loop
     optimizer would have done anyway, so an irreducible loop is not
     so bad.  */
  if (!threaded_multiway_branch && *creates_irreducible_loop
      && (n_insns * (unsigned) PARAM_VALUE (PARAM_FSM_SCALE_PATH_STMTS)
	  > (m_path.length () *
	     (unsigned) PARAM_VALUE (PARAM_FSM_SCALE_PATH_BLOCKS))))

    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "FSM would create irreducible loop without threading "
		 "multiway branch.\n");
      m_path.pop ();
      return NULL;
    }


  /* If this path does not thread through the loop latch, then we are
     using the FSM threader to find old style jump threads.  This
     is good, except the FSM threader does not re-use an existing
     threading path to reduce code duplication.

     So for that case, drastically reduce the number of statements
     we are allowed to copy.  */
  if (!(threaded_through_latch && threaded_multiway_branch)
      && (n_insns * PARAM_VALUE (PARAM_FSM_SCALE_PATH_STMTS)
	  >= PARAM_VALUE (PARAM_MAX_JUMP_THREAD_DUPLICATION_STMTS)))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "FSM did not thread around loop and would copy too "
		 "many statements.\n");
      m_path.pop ();
      return NULL;
    }

  /* When there is a multi-way branch on the path, then threading can
     explode the CFG due to duplicating the edges for that multi-way
     branch.  So like above, only allow a multi-way branch on the path
     if we actually thread a multi-way branch.  */
  if (!threaded_multiway_branch && multiway_branch_in_path)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "FSM Thread through multiway branch without threading "
		 "a multiway branch.\n");
      m_path.pop ();
      return NULL;
    }
  /* Success: BBI stays on M_PATH; the caller pops it.  */
  return taken_edge;
}
448 | ||
cde30fe0 AH |
449 | /* The current path PATH is a vector of blocks forming a jump threading |
450 | path in reverse order. TAKEN_EDGE is the edge taken from path[0]. | |
081fdda6 | 451 | |
cde30fe0 AH |
452 | Convert the current path into the form used by register_jump_thread and |
453 | register it. */ | |
081fdda6 | 454 | |
cde30fe0 AH |
455 | void |
456 | thread_jumps::convert_and_register_current_path (edge taken_edge) | |
081fdda6 JL |
457 | { |
458 | vec<jump_thread_edge *> *jump_thread_path = new vec<jump_thread_edge *> (); | |
459 | ||
460 | /* Record the edges between the blocks in PATH. */ | |
cde30fe0 | 461 | for (unsigned int j = 0; j + 1 < m_path.length (); j++) |
081fdda6 | 462 | { |
cde30fe0 AH |
463 | basic_block bb1 = m_path[m_path.length () - j - 1]; |
464 | basic_block bb2 = m_path[m_path.length () - j - 2]; | |
8af01c66 | 465 | |
081fdda6 JL |
466 | edge e = find_edge (bb1, bb2); |
467 | gcc_assert (e); | |
468 | jump_thread_edge *x = new jump_thread_edge (e, EDGE_FSM_THREAD); | |
469 | jump_thread_path->safe_push (x); | |
470 | } | |
471 | ||
472 | /* Add the edge taken when the control variable has value ARG. */ | |
473 | jump_thread_edge *x | |
474 | = new jump_thread_edge (taken_edge, EDGE_NO_COPY_SRC_BLOCK); | |
475 | jump_thread_path->safe_push (x); | |
476 | ||
477 | register_jump_thread (jump_thread_path); | |
cde30fe0 | 478 | --m_max_threaded_paths; |
081fdda6 JL |
479 | } |
480 | ||
cff44c10 AH |
481 | /* While following a chain of SSA_NAME definitions, we jumped from a |
482 | definition in LAST_BB to a definition in NEW_BB (walking | |
483 | backwards). | |
435fd407 | 484 | |
cff44c10 AH |
485 | Verify there is a single path between the blocks and none of the |
486 | blocks in the path is already in VISITED_BBS. If so, then update | |
487 | VISISTED_BBS, add the new blocks to PATH and return TRUE. | |
488 | Otherwise return FALSE. | |
435fd407 JL |
489 | |
490 | Store the length of the subpath in NEXT_PATH_LENGTH. */ | |
491 | ||
cde30fe0 AH |
492 | bool |
493 | thread_jumps::check_subpath_and_update_thread_path (basic_block last_bb, | |
494 | basic_block new_bb, | |
495 | int *next_path_length) | |
435fd407 JL |
496 | { |
497 | edge e; | |
498 | int e_count = 0; | |
499 | edge_iterator ei; | |
cff44c10 | 500 | auto_vec<basic_block> next_path; |
435fd407 JL |
501 | |
502 | FOR_EACH_EDGE (e, ei, last_bb->preds) | |
503 | { | |
cde30fe0 | 504 | hash_set<basic_block> local_visited_bbs; |
435fd407 | 505 | |
cde30fe0 AH |
506 | if (fsm_find_thread_path (new_bb, e->src, next_path, |
507 | local_visited_bbs, e->src->loop_father)) | |
435fd407 JL |
508 | ++e_count; |
509 | ||
435fd407 JL |
510 | /* If there is more than one path, stop. */ |
511 | if (e_count > 1) | |
cff44c10 | 512 | return false; |
435fd407 JL |
513 | } |
514 | ||
515 | /* Stop if we have not found a path: this could occur when the recursion | |
516 | is stopped by one of the bounds. */ | |
517 | if (e_count == 0) | |
cff44c10 | 518 | return false; |
435fd407 JL |
519 | |
520 | /* Make sure we haven't already visited any of the nodes in | |
521 | NEXT_PATH. Don't add them here to avoid pollution. */ | |
cff44c10 | 522 | for (unsigned int i = 0; i + 1 < next_path.length (); i++) |
435fd407 | 523 | { |
cde30fe0 | 524 | if (m_visited_bbs.contains (next_path[i])) |
cff44c10 | 525 | return false; |
435fd407 JL |
526 | } |
527 | ||
528 | /* Now add the nodes to VISISTED_BBS. */ | |
cff44c10 | 529 | for (unsigned int i = 0; i + 1 < next_path.length (); i++) |
cde30fe0 | 530 | m_visited_bbs.add (next_path[i]); |
435fd407 JL |
531 | |
532 | /* Append all the nodes from NEXT_PATH to PATH. */ | |
cde30fe0 | 533 | m_path.safe_splice (next_path); |
cff44c10 | 534 | *next_path_length = next_path.length (); |
435fd407 JL |
535 | |
536 | return true; | |
537 | } | |
538 | ||
cff44c10 AH |
539 | /* If this is a profitable jump thread path, register it. |
540 | ||
541 | NAME is an SSA NAME with a possible constant value of ARG on PATH. | |
542 | ||
cde30fe0 | 543 | DEF_BB is the basic block that ultimately defines the constant. */ |
cff44c10 | 544 | |
cde30fe0 AH |
545 | void |
546 | thread_jumps::register_jump_thread_path_if_profitable (tree name, tree arg, | |
547 | basic_block def_bb) | |
cff44c10 AH |
548 | { |
549 | if (TREE_CODE_CLASS (TREE_CODE (arg)) != tcc_constant) | |
550 | return; | |
551 | ||
552 | bool irreducible = false; | |
cde30fe0 AH |
553 | edge taken_edge = profitable_jump_thread_path (def_bb, name, arg, |
554 | &irreducible); | |
cff44c10 AH |
555 | if (taken_edge) |
556 | { | |
cde30fe0 AH |
557 | convert_and_register_current_path (taken_edge); |
558 | m_path.pop (); | |
cff44c10 AH |
559 | |
560 | if (irreducible) | |
cde30fe0 | 561 | vect_free_loop_info_assumptions (m_path[0]->loop_father); |
cff44c10 AH |
562 | } |
563 | } | |
564 | ||
cff44c10 | 565 | /* Given PHI which defines NAME in block DEF_BB, recurse through the |
435fd407 JL |
566 | PHI's arguments searching for paths where NAME will ultimately have |
567 | a constant value. | |
568 | ||
435fd407 | 569 | PATH contains the series of blocks to traverse that will result in |
cde30fe0 | 570 | NAME having a constant value. */ |
435fd407 | 571 | |
cde30fe0 AH |
572 | void |
573 | thread_jumps::handle_phi (gphi *phi, tree name, basic_block def_bb) | |
435fd407 JL |
574 | { |
575 | /* Iterate over the arguments of PHI. */ | |
576 | for (unsigned int i = 0; i < gimple_phi_num_args (phi); i++) | |
577 | { | |
578 | tree arg = gimple_phi_arg_def (phi, i); | |
579 | basic_block bbi = gimple_phi_arg_edge (phi, i)->src; | |
580 | ||
581 | /* Skip edges pointing outside the current loop. */ | |
cff44c10 | 582 | if (!arg || def_bb->loop_father != bbi->loop_father) |
435fd407 JL |
583 | continue; |
584 | ||
585 | if (TREE_CODE (arg) == SSA_NAME) | |
586 | { | |
cde30fe0 | 587 | m_path.safe_push (bbi); |
435fd407 JL |
588 | /* Recursively follow SSA_NAMEs looking for a constant |
589 | definition. */ | |
cde30fe0 | 590 | fsm_find_control_statement_thread_paths (arg); |
435fd407 | 591 | |
cde30fe0 | 592 | m_path.pop (); |
435fd407 JL |
593 | continue; |
594 | } | |
595 | ||
cde30fe0 | 596 | register_jump_thread_path_if_profitable (name, arg, bbi); |
435fd407 JL |
597 | } |
598 | } | |
599 | ||
600 | /* Return TRUE if STMT is a gimple assignment we want to either directly | |
601 | handle or recurse through. Return FALSE otherwise. | |
602 | ||
603 | Note that adding more cases here requires adding cases to handle_assignment | |
604 | below. */ | |
605 | ||
606 | static bool | |
607 | handle_assignment_p (gimple *stmt) | |
608 | { | |
609 | if (is_gimple_assign (stmt)) | |
610 | { | |
611 | enum tree_code def_code = gimple_assign_rhs_code (stmt); | |
612 | ||
613 | /* If the RHS is an SSA_NAME, then we will recurse through it. | |
614 | Go ahead and filter out cases where the SSA_NAME is a default | |
615 | definition. There's little to be gained by trying to handle that. */ | |
616 | if (def_code == SSA_NAME | |
617 | && !SSA_NAME_IS_DEFAULT_DEF (gimple_assign_rhs1 (stmt))) | |
618 | return true; | |
619 | ||
620 | /* If the RHS is a constant, then it's a terminal that we'll want | |
621 | to handle as well. */ | |
622 | if (TREE_CODE_CLASS (def_code) == tcc_constant) | |
623 | return true; | |
624 | } | |
625 | ||
626 | /* Anything not explicitly allowed is not handled. */ | |
627 | return false; | |
628 | } | |
629 | ||
cff44c10 | 630 | /* Given STMT which defines NAME in block DEF_BB, recurse through the |
435fd407 JL |
631 | PHI's arguments searching for paths where NAME will ultimately have |
632 | a constant value. | |
633 | ||
435fd407 | 634 | PATH contains the series of blocks to traverse that will result in |
cde30fe0 | 635 | NAME having a constant value. */ |
435fd407 | 636 | |
cde30fe0 AH |
637 | void |
638 | thread_jumps::handle_assignment (gimple *stmt, tree name, basic_block def_bb) | |
435fd407 JL |
639 | { |
640 | tree arg = gimple_assign_rhs1 (stmt); | |
641 | ||
642 | if (TREE_CODE (arg) == SSA_NAME) | |
cde30fe0 | 643 | fsm_find_control_statement_thread_paths (arg); |
435fd407 JL |
644 | |
645 | else | |
646 | { | |
cff44c10 | 647 | /* register_jump_thread_path_if_profitable will push the current |
435fd407 JL |
648 | block onto the path. But the path will always have the current |
649 | block at this point. So we can just pop it. */ | |
cde30fe0 | 650 | m_path.pop (); |
435fd407 | 651 | |
cde30fe0 | 652 | register_jump_thread_path_if_profitable (name, arg, def_bb); |
435fd407 JL |
653 | |
654 | /* And put the current block back onto the path so that the | |
655 | state of the stack is unchanged when we leave. */ | |
cde30fe0 | 656 | m_path.safe_push (def_bb); |
435fd407 JL |
657 | } |
658 | } | |
659 | ||
/* We trace the value of the SSA_NAME NAME back through any phi nodes
   looking for places where it gets a constant value and save the
   path.  */

void
thread_jumps::fsm_find_control_statement_thread_paths (tree name)
{
  /* If NAME appears in an abnormal PHI, then don't try to trace its
     value back through PHI nodes.  */
  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
    return;

  gimple *def_stmt = SSA_NAME_DEF_STMT (name);
  basic_block def_bb = gimple_bb (def_stmt);

  /* A NULL defining block means NAME has no useful definition to
     follow (e.g. a default definition).  */
  if (def_bb == NULL)
    return;

  /* We allow the SSA chain to contains PHIs and simple copies and constant
     initializations.  */
  if (gimple_code (def_stmt) != GIMPLE_PHI
      && gimple_code (def_stmt) != GIMPLE_ASSIGN)
    return;

  /* Bound the work on very wide PHIs.  */
  if (gimple_code (def_stmt) == GIMPLE_PHI
      && (gimple_phi_num_args (def_stmt)
	  >= (unsigned) PARAM_VALUE (PARAM_FSM_MAXIMUM_PHI_ARGUMENTS)))
    return;

  if (is_gimple_assign (def_stmt)
      && ! handle_assignment_p (def_stmt))
    return;

  /* Avoid infinite recursion.  (hash_set::add returns true when
     DEF_BB was already visited.)  */
  if (m_visited_bbs.add (def_bb))
    return;

  int next_path_length = 0;
  basic_block last_bb_in_path = m_path.last ();

  if (loop_containing_stmt (def_stmt)->header == gimple_bb (def_stmt))
    {
      /* Do not walk through more than one loop PHI node.  */
      if (m_seen_loop_phi)
	return;
      m_seen_loop_phi = true;
    }

  /* Following the chain of SSA_NAME definitions, we jumped from a definition in
     LAST_BB_IN_PATH to a definition in DEF_BB.  When these basic blocks are
     different, append to PATH the blocks from LAST_BB_IN_PATH to DEF_BB.  */
  if (def_bb != last_bb_in_path)
    {
      /* When DEF_BB == LAST_BB_IN_PATH, then the first block in the path
	 will already be in VISITED_BBS.  When they are not equal, then we
	 must ensure that first block is accounted for to ensure we do not
	 create bogus jump threading paths.  */
      m_visited_bbs.add (m_path[0]);
      if (!check_subpath_and_update_thread_path (last_bb_in_path, def_bb,
						 &next_path_length))
	return;
    }

  gcc_assert (m_path.last () == def_bb);

  /* Dispatch on the kind of defining statement; both handlers leave
     M_PATH as they found it.  */
  if (gimple_code (def_stmt) == GIMPLE_PHI)
    handle_phi (as_a <gphi *> (def_stmt), name, def_bb);
  else if (gimple_code (def_stmt) == GIMPLE_ASSIGN)
    handle_assignment (def_stmt, name, def_bb);

  /* Remove all the nodes that we added from NEXT_PATH.  */
  if (next_path_length)
    m_path.truncate (m_path.length () - next_path_length);
}
734 | ||
735 | /* Search backwards from BB looking for paths where NAME (an SSA_NAME) | |
736 | is a constant. Record such paths for jump threading. | |
737 | ||
738 | It is assumed that BB ends with a control statement and that by | |
b720e919 | 739 | finding a path where NAME is a constant, we can thread the path. |
ddc1759a | 740 | SPEED_P indicates that we could increase code size to improve the |
cff44c10 | 741 | code path. */ |
0732f75f | 742 | |
cde30fe0 AH |
743 | void |
744 | thread_jumps::find_jump_threads_backwards (basic_block bb, bool speed_p) | |
0732f75f | 745 | { |
8b2ef235 | 746 | gimple *stmt = get_gimple_control_stmt (bb); |
21f0717a JL |
747 | if (!stmt) |
748 | return; | |
749 | ||
750 | enum gimple_code code = gimple_code (stmt); | |
751 | tree name = NULL; | |
752 | if (code == GIMPLE_SWITCH) | |
753 | name = gimple_switch_index (as_a <gswitch *> (stmt)); | |
754 | else if (code == GIMPLE_GOTO) | |
755 | name = gimple_goto_dest (stmt); | |
756 | else if (code == GIMPLE_COND) | |
757 | { | |
758 | if (TREE_CODE (gimple_cond_lhs (stmt)) == SSA_NAME | |
435fd407 | 759 | && TREE_CODE_CLASS (TREE_CODE (gimple_cond_rhs (stmt))) == tcc_constant |
21f0717a JL |
760 | && (INTEGRAL_TYPE_P (TREE_TYPE (gimple_cond_lhs (stmt))) |
761 | || POINTER_TYPE_P (TREE_TYPE (gimple_cond_lhs (stmt))))) | |
762 | name = gimple_cond_lhs (stmt); | |
763 | } | |
764 | ||
765 | if (!name || TREE_CODE (name) != SSA_NAME) | |
766 | return; | |
767 | ||
cde30fe0 AH |
768 | /* Initialize pass local data that's different for each BB. */ |
769 | m_path.truncate (0); | |
770 | m_path.safe_push (bb); | |
771 | m_visited_bbs.empty (); | |
772 | m_seen_loop_phi = false; | |
773 | m_speed_p = speed_p; | |
774 | m_max_threaded_paths = PARAM_VALUE (PARAM_MAX_FSM_THREAD_PATHS); | |
0732f75f | 775 | |
cde30fe0 | 776 | fsm_find_control_statement_thread_paths (name); |
21f0717a | 777 | } |
8b2ef235 JL |
778 | |
namespace {

/* Pass registration data for the backward jump-threading pass
   ("thread" in dump names).  */
const pass_data pass_data_thread_jumps =
{
  GIMPLE_PASS, /* type */
  "thread", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_SSA_THREAD_JUMPS, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

/* Backward jump-threading pass: for every multi-successor block,
   search backwards for paths that make the controlling value a
   constant, then thread them.  */
class pass_thread_jumps : public gimple_opt_pass
{
public:
  pass_thread_jumps (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_thread_jumps, ctxt)
  {}

  opt_pass * clone (void) { return new pass_thread_jumps (m_ctxt); }
  virtual bool gate (function *);
  virtual unsigned int execute (function *);
};

/* This pass may duplicate blocks, so only run it when we are willing
   to trade code size for speed (-fexpensive-optimizations).  */

bool
pass_thread_jumps::gate (function *fun ATTRIBUTE_UNUSED)
{
  return flag_expensive_optimizations;
}

/* Main driver: thread jumps in FUN, allowing code growth (speed_p is
   true).  Returns TODO_cleanup_cfg if any threading was performed.  */

unsigned int
pass_thread_jumps::execute (function *fun)
{
  /* Threading updates loop structures, so loops must be in a simple
     canonical form first.  */
  loop_optimizer_init (LOOPS_HAVE_PREHEADERS | LOOPS_HAVE_SIMPLE_LATCHES);

  /* Try to thread each block with more than one successor.  */
  thread_jumps threader;
  basic_block bb;
  FOR_EACH_BB_FN (bb, fun)
    {
      if (EDGE_COUNT (bb->succs) > 1)
	threader.find_jump_threads_backwards (bb, true);
    }
  /* Apply all the jump threads registered above in one batch.  */
  bool changed = thread_through_all_blocks (true);

  loop_optimizer_finalize ();
  return changed ? TODO_cleanup_cfg : 0;
}

}
833 | ||
/* Factory for the pass manager: create a fresh instance of the
   backward jump-threading pass.  */

gimple_opt_pass *
make_pass_thread_jumps (gcc::context *ctxt)
{
  return new pass_thread_jumps (ctxt);
}
b720e919 JH |
839 | |
namespace {

/* Pass registration data for the early backward jump-threading pass
   ("ethread" in dump names).  Unlike the late pass, this one always
   schedules a CFG cleanup via its todo flags.  */
const pass_data pass_data_early_thread_jumps =
{
  GIMPLE_PASS, /* type */
  "ethread", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_SSA_THREAD_JUMPS, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_cleanup_cfg | TODO_update_ssa ), /* todo_flags_finish */
};

/* Early variant of the backward jump-threading pass; it runs the
   threader with speed_p == false, i.e. without growing code size.  */
class pass_early_thread_jumps : public gimple_opt_pass
{
public:
  pass_early_thread_jumps (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_early_thread_jumps, ctxt)
  {}

  opt_pass * clone (void) { return new pass_early_thread_jumps (m_ctxt); }
  virtual bool gate (function *);
  virtual unsigned int execute (function *);
};

/* The early pass is unconditionally enabled.  */

bool
pass_early_thread_jumps::gate (function *fun ATTRIBUTE_UNUSED)
{
  return true;
}

/* Main driver: thread jumps in FUN without increasing code size
   (speed_p is false).  CFG cleanup happens via the pass's todo
   flags, so this always returns 0.  */

unsigned int
pass_early_thread_jumps::execute (function *fun)
{
  /* Record loops without modifying the CFG while doing so.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  /* Try to thread each block with more than one successor.  */
  thread_jumps threader;
  basic_block bb;
  FOR_EACH_BB_FN (bb, fun)
    {
      if (EDGE_COUNT (bb->succs) > 1)
	threader.find_jump_threads_backwards (bb, false);
    }
  /* Apply all registered jump threads in one batch.  */
  thread_through_all_blocks (true);

  loop_optimizer_finalize ();
  return 0;
}

}
894 | ||
/* Factory for the pass manager: create a fresh instance of the early
   backward jump-threading pass.  */

gimple_opt_pass *
make_pass_early_thread_jumps (gcc::context *ctxt)
{
  return new pass_early_thread_jumps (ctxt);
}