]>
Commit | Line | Data |
---|---|---|
0732f75f | 1 | /* SSA Jump Threading |
8d9254fc | 2 | Copyright (C) 2005-2020 Free Software Foundation, Inc. |
0732f75f JL |
3 | |
4 | This file is part of GCC. | |
5 | ||
6 | GCC is free software; you can redistribute it and/or modify | |
7 | it under the terms of the GNU General Public License as published by | |
8 | the Free Software Foundation; either version 3, or (at your option) | |
9 | any later version. | |
10 | ||
11 | GCC is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | GNU General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with GCC; see the file COPYING3. If not see | |
18 | <http://www.gnu.org/licenses/>. */ | |
19 | ||
20 | #include "config.h" | |
21 | #include "system.h" | |
22 | #include "coretypes.h" | |
23 | #include "backend.h" | |
24 | #include "predict.h" | |
25 | #include "tree.h" | |
26 | #include "gimple.h" | |
27 | #include "fold-const.h" | |
28 | #include "cfgloop.h" | |
29 | #include "gimple-iterator.h" | |
30 | #include "tree-cfg.h" | |
31 | #include "tree-ssa-threadupdate.h" | |
0732f75f JL |
32 | #include "tree-ssa-loop.h" |
33 | #include "cfganal.h" | |
34 | #include "tree-pass.h" | |
8b2ef235 JL |
35 | #include "gimple-ssa.h" |
36 | #include "tree-phinodes.h" | |
27bddc4a | 37 | #include "tree-inline.h" |
0a4e5cf3 | 38 | #include "tree-vectorizer.h" |
0732f75f | 39 | |
cde30fe0 AH |
40 | class thread_jumps |
41 | { | |
42 | public: | |
43 | void find_jump_threads_backwards (basic_block bb, bool speed_p); | |
44 | private: | |
45 | edge profitable_jump_thread_path (basic_block bbi, tree name, tree arg, | |
46 | bool *creates_irreducible_loop); | |
47 | void convert_and_register_current_path (edge taken_edge); | |
48 | void register_jump_thread_path_if_profitable (tree name, tree arg, | |
49 | basic_block def_bb); | |
50 | void handle_assignment (gimple *stmt, tree name, basic_block def_bb); | |
51 | void handle_phi (gphi *phi, tree name, basic_block def_bb); | |
52 | void fsm_find_control_statement_thread_paths (tree name); | |
53 | bool check_subpath_and_update_thread_path (basic_block last_bb, | |
54 | basic_block new_bb, | |
55 | int *next_path_length); | |
56 | ||
57 | /* Maximum number of BBs we are allowed to thread. */ | |
58 | int m_max_threaded_paths; | |
59 | /* Hash to keep track of seen bbs. */ | |
60 | hash_set<basic_block> m_visited_bbs; | |
61 | /* Current path we're analyzing. */ | |
62 | auto_vec<basic_block> m_path; | |
63 | /* Tracks if we have recursed through a loop PHI node. */ | |
64 | bool m_seen_loop_phi; | |
65 | /* Indicate that we could increase code size to improve the | |
66 | code path. */ | |
67 | bool m_speed_p; | |
68 | }; | |
0732f75f | 69 | |
3d466672 | 70 | /* Simple helper to get the last statement from BB, which is assumed |
21f0717a JL |
71 | to be a control statement. Return NULL if the last statement is |
72 | not a control statement. */ | |
73 | ||
3d466672 JL |
74 | static gimple * |
75 | get_gimple_control_stmt (basic_block bb) | |
76 | { | |
21f0717a | 77 | gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb); |
3d466672 JL |
78 | |
79 | if (gsi_end_p (gsi)) | |
80 | return NULL; | |
81 | ||
82 | gimple *stmt = gsi_stmt (gsi); | |
83 | enum gimple_code code = gimple_code (stmt); | |
21f0717a JL |
84 | if (code == GIMPLE_COND || code == GIMPLE_SWITCH || code == GIMPLE_GOTO) |
85 | return stmt; | |
86 | return NULL; | |
3d466672 JL |
87 | } |
88 | ||
cff44c10 AH |
89 | /* Return true if the CFG contains at least one path from START_BB to |
90 | END_BB. When a path is found, record in PATH the blocks from | |
cde30fe0 AH |
91 | END_BB to START_BB. LOCAL_VISITED_BBS is used to make sure we |
92 | don't fall into an infinite loop. Bound the recursion to basic | |
93 | blocks belonging to LOOP. */ | |
0732f75f JL |
94 | |
95 | static bool | |
96 | fsm_find_thread_path (basic_block start_bb, basic_block end_bb, | |
cff44c10 | 97 | vec<basic_block> &path, |
cde30fe0 AH |
98 | hash_set<basic_block> &local_visited_bbs, |
99 | loop_p loop) | |
0732f75f JL |
100 | { |
101 | if (loop != start_bb->loop_father) | |
102 | return false; | |
103 | ||
104 | if (start_bb == end_bb) | |
105 | { | |
cff44c10 | 106 | path.safe_push (start_bb); |
0732f75f JL |
107 | return true; |
108 | } | |
109 | ||
cde30fe0 | 110 | if (!local_visited_bbs.add (start_bb)) |
0732f75f JL |
111 | { |
112 | edge e; | |
113 | edge_iterator ei; | |
114 | FOR_EACH_EDGE (e, ei, start_bb->succs) | |
cde30fe0 AH |
115 | if (fsm_find_thread_path (e->dest, end_bb, path, local_visited_bbs, |
116 | loop)) | |
0732f75f | 117 | { |
cff44c10 | 118 | path.safe_push (start_bb); |
0732f75f JL |
119 | return true; |
120 | } | |
121 | } | |
122 | ||
123 | return false; | |
124 | } | |
125 | ||
bb5e62d6 JL |
126 | /* Examine jump threading path PATH to which we want to add BBI. |
127 | ||
128 | If the resulting path is profitable to thread, then return the | |
129 | final taken edge from the path, NULL otherwise. | |
130 | ||
131 | NAME is the SSA_NAME of the variable we found to have a constant | |
cff44c10 | 132 | value on PATH. ARG is the constant value of NAME on that path. |
bb5e62d6 | 133 | |
cde30fe0 AH |
134 | BBI will be appended to PATH when we have a profitable jump |
135 | threading path. Callers are responsible for removing BBI from PATH | |
136 | in that case. */ | |
bb5e62d6 | 137 | |
cde30fe0 AH |
138 | edge |
139 | thread_jumps::profitable_jump_thread_path (basic_block bbi, tree name, | |
140 | tree arg, | |
141 | bool *creates_irreducible_loop) | |
bb5e62d6 JL |
142 | { |
143 | /* Note BBI is not in the path yet, hence the +1 in the test below | |
144 | to make sure BBI is accounted for in the path length test. */ | |
ad071b2b JL |
145 | |
146 | /* We can get a length of 0 here when the statement that | |
147 | makes a conditional generate a compile-time constant | |
148 | result is in the same block as the conditional. | |
149 | ||
150 | That's not really a jump threading opportunity, but instead is | |
151 | simple cprop & simplification. We could handle it here if we | |
152 | wanted by wiring up all the incoming edges. If we run this | |
153 | early in IPA, that might be worth doing. For now we just | |
154 | reject that case. */ | |
cde30fe0 | 155 | if (m_path.is_empty ()) |
ad071b2b JL |
156 | return NULL; |
157 | ||
cde30fe0 | 158 | if (m_path.length () + 1 |
028d4092 | 159 | > (unsigned) param_max_fsm_thread_length) |
bb5e62d6 JL |
160 | { |
161 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
162 | fprintf (dump_file, "FSM jump-thread path not considered: " | |
163 | "the number of basic blocks on the path " | |
164 | "exceeds PARAM_MAX_FSM_THREAD_LENGTH.\n"); | |
165 | return NULL; | |
166 | } | |
167 | ||
cde30fe0 | 168 | if (m_max_threaded_paths <= 0) |
bb5e62d6 JL |
169 | { |
170 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
171 | fprintf (dump_file, "FSM jump-thread path not considered: " | |
172 | "the number of previously recorded FSM paths to " | |
173 | "thread exceeds PARAM_MAX_FSM_THREAD_PATHS.\n"); | |
174 | return NULL; | |
175 | } | |
176 | ||
177 | /* Add BBI to the path. | |
178 | From this point onward, if we decide we the path is not profitable | |
179 | to thread, we must remove BBI from the path. */ | |
cde30fe0 | 180 | m_path.safe_push (bbi); |
bb5e62d6 JL |
181 | |
182 | int n_insns = 0; | |
183 | gimple_stmt_iterator gsi; | |
cde30fe0 | 184 | loop_p loop = m_path[0]->loop_father; |
bb5e62d6 JL |
185 | bool path_crosses_loops = false; |
186 | bool threaded_through_latch = false; | |
187 | bool multiway_branch_in_path = false; | |
188 | bool threaded_multiway_branch = false; | |
0f0c2cc3 JH |
189 | bool contains_hot_bb = false; |
190 | ||
191 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
192 | fprintf (dump_file, "Checking profitability of path (backwards): "); | |
bb5e62d6 JL |
193 | |
194 | /* Count the number of instructions on the path: as these instructions | |
195 | will have to be duplicated, we will not record the path if there | |
196 | are too many instructions on the path. Also check that all the | |
197 | blocks in the path belong to a single loop. */ | |
cde30fe0 | 198 | for (unsigned j = 0; j < m_path.length (); j++) |
bb5e62d6 | 199 | { |
cde30fe0 | 200 | basic_block bb = m_path[j]; |
bb5e62d6 | 201 | |
0f0c2cc3 JH |
202 | if (dump_file && (dump_flags & TDF_DETAILS)) |
203 | fprintf (dump_file, " bb:%i", bb->index); | |
bb5e62d6 JL |
204 | /* Remember, blocks in the path are stored in opposite order |
205 | in the PATH array. The last entry in the array represents | |
206 | the block with an outgoing edge that we will redirect to the | |
207 | jump threading path. Thus we don't care about that block's | |
208 | loop father, nor how many statements are in that block because | |
209 | it will not be copied or whether or not it ends in a multiway | |
210 | branch. */ | |
cde30fe0 | 211 | if (j < m_path.length () - 1) |
bb5e62d6 | 212 | { |
0f0c2cc3 | 213 | int orig_n_insns = n_insns; |
bb5e62d6 JL |
214 | if (bb->loop_father != loop) |
215 | { | |
216 | path_crosses_loops = true; | |
217 | break; | |
218 | } | |
219 | ||
220 | /* PHIs in the path will create degenerate PHIS in the | |
221 | copied path which will then get propagated away, so | |
222 | looking at just the duplicate path the PHIs would | |
223 | seem unimportant. | |
224 | ||
225 | But those PHIs, because they're assignments to objects | |
226 | typically with lives that exist outside the thread path, | |
227 | will tend to generate PHIs (or at least new PHI arguments) | |
228 | at points where we leave the thread path and rejoin | |
229 | the original blocks. So we do want to account for them. | |
230 | ||
231 | We ignore virtual PHIs. We also ignore cases where BB | |
232 | has a single incoming edge. That's the most common | |
233 | degenerate PHI we'll see here. Finally we ignore PHIs | |
234 | that are associated with the value we're tracking as | |
235 | that object likely dies. */ | |
236 | if (EDGE_COUNT (bb->succs) > 1 && EDGE_COUNT (bb->preds) > 1) | |
237 | { | |
238 | for (gphi_iterator gsip = gsi_start_phis (bb); | |
239 | !gsi_end_p (gsip); | |
240 | gsi_next (&gsip)) | |
241 | { | |
242 | gphi *phi = gsip.phi (); | |
243 | tree dst = gimple_phi_result (phi); | |
244 | ||
245 | /* Note that if both NAME and DST are anonymous | |
246 | SSA_NAMEs, then we do not have enough information | |
247 | to consider them associated. */ | |
fda5c810 RB |
248 | if (dst != name |
249 | && (SSA_NAME_VAR (dst) != SSA_NAME_VAR (name) | |
250 | || !SSA_NAME_VAR (dst)) | |
bb5e62d6 JL |
251 | && !virtual_operand_p (dst)) |
252 | ++n_insns; | |
253 | } | |
254 | } | |
255 | ||
cde30fe0 | 256 | if (!contains_hot_bb && m_speed_p) |
0f0c2cc3 | 257 | contains_hot_bb |= optimize_bb_for_speed_p (bb); |
bb5e62d6 JL |
258 | for (gsi = gsi_after_labels (bb); |
259 | !gsi_end_p (gsi); | |
260 | gsi_next_nondebug (&gsi)) | |
261 | { | |
262 | gimple *stmt = gsi_stmt (gsi); | |
120a01d1 TV |
263 | if (gimple_call_internal_p (stmt, IFN_UNIQUE)) |
264 | { | |
265 | m_path.pop (); | |
266 | return NULL; | |
267 | } | |
bb5e62d6 JL |
268 | /* Do not count empty statements and labels. */ |
269 | if (gimple_code (stmt) != GIMPLE_NOP | |
270 | && !(gimple_code (stmt) == GIMPLE_ASSIGN | |
271 | && gimple_assign_rhs_code (stmt) == ASSERT_EXPR) | |
272 | && !is_gimple_debug (stmt)) | |
0f0c2cc3 | 273 | n_insns += estimate_num_insns (stmt, &eni_size_weights); |
bb5e62d6 | 274 | } |
0f0c2cc3 JH |
275 | if (dump_file && (dump_flags & TDF_DETAILS)) |
276 | fprintf (dump_file, " (%i insns)", n_insns-orig_n_insns); | |
bb5e62d6 JL |
277 | |
278 | /* We do not look at the block with the threaded branch | |
279 | in this loop. So if any block with a last statement that | |
280 | is a GIMPLE_SWITCH or GIMPLE_GOTO is seen, then we have a | |
281 | multiway branch on our path. | |
282 | ||
283 | The block in PATH[0] is special, it's the block were we're | |
284 | going to be able to eliminate its branch. */ | |
285 | gimple *last = last_stmt (bb); | |
286 | if (last && (gimple_code (last) == GIMPLE_SWITCH | |
287 | || gimple_code (last) == GIMPLE_GOTO)) | |
288 | { | |
289 | if (j == 0) | |
290 | threaded_multiway_branch = true; | |
291 | else | |
292 | multiway_branch_in_path = true; | |
293 | } | |
294 | } | |
295 | ||
296 | /* Note if we thread through the latch, we will want to include | |
297 | the last entry in the array when determining if we thread | |
298 | through the loop latch. */ | |
299 | if (loop->latch == bb) | |
300 | threaded_through_latch = true; | |
301 | } | |
302 | ||
cde30fe0 | 303 | gimple *stmt = get_gimple_control_stmt (m_path[0]); |
27bddc4a JH |
304 | gcc_assert (stmt); |
305 | ||
bb5e62d6 JL |
306 | /* We are going to remove the control statement at the end of the |
307 | last block in the threading path. So don't count it against our | |
308 | statement count. */ | |
bb5e62d6 | 309 | |
0f0c2cc3 JH |
310 | int stmt_insns = estimate_num_insns (stmt, &eni_size_weights); |
311 | n_insns-= stmt_insns; | |
312 | ||
313 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
314 | fprintf (dump_file, "\n Control statement insns: %i\n" | |
315 | " Overall: %i insns\n", | |
316 | stmt_insns, n_insns); | |
27bddc4a | 317 | |
bb5e62d6 JL |
318 | /* We have found a constant value for ARG. For GIMPLE_SWITCH |
319 | and GIMPLE_GOTO, we use it as-is. However, for a GIMPLE_COND | |
320 | we need to substitute, fold and simplify so we can determine | |
321 | the edge taken out of the last block. */ | |
322 | if (gimple_code (stmt) == GIMPLE_COND) | |
323 | { | |
324 | enum tree_code cond_code = gimple_cond_code (stmt); | |
325 | ||
326 | /* We know the underyling format of the condition. */ | |
327 | arg = fold_binary (cond_code, boolean_type_node, | |
328 | arg, gimple_cond_rhs (stmt)); | |
329 | } | |
330 | ||
331 | /* If this path threaded through the loop latch back into the | |
332 | same loop and the destination does not dominate the loop | |
333 | latch, then this thread would create an irreducible loop. | |
334 | ||
335 | We have to know the outgoing edge to figure this out. */ | |
cde30fe0 | 336 | edge taken_edge = find_taken_edge (m_path[0], arg); |
bb5e62d6 JL |
337 | |
338 | /* There are cases where we may not be able to extract the | |
339 | taken edge. For example, a computed goto to an absolute | |
340 | address. Handle those cases gracefully. */ | |
341 | if (taken_edge == NULL) | |
342 | { | |
cde30fe0 | 343 | m_path.pop (); |
bb5e62d6 JL |
344 | return NULL; |
345 | } | |
346 | ||
0a4e5cf3 | 347 | *creates_irreducible_loop = false; |
bb5e62d6 JL |
348 | if (threaded_through_latch |
349 | && loop == taken_edge->dest->loop_father | |
350 | && (determine_bb_domination_status (loop, taken_edge->dest) | |
351 | == DOMST_NONDOMINATING)) | |
0a4e5cf3 | 352 | *creates_irreducible_loop = true; |
bb5e62d6 JL |
353 | |
354 | if (path_crosses_loops) | |
355 | { | |
356 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
357 | fprintf (dump_file, "FSM jump-thread path not considered: " | |
358 | "the path crosses loops.\n"); | |
cde30fe0 | 359 | m_path.pop (); |
bb5e62d6 JL |
360 | return NULL; |
361 | } | |
362 | ||
0f0c2cc3 JH |
363 | /* Threading is profitable if the path duplicated is hot but also |
364 | in a case we separate cold path from hot path and permit optimization | |
365 | of the hot path later. Be on the agressive side here. In some testcases, | |
366 | as in PR 78407 this leads to noticeable improvements. */ | |
cde30fe0 | 367 | if (m_speed_p && (optimize_edge_for_speed_p (taken_edge) || contains_hot_bb)) |
27bddc4a | 368 | { |
028d4092 | 369 | if (n_insns >= param_max_fsm_thread_path_insns) |
27bddc4a JH |
370 | { |
371 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
372 | fprintf (dump_file, "FSM jump-thread path not considered: " | |
373 | "the number of instructions on the path " | |
374 | "exceeds PARAM_MAX_FSM_THREAD_PATH_INSNS.\n"); | |
cde30fe0 | 375 | m_path.pop (); |
27bddc4a JH |
376 | return NULL; |
377 | } | |
378 | } | |
379 | else if (n_insns > 1) | |
bb5e62d6 JL |
380 | { |
381 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
382 | fprintf (dump_file, "FSM jump-thread path not considered: " | |
27bddc4a JH |
383 | "duplication of %i insns is needed and optimizing for size.\n", |
384 | n_insns); | |
cde30fe0 | 385 | m_path.pop (); |
bb5e62d6 JL |
386 | return NULL; |
387 | } | |
388 | ||
389 | /* We avoid creating irreducible inner loops unless we thread through | |
390 | a multiway branch, in which case we have deemed it worth losing | |
391 | other loop optimizations later. | |
392 | ||
393 | We also consider it worth creating an irreducible inner loop if | |
394 | the number of copied statement is low relative to the length of | |
395 | the path -- in that case there's little the traditional loop | |
396 | optimizer would have done anyway, so an irreducible loop is not | |
397 | so bad. */ | |
0a4e5cf3 | 398 | if (!threaded_multiway_branch && *creates_irreducible_loop |
028d4092 | 399 | && (n_insns * (unsigned) param_fsm_scale_path_stmts |
cde30fe0 | 400 | > (m_path.length () * |
028d4092 | 401 | (unsigned) param_fsm_scale_path_blocks))) |
bb5e62d6 JL |
402 | |
403 | { | |
404 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
405 | fprintf (dump_file, | |
406 | "FSM would create irreducible loop without threading " | |
407 | "multiway branch.\n"); | |
cde30fe0 | 408 | m_path.pop (); |
bb5e62d6 JL |
409 | return NULL; |
410 | } | |
411 | ||
412 | ||
413 | /* If this path does not thread through the loop latch, then we are | |
414 | using the FSM threader to find old style jump threads. This | |
415 | is good, except the FSM threader does not re-use an existing | |
416 | threading path to reduce code duplication. | |
417 | ||
418 | So for that case, drastically reduce the number of statements | |
419 | we are allowed to copy. */ | |
420 | if (!(threaded_through_latch && threaded_multiway_branch) | |
028d4092 ML |
421 | && (n_insns * param_fsm_scale_path_stmts |
422 | >= param_max_jump_thread_duplication_stmts)) | |
bb5e62d6 JL |
423 | { |
424 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
425 | fprintf (dump_file, | |
426 | "FSM did not thread around loop and would copy too " | |
427 | "many statements.\n"); | |
cde30fe0 | 428 | m_path.pop (); |
bb5e62d6 JL |
429 | return NULL; |
430 | } | |
431 | ||
432 | /* When there is a multi-way branch on the path, then threading can | |
433 | explode the CFG due to duplicating the edges for that multi-way | |
434 | branch. So like above, only allow a multi-way branch on the path | |
435 | if we actually thread a multi-way branch. */ | |
436 | if (!threaded_multiway_branch && multiway_branch_in_path) | |
437 | { | |
438 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
439 | fprintf (dump_file, | |
440 | "FSM Thread through multiway branch without threading " | |
441 | "a multiway branch.\n"); | |
cde30fe0 | 442 | m_path.pop (); |
bb5e62d6 JL |
443 | return NULL; |
444 | } | |
445 | return taken_edge; | |
446 | } | |
447 | ||
cde30fe0 AH |
448 | /* The current path PATH is a vector of blocks forming a jump threading |
449 | path in reverse order. TAKEN_EDGE is the edge taken from path[0]. | |
081fdda6 | 450 | |
cde30fe0 AH |
451 | Convert the current path into the form used by register_jump_thread and |
452 | register it. */ | |
081fdda6 | 453 | |
cde30fe0 AH |
454 | void |
455 | thread_jumps::convert_and_register_current_path (edge taken_edge) | |
081fdda6 JL |
456 | { |
457 | vec<jump_thread_edge *> *jump_thread_path = new vec<jump_thread_edge *> (); | |
458 | ||
459 | /* Record the edges between the blocks in PATH. */ | |
cde30fe0 | 460 | for (unsigned int j = 0; j + 1 < m_path.length (); j++) |
081fdda6 | 461 | { |
cde30fe0 AH |
462 | basic_block bb1 = m_path[m_path.length () - j - 1]; |
463 | basic_block bb2 = m_path[m_path.length () - j - 2]; | |
8af01c66 | 464 | |
081fdda6 JL |
465 | edge e = find_edge (bb1, bb2); |
466 | gcc_assert (e); | |
467 | jump_thread_edge *x = new jump_thread_edge (e, EDGE_FSM_THREAD); | |
468 | jump_thread_path->safe_push (x); | |
469 | } | |
470 | ||
471 | /* Add the edge taken when the control variable has value ARG. */ | |
472 | jump_thread_edge *x | |
473 | = new jump_thread_edge (taken_edge, EDGE_NO_COPY_SRC_BLOCK); | |
474 | jump_thread_path->safe_push (x); | |
475 | ||
476 | register_jump_thread (jump_thread_path); | |
cde30fe0 | 477 | --m_max_threaded_paths; |
081fdda6 JL |
478 | } |
479 | ||
cff44c10 AH |
480 | /* While following a chain of SSA_NAME definitions, we jumped from a |
481 | definition in LAST_BB to a definition in NEW_BB (walking | |
482 | backwards). | |
435fd407 | 483 | |
cff44c10 AH |
484 | Verify there is a single path between the blocks and none of the |
485 | blocks in the path is already in VISITED_BBS. If so, then update | |
486 | VISISTED_BBS, add the new blocks to PATH and return TRUE. | |
487 | Otherwise return FALSE. | |
435fd407 JL |
488 | |
489 | Store the length of the subpath in NEXT_PATH_LENGTH. */ | |
490 | ||
cde30fe0 AH |
491 | bool |
492 | thread_jumps::check_subpath_and_update_thread_path (basic_block last_bb, | |
493 | basic_block new_bb, | |
494 | int *next_path_length) | |
435fd407 JL |
495 | { |
496 | edge e; | |
497 | int e_count = 0; | |
498 | edge_iterator ei; | |
cff44c10 | 499 | auto_vec<basic_block> next_path; |
435fd407 JL |
500 | |
501 | FOR_EACH_EDGE (e, ei, last_bb->preds) | |
502 | { | |
cde30fe0 | 503 | hash_set<basic_block> local_visited_bbs; |
435fd407 | 504 | |
cde30fe0 AH |
505 | if (fsm_find_thread_path (new_bb, e->src, next_path, |
506 | local_visited_bbs, e->src->loop_father)) | |
435fd407 JL |
507 | ++e_count; |
508 | ||
435fd407 JL |
509 | /* If there is more than one path, stop. */ |
510 | if (e_count > 1) | |
cff44c10 | 511 | return false; |
435fd407 JL |
512 | } |
513 | ||
514 | /* Stop if we have not found a path: this could occur when the recursion | |
515 | is stopped by one of the bounds. */ | |
516 | if (e_count == 0) | |
cff44c10 | 517 | return false; |
435fd407 JL |
518 | |
519 | /* Make sure we haven't already visited any of the nodes in | |
520 | NEXT_PATH. Don't add them here to avoid pollution. */ | |
cff44c10 | 521 | for (unsigned int i = 0; i + 1 < next_path.length (); i++) |
435fd407 | 522 | { |
cde30fe0 | 523 | if (m_visited_bbs.contains (next_path[i])) |
cff44c10 | 524 | return false; |
435fd407 JL |
525 | } |
526 | ||
527 | /* Now add the nodes to VISISTED_BBS. */ | |
cff44c10 | 528 | for (unsigned int i = 0; i + 1 < next_path.length (); i++) |
cde30fe0 | 529 | m_visited_bbs.add (next_path[i]); |
435fd407 JL |
530 | |
531 | /* Append all the nodes from NEXT_PATH to PATH. */ | |
cde30fe0 | 532 | m_path.safe_splice (next_path); |
cff44c10 | 533 | *next_path_length = next_path.length (); |
435fd407 JL |
534 | |
535 | return true; | |
536 | } | |
537 | ||
cff44c10 AH |
538 | /* If this is a profitable jump thread path, register it. |
539 | ||
540 | NAME is an SSA NAME with a possible constant value of ARG on PATH. | |
541 | ||
cde30fe0 | 542 | DEF_BB is the basic block that ultimately defines the constant. */ |
cff44c10 | 543 | |
cde30fe0 AH |
544 | void |
545 | thread_jumps::register_jump_thread_path_if_profitable (tree name, tree arg, | |
546 | basic_block def_bb) | |
cff44c10 AH |
547 | { |
548 | if (TREE_CODE_CLASS (TREE_CODE (arg)) != tcc_constant) | |
549 | return; | |
550 | ||
551 | bool irreducible = false; | |
cde30fe0 AH |
552 | edge taken_edge = profitable_jump_thread_path (def_bb, name, arg, |
553 | &irreducible); | |
cff44c10 AH |
554 | if (taken_edge) |
555 | { | |
cde30fe0 AH |
556 | convert_and_register_current_path (taken_edge); |
557 | m_path.pop (); | |
cff44c10 AH |
558 | |
559 | if (irreducible) | |
cde30fe0 | 560 | vect_free_loop_info_assumptions (m_path[0]->loop_father); |
cff44c10 AH |
561 | } |
562 | } | |
563 | ||
cff44c10 | 564 | /* Given PHI which defines NAME in block DEF_BB, recurse through the |
435fd407 JL |
565 | PHI's arguments searching for paths where NAME will ultimately have |
566 | a constant value. | |
567 | ||
435fd407 | 568 | PATH contains the series of blocks to traverse that will result in |
cde30fe0 | 569 | NAME having a constant value. */ |
435fd407 | 570 | |
cde30fe0 AH |
571 | void |
572 | thread_jumps::handle_phi (gphi *phi, tree name, basic_block def_bb) | |
435fd407 JL |
573 | { |
574 | /* Iterate over the arguments of PHI. */ | |
575 | for (unsigned int i = 0; i < gimple_phi_num_args (phi); i++) | |
576 | { | |
577 | tree arg = gimple_phi_arg_def (phi, i); | |
578 | basic_block bbi = gimple_phi_arg_edge (phi, i)->src; | |
579 | ||
580 | /* Skip edges pointing outside the current loop. */ | |
cff44c10 | 581 | if (!arg || def_bb->loop_father != bbi->loop_father) |
435fd407 JL |
582 | continue; |
583 | ||
584 | if (TREE_CODE (arg) == SSA_NAME) | |
585 | { | |
cde30fe0 | 586 | m_path.safe_push (bbi); |
435fd407 JL |
587 | /* Recursively follow SSA_NAMEs looking for a constant |
588 | definition. */ | |
cde30fe0 | 589 | fsm_find_control_statement_thread_paths (arg); |
435fd407 | 590 | |
cde30fe0 | 591 | m_path.pop (); |
435fd407 JL |
592 | continue; |
593 | } | |
594 | ||
cde30fe0 | 595 | register_jump_thread_path_if_profitable (name, arg, bbi); |
435fd407 JL |
596 | } |
597 | } | |
598 | ||
599 | /* Return TRUE if STMT is a gimple assignment we want to either directly | |
600 | handle or recurse through. Return FALSE otherwise. | |
601 | ||
602 | Note that adding more cases here requires adding cases to handle_assignment | |
603 | below. */ | |
604 | ||
605 | static bool | |
606 | handle_assignment_p (gimple *stmt) | |
607 | { | |
608 | if (is_gimple_assign (stmt)) | |
609 | { | |
610 | enum tree_code def_code = gimple_assign_rhs_code (stmt); | |
611 | ||
612 | /* If the RHS is an SSA_NAME, then we will recurse through it. | |
613 | Go ahead and filter out cases where the SSA_NAME is a default | |
614 | definition. There's little to be gained by trying to handle that. */ | |
615 | if (def_code == SSA_NAME | |
616 | && !SSA_NAME_IS_DEFAULT_DEF (gimple_assign_rhs1 (stmt))) | |
617 | return true; | |
618 | ||
619 | /* If the RHS is a constant, then it's a terminal that we'll want | |
620 | to handle as well. */ | |
621 | if (TREE_CODE_CLASS (def_code) == tcc_constant) | |
622 | return true; | |
623 | } | |
624 | ||
625 | /* Anything not explicitly allowed is not handled. */ | |
626 | return false; | |
627 | } | |
628 | ||
cff44c10 | 629 | /* Given STMT which defines NAME in block DEF_BB, recurse through the |
435fd407 JL |
630 | PHI's arguments searching for paths where NAME will ultimately have |
631 | a constant value. | |
632 | ||
435fd407 | 633 | PATH contains the series of blocks to traverse that will result in |
cde30fe0 | 634 | NAME having a constant value. */ |
435fd407 | 635 | |
cde30fe0 AH |
636 | void |
637 | thread_jumps::handle_assignment (gimple *stmt, tree name, basic_block def_bb) | |
435fd407 JL |
638 | { |
639 | tree arg = gimple_assign_rhs1 (stmt); | |
640 | ||
641 | if (TREE_CODE (arg) == SSA_NAME) | |
cde30fe0 | 642 | fsm_find_control_statement_thread_paths (arg); |
435fd407 JL |
643 | |
644 | else | |
645 | { | |
cff44c10 | 646 | /* register_jump_thread_path_if_profitable will push the current |
435fd407 JL |
647 | block onto the path. But the path will always have the current |
648 | block at this point. So we can just pop it. */ | |
cde30fe0 | 649 | m_path.pop (); |
435fd407 | 650 | |
cde30fe0 | 651 | register_jump_thread_path_if_profitable (name, arg, def_bb); |
435fd407 JL |
652 | |
653 | /* And put the current block back onto the path so that the | |
654 | state of the stack is unchanged when we leave. */ | |
cde30fe0 | 655 | m_path.safe_push (def_bb); |
435fd407 JL |
656 | } |
657 | } | |
658 | ||
cff44c10 AH |
659 | /* We trace the value of the SSA_NAME NAME back through any phi nodes |
660 | looking for places where it gets a constant value and save the | |
cde30fe0 | 661 | path. */ |
0732f75f | 662 | |
cde30fe0 AH |
663 | void |
664 | thread_jumps::fsm_find_control_statement_thread_paths (tree name) | |
0732f75f | 665 | { |
89d0fecd JL |
666 | /* If NAME appears in an abnormal PHI, then don't try to trace its |
667 | value back through PHI nodes. */ | |
668 | if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name)) | |
669 | return; | |
670 | ||
3d466672 | 671 | gimple *def_stmt = SSA_NAME_DEF_STMT (name); |
cff44c10 | 672 | basic_block def_bb = gimple_bb (def_stmt); |
0732f75f | 673 | |
cff44c10 | 674 | if (def_bb == NULL) |
0732f75f JL |
675 | return; |
676 | ||
081fdda6 JL |
677 | /* We allow the SSA chain to contains PHIs and simple copies and constant |
678 | initializations. */ | |
da9a8da8 | 679 | if (gimple_code (def_stmt) != GIMPLE_PHI |
081fdda6 JL |
680 | && gimple_code (def_stmt) != GIMPLE_ASSIGN) |
681 | return; | |
682 | ||
683 | if (gimple_code (def_stmt) == GIMPLE_PHI | |
684 | && (gimple_phi_num_args (def_stmt) | |
028d4092 | 685 | >= (unsigned) param_fsm_maximum_phi_arguments)) |
0732f75f JL |
686 | return; |
687 | ||
435fd407 JL |
688 | if (is_gimple_assign (def_stmt) |
689 | && ! handle_assignment_p (def_stmt)) | |
081fdda6 JL |
690 | return; |
691 | ||
0732f75f | 692 | /* Avoid infinite recursion. */ |
cde30fe0 | 693 | if (m_visited_bbs.add (def_bb)) |
0732f75f JL |
694 | return; |
695 | ||
0732f75f | 696 | int next_path_length = 0; |
cde30fe0 | 697 | basic_block last_bb_in_path = m_path.last (); |
0732f75f | 698 | |
081fdda6 | 699 | if (loop_containing_stmt (def_stmt)->header == gimple_bb (def_stmt)) |
0732f75f JL |
700 | { |
701 | /* Do not walk through more than one loop PHI node. */ | |
cde30fe0 | 702 | if (m_seen_loop_phi) |
0732f75f | 703 | return; |
cde30fe0 | 704 | m_seen_loop_phi = true; |
0732f75f JL |
705 | } |
706 | ||
707 | /* Following the chain of SSA_NAME definitions, we jumped from a definition in | |
cff44c10 AH |
708 | LAST_BB_IN_PATH to a definition in DEF_BB. When these basic blocks are |
709 | different, append to PATH the blocks from LAST_BB_IN_PATH to DEF_BB. */ | |
710 | if (def_bb != last_bb_in_path) | |
0732f75f | 711 | { |
cff44c10 | 712 | /* When DEF_BB == LAST_BB_IN_PATH, then the first block in the path |
c67e5194 JL |
713 | will already be in VISITED_BBS. When they are not equal, then we |
714 | must ensure that first block is accounted for to ensure we do not | |
715 | create bogus jump threading paths. */ | |
cde30fe0 | 716 | m_visited_bbs.add (m_path[0]); |
cff44c10 | 717 | if (!check_subpath_and_update_thread_path (last_bb_in_path, def_bb, |
435fd407 JL |
718 | &next_path_length)) |
719 | return; | |
0732f75f JL |
720 | } |
721 | ||
cde30fe0 | 722 | gcc_assert (m_path.last () == def_bb); |
0732f75f | 723 | |
081fdda6 | 724 | if (gimple_code (def_stmt) == GIMPLE_PHI) |
cde30fe0 | 725 | handle_phi (as_a <gphi *> (def_stmt), name, def_bb); |
081fdda6 | 726 | else if (gimple_code (def_stmt) == GIMPLE_ASSIGN) |
cde30fe0 | 727 | handle_assignment (def_stmt, name, def_bb); |
0732f75f JL |
728 | |
729 | /* Remove all the nodes that we added from NEXT_PATH. */ | |
730 | if (next_path_length) | |
cde30fe0 | 731 | m_path.truncate (m_path.length () - next_path_length); |
0732f75f JL |
732 | } |
733 | ||
734 | /* Search backwards from BB looking for paths where NAME (an SSA_NAME) | |
735 | is a constant. Record such paths for jump threading. | |
736 | ||
737 | It is assumed that BB ends with a control statement and that by | |
b720e919 | 738 | finding a path where NAME is a constant, we can thread the path. |
ddc1759a | 739 | SPEED_P indicates that we could increase code size to improve the |
cff44c10 | 740 | code path. */ |
0732f75f | 741 | |
cde30fe0 AH |
742 | void |
743 | thread_jumps::find_jump_threads_backwards (basic_block bb, bool speed_p) | |
0732f75f | 744 | { |
8b2ef235 | 745 | gimple *stmt = get_gimple_control_stmt (bb); |
21f0717a JL |
746 | if (!stmt) |
747 | return; | |
748 | ||
749 | enum gimple_code code = gimple_code (stmt); | |
750 | tree name = NULL; | |
751 | if (code == GIMPLE_SWITCH) | |
752 | name = gimple_switch_index (as_a <gswitch *> (stmt)); | |
753 | else if (code == GIMPLE_GOTO) | |
754 | name = gimple_goto_dest (stmt); | |
755 | else if (code == GIMPLE_COND) | |
756 | { | |
757 | if (TREE_CODE (gimple_cond_lhs (stmt)) == SSA_NAME | |
435fd407 | 758 | && TREE_CODE_CLASS (TREE_CODE (gimple_cond_rhs (stmt))) == tcc_constant |
21f0717a JL |
759 | && (INTEGRAL_TYPE_P (TREE_TYPE (gimple_cond_lhs (stmt))) |
760 | || POINTER_TYPE_P (TREE_TYPE (gimple_cond_lhs (stmt))))) | |
761 | name = gimple_cond_lhs (stmt); | |
762 | } | |
763 | ||
764 | if (!name || TREE_CODE (name) != SSA_NAME) | |
765 | return; | |
766 | ||
cde30fe0 AH |
767 | /* Initialize pass local data that's different for each BB. */ |
768 | m_path.truncate (0); | |
769 | m_path.safe_push (bb); | |
770 | m_visited_bbs.empty (); | |
771 | m_seen_loop_phi = false; | |
772 | m_speed_p = speed_p; | |
028d4092 | 773 | m_max_threaded_paths = param_max_fsm_thread_paths; |
0732f75f | 774 | |
cde30fe0 | 775 | fsm_find_control_statement_thread_paths (name); |
21f0717a | 776 | } |
8b2ef235 JL |
777 | |
778 | namespace { | |
779 | ||
780 | const pass_data pass_data_thread_jumps = | |
781 | { | |
782 | GIMPLE_PASS, | |
783 | "thread", | |
784 | OPTGROUP_NONE, | |
785 | TV_TREE_SSA_THREAD_JUMPS, | |
786 | ( PROP_cfg | PROP_ssa ), | |
787 | 0, | |
788 | 0, | |
789 | 0, | |
bb7ebad1 | 790 | TODO_update_ssa, |
8b2ef235 JL |
791 | }; |
792 | ||
793 | class pass_thread_jumps : public gimple_opt_pass | |
794 | { | |
795 | public: | |
796 | pass_thread_jumps (gcc::context *ctxt) | |
797 | : gimple_opt_pass (pass_data_thread_jumps, ctxt) | |
798 | {} | |
799 | ||
800 | opt_pass * clone (void) { return new pass_thread_jumps (m_ctxt); } | |
801 | virtual bool gate (function *); | |
802 | virtual unsigned int execute (function *); | |
803 | }; | |
804 | ||
805 | bool | |
806 | pass_thread_jumps::gate (function *fun ATTRIBUTE_UNUSED) | |
807 | { | |
27bddc4a | 808 | return flag_expensive_optimizations; |
8b2ef235 JL |
809 | } |
810 | ||
811 | ||
812 | unsigned int | |
813 | pass_thread_jumps::execute (function *fun) | |
814 | { | |
bb7ebad1 RB |
815 | loop_optimizer_init (LOOPS_HAVE_PREHEADERS | LOOPS_HAVE_SIMPLE_LATCHES); |
816 | ||
8b2ef235 | 817 | /* Try to thread each block with more than one successor. */ |
cde30fe0 | 818 | thread_jumps threader; |
8b2ef235 JL |
819 | basic_block bb; |
820 | FOR_EACH_BB_FN (bb, fun) | |
821 | { | |
822 | if (EDGE_COUNT (bb->succs) > 1) | |
cde30fe0 | 823 | threader.find_jump_threads_backwards (bb, true); |
8b2ef235 | 824 | } |
bb7ebad1 RB |
825 | bool changed = thread_through_all_blocks (true); |
826 | ||
827 | loop_optimizer_finalize (); | |
828 | return changed ? TODO_cleanup_cfg : 0; | |
8b2ef235 JL |
829 | } |
830 | ||
831 | } | |
832 | ||
833 | gimple_opt_pass * | |
834 | make_pass_thread_jumps (gcc::context *ctxt) | |
835 | { | |
836 | return new pass_thread_jumps (ctxt); | |
837 | } | |
b720e919 JH |
838 | |
839 | namespace { | |
840 | ||
841 | const pass_data pass_data_early_thread_jumps = | |
842 | { | |
843 | GIMPLE_PASS, | |
844 | "ethread", | |
845 | OPTGROUP_NONE, | |
846 | TV_TREE_SSA_THREAD_JUMPS, | |
847 | ( PROP_cfg | PROP_ssa ), | |
848 | 0, | |
849 | 0, | |
850 | 0, | |
851 | ( TODO_cleanup_cfg | TODO_update_ssa ), | |
852 | }; | |
853 | ||
854 | class pass_early_thread_jumps : public gimple_opt_pass | |
855 | { | |
856 | public: | |
857 | pass_early_thread_jumps (gcc::context *ctxt) | |
858 | : gimple_opt_pass (pass_data_early_thread_jumps, ctxt) | |
859 | {} | |
860 | ||
861 | opt_pass * clone (void) { return new pass_early_thread_jumps (m_ctxt); } | |
862 | virtual bool gate (function *); | |
863 | virtual unsigned int execute (function *); | |
864 | }; | |
865 | ||
866 | bool | |
867 | pass_early_thread_jumps::gate (function *fun ATTRIBUTE_UNUSED) | |
868 | { | |
869 | return true; | |
870 | } | |
871 | ||
872 | ||
873 | unsigned int | |
874 | pass_early_thread_jumps::execute (function *fun) | |
875 | { | |
108fdd6d RB |
876 | loop_optimizer_init (AVOID_CFG_MODIFICATIONS); |
877 | ||
b720e919 | 878 | /* Try to thread each block with more than one successor. */ |
cde30fe0 | 879 | thread_jumps threader; |
b720e919 JH |
880 | basic_block bb; |
881 | FOR_EACH_BB_FN (bb, fun) | |
882 | { | |
883 | if (EDGE_COUNT (bb->succs) > 1) | |
cde30fe0 | 884 | threader.find_jump_threads_backwards (bb, false); |
b720e919 JH |
885 | } |
886 | thread_through_all_blocks (true); | |
108fdd6d RB |
887 | |
888 | loop_optimizer_finalize (); | |
b720e919 JH |
889 | return 0; |
890 | } | |
891 | ||
892 | } | |
893 | ||
894 | gimple_opt_pass * | |
895 | make_pass_early_thread_jumps (gcc::context *ctxt) | |
896 | { | |
897 | return new pass_early_thread_jumps (ctxt); | |
898 | } |