]>
Commit | Line | Data |
---|---|---|
a7e5372d | 1 | /* Loop invariant motion. |
e42febca | 2 | Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc. |
a7e5372d ZD |
3 | |
4 | This file is part of GCC. | |
5 | ||
6 | GCC is free software; you can redistribute it and/or modify it | |
7 | under the terms of the GNU General Public License as published by the | |
8 | Free Software Foundation; either version 2, or (at your option) any | |
9 | later version. | |
10 | ||
11 | GCC is distributed in the hope that it will be useful, but WITHOUT | |
12 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with GCC; see the file COPYING. If not, write to the Free | |
18 | Software Foundation, 59 Temple Place - Suite 330, Boston, MA | |
19 | 02111-1307, USA. */ | |
20 | ||
21 | #include "config.h" | |
22 | #include "system.h" | |
23 | #include "coretypes.h" | |
24 | #include "tm.h" | |
25 | #include "tree.h" | |
26 | #include "rtl.h" | |
27 | #include "tm_p.h" | |
28 | #include "hard-reg-set.h" | |
29 | #include "basic-block.h" | |
30 | #include "output.h" | |
31 | #include "diagnostic.h" | |
32 | #include "tree-flow.h" | |
33 | #include "tree-dump.h" | |
34 | #include "timevar.h" | |
35 | #include "cfgloop.h" | |
36 | #include "domwalk.h" | |
37 | #include "params.h" | |
38 | #include "tree-pass.h" | |
39 | #include "flags.h" | |
37cca405 | 40 | #include "real.h" |
a7e5372d | 41 | |
f10a6654 ZD |
42 | /* TODO: Support for predicated code motion. I.e. |
43 | ||
44 | while (1) | |
45 | { | |
46 | if (cond) | |
47 | { | |
48 | a = inv; | |
49 | something; | |
50 | } | |
51 | } | |
52 | ||
53 | Where COND and INV are is invariants, but evaluating INV may trap or be | |
54 | invalid from some other reason if !COND. This may be transformed to | |
55 | ||
56 | if (cond) | |
57 | a = inv; | |
58 | while (1) | |
59 | { | |
60 | if (cond) | |
61 | something; | |
62 | } */ | |
63 | ||
a7e5372d ZD |
64 | /* A type for the list of statements that have to be moved in order to be able |
65 | to hoist an invariant computation. */ | |
66 | ||
67 | struct depend | |
68 | { | |
69 | tree stmt; | |
70 | struct depend *next; | |
71 | }; | |
72 | ||
a7e5372d ZD |
73 | /* The auxiliary data kept for each statement. */ |
74 | ||
75 | struct lim_aux_data | |
76 | { | |
77 | struct loop *max_loop; /* The outermost loop in that the statement | |
78 | is invariant. */ | |
79 | ||
80 | struct loop *tgt_loop; /* The loop out of that we want to move the | |
81 | invariant. */ | |
82 | ||
83 | struct loop *always_executed_in; | |
84 | /* The outermost loop for that we are sure | |
85 | the statement is executed if the loop | |
86 | is entered. */ | |
87 | ||
88 | bool sm_done; /* True iff the store motion for a memory | |
89 | reference in the statement has already | |
90 | been executed. */ | |
91 | ||
92 | unsigned cost; /* Cost of the computation performed by the | |
93 | statement. */ | |
94 | ||
95 | struct depend *depends; /* List of statements that must be also hoisted | |
96 | out of the loop when this statement is | |
97 | hoisted; i.e. those that define the operands | |
98 | of the statement and are inside of the | |
99 | MAX_LOOP loop. */ | |
100 | }; | |
101 | ||
30d396e3 ZD |
102 | #define LIM_DATA(STMT) (TREE_CODE (STMT) == PHI_NODE \ |
103 | ? NULL \ | |
104 | : (struct lim_aux_data *) (stmt_ann (STMT)->common.aux)) | |
a7e5372d ZD |
105 | |
106 | /* Description of a memory reference for store motion. */ | |
107 | ||
108 | struct mem_ref | |
109 | { | |
110 | tree *ref; /* The reference itself. */ | |
111 | tree stmt; /* The statement in that it occurs. */ | |
112 | struct mem_ref *next; /* Next use in the chain. */ | |
113 | }; | |
114 | ||
115 | /* Minimum cost of an expensive expression. */ | |
116 | #define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE)) | |
117 | ||
118 | /* The outermost loop for that execution of the header guarantees that the | |
119 | block will be executed. */ | |
120 | #define ALWAYS_EXECUTED_IN(BB) ((struct loop *) (BB)->aux) | |
121 | ||
30d396e3 ZD |
122 | static unsigned max_stmt_uid; /* Maximal uid of a statement. Uids to phi |
123 | nodes are assigned using the versions of | |
124 | ssa names they define. */ | |
a7e5372d | 125 | |
30d396e3 ZD |
126 | /* Returns uid of statement STMT. */ |
127 | ||
128 | static unsigned | |
129 | get_stmt_uid (tree stmt) | |
130 | { | |
131 | if (TREE_CODE (stmt) == PHI_NODE) | |
132 | return SSA_NAME_VERSION (PHI_RESULT (stmt)) + max_stmt_uid; | |
133 | ||
134 | return stmt_ann (stmt)->uid; | |
135 | } | |
a7e5372d ZD |
136 | |
137 | /* Calls CBCK for each index in memory reference ADDR_P. There are two | |
138 | kinds situations handled; in each of these cases, the memory reference | |
139 | and DATA are passed to the callback: | |
140 | ||
141 | Access to an array: ARRAY_{RANGE_}REF (base, index). In this case we also | |
142 | pass the pointer to the index to the callback. | |
143 | ||
144 | Pointer dereference: INDIRECT_REF (addr). In this case we also pass the | |
145 | pointer to addr to the callback. | |
146 | ||
147 | If the callback returns false, the whole search stops and false is returned. | |
148 | Otherwise the function returns true after traversing through the whole | |
149 | reference *ADDR_P. */ | |
150 | ||
151 | bool | |
152 | for_each_index (tree *addr_p, bool (*cbck) (tree, tree *, void *), void *data) | |
153 | { | |
be35cf60 | 154 | tree *nxt, *idx; |
a7e5372d ZD |
155 | |
156 | for (; ; addr_p = nxt) | |
157 | { | |
158 | switch (TREE_CODE (*addr_p)) | |
159 | { | |
160 | case SSA_NAME: | |
161 | return cbck (*addr_p, addr_p, data); | |
162 | ||
7ccf35ed DN |
163 | case MISALIGNED_INDIRECT_REF: |
164 | case ALIGN_INDIRECT_REF: | |
a7e5372d ZD |
165 | case INDIRECT_REF: |
166 | nxt = &TREE_OPERAND (*addr_p, 0); | |
167 | return cbck (*addr_p, nxt, data); | |
168 | ||
169 | case BIT_FIELD_REF: | |
a7e5372d ZD |
170 | case VIEW_CONVERT_EXPR: |
171 | case ARRAY_RANGE_REF: | |
8b11a64c ZD |
172 | case REALPART_EXPR: |
173 | case IMAGPART_EXPR: | |
a7e5372d ZD |
174 | nxt = &TREE_OPERAND (*addr_p, 0); |
175 | break; | |
176 | ||
be35cf60 ZD |
177 | case COMPONENT_REF: |
178 | /* If the component has varying offset, it behaves like index | |
179 | as well. */ | |
180 | idx = &TREE_OPERAND (*addr_p, 2); | |
181 | if (*idx | |
182 | && !cbck (*addr_p, idx, data)) | |
183 | return false; | |
184 | ||
185 | nxt = &TREE_OPERAND (*addr_p, 0); | |
186 | break; | |
187 | ||
a7e5372d ZD |
188 | case ARRAY_REF: |
189 | nxt = &TREE_OPERAND (*addr_p, 0); | |
190 | if (!cbck (*addr_p, &TREE_OPERAND (*addr_p, 1), data)) | |
191 | return false; | |
192 | break; | |
193 | ||
194 | case VAR_DECL: | |
195 | case PARM_DECL: | |
196 | case STRING_CST: | |
197 | case RESULT_DECL: | |
198 | return true; | |
199 | ||
200 | default: | |
1e128c5f | 201 | gcc_unreachable (); |
a7e5372d ZD |
202 | } |
203 | } | |
204 | } | |
205 | ||
206 | /* If it is possible to hoist the statement STMT unconditionally, | |
207 | returns MOVE_POSSIBLE. | |
208 | If it is possible to hoist the statement STMT, but we must avoid making | |
209 | it executed if it would not be executed in the original program (e.g. | |
210 | because it may trap), return MOVE_PRESERVE_EXECUTION. | |
211 | Otherwise return MOVE_IMPOSSIBLE. */ | |
212 | ||
40923b20 | 213 | enum move_pos |
a7e5372d ZD |
214 | movement_possibility (tree stmt) |
215 | { | |
216 | tree lhs, rhs; | |
217 | ||
218 | if (flag_unswitch_loops | |
219 | && TREE_CODE (stmt) == COND_EXPR) | |
220 | { | |
221 | /* If we perform unswitching, force the operands of the invariant | |
222 | condition to be moved out of the loop. */ | |
a7e5372d ZD |
223 | return MOVE_POSSIBLE; |
224 | } | |
225 | ||
226 | if (TREE_CODE (stmt) != MODIFY_EXPR) | |
227 | return MOVE_IMPOSSIBLE; | |
228 | ||
229 | if (stmt_ends_bb_p (stmt)) | |
230 | return MOVE_IMPOSSIBLE; | |
231 | ||
a7e5372d ZD |
232 | if (stmt_ann (stmt)->has_volatile_ops) |
233 | return MOVE_IMPOSSIBLE; | |
234 | ||
235 | lhs = TREE_OPERAND (stmt, 0); | |
236 | if (TREE_CODE (lhs) == SSA_NAME | |
237 | && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)) | |
238 | return MOVE_IMPOSSIBLE; | |
239 | ||
240 | rhs = TREE_OPERAND (stmt, 1); | |
241 | ||
242 | if (TREE_SIDE_EFFECTS (rhs)) | |
243 | return MOVE_IMPOSSIBLE; | |
244 | ||
245 | if (TREE_CODE (lhs) != SSA_NAME | |
246 | || tree_could_trap_p (rhs)) | |
247 | return MOVE_PRESERVE_EXECUTION; | |
248 | ||
f10a6654 ZD |
249 | if (get_call_expr_in (stmt)) |
250 | { | |
251 | /* While pure or const call is guaranteed to have no side effects, we | |
252 | cannot move it arbitrarily. Consider code like | |
253 | ||
254 | char *s = something (); | |
255 | ||
256 | while (1) | |
257 | { | |
258 | if (s) | |
259 | t = strlen (s); | |
260 | else | |
261 | t = 0; | |
262 | } | |
263 | ||
264 | Here the strlen call cannot be moved out of the loop, even though | |
265 | s is invariant. In addition to possibly creating a call with | |
266 | invalid arguments, moving out a function call that is not executed | |
267 | may cause performance regressions in case the call is costly and | |
268 | not executed at all. */ | |
269 | return MOVE_PRESERVE_EXECUTION; | |
270 | } | |
a7e5372d ZD |
271 | return MOVE_POSSIBLE; |
272 | } | |
273 | ||
274 | /* Suppose that operand DEF is used inside the LOOP. Returns the outermost | |
2a7e31df | 275 | loop to that we could move the expression using DEF if it did not have |
a7e5372d ZD |
276 | other operands, i.e. the outermost loop enclosing LOOP in that the value |
277 | of DEF is invariant. */ | |
278 | ||
279 | static struct loop * | |
280 | outermost_invariant_loop (tree def, struct loop *loop) | |
281 | { | |
282 | tree def_stmt; | |
283 | basic_block def_bb; | |
284 | struct loop *max_loop; | |
285 | ||
286 | if (TREE_CODE (def) != SSA_NAME) | |
287 | return superloop_at_depth (loop, 1); | |
288 | ||
289 | def_stmt = SSA_NAME_DEF_STMT (def); | |
290 | def_bb = bb_for_stmt (def_stmt); | |
291 | if (!def_bb) | |
292 | return superloop_at_depth (loop, 1); | |
293 | ||
294 | max_loop = find_common_loop (loop, def_bb->loop_father); | |
295 | ||
296 | if (LIM_DATA (def_stmt) && LIM_DATA (def_stmt)->max_loop) | |
297 | max_loop = find_common_loop (max_loop, | |
298 | LIM_DATA (def_stmt)->max_loop->outer); | |
299 | if (max_loop == loop) | |
300 | return NULL; | |
301 | max_loop = superloop_at_depth (loop, max_loop->depth + 1); | |
302 | ||
303 | return max_loop; | |
304 | } | |
305 | ||
306 | /* Returns the outermost superloop of LOOP in that the expression EXPR is | |
307 | invariant. */ | |
308 | ||
309 | static struct loop * | |
310 | outermost_invariant_loop_expr (tree expr, struct loop *loop) | |
311 | { | |
6615c446 | 312 | enum tree_code_class class = TREE_CODE_CLASS (TREE_CODE (expr)); |
a7e5372d ZD |
313 | unsigned i, nops; |
314 | struct loop *max_loop = superloop_at_depth (loop, 1), *aloop; | |
315 | ||
316 | if (TREE_CODE (expr) == SSA_NAME | |
317 | || TREE_CODE (expr) == INTEGER_CST | |
318 | || is_gimple_min_invariant (expr)) | |
319 | return outermost_invariant_loop (expr, loop); | |
320 | ||
6615c446 JO |
321 | if (class != tcc_unary |
322 | && class != tcc_binary | |
323 | && class != tcc_expression | |
324 | && class != tcc_comparison) | |
a7e5372d ZD |
325 | return NULL; |
326 | ||
54e4aedb | 327 | nops = TREE_CODE_LENGTH (TREE_CODE (expr)); |
a7e5372d ZD |
328 | for (i = 0; i < nops; i++) |
329 | { | |
330 | aloop = outermost_invariant_loop_expr (TREE_OPERAND (expr, i), loop); | |
331 | if (!aloop) | |
332 | return NULL; | |
333 | ||
334 | if (flow_loop_nested_p (max_loop, aloop)) | |
335 | max_loop = aloop; | |
336 | } | |
337 | ||
338 | return max_loop; | |
339 | } | |
340 | ||
341 | /* DATA is a structure containing information associated with a statement | |
342 | inside LOOP. DEF is one of the operands of this statement. | |
343 | ||
344 | Find the outermost loop enclosing LOOP in that value of DEF is invariant | |
345 | and record this in DATA->max_loop field. If DEF itself is defined inside | |
346 | this loop as well (i.e. we need to hoist it out of the loop if we want | |
347 | to hoist the statement represented by DATA), record the statement in that | |
348 | DEF is defined to the DATA->depends list. Additionally if ADD_COST is true, | |
349 | add the cost of the computation of DEF to the DATA->cost. | |
350 | ||
351 | If DEF is not invariant in LOOP, return false. Otherwise return TRUE. */ | |
352 | ||
353 | static bool | |
354 | add_dependency (tree def, struct lim_aux_data *data, struct loop *loop, | |
355 | bool add_cost) | |
356 | { | |
357 | tree def_stmt = SSA_NAME_DEF_STMT (def); | |
358 | basic_block def_bb = bb_for_stmt (def_stmt); | |
359 | struct loop *max_loop; | |
360 | struct depend *dep; | |
361 | ||
362 | if (!def_bb) | |
363 | return true; | |
364 | ||
365 | max_loop = outermost_invariant_loop (def, loop); | |
366 | if (!max_loop) | |
367 | return false; | |
368 | ||
369 | if (flow_loop_nested_p (data->max_loop, max_loop)) | |
370 | data->max_loop = max_loop; | |
371 | ||
372 | if (!LIM_DATA (def_stmt)) | |
373 | return true; | |
374 | ||
375 | if (add_cost | |
376 | /* Only add the cost if the statement defining DEF is inside LOOP, | |
377 | i.e. if it is likely that by moving the invariants dependent | |
378 | on it, we will be able to avoid creating a new register for | |
379 | it (since it will be only used in these dependent invariants). */ | |
380 | && def_bb->loop_father == loop) | |
381 | data->cost += LIM_DATA (def_stmt)->cost; | |
382 | ||
383 | dep = xmalloc (sizeof (struct depend)); | |
384 | dep->stmt = def_stmt; | |
385 | dep->next = data->depends; | |
386 | data->depends = dep; | |
387 | ||
388 | return true; | |
389 | } | |
390 | ||
391 | /* Returns an estimate for a cost of statement STMT. TODO -- the values here | |
392 | are just ad-hoc constants. The estimates should be based on target-specific | |
393 | values. */ | |
394 | ||
395 | static unsigned | |
396 | stmt_cost (tree stmt) | |
397 | { | |
e2b8bd6c | 398 | tree rhs; |
a7e5372d ZD |
399 | unsigned cost = 1; |
400 | ||
401 | /* Always try to create possibilities for unswitching. */ | |
402 | if (TREE_CODE (stmt) == COND_EXPR) | |
403 | return LIM_EXPENSIVE; | |
404 | ||
a7e5372d ZD |
405 | rhs = TREE_OPERAND (stmt, 1); |
406 | ||
407 | /* Hoisting memory references out should almost surely be a win. */ | |
9beeb4ee | 408 | if (stmt_references_memory_p (stmt)) |
a7e5372d ZD |
409 | cost += 20; |
410 | ||
411 | switch (TREE_CODE (rhs)) | |
412 | { | |
413 | case CALL_EXPR: | |
414 | /* We should be hoisting calls if possible. */ | |
415 | ||
416 | /* Unless the call is a builtin_constant_p; this always folds to a | |
417 | constant, so moving it is useless. */ | |
418 | rhs = get_callee_fndecl (rhs); | |
8c96cd51 | 419 | if (DECL_BUILT_IN_CLASS (rhs) == BUILT_IN_NORMAL |
a7e5372d ZD |
420 | && DECL_FUNCTION_CODE (rhs) == BUILT_IN_CONSTANT_P) |
421 | return 0; | |
422 | ||
423 | cost += 20; | |
424 | break; | |
425 | ||
426 | case MULT_EXPR: | |
427 | case TRUNC_DIV_EXPR: | |
428 | case CEIL_DIV_EXPR: | |
429 | case FLOOR_DIV_EXPR: | |
430 | case ROUND_DIV_EXPR: | |
431 | case EXACT_DIV_EXPR: | |
432 | case CEIL_MOD_EXPR: | |
433 | case FLOOR_MOD_EXPR: | |
434 | case ROUND_MOD_EXPR: | |
435 | case TRUNC_MOD_EXPR: | |
b4852851 | 436 | case RDIV_EXPR: |
a7e5372d ZD |
437 | /* Division and multiplication are usually expensive. */ |
438 | cost += 20; | |
439 | break; | |
440 | ||
441 | default: | |
442 | break; | |
443 | } | |
444 | ||
445 | return cost; | |
446 | } | |
447 | ||
448 | /* Determine the outermost loop to that it is possible to hoist a statement | |
449 | STMT and store it to LIM_DATA (STMT)->max_loop. To do this we determine | |
450 | the outermost loop in that the value computed by STMT is invariant. | |
451 | If MUST_PRESERVE_EXEC is true, additionally choose such a loop that | |
452 | we preserve the fact whether STMT is executed. It also fills other related | |
453 | information to LIM_DATA (STMT). | |
454 | ||
455 | The function returns false if STMT cannot be hoisted outside of the loop it | |
456 | is defined in, and true otherwise. */ | |
457 | ||
458 | static bool | |
459 | determine_max_movement (tree stmt, bool must_preserve_exec) | |
460 | { | |
461 | basic_block bb = bb_for_stmt (stmt); | |
462 | struct loop *loop = bb->loop_father; | |
463 | struct loop *level; | |
464 | struct lim_aux_data *lim_data = LIM_DATA (stmt); | |
4c124b4c AM |
465 | tree val; |
466 | ssa_op_iter iter; | |
a7e5372d ZD |
467 | |
468 | if (must_preserve_exec) | |
469 | level = ALWAYS_EXECUTED_IN (bb); | |
470 | else | |
471 | level = superloop_at_depth (loop, 1); | |
472 | lim_data->max_loop = level; | |
473 | ||
4c124b4c AM |
474 | FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE) |
475 | if (!add_dependency (val, lim_data, loop, true)) | |
a7e5372d ZD |
476 | return false; |
477 | ||
52328bf6 | 478 | FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_VIRTUAL_USES | SSA_OP_VIRTUAL_KILLS) |
4c124b4c | 479 | if (!add_dependency (val, lim_data, loop, false)) |
a7e5372d ZD |
480 | return false; |
481 | ||
482 | lim_data->cost += stmt_cost (stmt); | |
483 | ||
484 | return true; | |
485 | } | |
486 | ||
487 | /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL, | |
488 | and that one of the operands of this statement is computed by STMT. | |
489 | Ensure that STMT (together with all the statements that define its | |
490 | operands) is hoisted at least out of the loop LEVEL. */ | |
491 | ||
492 | static void | |
493 | set_level (tree stmt, struct loop *orig_loop, struct loop *level) | |
494 | { | |
495 | struct loop *stmt_loop = bb_for_stmt (stmt)->loop_father; | |
496 | struct depend *dep; | |
497 | ||
498 | stmt_loop = find_common_loop (orig_loop, stmt_loop); | |
499 | if (LIM_DATA (stmt) && LIM_DATA (stmt)->tgt_loop) | |
500 | stmt_loop = find_common_loop (stmt_loop, | |
501 | LIM_DATA (stmt)->tgt_loop->outer); | |
502 | if (flow_loop_nested_p (stmt_loop, level)) | |
503 | return; | |
504 | ||
1e128c5f GB |
505 | gcc_assert (LIM_DATA (stmt)); |
506 | gcc_assert (level == LIM_DATA (stmt)->max_loop | |
507 | || flow_loop_nested_p (LIM_DATA (stmt)->max_loop, level)); | |
a7e5372d ZD |
508 | |
509 | LIM_DATA (stmt)->tgt_loop = level; | |
510 | for (dep = LIM_DATA (stmt)->depends; dep; dep = dep->next) | |
511 | set_level (dep->stmt, orig_loop, level); | |
512 | } | |
513 | ||
514 | /* Determines an outermost loop from that we want to hoist the statement STMT. | |
515 | For now we chose the outermost possible loop. TODO -- use profiling | |
516 | information to set it more sanely. */ | |
517 | ||
518 | static void | |
519 | set_profitable_level (tree stmt) | |
520 | { | |
521 | set_level (stmt, bb_for_stmt (stmt)->loop_father, LIM_DATA (stmt)->max_loop); | |
522 | } | |
523 | ||
524 | /* Returns true if STMT is not a pure call. */ | |
525 | ||
526 | static bool | |
527 | nonpure_call_p (tree stmt) | |
528 | { | |
529 | tree call = get_call_expr_in (stmt); | |
530 | ||
531 | if (!call) | |
532 | return false; | |
533 | ||
534 | return TREE_SIDE_EFFECTS (call) != 0; | |
535 | } | |
536 | ||
537 | /* Releases the memory occupied by DATA. */ | |
538 | ||
539 | static void | |
540 | free_lim_aux_data (struct lim_aux_data *data) | |
541 | { | |
542 | struct depend *dep, *next; | |
543 | ||
544 | for (dep = data->depends; dep; dep = next) | |
545 | { | |
546 | next = dep->next; | |
547 | free (dep); | |
548 | } | |
549 | free (data); | |
550 | } | |
551 | ||
552 | /* Determine the outermost loops in that statements in basic block BB are | |
553 | invariant, and record them to the LIM_DATA associated with the statements. | |
554 | Callback for walk_dominator_tree. */ | |
555 | ||
556 | static void | |
557 | determine_invariantness_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED, | |
558 | basic_block bb) | |
559 | { | |
560 | enum move_pos pos; | |
561 | block_stmt_iterator bsi; | |
37cca405 | 562 | tree stmt, rhs; |
a7e5372d ZD |
563 | bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL; |
564 | struct loop *outermost = ALWAYS_EXECUTED_IN (bb); | |
565 | ||
566 | if (!bb->loop_father->outer) | |
567 | return; | |
568 | ||
569 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
570 | fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n", | |
571 | bb->index, bb->loop_father->num, bb->loop_father->depth); | |
572 | ||
573 | for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi)) | |
574 | { | |
575 | stmt = bsi_stmt (bsi); | |
576 | ||
577 | pos = movement_possibility (stmt); | |
578 | if (pos == MOVE_IMPOSSIBLE) | |
579 | { | |
580 | if (nonpure_call_p (stmt)) | |
581 | { | |
582 | maybe_never = true; | |
583 | outermost = NULL; | |
584 | } | |
585 | continue; | |
586 | } | |
587 | ||
37cca405 DE |
588 | /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal |
589 | to be hoisted out of loop, saving expensive divide. */ | |
590 | if (pos == MOVE_POSSIBLE | |
591 | && (rhs = TREE_OPERAND (stmt, 1)) != NULL | |
592 | && TREE_CODE (rhs) == RDIV_EXPR | |
593 | && flag_unsafe_math_optimizations | |
594 | && outermost_invariant_loop_expr (TREE_OPERAND (rhs, 1), | |
595 | loop_containing_stmt (stmt)) != NULL | |
596 | && outermost_invariant_loop_expr (rhs, | |
597 | loop_containing_stmt (stmt)) == NULL) | |
598 | { | |
599 | tree lhs, stmt1, stmt2, var, name; | |
600 | ||
601 | lhs = TREE_OPERAND (stmt, 0); | |
602 | ||
603 | /* stmt must be MODIFY_EXPR. */ | |
604 | var = create_tmp_var (TREE_TYPE (rhs), "reciptmp"); | |
605 | add_referenced_tmp_var (var); | |
606 | ||
607 | stmt1 = build2 (MODIFY_EXPR, void_type_node, var, | |
608 | build2 (RDIV_EXPR, TREE_TYPE (rhs), | |
609 | build_real (TREE_TYPE (rhs), dconst1), | |
610 | TREE_OPERAND (rhs, 1))); | |
611 | name = make_ssa_name (var, stmt1); | |
612 | TREE_OPERAND (stmt1, 0) = name; | |
613 | stmt2 = build2 (MODIFY_EXPR, void_type_node, lhs, | |
614 | build2 (MULT_EXPR, TREE_TYPE (rhs), | |
615 | name, TREE_OPERAND (rhs, 0))); | |
616 | ||
617 | /* Replace division stmt with reciprocal and multiply stmts. | |
618 | The multiply stmt is not invariant, so update iterator | |
619 | and avoid rescanning. */ | |
620 | bsi_replace (&bsi, stmt1, true); | |
37cca405 DE |
621 | bsi_insert_after (&bsi, stmt2, BSI_NEW_STMT); |
622 | SSA_NAME_DEF_STMT (lhs) = stmt2; | |
623 | ||
624 | /* Continue processing with invariant reciprocal statment. */ | |
625 | stmt = stmt1; | |
626 | } | |
627 | ||
a7e5372d ZD |
628 | stmt_ann (stmt)->common.aux = xcalloc (1, sizeof (struct lim_aux_data)); |
629 | LIM_DATA (stmt)->always_executed_in = outermost; | |
630 | ||
631 | if (maybe_never && pos == MOVE_PRESERVE_EXECUTION) | |
632 | continue; | |
633 | ||
634 | if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION)) | |
635 | { | |
636 | LIM_DATA (stmt)->max_loop = NULL; | |
637 | continue; | |
638 | } | |
639 | ||
640 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
641 | { | |
642 | print_generic_stmt_indented (dump_file, stmt, 0, 2); | |
643 | fprintf (dump_file, " invariant up to level %d, cost %d.\n\n", | |
644 | LIM_DATA (stmt)->max_loop->depth, | |
645 | LIM_DATA (stmt)->cost); | |
646 | } | |
647 | ||
648 | if (LIM_DATA (stmt)->cost >= LIM_EXPENSIVE) | |
649 | set_profitable_level (stmt); | |
650 | } | |
651 | } | |
652 | ||
653 | /* For each statement determines the outermost loop in that it is invariant, | |
654 | statements on whose motion it depends and the cost of the computation. | |
655 | This information is stored to the LIM_DATA structure associated with | |
656 | each statement. */ | |
657 | ||
658 | static void | |
659 | determine_invariantness (void) | |
660 | { | |
661 | struct dom_walk_data walk_data; | |
662 | ||
663 | memset (&walk_data, 0, sizeof (struct dom_walk_data)); | |
664 | walk_data.before_dom_children_before_stmts = determine_invariantness_stmt; | |
665 | ||
666 | init_walk_dominator_tree (&walk_data); | |
667 | walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR); | |
668 | fini_walk_dominator_tree (&walk_data); | |
669 | } | |
670 | ||
671 | /* Commits edge insertions and updates loop structures. */ | |
672 | ||
673 | void | |
674 | loop_commit_inserts (void) | |
675 | { | |
676 | unsigned old_last_basic_block, i; | |
677 | basic_block bb; | |
678 | ||
679 | old_last_basic_block = last_basic_block; | |
8e731e4e | 680 | bsi_commit_edge_inserts (); |
a7e5372d ZD |
681 | for (i = old_last_basic_block; i < (unsigned) last_basic_block; i++) |
682 | { | |
683 | bb = BASIC_BLOCK (i); | |
684 | add_bb_to_loop (bb, | |
c5cbcccf ZD |
685 | find_common_loop (single_pred (bb)->loop_father, |
686 | single_succ (bb)->loop_father)); | |
a7e5372d ZD |
687 | } |
688 | } | |
689 | ||
690 | /* Hoist the statements in basic block BB out of the loops prescribed by | |
2a7e31df | 691 | data stored in LIM_DATA structures associated with each statement. Callback |
a7e5372d ZD |
692 | for walk_dominator_tree. */ |
693 | ||
694 | static void | |
695 | move_computations_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED, | |
696 | basic_block bb) | |
697 | { | |
698 | struct loop *level; | |
699 | block_stmt_iterator bsi; | |
700 | tree stmt; | |
701 | unsigned cost = 0; | |
702 | ||
703 | if (!bb->loop_father->outer) | |
704 | return; | |
705 | ||
706 | for (bsi = bsi_start (bb); !bsi_end_p (bsi); ) | |
707 | { | |
708 | stmt = bsi_stmt (bsi); | |
709 | ||
710 | if (!LIM_DATA (stmt)) | |
711 | { | |
712 | bsi_next (&bsi); | |
713 | continue; | |
714 | } | |
715 | ||
716 | cost = LIM_DATA (stmt)->cost; | |
717 | level = LIM_DATA (stmt)->tgt_loop; | |
718 | free_lim_aux_data (LIM_DATA (stmt)); | |
719 | stmt_ann (stmt)->common.aux = NULL; | |
720 | ||
721 | if (!level) | |
722 | { | |
723 | bsi_next (&bsi); | |
724 | continue; | |
725 | } | |
726 | ||
727 | /* We do not really want to move conditionals out of the loop; we just | |
728 | placed it here to force its operands to be moved if necessary. */ | |
729 | if (TREE_CODE (stmt) == COND_EXPR) | |
730 | continue; | |
731 | ||
732 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
733 | { | |
734 | fprintf (dump_file, "Moving statement\n"); | |
735 | print_generic_stmt (dump_file, stmt, 0); | |
736 | fprintf (dump_file, "(cost %u) out of loop %d.\n\n", | |
737 | cost, level->num); | |
738 | } | |
739 | bsi_insert_on_edge (loop_preheader_edge (level), stmt); | |
740 | bsi_remove (&bsi); | |
741 | } | |
742 | } | |
743 | ||
744 | /* Hoist the statements out of the loops prescribed by data stored in | |
2a7e31df | 745 | LIM_DATA structures associated with each statement.*/ |
a7e5372d ZD |
746 | |
747 | static void | |
748 | move_computations (void) | |
749 | { | |
750 | struct dom_walk_data walk_data; | |
751 | ||
752 | memset (&walk_data, 0, sizeof (struct dom_walk_data)); | |
753 | walk_data.before_dom_children_before_stmts = move_computations_stmt; | |
754 | ||
755 | init_walk_dominator_tree (&walk_data); | |
756 | walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR); | |
757 | fini_walk_dominator_tree (&walk_data); | |
758 | ||
759 | loop_commit_inserts (); | |
0bca51f0 DN |
760 | |
761 | if (need_ssa_update_p ()) | |
762 | update_ssa (TODO_update_ssa); | |
763 | ||
764 | /* The movement of LI code may cause violation of loop closed SSA | |
765 | form invariants. TODO -- avoid these rewrites completely. | |
766 | Information in virtual phi nodes is sufficient for it. */ | |
767 | rewrite_into_loop_closed_ssa (NULL); | |
a7e5372d ZD |
768 | } |
769 | ||
770 | /* Checks whether the statement defining variable *INDEX can be hoisted | |
771 | out of the loop passed in DATA. Callback for for_each_index. */ | |
772 | ||
773 | static bool | |
774 | may_move_till (tree ref, tree *index, void *data) | |
775 | { | |
776 | struct loop *loop = data, *max_loop; | |
777 | ||
778 | /* If REF is an array reference, check also that the step and the lower | |
779 | bound is invariant in LOOP. */ | |
780 | if (TREE_CODE (ref) == ARRAY_REF) | |
781 | { | |
782 | tree step = array_ref_element_size (ref); | |
783 | tree lbound = array_ref_low_bound (ref); | |
784 | ||
785 | max_loop = outermost_invariant_loop_expr (step, loop); | |
786 | if (!max_loop) | |
787 | return false; | |
788 | ||
789 | max_loop = outermost_invariant_loop_expr (lbound, loop); | |
790 | if (!max_loop) | |
791 | return false; | |
792 | } | |
793 | ||
794 | max_loop = outermost_invariant_loop (*index, loop); | |
795 | if (!max_loop) | |
796 | return false; | |
797 | ||
798 | return true; | |
799 | } | |
800 | ||
2a7e31df | 801 | /* Forces statements defining (invariant) SSA names in expression EXPR to be |
b4042a03 | 802 | moved out of the LOOP. ORIG_LOOP is the loop in that EXPR is used. */ |
a7e5372d ZD |
803 | |
804 | static void | |
b4042a03 | 805 | force_move_till_expr (tree expr, struct loop *orig_loop, struct loop *loop) |
a7e5372d | 806 | { |
6615c446 | 807 | enum tree_code_class class = TREE_CODE_CLASS (TREE_CODE (expr)); |
a7e5372d ZD |
808 | unsigned i, nops; |
809 | ||
810 | if (TREE_CODE (expr) == SSA_NAME) | |
811 | { | |
812 | tree stmt = SSA_NAME_DEF_STMT (expr); | |
813 | if (IS_EMPTY_STMT (stmt)) | |
814 | return; | |
815 | ||
b4042a03 | 816 | set_level (stmt, orig_loop, loop); |
a7e5372d ZD |
817 | return; |
818 | } | |
819 | ||
6615c446 JO |
820 | if (class != tcc_unary |
821 | && class != tcc_binary | |
822 | && class != tcc_expression | |
823 | && class != tcc_comparison) | |
a7e5372d ZD |
824 | return; |
825 | ||
54e4aedb | 826 | nops = TREE_CODE_LENGTH (TREE_CODE (expr)); |
a7e5372d | 827 | for (i = 0; i < nops; i++) |
b4042a03 | 828 | force_move_till_expr (TREE_OPERAND (expr, i), orig_loop, loop); |
a7e5372d ZD |
829 | } |
830 | ||
831 | /* Forces statement defining invariants in REF (and *INDEX) to be moved out of | |
b4042a03 ZD |
832 | the LOOP. The reference REF is used in the loop ORIG_LOOP. Callback for |
833 | for_each_index. */ | |
834 | ||
835 | struct fmt_data | |
836 | { | |
837 | struct loop *loop; | |
838 | struct loop *orig_loop; | |
839 | }; | |
a7e5372d ZD |
840 | |
841 | static bool | |
842 | force_move_till (tree ref, tree *index, void *data) | |
843 | { | |
844 | tree stmt; | |
b4042a03 | 845 | struct fmt_data *fmt_data = data; |
a7e5372d ZD |
846 | |
847 | if (TREE_CODE (ref) == ARRAY_REF) | |
848 | { | |
849 | tree step = array_ref_element_size (ref); | |
850 | tree lbound = array_ref_low_bound (ref); | |
851 | ||
b4042a03 ZD |
852 | force_move_till_expr (step, fmt_data->orig_loop, fmt_data->loop); |
853 | force_move_till_expr (lbound, fmt_data->orig_loop, fmt_data->loop); | |
a7e5372d ZD |
854 | } |
855 | ||
856 | if (TREE_CODE (*index) != SSA_NAME) | |
857 | return true; | |
858 | ||
859 | stmt = SSA_NAME_DEF_STMT (*index); | |
860 | if (IS_EMPTY_STMT (stmt)) | |
861 | return true; | |
862 | ||
b4042a03 | 863 | set_level (stmt, fmt_data->orig_loop, fmt_data->loop); |
a7e5372d ZD |
864 | |
865 | return true; | |
866 | } | |
867 | ||
868 | /* Records memory reference *REF (that occurs in statement STMT) | |
869 | to the list MEM_REFS. */ | |
870 | ||
871 | static void | |
872 | record_mem_ref (struct mem_ref **mem_refs, tree stmt, tree *ref) | |
873 | { | |
874 | struct mem_ref *aref = xmalloc (sizeof (struct mem_ref)); | |
875 | ||
876 | aref->stmt = stmt; | |
877 | aref->ref = ref; | |
878 | ||
879 | aref->next = *mem_refs; | |
880 | *mem_refs = aref; | |
881 | } | |
882 | ||
883 | /* Releases list of memory references MEM_REFS. */ | |
884 | ||
885 | static void | |
886 | free_mem_refs (struct mem_ref *mem_refs) | |
887 | { | |
888 | struct mem_ref *act; | |
889 | ||
890 | while (mem_refs) | |
891 | { | |
892 | act = mem_refs; | |
893 | mem_refs = mem_refs->next; | |
894 | free (act); | |
895 | } | |
896 | } | |
897 | ||
898 | /* If VAR is defined in LOOP and the statement it is defined in does not belong | |
899 | to the set SEEN, add the statement to QUEUE of length IN_QUEUE and | |
900 | to the set SEEN. */ | |
901 | ||
902 | static void | |
903 | maybe_queue_var (tree var, struct loop *loop, | |
904 | sbitmap seen, tree *queue, unsigned *in_queue) | |
905 | { | |
906 | tree stmt = SSA_NAME_DEF_STMT (var); | |
907 | basic_block def_bb = bb_for_stmt (stmt); | |
908 | ||
909 | if (!def_bb | |
910 | || !flow_bb_inside_loop_p (loop, def_bb) | |
30d396e3 | 911 | || TEST_BIT (seen, get_stmt_uid (stmt))) |
a7e5372d ZD |
912 | return; |
913 | ||
30d396e3 | 914 | SET_BIT (seen, get_stmt_uid (stmt)); |
a7e5372d ZD |
915 | queue[(*in_queue)++] = stmt; |
916 | } | |
917 | ||
a3631d97 ZD |
918 | /* If COMMON_REF is NULL, set COMMON_REF to *OP and return true. |
919 | Otherwise return true if the memory reference *OP is equal to COMMON_REF. | |
920 | Record the reference OP to list MEM_REFS. STMT is the statement in that | |
921 | the reference occurs. */ | |
922 | ||
923 | struct sra_data | |
924 | { | |
925 | struct mem_ref **mem_refs; | |
926 | tree common_ref; | |
927 | tree stmt; | |
928 | }; | |
929 | ||
930 | static bool | |
931 | fem_single_reachable_address (tree *op, void *data) | |
932 | { | |
933 | struct sra_data *sra_data = data; | |
934 | ||
935 | if (sra_data->common_ref | |
936 | && !operand_equal_p (*op, sra_data->common_ref, 0)) | |
937 | return false; | |
938 | sra_data->common_ref = *op; | |
939 | ||
940 | record_mem_ref (sra_data->mem_refs, sra_data->stmt, op); | |
941 | return true; | |
942 | } | |
943 | ||
944 | /* Runs CALLBACK for each operand of STMT that is a memory reference. DATA | |
945 | is passed to the CALLBACK as well. The traversal stops if CALLBACK | |
946 | returns false, for_each_memref then returns false as well. Otherwise | |
947 | for_each_memref returns true. */ | |
948 | ||
949 | static bool | |
950 | for_each_memref (tree stmt, bool (*callback)(tree *, void *), void *data) | |
951 | { | |
952 | tree *op; | |
953 | ||
954 | if (TREE_CODE (stmt) == RETURN_EXPR) | |
955 | stmt = TREE_OPERAND (stmt, 1); | |
956 | ||
957 | if (TREE_CODE (stmt) == MODIFY_EXPR) | |
958 | { | |
959 | op = &TREE_OPERAND (stmt, 0); | |
960 | if (TREE_CODE (*op) != SSA_NAME | |
961 | && !callback (op, data)) | |
962 | return false; | |
963 | ||
964 | op = &TREE_OPERAND (stmt, 1); | |
965 | if (TREE_CODE (*op) != SSA_NAME | |
966 | && is_gimple_lvalue (*op) | |
967 | && !callback (op, data)) | |
968 | return false; | |
969 | ||
970 | stmt = TREE_OPERAND (stmt, 1); | |
971 | } | |
972 | ||
973 | if (TREE_CODE (stmt) == WITH_SIZE_EXPR) | |
974 | stmt = TREE_OPERAND (stmt, 0); | |
975 | ||
976 | if (TREE_CODE (stmt) == CALL_EXPR) | |
977 | { | |
978 | tree args; | |
979 | ||
980 | for (args = TREE_OPERAND (stmt, 1); args; args = TREE_CHAIN (args)) | |
981 | { | |
982 | op = &TREE_VALUE (args); | |
983 | ||
984 | if (TREE_CODE (*op) != SSA_NAME | |
985 | && is_gimple_lvalue (*op) | |
986 | && !callback (op, data)) | |
987 | return false; | |
988 | } | |
989 | } | |
990 | ||
991 | return true; | |
992 | } | |
993 | ||
a7e5372d ZD |
994 | /* Determine whether all memory references inside the LOOP that correspond |
995 | to virtual ssa names defined in statement STMT are equal. | |
996 | If so, store the list of the references to MEM_REFS, and return one | |
a3631d97 ZD |
997 | of them. Otherwise store NULL to MEM_REFS and return NULL_TREE. |
998 | *SEEN_CALL_STMT is set to true if the virtual operands suggest | |
999 | that the reference might be clobbered by a call inside the LOOP. */ | |
a7e5372d ZD |
1000 | |
1001 | static tree | |
1002 | single_reachable_address (struct loop *loop, tree stmt, | |
a3631d97 ZD |
1003 | struct mem_ref **mem_refs, |
1004 | bool *seen_call_stmt) | |
a7e5372d | 1005 | { |
30d396e3 | 1006 | unsigned max_uid = max_stmt_uid + num_ssa_names; |
a7e5372d ZD |
1007 | tree *queue = xmalloc (sizeof (tree) * max_uid); |
1008 | sbitmap seen = sbitmap_alloc (max_uid); | |
a7e5372d | 1009 | unsigned in_queue = 1; |
f430bae8 | 1010 | unsigned i; |
a3631d97 ZD |
1011 | struct sra_data sra_data; |
1012 | tree call; | |
4c124b4c AM |
1013 | tree val; |
1014 | ssa_op_iter iter; | |
f430bae8 AM |
1015 | imm_use_iterator imm_iter; |
1016 | use_operand_p use_p; | |
a7e5372d ZD |
1017 | |
1018 | sbitmap_zero (seen); | |
1019 | ||
1020 | *mem_refs = NULL; | |
a3631d97 ZD |
1021 | sra_data.mem_refs = mem_refs; |
1022 | sra_data.common_ref = NULL_TREE; | |
a7e5372d ZD |
1023 | |
1024 | queue[0] = stmt; | |
30d396e3 | 1025 | SET_BIT (seen, get_stmt_uid (stmt)); |
a3631d97 | 1026 | *seen_call_stmt = false; |
a7e5372d ZD |
1027 | |
1028 | while (in_queue) | |
1029 | { | |
1030 | stmt = queue[--in_queue]; | |
a3631d97 | 1031 | sra_data.stmt = stmt; |
a7e5372d ZD |
1032 | |
1033 | if (LIM_DATA (stmt) | |
1034 | && LIM_DATA (stmt)->sm_done) | |
1035 | goto fail; | |
1036 | ||
1037 | switch (TREE_CODE (stmt)) | |
1038 | { | |
1039 | case MODIFY_EXPR: | |
a3631d97 ZD |
1040 | case CALL_EXPR: |
1041 | case RETURN_EXPR: | |
1042 | if (!for_each_memref (stmt, fem_single_reachable_address, | |
1043 | &sra_data)) | |
a7e5372d | 1044 | goto fail; |
a7e5372d | 1045 | |
a3631d97 ZD |
1046 | /* If this is a function that may depend on the memory location, |
1047 | record the fact. We cannot directly refuse call clobbered | |
1048 | operands here, since sra_data.common_ref did not have | |
1049 | to be set yet. */ | |
1050 | call = get_call_expr_in (stmt); | |
1051 | if (call | |
1052 | && !(call_expr_flags (call) & ECF_CONST)) | |
1053 | *seen_call_stmt = true; | |
a7e5372d ZD |
1054 | |
1055 | /* Traverse also definitions of the VUSES (there may be other | |
1056 | distinct from the one we used to get to this statement). */ | |
4c124b4c AM |
1057 | FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_VIRTUAL_USES) |
1058 | maybe_queue_var (val, loop, seen, queue, &in_queue); | |
a7e5372d | 1059 | |
a7e5372d ZD |
1060 | break; |
1061 | ||
1062 | case PHI_NODE: | |
1063 | for (i = 0; i < (unsigned) PHI_NUM_ARGS (stmt); i++) | |
ee1f0fb0 DN |
1064 | if (TREE_CODE (PHI_ARG_DEF (stmt, i)) == SSA_NAME) |
1065 | maybe_queue_var (PHI_ARG_DEF (stmt, i), loop, | |
1066 | seen, queue, &in_queue); | |
a7e5372d ZD |
1067 | break; |
1068 | ||
1069 | default: | |
1070 | goto fail; | |
1071 | } | |
1072 | ||
1073 | /* Find uses of virtual names. */ | |
f430bae8 AM |
1074 | if (TREE_CODE (stmt) == PHI_NODE) |
1075 | { | |
1076 | if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (stmt)))) | |
1077 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, PHI_RESULT (stmt)) | |
1078 | { | |
1079 | tree imm_stmt = USE_STMT (use_p); | |
a7e5372d | 1080 | |
f430bae8 AM |
1081 | if (TEST_BIT (seen, get_stmt_uid (imm_stmt))) |
1082 | continue; | |
a7e5372d | 1083 | |
f430bae8 AM |
1084 | if (!flow_bb_inside_loop_p (loop, bb_for_stmt (imm_stmt))) |
1085 | continue; | |
a7e5372d | 1086 | |
f430bae8 | 1087 | SET_BIT (seen, get_stmt_uid (imm_stmt)); |
a7e5372d | 1088 | |
f430bae8 AM |
1089 | queue[in_queue++] = imm_stmt; |
1090 | } | |
a7e5372d | 1091 | } |
f430bae8 AM |
1092 | else |
1093 | FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_VIRTUAL_DEFS) | |
1094 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val) | |
1095 | { | |
1096 | tree imm_stmt = USE_STMT (use_p); | |
1097 | ||
1098 | if (TEST_BIT (seen, get_stmt_uid (imm_stmt))) | |
1099 | continue; | |
1100 | ||
1101 | if (!flow_bb_inside_loop_p (loop, bb_for_stmt (imm_stmt))) | |
1102 | continue; | |
1103 | ||
1104 | SET_BIT (seen, get_stmt_uid (imm_stmt)); | |
1105 | ||
1106 | queue[in_queue++] = imm_stmt; | |
1107 | } | |
a7e5372d ZD |
1108 | } |
1109 | ||
1110 | free (queue); | |
1111 | sbitmap_free (seen); | |
1112 | ||
a3631d97 | 1113 | return sra_data.common_ref; |
a7e5372d ZD |
1114 | |
1115 | fail: | |
1116 | free_mem_refs (*mem_refs); | |
1117 | *mem_refs = NULL; | |
1118 | free (queue); | |
1119 | sbitmap_free (seen); | |
1120 | ||
1121 | return NULL; | |
1122 | } | |
1123 | ||
1124 | /* Rewrites memory references in list MEM_REFS by variable TMP_VAR. */ | |
1125 | ||
1126 | static void | |
1127 | rewrite_mem_refs (tree tmp_var, struct mem_ref *mem_refs) | |
1128 | { | |
a7e5372d | 1129 | tree var; |
4c124b4c | 1130 | ssa_op_iter iter; |
a7e5372d ZD |
1131 | |
1132 | for (; mem_refs; mem_refs = mem_refs->next) | |
1133 | { | |
52328bf6 | 1134 | FOR_EACH_SSA_TREE_OPERAND (var, mem_refs->stmt, iter, SSA_OP_ALL_VIRTUALS) |
0bca51f0 | 1135 | mark_sym_for_renaming (SSA_NAME_VAR (var)); |
a7e5372d ZD |
1136 | |
1137 | *mem_refs->ref = tmp_var; | |
f430bae8 | 1138 | update_stmt (mem_refs->stmt); |
a7e5372d ZD |
1139 | } |
1140 | } | |
1141 | ||
1142 | /* Records request for store motion of memory reference REF from LOOP. | |
2a7e31df | 1143 | MEM_REFS is the list of occurrences of the reference REF inside LOOP; |
a7e5372d ZD |
1144 | these references are rewritten by a new temporary variable. |
1145 | Exits from the LOOP are stored in EXITS, there are N_EXITS of them. | |
1146 | The initialization of the temporary variable is put to the preheader | |
1147 | of the loop, and assignments to the reference from the temporary variable | |
1148 | are emitted to exits. */ | |
1149 | ||
1150 | static void | |
1151 | schedule_sm (struct loop *loop, edge *exits, unsigned n_exits, tree ref, | |
1152 | struct mem_ref *mem_refs) | |
1153 | { | |
1154 | struct mem_ref *aref; | |
1155 | tree tmp_var; | |
1156 | unsigned i; | |
1157 | tree load, store; | |
b4042a03 | 1158 | struct fmt_data fmt_data; |
a7e5372d | 1159 | |
a3631d97 ZD |
1160 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1161 | { | |
1162 | fprintf (dump_file, "Executing store motion of "); | |
1163 | print_generic_expr (dump_file, ref, 0); | |
1164 | fprintf (dump_file, " from loop %d\n", loop->num); | |
1165 | } | |
1166 | ||
a7e5372d ZD |
1167 | tmp_var = make_rename_temp (TREE_TYPE (ref), "lsm_tmp"); |
1168 | ||
b4042a03 ZD |
1169 | fmt_data.loop = loop; |
1170 | fmt_data.orig_loop = loop; | |
1171 | for_each_index (&ref, force_move_till, &fmt_data); | |
a7e5372d ZD |
1172 | |
1173 | rewrite_mem_refs (tmp_var, mem_refs); | |
1174 | for (aref = mem_refs; aref; aref = aref->next) | |
1175 | if (LIM_DATA (aref->stmt)) | |
1176 | LIM_DATA (aref->stmt)->sm_done = true; | |
1177 | ||
1178 | /* Emit the load & stores. */ | |
1179 | load = build (MODIFY_EXPR, void_type_node, tmp_var, ref); | |
68b9f53b | 1180 | get_stmt_ann (load)->common.aux = xcalloc (1, sizeof (struct lim_aux_data)); |
a7e5372d ZD |
1181 | LIM_DATA (load)->max_loop = loop; |
1182 | LIM_DATA (load)->tgt_loop = loop; | |
1183 | ||
1184 | /* Put this into the latch, so that we are sure it will be processed after | |
1185 | all dependencies. */ | |
1186 | bsi_insert_on_edge (loop_latch_edge (loop), load); | |
1187 | ||
1188 | for (i = 0; i < n_exits; i++) | |
1189 | { | |
1190 | store = build (MODIFY_EXPR, void_type_node, | |
1191 | unshare_expr (ref), tmp_var); | |
1192 | bsi_insert_on_edge (exits[i], store); | |
1193 | } | |
1194 | } | |
1195 | ||
a3631d97 ZD |
1196 | /* Returns true if REF may be clobbered by calls. */ |
1197 | ||
1198 | static bool | |
1199 | is_call_clobbered_ref (tree ref) | |
1200 | { | |
1201 | tree base; | |
013cc86f DB |
1202 | HOST_WIDE_INT offset, size; |
1203 | subvar_t sv; | |
1204 | subvar_t svars; | |
1205 | tree sref = ref; | |
a3631d97 | 1206 | |
013cc86f DB |
1207 | if (TREE_CODE (sref) == COMPONENT_REF |
1208 | && (sref = okay_component_ref_for_subvars (sref, &offset, &size))) | |
1209 | { | |
1210 | svars = get_subvars_for_var (sref); | |
1211 | for (sv = svars; sv; sv = sv->next) | |
1212 | { | |
1213 | if (overlap_subvar (offset, size, sv, NULL) | |
1214 | && is_call_clobbered (sv->var)) | |
1215 | return true; | |
1216 | } | |
1217 | } | |
1218 | ||
a3631d97 ZD |
1219 | base = get_base_address (ref); |
1220 | if (!base) | |
1221 | return true; | |
1222 | ||
1223 | if (DECL_P (base)) | |
013cc86f DB |
1224 | { |
1225 | if (var_can_have_subvars (base) | |
1226 | && (svars = get_subvars_for_var (base))) | |
1227 | { | |
1228 | for (sv = svars; sv; sv = sv->next) | |
1229 | if (is_call_clobbered (sv->var)) | |
1230 | return true; | |
1231 | return false; | |
1232 | } | |
1233 | else | |
1234 | return is_call_clobbered (base); | |
1235 | } | |
a3631d97 | 1236 | |
1b096a0a | 1237 | if (INDIRECT_REF_P (base)) |
a3631d97 ZD |
1238 | { |
1239 | /* Check whether the alias tags associated with the pointer | |
1240 | are call clobbered. */ | |
1241 | tree ptr = TREE_OPERAND (base, 0); | |
1242 | struct ptr_info_def *pi = SSA_NAME_PTR_INFO (ptr); | |
1243 | tree nmt = (pi) ? pi->name_mem_tag : NULL_TREE; | |
1244 | tree tmt = var_ann (SSA_NAME_VAR (ptr))->type_mem_tag; | |
1245 | ||
1246 | if ((nmt && is_call_clobbered (nmt)) | |
1247 | || (tmt && is_call_clobbered (tmt))) | |
1248 | return true; | |
1249 | ||
1250 | return false; | |
1251 | } | |
1252 | ||
1e128c5f | 1253 | gcc_unreachable (); |
a3631d97 ZD |
1254 | } |
1255 | ||
a7e5372d ZD |
1256 | /* Determine whether all memory references inside LOOP corresponding to the |
1257 | virtual ssa name REG are equal to each other, and whether the address of | |
1258 | this common reference can be hoisted outside of the loop. If this is true, | |
1259 | prepare the statements that load the value of the memory reference to a | |
1260 | temporary variable in the loop preheader, store it back on the loop exits, | |
1261 | and replace all the references inside LOOP by this temporary variable. | |
1262 | LOOP has N_EXITS stored in EXITS. */ | |
1263 | ||
1264 | static void | |
1265 | determine_lsm_reg (struct loop *loop, edge *exits, unsigned n_exits, tree reg) | |
1266 | { | |
1267 | tree ref; | |
1268 | struct mem_ref *mem_refs, *aref; | |
1269 | struct loop *must_exec; | |
a3631d97 | 1270 | bool sees_call; |
a7e5372d ZD |
1271 | |
1272 | if (is_gimple_reg (reg)) | |
1273 | return; | |
1274 | ||
a3631d97 ZD |
1275 | ref = single_reachable_address (loop, SSA_NAME_DEF_STMT (reg), &mem_refs, |
1276 | &sees_call); | |
a7e5372d ZD |
1277 | if (!ref) |
1278 | return; | |
1279 | ||
a3631d97 ZD |
1280 | /* If we cannot create a ssa name for the result, give up. */ |
1281 | if (!is_gimple_reg_type (TREE_TYPE (ref)) | |
1282 | || TREE_THIS_VOLATILE (ref)) | |
1283 | goto fail; | |
1284 | ||
1285 | /* If there is a call that may use the location, give up as well. */ | |
1286 | if (sees_call | |
1287 | && is_call_clobbered_ref (ref)) | |
1288 | goto fail; | |
1289 | ||
a7e5372d | 1290 | if (!for_each_index (&ref, may_move_till, loop)) |
a3631d97 | 1291 | goto fail; |
a7e5372d ZD |
1292 | |
1293 | if (tree_could_trap_p (ref)) | |
1294 | { | |
1295 | /* If the memory access is unsafe (i.e. it might trap), ensure that some | |
1296 | of the statements in that it occurs is always executed when the loop | |
1297 | is entered. This way we know that by moving the load from the | |
1298 | reference out of the loop we will not cause the error that would not | |
1299 | occur otherwise. | |
1300 | ||
1301 | TODO -- in fact we would like to check for anticipability of the | |
1302 | reference, i.e. that on each path from loop entry to loop exit at | |
1303 | least one of the statements containing the memory reference is | |
1304 | executed. */ | |
1305 | ||
1306 | for (aref = mem_refs; aref; aref = aref->next) | |
1307 | { | |
1308 | if (!LIM_DATA (aref->stmt)) | |
1309 | continue; | |
1310 | ||
1311 | must_exec = LIM_DATA (aref->stmt)->always_executed_in; | |
1312 | if (!must_exec) | |
1313 | continue; | |
1314 | ||
1315 | if (must_exec == loop | |
1316 | || flow_loop_nested_p (must_exec, loop)) | |
1317 | break; | |
1318 | } | |
1319 | ||
1320 | if (!aref) | |
a3631d97 | 1321 | goto fail; |
a7e5372d ZD |
1322 | } |
1323 | ||
1324 | schedule_sm (loop, exits, n_exits, ref, mem_refs); | |
a3631d97 ZD |
1325 | |
1326 | fail: ; | |
a7e5372d ZD |
1327 | free_mem_refs (mem_refs); |
1328 | } | |
1329 | ||
1330 | /* Checks whether LOOP (with N_EXITS exits stored in EXITS array) is suitable | |
1331 | for a store motion optimization (i.e. whether we can insert statement | |
1332 | on its exits). */ | |
1333 | ||
1334 | static bool | |
1335 | loop_suitable_for_sm (struct loop *loop ATTRIBUTE_UNUSED, edge *exits, | |
1336 | unsigned n_exits) | |
1337 | { | |
1338 | unsigned i; | |
1339 | ||
1340 | for (i = 0; i < n_exits; i++) | |
1341 | if (exits[i]->flags & EDGE_ABNORMAL) | |
1342 | return false; | |
1343 | ||
1344 | return true; | |
1345 | } | |
1346 | ||
1347 | /* Try to perform store motion for all memory references modified inside | |
1348 | LOOP. */ | |
1349 | ||
1350 | static void | |
1351 | determine_lsm_loop (struct loop *loop) | |
1352 | { | |
1353 | tree phi; | |
1354 | unsigned n_exits; | |
1355 | edge *exits = get_loop_exit_edges (loop, &n_exits); | |
1356 | ||
1357 | if (!loop_suitable_for_sm (loop, exits, n_exits)) | |
1358 | { | |
1359 | free (exits); | |
1360 | return; | |
1361 | } | |
1362 | ||
bb29d951 | 1363 | for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi)) |
a7e5372d ZD |
1364 | determine_lsm_reg (loop, exits, n_exits, PHI_RESULT (phi)); |
1365 | ||
1366 | free (exits); | |
1367 | } | |
1368 | ||
1369 | /* Try to perform store motion for all memory references modified inside | |
1370 | any of LOOPS. */ | |
1371 | ||
1372 | static void | |
1373 | determine_lsm (struct loops *loops) | |
1374 | { | |
1375 | struct loop *loop; | |
1376 | basic_block bb; | |
1377 | ||
d16464bb DB |
1378 | if (!loops->tree_root->inner) |
1379 | return; | |
1380 | ||
a7e5372d ZD |
1381 | /* Create a UID for each statement in the function. Ordering of the |
1382 | UIDs is not important for this pass. */ | |
30d396e3 | 1383 | max_stmt_uid = 0; |
a7e5372d ZD |
1384 | FOR_EACH_BB (bb) |
1385 | { | |
1386 | block_stmt_iterator bsi; | |
a7e5372d ZD |
1387 | |
1388 | for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi)) | |
30d396e3 | 1389 | stmt_ann (bsi_stmt (bsi))->uid = max_stmt_uid++; |
a7e5372d ZD |
1390 | } |
1391 | ||
a7e5372d ZD |
1392 | /* Pass the loops from the outermost. For each virtual operand loop phi node |
1393 | check whether all the references inside the loop correspond to a single | |
1394 | address, and if so, move them. */ | |
1395 | ||
1396 | loop = loops->tree_root->inner; | |
1397 | while (1) | |
1398 | { | |
1399 | determine_lsm_loop (loop); | |
1400 | ||
1401 | if (loop->inner) | |
1402 | { | |
1403 | loop = loop->inner; | |
1404 | continue; | |
1405 | } | |
1406 | while (!loop->next) | |
1407 | { | |
1408 | loop = loop->outer; | |
1409 | if (loop == loops->tree_root) | |
1410 | { | |
a7e5372d ZD |
1411 | loop_commit_inserts (); |
1412 | return; | |
1413 | } | |
1414 | } | |
1415 | loop = loop->next; | |
1416 | } | |
1417 | } | |
1418 | ||
1419 | /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e. | |
1420 | for each such basic block bb records the outermost loop for that execution | |
1421 | of its header implies execution of bb. CONTAINS_CALL is the bitmap of | |
1422 | blocks that contain a nonpure call. */ | |
1423 | ||
1424 | static void | |
1425 | fill_always_executed_in (struct loop *loop, sbitmap contains_call) | |
1426 | { | |
1427 | basic_block bb = NULL, *bbs, last = NULL; | |
1428 | unsigned i; | |
1429 | edge e; | |
1430 | struct loop *inn_loop = loop; | |
1431 | ||
1432 | if (!loop->header->aux) | |
1433 | { | |
1434 | bbs = get_loop_body_in_dom_order (loop); | |
1435 | ||
1436 | for (i = 0; i < loop->num_nodes; i++) | |
1437 | { | |
628f6a4e | 1438 | edge_iterator ei; |
a7e5372d ZD |
1439 | bb = bbs[i]; |
1440 | ||
1441 | if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) | |
1442 | last = bb; | |
1443 | ||
1444 | if (TEST_BIT (contains_call, bb->index)) | |
1445 | break; | |
1446 | ||
628f6a4e | 1447 | FOR_EACH_EDGE (e, ei, bb->succs) |
a7e5372d ZD |
1448 | if (!flow_bb_inside_loop_p (loop, e->dest)) |
1449 | break; | |
1450 | if (e) | |
1451 | break; | |
1452 | ||
1453 | /* A loop might be infinite (TODO use simple loop analysis | |
1454 | to disprove this if possible). */ | |
1455 | if (bb->flags & BB_IRREDUCIBLE_LOOP) | |
1456 | break; | |
1457 | ||
1458 | if (!flow_bb_inside_loop_p (inn_loop, bb)) | |
1459 | break; | |
1460 | ||
1461 | if (bb->loop_father->header == bb) | |
1462 | { | |
1463 | if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) | |
1464 | break; | |
1465 | ||
1466 | /* In a loop that is always entered we may proceed anyway. | |
1467 | But record that we entered it and stop once we leave it. */ | |
1468 | inn_loop = bb->loop_father; | |
1469 | } | |
1470 | } | |
1471 | ||
1472 | while (1) | |
1473 | { | |
1474 | last->aux = loop; | |
1475 | if (last == loop->header) | |
1476 | break; | |
1477 | last = get_immediate_dominator (CDI_DOMINATORS, last); | |
1478 | } | |
1479 | ||
1480 | free (bbs); | |
1481 | } | |
1482 | ||
1483 | for (loop = loop->inner; loop; loop = loop->next) | |
1484 | fill_always_executed_in (loop, contains_call); | |
1485 | } | |
1486 | ||
1487 | /* Compute the global information needed by the loop invariant motion pass. | |
1488 | LOOPS is the loop tree. */ | |
1489 | ||
1490 | static void | |
1491 | tree_ssa_lim_initialize (struct loops *loops) | |
1492 | { | |
1493 | sbitmap contains_call = sbitmap_alloc (last_basic_block); | |
1494 | block_stmt_iterator bsi; | |
1495 | struct loop *loop; | |
1496 | basic_block bb; | |
1497 | ||
1498 | sbitmap_zero (contains_call); | |
1499 | FOR_EACH_BB (bb) | |
1500 | { | |
1501 | for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi)) | |
1502 | { | |
1503 | if (nonpure_call_p (bsi_stmt (bsi))) | |
1504 | break; | |
1505 | } | |
1506 | ||
1507 | if (!bsi_end_p (bsi)) | |
1508 | SET_BIT (contains_call, bb->index); | |
1509 | } | |
1510 | ||
1511 | for (loop = loops->tree_root->inner; loop; loop = loop->next) | |
1512 | fill_always_executed_in (loop, contains_call); | |
1513 | ||
1514 | sbitmap_free (contains_call); | |
1515 | } | |
1516 | ||
1517 | /* Cleans up after the invariant motion pass. */ | |
1518 | ||
1519 | static void | |
1520 | tree_ssa_lim_finalize (void) | |
1521 | { | |
1522 | basic_block bb; | |
1523 | ||
1524 | FOR_EACH_BB (bb) | |
1525 | { | |
1526 | bb->aux = NULL; | |
1527 | } | |
1528 | } | |
1529 | ||
1530 | /* Moves invariants from LOOPS. Only "expensive" invariants are moved out -- | |
1531 | i.e. those that are likely to be win regardless of the register pressure. */ | |
1532 | ||
1533 | void | |
1534 | tree_ssa_lim (struct loops *loops) | |
1535 | { | |
1536 | tree_ssa_lim_initialize (loops); | |
1537 | ||
1538 | /* For each statement determine the outermost loop in that it is | |
1539 | invariant and cost for computing the invariant. */ | |
1540 | determine_invariantness (); | |
1541 | ||
1542 | /* For each memory reference determine whether it is possible to hoist it | |
1543 | out of the loop. Force the necessary invariants to be moved out of the | |
1544 | loops as well. */ | |
1545 | determine_lsm (loops); | |
1546 | ||
1547 | /* Move the expressions that are expensive enough. */ | |
1548 | move_computations (); | |
1549 | ||
1550 | tree_ssa_lim_finalize (); | |
1551 | } |