1 /* Loop invariant motion.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "hash-set.h"
25 #include "vec.h"
26 #include "input.h"
27 #include "alias.h"
28 #include "symtab.h"
29 #include "inchash.h"
30 #include "tree.h"
31 #include "fold-const.h"
32 #include "tm_p.h"
33 #include "predict.h"
34 #include "hard-reg-set.h"
35 #include "input.h"
36 #include "function.h"
37 #include "dominance.h"
38 #include "cfg.h"
39 #include "cfganal.h"
40 #include "basic-block.h"
41 #include "gimple-pretty-print.h"
42 #include "hash-map.h"
43 #include "hash-table.h"
44 #include "tree-ssa-alias.h"
45 #include "internal-fn.h"
46 #include "tree-eh.h"
47 #include "gimple-expr.h"
48 #include "is-a.h"
49 #include "gimple.h"
50 #include "gimplify.h"
51 #include "gimple-iterator.h"
52 #include "gimple-ssa.h"
53 #include "tree-cfg.h"
54 #include "tree-phinodes.h"
55 #include "ssa-iterators.h"
56 #include "stringpool.h"
57 #include "tree-ssanames.h"
58 #include "tree-ssa-loop-manip.h"
59 #include "tree-ssa-loop.h"
60 #include "tree-into-ssa.h"
61 #include "cfgloop.h"
62 #include "domwalk.h"
63 #include "params.h"
64 #include "tree-pass.h"
65 #include "flags.h"
66 #include "tree-affine.h"
67 #include "tree-ssa-propagate.h"
68 #include "trans-mem.h"
69 #include "gimple-fold.h"
70
71 /* TODO: Support for predicated code motion. I.e.
72
73 while (1)
74 {
75 if (cond)
76 {
77 a = inv;
78 something;
79 }
80 }
81
82 Where COND and INV are invariants, but evaluating INV may trap or be
83 invalid for some other reason if !COND. This may be transformed to
84
85 if (cond)
86 a = inv;
87 while (1)
88 {
89 if (cond)
90 something;
91 } */
92
93 /* The auxiliary data kept for each statement. */
94
95 struct lim_aux_data
96 {
97 struct loop *max_loop; /* The outermost loop in which the statement
98 is invariant. */
99
100 struct loop *tgt_loop; /* The loop out of which we want to move the
101 invariant. */
102
103 struct loop *always_executed_in;
104 /* The outermost loop for which we are sure
105 the statement is executed if the loop
106 is entered. */
107
108 unsigned cost; /* Cost of the computation performed by the
109 statement. */
110
111 vec<gimple> depends; /* Vector of statements that must also be
112 hoisted out of the loop when this statement
113 is hoisted; i.e. those that define the
114 operands of the statement and are inside
115 the MAX_LOOP loop. */
116 };
117
118 /* Maps statements to their lim_aux_data. */
119
120 static hash_map<gimple, lim_aux_data *> *lim_aux_data_map;
121
122 /* Description of a memory reference location. */
123
124 typedef struct mem_ref_loc
125 {
126 tree *ref; /* The reference itself. */
127 gimple stmt; /* The statement in which it occurs. */
128 } *mem_ref_loc_p;
129
130
131 /* Description of a memory reference. */
132
133 typedef struct im_mem_ref
134 {
135 unsigned id; /* ID assigned to the memory reference
136 (its index in memory_accesses.refs_list) */
137 hashval_t hash; /* Its hash value. */
138
139 /* The memory access itself and associated caching of alias-oracle
140 query meta-data. */
141 ao_ref mem;
142
143 bitmap stored; /* The set of loops in which this memory location
144 is stored to. */
145 vec<mem_ref_loc> accesses_in_loop;
146 /* The locations of the accesses, sorted by
147 the loop postorder of their basic blocks. */
148
149 /* The following sets are computed on demand. We keep both the set and
150 its complement, so that we know whether the information was
151 already computed or not. */
152 bitmap_head indep_loop; /* The set of loops in which the memory
153 reference is independent, meaning:
154 If it is stored in the loop, this store
155 is independent of all other loads and
156 stores.
157 If it is only loaded, then it is independent
158 of all stores in the loop. */
159 bitmap_head dep_loop; /* The complement of INDEP_LOOP. */
160 } *mem_ref_p;
161
162 /* We use two bits per loop in the ref->{in,}dep_loop bitmaps, the first
163 to record (in)dependence against stores in the loop and its subloops, the
164 second to record (in)dependence against all references in the loop
165 and its subloops. */
166 #define LOOP_DEP_BIT(loopnum, storedp) (2 * (loopnum) + (storedp ? 1 : 0))
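/* For illustration, derived directly from the macro: loop number N owns
   bits 2*N and 2*N + 1, e.g. LOOP_DEP_BIT (5, false) == 10 and
   LOOP_DEP_BIT (5, true) == 11.  Per the comment above, the even bit
   tracks (in)dependence against stores only, the odd bit against all
   references in the loop and its subloops.  */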
167
168 /* Mem_ref hashtable helpers. */
169
170 struct mem_ref_hasher : typed_noop_remove <im_mem_ref>
171 {
172 typedef im_mem_ref *value_type;
173 typedef tree_node *compare_type;
174 static inline hashval_t hash (const im_mem_ref *);
175 static inline bool equal (const im_mem_ref *, const tree_node *);
176 };
177
178 /* A hash function for struct im_mem_ref object OBJ. */
179
180 inline hashval_t
181 mem_ref_hasher::hash (const im_mem_ref *mem)
182 {
183 return mem->hash;
184 }
185
186 /* An equality function for struct im_mem_ref object MEM1 with
187 memory reference OBJ2. */
188
189 inline bool
190 mem_ref_hasher::equal (const im_mem_ref *mem1, const tree_node *obj2)
191 {
192 return operand_equal_p (mem1->mem.ref, (const_tree) obj2, 0);
193 }
194
195
196 /* Description of memory accesses in loops. */
197
198 static struct
199 {
200 /* The hash table of memory references accessed in loops. */
201 hash_table<mem_ref_hasher> *refs;
202
203 /* The list of memory references. */
204 vec<mem_ref_p> refs_list;
205
206 /* The set of memory references accessed in each loop. */
207 vec<bitmap_head> refs_in_loop;
208
209 /* The set of memory references stored in each loop. */
210 vec<bitmap_head> refs_stored_in_loop;
211
212 /* The set of memory references stored in each loop, including subloops. */
213 vec<bitmap_head> all_refs_stored_in_loop;
214
215 /* Cache for expanding memory addresses. */
216 hash_map<tree, name_expansion *> *ttae_cache;
217 } memory_accesses;
218
219 /* Obstack for the bitmaps in the above data structures. */
220 static bitmap_obstack lim_bitmap_obstack;
221 static obstack mem_ref_obstack;
222
223 static bool ref_indep_loop_p (struct loop *, mem_ref_p);
224
225 /* Minimum cost of an expensive expression. */
226 #define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))
227
228 /* The outermost loop for which execution of the header guarantees that the
229 block will be executed. */
230 #define ALWAYS_EXECUTED_IN(BB) ((struct loop *) (BB)->aux)
231 #define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))
232
233 /* ID of the shared unanalyzable mem. */
234 #define UNANALYZABLE_MEM_ID 0
235
236 /* Whether the reference was analyzable. */
237 #define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID)
238
239 static struct lim_aux_data *
240 init_lim_data (gimple stmt)
241 {
242 lim_aux_data *p = XCNEW (struct lim_aux_data);
243 lim_aux_data_map->put (stmt, p);
244
245 return p;
246 }
247
248 static struct lim_aux_data *
249 get_lim_data (gimple stmt)
250 {
251 lim_aux_data **p = lim_aux_data_map->get (stmt);
252 if (!p)
253 return NULL;
254
255 return *p;
256 }
257
258 /* Releases the memory occupied by DATA. */
259
260 static void
261 free_lim_aux_data (struct lim_aux_data *data)
262 {
263 data->depends.release ();
264 free (data);
265 }
266
267 static void
268 clear_lim_data (gimple stmt)
269 {
270 lim_aux_data **p = lim_aux_data_map->get (stmt);
271 if (!p)
272 return;
273
274 free_lim_aux_data (*p);
275 *p = NULL;
276 }
277
278
279 /* The possibilities of statement movement. */
280 enum move_pos
281 {
282 MOVE_IMPOSSIBLE, /* No movement -- side effect expression. */
283 MOVE_PRESERVE_EXECUTION, /* Must not cause the non-executed statement
284 to become executed -- memory accesses, ... */
285 MOVE_POSSIBLE /* Unlimited movement. */
286 };
287
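/* Some illustrative examples (the precise rules are in movement_possibility
   below): a plain arithmetic statement such as a_1 = b_2 + c_3 is typically
   MOVE_POSSIBLE; a load or a pure/const call like t_1 = strlen (s_2) is
   MOVE_PRESERVE_EXECUTION, because executing it where the original program
   did not could trap or waste work; a store (it has a virtual definition),
   a volatile access or a statement with other side effects is
   MOVE_IMPOSSIBLE.  */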
288
289 /* If it is possible to hoist the statement STMT unconditionally,
290 returns MOVE_POSSIBLE.
291 If it is possible to hoist the statement STMT, but we must avoid making
292 it executed if it would not be executed in the original program (e.g.
293 because it may trap), return MOVE_PRESERVE_EXECUTION.
294 Otherwise return MOVE_IMPOSSIBLE. */
295
296 enum move_pos
297 movement_possibility (gimple stmt)
298 {
299 tree lhs;
300 enum move_pos ret = MOVE_POSSIBLE;
301
302 if (flag_unswitch_loops
303 && gimple_code (stmt) == GIMPLE_COND)
304 {
305 /* If we perform unswitching, force the operands of the invariant
306 condition to be moved out of the loop. */
307 return MOVE_POSSIBLE;
308 }
309
310 if (gimple_code (stmt) == GIMPLE_PHI
311 && gimple_phi_num_args (stmt) <= 2
312 && !virtual_operand_p (gimple_phi_result (stmt))
313 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
314 return MOVE_POSSIBLE;
315
316 if (gimple_get_lhs (stmt) == NULL_TREE)
317 return MOVE_IMPOSSIBLE;
318
319 if (gimple_vdef (stmt))
320 return MOVE_IMPOSSIBLE;
321
322 if (stmt_ends_bb_p (stmt)
323 || gimple_has_volatile_ops (stmt)
324 || gimple_has_side_effects (stmt)
325 || stmt_could_throw_p (stmt))
326 return MOVE_IMPOSSIBLE;
327
328 if (is_gimple_call (stmt))
329 {
330 /* While a pure or const call is guaranteed to have no side effects, we
331 cannot move it arbitrarily. Consider code like
332
333 char *s = something ();
334
335 while (1)
336 {
337 if (s)
338 t = strlen (s);
339 else
340 t = 0;
341 }
342
343 Here the strlen call cannot be moved out of the loop, even though
344 s is invariant. In addition to possibly creating a call with
345 invalid arguments, moving out a function call that is not executed
346 may cause performance regressions in case the call is costly and
347 not executed at all. */
348 ret = MOVE_PRESERVE_EXECUTION;
349 lhs = gimple_call_lhs (stmt);
350 }
351 else if (is_gimple_assign (stmt))
352 lhs = gimple_assign_lhs (stmt);
353 else
354 return MOVE_IMPOSSIBLE;
355
356 if (TREE_CODE (lhs) == SSA_NAME
357 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
358 return MOVE_IMPOSSIBLE;
359
360 if (TREE_CODE (lhs) != SSA_NAME
361 || gimple_could_trap_p (stmt))
362 return MOVE_PRESERVE_EXECUTION;
363
364 /* Non local loads in a transaction cannot be hoisted out. Well,
365 unless the load happens on every path out of the loop, but we
366 don't take this into account yet. */
367 if (flag_tm
368 && gimple_in_transaction (stmt)
369 && gimple_assign_single_p (stmt))
370 {
371 tree rhs = gimple_assign_rhs1 (stmt);
372 if (DECL_P (rhs) && is_global_var (rhs))
373 {
374 if (dump_file)
375 {
376 fprintf (dump_file, "Cannot hoist conditional load of ");
377 print_generic_expr (dump_file, rhs, TDF_SLIM);
378 fprintf (dump_file, " because it is in a transaction.\n");
379 }
380 return MOVE_IMPOSSIBLE;
381 }
382 }
383
384 return ret;
385 }
386
387 /* Suppose that operand DEF is used inside the LOOP. Returns the outermost
388 loop to which we could move the expression using DEF if it did not have
389 other operands, i.e. the outermost loop enclosing LOOP in which the value
390 of DEF is invariant. */
391
392 static struct loop *
393 outermost_invariant_loop (tree def, struct loop *loop)
394 {
395 gimple def_stmt;
396 basic_block def_bb;
397 struct loop *max_loop;
398 struct lim_aux_data *lim_data;
399
400 if (!def)
401 return superloop_at_depth (loop, 1);
402
403 if (TREE_CODE (def) != SSA_NAME)
404 {
405 gcc_assert (is_gimple_min_invariant (def));
406 return superloop_at_depth (loop, 1);
407 }
408
409 def_stmt = SSA_NAME_DEF_STMT (def);
410 def_bb = gimple_bb (def_stmt);
411 if (!def_bb)
412 return superloop_at_depth (loop, 1);
413
414 max_loop = find_common_loop (loop, def_bb->loop_father);
415
416 lim_data = get_lim_data (def_stmt);
417 if (lim_data != NULL && lim_data->max_loop != NULL)
418 max_loop = find_common_loop (max_loop,
419 loop_outer (lim_data->max_loop));
420 if (max_loop == loop)
421 return NULL;
422 max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);
423
424 return max_loop;
425 }
426
427 /* DATA is a structure containing information associated with a statement
428 inside LOOP. DEF is one of the operands of this statement.
429
430 Find the outermost loop enclosing LOOP in which the value of DEF is invariant
431 and record this in the DATA->max_loop field. If DEF itself is defined inside
432 this loop as well (i.e. we need to hoist it out of the loop if we want
433 to hoist the statement represented by DATA), record the statement in which
434 DEF is defined in the DATA->depends list. Additionally, if ADD_COST is true,
435 add the cost of the computation of DEF to the DATA->cost.
436
437 If DEF is not invariant in LOOP, return false. Otherwise return TRUE. */
438
439 static bool
440 add_dependency (tree def, struct lim_aux_data *data, struct loop *loop,
441 bool add_cost)
442 {
443 gimple def_stmt = SSA_NAME_DEF_STMT (def);
444 basic_block def_bb = gimple_bb (def_stmt);
445 struct loop *max_loop;
446 struct lim_aux_data *def_data;
447
448 if (!def_bb)
449 return true;
450
451 max_loop = outermost_invariant_loop (def, loop);
452 if (!max_loop)
453 return false;
454
455 if (flow_loop_nested_p (data->max_loop, max_loop))
456 data->max_loop = max_loop;
457
458 def_data = get_lim_data (def_stmt);
459 if (!def_data)
460 return true;
461
462 if (add_cost
463 /* Only add the cost if the statement defining DEF is inside LOOP,
464 i.e. if it is likely that by moving the invariants dependent
465 on it, we will be able to avoid creating a new register for
466 it (since it will be only used in these dependent invariants). */
467 && def_bb->loop_father == loop)
468 data->cost += def_data->cost;
469
470 data->depends.safe_push (def_stmt);
471
472 return true;
473 }
474
475 /* Returns an estimate for a cost of statement STMT. The values here
476 are just ad-hoc constants, similar to costs for inlining. */
477
478 static unsigned
479 stmt_cost (gimple stmt)
480 {
481 /* Always try to create possibilities for unswitching. */
482 if (gimple_code (stmt) == GIMPLE_COND
483 || gimple_code (stmt) == GIMPLE_PHI)
484 return LIM_EXPENSIVE;
485
486 /* We should be hoisting calls if possible. */
487 if (is_gimple_call (stmt))
488 {
489 tree fndecl;
490
491 /* Unless the call is a builtin_constant_p; this always folds to a
492 constant, so moving it is useless. */
493 fndecl = gimple_call_fndecl (stmt);
494 if (fndecl
495 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
496 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CONSTANT_P)
497 return 0;
498
499 return LIM_EXPENSIVE;
500 }
501
502 /* Hoisting memory references out should almost surely be a win. */
503 if (gimple_references_memory_p (stmt))
504 return LIM_EXPENSIVE;
505
506 if (gimple_code (stmt) != GIMPLE_ASSIGN)
507 return 1;
508
509 switch (gimple_assign_rhs_code (stmt))
510 {
511 case MULT_EXPR:
512 case WIDEN_MULT_EXPR:
513 case WIDEN_MULT_PLUS_EXPR:
514 case WIDEN_MULT_MINUS_EXPR:
515 case DOT_PROD_EXPR:
516 case FMA_EXPR:
517 case TRUNC_DIV_EXPR:
518 case CEIL_DIV_EXPR:
519 case FLOOR_DIV_EXPR:
520 case ROUND_DIV_EXPR:
521 case EXACT_DIV_EXPR:
522 case CEIL_MOD_EXPR:
523 case FLOOR_MOD_EXPR:
524 case ROUND_MOD_EXPR:
525 case TRUNC_MOD_EXPR:
526 case RDIV_EXPR:
527 /* Division and multiplication are usually expensive. */
528 return LIM_EXPENSIVE;
529
530 case LSHIFT_EXPR:
531 case RSHIFT_EXPR:
532 case WIDEN_LSHIFT_EXPR:
533 case LROTATE_EXPR:
534 case RROTATE_EXPR:
535 /* Shifts and rotates are usually expensive. */
536 return LIM_EXPENSIVE;
537
538 case CONSTRUCTOR:
539 /* Make vector construction cost proportional to the number
540 of elements. */
541 return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));
542
543 case SSA_NAME:
544 case PAREN_EXPR:
545 /* Whether or not something is wrapped inside a PAREN_EXPR
546 should not change move cost. Nor should an intermediate
547 unpropagated SSA name copy. */
548 return 0;
549
550 default:
551 return 1;
552 }
553 }
554
555 /* Finds the outermost loop between OUTER and LOOP in which the memory reference
556 REF is independent. If REF is not independent in LOOP, NULL is returned
557 instead. */
558
559 static struct loop *
560 outermost_indep_loop (struct loop *outer, struct loop *loop, mem_ref_p ref)
561 {
562 struct loop *aloop;
563
564 if (ref->stored && bitmap_bit_p (ref->stored, loop->num))
565 return NULL;
566
567 for (aloop = outer;
568 aloop != loop;
569 aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
570 if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
571 && ref_indep_loop_p (aloop, ref))
572 return aloop;
573
574 if (ref_indep_loop_p (loop, ref))
575 return loop;
576 else
577 return NULL;
578 }
579
580 /* If there is a simple load or store to a memory reference in STMT, returns
581 the location of the memory reference, and sets IS_STORE according to whether
582 it is a store or load. Otherwise, returns NULL. */
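/* For example, for a load  x_1 = a[i_2]  the address of the rhs operand is
   returned and *IS_STORE is set to false; for a store  a[i_2] = x_1  the
   address of the lhs is returned and *IS_STORE is set to true.  Other
   statements, e.g. calls or aggregate copies with memory on both sides,
   yield NULL.  (Illustrative sketch of the cases handled below.)  */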
583
584 static tree *
585 simple_mem_ref_in_stmt (gimple stmt, bool *is_store)
586 {
587 tree *lhs, *rhs;
588
589 /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns. */
590 if (!gimple_assign_single_p (stmt))
591 return NULL;
592
593 lhs = gimple_assign_lhs_ptr (stmt);
594 rhs = gimple_assign_rhs1_ptr (stmt);
595
596 if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
597 {
598 *is_store = false;
599 return rhs;
600 }
601 else if (gimple_vdef (stmt)
602 && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
603 {
604 *is_store = true;
605 return lhs;
606 }
607 else
608 return NULL;
609 }
610
611 /* Returns the memory reference contained in STMT. */
612
613 static mem_ref_p
614 mem_ref_in_stmt (gimple stmt)
615 {
616 bool store;
617 tree *mem = simple_mem_ref_in_stmt (stmt, &store);
618 hashval_t hash;
619 mem_ref_p ref;
620
621 if (!mem)
622 return NULL;
623 gcc_assert (!store);
624
625 hash = iterative_hash_expr (*mem, 0);
626 ref = memory_accesses.refs->find_with_hash (*mem, hash);
627
628 gcc_assert (ref != NULL);
629 return ref;
630 }
631
632 /* From a controlling predicate in DOM determine the arguments from
633 the PHI node PHI that are chosen if the predicate evaluates to
634 true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
635 they are non-NULL. Returns true if the arguments can be determined,
636 else return false. */
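/* For example, in the diamond

       if (a_1 < b_2)       <- last stmt of DOM
        /          \
    <then bb>    <else bb>
        \          /
     x_3 = PHI <c_4(then), d_5(else)>

   c_4 is stored to *TRUE_ARG_P and d_5 to *FALSE_ARG_P.  (Illustrative
   sketch with made-up SSA names; the code below also handles extended
   diamonds where the PHI block's predecessors are only dominated by the
   true/false edges.)  */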
637
638 static bool
639 extract_true_false_args_from_phi (basic_block dom, gphi *phi,
640 tree *true_arg_p, tree *false_arg_p)
641 {
642 basic_block bb = gimple_bb (phi);
643 edge true_edge, false_edge, tem;
644 tree arg0 = NULL_TREE, arg1 = NULL_TREE;
645
646 /* We have to verify that one edge into the PHI node is dominated
647 by the true edge of the predicate block and the other edge
648 dominated by the false edge. This ensures that the PHI argument
649 we are going to take is completely determined by the path we
650 take from the predicate block.
651 We can only use BB dominance checks below if the destination of
652 the true/false edges are dominated by their edge, thus only
653 have a single predecessor. */
654 extract_true_false_edges_from_block (dom, &true_edge, &false_edge);
655 tem = EDGE_PRED (bb, 0);
656 if (tem == true_edge
657 || (single_pred_p (true_edge->dest)
658 && (tem->src == true_edge->dest
659 || dominated_by_p (CDI_DOMINATORS,
660 tem->src, true_edge->dest))))
661 arg0 = PHI_ARG_DEF (phi, tem->dest_idx);
662 else if (tem == false_edge
663 || (single_pred_p (false_edge->dest)
664 && (tem->src == false_edge->dest
665 || dominated_by_p (CDI_DOMINATORS,
666 tem->src, false_edge->dest))))
667 arg1 = PHI_ARG_DEF (phi, tem->dest_idx);
668 else
669 return false;
670 tem = EDGE_PRED (bb, 1);
671 if (tem == true_edge
672 || (single_pred_p (true_edge->dest)
673 && (tem->src == true_edge->dest
674 || dominated_by_p (CDI_DOMINATORS,
675 tem->src, true_edge->dest))))
676 arg0 = PHI_ARG_DEF (phi, tem->dest_idx);
677 else if (tem == false_edge
678 || (single_pred_p (false_edge->dest)
679 && (tem->src == false_edge->dest
680 || dominated_by_p (CDI_DOMINATORS,
681 tem->src, false_edge->dest))))
682 arg1 = PHI_ARG_DEF (phi, tem->dest_idx);
683 else
684 return false;
685 if (!arg0 || !arg1)
686 return false;
687
688 if (true_arg_p)
689 *true_arg_p = arg0;
690 if (false_arg_p)
691 *false_arg_p = arg1;
692
693 return true;
694 }
695
696 /* Determine the outermost loop to which it is possible to hoist a statement
697 STMT and store it to LIM_DATA (STMT)->max_loop. To do this we determine
698 the outermost loop in which the value computed by STMT is invariant.
699 If MUST_PRESERVE_EXEC is true, additionally choose the loop so that
700 whether STMT is executed is preserved. It also fills in other related
701 information in LIM_DATA (STMT).
702
703 The function returns false if STMT cannot be hoisted outside of the loop it
704 is defined in, and true otherwise. */
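/* A rough illustration of the PHI handling below: for a two-argument
   x_1 = PHI <a_2(then), b_3(else)> controlled by a dominating condition,
   move_computations later replaces the PHI by a COND_EXPR in the loop
   preheader, so all argument dependencies end up unconditionally evaluated;
   only the cost of the cheapest argument dependency chain is therefore
   credited to the PHI, and hoisting is given up when the other chains are
   much more expensive.  */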
705
706 static bool
707 determine_max_movement (gimple stmt, bool must_preserve_exec)
708 {
709 basic_block bb = gimple_bb (stmt);
710 struct loop *loop = bb->loop_father;
711 struct loop *level;
712 struct lim_aux_data *lim_data = get_lim_data (stmt);
713 tree val;
714 ssa_op_iter iter;
715
716 if (must_preserve_exec)
717 level = ALWAYS_EXECUTED_IN (bb);
718 else
719 level = superloop_at_depth (loop, 1);
720 lim_data->max_loop = level;
721
722 if (gphi *phi = dyn_cast <gphi *> (stmt))
723 {
724 use_operand_p use_p;
725 unsigned min_cost = UINT_MAX;
726 unsigned total_cost = 0;
727 struct lim_aux_data *def_data;
728
729 /* We will end up promoting dependencies to be unconditionally
730 evaluated. For this reason the PHI cost (and thus the
731 cost we remove from the loop by doing the invariant motion)
732 is that of the cheapest PHI argument dependency chain. */
733 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
734 {
735 val = USE_FROM_PTR (use_p);
736
737 if (TREE_CODE (val) != SSA_NAME)
738 {
739 /* Assign cost 1 to constants. */
740 min_cost = MIN (min_cost, 1);
741 total_cost += 1;
742 continue;
743 }
744 if (!add_dependency (val, lim_data, loop, false))
745 return false;
746
747 gimple def_stmt = SSA_NAME_DEF_STMT (val);
748 if (gimple_bb (def_stmt)
749 && gimple_bb (def_stmt)->loop_father == loop)
750 {
751 def_data = get_lim_data (def_stmt);
752 if (def_data)
753 {
754 min_cost = MIN (min_cost, def_data->cost);
755 total_cost += def_data->cost;
756 }
757 }
758 }
759
760 min_cost = MIN (min_cost, total_cost);
761 lim_data->cost += min_cost;
762
763 if (gimple_phi_num_args (phi) > 1)
764 {
765 basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
766 gimple cond;
767 if (gsi_end_p (gsi_last_bb (dom)))
768 return false;
769 cond = gsi_stmt (gsi_last_bb (dom));
770 if (gimple_code (cond) != GIMPLE_COND)
771 return false;
772 /* Verify that this is an extended form of a diamond and
773 the PHI arguments are completely controlled by the
774 predicate in DOM. */
775 if (!extract_true_false_args_from_phi (dom, phi, NULL, NULL))
776 return false;
777
778 /* Fold in dependencies and cost of the condition. */
779 FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
780 {
781 if (!add_dependency (val, lim_data, loop, false))
782 return false;
783 def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
784 if (def_data)
785 total_cost += def_data->cost;
786 }
787
788 /* We want to avoid unconditionally executing very expensive
789 operations. As costs for our dependencies cannot be
790 negative, just claim we are not invariant for this case.
791 We also are not sure whether the control-flow inside the
792 loop will vanish. */
793 if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
794 && !(min_cost != 0
795 && total_cost / min_cost <= 2))
796 return false;
797
798 /* Assume that the control-flow in the loop will vanish.
799 ??? We should verify this and not artificially increase
800 the cost if that is not the case. */
801 lim_data->cost += stmt_cost (stmt);
802 }
803
804 return true;
805 }
806 else
807 FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
808 if (!add_dependency (val, lim_data, loop, true))
809 return false;
810
811 if (gimple_vuse (stmt))
812 {
813 mem_ref_p ref = mem_ref_in_stmt (stmt);
814
815 if (ref)
816 {
817 lim_data->max_loop
818 = outermost_indep_loop (lim_data->max_loop, loop, ref);
819 if (!lim_data->max_loop)
820 return false;
821 }
822 else
823 {
824 if ((val = gimple_vuse (stmt)) != NULL_TREE)
825 {
826 if (!add_dependency (val, lim_data, loop, false))
827 return false;
828 }
829 }
830 }
831
832 lim_data->cost += stmt_cost (stmt);
833
834 return true;
835 }
836
837 /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
838 and that one of the operands of this statement is computed by STMT.
839 Ensure that STMT (together with all the statements that define its
840 operands) is hoisted at least out of the loop LEVEL. */
841
842 static void
843 set_level (gimple stmt, struct loop *orig_loop, struct loop *level)
844 {
845 struct loop *stmt_loop = gimple_bb (stmt)->loop_father;
846 struct lim_aux_data *lim_data;
847 gimple dep_stmt;
848 unsigned i;
849
850 stmt_loop = find_common_loop (orig_loop, stmt_loop);
851 lim_data = get_lim_data (stmt);
852 if (lim_data != NULL && lim_data->tgt_loop != NULL)
853 stmt_loop = find_common_loop (stmt_loop,
854 loop_outer (lim_data->tgt_loop));
855 if (flow_loop_nested_p (stmt_loop, level))
856 return;
857
858 gcc_assert (level == lim_data->max_loop
859 || flow_loop_nested_p (lim_data->max_loop, level));
860
861 lim_data->tgt_loop = level;
862 FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
863 set_level (dep_stmt, orig_loop, level);
864 }
865
866 /* Determines an outermost loop from which we want to hoist the statement STMT.
867 For now we choose the outermost possible loop. TODO -- use profiling
868 information to set it more sanely. */
869
870 static void
871 set_profitable_level (gimple stmt)
872 {
873 set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
874 }
875
876 /* Returns true if STMT is a call that has side effects. */
877
878 static bool
879 nonpure_call_p (gimple stmt)
880 {
881 if (gimple_code (stmt) != GIMPLE_CALL)
882 return false;
883
884 return gimple_has_side_effects (stmt);
885 }
886
887 /* Rewrite a/b to a*(1/b). Return the invariant stmt to process. */
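/* As a rough sketch (SSA names made up for illustration):

     x_1 = a_2 / b_3;

   becomes

     reciptmp_4 = 1.0 / b_3;	<- invariant, returned to the caller
     x_1 = a_2 * reciptmp_4;

   so that the reciprocal can be hoisted when b_3 is invariant.  */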
888
889 static gimple
890 rewrite_reciprocal (gimple_stmt_iterator *bsi)
891 {
892 gassign *stmt, *stmt1, *stmt2;
893 tree name, lhs, type;
894 tree real_one;
895 gimple_stmt_iterator gsi;
896
897 stmt = as_a <gassign *> (gsi_stmt (*bsi));
898 lhs = gimple_assign_lhs (stmt);
899 type = TREE_TYPE (lhs);
900
901 real_one = build_one_cst (type);
902
903 name = make_temp_ssa_name (type, NULL, "reciptmp");
904 stmt1 = gimple_build_assign (name, RDIV_EXPR, real_one,
905 gimple_assign_rhs2 (stmt));
906 stmt2 = gimple_build_assign (lhs, MULT_EXPR, name,
907 gimple_assign_rhs1 (stmt));
908
909 /* Replace division stmt with reciprocal and multiply stmts.
910 The multiply stmt is not invariant, so update iterator
911 and avoid rescanning. */
912 gsi = *bsi;
913 gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
914 gsi_replace (&gsi, stmt2, true);
915
916 /* Continue processing with invariant reciprocal statement. */
917 return stmt1;
918 }
919
920 /* Check if the pattern at *BSI is a bittest of the form
921 (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0. */
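/* As a rough sketch (SSA names made up for illustration):

     t_1 = a_2 >> b_3;
     t_4 = t_1 & 1;
     if (t_4 != 0) ...

   becomes

     shifttmp_5 = 1 << b_3;
     shifttmp_6 = a_2 & shifttmp_5;
     if (shifttmp_6 != 0) ...

   so that when B is invariant but A is not, the 1 << B computation can be
   hoisted out of the loop.  */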
922
923 static gimple
924 rewrite_bittest (gimple_stmt_iterator *bsi)
925 {
926 gassign *stmt;
927 gimple stmt1;
928 gassign *stmt2;
929 gimple use_stmt;
930 gcond *cond_stmt;
931 tree lhs, name, t, a, b;
932 use_operand_p use;
933
934 stmt = as_a <gassign *> (gsi_stmt (*bsi));
935 lhs = gimple_assign_lhs (stmt);
936
937 /* Verify that the single use of lhs is a comparison against zero. */
938 if (TREE_CODE (lhs) != SSA_NAME
939 || !single_imm_use (lhs, &use, &use_stmt))
940 return stmt;
941 cond_stmt = dyn_cast <gcond *> (use_stmt);
942 if (!cond_stmt)
943 return stmt;
944 if (gimple_cond_lhs (cond_stmt) != lhs
945 || (gimple_cond_code (cond_stmt) != NE_EXPR
946 && gimple_cond_code (cond_stmt) != EQ_EXPR)
947 || !integer_zerop (gimple_cond_rhs (cond_stmt)))
948 return stmt;
949
950 /* Get at the operands of the shift. The rhs is TMP1 & 1. */
951 stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
952 if (gimple_code (stmt1) != GIMPLE_ASSIGN)
953 return stmt;
954
955 /* There is a conversion in between possibly inserted by fold. */
956 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
957 {
958 t = gimple_assign_rhs1 (stmt1);
959 if (TREE_CODE (t) != SSA_NAME
960 || !has_single_use (t))
961 return stmt;
962 stmt1 = SSA_NAME_DEF_STMT (t);
963 if (gimple_code (stmt1) != GIMPLE_ASSIGN)
964 return stmt;
965 }
966
967 /* Verify that B is loop invariant but A is not. Verify that with
968 all the stmt walking we are still in the same loop. */
969 if (gimple_assign_rhs_code (stmt1) != RSHIFT_EXPR
970 || loop_containing_stmt (stmt1) != loop_containing_stmt (stmt))
971 return stmt;
972
973 a = gimple_assign_rhs1 (stmt1);
974 b = gimple_assign_rhs2 (stmt1);
975
976 if (outermost_invariant_loop (b, loop_containing_stmt (stmt1)) != NULL
977 && outermost_invariant_loop (a, loop_containing_stmt (stmt1)) == NULL)
978 {
979 gimple_stmt_iterator rsi;
980
981 /* 1 << B */
982 t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
983 build_int_cst (TREE_TYPE (a), 1), b);
984 name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
985 stmt1 = gimple_build_assign (name, t);
986
987 /* A & (1 << B) */
988 t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
989 name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
990 stmt2 = gimple_build_assign (name, t);
991
992 /* Replace the SSA_NAME we compare against zero. Adjust
993 the type of zero accordingly. */
994 SET_USE (use, name);
995 gimple_cond_set_rhs (cond_stmt,
996 build_int_cst_type (TREE_TYPE (name),
997 0));
998
999 /* Don't use gsi_replace here, none of the new assignments sets
1000 the variable originally set in stmt. Move bsi to stmt1, and
1001 then remove the original stmt, so that we get a chance to
1002 retain debug info for it. */
1003 rsi = *bsi;
1004 gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
1005 gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
1006 gsi_remove (&rsi, true);
1007
1008 return stmt1;
1009 }
1010
1011 return stmt;
1012 }
1013
1014 /* For each statement, determine the outermost loop in which it is invariant,
1015 the statements on whose motion it depends, and the cost of the computation.
1016 This information is stored in the LIM_DATA structure associated with
1017 each statement. */
1018 class invariantness_dom_walker : public dom_walker
1019 {
1020 public:
1021 invariantness_dom_walker (cdi_direction direction)
1022 : dom_walker (direction) {}
1023
1024 virtual void before_dom_children (basic_block);
1025 };
1026
1027 /* Determine the outermost loops in which statements in basic block BB are
1028 invariant, and record them to the LIM_DATA associated with the statements.
1029 Callback for dom_walker. */
1030
1031 void
1032 invariantness_dom_walker::before_dom_children (basic_block bb)
1033 {
1034 enum move_pos pos;
1035 gimple_stmt_iterator bsi;
1036 gimple stmt;
1037 bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
1038 struct loop *outermost = ALWAYS_EXECUTED_IN (bb);
1039 struct lim_aux_data *lim_data;
1040
1041 if (!loop_outer (bb->loop_father))
1042 return;
1043
1044 if (dump_file && (dump_flags & TDF_DETAILS))
1045 fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
1046 bb->index, bb->loop_father->num, loop_depth (bb->loop_father));
1047
1048 /* Look at PHI nodes, but only if there are at most two.
1049 ??? We could relax this further by post-processing the inserted
1050 code and transforming adjacent cond-exprs with the same predicate
1051 to control flow again. */
1052 bsi = gsi_start_phis (bb);
1053 if (!gsi_end_p (bsi)
1054 && ((gsi_next (&bsi), gsi_end_p (bsi))
1055 || (gsi_next (&bsi), gsi_end_p (bsi))))
1056 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1057 {
1058 stmt = gsi_stmt (bsi);
1059
1060 pos = movement_possibility (stmt);
1061 if (pos == MOVE_IMPOSSIBLE)
1062 continue;
1063
1064 lim_data = init_lim_data (stmt);
1065 lim_data->always_executed_in = outermost;
1066
1067 if (!determine_max_movement (stmt, false))
1068 {
1069 lim_data->max_loop = NULL;
1070 continue;
1071 }
1072
1073 if (dump_file && (dump_flags & TDF_DETAILS))
1074 {
1075 print_gimple_stmt (dump_file, stmt, 2, 0);
1076 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
1077 loop_depth (lim_data->max_loop),
1078 lim_data->cost);
1079 }
1080
1081 if (lim_data->cost >= LIM_EXPENSIVE)
1082 set_profitable_level (stmt);
1083 }
1084
1085 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1086 {
1087 stmt = gsi_stmt (bsi);
1088
1089 pos = movement_possibility (stmt);
1090 if (pos == MOVE_IMPOSSIBLE)
1091 {
1092 if (nonpure_call_p (stmt))
1093 {
1094 maybe_never = true;
1095 outermost = NULL;
1096 }
1097 /* Make sure to note always_executed_in for stores to make
1098 store-motion work. */
1099 else if (stmt_makes_single_store (stmt))
1100 {
1101 struct lim_aux_data *lim_data = init_lim_data (stmt);
1102 lim_data->always_executed_in = outermost;
1103 }
1104 continue;
1105 }
1106
1107 if (is_gimple_assign (stmt)
1108 && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1109 == GIMPLE_BINARY_RHS))
1110 {
1111 tree op0 = gimple_assign_rhs1 (stmt);
1112 tree op1 = gimple_assign_rhs2 (stmt);
1113 struct loop *ol1 = outermost_invariant_loop (op1,
1114 loop_containing_stmt (stmt));
1115
1116 /* If the divisor is invariant, convert a/b to a*(1/b), allowing the
1117 reciprocal to be hoisted out of the loop, saving an expensive divide. */
1118 if (pos == MOVE_POSSIBLE
1119 && gimple_assign_rhs_code (stmt) == RDIV_EXPR
1120 && flag_unsafe_math_optimizations
1121 && !flag_trapping_math
1122 && ol1 != NULL
1123 && outermost_invariant_loop (op0, ol1) == NULL)
1124 stmt = rewrite_reciprocal (&bsi);
1125
1126 /* If the shift count is invariant, convert (A >> B) & 1 to
1127 A & (1 << B), allowing the bit mask to be hoisted out of the loop,
1128 saving an expensive shift. */
1129 if (pos == MOVE_POSSIBLE
1130 && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
1131 && integer_onep (op1)
1132 && TREE_CODE (op0) == SSA_NAME
1133 && has_single_use (op0))
1134 stmt = rewrite_bittest (&bsi);
1135 }
1136
1137 lim_data = init_lim_data (stmt);
1138 lim_data->always_executed_in = outermost;
1139
1140 if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
1141 continue;
1142
1143 if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
1144 {
1145 lim_data->max_loop = NULL;
1146 continue;
1147 }
1148
1149 if (dump_file && (dump_flags & TDF_DETAILS))
1150 {
1151 print_gimple_stmt (dump_file, stmt, 2, 0);
1152 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
1153 loop_depth (lim_data->max_loop),
1154 lim_data->cost);
1155 }
1156
1157 if (lim_data->cost >= LIM_EXPENSIVE)
1158 set_profitable_level (stmt);
1159 }
1160 }
1161
1162 class move_computations_dom_walker : public dom_walker
1163 {
1164 public:
1165 move_computations_dom_walker (cdi_direction direction)
1166 : dom_walker (direction), todo_ (0) {}
1167
1168 virtual void before_dom_children (basic_block);
1169
1170 unsigned int todo_;
1171 };
1172
1173 /* Hoist the statements in basic block BB out of the loops prescribed by
1174 data stored in LIM_DATA structures associated with each statement. Callback
1175 for walk_dominator_tree. */
1176
1177 void
1178 move_computations_dom_walker::before_dom_children (basic_block bb)
1179 {
1180 struct loop *level;
1181 unsigned cost = 0;
1182 struct lim_aux_data *lim_data;
1183
1184 if (!loop_outer (bb->loop_father))
1185 return;
1186
1187 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
1188 {
1189 gassign *new_stmt;
1190 gphi *stmt = bsi.phi ();
1191
1192 lim_data = get_lim_data (stmt);
1193 if (lim_data == NULL)
1194 {
1195 gsi_next (&bsi);
1196 continue;
1197 }
1198
1199 cost = lim_data->cost;
1200 level = lim_data->tgt_loop;
1201 clear_lim_data (stmt);
1202
1203 if (!level)
1204 {
1205 gsi_next (&bsi);
1206 continue;
1207 }
1208
1209 if (dump_file && (dump_flags & TDF_DETAILS))
1210 {
1211 fprintf (dump_file, "Moving PHI node\n");
1212 print_gimple_stmt (dump_file, stmt, 0, 0);
1213 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1214 cost, level->num);
1215 }
1216
1217 if (gimple_phi_num_args (stmt) == 1)
1218 {
1219 tree arg = PHI_ARG_DEF (stmt, 0);
1220 new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1221 TREE_CODE (arg), arg);
1222 }
1223 else
1224 {
1225 basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
1226 gimple cond = gsi_stmt (gsi_last_bb (dom));
1227 tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
1228 /* Get the PHI arguments corresponding to the true and false
1229 edges of COND. */
1230 extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
1231 gcc_assert (arg0 && arg1);
1232 t = build2 (gimple_cond_code (cond), boolean_type_node,
1233 gimple_cond_lhs (cond), gimple_cond_rhs (cond));
1234 new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1235 COND_EXPR, t, arg0, arg1);
1236 todo_ |= TODO_cleanup_cfg;
1237 }
1238 if (INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (new_stmt)))
1239 && (!ALWAYS_EXECUTED_IN (bb)
1240 || (ALWAYS_EXECUTED_IN (bb) != level
1241 && !flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1242 {
1243 tree lhs = gimple_assign_lhs (new_stmt);
1244 SSA_NAME_RANGE_INFO (lhs) = NULL;
1245 SSA_NAME_ANTI_RANGE_P (lhs) = 0;
1246 }
1247 gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
1248 remove_phi_node (&bsi, false);
1249 }
1250
1251 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
1252 {
1253 edge e;
1254
1255 gimple stmt = gsi_stmt (bsi);
1256
1257 lim_data = get_lim_data (stmt);
1258 if (lim_data == NULL)
1259 {
1260 gsi_next (&bsi);
1261 continue;
1262 }
1263
1264 cost = lim_data->cost;
1265 level = lim_data->tgt_loop;
1266 clear_lim_data (stmt);
1267
1268 if (!level)
1269 {
1270 gsi_next (&bsi);
1271 continue;
1272 }
1273
1274 /* We do not really want to move conditionals out of the loop; we just
1275 placed them here to force their operands to be moved if necessary. */
1276 if (gimple_code (stmt) == GIMPLE_COND)
1277 continue;
1278
1279 if (dump_file && (dump_flags & TDF_DETAILS))
1280 {
1281 fprintf (dump_file, "Moving statement\n");
1282 print_gimple_stmt (dump_file, stmt, 0, 0);
1283 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1284 cost, level->num);
1285 }
1286
1287 e = loop_preheader_edge (level);
1288 gcc_assert (!gimple_vdef (stmt));
1289 if (gimple_vuse (stmt))
1290 {
1291 /* The new VUSE is the one from the virtual PHI in the loop
1292 header or the one already present. */
1293 gphi_iterator gsi2;
1294 for (gsi2 = gsi_start_phis (e->dest);
1295 !gsi_end_p (gsi2); gsi_next (&gsi2))
1296 {
1297 gphi *phi = gsi2.phi ();
1298 if (virtual_operand_p (gimple_phi_result (phi)))
1299 {
1300 gimple_set_vuse (stmt, PHI_ARG_DEF_FROM_EDGE (phi, e));
1301 break;
1302 }
1303 }
1304 }
1305 gsi_remove (&bsi, false);
1306 if (gimple_has_lhs (stmt)
1307 && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
1308 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_get_lhs (stmt)))
1309 && (!ALWAYS_EXECUTED_IN (bb)
1310 || !(ALWAYS_EXECUTED_IN (bb) == level
1311 || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1312 {
1313 tree lhs = gimple_get_lhs (stmt);
1314 SSA_NAME_RANGE_INFO (lhs) = NULL;
1315 SSA_NAME_ANTI_RANGE_P (lhs) = 0;
1316 }
1317 /* In case this is a stmt that is not unconditionally executed
1318 when the target loop header is executed and the stmt may
1319 invoke undefined integer or pointer overflow rewrite it to
1320 unsigned arithmetic. */
1321 if (is_gimple_assign (stmt)
1322 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt)))
1323 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (gimple_assign_lhs (stmt)))
1324 && arith_code_with_undefined_signed_overflow
1325 (gimple_assign_rhs_code (stmt))
1326 && (!ALWAYS_EXECUTED_IN (bb)
1327 || !(ALWAYS_EXECUTED_IN (bb) == level
1328 || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1329 gsi_insert_seq_on_edge (e, rewrite_to_defined_overflow (stmt));
1330 else
1331 gsi_insert_on_edge (e, stmt);
1332 }
1333 }
1334
1335 /* Hoist the statements out of the loops prescribed by data stored in
1336 LIM_DATA structures associated with each statement. */
1337
1338 static unsigned int
1339 move_computations (void)
1340 {
1341 move_computations_dom_walker walker (CDI_DOMINATORS);
1342 walker.walk (cfun->cfg->x_entry_block_ptr);
1343
1344 gsi_commit_edge_inserts ();
1345 if (need_ssa_update_p (cfun))
1346 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1347
1348 return walker.todo_;
1349 }
1350
1351 /* Checks whether the statement defining variable *INDEX can be hoisted
1352 out of the loop passed in DATA. Callback for for_each_index. */
1353
1354 static bool
1355 may_move_till (tree ref, tree *index, void *data)
1356 {
1357 struct loop *loop = (struct loop *) data, *max_loop;
1358
1359 /* If REF is an array reference, check also that the step and the lower
1360 bound are invariant in LOOP. */
1361 if (TREE_CODE (ref) == ARRAY_REF)
1362 {
1363 tree step = TREE_OPERAND (ref, 3);
1364 tree lbound = TREE_OPERAND (ref, 2);
1365
1366 max_loop = outermost_invariant_loop (step, loop);
1367 if (!max_loop)
1368 return false;
1369
1370 max_loop = outermost_invariant_loop (lbound, loop);
1371 if (!max_loop)
1372 return false;
1373 }
1374
1375 max_loop = outermost_invariant_loop (*index, loop);
1376 if (!max_loop)
1377 return false;
1378
1379 return true;
1380 }
1381
1382 /* If OP is an SSA name, force the statement that defines it to be
1383 moved out of the LOOP. ORIG_LOOP is the loop in which OP is used. */
1384
1385 static void
1386 force_move_till_op (tree op, struct loop *orig_loop, struct loop *loop)
1387 {
1388 gimple stmt;
1389
1390 if (!op
1391 || is_gimple_min_invariant (op))
1392 return;
1393
1394 gcc_assert (TREE_CODE (op) == SSA_NAME);
1395
1396 stmt = SSA_NAME_DEF_STMT (op);
1397 if (gimple_nop_p (stmt))
1398 return;
1399
1400 set_level (stmt, orig_loop, loop);
1401 }
1402
1403 /* Forces statements defining invariants in REF (and *INDEX) to be moved out of
1404 the LOOP. The reference REF is used in the loop ORIG_LOOP. Callback for
1405 for_each_index. */
1406
1407 struct fmt_data
1408 {
1409 struct loop *loop;
1410 struct loop *orig_loop;
1411 };
1412
1413 static bool
1414 force_move_till (tree ref, tree *index, void *data)
1415 {
1416 struct fmt_data *fmt_data = (struct fmt_data *) data;
1417
1418 if (TREE_CODE (ref) == ARRAY_REF)
1419 {
1420 tree step = TREE_OPERAND (ref, 3);
1421 tree lbound = TREE_OPERAND (ref, 2);
1422
1423 force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
1424 force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
1425 }
1426
1427 force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);
1428
1429 return true;
1430 }
1431
1432 /* A function to free the mem_ref object MEM. */
1433
1434 static void
1435 memref_free (struct im_mem_ref *mem)
1436 {
1437 mem->accesses_in_loop.release ();
1438 }
1439
1440 /* Allocates and returns a memory reference description for MEM whose hash
1441 value is HASH and id is ID. */
1442
1443 static mem_ref_p
1444 mem_ref_alloc (tree mem, unsigned hash, unsigned id)
1445 {
1446 mem_ref_p ref = XOBNEW (&mem_ref_obstack, struct im_mem_ref);
1447 ao_ref_init (&ref->mem, mem);
1448 ref->id = id;
1449 ref->hash = hash;
1450 ref->stored = NULL;
1451 bitmap_initialize (&ref->indep_loop, &lim_bitmap_obstack);
1452 bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
1453 ref->accesses_in_loop.create (1);
1454
1455 return ref;
1456 }
1457
1458 /* Records memory reference location *LOC to the memory reference
1459 description REF. The reference occurs in statement STMT. */
1460
1461 static void
1462 record_mem_ref_loc (mem_ref_p ref, gimple stmt, tree *loc)
1463 {
1464 mem_ref_loc aref;
1465 aref.stmt = stmt;
1466 aref.ref = loc;
1467 ref->accesses_in_loop.safe_push (aref);
1468 }
1469
1470 /* Set the LOOP bit in REF's stored bitmap, allocating the bitmap if
1471 necessary. Return whether the bit changed. */
1472
1473 static bool
1474 set_ref_stored_in_loop (mem_ref_p ref, struct loop *loop)
1475 {
1476 if (!ref->stored)
1477 ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
1478 return bitmap_set_bit (ref->stored, loop->num);
1479 }
1480
1481 /* Marks reference REF as stored in LOOP. */
1482
1483 static void
1484 mark_ref_stored (mem_ref_p ref, struct loop *loop)
1485 {
1486 while (loop != current_loops->tree_root
1487 && set_ref_stored_in_loop (ref, loop))
1488 loop = loop_outer (loop);
1489 }
1490
1491 /* Gathers memory references in statement STMT in LOOP, storing the
1492 information about them in the memory_accesses structure. Marks
1493 the vops accessed through unrecognized statements there as
1494 well. */
1495
1496 static void
1497 gather_mem_refs_stmt (struct loop *loop, gimple stmt)
1498 {
1499 tree *mem = NULL;
1500 hashval_t hash;
1501 im_mem_ref **slot;
1502 mem_ref_p ref;
1503 bool is_stored;
1504 unsigned id;
1505
1506 if (!gimple_vuse (stmt))
1507 return;
1508
1509 mem = simple_mem_ref_in_stmt (stmt, &is_stored);
1510 if (!mem)
1511 {
1512 /* We use the shared mem_ref for all unanalyzable refs. */
1513 id = UNANALYZABLE_MEM_ID;
1514 ref = memory_accesses.refs_list[id];
1515 if (dump_file && (dump_flags & TDF_DETAILS))
1516 {
1517 fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
1518 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1519 }
1520 is_stored = gimple_vdef (stmt);
1521 }
1522 else
1523 {
1524 hash = iterative_hash_expr (*mem, 0);
1525 slot = memory_accesses.refs->find_slot_with_hash (*mem, hash, INSERT);
1526 if (*slot)
1527 {
1528 ref = (mem_ref_p) *slot;
1529 id = ref->id;
1530 }
1531 else
1532 {
1533 id = memory_accesses.refs_list.length ();
1534 ref = mem_ref_alloc (*mem, hash, id);
1535 memory_accesses.refs_list.safe_push (ref);
1536 *slot = ref;
1537
1538 if (dump_file && (dump_flags & TDF_DETAILS))
1539 {
1540 fprintf (dump_file, "Memory reference %u: ", id);
1541 print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
1542 fprintf (dump_file, "\n");
1543 }
1544 }
1545
1546 record_mem_ref_loc (ref, stmt, mem);
1547 }
1548 bitmap_set_bit (&memory_accesses.refs_in_loop[loop->num], ref->id);
1549 if (is_stored)
1550 {
1551 bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
1552 mark_ref_stored (ref, loop);
1553 }
1554 return;
1555 }
1556
1557 static unsigned *bb_loop_postorder;
1558
1559 /* qsort comparison function to sort blocks by their loop father's postorder. */
1560
1561 static int
1562 sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_)
1563 {
1564 basic_block bb1 = *(basic_block *)const_cast<void *>(bb1_);
1565 basic_block bb2 = *(basic_block *)const_cast<void *>(bb2_);
1566 struct loop *loop1 = bb1->loop_father;
1567 struct loop *loop2 = bb2->loop_father;
1568 if (loop1->num == loop2->num)
1569 return 0;
1570 return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1571 }
1572
1573 /* qsort comparison function to sort ref locs by their loop father's postorder. */
1574
1575 static int
1576 sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_)
1577 {
1578 mem_ref_loc *loc1 = (mem_ref_loc *)const_cast<void *>(loc1_);
1579 mem_ref_loc *loc2 = (mem_ref_loc *)const_cast<void *>(loc2_);
1580 struct loop *loop1 = gimple_bb (loc1->stmt)->loop_father;
1581 struct loop *loop2 = gimple_bb (loc2->stmt)->loop_father;
1582 if (loop1->num == loop2->num)
1583 return 0;
1584 return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1585 }
1586
1587 /* Gathers memory references in loops. */
1588
1589 static void
1590 analyze_memory_references (void)
1591 {
1592 gimple_stmt_iterator bsi;
1593 basic_block bb, *bbs;
1594 struct loop *loop, *outer;
1595 unsigned i, n;
1596
1597 /* Collect all basic-blocks in loops and sort them after their
1598 loops postorder. */
1599 i = 0;
1600 bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
1601 FOR_EACH_BB_FN (bb, cfun)
1602 if (bb->loop_father != current_loops->tree_root)
1603 bbs[i++] = bb;
1604 n = i;
1605 qsort (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp);
1606
1607 /* Visit blocks in loop postorder and assign mem-ref IDs in that order.
1608 That results in better locality for all the bitmaps. */
1609 for (i = 0; i < n; ++i)
1610 {
1611 basic_block bb = bbs[i];
1612 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1613 gather_mem_refs_stmt (bb->loop_father, gsi_stmt (bsi));
1614 }
1615
1616 /* Sort the location list of gathered memory references after their
1617 loop postorder number. */
1618 im_mem_ref *ref;
1619 FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
1620 ref->accesses_in_loop.qsort (sort_locs_in_loop_postorder_cmp);
1621
1622 free (bbs);
1623 // free (bb_loop_postorder);
1624
1625 /* Propagate the information about accessed memory references up
1626 the loop hierarchy. */
1627 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
1628 {
1629 /* Finalize the overall touched references (including subloops). */
1630 bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num],
1631 &memory_accesses.refs_stored_in_loop[loop->num]);
1632
1633 /* Propagate the information about accessed memory references up
1634 the loop hierarchy. */
1635 outer = loop_outer (loop);
1636 if (outer == current_loops->tree_root)
1637 continue;
1638
1639 bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num],
1640 &memory_accesses.all_refs_stored_in_loop[loop->num]);
1641 }
1642 }
1643
1644 /* Returns true if MEM1 and MEM2 may alias. TTAE_CACHE is used as a cache in
1645 tree_to_aff_combination_expand. */
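/* A hedged example of what the affine expansion below can prove: for
   accesses a[i_1] and a[i_1 + 1] with 4-byte elements, the expanded offsets
   are 4*i_1 and 4*i_1 + 4, so they differ by a constant equal to the access
   size and the two locations cannot overlap even though i_1 itself is
   unknown.  */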
1646
1647 static bool
1648 mem_refs_may_alias_p (mem_ref_p mem1, mem_ref_p mem2,
1649 hash_map<tree, name_expansion *> **ttae_cache)
1650 {
1651 /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
1652 object and their offsets differ in such a way that the locations cannot
1653 overlap, then they cannot alias. */
1654 widest_int size1, size2;
1655 aff_tree off1, off2;
1656
1657 /* Perform basic offset and type-based disambiguation. */
1658 if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, true))
1659 return false;
1660
1661 /* The expansion of addresses may be a bit expensive, thus we only do
1662 the check at -O2 and higher optimization levels. */
1663 if (optimize < 2)
1664 return true;
1665
1666 get_inner_reference_aff (mem1->mem.ref, &off1, &size1);
1667 get_inner_reference_aff (mem2->mem.ref, &off2, &size2);
1668 aff_combination_expand (&off1, ttae_cache);
1669 aff_combination_expand (&off2, ttae_cache);
1670 aff_combination_scale (&off1, -1);
1671 aff_combination_add (&off2, &off1);
1672
1673 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1674 return false;
1675
1676 return true;
1677 }
1678
1679 /* Compare function for bsearch searching for reference locations
1680 in a loop. */
1681
1682 static int
1683 find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_)
1684 {
1685 struct loop *loop = (struct loop *)const_cast<void *>(loop_);
1686 mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_);
1687 struct loop *loc_loop = gimple_bb (loc->stmt)->loop_father;
1688 if (loop->num == loc_loop->num
1689 || flow_loop_nested_p (loop, loc_loop))
1690 return 0;
1691 return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num]
1692 ? -1 : 1);
1693 }
1694
1695 /* Iterates over all locations of REF in LOOP and its subloops calling
1696 fn.operator() with the location as argument. When that operator
1697 returns true the iteration is stopped and true is returned.
1698 Otherwise false is returned. */
1699
1700 template <typename FN>
1701 static bool
1702 for_all_locs_in_loop (struct loop *loop, mem_ref_p ref, FN fn)
1703 {
1704 unsigned i;
1705 mem_ref_loc_p loc;
1706
1707 /* Search for the cluster of locs in the accesses_in_loop vector
1708 which is sorted after postorder index of the loop father. */
1709 loc = ref->accesses_in_loop.bsearch (loop, find_ref_loc_in_loop_cmp);
1710 if (!loc)
1711 return false;
1712
1713 /* We have found one location inside loop or its sub-loops. Iterate
1714 both forward and backward to cover the whole cluster. */
1715 i = loc - ref->accesses_in_loop.address ();
1716 while (i > 0)
1717 {
1718 --i;
1719 mem_ref_loc_p l = &ref->accesses_in_loop[i];
1720 if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1721 break;
1722 if (fn (l))
1723 return true;
1724 }
1725 for (i = loc - ref->accesses_in_loop.address ();
1726 i < ref->accesses_in_loop.length (); ++i)
1727 {
1728 mem_ref_loc_p l = &ref->accesses_in_loop[i];
1729 if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1730 break;
1731 if (fn (l))
1732 return true;
1733 }
1734
1735 return false;
1736 }
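/* Typical use, as in rewrite_mem_refs and first_mem_ref_loc below: pass a
   small function object, e.g.

     for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));

   which visits every recorded location of REF inside LOOP (and its
   subloops) and applies the object's operator ().  */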
1737
1738 /* Rewrites location LOC by TMP_VAR. */
1739
1740 struct rewrite_mem_ref_loc
1741 {
1742 rewrite_mem_ref_loc (tree tmp_var_) : tmp_var (tmp_var_) {}
1743 bool operator () (mem_ref_loc_p loc);
1744 tree tmp_var;
1745 };
1746
1747 bool
1748 rewrite_mem_ref_loc::operator () (mem_ref_loc_p loc)
1749 {
1750 *loc->ref = tmp_var;
1751 update_stmt (loc->stmt);
1752 return false;
1753 }
1754
1755 /* Rewrites all references to REF in LOOP by variable TMP_VAR. */
1756
1757 static void
1758 rewrite_mem_refs (struct loop *loop, mem_ref_p ref, tree tmp_var)
1759 {
1760 for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));
1761 }
1762
1763 /* Stores the first reference location in LOCP. */
1764
1765 struct first_mem_ref_loc_1
1766 {
1767 first_mem_ref_loc_1 (mem_ref_loc_p *locp_) : locp (locp_) {}
1768 bool operator () (mem_ref_loc_p loc);
1769 mem_ref_loc_p *locp;
1770 };
1771
1772 bool
1773 first_mem_ref_loc_1::operator () (mem_ref_loc_p loc)
1774 {
1775 *locp = loc;
1776 return true;
1777 }
1778
1779 /* Returns the first reference location to REF in LOOP. */
1780
1781 static mem_ref_loc_p
1782 first_mem_ref_loc (struct loop *loop, mem_ref_p ref)
1783 {
1784 mem_ref_loc_p locp = NULL;
1785 for_all_locs_in_loop (loop, ref, first_mem_ref_loc_1 (&locp));
1786 return locp;
1787 }
1788
1789 struct prev_flag_edges {
1790 /* Edge to insert new flag comparison code. */
1791 edge append_cond_position;
1792
1793 /* Edge for fall through from previous flag comparison. */
1794 edge last_cond_fallthru;
1795 };
1796
1797 /* Helper function for execute_sm. Emit code to store TMP_VAR into
1798 MEM along edge EX.
1799
1800 The store is only done if MEM has changed. We do this so no
1801 changes to MEM occur on code paths that did not originally store
1802 into it.
1803
1804 The common case for execute_sm will transform:
1805
1806 for (...) {
1807 if (foo)
1808 stuff;
1809 else
1810 MEM = TMP_VAR;
1811 }
1812
1813 into:
1814
1815 lsm = MEM;
1816 for (...) {
1817 if (foo)
1818 stuff;
1819 else
1820 lsm = TMP_VAR;
1821 }
1822 MEM = lsm;
1823
1824 This function will generate:
1825
1826 lsm = MEM;
1827
1828 lsm_flag = false;
1829 ...
1830 for (...) {
1831 if (foo)
1832 stuff;
1833 else {
1834 lsm = TMP_VAR;
1835 lsm_flag = true;
1836 }
1837 }
1838 if (lsm_flag) <--
1839 MEM = lsm; <--
1840 */
1841
1842 static void
1843 execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag)
1844 {
1845 basic_block new_bb, then_bb, old_dest;
1846 bool loop_has_only_one_exit;
1847 edge then_old_edge, orig_ex = ex;
1848 gimple_stmt_iterator gsi;
1849 gimple stmt;
1850 struct prev_flag_edges *prev_edges = (struct prev_flag_edges *) ex->aux;
1851 bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP;
1852
1853 /* ?? Insert store after previous store if applicable. See note
1854 below. */
1855 if (prev_edges)
1856 ex = prev_edges->append_cond_position;
1857
1858 loop_has_only_one_exit = single_pred_p (ex->dest);
1859
1860 if (loop_has_only_one_exit)
1861 ex = split_block_after_labels (ex->dest);
1862
1863 old_dest = ex->dest;
1864 new_bb = split_edge (ex);
1865 then_bb = create_empty_bb (new_bb);
1866 if (irr)
1867 then_bb->flags = BB_IRREDUCIBLE_LOOP;
1868 add_bb_to_loop (then_bb, new_bb->loop_father);
1869
1870 gsi = gsi_start_bb (new_bb);
1871 stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
1872 NULL_TREE, NULL_TREE);
1873 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1874
1875 gsi = gsi_start_bb (then_bb);
1876 /* Insert actual store. */
1877 stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
1878 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1879
1880 make_edge (new_bb, then_bb,
1881 EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
1882 make_edge (new_bb, old_dest,
1883 EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
1884 then_old_edge = make_edge (then_bb, old_dest,
1885 EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
1886
1887 set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);
1888
1889 if (prev_edges)
1890 {
1891 basic_block prevbb = prev_edges->last_cond_fallthru->src;
1892 redirect_edge_succ (prev_edges->last_cond_fallthru, new_bb);
1893 set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
1894 set_immediate_dominator (CDI_DOMINATORS, old_dest,
1895 recompute_dominator (CDI_DOMINATORS, old_dest));
1896 }
1897
1898 /* ?? Because stores may alias, they must happen in the exact
1899 order in which they originally happened. Save the position right after
1900 the (_lsm) store we just created so we can continue appending after
1901 it and maintain the original order. */
1902 {
1903 struct prev_flag_edges *p;
1904
1905 if (orig_ex->aux)
1906 orig_ex->aux = NULL;
1907 alloc_aux_for_edge (orig_ex, sizeof (struct prev_flag_edges));
1908 p = (struct prev_flag_edges *) orig_ex->aux;
1909 p->append_cond_position = then_old_edge;
1910 p->last_cond_fallthru = find_edge (new_bb, old_dest);
1911 orig_ex->aux = (void *) p;
1912 }
1913
1914 if (!loop_has_only_one_exit)
1915 for (gphi_iterator gpi = gsi_start_phis (old_dest);
1916 !gsi_end_p (gpi); gsi_next (&gpi))
1917 {
1918 gphi *phi = gpi.phi ();
1919 unsigned i;
1920
1921 for (i = 0; i < gimple_phi_num_args (phi); i++)
1922 if (gimple_phi_arg_edge (phi, i)->src == new_bb)
1923 {
1924 tree arg = gimple_phi_arg_def (phi, i);
1925 add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
1926 update_stmt (phi);
1927 }
1928 }
1929 /* Remove the original fall through edge. This was the
1930 single_succ_edge (new_bb). */
1931 EDGE_SUCC (new_bb, 0)->flags &= ~EDGE_FALLTHRU;
1932 }
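
/* Illustration of execute_sm_if_changed (sketch only, names made up): when
   several references are sunk to the same exit edge, the flag tests are
   chained in the original store order, e.g.

     if (MEM1_flag)
       MEM1 = MEM1_lsm;
     if (MEM2_flag)
       MEM2 = MEM2_lsm;

   The prev_flag_edges structure stashed in the edge's aux field remembers
   where the next test has to be appended, so that possibly aliasing stores
   keep their original order.  */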
1933
1934 /* When the location is a store to REF, emit code setting FLAG. */
1935
1936 struct sm_set_flag_if_changed
1937 {
1938 sm_set_flag_if_changed (tree flag_) : flag (flag_) {}
1939 bool operator () (mem_ref_loc_p loc);
1940 tree flag;
1941 };
1942
1943 bool
1944 sm_set_flag_if_changed::operator () (mem_ref_loc_p loc)
1945 {
1946 /* Only set the flag for writes. */
1947 if (is_gimple_assign (loc->stmt)
1948 && gimple_assign_lhs_ptr (loc->stmt) == loc->ref)
1949 {
1950 gimple_stmt_iterator gsi = gsi_for_stmt (loc->stmt);
1951 gimple stmt = gimple_build_assign (flag, boolean_true_node);
1952 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1953 }
1954 return false;
1955 }
1956
1957 /* Helper function for execute_sm. On every location where REF is
1958 stored to, set an appropriate flag indicating the store. */
1959
1960 static tree
1961 execute_sm_if_changed_flag_set (struct loop *loop, mem_ref_p ref)
1962 {
1963 tree flag;
1964 char *str = get_lsm_tmp_name (ref->mem.ref, ~0, "_flag");
1965 flag = create_tmp_reg (boolean_type_node, str);
1966 for_all_locs_in_loop (loop, ref, sm_set_flag_if_changed (flag));
1967 return flag;
1968 }
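
/* Illustration of execute_sm_if_changed_flag_set (sketch only): before the
   references are rewritten, every store

     MEM = value;

   inside LOOP is followed by an assignment of true to the new flag register;
   rewrite_mem_refs later turns the store itself into an assignment to the
   lsm temporary.  */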
1969
1970 /* Executes store motion of memory reference REF from LOOP.
1971 Exits from the LOOP are stored in EXITS. The initialization of the
1972 temporary variable is put in the preheader of the loop, and assignments
1973 to the reference from the temporary variable are emitted on the exits. */
1974
1975 static void
1976 execute_sm (struct loop *loop, vec<edge> exits, mem_ref_p ref)
1977 {
1978 tree tmp_var, store_flag = NULL_TREE;
1979 unsigned i;
1980 gassign *load;
1981 struct fmt_data fmt_data;
1982 edge ex;
1983 struct lim_aux_data *lim_data;
1984 bool multi_threaded_model_p = false;
1985 gimple_stmt_iterator gsi;
1986
1987 if (dump_file && (dump_flags & TDF_DETAILS))
1988 {
1989 fprintf (dump_file, "Executing store motion of ");
1990 print_generic_expr (dump_file, ref->mem.ref, 0);
1991 fprintf (dump_file, " from loop %d\n", loop->num);
1992 }
1993
1994 tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
1995 get_lsm_tmp_name (ref->mem.ref, ~0));
1996
1997 fmt_data.loop = loop;
1998 fmt_data.orig_loop = loop;
1999 for_each_index (&ref->mem.ref, force_move_till, &fmt_data);
2000
2001 if (bb_in_transaction (loop_preheader_edge (loop)->src)
2002 || !PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES))
2003 multi_threaded_model_p = true;
2004
2005 if (multi_threaded_model_p)
2006 store_flag = execute_sm_if_changed_flag_set (loop, ref);
2007
2008 rewrite_mem_refs (loop, ref, tmp_var);
2009
2010 /* Emit the load code just before the first reference location in the loop;
2011 the statement is flagged for move_computations, which will hoist it,
2012 after all of its dependencies, to the preheader of the loop. */
2013 gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt);
2014
2015 /* FIXME/TODO: For the multi-threaded variant, we could avoid this
2016 load altogether, since the store is predicated by a flag. We
2017 could do the load only if it was originally in the loop. */
2018 load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref));
2019 lim_data = init_lim_data (load);
2020 lim_data->max_loop = loop;
2021 lim_data->tgt_loop = loop;
2022 gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2023
2024 if (multi_threaded_model_p)
2025 {
2026 load = gimple_build_assign (store_flag, boolean_false_node);
2027 lim_data = init_lim_data (load);
2028 lim_data->max_loop = loop;
2029 lim_data->tgt_loop = loop;
2030 gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2031 }
2032
2033 /* Sink the store to every exit from the loop. */
2034 FOR_EACH_VEC_ELT (exits, i, ex)
2035 if (!multi_threaded_model_p)
2036 {
2037 gassign *store;
2038 store = gimple_build_assign (unshare_expr (ref->mem.ref), tmp_var);
2039 gsi_insert_on_edge (ex, store);
2040 }
2041 else
2042 execute_sm_if_changed (ex, ref->mem.ref, tmp_var, store_flag);
2043 }
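
/* Illustration of the model choice in execute_sm (example only): in

     for (i = 0; i < n; i++)
       if (a[i])
         g = i;

   the global g is not stored to on every path, so sinking an unconditional
   "g = g_lsm;" to the exit would introduce a store the original code might
   never have executed -- a data race if another thread writes g concurrently.
   When store data races are not allowed (--param allow-store-data-races=0)
   or the preheader is inside a transaction, the flag-guarded form produced
   by execute_sm_if_changed is used instead.  */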
2044
2045 /* Hoists memory references MEM_REFS out of LOOP. EXITS is the list of exit
2046 edges of the LOOP. */
2047
2048 static void
2049 hoist_memory_references (struct loop *loop, bitmap mem_refs,
2050 vec<edge> exits)
2051 {
2052 mem_ref_p ref;
2053 unsigned i;
2054 bitmap_iterator bi;
2055
2056 EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
2057 {
2058 ref = memory_accesses.refs_list[i];
2059 execute_sm (loop, exits, ref);
2060 }
2061 }
2062
2063 struct ref_always_accessed
2064 {
2065 ref_always_accessed (struct loop *loop_, bool stored_p_)
2066 : loop (loop_), stored_p (stored_p_) {}
2067 bool operator () (mem_ref_loc_p loc);
2068 struct loop *loop;
2069 bool stored_p;
2070 };
2071
2072 bool
2073 ref_always_accessed::operator () (mem_ref_loc_p loc)
2074 {
2075 struct loop *must_exec;
2076
2077 if (!get_lim_data (loc->stmt))
2078 return false;
2079
2080 /* If we require an always executed store, make sure the statement
2081 stores to the reference. */
2082 if (stored_p)
2083 {
2084 tree lhs = gimple_get_lhs (loc->stmt);
2085 if (!lhs
2086 || lhs != *loc->ref)
2087 return false;
2088 }
2089
2090 must_exec = get_lim_data (loc->stmt)->always_executed_in;
2091 if (!must_exec)
2092 return false;
2093
2094 if (must_exec == loop
2095 || flow_loop_nested_p (must_exec, loop))
2096 return true;
2097
2098 return false;
2099 }
2100
2101 /* Returns true if REF is always accessed in LOOP. If STORED_P is true,
2102 make sure REF is always stored to in LOOP. */
2103
2104 static bool
2105 ref_always_accessed_p (struct loop *loop, mem_ref_p ref, bool stored_p)
2106 {
2107 return for_all_locs_in_loop (loop, ref,
2108 ref_always_accessed (loop, stored_p));
2109 }
2110
2111 /* Returns true if REF1 and REF2 are independent. */
2112
2113 static bool
2114 refs_independent_p (mem_ref_p ref1, mem_ref_p ref2)
2115 {
2116 if (ref1 == ref2)
2117 return true;
2118
2119 if (dump_file && (dump_flags & TDF_DETAILS))
2120 fprintf (dump_file, "Querying dependency of refs %u and %u: ",
2121 ref1->id, ref2->id);
2122
2123 if (mem_refs_may_alias_p (ref1, ref2, &memory_accesses.ttae_cache))
2124 {
2125 if (dump_file && (dump_flags & TDF_DETAILS))
2126 fprintf (dump_file, "dependent.\n");
2127 return false;
2128 }
2129 else
2130 {
2131 if (dump_file && (dump_flags & TDF_DETAILS))
2132 fprintf (dump_file, "independent.\n");
2133 return true;
2134 }
2135 }
2136
2137 /* Mark REF dependent on stores or loads (according to STORED_P) in LOOP
2138 and its super-loops. */
2139
2140 static void
2141 record_dep_loop (struct loop *loop, mem_ref_p ref, bool stored_p)
2142 {
2143 /* We can propagate dependent-in-loop bits up the loop
2144 hierarchy to all outer loops. */
2145 while (loop != current_loops->tree_root
2146 && bitmap_set_bit (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
2147 loop = loop_outer (loop);
2148 }
2149
2150 /* Returns true if REF is independent of all other memory references in
2151 LOOP. */
2152
2153 static bool
2154 ref_indep_loop_p_1 (struct loop *loop, mem_ref_p ref, bool stored_p)
2155 {
2156 bitmap refs_to_check;
2157 unsigned i;
2158 bitmap_iterator bi;
2159 mem_ref_p aref;
2160
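  /* If REF itself is (treated as) stored in LOOP, it has to be independent
     of every other reference in the loop; if it is only loaded, checking it
     against the stored references is sufficient.  */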
2161 if (stored_p)
2162 refs_to_check = &memory_accesses.refs_in_loop[loop->num];
2163 else
2164 refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];
2165
2166 if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID))
2167 return false;
2168
2169 EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
2170 {
2171 aref = memory_accesses.refs_list[i];
2172 if (!refs_independent_p (ref, aref))
2173 return false;
2174 }
2175
2176 return true;
2177 }
2178
2179 /* Returns true if REF is independent of all other memory references in
2180 LOOP. Wrapper over ref_indep_loop_p_1, caching its results. */
2181
2182 static bool
2183 ref_indep_loop_p_2 (struct loop *loop, mem_ref_p ref, bool stored_p)
2184 {
2185 stored_p |= (ref->stored && bitmap_bit_p (ref->stored, loop->num));
2186
2187 if (bitmap_bit_p (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
2188 return true;
2189 if (bitmap_bit_p (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
2190 return false;
2191
2192 struct loop *inner = loop->inner;
2193 while (inner)
2194 {
2195 if (!ref_indep_loop_p_2 (inner, ref, stored_p))
2196 return false;
2197 inner = inner->next;
2198 }
2199
2200 bool indep_p = ref_indep_loop_p_1 (loop, ref, stored_p);
2201
2202 if (dump_file && (dump_flags & TDF_DETAILS))
2203 fprintf (dump_file, "Querying dependencies of ref %u in loop %d: %s\n",
2204 ref->id, loop->num, indep_p ? "independent" : "dependent");
2205
2206 /* Record the computed result in the cache. */
2207 if (indep_p)
2208 {
2209 if (bitmap_set_bit (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p))
2210 && stored_p)
2211 {
2212 /* If it is independent of all refs then it is independent
2213 of stores, too. */
2214 bitmap_set_bit (&ref->indep_loop, LOOP_DEP_BIT (loop->num, false));
2215 }
2216 }
2217 else
2218 {
2219 record_dep_loop (loop, ref, stored_p);
2220 if (!stored_p)
2221 {
2222 /* If it is dependent on stores it is dependent on
2223 all refs, too. */
2224 record_dep_loop (loop, ref, true);
2225 }
2226 }
2227
2228 return indep_p;
2229 }
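
/* Note on the caching in ref_indep_loop_p_2: each ref keeps two bitmaps,
   indep_loop and dep_loop, indexed by LOOP_DEP_BIT (loop->num, stored_p).
   Independence of all refs implies independence of the stores alone, and
   dependence on the stores implies dependence on all refs, which is why the
   weaker bit is set as well when a result is recorded.  */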
2230
2231 /* Returns true if REF is independent of all other memory references in
2232 LOOP. */
2233
2234 static bool
2235 ref_indep_loop_p (struct loop *loop, mem_ref_p ref)
2236 {
2237 gcc_checking_assert (MEM_ANALYZABLE (ref));
2238
2239 return ref_indep_loop_p_2 (loop, ref, false);
2240 }
2241
2242 /* Returns true if we can perform store motion of REF from LOOP. */
2243
2244 static bool
2245 can_sm_ref_p (struct loop *loop, mem_ref_p ref)
2246 {
2247 tree base;
2248
2249 /* Can't hoist unanalyzable refs. */
2250 if (!MEM_ANALYZABLE (ref))
2251 return false;
2252
2253 /* It should be movable. */
2254 if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
2255 || TREE_THIS_VOLATILE (ref->mem.ref)
2256 || !for_each_index (&ref->mem.ref, may_move_till, loop))
2257 return false;
2258
2259 /* If it can throw, fail; we do not properly update EH info. */
2260 if (tree_could_throw_p (ref->mem.ref))
2261 return false;
2262
2263 /* If it can trap, it must always be executed in LOOP.
2264 Readonly memory locations may trap when storing to them, but
2265 tree_could_trap_p is a predicate for rvalues, so check that
2266 explicitly. */
2267 base = get_base_address (ref->mem.ref);
2268 if ((tree_could_trap_p (ref->mem.ref)
2269 || (DECL_P (base) && TREE_READONLY (base)))
2270 && !ref_always_accessed_p (loop, ref, true))
2271 return false;
2272
2273 /* And it must be independent of all other memory references
2274 in LOOP. */
2275 if (!ref_indep_loop_p (loop, ref))
2276 return false;
2277
2278 return true;
2279 }
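
/* Example of a candidate rejected by can_sm_ref_p (illustration only):

     for (i = 0; i < n; i++)
       if (p)
         *p = i;

   *p may trap (p might be null) and the store is not executed on every
   iteration, so ref_always_accessed_p fails and *p is not considered for
   store motion.  */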
2280
2281 /* Marks in REFS_TO_SM the references in LOOP for which store motion should
2282 be performed. SM_EXECUTED is the set of references for which store
2283 motion was already performed in one of the outer loops. */
2284
2285 static void
2286 find_refs_for_sm (struct loop *loop, bitmap sm_executed, bitmap refs_to_sm)
2287 {
2288 bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num];
2289 unsigned i;
2290 bitmap_iterator bi;
2291 mem_ref_p ref;
2292
2293 EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
2294 {
2295 ref = memory_accesses.refs_list[i];
2296 if (can_sm_ref_p (loop, ref))
2297 bitmap_set_bit (refs_to_sm, i);
2298 }
2299 }
2300
2301 /* Checks whether LOOP (with exits stored in EXITS array) is suitable
2302 for a store motion optimization (i.e. whether we can insert statements
2303 on its exits). */
2304
2305 static bool
2306 loop_suitable_for_sm (struct loop *loop ATTRIBUTE_UNUSED,
2307 vec<edge> exits)
2308 {
2309 unsigned i;
2310 edge ex;
2311
2312 FOR_EACH_VEC_ELT (exits, i, ex)
2313 if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
2314 return false;
2315
2316 return true;
2317 }
2318
2319 /* Try to perform store motion for all memory references modified inside
2320 LOOP. SM_EXECUTED is the bitmap of the memory references for which
2321 store motion was executed in one of the outer loops. */
2322
2323 static void
2324 store_motion_loop (struct loop *loop, bitmap sm_executed)
2325 {
2326 vec<edge> exits = get_loop_exit_edges (loop);
2327 struct loop *subloop;
2328 bitmap sm_in_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
2329
2330 if (loop_suitable_for_sm (loop, exits))
2331 {
2332 find_refs_for_sm (loop, sm_executed, sm_in_loop);
2333 hoist_memory_references (loop, sm_in_loop, exits);
2334 }
2335 exits.release ();
2336
2337 bitmap_ior_into (sm_executed, sm_in_loop);
2338 for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
2339 store_motion_loop (subloop, sm_executed);
2340 bitmap_and_compl_into (sm_executed, sm_in_loop);
2341 BITMAP_FREE (sm_in_loop);
2342 }
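
/* Note on SM_EXECUTED in store_motion_loop: the references moved in this
   loop are added to the bitmap before recursing into the subloops, so the
   inner loops skip them, and are removed again before returning, so they do
   not leak into the processing of this loop's siblings.  */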
2343
2344 /* Try to perform store motion for all memory references modified inside
2345 loops. */
2346
2347 static void
2348 store_motion (void)
2349 {
2350 struct loop *loop;
2351 bitmap sm_executed = BITMAP_ALLOC (&lim_bitmap_obstack);
2352
2353 for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
2354 store_motion_loop (loop, sm_executed);
2355
2356 BITMAP_FREE (sm_executed);
2357 gsi_commit_edge_inserts ();
2358 }
2359
2360 /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
2361 for each such basic block bb records the outermost loop for which execution
2362 of the loop's header implies execution of bb. CONTAINS_CALL is the bitmap of
2363 blocks that contain a nonpure call. */
2364
2365 static void
2366 fill_always_executed_in_1 (struct loop *loop, sbitmap contains_call)
2367 {
2368 basic_block bb = NULL, *bbs, last = NULL;
2369 unsigned i;
2370 edge e;
2371 struct loop *inn_loop = loop;
2372
2373 if (ALWAYS_EXECUTED_IN (loop->header) == NULL)
2374 {
2375 bbs = get_loop_body_in_dom_order (loop);
2376
2377 for (i = 0; i < loop->num_nodes; i++)
2378 {
2379 edge_iterator ei;
2380 bb = bbs[i];
2381
2382 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
2383 last = bb;
2384
2385 if (bitmap_bit_p (contains_call, bb->index))
2386 break;
2387
2388 FOR_EACH_EDGE (e, ei, bb->succs)
2389 if (!flow_bb_inside_loop_p (loop, e->dest))
2390 break;
2391 if (e)
2392 break;
2393
2394 /* A loop might be infinite (TODO use simple loop analysis
2395 to disprove this if possible). */
2396 if (bb->flags & BB_IRREDUCIBLE_LOOP)
2397 break;
2398
2399 if (!flow_bb_inside_loop_p (inn_loop, bb))
2400 break;
2401
2402 if (bb->loop_father->header == bb)
2403 {
2404 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
2405 break;
2406
2407 /* In a loop that is always entered we may proceed anyway.
2408 But record that we entered it and stop once we leave it. */
2409 inn_loop = bb->loop_father;
2410 }
2411 }
2412
2413 while (1)
2414 {
2415 SET_ALWAYS_EXECUTED_IN (last, loop);
2416 if (last == loop->header)
2417 break;
2418 last = get_immediate_dominator (CDI_DOMINATORS, last);
2419 }
2420
2421 free (bbs);
2422 }
2423
2424 for (loop = loop->inner; loop; loop = loop->next)
2425 fill_always_executed_in_1 (loop, contains_call);
2426 }
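
/* Illustration for fill_always_executed_in_1 (example only): in

     do
       {
         if (maybe_stop (i))
           break;
         a[i] = 0;
       }
     while (++i < n);

   the block containing the store is not always executed when the loop is
   entered: the call might not return and the break can leave the loop before
   the store is reached.  The walk above therefore stops at the block with
   the call, and only that block (and its dominators inside the loop) gets
   ALWAYS_EXECUTED_IN set to the loop.  */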
2427
2428 /* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e.
2429 for each such basic block bb records the outermost loop for which execution
2430 of the loop's header implies execution of bb. */
2431
2432 static void
2433 fill_always_executed_in (void)
2434 {
2435 sbitmap contains_call = sbitmap_alloc (last_basic_block_for_fn (cfun));
2436 basic_block bb;
2437 struct loop *loop;
2438
2439 bitmap_clear (contains_call);
2440 FOR_EACH_BB_FN (bb, cfun)
2441 {
2442 gimple_stmt_iterator gsi;
2443 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2444 {
2445 if (nonpure_call_p (gsi_stmt (gsi)))
2446 break;
2447 }
2448
2449 if (!gsi_end_p (gsi))
2450 bitmap_set_bit (contains_call, bb->index);
2451 }
2452
2453 for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
2454 fill_always_executed_in_1 (loop, contains_call);
2455
2456 sbitmap_free (contains_call);
2457 }
2458
2459
2460 /* Compute the global information needed by the loop invariant motion pass. */
2461
2462 static void
2463 tree_ssa_lim_initialize (void)
2464 {
2465 struct loop *loop;
2466 unsigned i;
2467
2468 bitmap_obstack_initialize (&lim_bitmap_obstack);
2469 gcc_obstack_init (&mem_ref_obstack);
2470 lim_aux_data_map = new hash_map<gimple, lim_aux_data *>;
2471
2472 if (flag_tm)
2473 compute_transaction_bits ();
2474
2475 alloc_aux_for_edges (0);
2476
2477 memory_accesses.refs = new hash_table<mem_ref_hasher> (100);
2478 memory_accesses.refs_list.create (100);
2479 /* Allocate a special, unanalyzable mem-ref with ID zero. */
2480 memory_accesses.refs_list.quick_push
2481 (mem_ref_alloc (error_mark_node, 0, UNANALYZABLE_MEM_ID));
2482
2483 memory_accesses.refs_in_loop.create (number_of_loops (cfun));
2484 memory_accesses.refs_in_loop.quick_grow (number_of_loops (cfun));
2485 memory_accesses.refs_stored_in_loop.create (number_of_loops (cfun));
2486 memory_accesses.refs_stored_in_loop.quick_grow (number_of_loops (cfun));
2487 memory_accesses.all_refs_stored_in_loop.create (number_of_loops (cfun));
2488 memory_accesses.all_refs_stored_in_loop.quick_grow (number_of_loops (cfun));
2489
2490 for (i = 0; i < number_of_loops (cfun); i++)
2491 {
2492 bitmap_initialize (&memory_accesses.refs_in_loop[i],
2493 &lim_bitmap_obstack);
2494 bitmap_initialize (&memory_accesses.refs_stored_in_loop[i],
2495 &lim_bitmap_obstack);
2496 bitmap_initialize (&memory_accesses.all_refs_stored_in_loop[i],
2497 &lim_bitmap_obstack);
2498 }
2499
2500 memory_accesses.ttae_cache = NULL;
2501
2502 /* Initialize bb_loop_postorder with a mapping from loop->num to
2503 its postorder index. */
2504 i = 0;
2505 bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun));
2506 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
2507 bb_loop_postorder[loop->num] = i++;
2508 }
2509
2510 /* Cleans up after the invariant motion pass. */
2511
2512 static void
2513 tree_ssa_lim_finalize (void)
2514 {
2515 basic_block bb;
2516 unsigned i;
2517 mem_ref_p ref;
2518
2519 free_aux_for_edges ();
2520
2521 FOR_EACH_BB_FN (bb, cfun)
2522 SET_ALWAYS_EXECUTED_IN (bb, NULL);
2523
2524 bitmap_obstack_release (&lim_bitmap_obstack);
2525 delete lim_aux_data_map;
2526
2527 delete memory_accesses.refs;
2528 memory_accesses.refs = NULL;
2529
2530 FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
2531 memref_free (ref);
2532 memory_accesses.refs_list.release ();
2533 obstack_free (&mem_ref_obstack, NULL);
2534
2535 memory_accesses.refs_in_loop.release ();
2536 memory_accesses.refs_stored_in_loop.release ();
2537 memory_accesses.all_refs_stored_in_loop.release ();
2538
2539 if (memory_accesses.ttae_cache)
2540 free_affine_expand_cache (&memory_accesses.ttae_cache);
2541
2542 free (bb_loop_postorder);
2543 }
2544
2545 /* Moves invariants from loops. Only "expensive" invariants are moved out --
2546 i.e. those that are likely to be a win regardless of the register pressure. */
2547
2548 unsigned int
2549 tree_ssa_lim (void)
2550 {
2551 unsigned int todo;
2552
2553 tree_ssa_lim_initialize ();
2554
2555 /* Gathers information about memory accesses in the loops. */
2556 analyze_memory_references ();
2557
2558 /* Fills ALWAYS_EXECUTED_IN information for basic blocks. */
2559 fill_always_executed_in ();
2560
2561 /* For each statement determine the outermost loop in which it is
2562 invariant and the cost of computing the invariant. */
2563 invariantness_dom_walker (CDI_DOMINATORS)
2564 .walk (cfun->cfg->x_entry_block_ptr);
2565
2566 /* Execute store motion. Force the necessary invariants to be moved
2567 out of the loops as well. */
2568 store_motion ();
2569
2570 /* Move the expressions that are expensive enough. */
2571 todo = move_computations ();
2572
2573 tree_ssa_lim_finalize ();
2574
2575 return todo;
2576 }
2577
2578 /* Loop invariant motion pass. */
2579
2580 namespace {
2581
2582 const pass_data pass_data_lim =
2583 {
2584 GIMPLE_PASS, /* type */
2585 "lim", /* name */
2586 OPTGROUP_LOOP, /* optinfo_flags */
2587 TV_LIM, /* tv_id */
2588 PROP_cfg, /* properties_required */
2589 0, /* properties_provided */
2590 0, /* properties_destroyed */
2591 0, /* todo_flags_start */
2592 0, /* todo_flags_finish */
2593 };
2594
2595 class pass_lim : public gimple_opt_pass
2596 {
2597 public:
2598 pass_lim (gcc::context *ctxt)
2599 : gimple_opt_pass (pass_data_lim, ctxt)
2600 {}
2601
2602 /* opt_pass methods: */
2603 opt_pass * clone () { return new pass_lim (m_ctxt); }
2604 virtual bool gate (function *) { return flag_tree_loop_im != 0; }
2605 virtual unsigned int execute (function *);
2606
2607 }; // class pass_lim
2608
2609 unsigned int
2610 pass_lim::execute (function *fun)
2611 {
2612 if (number_of_loops (fun) <= 1)
2613 return 0;
2614
2615 return tree_ssa_lim ();
2616 }
2617
2618 } // anon namespace
2619
2620 gimple_opt_pass *
2621 make_pass_lim (gcc::context *ctxt)
2622 {
2623 return new pass_lim (ctxt);
2624 }
2625
2626