gcc/tree-ssa-loop-im.c
1 /* Loop invariant motion.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "alias.h"
25 #include "symtab.h"
26 #include "tree.h"
27 #include "fold-const.h"
28 #include "tm_p.h"
29 #include "predict.h"
30 #include "hard-reg-set.h"
31 #include "function.h"
32 #include "dominance.h"
33 #include "cfg.h"
34 #include "cfganal.h"
35 #include "basic-block.h"
36 #include "gimple-pretty-print.h"
37 #include "tree-ssa-alias.h"
38 #include "internal-fn.h"
39 #include "tree-eh.h"
40 #include "gimple-expr.h"
41 #include "gimple.h"
42 #include "gimplify.h"
43 #include "gimple-iterator.h"
44 #include "gimple-ssa.h"
45 #include "tree-cfg.h"
46 #include "tree-phinodes.h"
47 #include "ssa-iterators.h"
48 #include "stringpool.h"
49 #include "tree-ssanames.h"
50 #include "tree-ssa-loop-manip.h"
51 #include "tree-ssa-loop.h"
52 #include "tree-into-ssa.h"
53 #include "cfgloop.h"
54 #include "domwalk.h"
55 #include "params.h"
56 #include "tree-pass.h"
57 #include "flags.h"
58 #include "tree-affine.h"
59 #include "tree-ssa-propagate.h"
60 #include "trans-mem.h"
61 #include "gimple-fold.h"
62
63 /* TODO: Support for predicated code motion. I.e.
64
65 while (1)
66 {
67 if (cond)
68 {
69 a = inv;
70 something;
71 }
72 }
73
74 Where COND and INV are invariants, but evaluating INV may trap or be
75 invalid for some other reason if !COND. This may be transformed to
76
77 if (cond)
78 a = inv;
79 while (1)
80 {
81 if (cond)
82 something;
83 } */
84
85 /* The auxiliary data kept for each statement. */
86
87 struct lim_aux_data
88 {
89 struct loop *max_loop; /* The outermost loop in which the statement
90 is invariant. */
91
92 struct loop *tgt_loop; /* The loop out of which we want to move the
93 invariant. */
94
95 struct loop *always_executed_in;
96 /* The outermost loop for which we are sure
97 the statement is executed if the loop
98 is entered. */
99
100 unsigned cost; /* Cost of the computation performed by the
101 statement. */
102
103 vec<gimple> depends; /* Vector of statements that must also be
104 hoisted out of the loop when this statement
105 is hoisted; i.e. those that define the
106 operands of the statement and are inside
107 the MAX_LOOP loop. */
108 };
109
110 /* Maps statements to their lim_aux_data. */
111
112 static hash_map<gimple, lim_aux_data *> *lim_aux_data_map;
113
114 /* Description of a memory reference location. */
115
116 typedef struct mem_ref_loc
117 {
118 tree *ref; /* The reference itself. */
119 gimple stmt; /* The statement in which it occurs. */
120 } *mem_ref_loc_p;
121
122
123 /* Description of a memory reference. */
124
125 typedef struct im_mem_ref
126 {
127 unsigned id; /* ID assigned to the memory reference
128 (its index in memory_accesses.refs_list) */
129 hashval_t hash; /* Its hash value. */
130
131 /* The memory access itself and associated caching of alias-oracle
132 query meta-data. */
133 ao_ref mem;
134
135 bitmap stored; /* The set of loops in which this memory location
136 is stored. */
137 vec<mem_ref_loc> accesses_in_loop;
138 /* The locations of the accesses. Vector
139 indexed by the loop number. */
140
141 /* The following sets are computed on demand. We keep both the set and
142 its complement, so that we know whether the information was
143 already computed or not. */
144 bitmap_head indep_loop; /* The set of loops in which the memory
145 reference is independent, meaning:
146 If it is stored in the loop, this store
147 is independent of all other loads and
148 stores.
149 If it is only loaded, then it is independent
150 of all stores in the loop. */
151 bitmap_head dep_loop; /* The complement of INDEP_LOOP. */
152 } *mem_ref_p;
153
154 /* We use two bits per loop in the ref->{in,}dep_loop bitmaps, the first
155 to record (in)dependence against stores in the loop and its subloops, the
156 second to record (in)dependence against all references in the loop
157 and its subloops. */
158 #define LOOP_DEP_BIT(loopnum, storedp) (2 * (loopnum) + (storedp ? 1 : 0))
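/* A small worked example of the encoding above (illustrative only):
   loop number 3 uses bits 6 and 7 of these bitmaps, since
   LOOP_DEP_BIT (3, false) == 6 and LOOP_DEP_BIT (3, true) == 7.  */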
159
160 /* Mem_ref hashtable helpers. */
161
162 struct mem_ref_hasher : typed_noop_remove <im_mem_ref>
163 {
164 typedef im_mem_ref *value_type;
165 typedef tree_node *compare_type;
166 static inline hashval_t hash (const im_mem_ref *);
167 static inline bool equal (const im_mem_ref *, const tree_node *);
168 };
169
170 /* A hash function for struct im_mem_ref object OBJ. */
171
172 inline hashval_t
173 mem_ref_hasher::hash (const im_mem_ref *mem)
174 {
175 return mem->hash;
176 }
177
178 /* An equality function for struct im_mem_ref object MEM1 with
179 memory reference OBJ2. */
180
181 inline bool
182 mem_ref_hasher::equal (const im_mem_ref *mem1, const tree_node *obj2)
183 {
184 return operand_equal_p (mem1->mem.ref, (const_tree) obj2, 0);
185 }
186
187
188 /* Description of memory accesses in loops. */
189
190 static struct
191 {
192 /* The hash table of memory references accessed in loops. */
193 hash_table<mem_ref_hasher> *refs;
194
195 /* The list of memory references. */
196 vec<mem_ref_p> refs_list;
197
198 /* The set of memory references accessed in each loop. */
199 vec<bitmap_head> refs_in_loop;
200
201 /* The set of memory references stored in each loop. */
202 vec<bitmap_head> refs_stored_in_loop;
203
204 /* The set of memory references stored in each loop, including subloops. */
205 vec<bitmap_head> all_refs_stored_in_loop;
206
207 /* Cache for expanding memory addresses. */
208 hash_map<tree, name_expansion *> *ttae_cache;
209 } memory_accesses;
210
211 /* Obstack for the bitmaps in the above data structures. */
212 static bitmap_obstack lim_bitmap_obstack;
213 static obstack mem_ref_obstack;
214
215 static bool ref_indep_loop_p (struct loop *, mem_ref_p);
216
217 /* Minimum cost of an expensive expression. */
218 #define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))
219
220 /* The outermost loop for which execution of the header guarantees that the
221 block will be executed. */
222 #define ALWAYS_EXECUTED_IN(BB) ((struct loop *) (BB)->aux)
223 #define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))
224
225 /* ID of the shared unanalyzable mem. */
226 #define UNANALYZABLE_MEM_ID 0
227
228 /* Whether the reference was analyzable. */
229 #define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID)
230
231 static struct lim_aux_data *
232 init_lim_data (gimple stmt)
233 {
234 lim_aux_data *p = XCNEW (struct lim_aux_data);
235 lim_aux_data_map->put (stmt, p);
236
237 return p;
238 }
239
240 static struct lim_aux_data *
241 get_lim_data (gimple stmt)
242 {
243 lim_aux_data **p = lim_aux_data_map->get (stmt);
244 if (!p)
245 return NULL;
246
247 return *p;
248 }
249
250 /* Releases the memory occupied by DATA. */
251
252 static void
253 free_lim_aux_data (struct lim_aux_data *data)
254 {
255 data->depends.release ();
256 free (data);
257 }
258
259 static void
260 clear_lim_data (gimple stmt)
261 {
262 lim_aux_data **p = lim_aux_data_map->get (stmt);
263 if (!p)
264 return;
265
266 free_lim_aux_data (*p);
267 *p = NULL;
268 }
269
270
271 /* The possibilities of statement movement. */
272 enum move_pos
273 {
274 MOVE_IMPOSSIBLE, /* No movement -- side effect expression. */
275 MOVE_PRESERVE_EXECUTION, /* Must not cause the non-executed statement
276 to become executed -- memory accesses, ... */
277 MOVE_POSSIBLE /* Unlimited movement. */
278 };
279
280
281 /* If it is possible to hoist the statement STMT unconditionally,
282 returns MOVE_POSSIBLE.
283 If it is possible to hoist the statement STMT, but we must avoid making
284 it executed if it would not be executed in the original program (e.g.
285 because it may trap), return MOVE_PRESERVE_EXECUTION.
286 Otherwise return MOVE_IMPOSSIBLE. */
287
288 enum move_pos
289 movement_possibility (gimple stmt)
290 {
291 tree lhs;
292 enum move_pos ret = MOVE_POSSIBLE;
293
294 if (flag_unswitch_loops
295 && gimple_code (stmt) == GIMPLE_COND)
296 {
297 /* If we perform unswitching, force the operands of the invariant
298 condition to be moved out of the loop. */
299 return MOVE_POSSIBLE;
300 }
301
302 if (gimple_code (stmt) == GIMPLE_PHI
303 && gimple_phi_num_args (stmt) <= 2
304 && !virtual_operand_p (gimple_phi_result (stmt))
305 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
306 return MOVE_POSSIBLE;
307
308 if (gimple_get_lhs (stmt) == NULL_TREE)
309 return MOVE_IMPOSSIBLE;
310
311 if (gimple_vdef (stmt))
312 return MOVE_IMPOSSIBLE;
313
314 if (stmt_ends_bb_p (stmt)
315 || gimple_has_volatile_ops (stmt)
316 || gimple_has_side_effects (stmt)
317 || stmt_could_throw_p (stmt))
318 return MOVE_IMPOSSIBLE;
319
320 if (is_gimple_call (stmt))
321 {
322 /* While a pure or const call is guaranteed to have no side effects, we
323 cannot move it arbitrarily. Consider code like
324
325 char *s = something ();
326
327 while (1)
328 {
329 if (s)
330 t = strlen (s);
331 else
332 t = 0;
333 }
334
335 Here the strlen call cannot be moved out of the loop, even though
336 s is invariant. In addition to possibly creating a call with
337 invalid arguments, moving out a function call that is not executed
338 may cause performance regressions in case the call is costly and
339 not executed at all. */
340 ret = MOVE_PRESERVE_EXECUTION;
341 lhs = gimple_call_lhs (stmt);
342 }
343 else if (is_gimple_assign (stmt))
344 lhs = gimple_assign_lhs (stmt);
345 else
346 return MOVE_IMPOSSIBLE;
347
348 if (TREE_CODE (lhs) == SSA_NAME
349 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
350 return MOVE_IMPOSSIBLE;
351
352 if (TREE_CODE (lhs) != SSA_NAME
353 || gimple_could_trap_p (stmt))
354 return MOVE_PRESERVE_EXECUTION;
355
356 /* Non-local loads in a transaction cannot be hoisted out. Well,
357 unless the load happens on every path out of the loop, but we
358 don't take this into account yet. */
359 if (flag_tm
360 && gimple_in_transaction (stmt)
361 && gimple_assign_single_p (stmt))
362 {
363 tree rhs = gimple_assign_rhs1 (stmt);
364 if (DECL_P (rhs) && is_global_var (rhs))
365 {
366 if (dump_file)
367 {
368 fprintf (dump_file, "Cannot hoist conditional load of ");
369 print_generic_expr (dump_file, rhs, TDF_SLIM);
370 fprintf (dump_file, " because it is in a transaction.\n");
371 }
372 return MOVE_IMPOSSIBLE;
373 }
374 }
375
376 return ret;
377 }
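/* A few classifications implied by the checks above (an informal,
   non-exhaustive sketch with made-up SSA names): a store "*p_1 = x_2"
   has a VDEF and is MOVE_IMPOSSIBLE; a simple arithmetic statement
   "x_1 = y_2 + z_3" is MOVE_POSSIBLE; a load "x_1 = *p_2" may trap and
   is therefore only MOVE_PRESERVE_EXECUTION.  */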
378
379 /* Suppose that operand DEF is used inside the LOOP. Returns the outermost
380 loop to which we could move the expression using DEF if it did not have
381 other operands, i.e. the outermost loop enclosing LOOP in which the value
382 of DEF is invariant. */
383
384 static struct loop *
385 outermost_invariant_loop (tree def, struct loop *loop)
386 {
387 gimple def_stmt;
388 basic_block def_bb;
389 struct loop *max_loop;
390 struct lim_aux_data *lim_data;
391
392 if (!def)
393 return superloop_at_depth (loop, 1);
394
395 if (TREE_CODE (def) != SSA_NAME)
396 {
397 gcc_assert (is_gimple_min_invariant (def));
398 return superloop_at_depth (loop, 1);
399 }
400
401 def_stmt = SSA_NAME_DEF_STMT (def);
402 def_bb = gimple_bb (def_stmt);
403 if (!def_bb)
404 return superloop_at_depth (loop, 1);
405
406 max_loop = find_common_loop (loop, def_bb->loop_father);
407
408 lim_data = get_lim_data (def_stmt);
409 if (lim_data != NULL && lim_data->max_loop != NULL)
410 max_loop = find_common_loop (max_loop,
411 loop_outer (lim_data->max_loop));
412 if (max_loop == loop)
413 return NULL;
414 max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);
415
416 return max_loop;
417 }
418
419 /* DATA is a structure containing information associated with a statement
420 inside LOOP. DEF is one of the operands of this statement.
421
422 Find the outermost loop enclosing LOOP in which the value of DEF is invariant
423 and record this in the DATA->max_loop field. If DEF itself is defined inside
424 this loop as well (i.e. we need to hoist it out of the loop if we want
425 to hoist the statement represented by DATA), record the statement in which
426 DEF is defined in the DATA->depends list. Additionally, if ADD_COST is true,
427 add the cost of the computation of DEF to DATA->cost.
428
429 If DEF is not invariant in LOOP, return false. Otherwise return true. */
430
431 static bool
432 add_dependency (tree def, struct lim_aux_data *data, struct loop *loop,
433 bool add_cost)
434 {
435 gimple def_stmt = SSA_NAME_DEF_STMT (def);
436 basic_block def_bb = gimple_bb (def_stmt);
437 struct loop *max_loop;
438 struct lim_aux_data *def_data;
439
440 if (!def_bb)
441 return true;
442
443 max_loop = outermost_invariant_loop (def, loop);
444 if (!max_loop)
445 return false;
446
447 if (flow_loop_nested_p (data->max_loop, max_loop))
448 data->max_loop = max_loop;
449
450 def_data = get_lim_data (def_stmt);
451 if (!def_data)
452 return true;
453
454 if (add_cost
455 /* Only add the cost if the statement defining DEF is inside LOOP,
456 i.e. if it is likely that by moving the invariants dependent
457 on it, we will be able to avoid creating a new register for
458 it (since it will be only used in these dependent invariants). */
459 && def_bb->loop_father == loop)
460 data->cost += def_data->cost;
461
462 data->depends.safe_push (def_stmt);
463
464 return true;
465 }
466
467 /* Returns an estimate of the cost of statement STMT. The values here
468 are just ad-hoc constants, similar to costs for inlining. */
469
470 static unsigned
471 stmt_cost (gimple stmt)
472 {
473 /* Always try to create possibilities for unswitching. */
474 if (gimple_code (stmt) == GIMPLE_COND
475 || gimple_code (stmt) == GIMPLE_PHI)
476 return LIM_EXPENSIVE;
477
478 /* We should be hoisting calls if possible. */
479 if (is_gimple_call (stmt))
480 {
481 tree fndecl;
482
483 /* Unless the call is a builtin_constant_p; this always folds to a
484 constant, so moving it is useless. */
485 fndecl = gimple_call_fndecl (stmt);
486 if (fndecl
487 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
488 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CONSTANT_P)
489 return 0;
490
491 return LIM_EXPENSIVE;
492 }
493
494 /* Hoisting memory references out should almost surely be a win. */
495 if (gimple_references_memory_p (stmt))
496 return LIM_EXPENSIVE;
497
498 if (gimple_code (stmt) != GIMPLE_ASSIGN)
499 return 1;
500
501 switch (gimple_assign_rhs_code (stmt))
502 {
503 case MULT_EXPR:
504 case WIDEN_MULT_EXPR:
505 case WIDEN_MULT_PLUS_EXPR:
506 case WIDEN_MULT_MINUS_EXPR:
507 case DOT_PROD_EXPR:
508 case FMA_EXPR:
509 case TRUNC_DIV_EXPR:
510 case CEIL_DIV_EXPR:
511 case FLOOR_DIV_EXPR:
512 case ROUND_DIV_EXPR:
513 case EXACT_DIV_EXPR:
514 case CEIL_MOD_EXPR:
515 case FLOOR_MOD_EXPR:
516 case ROUND_MOD_EXPR:
517 case TRUNC_MOD_EXPR:
518 case RDIV_EXPR:
519 /* Division and multiplication are usually expensive. */
520 return LIM_EXPENSIVE;
521
522 case LSHIFT_EXPR:
523 case RSHIFT_EXPR:
524 case WIDEN_LSHIFT_EXPR:
525 case LROTATE_EXPR:
526 case RROTATE_EXPR:
527 /* Shifts and rotates are usually expensive. */
528 return LIM_EXPENSIVE;
529
530 case CONSTRUCTOR:
531 /* Make vector construction cost proportional to the number
532 of elements. */
533 return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));
534
535 case SSA_NAME:
536 case PAREN_EXPR:
537 /* Whether or not something is wrapped inside a PAREN_EXPR
538 should not change move cost. Nor should an intermediate
539 unpropagated SSA name copy. */
540 return 0;
541
542 default:
543 return 1;
544 }
545 }
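/* A few concrete data points from the cost table above (informal
   summary, no new logic): a division or multiplication costs
   LIM_EXPENSIVE, a vector CONSTRUCTOR with four elements costs 4,
   an SSA-name copy or PAREN_EXPR costs 0, and most other assignments
   cost 1.  */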
546
547 /* Finds the outermost loop between OUTER and LOOP in which the memory reference
548 REF is independent. If REF is not independent in LOOP, NULL is returned
549 instead. */
550
551 static struct loop *
552 outermost_indep_loop (struct loop *outer, struct loop *loop, mem_ref_p ref)
553 {
554 struct loop *aloop;
555
556 if (ref->stored && bitmap_bit_p (ref->stored, loop->num))
557 return NULL;
558
559 for (aloop = outer;
560 aloop != loop;
561 aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
562 if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
563 && ref_indep_loop_p (aloop, ref))
564 return aloop;
565
566 if (ref_indep_loop_p (loop, ref))
567 return loop;
568 else
569 return NULL;
570 }
571
572 /* If there is a simple load or store to a memory reference in STMT, returns
573 the location of the memory reference, and sets IS_STORE according to whether
574 it is a store or load. Otherwise, returns NULL. */
575
576 static tree *
577 simple_mem_ref_in_stmt (gimple stmt, bool *is_store)
578 {
579 tree *lhs, *rhs;
580
581 /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns. */
582 if (!gimple_assign_single_p (stmt))
583 return NULL;
584
585 lhs = gimple_assign_lhs_ptr (stmt);
586 rhs = gimple_assign_rhs1_ptr (stmt);
587
588 if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
589 {
590 *is_store = false;
591 return rhs;
592 }
593 else if (gimple_vdef (stmt)
594 && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
595 {
596 *is_store = true;
597 return lhs;
598 }
599 else
600 return NULL;
601 }
602
603 /* Returns the memory reference contained in STMT. */
604
605 static mem_ref_p
606 mem_ref_in_stmt (gimple stmt)
607 {
608 bool store;
609 tree *mem = simple_mem_ref_in_stmt (stmt, &store);
610 hashval_t hash;
611 mem_ref_p ref;
612
613 if (!mem)
614 return NULL;
615 gcc_assert (!store);
616
617 hash = iterative_hash_expr (*mem, 0);
618 ref = memory_accesses.refs->find_with_hash (*mem, hash);
619
620 gcc_assert (ref != NULL);
621 return ref;
622 }
623
624 /* From a controlling predicate in DOM determine the arguments from
625 the PHI node PHI that are chosen if the predicate evaluates to
626 true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
627 they are non-NULL. Returns true if the arguments can be determined,
628 otherwise returns false. */
629
630 static bool
631 extract_true_false_args_from_phi (basic_block dom, gphi *phi,
632 tree *true_arg_p, tree *false_arg_p)
633 {
634 basic_block bb = gimple_bb (phi);
635 edge true_edge, false_edge, tem;
636 tree arg0 = NULL_TREE, arg1 = NULL_TREE;
637
638 /* We have to verify that one edge into the PHI node is dominated
639 by the true edge of the predicate block and the other edge
640 dominated by the false edge. This ensures that the PHI argument
641 we are going to take is completely determined by the path we
642 take from the predicate block.
643 We can only use BB dominance checks below if the destination of
644 the true/false edges are dominated by their edge, thus only
645 have a single predecessor. */
646 extract_true_false_edges_from_block (dom, &true_edge, &false_edge);
647 tem = EDGE_PRED (bb, 0);
648 if (tem == true_edge
649 || (single_pred_p (true_edge->dest)
650 && (tem->src == true_edge->dest
651 || dominated_by_p (CDI_DOMINATORS,
652 tem->src, true_edge->dest))))
653 arg0 = PHI_ARG_DEF (phi, tem->dest_idx);
654 else if (tem == false_edge
655 || (single_pred_p (false_edge->dest)
656 && (tem->src == false_edge->dest
657 || dominated_by_p (CDI_DOMINATORS,
658 tem->src, false_edge->dest))))
659 arg1 = PHI_ARG_DEF (phi, tem->dest_idx);
660 else
661 return false;
662 tem = EDGE_PRED (bb, 1);
663 if (tem == true_edge
664 || (single_pred_p (true_edge->dest)
665 && (tem->src == true_edge->dest
666 || dominated_by_p (CDI_DOMINATORS,
667 tem->src, true_edge->dest))))
668 arg0 = PHI_ARG_DEF (phi, tem->dest_idx);
669 else if (tem == false_edge
670 || (single_pred_p (false_edge->dest)
671 && (tem->src == false_edge->dest
672 || dominated_by_p (CDI_DOMINATORS,
673 tem->src, false_edge->dest))))
674 arg1 = PHI_ARG_DEF (phi, tem->dest_idx);
675 else
676 return false;
677 if (!arg0 || !arg1)
678 return false;
679
680 if (true_arg_p)
681 *true_arg_p = arg0;
682 if (false_arg_p)
683 *false_arg_p = arg1;
684
685 return true;
686 }
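/* Shape of the CFG handled above, as an illustrative sketch (block and
   SSA names are made up):

       DOM:  if (cond_1) goto THEN; else goto ELSE;
       ...
       BB:   x_3 = PHI <x_1 (edge reached via THEN), x_2 (edge via ELSE)>

   Here x_1 is returned as *TRUE_ARG_P and x_2 as *FALSE_ARG_P, provided
   each incoming edge of BB is dominated by the corresponding outgoing
   edge of DOM.  */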
687
688 /* Determine the outermost loop to which it is possible to hoist a statement
689 STMT and store it in LIM_DATA (STMT)->max_loop. To do this we determine
690 the outermost loop in which the value computed by STMT is invariant.
691 If MUST_PRESERVE_EXEC is true, additionally choose the loop so that
692 whether STMT is executed is preserved. It also fills in other related
693 information in LIM_DATA (STMT).
694
695 The function returns false if STMT cannot be hoisted outside of the loop it
696 is defined in, and true otherwise. */
697
698 static bool
699 determine_max_movement (gimple stmt, bool must_preserve_exec)
700 {
701 basic_block bb = gimple_bb (stmt);
702 struct loop *loop = bb->loop_father;
703 struct loop *level;
704 struct lim_aux_data *lim_data = get_lim_data (stmt);
705 tree val;
706 ssa_op_iter iter;
707
708 if (must_preserve_exec)
709 level = ALWAYS_EXECUTED_IN (bb);
710 else
711 level = superloop_at_depth (loop, 1);
712 lim_data->max_loop = level;
713
714 if (gphi *phi = dyn_cast <gphi *> (stmt))
715 {
716 use_operand_p use_p;
717 unsigned min_cost = UINT_MAX;
718 unsigned total_cost = 0;
719 struct lim_aux_data *def_data;
720
721 /* We will end up promoting dependencies to be unconditionally
722 evaluated. For this reason the PHI cost (and thus the
723 cost we remove from the loop by doing the invariant motion)
724 is that of the cheapest PHI argument dependency chain. */
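/* E.g. (made-up costs): for a PHI whose two argument dependency chains
   cost 3 and 20, only min (3, 20) == 3 is credited below, since the
   cheaper chain is what a single path through the loop would have
   evaluated anyway.  */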
725 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
726 {
727 val = USE_FROM_PTR (use_p);
728
729 if (TREE_CODE (val) != SSA_NAME)
730 {
731 /* Assign const 1 to constants. */
732 min_cost = MIN (min_cost, 1);
733 total_cost += 1;
734 continue;
735 }
736 if (!add_dependency (val, lim_data, loop, false))
737 return false;
738
739 gimple def_stmt = SSA_NAME_DEF_STMT (val);
740 if (gimple_bb (def_stmt)
741 && gimple_bb (def_stmt)->loop_father == loop)
742 {
743 def_data = get_lim_data (def_stmt);
744 if (def_data)
745 {
746 min_cost = MIN (min_cost, def_data->cost);
747 total_cost += def_data->cost;
748 }
749 }
750 }
751
752 min_cost = MIN (min_cost, total_cost);
753 lim_data->cost += min_cost;
754
755 if (gimple_phi_num_args (phi) > 1)
756 {
757 basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
758 gimple cond;
759 if (gsi_end_p (gsi_last_bb (dom)))
760 return false;
761 cond = gsi_stmt (gsi_last_bb (dom));
762 if (gimple_code (cond) != GIMPLE_COND)
763 return false;
764 /* Verify that this is an extended form of a diamond and
765 the PHI arguments are completely controlled by the
766 predicate in DOM. */
767 if (!extract_true_false_args_from_phi (dom, phi, NULL, NULL))
768 return false;
769
770 /* Fold in dependencies and cost of the condition. */
771 FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
772 {
773 if (!add_dependency (val, lim_data, loop, false))
774 return false;
775 def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
776 if (def_data)
777 total_cost += def_data->cost;
778 }
779
780 /* We want to avoid unconditionally executing very expensive
781 operations. As costs for our dependencies cannot be
782 negative, just claim we are not invariant in this case.
783 We also are not sure whether the control-flow inside the
784 loop will vanish. */
785 if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
786 && !(min_cost != 0
787 && total_cost / min_cost <= 2))
788 return false;
789
790 /* Assume that the control-flow in the loop will vanish.
791 ??? We should verify this and not artificially increase
792 the cost if that is not the case. */
793 lim_data->cost += stmt_cost (stmt);
794 }
795
796 return true;
797 }
798 else
799 FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
800 if (!add_dependency (val, lim_data, loop, true))
801 return false;
802
803 if (gimple_vuse (stmt))
804 {
805 mem_ref_p ref = mem_ref_in_stmt (stmt);
806
807 if (ref)
808 {
809 lim_data->max_loop
810 = outermost_indep_loop (lim_data->max_loop, loop, ref);
811 if (!lim_data->max_loop)
812 return false;
813 }
814 else
815 {
816 if ((val = gimple_vuse (stmt)) != NULL_TREE)
817 {
818 if (!add_dependency (val, lim_data, loop, false))
819 return false;
820 }
821 }
822 }
823
824 lim_data->cost += stmt_cost (stmt);
825
826 return true;
827 }
828
829 /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
830 and that one of the operands of this statement is computed by STMT.
831 Ensure that STMT (together with all the statements that define its
832 operands) is hoisted at least out of the loop LEVEL. */
833
834 static void
835 set_level (gimple stmt, struct loop *orig_loop, struct loop *level)
836 {
837 struct loop *stmt_loop = gimple_bb (stmt)->loop_father;
838 struct lim_aux_data *lim_data;
839 gimple dep_stmt;
840 unsigned i;
841
842 stmt_loop = find_common_loop (orig_loop, stmt_loop);
843 lim_data = get_lim_data (stmt);
844 if (lim_data != NULL && lim_data->tgt_loop != NULL)
845 stmt_loop = find_common_loop (stmt_loop,
846 loop_outer (lim_data->tgt_loop));
847 if (flow_loop_nested_p (stmt_loop, level))
848 return;
849
850 gcc_assert (level == lim_data->max_loop
851 || flow_loop_nested_p (lim_data->max_loop, level));
852
853 lim_data->tgt_loop = level;
854 FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
855 set_level (dep_stmt, orig_loop, level);
856 }
857
858 /* Determines an outermost loop from which we want to hoist the statement STMT.
859 For now we choose the outermost possible loop. TODO -- use profiling
860 information to set it more sanely. */
861
862 static void
863 set_profitable_level (gimple stmt)
864 {
865 set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
866 }
867
868 /* Returns true if STMT is a call that has side effects. */
869
870 static bool
871 nonpure_call_p (gimple stmt)
872 {
873 if (gimple_code (stmt) != GIMPLE_CALL)
874 return false;
875
876 return gimple_has_side_effects (stmt);
877 }
878
879 /* Rewrite a/b to a*(1/b). Return the invariant stmt to process. */
880
881 static gimple
882 rewrite_reciprocal (gimple_stmt_iterator *bsi)
883 {
884 gassign *stmt, *stmt1, *stmt2;
885 tree name, lhs, type;
886 tree real_one;
887 gimple_stmt_iterator gsi;
888
889 stmt = as_a <gassign *> (gsi_stmt (*bsi));
890 lhs = gimple_assign_lhs (stmt);
891 type = TREE_TYPE (lhs);
892
893 real_one = build_one_cst (type);
894
895 name = make_temp_ssa_name (type, NULL, "reciptmp");
896 stmt1 = gimple_build_assign (name, RDIV_EXPR, real_one,
897 gimple_assign_rhs2 (stmt));
898 stmt2 = gimple_build_assign (lhs, MULT_EXPR, name,
899 gimple_assign_rhs1 (stmt));
900
901 /* Replace division stmt with reciprocal and multiply stmts.
902 The multiply stmt is not invariant, so update iterator
903 and avoid rescanning. */
904 gsi = *bsi;
905 gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
906 gsi_replace (&gsi, stmt2, true);
907
908 /* Continue processing with invariant reciprocal statement. */
909 return stmt1;
910 }
911
912 /* Check if the pattern at *BSI is a bittest of the form
913 (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0. */
914
915 static gimple
916 rewrite_bittest (gimple_stmt_iterator *bsi)
917 {
918 gassign *stmt;
919 gimple stmt1;
920 gassign *stmt2;
921 gimple use_stmt;
922 gcond *cond_stmt;
923 tree lhs, name, t, a, b;
924 use_operand_p use;
925
926 stmt = as_a <gassign *> (gsi_stmt (*bsi));
927 lhs = gimple_assign_lhs (stmt);
928
929 /* Verify that the single use of lhs is a comparison against zero. */
930 if (TREE_CODE (lhs) != SSA_NAME
931 || !single_imm_use (lhs, &use, &use_stmt))
932 return stmt;
933 cond_stmt = dyn_cast <gcond *> (use_stmt);
934 if (!cond_stmt)
935 return stmt;
936 if (gimple_cond_lhs (cond_stmt) != lhs
937 || (gimple_cond_code (cond_stmt) != NE_EXPR
938 && gimple_cond_code (cond_stmt) != EQ_EXPR)
939 || !integer_zerop (gimple_cond_rhs (cond_stmt)))
940 return stmt;
941
942 /* Get at the operands of the shift. The rhs is TMP1 & 1. */
943 stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
944 if (gimple_code (stmt1) != GIMPLE_ASSIGN)
945 return stmt;
946
947 /* There is a conversion in between possibly inserted by fold. */
948 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
949 {
950 t = gimple_assign_rhs1 (stmt1);
951 if (TREE_CODE (t) != SSA_NAME
952 || !has_single_use (t))
953 return stmt;
954 stmt1 = SSA_NAME_DEF_STMT (t);
955 if (gimple_code (stmt1) != GIMPLE_ASSIGN)
956 return stmt;
957 }
958
959 /* Verify that B is loop invariant but A is not. Verify that with
960 all the stmt walking we are still in the same loop. */
961 if (gimple_assign_rhs_code (stmt1) != RSHIFT_EXPR
962 || loop_containing_stmt (stmt1) != loop_containing_stmt (stmt))
963 return stmt;
964
965 a = gimple_assign_rhs1 (stmt1);
966 b = gimple_assign_rhs2 (stmt1);
967
968 if (outermost_invariant_loop (b, loop_containing_stmt (stmt1)) != NULL
969 && outermost_invariant_loop (a, loop_containing_stmt (stmt1)) == NULL)
970 {
971 gimple_stmt_iterator rsi;
972
973 /* 1 << B */
974 t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
975 build_int_cst (TREE_TYPE (a), 1), b);
976 name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
977 stmt1 = gimple_build_assign (name, t);
978
979 /* A & (1 << B) */
980 t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
981 name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
982 stmt2 = gimple_build_assign (name, t);
983
984 /* Replace the SSA_NAME we compare against zero. Adjust
985 the type of zero accordingly. */
986 SET_USE (use, name);
987 gimple_cond_set_rhs (cond_stmt,
988 build_int_cst_type (TREE_TYPE (name),
989 0));
990
991 /* Don't use gsi_replace here, none of the new assignments sets
992 the variable originally set in stmt. Move bsi to stmt1, and
993 then remove the original stmt, so that we get a chance to
994 retain debug info for it. */
995 rsi = *bsi;
996 gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
997 gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
998 gsi_remove (&rsi, true);
999
1000 return stmt1;
1001 }
1002
1003 return stmt;
1004 }
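/* An informal before/after sketch of rewrite_bittest (SSA names made up).
   Given B loop-invariant and A not:

       tmp_1 = a_5 >> b_6;
       lhs_2 = tmp_1 & 1;
       if (lhs_2 != 0) ...

   becomes

       shifttmp_7 = 1 << b_6;
       shifttmp_8 = a_5 & shifttmp_7;
       if (shifttmp_8 != 0) ...

   so the computation of 1 << b_6, which depends only on the invariant
   operand, can be hoisted out of the loop.  */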
1005
1006 /* For each statement determines the outermost loop in which it is invariant,
1007 the statements on whose motion it depends and the cost of the computation.
1008 This information is stored in the LIM_DATA structure associated with
1009 each statement. */
1010 class invariantness_dom_walker : public dom_walker
1011 {
1012 public:
1013 invariantness_dom_walker (cdi_direction direction)
1014 : dom_walker (direction) {}
1015
1016 virtual void before_dom_children (basic_block);
1017 };
1018
1019 /* Determine the outermost loops in which statements in basic block BB are
1020 invariant, and record them in the LIM_DATA associated with the statements.
1021 Callback for dom_walker. */
1022
1023 void
1024 invariantness_dom_walker::before_dom_children (basic_block bb)
1025 {
1026 enum move_pos pos;
1027 gimple_stmt_iterator bsi;
1028 gimple stmt;
1029 bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
1030 struct loop *outermost = ALWAYS_EXECUTED_IN (bb);
1031 struct lim_aux_data *lim_data;
1032
1033 if (!loop_outer (bb->loop_father))
1034 return;
1035
1036 if (dump_file && (dump_flags & TDF_DETAILS))
1037 fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
1038 bb->index, bb->loop_father->num, loop_depth (bb->loop_father));
1039
1040 /* Look at PHI nodes, but only if there are at most two.
1041 ??? We could relax this further by post-processing the inserted
1042 code and transforming adjacent cond-exprs with the same predicate
1043 to control flow again. */
1044 bsi = gsi_start_phis (bb);
1045 if (!gsi_end_p (bsi)
1046 && ((gsi_next (&bsi), gsi_end_p (bsi))
1047 || (gsi_next (&bsi), gsi_end_p (bsi))))
1048 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1049 {
1050 stmt = gsi_stmt (bsi);
1051
1052 pos = movement_possibility (stmt);
1053 if (pos == MOVE_IMPOSSIBLE)
1054 continue;
1055
1056 lim_data = init_lim_data (stmt);
1057 lim_data->always_executed_in = outermost;
1058
1059 if (!determine_max_movement (stmt, false))
1060 {
1061 lim_data->max_loop = NULL;
1062 continue;
1063 }
1064
1065 if (dump_file && (dump_flags & TDF_DETAILS))
1066 {
1067 print_gimple_stmt (dump_file, stmt, 2, 0);
1068 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
1069 loop_depth (lim_data->max_loop),
1070 lim_data->cost);
1071 }
1072
1073 if (lim_data->cost >= LIM_EXPENSIVE)
1074 set_profitable_level (stmt);
1075 }
1076
1077 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1078 {
1079 stmt = gsi_stmt (bsi);
1080
1081 pos = movement_possibility (stmt);
1082 if (pos == MOVE_IMPOSSIBLE)
1083 {
1084 if (nonpure_call_p (stmt))
1085 {
1086 maybe_never = true;
1087 outermost = NULL;
1088 }
1089 /* Make sure to note always_executed_in for stores to make
1090 store-motion work. */
1091 else if (stmt_makes_single_store (stmt))
1092 {
1093 struct lim_aux_data *lim_data = init_lim_data (stmt);
1094 lim_data->always_executed_in = outermost;
1095 }
1096 continue;
1097 }
1098
1099 if (is_gimple_assign (stmt)
1100 && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1101 == GIMPLE_BINARY_RHS))
1102 {
1103 tree op0 = gimple_assign_rhs1 (stmt);
1104 tree op1 = gimple_assign_rhs2 (stmt);
1105 struct loop *ol1 = outermost_invariant_loop (op1,
1106 loop_containing_stmt (stmt));
1107
1108 /* If the divisor is invariant, convert a/b to a*(1/b), allowing the
1109 reciprocal to be hoisted out of the loop and saving an expensive divide. */
1110 if (pos == MOVE_POSSIBLE
1111 && gimple_assign_rhs_code (stmt) == RDIV_EXPR
1112 && flag_unsafe_math_optimizations
1113 && !flag_trapping_math
1114 && ol1 != NULL
1115 && outermost_invariant_loop (op0, ol1) == NULL)
1116 stmt = rewrite_reciprocal (&bsi);
1117
1118 /* If the shift count is invariant, convert (A >> B) & 1 to
1119 A & (1 << B), allowing the bit mask to be hoisted out of the loop
1120 and saving an expensive shift. */
1121 if (pos == MOVE_POSSIBLE
1122 && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
1123 && integer_onep (op1)
1124 && TREE_CODE (op0) == SSA_NAME
1125 && has_single_use (op0))
1126 stmt = rewrite_bittest (&bsi);
1127 }
1128
1129 lim_data = init_lim_data (stmt);
1130 lim_data->always_executed_in = outermost;
1131
1132 if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
1133 continue;
1134
1135 if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
1136 {
1137 lim_data->max_loop = NULL;
1138 continue;
1139 }
1140
1141 if (dump_file && (dump_flags & TDF_DETAILS))
1142 {
1143 print_gimple_stmt (dump_file, stmt, 2, 0);
1144 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
1145 loop_depth (lim_data->max_loop),
1146 lim_data->cost);
1147 }
1148
1149 if (lim_data->cost >= LIM_EXPENSIVE)
1150 set_profitable_level (stmt);
1151 }
1152 }
1153
1154 class move_computations_dom_walker : public dom_walker
1155 {
1156 public:
1157 move_computations_dom_walker (cdi_direction direction)
1158 : dom_walker (direction), todo_ (0) {}
1159
1160 virtual void before_dom_children (basic_block);
1161
1162 unsigned int todo_;
1163 };
1164
1165 /* Hoist the statements in basic block BB out of the loops prescribed by
1166 data stored in LIM_DATA structures associated with each statement. Callback
1167 for walk_dominator_tree. */
1168
1169 void
1170 move_computations_dom_walker::before_dom_children (basic_block bb)
1171 {
1172 struct loop *level;
1173 unsigned cost = 0;
1174 struct lim_aux_data *lim_data;
1175
1176 if (!loop_outer (bb->loop_father))
1177 return;
1178
1179 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
1180 {
1181 gassign *new_stmt;
1182 gphi *stmt = bsi.phi ();
1183
1184 lim_data = get_lim_data (stmt);
1185 if (lim_data == NULL)
1186 {
1187 gsi_next (&bsi);
1188 continue;
1189 }
1190
1191 cost = lim_data->cost;
1192 level = lim_data->tgt_loop;
1193 clear_lim_data (stmt);
1194
1195 if (!level)
1196 {
1197 gsi_next (&bsi);
1198 continue;
1199 }
1200
1201 if (dump_file && (dump_flags & TDF_DETAILS))
1202 {
1203 fprintf (dump_file, "Moving PHI node\n");
1204 print_gimple_stmt (dump_file, stmt, 0, 0);
1205 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1206 cost, level->num);
1207 }
1208
1209 if (gimple_phi_num_args (stmt) == 1)
1210 {
1211 tree arg = PHI_ARG_DEF (stmt, 0);
1212 new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1213 TREE_CODE (arg), arg);
1214 }
1215 else
1216 {
1217 basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
1218 gimple cond = gsi_stmt (gsi_last_bb (dom));
1219 tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
1220 /* Get the PHI arguments corresponding to the true and false
1221 edges of COND. */
1222 extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
1223 gcc_assert (arg0 && arg1);
1224 t = build2 (gimple_cond_code (cond), boolean_type_node,
1225 gimple_cond_lhs (cond), gimple_cond_rhs (cond));
1226 new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1227 COND_EXPR, t, arg0, arg1);
1228 todo_ |= TODO_cleanup_cfg;
1229 }
1230 if (INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (new_stmt)))
1231 && (!ALWAYS_EXECUTED_IN (bb)
1232 || (ALWAYS_EXECUTED_IN (bb) != level
1233 && !flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1234 {
1235 tree lhs = gimple_assign_lhs (new_stmt);
1236 SSA_NAME_RANGE_INFO (lhs) = NULL;
1237 SSA_NAME_ANTI_RANGE_P (lhs) = 0;
1238 }
1239 gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
1240 remove_phi_node (&bsi, false);
1241 }
1242
1243 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
1244 {
1245 edge e;
1246
1247 gimple stmt = gsi_stmt (bsi);
1248
1249 lim_data = get_lim_data (stmt);
1250 if (lim_data == NULL)
1251 {
1252 gsi_next (&bsi);
1253 continue;
1254 }
1255
1256 cost = lim_data->cost;
1257 level = lim_data->tgt_loop;
1258 clear_lim_data (stmt);
1259
1260 if (!level)
1261 {
1262 gsi_next (&bsi);
1263 continue;
1264 }
1265
1266 /* We do not really want to move conditionals out of the loop; we just
1267 placed them here to force their operands to be moved if necessary. */
1268 if (gimple_code (stmt) == GIMPLE_COND)
1269 continue;
1270
1271 if (dump_file && (dump_flags & TDF_DETAILS))
1272 {
1273 fprintf (dump_file, "Moving statement\n");
1274 print_gimple_stmt (dump_file, stmt, 0, 0);
1275 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1276 cost, level->num);
1277 }
1278
1279 e = loop_preheader_edge (level);
1280 gcc_assert (!gimple_vdef (stmt));
1281 if (gimple_vuse (stmt))
1282 {
1283 /* The new VUSE is the one from the virtual PHI in the loop
1284 header or the one already present. */
1285 gphi_iterator gsi2;
1286 for (gsi2 = gsi_start_phis (e->dest);
1287 !gsi_end_p (gsi2); gsi_next (&gsi2))
1288 {
1289 gphi *phi = gsi2.phi ();
1290 if (virtual_operand_p (gimple_phi_result (phi)))
1291 {
1292 gimple_set_vuse (stmt, PHI_ARG_DEF_FROM_EDGE (phi, e));
1293 break;
1294 }
1295 }
1296 }
1297 gsi_remove (&bsi, false);
1298 if (gimple_has_lhs (stmt)
1299 && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
1300 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_get_lhs (stmt)))
1301 && (!ALWAYS_EXECUTED_IN (bb)
1302 || !(ALWAYS_EXECUTED_IN (bb) == level
1303 || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1304 {
1305 tree lhs = gimple_get_lhs (stmt);
1306 SSA_NAME_RANGE_INFO (lhs) = NULL;
1307 SSA_NAME_ANTI_RANGE_P (lhs) = 0;
1308 }
1309 /* In case this is a stmt that is not unconditionally executed
1310 when the target loop header is executed and the stmt may
1311 invoke undefined integer or pointer overflow, rewrite it to
1312 unsigned arithmetic. */
1313 if (is_gimple_assign (stmt)
1314 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt)))
1315 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (gimple_assign_lhs (stmt)))
1316 && arith_code_with_undefined_signed_overflow
1317 (gimple_assign_rhs_code (stmt))
1318 && (!ALWAYS_EXECUTED_IN (bb)
1319 || !(ALWAYS_EXECUTED_IN (bb) == level
1320 || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1321 gsi_insert_seq_on_edge (e, rewrite_to_defined_overflow (stmt));
1322 else
1323 gsi_insert_on_edge (e, stmt);
1324 }
1325 }
1326
1327 /* Hoist the statements out of the loops prescribed by data stored in
1328 LIM_DATA structures associated with each statement. */
1329
1330 static unsigned int
1331 move_computations (void)
1332 {
1333 move_computations_dom_walker walker (CDI_DOMINATORS);
1334 walker.walk (cfun->cfg->x_entry_block_ptr);
1335
1336 gsi_commit_edge_inserts ();
1337 if (need_ssa_update_p (cfun))
1338 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1339
1340 return walker.todo_;
1341 }
1342
1343 /* Checks whether the statement defining variable *INDEX can be hoisted
1344 out of the loop passed in DATA. Callback for for_each_index. */
1345
1346 static bool
1347 may_move_till (tree ref, tree *index, void *data)
1348 {
1349 struct loop *loop = (struct loop *) data, *max_loop;
1350
1351 /* If REF is an array reference, check also that the step and the lower
1352 bound are invariant in LOOP. */
1353 if (TREE_CODE (ref) == ARRAY_REF)
1354 {
1355 tree step = TREE_OPERAND (ref, 3);
1356 tree lbound = TREE_OPERAND (ref, 2);
1357
1358 max_loop = outermost_invariant_loop (step, loop);
1359 if (!max_loop)
1360 return false;
1361
1362 max_loop = outermost_invariant_loop (lbound, loop);
1363 if (!max_loop)
1364 return false;
1365 }
1366
1367 max_loop = outermost_invariant_loop (*index, loop);
1368 if (!max_loop)
1369 return false;
1370
1371 return true;
1372 }
1373
1374 /* If OP is an SSA name, force the statement that defines it to be
1375 moved out of the LOOP. ORIG_LOOP is the loop in which OP is used. */
1376
1377 static void
1378 force_move_till_op (tree op, struct loop *orig_loop, struct loop *loop)
1379 {
1380 gimple stmt;
1381
1382 if (!op
1383 || is_gimple_min_invariant (op))
1384 return;
1385
1386 gcc_assert (TREE_CODE (op) == SSA_NAME);
1387
1388 stmt = SSA_NAME_DEF_STMT (op);
1389 if (gimple_nop_p (stmt))
1390 return;
1391
1392 set_level (stmt, orig_loop, loop);
1393 }
1394
1395 /* Forces statements defining invariants in REF (and *INDEX) to be moved out of
1396 the LOOP. The reference REF is used in the loop ORIG_LOOP. Callback for
1397 for_each_index. */
1398
1399 struct fmt_data
1400 {
1401 struct loop *loop;
1402 struct loop *orig_loop;
1403 };
1404
1405 static bool
1406 force_move_till (tree ref, tree *index, void *data)
1407 {
1408 struct fmt_data *fmt_data = (struct fmt_data *) data;
1409
1410 if (TREE_CODE (ref) == ARRAY_REF)
1411 {
1412 tree step = TREE_OPERAND (ref, 3);
1413 tree lbound = TREE_OPERAND (ref, 2);
1414
1415 force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
1416 force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
1417 }
1418
1419 force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);
1420
1421 return true;
1422 }
1423
1424 /* A function to free the mem_ref object MEM. */
1425
1426 static void
1427 memref_free (struct im_mem_ref *mem)
1428 {
1429 mem->accesses_in_loop.release ();
1430 }
1431
1432 /* Allocates and returns a memory reference description for MEM whose hash
1433 value is HASH and id is ID. */
1434
1435 static mem_ref_p
1436 mem_ref_alloc (tree mem, unsigned hash, unsigned id)
1437 {
1438 mem_ref_p ref = XOBNEW (&mem_ref_obstack, struct im_mem_ref);
1439 ao_ref_init (&ref->mem, mem);
1440 ref->id = id;
1441 ref->hash = hash;
1442 ref->stored = NULL;
1443 bitmap_initialize (&ref->indep_loop, &lim_bitmap_obstack);
1444 bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
1445 ref->accesses_in_loop.create (1);
1446
1447 return ref;
1448 }
1449
1450 /* Records memory reference location *LOC to the memory reference
1451 description REF. The reference occurs in statement STMT. */
1452
1453 static void
1454 record_mem_ref_loc (mem_ref_p ref, gimple stmt, tree *loc)
1455 {
1456 mem_ref_loc aref;
1457 aref.stmt = stmt;
1458 aref.ref = loc;
1459 ref->accesses_in_loop.safe_push (aref);
1460 }
1461
1462 /* Set the LOOP bit in REF's stored bitmap, allocating it if
1463 necessary. Return whether the bit was changed. */
1464
1465 static bool
1466 set_ref_stored_in_loop (mem_ref_p ref, struct loop *loop)
1467 {
1468 if (!ref->stored)
1469 ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
1470 return bitmap_set_bit (ref->stored, loop->num);
1471 }
1472
1473 /* Marks reference REF as stored in LOOP. */
1474
1475 static void
1476 mark_ref_stored (mem_ref_p ref, struct loop *loop)
1477 {
1478 while (loop != current_loops->tree_root
1479 && set_ref_stored_in_loop (ref, loop))
1480 loop = loop_outer (loop);
1481 }
1482
1483 /* Gathers memory references in statement STMT in LOOP, storing the
1484 information about them in the memory_accesses structure. Marks
1485 the vops accessed through unrecognized statements there as
1486 well. */
1487
1488 static void
1489 gather_mem_refs_stmt (struct loop *loop, gimple stmt)
1490 {
1491 tree *mem = NULL;
1492 hashval_t hash;
1493 im_mem_ref **slot;
1494 mem_ref_p ref;
1495 bool is_stored;
1496 unsigned id;
1497
1498 if (!gimple_vuse (stmt))
1499 return;
1500
1501 mem = simple_mem_ref_in_stmt (stmt, &is_stored);
1502 if (!mem)
1503 {
1504 /* We use the shared mem_ref for all unanalyzable refs. */
1505 id = UNANALYZABLE_MEM_ID;
1506 ref = memory_accesses.refs_list[id];
1507 if (dump_file && (dump_flags & TDF_DETAILS))
1508 {
1509 fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
1510 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1511 }
1512 is_stored = gimple_vdef (stmt);
1513 }
1514 else
1515 {
1516 hash = iterative_hash_expr (*mem, 0);
1517 slot = memory_accesses.refs->find_slot_with_hash (*mem, hash, INSERT);
1518 if (*slot)
1519 {
1520 ref = (mem_ref_p) *slot;
1521 id = ref->id;
1522 }
1523 else
1524 {
1525 id = memory_accesses.refs_list.length ();
1526 ref = mem_ref_alloc (*mem, hash, id);
1527 memory_accesses.refs_list.safe_push (ref);
1528 *slot = ref;
1529
1530 if (dump_file && (dump_flags & TDF_DETAILS))
1531 {
1532 fprintf (dump_file, "Memory reference %u: ", id);
1533 print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
1534 fprintf (dump_file, "\n");
1535 }
1536 }
1537
1538 record_mem_ref_loc (ref, stmt, mem);
1539 }
1540 bitmap_set_bit (&memory_accesses.refs_in_loop[loop->num], ref->id);
1541 if (is_stored)
1542 {
1543 bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
1544 mark_ref_stored (ref, loop);
1545 }
1546 return;
1547 }
1548
1549 static unsigned *bb_loop_postorder;
1550
1551 /* qsort sort function to sort blocks by their loop father's postorder number. */
1552
1553 static int
1554 sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_)
1555 {
1556 basic_block bb1 = *(basic_block *)const_cast<void *>(bb1_);
1557 basic_block bb2 = *(basic_block *)const_cast<void *>(bb2_);
1558 struct loop *loop1 = bb1->loop_father;
1559 struct loop *loop2 = bb2->loop_father;
1560 if (loop1->num == loop2->num)
1561 return 0;
1562 return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1563 }
1564
1565 /* qsort sort function to sort ref locs by their loop father's postorder number. */
1566
1567 static int
1568 sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_)
1569 {
1570 mem_ref_loc *loc1 = (mem_ref_loc *)const_cast<void *>(loc1_);
1571 mem_ref_loc *loc2 = (mem_ref_loc *)const_cast<void *>(loc2_);
1572 struct loop *loop1 = gimple_bb (loc1->stmt)->loop_father;
1573 struct loop *loop2 = gimple_bb (loc2->stmt)->loop_father;
1574 if (loop1->num == loop2->num)
1575 return 0;
1576 return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1577 }
1578
1579 /* Gathers memory references in loops. */
1580
1581 static void
1582 analyze_memory_references (void)
1583 {
1584 gimple_stmt_iterator bsi;
1585 basic_block bb, *bbs;
1586 struct loop *loop, *outer;
1587 unsigned i, n;
1588
1589 /* Collect all basic-blocks in loops and sort them by their
1590 loop's postorder number. */
1591 i = 0;
1592 bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
1593 FOR_EACH_BB_FN (bb, cfun)
1594 if (bb->loop_father != current_loops->tree_root)
1595 bbs[i++] = bb;
1596 n = i;
1597 qsort (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp);
1598
1599 /* Visit blocks in loop postorder and assign mem-ref IDs in that order.
1600 That results in better locality for all the bitmaps. */
1601 for (i = 0; i < n; ++i)
1602 {
1603 basic_block bb = bbs[i];
1604 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1605 gather_mem_refs_stmt (bb->loop_father, gsi_stmt (bsi));
1606 }
1607
1608 /* Sort the location list of gathered memory references by their
1609 loop postorder number. */
1610 im_mem_ref *ref;
1611 FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
1612 ref->accesses_in_loop.qsort (sort_locs_in_loop_postorder_cmp);
1613
1614 free (bbs);
1615 // free (bb_loop_postorder);
1616
1617 /* Propagate the information about accessed memory references up
1618 the loop hierarchy. */
1619 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
1620 {
1621 /* Finalize the overall touched references (including subloops). */
1622 bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num],
1623 &memory_accesses.refs_stored_in_loop[loop->num]);
1624
1625 /* Propagate the information about accessed memory references up
1626 the loop hierarchy. */
1627 outer = loop_outer (loop);
1628 if (outer == current_loops->tree_root)
1629 continue;
1630
1631 bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num],
1632 &memory_accesses.all_refs_stored_in_loop[loop->num]);
1633 }
1634 }
1635
1636 /* Returns true if MEM1 and MEM2 may alias. TTAE_CACHE is used as a cache in
1637 tree_to_aff_combination_expand. */
1638
1639 static bool
1640 mem_refs_may_alias_p (mem_ref_p mem1, mem_ref_p mem2,
1641 hash_map<tree, name_expansion *> **ttae_cache)
1642 {
1643 /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
1644 object and their offsets differ in such a way that the locations cannot
1645 overlap, then they cannot alias. */
1646 widest_int size1, size2;
1647 aff_tree off1, off2;
1648
1649 /* Perform basic offset and type-based disambiguation. */
1650 if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, true))
1651 return false;
1652
1653 /* The expansion of addresses may be a bit expensive, thus we only do
1654 the check at -O2 and higher optimization levels. */
1655 if (optimize < 2)
1656 return true;
1657
1658 get_inner_reference_aff (mem1->mem.ref, &off1, &size1);
1659 get_inner_reference_aff (mem2->mem.ref, &off2, &size2);
1660 aff_combination_expand (&off1, ttae_cache);
1661 aff_combination_expand (&off2, ttae_cache);
1662 aff_combination_scale (&off1, -1);
1663 aff_combination_add (&off2, &off1);
1664
1665 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1666 return false;
1667
1668 return true;
1669 }
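/* Rough illustration of the offset check above (hypothetical refs):
   two 4-byte accesses based on the same pointer at byte offsets 0 and 8
   have offset ranges [0, 4) and [8, 12), which cannot overlap, so the
   references are reported as not aliasing.  */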
1670
1671 /* Compare function for bsearch searching for reference locations
1672 in a loop. */
1673
1674 static int
1675 find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_)
1676 {
1677 struct loop *loop = (struct loop *)const_cast<void *>(loop_);
1678 mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_);
1679 struct loop *loc_loop = gimple_bb (loc->stmt)->loop_father;
1680 if (loop->num == loc_loop->num
1681 || flow_loop_nested_p (loop, loc_loop))
1682 return 0;
1683 return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num]
1684 ? -1 : 1);
1685 }
1686
1687 /* Iterates over all locations of REF in LOOP and its subloops, calling
1688 fn.operator() with the location as argument. When that operator
1689 returns true, the iteration is stopped and true is returned.
1690 Otherwise false is returned. */
1691
1692 template <typename FN>
1693 static bool
1694 for_all_locs_in_loop (struct loop *loop, mem_ref_p ref, FN fn)
1695 {
1696 unsigned i;
1697 mem_ref_loc_p loc;
1698
1699 /* Search for the cluster of locs in the accesses_in_loop vector
1700 which is sorted by the postorder index of the loop father. */
1701 loc = ref->accesses_in_loop.bsearch (loop, find_ref_loc_in_loop_cmp);
1702 if (!loc)
1703 return false;
1704
1705 /* We have found one location inside loop or its sub-loops. Iterate
1706 both forward and backward to cover the whole cluster. */
1707 i = loc - ref->accesses_in_loop.address ();
1708 while (i > 0)
1709 {
1710 --i;
1711 mem_ref_loc_p l = &ref->accesses_in_loop[i];
1712 if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1713 break;
1714 if (fn (l))
1715 return true;
1716 }
1717 for (i = loc - ref->accesses_in_loop.address ();
1718 i < ref->accesses_in_loop.length (); ++i)
1719 {
1720 mem_ref_loc_p l = &ref->accesses_in_loop[i];
1721 if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1722 break;
1723 if (fn (l))
1724 return true;
1725 }
1726
1727 return false;
1728 }
1729
1730 /* Rewrites location LOC by TMP_VAR. */
1731
1732 struct rewrite_mem_ref_loc
1733 {
1734 rewrite_mem_ref_loc (tree tmp_var_) : tmp_var (tmp_var_) {}
1735 bool operator () (mem_ref_loc_p loc);
1736 tree tmp_var;
1737 };
1738
1739 bool
1740 rewrite_mem_ref_loc::operator () (mem_ref_loc_p loc)
1741 {
1742 *loc->ref = tmp_var;
1743 update_stmt (loc->stmt);
1744 return false;
1745 }
1746
1747 /* Rewrites all references to REF in LOOP by variable TMP_VAR. */
1748
1749 static void
1750 rewrite_mem_refs (struct loop *loop, mem_ref_p ref, tree tmp_var)
1751 {
1752 for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));
1753 }
1754
1755 /* Stores the first reference location in LOCP. */
1756
1757 struct first_mem_ref_loc_1
1758 {
1759 first_mem_ref_loc_1 (mem_ref_loc_p *locp_) : locp (locp_) {}
1760 bool operator () (mem_ref_loc_p loc);
1761 mem_ref_loc_p *locp;
1762 };
1763
1764 bool
1765 first_mem_ref_loc_1::operator () (mem_ref_loc_p loc)
1766 {
1767 *locp = loc;
1768 return true;
1769 }
1770
1771 /* Returns the first reference location to REF in LOOP. */
1772
1773 static mem_ref_loc_p
1774 first_mem_ref_loc (struct loop *loop, mem_ref_p ref)
1775 {
1776 mem_ref_loc_p locp = NULL;
1777 for_all_locs_in_loop (loop, ref, first_mem_ref_loc_1 (&locp));
1778 return locp;
1779 }
1780
1781 struct prev_flag_edges {
1782 /* Edge to insert new flag comparison code. */
1783 edge append_cond_position;
1784
1785 /* Edge for fall through from previous flag comparison. */
1786 edge last_cond_fallthru;
1787 };
1788
1789 /* Helper function for execute_sm. Emit code to store TMP_VAR into
1790 MEM along edge EX.
1791
1792 The store is only done if MEM has changed. We do this so no
1793 changes to MEM occur on code paths that did not originally store
1794 into it.
1795
1796 The common case for execute_sm will transform:
1797
1798 for (...) {
1799 if (foo)
1800 stuff;
1801 else
1802 MEM = TMP_VAR;
1803 }
1804
1805 into:
1806
1807 lsm = MEM;
1808 for (...) {
1809 if (foo)
1810 stuff;
1811 else
1812 lsm = TMP_VAR;
1813 }
1814 MEM = lsm;
1815
1816 This function will generate:
1817
1818 lsm = MEM;
1819
1820 lsm_flag = false;
1821 ...
1822 for (...) {
1823 if (foo)
1824 stuff;
1825 else {
1826 lsm = TMP_VAR;
1827 lsm_flag = true;
1828 }
1829 }
1830 if (lsm_flag) <--
1831 MEM = lsm; <--
1832 */
1833
1834 static void
1835 execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag)
1836 {
1837 basic_block new_bb, then_bb, old_dest;
1838 bool loop_has_only_one_exit;
1839 edge then_old_edge, orig_ex = ex;
1840 gimple_stmt_iterator gsi;
1841 gimple stmt;
1842 struct prev_flag_edges *prev_edges = (struct prev_flag_edges *) ex->aux;
1843 bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP;
1844
1845 /* ?? Insert store after previous store if applicable. See note
1846 below. */
1847 if (prev_edges)
1848 ex = prev_edges->append_cond_position;
1849
1850 loop_has_only_one_exit = single_pred_p (ex->dest);
1851
1852 if (loop_has_only_one_exit)
1853 ex = split_block_after_labels (ex->dest);
1854
1855 old_dest = ex->dest;
1856 new_bb = split_edge (ex);
1857 then_bb = create_empty_bb (new_bb);
1858 if (irr)
1859 then_bb->flags = BB_IRREDUCIBLE_LOOP;
1860 add_bb_to_loop (then_bb, new_bb->loop_father);
1861
1862 gsi = gsi_start_bb (new_bb);
1863 stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
1864 NULL_TREE, NULL_TREE);
1865 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1866
1867 gsi = gsi_start_bb (then_bb);
1868 /* Insert actual store. */
1869 stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
1870 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1871
1872 make_edge (new_bb, then_bb,
1873 EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
1874 make_edge (new_bb, old_dest,
1875 EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
1876 then_old_edge = make_edge (then_bb, old_dest,
1877 EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
1878
1879 set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);
1880
1881 if (prev_edges)
1882 {
1883 basic_block prevbb = prev_edges->last_cond_fallthru->src;
1884 redirect_edge_succ (prev_edges->last_cond_fallthru, new_bb);
1885 set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
1886 set_immediate_dominator (CDI_DOMINATORS, old_dest,
1887 recompute_dominator (CDI_DOMINATORS, old_dest));
1888 }
1889
1890 /* ?? Because stores may alias, they must happen in the exact
1891 sequence in which they originally happened. Save the position right after
1892 the (_lsm) store we just created so we can continue appending after
1893 it and maintain the original order. */
1894 {
1895 struct prev_flag_edges *p;
1896
1897 if (orig_ex->aux)
1898 orig_ex->aux = NULL;
1899 alloc_aux_for_edge (orig_ex, sizeof (struct prev_flag_edges));
1900 p = (struct prev_flag_edges *) orig_ex->aux;
1901 p->append_cond_position = then_old_edge;
1902 p->last_cond_fallthru = find_edge (new_bb, old_dest);
1903 orig_ex->aux = (void *) p;
1904 }
1905
1906 if (!loop_has_only_one_exit)
1907 for (gphi_iterator gpi = gsi_start_phis (old_dest);
1908 !gsi_end_p (gpi); gsi_next (&gpi))
1909 {
1910 gphi *phi = gpi.phi ();
1911 unsigned i;
1912
1913 for (i = 0; i < gimple_phi_num_args (phi); i++)
1914 if (gimple_phi_arg_edge (phi, i)->src == new_bb)
1915 {
1916 tree arg = gimple_phi_arg_def (phi, i);
1917 add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
1918 update_stmt (phi);
1919 }
1920 }
1921 /* Remove the original fall through edge. This was the
1922 single_succ_edge (new_bb). */
1923 EDGE_SUCC (new_bb, 0)->flags &= ~EDGE_FALLTHRU;
1924 }
1925
1926 /* When the location is a store to the reference, emit FLAG = true after it. */
1927
1928 struct sm_set_flag_if_changed
1929 {
1930 sm_set_flag_if_changed (tree flag_) : flag (flag_) {}
1931 bool operator () (mem_ref_loc_p loc);
1932 tree flag;
1933 };
1934
1935 bool
1936 sm_set_flag_if_changed::operator () (mem_ref_loc_p loc)
1937 {
1938 /* Only set the flag for writes. */
1939 if (is_gimple_assign (loc->stmt)
1940 && gimple_assign_lhs_ptr (loc->stmt) == loc->ref)
1941 {
1942 gimple_stmt_iterator gsi = gsi_for_stmt (loc->stmt);
1943 gimple stmt = gimple_build_assign (flag, boolean_true_node);
1944 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1945 }
1946 return false;
1947 }
1948
1949 /* Helper function for execute_sm. On every location where REF is
1950 stored to, set an appropriate flag indicating the store. */
1951
1952 static tree
1953 execute_sm_if_changed_flag_set (struct loop *loop, mem_ref_p ref)
1954 {
1955 tree flag;
1956 char *str = get_lsm_tmp_name (ref->mem.ref, ~0, "_flag");
1957 flag = create_tmp_reg (boolean_type_node, str);
1958 for_all_locs_in_loop (loop, ref, sm_set_flag_if_changed (flag));
1959 return flag;
1960 }
1961
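/* For illustration (a sketch, not code used by the pass): for a candidate
   reference with a single store location inside the loop,

     *p_1 = x_2;

   execute_sm_if_changed_flag_set inserts the flag update right after the
   store,

     *p_1 = x_2;
     lsm_flag = true;

   while locations that only load the reference are left untouched; the
   store itself is later rewritten to the lsm temporary by
   rewrite_mem_refs.  */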
1962 /* Executes store motion of memory reference REF from LOOP.
1963 Exits from the LOOP are stored in EXITS. The initialization of the
1964 temporary variable is put into the preheader of the loop, and assignments
1965 to the reference from the temporary variable are emitted on the exits. */
1966
1967 static void
1968 execute_sm (struct loop *loop, vec<edge> exits, mem_ref_p ref)
1969 {
1970 tree tmp_var, store_flag = NULL_TREE;
1971 unsigned i;
1972 gassign *load;
1973 struct fmt_data fmt_data;
1974 edge ex;
1975 struct lim_aux_data *lim_data;
1976 bool multi_threaded_model_p = false;
1977 gimple_stmt_iterator gsi;
1978
1979 if (dump_file && (dump_flags & TDF_DETAILS))
1980 {
1981 fprintf (dump_file, "Executing store motion of ");
1982 print_generic_expr (dump_file, ref->mem.ref, 0);
1983 fprintf (dump_file, " from loop %d\n", loop->num);
1984 }
1985
1986 tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
1987 get_lsm_tmp_name (ref->mem.ref, ~0));
1988
1989 fmt_data.loop = loop;
1990 fmt_data.orig_loop = loop;
1991 for_each_index (&ref->mem.ref, force_move_till, &fmt_data);
1992
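/* Use the conditional-store scheme when the loop preheader is inside a
   transaction or when introducing store data races is not allowed; an
   unconditional store on the exits could otherwise create a store on
   paths that never stored to the reference in the original program. */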
1993 if (bb_in_transaction (loop_preheader_edge (loop)->src)
1994 || !PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES))
1995 multi_threaded_model_p = true;
1996
1997 if (multi_threaded_model_p)
1998 store_flag = execute_sm_if_changed_flag_set (loop, ref);
1999
2000 rewrite_mem_refs (loop, ref, tmp_var);
2001
2002 /* Emit the load code inside the loop, at the position of the first
2003 reference to MEM, and mark it so that it is hoisted out of the loop
2004 by move_computations after all of its dependencies. */
2005 gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt);
2006
2007 /* FIXME/TODO: For the multi-threaded variant, we could avoid this
2008 load altogether, since the store is predicated by a flag. We
2009 could do the load only if it was originally in the loop. */
2010 load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref));
2011 lim_data = init_lim_data (load);
2012 lim_data->max_loop = loop;
2013 lim_data->tgt_loop = loop;
2014 gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2015
2016 if (multi_threaded_model_p)
2017 {
2018 load = gimple_build_assign (store_flag, boolean_false_node);
2019 lim_data = init_lim_data (load);
2020 lim_data->max_loop = loop;
2021 lim_data->tgt_loop = loop;
2022 gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2023 }
2024
2025 /* Sink the store to every exit from the loop. */
2026 FOR_EACH_VEC_ELT (exits, i, ex)
2027 if (!multi_threaded_model_p)
2028 {
2029 gassign *store;
2030 store = gimple_build_assign (unshare_expr (ref->mem.ref), tmp_var);
2031 gsi_insert_on_edge (ex, store);
2032 }
2033 else
2034 execute_sm_if_changed (ex, ref->mem.ref, tmp_var, store_flag);
2035 }
2036
2037 /* Hoists memory references MEM_REFS out of LOOP. EXITS is the list of exit
2038 edges of the LOOP. */
2039
2040 static void
2041 hoist_memory_references (struct loop *loop, bitmap mem_refs,
2042 vec<edge> exits)
2043 {
2044 mem_ref_p ref;
2045 unsigned i;
2046 bitmap_iterator bi;
2047
2048 EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
2049 {
2050 ref = memory_accesses.refs_list[i];
2051 execute_sm (loop, exits, ref);
2052 }
2053 }
2054
2055 struct ref_always_accessed
2056 {
2057 ref_always_accessed (struct loop *loop_, bool stored_p_)
2058 : loop (loop_), stored_p (stored_p_) {}
2059 bool operator () (mem_ref_loc_p loc);
2060 struct loop *loop;
2061 bool stored_p;
2062 };
2063
2064 bool
2065 ref_always_accessed::operator () (mem_ref_loc_p loc)
2066 {
2067 struct loop *must_exec;
2068
2069 if (!get_lim_data (loc->stmt))
2070 return false;
2071
2072 /* If we require an always executed store, make sure the statement
2073 stores to the reference. */
2074 if (stored_p)
2075 {
2076 tree lhs = gimple_get_lhs (loc->stmt);
2077 if (!lhs
2078 || lhs != *loc->ref)
2079 return false;
2080 }
2081
2082 must_exec = get_lim_data (loc->stmt)->always_executed_in;
2083 if (!must_exec)
2084 return false;
2085
2086 if (must_exec == loop
2087 || flow_loop_nested_p (must_exec, loop))
2088 return true;
2089
2090 return false;
2091 }
2092
2093 /* Returns true if REF is always accessed in LOOP. If STORED_P is true,
2094 make sure REF is always stored to in LOOP. */
2095
2096 static bool
2097 ref_always_accessed_p (struct loop *loop, mem_ref_p ref, bool stored_p)
2098 {
2099 return for_all_locs_in_loop (loop, ref,
2100 ref_always_accessed (loop, stored_p));
2101 }
2102
2103 /* Returns true if REF1 and REF2 are independent. */
2104
2105 static bool
2106 refs_independent_p (mem_ref_p ref1, mem_ref_p ref2)
2107 {
2108 if (ref1 == ref2)
2109 return true;
2110
2111 if (dump_file && (dump_flags & TDF_DETAILS))
2112 fprintf (dump_file, "Querying dependency of refs %u and %u: ",
2113 ref1->id, ref2->id);
2114
2115 if (mem_refs_may_alias_p (ref1, ref2, &memory_accesses.ttae_cache))
2116 {
2117 if (dump_file && (dump_flags & TDF_DETAILS))
2118 fprintf (dump_file, "dependent.\n");
2119 return false;
2120 }
2121 else
2122 {
2123 if (dump_file && (dump_flags & TDF_DETAILS))
2124 fprintf (dump_file, "independent.\n");
2125 return true;
2126 }
2127 }
2128
2129 /* Mark REF dependent on stores or loads (according to STORED_P) in LOOP
2130 and its super-loops. */
2131
2132 static void
2133 record_dep_loop (struct loop *loop, mem_ref_p ref, bool stored_p)
2134 {
2135 /* We can propagate dependent-in-loop bits up the loop
2136 hierarchy to all outer loops. */
2137 while (loop != current_loops->tree_root
2138 && bitmap_set_bit (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
2139 loop = loop_outer (loop);
2140 }
2141
2142 /* Returns true if REF is independent of all other memory references in
2143 LOOP. */
2144
2145 static bool
2146 ref_indep_loop_p_1 (struct loop *loop, mem_ref_p ref, bool stored_p)
2147 {
2148 bitmap refs_to_check;
2149 unsigned i;
2150 bitmap_iterator bi;
2151 mem_ref_p aref;
2152
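/* If REF is itself stored in LOOP it has to be independent of every
   reference in the loop; if it is only read, only the stored references
   can conflict with it. */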
2153 if (stored_p)
2154 refs_to_check = &memory_accesses.refs_in_loop[loop->num];
2155 else
2156 refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];
2157
2158 if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID))
2159 return false;
2160
2161 EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
2162 {
2163 aref = memory_accesses.refs_list[i];
2164 if (!refs_independent_p (ref, aref))
2165 return false;
2166 }
2167
2168 return true;
2169 }
2170
2171 /* Returns true if REF is independent of all other memory references in
2172 LOOP. Wrapper over ref_indep_loop_p_1, caching its results. */
2173
2174 static bool
2175 ref_indep_loop_p_2 (struct loop *loop, mem_ref_p ref, bool stored_p)
2176 {
2177 stored_p |= (ref->stored && bitmap_bit_p (ref->stored, loop->num));
2178
2179 if (bitmap_bit_p (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
2180 return true;
2181 if (bitmap_bit_p (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
2182 return false;
2183
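/* REF can be independent in LOOP only if it is also independent in every
   subloop; query the subloops first so that their results are cached as
   well. */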
2184 struct loop *inner = loop->inner;
2185 while (inner)
2186 {
2187 if (!ref_indep_loop_p_2 (inner, ref, stored_p))
2188 return false;
2189 inner = inner->next;
2190 }
2191
2192 bool indep_p = ref_indep_loop_p_1 (loop, ref, stored_p);
2193
2194 if (dump_file && (dump_flags & TDF_DETAILS))
2195 fprintf (dump_file, "Querying dependencies of ref %u in loop %d: %s\n",
2196 ref->id, loop->num, indep_p ? "independent" : "dependent");
2197
2198 /* Record the computed result in the cache. */
2199 if (indep_p)
2200 {
2201 if (bitmap_set_bit (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p))
2202 && stored_p)
2203 {
2204 /* If it's independent of all refs then it's independent
2205 of stores, too. */
2206 bitmap_set_bit (&ref->indep_loop, LOOP_DEP_BIT (loop->num, false));
2207 }
2208 }
2209 else
2210 {
2211 record_dep_loop (loop, ref, stored_p);
2212 if (!stored_p)
2213 {
2214 /* If it's dependent on stores it's dependent on
2215 all refs, too. */
2216 record_dep_loop (loop, ref, true);
2217 }
2218 }
2219
2220 return indep_p;
2221 }
2222
2223 /* Returns true if REF is independent of all other memory references in
2224 LOOP. */
2225
2226 static bool
2227 ref_indep_loop_p (struct loop *loop, mem_ref_p ref)
2228 {
2229 gcc_checking_assert (MEM_ANALYZABLE (ref));
2230
2231 return ref_indep_loop_p_2 (loop, ref, false);
2232 }
2233
2234 /* Returns true if we can perform store motion of REF from LOOP. */
2235
2236 static bool
2237 can_sm_ref_p (struct loop *loop, mem_ref_p ref)
2238 {
2239 tree base;
2240
2241 /* Can't hoist unanalyzable refs. */
2242 if (!MEM_ANALYZABLE (ref))
2243 return false;
2244
2245 /* It should be movable. */
2246 if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
2247 || TREE_THIS_VOLATILE (ref->mem.ref)
2248 || !for_each_index (&ref->mem.ref, may_move_till, loop))
2249 return false;
2250
2251 /* If it can throw, fail; we do not properly update EH info. */
2252 if (tree_could_throw_p (ref->mem.ref))
2253 return false;
2254
2255 /* If it can trap, it must be always executed in LOOP.
2256 Readonly memory locations may trap when storing to them, but
2257 tree_could_trap_p is a predicate for rvalues, so check that
2258 explicitly. */
2259 base = get_base_address (ref->mem.ref);
2260 if ((tree_could_trap_p (ref->mem.ref)
2261 || (DECL_P (base) && TREE_READONLY (base)))
2262 && !ref_always_accessed_p (loop, ref, true))
2263 return false;
2264
2265 /* And it must be independent of all other memory references
2266 in LOOP. */
2267 if (!ref_indep_loop_p (loop, ref))
2268 return false;
2269
2270 return true;
2271 }
2272
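/* For illustration (a sketch, not code used by the pass): in

     for (i = 0; i < n; i++)
       if (p)
	 *p = i;

   the store through P only executes when P is non-NULL, so sinking it
   unconditionally to the loop exits could introduce a trap; because the
   reference may trap and is not always stored to in the loop,
   can_sm_ref_p rejects it. */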
2273 /* Marks in REFS_TO_SM the references in LOOP for which store motion
2274 should be performed. SM_EXECUTED is the set of references for which
2275 store motion was already performed in one of the outer loops. */
2276
2277 static void
2278 find_refs_for_sm (struct loop *loop, bitmap sm_executed, bitmap refs_to_sm)
2279 {
2280 bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num];
2281 unsigned i;
2282 bitmap_iterator bi;
2283 mem_ref_p ref;
2284
2285 EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
2286 {
2287 ref = memory_accesses.refs_list[i];
2288 if (can_sm_ref_p (loop, ref))
2289 bitmap_set_bit (refs_to_sm, i);
2290 }
2291 }
2292
2293 /* Checks whether LOOP (with exits stored in EXITS array) is suitable
2294 for a store motion optimization (i.e. whether we can insert statements
2295 on its exits). */
2296
2297 static bool
2298 loop_suitable_for_sm (struct loop *loop ATTRIBUTE_UNUSED,
2299 vec<edge> exits)
2300 {
2301 unsigned i;
2302 edge ex;
2303
2304 FOR_EACH_VEC_ELT (exits, i, ex)
2305 if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
2306 return false;
2307
2308 return true;
2309 }
2310
2311 /* Try to perform store motion for all memory references modified inside
2312 LOOP. SM_EXECUTED is the bitmap of the memory references for which
2313 store motion was already executed in one of the outer loops. */
2314
2315 static void
2316 store_motion_loop (struct loop *loop, bitmap sm_executed)
2317 {
2318 vec<edge> exits = get_loop_exit_edges (loop);
2319 struct loop *subloop;
2320 bitmap sm_in_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
2321
2322 if (loop_suitable_for_sm (loop, exits))
2323 {
2324 find_refs_for_sm (loop, sm_executed, sm_in_loop);
2325 hoist_memory_references (loop, sm_in_loop, exits);
2326 }
2327 exits.release ();
2328
2329 bitmap_ior_into (sm_executed, sm_in_loop);
2330 for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
2331 store_motion_loop (subloop, sm_executed);
2332 bitmap_and_compl_into (sm_executed, sm_in_loop);
2333 BITMAP_FREE (sm_in_loop);
2334 }
2335
2336 /* Try to perform store motion for all memory references modified inside
2337 loops. */
2338
2339 static void
2340 store_motion (void)
2341 {
2342 struct loop *loop;
2343 bitmap sm_executed = BITMAP_ALLOC (&lim_bitmap_obstack);
2344
2345 for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
2346 store_motion_loop (loop, sm_executed);
2347
2348 BITMAP_FREE (sm_executed);
2349 gsi_commit_edge_inserts ();
2350 }
2351
2352 /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
2353 for each such basic block bb records the outermost loop for which execution
2354 of its header implies execution of bb. CONTAINS_CALL is the bitmap of
2355 blocks that contain a nonpure call. */
2356
2357 static void
2358 fill_always_executed_in_1 (struct loop *loop, sbitmap contains_call)
2359 {
2360 basic_block bb = NULL, *bbs, last = NULL;
2361 unsigned i;
2362 edge e;
2363 struct loop *inn_loop = loop;
2364
2365 if (ALWAYS_EXECUTED_IN (loop->header) == NULL)
2366 {
2367 bbs = get_loop_body_in_dom_order (loop);
2368
2369 for (i = 0; i < loop->num_nodes; i++)
2370 {
2371 edge_iterator ei;
2372 bb = bbs[i];
2373
2374 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
2375 last = bb;
2376
2377 if (bitmap_bit_p (contains_call, bb->index))
2378 break;
2379
2380 FOR_EACH_EDGE (e, ei, bb->succs)
2381 if (!flow_bb_inside_loop_p (loop, e->dest))
2382 break;
2383 if (e)
2384 break;
2385
2386 /* A loop might be infinite (TODO use simple loop analysis
2387 to disprove this if possible). */
2388 if (bb->flags & BB_IRREDUCIBLE_LOOP)
2389 break;
2390
2391 if (!flow_bb_inside_loop_p (inn_loop, bb))
2392 break;
2393
2394 if (bb->loop_father->header == bb)
2395 {
2396 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
2397 break;
2398
2399 /* In a loop that is always entered we may proceed anyway.
2400 But record that we entered it and stop once we leave it. */
2401 inn_loop = bb->loop_father;
2402 }
2403 }
2404
2405 while (1)
2406 {
2407 SET_ALWAYS_EXECUTED_IN (last, loop);
2408 if (last == loop->header)
2409 break;
2410 last = get_immediate_dominator (CDI_DOMINATORS, last);
2411 }
2412
2413 free (bbs);
2414 }
2415
2416 for (loop = loop->inner; loop; loop = loop->next)
2417 fill_always_executed_in_1 (loop, contains_call);
2418 }
2419
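/* For illustration (a sketch, not code used by the pass): in

     for (;;)
       {
	 x = *p;
	 if (x & 1)
	   y += x;
	 z = x * 2;
	 if (z > n)
	   break;
       }

   execution of the loop header implies execution of the blocks computing
   X and Z, but not of the block computing Y, so ALWAYS_EXECUTED_IN is set
   to the loop only for the former two (assuming no calls, no irreducible
   regions and no inner loops). */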
2420 /* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e.
2421 for each such basic block bb records the outermost loop for which execution
2422 of its header implies execution of bb. */
2423
2424 static void
2425 fill_always_executed_in (void)
2426 {
2427 sbitmap contains_call = sbitmap_alloc (last_basic_block_for_fn (cfun));
2428 basic_block bb;
2429 struct loop *loop;
2430
2431 bitmap_clear (contains_call);
2432 FOR_EACH_BB_FN (bb, cfun)
2433 {
2434 gimple_stmt_iterator gsi;
2435 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2436 {
2437 if (nonpure_call_p (gsi_stmt (gsi)))
2438 break;
2439 }
2440
2441 if (!gsi_end_p (gsi))
2442 bitmap_set_bit (contains_call, bb->index);
2443 }
2444
2445 for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
2446 fill_always_executed_in_1 (loop, contains_call);
2447
2448 sbitmap_free (contains_call);
2449 }
2450
2451
2452 /* Compute the global information needed by the loop invariant motion pass. */
2453
2454 static void
2455 tree_ssa_lim_initialize (void)
2456 {
2457 struct loop *loop;
2458 unsigned i;
2459
2460 bitmap_obstack_initialize (&lim_bitmap_obstack);
2461 gcc_obstack_init (&mem_ref_obstack);
2462 lim_aux_data_map = new hash_map<gimple, lim_aux_data *>;
2463
2464 if (flag_tm)
2465 compute_transaction_bits ();
2466
2467 alloc_aux_for_edges (0);
2468
2469 memory_accesses.refs = new hash_table<mem_ref_hasher> (100);
2470 memory_accesses.refs_list.create (100);
2471 /* Allocate a special, unanalyzable mem-ref with ID zero. */
2472 memory_accesses.refs_list.quick_push
2473 (mem_ref_alloc (error_mark_node, 0, UNANALYZABLE_MEM_ID));
2474
2475 memory_accesses.refs_in_loop.create (number_of_loops (cfun));
2476 memory_accesses.refs_in_loop.quick_grow (number_of_loops (cfun));
2477 memory_accesses.refs_stored_in_loop.create (number_of_loops (cfun));
2478 memory_accesses.refs_stored_in_loop.quick_grow (number_of_loops (cfun));
2479 memory_accesses.all_refs_stored_in_loop.create (number_of_loops (cfun));
2480 memory_accesses.all_refs_stored_in_loop.quick_grow (number_of_loops (cfun));
2481
2482 for (i = 0; i < number_of_loops (cfun); i++)
2483 {
2484 bitmap_initialize (&memory_accesses.refs_in_loop[i],
2485 &lim_bitmap_obstack);
2486 bitmap_initialize (&memory_accesses.refs_stored_in_loop[i],
2487 &lim_bitmap_obstack);
2488 bitmap_initialize (&memory_accesses.all_refs_stored_in_loop[i],
2489 &lim_bitmap_obstack);
2490 }
2491
2492 memory_accesses.ttae_cache = NULL;
2493
2494 /* Initialize bb_loop_postorder with a mapping from loop->num to
2495 its postorder index. */
2496 i = 0;
2497 bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun));
2498 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
2499 bb_loop_postorder[loop->num] = i++;
2500 }
2501
2502 /* Cleans up after the invariant motion pass. */
2503
2504 static void
2505 tree_ssa_lim_finalize (void)
2506 {
2507 basic_block bb;
2508 unsigned i;
2509 mem_ref_p ref;
2510
2511 free_aux_for_edges ();
2512
2513 FOR_EACH_BB_FN (bb, cfun)
2514 SET_ALWAYS_EXECUTED_IN (bb, NULL);
2515
2516 bitmap_obstack_release (&lim_bitmap_obstack);
2517 delete lim_aux_data_map;
2518
2519 delete memory_accesses.refs;
2520 memory_accesses.refs = NULL;
2521
2522 FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
2523 memref_free (ref);
2524 memory_accesses.refs_list.release ();
2525 obstack_free (&mem_ref_obstack, NULL);
2526
2527 memory_accesses.refs_in_loop.release ();
2528 memory_accesses.refs_stored_in_loop.release ();
2529 memory_accesses.all_refs_stored_in_loop.release ();
2530
2531 if (memory_accesses.ttae_cache)
2532 free_affine_expand_cache (&memory_accesses.ttae_cache);
2533
2534 free (bb_loop_postorder);
2535 }
2536
2537 /* Moves invariants from loops. Only "expensive" invariants are moved out --
2538 i.e. those that are likely to be a win regardless of the register pressure. */
2539
2540 unsigned int
2541 tree_ssa_lim (void)
2542 {
2543 unsigned int todo;
2544
2545 tree_ssa_lim_initialize ();
2546
2547 /* Gathers information about memory accesses in the loops. */
2548 analyze_memory_references ();
2549
2550 /* Fills ALWAYS_EXECUTED_IN information for basic blocks. */
2551 fill_always_executed_in ();
2552
2553 /* For each statement determine the outermost loop in which it is
2554 invariant and the cost of computing the invariant. */
2555 invariantness_dom_walker (CDI_DOMINATORS)
2556 .walk (cfun->cfg->x_entry_block_ptr);
2557
2558 /* Execute store motion. Force the necessary invariants to be moved
2559 out of the loops as well. */
2560 store_motion ();
2561
2562 /* Move the expressions that are expensive enough. */
2563 todo = move_computations ();
2564
2565 tree_ssa_lim_finalize ();
2566
2567 return todo;
2568 }
2569
2570 /* Loop invariant motion pass. */
2571
2572 namespace {
2573
2574 const pass_data pass_data_lim =
2575 {
2576 GIMPLE_PASS, /* type */
2577 "lim", /* name */
2578 OPTGROUP_LOOP, /* optinfo_flags */
2579 TV_LIM, /* tv_id */
2580 PROP_cfg, /* properties_required */
2581 0, /* properties_provided */
2582 0, /* properties_destroyed */
2583 0, /* todo_flags_start */
2584 0, /* todo_flags_finish */
2585 };
2586
2587 class pass_lim : public gimple_opt_pass
2588 {
2589 public:
2590 pass_lim (gcc::context *ctxt)
2591 : gimple_opt_pass (pass_data_lim, ctxt)
2592 {}
2593
2594 /* opt_pass methods: */
2595 opt_pass * clone () { return new pass_lim (m_ctxt); }
2596 virtual bool gate (function *) { return flag_tree_loop_im != 0; }
2597 virtual unsigned int execute (function *);
2598
2599 }; // class pass_lim
2600
2601 unsigned int
2602 pass_lim::execute (function *fun)
2603 {
2604 if (number_of_loops (fun) <= 1)
2605 return 0;
2606
2607 return tree_ssa_lim ();
2608 }
2609
2610 } // anon namespace
2611
2612 gimple_opt_pass *
2613 make_pass_lim (gcc::context *ctxt)
2614 {
2615 return new pass_lim (ctxt);
2616 }
2617
2618