/* Loop invariant motion.
   Copyright (C) 2003-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "tree-pass.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "tree-affine.h"
#include "tree-ssa-propagate.h"
#include "trans-mem.h"
#include "gimple-fold.h"
#include "tree-scalar-evolution.h"
#include "tree-ssa-loop-niter.h"
/* TODO:  Support for predicated code motion.  I.e.

   while (1)
     {
       if (cond)
	 {
	   a = inv;
	   something;
	 }
     }

   where COND and INV are invariants, but evaluating INV may trap or be
   invalid for some other reason if !COND.  This may be transformed to

   if (cond)
     a = inv;
   while (1)
     {
       if (cond)
	 something;
     }  */
/* The auxiliary data kept for each statement.  */

struct lim_aux_data
{
  class loop *max_loop;		/* The outermost loop in that the statement
				   is invariant.  */

  class loop *tgt_loop;		/* The loop out of that we want to move the
				   invariant.  */

  class loop *always_executed_in;
				/* The outermost loop for that we are sure
				   the statement is executed if the loop
				   is entered.  */

  unsigned cost;		/* Cost of the computation performed by the
				   statement.  */

  unsigned ref;			/* The simple_mem_ref in this stmt or 0.  */

  vec<gimple *> depends;	/* Vector of statements that must be also
				   hoisted out of the loop when this statement
				   is hoisted; i.e. those that define the
				   operands of the statement and are inside of
				   the MAX_LOOP loop.  */
};
/* Maps statements to their lim_aux_data.  */

static hash_map<gimple *, lim_aux_data *> *lim_aux_data_map;
/* Description of a memory reference location.  */

struct mem_ref_loc
{
  tree *ref;			/* The reference itself.  */
  gimple *stmt;			/* The statement in that it occurs.  */
};
/* Description of a memory reference.  */

class im_mem_ref
{
public:
  unsigned id : 30;		/* ID assigned to the memory reference
				   (its index in memory_accesses.refs_list)  */
  unsigned ref_canonical : 1;   /* Whether mem.ref was canonicalized.  */
  unsigned ref_decomposed : 1;  /* Whether the ref was hashed from mem.  */
  hashval_t hash;		/* Its hash value.  */

  /* The memory access itself and associated caching of alias-oracle
     query meta-data.  */
  ao_ref mem;

  bitmap stored;		/* The set of loops in that this memory location
				   is stored to.  */
  bitmap loaded;		/* The set of loops in that this memory location
				   is loaded from.  */
  vec<mem_ref_loc>		accesses_in_loop;
				/* The locations of the accesses.  Vector
				   indexed by the loop number.  */

  /* The following set is computed on demand.  */
  bitmap_head dep_loop;		/* The set of loops in that the memory
				   reference is {in,}dependent in
				   different modes.  */
};
/* We use six bits per loop in the ref->dep_loop bitmap to record
   the dep_kind x dep_state combinations.  */

enum dep_kind { lim_raw, sm_war, sm_waw };
enum dep_state { dep_unknown, dep_independent, dep_dependent };
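
/* A worked example of the encoding (illustrative, not part of the
   original sources): for the loop with num == 2 and kind sm_war (== 1)
   the pair of bits starts at 6 * 2 + 1 * 2 == 14, so bit 14 set means
   dep_independent and bit 15 set means dep_dependent; neither bit set
   means dep_unknown.  */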
/* Populate the loop dependence cache of REF for LOOP, KIND with STATE.  */

static void
record_loop_dependence (class loop *loop, im_mem_ref *ref,
			dep_kind kind, dep_state state)
{
  gcc_assert (state != dep_unknown);
  unsigned bit = 6 * loop->num + kind * 2 + (state == dep_dependent ? 1 : 0);
  bitmap_set_bit (&ref->dep_loop, bit);
}
/* Query the loop dependence cache of REF for LOOP, KIND.  */

static dep_state
query_loop_dependence (class loop *loop, im_mem_ref *ref, dep_kind kind)
{
  unsigned first_bit = 6 * loop->num + kind * 2;
  if (bitmap_bit_p (&ref->dep_loop, first_bit))
    return dep_independent;
  else if (bitmap_bit_p (&ref->dep_loop, first_bit + 1))
    return dep_dependent;
  return dep_unknown;
}
/* Mem_ref hashtable helpers.  */

struct mem_ref_hasher : nofree_ptr_hash <im_mem_ref>
{
  typedef ao_ref *compare_type;
  static inline hashval_t hash (const im_mem_ref *);
  static inline bool equal (const im_mem_ref *, const ao_ref *);
};
/* A hash function for class im_mem_ref object OBJ.  */

inline hashval_t
mem_ref_hasher::hash (const im_mem_ref *mem)
{
  return mem->hash;
}
/* An equality function for class im_mem_ref object MEM1 with
   memory reference OBJ2.  */

inline bool
mem_ref_hasher::equal (const im_mem_ref *mem1, const ao_ref *obj2)
{
  if (obj2->max_size_known_p ())
    return (mem1->ref_decomposed
	    && operand_equal_p (mem1->mem.base, obj2->base, 0)
	    && known_eq (mem1->mem.offset, obj2->offset)
	    && known_eq (mem1->mem.size, obj2->size)
	    && known_eq (mem1->mem.max_size, obj2->max_size)
	    && mem1->mem.volatile_p == obj2->volatile_p
	    && (mem1->mem.ref_alias_set == obj2->ref_alias_set
		/* We are not canonicalizing alias-sets but for the
		   special-case we didn't canonicalize yet and the
		   incoming ref is an alias-set zero MEM we pick
		   the correct one already.  */
		|| (!mem1->ref_canonical
		    && (TREE_CODE (obj2->ref) == MEM_REF
			|| TREE_CODE (obj2->ref) == TARGET_MEM_REF)
		    && obj2->ref_alias_set == 0)
		/* Likewise if there's a canonical ref with alias-set zero.  */
		|| (mem1->ref_canonical && mem1->mem.ref_alias_set == 0))
	    && types_compatible_p (TREE_TYPE (mem1->mem.ref),
				   TREE_TYPE (obj2->ref)));
  else
    return operand_equal_p (mem1->mem.ref, obj2->ref, 0);
}
/* Description of memory accesses in loops.  */

static struct
{
  /* The hash table of memory references accessed in loops.  */
  hash_table<mem_ref_hasher> *refs;

  /* The list of memory references.  */
  vec<im_mem_ref *> refs_list;

  /* The set of memory references accessed in each loop.  */
  vec<bitmap_head> refs_loaded_in_loop;

  /* The set of memory references stored in each loop.  */
  vec<bitmap_head> refs_stored_in_loop;

  /* The set of memory references stored in each loop, including subloops.  */
  vec<bitmap_head> all_refs_stored_in_loop;

  /* Cache for expanding memory addresses.  */
  hash_map<tree, name_expansion *> *ttae_cache;
} memory_accesses;

/* Obstack for the bitmaps in the above data structures.  */
static bitmap_obstack lim_bitmap_obstack;
static obstack mem_ref_obstack;
static bool ref_indep_loop_p (class loop *, im_mem_ref *, dep_kind);
static bool ref_always_accessed_p (class loop *, im_mem_ref *, bool);
static bool refs_independent_p (im_mem_ref *, im_mem_ref *, bool = true);
/* Minimum cost of an expensive expression.  */
#define LIM_EXPENSIVE ((unsigned) param_lim_expensive)

/* The outermost loop for which execution of the header guarantees that the
   block will be executed.  */
#define ALWAYS_EXECUTED_IN(BB) ((class loop *) (BB)->aux)
#define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))

/* ID of the shared unanalyzable mem.  */
#define UNANALYZABLE_MEM_ID 0

/* Whether the reference was analyzable.  */
#define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID)
static struct lim_aux_data *
init_lim_data (gimple *stmt)
{
  lim_aux_data *p = XCNEW (struct lim_aux_data);
  lim_aux_data_map->put (stmt, p);

  return p;
}
static struct lim_aux_data *
get_lim_data (gimple *stmt)
{
  lim_aux_data **p = lim_aux_data_map->get (stmt);
  if (!p)
    return NULL;

  return *p;
}
/* Releases the memory occupied by DATA.  */

static void
free_lim_aux_data (struct lim_aux_data *data)
{
  data->depends.release ();
  free (data);
}
static void
clear_lim_data (gimple *stmt)
{
  lim_aux_data **p = lim_aux_data_map->get (stmt);
  if (!p)
    return;

  free_lim_aux_data (*p);
  *p = NULL;
}
/* The possibilities of statement movement.  */
enum move_pos
  {
    MOVE_IMPOSSIBLE,		/* No movement -- side effect expression.  */
    MOVE_PRESERVE_EXECUTION,	/* Must not cause the non-executed statement
				   become executed -- memory accesses, ... */
    MOVE_POSSIBLE		/* Unlimited movement.  */
  };
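
/* An illustrative sketch of the distinction (not from the original
   sources): in

     for (i = 0; i < n; i++)
       if (d != 0)
	 x[i] = a / d;

   the division a / d is invariant but may trap when d == 0, so it can
   only be hoisted in the MOVE_PRESERVE_EXECUTION sense, while a plain
   invariant addition would be MOVE_POSSIBLE.  */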
/* If it is possible to hoist the statement STMT unconditionally,
   returns MOVE_POSSIBLE.
   If it is possible to hoist the statement STMT, but we must avoid making
   it executed if it would not be executed in the original program (e.g.
   because it may trap), return MOVE_PRESERVE_EXECUTION.
   Otherwise return MOVE_IMPOSSIBLE.  */

enum move_pos
movement_possibility (gimple *stmt)
{
  tree lhs;
  enum move_pos ret = MOVE_POSSIBLE;

  if (flag_unswitch_loops
      && gimple_code (stmt) == GIMPLE_COND)
    {
      /* If we perform unswitching, force the operands of the invariant
	 condition to be moved out of the loop.  */
      return MOVE_POSSIBLE;
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_phi_num_args (stmt) <= 2
      && !virtual_operand_p (gimple_phi_result (stmt))
      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
    return MOVE_POSSIBLE;

  if (gimple_get_lhs (stmt) == NULL_TREE)
    return MOVE_IMPOSSIBLE;

  if (gimple_vdef (stmt))
    return MOVE_IMPOSSIBLE;

  if (stmt_ends_bb_p (stmt)
      || gimple_has_volatile_ops (stmt)
      || gimple_has_side_effects (stmt)
      || stmt_could_throw_p (cfun, stmt))
    return MOVE_IMPOSSIBLE;

  if (is_gimple_call (stmt))
    {
      /* While a pure or const call is guaranteed to have no side effects, we
	 cannot move it arbitrarily.  Consider code like

	 char *s = something ();

	 while (1)
	   {
	     if (s)
	       t = strlen (s);
	     else
	       t = 0;
	   }

	 Here the strlen call cannot be moved out of the loop, even though
	 s is invariant.  In addition to possibly creating a call with
	 invalid arguments, moving out a function call that is not executed
	 may cause performance regressions in case the call is costly and
	 not executed at all.  */
      ret = MOVE_PRESERVE_EXECUTION;
      lhs = gimple_call_lhs (stmt);
    }
  else if (is_gimple_assign (stmt))
    lhs = gimple_assign_lhs (stmt);
  else
    return MOVE_IMPOSSIBLE;

  if (TREE_CODE (lhs) == SSA_NAME
      && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
    return MOVE_IMPOSSIBLE;

  if (TREE_CODE (lhs) != SSA_NAME
      || gimple_could_trap_p (stmt))
    return MOVE_PRESERVE_EXECUTION;

  /* Non local loads in a transaction cannot be hoisted out.  Well,
     unless the load happens on every path out of the loop, but we
     don't take this into account yet.  */
  if (flag_tm
      && gimple_in_transaction (stmt)
      && gimple_assign_single_p (stmt))
    {
      tree rhs = gimple_assign_rhs1 (stmt);
      if (DECL_P (rhs) && is_global_var (rhs))
	{
	  if (dump_file)
	    {
	      fprintf (dump_file, "Cannot hoist conditional load of ");
	      print_generic_expr (dump_file, rhs, TDF_SLIM);
	      fprintf (dump_file, " because it is in a transaction.\n");
	    }
	  return MOVE_IMPOSSIBLE;
	}
    }

  return ret;
}
/* Suppose that operand DEF is used inside the LOOP.  Returns the outermost
   loop to that we could move the expression using DEF if it did not have
   other operands, i.e. the outermost loop enclosing LOOP in that the value
   of DEF is invariant.  */

static class loop *
outermost_invariant_loop (tree def, class loop *loop)
{
  gimple *def_stmt;
  basic_block def_bb;
  class loop *max_loop;
  struct lim_aux_data *lim_data;

  if (!def)
    return superloop_at_depth (loop, 1);

  if (TREE_CODE (def) != SSA_NAME)
    {
      gcc_assert (is_gimple_min_invariant (def));
      return superloop_at_depth (loop, 1);
    }

  def_stmt = SSA_NAME_DEF_STMT (def);
  def_bb = gimple_bb (def_stmt);
  if (!def_bb)
    return superloop_at_depth (loop, 1);

  max_loop = find_common_loop (loop, def_bb->loop_father);

  lim_data = get_lim_data (def_stmt);
  if (lim_data != NULL && lim_data->max_loop != NULL)
    max_loop = find_common_loop (max_loop,
				 loop_outer (lim_data->max_loop));
  if (max_loop == loop)
    return NULL;
  max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);

  return max_loop;
}
/* DATA is a structure containing information associated with a statement
   inside LOOP.  DEF is one of the operands of this statement.

   Find the outermost loop enclosing LOOP in that value of DEF is invariant
   and record this in DATA->max_loop field.  If DEF itself is defined inside
   this loop as well (i.e. we need to hoist it out of the loop if we want
   to hoist the statement represented by DATA), record the statement in that
   DEF is defined to the DATA->depends list.  Additionally if ADD_COST is true,
   add the cost of the computation of DEF to the DATA->cost.

   If DEF is not invariant in LOOP, return false.  Otherwise return TRUE.  */

static bool
add_dependency (tree def, struct lim_aux_data *data, class loop *loop,
		bool add_cost)
{
  gimple *def_stmt = SSA_NAME_DEF_STMT (def);
  basic_block def_bb = gimple_bb (def_stmt);
  class loop *max_loop;
  struct lim_aux_data *def_data;

  if (!def_bb)
    return true;

  max_loop = outermost_invariant_loop (def, loop);
  if (!max_loop)
    return false;

  if (flow_loop_nested_p (data->max_loop, max_loop))
    data->max_loop = max_loop;

  def_data = get_lim_data (def_stmt);
  if (!def_data)
    return true;

  if (add_cost
      /* Only add the cost if the statement defining DEF is inside LOOP,
	 i.e. if it is likely that by moving the invariants dependent
	 on it, we will be able to avoid creating a new register for
	 it (since it will be only used in these dependent invariants).  */
      && def_bb->loop_father == loop)
    data->cost += def_data->cost;

  data->depends.safe_push (def_stmt);

  return true;
}
/* Returns an estimate for a cost of statement STMT.  The values here
   are just ad-hoc constants, similar to costs for inlining.  */

static unsigned
stmt_cost (gimple *stmt)
{
  /* Always try to create possibilities for unswitching.  */
  if (gimple_code (stmt) == GIMPLE_COND
      || gimple_code (stmt) == GIMPLE_PHI)
    return LIM_EXPENSIVE;

  /* We should be hoisting calls if possible.  */
  if (is_gimple_call (stmt))
    {
      tree fndecl;

      /* Unless the call is a builtin_constant_p; this always folds to a
	 constant, so moving it is useless.  */
      fndecl = gimple_call_fndecl (stmt);
      if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_CONSTANT_P))
	return 0;

      return LIM_EXPENSIVE;
    }

  /* Hoisting memory references out should almost surely be a win.  */
  if (gimple_references_memory_p (stmt))
    return LIM_EXPENSIVE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return 1;

  switch (gimple_assign_rhs_code (stmt))
    {
    case MULT_EXPR:
    case WIDEN_MULT_EXPR:
    case WIDEN_MULT_PLUS_EXPR:
    case WIDEN_MULT_MINUS_EXPR:
    case TRUNC_DIV_EXPR:
    case CEIL_DIV_EXPR:
    case FLOOR_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case EXACT_DIV_EXPR:
    case CEIL_MOD_EXPR:
    case FLOOR_MOD_EXPR:
    case ROUND_MOD_EXPR:
    case TRUNC_MOD_EXPR:
    case RDIV_EXPR:
      /* Division and multiplication are usually expensive.  */
      return LIM_EXPENSIVE;

    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case WIDEN_LSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      /* Shifts and rotates are usually expensive.  */
      return LIM_EXPENSIVE;

    case CONSTRUCTOR:
      /* Make vector construction cost proportional to the number
	 of elements.  */
      return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));

    case SSA_NAME:
    case PAREN_EXPR:
      /* Whether or not something is wrapped inside a PAREN_EXPR
	 should not change move cost.  Nor should an intermediate
	 unpropagated SSA name copy.  */
      return 0;

    default:
      return 1;
    }
}
/* Finds the outermost loop between OUTER and LOOP in that the memory reference
   REF is independent.  If REF is not independent in LOOP, NULL is returned
   instead.  */

static class loop *
outermost_indep_loop (class loop *outer, class loop *loop, im_mem_ref *ref)
{
  class loop *aloop;

  if (ref->stored && bitmap_bit_p (ref->stored, loop->num))
    return NULL;

  for (aloop = outer;
       aloop != loop;
       aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
    if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
	&& ref_indep_loop_p (aloop, ref, lim_raw))
      return aloop;

  if (ref_indep_loop_p (loop, ref, lim_raw))
    return loop;
  else
    return NULL;
}
/* If there is a simple load or store to a memory reference in STMT, returns
   the location of the memory reference, and sets IS_STORE according to whether
   it is a store or load.  Otherwise, returns NULL.  */

static tree *
simple_mem_ref_in_stmt (gimple *stmt, bool *is_store)
{
  tree *lhs, *rhs;

  /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns.  */
  if (!gimple_assign_single_p (stmt))
    return NULL;

  lhs = gimple_assign_lhs_ptr (stmt);
  rhs = gimple_assign_rhs1_ptr (stmt);

  if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
    {
      *is_store = false;
      return rhs;
    }
  else if (gimple_vdef (stmt)
	   && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
    {
      *is_store = true;
      return lhs;
    }
  else
    return NULL;
}
/* From a controlling predicate in DOM determine the arguments from
   the PHI node PHI that are chosen if the predicate evaluates to
   true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
   they are non-NULL.  Returns true if the arguments can be determined,
   else return false.  */

static bool
extract_true_false_args_from_phi (basic_block dom, gphi *phi,
				  tree *true_arg_p, tree *false_arg_p)
{
  edge te, fe;
  if (! extract_true_false_controlled_edges (dom, gimple_bb (phi),
					     &te, &fe))
    return false;

  if (true_arg_p)
    *true_arg_p = PHI_ARG_DEF (phi, te->dest_idx);
  if (false_arg_p)
    *false_arg_p = PHI_ARG_DEF (phi, fe->dest_idx);

  return true;
}
/* Determine the outermost loop to that it is possible to hoist a statement
   STMT and store it to LIM_DATA (STMT)->max_loop.  To do this we determine
   the outermost loop in that the value computed by STMT is invariant.
   If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
   we preserve the fact whether STMT is executed.  It also fills other related
   information to LIM_DATA (STMT).

   The function returns false if STMT cannot be hoisted outside of the loop it
   is defined in, and true otherwise.  */

static bool
determine_max_movement (gimple *stmt, bool must_preserve_exec)
{
  basic_block bb = gimple_bb (stmt);
  class loop *loop = bb->loop_father;
  class loop *level;
  struct lim_aux_data *lim_data = get_lim_data (stmt);
  tree val;
  ssa_op_iter iter;

  if (must_preserve_exec)
    level = ALWAYS_EXECUTED_IN (bb);
  else
    level = superloop_at_depth (loop, 1);
  lim_data->max_loop = level;

  if (gphi *phi = dyn_cast <gphi *> (stmt))
    {
      use_operand_p use_p;
      unsigned min_cost = UINT_MAX;
      unsigned total_cost = 0;
      struct lim_aux_data *def_data;

      /* We will end up promoting dependencies to be unconditionally
	 evaluated.  For this reason the PHI cost (and thus the
	 cost we remove from the loop by doing the invariant motion)
	 is that of the cheapest PHI argument dependency chain.  */
      FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
	{
	  val = USE_FROM_PTR (use_p);

	  if (TREE_CODE (val) != SSA_NAME)
	    {
	      /* Assign const 1 to constants.  */
	      min_cost = MIN (min_cost, 1);
	      total_cost += 1;
	      continue;
	    }
	  if (!add_dependency (val, lim_data, loop, false))
	    return false;

	  gimple *def_stmt = SSA_NAME_DEF_STMT (val);
	  if (gimple_bb (def_stmt)
	      && gimple_bb (def_stmt)->loop_father == loop)
	    {
	      def_data = get_lim_data (def_stmt);
	      if (def_data)
		{
		  min_cost = MIN (min_cost, def_data->cost);
		  total_cost += def_data->cost;
		}
	    }
	}

      min_cost = MIN (min_cost, total_cost);
      lim_data->cost += min_cost;

      if (gimple_phi_num_args (phi) > 1)
	{
	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
	  gimple *cond;
	  if (gsi_end_p (gsi_last_bb (dom)))
	    return false;
	  cond = gsi_stmt (gsi_last_bb (dom));
	  if (gimple_code (cond) != GIMPLE_COND)
	    return false;
	  /* Verify that this is an extended form of a diamond and
	     the PHI arguments are completely controlled by the
	     predicate in DOM.  */
	  if (!extract_true_false_args_from_phi (dom, phi, NULL, NULL))
	    return false;

	  /* Fold in dependencies and cost of the condition.  */
	  FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
	    {
	      if (!add_dependency (val, lim_data, loop, false))
		return false;
	      def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
	      if (def_data)
		lim_data->cost += def_data->cost;
	    }

	  /* We want to avoid unconditionally executing very expensive
	     operations.  As costs for our dependencies cannot be
	     negative just claim we are not invariant for this case.
	     We also are not sure whether the control-flow inside the
	     loop will vanish.  */
	  if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
	      && !(min_cost != 0
		   && total_cost / min_cost <= 2))
	    return false;

	  /* Assume that the control-flow in the loop will vanish.
	     ??? We should verify this and not artificially increase
	     the cost if that is not the case.  */
	  lim_data->cost += stmt_cost (stmt);
	}

      return true;
    }
  else
    FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
      if (!add_dependency (val, lim_data, loop, true))
	return false;

  if (gimple_vuse (stmt))
    {
      im_mem_ref *ref
	= lim_data ? memory_accesses.refs_list[lim_data->ref] : NULL;
      if (ref
	  && MEM_ANALYZABLE (ref))
	{
	  lim_data->max_loop = outermost_indep_loop (lim_data->max_loop,
						     loop, ref);
	  if (!lim_data->max_loop)
	    return false;
	}
      else if (! add_dependency (gimple_vuse (stmt), lim_data, loop, false))
	return false;
    }

  lim_data->cost += stmt_cost (stmt);

  return true;
}
/* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
   and that one of the operands of this statement is computed by STMT.
   Ensure that STMT (together with all the statements that define its
   operands) is hoisted at least out of the loop LEVEL.  */

static void
set_level (gimple *stmt, class loop *orig_loop, class loop *level)
{
  class loop *stmt_loop = gimple_bb (stmt)->loop_father;
  struct lim_aux_data *lim_data;
  gimple *dep_stmt;
  unsigned i;

  stmt_loop = find_common_loop (orig_loop, stmt_loop);
  lim_data = get_lim_data (stmt);
  if (lim_data != NULL && lim_data->tgt_loop != NULL)
    stmt_loop = find_common_loop (stmt_loop,
				  loop_outer (lim_data->tgt_loop));
  if (flow_loop_nested_p (stmt_loop, level))
    return;

  gcc_assert (level == lim_data->max_loop
	      || flow_loop_nested_p (lim_data->max_loop, level));

  lim_data->tgt_loop = level;
  FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
    set_level (dep_stmt, orig_loop, level);
}
/* Determines an outermost loop from that we want to hoist the statement STMT.
   For now we choose the outermost possible loop.  TODO -- use profiling
   information to set it more sanely.  */

static void
set_profitable_level (gimple *stmt)
{
  set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
}
/* Returns true if STMT is a call that has side effects.  */

static bool
nonpure_call_p (gimple *stmt)
{
  if (gimple_code (stmt) != GIMPLE_CALL)
    return false;

  return gimple_has_side_effects (stmt);
}
/* Rewrite a/b to a*(1/b).  Return the invariant stmt to process.  */

static gimple *
rewrite_reciprocal (gimple_stmt_iterator *bsi)
{
  gassign *stmt, *stmt1, *stmt2;
  tree name, lhs, type;
  tree real_one;
  gimple_stmt_iterator gsi;

  stmt = as_a <gassign *> (gsi_stmt (*bsi));
  lhs = gimple_assign_lhs (stmt);
  type = TREE_TYPE (lhs);

  real_one = build_one_cst (type);

  name = make_temp_ssa_name (type, NULL, "reciptmp");
  stmt1 = gimple_build_assign (name, RDIV_EXPR, real_one,
			       gimple_assign_rhs2 (stmt));
  stmt2 = gimple_build_assign (lhs, MULT_EXPR, name,
			       gimple_assign_rhs1 (stmt));

  /* Replace division stmt with reciprocal and multiply stmts.
     The multiply stmt is not invariant, so update iterator
     and avoid rescanning.  */
  gsi = *bsi;
  gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
  gsi_replace (&gsi, stmt2, true);

  /* Continue processing with invariant reciprocal statement.  */
  return stmt1;
}
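
/* Source-level sketch of the effect (illustrative, not part of the
   original sources), for an invariant divisor b:

     for (i = 0; i < n; i++)	     recip = 1.0 / b;
       x[i] = a[i] / b;		==>  for (i = 0; i < n; i++)
				       x[i] = a[i] * recip;

   The reciprocal statement is the invariant that the caller continues
   processing and eventually hoists.  */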
/* Check if the pattern at *BSI is a bittest of the form
   (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0.  */

static gimple *
rewrite_bittest (gimple_stmt_iterator *bsi)
{
  gassign *stmt;
  gimple *stmt1;
  gassign *stmt2;
  gimple *use_stmt;
  gcond *cond_stmt;
  tree lhs, name, t, a, b;
  use_operand_p use;

  stmt = as_a <gassign *> (gsi_stmt (*bsi));
  lhs = gimple_assign_lhs (stmt);

  /* Verify that the single use of lhs is a comparison against zero.  */
  if (TREE_CODE (lhs) != SSA_NAME
      || !single_imm_use (lhs, &use, &use_stmt))
    return stmt;
  cond_stmt = dyn_cast <gcond *> (use_stmt);
  if (!cond_stmt)
    return stmt;
  if (gimple_cond_lhs (cond_stmt) != lhs
      || (gimple_cond_code (cond_stmt) != NE_EXPR
	  && gimple_cond_code (cond_stmt) != EQ_EXPR)
      || !integer_zerop (gimple_cond_rhs (cond_stmt)))
    return stmt;

  /* Get at the operands of the shift.  The rhs is TMP1 & 1.  */
  stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
  if (gimple_code (stmt1) != GIMPLE_ASSIGN)
    return stmt;

  /* There is a conversion in between possibly inserted by fold.  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
    {
      t = gimple_assign_rhs1 (stmt1);
      if (TREE_CODE (t) != SSA_NAME
	  || !has_single_use (t))
	return stmt;
      stmt1 = SSA_NAME_DEF_STMT (t);
      if (gimple_code (stmt1) != GIMPLE_ASSIGN)
	return stmt;
    }

  /* Verify that B is loop invariant but A is not.  Verify that with
     all the stmt walking we are still in the same loop.  */
  if (gimple_assign_rhs_code (stmt1) != RSHIFT_EXPR
      || loop_containing_stmt (stmt1) != loop_containing_stmt (stmt))
    return stmt;

  a = gimple_assign_rhs1 (stmt1);
  b = gimple_assign_rhs2 (stmt1);

  if (outermost_invariant_loop (b, loop_containing_stmt (stmt1)) != NULL
      && outermost_invariant_loop (a, loop_containing_stmt (stmt1)) == NULL)
    {
      gimple_stmt_iterator rsi;

      /* 1 << B */
      t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
		       build_int_cst (TREE_TYPE (a), 1), b);
      name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
      stmt1 = gimple_build_assign (name, t);

      /* A & (1 << B) */
      t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
      name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
      stmt2 = gimple_build_assign (name, t);

      /* Replace the SSA_NAME we compare against zero.  Adjust
	 the type of zero accordingly.  */
      SET_USE (use, name);
      gimple_cond_set_rhs (cond_stmt,
			   build_int_cst_type (TREE_TYPE (name),
					       0));

      /* Don't use gsi_replace here, none of the new assignments sets
	 the variable originally set in stmt.  Move bsi to stmt1, and
	 then remove the original stmt, so that we get a chance to
	 retain debug info for it.  */
      rsi = *bsi;
      gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
      gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
      gimple *to_release = gsi_stmt (rsi);
      gsi_remove (&rsi, true);
      release_defs (to_release);

      return stmt1;
    }

  return stmt;
}
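
/* Source-level sketch of the rewrite (illustrative, not part of the
   original sources), for an invariant shift count b:

     for (i = 0; i < n; i++)	     mask = 1 << b;
       if ((a[i] >> b) & 1)	==>  for (i = 0; i < n; i++)
	 ...			       if (a[i] & mask)
					 ...

   The mask computation is invariant and can be hoisted; the shift of
   the varying operand A is replaced by a cheap AND inside the loop.  */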
/* For each statement determines the outermost loop in that it is invariant,
   statements on whose motion it depends and the cost of the computation.
   This information is stored to the LIM_DATA structure associated with
   each statement.  */
class invariantness_dom_walker : public dom_walker
{
public:
  invariantness_dom_walker (cdi_direction direction)
    : dom_walker (direction) {}

  virtual edge before_dom_children (basic_block);
};
/* Determine the outermost loops in that statements in basic block BB are
   invariant, and record them to the LIM_DATA associated with the statements.
   Callback for dom_walker.  */

edge
invariantness_dom_walker::before_dom_children (basic_block bb)
{
  enum move_pos pos;
  gimple_stmt_iterator bsi;
  gimple *stmt;
  bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
  class loop *outermost = ALWAYS_EXECUTED_IN (bb);
  struct lim_aux_data *lim_data;

  if (!loop_outer (bb->loop_father))
    return NULL;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
	     bb->index, bb->loop_father->num, loop_depth (bb->loop_father));

  /* Look at PHI nodes, but only if there is at most two.
     ??? We could relax this further by post-processing the inserted
     code and transforming adjacent cond-exprs with the same predicate
     to control flow again.  */
  bsi = gsi_start_phis (bb);
  if (!gsi_end_p (bsi)
      && ((gsi_next (&bsi), gsi_end_p (bsi))
	  || (gsi_next (&bsi), gsi_end_p (bsi))))
    for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
      {
	stmt = gsi_stmt (bsi);

	pos = movement_possibility (stmt);
	if (pos == MOVE_IMPOSSIBLE)
	  continue;

	lim_data = get_lim_data (stmt);
	if (! lim_data)
	  lim_data = init_lim_data (stmt);
	lim_data->always_executed_in = outermost;

	if (!determine_max_movement (stmt, false))
	  {
	    lim_data->max_loop = NULL;
	    continue;
	  }

	if (dump_file && (dump_flags & TDF_DETAILS))
	  {
	    print_gimple_stmt (dump_file, stmt, 2);
	    fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
		     loop_depth (lim_data->max_loop),
		     lim_data->cost);
	  }

	if (lim_data->cost >= LIM_EXPENSIVE)
	  set_profitable_level (stmt);
      }

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      stmt = gsi_stmt (bsi);

      pos = movement_possibility (stmt);
      if (pos == MOVE_IMPOSSIBLE)
	{
	  if (nonpure_call_p (stmt))
	    {
	      maybe_never = true;
	      outermost = NULL;
	    }
	  /* Make sure to note always_executed_in for stores to make
	     store-motion work.  */
	  else if (stmt_makes_single_store (stmt))
	    {
	      struct lim_aux_data *lim_data = get_lim_data (stmt);
	      if (! lim_data)
		lim_data = init_lim_data (stmt);
	      lim_data->always_executed_in = outermost;
	    }
	  continue;
	}

      if (is_gimple_assign (stmt)
	  && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
	      == GIMPLE_BINARY_RHS))
	{
	  tree op0 = gimple_assign_rhs1 (stmt);
	  tree op1 = gimple_assign_rhs2 (stmt);
	  class loop *ol1 = outermost_invariant_loop (op1,
					loop_containing_stmt (stmt));

	  /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
	     to be hoisted out of loop, saving expensive divide.  */
	  if (pos == MOVE_POSSIBLE
	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR
	      && flag_unsafe_math_optimizations
	      && !flag_trapping_math
	      && ol1 != NULL
	      && outermost_invariant_loop (op0, ol1) == NULL)
	    stmt = rewrite_reciprocal (&bsi);

	  /* If the shift count is invariant, convert (A >> B) & 1 to
	     A & (1 << B) allowing the bit mask to be hoisted out of the loop
	     saving an expensive shift.  */
	  if (pos == MOVE_POSSIBLE
	      && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
	      && integer_onep (op1)
	      && TREE_CODE (op0) == SSA_NAME
	      && has_single_use (op0))
	    stmt = rewrite_bittest (&bsi);
	}

      lim_data = get_lim_data (stmt);
      if (! lim_data)
	lim_data = init_lim_data (stmt);
      lim_data->always_executed_in = outermost;

      if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
	continue;

      if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
	{
	  lim_data->max_loop = NULL;
	  continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  print_gimple_stmt (dump_file, stmt, 2);
	  fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
		   loop_depth (lim_data->max_loop),
		   lim_data->cost);
	}

      if (lim_data->cost >= LIM_EXPENSIVE)
	set_profitable_level (stmt);
    }
  return NULL;
}
/* Hoist the statements in basic block BB out of the loops prescribed by
   data stored in LIM_DATA structures associated with each statement.  Callback
   for walk_dominator_tree.  */

unsigned int
move_computations_worker (basic_block bb)
{
  class loop *level;
  unsigned cost = 0;
  struct lim_aux_data *lim_data;
  unsigned int todo = 0;

  if (!loop_outer (bb->loop_father))
    return todo;

  for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
    {
      gassign *new_stmt;
      gphi *stmt = bsi.phi ();

      lim_data = get_lim_data (stmt);
      if (lim_data == NULL)
	{
	  gsi_next (&bsi);
	  continue;
	}

      cost = lim_data->cost;
      level = lim_data->tgt_loop;
      clear_lim_data (stmt);

      if (!level)
	{
	  gsi_next (&bsi);
	  continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Moving PHI node\n");
	  print_gimple_stmt (dump_file, stmt, 0);
	  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
		   cost, level->num);
	}

      if (gimple_phi_num_args (stmt) == 1)
	{
	  tree arg = PHI_ARG_DEF (stmt, 0);
	  new_stmt = gimple_build_assign (gimple_phi_result (stmt),
					  TREE_CODE (arg), arg);
	}
      else
	{
	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
	  gimple *cond = gsi_stmt (gsi_last_bb (dom));
	  tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
	  /* Get the PHI arguments corresponding to the true and false
	     edges of COND.  */
	  extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
	  gcc_assert (arg0 && arg1);
	  t = build2 (gimple_cond_code (cond), boolean_type_node,
		      gimple_cond_lhs (cond), gimple_cond_rhs (cond));
	  new_stmt = gimple_build_assign (gimple_phi_result (stmt),
					  COND_EXPR, t, arg0, arg1);
	  todo |= TODO_cleanup_cfg;
	}
      if (INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (new_stmt)))
	  && (!ALWAYS_EXECUTED_IN (bb)
	      || (ALWAYS_EXECUTED_IN (bb) != level
		  && !flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
	{
	  tree lhs = gimple_assign_lhs (new_stmt);
	  SSA_NAME_RANGE_INFO (lhs) = NULL;
	}
      gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
      remove_phi_node (&bsi, false);
    }

  for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
    {
      edge e;

      gimple *stmt = gsi_stmt (bsi);

      lim_data = get_lim_data (stmt);
      if (lim_data == NULL)
	{
	  gsi_next (&bsi);
	  continue;
	}

      cost = lim_data->cost;
      level = lim_data->tgt_loop;
      clear_lim_data (stmt);

      if (!level)
	{
	  gsi_next (&bsi);
	  continue;
	}

      /* We do not really want to move conditionals out of the loop; we just
	 placed it here to force its operands to be moved if necessary.  */
      if (gimple_code (stmt) == GIMPLE_COND)
	continue;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Moving statement\n");
	  print_gimple_stmt (dump_file, stmt, 0);
	  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
		   cost, level->num);
	}

      e = loop_preheader_edge (level);
      gcc_assert (!gimple_vdef (stmt));
      if (gimple_vuse (stmt))
	{
	  /* The new VUSE is the one from the virtual PHI in the loop
	     header or the one already present.  */
	  gphi_iterator gsi2;
	  for (gsi2 = gsi_start_phis (e->dest);
	       !gsi_end_p (gsi2); gsi_next (&gsi2))
	    {
	      gphi *phi = gsi2.phi ();
	      if (virtual_operand_p (gimple_phi_result (phi)))
		{
		  SET_USE (gimple_vuse_op (stmt),
			   PHI_ARG_DEF_FROM_EDGE (phi, e));
		  break;
		}
	    }
	}
      gsi_remove (&bsi, false);
      if (gimple_has_lhs (stmt)
	  && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
	  && INTEGRAL_TYPE_P (TREE_TYPE (gimple_get_lhs (stmt)))
	  && (!ALWAYS_EXECUTED_IN (bb)
	      || !(ALWAYS_EXECUTED_IN (bb) == level
		   || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
	{
	  tree lhs = gimple_get_lhs (stmt);
	  SSA_NAME_RANGE_INFO (lhs) = NULL;
	}
      /* In case this is a stmt that is not unconditionally executed
	 when the target loop header is executed and the stmt may
	 invoke undefined integer or pointer overflow rewrite it to
	 unsigned arithmetic.  */
      if (is_gimple_assign (stmt)
	  && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt)))
	  && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (gimple_assign_lhs (stmt)))
	  && arith_code_with_undefined_signed_overflow
	       (gimple_assign_rhs_code (stmt))
	  && (!ALWAYS_EXECUTED_IN (bb)
	      || !(ALWAYS_EXECUTED_IN (bb) == level
		   || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
	gsi_insert_seq_on_edge (e, rewrite_to_defined_overflow (stmt));
      else
	gsi_insert_on_edge (e, stmt);
    }

  return todo;
}
/* Hoist the statements out of the loops prescribed by data stored in
   LIM_DATA structures associated with each statement.  */

static unsigned int
move_computations (void)
{
  int *rpo = XNEWVEC (int, last_basic_block_for_fn (cfun));
  int n = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, false);
  unsigned todo = 0;

  for (int i = 0; i < n; ++i)
    todo |= move_computations_worker (BASIC_BLOCK_FOR_FN (cfun, rpo[i]));

  free (rpo);

  gsi_commit_edge_inserts ();
  if (need_ssa_update_p (cfun))
    rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);

  return todo;
}
/* Checks whether the statement defining variable *INDEX can be hoisted
   out of the loop passed in DATA.  Callback for for_each_index.  */

static bool
may_move_till (tree ref, tree *index, void *data)
{
  class loop *loop = (class loop *) data, *max_loop;

  /* If REF is an array reference, check also that the step and the lower
     bound is invariant in LOOP.  */
  if (TREE_CODE (ref) == ARRAY_REF)
    {
      tree step = TREE_OPERAND (ref, 3);
      tree lbound = TREE_OPERAND (ref, 2);

      max_loop = outermost_invariant_loop (step, loop);
      if (!max_loop)
	return false;

      max_loop = outermost_invariant_loop (lbound, loop);
      if (!max_loop)
	return false;
    }

  max_loop = outermost_invariant_loop (*index, loop);
  if (!max_loop)
    return false;

  return true;
}
/* If OP is SSA NAME, force the statement that defines it to be
   moved out of the LOOP.  ORIG_LOOP is the loop in that EXPR is used.  */

static void
force_move_till_op (tree op, class loop *orig_loop, class loop *loop)
{
  gimple *stmt;

  if (!op
      || is_gimple_min_invariant (op))
    return;

  gcc_assert (TREE_CODE (op) == SSA_NAME);

  stmt = SSA_NAME_DEF_STMT (op);
  if (gimple_nop_p (stmt))
    return;

  set_level (stmt, orig_loop, loop);
}
/* Forces statement defining invariants in REF (and *INDEX) to be moved out of
   the LOOP.  The reference REF is used in the loop ORIG_LOOP.  Callback for
   for_each_index.  */

struct fmt_data
{
  class loop *loop;
  class loop *orig_loop;
};

static bool
force_move_till (tree ref, tree *index, void *data)
{
  struct fmt_data *fmt_data = (struct fmt_data *) data;

  if (TREE_CODE (ref) == ARRAY_REF)
    {
      tree step = TREE_OPERAND (ref, 3);
      tree lbound = TREE_OPERAND (ref, 2);

      force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
      force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
    }

  force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);

  return true;
}
/* A function to free the mem_ref object OBJ.  */

static void
memref_free (class im_mem_ref *mem)
{
  mem->accesses_in_loop.release ();
}
/* Allocates and returns a memory reference description for MEM whose hash
   value is HASH and id is ID.  */

static im_mem_ref *
mem_ref_alloc (ao_ref *mem, unsigned hash, unsigned id)
{
  im_mem_ref *ref = XOBNEW (&mem_ref_obstack, class im_mem_ref);
  if (mem)
    ref->mem = *mem;
  else
    ao_ref_init (&ref->mem, error_mark_node);
  ref->id = id;
  ref->ref_canonical = false;
  ref->ref_decomposed = false;
  ref->hash = hash;
  ref->stored = NULL;
  ref->loaded = NULL;
  bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
  ref->accesses_in_loop.create (1);

  return ref;
}
/* Records memory reference location *LOC in LOOP to the memory reference
   description REF.  The reference occurs in statement STMT.  */

static void
record_mem_ref_loc (im_mem_ref *ref, gimple *stmt, tree *loc)
{
  mem_ref_loc aref;
  aref.stmt = stmt;
  aref.ref = loc;
  ref->accesses_in_loop.safe_push (aref);
}
/* Set the LOOP bit in REF stored bitmap and allocate that if
   necessary.  Return whether a bit was changed.  */

static bool
set_ref_stored_in_loop (im_mem_ref *ref, class loop *loop)
{
  if (!ref->stored)
    ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
  return bitmap_set_bit (ref->stored, loop->num);
}
/* Marks reference REF as stored in LOOP.  */

static void
mark_ref_stored (im_mem_ref *ref, class loop *loop)
{
  while (loop != current_loops->tree_root
	 && set_ref_stored_in_loop (ref, loop))
    loop = loop_outer (loop);
}
/* Set the LOOP bit in REF loaded bitmap and allocate that if
   necessary.  Return whether a bit was changed.  */

static bool
set_ref_loaded_in_loop (im_mem_ref *ref, class loop *loop)
{
  if (!ref->loaded)
    ref->loaded = BITMAP_ALLOC (&lim_bitmap_obstack);
  return bitmap_set_bit (ref->loaded, loop->num);
}
/* Marks reference REF as loaded in LOOP.  */

static void
mark_ref_loaded (im_mem_ref *ref, class loop *loop)
{
  while (loop != current_loops->tree_root
	 && set_ref_loaded_in_loop (ref, loop))
    loop = loop_outer (loop);
}
/* Gathers memory references in statement STMT in LOOP, storing the
   information about them in the memory_accesses structure.  Marks
   the vops accessed through unrecognized statements there as
   well.  */

static void
gather_mem_refs_stmt (class loop *loop, gimple *stmt)
{
  tree *mem = NULL;
  hashval_t hash;
  im_mem_ref **slot;
  im_mem_ref *ref;
  bool is_stored;
  unsigned id;

  if (!gimple_vuse (stmt))
    return;

  mem = simple_mem_ref_in_stmt (stmt, &is_stored);
  if (!mem)
    {
      /* We use the shared mem_ref for all unanalyzable refs.  */
      id = UNANALYZABLE_MEM_ID;
      ref = memory_accesses.refs_list[id];
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
	}
      is_stored = gimple_vdef (stmt);
    }
  else
    {
      /* We are looking for equal refs that might differ in structure
	 such as a.b vs. MEM[&a + 4].  So we key off the ao_ref but
	 make sure we can canonicalize the ref in the hashtable if
	 non-operand_equal_p refs are found.  For the lookup we mark
	 the case we want strict equality with aor.max_size == -1.  */
      ao_ref aor;
      ao_ref_init (&aor, *mem);
      ao_ref_base (&aor);
      ao_ref_alias_set (&aor);
      HOST_WIDE_INT offset, size, max_size;
      poly_int64 saved_maxsize = aor.max_size, mem_off;
      tree mem_base;
      bool ref_decomposed;
      if (aor.max_size_known_p ()
	  && aor.offset.is_constant (&offset)
	  && aor.size.is_constant (&size)
	  && aor.max_size.is_constant (&max_size)
	  && size == max_size
	  && (size % BITS_PER_UNIT) == 0
	  /* We're canonicalizing to a MEM where TYPE_SIZE specifies the
	     size.  Make sure this is consistent with the extraction.  */
	  && poly_int_tree_p (TYPE_SIZE (TREE_TYPE (*mem)))
	  && known_eq (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (*mem))),
		       aor.size)
	  && (mem_base = get_addr_base_and_unit_offset (aor.ref, &mem_off)))
	{
	  ref_decomposed = true;
	  hash = iterative_hash_expr (ao_ref_base (&aor), 0);
	  hash = iterative_hash_host_wide_int (offset, hash);
	  hash = iterative_hash_host_wide_int (size, hash);
	}
      else
	{
	  ref_decomposed = false;
	  hash = iterative_hash_expr (aor.ref, 0);
	  aor.max_size = -1;
	}
      slot = memory_accesses.refs->find_slot_with_hash (&aor, hash, INSERT);
      aor.max_size = saved_maxsize;
      if (*slot)
	{
	  if (!(*slot)->ref_canonical
	      && !operand_equal_p (*mem, (*slot)->mem.ref, 0))
	    {
	      /* If we didn't yet canonicalize the hashtable ref (which
		 we'll end up using for code insertion) and hit a second
		 equal ref that is not structurally equivalent create
		 a canonical ref which is a bare MEM_REF.  */
	      if (TREE_CODE (*mem) == MEM_REF
		  || TREE_CODE (*mem) == TARGET_MEM_REF)
		{
		  (*slot)->mem.ref = *mem;
		  (*slot)->mem.base_alias_set = ao_ref_base_alias_set (&aor);
		}
	      else
		{
		  tree ref_alias_type = reference_alias_ptr_type (*mem);
		  unsigned int ref_align = get_object_alignment (*mem);
		  tree ref_type = TREE_TYPE (*mem);
		  tree tmp = build1 (ADDR_EXPR, ptr_type_node,
				     unshare_expr (mem_base));
		  if (TYPE_ALIGN (ref_type) != ref_align)
		    ref_type = build_aligned_type (ref_type, ref_align);
		  (*slot)->mem.ref
		    = fold_build2 (MEM_REF, ref_type, tmp,
				   build_int_cst (ref_alias_type, mem_off));
		  if ((*slot)->mem.volatile_p)
		    TREE_THIS_VOLATILE ((*slot)->mem.ref) = 1;
		  gcc_checking_assert (TREE_CODE ((*slot)->mem.ref) == MEM_REF
				       && is_gimple_mem_ref_addr
					    (TREE_OPERAND ((*slot)->mem.ref,
							   0)));
		  (*slot)->mem.base_alias_set = (*slot)->mem.ref_alias_set;
		}
	      (*slot)->ref_canonical = true;
	    }
	  ref = *slot;
	  id = ref->id;
	}
      else
	{
	  id = memory_accesses.refs_list.length ();
	  ref = mem_ref_alloc (&aor, hash, id);
	  ref->ref_decomposed = ref_decomposed;
	  memory_accesses.refs_list.safe_push (ref);
	  *slot = ref;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Memory reference %u: ", id);
	      print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
	      fprintf (dump_file, "\n");
	    }
	}

      record_mem_ref_loc (ref, stmt, mem);
    }
  if (is_stored)
    {
      bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
      mark_ref_stored (ref, loop);
    }
  /* A not simple memory op is also a read when it is a write.  */
  if (!is_stored || id == UNANALYZABLE_MEM_ID)
    {
      bitmap_set_bit (&memory_accesses.refs_loaded_in_loop[loop->num], ref->id);
      mark_ref_loaded (ref, loop);
    }
  init_lim_data (stmt)->ref = ref->id;
}
static unsigned *bb_loop_postorder;

/* qsort sort function to sort blocks after their loop fathers postorder.  */

static int
sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_,
				void *bb_loop_postorder_)
{
  unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
  basic_block bb1 = *(const basic_block *)bb1_;
  basic_block bb2 = *(const basic_block *)bb2_;
  class loop *loop1 = bb1->loop_father;
  class loop *loop2 = bb2->loop_father;
  if (loop1->num == loop2->num)
    return bb1->index - bb2->index;
  return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
}
/* qsort sort function to sort ref locs after their loop fathers postorder.  */

static int
sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_,
				 void *bb_loop_postorder_)
{
  unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
  const mem_ref_loc *loc1 = (const mem_ref_loc *)loc1_;
  const mem_ref_loc *loc2 = (const mem_ref_loc *)loc2_;
  class loop *loop1 = gimple_bb (loc1->stmt)->loop_father;
  class loop *loop2 = gimple_bb (loc2->stmt)->loop_father;
  if (loop1->num == loop2->num)
    return 0;
  return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
}
/* Gathers memory references in loops.  */

static void
analyze_memory_references (void)
{
  gimple_stmt_iterator bsi;
  basic_block bb, *bbs;
  class loop *loop, *outer;
  unsigned i, n;

  /* Collect all basic-blocks in loops and sort them after their
     loops postorder.  */
  i = 0;
  bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
  FOR_EACH_BB_FN (bb, cfun)
    if (bb->loop_father != current_loops->tree_root)
      bbs[i++] = bb;
  n = i;
  gcc_sort_r (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp,
	      bb_loop_postorder);

  /* Visit blocks in loop postorder and assign mem-ref IDs in that order.
     That results in better locality for all the bitmaps.  */
  for (i = 0; i < n; ++i)
    {
      basic_block bb = bbs[i];
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	gather_mem_refs_stmt (bb->loop_father, gsi_stmt (bsi));
    }

  /* Sort the location list of gathered memory references after their
     loop postorder number.  */
  im_mem_ref *ref;
  FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
    ref->accesses_in_loop.sort (sort_locs_in_loop_postorder_cmp,
				bb_loop_postorder);

  free (bbs);

  /* Propagate the information about accessed memory references up
     the loop hierarchy.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      /* Finalize the overall touched references (including subloops).  */
      bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num],
		       &memory_accesses.refs_stored_in_loop[loop->num]);

      /* Propagate the information about accessed memory references up
	 the loop hierarchy.  */
      outer = loop_outer (loop);
      if (outer == current_loops->tree_root)
	continue;

      bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num],
		       &memory_accesses.all_refs_stored_in_loop[loop->num]);
    }
}
/* Returns true if MEM1 and MEM2 may alias.  TTAE_CACHE is used as a cache in
   tree_to_aff_combination_expand.  */

static bool
mem_refs_may_alias_p (im_mem_ref *mem1, im_mem_ref *mem2,
		      hash_map<tree, name_expansion *> **ttae_cache,
		      bool tbaa_p)
{
  /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
     object and their offset differ in such a way that the locations cannot
     overlap, then they cannot alias.  */
  poly_widest_int size1, size2;
  aff_tree off1, off2;

  /* Perform basic offset and type-based disambiguation.  */
  if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, tbaa_p))
    return false;

  /* The expansion of addresses may be a bit expensive, thus we only do
     the check at -O2 and higher optimization levels.  */
  if (optimize < 2)
    return true;

  get_inner_reference_aff (mem1->mem.ref, &off1, &size1);
  get_inner_reference_aff (mem2->mem.ref, &off2, &size2);
  aff_combination_expand (&off1, ttae_cache);
  aff_combination_expand (&off2, ttae_cache);
  aff_combination_scale (&off1, -1);
  aff_combination_add (&off2, &off1);

  if (aff_comb_cannot_overlap_p (&off2, size1, size2))
    return false;

  return true;
}
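
/* Worked example (illustrative, not part of the original sources): for
   two accesses based on the same pointer P, say p->a at offset 0 with
   size 32 bits and p->b at offset 32 with size 32 bits, the difference
   of the two offset combinations is a constant 32 bits and neither
   access spans across that distance, so aff_comb_cannot_overlap_p
   holds and the references are disambiguated.  */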
/* Compare function for bsearch searching for reference locations
   in a loop.  */

static int
find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_,
			  void *bb_loop_postorder_)
{
  unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
  class loop *loop = (class loop *)const_cast<void *>(loop_);
  mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_);
  class loop *loc_loop = gimple_bb (loc->stmt)->loop_father;
  if (loop->num == loc_loop->num
      || flow_loop_nested_p (loop, loc_loop))
    return 0;
  return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num]
	  ? -1 : 1);
}
/* Iterates over all locations of REF in LOOP and its subloops calling
   fn.operator() with the location as argument.  When that operator
   returns true the iteration is stopped and true is returned.
   Otherwise false is returned.  */

template <typename FN>
static bool
for_all_locs_in_loop (class loop *loop, im_mem_ref *ref, FN fn)
{
  unsigned i;
  mem_ref_loc *loc;

  /* Search for the cluster of locs in the accesses_in_loop vector
     which is sorted after postorder index of the loop father.  */
  loc = ref->accesses_in_loop.bsearch (loop, find_ref_loc_in_loop_cmp,
				       bb_loop_postorder);
  if (!loc)
    return false;

  /* We have found one location inside loop or its sub-loops.  Iterate
     both forward and backward to cover the whole cluster.  */
  i = loc - ref->accesses_in_loop.address ();
  while (i > 0)
    {
      --i;
      mem_ref_loc *l = &ref->accesses_in_loop[i];
      if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
	break;
      if (fn (l))
	return true;
    }
  for (i = loc - ref->accesses_in_loop.address ();
       i < ref->accesses_in_loop.length (); ++i)
    {
      mem_ref_loc *l = &ref->accesses_in_loop[i];
      if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
	break;
      if (fn (l))
	return true;
    }

  return false;
}
1817 class rewrite_mem_ref_loc
1820 rewrite_mem_ref_loc (tree tmp_var_
) : tmp_var (tmp_var_
) {}
1821 bool operator () (mem_ref_loc
*loc
);
1826 rewrite_mem_ref_loc::operator () (mem_ref_loc
*loc
)
1828 *loc
->ref
= tmp_var
;
1829 update_stmt (loc
->stmt
);
1833 /* Rewrites all references to REF in LOOP by variable TMP_VAR. */
1836 rewrite_mem_refs (class loop
*loop
, im_mem_ref
*ref
, tree tmp_var
)
1838 for_all_locs_in_loop (loop
, ref
, rewrite_mem_ref_loc (tmp_var
));
/* Stores the first reference location in LOCP.  */

class first_mem_ref_loc_1
{
public:
  first_mem_ref_loc_1 (mem_ref_loc **locp_) : locp (locp_) {}
  bool operator () (mem_ref_loc *loc);
  mem_ref_loc **locp;
};

bool
first_mem_ref_loc_1::operator () (mem_ref_loc *loc)
{
  *locp = loc;
  return true;
}

/* Returns the first reference location to REF in LOOP.  */

static mem_ref_loc *
first_mem_ref_loc (class loop *loop, im_mem_ref *ref)
{
  mem_ref_loc *locp = NULL;
  for_all_locs_in_loop (loop, ref, first_mem_ref_loc_1 (&locp));
  return locp;
}
struct prev_flag_edges {
  /* Edge to insert new flag comparison code.  */
  edge append_cond_position;

  /* Edge for fall through from previous flag comparison.  */
  edge last_cond_fallthru;
};
/* Helper function for execute_sm.  Emit code to store TMP_VAR into
   MEM along edge EX.

   The store is only done if MEM has changed.  We do this so no
   changes to MEM occur on code paths that did not originally store
   into it.

   The common case for execute_sm will transform:

     for (...) {
       if (foo)
	 stuff;
       else
	 MEM = TMP_VAR;
     }

   into:

     lsm = MEM;
     for (...) {
       if (foo)
	 stuff;
       else
	 lsm = TMP_VAR;
     }
     MEM = lsm;

  This function will generate:

     lsm = MEM;

     lsm_flag = false;
     ...
     for (...) {
       if (foo)
	 stuff;
       else {
	 lsm = TMP_VAR;
	 lsm_flag = true;
       }
     }
     if (lsm_flag)	<--
       MEM = lsm;	<--
*/
static void
execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
		       edge preheader, hash_set <basic_block> *flag_bbs)
{
  basic_block new_bb, then_bb, old_dest;
  bool loop_has_only_one_exit;
  edge then_old_edge, orig_ex = ex;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  struct prev_flag_edges *prev_edges = (struct prev_flag_edges *) ex->aux;
  bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP;

  profile_count count_sum = profile_count::zero ();
  int nbbs = 0, ncount = 0;
  profile_probability flag_probability = profile_probability::uninitialized ();

  /* Flag is set in FLAG_BBS.  Determine probability that flag will be true
     at loop exit.

     This code may look fancy, but it cannot update profile very realistically
     because we do not know the probability that flag will be true at given
     loop exit.

     We look for two interesting extremes
       - when exit is dominated by block setting the flag, we know it will
	 always be true.  This is a common case.
       - when all blocks setting the flag have very low frequency we know
	 it will likely be false.
     In all other cases we default to 2/3 for flag being true.  */

  for (hash_set<basic_block>::iterator it = flag_bbs->begin ();
       it != flag_bbs->end (); ++it)
    {
      if ((*it)->count.initialized_p ())
	count_sum += (*it)->count, ncount ++;
      if (dominated_by_p (CDI_DOMINATORS, ex->src, *it))
	flag_probability = profile_probability::always ();
      nbbs++;
    }

  profile_probability cap = profile_probability::always ().apply_scale (2, 3);

  if (flag_probability.initialized_p ())
    ;
  else if (ncount == nbbs
	   && preheader->count () >= count_sum && preheader->count ().nonzero_p ())
    {
      flag_probability = count_sum.probability_in (preheader->count ());
      if (flag_probability > cap)
	flag_probability = cap;
    }

  if (!flag_probability.initialized_p ())
    flag_probability = cap;

  /* ?? Insert store after previous store if applicable.  See note
     below.  */
  if (prev_edges)
    ex = prev_edges->append_cond_position;

  loop_has_only_one_exit = single_pred_p (ex->dest);

  if (loop_has_only_one_exit)
    ex = split_block_after_labels (ex->dest);
  else
    {
      for (gphi_iterator gpi = gsi_start_phis (ex->dest);
	   !gsi_end_p (gpi); gsi_next (&gpi))
	{
	  gphi *phi = gpi.phi ();
	  if (virtual_operand_p (gimple_phi_result (phi)))
	    continue;

	  /* When the destination has a non-virtual PHI node with multiple
	     predecessors make sure we preserve the PHI structure by
	     forcing a forwarder block so that hoisting of that PHI will
	     still work.  */
	  split_edge (ex);
	  break;
	}
    }

  old_dest = ex->dest;
  new_bb = split_edge (ex);
  then_bb = create_empty_bb (new_bb);
  then_bb->count = new_bb->count.apply_probability (flag_probability);
  if (irr)
    then_bb->flags = BB_IRREDUCIBLE_LOOP;
  add_bb_to_loop (then_bb, new_bb->loop_father);

  gsi = gsi_start_bb (new_bb);
  stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
			    NULL_TREE, NULL_TREE);
  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

  gsi = gsi_start_bb (then_bb);
  /* Insert actual store.  */
  stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

  edge e1 = single_succ_edge (new_bb);
  edge e2 = make_edge (new_bb, then_bb,
	               EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
  e2->probability = flag_probability;

  e1->flags |= EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0);
  e1->flags &= ~EDGE_FALLTHRU;

  e1->probability = flag_probability.invert ();

  then_old_edge = make_single_succ_edge (then_bb, old_dest,
			     EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));

  set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);

  if (prev_edges)
    {
      basic_block prevbb = prev_edges->last_cond_fallthru->src;
      redirect_edge_succ (prev_edges->last_cond_fallthru, new_bb);
      set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
      set_immediate_dominator (CDI_DOMINATORS, old_dest,
			       recompute_dominator (CDI_DOMINATORS, old_dest));
    }

  /* ?? Because stores may alias, they must happen in the exact
     sequence they originally happened.  Save the position right after
     the (_lsm) store we just created so we can continue appending after
     it and maintain the original order.  */
  {
    struct prev_flag_edges *p;

    if (orig_ex->aux)
      orig_ex->aux = NULL;
    alloc_aux_for_edge (orig_ex, sizeof (struct prev_flag_edges));
    p = (struct prev_flag_edges *) orig_ex->aux;
    p->append_cond_position = then_old_edge;
    p->last_cond_fallthru = find_edge (new_bb, old_dest);
    orig_ex->aux = (void *) p;
  }

  if (!loop_has_only_one_exit)
    for (gphi_iterator gpi = gsi_start_phis (old_dest);
	 !gsi_end_p (gpi); gsi_next (&gpi))
      {
	gphi *phi = gpi.phi ();
	unsigned i;

	for (i = 0; i < gimple_phi_num_args (phi); i++)
	  if (gimple_phi_arg_edge (phi, i)->src == new_bb)
	    {
	      tree arg = gimple_phi_arg_def (phi, i);
	      add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
	      update_stmt (phi);
	    }
      }
}
2080 class sm_set_flag_if_changed
2083 sm_set_flag_if_changed (tree flag_
, hash_set
<basic_block
> *bbs_
)
2084 : flag (flag_
), bbs (bbs_
) {}
2085 bool operator () (mem_ref_loc
*loc
);
2087 hash_set
<basic_block
> *bbs
;
2091 sm_set_flag_if_changed::operator () (mem_ref_loc
*loc
)
2093 /* Only set the flag for writes. */
2094 if (is_gimple_assign (loc
->stmt
)
2095 && gimple_assign_lhs_ptr (loc
->stmt
) == loc
->ref
)
2097 gimple_stmt_iterator gsi
= gsi_for_stmt (loc
->stmt
);
2098 gimple
*stmt
= gimple_build_assign (flag
, boolean_true_node
);
2099 gsi_insert_after (&gsi
, stmt
, GSI_CONTINUE_LINKING
);
2100 bbs
->add (gimple_bb (stmt
));
/* Helper function for execute_sm.  On every location where REF is
   set, set an appropriate flag indicating the store.  */

static tree
execute_sm_if_changed_flag_set (class loop *loop, im_mem_ref *ref,
				hash_set <basic_block> *bbs)
{
  tree flag;
  char *str = get_lsm_tmp_name (ref->mem.ref, ~0, "_flag");
  flag = create_tmp_reg (boolean_type_node, str);
  for_all_locs_in_loop (loop, ref, sm_set_flag_if_changed (flag, bbs));
  return flag;
}
struct sm_aux
{
  tree tmp_var;
  tree store_flag;
  hash_set <basic_block> flag_bbs;
};

/* Executes store motion of memory reference REF from LOOP.
   The initialization of the temporary variable is put to the preheader
   of the loop, and the state needed to materialize the assignments to
   the reference on the exits is recorded in AUX_MAP.  */

static void
execute_sm (class loop *loop, im_mem_ref *ref,
	    hash_map<im_mem_ref *, sm_aux *> &aux_map)
{
  gassign *load;
  struct fmt_data fmt_data;
  struct lim_aux_data *lim_data;
  bool multi_threaded_model_p = false;
  gimple_stmt_iterator gsi;
  sm_aux *aux = new sm_aux;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Executing store motion of ");
      print_generic_expr (dump_file, ref->mem.ref);
      fprintf (dump_file, " from loop %d\n", loop->num);
    }

  aux->tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
				 get_lsm_tmp_name (ref->mem.ref, ~0));

  fmt_data.loop = loop;
  fmt_data.orig_loop = loop;
  for_each_index (&ref->mem.ref, force_move_till, &fmt_data);

  bool always_stored = ref_always_accessed_p (loop, ref, true);
  if (bb_in_transaction (loop_preheader_edge (loop)->src)
      || (! flag_store_data_races && ! always_stored))
    multi_threaded_model_p = true;

  if (multi_threaded_model_p)
    aux->store_flag
      = execute_sm_if_changed_flag_set (loop, ref, &aux->flag_bbs);
  else
    aux->store_flag = NULL_TREE;

  /* Remember variable setup.  */
  aux_map.put (ref, aux);

  rewrite_mem_refs (loop, ref, aux->tmp_var);

  /* Emit the load code on a random exit edge or into the latch if
     the loop does not exit, so that we are sure it will be processed
     by move_computations after all dependencies.  */
  gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt);

  /* Avoid doing a load if there was no load of the ref in the loop.
     Esp. when the ref is not always stored we cannot optimize it
     away later.  But when it is not always stored we must use a conditional
     store then.  */
  if ((!always_stored && !multi_threaded_model_p)
      || (ref->loaded && bitmap_bit_p (ref->loaded, loop->num)))
    load = gimple_build_assign (aux->tmp_var, unshare_expr (ref->mem.ref));
  else
    {
      /* If not emitting a load mark the uninitialized state on the
	 loop entry as not to be warned for.  */
      tree uninit = create_tmp_reg (TREE_TYPE (aux->tmp_var));
      TREE_NO_WARNING (uninit) = 1;
      load = gimple_build_assign (aux->tmp_var, uninit);
    }
  lim_data = init_lim_data (load);
  lim_data->max_loop = loop;
  lim_data->tgt_loop = loop;
  gsi_insert_before (&gsi, load, GSI_SAME_STMT);

  if (multi_threaded_model_p)
    {
      load = gimple_build_assign (aux->store_flag, boolean_false_node);
      lim_data = init_lim_data (load);
      lim_data->max_loop = loop;
      lim_data->tgt_loop = loop;
      gsi_insert_before (&gsi, load, GSI_SAME_STMT);
    }
}
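
/* Note the role of the multi-threaded model above: when the ref is not
   always stored to and store data races may not be introduced, an
   unconditional store on the exit could create a write another thread
   does not expect, so the store flag set in the loop keeps the exit
   store conditional (and likewise inside transactions).  */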

/* sm_ord is used for ordinary stores we can retain order with respect
   to other stores
   sm_unord is used for conditionally executed stores which need to be
   able to execute in arbitrary order with respect to other stores
   sm_other is used for stores we do not try to apply store motion to.  */
enum sm_kind { sm_ord, sm_unord, sm_other };
struct seq_entry
{
  seq_entry (unsigned f, sm_kind k, tree fr = NULL)
    : first (f), second (k), from (fr) {}
  unsigned first;
  sm_kind second;
  tree from;
};
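
/* For example, a block ending in

     A = ...; X = ...; B = ...;

   where A and B are SM candidates but X is not yields the backward
   sequence { B:sm_ord, X:sm_other (FROM = the stored value), A:sm_ord };
   re-materializing the entries in reverse order on an exit recreates the
   original store order A, X, B (illustrative only).  */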

/* Materializes the store sequence SEQ of kind KIND on exit edge EX of
   LOOP, using the auxiliary data recorded in AUX_MAP.  */

static void
execute_sm_exit (class loop *loop, edge ex, vec<seq_entry> &seq,
		 hash_map<im_mem_ref *, sm_aux *> &aux_map, sm_kind kind)
{
  /* Sink the stores to exit from the loop.  */
  for (unsigned i = seq.length (); i > 0; --i)
    {
      im_mem_ref *ref = memory_accesses.refs_list[seq[i-1].first];
      if (seq[i-1].second == sm_other)
	{
	  gcc_assert (kind == sm_ord && seq[i-1].from != NULL_TREE);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Re-issuing dependent store of ");
	      print_generic_expr (dump_file, ref->mem.ref);
	      fprintf (dump_file, " from loop %d on exit %d -> %d\n",
		       loop->num, ex->src->index, ex->dest->index);
	    }
	  gassign *store = gimple_build_assign (unshare_expr (ref->mem.ref),
						seq[i-1].from);
	  gsi_insert_on_edge (ex, store);
	}
      else
	{
	  sm_aux *aux = *aux_map.get (ref);
	  if (!aux->store_flag)
	    {
	      gassign *store;
	      store = gimple_build_assign (unshare_expr (ref->mem.ref),
					   aux->tmp_var);
	      gsi_insert_on_edge (ex, store);
	    }
	  else
	    execute_sm_if_changed (ex, ref->mem.ref, aux->tmp_var,
				   aux->store_flag,
				   loop_preheader_edge (loop),
				   &aux->flag_bbs);
	}
    }
}

/* Push the SM candidate at index PTR in the sequence SEQ down until
   we hit the next SM candidate.  Return true if that went OK and
   false if we could not disambiguate against another unrelated ref.
   Update *AT to the index where the candidate now resides.  */

static bool
sm_seq_push_down (vec<seq_entry> &seq, unsigned ptr, unsigned *at)
{
  *at = ptr;
  for (; ptr > 0; --ptr)
    {
      seq_entry &new_cand = seq[ptr];
      seq_entry &against = seq[ptr-1];
      if (against.second == sm_ord
	  || (against.second == sm_other && against.from != NULL_TREE))
	/* Found the tail of the sequence.  */
	break;
      if (!refs_independent_p (memory_accesses.refs_list[new_cand.first],
			       memory_accesses.refs_list[against.first],
			       false))
	/* ???  Prune new_cand from the list of refs to apply SM to.  */
	return false;
      std::swap (new_cand, against);
      *at = ptr - 1;
    }
  return true;
}
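
/* E.g. pushing a new candidate C down { A:sm_ord, X:sm_other (no
   recorded value) } swaps C with X, provided C and X can be
   disambiguated without TBAA, giving { A:sm_ord, C, X } with C now
   adjacent to the candidate tail A (illustrative only).  */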

/* Computes the sequence of stores from candidates in REFS_NOT_IN_SEQ to SEQ
   walking backwards from VDEF (or the end of BB if VDEF is NULL).  */

static int
sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef,
		 vec<seq_entry> &seq, bitmap refs_not_in_seq,
		 bitmap refs_not_supported, bool forked)
{
  if (!vdef)
    for (gimple_stmt_iterator gsi = gsi_last_bb (bb); !gsi_end_p (gsi);
	 gsi_prev (&gsi))
      {
	vdef = gimple_vdef (gsi_stmt (gsi));
	if (vdef)
	  break;
      }
  if (!vdef)
    {
      gphi *vphi = get_virtual_phi (bb);
      if (vphi)
	vdef = gimple_phi_result (vphi);
    }
  if (!vdef)
    {
      if (single_pred_p (bb))
	/* This handles the perfect nest case.  */
	return sm_seq_valid_bb (loop, single_pred (bb), vdef,
				seq, refs_not_in_seq, refs_not_supported,
				forked);
      return 0;
    }
  do
    {
      gimple *def = SSA_NAME_DEF_STMT (vdef);
      if (gimple_bb (def) != bb)
	{
	  /* If we forked by processing a PHI do not allow our walk to
	     merge again until we handle that robustly.  */
	  if (forked)
	    {
	      /* Mark refs_not_in_seq as unsupported.  */
	      bitmap_ior_into (refs_not_supported, refs_not_in_seq);
	      return 1;
	    }
	  /* Otherwise it doesn't really matter if we end up in different
	     BBs.  */
	  bb = gimple_bb (def);
	}
      if (gphi *phi = dyn_cast <gphi *> (def))
	{
	  /* Handle CFG merges.  Until we handle forks (gimple_bb (def) != bb)
	     this is still linear.
	     Eventually we want to cache intermediate results per BB
	     (but we can't easily cache for different exits?).  */
	  /* Stop at PHIs with possible backedges.  */
	  if (bb == bb->loop_father->header
	      || bb->flags & BB_IRREDUCIBLE_LOOP)
	    {
	      /* Mark refs_not_in_seq as unsupported.  */
	      bitmap_ior_into (refs_not_supported, refs_not_in_seq);
	      return 1;
	    }
	  if (gimple_phi_num_args (phi) == 1)
	    return sm_seq_valid_bb (loop, gimple_phi_arg_edge (phi, 0)->src,
				    gimple_phi_arg_def (phi, 0), seq,
				    refs_not_in_seq, refs_not_supported,
				    false);
	  auto_vec<seq_entry> first_edge_seq;
	  auto_bitmap tem_refs_not_in_seq (&lim_bitmap_obstack);
	  int eret;
	  bitmap_copy (tem_refs_not_in_seq, refs_not_in_seq);
	  eret = sm_seq_valid_bb (loop, gimple_phi_arg_edge (phi, 0)->src,
				  gimple_phi_arg_def (phi, 0),
				  first_edge_seq,
				  tem_refs_not_in_seq, refs_not_supported,
				  true);
	  if (eret != 1)
	    return -1;
	  /* Simplify our lives by pruning the sequence of !sm_ord.  */
	  while (!first_edge_seq.is_empty ()
		 && first_edge_seq.last ().second != sm_ord)
	    first_edge_seq.pop ();
	  for (unsigned int i = 1; i < gimple_phi_num_args (phi); ++i)
	    {
	      tree vuse = gimple_phi_arg_def (phi, i);
	      edge e = gimple_phi_arg_edge (phi, i);
	      auto_vec<seq_entry> edge_seq;
	      bitmap_copy (tem_refs_not_in_seq, refs_not_in_seq);
	      eret = sm_seq_valid_bb (loop, e->src, vuse, edge_seq,
				      tem_refs_not_in_seq, refs_not_supported,
				      true);
	      if (eret != 1)
		return -1;
	      /* Simplify our lives by pruning the sequence of !sm_ord.  */
	      while (!edge_seq.is_empty ()
		     && edge_seq.last ().second != sm_ord)
		edge_seq.pop ();
	      unsigned min_len = MIN (first_edge_seq.length (),
				      edge_seq.length ());
	      /* Incrementally merge seqs into first_edge_seq.  */
	      for (unsigned int i = 0; i < min_len; ++i)
		{
		  /* ???  We can more intelligently merge when we face different
		     order by additional sinking operations in one sequence.
		     For now we simply mark them as to be processed by the
		     not order-preserving SM code.  */
		  if (first_edge_seq[i].first != edge_seq[i].first)
		    {
		      if (first_edge_seq[i].second == sm_ord)
			bitmap_set_bit (refs_not_supported,
					first_edge_seq[i].first);
		      if (edge_seq[i].second == sm_ord)
			bitmap_set_bit (refs_not_supported, edge_seq[i].first);
		      first_edge_seq[i].second = sm_other;
		    }
		  /* sm_other prevails.  */
		  else if (first_edge_seq[i].second != edge_seq[i].second)
		    {
		      /* This is just an optimization.  */
		      gcc_assert (bitmap_bit_p (refs_not_supported,
						first_edge_seq[i].first));
		      first_edge_seq[i].second = sm_other;
		    }
		}
	      /* Any excess elements become sm_other since they are now
		 conditionally executed.  */
	      if (first_edge_seq.length () > edge_seq.length ())
		{
		  for (unsigned i = edge_seq.length ();
		       i < first_edge_seq.length (); ++i)
		    {
		      if (first_edge_seq[i].second == sm_ord)
			bitmap_set_bit (refs_not_supported,
					first_edge_seq[i].first);
		      first_edge_seq[i].second = sm_other;
		    }
		}
	      else if (edge_seq.length () > first_edge_seq.length ())
		{
		  for (unsigned i = first_edge_seq.length ();
		       i < edge_seq.length (); ++i)
		    if (edge_seq[i].second == sm_ord)
		      bitmap_set_bit (refs_not_supported, edge_seq[i].first);
		}
	    }
	  /* Use the sequence from the first edge and push SMs down.  */
	  for (unsigned i = 0; i < first_edge_seq.length (); ++i)
	    {
	      if (first_edge_seq[i].second == sm_other)
		break;
	      unsigned id = first_edge_seq[i].first;
	      seq.safe_push (first_edge_seq[i]);
	      unsigned new_idx;
	      if (first_edge_seq[i].second == sm_ord
		  && !sm_seq_push_down (seq, seq.length () - 1, &new_idx))
		{
		  bitmap_set_bit (refs_not_supported, id);
		  /* Mark it sm_other.  */
		  seq[new_idx].second = sm_other;
		}
	    }
	  return 1;
	}
      lim_aux_data *data = get_lim_data (def);
      gcc_assert (data);
      if (data->ref == UNANALYZABLE_MEM_ID)
	return -1;
      /* One of the stores we want to apply SM to and we've not yet seen.  */
      else if (bitmap_clear_bit (refs_not_in_seq, data->ref))
	{
	  seq.safe_push (seq_entry (data->ref, sm_ord));

	  /* 1) push it down the queue until a SMed
	     and not ignored ref is reached, skipping all not SMed refs
	     and ignored refs via non-TBAA disambiguation.  */
	  unsigned new_idx;
	  if (!sm_seq_push_down (seq, seq.length () - 1, &new_idx)
	      /* If that fails but we did not fork yet continue, we'll see
		 to re-materialize all of the stores in the sequence then.
		 Further stores will only be pushed up to this one.  */
	      && forked)
	    {
	      bitmap_set_bit (refs_not_supported, data->ref);
	      /* Mark it sm_other.  */
	      seq[new_idx].second = sm_other;
	    }

	  /* 2) check whether we've seen all refs we want to SM and if so
	     declare success for the active exit.  */
	  if (bitmap_empty_p (refs_not_in_seq))
	    return 1;
	}
      else
	/* Another store not part of the final sequence.  Simply push it.  */
	seq.safe_push (seq_entry (data->ref, sm_other,
				  gimple_assign_rhs1 (def)));

      vdef = gimple_vuse (def);
    }
  while (1);
}
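
/* As an illustration of the merging above: for a diamond where both
   arms store A and then B, the two edge sequences agree and A and B
   stay sm_ord.  If one arm stores A; B; and the other B; A;, the
   entries differ at each position, both refs are marked as not
   supported for order-preserving SM and are left to the unordered
   (sm_unord) code, which requires independence from all other stores
   (illustrative).  */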

/* Hoists memory references MEM_REFS out of LOOP.  EXITS is the list of exit
   edges of the LOOP.  */

static void
hoist_memory_references (class loop *loop, bitmap mem_refs,
			 vec<edge> exits)
{
  im_mem_ref *ref;
  unsigned i;
  bitmap_iterator bi;

  /* To address PR57359 before actually applying store-motion check
     the candidates found for validity with regard to reordering
     relative to other stores which we until here disambiguated using
     TBAA which isn't valid.
     What matters is the order of the last stores to the mem_refs
     with respect to the other stores of the loop at the point of the
     loop exits.  */

  /* For each exit compute the store order, pruning from mem_refs
     the refs we cannot apply store motion to.  */
  /* The complexity of this is at least
     O(number of exits * number of SM refs) but more approaching
     O(number of exits * number of SM refs * number of stores).  */
  /* ???  Somehow do this in a single sweep over the loop body.  */
  auto_vec<std::pair<edge, vec<seq_entry> > > sms;
  auto_bitmap refs_not_supported (&lim_bitmap_obstack);
  edge e;
  FOR_EACH_VEC_ELT (exits, i, e)
    {
      vec<seq_entry> seq;
      seq.create (4);
      auto_bitmap refs_not_in_seq (&lim_bitmap_obstack);
      bitmap_copy (refs_not_in_seq, mem_refs);
      int res = sm_seq_valid_bb (loop, e->src, NULL_TREE,
				 seq, refs_not_in_seq,
				 refs_not_supported, false);
      if (res != 1)
	{
	  bitmap_copy (refs_not_supported, mem_refs);
	  break;
	}
      sms.safe_push (std::make_pair (e, seq));
    }

  /* Prune pruned mem_refs from earlier processed exits.  */
  bool changed = !bitmap_empty_p (refs_not_supported);
  while (changed)
    {
      changed = false;
      std::pair<edge, vec<seq_entry> > *seq;
      FOR_EACH_VEC_ELT (sms, i, seq)
	{
	  bool need_to_push = false;
	  for (unsigned i = 0; i < seq->second.length (); ++i)
	    {
	      sm_kind kind = seq->second[i].second;
	      if (kind == sm_other && seq->second[i].from == NULL_TREE)
		break;
	      unsigned id = seq->second[i].first;
	      unsigned new_idx;
	      if (kind == sm_ord
		  && bitmap_bit_p (refs_not_supported, id))
		{
		  seq->second[i].second = sm_other;
		  gcc_assert (seq->second[i].from == NULL_TREE);
		  need_to_push = true;
		}
	      else if (need_to_push
		       && !sm_seq_push_down (seq->second, i, &new_idx))
		{
		  /* We need to push down both sm_ord and sm_other
		     but for the latter we need to disqualify all
		     following refs.  */
		  if (kind == sm_ord)
		    {
		      if (bitmap_set_bit (refs_not_supported, id))
			changed = true;
		      seq->second[new_idx].second = sm_other;
		    }
		  else
		    {
		      for (unsigned j = seq->second.length () - 1;
			   j > new_idx; --j)
			if (seq->second[j].second == sm_ord
			    && bitmap_set_bit (refs_not_supported,
					       seq->second[j].first))
			  changed = true;
		      seq->second.truncate (new_idx);
		      break;
		    }
		}
	    }
	}
    }
  std::pair<edge, vec<seq_entry> > *seq;
  FOR_EACH_VEC_ELT (sms, i, seq)
    {
      /* Prune sm_other from the end.  */
      while (!seq->second.is_empty ()
	     && seq->second.last ().second == sm_other)
	seq->second.pop ();
      /* Prune duplicates from the start.  */
      auto_bitmap seen (&lim_bitmap_obstack);
      unsigned j, k;
      for (j = k = 0; j < seq->second.length (); ++j)
	if (bitmap_set_bit (seen, seq->second[j].first))
	  {
	    if (k != j)
	      seq->second[k] = seq->second[j];
	    ++k;
	  }
      seq->second.truncate (k);
      /* And verify.  */
      seq_entry *e;
      FOR_EACH_VEC_ELT (seq->second, j, e)
	gcc_assert (e->second == sm_ord
		    || (e->second == sm_other && e->from != NULL_TREE));
    }

  /* Verify dependence for refs we cannot handle with the order preserving
     code (refs_not_supported) or prune them from mem_refs.  */
  auto_vec<seq_entry> unord_refs;
  EXECUTE_IF_SET_IN_BITMAP (refs_not_supported, 0, i, bi)
    {
      ref = memory_accesses.refs_list[i];
      if (!ref_indep_loop_p (loop, ref, sm_waw))
	bitmap_clear_bit (mem_refs, i);
      /* We've now verified store order for ref with respect to all other
	 stores in the loop does not matter.  */
      else
	unord_refs.safe_push (seq_entry (i, sm_unord));
    }

  hash_map<im_mem_ref *, sm_aux *> aux_map;

  /* Execute SM but delay the store materialization for ordered
     sequences on exit.  */
  EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
    {
      ref = memory_accesses.refs_list[i];
      execute_sm (loop, ref, aux_map);
    }

  /* Materialize ordered store sequences on exits.  */
  FOR_EACH_VEC_ELT (exits, i, e)
    {
      if (i < sms.length ())
	{
	  gcc_assert (sms[i].first == e);
	  execute_sm_exit (loop, e, sms[i].second, aux_map, sm_ord);
	  sms[i].second.release ();
	}
      if (!unord_refs.is_empty ())
	execute_sm_exit (loop, e, unord_refs, aux_map, sm_unord);
    }

  for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
       iter != aux_map.end (); ++iter)
    delete (*iter).second;
}
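
/* Thus two flavours of materialization exist: sm_ord sequences recreate
   the original order of the final stores on every exit, while sm_unord
   refs, whose order was verified not to matter, are emitted on the
   exits in arbitrary order, conditionalized by their store flags when
   required.  */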

/* Helper functor for ref_always_accessed_p below.  */

class ref_always_accessed
{
public:
  ref_always_accessed (class loop *loop_, bool stored_p_)
      : loop (loop_), stored_p (stored_p_) {}
  bool operator () (mem_ref_loc *loc);
  class loop *loop;
  bool stored_p;
};

bool
ref_always_accessed::operator () (mem_ref_loc *loc)
{
  class loop *must_exec;

  struct lim_aux_data *lim_data = get_lim_data (loc->stmt);
  if (!lim_data)
    return false;

  /* If we require an always executed store make sure the statement
     is a store.  */
  if (stored_p)
    {
      tree lhs = gimple_get_lhs (loc->stmt);
      if (!lhs
	  || !(DECL_P (lhs) || REFERENCE_CLASS_P (lhs)))
	return false;
    }

  must_exec = lim_data->always_executed_in;
  if (!must_exec)
    return false;

  if (must_exec == loop
      || flow_loop_nested_p (must_exec, loop))
    return true;

  return false;
}

/* Returns true if REF is always accessed in LOOP.  If STORED_P is true
   make sure REF is always stored to in LOOP.  */

static bool
ref_always_accessed_p (class loop *loop, im_mem_ref *ref, bool stored_p)
{
  return for_all_locs_in_loop (loop, ref,
			       ref_always_accessed (loop, stored_p));
}

/* Returns true if REF1 and REF2 are independent.  */

static bool
refs_independent_p (im_mem_ref *ref1, im_mem_ref *ref2, bool tbaa_p)
{
  if (ref1 == ref2)
    return true;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Querying dependency of refs %u and %u: ",
	     ref1->id, ref2->id);

  if (mem_refs_may_alias_p (ref1, ref2, &memory_accesses.ttae_cache, tbaa_p))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "dependent.\n");
      return false;
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "independent.\n");
      return true;
    }
}

/* Returns true if REF is independent of all other accesses in LOOP.
   KIND specifies the kind of dependence to consider.
     lim_raw assumes REF is not stored in LOOP and disambiguates RAW
	     dependences so if true REF can be hoisted out of LOOP
     sm_war disambiguates a store REF against all other loads to see
	    whether the store can be sunk across loads out of LOOP
     sm_waw disambiguates a store REF against all other stores to see
	    whether the store can be sunk across stores out of LOOP.  */

static bool
ref_indep_loop_p (class loop *loop, im_mem_ref *ref, dep_kind kind)
{
  bool indep_p = true;
  bitmap refs_to_check;

  if (kind == sm_war)
    refs_to_check = &memory_accesses.refs_loaded_in_loop[loop->num];
  else
    refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];

  if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID))
    indep_p = false;
  else
    {
      /* tri-state, { unknown, independent, dependent }  */
      dep_state state = query_loop_dependence (loop, ref, kind);
      if (state != dep_unknown)
	return state == dep_independent ? true : false;

      class loop *inner = loop->inner;
      while (inner)
	{
	  if (!ref_indep_loop_p (inner, ref, kind))
	    {
	      indep_p = false;
	      break;
	    }
	  inner = inner->next;
	}

      if (indep_p)
	{
	  unsigned i;
	  bitmap_iterator bi;
	  EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
	    {
	      im_mem_ref *aref = memory_accesses.refs_list[i];
	      if (!refs_independent_p (ref, aref, kind != sm_waw))
		{
		  indep_p = false;
		  break;
		}
	    }
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Querying %s dependencies of ref %u in loop %d: %s\n",
	     kind == lim_raw ? "RAW" : (kind == sm_war ? "SM WAR" : "SM WAW"),
	     ref->id, loop->num, indep_p ? "independent" : "dependent");

  /* Record the computed result in the cache.  */
  record_loop_dependence (loop, ref, kind,
			  indep_p ? dep_independent : dep_dependent);

  return indep_p;
}
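
/* For example, in

     for (...) { tmp = *p; *q = tmp; }

   hoisting the load of *p needs *p independent of the stores in the
   loop (lim_raw); sinking the store to *q out of the loop needs *q
   independent of the loads (sm_war) and, when its order cannot be
   preserved, of the other stores as well (sm_waw).  (Illustrative
   only.)  */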

/* Returns true if we can perform store motion of REF from LOOP.  */

static bool
can_sm_ref_p (class loop *loop, im_mem_ref *ref)
{
  tree base;

  /* Can't hoist unanalyzable refs.  */
  if (!MEM_ANALYZABLE (ref))
    return false;

  /* It should be movable.  */
  if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
      || TREE_THIS_VOLATILE (ref->mem.ref)
      || !for_each_index (&ref->mem.ref, may_move_till, loop))
    return false;

  /* If it can throw, fail; we do not properly update EH info.  */
  if (tree_could_throw_p (ref->mem.ref))
    return false;

  /* If it can trap, it must be always executed in LOOP.
     Readonly memory locations may trap when storing to them, but
     tree_could_trap_p is a predicate for rvalues, so check that
     explicitly.  */
  base = get_base_address (ref->mem.ref);
  if ((tree_could_trap_p (ref->mem.ref)
       || (DECL_P (base) && TREE_READONLY (base)))
      /* ???  We can at least use false here, allowing loads?  We
	 are forcing conditional stores if the ref is not always
	 stored to later anyway.  So this would only guard
	 the load we need to emit.  Thus when the ref is not
	 loaded we can elide this completely?  */
      && !ref_always_accessed_p (loop, ref, true))
    return false;

  /* Verify all loads of ref can be hoisted.  */
  if (ref->loaded
      && bitmap_bit_p (ref->loaded, loop->num)
      && !ref_indep_loop_p (loop, ref, lim_raw))
    return false;

  /* Verify the candidate can be disambiguated against all loads,
     that is, we can elide all in-loop stores.  Disambiguation
     against stores is done later when we cannot guarantee preserving
     the order of stores.  */
  if (!ref_indep_loop_p (loop, ref, sm_war))
    return false;

  return true;
}

/* Marks the references in LOOP for which store motion should be performed
   in REFS_TO_SM.  SM_EXECUTED is the set of references for which store
   motion was performed in one of the outer loops.  */

static void
find_refs_for_sm (class loop *loop, bitmap sm_executed, bitmap refs_to_sm)
{
  bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num];
  unsigned i;
  bitmap_iterator bi;
  im_mem_ref *ref;

  EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
    {
      ref = memory_accesses.refs_list[i];
      if (can_sm_ref_p (loop, ref) && dbg_cnt (lim))
	bitmap_set_bit (refs_to_sm, i);
    }
}

/* Checks whether LOOP (with exits stored in EXITS array) is suitable
   for a store motion optimization (i.e. whether we can insert statements
   on its exits).  */

static bool
loop_suitable_for_sm (class loop *loop ATTRIBUTE_UNUSED,
		      vec<edge> exits)
{
  unsigned i;
  edge ex;

  FOR_EACH_VEC_ELT (exits, i, ex)
    if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
      return false;

  return true;
}

/* Try to perform store motion for all memory references modified inside
   LOOP.  SM_EXECUTED is the bitmap of the memory references for which
   store motion was executed in one of the outer loops.  */

static void
store_motion_loop (class loop *loop, bitmap sm_executed)
{
  vec<edge> exits = get_loop_exit_edges (loop);
  class loop *subloop;
  bitmap sm_in_loop = BITMAP_ALLOC (&lim_bitmap_obstack);

  if (loop_suitable_for_sm (loop, exits))
    {
      find_refs_for_sm (loop, sm_executed, sm_in_loop);
      if (!bitmap_empty_p (sm_in_loop))
	{
	  hoist_memory_references (loop, sm_in_loop, exits);
	  /* Commit edge inserts here to preserve the order of stores
	     when an exit exits multiple loops.  */
	  gsi_commit_edge_inserts ();
	}
    }
  exits.release ();

  bitmap_ior_into (sm_executed, sm_in_loop);
  for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
    store_motion_loop (subloop, sm_executed);
  bitmap_and_compl_into (sm_executed, sm_in_loop);
  BITMAP_FREE (sm_in_loop);
}

/* Try to perform store motion for all memory references modified inside
   loops.  */

static void
do_store_motion (void)
{
  class loop *loop;
  bitmap sm_executed = BITMAP_ALLOC (&lim_bitmap_obstack);

  for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
    store_motion_loop (loop, sm_executed);

  BITMAP_FREE (sm_executed);
}
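
/* As a concrete example, store motion rewrites

     while (cond)
       *p = f ();

   roughly into

     lsm_tmp = *p;	// omitted when *p is never read in the loop
     while (cond)
       lsm_tmp = f ();
     *p = lsm_tmp;	// conditional in the multi-threaded model

   with the final store placed on each loop exit (names illustrative).  */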

/* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
   for each such basic block bb records the outermost loop for which
   execution of its header implies execution of bb.  CONTAINS_CALL is
   the bitmap of blocks that contain a nonpure call.  */

static void
fill_always_executed_in_1 (class loop *loop, sbitmap contains_call)
{
  basic_block bb = NULL, *bbs, last = NULL;
  unsigned i;
  edge e;
  class loop *inn_loop = loop;

  if (ALWAYS_EXECUTED_IN (loop->header) == NULL)
    {
      bbs = get_loop_body_in_dom_order (loop);

      for (i = 0; i < loop->num_nodes; i++)
	{
	  edge_iterator ei;
	  bb = bbs[i];

	  if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	    last = bb;

	  if (bitmap_bit_p (contains_call, bb->index))
	    break;

	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      /* If there is an exit from this BB.  */
	      if (!flow_bb_inside_loop_p (loop, e->dest))
		break;
	      /* Or we enter a possibly non-finite loop.  */
	      if (flow_loop_nested_p (bb->loop_father,
				      e->dest->loop_father)
		  && ! finite_loop_p (e->dest->loop_father))
		break;
	    }
	  if (e)
	    break;

	  /* A loop might be infinite (TODO use simple loop analysis
	     to disprove this if possible).  */
	  if (bb->flags & BB_IRREDUCIBLE_LOOP)
	    break;

	  if (!flow_bb_inside_loop_p (inn_loop, bb))
	    break;

	  if (bb->loop_father->header == bb)
	    {
	      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
		break;

	      /* In a loop that is always entered we may proceed anyway.
		 But record that we entered it and stop once we leave it.  */
	      inn_loop = bb->loop_father;
	    }
	}

      while (1)
	{
	  SET_ALWAYS_EXECUTED_IN (last, loop);
	  if (last == loop->header)
	    break;
	  last = get_immediate_dominator (CDI_DOMINATORS, last);
	}

      free (bbs);
    }

  for (loop = loop->inner; loop; loop = loop->next)
    fill_always_executed_in_1 (loop, contains_call);
}
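
/* E.g. in

     for (...)
       {
	 x = *p;	// always executed when the header executes
	 if (c)
	   y = *q;	// not always executed
       }

   ALWAYS_EXECUTED_IN is set for the block containing x = *p but not for
   the conditional block (illustrative).  */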

/* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e.
   for each such basic block bb records the outermost loop for which
   execution of its header implies execution of bb.  */

static void
fill_always_executed_in (void)
{
  basic_block bb;
  class loop *loop;

  auto_sbitmap contains_call (last_basic_block_for_fn (cfun));
  bitmap_clear (contains_call);
  FOR_EACH_BB_FN (bb, cfun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  if (nonpure_call_p (gsi_stmt (gsi)))
	    break;
	}

      if (!gsi_end_p (gsi))
	bitmap_set_bit (contains_call, bb->index);
    }

  for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
    fill_always_executed_in_1 (loop, contains_call);
}

/* Compute the global information needed by the loop invariant motion pass.  */

static void
tree_ssa_lim_initialize (void)
{
  class loop *loop;
  unsigned i;

  bitmap_obstack_initialize (&lim_bitmap_obstack);
  gcc_obstack_init (&mem_ref_obstack);
  lim_aux_data_map = new hash_map<gimple *, lim_aux_data *>;

  if (flag_tm)
    compute_transaction_bits ();

  alloc_aux_for_edges (0);

  memory_accesses.refs = new hash_table<mem_ref_hasher> (100);
  memory_accesses.refs_list.create (100);
  /* Allocate a special, unanalyzable mem-ref with ID zero.  */
  memory_accesses.refs_list.quick_push
    (mem_ref_alloc (NULL, 0, UNANALYZABLE_MEM_ID));

  memory_accesses.refs_loaded_in_loop.create (number_of_loops (cfun));
  memory_accesses.refs_loaded_in_loop.quick_grow (number_of_loops (cfun));
  memory_accesses.refs_stored_in_loop.create (number_of_loops (cfun));
  memory_accesses.refs_stored_in_loop.quick_grow (number_of_loops (cfun));
  memory_accesses.all_refs_stored_in_loop.create (number_of_loops (cfun));
  memory_accesses.all_refs_stored_in_loop.quick_grow (number_of_loops (cfun));

  for (i = 0; i < number_of_loops (cfun); i++)
    {
      bitmap_initialize (&memory_accesses.refs_loaded_in_loop[i],
			 &lim_bitmap_obstack);
      bitmap_initialize (&memory_accesses.refs_stored_in_loop[i],
			 &lim_bitmap_obstack);
      bitmap_initialize (&memory_accesses.all_refs_stored_in_loop[i],
			 &lim_bitmap_obstack);
    }

  memory_accesses.ttae_cache = NULL;

  /* Initialize bb_loop_postorder with a mapping from loop->num to
     its postorder index.  */
  i = 0;
  bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun));
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    bb_loop_postorder[loop->num] = i++;
}

/* Cleans up after the invariant motion pass.  */

static void
tree_ssa_lim_finalize (void)
{
  basic_block bb;
  unsigned i;
  im_mem_ref *ref;

  free_aux_for_edges ();

  FOR_EACH_BB_FN (bb, cfun)
    SET_ALWAYS_EXECUTED_IN (bb, NULL);

  bitmap_obstack_release (&lim_bitmap_obstack);
  delete lim_aux_data_map;

  delete memory_accesses.refs;
  memory_accesses.refs = NULL;

  FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
    memref_free (ref);
  memory_accesses.refs_list.release ();
  obstack_free (&mem_ref_obstack, NULL);

  memory_accesses.refs_loaded_in_loop.release ();
  memory_accesses.refs_stored_in_loop.release ();
  memory_accesses.all_refs_stored_in_loop.release ();

  if (memory_accesses.ttae_cache)
    free_affine_expand_cache (&memory_accesses.ttae_cache);

  free (bb_loop_postorder);
}

/* Moves invariants from loops.  Only "expensive" invariants are moved out --
   i.e. those that are likely to be a win regardless of the register
   pressure.  */

static unsigned int
tree_ssa_lim (void)
{
  unsigned int todo;

  tree_ssa_lim_initialize ();

  /* Gathers information about memory accesses in the loops.  */
  analyze_memory_references ();

  /* Fills ALWAYS_EXECUTED_IN information for basic blocks.  */
  fill_always_executed_in ();

  /* For each statement determine the outermost loop in which it is
     invariant and the cost of computing the invariant.  */
  invariantness_dom_walker (CDI_DOMINATORS)
    .walk (cfun->cfg->x_entry_block_ptr);

  /* Execute store motion.  Force the necessary invariants to be moved
     out of the loops as well.  */
  do_store_motion ();

  /* Move the expressions that are expensive enough.  */
  todo = move_computations ();

  tree_ssa_lim_finalize ();

  return todo;
}

/* Loop invariant motion pass.  */

namespace {

const pass_data pass_data_lim =
{
  GIMPLE_PASS, /* type */
  "lim", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_LIM, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_lim : public gimple_opt_pass
{
public:
  pass_lim (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lim, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_lim (m_ctxt); }
  virtual bool gate (function *) { return flag_tree_loop_im != 0; }
  virtual unsigned int execute (function *);

}; // class pass_lim

unsigned int
pass_lim::execute (function *fun)
{
  bool in_loop_pipeline = scev_initialized_p ();
  if (!in_loop_pipeline)
    loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);

  if (number_of_loops (fun) <= 1)
    return 0;
  unsigned int todo = tree_ssa_lim ();

  if (!in_loop_pipeline)
    loop_optimizer_finalize ();
  else
    scev_reset ();

  return todo;
}

} // anon namespace

gimple_opt_pass *
make_pass_lim (gcc::context *ctxt)
{
  return new pass_lim (ctxt);
}