gcc/tree-ssa-loop-im.c
1 /* Loop invariant motion.
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "tree.h"
25 #include "gimple.h"
26 #include "cfghooks.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "gimple-pretty-print.h"
30 #include "fold-const.h"
31 #include "cfganal.h"
32 #include "tree-eh.h"
33 #include "gimplify.h"
34 #include "gimple-iterator.h"
35 #include "tree-cfg.h"
36 #include "tree-ssa-loop-manip.h"
37 #include "tree-ssa-loop.h"
38 #include "tree-into-ssa.h"
39 #include "cfgloop.h"
40 #include "domwalk.h"
41 #include "tree-affine.h"
42 #include "tree-ssa-propagate.h"
43 #include "trans-mem.h"
44 #include "gimple-fold.h"
45 #include "tree-scalar-evolution.h"
46 #include "tree-ssa-loop-niter.h"
47 #include "alias.h"
48 #include "builtins.h"
49 #include "tree-dfa.h"
50 #include "dbgcnt.h"
51
52 /* TODO: Support for predicated code motion. I.e.
53
54 while (1)
55 {
56 if (cond)
57 {
58 a = inv;
59 something;
60 }
61 }
62
63 Where COND and INV are invariants, but evaluating INV may trap or be
 64 invalid for some other reason if !COND. This may be transformed to
65
66 if (cond)
67 a = inv;
68 while (1)
69 {
70 if (cond)
71 something;
72 } */
73
74 /* The auxiliary data kept for each statement. */
75
76 struct lim_aux_data
77 {
78 class loop *max_loop; /* The outermost loop in that the statement
79 is invariant. */
80
81 class loop *tgt_loop; /* The loop out of that we want to move the
82 invariant. */
83
84 class loop *always_executed_in;
85 /* The outermost loop for that we are sure
86 the statement is executed if the loop
87 is entered. */
88
89 unsigned cost; /* Cost of the computation performed by the
90 statement. */
91
92 unsigned ref; /* The simple_mem_ref in this stmt or 0. */
93
94 vec<gimple *> depends; /* Vector of statements that must be also
95 hoisted out of the loop when this statement
96 is hoisted; i.e. those that define the
97 operands of the statement and are inside of
98 the MAX_LOOP loop. */
99 };
100
101 /* Maps statements to their lim_aux_data. */
102
103 static hash_map<gimple *, lim_aux_data *> *lim_aux_data_map;
104
105 /* Description of a memory reference location. */
106
107 struct mem_ref_loc
108 {
109 tree *ref; /* The reference itself. */
110 gimple *stmt; /* The statement in that it occurs. */
111 };
112
113
114 /* Description of a memory reference. */
115
116 class im_mem_ref
117 {
118 public:
119 unsigned id : 30; /* ID assigned to the memory reference
120 (its index in memory_accesses.refs_list) */
121 unsigned ref_canonical : 1; /* Whether mem.ref was canonicalized. */
122 unsigned ref_decomposed : 1; /* Whether the ref was hashed from mem. */
123 hashval_t hash; /* Its hash value. */
124
125 /* The memory access itself and associated caching of alias-oracle
126 query meta-data. */
127 ao_ref mem;
128
129 bitmap stored; /* The set of loops in that this memory location
130 is stored to. */
131 bitmap loaded; /* The set of loops in that this memory location
132 is loaded from. */
133 vec<mem_ref_loc> accesses_in_loop;
134 /* The locations of the accesses. Vector
135 indexed by the loop number. */
136
137 /* The following set is computed on demand. */
138 bitmap_head dep_loop; /* The set of loops in that the memory
139 reference is {in,}dependent in
140 different modes. */
141 };
142
143 /* We use six bits per loop in the ref->dep_loop bitmap to record
144 the dep_kind x dep_state combinations. */
145
146 enum dep_kind { lim_raw, sm_war, sm_waw };
147 enum dep_state { dep_unknown, dep_independent, dep_dependent };
148
149 /* Populate the loop dependence cache of REF for LOOP, KIND with STATE. */
150
151 static void
152 record_loop_dependence (class loop *loop, im_mem_ref *ref,
153 dep_kind kind, dep_state state)
154 {
155 gcc_assert (state != dep_unknown);
 156 unsigned bit = 6 * loop->num + kind * 2 + (state == dep_dependent ? 1 : 0);
157 bitmap_set_bit (&ref->dep_loop, bit);
158 }
159
160 /* Query the loop dependence cache of REF for LOOP, KIND. */
161
162 static dep_state
163 query_loop_dependence (class loop *loop, im_mem_ref *ref, dep_kind kind)
164 {
165 unsigned first_bit = 6 * loop->num + kind * 2;
166 if (bitmap_bit_p (&ref->dep_loop, first_bit))
167 return dep_independent;
168 else if (bitmap_bit_p (&ref->dep_loop, first_bit + 1))
169 return dep_dependent;
170 return dep_unknown;
171 }
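
/* Illustrative sketch, not part of the pass: with the six-bits-per-loop
   encoding above, caching that a reference is independent of the stores
   in a loop numbered 2 for the sm_war kind would be

     record_loop_dependence (loop_2, ref, sm_war, dep_independent);

   which sets bit 6*2 + 1*2 + 0 == 14 in ref->dep_loop, and a later

     query_loop_dependence (loop_2, ref, sm_war)

   sees that bit and returns dep_independent without redoing the
   dependence walk.  The name loop_2 is hypothetical.  */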
172
173 /* Mem_ref hashtable helpers. */
174
175 struct mem_ref_hasher : nofree_ptr_hash <im_mem_ref>
176 {
177 typedef ao_ref *compare_type;
178 static inline hashval_t hash (const im_mem_ref *);
179 static inline bool equal (const im_mem_ref *, const ao_ref *);
180 };
181
182 /* A hash function for class im_mem_ref object OBJ. */
183
184 inline hashval_t
185 mem_ref_hasher::hash (const im_mem_ref *mem)
186 {
187 return mem->hash;
188 }
189
190 /* An equality function for class im_mem_ref object MEM1 with
191 memory reference OBJ2. */
192
193 inline bool
194 mem_ref_hasher::equal (const im_mem_ref *mem1, const ao_ref *obj2)
195 {
196 if (obj2->max_size_known_p ())
197 return (mem1->ref_decomposed
198 && operand_equal_p (mem1->mem.base, obj2->base, 0)
199 && known_eq (mem1->mem.offset, obj2->offset)
200 && known_eq (mem1->mem.size, obj2->size)
201 && known_eq (mem1->mem.max_size, obj2->max_size)
202 && mem1->mem.volatile_p == obj2->volatile_p
203 && (mem1->mem.ref_alias_set == obj2->ref_alias_set
204 /* We are not canonicalizing alias-sets but for the
205 special-case we didn't canonicalize yet and the
 206 incoming ref is an alias-set zero MEM we pick
207 the correct one already. */
208 || (!mem1->ref_canonical
209 && (TREE_CODE (obj2->ref) == MEM_REF
210 || TREE_CODE (obj2->ref) == TARGET_MEM_REF)
211 && obj2->ref_alias_set == 0)
212 /* Likewise if there's a canonical ref with alias-set zero. */
213 || (mem1->ref_canonical && mem1->mem.ref_alias_set == 0))
214 && types_compatible_p (TREE_TYPE (mem1->mem.ref),
215 TREE_TYPE (obj2->ref)));
216 else
217 return operand_equal_p (mem1->mem.ref, obj2->ref, 0);
218 }
219
220
221 /* Description of memory accesses in loops. */
222
223 static struct
224 {
225 /* The hash table of memory references accessed in loops. */
226 hash_table<mem_ref_hasher> *refs;
227
228 /* The list of memory references. */
229 vec<im_mem_ref *> refs_list;
230
231 /* The set of memory references accessed in each loop. */
232 vec<bitmap_head> refs_loaded_in_loop;
233
234 /* The set of memory references stored in each loop. */
235 vec<bitmap_head> refs_stored_in_loop;
236
 237 /* The set of memory references stored in each loop, including subloops. */
238 vec<bitmap_head> all_refs_stored_in_loop;
239
240 /* Cache for expanding memory addresses. */
241 hash_map<tree, name_expansion *> *ttae_cache;
242 } memory_accesses;
243
244 /* Obstack for the bitmaps in the above data structures. */
245 static bitmap_obstack lim_bitmap_obstack;
246 static obstack mem_ref_obstack;
247
248 static bool ref_indep_loop_p (class loop *, im_mem_ref *, dep_kind);
249 static bool ref_always_accessed_p (class loop *, im_mem_ref *, bool);
250 static bool refs_independent_p (im_mem_ref *, im_mem_ref *, bool = true);
251
252 /* Minimum cost of an expensive expression. */
253 #define LIM_EXPENSIVE ((unsigned) param_lim_expensive)
254
255 /* The outermost loop for which execution of the header guarantees that the
256 block will be executed. */
257 #define ALWAYS_EXECUTED_IN(BB) ((class loop *) (BB)->aux)
258 #define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))
259
260 /* ID of the shared unanalyzable mem. */
261 #define UNANALYZABLE_MEM_ID 0
262
263 /* Whether the reference was analyzable. */
264 #define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID)
265
266 static struct lim_aux_data *
267 init_lim_data (gimple *stmt)
268 {
269 lim_aux_data *p = XCNEW (struct lim_aux_data);
270 lim_aux_data_map->put (stmt, p);
271
272 return p;
273 }
274
275 static struct lim_aux_data *
276 get_lim_data (gimple *stmt)
277 {
278 lim_aux_data **p = lim_aux_data_map->get (stmt);
279 if (!p)
280 return NULL;
281
282 return *p;
283 }
284
285 /* Releases the memory occupied by DATA. */
286
287 static void
288 free_lim_aux_data (struct lim_aux_data *data)
289 {
290 data->depends.release ();
291 free (data);
292 }
293
294 static void
295 clear_lim_data (gimple *stmt)
296 {
297 lim_aux_data **p = lim_aux_data_map->get (stmt);
298 if (!p)
299 return;
300
301 free_lim_aux_data (*p);
302 *p = NULL;
303 }
304
305
306 /* The possibilities of statement movement. */
307 enum move_pos
308 {
309 MOVE_IMPOSSIBLE, /* No movement -- side effect expression. */
310 MOVE_PRESERVE_EXECUTION, /* Must not cause the non-executed statement
 311 to become executed -- memory accesses, ... */
312 MOVE_POSSIBLE /* Unlimited movement. */
313 };
314
315
316 /* If it is possible to hoist the statement STMT unconditionally,
317 returns MOVE_POSSIBLE.
318 If it is possible to hoist the statement STMT, but we must avoid making
319 it executed if it would not be executed in the original program (e.g.
320 because it may trap), return MOVE_PRESERVE_EXECUTION.
321 Otherwise return MOVE_IMPOSSIBLE. */
322
323 enum move_pos
324 movement_possibility (gimple *stmt)
325 {
326 tree lhs;
327 enum move_pos ret = MOVE_POSSIBLE;
328
329 if (flag_unswitch_loops
330 && gimple_code (stmt) == GIMPLE_COND)
331 {
332 /* If we perform unswitching, force the operands of the invariant
333 condition to be moved out of the loop. */
334 return MOVE_POSSIBLE;
335 }
336
337 if (gimple_code (stmt) == GIMPLE_PHI
338 && gimple_phi_num_args (stmt) <= 2
339 && !virtual_operand_p (gimple_phi_result (stmt))
340 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
341 return MOVE_POSSIBLE;
342
343 if (gimple_get_lhs (stmt) == NULL_TREE)
344 return MOVE_IMPOSSIBLE;
345
346 if (gimple_vdef (stmt))
347 return MOVE_IMPOSSIBLE;
348
349 if (stmt_ends_bb_p (stmt)
350 || gimple_has_volatile_ops (stmt)
351 || gimple_has_side_effects (stmt)
352 || stmt_could_throw_p (cfun, stmt))
353 return MOVE_IMPOSSIBLE;
354
355 if (is_gimple_call (stmt))
356 {
357 /* While pure or const call is guaranteed to have no side effects, we
358 cannot move it arbitrarily. Consider code like
359
360 char *s = something ();
361
362 while (1)
363 {
364 if (s)
365 t = strlen (s);
366 else
367 t = 0;
368 }
369
370 Here the strlen call cannot be moved out of the loop, even though
371 s is invariant. In addition to possibly creating a call with
372 invalid arguments, moving out a function call that is not executed
373 may cause performance regressions in case the call is costly and
374 not executed at all. */
375 ret = MOVE_PRESERVE_EXECUTION;
376 lhs = gimple_call_lhs (stmt);
377 }
378 else if (is_gimple_assign (stmt))
379 lhs = gimple_assign_lhs (stmt);
380 else
381 return MOVE_IMPOSSIBLE;
382
383 if (TREE_CODE (lhs) == SSA_NAME
384 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
385 return MOVE_IMPOSSIBLE;
386
387 if (TREE_CODE (lhs) != SSA_NAME
388 || gimple_could_trap_p (stmt))
389 return MOVE_PRESERVE_EXECUTION;
390
391 /* Non local loads in a transaction cannot be hoisted out. Well,
392 unless the load happens on every path out of the loop, but we
393 don't take this into account yet. */
394 if (flag_tm
395 && gimple_in_transaction (stmt)
396 && gimple_assign_single_p (stmt))
397 {
398 tree rhs = gimple_assign_rhs1 (stmt);
399 if (DECL_P (rhs) && is_global_var (rhs))
400 {
401 if (dump_file)
402 {
403 fprintf (dump_file, "Cannot hoist conditional load of ");
404 print_generic_expr (dump_file, rhs, TDF_SLIM);
405 fprintf (dump_file, " because it is in a transaction.\n");
406 }
407 return MOVE_IMPOSSIBLE;
408 }
409 }
410
411 return ret;
412 }
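
/* Hedged illustration (the statements are made up): for a loop body like

     x = a + b;      pure arithmetic -> MOVE_POSSIBLE
     y = c / d;      may trap if d == 0 -> MOVE_PRESERVE_EXECUTION
     *p = x;         creates a vdef -> MOVE_IMPOSSIBLE

   only the first statement may be hoisted unconditionally; the division
   may be moved only to a level where it is still guaranteed to execute,
   and the store is left to store motion.  */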
413
414 /* Suppose that operand DEF is used inside the LOOP. Returns the outermost
415 loop to that we could move the expression using DEF if it did not have
416 other operands, i.e. the outermost loop enclosing LOOP in that the value
417 of DEF is invariant. */
418
419 static class loop *
420 outermost_invariant_loop (tree def, class loop *loop)
421 {
422 gimple *def_stmt;
423 basic_block def_bb;
424 class loop *max_loop;
425 struct lim_aux_data *lim_data;
426
427 if (!def)
428 return superloop_at_depth (loop, 1);
429
430 if (TREE_CODE (def) != SSA_NAME)
431 {
432 gcc_assert (is_gimple_min_invariant (def));
433 return superloop_at_depth (loop, 1);
434 }
435
436 def_stmt = SSA_NAME_DEF_STMT (def);
437 def_bb = gimple_bb (def_stmt);
438 if (!def_bb)
439 return superloop_at_depth (loop, 1);
440
441 max_loop = find_common_loop (loop, def_bb->loop_father);
442
443 lim_data = get_lim_data (def_stmt);
444 if (lim_data != NULL && lim_data->max_loop != NULL)
445 max_loop = find_common_loop (max_loop,
446 loop_outer (lim_data->max_loop));
447 if (max_loop == loop)
448 return NULL;
449 max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);
450
451 return max_loop;
452 }
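
/* Illustrative sketch with a hypothetical nest:

     for (i ...)         loop 1
       t = ...;          t defined directly in loop 1
       for (j ...)       loop 2
         for (k ...)     loop 3
           ... use of t ...

   Here outermost_invariant_loop (t, loop_3) returns loop 2, the
   outermost loop enclosing loop 3 in which the value of t does not
   change.  If t were instead defined inside loop 3 the function would
   return NULL.  */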
453
454 /* DATA is a structure containing information associated with a statement
455 inside LOOP. DEF is one of the operands of this statement.
456
457 Find the outermost loop enclosing LOOP in that value of DEF is invariant
458 and record this in DATA->max_loop field. If DEF itself is defined inside
459 this loop as well (i.e. we need to hoist it out of the loop if we want
460 to hoist the statement represented by DATA), record the statement in that
461 DEF is defined to the DATA->depends list. Additionally if ADD_COST is true,
462 add the cost of the computation of DEF to the DATA->cost.
463
464 If DEF is not invariant in LOOP, return false. Otherwise return TRUE. */
465
466 static bool
467 add_dependency (tree def, struct lim_aux_data *data, class loop *loop,
468 bool add_cost)
469 {
470 gimple *def_stmt = SSA_NAME_DEF_STMT (def);
471 basic_block def_bb = gimple_bb (def_stmt);
472 class loop *max_loop;
473 struct lim_aux_data *def_data;
474
475 if (!def_bb)
476 return true;
477
478 max_loop = outermost_invariant_loop (def, loop);
479 if (!max_loop)
480 return false;
481
482 if (flow_loop_nested_p (data->max_loop, max_loop))
483 data->max_loop = max_loop;
484
485 def_data = get_lim_data (def_stmt);
486 if (!def_data)
487 return true;
488
489 if (add_cost
490 /* Only add the cost if the statement defining DEF is inside LOOP,
491 i.e. if it is likely that by moving the invariants dependent
492 on it, we will be able to avoid creating a new register for
493 it (since it will be only used in these dependent invariants). */
494 && def_bb->loop_father == loop)
495 data->cost += def_data->cost;
496
497 data->depends.safe_push (def_stmt);
498
499 return true;
500 }
501
502 /* Returns an estimate for a cost of statement STMT. The values here
503 are just ad-hoc constants, similar to costs for inlining. */
504
505 static unsigned
506 stmt_cost (gimple *stmt)
507 {
508 /* Always try to create possibilities for unswitching. */
509 if (gimple_code (stmt) == GIMPLE_COND
510 || gimple_code (stmt) == GIMPLE_PHI)
511 return LIM_EXPENSIVE;
512
513 /* We should be hoisting calls if possible. */
514 if (is_gimple_call (stmt))
515 {
516 tree fndecl;
517
518 /* Unless the call is a builtin_constant_p; this always folds to a
519 constant, so moving it is useless. */
520 fndecl = gimple_call_fndecl (stmt);
521 if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_CONSTANT_P))
522 return 0;
523
524 return LIM_EXPENSIVE;
525 }
526
527 /* Hoisting memory references out should almost surely be a win. */
528 if (gimple_references_memory_p (stmt))
529 return LIM_EXPENSIVE;
530
531 if (gimple_code (stmt) != GIMPLE_ASSIGN)
532 return 1;
533
534 switch (gimple_assign_rhs_code (stmt))
535 {
536 case MULT_EXPR:
537 case WIDEN_MULT_EXPR:
538 case WIDEN_MULT_PLUS_EXPR:
539 case WIDEN_MULT_MINUS_EXPR:
540 case DOT_PROD_EXPR:
541 case TRUNC_DIV_EXPR:
542 case CEIL_DIV_EXPR:
543 case FLOOR_DIV_EXPR:
544 case ROUND_DIV_EXPR:
545 case EXACT_DIV_EXPR:
546 case CEIL_MOD_EXPR:
547 case FLOOR_MOD_EXPR:
548 case ROUND_MOD_EXPR:
549 case TRUNC_MOD_EXPR:
550 case RDIV_EXPR:
551 /* Division and multiplication are usually expensive. */
552 return LIM_EXPENSIVE;
553
554 case LSHIFT_EXPR:
555 case RSHIFT_EXPR:
556 case WIDEN_LSHIFT_EXPR:
557 case LROTATE_EXPR:
558 case RROTATE_EXPR:
559 /* Shifts and rotates are usually expensive. */
560 return LIM_EXPENSIVE;
561
562 case CONSTRUCTOR:
563 /* Make vector construction cost proportional to the number
564 of elements. */
565 return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));
566
567 case SSA_NAME:
568 case PAREN_EXPR:
569 /* Whether or not something is wrapped inside a PAREN_EXPR
570 should not change move cost. Nor should an intermediate
571 unpropagated SSA name copy. */
572 return 0;
573
574 default:
575 return 1;
576 }
577 }
578
579 /* Finds the outermost loop between OUTER and LOOP in that the memory reference
580 REF is independent. If REF is not independent in LOOP, NULL is returned
581 instead. */
582
583 static class loop *
584 outermost_indep_loop (class loop *outer, class loop *loop, im_mem_ref *ref)
585 {
586 class loop *aloop;
587
588 if (ref->stored && bitmap_bit_p (ref->stored, loop->num))
589 return NULL;
590
591 for (aloop = outer;
592 aloop != loop;
593 aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
594 if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
595 && ref_indep_loop_p (aloop, ref, lim_raw))
596 return aloop;
597
598 if (ref_indep_loop_p (loop, ref, lim_raw))
599 return loop;
600 else
601 return NULL;
602 }
603
604 /* If there is a simple load or store to a memory reference in STMT, returns
605 the location of the memory reference, and sets IS_STORE according to whether
606 it is a store or load. Otherwise, returns NULL. */
607
608 static tree *
609 simple_mem_ref_in_stmt (gimple *stmt, bool *is_store)
610 {
611 tree *lhs, *rhs;
612
613 /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns. */
614 if (!gimple_assign_single_p (stmt))
615 return NULL;
616
617 lhs = gimple_assign_lhs_ptr (stmt);
618 rhs = gimple_assign_rhs1_ptr (stmt);
619
620 if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
621 {
622 *is_store = false;
623 return rhs;
624 }
625 else if (gimple_vdef (stmt)
626 && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
627 {
628 *is_store = true;
629 return lhs;
630 }
631 else
632 return NULL;
633 }
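
/* Hedged illustration of the recognized shapes, in GIMPLE-like form:

     _1 = a[i_5];       load  -- returns the rhs address, *IS_STORE = false
     a[i_5] = _2;       store -- returns the lhs address, *IS_STORE = true

   An aggregate copy such as a = b, or a statement without a virtual
   operand, is not treated as a simple memory reference here.  */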
634
635 /* From a controlling predicate in DOM determine the arguments from
636 the PHI node PHI that are chosen if the predicate evaluates to
637 true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
638 they are non-NULL. Returns true if the arguments can be determined,
639 else return false. */
640
641 static bool
642 extract_true_false_args_from_phi (basic_block dom, gphi *phi,
643 tree *true_arg_p, tree *false_arg_p)
644 {
645 edge te, fe;
646 if (! extract_true_false_controlled_edges (dom, gimple_bb (phi),
647 &te, &fe))
648 return false;
649
650 if (true_arg_p)
651 *true_arg_p = PHI_ARG_DEF (phi, te->dest_idx);
652 if (false_arg_p)
653 *false_arg_p = PHI_ARG_DEF (phi, fe->dest_idx);
654
655 return true;
656 }
657
658 /* Determine the outermost loop to that it is possible to hoist a statement
659 STMT and store it to LIM_DATA (STMT)->max_loop. To do this we determine
660 the outermost loop in that the value computed by STMT is invariant.
661 If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
662 we preserve the fact whether STMT is executed. It also fills other related
663 information to LIM_DATA (STMT).
664
665 The function returns false if STMT cannot be hoisted outside of the loop it
666 is defined in, and true otherwise. */
667
668 static bool
669 determine_max_movement (gimple *stmt, bool must_preserve_exec)
670 {
671 basic_block bb = gimple_bb (stmt);
672 class loop *loop = bb->loop_father;
673 class loop *level;
674 struct lim_aux_data *lim_data = get_lim_data (stmt);
675 tree val;
676 ssa_op_iter iter;
677
678 if (must_preserve_exec)
679 level = ALWAYS_EXECUTED_IN (bb);
680 else
681 level = superloop_at_depth (loop, 1);
682 lim_data->max_loop = level;
683
684 if (gphi *phi = dyn_cast <gphi *> (stmt))
685 {
686 use_operand_p use_p;
687 unsigned min_cost = UINT_MAX;
688 unsigned total_cost = 0;
689 struct lim_aux_data *def_data;
690
691 /* We will end up promoting dependencies to be unconditionally
692 evaluated. For this reason the PHI cost (and thus the
693 cost we remove from the loop by doing the invariant motion)
694 is that of the cheapest PHI argument dependency chain. */
695 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
696 {
697 val = USE_FROM_PTR (use_p);
698
699 if (TREE_CODE (val) != SSA_NAME)
700 {
701 /* Assign const 1 to constants. */
702 min_cost = MIN (min_cost, 1);
703 total_cost += 1;
704 continue;
705 }
706 if (!add_dependency (val, lim_data, loop, false))
707 return false;
708
709 gimple *def_stmt = SSA_NAME_DEF_STMT (val);
710 if (gimple_bb (def_stmt)
711 && gimple_bb (def_stmt)->loop_father == loop)
712 {
713 def_data = get_lim_data (def_stmt);
714 if (def_data)
715 {
716 min_cost = MIN (min_cost, def_data->cost);
717 total_cost += def_data->cost;
718 }
719 }
720 }
721
722 min_cost = MIN (min_cost, total_cost);
723 lim_data->cost += min_cost;
724
725 if (gimple_phi_num_args (phi) > 1)
726 {
727 basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
728 gimple *cond;
729 if (gsi_end_p (gsi_last_bb (dom)))
730 return false;
731 cond = gsi_stmt (gsi_last_bb (dom));
732 if (gimple_code (cond) != GIMPLE_COND)
733 return false;
734 /* Verify that this is an extended form of a diamond and
735 the PHI arguments are completely controlled by the
736 predicate in DOM. */
737 if (!extract_true_false_args_from_phi (dom, phi, NULL, NULL))
738 return false;
739
740 /* Fold in dependencies and cost of the condition. */
741 FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
742 {
743 if (!add_dependency (val, lim_data, loop, false))
744 return false;
745 def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
746 if (def_data)
747 lim_data->cost += def_data->cost;
748 }
749
750 /* We want to avoid unconditionally executing very expensive
751 operations. As costs for our dependencies cannot be
 752 negative, just claim we are not invariant for this case.
753 We also are not sure whether the control-flow inside the
754 loop will vanish. */
755 if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
756 && !(min_cost != 0
757 && total_cost / min_cost <= 2))
758 return false;
759
760 /* Assume that the control-flow in the loop will vanish.
761 ??? We should verify this and not artificially increase
762 the cost if that is not the case. */
763 lim_data->cost += stmt_cost (stmt);
764 }
765
766 return true;
767 }
768 else
769 FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
770 if (!add_dependency (val, lim_data, loop, true))
771 return false;
772
773 if (gimple_vuse (stmt))
774 {
775 im_mem_ref *ref
776 = lim_data ? memory_accesses.refs_list[lim_data->ref] : NULL;
777 if (ref
778 && MEM_ANALYZABLE (ref))
779 {
780 lim_data->max_loop = outermost_indep_loop (lim_data->max_loop,
781 loop, ref);
782 if (!lim_data->max_loop)
783 return false;
784 }
785 else if (! add_dependency (gimple_vuse (stmt), lim_data, loop, false))
786 return false;
787 }
788
789 lim_data->cost += stmt_cost (stmt);
790
791 return true;
792 }
793
794 /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
795 and that one of the operands of this statement is computed by STMT.
796 Ensure that STMT (together with all the statements that define its
797 operands) is hoisted at least out of the loop LEVEL. */
798
799 static void
800 set_level (gimple *stmt, class loop *orig_loop, class loop *level)
801 {
802 class loop *stmt_loop = gimple_bb (stmt)->loop_father;
803 struct lim_aux_data *lim_data;
804 gimple *dep_stmt;
805 unsigned i;
806
807 stmt_loop = find_common_loop (orig_loop, stmt_loop);
808 lim_data = get_lim_data (stmt);
809 if (lim_data != NULL && lim_data->tgt_loop != NULL)
810 stmt_loop = find_common_loop (stmt_loop,
811 loop_outer (lim_data->tgt_loop));
812 if (flow_loop_nested_p (stmt_loop, level))
813 return;
814
815 gcc_assert (level == lim_data->max_loop
816 || flow_loop_nested_p (lim_data->max_loop, level));
817
818 lim_data->tgt_loop = level;
819 FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
820 set_level (dep_stmt, orig_loop, level);
821 }
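
/* Sketch of the propagation, with hypothetical statements: if the loop
   contains

     a_1 = n_3 + 1;
     b_2 = a_1 * 4;

   and b_2's statement is hoisted out of loop LEVEL, set_level recurses
   through its DEPENDS vector and forces the definition of a_1 (and
   transitively anything it depends on) to be hoisted at least as far,
   so the moved statement never uses a value left inside the loop.  */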
822
823 /* Determines an outermost loop from that we want to hoist the statement STMT.
824 For now we chose the outermost possible loop. TODO -- use profiling
825 information to set it more sanely. */
826
827 static void
828 set_profitable_level (gimple *stmt)
829 {
830 set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
831 }
832
833 /* Returns true if STMT is a call that has side effects. */
834
835 static bool
836 nonpure_call_p (gimple *stmt)
837 {
838 if (gimple_code (stmt) != GIMPLE_CALL)
839 return false;
840
841 return gimple_has_side_effects (stmt);
842 }
843
844 /* Rewrite a/b to a*(1/b). Return the invariant stmt to process. */
845
846 static gimple *
847 rewrite_reciprocal (gimple_stmt_iterator *bsi)
848 {
849 gassign *stmt, *stmt1, *stmt2;
850 tree name, lhs, type;
851 tree real_one;
852 gimple_stmt_iterator gsi;
853
854 stmt = as_a <gassign *> (gsi_stmt (*bsi));
855 lhs = gimple_assign_lhs (stmt);
856 type = TREE_TYPE (lhs);
857
858 real_one = build_one_cst (type);
859
860 name = make_temp_ssa_name (type, NULL, "reciptmp");
861 stmt1 = gimple_build_assign (name, RDIV_EXPR, real_one,
862 gimple_assign_rhs2 (stmt));
863 stmt2 = gimple_build_assign (lhs, MULT_EXPR, name,
864 gimple_assign_rhs1 (stmt));
865
866 /* Replace division stmt with reciprocal and multiply stmts.
867 The multiply stmt is not invariant, so update iterator
868 and avoid rescanning. */
869 gsi = *bsi;
870 gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
871 gsi_replace (&gsi, stmt2, true);
872
873 /* Continue processing with invariant reciprocal statement. */
874 return stmt1;
875 }
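
/* Hedged source-level picture of the transform (names are made up):

     for (...)
       x = a[i] / d;           d loop-invariant, a[i] not

   becomes

     reciptmp = 1.0 / d;       invariant, a candidate for hoisting
     for (...)
       x = a[i] * reciptmp;

   which is only valid under the unsafe-math conditions checked by the
   caller.  */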
876
877 /* Check if the pattern at *BSI is a bittest of the form
878 (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0. */
879
880 static gimple *
881 rewrite_bittest (gimple_stmt_iterator *bsi)
882 {
883 gassign *stmt;
884 gimple *stmt1;
885 gassign *stmt2;
886 gimple *use_stmt;
887 gcond *cond_stmt;
888 tree lhs, name, t, a, b;
889 use_operand_p use;
890
891 stmt = as_a <gassign *> (gsi_stmt (*bsi));
892 lhs = gimple_assign_lhs (stmt);
893
894 /* Verify that the single use of lhs is a comparison against zero. */
895 if (TREE_CODE (lhs) != SSA_NAME
896 || !single_imm_use (lhs, &use, &use_stmt))
897 return stmt;
898 cond_stmt = dyn_cast <gcond *> (use_stmt);
899 if (!cond_stmt)
900 return stmt;
901 if (gimple_cond_lhs (cond_stmt) != lhs
902 || (gimple_cond_code (cond_stmt) != NE_EXPR
903 && gimple_cond_code (cond_stmt) != EQ_EXPR)
904 || !integer_zerop (gimple_cond_rhs (cond_stmt)))
905 return stmt;
906
907 /* Get at the operands of the shift. The rhs is TMP1 & 1. */
908 stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
909 if (gimple_code (stmt1) != GIMPLE_ASSIGN)
910 return stmt;
911
912 /* There is a conversion in between possibly inserted by fold. */
913 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
914 {
915 t = gimple_assign_rhs1 (stmt1);
916 if (TREE_CODE (t) != SSA_NAME
917 || !has_single_use (t))
918 return stmt;
919 stmt1 = SSA_NAME_DEF_STMT (t);
920 if (gimple_code (stmt1) != GIMPLE_ASSIGN)
921 return stmt;
922 }
923
924 /* Verify that B is loop invariant but A is not. Verify that with
925 all the stmt walking we are still in the same loop. */
926 if (gimple_assign_rhs_code (stmt1) != RSHIFT_EXPR
927 || loop_containing_stmt (stmt1) != loop_containing_stmt (stmt))
928 return stmt;
929
930 a = gimple_assign_rhs1 (stmt1);
931 b = gimple_assign_rhs2 (stmt1);
932
933 if (outermost_invariant_loop (b, loop_containing_stmt (stmt1)) != NULL
934 && outermost_invariant_loop (a, loop_containing_stmt (stmt1)) == NULL)
935 {
936 gimple_stmt_iterator rsi;
937
938 /* 1 << B */
939 t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
940 build_int_cst (TREE_TYPE (a), 1), b);
941 name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
942 stmt1 = gimple_build_assign (name, t);
943
944 /* A & (1 << B) */
945 t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
946 name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
947 stmt2 = gimple_build_assign (name, t);
948
949 /* Replace the SSA_NAME we compare against zero. Adjust
950 the type of zero accordingly. */
951 SET_USE (use, name);
952 gimple_cond_set_rhs (cond_stmt,
953 build_int_cst_type (TREE_TYPE (name),
954 0));
955
956 /* Don't use gsi_replace here, none of the new assignments sets
957 the variable originally set in stmt. Move bsi to stmt1, and
958 then remove the original stmt, so that we get a chance to
959 retain debug info for it. */
960 rsi = *bsi;
961 gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
962 gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
963 gimple *to_release = gsi_stmt (rsi);
964 gsi_remove (&rsi, true);
965 release_defs (to_release);
966
967 return stmt1;
968 }
969
970 return stmt;
971 }
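
/* Hedged source-level picture of the bittest rewrite (names made up):

     for (...)
       if ((a >> b) & 1)       b loop-invariant, a not
         ...

   becomes

     for (...)
       if (a & (1 << b))       1 << b is invariant and can be hoisted
         ...

   saving the per-iteration shift once the mask has been moved out.  */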
972
 973 /* For each statement determine the outermost loop in that it is invariant,
 974 the statements on whose motion it depends and the cost of the computation.
 975 This information is stored to the LIM_DATA structure associated with
 976 each statement. */
977 class invariantness_dom_walker : public dom_walker
978 {
979 public:
980 invariantness_dom_walker (cdi_direction direction)
981 : dom_walker (direction) {}
982
983 virtual edge before_dom_children (basic_block);
984 };
985
986 /* Determine the outermost loops in that statements in basic block BB are
987 invariant, and record them to the LIM_DATA associated with the statements.
988 Callback for dom_walker. */
989
990 edge
991 invariantness_dom_walker::before_dom_children (basic_block bb)
992 {
993 enum move_pos pos;
994 gimple_stmt_iterator bsi;
995 gimple *stmt;
996 bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
997 class loop *outermost = ALWAYS_EXECUTED_IN (bb);
998 struct lim_aux_data *lim_data;
999
1000 if (!loop_outer (bb->loop_father))
1001 return NULL;
1002
1003 if (dump_file && (dump_flags & TDF_DETAILS))
1004 fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
1005 bb->index, bb->loop_father->num, loop_depth (bb->loop_father));
1006
 1007 /* Look at PHI nodes, but only if there are at most two.
1008 ??? We could relax this further by post-processing the inserted
1009 code and transforming adjacent cond-exprs with the same predicate
1010 to control flow again. */
1011 bsi = gsi_start_phis (bb);
1012 if (!gsi_end_p (bsi)
1013 && ((gsi_next (&bsi), gsi_end_p (bsi))
1014 || (gsi_next (&bsi), gsi_end_p (bsi))))
1015 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1016 {
1017 stmt = gsi_stmt (bsi);
1018
1019 pos = movement_possibility (stmt);
1020 if (pos == MOVE_IMPOSSIBLE)
1021 continue;
1022
1023 lim_data = get_lim_data (stmt);
1024 if (! lim_data)
1025 lim_data = init_lim_data (stmt);
1026 lim_data->always_executed_in = outermost;
1027
1028 if (!determine_max_movement (stmt, false))
1029 {
1030 lim_data->max_loop = NULL;
1031 continue;
1032 }
1033
1034 if (dump_file && (dump_flags & TDF_DETAILS))
1035 {
1036 print_gimple_stmt (dump_file, stmt, 2);
1037 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
1038 loop_depth (lim_data->max_loop),
1039 lim_data->cost);
1040 }
1041
1042 if (lim_data->cost >= LIM_EXPENSIVE)
1043 set_profitable_level (stmt);
1044 }
1045
1046 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1047 {
1048 stmt = gsi_stmt (bsi);
1049
1050 pos = movement_possibility (stmt);
1051 if (pos == MOVE_IMPOSSIBLE)
1052 {
1053 if (nonpure_call_p (stmt))
1054 {
1055 maybe_never = true;
1056 outermost = NULL;
1057 }
1058 /* Make sure to note always_executed_in for stores to make
1059 store-motion work. */
1060 else if (stmt_makes_single_store (stmt))
1061 {
1062 struct lim_aux_data *lim_data = get_lim_data (stmt);
1063 if (! lim_data)
1064 lim_data = init_lim_data (stmt);
1065 lim_data->always_executed_in = outermost;
1066 }
1067 continue;
1068 }
1069
1070 if (is_gimple_assign (stmt)
1071 && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1072 == GIMPLE_BINARY_RHS))
1073 {
1074 tree op0 = gimple_assign_rhs1 (stmt);
1075 tree op1 = gimple_assign_rhs2 (stmt);
1076 class loop *ol1 = outermost_invariant_loop (op1,
1077 loop_containing_stmt (stmt));
1078
1079 /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
1080 to be hoisted out of loop, saving expensive divide. */
1081 if (pos == MOVE_POSSIBLE
1082 && gimple_assign_rhs_code (stmt) == RDIV_EXPR
1083 && flag_unsafe_math_optimizations
1084 && !flag_trapping_math
1085 && ol1 != NULL
1086 && outermost_invariant_loop (op0, ol1) == NULL)
1087 stmt = rewrite_reciprocal (&bsi);
1088
1089 /* If the shift count is invariant, convert (A >> B) & 1 to
1090 A & (1 << B) allowing the bit mask to be hoisted out of the loop
1091 saving an expensive shift. */
1092 if (pos == MOVE_POSSIBLE
1093 && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
1094 && integer_onep (op1)
1095 && TREE_CODE (op0) == SSA_NAME
1096 && has_single_use (op0))
1097 stmt = rewrite_bittest (&bsi);
1098 }
1099
1100 lim_data = get_lim_data (stmt);
1101 if (! lim_data)
1102 lim_data = init_lim_data (stmt);
1103 lim_data->always_executed_in = outermost;
1104
1105 if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
1106 continue;
1107
1108 if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
1109 {
1110 lim_data->max_loop = NULL;
1111 continue;
1112 }
1113
1114 if (dump_file && (dump_flags & TDF_DETAILS))
1115 {
1116 print_gimple_stmt (dump_file, stmt, 2);
1117 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
1118 loop_depth (lim_data->max_loop),
1119 lim_data->cost);
1120 }
1121
1122 if (lim_data->cost >= LIM_EXPENSIVE)
1123 set_profitable_level (stmt);
1124 }
1125 return NULL;
1126 }
1127
1128 /* Hoist the statements in basic block BB out of the loops prescribed by
1129 data stored in LIM_DATA structures associated with each statement. Callback
1130 for walk_dominator_tree. */
1131
1132 unsigned int
1133 move_computations_worker (basic_block bb)
1134 {
1135 class loop *level;
1136 unsigned cost = 0;
1137 struct lim_aux_data *lim_data;
1138 unsigned int todo = 0;
1139
1140 if (!loop_outer (bb->loop_father))
1141 return todo;
1142
1143 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
1144 {
1145 gassign *new_stmt;
1146 gphi *stmt = bsi.phi ();
1147
1148 lim_data = get_lim_data (stmt);
1149 if (lim_data == NULL)
1150 {
1151 gsi_next (&bsi);
1152 continue;
1153 }
1154
1155 cost = lim_data->cost;
1156 level = lim_data->tgt_loop;
1157 clear_lim_data (stmt);
1158
1159 if (!level)
1160 {
1161 gsi_next (&bsi);
1162 continue;
1163 }
1164
1165 if (dump_file && (dump_flags & TDF_DETAILS))
1166 {
1167 fprintf (dump_file, "Moving PHI node\n");
1168 print_gimple_stmt (dump_file, stmt, 0);
1169 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1170 cost, level->num);
1171 }
1172
1173 if (gimple_phi_num_args (stmt) == 1)
1174 {
1175 tree arg = PHI_ARG_DEF (stmt, 0);
1176 new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1177 TREE_CODE (arg), arg);
1178 }
1179 else
1180 {
1181 basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
1182 gimple *cond = gsi_stmt (gsi_last_bb (dom));
1183 tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
1184 /* Get the PHI arguments corresponding to the true and false
1185 edges of COND. */
1186 extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
1187 gcc_assert (arg0 && arg1);
1188 t = build2 (gimple_cond_code (cond), boolean_type_node,
1189 gimple_cond_lhs (cond), gimple_cond_rhs (cond));
1190 new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1191 COND_EXPR, t, arg0, arg1);
1192 todo |= TODO_cleanup_cfg;
1193 }
1194 if (INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (new_stmt)))
1195 && (!ALWAYS_EXECUTED_IN (bb)
1196 || (ALWAYS_EXECUTED_IN (bb) != level
1197 && !flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1198 {
1199 tree lhs = gimple_assign_lhs (new_stmt);
1200 SSA_NAME_RANGE_INFO (lhs) = NULL;
1201 }
1202 gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
1203 remove_phi_node (&bsi, false);
1204 }
1205
1206 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
1207 {
1208 edge e;
1209
1210 gimple *stmt = gsi_stmt (bsi);
1211
1212 lim_data = get_lim_data (stmt);
1213 if (lim_data == NULL)
1214 {
1215 gsi_next (&bsi);
1216 continue;
1217 }
1218
1219 cost = lim_data->cost;
1220 level = lim_data->tgt_loop;
1221 clear_lim_data (stmt);
1222
1223 if (!level)
1224 {
1225 gsi_next (&bsi);
1226 continue;
1227 }
1228
1229 /* We do not really want to move conditionals out of the loop; we just
1230 placed it here to force its operands to be moved if necessary. */
1231 if (gimple_code (stmt) == GIMPLE_COND)
1232 continue;
1233
1234 if (dump_file && (dump_flags & TDF_DETAILS))
1235 {
1236 fprintf (dump_file, "Moving statement\n");
1237 print_gimple_stmt (dump_file, stmt, 0);
1238 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1239 cost, level->num);
1240 }
1241
1242 e = loop_preheader_edge (level);
1243 gcc_assert (!gimple_vdef (stmt));
1244 if (gimple_vuse (stmt))
1245 {
1246 /* The new VUSE is the one from the virtual PHI in the loop
1247 header or the one already present. */
1248 gphi_iterator gsi2;
1249 for (gsi2 = gsi_start_phis (e->dest);
1250 !gsi_end_p (gsi2); gsi_next (&gsi2))
1251 {
1252 gphi *phi = gsi2.phi ();
1253 if (virtual_operand_p (gimple_phi_result (phi)))
1254 {
1255 SET_USE (gimple_vuse_op (stmt),
1256 PHI_ARG_DEF_FROM_EDGE (phi, e));
1257 break;
1258 }
1259 }
1260 }
1261 gsi_remove (&bsi, false);
1262 if (gimple_has_lhs (stmt)
1263 && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
1264 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_get_lhs (stmt)))
1265 && (!ALWAYS_EXECUTED_IN (bb)
1266 || !(ALWAYS_EXECUTED_IN (bb) == level
1267 || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1268 {
1269 tree lhs = gimple_get_lhs (stmt);
1270 SSA_NAME_RANGE_INFO (lhs) = NULL;
1271 }
1272 /* In case this is a stmt that is not unconditionally executed
1273 when the target loop header is executed and the stmt may
1274 invoke undefined integer or pointer overflow rewrite it to
1275 unsigned arithmetic. */
1276 if (is_gimple_assign (stmt)
1277 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt)))
1278 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (gimple_assign_lhs (stmt)))
1279 && arith_code_with_undefined_signed_overflow
1280 (gimple_assign_rhs_code (stmt))
1281 && (!ALWAYS_EXECUTED_IN (bb)
1282 || !(ALWAYS_EXECUTED_IN (bb) == level
1283 || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1284 gsi_insert_seq_on_edge (e, rewrite_to_defined_overflow (stmt));
1285 else
1286 gsi_insert_on_edge (e, stmt);
1287 }
1288
1289 return todo;
1290 }
1291
1292 /* Hoist the statements out of the loops prescribed by data stored in
 1293 LIM_DATA structures associated with each statement. */
1294
1295 static unsigned int
1296 move_computations (void)
1297 {
1298 int *rpo = XNEWVEC (int, last_basic_block_for_fn (cfun));
1299 int n = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, false);
1300 unsigned todo = 0;
1301
1302 for (int i = 0; i < n; ++i)
1303 todo |= move_computations_worker (BASIC_BLOCK_FOR_FN (cfun, rpo[i]));
1304
1305 free (rpo);
1306
1307 gsi_commit_edge_inserts ();
1308 if (need_ssa_update_p (cfun))
1309 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1310
1311 return todo;
1312 }
1313
1314 /* Checks whether the statement defining variable *INDEX can be hoisted
1315 out of the loop passed in DATA. Callback for for_each_index. */
1316
1317 static bool
1318 may_move_till (tree ref, tree *index, void *data)
1319 {
1320 class loop *loop = (class loop *) data, *max_loop;
1321
1322 /* If REF is an array reference, check also that the step and the lower
1323 bound is invariant in LOOP. */
1324 if (TREE_CODE (ref) == ARRAY_REF)
1325 {
1326 tree step = TREE_OPERAND (ref, 3);
1327 tree lbound = TREE_OPERAND (ref, 2);
1328
1329 max_loop = outermost_invariant_loop (step, loop);
1330 if (!max_loop)
1331 return false;
1332
1333 max_loop = outermost_invariant_loop (lbound, loop);
1334 if (!max_loop)
1335 return false;
1336 }
1337
1338 max_loop = outermost_invariant_loop (*index, loop);
1339 if (!max_loop)
1340 return false;
1341
1342 return true;
1343 }
1344
1345 /* If OP is SSA NAME, force the statement that defines it to be
1346 moved out of the LOOP. ORIG_LOOP is the loop in that EXPR is used. */
1347
1348 static void
1349 force_move_till_op (tree op, class loop *orig_loop, class loop *loop)
1350 {
1351 gimple *stmt;
1352
1353 if (!op
1354 || is_gimple_min_invariant (op))
1355 return;
1356
1357 gcc_assert (TREE_CODE (op) == SSA_NAME);
1358
1359 stmt = SSA_NAME_DEF_STMT (op);
1360 if (gimple_nop_p (stmt))
1361 return;
1362
1363 set_level (stmt, orig_loop, loop);
1364 }
1365
1366 /* Forces statement defining invariants in REF (and *INDEX) to be moved out of
1367 the LOOP. The reference REF is used in the loop ORIG_LOOP. Callback for
1368 for_each_index. */
1369
1370 struct fmt_data
1371 {
1372 class loop *loop;
1373 class loop *orig_loop;
1374 };
1375
1376 static bool
1377 force_move_till (tree ref, tree *index, void *data)
1378 {
1379 struct fmt_data *fmt_data = (struct fmt_data *) data;
1380
1381 if (TREE_CODE (ref) == ARRAY_REF)
1382 {
1383 tree step = TREE_OPERAND (ref, 3);
1384 tree lbound = TREE_OPERAND (ref, 2);
1385
1386 force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
1387 force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
1388 }
1389
1390 force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);
1391
1392 return true;
1393 }
1394
 1395 /* A function to free the mem_ref object MEM. */
1396
1397 static void
1398 memref_free (class im_mem_ref *mem)
1399 {
1400 mem->accesses_in_loop.release ();
1401 }
1402
1403 /* Allocates and returns a memory reference description for MEM whose hash
1404 value is HASH and id is ID. */
1405
1406 static im_mem_ref *
1407 mem_ref_alloc (ao_ref *mem, unsigned hash, unsigned id)
1408 {
1409 im_mem_ref *ref = XOBNEW (&mem_ref_obstack, class im_mem_ref);
1410 if (mem)
1411 ref->mem = *mem;
1412 else
1413 ao_ref_init (&ref->mem, error_mark_node);
1414 ref->id = id;
1415 ref->ref_canonical = false;
1416 ref->ref_decomposed = false;
1417 ref->hash = hash;
1418 ref->stored = NULL;
1419 ref->loaded = NULL;
1420 bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
1421 ref->accesses_in_loop.create (1);
1422
1423 return ref;
1424 }
1425
1426 /* Records memory reference location *LOC in LOOP to the memory reference
1427 description REF. The reference occurs in statement STMT. */
1428
1429 static void
1430 record_mem_ref_loc (im_mem_ref *ref, gimple *stmt, tree *loc)
1431 {
1432 mem_ref_loc aref;
1433 aref.stmt = stmt;
1434 aref.ref = loc;
1435 ref->accesses_in_loop.safe_push (aref);
1436 }
1437
1438 /* Set the LOOP bit in REF stored bitmap and allocate that if
1439 necessary. Return whether a bit was changed. */
1440
1441 static bool
1442 set_ref_stored_in_loop (im_mem_ref *ref, class loop *loop)
1443 {
1444 if (!ref->stored)
1445 ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
1446 return bitmap_set_bit (ref->stored, loop->num);
1447 }
1448
1449 /* Marks reference REF as stored in LOOP. */
1450
1451 static void
1452 mark_ref_stored (im_mem_ref *ref, class loop *loop)
1453 {
1454 while (loop != current_loops->tree_root
1455 && set_ref_stored_in_loop (ref, loop))
1456 loop = loop_outer (loop);
1457 }
1458
1459 /* Set the LOOP bit in REF loaded bitmap and allocate that if
1460 necessary. Return whether a bit was changed. */
1461
1462 static bool
1463 set_ref_loaded_in_loop (im_mem_ref *ref, class loop *loop)
1464 {
1465 if (!ref->loaded)
1466 ref->loaded = BITMAP_ALLOC (&lim_bitmap_obstack);
1467 return bitmap_set_bit (ref->loaded, loop->num);
1468 }
1469
1470 /* Marks reference REF as loaded in LOOP. */
1471
1472 static void
1473 mark_ref_loaded (im_mem_ref *ref, class loop *loop)
1474 {
1475 while (loop != current_loops->tree_root
1476 && set_ref_loaded_in_loop (ref, loop))
1477 loop = loop_outer (loop);
1478 }
1479
1480 /* Gathers memory references in statement STMT in LOOP, storing the
1481 information about them in the memory_accesses structure. Marks
1482 the vops accessed through unrecognized statements there as
1483 well. */
1484
1485 static void
1486 gather_mem_refs_stmt (class loop *loop, gimple *stmt)
1487 {
1488 tree *mem = NULL;
1489 hashval_t hash;
1490 im_mem_ref **slot;
1491 im_mem_ref *ref;
1492 bool is_stored;
1493 unsigned id;
1494
1495 if (!gimple_vuse (stmt))
1496 return;
1497
1498 mem = simple_mem_ref_in_stmt (stmt, &is_stored);
1499 if (!mem)
1500 {
1501 /* We use the shared mem_ref for all unanalyzable refs. */
1502 id = UNANALYZABLE_MEM_ID;
1503 ref = memory_accesses.refs_list[id];
1504 if (dump_file && (dump_flags & TDF_DETAILS))
1505 {
1506 fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
1507 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1508 }
1509 is_stored = gimple_vdef (stmt);
1510 }
1511 else
1512 {
1513 /* We are looking for equal refs that might differ in structure
1514 such as a.b vs. MEM[&a + 4]. So we key off the ao_ref but
1515 make sure we can canonicalize the ref in the hashtable if
1516 non-operand_equal_p refs are found. For the lookup we mark
1517 the case we want strict equality with aor.max_size == -1. */
1518 ao_ref aor;
1519 ao_ref_init (&aor, *mem);
1520 ao_ref_base (&aor);
1521 ao_ref_alias_set (&aor);
1522 HOST_WIDE_INT offset, size, max_size;
1523 poly_int64 saved_maxsize = aor.max_size, mem_off;
1524 tree mem_base;
1525 bool ref_decomposed;
1526 if (aor.max_size_known_p ()
1527 && aor.offset.is_constant (&offset)
1528 && aor.size.is_constant (&size)
1529 && aor.max_size.is_constant (&max_size)
1530 && size == max_size
1531 && (size % BITS_PER_UNIT) == 0
1532 /* We're canonicalizing to a MEM where TYPE_SIZE specifies the
1533 size. Make sure this is consistent with the extraction. */
1534 && poly_int_tree_p (TYPE_SIZE (TREE_TYPE (*mem)))
1535 && known_eq (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (*mem))),
1536 aor.size)
1537 && (mem_base = get_addr_base_and_unit_offset (aor.ref, &mem_off)))
1538 {
1539 ref_decomposed = true;
1540 hash = iterative_hash_expr (ao_ref_base (&aor), 0);
1541 hash = iterative_hash_host_wide_int (offset, hash);
1542 hash = iterative_hash_host_wide_int (size, hash);
1543 }
1544 else
1545 {
1546 ref_decomposed = false;
1547 hash = iterative_hash_expr (aor.ref, 0);
1548 aor.max_size = -1;
1549 }
1550 slot = memory_accesses.refs->find_slot_with_hash (&aor, hash, INSERT);
1551 aor.max_size = saved_maxsize;
1552 if (*slot)
1553 {
1554 if (!(*slot)->ref_canonical
1555 && !operand_equal_p (*mem, (*slot)->mem.ref, 0))
1556 {
1557 /* If we didn't yet canonicalize the hashtable ref (which
1558 we'll end up using for code insertion) and hit a second
1559 equal ref that is not structurally equivalent create
1560 a canonical ref which is a bare MEM_REF. */
1561 if (TREE_CODE (*mem) == MEM_REF
1562 || TREE_CODE (*mem) == TARGET_MEM_REF)
1563 {
1564 (*slot)->mem.ref = *mem;
1565 (*slot)->mem.base_alias_set = ao_ref_base_alias_set (&aor);
1566 }
1567 else
1568 {
1569 tree ref_alias_type = reference_alias_ptr_type (*mem);
1570 unsigned int ref_align = get_object_alignment (*mem);
1571 tree ref_type = TREE_TYPE (*mem);
1572 tree tmp = build1 (ADDR_EXPR, ptr_type_node,
1573 unshare_expr (mem_base));
1574 if (TYPE_ALIGN (ref_type) != ref_align)
1575 ref_type = build_aligned_type (ref_type, ref_align);
1576 (*slot)->mem.ref
1577 = fold_build2 (MEM_REF, ref_type, tmp,
1578 build_int_cst (ref_alias_type, mem_off));
1579 if ((*slot)->mem.volatile_p)
1580 TREE_THIS_VOLATILE ((*slot)->mem.ref) = 1;
1581 gcc_checking_assert (TREE_CODE ((*slot)->mem.ref) == MEM_REF
1582 && is_gimple_mem_ref_addr
1583 (TREE_OPERAND ((*slot)->mem.ref,
1584 0)));
1585 (*slot)->mem.base_alias_set = (*slot)->mem.ref_alias_set;
1586 }
1587 (*slot)->ref_canonical = true;
1588 }
1589 ref = *slot;
1590 id = ref->id;
1591 }
1592 else
1593 {
1594 id = memory_accesses.refs_list.length ();
1595 ref = mem_ref_alloc (&aor, hash, id);
1596 ref->ref_decomposed = ref_decomposed;
1597 memory_accesses.refs_list.safe_push (ref);
1598 *slot = ref;
1599
1600 if (dump_file && (dump_flags & TDF_DETAILS))
1601 {
1602 fprintf (dump_file, "Memory reference %u: ", id);
1603 print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
1604 fprintf (dump_file, "\n");
1605 }
1606 }
1607
1608 record_mem_ref_loc (ref, stmt, mem);
1609 }
1610 if (is_stored)
1611 {
1612 bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
1613 mark_ref_stored (ref, loop);
1614 }
 1615 /* A memory op that is not simple also counts as a read when it is a write. */
1616 if (!is_stored || id == UNANALYZABLE_MEM_ID)
1617 {
1618 bitmap_set_bit (&memory_accesses.refs_loaded_in_loop[loop->num], ref->id);
1619 mark_ref_loaded (ref, loop);
1620 }
1621 init_lim_data (stmt)->ref = ref->id;
1622 return;
1623 }
1624
1625 static unsigned *bb_loop_postorder;
1626
1627 /* qsort sort function to sort blocks after their loop fathers postorder. */
1628
1629 static int
1630 sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_,
1631 void *bb_loop_postorder_)
1632 {
1633 unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1634 basic_block bb1 = *(const basic_block *)bb1_;
1635 basic_block bb2 = *(const basic_block *)bb2_;
1636 class loop *loop1 = bb1->loop_father;
1637 class loop *loop2 = bb2->loop_father;
1638 if (loop1->num == loop2->num)
1639 return bb1->index - bb2->index;
1640 return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1641 }
1642
1643 /* qsort sort function to sort ref locs after their loop fathers postorder. */
1644
1645 static int
1646 sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_,
1647 void *bb_loop_postorder_)
1648 {
1649 unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1650 const mem_ref_loc *loc1 = (const mem_ref_loc *)loc1_;
1651 const mem_ref_loc *loc2 = (const mem_ref_loc *)loc2_;
1652 class loop *loop1 = gimple_bb (loc1->stmt)->loop_father;
1653 class loop *loop2 = gimple_bb (loc2->stmt)->loop_father;
1654 if (loop1->num == loop2->num)
1655 return 0;
1656 return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1657 }
1658
1659 /* Gathers memory references in loops. */
1660
1661 static void
1662 analyze_memory_references (void)
1663 {
1664 gimple_stmt_iterator bsi;
1665 basic_block bb, *bbs;
1666 class loop *loop, *outer;
1667 unsigned i, n;
1668
1669 /* Collect all basic-blocks in loops and sort them after their
1670 loops postorder. */
1671 i = 0;
1672 bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
1673 FOR_EACH_BB_FN (bb, cfun)
1674 if (bb->loop_father != current_loops->tree_root)
1675 bbs[i++] = bb;
1676 n = i;
1677 gcc_sort_r (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp,
1678 bb_loop_postorder);
1679
1680 /* Visit blocks in loop postorder and assign mem-ref IDs in that order.
1681 That results in better locality for all the bitmaps. */
1682 for (i = 0; i < n; ++i)
1683 {
1684 basic_block bb = bbs[i];
1685 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1686 gather_mem_refs_stmt (bb->loop_father, gsi_stmt (bsi));
1687 }
1688
1689 /* Sort the location list of gathered memory references after their
1690 loop postorder number. */
1691 im_mem_ref *ref;
1692 FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
1693 ref->accesses_in_loop.sort (sort_locs_in_loop_postorder_cmp,
1694 bb_loop_postorder);
1695
1696 free (bbs);
1697
1698 /* Propagate the information about accessed memory references up
1699 the loop hierarchy. */
1700 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
1701 {
1702 /* Finalize the overall touched references (including subloops). */
1703 bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num],
1704 &memory_accesses.refs_stored_in_loop[loop->num]);
1705
1706 /* Propagate the information about accessed memory references up
1707 the loop hierarchy. */
1708 outer = loop_outer (loop);
1709 if (outer == current_loops->tree_root)
1710 continue;
1711
1712 bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num],
1713 &memory_accesses.all_refs_stored_in_loop[loop->num]);
1714 }
1715 }
1716
1717 /* Returns true if MEM1 and MEM2 may alias. TTAE_CACHE is used as a cache in
1718 tree_to_aff_combination_expand. */
1719
1720 static bool
1721 mem_refs_may_alias_p (im_mem_ref *mem1, im_mem_ref *mem2,
1722 hash_map<tree, name_expansion *> **ttae_cache,
1723 bool tbaa_p)
1724 {
1725 /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
1726 object and their offset differ in such a way that the locations cannot
1727 overlap, then they cannot alias. */
1728 poly_widest_int size1, size2;
1729 aff_tree off1, off2;
1730
1731 /* Perform basic offset and type-based disambiguation. */
1732 if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, tbaa_p))
1733 return false;
1734
1735 /* The expansion of addresses may be a bit expensive, thus we only do
1736 the check at -O2 and higher optimization levels. */
1737 if (optimize < 2)
1738 return true;
1739
1740 get_inner_reference_aff (mem1->mem.ref, &off1, &size1);
1741 get_inner_reference_aff (mem2->mem.ref, &off2, &size2);
1742 aff_combination_expand (&off1, ttae_cache);
1743 aff_combination_expand (&off2, ttae_cache);
1744 aff_combination_scale (&off1, -1);
1745 aff_combination_add (&off2, &off1);
1746
1747 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1748 return false;
1749
1750 return true;
1751 }
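
/* Hedged example of the affine disambiguation above, with hypothetical
   SSA names: for two 4-byte accesses MEM[p_1 + i_2 * 4] and
   MEM[q_3 + i_2 * 4] where q_3 = p_1 + 4, expanding both offsets with
   tree_to_aff_combination_expand rewrites q_3 in terms of p_1, the
   difference becomes the constant 4, which is at least the access size,
   so aff_comb_cannot_overlap_p reports the locations cannot overlap and
   the references are treated as independent.  */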
1752
1753 /* Compare function for bsearch searching for reference locations
1754 in a loop. */
1755
1756 static int
1757 find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_,
1758 void *bb_loop_postorder_)
1759 {
1760 unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1761 class loop *loop = (class loop *)const_cast<void *>(loop_);
1762 mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_);
1763 class loop *loc_loop = gimple_bb (loc->stmt)->loop_father;
1764 if (loop->num == loc_loop->num
1765 || flow_loop_nested_p (loop, loc_loop))
1766 return 0;
1767 return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num]
1768 ? -1 : 1);
1769 }
1770
1771 /* Iterates over all locations of REF in LOOP and its subloops calling
1772 fn.operator() with the location as argument. When that operator
1773 returns true the iteration is stopped and true is returned.
1774 Otherwise false is returned. */
1775
1776 template <typename FN>
1777 static bool
1778 for_all_locs_in_loop (class loop *loop, im_mem_ref *ref, FN fn)
1779 {
1780 unsigned i;
1781 mem_ref_loc *loc;
1782
1783 /* Search for the cluster of locs in the accesses_in_loop vector
1784 which is sorted after postorder index of the loop father. */
1785 loc = ref->accesses_in_loop.bsearch (loop, find_ref_loc_in_loop_cmp,
1786 bb_loop_postorder);
1787 if (!loc)
1788 return false;
1789
1790 /* We have found one location inside loop or its sub-loops. Iterate
1791 both forward and backward to cover the whole cluster. */
1792 i = loc - ref->accesses_in_loop.address ();
1793 while (i > 0)
1794 {
1795 --i;
1796 mem_ref_loc *l = &ref->accesses_in_loop[i];
1797 if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1798 break;
1799 if (fn (l))
1800 return true;
1801 }
1802 for (i = loc - ref->accesses_in_loop.address ();
1803 i < ref->accesses_in_loop.length (); ++i)
1804 {
1805 mem_ref_loc *l = &ref->accesses_in_loop[i];
1806 if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1807 break;
1808 if (fn (l))
1809 return true;
1810 }
1811
1812 return false;
1813 }
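
/* Usage sketch, mirroring rewrite_mem_refs below: callers pass a small
   function object, e.g.

     for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));

   and operator() is invoked on every location of REF in LOOP.  This
   relies on accesses_in_loop being sorted by the loop-father postorder
   index, so one bsearch plus a short scan in both directions covers the
   whole cluster of locations.  */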
1814
1815 /* Rewrites location LOC by TMP_VAR. */
1816
1817 class rewrite_mem_ref_loc
1818 {
1819 public:
1820 rewrite_mem_ref_loc (tree tmp_var_) : tmp_var (tmp_var_) {}
1821 bool operator () (mem_ref_loc *loc);
1822 tree tmp_var;
1823 };
1824
1825 bool
1826 rewrite_mem_ref_loc::operator () (mem_ref_loc *loc)
1827 {
1828 *loc->ref = tmp_var;
1829 update_stmt (loc->stmt);
1830 return false;
1831 }
1832
1833 /* Rewrites all references to REF in LOOP by variable TMP_VAR. */
1834
1835 static void
1836 rewrite_mem_refs (class loop *loop, im_mem_ref *ref, tree tmp_var)
1837 {
1838 for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));
1839 }
1840
1841 /* Stores the first reference location in LOCP. */
1842
1843 class first_mem_ref_loc_1
1844 {
1845 public:
1846 first_mem_ref_loc_1 (mem_ref_loc **locp_) : locp (locp_) {}
1847 bool operator () (mem_ref_loc *loc);
1848 mem_ref_loc **locp;
1849 };
1850
1851 bool
1852 first_mem_ref_loc_1::operator () (mem_ref_loc *loc)
1853 {
1854 *locp = loc;
1855 return true;
1856 }
1857
1858 /* Returns the first reference location to REF in LOOP. */
1859
1860 static mem_ref_loc *
1861 first_mem_ref_loc (class loop *loop, im_mem_ref *ref)
1862 {
1863 mem_ref_loc *locp = NULL;
1864 for_all_locs_in_loop (loop, ref, first_mem_ref_loc_1 (&locp));
1865 return locp;
1866 }
1867
1868 struct prev_flag_edges {
1869 /* Edge to insert new flag comparison code. */
1870 edge append_cond_position;
1871
1872 /* Edge for fall through from previous flag comparison. */
1873 edge last_cond_fallthru;
1874 };
1875
1876 /* Helper function for execute_sm. Emit code to store TMP_VAR into
1877 MEM along edge EX.
1878
1879 The store is only done if MEM has changed. We do this so no
1880 changes to MEM occur on code paths that did not originally store
1881 into it.
1882
1883 The common case for execute_sm will transform:
1884
1885 for (...) {
1886 if (foo)
1887 stuff;
1888 else
1889 MEM = TMP_VAR;
1890 }
1891
1892 into:
1893
1894 lsm = MEM;
1895 for (...) {
1896 if (foo)
1897 stuff;
1898 else
1899 lsm = TMP_VAR;
1900 }
1901 MEM = lsm;
1902
1903 This function will generate:
1904
1905 lsm = MEM;
1906
1907 lsm_flag = false;
1908 ...
1909 for (...) {
1910 if (foo)
1911 stuff;
1912 else {
1913 lsm = TMP_VAR;
1914 lsm_flag = true;
1915 }
1916 }
1917 if (lsm_flag) <--
1918 MEM = lsm; <--
1919 */
1920
1921 static void
1922 execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
1923 edge preheader, hash_set <basic_block> *flag_bbs)
1924 {
1925 basic_block new_bb, then_bb, old_dest;
1926 bool loop_has_only_one_exit;
1927 edge then_old_edge, orig_ex = ex;
1928 gimple_stmt_iterator gsi;
1929 gimple *stmt;
1930 struct prev_flag_edges *prev_edges = (struct prev_flag_edges *) ex->aux;
1931 bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP;
1932
1933 profile_count count_sum = profile_count::zero ();
1934 int nbbs = 0, ncount = 0;
1935 profile_probability flag_probability = profile_probability::uninitialized ();
1936
1937 /* The flag is set in FLAG_BBS. Determine the probability that the flag
1938 will be true at the loop exit.
1939
1940 This code may look fancy, but it cannot update the profile very
1941 realistically because we do not know the probability that the flag
1942 will be true at a given loop exit.
1943
1944 We look for two interesting extremes:
1945 - when the exit is dominated by a block setting the flag, we know it
1946 will always be true. This is a common case.
1947 - when all blocks setting the flag have very low frequency, we know
1948 it will likely be false.
1949 In all other cases we default to 2/3 for the flag being true. */
1950
1951 for (hash_set<basic_block>::iterator it = flag_bbs->begin ();
1952 it != flag_bbs->end (); ++it)
1953 {
1954 if ((*it)->count.initialized_p ())
1955 count_sum += (*it)->count, ncount ++;
1956 if (dominated_by_p (CDI_DOMINATORS, ex->src, *it))
1957 flag_probability = profile_probability::always ();
1958 nbbs++;
1959 }
1960
1961 profile_probability cap = profile_probability::always ().apply_scale (2, 3);
1962
1963 if (flag_probability.initialized_p ())
1964 ;
1965 else if (ncount == nbbs
1966 && preheader->count () >= count_sum && preheader->count ().nonzero_p ())
1967 {
1968 flag_probability = count_sum.probability_in (preheader->count ());
1969 if (flag_probability > cap)
1970 flag_probability = cap;
1971 }
1972
1973 if (!flag_probability.initialized_p ())
1974 flag_probability = cap;
1975
1976 /* ?? Insert store after previous store if applicable. See note
1977 below. */
1978 if (prev_edges)
1979 ex = prev_edges->append_cond_position;
1980
1981 loop_has_only_one_exit = single_pred_p (ex->dest);
1982
1983 if (loop_has_only_one_exit)
1984 ex = split_block_after_labels (ex->dest);
1985 else
1986 {
1987 for (gphi_iterator gpi = gsi_start_phis (ex->dest);
1988 !gsi_end_p (gpi); gsi_next (&gpi))
1989 {
1990 gphi *phi = gpi.phi ();
1991 if (virtual_operand_p (gimple_phi_result (phi)))
1992 continue;
1993
1994 /* When the destination has a non-virtual PHI node with multiple
1995 predecessors make sure we preserve the PHI structure by
1996 forcing a forwarder block so that hoisting of that PHI will
1997 still work. */
1998 split_edge (ex);
1999 break;
2000 }
2001 }
2002
2003 old_dest = ex->dest;
2004 new_bb = split_edge (ex);
2005 then_bb = create_empty_bb (new_bb);
2006 then_bb->count = new_bb->count.apply_probability (flag_probability);
2007 if (irr)
2008 then_bb->flags = BB_IRREDUCIBLE_LOOP;
2009 add_bb_to_loop (then_bb, new_bb->loop_father);
2010
2011 gsi = gsi_start_bb (new_bb);
2012 stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
2013 NULL_TREE, NULL_TREE);
2014 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2015
2016 gsi = gsi_start_bb (then_bb);
2017 /* Insert actual store. */
2018 stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
2019 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2020
2021 edge e1 = single_succ_edge (new_bb);
2022 edge e2 = make_edge (new_bb, then_bb,
2023 EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
2024 e2->probability = flag_probability;
2025
2026 e1->flags |= EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0);
2027 e1->flags &= ~EDGE_FALLTHRU;
2028
2029 e1->probability = flag_probability.invert ();
2030
2031 then_old_edge = make_single_succ_edge (then_bb, old_dest,
2032 EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
2033
2034 set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);
2035
2036 if (prev_edges)
2037 {
2038 basic_block prevbb = prev_edges->last_cond_fallthru->src;
2039 redirect_edge_succ (prev_edges->last_cond_fallthru, new_bb);
2040 set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
2041 set_immediate_dominator (CDI_DOMINATORS, old_dest,
2042 recompute_dominator (CDI_DOMINATORS, old_dest));
2043 }
2044
2045 /* ?? Because stores may alias, they must happen in the exact
2046 order in which they originally happened. Save the position right after
2047 the (_lsm) store we just created so we can continue appending after
2048 it and maintain the original order. */
2049 {
2050 struct prev_flag_edges *p;
2051
2052 if (orig_ex->aux)
2053 orig_ex->aux = NULL;
2054 alloc_aux_for_edge (orig_ex, sizeof (struct prev_flag_edges));
2055 p = (struct prev_flag_edges *) orig_ex->aux;
2056 p->append_cond_position = then_old_edge;
2057 p->last_cond_fallthru = find_edge (new_bb, old_dest);
2058 orig_ex->aux = (void *) p;
2059 }
2060
2061 if (!loop_has_only_one_exit)
2062 for (gphi_iterator gpi = gsi_start_phis (old_dest);
2063 !gsi_end_p (gpi); gsi_next (&gpi))
2064 {
2065 gphi *phi = gpi.phi ();
2066 unsigned i;
2067
2068 for (i = 0; i < gimple_phi_num_args (phi); i++)
2069 if (gimple_phi_arg_edge (phi, i)->src == new_bb)
2070 {
2071 tree arg = gimple_phi_arg_def (phi, i);
2072 add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
2073 update_stmt (phi);
2074 }
2075 }
2076 }
2077
2078 /* When REF is stored to at the location, set FLAG indicating the store. */
2079
2080 class sm_set_flag_if_changed
2081 {
2082 public:
2083 sm_set_flag_if_changed (tree flag_, hash_set <basic_block> *bbs_)
2084 : flag (flag_), bbs (bbs_) {}
2085 bool operator () (mem_ref_loc *loc);
2086 tree flag;
2087 hash_set <basic_block> *bbs;
2088 };
2089
2090 bool
2091 sm_set_flag_if_changed::operator () (mem_ref_loc *loc)
2092 {
2093 /* Only set the flag for writes. */
2094 if (is_gimple_assign (loc->stmt)
2095 && gimple_assign_lhs_ptr (loc->stmt) == loc->ref)
2096 {
2097 gimple_stmt_iterator gsi = gsi_for_stmt (loc->stmt);
2098 gimple *stmt = gimple_build_assign (flag, boolean_true_node);
2099 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2100 bbs->add (gimple_bb (stmt));
2101 }
2102 return false;
2103 }
2104
2105 /* Helper function for execute_sm. On every location where REF is
2106 set, set an appropriate flag indicating the store. */
2107
2108 static tree
2109 execute_sm_if_changed_flag_set (class loop *loop, im_mem_ref *ref,
2110 hash_set <basic_block> *bbs)
2111 {
2112 tree flag;
2113 char *str = get_lsm_tmp_name (ref->mem.ref, ~0, "_flag");
2114 flag = create_tmp_reg (boolean_type_node, str);
2115 for_all_locs_in_loop (loop, ref, sm_set_flag_if_changed (flag, bbs));
2116 return flag;
2117 }
2118
2119 struct sm_aux
2120 {
2121 tree tmp_var;
2122 tree store_flag;
2123 hash_set <basic_block> flag_bbs;
2124 };
2125
2126 /* Executes store motion of memory reference REF from LOOP.
2127 The initialization of the temporary variable is put in the preheader
2128 of the loop, and the auxiliary data needed to later emit the stores
2129 on the loop exits is recorded in AUX_MAP. */
2130
2131 static void
2132 execute_sm (class loop *loop, im_mem_ref *ref,
2133 hash_map<im_mem_ref *, sm_aux *> &aux_map)
2134 {
2135 gassign *load;
2136 struct fmt_data fmt_data;
2137 struct lim_aux_data *lim_data;
2138 bool multi_threaded_model_p = false;
2139 gimple_stmt_iterator gsi;
2140 sm_aux *aux = new sm_aux;
2141
2142 if (dump_file && (dump_flags & TDF_DETAILS))
2143 {
2144 fprintf (dump_file, "Executing store motion of ");
2145 print_generic_expr (dump_file, ref->mem.ref);
2146 fprintf (dump_file, " from loop %d\n", loop->num);
2147 }
2148
2149 aux->tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
2150 get_lsm_tmp_name (ref->mem.ref, ~0));
2151
2152 fmt_data.loop = loop;
2153 fmt_data.orig_loop = loop;
2154 for_each_index (&ref->mem.ref, force_move_till, &fmt_data);
2155
2156 bool always_stored = ref_always_accessed_p (loop, ref, true);
2157 if (bb_in_transaction (loop_preheader_edge (loop)->src)
2158 || (! flag_store_data_races && ! always_stored))
2159 multi_threaded_model_p = true;
2160
2161 if (multi_threaded_model_p)
2162 aux->store_flag
2163 = execute_sm_if_changed_flag_set (loop, ref, &aux->flag_bbs);
2164 else
2165 aux->store_flag = NULL_TREE;
2166
2167 /* Remember variable setup. */
2168 aux_map.put (ref, aux);
2169
2170 rewrite_mem_refs (loop, ref, aux->tmp_var);
2171
2172 /* Emit the load code at the position of the first reference to REF
2173 in the loop, so that we are sure it will be processed by
2174 move_computations after all dependencies. */
2175 gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt);
2176
2177 /* Avoid doing a load if there was no load of the ref in the loop.
2178 Especially when the ref is not always stored we cannot optimize it
2179 away later. But when it is not always stored we must use a
2180 conditional store. */
2181 if ((!always_stored && !multi_threaded_model_p)
2182 || (ref->loaded && bitmap_bit_p (ref->loaded, loop->num)))
2183 load = gimple_build_assign (aux->tmp_var, unshare_expr (ref->mem.ref));
2184 else
2185 {
2186 /* If we are not emitting a load, mark the uninitialized state on
2187 the loop entry as not to be warned about. */
2188 tree uninit = create_tmp_reg (TREE_TYPE (aux->tmp_var));
2189 TREE_NO_WARNING (uninit) = 1;
2190 load = gimple_build_assign (aux->tmp_var, uninit);
2191 }
2192 lim_data = init_lim_data (load);
2193 lim_data->max_loop = loop;
2194 lim_data->tgt_loop = loop;
2195 gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2196
2197 if (multi_threaded_model_p)
2198 {
2199 load = gimple_build_assign (aux->store_flag, boolean_false_node);
2200 lim_data = init_lim_data (load);
2201 lim_data->max_loop = loop;
2202 lim_data->tgt_loop = loop;
2203 gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2204 }
2205 }
2206
2207 /* sm_ord is used for ordinary stores for which we can retain the order
2208 with respect to other stores.
2209 sm_unord is used for conditionally executed stores which need to be
2210 able to execute in arbitrary order with respect to other stores.
2211 sm_other is used for stores we do not try to apply store motion to. */
2212 enum sm_kind { sm_ord, sm_unord, sm_other };
2213 struct seq_entry
2214 {
2215 seq_entry (unsigned f, sm_kind k, tree fr = NULL)
2216 : first (f), second (k), from (fr) {}
2217 unsigned first;
2218 sm_kind second;
2219 tree from;
2220 };
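
/* Hypothetical example of the bookkeeping above: for a loop body with
   the stores

     *p = x; *q = y; *r = z;

   where *p and *r are store-motion candidates but *q is not,
   sm_seq_valid_bb walks backwards from the exit and builds roughly

     { (r, sm_ord), (q, sm_other, y), (p, sm_ord) }

   i.e. the vector is in reverse program order and the non-candidate
   store records its stored value Y so it can be re-issued on the exit.
   execute_sm_exit then materializes the entries back to front, which
   restores the original order p, q, r of the last stores.  */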
2221
2222 static void
2223 execute_sm_exit (class loop *loop, edge ex, vec<seq_entry> &seq,
2224 hash_map<im_mem_ref *, sm_aux *> &aux_map, sm_kind kind)
2225 {
2226 /* Sink the stores to exit from the loop. */
2227 for (unsigned i = seq.length (); i > 0; --i)
2228 {
2229 im_mem_ref *ref = memory_accesses.refs_list[seq[i-1].first];
2230 if (seq[i-1].second == sm_other)
2231 {
2232 gcc_assert (kind == sm_ord && seq[i-1].from != NULL_TREE);
2233 if (dump_file && (dump_flags & TDF_DETAILS))
2234 {
2235 fprintf (dump_file, "Re-issuing dependent store of ");
2236 print_generic_expr (dump_file, ref->mem.ref);
2237 fprintf (dump_file, " from loop %d on exit %d -> %d\n",
2238 loop->num, ex->src->index, ex->dest->index);
2239 }
2240 gassign *store = gimple_build_assign (unshare_expr (ref->mem.ref),
2241 seq[i-1].from);
2242 gsi_insert_on_edge (ex, store);
2243 }
2244 else
2245 {
2246 sm_aux *aux = *aux_map.get (ref);
2247 if (!aux->store_flag)
2248 {
2249 gassign *store;
2250 store = gimple_build_assign (unshare_expr (ref->mem.ref),
2251 aux->tmp_var);
2252 gsi_insert_on_edge (ex, store);
2253 }
2254 else
2255 execute_sm_if_changed (ex, ref->mem.ref, aux->tmp_var,
2256 aux->store_flag,
2257 loop_preheader_edge (loop), &aux->flag_bbs);
2258 }
2259 }
2260 }
2261
2262 /* Push the SM candidate at index PTR in the sequence SEQ down until
2263 we hit the next SM candidate. Return true if that went OK and
2264 false if we could not disambiguate against another unrelated ref.
2265 Update *AT to the index where the candidate now resides. */
2266
2267 static bool
2268 sm_seq_push_down (vec<seq_entry> &seq, unsigned ptr, unsigned *at)
2269 {
2270 *at = ptr;
2271 for (; ptr > 0; --ptr)
2272 {
2273 seq_entry &new_cand = seq[ptr];
2274 seq_entry &against = seq[ptr-1];
2275 if (against.second == sm_ord
2276 || (against.second == sm_other && against.from != NULL_TREE))
2277 /* Found the tail of the sequence. */
2278 break;
2279 if (!refs_independent_p (memory_accesses.refs_list[new_cand.first],
2280 memory_accesses.refs_list[against.first],
2281 false))
2282 /* ??? Prune new_cand from the list of refs to apply SM to. */
2283 return false;
2284 std::swap (new_cand, against);
2285 *at = ptr - 1;
2286 }
2287 return true;
2288 }
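
/* For instance (hypothetical entries), pushing a freshly added sm_ord
   candidate C down the sequence

     { (A, sm_ord), (B, sm_other, from = NULL), (C, sm_ord) }

   queries refs_independent_p (C, B); if they are independent, C and B
   are swapped, giving

     { (A, sm_ord), (C, sm_ord), (B, sm_other, from = NULL) }

   and the walk stops at A because it is sm_ord.  If the disambiguation
   fails, false is returned and the callers demote C to sm_other.  */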
2289
2290 /* Computes the sequence of stores from candidates in REFS_NOT_IN_SEQ into
2291 SEQ, walking backwards from VDEF (or from the end of BB if VDEF is NULL). */
2292
2293 static int
2294 sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef,
2295 vec<seq_entry> &seq, bitmap refs_not_in_seq,
2296 bitmap refs_not_supported, bool forked)
2297 {
2298 if (!vdef)
2299 for (gimple_stmt_iterator gsi = gsi_last_bb (bb); !gsi_end_p (gsi);
2300 gsi_prev (&gsi))
2301 {
2302 vdef = gimple_vdef (gsi_stmt (gsi));
2303 if (vdef)
2304 break;
2305 }
2306 if (!vdef)
2307 {
2308 gphi *vphi = get_virtual_phi (bb);
2309 if (vphi)
2310 vdef = gimple_phi_result (vphi);
2311 }
2312 if (!vdef)
2313 {
2314 if (single_pred_p (bb))
2315 /* This handles the perfect nest case. */
2316 return sm_seq_valid_bb (loop, single_pred (bb), vdef,
2317 seq, refs_not_in_seq, refs_not_supported,
2318 forked);
2319 return 0;
2320 }
2321 do
2322 {
2323 gimple *def = SSA_NAME_DEF_STMT (vdef);
2324 if (gimple_bb (def) != bb)
2325 {
2326 /* If we forked by processing a PHI do not allow our walk to
2327 merge again until we handle that robustly. */
2328 if (forked)
2329 {
2330 /* Mark refs_not_in_seq as unsupported. */
2331 bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2332 return 1;
2333 }
2334 /* Otherwise it doesn't really matter if we end up in different
2335 BBs. */
2336 bb = gimple_bb (def);
2337 }
2338 if (gphi *phi = dyn_cast <gphi *> (def))
2339 {
2340 /* Handle CFG merges. Until we handle forks (gimple_bb (def) != bb)
2341 this is still linear.
2342 Eventually we want to cache intermediate results per BB
2343 (but we can't easily cache for different exits?). */
2344 /* Stop at PHIs with possible backedges. */
2345 if (bb == bb->loop_father->header
2346 || bb->flags & BB_IRREDUCIBLE_LOOP)
2347 {
2348 /* Mark refs_not_in_seq as unsupported. */
2349 bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2350 return 1;
2351 }
2352 if (gimple_phi_num_args (phi) == 1)
2353 return sm_seq_valid_bb (loop, gimple_phi_arg_edge (phi, 0)->src,
2354 gimple_phi_arg_def (phi, 0), seq,
2355 refs_not_in_seq, refs_not_supported,
2356 false);
2357 auto_vec<seq_entry> first_edge_seq;
2358 auto_bitmap tem_refs_not_in_seq (&lim_bitmap_obstack);
2359 int eret;
2360 bitmap_copy (tem_refs_not_in_seq, refs_not_in_seq);
2361 eret = sm_seq_valid_bb (loop, gimple_phi_arg_edge (phi, 0)->src,
2362 gimple_phi_arg_def (phi, 0),
2363 first_edge_seq,
2364 tem_refs_not_in_seq, refs_not_supported,
2365 true);
2366 if (eret != 1)
2367 return -1;
2368 /* Simplify our lives by pruning the sequence of !sm_ord. */
2369 while (!first_edge_seq.is_empty ()
2370 && first_edge_seq.last ().second != sm_ord)
2371 first_edge_seq.pop ();
2372 for (unsigned int i = 1; i < gimple_phi_num_args (phi); ++i)
2373 {
2374 tree vuse = gimple_phi_arg_def (phi, i);
2375 edge e = gimple_phi_arg_edge (phi, i);
2376 auto_vec<seq_entry> edge_seq;
2377 bitmap_copy (tem_refs_not_in_seq, refs_not_in_seq);
2378 eret = sm_seq_valid_bb (loop, e->src, vuse, edge_seq,
2379 tem_refs_not_in_seq, refs_not_supported,
2380 true);
2381 if (eret != 1)
2382 return -1;
2383 /* Simplify our lives by pruning the sequence of !sm_ord. */
2384 while (!edge_seq.is_empty ()
2385 && edge_seq.last ().second != sm_ord)
2386 edge_seq.pop ();
2387 unsigned min_len = MIN(first_edge_seq.length (),
2388 edge_seq.length ());
2389 /* Incrementally merge seqs into first_edge_seq. */
2390 for (unsigned int i = 0; i < min_len; ++i)
2391 {
2392 /* ??? We can more intelligently merge when we face different
2393 order by additional sinking operations in one sequence.
2394 For now we simply mark them as to be processed by the
2395 non-order-preserving SM code. */
2396 if (first_edge_seq[i].first != edge_seq[i].first)
2397 {
2398 if (first_edge_seq[i].second == sm_ord)
2399 bitmap_set_bit (refs_not_supported,
2400 first_edge_seq[i].first);
2401 if (edge_seq[i].second == sm_ord)
2402 bitmap_set_bit (refs_not_supported, edge_seq[i].first);
2403 first_edge_seq[i].second = sm_other;
2404 }
2405 /* sm_other prevails. */
2406 else if (first_edge_seq[i].second != edge_seq[i].second)
2407 {
2408 /* This is just an optimization. */
2409 gcc_assert (bitmap_bit_p (refs_not_supported,
2410 first_edge_seq[i].first));
2411 first_edge_seq[i].second = sm_other;
2412 }
2413 }
2414 /* Any excess elements become sm_other since they are now
2415 conditionally executed. */
2416 if (first_edge_seq.length () > edge_seq.length ())
2417 {
2418 for (unsigned i = edge_seq.length ();
2419 i < first_edge_seq.length (); ++i)
2420 {
2421 if (first_edge_seq[i].second == sm_ord)
2422 bitmap_set_bit (refs_not_supported,
2423 first_edge_seq[i].first);
2424 first_edge_seq[i].second = sm_other;
2425 }
2426 }
2427 else if (edge_seq.length () > first_edge_seq.length ())
2428 {
2429 for (unsigned i = first_edge_seq.length ();
2430 i < edge_seq.length (); ++i)
2431 if (edge_seq[i].second == sm_ord)
2432 bitmap_set_bit (refs_not_supported, edge_seq[i].first);
2433 }
2434 }
2435 /* Use the sequence from the first edge and push SMs down. */
2436 for (unsigned i = 0; i < first_edge_seq.length (); ++i)
2437 {
2438 if (first_edge_seq[i].second == sm_other)
2439 break;
2440 unsigned id = first_edge_seq[i].first;
2441 seq.safe_push (first_edge_seq[i]);
2442 unsigned new_idx;
2443 if (first_edge_seq[i].second == sm_ord
2444 && !sm_seq_push_down (seq, seq.length () - 1, &new_idx))
2445 {
2446 bitmap_set_bit (refs_not_supported, id);
2447 /* Mark it sm_other. */
2448 seq[new_idx].second = sm_other;
2449 }
2450 }
2451 return 1;
2452 }
2453 lim_aux_data *data = get_lim_data (def);
2454 gcc_assert (data);
2455 if (data->ref == UNANALYZABLE_MEM_ID)
2456 return -1;
2457 /* One of the stores we want to apply SM to and we've not yet seen. */
2458 else if (bitmap_clear_bit (refs_not_in_seq, data->ref))
2459 {
2460 seq.safe_push (seq_entry (data->ref, sm_ord));
2461
2462 /* 1) push it down the queue until a SMed
2463 and not ignored ref is reached, skipping all non-SMed refs
2464 and refs ignored via non-TBAA disambiguation. */
2465 unsigned new_idx;
2466 if (!sm_seq_push_down (seq, seq.length () - 1, &new_idx)
2467 /* If that fails but we did not fork yet continue, we'll see
2468 to re-materialize all of the stores in the sequence then.
2469 Further stores will only be pushed up to this one. */
2470 && forked)
2471 {
2472 bitmap_set_bit (refs_not_supported, data->ref);
2473 /* Mark it sm_other. */
2474 seq[new_idx].second = sm_other;
2475 }
2476
2477 /* 2) check whether we've seen all refs we want to SM and if so
2478 declare success for the active exit */
2479 if (bitmap_empty_p (refs_not_in_seq))
2480 return 1;
2481 }
2482 else
2483 /* Another store not part of the final sequence. Simply push it. */
2484 seq.safe_push (seq_entry (data->ref, sm_other,
2485 gimple_assign_rhs1 (def)));
2486
2487 vdef = gimple_vuse (def);
2488 }
2489 while (1);
2490 }
2491
2492 /* Hoists memory references MEM_REFS out of LOOP. EXITS is the list of exit
2493 edges of the LOOP. */
2494
2495 static void
2496 hoist_memory_references (class loop *loop, bitmap mem_refs,
2497 vec<edge> exits)
2498 {
2499 im_mem_ref *ref;
2500 unsigned i;
2501 bitmap_iterator bi;
2502
2503 /* To address PR57359, before actually applying store motion check
2504 the candidates found for validity with regard to reordering
2505 relative to other stores, which we have until here disambiguated
2506 using TBAA, which is not valid for this purpose.
2507 What matters is the order of the last stores to the mem_refs
2508 with respect to the other stores of the loop at the point of the
2509 loop exits. */
2510
2511 /* For each exit compute the store order, pruning from mem_refs
2512 on the fly. */
2513 /* The complexity of this is at least
2514 O(number of exits * number of SM refs) but in practice closer to
2515 O(number of exits * number of SM refs * number of stores). */
2516 /* ??? Somehow do this in a single sweep over the loop body. */
2517 auto_vec<std::pair<edge, vec<seq_entry> > > sms;
2518 auto_bitmap refs_not_supported (&lim_bitmap_obstack);
2519 edge e;
2520 FOR_EACH_VEC_ELT (exits, i, e)
2521 {
2522 vec<seq_entry> seq;
2523 seq.create (4);
2524 auto_bitmap refs_not_in_seq (&lim_bitmap_obstack);
2525 bitmap_copy (refs_not_in_seq, mem_refs);
2526 int res = sm_seq_valid_bb (loop, e->src, NULL_TREE,
2527 seq, refs_not_in_seq,
2528 refs_not_supported, false);
2529 if (res != 1)
2530 {
2531 bitmap_copy (refs_not_supported, mem_refs);
2532 break;
2533 }
2534 sms.safe_push (std::make_pair (e, seq));
2535 }
2536
2537 /* Prune mem_refs that turned out to be unsupported from the sequences of earlier processed exits. */
2538 bool changed = !bitmap_empty_p (refs_not_supported);
2539 while (changed)
2540 {
2541 changed = false;
2542 std::pair<edge, vec<seq_entry> > *seq;
2543 FOR_EACH_VEC_ELT (sms, i, seq)
2544 {
2545 bool need_to_push = false;
2546 for (unsigned i = 0; i < seq->second.length (); ++i)
2547 {
2548 sm_kind kind = seq->second[i].second;
2549 if (kind == sm_other && seq->second[i].from == NULL_TREE)
2550 break;
2551 unsigned id = seq->second[i].first;
2552 unsigned new_idx;
2553 if (kind == sm_ord
2554 && bitmap_bit_p (refs_not_supported, id))
2555 {
2556 seq->second[i].second = sm_other;
2557 gcc_assert (seq->second[i].from == NULL_TREE);
2558 need_to_push = true;
2559 }
2560 else if (need_to_push
2561 && !sm_seq_push_down (seq->second, i, &new_idx))
2562 {
2563 /* We need to push down both sm_ord and sm_other
2564 but for the latter we need to disqualify all
2565 following refs. */
2566 if (kind == sm_ord)
2567 {
2568 if (bitmap_set_bit (refs_not_supported, id))
2569 changed = true;
2570 seq->second[new_idx].second = sm_other;
2571 }
2572 else
2573 {
2574 for (unsigned j = seq->second.length () - 1;
2575 j > new_idx; --j)
2576 if (seq->second[j].second == sm_ord
2577 && bitmap_set_bit (refs_not_supported,
2578 seq->second[j].first))
2579 changed = true;
2580 seq->second.truncate (new_idx);
2581 break;
2582 }
2583 }
2584 }
2585 }
2586 }
2587 std::pair<edge, vec<seq_entry> > *seq;
2588 FOR_EACH_VEC_ELT (sms, i, seq)
2589 {
2590 /* Prune sm_other from the end. */
2591 while (!seq->second.is_empty ()
2592 && seq->second.last ().second == sm_other)
2593 seq->second.pop ();
2594 /* Prune duplicates from the start. */
2595 auto_bitmap seen (&lim_bitmap_obstack);
2596 unsigned j, k;
2597 for (j = k = 0; j < seq->second.length (); ++j)
2598 if (bitmap_set_bit (seen, seq->second[j].first))
2599 {
2600 if (k != j)
2601 seq->second[k] = seq->second[j];
2602 ++k;
2603 }
2604 seq->second.truncate (k);
2605 /* And verify. */
2606 seq_entry *e;
2607 FOR_EACH_VEC_ELT (seq->second, j, e)
2608 gcc_assert (e->second == sm_ord
2609 || (e->second == sm_other && e->from != NULL_TREE));
2610 }
2611
2612 /* Verify dependence for refs we cannot handle with the order preserving
2613 code (refs_not_supported) or prune them from mem_refs. */
2614 auto_vec<seq_entry> unord_refs;
2615 EXECUTE_IF_SET_IN_BITMAP (refs_not_supported, 0, i, bi)
2616 {
2617 ref = memory_accesses.refs_list[i];
2618 if (!ref_indep_loop_p (loop, ref, sm_waw))
2619 bitmap_clear_bit (mem_refs, i);
2620 /* We have now verified that the store order for ref with respect
2621 to all other stores in the loop does not matter. */
2622 else
2623 unord_refs.safe_push (seq_entry (i, sm_unord));
2624 }
2625
2626 hash_map<im_mem_ref *, sm_aux *> aux_map;
2627
2628 /* Execute SM but delay the store materialization for ordered
2629 sequences on exit. */
2630 EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
2631 {
2632 ref = memory_accesses.refs_list[i];
2633 execute_sm (loop, ref, aux_map);
2634 }
2635
2636 /* Materialize ordered store sequences on exits. */
2637 FOR_EACH_VEC_ELT (exits, i, e)
2638 {
2639 if (i < sms.length ())
2640 {
2641 gcc_assert (sms[i].first == e);
2642 execute_sm_exit (loop, e, sms[i].second, aux_map, sm_ord);
2643 sms[i].second.release ();
2644 }
2645 if (!unord_refs.is_empty ())
2646 execute_sm_exit (loop, e, unord_refs, aux_map, sm_unord);
2647 }
2648
2649 for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
2650 iter != aux_map.end (); ++iter)
2651 delete (*iter).second;
2652 }
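
/* Rough end-to-end sketch (hypothetical input, not an actual testcase):
   applying store motion to *p in

     for (i = 0; i < n; ++i)
       *p += a[i];

   results in approximately

     tmp = *p;
     for (i = 0; i < n; ++i)
       tmp += a[i];
     *p = tmp;

   where the load is emitted by execute_sm (and hoisted to the preheader
   by move_computations) and the store on the exit edge is emitted by
   execute_sm_exit.  */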
2653
2654 class ref_always_accessed
2655 {
2656 public:
2657 ref_always_accessed (class loop *loop_, bool stored_p_)
2658 : loop (loop_), stored_p (stored_p_) {}
2659 bool operator () (mem_ref_loc *loc);
2660 class loop *loop;
2661 bool stored_p;
2662 };
2663
2664 bool
2665 ref_always_accessed::operator () (mem_ref_loc *loc)
2666 {
2667 class loop *must_exec;
2668
2669 struct lim_aux_data *lim_data = get_lim_data (loc->stmt);
2670 if (!lim_data)
2671 return false;
2672
2673 /* If we require an always executed store make sure the statement
2674 is a store. */
2675 if (stored_p)
2676 {
2677 tree lhs = gimple_get_lhs (loc->stmt);
2678 if (!lhs
2679 || !(DECL_P (lhs) || REFERENCE_CLASS_P (lhs)))
2680 return false;
2681 }
2682
2683 must_exec = lim_data->always_executed_in;
2684 if (!must_exec)
2685 return false;
2686
2687 if (must_exec == loop
2688 || flow_loop_nested_p (must_exec, loop))
2689 return true;
2690
2691 return false;
2692 }
2693
2694 /* Returns true if REF is always accessed in LOOP. If STORED_P is true
2695 make sure REF is always stored to in LOOP. */
2696
2697 static bool
2698 ref_always_accessed_p (class loop *loop, im_mem_ref *ref, bool stored_p)
2699 {
2700 return for_all_locs_in_loop (loop, ref,
2701 ref_always_accessed (loop, stored_p));
2702 }
2703
2704 /* Returns true if REF1 and REF2 are independent. */
2705
2706 static bool
2707 refs_independent_p (im_mem_ref *ref1, im_mem_ref *ref2, bool tbaa_p)
2708 {
2709 if (ref1 == ref2)
2710 return true;
2711
2712 if (dump_file && (dump_flags & TDF_DETAILS))
2713 fprintf (dump_file, "Querying dependency of refs %u and %u: ",
2714 ref1->id, ref2->id);
2715
2716 if (mem_refs_may_alias_p (ref1, ref2, &memory_accesses.ttae_cache, tbaa_p))
2717 {
2718 if (dump_file && (dump_flags & TDF_DETAILS))
2719 fprintf (dump_file, "dependent.\n");
2720 return false;
2721 }
2722 else
2723 {
2724 if (dump_file && (dump_flags & TDF_DETAILS))
2725 fprintf (dump_file, "independent.\n");
2726 return true;
2727 }
2728 }
2729
2730 /* Returns true if REF is independent of all other accesses in LOOP.
2731 KIND specifies the kind of dependence to consider.
2732 lim_raw assumes REF is not stored in LOOP and disambiguates RAW
2733 dependences so if true REF can be hoisted out of LOOP
2734 sm_war disambiguates a store REF against all other loads to see
2735 whether the store can be sunk across loads out of LOOP
2736 sm_waw disambiguates a store REF against all other stores to see
2737 whether the store can be sunk across stores out of LOOP. */
2738
2739 static bool
2740 ref_indep_loop_p (class loop *loop, im_mem_ref *ref, dep_kind kind)
2741 {
2742 bool indep_p = true;
2743 bitmap refs_to_check;
2744
2745 if (kind == sm_war)
2746 refs_to_check = &memory_accesses.refs_loaded_in_loop[loop->num];
2747 else
2748 refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];
2749
2750 if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID))
2751 indep_p = false;
2752 else
2753 {
2754 /* tri-state, { unknown, independent, dependent } */
2755 dep_state state = query_loop_dependence (loop, ref, kind);
2756 if (state != dep_unknown)
2757 return state == dep_independent ? true : false;
2758
2759 class loop *inner = loop->inner;
2760 while (inner)
2761 {
2762 if (!ref_indep_loop_p (inner, ref, kind))
2763 {
2764 indep_p = false;
2765 break;
2766 }
2767 inner = inner->next;
2768 }
2769
2770 if (indep_p)
2771 {
2772 unsigned i;
2773 bitmap_iterator bi;
2774 EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
2775 {
2776 im_mem_ref *aref = memory_accesses.refs_list[i];
2777 if (!refs_independent_p (ref, aref, kind != sm_waw))
2778 {
2779 indep_p = false;
2780 break;
2781 }
2782 }
2783 }
2784 }
2785
2786 if (dump_file && (dump_flags & TDF_DETAILS))
2787 fprintf (dump_file, "Querying %s dependencies of ref %u in loop %d: %s\n",
2788 kind == lim_raw ? "RAW" : (kind == sm_war ? "SM WAR" : "SM WAW"),
2789 ref->id, loop->num, indep_p ? "independent" : "dependent");
2790
2791 /* Record the computed result in the cache. */
2792 record_loop_dependence (loop, ref, kind,
2793 indep_p ? dep_independent : dep_dependent);
2794
2795 return indep_p;
2796 }
2797
2798
2799 /* Returns true if we can perform store motion of REF from LOOP. */
2800
2801 static bool
2802 can_sm_ref_p (class loop *loop, im_mem_ref *ref)
2803 {
2804 tree base;
2805
2806 /* Can't hoist unanalyzable refs. */
2807 if (!MEM_ANALYZABLE (ref))
2808 return false;
2809
2810 /* It should be movable. */
2811 if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
2812 || TREE_THIS_VOLATILE (ref->mem.ref)
2813 || !for_each_index (&ref->mem.ref, may_move_till, loop))
2814 return false;
2815
2816 /* If it can throw, fail; we do not properly update the EH info. */
2817 if (tree_could_throw_p (ref->mem.ref))
2818 return false;
2819
2820 /* If it can trap, it must be always executed in LOOP.
2821 Readonly memory locations may trap when storing to them, but
2822 tree_could_trap_p is a predicate for rvalues, so check that
2823 explicitly. */
2824 base = get_base_address (ref->mem.ref);
2825 if ((tree_could_trap_p (ref->mem.ref)
2826 || (DECL_P (base) && TREE_READONLY (base)))
2827 /* ??? We can at least use false here, allowing loads? We
2828 are forcing conditional stores if the ref is not always
2829 stored to later anyway. So this would only guard
2830 the load we need to emit. Thus when the ref is not
2831 loaded we can elide this completely? */
2832 && !ref_always_accessed_p (loop, ref, true))
2833 return false;
2834
2835 /* Verify all loads of ref can be hoisted. */
2836 if (ref->loaded
2837 && bitmap_bit_p (ref->loaded, loop->num)
2838 && !ref_indep_loop_p (loop, ref, lim_raw))
2839 return false;
2840
2841 /* Verify the candidate can be disambiguated against all loads,
2842 that is, we can elide all in-loop stores. Disambiguation
2843 against stores is done later when we cannot guarantee preserving
2844 the order of stores. */
2845 if (!ref_indep_loop_p (loop, ref, sm_war))
2846 return false;
2847
2848 return true;
2849 }
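
/* Illustrative sketch of the checks above (hypothetical code):

     volatile int v;
     int *p = ..., *q = ...;   // loop-invariant addresses
     for (...)
       {
         *p = i;               // store-motion candidate
         v = i;                // rejected by the TREE_THIS_VOLATILE check
         if (c)
           *q = i;             // candidate too, but handled via the
                               // conditional-store path in execute_sm
       }

   Whether *p and *q are finally moved also depends on the dependence
   checks done later in hoist_memory_references and on
   flag_store_data_races for the conditional case.  */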
2850
2851 /* Marks in REFS_TO_SM the references in LOOP for which store motion
2852 should be performed. SM_EXECUTED is the set of references for which
2853 store motion was performed in one of the outer loops. */
2854
2855 static void
2856 find_refs_for_sm (class loop *loop, bitmap sm_executed, bitmap refs_to_sm)
2857 {
2858 bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num];
2859 unsigned i;
2860 bitmap_iterator bi;
2861 im_mem_ref *ref;
2862
2863 EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
2864 {
2865 ref = memory_accesses.refs_list[i];
2866 if (can_sm_ref_p (loop, ref) && dbg_cnt (lim))
2867 bitmap_set_bit (refs_to_sm, i);
2868 }
2869 }
2870
2871 /* Checks whether LOOP (with exits stored in EXITS array) is suitable
2872 for a store motion optimization (i.e. whether we can insert statements
2873 on its exits). */
2874
2875 static bool
2876 loop_suitable_for_sm (class loop *loop ATTRIBUTE_UNUSED,
2877 vec<edge> exits)
2878 {
2879 unsigned i;
2880 edge ex;
2881
2882 FOR_EACH_VEC_ELT (exits, i, ex)
2883 if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
2884 return false;
2885
2886 return true;
2887 }
2888
2889 /* Try to perform store motion for all memory references modified inside
2890 LOOP. SM_EXECUTED is the bitmap of the memory references for which
2891 store motion was executed in one of the outer loops. */
2892
2893 static void
2894 store_motion_loop (class loop *loop, bitmap sm_executed)
2895 {
2896 vec<edge> exits = get_loop_exit_edges (loop);
2897 class loop *subloop;
2898 bitmap sm_in_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
2899
2900 if (loop_suitable_for_sm (loop, exits))
2901 {
2902 find_refs_for_sm (loop, sm_executed, sm_in_loop);
2903 if (!bitmap_empty_p (sm_in_loop))
2904 {
2905 hoist_memory_references (loop, sm_in_loop, exits);
2906 /* Commit edge inserts here to preserve the order of stores
2907 when an exit exits multiple loops. */
2908 gsi_commit_edge_inserts ();
2909 }
2910 }
2911 exits.release ();
2912
2913 bitmap_ior_into (sm_executed, sm_in_loop);
2914 for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
2915 store_motion_loop (subloop, sm_executed);
2916 bitmap_and_compl_into (sm_executed, sm_in_loop);
2917 BITMAP_FREE (sm_in_loop);
2918 }
2919
2920 /* Try to perform store motion for all memory references modified inside
2921 loops. */
2922
2923 static void
2924 do_store_motion (void)
2925 {
2926 class loop *loop;
2927 bitmap sm_executed = BITMAP_ALLOC (&lim_bitmap_obstack);
2928
2929 for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
2930 store_motion_loop (loop, sm_executed);
2931
2932 BITMAP_FREE (sm_executed);
2933 }
2934
2935 /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
2936 for each such basic block bb records the outermost loop for which execution
2937 of its header implies execution of bb. CONTAINS_CALL is the bitmap of
2938 blocks that contain a nonpure call. */
2939
2940 static void
2941 fill_always_executed_in_1 (class loop *loop, sbitmap contains_call)
2942 {
2943 basic_block bb = NULL, *bbs, last = NULL;
2944 unsigned i;
2945 edge e;
2946 class loop *inn_loop = loop;
2947
2948 if (ALWAYS_EXECUTED_IN (loop->header) == NULL)
2949 {
2950 bbs = get_loop_body_in_dom_order (loop);
2951
2952 for (i = 0; i < loop->num_nodes; i++)
2953 {
2954 edge_iterator ei;
2955 bb = bbs[i];
2956
2957 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
2958 last = bb;
2959
2960 if (bitmap_bit_p (contains_call, bb->index))
2961 break;
2962
2963 FOR_EACH_EDGE (e, ei, bb->succs)
2964 {
2965 /* If there is an exit from this BB. */
2966 if (!flow_bb_inside_loop_p (loop, e->dest))
2967 break;
2968 /* Or we enter a possibly non-finite loop. */
2969 if (flow_loop_nested_p (bb->loop_father,
2970 e->dest->loop_father)
2971 && ! finite_loop_p (e->dest->loop_father))
2972 break;
2973 }
2974 if (e)
2975 break;
2976
2977 /* A loop might be infinite (TODO use simple loop analysis
2978 to disprove this if possible). */
2979 if (bb->flags & BB_IRREDUCIBLE_LOOP)
2980 break;
2981
2982 if (!flow_bb_inside_loop_p (inn_loop, bb))
2983 break;
2984
2985 if (bb->loop_father->header == bb)
2986 {
2987 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
2988 break;
2989
2990 /* In a loop that is always entered we may proceed anyway.
2991 But record that we entered it and stop once we leave it. */
2992 inn_loop = bb->loop_father;
2993 }
2994 }
2995
2996 while (1)
2997 {
2998 SET_ALWAYS_EXECUTED_IN (last, loop);
2999 if (last == loop->header)
3000 break;
3001 last = get_immediate_dominator (CDI_DOMINATORS, last);
3002 }
3003
3004 free (bbs);
3005 }
3006
3007 for (loop = loop->inner; loop; loop = loop->next)
3008 fill_always_executed_in_1 (loop, contains_call);
3009 }
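
/* Hypothetical example: in

     for (;;)          // LOOP
       {
         a[i] = 0;     // header block A, has the exit edge
         if (c)
           break;
         b[i] = 0;     // block B
       }

   block A dominates the latch, so it is recorded before the walk stops
   at its exit edge and ALWAYS_EXECUTED_IN (A) is set to LOOP.  Block B
   only executes when the exit is not taken, so it is not marked and its
   statements are not treated as unconditionally executed.  */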
3010
3011 /* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e.
3012 for each such basic block bb records the outermost loop for which execution
3013 of its header implies execution of bb. */
3014
3015 static void
3016 fill_always_executed_in (void)
3017 {
3018 basic_block bb;
3019 class loop *loop;
3020
3021 auto_sbitmap contains_call (last_basic_block_for_fn (cfun));
3022 bitmap_clear (contains_call);
3023 FOR_EACH_BB_FN (bb, cfun)
3024 {
3025 gimple_stmt_iterator gsi;
3026 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3027 {
3028 if (nonpure_call_p (gsi_stmt (gsi)))
3029 break;
3030 }
3031
3032 if (!gsi_end_p (gsi))
3033 bitmap_set_bit (contains_call, bb->index);
3034 }
3035
3036 for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
3037 fill_always_executed_in_1 (loop, contains_call);
3038 }
3039
3040
3041 /* Compute the global information needed by the loop invariant motion pass. */
3042
3043 static void
3044 tree_ssa_lim_initialize (void)
3045 {
3046 class loop *loop;
3047 unsigned i;
3048
3049 bitmap_obstack_initialize (&lim_bitmap_obstack);
3050 gcc_obstack_init (&mem_ref_obstack);
3051 lim_aux_data_map = new hash_map<gimple *, lim_aux_data *>;
3052
3053 if (flag_tm)
3054 compute_transaction_bits ();
3055
3056 alloc_aux_for_edges (0);
3057
3058 memory_accesses.refs = new hash_table<mem_ref_hasher> (100);
3059 memory_accesses.refs_list.create (100);
3060 /* Allocate a special, unanalyzable mem-ref with ID zero. */
3061 memory_accesses.refs_list.quick_push
3062 (mem_ref_alloc (NULL, 0, UNANALYZABLE_MEM_ID));
3063
3064 memory_accesses.refs_loaded_in_loop.create (number_of_loops (cfun));
3065 memory_accesses.refs_loaded_in_loop.quick_grow (number_of_loops (cfun));
3066 memory_accesses.refs_stored_in_loop.create (number_of_loops (cfun));
3067 memory_accesses.refs_stored_in_loop.quick_grow (number_of_loops (cfun));
3068 memory_accesses.all_refs_stored_in_loop.create (number_of_loops (cfun));
3069 memory_accesses.all_refs_stored_in_loop.quick_grow (number_of_loops (cfun));
3070
3071 for (i = 0; i < number_of_loops (cfun); i++)
3072 {
3073 bitmap_initialize (&memory_accesses.refs_loaded_in_loop[i],
3074 &lim_bitmap_obstack);
3075 bitmap_initialize (&memory_accesses.refs_stored_in_loop[i],
3076 &lim_bitmap_obstack);
3077 bitmap_initialize (&memory_accesses.all_refs_stored_in_loop[i],
3078 &lim_bitmap_obstack);
3079 }
3080
3081 memory_accesses.ttae_cache = NULL;
3082
3083 /* Initialize bb_loop_postorder with a mapping from loop->num to
3084 its postorder index. */
3085 i = 0;
3086 bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun));
3087 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
3088 bb_loop_postorder[loop->num] = i++;
3089 }
3090
3091 /* Cleans up after the invariant motion pass. */
3092
3093 static void
3094 tree_ssa_lim_finalize (void)
3095 {
3096 basic_block bb;
3097 unsigned i;
3098 im_mem_ref *ref;
3099
3100 free_aux_for_edges ();
3101
3102 FOR_EACH_BB_FN (bb, cfun)
3103 SET_ALWAYS_EXECUTED_IN (bb, NULL);
3104
3105 bitmap_obstack_release (&lim_bitmap_obstack);
3106 delete lim_aux_data_map;
3107
3108 delete memory_accesses.refs;
3109 memory_accesses.refs = NULL;
3110
3111 FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
3112 memref_free (ref);
3113 memory_accesses.refs_list.release ();
3114 obstack_free (&mem_ref_obstack, NULL);
3115
3116 memory_accesses.refs_loaded_in_loop.release ();
3117 memory_accesses.refs_stored_in_loop.release ();
3118 memory_accesses.all_refs_stored_in_loop.release ();
3119
3120 if (memory_accesses.ttae_cache)
3121 free_affine_expand_cache (&memory_accesses.ttae_cache);
3122
3123 free (bb_loop_postorder);
3124 }
3125
3126 /* Moves invariants from loops. Only "expensive" invariants are moved out --
3127 i.e. those that are likely to be a win regardless of the register pressure. */
3128
3129 static unsigned int
3130 tree_ssa_lim (void)
3131 {
3132 unsigned int todo;
3133
3134 tree_ssa_lim_initialize ();
3135
3136 /* Gathers information about memory accesses in the loops. */
3137 analyze_memory_references ();
3138
3139 /* Fills ALWAYS_EXECUTED_IN information for basic blocks. */
3140 fill_always_executed_in ();
3141
3142 /* For each statement determine the outermost loop in which it is
3143 invariant and the cost of computing the invariant. */
3144 invariantness_dom_walker (CDI_DOMINATORS)
3145 .walk (cfun->cfg->x_entry_block_ptr);
3146
3147 /* Execute store motion. Force the necessary invariants to be moved
3148 out of the loops as well. */
3149 do_store_motion ();
3150
3151 /* Move the expressions that are expensive enough. */
3152 todo = move_computations ();
3153
3154 tree_ssa_lim_finalize ();
3155
3156 return todo;
3157 }
3158
3159 /* Loop invariant motion pass. */
3160
3161 namespace {
3162
3163 const pass_data pass_data_lim =
3164 {
3165 GIMPLE_PASS, /* type */
3166 "lim", /* name */
3167 OPTGROUP_LOOP, /* optinfo_flags */
3168 TV_LIM, /* tv_id */
3169 PROP_cfg, /* properties_required */
3170 0, /* properties_provided */
3171 0, /* properties_destroyed */
3172 0, /* todo_flags_start */
3173 0, /* todo_flags_finish */
3174 };
3175
3176 class pass_lim : public gimple_opt_pass
3177 {
3178 public:
3179 pass_lim (gcc::context *ctxt)
3180 : gimple_opt_pass (pass_data_lim, ctxt)
3181 {}
3182
3183 /* opt_pass methods: */
3184 opt_pass * clone () { return new pass_lim (m_ctxt); }
3185 virtual bool gate (function *) { return flag_tree_loop_im != 0; }
3186 virtual unsigned int execute (function *);
3187
3188 }; // class pass_lim
3189
3190 unsigned int
3191 pass_lim::execute (function *fun)
3192 {
3193 bool in_loop_pipeline = scev_initialized_p ();
3194 if (!in_loop_pipeline)
3195 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
3196
3197 if (number_of_loops (fun) <= 1)
3198 return 0;
3199 unsigned int todo = tree_ssa_lim ();
3200
3201 if (!in_loop_pipeline)
3202 loop_optimizer_finalize ();
3203 else
3204 scev_reset ();
3205 return todo;
3206 }
3207
3208 } // anon namespace
3209
3210 gimple_opt_pass *
3211 make_pass_lim (gcc::context *ctxt)
3212 {
3213 return new pass_lim (ctxt);
3214 }
3215
3216