gcc/tree-ssa-loop-im.c
1 /* Loop invariant motion.
2 Copyright (C) 2003-2021 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "tree.h"
25 #include "gimple.h"
26 #include "cfghooks.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "gimple-pretty-print.h"
30 #include "fold-const.h"
31 #include "cfganal.h"
32 #include "tree-eh.h"
33 #include "gimplify.h"
34 #include "gimple-iterator.h"
35 #include "tree-cfg.h"
36 #include "tree-ssa-loop-manip.h"
37 #include "tree-ssa-loop.h"
38 #include "tree-into-ssa.h"
39 #include "cfgloop.h"
40 #include "tree-affine.h"
41 #include "tree-ssa-propagate.h"
42 #include "trans-mem.h"
43 #include "gimple-fold.h"
44 #include "tree-scalar-evolution.h"
45 #include "tree-ssa-loop-niter.h"
46 #include "alias.h"
47 #include "builtins.h"
48 #include "tree-dfa.h"
49 #include "dbgcnt.h"
50
51 /* TODO: Support for predicated code motion. I.e.
52
53 while (1)
54 {
55 if (cond)
56 {
57 a = inv;
58 something;
59 }
60 }
61
62 Where COND and INV are invariants, but evaluating INV may trap or be
63 invalid for some other reason if !COND. This may be transformed to
64
65 if (cond)
66 a = inv;
67 while (1)
68 {
69 if (cond)
70 something;
71 } */
72
73 /* The auxiliary data kept for each statement. */
74
75 struct lim_aux_data
76 {
77 class loop *max_loop; /* The outermost loop in which the statement
78 is invariant. */
79
80 class loop *tgt_loop; /* The loop out of which we want to move the
81 invariant. */
82
83 class loop *always_executed_in;
84 /* The outermost loop for which we are sure
85 the statement is executed if the loop
86 is entered. */
87
88 unsigned cost; /* Cost of the computation performed by the
89 statement. */
90
91 unsigned ref; /* The simple_mem_ref in this stmt or 0. */
92
93 vec<gimple *> depends; /* Vector of statements that must also be
94 hoisted out of the loop when this statement
95 is hoisted; i.e. those that define the
96 operands of the statement and are inside of
97 the MAX_LOOP loop. */
98 };
99
100 /* Maps statements to their lim_aux_data. */
101
102 static hash_map<gimple *, lim_aux_data *> *lim_aux_data_map;
103
104 /* Description of a memory reference location. */
105
106 struct mem_ref_loc
107 {
108 tree *ref; /* The reference itself. */
109 gimple *stmt; /* The statement in which it occurs. */
110 };
111
112
113 /* Description of a memory reference. */
114
115 class im_mem_ref
116 {
117 public:
118 unsigned id : 30; /* ID assigned to the memory reference
119 (its index in memory_accesses.refs_list) */
120 unsigned ref_canonical : 1; /* Whether mem.ref was canonicalized. */
121 unsigned ref_decomposed : 1; /* Whether the ref was hashed from mem. */
122 hashval_t hash; /* Its hash value. */
123
124 /* The memory access itself and associated caching of alias-oracle
125 query meta-data. We are using mem.ref == error_mark_node for the
126 case where the reference is represented by its single access stmt
127 in accesses_in_loop[0]. */
128 ao_ref mem;
129
130 bitmap stored; /* The set of loops in which this memory location
131 is stored to. */
132 bitmap loaded; /* The set of loops in which this memory location
133 is loaded from. */
134 vec<mem_ref_loc> accesses_in_loop;
135 /* The locations of the accesses. */
136
137 /* The following set is computed on demand. */
138 bitmap_head dep_loop; /* The set of loops in which the memory
139 reference is {in,}dependent in
140 different modes. */
141 };
142
143 /* We use six bits per loop in the ref->dep_loop bitmap to record
144 the dep_kind x dep_state combinations. */
145
146 enum dep_kind { lim_raw, sm_war, sm_waw };
147 enum dep_state { dep_unknown, dep_independent, dep_dependent };
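
/* As an illustration of the encoding: the two cache bits for loop
   number N and dep_kind K are 6*N + 2*K (recording dep_independent)
   and 6*N + 2*K + 1 (recording dep_dependent), so a cached
   dep_dependent result for loop 2 and sm_war occupies bit
   6*2 + 2*1 + 1 = 15.  */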
148
149 /* Populate the loop dependence cache of REF for LOOP, KIND with STATE. */
150
151 static void
152 record_loop_dependence (class loop *loop, im_mem_ref *ref,
153 dep_kind kind, dep_state state)
154 {
155 gcc_assert (state != dep_unknown);
156 unsigned bit = 6 * loop->num + kind * 2 + (state == dep_dependent ? 1 : 0);
157 bitmap_set_bit (&ref->dep_loop, bit);
158 }
159
160 /* Query the loop dependence cache of REF for LOOP, KIND. */
161
162 static dep_state
163 query_loop_dependence (class loop *loop, im_mem_ref *ref, dep_kind kind)
164 {
165 unsigned first_bit = 6 * loop->num + kind * 2;
166 if (bitmap_bit_p (&ref->dep_loop, first_bit))
167 return dep_independent;
168 else if (bitmap_bit_p (&ref->dep_loop, first_bit + 1))
169 return dep_dependent;
170 return dep_unknown;
171 }
172
173 /* Mem_ref hashtable helpers. */
174
175 struct mem_ref_hasher : nofree_ptr_hash <im_mem_ref>
176 {
177 typedef ao_ref *compare_type;
178 static inline hashval_t hash (const im_mem_ref *);
179 static inline bool equal (const im_mem_ref *, const ao_ref *);
180 };
181
182 /* A hash function for class im_mem_ref object OBJ. */
183
184 inline hashval_t
185 mem_ref_hasher::hash (const im_mem_ref *mem)
186 {
187 return mem->hash;
188 }
189
190 /* An equality function for class im_mem_ref object MEM1 with
191 memory reference OBJ2. */
192
193 inline bool
194 mem_ref_hasher::equal (const im_mem_ref *mem1, const ao_ref *obj2)
195 {
196 if (obj2->max_size_known_p ())
197 return (mem1->ref_decomposed
198 && ((TREE_CODE (mem1->mem.base) == MEM_REF
199 && TREE_CODE (obj2->base) == MEM_REF
200 && operand_equal_p (TREE_OPERAND (mem1->mem.base, 0),
201 TREE_OPERAND (obj2->base, 0), 0)
202 && known_eq (mem_ref_offset (mem1->mem.base) * BITS_PER_UNIT + mem1->mem.offset,
203 mem_ref_offset (obj2->base) * BITS_PER_UNIT + obj2->offset))
204 || (operand_equal_p (mem1->mem.base, obj2->base, 0)
205 && known_eq (mem1->mem.offset, obj2->offset)))
206 && known_eq (mem1->mem.size, obj2->size)
207 && known_eq (mem1->mem.max_size, obj2->max_size)
208 && mem1->mem.volatile_p == obj2->volatile_p
209 && (mem1->mem.ref_alias_set == obj2->ref_alias_set
210 /* We are not canonicalizing alias-sets but for the
211 special case where we didn't canonicalize yet and the
212 incoming ref is an alias-set zero MEM we pick
213 the correct one already. */
214 || (!mem1->ref_canonical
215 && (TREE_CODE (obj2->ref) == MEM_REF
216 || TREE_CODE (obj2->ref) == TARGET_MEM_REF)
217 && obj2->ref_alias_set == 0)
218 /* Likewise if there's a canonical ref with alias-set zero. */
219 || (mem1->ref_canonical && mem1->mem.ref_alias_set == 0))
220 && types_compatible_p (TREE_TYPE (mem1->mem.ref),
221 TREE_TYPE (obj2->ref)));
222 else
223 return operand_equal_p (mem1->mem.ref, obj2->ref, 0);
224 }
225
226
227 /* Description of memory accesses in loops. */
228
229 static struct
230 {
231 /* The hash table of memory references accessed in loops. */
232 hash_table<mem_ref_hasher> *refs;
233
234 /* The list of memory references. */
235 vec<im_mem_ref *> refs_list;
236
237 /* The set of memory references accessed in each loop. */
238 vec<bitmap_head> refs_loaded_in_loop;
239
240 /* The set of memory references stored in each loop. */
241 vec<bitmap_head> refs_stored_in_loop;
242
243 /* The set of memory references stored in each loop, including subloops. */
244 vec<bitmap_head> all_refs_stored_in_loop;
245
246 /* Cache for expanding memory addresses. */
247 hash_map<tree, name_expansion *> *ttae_cache;
248 } memory_accesses;
249
250 /* Obstack for the bitmaps in the above data structures. */
251 static bitmap_obstack lim_bitmap_obstack;
252 static obstack mem_ref_obstack;
253
254 static bool ref_indep_loop_p (class loop *, im_mem_ref *, dep_kind);
255 static bool ref_always_accessed_p (class loop *, im_mem_ref *, bool);
256 static bool refs_independent_p (im_mem_ref *, im_mem_ref *, bool = true);
257
258 /* Minimum cost of an expensive expression. */
259 #define LIM_EXPENSIVE ((unsigned) param_lim_expensive)
260
261 /* The outermost loop for which execution of the header guarantees that the
262 block will be executed. */
263 #define ALWAYS_EXECUTED_IN(BB) ((class loop *) (BB)->aux)
264 #define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))
265
266 /* ID of the shared unanalyzable mem. */
267 #define UNANALYZABLE_MEM_ID 0
268
269 /* Whether the reference was analyzable. */
270 #define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID)
271
272 static struct lim_aux_data *
273 init_lim_data (gimple *stmt)
274 {
275 lim_aux_data *p = XCNEW (struct lim_aux_data);
276 lim_aux_data_map->put (stmt, p);
277
278 return p;
279 }
280
281 static struct lim_aux_data *
282 get_lim_data (gimple *stmt)
283 {
284 lim_aux_data **p = lim_aux_data_map->get (stmt);
285 if (!p)
286 return NULL;
287
288 return *p;
289 }
290
291 /* Releases the memory occupied by DATA. */
292
293 static void
294 free_lim_aux_data (struct lim_aux_data *data)
295 {
296 data->depends.release ();
297 free (data);
298 }
299
300 static void
301 clear_lim_data (gimple *stmt)
302 {
303 lim_aux_data **p = lim_aux_data_map->get (stmt);
304 if (!p)
305 return;
306
307 free_lim_aux_data (*p);
308 *p = NULL;
309 }
310
311
312 /* The possibilities of statement movement. */
313 enum move_pos
314 {
315 MOVE_IMPOSSIBLE, /* No movement -- side effect expression. */
316 MOVE_PRESERVE_EXECUTION, /* Must not cause the non-executed statement
317 to become executed -- memory accesses, ... */
318 MOVE_POSSIBLE /* Unlimited movement. */
319 };
320
321
322 /* If it is possible to hoist the statement STMT unconditionally,
323 returns MOVE_POSSIBLE.
324 If it is possible to hoist the statement STMT, but we must avoid making
325 it executed if it would not be executed in the original program (e.g.
326 because it may trap), return MOVE_PRESERVE_EXECUTION.
327 Otherwise return MOVE_IMPOSSIBLE. */
328
329 enum move_pos
330 movement_possibility (gimple *stmt)
331 {
332 tree lhs;
333 enum move_pos ret = MOVE_POSSIBLE;
334
335 if (flag_unswitch_loops
336 && gimple_code (stmt) == GIMPLE_COND)
337 {
338 /* If we perform unswitching, force the operands of the invariant
339 condition to be moved out of the loop. */
340 return MOVE_POSSIBLE;
341 }
342
343 if (gimple_code (stmt) == GIMPLE_PHI
344 && gimple_phi_num_args (stmt) <= 2
345 && !virtual_operand_p (gimple_phi_result (stmt))
346 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
347 return MOVE_POSSIBLE;
348
349 if (gimple_get_lhs (stmt) == NULL_TREE)
350 return MOVE_IMPOSSIBLE;
351
352 if (gimple_vdef (stmt))
353 return MOVE_IMPOSSIBLE;
354
355 if (stmt_ends_bb_p (stmt)
356 || gimple_has_volatile_ops (stmt)
357 || gimple_has_side_effects (stmt)
358 || stmt_could_throw_p (cfun, stmt))
359 return MOVE_IMPOSSIBLE;
360
361 if (is_gimple_call (stmt))
362 {
363 /* While a pure or const call is guaranteed to have no side effects, we
364 cannot move it arbitrarily. Consider code like
365
366 char *s = something ();
367
368 while (1)
369 {
370 if (s)
371 t = strlen (s);
372 else
373 t = 0;
374 }
375
376 Here the strlen call cannot be moved out of the loop, even though
377 s is invariant. In addition to possibly creating a call with
378 invalid arguments, moving out a function call that is not executed
379 may cause performance regressions in case the call is costly and
380 not executed at all. */
381 ret = MOVE_PRESERVE_EXECUTION;
382 lhs = gimple_call_lhs (stmt);
383 }
384 else if (is_gimple_assign (stmt))
385 lhs = gimple_assign_lhs (stmt);
386 else
387 return MOVE_IMPOSSIBLE;
388
389 if (TREE_CODE (lhs) == SSA_NAME
390 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
391 return MOVE_IMPOSSIBLE;
392
393 if (TREE_CODE (lhs) != SSA_NAME
394 || gimple_could_trap_p (stmt))
395 return MOVE_PRESERVE_EXECUTION;
396
397 /* Non-local loads in a transaction cannot be hoisted out. Well,
398 unless the load happens on every path out of the loop, but we
399 don't take this into account yet. */
400 if (flag_tm
401 && gimple_in_transaction (stmt)
402 && gimple_assign_single_p (stmt))
403 {
404 tree rhs = gimple_assign_rhs1 (stmt);
405 if (DECL_P (rhs) && is_global_var (rhs))
406 {
407 if (dump_file)
408 {
409 fprintf (dump_file, "Cannot hoist conditional load of ");
410 print_generic_expr (dump_file, rhs, TDF_SLIM);
411 fprintf (dump_file, " because it is in a transaction.\n");
412 }
413 return MOVE_IMPOSSIBLE;
414 }
415 }
416
417 return ret;
418 }
419
420 /* Suppose that operand DEF is used inside the LOOP. Returns the outermost
421 loop to which we could move the expression using DEF if it did not have
422 other operands, i.e. the outermost loop enclosing LOOP in which the value
423 of DEF is invariant. */
424
425 static class loop *
426 outermost_invariant_loop (tree def, class loop *loop)
427 {
428 gimple *def_stmt;
429 basic_block def_bb;
430 class loop *max_loop;
431 struct lim_aux_data *lim_data;
432
433 if (!def)
434 return superloop_at_depth (loop, 1);
435
436 if (TREE_CODE (def) != SSA_NAME)
437 {
438 gcc_assert (is_gimple_min_invariant (def));
439 return superloop_at_depth (loop, 1);
440 }
441
442 def_stmt = SSA_NAME_DEF_STMT (def);
443 def_bb = gimple_bb (def_stmt);
444 if (!def_bb)
445 return superloop_at_depth (loop, 1);
446
447 max_loop = find_common_loop (loop, def_bb->loop_father);
448
449 lim_data = get_lim_data (def_stmt);
450 if (lim_data != NULL && lim_data->max_loop != NULL)
451 max_loop = find_common_loop (max_loop,
452 loop_outer (lim_data->max_loop));
453 if (max_loop == loop)
454 return NULL;
455 max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);
456
457 return max_loop;
458 }
459
460 /* DATA is a structure containing information associated with a statement
461 inside LOOP. DEF is one of the operands of this statement.
462
463 Find the outermost loop enclosing LOOP in which the value of DEF is invariant
464 and record this in DATA->max_loop field. If DEF itself is defined inside
465 this loop as well (i.e. we need to hoist it out of the loop if we want
466 to hoist the statement represented by DATA), record the statement that
467 defines DEF in the DATA->depends list. Additionally if ADD_COST is true,
468 add the cost of the computation of DEF to the DATA->cost.
469
470 If DEF is not invariant in LOOP, return false. Otherwise return TRUE. */
471
472 static bool
473 add_dependency (tree def, struct lim_aux_data *data, class loop *loop,
474 bool add_cost)
475 {
476 gimple *def_stmt = SSA_NAME_DEF_STMT (def);
477 basic_block def_bb = gimple_bb (def_stmt);
478 class loop *max_loop;
479 struct lim_aux_data *def_data;
480
481 if (!def_bb)
482 return true;
483
484 max_loop = outermost_invariant_loop (def, loop);
485 if (!max_loop)
486 return false;
487
488 if (flow_loop_nested_p (data->max_loop, max_loop))
489 data->max_loop = max_loop;
490
491 def_data = get_lim_data (def_stmt);
492 if (!def_data)
493 return true;
494
495 if (add_cost
496 /* Only add the cost if the statement defining DEF is inside LOOP,
497 i.e. if it is likely that by moving the invariants dependent
498 on it, we will be able to avoid creating a new register for
499 it (since it will only be used in these dependent invariants). */
500 && def_bb->loop_father == loop)
501 data->cost += def_data->cost;
502
503 data->depends.safe_push (def_stmt);
504
505 return true;
506 }
507
508 /* Returns an estimate of the cost of statement STMT. The values here
509 are just ad-hoc constants, similar to costs for inlining. */
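
/* As a concrete reference point, assuming the default of 20 for
   --param lim-expensive: a single division, call or memory reference
   already reaches the LIM_EXPENSIVE threshold that compute_invariantness
   uses to consider a statement worth hoisting on its own, while an SSA
   name copy or PAREN_EXPR is considered free.  */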
510
511 static unsigned
512 stmt_cost (gimple *stmt)
513 {
514 /* Always try to create possibilities for unswitching. */
515 if (gimple_code (stmt) == GIMPLE_COND
516 || gimple_code (stmt) == GIMPLE_PHI)
517 return LIM_EXPENSIVE;
518
519 /* We should be hoisting calls if possible. */
520 if (is_gimple_call (stmt))
521 {
522 tree fndecl;
523
524 /* Unless the call is a builtin_constant_p; this always folds to a
525 constant, so moving it is useless. */
526 fndecl = gimple_call_fndecl (stmt);
527 if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_CONSTANT_P))
528 return 0;
529
530 return LIM_EXPENSIVE;
531 }
532
533 /* Hoisting memory references out should almost surely be a win. */
534 if (gimple_references_memory_p (stmt))
535 return LIM_EXPENSIVE;
536
537 if (gimple_code (stmt) != GIMPLE_ASSIGN)
538 return 1;
539
540 switch (gimple_assign_rhs_code (stmt))
541 {
542 case MULT_EXPR:
543 case WIDEN_MULT_EXPR:
544 case WIDEN_MULT_PLUS_EXPR:
545 case WIDEN_MULT_MINUS_EXPR:
546 case DOT_PROD_EXPR:
547 case TRUNC_DIV_EXPR:
548 case CEIL_DIV_EXPR:
549 case FLOOR_DIV_EXPR:
550 case ROUND_DIV_EXPR:
551 case EXACT_DIV_EXPR:
552 case CEIL_MOD_EXPR:
553 case FLOOR_MOD_EXPR:
554 case ROUND_MOD_EXPR:
555 case TRUNC_MOD_EXPR:
556 case RDIV_EXPR:
557 /* Division and multiplication are usually expensive. */
558 return LIM_EXPENSIVE;
559
560 case LSHIFT_EXPR:
561 case RSHIFT_EXPR:
562 case WIDEN_LSHIFT_EXPR:
563 case LROTATE_EXPR:
564 case RROTATE_EXPR:
565 /* Shifts and rotates are usually expensive. */
566 return LIM_EXPENSIVE;
567
568 case CONSTRUCTOR:
569 /* Make vector construction cost proportional to the number
570 of elements. */
571 return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));
572
573 case SSA_NAME:
574 case PAREN_EXPR:
575 /* Whether or not something is wrapped inside a PAREN_EXPR
576 should not change move cost. Nor should an intermediate
577 unpropagated SSA name copy. */
578 return 0;
579
580 default:
581 return 1;
582 }
583 }
584
585 /* Finds the outermost loop between OUTER and LOOP in which the memory reference
586 REF is independent. If REF is not independent in LOOP, NULL is returned
587 instead. */
588
589 static class loop *
590 outermost_indep_loop (class loop *outer, class loop *loop, im_mem_ref *ref)
591 {
592 class loop *aloop;
593
594 if (ref->stored && bitmap_bit_p (ref->stored, loop->num))
595 return NULL;
596
597 for (aloop = outer;
598 aloop != loop;
599 aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
600 if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
601 && ref_indep_loop_p (aloop, ref, lim_raw))
602 return aloop;
603
604 if (ref_indep_loop_p (loop, ref, lim_raw))
605 return loop;
606 else
607 return NULL;
608 }
609
610 /* If there is a simple load or store to a memory reference in STMT, returns
611 the location of the memory reference, and sets IS_STORE according to whether
612 it is a store or load. Otherwise, returns NULL. */
613
614 static tree *
615 simple_mem_ref_in_stmt (gimple *stmt, bool *is_store)
616 {
617 tree *lhs, *rhs;
618
619 /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns. */
620 if (!gimple_assign_single_p (stmt))
621 return NULL;
622
623 lhs = gimple_assign_lhs_ptr (stmt);
624 rhs = gimple_assign_rhs1_ptr (stmt);
625
626 if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
627 {
628 *is_store = false;
629 return rhs;
630 }
631 else if (gimple_vdef (stmt)
632 && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
633 {
634 *is_store = true;
635 return lhs;
636 }
637 else
638 return NULL;
639 }
640
641 /* From a controlling predicate in DOM determine the arguments from
642 the PHI node PHI that are chosen if the predicate evaluates to
643 true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
644 they are non-NULL. Returns true if the arguments can be determined,
645 else return false. */
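
/* For instance, given a diamond whose controlling predicate in DOM is

     if (x_1 > 0)

   and a PHI node

     # res_2 = PHI <a_3(true edge), b_4(false edge)>

   the function stores a_3 to *TRUE_ARG_P and b_4 to *FALSE_ARG_P.  */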
646
647 static bool
648 extract_true_false_args_from_phi (basic_block dom, gphi *phi,
649 tree *true_arg_p, tree *false_arg_p)
650 {
651 edge te, fe;
652 if (! extract_true_false_controlled_edges (dom, gimple_bb (phi),
653 &te, &fe))
654 return false;
655
656 if (true_arg_p)
657 *true_arg_p = PHI_ARG_DEF (phi, te->dest_idx);
658 if (false_arg_p)
659 *false_arg_p = PHI_ARG_DEF (phi, fe->dest_idx);
660
661 return true;
662 }
663
664 /* Determine the outermost loop to which it is possible to hoist a statement
665 STMT and store it to LIM_DATA (STMT)->max_loop. To do this we determine
666 the outermost loop in which the value computed by STMT is invariant.
667 If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
668 we preserve the fact whether STMT is executed. It also fills in other
669 related information in LIM_DATA (STMT).
670
671 The function returns false if STMT cannot be hoisted outside of the loop it
672 is defined in, and true otherwise. */
673
674 static bool
675 determine_max_movement (gimple *stmt, bool must_preserve_exec)
676 {
677 basic_block bb = gimple_bb (stmt);
678 class loop *loop = bb->loop_father;
679 class loop *level;
680 struct lim_aux_data *lim_data = get_lim_data (stmt);
681 tree val;
682 ssa_op_iter iter;
683
684 if (must_preserve_exec)
685 level = ALWAYS_EXECUTED_IN (bb);
686 else
687 level = superloop_at_depth (loop, 1);
688 lim_data->max_loop = level;
689
690 if (gphi *phi = dyn_cast <gphi *> (stmt))
691 {
692 use_operand_p use_p;
693 unsigned min_cost = UINT_MAX;
694 unsigned total_cost = 0;
695 struct lim_aux_data *def_data;
696
697 /* We will end up promoting dependencies to be unconditionally
698 evaluated. For this reason the PHI cost (and thus the
699 cost we remove from the loop by doing the invariant motion)
700 is that of the cheapest PHI argument dependency chain. */
701 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
702 {
703 val = USE_FROM_PTR (use_p);
704
705 if (TREE_CODE (val) != SSA_NAME)
706 {
707 /* Assign cost 1 to constants. */
708 min_cost = MIN (min_cost, 1);
709 total_cost += 1;
710 continue;
711 }
712 if (!add_dependency (val, lim_data, loop, false))
713 return false;
714
715 gimple *def_stmt = SSA_NAME_DEF_STMT (val);
716 if (gimple_bb (def_stmt)
717 && gimple_bb (def_stmt)->loop_father == loop)
718 {
719 def_data = get_lim_data (def_stmt);
720 if (def_data)
721 {
722 min_cost = MIN (min_cost, def_data->cost);
723 total_cost += def_data->cost;
724 }
725 }
726 }
727
728 min_cost = MIN (min_cost, total_cost);
729 lim_data->cost += min_cost;
730
731 if (gimple_phi_num_args (phi) > 1)
732 {
733 basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
734 gimple *cond;
735 if (gsi_end_p (gsi_last_bb (dom)))
736 return false;
737 cond = gsi_stmt (gsi_last_bb (dom));
738 if (gimple_code (cond) != GIMPLE_COND)
739 return false;
740 /* Verify that this is an extended form of a diamond and
741 the PHI arguments are completely controlled by the
742 predicate in DOM. */
743 if (!extract_true_false_args_from_phi (dom, phi, NULL, NULL))
744 return false;
745
746 /* Fold in dependencies and cost of the condition. */
747 FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
748 {
749 if (!add_dependency (val, lim_data, loop, false))
750 return false;
751 def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
752 if (def_data)
753 lim_data->cost += def_data->cost;
754 }
755
756 /* We want to avoid unconditionally executing very expensive
757 operations. As costs for our dependencies cannot be
758 negative, just claim we are not invariant for this case.
759 We also are not sure whether the control-flow inside the
760 loop will vanish. */
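
/* For example, assuming LIM_EXPENSIVE is 20: a two-argument PHI whose
   cheaper dependency chain costs 1 and whose chains cost 50 in total
   would add 50 - 1 = 49 >= 2 * 20 units of unconditionally executed
   work while removing only the cheap chain from the loop, so the test
   below rejects it.  */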
761 if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
762 && !(min_cost != 0
763 && total_cost / min_cost <= 2))
764 return false;
765
766 /* Assume that the control-flow in the loop will vanish.
767 ??? We should verify this and not artificially increase
768 the cost if that is not the case. */
769 lim_data->cost += stmt_cost (stmt);
770 }
771
772 return true;
773 }
774 else
775 FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
776 if (!add_dependency (val, lim_data, loop, true))
777 return false;
778
779 if (gimple_vuse (stmt))
780 {
781 im_mem_ref *ref
782 = lim_data ? memory_accesses.refs_list[lim_data->ref] : NULL;
783 if (ref
784 && MEM_ANALYZABLE (ref))
785 {
786 lim_data->max_loop = outermost_indep_loop (lim_data->max_loop,
787 loop, ref);
788 if (!lim_data->max_loop)
789 return false;
790 }
791 else if (! add_dependency (gimple_vuse (stmt), lim_data, loop, false))
792 return false;
793 }
794
795 lim_data->cost += stmt_cost (stmt);
796
797 return true;
798 }
799
800 /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
801 and that one of the operands of this statement is computed by STMT.
802 Ensure that STMT (together with all the statements that define its
803 operands) is hoisted at least out of the loop LEVEL. */
804
805 static void
806 set_level (gimple *stmt, class loop *orig_loop, class loop *level)
807 {
808 class loop *stmt_loop = gimple_bb (stmt)->loop_father;
809 struct lim_aux_data *lim_data;
810 gimple *dep_stmt;
811 unsigned i;
812
813 stmt_loop = find_common_loop (orig_loop, stmt_loop);
814 lim_data = get_lim_data (stmt);
815 if (lim_data != NULL && lim_data->tgt_loop != NULL)
816 stmt_loop = find_common_loop (stmt_loop,
817 loop_outer (lim_data->tgt_loop));
818 if (flow_loop_nested_p (stmt_loop, level))
819 return;
820
821 gcc_assert (level == lim_data->max_loop
822 || flow_loop_nested_p (lim_data->max_loop, level));
823
824 lim_data->tgt_loop = level;
825 FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
826 set_level (dep_stmt, orig_loop, level);
827 }
828
829 /* Determines an outermost loop from which we want to hoist the statement STMT.
830 For now we choose the outermost possible loop. TODO -- use profiling
831 information to set it more sanely. */
832
833 static void
834 set_profitable_level (gimple *stmt)
835 {
836 set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
837 }
838
839 /* Returns true if STMT is a call that has side effects. */
840
841 static bool
842 nonpure_call_p (gimple *stmt)
843 {
844 if (gimple_code (stmt) != GIMPLE_CALL)
845 return false;
846
847 return gimple_has_side_effects (stmt);
848 }
849
850 /* Rewrite a/b to a*(1/b). Return the invariant stmt to process. */
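
/* For a loop-invariant divisor b_2 this turns, for example,

     x_3 = a_1 / b_2;

   into

     reciptmp_4 = 1.0e+0 / b_2;
     x_3 = a_1 * reciptmp_4;

   where the reciprocal statement is invariant and can be hoisted while
   the multiplication stays in the loop.  */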
851
852 static gimple *
853 rewrite_reciprocal (gimple_stmt_iterator *bsi)
854 {
855 gassign *stmt, *stmt1, *stmt2;
856 tree name, lhs, type;
857 tree real_one;
858 gimple_stmt_iterator gsi;
859
860 stmt = as_a <gassign *> (gsi_stmt (*bsi));
861 lhs = gimple_assign_lhs (stmt);
862 type = TREE_TYPE (lhs);
863
864 real_one = build_one_cst (type);
865
866 name = make_temp_ssa_name (type, NULL, "reciptmp");
867 stmt1 = gimple_build_assign (name, RDIV_EXPR, real_one,
868 gimple_assign_rhs2 (stmt));
869 stmt2 = gimple_build_assign (lhs, MULT_EXPR, name,
870 gimple_assign_rhs1 (stmt));
871
872 /* Replace division stmt with reciprocal and multiply stmts.
873 The multiply stmt is not invariant, so update iterator
874 and avoid rescanning. */
875 gsi = *bsi;
876 gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
877 gsi_replace (&gsi, stmt2, true);
878
879 /* Continue processing with invariant reciprocal statement. */
880 return stmt1;
881 }
882
883 /* Check if the pattern at *BSI is a bittest of the form
884 (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0. */
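
/* For a loop-invariant shift count B_2 this turns, for example,

     tmp_3 = A_1 >> B_2;
     tmp_4 = tmp_3 & 1;
     if (tmp_4 != 0)

   into

     shifttmp_5 = 1 << B_2;
     shifttmp_6 = A_1 & shifttmp_5;
     if (shifttmp_6 != 0)

   where the mask computation 1 << B_2 is invariant and can be hoisted
   out of the loop.  */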
885
886 static gimple *
887 rewrite_bittest (gimple_stmt_iterator *bsi)
888 {
889 gassign *stmt;
890 gimple *stmt1;
891 gassign *stmt2;
892 gimple *use_stmt;
893 gcond *cond_stmt;
894 tree lhs, name, t, a, b;
895 use_operand_p use;
896
897 stmt = as_a <gassign *> (gsi_stmt (*bsi));
898 lhs = gimple_assign_lhs (stmt);
899
900 /* Verify that the single use of lhs is a comparison against zero. */
901 if (TREE_CODE (lhs) != SSA_NAME
902 || !single_imm_use (lhs, &use, &use_stmt))
903 return stmt;
904 cond_stmt = dyn_cast <gcond *> (use_stmt);
905 if (!cond_stmt)
906 return stmt;
907 if (gimple_cond_lhs (cond_stmt) != lhs
908 || (gimple_cond_code (cond_stmt) != NE_EXPR
909 && gimple_cond_code (cond_stmt) != EQ_EXPR)
910 || !integer_zerop (gimple_cond_rhs (cond_stmt)))
911 return stmt;
912
913 /* Get at the operands of the shift. The rhs is TMP1 & 1. */
914 stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
915 if (gimple_code (stmt1) != GIMPLE_ASSIGN)
916 return stmt;
917
918 /* There may be a conversion in between, possibly inserted by fold. */
919 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
920 {
921 t = gimple_assign_rhs1 (stmt1);
922 if (TREE_CODE (t) != SSA_NAME
923 || !has_single_use (t))
924 return stmt;
925 stmt1 = SSA_NAME_DEF_STMT (t);
926 if (gimple_code (stmt1) != GIMPLE_ASSIGN)
927 return stmt;
928 }
929
930 /* Verify that B is loop invariant but A is not. Verify that with
931 all the stmt walking we are still in the same loop. */
932 if (gimple_assign_rhs_code (stmt1) != RSHIFT_EXPR
933 || loop_containing_stmt (stmt1) != loop_containing_stmt (stmt))
934 return stmt;
935
936 a = gimple_assign_rhs1 (stmt1);
937 b = gimple_assign_rhs2 (stmt1);
938
939 if (outermost_invariant_loop (b, loop_containing_stmt (stmt1)) != NULL
940 && outermost_invariant_loop (a, loop_containing_stmt (stmt1)) == NULL)
941 {
942 gimple_stmt_iterator rsi;
943
944 /* 1 << B */
945 t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
946 build_int_cst (TREE_TYPE (a), 1), b);
947 name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
948 stmt1 = gimple_build_assign (name, t);
949
950 /* A & (1 << B) */
951 t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
952 name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
953 stmt2 = gimple_build_assign (name, t);
954
955 /* Replace the SSA_NAME we compare against zero. Adjust
956 the type of zero accordingly. */
957 SET_USE (use, name);
958 gimple_cond_set_rhs (cond_stmt,
959 build_int_cst_type (TREE_TYPE (name),
960 0));
961
962 /* Don't use gsi_replace here; none of the new assignments sets
963 the variable originally set in stmt. Move bsi to stmt1, and
964 then remove the original stmt, so that we get a chance to
965 retain debug info for it. */
966 rsi = *bsi;
967 gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
968 gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
969 gimple *to_release = gsi_stmt (rsi);
970 gsi_remove (&rsi, true);
971 release_defs (to_release);
972
973 return stmt1;
974 }
975
976 return stmt;
977 }
978
979 /* Determine the outermost loops in which statements in basic block BB are
980 invariant, and record them to the LIM_DATA associated with the
981 statements. */
982
983 static void
984 compute_invariantness (basic_block bb)
985 {
986 enum move_pos pos;
987 gimple_stmt_iterator bsi;
988 gimple *stmt;
989 bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
990 class loop *outermost = ALWAYS_EXECUTED_IN (bb);
991 struct lim_aux_data *lim_data;
992
993 if (!loop_outer (bb->loop_father))
994 return;
995
996 if (dump_file && (dump_flags & TDF_DETAILS))
997 fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
998 bb->index, bb->loop_father->num, loop_depth (bb->loop_father));
999
1000 /* Look at PHI nodes, but only if there are at most two.
1001 ??? We could relax this further by post-processing the inserted
1002 code and transforming adjacent cond-exprs with the same predicate
1003 to control flow again. */
1004 bsi = gsi_start_phis (bb);
1005 if (!gsi_end_p (bsi)
1006 && ((gsi_next (&bsi), gsi_end_p (bsi))
1007 || (gsi_next (&bsi), gsi_end_p (bsi))))
1008 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1009 {
1010 stmt = gsi_stmt (bsi);
1011
1012 pos = movement_possibility (stmt);
1013 if (pos == MOVE_IMPOSSIBLE)
1014 continue;
1015
1016 lim_data = get_lim_data (stmt);
1017 if (! lim_data)
1018 lim_data = init_lim_data (stmt);
1019 lim_data->always_executed_in = outermost;
1020
1021 if (!determine_max_movement (stmt, false))
1022 {
1023 lim_data->max_loop = NULL;
1024 continue;
1025 }
1026
1027 if (dump_file && (dump_flags & TDF_DETAILS))
1028 {
1029 print_gimple_stmt (dump_file, stmt, 2);
1030 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
1031 loop_depth (lim_data->max_loop),
1032 lim_data->cost);
1033 }
1034
1035 if (lim_data->cost >= LIM_EXPENSIVE)
1036 set_profitable_level (stmt);
1037 }
1038
1039 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1040 {
1041 stmt = gsi_stmt (bsi);
1042
1043 pos = movement_possibility (stmt);
1044 if (pos == MOVE_IMPOSSIBLE)
1045 {
1046 if (nonpure_call_p (stmt))
1047 {
1048 maybe_never = true;
1049 outermost = NULL;
1050 }
1051 /* Make sure to note always_executed_in for stores to make
1052 store-motion work. */
1053 else if (stmt_makes_single_store (stmt))
1054 {
1055 struct lim_aux_data *lim_data = get_lim_data (stmt);
1056 if (! lim_data)
1057 lim_data = init_lim_data (stmt);
1058 lim_data->always_executed_in = outermost;
1059 }
1060 continue;
1061 }
1062
1063 if (is_gimple_assign (stmt)
1064 && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1065 == GIMPLE_BINARY_RHS))
1066 {
1067 tree op0 = gimple_assign_rhs1 (stmt);
1068 tree op1 = gimple_assign_rhs2 (stmt);
1069 class loop *ol1 = outermost_invariant_loop (op1,
1070 loop_containing_stmt (stmt));
1071
1072 /* If the divisor is invariant, convert a/b to a*(1/b), allowing the reciprocal
1073 to be hoisted out of the loop, saving an expensive divide. */
1074 if (pos == MOVE_POSSIBLE
1075 && gimple_assign_rhs_code (stmt) == RDIV_EXPR
1076 && flag_unsafe_math_optimizations
1077 && !flag_trapping_math
1078 && ol1 != NULL
1079 && outermost_invariant_loop (op0, ol1) == NULL)
1080 stmt = rewrite_reciprocal (&bsi);
1081
1082 /* If the shift count is invariant, convert (A >> B) & 1 to
1083 A & (1 << B), allowing the bit mask to be hoisted out of the loop,
1084 saving an expensive shift. */
1085 if (pos == MOVE_POSSIBLE
1086 && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
1087 && integer_onep (op1)
1088 && TREE_CODE (op0) == SSA_NAME
1089 && has_single_use (op0))
1090 stmt = rewrite_bittest (&bsi);
1091 }
1092
1093 lim_data = get_lim_data (stmt);
1094 if (! lim_data)
1095 lim_data = init_lim_data (stmt);
1096 lim_data->always_executed_in = outermost;
1097
1098 if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
1099 continue;
1100
1101 if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
1102 {
1103 lim_data->max_loop = NULL;
1104 continue;
1105 }
1106
1107 if (dump_file && (dump_flags & TDF_DETAILS))
1108 {
1109 print_gimple_stmt (dump_file, stmt, 2);
1110 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
1111 loop_depth (lim_data->max_loop),
1112 lim_data->cost);
1113 }
1114
1115 if (lim_data->cost >= LIM_EXPENSIVE)
1116 set_profitable_level (stmt);
1117 }
1118 }
1119
1120 /* Hoist the statements in basic block BB out of the loops prescribed by
1121 data stored in LIM_DATA structures associated with each statement. Callback
1122 for walk_dominator_tree. */
1123
1124 unsigned int
1125 move_computations_worker (basic_block bb)
1126 {
1127 class loop *level;
1128 unsigned cost = 0;
1129 struct lim_aux_data *lim_data;
1130 unsigned int todo = 0;
1131
1132 if (!loop_outer (bb->loop_father))
1133 return todo;
1134
1135 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
1136 {
1137 gassign *new_stmt;
1138 gphi *stmt = bsi.phi ();
1139
1140 lim_data = get_lim_data (stmt);
1141 if (lim_data == NULL)
1142 {
1143 gsi_next (&bsi);
1144 continue;
1145 }
1146
1147 cost = lim_data->cost;
1148 level = lim_data->tgt_loop;
1149 clear_lim_data (stmt);
1150
1151 if (!level)
1152 {
1153 gsi_next (&bsi);
1154 continue;
1155 }
1156
1157 if (dump_file && (dump_flags & TDF_DETAILS))
1158 {
1159 fprintf (dump_file, "Moving PHI node\n");
1160 print_gimple_stmt (dump_file, stmt, 0);
1161 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1162 cost, level->num);
1163 }
1164
1165 if (gimple_phi_num_args (stmt) == 1)
1166 {
1167 tree arg = PHI_ARG_DEF (stmt, 0);
1168 new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1169 TREE_CODE (arg), arg);
1170 }
1171 else
1172 {
1173 basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
1174 gimple *cond = gsi_stmt (gsi_last_bb (dom));
1175 tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
1176 /* Get the PHI arguments corresponding to the true and false
1177 edges of COND. */
1178 extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
1179 gcc_assert (arg0 && arg1);
1180 t = build2 (gimple_cond_code (cond), boolean_type_node,
1181 gimple_cond_lhs (cond), gimple_cond_rhs (cond));
1182 new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1183 COND_EXPR, t, arg0, arg1);
1184 todo |= TODO_cleanup_cfg;
1185 }
1186 if (INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (new_stmt)))
1187 && (!ALWAYS_EXECUTED_IN (bb)
1188 || (ALWAYS_EXECUTED_IN (bb) != level
1189 && !flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1190 {
1191 tree lhs = gimple_assign_lhs (new_stmt);
1192 SSA_NAME_RANGE_INFO (lhs) = NULL;
1193 }
1194 gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
1195 remove_phi_node (&bsi, false);
1196 }
1197
1198 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
1199 {
1200 edge e;
1201
1202 gimple *stmt = gsi_stmt (bsi);
1203
1204 lim_data = get_lim_data (stmt);
1205 if (lim_data == NULL)
1206 {
1207 gsi_next (&bsi);
1208 continue;
1209 }
1210
1211 cost = lim_data->cost;
1212 level = lim_data->tgt_loop;
1213 clear_lim_data (stmt);
1214
1215 if (!level)
1216 {
1217 gsi_next (&bsi);
1218 continue;
1219 }
1220
1221 /* We do not really want to move the conditional out of the loop; we just
1222 placed it here to force its operands to be moved if necessary. */
1223 if (gimple_code (stmt) == GIMPLE_COND)
1224 continue;
1225
1226 if (dump_file && (dump_flags & TDF_DETAILS))
1227 {
1228 fprintf (dump_file, "Moving statement\n");
1229 print_gimple_stmt (dump_file, stmt, 0);
1230 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1231 cost, level->num);
1232 }
1233
1234 e = loop_preheader_edge (level);
1235 gcc_assert (!gimple_vdef (stmt));
1236 if (gimple_vuse (stmt))
1237 {
1238 /* The new VUSE is the one from the virtual PHI in the loop
1239 header or the one already present. */
1240 gphi_iterator gsi2;
1241 for (gsi2 = gsi_start_phis (e->dest);
1242 !gsi_end_p (gsi2); gsi_next (&gsi2))
1243 {
1244 gphi *phi = gsi2.phi ();
1245 if (virtual_operand_p (gimple_phi_result (phi)))
1246 {
1247 SET_USE (gimple_vuse_op (stmt),
1248 PHI_ARG_DEF_FROM_EDGE (phi, e));
1249 break;
1250 }
1251 }
1252 }
1253 gsi_remove (&bsi, false);
1254 if (gimple_has_lhs (stmt)
1255 && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
1256 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_get_lhs (stmt)))
1257 && (!ALWAYS_EXECUTED_IN (bb)
1258 || !(ALWAYS_EXECUTED_IN (bb) == level
1259 || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1260 {
1261 tree lhs = gimple_get_lhs (stmt);
1262 SSA_NAME_RANGE_INFO (lhs) = NULL;
1263 }
1264 /* In case this is a stmt that is not unconditionally executed
1265 when the target loop header is executed and the stmt may
1266 invoke undefined integer or pointer overflow, rewrite it to
1267 unsigned arithmetic. */
1268 if (is_gimple_assign (stmt)
1269 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt)))
1270 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (gimple_assign_lhs (stmt)))
1271 && arith_code_with_undefined_signed_overflow
1272 (gimple_assign_rhs_code (stmt))
1273 && (!ALWAYS_EXECUTED_IN (bb)
1274 || !(ALWAYS_EXECUTED_IN (bb) == level
1275 || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1276 gsi_insert_seq_on_edge (e, rewrite_to_defined_overflow (stmt));
1277 else
1278 gsi_insert_on_edge (e, stmt);
1279 }
1280
1281 return todo;
1282 }
1283
1284 /* Checks whether the statement defining variable *INDEX can be hoisted
1285 out of the loop passed in DATA. Callback for for_each_index. */
1286
1287 static bool
1288 may_move_till (tree ref, tree *index, void *data)
1289 {
1290 class loop *loop = (class loop *) data, *max_loop;
1291
1292 /* If REF is an array reference, check also that the step and the lower
1293 bound are invariant in LOOP. */
1294 if (TREE_CODE (ref) == ARRAY_REF)
1295 {
1296 tree step = TREE_OPERAND (ref, 3);
1297 tree lbound = TREE_OPERAND (ref, 2);
1298
1299 max_loop = outermost_invariant_loop (step, loop);
1300 if (!max_loop)
1301 return false;
1302
1303 max_loop = outermost_invariant_loop (lbound, loop);
1304 if (!max_loop)
1305 return false;
1306 }
1307
1308 max_loop = outermost_invariant_loop (*index, loop);
1309 if (!max_loop)
1310 return false;
1311
1312 return true;
1313 }
1314
1315 /* If OP is an SSA_NAME, force the statement that defines it to be
1316 moved out of the LOOP. ORIG_LOOP is the loop in which OP is used. */
1317
1318 static void
1319 force_move_till_op (tree op, class loop *orig_loop, class loop *loop)
1320 {
1321 gimple *stmt;
1322
1323 if (!op
1324 || is_gimple_min_invariant (op))
1325 return;
1326
1327 gcc_assert (TREE_CODE (op) == SSA_NAME);
1328
1329 stmt = SSA_NAME_DEF_STMT (op);
1330 if (gimple_nop_p (stmt))
1331 return;
1332
1333 set_level (stmt, orig_loop, loop);
1334 }
1335
1336 /* Forces statements defining invariants in REF (and *INDEX) to be moved out of
1337 the LOOP. The reference REF is used in the loop ORIG_LOOP. Callback for
1338 for_each_index. */
1339
1340 struct fmt_data
1341 {
1342 class loop *loop;
1343 class loop *orig_loop;
1344 };
1345
1346 static bool
1347 force_move_till (tree ref, tree *index, void *data)
1348 {
1349 struct fmt_data *fmt_data = (struct fmt_data *) data;
1350
1351 if (TREE_CODE (ref) == ARRAY_REF)
1352 {
1353 tree step = TREE_OPERAND (ref, 3);
1354 tree lbound = TREE_OPERAND (ref, 2);
1355
1356 force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
1357 force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
1358 }
1359
1360 force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);
1361
1362 return true;
1363 }
1364
1365 /* A function to free the mem_ref object MEM. */
1366
1367 static void
1368 memref_free (class im_mem_ref *mem)
1369 {
1370 mem->accesses_in_loop.release ();
1371 }
1372
1373 /* Allocates and returns a memory reference description for MEM whose hash
1374 value is HASH and id is ID. */
1375
1376 static im_mem_ref *
1377 mem_ref_alloc (ao_ref *mem, unsigned hash, unsigned id)
1378 {
1379 im_mem_ref *ref = XOBNEW (&mem_ref_obstack, class im_mem_ref);
1380 if (mem)
1381 ref->mem = *mem;
1382 else
1383 ao_ref_init (&ref->mem, error_mark_node);
1384 ref->id = id;
1385 ref->ref_canonical = false;
1386 ref->ref_decomposed = false;
1387 ref->hash = hash;
1388 ref->stored = NULL;
1389 ref->loaded = NULL;
1390 bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
1391 ref->accesses_in_loop.create (1);
1392
1393 return ref;
1394 }
1395
1396 /* Records memory reference location *LOC to the memory reference
1397 description REF. The reference occurs in statement STMT. */
1398
1399 static void
1400 record_mem_ref_loc (im_mem_ref *ref, gimple *stmt, tree *loc)
1401 {
1402 mem_ref_loc aref;
1403 aref.stmt = stmt;
1404 aref.ref = loc;
1405 ref->accesses_in_loop.safe_push (aref);
1406 }
1407
1408 /* Set the LOOP bit in REF's stored bitmap, allocating the bitmap if
1409 necessary. Return whether a bit was changed. */
1410
1411 static bool
1412 set_ref_stored_in_loop (im_mem_ref *ref, class loop *loop)
1413 {
1414 if (!ref->stored)
1415 ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
1416 return bitmap_set_bit (ref->stored, loop->num);
1417 }
1418
1419 /* Marks reference REF as stored in LOOP. */
1420
1421 static void
1422 mark_ref_stored (im_mem_ref *ref, class loop *loop)
1423 {
1424 while (loop != current_loops->tree_root
1425 && set_ref_stored_in_loop (ref, loop))
1426 loop = loop_outer (loop);
1427 }
1428
1429 /* Set the LOOP bit in REF's loaded bitmap, allocating the bitmap if
1430 necessary. Return whether a bit was changed. */
1431
1432 static bool
1433 set_ref_loaded_in_loop (im_mem_ref *ref, class loop *loop)
1434 {
1435 if (!ref->loaded)
1436 ref->loaded = BITMAP_ALLOC (&lim_bitmap_obstack);
1437 return bitmap_set_bit (ref->loaded, loop->num);
1438 }
1439
1440 /* Marks reference REF as loaded in LOOP. */
1441
1442 static void
1443 mark_ref_loaded (im_mem_ref *ref, class loop *loop)
1444 {
1445 while (loop != current_loops->tree_root
1446 && set_ref_loaded_in_loop (ref, loop))
1447 loop = loop_outer (loop);
1448 }
1449
1450 /* Gathers memory references in statement STMT in LOOP, storing the
1451 information about them in the memory_accesses structure. Marks
1452 the vops accessed through unrecognized statements there as
1453 well. */
1454
1455 static void
1456 gather_mem_refs_stmt (class loop *loop, gimple *stmt)
1457 {
1458 tree *mem = NULL;
1459 hashval_t hash;
1460 im_mem_ref **slot;
1461 im_mem_ref *ref;
1462 bool is_stored;
1463 unsigned id;
1464
1465 if (!gimple_vuse (stmt))
1466 return;
1467
1468 mem = simple_mem_ref_in_stmt (stmt, &is_stored);
1469 if (!mem && is_gimple_assign (stmt))
1470 {
1471 /* For aggregate copies record distinct references but use them
1472 only for disambiguation purposes. */
1473 id = memory_accesses.refs_list.length ();
1474 ref = mem_ref_alloc (NULL, 0, id);
1475 memory_accesses.refs_list.safe_push (ref);
1476 if (dump_file && (dump_flags & TDF_DETAILS))
1477 {
1478 fprintf (dump_file, "Unhandled memory reference %u: ", id);
1479 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1480 }
1481 record_mem_ref_loc (ref, stmt, mem);
1482 is_stored = gimple_vdef (stmt);
1483 }
1484 else if (!mem)
1485 {
1486 /* We use the shared mem_ref for all unanalyzable refs. */
1487 id = UNANALYZABLE_MEM_ID;
1488 ref = memory_accesses.refs_list[id];
1489 if (dump_file && (dump_flags & TDF_DETAILS))
1490 {
1491 fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
1492 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1493 }
1494 is_stored = gimple_vdef (stmt);
1495 }
1496 else
1497 {
1498 /* We are looking for equal refs that might differ in structure
1499 such as a.b vs. MEM[&a + 4]. So we key off the ao_ref but
1500 make sure we can canonicalize the ref in the hashtable if
1501 non-operand_equal_p refs are found. For the lookup we mark
1502 the case we want strict equality with aor.max_size == -1. */
1503 ao_ref aor;
1504 ao_ref_init (&aor, *mem);
1505 ao_ref_base (&aor);
1506 ao_ref_alias_set (&aor);
1507 HOST_WIDE_INT offset, size, max_size;
1508 poly_int64 saved_maxsize = aor.max_size, mem_off;
1509 tree mem_base;
1510 bool ref_decomposed;
1511 if (aor.max_size_known_p ()
1512 && aor.offset.is_constant (&offset)
1513 && aor.size.is_constant (&size)
1514 && aor.max_size.is_constant (&max_size)
1515 && size == max_size
1516 && (size % BITS_PER_UNIT) == 0
1517 /* We're canonicalizing to a MEM where TYPE_SIZE specifies the
1518 size. Make sure this is consistent with the extraction. */
1519 && poly_int_tree_p (TYPE_SIZE (TREE_TYPE (*mem)))
1520 && known_eq (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (*mem))),
1521 aor.size)
1522 && (mem_base = get_addr_base_and_unit_offset (aor.ref, &mem_off)))
1523 {
1524 ref_decomposed = true;
1525 tree base = ao_ref_base (&aor);
1526 poly_int64 moffset;
1527 HOST_WIDE_INT mcoffset;
1528 if (TREE_CODE (base) == MEM_REF
1529 && (mem_ref_offset (base) * BITS_PER_UNIT + offset).to_shwi (&moffset)
1530 && moffset.is_constant (&mcoffset))
1531 {
1532 hash = iterative_hash_expr (TREE_OPERAND (base, 0), 0);
1533 hash = iterative_hash_host_wide_int (mcoffset, hash);
1534 }
1535 else
1536 {
1537 hash = iterative_hash_expr (base, 0);
1538 hash = iterative_hash_host_wide_int (offset, hash);
1539 }
1540 hash = iterative_hash_host_wide_int (size, hash);
1541 }
1542 else
1543 {
1544 ref_decomposed = false;
1545 hash = iterative_hash_expr (aor.ref, 0);
1546 aor.max_size = -1;
1547 }
1548 slot = memory_accesses.refs->find_slot_with_hash (&aor, hash, INSERT);
1549 aor.max_size = saved_maxsize;
1550 if (*slot)
1551 {
1552 if (!(*slot)->ref_canonical
1553 && !operand_equal_p (*mem, (*slot)->mem.ref, 0))
1554 {
1555 /* If we didn't yet canonicalize the hashtable ref (which
1556 we'll end up using for code insertion) and hit a second
1557 equal ref that is not structurally equivalent, create
1558 a canonical ref which is a bare MEM_REF. */
1559 if (TREE_CODE (*mem) == MEM_REF
1560 || TREE_CODE (*mem) == TARGET_MEM_REF)
1561 {
1562 (*slot)->mem.ref = *mem;
1563 (*slot)->mem.base_alias_set = ao_ref_base_alias_set (&aor);
1564 }
1565 else
1566 {
1567 tree ref_alias_type = reference_alias_ptr_type (*mem);
1568 unsigned int ref_align = get_object_alignment (*mem);
1569 tree ref_type = TREE_TYPE (*mem);
1570 tree tmp = build1 (ADDR_EXPR, ptr_type_node,
1571 unshare_expr (mem_base));
1572 if (TYPE_ALIGN (ref_type) != ref_align)
1573 ref_type = build_aligned_type (ref_type, ref_align);
1574 (*slot)->mem.ref
1575 = fold_build2 (MEM_REF, ref_type, tmp,
1576 build_int_cst (ref_alias_type, mem_off));
1577 if ((*slot)->mem.volatile_p)
1578 TREE_THIS_VOLATILE ((*slot)->mem.ref) = 1;
1579 gcc_checking_assert (TREE_CODE ((*slot)->mem.ref) == MEM_REF
1580 && is_gimple_mem_ref_addr
1581 (TREE_OPERAND ((*slot)->mem.ref,
1582 0)));
1583 (*slot)->mem.base_alias_set = (*slot)->mem.ref_alias_set;
1584 }
1585 (*slot)->ref_canonical = true;
1586 }
1587 ref = *slot;
1588 id = ref->id;
1589 }
1590 else
1591 {
1592 id = memory_accesses.refs_list.length ();
1593 ref = mem_ref_alloc (&aor, hash, id);
1594 ref->ref_decomposed = ref_decomposed;
1595 memory_accesses.refs_list.safe_push (ref);
1596 *slot = ref;
1597
1598 if (dump_file && (dump_flags & TDF_DETAILS))
1599 {
1600 fprintf (dump_file, "Memory reference %u: ", id);
1601 print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
1602 fprintf (dump_file, "\n");
1603 }
1604 }
1605
1606 record_mem_ref_loc (ref, stmt, mem);
1607 }
1608 if (is_stored)
1609 {
1610 bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
1611 mark_ref_stored (ref, loop);
1612 }
1613 /* A store that is not a simple memory op is also treated as a read. */
1614 if (!is_stored || id == UNANALYZABLE_MEM_ID
1615 || ref->mem.ref == error_mark_node)
1616 {
1617 bitmap_set_bit (&memory_accesses.refs_loaded_in_loop[loop->num], ref->id);
1618 mark_ref_loaded (ref, loop);
1619 }
1620 init_lim_data (stmt)->ref = ref->id;
1621 return;
1622 }
1623
1624 static unsigned *bb_loop_postorder;
1625
1626 /* qsort sort function to sort blocks by their loop father's postorder. */
1627
1628 static int
1629 sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_,
1630 void *bb_loop_postorder_)
1631 {
1632 unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1633 basic_block bb1 = *(const basic_block *)bb1_;
1634 basic_block bb2 = *(const basic_block *)bb2_;
1635 class loop *loop1 = bb1->loop_father;
1636 class loop *loop2 = bb2->loop_father;
1637 if (loop1->num == loop2->num)
1638 return bb1->index - bb2->index;
1639 return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1640 }
1641
1642 /* qsort sort function to sort ref locs by their loop father's postorder. */
1643
1644 static int
1645 sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_,
1646 void *bb_loop_postorder_)
1647 {
1648 unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1649 const mem_ref_loc *loc1 = (const mem_ref_loc *)loc1_;
1650 const mem_ref_loc *loc2 = (const mem_ref_loc *)loc2_;
1651 class loop *loop1 = gimple_bb (loc1->stmt)->loop_father;
1652 class loop *loop2 = gimple_bb (loc2->stmt)->loop_father;
1653 if (loop1->num == loop2->num)
1654 return 0;
1655 return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1656 }
1657
1658 /* Gathers memory references in loops. */
1659
1660 static void
1661 analyze_memory_references (bool store_motion)
1662 {
1663 gimple_stmt_iterator bsi;
1664 basic_block bb, *bbs;
1665 class loop *outer;
1666 unsigned i, n;
1667
1668 /* Collect all basic-blocks in loops and sort them by their
1669 loop's postorder. */
1670 i = 0;
1671 bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
1672 FOR_EACH_BB_FN (bb, cfun)
1673 if (bb->loop_father != current_loops->tree_root)
1674 bbs[i++] = bb;
1675 n = i;
1676 gcc_sort_r (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp,
1677 bb_loop_postorder);
1678
1679 /* Visit blocks in loop postorder and assign mem-ref IDs in that order.
1680 That results in better locality for all the bitmaps. It also
1681 automatically sorts the location list of gathered memory references
1682 by their loop postorder number, allowing it to be binary-searched. */
1683 for (i = 0; i < n; ++i)
1684 {
1685 basic_block bb = bbs[i];
1686 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1687 gather_mem_refs_stmt (bb->loop_father, gsi_stmt (bsi));
1688 }
1689
1690 /* Verify the list of gathered memory references is sorted after their
1691 loop postorder number. */
1692 if (flag_checking)
1693 {
1694 im_mem_ref *ref;
1695 FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
1696 for (unsigned j = 1; j < ref->accesses_in_loop.length (); ++j)
1697 gcc_assert (sort_locs_in_loop_postorder_cmp
1698 (&ref->accesses_in_loop[j-1], &ref->accesses_in_loop[j],
1699 bb_loop_postorder) <= 0);
1700 }
1701
1702 free (bbs);
1703
1704 if (!store_motion)
1705 return;
1706
1707 /* Propagate the information about accessed memory references up
1708 the loop hierarchy. */
1709 for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
1710 {
1711 /* Finalize the overall touched references (including subloops). */
1712 bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num],
1713 &memory_accesses.refs_stored_in_loop[loop->num]);
1714
1715 /* Propagate the information about accessed memory references up
1716 the loop hierarchy. */
1717 outer = loop_outer (loop);
1718 if (outer == current_loops->tree_root)
1719 continue;
1720
1721 bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num],
1722 &memory_accesses.all_refs_stored_in_loop[loop->num]);
1723 }
1724 }
1725
1726 /* Returns true if MEM1 and MEM2 may alias. TTAE_CACHE is used as a cache in
1727 tree_to_aff_combination_expand. */
1728
1729 static bool
1730 mem_refs_may_alias_p (im_mem_ref *mem1, im_mem_ref *mem2,
1731 hash_map<tree, name_expansion *> **ttae_cache,
1732 bool tbaa_p)
1733 {
1734 gcc_checking_assert (mem1->mem.ref != error_mark_node
1735 && mem2->mem.ref != error_mark_node);
1736
1737 /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
1738 object and their offsets differ in such a way that the locations cannot
1739 overlap, then they cannot alias. */
1740 poly_widest_int size1, size2;
1741 aff_tree off1, off2;
1742
1743 /* Perform basic offset and type-based disambiguation. */
1744 if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, tbaa_p))
1745 return false;
1746
1747 /* The expansion of addresses may be a bit expensive, thus we only do
1748 the check at -O2 and higher optimization levels. */
1749 if (optimize < 2)
1750 return true;
1751
1752 get_inner_reference_aff (mem1->mem.ref, &off1, &size1);
1753 get_inner_reference_aff (mem2->mem.ref, &off2, &size2);
1754 aff_combination_expand (&off1, ttae_cache);
1755 aff_combination_expand (&off2, ttae_cache);
1756 aff_combination_scale (&off1, -1);
1757 aff_combination_add (&off2, &off1);
1758
1759 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1760 return false;
1761
1762 return true;
1763 }
1764
1765 /* Compare function for bsearch searching for reference locations
1766 in a loop. */
1767
1768 static int
1769 find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_,
1770 void *bb_loop_postorder_)
1771 {
1772 unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1773 class loop *loop = (class loop *)const_cast<void *>(loop_);
1774 mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_);
1775 class loop *loc_loop = gimple_bb (loc->stmt)->loop_father;
1776 if (loop->num == loc_loop->num
1777 || flow_loop_nested_p (loop, loc_loop))
1778 return 0;
1779 return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num]
1780 ? -1 : 1);
1781 }
1782
1783 /* Iterates over all locations of REF in LOOP and its subloops calling
1784 fn.operator() with the location as argument. When that operator
1785 returns true the iteration is stopped and true is returned.
1786 Otherwise false is returned. */
1787
1788 template <typename FN>
1789 static bool
1790 for_all_locs_in_loop (class loop *loop, im_mem_ref *ref, FN fn)
1791 {
1792 unsigned i;
1793 mem_ref_loc *loc;
1794
1795 /* Search for the cluster of locs in the accesses_in_loop vector
1796 which is sorted by the postorder index of the loop father. */
1797 loc = ref->accesses_in_loop.bsearch (loop, find_ref_loc_in_loop_cmp,
1798 bb_loop_postorder);
1799 if (!loc)
1800 return false;
1801
1802 /* We have found one location inside loop or its sub-loops. Iterate
1803 both forward and backward to cover the whole cluster. */
1804 i = loc - ref->accesses_in_loop.address ();
1805 while (i > 0)
1806 {
1807 --i;
1808 mem_ref_loc *l = &ref->accesses_in_loop[i];
1809 if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1810 break;
1811 if (fn (l))
1812 return true;
1813 }
1814 for (i = loc - ref->accesses_in_loop.address ();
1815 i < ref->accesses_in_loop.length (); ++i)
1816 {
1817 mem_ref_loc *l = &ref->accesses_in_loop[i];
1818 if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1819 break;
1820 if (fn (l))
1821 return true;
1822 }
1823
1824 return false;
1825 }
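/* A rough sketch of the lookup above (hypothetical loop numbers): with

     loop 1
       loop 2
       loop 3

   bb_loop_postorder assigns inner loops smaller indices than their
   enclosing loop, so the entries of accesses_in_loop belonging to the
   subtree of the queried loop (e.g. loops 2, 3 and 1 for a query on
   loop 1) form one contiguous run.  bsearch lands on some element of
   that run and the two scans above extend it to its start and end.  */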
1826
1827 /* Rewrites location LOC by TMP_VAR. */
1828
1829 class rewrite_mem_ref_loc
1830 {
1831 public:
1832 rewrite_mem_ref_loc (tree tmp_var_) : tmp_var (tmp_var_) {}
1833 bool operator () (mem_ref_loc *loc);
1834 tree tmp_var;
1835 };
1836
1837 bool
1838 rewrite_mem_ref_loc::operator () (mem_ref_loc *loc)
1839 {
1840 *loc->ref = tmp_var;
1841 update_stmt (loc->stmt);
1842 return false;
1843 }
1844
1845 /* Rewrites all references to REF in LOOP by variable TMP_VAR. */
1846
1847 static void
1848 rewrite_mem_refs (class loop *loop, im_mem_ref *ref, tree tmp_var)
1849 {
1850 for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));
1851 }
1852
1853 /* Stores the first reference location in LOCP. */
1854
1855 class first_mem_ref_loc_1
1856 {
1857 public:
1858 first_mem_ref_loc_1 (mem_ref_loc **locp_) : locp (locp_) {}
1859 bool operator () (mem_ref_loc *loc);
1860 mem_ref_loc **locp;
1861 };
1862
1863 bool
1864 first_mem_ref_loc_1::operator () (mem_ref_loc *loc)
1865 {
1866 *locp = loc;
1867 return true;
1868 }
1869
1870 /* Returns the first reference location to REF in LOOP. */
1871
1872 static mem_ref_loc *
1873 first_mem_ref_loc (class loop *loop, im_mem_ref *ref)
1874 {
1875 mem_ref_loc *locp = NULL;
1876 for_all_locs_in_loop (loop, ref, first_mem_ref_loc_1 (&locp));
1877 return locp;
1878 }
1879
1880 /* Helper function for execute_sm. Emit code to store TMP_VAR into
1881 MEM along edge EX.
1882
1883 The store is only done if MEM has changed. We do this so no
1884 changes to MEM occur on code paths that did not originally store
1885 into it.
1886
1887 The common case for execute_sm will transform:
1888
1889 for (...) {
1890 if (foo)
1891 stuff;
1892 else
1893 MEM = TMP_VAR;
1894 }
1895
1896 into:
1897
1898 lsm = MEM;
1899 for (...) {
1900 if (foo)
1901 stuff;
1902 else
1903 lsm = TMP_VAR;
1904 }
1905 MEM = lsm;
1906
1907 This function will generate:
1908
1909 lsm = MEM;
1910
1911 lsm_flag = false;
1912 ...
1913 for (...) {
1914 if (foo)
1915 stuff;
1916 else {
1917 lsm = TMP_VAR;
1918 lsm_flag = true;
1919 }
1920 }
1921 if (lsm_flag) <--
1922 MEM = lsm; <--
1923 */
1924
1925 static void
1926 execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
1927 edge preheader, hash_set <basic_block> *flag_bbs,
1928 edge &append_cond_position, edge &last_cond_fallthru)
1929 {
1930 basic_block new_bb, then_bb, old_dest;
1931 bool loop_has_only_one_exit;
1932 edge then_old_edge;
1933 gimple_stmt_iterator gsi;
1934 gimple *stmt;
1935 bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP;
1936
1937 profile_count count_sum = profile_count::zero ();
1938 int nbbs = 0, ncount = 0;
1939 profile_probability flag_probability = profile_probability::uninitialized ();
1940
1941 /* The flag is set in FLAG_BBS. Determine the probability that the flag
1942 will be true at the loop exit.
1943
1944 This code may look fancy, but it cannot update the profile very
1945 realistically because we do not know the probability that the flag
1946 will be true at a given loop exit.
1947
1948 We look for two interesting extremes:
1949 - when the exit is dominated by a block setting the flag, we know the
1950 flag will always be true. This is a common case.
1951 - when all blocks setting the flag have very low frequency, we know
1952 the flag will likely be false.
1953 In all other cases we default to 2/3 for the flag being true. */
1954
1955 for (hash_set<basic_block>::iterator it = flag_bbs->begin ();
1956 it != flag_bbs->end (); ++it)
1957 {
1958 if ((*it)->count.initialized_p ())
1959 count_sum += (*it)->count, ncount ++;
1960 if (dominated_by_p (CDI_DOMINATORS, ex->src, *it))
1961 flag_probability = profile_probability::always ();
1962 nbbs++;
1963 }
1964
1965 profile_probability cap = profile_probability::always ().apply_scale (2, 3);
1966
1967 if (flag_probability.initialized_p ())
1968 ;
1969 else if (ncount == nbbs
1970 && preheader->count () >= count_sum && preheader->count ().nonzero_p ())
1971 {
1972 flag_probability = count_sum.probability_in (preheader->count ());
1973 if (flag_probability > cap)
1974 flag_probability = cap;
1975 }
1976
1977 if (!flag_probability.initialized_p ())
1978 flag_probability = cap;
1979
1980 /* ?? Insert store after previous store if applicable. See note
1981 below. */
1982 if (append_cond_position)
1983 ex = append_cond_position;
1984
1985 loop_has_only_one_exit = single_pred_p (ex->dest);
1986
1987 if (loop_has_only_one_exit)
1988 ex = split_block_after_labels (ex->dest);
1989 else
1990 {
1991 for (gphi_iterator gpi = gsi_start_phis (ex->dest);
1992 !gsi_end_p (gpi); gsi_next (&gpi))
1993 {
1994 gphi *phi = gpi.phi ();
1995 if (virtual_operand_p (gimple_phi_result (phi)))
1996 continue;
1997
1998 /* When the destination has a non-virtual PHI node with multiple
1999 predecessors make sure we preserve the PHI structure by
2000 forcing a forwarder block so that hoisting of that PHI will
2001 still work. */
2002 split_edge (ex);
2003 break;
2004 }
2005 }
2006
2007 old_dest = ex->dest;
2008 new_bb = split_edge (ex);
2009 then_bb = create_empty_bb (new_bb);
2010 then_bb->count = new_bb->count.apply_probability (flag_probability);
2011 if (irr)
2012 then_bb->flags = BB_IRREDUCIBLE_LOOP;
2013 add_bb_to_loop (then_bb, new_bb->loop_father);
2014
2015 gsi = gsi_start_bb (new_bb);
2016 stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
2017 NULL_TREE, NULL_TREE);
2018 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2019
2020 gsi = gsi_start_bb (then_bb);
2021 /* Insert actual store. */
2022 stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
2023 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2024
2025 edge e1 = single_succ_edge (new_bb);
2026 edge e2 = make_edge (new_bb, then_bb,
2027 EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
2028 e2->probability = flag_probability;
2029
2030 e1->flags |= EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0);
2031 e1->flags &= ~EDGE_FALLTHRU;
2032
2033 e1->probability = flag_probability.invert ();
2034
2035 then_old_edge = make_single_succ_edge (then_bb, old_dest,
2036 EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
2037
2038 set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);
2039
2040 if (append_cond_position)
2041 {
2042 basic_block prevbb = last_cond_fallthru->src;
2043 redirect_edge_succ (last_cond_fallthru, new_bb);
2044 set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
2045 set_immediate_dominator (CDI_DOMINATORS, old_dest,
2046 recompute_dominator (CDI_DOMINATORS, old_dest));
2047 }
2048
2049 /* ?? Because stores may alias, they must happen in the exact
2050 sequence in which they originally happened. Save the position right
2051 after the (_lsm) store we just created so we can continue appending
2052 after it and maintain the original order. */
2053 append_cond_position = then_old_edge;
2054 last_cond_fallthru = find_edge (new_bb, old_dest);
2055
2056 if (!loop_has_only_one_exit)
2057 for (gphi_iterator gpi = gsi_start_phis (old_dest);
2058 !gsi_end_p (gpi); gsi_next (&gpi))
2059 {
2060 gphi *phi = gpi.phi ();
2061 unsigned i;
2062
2063 for (i = 0; i < gimple_phi_num_args (phi); i++)
2064 if (gimple_phi_arg_edge (phi, i)->src == new_bb)
2065 {
2066 tree arg = gimple_phi_arg_def (phi, i);
2067 add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
2068 update_stmt (phi);
2069 }
2070 }
2071 }
2072
2073 /* When REF is stored to at the location, set the flag indicating the store. */
2074
2075 class sm_set_flag_if_changed
2076 {
2077 public:
2078 sm_set_flag_if_changed (tree flag_, hash_set <basic_block> *bbs_)
2079 : flag (flag_), bbs (bbs_) {}
2080 bool operator () (mem_ref_loc *loc);
2081 tree flag;
2082 hash_set <basic_block> *bbs;
2083 };
2084
2085 bool
2086 sm_set_flag_if_changed::operator () (mem_ref_loc *loc)
2087 {
2088 /* Only set the flag for writes. */
2089 if (is_gimple_assign (loc->stmt)
2090 && gimple_assign_lhs_ptr (loc->stmt) == loc->ref)
2091 {
2092 gimple_stmt_iterator gsi = gsi_for_stmt (loc->stmt);
2093 gimple *stmt = gimple_build_assign (flag, boolean_true_node);
2094 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2095 bbs->add (gimple_bb (stmt));
2096 }
2097 return false;
2098 }
2099
2100 /* Helper function for execute_sm. On every location where REF is
2101 set, set an appropriate flag indicating the store. */
2102
2103 static tree
2104 execute_sm_if_changed_flag_set (class loop *loop, im_mem_ref *ref,
2105 hash_set <basic_block> *bbs)
2106 {
2107 tree flag;
2108 char *str = get_lsm_tmp_name (ref->mem.ref, ~0, "_flag");
2109 flag = create_tmp_reg (boolean_type_node, str);
2110 for_all_locs_in_loop (loop, ref, sm_set_flag_if_changed (flag, bbs));
2111 return flag;
2112 }
2113
2114 struct sm_aux
2115 {
2116 tree tmp_var;
2117 tree store_flag;
2118 hash_set <basic_block> flag_bbs;
2119 };
2120
2121 /* Executes store motion of memory reference REF from LOOP. The load of
2122 the reference into the temporary variable ends up in the loop preheader
2123 (via move_computations); the stores back to the reference are emitted
2124 on the loop exits later, by execute_sm_exit, using the state in AUX_MAP. */
2125
2126 static void
2127 execute_sm (class loop *loop, im_mem_ref *ref,
2128 hash_map<im_mem_ref *, sm_aux *> &aux_map, bool maybe_mt)
2129 {
2130 gassign *load;
2131 struct fmt_data fmt_data;
2132 struct lim_aux_data *lim_data;
2133 bool multi_threaded_model_p = false;
2134 gimple_stmt_iterator gsi;
2135 sm_aux *aux = new sm_aux;
2136
2137 if (dump_file && (dump_flags & TDF_DETAILS))
2138 {
2139 fprintf (dump_file, "Executing store motion of ");
2140 print_generic_expr (dump_file, ref->mem.ref);
2141 fprintf (dump_file, " from loop %d\n", loop->num);
2142 }
2143
2144 aux->tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
2145 get_lsm_tmp_name (ref->mem.ref, ~0));
2146
2147 fmt_data.loop = loop;
2148 fmt_data.orig_loop = loop;
2149 for_each_index (&ref->mem.ref, force_move_till, &fmt_data);
2150
2151 bool always_stored = ref_always_accessed_p (loop, ref, true);
2152 if (maybe_mt
2153 && (bb_in_transaction (loop_preheader_edge (loop)->src)
2154 || (! flag_store_data_races && ! always_stored)))
2155 multi_threaded_model_p = true;
2156
2157 if (multi_threaded_model_p)
2158 aux->store_flag
2159 = execute_sm_if_changed_flag_set (loop, ref, &aux->flag_bbs);
2160 else
2161 aux->store_flag = NULL_TREE;
2162
2163 /* Remember variable setup. */
2164 aux_map.put (ref, aux);
2165
2166 rewrite_mem_refs (loop, ref, aux->tmp_var);
2167
2168 /* Emit the load code immediately before the first location of the
2169 ref in the loop, so that we are sure it will be processed by
2170 move_computations after all of its dependencies. */
2171 gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt);
2172
2173 /* Avoid doing a load if there was no load of the ref in the loop.
2174 Esp. when the ref is not always stored we cannot optimize it
2175 away later. But when it is not always stored we must use a conditional
2176 store then. */
2177 if ((!always_stored && !multi_threaded_model_p)
2178 || (ref->loaded && bitmap_bit_p (ref->loaded, loop->num)))
2179 load = gimple_build_assign (aux->tmp_var, unshare_expr (ref->mem.ref));
2180 else
2181 {
2182 /* If not emitting a load, mark the uninitialized state on the
2183 loop entry so that it is not warned about. */
2184 tree uninit = create_tmp_reg (TREE_TYPE (aux->tmp_var));
2185 suppress_warning (uninit, OPT_Wuninitialized);
2186 load = gimple_build_assign (aux->tmp_var, uninit);
2187 }
2188 lim_data = init_lim_data (load);
2189 lim_data->max_loop = loop;
2190 lim_data->tgt_loop = loop;
2191 gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2192
2193 if (multi_threaded_model_p)
2194 {
2195 load = gimple_build_assign (aux->store_flag, boolean_false_node);
2196 lim_data = init_lim_data (load);
2197 lim_data->max_loop = loop;
2198 lim_data->tgt_loop = loop;
2199 gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2200 }
2201 }
2202
2203 /* sm_ord is used for ordinary stores for which we can retain the order
2204 with respect to other stores.
2205 sm_unord is used for conditionally executed stores which need to be
2206 able to execute in arbitrary order with respect to other stores.
2207 sm_other is used for stores we do not try to apply store motion to. */
2208 enum sm_kind { sm_ord, sm_unord, sm_other };
2209 struct seq_entry
2210 {
2211 seq_entry () {}
2212 seq_entry (unsigned f, sm_kind k, tree fr = NULL)
2213 : first (f), second (k), from (fr) {}
2214 unsigned first;
2215 sm_kind second;
2216 tree from;
2217 };
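/* One possible classification (a hypothetical example):

     for (...)
       {
         a = t1;      // unconditional store, order preserved: sm_ord
         if (cond)
           b = t2;    // conditionally executed: may end up as sm_unord
         *p = t3;     // store not subject to SM: recorded as sm_other
       }

   sm_ord entries are materialized on the exits in their original order,
   sm_unord entries use the conditional-store scheme of
   execute_sm_if_changed, and sm_other entries only record the stored
   value (FROM) so that dependent ordered stores can be re-issued.  */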
2218
2219 static void
2220 execute_sm_exit (class loop *loop, edge ex, vec<seq_entry> &seq,
2221 hash_map<im_mem_ref *, sm_aux *> &aux_map, sm_kind kind,
2222 edge &append_cond_position, edge &last_cond_fallthru)
2223 {
2224 /* Sink the stores to exit from the loop. */
2225 for (unsigned i = seq.length (); i > 0; --i)
2226 {
2227 im_mem_ref *ref = memory_accesses.refs_list[seq[i-1].first];
2228 if (seq[i-1].second == sm_other)
2229 {
2230 gcc_assert (kind == sm_ord && seq[i-1].from != NULL_TREE);
2231 if (dump_file && (dump_flags & TDF_DETAILS))
2232 {
2233 fprintf (dump_file, "Re-issuing dependent store of ");
2234 print_generic_expr (dump_file, ref->mem.ref);
2235 fprintf (dump_file, " from loop %d on exit %d -> %d\n",
2236 loop->num, ex->src->index, ex->dest->index);
2237 }
2238 gassign *store = gimple_build_assign (unshare_expr (ref->mem.ref),
2239 seq[i-1].from);
2240 gsi_insert_on_edge (ex, store);
2241 }
2242 else
2243 {
2244 sm_aux *aux = *aux_map.get (ref);
2245 if (!aux->store_flag || kind == sm_ord)
2246 {
2247 gassign *store;
2248 store = gimple_build_assign (unshare_expr (ref->mem.ref),
2249 aux->tmp_var);
2250 gsi_insert_on_edge (ex, store);
2251 }
2252 else
2253 execute_sm_if_changed (ex, ref->mem.ref, aux->tmp_var,
2254 aux->store_flag,
2255 loop_preheader_edge (loop), &aux->flag_bbs,
2256 append_cond_position, last_cond_fallthru);
2257 }
2258 }
2259 }
2260
2261 /* Push the SM candidate at index PTR in the sequence SEQ down until
2262 we hit the next SM candidate. Return true if that went OK and
2263 false if we could not disambiguate against another unrelated ref.
2264 Update *AT to the index where the candidate now resides. */
2265
2266 static bool
2267 sm_seq_push_down (vec<seq_entry> &seq, unsigned ptr, unsigned *at)
2268 {
2269 *at = ptr;
2270 for (; ptr > 0; --ptr)
2271 {
2272 seq_entry &new_cand = seq[ptr];
2273 seq_entry &against = seq[ptr-1];
2274 if (against.second == sm_ord
2275 || (against.second == sm_other && against.from != NULL_TREE))
2276 /* Found the tail of the sequence. */
2277 break;
2278 /* We may not ignore self-dependences here. */
2279 if (new_cand.first == against.first
2280 || !refs_independent_p (memory_accesses.refs_list[new_cand.first],
2281 memory_accesses.refs_list[against.first],
2282 false))
2283 /* ??? Prune new_cand from the list of refs to apply SM to. */
2284 return false;
2285 std::swap (new_cand, against);
2286 *at = ptr - 1;
2287 }
2288 return true;
2289 }
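/* A rough sketch (hypothetical entries): if the sequence currently is

     { A: sm_ord,  X: sm_other (dropped, FROM == NULL),  B: new candidate }

   then B is swapped past X, provided B and X can be disambiguated
   without relying on TBAA, and the walk stops behind A since an sm_ord
   entry (or an sm_other entry that still records its stored value)
   marks the tail of the ordered sequence:

     { A: sm_ord,  B,  X: sm_other }

   If the disambiguation against X fails, false is returned and the
   caller treats B conservatively.  */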
2290
2291 /* Computes the sequence of stores from candidates in REFS_NOT_IN_SEQ to SEQ
2292 walking backwards from VDEF (or the end of BB if VDEF is NULL). */
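/* As a rough illustration (hypothetical GIMPLE with virtual operands):

     # .MEM_3 = VDEF <.MEM_1>
     a = t1_5;
     # .MEM_4 = VDEF <.MEM_3>
     b = t2_6;

   starting from the last virtual definition before the exit (.MEM_4)
   the walk records the store to B, follows the VUSE to .MEM_3, records
   the store to A, and continues backwards across basic blocks and
   virtual PHIs until all candidate refs have been seen or an
   unanalyzable reference terminates the walk.  */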
2293
2294 static int
2295 sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef,
2296 vec<seq_entry> &seq, bitmap refs_not_in_seq,
2297 bitmap refs_not_supported, bool forked,
2298 bitmap fully_visited)
2299 {
2300 if (!vdef)
2301 for (gimple_stmt_iterator gsi = gsi_last_bb (bb); !gsi_end_p (gsi);
2302 gsi_prev (&gsi))
2303 {
2304 vdef = gimple_vdef (gsi_stmt (gsi));
2305 if (vdef)
2306 break;
2307 }
2308 if (!vdef)
2309 {
2310 gphi *vphi = get_virtual_phi (bb);
2311 if (vphi)
2312 vdef = gimple_phi_result (vphi);
2313 }
2314 if (!vdef)
2315 {
2316 if (single_pred_p (bb))
2317 /* This handles the perfect nest case. */
2318 return sm_seq_valid_bb (loop, single_pred (bb), vdef,
2319 seq, refs_not_in_seq, refs_not_supported,
2320 forked, fully_visited);
2321 return 0;
2322 }
2323 do
2324 {
2325 gimple *def = SSA_NAME_DEF_STMT (vdef);
2326 if (gimple_bb (def) != bb)
2327 {
2328 /* If we forked by processing a PHI do not allow our walk to
2329 merge again until we handle that robustly. */
2330 if (forked)
2331 {
2332 /* Mark refs_not_in_seq as unsupported. */
2333 bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2334 return 1;
2335 }
2336 /* Otherwise it doesn't really matter if we end up in different
2337 BBs. */
2338 bb = gimple_bb (def);
2339 }
2340 if (gphi *phi = dyn_cast <gphi *> (def))
2341 {
2342 /* Handle CFG merges. Until we handle forks (gimple_bb (def) != bb)
2343 this is still linear.
2344 Eventually we want to cache intermediate results per BB
2345 (but we can't easily cache for different exits?). */
2346 /* Stop at PHIs with possible backedges. */
2347 if (bb == bb->loop_father->header
2348 || bb->flags & BB_IRREDUCIBLE_LOOP)
2349 {
2350 /* Mark refs_not_in_seq as unsupported. */
2351 bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2352 return 1;
2353 }
2354 if (gimple_phi_num_args (phi) == 1)
2355 return sm_seq_valid_bb (loop, gimple_phi_arg_edge (phi, 0)->src,
2356 gimple_phi_arg_def (phi, 0), seq,
2357 refs_not_in_seq, refs_not_supported,
2358 false, fully_visited);
2359 if (bitmap_bit_p (fully_visited,
2360 SSA_NAME_VERSION (gimple_phi_result (phi))))
2361 return 1;
2362 auto_vec<seq_entry> first_edge_seq;
2363 auto_bitmap tem_refs_not_in_seq (&lim_bitmap_obstack);
2364 int eret;
2365 bitmap_copy (tem_refs_not_in_seq, refs_not_in_seq);
2366 eret = sm_seq_valid_bb (loop, gimple_phi_arg_edge (phi, 0)->src,
2367 gimple_phi_arg_def (phi, 0),
2368 first_edge_seq,
2369 tem_refs_not_in_seq, refs_not_supported,
2370 true, fully_visited);
2371 if (eret != 1)
2372 return -1;
2373 /* Simplify our lives by pruning the sequence of !sm_ord. */
2374 while (!first_edge_seq.is_empty ()
2375 && first_edge_seq.last ().second != sm_ord)
2376 first_edge_seq.pop ();
2377 for (unsigned int i = 1; i < gimple_phi_num_args (phi); ++i)
2378 {
2379 tree vuse = gimple_phi_arg_def (phi, i);
2380 edge e = gimple_phi_arg_edge (phi, i);
2381 auto_vec<seq_entry> edge_seq;
2382 bitmap_and_compl (tem_refs_not_in_seq,
2383 refs_not_in_seq, refs_not_supported);
2384 /* If we've marked all refs we search for as unsupported
2385 we can stop processing and use the sequence as before
2386 the PHI. */
2387 if (bitmap_empty_p (tem_refs_not_in_seq))
2388 return 1;
2389 eret = sm_seq_valid_bb (loop, e->src, vuse, edge_seq,
2390 tem_refs_not_in_seq, refs_not_supported,
2391 true, fully_visited);
2392 if (eret != 1)
2393 return -1;
2394 /* Simplify our lives by pruning the sequence of !sm_ord. */
2395 while (!edge_seq.is_empty ()
2396 && edge_seq.last ().second != sm_ord)
2397 edge_seq.pop ();
2398 unsigned min_len = MIN(first_edge_seq.length (),
2399 edge_seq.length ());
2400 /* Incrementally merge seqs into first_edge_seq. */
2401 int first_uneq = -1;
2402 auto_vec<seq_entry, 2> extra_refs;
2403 for (unsigned int i = 0; i < min_len; ++i)
2404 {
2405 /* ??? We can more intelligently merge when we face different
2406 order by additional sinking operations in one sequence.
2407 For now we simply mark them as to be processed by the
2408 not order-preserving SM code. */
2409 if (first_edge_seq[i].first != edge_seq[i].first)
2410 {
2411 if (first_edge_seq[i].second == sm_ord)
2412 bitmap_set_bit (refs_not_supported,
2413 first_edge_seq[i].first);
2414 if (edge_seq[i].second == sm_ord)
2415 bitmap_set_bit (refs_not_supported, edge_seq[i].first);
2416 first_edge_seq[i].second = sm_other;
2417 first_edge_seq[i].from = NULL_TREE;
2418 /* Record the dropped refs for later processing. */
2419 if (first_uneq == -1)
2420 first_uneq = i;
2421 extra_refs.safe_push (seq_entry (edge_seq[i].first,
2422 sm_other, NULL_TREE));
2423 }
2424 /* sm_other prevails. */
2425 else if (first_edge_seq[i].second != edge_seq[i].second)
2426 {
2427 /* Make sure the ref is marked as not supported. */
2428 bitmap_set_bit (refs_not_supported,
2429 first_edge_seq[i].first);
2430 first_edge_seq[i].second = sm_other;
2431 first_edge_seq[i].from = NULL_TREE;
2432 }
2433 else if (first_edge_seq[i].second == sm_other
2434 && first_edge_seq[i].from != NULL_TREE
2435 && (edge_seq[i].from == NULL_TREE
2436 || !operand_equal_p (first_edge_seq[i].from,
2437 edge_seq[i].from, 0)))
2438 first_edge_seq[i].from = NULL_TREE;
2439 }
2440 /* Any excess elements become sm_other since they are now
2441 conditionally executed. */
2442 if (first_edge_seq.length () > edge_seq.length ())
2443 {
2444 for (unsigned i = edge_seq.length ();
2445 i < first_edge_seq.length (); ++i)
2446 {
2447 if (first_edge_seq[i].second == sm_ord)
2448 bitmap_set_bit (refs_not_supported,
2449 first_edge_seq[i].first);
2450 first_edge_seq[i].second = sm_other;
2451 }
2452 }
2453 else if (edge_seq.length () > first_edge_seq.length ())
2454 {
2455 if (first_uneq == -1)
2456 first_uneq = first_edge_seq.length ();
2457 for (unsigned i = first_edge_seq.length ();
2458 i < edge_seq.length (); ++i)
2459 {
2460 if (edge_seq[i].second == sm_ord)
2461 bitmap_set_bit (refs_not_supported, edge_seq[i].first);
2462 extra_refs.safe_push (seq_entry (edge_seq[i].first,
2463 sm_other, NULL_TREE));
2464 }
2465 }
2466 /* Put unmerged refs at first_uneq to force dependence checking
2467 on them. */
2468 if (first_uneq != -1)
2469 {
2470 /* Missing ordered_splice_at. */
2471 if ((unsigned)first_uneq == first_edge_seq.length ())
2472 first_edge_seq.safe_splice (extra_refs);
2473 else
2474 {
2475 unsigned fes_length = first_edge_seq.length ();
2476 first_edge_seq.safe_grow (fes_length
2477 + extra_refs.length ());
2478 memmove (&first_edge_seq[first_uneq + extra_refs.length ()],
2479 &first_edge_seq[first_uneq],
2480 (fes_length - first_uneq) * sizeof (seq_entry));
2481 memcpy (&first_edge_seq[first_uneq],
2482 extra_refs.address (),
2483 extra_refs.length () * sizeof (seq_entry));
2484 }
2485 }
2486 }
2487 /* Use the sequence from the first edge and push SMs down. */
2488 for (unsigned i = 0; i < first_edge_seq.length (); ++i)
2489 {
2490 unsigned id = first_edge_seq[i].first;
2491 seq.safe_push (first_edge_seq[i]);
2492 unsigned new_idx;
2493 if ((first_edge_seq[i].second == sm_ord
2494 || (first_edge_seq[i].second == sm_other
2495 && first_edge_seq[i].from != NULL_TREE))
2496 && !sm_seq_push_down (seq, seq.length () - 1, &new_idx))
2497 {
2498 if (first_edge_seq[i].second == sm_ord)
2499 bitmap_set_bit (refs_not_supported, id);
2500 /* Mark it sm_other. */
2501 seq[new_idx].second = sm_other;
2502 seq[new_idx].from = NULL_TREE;
2503 }
2504 }
2505 bitmap_set_bit (fully_visited,
2506 SSA_NAME_VERSION (gimple_phi_result (phi)));
2507 return 1;
2508 }
2509 lim_aux_data *data = get_lim_data (def);
2510 gcc_assert (data);
2511 if (data->ref == UNANALYZABLE_MEM_ID)
2512 return -1;
2513 /* Stop at memory references which we can't move. */
2514 else if (memory_accesses.refs_list[data->ref]->mem.ref == error_mark_node)
2515 {
2516 /* Mark refs_not_in_seq as unsupported. */
2517 bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2518 return 1;
2519 }
2520 /* One of the stores we want to apply SM to and we've not yet seen. */
2521 else if (bitmap_clear_bit (refs_not_in_seq, data->ref))
2522 {
2523 seq.safe_push (seq_entry (data->ref, sm_ord));
2524
2525 /* 1) push it down the queue until an SMed
2526 and not ignored ref is reached, skipping all not SMed refs
2527 and ignored refs via non-TBAA disambiguation. */
2528 unsigned new_idx;
2529 if (!sm_seq_push_down (seq, seq.length () - 1, &new_idx)
2530 /* If that fails but we did not fork yet continue, we'll see
2531 to re-materialize all of the stores in the sequence then.
2532 Further stores will only be pushed up to this one. */
2533 && forked)
2534 {
2535 bitmap_set_bit (refs_not_supported, data->ref);
2536 /* Mark it sm_other. */
2537 seq[new_idx].second = sm_other;
2538 }
2539
2540 /* 2) check whether we've seen all refs we want to SM and if so
2541 declare success for the active exit. */
2542 if (bitmap_empty_p (refs_not_in_seq))
2543 return 1;
2544 }
2545 else
2546 /* Another store not part of the final sequence. Simply push it. */
2547 seq.safe_push (seq_entry (data->ref, sm_other,
2548 gimple_assign_rhs1 (def)));
2549
2550 vdef = gimple_vuse (def);
2551 }
2552 while (1);
2553 }
2554
2555 /* Hoists memory references MEM_REFS out of LOOP. EXITS is the list of exit
2556 edges of the LOOP. */
2557
2558 static void
2559 hoist_memory_references (class loop *loop, bitmap mem_refs,
2560 const vec<edge> &exits)
2561 {
2562 im_mem_ref *ref;
2563 unsigned i;
2564 bitmap_iterator bi;
2565
2566 /* To address PR57359, before actually applying store motion check
2567 the candidates found for validity with regard to reordering
2568 relative to other stores, which up to this point we have
2569 disambiguated using TBAA, which is not valid for this purpose.
2570 What matters is the order of the last stores to the mem_refs
2571 with respect to the other stores of the loop at the point of the
2572 loop exits. */
2573
2574 /* For each exit compute the store order, pruning from mem_refs
2575 on the fly. */
2576 /* The complexity of this is at least
2577 O(number of exits * number of SM refs) but more approaching
2578 O(number of exits * number of SM refs * number of stores). */
2579 /* ??? Somehow do this in a single sweep over the loop body. */
2580 auto_vec<std::pair<edge, vec<seq_entry> > > sms;
2581 auto_bitmap refs_not_supported (&lim_bitmap_obstack);
2582 edge e;
2583 FOR_EACH_VEC_ELT (exits, i, e)
2584 {
2585 vec<seq_entry> seq;
2586 seq.create (4);
2587 auto_bitmap refs_not_in_seq (&lim_bitmap_obstack);
2588 bitmap_and_compl (refs_not_in_seq, mem_refs, refs_not_supported);
2589 if (bitmap_empty_p (refs_not_in_seq))
2590 {
2591 seq.release ();
2592 break;
2593 }
2594 auto_bitmap fully_visited;
2595 int res = sm_seq_valid_bb (loop, e->src, NULL_TREE,
2596 seq, refs_not_in_seq,
2597 refs_not_supported, false,
2598 fully_visited);
2599 if (res != 1)
2600 {
2601 bitmap_copy (refs_not_supported, mem_refs);
2602 seq.release ();
2603 break;
2604 }
2605 sms.safe_push (std::make_pair (e, seq));
2606 }
2607
2608 /* Prune refs that turned out to be unsupported from the sequences of earlier processed exits. */
2609 bool changed = !bitmap_empty_p (refs_not_supported);
2610 while (changed)
2611 {
2612 changed = false;
2613 std::pair<edge, vec<seq_entry> > *seq;
2614 FOR_EACH_VEC_ELT (sms, i, seq)
2615 {
2616 bool need_to_push = false;
2617 for (unsigned i = 0; i < seq->second.length (); ++i)
2618 {
2619 sm_kind kind = seq->second[i].second;
2620 if (kind == sm_other && seq->second[i].from == NULL_TREE)
2621 break;
2622 unsigned id = seq->second[i].first;
2623 unsigned new_idx;
2624 if (kind == sm_ord
2625 && bitmap_bit_p (refs_not_supported, id))
2626 {
2627 seq->second[i].second = sm_other;
2628 gcc_assert (seq->second[i].from == NULL_TREE);
2629 need_to_push = true;
2630 }
2631 else if (need_to_push
2632 && !sm_seq_push_down (seq->second, i, &new_idx))
2633 {
2634 /* We need to push down both sm_ord and sm_other
2635 but for the latter we need to disqualify all
2636 following refs. */
2637 if (kind == sm_ord)
2638 {
2639 if (bitmap_set_bit (refs_not_supported, id))
2640 changed = true;
2641 seq->second[new_idx].second = sm_other;
2642 }
2643 else
2644 {
2645 for (unsigned j = seq->second.length () - 1;
2646 j > new_idx; --j)
2647 if (seq->second[j].second == sm_ord
2648 && bitmap_set_bit (refs_not_supported,
2649 seq->second[j].first))
2650 changed = true;
2651 seq->second.truncate (new_idx);
2652 break;
2653 }
2654 }
2655 }
2656 }
2657 }
2658 std::pair<edge, vec<seq_entry> > *seq;
2659 FOR_EACH_VEC_ELT (sms, i, seq)
2660 {
2661 /* Prune sm_other from the end. */
2662 while (!seq->second.is_empty ()
2663 && seq->second.last ().second == sm_other)
2664 seq->second.pop ();
2665 /* Prune duplicates from the start. */
2666 auto_bitmap seen (&lim_bitmap_obstack);
2667 unsigned j, k;
2668 for (j = k = 0; j < seq->second.length (); ++j)
2669 if (bitmap_set_bit (seen, seq->second[j].first))
2670 {
2671 if (k != j)
2672 seq->second[k] = seq->second[j];
2673 ++k;
2674 }
2675 seq->second.truncate (k);
2676 /* And verify. */
2677 seq_entry *e;
2678 FOR_EACH_VEC_ELT (seq->second, j, e)
2679 gcc_assert (e->second == sm_ord
2680 || (e->second == sm_other && e->from != NULL_TREE));
2681 }
2682
2683 /* Verify dependence for refs we cannot handle with the order preserving
2684 code (refs_not_supported) or prune them from mem_refs. */
2685 auto_vec<seq_entry> unord_refs;
2686 EXECUTE_IF_SET_IN_BITMAP (refs_not_supported, 0, i, bi)
2687 {
2688 ref = memory_accesses.refs_list[i];
2689 if (!ref_indep_loop_p (loop, ref, sm_waw))
2690 bitmap_clear_bit (mem_refs, i);
2691 /* We've now verified that the store order of REF with respect to all
2692 other stores in the loop does not matter. */
2693 else
2694 unord_refs.safe_push (seq_entry (i, sm_unord));
2695 }
2696
2697 hash_map<im_mem_ref *, sm_aux *> aux_map;
2698
2699 /* Execute SM but delay the store materialization for ordered
2700 sequences on exit. */
2701 EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
2702 {
2703 ref = memory_accesses.refs_list[i];
2704 execute_sm (loop, ref, aux_map, bitmap_bit_p (refs_not_supported, i));
2705 }
2706
2707 /* Materialize ordered store sequences on exits. */
2708 FOR_EACH_VEC_ELT (exits, i, e)
2709 {
2710 edge append_cond_position = NULL;
2711 edge last_cond_fallthru = NULL;
2712 if (i < sms.length ())
2713 {
2714 gcc_assert (sms[i].first == e);
2715 execute_sm_exit (loop, e, sms[i].second, aux_map, sm_ord,
2716 append_cond_position, last_cond_fallthru);
2717 sms[i].second.release ();
2718 }
2719 if (!unord_refs.is_empty ())
2720 execute_sm_exit (loop, e, unord_refs, aux_map, sm_unord,
2721 append_cond_position, last_cond_fallthru);
2722 /* Commit edge inserts here to preserve the order of stores
2723 when an exit exits multiple loops. */
2724 gsi_commit_one_edge_insert (e, NULL);
2725 }
2726
2727 for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
2728 iter != aux_map.end (); ++iter)
2729 delete (*iter).second;
2730 }
2731
2732 class ref_always_accessed
2733 {
2734 public:
2735 ref_always_accessed (class loop *loop_, bool stored_p_)
2736 : loop (loop_), stored_p (stored_p_) {}
2737 bool operator () (mem_ref_loc *loc);
2738 class loop *loop;
2739 bool stored_p;
2740 };
2741
2742 bool
2743 ref_always_accessed::operator () (mem_ref_loc *loc)
2744 {
2745 class loop *must_exec;
2746
2747 struct lim_aux_data *lim_data = get_lim_data (loc->stmt);
2748 if (!lim_data)
2749 return false;
2750
2751 /* If we require an always executed store make sure the statement
2752 is a store. */
2753 if (stored_p)
2754 {
2755 tree lhs = gimple_get_lhs (loc->stmt);
2756 if (!lhs
2757 || !(DECL_P (lhs) || REFERENCE_CLASS_P (lhs)))
2758 return false;
2759 }
2760
2761 must_exec = lim_data->always_executed_in;
2762 if (!must_exec)
2763 return false;
2764
2765 if (must_exec == loop
2766 || flow_loop_nested_p (must_exec, loop))
2767 return true;
2768
2769 return false;
2770 }
2771
2772 /* Returns true if REF is always accessed in LOOP. If STORED_P is true
2773 make sure REF is always stored to in LOOP. */
2774
2775 static bool
2776 ref_always_accessed_p (class loop *loop, im_mem_ref *ref, bool stored_p)
2777 {
2778 return for_all_locs_in_loop (loop, ref,
2779 ref_always_accessed (loop, stored_p));
2780 }
2781
2782 /* Returns true if REF1 and REF2 are independent. */
2783
2784 static bool
2785 refs_independent_p (im_mem_ref *ref1, im_mem_ref *ref2, bool tbaa_p)
2786 {
2787 if (ref1 == ref2)
2788 return true;
2789
2790 if (dump_file && (dump_flags & TDF_DETAILS))
2791 fprintf (dump_file, "Querying dependency of refs %u and %u: ",
2792 ref1->id, ref2->id);
2793
2794 if (mem_refs_may_alias_p (ref1, ref2, &memory_accesses.ttae_cache, tbaa_p))
2795 {
2796 if (dump_file && (dump_flags & TDF_DETAILS))
2797 fprintf (dump_file, "dependent.\n");
2798 return false;
2799 }
2800 else
2801 {
2802 if (dump_file && (dump_flags & TDF_DETAILS))
2803 fprintf (dump_file, "independent.\n");
2804 return true;
2805 }
2806 }
2807
2808 /* Returns true if REF is independent of all other accesses in LOOP.
2809 KIND specifies the kind of dependence to consider.
2810 lim_raw assumes REF is not stored in LOOP and disambiguates RAW
2811 dependences so if true REF can be hoisted out of LOOP
2812 sm_war disambiguates a store REF against all other loads to see
2813 whether the store can be sunk across loads out of LOOP
2814 sm_waw disambiguates a store REF against all other stores to see
2815 whether the store can be sunk across stores out of LOOP. */
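/* A small example of the three kinds (hypothetical code, P and Q of
   unknown aliasing):

     for (i = 0; i < n; ++i)
       {
         tmp = *q;        // load
         *p = tmp + 1;    // store considered for store motion
       }

   lim_raw asks whether the load *q is independent of all stores in the
   loop so it can be hoisted; sm_war asks whether the store *p can be
   sunk to the exits across the load *q; sm_waw asks whether *p can be
   sunk across all other stores.  If P and Q may alias, the respective
   query fails and the transformation is not performed.  */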
2816
2817 static bool
2818 ref_indep_loop_p (class loop *loop, im_mem_ref *ref, dep_kind kind)
2819 {
2820 bool indep_p = true;
2821 bitmap refs_to_check;
2822
2823 if (kind == sm_war)
2824 refs_to_check = &memory_accesses.refs_loaded_in_loop[loop->num];
2825 else
2826 refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];
2827
2828 if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID)
2829 || ref->mem.ref == error_mark_node)
2830 indep_p = false;
2831 else
2832 {
2833 /* tri-state, { unknown, independent, dependent } */
2834 dep_state state = query_loop_dependence (loop, ref, kind);
2835 if (state != dep_unknown)
2836 return state == dep_independent ? true : false;
2837
2838 class loop *inner = loop->inner;
2839 while (inner)
2840 {
2841 if (!ref_indep_loop_p (inner, ref, kind))
2842 {
2843 indep_p = false;
2844 break;
2845 }
2846 inner = inner->next;
2847 }
2848
2849 if (indep_p)
2850 {
2851 unsigned i;
2852 bitmap_iterator bi;
2853 EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
2854 {
2855 im_mem_ref *aref = memory_accesses.refs_list[i];
2856 if (aref->mem.ref == error_mark_node)
2857 {
2858 gimple *stmt = aref->accesses_in_loop[0].stmt;
2859 if ((kind == sm_war
2860 && ref_maybe_used_by_stmt_p (stmt, &ref->mem,
2861 kind != sm_waw))
2862 || stmt_may_clobber_ref_p_1 (stmt, &ref->mem,
2863 kind != sm_waw))
2864 {
2865 indep_p = false;
2866 break;
2867 }
2868 }
2869 else if (!refs_independent_p (ref, aref, kind != sm_waw))
2870 {
2871 indep_p = false;
2872 break;
2873 }
2874 }
2875 }
2876 }
2877
2878 if (dump_file && (dump_flags & TDF_DETAILS))
2879 fprintf (dump_file, "Querying %s dependencies of ref %u in loop %d: %s\n",
2880 kind == lim_raw ? "RAW" : (kind == sm_war ? "SM WAR" : "SM WAW"),
2881 ref->id, loop->num, indep_p ? "independent" : "dependent");
2882
2883 /* Record the computed result in the cache. */
2884 record_loop_dependence (loop, ref, kind,
2885 indep_p ? dep_independent : dep_dependent);
2886
2887 return indep_p;
2888 }
2889
2890
2891 /* Returns true if we can perform store motion of REF from LOOP. */
2892
2893 static bool
2894 can_sm_ref_p (class loop *loop, im_mem_ref *ref)
2895 {
2896 tree base;
2897
2898 /* Can't hoist unanalyzable refs. */
2899 if (!MEM_ANALYZABLE (ref))
2900 return false;
2901
2902 /* Can't hoist/sink aggregate copies. */
2903 if (ref->mem.ref == error_mark_node)
2904 return false;
2905
2906 /* It should be movable. */
2907 if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
2908 || TREE_THIS_VOLATILE (ref->mem.ref)
2909 || !for_each_index (&ref->mem.ref, may_move_till, loop))
2910 return false;
2911
2912 /* If it can throw, fail; we do not properly update EH info. */
2913 if (tree_could_throw_p (ref->mem.ref))
2914 return false;
2915
2916 /* If it can trap, it must always be executed in LOOP.
2917 Readonly memory locations may trap when storing to them, but
2918 tree_could_trap_p is a predicate for rvalues, so check that
2919 explicitly. */
2920 base = get_base_address (ref->mem.ref);
2921 if ((tree_could_trap_p (ref->mem.ref)
2922 || (DECL_P (base) && TREE_READONLY (base)))
2923 /* ??? We can at least use false here, allowing loads? We
2924 are forcing conditional stores if the ref is not always
2925 stored to later anyway. So this would only guard
2926 the load we need to emit. Thus when the ref is not
2927 loaded we can elide this completely? */
2928 && !ref_always_accessed_p (loop, ref, true))
2929 return false;
2930
2931 /* Verify all loads of ref can be hoisted. */
2932 if (ref->loaded
2933 && bitmap_bit_p (ref->loaded, loop->num)
2934 && !ref_indep_loop_p (loop, ref, lim_raw))
2935 return false;
2936
2937 /* Verify the candidate can be disambiguated against all loads,
2938 that is, we can elide all in-loop stores. Disambiguation
2939 against stores is done later when we cannot guarantee preserving
2940 the order of stores. */
2941 if (!ref_indep_loop_p (loop, ref, sm_war))
2942 return false;
2943
2944 return true;
2945 }
2946
2947 /* Marks in REFS_TO_SM the references in LOOP for which store motion
2948 should be performed. SM_EXECUTED is the set of references for which
2949 store motion was performed in one of the outer loops. */
2950
2951 static void
2952 find_refs_for_sm (class loop *loop, bitmap sm_executed, bitmap refs_to_sm)
2953 {
2954 bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num];
2955 unsigned i;
2956 bitmap_iterator bi;
2957 im_mem_ref *ref;
2958
2959 EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
2960 {
2961 ref = memory_accesses.refs_list[i];
2962 if (can_sm_ref_p (loop, ref) && dbg_cnt (lim))
2963 bitmap_set_bit (refs_to_sm, i);
2964 }
2965 }
2966
2967 /* Checks whether LOOP (with exits stored in the EXITS array) is suitable
2968 for a store motion optimization (i.e. whether we can insert statements
2969 on its exits). */
2970
2971 static bool
2972 loop_suitable_for_sm (class loop *loop ATTRIBUTE_UNUSED,
2973 const vec<edge> &exits)
2974 {
2975 unsigned i;
2976 edge ex;
2977
2978 FOR_EACH_VEC_ELT (exits, i, ex)
2979 if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
2980 return false;
2981
2982 return true;
2983 }
2984
2985 /* Try to perform store motion for all memory references modified inside
2986 LOOP. SM_EXECUTED is the bitmap of the memory references for which
2987 store motion was executed in one of the outer loops. */
2988
2989 static void
2990 store_motion_loop (class loop *loop, bitmap sm_executed)
2991 {
2992 auto_vec<edge> exits = get_loop_exit_edges (loop);
2993 class loop *subloop;
2994 bitmap sm_in_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
2995
2996 if (loop_suitable_for_sm (loop, exits))
2997 {
2998 find_refs_for_sm (loop, sm_executed, sm_in_loop);
2999 if (!bitmap_empty_p (sm_in_loop))
3000 hoist_memory_references (loop, sm_in_loop, exits);
3001 }
3002
3003 bitmap_ior_into (sm_executed, sm_in_loop);
3004 for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
3005 store_motion_loop (subloop, sm_executed);
3006 bitmap_and_compl_into (sm_executed, sm_in_loop);
3007 BITMAP_FREE (sm_in_loop);
3008 }
3009
3010 /* Try to perform store motion for all memory references modified inside
3011 loops. */
3012
3013 static void
3014 do_store_motion (void)
3015 {
3016 class loop *loop;
3017 bitmap sm_executed = BITMAP_ALLOC (&lim_bitmap_obstack);
3018
3019 for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
3020 store_motion_loop (loop, sm_executed);
3021
3022 BITMAP_FREE (sm_executed);
3023 }
3024
3025 /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
3026 for each such basic block bb records the outermost loop for which
3027 execution of its header implies execution of bb. CONTAINS_CALL is the
3028 bitmap of blocks that contain a nonpure call. */
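/* For illustration (a hypothetical CFG):

     loop:
       bb1;                     // header
       if (cond) bb2;
       bb3; if (exit_cond) goto out;
       bb4;
       goto loop;

   bb1 and bb3 are reached on every iteration that enters the header, so
   they get ALWAYS_EXECUTED_IN set to the loop; bb2 is only conditionally
   executed and bb4 follows a possible exit, so neither is marked.  The
   walk below also stops at blocks containing a nonpure call
   (CONTAINS_CALL), since such a call may not return.  */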
3029
3030 static void
3031 fill_always_executed_in_1 (class loop *loop, sbitmap contains_call)
3032 {
3033 basic_block bb = NULL, last = NULL;
3034 edge e;
3035 class loop *inn_loop = loop;
3036
3037 if (ALWAYS_EXECUTED_IN (loop->header) == NULL)
3038 {
3039 auto_vec<basic_block, 64> worklist;
3040 worklist.reserve_exact (loop->num_nodes);
3041 worklist.quick_push (loop->header);
3042 do
3043 {
3044 edge_iterator ei;
3045 bb = worklist.pop ();
3046
3047 if (!flow_bb_inside_loop_p (inn_loop, bb))
3048 {
3049 /* When we are leaving a possibly infinite inner loop
3050 we have to stop processing. */
3051 if (!finite_loop_p (inn_loop))
3052 break;
3053 /* If the loop was finite we can continue with processing
3054 the loop we exited to. */
3055 inn_loop = bb->loop_father;
3056 }
3057
3058 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
3059 last = bb;
3060
3061 if (bitmap_bit_p (contains_call, bb->index))
3062 break;
3063
3064 /* If LOOP exits from this BB stop processing. */
3065 FOR_EACH_EDGE (e, ei, bb->succs)
3066 if (!flow_bb_inside_loop_p (loop, e->dest))
3067 break;
3068 if (e)
3069 break;
3070
3071 /* A loop might be infinite (TODO use simple loop analysis
3072 to disprove this if possible). */
3073 if (bb->flags & BB_IRREDUCIBLE_LOOP)
3074 break;
3075
3076 if (bb->loop_father->header == bb)
3077 /* Record that we enter into a subloop since it might not
3078 be finite. */
3079 /* ??? Entering into a not always executed subloop makes
3080 fill_always_executed_in quadratic in loop depth since
3081 we walk those loops N times. This is not a problem
3082 in practice though, see PR102253 for a worst-case testcase. */
3083 inn_loop = bb->loop_father;
3084
3085 /* Walk the body of LOOP sorted by dominance relation. Additionally,
3086 if a basic block S dominates the latch, then only blocks dominated
3087 by S are after it.
3088 This is get_loop_body_in_dom_order using a worklist algorithm and
3089 stopping once we are no longer interested in visiting further
3090 blocks. */
3091 unsigned old_len = worklist.length ();
3092 unsigned postpone = 0;
3093 for (basic_block son = first_dom_son (CDI_DOMINATORS, bb);
3094 son;
3095 son = next_dom_son (CDI_DOMINATORS, son))
3096 {
3097 if (!flow_bb_inside_loop_p (loop, son))
3098 continue;
3099 if (dominated_by_p (CDI_DOMINATORS, loop->latch, son))
3100 postpone = worklist.length ();
3101 worklist.quick_push (son);
3102 }
3103 if (postpone)
3104 /* Postponing the block that dominates the latch means
3105 processing it last and thus putting it earliest in the
3106 worklist. */
3107 std::swap (worklist[old_len], worklist[postpone]);
3108 }
3109 while (!worklist.is_empty ());
3110
3111 while (1)
3112 {
3113 if (dump_enabled_p ())
3114 dump_printf (MSG_NOTE, "BB %d is always executed in loop %d\n",
3115 last->index, loop->num);
3116 SET_ALWAYS_EXECUTED_IN (last, loop);
3117 if (last == loop->header)
3118 break;
3119 last = get_immediate_dominator (CDI_DOMINATORS, last);
3120 }
3121 }
3122
3123 for (loop = loop->inner; loop; loop = loop->next)
3124 fill_always_executed_in_1 (loop, contains_call);
3125 }
3126
3127 /* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e.
3128 for each such basic block bb records the outermost loop for which
3129 execution of its header implies execution of bb. */
3130
3131 static void
3132 fill_always_executed_in (void)
3133 {
3134 basic_block bb;
3135 class loop *loop;
3136
3137 auto_sbitmap contains_call (last_basic_block_for_fn (cfun));
3138 bitmap_clear (contains_call);
3139 FOR_EACH_BB_FN (bb, cfun)
3140 {
3141 gimple_stmt_iterator gsi;
3142 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3143 {
3144 if (nonpure_call_p (gsi_stmt (gsi)))
3145 break;
3146 }
3147
3148 if (!gsi_end_p (gsi))
3149 bitmap_set_bit (contains_call, bb->index);
3150 }
3151
3152 for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
3153 fill_always_executed_in_1 (loop, contains_call);
3154 }
3155
3156
3157 /* Compute the global information needed by the loop invariant motion pass. */
3158
3159 static void
3160 tree_ssa_lim_initialize (bool store_motion)
3161 {
3162 unsigned i;
3163
3164 bitmap_obstack_initialize (&lim_bitmap_obstack);
3165 gcc_obstack_init (&mem_ref_obstack);
3166 lim_aux_data_map = new hash_map<gimple *, lim_aux_data *>;
3167
3168 if (flag_tm)
3169 compute_transaction_bits ();
3170
3171 memory_accesses.refs = new hash_table<mem_ref_hasher> (100);
3172 memory_accesses.refs_list.create (100);
3173 /* Allocate a special, unanalyzable mem-ref with ID zero. */
3174 memory_accesses.refs_list.quick_push
3175 (mem_ref_alloc (NULL, 0, UNANALYZABLE_MEM_ID));
3176
3177 memory_accesses.refs_loaded_in_loop.create (number_of_loops (cfun));
3178 memory_accesses.refs_loaded_in_loop.quick_grow (number_of_loops (cfun));
3179 memory_accesses.refs_stored_in_loop.create (number_of_loops (cfun));
3180 memory_accesses.refs_stored_in_loop.quick_grow (number_of_loops (cfun));
3181 if (store_motion)
3182 {
3183 memory_accesses.all_refs_stored_in_loop.create (number_of_loops (cfun));
3184 memory_accesses.all_refs_stored_in_loop.quick_grow
3185 (number_of_loops (cfun));
3186 }
3187
3188 for (i = 0; i < number_of_loops (cfun); i++)
3189 {
3190 bitmap_initialize (&memory_accesses.refs_loaded_in_loop[i],
3191 &lim_bitmap_obstack);
3192 bitmap_initialize (&memory_accesses.refs_stored_in_loop[i],
3193 &lim_bitmap_obstack);
3194 if (store_motion)
3195 bitmap_initialize (&memory_accesses.all_refs_stored_in_loop[i],
3196 &lim_bitmap_obstack);
3197 }
3198
3199 memory_accesses.ttae_cache = NULL;
3200
3201 /* Initialize bb_loop_postorder with a mapping from loop->num to
3202 its postorder index. */
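/* E.g. (a hypothetical nest) for  loop1 { loop2 { } loop3 { } }  the
   innermost-first walk assigns loop2 -> 0, loop3 -> 1, loop1 -> 2, so
   every inner loop gets a smaller index than its enclosing loop, which
   is what the bsearch in for_all_locs_in_loop relies on.  */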
3203 i = 0;
3204 bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun));
3205 for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
3206 bb_loop_postorder[loop->num] = i++;
3207 }
3208
3209 /* Cleans up after the invariant motion pass. */
3210
3211 static void
3212 tree_ssa_lim_finalize (void)
3213 {
3214 basic_block bb;
3215 unsigned i;
3216 im_mem_ref *ref;
3217
3218 FOR_EACH_BB_FN (bb, cfun)
3219 SET_ALWAYS_EXECUTED_IN (bb, NULL);
3220
3221 bitmap_obstack_release (&lim_bitmap_obstack);
3222 delete lim_aux_data_map;
3223
3224 delete memory_accesses.refs;
3225 memory_accesses.refs = NULL;
3226
3227 FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
3228 memref_free (ref);
3229 memory_accesses.refs_list.release ();
3230 obstack_free (&mem_ref_obstack, NULL);
3231
3232 memory_accesses.refs_loaded_in_loop.release ();
3233 memory_accesses.refs_stored_in_loop.release ();
3234 memory_accesses.all_refs_stored_in_loop.release ();
3235
3236 if (memory_accesses.ttae_cache)
3237 free_affine_expand_cache (&memory_accesses.ttae_cache);
3238
3239 free (bb_loop_postorder);
3240 }
3241
3242 /* Moves invariants from loops. Only "expensive" invariants are moved out --
3243 i.e. those that are likely to be a win regardless of the register pressure.
3244 Only perform store motion if STORE_MOTION is true. */
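/* For example (a hypothetical input), this turns

     for (i = 0; i < n; i++)
       a[i] = x * y + b[i];

   into

     tmp = x * y;
     for (i = 0; i < n; i++)
       a[i] = tmp + b[i];

   and, when STORE_MOTION is enabled, a loop that repeatedly stores to
   the same location, e.g.  *p = *p + a[i],  is rewritten to accumulate
   into a register that is loaded before the loop and stored back on the
   loop exits.  */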
3245
3246 unsigned int
3247 loop_invariant_motion_in_fun (function *fun, bool store_motion)
3248 {
3249 unsigned int todo = 0;
3250
3251 tree_ssa_lim_initialize (store_motion);
3252
3253 /* Gathers information about memory accesses in the loops. */
3254 analyze_memory_references (store_motion);
3255
3256 /* Fills ALWAYS_EXECUTED_IN information for basic blocks. */
3257 fill_always_executed_in ();
3258
3259 int *rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
3260 int n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);
3261
3262 /* For each statement determine the outermost loop in which it is
3263 invariant and the cost of computing the invariant. */
3264 for (int i = 0; i < n; ++i)
3265 compute_invariantness (BASIC_BLOCK_FOR_FN (fun, rpo[i]));
3266
3267 /* Execute store motion. Force the necessary invariants to be moved
3268 out of the loops as well. */
3269 if (store_motion)
3270 do_store_motion ();
3271
3272 free (rpo);
3273 rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
3274 n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);
3275
3276 /* Move the expressions that are expensive enough. */
3277 for (int i = 0; i < n; ++i)
3278 todo |= move_computations_worker (BASIC_BLOCK_FOR_FN (fun, rpo[i]));
3279
3280 free (rpo);
3281
3282 gsi_commit_edge_inserts ();
3283 if (need_ssa_update_p (fun))
3284 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
3285
3286 tree_ssa_lim_finalize ();
3287
3288 return todo;
3289 }
3290
3291 /* Loop invariant motion pass. */
3292
3293 namespace {
3294
3295 const pass_data pass_data_lim =
3296 {
3297 GIMPLE_PASS, /* type */
3298 "lim", /* name */
3299 OPTGROUP_LOOP, /* optinfo_flags */
3300 TV_LIM, /* tv_id */
3301 PROP_cfg, /* properties_required */
3302 0, /* properties_provided */
3303 0, /* properties_destroyed */
3304 0, /* todo_flags_start */
3305 0, /* todo_flags_finish */
3306 };
3307
3308 class pass_lim : public gimple_opt_pass
3309 {
3310 public:
3311 pass_lim (gcc::context *ctxt)
3312 : gimple_opt_pass (pass_data_lim, ctxt)
3313 {}
3314
3315 /* opt_pass methods: */
3316 opt_pass * clone () { return new pass_lim (m_ctxt); }
3317 virtual bool gate (function *) { return flag_tree_loop_im != 0; }
3318 virtual unsigned int execute (function *);
3319
3320 }; // class pass_lim
3321
3322 unsigned int
3323 pass_lim::execute (function *fun)
3324 {
3325 bool in_loop_pipeline = scev_initialized_p ();
3326 if (!in_loop_pipeline)
3327 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
3328
3329 if (number_of_loops (fun) <= 1)
3330 return 0;
3331 unsigned int todo = loop_invariant_motion_in_fun (fun, flag_move_loop_stores);
3332
3333 if (!in_loop_pipeline)
3334 loop_optimizer_finalize ();
3335 else
3336 scev_reset ();
3337 return todo;
3338 }
3339
3340 } // anon namespace
3341
3342 gimple_opt_pass *
3343 make_pass_lim (gcc::context *ctxt)
3344 {
3345 return new pass_lim (ctxt);
3346 }
3347
3348