7d23383d 1/* Loop invariant motion.
fbd26352 2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
48e1416a 3
7d23383d 4This file is part of GCC.
48e1416a 5
7d23383d 6GCC is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by the
8c4c00c1 8Free Software Foundation; either version 3, or (at your option) any
7d23383d 9later version.
48e1416a 10
7d23383d 11GCC is distributed in the hope that it will be useful, but WITHOUT
12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
48e1416a 15
7d23383d 16You should have received a copy of the GNU General Public License
8c4c00c1 17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
7d23383d 19
20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
9ef16211 23#include "backend.h"
7d23383d 24#include "tree.h"
9ef16211 25#include "gimple.h"
7c29e30e 26#include "cfghooks.h"
27#include "tree-pass.h"
9ef16211 28#include "ssa.h"
7c29e30e 29#include "gimple-pretty-print.h"
b20a8bb4 30#include "fold-const.h"
94ea8568 31#include "cfganal.h"
bc61cadb 32#include "tree-eh.h"
a8783bee 33#include "gimplify.h"
dcf1a1ec 34#include "gimple-iterator.h"
073c1fd5 35#include "tree-cfg.h"
05d9c18a 36#include "tree-ssa-loop-manip.h"
073c1fd5 37#include "tree-ssa-loop.h"
38#include "tree-into-ssa.h"
7d23383d 39#include "cfgloop.h"
40#include "domwalk.h"
41#include "params.h"
063a8bce 42#include "tree-affine.h"
a973ed42 43#include "tree-ssa-propagate.h"
e797f49f 44#include "trans-mem.h"
f6a34e3f 45#include "gimple-fold.h"
b11dc37d 46#include "tree-scalar-evolution.h"
5b9b62c9 47#include "tree-ssa-loop-niter.h"
1b94b295 48#include "alias.h"
49#include "builtins.h"
50#include "tree-dfa.h"
7d23383d 51
ca53beb1 52/* TODO: Support for predicated code motion. I.e.
53
54 while (1)
55 {
56 if (cond)
57 {
58 a = inv;
59 something;
60 }
61 }
62
61025ec0 63 Where COND and INV are invariants, but evaluating INV may trap or be
ca53beb1 64 invalid for some other reason if !COND. This may be transformed to
65
66 if (cond)
67 a = inv;
68 while (1)
69 {
70 if (cond)
71 something;
72 } */
73
7d23383d 74/* The auxiliary data kept for each statement. */
75
76struct lim_aux_data
77{
2e966e2a 78 class loop *max_loop; /* The outermost loop in which the statement
7d23383d 79 is invariant. */
80
2e966e2a 81 class loop *tgt_loop; /* The loop out of which we want to move the
7d23383d 82 invariant. */
83
2e966e2a 84 class loop *always_executed_in;
7d23383d 85 /* The outermost loop for which we are sure
86 the statement is executed if the loop
87 is entered. */
88
7d23383d 89 unsigned cost; /* Cost of the computation performed by the
90 statement. */
91
735d8b51 92 unsigned ref; /* The simple_mem_ref in this stmt or 0. */
93
94 vec<gimple *> depends; /* Vector of statements that must also be
a2c22c0d 95 hoisted out of the loop when this statement
96 is hoisted; i.e. those that define the
97 operands of the statement and are inside of
98 the MAX_LOOP loop. */
7d23383d 99};
100
75a70cf9 101/* Maps statements to their lim_aux_data. */
102
42acab1c 103static hash_map<gimple *, lim_aux_data *> *lim_aux_data_map;
7d23383d 104
063a8bce 105/* Description of a memory reference location. */
7d23383d 106
4a7ff262 107struct mem_ref_loc
7d23383d 108{
109 tree *ref; /* The reference itself. */
42acab1c 110 gimple *stmt; /* The statement in which it occurs. */
4a7ff262 111};
063a8bce 112
063a8bce 113
063a8bce 114/* Description of a memory reference. */
115
251317e4 116class im_mem_ref
55a03692 117{
251317e4 118public:
7eae52f1 119 unsigned id : 30; /* ID assigned to the memory reference
063a8bce 120 (its index in memory_accesses.refs_list) */
1b94b295 121 unsigned ref_canonical : 1; /* Whether mem.ref was canonicalized. */
7eae52f1 122 unsigned ref_decomposed : 1; /* Whether the ref was hashed from mem. */
55a03692 123 hashval_t hash; /* Its hash value. */
09f4cf62 124
125 /* The memory access itself and associated caching of alias-oracle
126 query meta-data. */
127 ao_ref mem;
128
3e48928c 129 bitmap stored; /* The set of loops in which this memory location
063a8bce 130 is stored to. */
3e48928c 131 vec<mem_ref_loc> accesses_in_loop;
063a8bce 132 /* The locations of the accesses. Vector
133 indexed by the loop number. */
063a8bce 134
135 /* The following sets are computed on demand. We keep both set and
136 its complement, so that we know whether the information was
137 already computed or not. */
feba4360 138 bitmap_head indep_loop; /* The set of loops in which the memory
063a8bce 139 reference is independent, meaning:
140 If it is stored in the loop, this store
141 is independent of all other loads and
142 stores.
143 If it is only loaded, then it is independent
144 of all stores in the loop. */
feba4360 145 bitmap_head dep_loop; /* The complement of INDEP_LOOP. */
4a7ff262 146};
063a8bce 147
8f9829e8 148/* We use two bits per loop in the ref->{in,}dep_loop bitmaps, the first
149 to record (in)dependence against stores in the loop and its subloops, the
150 second to record (in)dependence against all references in the loop
151 and its subloops. */
152#define LOOP_DEP_BIT(loopnum, storedp) (2 * (loopnum) + (storedp ? 1 : 0))
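/* For example, the loop with number 5 owns bits 10 and 11 in these bitmaps:
   LOOP_DEP_BIT (5, false) == 10 and LOOP_DEP_BIT (5, true) == 11, i.e. each
   loop gets the pair of bits 2 * loopnum and 2 * loopnum + 1.  */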
063a8bce 153
3e871d4d 154/* Mem_ref hashtable helpers. */
155
770ff93b 156struct mem_ref_hasher : nofree_ptr_hash <im_mem_ref>
3e871d4d 157{
1b94b295 158 typedef ao_ref *compare_type;
9969c043 159 static inline hashval_t hash (const im_mem_ref *);
1b94b295 160 static inline bool equal (const im_mem_ref *, const ao_ref *);
3e871d4d 161};
162
2e966e2a 163/* A hash function for class im_mem_ref object MEM. */
3e871d4d 164
165inline hashval_t
9969c043 166mem_ref_hasher::hash (const im_mem_ref *mem)
3e871d4d 167{
168 return mem->hash;
169}
170
2e966e2a 171/* An equality function for class im_mem_ref object MEM1 with
3e871d4d 172 memory reference OBJ2. */
173
174inline bool
1b94b295 175mem_ref_hasher::equal (const im_mem_ref *mem1, const ao_ref *obj2)
176{
177 if (obj2->max_size_known_p ())
7eae52f1 178 return (mem1->ref_decomposed
179 && operand_equal_p (mem1->mem.base, obj2->base, 0)
1b94b295 180 && known_eq (mem1->mem.offset, obj2->offset)
181 && known_eq (mem1->mem.size, obj2->size)
182 && known_eq (mem1->mem.max_size, obj2->max_size)
183 && mem1->mem.volatile_p == obj2->volatile_p
25971ca0 184 && (mem1->mem.ref_alias_set == obj2->ref_alias_set
185 /* We do not canonicalize alias-sets, but for the
186 special case where we have not canonicalized yet
187 and the incoming ref is an alias-set-zero MEM we
188 pick the correct one already. */
189 || (!mem1->ref_canonical
190 && (TREE_CODE (obj2->ref) == MEM_REF
191 || TREE_CODE (obj2->ref) == TARGET_MEM_REF)
192 && obj2->ref_alias_set == 0)
193 /* Likewise if there's a canonical ref with alias-set zero. */
194 || (mem1->ref_canonical && mem1->mem.ref_alias_set == 0))
1b94b295 195 && types_compatible_p (TREE_TYPE (mem1->mem.ref),
196 TREE_TYPE (obj2->ref)));
197 else
198 return operand_equal_p (mem1->mem.ref, obj2->ref, 0);
3e871d4d 199}
063a8bce 200
063a8bce 201
202/* Description of memory accesses in loops. */
203
204static struct
205{
206 /* The hash table of memory references accessed in loops. */
c1f445d2 207 hash_table<mem_ref_hasher> *refs;
063a8bce 208
209 /* The list of memory references. */
4a7ff262 210 vec<im_mem_ref *> refs_list;
063a8bce 211
212 /* The set of memory references accessed in each loop. */
feba4360 213 vec<bitmap_head> refs_in_loop;
063a8bce 214
8f9829e8 215 /* The set of memory references stored in each loop. */
feba4360 216 vec<bitmap_head> refs_stored_in_loop;
8f9829e8 217
8f9829e8 218 /* The set of memory references stored in each loop, including subloops. */
feba4360 219 vec<bitmap_head> all_refs_stored_in_loop;
063a8bce 220
221 /* Cache for expanding memory addresses. */
5f8841a5 222 hash_map<tree, name_expansion *> *ttae_cache;
063a8bce 223} memory_accesses;
224
4fb07d00 225/* Obstack for the bitmaps in the above data structures. */
226static bitmap_obstack lim_bitmap_obstack;
3e48928c 227static obstack mem_ref_obstack;
4fb07d00 228
2e966e2a 229static bool ref_indep_loop_p (class loop *, im_mem_ref *);
230static bool ref_always_accessed_p (class loop *, im_mem_ref *, bool);
7d23383d 231
232/* Minimum cost of an expensive expression. */
233#define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))
234
2fd20c29 235/* The outermost loop for which execution of the header guarantees that the
7d23383d 236 block will be executed. */
2e966e2a 237#define ALWAYS_EXECUTED_IN(BB) ((class loop *) (BB)->aux)
2fd20c29 238#define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))
7d23383d 239
04911505 240/* ID of the shared unanalyzable mem. */
241#define UNANALYZABLE_MEM_ID 0
242
0766b2c0 243/* Whether the reference was analyzable. */
04911505 244#define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID)
0766b2c0 245
75a70cf9 246static struct lim_aux_data *
42acab1c 247init_lim_data (gimple *stmt)
75a70cf9 248{
06ecf488 249 lim_aux_data *p = XCNEW (struct lim_aux_data);
250 lim_aux_data_map->put (stmt, p);
75a70cf9 251
06ecf488 252 return p;
75a70cf9 253}
254
255static struct lim_aux_data *
42acab1c 256get_lim_data (gimple *stmt)
75a70cf9 257{
06ecf488 258 lim_aux_data **p = lim_aux_data_map->get (stmt);
75a70cf9 259 if (!p)
260 return NULL;
261
06ecf488 262 return *p;
75a70cf9 263}
264
265/* Releases the memory occupied by DATA. */
266
267static void
268free_lim_aux_data (struct lim_aux_data *data)
269{
9af5ce0c 270 data->depends.release ();
75a70cf9 271 free (data);
272}
273
274static void
42acab1c 275clear_lim_data (gimple *stmt)
75a70cf9 276{
06ecf488 277 lim_aux_data **p = lim_aux_data_map->get (stmt);
75a70cf9 278 if (!p)
279 return;
280
06ecf488 281 free_lim_aux_data (*p);
75a70cf9 282 *p = NULL;
283}
284
48e1416a 285
f86b328b 286/* The possibilities of statement movement. */
287enum move_pos
288 {
289 MOVE_IMPOSSIBLE, /* No movement -- side effect expression. */
290 MOVE_PRESERVE_EXECUTION, /* Must not cause the non-executed statement
291 to become executed -- memory accesses, ... */
292 MOVE_POSSIBLE /* Unlimited movement. */
293 };
aed164c3 294
7d23383d 295
296/* If it is possible to hoist the statement STMT unconditionally,
297 returns MOVE_POSSIBLE.
298 If it is possible to hoist the statement STMT, but we must avoid making
299 it executed if it would not be executed in the original program (e.g.
300 because it may trap), return MOVE_PRESERVE_EXECUTION.
301 Otherwise return MOVE_IMPOSSIBLE. */
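/* Roughly: with -funswitch-loops a GIMPLE_COND is reported as MOVE_POSSIBLE
   (to force its invariant operands out of the loop), an assignment whose
   right-hand side may trap (e.g. a division) is MOVE_PRESERVE_EXECUTION,
   and a store (a statement with a virtual definition) is MOVE_IMPOSSIBLE.  */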
302
07c03fb0 303enum move_pos
42acab1c 304movement_possibility (gimple *stmt)
7d23383d 305{
75a70cf9 306 tree lhs;
307 enum move_pos ret = MOVE_POSSIBLE;
7d23383d 308
309 if (flag_unswitch_loops
75a70cf9 310 && gimple_code (stmt) == GIMPLE_COND)
7d23383d 311 {
312 /* If we perform unswitching, force the operands of the invariant
313 condition to be moved out of the loop. */
7d23383d 314 return MOVE_POSSIBLE;
315 }
316
9bf0a3f9 317 if (gimple_code (stmt) == GIMPLE_PHI
318 && gimple_phi_num_args (stmt) <= 2
7c782c9b 319 && !virtual_operand_p (gimple_phi_result (stmt))
9bf0a3f9 320 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
321 return MOVE_POSSIBLE;
322
75a70cf9 323 if (gimple_get_lhs (stmt) == NULL_TREE)
7d23383d 324 return MOVE_IMPOSSIBLE;
325
dd277d48 326 if (gimple_vdef (stmt))
063a8bce 327 return MOVE_IMPOSSIBLE;
328
75a70cf9 329 if (stmt_ends_bb_p (stmt)
330 || gimple_has_volatile_ops (stmt)
331 || gimple_has_side_effects (stmt)
aac19106 332 || stmt_could_throw_p (cfun, stmt))
7d23383d 333 return MOVE_IMPOSSIBLE;
334
75a70cf9 335 if (is_gimple_call (stmt))
ca53beb1 336 {
337 /* While pure or const call is guaranteed to have no side effects, we
338 cannot move it arbitrarily. Consider code like
339
340 char *s = something ();
341
342 while (1)
343 {
344 if (s)
345 t = strlen (s);
346 else
347 t = 0;
348 }
349
350 Here the strlen call cannot be moved out of the loop, even though
351 s is invariant. In addition to possibly creating a call with
352 invalid arguments, moving out a function call that is not executed
353 may cause performance regressions in case the call is costly and
354 not executed at all. */
75a70cf9 355 ret = MOVE_PRESERVE_EXECUTION;
356 lhs = gimple_call_lhs (stmt);
ca53beb1 357 }
75a70cf9 358 else if (is_gimple_assign (stmt))
359 lhs = gimple_assign_lhs (stmt);
360 else
361 return MOVE_IMPOSSIBLE;
362
363 if (TREE_CODE (lhs) == SSA_NAME
364 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
365 return MOVE_IMPOSSIBLE;
366
367 if (TREE_CODE (lhs) != SSA_NAME
368 || gimple_could_trap_p (stmt))
369 return MOVE_PRESERVE_EXECUTION;
370
de60f90c 371 /* Non local loads in a transaction cannot be hoisted out. Well,
372 unless the load happens on every path out of the loop, but we
373 don't take this into account yet. */
374 if (flag_tm
375 && gimple_in_transaction (stmt)
376 && gimple_assign_single_p (stmt))
377 {
378 tree rhs = gimple_assign_rhs1 (stmt);
379 if (DECL_P (rhs) && is_global_var (rhs))
380 {
381 if (dump_file)
382 {
383 fprintf (dump_file, "Cannot hoist conditional load of ");
384 print_generic_expr (dump_file, rhs, TDF_SLIM);
385 fprintf (dump_file, " because it is in a transaction.\n");
386 }
387 return MOVE_IMPOSSIBLE;
388 }
389 }
390
75a70cf9 391 return ret;
7d23383d 392}
393
394/* Suppose that operand DEF is used inside the LOOP. Returns the outermost
91275768 395 loop to which we could move the expression using DEF if it did not have
7d23383d 396 other operands, i.e. the outermost loop enclosing LOOP in which the value
397 of DEF is invariant. */
398
2e966e2a 399static class loop *
400outermost_invariant_loop (tree def, class loop *loop)
7d23383d 401{
42acab1c 402 gimple *def_stmt;
7d23383d 403 basic_block def_bb;
2e966e2a 404 class loop *max_loop;
75a70cf9 405 struct lim_aux_data *lim_data;
7d23383d 406
75a70cf9 407 if (!def)
7d23383d 408 return superloop_at_depth (loop, 1);
409
75a70cf9 410 if (TREE_CODE (def) != SSA_NAME)
411 {
412 gcc_assert (is_gimple_min_invariant (def));
413 return superloop_at_depth (loop, 1);
414 }
415
7d23383d 416 def_stmt = SSA_NAME_DEF_STMT (def);
75a70cf9 417 def_bb = gimple_bb (def_stmt);
7d23383d 418 if (!def_bb)
419 return superloop_at_depth (loop, 1);
420
421 max_loop = find_common_loop (loop, def_bb->loop_father);
422
75a70cf9 423 lim_data = get_lim_data (def_stmt);
424 if (lim_data != NULL && lim_data->max_loop != NULL)
7d23383d 425 max_loop = find_common_loop (max_loop,
75a70cf9 426 loop_outer (lim_data->max_loop));
7d23383d 427 if (max_loop == loop)
428 return NULL;
9e3536f4 429 max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);
7d23383d 430
431 return max_loop;
432}
433
7d23383d 434/* DATA is a structure containing information associated with a statement
435 inside LOOP. DEF is one of the operands of this statement.
48e1416a 436
7d23383d 437 Find the outermost loop enclosing LOOP in which the value of DEF is invariant
438 and record this in DATA->max_loop field. If DEF itself is defined inside
439 this loop as well (i.e. we need to hoist it out of the loop if we want
440 to hoist the statement represented by DATA), record the statement in which
441 DEF is defined in the DATA->depends list. Additionally if ADD_COST is true,
442 add the cost of the computation of DEF to the DATA->cost.
48e1416a 443
7d23383d 444 If DEF is not invariant in LOOP, return false. Otherwise return true. */
445
446static bool
2e966e2a 447add_dependency (tree def, struct lim_aux_data *data, class loop *loop,
7d23383d 448 bool add_cost)
449{
42acab1c 450 gimple *def_stmt = SSA_NAME_DEF_STMT (def);
75a70cf9 451 basic_block def_bb = gimple_bb (def_stmt);
2e966e2a 452 class loop *max_loop;
75a70cf9 453 struct lim_aux_data *def_data;
7d23383d 454
455 if (!def_bb)
456 return true;
457
458 max_loop = outermost_invariant_loop (def, loop);
459 if (!max_loop)
460 return false;
461
462 if (flow_loop_nested_p (data->max_loop, max_loop))
463 data->max_loop = max_loop;
464
75a70cf9 465 def_data = get_lim_data (def_stmt);
466 if (!def_data)
7d23383d 467 return true;
468
469 if (add_cost
470 /* Only add the cost if the statement defining DEF is inside LOOP,
471 i.e. if it is likely that by moving the invariants dependent
472 on it, we will be able to avoid creating a new register for
473 it (since it will be only used in these dependent invariants). */
474 && def_bb->loop_father == loop)
75a70cf9 475 data->cost += def_data->cost;
7d23383d 476
a2c22c0d 477 data->depends.safe_push (def_stmt);
7d23383d 478
479 return true;
480}
481
0766b2c0 482/* Returns an estimate of the cost of statement STMT. The values here
483 are just ad-hoc constants, similar to costs for inlining. */
7d23383d 484
485static unsigned
42acab1c 486stmt_cost (gimple *stmt)
7d23383d 487{
7d23383d 488 /* Always try to create possibilities for unswitching. */
9bf0a3f9 489 if (gimple_code (stmt) == GIMPLE_COND
490 || gimple_code (stmt) == GIMPLE_PHI)
7d23383d 491 return LIM_EXPENSIVE;
492
0766b2c0 493 /* We should be hoisting calls if possible. */
75a70cf9 494 if (is_gimple_call (stmt))
7d23383d 495 {
0766b2c0 496 tree fndecl;
7d23383d 497
498 /* Unless the call is a builtin_constant_p; this always folds to a
499 constant, so moving it is useless. */
75a70cf9 500 fndecl = gimple_call_fndecl (stmt);
a0e9bfbb 501 if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_CONSTANT_P))
7d23383d 502 return 0;
503
0766b2c0 504 return LIM_EXPENSIVE;
75a70cf9 505 }
506
0766b2c0 507 /* Hoisting memory references out should almost surely be a win. */
508 if (gimple_references_memory_p (stmt))
509 return LIM_EXPENSIVE;
510
75a70cf9 511 if (gimple_code (stmt) != GIMPLE_ASSIGN)
0766b2c0 512 return 1;
7d23383d 513
75a70cf9 514 switch (gimple_assign_rhs_code (stmt))
515 {
7d23383d 516 case MULT_EXPR:
23da77dd 517 case WIDEN_MULT_EXPR:
518 case WIDEN_MULT_PLUS_EXPR:
519 case WIDEN_MULT_MINUS_EXPR:
520 case DOT_PROD_EXPR:
7d23383d 521 case TRUNC_DIV_EXPR:
522 case CEIL_DIV_EXPR:
523 case FLOOR_DIV_EXPR:
524 case ROUND_DIV_EXPR:
525 case EXACT_DIV_EXPR:
526 case CEIL_MOD_EXPR:
527 case FLOOR_MOD_EXPR:
528 case ROUND_MOD_EXPR:
529 case TRUNC_MOD_EXPR:
fd544fc8 530 case RDIV_EXPR:
7d23383d 531 /* Division and multiplication are usually expensive. */
0766b2c0 532 return LIM_EXPENSIVE;
7d23383d 533
790963ba 534 case LSHIFT_EXPR:
535 case RSHIFT_EXPR:
23da77dd 536 case WIDEN_LSHIFT_EXPR:
537 case LROTATE_EXPR:
538 case RROTATE_EXPR:
0766b2c0 539 /* Shifts and rotates are usually expensive. */
540 return LIM_EXPENSIVE;
541
542 case CONSTRUCTOR:
543 /* Make vector construction cost proportional to the number
544 of elements. */
545 return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));
546
547 case SSA_NAME:
548 case PAREN_EXPR:
549 /* Whether or not something is wrapped inside a PAREN_EXPR
550 should not change move cost. Nor should an intermediate
551 unpropagated SSA name copy. */
552 return 0;
790963ba 553
7d23383d 554 default:
0766b2c0 555 return 1;
7d23383d 556 }
7d23383d 557}
558
063a8bce 559/* Finds the outermost loop between OUTER and LOOP in which the memory reference
560 REF is independent. If REF is not independent in LOOP, NULL is returned
561 instead. */
562
2e966e2a 563static class loop *
564outermost_indep_loop (class loop *outer, class loop *loop, im_mem_ref *ref)
063a8bce 565{
2e966e2a 566 class loop *aloop;
063a8bce 567
3e48928c 568 if (ref->stored && bitmap_bit_p (ref->stored, loop->num))
063a8bce 569 return NULL;
570
571 for (aloop = outer;
572 aloop != loop;
573 aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
3e48928c 574 if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
23298f15 575 && ref_indep_loop_p (aloop, ref))
063a8bce 576 return aloop;
577
23298f15 578 if (ref_indep_loop_p (loop, ref))
063a8bce 579 return loop;
580 else
581 return NULL;
582}
583
584/* If there is a simple load or store to a memory reference in STMT, returns
f0b5f617 585 the location of the memory reference, and sets IS_STORE according to whether
063a8bce 586 it is a store or load. Otherwise, returns NULL. */
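/* A small illustration (the SSA names are made up):

     _1 = a[i_2];    <- simple load: the location of a[i_2] is returned
                        and *IS_STORE is set to false
     a[i_2] = _3;    <- simple store: the location of a[i_2] is returned
                        and *IS_STORE is set to true
     _4 = _5 + _6;   <- no memory reference: NULL is returned  */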
587
588static tree *
42acab1c 589simple_mem_ref_in_stmt (gimple *stmt, bool *is_store)
063a8bce 590{
6d7105fe 591 tree *lhs, *rhs;
063a8bce 592
6d7105fe 593 /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns. */
594 if (!gimple_assign_single_p (stmt))
063a8bce 595 return NULL;
596
75a70cf9 597 lhs = gimple_assign_lhs_ptr (stmt);
6d7105fe 598 rhs = gimple_assign_rhs1_ptr (stmt);
063a8bce 599
6d7105fe 600 if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
063a8bce 601 {
063a8bce 602 *is_store = false;
6d7105fe 603 return rhs;
063a8bce 604 }
6d7105fe 605 else if (gimple_vdef (stmt)
606 && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
063a8bce 607 {
608 *is_store = true;
609 return lhs;
610 }
611 else
612 return NULL;
613}
614
9bf0a3f9 615/* From a controlling predicate in DOM determine the arguments from
616 the PHI node PHI that are chosen if the predicate evaluates to
617 true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
618 they are non-NULL. Returns true if the arguments can be determined,
619 else return false. */
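/* The handled shape is, schematically (the names are only illustrative):

     if (cond_1)                 <- controlling predicate in DOM
       x_1 = ...;
     else
       x_2 = ...;
     # x_3 = PHI <x_1, x_2>      <- PHI

   for which x_1 is returned as the true argument and x_2 as the false
   argument.  */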
620
621static bool
1a91d914 622extract_true_false_args_from_phi (basic_block dom, gphi *phi,
9bf0a3f9 623 tree *true_arg_p, tree *false_arg_p)
624{
df10fba0 625 edge te, fe;
626 if (! extract_true_false_controlled_edges (dom, gimple_bb (phi),
627 &te, &fe))
9bf0a3f9 628 return false;
629
630 if (true_arg_p)
df10fba0 631 *true_arg_p = PHI_ARG_DEF (phi, te->dest_idx);
9bf0a3f9 632 if (false_arg_p)
df10fba0 633 *false_arg_p = PHI_ARG_DEF (phi, fe->dest_idx);
9bf0a3f9 634
635 return true;
636}
637
7d23383d 638/* Determine the outermost loop to which it is possible to hoist a statement
639 STMT and store it to LIM_DATA (STMT)->max_loop. To do this we determine
640 the outermost loop in which the value computed by STMT is invariant.
641 If MUST_PRESERVE_EXEC is true, additionally choose a loop such that
642 we preserve whether STMT is executed. It also fills in other related
643 information in LIM_DATA (STMT).
48e1416a 644
7d23383d 645 The function returns false if STMT cannot be hoisted outside of the loop it
646 is defined in, and true otherwise. */
647
648static bool
42acab1c 649determine_max_movement (gimple *stmt, bool must_preserve_exec)
7d23383d 650{
75a70cf9 651 basic_block bb = gimple_bb (stmt);
2e966e2a 652 class loop *loop = bb->loop_father;
653 class loop *level;
75a70cf9 654 struct lim_aux_data *lim_data = get_lim_data (stmt);
43daa21e 655 tree val;
656 ssa_op_iter iter;
48e1416a 657
7d23383d 658 if (must_preserve_exec)
659 level = ALWAYS_EXECUTED_IN (bb);
660 else
661 level = superloop_at_depth (loop, 1);
662 lim_data->max_loop = level;
663
1a91d914 664 if (gphi *phi = dyn_cast <gphi *> (stmt))
9bf0a3f9 665 {
666 use_operand_p use_p;
667 unsigned min_cost = UINT_MAX;
668 unsigned total_cost = 0;
669 struct lim_aux_data *def_data;
670
671 /* We will end up promoting dependencies to be unconditionally
672 evaluated. For this reason the PHI cost (and thus the
673 cost we remove from the loop by doing the invariant motion)
674 is that of the cheapest PHI argument dependency chain. */
1a91d914 675 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
9bf0a3f9 676 {
677 val = USE_FROM_PTR (use_p);
413fb820 678
9bf0a3f9 679 if (TREE_CODE (val) != SSA_NAME)
413fb820 680 {
681 /* Assign const 1 to constants. */
682 min_cost = MIN (min_cost, 1);
683 total_cost += 1;
684 continue;
685 }
9bf0a3f9 686 if (!add_dependency (val, lim_data, loop, false))
687 return false;
07f7799f 688
42acab1c 689 gimple *def_stmt = SSA_NAME_DEF_STMT (val);
07f7799f 690 if (gimple_bb (def_stmt)
691 && gimple_bb (def_stmt)->loop_father == loop)
9bf0a3f9 692 {
07f7799f 693 def_data = get_lim_data (def_stmt);
694 if (def_data)
695 {
696 min_cost = MIN (min_cost, def_data->cost);
697 total_cost += def_data->cost;
698 }
9bf0a3f9 699 }
700 }
701
07f7799f 702 min_cost = MIN (min_cost, total_cost);
9bf0a3f9 703 lim_data->cost += min_cost;
704
1a91d914 705 if (gimple_phi_num_args (phi) > 1)
9bf0a3f9 706 {
707 basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
42acab1c 708 gimple *cond;
9bf0a3f9 709 if (gsi_end_p (gsi_last_bb (dom)))
710 return false;
711 cond = gsi_stmt (gsi_last_bb (dom));
712 if (gimple_code (cond) != GIMPLE_COND)
713 return false;
714 /* Verify that this is an extended form of a diamond and
715 the PHI arguments are completely controlled by the
716 predicate in DOM. */
1a91d914 717 if (!extract_true_false_args_from_phi (dom, phi, NULL, NULL))
9bf0a3f9 718 return false;
719
720 /* Fold in dependencies and cost of the condition. */
721 FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
722 {
723 if (!add_dependency (val, lim_data, loop, false))
724 return false;
725 def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
726 if (def_data)
47578cb1 727 lim_data->cost += def_data->cost;
9bf0a3f9 728 }
729
730 /* We want to avoid unconditionally executing very expensive
731 operations. As costs for our dependencies cannot be
732 negative, just claim we are not invariant for this case.
733 We also are not sure whether the control-flow inside the
734 loop will vanish. */
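	 /* A worked example of the check below, assuming LIM_EXPENSIVE is 20:
	    min_cost == 1 with total_cost == 50 is rejected, since 50 - 1 >= 40
	    and 50 / 1 > 2; min_cost == 30 with total_cost == 50 is accepted,
	    since 50 - 30 < 40.  */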
735 if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
736 && !(min_cost != 0
737 && total_cost / min_cost <= 2))
738 return false;
739
740 /* Assume that the control-flow in the loop will vanish.
741 ??? We should verify this and not artificially increase
742 the cost if that is not the case. */
743 lim_data->cost += stmt_cost (stmt);
744 }
745
746 return true;
747 }
748 else
749 FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
750 if (!add_dependency (val, lim_data, loop, true))
751 return false;
7d23383d 752
dd277d48 753 if (gimple_vuse (stmt))
063a8bce 754 {
735d8b51 755 im_mem_ref *ref
756 = lim_data ? memory_accesses.refs_list[lim_data->ref] : NULL;
757 if (ref
758 && MEM_ANALYZABLE (ref))
063a8bce 759 {
735d8b51 760 lim_data->max_loop = outermost_indep_loop (lim_data->max_loop,
761 loop, ref);
063a8bce 762 if (!lim_data->max_loop)
763 return false;
764 }
735d8b51 765 else if (! add_dependency (gimple_vuse (stmt), lim_data, loop, false))
766 return false;
063a8bce 767 }
7d23383d 768
769 lim_data->cost += stmt_cost (stmt);
770
771 return true;
772}
773
774/* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
775 and that one of the operands of this statement is computed by STMT.
776 Ensure that STMT (together with all the statements that define its
777 operands) is hoisted at least out of the loop LEVEL. */
778
779static void
2e966e2a 780set_level (gimple *stmt, class loop *orig_loop, class loop *level)
7d23383d 781{
2e966e2a 782 class loop *stmt_loop = gimple_bb (stmt)->loop_father;
75a70cf9 783 struct lim_aux_data *lim_data;
42acab1c 784 gimple *dep_stmt;
a2c22c0d 785 unsigned i;
7d23383d 786
787 stmt_loop = find_common_loop (orig_loop, stmt_loop);
75a70cf9 788 lim_data = get_lim_data (stmt);
789 if (lim_data != NULL && lim_data->tgt_loop != NULL)
7d23383d 790 stmt_loop = find_common_loop (stmt_loop,
75a70cf9 791 loop_outer (lim_data->tgt_loop));
7d23383d 792 if (flow_loop_nested_p (stmt_loop, level))
793 return;
794
75a70cf9 795 gcc_assert (level == lim_data->max_loop
796 || flow_loop_nested_p (lim_data->max_loop, level));
7d23383d 797
75a70cf9 798 lim_data->tgt_loop = level;
a2c22c0d 799 FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
800 set_level (dep_stmt, orig_loop, level);
7d23383d 801}
802
803/* Determines an outermost loop from which we want to hoist the statement STMT.
804 For now we choose the outermost possible loop. TODO -- use profiling
805 information to set it more sanely. */
806
807static void
42acab1c 808set_profitable_level (gimple *stmt)
7d23383d 809{
75a70cf9 810 set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
7d23383d 811}
812
75a70cf9 813/* Returns true if STMT is a call that has side effects. */
7d23383d 814
815static bool
42acab1c 816nonpure_call_p (gimple *stmt)
7d23383d 817{
75a70cf9 818 if (gimple_code (stmt) != GIMPLE_CALL)
7d23383d 819 return false;
820
75a70cf9 821 return gimple_has_side_effects (stmt);
7d23383d 822}
823
790963ba 824/* Rewrite a/b to a*(1/b). Return the invariant stmt to process. */
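/* Schematically (the operand names are only illustrative): inside a loop

     x_3 = a_1 / b_2;

   with loop-invariant b_2 becomes

     reciptmp_4 = 1.0 / b_2;
     x_3 = a_1 * reciptmp_4;

   so that the reciprocal statement can subsequently be hoisted out of the
   loop while the cheaper multiplication stays.  */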
825
42acab1c 826static gimple *
75a70cf9 827rewrite_reciprocal (gimple_stmt_iterator *bsi)
790963ba 828{
1a91d914 829 gassign *stmt, *stmt1, *stmt2;
03d37e4e 830 tree name, lhs, type;
193b8a15 831 tree real_one;
688ff29b 832 gimple_stmt_iterator gsi;
790963ba 833
1a91d914 834 stmt = as_a <gassign *> (gsi_stmt (*bsi));
75a70cf9 835 lhs = gimple_assign_lhs (stmt);
836 type = TREE_TYPE (lhs);
790963ba 837
74560f89 838 real_one = build_one_cst (type);
790963ba 839
03d37e4e 840 name = make_temp_ssa_name (type, NULL, "reciptmp");
e9cf809e 841 stmt1 = gimple_build_assign (name, RDIV_EXPR, real_one,
842 gimple_assign_rhs2 (stmt));
843 stmt2 = gimple_build_assign (lhs, MULT_EXPR, name,
844 gimple_assign_rhs1 (stmt));
790963ba 845
846 /* Replace division stmt with reciprocal and multiply stmts.
847 The multiply stmt is not invariant, so update iterator
848 and avoid rescanning. */
688ff29b 849 gsi = *bsi;
850 gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
851 gsi_replace (&gsi, stmt2, true);
790963ba 852
853 /* Continue processing with invariant reciprocal statement. */
854 return stmt1;
855}
856
857/* Check if the pattern at *BSI is a bittest of the form
858 (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0. */
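/* For example (an illustrative sketch, with b_2 loop-invariant and x_1 not):

     if ((x_1 >> b_2) & 1)

   is rewritten so that the mask computation

     shifttmp_3 = 1 << b_2;
     shifttmp_4 = x_1 & shifttmp_3;

   exposes the loop-invariant shift, and the comparison against zero then
   uses shifttmp_4.  */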
859
42acab1c 860static gimple *
75a70cf9 861rewrite_bittest (gimple_stmt_iterator *bsi)
790963ba 862{
1a91d914 863 gassign *stmt;
42acab1c 864 gimple *stmt1;
1a91d914 865 gassign *stmt2;
42acab1c 866 gimple *use_stmt;
1a91d914 867 gcond *cond_stmt;
03d37e4e 868 tree lhs, name, t, a, b;
790963ba 869 use_operand_p use;
870
1a91d914 871 stmt = as_a <gassign *> (gsi_stmt (*bsi));
75a70cf9 872 lhs = gimple_assign_lhs (stmt);
790963ba 873
874 /* Verify that the single use of lhs is a comparison against zero. */
875 if (TREE_CODE (lhs) != SSA_NAME
1a91d914 876 || !single_imm_use (lhs, &use, &use_stmt))
790963ba 877 return stmt;
1a91d914 878 cond_stmt = dyn_cast <gcond *> (use_stmt);
879 if (!cond_stmt)
880 return stmt;
881 if (gimple_cond_lhs (cond_stmt) != lhs
882 || (gimple_cond_code (cond_stmt) != NE_EXPR
883 && gimple_cond_code (cond_stmt) != EQ_EXPR)
884 || !integer_zerop (gimple_cond_rhs (cond_stmt)))
790963ba 885 return stmt;
886
887 /* Get at the operands of the shift. The rhs is TMP1 & 1. */
75a70cf9 888 stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
889 if (gimple_code (stmt1) != GIMPLE_ASSIGN)
790963ba 890 return stmt;
891
bef304b8 892 /* There may be a conversion in between, possibly inserted by fold. */
d9659041 893 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
790963ba 894 {
75a70cf9 895 t = gimple_assign_rhs1 (stmt1);
790963ba 896 if (TREE_CODE (t) != SSA_NAME
897 || !has_single_use (t))
898 return stmt;
899 stmt1 = SSA_NAME_DEF_STMT (t);
75a70cf9 900 if (gimple_code (stmt1) != GIMPLE_ASSIGN)
790963ba 901 return stmt;
790963ba 902 }
903
904 /* Verify that B is loop invariant but A is not. Verify that with
905 all the stmt walking we are still in the same loop. */
75a70cf9 906 if (gimple_assign_rhs_code (stmt1) != RSHIFT_EXPR
907 || loop_containing_stmt (stmt1) != loop_containing_stmt (stmt))
908 return stmt;
790963ba 909
75a70cf9 910 a = gimple_assign_rhs1 (stmt1);
911 b = gimple_assign_rhs2 (stmt1);
912
913 if (outermost_invariant_loop (b, loop_containing_stmt (stmt1)) != NULL
914 && outermost_invariant_loop (a, loop_containing_stmt (stmt1)) == NULL)
915 {
688ff29b 916 gimple_stmt_iterator rsi;
917
790963ba 918 /* 1 << B */
790963ba 919 t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
920 build_int_cst (TREE_TYPE (a), 1), b);
03d37e4e 921 name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
922 stmt1 = gimple_build_assign (name, t);
790963ba 923
924 /* A & (1 << B) */
925 t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
03d37e4e 926 name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
927 stmt2 = gimple_build_assign (name, t);
3de988b5 928
929 /* Replace the SSA_NAME we compare against zero. Adjust
930 the type of zero accordingly. */
27dc0bdf 931 SET_USE (use, name);
1a91d914 932 gimple_cond_set_rhs (cond_stmt,
933 build_int_cst_type (TREE_TYPE (name),
934 0));
790963ba 935
688ff29b 936 /* Don't use gsi_replace here, none of the new assignments sets
937 the variable originally set in stmt. Move bsi to stmt1, and
938 then remove the original stmt, so that we get a chance to
939 retain debug info for it. */
940 rsi = *bsi;
941 gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
942 gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
c7f66ec2 943 gimple *to_release = gsi_stmt (rsi);
688ff29b 944 gsi_remove (&rsi, true);
c7f66ec2 945 release_defs (to_release);
790963ba 946
947 return stmt1;
948 }
949
950 return stmt;
951}
952
54c91640 953/* For each statement determines the outermost loop in which it is invariant,
954 the statements on whose motion it depends and the cost of the computation.
955 This information is stored in the LIM_DATA structure associated with
956 each statement. */
957class invariantness_dom_walker : public dom_walker
958{
959public:
960 invariantness_dom_walker (cdi_direction direction)
961 : dom_walker (direction) {}
962
96752458 963 virtual edge before_dom_children (basic_block);
54c91640 964};
790963ba 965
7d23383d 966/* Determine the outermost loops in which statements in basic block BB are
967 invariant, and record them in the LIM_DATA associated with the statements.
54c91640 968 Callback for dom_walker. */
7d23383d 969
96752458 970edge
54c91640 971invariantness_dom_walker::before_dom_children (basic_block bb)
7d23383d 972{
973 enum move_pos pos;
75a70cf9 974 gimple_stmt_iterator bsi;
42acab1c 975 gimple *stmt;
7d23383d 976 bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
2e966e2a 977 class loop *outermost = ALWAYS_EXECUTED_IN (bb);
75a70cf9 978 struct lim_aux_data *lim_data;
7d23383d 979
9e3536f4 980 if (!loop_outer (bb->loop_father))
96752458 981 return NULL;
7d23383d 982
983 if (dump_file && (dump_flags & TDF_DETAILS))
984 fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
9e3536f4 985 bb->index, bb->loop_father->num, loop_depth (bb->loop_father));
7d23383d 986
9bf0a3f9 987 /* Look at PHI nodes, but only if there are at most two.
988 ??? We could relax this further by post-processing the inserted
989 code and transforming adjacent cond-exprs with the same predicate
990 to control flow again. */
991 bsi = gsi_start_phis (bb);
992 if (!gsi_end_p (bsi)
993 && ((gsi_next (&bsi), gsi_end_p (bsi))
994 || (gsi_next (&bsi), gsi_end_p (bsi))))
995 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
996 {
997 stmt = gsi_stmt (bsi);
998
999 pos = movement_possibility (stmt);
1000 if (pos == MOVE_IMPOSSIBLE)
1001 continue;
1002
735d8b51 1003 lim_data = get_lim_data (stmt);
1004 if (! lim_data)
1005 lim_data = init_lim_data (stmt);
9bf0a3f9 1006 lim_data->always_executed_in = outermost;
1007
1008 if (!determine_max_movement (stmt, false))
1009 {
1010 lim_data->max_loop = NULL;
1011 continue;
1012 }
1013
1014 if (dump_file && (dump_flags & TDF_DETAILS))
1015 {
1ffa4346 1016 print_gimple_stmt (dump_file, stmt, 2);
9bf0a3f9 1017 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
1018 loop_depth (lim_data->max_loop),
1019 lim_data->cost);
1020 }
1021
1022 if (lim_data->cost >= LIM_EXPENSIVE)
1023 set_profitable_level (stmt);
1024 }
1025
75a70cf9 1026 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
7d23383d 1027 {
75a70cf9 1028 stmt = gsi_stmt (bsi);
7d23383d 1029
1030 pos = movement_possibility (stmt);
1031 if (pos == MOVE_IMPOSSIBLE)
1032 {
1033 if (nonpure_call_p (stmt))
1034 {
1035 maybe_never = true;
1036 outermost = NULL;
1037 }
a973ed42 1038 /* Make sure to note always_executed_in for stores to make
1039 store-motion work. */
1040 else if (stmt_makes_single_store (stmt))
1041 {
735d8b51 1042 struct lim_aux_data *lim_data = get_lim_data (stmt);
1043 if (! lim_data)
1044 lim_data = init_lim_data (stmt);
75a70cf9 1045 lim_data->always_executed_in = outermost;
a973ed42 1046 }
7d23383d 1047 continue;
1048 }
1049
75a70cf9 1050 if (is_gimple_assign (stmt)
1051 && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1052 == GIMPLE_BINARY_RHS))
63f88450 1053 {
75a70cf9 1054 tree op0 = gimple_assign_rhs1 (stmt);
1055 tree op1 = gimple_assign_rhs2 (stmt);
2e966e2a 1056 class loop *ol1 = outermost_invariant_loop (op1,
75a70cf9 1057 loop_containing_stmt (stmt));
63f88450 1058
1059 /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
1060 to be hoisted out of loop, saving expensive divide. */
1061 if (pos == MOVE_POSSIBLE
75a70cf9 1062 && gimple_assign_rhs_code (stmt) == RDIV_EXPR
63f88450 1063 && flag_unsafe_math_optimizations
1064 && !flag_trapping_math
75a70cf9 1065 && ol1 != NULL
1066 && outermost_invariant_loop (op0, ol1) == NULL)
63f88450 1067 stmt = rewrite_reciprocal (&bsi);
1068
1069 /* If the shift count is invariant, convert (A >> B) & 1 to
1070 A & (1 << B) allowing the bit mask to be hoisted out of the loop
1071 saving an expensive shift. */
1072 if (pos == MOVE_POSSIBLE
75a70cf9 1073 && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
1074 && integer_onep (op1)
1075 && TREE_CODE (op0) == SSA_NAME
1076 && has_single_use (op0))
63f88450 1077 stmt = rewrite_bittest (&bsi);
1078 }
fcee97ca 1079
735d8b51 1080 lim_data = get_lim_data (stmt);
1081 if (! lim_data)
1082 lim_data = init_lim_data (stmt);
75a70cf9 1083 lim_data->always_executed_in = outermost;
7d23383d 1084
1085 if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
1086 continue;
1087
1088 if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
1089 {
75a70cf9 1090 lim_data->max_loop = NULL;
7d23383d 1091 continue;
1092 }
1093
1094 if (dump_file && (dump_flags & TDF_DETAILS))
1095 {
1ffa4346 1096 print_gimple_stmt (dump_file, stmt, 2);
7d23383d 1097 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
75a70cf9 1098 loop_depth (lim_data->max_loop),
1099 lim_data->cost);
7d23383d 1100 }
1101
75a70cf9 1102 if (lim_data->cost >= LIM_EXPENSIVE)
7d23383d 1103 set_profitable_level (stmt);
1104 }
96752458 1105 return NULL;
7d23383d 1106}
1107
7d23383d 1108/* Hoist the statements in basic block BB out of the loops prescribed by
91275768 1109 data stored in LIM_DATA structures associated with each statement. Callback
7d23383d 1110 for walk_dominator_tree. */
1111
2c7b2f88 1112unsigned int
1113move_computations_worker (basic_block bb)
7d23383d 1114{
2e966e2a 1115 class loop *level;
7d23383d 1116 unsigned cost = 0;
75a70cf9 1117 struct lim_aux_data *lim_data;
2c7b2f88 1118 unsigned int todo = 0;
7d23383d 1119
9e3536f4 1120 if (!loop_outer (bb->loop_father))
2c7b2f88 1121 return todo;
7d23383d 1122
1a91d914 1123 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
9bf0a3f9 1124 {
1a91d914 1125 gassign *new_stmt;
1126 gphi *stmt = bsi.phi ();
9bf0a3f9 1127
1128 lim_data = get_lim_data (stmt);
1129 if (lim_data == NULL)
1130 {
1131 gsi_next (&bsi);
1132 continue;
1133 }
1134
1135 cost = lim_data->cost;
1136 level = lim_data->tgt_loop;
1137 clear_lim_data (stmt);
1138
1139 if (!level)
1140 {
1141 gsi_next (&bsi);
1142 continue;
1143 }
1144
1145 if (dump_file && (dump_flags & TDF_DETAILS))
1146 {
1147 fprintf (dump_file, "Moving PHI node\n");
1ffa4346 1148 print_gimple_stmt (dump_file, stmt, 0);
9bf0a3f9 1149 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1150 cost, level->num);
1151 }
1152
1153 if (gimple_phi_num_args (stmt) == 1)
1154 {
1155 tree arg = PHI_ARG_DEF (stmt, 0);
e9cf809e 1156 new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1157 TREE_CODE (arg), arg);
9bf0a3f9 1158 }
1159 else
1160 {
1161 basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
42acab1c 1162 gimple *cond = gsi_stmt (gsi_last_bb (dom));
9bf0a3f9 1163 tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
1164 /* Get the PHI arguments corresponding to the true and false
1165 edges of COND. */
1166 extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
1167 gcc_assert (arg0 && arg1);
1168 t = build2 (gimple_cond_code (cond), boolean_type_node,
1169 gimple_cond_lhs (cond), gimple_cond_rhs (cond));
e9cf809e 1170 new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1171 COND_EXPR, t, arg0, arg1);
2c7b2f88 1172 todo |= TODO_cleanup_cfg;
9bf0a3f9 1173 }
8e50a020 1174 if (INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (new_stmt)))
1175 && (!ALWAYS_EXECUTED_IN (bb)
1176 || (ALWAYS_EXECUTED_IN (bb) != level
1177 && !flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
ccf6cffa 1178 {
1179 tree lhs = gimple_assign_lhs (new_stmt);
1180 SSA_NAME_RANGE_INFO (lhs) = NULL;
ccf6cffa 1181 }
9bf0a3f9 1182 gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
1183 remove_phi_node (&bsi, false);
1184 }
1185
1a91d914 1186 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
7d23383d 1187 {
e70e8b13 1188 edge e;
1189
42acab1c 1190 gimple *stmt = gsi_stmt (bsi);
7d23383d 1191
75a70cf9 1192 lim_data = get_lim_data (stmt);
1193 if (lim_data == NULL)
7d23383d 1194 {
75a70cf9 1195 gsi_next (&bsi);
7d23383d 1196 continue;
1197 }
1198
75a70cf9 1199 cost = lim_data->cost;
1200 level = lim_data->tgt_loop;
1201 clear_lim_data (stmt);
7d23383d 1202
1203 if (!level)
1204 {
75a70cf9 1205 gsi_next (&bsi);
7d23383d 1206 continue;
1207 }
1208
1209 /* We do not really want to move conditionals out of the loop; we just
1210 placed them here to force their operands to be moved if necessary. */
75a70cf9 1211 if (gimple_code (stmt) == GIMPLE_COND)
7d23383d 1212 continue;
1213
1214 if (dump_file && (dump_flags & TDF_DETAILS))
1215 {
1216 fprintf (dump_file, "Moving statement\n");
1ffa4346 1217 print_gimple_stmt (dump_file, stmt, 0);
7d23383d 1218 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1219 cost, level->num);
1220 }
063a8bce 1221
e70e8b13 1222 e = loop_preheader_edge (level);
1223 gcc_assert (!gimple_vdef (stmt));
1224 if (gimple_vuse (stmt))
1225 {
1226 /* The new VUSE is the one from the virtual PHI in the loop
1227 header or the one already present. */
1a91d914 1228 gphi_iterator gsi2;
e70e8b13 1229 for (gsi2 = gsi_start_phis (e->dest);
1230 !gsi_end_p (gsi2); gsi_next (&gsi2))
1231 {
1a91d914 1232 gphi *phi = gsi2.phi ();
7c782c9b 1233 if (virtual_operand_p (gimple_phi_result (phi)))
e70e8b13 1234 {
1235 gimple_set_vuse (stmt, PHI_ARG_DEF_FROM_EDGE (phi, e));
1236 break;
1237 }
1238 }
1239 }
75a70cf9 1240 gsi_remove (&bsi, false);
8e50a020 1241 if (gimple_has_lhs (stmt)
1242 && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
1243 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_get_lhs (stmt)))
1244 && (!ALWAYS_EXECUTED_IN (bb)
1245 || !(ALWAYS_EXECUTED_IN (bb) == level
1246 || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
ccf6cffa 1247 {
1248 tree lhs = gimple_get_lhs (stmt);
1249 SSA_NAME_RANGE_INFO (lhs) = NULL;
ccf6cffa 1250 }
bc228ba1 1251 /* In case this is a stmt that is not unconditionally executed
1252 when the target loop header is executed and the stmt may
1253 invoke undefined integer or pointer overflow, rewrite it to
1254 unsigned arithmetic. */
1255 if (is_gimple_assign (stmt)
1256 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt)))
1257 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (gimple_assign_lhs (stmt)))
1258 && arith_code_with_undefined_signed_overflow
1259 (gimple_assign_rhs_code (stmt))
1260 && (!ALWAYS_EXECUTED_IN (bb)
1261 || !(ALWAYS_EXECUTED_IN (bb) == level
1262 || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1263 gsi_insert_seq_on_edge (e, rewrite_to_defined_overflow (stmt));
1264 else
1265 gsi_insert_on_edge (e, stmt);
7d23383d 1266 }
2c7b2f88 1267
1268 return todo;
7d23383d 1269}
1270
1271/* Hoist the statements out of the loops prescribed by data stored in
91275768 1272 LIM_DATA structures associated with each statement. */
7d23383d 1273
9bf0a3f9 1274static unsigned int
7d23383d 1275move_computations (void)
1276{
2c7b2f88 1277 int *rpo = XNEWVEC (int, last_basic_block_for_fn (cfun));
1278 int n = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, false);
1279 unsigned todo = 0;
1280
1281 for (int i = 0; i < n; ++i)
1282 todo |= move_computations_worker (BASIC_BLOCK_FOR_FN (cfun, rpo[i]));
1283
1284 free (rpo);
7d23383d 1285
75a70cf9 1286 gsi_commit_edge_inserts ();
dd277d48 1287 if (need_ssa_update_p (cfun))
095dcfa3 1288 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
9bf0a3f9 1289
2c7b2f88 1290 return todo;
7d23383d 1291}
1292
1293/* Checks whether the statement defining variable *INDEX can be hoisted
1294 out of the loop passed in DATA. Callback for for_each_index. */
1295
1296static bool
1297may_move_till (tree ref, tree *index, void *data)
1298{
2e966e2a 1299 class loop *loop = (class loop *) data, *max_loop;
7d23383d 1300
1301 /* If REF is an array reference, check also that the step and the lower
1302 bound are invariant in LOOP. */
1303 if (TREE_CODE (ref) == ARRAY_REF)
1304 {
75a70cf9 1305 tree step = TREE_OPERAND (ref, 3);
1306 tree lbound = TREE_OPERAND (ref, 2);
7d23383d 1307
75a70cf9 1308 max_loop = outermost_invariant_loop (step, loop);
7d23383d 1309 if (!max_loop)
1310 return false;
1311
75a70cf9 1312 max_loop = outermost_invariant_loop (lbound, loop);
7d23383d 1313 if (!max_loop)
1314 return false;
1315 }
1316
1317 max_loop = outermost_invariant_loop (*index, loop);
1318 if (!max_loop)
1319 return false;
1320
1321 return true;
1322}
1323
75a70cf9 1324/* If OP is SSA NAME, force the statement that defines it to be
99e96094 1325 moved out of the LOOP. ORIG_LOOP is the loop in which OP is used. */
7d23383d 1326
1327static void
2e966e2a 1328force_move_till_op (tree op, class loop *orig_loop, class loop *loop)
7d23383d 1329{
42acab1c 1330 gimple *stmt;
7d23383d 1331
75a70cf9 1332 if (!op
1333 || is_gimple_min_invariant (op))
1334 return;
7d23383d 1335
75a70cf9 1336 gcc_assert (TREE_CODE (op) == SSA_NAME);
48e1416a 1337
75a70cf9 1338 stmt = SSA_NAME_DEF_STMT (op);
1339 if (gimple_nop_p (stmt))
7d23383d 1340 return;
1341
75a70cf9 1342 set_level (stmt, orig_loop, loop);
7d23383d 1343}
1344
1345/* Forces the statements defining invariants in REF (and *INDEX) to be moved out of
99e96094 1346 the LOOP. The reference REF is used in the loop ORIG_LOOP. Callback for
1347 for_each_index. */
1348
1349struct fmt_data
1350{
2e966e2a 1351 class loop *loop;
1352 class loop *orig_loop;
99e96094 1353};
7d23383d 1354
1355static bool
1356force_move_till (tree ref, tree *index, void *data)
1357{
f0d6e81c 1358 struct fmt_data *fmt_data = (struct fmt_data *) data;
7d23383d 1359
1360 if (TREE_CODE (ref) == ARRAY_REF)
1361 {
75a70cf9 1362 tree step = TREE_OPERAND (ref, 3);
1363 tree lbound = TREE_OPERAND (ref, 2);
7d23383d 1364
75a70cf9 1365 force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
1366 force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
7d23383d 1367 }
1368
75a70cf9 1369 force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);
7d23383d 1370
1371 return true;
1372}
1373
063a8bce 1374/* A function to free the mem_ref object MEM. */
1375
1376static void
2e966e2a 1377memref_free (class im_mem_ref *mem)
063a8bce 1378{
f1f41a6c 1379 mem->accesses_in_loop.release ();
063a8bce 1380}
1381
1382/* Allocates and returns a memory reference description for MEM whose hash
1383 value is HASH and id is ID. */
1384
4a7ff262 1385static im_mem_ref *
1b94b295 1386mem_ref_alloc (ao_ref *mem, unsigned hash, unsigned id)
063a8bce 1387{
2e966e2a 1388 im_mem_ref *ref = XOBNEW (&mem_ref_obstack, class im_mem_ref);
1b94b295 1389 if (mem)
1390 ref->mem = *mem;
1391 else
1392 ao_ref_init (&ref->mem, error_mark_node);
063a8bce 1393 ref->id = id;
1b94b295 1394 ref->ref_canonical = false;
7eae52f1 1395 ref->ref_decomposed = false;
063a8bce 1396 ref->hash = hash;
3e48928c 1397 ref->stored = NULL;
feba4360 1398 bitmap_initialize (&ref->indep_loop, &lim_bitmap_obstack);
1399 bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
3e48928c 1400 ref->accesses_in_loop.create (1);
063a8bce 1401
1402 return ref;
1403}
1404
063a8bce 1405/* Records the memory reference location *LOC in the memory reference
1406 description REF. The reference occurs in statement STMT. */
7d23383d 1407
1408static void
42acab1c 1409record_mem_ref_loc (im_mem_ref *ref, gimple *stmt, tree *loc)
7d23383d 1410{
5c8a8be3 1411 mem_ref_loc aref;
5c8a8be3 1412 aref.stmt = stmt;
1413 aref.ref = loc;
3e48928c 1414 ref->accesses_in_loop.safe_push (aref);
1415}
1416
1417/* Set the LOOP bit in REF's stored bitmap, allocating it if
1418 necessary. Return whether a bit was changed. */
1419
1420static bool
2e966e2a 1421set_ref_stored_in_loop (im_mem_ref *ref, class loop *loop)
3e48928c 1422{
1423 if (!ref->stored)
1424 ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
1425 return bitmap_set_bit (ref->stored, loop->num);
7d23383d 1426}
1427
063a8bce 1428/* Marks reference REF as stored in LOOP. */
7d23383d 1429
1430static void
2e966e2a 1431mark_ref_stored (im_mem_ref *ref, class loop *loop)
7d23383d 1432{
feba4360 1433 while (loop != current_loops->tree_root
3e48928c 1434 && set_ref_stored_in_loop (ref, loop))
feba4360 1435 loop = loop_outer (loop);
063a8bce 1436}
1437
1438/* Gathers memory references in statement STMT in LOOP, storing the
1439 information about them in the memory_accesses structure. Marks
1440 the vops accessed through unrecognized statements there as
1441 well. */
1442
1443static void
2e966e2a 1444gather_mem_refs_stmt (class loop *loop, gimple *stmt)
063a8bce 1445{
1446 tree *mem = NULL;
1447 hashval_t hash;
1ada9901 1448 im_mem_ref **slot;
4a7ff262 1449 im_mem_ref *ref;
063a8bce 1450 bool is_stored;
063a8bce 1451 unsigned id;
7d23383d 1452
dd277d48 1453 if (!gimple_vuse (stmt))
063a8bce 1454 return;
1455
1456 mem = simple_mem_ref_in_stmt (stmt, &is_stored);
1457 if (!mem)
0766b2c0 1458 {
04911505 1459 /* We use the shared mem_ref for all unanalyzable refs. */
1460 id = UNANALYZABLE_MEM_ID;
1461 ref = memory_accesses.refs_list[id];
0766b2c0 1462 if (dump_file && (dump_flags & TDF_DETAILS))
1463 {
1464 fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
1465 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1466 }
04911505 1467 is_stored = gimple_vdef (stmt);
063a8bce 1468 }
1469 else
7d23383d 1470 {
1b94b295 1471 /* We are looking for equal refs that might differ in structure
1472 such as a.b vs. MEM[&a + 4]. So we key off the ao_ref but
1473 make sure we can canonicalize the ref in the hashtable if
1474 non-operand_equal_p refs are found. For the lookup we mark
1475 the case we want strict equality with aor.max_size == -1. */
1476 ao_ref aor;
1477 ao_ref_init (&aor, *mem);
1478 ao_ref_base (&aor);
1479 ao_ref_alias_set (&aor);
1480 HOST_WIDE_INT offset, size, max_size;
1481 poly_int64 saved_maxsize = aor.max_size, mem_off;
1482 tree mem_base;
7eae52f1 1483 bool ref_decomposed;
1b94b295 1484 if (aor.max_size_known_p ()
1485 && aor.offset.is_constant (&offset)
74dbdf38 1486 && aor.size.is_constant (&size)
1487 && aor.max_size.is_constant (&max_size)
1b94b295 1488 && size == max_size
74dbdf38 1489 && (size % BITS_PER_UNIT) == 0
a657d4f2 1490 /* We're canonicalizing to a MEM where TYPE_SIZE specifies the
1491 size. Make sure this is consistent with the extraction. */
1492 && poly_int_tree_p (TYPE_SIZE (TREE_TYPE (*mem)))
1493 && known_eq (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (*mem))),
1494 aor.size)
1b94b295 1495 && (mem_base = get_addr_base_and_unit_offset (aor.ref, &mem_off)))
1496 {
7eae52f1 1497 ref_decomposed = true;
1b94b295 1498 hash = iterative_hash_expr (ao_ref_base (&aor), 0);
1499 hash = iterative_hash_host_wide_int (offset, hash);
1500 hash = iterative_hash_host_wide_int (size, hash);
1501 }
1502 else
1503 {
7eae52f1 1504 ref_decomposed = false;
1b94b295 1505 hash = iterative_hash_expr (aor.ref, 0);
1506 aor.max_size = -1;
1507 }
1508 slot = memory_accesses.refs->find_slot_with_hash (&aor, hash, INSERT);
1509 aor.max_size = saved_maxsize;
04911505 1510 if (*slot)
063a8bce 1511 {
1b94b295 1512 if (!(*slot)->ref_canonical
1513 && !operand_equal_p (*mem, (*slot)->mem.ref, 0))
1514 {
1515 /* If we didn't yet canonicalize the hashtable ref (which
1516 we'll end up using for code insertion) and hit a second
1517 equal ref that is not structurally equivalent, create
1518 a canonical ref which is a bare MEM_REF. */
1519 if (TREE_CODE (*mem) == MEM_REF
1520 || TREE_CODE (*mem) == TARGET_MEM_REF)
1521 {
1522 (*slot)->mem.ref = *mem;
1523 (*slot)->mem.base_alias_set = ao_ref_base_alias_set (&aor);
1524 }
1525 else
1526 {
1527 tree ref_alias_type = reference_alias_ptr_type (*mem);
1528 unsigned int ref_align = get_object_alignment (*mem);
1529 tree ref_type = TREE_TYPE (*mem);
1530 tree tmp = build_fold_addr_expr (unshare_expr (mem_base));
1531 if (TYPE_ALIGN (ref_type) != ref_align)
1532 ref_type = build_aligned_type (ref_type, ref_align);
1533 (*slot)->mem.ref
1534 = fold_build2 (MEM_REF, ref_type, tmp,
1535 build_int_cst (ref_alias_type, mem_off));
1536 if ((*slot)->mem.volatile_p)
1537 TREE_THIS_VOLATILE ((*slot)->mem.ref) = 1;
1538 gcc_checking_assert (TREE_CODE ((*slot)->mem.ref) == MEM_REF
1539 && is_gimple_mem_ref_addr
1540 (TREE_OPERAND ((*slot)->mem.ref,
1541 0)));
1542 (*slot)->mem.base_alias_set = (*slot)->mem.ref_alias_set;
1543 }
1544 (*slot)->ref_canonical = true;
1545 }
4a7ff262 1546 ref = *slot;
04911505 1547 id = ref->id;
063a8bce 1548 }
04911505 1549 else
1550 {
1551 id = memory_accesses.refs_list.length ();
1b94b295 1552 ref = mem_ref_alloc (&aor, hash, id);
7eae52f1 1553 ref->ref_decomposed = ref_decomposed;
04911505 1554 memory_accesses.refs_list.safe_push (ref);
1555 *slot = ref;
61025ec0 1556
04911505 1557 if (dump_file && (dump_flags & TDF_DETAILS))
1558 {
1559 fprintf (dump_file, "Memory reference %u: ", id);
1560 print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
1561 fprintf (dump_file, "\n");
1562 }
1563 }
1564
3e48928c 1565 record_mem_ref_loc (ref, stmt, mem);
04911505 1566 }
feba4360 1567 bitmap_set_bit (&memory_accesses.refs_in_loop[loop->num], ref->id);
063a8bce 1568 if (is_stored)
8f9829e8 1569 {
feba4360 1570 bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
8f9829e8 1571 mark_ref_stored (ref, loop);
1572 }
735d8b51 1573 init_lim_data (stmt)->ref = ref->id;
063a8bce 1574 return;
7d23383d 1575}
1576
7894a3d9 1577static unsigned *bb_loop_postorder;
1578
1579/* qsort sort function to sort blocks after their loop fathers postorder. */
1580
1581static int
0e23c46f 1582sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_,
1583 void *bb_loop_postorder_)
7894a3d9 1584{
0e23c46f 1585 unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
80402f9a 1586 basic_block bb1 = *(const basic_block *)bb1_;
1587 basic_block bb2 = *(const basic_block *)bb2_;
2e966e2a 1588 class loop *loop1 = bb1->loop_father;
1589 class loop *loop2 = bb2->loop_father;
7894a3d9 1590 if (loop1->num == loop2->num)
2bd42196 1591 return bb1->index - bb2->index;
7894a3d9 1592 return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1593}
1594
3e48928c 1595/* qsort sort function to sort ref locs by their loop father's postorder. */
1596
1597static int
0e23c46f 1598sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_,
1599 void *bb_loop_postorder_)
3e48928c 1600{
0e23c46f 1601 unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
80402f9a 1602 const mem_ref_loc *loc1 = (const mem_ref_loc *)loc1_;
1603 const mem_ref_loc *loc2 = (const mem_ref_loc *)loc2_;
2e966e2a 1604 class loop *loop1 = gimple_bb (loc1->stmt)->loop_father;
1605 class loop *loop2 = gimple_bb (loc2->stmt)->loop_father;
3e48928c 1606 if (loop1->num == loop2->num)
1607 return 0;
1608 return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1609}
1610
063a8bce 1611/* Gathers memory references in loops. */
7d23383d 1612
1613static void
feba4360 1614analyze_memory_references (void)
7d23383d 1615{
75a70cf9 1616 gimple_stmt_iterator bsi;
7894a3d9 1617 basic_block bb, *bbs;
2e966e2a 1618 class loop *loop, *outer;
7894a3d9 1619 unsigned i, n;
063a8bce 1620
7894a3d9 1621 /* Collect all basic blocks in loops and sort them by their
1622 loop's postorder. */
1623 i = 0;
a28770e1 1624 bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
fc00614f 1625 FOR_EACH_BB_FN (bb, cfun)
7894a3d9 1626 if (bb->loop_father != current_loops->tree_root)
1627 bbs[i++] = bb;
1628 n = i;
0e23c46f 1629 gcc_sort_r (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp,
1630 bb_loop_postorder);
7894a3d9 1631
1632 /* Visit blocks in loop postorder and assign mem-ref IDs in that order.
1633 That results in better locality for all the bitmaps. */
1634 for (i = 0; i < n; ++i)
063a8bce 1635 {
7894a3d9 1636 basic_block bb = bbs[i];
75a70cf9 1637 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
7894a3d9 1638 gather_mem_refs_stmt (bb->loop_father, gsi_stmt (bsi));
063a8bce 1639 }
1640
3e48928c 1641 /* Sort the location list of gathered memory references after their
1642 loop postorder number. */
1ada9901 1643 im_mem_ref *ref;
3e48928c 1644 FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
0e23c46f 1645 ref->accesses_in_loop.sort (sort_locs_in_loop_postorder_cmp,
1646 bb_loop_postorder);
3e48928c 1647
7894a3d9 1648 free (bbs);
1649
0766b2c0 1650 /* Propagate the information about accessed memory references up
1651 the loop hierarchy. */
f21d4d00 1652 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
063a8bce 1653 {
8f9829e8 1654 /* Finalize the overall touched references (including subloops). */
feba4360 1655 bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num],
1656 &memory_accesses.refs_stored_in_loop[loop->num]);
8f9829e8 1657
1658 /* Propagate the information about accessed memory references up
1659 the loop hierarchy. */
1660 outer = loop_outer (loop);
1661 if (outer == current_loops->tree_root)
063a8bce 1662 continue;
1663
feba4360 1664 bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num],
1665 &memory_accesses.all_refs_stored_in_loop[loop->num]);
063a8bce 1666 }
063a8bce 1667}
1668
063a8bce 1669/* Returns true if MEM1 and MEM2 may alias. TTAE_CACHE is used as a cache in
1670 tree_to_aff_combination_expand. */
1671
1672static bool
4a7ff262 1673mem_refs_may_alias_p (im_mem_ref *mem1, im_mem_ref *mem2,
5f8841a5 1674 hash_map<tree, name_expansion *> **ttae_cache)
063a8bce 1675{
1676 /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
1677 object and their offsets differ in such a way that the locations cannot
1678 overlap, then they cannot alias. */
fbc666b8 1679 poly_widest_int size1, size2;
3391cd1e 1680 aff_tree off1, off2;
063a8bce 1681
3391cd1e 1682 /* Perform basic offset and type-based disambiguation. */
09f4cf62 1683 if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, true))
063a8bce 1684 return false;
7d23383d 1685
063a8bce 1686 /* The expansion of addresses may be a bit expensive, thus we only do
1687 the check at -O2 and higher optimization levels. */
1688 if (optimize < 2)
1689 return true;
1690
09f4cf62 1691 get_inner_reference_aff (mem1->mem.ref, &off1, &size1);
1692 get_inner_reference_aff (mem2->mem.ref, &off2, &size2);
063a8bce 1693 aff_combination_expand (&off1, ttae_cache);
1694 aff_combination_expand (&off2, ttae_cache);
e913b5cd 1695 aff_combination_scale (&off1, -1);
063a8bce 1696 aff_combination_add (&off2, &off1);
1697
5fc88ffd 1698 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
063a8bce 1699 return false;
1700
1701 return true;
1702}
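
/* An illustrative example: for two hypothetical accesses a[i] and
   a[i + 1] with a 4-byte element type, expanding the SSA names in the
   offsets yields the affine forms base + 4*i and base + 4*i + 4.  The
   difference is the constant 4, which is at least the access size, so
   aff_comb_cannot_overlap_p concludes the locations cannot overlap and
   the references do not alias.  */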
1703
3e48928c 1704/* Compare function for bsearch searching for reference locations
1705 in a loop. */
1706
1707static int
0e23c46f 1708find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_,
1709 void *bb_loop_postorder_)
3e48928c 1710{
0e23c46f 1711 unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
2e966e2a 1712 class loop *loop = (class loop *)const_cast<void *>(loop_);
3e48928c 1713 mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_);
2e966e2a 1714 class loop *loc_loop = gimple_bb (loc->stmt)->loop_father;
3e48928c 1715 if (loop->num == loc_loop->num
1716 || flow_loop_nested_p (loop, loc_loop))
1717 return 0;
1718 return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num]
1719 ? -1 : 1);
1720}
1721
5c8a8be3 1722/* Iterates over all locations of REF in LOOP and its subloops calling
1723 fn.operator() with the location as argument. When that operator
1724 returns true the iteration is stopped and true is returned.
1725 Otherwise false is returned. */
063a8bce 1726
5c8a8be3 1727template <typename FN>
1728static bool
2e966e2a 1729for_all_locs_in_loop (class loop *loop, im_mem_ref *ref, FN fn)
063a8bce 1730{
063a8bce 1731 unsigned i;
4a7ff262 1732 mem_ref_loc *loc;
063a8bce 1733
3e48928c 1734 /* Search for the cluster of locs in the accesses_in_loop vector
1735 which is sorted after postorder index of the loop father. */
0e23c46f 1736 loc = ref->accesses_in_loop.bsearch (loop, find_ref_loc_in_loop_cmp,
1737 bb_loop_postorder);
3e48928c 1738 if (!loc)
1739 return false;
063a8bce 1740
3e48928c 1741 /* We have found one location inside the loop or its sub-loops. Iterate
1742 both forward and backward to cover the whole cluster. */
1743 i = loc - ref->accesses_in_loop.address ();
1744 while (i > 0)
1745 {
1746 --i;
4a7ff262 1747 mem_ref_loc *l = &ref->accesses_in_loop[i];
3e48928c 1748 if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1749 break;
1750 if (fn (l))
1751 return true;
1752 }
1753 for (i = loc - ref->accesses_in_loop.address ();
1754 i < ref->accesses_in_loop.length (); ++i)
1755 {
4a7ff262 1756 mem_ref_loc *l = &ref->accesses_in_loop[i];
3e48928c 1757 if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1758 break;
1759 if (fn (l))
1760 return true;
1761 }
5c8a8be3 1762
1763 return false;
1764}
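
/* A typical use is to pass a small functor whose operator() is invoked
   for every location of REF in LOOP, for example (as done below)

     for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));

   the walk stops as soon as the functor returns true.  */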
1765
1766/* Rewrites the memory reference at location LOC to TMP_VAR. */
1767
251317e4 1768class rewrite_mem_ref_loc
5c8a8be3 1769{
251317e4 1770public:
5c8a8be3 1771 rewrite_mem_ref_loc (tree tmp_var_) : tmp_var (tmp_var_) {}
4a7ff262 1772 bool operator () (mem_ref_loc *loc);
5c8a8be3 1773 tree tmp_var;
1774};
1775
1776bool
4a7ff262 1777rewrite_mem_ref_loc::operator () (mem_ref_loc *loc)
5c8a8be3 1778{
1779 *loc->ref = tmp_var;
1780 update_stmt (loc->stmt);
1781 return false;
063a8bce 1782}
1783
1784/* Rewrites all references to REF in LOOP by variable TMP_VAR. */
1785
1786static void
2e966e2a 1787rewrite_mem_refs (class loop *loop, im_mem_ref *ref, tree tmp_var)
063a8bce 1788{
5c8a8be3 1789 for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));
7d23383d 1790}
1791
bc9af2dd 1792/* Stores the first reference location in LOCP. */
1793
251317e4 1794class first_mem_ref_loc_1
bc9af2dd 1795{
251317e4 1796public:
4a7ff262 1797 first_mem_ref_loc_1 (mem_ref_loc **locp_) : locp (locp_) {}
1798 bool operator () (mem_ref_loc *loc);
1799 mem_ref_loc **locp;
bc9af2dd 1800};
1801
1802bool
4a7ff262 1803first_mem_ref_loc_1::operator () (mem_ref_loc *loc)
bc9af2dd 1804{
1805 *locp = loc;
1806 return true;
1807}
1808
1809/* Returns the first reference location to REF in LOOP. */
1810
4a7ff262 1811static mem_ref_loc *
2e966e2a 1812first_mem_ref_loc (class loop *loop, im_mem_ref *ref)
bc9af2dd 1813{
4a7ff262 1814 mem_ref_loc *locp = NULL;
bc9af2dd 1815 for_all_locs_in_loop (loop, ref, first_mem_ref_loc_1 (&locp));
1816 return locp;
1817}
1818
61025ec0 1819struct prev_flag_edges {
1820 /* Edge to insert new flag comparison code. */
1821 edge append_cond_position;
1822
1823 /* Edge for fall through from previous flag comparison. */
1824 edge last_cond_fallthru;
1825};
1826
1827/* Helper function for execute_sm. Emit code to store TMP_VAR into
1828 MEM along edge EX.
1829
1830 The store is only done if MEM has changed. We do this so no
1831 changes to MEM occur on code paths that did not originally store
1832 into it.
1833
1834 The common case for execute_sm will transform:
1835
1836 for (...) {
1837 if (foo)
1838 stuff;
1839 else
1840 MEM = TMP_VAR;
1841 }
1842
1843 into:
1844
1845 lsm = MEM;
1846 for (...) {
1847 if (foo)
1848 stuff;
1849 else
1850 lsm = TMP_VAR;
1851 }
1852 MEM = lsm;
1853
1854 This function will generate:
1855
1856 lsm = MEM;
1857
1858 lsm_flag = false;
1859 ...
1860 for (...) {
1861 if (foo)
1862 stuff;
1863 else {
1864 lsm = TMP_VAR;
1865 lsm_flag = true;
1866 }
1867 }
1868 if (lsm_flag) <--
1869 MEM = lsm; <--
1870*/
1871
1872static void
1ea7c812 1873execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
1874 edge preheader, hash_set <basic_block> *flag_bbs)
61025ec0 1875{
1876 basic_block new_bb, then_bb, old_dest;
1877 bool loop_has_only_one_exit;
1878 edge then_old_edge, orig_ex = ex;
1879 gimple_stmt_iterator gsi;
42acab1c 1880 gimple *stmt;
61025ec0 1881 struct prev_flag_edges *prev_edges = (struct prev_flag_edges *) ex->aux;
a1ccf30d 1882 bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP;
61025ec0 1883
1ea7c812 1884 profile_count count_sum = profile_count::zero ();
1885 int nbbs = 0, ncount = 0;
720cfc43 1886 profile_probability flag_probability = profile_probability::uninitialized ();
1ea7c812 1887
1888 /* Flag is set in FLAG_BBS. Determine probability that flag will be true
1889 at loop exit.
1890
f4d3c071 1891 This code may look fancy, but it cannot update the profile very realistically
1ea7c812 1892 because we do not know the probability that the flag will be true at a
1893 given loop exit.
1894
1895 We look for two interesting extremes
1896 - when the exit is dominated by a block setting the flag, we know it will
1897 always be true. This is a common case.
1898 - when all blocks setting the flag have very low frequency we know
1899 it will likely be false.
1900 In all other cases we default to 2/3 for flag being true. */
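
     /* A worked example with assumed counts: if the preheader executes
	1000 times and the blocks setting the flag have a combined count
	of 900, the ratio 900/1000 = 90% exceeds the 2/3 cap and the flag
	probability is clamped to 2/3; if one of those blocks dominates
	the exit, the flag is known to be always true instead.  */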
1901
1902 for (hash_set<basic_block>::iterator it = flag_bbs->begin ();
1903 it != flag_bbs->end (); ++it)
1904 {
1ea7c812 1905 if ((*it)->count.initialized_p ())
1906 count_sum += (*it)->count, ncount ++;
1907 if (dominated_by_p (CDI_DOMINATORS, ex->src, *it))
720cfc43 1908 flag_probability = profile_probability::always ();
1ea7c812 1909 nbbs++;
1910 }
1911
720cfc43 1912 profile_probability cap = profile_probability::always ().apply_scale (2, 3);
1913
1914 if (flag_probability.initialized_p ())
1ea7c812 1915 ;
205ce1aa 1916 else if (ncount == nbbs
1917 && preheader->count () >= count_sum && preheader->count ().nonzero_p ())
1ea7c812 1918 {
ea5d3981 1919 flag_probability = count_sum.probability_in (preheader->count ());
720cfc43 1920 if (flag_probability > cap)
1921 flag_probability = cap;
1ea7c812 1922 }
205ce1aa 1923
1924 if (!flag_probability.initialized_p ())
720cfc43 1925 flag_probability = cap;
1ea7c812 1926
61025ec0 1927 /* ?? Insert store after previous store if applicable. See note
1928 below. */
1929 if (prev_edges)
1930 ex = prev_edges->append_cond_position;
1931
1932 loop_has_only_one_exit = single_pred_p (ex->dest);
1933
1934 if (loop_has_only_one_exit)
1935 ex = split_block_after_labels (ex->dest);
a1ebf6df 1936 else
1937 {
1938 for (gphi_iterator gpi = gsi_start_phis (ex->dest);
1939 !gsi_end_p (gpi); gsi_next (&gpi))
1940 {
1941 gphi *phi = gpi.phi ();
1942 if (virtual_operand_p (gimple_phi_result (phi)))
1943 continue;
1944
1945 /* When the destination has a non-virtual PHI node with multiple
1946 predecessors make sure we preserve the PHI structure by
1947 forcing a forwarder block so that hoisting of that PHI will
1948 still work. */
1949 split_edge (ex);
1950 break;
1951 }
1952 }
61025ec0 1953
1954 old_dest = ex->dest;
1955 new_bb = split_edge (ex);
1956 then_bb = create_empty_bb (new_bb);
1ea7c812 1957 then_bb->count = new_bb->count.apply_probability (flag_probability);
a1ccf30d 1958 if (irr)
1959 then_bb->flags = BB_IRREDUCIBLE_LOOP;
1960 add_bb_to_loop (then_bb, new_bb->loop_father);
61025ec0 1961
1962 gsi = gsi_start_bb (new_bb);
1963 stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
1964 NULL_TREE, NULL_TREE);
1965 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1966
1967 gsi = gsi_start_bb (then_bb);
1968 /* Insert actual store. */
1969 stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
1970 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1971
1ea7c812 1972 edge e1 = single_succ_edge (new_bb);
1973 edge e2 = make_edge (new_bb, then_bb,
1974 EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
1975 e2->probability = flag_probability;
1ea7c812 1976
1977 e1->flags |= EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0);
1978 e1->flags &= ~EDGE_FALLTHRU;
1979
720cfc43 1980 e1->probability = flag_probability.invert ();
1ea7c812 1981
720cfc43 1982 then_old_edge = make_single_succ_edge (then_bb, old_dest,
a1ccf30d 1983 EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
61025ec0 1984
1985 set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);
1986
1987 if (prev_edges)
1988 {
1989 basic_block prevbb = prev_edges->last_cond_fallthru->src;
1990 redirect_edge_succ (prev_edges->last_cond_fallthru, new_bb);
1991 set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
1992 set_immediate_dominator (CDI_DOMINATORS, old_dest,
1993 recompute_dominator (CDI_DOMINATORS, old_dest));
1994 }
1995
1996 /* ?? Because stores may alias, they must happen in the exact
1997 sequence they originally happened. Save the position right after
1998 the (_lsm) store we just created so we can continue appending after
1999 it and maintain the original order. */
2000 {
2001 struct prev_flag_edges *p;
2002
2003 if (orig_ex->aux)
2004 orig_ex->aux = NULL;
2005 alloc_aux_for_edge (orig_ex, sizeof (struct prev_flag_edges));
2006 p = (struct prev_flag_edges *) orig_ex->aux;
2007 p->append_cond_position = then_old_edge;
2008 p->last_cond_fallthru = find_edge (new_bb, old_dest);
2009 orig_ex->aux = (void *) p;
2010 }
2011
2012 if (!loop_has_only_one_exit)
1a91d914 2013 for (gphi_iterator gpi = gsi_start_phis (old_dest);
2014 !gsi_end_p (gpi); gsi_next (&gpi))
61025ec0 2015 {
1a91d914 2016 gphi *phi = gpi.phi ();
61025ec0 2017 unsigned i;
2018
2019 for (i = 0; i < gimple_phi_num_args (phi); i++)
2020 if (gimple_phi_arg_edge (phi, i)->src == new_bb)
2021 {
2022 tree arg = gimple_phi_arg_def (phi, i);
60d535d2 2023 add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
61025ec0 2024 update_stmt (phi);
2025 }
2026 }
61025ec0 2027}
2028
5c8a8be3 2029/* When the location is a store to REF, set a flag indicating the store. */
2030
251317e4 2031class sm_set_flag_if_changed
5c8a8be3 2032{
251317e4 2033public:
1ea7c812 2034 sm_set_flag_if_changed (tree flag_, hash_set <basic_block> *bbs_)
2035 : flag (flag_), bbs (bbs_) {}
4a7ff262 2036 bool operator () (mem_ref_loc *loc);
5c8a8be3 2037 tree flag;
1ea7c812 2038 hash_set <basic_block> *bbs;
5c8a8be3 2039};
2040
2041bool
4a7ff262 2042sm_set_flag_if_changed::operator () (mem_ref_loc *loc)
5c8a8be3 2043{
2044 /* Only set the flag for writes. */
2045 if (is_gimple_assign (loc->stmt)
2046 && gimple_assign_lhs_ptr (loc->stmt) == loc->ref)
2047 {
2048 gimple_stmt_iterator gsi = gsi_for_stmt (loc->stmt);
42acab1c 2049 gimple *stmt = gimple_build_assign (flag, boolean_true_node);
5c8a8be3 2050 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1ea7c812 2051 bbs->add (gimple_bb (stmt));
5c8a8be3 2052 }
2053 return false;
2054}
2055
61025ec0 2056/* Helper function for execute_sm. On every location where REF is
2057 stored to, set an appropriate flag indicating the store. */
2058
2059static tree
2e966e2a 2060execute_sm_if_changed_flag_set (class loop *loop, im_mem_ref *ref,
1ea7c812 2061 hash_set <basic_block> *bbs)
61025ec0 2062{
61025ec0 2063 tree flag;
f86b328b 2064 char *str = get_lsm_tmp_name (ref->mem.ref, ~0, "_flag");
072f7ab1 2065 flag = create_tmp_reg (boolean_type_node, str);
1ea7c812 2066 for_all_locs_in_loop (loop, ref, sm_set_flag_if_changed (flag, bbs));
61025ec0 2067 return flag;
2068}
2069
063a8bce 2070/* Executes store motion of memory reference REF from LOOP.
749ea85f 2071 Exits from the LOOP are stored in EXITS. The initialization of the
2072 temporary variable is put to the preheader of the loop, and assignments
2073 to the reference from the temporary variable are emitted to exits. */
7d23383d 2074
2075static void
2e966e2a 2076execute_sm (class loop *loop, vec<edge> exits, im_mem_ref *ref)
7d23383d 2077{
3e48928c 2078 tree tmp_var, store_flag = NULL_TREE;
7d23383d 2079 unsigned i;
1a91d914 2080 gassign *load;
99e96094 2081 struct fmt_data fmt_data;
bc9af2dd 2082 edge ex;
75a70cf9 2083 struct lim_aux_data *lim_data;
61025ec0 2084 bool multi_threaded_model_p = false;
bc9af2dd 2085 gimple_stmt_iterator gsi;
1ea7c812 2086 hash_set<basic_block> flag_bbs;
7d23383d 2087
69154f26 2088 if (dump_file && (dump_flags & TDF_DETAILS))
2089 {
2090 fprintf (dump_file, "Executing store motion of ");
1ffa4346 2091 print_generic_expr (dump_file, ref->mem.ref);
69154f26 2092 fprintf (dump_file, " from loop %d\n", loop->num);
2093 }
2094
09f4cf62 2095 tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
2096 get_lsm_tmp_name (ref->mem.ref, ~0));
7d23383d 2097
99e96094 2098 fmt_data.loop = loop;
2099 fmt_data.orig_loop = loop;
09f4cf62 2100 for_each_index (&ref->mem.ref, force_move_till, &fmt_data);
7d23383d 2101
c927329e 2102 if (bb_in_transaction (loop_preheader_edge (loop)->src)
60b209bc 2103 || (! PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES)
2104 && ! ref_always_accessed_p (loop, ref, true)))
61025ec0 2105 multi_threaded_model_p = true;
2106
2107 if (multi_threaded_model_p)
1ea7c812 2108 store_flag = execute_sm_if_changed_flag_set (loop, ref, &flag_bbs);
61025ec0 2109
063a8bce 2110 rewrite_mem_refs (loop, ref, tmp_var);
7d23383d 2111
bc9af2dd 2112 /* Emit the load code on a random exit edge or into the latch if
2113 the loop does not exit, so that we are sure it will be processed
2114 by move_computations after all dependencies. */
2115 gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt);
61025ec0 2116
2117 /* FIXME/TODO: For the multi-threaded variant, we could avoid this
2118 load altogether, since the store is predicated by a flag. We
2119 could do the load only if it was originally in the loop.
09f4cf62 2120 load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref));
75a70cf9 2121 lim_data = init_lim_data (load);
2122 lim_data->max_loop = loop;
2123 lim_data->tgt_loop = loop;
bc9af2dd 2124 gsi_insert_before (&gsi, load, GSI_SAME_STMT);
7d23383d 2125
61025ec0 2126 if (multi_threaded_model_p)
7d23383d 2127 {
61025ec0 2128 load = gimple_build_assign (store_flag, boolean_false_node);
2129 lim_data = init_lim_data (load);
2130 lim_data->max_loop = loop;
2131 lim_data->tgt_loop = loop;
bc9af2dd 2132 gsi_insert_before (&gsi, load, GSI_SAME_STMT);
7d23383d 2133 }
61025ec0 2134
2135 /* Sink the store to every exit from the loop. */
f1f41a6c 2136 FOR_EACH_VEC_ELT (exits, i, ex)
61025ec0 2137 if (!multi_threaded_model_p)
2138 {
1a91d914 2139 gassign *store;
09f4cf62 2140 store = gimple_build_assign (unshare_expr (ref->mem.ref), tmp_var);
61025ec0 2141 gsi_insert_on_edge (ex, store);
2142 }
2143 else
1ea7c812 2144 execute_sm_if_changed (ex, ref->mem.ref, tmp_var, store_flag,
2145 loop_preheader_edge (loop), &flag_bbs);
7d23383d 2146}
2147
063a8bce 2148/* Hoists memory references MEM_REFS out of LOOP. EXITS is the list of exit
2149 edges of the LOOP. */
7d23383d 2150
2151static void
2e966e2a 2152hoist_memory_references (class loop *loop, bitmap mem_refs,
f1f41a6c 2153 vec<edge> exits)
7d23383d 2154{
4a7ff262 2155 im_mem_ref *ref;
063a8bce 2156 unsigned i;
2157 bitmap_iterator bi;
69154f26 2158
063a8bce 2159 EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
7d23383d 2160 {
f1f41a6c 2161 ref = memory_accesses.refs_list[i];
063a8bce 2162 execute_sm (loop, exits, ref);
7d23383d 2163 }
55a03692 2164}
2165
251317e4 2166class ref_always_accessed
5c8a8be3 2167{
251317e4 2168public:
2e966e2a 2169 ref_always_accessed (class loop *loop_, bool stored_p_)
3e48928c 2170 : loop (loop_), stored_p (stored_p_) {}
4a7ff262 2171 bool operator () (mem_ref_loc *loc);
2e966e2a 2172 class loop *loop;
5c8a8be3 2173 bool stored_p;
2174};
7d23383d 2175
5c8a8be3 2176bool
4a7ff262 2177ref_always_accessed::operator () (mem_ref_loc *loc)
7d23383d 2178{
2e966e2a 2179 class loop *must_exec;
307f7fda 2180
5c8a8be3 2181 if (!get_lim_data (loc->stmt))
2182 return false;
7d23383d 2183
5c8a8be3 2184 /* If we require an always executed store make sure the statement
2185 stores to the reference. */
2186 if (stored_p)
063a8bce 2187 {
3e48928c 2188 tree lhs = gimple_get_lhs (loc->stmt);
2189 if (!lhs
2190 || lhs != *loc->ref)
5c8a8be3 2191 return false;
2192 }
7d23383d 2193
5c8a8be3 2194 must_exec = get_lim_data (loc->stmt)->always_executed_in;
2195 if (!must_exec)
2196 return false;
307f7fda 2197
5c8a8be3 2198 if (must_exec == loop
2199 || flow_loop_nested_p (must_exec, loop))
2200 return true;
7d23383d 2201
5c8a8be3 2202 return false;
2203}
55a03692 2204
5c8a8be3 2205/* Returns true if REF is always accessed in LOOP. If STORED_P is true
2206 make sure REF is always stored to in LOOP. */
2207
2208static bool
2e966e2a 2209ref_always_accessed_p (class loop *loop, im_mem_ref *ref, bool stored_p)
5c8a8be3 2210{
5c8a8be3 2211 return for_all_locs_in_loop (loop, ref,
3e48928c 2212 ref_always_accessed (loop, stored_p));
55a03692 2213}
2214
063a8bce 2215/* Returns true if REF1 and REF2 are independent. */
55a03692 2216
063a8bce 2217static bool
4a7ff262 2218refs_independent_p (im_mem_ref *ref1, im_mem_ref *ref2)
55a03692 2219{
09f4cf62 2220 if (ref1 == ref2)
063a8bce 2221 return true;
09f4cf62 2222
063a8bce 2223 if (dump_file && (dump_flags & TDF_DETAILS))
2224 fprintf (dump_file, "Querying dependency of refs %u and %u: ",
2225 ref1->id, ref2->id);
2226
09f4cf62 2227 if (mem_refs_may_alias_p (ref1, ref2, &memory_accesses.ttae_cache))
063a8bce 2228 {
063a8bce 2229 if (dump_file && (dump_flags & TDF_DETAILS))
2230 fprintf (dump_file, "dependent.\n");
2231 return false;
2232 }
2233 else
2234 {
063a8bce 2235 if (dump_file && (dump_flags & TDF_DETAILS))
2236 fprintf (dump_file, "independent.\n");
2237 return true;
2238 }
55a03692 2239}
2240
8f9829e8 2241/* Mark REF dependent on stores or loads (according to STORED_P) in LOOP
2242 and its super-loops. */
55a03692 2243
2244static void
2e966e2a 2245record_dep_loop (class loop *loop, im_mem_ref *ref, bool stored_p)
55a03692 2246{
8f9829e8 2247 /* We can propagate dependent-in-loop bits up the loop
2248 hierarchy to all outer loops. */
2249 while (loop != current_loops->tree_root
feba4360 2250 && bitmap_set_bit (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
8f9829e8 2251 loop = loop_outer (loop);
063a8bce 2252}
55a03692 2253
3326faef 2254/* Returns true if REF is independent of all other memory
23298f15 2255 references in LOOP. */
55a03692 2256
063a8bce 2257static bool
2e966e2a 2258ref_indep_loop_p_1 (class loop *loop, im_mem_ref *ref, bool stored_p)
063a8bce 2259{
3326faef 2260 stored_p |= (ref->stored && bitmap_bit_p (ref->stored, loop->num));
2261
3326faef 2262 bool indep_p = true;
0766b2c0 2263 bitmap refs_to_check;
063a8bce 2264
8f9829e8 2265 if (stored_p)
feba4360 2266 refs_to_check = &memory_accesses.refs_in_loop[loop->num];
0766b2c0 2267 else
feba4360 2268 refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];
55a03692 2269
04911505 2270 if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID))
3326faef 2271 indep_p = false;
3326faef 2272 else
55a03692 2273 {
3326faef 2274 if (bitmap_bit_p (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
2275 return true;
2276 if (bitmap_bit_p (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
8f9829e8 2277 return false;
55a03692 2278
2e966e2a 2279 class loop *inner = loop->inner;
3326faef 2280 while (inner)
2281 {
23298f15 2282 if (!ref_indep_loop_p_1 (inner, ref, stored_p))
3326faef 2283 {
2284 indep_p = false;
2285 break;
2286 }
2287 inner = inner->next;
2288 }
f339cf78 2289
3326faef 2290 if (indep_p)
2291 {
2292 unsigned i;
2293 bitmap_iterator bi;
2294 EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
2295 {
2296 im_mem_ref *aref = memory_accesses.refs_list[i];
2297 if (!refs_independent_p (ref, aref))
2298 {
2299 indep_p = false;
2300 break;
2301 }
2302 }
2303 }
8f9829e8 2304 }
2305
063a8bce 2306 if (dump_file && (dump_flags & TDF_DETAILS))
2307 fprintf (dump_file, "Querying dependencies of ref %u in loop %d: %s\n",
8f9829e8 2308 ref->id, loop->num, indep_p ? "independent" : "dependent");
063a8bce 2309
8f9829e8 2310 /* Record the computed result in the cache. */
2311 if (indep_p)
2312 {
feba4360 2313 if (bitmap_set_bit (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p))
8f9829e8 2314 && stored_p)
2315 {
2316 /* If it's independent of all refs then it's independent
2317 of stores, too. */
feba4360 2318 bitmap_set_bit (&ref->indep_loop, LOOP_DEP_BIT (loop->num, false));
8f9829e8 2319 }
2320 }
2321 else
2322 {
2323 record_dep_loop (loop, ref, stored_p);
2324 if (!stored_p)
2325 {
2326 /* If it's dependent against stores it's dependent against
2327 all refs, too. */
2328 record_dep_loop (loop, ref, true);
2329 }
2330 }
063a8bce 2331
8f9829e8 2332 return indep_p;
2333}
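
/* The walk above caches its verdict: an "independent" result sets the
   (LOOP, STORED_P) bit in REF's indep_loop bitmap, while a "dependent"
   result is recorded via record_dep_loop and therefore also propagated
   to all enclosing loops, so repeated queries do not re-scan the
   reference lists.  */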
2334
2335/* Returns true if REF is independent of all other memory references in
23298f15 2336 LOOP. */
8f9829e8 2337
2338static bool
2e966e2a 2339ref_indep_loop_p (class loop *loop, im_mem_ref *ref)
8f9829e8 2340{
2341 gcc_checking_assert (MEM_ANALYZABLE (ref));
2342
23298f15 2343 return ref_indep_loop_p_1 (loop, ref, false);
55a03692 2344}
2345
063a8bce 2346/* Returns true if we can perform store motion of REF from LOOP. */
55a03692 2347
063a8bce 2348static bool
2e966e2a 2349can_sm_ref_p (class loop *loop, im_mem_ref *ref)
55a03692 2350{
307f7fda 2351 tree base;
2352
0766b2c0 2353 /* Can't hoist unanalyzable refs. */
2354 if (!MEM_ANALYZABLE (ref))
2355 return false;
2356
063a8bce 2357 /* It should be movable. */
09f4cf62 2358 if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
2359 || TREE_THIS_VOLATILE (ref->mem.ref)
2360 || !for_each_index (&ref->mem.ref, may_move_till, loop))
063a8bce 2361 return false;
342ea212 2362
16d9b5d8 2363 /* If it can throw, fail; we do not properly update EH info. */
09f4cf62 2364 if (tree_could_throw_p (ref->mem.ref))
16d9b5d8 2365 return false;
2366
307f7fda 2367 /* If it can trap, it must always be executed in LOOP.
2368 Readonly memory locations may trap when storing to them, but
2369 tree_could_trap_p is a predicate for rvalues, so check that
2370 explicitly. */
09f4cf62 2371 base = get_base_address (ref->mem.ref);
2372 if ((tree_could_trap_p (ref->mem.ref)
307f7fda 2373 || (DECL_P (base) && TREE_READONLY (base)))
2374 && !ref_always_accessed_p (loop, ref, true))
063a8bce 2375 return false;
342ea212 2376
063a8bce 2377 /* And it must be independent of all other memory references
2378 in LOOP. */
23298f15 2379 if (!ref_indep_loop_p (loop, ref))
063a8bce 2380 return false;
342ea212 2381
063a8bce 2382 return true;
342ea212 2383}
2384
063a8bce 2385 /* Marks the references in LOOP for which store motion should be performed
2386 in REFS_TO_SM. SM_EXECUTED is the set of references for which store
2387 motion was performed in one of the outer loops. */
342ea212 2388
2389static void
2e966e2a 2390find_refs_for_sm (class loop *loop, bitmap sm_executed, bitmap refs_to_sm)
55a03692 2391{
feba4360 2392 bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num];
063a8bce 2393 unsigned i;
2394 bitmap_iterator bi;
4a7ff262 2395 im_mem_ref *ref;
063a8bce 2396
2397 EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
2398 {
f1f41a6c 2399 ref = memory_accesses.refs_list[i];
063a8bce 2400 if (can_sm_ref_p (loop, ref))
2401 bitmap_set_bit (refs_to_sm, i);
2402 }
342ea212 2403}
55a03692 2404
063a8bce 2405/* Checks whether LOOP (with exits stored in EXITS array) is suitable
2406 for a store motion optimization (i.e. whether we can insert statements
2407 on its exits). */
342ea212 2408
063a8bce 2409static bool
2e966e2a 2410loop_suitable_for_sm (class loop *loop ATTRIBUTE_UNUSED,
f1f41a6c 2411 vec<edge> exits)
342ea212 2412{
063a8bce 2413 unsigned i;
2414 edge ex;
55a03692 2415
f1f41a6c 2416 FOR_EACH_VEC_ELT (exits, i, ex)
9102193b 2417 if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
063a8bce 2418 return false;
2419
2420 return true;
55a03692 2421}
2422
7d23383d 2423/* Try to perform store motion for all memory references modified inside
063a8bce 2424 LOOP. SM_EXECUTED is the bitmap of the memory references for which
2425 store motion was executed in one of the outer loops. */
7d23383d 2426
2427static void
2e966e2a 2428store_motion_loop (class loop *loop, bitmap sm_executed)
7d23383d 2429{
f1f41a6c 2430 vec<edge> exits = get_loop_exit_edges (loop);
2e966e2a 2431 class loop *subloop;
39f8afdf 2432 bitmap sm_in_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
7d23383d 2433
063a8bce 2434 if (loop_suitable_for_sm (loop, exits))
7d23383d 2435 {
063a8bce 2436 find_refs_for_sm (loop, sm_executed, sm_in_loop);
2437 hoist_memory_references (loop, sm_in_loop, exits);
7d23383d 2438 }
f1f41a6c 2439 exits.release ();
063a8bce 2440
2441 bitmap_ior_into (sm_executed, sm_in_loop);
2442 for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
2443 store_motion_loop (subloop, sm_executed);
2444 bitmap_and_compl_into (sm_executed, sm_in_loop);
2445 BITMAP_FREE (sm_in_loop);
7d23383d 2446}
2447
2448/* Try to perform store motion for all memory references modified inside
7194de72 2449 loops. */
7d23383d 2450
2451static void
063a8bce 2452store_motion (void)
7d23383d 2453{
2e966e2a 2454 class loop *loop;
39f8afdf 2455 bitmap sm_executed = BITMAP_ALLOC (&lim_bitmap_obstack);
dda28f7c 2456
063a8bce 2457 for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
2458 store_motion_loop (loop, sm_executed);
17519ba0 2459
063a8bce 2460 BITMAP_FREE (sm_executed);
75a70cf9 2461 gsi_commit_edge_inserts ();
7d23383d 2462}
2463
2464/* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
2465 for each such basic block bb records the outermost loop such that execution
2466 of its header implies execution of bb. CONTAINS_CALL is the bitmap of
2467 blocks that contain a nonpure call. */
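
/* For illustration, consider a loop of the form

     loop_1: for (;;) { A; if (cond) break; B; }

   The block containing A (the header) is executed whenever the loop is
   entered, so it gets ALWAYS_EXECUTED_IN == loop_1, while the block
   containing B runs only when the exit is not taken and is left
   unmarked by this walk.  */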
2468
2469static void
2e966e2a 2470fill_always_executed_in_1 (class loop *loop, sbitmap contains_call)
7d23383d 2471{
2472 basic_block bb = NULL, *bbs, last = NULL;
2473 unsigned i;
2474 edge e;
2e966e2a 2475 class loop *inn_loop = loop;
7d23383d 2476
2fd20c29 2477 if (ALWAYS_EXECUTED_IN (loop->header) == NULL)
7d23383d 2478 {
2479 bbs = get_loop_body_in_dom_order (loop);
2480
2481 for (i = 0; i < loop->num_nodes; i++)
2482 {
cd665a06 2483 edge_iterator ei;
7d23383d 2484 bb = bbs[i];
2485
2486 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
2487 last = bb;
2488
08b7917c 2489 if (bitmap_bit_p (contains_call, bb->index))
7d23383d 2490 break;
2491
cd665a06 2492 FOR_EACH_EDGE (e, ei, bb->succs)
5b9b62c9 2493 {
2494 /* If there is an exit from this BB. */
2495 if (!flow_bb_inside_loop_p (loop, e->dest))
2496 break;
2497 /* Or we enter a possibly non-finite loop. */
2498 if (flow_loop_nested_p (bb->loop_father,
2499 e->dest->loop_father)
2500 && ! finite_loop_p (e->dest->loop_father))
2501 break;
2502 }
7d23383d 2503 if (e)
2504 break;
2505
2506 /* A loop might be infinite (TODO use simple loop analysis
2507 to disprove this if possible). */
2508 if (bb->flags & BB_IRREDUCIBLE_LOOP)
2509 break;
2510
2511 if (!flow_bb_inside_loop_p (inn_loop, bb))
2512 break;
2513
2514 if (bb->loop_father->header == bb)
2515 {
2516 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
2517 break;
2518
2519 /* In a loop that is always entered we may proceed anyway.
2520 But record that we entered it and stop once we leave it. */
2521 inn_loop = bb->loop_father;
2522 }
2523 }
2524
2525 while (1)
2526 {
2fd20c29 2527 SET_ALWAYS_EXECUTED_IN (last, loop);
7d23383d 2528 if (last == loop->header)
2529 break;
2530 last = get_immediate_dominator (CDI_DOMINATORS, last);
2531 }
2532
2533 free (bbs);
2534 }
2535
2536 for (loop = loop->inner; loop; loop = loop->next)
feba4360 2537 fill_always_executed_in_1 (loop, contains_call);
7d23383d 2538}
2539
feba4360 2540/* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e.
2541 for each such basic block bb records the outermost loop such that execution
2542 of its header implies execution of bb. */
7d23383d 2543
2544static void
feba4360 2545fill_always_executed_in (void)
7d23383d 2546{
7d23383d 2547 basic_block bb;
2e966e2a 2548 class loop *loop;
4fb07d00 2549
3c6549f8 2550 auto_sbitmap contains_call (last_basic_block_for_fn (cfun));
53c5d9d4 2551 bitmap_clear (contains_call);
fc00614f 2552 FOR_EACH_BB_FN (bb, cfun)
7d23383d 2553 {
feba4360 2554 gimple_stmt_iterator gsi;
2555 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7d23383d 2556 {
feba4360 2557 if (nonpure_call_p (gsi_stmt (gsi)))
7d23383d 2558 break;
2559 }
2560
feba4360 2561 if (!gsi_end_p (gsi))
08b7917c 2562 bitmap_set_bit (contains_call, bb->index);
7d23383d 2563 }
2564
7194de72 2565 for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
feba4360 2566 fill_always_executed_in_1 (loop, contains_call);
feba4360 2567}
2568
2569
2570/* Compute the global information needed by the loop invariant motion pass. */
75a70cf9 2571
feba4360 2572static void
2573tree_ssa_lim_initialize (void)
2574{
2e966e2a 2575 class loop *loop;
feba4360 2576 unsigned i;
2577
2578 bitmap_obstack_initialize (&lim_bitmap_obstack);
3e48928c 2579 gcc_obstack_init (&mem_ref_obstack);
42acab1c 2580 lim_aux_data_map = new hash_map<gimple *, lim_aux_data *>;
de60f90c 2581
2582 if (flag_tm)
2583 compute_transaction_bits ();
61025ec0 2584
2585 alloc_aux_for_edges (0);
feba4360 2586
c1f445d2 2587 memory_accesses.refs = new hash_table<mem_ref_hasher> (100);
feba4360 2588 memory_accesses.refs_list.create (100);
2589 /* Allocate a special, unanalyzable mem-ref with ID zero. */
2590 memory_accesses.refs_list.quick_push
1b94b295 2591 (mem_ref_alloc (NULL, 0, UNANALYZABLE_MEM_ID));
feba4360 2592
41f75a99 2593 memory_accesses.refs_in_loop.create (number_of_loops (cfun));
2594 memory_accesses.refs_in_loop.quick_grow (number_of_loops (cfun));
2595 memory_accesses.refs_stored_in_loop.create (number_of_loops (cfun));
2596 memory_accesses.refs_stored_in_loop.quick_grow (number_of_loops (cfun));
2597 memory_accesses.all_refs_stored_in_loop.create (number_of_loops (cfun));
2598 memory_accesses.all_refs_stored_in_loop.quick_grow (number_of_loops (cfun));
feba4360 2599
41f75a99 2600 for (i = 0; i < number_of_loops (cfun); i++)
feba4360 2601 {
2602 bitmap_initialize (&memory_accesses.refs_in_loop[i],
2603 &lim_bitmap_obstack);
2604 bitmap_initialize (&memory_accesses.refs_stored_in_loop[i],
2605 &lim_bitmap_obstack);
2606 bitmap_initialize (&memory_accesses.all_refs_stored_in_loop[i],
2607 &lim_bitmap_obstack);
2608 }
2609
2610 memory_accesses.ttae_cache = NULL;
3e48928c 2611
2612 /* Initialize bb_loop_postorder with a mapping from loop->num to
2613 its postorder index. */
2614 i = 0;
2615 bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun));
2616 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
2617 bb_loop_postorder[loop->num] = i++;
7d23383d 2618}
2619
2620/* Cleans up after the invariant motion pass. */
2621
2622static void
2623tree_ssa_lim_finalize (void)
2624{
2625 basic_block bb;
063a8bce 2626 unsigned i;
4a7ff262 2627 im_mem_ref *ref;
7d23383d 2628
61025ec0 2629 free_aux_for_edges ();
2630
fc00614f 2631 FOR_EACH_BB_FN (bb, cfun)
2fd20c29 2632 SET_ALWAYS_EXECUTED_IN (bb, NULL);
063a8bce 2633
4fb07d00 2634 bitmap_obstack_release (&lim_bitmap_obstack);
06ecf488 2635 delete lim_aux_data_map;
75a70cf9 2636
c1f445d2 2637 delete memory_accesses.refs;
2638 memory_accesses.refs = NULL;
063a8bce 2639
f1f41a6c 2640 FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
83b709f2 2641 memref_free (ref);
f1f41a6c 2642 memory_accesses.refs_list.release ();
3e48928c 2643 obstack_free (&mem_ref_obstack, NULL);
83b709f2 2644
f1f41a6c 2645 memory_accesses.refs_in_loop.release ();
8f9829e8 2646 memory_accesses.refs_stored_in_loop.release ();
f1f41a6c 2647 memory_accesses.all_refs_stored_in_loop.release ();
063a8bce 2648
2649 if (memory_accesses.ttae_cache)
78d53e33 2650 free_affine_expand_cache (&memory_accesses.ttae_cache);
3e48928c 2651
2652 free (bb_loop_postorder);
7d23383d 2653}
2654
7194de72 2655/* Moves invariants from loops. Only "expensive" invariants are moved out --
7d23383d 2656 i.e. those that are likely to be a win regardless of the register pressure. */
2657
b11dc37d 2658static unsigned int
7194de72 2659tree_ssa_lim (void)
7d23383d 2660{
9bf0a3f9 2661 unsigned int todo;
2662
7194de72 2663 tree_ssa_lim_initialize ();
7d23383d 2664
063a8bce 2665 /* Gathers information about memory accesses in the loops. */
2666 analyze_memory_references ();
2667
feba4360 2668 /* Fills ALWAYS_EXECUTED_IN information for basic blocks. */
2669 fill_always_executed_in ();
2670
7d23383d 2671 /* For each statement determine the outermost loop in that it is
2672 invariant and cost for computing the invariant. */
54c91640 2673 invariantness_dom_walker (CDI_DOMINATORS)
2674 .walk (cfun->cfg->x_entry_block_ptr);
7d23383d 2675
063a8bce 2676 /* Execute store motion. Force the necessary invariants to be moved
2677 out of the loops as well. */
2678 store_motion ();
7d23383d 2679
2680 /* Move the expressions that are expensive enough. */
9bf0a3f9 2681 todo = move_computations ();
7d23383d 2682
2683 tree_ssa_lim_finalize ();
9bf0a3f9 2684
2685 return todo;
7d23383d 2686}
f86b328b 2687
2688/* Loop invariant motion pass. */
2689
f86b328b 2690namespace {
2691
2692const pass_data pass_data_lim =
2693{
2694 GIMPLE_PASS, /* type */
2695 "lim", /* name */
2696 OPTGROUP_LOOP, /* optinfo_flags */
f86b328b 2697 TV_LIM, /* tv_id */
2698 PROP_cfg, /* properties_required */
2699 0, /* properties_provided */
2700 0, /* properties_destroyed */
2701 0, /* todo_flags_start */
2702 0, /* todo_flags_finish */
2703};
2704
2705class pass_lim : public gimple_opt_pass
2706{
2707public:
2708 pass_lim (gcc::context *ctxt)
2709 : gimple_opt_pass (pass_data_lim, ctxt)
2710 {}
2711
2712 /* opt_pass methods: */
2713 opt_pass * clone () { return new pass_lim (m_ctxt); }
31315c24 2714 virtual bool gate (function *) { return flag_tree_loop_im != 0; }
65b0537f 2715 virtual unsigned int execute (function *);
f86b328b 2716
2717}; // class pass_lim
2718
65b0537f 2719unsigned int
2720pass_lim::execute (function *fun)
2721{
b11dc37d 2722 bool in_loop_pipeline = scev_initialized_p ();
2723 if (!in_loop_pipeline)
2724 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
2725
65b0537f 2726 if (number_of_loops (fun) <= 1)
2727 return 0;
b11dc37d 2728 unsigned int todo = tree_ssa_lim ();
65b0537f 2729
b11dc37d 2730 if (!in_loop_pipeline)
2731 loop_optimizer_finalize ();
9b33fe18 2732 else
2733 scev_reset ();
b11dc37d 2734 return todo;
65b0537f 2735}
2736
f86b328b 2737} // anon namespace
2738
2739gimple_opt_pass *
2740make_pass_lim (gcc::context *ctxt)
2741{
2742 return new pass_lim (ctxt);
2743}
2744
2745