/* Loop invariant motion.
   Copyright (C) 2003-2019 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "cfganal.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "cfgloop.h"
#include "domwalk.h"
#include "params.h"
#include "tree-affine.h"
#include "tree-ssa-propagate.h"
#include "trans-mem.h"
#include "gimple-fold.h"
#include "tree-scalar-evolution.h"
#include "tree-ssa-loop-niter.h"
#include "alias.h"
#include "builtins.h"
#include "tree-dfa.h"

/* TODO:  Support for predicated code motion.  I.e.

   while (1)
     {
       if (cond)
	 {
	   a = inv;
	   something;
	 }
     }

   Where COND and INV are invariants, but evaluating INV may trap or be
   invalid for some other reason if !COND.  This may be transformed to

   if (cond)
     a = inv;
   while (1)
     {
       if (cond)
	 something;
     }  */

/* The auxiliary data kept for each statement.  */

struct lim_aux_data
{
  struct loop *max_loop;	/* The outermost loop in which the statement
				   is invariant.  */

  struct loop *tgt_loop;	/* The loop out of which we want to move the
				   invariant.  */

  struct loop *always_executed_in;
				/* The outermost loop for which we are sure
				   the statement is executed if the loop
				   is entered.  */

  unsigned cost;		/* Cost of the computation performed by the
				   statement.  */

  unsigned ref;			/* The simple_mem_ref in this stmt or 0.  */

  vec<gimple *> depends;	/* Vector of statements that must also be
				   hoisted out of the loop when this statement
				   is hoisted; i.e. those that define the
				   operands of the statement and are inside of
				   the MAX_LOOP loop.  */
};

/* Maps statements to their lim_aux_data.  */

static hash_map<gimple *, lim_aux_data *> *lim_aux_data_map;

/* Description of a memory reference location.  */

struct mem_ref_loc
{
  tree *ref;			/* The reference itself.  */
  gimple *stmt;			/* The statement in which it occurs.  */
};


/* Description of a memory reference.  */

struct im_mem_ref
{
  unsigned id : 31;		/* ID assigned to the memory reference
				   (its index in memory_accesses.refs_list)  */
  unsigned ref_canonical : 1;	/* Whether mem.ref was canonicalized.  */
  hashval_t hash;		/* Its hash value.  */

  /* The memory access itself and associated caching of alias-oracle
     query meta-data.  */
  ao_ref mem;

  bitmap stored;		/* The set of loops in which this memory
				   location is stored to.  */
  vec<mem_ref_loc> accesses_in_loop;
				/* The locations of the accesses.  Vector
				   indexed by the loop number.  */

  /* The following sets are computed on demand.  We keep both set and
     its complement, so that we know whether the information was
     already computed or not.  */
  bitmap_head indep_loop;	/* The set of loops in which the memory
				   reference is independent, meaning:
				   If it is stored in the loop, this store
				   is independent of all other loads and
				   stores.
				   If it is only loaded, then it is independent
				   of all stores in the loop.  */
  bitmap_head dep_loop;		/* The complement of INDEP_LOOP.  */
};

/* We use two bits per loop in the ref->{in,}dep_loop bitmaps, the first
   to record (in)dependence against stores in the loop and its subloops, the
   second to record (in)dependence against all references in the loop
   and its subloops.  */
#define LOOP_DEP_BIT(loopnum, storedp) (2 * (loopnum) + (storedp ? 1 : 0))

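/* For instance (illustration only, derived directly from the macro above):
   loop number 3 owns two consecutive bits in each of these bitmaps,
   LOOP_DEP_BIT (3, false) == 6 and LOOP_DEP_BIT (3, true) == 7.  */
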
/* Mem_ref hashtable helpers.  */

struct mem_ref_hasher : nofree_ptr_hash <im_mem_ref>
{
  typedef ao_ref *compare_type;
  static inline hashval_t hash (const im_mem_ref *);
  static inline bool equal (const im_mem_ref *, const ao_ref *);
};

/* A hash function for struct im_mem_ref object MEM.  */

inline hashval_t
mem_ref_hasher::hash (const im_mem_ref *mem)
{
  return mem->hash;
}

/* An equality function for struct im_mem_ref object MEM1 with
   memory reference OBJ2.  */

inline bool
mem_ref_hasher::equal (const im_mem_ref *mem1, const ao_ref *obj2)
{
  if (obj2->max_size_known_p ())
    return (operand_equal_p (mem1->mem.base, obj2->base, 0)
	    && known_eq (mem1->mem.offset, obj2->offset)
	    && known_eq (mem1->mem.size, obj2->size)
	    && known_eq (mem1->mem.max_size, obj2->max_size)
	    && mem1->mem.volatile_p == obj2->volatile_p
	    && mem1->mem.ref_alias_set == obj2->ref_alias_set
	    && types_compatible_p (TREE_TYPE (mem1->mem.ref),
				   TREE_TYPE (obj2->ref)));
  else
    return operand_equal_p (mem1->mem.ref, obj2->ref, 0);
}


/* Description of memory accesses in loops.  */

static struct
{
  /* The hash table of memory references accessed in loops.  */
  hash_table<mem_ref_hasher> *refs;

  /* The list of memory references.  */
  vec<im_mem_ref *> refs_list;

  /* The set of memory references accessed in each loop.  */
  vec<bitmap_head> refs_in_loop;

  /* The set of memory references stored in each loop.  */
  vec<bitmap_head> refs_stored_in_loop;

  /* The set of memory references stored in each loop, including subloops.  */
  vec<bitmap_head> all_refs_stored_in_loop;

  /* Cache for expanding memory addresses.  */
  hash_map<tree, name_expansion *> *ttae_cache;
} memory_accesses;

/* Obstack for the bitmaps in the above data structures.  */
static bitmap_obstack lim_bitmap_obstack;
static obstack mem_ref_obstack;

static bool ref_indep_loop_p (struct loop *, im_mem_ref *);
static bool ref_always_accessed_p (struct loop *, im_mem_ref *, bool);

/* Minimum cost of an expensive expression.  */
#define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))

/* The outermost loop for which execution of the header guarantees that the
   block will be executed.  */
#define ALWAYS_EXECUTED_IN(BB) ((struct loop *) (BB)->aux)
#define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))

/* ID of the shared unanalyzable mem.  */
#define UNANALYZABLE_MEM_ID 0

/* Whether the reference was analyzable.  */
#define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID)

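/* Illustration only: gather_mem_refs_stmt below funnels every access it
   cannot analyze into memory_accesses.refs_list[UNANALYZABLE_MEM_ID], so
   MEM_ANALYZABLE is false exactly for that shared reference and true for
   every reference that received its own entry.  */
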
static struct lim_aux_data *
init_lim_data (gimple *stmt)
{
  lim_aux_data *p = XCNEW (struct lim_aux_data);
  lim_aux_data_map->put (stmt, p);

  return p;
}

static struct lim_aux_data *
get_lim_data (gimple *stmt)
{
  lim_aux_data **p = lim_aux_data_map->get (stmt);
  if (!p)
    return NULL;

  return *p;
}

/* Releases the memory occupied by DATA.  */

static void
free_lim_aux_data (struct lim_aux_data *data)
{
  data->depends.release ();
  free (data);
}

static void
clear_lim_data (gimple *stmt)
{
  lim_aux_data **p = lim_aux_data_map->get (stmt);
  if (!p)
    return;

  free_lim_aux_data (*p);
  *p = NULL;
}


/* The possibilities of statement movement.  */
enum move_pos
  {
    MOVE_IMPOSSIBLE,		/* No movement -- side effect expression.  */
    MOVE_PRESERVE_EXECUTION,	/* Must not cause the non-executed statement
				   to become executed -- memory accesses, ...  */
    MOVE_POSSIBLE		/* Unlimited movement.  */
  };


/* If it is possible to hoist the statement STMT unconditionally,
   returns MOVE_POSSIBLE.
   If it is possible to hoist the statement STMT, but we must avoid making
   it executed if it would not be executed in the original program (e.g.
   because it may trap), return MOVE_PRESERVE_EXECUTION.
   Otherwise return MOVE_IMPOSSIBLE.  */

enum move_pos
movement_possibility (gimple *stmt)
{
  tree lhs;
  enum move_pos ret = MOVE_POSSIBLE;

  if (flag_unswitch_loops
      && gimple_code (stmt) == GIMPLE_COND)
    {
      /* If we perform unswitching, force the operands of the invariant
	 condition to be moved out of the loop.  */
      return MOVE_POSSIBLE;
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_phi_num_args (stmt) <= 2
      && !virtual_operand_p (gimple_phi_result (stmt))
      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
    return MOVE_POSSIBLE;

  if (gimple_get_lhs (stmt) == NULL_TREE)
    return MOVE_IMPOSSIBLE;

  if (gimple_vdef (stmt))
    return MOVE_IMPOSSIBLE;

  if (stmt_ends_bb_p (stmt)
      || gimple_has_volatile_ops (stmt)
      || gimple_has_side_effects (stmt)
      || stmt_could_throw_p (cfun, stmt))
    return MOVE_IMPOSSIBLE;

  if (is_gimple_call (stmt))
    {
      /* While a pure or const call is guaranteed to have no side effects, we
	 cannot move it arbitrarily.  Consider code like

	 char *s = something ();

	 while (1)
	   {
	     if (s)
	       t = strlen (s);
	     else
	       t = 0;
	   }

	 Here the strlen call cannot be moved out of the loop, even though
	 s is invariant.  In addition to possibly creating a call with
	 invalid arguments, moving out a function call that is not executed
	 may cause performance regressions in case the call is costly and
	 not executed at all.  */
      ret = MOVE_PRESERVE_EXECUTION;
      lhs = gimple_call_lhs (stmt);
    }
  else if (is_gimple_assign (stmt))
    lhs = gimple_assign_lhs (stmt);
  else
    return MOVE_IMPOSSIBLE;

  if (TREE_CODE (lhs) == SSA_NAME
      && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
    return MOVE_IMPOSSIBLE;

  if (TREE_CODE (lhs) != SSA_NAME
      || gimple_could_trap_p (stmt))
    return MOVE_PRESERVE_EXECUTION;

  /* Non-local loads in a transaction cannot be hoisted out.  Well,
     unless the load happens on every path out of the loop, but we
     don't take this into account yet.  */
  if (flag_tm
      && gimple_in_transaction (stmt)
      && gimple_assign_single_p (stmt))
    {
      tree rhs = gimple_assign_rhs1 (stmt);
      if (DECL_P (rhs) && is_global_var (rhs))
	{
	  if (dump_file)
	    {
	      fprintf (dump_file, "Cannot hoist conditional load of ");
	      print_generic_expr (dump_file, rhs, TDF_SLIM);
	      fprintf (dump_file, " because it is in a transaction.\n");
	    }
	  return MOVE_IMPOSSIBLE;
	}
    }

  return ret;
}

/* Suppose that operand DEF is used inside the LOOP.  Returns the outermost
   loop to which we could move the expression using DEF if it did not have
   other operands, i.e. the outermost loop enclosing LOOP in which the value
   of DEF is invariant.  */

static struct loop *
outermost_invariant_loop (tree def, struct loop *loop)
{
  gimple *def_stmt;
  basic_block def_bb;
  struct loop *max_loop;
  struct lim_aux_data *lim_data;

  if (!def)
    return superloop_at_depth (loop, 1);

  if (TREE_CODE (def) != SSA_NAME)
    {
      gcc_assert (is_gimple_min_invariant (def));
      return superloop_at_depth (loop, 1);
    }

  def_stmt = SSA_NAME_DEF_STMT (def);
  def_bb = gimple_bb (def_stmt);
  if (!def_bb)
    return superloop_at_depth (loop, 1);

  max_loop = find_common_loop (loop, def_bb->loop_father);

  lim_data = get_lim_data (def_stmt);
  if (lim_data != NULL && lim_data->max_loop != NULL)
    max_loop = find_common_loop (max_loop,
				 loop_outer (lim_data->max_loop));
  if (max_loop == loop)
    return NULL;
  max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);

  return max_loop;
}

/* DATA is a structure containing information associated with a statement
   inside LOOP.  DEF is one of the operands of this statement.

   Find the outermost loop enclosing LOOP in which the value of DEF is
   invariant and record this in DATA->max_loop field.  If DEF itself is
   defined inside this loop as well (i.e. we need to hoist it out of the loop
   if we want to hoist the statement represented by DATA), record the
   statement in which DEF is defined to the DATA->depends list.  Additionally
   if ADD_COST is true, add the cost of the computation of DEF to the
   DATA->cost.

   If DEF is not invariant in LOOP, return false.  Otherwise return TRUE.  */

static bool
add_dependency (tree def, struct lim_aux_data *data, struct loop *loop,
		bool add_cost)
{
  gimple *def_stmt = SSA_NAME_DEF_STMT (def);
  basic_block def_bb = gimple_bb (def_stmt);
  struct loop *max_loop;
  struct lim_aux_data *def_data;

  if (!def_bb)
    return true;

  max_loop = outermost_invariant_loop (def, loop);
  if (!max_loop)
    return false;

  if (flow_loop_nested_p (data->max_loop, max_loop))
    data->max_loop = max_loop;

  def_data = get_lim_data (def_stmt);
  if (!def_data)
    return true;

  if (add_cost
      /* Only add the cost if the statement defining DEF is inside LOOP,
	 i.e. if it is likely that by moving the invariants dependent
	 on it, we will be able to avoid creating a new register for
	 it (since it will be only used in these dependent invariants).  */
      && def_bb->loop_father == loop)
    data->cost += def_data->cost;

  data->depends.safe_push (def_stmt);

  return true;
}

/* Returns an estimate for a cost of statement STMT.  The values here
   are just ad-hoc constants, similar to costs for inlining.  */

static unsigned
stmt_cost (gimple *stmt)
{
  /* Always try to create possibilities for unswitching.  */
  if (gimple_code (stmt) == GIMPLE_COND
      || gimple_code (stmt) == GIMPLE_PHI)
    return LIM_EXPENSIVE;

  /* We should be hoisting calls if possible.  */
  if (is_gimple_call (stmt))
    {
      tree fndecl;

      /* Unless the call is a builtin_constant_p; this always folds to a
	 constant, so moving it is useless.  */
      fndecl = gimple_call_fndecl (stmt);
      if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_CONSTANT_P))
	return 0;

      return LIM_EXPENSIVE;
    }

  /* Hoisting memory references out should almost surely be a win.  */
  if (gimple_references_memory_p (stmt))
    return LIM_EXPENSIVE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return 1;

  switch (gimple_assign_rhs_code (stmt))
    {
    case MULT_EXPR:
    case WIDEN_MULT_EXPR:
    case WIDEN_MULT_PLUS_EXPR:
    case WIDEN_MULT_MINUS_EXPR:
    case DOT_PROD_EXPR:
    case TRUNC_DIV_EXPR:
    case CEIL_DIV_EXPR:
    case FLOOR_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case EXACT_DIV_EXPR:
    case CEIL_MOD_EXPR:
    case FLOOR_MOD_EXPR:
    case ROUND_MOD_EXPR:
    case TRUNC_MOD_EXPR:
    case RDIV_EXPR:
      /* Division and multiplication are usually expensive.  */
      return LIM_EXPENSIVE;

    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case WIDEN_LSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      /* Shifts and rotates are usually expensive.  */
      return LIM_EXPENSIVE;

    case CONSTRUCTOR:
      /* Make vector construction cost proportional to the number
	 of elements.  */
      return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));

    case SSA_NAME:
    case PAREN_EXPR:
      /* Whether or not something is wrapped inside a PAREN_EXPR
	 should not change move cost.  Nor should an intermediate
	 unpropagated SSA name copy.  */
      return 0;

    default:
      return 1;
    }
}

/* Finds the outermost loop between OUTER and LOOP in which the memory
   reference REF is independent.  If REF is not independent in LOOP, NULL is
   returned instead.  */

static struct loop *
outermost_indep_loop (struct loop *outer, struct loop *loop, im_mem_ref *ref)
{
  struct loop *aloop;

  if (ref->stored && bitmap_bit_p (ref->stored, loop->num))
    return NULL;

  for (aloop = outer;
       aloop != loop;
       aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
    if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
	&& ref_indep_loop_p (aloop, ref))
      return aloop;

  if (ref_indep_loop_p (loop, ref))
    return loop;
  else
    return NULL;
}

/* If there is a simple load or store to a memory reference in STMT, returns
   the location of the memory reference, and sets IS_STORE according to whether
   it is a store or load.  Otherwise, returns NULL.  */

static tree *
simple_mem_ref_in_stmt (gimple *stmt, bool *is_store)
{
  tree *lhs, *rhs;

  /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns.  */
  if (!gimple_assign_single_p (stmt))
    return NULL;

  lhs = gimple_assign_lhs_ptr (stmt);
  rhs = gimple_assign_rhs1_ptr (stmt);

  if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
    {
      *is_store = false;
      return rhs;
    }
  else if (gimple_vdef (stmt)
	   && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
    {
      *is_store = true;
      return lhs;
    }
  else
    return NULL;
}

/* From a controlling predicate in DOM determine the arguments from
   the PHI node PHI that are chosen if the predicate evaluates to
   true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
   they are non-NULL.  Returns true if the arguments can be determined,
   else return false.  */

static bool
extract_true_false_args_from_phi (basic_block dom, gphi *phi,
				  tree *true_arg_p, tree *false_arg_p)
{
  edge te, fe;
  if (! extract_true_false_controlled_edges (dom, gimple_bb (phi),
					     &te, &fe))
    return false;

  if (true_arg_p)
    *true_arg_p = PHI_ARG_DEF (phi, te->dest_idx);
  if (false_arg_p)
    *false_arg_p = PHI_ARG_DEF (phi, fe->dest_idx);

  return true;
}

/* Determine the outermost loop to which it is possible to hoist a statement
   STMT and store it to LIM_DATA (STMT)->max_loop.  To do this we determine
   the outermost loop in which the value computed by STMT is invariant.
   If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
   we preserve the fact whether STMT is executed.  It also fills other related
   information to LIM_DATA (STMT).

   The function returns false if STMT cannot be hoisted outside of the loop it
   is defined in, and true otherwise.  */

static bool
determine_max_movement (gimple *stmt, bool must_preserve_exec)
{
  basic_block bb = gimple_bb (stmt);
  struct loop *loop = bb->loop_father;
  struct loop *level;
  struct lim_aux_data *lim_data = get_lim_data (stmt);
  tree val;
  ssa_op_iter iter;

  if (must_preserve_exec)
    level = ALWAYS_EXECUTED_IN (bb);
  else
    level = superloop_at_depth (loop, 1);
  lim_data->max_loop = level;

  if (gphi *phi = dyn_cast <gphi *> (stmt))
    {
      use_operand_p use_p;
      unsigned min_cost = UINT_MAX;
      unsigned total_cost = 0;
      struct lim_aux_data *def_data;

      /* We will end up promoting dependencies to be unconditionally
	 evaluated.  For this reason the PHI cost (and thus the
	 cost we remove from the loop by doing the invariant motion)
	 is that of the cheapest PHI argument dependency chain.  */
      FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
	{
	  val = USE_FROM_PTR (use_p);

	  if (TREE_CODE (val) != SSA_NAME)
	    {
	      /* Assign const 1 to constants.  */
	      min_cost = MIN (min_cost, 1);
	      total_cost += 1;
	      continue;
	    }
	  if (!add_dependency (val, lim_data, loop, false))
	    return false;

	  gimple *def_stmt = SSA_NAME_DEF_STMT (val);
	  if (gimple_bb (def_stmt)
	      && gimple_bb (def_stmt)->loop_father == loop)
	    {
	      def_data = get_lim_data (def_stmt);
	      if (def_data)
		{
		  min_cost = MIN (min_cost, def_data->cost);
		  total_cost += def_data->cost;
		}
	    }
	}

      min_cost = MIN (min_cost, total_cost);
      lim_data->cost += min_cost;

      if (gimple_phi_num_args (phi) > 1)
	{
	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
	  gimple *cond;
	  if (gsi_end_p (gsi_last_bb (dom)))
	    return false;
	  cond = gsi_stmt (gsi_last_bb (dom));
	  if (gimple_code (cond) != GIMPLE_COND)
	    return false;
	  /* Verify that this is an extended form of a diamond and
	     the PHI arguments are completely controlled by the
	     predicate in DOM.  */
	  if (!extract_true_false_args_from_phi (dom, phi, NULL, NULL))
	    return false;

	  /* Fold in dependencies and cost of the condition.  */
	  FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
	    {
	      if (!add_dependency (val, lim_data, loop, false))
		return false;
	      def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
	      if (def_data)
		lim_data->cost += def_data->cost;
	    }

	  /* We want to avoid unconditionally executing very expensive
	     operations.  As costs for our dependencies cannot be
	     negative just claim we are not invariant for this case.
	     We also are not sure whether the control-flow inside the
	     loop will vanish.  */
	  if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
	      && !(min_cost != 0
		   && total_cost / min_cost <= 2))
	    return false;

	  /* Assume that the control-flow in the loop will vanish.
	     ??? We should verify this and not artificially increase
	     the cost if that is not the case.  */
	  lim_data->cost += stmt_cost (stmt);
	}

      return true;
    }
  else
    FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
      if (!add_dependency (val, lim_data, loop, true))
	return false;

  if (gimple_vuse (stmt))
    {
      im_mem_ref *ref
	= lim_data ? memory_accesses.refs_list[lim_data->ref] : NULL;
      if (ref
	  && MEM_ANALYZABLE (ref))
	{
	  lim_data->max_loop = outermost_indep_loop (lim_data->max_loop,
						     loop, ref);
	  if (!lim_data->max_loop)
	    return false;
	}
      else if (! add_dependency (gimple_vuse (stmt), lim_data, loop, false))
	return false;
    }

  lim_data->cost += stmt_cost (stmt);

  return true;
}

/* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
   and that one of the operands of this statement is computed by STMT.
   Ensure that STMT (together with all the statements that define its
   operands) is hoisted at least out of the loop LEVEL.  */

static void
set_level (gimple *stmt, struct loop *orig_loop, struct loop *level)
{
  struct loop *stmt_loop = gimple_bb (stmt)->loop_father;
  struct lim_aux_data *lim_data;
  gimple *dep_stmt;
  unsigned i;

  stmt_loop = find_common_loop (orig_loop, stmt_loop);
  lim_data = get_lim_data (stmt);
  if (lim_data != NULL && lim_data->tgt_loop != NULL)
    stmt_loop = find_common_loop (stmt_loop,
				  loop_outer (lim_data->tgt_loop));
  if (flow_loop_nested_p (stmt_loop, level))
    return;

  gcc_assert (level == lim_data->max_loop
	      || flow_loop_nested_p (lim_data->max_loop, level));

  lim_data->tgt_loop = level;
  FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
    set_level (dep_stmt, orig_loop, level);
}

/* Determines an outermost loop from which we want to hoist the statement STMT.
   For now we choose the outermost possible loop.  TODO -- use profiling
   information to set it more sanely.  */

static void
set_profitable_level (gimple *stmt)
{
  set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
}

/* Returns true if STMT is a call that has side effects.  */

static bool
nonpure_call_p (gimple *stmt)
{
  if (gimple_code (stmt) != GIMPLE_CALL)
    return false;

  return gimple_has_side_effects (stmt);
}

/* Rewrite a/b to a*(1/b).  Return the invariant stmt to process.  */

static gimple *
rewrite_reciprocal (gimple_stmt_iterator *bsi)
{
  gassign *stmt, *stmt1, *stmt2;
  tree name, lhs, type;
  tree real_one;
  gimple_stmt_iterator gsi;

  stmt = as_a <gassign *> (gsi_stmt (*bsi));
  lhs = gimple_assign_lhs (stmt);
  type = TREE_TYPE (lhs);

  real_one = build_one_cst (type);

  name = make_temp_ssa_name (type, NULL, "reciptmp");
  stmt1 = gimple_build_assign (name, RDIV_EXPR, real_one,
			       gimple_assign_rhs2 (stmt));
  stmt2 = gimple_build_assign (lhs, MULT_EXPR, name,
			       gimple_assign_rhs1 (stmt));

  /* Replace division stmt with reciprocal and multiply stmts.
     The multiply stmt is not invariant, so update iterator
     and avoid rescanning.  */
  gsi = *bsi;
  gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
  gsi_replace (&gsi, stmt2, true);

  /* Continue processing with invariant reciprocal statement.  */
  return stmt1;
}

/* Check if the pattern at *BSI is a bittest of the form
   (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0.  */

static gimple *
rewrite_bittest (gimple_stmt_iterator *bsi)
{
  gassign *stmt;
  gimple *stmt1;
  gassign *stmt2;
  gimple *use_stmt;
  gcond *cond_stmt;
  tree lhs, name, t, a, b;
  use_operand_p use;

  stmt = as_a <gassign *> (gsi_stmt (*bsi));
  lhs = gimple_assign_lhs (stmt);

  /* Verify that the single use of lhs is a comparison against zero.  */
  if (TREE_CODE (lhs) != SSA_NAME
      || !single_imm_use (lhs, &use, &use_stmt))
    return stmt;
  cond_stmt = dyn_cast <gcond *> (use_stmt);
  if (!cond_stmt)
    return stmt;
  if (gimple_cond_lhs (cond_stmt) != lhs
      || (gimple_cond_code (cond_stmt) != NE_EXPR
	  && gimple_cond_code (cond_stmt) != EQ_EXPR)
      || !integer_zerop (gimple_cond_rhs (cond_stmt)))
    return stmt;

  /* Get at the operands of the shift.  The rhs is TMP1 & 1.  */
  stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
  if (gimple_code (stmt1) != GIMPLE_ASSIGN)
    return stmt;

  /* There is a conversion in between possibly inserted by fold.  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
    {
      t = gimple_assign_rhs1 (stmt1);
      if (TREE_CODE (t) != SSA_NAME
	  || !has_single_use (t))
	return stmt;
      stmt1 = SSA_NAME_DEF_STMT (t);
      if (gimple_code (stmt1) != GIMPLE_ASSIGN)
	return stmt;
    }

  /* Verify that B is loop invariant but A is not.  Verify that with
     all the stmt walking we are still in the same loop.  */
  if (gimple_assign_rhs_code (stmt1) != RSHIFT_EXPR
      || loop_containing_stmt (stmt1) != loop_containing_stmt (stmt))
    return stmt;

  a = gimple_assign_rhs1 (stmt1);
  b = gimple_assign_rhs2 (stmt1);

  if (outermost_invariant_loop (b, loop_containing_stmt (stmt1)) != NULL
      && outermost_invariant_loop (a, loop_containing_stmt (stmt1)) == NULL)
    {
      gimple_stmt_iterator rsi;

      /* 1 << B */
      t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
		       build_int_cst (TREE_TYPE (a), 1), b);
      name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
      stmt1 = gimple_build_assign (name, t);

      /* A & (1 << B) */
      t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
      name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
      stmt2 = gimple_build_assign (name, t);

      /* Replace the SSA_NAME we compare against zero.  Adjust
	 the type of zero accordingly.  */
      SET_USE (use, name);
      gimple_cond_set_rhs (cond_stmt,
			   build_int_cst_type (TREE_TYPE (name),
					       0));

      /* Don't use gsi_replace here, none of the new assignments sets
	 the variable originally set in stmt.  Move bsi to stmt1, and
	 then remove the original stmt, so that we get a chance to
	 retain debug info for it.  */
      rsi = *bsi;
      gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
      gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
      gimple *to_release = gsi_stmt (rsi);
      gsi_remove (&rsi, true);
      release_defs (to_release);

      return stmt1;
    }

  return stmt;
}

/* For each statement determines the outermost loop in which it is invariant,
   the statements on whose motion it depends and the cost of the computation.
   This information is stored to the LIM_DATA structure associated with
   each statement.  */
class invariantness_dom_walker : public dom_walker
{
public:
  invariantness_dom_walker (cdi_direction direction)
    : dom_walker (direction) {}

  virtual edge before_dom_children (basic_block);
};

/* Determine the outermost loops in which statements in basic block BB are
   invariant, and record them to the LIM_DATA associated with the statements.
   Callback for dom_walker.  */

edge
invariantness_dom_walker::before_dom_children (basic_block bb)
{
  enum move_pos pos;
  gimple_stmt_iterator bsi;
  gimple *stmt;
  bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
  struct loop *outermost = ALWAYS_EXECUTED_IN (bb);
  struct lim_aux_data *lim_data;

  if (!loop_outer (bb->loop_father))
    return NULL;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
	     bb->index, bb->loop_father->num, loop_depth (bb->loop_father));

  /* Look at PHI nodes, but only if there are at most two.
     ??? We could relax this further by post-processing the inserted
     code and transforming adjacent cond-exprs with the same predicate
     to control flow again.  */
  bsi = gsi_start_phis (bb);
  if (!gsi_end_p (bsi)
      && ((gsi_next (&bsi), gsi_end_p (bsi))
	  || (gsi_next (&bsi), gsi_end_p (bsi))))
    for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
      {
	stmt = gsi_stmt (bsi);

	pos = movement_possibility (stmt);
	if (pos == MOVE_IMPOSSIBLE)
	  continue;

	lim_data = get_lim_data (stmt);
	if (! lim_data)
	  lim_data = init_lim_data (stmt);
	lim_data->always_executed_in = outermost;

	if (!determine_max_movement (stmt, false))
	  {
	    lim_data->max_loop = NULL;
	    continue;
	  }

	if (dump_file && (dump_flags & TDF_DETAILS))
	  {
	    print_gimple_stmt (dump_file, stmt, 2);
	    fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
		     loop_depth (lim_data->max_loop),
		     lim_data->cost);
	  }

	if (lim_data->cost >= LIM_EXPENSIVE)
	  set_profitable_level (stmt);
      }

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      stmt = gsi_stmt (bsi);

      pos = movement_possibility (stmt);
      if (pos == MOVE_IMPOSSIBLE)
	{
	  if (nonpure_call_p (stmt))
	    {
	      maybe_never = true;
	      outermost = NULL;
	    }
	  /* Make sure to note always_executed_in for stores to make
	     store-motion work.  */
	  else if (stmt_makes_single_store (stmt))
	    {
	      struct lim_aux_data *lim_data = get_lim_data (stmt);
	      if (! lim_data)
		lim_data = init_lim_data (stmt);
	      lim_data->always_executed_in = outermost;
	    }
	  continue;
	}

      if (is_gimple_assign (stmt)
	  && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
	      == GIMPLE_BINARY_RHS))
	{
	  tree op0 = gimple_assign_rhs1 (stmt);
	  tree op1 = gimple_assign_rhs2 (stmt);
	  struct loop *ol1 = outermost_invariant_loop (op1,
					loop_containing_stmt (stmt));

	  /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
	     to be hoisted out of loop, saving expensive divide.  */
	  if (pos == MOVE_POSSIBLE
	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR
	      && flag_unsafe_math_optimizations
	      && !flag_trapping_math
	      && ol1 != NULL
	      && outermost_invariant_loop (op0, ol1) == NULL)
	    stmt = rewrite_reciprocal (&bsi);

	  /* If the shift count is invariant, convert (A >> B) & 1 to
	     A & (1 << B) allowing the bit mask to be hoisted out of the loop
	     saving an expensive shift.  */
	  if (pos == MOVE_POSSIBLE
	      && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
	      && integer_onep (op1)
	      && TREE_CODE (op0) == SSA_NAME
	      && has_single_use (op0))
	    stmt = rewrite_bittest (&bsi);
	}

      lim_data = get_lim_data (stmt);
      if (! lim_data)
	lim_data = init_lim_data (stmt);
      lim_data->always_executed_in = outermost;

      if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
	continue;

      if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
	{
	  lim_data->max_loop = NULL;
	  continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  print_gimple_stmt (dump_file, stmt, 2);
	  fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
		   loop_depth (lim_data->max_loop),
		   lim_data->cost);
	}

      if (lim_data->cost >= LIM_EXPENSIVE)
	set_profitable_level (stmt);
    }
  return NULL;
}

/* Hoist the statements in basic block BB out of the loops prescribed by
   data stored in LIM_DATA structures associated with each statement.  Callback
   for walk_dominator_tree.  */

unsigned int
move_computations_worker (basic_block bb)
{
  struct loop *level;
  unsigned cost = 0;
  struct lim_aux_data *lim_data;
  unsigned int todo = 0;

  if (!loop_outer (bb->loop_father))
    return todo;

  for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
    {
      gassign *new_stmt;
      gphi *stmt = bsi.phi ();

      lim_data = get_lim_data (stmt);
      if (lim_data == NULL)
	{
	  gsi_next (&bsi);
	  continue;
	}

      cost = lim_data->cost;
      level = lim_data->tgt_loop;
      clear_lim_data (stmt);

      if (!level)
	{
	  gsi_next (&bsi);
	  continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Moving PHI node\n");
	  print_gimple_stmt (dump_file, stmt, 0);
	  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
		   cost, level->num);
	}

      if (gimple_phi_num_args (stmt) == 1)
	{
	  tree arg = PHI_ARG_DEF (stmt, 0);
	  new_stmt = gimple_build_assign (gimple_phi_result (stmt),
					  TREE_CODE (arg), arg);
	}
      else
	{
	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
	  gimple *cond = gsi_stmt (gsi_last_bb (dom));
	  tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
	  /* Get the PHI arguments corresponding to the true and false
	     edges of COND.  */
	  extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
	  gcc_assert (arg0 && arg1);
	  t = build2 (gimple_cond_code (cond), boolean_type_node,
		      gimple_cond_lhs (cond), gimple_cond_rhs (cond));
	  new_stmt = gimple_build_assign (gimple_phi_result (stmt),
					  COND_EXPR, t, arg0, arg1);
	  todo |= TODO_cleanup_cfg;
	}
      if (INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (new_stmt)))
	  && (!ALWAYS_EXECUTED_IN (bb)
	      || (ALWAYS_EXECUTED_IN (bb) != level
		  && !flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
	{
	  tree lhs = gimple_assign_lhs (new_stmt);
	  SSA_NAME_RANGE_INFO (lhs) = NULL;
	}
      gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
      remove_phi_node (&bsi, false);
    }

  for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
    {
      edge e;

      gimple *stmt = gsi_stmt (bsi);

      lim_data = get_lim_data (stmt);
      if (lim_data == NULL)
	{
	  gsi_next (&bsi);
	  continue;
	}

      cost = lim_data->cost;
      level = lim_data->tgt_loop;
      clear_lim_data (stmt);

      if (!level)
	{
	  gsi_next (&bsi);
	  continue;
	}

      /* We do not really want to move conditionals out of the loop; we just
	 placed it here to force its operands to be moved if necessary.  */
      if (gimple_code (stmt) == GIMPLE_COND)
	continue;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Moving statement\n");
	  print_gimple_stmt (dump_file, stmt, 0);
	  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
		   cost, level->num);
	}

      e = loop_preheader_edge (level);
      gcc_assert (!gimple_vdef (stmt));
      if (gimple_vuse (stmt))
	{
	  /* The new VUSE is the one from the virtual PHI in the loop
	     header or the one already present.  */
	  gphi_iterator gsi2;
	  for (gsi2 = gsi_start_phis (e->dest);
	       !gsi_end_p (gsi2); gsi_next (&gsi2))
	    {
	      gphi *phi = gsi2.phi ();
	      if (virtual_operand_p (gimple_phi_result (phi)))
		{
		  gimple_set_vuse (stmt, PHI_ARG_DEF_FROM_EDGE (phi, e));
		  break;
		}
	    }
	}
      gsi_remove (&bsi, false);
      if (gimple_has_lhs (stmt)
	  && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
	  && INTEGRAL_TYPE_P (TREE_TYPE (gimple_get_lhs (stmt)))
	  && (!ALWAYS_EXECUTED_IN (bb)
	      || !(ALWAYS_EXECUTED_IN (bb) == level
		   || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
	{
	  tree lhs = gimple_get_lhs (stmt);
	  SSA_NAME_RANGE_INFO (lhs) = NULL;
	}
      /* In case this is a stmt that is not unconditionally executed
	 when the target loop header is executed and the stmt may
	 invoke undefined integer or pointer overflow rewrite it to
	 unsigned arithmetic.  */
      if (is_gimple_assign (stmt)
	  && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt)))
	  && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (gimple_assign_lhs (stmt)))
	  && arith_code_with_undefined_signed_overflow
	       (gimple_assign_rhs_code (stmt))
	  && (!ALWAYS_EXECUTED_IN (bb)
	      || !(ALWAYS_EXECUTED_IN (bb) == level
		   || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
	gsi_insert_seq_on_edge (e, rewrite_to_defined_overflow (stmt));
      else
	gsi_insert_on_edge (e, stmt);
    }

  return todo;
}

/* Hoist the statements out of the loops prescribed by data stored in
   LIM_DATA structures associated with each statement.  */

static unsigned int
move_computations (void)
{
  int *rpo = XNEWVEC (int, last_basic_block_for_fn (cfun));
  int n = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, false);
  unsigned todo = 0;

  for (int i = 0; i < n; ++i)
    todo |= move_computations_worker (BASIC_BLOCK_FOR_FN (cfun, rpo[i]));

  free (rpo);

  gsi_commit_edge_inserts ();
  if (need_ssa_update_p (cfun))
    rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);

  return todo;
}

/* Checks whether the statement defining variable *INDEX can be hoisted
   out of the loop passed in DATA.  Callback for for_each_index.  */

static bool
may_move_till (tree ref, tree *index, void *data)
{
  struct loop *loop = (struct loop *) data, *max_loop;

  /* If REF is an array reference, check also that the step and the lower
     bound are invariant in LOOP.  */
  if (TREE_CODE (ref) == ARRAY_REF)
    {
      tree step = TREE_OPERAND (ref, 3);
      tree lbound = TREE_OPERAND (ref, 2);

      max_loop = outermost_invariant_loop (step, loop);
      if (!max_loop)
	return false;

      max_loop = outermost_invariant_loop (lbound, loop);
      if (!max_loop)
	return false;
    }

  max_loop = outermost_invariant_loop (*index, loop);
  if (!max_loop)
    return false;

  return true;
}

/* If OP is SSA NAME, force the statement that defines it to be
   moved out of the LOOP.  ORIG_LOOP is the loop in which OP is used.  */

static void
force_move_till_op (tree op, struct loop *orig_loop, struct loop *loop)
{
  gimple *stmt;

  if (!op
      || is_gimple_min_invariant (op))
    return;

  gcc_assert (TREE_CODE (op) == SSA_NAME);

  stmt = SSA_NAME_DEF_STMT (op);
  if (gimple_nop_p (stmt))
    return;

  set_level (stmt, orig_loop, loop);
}

/* Forces the statements defining invariants in REF (and *INDEX) to be moved
   out of the LOOP.  The reference REF is used in the loop ORIG_LOOP.
   Callback for for_each_index.  */

struct fmt_data
{
  struct loop *loop;
  struct loop *orig_loop;
};

static bool
force_move_till (tree ref, tree *index, void *data)
{
  struct fmt_data *fmt_data = (struct fmt_data *) data;

  if (TREE_CODE (ref) == ARRAY_REF)
    {
      tree step = TREE_OPERAND (ref, 3);
      tree lbound = TREE_OPERAND (ref, 2);

      force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
      force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
    }

  force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);

  return true;
}

/* A function to free the mem_ref object MEM.  */

static void
memref_free (struct im_mem_ref *mem)
{
  mem->accesses_in_loop.release ();
}

/* Allocates and returns a memory reference description for MEM whose hash
   value is HASH and id is ID.  */

static im_mem_ref *
mem_ref_alloc (ao_ref *mem, unsigned hash, unsigned id)
{
  im_mem_ref *ref = XOBNEW (&mem_ref_obstack, struct im_mem_ref);
  if (mem)
    ref->mem = *mem;
  else
    ao_ref_init (&ref->mem, error_mark_node);
  ref->id = id;
  ref->ref_canonical = false;
  ref->hash = hash;
  ref->stored = NULL;
  bitmap_initialize (&ref->indep_loop, &lim_bitmap_obstack);
  bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
  ref->accesses_in_loop.create (1);

  return ref;
}

/* Records memory reference location *LOC in LOOP to the memory reference
   description REF.  The reference occurs in statement STMT.  */

static void
record_mem_ref_loc (im_mem_ref *ref, gimple *stmt, tree *loc)
{
  mem_ref_loc aref;
  aref.stmt = stmt;
  aref.ref = loc;
  ref->accesses_in_loop.safe_push (aref);
}

/* Set the LOOP bit in REF stored bitmap and allocate that if
   necessary.  Return whether a bit was changed.  */

static bool
set_ref_stored_in_loop (im_mem_ref *ref, struct loop *loop)
{
  if (!ref->stored)
    ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
  return bitmap_set_bit (ref->stored, loop->num);
}

/* Marks reference REF as stored in LOOP.  */

static void
mark_ref_stored (im_mem_ref *ref, struct loop *loop)
{
  while (loop != current_loops->tree_root
	 && set_ref_stored_in_loop (ref, loop))
    loop = loop_outer (loop);
}

/* Gathers memory references in statement STMT in LOOP, storing the
   information about them in the memory_accesses structure.  Marks
   the vops accessed through unrecognized statements there as
   well.  */

static void
gather_mem_refs_stmt (struct loop *loop, gimple *stmt)
{
  tree *mem = NULL;
  hashval_t hash;
  im_mem_ref **slot;
  im_mem_ref *ref;
  bool is_stored;
  unsigned id;

  if (!gimple_vuse (stmt))
    return;

  mem = simple_mem_ref_in_stmt (stmt, &is_stored);
  if (!mem)
    {
      /* We use the shared mem_ref for all unanalyzable refs.  */
      id = UNANALYZABLE_MEM_ID;
      ref = memory_accesses.refs_list[id];
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
	}
      is_stored = gimple_vdef (stmt);
    }
  else
    {
      /* We are looking for equal refs that might differ in structure
	 such as a.b vs. MEM[&a + 4].  So we key off the ao_ref but
	 make sure we can canonicalize the ref in the hashtable if
	 non-operand_equal_p refs are found.  For the lookup we mark
	 the case we want strict equality with aor.max_size == -1.  */
      ao_ref aor;
      ao_ref_init (&aor, *mem);
      ao_ref_base (&aor);
      ao_ref_alias_set (&aor);
      HOST_WIDE_INT offset, size, max_size;
      poly_int64 saved_maxsize = aor.max_size, mem_off;
      tree mem_base;
      if (aor.max_size_known_p ()
	  && aor.offset.is_constant (&offset)
	  && aor.size.is_constant (&size)
	  && aor.max_size.is_constant (&max_size)
	  && size == max_size
	  && (mem_base = get_addr_base_and_unit_offset (aor.ref, &mem_off)))
	{
	  hash = iterative_hash_expr (ao_ref_base (&aor), 0);
	  hash = iterative_hash_host_wide_int (offset, hash);
	  hash = iterative_hash_host_wide_int (size, hash);
	}
      else
	{
	  hash = iterative_hash_expr (aor.ref, 0);
	  aor.max_size = -1;
	}
      slot = memory_accesses.refs->find_slot_with_hash (&aor, hash, INSERT);
      aor.max_size = saved_maxsize;
      if (*slot)
	{
	  if (!(*slot)->ref_canonical
	      && !operand_equal_p (*mem, (*slot)->mem.ref, 0))
	    {
	      /* If we didn't yet canonicalize the hashtable ref (which
		 we'll end up using for code insertion) and hit a second
		 equal ref that is not structurally equivalent create
		 a canonical ref which is a bare MEM_REF.  */
	      if (TREE_CODE (*mem) == MEM_REF
		  || TREE_CODE (*mem) == TARGET_MEM_REF)
		{
		  (*slot)->mem.ref = *mem;
		  (*slot)->mem.base_alias_set = ao_ref_base_alias_set (&aor);
		}
	      else
		{
		  tree ref_alias_type = reference_alias_ptr_type (*mem);
		  unsigned int ref_align = get_object_alignment (*mem);
		  tree ref_type = TREE_TYPE (*mem);
		  tree tmp = build_fold_addr_expr (unshare_expr (mem_base));
		  if (TYPE_ALIGN (ref_type) != ref_align)
		    ref_type = build_aligned_type (ref_type, ref_align);
		  (*slot)->mem.ref
		    = fold_build2 (MEM_REF, ref_type, tmp,
				   build_int_cst (ref_alias_type, mem_off));
		  if ((*slot)->mem.volatile_p)
		    TREE_THIS_VOLATILE ((*slot)->mem.ref) = 1;
		  gcc_checking_assert (TREE_CODE ((*slot)->mem.ref) == MEM_REF
				       && is_gimple_mem_ref_addr
					    (TREE_OPERAND ((*slot)->mem.ref,
							   0)));
		  (*slot)->mem.base_alias_set = (*slot)->mem.ref_alias_set;
		}
	      (*slot)->ref_canonical = true;
	    }
	  ref = *slot;
	  id = ref->id;
	}
      else
	{
	  id = memory_accesses.refs_list.length ();
	  ref = mem_ref_alloc (&aor, hash, id);
	  memory_accesses.refs_list.safe_push (ref);
	  *slot = ref;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Memory reference %u: ", id);
	      print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
	      fprintf (dump_file, "\n");
	    }
	}

      record_mem_ref_loc (ref, stmt, mem);
    }
  bitmap_set_bit (&memory_accesses.refs_in_loop[loop->num], ref->id);
  if (is_stored)
    {
      bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
      mark_ref_stored (ref, loop);
    }
  init_lim_data (stmt)->ref = ref->id;
  return;
}

static unsigned *bb_loop_postorder;

/* qsort sort function to sort blocks after their loop fathers postorder.  */

static int
sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_)
{
  basic_block bb1 = *(basic_block *)const_cast<void *>(bb1_);
  basic_block bb2 = *(basic_block *)const_cast<void *>(bb2_);
  struct loop *loop1 = bb1->loop_father;
  struct loop *loop2 = bb2->loop_father;
  if (loop1->num == loop2->num)
    return bb1->index - bb2->index;
  return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
}

/* qsort sort function to sort ref locs after their loop fathers postorder.  */

static int
sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_)
{
  mem_ref_loc *loc1 = (mem_ref_loc *)const_cast<void *>(loc1_);
  mem_ref_loc *loc2 = (mem_ref_loc *)const_cast<void *>(loc2_);
  struct loop *loop1 = gimple_bb (loc1->stmt)->loop_father;
  struct loop *loop2 = gimple_bb (loc2->stmt)->loop_father;
  if (loop1->num == loop2->num)
    return 0;
  return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
}

/* Gathers memory references in loops.  */

static void
analyze_memory_references (void)
{
  gimple_stmt_iterator bsi;
  basic_block bb, *bbs;
  struct loop *loop, *outer;
  unsigned i, n;

  /* Collect all basic-blocks in loops and sort them after their
     loops postorder.  */
  i = 0;
  bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
  FOR_EACH_BB_FN (bb, cfun)
    if (bb->loop_father != current_loops->tree_root)
      bbs[i++] = bb;
  n = i;
  qsort (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp);

  /* Visit blocks in loop postorder and assign mem-ref IDs in that order.
     That results in better locality for all the bitmaps.  */
  for (i = 0; i < n; ++i)
    {
      basic_block bb = bbs[i];
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	gather_mem_refs_stmt (bb->loop_father, gsi_stmt (bsi));
    }

  /* Sort the location list of gathered memory references after their
     loop postorder number.  */
  im_mem_ref *ref;
  FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
    ref->accesses_in_loop.qsort (sort_locs_in_loop_postorder_cmp);

  free (bbs);
//  free (bb_loop_postorder);

  /* Propagate the information about accessed memory references up
     the loop hierarchy.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      /* Finalize the overall touched references (including subloops).  */
      bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num],
		       &memory_accesses.refs_stored_in_loop[loop->num]);

      /* Propagate the information about accessed memory references up
	 the loop hierarchy.  */
      outer = loop_outer (loop);
      if (outer == current_loops->tree_root)
	continue;

      bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num],
		       &memory_accesses.all_refs_stored_in_loop[loop->num]);
    }
}

063a8bce 1640/* Returns true if MEM1 and MEM2 may alias. TTAE_CACHE is used as a cache in
1641 tree_to_aff_combination_expand. */
1642
1643static bool
4a7ff262 1644mem_refs_may_alias_p (im_mem_ref *mem1, im_mem_ref *mem2,
5f8841a5 1645 hash_map<tree, name_expansion *> **ttae_cache)
063a8bce 1646{
1647 /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
1648 object and their offsets differ in such a way that the locations cannot
1649 overlap, then they cannot alias. */
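 /* Illustration (a sketch with made-up names): for MEM1 = a[2] and
 MEM2 = a[4], both references expand to the same base &a plus constant
 offsets, so after the subtraction below OFF2 holds the distance between
 the two accesses. If that distance is at least the size of MEM1 (or no
 greater than minus the size of MEM2), the locations cannot overlap and
 false is returned. */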
fbc666b8 1650 poly_widest_int size1, size2;
3391cd1e 1651 aff_tree off1, off2;
063a8bce 1652
3391cd1e 1653 /* Perform basic offset and type-based disambiguation. */
09f4cf62 1654 if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, true))
063a8bce 1655 return false;
7d23383d 1656
063a8bce 1657 /* The expansion of addresses may be a bit expensive, thus we only do
1658 the check at -O2 and higher optimization levels. */
1659 if (optimize < 2)
1660 return true;
1661
09f4cf62 1662 get_inner_reference_aff (mem1->mem.ref, &off1, &size1);
1663 get_inner_reference_aff (mem2->mem.ref, &off2, &size2);
063a8bce 1664 aff_combination_expand (&off1, ttae_cache);
1665 aff_combination_expand (&off2, ttae_cache);
e913b5cd 1666 aff_combination_scale (&off1, -1);
063a8bce 1667 aff_combination_add (&off2, &off1);
1668
5fc88ffd 1669 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
063a8bce 1670 return false;
1671
1672 return true;
1673}
1674
3e48928c 1675/* Compare function for bsearch searching for reference locations
1676 in a loop. */
1677
1678static int
1679find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_)
1680{
1681 struct loop *loop = (struct loop *)const_cast<void *>(loop_);
1682 mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_);
1683 struct loop *loc_loop = gimple_bb (loc->stmt)->loop_father;
1684 if (loop->num == loc_loop->num
1685 || flow_loop_nested_p (loop, loc_loop))
1686 return 0;
1687 return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num]
1688 ? -1 : 1);
1689}
1690
5c8a8be3 1691/* Iterates over all locations of REF in LOOP and its subloops, calling
 1692 fn.operator() with the location as argument. When that operator
 1693 returns true, the iteration is stopped and true is returned.
 1694 Otherwise false is returned. */
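/* For example, rewrite_mem_ref_loc below is used as
     for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));
   to replace every location of REF in LOOP by TMP_VAR. */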
063a8bce 1695
5c8a8be3 1696template <typename FN>
1697static bool
4a7ff262 1698for_all_locs_in_loop (struct loop *loop, im_mem_ref *ref, FN fn)
063a8bce 1699{
063a8bce 1700 unsigned i;
4a7ff262 1701 mem_ref_loc *loc;
063a8bce 1702
3e48928c 1703 /* Search for the cluster of locs in the accesses_in_loop vector
 1704 which is sorted by the postorder index of the loop father. */
1705 loc = ref->accesses_in_loop.bsearch (loop, find_ref_loc_in_loop_cmp);
1706 if (!loc)
1707 return false;
063a8bce 1708
3e48928c 1709 /* We have found one location inside loop or its sub-loops. Iterate
1710 both forward and backward to cover the whole cluster. */
1711 i = loc - ref->accesses_in_loop.address ();
1712 while (i > 0)
1713 {
1714 --i;
4a7ff262 1715 mem_ref_loc *l = &ref->accesses_in_loop[i];
3e48928c 1716 if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1717 break;
1718 if (fn (l))
1719 return true;
1720 }
1721 for (i = loc - ref->accesses_in_loop.address ();
1722 i < ref->accesses_in_loop.length (); ++i)
1723 {
4a7ff262 1724 mem_ref_loc *l = &ref->accesses_in_loop[i];
3e48928c 1725 if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1726 break;
1727 if (fn (l))
1728 return true;
1729 }
5c8a8be3 1730
1731 return false;
1732}
1733
1734/* Rewrites location LOC by TMP_VAR. */
1735
1736struct rewrite_mem_ref_loc
1737{
1738 rewrite_mem_ref_loc (tree tmp_var_) : tmp_var (tmp_var_) {}
4a7ff262 1739 bool operator () (mem_ref_loc *loc);
5c8a8be3 1740 tree tmp_var;
1741};
1742
1743bool
4a7ff262 1744rewrite_mem_ref_loc::operator () (mem_ref_loc *loc)
5c8a8be3 1745{
1746 *loc->ref = tmp_var;
1747 update_stmt (loc->stmt);
1748 return false;
063a8bce 1749}
1750
1751/* Rewrites all references to REF in LOOP by variable TMP_VAR. */
1752
1753static void
4a7ff262 1754rewrite_mem_refs (struct loop *loop, im_mem_ref *ref, tree tmp_var)
063a8bce 1755{
5c8a8be3 1756 for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));
7d23383d 1757}
1758
bc9af2dd 1759/* Stores the first reference location in LOCP. */
1760
1761struct first_mem_ref_loc_1
1762{
4a7ff262 1763 first_mem_ref_loc_1 (mem_ref_loc **locp_) : locp (locp_) {}
1764 bool operator () (mem_ref_loc *loc);
1765 mem_ref_loc **locp;
bc9af2dd 1766};
1767
1768bool
4a7ff262 1769first_mem_ref_loc_1::operator () (mem_ref_loc *loc)
bc9af2dd 1770{
1771 *locp = loc;
1772 return true;
1773}
1774
1775/* Returns the first reference location to REF in LOOP. */
1776
4a7ff262 1777static mem_ref_loc *
1778first_mem_ref_loc (struct loop *loop, im_mem_ref *ref)
bc9af2dd 1779{
4a7ff262 1780 mem_ref_loc *locp = NULL;
bc9af2dd 1781 for_all_locs_in_loop (loop, ref, first_mem_ref_loc_1 (&locp));
1782 return locp;
1783}
1784
61025ec0 1785struct prev_flag_edges {
1786 /* Edge to insert new flag comparison code. */
1787 edge append_cond_position;
1788
1789 /* Edge for fall through from previous flag comparison. */
1790 edge last_cond_fallthru;
1791};
1792
1793/* Helper function for execute_sm. Emit code to store TMP_VAR into
1794 MEM along edge EX.
1795
1796 The store is only done if MEM has changed. We do this so no
1797 changes to MEM occur on code paths that did not originally store
1798 into it.
1799
1800 The common case for execute_sm will transform:
1801
1802 for (...) {
1803 if (foo)
1804 stuff;
1805 else
1806 MEM = TMP_VAR;
1807 }
1808
1809 into:
1810
1811 lsm = MEM;
1812 for (...) {
1813 if (foo)
1814 stuff;
1815 else
1816 lsm = TMP_VAR;
1817 }
1818 MEM = lsm;
1819
1820 This function will generate:
1821
1822 lsm = MEM;
1823
1824 lsm_flag = false;
1825 ...
1826 for (...) {
1827 if (foo)
1828 stuff;
1829 else {
1830 lsm = TMP_VAR;
1831 lsm_flag = true;
1832 }
1833 }
1834 if (lsm_flag) <--
1835 MEM = lsm; <--
1836*/
1837
1838static void
1ea7c812 1839execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
1840 edge preheader, hash_set <basic_block> *flag_bbs)
61025ec0 1841{
1842 basic_block new_bb, then_bb, old_dest;
1843 bool loop_has_only_one_exit;
1844 edge then_old_edge, orig_ex = ex;
1845 gimple_stmt_iterator gsi;
42acab1c 1846 gimple *stmt;
61025ec0 1847 struct prev_flag_edges *prev_edges = (struct prev_flag_edges *) ex->aux;
a1ccf30d 1848 bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP;
61025ec0 1849
1ea7c812 1850 profile_count count_sum = profile_count::zero ();
1851 int nbbs = 0, ncount = 0;
720cfc43 1852 profile_probability flag_probability = profile_probability::uninitialized ();
1ea7c812 1853
1854 /* Flag is set in FLAG_BBS. Determine probability that flag will be true
1855 at loop exit.
1856
 1857 This code may look fancy, but it cannot update the profile very realistically
 1858 because we do not know the probability that the flag will be true at a given
 1859 loop exit.
 1860
 1861 We look for two interesting extremes:
 1862 - when the exit is dominated by a block setting the flag, we know the flag
 1863 will always be true. This is a common case.
 1864 - when all blocks setting the flag have very low frequency, we know the flag
 1865 will likely be false.
 1866 In all other cases we default to 2/3 for the flag being true. */
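 /* A hypothetical example: if the preheader has count 1000 and the blocks
 setting the flag have a combined count of 100, the flag is assumed true
 with probability 10% at the exit; if one of those blocks dominates the
 exit edge's source, the probability is "always"; otherwise we fall back
 to the 2/3 cap below. */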
1867
1868 for (hash_set<basic_block>::iterator it = flag_bbs->begin ();
1869 it != flag_bbs->end (); ++it)
1870 {
1ea7c812 1871 if ((*it)->count.initialized_p ())
1872 count_sum += (*it)->count, ncount ++;
1873 if (dominated_by_p (CDI_DOMINATORS, ex->src, *it))
720cfc43 1874 flag_probability = profile_probability::always ();
1ea7c812 1875 nbbs++;
1876 }
1877
720cfc43 1878 profile_probability cap = profile_probability::always ().apply_scale (2, 3);
1879
1880 if (flag_probability.initialized_p ())
1ea7c812 1881 ;
205ce1aa 1882 else if (ncount == nbbs
1883 && preheader->count () >= count_sum && preheader->count ().nonzero_p ())
1ea7c812 1884 {
ea5d3981 1885 flag_probability = count_sum.probability_in (preheader->count ());
720cfc43 1886 if (flag_probability > cap)
1887 flag_probability = cap;
1ea7c812 1888 }
205ce1aa 1889
1890 if (!flag_probability.initialized_p ())
720cfc43 1891 flag_probability = cap;
1ea7c812 1892
61025ec0 1893 /* ?? Insert store after previous store if applicable. See note
1894 below. */
1895 if (prev_edges)
1896 ex = prev_edges->append_cond_position;
1897
1898 loop_has_only_one_exit = single_pred_p (ex->dest);
1899
1900 if (loop_has_only_one_exit)
1901 ex = split_block_after_labels (ex->dest);
a1ebf6df 1902 else
1903 {
1904 for (gphi_iterator gpi = gsi_start_phis (ex->dest);
1905 !gsi_end_p (gpi); gsi_next (&gpi))
1906 {
1907 gphi *phi = gpi.phi ();
1908 if (virtual_operand_p (gimple_phi_result (phi)))
1909 continue;
1910
1911 /* When the destination has a non-virtual PHI node with multiple
1912 predecessors make sure we preserve the PHI structure by
1913 forcing a forwarder block so that hoisting of that PHI will
1914 still work. */
1915 split_edge (ex);
1916 break;
1917 }
1918 }
61025ec0 1919
1920 old_dest = ex->dest;
1921 new_bb = split_edge (ex);
1922 then_bb = create_empty_bb (new_bb);
1ea7c812 1923 then_bb->count = new_bb->count.apply_probability (flag_probability);
a1ccf30d 1924 if (irr)
1925 then_bb->flags = BB_IRREDUCIBLE_LOOP;
1926 add_bb_to_loop (then_bb, new_bb->loop_father);
61025ec0 1927
1928 gsi = gsi_start_bb (new_bb);
1929 stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
1930 NULL_TREE, NULL_TREE);
1931 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1932
1933 gsi = gsi_start_bb (then_bb);
1934 /* Insert actual store. */
1935 stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
1936 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1937
1ea7c812 1938 edge e1 = single_succ_edge (new_bb);
1939 edge e2 = make_edge (new_bb, then_bb,
1940 EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
1941 e2->probability = flag_probability;
1ea7c812 1942
1943 e1->flags |= EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0);
1944 e1->flags &= ~EDGE_FALLTHRU;
1945
720cfc43 1946 e1->probability = flag_probability.invert ();
1ea7c812 1947
720cfc43 1948 then_old_edge = make_single_succ_edge (then_bb, old_dest,
a1ccf30d 1949 EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
61025ec0 1950
1951 set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);
1952
1953 if (prev_edges)
1954 {
1955 basic_block prevbb = prev_edges->last_cond_fallthru->src;
1956 redirect_edge_succ (prev_edges->last_cond_fallthru, new_bb);
1957 set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
1958 set_immediate_dominator (CDI_DOMINATORS, old_dest,
1959 recompute_dominator (CDI_DOMINATORS, old_dest));
1960 }
1961
1962 /* ?? Because stores may alias, they must happen in the exact
1963 sequence they originally happened. Save the position right after
1964 the (_lsm) store we just created so we can continue appending after
1965 it and maintain the original order. */
1966 {
1967 struct prev_flag_edges *p;
1968
1969 if (orig_ex->aux)
1970 orig_ex->aux = NULL;
1971 alloc_aux_for_edge (orig_ex, sizeof (struct prev_flag_edges));
1972 p = (struct prev_flag_edges *) orig_ex->aux;
1973 p->append_cond_position = then_old_edge;
1974 p->last_cond_fallthru = find_edge (new_bb, old_dest);
1975 orig_ex->aux = (void *) p;
1976 }
1977
1978 if (!loop_has_only_one_exit)
1a91d914 1979 for (gphi_iterator gpi = gsi_start_phis (old_dest);
1980 !gsi_end_p (gpi); gsi_next (&gpi))
61025ec0 1981 {
1a91d914 1982 gphi *phi = gpi.phi ();
61025ec0 1983 unsigned i;
1984
1985 for (i = 0; i < gimple_phi_num_args (phi); i++)
1986 if (gimple_phi_arg_edge (phi, i)->src == new_bb)
1987 {
1988 tree arg = gimple_phi_arg_def (phi, i);
60d535d2 1989 add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
61025ec0 1990 update_stmt (phi);
1991 }
1992 }
61025ec0 1993}
1994
5c8a8be3 1995/* When REF is stored to at the location, set a flag indicating the store. */
1996
1997struct sm_set_flag_if_changed
1998{
1ea7c812 1999 sm_set_flag_if_changed (tree flag_, hash_set <basic_block> *bbs_)
2000 : flag (flag_), bbs (bbs_) {}
4a7ff262 2001 bool operator () (mem_ref_loc *loc);
5c8a8be3 2002 tree flag;
1ea7c812 2003 hash_set <basic_block> *bbs;
5c8a8be3 2004};
2005
2006bool
4a7ff262 2007sm_set_flag_if_changed::operator () (mem_ref_loc *loc)
5c8a8be3 2008{
2009 /* Only set the flag for writes. */
2010 if (is_gimple_assign (loc->stmt)
2011 && gimple_assign_lhs_ptr (loc->stmt) == loc->ref)
2012 {
2013 gimple_stmt_iterator gsi = gsi_for_stmt (loc->stmt);
42acab1c 2014 gimple *stmt = gimple_build_assign (flag, boolean_true_node);
5c8a8be3 2015 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1ea7c812 2016 bbs->add (gimple_bb (stmt));
5c8a8be3 2017 }
2018 return false;
2019}
2020
61025ec0 2021/* Helper function for execute_sm. On every location where REF is
2022 set, set an appropriate flag indicating the store. */
2023
2024static tree
1ea7c812 2025execute_sm_if_changed_flag_set (struct loop *loop, im_mem_ref *ref,
2026 hash_set <basic_block> *bbs)
61025ec0 2027{
61025ec0 2028 tree flag;
f86b328b 2029 char *str = get_lsm_tmp_name (ref->mem.ref, ~0, "_flag");
072f7ab1 2030 flag = create_tmp_reg (boolean_type_node, str);
1ea7c812 2031 for_all_locs_in_loop (loop, ref, sm_set_flag_if_changed (flag, bbs));
61025ec0 2032 return flag;
2033}
2034
063a8bce 2035/* Executes store motion of memory reference REF from LOOP.
749ea85f 2036 Exits from the LOOP are stored in EXITS. The initialization of the
 2037 temporary variable is put in the preheader of the loop, and assignments
 2038 to the reference from the temporary variable are emitted on the exits. */
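/* For illustration, a minimal sketch of the basic (single-threaded) case,
   with made-up source:

     for (i = 0; i < n; i++)
       a[0] = a[0] + f (i);

   becomes

     a0_lsm = a[0];
     for (i = 0; i < n; i++)
       a0_lsm = a0_lsm + f (i);
     a[0] = a0_lsm;

   i.e. the load is emitted once before the loop, the loop body operates on
   the temporary, and the store is emitted on each exit edge. */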
7d23383d 2039
2040static void
4a7ff262 2041execute_sm (struct loop *loop, vec<edge> exits, im_mem_ref *ref)
7d23383d 2042{
3e48928c 2043 tree tmp_var, store_flag = NULL_TREE;
7d23383d 2044 unsigned i;
1a91d914 2045 gassign *load;
99e96094 2046 struct fmt_data fmt_data;
bc9af2dd 2047 edge ex;
75a70cf9 2048 struct lim_aux_data *lim_data;
61025ec0 2049 bool multi_threaded_model_p = false;
bc9af2dd 2050 gimple_stmt_iterator gsi;
1ea7c812 2051 hash_set<basic_block> flag_bbs;
7d23383d 2052
69154f26 2053 if (dump_file && (dump_flags & TDF_DETAILS))
2054 {
2055 fprintf (dump_file, "Executing store motion of ");
1ffa4346 2056 print_generic_expr (dump_file, ref->mem.ref);
69154f26 2057 fprintf (dump_file, " from loop %d\n", loop->num);
2058 }
2059
09f4cf62 2060 tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
2061 get_lsm_tmp_name (ref->mem.ref, ~0));
7d23383d 2062
99e96094 2063 fmt_data.loop = loop;
2064 fmt_data.orig_loop = loop;
09f4cf62 2065 for_each_index (&ref->mem.ref, force_move_till, &fmt_data);
7d23383d 2066
c927329e 2067 if (bb_in_transaction (loop_preheader_edge (loop)->src)
60b209bc 2068 || (! PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES)
2069 && ! ref_always_accessed_p (loop, ref, true)))
61025ec0 2070 multi_threaded_model_p = true;
2071
2072 if (multi_threaded_model_p)
1ea7c812 2073 store_flag = execute_sm_if_changed_flag_set (loop, ref, &flag_bbs);
61025ec0 2074
063a8bce 2075 rewrite_mem_refs (loop, ref, tmp_var);
7d23383d 2076
bc9af2dd 2077 /* Emit the load code at the position of the first original reference
 2078 in the loop, so that we are sure it will be processed by
 2079 move_computations after all of its dependencies. */
2080 gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt);
61025ec0 2081
2082 /* FIXME/TODO: For the multi-threaded variant, we could avoid this
2083 load altogether, since the store is predicated by a flag. We
 2084 could do the load only if it was originally in the loop. */
09f4cf62 2085 load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref));
75a70cf9 2086 lim_data = init_lim_data (load);
2087 lim_data->max_loop = loop;
2088 lim_data->tgt_loop = loop;
bc9af2dd 2089 gsi_insert_before (&gsi, load, GSI_SAME_STMT);
7d23383d 2090
61025ec0 2091 if (multi_threaded_model_p)
7d23383d 2092 {
61025ec0 2093 load = gimple_build_assign (store_flag, boolean_false_node);
2094 lim_data = init_lim_data (load);
2095 lim_data->max_loop = loop;
2096 lim_data->tgt_loop = loop;
bc9af2dd 2097 gsi_insert_before (&gsi, load, GSI_SAME_STMT);
7d23383d 2098 }
61025ec0 2099
2100 /* Sink the store to every exit from the loop. */
f1f41a6c 2101 FOR_EACH_VEC_ELT (exits, i, ex)
61025ec0 2102 if (!multi_threaded_model_p)
2103 {
1a91d914 2104 gassign *store;
09f4cf62 2105 store = gimple_build_assign (unshare_expr (ref->mem.ref), tmp_var);
61025ec0 2106 gsi_insert_on_edge (ex, store);
2107 }
2108 else
1ea7c812 2109 execute_sm_if_changed (ex, ref->mem.ref, tmp_var, store_flag,
2110 loop_preheader_edge (loop), &flag_bbs);
7d23383d 2111}
2112
063a8bce 2113/* Hoists memory references MEM_REFS out of LOOP. EXITS is the list of exit
2114 edges of the LOOP. */
7d23383d 2115
2116static void
063a8bce 2117hoist_memory_references (struct loop *loop, bitmap mem_refs,
f1f41a6c 2118 vec<edge> exits)
7d23383d 2119{
4a7ff262 2120 im_mem_ref *ref;
063a8bce 2121 unsigned i;
2122 bitmap_iterator bi;
69154f26 2123
063a8bce 2124 EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
7d23383d 2125 {
f1f41a6c 2126 ref = memory_accesses.refs_list[i];
063a8bce 2127 execute_sm (loop, exits, ref);
7d23383d 2128 }
55a03692 2129}
2130
5c8a8be3 2131struct ref_always_accessed
2132{
3e48928c 2133 ref_always_accessed (struct loop *loop_, bool stored_p_)
2134 : loop (loop_), stored_p (stored_p_) {}
4a7ff262 2135 bool operator () (mem_ref_loc *loc);
5c8a8be3 2136 struct loop *loop;
5c8a8be3 2137 bool stored_p;
2138};
7d23383d 2139
5c8a8be3 2140bool
4a7ff262 2141ref_always_accessed::operator () (mem_ref_loc *loc)
7d23383d 2142{
063a8bce 2143 struct loop *must_exec;
307f7fda 2144
5c8a8be3 2145 if (!get_lim_data (loc->stmt))
2146 return false;
7d23383d 2147
5c8a8be3 2148 /* If we require an always executed store make sure the statement
2149 stores to the reference. */
2150 if (stored_p)
063a8bce 2151 {
3e48928c 2152 tree lhs = gimple_get_lhs (loc->stmt);
2153 if (!lhs
2154 || lhs != *loc->ref)
5c8a8be3 2155 return false;
2156 }
7d23383d 2157
5c8a8be3 2158 must_exec = get_lim_data (loc->stmt)->always_executed_in;
2159 if (!must_exec)
2160 return false;
307f7fda 2161
5c8a8be3 2162 if (must_exec == loop
2163 || flow_loop_nested_p (must_exec, loop))
2164 return true;
7d23383d 2165
5c8a8be3 2166 return false;
2167}
55a03692 2168
5c8a8be3 2169/* Returns true if REF is always accessed in LOOP. If STORED_P is true
2170 make sure REF is always stored to in LOOP. */
2171
2172static bool
4a7ff262 2173ref_always_accessed_p (struct loop *loop, im_mem_ref *ref, bool stored_p)
5c8a8be3 2174{
5c8a8be3 2175 return for_all_locs_in_loop (loop, ref,
3e48928c 2176 ref_always_accessed (loop, stored_p));
55a03692 2177}
2178
063a8bce 2179/* Returns true if REF1 and REF2 are independent. */
55a03692 2180
063a8bce 2181static bool
4a7ff262 2182refs_independent_p (im_mem_ref *ref1, im_mem_ref *ref2)
55a03692 2183{
09f4cf62 2184 if (ref1 == ref2)
063a8bce 2185 return true;
09f4cf62 2186
063a8bce 2187 if (dump_file && (dump_flags & TDF_DETAILS))
2188 fprintf (dump_file, "Querying dependency of refs %u and %u: ",
2189 ref1->id, ref2->id);
2190
09f4cf62 2191 if (mem_refs_may_alias_p (ref1, ref2, &memory_accesses.ttae_cache))
063a8bce 2192 {
063a8bce 2193 if (dump_file && (dump_flags & TDF_DETAILS))
2194 fprintf (dump_file, "dependent.\n");
2195 return false;
2196 }
2197 else
2198 {
063a8bce 2199 if (dump_file && (dump_flags & TDF_DETAILS))
2200 fprintf (dump_file, "independent.\n");
2201 return true;
2202 }
55a03692 2203}
2204
8f9829e8 2205/* Mark REF dependent on stores or loads (according to STORED_P) in LOOP
2206 and its super-loops. */
55a03692 2207
2208static void
4a7ff262 2209record_dep_loop (struct loop *loop, im_mem_ref *ref, bool stored_p)
55a03692 2210{
8f9829e8 2211 /* We can propagate dependent-in-loop bits up the loop
2212 hierarchy to all outer loops. */
2213 while (loop != current_loops->tree_root
feba4360 2214 && bitmap_set_bit (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
8f9829e8 2215 loop = loop_outer (loop);
063a8bce 2216}
55a03692 2217
3326faef 2218/* Returns true if REF is independent of all other memory
23298f15 2219 references in LOOP. */
55a03692 2220
063a8bce 2221static bool
23298f15 2222ref_indep_loop_p_1 (struct loop *loop, im_mem_ref *ref, bool stored_p)
063a8bce 2223{
3326faef 2224 stored_p |= (ref->stored && bitmap_bit_p (ref->stored, loop->num));
2225
3326faef 2226 bool indep_p = true;
0766b2c0 2227 bitmap refs_to_check;
063a8bce 2228
8f9829e8 2229 if (stored_p)
feba4360 2230 refs_to_check = &memory_accesses.refs_in_loop[loop->num];
0766b2c0 2231 else
feba4360 2232 refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];
55a03692 2233
04911505 2234 if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID))
3326faef 2235 indep_p = false;
3326faef 2236 else
55a03692 2237 {
3326faef 2238 if (bitmap_bit_p (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
2239 return true;
2240 if (bitmap_bit_p (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
8f9829e8 2241 return false;
55a03692 2242
3326faef 2243 struct loop *inner = loop->inner;
2244 while (inner)
2245 {
23298f15 2246 if (!ref_indep_loop_p_1 (inner, ref, stored_p))
3326faef 2247 {
2248 indep_p = false;
2249 break;
2250 }
2251 inner = inner->next;
2252 }
f339cf78 2253
3326faef 2254 if (indep_p)
2255 {
2256 unsigned i;
2257 bitmap_iterator bi;
2258 EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
2259 {
2260 im_mem_ref *aref = memory_accesses.refs_list[i];
2261 if (!refs_independent_p (ref, aref))
2262 {
2263 indep_p = false;
2264 break;
2265 }
2266 }
2267 }
8f9829e8 2268 }
2269
063a8bce 2270 if (dump_file && (dump_flags & TDF_DETAILS))
2271 fprintf (dump_file, "Querying dependencies of ref %u in loop %d: %s\n",
8f9829e8 2272 ref->id, loop->num, indep_p ? "independent" : "dependent");
063a8bce 2273
8f9829e8 2274 /* Record the computed result in the cache. */
2275 if (indep_p)
2276 {
feba4360 2277 if (bitmap_set_bit (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p))
8f9829e8 2278 && stored_p)
2279 {
 2280 /* If it's independent of all refs then it's independent
 2281 of stores, too. */
feba4360 2282 bitmap_set_bit (&ref->indep_loop, LOOP_DEP_BIT (loop->num, false));
8f9829e8 2283 }
2284 }
2285 else
2286 {
2287 record_dep_loop (loop, ref, stored_p);
2288 if (!stored_p)
2289 {
 2290 /* If it's dependent on stores it's dependent on
 2291 all refs, too. */
2292 record_dep_loop (loop, ref, true);
2293 }
2294 }
063a8bce 2295
8f9829e8 2296 return indep_p;
2297}
2298
 2299/* Returns true if REF is independent of all other memory references in
23298f15 2300 LOOP. */
8f9829e8 2301
2302static bool
23298f15 2303ref_indep_loop_p (struct loop *loop, im_mem_ref *ref)
8f9829e8 2304{
2305 gcc_checking_assert (MEM_ANALYZABLE (ref));
2306
23298f15 2307 return ref_indep_loop_p_1 (loop, ref, false);
55a03692 2308}
2309
063a8bce 2310/* Returns true if we can perform store motion of REF from LOOP. */
55a03692 2311
063a8bce 2312static bool
4a7ff262 2313can_sm_ref_p (struct loop *loop, im_mem_ref *ref)
55a03692 2314{
307f7fda 2315 tree base;
2316
0766b2c0 2317 /* Can't hoist unanalyzable refs. */
2318 if (!MEM_ANALYZABLE (ref))
2319 return false;
2320
063a8bce 2321 /* It should be movable. */
09f4cf62 2322 if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
2323 || TREE_THIS_VOLATILE (ref->mem.ref)
2324 || !for_each_index (&ref->mem.ref, may_move_till, loop))
063a8bce 2325 return false;
342ea212 2326
16d9b5d8 2327 /* If it can throw, fail; we do not properly update EH info. */
09f4cf62 2328 if (tree_could_throw_p (ref->mem.ref))
16d9b5d8 2329 return false;
2330
307f7fda 2331 /* If it can trap, it must be always executed in LOOP.
2332 Readonly memory locations may trap when storing to them, but
2333 tree_could_trap_p is a predicate for rvalues, so check that
2334 explicitly. */
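 /* E.g. (a sketch with made-up names): in

       for (i = 0; i < n; i++)
         if (p)
           *p = f (i);

     the store *p may trap and is not always executed in the loop, so
     sinking it to the loop exit could introduce a trap the original
     program never executed; such a ref is rejected below. */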
09f4cf62 2335 base = get_base_address (ref->mem.ref);
2336 if ((tree_could_trap_p (ref->mem.ref)
307f7fda 2337 || (DECL_P (base) && TREE_READONLY (base)))
2338 && !ref_always_accessed_p (loop, ref, true))
063a8bce 2339 return false;
342ea212 2340
063a8bce 2341 /* And it must be independent of all other memory references
2342 in LOOP. */
23298f15 2343 if (!ref_indep_loop_p (loop, ref))
063a8bce 2344 return false;
342ea212 2345
063a8bce 2346 return true;
342ea212 2347}
2348
063a8bce 2349/* Marks the references in LOOP for which store motion should be performed
 2350 in REFS_TO_SM. SM_EXECUTED is the set of references for which store
 2351 motion was performed in one of the outer loops. */
342ea212 2352
2353static void
063a8bce 2354find_refs_for_sm (struct loop *loop, bitmap sm_executed, bitmap refs_to_sm)
55a03692 2355{
feba4360 2356 bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num];
063a8bce 2357 unsigned i;
2358 bitmap_iterator bi;
4a7ff262 2359 im_mem_ref *ref;
063a8bce 2360
2361 EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
2362 {
f1f41a6c 2363 ref = memory_accesses.refs_list[i];
063a8bce 2364 if (can_sm_ref_p (loop, ref))
2365 bitmap_set_bit (refs_to_sm, i);
2366 }
342ea212 2367}
55a03692 2368
063a8bce 2369/* Checks whether LOOP (with exits stored in EXITS array) is suitable
 2370 for a store motion optimization (i.e. whether we can insert statements
2371 on its exits). */
342ea212 2372
063a8bce 2373static bool
2374loop_suitable_for_sm (struct loop *loop ATTRIBUTE_UNUSED,
f1f41a6c 2375 vec<edge> exits)
342ea212 2376{
063a8bce 2377 unsigned i;
2378 edge ex;
55a03692 2379
f1f41a6c 2380 FOR_EACH_VEC_ELT (exits, i, ex)
9102193b 2381 if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
063a8bce 2382 return false;
2383
2384 return true;
55a03692 2385}
2386
7d23383d 2387/* Try to perform store motion for all memory references modified inside
063a8bce 2388 LOOP. SM_EXECUTED is the bitmap of the memory references for which
 2389 store motion was executed in one of the outer loops. */
7d23383d 2390
2391static void
063a8bce 2392store_motion_loop (struct loop *loop, bitmap sm_executed)
7d23383d 2393{
f1f41a6c 2394 vec<edge> exits = get_loop_exit_edges (loop);
063a8bce 2395 struct loop *subloop;
39f8afdf 2396 bitmap sm_in_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
7d23383d 2397
063a8bce 2398 if (loop_suitable_for_sm (loop, exits))
7d23383d 2399 {
063a8bce 2400 find_refs_for_sm (loop, sm_executed, sm_in_loop);
2401 hoist_memory_references (loop, sm_in_loop, exits);
7d23383d 2402 }
f1f41a6c 2403 exits.release ();
063a8bce 2404
2405 bitmap_ior_into (sm_executed, sm_in_loop);
2406 for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
2407 store_motion_loop (subloop, sm_executed);
2408 bitmap_and_compl_into (sm_executed, sm_in_loop);
2409 BITMAP_FREE (sm_in_loop);
7d23383d 2410}
2411
2412/* Try to perform store motion for all memory references modified inside
7194de72 2413 loops. */
7d23383d 2414
2415static void
063a8bce 2416store_motion (void)
7d23383d 2417{
2418 struct loop *loop;
39f8afdf 2419 bitmap sm_executed = BITMAP_ALLOC (&lim_bitmap_obstack);
dda28f7c 2420
063a8bce 2421 for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
2422 store_motion_loop (loop, sm_executed);
17519ba0 2423
063a8bce 2424 BITMAP_FREE (sm_executed);
75a70cf9 2425 gsi_commit_edge_inserts ();
7d23383d 2426}
2427
2428/* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
 2429 for each such basic block bb records the outermost loop for which execution
 2430 of its header implies execution of bb. CONTAINS_CALL is the bitmap of
2431 blocks that contain a nonpure call. */
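/* For illustration (a sketch with a made-up CFG): in

     loop:
       header;           <- ALWAYS_EXECUTED_IN == loop
       if (cond)
         then_block;     <- executed only when cond holds, no entry recorded
       latch;

   only blocks that dominate the latch and are not preceded by a nonpure
   call or a possible exit from the loop get LOOP recorded as their
   ALWAYS_EXECUTED_IN. */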
2432
2433static void
feba4360 2434fill_always_executed_in_1 (struct loop *loop, sbitmap contains_call)
7d23383d 2435{
2436 basic_block bb = NULL, *bbs, last = NULL;
2437 unsigned i;
2438 edge e;
2439 struct loop *inn_loop = loop;
2440
2fd20c29 2441 if (ALWAYS_EXECUTED_IN (loop->header) == NULL)
7d23383d 2442 {
2443 bbs = get_loop_body_in_dom_order (loop);
2444
2445 for (i = 0; i < loop->num_nodes; i++)
2446 {
cd665a06 2447 edge_iterator ei;
7d23383d 2448 bb = bbs[i];
2449
2450 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
2451 last = bb;
2452
08b7917c 2453 if (bitmap_bit_p (contains_call, bb->index))
7d23383d 2454 break;
2455
cd665a06 2456 FOR_EACH_EDGE (e, ei, bb->succs)
5b9b62c9 2457 {
2458 /* If there is an exit from this BB. */
2459 if (!flow_bb_inside_loop_p (loop, e->dest))
2460 break;
2461 /* Or we enter a possibly non-finite loop. */
2462 if (flow_loop_nested_p (bb->loop_father,
2463 e->dest->loop_father)
2464 && ! finite_loop_p (e->dest->loop_father))
2465 break;
2466 }
7d23383d 2467 if (e)
2468 break;
2469
2470 /* A loop might be infinite (TODO use simple loop analysis
2471 to disprove this if possible). */
2472 if (bb->flags & BB_IRREDUCIBLE_LOOP)
2473 break;
2474
2475 if (!flow_bb_inside_loop_p (inn_loop, bb))
2476 break;
2477
2478 if (bb->loop_father->header == bb)
2479 {
2480 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
2481 break;
2482
2483 /* In a loop that is always entered we may proceed anyway.
2484 But record that we entered it and stop once we leave it. */
2485 inn_loop = bb->loop_father;
2486 }
2487 }
2488
2489 while (1)
2490 {
2fd20c29 2491 SET_ALWAYS_EXECUTED_IN (last, loop);
7d23383d 2492 if (last == loop->header)
2493 break;
2494 last = get_immediate_dominator (CDI_DOMINATORS, last);
2495 }
2496
2497 free (bbs);
2498 }
2499
2500 for (loop = loop->inner; loop; loop = loop->next)
feba4360 2501 fill_always_executed_in_1 (loop, contains_call);
7d23383d 2502}
2503
feba4360 2504/* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e.
 2505 for each such basic block bb records the outermost loop for which execution
2506 of its header implies execution of bb. */
7d23383d 2507
2508static void
feba4360 2509fill_always_executed_in (void)
7d23383d 2510{
7d23383d 2511 basic_block bb;
feba4360 2512 struct loop *loop;
4fb07d00 2513
3c6549f8 2514 auto_sbitmap contains_call (last_basic_block_for_fn (cfun));
53c5d9d4 2515 bitmap_clear (contains_call);
fc00614f 2516 FOR_EACH_BB_FN (bb, cfun)
7d23383d 2517 {
feba4360 2518 gimple_stmt_iterator gsi;
2519 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7d23383d 2520 {
feba4360 2521 if (nonpure_call_p (gsi_stmt (gsi)))
7d23383d 2522 break;
2523 }
2524
feba4360 2525 if (!gsi_end_p (gsi))
08b7917c 2526 bitmap_set_bit (contains_call, bb->index);
7d23383d 2527 }
2528
7194de72 2529 for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
feba4360 2530 fill_always_executed_in_1 (loop, contains_call);
feba4360 2531}
2532
2533
2534/* Compute the global information needed by the loop invariant motion pass. */
75a70cf9 2535
feba4360 2536static void
2537tree_ssa_lim_initialize (void)
2538{
3e48928c 2539 struct loop *loop;
feba4360 2540 unsigned i;
2541
2542 bitmap_obstack_initialize (&lim_bitmap_obstack);
3e48928c 2543 gcc_obstack_init (&mem_ref_obstack);
42acab1c 2544 lim_aux_data_map = new hash_map<gimple *, lim_aux_data *>;
de60f90c 2545
2546 if (flag_tm)
2547 compute_transaction_bits ();
61025ec0 2548
2549 alloc_aux_for_edges (0);
feba4360 2550
c1f445d2 2551 memory_accesses.refs = new hash_table<mem_ref_hasher> (100);
feba4360 2552 memory_accesses.refs_list.create (100);
2553 /* Allocate a special, unanalyzable mem-ref with ID zero. */
2554 memory_accesses.refs_list.quick_push
1b94b295 2555 (mem_ref_alloc (NULL, 0, UNANALYZABLE_MEM_ID));
feba4360 2556
41f75a99 2557 memory_accesses.refs_in_loop.create (number_of_loops (cfun));
2558 memory_accesses.refs_in_loop.quick_grow (number_of_loops (cfun));
2559 memory_accesses.refs_stored_in_loop.create (number_of_loops (cfun));
2560 memory_accesses.refs_stored_in_loop.quick_grow (number_of_loops (cfun));
2561 memory_accesses.all_refs_stored_in_loop.create (number_of_loops (cfun));
2562 memory_accesses.all_refs_stored_in_loop.quick_grow (number_of_loops (cfun));
feba4360 2563
41f75a99 2564 for (i = 0; i < number_of_loops (cfun); i++)
feba4360 2565 {
2566 bitmap_initialize (&memory_accesses.refs_in_loop[i],
2567 &lim_bitmap_obstack);
2568 bitmap_initialize (&memory_accesses.refs_stored_in_loop[i],
2569 &lim_bitmap_obstack);
2570 bitmap_initialize (&memory_accesses.all_refs_stored_in_loop[i],
2571 &lim_bitmap_obstack);
2572 }
2573
2574 memory_accesses.ttae_cache = NULL;
3e48928c 2575
2576 /* Initialize bb_loop_postorder with a mapping from loop->num to
2577 its postorder index. */
2578 i = 0;
2579 bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun));
2580 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
2581 bb_loop_postorder[loop->num] = i++;
7d23383d 2582}
2583
2584/* Cleans up after the invariant motion pass. */
2585
2586static void
2587tree_ssa_lim_finalize (void)
2588{
2589 basic_block bb;
063a8bce 2590 unsigned i;
4a7ff262 2591 im_mem_ref *ref;
7d23383d 2592
61025ec0 2593 free_aux_for_edges ();
2594
fc00614f 2595 FOR_EACH_BB_FN (bb, cfun)
2fd20c29 2596 SET_ALWAYS_EXECUTED_IN (bb, NULL);
063a8bce 2597
4fb07d00 2598 bitmap_obstack_release (&lim_bitmap_obstack);
06ecf488 2599 delete lim_aux_data_map;
75a70cf9 2600
c1f445d2 2601 delete memory_accesses.refs;
2602 memory_accesses.refs = NULL;
063a8bce 2603
f1f41a6c 2604 FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
83b709f2 2605 memref_free (ref);
f1f41a6c 2606 memory_accesses.refs_list.release ();
3e48928c 2607 obstack_free (&mem_ref_obstack, NULL);
83b709f2 2608
f1f41a6c 2609 memory_accesses.refs_in_loop.release ();
8f9829e8 2610 memory_accesses.refs_stored_in_loop.release ();
f1f41a6c 2611 memory_accesses.all_refs_stored_in_loop.release ();
063a8bce 2612
2613 if (memory_accesses.ttae_cache)
78d53e33 2614 free_affine_expand_cache (&memory_accesses.ttae_cache);
3e48928c 2615
2616 free (bb_loop_postorder);
7d23383d 2617}
2618
7194de72 2619/* Moves invariants from loops. Only "expensive" invariants are moved out --
7d23383d 2620 i.e. those that are likely to be a win regardless of the register pressure. */
2621
b11dc37d 2622static unsigned int
7194de72 2623tree_ssa_lim (void)
7d23383d 2624{
9bf0a3f9 2625 unsigned int todo;
2626
7194de72 2627 tree_ssa_lim_initialize ();
7d23383d 2628
063a8bce 2629 /* Gathers information about memory accesses in the loops. */
2630 analyze_memory_references ();
2631
feba4360 2632 /* Fills ALWAYS_EXECUTED_IN information for basic blocks. */
2633 fill_always_executed_in ();
2634
7d23383d 2635 /* For each statement determine the outermost loop in that it is
2636 invariant and cost for computing the invariant. */
54c91640 2637 invariantness_dom_walker (CDI_DOMINATORS)
2638 .walk (cfun->cfg->x_entry_block_ptr);
7d23383d 2639
063a8bce 2640 /* Execute store motion. Force the necessary invariants to be moved
2641 out of the loops as well. */
2642 store_motion ();
7d23383d 2643
2644 /* Move the expressions that are expensive enough. */
9bf0a3f9 2645 todo = move_computations ();
7d23383d 2646
2647 tree_ssa_lim_finalize ();
9bf0a3f9 2648
2649 return todo;
7d23383d 2650}
f86b328b 2651
2652/* Loop invariant motion pass. */
2653
f86b328b 2654namespace {
2655
2656const pass_data pass_data_lim =
2657{
2658 GIMPLE_PASS, /* type */
2659 "lim", /* name */
2660 OPTGROUP_LOOP, /* optinfo_flags */
f86b328b 2661 TV_LIM, /* tv_id */
2662 PROP_cfg, /* properties_required */
2663 0, /* properties_provided */
2664 0, /* properties_destroyed */
2665 0, /* todo_flags_start */
2666 0, /* todo_flags_finish */
2667};
2668
2669class pass_lim : public gimple_opt_pass
2670{
2671public:
2672 pass_lim (gcc::context *ctxt)
2673 : gimple_opt_pass (pass_data_lim, ctxt)
2674 {}
2675
2676 /* opt_pass methods: */
2677 opt_pass * clone () { return new pass_lim (m_ctxt); }
31315c24 2678 virtual bool gate (function *) { return flag_tree_loop_im != 0; }
65b0537f 2679 virtual unsigned int execute (function *);
f86b328b 2680
2681}; // class pass_lim
2682
65b0537f 2683unsigned int
2684pass_lim::execute (function *fun)
2685{
b11dc37d 2686 bool in_loop_pipeline = scev_initialized_p ();
2687 if (!in_loop_pipeline)
2688 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
2689
65b0537f 2690 if (number_of_loops (fun) <= 1)
2691 return 0;
b11dc37d 2692 unsigned int todo = tree_ssa_lim ();
65b0537f 2693
b11dc37d 2694 if (!in_loop_pipeline)
2695 loop_optimizer_finalize ();
9b33fe18 2696 else
2697 scev_reset ();
b11dc37d 2698 return todo;
65b0537f 2699}
2700
f86b328b 2701} // anon namespace
2702
2703gimple_opt_pass *
2704make_pass_lim (gcc::context *ctxt)
2705{
2706 return new pass_lim (ctxt);
2707}
2708
2709