]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-predcom.c
alias.c: Reorder #include statements and remove duplicates.
[thirdparty/gcc.git] / gcc / tree-predcom.c
CommitLineData
bbc8a8dc 1/* Predictive commoning.
5624e564 2 Copyright (C) 2005-2015 Free Software Foundation, Inc.
b8698a0f 3
bbc8a8dc 4This file is part of GCC.
b8698a0f 5
bbc8a8dc
ZD
6GCC is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by the
9dcd6f09 8Free Software Foundation; either version 3, or (at your option) any
bbc8a8dc 9later version.
b8698a0f 10
bbc8a8dc
ZD
11GCC is distributed in the hope that it will be useful, but WITHOUT
12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
b8698a0f 15
bbc8a8dc 16You should have received a copy of the GNU General Public License
9dcd6f09
NC
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
bbc8a8dc
ZD
19
20/* This file implements the predictive commoning optimization. Predictive
21 commoning can be viewed as CSE around a loop, and with some improvements,
22 as generalized strength reduction-- i.e., reusing values computed in
23 earlier iterations of a loop in the later ones. So far, the pass only
24 handles the most useful case, that is, reusing values of memory references.
25 If you think this is all just a special case of PRE, you are sort of right;
26 however, concentrating on loops is simpler, and makes it possible to
27 incorporate data dependence analysis to detect the opportunities, perform
28 loop unrolling to avoid copies together with renaming immediately,
29 and if needed, we could also take register pressure into account.
30
31 Let us demonstrate what is done on an example:
b8698a0f 32
bbc8a8dc
ZD
33 for (i = 0; i < 100; i++)
34 {
35 a[i+2] = a[i] + a[i+1];
36 b[10] = b[10] + i;
37 c[i] = c[99 - i];
38 d[i] = d[i + 1];
39 }
40
41 1) We find data references in the loop, and split them to mutually
42 independent groups (i.e., we find components of a data dependence
43 graph). We ignore read-read dependences whose distance is not constant.
44 (TODO -- we could also ignore antidependences). In this example, we
45 find the following groups:
46
47 a[i]{read}, a[i+1]{read}, a[i+2]{write}
48 b[10]{read}, b[10]{write}
49 c[99 - i]{read}, c[i]{write}
50 d[i + 1]{read}, d[i]{write}
51
52 2) Inside each of the group, we verify several conditions:
53 a) all the references must differ in indices only, and the indices
54 must all have the same step
55 b) the references must dominate loop latch (and thus, they must be
56 ordered by dominance relation).
57 c) the distance of the indices must be a small multiple of the step
58 We are then able to compute the difference of the references (# of
59 iterations before they point to the same place as the first of them).
60 Also, in case there are writes in the loop, we split the groups into
61 chains whose head is the write whose values are used by the reads in
62 the same chain. The chains are then processed independently,
63 making the further transformations simpler. Also, the shorter chains
64 need the same number of registers, but may require lower unrolling
65 factor in order to get rid of the copies on the loop latch.
b8698a0f 66
bbc8a8dc
ZD
67 In our example, we get the following chains (the chain for c is invalid).
68
69 a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
70 b[10]{read,+0}, b[10]{write,+0}
71 d[i + 1]{read,+0}, d[i]{write,+1}
72
73 3) For each read, we determine the read or write whose value it reuses,
74 together with the distance of this reuse. I.e. we take the last
75 reference before it with distance 0, or the last of the references
76 with the smallest positive distance to the read. Then, we remove
77 the references that are not used in any of these chains, discard the
78 empty groups, and propagate all the links so that they point to the
b8698a0f 79 single root reference of the chain (adjusting their distance
bbc8a8dc
ZD
80 appropriately). Some extra care needs to be taken for references with
81 step 0. In our example (the numbers indicate the distance of the
82 reuse),
83
84 a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
85 b[10] --> (*) 1, b[10] (*)
86
87 4) The chains are combined together if possible. If the corresponding
88 elements of two chains are always combined together with the same
89 operator, we remember just the result of this combination, instead
90 of remembering the values separately. We may need to perform
91 reassociation to enable combining, for example
92
93 e[i] + f[i+1] + e[i+1] + f[i]
94
95 can be reassociated as
96
97 (e[i] + f[i]) + (e[i+1] + f[i+1])
98
99 and we can combine the chains for e and f into one chain.
100
101 5) For each root reference (end of the chain) R, let N be maximum distance
073a8998 102 of a reference reusing its value. Variables R0 up to RN are created,
bbc8a8dc
ZD
103 together with phi nodes that transfer values from R1 .. RN to
104 R0 .. R(N-1).
105 Initial values are loaded to R0..R(N-1) (in case not all references
106 must necessarily be accessed and they may trap, we may fail here;
107 TODO sometimes, the loads could be guarded by a check for the number
108 of iterations). Values loaded/stored in roots are also copied to
109 RN. Other reads are replaced with the appropriate variable Ri.
110 Everything is put to SSA form.
111
112 As a small improvement, if R0 is dead after the root (i.e., all uses of
113 the value with the maximum distance dominate the root), we can avoid
114 creating RN and use R0 instead of it.
115
116 In our example, we get (only the parts concerning a and b are shown):
117 for (i = 0; i < 100; i++)
118 {
119 f = phi (a[0], s);
120 s = phi (a[1], f);
121 x = phi (b[10], x);
122
123 f = f + s;
124 a[i+2] = f;
125 x = x + i;
126 b[10] = x;
127 }
128
129 6) Factor F for unrolling is determined as the smallest common multiple of
130 (N + 1) for each root reference (N for references for that we avoided
131 creating RN). If F and the loop is small enough, loop is unrolled F
132 times. The stores to RN (R0) in the copies of the loop body are
133 periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
134 be coalesced and the copies can be eliminated.
b8698a0f 135
bbc8a8dc
ZD
136 TODO -- copy propagation and other optimizations may change the live
137 ranges of the temporary registers and prevent them from being coalesced;
138 this may increase the register pressure.
139
140 In our case, F = 2 and the (main loop of the) result is
141
142 for (i = 0; i < ...; i += 2)
143 {
144 f = phi (a[0], f);
145 s = phi (a[1], s);
146 x = phi (b[10], x);
147
148 f = f + s;
149 a[i+2] = f;
150 x = x + i;
151 b[10] = x;
152
153 s = s + f;
154 a[i+3] = s;
155 x = x + i;
156 b[10] = x;
157 }
158
159 TODO -- stores killing other stores can be taken into account, e.g.,
160 for (i = 0; i < n; i++)
161 {
162 a[i] = 1;
163 a[i+2] = 2;
164 }
165
166 can be replaced with
167
168 t0 = a[0];
169 t1 = a[1];
170 for (i = 0; i < n; i++)
171 {
172 a[i] = 1;
173 t2 = 2;
174 t0 = t1;
175 t1 = t2;
176 }
177 a[n] = t0;
178 a[n+1] = t1;
179
180 The interesting part is that this would generalize store motion; still, since
181 sm is performed elsewhere, it does not seem that important.
182
183 Predictive commoning can be generalized for arbitrary computations (not
184 just memory loads), and also nontrivial transfer functions (e.g., replacing
185 i * i with ii_last + 2 * i + 1), to generalize strength reduction. */
186
187#include "config.h"
188#include "system.h"
189#include "coretypes.h"
c7131fb2 190#include "backend.h"
957060b5 191#include "rtl.h"
bbc8a8dc 192#include "tree.h"
c7131fb2 193#include "gimple.h"
957060b5
AM
194#include "predict.h"
195#include "tree-pass.h"
196#include "tm_p.h"
c7131fb2 197#include "ssa.h"
957060b5
AM
198#include "expmed.h"
199#include "insn-config.h"
200#include "emit-rtl.h"
201#include "gimple-pretty-print.h"
c7131fb2 202#include "alias.h"
40e23961 203#include "fold-const.h"
bbc8a8dc 204#include "cfgloop.h"
2fb9a547
AM
205#include "internal-fn.h"
206#include "tree-eh.h"
45b0be94 207#include "gimplify.h"
5be5c238 208#include "gimple-iterator.h"
18f429e2 209#include "gimplify-me.h"
e28030cf
AM
210#include "tree-ssa-loop-ivopts.h"
211#include "tree-ssa-loop-manip.h"
212#include "tree-ssa-loop-niter.h"
442b4905
AM
213#include "tree-ssa-loop.h"
214#include "tree-into-ssa.h"
36566b39 215#include "flags.h"
36566b39
PK
216#include "dojump.h"
217#include "explow.h"
218#include "calls.h"
36566b39
PK
219#include "varasm.h"
220#include "stmt.h"
d8a2d370 221#include "expr.h"
442b4905 222#include "tree-dfa.h"
7a300452 223#include "tree-ssa.h"
bbc8a8dc
ZD
224#include "tree-data-ref.h"
225#include "tree-scalar-evolution.h"
bbc8a8dc 226#include "params.h"
bbc8a8dc
ZD
227#include "tree-affine.h"
228#include "tree-inline.h"
229
230/* The maximum number of iterations between the considered memory
231 references. */
232
233#define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
b8698a0f 234
726a989a
RB
235/* Data references (or phi nodes that carry data reference values across
236 loop iterations). */
bbc8a8dc 237
7e5487a2 238typedef struct dref_d
bbc8a8dc
ZD
239{
240 /* The reference itself. */
241 struct data_reference *ref;
242
243 /* The statement in that the reference appears. */
355fe088 244 gimple *stmt;
726a989a
RB
245
246 /* In case that STMT is a phi node, this field is set to the SSA name
247 defined by it in replace_phis_by_defined_names (in order to avoid
248 pointing to phi node that got reallocated in the meantime). */
249 tree name_defined_by_phi;
bbc8a8dc
ZD
250
251 /* Distance of the reference from the root of the chain (in number of
252 iterations of the loop). */
253 unsigned distance;
254
255 /* Number of iterations offset from the first reference in the component. */
807e902e 256 widest_int offset;
bbc8a8dc
ZD
257
258 /* Number of the reference in a component, in dominance ordering. */
259 unsigned pos;
260
261 /* True if the memory reference is always accessed when the loop is
262 entered. */
263 unsigned always_accessed : 1;
264} *dref;
265
bbc8a8dc
ZD
266
267/* Type of the chain of the references. */
268
269enum chain_type
270{
271 /* The addresses of the references in the chain are constant. */
272 CT_INVARIANT,
273
274 /* There are only loads in the chain. */
275 CT_LOAD,
276
277 /* Root of the chain is store, the rest are loads. */
278 CT_STORE_LOAD,
279
280 /* A combination of two chains. */
281 CT_COMBINATION
282};
283
284/* Chains of data references. */
285
286typedef struct chain
287{
288 /* Type of the chain. */
289 enum chain_type type;
290
291 /* For combination chains, the operator and the two chains that are
292 combined, and the type of the result. */
82d6e6fc 293 enum tree_code op;
bbc8a8dc
ZD
294 tree rslt_type;
295 struct chain *ch1, *ch2;
296
297 /* The references in the chain. */
9771b263 298 vec<dref> refs;
bbc8a8dc
ZD
299
300 /* The maximum distance of the reference in the chain from the root. */
301 unsigned length;
302
303 /* The variables used to copy the value throughout iterations. */
9771b263 304 vec<tree> vars;
bbc8a8dc
ZD
305
306 /* Initializers for the variables. */
9771b263 307 vec<tree> inits;
bbc8a8dc
ZD
308
309 /* True if there is a use of a variable with the maximal distance
310 that comes after the root in the loop. */
311 unsigned has_max_use_after : 1;
312
313 /* True if all the memory references in the chain are always accessed. */
314 unsigned all_always_accessed : 1;
315
316 /* True if this chain was combined together with some other chain. */
317 unsigned combined : 1;
318} *chain_p;
319
bbc8a8dc
ZD
320
321/* Describes the knowledge about the step of the memory references in
322 the component. */
323
324enum ref_step_type
325{
326 /* The step is zero. */
327 RS_INVARIANT,
328
329 /* The step is nonzero. */
330 RS_NONZERO,
331
332 /* The step may or may not be nonzero. */
333 RS_ANY
334};
335
336/* Components of the data dependence graph. */
337
338struct component
339{
340 /* The references in the component. */
9771b263 341 vec<dref> refs;
bbc8a8dc
ZD
342
343 /* What we know about the step of the references in the component. */
344 enum ref_step_type comp_step;
345
346 /* Next component in the list. */
347 struct component *next;
348};
349
350/* Bitmap of ssa names defined by looparound phi nodes covered by chains. */
351
352static bitmap looparound_phis;
353
354/* Cache used by tree_to_aff_combination_expand. */
355
39c8aaa4 356static hash_map<tree, name_expansion *> *name_expansions;
bbc8a8dc
ZD
357
358/* Dumps data reference REF to FILE. */
359
360extern void dump_dref (FILE *, dref);
361void
362dump_dref (FILE *file, dref ref)
363{
364 if (ref->ref)
365 {
366 fprintf (file, " ");
367 print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM);
368 fprintf (file, " (id %u%s)\n", ref->pos,
369 DR_IS_READ (ref->ref) ? "" : ", write");
370
371 fprintf (file, " offset ");
807e902e 372 print_decs (ref->offset, file);
bbc8a8dc
ZD
373 fprintf (file, "\n");
374
375 fprintf (file, " distance %u\n", ref->distance);
376 }
377 else
378 {
726a989a 379 if (gimple_code (ref->stmt) == GIMPLE_PHI)
bbc8a8dc
ZD
380 fprintf (file, " looparound ref\n");
381 else
382 fprintf (file, " combination ref\n");
383 fprintf (file, " in statement ");
726a989a 384 print_gimple_stmt (file, ref->stmt, 0, TDF_SLIM);
bbc8a8dc
ZD
385 fprintf (file, "\n");
386 fprintf (file, " distance %u\n", ref->distance);
387 }
388
389}
390
391/* Dumps CHAIN to FILE. */
392
393extern void dump_chain (FILE *, chain_p);
394void
395dump_chain (FILE *file, chain_p chain)
396{
397 dref a;
398 const char *chain_type;
399 unsigned i;
400 tree var;
401
402 switch (chain->type)
403 {
404 case CT_INVARIANT:
405 chain_type = "Load motion";
406 break;
407
408 case CT_LOAD:
409 chain_type = "Loads-only";
410 break;
411
412 case CT_STORE_LOAD:
413 chain_type = "Store-loads";
414 break;
415
416 case CT_COMBINATION:
417 chain_type = "Combination";
418 break;
419
420 default:
421 gcc_unreachable ();
422 }
423
424 fprintf (file, "%s chain %p%s\n", chain_type, (void *) chain,
425 chain->combined ? " (combined)" : "");
426 if (chain->type != CT_INVARIANT)
427 fprintf (file, " max distance %u%s\n", chain->length,
428 chain->has_max_use_after ? "" : ", may reuse first");
429
430 if (chain->type == CT_COMBINATION)
431 {
432 fprintf (file, " equal to %p %s %p in type ",
82d6e6fc 433 (void *) chain->ch1, op_symbol_code (chain->op),
bbc8a8dc
ZD
434 (void *) chain->ch2);
435 print_generic_expr (file, chain->rslt_type, TDF_SLIM);
436 fprintf (file, "\n");
437 }
438
9771b263 439 if (chain->vars.exists ())
bbc8a8dc
ZD
440 {
441 fprintf (file, " vars");
9771b263 442 FOR_EACH_VEC_ELT (chain->vars, i, var)
bbc8a8dc
ZD
443 {
444 fprintf (file, " ");
445 print_generic_expr (file, var, TDF_SLIM);
446 }
447 fprintf (file, "\n");
448 }
449
9771b263 450 if (chain->inits.exists ())
bbc8a8dc
ZD
451 {
452 fprintf (file, " inits");
9771b263 453 FOR_EACH_VEC_ELT (chain->inits, i, var)
bbc8a8dc
ZD
454 {
455 fprintf (file, " ");
456 print_generic_expr (file, var, TDF_SLIM);
457 }
458 fprintf (file, "\n");
459 }
460
461 fprintf (file, " references:\n");
9771b263 462 FOR_EACH_VEC_ELT (chain->refs, i, a)
bbc8a8dc
ZD
463 dump_dref (file, a);
464
465 fprintf (file, "\n");
466}
467
468/* Dumps CHAINS to FILE. */
469
9771b263 470extern void dump_chains (FILE *, vec<chain_p> );
bbc8a8dc 471void
9771b263 472dump_chains (FILE *file, vec<chain_p> chains)
bbc8a8dc
ZD
473{
474 chain_p chain;
475 unsigned i;
476
9771b263 477 FOR_EACH_VEC_ELT (chains, i, chain)
bbc8a8dc
ZD
478 dump_chain (file, chain);
479}
480
481/* Dumps COMP to FILE. */
482
483extern void dump_component (FILE *, struct component *);
484void
485dump_component (FILE *file, struct component *comp)
486{
487 dref a;
488 unsigned i;
489
490 fprintf (file, "Component%s:\n",
491 comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
9771b263 492 FOR_EACH_VEC_ELT (comp->refs, i, a)
bbc8a8dc
ZD
493 dump_dref (file, a);
494 fprintf (file, "\n");
495}
496
497/* Dumps COMPS to FILE. */
498
499extern void dump_components (FILE *, struct component *);
500void
501dump_components (FILE *file, struct component *comps)
502{
503 struct component *comp;
504
505 for (comp = comps; comp; comp = comp->next)
506 dump_component (file, comp);
507}
508
509/* Frees a chain CHAIN. */
510
511static void
512release_chain (chain_p chain)
513{
514 dref ref;
515 unsigned i;
516
517 if (chain == NULL)
518 return;
519
9771b263 520 FOR_EACH_VEC_ELT (chain->refs, i, ref)
bbc8a8dc
ZD
521 free (ref);
522
9771b263
DN
523 chain->refs.release ();
524 chain->vars.release ();
525 chain->inits.release ();
bbc8a8dc
ZD
526
527 free (chain);
528}
529
530/* Frees CHAINS. */
531
532static void
9771b263 533release_chains (vec<chain_p> chains)
bbc8a8dc
ZD
534{
535 unsigned i;
536 chain_p chain;
537
9771b263 538 FOR_EACH_VEC_ELT (chains, i, chain)
bbc8a8dc 539 release_chain (chain);
9771b263 540 chains.release ();
bbc8a8dc
ZD
541}
542
543/* Frees a component COMP. */
544
545static void
546release_component (struct component *comp)
547{
9771b263 548 comp->refs.release ();
bbc8a8dc
ZD
549 free (comp);
550}
551
552/* Frees list of components COMPS. */
553
554static void
555release_components (struct component *comps)
556{
557 struct component *act, *next;
558
559 for (act = comps; act; act = next)
560 {
561 next = act->next;
562 release_component (act);
563 }
564}
565
566/* Finds a root of tree given by FATHERS containing A, and performs path
567 shortening. */
568
569static unsigned
570component_of (unsigned fathers[], unsigned a)
571{
572 unsigned root, n;
573
574 for (root = a; root != fathers[root]; root = fathers[root])
575 continue;
576
577 for (; a != root; a = n)
578 {
579 n = fathers[a];
580 fathers[a] = root;
581 }
582
583 return root;
584}
585
586/* Join operation for DFU. FATHERS gives the tree, SIZES are sizes of the
587 components, A and B are components to merge. */
588
589static void
590merge_comps (unsigned fathers[], unsigned sizes[], unsigned a, unsigned b)
591{
592 unsigned ca = component_of (fathers, a);
593 unsigned cb = component_of (fathers, b);
594
595 if (ca == cb)
596 return;
597
598 if (sizes[ca] < sizes[cb])
599 {
600 sizes[cb] += sizes[ca];
601 fathers[ca] = cb;
602 }
603 else
604 {
605 sizes[ca] += sizes[cb];
606 fathers[cb] = ca;
607 }
608}
609
610/* Returns true if A is a reference that is suitable for predictive commoning
611 in the innermost loop that contains it. REF_STEP is set according to the
612 step of the reference A. */
613
614static bool
615suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
616{
617 tree ref = DR_REF (a), step = DR_STEP (a);
618
619 if (!step
64fb0d3a 620 || TREE_THIS_VOLATILE (ref)
7e80c6bf
EB
621 || !is_gimple_reg_type (TREE_TYPE (ref))
622 || tree_could_throw_p (ref))
bbc8a8dc
ZD
623 return false;
624
625 if (integer_zerop (step))
626 *ref_step = RS_INVARIANT;
627 else if (integer_nonzerop (step))
628 *ref_step = RS_NONZERO;
629 else
630 *ref_step = RS_ANY;
631
632 return true;
633}
634
635/* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET. */
636
637static void
638aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
639{
0d82a1c8 640 tree type = TREE_TYPE (DR_OFFSET (dr));
bbc8a8dc
ZD
641 aff_tree delta;
642
0d82a1c8 643 tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset,
bbc8a8dc 644 &name_expansions);
807e902e 645 aff_combination_const (&delta, type, wi::to_widest (DR_INIT (dr)));
bbc8a8dc
ZD
646 aff_combination_add (offset, &delta);
647}
648
649/* Determines number of iterations of the innermost enclosing loop before B
650 refers to exactly the same location as A and stores it to OFF. If A and
651 B do not have the same step, they never meet, or anything else fails,
652 returns false, otherwise returns true. Both A and B are assumed to
653 satisfy suitable_reference_p. */
654
655static bool
656determine_offset (struct data_reference *a, struct data_reference *b,
807e902e 657 widest_int *off)
bbc8a8dc
ZD
658{
659 aff_tree diff, baseb, step;
49379cb1
ZD
660 tree typea, typeb;
661
662 /* Check that both the references access the location in the same type. */
663 typea = TREE_TYPE (DR_REF (a));
664 typeb = TREE_TYPE (DR_REF (b));
36618b93 665 if (!useless_type_conversion_p (typeb, typea))
49379cb1 666 return false;
bbc8a8dc
ZD
667
668 /* Check whether the base address and the step of both references is the
669 same. */
670 if (!operand_equal_p (DR_STEP (a), DR_STEP (b), 0)
671 || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), 0))
672 return false;
673
674 if (integer_zerop (DR_STEP (a)))
675 {
676 /* If the references have loop invariant address, check that they access
677 exactly the same location. */
807e902e 678 *off = 0;
bbc8a8dc
ZD
679 return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0)
680 && operand_equal_p (DR_INIT (a), DR_INIT (b), 0));
681 }
682
683 /* Compare the offsets of the addresses, and check whether the difference
684 is a multiple of step. */
685 aff_combination_dr_offset (a, &diff);
686 aff_combination_dr_offset (b, &baseb);
807e902e 687 aff_combination_scale (&baseb, -1);
bbc8a8dc
ZD
688 aff_combination_add (&diff, &baseb);
689
0d82a1c8 690 tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)),
bbc8a8dc
ZD
691 &step, &name_expansions);
692 return aff_combination_constant_multiple_p (&diff, &step, off);
693}
694
695/* Returns the last basic block in LOOP for that we are sure that
696 it is executed whenever the loop is entered. */
697
698static basic_block
699last_always_executed_block (struct loop *loop)
700{
701 unsigned i;
9771b263 702 vec<edge> exits = get_loop_exit_edges (loop);
bbc8a8dc
ZD
703 edge ex;
704 basic_block last = loop->latch;
705
9771b263 706 FOR_EACH_VEC_ELT (exits, i, ex)
bbc8a8dc 707 last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
9771b263 708 exits.release ();
bbc8a8dc
ZD
709
710 return last;
711}
712
713/* Splits dependence graph on DATAREFS described by DEPENDS to components. */
714
715static struct component *
716split_data_refs_to_components (struct loop *loop,
9771b263
DN
717 vec<data_reference_p> datarefs,
718 vec<ddr_p> depends)
bbc8a8dc 719{
9771b263 720 unsigned i, n = datarefs.length ();
bbc8a8dc
ZD
721 unsigned ca, ia, ib, bad;
722 unsigned *comp_father = XNEWVEC (unsigned, n + 1);
723 unsigned *comp_size = XNEWVEC (unsigned, n + 1);
724 struct component **comps;
725 struct data_reference *dr, *dra, *drb;
726 struct data_dependence_relation *ddr;
727 struct component *comp_list = NULL, *comp;
728 dref dataref;
729 basic_block last_always_executed = last_always_executed_block (loop);
b8698a0f 730
9771b263 731 FOR_EACH_VEC_ELT (datarefs, i, dr)
bbc8a8dc
ZD
732 {
733 if (!DR_REF (dr))
734 {
735 /* A fake reference for call or asm_expr that may clobber memory;
736 just fail. */
737 goto end;
738 }
5ce9450f
JJ
739 /* predcom pass isn't prepared to handle calls with data references. */
740 if (is_gimple_call (DR_STMT (dr)))
741 goto end;
5417e022 742 dr->aux = (void *) (size_t) i;
bbc8a8dc
ZD
743 comp_father[i] = i;
744 comp_size[i] = 1;
745 }
746
747 /* A component reserved for the "bad" data references. */
748 comp_father[n] = n;
749 comp_size[n] = 1;
750
9771b263 751 FOR_EACH_VEC_ELT (datarefs, i, dr)
bbc8a8dc
ZD
752 {
753 enum ref_step_type dummy;
754
755 if (!suitable_reference_p (dr, &dummy))
756 {
5417e022 757 ia = (unsigned) (size_t) dr->aux;
bbc8a8dc
ZD
758 merge_comps (comp_father, comp_size, n, ia);
759 }
760 }
761
9771b263 762 FOR_EACH_VEC_ELT (depends, i, ddr)
bbc8a8dc 763 {
807e902e 764 widest_int dummy_off;
bbc8a8dc
ZD
765
766 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
767 continue;
768
769 dra = DDR_A (ddr);
770 drb = DDR_B (ddr);
5417e022
ZD
771 ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
772 ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
bbc8a8dc
ZD
773 if (ia == ib)
774 continue;
775
776 bad = component_of (comp_father, n);
777
778 /* If both A and B are reads, we may ignore unsuitable dependences. */
41626746
JJ
779 if (DR_IS_READ (dra) && DR_IS_READ (drb))
780 {
781 if (ia == bad || ib == bad
782 || !determine_offset (dra, drb, &dummy_off))
783 continue;
784 }
785 /* If A is read and B write or vice versa and there is unsuitable
786 dependence, instead of merging both components into a component
787 that will certainly not pass suitable_component_p, just put the
788 read into bad component, perhaps at least the write together with
789 all the other data refs in it's component will be optimizable. */
790 else if (DR_IS_READ (dra) && ib != bad)
791 {
792 if (ia == bad)
793 continue;
794 else if (!determine_offset (dra, drb, &dummy_off))
795 {
796 merge_comps (comp_father, comp_size, bad, ia);
797 continue;
798 }
799 }
800 else if (DR_IS_READ (drb) && ia != bad)
801 {
802 if (ib == bad)
803 continue;
804 else if (!determine_offset (dra, drb, &dummy_off))
805 {
806 merge_comps (comp_father, comp_size, bad, ib);
807 continue;
808 }
809 }
b8698a0f 810
bbc8a8dc
ZD
811 merge_comps (comp_father, comp_size, ia, ib);
812 }
813
814 comps = XCNEWVEC (struct component *, n);
815 bad = component_of (comp_father, n);
9771b263 816 FOR_EACH_VEC_ELT (datarefs, i, dr)
bbc8a8dc 817 {
5417e022 818 ia = (unsigned) (size_t) dr->aux;
bbc8a8dc
ZD
819 ca = component_of (comp_father, ia);
820 if (ca == bad)
821 continue;
822
823 comp = comps[ca];
824 if (!comp)
825 {
826 comp = XCNEW (struct component);
9771b263 827 comp->refs.create (comp_size[ca]);
bbc8a8dc
ZD
828 comps[ca] = comp;
829 }
830
7e5487a2 831 dataref = XCNEW (struct dref_d);
bbc8a8dc
ZD
832 dataref->ref = dr;
833 dataref->stmt = DR_STMT (dr);
807e902e 834 dataref->offset = 0;
bbc8a8dc
ZD
835 dataref->distance = 0;
836
837 dataref->always_accessed
838 = dominated_by_p (CDI_DOMINATORS, last_always_executed,
726a989a 839 gimple_bb (dataref->stmt));
9771b263
DN
840 dataref->pos = comp->refs.length ();
841 comp->refs.quick_push (dataref);
bbc8a8dc
ZD
842 }
843
844 for (i = 0; i < n; i++)
845 {
846 comp = comps[i];
847 if (comp)
848 {
849 comp->next = comp_list;
850 comp_list = comp;
851 }
852 }
853 free (comps);
854
855end:
856 free (comp_father);
857 free (comp_size);
858 return comp_list;
859}
860
861/* Returns true if the component COMP satisfies the conditions
c80b4100 862 described in 2) at the beginning of this file. LOOP is the current
bbc8a8dc 863 loop. */
b8698a0f 864
bbc8a8dc
ZD
865static bool
866suitable_component_p (struct loop *loop, struct component *comp)
867{
868 unsigned i;
869 dref a, first;
870 basic_block ba, bp = loop->header;
871 bool ok, has_write = false;
872
9771b263 873 FOR_EACH_VEC_ELT (comp->refs, i, a)
bbc8a8dc 874 {
726a989a 875 ba = gimple_bb (a->stmt);
bbc8a8dc
ZD
876
877 if (!just_once_each_iteration_p (loop, ba))
878 return false;
879
880 gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp));
881 bp = ba;
882
b0af49c4 883 if (DR_IS_WRITE (a->ref))
bbc8a8dc
ZD
884 has_write = true;
885 }
886
9771b263 887 first = comp->refs[0];
bbc8a8dc
ZD
888 ok = suitable_reference_p (first->ref, &comp->comp_step);
889 gcc_assert (ok);
807e902e 890 first->offset = 0;
bbc8a8dc 891
9771b263 892 for (i = 1; comp->refs.iterate (i, &a); i++)
bbc8a8dc
ZD
893 {
894 if (!determine_offset (first->ref, a->ref, &a->offset))
895 return false;
896
b2b29377
MM
897 enum ref_step_type a_step;
898 gcc_checking_assert (suitable_reference_p (a->ref, &a_step)
899 && a_step == comp->comp_step);
bbc8a8dc
ZD
900 }
901
902 /* If there is a write inside the component, we must know whether the
903 step is nonzero or not -- we would not otherwise be able to recognize
904 whether the value accessed by reads comes from the OFFSET-th iteration
905 or the previous one. */
906 if (has_write && comp->comp_step == RS_ANY)
907 return false;
908
909 return true;
910}
b8698a0f 911
bbc8a8dc
ZD
912/* Check the conditions on references inside each of components COMPS,
913 and remove the unsuitable components from the list. The new list
914 of components is returned. The conditions are described in 2) at
c80b4100 915 the beginning of this file. LOOP is the current loop. */
bbc8a8dc
ZD
916
917static struct component *
918filter_suitable_components (struct loop *loop, struct component *comps)
919{
920 struct component **comp, *act;
921
922 for (comp = &comps; *comp; )
923 {
924 act = *comp;
925 if (suitable_component_p (loop, act))
926 comp = &act->next;
927 else
928 {
a0044be5
JJ
929 dref ref;
930 unsigned i;
931
bbc8a8dc 932 *comp = act->next;
9771b263 933 FOR_EACH_VEC_ELT (act->refs, i, ref)
a0044be5 934 free (ref);
bbc8a8dc
ZD
935 release_component (act);
936 }
937 }
938
939 return comps;
940}
941
942/* Compares two drefs A and B by their offset and position. Callback for
943 qsort. */
944
945static int
946order_drefs (const void *a, const void *b)
947{
3d9a9f94
KG
948 const dref *const da = (const dref *) a;
949 const dref *const db = (const dref *) b;
807e902e 950 int offcmp = wi::cmps ((*da)->offset, (*db)->offset);
bbc8a8dc
ZD
951
952 if (offcmp != 0)
953 return offcmp;
954
955 return (*da)->pos - (*db)->pos;
956}
957
958/* Returns root of the CHAIN. */
959
960static inline dref
961get_chain_root (chain_p chain)
962{
9771b263 963 return chain->refs[0];
bbc8a8dc
ZD
964}
965
966/* Adds REF to the chain CHAIN. */
967
968static void
969add_ref_to_chain (chain_p chain, dref ref)
970{
971 dref root = get_chain_root (chain);
bbc8a8dc 972
807e902e
KZ
973 gcc_assert (wi::les_p (root->offset, ref->offset));
974 widest_int dist = ref->offset - root->offset;
975 if (wi::leu_p (MAX_DISTANCE, dist))
a0044be5
JJ
976 {
977 free (ref);
978 return;
979 }
807e902e 980 gcc_assert (wi::fits_uhwi_p (dist));
bbc8a8dc 981
9771b263 982 chain->refs.safe_push (ref);
bbc8a8dc 983
27bcd47c 984 ref->distance = dist.to_uhwi ();
bbc8a8dc
ZD
985
986 if (ref->distance >= chain->length)
987 {
988 chain->length = ref->distance;
989 chain->has_max_use_after = false;
990 }
991
992 if (ref->distance == chain->length
993 && ref->pos > root->pos)
994 chain->has_max_use_after = true;
995
996 chain->all_always_accessed &= ref->always_accessed;
997}
998
999/* Returns the chain for invariant component COMP. */
1000
1001static chain_p
1002make_invariant_chain (struct component *comp)
1003{
1004 chain_p chain = XCNEW (struct chain);
1005 unsigned i;
1006 dref ref;
1007
1008 chain->type = CT_INVARIANT;
1009
1010 chain->all_always_accessed = true;
1011
9771b263 1012 FOR_EACH_VEC_ELT (comp->refs, i, ref)
bbc8a8dc 1013 {
9771b263 1014 chain->refs.safe_push (ref);
bbc8a8dc
ZD
1015 chain->all_always_accessed &= ref->always_accessed;
1016 }
1017
1018 return chain;
1019}
1020
1021/* Make a new chain rooted at REF. */
1022
1023static chain_p
1024make_rooted_chain (dref ref)
1025{
1026 chain_p chain = XCNEW (struct chain);
1027
1028 chain->type = DR_IS_READ (ref->ref) ? CT_LOAD : CT_STORE_LOAD;
1029
9771b263 1030 chain->refs.safe_push (ref);
bbc8a8dc
ZD
1031 chain->all_always_accessed = ref->always_accessed;
1032
1033 ref->distance = 0;
1034
1035 return chain;
1036}
1037
1038/* Returns true if CHAIN is not trivial. */
1039
1040static bool
1041nontrivial_chain_p (chain_p chain)
1042{
9771b263 1043 return chain != NULL && chain->refs.length () > 1;
bbc8a8dc
ZD
1044}
1045
1046/* Returns the ssa name that contains the value of REF, or NULL_TREE if there
1047 is no such name. */
1048
1049static tree
1050name_for_ref (dref ref)
1051{
1052 tree name;
1053
726a989a 1054 if (is_gimple_assign (ref->stmt))
bbc8a8dc
ZD
1055 {
1056 if (!ref->ref || DR_IS_READ (ref->ref))
726a989a 1057 name = gimple_assign_lhs (ref->stmt);
bbc8a8dc 1058 else
726a989a 1059 name = gimple_assign_rhs1 (ref->stmt);
bbc8a8dc
ZD
1060 }
1061 else
1062 name = PHI_RESULT (ref->stmt);
1063
1064 return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE);
1065}
1066
1067/* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
1068 iterations of the innermost enclosing loop). */
1069
1070static bool
1071valid_initializer_p (struct data_reference *ref,
1072 unsigned distance, struct data_reference *root)
1073{
1074 aff_tree diff, base, step;
807e902e 1075 widest_int off;
bbc8a8dc 1076
bbc8a8dc
ZD
1077 /* Both REF and ROOT must be accessing the same object. */
1078 if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0))
1079 return false;
1080
1081 /* The initializer is defined outside of loop, hence its address must be
1082 invariant inside the loop. */
1083 gcc_assert (integer_zerop (DR_STEP (ref)));
1084
1085 /* If the address of the reference is invariant, initializer must access
1086 exactly the same location. */
1087 if (integer_zerop (DR_STEP (root)))
1088 return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), 0)
1089 && operand_equal_p (DR_INIT (ref), DR_INIT (root), 0));
1090
1091 /* Verify that this index of REF is equal to the root's index at
1092 -DISTANCE-th iteration. */
1093 aff_combination_dr_offset (root, &diff);
1094 aff_combination_dr_offset (ref, &base);
807e902e 1095 aff_combination_scale (&base, -1);
bbc8a8dc
ZD
1096 aff_combination_add (&diff, &base);
1097
0d82a1c8
RG
1098 tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)),
1099 &step, &name_expansions);
bbc8a8dc
ZD
1100 if (!aff_combination_constant_multiple_p (&diff, &step, &off))
1101 return false;
1102
807e902e 1103 if (off != distance)
bbc8a8dc
ZD
1104 return false;
1105
1106 return true;
1107}
1108
1109/* Finds looparound phi node of LOOP that copies the value of REF, and if its
1110 initial value is correct (equal to initial value of REF shifted by one
1111 iteration), returns the phi node. Otherwise, NULL_TREE is returned. ROOT
1112 is the root of the current chain. */
1113
538dd0b7 1114static gphi *
bbc8a8dc
ZD
1115find_looparound_phi (struct loop *loop, dref ref, dref root)
1116{
726a989a 1117 tree name, init, init_ref;
538dd0b7 1118 gphi *phi = NULL;
355fe088 1119 gimple *init_stmt;
bbc8a8dc
ZD
1120 edge latch = loop_latch_edge (loop);
1121 struct data_reference init_dr;
538dd0b7 1122 gphi_iterator psi;
bbc8a8dc 1123
726a989a 1124 if (is_gimple_assign (ref->stmt))
bbc8a8dc
ZD
1125 {
1126 if (DR_IS_READ (ref->ref))
726a989a 1127 name = gimple_assign_lhs (ref->stmt);
bbc8a8dc 1128 else
726a989a 1129 name = gimple_assign_rhs1 (ref->stmt);
bbc8a8dc
ZD
1130 }
1131 else
1132 name = PHI_RESULT (ref->stmt);
1133 if (!name)
726a989a 1134 return NULL;
bbc8a8dc 1135
726a989a
RB
1136 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1137 {
538dd0b7 1138 phi = psi.phi ();
726a989a
RB
1139 if (PHI_ARG_DEF_FROM_EDGE (phi, latch) == name)
1140 break;
1141 }
bbc8a8dc 1142
726a989a
RB
1143 if (gsi_end_p (psi))
1144 return NULL;
bbc8a8dc
ZD
1145
1146 init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1147 if (TREE_CODE (init) != SSA_NAME)
726a989a 1148 return NULL;
bbc8a8dc 1149 init_stmt = SSA_NAME_DEF_STMT (init);
726a989a
RB
1150 if (gimple_code (init_stmt) != GIMPLE_ASSIGN)
1151 return NULL;
1152 gcc_assert (gimple_assign_lhs (init_stmt) == init);
bbc8a8dc 1153
726a989a 1154 init_ref = gimple_assign_rhs1 (init_stmt);
bbc8a8dc
ZD
1155 if (!REFERENCE_CLASS_P (init_ref)
1156 && !DECL_P (init_ref))
726a989a 1157 return NULL;
bbc8a8dc
ZD
1158
1159 /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
1160 loop enclosing PHI). */
1161 memset (&init_dr, 0, sizeof (struct data_reference));
1162 DR_REF (&init_dr) = init_ref;
1163 DR_STMT (&init_dr) = phi;
4e4452b6 1164 if (!dr_analyze_innermost (&init_dr, loop))
3661e899 1165 return NULL;
bbc8a8dc
ZD
1166
1167 if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
726a989a 1168 return NULL;
bbc8a8dc
ZD
1169
1170 return phi;
1171}
1172
1173/* Adds a reference for the looparound copy of REF in PHI to CHAIN. */
1174
1175static void
538dd0b7 1176insert_looparound_copy (chain_p chain, dref ref, gphi *phi)
bbc8a8dc 1177{
7e5487a2 1178 dref nw = XCNEW (struct dref_d), aref;
bbc8a8dc
ZD
1179 unsigned i;
1180
1181 nw->stmt = phi;
1182 nw->distance = ref->distance + 1;
1183 nw->always_accessed = 1;
1184
9771b263 1185 FOR_EACH_VEC_ELT (chain->refs, i, aref)
bbc8a8dc
ZD
1186 if (aref->distance >= nw->distance)
1187 break;
9771b263 1188 chain->refs.safe_insert (i, nw);
bbc8a8dc
ZD
1189
1190 if (nw->distance > chain->length)
1191 {
1192 chain->length = nw->distance;
1193 chain->has_max_use_after = false;
1194 }
1195}
1196
1197/* For references in CHAIN that are copied around the LOOP (created previously
1198 by PRE, or by user), add the results of such copies to the chain. This
1199 enables us to remove the copies by unrolling, and may need less registers
1200 (also, it may allow us to combine chains together). */
1201
1202static void
1203add_looparound_copies (struct loop *loop, chain_p chain)
1204{
1205 unsigned i;
1206 dref ref, root = get_chain_root (chain);
538dd0b7 1207 gphi *phi;
bbc8a8dc 1208
9771b263 1209 FOR_EACH_VEC_ELT (chain->refs, i, ref)
bbc8a8dc
ZD
1210 {
1211 phi = find_looparound_phi (loop, ref, root);
1212 if (!phi)
1213 continue;
1214
1215 bitmap_set_bit (looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
1216 insert_looparound_copy (chain, ref, phi);
1217 }
1218}
1219
1220/* Find roots of the values and determine distances in the component COMP.
1221 The references are redistributed into CHAINS. LOOP is the current
1222 loop. */
1223
1224static void
1225determine_roots_comp (struct loop *loop,
1226 struct component *comp,
9771b263 1227 vec<chain_p> *chains)
bbc8a8dc
ZD
1228{
1229 unsigned i;
1230 dref a;
1231 chain_p chain = NULL;
807e902e 1232 widest_int last_ofs = 0;
bbc8a8dc
ZD
1233
1234 /* Invariants are handled specially. */
1235 if (comp->comp_step == RS_INVARIANT)
1236 {
1237 chain = make_invariant_chain (comp);
9771b263 1238 chains->safe_push (chain);
bbc8a8dc
ZD
1239 return;
1240 }
1241
9771b263 1242 comp->refs.qsort (order_drefs);
bbc8a8dc 1243
9771b263 1244 FOR_EACH_VEC_ELT (comp->refs, i, a)
bbc8a8dc 1245 {
b0af49c4 1246 if (!chain || DR_IS_WRITE (a->ref)
807e902e 1247 || wi::leu_p (MAX_DISTANCE, a->offset - last_ofs))
bbc8a8dc
ZD
1248 {
1249 if (nontrivial_chain_p (chain))
b61b1f17
MM
1250 {
1251 add_looparound_copies (loop, chain);
9771b263 1252 chains->safe_push (chain);
b61b1f17 1253 }
bbc8a8dc
ZD
1254 else
1255 release_chain (chain);
1256 chain = make_rooted_chain (a);
b61b1f17 1257 last_ofs = a->offset;
bbc8a8dc
ZD
1258 continue;
1259 }
1260
1261 add_ref_to_chain (chain, a);
1262 }
1263
1264 if (nontrivial_chain_p (chain))
1265 {
1266 add_looparound_copies (loop, chain);
9771b263 1267 chains->safe_push (chain);
bbc8a8dc
ZD
1268 }
1269 else
1270 release_chain (chain);
1271}
1272
1273/* Find roots of the values and determine distances in components COMPS, and
1274 separates the references to CHAINS. LOOP is the current loop. */
1275
1276static void
1277determine_roots (struct loop *loop,
9771b263 1278 struct component *comps, vec<chain_p> *chains)
bbc8a8dc
ZD
1279{
1280 struct component *comp;
1281
1282 for (comp = comps; comp; comp = comp->next)
1283 determine_roots_comp (loop, comp, chains);
1284}
1285
1286/* Replace the reference in statement STMT with temporary variable
82d6e6fc 1287 NEW_TREE. If SET is true, NEW_TREE is instead initialized to the value of
bbc8a8dc
ZD
1288 the reference in the statement. IN_LHS is true if the reference
1289 is in the lhs of STMT, false if it is in rhs. */
1290
1291static void
355fe088 1292replace_ref_with (gimple *stmt, tree new_tree, bool set, bool in_lhs)
bbc8a8dc 1293{
726a989a 1294 tree val;
538dd0b7 1295 gassign *new_stmt;
726a989a 1296 gimple_stmt_iterator bsi, psi;
bbc8a8dc 1297
726a989a 1298 if (gimple_code (stmt) == GIMPLE_PHI)
bbc8a8dc
ZD
1299 {
1300 gcc_assert (!in_lhs && !set);
1301
1302 val = PHI_RESULT (stmt);
726a989a
RB
1303 bsi = gsi_after_labels (gimple_bb (stmt));
1304 psi = gsi_for_stmt (stmt);
1305 remove_phi_node (&psi, false);
bbc8a8dc 1306
726a989a 1307 /* Turn the phi node into GIMPLE_ASSIGN. */
82d6e6fc 1308 new_stmt = gimple_build_assign (val, new_tree);
726a989a 1309 gsi_insert_before (&bsi, new_stmt, GSI_NEW_STMT);
bbc8a8dc
ZD
1310 return;
1311 }
b8698a0f 1312
bbc8a8dc
ZD
1313 /* Since the reference is of gimple_reg type, it should only
1314 appear as lhs or rhs of modify statement. */
726a989a
RB
1315 gcc_assert (is_gimple_assign (stmt));
1316
1317 bsi = gsi_for_stmt (stmt);
bbc8a8dc 1318
82d6e6fc 1319 /* If we do not need to initialize NEW_TREE, just replace the use of OLD. */
bbc8a8dc
ZD
1320 if (!set)
1321 {
1322 gcc_assert (!in_lhs);
82d6e6fc 1323 gimple_assign_set_rhs_from_tree (&bsi, new_tree);
726a989a 1324 stmt = gsi_stmt (bsi);
bbc8a8dc
ZD
1325 update_stmt (stmt);
1326 return;
1327 }
1328
bbc8a8dc
ZD
1329 if (in_lhs)
1330 {
726a989a 1331 /* We have statement
b8698a0f 1332
726a989a 1333 OLD = VAL
bbc8a8dc 1334
726a989a
RB
1335 If OLD is a memory reference, then VAL is gimple_val, and we transform
1336 this to
bbc8a8dc
ZD
1337
1338 OLD = VAL
1339 NEW = VAL
1340
b8698a0f 1341 Otherwise, we are replacing a combination chain,
726a989a
RB
1342 VAL is the expression that performs the combination, and OLD is an
1343 SSA name. In this case, we transform the assignment to
1344
1345 OLD = VAL
1346 NEW = OLD
1347
1348 */
1349
1350 val = gimple_assign_lhs (stmt);
1351 if (TREE_CODE (val) != SSA_NAME)
1352 {
726a989a 1353 val = gimple_assign_rhs1 (stmt);
7e8b1c4b
JJ
1354 gcc_assert (gimple_assign_single_p (stmt));
1355 if (TREE_CLOBBER_P (val))
32244553 1356 val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (new_tree));
7e8b1c4b
JJ
1357 else
1358 gcc_assert (gimple_assign_copy_p (stmt));
726a989a 1359 }
bbc8a8dc
ZD
1360 }
1361 else
1362 {
bbc8a8dc
ZD
1363 /* VAL = OLD
1364
1365 is transformed to
1366
1367 VAL = OLD
1368 NEW = VAL */
726a989a
RB
1369
1370 val = gimple_assign_lhs (stmt);
bbc8a8dc
ZD
1371 }
1372
82d6e6fc 1373 new_stmt = gimple_build_assign (new_tree, unshare_expr (val));
726a989a 1374 gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT);
bbc8a8dc
ZD
1375}
1376
9f2b860b
RB
1377/* Returns a memory reference to DR in the ITER-th iteration of
1378 the loop it was analyzed in. Append init stmts to STMTS. */
1379
1380static tree
1381ref_at_iteration (data_reference_p dr, int iter, gimple_seq *stmts)
1382{
1383 tree off = DR_OFFSET (dr);
1384 tree coff = DR_INIT (dr);
1385 if (iter == 0)
1386 ;
1387 else if (TREE_CODE (DR_STEP (dr)) == INTEGER_CST)
1388 coff = size_binop (PLUS_EXPR, coff,
1389 size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter)));
bbc8a8dc 1390 else
9f2b860b
RB
1391 off = size_binop (PLUS_EXPR, off,
1392 size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter)));
1393 tree addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off);
7ec44c3d
RB
1394 addr = force_gimple_operand_1 (unshare_expr (addr), stmts,
1395 is_gimple_mem_ref_addr, NULL_TREE);
cb3d1e3e
RB
1396 tree alias_ptr = fold_convert (reference_alias_ptr_type (DR_REF (dr)), coff);
1397 /* While data-ref analysis punts on bit offsets it still handles
1398 bitfield accesses at byte boundaries. Cope with that. Note that
1399 we cannot simply re-apply the outer COMPONENT_REF because the
1400 byte-granular portion of it is already applied via DR_INIT and
1401 DR_OFFSET, so simply build a BIT_FIELD_REF knowing that the bits
1402 start at offset zero. */
1403 if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
1404 && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
1405 {
1406 tree field = TREE_OPERAND (DR_REF (dr), 1);
1407 return build3 (BIT_FIELD_REF, TREE_TYPE (DR_REF (dr)),
1408 build2 (MEM_REF, DECL_BIT_FIELD_TYPE (field),
1409 addr, alias_ptr),
1410 DECL_SIZE (field), bitsize_zero_node);
1411 }
1412 else
1413 return fold_build2 (MEM_REF, TREE_TYPE (DR_REF (dr)), addr, alias_ptr);
bbc8a8dc
ZD
1414}
1415
1416/* Get the initialization expression for the INDEX-th temporary variable
1417 of CHAIN. */
1418
1419static tree
1420get_init_expr (chain_p chain, unsigned index)
1421{
1422 if (chain->type == CT_COMBINATION)
1423 {
1424 tree e1 = get_init_expr (chain->ch1, index);
1425 tree e2 = get_init_expr (chain->ch2, index);
1426
82d6e6fc 1427 return fold_build2 (chain->op, chain->rslt_type, e1, e2);
bbc8a8dc
ZD
1428 }
1429 else
9771b263 1430 return chain->inits[index];
bbc8a8dc
ZD
1431}
1432
2664efb6
ZD
1433/* Returns a new temporary variable used for the I-th variable carrying
1434 value of REF. The variable's uid is marked in TMP_VARS. */
1435
1436static tree
1437predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars)
1438{
1439 tree type = TREE_TYPE (ref);
2664efb6
ZD
1440 /* We never access the components of the temporary variable in predictive
1441 commoning. */
acd63801 1442 tree var = create_tmp_reg (type, get_lsm_tmp_name (ref, i));
2664efb6
ZD
1443 bitmap_set_bit (tmp_vars, DECL_UID (var));
1444 return var;
1445}
1446
bbc8a8dc
ZD
1447/* Creates the variables for CHAIN, as well as phi nodes for them and
1448 initialization on entry to LOOP. Uids of the newly created
1449 temporary variables are marked in TMP_VARS. */
1450
1451static void
1452initialize_root_vars (struct loop *loop, chain_p chain, bitmap tmp_vars)
1453{
1454 unsigned i;
1455 unsigned n = chain->length;
1456 dref root = get_chain_root (chain);
1457 bool reuse_first = !chain->has_max_use_after;
726a989a 1458 tree ref, init, var, next;
538dd0b7 1459 gphi *phi;
726a989a 1460 gimple_seq stmts;
bbc8a8dc
ZD
1461 edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1462
1463 /* If N == 0, then all the references are within the single iteration. And
1464 since this is an nonempty chain, reuse_first cannot be true. */
1465 gcc_assert (n > 0 || !reuse_first);
1466
9771b263 1467 chain->vars.create (n + 1);
bbc8a8dc
ZD
1468
1469 if (chain->type == CT_COMBINATION)
726a989a 1470 ref = gimple_assign_lhs (root->stmt);
bbc8a8dc
ZD
1471 else
1472 ref = DR_REF (root->ref);
1473
1474 for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
1475 {
2664efb6 1476 var = predcom_tmp_var (ref, i, tmp_vars);
9771b263 1477 chain->vars.quick_push (var);
bbc8a8dc
ZD
1478 }
1479 if (reuse_first)
9771b263 1480 chain->vars.quick_push (chain->vars[0]);
b8698a0f 1481
9771b263 1482 FOR_EACH_VEC_ELT (chain->vars, i, var)
b731b390 1483 chain->vars[i] = make_ssa_name (var);
bbc8a8dc
ZD
1484
1485 for (i = 0; i < n; i++)
1486 {
9771b263
DN
1487 var = chain->vars[i];
1488 next = chain->vars[i + 1];
bbc8a8dc
ZD
1489 init = get_init_expr (chain, i);
1490
1491 init = force_gimple_operand (init, &stmts, true, NULL_TREE);
1492 if (stmts)
5006671f 1493 gsi_insert_seq_on_edge_immediate (entry, stmts);
bbc8a8dc
ZD
1494
1495 phi = create_phi_node (var, loop->header);
9e227d60
DC
1496 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1497 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
bbc8a8dc
ZD
1498 }
1499}
1500
1501/* Create the variables and initialization statement for root of chain
1502 CHAIN. Uids of the newly created temporary variables are marked
1503 in TMP_VARS. */
1504
1505static void
1506initialize_root (struct loop *loop, chain_p chain, bitmap tmp_vars)
1507{
1508 dref root = get_chain_root (chain);
1509 bool in_lhs = (chain->type == CT_STORE_LOAD
1510 || chain->type == CT_COMBINATION);
1511
1512 initialize_root_vars (loop, chain, tmp_vars);
1513 replace_ref_with (root->stmt,
9771b263 1514 chain->vars[chain->length],
bbc8a8dc
ZD
1515 true, in_lhs);
1516}
1517
1518/* Initializes a variable for load motion for ROOT and prepares phi nodes and
1519 initialization on entry to LOOP if necessary. The ssa name for the variable
1520 is stored in VARS. If WRITTEN is true, also a phi node to copy its value
1521 around the loop is created. Uid of the newly created temporary variable
1522 is marked in TMP_VARS. INITS is the list containing the (single)
1523 initializer. */
1524
1525static void
1526initialize_root_vars_lm (struct loop *loop, dref root, bool written,
9771b263 1527 vec<tree> *vars, vec<tree> inits,
bbc8a8dc
ZD
1528 bitmap tmp_vars)
1529{
1530 unsigned i;
726a989a
RB
1531 tree ref = DR_REF (root->ref), init, var, next;
1532 gimple_seq stmts;
538dd0b7 1533 gphi *phi;
bbc8a8dc
ZD
1534 edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1535
1536 /* Find the initializer for the variable, and check that it cannot
1537 trap. */
9771b263 1538 init = inits[0];
bbc8a8dc 1539
9771b263 1540 vars->create (written ? 2 : 1);
2664efb6 1541 var = predcom_tmp_var (ref, 0, tmp_vars);
9771b263 1542 vars->quick_push (var);
bbc8a8dc 1543 if (written)
9771b263 1544 vars->quick_push ((*vars)[0]);
b8698a0f 1545
9771b263 1546 FOR_EACH_VEC_ELT (*vars, i, var)
b731b390 1547 (*vars)[i] = make_ssa_name (var);
bbc8a8dc 1548
9771b263 1549 var = (*vars)[0];
b8698a0f 1550
bbc8a8dc
ZD
1551 init = force_gimple_operand (init, &stmts, written, NULL_TREE);
1552 if (stmts)
5006671f 1553 gsi_insert_seq_on_edge_immediate (entry, stmts);
bbc8a8dc
ZD
1554
1555 if (written)
1556 {
9771b263 1557 next = (*vars)[1];
bbc8a8dc 1558 phi = create_phi_node (var, loop->header);
9e227d60
DC
1559 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1560 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
bbc8a8dc
ZD
1561 }
1562 else
1563 {
538dd0b7 1564 gassign *init_stmt = gimple_build_assign (var, init);
726a989a 1565 gsi_insert_on_edge_immediate (entry, init_stmt);
bbc8a8dc
ZD
1566 }
1567}
1568
1569
1570/* Execute load motion for references in chain CHAIN. Uids of the newly
1571 created temporary variables are marked in TMP_VARS. */
1572
1573static void
1574execute_load_motion (struct loop *loop, chain_p chain, bitmap tmp_vars)
1575{
ef062b13 1576 auto_vec<tree> vars;
bbc8a8dc
ZD
1577 dref a;
1578 unsigned n_writes = 0, ridx, i;
1579 tree var;
1580
1581 gcc_assert (chain->type == CT_INVARIANT);
1582 gcc_assert (!chain->combined);
9771b263 1583 FOR_EACH_VEC_ELT (chain->refs, i, a)
b0af49c4 1584 if (DR_IS_WRITE (a->ref))
bbc8a8dc 1585 n_writes++;
b8698a0f 1586
bbc8a8dc 1587 /* If there are no reads in the loop, there is nothing to do. */
9771b263 1588 if (n_writes == chain->refs.length ())
bbc8a8dc
ZD
1589 return;
1590
1591 initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
1592 &vars, chain->inits, tmp_vars);
1593
1594 ridx = 0;
9771b263 1595 FOR_EACH_VEC_ELT (chain->refs, i, a)
bbc8a8dc
ZD
1596 {
1597 bool is_read = DR_IS_READ (a->ref);
bbc8a8dc 1598
b0af49c4 1599 if (DR_IS_WRITE (a->ref))
bbc8a8dc
ZD
1600 {
1601 n_writes--;
1602 if (n_writes)
1603 {
9771b263 1604 var = vars[0];
b731b390 1605 var = make_ssa_name (SSA_NAME_VAR (var));
9771b263 1606 vars[0] = var;
bbc8a8dc
ZD
1607 }
1608 else
1609 ridx = 1;
1610 }
b8698a0f 1611
9771b263 1612 replace_ref_with (a->stmt, vars[ridx],
bbc8a8dc
ZD
1613 !is_read, !is_read);
1614 }
bbc8a8dc
ZD
1615}
1616
1617/* Returns the single statement in that NAME is used, excepting
1618 the looparound phi nodes contained in one of the chains. If there is no
726a989a 1619 such statement, or more statements, NULL is returned. */
bbc8a8dc 1620
355fe088 1621static gimple *
bbc8a8dc
ZD
1622single_nonlooparound_use (tree name)
1623{
1624 use_operand_p use;
1625 imm_use_iterator it;
355fe088 1626 gimple *stmt, *ret = NULL;
bbc8a8dc
ZD
1627
1628 FOR_EACH_IMM_USE_FAST (use, it, name)
1629 {
1630 stmt = USE_STMT (use);
1631
726a989a 1632 if (gimple_code (stmt) == GIMPLE_PHI)
bbc8a8dc
ZD
1633 {
1634 /* Ignore uses in looparound phi nodes. Uses in other phi nodes
1635 could not be processed anyway, so just fail for them. */
1636 if (bitmap_bit_p (looparound_phis,
1637 SSA_NAME_VERSION (PHI_RESULT (stmt))))
1638 continue;
1639
726a989a 1640 return NULL;
bbc8a8dc 1641 }
b63f974e
JJ
1642 else if (is_gimple_debug (stmt))
1643 continue;
726a989a
RB
1644 else if (ret != NULL)
1645 return NULL;
bbc8a8dc
ZD
1646 else
1647 ret = stmt;
1648 }
1649
1650 return ret;
1651}
1652
1653/* Remove statement STMT, as well as the chain of assignments in that it is
1654 used. */
1655
1656static void
355fe088 1657remove_stmt (gimple *stmt)
bbc8a8dc 1658{
726a989a 1659 tree name;
355fe088 1660 gimple *next;
726a989a 1661 gimple_stmt_iterator psi;
bbc8a8dc 1662
726a989a 1663 if (gimple_code (stmt) == GIMPLE_PHI)
bbc8a8dc
ZD
1664 {
1665 name = PHI_RESULT (stmt);
1666 next = single_nonlooparound_use (name);
273ccb6d 1667 reset_debug_uses (stmt);
726a989a
RB
1668 psi = gsi_for_stmt (stmt);
1669 remove_phi_node (&psi, true);
bbc8a8dc
ZD
1670
1671 if (!next
5f8ecf45 1672 || !gimple_assign_ssa_name_copy_p (next)
726a989a 1673 || gimple_assign_rhs1 (next) != name)
bbc8a8dc
ZD
1674 return;
1675
1676 stmt = next;
1677 }
1678
1679 while (1)
1680 {
726a989a 1681 gimple_stmt_iterator bsi;
b8698a0f 1682
726a989a 1683 bsi = gsi_for_stmt (stmt);
bbc8a8dc 1684
726a989a 1685 name = gimple_assign_lhs (stmt);
bbc8a8dc
ZD
1686 gcc_assert (TREE_CODE (name) == SSA_NAME);
1687
1688 next = single_nonlooparound_use (name);
273ccb6d 1689 reset_debug_uses (stmt);
bbc8a8dc 1690
13714310 1691 unlink_stmt_vdef (stmt);
726a989a 1692 gsi_remove (&bsi, true);
5f8ecf45 1693 release_defs (stmt);
bbc8a8dc
ZD
1694
1695 if (!next
5f8ecf45 1696 || !gimple_assign_ssa_name_copy_p (next)
726a989a 1697 || gimple_assign_rhs1 (next) != name)
bbc8a8dc
ZD
1698 return;
1699
1700 stmt = next;
1701 }
1702}
1703
1704/* Perform the predictive commoning optimization for a chain CHAIN.
1705 Uids of the newly created temporary variables are marked in TMP_VARS.*/
1706
1707static void
1708execute_pred_commoning_chain (struct loop *loop, chain_p chain,
1709 bitmap tmp_vars)
1710{
1711 unsigned i;
13714310 1712 dref a;
bbc8a8dc
ZD
1713 tree var;
1714
1715 if (chain->combined)
1716 {
1717 /* For combined chains, just remove the statements that are used to
a933d47f
RB
1718 compute the values of the expression (except for the root one).
1719 We delay this until after all chains are processed. */
bbc8a8dc
ZD
1720 }
1721 else
1722 {
1723 /* For non-combined chains, set up the variables that hold its value,
1724 and replace the uses of the original references by these
1725 variables. */
bbc8a8dc 1726 initialize_root (loop, chain, tmp_vars);
9771b263 1727 for (i = 1; chain->refs.iterate (i, &a); i++)
bbc8a8dc 1728 {
9771b263 1729 var = chain->vars[chain->length - a->distance];
bbc8a8dc
ZD
1730 replace_ref_with (a->stmt, var, false, false);
1731 }
1732 }
1733}
1734
1735/* Determines the unroll factor necessary to remove as many temporary variable
1736 copies as possible. CHAINS is the list of chains that will be
1737 optimized. */
1738
1739static unsigned
9771b263 1740determine_unroll_factor (vec<chain_p> chains)
bbc8a8dc
ZD
1741{
1742 chain_p chain;
1743 unsigned factor = 1, af, nfactor, i;
1744 unsigned max = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1745
9771b263 1746 FOR_EACH_VEC_ELT (chains, i, chain)
bbc8a8dc 1747 {
8039a35d 1748 if (chain->type == CT_INVARIANT)
bbc8a8dc
ZD
1749 continue;
1750
8039a35d
RB
1751 if (chain->combined)
1752 {
1753 /* For combined chains, we can't handle unrolling if we replace
1754 looparound PHIs. */
1755 dref a;
1756 unsigned j;
1757 for (j = 1; chain->refs.iterate (j, &a); j++)
1758 if (gimple_code (a->stmt) == GIMPLE_PHI)
1759 return 1;
1760 continue;
1761 }
1762
bbc8a8dc
ZD
1763 /* The best unroll factor for this chain is equal to the number of
1764 temporary variables that we create for it. */
1765 af = chain->length;
1766 if (chain->has_max_use_after)
1767 af++;
1768
1769 nfactor = factor * af / gcd (factor, af);
1770 if (nfactor <= max)
1771 factor = nfactor;
1772 }
1773
1774 return factor;
1775}
1776
1777/* Perform the predictive commoning optimization for CHAINS.
1778 Uids of the newly created temporary variables are marked in TMP_VARS. */
1779
1780static void
9771b263 1781execute_pred_commoning (struct loop *loop, vec<chain_p> chains,
bbc8a8dc
ZD
1782 bitmap tmp_vars)
1783{
1784 chain_p chain;
1785 unsigned i;
1786
9771b263 1787 FOR_EACH_VEC_ELT (chains, i, chain)
bbc8a8dc
ZD
1788 {
1789 if (chain->type == CT_INVARIANT)
1790 execute_load_motion (loop, chain, tmp_vars);
1791 else
1792 execute_pred_commoning_chain (loop, chain, tmp_vars);
1793 }
b8698a0f 1794
a933d47f
RB
1795 FOR_EACH_VEC_ELT (chains, i, chain)
1796 {
1797 if (chain->type == CT_INVARIANT)
1798 ;
1799 else if (chain->combined)
1800 {
1801 /* For combined chains, just remove the statements that are used to
1802 compute the values of the expression (except for the root one). */
1803 dref a;
1804 unsigned j;
1805 for (j = 1; chain->refs.iterate (j, &a); j++)
1806 remove_stmt (a->stmt);
1807 }
1808 }
1809
bbc8a8dc
ZD
1810 update_ssa (TODO_update_ssa_only_virtuals);
1811}
1812
c80b4100 1813/* For each reference in CHAINS, if its defining statement is
726a989a 1814 phi node, record the ssa name that is defined by it. */
bbc8a8dc
ZD
1815
1816static void
9771b263 1817replace_phis_by_defined_names (vec<chain_p> chains)
bbc8a8dc
ZD
1818{
1819 chain_p chain;
1820 dref a;
1821 unsigned i, j;
1822
9771b263
DN
1823 FOR_EACH_VEC_ELT (chains, i, chain)
1824 FOR_EACH_VEC_ELT (chain->refs, j, a)
bbc8a8dc 1825 {
726a989a
RB
1826 if (gimple_code (a->stmt) == GIMPLE_PHI)
1827 {
1828 a->name_defined_by_phi = PHI_RESULT (a->stmt);
1829 a->stmt = NULL;
1830 }
bbc8a8dc
ZD
1831 }
1832}
1833
726a989a
RB
1834/* For each reference in CHAINS, if name_defined_by_phi is not
1835 NULL, use it to set the stmt field. */
bbc8a8dc
ZD
1836
1837static void
9771b263 1838replace_names_by_phis (vec<chain_p> chains)
bbc8a8dc
ZD
1839{
1840 chain_p chain;
1841 dref a;
1842 unsigned i, j;
1843
9771b263
DN
1844 FOR_EACH_VEC_ELT (chains, i, chain)
1845 FOR_EACH_VEC_ELT (chain->refs, j, a)
726a989a 1846 if (a->stmt == NULL)
bbc8a8dc 1847 {
726a989a
RB
1848 a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi);
1849 gcc_assert (gimple_code (a->stmt) == GIMPLE_PHI);
1850 a->name_defined_by_phi = NULL_TREE;
bbc8a8dc
ZD
1851 }
1852}
1853
1854/* Wrapper over execute_pred_commoning, to pass it as a callback
1855 to tree_transform_and_unroll_loop. */
1856
1857struct epcc_data
1858{
9771b263 1859 vec<chain_p> chains;
bbc8a8dc
ZD
1860 bitmap tmp_vars;
1861};
1862
1863static void
1864execute_pred_commoning_cbck (struct loop *loop, void *data)
1865{
3d9a9f94 1866 struct epcc_data *const dta = (struct epcc_data *) data;
bbc8a8dc
ZD
1867
1868 /* Restore phi nodes that were replaced by ssa names before
1869 tree_transform_and_unroll_loop (see detailed description in
1870 tree_predictive_commoning_loop). */
1871 replace_names_by_phis (dta->chains);
1872 execute_pred_commoning (loop, dta->chains, dta->tmp_vars);
1873}
1874
bbc8a8dc
ZD
1875/* Base NAME and all the names in the chain of phi nodes that use it
1876 on variable VAR. The phi nodes are recognized by being in the copies of
1877 the header of the LOOP. */
1878
1879static void
1880base_names_in_chain_on (struct loop *loop, tree name, tree var)
1881{
355fe088 1882 gimple *stmt, *phi;
bbc8a8dc 1883 imm_use_iterator iter;
bbc8a8dc 1884
b2ec94d4 1885 replace_ssa_name_symbol (name, var);
bbc8a8dc
ZD
1886
1887 while (1)
1888 {
1889 phi = NULL;
1890 FOR_EACH_IMM_USE_STMT (stmt, iter, name)
1891 {
726a989a
RB
1892 if (gimple_code (stmt) == GIMPLE_PHI
1893 && flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
bbc8a8dc
ZD
1894 {
1895 phi = stmt;
1896 BREAK_FROM_IMM_USE_STMT (iter);
1897 }
1898 }
1899 if (!phi)
1900 return;
1901
bbc8a8dc 1902 name = PHI_RESULT (phi);
b2ec94d4 1903 replace_ssa_name_symbol (name, var);
bbc8a8dc
ZD
1904 }
1905}
1906
1907/* Given an unrolled LOOP after predictive commoning, remove the
1908 register copies arising from phi nodes by changing the base
1909 variables of SSA names. TMP_VARS is the set of the temporary variables
1910 for those we want to perform this. */
1911
1912static void
1913eliminate_temp_copies (struct loop *loop, bitmap tmp_vars)
1914{
1915 edge e;
538dd0b7 1916 gphi *phi;
355fe088 1917 gimple *stmt;
726a989a 1918 tree name, use, var;
538dd0b7 1919 gphi_iterator psi;
bbc8a8dc
ZD
1920
1921 e = loop_latch_edge (loop);
726a989a 1922 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
bbc8a8dc 1923 {
538dd0b7 1924 phi = psi.phi ();
bbc8a8dc
ZD
1925 name = PHI_RESULT (phi);
1926 var = SSA_NAME_VAR (name);
70b5e7dc 1927 if (!var || !bitmap_bit_p (tmp_vars, DECL_UID (var)))
bbc8a8dc
ZD
1928 continue;
1929 use = PHI_ARG_DEF_FROM_EDGE (phi, e);
1930 gcc_assert (TREE_CODE (use) == SSA_NAME);
1931
1932 /* Base all the ssa names in the ud and du chain of NAME on VAR. */
1933 stmt = SSA_NAME_DEF_STMT (use);
726a989a 1934 while (gimple_code (stmt) == GIMPLE_PHI
1b0cfaa6
ZD
1935 /* In case we could not unroll the loop enough to eliminate
1936 all copies, we may reach the loop header before the defining
1937 statement (in that case, some register copies will be present
1938 in loop latch in the final code, corresponding to the newly
1939 created looparound phi nodes). */
726a989a 1940 && gimple_bb (stmt) != loop->header)
bbc8a8dc 1941 {
726a989a 1942 gcc_assert (single_pred_p (gimple_bb (stmt)));
bbc8a8dc
ZD
1943 use = PHI_ARG_DEF (stmt, 0);
1944 stmt = SSA_NAME_DEF_STMT (use);
1945 }
1946
1947 base_names_in_chain_on (loop, use, var);
1948 }
1949}
1950
1951/* Returns true if CHAIN is suitable to be combined. */
1952
1953static bool
1954chain_can_be_combined_p (chain_p chain)
1955{
1956 return (!chain->combined
1957 && (chain->type == CT_LOAD || chain->type == CT_COMBINATION));
1958}
1959
1960/* Returns the modify statement that uses NAME. Skips over assignment
1961 statements, NAME is replaced with the actual name used in the returned
1962 statement. */
1963
355fe088 1964static gimple *
bbc8a8dc
ZD
1965find_use_stmt (tree *name)
1966{
355fe088 1967 gimple *stmt;
726a989a 1968 tree rhs, lhs;
bbc8a8dc
ZD
1969
1970 /* Skip over assignments. */
1971 while (1)
1972 {
1973 stmt = single_nonlooparound_use (*name);
1974 if (!stmt)
726a989a 1975 return NULL;
bbc8a8dc 1976
726a989a
RB
1977 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1978 return NULL;
bbc8a8dc 1979
726a989a 1980 lhs = gimple_assign_lhs (stmt);
bbc8a8dc 1981 if (TREE_CODE (lhs) != SSA_NAME)
726a989a 1982 return NULL;
bbc8a8dc 1983
726a989a
RB
1984 if (gimple_assign_copy_p (stmt))
1985 {
1986 rhs = gimple_assign_rhs1 (stmt);
1987 if (rhs != *name)
1988 return NULL;
bbc8a8dc 1989
726a989a
RB
1990 *name = lhs;
1991 }
1992 else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1993 == GIMPLE_BINARY_RHS)
1994 return stmt;
1995 else
1996 return NULL;
bbc8a8dc 1997 }
bbc8a8dc
ZD
1998}
1999
2000/* Returns true if we may perform reassociation for operation CODE in TYPE. */
2001
2002static bool
2003may_reassociate_p (tree type, enum tree_code code)
2004{
2005 if (FLOAT_TYPE_P (type)
2006 && !flag_unsafe_math_optimizations)
2007 return false;
2008
2009 return (commutative_tree_code (code)
2010 && associative_tree_code (code));
2011}
2012
2013/* If the operation used in STMT is associative and commutative, go through the
2014 tree of the same operations and returns its root. Distance to the root
2015 is stored in DISTANCE. */
2016
355fe088
TS
2017static gimple *
2018find_associative_operation_root (gimple *stmt, unsigned *distance)
bbc8a8dc 2019{
726a989a 2020 tree lhs;
355fe088 2021 gimple *next;
726a989a
RB
2022 enum tree_code code = gimple_assign_rhs_code (stmt);
2023 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
bbc8a8dc
ZD
2024 unsigned dist = 0;
2025
726a989a
RB
2026 if (!may_reassociate_p (type, code))
2027 return NULL;
bbc8a8dc
ZD
2028
2029 while (1)
2030 {
726a989a 2031 lhs = gimple_assign_lhs (stmt);
bbc8a8dc
ZD
2032 gcc_assert (TREE_CODE (lhs) == SSA_NAME);
2033
2034 next = find_use_stmt (&lhs);
726a989a
RB
2035 if (!next
2036 || gimple_assign_rhs_code (next) != code)
bbc8a8dc
ZD
2037 break;
2038
2039 stmt = next;
2040 dist++;
2041 }
2042
2043 if (distance)
2044 *distance = dist;
2045 return stmt;
2046}
2047
2048/* Returns the common statement in that NAME1 and NAME2 have a use. If there
2049 is no such statement, returns NULL_TREE. In case the operation used on
c80b4100 2050 NAME1 and NAME2 is associative and commutative, returns the root of the
bbc8a8dc
ZD
2051 tree formed by this operation instead of the statement that uses NAME1 or
2052 NAME2. */
2053
355fe088 2054static gimple *
bbc8a8dc
ZD
2055find_common_use_stmt (tree *name1, tree *name2)
2056{
355fe088 2057 gimple *stmt1, *stmt2;
bbc8a8dc
ZD
2058
2059 stmt1 = find_use_stmt (name1);
2060 if (!stmt1)
726a989a 2061 return NULL;
bbc8a8dc
ZD
2062
2063 stmt2 = find_use_stmt (name2);
2064 if (!stmt2)
726a989a 2065 return NULL;
bbc8a8dc
ZD
2066
2067 if (stmt1 == stmt2)
2068 return stmt1;
2069
2070 stmt1 = find_associative_operation_root (stmt1, NULL);
2071 if (!stmt1)
726a989a 2072 return NULL;
bbc8a8dc
ZD
2073 stmt2 = find_associative_operation_root (stmt2, NULL);
2074 if (!stmt2)
726a989a 2075 return NULL;
bbc8a8dc 2076
726a989a 2077 return (stmt1 == stmt2 ? stmt1 : NULL);
bbc8a8dc
ZD
2078}
2079
2080/* Checks whether R1 and R2 are combined together using CODE, with the result
2081 in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
2082 if it is true. If CODE is ERROR_MARK, set these values instead. */
2083
2084static bool
2085combinable_refs_p (dref r1, dref r2,
2086 enum tree_code *code, bool *swap, tree *rslt_type)
2087{
2088 enum tree_code acode;
2089 bool aswap;
2090 tree atype;
726a989a 2091 tree name1, name2;
355fe088 2092 gimple *stmt;
bbc8a8dc
ZD
2093
2094 name1 = name_for_ref (r1);
2095 name2 = name_for_ref (r2);
2096 gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);
2097
2098 stmt = find_common_use_stmt (&name1, &name2);
2099
7906dbe4
RB
2100 if (!stmt
2101 /* A simple post-dominance check - make sure the combination
2102 is executed under the same condition as the references. */
2103 || (gimple_bb (stmt) != gimple_bb (r1->stmt)
2104 && gimple_bb (stmt) != gimple_bb (r2->stmt)))
bbc8a8dc
ZD
2105 return false;
2106
726a989a 2107 acode = gimple_assign_rhs_code (stmt);
bbc8a8dc 2108 aswap = (!commutative_tree_code (acode)
726a989a
RB
2109 && gimple_assign_rhs1 (stmt) != name1);
2110 atype = TREE_TYPE (gimple_assign_lhs (stmt));
bbc8a8dc
ZD
2111
2112 if (*code == ERROR_MARK)
2113 {
2114 *code = acode;
2115 *swap = aswap;
2116 *rslt_type = atype;
2117 return true;
2118 }
2119
2120 return (*code == acode
2121 && *swap == aswap
2122 && *rslt_type == atype);
2123}
2124
2125/* Remove OP from the operation on rhs of STMT, and replace STMT with
2126 an assignment of the remaining operand. */
2127
2128static void
355fe088 2129remove_name_from_operation (gimple *stmt, tree op)
bbc8a8dc 2130{
726a989a
RB
2131 tree other_op;
2132 gimple_stmt_iterator si;
bbc8a8dc 2133
726a989a 2134 gcc_assert (is_gimple_assign (stmt));
bbc8a8dc 2135
726a989a
RB
2136 if (gimple_assign_rhs1 (stmt) == op)
2137 other_op = gimple_assign_rhs2 (stmt);
bbc8a8dc 2138 else
726a989a
RB
2139 other_op = gimple_assign_rhs1 (stmt);
2140
2141 si = gsi_for_stmt (stmt);
2142 gimple_assign_set_rhs_from_tree (&si, other_op);
2143
2144 /* We should not have reallocated STMT. */
2145 gcc_assert (gsi_stmt (si) == stmt);
2146
bbc8a8dc
ZD
2147 update_stmt (stmt);
2148}
2149
2150/* Reassociates the expression in that NAME1 and NAME2 are used so that they
2151 are combined in a single statement, and returns this statement. */
2152
355fe088 2153static gimple *
bbc8a8dc
ZD
2154reassociate_to_the_same_stmt (tree name1, tree name2)
2155{
355fe088 2156 gimple *stmt1, *stmt2, *root1, *root2, *s1, *s2;
538dd0b7 2157 gassign *new_stmt, *tmp_stmt;
726a989a 2158 tree new_name, tmp_name, var, r1, r2;
bbc8a8dc
ZD
2159 unsigned dist1, dist2;
2160 enum tree_code code;
2161 tree type = TREE_TYPE (name1);
726a989a 2162 gimple_stmt_iterator bsi;
bbc8a8dc
ZD
2163
2164 stmt1 = find_use_stmt (&name1);
2165 stmt2 = find_use_stmt (&name2);
2166 root1 = find_associative_operation_root (stmt1, &dist1);
2167 root2 = find_associative_operation_root (stmt2, &dist2);
726a989a 2168 code = gimple_assign_rhs_code (stmt1);
bbc8a8dc
ZD
2169
2170 gcc_assert (root1 && root2 && root1 == root2
726a989a 2171 && code == gimple_assign_rhs_code (stmt2));
bbc8a8dc
ZD
2172
2173 /* Find the root of the nearest expression in that both NAME1 and NAME2
2174 are used. */
2175 r1 = name1;
2176 s1 = stmt1;
2177 r2 = name2;
2178 s2 = stmt2;
2179
2180 while (dist1 > dist2)
2181 {
2182 s1 = find_use_stmt (&r1);
726a989a 2183 r1 = gimple_assign_lhs (s1);
bbc8a8dc
ZD
2184 dist1--;
2185 }
2186 while (dist2 > dist1)
2187 {
2188 s2 = find_use_stmt (&r2);
726a989a 2189 r2 = gimple_assign_lhs (s2);
bbc8a8dc
ZD
2190 dist2--;
2191 }
2192
2193 while (s1 != s2)
2194 {
2195 s1 = find_use_stmt (&r1);
726a989a 2196 r1 = gimple_assign_lhs (s1);
bbc8a8dc 2197 s2 = find_use_stmt (&r2);
726a989a 2198 r2 = gimple_assign_lhs (s2);
bbc8a8dc
ZD
2199 }
2200
2201 /* Remove NAME1 and NAME2 from the statements in that they are used
2202 currently. */
2203 remove_name_from_operation (stmt1, name1);
2204 remove_name_from_operation (stmt2, name2);
2205
2206 /* Insert the new statement combining NAME1 and NAME2 before S1, and
2207 combine it with the rhs of S1. */
acd63801 2208 var = create_tmp_reg (type, "predreastmp");
b731b390 2209 new_name = make_ssa_name (var);
0d0e4a03 2210 new_stmt = gimple_build_assign (new_name, code, name1, name2);
bbc8a8dc 2211
acd63801 2212 var = create_tmp_reg (type, "predreastmp");
b731b390 2213 tmp_name = make_ssa_name (var);
726a989a
RB
2214
2215 /* Rhs of S1 may now be either a binary expression with operation
2216 CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
2217 so that name1 or name2 was removed from it). */
0d0e4a03
JJ
2218 tmp_stmt = gimple_build_assign (tmp_name, gimple_assign_rhs_code (s1),
2219 gimple_assign_rhs1 (s1),
2220 gimple_assign_rhs2 (s1));
726a989a
RB
2221
2222 bsi = gsi_for_stmt (s1);
2223 gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name);
2224 s1 = gsi_stmt (bsi);
bbc8a8dc
ZD
2225 update_stmt (s1);
2226
726a989a
RB
2227 gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
2228 gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT);
bbc8a8dc
ZD
2229
2230 return new_stmt;
2231}
2232
2233/* Returns the statement that combines references R1 and R2. In case R1
2234 and R2 are not used in the same statement, but they are used with an
2235 associative and commutative operation in the same expression, reassociate
2236 the expression so that they are used in the same statement. */
2237
355fe088 2238static gimple *
bbc8a8dc
ZD
2239stmt_combining_refs (dref r1, dref r2)
2240{
355fe088 2241 gimple *stmt1, *stmt2;
bbc8a8dc
ZD
2242 tree name1 = name_for_ref (r1);
2243 tree name2 = name_for_ref (r2);
2244
2245 stmt1 = find_use_stmt (&name1);
2246 stmt2 = find_use_stmt (&name2);
2247 if (stmt1 == stmt2)
2248 return stmt1;
2249
2250 return reassociate_to_the_same_stmt (name1, name2);
2251}
2252
2253/* Tries to combine chains CH1 and CH2 together. If this succeeds, the
2254 description of the new chain is returned, otherwise we return NULL. */
2255
2256static chain_p
2257combine_chains (chain_p ch1, chain_p ch2)
2258{
2259 dref r1, r2, nw;
2260 enum tree_code op = ERROR_MARK;
2261 bool swap = false;
2262 chain_p new_chain;
2263 unsigned i;
355fe088 2264 gimple *root_stmt;
bbc8a8dc
ZD
2265 tree rslt_type = NULL_TREE;
2266
2267 if (ch1 == ch2)
a90352a0 2268 return NULL;
bbc8a8dc
ZD
2269 if (ch1->length != ch2->length)
2270 return NULL;
2271
9771b263 2272 if (ch1->refs.length () != ch2->refs.length ())
bbc8a8dc
ZD
2273 return NULL;
2274
9771b263
DN
2275 for (i = 0; (ch1->refs.iterate (i, &r1)
2276 && ch2->refs.iterate (i, &r2)); i++)
bbc8a8dc
ZD
2277 {
2278 if (r1->distance != r2->distance)
2279 return NULL;
2280
2281 if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type))
2282 return NULL;
2283 }
2284
2285 if (swap)
6b4db501 2286 std::swap (ch1, ch2);
bbc8a8dc
ZD
2287
2288 new_chain = XCNEW (struct chain);
2289 new_chain->type = CT_COMBINATION;
82d6e6fc 2290 new_chain->op = op;
bbc8a8dc
ZD
2291 new_chain->ch1 = ch1;
2292 new_chain->ch2 = ch2;
2293 new_chain->rslt_type = rslt_type;
2294 new_chain->length = ch1->length;
2295
9771b263
DN
2296 for (i = 0; (ch1->refs.iterate (i, &r1)
2297 && ch2->refs.iterate (i, &r2)); i++)
bbc8a8dc 2298 {
7e5487a2 2299 nw = XCNEW (struct dref_d);
bbc8a8dc
ZD
2300 nw->stmt = stmt_combining_refs (r1, r2);
2301 nw->distance = r1->distance;
2302
9771b263 2303 new_chain->refs.safe_push (nw);
bbc8a8dc
ZD
2304 }
2305
2306 new_chain->has_max_use_after = false;
2307 root_stmt = get_chain_root (new_chain)->stmt;
9771b263 2308 for (i = 1; new_chain->refs.iterate (i, &nw); i++)
bbc8a8dc
ZD
2309 {
2310 if (nw->distance == new_chain->length
2311 && !stmt_dominates_stmt_p (nw->stmt, root_stmt))
2312 {
2313 new_chain->has_max_use_after = true;
2314 break;
2315 }
2316 }
2317
2318 ch1->combined = true;
2319 ch2->combined = true;
2320 return new_chain;
2321}
2322
2323/* Try to combine the CHAINS. */
2324
2325static void
9771b263 2326try_combine_chains (vec<chain_p> *chains)
bbc8a8dc
ZD
2327{
2328 unsigned i, j;
2329 chain_p ch1, ch2, cch;
ef062b13 2330 auto_vec<chain_p> worklist;
bbc8a8dc 2331
9771b263 2332 FOR_EACH_VEC_ELT (*chains, i, ch1)
bbc8a8dc 2333 if (chain_can_be_combined_p (ch1))
9771b263 2334 worklist.safe_push (ch1);
bbc8a8dc 2335
9771b263 2336 while (!worklist.is_empty ())
bbc8a8dc 2337 {
9771b263 2338 ch1 = worklist.pop ();
bbc8a8dc
ZD
2339 if (!chain_can_be_combined_p (ch1))
2340 continue;
2341
9771b263 2342 FOR_EACH_VEC_ELT (*chains, j, ch2)
bbc8a8dc
ZD
2343 {
2344 if (!chain_can_be_combined_p (ch2))
2345 continue;
2346
2347 cch = combine_chains (ch1, ch2);
2348 if (cch)
2349 {
9771b263
DN
2350 worklist.safe_push (cch);
2351 chains->safe_push (cch);
bbc8a8dc
ZD
2352 break;
2353 }
2354 }
2355 }
2356}
2357
bbc8a8dc
ZD
2358/* Prepare initializers for CHAIN in LOOP. Returns false if this is
2359 impossible because one of these initializers may trap, true otherwise. */
2360
2361static bool
2362prepare_initializers_chain (struct loop *loop, chain_p chain)
2363{
2364 unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
2365 struct data_reference *dr = get_chain_root (chain)->ref;
726a989a 2366 tree init;
bbc8a8dc
ZD
2367 dref laref;
2368 edge entry = loop_preheader_edge (loop);
2369
2370 /* Find the initializers for the variables, and check that they cannot
2371 trap. */
9771b263 2372 chain->inits.create (n);
bbc8a8dc 2373 for (i = 0; i < n; i++)
9771b263 2374 chain->inits.quick_push (NULL_TREE);
bbc8a8dc
ZD
2375
2376 /* If we have replaced some looparound phi nodes, use their initializers
2377 instead of creating our own. */
9771b263 2378 FOR_EACH_VEC_ELT (chain->refs, i, laref)
bbc8a8dc 2379 {
726a989a 2380 if (gimple_code (laref->stmt) != GIMPLE_PHI)
bbc8a8dc
ZD
2381 continue;
2382
2383 gcc_assert (laref->distance > 0);
9771b263
DN
2384 chain->inits[n - laref->distance]
2385 = PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry);
bbc8a8dc
ZD
2386 }
2387
2388 for (i = 0; i < n; i++)
2389 {
7ec44c3d
RB
2390 gimple_seq stmts = NULL;
2391
9771b263 2392 if (chain->inits[i] != NULL_TREE)
bbc8a8dc
ZD
2393 continue;
2394
9f2b860b 2395 init = ref_at_iteration (dr, (int) i - n, &stmts);
bbc8a8dc 2396 if (!chain->all_always_accessed && tree_could_trap_p (init))
7ec44c3d
RB
2397 {
2398 gimple_seq_discard (stmts);
2399 return false;
2400 }
bbc8a8dc 2401
bbc8a8dc 2402 if (stmts)
5006671f 2403 gsi_insert_seq_on_edge_immediate (entry, stmts);
bbc8a8dc 2404
9771b263 2405 chain->inits[i] = init;
bbc8a8dc
ZD
2406 }
2407
2408 return true;
2409}
2410
2411/* Prepare initializers for CHAINS in LOOP, and free chains that cannot
2412 be used because the initializers might trap. */
2413
2414static void
9771b263 2415prepare_initializers (struct loop *loop, vec<chain_p> chains)
bbc8a8dc
ZD
2416{
2417 chain_p chain;
2418 unsigned i;
2419
9771b263 2420 for (i = 0; i < chains.length (); )
bbc8a8dc 2421 {
9771b263 2422 chain = chains[i];
bbc8a8dc
ZD
2423 if (prepare_initializers_chain (loop, chain))
2424 i++;
2425 else
2426 {
2427 release_chain (chain);
9771b263 2428 chains.unordered_remove (i);
bbc8a8dc
ZD
2429 }
2430 }
2431}
2432
2433/* Performs predictive commoning for LOOP. Returns true if LOOP was
2434 unrolled. */
2435
2436static bool
2437tree_predictive_commoning_loop (struct loop *loop)
2438{
9771b263
DN
2439 vec<data_reference_p> datarefs;
2440 vec<ddr_p> dependences;
bbc8a8dc 2441 struct component *components;
6e1aa848 2442 vec<chain_p> chains = vNULL;
bbc8a8dc
ZD
2443 unsigned unroll_factor;
2444 struct tree_niter_desc desc;
2445 bool unroll = false;
2446 edge exit;
2447 bitmap tmp_vars;
2448
2449 if (dump_file && (dump_flags & TDF_DETAILS))
2450 fprintf (dump_file, "Processing loop %d\n", loop->num);
2451
2452 /* Find the data references and split them into components according to their
2453 dependence relations. */
00f96dc9 2454 auto_vec<loop_p, 3> loop_nest;
9771b263 2455 dependences.create (10);
07687835 2456 datarefs.create (10);
9ca3d00e
AB
2457 if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
2458 &dependences))
2459 {
2460 if (dump_file && (dump_flags & TDF_DETAILS))
2461 fprintf (dump_file, "Cannot analyze data dependencies\n");
9ca3d00e
AB
2462 free_data_refs (datarefs);
2463 free_dependence_relations (dependences);
2464 return false;
2465 }
2466
bbc8a8dc
ZD
2467 if (dump_file && (dump_flags & TDF_DETAILS))
2468 dump_data_dependence_relations (dump_file, dependences);
2469
2470 components = split_data_refs_to_components (loop, datarefs, dependences);
9771b263 2471 loop_nest.release ();
bbc8a8dc
ZD
2472 free_dependence_relations (dependences);
2473 if (!components)
2474 {
2475 free_data_refs (datarefs);
4f87d581 2476 free_affine_expand_cache (&name_expansions);
bbc8a8dc
ZD
2477 return false;
2478 }
2479
2480 if (dump_file && (dump_flags & TDF_DETAILS))
2481 {
2482 fprintf (dump_file, "Initial state:\n\n");
2483 dump_components (dump_file, components);
2484 }
2485
2486 /* Find the suitable components and split them into chains. */
2487 components = filter_suitable_components (loop, components);
2488
2489 tmp_vars = BITMAP_ALLOC (NULL);
2490 looparound_phis = BITMAP_ALLOC (NULL);
2491 determine_roots (loop, components, &chains);
2492 release_components (components);
2493
9771b263 2494 if (!chains.exists ())
bbc8a8dc
ZD
2495 {
2496 if (dump_file && (dump_flags & TDF_DETAILS))
2497 fprintf (dump_file,
2498 "Predictive commoning failed: no suitable chains\n");
2499 goto end;
2500 }
2501 prepare_initializers (loop, chains);
2502
2503 /* Try to combine the chains that are always worked with together. */
2504 try_combine_chains (&chains);
2505
2506 if (dump_file && (dump_flags & TDF_DETAILS))
2507 {
2508 fprintf (dump_file, "Before commoning:\n\n");
2509 dump_chains (dump_file, chains);
2510 }
2511
2512 /* Determine the unroll factor, and if the loop should be unrolled, ensure
2513 that its number of iterations is divisible by the factor. */
2514 unroll_factor = determine_unroll_factor (chains);
2515 scev_reset ();
e1e6dc73
RG
2516 unroll = (unroll_factor > 1
2517 && can_unroll_loop_p (loop, unroll_factor, &desc));
bbc8a8dc
ZD
2518 exit = single_dom_exit (loop);
2519
2520 /* Execute the predictive commoning transformations, and possibly unroll the
2521 loop. */
2522 if (unroll)
2523 {
2524 struct epcc_data dta;
2525
2526 if (dump_file && (dump_flags & TDF_DETAILS))
2527 fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);
2528
2529 dta.chains = chains;
2530 dta.tmp_vars = tmp_vars;
b8698a0f 2531
bbc8a8dc
ZD
2532 update_ssa (TODO_update_ssa_only_virtuals);
2533
2534 /* Cfg manipulations performed in tree_transform_and_unroll_loop before
2535 execute_pred_commoning_cbck is called may cause phi nodes to be
2536 reallocated, which is a problem since CHAINS may point to these
2537 statements. To fix this, we store the ssa names defined by the
2538 phi nodes here instead of the phi nodes themselves, and restore
2539 the phi nodes in execute_pred_commoning_cbck. A bit hacky. */
2540 replace_phis_by_defined_names (chains);
2541
2542 tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc,
2543 execute_pred_commoning_cbck, &dta);
2544 eliminate_temp_copies (loop, tmp_vars);
2545 }
2546 else
2547 {
2548 if (dump_file && (dump_flags & TDF_DETAILS))
2549 fprintf (dump_file,
2550 "Executing predictive commoning without unrolling.\n");
2551 execute_pred_commoning (loop, chains, tmp_vars);
2552 }
2553
2554end: ;
2555 release_chains (chains);
2556 free_data_refs (datarefs);
2557 BITMAP_FREE (tmp_vars);
2558 BITMAP_FREE (looparound_phis);
2559
2560 free_affine_expand_cache (&name_expansions);
2561
2562 return unroll;
2563}
2564
2565/* Runs predictive commoning. */
2566
592c303d 2567unsigned
bbc8a8dc
ZD
2568tree_predictive_commoning (void)
2569{
2570 bool unrolled = false;
2571 struct loop *loop;
592c303d 2572 unsigned ret = 0;
bbc8a8dc
ZD
2573
2574 initialize_original_copy_tables ();
f0bd40b1 2575 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
8bcf15f6
JH
2576 if (optimize_loop_for_speed_p (loop))
2577 {
2578 unrolled |= tree_predictive_commoning_loop (loop);
2579 }
bbc8a8dc
ZD
2580
2581 if (unrolled)
2582 {
2583 scev_reset ();
592c303d 2584 ret = TODO_cleanup_cfg;
bbc8a8dc
ZD
2585 }
2586 free_original_copy_tables ();
592c303d
ZD
2587
2588 return ret;
bbc8a8dc 2589}
c1bf2a39
AM
2590
2591/* Predictive commoning Pass. */
2592
2593static unsigned
726338f4 2594run_tree_predictive_commoning (struct function *fun)
c1bf2a39 2595{
726338f4 2596 if (number_of_loops (fun) <= 1)
c1bf2a39
AM
2597 return 0;
2598
2599 return tree_predictive_commoning ();
2600}
2601
c1bf2a39
AM
2602namespace {
2603
2604const pass_data pass_data_predcom =
2605{
2606 GIMPLE_PASS, /* type */
2607 "pcom", /* name */
2608 OPTGROUP_LOOP, /* optinfo_flags */
c1bf2a39
AM
2609 TV_PREDCOM, /* tv_id */
2610 PROP_cfg, /* properties_required */
2611 0, /* properties_provided */
2612 0, /* properties_destroyed */
2613 0, /* todo_flags_start */
2614 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
2615};
2616
2617class pass_predcom : public gimple_opt_pass
2618{
2619public:
2620 pass_predcom (gcc::context *ctxt)
2621 : gimple_opt_pass (pass_data_predcom, ctxt)
2622 {}
2623
2624 /* opt_pass methods: */
1a3d085c 2625 virtual bool gate (function *) { return flag_predictive_commoning != 0; }
726338f4 2626 virtual unsigned int execute (function *fun)
be55bfe6 2627 {
726338f4 2628 return run_tree_predictive_commoning (fun);
be55bfe6 2629 }
c1bf2a39
AM
2630
2631}; // class pass_predcom
2632
2633} // anon namespace
2634
2635gimple_opt_pass *
2636make_pass_predcom (gcc::context *ctxt)
2637{
2638 return new pass_predcom (ctxt);
2639}
2640
2641