/* Predictive commoning.
   Copyright (C) 2005-2016 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This file implements the predictive commoning optimization.  Predictive
   commoning can be viewed as CSE around a loop, and with some improvements,
   as generalized strength reduction -- i.e., reusing values computed in
   earlier iterations of a loop in the later ones.  So far, the pass only
   handles the most useful case, that is, reusing values of memory references.
   If you think this is all just a special case of PRE, you are sort of right;
   however, concentrating on loops is simpler, and makes it possible to
   incorporate data dependence analysis to detect the opportunities, perform
   loop unrolling to avoid copies together with renaming immediately,
   and if needed, we could also take register pressure into account.

   Let us demonstrate what is done on an example:

   for (i = 0; i < 100; i++)
     {
       a[i+2] = a[i] + a[i+1];
       b[10] = b[10] + i;
       c[i] = c[99 - i];
       d[i] = d[i + 1];
     }

   1) We find data references in the loop, and split them to mutually
      independent groups (i.e., we find components of a data dependence
      graph).  We ignore read-read dependences whose distance is not constant.
      (TODO -- we could also ignore antidependences).  In this example, we
      find the following groups:

      a[i]{read}, a[i+1]{read}, a[i+2]{write}
      b[10]{read}, b[10]{write}
      c[99 - i]{read}, c[i]{write}
      d[i + 1]{read}, d[i]{write}

   2) Inside each of the groups, we verify several conditions:
      a) all the references must differ in indices only, and the indices
	 must all have the same step
      b) the references must dominate the loop latch (and thus, they must be
	 ordered by the dominance relation).
      c) the distance of the indices must be a small multiple of the step
      We are then able to compute the difference of the references (# of
      iterations before they point to the same place as the first of them).
      Also, in case there are writes in the loop, we split the groups into
      chains whose head is the write whose values are used by the reads in
      the same chain.  The chains are then processed independently,
      making the further transformations simpler.  Also, the shorter chains
      need the same number of registers, but may require a lower unrolling
      factor in order to get rid of the copies on the loop latch.

      In our example, we get the following chains (the chain for c is invalid).

      a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
      b[10]{read,+0}, b[10]{write,+0}
      d[i + 1]{read,+0}, d[i]{write,+1}

   3) For each read, we determine the read or write whose value it reuses,
      together with the distance of this reuse.  I.e. we take the last
      reference before it with distance 0, or the last of the references
      with the smallest positive distance to the read.  Then, we remove
      the references that are not used in any of these chains, discard the
      empty groups, and propagate all the links so that they point to the
      single root reference of the chain (adjusting their distance
      appropriately).  Some extra care needs to be taken for references with
      step 0.  In our example (the numbers indicate the distance of the
      reuse),

      a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
      b[10] --> (*) 1, b[10] (*)

   4) The chains are combined together if possible.  If the corresponding
      elements of two chains are always combined together with the same
      operator, we remember just the result of this combination, instead
      of remembering the values separately.  We may need to perform
      reassociation to enable combining, for example

      e[i] + f[i+1] + e[i+1] + f[i]

      can be reassociated as

      (e[i] + f[i]) + (e[i+1] + f[i+1])

      and we can combine the chains for e and f into one chain.

   5) For each root reference (end of the chain) R, let N be maximum distance
      of a reference reusing its value.  Variables R0 up to RN are created,
      together with phi nodes that transfer values from R1 .. RN to
      R0 .. R(N-1).
      Initial values are loaded to R0..R(N-1) (in case not all references
      must necessarily be accessed and they may trap, we may fail here;
      TODO sometimes, the loads could be guarded by a check for the number
      of iterations).  Values loaded/stored in roots are also copied to
      RN.  Other reads are replaced with the appropriate variable Ri.
      Everything is put to SSA form.

      As a small improvement, if R0 is dead after the root (i.e., all uses of
      the value with the maximum distance dominate the root), we can avoid
      creating RN and use R0 instead of it.

      In our example, we get (only the parts concerning a and b are shown):
      for (i = 0; i < 100; i++)
	{
	  f = phi (a[0], s);
	  s = phi (a[1], f);
	  x = phi (b[10], x);

	  f = f + s;
	  a[i+2] = f;
	  x = x + i;
	  b[10] = x;
	}

   6) Factor F for unrolling is determined as the smallest common multiple of
      (N + 1) for each root reference (N for references for which we avoided
      creating RN).  If F and the loop are small enough, the loop is unrolled F
      times.  The stores to RN (R0) in the copies of the loop body are
      periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
      be coalesced and the copies can be eliminated.

      TODO -- copy propagation and other optimizations may change the live
      ranges of the temporary registers and prevent them from being coalesced;
      this may increase the register pressure.

      In our case, F = 2 and the (main loop of the) result is

      for (i = 0; i < ...; i += 2)
	{
	  f = phi (a[0], f);
	  s = phi (a[1], s);
	  x = phi (b[10], x);

	  f = f + s;
	  a[i+2] = f;
	  x = x + i;
	  b[10] = x;

	  s = s + f;
	  a[i+3] = s;
	  x = x + i;
	  b[10] = x;
	}

   TODO -- stores killing other stores can be taken into account, e.g.,
   for (i = 0; i < n; i++)
     {
       a[i] = 1;
       a[i+2] = 2;
     }

   can be replaced with

   t0 = a[0];
   t1 = a[1];
   for (i = 0; i < n; i++)
     {
       a[i] = 1;
       t2 = 2;
       t0 = t1;
       t1 = t2;
     }
   a[n] = t0;
   a[n+1] = t1;

   The interesting part is that this would generalize store motion; still, since
   sm is performed elsewhere, it does not seem that important.

   Predictive commoning can be generalized for arbitrary computations (not
   just memory loads), and also nontrivial transfer functions (e.g., replacing
   i * i with ii_last + 2 * i + 1), to generalize strength reduction.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "tree-pass.h"
#include "ssa.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "cfgloop.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "tree-affine.h"
#include "builtins.h"

/* The maximum number of iterations between the considered memory
   references.  */

#define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
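
/* A reference whose reuse distance would reach MAX_DISTANCE iterations is not
   kept in a chain (see add_ref_to_chain and determine_roots_comp below);
   keeping a value live across that many iterations would require too many
   temporary variables.  */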

/* Data references (or phi nodes that carry data reference values across
   loop iterations).  */

typedef struct dref_d
{
  /* The reference itself.  */
  struct data_reference *ref;

  /* The statement in which the reference appears.  */
  gimple *stmt;

  /* In case that STMT is a phi node, this field is set to the SSA name
     defined by it in replace_phis_by_defined_names (in order to avoid
     pointing to a phi node that got reallocated in the meantime).  */
  tree name_defined_by_phi;

  /* Distance of the reference from the root of the chain (in number of
     iterations of the loop).  */
  unsigned distance;

  /* Number of iterations offset from the first reference in the component.  */
  widest_int offset;

  /* Number of the reference in a component, in dominance ordering.  */
  unsigned pos;

  /* True if the memory reference is always accessed when the loop is
     entered.  */
  unsigned always_accessed : 1;
} *dref;


/* Type of the chain of the references.  */

enum chain_type
{
  /* The addresses of the references in the chain are constant.  */
  CT_INVARIANT,

  /* There are only loads in the chain.  */
  CT_LOAD,

  /* Root of the chain is a store, the rest are loads.  */
  CT_STORE_LOAD,

  /* A combination of two chains.  */
  CT_COMBINATION
};

/* Chains of data references.  */

typedef struct chain
{
  /* Type of the chain.  */
  enum chain_type type;

  /* For combination chains, the operator and the two chains that are
     combined, and the type of the result.  */
  enum tree_code op;
  tree rslt_type;
  struct chain *ch1, *ch2;

  /* The references in the chain.  */
  vec<dref> refs;

  /* The maximum distance of the reference in the chain from the root.  */
  unsigned length;

  /* The variables used to copy the value throughout iterations.  */
  vec<tree> vars;

  /* Initializers for the variables.  */
  vec<tree> inits;

  /* True if there is a use of a variable with the maximal distance
     that comes after the root in the loop.  */
  unsigned has_max_use_after : 1;

  /* True if all the memory references in the chain are always accessed.  */
  unsigned all_always_accessed : 1;

  /* True if this chain was combined together with some other chain.  */
  unsigned combined : 1;
} *chain_p;
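
/* For instance, for the a[] group in the overview comment at the top of this
   file, the chain is a CT_STORE_LOAD chain rooted at the a[i+2] write; the
   reads a[i+1] and a[i] reuse its value at distances 1 and 2, so the chain's
   length is 2.  */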


/* Describes the knowledge about the step of the memory references in
   the component.  */

enum ref_step_type
{
  /* The step is zero.  */
  RS_INVARIANT,

  /* The step is nonzero.  */
  RS_NONZERO,

  /* The step may or may not be nonzero.  */
  RS_ANY
};

/* Components of the data dependence graph.  */

struct component
{
  /* The references in the component.  */
  vec<dref> refs;

  /* What we know about the step of the references in the component.  */
  enum ref_step_type comp_step;

  /* Next component in the list.  */
  struct component *next;
};

/* Bitmap of ssa names defined by looparound phi nodes covered by chains.  */

static bitmap looparound_phis;

/* Cache used by tree_to_aff_combination_expand.  */

static hash_map<tree, name_expansion *> *name_expansions;

/* Dumps data reference REF to FILE.  */

extern void dump_dref (FILE *, dref);
void
dump_dref (FILE *file, dref ref)
{
  if (ref->ref)
    {
      fprintf (file, "    ");
      print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM);
      fprintf (file, " (id %u%s)\n", ref->pos,
	       DR_IS_READ (ref->ref) ? "" : ", write");

      fprintf (file, "      offset ");
      print_decs (ref->offset, file);
      fprintf (file, "\n");

      fprintf (file, "      distance %u\n", ref->distance);
    }
  else
    {
      if (gimple_code (ref->stmt) == GIMPLE_PHI)
	fprintf (file, "    looparound ref\n");
      else
	fprintf (file, "    combination ref\n");
      fprintf (file, "      in statement ");
      print_gimple_stmt (file, ref->stmt, 0, TDF_SLIM);
      fprintf (file, "\n");
      fprintf (file, "      distance %u\n", ref->distance);
    }

}

/* Dumps CHAIN to FILE.  */

extern void dump_chain (FILE *, chain_p);
void
dump_chain (FILE *file, chain_p chain)
{
  dref a;
  const char *chain_type;
  unsigned i;
  tree var;

  switch (chain->type)
    {
    case CT_INVARIANT:
      chain_type = "Load motion";
      break;

    case CT_LOAD:
      chain_type = "Loads-only";
      break;

    case CT_STORE_LOAD:
      chain_type = "Store-loads";
      break;

    case CT_COMBINATION:
      chain_type = "Combination";
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "%s chain %p%s\n", chain_type, (void *) chain,
	   chain->combined ? " (combined)" : "");
  if (chain->type != CT_INVARIANT)
    fprintf (file, "  max distance %u%s\n", chain->length,
	     chain->has_max_use_after ? "" : ", may reuse first");

  if (chain->type == CT_COMBINATION)
    {
      fprintf (file, "  equal to %p %s %p in type ",
	       (void *) chain->ch1, op_symbol_code (chain->op),
	       (void *) chain->ch2);
      print_generic_expr (file, chain->rslt_type, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (chain->vars.exists ())
    {
      fprintf (file, "  vars");
      FOR_EACH_VEC_ELT (chain->vars, i, var)
	{
	  fprintf (file, " ");
	  print_generic_expr (file, var, TDF_SLIM);
	}
      fprintf (file, "\n");
    }

  if (chain->inits.exists ())
    {
      fprintf (file, "  inits");
      FOR_EACH_VEC_ELT (chain->inits, i, var)
	{
	  fprintf (file, " ");
	  print_generic_expr (file, var, TDF_SLIM);
	}
      fprintf (file, "\n");
    }

  fprintf (file, "  references:\n");
  FOR_EACH_VEC_ELT (chain->refs, i, a)
    dump_dref (file, a);

  fprintf (file, "\n");
}

/* Dumps CHAINS to FILE.  */

extern void dump_chains (FILE *, vec<chain_p> );
void
dump_chains (FILE *file, vec<chain_p> chains)
{
  chain_p chain;
  unsigned i;

  FOR_EACH_VEC_ELT (chains, i, chain)
    dump_chain (file, chain);
}

/* Dumps COMP to FILE.  */

extern void dump_component (FILE *, struct component *);
void
dump_component (FILE *file, struct component *comp)
{
  dref a;
  unsigned i;

  fprintf (file, "Component%s:\n",
	   comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
  FOR_EACH_VEC_ELT (comp->refs, i, a)
    dump_dref (file, a);
  fprintf (file, "\n");
}

/* Dumps COMPS to FILE.  */

extern void dump_components (FILE *, struct component *);
void
dump_components (FILE *file, struct component *comps)
{
  struct component *comp;

  for (comp = comps; comp; comp = comp->next)
    dump_component (file, comp);
}

/* Frees a chain CHAIN.  */

static void
release_chain (chain_p chain)
{
  dref ref;
  unsigned i;

  if (chain == NULL)
    return;

  FOR_EACH_VEC_ELT (chain->refs, i, ref)
    free (ref);

  chain->refs.release ();
  chain->vars.release ();
  chain->inits.release ();

  free (chain);
}

/* Frees CHAINS.  */

static void
release_chains (vec<chain_p> chains)
{
  unsigned i;
  chain_p chain;

  FOR_EACH_VEC_ELT (chains, i, chain)
    release_chain (chain);
  chains.release ();
}

/* Frees a component COMP.  */

static void
release_component (struct component *comp)
{
  comp->refs.release ();
  free (comp);
}

/* Frees list of components COMPS.  */

static void
release_components (struct component *comps)
{
  struct component *act, *next;

  for (act = comps; act; act = next)
    {
      next = act->next;
      release_component (act);
    }
}

/* Finds a root of tree given by FATHERS containing A, and performs path
   shortening.  */

static unsigned
component_of (unsigned fathers[], unsigned a)
{
  unsigned root, n;

  for (root = a; root != fathers[root]; root = fathers[root])
    continue;

  for (; a != root; a = n)
    {
      n = fathers[a];
      fathers[a] = root;
    }

  return root;
}

/* Join operation for DFU.  FATHERS gives the tree, SIZES are sizes of the
   components, A and B are components to merge.  */

static void
merge_comps (unsigned fathers[], unsigned sizes[], unsigned a, unsigned b)
{
  unsigned ca = component_of (fathers, a);
  unsigned cb = component_of (fathers, b);

  if (ca == cb)
    return;

  if (sizes[ca] < sizes[cb])
    {
      sizes[cb] += sizes[ca];
      fathers[ca] = cb;
    }
  else
    {
      sizes[ca] += sizes[cb];
      fathers[cb] = ca;
    }
}
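
/* component_of and merge_comps together implement a disjoint-set union
   ("DFU", union-find) structure with path compression and union by size;
   split_data_refs_to_components below uses it to build the components of
   the data dependence graph.  */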

/* Returns true if A is a reference that is suitable for predictive commoning
   in the innermost loop that contains it.  REF_STEP is set according to the
   step of the reference A.  */

static bool
suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
{
  tree ref = DR_REF (a), step = DR_STEP (a);

  if (!step
      || TREE_THIS_VOLATILE (ref)
      || !is_gimple_reg_type (TREE_TYPE (ref))
      || tree_could_throw_p (ref))
    return false;

  if (integer_zerop (step))
    *ref_step = RS_INVARIANT;
  else if (integer_nonzerop (step))
    *ref_step = RS_NONZERO;
  else
    *ref_step = RS_ANY;

  return true;
}

/* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET.  */

static void
aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
{
  tree type = TREE_TYPE (DR_OFFSET (dr));
  aff_tree delta;

  tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset,
				  &name_expansions);
  aff_combination_const (&delta, type, wi::to_widest (DR_INIT (dr)));
  aff_combination_add (offset, &delta);
}

/* Determines number of iterations of the innermost enclosing loop before B
   refers to exactly the same location as A and stores it to OFF.  If A and
   B do not have the same step, they never meet, or anything else fails,
   returns false, otherwise returns true.  Both A and B are assumed to
   satisfy suitable_reference_p.  */

static bool
determine_offset (struct data_reference *a, struct data_reference *b,
		  widest_int *off)
{
  aff_tree diff, baseb, step;
  tree typea, typeb;

  /* Check that both the references access the location in the same type.  */
  typea = TREE_TYPE (DR_REF (a));
  typeb = TREE_TYPE (DR_REF (b));
  if (!useless_type_conversion_p (typeb, typea))
    return false;

  /* Check whether the base address and the step of both references is the
     same.  */
  if (!operand_equal_p (DR_STEP (a), DR_STEP (b), 0)
      || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), 0))
    return false;

  if (integer_zerop (DR_STEP (a)))
    {
      /* If the references have loop invariant address, check that they access
	 exactly the same location.  */
      *off = 0;
      return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0)
	      && operand_equal_p (DR_INIT (a), DR_INIT (b), 0));
    }

  /* Compare the offsets of the addresses, and check whether the difference
     is a multiple of step.  */
  aff_combination_dr_offset (a, &diff);
  aff_combination_dr_offset (b, &baseb);
  aff_combination_scale (&baseb, -1);
  aff_combination_add (&diff, &baseb);

  tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)),
				  &step, &name_expansions);
  return aff_combination_constant_multiple_p (&diff, &step, off);
}

/* Returns the last basic block in LOOP for which we are sure that
   it is executed whenever the loop is entered.  */

static basic_block
last_always_executed_block (struct loop *loop)
{
  unsigned i;
  vec<edge> exits = get_loop_exit_edges (loop);
  edge ex;
  basic_block last = loop->latch;

  FOR_EACH_VEC_ELT (exits, i, ex)
    last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
  exits.release ();

  return last;
}

/* Splits dependence graph on DATAREFS described by DEPENDS to components.  */

static struct component *
split_data_refs_to_components (struct loop *loop,
			       vec<data_reference_p> datarefs,
			       vec<ddr_p> depends)
{
  unsigned i, n = datarefs.length ();
  unsigned ca, ia, ib, bad;
  unsigned *comp_father = XNEWVEC (unsigned, n + 1);
  unsigned *comp_size = XNEWVEC (unsigned, n + 1);
  struct component **comps;
  struct data_reference *dr, *dra, *drb;
  struct data_dependence_relation *ddr;
  struct component *comp_list = NULL, *comp;
  dref dataref;
  basic_block last_always_executed = last_always_executed_block (loop);

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      if (!DR_REF (dr))
	{
	  /* A fake reference for call or asm_expr that may clobber memory;
	     just fail.  */
	  goto end;
	}
      /* predcom pass isn't prepared to handle calls with data references.  */
      if (is_gimple_call (DR_STMT (dr)))
	goto end;
      dr->aux = (void *) (size_t) i;
      comp_father[i] = i;
      comp_size[i] = 1;
    }

  /* A component reserved for the "bad" data references.  */
  comp_father[n] = n;
  comp_size[n] = 1;

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      enum ref_step_type dummy;

      if (!suitable_reference_p (dr, &dummy))
	{
	  ia = (unsigned) (size_t) dr->aux;
	  merge_comps (comp_father, comp_size, n, ia);
	}
    }

  FOR_EACH_VEC_ELT (depends, i, ddr)
    {
      widest_int dummy_off;

      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	continue;

      dra = DDR_A (ddr);
      drb = DDR_B (ddr);
      ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
      ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
      if (ia == ib)
	continue;

      bad = component_of (comp_father, n);

      /* If both A and B are reads, we may ignore unsuitable dependences.  */
      if (DR_IS_READ (dra) && DR_IS_READ (drb))
	{
	  if (ia == bad || ib == bad
	      || !determine_offset (dra, drb, &dummy_off))
	    continue;
	}
      /* If A is read and B write or vice versa and there is unsuitable
	 dependence, instead of merging both components into a component
	 that will certainly not pass suitable_component_p, just put the
	 read into bad component, perhaps at least the write together with
	 all the other data refs in its component will be optimizable.  */
      else if (DR_IS_READ (dra) && ib != bad)
	{
	  if (ia == bad)
	    continue;
	  else if (!determine_offset (dra, drb, &dummy_off))
	    {
	      merge_comps (comp_father, comp_size, bad, ia);
	      continue;
	    }
	}
      else if (DR_IS_READ (drb) && ia != bad)
	{
	  if (ib == bad)
	    continue;
	  else if (!determine_offset (dra, drb, &dummy_off))
	    {
	      merge_comps (comp_father, comp_size, bad, ib);
	      continue;
	    }
	}

      merge_comps (comp_father, comp_size, ia, ib);
    }

  comps = XCNEWVEC (struct component *, n);
  bad = component_of (comp_father, n);
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      ia = (unsigned) (size_t) dr->aux;
      ca = component_of (comp_father, ia);
      if (ca == bad)
	continue;

      comp = comps[ca];
      if (!comp)
	{
	  comp = XCNEW (struct component);
	  comp->refs.create (comp_size[ca]);
	  comps[ca] = comp;
	}

      dataref = XCNEW (struct dref_d);
      dataref->ref = dr;
      dataref->stmt = DR_STMT (dr);
      dataref->offset = 0;
      dataref->distance = 0;

      dataref->always_accessed
	= dominated_by_p (CDI_DOMINATORS, last_always_executed,
			  gimple_bb (dataref->stmt));
      dataref->pos = comp->refs.length ();
      comp->refs.quick_push (dataref);
    }

  for (i = 0; i < n; i++)
    {
      comp = comps[i];
      if (comp)
	{
	  comp->next = comp_list;
	  comp_list = comp;
	}
    }
  free (comps);

end:
  free (comp_father);
  free (comp_size);
  return comp_list;
}

/* Returns true if the component COMP satisfies the conditions
   described in 2) at the beginning of this file.  LOOP is the current
   loop.  */

static bool
suitable_component_p (struct loop *loop, struct component *comp)
{
  unsigned i;
  dref a, first;
  basic_block ba, bp = loop->header;
  bool ok, has_write = false;

  FOR_EACH_VEC_ELT (comp->refs, i, a)
    {
      ba = gimple_bb (a->stmt);

      if (!just_once_each_iteration_p (loop, ba))
	return false;

      gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp));
      bp = ba;

      if (DR_IS_WRITE (a->ref))
	has_write = true;
    }

  first = comp->refs[0];
  ok = suitable_reference_p (first->ref, &comp->comp_step);
  gcc_assert (ok);
  first->offset = 0;

  for (i = 1; comp->refs.iterate (i, &a); i++)
    {
      if (!determine_offset (first->ref, a->ref, &a->offset))
	return false;

      enum ref_step_type a_step;
      gcc_checking_assert (suitable_reference_p (a->ref, &a_step)
			   && a_step == comp->comp_step);
    }

  /* If there is a write inside the component, we must know whether the
     step is nonzero or not -- we would not otherwise be able to recognize
     whether the value accessed by reads comes from the OFFSET-th iteration
     or the previous one.  */
  if (has_write && comp->comp_step == RS_ANY)
    return false;

  return true;
}

/* Check the conditions on references inside each of components COMPS,
   and remove the unsuitable components from the list.  The new list
   of components is returned.  The conditions are described in 2) at
   the beginning of this file.  LOOP is the current loop.  */

static struct component *
filter_suitable_components (struct loop *loop, struct component *comps)
{
  struct component **comp, *act;

  for (comp = &comps; *comp; )
    {
      act = *comp;
      if (suitable_component_p (loop, act))
	comp = &act->next;
      else
	{
	  dref ref;
	  unsigned i;

	  *comp = act->next;
	  FOR_EACH_VEC_ELT (act->refs, i, ref)
	    free (ref);
	  release_component (act);
	}
    }

  return comps;
}

/* Compares two drefs A and B by their offset and position.  Callback for
   qsort.  */

static int
order_drefs (const void *a, const void *b)
{
  const dref *const da = (const dref *) a;
  const dref *const db = (const dref *) b;
  int offcmp = wi::cmps ((*da)->offset, (*db)->offset);

  if (offcmp != 0)
    return offcmp;

  return (*da)->pos - (*db)->pos;
}

/* Returns root of the CHAIN.  */

static inline dref
get_chain_root (chain_p chain)
{
  return chain->refs[0];
}

/* Adds REF to the chain CHAIN.  */

static void
add_ref_to_chain (chain_p chain, dref ref)
{
  dref root = get_chain_root (chain);

  gcc_assert (wi::les_p (root->offset, ref->offset));
  widest_int dist = ref->offset - root->offset;
  if (wi::leu_p (MAX_DISTANCE, dist))
    {
      free (ref);
      return;
    }
  gcc_assert (wi::fits_uhwi_p (dist));

  chain->refs.safe_push (ref);

  ref->distance = dist.to_uhwi ();

  if (ref->distance >= chain->length)
    {
      chain->length = ref->distance;
      chain->has_max_use_after = false;
    }

  if (ref->distance == chain->length
      && ref->pos > root->pos)
    chain->has_max_use_after = true;

  chain->all_always_accessed &= ref->always_accessed;
}

/* Returns the chain for invariant component COMP.  */

static chain_p
make_invariant_chain (struct component *comp)
{
  chain_p chain = XCNEW (struct chain);
  unsigned i;
  dref ref;

  chain->type = CT_INVARIANT;

  chain->all_always_accessed = true;

  FOR_EACH_VEC_ELT (comp->refs, i, ref)
    {
      chain->refs.safe_push (ref);
      chain->all_always_accessed &= ref->always_accessed;
    }

  return chain;
}

/* Make a new chain rooted at REF.  */

static chain_p
make_rooted_chain (dref ref)
{
  chain_p chain = XCNEW (struct chain);

  chain->type = DR_IS_READ (ref->ref) ? CT_LOAD : CT_STORE_LOAD;

  chain->refs.safe_push (ref);
  chain->all_always_accessed = ref->always_accessed;

  ref->distance = 0;

  return chain;
}

/* Returns true if CHAIN is not trivial.  */

static bool
nontrivial_chain_p (chain_p chain)
{
  return chain != NULL && chain->refs.length () > 1;
}

/* Returns the ssa name that contains the value of REF, or NULL_TREE if there
   is no such name.  */

static tree
name_for_ref (dref ref)
{
  tree name;

  if (is_gimple_assign (ref->stmt))
    {
      if (!ref->ref || DR_IS_READ (ref->ref))
	name = gimple_assign_lhs (ref->stmt);
      else
	name = gimple_assign_rhs1 (ref->stmt);
    }
  else
    name = PHI_RESULT (ref->stmt);

  return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE);
}

/* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
   iterations of the innermost enclosing loop).  */

static bool
valid_initializer_p (struct data_reference *ref,
		     unsigned distance, struct data_reference *root)
{
  aff_tree diff, base, step;
  widest_int off;

  /* Both REF and ROOT must be accessing the same object.  */
  if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0))
    return false;

  /* The initializer is defined outside of loop, hence its address must be
     invariant inside the loop.  */
  gcc_assert (integer_zerop (DR_STEP (ref)));

  /* If the address of the reference is invariant, initializer must access
     exactly the same location.  */
  if (integer_zerop (DR_STEP (root)))
    return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), 0)
	    && operand_equal_p (DR_INIT (ref), DR_INIT (root), 0));

  /* Verify that this index of REF is equal to the root's index at
     -DISTANCE-th iteration.  */
  aff_combination_dr_offset (root, &diff);
  aff_combination_dr_offset (ref, &base);
  aff_combination_scale (&base, -1);
  aff_combination_add (&diff, &base);

  tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)),
				  &step, &name_expansions);
  if (!aff_combination_constant_multiple_p (&diff, &step, &off))
    return false;

  if (off != distance)
    return false;

  return true;
}

/* Finds looparound phi node of LOOP that copies the value of REF, and if its
   initial value is correct (equal to initial value of REF shifted by one
   iteration), returns the phi node.  Otherwise, NULL_TREE is returned.  ROOT
   is the root of the current chain.  */

static gphi *
find_looparound_phi (struct loop *loop, dref ref, dref root)
{
  tree name, init, init_ref;
  gphi *phi = NULL;
  gimple *init_stmt;
  edge latch = loop_latch_edge (loop);
  struct data_reference init_dr;
  gphi_iterator psi;

  if (is_gimple_assign (ref->stmt))
    {
      if (DR_IS_READ (ref->ref))
	name = gimple_assign_lhs (ref->stmt);
      else
	name = gimple_assign_rhs1 (ref->stmt);
    }
  else
    name = PHI_RESULT (ref->stmt);
  if (!name)
    return NULL;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      if (PHI_ARG_DEF_FROM_EDGE (phi, latch) == name)
	break;
    }

  if (gsi_end_p (psi))
    return NULL;

  init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  if (TREE_CODE (init) != SSA_NAME)
    return NULL;
  init_stmt = SSA_NAME_DEF_STMT (init);
  if (gimple_code (init_stmt) != GIMPLE_ASSIGN)
    return NULL;
  gcc_assert (gimple_assign_lhs (init_stmt) == init);

  init_ref = gimple_assign_rhs1 (init_stmt);
  if (!REFERENCE_CLASS_P (init_ref)
      && !DECL_P (init_ref))
    return NULL;

  /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
     loop enclosing PHI).  */
  memset (&init_dr, 0, sizeof (struct data_reference));
  DR_REF (&init_dr) = init_ref;
  DR_STMT (&init_dr) = phi;
  if (!dr_analyze_innermost (&init_dr, loop))
    return NULL;

  if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
    return NULL;

  return phi;
}

/* Adds a reference for the looparound copy of REF in PHI to CHAIN.  */

static void
insert_looparound_copy (chain_p chain, dref ref, gphi *phi)
{
  dref nw = XCNEW (struct dref_d), aref;
  unsigned i;

  nw->stmt = phi;
  nw->distance = ref->distance + 1;
  nw->always_accessed = 1;

  FOR_EACH_VEC_ELT (chain->refs, i, aref)
    if (aref->distance >= nw->distance)
      break;
  chain->refs.safe_insert (i, nw);

  if (nw->distance > chain->length)
    {
      chain->length = nw->distance;
      chain->has_max_use_after = false;
    }
}

/* For references in CHAIN that are copied around the LOOP (created previously
   by PRE, or by the user), add the results of such copies to the chain.  This
   enables us to remove the copies by unrolling, and may need fewer registers
   (also, it may allow us to combine chains together).  */

static void
add_looparound_copies (struct loop *loop, chain_p chain)
{
  unsigned i;
  dref ref, root = get_chain_root (chain);
  gphi *phi;

  FOR_EACH_VEC_ELT (chain->refs, i, ref)
    {
      phi = find_looparound_phi (loop, ref, root);
      if (!phi)
	continue;

      bitmap_set_bit (looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
      insert_looparound_copy (chain, ref, phi);
    }
}

/* Find roots of the values and determine distances in the component COMP.
   The references are redistributed into CHAINS.  LOOP is the current
   loop.  */

static void
determine_roots_comp (struct loop *loop,
		      struct component *comp,
		      vec<chain_p> *chains)
{
  unsigned i;
  dref a;
  chain_p chain = NULL;
  widest_int last_ofs = 0;

  /* Invariants are handled specially.  */
  if (comp->comp_step == RS_INVARIANT)
    {
      chain = make_invariant_chain (comp);
      chains->safe_push (chain);
      return;
    }

  comp->refs.qsort (order_drefs);

  FOR_EACH_VEC_ELT (comp->refs, i, a)
    {
      if (!chain || DR_IS_WRITE (a->ref)
	  || wi::leu_p (MAX_DISTANCE, a->offset - last_ofs))
	{
	  if (nontrivial_chain_p (chain))
	    {
	      add_looparound_copies (loop, chain);
	      chains->safe_push (chain);
	    }
	  else
	    release_chain (chain);
	  chain = make_rooted_chain (a);
	  last_ofs = a->offset;
	  continue;
	}

      add_ref_to_chain (chain, a);
    }

  if (nontrivial_chain_p (chain))
    {
      add_looparound_copies (loop, chain);
      chains->safe_push (chain);
    }
  else
    release_chain (chain);
}

/* Find roots of the values and determine distances in components COMPS, and
   separate the references to CHAINS.  LOOP is the current loop.  */

static void
determine_roots (struct loop *loop,
		 struct component *comps, vec<chain_p> *chains)
{
  struct component *comp;

  for (comp = comps; comp; comp = comp->next)
    determine_roots_comp (loop, comp, chains);
}

/* Replace the reference in statement STMT with temporary variable
   NEW_TREE.  If SET is true, NEW_TREE is instead initialized to the value of
   the reference in the statement.  IN_LHS is true if the reference
   is in the lhs of STMT, false if it is in rhs.  */

static void
replace_ref_with (gimple *stmt, tree new_tree, bool set, bool in_lhs)
{
  tree val;
  gassign *new_stmt;
  gimple_stmt_iterator bsi, psi;

  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      gcc_assert (!in_lhs && !set);

      val = PHI_RESULT (stmt);
      bsi = gsi_after_labels (gimple_bb (stmt));
      psi = gsi_for_stmt (stmt);
      remove_phi_node (&psi, false);

      /* Turn the phi node into GIMPLE_ASSIGN.  */
      new_stmt = gimple_build_assign (val, new_tree);
      gsi_insert_before (&bsi, new_stmt, GSI_NEW_STMT);
      return;
    }

  /* Since the reference is of gimple_reg type, it should only
     appear as lhs or rhs of a modify statement.  */
  gcc_assert (is_gimple_assign (stmt));

  bsi = gsi_for_stmt (stmt);

  /* If we do not need to initialize NEW_TREE, just replace the use of OLD.  */
  if (!set)
    {
      gcc_assert (!in_lhs);
      gimple_assign_set_rhs_from_tree (&bsi, new_tree);
      stmt = gsi_stmt (bsi);
      update_stmt (stmt);
      return;
    }

  if (in_lhs)
    {
      /* We have statement

	 OLD = VAL

	 If OLD is a memory reference, then VAL is gimple_val, and we transform
	 this to

	 OLD = VAL
	 NEW = VAL

	 Otherwise, we are replacing a combination chain,
	 VAL is the expression that performs the combination, and OLD is an
	 SSA name.  In this case, we transform the assignment to

	 OLD = VAL
	 NEW = OLD

	 */

      val = gimple_assign_lhs (stmt);
      if (TREE_CODE (val) != SSA_NAME)
	{
	  val = gimple_assign_rhs1 (stmt);
	  gcc_assert (gimple_assign_single_p (stmt));
	  if (TREE_CLOBBER_P (val))
	    val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (new_tree));
	  else
	    gcc_assert (gimple_assign_copy_p (stmt));
	}
    }
  else
    {
      /* VAL = OLD

	 is transformed to

	 VAL = OLD
	 NEW = VAL  */

      val = gimple_assign_lhs (stmt);
    }

  new_stmt = gimple_build_assign (new_tree, unshare_expr (val));
  gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT);
}

/* Returns a memory reference to DR in the ITER-th iteration of
   the loop it was analyzed in.  Append init stmts to STMTS.  */

static tree
ref_at_iteration (data_reference_p dr, int iter, gimple_seq *stmts)
{
  tree off = DR_OFFSET (dr);
  tree coff = DR_INIT (dr);
  tree ref = DR_REF (dr);
  enum tree_code ref_code = ERROR_MARK;
  tree ref_type = NULL_TREE;
  tree ref_op1 = NULL_TREE;
  tree ref_op2 = NULL_TREE;
  if (iter == 0)
    ;
  else if (TREE_CODE (DR_STEP (dr)) == INTEGER_CST)
    coff = size_binop (PLUS_EXPR, coff,
		       size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter)));
  else
    off = size_binop (PLUS_EXPR, off,
		      size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter)));
  /* While data-ref analysis punts on bit offsets it still handles
     bitfield accesses at byte boundaries.  Cope with that.  Note that
     if the bitfield object also starts at a byte-boundary we can simply
     replicate the COMPONENT_REF, but we have to subtract the component's
     byte-offset from the MEM_REF address first.
     Otherwise we simply build a BIT_FIELD_REF knowing that the bits
     start at offset zero.  */
  if (TREE_CODE (ref) == COMPONENT_REF
      && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
    {
      unsigned HOST_WIDE_INT boff;
      tree field = TREE_OPERAND (ref, 1);
      tree offset = component_ref_field_offset (ref);
      ref_type = TREE_TYPE (ref);
      boff = tree_to_uhwi (DECL_FIELD_BIT_OFFSET (field));
      /* This can occur in Ada.  See the comment in get_bit_range.  */
      if (boff % BITS_PER_UNIT != 0
	  || !tree_fits_uhwi_p (offset))
	{
	  ref_code = BIT_FIELD_REF;
	  ref_op1 = DECL_SIZE (field);
	  ref_op2 = bitsize_zero_node;
	}
      else
	{
	  boff >>= LOG2_BITS_PER_UNIT;
	  boff += tree_to_uhwi (offset);
	  coff = size_binop (MINUS_EXPR, coff, ssize_int (boff));
	  ref_code = COMPONENT_REF;
	  ref_op1 = field;
	  ref_op2 = TREE_OPERAND (ref, 2);
	  ref = TREE_OPERAND (ref, 0);
	}
    }
  tree addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off);
  addr = force_gimple_operand_1 (unshare_expr (addr), stmts,
				 is_gimple_mem_ref_addr, NULL_TREE);
  tree alias_ptr = fold_convert (reference_alias_ptr_type (ref), coff);
  tree type = build_aligned_type (TREE_TYPE (ref),
				  get_object_alignment (ref));
  ref = build2 (MEM_REF, type, addr, alias_ptr);
  if (ref_type)
    ref = build3 (ref_code, ref_type, ref, ref_op1, ref_op2);
  return ref;
}

/* Get the initialization expression for the INDEX-th temporary variable
   of CHAIN.  */

static tree
get_init_expr (chain_p chain, unsigned index)
{
  if (chain->type == CT_COMBINATION)
    {
      tree e1 = get_init_expr (chain->ch1, index);
      tree e2 = get_init_expr (chain->ch2, index);

      return fold_build2 (chain->op, chain->rslt_type, e1, e2);
    }
  else
    return chain->inits[index];
}

/* Returns a new temporary variable used for the I-th variable carrying
   value of REF.  The variable's uid is marked in TMP_VARS.  */

static tree
predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars)
{
  tree type = TREE_TYPE (ref);
  /* We never access the components of the temporary variable in predictive
     commoning.  */
  tree var = create_tmp_reg (type, get_lsm_tmp_name (ref, i));
  bitmap_set_bit (tmp_vars, DECL_UID (var));
  return var;
}

/* Creates the variables for CHAIN, as well as phi nodes for them and
   initialization on entry to LOOP.  Uids of the newly created
   temporary variables are marked in TMP_VARS.  */

static void
initialize_root_vars (struct loop *loop, chain_p chain, bitmap tmp_vars)
{
  unsigned i;
  unsigned n = chain->length;
  dref root = get_chain_root (chain);
  bool reuse_first = !chain->has_max_use_after;
  tree ref, init, var, next;
  gphi *phi;
  gimple_seq stmts;
  edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);

  /* If N == 0, then all the references are within the single iteration.  And
     since this is a nonempty chain, reuse_first cannot be true.  */
  gcc_assert (n > 0 || !reuse_first);

  chain->vars.create (n + 1);

  if (chain->type == CT_COMBINATION)
    ref = gimple_assign_lhs (root->stmt);
  else
    ref = DR_REF (root->ref);

  for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
    {
      var = predcom_tmp_var (ref, i, tmp_vars);
      chain->vars.quick_push (var);
    }
  if (reuse_first)
    chain->vars.quick_push (chain->vars[0]);

  FOR_EACH_VEC_ELT (chain->vars, i, var)
    chain->vars[i] = make_ssa_name (var);

  for (i = 0; i < n; i++)
    {
      var = chain->vars[i];
      next = chain->vars[i + 1];
      init = get_init_expr (chain, i);

      init = force_gimple_operand (init, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (entry, stmts);

      phi = create_phi_node (var, loop->header);
      add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
      add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
    }
}

/* Create the variables and initialization statement for root of chain
   CHAIN.  Uids of the newly created temporary variables are marked
   in TMP_VARS.  */

static void
initialize_root (struct loop *loop, chain_p chain, bitmap tmp_vars)
{
  dref root = get_chain_root (chain);
  bool in_lhs = (chain->type == CT_STORE_LOAD
		 || chain->type == CT_COMBINATION);

  initialize_root_vars (loop, chain, tmp_vars);
  replace_ref_with (root->stmt,
		    chain->vars[chain->length],
		    true, in_lhs);
}

/* Initializes a variable for load motion for ROOT and prepares phi nodes and
   initialization on entry to LOOP if necessary.  The ssa name for the variable
   is stored in VARS.  If WRITTEN is true, also a phi node to copy its value
   around the loop is created.  Uid of the newly created temporary variable
   is marked in TMP_VARS.  INITS is the list containing the (single)
   initializer.  */

static void
initialize_root_vars_lm (struct loop *loop, dref root, bool written,
			 vec<tree> *vars, vec<tree> inits,
			 bitmap tmp_vars)
{
  unsigned i;
  tree ref = DR_REF (root->ref), init, var, next;
  gimple_seq stmts;
  gphi *phi;
  edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);

  /* Find the initializer for the variable, and check that it cannot
     trap.  */
  init = inits[0];

  vars->create (written ? 2 : 1);
  var = predcom_tmp_var (ref, 0, tmp_vars);
  vars->quick_push (var);
  if (written)
    vars->quick_push ((*vars)[0]);

  FOR_EACH_VEC_ELT (*vars, i, var)
    (*vars)[i] = make_ssa_name (var);

  var = (*vars)[0];

  init = force_gimple_operand (init, &stmts, written, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (entry, stmts);

  if (written)
    {
      next = (*vars)[1];
      phi = create_phi_node (var, loop->header);
      add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
      add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
    }
  else
    {
      gassign *init_stmt = gimple_build_assign (var, init);
      gsi_insert_on_edge_immediate (entry, init_stmt);
    }
}


/* Execute load motion for references in chain CHAIN.  Uids of the newly
   created temporary variables are marked in TMP_VARS.  */

static void
execute_load_motion (struct loop *loop, chain_p chain, bitmap tmp_vars)
{
  auto_vec<tree> vars;
  dref a;
  unsigned n_writes = 0, ridx, i;
  tree var;

  gcc_assert (chain->type == CT_INVARIANT);
  gcc_assert (!chain->combined);
  FOR_EACH_VEC_ELT (chain->refs, i, a)
    if (DR_IS_WRITE (a->ref))
      n_writes++;

  /* If there are no reads in the loop, there is nothing to do.  */
  if (n_writes == chain->refs.length ())
    return;

  initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
			   &vars, chain->inits, tmp_vars);

  ridx = 0;
  FOR_EACH_VEC_ELT (chain->refs, i, a)
    {
      bool is_read = DR_IS_READ (a->ref);

      if (DR_IS_WRITE (a->ref))
	{
	  n_writes--;
	  if (n_writes)
	    {
	      var = vars[0];
	      var = make_ssa_name (SSA_NAME_VAR (var));
	      vars[0] = var;
	    }
	  else
	    ridx = 1;
	}

      replace_ref_with (a->stmt, vars[ridx],
			!is_read, !is_read);
    }
}

1633/* Returns the single statement in that NAME is used, excepting
1634 the looparound phi nodes contained in one of the chains. If there is no
726a989a 1635 such statement, or more statements, NULL is returned. */
bbc8a8dc 1636
355fe088 1637static gimple *
bbc8a8dc
ZD
1638single_nonlooparound_use (tree name)
1639{
1640 use_operand_p use;
1641 imm_use_iterator it;
355fe088 1642 gimple *stmt, *ret = NULL;
bbc8a8dc
ZD
1643
1644 FOR_EACH_IMM_USE_FAST (use, it, name)
1645 {
1646 stmt = USE_STMT (use);
1647
726a989a 1648 if (gimple_code (stmt) == GIMPLE_PHI)
bbc8a8dc
ZD
1649 {
1650 /* Ignore uses in looparound phi nodes. Uses in other phi nodes
1651 could not be processed anyway, so just fail for them. */
1652 if (bitmap_bit_p (looparound_phis,
1653 SSA_NAME_VERSION (PHI_RESULT (stmt))))
1654 continue;
1655
726a989a 1656 return NULL;
bbc8a8dc 1657 }
b63f974e
JJ
1658 else if (is_gimple_debug (stmt))
1659 continue;
726a989a
RB
1660 else if (ret != NULL)
1661 return NULL;
bbc8a8dc
ZD
1662 else
1663 ret = stmt;
1664 }
1665
1666 return ret;
1667}
1668
1669/* Remove statement STMT, as well as the chain of assignments in that it is
1670 used. */
1671
1672static void
355fe088 1673remove_stmt (gimple *stmt)
bbc8a8dc 1674{
726a989a 1675 tree name;
355fe088 1676 gimple *next;
726a989a 1677 gimple_stmt_iterator psi;
bbc8a8dc 1678
726a989a 1679 if (gimple_code (stmt) == GIMPLE_PHI)
bbc8a8dc
ZD
1680 {
1681 name = PHI_RESULT (stmt);
1682 next = single_nonlooparound_use (name);
273ccb6d 1683 reset_debug_uses (stmt);
726a989a
RB
1684 psi = gsi_for_stmt (stmt);
1685 remove_phi_node (&psi, true);
bbc8a8dc
ZD
1686
1687 if (!next
5f8ecf45 1688 || !gimple_assign_ssa_name_copy_p (next)
726a989a 1689 || gimple_assign_rhs1 (next) != name)
bbc8a8dc
ZD
1690 return;
1691
1692 stmt = next;
1693 }
1694
1695 while (1)
1696 {
726a989a 1697 gimple_stmt_iterator bsi;
b8698a0f 1698
726a989a 1699 bsi = gsi_for_stmt (stmt);
bbc8a8dc 1700
726a989a 1701 name = gimple_assign_lhs (stmt);
bbc8a8dc
ZD
1702 gcc_assert (TREE_CODE (name) == SSA_NAME);
1703
1704 next = single_nonlooparound_use (name);
273ccb6d 1705 reset_debug_uses (stmt);
bbc8a8dc 1706
13714310 1707 unlink_stmt_vdef (stmt);
726a989a 1708 gsi_remove (&bsi, true);
5f8ecf45 1709 release_defs (stmt);
bbc8a8dc
ZD
1710
1711 if (!next
5f8ecf45 1712 || !gimple_assign_ssa_name_copy_p (next)
726a989a 1713 || gimple_assign_rhs1 (next) != name)
bbc8a8dc
ZD
1714 return;
1715
1716 stmt = next;
1717 }
1718}
1719
1720/* Perform the predictive commoning optimization for a chain CHAIN.
1721 Uids of the newly created temporary variables are marked in TMP_VARS.*/
1722
1723static void
1724execute_pred_commoning_chain (struct loop *loop, chain_p chain,
1725 bitmap tmp_vars)
1726{
1727 unsigned i;
13714310 1728 dref a;
bbc8a8dc
ZD
1729 tree var;
1730
1731 if (chain->combined)
1732 {
1733 /* For combined chains, just remove the statements that are used to
a933d47f
RB
1734 compute the values of the expression (except for the root one).
1735 We delay this until after all chains are processed. */
bbc8a8dc
ZD
1736 }
1737 else
1738 {
1739 /* For non-combined chains, set up the variables that hold its value,
1740 and replace the uses of the original references by these
1741 variables. */
bbc8a8dc 1742 initialize_root (loop, chain, tmp_vars);
9771b263 1743 for (i = 1; chain->refs.iterate (i, &a); i++)
bbc8a8dc 1744 {
9771b263 1745 var = chain->vars[chain->length - a->distance];
bbc8a8dc
ZD
1746 replace_ref_with (a->stmt, var, false, false);
1747 }
1748 }
1749}
1750
1751/* Determines the unroll factor necessary to remove as many temporary variable
1752 copies as possible. CHAINS is the list of chains that will be
1753 optimized. */
1754
1755static unsigned
9771b263 1756determine_unroll_factor (vec<chain_p> chains)
bbc8a8dc
ZD
1757{
1758 chain_p chain;
1759 unsigned factor = 1, af, nfactor, i;
1760 unsigned max = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1761
9771b263 1762 FOR_EACH_VEC_ELT (chains, i, chain)
bbc8a8dc 1763 {
8039a35d 1764 if (chain->type == CT_INVARIANT)
bbc8a8dc
ZD
1765 continue;
1766
8039a35d
RB
1767 if (chain->combined)
1768 {
1769 /* For combined chains, we can't handle unrolling if we replace
1770 looparound PHIs. */
1771 dref a;
1772 unsigned j;
1773 for (j = 1; chain->refs.iterate (j, &a); j++)
1774 if (gimple_code (a->stmt) == GIMPLE_PHI)
1775 return 1;
1776 continue;
1777 }
1778
bbc8a8dc
ZD
1779 /* The best unroll factor for this chain is equal to the number of
1780 temporary variables that we create for it. */
1781 af = chain->length;
1782 if (chain->has_max_use_after)
1783 af++;
1784
1785 nfactor = factor * af / gcd (factor, af);
1786 if (nfactor <= max)
1787 factor = nfactor;
1788 }
1789
1790 return factor;
1791}
1792
1793/* Perform the predictive commoning optimization for CHAINS.
1794 Uids of the newly created temporary variables are marked in TMP_VARS. */
1795
1796static void
9771b263 1797execute_pred_commoning (struct loop *loop, vec<chain_p> chains,
bbc8a8dc
ZD
1798 bitmap tmp_vars)
1799{
1800 chain_p chain;
1801 unsigned i;
1802
9771b263 1803 FOR_EACH_VEC_ELT (chains, i, chain)
bbc8a8dc
ZD
1804 {
1805 if (chain->type == CT_INVARIANT)
1806 execute_load_motion (loop, chain, tmp_vars);
1807 else
1808 execute_pred_commoning_chain (loop, chain, tmp_vars);
1809 }
b8698a0f 1810
a933d47f
RB
1811 FOR_EACH_VEC_ELT (chains, i, chain)
1812 {
1813 if (chain->type == CT_INVARIANT)
1814 ;
1815 else if (chain->combined)
1816 {
1817 /* For combined chains, just remove the statements that are used to
1818 compute the values of the expression (except for the root one). */
1819 dref a;
1820 unsigned j;
1821 for (j = 1; chain->refs.iterate (j, &a); j++)
1822 remove_stmt (a->stmt);
1823 }
1824 }
1825
bbc8a8dc
ZD
1826 update_ssa (TODO_update_ssa_only_virtuals);
1827}
1828
c80b4100 1829/* For each reference in CHAINS, if its defining statement is
726a989a 1830 a phi node, record the ssa name that is defined by it. */
bbc8a8dc
ZD
1831
1832static void
9771b263 1833replace_phis_by_defined_names (vec<chain_p> chains)
bbc8a8dc
ZD
1834{
1835 chain_p chain;
1836 dref a;
1837 unsigned i, j;
1838
9771b263
DN
1839 FOR_EACH_VEC_ELT (chains, i, chain)
1840 FOR_EACH_VEC_ELT (chain->refs, j, a)
bbc8a8dc 1841 {
726a989a
RB
1842 if (gimple_code (a->stmt) == GIMPLE_PHI)
1843 {
1844 a->name_defined_by_phi = PHI_RESULT (a->stmt);
1845 a->stmt = NULL;
1846 }
bbc8a8dc
ZD
1847 }
1848}
1849
726a989a
RB
1850/* For each reference in CHAINS, if name_defined_by_phi is not
1851 NULL, use it to set the stmt field. */
bbc8a8dc
ZD
1852
1853static void
9771b263 1854replace_names_by_phis (vec<chain_p> chains)
bbc8a8dc
ZD
1855{
1856 chain_p chain;
1857 dref a;
1858 unsigned i, j;
1859
9771b263
DN
1860 FOR_EACH_VEC_ELT (chains, i, chain)
1861 FOR_EACH_VEC_ELT (chain->refs, j, a)
726a989a 1862 if (a->stmt == NULL)
bbc8a8dc 1863 {
726a989a
RB
1864 a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi);
1865 gcc_assert (gimple_code (a->stmt) == GIMPLE_PHI);
1866 a->name_defined_by_phi = NULL_TREE;
bbc8a8dc
ZD
1867 }
1868}
1869
1870/* Wrapper over execute_pred_commoning, to pass it as a callback
1871 to tree_transform_and_unroll_loop. */
1872
1873struct epcc_data
1874{
9771b263 1875 vec<chain_p> chains;
bbc8a8dc
ZD
1876 bitmap tmp_vars;
1877};
1878
1879static void
1880execute_pred_commoning_cbck (struct loop *loop, void *data)
1881{
3d9a9f94 1882 struct epcc_data *const dta = (struct epcc_data *) data;
bbc8a8dc
ZD
1883
1884 /* Restore phi nodes that were replaced by ssa names before
1885 tree_transform_and_unroll_loop (see detailed description in
1886 tree_predictive_commoning_loop). */
1887 replace_names_by_phis (dta->chains);
1888 execute_pred_commoning (loop, dta->chains, dta->tmp_vars);
1889}
1890
bbc8a8dc
ZD
1891/* Base NAME and all the names in the chain of phi nodes that use it
 1892 on variable VAR.  The phi nodes are recognized by being located in copies
 1893 of the header of LOOP. */
1894
1895static void
1896base_names_in_chain_on (struct loop *loop, tree name, tree var)
1897{
355fe088 1898 gimple *stmt, *phi;
bbc8a8dc 1899 imm_use_iterator iter;
bbc8a8dc 1900
b2ec94d4 1901 replace_ssa_name_symbol (name, var);
bbc8a8dc
ZD
1902
1903 while (1)
1904 {
1905 phi = NULL;
1906 FOR_EACH_IMM_USE_STMT (stmt, iter, name)
1907 {
726a989a
RB
1908 if (gimple_code (stmt) == GIMPLE_PHI
1909 && flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
bbc8a8dc
ZD
1910 {
1911 phi = stmt;
1912 BREAK_FROM_IMM_USE_STMT (iter);
1913 }
1914 }
1915 if (!phi)
1916 return;
1917
bbc8a8dc 1918 name = PHI_RESULT (phi);
b2ec94d4 1919 replace_ssa_name_symbol (name, var);
bbc8a8dc
ZD
1920 }
1921}
1922
1923/* Given an unrolled LOOP after predictive commoning, remove the
1924 register copies arising from phi nodes by changing the base
1925 variables of SSA names. TMP_VARS is the set of the temporary variables
 1926 for which we want to perform this. */
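
/* For instance (illustrative): when the latch ends up with register copies
   such as tmp_1 = tmp_2 and tmp_2 = tmp_3 coming from the looparound phi
   nodes, rebasing tmp_1, tmp_2 and tmp_3 on a single temporary variable
   lets out-of-SSA coalesce them, so the copies disappear from the final
   code.  */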
1927
1928static void
1929eliminate_temp_copies (struct loop *loop, bitmap tmp_vars)
1930{
1931 edge e;
538dd0b7 1932 gphi *phi;
355fe088 1933 gimple *stmt;
726a989a 1934 tree name, use, var;
538dd0b7 1935 gphi_iterator psi;
bbc8a8dc
ZD
1936
1937 e = loop_latch_edge (loop);
726a989a 1938 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
bbc8a8dc 1939 {
538dd0b7 1940 phi = psi.phi ();
bbc8a8dc
ZD
1941 name = PHI_RESULT (phi);
1942 var = SSA_NAME_VAR (name);
70b5e7dc 1943 if (!var || !bitmap_bit_p (tmp_vars, DECL_UID (var)))
bbc8a8dc
ZD
1944 continue;
1945 use = PHI_ARG_DEF_FROM_EDGE (phi, e);
1946 gcc_assert (TREE_CODE (use) == SSA_NAME);
1947
1948 /* Base all the ssa names in the ud and du chain of NAME on VAR. */
1949 stmt = SSA_NAME_DEF_STMT (use);
726a989a 1950 while (gimple_code (stmt) == GIMPLE_PHI
1b0cfaa6
ZD
1951 /* In case we could not unroll the loop enough to eliminate
1952 all copies, we may reach the loop header before the defining
1953 statement (in that case, some register copies will be present
 1954 in the loop latch in the final code, corresponding to the newly
1955 created looparound phi nodes). */
726a989a 1956 && gimple_bb (stmt) != loop->header)
bbc8a8dc 1957 {
726a989a 1958 gcc_assert (single_pred_p (gimple_bb (stmt)));
bbc8a8dc
ZD
1959 use = PHI_ARG_DEF (stmt, 0);
1960 stmt = SSA_NAME_DEF_STMT (use);
1961 }
1962
1963 base_names_in_chain_on (loop, use, var);
1964 }
1965}
1966
1967/* Returns true if CHAIN is suitable to be combined. */
1968
1969static bool
1970chain_can_be_combined_p (chain_p chain)
1971{
1972 return (!chain->combined
1973 && (chain->type == CT_LOAD || chain->type == CT_COMBINATION));
1974}
1975
1976/* Returns the modify statement that uses NAME. Skips over assignment
 1977 statements; NAME is replaced with the actual name used in the returned
1978 statement. */
1979
355fe088 1980static gimple *
bbc8a8dc
ZD
1981find_use_stmt (tree *name)
1982{
355fe088 1983 gimple *stmt;
726a989a 1984 tree rhs, lhs;
bbc8a8dc
ZD
1985
1986 /* Skip over assignments. */
1987 while (1)
1988 {
1989 stmt = single_nonlooparound_use (*name);
1990 if (!stmt)
726a989a 1991 return NULL;
bbc8a8dc 1992
726a989a
RB
1993 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1994 return NULL;
bbc8a8dc 1995
726a989a 1996 lhs = gimple_assign_lhs (stmt);
bbc8a8dc 1997 if (TREE_CODE (lhs) != SSA_NAME)
726a989a 1998 return NULL;
bbc8a8dc 1999
726a989a
RB
2000 if (gimple_assign_copy_p (stmt))
2001 {
2002 rhs = gimple_assign_rhs1 (stmt);
2003 if (rhs != *name)
2004 return NULL;
bbc8a8dc 2005
726a989a
RB
2006 *name = lhs;
2007 }
2008 else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
2009 == GIMPLE_BINARY_RHS)
2010 return stmt;
2011 else
2012 return NULL;
bbc8a8dc 2013 }
bbc8a8dc
ZD
2014}
2015
2016/* Returns true if we may perform reassociation for operation CODE in TYPE. */
2017
2018static bool
2019may_reassociate_p (tree type, enum tree_code code)
2020{
2021 if (FLOAT_TYPE_P (type)
2022 && !flag_unsafe_math_optimizations)
2023 return false;
2024
2025 return (commutative_tree_code (code)
2026 && associative_tree_code (code));
2027}
2028
 2029/* If the operation used in STMT is associative and commutative, goes through
 2030 the tree of the same operations and returns its root.  The distance to the
 2031 root is stored in DISTANCE. */
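
/* A small illustrative example: given

     x_1 = a_2 + b_3;
     y_4 = x_1 + c_5;
     z_6 = y_4 + d_7;

   starting from the statement defining x_1, the root of this chain of
   PLUS_EXPRs is the statement defining z_6, and *DISTANCE is set to 2.  */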
2032
355fe088
TS
2033static gimple *
2034find_associative_operation_root (gimple *stmt, unsigned *distance)
bbc8a8dc 2035{
726a989a 2036 tree lhs;
355fe088 2037 gimple *next;
726a989a
RB
2038 enum tree_code code = gimple_assign_rhs_code (stmt);
2039 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
bbc8a8dc
ZD
2040 unsigned dist = 0;
2041
726a989a
RB
2042 if (!may_reassociate_p (type, code))
2043 return NULL;
bbc8a8dc
ZD
2044
2045 while (1)
2046 {
726a989a 2047 lhs = gimple_assign_lhs (stmt);
bbc8a8dc
ZD
2048 gcc_assert (TREE_CODE (lhs) == SSA_NAME);
2049
2050 next = find_use_stmt (&lhs);
726a989a
RB
2051 if (!next
2052 || gimple_assign_rhs_code (next) != code)
bbc8a8dc
ZD
2053 break;
2054
2055 stmt = next;
2056 dist++;
2057 }
2058
2059 if (distance)
2060 *distance = dist;
2061 return stmt;
2062}
2063
 2064/* Returns the common statement in which NAME1 and NAME2 have a use. If there
 2065 is no such statement, returns NULL. In case the operation used on
c80b4100 2066 NAME1 and NAME2 is associative and commutative, returns the root of the
bbc8a8dc
ZD
2067 tree formed by this operation instead of the statement that uses NAME1 or
2068 NAME2. */
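
/* E.g. (illustrative): if NAME1 is used only in t_1 = NAME1 + a_2 and NAME2
   only in t_3 = t_1 + NAME2, both uses have the common root t_3 = ..., which
   is the statement returned here.  */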
2069
355fe088 2070static gimple *
bbc8a8dc
ZD
2071find_common_use_stmt (tree *name1, tree *name2)
2072{
355fe088 2073 gimple *stmt1, *stmt2;
bbc8a8dc
ZD
2074
2075 stmt1 = find_use_stmt (name1);
2076 if (!stmt1)
726a989a 2077 return NULL;
bbc8a8dc
ZD
2078
2079 stmt2 = find_use_stmt (name2);
2080 if (!stmt2)
726a989a 2081 return NULL;
bbc8a8dc
ZD
2082
2083 if (stmt1 == stmt2)
2084 return stmt1;
2085
2086 stmt1 = find_associative_operation_root (stmt1, NULL);
2087 if (!stmt1)
726a989a 2088 return NULL;
bbc8a8dc
ZD
2089 stmt2 = find_associative_operation_root (stmt2, NULL);
2090 if (!stmt2)
726a989a 2091 return NULL;
bbc8a8dc 2092
726a989a 2093 return (stmt1 == stmt2 ? stmt1 : NULL);
bbc8a8dc
ZD
2094}
2095
2096/* Checks whether R1 and R2 are combined together using CODE, with the result
2097 in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
2098 if it is true. If CODE is ERROR_MARK, set these values instead. */
2099
2100static bool
2101combinable_refs_p (dref r1, dref r2,
2102 enum tree_code *code, bool *swap, tree *rslt_type)
2103{
2104 enum tree_code acode;
2105 bool aswap;
2106 tree atype;
726a989a 2107 tree name1, name2;
355fe088 2108 gimple *stmt;
bbc8a8dc
ZD
2109
2110 name1 = name_for_ref (r1);
2111 name2 = name_for_ref (r2);
2112 gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);
2113
2114 stmt = find_common_use_stmt (&name1, &name2);
2115
7906dbe4
RB
2116 if (!stmt
2117 /* A simple post-dominance check - make sure the combination
2118 is executed under the same condition as the references. */
2119 || (gimple_bb (stmt) != gimple_bb (r1->stmt)
2120 && gimple_bb (stmt) != gimple_bb (r2->stmt)))
bbc8a8dc
ZD
2121 return false;
2122
726a989a 2123 acode = gimple_assign_rhs_code (stmt);
bbc8a8dc 2124 aswap = (!commutative_tree_code (acode)
726a989a
RB
2125 && gimple_assign_rhs1 (stmt) != name1);
2126 atype = TREE_TYPE (gimple_assign_lhs (stmt));
bbc8a8dc
ZD
2127
2128 if (*code == ERROR_MARK)
2129 {
2130 *code = acode;
2131 *swap = aswap;
2132 *rslt_type = atype;
2133 return true;
2134 }
2135
2136 return (*code == acode
2137 && *swap == aswap
2138 && *rslt_type == atype);
2139}
2140
2141/* Remove OP from the operation on rhs of STMT, and replace STMT with
2142 an assignment of the remaining operand. */
2143
2144static void
355fe088 2145remove_name_from_operation (gimple *stmt, tree op)
bbc8a8dc 2146{
726a989a
RB
2147 tree other_op;
2148 gimple_stmt_iterator si;
bbc8a8dc 2149
726a989a 2150 gcc_assert (is_gimple_assign (stmt));
bbc8a8dc 2151
726a989a
RB
2152 if (gimple_assign_rhs1 (stmt) == op)
2153 other_op = gimple_assign_rhs2 (stmt);
bbc8a8dc 2154 else
726a989a
RB
2155 other_op = gimple_assign_rhs1 (stmt);
2156
2157 si = gsi_for_stmt (stmt);
2158 gimple_assign_set_rhs_from_tree (&si, other_op);
2159
2160 /* We should not have reallocated STMT. */
2161 gcc_assert (gsi_stmt (si) == stmt);
2162
bbc8a8dc
ZD
2163 update_stmt (stmt);
2164}
2165
 2166/* Reassociates the expression in which NAME1 and NAME2 are used so that they
2167 are combined in a single statement, and returns this statement. */
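
/* A minimal illustrative sketch of the effect: for

     t_1 = name1 + a_2;
     t_3 = t_1 + name2;

   the computation is regrouped, using two fresh "predreastmp" temporaries,
   into

     t_1 = a_2;
     predreastmp_4 = name1 + name2;
     predreastmp_5 = t_1;
     t_3 = predreastmp_4 + predreastmp_5;

   and the statement computing predreastmp_4 is returned.  */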
2168
355fe088 2169static gimple *
bbc8a8dc
ZD
2170reassociate_to_the_same_stmt (tree name1, tree name2)
2171{
355fe088 2172 gimple *stmt1, *stmt2, *root1, *root2, *s1, *s2;
538dd0b7 2173 gassign *new_stmt, *tmp_stmt;
726a989a 2174 tree new_name, tmp_name, var, r1, r2;
bbc8a8dc
ZD
2175 unsigned dist1, dist2;
2176 enum tree_code code;
2177 tree type = TREE_TYPE (name1);
726a989a 2178 gimple_stmt_iterator bsi;
bbc8a8dc
ZD
2179
2180 stmt1 = find_use_stmt (&name1);
2181 stmt2 = find_use_stmt (&name2);
2182 root1 = find_associative_operation_root (stmt1, &dist1);
2183 root2 = find_associative_operation_root (stmt2, &dist2);
726a989a 2184 code = gimple_assign_rhs_code (stmt1);
bbc8a8dc
ZD
2185
2186 gcc_assert (root1 && root2 && root1 == root2
726a989a 2187 && code == gimple_assign_rhs_code (stmt2));
bbc8a8dc
ZD
2188
2189 /* Find the root of the nearest expression in that both NAME1 and NAME2
2190 are used. */
2191 r1 = name1;
2192 s1 = stmt1;
2193 r2 = name2;
2194 s2 = stmt2;
2195
2196 while (dist1 > dist2)
2197 {
2198 s1 = find_use_stmt (&r1);
726a989a 2199 r1 = gimple_assign_lhs (s1);
bbc8a8dc
ZD
2200 dist1--;
2201 }
2202 while (dist2 > dist1)
2203 {
2204 s2 = find_use_stmt (&r2);
726a989a 2205 r2 = gimple_assign_lhs (s2);
bbc8a8dc
ZD
2206 dist2--;
2207 }
2208
2209 while (s1 != s2)
2210 {
2211 s1 = find_use_stmt (&r1);
726a989a 2212 r1 = gimple_assign_lhs (s1);
bbc8a8dc 2213 s2 = find_use_stmt (&r2);
726a989a 2214 r2 = gimple_assign_lhs (s2);
bbc8a8dc
ZD
2215 }
2216
 2217 /* Remove NAME1 and NAME2 from the statements in which they are
 2218 currently used. */
2219 remove_name_from_operation (stmt1, name1);
2220 remove_name_from_operation (stmt2, name2);
2221
2222 /* Insert the new statement combining NAME1 and NAME2 before S1, and
2223 combine it with the rhs of S1. */
acd63801 2224 var = create_tmp_reg (type, "predreastmp");
b731b390 2225 new_name = make_ssa_name (var);
0d0e4a03 2226 new_stmt = gimple_build_assign (new_name, code, name1, name2);
bbc8a8dc 2227
acd63801 2228 var = create_tmp_reg (type, "predreastmp");
b731b390 2229 tmp_name = make_ssa_name (var);
726a989a
RB
2230
 2231 /* The rhs of S1 may now be either a binary expression with operation
 2232 CODE, or a gimple_val (in case stmt1 == s1 or stmt2 == s1, so that
 2233 name1 or name2 was removed from it). */
0d0e4a03
JJ
2234 tmp_stmt = gimple_build_assign (tmp_name, gimple_assign_rhs_code (s1),
2235 gimple_assign_rhs1 (s1),
2236 gimple_assign_rhs2 (s1));
726a989a
RB
2237
2238 bsi = gsi_for_stmt (s1);
2239 gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name);
2240 s1 = gsi_stmt (bsi);
bbc8a8dc
ZD
2241 update_stmt (s1);
2242
726a989a
RB
2243 gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
2244 gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT);
bbc8a8dc
ZD
2245
2246 return new_stmt;
2247}
2248
2249/* Returns the statement that combines references R1 and R2. In case R1
2250 and R2 are not used in the same statement, but they are used with an
2251 associative and commutative operation in the same expression, reassociate
2252 the expression so that they are used in the same statement. */
2253
355fe088 2254static gimple *
bbc8a8dc
ZD
2255stmt_combining_refs (dref r1, dref r2)
2256{
355fe088 2257 gimple *stmt1, *stmt2;
bbc8a8dc
ZD
2258 tree name1 = name_for_ref (r1);
2259 tree name2 = name_for_ref (r2);
2260
2261 stmt1 = find_use_stmt (&name1);
2262 stmt2 = find_use_stmt (&name2);
2263 if (stmt1 == stmt2)
2264 return stmt1;
2265
2266 return reassociate_to_the_same_stmt (name1, name2);
2267}
2268
2269/* Tries to combine chains CH1 and CH2 together. If this succeeds, the
2270 description of the new chain is returned, otherwise we return NULL. */
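
/* Illustratively, the chains for a[i], a[i+1] and for b[i], b[i+1] may be
   combined into one CT_COMBINATION chain carrying a[i] + b[i] and
   a[i+1] + b[i+1], provided the references at equal distances are always
   used together in the same (or a reassociable) operation with matching
   code and type; a single sequence of temporaries then suffices.  */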
2271
2272static chain_p
2273combine_chains (chain_p ch1, chain_p ch2)
2274{
2275 dref r1, r2, nw;
2276 enum tree_code op = ERROR_MARK;
2277 bool swap = false;
2278 chain_p new_chain;
2279 unsigned i;
355fe088 2280 gimple *root_stmt;
bbc8a8dc
ZD
2281 tree rslt_type = NULL_TREE;
2282
2283 if (ch1 == ch2)
a90352a0 2284 return NULL;
bbc8a8dc
ZD
2285 if (ch1->length != ch2->length)
2286 return NULL;
2287
9771b263 2288 if (ch1->refs.length () != ch2->refs.length ())
bbc8a8dc
ZD
2289 return NULL;
2290
9771b263
DN
2291 for (i = 0; (ch1->refs.iterate (i, &r1)
2292 && ch2->refs.iterate (i, &r2)); i++)
bbc8a8dc
ZD
2293 {
2294 if (r1->distance != r2->distance)
2295 return NULL;
2296
2297 if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type))
2298 return NULL;
2299 }
2300
2301 if (swap)
6b4db501 2302 std::swap (ch1, ch2);
bbc8a8dc
ZD
2303
2304 new_chain = XCNEW (struct chain);
2305 new_chain->type = CT_COMBINATION;
82d6e6fc 2306 new_chain->op = op;
bbc8a8dc
ZD
2307 new_chain->ch1 = ch1;
2308 new_chain->ch2 = ch2;
2309 new_chain->rslt_type = rslt_type;
2310 new_chain->length = ch1->length;
2311
9771b263
DN
2312 for (i = 0; (ch1->refs.iterate (i, &r1)
2313 && ch2->refs.iterate (i, &r2)); i++)
bbc8a8dc 2314 {
7e5487a2 2315 nw = XCNEW (struct dref_d);
bbc8a8dc
ZD
2316 nw->stmt = stmt_combining_refs (r1, r2);
2317 nw->distance = r1->distance;
2318
9771b263 2319 new_chain->refs.safe_push (nw);
bbc8a8dc
ZD
2320 }
2321
2322 new_chain->has_max_use_after = false;
2323 root_stmt = get_chain_root (new_chain)->stmt;
9771b263 2324 for (i = 1; new_chain->refs.iterate (i, &nw); i++)
bbc8a8dc
ZD
2325 {
2326 if (nw->distance == new_chain->length
2327 && !stmt_dominates_stmt_p (nw->stmt, root_stmt))
2328 {
2329 new_chain->has_max_use_after = true;
2330 break;
2331 }
2332 }
2333
2334 ch1->combined = true;
2335 ch2->combined = true;
2336 return new_chain;
2337}
2338
2339/* Try to combine the CHAINS. */
2340
2341static void
9771b263 2342try_combine_chains (vec<chain_p> *chains)
bbc8a8dc
ZD
2343{
2344 unsigned i, j;
2345 chain_p ch1, ch2, cch;
ef062b13 2346 auto_vec<chain_p> worklist;
bbc8a8dc 2347
9771b263 2348 FOR_EACH_VEC_ELT (*chains, i, ch1)
bbc8a8dc 2349 if (chain_can_be_combined_p (ch1))
9771b263 2350 worklist.safe_push (ch1);
bbc8a8dc 2351
9771b263 2352 while (!worklist.is_empty ())
bbc8a8dc 2353 {
9771b263 2354 ch1 = worklist.pop ();
bbc8a8dc
ZD
2355 if (!chain_can_be_combined_p (ch1))
2356 continue;
2357
9771b263 2358 FOR_EACH_VEC_ELT (*chains, j, ch2)
bbc8a8dc
ZD
2359 {
2360 if (!chain_can_be_combined_p (ch2))
2361 continue;
2362
2363 cch = combine_chains (ch1, ch2);
2364 if (cch)
2365 {
9771b263
DN
2366 worklist.safe_push (cch);
2367 chains->safe_push (cch);
bbc8a8dc
ZD
2368 break;
2369 }
2370 }
2371 }
2372}
2373
bbc8a8dc
ZD
2374/* Prepare initializers for CHAIN in LOOP. Returns false if this is
2375 impossible because one of these initializers may trap, true otherwise. */
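
/* E.g. (illustrative): for a chain of length 2 rooted at a write to a[i+2]
   in a loop starting at i = 0, the initializers are the values of a[0] and
   a[1], loaded on the preheader edge.  Initializers taken over from replaced
   looparound phi nodes are reused as-is, and if a needed load might trap
   while the reference is not accessed in every iteration, the chain is
   given up.  */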
2376
2377static bool
2378prepare_initializers_chain (struct loop *loop, chain_p chain)
2379{
2380 unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
2381 struct data_reference *dr = get_chain_root (chain)->ref;
726a989a 2382 tree init;
bbc8a8dc
ZD
2383 dref laref;
2384 edge entry = loop_preheader_edge (loop);
2385
2386 /* Find the initializers for the variables, and check that they cannot
2387 trap. */
9771b263 2388 chain->inits.create (n);
bbc8a8dc 2389 for (i = 0; i < n; i++)
9771b263 2390 chain->inits.quick_push (NULL_TREE);
bbc8a8dc
ZD
2391
2392 /* If we have replaced some looparound phi nodes, use their initializers
2393 instead of creating our own. */
9771b263 2394 FOR_EACH_VEC_ELT (chain->refs, i, laref)
bbc8a8dc 2395 {
726a989a 2396 if (gimple_code (laref->stmt) != GIMPLE_PHI)
bbc8a8dc
ZD
2397 continue;
2398
2399 gcc_assert (laref->distance > 0);
9771b263
DN
2400 chain->inits[n - laref->distance]
2401 = PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry);
bbc8a8dc
ZD
2402 }
2403
2404 for (i = 0; i < n; i++)
2405 {
7ec44c3d
RB
2406 gimple_seq stmts = NULL;
2407
9771b263 2408 if (chain->inits[i] != NULL_TREE)
bbc8a8dc
ZD
2409 continue;
2410
9f2b860b 2411 init = ref_at_iteration (dr, (int) i - n, &stmts);
bbc8a8dc 2412 if (!chain->all_always_accessed && tree_could_trap_p (init))
7ec44c3d
RB
2413 {
2414 gimple_seq_discard (stmts);
2415 return false;
2416 }
bbc8a8dc 2417
bbc8a8dc 2418 if (stmts)
5006671f 2419 gsi_insert_seq_on_edge_immediate (entry, stmts);
bbc8a8dc 2420
9771b263 2421 chain->inits[i] = init;
bbc8a8dc
ZD
2422 }
2423
2424 return true;
2425}
2426
2427/* Prepare initializers for CHAINS in LOOP, and free chains that cannot
2428 be used because the initializers might trap. */
2429
2430static void
9771b263 2431prepare_initializers (struct loop *loop, vec<chain_p> chains)
bbc8a8dc
ZD
2432{
2433 chain_p chain;
2434 unsigned i;
2435
9771b263 2436 for (i = 0; i < chains.length (); )
bbc8a8dc 2437 {
9771b263 2438 chain = chains[i];
bbc8a8dc
ZD
2439 if (prepare_initializers_chain (loop, chain))
2440 i++;
2441 else
2442 {
2443 release_chain (chain);
9771b263 2444 chains.unordered_remove (i);
bbc8a8dc
ZD
2445 }
2446 }
2447}
2448
2449/* Performs predictive commoning for LOOP. Returns true if LOOP was
2450 unrolled. */
2451
2452static bool
2453tree_predictive_commoning_loop (struct loop *loop)
2454{
9771b263
DN
2455 vec<data_reference_p> datarefs;
2456 vec<ddr_p> dependences;
bbc8a8dc 2457 struct component *components;
6e1aa848 2458 vec<chain_p> chains = vNULL;
bbc8a8dc
ZD
2459 unsigned unroll_factor;
2460 struct tree_niter_desc desc;
2461 bool unroll = false;
2462 edge exit;
2463 bitmap tmp_vars;
2464
2465 if (dump_file && (dump_flags & TDF_DETAILS))
2466 fprintf (dump_file, "Processing loop %d\n", loop->num);
2467
2468 /* Find the data references and split them into components according to their
2469 dependence relations. */
00f96dc9 2470 auto_vec<loop_p, 3> loop_nest;
9771b263 2471 dependences.create (10);
07687835 2472 datarefs.create (10);
9ca3d00e
AB
2473 if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
2474 &dependences))
2475 {
2476 if (dump_file && (dump_flags & TDF_DETAILS))
2477 fprintf (dump_file, "Cannot analyze data dependencies\n");
9ca3d00e
AB
2478 free_data_refs (datarefs);
2479 free_dependence_relations (dependences);
2480 return false;
2481 }
2482
bbc8a8dc
ZD
2483 if (dump_file && (dump_flags & TDF_DETAILS))
2484 dump_data_dependence_relations (dump_file, dependences);
2485
2486 components = split_data_refs_to_components (loop, datarefs, dependences);
9771b263 2487 loop_nest.release ();
bbc8a8dc
ZD
2488 free_dependence_relations (dependences);
2489 if (!components)
2490 {
2491 free_data_refs (datarefs);
4f87d581 2492 free_affine_expand_cache (&name_expansions);
bbc8a8dc
ZD
2493 return false;
2494 }
2495
2496 if (dump_file && (dump_flags & TDF_DETAILS))
2497 {
2498 fprintf (dump_file, "Initial state:\n\n");
2499 dump_components (dump_file, components);
2500 }
2501
2502 /* Find the suitable components and split them into chains. */
2503 components = filter_suitable_components (loop, components);
2504
2505 tmp_vars = BITMAP_ALLOC (NULL);
2506 looparound_phis = BITMAP_ALLOC (NULL);
2507 determine_roots (loop, components, &chains);
2508 release_components (components);
2509
9771b263 2510 if (!chains.exists ())
bbc8a8dc
ZD
2511 {
2512 if (dump_file && (dump_flags & TDF_DETAILS))
2513 fprintf (dump_file,
2514 "Predictive commoning failed: no suitable chains\n");
2515 goto end;
2516 }
2517 prepare_initializers (loop, chains);
2518
2519 /* Try to combine the chains that are always worked with together. */
2520 try_combine_chains (&chains);
2521
2522 if (dump_file && (dump_flags & TDF_DETAILS))
2523 {
2524 fprintf (dump_file, "Before commoning:\n\n");
2525 dump_chains (dump_file, chains);
2526 }
2527
2528 /* Determine the unroll factor, and if the loop should be unrolled, ensure
2529 that its number of iterations is divisible by the factor. */
2530 unroll_factor = determine_unroll_factor (chains);
2531 scev_reset ();
e1e6dc73
RG
2532 unroll = (unroll_factor > 1
2533 && can_unroll_loop_p (loop, unroll_factor, &desc));
bbc8a8dc
ZD
2534 exit = single_dom_exit (loop);
2535
2536 /* Execute the predictive commoning transformations, and possibly unroll the
2537 loop. */
2538 if (unroll)
2539 {
2540 struct epcc_data dta;
2541
2542 if (dump_file && (dump_flags & TDF_DETAILS))
2543 fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);
2544
2545 dta.chains = chains;
2546 dta.tmp_vars = tmp_vars;
b8698a0f 2547
bbc8a8dc
ZD
2548 update_ssa (TODO_update_ssa_only_virtuals);
2549
2550 /* Cfg manipulations performed in tree_transform_and_unroll_loop before
2551 execute_pred_commoning_cbck is called may cause phi nodes to be
2552 reallocated, which is a problem since CHAINS may point to these
2553 statements. To fix this, we store the ssa names defined by the
2554 phi nodes here instead of the phi nodes themselves, and restore
2555 the phi nodes in execute_pred_commoning_cbck. A bit hacky. */
2556 replace_phis_by_defined_names (chains);
2557
2558 tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc,
2559 execute_pred_commoning_cbck, &dta);
2560 eliminate_temp_copies (loop, tmp_vars);
2561 }
2562 else
2563 {
2564 if (dump_file && (dump_flags & TDF_DETAILS))
2565 fprintf (dump_file,
2566 "Executing predictive commoning without unrolling.\n");
2567 execute_pred_commoning (loop, chains, tmp_vars);
2568 }
2569
2570end: ;
2571 release_chains (chains);
2572 free_data_refs (datarefs);
2573 BITMAP_FREE (tmp_vars);
2574 BITMAP_FREE (looparound_phis);
2575
2576 free_affine_expand_cache (&name_expansions);
2577
2578 return unroll;
2579}
2580
2581/* Runs predictive commoning. */
2582
592c303d 2583unsigned
bbc8a8dc
ZD
2584tree_predictive_commoning (void)
2585{
2586 bool unrolled = false;
2587 struct loop *loop;
592c303d 2588 unsigned ret = 0;
bbc8a8dc
ZD
2589
2590 initialize_original_copy_tables ();
f0bd40b1 2591 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
8bcf15f6
JH
2592 if (optimize_loop_for_speed_p (loop))
2593 {
2594 unrolled |= tree_predictive_commoning_loop (loop);
2595 }
bbc8a8dc
ZD
2596
2597 if (unrolled)
2598 {
2599 scev_reset ();
592c303d 2600 ret = TODO_cleanup_cfg;
bbc8a8dc
ZD
2601 }
2602 free_original_copy_tables ();
592c303d
ZD
2603
2604 return ret;
bbc8a8dc 2605}
c1bf2a39
AM
2606
2607/* Predictive commoning Pass. */
2608
2609static unsigned
726338f4 2610run_tree_predictive_commoning (struct function *fun)
c1bf2a39 2611{
726338f4 2612 if (number_of_loops (fun) <= 1)
c1bf2a39
AM
2613 return 0;
2614
2615 return tree_predictive_commoning ();
2616}
2617
c1bf2a39
AM
2618namespace {
2619
2620const pass_data pass_data_predcom =
2621{
2622 GIMPLE_PASS, /* type */
2623 "pcom", /* name */
2624 OPTGROUP_LOOP, /* optinfo_flags */
c1bf2a39
AM
2625 TV_PREDCOM, /* tv_id */
2626 PROP_cfg, /* properties_required */
2627 0, /* properties_provided */
2628 0, /* properties_destroyed */
2629 0, /* todo_flags_start */
2630 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
2631};
2632
2633class pass_predcom : public gimple_opt_pass
2634{
2635public:
2636 pass_predcom (gcc::context *ctxt)
2637 : gimple_opt_pass (pass_data_predcom, ctxt)
2638 {}
2639
2640 /* opt_pass methods: */
1a3d085c 2641 virtual bool gate (function *) { return flag_predictive_commoning != 0; }
726338f4 2642 virtual unsigned int execute (function *fun)
be55bfe6 2643 {
726338f4 2644 return run_tree_predictive_commoning (fun);
be55bfe6 2645 }
c1bf2a39
AM
2646
2647}; // class pass_predcom
2648
2649} // anon namespace
2650
2651gimple_opt_pass *
2652make_pass_predcom (gcc::context *ctxt)
2653{
2654 return new pass_predcom (ctxt);
2655}
2656
2657