gcc/tree-predcom.c
1 /* Predictive commoning.
2 Copyright (C) 2005, 2007 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This file implements the predictive commoning optimization. Predictive
21 commoning can be viewed as CSE around a loop, and with some improvements,
22 as generalized strength reduction -- i.e., reusing values computed in
23 earlier iterations of a loop in the later ones. So far, the pass only
24 handles the most useful case, that is, reusing values of memory references.
25 If you think this is all just a special case of PRE, you are sort of right;
26 however, concentrating on loops is simpler, and makes it possible to
27 incorporate data dependence analysis to detect the opportunities, perform
28 loop unrolling to avoid copies together with renaming immediately,
29 and if needed, we could also take register pressure into account.
30
31 Let us demonstrate what is done on an example:
32
33 for (i = 0; i < 100; i++)
34 {
35 a[i+2] = a[i] + a[i+1];
36 b[10] = b[10] + i;
37 c[i] = c[99 - i];
38 d[i] = d[i + 1];
39 }
40
41 1) We find data references in the loop, and split them into mutually
42 independent groups (i.e., we find components of a data dependence
43 graph). We ignore read-read dependences whose distance is not constant.
44 (TODO -- we could also ignore antidependences). In this example, we
45 find the following groups:
46
47 a[i]{read}, a[i+1]{read}, a[i+2]{write}
48 b[10]{read}, b[10]{write}
49 c[99 - i]{read}, c[i]{write}
50 d[i + 1]{read}, d[i]{write}
51
52 2) Inside each of the groups, we verify several conditions:
53 a) all the references must differ in indices only, and the indices
54 must all have the same step
55 b) the references must dominate the loop latch (and thus, they must be
56 ordered by the dominance relation).
57 c) the distance of the indices must be a small multiple of the step
58 We are then able to compute the difference of the references (# of
59 iterations before they point to the same place as the first of them).
60 Also, in case there are writes in the loop, we split the groups into
61 chains whose head is the write whose values are used by the reads in
62 the same chain. The chains are then processed independently,
63 making the further transformations simpler. Also, the shorter chains
64 need the same number of registers, but may require a lower unrolling
65 factor in order to get rid of the copies on the loop latch.
66
67 In our example, we get the following chains (the chain for c is invalid).
68
69 a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
70 b[10]{read,+0}, b[10]{write,+0}
71 d[i + 1]{read,+0}, d[i]{write,+1}
72
73 3) For each read, we determine the read or write whose value it reuses,
74 together with the distance of this reuse. I.e. we take the last
75 reference before it with distance 0, or the last of the references
76 with the smallest positive distance to the read. Then, we remove
77 the references that are not used in any of these chains, discard the
78 empty groups, and propagate all the links so that they point to the
79 single root reference of the chain (adjusting their distance
80 appropriately). Some extra care needs to be taken for references with
81 step 0. In our example (the numbers indicate the distance of the
82 reuse),
83
84 a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
85 b[10] --> (*) 1, b[10] (*)
86
87 4) The chains are combined together if possible. If the corresponding
88 elements of two chains are always combined together with the same
89 operator, we remember just the result of this combination, instead
90 of remembering the values separately. We may need to perform
91 reassociation to enable combining, for example
92
93 e[i] + f[i+1] + e[i+1] + f[i]
94
95 can be reassociated as
96
97 (e[i] + f[i]) + (e[i+1] + f[i+1])
98
99 and we can combine the chains for e and f into one chain.
100
101 5) For each root reference (end of the chain) R, let N be the maximum distance
102 of a reference reusing its value. Variables R0 up to RN are created,
103 together with phi nodes that transfer values from R1 .. RN to
104 R0 .. R(N-1).
105 Initial values are loaded to R0..R(N-1) (in case not all references
106 must necessarily be accessed and they may trap, we may fail here;
107 TODO sometimes, the loads could be guarded by a check for the number
108 of iterations). Values loaded/stored in roots are also copied to
109 RN. Other reads are replaced with the appropriate variable Ri.
110 Everything is put into SSA form.
111
112 As a small improvement, if R0 is dead after the root (i.e., all uses of
113 the value with the maximum distance dominate the root), we can avoid
114 creating RN and use R0 instead of it.
115
116 In our example, we get (only the parts concerning a and b are shown):
117 for (i = 0; i < 100; i++)
118 {
119 f = phi (a[0], s);
120 s = phi (a[1], f);
121 x = phi (b[10], x);
122
123 f = f + s;
124 a[i+2] = f;
125 x = x + i;
126 b[10] = x;
127 }
128
129 6) Factor F for unrolling is determined as the smallest common multiple of
130 (N + 1) for each root reference (N for references for which we avoided
131 creating RN). If F and the loop are small enough, the loop is unrolled F
132 times. The stores to RN (R0) in the copies of the loop body are
133 periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
134 be coalesced and the copies can be eliminated.
135
136 TODO -- copy propagation and other optimizations may change the live
137 ranges of the temporary registers and prevent them from being coalesced;
138 this may increase the register pressure.
139
140 In our case, F = 2 and the (main loop of the) result is
141
142 for (i = 0; i < ...; i += 2)
143 {
144 f = phi (a[0], f);
145 s = phi (a[1], s);
146 x = phi (b[10], x);
147
148 f = f + s;
149 a[i+2] = f;
150 x = x + i;
151 b[10] = x;
152
153 s = s + f;
154 a[i+3] = s;
155 x = x + i;
156 b[10] = x;
157 }
158
159 TODO -- stores killing other stores can be taken into account, e.g.,
160 for (i = 0; i < n; i++)
161 {
162 a[i] = 1;
163 a[i+2] = 2;
164 }
165
166 can be replaced with
167
168 t0 = a[0];
169 t1 = a[1];
170 for (i = 0; i < n; i++)
171 {
172 a[i] = 1;
173 t2 = 2;
174 t0 = t1;
175 t1 = t2;
176 }
177 a[n] = t0;
178 a[n+1] = t1;
179
180 The interesting part is that this would generalize store motion; still, since
181 sm is performed elsewhere, it does not seem that important.
182
183 Predictive commoning can be generalized for arbitrary computations (not
184 just memory loads), and also nontrivial transfer functions (e.g., replacing
185 i * i with ii_last + 2 * i + 1), to generalize strength reduction. */
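
/* As an illustrative sketch of the generalization above (this is not
   something the pass currently does), a recurrence such as

     for (i = 0; i < n; i++)
       use (i * i);

   where use () stands for an arbitrary consumer of the value, could be
   rewritten to carry the previous value across iterations:

     ii = 0;
     for (i = 0; i < n; i++)
       {
         use (ii);
         ii = ii + 2 * i + 1;
       }

   so that the multiplication is replaced by the cheaper update
   ii_last + 2 * i + 1 mentioned above.  */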
186
187 #include "config.h"
188 #include "system.h"
189 #include "coretypes.h"
190 #include "tm.h"
191 #include "tree.h"
192 #include "tm_p.h"
193 #include "cfgloop.h"
194 #include "tree-flow.h"
195 #include "ggc.h"
196 #include "tree-data-ref.h"
197 #include "tree-scalar-evolution.h"
198 #include "tree-chrec.h"
199 #include "params.h"
200 #include "diagnostic.h"
201 #include "tree-pass.h"
202 #include "tree-affine.h"
203 #include "tree-inline.h"
204
205 /* The maximum number of iterations between the considered memory
206 references. */
207
208 #define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
209
210 /* Data references. */
211
212 typedef struct dref
213 {
214 /* The reference itself. */
215 struct data_reference *ref;
216
217 /* The statement in which the reference appears. */
218 tree stmt;
219
220 /* Distance of the reference from the root of the chain (in number of
221 iterations of the loop). */
222 unsigned distance;
223
224 /* Number of iterations offset from the first reference in the component. */
225 double_int offset;
226
227 /* Number of the reference in a component, in dominance ordering. */
228 unsigned pos;
229
230 /* True if the memory reference is always accessed when the loop is
231 entered. */
232 unsigned always_accessed : 1;
233 } *dref;
234
235 DEF_VEC_P (dref);
236 DEF_VEC_ALLOC_P (dref, heap);
237
238 /* Type of the chain of the references. */
239
240 enum chain_type
241 {
242 /* The addresses of the references in the chain are constant. */
243 CT_INVARIANT,
244
245 /* There are only loads in the chain. */
246 CT_LOAD,
247
248 /* Root of the chain is a store, the rest are loads. */
249 CT_STORE_LOAD,
250
251 /* A combination of two chains. */
252 CT_COMBINATION
253 };
254
255 /* Chains of data references. */
256
257 typedef struct chain
258 {
259 /* Type of the chain. */
260 enum chain_type type;
261
262 /* For combination chains, the operator and the two chains that are
263 combined, and the type of the result. */
264 enum tree_code operator;
265 tree rslt_type;
266 struct chain *ch1, *ch2;
267
268 /* The references in the chain. */
269 VEC(dref,heap) *refs;
270
271 /* The maximum distance of the reference in the chain from the root. */
272 unsigned length;
273
274 /* The variables used to copy the value throughout iterations. */
275 VEC(tree,heap) *vars;
276
277 /* Initializers for the variables. */
278 VEC(tree,heap) *inits;
279
280 /* True if there is a use of a variable with the maximal distance
281 that comes after the root in the loop. */
282 unsigned has_max_use_after : 1;
283
284 /* True if all the memory references in the chain are always accessed. */
285 unsigned all_always_accessed : 1;
286
287 /* True if this chain was combined together with some other chain. */
288 unsigned combined : 1;
289 } *chain_p;
290
291 DEF_VEC_P (chain_p);
292 DEF_VEC_ALLOC_P (chain_p, heap);
293
294 /* Describes the knowledge about the step of the memory references in
295 the component. */
296
297 enum ref_step_type
298 {
299 /* The step is zero. */
300 RS_INVARIANT,
301
302 /* The step is nonzero. */
303 RS_NONZERO,
304
305 /* The step may or may not be nonzero. */
306 RS_ANY
307 };
308
309 /* Components of the data dependence graph. */
310
311 struct component
312 {
313 /* The references in the component. */
314 VEC(dref,heap) *refs;
315
316 /* What we know about the step of the references in the component. */
317 enum ref_step_type comp_step;
318
319 /* Next component in the list. */
320 struct component *next;
321 };
322
323 /* Bitmap of ssa names defined by looparound phi nodes covered by chains. */
324
325 static bitmap looparound_phis;
326
327 /* Cache used by tree_to_aff_combination_expand. */
328
329 static struct pointer_map_t *name_expansions;
330
331 /* Dumps data reference REF to FILE. */
332
333 extern void dump_dref (FILE *, dref);
334 void
335 dump_dref (FILE *file, dref ref)
336 {
337 if (ref->ref)
338 {
339 fprintf (file, " ");
340 print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM);
341 fprintf (file, " (id %u%s)\n", ref->pos,
342 DR_IS_READ (ref->ref) ? "" : ", write");
343
344 fprintf (file, " offset ");
345 dump_double_int (file, ref->offset, false);
346 fprintf (file, "\n");
347
348 fprintf (file, " distance %u\n", ref->distance);
349 }
350 else
351 {
352 if (TREE_CODE (ref->stmt) == PHI_NODE)
353 fprintf (file, " looparound ref\n");
354 else
355 fprintf (file, " combination ref\n");
356 fprintf (file, " in statement ");
357 print_generic_expr (file, ref->stmt, TDF_SLIM);
358 fprintf (file, "\n");
359 fprintf (file, " distance %u\n", ref->distance);
360 }
361
362 }
363
364 /* Dumps CHAIN to FILE. */
365
366 extern void dump_chain (FILE *, chain_p);
367 void
368 dump_chain (FILE *file, chain_p chain)
369 {
370 dref a;
371 const char *chain_type;
372 unsigned i;
373 tree var;
374
375 switch (chain->type)
376 {
377 case CT_INVARIANT:
378 chain_type = "Load motion";
379 break;
380
381 case CT_LOAD:
382 chain_type = "Loads-only";
383 break;
384
385 case CT_STORE_LOAD:
386 chain_type = "Store-loads";
387 break;
388
389 case CT_COMBINATION:
390 chain_type = "Combination";
391 break;
392
393 default:
394 gcc_unreachable ();
395 }
396
397 fprintf (file, "%s chain %p%s\n", chain_type, (void *) chain,
398 chain->combined ? " (combined)" : "");
399 if (chain->type != CT_INVARIANT)
400 fprintf (file, " max distance %u%s\n", chain->length,
401 chain->has_max_use_after ? "" : ", may reuse first");
402
403 if (chain->type == CT_COMBINATION)
404 {
405 fprintf (file, " equal to %p %s %p in type ",
406 (void *) chain->ch1, op_symbol_code (chain->operator),
407 (void *) chain->ch2);
408 print_generic_expr (file, chain->rslt_type, TDF_SLIM);
409 fprintf (file, "\n");
410 }
411
412 if (chain->vars)
413 {
414 fprintf (file, " vars");
415 for (i = 0; VEC_iterate (tree, chain->vars, i, var); i++)
416 {
417 fprintf (file, " ");
418 print_generic_expr (file, var, TDF_SLIM);
419 }
420 fprintf (file, "\n");
421 }
422
423 if (chain->inits)
424 {
425 fprintf (file, " inits");
426 for (i = 0; VEC_iterate (tree, chain->inits, i, var); i++)
427 {
428 fprintf (file, " ");
429 print_generic_expr (file, var, TDF_SLIM);
430 }
431 fprintf (file, "\n");
432 }
433
434 fprintf (file, " references:\n");
435 for (i = 0; VEC_iterate (dref, chain->refs, i, a); i++)
436 dump_dref (file, a);
437
438 fprintf (file, "\n");
439 }
440
441 /* Dumps CHAINS to FILE. */
442
443 extern void dump_chains (FILE *, VEC (chain_p, heap) *);
444 void
445 dump_chains (FILE *file, VEC (chain_p, heap) *chains)
446 {
447 chain_p chain;
448 unsigned i;
449
450 for (i = 0; VEC_iterate (chain_p, chains, i, chain); i++)
451 dump_chain (file, chain);
452 }
453
454 /* Dumps COMP to FILE. */
455
456 extern void dump_component (FILE *, struct component *);
457 void
458 dump_component (FILE *file, struct component *comp)
459 {
460 dref a;
461 unsigned i;
462
463 fprintf (file, "Component%s:\n",
464 comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
465 for (i = 0; VEC_iterate (dref, comp->refs, i, a); i++)
466 dump_dref (file, a);
467 fprintf (file, "\n");
468 }
469
470 /* Dumps COMPS to FILE. */
471
472 extern void dump_components (FILE *, struct component *);
473 void
474 dump_components (FILE *file, struct component *comps)
475 {
476 struct component *comp;
477
478 for (comp = comps; comp; comp = comp->next)
479 dump_component (file, comp);
480 }
481
482 /* Frees a chain CHAIN. */
483
484 static void
485 release_chain (chain_p chain)
486 {
487 dref ref;
488 unsigned i;
489
490 if (chain == NULL)
491 return;
492
493 for (i = 0; VEC_iterate (dref, chain->refs, i, ref); i++)
494 free (ref);
495
496 VEC_free (dref, heap, chain->refs);
497 VEC_free (tree, heap, chain->vars);
498 VEC_free (tree, heap, chain->inits);
499
500 free (chain);
501 }
502
503 /* Frees CHAINS. */
504
505 static void
506 release_chains (VEC (chain_p, heap) *chains)
507 {
508 unsigned i;
509 chain_p chain;
510
511 for (i = 0; VEC_iterate (chain_p, chains, i, chain); i++)
512 release_chain (chain);
513 VEC_free (chain_p, heap, chains);
514 }
515
516 /* Frees a component COMP. */
517
518 static void
519 release_component (struct component *comp)
520 {
521 VEC_free (dref, heap, comp->refs);
522 free (comp);
523 }
524
525 /* Frees list of components COMPS. */
526
527 static void
528 release_components (struct component *comps)
529 {
530 struct component *act, *next;
531
532 for (act = comps; act; act = next)
533 {
534 next = act->next;
535 release_component (act);
536 }
537 }
538
539 /* Finds the root of the tree given by FATHERS that contains A, and performs
540 path shortening. */
541
542 static unsigned
543 component_of (unsigned fathers[], unsigned a)
544 {
545 unsigned root, n;
546
547 for (root = a; root != fathers[root]; root = fathers[root])
548 continue;
549
550 for (; a != root; a = n)
551 {
552 n = fathers[a];
553 fathers[a] = root;
554 }
555
556 return root;
557 }
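
/* For instance (a hypothetical state of the structure), with
   FATHERS = {1, 2, 2}, component_of (FATHERS, 0) follows 0 -> 1 -> 2,
   returns 2, and rewrites FATHERS to {2, 2, 2}, so that subsequent
   queries reach the root directly.  */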
558
559 /* Join operation for DFU (union-find). FATHERS gives the tree, SIZES are the
560 sizes of the components, A and B are the components to merge. */
561
562 static void
563 merge_comps (unsigned fathers[], unsigned sizes[], unsigned a, unsigned b)
564 {
565 unsigned ca = component_of (fathers, a);
566 unsigned cb = component_of (fathers, b);
567
568 if (ca == cb)
569 return;
570
571 if (sizes[ca] < sizes[cb])
572 {
573 sizes[cb] += sizes[ca];
574 fathers[ca] = cb;
575 }
576 else
577 {
578 sizes[ca] += sizes[cb];
579 fathers[cb] = ca;
580 }
581 }
582
583 /* Returns true if A is a reference that is suitable for predictive commoning
584 in the innermost loop that contains it. REF_STEP is set according to the
585 step of the reference A. */
586
587 static bool
588 suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
589 {
590 tree ref = DR_REF (a), step = DR_STEP (a);
591
592 if (!step
593 || !is_gimple_reg_type (TREE_TYPE (ref)))
594 return false;
595
596 if (integer_zerop (step))
597 *ref_step = RS_INVARIANT;
598 else if (integer_nonzerop (step))
599 *ref_step = RS_NONZERO;
600 else
601 *ref_step = RS_ANY;
602
603 return true;
604 }
605
606 /* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET. */
607
608 static void
609 aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
610 {
611 aff_tree delta;
612
613 tree_to_aff_combination_expand (DR_OFFSET (dr), sizetype, offset,
614 &name_expansions);
615 aff_combination_const (&delta, sizetype, tree_to_double_int (DR_INIT (dr)));
616 aff_combination_add (offset, &delta);
617 }
618
619 /* Determines the number of iterations of the innermost enclosing loop before B
620 refers to exactly the same location as A and stores it to OFF. If A and
621 B do not have the same step, they never meet, or anything else fails,
622 returns false, otherwise returns true. Both A and B are assumed to
623 satisfy suitable_reference_p. */
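
/* For example, for A = a[i + 1] and B = a[i] from the example at the top of
   this file (assuming 4-byte elements, so both references have step 4), the
   difference of the offsets is 4 = 1 * step, hence *OFF is set to 1: after
   one more iteration, B accesses the location that A accesses now.  */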
624
625 static bool
626 determine_offset (struct data_reference *a, struct data_reference *b,
627 double_int *off)
628 {
629 aff_tree diff, baseb, step;
630 tree typea, typeb;
631
632 /* Check that both the references access the location in the same type. */
633 typea = TREE_TYPE (DR_REF (a));
634 typeb = TREE_TYPE (DR_REF (b));
635 if (!useless_type_conversion_p (typeb, typea))
636 return false;
637
638 /* Check whether the base address and the step of both references are the
639 same. */
640 if (!operand_equal_p (DR_STEP (a), DR_STEP (b), 0)
641 || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), 0))
642 return false;
643
644 if (integer_zerop (DR_STEP (a)))
645 {
646 /* If the references have loop invariant address, check that they access
647 exactly the same location. */
648 *off = double_int_zero;
649 return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0)
650 && operand_equal_p (DR_INIT (a), DR_INIT (b), 0));
651 }
652
653 /* Compare the offsets of the addresses, and check whether the difference
654 is a multiple of step. */
655 aff_combination_dr_offset (a, &diff);
656 aff_combination_dr_offset (b, &baseb);
657 aff_combination_scale (&baseb, double_int_minus_one);
658 aff_combination_add (&diff, &baseb);
659
660 tree_to_aff_combination_expand (DR_STEP (a), sizetype,
661 &step, &name_expansions);
662 return aff_combination_constant_multiple_p (&diff, &step, off);
663 }
664
665 /* Returns the last basic block in LOOP that we are sure is executed
666 whenever the loop is entered. */
667
668 static basic_block
669 last_always_executed_block (struct loop *loop)
670 {
671 unsigned i;
672 VEC (edge, heap) *exits = get_loop_exit_edges (loop);
673 edge ex;
674 basic_block last = loop->latch;
675
676 for (i = 0; VEC_iterate (edge, exits, i, ex); i++)
677 last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
678 VEC_free (edge, heap, exits);
679
680 return last;
681 }
682
683 /* Splits the dependence graph on DATAREFS described by DEPENDS into components. */
684
685 static struct component *
686 split_data_refs_to_components (struct loop *loop,
687 VEC (data_reference_p, heap) *datarefs,
688 VEC (ddr_p, heap) *depends)
689 {
690 unsigned i, n = VEC_length (data_reference_p, datarefs);
691 unsigned ca, ia, ib, bad;
692 unsigned *comp_father = XNEWVEC (unsigned, n + 1);
693 unsigned *comp_size = XNEWVEC (unsigned, n + 1);
694 struct component **comps;
695 struct data_reference *dr, *dra, *drb;
696 struct data_dependence_relation *ddr;
697 struct component *comp_list = NULL, *comp;
698 dref dataref;
699 basic_block last_always_executed = last_always_executed_block (loop);
700
701 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
702 {
703 if (!DR_REF (dr))
704 {
705 /* A fake reference for call or asm_expr that may clobber memory;
706 just fail. */
707 goto end;
708 }
709 dr->aux = (void *) (size_t) i;
710 comp_father[i] = i;
711 comp_size[i] = 1;
712 }
713
714 /* A component reserved for the "bad" data references. */
715 comp_father[n] = n;
716 comp_size[n] = 1;
717
718 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
719 {
720 enum ref_step_type dummy;
721
722 if (!suitable_reference_p (dr, &dummy))
723 {
724 ia = (unsigned) (size_t) dr->aux;
725 merge_comps (comp_father, comp_size, n, ia);
726 }
727 }
728
729 for (i = 0; VEC_iterate (ddr_p, depends, i, ddr); i++)
730 {
731 double_int dummy_off;
732
733 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
734 continue;
735
736 dra = DDR_A (ddr);
737 drb = DDR_B (ddr);
738 ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
739 ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
740 if (ia == ib)
741 continue;
742
743 bad = component_of (comp_father, n);
744
745 /* If both A and B are reads, we may ignore unsuitable dependences. */
746 if (DR_IS_READ (dra) && DR_IS_READ (drb)
747 && (ia == bad || ib == bad
748 || !determine_offset (dra, drb, &dummy_off)))
749 continue;
750
751 merge_comps (comp_father, comp_size, ia, ib);
752 }
753
754 comps = XCNEWVEC (struct component *, n);
755 bad = component_of (comp_father, n);
756 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
757 {
758 ia = (unsigned) (size_t) dr->aux;
759 ca = component_of (comp_father, ia);
760 if (ca == bad)
761 continue;
762
763 comp = comps[ca];
764 if (!comp)
765 {
766 comp = XCNEW (struct component);
767 comp->refs = VEC_alloc (dref, heap, comp_size[ca]);
768 comps[ca] = comp;
769 }
770
771 dataref = XCNEW (struct dref);
772 dataref->ref = dr;
773 dataref->stmt = DR_STMT (dr);
774 dataref->offset = double_int_zero;
775 dataref->distance = 0;
776
777 dataref->always_accessed
778 = dominated_by_p (CDI_DOMINATORS, last_always_executed,
779 bb_for_stmt (dataref->stmt));
780 dataref->pos = VEC_length (dref, comp->refs);
781 VEC_quick_push (dref, comp->refs, dataref);
782 }
783
784 for (i = 0; i < n; i++)
785 {
786 comp = comps[i];
787 if (comp)
788 {
789 comp->next = comp_list;
790 comp_list = comp;
791 }
792 }
793 free (comps);
794
795 end:
796 free (comp_father);
797 free (comp_size);
798 return comp_list;
799 }
800
801 /* Returns true if the component COMP satisfies the conditions
802 described in 2) at the beginning of this file. LOOP is the current
803 loop. */
804
805 static bool
806 suitable_component_p (struct loop *loop, struct component *comp)
807 {
808 unsigned i;
809 dref a, first;
810 basic_block ba, bp = loop->header;
811 bool ok, has_write = false;
812
813 for (i = 0; VEC_iterate (dref, comp->refs, i, a); i++)
814 {
815 ba = bb_for_stmt (a->stmt);
816
817 if (!just_once_each_iteration_p (loop, ba))
818 return false;
819
820 gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp));
821 bp = ba;
822
823 if (!DR_IS_READ (a->ref))
824 has_write = true;
825 }
826
827 first = VEC_index (dref, comp->refs, 0);
828 ok = suitable_reference_p (first->ref, &comp->comp_step);
829 gcc_assert (ok);
830 first->offset = double_int_zero;
831
832 for (i = 1; VEC_iterate (dref, comp->refs, i, a); i++)
833 {
834 if (!determine_offset (first->ref, a->ref, &a->offset))
835 return false;
836
837 #ifdef ENABLE_CHECKING
838 {
839 enum ref_step_type a_step;
840 ok = suitable_reference_p (a->ref, &a_step);
841 gcc_assert (ok && a_step == comp->comp_step);
842 }
843 #endif
844 }
845
846 /* If there is a write inside the component, we must know whether the
847 step is nonzero or not -- we would not otherwise be able to recognize
848 whether the value accessed by reads comes from the OFFSET-th iteration
849 or the previous one. */
850 if (has_write && comp->comp_step == RS_ANY)
851 return false;
852
853 return true;
854 }
855
856 /* Check the conditions on references inside each of components COMPS,
857 and remove the unsuitable components from the list. The new list
858 of components is returned. The conditions are described in 2) at
859 the beginning of this file. LOOP is the current loop. */
860
861 static struct component *
862 filter_suitable_components (struct loop *loop, struct component *comps)
863 {
864 struct component **comp, *act;
865
866 for (comp = &comps; *comp; )
867 {
868 act = *comp;
869 if (suitable_component_p (loop, act))
870 comp = &act->next;
871 else
872 {
873 *comp = act->next;
874 release_component (act);
875 }
876 }
877
878 return comps;
879 }
880
881 /* Compares two drefs A and B by their offset and position. Callback for
882 qsort. */
883
884 static int
885 order_drefs (const void *a, const void *b)
886 {
887 const dref *da = a;
888 const dref *db = b;
889 int offcmp = double_int_scmp ((*da)->offset, (*db)->offset);
890
891 if (offcmp != 0)
892 return offcmp;
893
894 return (*da)->pos - (*db)->pos;
895 }
896
897 /* Returns root of the CHAIN. */
898
899 static inline dref
900 get_chain_root (chain_p chain)
901 {
902 return VEC_index (dref, chain->refs, 0);
903 }
904
905 /* Adds REF to the chain CHAIN. */
906
907 static void
908 add_ref_to_chain (chain_p chain, dref ref)
909 {
910 dref root = get_chain_root (chain);
911 double_int dist;
912
913 gcc_assert (double_int_scmp (root->offset, ref->offset) <= 0);
914 dist = double_int_add (ref->offset, double_int_neg (root->offset));
915 if (double_int_ucmp (uhwi_to_double_int (MAX_DISTANCE), dist) <= 0)
916 return;
917 gcc_assert (double_int_fits_in_uhwi_p (dist));
918
919 VEC_safe_push (dref, heap, chain->refs, ref);
920
921 ref->distance = double_int_to_uhwi (dist);
922
923 if (ref->distance >= chain->length)
924 {
925 chain->length = ref->distance;
926 chain->has_max_use_after = false;
927 }
928
929 if (ref->distance == chain->length
930 && ref->pos > root->pos)
931 chain->has_max_use_after = true;
932
933 chain->all_always_accessed &= ref->always_accessed;
934 }
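
/* In the example at the top of this file, the chain for a is rooted at the
   write a[i+2] (offset -2); adding the read a[i+1] (offset -1) gives it
   distance 1, and adding the read a[i] (offset 0) gives it distance 2, so
   the length of the chain becomes 2.  */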
935
936 /* Returns the chain for invariant component COMP. */
937
938 static chain_p
939 make_invariant_chain (struct component *comp)
940 {
941 chain_p chain = XCNEW (struct chain);
942 unsigned i;
943 dref ref;
944
945 chain->type = CT_INVARIANT;
946
947 chain->all_always_accessed = true;
948
949 for (i = 0; VEC_iterate (dref, comp->refs, i, ref); i++)
950 {
951 VEC_safe_push (dref, heap, chain->refs, ref);
952 chain->all_always_accessed &= ref->always_accessed;
953 }
954
955 return chain;
956 }
957
958 /* Make a new chain rooted at REF. */
959
960 static chain_p
961 make_rooted_chain (dref ref)
962 {
963 chain_p chain = XCNEW (struct chain);
964
965 chain->type = DR_IS_READ (ref->ref) ? CT_LOAD : CT_STORE_LOAD;
966
967 VEC_safe_push (dref, heap, chain->refs, ref);
968 chain->all_always_accessed = ref->always_accessed;
969
970 ref->distance = 0;
971
972 return chain;
973 }
974
975 /* Returns true if CHAIN is not trivial. */
976
977 static bool
978 nontrivial_chain_p (chain_p chain)
979 {
980 return chain != NULL && VEC_length (dref, chain->refs) > 1;
981 }
982
983 /* Returns the ssa name that contains the value of REF, or NULL_TREE if there
984 is no such name. */
985
986 static tree
987 name_for_ref (dref ref)
988 {
989 tree name;
990
991 if (TREE_CODE (ref->stmt) == GIMPLE_MODIFY_STMT)
992 {
993 if (!ref->ref || DR_IS_READ (ref->ref))
994 name = GIMPLE_STMT_OPERAND (ref->stmt, 0);
995 else
996 name = GIMPLE_STMT_OPERAND (ref->stmt, 1);
997 }
998 else
999 name = PHI_RESULT (ref->stmt);
1000
1001 return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE);
1002 }
1003
1004 /* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
1005 iterations of the innermost enclosing loop). */
1006
1007 static bool
1008 valid_initializer_p (struct data_reference *ref,
1009 unsigned distance, struct data_reference *root)
1010 {
1011 aff_tree diff, base, step;
1012 double_int off;
1013
1014 if (!DR_BASE_ADDRESS (ref))
1015 return false;
1016
1017 /* Both REF and ROOT must be accessing the same object. */
1018 if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0))
1019 return false;
1020
1021 /* The initializer is defined outside of loop, hence its address must be
1022 invariant inside the loop. */
1023 gcc_assert (integer_zerop (DR_STEP (ref)));
1024
1025 /* If the address of the reference is invariant, initializer must access
1026 exactly the same location. */
1027 if (integer_zerop (DR_STEP (root)))
1028 return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), 0)
1029 && operand_equal_p (DR_INIT (ref), DR_INIT (root), 0));
1030
1031 /* Verify that this index of REF is equal to the root's index at
1032 -DISTANCE-th iteration. */
1033 aff_combination_dr_offset (root, &diff);
1034 aff_combination_dr_offset (ref, &base);
1035 aff_combination_scale (&base, double_int_minus_one);
1036 aff_combination_add (&diff, &base);
1037
1038 tree_to_aff_combination_expand (DR_STEP (root), sizetype, &step,
1039 &name_expansions);
1040 if (!aff_combination_constant_multiple_p (&diff, &step, &off))
1041 return false;
1042
1043 if (!double_int_equal_p (off, uhwi_to_double_int (distance)))
1044 return false;
1045
1046 return true;
1047 }
1048
1049 /* Finds looparound phi node of LOOP that copies the value of REF, and if its
1050 initial value is correct (equal to initial value of REF shifted by one
1051 iteration), returns the phi node. Otherwise, NULL_TREE is returned. ROOT
1052 is the root of the current chain. */
1053
1054 static tree
1055 find_looparound_phi (struct loop *loop, dref ref, dref root)
1056 {
1057 tree name, phi, init, init_stmt, init_ref;
1058 edge latch = loop_latch_edge (loop);
1059 struct data_reference init_dr;
1060
1061 if (TREE_CODE (ref->stmt) == GIMPLE_MODIFY_STMT)
1062 {
1063 if (DR_IS_READ (ref->ref))
1064 name = GIMPLE_STMT_OPERAND (ref->stmt, 0);
1065 else
1066 name = GIMPLE_STMT_OPERAND (ref->stmt, 1);
1067 }
1068 else
1069 name = PHI_RESULT (ref->stmt);
1070 if (!name)
1071 return NULL_TREE;
1072
1073 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1074 if (PHI_ARG_DEF_FROM_EDGE (phi, latch) == name)
1075 break;
1076
1077 if (!phi)
1078 return NULL_TREE;
1079
1080 init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1081 if (TREE_CODE (init) != SSA_NAME)
1082 return NULL_TREE;
1083 init_stmt = SSA_NAME_DEF_STMT (init);
1084 if (TREE_CODE (init_stmt) != GIMPLE_MODIFY_STMT)
1085 return NULL_TREE;
1086 gcc_assert (GIMPLE_STMT_OPERAND (init_stmt, 0) == init);
1087
1088 init_ref = GIMPLE_STMT_OPERAND (init_stmt, 1);
1089 if (!REFERENCE_CLASS_P (init_ref)
1090 && !DECL_P (init_ref))
1091 return NULL_TREE;
1092
1093 /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
1094 loop enclosing PHI). */
1095 memset (&init_dr, 0, sizeof (struct data_reference));
1096 DR_REF (&init_dr) = init_ref;
1097 DR_STMT (&init_dr) = phi;
1098 dr_analyze_innermost (&init_dr);
1099
1100 if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
1101 return NULL_TREE;
1102
1103 return phi;
1104 }
1105
1106 /* Adds a reference for the looparound copy of REF in PHI to CHAIN. */
1107
1108 static void
1109 insert_looparound_copy (chain_p chain, dref ref, tree phi)
1110 {
1111 dref nw = XCNEW (struct dref), aref;
1112 unsigned i;
1113
1114 nw->stmt = phi;
1115 nw->distance = ref->distance + 1;
1116 nw->always_accessed = 1;
1117
1118 for (i = 0; VEC_iterate (dref, chain->refs, i, aref); i++)
1119 if (aref->distance >= nw->distance)
1120 break;
1121 VEC_safe_insert (dref, heap, chain->refs, i, nw);
1122
1123 if (nw->distance > chain->length)
1124 {
1125 chain->length = nw->distance;
1126 chain->has_max_use_after = false;
1127 }
1128 }
1129
1130 /* For references in CHAIN that are copied around the LOOP (created previously
1131 by PRE, or by the user), add the results of such copies to the chain. This
1132 enables us to remove the copies by unrolling, and may need fewer registers
1133 (also, it may allow us to combine chains together). */
1134
1135 static void
1136 add_looparound_copies (struct loop *loop, chain_p chain)
1137 {
1138 unsigned i;
1139 dref ref, root = get_chain_root (chain);
1140 tree phi;
1141
1142 for (i = 0; VEC_iterate (dref, chain->refs, i, ref); i++)
1143 {
1144 phi = find_looparound_phi (loop, ref, root);
1145 if (!phi)
1146 continue;
1147
1148 bitmap_set_bit (looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
1149 insert_looparound_copy (chain, ref, phi);
1150 }
1151 }
1152
1153 /* Find roots of the values and determine distances in the component COMP.
1154 The references are redistributed into CHAINS. LOOP is the current
1155 loop. */
1156
1157 static void
1158 determine_roots_comp (struct loop *loop,
1159 struct component *comp,
1160 VEC (chain_p, heap) **chains)
1161 {
1162 unsigned i;
1163 dref a;
1164 chain_p chain = NULL;
1165
1166 /* Invariants are handled specially. */
1167 if (comp->comp_step == RS_INVARIANT)
1168 {
1169 chain = make_invariant_chain (comp);
1170 VEC_safe_push (chain_p, heap, *chains, chain);
1171 return;
1172 }
1173
1174 qsort (VEC_address (dref, comp->refs), VEC_length (dref, comp->refs),
1175 sizeof (dref), order_drefs);
1176
1177 for (i = 0; VEC_iterate (dref, comp->refs, i, a); i++)
1178 {
1179 if (!chain || !DR_IS_READ (a->ref))
1180 {
1181 if (nontrivial_chain_p (chain))
1182 VEC_safe_push (chain_p, heap, *chains, chain);
1183 else
1184 release_chain (chain);
1185 chain = make_rooted_chain (a);
1186 continue;
1187 }
1188
1189 add_ref_to_chain (chain, a);
1190 }
1191
1192 if (nontrivial_chain_p (chain))
1193 {
1194 add_looparound_copies (loop, chain);
1195 VEC_safe_push (chain_p, heap, *chains, chain);
1196 }
1197 else
1198 release_chain (chain);
1199 }
1200
1201 /* Finds roots of the values and determines distances in components COMPS, and
1202 separates the references into CHAINS. LOOP is the current loop. */
1203
1204 static void
1205 determine_roots (struct loop *loop,
1206 struct component *comps, VEC (chain_p, heap) **chains)
1207 {
1208 struct component *comp;
1209
1210 for (comp = comps; comp; comp = comp->next)
1211 determine_roots_comp (loop, comp, chains);
1212 }
1213
1214 /* Replace the reference in statement STMT with temporary variable
1215 NEW. If SET is true, NEW is instead initialized to the value of
1216 the reference in the statement. IN_LHS is true if the reference
1217 is in the lhs of STMT, false if it is in rhs. */
1218
1219 static void
1220 replace_ref_with (tree stmt, tree new, bool set, bool in_lhs)
1221 {
1222 tree val, new_stmt;
1223 block_stmt_iterator bsi;
1224
1225 if (TREE_CODE (stmt) == PHI_NODE)
1226 {
1227 gcc_assert (!in_lhs && !set);
1228
1229 val = PHI_RESULT (stmt);
1230 bsi = bsi_after_labels (bb_for_stmt (stmt));
1231 remove_phi_node (stmt, NULL_TREE, false);
1232
1233 /* Turn the phi node into GIMPLE_MODIFY_STMT. */
1234 new_stmt = build_gimple_modify_stmt (val, new);
1235 SSA_NAME_DEF_STMT (val) = new_stmt;
1236 bsi_insert_before (&bsi, new_stmt, BSI_NEW_STMT);
1237 return;
1238 }
1239
1240 /* Since the reference is of gimple_reg type, it should only
1241 appear as lhs or rhs of modify statement. */
1242 gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
1243
1244 /* If we do not need to initialize NEW, just replace the use of OLD. */
1245 if (!set)
1246 {
1247 gcc_assert (!in_lhs);
1248 GIMPLE_STMT_OPERAND (stmt, 1) = new;
1249 update_stmt (stmt);
1250 return;
1251 }
1252
1253 bsi = bsi_for_stmt (stmt);
1254 if (in_lhs)
1255 {
1256 val = GIMPLE_STMT_OPERAND (stmt, 1);
1257
1258 /* OLD = VAL
1259
1260 is transformed to
1261
1262 OLD = VAL
1263 NEW = VAL
1264
1265 (since the reference is of gimple_reg type, VAL is either gimple
1266 invariant or ssa name). */
1267 }
1268 else
1269 {
1270 val = GIMPLE_STMT_OPERAND (stmt, 0);
1271
1272 /* VAL = OLD
1273
1274 is transformed to
1275
1276 VAL = OLD
1277 NEW = VAL */
1278 }
1279
1280 new_stmt = build_gimple_modify_stmt (new, unshare_expr (val));
1281 bsi_insert_after (&bsi, new_stmt, BSI_NEW_STMT);
1282 SSA_NAME_DEF_STMT (new) = new_stmt;
1283 }
1284
1285 /* Returns the reference to the address of REF in the ITER-th iteration of
1286 LOOP, or NULL if we fail to determine it (ITER may be negative). We
1287 try to preserve the original shape of the reference (not rewrite it
1288 as an indirect ref to the address), to make tree_could_trap_p in
1289 prepare_initializers_chain return false more often. */
1290
1291 static tree
1292 ref_at_iteration (struct loop *loop, tree ref, int iter)
1293 {
1294 tree idx, *idx_p, type, val, op0 = NULL_TREE, ret;
1295 affine_iv iv;
1296 bool ok;
1297
1298 if (handled_component_p (ref))
1299 {
1300 op0 = ref_at_iteration (loop, TREE_OPERAND (ref, 0), iter);
1301 if (!op0)
1302 return NULL_TREE;
1303 }
1304 else if (!INDIRECT_REF_P (ref))
1305 return unshare_expr (ref);
1306
1307 if (TREE_CODE (ref) == INDIRECT_REF)
1308 {
1309 ret = build1 (INDIRECT_REF, TREE_TYPE (ref), NULL_TREE);
1310 idx = TREE_OPERAND (ref, 0);
1311 idx_p = &TREE_OPERAND (ret, 0);
1312 }
1313 else if (TREE_CODE (ref) == COMPONENT_REF)
1314 {
1315 /* Check that the offset is loop invariant. */
1316 if (TREE_OPERAND (ref, 2)
1317 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (ref, 2)))
1318 return NULL_TREE;
1319
1320 return build3 (COMPONENT_REF, TREE_TYPE (ref), op0,
1321 unshare_expr (TREE_OPERAND (ref, 1)),
1322 unshare_expr (TREE_OPERAND (ref, 2)));
1323 }
1324 else if (TREE_CODE (ref) == ARRAY_REF)
1325 {
1326 /* Check that the lower bound and the step are loop invariant. */
1327 if (TREE_OPERAND (ref, 2)
1328 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (ref, 2)))
1329 return NULL_TREE;
1330 if (TREE_OPERAND (ref, 3)
1331 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (ref, 3)))
1332 return NULL_TREE;
1333
1334 ret = build4 (ARRAY_REF, TREE_TYPE (ref), op0, NULL_TREE,
1335 unshare_expr (TREE_OPERAND (ref, 2)),
1336 unshare_expr (TREE_OPERAND (ref, 3)));
1337 idx = TREE_OPERAND (ref, 1);
1338 idx_p = &TREE_OPERAND (ret, 1);
1339 }
1340 else
1341 return NULL_TREE;
1342
1343 ok = simple_iv (loop, first_stmt (loop->header), idx, &iv, true);
1344 if (!ok)
1345 return NULL_TREE;
1346 iv.base = expand_simple_operations (iv.base);
1347 if (integer_zerop (iv.step))
1348 *idx_p = unshare_expr (iv.base);
1349 else
1350 {
1351 type = TREE_TYPE (iv.base);
1352 if (POINTER_TYPE_P (type))
1353 {
1354 val = fold_build2 (MULT_EXPR, sizetype, iv.step,
1355 size_int (iter));
1356 val = fold_build2 (POINTER_PLUS_EXPR, type, iv.base, val);
1357 }
1358 else
1359 {
1360 val = fold_build2 (MULT_EXPR, type, iv.step,
1361 build_int_cst_type (type, iter));
1362 val = fold_build2 (PLUS_EXPR, type, iv.base, val);
1363 }
1364 *idx_p = unshare_expr (val);
1365 }
1366
1367 return ret;
1368 }
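
/* For instance, for the reference a[i + 2] from the example at the top of
   this file (with i running from 0), ref_at_iteration with ITER = -2 yields
   a[0] and with ITER = -1 yields a[1] -- the locations loaded before the
   loop to initialize the chain variables.  */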
1369
1370 /* Get the initialization expression for the INDEX-th temporary variable
1371 of CHAIN. */
1372
1373 static tree
1374 get_init_expr (chain_p chain, unsigned index)
1375 {
1376 if (chain->type == CT_COMBINATION)
1377 {
1378 tree e1 = get_init_expr (chain->ch1, index);
1379 tree e2 = get_init_expr (chain->ch2, index);
1380
1381 return fold_build2 (chain->operator, chain->rslt_type, e1, e2);
1382 }
1383 else
1384 return VEC_index (tree, chain->inits, index);
1385 }
1386
1387 /* Marks all virtual operands of statement STMT for renaming. */
1388
1389 void
1390 mark_virtual_ops_for_renaming (tree stmt)
1391 {
1392 ssa_op_iter iter;
1393 tree var;
1394
1395 if (TREE_CODE (stmt) == PHI_NODE)
1396 {
1397 var = PHI_RESULT (stmt);
1398 if (is_gimple_reg (var))
1399 return;
1400
1401 if (TREE_CODE (var) == SSA_NAME)
1402 var = SSA_NAME_VAR (var);
1403 mark_sym_for_renaming (var);
1404 return;
1405 }
1406
1407 update_stmt (stmt);
1408
1409 FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_ALL_VIRTUALS)
1410 {
1411 if (TREE_CODE (var) == SSA_NAME)
1412 var = SSA_NAME_VAR (var);
1413 mark_sym_for_renaming (var);
1414 }
1415 }
1416
1417 /* Calls mark_virtual_ops_for_renaming for all members of LIST. */
1418
1419 static void
1420 mark_virtual_ops_for_renaming_list (tree list)
1421 {
1422 tree_stmt_iterator tsi;
1423
1424 for (tsi = tsi_start (list); !tsi_end_p (tsi); tsi_next (&tsi))
1425 mark_virtual_ops_for_renaming (tsi_stmt (tsi));
1426 }
1427
1428 /* Returns a new temporary variable used for the I-th variable carrying
1429 value of REF. The variable's uid is marked in TMP_VARS. */
1430
1431 static tree
1432 predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars)
1433 {
1434 tree type = TREE_TYPE (ref);
1435 tree var = create_tmp_var (type, get_lsm_tmp_name (ref, i));
1436
1437 /* We never access the components of the temporary variable in predictive
1438 commoning. */
1439 if (TREE_CODE (type) == COMPLEX_TYPE
1440 || TREE_CODE (type) == VECTOR_TYPE)
1441 DECL_GIMPLE_REG_P (var) = 1;
1442
1443 add_referenced_var (var);
1444 bitmap_set_bit (tmp_vars, DECL_UID (var));
1445 return var;
1446 }
1447
1448 /* Creates the variables for CHAIN, as well as phi nodes for them and
1449 initialization on entry to LOOP. Uids of the newly created
1450 temporary variables are marked in TMP_VARS. */
1451
1452 static void
1453 initialize_root_vars (struct loop *loop, chain_p chain, bitmap tmp_vars)
1454 {
1455 unsigned i;
1456 unsigned n = chain->length;
1457 dref root = get_chain_root (chain);
1458 bool reuse_first = !chain->has_max_use_after;
1459 tree ref, init, var, next, stmts;
1460 tree phi;
1461 edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1462
1463 /* If N == 0, then all the references are within a single iteration. And
1464 since this is a nonempty chain, reuse_first cannot be true. */
1465 gcc_assert (n > 0 || !reuse_first);
1466
1467 chain->vars = VEC_alloc (tree, heap, n + 1);
1468
1469 if (chain->type == CT_COMBINATION)
1470 ref = GIMPLE_STMT_OPERAND (root->stmt, 0);
1471 else
1472 ref = DR_REF (root->ref);
1473
1474 for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
1475 {
1476 var = predcom_tmp_var (ref, i, tmp_vars);
1477 VEC_quick_push (tree, chain->vars, var);
1478 }
1479 if (reuse_first)
1480 VEC_quick_push (tree, chain->vars, VEC_index (tree, chain->vars, 0));
1481
1482 for (i = 0; VEC_iterate (tree, chain->vars, i, var); i++)
1483 VEC_replace (tree, chain->vars, i, make_ssa_name (var, NULL_TREE));
1484
1485 for (i = 0; i < n; i++)
1486 {
1487 var = VEC_index (tree, chain->vars, i);
1488 next = VEC_index (tree, chain->vars, i + 1);
1489 init = get_init_expr (chain, i);
1490
1491 init = force_gimple_operand (init, &stmts, true, NULL_TREE);
1492 if (stmts)
1493 {
1494 mark_virtual_ops_for_renaming_list (stmts);
1495 bsi_insert_on_edge_immediate (entry, stmts);
1496 }
1497
1498 phi = create_phi_node (var, loop->header);
1499 SSA_NAME_DEF_STMT (var) = phi;
1500 add_phi_arg (phi, init, entry);
1501 add_phi_arg (phi, next, latch);
1502 }
1503 }
1504
1505 /* Create the variables and initialization statement for root of chain
1506 CHAIN. Uids of the newly created temporary variables are marked
1507 in TMP_VARS. */
1508
1509 static void
1510 initialize_root (struct loop *loop, chain_p chain, bitmap tmp_vars)
1511 {
1512 dref root = get_chain_root (chain);
1513 bool in_lhs = (chain->type == CT_STORE_LOAD
1514 || chain->type == CT_COMBINATION);
1515
1516 initialize_root_vars (loop, chain, tmp_vars);
1517 replace_ref_with (root->stmt,
1518 VEC_index (tree, chain->vars, chain->length),
1519 true, in_lhs);
1520 }
1521
1522 /* Initializes a variable for load motion for ROOT and prepares phi nodes and
1523 initialization on entry to LOOP if necessary. The ssa name for the variable
1524 is stored in VARS. If WRITTEN is true, also a phi node to copy its value
1525 around the loop is created. Uid of the newly created temporary variable
1526 is marked in TMP_VARS. INITS is the list containing the (single)
1527 initializer. */
1528
1529 static void
1530 initialize_root_vars_lm (struct loop *loop, dref root, bool written,
1531 VEC(tree, heap) **vars, VEC(tree, heap) *inits,
1532 bitmap tmp_vars)
1533 {
1534 unsigned i;
1535 tree ref = DR_REF (root->ref), init, var, next, stmts;
1536 tree phi;
1537 edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1538
1539 /* Find the initializer for the variable, and check that it cannot
1540 trap. */
1541 init = VEC_index (tree, inits, 0);
1542
1543 *vars = VEC_alloc (tree, heap, written ? 2 : 1);
1544 var = predcom_tmp_var (ref, 0, tmp_vars);
1545 VEC_quick_push (tree, *vars, var);
1546 if (written)
1547 VEC_quick_push (tree, *vars, VEC_index (tree, *vars, 0));
1548
1549 for (i = 0; VEC_iterate (tree, *vars, i, var); i++)
1550 VEC_replace (tree, *vars, i, make_ssa_name (var, NULL_TREE));
1551
1552 var = VEC_index (tree, *vars, 0);
1553
1554 init = force_gimple_operand (init, &stmts, written, NULL_TREE);
1555 if (stmts)
1556 {
1557 mark_virtual_ops_for_renaming_list (stmts);
1558 bsi_insert_on_edge_immediate (entry, stmts);
1559 }
1560
1561 if (written)
1562 {
1563 next = VEC_index (tree, *vars, 1);
1564 phi = create_phi_node (var, loop->header);
1565 SSA_NAME_DEF_STMT (var) = phi;
1566 add_phi_arg (phi, init, entry);
1567 add_phi_arg (phi, next, latch);
1568 }
1569 else
1570 {
1571 init = build_gimple_modify_stmt (var, init);
1572 SSA_NAME_DEF_STMT (var) = init;
1573 mark_virtual_ops_for_renaming (init);
1574 bsi_insert_on_edge_immediate (entry, init);
1575 }
1576 }
1577
1578
1579 /* Execute load motion for references in chain CHAIN. Uids of the newly
1580 created temporary variables are marked in TMP_VARS. */
1581
1582 static void
1583 execute_load_motion (struct loop *loop, chain_p chain, bitmap tmp_vars)
1584 {
1585 VEC (tree, heap) *vars;
1586 dref a;
1587 unsigned n_writes = 0, ridx, i;
1588 tree var;
1589
1590 gcc_assert (chain->type == CT_INVARIANT);
1591 gcc_assert (!chain->combined);
1592 for (i = 0; VEC_iterate (dref, chain->refs, i, a); i++)
1593 if (!DR_IS_READ (a->ref))
1594 n_writes++;
1595
1596 /* If there are no reads in the loop, there is nothing to do. */
1597 if (n_writes == VEC_length (dref, chain->refs))
1598 return;
1599
1600 initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
1601 &vars, chain->inits, tmp_vars);
1602
1603 ridx = 0;
1604 for (i = 0; VEC_iterate (dref, chain->refs, i, a); i++)
1605 {
1606 bool is_read = DR_IS_READ (a->ref);
1607 mark_virtual_ops_for_renaming (a->stmt);
1608
1609 if (!DR_IS_READ (a->ref))
1610 {
1611 n_writes--;
1612 if (n_writes)
1613 {
1614 var = VEC_index (tree, vars, 0);
1615 var = make_ssa_name (SSA_NAME_VAR (var), NULL_TREE);
1616 VEC_replace (tree, vars, 0, var);
1617 }
1618 else
1619 ridx = 1;
1620 }
1621
1622 replace_ref_with (a->stmt, VEC_index (tree, vars, ridx),
1623 !is_read, !is_read);
1624 }
1625
1626 VEC_free (tree, heap, vars);
1627 }
1628
1629 /* Returns the single statement in which NAME is used, excepting
1630 the looparound phi nodes contained in one of the chains. If there is no
1631 such statement, or if there are several, NULL_TREE is returned. */
1632
1633 static tree
1634 single_nonlooparound_use (tree name)
1635 {
1636 use_operand_p use;
1637 imm_use_iterator it;
1638 tree stmt, ret = NULL_TREE;
1639
1640 FOR_EACH_IMM_USE_FAST (use, it, name)
1641 {
1642 stmt = USE_STMT (use);
1643
1644 if (TREE_CODE (stmt) == PHI_NODE)
1645 {
1646 /* Ignore uses in looparound phi nodes. Uses in other phi nodes
1647 could not be processed anyway, so just fail for them. */
1648 if (bitmap_bit_p (looparound_phis,
1649 SSA_NAME_VERSION (PHI_RESULT (stmt))))
1650 continue;
1651
1652 return NULL_TREE;
1653 }
1654 else if (ret != NULL_TREE)
1655 return NULL_TREE;
1656 else
1657 ret = stmt;
1658 }
1659
1660 return ret;
1661 }
1662
1663 /* Remove statement STMT, as well as the chain of assignments in which it is
1664 used. */
1665
1666 static void
1667 remove_stmt (tree stmt)
1668 {
1669 tree next, name;
1670
1671 if (TREE_CODE (stmt) == PHI_NODE)
1672 {
1673 name = PHI_RESULT (stmt);
1674 next = single_nonlooparound_use (name);
1675 remove_phi_node (stmt, NULL_TREE, true);
1676
1677 if (!next
1678 || TREE_CODE (next) != GIMPLE_MODIFY_STMT
1679 || GIMPLE_STMT_OPERAND (next, 1) != name)
1680 return;
1681
1682 stmt = next;
1683 }
1684
1685 while (1)
1686 {
1687 block_stmt_iterator bsi;
1688
1689 bsi = bsi_for_stmt (stmt);
1690
1691 name = GIMPLE_STMT_OPERAND (stmt, 0);
1692 gcc_assert (TREE_CODE (name) == SSA_NAME);
1693
1694 next = single_nonlooparound_use (name);
1695
1696 mark_virtual_ops_for_renaming (stmt);
1697 bsi_remove (&bsi, true);
1698
1699 if (!next
1700 || TREE_CODE (next) != GIMPLE_MODIFY_STMT
1701 || GIMPLE_STMT_OPERAND (next, 1) != name)
1702 return;
1703
1704 stmt = next;
1705 }
1706 }
1707
1708 /* Perform the predictive commoning optimization for a chain CHAIN.
1709 Uids of the newly created temporary variables are marked in TMP_VARS. */
1710
1711 static void
1712 execute_pred_commoning_chain (struct loop *loop, chain_p chain,
1713 bitmap tmp_vars)
1714 {
1715 unsigned i;
1716 dref a, root;
1717 tree var;
1718
1719 if (chain->combined)
1720 {
1721 /* For combined chains, just remove the statements that are used to
1722 compute the values of the expression (except for the root one). */
1723 for (i = 1; VEC_iterate (dref, chain->refs, i, a); i++)
1724 remove_stmt (a->stmt);
1725 }
1726 else
1727 {
1728 /* For non-combined chains, set up the variables that hold its value,
1729 and replace the uses of the original references by these
1730 variables. */
1731 root = get_chain_root (chain);
1732 mark_virtual_ops_for_renaming (root->stmt);
1733
1734 initialize_root (loop, chain, tmp_vars);
1735 for (i = 1; VEC_iterate (dref, chain->refs, i, a); i++)
1736 {
1737 mark_virtual_ops_for_renaming (a->stmt);
1738 var = VEC_index (tree, chain->vars, chain->length - a->distance);
1739 replace_ref_with (a->stmt, var, false, false);
1740 }
1741 }
1742 }
1743
1744 /* Determines the unroll factor necessary to remove as many temporary variable
1745 copies as possible. CHAINS is the list of chains that will be
1746 optimized. */
1747
1748 static unsigned
1749 determine_unroll_factor (VEC (chain_p, heap) *chains)
1750 {
1751 chain_p chain;
1752 unsigned factor = 1, af, nfactor, i;
1753 unsigned max = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1754
1755 for (i = 0; VEC_iterate (chain_p, chains, i, chain); i++)
1756 {
1757 if (chain->type == CT_INVARIANT || chain->combined)
1758 continue;
1759
1760 /* The best unroll factor for this chain is equal to the number of
1761 temporary variables that we create for it. */
1762 af = chain->length;
1763 if (chain->has_max_use_after)
1764 af++;
1765
1766 nfactor = factor * af / gcd (factor, af);
1767 if (nfactor <= max)
1768 factor = nfactor;
1769 }
1770
1771 return factor;
1772 }
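
/* In the example at the top of this file, only the chain for a is taken into
   account (the chain for b is invariant): its length is 2 and the value with
   the maximal distance is not used after the root, so af = 2 and the
   resulting unroll factor is F = 2, as described in 6) above.  */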
1773
1774 /* Perform the predictive commoning optimization for CHAINS.
1775 Uids of the newly created temporary variables are marked in TMP_VARS. */
1776
1777 static void
1778 execute_pred_commoning (struct loop *loop, VEC (chain_p, heap) *chains,
1779 bitmap tmp_vars)
1780 {
1781 chain_p chain;
1782 unsigned i;
1783
1784 for (i = 0; VEC_iterate (chain_p, chains, i, chain); i++)
1785 {
1786 if (chain->type == CT_INVARIANT)
1787 execute_load_motion (loop, chain, tmp_vars);
1788 else
1789 execute_pred_commoning_chain (loop, chain, tmp_vars);
1790 }
1791
1792 update_ssa (TODO_update_ssa_only_virtuals);
1793 }
1794
1795 /* For each reference in CHAINS, if its statement is a phi node,
1796 set it to the ssa name defined by that phi node. */
1797
1798 static void
1799 replace_phis_by_defined_names (VEC (chain_p, heap) *chains)
1800 {
1801 chain_p chain;
1802 dref a;
1803 unsigned i, j;
1804
1805 for (i = 0; VEC_iterate (chain_p, chains, i, chain); i++)
1806 for (j = 0; VEC_iterate (dref, chain->refs, j, a); j++)
1807 {
1808 gcc_assert (TREE_CODE (a->stmt) != SSA_NAME);
1809 if (TREE_CODE (a->stmt) == PHI_NODE)
1810 a->stmt = PHI_RESULT (a->stmt);
1811 }
1812 }
1813
1814 /* For each reference in CHAINS, if its statement is an ssa name, set
1815 it back to the phi node that defines it. */
1816
1817 static void
1818 replace_names_by_phis (VEC (chain_p, heap) *chains)
1819 {
1820 chain_p chain;
1821 dref a;
1822 unsigned i, j;
1823
1824 for (i = 0; VEC_iterate (chain_p, chains, i, chain); i++)
1825 for (j = 0; VEC_iterate (dref, chain->refs, j, a); j++)
1826 if (TREE_CODE (a->stmt) == SSA_NAME)
1827 {
1828 a->stmt = SSA_NAME_DEF_STMT (a->stmt);
1829 gcc_assert (TREE_CODE (a->stmt) == PHI_NODE);
1830 }
1831 }
1832
1833 /* Wrapper over execute_pred_commoning, to pass it as a callback
1834 to tree_transform_and_unroll_loop. */
1835
1836 struct epcc_data
1837 {
1838 VEC (chain_p, heap) *chains;
1839 bitmap tmp_vars;
1840 };
1841
1842 static void
1843 execute_pred_commoning_cbck (struct loop *loop, void *data)
1844 {
1845 struct epcc_data *dta = data;
1846
1847 /* Restore phi nodes that were replaced by ssa names before
1848 tree_transform_and_unroll_loop (see detailed description in
1849 tree_predictive_commoning_loop). */
1850 replace_names_by_phis (dta->chains);
1851 execute_pred_commoning (loop, dta->chains, dta->tmp_vars);
1852 }
1853
1854 /* Returns true if we can and should unroll LOOP FACTOR times. Number
1855 of iterations of the loop is returned in NITER. */
1856
1857 static bool
1858 should_unroll_loop_p (struct loop *loop, unsigned factor,
1859 struct tree_niter_desc *niter)
1860 {
1861 edge exit;
1862
1863 if (factor == 1)
1864 return false;
1865
1866 /* Check whether unrolling is possible. We only want to unroll loops
1867 for which we are able to determine the number of iterations. We also
1868 want to split the extra iterations of the loop from its end,
1869 therefore we require that the loop has precisely one
1870 exit. */
1871
1872 exit = single_dom_exit (loop);
1873 if (!exit)
1874 return false;
1875
1876 if (!number_of_iterations_exit (loop, exit, niter, false))
1877 return false;
1878
1879 /* And of course, we must be able to duplicate the loop. */
1880 if (!can_duplicate_loop_p (loop))
1881 return false;
1882
1883 /* The final loop should be small enough. */
1884 if (tree_num_loop_insns (loop, &eni_size_weights) * factor
1885 > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS))
1886 return false;
1887
1888 return true;
1889 }
1890
1891 /* Base NAME and all the names in the chain of phi nodes that use it
1892 on variable VAR. The phi nodes are recognized by being in the copies of
1893 the header of the LOOP. */
1894
1895 static void
1896 base_names_in_chain_on (struct loop *loop, tree name, tree var)
1897 {
1898 tree stmt, phi;
1899 imm_use_iterator iter;
1900 edge e;
1901
1902 SSA_NAME_VAR (name) = var;
1903
1904 while (1)
1905 {
1906 phi = NULL;
1907 FOR_EACH_IMM_USE_STMT (stmt, iter, name)
1908 {
1909 if (TREE_CODE (stmt) == PHI_NODE
1910 && flow_bb_inside_loop_p (loop, bb_for_stmt (stmt)))
1911 {
1912 phi = stmt;
1913 BREAK_FROM_IMM_USE_STMT (iter);
1914 }
1915 }
1916 if (!phi)
1917 return;
1918
1919 if (bb_for_stmt (phi) == loop->header)
1920 e = loop_latch_edge (loop);
1921 else
1922 e = single_pred_edge (bb_for_stmt (stmt));
1923
1924 name = PHI_RESULT (phi);
1925 SSA_NAME_VAR (name) = var;
1926 }
1927 }
1928
1929 /* Given an unrolled LOOP after predictive commoning, remove the
1930 register copies arising from phi nodes by changing the base
1931 variables of SSA names. TMP_VARS is the set of the temporary variables
1932 for which we want to perform this. */
1933
1934 static void
1935 eliminate_temp_copies (struct loop *loop, bitmap tmp_vars)
1936 {
1937 edge e;
1938 tree phi, name, use, var, stmt;
1939
1940 e = loop_latch_edge (loop);
1941 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1942 {
1943 name = PHI_RESULT (phi);
1944 var = SSA_NAME_VAR (name);
1945 if (!bitmap_bit_p (tmp_vars, DECL_UID (var)))
1946 continue;
1947 use = PHI_ARG_DEF_FROM_EDGE (phi, e);
1948 gcc_assert (TREE_CODE (use) == SSA_NAME);
1949
1950 /* Base all the ssa names in the ud and du chain of NAME on VAR. */
1951 stmt = SSA_NAME_DEF_STMT (use);
1952 while (TREE_CODE (stmt) == PHI_NODE
1953 /* In case we could not unroll the loop enough to eliminate
1954 all copies, we may reach the loop header before the defining
1955 statement (in that case, some register copies will be present
1956 in loop latch in the final code, corresponding to the newly
1957 created looparound phi nodes). */
1958 && bb_for_stmt (stmt) != loop->header)
1959 {
1960 gcc_assert (single_pred_p (bb_for_stmt (stmt)));
1961 use = PHI_ARG_DEF (stmt, 0);
1962 stmt = SSA_NAME_DEF_STMT (use);
1963 }
1964
1965 base_names_in_chain_on (loop, use, var);
1966 }
1967 }
1968
1969 /* Returns true if CHAIN is suitable to be combined. */
1970
1971 static bool
1972 chain_can_be_combined_p (chain_p chain)
1973 {
1974 return (!chain->combined
1975 && (chain->type == CT_LOAD || chain->type == CT_COMBINATION));
1976 }
1977
1978 /* Returns the modify statement that uses NAME. Skips over plain copy
1979 assignments; NAME is replaced with the actual name used in the returned
1980 statement. */
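/* E.g., starting from a_1 with the statements a_2 = a_1; x_3 = a_2 + b_4;
   the statement defining x_3 is returned and *NAME is set to a_2
   (illustrative SSA names). */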
1981
1982 static tree
1983 find_use_stmt (tree *name)
1984 {
1985 tree stmt, rhs, lhs;
1986
1987 /* Skip over assignments. */
1988 while (1)
1989 {
1990 stmt = single_nonlooparound_use (*name);
1991 if (!stmt)
1992 return NULL_TREE;
1993
1994 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
1995 return NULL_TREE;
1996
1997 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
1998 if (TREE_CODE (lhs) != SSA_NAME)
1999 return NULL_TREE;
2000
2001 rhs = GIMPLE_STMT_OPERAND (stmt, 1);
2002 if (rhs != *name)
2003 break;
2004
2005 *name = lhs;
2006 }
2007
2008 if (!EXPR_P (rhs)
2009 || REFERENCE_CLASS_P (rhs)
2010 || TREE_CODE_LENGTH (TREE_CODE (rhs)) != 2)
2011 return NULL_TREE;
2012
2013 return stmt;
2014 }
2015
2016 /* Returns true if we may perform reassociation for operation CODE in TYPE. */
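/* Reassociation is not safe for floating point in general -- e.g.
   (a + b) + c need not equal a + (b + c) because of rounding -- hence it
   is only allowed there under -funsafe-math-optimizations. */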
2017
2018 static bool
2019 may_reassociate_p (tree type, enum tree_code code)
2020 {
2021 if (FLOAT_TYPE_P (type)
2022 && !flag_unsafe_math_optimizations)
2023 return false;
2024
2025 return (commutative_tree_code (code)
2026 && associative_tree_code (code));
2027 }
2028
2029 /* If the operation used in STMT is associative and commutative, walk up the
2030 tree of the same operations and return its root. The distance to the root
2031 is stored in DISTANCE. */
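/* For example (illustrative names), given t1_1 = a_5 + b_6;
   t2_2 = t1_1 + c_7; t3_3 = t2_2 + d_8; and assuming t3_3 has no further
   use in a PLUS_EXPR, starting from the statement defining t1_1 the root
   is the statement defining t3_3 and DISTANCE is set to 2. */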
2032
2033 static tree
2034 find_associative_operation_root (tree stmt, unsigned *distance)
2035 {
2036 tree rhs = GIMPLE_STMT_OPERAND (stmt, 1), lhs, next;
2037 enum tree_code code = TREE_CODE (rhs);
2038 unsigned dist = 0;
2039
2040 if (!may_reassociate_p (TREE_TYPE (rhs), code))
2041 return NULL_TREE;
2042
2043 while (1)
2044 {
2045 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
2046 gcc_assert (TREE_CODE (lhs) == SSA_NAME);
2047
2048 next = find_use_stmt (&lhs);
2049 if (!next)
2050 break;
2051
2052 rhs = GIMPLE_STMT_OPERAND (next, 1);
2053 if (TREE_CODE (rhs) != code)
2054 break;
2055
2056 stmt = next;
2057 dist++;
2058 }
2059
2060 if (distance)
2061 *distance = dist;
2062 return stmt;
2063 }
2064
2065 /* Returns the common statement in which NAME1 and NAME2 have a use. If there
2066 is no such statement, returns NULL_TREE. In case the operation used on
2067 NAME1 and NAME2 is associative and commutative, returns the root of the
2068 tree formed by this operation instead of the statement that uses NAME1 or
2069 NAME2. */
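/* For instance (illustrative names), with t_3 = a_1 + c_5; t_4 = t_3 + b_2;
   no single statement uses both a_1 and b_2, but both uses belong to the
   same PLUS_EXPR tree rooted at the statement defining t_4, so that
   statement is returned. */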
2070
2071 static tree
2072 find_common_use_stmt (tree *name1, tree *name2)
2073 {
2074 tree stmt1, stmt2;
2075
2076 stmt1 = find_use_stmt (name1);
2077 if (!stmt1)
2078 return NULL_TREE;
2079
2080 stmt2 = find_use_stmt (name2);
2081 if (!stmt2)
2082 return NULL_TREE;
2083
2084 if (stmt1 == stmt2)
2085 return stmt1;
2086
2087 stmt1 = find_associative_operation_root (stmt1, NULL);
2088 if (!stmt1)
2089 return NULL_TREE;
2090 stmt2 = find_associative_operation_root (stmt2, NULL);
2091 if (!stmt2)
2092 return NULL_TREE;
2093
2094 return (stmt1 == stmt2 ? stmt1 : NULL_TREE);
2095 }
2096
2097 /* Checks whether R1 and R2 are combined together using CODE, with the result
2098 in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
2099 if it is true. If CODE is ERROR_MARK, set these values instead. */
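/* E.g., if the values of R1 and R2 are always used as in x_5 = a_1 - b_2,
   with a_1 loaded by R1 and b_2 by R2 (illustrative names), the first call
   records MINUS_EXPR, SWAP = false and the type of the subtraction; later
   pairs from the chains must then match these values. */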
2100
2101 static bool
2102 combinable_refs_p (dref r1, dref r2,
2103 enum tree_code *code, bool *swap, tree *rslt_type)
2104 {
2105 enum tree_code acode;
2106 bool aswap;
2107 tree atype;
2108 tree name1, name2, stmt, rhs;
2109
2110 name1 = name_for_ref (r1);
2111 name2 = name_for_ref (r2);
2112 gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);
2113
2114 stmt = find_common_use_stmt (&name1, &name2);
2115
2116 if (!stmt)
2117 return false;
2118
2119 rhs = GIMPLE_STMT_OPERAND (stmt, 1);
2120 acode = TREE_CODE (rhs);
2121 aswap = (!commutative_tree_code (acode)
2122 && TREE_OPERAND (rhs, 0) != name1);
2123 atype = TREE_TYPE (rhs);
2124
2125 if (*code == ERROR_MARK)
2126 {
2127 *code = acode;
2128 *swap = aswap;
2129 *rslt_type = atype;
2130 return true;
2131 }
2132
2133 return (*code == acode
2134 && *swap == aswap
2135 && *rslt_type == atype);
2136 }
2137
2138 /* Remove OP from the operation on the rhs of STMT, and replace STMT with
2139 an assignment of the remaining operand. */
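/* E.g., for STMT "x_1 = a_2 + b_3" and OP b_3, STMT becomes "x_1 = a_2". */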
2140
2141 static void
2142 remove_name_from_operation (tree stmt, tree op)
2143 {
2144 tree *rhs;
2145
2146 gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
2147
2148 rhs = &GIMPLE_STMT_OPERAND (stmt, 1);
2149 if (TREE_OPERAND (*rhs, 0) == op)
2150 *rhs = TREE_OPERAND (*rhs, 1);
2151 else if (TREE_OPERAND (*rhs, 1) == op)
2152 *rhs = TREE_OPERAND (*rhs, 0);
2153 else
2154 gcc_unreachable ();
2155 update_stmt (stmt);
2156 }
2157
2158 /* Reassociates the expression in which NAME1 and NAME2 are used so that they
2159 are combined in a single statement, and returns this statement. */
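/* Illustration (made-up names): if NAME1 is a_1 and NAME2 is b_2, used in
   t_3 = a_1 + x_5; t_4 = t_3 + b_2; the code below produces roughly
   t_3 = x_5; predreastmp_6 = a_1 + b_2; predreastmp_7 = t_3;
   t_4 = predreastmp_6 + predreastmp_7; and returns the statement defining
   predreastmp_6. */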
2160
2161 static tree
2162 reassociate_to_the_same_stmt (tree name1, tree name2)
2163 {
2164 tree stmt1, stmt2, root1, root2, r1, r2, s1, s2;
2165 tree new_stmt, tmp_stmt, new_name, tmp_name, var;
2166 unsigned dist1, dist2;
2167 enum tree_code code;
2168 tree type = TREE_TYPE (name1);
2169 block_stmt_iterator bsi;
2170
2171 stmt1 = find_use_stmt (&name1);
2172 stmt2 = find_use_stmt (&name2);
2173 root1 = find_associative_operation_root (stmt1, &dist1);
2174 root2 = find_associative_operation_root (stmt2, &dist2);
2175 code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt1, 1));
2176
2177 gcc_assert (root1 && root2 && root1 == root2
2178 && code == TREE_CODE (GIMPLE_STMT_OPERAND (stmt2, 1)));
2179
2180 /* Find the root of the nearest expression in which both NAME1 and NAME2
2181 are used. */
2182 r1 = name1;
2183 s1 = stmt1;
2184 r2 = name2;
2185 s2 = stmt2;
2186
2187 while (dist1 > dist2)
2188 {
2189 s1 = find_use_stmt (&r1);
2190 r1 = GIMPLE_STMT_OPERAND (s1, 0);
2191 dist1--;
2192 }
2193 while (dist2 > dist1)
2194 {
2195 s2 = find_use_stmt (&r2);
2196 r2 = GIMPLE_STMT_OPERAND (s2, 0);
2197 dist2--;
2198 }
2199
2200 while (s1 != s2)
2201 {
2202 s1 = find_use_stmt (&r1);
2203 r1 = GIMPLE_STMT_OPERAND (s1, 0);
2204 s2 = find_use_stmt (&r2);
2205 r2 = GIMPLE_STMT_OPERAND (s2, 0);
2206 }
2207
2208 /* Remove NAME1 and NAME2 from the statements in which they are
2209 currently used. */
2210 remove_name_from_operation (stmt1, name1);
2211 remove_name_from_operation (stmt2, name2);
2212
2213 /* Insert the new statement combining NAME1 and NAME2 before S1, and
2214 combine it with the rhs of S1. */
2215 var = create_tmp_var (type, "predreastmp");
2216 add_referenced_var (var);
2217 new_name = make_ssa_name (var, NULL_TREE);
2218 new_stmt = build_gimple_modify_stmt (new_name,
2219 fold_build2 (code, type, name1, name2));
2220 SSA_NAME_DEF_STMT (new_name) = new_stmt;
2221
2222 var = create_tmp_var (type, "predreastmp");
2223 add_referenced_var (var);
2224 tmp_name = make_ssa_name (var, NULL_TREE);
2225 tmp_stmt = build_gimple_modify_stmt (tmp_name,
2226 GIMPLE_STMT_OPERAND (s1, 1));
2227 SSA_NAME_DEF_STMT (tmp_name) = tmp_stmt;
2228
2229 GIMPLE_STMT_OPERAND (s1, 1) = fold_build2 (code, type, new_name, tmp_name);
2230 update_stmt (s1);
2231
2232 bsi = bsi_for_stmt (s1);
2233 bsi_insert_before (&bsi, new_stmt, BSI_SAME_STMT);
2234 bsi_insert_before (&bsi, tmp_stmt, BSI_SAME_STMT);
2235
2236 return new_stmt;
2237 }
2238
2239 /* Returns the statement that combines references R1 and R2. In case R1
2240 and R2 are not used in the same statement, but they are used with an
2241 associative and commutative operation in the same expression, reassociate
2242 the expression so that they are used in the same statement. */
2243
2244 static tree
2245 stmt_combining_refs (dref r1, dref r2)
2246 {
2247 tree stmt1, stmt2;
2248 tree name1 = name_for_ref (r1);
2249 tree name2 = name_for_ref (r2);
2250
2251 stmt1 = find_use_stmt (&name1);
2252 stmt2 = find_use_stmt (&name2);
2253 if (stmt1 == stmt2)
2254 return stmt1;
2255
2256 return reassociate_to_the_same_stmt (name1, name2);
2257 }
2258
2259 /* Tries to combine chains CH1 and CH2 together. If this succeeds, the
2260 description of the new chain is returned; otherwise, NULL is returned. */
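/* For instance, the load chains for a[i] and b[i] can be combined into a
   single CT_COMBINATION chain representing a[i] + b[i], provided the
   corresponding references have equal distances and are always combined
   by the same operation (see combinable_refs_p). */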
2261
2262 static chain_p
2263 combine_chains (chain_p ch1, chain_p ch2)
2264 {
2265 dref r1, r2, nw;
2266 enum tree_code op = ERROR_MARK;
2267 bool swap = false;
2268 chain_p new_chain;
2269 unsigned i;
2270 tree root_stmt;
2271 tree rslt_type = NULL_TREE;
2272
2273 if (ch1 == ch2)
2274 return NULL;
2275 if (ch1->length != ch2->length)
2276 return NULL;
2277
2278 if (VEC_length (dref, ch1->refs) != VEC_length (dref, ch2->refs))
2279 return NULL;
2280
2281 for (i = 0; (VEC_iterate (dref, ch1->refs, i, r1)
2282 && VEC_iterate (dref, ch2->refs, i, r2)); i++)
2283 {
2284 if (r1->distance != r2->distance)
2285 return NULL;
2286
2287 if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type))
2288 return NULL;
2289 }
2290
2291 if (swap)
2292 {
2293 chain_p tmp = ch1;
2294 ch1 = ch2;
2295 ch2 = tmp;
2296 }
2297
2298 new_chain = XCNEW (struct chain);
2299 new_chain->type = CT_COMBINATION;
2300 new_chain->operator = op;
2301 new_chain->ch1 = ch1;
2302 new_chain->ch2 = ch2;
2303 new_chain->rslt_type = rslt_type;
2304 new_chain->length = ch1->length;
2305
2306 for (i = 0; (VEC_iterate (dref, ch1->refs, i, r1)
2307 && VEC_iterate (dref, ch2->refs, i, r2)); i++)
2308 {
2309 nw = XCNEW (struct dref);
2310 nw->stmt = stmt_combining_refs (r1, r2);
2311 nw->distance = r1->distance;
2312
2313 VEC_safe_push (dref, heap, new_chain->refs, nw);
2314 }
2315
2316 new_chain->has_max_use_after = false;
2317 root_stmt = get_chain_root (new_chain)->stmt;
2318 for (i = 1; VEC_iterate (dref, new_chain->refs, i, nw); i++)
2319 {
2320 if (nw->distance == new_chain->length
2321 && !stmt_dominates_stmt_p (nw->stmt, root_stmt))
2322 {
2323 new_chain->has_max_use_after = true;
2324 break;
2325 }
2326 }
2327
2328 ch1->combined = true;
2329 ch2->combined = true;
2330 return new_chain;
2331 }
2332
2333 /* Try to combine the CHAINS. */
2334
2335 static void
2336 try_combine_chains (VEC (chain_p, heap) **chains)
2337 {
2338 unsigned i, j;
2339 chain_p ch1, ch2, cch;
2340 VEC (chain_p, heap) *worklist = NULL;
2341
2342 for (i = 0; VEC_iterate (chain_p, *chains, i, ch1); i++)
2343 if (chain_can_be_combined_p (ch1))
2344 VEC_safe_push (chain_p, heap, worklist, ch1);
2345
2346 while (!VEC_empty (chain_p, worklist))
2347 {
2348 ch1 = VEC_pop (chain_p, worklist);
2349 if (!chain_can_be_combined_p (ch1))
2350 continue;
2351
2352 for (j = 0; VEC_iterate (chain_p, *chains, j, ch2); j++)
2353 {
2354 if (!chain_can_be_combined_p (ch2))
2355 continue;
2356
2357 cch = combine_chains (ch1, ch2);
2358 if (cch)
2359 {
2360 VEC_safe_push (chain_p, heap, worklist, cch);
2361 VEC_safe_push (chain_p, heap, *chains, cch);
2362 break;
2363 }
2364 }
2365 }
2366 }
2367
2368 /* Sets alias information for REF based on data reference DR,
2369 if necessary. */
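/* This matters for the initializers created in prepare_initializers_chain
   below: when REF is an INDIRECT_REF through a pointer whose variable has
   no symbol memory tag yet, the tag recorded for DR is reused (or a new
   type tag is created), so that the virtual operands of the new load are
   computed correctly. */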
2370
2371 static void
2372 set_alias_info (tree ref, struct data_reference *dr)
2373 {
2374 tree var;
2375 tree tag = DR_SYMBOL_TAG (dr);
2376
2377 gcc_assert (tag != NULL_TREE);
2378
2379 ref = get_base_address (ref);
2380 if (!ref || !INDIRECT_REF_P (ref))
2381 return;
2382
2383 var = SSA_NAME_VAR (TREE_OPERAND (ref, 0));
2384 if (var_ann (var)->symbol_mem_tag)
2385 return;
2386
2387 if (!MTAG_P (tag))
2388 new_type_alias (var, tag, ref);
2389 else
2390 var_ann (var)->symbol_mem_tag = tag;
2391
2392 var_ann (var)->subvars = DR_SUBVARS (dr);
2393 }
2394
2395 /* Prepare initializers for CHAIN in LOOP. Returns false if this is
2396 impossible because one of the initializers might trap; otherwise returns true. */
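/* As an example, for the chain a[i]{read}, a[i+1]{read}, a[i+2]{write} of
   length 2 in a loop starting at i = 0, the values of a[0] and a[1] are
   computed on the preheader edge; if a looparound phi node already
   provides one of them, its initial argument is reused instead. */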
2397
2398 static bool
2399 prepare_initializers_chain (struct loop *loop, chain_p chain)
2400 {
2401 unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
2402 struct data_reference *dr = get_chain_root (chain)->ref;
2403 tree init, stmts;
2404 dref laref;
2405 edge entry = loop_preheader_edge (loop);
2406
2407 /* Find the initializers for the variables, and check that they cannot
2408 trap. */
2409 chain->inits = VEC_alloc (tree, heap, n);
2410 for (i = 0; i < n; i++)
2411 VEC_quick_push (tree, chain->inits, NULL_TREE);
2412
2413 /* If we have replaced some looparound phi nodes, use their initializers
2414 instead of creating our own. */
2415 for (i = 0; VEC_iterate (dref, chain->refs, i, laref); i++)
2416 {
2417 if (TREE_CODE (laref->stmt) != PHI_NODE)
2418 continue;
2419
2420 gcc_assert (laref->distance > 0);
2421 VEC_replace (tree, chain->inits, n - laref->distance,
2422 PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry));
2423 }
2424
2425 for (i = 0; i < n; i++)
2426 {
2427 if (VEC_index (tree, chain->inits, i) != NULL_TREE)
2428 continue;
2429
2430 init = ref_at_iteration (loop, DR_REF (dr), (int) i - n);
2431 if (!init)
2432 return false;
2433
2434 if (!chain->all_always_accessed && tree_could_trap_p (init))
2435 return false;
2436
2437 init = force_gimple_operand (init, &stmts, false, NULL_TREE);
2438 if (stmts)
2439 {
2440 mark_virtual_ops_for_renaming_list (stmts);
2441 bsi_insert_on_edge_immediate (entry, stmts);
2442 }
2443 set_alias_info (init, dr);
2444
2445 VEC_replace (tree, chain->inits, i, init);
2446 }
2447
2448 return true;
2449 }
2450
2451 /* Prepare initializers for CHAINS in LOOP, and free chains that cannot
2452 be used because the initializers might trap. */
2453
2454 static void
2455 prepare_initializers (struct loop *loop, VEC (chain_p, heap) *chains)
2456 {
2457 chain_p chain;
2458 unsigned i;
2459
2460 for (i = 0; i < VEC_length (chain_p, chains); )
2461 {
2462 chain = VEC_index (chain_p, chains, i);
2463 if (prepare_initializers_chain (loop, chain))
2464 i++;
2465 else
2466 {
2467 release_chain (chain);
2468 VEC_unordered_remove (chain_p, chains, i);
2469 }
2470 }
2471 }
2472
2473 /* Performs predictive commoning for LOOP. Returns true if LOOP was
2474 unrolled. */
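/* The work is done in the steps described at the top of the file: find the
   data references and their dependences, split them into components and
   chains, prepare the initializers, try to combine chains, determine the
   unroll factor, and finally perform the transformation, unrolling the
   loop when possible. */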
2475
2476 static bool
2477 tree_predictive_commoning_loop (struct loop *loop)
2478 {
2479 VEC (data_reference_p, heap) *datarefs;
2480 VEC (ddr_p, heap) *dependences;
2481 struct component *components;
2482 VEC (chain_p, heap) *chains = NULL;
2483 unsigned unroll_factor;
2484 struct tree_niter_desc desc;
2485 bool unroll = false;
2486 edge exit;
2487 bitmap tmp_vars;
2488
2489 if (dump_file && (dump_flags & TDF_DETAILS))
2490 fprintf (dump_file, "Processing loop %d\n", loop->num);
2491
2492 /* Find the data references and split them into components according to their
2493 dependence relations. */
2494 datarefs = VEC_alloc (data_reference_p, heap, 10);
2495 dependences = VEC_alloc (ddr_p, heap, 10);
2496 compute_data_dependences_for_loop (loop, true, &datarefs, &dependences);
2497 if (dump_file && (dump_flags & TDF_DETAILS))
2498 dump_data_dependence_relations (dump_file, dependences);
2499
2500 components = split_data_refs_to_components (loop, datarefs, dependences);
2501 free_dependence_relations (dependences);
2502 if (!components)
2503 {
2504 free_data_refs (datarefs);
2505 return false;
2506 }
2507
2508 if (dump_file && (dump_flags & TDF_DETAILS))
2509 {
2510 fprintf (dump_file, "Initial state:\n\n");
2511 dump_components (dump_file, components);
2512 }
2513
2514 /* Find the suitable components and split them into chains. */
2515 components = filter_suitable_components (loop, components);
2516
2517 tmp_vars = BITMAP_ALLOC (NULL);
2518 looparound_phis = BITMAP_ALLOC (NULL);
2519 determine_roots (loop, components, &chains);
2520 release_components (components);
2521
2522 if (!chains)
2523 {
2524 if (dump_file && (dump_flags & TDF_DETAILS))
2525 fprintf (dump_file,
2526 "Predictive commoning failed: no suitable chains\n");
2527 goto end;
2528 }
2529 prepare_initializers (loop, chains);
2530
2531 /* Try to combine chains that are always used together. */
2532 try_combine_chains (&chains);
2533
2534 if (dump_file && (dump_flags & TDF_DETAILS))
2535 {
2536 fprintf (dump_file, "Before commoning:\n\n");
2537 dump_chains (dump_file, chains);
2538 }
2539
2540 /* Determine the unroll factor, and if the loop should be unrolled, ensure
2541 that its number of iterations is divisible by the factor. */
2542 unroll_factor = determine_unroll_factor (chains);
2543 scev_reset ();
2544 unroll = should_unroll_loop_p (loop, unroll_factor, &desc);
2545 exit = single_dom_exit (loop);
2546
2547 /* Execute the predictive commoning transformations, and possibly unroll the
2548 loop. */
2549 if (unroll)
2550 {
2551 struct epcc_data dta;
2552
2553 if (dump_file && (dump_flags & TDF_DETAILS))
2554 fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);
2555
2556 dta.chains = chains;
2557 dta.tmp_vars = tmp_vars;
2558
2559 update_ssa (TODO_update_ssa_only_virtuals);
2560
2561 /* Cfg manipulations performed in tree_transform_and_unroll_loop before
2562 execute_pred_commoning_cbck is called may cause phi nodes to be
2563 reallocated, which is a problem since CHAINS may point to these
2564 statements. To fix this, we store the ssa names defined by the
2565 phi nodes here instead of the phi nodes themselves, and restore
2566 the phi nodes in execute_pred_commoning_cbck. A bit hacky. */
2567 replace_phis_by_defined_names (chains);
2568
2569 tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc,
2570 execute_pred_commoning_cbck, &dta);
2571 eliminate_temp_copies (loop, tmp_vars);
2572 }
2573 else
2574 {
2575 if (dump_file && (dump_flags & TDF_DETAILS))
2576 fprintf (dump_file,
2577 "Executing predictive commoning without unrolling.\n");
2578 execute_pred_commoning (loop, chains, tmp_vars);
2579 }
2580
2581 end: ;
2582 release_chains (chains);
2583 free_data_refs (datarefs);
2584 BITMAP_FREE (tmp_vars);
2585 BITMAP_FREE (looparound_phis);
2586
2587 free_affine_expand_cache (&name_expansions);
2588
2589 return unroll;
2590 }
2591
2592 /* Runs predictive commoning. */
2593
2594 unsigned
2595 tree_predictive_commoning (void)
2596 {
2597 bool unrolled = false;
2598 struct loop *loop;
2599 loop_iterator li;
2600 unsigned ret = 0;
2601
2602 initialize_original_copy_tables ();
2603 FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
2604 {
2605 unrolled |= tree_predictive_commoning_loop (loop);
2606 }
2607
2608 if (unrolled)
2609 {
2610 scev_reset ();
2611 ret = TODO_cleanup_cfg;
2612 }
2613 free_original_copy_tables ();
2614
2615 return ret;
2616 }