1 /* Predictive commoning.
2 Copyright (C) 2005-2017 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This file implements the predictive commoning optimization. Predictive
21 commoning can be viewed as CSE around a loop, and with some improvements,
22 as generalized strength reduction -- i.e., reusing values computed in
23 earlier iterations of a loop in the later ones. So far, the pass only
24 handles the most useful case, that is, reusing values of memory references.
25 If you think this is all just a special case of PRE, you are sort of right;
26 however, concentrating on loops is simpler, and makes it possible to
27 incorporate data dependence analysis to detect the opportunities, perform
28 loop unrolling to avoid copies together with renaming immediately,
29 and if needed, we could also take register pressure into account.
30
31 Let us demonstrate what is done on an example:
32
33 for (i = 0; i < 100; i++)
34 {
35 a[i+2] = a[i] + a[i+1];
36 b[10] = b[10] + i;
37 c[i] = c[99 - i];
38 d[i] = d[i + 1];
39 }
40
41 1) We find data references in the loop, and split them to mutually
42 independent groups (i.e., we find components of a data dependence
43 graph). We ignore read-read dependences whose distance is not constant.
44 (TODO -- we could also ignore antidependences). In this example, we
45 find the following groups:
46
47 a[i]{read}, a[i+1]{read}, a[i+2]{write}
48 b[10]{read}, b[10]{write}
49 c[99 - i]{read}, c[i]{write}
50 d[i + 1]{read}, d[i]{write}
51
52 2) Inside each of the groups, we verify several conditions:
53 a) all the references must differ in indices only, and the indices
54 must all have the same step
55 b) the references must dominate the loop latch (and thus, they must be
56 ordered by the dominance relation).
57 c) the distance of the indices must be a small multiple of the step
58 We are then able to compute the difference of the references (# of
59 iterations before they point to the same place as the first of them).
60 Also, in case there are writes in the loop, we split the groups into
61 chains whose head is the write whose values are used by the reads in
62 the same chain. The chains are then processed independently,
63 making the further transformations simpler. Also, the shorter chains
64 need the same number of registers, but may require a lower unrolling
65 factor in order to get rid of the copies on the loop latch.
66
67 In our example, we get the following chains (the chain for c is invalid).
68
69 a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
70 b[10]{read,+0}, b[10]{write,+0}
71 d[i + 1]{read,+0}, d[i]{write,+1}
72
73 3) For each read, we determine the read or write whose value it reuses,
74 together with the distance of this reuse. I.e. we take the last
75 reference before it with distance 0, or the last of the references
76 with the smallest positive distance to the read. Then, we remove
77 the references that are not used in any of these chains, discard the
78 empty groups, and propagate all the links so that they point to the
79 single root reference of the chain (adjusting their distance
80 appropriately). Some extra care needs to be taken for references with
81 step 0. In our example (the numbers indicate the distance of the
82 reuse),
83
84 a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
85 b[10] --> (*) 1, b[10] (*)
86
87 4) The chains are combined together if possible. If the corresponding
88 elements of two chains are always combined together with the same
89 operator, we remember just the result of this combination, instead
90 of remembering the values separately. We may need to perform
91 reassociation to enable combining, for example
92
93 e[i] + f[i+1] + e[i+1] + f[i]
94
95 can be reassociated as
96
97 (e[i] + f[i]) + (e[i+1] + f[i+1])
98
99 and we can combine the chains for e and f into one chain.
100
101 5) For each root reference (end of the chain) R, let N be the maximum distance
102 of a reference reusing its value. Variables R0 up to RN are created,
103 together with phi nodes that transfer values from R1 .. RN to
104 R0 .. R(N-1).
105 Initial values are loaded to R0..R(N-1) (in case some of the references
106 need not always be accessed and they may trap, we may fail here;
107 TODO sometimes, the loads could be guarded by a check for the number
108 of iterations). Values loaded/stored in roots are also copied to
109 RN. Other reads are replaced with the appropriate variable Ri.
110 Everything is put to SSA form.
111
112 As a small improvement, if R0 is dead after the root (i.e., all uses of
113 the value with the maximum distance dominate the root), we can avoid
114 creating RN and use R0 instead of it.
115
116 In our example, we get (only the parts concerning a and b are shown):
117 for (i = 0; i < 100; i++)
118 {
119 f = phi (a[0], s);
120 s = phi (a[1], f);
121 x = phi (b[10], x);
122
123 f = f + s;
124 a[i+2] = f;
125 x = x + i;
126 b[10] = x;
127 }
128
129 6) Factor F for unrolling is determined as the smallest common multiple of
130 (N + 1) for each root reference (N for references for which we avoided
131 creating RN). If F and the loop are small enough, the loop is unrolled F
132 times. The stores to RN (R0) in the copies of the loop body are
133 periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
134 be coalesced and the copies can be eliminated.
135
136 TODO -- copy propagation and other optimizations may change the live
137 ranges of the temporary registers and prevent them from being coalesced;
138 this may increase the register pressure.
139
140 In our case, F = 2 and the (main loop of the) result is
141
142 for (i = 0; i < ...; i += 2)
143 {
144 f = phi (a[0], f);
145 s = phi (a[1], s);
146 x = phi (b[10], x);
147
148 f = f + s;
149 a[i+2] = f;
150 x = x + i;
151 b[10] = x;
152
153 s = s + f;
154 a[i+3] = s;
155 x = x + i;
156 b[10] = x;
157 }
158
159 TODO -- stores killing other stores can be taken into account, e.g.,
160 for (i = 0; i < n; i++)
161 {
162 a[i] = 1;
163 a[i+2] = 2;
164 }
165
166 can be replaced with
167
168 t0 = a[0];
169 t1 = a[1];
170 for (i = 0; i < n; i++)
171 {
172 a[i] = 1;
173 t2 = 2;
174 t0 = t1;
175 t1 = t2;
176 }
177 a[n] = t0;
178 a[n+1] = t1;
179
180 The interesting part is that this would generalize store motion; still, since
181 store motion is performed elsewhere, it does not seem that important.
182
183 Predictive commoning can be generalized for arbitrary computations (not
184 just memory loads), and also nontrivial transfer functions (e.g., replacing
185 i * i with ii_last + 2 * i + 1), to generalize strength reduction. */
186
187 #include "config.h"
188 #include "system.h"
189 #include "coretypes.h"
190 #include "backend.h"
191 #include "rtl.h"
192 #include "tree.h"
193 #include "gimple.h"
194 #include "predict.h"
195 #include "tree-pass.h"
196 #include "ssa.h"
197 #include "gimple-pretty-print.h"
198 #include "alias.h"
199 #include "fold-const.h"
200 #include "cfgloop.h"
201 #include "tree-eh.h"
202 #include "gimplify.h"
203 #include "gimple-iterator.h"
204 #include "gimplify-me.h"
205 #include "tree-ssa-loop-ivopts.h"
206 #include "tree-ssa-loop-manip.h"
207 #include "tree-ssa-loop-niter.h"
208 #include "tree-ssa-loop.h"
209 #include "tree-into-ssa.h"
210 #include "tree-dfa.h"
211 #include "tree-ssa.h"
212 #include "tree-data-ref.h"
213 #include "tree-scalar-evolution.h"
214 #include "params.h"
215 #include "tree-affine.h"
216 #include "builtins.h"
217
218 /* The maximum number of iterations between the considered memory
219 references. */
220
221 #define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
222
223 /* Data references (or phi nodes that carry data reference values across
224 loop iterations). */
225
226 typedef struct dref_d
227 {
228 /* The reference itself. */
229 struct data_reference *ref;
230
231 /* The statement in which the reference appears. */
232 gimple *stmt;
233
234 /* In case STMT is a phi node, this field is set to the SSA name
235 defined by it in replace_phis_by_defined_names (in order to avoid
236 pointing to a phi node that got reallocated in the meantime). */
237 tree name_defined_by_phi;
238
239 /* Distance of the reference from the root of the chain (in number of
240 iterations of the loop). */
241 unsigned distance;
242
243 /* Number of iterations offset from the first reference in the component. */
244 widest_int offset;
245
246 /* Number of the reference in a component, in dominance ordering. */
247 unsigned pos;
248
249 /* True if the memory reference is always accessed when the loop is
250 entered. */
251 unsigned always_accessed : 1;
252 } *dref;
253
254
255 /* Type of the chain of the references. */
256
257 enum chain_type
258 {
259 /* The addresses of the references in the chain are constant. */
260 CT_INVARIANT,
261
262 /* There are only loads in the chain. */
263 CT_LOAD,
264
265 /* The root of the chain is a store; the rest are loads. */
266 CT_STORE_LOAD,
267
268 /* A combination of two chains. */
269 CT_COMBINATION
270 };
271
272 /* Chains of data references. */
273
274 typedef struct chain
275 {
276 /* Type of the chain. */
277 enum chain_type type;
278
279 /* For combination chains, the operator and the two chains that are
280 combined, and the type of the result. */
281 enum tree_code op;
282 tree rslt_type;
283 struct chain *ch1, *ch2;
284
285 /* The references in the chain. */
286 vec<dref> refs;
287
288 /* The maximum distance of the reference in the chain from the root. */
289 unsigned length;
290
291 /* The variables used to copy the value throughout iterations. */
292 vec<tree> vars;
293
294 /* Initializers for the variables. */
295 vec<tree> inits;
296
297 /* True if there is a use of a variable with the maximal distance
298 that comes after the root in the loop. */
299 unsigned has_max_use_after : 1;
300
301 /* True if all the memory references in the chain are always accessed. */
302 unsigned all_always_accessed : 1;
303
304 /* True if this chain was combined together with some other chain. */
305 unsigned combined : 1;
306 } *chain_p;
307
308
309 /* Describes the knowledge about the step of the memory references in
310 the component. */
311
312 enum ref_step_type
313 {
314 /* The step is zero. */
315 RS_INVARIANT,
316
317 /* The step is nonzero. */
318 RS_NONZERO,
319
320 /* The step may or may not be nonzero. */
321 RS_ANY
322 };
323
324 /* Components of the data dependence graph. */
325
326 struct component
327 {
328 /* The references in the component. */
329 vec<dref> refs;
330
331 /* What we know about the step of the references in the component. */
332 enum ref_step_type comp_step;
333
334 /* Next component in the list. */
335 struct component *next;
336 };
337
338 /* Bitmap of ssa names defined by looparound phi nodes covered by chains. */
339
340 static bitmap looparound_phis;
341
342 /* Cache used by tree_to_aff_combination_expand. */
343
344 static hash_map<tree, name_expansion *> *name_expansions;
345
346 /* Dumps data reference REF to FILE. */
347
348 extern void dump_dref (FILE *, dref);
349 void
350 dump_dref (FILE *file, dref ref)
351 {
352 if (ref->ref)
353 {
354 fprintf (file, " ");
355 print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM);
356 fprintf (file, " (id %u%s)\n", ref->pos,
357 DR_IS_READ (ref->ref) ? "" : ", write");
358
359 fprintf (file, " offset ");
360 print_decs (ref->offset, file);
361 fprintf (file, "\n");
362
363 fprintf (file, " distance %u\n", ref->distance);
364 }
365 else
366 {
367 if (gimple_code (ref->stmt) == GIMPLE_PHI)
368 fprintf (file, " looparound ref\n");
369 else
370 fprintf (file, " combination ref\n");
371 fprintf (file, " in statement ");
372 print_gimple_stmt (file, ref->stmt, 0, TDF_SLIM);
373 fprintf (file, "\n");
374 fprintf (file, " distance %u\n", ref->distance);
375 }
376
377 }
378
379 /* Dumps CHAIN to FILE. */
380
381 extern void dump_chain (FILE *, chain_p);
382 void
383 dump_chain (FILE *file, chain_p chain)
384 {
385 dref a;
386 const char *chain_type;
387 unsigned i;
388 tree var;
389
390 switch (chain->type)
391 {
392 case CT_INVARIANT:
393 chain_type = "Load motion";
394 break;
395
396 case CT_LOAD:
397 chain_type = "Loads-only";
398 break;
399
400 case CT_STORE_LOAD:
401 chain_type = "Store-loads";
402 break;
403
404 case CT_COMBINATION:
405 chain_type = "Combination";
406 break;
407
408 default:
409 gcc_unreachable ();
410 }
411
412 fprintf (file, "%s chain %p%s\n", chain_type, (void *) chain,
413 chain->combined ? " (combined)" : "");
414 if (chain->type != CT_INVARIANT)
415 fprintf (file, " max distance %u%s\n", chain->length,
416 chain->has_max_use_after ? "" : ", may reuse first");
417
418 if (chain->type == CT_COMBINATION)
419 {
420 fprintf (file, " equal to %p %s %p in type ",
421 (void *) chain->ch1, op_symbol_code (chain->op),
422 (void *) chain->ch2);
423 print_generic_expr (file, chain->rslt_type, TDF_SLIM);
424 fprintf (file, "\n");
425 }
426
427 if (chain->vars.exists ())
428 {
429 fprintf (file, " vars");
430 FOR_EACH_VEC_ELT (chain->vars, i, var)
431 {
432 fprintf (file, " ");
433 print_generic_expr (file, var, TDF_SLIM);
434 }
435 fprintf (file, "\n");
436 }
437
438 if (chain->inits.exists ())
439 {
440 fprintf (file, " inits");
441 FOR_EACH_VEC_ELT (chain->inits, i, var)
442 {
443 fprintf (file, " ");
444 print_generic_expr (file, var, TDF_SLIM);
445 }
446 fprintf (file, "\n");
447 }
448
449 fprintf (file, " references:\n");
450 FOR_EACH_VEC_ELT (chain->refs, i, a)
451 dump_dref (file, a);
452
453 fprintf (file, "\n");
454 }
455
456 /* Dumps CHAINS to FILE. */
457
458 extern void dump_chains (FILE *, vec<chain_p> );
459 void
460 dump_chains (FILE *file, vec<chain_p> chains)
461 {
462 chain_p chain;
463 unsigned i;
464
465 FOR_EACH_VEC_ELT (chains, i, chain)
466 dump_chain (file, chain);
467 }
468
469 /* Dumps COMP to FILE. */
470
471 extern void dump_component (FILE *, struct component *);
472 void
473 dump_component (FILE *file, struct component *comp)
474 {
475 dref a;
476 unsigned i;
477
478 fprintf (file, "Component%s:\n",
479 comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
480 FOR_EACH_VEC_ELT (comp->refs, i, a)
481 dump_dref (file, a);
482 fprintf (file, "\n");
483 }
484
485 /* Dumps COMPS to FILE. */
486
487 extern void dump_components (FILE *, struct component *);
488 void
489 dump_components (FILE *file, struct component *comps)
490 {
491 struct component *comp;
492
493 for (comp = comps; comp; comp = comp->next)
494 dump_component (file, comp);
495 }
496
497 /* Frees a chain CHAIN. */
498
499 static void
500 release_chain (chain_p chain)
501 {
502 dref ref;
503 unsigned i;
504
505 if (chain == NULL)
506 return;
507
508 FOR_EACH_VEC_ELT (chain->refs, i, ref)
509 free (ref);
510
511 chain->refs.release ();
512 chain->vars.release ();
513 chain->inits.release ();
514
515 free (chain);
516 }
517
518 /* Frees CHAINS. */
519
520 static void
521 release_chains (vec<chain_p> chains)
522 {
523 unsigned i;
524 chain_p chain;
525
526 FOR_EACH_VEC_ELT (chains, i, chain)
527 release_chain (chain);
528 chains.release ();
529 }
530
531 /* Frees a component COMP. */
532
533 static void
534 release_component (struct component *comp)
535 {
536 comp->refs.release ();
537 free (comp);
538 }
539
540 /* Frees list of components COMPS. */
541
542 static void
543 release_components (struct component *comps)
544 {
545 struct component *act, *next;
546
547 for (act = comps; act; act = next)
548 {
549 next = act->next;
550 release_component (act);
551 }
552 }
553
554 /* Finds a root of tree given by FATHERS containing A, and performs path
555 shortening. */
556
557 static unsigned
558 component_of (unsigned fathers[], unsigned a)
559 {
560 unsigned root, n;
561
562 for (root = a; root != fathers[root]; root = fathers[root])
563 continue;
564
565 for (; a != root; a = n)
566 {
567 n = fathers[a];
568 fathers[a] = root;
569 }
570
571 return root;
572 }
573
574 /* Join operation for DFU. FATHERS gives the tree, SIZES are sizes of the
575 components, A and B are components to merge. */
576
577 static void
578 merge_comps (unsigned fathers[], unsigned sizes[], unsigned a, unsigned b)
579 {
580 unsigned ca = component_of (fathers, a);
581 unsigned cb = component_of (fathers, b);
582
583 if (ca == cb)
584 return;
585
586 if (sizes[ca] < sizes[cb])
587 {
588 sizes[cb] += sizes[ca];
589 fathers[ca] = cb;
590 }
591 else
592 {
593 sizes[ca] += sizes[cb];
594 fathers[cb] = ca;
595 }
596 }
597
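/* Illustrative worked example (not part of the original sources): with
   FATHERS = {0, 0, 1, 3} and SIZES = {3, 1, 1, 1},
   component_of (fathers, 2) follows 2 -> 1 -> 0 and returns 0, shortening
   the path so that FATHERS becomes {0, 0, 0, 3}. A subsequent
   merge_comps (fathers, sizes, 2, 3) unions the component of 2 (root 0,
   size 3) with that of 3 (root 3, size 1), attaching the smaller tree to
   the larger one: FATHERS = {0, 0, 0, 0} and SIZES[0] = 4. */
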
598 /* Returns true if A is a reference that is suitable for predictive commoning
599 in the innermost loop that contains it. REF_STEP is set according to the
600 step of the reference A. */
601
602 static bool
603 suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
604 {
605 tree ref = DR_REF (a), step = DR_STEP (a);
606
607 if (!step
608 || TREE_THIS_VOLATILE (ref)
609 || !is_gimple_reg_type (TREE_TYPE (ref))
610 || tree_could_throw_p (ref))
611 return false;
612
613 if (integer_zerop (step))
614 *ref_step = RS_INVARIANT;
615 else if (integer_nonzerop (step))
616 *ref_step = RS_NONZERO;
617 else
618 *ref_step = RS_ANY;
619
620 return true;
621 }
622
623 /* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET. */
624
625 static void
626 aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
627 {
628 tree type = TREE_TYPE (DR_OFFSET (dr));
629 aff_tree delta;
630
631 tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset,
632 &name_expansions);
633 aff_combination_const (&delta, type, wi::to_widest (DR_INIT (dr)));
634 aff_combination_add (offset, &delta);
635 }
636
637 /* Determines the number of iterations of the innermost enclosing loop before B
638 refers to exactly the same location as A and stores it to OFF. If A and
639 B do not have the same step, they never meet, or anything else fails,
640 returns false, otherwise returns true. Both A and B are assumed to
641 satisfy suitable_reference_p. */
642
643 static bool
644 determine_offset (struct data_reference *a, struct data_reference *b,
645 widest_int *off)
646 {
647 aff_tree diff, baseb, step;
648 tree typea, typeb;
649
650 /* Check that both the references access the location in the same type. */
651 typea = TREE_TYPE (DR_REF (a));
652 typeb = TREE_TYPE (DR_REF (b));
653 if (!useless_type_conversion_p (typeb, typea))
654 return false;
655
656 /* Check whether the base address and the step of both references is the
657 same. */
658 if (!operand_equal_p (DR_STEP (a), DR_STEP (b), 0)
659 || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), 0))
660 return false;
661
662 if (integer_zerop (DR_STEP (a)))
663 {
664 /* If the references have loop invariant address, check that they access
665 exactly the same location. */
666 *off = 0;
667 return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0)
668 && operand_equal_p (DR_INIT (a), DR_INIT (b), 0));
669 }
670
671 /* Compare the offsets of the addresses, and check whether the difference
672 is a multiple of step. */
673 aff_combination_dr_offset (a, &diff);
674 aff_combination_dr_offset (b, &baseb);
675 aff_combination_scale (&baseb, -1);
676 aff_combination_add (&diff, &baseb);
677
678 tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)),
679 &step, &name_expansions);
680 return aff_combination_constant_multiple_p (&diff, &step, off);
681 }
682
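/* Worked example (illustrative, assuming 4-byte elements): for A = a[i]
   and B = a[i+2] in the same loop, both references have base &a and step 4;
   the difference of their offsets is -8, which is the step multiplied
   by -2, so *OFF is set to -2 -- B accessed A's current location two
   iterations earlier. This matches the relative offsets +0/-1/-2 shown
   for the a[] group in the comment at the top of the file. */
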
683 /* Returns the last basic block in LOOP for which we are sure that
684 it is executed whenever the loop is entered. */
685
686 static basic_block
687 last_always_executed_block (struct loop *loop)
688 {
689 unsigned i;
690 vec<edge> exits = get_loop_exit_edges (loop);
691 edge ex;
692 basic_block last = loop->latch;
693
694 FOR_EACH_VEC_ELT (exits, i, ex)
695 last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
696 exits.release ();
697
698 return last;
699 }
700
701 /* Splits dependence graph on DATAREFS described by DEPENDS to components. */
702
703 static struct component *
704 split_data_refs_to_components (struct loop *loop,
705 vec<data_reference_p> datarefs,
706 vec<ddr_p> depends)
707 {
708 unsigned i, n = datarefs.length ();
709 unsigned ca, ia, ib, bad;
710 unsigned *comp_father = XNEWVEC (unsigned, n + 1);
711 unsigned *comp_size = XNEWVEC (unsigned, n + 1);
712 struct component **comps;
713 struct data_reference *dr, *dra, *drb;
714 struct data_dependence_relation *ddr;
715 struct component *comp_list = NULL, *comp;
716 dref dataref;
717 basic_block last_always_executed = last_always_executed_block (loop);
718
719 FOR_EACH_VEC_ELT (datarefs, i, dr)
720 {
721 if (!DR_REF (dr))
722 {
723 /* A fake reference for call or asm_expr that may clobber memory;
724 just fail. */
725 goto end;
726 }
727 /* predcom pass isn't prepared to handle calls with data references. */
728 if (is_gimple_call (DR_STMT (dr)))
729 goto end;
730 dr->aux = (void *) (size_t) i;
731 comp_father[i] = i;
732 comp_size[i] = 1;
733 }
734
735 /* A component reserved for the "bad" data references. */
736 comp_father[n] = n;
737 comp_size[n] = 1;
738
739 FOR_EACH_VEC_ELT (datarefs, i, dr)
740 {
741 enum ref_step_type dummy;
742
743 if (!suitable_reference_p (dr, &dummy))
744 {
745 ia = (unsigned) (size_t) dr->aux;
746 merge_comps (comp_father, comp_size, n, ia);
747 }
748 }
749
750 FOR_EACH_VEC_ELT (depends, i, ddr)
751 {
752 widest_int dummy_off;
753
754 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
755 continue;
756
757 dra = DDR_A (ddr);
758 drb = DDR_B (ddr);
759 ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
760 ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
761 if (ia == ib)
762 continue;
763
764 bad = component_of (comp_father, n);
765
766 /* If both A and B are reads, we may ignore unsuitable dependences. */
767 if (DR_IS_READ (dra) && DR_IS_READ (drb))
768 {
769 if (ia == bad || ib == bad
770 || !determine_offset (dra, drb, &dummy_off))
771 continue;
772 }
773 /* If A is a read and B a write or vice versa and there is an unsuitable
774 dependence, instead of merging both components into a component
775 that will certainly not pass suitable_component_p, just put the
776 read into the bad component; perhaps at least the write together with
777 all the other data refs in its component will be optimizable. */
778 else if (DR_IS_READ (dra) && ib != bad)
779 {
780 if (ia == bad)
781 continue;
782 else if (!determine_offset (dra, drb, &dummy_off))
783 {
784 merge_comps (comp_father, comp_size, bad, ia);
785 continue;
786 }
787 }
788 else if (DR_IS_READ (drb) && ia != bad)
789 {
790 if (ib == bad)
791 continue;
792 else if (!determine_offset (dra, drb, &dummy_off))
793 {
794 merge_comps (comp_father, comp_size, bad, ib);
795 continue;
796 }
797 }
798
799 merge_comps (comp_father, comp_size, ia, ib);
800 }
801
802 comps = XCNEWVEC (struct component *, n);
803 bad = component_of (comp_father, n);
804 FOR_EACH_VEC_ELT (datarefs, i, dr)
805 {
806 ia = (unsigned) (size_t) dr->aux;
807 ca = component_of (comp_father, ia);
808 if (ca == bad)
809 continue;
810
811 comp = comps[ca];
812 if (!comp)
813 {
814 comp = XCNEW (struct component);
815 comp->refs.create (comp_size[ca]);
816 comps[ca] = comp;
817 }
818
819 dataref = XCNEW (struct dref_d);
820 dataref->ref = dr;
821 dataref->stmt = DR_STMT (dr);
822 dataref->offset = 0;
823 dataref->distance = 0;
824
825 dataref->always_accessed
826 = dominated_by_p (CDI_DOMINATORS, last_always_executed,
827 gimple_bb (dataref->stmt));
828 dataref->pos = comp->refs.length ();
829 comp->refs.quick_push (dataref);
830 }
831
832 for (i = 0; i < n; i++)
833 {
834 comp = comps[i];
835 if (comp)
836 {
837 comp->next = comp_list;
838 comp_list = comp;
839 }
840 }
841 free (comps);
842
843 end:
844 free (comp_father);
845 free (comp_size);
846 return comp_list;
847 }
848
849 /* Returns true if the component COMP satisfies the conditions
850 described in 2) at the beginning of this file. LOOP is the current
851 loop. */
852
853 static bool
854 suitable_component_p (struct loop *loop, struct component *comp)
855 {
856 unsigned i;
857 dref a, first;
858 basic_block ba, bp = loop->header;
859 bool ok, has_write = false;
860
861 FOR_EACH_VEC_ELT (comp->refs, i, a)
862 {
863 ba = gimple_bb (a->stmt);
864
865 if (!just_once_each_iteration_p (loop, ba))
866 return false;
867
868 gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp));
869 bp = ba;
870
871 if (DR_IS_WRITE (a->ref))
872 has_write = true;
873 }
874
875 first = comp->refs[0];
876 ok = suitable_reference_p (first->ref, &comp->comp_step);
877 gcc_assert (ok);
878 first->offset = 0;
879
880 for (i = 1; comp->refs.iterate (i, &a); i++)
881 {
882 if (!determine_offset (first->ref, a->ref, &a->offset))
883 return false;
884
885 enum ref_step_type a_step;
886 gcc_checking_assert (suitable_reference_p (a->ref, &a_step)
887 && a_step == comp->comp_step);
888 }
889
890 /* If there is a write inside the component, we must know whether the
891 step is nonzero or not -- we would not otherwise be able to recognize
892 whether the value accessed by reads comes from the OFFSET-th iteration
893 or the previous one. */
894 if (has_write && comp->comp_step == RS_ANY)
895 return false;
896
897 return true;
898 }
899
900 /* Check the conditions on references inside each of components COMPS,
901 and remove the unsuitable components from the list. The new list
902 of components is returned. The conditions are described in 2) at
903 the beginning of this file. LOOP is the current loop. */
904
905 static struct component *
906 filter_suitable_components (struct loop *loop, struct component *comps)
907 {
908 struct component **comp, *act;
909
910 for (comp = &comps; *comp; )
911 {
912 act = *comp;
913 if (suitable_component_p (loop, act))
914 comp = &act->next;
915 else
916 {
917 dref ref;
918 unsigned i;
919
920 *comp = act->next;
921 FOR_EACH_VEC_ELT (act->refs, i, ref)
922 free (ref);
923 release_component (act);
924 }
925 }
926
927 return comps;
928 }
929
930 /* Compares two drefs A and B by their offset and position. Callback for
931 qsort. */
932
933 static int
934 order_drefs (const void *a, const void *b)
935 {
936 const dref *const da = (const dref *) a;
937 const dref *const db = (const dref *) b;
938 int offcmp = wi::cmps ((*da)->offset, (*db)->offset);
939
940 if (offcmp != 0)
941 return offcmp;
942
943 return (*da)->pos - (*db)->pos;
944 }
945
946 /* Returns root of the CHAIN. */
947
948 static inline dref
949 get_chain_root (chain_p chain)
950 {
951 return chain->refs[0];
952 }
953
954 /* Adds REF to the chain CHAIN. */
955
956 static void
957 add_ref_to_chain (chain_p chain, dref ref)
958 {
959 dref root = get_chain_root (chain);
960
961 gcc_assert (wi::les_p (root->offset, ref->offset));
962 widest_int dist = ref->offset - root->offset;
963 if (wi::leu_p (MAX_DISTANCE, dist))
964 {
965 free (ref);
966 return;
967 }
968 gcc_assert (wi::fits_uhwi_p (dist));
969
970 chain->refs.safe_push (ref);
971
972 ref->distance = dist.to_uhwi ();
973
974 if (ref->distance >= chain->length)
975 {
976 chain->length = ref->distance;
977 chain->has_max_use_after = false;
978 }
979
980 if (ref->distance == chain->length
981 && ref->pos > root->pos)
982 chain->has_max_use_after = true;
983
984 chain->all_always_accessed &= ref->always_accessed;
985 }
986
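/* Worked example (illustrative): in the chain for a[] from the file-level
   comment, the root is the write a[i+2] with offset -2. Adding a[i+1]
   (offset -1) yields distance 1, and adding a[i] (offset 0) yields
   distance 2, so chain->length becomes 2. In that example the
   distance-2 read does not come after the root in the loop body, so
   HAS_MAX_USE_AFTER stays false (the "may reuse first" case reported by
   dump_chain). A reference whose distance reached MAX_DISTANCE would be
   freed and dropped instead. */
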
987 /* Returns the chain for invariant component COMP. */
988
989 static chain_p
990 make_invariant_chain (struct component *comp)
991 {
992 chain_p chain = XCNEW (struct chain);
993 unsigned i;
994 dref ref;
995
996 chain->type = CT_INVARIANT;
997
998 chain->all_always_accessed = true;
999
1000 FOR_EACH_VEC_ELT (comp->refs, i, ref)
1001 {
1002 chain->refs.safe_push (ref);
1003 chain->all_always_accessed &= ref->always_accessed;
1004 }
1005
1006 return chain;
1007 }
1008
1009 /* Make a new chain rooted at REF. */
1010
1011 static chain_p
1012 make_rooted_chain (dref ref)
1013 {
1014 chain_p chain = XCNEW (struct chain);
1015
1016 chain->type = DR_IS_READ (ref->ref) ? CT_LOAD : CT_STORE_LOAD;
1017
1018 chain->refs.safe_push (ref);
1019 chain->all_always_accessed = ref->always_accessed;
1020
1021 ref->distance = 0;
1022
1023 return chain;
1024 }
1025
1026 /* Returns true if CHAIN is not trivial. */
1027
1028 static bool
1029 nontrivial_chain_p (chain_p chain)
1030 {
1031 return chain != NULL && chain->refs.length () > 1;
1032 }
1033
1034 /* Returns the ssa name that contains the value of REF, or NULL_TREE if there
1035 is no such name. */
1036
1037 static tree
1038 name_for_ref (dref ref)
1039 {
1040 tree name;
1041
1042 if (is_gimple_assign (ref->stmt))
1043 {
1044 if (!ref->ref || DR_IS_READ (ref->ref))
1045 name = gimple_assign_lhs (ref->stmt);
1046 else
1047 name = gimple_assign_rhs1 (ref->stmt);
1048 }
1049 else
1050 name = PHI_RESULT (ref->stmt);
1051
1052 return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE);
1053 }
1054
1055 /* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
1056 iterations of the innermost enclosing loop). */
1057
1058 static bool
1059 valid_initializer_p (struct data_reference *ref,
1060 unsigned distance, struct data_reference *root)
1061 {
1062 aff_tree diff, base, step;
1063 widest_int off;
1064
1065 /* Both REF and ROOT must be accessing the same object. */
1066 if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0))
1067 return false;
1068
1069 /* The initializer is defined outside of the loop, hence its address must be
1070 invariant inside the loop. */
1071 gcc_assert (integer_zerop (DR_STEP (ref)));
1072
1073 /* If the address of the reference is invariant, the initializer must access
1074 exactly the same location. */
1075 if (integer_zerop (DR_STEP (root)))
1076 return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), 0)
1077 && operand_equal_p (DR_INIT (ref), DR_INIT (root), 0));
1078
1079 /* Verify that this index of REF is equal to the root's index at
1080 -DISTANCE-th iteration. */
1081 aff_combination_dr_offset (root, &diff);
1082 aff_combination_dr_offset (ref, &base);
1083 aff_combination_scale (&base, -1);
1084 aff_combination_add (&diff, &base);
1085
1086 tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)),
1087 &step, &name_expansions);
1088 if (!aff_combination_constant_multiple_p (&diff, &step, &off))
1089 return false;
1090
1091 if (off != distance)
1092 return false;
1093
1094 return true;
1095 }
1096
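/* Worked example (illustrative, assuming 4-byte elements): for
   ROOT = a[i+1] (DR_INIT 4, DR_STEP 4) and REF = a[0] loaded before the
   loop (DR_INIT 0, invariant), the offset difference is 4 = 1 * step,
   so REF is a valid initializer exactly for DISTANCE == 1: a[0] is the
   location ROOT accessed one iteration before the first one. */
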
1097 /* Finds looparound phi node of LOOP that copies the value of REF, and if its
1098 initial value is correct (equal to initial value of REF shifted by one
1099 iteration), returns the phi node. Otherwise, NULL is returned. ROOT
1100 is the root of the current chain. */
1101
1102 static gphi *
1103 find_looparound_phi (struct loop *loop, dref ref, dref root)
1104 {
1105 tree name, init, init_ref;
1106 gphi *phi = NULL;
1107 gimple *init_stmt;
1108 edge latch = loop_latch_edge (loop);
1109 struct data_reference init_dr;
1110 gphi_iterator psi;
1111
1112 if (is_gimple_assign (ref->stmt))
1113 {
1114 if (DR_IS_READ (ref->ref))
1115 name = gimple_assign_lhs (ref->stmt);
1116 else
1117 name = gimple_assign_rhs1 (ref->stmt);
1118 }
1119 else
1120 name = PHI_RESULT (ref->stmt);
1121 if (!name)
1122 return NULL;
1123
1124 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1125 {
1126 phi = psi.phi ();
1127 if (PHI_ARG_DEF_FROM_EDGE (phi, latch) == name)
1128 break;
1129 }
1130
1131 if (gsi_end_p (psi))
1132 return NULL;
1133
1134 init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1135 if (TREE_CODE (init) != SSA_NAME)
1136 return NULL;
1137 init_stmt = SSA_NAME_DEF_STMT (init);
1138 if (gimple_code (init_stmt) != GIMPLE_ASSIGN)
1139 return NULL;
1140 gcc_assert (gimple_assign_lhs (init_stmt) == init);
1141
1142 init_ref = gimple_assign_rhs1 (init_stmt);
1143 if (!REFERENCE_CLASS_P (init_ref)
1144 && !DECL_P (init_ref))
1145 return NULL;
1146
1147 /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
1148 loop enclosing PHI). */
1149 memset (&init_dr, 0, sizeof (struct data_reference));
1150 DR_REF (&init_dr) = init_ref;
1151 DR_STMT (&init_dr) = phi;
1152 if (!dr_analyze_innermost (&init_dr, loop))
1153 return NULL;
1154
1155 if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
1156 return NULL;
1157
1158 return phi;
1159 }
1160
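/* For illustration (hypothetical user code, not from the original
   sources), consider

     t = a[0];
     for (i = 0; i < n; i++)
       {
         b[i] = t + a[i+1];
         t = a[i+1];
       }

   The header phi t_1 = PHI <a[0] (preheader), t_2 (latch)>, where t_2 is
   the name loaded from a[i+1], carries the chain value around the loop;
   valid_initializer_p then confirms that the preheader load a[0] equals
   the root reference a[i+1] shifted by ref->distance + 1 == 1
   iteration. */
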
1161 /* Adds a reference for the looparound copy of REF in PHI to CHAIN. */
1162
1163 static void
1164 insert_looparound_copy (chain_p chain, dref ref, gphi *phi)
1165 {
1166 dref nw = XCNEW (struct dref_d), aref;
1167 unsigned i;
1168
1169 nw->stmt = phi;
1170 nw->distance = ref->distance + 1;
1171 nw->always_accessed = 1;
1172
1173 FOR_EACH_VEC_ELT (chain->refs, i, aref)
1174 if (aref->distance >= nw->distance)
1175 break;
1176 chain->refs.safe_insert (i, nw);
1177
1178 if (nw->distance > chain->length)
1179 {
1180 chain->length = nw->distance;
1181 chain->has_max_use_after = false;
1182 }
1183 }
1184
1185 /* For references in CHAIN that are copied around the LOOP (created previously
1186 by PRE, or by the user), add the results of such copies to the chain. This
1187 enables us to remove the copies by unrolling, and may need fewer registers
1188 (also, it may allow us to combine chains together). */
1189
1190 static void
1191 add_looparound_copies (struct loop *loop, chain_p chain)
1192 {
1193 unsigned i;
1194 dref ref, root = get_chain_root (chain);
1195 gphi *phi;
1196
1197 FOR_EACH_VEC_ELT (chain->refs, i, ref)
1198 {
1199 phi = find_looparound_phi (loop, ref, root);
1200 if (!phi)
1201 continue;
1202
1203 bitmap_set_bit (looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
1204 insert_looparound_copy (chain, ref, phi);
1205 }
1206 }
1207
1208 /* Find roots of the values and determine distances in the component COMP.
1209 The references are redistributed into CHAINS. LOOP is the current
1210 loop. */
1211
1212 static void
1213 determine_roots_comp (struct loop *loop,
1214 struct component *comp,
1215 vec<chain_p> *chains)
1216 {
1217 unsigned i;
1218 dref a;
1219 chain_p chain = NULL;
1220 widest_int last_ofs = 0;
1221
1222 /* Invariants are handled specially. */
1223 if (comp->comp_step == RS_INVARIANT)
1224 {
1225 chain = make_invariant_chain (comp);
1226 chains->safe_push (chain);
1227 return;
1228 }
1229
1230 comp->refs.qsort (order_drefs);
1231
1232 FOR_EACH_VEC_ELT (comp->refs, i, a)
1233 {
1234 if (!chain || DR_IS_WRITE (a->ref)
1235 || wi::leu_p (MAX_DISTANCE, a->offset - last_ofs))
1236 {
1237 if (nontrivial_chain_p (chain))
1238 {
1239 add_looparound_copies (loop, chain);
1240 chains->safe_push (chain);
1241 }
1242 else
1243 release_chain (chain);
1244 chain = make_rooted_chain (a);
1245 last_ofs = a->offset;
1246 continue;
1247 }
1248
1249 add_ref_to_chain (chain, a);
1250 }
1251
1252 if (nontrivial_chain_p (chain))
1253 {
1254 add_looparound_copies (loop, chain);
1255 chains->safe_push (chain);
1256 }
1257 else
1258 release_chain (chain);
1259 }
1260
1261 /* Find roots of the values and determine distances in components COMPS, and
1262 separates the references to CHAINS. LOOP is the current loop. */
1263
1264 static void
1265 determine_roots (struct loop *loop,
1266 struct component *comps, vec<chain_p> *chains)
1267 {
1268 struct component *comp;
1269
1270 for (comp = comps; comp; comp = comp->next)
1271 determine_roots_comp (loop, comp, chains);
1272 }
1273
1274 /* Replace the reference in statement STMT with temporary variable
1275 NEW_TREE. If SET is true, NEW_TREE is instead initialized to the value of
1276 the reference in the statement. IN_LHS is true if the reference
1277 is in the lhs of STMT, false if it is in rhs. */
1278
1279 static void
1280 replace_ref_with (gimple *stmt, tree new_tree, bool set, bool in_lhs)
1281 {
1282 tree val;
1283 gassign *new_stmt;
1284 gimple_stmt_iterator bsi, psi;
1285
1286 if (gimple_code (stmt) == GIMPLE_PHI)
1287 {
1288 gcc_assert (!in_lhs && !set);
1289
1290 val = PHI_RESULT (stmt);
1291 bsi = gsi_after_labels (gimple_bb (stmt));
1292 psi = gsi_for_stmt (stmt);
1293 remove_phi_node (&psi, false);
1294
1295 /* Turn the phi node into GIMPLE_ASSIGN. */
1296 new_stmt = gimple_build_assign (val, new_tree);
1297 gsi_insert_before (&bsi, new_stmt, GSI_NEW_STMT);
1298 return;
1299 }
1300
1301 /* Since the reference is of gimple_reg type, it should only
1302 appear as lhs or rhs of a modify statement. */
1303 gcc_assert (is_gimple_assign (stmt));
1304
1305 bsi = gsi_for_stmt (stmt);
1306
1307 /* If we do not need to initialize NEW_TREE, just replace the use of OLD. */
1308 if (!set)
1309 {
1310 gcc_assert (!in_lhs);
1311 gimple_assign_set_rhs_from_tree (&bsi, new_tree);
1312 stmt = gsi_stmt (bsi);
1313 update_stmt (stmt);
1314 return;
1315 }
1316
1317 if (in_lhs)
1318 {
1319 /* We have statement
1320
1321 OLD = VAL
1322
1323 If OLD is a memory reference, then VAL is gimple_val, and we transform
1324 this to
1325
1326 OLD = VAL
1327 NEW = VAL
1328
1329 Otherwise, we are replacing a combination chain,
1330 VAL is the expression that performs the combination, and OLD is an
1331 SSA name. In this case, we transform the assignment to
1332
1333 OLD = VAL
1334 NEW = OLD
1335
1336 */
1337
1338 val = gimple_assign_lhs (stmt);
1339 if (TREE_CODE (val) != SSA_NAME)
1340 {
1341 val = gimple_assign_rhs1 (stmt);
1342 gcc_assert (gimple_assign_single_p (stmt));
1343 if (TREE_CLOBBER_P (val))
1344 val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (new_tree));
1345 else
1346 gcc_assert (gimple_assign_copy_p (stmt));
1347 }
1348 }
1349 else
1350 {
1351 /* VAL = OLD
1352
1353 is transformed to
1354
1355 VAL = OLD
1356 NEW = VAL */
1357
1358 val = gimple_assign_lhs (stmt);
1359 }
1360
1361 new_stmt = gimple_build_assign (new_tree, unshare_expr (val));
1362 gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT);
1363 }
1364
1365 /* Returns a memory reference to DR in the ITER-th iteration of
1366 the loop it was analyzed in. Append init stmts to STMTS. */
1367
1368 static tree
1369 ref_at_iteration (data_reference_p dr, int iter, gimple_seq *stmts)
1370 {
1371 tree off = DR_OFFSET (dr);
1372 tree coff = DR_INIT (dr);
1373 tree ref = DR_REF (dr);
1374 enum tree_code ref_code = ERROR_MARK;
1375 tree ref_type = NULL_TREE;
1376 tree ref_op1 = NULL_TREE;
1377 tree ref_op2 = NULL_TREE;
1378 if (iter == 0)
1379 ;
1380 else if (TREE_CODE (DR_STEP (dr)) == INTEGER_CST)
1381 coff = size_binop (PLUS_EXPR, coff,
1382 size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter)));
1383 else
1384 off = size_binop (PLUS_EXPR, off,
1385 size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter)));
1386 /* While data-ref analysis punts on bit offsets it still handles
1387 bitfield accesses at byte boundaries. Cope with that. Note that
1388 if the bitfield object also starts at a byte-boundary we can simply
1389 replicate the COMPONENT_REF, but we have to subtract the component's
1390 byte-offset from the MEM_REF address first.
1391 Otherwise we simply build a BIT_FIELD_REF knowing that the bits
1392 start at offset zero. */
1393 if (TREE_CODE (ref) == COMPONENT_REF
1394 && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
1395 {
1396 unsigned HOST_WIDE_INT boff;
1397 tree field = TREE_OPERAND (ref, 1);
1398 tree offset = component_ref_field_offset (ref);
1399 ref_type = TREE_TYPE (ref);
1400 boff = tree_to_uhwi (DECL_FIELD_BIT_OFFSET (field));
1401 /* This can occur in Ada. See the comment in get_bit_range. */
1402 if (boff % BITS_PER_UNIT != 0
1403 || !tree_fits_uhwi_p (offset))
1404 {
1405 ref_code = BIT_FIELD_REF;
1406 ref_op1 = DECL_SIZE (field);
1407 ref_op2 = bitsize_zero_node;
1408 }
1409 else
1410 {
1411 boff >>= LOG2_BITS_PER_UNIT;
1412 boff += tree_to_uhwi (offset);
1413 coff = size_binop (MINUS_EXPR, coff, ssize_int (boff));
1414 ref_code = COMPONENT_REF;
1415 ref_op1 = field;
1416 ref_op2 = TREE_OPERAND (ref, 2);
1417 ref = TREE_OPERAND (ref, 0);
1418 }
1419 }
1420 tree addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off);
1421 addr = force_gimple_operand_1 (unshare_expr (addr), stmts,
1422 is_gimple_mem_ref_addr, NULL_TREE);
1423 tree alias_ptr = fold_convert (reference_alias_ptr_type (ref), coff);
1424 tree type = build_aligned_type (TREE_TYPE (ref),
1425 get_object_alignment (ref));
1426 ref = build2 (MEM_REF, type, addr, alias_ptr);
1427 if (ref_type)
1428 ref = build3 (ref_code, ref_type, ref, ref_op1, ref_op2);
1429 return ref;
1430 }
1431
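/* For example (illustrative): for DR describing a[i] with constant step 4,
   ref_at_iteration (dr, -2, &stmts) folds the step into the constant
   offset and builds a MEM_REF for the location a[i] accesses in iteration
   -2, i.e. a[-2] for a loop starting at i = 0; negative ITER values can
   thus be used to materialize initial values on the loop preheader edge.
   For a non-constant step, the multiplication is added to the variable
   offset OFF instead, and the address is gimplified into STMTS. */
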
1432 /* Get the initialization expression for the INDEX-th temporary variable
1433 of CHAIN. */
1434
1435 static tree
1436 get_init_expr (chain_p chain, unsigned index)
1437 {
1438 if (chain->type == CT_COMBINATION)
1439 {
1440 tree e1 = get_init_expr (chain->ch1, index);
1441 tree e2 = get_init_expr (chain->ch2, index);
1442
1443 return fold_build2 (chain->op, chain->rslt_type, e1, e2);
1444 }
1445 else
1446 return chain->inits[index];
1447 }
1448
1449 /* Returns a new temporary variable used for the I-th variable carrying
1450 the value of REF. The variable's uid is marked in TMP_VARS. */
1451
1452 static tree
1453 predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars)
1454 {
1455 tree type = TREE_TYPE (ref);
1456 /* We never access the components of the temporary variable in predictive
1457 commoning. */
1458 tree var = create_tmp_reg (type, get_lsm_tmp_name (ref, i));
1459 bitmap_set_bit (tmp_vars, DECL_UID (var));
1460 return var;
1461 }
1462
1463 /* Creates the variables for CHAIN, as well as phi nodes for them and
1464 initialization on entry to LOOP. Uids of the newly created
1465 temporary variables are marked in TMP_VARS. */
1466
1467 static void
1468 initialize_root_vars (struct loop *loop, chain_p chain, bitmap tmp_vars)
1469 {
1470 unsigned i;
1471 unsigned n = chain->length;
1472 dref root = get_chain_root (chain);
1473 bool reuse_first = !chain->has_max_use_after;
1474 tree ref, init, var, next;
1475 gphi *phi;
1476 gimple_seq stmts;
1477 edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1478
1479 /* If N == 0, then all the references are within a single iteration. And
1480 since this is a nonempty chain, reuse_first cannot be true. */
1481 gcc_assert (n > 0 || !reuse_first);
1482
1483 chain->vars.create (n + 1);
1484
1485 if (chain->type == CT_COMBINATION)
1486 ref = gimple_assign_lhs (root->stmt);
1487 else
1488 ref = DR_REF (root->ref);
1489
1490 for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
1491 {
1492 var = predcom_tmp_var (ref, i, tmp_vars);
1493 chain->vars.quick_push (var);
1494 }
1495 if (reuse_first)
1496 chain->vars.quick_push (chain->vars[0]);
1497
1498 FOR_EACH_VEC_ELT (chain->vars, i, var)
1499 chain->vars[i] = make_ssa_name (var);
1500
1501 for (i = 0; i < n; i++)
1502 {
1503 var = chain->vars[i];
1504 next = chain->vars[i + 1];
1505 init = get_init_expr (chain, i);
1506
1507 init = force_gimple_operand (init, &stmts, true, NULL_TREE);
1508 if (stmts)
1509 gsi_insert_seq_on_edge_immediate (entry, stmts);
1510
1511 phi = create_phi_node (var, loop->header);
1512 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1513 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
1514 }
1515 }
1516
1517 /* Create the variables and initialization statement for root of chain
1518 CHAIN. Uids of the newly created temporary variables are marked
1519 in TMP_VARS. */
1520
1521 static void
1522 initialize_root (struct loop *loop, chain_p chain, bitmap tmp_vars)
1523 {
1524 dref root = get_chain_root (chain);
1525 bool in_lhs = (chain->type == CT_STORE_LOAD
1526 || chain->type == CT_COMBINATION);
1527
1528 initialize_root_vars (loop, chain, tmp_vars);
1529 replace_ref_with (root->stmt,
1530 chain->vars[chain->length],
1531 true, in_lhs);
1532 }
1533
1534 /* Initializes a variable for load motion for ROOT and prepares phi nodes and
1535 initialization on entry to LOOP if necessary. The ssa name for the variable
1536 is stored in VARS. If WRITTEN is true, a phi node to copy its value
1537 around the loop is also created. Uid of the newly created temporary variable
1538 is marked in TMP_VARS. INITS is the list containing the (single)
1539 initializer. */
1540
1541 static void
1542 initialize_root_vars_lm (struct loop *loop, dref root, bool written,
1543 vec<tree> *vars, vec<tree> inits,
1544 bitmap tmp_vars)
1545 {
1546 unsigned i;
1547 tree ref = DR_REF (root->ref), init, var, next;
1548 gimple_seq stmts;
1549 gphi *phi;
1550 edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1551
1552 /* Find the initializer for the variable, and check that it cannot
1553 trap. */
1554 init = inits[0];
1555
1556 vars->create (written ? 2 : 1);
1557 var = predcom_tmp_var (ref, 0, tmp_vars);
1558 vars->quick_push (var);
1559 if (written)
1560 vars->quick_push ((*vars)[0]);
1561
1562 FOR_EACH_VEC_ELT (*vars, i, var)
1563 (*vars)[i] = make_ssa_name (var);
1564
1565 var = (*vars)[0];
1566
1567 init = force_gimple_operand (init, &stmts, written, NULL_TREE);
1568 if (stmts)
1569 gsi_insert_seq_on_edge_immediate (entry, stmts);
1570
1571 if (written)
1572 {
1573 next = (*vars)[1];
1574 phi = create_phi_node (var, loop->header);
1575 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1576 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
1577 }
1578 else
1579 {
1580 gassign *init_stmt = gimple_build_assign (var, init);
1581 gsi_insert_on_edge_immediate (entry, init_stmt);
1582 }
1583 }
1584
1585
1586 /* Execute load motion for references in chain CHAIN. Uids of the newly
1587 created temporary variables are marked in TMP_VARS. */
1588
1589 static void
1590 execute_load_motion (struct loop *loop, chain_p chain, bitmap tmp_vars)
1591 {
1592 auto_vec<tree> vars;
1593 dref a;
1594 unsigned n_writes = 0, ridx, i;
1595 tree var;
1596
1597 gcc_assert (chain->type == CT_INVARIANT);
1598 gcc_assert (!chain->combined);
1599 FOR_EACH_VEC_ELT (chain->refs, i, a)
1600 if (DR_IS_WRITE (a->ref))
1601 n_writes++;
1602
1603 /* If there are no reads in the loop, there is nothing to do. */
1604 if (n_writes == chain->refs.length ())
1605 return;
1606
1607 initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
1608 &vars, chain->inits, tmp_vars);
1609
1610 ridx = 0;
1611 FOR_EACH_VEC_ELT (chain->refs, i, a)
1612 {
1613 bool is_read = DR_IS_READ (a->ref);
1614
1615 if (DR_IS_WRITE (a->ref))
1616 {
1617 n_writes--;
1618 if (n_writes)
1619 {
1620 var = vars[0];
1621 var = make_ssa_name (SSA_NAME_VAR (var));
1622 vars[0] = var;
1623 }
1624 else
1625 ridx = 1;
1626 }
1627
1628 replace_ref_with (a->stmt, vars[ridx],
1629 !is_read, !is_read);
1630 }
1631 }
1632
1633 /* Returns the single statement in which NAME is used, except for
1634 the looparound phi nodes contained in one of the chains. If there is no
1635 such statement, or if there are multiple such statements, NULL is returned. */
1636
1637 static gimple *
1638 single_nonlooparound_use (tree name)
1639 {
1640 use_operand_p use;
1641 imm_use_iterator it;
1642 gimple *stmt, *ret = NULL;
1643
1644 FOR_EACH_IMM_USE_FAST (use, it, name)
1645 {
1646 stmt = USE_STMT (use);
1647
1648 if (gimple_code (stmt) == GIMPLE_PHI)
1649 {
1650 /* Ignore uses in looparound phi nodes. Uses in other phi nodes
1651 could not be processed anyway, so just fail for them. */
1652 if (bitmap_bit_p (looparound_phis,
1653 SSA_NAME_VERSION (PHI_RESULT (stmt))))
1654 continue;
1655
1656 return NULL;
1657 }
1658 else if (is_gimple_debug (stmt))
1659 continue;
1660 else if (ret != NULL)
1661 return NULL;
1662 else
1663 ret = stmt;
1664 }
1665
1666 return ret;
1667 }
1668
1669 /* Remove statement STMT, as well as the chain of assignments in which it is
1670 used. */
1671
1672 static void
1673 remove_stmt (gimple *stmt)
1674 {
1675 tree name;
1676 gimple *next;
1677 gimple_stmt_iterator psi;
1678
1679 if (gimple_code (stmt) == GIMPLE_PHI)
1680 {
1681 name = PHI_RESULT (stmt);
1682 next = single_nonlooparound_use (name);
1683 reset_debug_uses (stmt);
1684 psi = gsi_for_stmt (stmt);
1685 remove_phi_node (&psi, true);
1686
1687 if (!next
1688 || !gimple_assign_ssa_name_copy_p (next)
1689 || gimple_assign_rhs1 (next) != name)
1690 return;
1691
1692 stmt = next;
1693 }
1694
1695 while (1)
1696 {
1697 gimple_stmt_iterator bsi;
1698
1699 bsi = gsi_for_stmt (stmt);
1700
1701 name = gimple_assign_lhs (stmt);
1702 gcc_assert (TREE_CODE (name) == SSA_NAME);
1703
1704 next = single_nonlooparound_use (name);
1705 reset_debug_uses (stmt);
1706
1707 unlink_stmt_vdef (stmt);
1708 gsi_remove (&bsi, true);
1709 release_defs (stmt);
1710
1711 if (!next
1712 || !gimple_assign_ssa_name_copy_p (next)
1713 || gimple_assign_rhs1 (next) != name)
1714 return;
1715
1716 stmt = next;
1717 }
1718 }
1719
1720 /* Perform the predictive commoning optimization for a chain CHAIN.
1721 Uids of the newly created temporary variables are marked in TMP_VARS. */
1722
1723 static void
1724 execute_pred_commoning_chain (struct loop *loop, chain_p chain,
1725 bitmap tmp_vars)
1726 {
1727 unsigned i;
1728 dref a;
1729 tree var;
1730
1731 if (chain->combined)
1732 {
1733 /* For combined chains, just remove the statements that are used to
1734 compute the values of the expression (except for the root one).
1735 We delay this until after all chains are processed. */
1736 }
1737 else
1738 {
1739 /* For non-combined chains, set up the variables that hold its value,
1740 and replace the uses of the original references by these
1741 variables. */
1742 initialize_root (loop, chain, tmp_vars);
1743 for (i = 1; chain->refs.iterate (i, &a); i++)
1744 {
1745 var = chain->vars[chain->length - a->distance];
1746 replace_ref_with (a->stmt, var, false, false);
1747 }
1748 }
1749 }
1750
1751 /* Determines the unroll factor necessary to remove as many temporary variable
1752 copies as possible. CHAINS is the list of chains that will be
1753 optimized. */
1754
1755 static unsigned
1756 determine_unroll_factor (vec<chain_p> chains)
1757 {
1758 chain_p chain;
1759 unsigned factor = 1, af, nfactor, i;
1760 unsigned max = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1761
1762 FOR_EACH_VEC_ELT (chains, i, chain)
1763 {
1764 if (chain->type == CT_INVARIANT)
1765 continue;
1766
1767 if (chain->combined)
1768 {
1769 /* For combined chains, we can't handle unrolling if we replace
1770 looparound PHIs. */
1771 dref a;
1772 unsigned j;
1773 for (j = 1; chain->refs.iterate (j, &a); j++)
1774 if (gimple_code (a->stmt) == GIMPLE_PHI)
1775 return 1;
1776 continue;
1777 }
1778
1779 /* The best unroll factor for this chain is equal to the number of
1780 temporary variables that we create for it. */
1781 af = chain->length;
1782 if (chain->has_max_use_after)
1783 af++;
1784
1785 nfactor = factor * af / gcd (factor, af);
1786 if (nfactor <= max)
1787 factor = nfactor;
1788 }
1789
1790 return factor;
1791 }
1792
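/* Worked example (illustrative): in the file-level example the only
   non-invariant chain (for a[]) has length 2 and no use of the
   maximum-distance variable after the root, so AF = 2 and the loop is
   unrolled F = 2 times. A second chain contributing AF = 3 would raise
   the factor to lcm (2, 3) = 6, but only if that does not exceed
   PARAM_MAX_UNROLL_TIMES; otherwise the factor is left at 2. */
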
1793 /* Perform the predictive commoning optimization for CHAINS.
1794 Uids of the newly created temporary variables are marked in TMP_VARS. */
1795
1796 static void
1797 execute_pred_commoning (struct loop *loop, vec<chain_p> chains,
1798 bitmap tmp_vars)
1799 {
1800 chain_p chain;
1801 unsigned i;
1802
1803 FOR_EACH_VEC_ELT (chains, i, chain)
1804 {
1805 if (chain->type == CT_INVARIANT)
1806 execute_load_motion (loop, chain, tmp_vars);
1807 else
1808 execute_pred_commoning_chain (loop, chain, tmp_vars);
1809 }
1810
1811 FOR_EACH_VEC_ELT (chains, i, chain)
1812 {
1813 if (chain->type == CT_INVARIANT)
1814 ;
1815 else if (chain->combined)
1816 {
1817 /* For combined chains, just remove the statements that are used to
1818 compute the values of the expression (except for the root one). */
1819 dref a;
1820 unsigned j;
1821 for (j = 1; chain->refs.iterate (j, &a); j++)
1822 remove_stmt (a->stmt);
1823 }
1824 }
1825
1826 update_ssa (TODO_update_ssa_only_virtuals);
1827 }
1828
1829 /* For each reference in CHAINS, if its defining statement is
1830 a phi node, record the ssa name that is defined by it. */
1831
1832 static void
1833 replace_phis_by_defined_names (vec<chain_p> chains)
1834 {
1835 chain_p chain;
1836 dref a;
1837 unsigned i, j;
1838
1839 FOR_EACH_VEC_ELT (chains, i, chain)
1840 FOR_EACH_VEC_ELT (chain->refs, j, a)
1841 {
1842 if (gimple_code (a->stmt) == GIMPLE_PHI)
1843 {
1844 a->name_defined_by_phi = PHI_RESULT (a->stmt);
1845 a->stmt = NULL;
1846 }
1847 }
1848 }
1849
1850 /* For each reference in CHAINS, if name_defined_by_phi is not
1851 NULL, use it to set the stmt field. */
1852
1853 static void
1854 replace_names_by_phis (vec<chain_p> chains)
1855 {
1856 chain_p chain;
1857 dref a;
1858 unsigned i, j;
1859
1860 FOR_EACH_VEC_ELT (chains, i, chain)
1861 FOR_EACH_VEC_ELT (chain->refs, j, a)
1862 if (a->stmt == NULL)
1863 {
1864 a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi);
1865 gcc_assert (gimple_code (a->stmt) == GIMPLE_PHI);
1866 a->name_defined_by_phi = NULL_TREE;
1867 }
1868 }
1869
1870 /* Wrapper over execute_pred_commoning, to pass it as a callback
1871 to tree_transform_and_unroll_loop. */
1872
1873 struct epcc_data
1874 {
1875 vec<chain_p> chains;
1876 bitmap tmp_vars;
1877 };
1878
1879 static void
1880 execute_pred_commoning_cbck (struct loop *loop, void *data)
1881 {
1882 struct epcc_data *const dta = (struct epcc_data *) data;
1883
1884 /* Restore phi nodes that were replaced by ssa names before
1885 tree_transform_and_unroll_loop (see detailed description in
1886 tree_predictive_commoning_loop). */
1887 replace_names_by_phis (dta->chains);
1888 execute_pred_commoning (loop, dta->chains, dta->tmp_vars);
1889 }
1890
1891 /* Base NAME and all the names in the chain of phi nodes that use it
1892 on variable VAR. The phi nodes are recognized by being in the copies of
1893 the header of the LOOP. */
1894
1895 static void
1896 base_names_in_chain_on (struct loop *loop, tree name, tree var)
1897 {
1898 gimple *stmt, *phi;
1899 imm_use_iterator iter;
1900
1901 replace_ssa_name_symbol (name, var);
1902
1903 while (1)
1904 {
1905 phi = NULL;
1906 FOR_EACH_IMM_USE_STMT (stmt, iter, name)
1907 {
1908 if (gimple_code (stmt) == GIMPLE_PHI
1909 && flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
1910 {
1911 phi = stmt;
1912 BREAK_FROM_IMM_USE_STMT (iter);
1913 }
1914 }
1915 if (!phi)
1916 return;
1917
1918 name = PHI_RESULT (phi);
1919 replace_ssa_name_symbol (name, var);
1920 }
1921 }
1922
1923 /* Given an unrolled LOOP after predictive commoning, remove the
1924 register copies arising from phi nodes by changing the base
1925 variables of SSA names. TMP_VARS is the set of the temporary variables
1926 for which we want to perform this. */
1927
1928 static void
1929 eliminate_temp_copies (struct loop *loop, bitmap tmp_vars)
1930 {
1931 edge e;
1932 gphi *phi;
1933 gimple *stmt;
1934 tree name, use, var;
1935 gphi_iterator psi;
1936
1937 e = loop_latch_edge (loop);
1938 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1939 {
1940 phi = psi.phi ();
1941 name = PHI_RESULT (phi);
1942 var = SSA_NAME_VAR (name);
1943 if (!var || !bitmap_bit_p (tmp_vars, DECL_UID (var)))
1944 continue;
1945 use = PHI_ARG_DEF_FROM_EDGE (phi, e);
1946 gcc_assert (TREE_CODE (use) == SSA_NAME);
1947
1948 /* Base all the ssa names in the ud and du chain of NAME on VAR. */
1949 stmt = SSA_NAME_DEF_STMT (use);
1950 while (gimple_code (stmt) == GIMPLE_PHI
1951 /* In case we could not unroll the loop enough to eliminate
1952 all copies, we may reach the loop header before the defining
1953 statement (in that case, some register copies will be present
1954 in loop latch in the final code, corresponding to the newly
1955 created looparound phi nodes). */
1956 && gimple_bb (stmt) != loop->header)
1957 {
1958 gcc_assert (single_pred_p (gimple_bb (stmt)));
1959 use = PHI_ARG_DEF (stmt, 0);
1960 stmt = SSA_NAME_DEF_STMT (use);
1961 }
1962
1963 base_names_in_chain_on (loop, use, var);
1964 }
1965 }
1966
1967 /* Returns true if CHAIN is suitable to be combined. */
1968
1969 static bool
1970 chain_can_be_combined_p (chain_p chain)
1971 {
1972 return (!chain->combined
1973 && (chain->type == CT_LOAD || chain->type == CT_COMBINATION));
1974 }
1975
1976 /* Returns the modify statement that uses NAME. Skips over assignment
1977 statements; NAME is replaced with the actual name used in the returned
1978 statement. */
1979
1980 static gimple *
1981 find_use_stmt (tree *name)
1982 {
1983 gimple *stmt;
1984 tree rhs, lhs;
1985
1986 /* Skip over assignments. */
1987 while (1)
1988 {
1989 stmt = single_nonlooparound_use (*name);
1990 if (!stmt)
1991 return NULL;
1992
1993 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1994 return NULL;
1995
1996 lhs = gimple_assign_lhs (stmt);
1997 if (TREE_CODE (lhs) != SSA_NAME)
1998 return NULL;
1999
2000 if (gimple_assign_copy_p (stmt))
2001 {
2002 rhs = gimple_assign_rhs1 (stmt);
2003 if (rhs != *name)
2004 return NULL;
2005
2006 *name = lhs;
2007 }
2008 else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
2009 == GIMPLE_BINARY_RHS)
2010 return stmt;
2011 else
2012 return NULL;
2013 }
2014 }
2015
2016 /* Returns true if we may perform reassociation for operation CODE in TYPE. */
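
/* For example, reassociation is not valid for IEEE floating point by
   default: with doubles, (1e20 + -1e20) + 1 evaluates to 1, whereas
   1e20 + (-1e20 + 1) evaluates to 0, because 1e20 + 1 rounds back to
   1e20.  Hence FLOAT_TYPE_P types are reassociated only under
   -funsafe-math-optimizations.  */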
2017
2018 static bool
2019 may_reassociate_p (tree type, enum tree_code code)
2020 {
2021 if (FLOAT_TYPE_P (type)
2022 && !flag_unsafe_math_optimizations)
2023 return false;
2024
2025 return (commutative_tree_code (code)
2026 && associative_tree_code (code));
2027 }
2028
2029 /* If the operation used in STMT is associative and commutative, walk up the
2030 tree of statements performing the same operation and return its root. The
2031 distance to the root is stored in DISTANCE. */
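
/* For example (hypothetical names), in

     x_1 = a_1 + b_1;
     y_1 = x_1 + c_1;
     z_1 = y_1 + d_1;

   the root for the statement defining x_1 is the statement defining
   z_1, at distance 2.  */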
2032
2033 static gimple *
2034 find_associative_operation_root (gimple *stmt, unsigned *distance)
2035 {
2036 tree lhs;
2037 gimple *next;
2038 enum tree_code code = gimple_assign_rhs_code (stmt);
2039 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2040 unsigned dist = 0;
2041
2042 if (!may_reassociate_p (type, code))
2043 return NULL;
2044
2045 while (1)
2046 {
2047 lhs = gimple_assign_lhs (stmt);
2048 gcc_assert (TREE_CODE (lhs) == SSA_NAME);
2049
2050 next = find_use_stmt (&lhs);
2051 if (!next
2052 || gimple_assign_rhs_code (next) != code)
2053 break;
2054
2055 stmt = next;
2056 dist++;
2057 }
2058
2059 if (distance)
2060 *distance = dist;
2061 return stmt;
2062 }
2063
2064 /* Returns the common statement in which NAME1 and NAME2 have a use. If there
2065 is no such statement, returns NULL. In case the operation used on
2066 NAME1 and NAME2 is associative and commutative, returns the root of the
2067 tree formed by this operation instead of the statement that uses NAME1 or
2068 NAME2. */
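
/* E.g. (hypothetical names), in

     t_1 = a_1 + name1;
     t_2 = t_1 + name2;

   name1 and name2 have no single common use statement, but both belong
   to the reassociable + tree rooted at t_2, so the statement defining
   t_2 is returned.  */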
2069
2070 static gimple *
2071 find_common_use_stmt (tree *name1, tree *name2)
2072 {
2073 gimple *stmt1, *stmt2;
2074
2075 stmt1 = find_use_stmt (name1);
2076 if (!stmt1)
2077 return NULL;
2078
2079 stmt2 = find_use_stmt (name2);
2080 if (!stmt2)
2081 return NULL;
2082
2083 if (stmt1 == stmt2)
2084 return stmt1;
2085
2086 stmt1 = find_associative_operation_root (stmt1, NULL);
2087 if (!stmt1)
2088 return NULL;
2089 stmt2 = find_associative_operation_root (stmt2, NULL);
2090 if (!stmt2)
2091 return NULL;
2092
2093 return (stmt1 == stmt2 ? stmt1 : NULL);
2094 }
2095
2096 /* Checks whether R1 and R2 are combined together using CODE, with the result
2097 in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
2098 if it is true. If CODE is ERROR_MARK, store the values found instead. */
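
/* For instance, a use x_1 = a[i] - b[i] yields *CODE == MINUS_EXPR and
   *SWAP == false; if the references of the next pair were used as
   b[i+1] - a[i+1], *SWAP would differ and the chains could not be
   combined, since MINUS_EXPR is not commutative.  */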
2099
2100 static bool
2101 combinable_refs_p (dref r1, dref r2,
2102 enum tree_code *code, bool *swap, tree *rslt_type)
2103 {
2104 enum tree_code acode;
2105 bool aswap;
2106 tree atype;
2107 tree name1, name2;
2108 gimple *stmt;
2109
2110 name1 = name_for_ref (r1);
2111 name2 = name_for_ref (r2);
2112 gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);
2113
2114 stmt = find_common_use_stmt (&name1, &name2);
2115
2116 if (!stmt
2117 /* A simple post-dominance check - make sure the combination
2118 is executed under the same condition as the references. */
2119 || (gimple_bb (stmt) != gimple_bb (r1->stmt)
2120 && gimple_bb (stmt) != gimple_bb (r2->stmt)))
2121 return false;
2122
2123 acode = gimple_assign_rhs_code (stmt);
2124 aswap = (!commutative_tree_code (acode)
2125 && gimple_assign_rhs1 (stmt) != name1);
2126 atype = TREE_TYPE (gimple_assign_lhs (stmt));
2127
2128 if (*code == ERROR_MARK)
2129 {
2130 *code = acode;
2131 *swap = aswap;
2132 *rslt_type = atype;
2133 return true;
2134 }
2135
2136 return (*code == acode
2137 && *swap == aswap
2138 && *rslt_type == atype);
2139 }
2140
2141 /* Remove OP from the operation on rhs of STMT, and replace STMT with
2142 an assignment of the remaining operand. */
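
/* E.g. removing b_1 from x_1 = a_1 + b_1 turns the statement into the
   copy x_1 = a_1.  */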
2143
2144 static void
2145 remove_name_from_operation (gimple *stmt, tree op)
2146 {
2147 tree other_op;
2148 gimple_stmt_iterator si;
2149
2150 gcc_assert (is_gimple_assign (stmt));
2151
2152 if (gimple_assign_rhs1 (stmt) == op)
2153 other_op = gimple_assign_rhs2 (stmt);
2154 else
2155 other_op = gimple_assign_rhs1 (stmt);
2156
2157 si = gsi_for_stmt (stmt);
2158 gimple_assign_set_rhs_from_tree (&si, other_op);
2159
2160 /* We should not have reallocated STMT. */
2161 gcc_assert (gsi_stmt (si) == stmt);
2162
2163 update_stmt (stmt);
2164 }
2165
2166 /* Reassociates the expression in which NAME1 and NAME2 are used so that they
2167 are combined in a single statement, and returns this statement. */
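
/* A sketch with hypothetical names: given the reassociable + tree

     t_1 = a_1 + name1;
     t_2 = t_1 + b_1;
     t_3 = t_2 + name2;

   the expression is rewritten as

     t_1 = a_1;
     t_2 = t_1 + b_1;
     new_1 = name1 + name2;
     tmp_1 = t_2;
     t_3 = new_1 + tmp_1;

   and the statement defining new_1, in which NAME1 and NAME2 are now
   combined directly, is returned.  */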
2168
2169 static gimple *
2170 reassociate_to_the_same_stmt (tree name1, tree name2)
2171 {
2172 gimple *stmt1, *stmt2, *root1, *root2, *s1, *s2;
2173 gassign *new_stmt, *tmp_stmt;
2174 tree new_name, tmp_name, var, r1, r2;
2175 unsigned dist1, dist2;
2176 enum tree_code code;
2177 tree type = TREE_TYPE (name1);
2178 gimple_stmt_iterator bsi;
2179
2180 stmt1 = find_use_stmt (&name1);
2181 stmt2 = find_use_stmt (&name2);
2182 root1 = find_associative_operation_root (stmt1, &dist1);
2183 root2 = find_associative_operation_root (stmt2, &dist2);
2184 code = gimple_assign_rhs_code (stmt1);
2185
2186 gcc_assert (root1 && root2 && root1 == root2
2187 && code == gimple_assign_rhs_code (stmt2));
2188
2189 /* Find the root of the nearest expression in which both NAME1 and NAME2
2190 are used. */
2191 r1 = name1;
2192 s1 = stmt1;
2193 r2 = name2;
2194 s2 = stmt2;
2195
2196 while (dist1 > dist2)
2197 {
2198 s1 = find_use_stmt (&r1);
2199 r1 = gimple_assign_lhs (s1);
2200 dist1--;
2201 }
2202 while (dist2 > dist1)
2203 {
2204 s2 = find_use_stmt (&r2);
2205 r2 = gimple_assign_lhs (s2);
2206 dist2--;
2207 }
2208
2209 while (s1 != s2)
2210 {
2211 s1 = find_use_stmt (&r1);
2212 r1 = gimple_assign_lhs (s1);
2213 s2 = find_use_stmt (&r2);
2214 r2 = gimple_assign_lhs (s2);
2215 }
2216
2217 /* Remove NAME1 and NAME2 from the statements in which they are
2218 currently used. */
2219 remove_name_from_operation (stmt1, name1);
2220 remove_name_from_operation (stmt2, name2);
2221
2222 /* Insert the new statement combining NAME1 and NAME2 before S1, and
2223 combine it with the rhs of S1. */
2224 var = create_tmp_reg (type, "predreastmp");
2225 new_name = make_ssa_name (var);
2226 new_stmt = gimple_build_assign (new_name, code, name1, name2);
2227
2228 var = create_tmp_reg (type, "predreastmp");
2229 tmp_name = make_ssa_name (var);
2230
2231 /* The rhs of S1 may now be either a binary expression with operation
2232 CODE, or a gimple_val (in case stmt1 == s1 or stmt2 == s1,
2233 so that name1 or name2 was removed from it). */
2234 tmp_stmt = gimple_build_assign (tmp_name, gimple_assign_rhs_code (s1),
2235 gimple_assign_rhs1 (s1),
2236 gimple_assign_rhs2 (s1));
2237
2238 bsi = gsi_for_stmt (s1);
2239 gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name);
2240 s1 = gsi_stmt (bsi);
2241 update_stmt (s1);
2242
2243 gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
2244 gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT);
2245
2246 return new_stmt;
2247 }
2248
2249 /* Returns the statement that combines references R1 and R2. In case R1
2250 and R2 are not used in the same statement, but they are used with an
2251 associative and commutative operation in the same expression, reassociate
2252 the expression so that they are used in the same statement. */
2253
2254 static gimple *
2255 stmt_combining_refs (dref r1, dref r2)
2256 {
2257 gimple *stmt1, *stmt2;
2258 tree name1 = name_for_ref (r1);
2259 tree name2 = name_for_ref (r2);
2260
2261 stmt1 = find_use_stmt (&name1);
2262 stmt2 = find_use_stmt (&name2);
2263 if (stmt1 == stmt2)
2264 return stmt1;
2265
2266 return reassociate_to_the_same_stmt (name1, name2);
2267 }
2268
2269 /* Tries to combine chains CH1 and CH2 together. If this succeeds, the
2270 description of the new chain is returned, otherwise we return NULL. */
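
/* For example (a sketch): if every reference a[i + k] of one load
   chain is used only as a[i + k] + b[i + k] together with the
   corresponding reference of another load chain, the two chains are
   replaced by a single CT_COMBINATION chain whose references stand for
   the sums, so only the combined values need to be carried between
   iterations.  */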
2271
2272 static chain_p
2273 combine_chains (chain_p ch1, chain_p ch2)
2274 {
2275 dref r1, r2, nw;
2276 enum tree_code op = ERROR_MARK;
2277 bool swap = false;
2278 chain_p new_chain;
2279 unsigned i;
2280 gimple *root_stmt;
2281 tree rslt_type = NULL_TREE;
2282
2283 if (ch1 == ch2)
2284 return NULL;
2285 if (ch1->length != ch2->length)
2286 return NULL;
2287
2288 if (ch1->refs.length () != ch2->refs.length ())
2289 return NULL;
2290
2291 for (i = 0; (ch1->refs.iterate (i, &r1)
2292 && ch2->refs.iterate (i, &r2)); i++)
2293 {
2294 if (r1->distance != r2->distance)
2295 return NULL;
2296
2297 if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type))
2298 return NULL;
2299 }
2300
2301 if (swap)
2302 std::swap (ch1, ch2);
2303
2304 new_chain = XCNEW (struct chain);
2305 new_chain->type = CT_COMBINATION;
2306 new_chain->op = op;
2307 new_chain->ch1 = ch1;
2308 new_chain->ch2 = ch2;
2309 new_chain->rslt_type = rslt_type;
2310 new_chain->length = ch1->length;
2311
2312 for (i = 0; (ch1->refs.iterate (i, &r1)
2313 && ch2->refs.iterate (i, &r2)); i++)
2314 {
2315 nw = XCNEW (struct dref_d);
2316 nw->stmt = stmt_combining_refs (r1, r2);
2317 nw->distance = r1->distance;
2318
2319 new_chain->refs.safe_push (nw);
2320 }
2321
2322 new_chain->has_max_use_after = false;
2323 root_stmt = get_chain_root (new_chain)->stmt;
2324 for (i = 1; new_chain->refs.iterate (i, &nw); i++)
2325 {
2326 if (nw->distance == new_chain->length
2327 && !stmt_dominates_stmt_p (nw->stmt, root_stmt))
2328 {
2329 new_chain->has_max_use_after = true;
2330 break;
2331 }
2332 }
2333
2334 ch1->combined = true;
2335 ch2->combined = true;
2336 return new_chain;
2337 }
2338
2339 /* Try to combine the CHAINS. */
2340
2341 static void
2342 try_combine_chains (vec<chain_p> *chains)
2343 {
2344 unsigned i, j;
2345 chain_p ch1, ch2, cch;
2346 auto_vec<chain_p> worklist;
2347
2348 FOR_EACH_VEC_ELT (*chains, i, ch1)
2349 if (chain_can_be_combined_p (ch1))
2350 worklist.safe_push (ch1);
2351
2352 while (!worklist.is_empty ())
2353 {
2354 ch1 = worklist.pop ();
2355 if (!chain_can_be_combined_p (ch1))
2356 continue;
2357
2358 FOR_EACH_VEC_ELT (*chains, j, ch2)
2359 {
2360 if (!chain_can_be_combined_p (ch2))
2361 continue;
2362
2363 cch = combine_chains (ch1, ch2);
2364 if (cch)
2365 {
2366 worklist.safe_push (cch);
2367 chains->safe_push (cch);
2368 break;
2369 }
2370 }
2371 }
2372 }
2373
2374 /* Prepare initializers for CHAIN in LOOP. Returns false if this is
2375 impossible because one of these initializers may trap, true otherwise. */
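
/* For instance (a sketch), for a chain of length 1 rooted at a[i + 1]
   whose value is reused by a[i] one iteration later, a single
   initializer is needed: the value of the root reference in the
   iteration before the first one, i.e. a load of a[0] inserted on the
   preheader edge.  If such a load could trap and the location is not
   accessed in every iteration, the chain cannot be used.  */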
2376
2377 static bool
2378 prepare_initializers_chain (struct loop *loop, chain_p chain)
2379 {
2380 unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
2381 struct data_reference *dr = get_chain_root (chain)->ref;
2382 tree init;
2383 dref laref;
2384 edge entry = loop_preheader_edge (loop);
2385
2386 /* Find the initializers for the variables, and check that they cannot
2387 trap. */
2388 chain->inits.create (n);
2389 for (i = 0; i < n; i++)
2390 chain->inits.quick_push (NULL_TREE);
2391
2392 /* If we have replaced some looparound phi nodes, use their initializers
2393 instead of creating our own. */
2394 FOR_EACH_VEC_ELT (chain->refs, i, laref)
2395 {
2396 if (gimple_code (laref->stmt) != GIMPLE_PHI)
2397 continue;
2398
2399 gcc_assert (laref->distance > 0);
2400 chain->inits[n - laref->distance]
2401 = PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry);
2402 }
2403
2404 for (i = 0; i < n; i++)
2405 {
2406 gimple_seq stmts = NULL;
2407
2408 if (chain->inits[i] != NULL_TREE)
2409 continue;
2410
2411 init = ref_at_iteration (dr, (int) i - n, &stmts);
2412 if (!chain->all_always_accessed && tree_could_trap_p (init))
2413 {
2414 gimple_seq_discard (stmts);
2415 return false;
2416 }
2417
2418 if (stmts)
2419 gsi_insert_seq_on_edge_immediate (entry, stmts);
2420
2421 chain->inits[i] = init;
2422 }
2423
2424 return true;
2425 }
2426
2427 /* Prepare initializers for CHAINS in LOOP, and free chains that cannot
2428 be used because the initializers might trap. */
2429
2430 static void
2431 prepare_initializers (struct loop *loop, vec<chain_p> chains)
2432 {
2433 chain_p chain;
2434 unsigned i;
2435
2436 for (i = 0; i < chains.length (); )
2437 {
2438 chain = chains[i];
2439 if (prepare_initializers_chain (loop, chain))
2440 i++;
2441 else
2442 {
2443 release_chain (chain);
2444 chains.unordered_remove (i);
2445 }
2446 }
2447 }
2448
2449 /* Performs predictive commoning for LOOP. Returns true if LOOP was
2450 unrolled. */
2451
2452 static bool
2453 tree_predictive_commoning_loop (struct loop *loop)
2454 {
2455 vec<data_reference_p> datarefs;
2456 vec<ddr_p> dependences;
2457 struct component *components;
2458 vec<chain_p> chains = vNULL;
2459 unsigned unroll_factor;
2460 struct tree_niter_desc desc;
2461 bool unroll = false;
2462 edge exit;
2463 bitmap tmp_vars;
2464
2465 if (dump_file && (dump_flags & TDF_DETAILS))
2466 fprintf (dump_file, "Processing loop %d\n", loop->num);
2467
2468 /* Nothing for predictive commoning if the loop iterates only once. */
2469 if (get_max_loop_iterations_int (loop) == 0)
2470 {
2471 if (dump_file && (dump_flags & TDF_DETAILS))
2472 fprintf (dump_file, "Loop iterates only 1 time, nothing to do.\n");
2473
2474 return false;
2475 }
2476
2477 /* Find the data references and split them into components according to their
2478 dependence relations. */
2479 auto_vec<loop_p, 3> loop_nest;
2480 dependences.create (10);
2481 datarefs.create (10);
2482 if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
2483 &dependences))
2484 {
2485 if (dump_file && (dump_flags & TDF_DETAILS))
2486 fprintf (dump_file, "Cannot analyze data dependencies\n");
2487 free_data_refs (datarefs);
2488 free_dependence_relations (dependences);
2489 return false;
2490 }
2491
2492 if (dump_file && (dump_flags & TDF_DETAILS))
2493 dump_data_dependence_relations (dump_file, dependences);
2494
2495 components = split_data_refs_to_components (loop, datarefs, dependences);
2496 loop_nest.release ();
2497 free_dependence_relations (dependences);
2498 if (!components)
2499 {
2500 free_data_refs (datarefs);
2501 free_affine_expand_cache (&name_expansions);
2502 return false;
2503 }
2504
2505 if (dump_file && (dump_flags & TDF_DETAILS))
2506 {
2507 fprintf (dump_file, "Initial state:\n\n");
2508 dump_components (dump_file, components);
2509 }
2510
2511 /* Find the suitable components and split them into chains. */
2512 components = filter_suitable_components (loop, components);
2513
2514 tmp_vars = BITMAP_ALLOC (NULL);
2515 looparound_phis = BITMAP_ALLOC (NULL);
2516 determine_roots (loop, components, &chains);
2517 release_components (components);
2518
2519 if (!chains.exists ())
2520 {
2521 if (dump_file && (dump_flags & TDF_DETAILS))
2522 fprintf (dump_file,
2523 "Predictive commoning failed: no suitable chains\n");
2524 goto end;
2525 }
2526 prepare_initializers (loop, chains);
2527
2528 /* Try to combine the chains that are always used together. */
2529 try_combine_chains (&chains);
2530
2531 if (dump_file && (dump_flags & TDF_DETAILS))
2532 {
2533 fprintf (dump_file, "Before commoning:\n\n");
2534 dump_chains (dump_file, chains);
2535 }
2536
2537 /* Determine the unroll factor, and if the loop should be unrolled, ensure
2538 that its number of iterations is divisible by the factor. */
2539 unroll_factor = determine_unroll_factor (chains);
2540 scev_reset ();
2541 unroll = (unroll_factor > 1
2542 && can_unroll_loop_p (loop, unroll_factor, &desc));
2543 exit = single_dom_exit (loop);
2544
2545 /* Execute the predictive commoning transformations, and possibly unroll the
2546 loop. */
2547 if (unroll)
2548 {
2549 struct epcc_data dta;
2550
2551 if (dump_file && (dump_flags & TDF_DETAILS))
2552 fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);
2553
2554 dta.chains = chains;
2555 dta.tmp_vars = tmp_vars;
2556
2557 update_ssa (TODO_update_ssa_only_virtuals);
2558
2559 /* Cfg manipulations performed in tree_transform_and_unroll_loop before
2560 execute_pred_commoning_cbck is called may cause phi nodes to be
2561 reallocated, which is a problem since CHAINS may point to these
2562 statements. To fix this, we store the ssa names defined by the
2563 phi nodes here instead of the phi nodes themselves, and restore
2564 the phi nodes in execute_pred_commoning_cbck. A bit hacky. */
2565 replace_phis_by_defined_names (chains);
2566
2567 tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc,
2568 execute_pred_commoning_cbck, &dta);
2569 eliminate_temp_copies (loop, tmp_vars);
2570 }
2571 else
2572 {
2573 if (dump_file && (dump_flags & TDF_DETAILS))
2574 fprintf (dump_file,
2575 "Executing predictive commoning without unrolling.\n");
2576 execute_pred_commoning (loop, chains, tmp_vars);
2577 }
2578
2579 end: ;
2580 release_chains (chains);
2581 free_data_refs (datarefs);
2582 BITMAP_FREE (tmp_vars);
2583 BITMAP_FREE (looparound_phis);
2584
2585 free_affine_expand_cache (&name_expansions);
2586
2587 return unroll;
2588 }
2589
2590 /* Runs predictive commoning. */
2591
2592 unsigned
2593 tree_predictive_commoning (void)
2594 {
2595 bool unrolled = false;
2596 struct loop *loop;
2597 unsigned ret = 0;
2598
2599 initialize_original_copy_tables ();
2600 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
2601 if (optimize_loop_for_speed_p (loop))
2602 {
2603 unrolled |= tree_predictive_commoning_loop (loop);
2604 }
2605
2606 if (unrolled)
2607 {
2608 scev_reset ();
2609 ret = TODO_cleanup_cfg;
2610 }
2611 free_original_copy_tables ();
2612
2613 return ret;
2614 }
2615
2616 /* Predictive commoning pass. */
2617
2618 static unsigned
2619 run_tree_predictive_commoning (struct function *fun)
2620 {
2621 if (number_of_loops (fun) <= 1)
2622 return 0;
2623
2624 return tree_predictive_commoning ();
2625 }
2626
2627 namespace {
2628
2629 const pass_data pass_data_predcom =
2630 {
2631 GIMPLE_PASS, /* type */
2632 "pcom", /* name */
2633 OPTGROUP_LOOP, /* optinfo_flags */
2634 TV_PREDCOM, /* tv_id */
2635 PROP_cfg, /* properties_required */
2636 0, /* properties_provided */
2637 0, /* properties_destroyed */
2638 0, /* todo_flags_start */
2639 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
2640 };
2641
2642 class pass_predcom : public gimple_opt_pass
2643 {
2644 public:
2645 pass_predcom (gcc::context *ctxt)
2646 : gimple_opt_pass (pass_data_predcom, ctxt)
2647 {}
2648
2649 /* opt_pass methods: */
2650 virtual bool gate (function *) { return flag_predictive_commoning != 0; }
2651 virtual unsigned int execute (function *fun)
2652 {
2653 return run_tree_predictive_commoning (fun);
2654 }
2655
2656 }; // class pass_predcom
2657
2658 } // anon namespace
2659
2660 gimple_opt_pass *
2661 make_pass_predcom (gcc::context *ctxt)
2662 {
2663 return new pass_predcom (ctxt);
2664 }
2665
2666