]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/tree-predcom.c
alias.c: Reorder #include statements and remove duplicates.
[thirdparty/gcc.git] / gcc / tree-predcom.c
1 /* Predictive commoning.
2 Copyright (C) 2005-2015 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This file implements the predictive commoning optimization. Predictive
21 commoning can be viewed as CSE around a loop, and with some improvements,
22 as generalized strength reduction-- i.e., reusing values computed in
23 earlier iterations of a loop in the later ones. So far, the pass only
24 handles the most useful case, that is, reusing values of memory references.
25 If you think this is all just a special case of PRE, you are sort of right;
26 however, concentrating on loops is simpler, and makes it possible to
27 incorporate data dependence analysis to detect the opportunities, perform
28 loop unrolling to avoid copies together with renaming immediately,
29 and if needed, we could also take register pressure into account.
30
31 Let us demonstrate what is done on an example:
32
33 for (i = 0; i < 100; i++)
34 {
35 a[i+2] = a[i] + a[i+1];
36 b[10] = b[10] + i;
37 c[i] = c[99 - i];
38 d[i] = d[i + 1];
39 }
40
41 1) We find data references in the loop, and split them to mutually
42 independent groups (i.e., we find components of a data dependence
43 graph). We ignore read-read dependences whose distance is not constant.
44 (TODO -- we could also ignore antidependences). In this example, we
45 find the following groups:
46
47 a[i]{read}, a[i+1]{read}, a[i+2]{write}
48 b[10]{read}, b[10]{write}
49 c[99 - i]{read}, c[i]{write}
50 d[i + 1]{read}, d[i]{write}
51
52 2) Inside each of the group, we verify several conditions:
53 a) all the references must differ in indices only, and the indices
54 must all have the same step
55 b) the references must dominate loop latch (and thus, they must be
56 ordered by dominance relation).
57 c) the distance of the indices must be a small multiple of the step
58 We are then able to compute the difference of the references (# of
59 iterations before they point to the same place as the first of them).
60 Also, in case there are writes in the loop, we split the groups into
61 chains whose head is the write whose values are used by the reads in
62 the same chain. The chains are then processed independently,
63 making the further transformations simpler. Also, the shorter chains
64 need the same number of registers, but may require lower unrolling
65 factor in order to get rid of the copies on the loop latch.
66
67 In our example, we get the following chains (the chain for c is invalid).
68
69 a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
70 b[10]{read,+0}, b[10]{write,+0}
71 d[i + 1]{read,+0}, d[i]{write,+1}
72
73 3) For each read, we determine the read or write whose value it reuses,
74 together with the distance of this reuse. I.e. we take the last
75 reference before it with distance 0, or the last of the references
76 with the smallest positive distance to the read. Then, we remove
77 the references that are not used in any of these chains, discard the
78 empty groups, and propagate all the links so that they point to the
79 single root reference of the chain (adjusting their distance
80 appropriately). Some extra care needs to be taken for references with
81 step 0. In our example (the numbers indicate the distance of the
82 reuse),
83
84 a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
85 b[10] --> (*) 1, b[10] (*)
86
87 4) The chains are combined together if possible. If the corresponding
88 elements of two chains are always combined together with the same
89 operator, we remember just the result of this combination, instead
90 of remembering the values separately. We may need to perform
91 reassociation to enable combining, for example
92
93 e[i] + f[i+1] + e[i+1] + f[i]
94
95 can be reassociated as
96
97 (e[i] + f[i]) + (e[i+1] + f[i+1])
98
99 and we can combine the chains for e and f into one chain.
100
101 5) For each root reference (end of the chain) R, let N be maximum distance
102 of a reference reusing its value. Variables R0 up to RN are created,
103 together with phi nodes that transfer values from R1 .. RN to
104 R0 .. R(N-1).
105 Initial values are loaded to R0..R(N-1) (in case not all references
106 must necessarily be accessed and they may trap, we may fail here;
107 TODO sometimes, the loads could be guarded by a check for the number
108 of iterations). Values loaded/stored in roots are also copied to
109 RN. Other reads are replaced with the appropriate variable Ri.
110 Everything is put to SSA form.
111
112 As a small improvement, if R0 is dead after the root (i.e., all uses of
113 the value with the maximum distance dominate the root), we can avoid
114 creating RN and use R0 instead of it.
115
116 In our example, we get (only the parts concerning a and b are shown):
117 for (i = 0; i < 100; i++)
118 {
119 f = phi (a[0], s);
120 s = phi (a[1], f);
121 x = phi (b[10], x);
122
123 f = f + s;
124 a[i+2] = f;
125 x = x + i;
126 b[10] = x;
127 }
128
129 6) Factor F for unrolling is determined as the smallest common multiple of
130 (N + 1) for each root reference (N for references for that we avoided
131 creating RN). If F and the loop is small enough, loop is unrolled F
132 times. The stores to RN (R0) in the copies of the loop body are
133 periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
134 be coalesced and the copies can be eliminated.
135
136 TODO -- copy propagation and other optimizations may change the live
137 ranges of the temporary registers and prevent them from being coalesced;
138 this may increase the register pressure.
139
140 In our case, F = 2 and the (main loop of the) result is
141
142 for (i = 0; i < ...; i += 2)
143 {
144 f = phi (a[0], f);
145 s = phi (a[1], s);
146 x = phi (b[10], x);
147
148 f = f + s;
149 a[i+2] = f;
150 x = x + i;
151 b[10] = x;
152
153 s = s + f;
154 a[i+3] = s;
155 x = x + i;
156 b[10] = x;
157 }
158
159 TODO -- stores killing other stores can be taken into account, e.g.,
160 for (i = 0; i < n; i++)
161 {
162 a[i] = 1;
163 a[i+2] = 2;
164 }
165
166 can be replaced with
167
168 t0 = a[0];
169 t1 = a[1];
170 for (i = 0; i < n; i++)
171 {
172 a[i] = 1;
173 t2 = 2;
174 t0 = t1;
175 t1 = t2;
176 }
177 a[n] = t0;
178 a[n+1] = t1;
179
180 The interesting part is that this would generalize store motion; still, since
181 sm is performed elsewhere, it does not seem that important.
182
183 Predictive commoning can be generalized for arbitrary computations (not
184 just memory loads), and also nontrivial transfer functions (e.g., replacing
185 i * i with ii_last + 2 * i + 1), to generalize strength reduction. */
186
187 #include "config.h"
188 #include "system.h"
189 #include "coretypes.h"
190 #include "backend.h"
191 #include "rtl.h"
192 #include "tree.h"
193 #include "gimple.h"
194 #include "predict.h"
195 #include "tree-pass.h"
196 #include "tm_p.h"
197 #include "ssa.h"
198 #include "expmed.h"
199 #include "insn-config.h"
200 #include "emit-rtl.h"
201 #include "gimple-pretty-print.h"
202 #include "alias.h"
203 #include "fold-const.h"
204 #include "cfgloop.h"
205 #include "internal-fn.h"
206 #include "tree-eh.h"
207 #include "gimplify.h"
208 #include "gimple-iterator.h"
209 #include "gimplify-me.h"
210 #include "tree-ssa-loop-ivopts.h"
211 #include "tree-ssa-loop-manip.h"
212 #include "tree-ssa-loop-niter.h"
213 #include "tree-ssa-loop.h"
214 #include "tree-into-ssa.h"
215 #include "flags.h"
216 #include "dojump.h"
217 #include "explow.h"
218 #include "calls.h"
219 #include "varasm.h"
220 #include "stmt.h"
221 #include "expr.h"
222 #include "tree-dfa.h"
223 #include "tree-ssa.h"
224 #include "tree-data-ref.h"
225 #include "tree-scalar-evolution.h"
226 #include "params.h"
227 #include "tree-affine.h"
228 #include "tree-inline.h"
229
230 /* The maximum number of iterations between the considered memory
231 references. */
232
233 #define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
234
235 /* Data references (or phi nodes that carry data reference values across
236 loop iterations). */
237
238 typedef struct dref_d
239 {
240 /* The reference itself. */
241 struct data_reference *ref;
242
243 /* The statement in that the reference appears. */
244 gimple *stmt;
245
246 /* In case that STMT is a phi node, this field is set to the SSA name
247 defined by it in replace_phis_by_defined_names (in order to avoid
248 pointing to phi node that got reallocated in the meantime). */
249 tree name_defined_by_phi;
250
251 /* Distance of the reference from the root of the chain (in number of
252 iterations of the loop). */
253 unsigned distance;
254
255 /* Number of iterations offset from the first reference in the component. */
256 widest_int offset;
257
258 /* Number of the reference in a component, in dominance ordering. */
259 unsigned pos;
260
261 /* True if the memory reference is always accessed when the loop is
262 entered. */
263 unsigned always_accessed : 1;
264 } *dref;
265
266
267 /* Type of the chain of the references. */
268
269 enum chain_type
270 {
271 /* The addresses of the references in the chain are constant. */
272 CT_INVARIANT,
273
274 /* There are only loads in the chain. */
275 CT_LOAD,
276
277 /* Root of the chain is store, the rest are loads. */
278 CT_STORE_LOAD,
279
280 /* A combination of two chains. */
281 CT_COMBINATION
282 };
283
284 /* Chains of data references. */
285
286 typedef struct chain
287 {
288 /* Type of the chain. */
289 enum chain_type type;
290
291 /* For combination chains, the operator and the two chains that are
292 combined, and the type of the result. */
293 enum tree_code op;
294 tree rslt_type;
295 struct chain *ch1, *ch2;
296
297 /* The references in the chain. */
298 vec<dref> refs;
299
300 /* The maximum distance of the reference in the chain from the root. */
301 unsigned length;
302
303 /* The variables used to copy the value throughout iterations. */
304 vec<tree> vars;
305
306 /* Initializers for the variables. */
307 vec<tree> inits;
308
309 /* True if there is a use of a variable with the maximal distance
310 that comes after the root in the loop. */
311 unsigned has_max_use_after : 1;
312
313 /* True if all the memory references in the chain are always accessed. */
314 unsigned all_always_accessed : 1;
315
316 /* True if this chain was combined together with some other chain. */
317 unsigned combined : 1;
318 } *chain_p;
319
320
321 /* Describes the knowledge about the step of the memory references in
322 the component. */
323
324 enum ref_step_type
325 {
326 /* The step is zero. */
327 RS_INVARIANT,
328
329 /* The step is nonzero. */
330 RS_NONZERO,
331
332 /* The step may or may not be nonzero. */
333 RS_ANY
334 };
335
336 /* Components of the data dependence graph. */
337
338 struct component
339 {
340 /* The references in the component. */
341 vec<dref> refs;
342
343 /* What we know about the step of the references in the component. */
344 enum ref_step_type comp_step;
345
346 /* Next component in the list. */
347 struct component *next;
348 };
349
350 /* Bitmap of ssa names defined by looparound phi nodes covered by chains. */
351
352 static bitmap looparound_phis;
353
354 /* Cache used by tree_to_aff_combination_expand. */
355
356 static hash_map<tree, name_expansion *> *name_expansions;
357
358 /* Dumps data reference REF to FILE. */
359
360 extern void dump_dref (FILE *, dref);
361 void
362 dump_dref (FILE *file, dref ref)
363 {
364 if (ref->ref)
365 {
366 fprintf (file, " ");
367 print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM);
368 fprintf (file, " (id %u%s)\n", ref->pos,
369 DR_IS_READ (ref->ref) ? "" : ", write");
370
371 fprintf (file, " offset ");
372 print_decs (ref->offset, file);
373 fprintf (file, "\n");
374
375 fprintf (file, " distance %u\n", ref->distance);
376 }
377 else
378 {
379 if (gimple_code (ref->stmt) == GIMPLE_PHI)
380 fprintf (file, " looparound ref\n");
381 else
382 fprintf (file, " combination ref\n");
383 fprintf (file, " in statement ");
384 print_gimple_stmt (file, ref->stmt, 0, TDF_SLIM);
385 fprintf (file, "\n");
386 fprintf (file, " distance %u\n", ref->distance);
387 }
388
389 }
390
391 /* Dumps CHAIN to FILE. */
392
393 extern void dump_chain (FILE *, chain_p);
394 void
395 dump_chain (FILE *file, chain_p chain)
396 {
397 dref a;
398 const char *chain_type;
399 unsigned i;
400 tree var;
401
402 switch (chain->type)
403 {
404 case CT_INVARIANT:
405 chain_type = "Load motion";
406 break;
407
408 case CT_LOAD:
409 chain_type = "Loads-only";
410 break;
411
412 case CT_STORE_LOAD:
413 chain_type = "Store-loads";
414 break;
415
416 case CT_COMBINATION:
417 chain_type = "Combination";
418 break;
419
420 default:
421 gcc_unreachable ();
422 }
423
424 fprintf (file, "%s chain %p%s\n", chain_type, (void *) chain,
425 chain->combined ? " (combined)" : "");
426 if (chain->type != CT_INVARIANT)
427 fprintf (file, " max distance %u%s\n", chain->length,
428 chain->has_max_use_after ? "" : ", may reuse first");
429
430 if (chain->type == CT_COMBINATION)
431 {
432 fprintf (file, " equal to %p %s %p in type ",
433 (void *) chain->ch1, op_symbol_code (chain->op),
434 (void *) chain->ch2);
435 print_generic_expr (file, chain->rslt_type, TDF_SLIM);
436 fprintf (file, "\n");
437 }
438
439 if (chain->vars.exists ())
440 {
441 fprintf (file, " vars");
442 FOR_EACH_VEC_ELT (chain->vars, i, var)
443 {
444 fprintf (file, " ");
445 print_generic_expr (file, var, TDF_SLIM);
446 }
447 fprintf (file, "\n");
448 }
449
450 if (chain->inits.exists ())
451 {
452 fprintf (file, " inits");
453 FOR_EACH_VEC_ELT (chain->inits, i, var)
454 {
455 fprintf (file, " ");
456 print_generic_expr (file, var, TDF_SLIM);
457 }
458 fprintf (file, "\n");
459 }
460
461 fprintf (file, " references:\n");
462 FOR_EACH_VEC_ELT (chain->refs, i, a)
463 dump_dref (file, a);
464
465 fprintf (file, "\n");
466 }
467
468 /* Dumps CHAINS to FILE. */
469
470 extern void dump_chains (FILE *, vec<chain_p> );
471 void
472 dump_chains (FILE *file, vec<chain_p> chains)
473 {
474 chain_p chain;
475 unsigned i;
476
477 FOR_EACH_VEC_ELT (chains, i, chain)
478 dump_chain (file, chain);
479 }
480
481 /* Dumps COMP to FILE. */
482
483 extern void dump_component (FILE *, struct component *);
484 void
485 dump_component (FILE *file, struct component *comp)
486 {
487 dref a;
488 unsigned i;
489
490 fprintf (file, "Component%s:\n",
491 comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
492 FOR_EACH_VEC_ELT (comp->refs, i, a)
493 dump_dref (file, a);
494 fprintf (file, "\n");
495 }
496
497 /* Dumps COMPS to FILE. */
498
499 extern void dump_components (FILE *, struct component *);
500 void
501 dump_components (FILE *file, struct component *comps)
502 {
503 struct component *comp;
504
505 for (comp = comps; comp; comp = comp->next)
506 dump_component (file, comp);
507 }
508
509 /* Frees a chain CHAIN. */
510
511 static void
512 release_chain (chain_p chain)
513 {
514 dref ref;
515 unsigned i;
516
517 if (chain == NULL)
518 return;
519
520 FOR_EACH_VEC_ELT (chain->refs, i, ref)
521 free (ref);
522
523 chain->refs.release ();
524 chain->vars.release ();
525 chain->inits.release ();
526
527 free (chain);
528 }
529
530 /* Frees CHAINS. */
531
532 static void
533 release_chains (vec<chain_p> chains)
534 {
535 unsigned i;
536 chain_p chain;
537
538 FOR_EACH_VEC_ELT (chains, i, chain)
539 release_chain (chain);
540 chains.release ();
541 }
542
543 /* Frees a component COMP. */
544
545 static void
546 release_component (struct component *comp)
547 {
548 comp->refs.release ();
549 free (comp);
550 }
551
552 /* Frees list of components COMPS. */
553
554 static void
555 release_components (struct component *comps)
556 {
557 struct component *act, *next;
558
559 for (act = comps; act; act = next)
560 {
561 next = act->next;
562 release_component (act);
563 }
564 }
565
566 /* Finds a root of tree given by FATHERS containing A, and performs path
567 shortening. */
568
569 static unsigned
570 component_of (unsigned fathers[], unsigned a)
571 {
572 unsigned root, n;
573
574 for (root = a; root != fathers[root]; root = fathers[root])
575 continue;
576
577 for (; a != root; a = n)
578 {
579 n = fathers[a];
580 fathers[a] = root;
581 }
582
583 return root;
584 }
585
586 /* Join operation for DFU. FATHERS gives the tree, SIZES are sizes of the
587 components, A and B are components to merge. */
588
589 static void
590 merge_comps (unsigned fathers[], unsigned sizes[], unsigned a, unsigned b)
591 {
592 unsigned ca = component_of (fathers, a);
593 unsigned cb = component_of (fathers, b);
594
595 if (ca == cb)
596 return;
597
598 if (sizes[ca] < sizes[cb])
599 {
600 sizes[cb] += sizes[ca];
601 fathers[ca] = cb;
602 }
603 else
604 {
605 sizes[ca] += sizes[cb];
606 fathers[cb] = ca;
607 }
608 }
609
610 /* Returns true if A is a reference that is suitable for predictive commoning
611 in the innermost loop that contains it. REF_STEP is set according to the
612 step of the reference A. */
613
614 static bool
615 suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
616 {
617 tree ref = DR_REF (a), step = DR_STEP (a);
618
619 if (!step
620 || TREE_THIS_VOLATILE (ref)
621 || !is_gimple_reg_type (TREE_TYPE (ref))
622 || tree_could_throw_p (ref))
623 return false;
624
625 if (integer_zerop (step))
626 *ref_step = RS_INVARIANT;
627 else if (integer_nonzerop (step))
628 *ref_step = RS_NONZERO;
629 else
630 *ref_step = RS_ANY;
631
632 return true;
633 }
634
635 /* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET. */
636
637 static void
638 aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
639 {
640 tree type = TREE_TYPE (DR_OFFSET (dr));
641 aff_tree delta;
642
643 tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset,
644 &name_expansions);
645 aff_combination_const (&delta, type, wi::to_widest (DR_INIT (dr)));
646 aff_combination_add (offset, &delta);
647 }
648
649 /* Determines number of iterations of the innermost enclosing loop before B
650 refers to exactly the same location as A and stores it to OFF. If A and
651 B do not have the same step, they never meet, or anything else fails,
652 returns false, otherwise returns true. Both A and B are assumed to
653 satisfy suitable_reference_p. */
654
655 static bool
656 determine_offset (struct data_reference *a, struct data_reference *b,
657 widest_int *off)
658 {
659 aff_tree diff, baseb, step;
660 tree typea, typeb;
661
662 /* Check that both the references access the location in the same type. */
663 typea = TREE_TYPE (DR_REF (a));
664 typeb = TREE_TYPE (DR_REF (b));
665 if (!useless_type_conversion_p (typeb, typea))
666 return false;
667
668 /* Check whether the base address and the step of both references is the
669 same. */
670 if (!operand_equal_p (DR_STEP (a), DR_STEP (b), 0)
671 || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), 0))
672 return false;
673
674 if (integer_zerop (DR_STEP (a)))
675 {
676 /* If the references have loop invariant address, check that they access
677 exactly the same location. */
678 *off = 0;
679 return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0)
680 && operand_equal_p (DR_INIT (a), DR_INIT (b), 0));
681 }
682
683 /* Compare the offsets of the addresses, and check whether the difference
684 is a multiple of step. */
685 aff_combination_dr_offset (a, &diff);
686 aff_combination_dr_offset (b, &baseb);
687 aff_combination_scale (&baseb, -1);
688 aff_combination_add (&diff, &baseb);
689
690 tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)),
691 &step, &name_expansions);
692 return aff_combination_constant_multiple_p (&diff, &step, off);
693 }
694
695 /* Returns the last basic block in LOOP for that we are sure that
696 it is executed whenever the loop is entered. */
697
698 static basic_block
699 last_always_executed_block (struct loop *loop)
700 {
701 unsigned i;
702 vec<edge> exits = get_loop_exit_edges (loop);
703 edge ex;
704 basic_block last = loop->latch;
705
706 FOR_EACH_VEC_ELT (exits, i, ex)
707 last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
708 exits.release ();
709
710 return last;
711 }
712
713 /* Splits dependence graph on DATAREFS described by DEPENDS to components. */
714
715 static struct component *
716 split_data_refs_to_components (struct loop *loop,
717 vec<data_reference_p> datarefs,
718 vec<ddr_p> depends)
719 {
720 unsigned i, n = datarefs.length ();
721 unsigned ca, ia, ib, bad;
722 unsigned *comp_father = XNEWVEC (unsigned, n + 1);
723 unsigned *comp_size = XNEWVEC (unsigned, n + 1);
724 struct component **comps;
725 struct data_reference *dr, *dra, *drb;
726 struct data_dependence_relation *ddr;
727 struct component *comp_list = NULL, *comp;
728 dref dataref;
729 basic_block last_always_executed = last_always_executed_block (loop);
730
731 FOR_EACH_VEC_ELT (datarefs, i, dr)
732 {
733 if (!DR_REF (dr))
734 {
735 /* A fake reference for call or asm_expr that may clobber memory;
736 just fail. */
737 goto end;
738 }
739 /* predcom pass isn't prepared to handle calls with data references. */
740 if (is_gimple_call (DR_STMT (dr)))
741 goto end;
742 dr->aux = (void *) (size_t) i;
743 comp_father[i] = i;
744 comp_size[i] = 1;
745 }
746
747 /* A component reserved for the "bad" data references. */
748 comp_father[n] = n;
749 comp_size[n] = 1;
750
751 FOR_EACH_VEC_ELT (datarefs, i, dr)
752 {
753 enum ref_step_type dummy;
754
755 if (!suitable_reference_p (dr, &dummy))
756 {
757 ia = (unsigned) (size_t) dr->aux;
758 merge_comps (comp_father, comp_size, n, ia);
759 }
760 }
761
762 FOR_EACH_VEC_ELT (depends, i, ddr)
763 {
764 widest_int dummy_off;
765
766 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
767 continue;
768
769 dra = DDR_A (ddr);
770 drb = DDR_B (ddr);
771 ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
772 ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
773 if (ia == ib)
774 continue;
775
776 bad = component_of (comp_father, n);
777
778 /* If both A and B are reads, we may ignore unsuitable dependences. */
779 if (DR_IS_READ (dra) && DR_IS_READ (drb))
780 {
781 if (ia == bad || ib == bad
782 || !determine_offset (dra, drb, &dummy_off))
783 continue;
784 }
785 /* If A is read and B write or vice versa and there is unsuitable
786 dependence, instead of merging both components into a component
787 that will certainly not pass suitable_component_p, just put the
788 read into bad component, perhaps at least the write together with
789 all the other data refs in it's component will be optimizable. */
790 else if (DR_IS_READ (dra) && ib != bad)
791 {
792 if (ia == bad)
793 continue;
794 else if (!determine_offset (dra, drb, &dummy_off))
795 {
796 merge_comps (comp_father, comp_size, bad, ia);
797 continue;
798 }
799 }
800 else if (DR_IS_READ (drb) && ia != bad)
801 {
802 if (ib == bad)
803 continue;
804 else if (!determine_offset (dra, drb, &dummy_off))
805 {
806 merge_comps (comp_father, comp_size, bad, ib);
807 continue;
808 }
809 }
810
811 merge_comps (comp_father, comp_size, ia, ib);
812 }
813
814 comps = XCNEWVEC (struct component *, n);
815 bad = component_of (comp_father, n);
816 FOR_EACH_VEC_ELT (datarefs, i, dr)
817 {
818 ia = (unsigned) (size_t) dr->aux;
819 ca = component_of (comp_father, ia);
820 if (ca == bad)
821 continue;
822
823 comp = comps[ca];
824 if (!comp)
825 {
826 comp = XCNEW (struct component);
827 comp->refs.create (comp_size[ca]);
828 comps[ca] = comp;
829 }
830
831 dataref = XCNEW (struct dref_d);
832 dataref->ref = dr;
833 dataref->stmt = DR_STMT (dr);
834 dataref->offset = 0;
835 dataref->distance = 0;
836
837 dataref->always_accessed
838 = dominated_by_p (CDI_DOMINATORS, last_always_executed,
839 gimple_bb (dataref->stmt));
840 dataref->pos = comp->refs.length ();
841 comp->refs.quick_push (dataref);
842 }
843
844 for (i = 0; i < n; i++)
845 {
846 comp = comps[i];
847 if (comp)
848 {
849 comp->next = comp_list;
850 comp_list = comp;
851 }
852 }
853 free (comps);
854
855 end:
856 free (comp_father);
857 free (comp_size);
858 return comp_list;
859 }
860
861 /* Returns true if the component COMP satisfies the conditions
862 described in 2) at the beginning of this file. LOOP is the current
863 loop. */
864
865 static bool
866 suitable_component_p (struct loop *loop, struct component *comp)
867 {
868 unsigned i;
869 dref a, first;
870 basic_block ba, bp = loop->header;
871 bool ok, has_write = false;
872
873 FOR_EACH_VEC_ELT (comp->refs, i, a)
874 {
875 ba = gimple_bb (a->stmt);
876
877 if (!just_once_each_iteration_p (loop, ba))
878 return false;
879
880 gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp));
881 bp = ba;
882
883 if (DR_IS_WRITE (a->ref))
884 has_write = true;
885 }
886
887 first = comp->refs[0];
888 ok = suitable_reference_p (first->ref, &comp->comp_step);
889 gcc_assert (ok);
890 first->offset = 0;
891
892 for (i = 1; comp->refs.iterate (i, &a); i++)
893 {
894 if (!determine_offset (first->ref, a->ref, &a->offset))
895 return false;
896
897 enum ref_step_type a_step;
898 gcc_checking_assert (suitable_reference_p (a->ref, &a_step)
899 && a_step == comp->comp_step);
900 }
901
902 /* If there is a write inside the component, we must know whether the
903 step is nonzero or not -- we would not otherwise be able to recognize
904 whether the value accessed by reads comes from the OFFSET-th iteration
905 or the previous one. */
906 if (has_write && comp->comp_step == RS_ANY)
907 return false;
908
909 return true;
910 }
911
912 /* Check the conditions on references inside each of components COMPS,
913 and remove the unsuitable components from the list. The new list
914 of components is returned. The conditions are described in 2) at
915 the beginning of this file. LOOP is the current loop. */
916
917 static struct component *
918 filter_suitable_components (struct loop *loop, struct component *comps)
919 {
920 struct component **comp, *act;
921
922 for (comp = &comps; *comp; )
923 {
924 act = *comp;
925 if (suitable_component_p (loop, act))
926 comp = &act->next;
927 else
928 {
929 dref ref;
930 unsigned i;
931
932 *comp = act->next;
933 FOR_EACH_VEC_ELT (act->refs, i, ref)
934 free (ref);
935 release_component (act);
936 }
937 }
938
939 return comps;
940 }
941
942 /* Compares two drefs A and B by their offset and position. Callback for
943 qsort. */
944
945 static int
946 order_drefs (const void *a, const void *b)
947 {
948 const dref *const da = (const dref *) a;
949 const dref *const db = (const dref *) b;
950 int offcmp = wi::cmps ((*da)->offset, (*db)->offset);
951
952 if (offcmp != 0)
953 return offcmp;
954
955 return (*da)->pos - (*db)->pos;
956 }
957
958 /* Returns root of the CHAIN. */
959
960 static inline dref
961 get_chain_root (chain_p chain)
962 {
963 return chain->refs[0];
964 }
965
966 /* Adds REF to the chain CHAIN. */
967
968 static void
969 add_ref_to_chain (chain_p chain, dref ref)
970 {
971 dref root = get_chain_root (chain);
972
973 gcc_assert (wi::les_p (root->offset, ref->offset));
974 widest_int dist = ref->offset - root->offset;
975 if (wi::leu_p (MAX_DISTANCE, dist))
976 {
977 free (ref);
978 return;
979 }
980 gcc_assert (wi::fits_uhwi_p (dist));
981
982 chain->refs.safe_push (ref);
983
984 ref->distance = dist.to_uhwi ();
985
986 if (ref->distance >= chain->length)
987 {
988 chain->length = ref->distance;
989 chain->has_max_use_after = false;
990 }
991
992 if (ref->distance == chain->length
993 && ref->pos > root->pos)
994 chain->has_max_use_after = true;
995
996 chain->all_always_accessed &= ref->always_accessed;
997 }
998
999 /* Returns the chain for invariant component COMP. */
1000
1001 static chain_p
1002 make_invariant_chain (struct component *comp)
1003 {
1004 chain_p chain = XCNEW (struct chain);
1005 unsigned i;
1006 dref ref;
1007
1008 chain->type = CT_INVARIANT;
1009
1010 chain->all_always_accessed = true;
1011
1012 FOR_EACH_VEC_ELT (comp->refs, i, ref)
1013 {
1014 chain->refs.safe_push (ref);
1015 chain->all_always_accessed &= ref->always_accessed;
1016 }
1017
1018 return chain;
1019 }
1020
1021 /* Make a new chain rooted at REF. */
1022
1023 static chain_p
1024 make_rooted_chain (dref ref)
1025 {
1026 chain_p chain = XCNEW (struct chain);
1027
1028 chain->type = DR_IS_READ (ref->ref) ? CT_LOAD : CT_STORE_LOAD;
1029
1030 chain->refs.safe_push (ref);
1031 chain->all_always_accessed = ref->always_accessed;
1032
1033 ref->distance = 0;
1034
1035 return chain;
1036 }
1037
1038 /* Returns true if CHAIN is not trivial. */
1039
1040 static bool
1041 nontrivial_chain_p (chain_p chain)
1042 {
1043 return chain != NULL && chain->refs.length () > 1;
1044 }
1045
1046 /* Returns the ssa name that contains the value of REF, or NULL_TREE if there
1047 is no such name. */
1048
1049 static tree
1050 name_for_ref (dref ref)
1051 {
1052 tree name;
1053
1054 if (is_gimple_assign (ref->stmt))
1055 {
1056 if (!ref->ref || DR_IS_READ (ref->ref))
1057 name = gimple_assign_lhs (ref->stmt);
1058 else
1059 name = gimple_assign_rhs1 (ref->stmt);
1060 }
1061 else
1062 name = PHI_RESULT (ref->stmt);
1063
1064 return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE);
1065 }
1066
1067 /* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
1068 iterations of the innermost enclosing loop). */
1069
1070 static bool
1071 valid_initializer_p (struct data_reference *ref,
1072 unsigned distance, struct data_reference *root)
1073 {
1074 aff_tree diff, base, step;
1075 widest_int off;
1076
1077 /* Both REF and ROOT must be accessing the same object. */
1078 if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0))
1079 return false;
1080
1081 /* The initializer is defined outside of loop, hence its address must be
1082 invariant inside the loop. */
1083 gcc_assert (integer_zerop (DR_STEP (ref)));
1084
1085 /* If the address of the reference is invariant, initializer must access
1086 exactly the same location. */
1087 if (integer_zerop (DR_STEP (root)))
1088 return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), 0)
1089 && operand_equal_p (DR_INIT (ref), DR_INIT (root), 0));
1090
1091 /* Verify that this index of REF is equal to the root's index at
1092 -DISTANCE-th iteration. */
1093 aff_combination_dr_offset (root, &diff);
1094 aff_combination_dr_offset (ref, &base);
1095 aff_combination_scale (&base, -1);
1096 aff_combination_add (&diff, &base);
1097
1098 tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)),
1099 &step, &name_expansions);
1100 if (!aff_combination_constant_multiple_p (&diff, &step, &off))
1101 return false;
1102
1103 if (off != distance)
1104 return false;
1105
1106 return true;
1107 }
1108
1109 /* Finds looparound phi node of LOOP that copies the value of REF, and if its
1110 initial value is correct (equal to initial value of REF shifted by one
1111 iteration), returns the phi node. Otherwise, NULL_TREE is returned. ROOT
1112 is the root of the current chain. */
1113
1114 static gphi *
1115 find_looparound_phi (struct loop *loop, dref ref, dref root)
1116 {
1117 tree name, init, init_ref;
1118 gphi *phi = NULL;
1119 gimple *init_stmt;
1120 edge latch = loop_latch_edge (loop);
1121 struct data_reference init_dr;
1122 gphi_iterator psi;
1123
1124 if (is_gimple_assign (ref->stmt))
1125 {
1126 if (DR_IS_READ (ref->ref))
1127 name = gimple_assign_lhs (ref->stmt);
1128 else
1129 name = gimple_assign_rhs1 (ref->stmt);
1130 }
1131 else
1132 name = PHI_RESULT (ref->stmt);
1133 if (!name)
1134 return NULL;
1135
1136 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1137 {
1138 phi = psi.phi ();
1139 if (PHI_ARG_DEF_FROM_EDGE (phi, latch) == name)
1140 break;
1141 }
1142
1143 if (gsi_end_p (psi))
1144 return NULL;
1145
1146 init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1147 if (TREE_CODE (init) != SSA_NAME)
1148 return NULL;
1149 init_stmt = SSA_NAME_DEF_STMT (init);
1150 if (gimple_code (init_stmt) != GIMPLE_ASSIGN)
1151 return NULL;
1152 gcc_assert (gimple_assign_lhs (init_stmt) == init);
1153
1154 init_ref = gimple_assign_rhs1 (init_stmt);
1155 if (!REFERENCE_CLASS_P (init_ref)
1156 && !DECL_P (init_ref))
1157 return NULL;
1158
1159 /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
1160 loop enclosing PHI). */
1161 memset (&init_dr, 0, sizeof (struct data_reference));
1162 DR_REF (&init_dr) = init_ref;
1163 DR_STMT (&init_dr) = phi;
1164 if (!dr_analyze_innermost (&init_dr, loop))
1165 return NULL;
1166
1167 if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
1168 return NULL;
1169
1170 return phi;
1171 }
1172
1173 /* Adds a reference for the looparound copy of REF in PHI to CHAIN. */
1174
1175 static void
1176 insert_looparound_copy (chain_p chain, dref ref, gphi *phi)
1177 {
1178 dref nw = XCNEW (struct dref_d), aref;
1179 unsigned i;
1180
1181 nw->stmt = phi;
1182 nw->distance = ref->distance + 1;
1183 nw->always_accessed = 1;
1184
1185 FOR_EACH_VEC_ELT (chain->refs, i, aref)
1186 if (aref->distance >= nw->distance)
1187 break;
1188 chain->refs.safe_insert (i, nw);
1189
1190 if (nw->distance > chain->length)
1191 {
1192 chain->length = nw->distance;
1193 chain->has_max_use_after = false;
1194 }
1195 }
1196
1197 /* For references in CHAIN that are copied around the LOOP (created previously
1198 by PRE, or by user), add the results of such copies to the chain. This
1199 enables us to remove the copies by unrolling, and may need less registers
1200 (also, it may allow us to combine chains together). */
1201
1202 static void
1203 add_looparound_copies (struct loop *loop, chain_p chain)
1204 {
1205 unsigned i;
1206 dref ref, root = get_chain_root (chain);
1207 gphi *phi;
1208
1209 FOR_EACH_VEC_ELT (chain->refs, i, ref)
1210 {
1211 phi = find_looparound_phi (loop, ref, root);
1212 if (!phi)
1213 continue;
1214
1215 bitmap_set_bit (looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
1216 insert_looparound_copy (chain, ref, phi);
1217 }
1218 }
1219
1220 /* Find roots of the values and determine distances in the component COMP.
1221 The references are redistributed into CHAINS. LOOP is the current
1222 loop. */
1223
1224 static void
1225 determine_roots_comp (struct loop *loop,
1226 struct component *comp,
1227 vec<chain_p> *chains)
1228 {
1229 unsigned i;
1230 dref a;
1231 chain_p chain = NULL;
1232 widest_int last_ofs = 0;
1233
1234 /* Invariants are handled specially. */
1235 if (comp->comp_step == RS_INVARIANT)
1236 {
1237 chain = make_invariant_chain (comp);
1238 chains->safe_push (chain);
1239 return;
1240 }
1241
1242 comp->refs.qsort (order_drefs);
1243
1244 FOR_EACH_VEC_ELT (comp->refs, i, a)
1245 {
1246 if (!chain || DR_IS_WRITE (a->ref)
1247 || wi::leu_p (MAX_DISTANCE, a->offset - last_ofs))
1248 {
1249 if (nontrivial_chain_p (chain))
1250 {
1251 add_looparound_copies (loop, chain);
1252 chains->safe_push (chain);
1253 }
1254 else
1255 release_chain (chain);
1256 chain = make_rooted_chain (a);
1257 last_ofs = a->offset;
1258 continue;
1259 }
1260
1261 add_ref_to_chain (chain, a);
1262 }
1263
1264 if (nontrivial_chain_p (chain))
1265 {
1266 add_looparound_copies (loop, chain);
1267 chains->safe_push (chain);
1268 }
1269 else
1270 release_chain (chain);
1271 }
1272
1273 /* Find roots of the values and determine distances in components COMPS, and
1274 separates the references to CHAINS. LOOP is the current loop. */
1275
1276 static void
1277 determine_roots (struct loop *loop,
1278 struct component *comps, vec<chain_p> *chains)
1279 {
1280 struct component *comp;
1281
1282 for (comp = comps; comp; comp = comp->next)
1283 determine_roots_comp (loop, comp, chains);
1284 }
1285
1286 /* Replace the reference in statement STMT with temporary variable
1287 NEW_TREE. If SET is true, NEW_TREE is instead initialized to the value of
1288 the reference in the statement. IN_LHS is true if the reference
1289 is in the lhs of STMT, false if it is in rhs. */
1290
1291 static void
1292 replace_ref_with (gimple *stmt, tree new_tree, bool set, bool in_lhs)
1293 {
1294 tree val;
1295 gassign *new_stmt;
1296 gimple_stmt_iterator bsi, psi;
1297
1298 if (gimple_code (stmt) == GIMPLE_PHI)
1299 {
1300 gcc_assert (!in_lhs && !set);
1301
1302 val = PHI_RESULT (stmt);
1303 bsi = gsi_after_labels (gimple_bb (stmt));
1304 psi = gsi_for_stmt (stmt);
1305 remove_phi_node (&psi, false);
1306
1307 /* Turn the phi node into GIMPLE_ASSIGN. */
1308 new_stmt = gimple_build_assign (val, new_tree);
1309 gsi_insert_before (&bsi, new_stmt, GSI_NEW_STMT);
1310 return;
1311 }
1312
1313 /* Since the reference is of gimple_reg type, it should only
1314 appear as lhs or rhs of modify statement. */
1315 gcc_assert (is_gimple_assign (stmt));
1316
1317 bsi = gsi_for_stmt (stmt);
1318
1319 /* If we do not need to initialize NEW_TREE, just replace the use of OLD. */
1320 if (!set)
1321 {
1322 gcc_assert (!in_lhs);
1323 gimple_assign_set_rhs_from_tree (&bsi, new_tree);
1324 stmt = gsi_stmt (bsi);
1325 update_stmt (stmt);
1326 return;
1327 }
1328
1329 if (in_lhs)
1330 {
1331 /* We have statement
1332
1333 OLD = VAL
1334
1335 If OLD is a memory reference, then VAL is gimple_val, and we transform
1336 this to
1337
1338 OLD = VAL
1339 NEW = VAL
1340
1341 Otherwise, we are replacing a combination chain,
1342 VAL is the expression that performs the combination, and OLD is an
1343 SSA name. In this case, we transform the assignment to
1344
1345 OLD = VAL
1346 NEW = OLD
1347
1348 */
1349
1350 val = gimple_assign_lhs (stmt);
1351 if (TREE_CODE (val) != SSA_NAME)
1352 {
1353 val = gimple_assign_rhs1 (stmt);
1354 gcc_assert (gimple_assign_single_p (stmt));
1355 if (TREE_CLOBBER_P (val))
1356 val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (new_tree));
1357 else
1358 gcc_assert (gimple_assign_copy_p (stmt));
1359 }
1360 }
1361 else
1362 {
1363 /* VAL = OLD
1364
1365 is transformed to
1366
1367 VAL = OLD
1368 NEW = VAL */
1369
1370 val = gimple_assign_lhs (stmt);
1371 }
1372
1373 new_stmt = gimple_build_assign (new_tree, unshare_expr (val));
1374 gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT);
1375 }
1376
1377 /* Returns a memory reference to DR in the ITER-th iteration of
1378 the loop it was analyzed in. Append init stmts to STMTS. */
1379
1380 static tree
1381 ref_at_iteration (data_reference_p dr, int iter, gimple_seq *stmts)
1382 {
1383 tree off = DR_OFFSET (dr);
1384 tree coff = DR_INIT (dr);
1385 if (iter == 0)
1386 ;
1387 else if (TREE_CODE (DR_STEP (dr)) == INTEGER_CST)
1388 coff = size_binop (PLUS_EXPR, coff,
1389 size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter)));
1390 else
1391 off = size_binop (PLUS_EXPR, off,
1392 size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter)));
1393 tree addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off);
1394 addr = force_gimple_operand_1 (unshare_expr (addr), stmts,
1395 is_gimple_mem_ref_addr, NULL_TREE);
1396 tree alias_ptr = fold_convert (reference_alias_ptr_type (DR_REF (dr)), coff);
1397 /* While data-ref analysis punts on bit offsets it still handles
1398 bitfield accesses at byte boundaries. Cope with that. Note that
1399 we cannot simply re-apply the outer COMPONENT_REF because the
1400 byte-granular portion of it is already applied via DR_INIT and
1401 DR_OFFSET, so simply build a BIT_FIELD_REF knowing that the bits
1402 start at offset zero. */
1403 if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
1404 && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
1405 {
1406 tree field = TREE_OPERAND (DR_REF (dr), 1);
1407 return build3 (BIT_FIELD_REF, TREE_TYPE (DR_REF (dr)),
1408 build2 (MEM_REF, DECL_BIT_FIELD_TYPE (field),
1409 addr, alias_ptr),
1410 DECL_SIZE (field), bitsize_zero_node);
1411 }
1412 else
1413 return fold_build2 (MEM_REF, TREE_TYPE (DR_REF (dr)), addr, alias_ptr);
1414 }
1415
1416 /* Get the initialization expression for the INDEX-th temporary variable
1417 of CHAIN. */
1418
1419 static tree
1420 get_init_expr (chain_p chain, unsigned index)
1421 {
1422 if (chain->type == CT_COMBINATION)
1423 {
1424 tree e1 = get_init_expr (chain->ch1, index);
1425 tree e2 = get_init_expr (chain->ch2, index);
1426
1427 return fold_build2 (chain->op, chain->rslt_type, e1, e2);
1428 }
1429 else
1430 return chain->inits[index];
1431 }
1432
1433 /* Returns a new temporary variable used for the I-th variable carrying
1434 value of REF. The variable's uid is marked in TMP_VARS. */
1435
1436 static tree
1437 predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars)
1438 {
1439 tree type = TREE_TYPE (ref);
1440 /* We never access the components of the temporary variable in predictive
1441 commoning. */
1442 tree var = create_tmp_reg (type, get_lsm_tmp_name (ref, i));
1443 bitmap_set_bit (tmp_vars, DECL_UID (var));
1444 return var;
1445 }
1446
1447 /* Creates the variables for CHAIN, as well as phi nodes for them and
1448 initialization on entry to LOOP. Uids of the newly created
1449 temporary variables are marked in TMP_VARS. */
1450
1451 static void
1452 initialize_root_vars (struct loop *loop, chain_p chain, bitmap tmp_vars)
1453 {
1454 unsigned i;
1455 unsigned n = chain->length;
1456 dref root = get_chain_root (chain);
1457 bool reuse_first = !chain->has_max_use_after;
1458 tree ref, init, var, next;
1459 gphi *phi;
1460 gimple_seq stmts;
1461 edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1462
1463 /* If N == 0, then all the references are within the single iteration. And
1464 since this is an nonempty chain, reuse_first cannot be true. */
1465 gcc_assert (n > 0 || !reuse_first);
1466
1467 chain->vars.create (n + 1);
1468
1469 if (chain->type == CT_COMBINATION)
1470 ref = gimple_assign_lhs (root->stmt);
1471 else
1472 ref = DR_REF (root->ref);
1473
1474 for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
1475 {
1476 var = predcom_tmp_var (ref, i, tmp_vars);
1477 chain->vars.quick_push (var);
1478 }
1479 if (reuse_first)
1480 chain->vars.quick_push (chain->vars[0]);
1481
1482 FOR_EACH_VEC_ELT (chain->vars, i, var)
1483 chain->vars[i] = make_ssa_name (var);
1484
1485 for (i = 0; i < n; i++)
1486 {
1487 var = chain->vars[i];
1488 next = chain->vars[i + 1];
1489 init = get_init_expr (chain, i);
1490
1491 init = force_gimple_operand (init, &stmts, true, NULL_TREE);
1492 if (stmts)
1493 gsi_insert_seq_on_edge_immediate (entry, stmts);
1494
1495 phi = create_phi_node (var, loop->header);
1496 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1497 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
1498 }
1499 }
1500
1501 /* Create the variables and initialization statement for root of chain
1502 CHAIN. Uids of the newly created temporary variables are marked
1503 in TMP_VARS. */
1504
1505 static void
1506 initialize_root (struct loop *loop, chain_p chain, bitmap tmp_vars)
1507 {
1508 dref root = get_chain_root (chain);
1509 bool in_lhs = (chain->type == CT_STORE_LOAD
1510 || chain->type == CT_COMBINATION);
1511
1512 initialize_root_vars (loop, chain, tmp_vars);
1513 replace_ref_with (root->stmt,
1514 chain->vars[chain->length],
1515 true, in_lhs);
1516 }
1517
1518 /* Initializes a variable for load motion for ROOT and prepares phi nodes and
1519 initialization on entry to LOOP if necessary. The ssa name for the variable
1520 is stored in VARS. If WRITTEN is true, also a phi node to copy its value
1521 around the loop is created. Uid of the newly created temporary variable
1522 is marked in TMP_VARS. INITS is the list containing the (single)
1523 initializer. */
1524
1525 static void
1526 initialize_root_vars_lm (struct loop *loop, dref root, bool written,
1527 vec<tree> *vars, vec<tree> inits,
1528 bitmap tmp_vars)
1529 {
1530 unsigned i;
1531 tree ref = DR_REF (root->ref), init, var, next;
1532 gimple_seq stmts;
1533 gphi *phi;
1534 edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1535
1536 /* Find the initializer for the variable, and check that it cannot
1537 trap. */
1538 init = inits[0];
1539
1540 vars->create (written ? 2 : 1);
1541 var = predcom_tmp_var (ref, 0, tmp_vars);
1542 vars->quick_push (var);
1543 if (written)
1544 vars->quick_push ((*vars)[0]);
1545
1546 FOR_EACH_VEC_ELT (*vars, i, var)
1547 (*vars)[i] = make_ssa_name (var);
1548
1549 var = (*vars)[0];
1550
1551 init = force_gimple_operand (init, &stmts, written, NULL_TREE);
1552 if (stmts)
1553 gsi_insert_seq_on_edge_immediate (entry, stmts);
1554
1555 if (written)
1556 {
1557 next = (*vars)[1];
1558 phi = create_phi_node (var, loop->header);
1559 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1560 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
1561 }
1562 else
1563 {
1564 gassign *init_stmt = gimple_build_assign (var, init);
1565 gsi_insert_on_edge_immediate (entry, init_stmt);
1566 }
1567 }
1568
1569
1570 /* Execute load motion for references in chain CHAIN. Uids of the newly
1571 created temporary variables are marked in TMP_VARS. */
1572
1573 static void
1574 execute_load_motion (struct loop *loop, chain_p chain, bitmap tmp_vars)
1575 {
1576 auto_vec<tree> vars;
1577 dref a;
1578 unsigned n_writes = 0, ridx, i;
1579 tree var;
1580
1581 gcc_assert (chain->type == CT_INVARIANT);
1582 gcc_assert (!chain->combined);
1583 FOR_EACH_VEC_ELT (chain->refs, i, a)
1584 if (DR_IS_WRITE (a->ref))
1585 n_writes++;
1586
1587 /* If there are no reads in the loop, there is nothing to do. */
1588 if (n_writes == chain->refs.length ())
1589 return;
1590
1591 initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
1592 &vars, chain->inits, tmp_vars);
1593
1594 ridx = 0;
1595 FOR_EACH_VEC_ELT (chain->refs, i, a)
1596 {
1597 bool is_read = DR_IS_READ (a->ref);
1598
1599 if (DR_IS_WRITE (a->ref))
1600 {
1601 n_writes--;
1602 if (n_writes)
1603 {
1604 var = vars[0];
1605 var = make_ssa_name (SSA_NAME_VAR (var));
1606 vars[0] = var;
1607 }
1608 else
1609 ridx = 1;
1610 }
1611
1612 replace_ref_with (a->stmt, vars[ridx],
1613 !is_read, !is_read);
1614 }
1615 }
1616
1617 /* Returns the single statement in that NAME is used, excepting
1618 the looparound phi nodes contained in one of the chains. If there is no
1619 such statement, or more statements, NULL is returned. */
1620
1621 static gimple *
1622 single_nonlooparound_use (tree name)
1623 {
1624 use_operand_p use;
1625 imm_use_iterator it;
1626 gimple *stmt, *ret = NULL;
1627
1628 FOR_EACH_IMM_USE_FAST (use, it, name)
1629 {
1630 stmt = USE_STMT (use);
1631
1632 if (gimple_code (stmt) == GIMPLE_PHI)
1633 {
1634 /* Ignore uses in looparound phi nodes. Uses in other phi nodes
1635 could not be processed anyway, so just fail for them. */
1636 if (bitmap_bit_p (looparound_phis,
1637 SSA_NAME_VERSION (PHI_RESULT (stmt))))
1638 continue;
1639
1640 return NULL;
1641 }
1642 else if (is_gimple_debug (stmt))
1643 continue;
1644 else if (ret != NULL)
1645 return NULL;
1646 else
1647 ret = stmt;
1648 }
1649
1650 return ret;
1651 }
1652
1653 /* Remove statement STMT, as well as the chain of assignments in that it is
1654 used. */
1655
1656 static void
1657 remove_stmt (gimple *stmt)
1658 {
1659 tree name;
1660 gimple *next;
1661 gimple_stmt_iterator psi;
1662
1663 if (gimple_code (stmt) == GIMPLE_PHI)
1664 {
1665 name = PHI_RESULT (stmt);
1666 next = single_nonlooparound_use (name);
1667 reset_debug_uses (stmt);
1668 psi = gsi_for_stmt (stmt);
1669 remove_phi_node (&psi, true);
1670
1671 if (!next
1672 || !gimple_assign_ssa_name_copy_p (next)
1673 || gimple_assign_rhs1 (next) != name)
1674 return;
1675
1676 stmt = next;
1677 }
1678
1679 while (1)
1680 {
1681 gimple_stmt_iterator bsi;
1682
1683 bsi = gsi_for_stmt (stmt);
1684
1685 name = gimple_assign_lhs (stmt);
1686 gcc_assert (TREE_CODE (name) == SSA_NAME);
1687
1688 next = single_nonlooparound_use (name);
1689 reset_debug_uses (stmt);
1690
1691 unlink_stmt_vdef (stmt);
1692 gsi_remove (&bsi, true);
1693 release_defs (stmt);
1694
1695 if (!next
1696 || !gimple_assign_ssa_name_copy_p (next)
1697 || gimple_assign_rhs1 (next) != name)
1698 return;
1699
1700 stmt = next;
1701 }
1702 }
1703
1704 /* Perform the predictive commoning optimization for a chain CHAIN.
1705 Uids of the newly created temporary variables are marked in TMP_VARS.*/
1706
1707 static void
1708 execute_pred_commoning_chain (struct loop *loop, chain_p chain,
1709 bitmap tmp_vars)
1710 {
1711 unsigned i;
1712 dref a;
1713 tree var;
1714
1715 if (chain->combined)
1716 {
1717 /* For combined chains, just remove the statements that are used to
1718 compute the values of the expression (except for the root one).
1719 We delay this until after all chains are processed. */
1720 }
1721 else
1722 {
1723 /* For non-combined chains, set up the variables that hold its value,
1724 and replace the uses of the original references by these
1725 variables. */
1726 initialize_root (loop, chain, tmp_vars);
1727 for (i = 1; chain->refs.iterate (i, &a); i++)
1728 {
1729 var = chain->vars[chain->length - a->distance];
1730 replace_ref_with (a->stmt, var, false, false);
1731 }
1732 }
1733 }
1734
1735 /* Determines the unroll factor necessary to remove as many temporary variable
1736 copies as possible. CHAINS is the list of chains that will be
1737 optimized. */
1738
1739 static unsigned
1740 determine_unroll_factor (vec<chain_p> chains)
1741 {
1742 chain_p chain;
1743 unsigned factor = 1, af, nfactor, i;
1744 unsigned max = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1745
1746 FOR_EACH_VEC_ELT (chains, i, chain)
1747 {
1748 if (chain->type == CT_INVARIANT)
1749 continue;
1750
1751 if (chain->combined)
1752 {
1753 /* For combined chains, we can't handle unrolling if we replace
1754 looparound PHIs. */
1755 dref a;
1756 unsigned j;
1757 for (j = 1; chain->refs.iterate (j, &a); j++)
1758 if (gimple_code (a->stmt) == GIMPLE_PHI)
1759 return 1;
1760 continue;
1761 }
1762
1763 /* The best unroll factor for this chain is equal to the number of
1764 temporary variables that we create for it. */
1765 af = chain->length;
1766 if (chain->has_max_use_after)
1767 af++;
1768
1769 nfactor = factor * af / gcd (factor, af);
1770 if (nfactor <= max)
1771 factor = nfactor;
1772 }
1773
1774 return factor;
1775 }
1776
1777 /* Perform the predictive commoning optimization for CHAINS.
1778 Uids of the newly created temporary variables are marked in TMP_VARS. */
1779
1780 static void
1781 execute_pred_commoning (struct loop *loop, vec<chain_p> chains,
1782 bitmap tmp_vars)
1783 {
1784 chain_p chain;
1785 unsigned i;
1786
1787 FOR_EACH_VEC_ELT (chains, i, chain)
1788 {
1789 if (chain->type == CT_INVARIANT)
1790 execute_load_motion (loop, chain, tmp_vars);
1791 else
1792 execute_pred_commoning_chain (loop, chain, tmp_vars);
1793 }
1794
1795 FOR_EACH_VEC_ELT (chains, i, chain)
1796 {
1797 if (chain->type == CT_INVARIANT)
1798 ;
1799 else if (chain->combined)
1800 {
1801 /* For combined chains, just remove the statements that are used to
1802 compute the values of the expression (except for the root one). */
1803 dref a;
1804 unsigned j;
1805 for (j = 1; chain->refs.iterate (j, &a); j++)
1806 remove_stmt (a->stmt);
1807 }
1808 }
1809
1810 update_ssa (TODO_update_ssa_only_virtuals);
1811 }
1812
1813 /* For each reference in CHAINS, if its defining statement is
1814 phi node, record the ssa name that is defined by it. */
1815
1816 static void
1817 replace_phis_by_defined_names (vec<chain_p> chains)
1818 {
1819 chain_p chain;
1820 dref a;
1821 unsigned i, j;
1822
1823 FOR_EACH_VEC_ELT (chains, i, chain)
1824 FOR_EACH_VEC_ELT (chain->refs, j, a)
1825 {
1826 if (gimple_code (a->stmt) == GIMPLE_PHI)
1827 {
1828 a->name_defined_by_phi = PHI_RESULT (a->stmt);
1829 a->stmt = NULL;
1830 }
1831 }
1832 }
1833
1834 /* For each reference in CHAINS, if name_defined_by_phi is not
1835 NULL, use it to set the stmt field. */
1836
1837 static void
1838 replace_names_by_phis (vec<chain_p> chains)
1839 {
1840 chain_p chain;
1841 dref a;
1842 unsigned i, j;
1843
1844 FOR_EACH_VEC_ELT (chains, i, chain)
1845 FOR_EACH_VEC_ELT (chain->refs, j, a)
1846 if (a->stmt == NULL)
1847 {
1848 a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi);
1849 gcc_assert (gimple_code (a->stmt) == GIMPLE_PHI);
1850 a->name_defined_by_phi = NULL_TREE;
1851 }
1852 }
1853
1854 /* Wrapper over execute_pred_commoning, to pass it as a callback
1855 to tree_transform_and_unroll_loop. */
1856
1857 struct epcc_data
1858 {
1859 vec<chain_p> chains;
1860 bitmap tmp_vars;
1861 };
1862
1863 static void
1864 execute_pred_commoning_cbck (struct loop *loop, void *data)
1865 {
1866 struct epcc_data *const dta = (struct epcc_data *) data;
1867
1868 /* Restore phi nodes that were replaced by ssa names before
1869 tree_transform_and_unroll_loop (see detailed description in
1870 tree_predictive_commoning_loop). */
1871 replace_names_by_phis (dta->chains);
1872 execute_pred_commoning (loop, dta->chains, dta->tmp_vars);
1873 }
1874
1875 /* Base NAME and all the names in the chain of phi nodes that use it
1876 on variable VAR. The phi nodes are recognized by being in the copies of
1877 the header of the LOOP. */
1878
1879 static void
1880 base_names_in_chain_on (struct loop *loop, tree name, tree var)
1881 {
1882 gimple *stmt, *phi;
1883 imm_use_iterator iter;
1884
1885 replace_ssa_name_symbol (name, var);
1886
1887 while (1)
1888 {
1889 phi = NULL;
1890 FOR_EACH_IMM_USE_STMT (stmt, iter, name)
1891 {
1892 if (gimple_code (stmt) == GIMPLE_PHI
1893 && flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
1894 {
1895 phi = stmt;
1896 BREAK_FROM_IMM_USE_STMT (iter);
1897 }
1898 }
1899 if (!phi)
1900 return;
1901
1902 name = PHI_RESULT (phi);
1903 replace_ssa_name_symbol (name, var);
1904 }
1905 }
1906
1907 /* Given an unrolled LOOP after predictive commoning, remove the
1908 register copies arising from phi nodes by changing the base
1909 variables of SSA names. TMP_VARS is the set of the temporary variables
1910 for those we want to perform this. */
1911
1912 static void
1913 eliminate_temp_copies (struct loop *loop, bitmap tmp_vars)
1914 {
1915 edge e;
1916 gphi *phi;
1917 gimple *stmt;
1918 tree name, use, var;
1919 gphi_iterator psi;
1920
1921 e = loop_latch_edge (loop);
1922 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1923 {
1924 phi = psi.phi ();
1925 name = PHI_RESULT (phi);
1926 var = SSA_NAME_VAR (name);
1927 if (!var || !bitmap_bit_p (tmp_vars, DECL_UID (var)))
1928 continue;
1929 use = PHI_ARG_DEF_FROM_EDGE (phi, e);
1930 gcc_assert (TREE_CODE (use) == SSA_NAME);
1931
1932 /* Base all the ssa names in the ud and du chain of NAME on VAR. */
1933 stmt = SSA_NAME_DEF_STMT (use);
1934 while (gimple_code (stmt) == GIMPLE_PHI
1935 /* In case we could not unroll the loop enough to eliminate
1936 all copies, we may reach the loop header before the defining
1937 statement (in that case, some register copies will be present
1938 in loop latch in the final code, corresponding to the newly
1939 created looparound phi nodes). */
1940 && gimple_bb (stmt) != loop->header)
1941 {
1942 gcc_assert (single_pred_p (gimple_bb (stmt)));
1943 use = PHI_ARG_DEF (stmt, 0);
1944 stmt = SSA_NAME_DEF_STMT (use);
1945 }
1946
1947 base_names_in_chain_on (loop, use, var);
1948 }
1949 }
1950
1951 /* Returns true if CHAIN is suitable to be combined. */
1952
1953 static bool
1954 chain_can_be_combined_p (chain_p chain)
1955 {
1956 return (!chain->combined
1957 && (chain->type == CT_LOAD || chain->type == CT_COMBINATION));
1958 }
1959
1960 /* Returns the modify statement that uses NAME. Skips over assignment
1961 statements, NAME is replaced with the actual name used in the returned
1962 statement. */
1963
1964 static gimple *
1965 find_use_stmt (tree *name)
1966 {
1967 gimple *stmt;
1968 tree rhs, lhs;
1969
1970 /* Skip over assignments. */
1971 while (1)
1972 {
1973 stmt = single_nonlooparound_use (*name);
1974 if (!stmt)
1975 return NULL;
1976
1977 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1978 return NULL;
1979
1980 lhs = gimple_assign_lhs (stmt);
1981 if (TREE_CODE (lhs) != SSA_NAME)
1982 return NULL;
1983
1984 if (gimple_assign_copy_p (stmt))
1985 {
1986 rhs = gimple_assign_rhs1 (stmt);
1987 if (rhs != *name)
1988 return NULL;
1989
1990 *name = lhs;
1991 }
1992 else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1993 == GIMPLE_BINARY_RHS)
1994 return stmt;
1995 else
1996 return NULL;
1997 }
1998 }
1999
2000 /* Returns true if we may perform reassociation for operation CODE in TYPE. */
2001
2002 static bool
2003 may_reassociate_p (tree type, enum tree_code code)
2004 {
2005 if (FLOAT_TYPE_P (type)
2006 && !flag_unsafe_math_optimizations)
2007 return false;
2008
2009 return (commutative_tree_code (code)
2010 && associative_tree_code (code));
2011 }
2012
2013 /* If the operation used in STMT is associative and commutative, go through the
2014 tree of the same operations and returns its root. Distance to the root
2015 is stored in DISTANCE. */
2016
2017 static gimple *
2018 find_associative_operation_root (gimple *stmt, unsigned *distance)
2019 {
2020 tree lhs;
2021 gimple *next;
2022 enum tree_code code = gimple_assign_rhs_code (stmt);
2023 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2024 unsigned dist = 0;
2025
2026 if (!may_reassociate_p (type, code))
2027 return NULL;
2028
2029 while (1)
2030 {
2031 lhs = gimple_assign_lhs (stmt);
2032 gcc_assert (TREE_CODE (lhs) == SSA_NAME);
2033
2034 next = find_use_stmt (&lhs);
2035 if (!next
2036 || gimple_assign_rhs_code (next) != code)
2037 break;
2038
2039 stmt = next;
2040 dist++;
2041 }
2042
2043 if (distance)
2044 *distance = dist;
2045 return stmt;
2046 }
2047
2048 /* Returns the common statement in that NAME1 and NAME2 have a use. If there
2049 is no such statement, returns NULL_TREE. In case the operation used on
2050 NAME1 and NAME2 is associative and commutative, returns the root of the
2051 tree formed by this operation instead of the statement that uses NAME1 or
2052 NAME2. */
2053
2054 static gimple *
2055 find_common_use_stmt (tree *name1, tree *name2)
2056 {
2057 gimple *stmt1, *stmt2;
2058
2059 stmt1 = find_use_stmt (name1);
2060 if (!stmt1)
2061 return NULL;
2062
2063 stmt2 = find_use_stmt (name2);
2064 if (!stmt2)
2065 return NULL;
2066
2067 if (stmt1 == stmt2)
2068 return stmt1;
2069
2070 stmt1 = find_associative_operation_root (stmt1, NULL);
2071 if (!stmt1)
2072 return NULL;
2073 stmt2 = find_associative_operation_root (stmt2, NULL);
2074 if (!stmt2)
2075 return NULL;
2076
2077 return (stmt1 == stmt2 ? stmt1 : NULL);
2078 }
2079
2080 /* Checks whether R1 and R2 are combined together using CODE, with the result
2081 in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
2082 if it is true. If CODE is ERROR_MARK, set these values instead. */
2083
2084 static bool
2085 combinable_refs_p (dref r1, dref r2,
2086 enum tree_code *code, bool *swap, tree *rslt_type)
2087 {
2088 enum tree_code acode;
2089 bool aswap;
2090 tree atype;
2091 tree name1, name2;
2092 gimple *stmt;
2093
2094 name1 = name_for_ref (r1);
2095 name2 = name_for_ref (r2);
2096 gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);
2097
2098 stmt = find_common_use_stmt (&name1, &name2);
2099
2100 if (!stmt
2101 /* A simple post-dominance check - make sure the combination
2102 is executed under the same condition as the references. */
2103 || (gimple_bb (stmt) != gimple_bb (r1->stmt)
2104 && gimple_bb (stmt) != gimple_bb (r2->stmt)))
2105 return false;
2106
2107 acode = gimple_assign_rhs_code (stmt);
2108 aswap = (!commutative_tree_code (acode)
2109 && gimple_assign_rhs1 (stmt) != name1);
2110 atype = TREE_TYPE (gimple_assign_lhs (stmt));
2111
2112 if (*code == ERROR_MARK)
2113 {
2114 *code = acode;
2115 *swap = aswap;
2116 *rslt_type = atype;
2117 return true;
2118 }
2119
2120 return (*code == acode
2121 && *swap == aswap
2122 && *rslt_type == atype);
2123 }
2124
2125 /* Remove OP from the operation on rhs of STMT, and replace STMT with
2126 an assignment of the remaining operand. */
2127
2128 static void
2129 remove_name_from_operation (gimple *stmt, tree op)
2130 {
2131 tree other_op;
2132 gimple_stmt_iterator si;
2133
2134 gcc_assert (is_gimple_assign (stmt));
2135
2136 if (gimple_assign_rhs1 (stmt) == op)
2137 other_op = gimple_assign_rhs2 (stmt);
2138 else
2139 other_op = gimple_assign_rhs1 (stmt);
2140
2141 si = gsi_for_stmt (stmt);
2142 gimple_assign_set_rhs_from_tree (&si, other_op);
2143
2144 /* We should not have reallocated STMT. */
2145 gcc_assert (gsi_stmt (si) == stmt);
2146
2147 update_stmt (stmt);
2148 }
2149
2150 /* Reassociates the expression in that NAME1 and NAME2 are used so that they
2151 are combined in a single statement, and returns this statement. */
2152
2153 static gimple *
2154 reassociate_to_the_same_stmt (tree name1, tree name2)
2155 {
2156 gimple *stmt1, *stmt2, *root1, *root2, *s1, *s2;
2157 gassign *new_stmt, *tmp_stmt;
2158 tree new_name, tmp_name, var, r1, r2;
2159 unsigned dist1, dist2;
2160 enum tree_code code;
2161 tree type = TREE_TYPE (name1);
2162 gimple_stmt_iterator bsi;
2163
2164 stmt1 = find_use_stmt (&name1);
2165 stmt2 = find_use_stmt (&name2);
2166 root1 = find_associative_operation_root (stmt1, &dist1);
2167 root2 = find_associative_operation_root (stmt2, &dist2);
2168 code = gimple_assign_rhs_code (stmt1);
2169
2170 gcc_assert (root1 && root2 && root1 == root2
2171 && code == gimple_assign_rhs_code (stmt2));
2172
2173 /* Find the root of the nearest expression in that both NAME1 and NAME2
2174 are used. */
2175 r1 = name1;
2176 s1 = stmt1;
2177 r2 = name2;
2178 s2 = stmt2;
2179
2180 while (dist1 > dist2)
2181 {
2182 s1 = find_use_stmt (&r1);
2183 r1 = gimple_assign_lhs (s1);
2184 dist1--;
2185 }
2186 while (dist2 > dist1)
2187 {
2188 s2 = find_use_stmt (&r2);
2189 r2 = gimple_assign_lhs (s2);
2190 dist2--;
2191 }
2192
2193 while (s1 != s2)
2194 {
2195 s1 = find_use_stmt (&r1);
2196 r1 = gimple_assign_lhs (s1);
2197 s2 = find_use_stmt (&r2);
2198 r2 = gimple_assign_lhs (s2);
2199 }
2200
2201 /* Remove NAME1 and NAME2 from the statements in that they are used
2202 currently. */
2203 remove_name_from_operation (stmt1, name1);
2204 remove_name_from_operation (stmt2, name2);
2205
2206 /* Insert the new statement combining NAME1 and NAME2 before S1, and
2207 combine it with the rhs of S1. */
2208 var = create_tmp_reg (type, "predreastmp");
2209 new_name = make_ssa_name (var);
2210 new_stmt = gimple_build_assign (new_name, code, name1, name2);
2211
2212 var = create_tmp_reg (type, "predreastmp");
2213 tmp_name = make_ssa_name (var);
2214
2215 /* Rhs of S1 may now be either a binary expression with operation
2216 CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
2217 so that name1 or name2 was removed from it). */
2218 tmp_stmt = gimple_build_assign (tmp_name, gimple_assign_rhs_code (s1),
2219 gimple_assign_rhs1 (s1),
2220 gimple_assign_rhs2 (s1));
2221
2222 bsi = gsi_for_stmt (s1);
2223 gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name);
2224 s1 = gsi_stmt (bsi);
2225 update_stmt (s1);
2226
2227 gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
2228 gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT);
2229
2230 return new_stmt;
2231 }
2232
2233 /* Returns the statement that combines references R1 and R2. In case R1
2234 and R2 are not used in the same statement, but they are used with an
2235 associative and commutative operation in the same expression, reassociate
2236 the expression so that they are used in the same statement. */
2237
2238 static gimple *
2239 stmt_combining_refs (dref r1, dref r2)
2240 {
2241 gimple *stmt1, *stmt2;
2242 tree name1 = name_for_ref (r1);
2243 tree name2 = name_for_ref (r2);
2244
2245 stmt1 = find_use_stmt (&name1);
2246 stmt2 = find_use_stmt (&name2);
2247 if (stmt1 == stmt2)
2248 return stmt1;
2249
2250 return reassociate_to_the_same_stmt (name1, name2);
2251 }
2252
2253 /* Tries to combine chains CH1 and CH2 together. If this succeeds, the
2254 description of the new chain is returned, otherwise we return NULL. */
2255
2256 static chain_p
2257 combine_chains (chain_p ch1, chain_p ch2)
2258 {
2259 dref r1, r2, nw;
2260 enum tree_code op = ERROR_MARK;
2261 bool swap = false;
2262 chain_p new_chain;
2263 unsigned i;
2264 gimple *root_stmt;
2265 tree rslt_type = NULL_TREE;
2266
2267 if (ch1 == ch2)
2268 return NULL;
2269 if (ch1->length != ch2->length)
2270 return NULL;
2271
2272 if (ch1->refs.length () != ch2->refs.length ())
2273 return NULL;
2274
2275 for (i = 0; (ch1->refs.iterate (i, &r1)
2276 && ch2->refs.iterate (i, &r2)); i++)
2277 {
2278 if (r1->distance != r2->distance)
2279 return NULL;
2280
2281 if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type))
2282 return NULL;
2283 }
2284
2285 if (swap)
2286 std::swap (ch1, ch2);
2287
2288 new_chain = XCNEW (struct chain);
2289 new_chain->type = CT_COMBINATION;
2290 new_chain->op = op;
2291 new_chain->ch1 = ch1;
2292 new_chain->ch2 = ch2;
2293 new_chain->rslt_type = rslt_type;
2294 new_chain->length = ch1->length;
2295
2296 for (i = 0; (ch1->refs.iterate (i, &r1)
2297 && ch2->refs.iterate (i, &r2)); i++)
2298 {
2299 nw = XCNEW (struct dref_d);
2300 nw->stmt = stmt_combining_refs (r1, r2);
2301 nw->distance = r1->distance;
2302
2303 new_chain->refs.safe_push (nw);
2304 }
2305
2306 new_chain->has_max_use_after = false;
2307 root_stmt = get_chain_root (new_chain)->stmt;
2308 for (i = 1; new_chain->refs.iterate (i, &nw); i++)
2309 {
2310 if (nw->distance == new_chain->length
2311 && !stmt_dominates_stmt_p (nw->stmt, root_stmt))
2312 {
2313 new_chain->has_max_use_after = true;
2314 break;
2315 }
2316 }
2317
2318 ch1->combined = true;
2319 ch2->combined = true;
2320 return new_chain;
2321 }
2322
2323 /* Try to combine the CHAINS. */
2324
2325 static void
2326 try_combine_chains (vec<chain_p> *chains)
2327 {
2328 unsigned i, j;
2329 chain_p ch1, ch2, cch;
2330 auto_vec<chain_p> worklist;
2331
2332 FOR_EACH_VEC_ELT (*chains, i, ch1)
2333 if (chain_can_be_combined_p (ch1))
2334 worklist.safe_push (ch1);
2335
2336 while (!worklist.is_empty ())
2337 {
2338 ch1 = worklist.pop ();
2339 if (!chain_can_be_combined_p (ch1))
2340 continue;
2341
2342 FOR_EACH_VEC_ELT (*chains, j, ch2)
2343 {
2344 if (!chain_can_be_combined_p (ch2))
2345 continue;
2346
2347 cch = combine_chains (ch1, ch2);
2348 if (cch)
2349 {
2350 worklist.safe_push (cch);
2351 chains->safe_push (cch);
2352 break;
2353 }
2354 }
2355 }
2356 }
2357
2358 /* Prepare initializers for CHAIN in LOOP. Returns false if this is
2359 impossible because one of these initializers may trap, true otherwise. */
2360
2361 static bool
2362 prepare_initializers_chain (struct loop *loop, chain_p chain)
2363 {
2364 unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
2365 struct data_reference *dr = get_chain_root (chain)->ref;
2366 tree init;
2367 dref laref;
2368 edge entry = loop_preheader_edge (loop);
2369
2370 /* Find the initializers for the variables, and check that they cannot
2371 trap. */
2372 chain->inits.create (n);
2373 for (i = 0; i < n; i++)
2374 chain->inits.quick_push (NULL_TREE);
2375
2376 /* If we have replaced some looparound phi nodes, use their initializers
2377 instead of creating our own. */
2378 FOR_EACH_VEC_ELT (chain->refs, i, laref)
2379 {
2380 if (gimple_code (laref->stmt) != GIMPLE_PHI)
2381 continue;
2382
2383 gcc_assert (laref->distance > 0);
2384 chain->inits[n - laref->distance]
2385 = PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry);
2386 }
2387
2388 for (i = 0; i < n; i++)
2389 {
2390 gimple_seq stmts = NULL;
2391
2392 if (chain->inits[i] != NULL_TREE)
2393 continue;
2394
2395 init = ref_at_iteration (dr, (int) i - n, &stmts);
2396 if (!chain->all_always_accessed && tree_could_trap_p (init))
2397 {
2398 gimple_seq_discard (stmts);
2399 return false;
2400 }
2401
2402 if (stmts)
2403 gsi_insert_seq_on_edge_immediate (entry, stmts);
2404
2405 chain->inits[i] = init;
2406 }
2407
2408 return true;
2409 }
2410
2411 /* Prepare initializers for CHAINS in LOOP, and free chains that cannot
2412 be used because the initializers might trap. */
2413
2414 static void
2415 prepare_initializers (struct loop *loop, vec<chain_p> chains)
2416 {
2417 chain_p chain;
2418 unsigned i;
2419
2420 for (i = 0; i < chains.length (); )
2421 {
2422 chain = chains[i];
2423 if (prepare_initializers_chain (loop, chain))
2424 i++;
2425 else
2426 {
2427 release_chain (chain);
2428 chains.unordered_remove (i);
2429 }
2430 }
2431 }
2432
2433 /* Performs predictive commoning for LOOP. Returns true if LOOP was
2434 unrolled. */
2435
2436 static bool
2437 tree_predictive_commoning_loop (struct loop *loop)
2438 {
2439 vec<data_reference_p> datarefs;
2440 vec<ddr_p> dependences;
2441 struct component *components;
2442 vec<chain_p> chains = vNULL;
2443 unsigned unroll_factor;
2444 struct tree_niter_desc desc;
2445 bool unroll = false;
2446 edge exit;
2447 bitmap tmp_vars;
2448
2449 if (dump_file && (dump_flags & TDF_DETAILS))
2450 fprintf (dump_file, "Processing loop %d\n", loop->num);
2451
2452 /* Find the data references and split them into components according to their
2453 dependence relations. */
2454 auto_vec<loop_p, 3> loop_nest;
2455 dependences.create (10);
2456 datarefs.create (10);
2457 if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
2458 &dependences))
2459 {
2460 if (dump_file && (dump_flags & TDF_DETAILS))
2461 fprintf (dump_file, "Cannot analyze data dependencies\n");
2462 free_data_refs (datarefs);
2463 free_dependence_relations (dependences);
2464 return false;
2465 }
2466
2467 if (dump_file && (dump_flags & TDF_DETAILS))
2468 dump_data_dependence_relations (dump_file, dependences);
2469
2470 components = split_data_refs_to_components (loop, datarefs, dependences);
2471 loop_nest.release ();
2472 free_dependence_relations (dependences);
2473 if (!components)
2474 {
2475 free_data_refs (datarefs);
2476 free_affine_expand_cache (&name_expansions);
2477 return false;
2478 }
2479
2480 if (dump_file && (dump_flags & TDF_DETAILS))
2481 {
2482 fprintf (dump_file, "Initial state:\n\n");
2483 dump_components (dump_file, components);
2484 }
2485
2486 /* Find the suitable components and split them into chains. */
2487 components = filter_suitable_components (loop, components);
2488
2489 tmp_vars = BITMAP_ALLOC (NULL);
2490 looparound_phis = BITMAP_ALLOC (NULL);
2491 determine_roots (loop, components, &chains);
2492 release_components (components);
2493
2494 if (!chains.exists ())
2495 {
2496 if (dump_file && (dump_flags & TDF_DETAILS))
2497 fprintf (dump_file,
2498 "Predictive commoning failed: no suitable chains\n");
2499 goto end;
2500 }
2501 prepare_initializers (loop, chains);
2502
2503 /* Try to combine the chains that are always worked with together. */
2504 try_combine_chains (&chains);
2505
2506 if (dump_file && (dump_flags & TDF_DETAILS))
2507 {
2508 fprintf (dump_file, "Before commoning:\n\n");
2509 dump_chains (dump_file, chains);
2510 }
2511
2512 /* Determine the unroll factor, and if the loop should be unrolled, ensure
2513 that its number of iterations is divisible by the factor. */
2514 unroll_factor = determine_unroll_factor (chains);
2515 scev_reset ();
2516 unroll = (unroll_factor > 1
2517 && can_unroll_loop_p (loop, unroll_factor, &desc));
2518 exit = single_dom_exit (loop);
2519
2520 /* Execute the predictive commoning transformations, and possibly unroll the
2521 loop. */
2522 if (unroll)
2523 {
2524 struct epcc_data dta;
2525
2526 if (dump_file && (dump_flags & TDF_DETAILS))
2527 fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);
2528
2529 dta.chains = chains;
2530 dta.tmp_vars = tmp_vars;
2531
2532 update_ssa (TODO_update_ssa_only_virtuals);
2533
2534 /* Cfg manipulations performed in tree_transform_and_unroll_loop before
2535 execute_pred_commoning_cbck is called may cause phi nodes to be
2536 reallocated, which is a problem since CHAINS may point to these
2537 statements. To fix this, we store the ssa names defined by the
2538 phi nodes here instead of the phi nodes themselves, and restore
2539 the phi nodes in execute_pred_commoning_cbck. A bit hacky. */
2540 replace_phis_by_defined_names (chains);
2541
2542 tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc,
2543 execute_pred_commoning_cbck, &dta);
2544 eliminate_temp_copies (loop, tmp_vars);
2545 }
2546 else
2547 {
2548 if (dump_file && (dump_flags & TDF_DETAILS))
2549 fprintf (dump_file,
2550 "Executing predictive commoning without unrolling.\n");
2551 execute_pred_commoning (loop, chains, tmp_vars);
2552 }
2553
2554 end: ;
2555 release_chains (chains);
2556 free_data_refs (datarefs);
2557 BITMAP_FREE (tmp_vars);
2558 BITMAP_FREE (looparound_phis);
2559
2560 free_affine_expand_cache (&name_expansions);
2561
2562 return unroll;
2563 }
2564
2565 /* Runs predictive commoning. */
2566
2567 unsigned
2568 tree_predictive_commoning (void)
2569 {
2570 bool unrolled = false;
2571 struct loop *loop;
2572 unsigned ret = 0;
2573
2574 initialize_original_copy_tables ();
2575 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
2576 if (optimize_loop_for_speed_p (loop))
2577 {
2578 unrolled |= tree_predictive_commoning_loop (loop);
2579 }
2580
2581 if (unrolled)
2582 {
2583 scev_reset ();
2584 ret = TODO_cleanup_cfg;
2585 }
2586 free_original_copy_tables ();
2587
2588 return ret;
2589 }
2590
2591 /* Predictive commoning Pass. */
2592
2593 static unsigned
2594 run_tree_predictive_commoning (struct function *fun)
2595 {
2596 if (number_of_loops (fun) <= 1)
2597 return 0;
2598
2599 return tree_predictive_commoning ();
2600 }
2601
2602 namespace {
2603
2604 const pass_data pass_data_predcom =
2605 {
2606 GIMPLE_PASS, /* type */
2607 "pcom", /* name */
2608 OPTGROUP_LOOP, /* optinfo_flags */
2609 TV_PREDCOM, /* tv_id */
2610 PROP_cfg, /* properties_required */
2611 0, /* properties_provided */
2612 0, /* properties_destroyed */
2613 0, /* todo_flags_start */
2614 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
2615 };
2616
2617 class pass_predcom : public gimple_opt_pass
2618 {
2619 public:
2620 pass_predcom (gcc::context *ctxt)
2621 : gimple_opt_pass (pass_data_predcom, ctxt)
2622 {}
2623
2624 /* opt_pass methods: */
2625 virtual bool gate (function *) { return flag_predictive_commoning != 0; }
2626 virtual unsigned int execute (function *fun)
2627 {
2628 return run_tree_predictive_commoning (fun);
2629 }
2630
2631 }; // class pass_predcom
2632
2633 } // anon namespace
2634
2635 gimple_opt_pass *
2636 make_pass_predcom (gcc::context *ctxt)
2637 {
2638 return new pass_predcom (ctxt);
2639 }
2640
2641