]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/tree-predcom.c
PR testsuite/92016 - Excess errors in Wstringop-overflow-17.c
[thirdparty/gcc.git] / gcc / tree-predcom.c
1 /* Predictive commoning.
2 Copyright (C) 2005-2019 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This file implements the predictive commoning optimization. Predictive
21 commoning can be viewed as CSE around a loop, and with some improvements,
   22      commoning can be viewed as CSE around a loop, and with some improvements,
23 earlier iterations of a loop in the later ones. So far, the pass only
24 handles the most useful case, that is, reusing values of memory references.
25 If you think this is all just a special case of PRE, you are sort of right;
26 however, concentrating on loops is simpler, and makes it possible to
27 incorporate data dependence analysis to detect the opportunities, perform
28 loop unrolling to avoid copies together with renaming immediately,
29 and if needed, we could also take register pressure into account.
30
31 Let us demonstrate what is done on an example:
32
33 for (i = 0; i < 100; i++)
34 {
35 a[i+2] = a[i] + a[i+1];
36 b[10] = b[10] + i;
37 c[i] = c[99 - i];
38 d[i] = d[i + 1];
39 }
40
41 1) We find data references in the loop, and split them to mutually
42 independent groups (i.e., we find components of a data dependence
43 graph). We ignore read-read dependences whose distance is not constant.
44 (TODO -- we could also ignore antidependences). In this example, we
45 find the following groups:
46
47 a[i]{read}, a[i+1]{read}, a[i+2]{write}
48 b[10]{read}, b[10]{write}
49 c[99 - i]{read}, c[i]{write}
50 d[i + 1]{read}, d[i]{write}
51
52 2) Inside each of the group, we verify several conditions:
53 a) all the references must differ in indices only, and the indices
54 must all have the same step
55 b) the references must dominate loop latch (and thus, they must be
56 ordered by dominance relation).
57 c) the distance of the indices must be a small multiple of the step
58 We are then able to compute the difference of the references (# of
59 iterations before they point to the same place as the first of them).
60 Also, in case there are writes in the loop, we split the groups into
61 chains whose head is the write whose values are used by the reads in
62 the same chain. The chains are then processed independently,
63 making the further transformations simpler. Also, the shorter chains
64 need the same number of registers, but may require lower unrolling
65 factor in order to get rid of the copies on the loop latch.
66
67 In our example, we get the following chains (the chain for c is invalid).
68
69 a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
70 b[10]{read,+0}, b[10]{write,+0}
71 d[i + 1]{read,+0}, d[i]{write,+1}
72
73 3) For each read, we determine the read or write whose value it reuses,
74 together with the distance of this reuse. I.e. we take the last
75 reference before it with distance 0, or the last of the references
76 with the smallest positive distance to the read. Then, we remove
77 the references that are not used in any of these chains, discard the
78 empty groups, and propagate all the links so that they point to the
79 single root reference of the chain (adjusting their distance
80 appropriately). Some extra care needs to be taken for references with
81 step 0. In our example (the numbers indicate the distance of the
82 reuse),
83
84 a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
85 b[10] --> (*) 1, b[10] (*)
86
87 4) The chains are combined together if possible. If the corresponding
88 elements of two chains are always combined together with the same
89 operator, we remember just the result of this combination, instead
90 of remembering the values separately. We may need to perform
91 reassociation to enable combining, for example
92
93 e[i] + f[i+1] + e[i+1] + f[i]
94
95 can be reassociated as
96
97 (e[i] + f[i]) + (e[i+1] + f[i+1])
98
99 and we can combine the chains for e and f into one chain.
100
101 5) For each root reference (end of the chain) R, let N be maximum distance
102 of a reference reusing its value. Variables R0 up to RN are created,
103 together with phi nodes that transfer values from R1 .. RN to
104 R0 .. R(N-1).
105 Initial values are loaded to R0..R(N-1) (in case not all references
106 must necessarily be accessed and they may trap, we may fail here;
107 TODO sometimes, the loads could be guarded by a check for the number
108 of iterations). Values loaded/stored in roots are also copied to
109 RN. Other reads are replaced with the appropriate variable Ri.
110 Everything is put to SSA form.
111
112 As a small improvement, if R0 is dead after the root (i.e., all uses of
113 the value with the maximum distance dominate the root), we can avoid
114 creating RN and use R0 instead of it.
115
116 In our example, we get (only the parts concerning a and b are shown):
117 for (i = 0; i < 100; i++)
118 {
119 f = phi (a[0], s);
120 s = phi (a[1], f);
121 x = phi (b[10], x);
122
123 f = f + s;
124 a[i+2] = f;
125 x = x + i;
126 b[10] = x;
127 }
128
129 6) Factor F for unrolling is determined as the smallest common multiple of
130 (N + 1) for each root reference (N for references for that we avoided
131 creating RN). If F and the loop is small enough, loop is unrolled F
132 times. The stores to RN (R0) in the copies of the loop body are
133 periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
134 be coalesced and the copies can be eliminated.
135
136 TODO -- copy propagation and other optimizations may change the live
137 ranges of the temporary registers and prevent them from being coalesced;
138 this may increase the register pressure.
139
140 In our case, F = 2 and the (main loop of the) result is
141
142 for (i = 0; i < ...; i += 2)
143 {
144 f = phi (a[0], f);
145 s = phi (a[1], s);
146 x = phi (b[10], x);
147
148 f = f + s;
149 a[i+2] = f;
150 x = x + i;
151 b[10] = x;
152
153 s = s + f;
154 a[i+3] = s;
155 x = x + i;
156 b[10] = x;
157 }
158
159 Apart from predictive commoning on Load-Load and Store-Load chains, we
160 also support Store-Store chains -- stores killed by other store can be
161 eliminated. Given below example:
162
163 for (i = 0; i < n; i++)
164 {
165 a[i] = 1;
166 a[i+2] = 2;
167 }
168
169 It can be replaced with:
170
171 t0 = a[0];
172 t1 = a[1];
173 for (i = 0; i < n; i++)
174 {
175 a[i] = 1;
176 t2 = 2;
177 t0 = t1;
178 t1 = t2;
179 }
180 a[n] = t0;
181 a[n+1] = t1;
182
  183    If the loop runs more than one iteration, it can be further simplified into:
184
185 for (i = 0; i < n; i++)
186 {
187 a[i] = 1;
188 }
189 a[n] = 2;
190 a[n+1] = 2;
191
192 The interesting part is this can be viewed either as general store motion
193 or general dead store elimination in either intra/inter-iterations way.
194
195 With trivial effort, we also support load inside Store-Store chains if the
196 load is dominated by a store statement in the same iteration of loop. You
197 can see this as a restricted Store-Mixed-Load-Store chain.
198
199 TODO: For now, we don't support store-store chains in multi-exit loops. We
200 force to not unroll in case of store-store chain even if other chains might
201 ask for unroll.
202
203 Predictive commoning can be generalized for arbitrary computations (not
204 just memory loads), and also nontrivial transfer functions (e.g., replacing
205 i * i with ii_last + 2 * i + 1), to generalize strength reduction. */
206
207 #include "config.h"
208 #include "system.h"
209 #include "coretypes.h"
210 #include "backend.h"
211 #include "rtl.h"
212 #include "tree.h"
213 #include "gimple.h"
214 #include "predict.h"
215 #include "tree-pass.h"
216 #include "ssa.h"
217 #include "gimple-pretty-print.h"
218 #include "alias.h"
219 #include "fold-const.h"
220 #include "cfgloop.h"
221 #include "tree-eh.h"
222 #include "gimplify.h"
223 #include "gimple-iterator.h"
224 #include "gimplify-me.h"
225 #include "tree-ssa-loop-ivopts.h"
226 #include "tree-ssa-loop-manip.h"
227 #include "tree-ssa-loop-niter.h"
228 #include "tree-ssa-loop.h"
229 #include "tree-into-ssa.h"
230 #include "tree-dfa.h"
231 #include "tree-ssa.h"
232 #include "tree-data-ref.h"
233 #include "tree-scalar-evolution.h"
234 #include "params.h"
235 #include "tree-affine.h"
236 #include "builtins.h"
237
/* The maximum number of iterations between the considered memory
   references.  Smaller on register-starved targets, presumably to limit
   register pressure from the carried values -- NOTE(review): confirm.  */

#define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
242
/* Data references (or phi nodes that carry data reference values across
   loop iterations).  A "dref" is a pointer to this descriptor.  */

typedef class dref_d
{
public:
  /* The reference itself.  */
  struct data_reference *ref;

  /* The statement in which the reference appears.  */
  gimple *stmt;

  /* In case that STMT is a phi node, this field is set to the SSA name
     defined by it in replace_phis_by_defined_names (in order to avoid
     pointing to phi node that got reallocated in the meantime).  */
  tree name_defined_by_phi;

  /* Distance of the reference from the root of the chain (in number of
     iterations of the loop).  */
  unsigned distance;

  /* Number of iterations offset from the first reference in the component.  */
  widest_int offset;

  /* Number of the reference in a component, in dominance ordering.  */
  unsigned pos;

  /* True if the memory reference is always accessed when the loop is
     entered.  */
  unsigned always_accessed : 1;
} *dref;
274
275
/* Type of the chain of the references.  */

enum chain_type
{
  /* The addresses of the references in the chain are constant.  */
  CT_INVARIANT,

  /* There are only loads in the chain.  */
  CT_LOAD,

  /* Root of the chain is store, the rest are loads.  */
  CT_STORE_LOAD,

  /* There are only stores in the chain.  */
  CT_STORE_STORE,

  /* A combination of two chains.  */
  CT_COMBINATION
};
295
/* Chains of data references.  */

typedef struct chain
{
  /* Type of the chain.  */
  enum chain_type type;

  /* For combination chains, the operator and the two chains that are
     combined, and the type of the result.  */
  enum tree_code op;
  tree rslt_type;
  struct chain *ch1, *ch2;

  /* The references in the chain.  */
  vec<dref> refs;

  /* The maximum distance of the reference in the chain from the root.  */
  unsigned length;

  /* The variables used to copy the value throughout iterations.  */
  vec<tree> vars;

  /* Initializers for the variables.  */
  vec<tree> inits;

  /* Finalizers for the eliminated stores.  */
  vec<tree> finis;

  /* gimple stmts initializing the initial variables of the chain.  */
  gimple_seq init_seq;

  /* gimple stmts finalizing the eliminated stores of the chain.  */
  gimple_seq fini_seq;

  /* True if there is a use of a variable with the maximal distance
     that comes after the root in the loop.  */
  unsigned has_max_use_after : 1;

  /* True if all the memory references in the chain are always accessed.  */
  unsigned all_always_accessed : 1;

  /* True if this chain was combined together with some other chain.  */
  unsigned combined : 1;

  /* True if this is store elimination chain and eliminated stores store
     loop invariant value into memory.  */
  unsigned inv_store_elimination : 1;
} *chain_p;
344
345
/* Describes the knowledge about the step of the memory references in
   the component.  */

enum ref_step_type
{
  /* The step is zero.  */
  RS_INVARIANT,

  /* The step is nonzero.  */
  RS_NONZERO,

  /* The step may or may not be nonzero.  */
  RS_ANY
};
360
/* Components of the data dependence graph.  */

struct component
{
  /* The references in the component.  */
  vec<dref> refs;

  /* What we know about the step of the references in the component.  */
  enum ref_step_type comp_step;

  /* True if all references in component are stores and we try to do
     intra/inter loop iteration dead store elimination.  */
  bool eliminate_store_p;

  /* Next component in the list.  */
  struct component *next;
};
378
/* Bitmap of ssa names defined by looparound phi nodes covered by chains.  */

static bitmap looparound_phis;

/* Cache used by tree_to_aff_combination_expand.  */

static hash_map<tree, name_expansion *> *name_expansions;
386
387 /* Dumps data reference REF to FILE. */
388
389 extern void dump_dref (FILE *, dref);
390 void
391 dump_dref (FILE *file, dref ref)
392 {
393 if (ref->ref)
394 {
395 fprintf (file, " ");
396 print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM);
397 fprintf (file, " (id %u%s)\n", ref->pos,
398 DR_IS_READ (ref->ref) ? "" : ", write");
399
400 fprintf (file, " offset ");
401 print_decs (ref->offset, file);
402 fprintf (file, "\n");
403
404 fprintf (file, " distance %u\n", ref->distance);
405 }
406 else
407 {
408 if (gimple_code (ref->stmt) == GIMPLE_PHI)
409 fprintf (file, " looparound ref\n");
410 else
411 fprintf (file, " combination ref\n");
412 fprintf (file, " in statement ");
413 print_gimple_stmt (file, ref->stmt, 0, TDF_SLIM);
414 fprintf (file, "\n");
415 fprintf (file, " distance %u\n", ref->distance);
416 }
417
418 }
419
420 /* Dumps CHAIN to FILE. */
421
422 extern void dump_chain (FILE *, chain_p);
423 void
424 dump_chain (FILE *file, chain_p chain)
425 {
426 dref a;
427 const char *chain_type;
428 unsigned i;
429 tree var;
430
431 switch (chain->type)
432 {
433 case CT_INVARIANT:
434 chain_type = "Load motion";
435 break;
436
437 case CT_LOAD:
438 chain_type = "Loads-only";
439 break;
440
441 case CT_STORE_LOAD:
442 chain_type = "Store-loads";
443 break;
444
445 case CT_STORE_STORE:
446 chain_type = "Store-stores";
447 break;
448
449 case CT_COMBINATION:
450 chain_type = "Combination";
451 break;
452
453 default:
454 gcc_unreachable ();
455 }
456
457 fprintf (file, "%s chain %p%s\n", chain_type, (void *) chain,
458 chain->combined ? " (combined)" : "");
459 if (chain->type != CT_INVARIANT)
460 fprintf (file, " max distance %u%s\n", chain->length,
461 chain->has_max_use_after ? "" : ", may reuse first");
462
463 if (chain->type == CT_COMBINATION)
464 {
465 fprintf (file, " equal to %p %s %p in type ",
466 (void *) chain->ch1, op_symbol_code (chain->op),
467 (void *) chain->ch2);
468 print_generic_expr (file, chain->rslt_type, TDF_SLIM);
469 fprintf (file, "\n");
470 }
471
472 if (chain->vars.exists ())
473 {
474 fprintf (file, " vars");
475 FOR_EACH_VEC_ELT (chain->vars, i, var)
476 {
477 fprintf (file, " ");
478 print_generic_expr (file, var, TDF_SLIM);
479 }
480 fprintf (file, "\n");
481 }
482
483 if (chain->inits.exists ())
484 {
485 fprintf (file, " inits");
486 FOR_EACH_VEC_ELT (chain->inits, i, var)
487 {
488 fprintf (file, " ");
489 print_generic_expr (file, var, TDF_SLIM);
490 }
491 fprintf (file, "\n");
492 }
493
494 fprintf (file, " references:\n");
495 FOR_EACH_VEC_ELT (chain->refs, i, a)
496 dump_dref (file, a);
497
498 fprintf (file, "\n");
499 }
500
501 /* Dumps CHAINS to FILE. */
502
503 extern void dump_chains (FILE *, vec<chain_p> );
504 void
505 dump_chains (FILE *file, vec<chain_p> chains)
506 {
507 chain_p chain;
508 unsigned i;
509
510 FOR_EACH_VEC_ELT (chains, i, chain)
511 dump_chain (file, chain);
512 }
513
514 /* Dumps COMP to FILE. */
515
516 extern void dump_component (FILE *, struct component *);
517 void
518 dump_component (FILE *file, struct component *comp)
519 {
520 dref a;
521 unsigned i;
522
523 fprintf (file, "Component%s:\n",
524 comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
525 FOR_EACH_VEC_ELT (comp->refs, i, a)
526 dump_dref (file, a);
527 fprintf (file, "\n");
528 }
529
530 /* Dumps COMPS to FILE. */
531
532 extern void dump_components (FILE *, struct component *);
533 void
534 dump_components (FILE *file, struct component *comps)
535 {
536 struct component *comp;
537
538 for (comp = comps; comp; comp = comp->next)
539 dump_component (file, comp);
540 }
541
542 /* Frees a chain CHAIN. */
543
544 static void
545 release_chain (chain_p chain)
546 {
547 dref ref;
548 unsigned i;
549
550 if (chain == NULL)
551 return;
552
553 FOR_EACH_VEC_ELT (chain->refs, i, ref)
554 free (ref);
555
556 chain->refs.release ();
557 chain->vars.release ();
558 chain->inits.release ();
559 if (chain->init_seq)
560 gimple_seq_discard (chain->init_seq);
561
562 chain->finis.release ();
563 if (chain->fini_seq)
564 gimple_seq_discard (chain->fini_seq);
565
566 free (chain);
567 }
568
569 /* Frees CHAINS. */
570
571 static void
572 release_chains (vec<chain_p> chains)
573 {
574 unsigned i;
575 chain_p chain;
576
577 FOR_EACH_VEC_ELT (chains, i, chain)
578 release_chain (chain);
579 chains.release ();
580 }
581
582 /* Frees a component COMP. */
583
584 static void
585 release_component (struct component *comp)
586 {
587 comp->refs.release ();
588 free (comp);
589 }
590
591 /* Frees list of components COMPS. */
592
593 static void
594 release_components (struct component *comps)
595 {
596 struct component *act, *next;
597
598 for (act = comps; act; act = next)
599 {
600 next = act->next;
601 release_component (act);
602 }
603 }
604
/* Finds a root of tree given by FATHERS containing A, and performs path
   shortening (union-find "find" with path compression).  */

static unsigned
component_of (unsigned fathers[], unsigned a)
{
  /* Locate the representative: the node that is its own father.  */
  unsigned root = a;
  while (fathers[root] != root)
    root = fathers[root];

  /* Compress the path: point every visited node directly at ROOT.  */
  while (a != root)
    {
      unsigned next = fathers[a];
      fathers[a] = root;
      a = next;
    }

  return root;
}
624
625 /* Join operation for DFU. FATHERS gives the tree, SIZES are sizes of the
626 components, A and B are components to merge. */
627
628 static void
629 merge_comps (unsigned fathers[], unsigned sizes[], unsigned a, unsigned b)
630 {
631 unsigned ca = component_of (fathers, a);
632 unsigned cb = component_of (fathers, b);
633
634 if (ca == cb)
635 return;
636
637 if (sizes[ca] < sizes[cb])
638 {
639 sizes[cb] += sizes[ca];
640 fathers[ca] = cb;
641 }
642 else
643 {
644 sizes[ca] += sizes[cb];
645 fathers[cb] = ca;
646 }
647 }
648
649 /* Returns true if A is a reference that is suitable for predictive commoning
650 in the innermost loop that contains it. REF_STEP is set according to the
651 step of the reference A. */
652
653 static bool
654 suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
655 {
656 tree ref = DR_REF (a), step = DR_STEP (a);
657
658 if (!step
659 || TREE_THIS_VOLATILE (ref)
660 || !is_gimple_reg_type (TREE_TYPE (ref))
661 || tree_could_throw_p (ref))
662 return false;
663
664 if (integer_zerop (step))
665 *ref_step = RS_INVARIANT;
666 else if (integer_nonzerop (step))
667 *ref_step = RS_NONZERO;
668 else
669 *ref_step = RS_ANY;
670
671 return true;
672 }
673
/* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET, as an affine combination
   with SSA names expanded through the shared NAME_EXPANSIONS cache.  */

static void
aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
{
  tree type = TREE_TYPE (DR_OFFSET (dr));
  aff_tree delta;

  /* OFFSET := affine form of DR_OFFSET ...  */
  tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset,
				  &name_expansions);
  /* ... plus the constant DR_INIT.  */
  aff_combination_const (&delta, type, wi::to_poly_widest (DR_INIT (dr)));
  aff_combination_add (offset, &delta);
}
687
/* Determines number of iterations of the innermost enclosing loop before B
   refers to exactly the same location as A and stores it to OFF.  If A and
   B do not have the same step, they never meet, or anything else fails,
   returns false, otherwise returns true.  Both A and B are assumed to
   satisfy suitable_reference_p.  */

static bool
determine_offset (struct data_reference *a, struct data_reference *b,
		  poly_widest_int *off)
{
  aff_tree diff, baseb, step;
  tree typea, typeb;

  /* Check that both the references access the location in the same type.  */
  typea = TREE_TYPE (DR_REF (a));
  typeb = TREE_TYPE (DR_REF (b));
  if (!useless_type_conversion_p (typeb, typea))
    return false;

  /* Check whether the base address and the step of both references is the
     same.  */
  if (!operand_equal_p (DR_STEP (a), DR_STEP (b), 0)
      || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), 0))
    return false;

  if (integer_zerop (DR_STEP (a)))
    {
      /* If the references have loop invariant address, check that they access
	 exactly the same location.  */
      *off = 0;
      return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0)
	      && operand_equal_p (DR_INIT (a), DR_INIT (b), 0));
    }

  /* Compare the offsets of the addresses, and check whether the difference
     is a multiple of step.  DIFF := offset(A) - offset(B).  */
  aff_combination_dr_offset (a, &diff);
  aff_combination_dr_offset (b, &baseb);
  aff_combination_scale (&baseb, -1);
  aff_combination_add (&diff, &baseb);

  /* OFF := DIFF / STEP, if the division is exact and constant.  */
  tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)),
				  &step, &name_expansions);
  return aff_combination_constant_multiple_p (&diff, &step, off);
}
733
734 /* Returns the last basic block in LOOP for that we are sure that
735 it is executed whenever the loop is entered. */
736
737 static basic_block
738 last_always_executed_block (class loop *loop)
739 {
740 unsigned i;
741 vec<edge> exits = get_loop_exit_edges (loop);
742 edge ex;
743 basic_block last = loop->latch;
744
745 FOR_EACH_VEC_ELT (exits, i, ex)
746 last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
747 exits.release ();
748
749 return last;
750 }
751
/* Splits dependence graph on DATAREFS described by DEPENDS to components.
   Implemented with a union-find (DFU) forest over the references; the
   extra entry N is a reserved component for "bad" references that can
   never take part in predictive commoning.  */

static struct component *
split_data_refs_to_components (class loop *loop,
			       vec<data_reference_p> datarefs,
			       vec<ddr_p> depends)
{
  unsigned i, n = datarefs.length ();
  unsigned ca, ia, ib, bad;
  /* DFU forest: comp_father[i] is the parent of reference I, comp_size[i]
     the size of the component rooted at I; entry N is the bad one.  */
  unsigned *comp_father = XNEWVEC (unsigned, n + 1);
  unsigned *comp_size = XNEWVEC (unsigned, n + 1);
  struct component **comps;
  struct data_reference *dr, *dra, *drb;
  struct data_dependence_relation *ddr;
  struct component *comp_list = NULL, *comp;
  dref dataref;
  /* Don't do store elimination if loop has multiple exit edges.  */
  bool eliminate_store_p = single_exit (loop) != NULL;
  basic_block last_always_executed = last_always_executed_block (loop);

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      if (!DR_REF (dr))
	{
	  /* A fake reference for call or asm_expr that may clobber memory;
	     just fail.  */
	  goto end;
	}
      /* predcom pass isn't prepared to handle calls with data references.  */
      if (is_gimple_call (DR_STMT (dr)))
	goto end;
      /* Remember the reference's index in its aux field; initially each
	 reference forms a singleton component.  */
      dr->aux = (void *) (size_t) i;
      comp_father[i] = i;
      comp_size[i] = 1;
    }

  /* A component reserved for the "bad" data references.  */
  comp_father[n] = n;
  comp_size[n] = 1;

  /* References not suitable for predictive commoning at all go straight
     into the bad component.  */
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      enum ref_step_type dummy;

      if (!suitable_reference_p (dr, &dummy))
	{
	  ia = (unsigned) (size_t) dr->aux;
	  merge_comps (comp_father, comp_size, n, ia);
	}
    }

  /* Merge components of references connected by a dependence.  */
  FOR_EACH_VEC_ELT (depends, i, ddr)
    {
      poly_widest_int dummy_off;

      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	continue;

      dra = DDR_A (ddr);
      drb = DDR_B (ddr);

      /* Don't do store elimination if there is any unknown dependence for
	 any store data reference.  */
      if ((DR_IS_WRITE (dra) || DR_IS_WRITE (drb))
	  && (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
	      || DDR_NUM_DIST_VECTS (ddr) == 0))
	eliminate_store_p = false;

      ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
      ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
      if (ia == ib)
	continue;

      bad = component_of (comp_father, n);

      /* If both A and B are reads, we may ignore unsuitable dependences.  */
      if (DR_IS_READ (dra) && DR_IS_READ (drb))
	{
	  if (ia == bad || ib == bad
	      || !determine_offset (dra, drb, &dummy_off))
	    continue;
	}
      /* If A is read and B write or vice versa and there is unsuitable
	 dependence, instead of merging both components into a component
	 that will certainly not pass suitable_component_p, just put the
	 read into bad component, perhaps at least the write together with
	 all the other data refs in it's component will be optimizable.  */
      else if (DR_IS_READ (dra) && ib != bad)
	{
	  if (ia == bad)
	    continue;
	  else if (!determine_offset (dra, drb, &dummy_off))
	    {
	      merge_comps (comp_father, comp_size, bad, ia);
	      continue;
	    }
	}
      else if (DR_IS_READ (drb) && ia != bad)
	{
	  if (ib == bad)
	    continue;
	  else if (!determine_offset (dra, drb, &dummy_off))
	    {
	      merge_comps (comp_father, comp_size, bad, ib);
	      continue;
	    }
	}
      else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)
	       && ia != bad && ib != bad
	       && !determine_offset (dra, drb, &dummy_off))
	{
	  /* Two writes with an unsuitable dependence -- neither component
	     can be optimized, move both to the bad component.  */
	  merge_comps (comp_father, comp_size, bad, ia);
	  merge_comps (comp_father, comp_size, bad, ib);
	  continue;
	}

      merge_comps (comp_father, comp_size, ia, ib);
    }

  if (eliminate_store_p)
    {
      tree niters = number_of_latch_executions (loop);

      /* Don't do store elimination if niters info is unknown because stores
	 in the last iteration can't be eliminated and we need to recover it
	 after loop.  */
      eliminate_store_p = (niters != NULL_TREE && niters != chrec_dont_know);
    }

  /* Materialize the surviving DFU components as a linked list of
     struct component, skipping everything merged into the bad one.  */
  comps = XCNEWVEC (struct component *, n);
  bad = component_of (comp_father, n);
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      ia = (unsigned) (size_t) dr->aux;
      ca = component_of (comp_father, ia);
      if (ca == bad)
	continue;

      comp = comps[ca];
      if (!comp)
	{
	  comp = XCNEW (struct component);
	  comp->refs.create (comp_size[ca]);
	  comp->eliminate_store_p = eliminate_store_p;
	  comps[ca] = comp;
	}

      dataref = XCNEW (class dref_d);
      dataref->ref = dr;
      dataref->stmt = DR_STMT (dr);
      dataref->offset = 0;
      dataref->distance = 0;

      dataref->always_accessed
	      = dominated_by_p (CDI_DOMINATORS, last_always_executed,
				gimple_bb (dataref->stmt));
      dataref->pos = comp->refs.length ();
      comp->refs.quick_push (dataref);
    }

  for (i = 0; i < n; i++)
    {
      comp = comps[i];
      if (comp)
	{
	  comp->next = comp_list;
	  comp_list = comp;
	}
    }
  free (comps);

end:
  free (comp_father);
  free (comp_size);
  return comp_list;
}
928
/* Returns true if the component COMP satisfies the conditions
   described in 2) at the beginning of this file.  LOOP is the current
   loop.  */

static bool
suitable_component_p (class loop *loop, struct component *comp)
{
  unsigned i;
  dref a, first;
  basic_block ba, bp = loop->header;
  bool ok, has_write = false;

  /* Condition b): every reference must be executed exactly once per
     iteration, and the references must be ordered by dominance.  */
  FOR_EACH_VEC_ELT (comp->refs, i, a)
    {
      ba = gimple_bb (a->stmt);

      if (!just_once_each_iteration_p (loop, ba))
	return false;

      gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp));
      bp = ba;

      if (DR_IS_WRITE (a->ref))
	has_write = true;
    }

  /* Condition a): the first reference determines the common step.  It
     must be suitable, since unsuitable references were moved to the bad
     component earlier.  */
  first = comp->refs[0];
  ok = suitable_reference_p (first->ref, &comp->comp_step);
  gcc_assert (ok);
  first->offset = 0;

  /* Condition c): every other reference must be a known constant number
     of iterations away from the first one.  */
  for (i = 1; comp->refs.iterate (i, &a); i++)
    {
      /* Polynomial offsets are no use, since we need to know the
	 gap between iteration numbers at compile time.  */
      poly_widest_int offset;
      if (!determine_offset (first->ref, a->ref, &offset)
	  || !offset.is_constant (&a->offset))
	return false;

      enum ref_step_type a_step;
      gcc_checking_assert (suitable_reference_p (a->ref, &a_step)
			   && a_step == comp->comp_step);
    }

  /* If there is a write inside the component, we must know whether the
     step is nonzero or not -- we would not otherwise be able to recognize
     whether the value accessed by reads comes from the OFFSET-th iteration
     or the previous one.  */
  if (has_write && comp->comp_step == RS_ANY)
    return false;

  return true;
}
983
/* Check the conditions on references inside each of components COMPS,
   and remove the unsuitable components from the list.  The new list
   of components is returned.  The conditions are described in 2) at
   the beginning of this file.  LOOP is the current loop.  */

static struct component *
filter_suitable_components (class loop *loop, struct component *comps)
{
  /* COMP is a pointer to the link (head pointer or next field) through
     which the current component is reached, so unlinking needs no
     special-casing of the list head.  */
  struct component **comp, *act;

  for (comp = &comps; *comp; )
    {
      act = *comp;
      if (suitable_component_p (loop, act))
	comp = &act->next;
      else
	{
	  dref ref;
	  unsigned i;

	  /* Unlink ACT and free it together with its drefs, which are
	     not yet owned by any chain at this point.  */
	  *comp = act->next;
	  FOR_EACH_VEC_ELT (act->refs, i, ref)
	    free (ref);
	  release_component (act);
	}
    }

  return comps;
}
1013
1014 /* Compares two drefs A and B by their offset and position. Callback for
1015 qsort. */
1016
1017 static int
1018 order_drefs (const void *a, const void *b)
1019 {
1020 const dref *const da = (const dref *) a;
1021 const dref *const db = (const dref *) b;
1022 int offcmp = wi::cmps ((*da)->offset, (*db)->offset);
1023
1024 if (offcmp != 0)
1025 return offcmp;
1026
1027 return (*da)->pos - (*db)->pos;
1028 }
1029
1030 /* Compares two drefs A and B by their position. Callback for qsort. */
1031
1032 static int
1033 order_drefs_by_pos (const void *a, const void *b)
1034 {
1035 const dref *const da = (const dref *) a;
1036 const dref *const db = (const dref *) b;
1037
1038 return (*da)->pos - (*db)->pos;
1039 }
1040
1041 /* Returns root of the CHAIN. */
1042
1043 static inline dref
1044 get_chain_root (chain_p chain)
1045 {
1046 return chain->refs[0];
1047 }
1048
1049 /* Given CHAIN, returns the last write ref at DISTANCE, or NULL if it doesn't
1050 exist. */
1051
1052 static inline dref
1053 get_chain_last_write_at (chain_p chain, unsigned distance)
1054 {
1055 for (unsigned i = chain->refs.length (); i > 0; i--)
1056 if (DR_IS_WRITE (chain->refs[i - 1]->ref)
1057 && distance == chain->refs[i - 1]->distance)
1058 return chain->refs[i - 1];
1059
1060 return NULL;
1061 }
1062
1063 /* Given CHAIN, returns the last write ref with the same distance before load
1064 at index LOAD_IDX, or NULL if it doesn't exist. */
1065
1066 static inline dref
1067 get_chain_last_write_before_load (chain_p chain, unsigned load_idx)
1068 {
1069 gcc_assert (load_idx < chain->refs.length ());
1070
1071 unsigned distance = chain->refs[load_idx]->distance;
1072
1073 for (unsigned i = load_idx; i > 0; i--)
1074 if (DR_IS_WRITE (chain->refs[i - 1]->ref)
1075 && distance == chain->refs[i - 1]->distance)
1076 return chain->refs[i - 1];
1077
1078 return NULL;
1079 }
1080
/* Adds REF to the chain CHAIN.  REF's offset must not precede the root's;
   its distance, and the chain's length and bookkeeping flags, are updated
   accordingly.  */

static void
add_ref_to_chain (chain_p chain, dref ref)
{
  dref root = get_chain_root (chain);

  /* The distance is the offset gap to the root; it must be non-negative
     and fit in an unsigned HWI.  */
  gcc_assert (wi::les_p (root->offset, ref->offset));
  widest_int dist = ref->offset - root->offset;
  gcc_assert (wi::fits_uhwi_p (dist));

  chain->refs.safe_push (ref);

  ref->distance = dist.to_uhwi ();

  /* A new maximum distance resets HAS_MAX_USE_AFTER; it is recomputed
     below for this reference.  */
  if (ref->distance >= chain->length)
    {
      chain->length = ref->distance;
      chain->has_max_use_after = false;
    }

  /* Promote this chain to CT_STORE_STORE if it has multiple stores.  */
  if (DR_IS_WRITE (ref->ref))
    chain->type = CT_STORE_STORE;

  /* Don't set the flag for store-store chain since there is no use.  */
  if (chain->type != CT_STORE_STORE
      && ref->distance == chain->length
      && ref->pos > root->pos)
    chain->has_max_use_after = true;

  chain->all_always_accessed &= ref->always_accessed;
}
1114
1115 /* Returns the chain for invariant component COMP. */
1116
1117 static chain_p
1118 make_invariant_chain (struct component *comp)
1119 {
1120 chain_p chain = XCNEW (struct chain);
1121 unsigned i;
1122 dref ref;
1123
1124 chain->type = CT_INVARIANT;
1125
1126 chain->all_always_accessed = true;
1127
1128 FOR_EACH_VEC_ELT (comp->refs, i, ref)
1129 {
1130 chain->refs.safe_push (ref);
1131 chain->all_always_accessed &= ref->always_accessed;
1132 }
1133
1134 chain->inits = vNULL;
1135 chain->finis = vNULL;
1136
1137 return chain;
1138 }
1139
1140 /* Make a new chain of type TYPE rooted at REF. */
1141
1142 static chain_p
1143 make_rooted_chain (dref ref, enum chain_type type)
1144 {
1145 chain_p chain = XCNEW (struct chain);
1146
1147 chain->type = type;
1148 chain->refs.safe_push (ref);
1149 chain->all_always_accessed = ref->always_accessed;
1150 ref->distance = 0;
1151
1152 chain->inits = vNULL;
1153 chain->finis = vNULL;
1154
1155 return chain;
1156 }
1157
1158 /* Returns true if CHAIN is not trivial. */
1159
1160 static bool
1161 nontrivial_chain_p (chain_p chain)
1162 {
1163 return chain != NULL && chain->refs.length () > 1;
1164 }
1165
1166 /* Returns the ssa name that contains the value of REF, or NULL_TREE if there
1167 is no such name. */
1168
1169 static tree
1170 name_for_ref (dref ref)
1171 {
1172 tree name;
1173
1174 if (is_gimple_assign (ref->stmt))
1175 {
1176 if (!ref->ref || DR_IS_READ (ref->ref))
1177 name = gimple_assign_lhs (ref->stmt);
1178 else
1179 name = gimple_assign_rhs1 (ref->stmt);
1180 }
1181 else
1182 name = PHI_RESULT (ref->stmt);
1183
1184 return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE);
1185 }
1186
/* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
   iterations of the innermost enclosing loop).  */

static bool
valid_initializer_p (struct data_reference *ref,
		     unsigned distance, struct data_reference *root)
{
  aff_tree diff, base, step;
  poly_widest_int off;

  /* Both REF and ROOT must be accessing the same object.  */
  if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0))
    return false;

  /* The initializer is defined outside of loop, hence its address must be
     invariant inside the loop.  */
  gcc_assert (integer_zerop (DR_STEP (ref)));

  /* If the address of the reference is invariant, initializer must access
     exactly the same location.  */
  if (integer_zerop (DR_STEP (root)))
    return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), 0)
	    && operand_equal_p (DR_INIT (ref), DR_INIT (root), 0));

  /* Verify that this index of REF is equal to the root's index at
     -DISTANCE-th iteration.  Compute DIFF = offset(ROOT) - offset(REF)
     as an affine combination, and require DIFF == DISTANCE * STEP.  */
  aff_combination_dr_offset (root, &diff);
  aff_combination_dr_offset (ref, &base);
  aff_combination_scale (&base, -1);
  aff_combination_add (&diff, &base);

  tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)),
				  &step, &name_expansions);
  if (!aff_combination_constant_multiple_p (&diff, &step, &off))
    return false;

  if (maybe_ne (off, distance))
    return false;

  return true;
}
1228
/* Finds looparound phi node of LOOP that copies the value of REF, and if its
   initial value is correct (equal to initial value of REF shifted by one
   iteration), returns the phi node.  Otherwise, NULL_TREE is returned.  ROOT
   is the root of the current chain.  */

static gphi *
find_looparound_phi (class loop *loop, dref ref, dref root)
{
  tree name, init, init_ref;
  gphi *phi = NULL;
  gimple *init_stmt;
  edge latch = loop_latch_edge (loop);
  struct data_reference init_dr;
  gphi_iterator psi;

  /* Get the ssa name holding REF's value (lhs for loads, stored rhs for
     stores, phi result otherwise).  */
  if (is_gimple_assign (ref->stmt))
    {
      if (DR_IS_READ (ref->ref))
	name = gimple_assign_lhs (ref->stmt);
      else
	name = gimple_assign_rhs1 (ref->stmt);
    }
  else
    name = PHI_RESULT (ref->stmt);
  if (!name)
    return NULL;

  /* Look for a phi in the loop header whose latch argument is NAME, i.e.
     a phi that carries the value to the next iteration.  */
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      if (PHI_ARG_DEF_FROM_EDGE (phi, latch) == name)
	break;
    }

  if (gsi_end_p (psi))
    return NULL;

  /* The preheader argument must be a load from memory so that we can check
     it against REF shifted by one iteration.  */
  init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  if (TREE_CODE (init) != SSA_NAME)
    return NULL;
  init_stmt = SSA_NAME_DEF_STMT (init);
  if (gimple_code (init_stmt) != GIMPLE_ASSIGN)
    return NULL;
  gcc_assert (gimple_assign_lhs (init_stmt) == init);

  init_ref = gimple_assign_rhs1 (init_stmt);
  if (!REFERENCE_CLASS_P (init_ref)
      && !DECL_P (init_ref))
    return NULL;

  /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
     loop enclosing PHI).  */
  memset (&init_dr, 0, sizeof (struct data_reference));
  DR_REF (&init_dr) = init_ref;
  DR_STMT (&init_dr) = phi;
  if (!dr_analyze_innermost (&DR_INNERMOST (&init_dr), init_ref, loop,
			     init_stmt))
    return NULL;

  /* The initializer corresponds to REF one iteration earlier, hence
     distance + 1 relative to the chain root.  */
  if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
    return NULL;

  return phi;
}
1293
1294 /* Adds a reference for the looparound copy of REF in PHI to CHAIN. */
1295
1296 static void
1297 insert_looparound_copy (chain_p chain, dref ref, gphi *phi)
1298 {
1299 dref nw = XCNEW (class dref_d), aref;
1300 unsigned i;
1301
1302 nw->stmt = phi;
1303 nw->distance = ref->distance + 1;
1304 nw->always_accessed = 1;
1305
1306 FOR_EACH_VEC_ELT (chain->refs, i, aref)
1307 if (aref->distance >= nw->distance)
1308 break;
1309 chain->refs.safe_insert (i, nw);
1310
1311 if (nw->distance > chain->length)
1312 {
1313 chain->length = nw->distance;
1314 chain->has_max_use_after = false;
1315 }
1316 }
1317
1318 /* For references in CHAIN that are copied around the LOOP (created previously
1319 by PRE, or by user), add the results of such copies to the chain. This
1320 enables us to remove the copies by unrolling, and may need less registers
1321 (also, it may allow us to combine chains together). */
1322
1323 static void
1324 add_looparound_copies (class loop *loop, chain_p chain)
1325 {
1326 unsigned i;
1327 dref ref, root = get_chain_root (chain);
1328 gphi *phi;
1329
1330 if (chain->type == CT_STORE_STORE)
1331 return;
1332
1333 FOR_EACH_VEC_ELT (chain->refs, i, ref)
1334 {
1335 phi = find_looparound_phi (loop, ref, root);
1336 if (!phi)
1337 continue;
1338
1339 bitmap_set_bit (looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
1340 insert_looparound_copy (chain, ref, phi);
1341 }
1342 }
1343
/* Find roots of the values and determine distances in the component COMP.
   The references are redistributed into CHAINS.  LOOP is the current
   loop.  */

static void
determine_roots_comp (class loop *loop,
		      struct component *comp,
		      vec<chain_p> *chains)
{
  unsigned i;
  dref a;
  chain_p chain = NULL;
  widest_int last_ofs = 0;
  enum chain_type type;

  /* Invariants are handled specially.  */
  if (comp->comp_step == RS_INVARIANT)
    {
      chain = make_invariant_chain (comp);
      chains->safe_push (chain);
      return;
    }

  /* Trivial component.  Nothing to common; just free the single ref.  */
  if (comp->refs.length () <= 1)
    {
      if (comp->refs.length () == 1)
	{
	  free (comp->refs[0]);
	  comp->refs.truncate (0);
	}
      return;
    }

  /* Sort by (offset, position) so consecutive refs of a chain are
     adjacent and each ref's offset is >= the chain root's.  */
  comp->refs.qsort (order_drefs);

  /* For Store-Store chain, we only support load if it is dominated by a
     store statement in the same iteration of loop.  */
  if (comp->eliminate_store_p)
    for (a = NULL, i = 0; i < comp->refs.length (); i++)
      {
	if (DR_IS_WRITE (comp->refs[i]->ref))
	  a = comp->refs[i];
	else if (a == NULL || a->offset != comp->refs[i]->offset)
	  {
	    /* If there is load that is not dominated by a store in the
	       same iteration of loop, clear the flag so no Store-Store
	       chain is generated for this component.  */
	    comp->eliminate_store_p = false;
	    break;
	  }
      }

  /* Determine roots and create chains for components.  A new chain is
     started when the current ref cannot extend the running one: a write
     after a load-only chain, a write when store elimination is disabled,
     or a distance of MAX_DISTANCE or more from the chain root.  */
  FOR_EACH_VEC_ELT (comp->refs, i, a)
    {
      if (!chain
	  || (chain->type == CT_LOAD && DR_IS_WRITE (a->ref))
	  || (!comp->eliminate_store_p && DR_IS_WRITE (a->ref))
	  || wi::leu_p (MAX_DISTANCE, a->offset - last_ofs))
	{
	  if (nontrivial_chain_p (chain))
	    {
	      add_looparound_copies (loop, chain);
	      chains->safe_push (chain);
	    }
	  else
	    release_chain (chain);

	  /* Determine type of the chain.  If the root reference is a load,
	     this can only be a CT_LOAD chain; other chains are initialized
	     to CT_STORE_LOAD and might be promoted to CT_STORE_STORE when
	     new reference is added.  */
	  type = DR_IS_READ (a->ref) ? CT_LOAD : CT_STORE_LOAD;
	  chain = make_rooted_chain (a, type);
	  last_ofs = a->offset;
	  continue;
	}

      add_ref_to_chain (chain, a);
    }

  /* Flush the last chain the same way.  */
  if (nontrivial_chain_p (chain))
    {
      add_looparound_copies (loop, chain);
      chains->safe_push (chain);
    }
  else
    release_chain (chain);
}
1434
1435 /* Find roots of the values and determine distances in components COMPS, and
1436 separates the references to CHAINS. LOOP is the current loop. */
1437
1438 static void
1439 determine_roots (class loop *loop,
1440 struct component *comps, vec<chain_p> *chains)
1441 {
1442 struct component *comp;
1443
1444 for (comp = comps; comp; comp = comp->next)
1445 determine_roots_comp (loop, comp, chains);
1446 }
1447
/* Replace the reference in statement STMT with temporary variable
   NEW_TREE.  If SET is true, NEW_TREE is instead initialized to the value of
   the reference in the statement.  IN_LHS is true if the reference
   is in the lhs of STMT, false if it is in rhs.  */

static void
replace_ref_with (gimple *stmt, tree new_tree, bool set, bool in_lhs)
{
  tree val;
  gassign *new_stmt;
  gimple_stmt_iterator bsi, psi;

  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      /* Phis only occur as looparound copies; they are always uses,
	 never initialized from.  */
      gcc_assert (!in_lhs && !set);

      val = PHI_RESULT (stmt);
      bsi = gsi_after_labels (gimple_bb (stmt));
      psi = gsi_for_stmt (stmt);
      remove_phi_node (&psi, false);

      /* Turn the phi node into GIMPLE_ASSIGN.  */
      new_stmt = gimple_build_assign (val, new_tree);
      gsi_insert_before (&bsi, new_stmt, GSI_NEW_STMT);
      return;
    }

  /* Since the reference is of gimple_reg type, it should only
     appear as lhs or rhs of modify statement.  */
  gcc_assert (is_gimple_assign (stmt));

  bsi = gsi_for_stmt (stmt);

  /* If we do not need to initialize NEW_TREE, just replace the use of OLD.  */
  if (!set)
    {
      gcc_assert (!in_lhs);
      gimple_assign_set_rhs_from_tree (&bsi, new_tree);
      stmt = gsi_stmt (bsi);
      update_stmt (stmt);
      return;
    }

  if (in_lhs)
    {
      /* We have statement

	 OLD = VAL

	 If OLD is a memory reference, then VAL is gimple_val, and we transform
	 this to

	 OLD = VAL
	 NEW = VAL

	 Otherwise, we are replacing a combination chain,
	 VAL is the expression that performs the combination, and OLD is an
	 SSA name.  In this case, we transform the assignment to

	 OLD = VAL
	 NEW = OLD

	 */

      val = gimple_assign_lhs (stmt);
      if (TREE_CODE (val) != SSA_NAME)
	{
	  val = gimple_assign_rhs1 (stmt);
	  gcc_assert (gimple_assign_single_p (stmt));
	  /* A clobber has no usable value; use the default definition of
	     the temporary instead.  */
	  if (TREE_CLOBBER_P (val))
	    val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (new_tree));
	  else
	    gcc_assert (gimple_assign_copy_p (stmt));
	}
    }
  else
    {
      /* VAL = OLD

	 is transformed to

	 VAL = OLD
	 NEW = VAL  */

      val = gimple_assign_lhs (stmt);
    }

  new_stmt = gimple_build_assign (new_tree, unshare_expr (val));
  gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT);
}
1538
1539 /* Returns a memory reference to DR in the (NITERS + ITER)-th iteration
1540 of the loop it was analyzed in. Append init stmts to STMTS. */
1541
1542 static tree
1543 ref_at_iteration (data_reference_p dr, int iter,
1544 gimple_seq *stmts, tree niters = NULL_TREE)
1545 {
1546 tree off = DR_OFFSET (dr);
1547 tree coff = DR_INIT (dr);
1548 tree ref = DR_REF (dr);
1549 enum tree_code ref_code = ERROR_MARK;
1550 tree ref_type = NULL_TREE;
1551 tree ref_op1 = NULL_TREE;
1552 tree ref_op2 = NULL_TREE;
1553 tree new_offset;
1554
1555 if (iter != 0)
1556 {
1557 new_offset = size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter));
1558 if (TREE_CODE (new_offset) == INTEGER_CST)
1559 coff = size_binop (PLUS_EXPR, coff, new_offset);
1560 else
1561 off = size_binop (PLUS_EXPR, off, new_offset);
1562 }
1563
1564 if (niters != NULL_TREE)
1565 {
1566 niters = fold_convert (ssizetype, niters);
1567 new_offset = size_binop (MULT_EXPR, DR_STEP (dr), niters);
1568 if (TREE_CODE (niters) == INTEGER_CST)
1569 coff = size_binop (PLUS_EXPR, coff, new_offset);
1570 else
1571 off = size_binop (PLUS_EXPR, off, new_offset);
1572 }
1573
1574 /* While data-ref analysis punts on bit offsets it still handles
1575 bitfield accesses at byte boundaries. Cope with that. Note that
1576 if the bitfield object also starts at a byte-boundary we can simply
1577 replicate the COMPONENT_REF, but we have to subtract the component's
1578 byte-offset from the MEM_REF address first.
1579 Otherwise we simply build a BIT_FIELD_REF knowing that the bits
1580 start at offset zero. */
1581 if (TREE_CODE (ref) == COMPONENT_REF
1582 && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
1583 {
1584 unsigned HOST_WIDE_INT boff;
1585 tree field = TREE_OPERAND (ref, 1);
1586 tree offset = component_ref_field_offset (ref);
1587 ref_type = TREE_TYPE (ref);
1588 boff = tree_to_uhwi (DECL_FIELD_BIT_OFFSET (field));
1589 /* This can occur in Ada. See the comment in get_bit_range. */
1590 if (boff % BITS_PER_UNIT != 0
1591 || !tree_fits_uhwi_p (offset))
1592 {
1593 ref_code = BIT_FIELD_REF;
1594 ref_op1 = DECL_SIZE (field);
1595 ref_op2 = bitsize_zero_node;
1596 }
1597 else
1598 {
1599 boff >>= LOG2_BITS_PER_UNIT;
1600 boff += tree_to_uhwi (offset);
1601 coff = size_binop (MINUS_EXPR, coff, ssize_int (boff));
1602 ref_code = COMPONENT_REF;
1603 ref_op1 = field;
1604 ref_op2 = TREE_OPERAND (ref, 2);
1605 ref = TREE_OPERAND (ref, 0);
1606 }
1607 }
1608 tree addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off);
1609 addr = force_gimple_operand_1 (unshare_expr (addr), stmts,
1610 is_gimple_mem_ref_addr, NULL_TREE);
1611 tree alias_ptr = fold_convert (reference_alias_ptr_type (ref), coff);
1612 tree type = build_aligned_type (TREE_TYPE (ref),
1613 get_object_alignment (ref));
1614 ref = build2 (MEM_REF, type, addr, alias_ptr);
1615 if (ref_type)
1616 ref = build3 (ref_code, ref_type, ref, ref_op1, ref_op2);
1617 return ref;
1618 }
1619
1620 /* Get the initialization expression for the INDEX-th temporary variable
1621 of CHAIN. */
1622
1623 static tree
1624 get_init_expr (chain_p chain, unsigned index)
1625 {
1626 if (chain->type == CT_COMBINATION)
1627 {
1628 tree e1 = get_init_expr (chain->ch1, index);
1629 tree e2 = get_init_expr (chain->ch2, index);
1630
1631 return fold_build2 (chain->op, chain->rslt_type, e1, e2);
1632 }
1633 else
1634 return chain->inits[index];
1635 }
1636
1637 /* Returns a new temporary variable used for the I-th variable carrying
1638 value of REF. The variable's uid is marked in TMP_VARS. */
1639
1640 static tree
1641 predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars)
1642 {
1643 tree type = TREE_TYPE (ref);
1644 /* We never access the components of the temporary variable in predictive
1645 commoning. */
1646 tree var = create_tmp_reg (type, get_lsm_tmp_name (ref, i));
1647 bitmap_set_bit (tmp_vars, DECL_UID (var));
1648 return var;
1649 }
1650
/* Creates the variables for CHAIN, as well as phi nodes for them and
   initialization on entry to LOOP.  Uids of the newly created
   temporary variables are marked in TMP_VARS.  */

static void
initialize_root_vars (class loop *loop, chain_p chain, bitmap tmp_vars)
{
  unsigned i;
  unsigned n = chain->length;
  dref root = get_chain_root (chain);
  /* If no use appears at the maximum distance, the last and the first
     variable can share a declaration.  */
  bool reuse_first = !chain->has_max_use_after;
  tree ref, init, var, next;
  gphi *phi;
  gimple_seq stmts;
  edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);

  /* If N == 0, then all the references are within the single iteration.  And
     since this is an nonempty chain, reuse_first cannot be true.  */
  gcc_assert (n > 0 || !reuse_first);

  chain->vars.create (n + 1);

  if (chain->type == CT_COMBINATION)
    ref = gimple_assign_lhs (root->stmt);
  else
    ref = DR_REF (root->ref);

  /* Create N + 1 variables (reusing the first one for the last slot when
     possible) and turn them into ssa names.  */
  for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
    {
      var = predcom_tmp_var (ref, i, tmp_vars);
      chain->vars.quick_push (var);
    }
  if (reuse_first)
    chain->vars.quick_push (chain->vars[0]);

  FOR_EACH_VEC_ELT (chain->vars, i, var)
    chain->vars[i] = make_ssa_name (var);

  /* Chain the variables with phi nodes: var[i] gets its initializer on
     loop entry and var[i + 1] from the latch.  */
  for (i = 0; i < n; i++)
    {
      var = chain->vars[i];
      next = chain->vars[i + 1];
      init = get_init_expr (chain, i);

      init = force_gimple_operand (init, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (entry, stmts);

      phi = create_phi_node (var, loop->header);
      add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
      add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
    }
}
1704
/* For inter-iteration store elimination CHAIN in LOOP, returns true if
   all stores to be eliminated store loop invariant values into memory.
   In this case, we can use these invariant values directly after LOOP.  */

static bool
is_inv_store_elimination_chain (class loop *loop, chain_p chain)
{
  if (chain->length == 0 || chain->type != CT_STORE_STORE)
    return false;

  gcc_assert (!chain->has_max_use_after);

  /* If loop iterates for unknown times or fewer times than chain->length,
     we still need to setup root variable and propagate it with PHI node.  */
  tree niters = number_of_latch_executions (loop);
  if (TREE_CODE (niters) != INTEGER_CST
      || wi::leu_p (wi::to_wide (niters), chain->length))
    return false;

  /* Check stores in chain for elimination if they only store loop invariant
     values.  */
  for (unsigned i = 0; i < chain->length; i++)
    {
      dref a = get_chain_last_write_at (chain, i);
      if (a == NULL)
	continue;

      gimple *def_stmt, *stmt = a->stmt;
      if (!gimple_assign_single_p (stmt))
	return false;

      tree val = gimple_assign_rhs1 (stmt);
      if (TREE_CLOBBER_P (val))
	return false;

      /* Constants are trivially invariant.  */
      if (CONSTANT_CLASS_P (val))
	continue;

      if (TREE_CODE (val) != SSA_NAME)
	return false;

      /* An ssa name without a defining statement (default def) is
	 invariant as well.  */
      def_stmt = SSA_NAME_DEF_STMT (val);
      if (gimple_nop_p (def_stmt))
	continue;

      /* A value defined inside the loop is not invariant.  */
      if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	return false;
    }
  return true;
}
1755
/* Creates root variables for store elimination CHAIN in which stores for
   elimination only store loop invariant values.  In this case, we neither
   need to load root variables before loop nor propagate it with PHI nodes.  */

static void
initialize_root_vars_store_elim_1 (chain_p chain)
{
  tree var;
  unsigned i, n = chain->length;

  chain->vars.create (n);
  chain->vars.safe_grow_cleared (n);

  /* Initialize root value for eliminated stores at each distance.  */
  for (i = 0; i < n; i++)
    {
      dref a = get_chain_last_write_at (chain, i);
      if (a == NULL)
	continue;

      var = gimple_assign_rhs1 (a->stmt);
      chain->vars[a->distance] = var;
    }

  /* We don't propagate values with PHI nodes, so manually propagate value
     to bubble positions (distances with no store of their own inherit the
     value of the previous distance).  */
  var = chain->vars[0];
  for (i = 1; i < n; i++)
    {
      if (chain->vars[i] != NULL_TREE)
	{
	  var = chain->vars[i];
	  continue;
	}
      chain->vars[i] = var;
    }

  /* Reverse the vector so it is indexed consistently with the other
     root-variable setups.  */
  for (i = 0; i < n / 2; i++)
    std::swap (chain->vars[i], chain->vars[n - i - 1]);
}
1797
/* Creates root variables for store elimination CHAIN in which stores for
   elimination store loop variant values.  In this case, we may need to
   load root variables before LOOP and propagate it with PHI nodes.  Uids
   of the newly created root variables are marked in TMP_VARS.  */

static void
initialize_root_vars_store_elim_2 (class loop *loop,
				   chain_p chain, bitmap tmp_vars)
{
  unsigned i, n = chain->length;
  tree ref, init, var, next, val, phi_result;
  gimple *stmt;
  gimple_seq stmts;

  chain->vars.create (n);

  ref = DR_REF (get_chain_root (chain)->ref);
  for (i = 0; i < n; i++)
    {
      var = predcom_tmp_var (ref, i, tmp_vars);
      chain->vars.quick_push (var);
    }

  FOR_EACH_VEC_ELT (chain->vars, i, var)
    chain->vars[i] = make_ssa_name (var);

  /* Root values are either rhs operand of stores to be eliminated, or
     loaded from memory before loop.  */
  auto_vec<tree> vtemps;
  vtemps.safe_grow_cleared (n);
  for (i = 0; i < n; i++)
    {
      init = get_init_expr (chain, i);
      if (init == NULL_TREE)
	{
	  /* Root value is rhs operand of the store to be eliminated if
	     it isn't loaded from memory before loop.  */
	  dref a = get_chain_last_write_at (chain, i);
	  val = gimple_assign_rhs1 (a->stmt);
	  /* A clobber carries no value; replace it with the default
	     definition of the temporary.  */
	  if (TREE_CLOBBER_P (val))
	    {
	      val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (var));
	      gimple_assign_set_rhs1 (a->stmt, val);
	    }

	  vtemps[n - i - 1] = val;
	}
      else
	{
	  edge latch = loop_latch_edge (loop);
	  edge entry = loop_preheader_edge (loop);

	  /* Root value is loaded from memory before loop, we also need
	     to add PHI nodes to propagate the value across iterations.  */
	  init = force_gimple_operand (init, &stmts, true, NULL_TREE);
	  if (stmts)
	    gsi_insert_seq_on_edge_immediate (entry, stmts);

	  next = chain->vars[n - i];
	  phi_result = copy_ssa_name (next);
	  gphi *phi = create_phi_node (phi_result, loop->header);
	  add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
	  add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
	  vtemps[n - i - 1] = phi_result;
	}
    }

  /* Find the insertion position: after the last reference of the chain
     in the loop body.  */
  dref last = get_chain_root (chain);
  for (i = 0; i < chain->refs.length (); i++)
    {
      if (chain->refs[i]->pos > last->pos)
	last = chain->refs[i];
    }

  gimple_stmt_iterator gsi = gsi_for_stmt (last->stmt);

  /* Insert statements copying root value to root variable.  */
  for (i = 0; i < n; i++)
    {
      var = chain->vars[i];
      val = vtemps[i];
      stmt = gimple_build_assign (var, val);
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
    }
}
1884
1885 /* Generates stores for CHAIN's eliminated stores in LOOP's last
1886 (CHAIN->length - 1) iterations. */
1887
1888 static void
1889 finalize_eliminated_stores (class loop *loop, chain_p chain)
1890 {
1891 unsigned i, n = chain->length;
1892
1893 for (i = 0; i < n; i++)
1894 {
1895 tree var = chain->vars[i];
1896 tree fini = chain->finis[n - i - 1];
1897 gimple *stmt = gimple_build_assign (fini, var);
1898
1899 gimple_seq_add_stmt_without_update (&chain->fini_seq, stmt);
1900 }
1901
1902 if (chain->fini_seq)
1903 {
1904 gsi_insert_seq_on_edge_immediate (single_exit (loop), chain->fini_seq);
1905 chain->fini_seq = NULL;
1906 }
1907 }
1908
/* Initializes a variable for load motion for ROOT and prepares phi nodes and
   initialization on entry to LOOP if necessary.  The ssa name for the variable
   is stored in VARS.  If WRITTEN is true, also a phi node to copy its value
   around the loop is created.  Uid of the newly created temporary variable
   is marked in TMP_VARS.  INITS is the list containing the (single)
   initializer.  */

static void
initialize_root_vars_lm (class loop *loop, dref root, bool written,
			 vec<tree> *vars, vec<tree> inits,
			 bitmap tmp_vars)
{
  unsigned i;
  tree ref = DR_REF (root->ref), init, var, next;
  gimple_seq stmts;
  gphi *phi;
  edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);

  /* Find the initializer for the variable, and check that it cannot
     trap.  */
  init = inits[0];

  /* With a write in the loop we need two ssa names (the phi result and
     the latch value); a read-only chain needs just one.  */
  vars->create (written ? 2 : 1);
  var = predcom_tmp_var (ref, 0, tmp_vars);
  vars->quick_push (var);
  if (written)
    vars->quick_push ((*vars)[0]);

  FOR_EACH_VEC_ELT (*vars, i, var)
    (*vars)[i] = make_ssa_name (var);

  var = (*vars)[0];

  init = force_gimple_operand (init, &stmts, written, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (entry, stmts);

  if (written)
    {
      /* Propagate the value around the loop with a phi node.  */
      next = (*vars)[1];
      phi = create_phi_node (var, loop->header);
      add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
      add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
    }
  else
    {
      /* Read-only: a single initialization on the preheader suffices.  */
      gassign *init_stmt = gimple_build_assign (var, init);
      gsi_insert_on_edge_immediate (entry, init_stmt);
    }
}
1959

/* Execute load motion for references in chain CHAIN.  Uids of the newly
   created temporary variables are marked in TMP_VARS.  */

static void
execute_load_motion (class loop *loop, chain_p chain, bitmap tmp_vars)
{
  auto_vec<tree> vars;
  dref a;
  unsigned n_writes = 0, ridx, i;
  tree var;

  gcc_assert (chain->type == CT_INVARIANT);
  gcc_assert (!chain->combined);
  FOR_EACH_VEC_ELT (chain->refs, i, a)
    if (DR_IS_WRITE (a->ref))
      n_writes++;

  /* If there are no reads in the loop, there is nothing to do.  */
  if (n_writes == chain->refs.length ())
    return;

  initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
			   &vars, chain->inits, tmp_vars);

  /* RIDX selects which variable replaces the reference: vars[0] while
     writes remain (and a fresh ssa name per write), vars[1] once the
     last write is past.  */
  ridx = 0;
  FOR_EACH_VEC_ELT (chain->refs, i, a)
    {
      bool is_read = DR_IS_READ (a->ref);

      if (DR_IS_WRITE (a->ref))
	{
	  n_writes--;
	  if (n_writes)
	    {
	      var = vars[0];
	      var = make_ssa_name (SSA_NAME_VAR (var));
	      vars[0] = var;
	    }
	  else
	    ridx = 1;
	}

      replace_ref_with (a->stmt, vars[ridx],
			!is_read, !is_read);
    }
}
2007
2008 /* Returns the single statement in that NAME is used, excepting
2009 the looparound phi nodes contained in one of the chains. If there is no
2010 such statement, or more statements, NULL is returned. */
2011
2012 static gimple *
2013 single_nonlooparound_use (tree name)
2014 {
2015 use_operand_p use;
2016 imm_use_iterator it;
2017 gimple *stmt, *ret = NULL;
2018
2019 FOR_EACH_IMM_USE_FAST (use, it, name)
2020 {
2021 stmt = USE_STMT (use);
2022
2023 if (gimple_code (stmt) == GIMPLE_PHI)
2024 {
2025 /* Ignore uses in looparound phi nodes. Uses in other phi nodes
2026 could not be processed anyway, so just fail for them. */
2027 if (bitmap_bit_p (looparound_phis,
2028 SSA_NAME_VERSION (PHI_RESULT (stmt))))
2029 continue;
2030
2031 return NULL;
2032 }
2033 else if (is_gimple_debug (stmt))
2034 continue;
2035 else if (ret != NULL)
2036 return NULL;
2037 else
2038 ret = stmt;
2039 }
2040
2041 return ret;
2042 }
2043
/* Remove statement STMT, as well as the chain of assignments in that it is
   used.  */

static void
remove_stmt (gimple *stmt)
{
  tree name;
  gimple *next;
  gimple_stmt_iterator psi;

  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      name = PHI_RESULT (stmt);
      next = single_nonlooparound_use (name);
      reset_debug_uses (stmt);
      psi = gsi_for_stmt (stmt);
      remove_phi_node (&psi, true);

      /* Continue only into a plain ssa-name copy of the removed value;
	 anything else still has real uses.  */
      if (!next
	  || !gimple_assign_ssa_name_copy_p (next)
	  || gimple_assign_rhs1 (next) != name)
	return;

      stmt = next;
    }

  /* Walk down the chain of single-use copies, removing each one.  */
  while (1)
    {
      gimple_stmt_iterator bsi;

      bsi = gsi_for_stmt (stmt);

      name = gimple_assign_lhs (stmt);
      if (TREE_CODE (name) == SSA_NAME)
	{
	  next = single_nonlooparound_use (name);
	  reset_debug_uses (stmt);
	}
      else
	{
	  /* This is a store to be eliminated.  */
	  gcc_assert (gimple_vdef (stmt) != NULL);
	  next = NULL;
	}

      unlink_stmt_vdef (stmt);
      gsi_remove (&bsi, true);
      release_defs (stmt);

      if (!next
	  || !gimple_assign_ssa_name_copy_p (next)
	  || gimple_assign_rhs1 (next) != name)
	return;

      stmt = next;
    }
}
2101
/* Perform the predictive commoning optimization for a chain CHAIN.
   Uids of the newly created temporary variables are marked in TMP_VARS.  */

static void
execute_pred_commoning_chain (class loop *loop, chain_p chain,
			      bitmap tmp_vars)
{
  unsigned i;
  dref a;
  tree var;
  bool in_lhs;

  if (chain->combined)
    {
      /* For combined chains, just remove the statements that are used to
	 compute the values of the expression (except for the root one).
	 We delay this until after all chains are processed.  */
    }
  else if (chain->type == CT_STORE_STORE)
    {
      if (chain->length > 0)
	{
	  if (chain->inv_store_elimination)
	    {
	      /* If dead stores in this chain only store loop invariant
		 values, we can simply record the invariant value and use
		 it directly after loop.  */
	      initialize_root_vars_store_elim_1 (chain);
	    }
	  else
	    {
	      /* If dead stores in this chain store loop variant values,
		 we need to set up the variables by loading from memory
		 before loop and propagating it with PHI nodes.  */
	      initialize_root_vars_store_elim_2 (loop, chain, tmp_vars);
	    }

	  /* For inter-iteration store elimination chain, stores at each
	     distance in loop's last (chain->length - 1) iterations can't
	     be eliminated, because there is no following killing store.
	     We need to generate these stores after loop.  */
	  finalize_eliminated_stores (loop, chain);
	}

      /* Walk backwards so the last store is seen first and preserved.  */
      bool last_store_p = true;
      for (i = chain->refs.length (); i > 0; i--)
	{
	  a = chain->refs[i - 1];
	  /* Preserve the last store of the chain.  Eliminate other stores
	     which are killed by the last one.  */
	  if (DR_IS_WRITE (a->ref))
	    {
	      if (last_store_p)
		last_store_p = false;
	      else
		remove_stmt (a->stmt);

	      continue;
	    }

	  /* Any load in Store-Store chain must be dominated by a previous
	     store, we replace the load reference with rhs of the store.  */
	  dref b = get_chain_last_write_before_load (chain, i - 1);
	  gcc_assert (b != NULL);
	  var = gimple_assign_rhs1 (b->stmt);
	  replace_ref_with (a->stmt, var, false, false);
	}
    }
  else
    {
      /* For non-combined chains, set up the variables that hold its value.  */
      initialize_root_vars (loop, chain, tmp_vars);
      a = get_chain_root (chain);
      in_lhs = (chain->type == CT_STORE_LOAD
		|| chain->type == CT_COMBINATION);
      replace_ref_with (a->stmt, chain->vars[chain->length], true, in_lhs);

      /* Replace the uses of the original references by these variables.  */
      for (i = 1; chain->refs.iterate (i, &a); i++)
	{
	  var = chain->vars[chain->length - a->distance];
	  replace_ref_with (a->stmt, var, false, false);
	}
    }
}
2187
2188 /* Determines the unroll factor necessary to remove as many temporary variable
2189 copies as possible. CHAINS is the list of chains that will be
2190 optimized. */
2191
2192 static unsigned
2193 determine_unroll_factor (vec<chain_p> chains)
2194 {
2195 chain_p chain;
2196 unsigned factor = 1, af, nfactor, i;
2197 unsigned max = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
2198
2199 FOR_EACH_VEC_ELT (chains, i, chain)
2200 {
2201 if (chain->type == CT_INVARIANT)
2202 continue;
2203 /* For now we can't handle unrolling when eliminating stores. */
2204 else if (chain->type == CT_STORE_STORE)
2205 return 1;
2206
2207 if (chain->combined)
2208 {
2209 /* For combined chains, we can't handle unrolling if we replace
2210 looparound PHIs. */
2211 dref a;
2212 unsigned j;
2213 for (j = 1; chain->refs.iterate (j, &a); j++)
2214 if (gimple_code (a->stmt) == GIMPLE_PHI)
2215 return 1;
2216 continue;
2217 }
2218
2219 /* The best unroll factor for this chain is equal to the number of
2220 temporary variables that we create for it. */
2221 af = chain->length;
2222 if (chain->has_max_use_after)
2223 af++;
2224
2225 nfactor = factor * af / gcd (factor, af);
2226 if (nfactor <= max)
2227 factor = nfactor;
2228 }
2229
2230 return factor;
2231 }
2232
2233 /* Perform the predictive commoning optimization for CHAINS.
2234 Uids of the newly created temporary variables are marked in TMP_VARS. */
2235
2236 static void
2237 execute_pred_commoning (class loop *loop, vec<chain_p> chains,
2238 bitmap tmp_vars)
2239 {
2240 chain_p chain;
2241 unsigned i;
2242
2243 FOR_EACH_VEC_ELT (chains, i, chain)
2244 {
2245 if (chain->type == CT_INVARIANT)
2246 execute_load_motion (loop, chain, tmp_vars);
2247 else
2248 execute_pred_commoning_chain (loop, chain, tmp_vars);
2249 }
2250
2251 FOR_EACH_VEC_ELT (chains, i, chain)
2252 {
2253 if (chain->type == CT_INVARIANT)
2254 ;
2255 else if (chain->combined)
2256 {
2257 /* For combined chains, just remove the statements that are used to
2258 compute the values of the expression (except for the root one). */
2259 dref a;
2260 unsigned j;
2261 for (j = 1; chain->refs.iterate (j, &a); j++)
2262 remove_stmt (a->stmt);
2263 }
2264 }
2265
2266 update_ssa (TODO_update_ssa_only_virtuals);
2267 }
2268
2269 /* For each reference in CHAINS, if its defining statement is
2270 phi node, record the ssa name that is defined by it. */
2271
2272 static void
2273 replace_phis_by_defined_names (vec<chain_p> chains)
2274 {
2275 chain_p chain;
2276 dref a;
2277 unsigned i, j;
2278
2279 FOR_EACH_VEC_ELT (chains, i, chain)
2280 FOR_EACH_VEC_ELT (chain->refs, j, a)
2281 {
2282 if (gimple_code (a->stmt) == GIMPLE_PHI)
2283 {
2284 a->name_defined_by_phi = PHI_RESULT (a->stmt);
2285 a->stmt = NULL;
2286 }
2287 }
2288 }
2289
2290 /* For each reference in CHAINS, if name_defined_by_phi is not
2291 NULL, use it to set the stmt field. */
2292
2293 static void
2294 replace_names_by_phis (vec<chain_p> chains)
2295 {
2296 chain_p chain;
2297 dref a;
2298 unsigned i, j;
2299
2300 FOR_EACH_VEC_ELT (chains, i, chain)
2301 FOR_EACH_VEC_ELT (chain->refs, j, a)
2302 if (a->stmt == NULL)
2303 {
2304 a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi);
2305 gcc_assert (gimple_code (a->stmt) == GIMPLE_PHI);
2306 a->name_defined_by_phi = NULL_TREE;
2307 }
2308 }
2309
/* Wrapper over execute_pred_commoning, to pass it as a callback
   to tree_transform_and_unroll_loop.  */

struct epcc_data
{
  /* The chains to be transformed by execute_pred_commoning.  */
  vec<chain_p> chains;
  /* Bitmap of uids of the temporary variables created for the chains.  */
  bitmap tmp_vars;
};
2318
2319 static void
2320 execute_pred_commoning_cbck (class loop *loop, void *data)
2321 {
2322 struct epcc_data *const dta = (struct epcc_data *) data;
2323
2324 /* Restore phi nodes that were replaced by ssa names before
2325 tree_transform_and_unroll_loop (see detailed description in
2326 tree_predictive_commoning_loop). */
2327 replace_names_by_phis (dta->chains);
2328 execute_pred_commoning (loop, dta->chains, dta->tmp_vars);
2329 }
2330
/* Base NAME and all the names in the chain of phi nodes that use it
   on variable VAR.  The phi nodes are recognized by being in the copies of
   the header of the LOOP.  */

static void
base_names_in_chain_on (class loop *loop, tree name, tree var)
{
  gimple *stmt, *phi;
  imm_use_iterator iter;

  /* Rebase NAME itself on VAR.  */
  replace_ssa_name_symbol (name, var);

  while (1)
    {
      /* Find a phi node inside LOOP that uses NAME; such phis continue
	 the copy chain created by unrolling.  */
      phi = NULL;
      FOR_EACH_IMM_USE_STMT (stmt, iter, name)
	{
	  if (gimple_code (stmt) == GIMPLE_PHI
	      && flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
	    {
	      phi = stmt;
	      BREAK_FROM_IMM_USE_STMT (iter);
	    }
	}
      /* End of the chain -- nothing more to rebase.  */
      if (!phi)
	return;

      /* Continue with the phi result, rebasing it on VAR as well.  */
      name = PHI_RESULT (phi);
      replace_ssa_name_symbol (name, var);
    }
}
2362
/* Given an unrolled LOOP after predictive commoning, remove the
   register copies arising from phi nodes by changing the base
   variables of SSA names.  TMP_VARS is the set of the temporary variables
   for those we want to perform this.  */

static void
eliminate_temp_copies (class loop *loop, bitmap tmp_vars)
{
  edge e;
  gphi *phi;
  gimple *stmt;
  tree name, use, var;
  gphi_iterator psi;

  e = loop_latch_edge (loop);
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      name = PHI_RESULT (phi);
      var = SSA_NAME_VAR (name);
      /* Only process phis whose result is based on one of the temporary
	 variables we created (recorded in TMP_VARS by uid).  */
      if (!var || !bitmap_bit_p (tmp_vars, DECL_UID (var)))
	continue;
      use = PHI_ARG_DEF_FROM_EDGE (phi, e);
      gcc_assert (TREE_CODE (use) == SSA_NAME);

      /* Base all the ssa names in the ud and du chain of NAME on VAR.  */
      stmt = SSA_NAME_DEF_STMT (use);
      while (gimple_code (stmt) == GIMPLE_PHI
	     /* In case we could not unroll the loop enough to eliminate
		all copies, we may reach the loop header before the defining
		statement (in that case, some register copies will be present
		in loop latch in the final code, corresponding to the newly
		created looparound phi nodes).  */
	     && gimple_bb (stmt) != loop->header)
	{
	  gcc_assert (single_pred_p (gimple_bb (stmt)));
	  /* Step backwards through the single-argument copy phis.  */
	  use = PHI_ARG_DEF (stmt, 0);
	  stmt = SSA_NAME_DEF_STMT (use);
	}

      base_names_in_chain_on (loop, use, var);
    }
}
2406
2407 /* Returns true if CHAIN is suitable to be combined. */
2408
2409 static bool
2410 chain_can_be_combined_p (chain_p chain)
2411 {
2412 return (!chain->combined
2413 && (chain->type == CT_LOAD || chain->type == CT_COMBINATION));
2414 }
2415
2416 /* Returns the modify statement that uses NAME. Skips over assignment
2417 statements, NAME is replaced with the actual name used in the returned
2418 statement. */
2419
2420 static gimple *
2421 find_use_stmt (tree *name)
2422 {
2423 gimple *stmt;
2424 tree rhs, lhs;
2425
2426 /* Skip over assignments. */
2427 while (1)
2428 {
2429 stmt = single_nonlooparound_use (*name);
2430 if (!stmt)
2431 return NULL;
2432
2433 if (gimple_code (stmt) != GIMPLE_ASSIGN)
2434 return NULL;
2435
2436 lhs = gimple_assign_lhs (stmt);
2437 if (TREE_CODE (lhs) != SSA_NAME)
2438 return NULL;
2439
2440 if (gimple_assign_copy_p (stmt))
2441 {
2442 rhs = gimple_assign_rhs1 (stmt);
2443 if (rhs != *name)
2444 return NULL;
2445
2446 *name = lhs;
2447 }
2448 else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
2449 == GIMPLE_BINARY_RHS)
2450 return stmt;
2451 else
2452 return NULL;
2453 }
2454 }
2455
2456 /* Returns true if we may perform reassociation for operation CODE in TYPE. */
2457
2458 static bool
2459 may_reassociate_p (tree type, enum tree_code code)
2460 {
2461 if (FLOAT_TYPE_P (type)
2462 && !flag_unsafe_math_optimizations)
2463 return false;
2464
2465 return (commutative_tree_code (code)
2466 && associative_tree_code (code));
2467 }
2468
2469 /* If the operation used in STMT is associative and commutative, go through the
2470 tree of the same operations and returns its root. Distance to the root
2471 is stored in DISTANCE. */
2472
2473 static gimple *
2474 find_associative_operation_root (gimple *stmt, unsigned *distance)
2475 {
2476 tree lhs;
2477 gimple *next;
2478 enum tree_code code = gimple_assign_rhs_code (stmt);
2479 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2480 unsigned dist = 0;
2481
2482 if (!may_reassociate_p (type, code))
2483 return NULL;
2484
2485 while (1)
2486 {
2487 lhs = gimple_assign_lhs (stmt);
2488 gcc_assert (TREE_CODE (lhs) == SSA_NAME);
2489
2490 next = find_use_stmt (&lhs);
2491 if (!next
2492 || gimple_assign_rhs_code (next) != code)
2493 break;
2494
2495 stmt = next;
2496 dist++;
2497 }
2498
2499 if (distance)
2500 *distance = dist;
2501 return stmt;
2502 }
2503
2504 /* Returns the common statement in that NAME1 and NAME2 have a use. If there
2505 is no such statement, returns NULL_TREE. In case the operation used on
2506 NAME1 and NAME2 is associative and commutative, returns the root of the
2507 tree formed by this operation instead of the statement that uses NAME1 or
2508 NAME2. */
2509
2510 static gimple *
2511 find_common_use_stmt (tree *name1, tree *name2)
2512 {
2513 gimple *stmt1, *stmt2;
2514
2515 stmt1 = find_use_stmt (name1);
2516 if (!stmt1)
2517 return NULL;
2518
2519 stmt2 = find_use_stmt (name2);
2520 if (!stmt2)
2521 return NULL;
2522
2523 if (stmt1 == stmt2)
2524 return stmt1;
2525
2526 stmt1 = find_associative_operation_root (stmt1, NULL);
2527 if (!stmt1)
2528 return NULL;
2529 stmt2 = find_associative_operation_root (stmt2, NULL);
2530 if (!stmt2)
2531 return NULL;
2532
2533 return (stmt1 == stmt2 ? stmt1 : NULL);
2534 }
2535
2536 /* Checks whether R1 and R2 are combined together using CODE, with the result
2537 in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
2538 if it is true. If CODE is ERROR_MARK, set these values instead. */
2539
2540 static bool
2541 combinable_refs_p (dref r1, dref r2,
2542 enum tree_code *code, bool *swap, tree *rslt_type)
2543 {
2544 enum tree_code acode;
2545 bool aswap;
2546 tree atype;
2547 tree name1, name2;
2548 gimple *stmt;
2549
2550 name1 = name_for_ref (r1);
2551 name2 = name_for_ref (r2);
2552 gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);
2553
2554 stmt = find_common_use_stmt (&name1, &name2);
2555
2556 if (!stmt
2557 /* A simple post-dominance check - make sure the combination
2558 is executed under the same condition as the references. */
2559 || (gimple_bb (stmt) != gimple_bb (r1->stmt)
2560 && gimple_bb (stmt) != gimple_bb (r2->stmt)))
2561 return false;
2562
2563 acode = gimple_assign_rhs_code (stmt);
2564 aswap = (!commutative_tree_code (acode)
2565 && gimple_assign_rhs1 (stmt) != name1);
2566 atype = TREE_TYPE (gimple_assign_lhs (stmt));
2567
2568 if (*code == ERROR_MARK)
2569 {
2570 *code = acode;
2571 *swap = aswap;
2572 *rslt_type = atype;
2573 return true;
2574 }
2575
2576 return (*code == acode
2577 && *swap == aswap
2578 && *rslt_type == atype);
2579 }
2580
2581 /* Remove OP from the operation on rhs of STMT, and replace STMT with
2582 an assignment of the remaining operand. */
2583
2584 static void
2585 remove_name_from_operation (gimple *stmt, tree op)
2586 {
2587 tree other_op;
2588 gimple_stmt_iterator si;
2589
2590 gcc_assert (is_gimple_assign (stmt));
2591
2592 if (gimple_assign_rhs1 (stmt) == op)
2593 other_op = gimple_assign_rhs2 (stmt);
2594 else
2595 other_op = gimple_assign_rhs1 (stmt);
2596
2597 si = gsi_for_stmt (stmt);
2598 gimple_assign_set_rhs_from_tree (&si, other_op);
2599
2600 /* We should not have reallocated STMT. */
2601 gcc_assert (gsi_stmt (si) == stmt);
2602
2603 update_stmt (stmt);
2604 }
2605
/* Reassociates the expression in that NAME1 and NAME2 are used so that they
   are combined in a single statement, and returns this statement.  */

static gimple *
reassociate_to_the_same_stmt (tree name1, tree name2)
{
  gimple *stmt1, *stmt2, *root1, *root2, *s1, *s2;
  gassign *new_stmt, *tmp_stmt;
  tree new_name, tmp_name, var, r1, r2;
  unsigned dist1, dist2;
  enum tree_code code;
  tree type = TREE_TYPE (name1);
  gimple_stmt_iterator bsi;

  stmt1 = find_use_stmt (&name1);
  stmt2 = find_use_stmt (&name2);
  root1 = find_associative_operation_root (stmt1, &dist1);
  root2 = find_associative_operation_root (stmt2, &dist2);
  code = gimple_assign_rhs_code (stmt1);

  /* The caller guarantees both names feed the same associative
     operation tree.  */
  gcc_assert (root1 && root2 && root1 == root2
	      && code == gimple_assign_rhs_code (stmt2));

  /* Find the root of the nearest expression in that both NAME1 and NAME2
     are used.  */
  r1 = name1;
  s1 = stmt1;
  r2 = name2;
  s2 = stmt2;

  /* First bring both walks to the same depth below the common root...  */
  while (dist1 > dist2)
    {
      s1 = find_use_stmt (&r1);
      r1 = gimple_assign_lhs (s1);
      dist1--;
    }
  while (dist2 > dist1)
    {
      s2 = find_use_stmt (&r2);
      r2 = gimple_assign_lhs (s2);
      dist2--;
    }

  /* ...then climb in lockstep until the walks meet.  */
  while (s1 != s2)
    {
      s1 = find_use_stmt (&r1);
      r1 = gimple_assign_lhs (s1);
      s2 = find_use_stmt (&r2);
      r2 = gimple_assign_lhs (s2);
    }

  /* Remove NAME1 and NAME2 from the statements in that they are used
     currently.  */
  remove_name_from_operation (stmt1, name1);
  remove_name_from_operation (stmt2, name2);

  /* Insert the new statement combining NAME1 and NAME2 before S1, and
     combine it with the rhs of S1.  */
  var = create_tmp_reg (type, "predreastmp");
  new_name = make_ssa_name (var);
  new_stmt = gimple_build_assign (new_name, code, name1, name2);

  var = create_tmp_reg (type, "predreastmp");
  tmp_name = make_ssa_name (var);

  /* Rhs of S1 may now be either a binary expression with operation
     CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
     so that name1 or name2 was removed from it).  */
  tmp_stmt = gimple_build_assign (tmp_name, gimple_assign_rhs_code (s1),
				  gimple_assign_rhs1 (s1),
				  gimple_assign_rhs2 (s1));

  /* S1 becomes new_name CODE tmp_name; the old rhs of S1 lives on in
     TMP_STMT.  */
  bsi = gsi_for_stmt (s1);
  gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name);
  s1 = gsi_stmt (bsi);
  update_stmt (s1);

  gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
  gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT);

  return new_stmt;
}
2688
2689 /* Returns the statement that combines references R1 and R2. In case R1
2690 and R2 are not used in the same statement, but they are used with an
2691 associative and commutative operation in the same expression, reassociate
2692 the expression so that they are used in the same statement. */
2693
2694 static gimple *
2695 stmt_combining_refs (dref r1, dref r2)
2696 {
2697 gimple *stmt1, *stmt2;
2698 tree name1 = name_for_ref (r1);
2699 tree name2 = name_for_ref (r2);
2700
2701 stmt1 = find_use_stmt (&name1);
2702 stmt2 = find_use_stmt (&name2);
2703 if (stmt1 == stmt2)
2704 return stmt1;
2705
2706 return reassociate_to_the_same_stmt (name1, name2);
2707 }
2708
2709 /* Tries to combine chains CH1 and CH2 together. If this succeeds, the
2710 description of the new chain is returned, otherwise we return NULL. */
2711
2712 static chain_p
2713 combine_chains (chain_p ch1, chain_p ch2)
2714 {
2715 dref r1, r2, nw;
2716 enum tree_code op = ERROR_MARK;
2717 bool swap = false;
2718 chain_p new_chain;
2719 unsigned i;
2720 tree rslt_type = NULL_TREE;
2721
2722 if (ch1 == ch2)
2723 return NULL;
2724 if (ch1->length != ch2->length)
2725 return NULL;
2726
2727 if (ch1->refs.length () != ch2->refs.length ())
2728 return NULL;
2729
2730 for (i = 0; (ch1->refs.iterate (i, &r1)
2731 && ch2->refs.iterate (i, &r2)); i++)
2732 {
2733 if (r1->distance != r2->distance)
2734 return NULL;
2735
2736 if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type))
2737 return NULL;
2738 }
2739
2740 if (swap)
2741 std::swap (ch1, ch2);
2742
2743 new_chain = XCNEW (struct chain);
2744 new_chain->type = CT_COMBINATION;
2745 new_chain->op = op;
2746 new_chain->ch1 = ch1;
2747 new_chain->ch2 = ch2;
2748 new_chain->rslt_type = rslt_type;
2749 new_chain->length = ch1->length;
2750
2751 for (i = 0; (ch1->refs.iterate (i, &r1)
2752 && ch2->refs.iterate (i, &r2)); i++)
2753 {
2754 nw = XCNEW (class dref_d);
2755 nw->stmt = stmt_combining_refs (r1, r2);
2756 nw->distance = r1->distance;
2757
2758 new_chain->refs.safe_push (nw);
2759 }
2760
2761 ch1->combined = true;
2762 ch2->combined = true;
2763 return new_chain;
2764 }
2765
2766 /* Recursively update position information of all offspring chains to ROOT
2767 chain's position information. */
2768
2769 static void
2770 update_pos_for_combined_chains (chain_p root)
2771 {
2772 chain_p ch1 = root->ch1, ch2 = root->ch2;
2773 dref ref, ref1, ref2;
2774 for (unsigned j = 0; (root->refs.iterate (j, &ref)
2775 && ch1->refs.iterate (j, &ref1)
2776 && ch2->refs.iterate (j, &ref2)); ++j)
2777 ref1->pos = ref2->pos = ref->pos;
2778
2779 if (ch1->type == CT_COMBINATION)
2780 update_pos_for_combined_chains (ch1);
2781 if (ch2->type == CT_COMBINATION)
2782 update_pos_for_combined_chains (ch2);
2783 }
2784
2785 /* Returns true if statement S1 dominates statement S2. */
2786
2787 static bool
2788 pcom_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
2789 {
2790 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
2791
2792 if (!bb1 || s1 == s2)
2793 return true;
2794
2795 if (bb1 == bb2)
2796 return gimple_uid (s1) < gimple_uid (s2);
2797
2798 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
2799 }
2800
/* Try to combine the CHAINS in LOOP.  */

static void
try_combine_chains (class loop *loop, vec<chain_p> *chains)
{
  unsigned i, j;
  chain_p ch1, ch2, cch;
  auto_vec<chain_p> worklist;
  bool combined_p = false;

  /* Seed the worklist with all combinable chains.  */
  FOR_EACH_VEC_ELT (*chains, i, ch1)
    if (chain_can_be_combined_p (ch1))
      worklist.safe_push (ch1);

  /* Repeatedly pick a candidate and try to pair it with any other
     combinable chain; a successful combination is itself a new
     candidate, so the process iterates to a fixed point.  */
  while (!worklist.is_empty ())
    {
      ch1 = worklist.pop ();
      if (!chain_can_be_combined_p (ch1))
	continue;

      FOR_EACH_VEC_ELT (*chains, j, ch2)
	{
	  if (!chain_can_be_combined_p (ch2))
	    continue;

	  cch = combine_chains (ch1, ch2);
	  if (cch)
	    {
	      worklist.safe_push (cch);
	      chains->safe_push (cch);
	      combined_p = true;
	      break;
	    }
	}
    }
  if (!combined_p)
    return;

  /* Setup UID for all statements in dominance order.  */
  basic_block *bbs = get_loop_body_in_dom_order (loop);
  renumber_gimple_stmt_uids_in_blocks (bbs, loop->num_nodes);
  free (bbs);

  /* Re-association in combined chains may generate statements different to
     order of references of the original chain.  We need to keep references
     of combined chain in dominance order so that all uses will be inserted
     after definitions.  Note:
       A) This is necessary for all combined chains.
       B) This is only necessary for ZERO distance references because other
	  references inherit value from loop carried PHIs.

     We first update position information for all combined chains.  */
  dref ref;
  for (i = 0; chains->iterate (i, &ch1); ++i)
    {
      if (ch1->type != CT_COMBINATION || ch1->combined)
	continue;

      for (j = 0; ch1->refs.iterate (j, &ref); ++j)
	ref->pos = gimple_uid (ref->stmt);

      /* Propagate the positions down to the child chains.  */
      update_pos_for_combined_chains (ch1);
    }
  /* Then sort references according to newly updated position information.  */
  for (i = 0; chains->iterate (i, &ch1); ++i)
    {
      if (ch1->type != CT_COMBINATION && !ch1->combined)
	continue;

      /* Find the first reference with non-ZERO distance.  */
      if (ch1->length == 0)
	j = ch1->refs.length();
      else
	{
	  for (j = 0; ch1->refs.iterate (j, &ref); ++j)
	    if (ref->distance != 0)
	      break;
	}

      /* Sort all ZERO distance references by position.  */
      qsort (&ch1->refs[0], j, sizeof (ch1->refs[0]), order_drefs_by_pos);

      /* Child chains keep the flags computed for their root.  */
      if (ch1->combined)
	continue;

      /* For ZERO length chain, has_max_use_after must be true since root
	 combined stmt must dominate others.  */
      if (ch1->length == 0)
	{
	  ch1->has_max_use_after = true;
	  continue;
	}
      /* Check if there is use at max distance after root for combined chains
	 and set flag accordingly.  */
      ch1->has_max_use_after = false;
      gimple *root_stmt = get_chain_root (ch1)->stmt;
      for (j = 1; ch1->refs.iterate (j, &ref); ++j)
	{
	  if (ref->distance == ch1->length
	      && !pcom_stmt_dominates_stmt_p (ref->stmt, root_stmt))
	    {
	      ch1->has_max_use_after = true;
	      break;
	    }
	}
    }
}
2908
/* Prepare initializers for store elimination CHAIN in LOOP.  Returns false
   if this is impossible because one of these initializers may trap, true
   otherwise.  */

static bool
prepare_initializers_chain_store_elim (class loop *loop, chain_p chain)
{
  unsigned i, n = chain->length;

  /* For now we can't eliminate stores if some of them are conditional
     executed.  */
  if (!chain->all_always_accessed)
    return false;

  /* Nothing to initialize for intra-iteration store elimination.  */
  if (n == 0 && chain->type == CT_STORE_STORE)
    return true;

  /* For store elimination chain, there is nothing to initialize if stores
     to be eliminated only store loop invariant values into memory.  */
  if (chain->type == CT_STORE_STORE
      && is_inv_store_elimination_chain (loop, chain))
    {
      chain->inv_store_elimination = true;
      return true;
    }

  chain->inits.create (n);
  chain->inits.safe_grow_cleared (n);

  /* For store elimination chain like below:

     for (i = 0; i < len; i++)
       {
	 a[i] = 1;
	 // a[i + 1] = ...
	 a[i + 2] = 3;
       }

     store to a[i + 1] is missed in loop body, it acts like bubbles.  The
     content of a[i + 1] remains the same if the loop iterates fewer times
     than chain->length.  We need to set up root variables for such stores
     by loading from memory before loop.  Note we only need to load bubble
     elements because loop body is guaranteed to be executed at least once
     after loop's preheader edge.  */
  auto_vec<bool> bubbles;
  bubbles.safe_grow_cleared (n + 1);
  /* Mark the distances that do have a store in the loop body.  */
  for (i = 0; i < chain->refs.length (); i++)
    bubbles[chain->refs[i]->distance] = true;

  struct data_reference *dr = get_chain_root (chain)->ref;
  for (i = 0; i < n; i++)
    {
      /* Distances covered by a store need no pre-loop load.  */
      if (bubbles[i])
	continue;

      gimple_seq stmts = NULL;

      tree init = ref_at_iteration (dr, (int) 0 - i, &stmts);
      if (stmts)
	gimple_seq_add_seq_without_update (&chain->init_seq, stmts);

      chain->inits[i] = init;
    }

  return true;
}
2976
/* Prepare initializers for CHAIN in LOOP.  Returns false if this is
   impossible because one of these initializers may trap, true otherwise.  */

static bool
prepare_initializers_chain (class loop *loop, chain_p chain)
{
  /* Invariant chains need a single initializer; others need one per
     distance up to the chain length.  */
  unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
  struct data_reference *dr = get_chain_root (chain)->ref;
  tree init;
  dref laref;
  edge entry = loop_preheader_edge (loop);

  /* Store-store chains are handled by their own routine.  */
  if (chain->type == CT_STORE_STORE)
    return prepare_initializers_chain_store_elim (loop, chain);

  /* Find the initializers for the variables, and check that they cannot
     trap.  */
  chain->inits.create (n);
  for (i = 0; i < n; i++)
    chain->inits.quick_push (NULL_TREE);

  /* If we have replaced some looparound phi nodes, use their initializers
     instead of creating our own.  */
  FOR_EACH_VEC_ELT (chain->refs, i, laref)
    {
      if (gimple_code (laref->stmt) != GIMPLE_PHI)
	continue;

      gcc_assert (laref->distance > 0);
      chain->inits[n - laref->distance]
	= PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry);
    }

  /* Materialize the remaining initializers by loading from memory
     before the loop.  */
  for (i = 0; i < n; i++)
    {
      gimple_seq stmts = NULL;

      if (chain->inits[i] != NULL_TREE)
	continue;

      init = ref_at_iteration (dr, (int) i - n, &stmts);
      /* A conditionally-accessed reference whose initializer could trap
	 makes the whole chain unusable.  */
      if (!chain->all_always_accessed && tree_could_trap_p (init))
	{
	  gimple_seq_discard (stmts);
	  return false;
	}

      if (stmts)
	gimple_seq_add_seq_without_update (&chain->init_seq, stmts);

      chain->inits[i] = init;
    }

  return true;
}
3032
3033 /* Prepare initializers for CHAINS in LOOP, and free chains that cannot
3034 be used because the initializers might trap. */
3035
3036 static void
3037 prepare_initializers (class loop *loop, vec<chain_p> chains)
3038 {
3039 chain_p chain;
3040 unsigned i;
3041
3042 for (i = 0; i < chains.length (); )
3043 {
3044 chain = chains[i];
3045 if (prepare_initializers_chain (loop, chain))
3046 i++;
3047 else
3048 {
3049 release_chain (chain);
3050 chains.unordered_remove (i);
3051 }
3052 }
3053 }
3054
/* Generates finalizer memory references for CHAIN in LOOP.  Returns true
   if finalizer code for CHAIN can be generated, otherwise false.  */

static bool
prepare_finalizers_chain (class loop *loop, chain_p chain)
{
  unsigned i, n = chain->length;
  struct data_reference *dr = get_chain_root (chain)->ref;
  tree fini, niters = number_of_latch_executions (loop);

  /* For now we can't eliminate stores if some of them are conditional
     executed.  */
  if (!chain->all_always_accessed)
    return false;

  chain->finis.create (n);
  for (i = 0; i < n; i++)
    chain->finis.quick_push (NULL_TREE);

  /* We never use looparound phi node for store elimination chains.  */

  /* Find the finalizers for the variables, and check that they cannot
     trap.  */
  for (i = 0; i < n; i++)
    {
      gimple_seq stmts = NULL;
      gcc_assert (chain->finis[i] == NULL_TREE);

      /* Force NITERS into a gimple value once; later iterations reuse the
	 already-gimplified form.  */
      if (TREE_CODE (niters) != INTEGER_CST && TREE_CODE (niters) != SSA_NAME)
	{
	  niters = unshare_expr (niters);
	  niters = force_gimple_operand (niters, &stmts, true, NULL);
	  if (stmts)
	    {
	      gimple_seq_add_seq_without_update (&chain->fini_seq, stmts);
	      stmts = NULL;
	    }
	}
      fini = ref_at_iteration (dr, (int) 0 - i, &stmts, niters);
      if (stmts)
	gimple_seq_add_seq_without_update (&chain->fini_seq, stmts);

      chain->finis[i] = fini;
    }

  return true;
}
3102
3103 /* Generates finalizer memory reference for CHAINS in LOOP. Returns true
3104 if finalizer code generation for CHAINS breaks loop closed ssa form. */
3105
3106 static bool
3107 prepare_finalizers (class loop *loop, vec<chain_p> chains)
3108 {
3109 chain_p chain;
3110 unsigned i;
3111 bool loop_closed_ssa = false;
3112
3113 for (i = 0; i < chains.length ();)
3114 {
3115 chain = chains[i];
3116
3117 /* Finalizer is only necessary for inter-iteration store elimination
3118 chains. */
3119 if (chain->length == 0 || chain->type != CT_STORE_STORE)
3120 {
3121 i++;
3122 continue;
3123 }
3124
3125 if (prepare_finalizers_chain (loop, chain))
3126 {
3127 i++;
3128 /* Be conservative, assume loop closed ssa form is corrupted
3129 by store-store chain. Though it's not always the case if
3130 eliminated stores only store loop invariant values into
3131 memory. */
3132 loop_closed_ssa = true;
3133 }
3134 else
3135 {
3136 release_chain (chain);
3137 chains.unordered_remove (i);
3138 }
3139 }
3140 return loop_closed_ssa;
3141 }
3142
3143 /* Insert all initializing gimple stmts into loop's entry edge. */
3144
3145 static void
3146 insert_init_seqs (class loop *loop, vec<chain_p> chains)
3147 {
3148 unsigned i;
3149 edge entry = loop_preheader_edge (loop);
3150
3151 for (i = 0; i < chains.length (); ++i)
3152 if (chains[i]->init_seq)
3153 {
3154 gsi_insert_seq_on_edge_immediate (entry, chains[i]->init_seq);
3155 chains[i]->init_seq = NULL;
3156 }
3157 }
3158
/* Performs predictive commoning for LOOP.  Sets bit 1<<0 of return value
   if LOOP was unrolled; Sets bit 1<<1 of return value if loop closed ssa
   form was corrupted.  */

static unsigned
tree_predictive_commoning_loop (class loop *loop)
{
  vec<data_reference_p> datarefs;
  vec<ddr_p> dependences;
  struct component *components;
  vec<chain_p> chains = vNULL;
  unsigned unroll_factor;
  class tree_niter_desc desc;
  bool unroll = false, loop_closed_ssa = false;
  edge exit;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Processing loop %d\n", loop->num);

  /* Nothing for predictive commoning if loop only iterates 1 time.  */
  if (get_max_loop_iterations_int (loop) == 0)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Loop iterates only 1 time, nothing to do.\n");

      return 0;
    }

  /* Find the data references and split them into components according to their
     dependence relations.  */
  auto_vec<loop_p, 3> loop_nest;
  dependences.create (10);
  datarefs.create (10);
  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
					   &dependences))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Cannot analyze data dependencies\n");
      free_data_refs (datarefs);
      free_dependence_relations (dependences);
      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_data_dependence_relations (dump_file, dependences);

  components = split_data_refs_to_components (loop, datarefs, dependences);
  loop_nest.release ();
  free_dependence_relations (dependences);
  if (!components)
    {
      free_data_refs (datarefs);
      free_affine_expand_cache (&name_expansions);
      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial state:\n\n");
      dump_components (dump_file, components);
    }

  /* Find the suitable components and split them into chains.  */
  components = filter_suitable_components (loop, components);

  auto_bitmap tmp_vars;
  looparound_phis = BITMAP_ALLOC (NULL);
  determine_roots (loop, components, &chains);
  release_components (components);

  if (!chains.exists ())
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Predictive commoning failed: no suitable chains\n");
      goto end;
    }
  prepare_initializers (loop, chains);
  /* Finalizer generation for store-store chains may break loop closed
     ssa form; remember that for the return value.  */
  loop_closed_ssa = prepare_finalizers (loop, chains);

  /* Try to combine the chains that are always worked with together.  */
  try_combine_chains (loop, &chains);

  insert_init_seqs (loop, chains);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Before commoning:\n\n");
      dump_chains (dump_file, chains);
    }

  /* Determine the unroll factor, and if the loop should be unrolled, ensure
     that its number of iterations is divisible by the factor.  */
  unroll_factor = determine_unroll_factor (chains);
  scev_reset ();
  unroll = (unroll_factor > 1
	    && can_unroll_loop_p (loop, unroll_factor, &desc));
  exit = single_dom_exit (loop);

  /* Execute the predictive commoning transformations, and possibly unroll the
     loop.  */
  if (unroll)
    {
      struct epcc_data dta;

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);

      dta.chains = chains;
      dta.tmp_vars = tmp_vars;

      update_ssa (TODO_update_ssa_only_virtuals);

      /* Cfg manipulations performed in tree_transform_and_unroll_loop before
	 execute_pred_commoning_cbck is called may cause phi nodes to be
	 reallocated, which is a problem since CHAINS may point to these
	 statements.  To fix this, we store the ssa names defined by the
	 phi nodes here instead of the phi nodes themselves, and restore
	 the phi nodes in execute_pred_commoning_cbck.  A bit hacky.  */
      replace_phis_by_defined_names (chains);

      tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc,
				      execute_pred_commoning_cbck, &dta);
      eliminate_temp_copies (loop, tmp_vars);
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Executing predictive commoning without unrolling.\n");
      execute_pred_commoning (loop, chains, tmp_vars);
    }

end: ;
  release_chains (chains);
  free_data_refs (datarefs);
  BITMAP_FREE (looparound_phis);

  free_affine_expand_cache (&name_expansions);

  return (unroll ? 1 : 0) | (loop_closed_ssa ? 2 : 0);
}
3301
3302 /* Runs predictive commoning. */
3303
3304 unsigned
3305 tree_predictive_commoning (void)
3306 {
3307 class loop *loop;
3308 unsigned ret = 0, changed = 0;
3309
3310 initialize_original_copy_tables ();
3311 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
3312 if (optimize_loop_for_speed_p (loop))
3313 {
3314 changed |= tree_predictive_commoning_loop (loop);
3315 }
3316 free_original_copy_tables ();
3317
3318 if (changed > 0)
3319 {
3320 scev_reset ();
3321
3322 if (changed > 1)
3323 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
3324
3325 ret = TODO_cleanup_cfg;
3326 }
3327
3328 return ret;
3329 }
3330
3331 /* Predictive commoning Pass. */
3332
3333 static unsigned
3334 run_tree_predictive_commoning (struct function *fun)
3335 {
3336 if (number_of_loops (fun) <= 1)
3337 return 0;
3338
3339 return tree_predictive_commoning ();
3340 }
3341
3342 namespace {
3343
3344 const pass_data pass_data_predcom =
3345 {
3346 GIMPLE_PASS, /* type */
3347 "pcom", /* name */
3348 OPTGROUP_LOOP, /* optinfo_flags */
3349 TV_PREDCOM, /* tv_id */
3350 PROP_cfg, /* properties_required */
3351 0, /* properties_provided */
3352 0, /* properties_destroyed */
3353 0, /* todo_flags_start */
3354 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
3355 };
3356
3357 class pass_predcom : public gimple_opt_pass
3358 {
3359 public:
3360 pass_predcom (gcc::context *ctxt)
3361 : gimple_opt_pass (pass_data_predcom, ctxt)
3362 {}
3363
3364 /* opt_pass methods: */
3365 virtual bool gate (function *) { return flag_predictive_commoning != 0; }
3366 virtual unsigned int execute (function *fun)
3367 {
3368 return run_tree_predictive_commoning (fun);
3369 }
3370
3371 }; // class pass_predcom
3372
3373 } // anon namespace
3374
3375 gimple_opt_pass *
3376 make_pass_predcom (gcc::context *ctxt)
3377 {
3378 return new pass_predcom (ctxt);
3379 }
3380
3381